@midscene/core 0.8.7-beta-20241218070032.0 → 0.8.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4355,8 +4355,8 @@ var allAIConfig = () => {
4355
4355
 
4356
4356
  // src/ai-model/common.ts
4357
4357
  async function callAiFn(options) {
4358
- const { useModel, msgs, AIActionType: AIActionTypeValue } = options;
4359
- if (preferOpenAIModel(useModel)) {
4358
+ const { msgs, AIActionType: AIActionTypeValue } = options;
4359
+ if (preferOpenAIModel("openAI")) {
4360
4360
  const { content, usage } = await callToGetJSONObject(
4361
4361
  msgs,
4362
4362
  AIActionTypeValue
@@ -4830,7 +4830,7 @@ You are a versatile professional in software UI automation. Your outstanding con
4830
4830
  ## Workflow
4831
4831
 
4832
4832
  1. Receive the user's element description, screenshot, and instruction.
4833
- 2. Decompose the user's task into a sequence of actions, and place it in the \`actions\` field. There are different types of actions (Tap / Hover / Input / KeyboardPress / Scroll / FalsyIfStatement / Sleep). The "About the action" section below will give you more details.
4833
+ 2. Decompose the user's task into a sequence of actions, and place it in the \`actions\` field. There are different types of actions (Tap / Hover / Input / KeyboardPress / Scroll / FalsyConditionStatement / Sleep). The "About the action" section below will give you more details.
4834
4834
  3. Precisely locate the target element if it's already shown in the screenshot, put the location info in the \`locate\` field of the action.
4835
4835
  4. If some target elements is not shown in the screenshot, consider the user's instruction is not feasible on this page. Follow the next steps.
4836
4836
  5. Consider whether the user's instruction will be accomplished after all the actions
@@ -4841,7 +4841,8 @@ You are a versatile professional in software UI automation. Your outstanding con
4841
4841
 
4842
4842
  - All the actions you composed MUST be based on the page context information you get.
4843
4843
  - Trust the "What have been done" field about the task (if any), don't repeat actions in it.
4844
- - When the user says "If something is true, do something" in the instruction, follow it, tell if it's truthy, and give the corresponding actions. If it's not truthy, as long as the instruction is an "if" statement, it means the user can tolerate it. Just leave a \`FalsyIfStatement\` action.
4844
+ - Respond only with valid JSON. Do not write an introduction or summary.
4845
+ - If you cannot plan any action at all (i.e. empty actions array), set reason in the \`error\` field.
4845
4846
 
4846
4847
  ## About the \`actions\` field
4847
4848
 
@@ -4877,8 +4878,9 @@ Each action has a \`type\` and corresponding \`param\`. To be detailed:
4877
4878
  }
4878
4879
  * To scroll some specific element, put the element at the center of the region in the \`locate\` field. If it's a page scroll, put \`null\` in the \`locate\` field.
4879
4880
  * \`param\` is required in this action. If some fields are not specified, use direction \`down\`, \`once\` scroll type, and \`null\` distance.
4880
- - type: 'FalsyIfStatement', when there is a falsy condition and the instruction is an "if" statement (means the user can tolerate this situation)
4881
+ - type: 'FalsyConditionStatement'
4881
4882
  * { param: null }
4883
+ * use this action when the instruction is an "if" statement and the condition is falsy.
4882
4884
  - type: 'Sleep'
4883
4885
  * { param: { timeMs: number } }
4884
4886
 
@@ -4892,7 +4894,8 @@ Each action has a \`type\` and corresponding \`param\`. To be detailed:
4892
4894
 
4893
4895
  ## Output JSON Format:
4894
4896
 
4895
- Please return the result in JSON format as follows:
4897
+ The JSON format is as follows:
4898
+
4896
4899
  {
4897
4900
  "actions": [
4898
4901
  {
@@ -4965,13 +4968,13 @@ By viewing the page screenshot and description, you should consider this and out
4965
4968
  If the user says "If there is a popup, close it", you should consider this and output the JSON:
4966
4969
 
4967
4970
  * By viewing the page screenshot and description, you cannot find the popup, so the condition is falsy.
4968
- * The instruction itself is an "if" statement, it means the user can tolerate this situation, so you should leave a \`FalsyIfStatement\` action.
4971
+ * The instruction itself is an "if" statement, it means the user can tolerate this situation, so you should leave a \`FalsyConditionStatement\` action.
4969
4972
 
4970
4973
  \`\`\`json
4971
4974
  {
4972
4975
  "actions": [{
4973
4976
  "thought": "There is no popup on the page",
4974
- "type": "FalsyIfStatement",
4977
+ "type": "FalsyConditionStatement",
4975
4978
  "param": null
4976
4979
  }
4977
4980
  ],
@@ -5196,11 +5199,9 @@ async function call(messages, responseFormat) {
5196
5199
  return { content, usage: completion.usage };
5197
5200
  }
5198
5201
  async function callToGetJSONObject(messages, AIActionTypeValue) {
5199
- let responseFormat = {
5200
- type: "json_object" /* JSON */
5201
- };
5202
+ let responseFormat;
5202
5203
  const model = getModelName();
5203
- if (model === "gpt-4o-2024-08-06") {
5204
+ if (model.includes("gpt-4o")) {
5204
5205
  switch (AIActionTypeValue) {
5205
5206
  case 0 /* ASSERT */:
5206
5207
  responseFormat = assertSchema;
@@ -5214,9 +5215,9 @@ async function callToGetJSONObject(messages, AIActionTypeValue) {
5214
5215
  responseFormat = planSchema;
5215
5216
  break;
5216
5217
  }
5217
- }
5218
- if (model.startsWith("gemini")) {
5219
- responseFormat = { type: "text" /* TEXT */ };
5218
+ if (model === "gpt-4o-2024-05-13") {
5219
+ responseFormat = { type: "json_object" /* JSON */ };
5220
+ }
5220
5221
  }
5221
5222
  const safeJsonParse = (input) => {
5222
5223
  try {
@@ -5234,7 +5235,7 @@ async function callToGetJSONObject(messages, AIActionTypeValue) {
5234
5235
  try {
5235
5236
  return { content: JSON.parse(jsonContent), usage: response.usage };
5236
5237
  } catch (e) {
5237
- throw Error(`parse json error: ${response.content}`);
5238
+ throw Error(`failed to parse json response: ${response.content}`);
5238
5239
  }
5239
5240
  }
5240
5241
  function extractJSONFromCodeBlock(response) {
@@ -5281,7 +5282,7 @@ function transformElementPositionToId(aiResult, elementsInfo) {
5281
5282
  };
5282
5283
  }
5283
5284
  async function AiInspectElement(options) {
5284
- const { context, multi, targetElementDescription, callAI, useModel } = options;
5285
+ const { context, multi, targetElementDescription, callAI } = options;
5285
5286
  const { screenshotBase64, screenshotBase64WithElementMarker } = context;
5286
5287
  const { description, elementById, elementByPosition: elementByPosition2 } = await describeUserPage(context);
5287
5288
  if (options.quickAnswer) {
@@ -5356,8 +5357,7 @@ ${JSON.stringify({
5356
5357
  if (callAI) {
5357
5358
  const res = await callAI({
5358
5359
  msgs,
5359
- AIActionType: 1 /* INSPECT_ELEMENT */,
5360
- useModel
5360
+ AIActionType: 1 /* INSPECT_ELEMENT */
5361
5361
  });
5362
5362
  return {
5363
5363
  parseResult: transformElementPositionToId(res.content, context.content),
@@ -5368,8 +5368,7 @@ ${JSON.stringify({
5368
5368
  }
5369
5369
  const inspectElement = await callAiFn({
5370
5370
  msgs,
5371
- AIActionType: 1 /* INSPECT_ELEMENT */,
5372
- useModel
5371
+ AIActionType: 1 /* INSPECT_ELEMENT */
5373
5372
  });
5374
5373
  return {
5375
5374
  parseResult: transformElementPositionToId(
@@ -5419,7 +5418,6 @@ DATA_DEMAND ends.
5419
5418
  ];
5420
5419
  const result = await callAiFn({
5421
5420
  msgs,
5422
- useModel,
5423
5421
  AIActionType: 2 /* EXTRACT_DATA */
5424
5422
  });
5425
5423
  return {
@@ -5462,8 +5460,7 @@ async function AiAssert(options) {
5462
5460
  ];
5463
5461
  const { content: assertResult, usage } = await callAiFn({
5464
5462
  msgs,
5465
- AIActionType: 0 /* ASSERT */,
5466
- useModel
5463
+ AIActionType: 0 /* ASSERT */
5467
5464
  });
5468
5465
  return {
5469
5466
  content: assertResult,
@@ -5473,7 +5470,7 @@ async function AiAssert(options) {
5473
5470
 
5474
5471
  // src/ai-model/automation/index.ts
5475
5472
  var import_node_assert4 = __toESM(require("assert"));
5476
- async function plan(userPrompt, opts, useModel) {
5473
+ async function plan(userPrompt, opts) {
5477
5474
  const { callAI, context } = opts || {};
5478
5475
  const { screenshotBase64, screenshotBase64WithElementMarker } = context;
5479
5476
  const { description: pageDescription, elementByPosition: elementByPosition2 } = await describeUserPage(context);
@@ -5524,8 +5521,7 @@ ${taskBackgroundContext}
5524
5521
  const call2 = callAI || callAiFn;
5525
5522
  const { content, usage } = await call2({
5526
5523
  msgs,
5527
- AIActionType: 3 /* PLAN */,
5528
- useModel
5524
+ AIActionType: 3 /* PLAN */
5529
5525
  });
5530
5526
  const planFromAI = content;
5531
5527
  const actions = (planFromAI == null ? void 0 : planFromAI.actions) || [];
package/dist/lib/index.js CHANGED
@@ -4506,7 +4506,7 @@ function stringifyDumpData(data, indents) {
4506
4506
  return JSON.stringify(data, replacerForPageObject, indents);
4507
4507
  }
4508
4508
  function getVersion() {
4509
- return "0.8.7-beta-20241218070032.0";
4509
+ return "0.8.7";
4510
4510
  }
4511
4511
 
4512
4512
  // src/action/executor.ts
@@ -5177,7 +5177,7 @@ You are a versatile professional in software UI automation. Your outstanding con
5177
5177
  ## Workflow
5178
5178
 
5179
5179
  1. Receive the user's element description, screenshot, and instruction.
5180
- 2. Decompose the user's task into a sequence of actions, and place it in the \`actions\` field. There are different types of actions (Tap / Hover / Input / KeyboardPress / Scroll / FalsyIfStatement / Sleep). The "About the action" section below will give you more details.
5180
+ 2. Decompose the user's task into a sequence of actions, and place it in the \`actions\` field. There are different types of actions (Tap / Hover / Input / KeyboardPress / Scroll / FalsyConditionStatement / Sleep). The "About the action" section below will give you more details.
5181
5181
  3. Precisely locate the target element if it's already shown in the screenshot, put the location info in the \`locate\` field of the action.
5182
5182
  4. If some target elements is not shown in the screenshot, consider the user's instruction is not feasible on this page. Follow the next steps.
5183
5183
  5. Consider whether the user's instruction will be accomplished after all the actions
@@ -5188,7 +5188,8 @@ You are a versatile professional in software UI automation. Your outstanding con
5188
5188
 
5189
5189
  - All the actions you composed MUST be based on the page context information you get.
5190
5190
  - Trust the "What have been done" field about the task (if any), don't repeat actions in it.
5191
- - When the user says "If something is true, do something" in the instruction, follow it, tell if it's truthy, and give the corresponding actions. If it's not truthy, as long as the instruction is an "if" statement, it means the user can tolerate it. Just leave a \`FalsyIfStatement\` action.
5191
+ - Respond only with valid JSON. Do not write an introduction or summary.
5192
+ - If you cannot plan any action at all (i.e. empty actions array), set reason in the \`error\` field.
5192
5193
 
5193
5194
  ## About the \`actions\` field
5194
5195
 
@@ -5224,8 +5225,9 @@ Each action has a \`type\` and corresponding \`param\`. To be detailed:
5224
5225
  }
5225
5226
  * To scroll some specific element, put the element at the center of the region in the \`locate\` field. If it's a page scroll, put \`null\` in the \`locate\` field.
5226
5227
  * \`param\` is required in this action. If some fields are not specified, use direction \`down\`, \`once\` scroll type, and \`null\` distance.
5227
- - type: 'FalsyIfStatement', when there is a falsy condition and the instruction is an "if" statement (means the user can tolerate this situation)
5228
+ - type: 'FalsyConditionStatement'
5228
5229
  * { param: null }
5230
+ * use this action when the instruction is an "if" statement and the condition is falsy.
5229
5231
  - type: 'Sleep'
5230
5232
  * { param: { timeMs: number } }
5231
5233
 
@@ -5239,7 +5241,8 @@ Each action has a \`type\` and corresponding \`param\`. To be detailed:
5239
5241
 
5240
5242
  ## Output JSON Format:
5241
5243
 
5242
- Please return the result in JSON format as follows:
5244
+ The JSON format is as follows:
5245
+
5243
5246
  {
5244
5247
  "actions": [
5245
5248
  {
@@ -5312,13 +5315,13 @@ By viewing the page screenshot and description, you should consider this and out
5312
5315
  If the user says "If there is a popup, close it", you should consider this and output the JSON:
5313
5316
 
5314
5317
  * By viewing the page screenshot and description, you cannot find the popup, so the condition is falsy.
5315
- * The instruction itself is an "if" statement, it means the user can tolerate this situation, so you should leave a \`FalsyIfStatement\` action.
5318
+ * The instruction itself is an "if" statement, it means the user can tolerate this situation, so you should leave a \`FalsyConditionStatement\` action.
5316
5319
 
5317
5320
  \`\`\`json
5318
5321
  {
5319
5322
  "actions": [{
5320
5323
  "thought": "There is no popup on the page",
5321
- "type": "FalsyIfStatement",
5324
+ "type": "FalsyConditionStatement",
5322
5325
  "param": null
5323
5326
  }
5324
5327
  ],
@@ -5543,11 +5546,9 @@ async function call(messages, responseFormat) {
5543
5546
  return { content, usage: completion.usage };
5544
5547
  }
5545
5548
  async function callToGetJSONObject(messages, AIActionTypeValue) {
5546
- let responseFormat = {
5547
- type: "json_object" /* JSON */
5548
- };
5549
+ let responseFormat;
5549
5550
  const model = getModelName();
5550
- if (model === "gpt-4o-2024-08-06") {
5551
+ if (model.includes("gpt-4o")) {
5551
5552
  switch (AIActionTypeValue) {
5552
5553
  case 0 /* ASSERT */:
5553
5554
  responseFormat = assertSchema;
@@ -5561,9 +5562,9 @@ async function callToGetJSONObject(messages, AIActionTypeValue) {
5561
5562
  responseFormat = planSchema;
5562
5563
  break;
5563
5564
  }
5564
- }
5565
- if (model.startsWith("gemini")) {
5566
- responseFormat = { type: "text" /* TEXT */ };
5565
+ if (model === "gpt-4o-2024-05-13") {
5566
+ responseFormat = { type: "json_object" /* JSON */ };
5567
+ }
5567
5568
  }
5568
5569
  const safeJsonParse = (input) => {
5569
5570
  try {
@@ -5581,7 +5582,7 @@ async function callToGetJSONObject(messages, AIActionTypeValue) {
5581
5582
  try {
5582
5583
  return { content: JSON.parse(jsonContent), usage: response.usage };
5583
5584
  } catch (e) {
5584
- throw Error(`parse json error: ${response.content}`);
5585
+ throw Error(`failed to parse json response: ${response.content}`);
5585
5586
  }
5586
5587
  }
5587
5588
  function extractJSONFromCodeBlock(response) {
@@ -5602,8 +5603,8 @@ function extractJSONFromCodeBlock(response) {
5602
5603
 
5603
5604
  // src/ai-model/common.ts
5604
5605
  async function callAiFn(options) {
5605
- const { useModel, msgs, AIActionType: AIActionTypeValue } = options;
5606
- if (preferOpenAIModel(useModel)) {
5606
+ const { msgs, AIActionType: AIActionTypeValue } = options;
5607
+ if (preferOpenAIModel("openAI")) {
5607
5608
  const { content, usage } = await callToGetJSONObject(
5608
5609
  msgs,
5609
5610
  AIActionTypeValue
@@ -5654,7 +5655,7 @@ function transformElementPositionToId(aiResult, elementsInfo) {
5654
5655
  };
5655
5656
  }
5656
5657
  async function AiInspectElement(options) {
5657
- const { context, multi, targetElementDescription, callAI, useModel } = options;
5658
+ const { context, multi, targetElementDescription, callAI } = options;
5658
5659
  const { screenshotBase64, screenshotBase64WithElementMarker } = context;
5659
5660
  const { description, elementById, elementByPosition: elementByPosition2 } = await describeUserPage(context);
5660
5661
  if (options.quickAnswer) {
@@ -5729,8 +5730,7 @@ ${JSON.stringify({
5729
5730
  if (callAI) {
5730
5731
  const res = await callAI({
5731
5732
  msgs,
5732
- AIActionType: 1 /* INSPECT_ELEMENT */,
5733
- useModel
5733
+ AIActionType: 1 /* INSPECT_ELEMENT */
5734
5734
  });
5735
5735
  return {
5736
5736
  parseResult: transformElementPositionToId(res.content, context.content),
@@ -5741,8 +5741,7 @@ ${JSON.stringify({
5741
5741
  }
5742
5742
  const inspectElement = await callAiFn({
5743
5743
  msgs,
5744
- AIActionType: 1 /* INSPECT_ELEMENT */,
5745
- useModel
5744
+ AIActionType: 1 /* INSPECT_ELEMENT */
5746
5745
  });
5747
5746
  return {
5748
5747
  parseResult: transformElementPositionToId(
@@ -5792,7 +5791,6 @@ DATA_DEMAND ends.
5792
5791
  ];
5793
5792
  const result = await callAiFn({
5794
5793
  msgs,
5795
- useModel,
5796
5794
  AIActionType: 2 /* EXTRACT_DATA */
5797
5795
  });
5798
5796
  return {
@@ -5835,8 +5833,7 @@ async function AiAssert(options) {
5835
5833
  ];
5836
5834
  const { content: assertResult, usage } = await callAiFn({
5837
5835
  msgs,
5838
- AIActionType: 0 /* ASSERT */,
5839
- useModel
5836
+ AIActionType: 0 /* ASSERT */
5840
5837
  });
5841
5838
  return {
5842
5839
  content: assertResult,
@@ -5846,7 +5843,7 @@ async function AiAssert(options) {
5846
5843
 
5847
5844
  // src/ai-model/automation/index.ts
5848
5845
  var import_node_assert6 = __toESM(require("assert"));
5849
- async function plan(userPrompt, opts, useModel) {
5846
+ async function plan(userPrompt, opts) {
5850
5847
  const { callAI, context } = opts || {};
5851
5848
  const { screenshotBase64, screenshotBase64WithElementMarker } = context;
5852
5849
  const { description: pageDescription, elementByPosition: elementByPosition2 } = await describeUserPage(context);
@@ -5897,8 +5894,7 @@ ${taskBackgroundContext}
5897
5894
  const call2 = callAI || callAiFn;
5898
5895
  const { content, usage } = await call2({
5899
5896
  msgs,
5900
- AIActionType: 3 /* PLAN */,
5901
- useModel
5897
+ AIActionType: 3 /* PLAN */
5902
5898
  });
5903
5899
  const planFromAI = content;
5904
5900
  const actions = (planFromAI == null ? void 0 : planFromAI.actions) || [];
@@ -1,8 +1,8 @@
1
- import { g as AIUsageInfo } from './types-20204347.js';
1
+ import { g as AIUsageInfo } from './types-55182ae1.js';
2
2
  import { ChatCompletionMessageParam } from 'openai/resources';
3
3
  export { ChatCompletionMessageParam } from 'openai/resources';
4
- import { A as AIActionType } from './index-12fdcf10.js';
5
- export { f as AiAssert, e as AiExtractElementInfo, b as AiInspectElement, c as callAiFn, d as describeUserPage, p as plan, t as transformElementPositionToId } from './index-12fdcf10.js';
4
+ import { A as AIActionType } from './index-43fd19f4.js';
5
+ export { f as AiAssert, e as AiExtractElementInfo, b as AiInspectElement, c as callAiFn, d as describeUserPage, p as plan, t as transformElementPositionToId } from './index-43fd19f4.js';
6
6
 
7
7
  declare function callToGetJSONObject<T>(messages: ChatCompletionMessageParam[], AIActionTypeValue: AIActionType): Promise<{
8
8
  content: T;
@@ -1,4 +1,4 @@
1
- import { g as AIUsageInfo, B as BaseElement, U as UIContext, m as AIElementResponse, A as AISingleElementResponse, i as AISingleElementResponseById, n as AISectionParseResponse, o as AIAssertionResponse, F as PlanningAIResponse } from './types-20204347.js';
1
+ import { g as AIUsageInfo, B as BaseElement, U as UIContext, m as AIElementResponse, A as AISingleElementResponse, i as AISingleElementResponseById, n as AISectionParseResponse, o as AIAssertionResponse, F as PlanningAIResponse } from './types-55182ae1.js';
2
2
  import { ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam } from 'openai/resources';
3
3
 
4
4
  type AIArgs = [
@@ -14,7 +14,6 @@ declare enum AIActionType {
14
14
  declare function callAiFn<T>(options: {
15
15
  msgs: AIArgs;
16
16
  AIActionType: AIActionType;
17
- useModel?: 'openAI' | 'coze';
18
17
  }): Promise<{
19
18
  content: T;
20
19
  usage?: AIUsageInfo;
@@ -116,6 +115,6 @@ declare function plan(userPrompt: string, opts: {
116
115
  originalPrompt?: string;
117
116
  context: UIContext;
118
117
  callAI?: typeof callAiFn<PlanningAIResponse>;
119
- }, useModel?: 'coze' | 'openAI'): Promise<PlanningAIResponse>;
118
+ }): Promise<PlanningAIResponse>;
120
119
 
121
120
  export { AIActionType as A, retrieveSection as a, AiInspectElement as b, callAiFn as c, describeUserPage as d, AiExtractElementInfo as e, AiAssert as f, plan as p, retrieveElement as r, transformElementPositionToId as t };
@@ -1,8 +1,8 @@
1
- import { E as ExecutionTask, a as ExecutionTaskProgressOptions, b as ExecutionTaskApply, c as ExecutionDump, B as BaseElement, U as UIContext, I as InsightAction, D as DumpSubscriber, d as InsightOptions, e as InsightTaskInfo, A as AISingleElementResponse, f as InsightAssertionResponse } from './types-20204347.js';
2
- export { o as AIAssertionResponse, k as AIElementIdResponse, l as AIElementPositionResponse, m as AIElementResponse, h as AIResponseFormat, n as AISectionParseResponse, i as AISingleElementResponseById, j as AISingleElementResponseByPosition, g as AIUsageInfo, x as AgentAssertOpt, w as AgentWaitForOpt, X as BaseAgentParserOpt, C as CallAIFn, W as Color, r as DumpMeta, v as ElementById, p as EnsureObject, _ as ExecutionRecorderItem, ag as ExecutionTaskAction, af as ExecutionTaskActionApply, ae as ExecutionTaskInsightAssertion, ad as ExecutionTaskInsightAssertionApply, ac as ExecutionTaskInsightAssertionParam, a5 as ExecutionTaskInsightDumpLog, a7 as ExecutionTaskInsightLocate, a6 as ExecutionTaskInsightLocateApply, a4 as ExecutionTaskInsightLocateOutput, a3 as ExecutionTaskInsightLocateParam, ab as ExecutionTaskInsightQuery, aa as ExecutionTaskInsightQueryApply, a9 as ExecutionTaskInsightQueryOutput, a8 as ExecutionTaskInsightQueryParam, ai as ExecutionTaskPlanning, ah as ExecutionTaskPlanningApply, a2 as ExecutionTaskReturn, $ as ExecutionTaskType, a0 as ExecutorContext, aj as GroupedActionDump, t as InsightDump, q as InsightExtractParam, L as LiteUISection, u as PartialInsightDumpFromSDK, F as PlanningAIResponse, z as PlanningAction, O as PlanningActionParamAssert, T as PlanningActionParamError, K as PlanningActionParamHover, M as PlanningActionParamInputOrKeyPress, H as PlanningActionParamPlan, N as PlanningActionParamScroll, Q as PlanningActionParamSleep, J as PlanningActionParamTap, V as PlanningActionParamWaitFor, G as PlanningFurtherPlan, y as PlanningLocateParam, Z as PlaywrightParserOpt, P as Point, Y as PuppeteerParserOpt, R as Rect, s as ReportDumpWithAttributes, S as Size, a1 as TaskCacheInfo } from './types-20204347.js';
1
+ import { E as ExecutionTask, a as ExecutionTaskProgressOptions, b as ExecutionTaskApply, c as ExecutionDump, B as BaseElement, U as UIContext, I as InsightAction, D as DumpSubscriber, d as InsightOptions, e as InsightTaskInfo, A as AISingleElementResponse, f as InsightAssertionResponse } from './types-55182ae1.js';
2
+ export { o as AIAssertionResponse, k as AIElementIdResponse, l as AIElementPositionResponse, m as AIElementResponse, h as AIResponseFormat, n as AISectionParseResponse, i as AISingleElementResponseById, j as AISingleElementResponseByPosition, g as AIUsageInfo, x as AgentAssertOpt, w as AgentWaitForOpt, X as BaseAgentParserOpt, C as CallAIFn, W as Color, r as DumpMeta, v as ElementById, p as EnsureObject, _ as ExecutionRecorderItem, ag as ExecutionTaskAction, af as ExecutionTaskActionApply, ae as ExecutionTaskInsightAssertion, ad as ExecutionTaskInsightAssertionApply, ac as ExecutionTaskInsightAssertionParam, a5 as ExecutionTaskInsightDumpLog, a7 as ExecutionTaskInsightLocate, a6 as ExecutionTaskInsightLocateApply, a4 as ExecutionTaskInsightLocateOutput, a3 as ExecutionTaskInsightLocateParam, ab as ExecutionTaskInsightQuery, aa as ExecutionTaskInsightQueryApply, a9 as ExecutionTaskInsightQueryOutput, a8 as ExecutionTaskInsightQueryParam, ai as ExecutionTaskPlanning, ah as ExecutionTaskPlanningApply, a2 as ExecutionTaskReturn, $ as ExecutionTaskType, a0 as ExecutorContext, aj as GroupedActionDump, t as InsightDump, q as InsightExtractParam, L as LiteUISection, u as PartialInsightDumpFromSDK, F as PlanningAIResponse, z as PlanningAction, O as PlanningActionParamAssert, T as PlanningActionParamError, K as PlanningActionParamHover, M as PlanningActionParamInputOrKeyPress, H as PlanningActionParamPlan, N as PlanningActionParamScroll, Q as PlanningActionParamSleep, J as PlanningActionParamTap, V as PlanningActionParamWaitFor, G as PlanningFurtherPlan, y as PlanningLocateParam, Z as PlaywrightParserOpt, P as Point, Y as PuppeteerParserOpt, R as Rect, s as ReportDumpWithAttributes, S as Size, a1 as TaskCacheInfo } from './types-55182ae1.js';
3
3
  export { allAIConfig, getAIConfig, overrideAIConfig } from './env.js';
4
- import { c as callAiFn, r as retrieveElement, a as retrieveSection } from './index-12fdcf10.js';
5
- export { p as plan, t as transformElementPositionToId } from './index-12fdcf10.js';
4
+ import { c as callAiFn, r as retrieveElement, a as retrieveSection } from './index-43fd19f4.js';
5
+ export { p as plan, t as transformElementPositionToId } from './index-43fd19f4.js';
6
6
  export { getLogDirByType, getVersion, setLogDir } from './utils.js';
7
7
  import 'openai/resources';
8
8
 
@@ -176,7 +176,7 @@ interface PlanningLocateParam {
176
176
  }
177
177
  interface PlanningAction<ParamType = any> {
178
178
  thought?: string;
179
- type: 'Locate' | 'Tap' | 'Hover' | 'Input' | 'KeyboardPress' | 'Scroll' | 'Error' | 'FalsyIfStatement' | 'Assert' | 'AssertWithoutThrow' | 'Sleep';
179
+ type: 'Locate' | 'Tap' | 'Hover' | 'Input' | 'KeyboardPress' | 'Scroll' | 'Error' | 'FalsyConditionStatement' | 'Assert' | 'AssertWithoutThrow' | 'Sleep';
180
180
  param: ParamType;
181
181
  locate: PlanningLocateParam | null;
182
182
  }
@@ -1,4 +1,4 @@
1
- import { s as ReportDumpWithAttributes, R as Rect } from './types-20204347.js';
1
+ import { s as ReportDumpWithAttributes, R as Rect } from './types-55182ae1.js';
2
2
  import 'openai/resources';
3
3
 
4
4
  declare const insightDumpFileExt = "insight-dump.json";
package/dist/lib/utils.js CHANGED
@@ -272,7 +272,7 @@ function stringifyDumpData(data, indents) {
272
272
  return JSON.stringify(data, replacerForPageObject, indents);
273
273
  }
274
274
  function getVersion() {
275
- return "0.8.7-beta-20241218070032.0";
275
+ return "0.8.7";
276
276
  }
277
277
  function debugLog(...message) {
278
278
  const debugMode = getAIConfig(MIDSCENE_DEBUG_MODE);
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@midscene/core",
3
3
  "description": "An AI-powered automation SDK can control the page, perform assertions, and extract data in JSON format using natural language. See https://midscenejs.com/ for details.",
4
- "version": "0.8.7-beta-20241218070032.0",
4
+ "version": "0.8.7",
5
5
  "repository": "https://github.com/web-infra-dev/midscene",
6
6
  "homepage": "https://midscenejs.com/",
7
7
  "jsnext:source": "./src/index.ts",
@@ -39,7 +39,7 @@
39
39
  "openai": "4.57.1",
40
40
  "optional": "0.1.4",
41
41
  "socks-proxy-agent": "8.0.4",
42
- "@midscene/shared": "0.8.7-beta-20241218070032.0"
42
+ "@midscene/shared": "0.8.7"
43
43
  },
44
44
  "devDependencies": {
45
45
  "@modern-js/module-tools": "2.60.6",