@midscene/core 0.6.2 → 0.6.3-beta-20241017035917.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4587,16 +4587,16 @@ Remember:
4587
4587
 
4588
4588
  If the planned tasks are sequential and tasks may appear only after the execution of previous tasks, this is considered normal. Thoughts, prompts, and error messages should all be in the same language as the user query.
4589
4589
 
4590
- ## Objective 2 (sub objective): Give a quick answer to the action with type "Locate" you just planned
4590
+ ## Objective 2 (sub objective, only for action with type "Locate"): Give a quick answer to the action with type "Locate" you just planned, append a \`quickAnswer\` field after the \`param\` field
4591
4591
 
4592
- Review the action you just planned. If the action type is 'Locate', provide a quick answer: Does any element meet the description in the prompt? If so, answer with the following format, as the \`quickAnswer\` field in the output JSON:
4592
+ If the action type is 'Locate', provide a quick answer: Does any element meet the description in the prompt? If so, answer with the following format, as the \`quickAnswer\` field in the output JSON:
4593
4593
  {
4594
4594
  "reason": "Reason for finding element 4: It is located in the upper right corner, is an image type, and according to the screenshot, it is a shopping cart icon button",
4595
4595
  "text": "PLACEHOLDER", // Replace PLACEHOLDER with the text of elementInfo, if none, leave empty
4596
4596
  "id": "wefew2222few2" // id of this element, replace with actual value in practice
4597
4597
  }
4598
4598
 
4599
- If the action type is not 'Locate', or there is no element meets the description in the prompt (usually because it will show up after some interaction), the answer should be null.
4599
+ If there is no element meets the description in the prompt (usually because it will show up later after some interaction), the \`quickAnswer\` field should be null.
4600
4600
 
4601
4601
  ## Output JSON Format:
4602
4602
 
@@ -4610,7 +4610,7 @@ Please return the result in JSON format as follows:
4610
4610
  "param": {
4611
4611
  "prompt": "The search bar"
4612
4612
  },
4613
- "quickAnswer": { // since the first action is Locate, so we need to give a quick answer
4613
+ "quickAnswer": { // since this action type is 'Locate', and we can find the element, so we need to give a quick answer
4614
4614
  "reason": "Reason for finding element 4: It is located in the upper right corner, is an input type, and according to the screenshot, it is a search bar",
4615
4615
  "text": "PLACEHOLDER", // Replace PLACEHOLDER with the text of elementInfo, if none, leave empty
4616
4616
  "id": "wefew2222few2" // ID of this element, replace with actual value in practice
@@ -4621,6 +4621,14 @@ Please return the result in JSON format as follows:
4621
4621
  "type": "Tap", // Type of action, like 'Tap' 'Hover' ...
4622
4622
  "param": any, // Parameter towards the task type
4623
4623
  },
4624
+ {
4625
+ "thought": "Reasons for generating this task, and why this task is feasible on this page",
4626
+ "type": "Locate", // Type of action, like 'Tap' 'Hover' ...
4627
+ "param": {
4628
+ "prompt": "The search bar"
4629
+ },
4630
+ "quickAnswer": null,
4631
+ },
4624
4632
  // ... more actions
4625
4633
  ],
4626
4634
  error?: string, // Overall error messages. If there is any error occurs during the task planning (i.e. error in previous 'actions' array), conclude the errors again, put error messages here,
@@ -4654,7 +4662,7 @@ var planSchema = {
4654
4662
  },
4655
4663
  param: {
4656
4664
  type: ["object", "null"],
4657
- description: "Parameter towards the task type, can be null"
4665
+ description: "Parameter towards the task type, can be null only when the type field is Tap or Hover"
4658
4666
  },
4659
4667
  quickAnswer: {
4660
4668
  type: ["object", "null"],
@@ -4948,7 +4956,7 @@ import assert4 from "assert";
4948
4956
  async function AiInspectElement(options) {
4949
4957
  var _a;
4950
4958
  const { context, multi, targetElementDescription, callAI, useModel } = options;
4951
- const { screenshotBase64 } = context;
4959
+ const { screenshotBase64, screenshotBase64WithElementMarker } = context;
4952
4960
  const { description, elementById } = await describeUserPage(context);
4953
4961
  if (((_a = options.quickAnswer) == null ? void 0 : _a.id) && elementById(options.quickAnswer.id)) {
4954
4962
  return {
@@ -4967,7 +4975,7 @@ async function AiInspectElement(options) {
4967
4975
  {
4968
4976
  type: "image_url",
4969
4977
  image_url: {
4970
- url: screenshotBase64
4978
+ url: screenshotBase64WithElementMarker || screenshotBase64
4971
4979
  }
4972
4980
  },
4973
4981
  {
package/dist/es/index.js CHANGED
@@ -4752,16 +4752,16 @@ Remember:
4752
4752
 
4753
4753
  If the planned tasks are sequential and tasks may appear only after the execution of previous tasks, this is considered normal. Thoughts, prompts, and error messages should all be in the same language as the user query.
4754
4754
 
4755
- ## Objective 2 (sub objective): Give a quick answer to the action with type "Locate" you just planned
4755
+ ## Objective 2 (sub objective, only for action with type "Locate"): Give a quick answer to the action with type "Locate" you just planned, append a \`quickAnswer\` field after the \`param\` field
4756
4756
 
4757
- Review the action you just planned. If the action type is 'Locate', provide a quick answer: Does any element meet the description in the prompt? If so, answer with the following format, as the \`quickAnswer\` field in the output JSON:
4757
+ If the action type is 'Locate', provide a quick answer: Does any element meet the description in the prompt? If so, answer with the following format, as the \`quickAnswer\` field in the output JSON:
4758
4758
  {
4759
4759
  "reason": "Reason for finding element 4: It is located in the upper right corner, is an image type, and according to the screenshot, it is a shopping cart icon button",
4760
4760
  "text": "PLACEHOLDER", // Replace PLACEHOLDER with the text of elementInfo, if none, leave empty
4761
4761
  "id": "wefew2222few2" // id of this element, replace with actual value in practice
4762
4762
  }
4763
4763
 
4764
- If the action type is not 'Locate', or there is no element meets the description in the prompt (usually because it will show up after some interaction), the answer should be null.
4764
+ If there is no element meets the description in the prompt (usually because it will show up later after some interaction), the \`quickAnswer\` field should be null.
4765
4765
 
4766
4766
  ## Output JSON Format:
4767
4767
 
@@ -4775,7 +4775,7 @@ Please return the result in JSON format as follows:
4775
4775
  "param": {
4776
4776
  "prompt": "The search bar"
4777
4777
  },
4778
- "quickAnswer": { // since the first action is Locate, so we need to give a quick answer
4778
+ "quickAnswer": { // since this action type is 'Locate', and we can find the element, so we need to give a quick answer
4779
4779
  "reason": "Reason for finding element 4: It is located in the upper right corner, is an input type, and according to the screenshot, it is a search bar",
4780
4780
  "text": "PLACEHOLDER", // Replace PLACEHOLDER with the text of elementInfo, if none, leave empty
4781
4781
  "id": "wefew2222few2" // ID of this element, replace with actual value in practice
@@ -4786,6 +4786,14 @@ Please return the result in JSON format as follows:
4786
4786
  "type": "Tap", // Type of action, like 'Tap' 'Hover' ...
4787
4787
  "param": any, // Parameter towards the task type
4788
4788
  },
4789
+ {
4790
+ "thought": "Reasons for generating this task, and why this task is feasible on this page",
4791
+ "type": "Locate", // Type of action, like 'Tap' 'Hover' ...
4792
+ "param": {
4793
+ "prompt": "The search bar"
4794
+ },
4795
+ "quickAnswer": null,
4796
+ },
4789
4797
  // ... more actions
4790
4798
  ],
4791
4799
  error?: string, // Overall error messages. If there is any error occurs during the task planning (i.e. error in previous 'actions' array), conclude the errors again, put error messages here,
@@ -4819,7 +4827,7 @@ var planSchema = {
4819
4827
  },
4820
4828
  param: {
4821
4829
  type: ["object", "null"],
4822
- description: "Parameter towards the task type, can be null"
4830
+ description: "Parameter towards the task type, can be null only when the type field is Tap or Hover"
4823
4831
  },
4824
4832
  quickAnswer: {
4825
4833
  type: ["object", "null"],
@@ -5150,7 +5158,7 @@ import assert5 from "assert";
5150
5158
  async function AiInspectElement(options) {
5151
5159
  var _a;
5152
5160
  const { context, multi, targetElementDescription, callAI: callAI2, useModel } = options;
5153
- const { screenshotBase64 } = context;
5161
+ const { screenshotBase64, screenshotBase64WithElementMarker } = context;
5154
5162
  const { description, elementById } = await describeUserPage(context);
5155
5163
  if (((_a = options.quickAnswer) == null ? void 0 : _a.id) && elementById(options.quickAnswer.id)) {
5156
5164
  return {
@@ -5169,7 +5177,7 @@ async function AiInspectElement(options) {
5169
5177
  {
5170
5178
  type: "image_url",
5171
5179
  image_url: {
5172
- url: screenshotBase64
5180
+ url: screenshotBase64WithElementMarker || screenshotBase64
5173
5181
  }
5174
5182
  },
5175
5183
  {
@@ -5399,7 +5407,9 @@ function writeDumpReport(fileName, dumpData) {
5399
5407
  const attributesArr = Object.keys(attributes || {}).map((key) => {
5400
5408
  return `${key}="${encodeURIComponent(attributes[key])}"`;
5401
5409
  });
5402
- return `<script type="midscene_web_dump" type="application/json" ${attributesArr.join(" ")}>${dumpString}</script>`;
5410
+ return `<script type="midscene_web_dump" type="application/json" ${attributesArr.join(" ")}>
5411
+ ${dumpString}
5412
+ </script>`;
5403
5413
  });
5404
5414
  reportContent = tpl.replace("{{dump}}", dumps.join("\n"));
5405
5415
  }
package/dist/es/utils.js CHANGED
@@ -45,7 +45,9 @@ function writeDumpReport(fileName, dumpData) {
45
45
  const attributesArr = Object.keys(attributes || {}).map((key) => {
46
46
  return `${key}="${encodeURIComponent(attributes[key])}"`;
47
47
  });
48
- return `<script type="midscene_web_dump" type="application/json" ${attributesArr.join(" ")}>${dumpString}</script>`;
48
+ return `<script type="midscene_web_dump" type="application/json" ${attributesArr.join(" ")}>
49
+ ${dumpString}
50
+ </script>`;
49
51
  });
50
52
  reportContent = tpl.replace("{{dump}}", dumps.join("\n"));
51
53
  }
@@ -4607,16 +4607,16 @@ Remember:
4607
4607
 
4608
4608
  If the planned tasks are sequential and tasks may appear only after the execution of previous tasks, this is considered normal. Thoughts, prompts, and error messages should all be in the same language as the user query.
4609
4609
 
4610
- ## Objective 2 (sub objective): Give a quick answer to the action with type "Locate" you just planned
4610
+ ## Objective 2 (sub objective, only for action with type "Locate"): Give a quick answer to the action with type "Locate" you just planned, append a \`quickAnswer\` field after the \`param\` field
4611
4611
 
4612
- Review the action you just planned. If the action type is 'Locate', provide a quick answer: Does any element meet the description in the prompt? If so, answer with the following format, as the \`quickAnswer\` field in the output JSON:
4612
+ If the action type is 'Locate', provide a quick answer: Does any element meet the description in the prompt? If so, answer with the following format, as the \`quickAnswer\` field in the output JSON:
4613
4613
  {
4614
4614
  "reason": "Reason for finding element 4: It is located in the upper right corner, is an image type, and according to the screenshot, it is a shopping cart icon button",
4615
4615
  "text": "PLACEHOLDER", // Replace PLACEHOLDER with the text of elementInfo, if none, leave empty
4616
4616
  "id": "wefew2222few2" // id of this element, replace with actual value in practice
4617
4617
  }
4618
4618
 
4619
- If the action type is not 'Locate', or there is no element meets the description in the prompt (usually because it will show up after some interaction), the answer should be null.
4619
+ If there is no element meets the description in the prompt (usually because it will show up later after some interaction), the \`quickAnswer\` field should be null.
4620
4620
 
4621
4621
  ## Output JSON Format:
4622
4622
 
@@ -4630,7 +4630,7 @@ Please return the result in JSON format as follows:
4630
4630
  "param": {
4631
4631
  "prompt": "The search bar"
4632
4632
  },
4633
- "quickAnswer": { // since the first action is Locate, so we need to give a quick answer
4633
+ "quickAnswer": { // since this action type is 'Locate', and we can find the element, so we need to give a quick answer
4634
4634
  "reason": "Reason for finding element 4: It is located in the upper right corner, is an input type, and according to the screenshot, it is a search bar",
4635
4635
  "text": "PLACEHOLDER", // Replace PLACEHOLDER with the text of elementInfo, if none, leave empty
4636
4636
  "id": "wefew2222few2" // ID of this element, replace with actual value in practice
@@ -4641,6 +4641,14 @@ Please return the result in JSON format as follows:
4641
4641
  "type": "Tap", // Type of action, like 'Tap' 'Hover' ...
4642
4642
  "param": any, // Parameter towards the task type
4643
4643
  },
4644
+ {
4645
+ "thought": "Reasons for generating this task, and why this task is feasible on this page",
4646
+ "type": "Locate", // Type of action, like 'Tap' 'Hover' ...
4647
+ "param": {
4648
+ "prompt": "The search bar"
4649
+ },
4650
+ "quickAnswer": null,
4651
+ },
4644
4652
  // ... more actions
4645
4653
  ],
4646
4654
  error?: string, // Overall error messages. If there is any error occurs during the task planning (i.e. error in previous 'actions' array), conclude the errors again, put error messages here,
@@ -4674,7 +4682,7 @@ var planSchema = {
4674
4682
  },
4675
4683
  param: {
4676
4684
  type: ["object", "null"],
4677
- description: "Parameter towards the task type, can be null"
4685
+ description: "Parameter towards the task type, can be null only when the type field is Tap or Hover"
4678
4686
  },
4679
4687
  quickAnswer: {
4680
4688
  type: ["object", "null"],
@@ -4960,7 +4968,7 @@ var import_node_assert4 = __toESM(require("assert"));
4960
4968
  async function AiInspectElement(options) {
4961
4969
  var _a;
4962
4970
  const { context, multi, targetElementDescription, callAI, useModel } = options;
4963
- const { screenshotBase64 } = context;
4971
+ const { screenshotBase64, screenshotBase64WithElementMarker } = context;
4964
4972
  const { description, elementById } = await describeUserPage(context);
4965
4973
  if (((_a = options.quickAnswer) == null ? void 0 : _a.id) && elementById(options.quickAnswer.id)) {
4966
4974
  return {
@@ -4979,7 +4987,7 @@ async function AiInspectElement(options) {
4979
4987
  {
4980
4988
  type: "image_url",
4981
4989
  image_url: {
4982
- url: screenshotBase64
4990
+ url: screenshotBase64WithElementMarker || screenshotBase64
4983
4991
  }
4984
4992
  },
4985
4993
  {
package/dist/lib/index.js CHANGED
@@ -4774,16 +4774,16 @@ Remember:
4774
4774
 
4775
4775
  If the planned tasks are sequential and tasks may appear only after the execution of previous tasks, this is considered normal. Thoughts, prompts, and error messages should all be in the same language as the user query.
4776
4776
 
4777
- ## Objective 2 (sub objective): Give a quick answer to the action with type "Locate" you just planned
4777
+ ## Objective 2 (sub objective, only for action with type "Locate"): Give a quick answer to the action with type "Locate" you just planned, append a \`quickAnswer\` field after the \`param\` field
4778
4778
 
4779
- Review the action you just planned. If the action type is 'Locate', provide a quick answer: Does any element meet the description in the prompt? If so, answer with the following format, as the \`quickAnswer\` field in the output JSON:
4779
+ If the action type is 'Locate', provide a quick answer: Does any element meet the description in the prompt? If so, answer with the following format, as the \`quickAnswer\` field in the output JSON:
4780
4780
  {
4781
4781
  "reason": "Reason for finding element 4: It is located in the upper right corner, is an image type, and according to the screenshot, it is a shopping cart icon button",
4782
4782
  "text": "PLACEHOLDER", // Replace PLACEHOLDER with the text of elementInfo, if none, leave empty
4783
4783
  "id": "wefew2222few2" // id of this element, replace with actual value in practice
4784
4784
  }
4785
4785
 
4786
- If the action type is not 'Locate', or there is no element meets the description in the prompt (usually because it will show up after some interaction), the answer should be null.
4786
+ If there is no element meets the description in the prompt (usually because it will show up later after some interaction), the \`quickAnswer\` field should be null.
4787
4787
 
4788
4788
  ## Output JSON Format:
4789
4789
 
@@ -4797,7 +4797,7 @@ Please return the result in JSON format as follows:
4797
4797
  "param": {
4798
4798
  "prompt": "The search bar"
4799
4799
  },
4800
- "quickAnswer": { // since the first action is Locate, so we need to give a quick answer
4800
+ "quickAnswer": { // since this action type is 'Locate', and we can find the element, so we need to give a quick answer
4801
4801
  "reason": "Reason for finding element 4: It is located in the upper right corner, is an input type, and according to the screenshot, it is a search bar",
4802
4802
  "text": "PLACEHOLDER", // Replace PLACEHOLDER with the text of elementInfo, if none, leave empty
4803
4803
  "id": "wefew2222few2" // ID of this element, replace with actual value in practice
@@ -4808,6 +4808,14 @@ Please return the result in JSON format as follows:
4808
4808
  "type": "Tap", // Type of action, like 'Tap' 'Hover' ...
4809
4809
  "param": any, // Parameter towards the task type
4810
4810
  },
4811
+ {
4812
+ "thought": "Reasons for generating this task, and why this task is feasible on this page",
4813
+ "type": "Locate", // Type of action, like 'Tap' 'Hover' ...
4814
+ "param": {
4815
+ "prompt": "The search bar"
4816
+ },
4817
+ "quickAnswer": null,
4818
+ },
4811
4819
  // ... more actions
4812
4820
  ],
4813
4821
  error?: string, // Overall error messages. If there is any error occurs during the task planning (i.e. error in previous 'actions' array), conclude the errors again, put error messages here,
@@ -4841,7 +4849,7 @@ var planSchema = {
4841
4849
  },
4842
4850
  param: {
4843
4851
  type: ["object", "null"],
4844
- description: "Parameter towards the task type, can be null"
4852
+ description: "Parameter towards the task type, can be null only when the type field is Tap or Hover"
4845
4853
  },
4846
4854
  quickAnswer: {
4847
4855
  type: ["object", "null"],
@@ -5164,7 +5172,7 @@ var import_node_assert5 = __toESM(require("assert"));
5164
5172
  async function AiInspectElement(options) {
5165
5173
  var _a;
5166
5174
  const { context, multi, targetElementDescription, callAI: callAI2, useModel } = options;
5167
- const { screenshotBase64 } = context;
5175
+ const { screenshotBase64, screenshotBase64WithElementMarker } = context;
5168
5176
  const { description, elementById } = await describeUserPage(context);
5169
5177
  if (((_a = options.quickAnswer) == null ? void 0 : _a.id) && elementById(options.quickAnswer.id)) {
5170
5178
  return {
@@ -5183,7 +5191,7 @@ async function AiInspectElement(options) {
5183
5191
  {
5184
5192
  type: "image_url",
5185
5193
  image_url: {
5186
- url: screenshotBase64
5194
+ url: screenshotBase64WithElementMarker || screenshotBase64
5187
5195
  }
5188
5196
  },
5189
5197
  {
@@ -5413,7 +5421,9 @@ function writeDumpReport(fileName, dumpData) {
5413
5421
  const attributesArr = Object.keys(attributes || {}).map((key) => {
5414
5422
  return `${key}="${encodeURIComponent(attributes[key])}"`;
5415
5423
  });
5416
- return `<script type="midscene_web_dump" type="application/json" ${attributesArr.join(" ")}>${dumpString}</script>`;
5424
+ return `<script type="midscene_web_dump" type="application/json" ${attributesArr.join(" ")}>
5425
+ ${dumpString}
5426
+ </script>`;
5417
5427
  });
5418
5428
  reportContent = tpl.replace("{{dump}}", dumps.join("\n"));
5419
5429
  }
package/dist/lib/utils.js CHANGED
@@ -91,7 +91,9 @@ function writeDumpReport(fileName, dumpData) {
91
91
  const attributesArr = Object.keys(attributes || {}).map((key) => {
92
92
  return `${key}="${encodeURIComponent(attributes[key])}"`;
93
93
  });
94
- return `<script type="midscene_web_dump" type="application/json" ${attributesArr.join(" ")}>${dumpString}</script>`;
94
+ return `<script type="midscene_web_dump" type="application/json" ${attributesArr.join(" ")}>
95
+ ${dumpString}
96
+ </script>`;
95
97
  });
96
98
  reportContent = tpl.replace("{{dump}}", dumps.join("\n"));
97
99
  }
@@ -1,8 +1,8 @@
1
1
  import { ChatCompletionMessageParam } from 'openai/resources';
2
2
  export { ChatCompletionMessageParam } from 'openai/resources';
3
- import { c as callAiFn, A as AIActionType } from './index-52ae633b.js';
4
- export { d as describeUserPage, p as plan } from './index-52ae633b.js';
5
- import { B as BaseElement, U as UIContext, A as AISingleElementResponse, b as AIElementParseResponse, c as AISectionParseResponse, d as AIAssertionResponse } from './types-57e850bf.js';
3
+ import { c as callAiFn, A as AIActionType } from './index-1fd0dc54.js';
4
+ export { d as describeUserPage, p as plan } from './index-1fd0dc54.js';
5
+ import { B as BaseElement, U as UIContext, A as AISingleElementResponse, b as AIElementParseResponse, c as AISectionParseResponse, d as AIAssertionResponse } from './types-25ace486.js';
6
6
 
7
7
  declare function AiInspectElement<ElementType extends BaseElement = BaseElement>(options: {
8
8
  context: UIContext<ElementType>;
@@ -1,4 +1,4 @@
1
- import { B as BaseElement, U as UIContext, P as PlanningAction } from './types-57e850bf.js';
1
+ import { B as BaseElement, U as UIContext, P as PlanningAction } from './types-25ace486.js';
2
2
  import { ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam } from 'openai/resources';
3
3
 
4
4
  type AIArgs = [
@@ -1,7 +1,7 @@
1
- import { E as ExecutionTask, e as ExecutionTaskApply, f as ExecutionDump, B as BaseElement, U as UIContext, D as DumpSubscriber, I as InsightTaskInfo, g as InsightOptions, A as AISingleElementResponse, h as InsightAssertionResponse } from './types-57e850bf.js';
2
- export { d as AIAssertionResponse, b as AIElementParseResponse, j as AIResponseFormat, c as AISectionParseResponse, s as AgentWaitForOpt, J as BaseAgentParserOpt, m as BasicSectionQuery, C as CallAIFn, H as Color, o as DumpMeta, r as ElementById, l as EnsureObject, N as ExecutionRecorderItem, a7 as ExecutionTaskAction, a6 as ExecutionTaskActionApply, a5 as ExecutionTaskInsightAssertion, a4 as ExecutionTaskInsightAssertionApply, a3 as ExecutionTaskInsightAssertionParam, Y as ExecutionTaskInsightDumpLog, _ as ExecutionTaskInsightLocate, Z as ExecutionTaskInsightLocateApply, X as ExecutionTaskInsightLocateOutput, W as ExecutionTaskInsightLocateParam, a2 as ExecutionTaskInsightQuery, a1 as ExecutionTaskInsightQueryApply, a0 as ExecutionTaskInsightQueryOutput, $ as ExecutionTaskInsightQueryParam, a9 as ExecutionTaskPlanning, a8 as ExecutionTaskPlanningApply, V as ExecutionTaskReturn, O as ExecutionTaskType, Q as ExecutorContext, aa as GroupedActionDump, p as InsightDump, n as InsightExtractParam, L as LiteUISection, q as PartialInsightDumpFromSDK, t as PlanningAIResponse, P as PlanningAction, y as PlanningActionParamAssert, F as PlanningActionParamError, v as PlanningActionParamHover, w as PlanningActionParamInputOrKeyPress, x as PlanningActionParamScroll, z as PlanningActionParamSleep, u as PlanningActionParamTap, G as PlanningActionParamWaitFor, M as PlaywrightParserOpt, i as Point, K as PuppeteerParserOpt, a as Rect, R as ReportDumpWithAttributes, S as Size, T as TaskCacheInfo, k as UISection } from './types-57e850bf.js';
3
- import { c as callAiFn, r as retrieveElement, a as retrieveSection } from './index-52ae633b.js';
4
- export { p as plan } from './index-52ae633b.js';
1
+ import { E as ExecutionTask, e as ExecutionTaskApply, f as ExecutionDump, B as BaseElement, U as UIContext, D as DumpSubscriber, I as InsightTaskInfo, g as InsightOptions, A as AISingleElementResponse, h as InsightAssertionResponse } from './types-25ace486.js';
2
+ export { d as AIAssertionResponse, b as AIElementParseResponse, j as AIResponseFormat, c as AISectionParseResponse, s as AgentWaitForOpt, J as BaseAgentParserOpt, m as BasicSectionQuery, C as CallAIFn, H as Color, o as DumpMeta, r as ElementById, l as EnsureObject, N as ExecutionRecorderItem, a7 as ExecutionTaskAction, a6 as ExecutionTaskActionApply, a5 as ExecutionTaskInsightAssertion, a4 as ExecutionTaskInsightAssertionApply, a3 as ExecutionTaskInsightAssertionParam, Y as ExecutionTaskInsightDumpLog, _ as ExecutionTaskInsightLocate, Z as ExecutionTaskInsightLocateApply, X as ExecutionTaskInsightLocateOutput, W as ExecutionTaskInsightLocateParam, a2 as ExecutionTaskInsightQuery, a1 as ExecutionTaskInsightQueryApply, a0 as ExecutionTaskInsightQueryOutput, $ as ExecutionTaskInsightQueryParam, a9 as ExecutionTaskPlanning, a8 as ExecutionTaskPlanningApply, V as ExecutionTaskReturn, O as ExecutionTaskType, Q as ExecutorContext, aa as GroupedActionDump, p as InsightDump, n as InsightExtractParam, L as LiteUISection, q as PartialInsightDumpFromSDK, t as PlanningAIResponse, P as PlanningAction, y as PlanningActionParamAssert, F as PlanningActionParamError, v as PlanningActionParamHover, w as PlanningActionParamInputOrKeyPress, x as PlanningActionParamScroll, z as PlanningActionParamSleep, u as PlanningActionParamTap, G as PlanningActionParamWaitFor, M as PlaywrightParserOpt, i as Point, K as PuppeteerParserOpt, a as Rect, R as ReportDumpWithAttributes, S as Size, T as TaskCacheInfo, k as UISection } from './types-25ace486.js';
3
+ import { c as callAiFn, r as retrieveElement, a as retrieveSection } from './index-1fd0dc54.js';
4
+ export { p as plan } from './index-1fd0dc54.js';
5
5
  export { setLogDir } from './utils.js';
6
6
  import 'openai/resources';
7
7
 
@@ -58,6 +58,7 @@ interface AIAssertionResponse {
58
58
  */
59
59
  declare abstract class UIContext<ElementType extends BaseElement = BaseElement> {
60
60
  abstract screenshotBase64: string;
61
+ abstract screenshotBase64WithElementMarker?: string;
61
62
  abstract content: ElementType[];
62
63
  abstract size: Size;
63
64
  }
@@ -1,4 +1,4 @@
1
- import { R as ReportDumpWithAttributes, a as Rect } from './types-57e850bf.js';
1
+ import { R as ReportDumpWithAttributes, a as Rect } from './types-25ace486.js';
2
2
  import 'openai/resources';
3
3
 
4
4
  declare const insightDumpFileExt = "insight-dump.json";
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@midscene/core",
3
3
  "description": "An AI-powered automation SDK can control the page, perform assertions, and extract data in JSON format using natural language. See https://midscenejs.com/ for details.",
4
- "version": "0.6.2",
4
+ "version": "0.6.3-beta-20241017035917.0",
5
5
  "repository": "https://github.com/web-infra-dev/midscene",
6
6
  "homepage": "https://midscenejs.com/",
7
7
  "jsnext:source": "./src/index.ts",
@@ -40,7 +40,7 @@
40
40
  "node-fetch": "2.6.7",
41
41
  "openai": "4.57.1",
42
42
  "optional": "0.1.4",
43
- "@midscene/shared": "0.6.2"
43
+ "@midscene/shared": "0.6.3-beta-20241017035917.0"
44
44
  },
45
45
  "devDependencies": {
46
46
  "@modern-js/module-tools": "2.58.2",