@midscene/core 0.30.10 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (203) hide show
  1. package/dist/es/agent/agent.mjs +233 -144
  2. package/dist/es/agent/agent.mjs.map +1 -1
  3. package/dist/es/agent/execution-session.mjs +41 -0
  4. package/dist/es/agent/execution-session.mjs.map +1 -0
  5. package/dist/es/agent/index.mjs +3 -3
  6. package/dist/es/agent/task-builder.mjs +319 -0
  7. package/dist/es/agent/task-builder.mjs.map +1 -0
  8. package/dist/es/agent/task-cache.mjs +4 -4
  9. package/dist/es/agent/task-cache.mjs.map +1 -1
  10. package/dist/es/agent/tasks.mjs +197 -504
  11. package/dist/es/agent/tasks.mjs.map +1 -1
  12. package/dist/es/agent/ui-utils.mjs +54 -35
  13. package/dist/es/agent/ui-utils.mjs.map +1 -1
  14. package/dist/es/agent/utils.mjs +16 -58
  15. package/dist/es/agent/utils.mjs.map +1 -1
  16. package/dist/es/ai-model/conversation-history.mjs +25 -13
  17. package/dist/es/ai-model/conversation-history.mjs.map +1 -1
  18. package/dist/es/ai-model/index.mjs +4 -4
  19. package/dist/es/ai-model/inspect.mjs +45 -54
  20. package/dist/es/ai-model/inspect.mjs.map +1 -1
  21. package/dist/es/ai-model/llm-planning.mjs +47 -65
  22. package/dist/es/ai-model/llm-planning.mjs.map +1 -1
  23. package/dist/es/ai-model/prompt/assertion.mjs.map +1 -1
  24. package/dist/es/ai-model/prompt/common.mjs.map +1 -1
  25. package/dist/es/ai-model/prompt/describe.mjs.map +1 -1
  26. package/dist/es/ai-model/prompt/extraction.mjs.map +1 -1
  27. package/dist/es/ai-model/prompt/llm-locator.mjs +11 -235
  28. package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -1
  29. package/dist/es/ai-model/prompt/llm-planning.mjs +76 -322
  30. package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -1
  31. package/dist/es/ai-model/prompt/llm-section-locator.mjs +15 -14
  32. package/dist/es/ai-model/prompt/llm-section-locator.mjs.map +1 -1
  33. package/dist/es/ai-model/prompt/order-sensitive-judge.mjs +35 -0
  34. package/dist/es/ai-model/prompt/order-sensitive-judge.mjs.map +1 -0
  35. package/dist/es/ai-model/prompt/playwright-generator.mjs +2 -2
  36. package/dist/es/ai-model/prompt/playwright-generator.mjs.map +1 -1
  37. package/dist/es/ai-model/prompt/ui-tars-locator.mjs.map +1 -1
  38. package/dist/es/ai-model/prompt/ui-tars-planning.mjs.map +1 -1
  39. package/dist/es/ai-model/prompt/util.mjs +3 -88
  40. package/dist/es/ai-model/prompt/util.mjs.map +1 -1
  41. package/dist/es/ai-model/prompt/yaml-generator.mjs +10 -10
  42. package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -1
  43. package/dist/es/ai-model/service-caller/index.mjs +182 -274
  44. package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
  45. package/dist/es/ai-model/ui-tars-planning.mjs +69 -8
  46. package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -1
  47. package/dist/es/{ai-model/common.mjs → common.mjs} +18 -30
  48. package/dist/es/common.mjs.map +1 -0
  49. package/dist/es/device/device-options.mjs +0 -0
  50. package/dist/es/device/index.mjs +29 -12
  51. package/dist/es/device/index.mjs.map +1 -1
  52. package/dist/es/index.mjs +5 -4
  53. package/dist/es/index.mjs.map +1 -1
  54. package/dist/es/report.mjs.map +1 -1
  55. package/dist/es/{insight → service}/index.mjs +38 -51
  56. package/dist/es/service/index.mjs.map +1 -0
  57. package/dist/es/{insight → service}/utils.mjs +3 -3
  58. package/dist/es/service/utils.mjs.map +1 -0
  59. package/dist/es/task-runner.mjs +264 -0
  60. package/dist/es/task-runner.mjs.map +1 -0
  61. package/dist/es/tree.mjs +13 -2
  62. package/dist/es/tree.mjs.map +1 -0
  63. package/dist/es/types.mjs +18 -1
  64. package/dist/es/types.mjs.map +1 -1
  65. package/dist/es/utils.mjs +6 -7
  66. package/dist/es/utils.mjs.map +1 -1
  67. package/dist/es/yaml/builder.mjs.map +1 -1
  68. package/dist/es/yaml/player.mjs +121 -98
  69. package/dist/es/yaml/player.mjs.map +1 -1
  70. package/dist/es/yaml/utils.mjs +1 -1
  71. package/dist/es/yaml/utils.mjs.map +1 -1
  72. package/dist/lib/agent/agent.js +231 -142
  73. package/dist/lib/agent/agent.js.map +1 -1
  74. package/dist/lib/agent/common.js +1 -1
  75. package/dist/lib/agent/execution-session.js +75 -0
  76. package/dist/lib/agent/execution-session.js.map +1 -0
  77. package/dist/lib/agent/index.js +14 -14
  78. package/dist/lib/agent/index.js.map +1 -1
  79. package/dist/lib/agent/task-builder.js +356 -0
  80. package/dist/lib/agent/task-builder.js.map +1 -0
  81. package/dist/lib/agent/task-cache.js +8 -8
  82. package/dist/lib/agent/task-cache.js.map +1 -1
  83. package/dist/lib/agent/tasks.js +202 -506
  84. package/dist/lib/agent/tasks.js.map +1 -1
  85. package/dist/lib/agent/ui-utils.js +58 -36
  86. package/dist/lib/agent/ui-utils.js.map +1 -1
  87. package/dist/lib/agent/utils.js +26 -68
  88. package/dist/lib/agent/utils.js.map +1 -1
  89. package/dist/lib/ai-model/conversation-history.js +27 -15
  90. package/dist/lib/ai-model/conversation-history.js.map +1 -1
  91. package/dist/lib/ai-model/index.js +27 -27
  92. package/dist/lib/ai-model/index.js.map +1 -1
  93. package/dist/lib/ai-model/inspect.js +51 -57
  94. package/dist/lib/ai-model/inspect.js.map +1 -1
  95. package/dist/lib/ai-model/llm-planning.js +49 -67
  96. package/dist/lib/ai-model/llm-planning.js.map +1 -1
  97. package/dist/lib/ai-model/prompt/assertion.js +2 -2
  98. package/dist/lib/ai-model/prompt/assertion.js.map +1 -1
  99. package/dist/lib/ai-model/prompt/common.js +2 -2
  100. package/dist/lib/ai-model/prompt/common.js.map +1 -1
  101. package/dist/lib/ai-model/prompt/describe.js +2 -2
  102. package/dist/lib/ai-model/prompt/describe.js.map +1 -1
  103. package/dist/lib/ai-model/prompt/extraction.js +2 -2
  104. package/dist/lib/ai-model/prompt/extraction.js.map +1 -1
  105. package/dist/lib/ai-model/prompt/llm-locator.js +14 -241
  106. package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -1
  107. package/dist/lib/ai-model/prompt/llm-planning.js +79 -328
  108. package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -1
  109. package/dist/lib/ai-model/prompt/llm-section-locator.js +17 -16
  110. package/dist/lib/ai-model/prompt/llm-section-locator.js.map +1 -1
  111. package/dist/lib/ai-model/prompt/order-sensitive-judge.js +72 -0
  112. package/dist/lib/ai-model/prompt/order-sensitive-judge.js.map +1 -0
  113. package/dist/lib/ai-model/prompt/playwright-generator.js +11 -11
  114. package/dist/lib/ai-model/prompt/playwright-generator.js.map +1 -1
  115. package/dist/lib/ai-model/prompt/ui-tars-locator.js +2 -2
  116. package/dist/lib/ai-model/prompt/ui-tars-locator.js.map +1 -1
  117. package/dist/lib/ai-model/prompt/ui-tars-planning.js +2 -2
  118. package/dist/lib/ai-model/prompt/ui-tars-planning.js.map +1 -1
  119. package/dist/lib/ai-model/prompt/util.js +7 -95
  120. package/dist/lib/ai-model/prompt/util.js.map +1 -1
  121. package/dist/lib/ai-model/prompt/yaml-generator.js +18 -18
  122. package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -1
  123. package/dist/lib/ai-model/service-caller/index.js +288 -401
  124. package/dist/lib/ai-model/service-caller/index.js.map +1 -1
  125. package/dist/lib/ai-model/ui-tars-planning.js +71 -10
  126. package/dist/lib/ai-model/ui-tars-planning.js.map +1 -1
  127. package/dist/lib/{ai-model/common.js → common.js} +40 -55
  128. package/dist/lib/common.js.map +1 -0
  129. package/dist/lib/device/device-options.js +20 -0
  130. package/dist/lib/device/device-options.js.map +1 -0
  131. package/dist/lib/device/index.js +63 -40
  132. package/dist/lib/device/index.js.map +1 -1
  133. package/dist/lib/image/index.js +5 -5
  134. package/dist/lib/image/index.js.map +1 -1
  135. package/dist/lib/index.js +24 -20
  136. package/dist/lib/index.js.map +1 -1
  137. package/dist/lib/report.js +2 -2
  138. package/dist/lib/report.js.map +1 -1
  139. package/dist/lib/{insight → service}/index.js +41 -54
  140. package/dist/lib/service/index.js.map +1 -0
  141. package/dist/lib/{insight → service}/utils.js +7 -7
  142. package/dist/lib/service/utils.js.map +1 -0
  143. package/dist/lib/task-runner.js +301 -0
  144. package/dist/lib/task-runner.js.map +1 -0
  145. package/dist/lib/tree.js +13 -4
  146. package/dist/lib/tree.js.map +1 -1
  147. package/dist/lib/types.js +31 -12
  148. package/dist/lib/types.js.map +1 -1
  149. package/dist/lib/utils.js +16 -17
  150. package/dist/lib/utils.js.map +1 -1
  151. package/dist/lib/yaml/builder.js +2 -2
  152. package/dist/lib/yaml/builder.js.map +1 -1
  153. package/dist/lib/yaml/index.js +16 -22
  154. package/dist/lib/yaml/index.js.map +1 -1
  155. package/dist/lib/yaml/player.js +123 -100
  156. package/dist/lib/yaml/player.js.map +1 -1
  157. package/dist/lib/yaml/utils.js +6 -6
  158. package/dist/lib/yaml/utils.js.map +1 -1
  159. package/dist/lib/yaml.js +1 -1
  160. package/dist/lib/yaml.js.map +1 -1
  161. package/dist/types/agent/agent.d.ts +62 -17
  162. package/dist/types/agent/execution-session.d.ts +36 -0
  163. package/dist/types/agent/index.d.ts +3 -2
  164. package/dist/types/agent/task-builder.d.ts +35 -0
  165. package/dist/types/agent/tasks.d.ts +32 -23
  166. package/dist/types/agent/ui-utils.d.ts +9 -2
  167. package/dist/types/agent/utils.d.ts +9 -35
  168. package/dist/types/ai-model/conversation-history.d.ts +8 -4
  169. package/dist/types/ai-model/index.d.ts +5 -5
  170. package/dist/types/ai-model/inspect.d.ts +20 -12
  171. package/dist/types/ai-model/llm-planning.d.ts +3 -1
  172. package/dist/types/ai-model/prompt/llm-locator.d.ts +1 -6
  173. package/dist/types/ai-model/prompt/llm-planning.d.ts +2 -3
  174. package/dist/types/ai-model/prompt/llm-section-locator.d.ts +1 -3
  175. package/dist/types/ai-model/prompt/order-sensitive-judge.d.ts +2 -0
  176. package/dist/types/ai-model/prompt/util.d.ts +2 -34
  177. package/dist/types/ai-model/service-caller/index.d.ts +2 -3
  178. package/dist/types/ai-model/ui-tars-planning.d.ts +15 -2
  179. package/dist/types/{ai-model/common.d.ts → common.d.ts} +6 -6
  180. package/dist/types/device/device-options.d.ts +57 -0
  181. package/dist/types/device/index.d.ts +55 -39
  182. package/dist/types/index.d.ts +7 -6
  183. package/dist/types/service/index.d.ts +26 -0
  184. package/dist/types/service/utils.d.ts +2 -0
  185. package/dist/types/task-runner.d.ts +49 -0
  186. package/dist/types/tree.d.ts +4 -1
  187. package/dist/types/types.d.ts +103 -66
  188. package/dist/types/yaml/utils.d.ts +1 -1
  189. package/dist/types/yaml.d.ts +68 -43
  190. package/package.json +9 -12
  191. package/dist/es/ai-model/action-executor.mjs +0 -129
  192. package/dist/es/ai-model/action-executor.mjs.map +0 -1
  193. package/dist/es/ai-model/common.mjs.map +0 -1
  194. package/dist/es/insight/index.mjs.map +0 -1
  195. package/dist/es/insight/utils.mjs.map +0 -1
  196. package/dist/lib/ai-model/action-executor.js +0 -163
  197. package/dist/lib/ai-model/action-executor.js.map +0 -1
  198. package/dist/lib/ai-model/common.js.map +0 -1
  199. package/dist/lib/insight/index.js.map +0 -1
  200. package/dist/lib/insight/utils.js.map +0 -1
  201. package/dist/types/ai-model/action-executor.d.ts +0 -19
  202. package/dist/types/insight/index.d.ts +0 -31
  203. package/dist/types/insight/utils.d.ts +0 -2
@@ -24,283 +24,56 @@ var __webpack_require__ = {};
24
24
  var __webpack_exports__ = {};
25
25
  __webpack_require__.r(__webpack_exports__);
26
26
  __webpack_require__.d(__webpack_exports__, {
27
- findElementPrompt: ()=>findElementPrompt,
28
- locatorSchema: ()=>locatorSchema,
29
- systemPromptToLocateElement: ()=>systemPromptToLocateElement
27
+ systemPromptToLocateElement: ()=>systemPromptToLocateElement,
28
+ findElementPrompt: ()=>findElementPrompt
30
29
  });
31
30
  const external_common_js_namespaceObject = require("./common.js");
32
31
  function systemPromptToLocateElement(vlMode) {
33
- if (vlMode) {
34
- const bboxComment = (0, external_common_js_namespaceObject.bboxDescription)(vlMode);
35
- return `
32
+ const bboxComment = (0, external_common_js_namespaceObject.bboxDescription)(vlMode);
33
+ return `
36
34
  ## Role:
37
- You are an expert in software testing.
35
+ You are an AI assistant that helps identify UI elements.
38
36
 
39
37
  ## Objective:
40
- - Identify elements in screenshots and text that match the user's description.
41
- - Give the coordinates of the element that matches the user's description best in the screenshot.
42
- - Determine whether the user's description is order-sensitive (e.g., contains phrases like 'the third item in the list', 'the last button', etc.).
38
+ - Identify elements in screenshots that match the user's description.
39
+ - Provide the coordinates of the element that matches the user's description.
43
40
 
44
41
  ## Output Format:
45
42
  \`\`\`json
46
43
  {
47
44
  "bbox": [number, number, number, number], // ${bboxComment}
48
- "errors"?: string[],
49
- "isOrderSensitive": boolean // Whether the targetElementDescription is order-sensitive (true/false)
45
+ "errors"?: string[]
50
46
  }
51
47
  \`\`\`
52
48
 
53
49
  Fields:
54
- * \`bbox\` is the bounding box of the element that matches the user's description best in the screenshot
55
- * \`isOrderSensitive\` is a boolean indicating whether the user's description is order-sensitive (true/false)
50
+ * \`bbox\` is the bounding box of the element that matches the user's description
56
51
  * \`errors\` is an optional array of error messages (if any)
57
52
 
58
- Order-sensitive means the description contains phrases like:
59
- - "the third item in the list"
60
- - "the last button"
61
- - "the first input box"
62
- - "the second row"
63
-
64
- Not order-sensitive means the description is like:
65
- - "confirm button"
66
- - "search box"
67
- - "password input"
68
-
69
- For example, when an element is found and the description is order-sensitive:
53
+ For example, when an element is found:
70
54
  \`\`\`json
71
55
  {
72
56
  "bbox": [100, 100, 200, 200],
73
- "isOrderSensitive": true,
74
57
  "errors": []
75
58
  }
76
59
  \`\`\`
77
60
 
78
- When no element is found and the description is not order-sensitive:
61
+ When no element is found:
79
62
  \`\`\`json
80
63
  {
81
64
  "bbox": [],
82
- "isOrderSensitive": false,
83
65
  "errors": ["I can see ..., but {some element} is not found"]
84
66
  }
85
67
  \`\`\`
86
68
  `;
87
- }
88
- return `
89
- ## Role:
90
- You are an expert in software page image (2D) and page element text analysis.
91
-
92
- ## Objective:
93
- - Identify elements in screenshots and text that match the user's description.
94
- - Return JSON data containing the selection reason and element ID.
95
- - Determine whether the user's description is order-sensitive (e.g., contains phrases like 'the third item in the list', 'the last button', etc.).
96
-
97
- ## Skills:
98
- - Image analysis and recognition
99
- - Multilingual text understanding
100
- - Software UI design and testing
101
-
102
- ## Workflow:
103
- 1. Receive the user's element description, screenshot, and element description information. Note that the text may contain non-English characters (e.g., Chinese), indicating that the application may be non-English.
104
- 2. Based on the user's description, locate the target element ID in the list of element descriptions and the screenshot.
105
- 3. Found the required number of elements
106
- 4. Return JSON data containing the selection reason and element ID.
107
- 5. Judge whether the user's description is order-sensitive (see below for definition and examples).
108
-
109
- ## Constraints:
110
- - Strictly adhere to the specified location when describing the required element; do not select elements from other locations.
111
- - Elements in the image with NodeType other than "TEXT Node" have been highlighted to identify the element among multiple non-text elements.
112
- - Accurately identify element information based on the user's description and return the corresponding element ID from the element description information, not extracted from the image.
113
- - If no elements are found, the "elements" array should be empty.
114
- - The returned data must conform to the specified JSON format.
115
- - The returned value id information must use the id from element info (important: **use id not indexId, id is hash content**)
116
-
117
- ## Order-Sensitive Definition:
118
- - If the description contains phrases like "the third item in the list", "the last button", "the first input box", "the second row", etc., it is order-sensitive (isOrderSensitive = true).
119
- - If the description is like "confirm button", "search box", "password input", etc., it is not order-sensitive (isOrderSensitive = false).
120
-
121
- ## Output Format:
122
-
123
- Please return the result in JSON format as follows:
124
-
125
- \`\`\`json
126
- {
127
- "elements": [
128
- // If no matching elements are found, return an empty array []
129
- {
130
- "reason": "PLACEHOLDER", // The thought process for finding the element, replace PLACEHOLDER with your thought process
131
- "text": "PLACEHOLDER", // Replace PLACEHOLDER with the text of elementInfo, if none, leave empty
132
- "id": "PLACEHOLDER" // Replace PLACEHOLDER with the ID (important: **use id not indexId, id is hash content**) of elementInfo
133
- }
134
- // More elements...
135
- ],
136
- "isOrderSensitive": true, // or false, depending on the user's description
137
- "errors": [] // Array of strings containing any error messages
138
- }
139
- \`\`\`
140
-
141
- ## Example:
142
- Example 1:
143
- Input Example:
144
- \`\`\`json
145
- // Description: "Shopping cart icon in the upper right corner"
146
- {
147
- "description": "PLACEHOLDER", // Description of the target element
148
- "screenshot": "path/screenshot.png",
149
- "text": '{
150
- "pageSize": {
151
- "width": 400, // Width of the page
152
- "height": 905 // Height of the page
153
- },
154
- "elementInfos": [
155
- {
156
- "id": "1231", // ID of the element
157
- "indexId": "0", // Index of the element\u{FF0C}The image is labeled to the left of the element
158
- "attributes": { // Attributes of the element
159
- "nodeType": "IMG Node", // Type of element, types include: TEXT Node, IMG Node, BUTTON Node, INPUT Node
160
- "src": "https://ap-southeast-3.m",
161
- "class": ".img"
162
- },
163
- "content": "", // Text content of the element
164
- "rect": {
165
- "left": 280, // Distance from the left side of the page
166
- "top": 8, // Distance from the top of the page
167
- "width": 44, // Width of the element
168
- "height": 44 // Height of the element
169
- }
170
- },
171
- {
172
- "id": "66551", // ID of the element
173
- "indexId": "1", // Index of the element,The image is labeled to the left of the element
174
- "attributes": { // Attributes of the element
175
- "nodeType": "IMG Node", // Type of element, types include: TEXT Node, IMG Node, BUTTON Node, INPUT Node
176
- "src": "data:image/png;base64,iVBORw0KGgoAAAANSU...",
177
- "class": ".icon"
178
- },
179
- "content": "", // Text content of the element
180
- "rect": {
181
- "left": 350, // Distance from the left side of the page
182
- "top": 16, // Distance from the top of the page
183
- "width": 25, // Width of the element
184
- "height": 25 // Height of the element
185
- }
186
- },
187
- ...
188
- {
189
- "id": "12344",
190
- "indexId": "2", // Index of the element\u{FF0C}The image is labeled to the left of the element
191
- "attributes": {
192
- "nodeType": "TEXT Node",
193
- "class": ".product-name"
194
- },
195
- "center": [
196
- 288,
197
- 834
198
- ],
199
- "content": "Mango Drink",
200
- "rect": {
201
- "left": 188,
202
- "top": 827,
203
- "width": 199,
204
- "height": 13
205
- }
206
- },
207
- ...
208
- ]
209
- }
210
- '
211
69
  }
212
- \`\`\`
213
- Output Example:
214
- \`\`\`json
215
- {
216
- "elements": [
217
- {
218
- // Describe the reason for finding this element, replace with actual value in practice
219
- "reason": "Reason for finding element 4: It is located in the upper right corner, is an image type, and according to the screenshot, it is a shopping cart icon button",
220
- "text": "",
221
- // ID(**use id not indexId**) of this element, replace with actual value in practice, **use id not indexId**
222
- "id": "1231"
223
- }
224
- ],
225
- "isOrderSensitive": true,
226
- "errors": []
227
- }
228
- \`\`\`
229
-
230
- `;
231
- }
232
- const locatorSchema = {
233
- type: 'json_schema',
234
- json_schema: {
235
- name: 'find_elements',
236
- strict: true,
237
- schema: {
238
- type: 'object',
239
- properties: {
240
- elements: {
241
- type: 'array',
242
- items: {
243
- type: 'object',
244
- properties: {
245
- reason: {
246
- type: 'string',
247
- description: 'Reason for finding this element'
248
- },
249
- text: {
250
- type: 'string',
251
- description: 'Text content of the element'
252
- },
253
- id: {
254
- type: 'string',
255
- description: 'ID of this element'
256
- }
257
- },
258
- required: [
259
- 'reason',
260
- 'text',
261
- 'id'
262
- ],
263
- additionalProperties: false
264
- },
265
- description: 'List of found elements'
266
- },
267
- isOrderSensitive: {
268
- type: 'boolean',
269
- description: "Whether the targetElementDescription is order-sensitive (true/false)"
270
- },
271
- errors: {
272
- type: 'array',
273
- items: {
274
- type: 'string'
275
- },
276
- description: 'List of error messages, if any'
277
- }
278
- },
279
- required: [
280
- 'elements',
281
- 'isOrderSensitive',
282
- 'errors'
283
- ],
284
- additionalProperties: false
285
- }
286
- }
287
- };
288
- const findElementPrompt = ({ pageDescription, targetElementDescription })=>`
289
- Here is the item user want to find:
290
- =====================================
291
- ${targetElementDescription}
292
- =====================================
293
-
294
- ${pageDescription}
295
- `;
70
+ const findElementPrompt = (targetElementDescription)=>`Find: ${targetElementDescription}`;
296
71
  exports.findElementPrompt = __webpack_exports__.findElementPrompt;
297
- exports.locatorSchema = __webpack_exports__.locatorSchema;
298
72
  exports.systemPromptToLocateElement = __webpack_exports__.systemPromptToLocateElement;
299
- for(var __webpack_i__ in __webpack_exports__)if (-1 === [
73
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
300
74
  "findElementPrompt",
301
- "locatorSchema",
302
75
  "systemPromptToLocateElement"
303
- ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
76
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
304
77
  Object.defineProperty(exports, '__esModule', {
305
78
  value: true
306
79
  });
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/prompt/llm-locator.js","sources":["webpack://@midscene/core/webpack/runtime/define_property_getters","webpack://@midscene/core/webpack/runtime/has_own_property","webpack://@midscene/core/webpack/runtime/make_namespace_object","webpack://@midscene/core/./src/ai-model/prompt/llm-locator.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type { TVlModeTypes } from '@midscene/shared/env';\nimport type { ResponseFormatJSONSchema } from 'openai/resources/index';\nimport { bboxDescription } from './common';\nexport function systemPromptToLocateElement(vlMode: TVlModeTypes | undefined) {\n if (vlMode) {\n const bboxComment = bboxDescription(vlMode);\n return `\n## Role:\nYou are an expert in software testing.\n\n## Objective:\n- Identify elements in screenshots and text that match the user's description.\n- Give the coordinates of the element that matches the user's description best in the screenshot.\n- Determine whether the user's description is order-sensitive (e.g., contains phrases like 'the third item in the list', 'the last button', etc.).\n\n## Output Format:\n\\`\\`\\`json\n{\n \"bbox\": [number, number, number, number], // ${bboxComment}\n \"errors\"?: string[],\n \"isOrderSensitive\": boolean // Whether the targetElementDescription is order-sensitive (true/false)\n}\n\\`\\`\\`\n\nFields:\n* \\`bbox\\` is the bounding box of the element that matches the user's description best in the screenshot\n* \\`isOrderSensitive\\` is a boolean indicating whether the user's description is order-sensitive (true/false)\n* \\`errors\\` is an optional array of error messages (if any)\n\nOrder-sensitive means the description contains phrases like:\n- \"the third item in the list\"\n- \"the last button\"\n- \"the first input box\"\n- \"the second row\"\n\nNot order-sensitive means the description is like:\n- \"confirm button\"\n- \"search box\"\n- \"password input\"\n\nFor example, when an element is found and the description is order-sensitive:\n\\`\\`\\`json\n{\n \"bbox\": [100, 100, 200, 200],\n \"isOrderSensitive\": true,\n \"errors\": []\n}\n\\`\\`\\`\n\nWhen no element is found and the description is not order-sensitive:\n\\`\\`\\`json\n{\n \"bbox\": [],\n \"isOrderSensitive\": false,\n \"errors\": [\"I can see ..., but {some element} is not found\"]\n}\n\\`\\`\\`\n`;\n }\n\n return `\n## Role:\nYou are an expert in software page image (2D) and page element text analysis.\n\n## Objective:\n- Identify elements in screenshots and text that match the user's description.\n- Return JSON data containing the selection reason and element ID.\n- Determine whether the user's description is order-sensitive (e.g., contains phrases like 'the third item in the list', 'the last button', etc.).\n\n## Skills:\n- Image analysis and recognition\n- Multilingual text understanding\n- Software UI design and testing\n\n## Workflow:\n1. Receive the user's element description, screenshot, and element description information. Note that the text may contain non-English characters (e.g., Chinese), indicating that the application may be non-English.\n2. Based on the user's description, locate the target element ID in the list of element descriptions and the screenshot.\n3. Found the required number of elements\n4. Return JSON data containing the selection reason and element ID.\n5. Judge whether the user's description is order-sensitive (see below for definition and examples).\n\n## Constraints:\n- Strictly adhere to the specified location when describing the required element; do not select elements from other locations.\n- Elements in the image with NodeType other than \"TEXT Node\" have been highlighted to identify the element among multiple non-text elements.\n- Accurately identify element information based on the user's description and return the corresponding element ID from the element description information, not extracted from the image.\n- If no elements are found, the \"elements\" array should be empty.\n- The returned data must conform to the specified JSON format.\n- The returned value id information must use the id from element info (important: **use id not indexId, id is hash content**)\n\n## Order-Sensitive Definition:\n- If the description contains phrases like \"the third item in the list\", \"the last button\", \"the first input box\", \"the second row\", etc., it is order-sensitive (isOrderSensitive = true).\n- If the description is like \"confirm button\", \"search box\", \"password input\", etc., it is not order-sensitive (isOrderSensitive = false).\n\n## Output Format:\n\nPlease return the result in JSON format as follows:\n\n\\`\\`\\`json\n{\n \"elements\": [\n // If no matching elements are found, return an empty array []\n {\n \"reason\": \"PLACEHOLDER\", // The thought process for finding the element, replace PLACEHOLDER with your thought process\n \"text\": \"PLACEHOLDER\", // Replace PLACEHOLDER with the text of elementInfo, if none, leave empty\n \"id\": \"PLACEHOLDER\" // Replace PLACEHOLDER with the ID (important: **use id not indexId, id is hash content**) of elementInfo\n }\n // More elements...\n ],\n \"isOrderSensitive\": true, // or false, depending on the user's description\n \"errors\": [] // Array of strings containing any error messages\n}\n\\`\\`\\`\n\n## Example:\nExample 1:\nInput Example:\n\\`\\`\\`json\n// Description: \"Shopping cart icon in the upper right corner\"\n{\n \"description\": \"PLACEHOLDER\", // Description of the target element\n \"screenshot\": \"path/screenshot.png\",\n \"text\": '{\n \"pageSize\": {\n \"width\": 400, // Width of the page\n \"height\": 905 // Height of the page\n },\n \"elementInfos\": [\n {\n \"id\": \"1231\", // ID of the element\n \"indexId\": \"0\", // Index of the element,The image is labeled to the left of the element\n \"attributes\": { // Attributes of the element\n \"nodeType\": \"IMG Node\", // Type of element, types include: TEXT Node, IMG Node, BUTTON Node, INPUT Node\n \"src\": \"https://ap-southeast-3.m\",\n \"class\": \".img\"\n },\n \"content\": \"\", // Text content of the element\n \"rect\": {\n \"left\": 280, // Distance from the left side of the page\n \"top\": 8, // Distance from the top of the page\n \"width\": 44, // Width of the element\n \"height\": 44 // Height of the element\n }\n },\n {\n \"id\": \"66551\", // ID of the element\n \"indexId\": \"1\", // Index of the element,The image is labeled to the left of the element\n \"attributes\": { // Attributes of the element\n \"nodeType\": \"IMG Node\", // Type of element, types include: TEXT Node, IMG Node, BUTTON Node, INPUT Node\n \"src\": \"data:image/png;base64,iVBORw0KGgoAAAANSU...\",\n \"class\": \".icon\"\n },\n \"content\": \"\", // Text content of the element\n \"rect\": {\n \"left\": 350, // Distance from the left side of the page\n \"top\": 16, // Distance from the top of the page\n \"width\": 25, // Width of the element\n \"height\": 25 // Height of the element\n }\n },\n ...\n {\n \"id\": \"12344\",\n \"indexId\": \"2\", // Index of the element,The image is labeled to the left of the element\n \"attributes\": {\n \"nodeType\": \"TEXT Node\",\n \"class\": \".product-name\"\n },\n \"center\": [\n 288,\n 834\n ],\n \"content\": \"Mango Drink\",\n \"rect\": {\n \"left\": 188,\n \"top\": 827,\n \"width\": 199,\n \"height\": 13\n }\n },\n ...\n ]\n }\n '\n}\n\\`\\`\\`\nOutput Example:\n\\`\\`\\`json\n{\n \"elements\": [\n {\n // Describe the reason for finding this element, replace with actual value in practice\n \"reason\": \"Reason for finding element 4: It is located in the upper right corner, is an image type, and according to the screenshot, it is a shopping cart icon button\",\n \"text\": \"\",\n // ID(**use id not indexId**) of this element, replace with actual value in practice, **use id not indexId**\n \"id\": \"1231\"\n }\n ],\n \"isOrderSensitive\": true,\n \"errors\": []\n}\n\\`\\`\\`\n \n `;\n}\n\nexport const locatorSchema: ResponseFormatJSONSchema = {\n type: 'json_schema',\n json_schema: {\n name: 'find_elements',\n strict: true,\n schema: {\n type: 'object',\n properties: {\n elements: {\n type: 'array',\n items: {\n type: 'object',\n properties: {\n reason: {\n type: 'string',\n description: 'Reason for finding this element',\n },\n text: {\n type: 'string',\n description: 'Text content of the element',\n },\n id: {\n type: 'string',\n description: 'ID of this element',\n },\n },\n required: ['reason', 'text', 'id'],\n additionalProperties: false,\n },\n description: 'List of found elements',\n },\n isOrderSensitive: {\n type: 'boolean',\n description:\n 'Whether the targetElementDescription is order-sensitive (true/false)',\n },\n errors: {\n type: 'array',\n items: {\n type: 'string',\n },\n description: 'List of error messages, if any',\n },\n },\n required: ['elements', 'isOrderSensitive', 'errors'],\n additionalProperties: false,\n },\n },\n};\n\nexport const findElementPrompt = ({\n pageDescription,\n targetElementDescription,\n}: {\n pageDescription: string;\n targetElementDescription: string;\n}) => `\nHere is the item user want to find:\n=====================================\n${targetElementDescription}\n=====================================\n\n${pageDescription}\n `;\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","systemPromptToLocateElement","vlMode","bboxComment","bboxDescription","locatorSchema","findElementPrompt","pageDescription","targetElementDescription"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;ACHO,SAASI,4BAA4BC,MAAgC;IAC1E,IAAIA,QAAQ;QACV,MAAMC,cAAcC,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EAAgBF;QACpC,OAAO,CAAC;;;;;;;;;;;;gDAYoC,EAAEC,YAAY;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAuC9D,CAAC;IACC;IAEA,OAAO,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;EA8IR,CAAC;AACH;AAEO,MAAME,gBAA0C;IACrD,MAAM;IACN,aAAa;QACX,MAAM;QACN,QAAQ;QACR,QAAQ;YACN,MAAM;YACN,YAAY;gBACV,UAAU;oBACR,MAAM;oBACN,OAAO;wBACL,MAAM;wBACN,YAAY;4BACV,QAAQ;gCACN,MAAM;gCACN,aAAa;4BACf;4BACA,MAAM;gCACJ,MAAM;gCACN,aAAa;4BACf;4BACA,IAAI;gCACF,MAAM;gCACN,aAAa;4BACf;wBACF;wBACA,UAAU;4BAAC;4BAAU;4BAAQ;yBAAK;wBAClC,sBAAsB;oBACxB;oBACA,aAAa;gBACf;gBACA,kBAAkB;oBAChB,MAAM;oBACN,aACE;gBACJ;gBACA,QAAQ;oBACN,MAAM;oBACN,OAAO;wBACL,MAAM;oBACR;oBACA,aAAa;gBACf;YACF;YACA,UAAU;gBAAC;gBAAY;gBAAoB;aAAS;YACpD,sBAAsB;QACxB;IACF;AACF;AAEO,MAAMC,oBAAoB,CAAC,EAChCC,eAAe,EACfC,wBAAwB,EAIzB,GAAK,CAAC;;;AAGP,EAAEA,yBAAyB;;;AAG3B,EAAED,gBAAgB;EAChB,CAAC"}
1
+ {"version":3,"file":"ai-model/prompt/llm-locator.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../src/ai-model/prompt/llm-locator.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type { TVlModeTypes } from '@midscene/shared/env';\nimport { bboxDescription } from './common';\nexport function systemPromptToLocateElement(vlMode: TVlModeTypes | undefined) {\n const bboxComment = bboxDescription(vlMode);\n return `\n## Role:\nYou are an AI assistant that helps identify UI elements.\n\n## Objective:\n- Identify elements in screenshots that match the user's description.\n- Provide the coordinates of the element that matches the user's description.\n\n## Output Format:\n\\`\\`\\`json\n{\n \"bbox\": [number, number, number, number], // ${bboxComment}\n \"errors\"?: string[]\n}\n\\`\\`\\`\n\nFields:\n* \\`bbox\\` is the bounding box of the element that matches the user's description\n* \\`errors\\` is an optional array of error messages (if any)\n\nFor example, when an element is found:\n\\`\\`\\`json\n{\n \"bbox\": [100, 100, 200, 200],\n \"errors\": []\n}\n\\`\\`\\`\n\nWhen no element is found:\n\\`\\`\\`json\n{\n \"bbox\": [],\n \"errors\": [\"I can see ..., but {some element} is not found\"]\n}\n\\`\\`\\`\n`;\n}\n\nexport const findElementPrompt = (targetElementDescription: string) =>\n `Find: ${targetElementDescription}`;\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","systemPromptToLocateElement","vlMode","bboxComment","bboxDescription","findElementPrompt","targetElementDescription"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;ACJO,SAASI,4BAA4BC,MAAgC;IAC1E,MAAMC,cAAcC,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EAAgBF;IACpC,OAAO,CAAC;;;;;;;;;;;gDAWsC,EAAEC,YAAY;;;;;;;;;;;;;;;;;;;;;;;;AAwB9D,CAAC;AACD;AAEO,MAAME,oBAAoB,CAACC,2BAChC,CAAC,MAAM,EAAEA,0BAA0B"}