@godscene/core 1.7.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +9 -0
  3. package/dist/es/agent/agent.mjs +767 -0
  4. package/dist/es/agent/common.mjs +0 -0
  5. package/dist/es/agent/execution-session.mjs +39 -0
  6. package/dist/es/agent/index.mjs +6 -0
  7. package/dist/es/agent/task-builder.mjs +343 -0
  8. package/dist/es/agent/task-cache.mjs +212 -0
  9. package/dist/es/agent/tasks.mjs +428 -0
  10. package/dist/es/agent/ui-utils.mjs +101 -0
  11. package/dist/es/agent/utils.mjs +167 -0
  12. package/dist/es/ai-model/auto-glm/actions.mjs +237 -0
  13. package/dist/es/ai-model/auto-glm/index.mjs +6 -0
  14. package/dist/es/ai-model/auto-glm/parser.mjs +237 -0
  15. package/dist/es/ai-model/auto-glm/planning.mjs +69 -0
  16. package/dist/es/ai-model/auto-glm/prompt.mjs +220 -0
  17. package/dist/es/ai-model/auto-glm/util.mjs +7 -0
  18. package/dist/es/ai-model/connectivity.mjs +136 -0
  19. package/dist/es/ai-model/conversation-history.mjs +193 -0
  20. package/dist/es/ai-model/index.mjs +12 -0
  21. package/dist/es/ai-model/inspect.mjs +395 -0
  22. package/dist/es/ai-model/llm-planning.mjs +231 -0
  23. package/dist/es/ai-model/prompt/common.mjs +5 -0
  24. package/dist/es/ai-model/prompt/describe.mjs +64 -0
  25. package/dist/es/ai-model/prompt/extraction.mjs +129 -0
  26. package/dist/es/ai-model/prompt/llm-locator.mjs +49 -0
  27. package/dist/es/ai-model/prompt/llm-planning.mjs +584 -0
  28. package/dist/es/ai-model/prompt/llm-section-locator.mjs +42 -0
  29. package/dist/es/ai-model/prompt/order-sensitive-judge.mjs +33 -0
  30. package/dist/es/ai-model/prompt/playwright-generator.mjs +115 -0
  31. package/dist/es/ai-model/prompt/ui-tars-planning.mjs +34 -0
  32. package/dist/es/ai-model/prompt/util.mjs +57 -0
  33. package/dist/es/ai-model/prompt/yaml-generator.mjs +201 -0
  34. package/dist/es/ai-model/service-caller/codex-app-server.mjs +573 -0
  35. package/dist/es/ai-model/service-caller/image-detail.mjs +4 -0
  36. package/dist/es/ai-model/service-caller/index.mjs +648 -0
  37. package/dist/es/ai-model/service-caller/request-timeout.mjs +47 -0
  38. package/dist/es/ai-model/ui-tars-planning.mjs +247 -0
  39. package/dist/es/common.mjs +382 -0
  40. package/dist/es/device/device-options.mjs +0 -0
  41. package/dist/es/device/index.mjs +340 -0
  42. package/dist/es/dump/html-utils.mjs +290 -0
  43. package/dist/es/dump/index.mjs +3 -0
  44. package/dist/es/dump/screenshot-restoration.mjs +30 -0
  45. package/dist/es/dump/screenshot-store.mjs +125 -0
  46. package/dist/es/index.mjs +17 -0
  47. package/dist/es/report-cli.mjs +149 -0
  48. package/dist/es/report-generator.mjs +203 -0
  49. package/dist/es/report-markdown.mjs +216 -0
  50. package/dist/es/report.mjs +287 -0
  51. package/dist/es/screenshot-item.mjs +120 -0
  52. package/dist/es/service/index.mjs +272 -0
  53. package/dist/es/service/utils.mjs +13 -0
  54. package/dist/es/skill/index.mjs +35 -0
  55. package/dist/es/task-runner.mjs +261 -0
  56. package/dist/es/task-timing.mjs +10 -0
  57. package/dist/es/tree.mjs +11 -0
  58. package/dist/es/types.mjs +202 -0
  59. package/dist/es/utils.mjs +232 -0
  60. package/dist/es/yaml/builder.mjs +11 -0
  61. package/dist/es/yaml/index.mjs +4 -0
  62. package/dist/es/yaml/player.mjs +425 -0
  63. package/dist/es/yaml/utils.mjs +100 -0
  64. package/dist/es/yaml.mjs +0 -0
  65. package/dist/lib/agent/agent.js +815 -0
  66. package/dist/lib/agent/common.js +5 -0
  67. package/dist/lib/agent/execution-session.js +73 -0
  68. package/dist/lib/agent/index.js +76 -0
  69. package/dist/lib/agent/task-builder.js +380 -0
  70. package/dist/lib/agent/task-cache.js +264 -0
  71. package/dist/lib/agent/tasks.js +471 -0
  72. package/dist/lib/agent/ui-utils.js +153 -0
  73. package/dist/lib/agent/utils.js +238 -0
  74. package/dist/lib/ai-model/auto-glm/actions.js +271 -0
  75. package/dist/lib/ai-model/auto-glm/index.js +64 -0
  76. package/dist/lib/ai-model/auto-glm/parser.js +280 -0
  77. package/dist/lib/ai-model/auto-glm/planning.js +103 -0
  78. package/dist/lib/ai-model/auto-glm/prompt.js +257 -0
  79. package/dist/lib/ai-model/auto-glm/util.js +44 -0
  80. package/dist/lib/ai-model/connectivity.js +180 -0
  81. package/dist/lib/ai-model/conversation-history.js +227 -0
  82. package/dist/lib/ai-model/index.js +127 -0
  83. package/dist/lib/ai-model/inspect.js +441 -0
  84. package/dist/lib/ai-model/llm-planning.js +268 -0
  85. package/dist/lib/ai-model/prompt/common.js +39 -0
  86. package/dist/lib/ai-model/prompt/describe.js +98 -0
  87. package/dist/lib/ai-model/prompt/extraction.js +169 -0
  88. package/dist/lib/ai-model/prompt/llm-locator.js +86 -0
  89. package/dist/lib/ai-model/prompt/llm-planning.js +621 -0
  90. package/dist/lib/ai-model/prompt/llm-section-locator.js +79 -0
  91. package/dist/lib/ai-model/prompt/order-sensitive-judge.js +70 -0
  92. package/dist/lib/ai-model/prompt/playwright-generator.js +176 -0
  93. package/dist/lib/ai-model/prompt/ui-tars-planning.js +71 -0
  94. package/dist/lib/ai-model/prompt/util.js +103 -0
  95. package/dist/lib/ai-model/prompt/yaml-generator.js +262 -0
  96. package/dist/lib/ai-model/service-caller/codex-app-server.js +622 -0
  97. package/dist/lib/ai-model/service-caller/image-detail.js +38 -0
  98. package/dist/lib/ai-model/service-caller/index.js +716 -0
  99. package/dist/lib/ai-model/service-caller/request-timeout.js +93 -0
  100. package/dist/lib/ai-model/ui-tars-planning.js +281 -0
  101. package/dist/lib/common.js +491 -0
  102. package/dist/lib/device/device-options.js +18 -0
  103. package/dist/lib/device/index.js +467 -0
  104. package/dist/lib/dump/html-utils.js +366 -0
  105. package/dist/lib/dump/index.js +58 -0
  106. package/dist/lib/dump/screenshot-restoration.js +64 -0
  107. package/dist/lib/dump/screenshot-store.js +165 -0
  108. package/dist/lib/index.js +184 -0
  109. package/dist/lib/report-cli.js +189 -0
  110. package/dist/lib/report-generator.js +244 -0
  111. package/dist/lib/report-markdown.js +253 -0
  112. package/dist/lib/report.js +333 -0
  113. package/dist/lib/screenshot-item.js +154 -0
  114. package/dist/lib/service/index.js +306 -0
  115. package/dist/lib/service/utils.js +47 -0
  116. package/dist/lib/skill/index.js +69 -0
  117. package/dist/lib/task-runner.js +298 -0
  118. package/dist/lib/task-timing.js +44 -0
  119. package/dist/lib/tree.js +51 -0
  120. package/dist/lib/types.js +298 -0
  121. package/dist/lib/utils.js +314 -0
  122. package/dist/lib/yaml/builder.js +55 -0
  123. package/dist/lib/yaml/index.js +79 -0
  124. package/dist/lib/yaml/player.js +459 -0
  125. package/dist/lib/yaml/utils.js +153 -0
  126. package/dist/lib/yaml.js +18 -0
  127. package/dist/types/agent/agent.d.ts +220 -0
  128. package/dist/types/agent/common.d.ts +0 -0
  129. package/dist/types/agent/execution-session.d.ts +36 -0
  130. package/dist/types/agent/index.d.ts +9 -0
  131. package/dist/types/agent/task-builder.d.ts +34 -0
  132. package/dist/types/agent/task-cache.d.ts +49 -0
  133. package/dist/types/agent/tasks.d.ts +70 -0
  134. package/dist/types/agent/ui-utils.d.ts +14 -0
  135. package/dist/types/agent/utils.d.ts +25 -0
  136. package/dist/types/ai-model/auto-glm/actions.d.ts +78 -0
  137. package/dist/types/ai-model/auto-glm/index.d.ts +6 -0
  138. package/dist/types/ai-model/auto-glm/parser.d.ts +18 -0
  139. package/dist/types/ai-model/auto-glm/planning.d.ts +12 -0
  140. package/dist/types/ai-model/auto-glm/prompt.d.ts +27 -0
  141. package/dist/types/ai-model/auto-glm/util.d.ts +13 -0
  142. package/dist/types/ai-model/connectivity.d.ts +20 -0
  143. package/dist/types/ai-model/conversation-history.d.ts +105 -0
  144. package/dist/types/ai-model/index.d.ts +16 -0
  145. package/dist/types/ai-model/inspect.d.ts +67 -0
  146. package/dist/types/ai-model/llm-planning.d.ts +19 -0
  147. package/dist/types/ai-model/prompt/common.d.ts +2 -0
  148. package/dist/types/ai-model/prompt/describe.d.ts +1 -0
  149. package/dist/types/ai-model/prompt/extraction.d.ts +7 -0
  150. package/dist/types/ai-model/prompt/llm-locator.d.ts +3 -0
  151. package/dist/types/ai-model/prompt/llm-planning.d.ts +10 -0
  152. package/dist/types/ai-model/prompt/llm-section-locator.d.ts +3 -0
  153. package/dist/types/ai-model/prompt/order-sensitive-judge.d.ts +2 -0
  154. package/dist/types/ai-model/prompt/playwright-generator.d.ts +26 -0
  155. package/dist/types/ai-model/prompt/ui-tars-planning.d.ts +2 -0
  156. package/dist/types/ai-model/prompt/util.d.ts +33 -0
  157. package/dist/types/ai-model/prompt/yaml-generator.d.ts +102 -0
  158. package/dist/types/ai-model/service-caller/codex-app-server.d.ts +42 -0
  159. package/dist/types/ai-model/service-caller/image-detail.d.ts +2 -0
  160. package/dist/types/ai-model/service-caller/index.d.ts +60 -0
  161. package/dist/types/ai-model/service-caller/request-timeout.d.ts +32 -0
  162. package/dist/types/ai-model/ui-tars-planning.d.ts +72 -0
  163. package/dist/types/common.d.ts +288 -0
  164. package/dist/types/device/device-options.d.ts +155 -0
  165. package/dist/types/device/index.d.ts +2565 -0
  166. package/dist/types/dump/html-utils.d.ts +75 -0
  167. package/dist/types/dump/index.d.ts +5 -0
  168. package/dist/types/dump/screenshot-restoration.d.ts +8 -0
  169. package/dist/types/dump/screenshot-store.d.ts +49 -0
  170. package/dist/types/index.d.ts +21 -0
  171. package/dist/types/report-cli.d.ts +36 -0
  172. package/dist/types/report-generator.d.ts +88 -0
  173. package/dist/types/report-markdown.d.ts +24 -0
  174. package/dist/types/report.d.ts +52 -0
  175. package/dist/types/screenshot-item.d.ts +67 -0
  176. package/dist/types/service/index.d.ts +24 -0
  177. package/dist/types/service/utils.d.ts +2 -0
  178. package/dist/types/skill/index.d.ts +25 -0
  179. package/dist/types/task-runner.d.ts +50 -0
  180. package/dist/types/task-timing.d.ts +8 -0
  181. package/dist/types/tree.d.ts +4 -0
  182. package/dist/types/types.d.ts +684 -0
  183. package/dist/types/utils.d.ts +45 -0
  184. package/dist/types/yaml/builder.d.ts +2 -0
  185. package/dist/types/yaml/index.d.ts +4 -0
  186. package/dist/types/yaml/player.d.ts +34 -0
  187. package/dist/types/yaml/utils.d.ts +9 -0
  188. package/dist/types/yaml.d.ts +215 -0
  189. package/package.json +130 -0
@@ -0,0 +1,268 @@
1
+ "use strict";
2
+ var __webpack_require__ = {};
3
+ (()=>{
4
+ __webpack_require__.d = (exports1, definition)=>{
5
+ for(var key in definition)if (__webpack_require__.o(definition, key) && !__webpack_require__.o(exports1, key)) Object.defineProperty(exports1, key, {
6
+ enumerable: true,
7
+ get: definition[key]
8
+ });
9
+ };
10
+ })();
11
+ (()=>{
12
+ __webpack_require__.o = (obj, prop)=>Object.prototype.hasOwnProperty.call(obj, prop);
13
+ })();
14
+ (()=>{
15
+ __webpack_require__.r = (exports1)=>{
16
+ if ("u" > typeof Symbol && Symbol.toStringTag) Object.defineProperty(exports1, Symbol.toStringTag, {
17
+ value: 'Module'
18
+ });
19
+ Object.defineProperty(exports1, '__esModule', {
20
+ value: true
21
+ });
22
+ };
23
+ })();
24
+ var __webpack_exports__ = {};
25
+ __webpack_require__.r(__webpack_exports__);
26
+ __webpack_require__.d(__webpack_exports__, {
27
+ parseXMLPlanningResponse: ()=>parseXMLPlanningResponse,
28
+ plan: ()=>plan
29
+ });
30
+ const img_namespaceObject = require("@godscene/shared/img");
31
+ const logger_namespaceObject = require("@godscene/shared/logger");
32
+ const utils_namespaceObject = require("@godscene/shared/utils");
33
+ const external_common_js_namespaceObject = require("../common.js");
34
+ const llm_planning_js_namespaceObject = require("./prompt/llm-planning.js");
35
+ const util_js_namespaceObject = require("./prompt/util.js");
36
+ const index_js_namespaceObject = require("./service-caller/index.js");
37
+ const debug = (0, logger_namespaceObject.getDebug)('planning');
38
+ const warnLog = (0, logger_namespaceObject.getDebug)('planning', {
39
+ console: true
40
+ });
41
/**
 * Converts the XML-style planning reply of the model into a plain plan object.
 *
 * Tags read from the reply: <thought>, <memory>, <log>, <error>,
 * <action-type>, <action-param-json>, <complete success="...">,
 * <update-plan-content> and <mark-sub-goal-done>.
 *
 * @param {string} xmlString - Raw text returned by the model.
 * @param {*} modelFamily - Passed through unchanged to `safeParseJson`.
 * @returns {object} Always carries `log` ('' when the tag is missing) and
 *   `action` (null when no action type is given or it is "null"). `thought`,
 *   `memory`, `error`, `finalizeMessage`, `finalizeSuccess`, `updateSubGoals`
 *   and `markFinishedIndexes` are only present when the reply provided them.
 * @throws {Error} When <action-param-json> cannot be parsed; the underlying
 *   error is attached as `cause`.
 */
function parseXMLPlanningResponse(xmlString, modelFamily) {
    const readTag = (tag) => (0, util_js_namespaceObject.extractXMLTag)(xmlString, tag);

    const thought = readTag('thought');
    const memory = readTag('memory');
    const log = readTag('log') || '';
    const error = readTag('error');
    const actionType = readTag('action-type');
    const actionParamStr = readTag('action-param-json');

    // <complete> carries an attribute, so it is matched directly instead of via extractXMLTag.
    const completeGoalMatch = xmlString.match(/<complete\s+success="(true|false)">([\s\S]*?)<\/complete>/i);
    let finalizeMessage;
    let finalizeSuccess;
    if (completeGoalMatch) {
        // The pattern is case-insensitive, so the captured flag must be normalised
        // before comparing; otherwise success="TRUE" would be read as a failure.
        finalizeSuccess = completeGoalMatch[1].toLowerCase() === 'true';
        finalizeMessage = completeGoalMatch[2]?.trim() || undefined;
    }

    const updatePlanContent = readTag('update-plan-content');
    const markSubGoalDone = readTag('mark-sub-goal-done');
    const updateSubGoals = updatePlanContent
        ? (0, util_js_namespaceObject.parseSubGoalsFromXML)(updatePlanContent)
        : undefined;
    const markFinishedIndexes = markSubGoalDone
        ? (0, util_js_namespaceObject.parseMarkFinishedIndexes)(markSubGoalDone)
        : undefined;

    let action = null;
    if (actionType && actionType.toLowerCase() !== 'null') {
        // Drop anything from the first '<' onwards (stray markup after the type name).
        const type = actionType.split('<')[0].trim();
        let param;
        if (actionParamStr) {
            try {
                param = (0, index_js_namespaceObject.safeParseJson)(actionParamStr, modelFamily);
            } catch (e) {
                throw new Error(`Failed to parse action-param-json: ${e}`, { cause: e });
            }
        }
        action = param !== undefined ? { type, param } : { type };
    }

    return {
        ...(thought ? { thought } : {}),
        ...(memory ? { memory } : {}),
        log,
        ...(error ? { error } : {}),
        action,
        ...(finalizeMessage !== undefined ? { finalizeMessage } : {}),
        ...(finalizeSuccess !== undefined ? { finalizeSuccess } : {}),
        ...(updateSubGoals?.length ? { updateSubGoals } : {}),
        ...(markFinishedIndexes?.length ? { markFinishedIndexes } : {}),
    };
}
102
/**
 * Runs one planning round: sends the instruction plus the latest screenshot
 * to the model, parses the XML reply and records the outcome in the
 * conversation history.
 *
 * @param {string} userInstruction - Instruction embedded in <user_instruction>.
 * @param {object} opts
 * @param {object} opts.context - Provides `shotSize` ({width, height}) and `screenshot.base64`.
 * @param {object} opts.modelConfig - Passed to `callAI`; `modelFamily` is read from it.
 * @param {object} opts.conversationHistory - History object that is read and updated in place.
 * @param {Array} opts.actionSpace - Action definitions; `name` and `paramSchema` are read here.
 * @param {*} [opts.deepThink] - `true` enables sub-goal handling; `'unset'` is sent to `callAI` as undefined.
 * @param {*} [opts.includeBbox] - Forwarded to the system prompt builder.
 * @param {*} [opts.actionContext] - Wrapped in <high_priority_knowledge> when truthy.
 * @param {*} [opts.imagesIncludeCount] - Forwarded to `conversationHistory.snapshot`.
 * @param {*} [opts.abortSignal] - Forwarded to `callAI`.
 * @returns {Promise<object>} The parsed plan plus `actions`, `rawResponse`,
 *   `usage`, `reasoning_content`, `yamlFlow` and `shouldContinuePlanning`.
 * @throws {AIResponseParseError} When the reply cannot be parsed (after one
 *   retry) or post-processing of the parsed plan fails.
 */
async function plan(userInstruction, opts) {
    const { context, modelConfig, conversationHistory } = opts;
    const { shotSize } = context;
    const screenshotBase64 = context.screenshot.base64;
    const { modelFamily } = modelConfig;
    const includeSubGoals = opts.deepThink === true;

    const systemPrompt = await (0, llm_planning_js_namespaceObject.systemPromptToTaskPlanning)({
        actionSpace: opts.actionSpace,
        modelFamily,
        includeBbox: opts.includeBbox,
        includeThought: true,
        includeSubGoals,
    });

    let imagePayload = screenshotBase64;
    let imageWidth = shotSize.width;
    let imageHeight = shotSize.height;
    if (modelFamily === 'qwen2.5-vl') {
        // This family gets a padded image; bbox conversion below must use the padded size.
        const paddedResult = await (0, img_namespaceObject.paddingToMatchBlockByBase64)(imagePayload);
        imageWidth = paddedResult.width;
        imageHeight = paddedResult.height;
        imagePayload = paddedResult.imageBase64;
    }

    const actionContext = opts.actionContext
        ? `<high_priority_knowledge>${opts.actionContext}</high_priority_knowledge>\n`
        : '';
    const instruction = [
        {
            role: 'user',
            content: [
                {
                    type: 'text',
                    text: `${actionContext}<user_instruction>${userInstruction}</user_instruction>`,
                },
            ],
        },
    ];

    const subGoalsText = includeSubGoals
        ? conversationHistory.subGoalsToText()
        : conversationHistory.historicalLogsToText();
    const subGoalsSection = subGoalsText ? `\n\n${subGoalsText}` : '';
    const memoriesText = conversationHistory.memoriesToText();
    const memoriesSection = memoriesText ? `\n\n${memoriesText}` : '';

    // Both variants of the screenshot message differ only in their leading sentence.
    const pendingFeedback = conversationHistory.pendingFeedbackMessage;
    const feedbackLead = pendingFeedback
        ? `${pendingFeedback}. The previous action has been executed, here is the latest screenshot. Please continue according to the instruction.`
        : 'this is the latest screenshot';
    const latestFeedbackMessage = {
        role: 'user',
        content: [
            {
                type: 'text',
                text: `${feedbackLead}${memoriesSection}${subGoalsSection}`,
            },
            {
                type: 'image_url',
                image_url: {
                    url: imagePayload,
                    detail: 'high',
                },
            },
        ],
    };
    if (pendingFeedback) conversationHistory.resetPendingFeedbackMessageIfExists();

    conversationHistory.append(latestFeedbackMessage);
    conversationHistory.compressHistory(50, 20);
    const historyLog = conversationHistory.snapshot(opts.imagesIncludeCount);
    const msgs = [
        {
            role: 'system',
            content: systemPrompt,
        },
        ...instruction,
        ...historyLog,
    ];

    // Single place that issues the model request, used for the first attempt and the retry.
    const requestPlan = () => (0, index_js_namespaceObject.callAI)(msgs, modelConfig, {
        deepThink: opts.deepThink === 'unset' ? undefined : opts.deepThink,
        abortSignal: opts.abortSignal,
    });

    let { content: rawResponse, usage, reasoning_content } = await requestPlan();
    try {
        let planFromAI;
        try {
            planFromAI = parseXMLPlanningResponse(rawResponse, modelFamily);
        } catch {
            // One retry with the same messages when the first reply is unparsable.
            const retry = await requestPlan();
            rawResponse = retry.content;
            usage = retry.usage;
            reasoning_content = retry.reasoning_content;
            planFromAI = parseXMLPlanningResponse(rawResponse, modelFamily);
        }
        // Checked before the plan is dereferenced so a missing plan yields this message.
        (0, utils_namespaceObject.assert)(planFromAI, "can't get plans from AI");

        if (planFromAI.action && planFromAI.finalizeSuccess !== undefined) {
            warnLog('Planning response included both an action and <complete>; ignoring <complete> output.');
            planFromAI.finalizeMessage = undefined;
            planFromAI.finalizeSuccess = undefined;
        }

        const actions = planFromAI.action ? [planFromAI.action] : [];
        let shouldContinuePlanning = true;
        if (planFromAI.finalizeSuccess !== undefined) {
            debug('task completed via <complete> tag, stop planning');
            shouldContinuePlanning = false;
            if (includeSubGoals) conversationHistory.markAllSubGoalsFinished();
        }

        const returnValue = {
            ...planFromAI,
            actions,
            rawResponse,
            usage,
            reasoning_content,
            yamlFlow: (0, external_common_js_namespaceObject.buildYamlFlowFromPlans)(actions, opts.actionSpace),
            shouldContinuePlanning,
        };

        for (const action of actions) {
            const actionInActionSpace = opts.actionSpace.find((candidate) => candidate.name === action.type);
            debug('actionInActionSpace matched', actionInActionSpace);
            const locateFields = actionInActionSpace
                ? (0, external_common_js_namespaceObject.findAllMidsceneLocatorField)(actionInActionSpace.paramSchema)
                : [];
            debug('locateFields', locateFields);
            for (const field of locateFields) {
                // An action may arrive without any param object; nothing to convert then.
                const locateResult = action.param?.[field];
                if (locateResult && modelFamily !== undefined) {
                    action.param[field] = (0, external_common_js_namespaceObject.fillBboxParam)(locateResult, imageWidth, imageHeight, modelFamily);
                }
            }
        }

        if (includeSubGoals) {
            if (planFromAI.updateSubGoals?.length) conversationHistory.mergeSubGoals(planFromAI.updateSubGoals);
            if (planFromAI.markFinishedIndexes?.length) {
                for (const index of planFromAI.markFinishedIndexes) conversationHistory.markSubGoalFinished(index);
            }
            if (planFromAI.log) conversationHistory.appendSubGoalLog(planFromAI.log);
        } else if (planFromAI.log) {
            conversationHistory.appendHistoricalLog(planFromAI.log);
        }
        if (planFromAI.memory) conversationHistory.appendMemory(planFromAI.memory);

        conversationHistory.append({
            role: 'assistant',
            content: [
                {
                    type: 'text',
                    text: rawResponse,
                },
            ],
        });
        return returnValue;
    } catch (parseError) {
        const errorMessage = parseError instanceof Error ? parseError.message : String(parseError);
        throw new index_js_namespaceObject.AIResponseParseError(`XML parse error: ${errorMessage}`, rawResponse, usage);
    }
}
260
+ exports.parseXMLPlanningResponse = __webpack_exports__.parseXMLPlanningResponse;
261
+ exports.plan = __webpack_exports__.plan;
262
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
263
+ "parseXMLPlanningResponse",
264
+ "plan"
265
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
266
+ Object.defineProperty(exports, '__esModule', {
267
+ value: true
268
+ });
@@ -0,0 +1,39 @@
1
+ "use strict";
2
+ var __webpack_require__ = {};
3
+ (()=>{
4
+ __webpack_require__.d = (exports1, definition)=>{
5
+ for(var key in definition)if (__webpack_require__.o(definition, key) && !__webpack_require__.o(exports1, key)) Object.defineProperty(exports1, key, {
6
+ enumerable: true,
7
+ get: definition[key]
8
+ });
9
+ };
10
+ })();
11
+ (()=>{
12
+ __webpack_require__.o = (obj, prop)=>Object.prototype.hasOwnProperty.call(obj, prop);
13
+ })();
14
+ (()=>{
15
+ __webpack_require__.r = (exports1)=>{
16
+ if ("u" > typeof Symbol && Symbol.toStringTag) Object.defineProperty(exports1, Symbol.toStringTag, {
17
+ value: 'Module'
18
+ });
19
+ Object.defineProperty(exports1, '__esModule', {
20
+ value: true
21
+ });
22
+ };
23
+ })();
24
+ var __webpack_exports__ = {};
25
+ __webpack_require__.r(__webpack_exports__);
26
+ __webpack_require__.d(__webpack_exports__, {
27
+ bboxDescription: ()=>bboxDescription
28
+ });
29
+ function bboxDescription(modelFamily) {
30
+ if ('gemini' === modelFamily) return 'box_2d bounding box for the target element, should be [ymin, xmin, ymax, xmax] normalized to 0-1000.';
31
+ return '2d bounding box as [xmin, ymin, xmax, ymax]';
32
+ }
33
+ exports.bboxDescription = __webpack_exports__.bboxDescription;
34
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
35
+ "bboxDescription"
36
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
37
+ Object.defineProperty(exports, '__esModule', {
38
+ value: true
39
+ });
@@ -0,0 +1,98 @@
1
+ "use strict";
2
+ var __webpack_require__ = {};
3
+ (()=>{
4
+ __webpack_require__.d = (exports1, definition)=>{
5
+ for(var key in definition)if (__webpack_require__.o(definition, key) && !__webpack_require__.o(exports1, key)) Object.defineProperty(exports1, key, {
6
+ enumerable: true,
7
+ get: definition[key]
8
+ });
9
+ };
10
+ })();
11
+ (()=>{
12
+ __webpack_require__.o = (obj, prop)=>Object.prototype.hasOwnProperty.call(obj, prop);
13
+ })();
14
+ (()=>{
15
+ __webpack_require__.r = (exports1)=>{
16
+ if ("u" > typeof Symbol && Symbol.toStringTag) Object.defineProperty(exports1, Symbol.toStringTag, {
17
+ value: 'Module'
18
+ });
19
+ Object.defineProperty(exports1, '__esModule', {
20
+ value: true
21
+ });
22
+ };
23
+ })();
24
+ var __webpack_exports__ = {};
25
+ __webpack_require__.r(__webpack_exports__);
26
+ __webpack_require__.d(__webpack_exports__, {
27
+ elementDescriberInstruction: ()=>elementDescriberInstruction
28
+ });
29
+ const env_namespaceObject = require("@godscene/shared/env");
30
// Sample element descriptions per output language; each entry is already
// wrapped in double quotes because it is pasted verbatim into the prompt.
const examplesMap = {
    Chinese: [
        '"登录表单中的"登录"按钮"',
        '"搜索输入框,placeholder 为"请输入关键词""',
        '"顶部导航栏中文字为"首页"的链接"',
        '"联系表单中的提交按钮"',
        '"aria-label 为"打开菜单"的菜单图标"',
    ],
    English: [
        '"Login button with text \'Sign In\'"',
        '"Search input with placeholder \'Enter keywords\'"',
        '"Navigation link with text \'Home\' in header"',
        '"Submit button in contact form"',
        '"Menu icon with aria-label \'Open menu\'"',
    ],
};

/**
 * Renders the examples for a language as a dash-prefixed list, one per line.
 * Languages without an entry in `examplesMap` use the English examples.
 *
 * @param {string} language - Key into `examplesMap`.
 * @returns {string} Newline-joined bullet list.
 */
const getExamples = (language) => {
    const chosen = examplesMap[language] || examplesMap.English;
    const bulletLines = [];
    for (const example of chosen) {
        bulletLines.push(`- ${example}`);
    }
    return bulletLines.join('\n');
};
50
// elementDescriberInstruction: builds the prompt that asks the model to
// describe the element highlighted by the red rectangle.
// - Reads the output language once via getPreferredLanguage() and interpolates
//   it twice into the prompt, and passes it to getExamples() to pick the
//   matching example list.
// - Returns the prompt string; the prompt asks for a JSON reply with a
//   "description" field and an optional "error" field.
// NOTE(review): this diff rendering drops leading whitespace, so any original
// indentation inside the template literal is not visible here.
+ const elementDescriberInstruction = ()=>{
51
+ const preferredLanguage = (0, env_namespaceObject.getPreferredLanguage)();
52
+ return `
53
+ Describe the element in the red rectangle for precise identification.
54
+
55
+ IMPORTANT: You MUST write the description in ${preferredLanguage}.
56
+
57
+ CRITICAL REQUIREMENTS:
58
+ 1. UNIQUENESS: The description must uniquely identify this element on the current page
59
+ 2. UNIVERSALITY: Use generic, reusable selectors that work across different contexts
60
+ 3. PRECISION: Be specific enough to distinguish from similar elements
61
+
62
+ DESCRIPTION STRUCTURE:
63
+ 1. Element type (button, input, link, div, etc.)
64
+ 2. Primary identifier (in order of preference):
65
+ - Unique text content: "with text 'Login'"
66
+ - Unique attribute: "with aria-label 'Search'"
67
+ - Unique class/ID: "with class 'primary-button'"
68
+ - Unique position: "in header navigation"
69
+ 3. Secondary identifiers (if needed for uniqueness):
70
+ - Visual features: "blue background", "with icon"
71
+ - Relative position: "below search bar", "in sidebar"
72
+ - Parent context: "in login form", "in main menu"
73
+
74
+ GUIDELINES:
75
+ - Keep description under 25 words
76
+ - Prioritize semantic identifiers over visual ones
77
+ - Use consistent terminology across similar elements
78
+ - Avoid page-specific or temporary content
79
+ - Don't mention the red rectangle or selection box
80
+ - Focus on stable, reusable characteristics
81
+ - **Write the description in ${preferredLanguage}**
82
+
83
+ EXAMPLES:
84
+ ${getExamples(preferredLanguage)}
85
+
86
+ Return JSON:
87
+ {
88
+ "description": "unique element identifier",
89
+ "error"?: "error message if any"
90
+ }`;
91
+ };
92
+ exports.elementDescriberInstruction = __webpack_exports__.elementDescriberInstruction;
93
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
94
+ "elementDescriberInstruction"
95
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
96
+ Object.defineProperty(exports, '__esModule', {
97
+ value: true
98
+ });
@@ -0,0 +1,169 @@
1
+ "use strict";
2
+ var __webpack_require__ = {};
3
+ (()=>{
4
+ __webpack_require__.d = (exports1, definition)=>{
5
+ for(var key in definition)if (__webpack_require__.o(definition, key) && !__webpack_require__.o(exports1, key)) Object.defineProperty(exports1, key, {
6
+ enumerable: true,
7
+ get: definition[key]
8
+ });
9
+ };
10
+ })();
11
+ (()=>{
12
+ __webpack_require__.o = (obj, prop)=>Object.prototype.hasOwnProperty.call(obj, prop);
13
+ })();
14
+ (()=>{
15
+ __webpack_require__.r = (exports1)=>{
16
+ if ("u" > typeof Symbol && Symbol.toStringTag) Object.defineProperty(exports1, Symbol.toStringTag, {
17
+ value: 'Module'
18
+ });
19
+ Object.defineProperty(exports1, '__esModule', {
20
+ value: true
21
+ });
22
+ };
23
+ })();
24
+ var __webpack_exports__ = {};
25
+ __webpack_require__.r(__webpack_exports__);
26
+ __webpack_require__.d(__webpack_exports__, {
27
+ parseXMLExtractionResponse: ()=>parseXMLExtractionResponse,
28
+ systemPromptToExtract: ()=>systemPromptToExtract,
29
+ extractDataQueryPrompt: ()=>extractDataQueryPrompt
30
+ });
31
+ const env_namespaceObject = require("@godscene/shared/env");
32
+ const index_js_namespaceObject = require("../service-caller/index.js");
33
+ const external_util_js_namespaceObject = require("./util.js");
34
+ function parseXMLExtractionResponse(xmlString) {
35
+ const thought = (0, external_util_js_namespaceObject.extractXMLTag)(xmlString, 'thought');
36
+ const dataJsonStr = (0, external_util_js_namespaceObject.extractXMLTag)(xmlString, 'data-json');
37
+ const errorsStr = (0, external_util_js_namespaceObject.extractXMLTag)(xmlString, 'errors');
38
+ if (!dataJsonStr) throw new Error('Missing required field: data-json');
39
+ let data;
40
+ try {
41
+ data = (0, index_js_namespaceObject.safeParseJson)(dataJsonStr, void 0);
42
+ } catch (e) {
43
+ throw new Error(`Failed to parse data-json: ${e}`);
44
+ }
45
+ let errors;
46
+ if (errorsStr) try {
47
+ const parsedErrors = (0, index_js_namespaceObject.safeParseJson)(errorsStr, void 0);
48
+ if (Array.isArray(parsedErrors)) errors = parsedErrors;
49
+ } catch (e) {}
50
+ return {
51
+ ...thought ? {
52
+ thought
53
+ } : {},
54
+ data,
55
+ ...errors && errors.length > 0 ? {
56
+ errors
57
+ } : {}
58
+ };
59
+ }
60
+ function systemPromptToExtract() {
61
+ const preferredLanguage = (0, env_namespaceObject.getPreferredLanguage)();
62
+ return `
63
+ You are a versatile professional in software UI design and testing. Your outstanding contributions will impact the user experience of billions of users.
64
+
65
+ The user will give you a screenshot, the contents of it (optional), and some data requirements in <DATA_DEMAND>. You need to understand the user's requirements and extract the data satisfying the <DATA_DEMAND>.
66
+
67
+ If a key specifies a JSON data type (such as Number, String, Boolean, Object, Array), ensure the returned value strictly matches that data type.
68
+
69
+ When DATA_DEMAND is a JSON object, the keys in your response must exactly match the keys in DATA_DEMAND. Do not rename, translate, or substitute any key.
70
+
71
+ If the user provides multiple reference images, please carefully review the reference images with the screenshot and provide the correct answer for <DATA_DEMAND>.
72
+
73
+
74
+ Return in the following XML format:
75
+ <thought>the thinking process of the extraction, less than 300 words. Use ${preferredLanguage} in this field.</thought>
76
+ <data-json>the extracted data as JSON. Make sure both the value and scheme meet the DATA_DEMAND. If you want to write some description in this field, use the same language as the DATA_DEMAND.</data-json>
77
+ <errors>optional error messages as JSON array, e.g., ["error1", "error2"]</errors>
78
+
79
+ # Example 1
80
+ For example, if the DATA_DEMAND is:
81
+
82
+ <DATA_DEMAND>
83
+ {
84
+ "name": "name shows on the left panel, string",
85
+ "age": "age shows on the right panel, number",
86
+ "isAdmin": "if the user is admin, boolean"
87
+ }
88
+ </DATA_DEMAND>
89
+
90
+ By viewing the screenshot and page contents, you can extract the following data:
91
+
92
+ <thought>According to the screenshot, i can see ...</thought>
93
+ <data-json>
94
+ {
95
+ "name": "John",
96
+ "age": 30,
97
+ "isAdmin": true
98
+ }
99
+ </data-json>
100
+
101
+ # Example 2
102
+ If the DATA_DEMAND is:
103
+
104
+ <DATA_DEMAND>
105
+ the todo items list, string[]
106
+ </DATA_DEMAND>
107
+
108
+ By viewing the screenshot and page contents, you can extract the following data:
109
+
110
+ <thought>According to the screenshot, i can see ...</thought>
111
+ <data-json>
112
+ ["todo 1", "todo 2", "todo 3"]
113
+ </data-json>
114
+
115
+ # Example 3
116
+ If the DATA_DEMAND is:
117
+
118
+ <DATA_DEMAND>
119
+ the page title, string
120
+ </DATA_DEMAND>
121
+
122
+ By viewing the screenshot and page contents, you can extract the following data:
123
+
124
+ <thought>According to the screenshot, i can see ...</thought>
125
+ <data-json>
126
+ "todo list"
127
+ </data-json>
128
+
129
+ # Example 4
130
+ If the DATA_DEMAND is:
131
+
132
+ <DATA_DEMAND>
133
+ {
134
+ "StatementIsTruthy": "Boolean, is it currently the SMS page?"
135
+ }
136
+ </DATA_DEMAND>
137
+
138
+ By viewing the screenshot and page contents, you can extract the following data:
139
+
140
+ <thought>According to the screenshot, i can see ...</thought>
141
+ <data-json>
142
+ { "StatementIsTruthy": true }
143
+ </data-json>
144
+ `;
145
+ }
146
+ const extractDataQueryPrompt = (pageDescription, dataQuery)=>{
147
+ let dataQueryText = '';
148
+ dataQueryText = 'string' == typeof dataQuery ? dataQuery : JSON.stringify(dataQuery, null, 2);
149
+ return `
150
+ <PageDescription>
151
+ ${pageDescription}
152
+ </PageDescription>
153
+
154
+ <DATA_DEMAND>
155
+ ${dataQueryText}
156
+ </DATA_DEMAND>
157
+ `;
158
+ };
159
+ exports.extractDataQueryPrompt = __webpack_exports__.extractDataQueryPrompt;
160
+ exports.parseXMLExtractionResponse = __webpack_exports__.parseXMLExtractionResponse;
161
+ exports.systemPromptToExtract = __webpack_exports__.systemPromptToExtract;
162
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
163
+ "extractDataQueryPrompt",
164
+ "parseXMLExtractionResponse",
165
+ "systemPromptToExtract"
166
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
167
+ Object.defineProperty(exports, '__esModule', {
168
+ value: true
169
+ });
@@ -0,0 +1,86 @@
1
+ "use strict";
// Container object for the small runtime helpers (d, o, r) attached below.
2
+ var __webpack_require__ = {};
// __webpack_require__.d(exports1, definition)
// For each key that definition owns and exports1 does not already own, defines an
// enumerable accessor on exports1 whose getter is definition[key].
// Because only a getter is installed, the exported value is read each time the property is accessed.
3
+ (()=>{
4
+ __webpack_require__.d = (exports1, definition)=>{
5
+ for(var key in definition)if (__webpack_require__.o(definition, key) && !__webpack_require__.o(exports1, key)) Object.defineProperty(exports1, key, {
6
+ enumerable: true,
7
+ get: definition[key]
8
+ });
9
+ };
10
+ })();
// __webpack_require__.o(obj, prop)
// Own-property test. It calls Object.prototype.hasOwnProperty through call, so it does not
// depend on obj having (or inheriting) a hasOwnProperty method of its own.
11
+ (()=>{
12
+ __webpack_require__.o = (obj, prop)=>Object.prototype.hasOwnProperty.call(obj, prop);
13
+ })();
// __webpack_require__.r(exports1)
// Tags exports1 as an ES module namespace object.
14
+ (()=>{
15
+ __webpack_require__.r = (exports1)=>{
// The string comparison below is true when typeof Symbol is "function" and false when it is
// "undefined", so it acts as a compact check that Symbol exists before Symbol.toStringTag is read.
16
+ if ("u" > typeof Symbol && Symbol.toStringTag) Object.defineProperty(exports1, Symbol.toStringTag, {
17
+ value: 'Module'
18
+ });
// Always define __esModule as true, whether or not Symbol is available.
19
+ Object.defineProperty(exports1, '__esModule', {
20
+ value: true
21
+ });
22
+ };
23
+ })();
// Create the export namespace for this module and tag it via the r helper.
24
+ var __webpack_exports__ = {};
25
+ __webpack_require__.r(__webpack_exports__);
// Register the two public bindings as getters. The arrow functions defer the lookup, so
// findElementPrompt (a const declared further down) is only read when the property is accessed.
26
+ __webpack_require__.d(__webpack_exports__, {
27
+ systemPromptToLocateElement: ()=>systemPromptToLocateElement,
28
+ findElementPrompt: ()=>findElementPrompt
29
+ });
30
+ const env_namespaceObject = require("@godscene/shared/env");
31
+ const external_common_js_namespaceObject = require("./common.js");
// systemPromptToLocateElement(modelFamily)
// Returns the system prompt text for locating a UI element in a screenshot.
//   modelFamily - forwarded unchanged to bboxDescription from ./common.js.
// The prompt states the role and objective, asks for the text region of text-bearing elements,
// and specifies a JSON reply with a bbox array plus an optional errors array, with one example
// for a found element and one for a missing element.
32
+ function systemPromptToLocateElement(modelFamily) {
// Language name taken from @godscene/shared/env; it is interpolated into the not-found error example.
33
+ const preferredLanguage = (0, env_namespaceObject.getPreferredLanguage)();
// Text returned by bboxDescription for this model family; it is interpolated as the trailing
// comment on the bbox line of the output format section.
34
+ const bboxComment = (0, external_common_js_namespaceObject.bboxDescription)(modelFamily);
// The escaped backticks inside the template produce literal code fences in the prompt.
35
+ return `
36
+ ## Role:
37
+ You are an AI assistant that helps identify UI elements.
38
+
39
+ ## Objective:
40
+ - Identify elements in screenshots that match the user's description.
41
+ - Provide the coordinates of the element that matches the user's description.
42
+
43
+ ## Important Notes for Locating Elements:
44
+ - When the user describes an element that contains text (such as buttons, input fields, dropdown options, radio buttons, etc.), you should locate ONLY the text region of that element, not the entire element boundary.
45
+ - For example: If an input field is large (both wide and tall) with a placeholder text "Please enter your comment", you should locate only the area where the placeholder text appears, not the entire input field.
46
+ - This principle applies to all text-containing elements: focus on the visible text region rather than the full element container.
47
+
48
+ ## Output Format:
49
+ \`\`\`json
50
+ {
51
+ "bbox": [number, number, number, number], // ${bboxComment}
52
+ "errors"?: string[]
53
+ }
54
+ \`\`\`
55
+
56
+ Fields:
57
+ * \`bbox\` is the bounding box of the element that matches the user's description
58
+ * \`errors\` is an optional array of error messages (if any)
59
+
60
+ For example, when an element is found:
61
+ \`\`\`json
62
+ {
63
+ "bbox": [100, 100, 200, 200],
64
+ "errors": []
65
+ }
66
+ \`\`\`
67
+
68
+ When no element is found:
69
+ \`\`\`json
70
+ {
71
+ "bbox": [],
72
+ "errors": ["I can see ..., but {some element} is not found. Use ${preferredLanguage}."]
73
+ }
74
+ \`\`\`
75
+ `;
76
+ }
// findElementPrompt(targetElementDescription)
// Returns the text Find: followed by a space and the given description, interpolated without escaping.
77
+ const findElementPrompt = (targetElementDescription)=>`Find: ${targetElementDescription}`;
// CommonJS export epilogue: copy the two named bindings from __webpack_exports__ onto exports.
78
+ exports.findElementPrompt = __webpack_exports__.findElementPrompt;
79
+ exports.systemPromptToLocateElement = __webpack_exports__.systemPromptToLocateElement;
// Copy every remaining enumerable key of __webpack_exports__ whose name is not in the list below.
80
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
81
+ "findElementPrompt",
82
+ "systemPromptToLocateElement"
83
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
// Define exports.__esModule as true; only the value is given, so the other descriptor fields take the defineProperty defaults.
84
+ Object.defineProperty(exports, '__esModule', {
85
+ value: true
86
+ });