@godscene/core 1.7.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +9 -0
  3. package/dist/es/agent/agent.mjs +767 -0
  4. package/dist/es/agent/common.mjs +0 -0
  5. package/dist/es/agent/execution-session.mjs +39 -0
  6. package/dist/es/agent/index.mjs +6 -0
  7. package/dist/es/agent/task-builder.mjs +343 -0
  8. package/dist/es/agent/task-cache.mjs +212 -0
  9. package/dist/es/agent/tasks.mjs +428 -0
  10. package/dist/es/agent/ui-utils.mjs +101 -0
  11. package/dist/es/agent/utils.mjs +167 -0
  12. package/dist/es/ai-model/auto-glm/actions.mjs +237 -0
  13. package/dist/es/ai-model/auto-glm/index.mjs +6 -0
  14. package/dist/es/ai-model/auto-glm/parser.mjs +237 -0
  15. package/dist/es/ai-model/auto-glm/planning.mjs +69 -0
  16. package/dist/es/ai-model/auto-glm/prompt.mjs +220 -0
  17. package/dist/es/ai-model/auto-glm/util.mjs +7 -0
  18. package/dist/es/ai-model/connectivity.mjs +136 -0
  19. package/dist/es/ai-model/conversation-history.mjs +193 -0
  20. package/dist/es/ai-model/index.mjs +12 -0
  21. package/dist/es/ai-model/inspect.mjs +395 -0
  22. package/dist/es/ai-model/llm-planning.mjs +231 -0
  23. package/dist/es/ai-model/prompt/common.mjs +5 -0
  24. package/dist/es/ai-model/prompt/describe.mjs +64 -0
  25. package/dist/es/ai-model/prompt/extraction.mjs +129 -0
  26. package/dist/es/ai-model/prompt/llm-locator.mjs +49 -0
  27. package/dist/es/ai-model/prompt/llm-planning.mjs +584 -0
  28. package/dist/es/ai-model/prompt/llm-section-locator.mjs +42 -0
  29. package/dist/es/ai-model/prompt/order-sensitive-judge.mjs +33 -0
  30. package/dist/es/ai-model/prompt/playwright-generator.mjs +115 -0
  31. package/dist/es/ai-model/prompt/ui-tars-planning.mjs +34 -0
  32. package/dist/es/ai-model/prompt/util.mjs +57 -0
  33. package/dist/es/ai-model/prompt/yaml-generator.mjs +201 -0
  34. package/dist/es/ai-model/service-caller/codex-app-server.mjs +573 -0
  35. package/dist/es/ai-model/service-caller/image-detail.mjs +4 -0
  36. package/dist/es/ai-model/service-caller/index.mjs +648 -0
  37. package/dist/es/ai-model/service-caller/request-timeout.mjs +47 -0
  38. package/dist/es/ai-model/ui-tars-planning.mjs +247 -0
  39. package/dist/es/common.mjs +382 -0
  40. package/dist/es/device/device-options.mjs +0 -0
  41. package/dist/es/device/index.mjs +340 -0
  42. package/dist/es/dump/html-utils.mjs +290 -0
  43. package/dist/es/dump/index.mjs +3 -0
  44. package/dist/es/dump/screenshot-restoration.mjs +30 -0
  45. package/dist/es/dump/screenshot-store.mjs +125 -0
  46. package/dist/es/index.mjs +17 -0
  47. package/dist/es/report-cli.mjs +149 -0
  48. package/dist/es/report-generator.mjs +203 -0
  49. package/dist/es/report-markdown.mjs +216 -0
  50. package/dist/es/report.mjs +287 -0
  51. package/dist/es/screenshot-item.mjs +120 -0
  52. package/dist/es/service/index.mjs +272 -0
  53. package/dist/es/service/utils.mjs +13 -0
  54. package/dist/es/skill/index.mjs +35 -0
  55. package/dist/es/task-runner.mjs +261 -0
  56. package/dist/es/task-timing.mjs +10 -0
  57. package/dist/es/tree.mjs +11 -0
  58. package/dist/es/types.mjs +202 -0
  59. package/dist/es/utils.mjs +232 -0
  60. package/dist/es/yaml/builder.mjs +11 -0
  61. package/dist/es/yaml/index.mjs +4 -0
  62. package/dist/es/yaml/player.mjs +425 -0
  63. package/dist/es/yaml/utils.mjs +100 -0
  64. package/dist/es/yaml.mjs +0 -0
  65. package/dist/lib/agent/agent.js +815 -0
  66. package/dist/lib/agent/common.js +5 -0
  67. package/dist/lib/agent/execution-session.js +73 -0
  68. package/dist/lib/agent/index.js +76 -0
  69. package/dist/lib/agent/task-builder.js +380 -0
  70. package/dist/lib/agent/task-cache.js +264 -0
  71. package/dist/lib/agent/tasks.js +471 -0
  72. package/dist/lib/agent/ui-utils.js +153 -0
  73. package/dist/lib/agent/utils.js +238 -0
  74. package/dist/lib/ai-model/auto-glm/actions.js +271 -0
  75. package/dist/lib/ai-model/auto-glm/index.js +64 -0
  76. package/dist/lib/ai-model/auto-glm/parser.js +280 -0
  77. package/dist/lib/ai-model/auto-glm/planning.js +103 -0
  78. package/dist/lib/ai-model/auto-glm/prompt.js +257 -0
  79. package/dist/lib/ai-model/auto-glm/util.js +44 -0
  80. package/dist/lib/ai-model/connectivity.js +180 -0
  81. package/dist/lib/ai-model/conversation-history.js +227 -0
  82. package/dist/lib/ai-model/index.js +127 -0
  83. package/dist/lib/ai-model/inspect.js +441 -0
  84. package/dist/lib/ai-model/llm-planning.js +268 -0
  85. package/dist/lib/ai-model/prompt/common.js +39 -0
  86. package/dist/lib/ai-model/prompt/describe.js +98 -0
  87. package/dist/lib/ai-model/prompt/extraction.js +169 -0
  88. package/dist/lib/ai-model/prompt/llm-locator.js +86 -0
  89. package/dist/lib/ai-model/prompt/llm-planning.js +621 -0
  90. package/dist/lib/ai-model/prompt/llm-section-locator.js +79 -0
  91. package/dist/lib/ai-model/prompt/order-sensitive-judge.js +70 -0
  92. package/dist/lib/ai-model/prompt/playwright-generator.js +176 -0
  93. package/dist/lib/ai-model/prompt/ui-tars-planning.js +71 -0
  94. package/dist/lib/ai-model/prompt/util.js +103 -0
  95. package/dist/lib/ai-model/prompt/yaml-generator.js +262 -0
  96. package/dist/lib/ai-model/service-caller/codex-app-server.js +622 -0
  97. package/dist/lib/ai-model/service-caller/image-detail.js +38 -0
  98. package/dist/lib/ai-model/service-caller/index.js +716 -0
  99. package/dist/lib/ai-model/service-caller/request-timeout.js +93 -0
  100. package/dist/lib/ai-model/ui-tars-planning.js +281 -0
  101. package/dist/lib/common.js +491 -0
  102. package/dist/lib/device/device-options.js +18 -0
  103. package/dist/lib/device/index.js +467 -0
  104. package/dist/lib/dump/html-utils.js +366 -0
  105. package/dist/lib/dump/index.js +58 -0
  106. package/dist/lib/dump/screenshot-restoration.js +64 -0
  107. package/dist/lib/dump/screenshot-store.js +165 -0
  108. package/dist/lib/index.js +184 -0
  109. package/dist/lib/report-cli.js +189 -0
  110. package/dist/lib/report-generator.js +244 -0
  111. package/dist/lib/report-markdown.js +253 -0
  112. package/dist/lib/report.js +333 -0
  113. package/dist/lib/screenshot-item.js +154 -0
  114. package/dist/lib/service/index.js +306 -0
  115. package/dist/lib/service/utils.js +47 -0
  116. package/dist/lib/skill/index.js +69 -0
  117. package/dist/lib/task-runner.js +298 -0
  118. package/dist/lib/task-timing.js +44 -0
  119. package/dist/lib/tree.js +51 -0
  120. package/dist/lib/types.js +298 -0
  121. package/dist/lib/utils.js +314 -0
  122. package/dist/lib/yaml/builder.js +55 -0
  123. package/dist/lib/yaml/index.js +79 -0
  124. package/dist/lib/yaml/player.js +459 -0
  125. package/dist/lib/yaml/utils.js +153 -0
  126. package/dist/lib/yaml.js +18 -0
  127. package/dist/types/agent/agent.d.ts +220 -0
  128. package/dist/types/agent/common.d.ts +0 -0
  129. package/dist/types/agent/execution-session.d.ts +36 -0
  130. package/dist/types/agent/index.d.ts +9 -0
  131. package/dist/types/agent/task-builder.d.ts +34 -0
  132. package/dist/types/agent/task-cache.d.ts +49 -0
  133. package/dist/types/agent/tasks.d.ts +70 -0
  134. package/dist/types/agent/ui-utils.d.ts +14 -0
  135. package/dist/types/agent/utils.d.ts +25 -0
  136. package/dist/types/ai-model/auto-glm/actions.d.ts +78 -0
  137. package/dist/types/ai-model/auto-glm/index.d.ts +6 -0
  138. package/dist/types/ai-model/auto-glm/parser.d.ts +18 -0
  139. package/dist/types/ai-model/auto-glm/planning.d.ts +12 -0
  140. package/dist/types/ai-model/auto-glm/prompt.d.ts +27 -0
  141. package/dist/types/ai-model/auto-glm/util.d.ts +13 -0
  142. package/dist/types/ai-model/connectivity.d.ts +20 -0
  143. package/dist/types/ai-model/conversation-history.d.ts +105 -0
  144. package/dist/types/ai-model/index.d.ts +16 -0
  145. package/dist/types/ai-model/inspect.d.ts +67 -0
  146. package/dist/types/ai-model/llm-planning.d.ts +19 -0
  147. package/dist/types/ai-model/prompt/common.d.ts +2 -0
  148. package/dist/types/ai-model/prompt/describe.d.ts +1 -0
  149. package/dist/types/ai-model/prompt/extraction.d.ts +7 -0
  150. package/dist/types/ai-model/prompt/llm-locator.d.ts +3 -0
  151. package/dist/types/ai-model/prompt/llm-planning.d.ts +10 -0
  152. package/dist/types/ai-model/prompt/llm-section-locator.d.ts +3 -0
  153. package/dist/types/ai-model/prompt/order-sensitive-judge.d.ts +2 -0
  154. package/dist/types/ai-model/prompt/playwright-generator.d.ts +26 -0
  155. package/dist/types/ai-model/prompt/ui-tars-planning.d.ts +2 -0
  156. package/dist/types/ai-model/prompt/util.d.ts +33 -0
  157. package/dist/types/ai-model/prompt/yaml-generator.d.ts +102 -0
  158. package/dist/types/ai-model/service-caller/codex-app-server.d.ts +42 -0
  159. package/dist/types/ai-model/service-caller/image-detail.d.ts +2 -0
  160. package/dist/types/ai-model/service-caller/index.d.ts +60 -0
  161. package/dist/types/ai-model/service-caller/request-timeout.d.ts +32 -0
  162. package/dist/types/ai-model/ui-tars-planning.d.ts +72 -0
  163. package/dist/types/common.d.ts +288 -0
  164. package/dist/types/device/device-options.d.ts +155 -0
  165. package/dist/types/device/index.d.ts +2565 -0
  166. package/dist/types/dump/html-utils.d.ts +75 -0
  167. package/dist/types/dump/index.d.ts +5 -0
  168. package/dist/types/dump/screenshot-restoration.d.ts +8 -0
  169. package/dist/types/dump/screenshot-store.d.ts +49 -0
  170. package/dist/types/index.d.ts +21 -0
  171. package/dist/types/report-cli.d.ts +36 -0
  172. package/dist/types/report-generator.d.ts +88 -0
  173. package/dist/types/report-markdown.d.ts +24 -0
  174. package/dist/types/report.d.ts +52 -0
  175. package/dist/types/screenshot-item.d.ts +67 -0
  176. package/dist/types/service/index.d.ts +24 -0
  177. package/dist/types/service/utils.d.ts +2 -0
  178. package/dist/types/skill/index.d.ts +25 -0
  179. package/dist/types/task-runner.d.ts +50 -0
  180. package/dist/types/task-timing.d.ts +8 -0
  181. package/dist/types/tree.d.ts +4 -0
  182. package/dist/types/types.d.ts +684 -0
  183. package/dist/types/utils.d.ts +45 -0
  184. package/dist/types/yaml/builder.d.ts +2 -0
  185. package/dist/types/yaml/index.d.ts +4 -0
  186. package/dist/types/yaml/player.d.ts +34 -0
  187. package/dist/types/yaml/utils.d.ts +9 -0
  188. package/dist/types/yaml.d.ts +215 -0
  189. package/package.json +130 -0
@@ -0,0 +1,491 @@
1
+ "use strict";
2
+ var __webpack_require__ = {};
// Runtime helper `d`: for every own key of `definition` that `exports1` does not
// already own, defines an enumerable accessor on `exports1` whose getter is
// `definition[key]`.
3
+ (()=>{
4
+ __webpack_require__.d = (exports1, definition)=>{
5
+ for(var key in definition)if (__webpack_require__.o(definition, key) && !__webpack_require__.o(exports1, key)) Object.defineProperty(exports1, key, {
6
+ enumerable: true,
7
+ get: definition[key]
8
+ });
9
+ };
10
+ })();
// Runtime helper `o`: own-property test that goes through
// Object.prototype.hasOwnProperty.call, so it works even if `obj` shadows hasOwnProperty.
11
+ (()=>{
12
+ __webpack_require__.o = (obj, prop)=>Object.prototype.hasOwnProperty.call(obj, prop);
13
+ })();
// Runtime helper `r`: marks `exports1` as an ES module. `"u" > typeof Symbol` is a
// compact check that `typeof Symbol` is not "undefined"; when Symbol.toStringTag exists
// it is set to 'Module', and `__esModule` is always defined as true.
14
+ (()=>{
15
+ __webpack_require__.r = (exports1)=>{
16
+ if ("u" > typeof Symbol && Symbol.toStringTag) Object.defineProperty(exports1, Symbol.toStringTag, {
17
+ value: 'Module'
18
+ });
19
+ Object.defineProperty(exports1, '__esModule', {
20
+ value: true
21
+ });
22
+ };
23
+ })();
24
+ var __webpack_exports__ = {};
25
+ __webpack_require__.r(__webpack_exports__);
26
+ __webpack_require__.d(__webpack_exports__, {
27
+ dumpMidsceneLocatorField: ()=>dumpMidsceneLocatorField,
28
+ TUserPromptSchema: ()=>TUserPromptSchema,
29
+ buildYamlFlowFromPlans: ()=>buildYamlFlowFromPlans,
30
+ adaptGpt5Bbox: ()=>adaptGpt5Bbox,
31
+ markupImageForLLM: ()=>markupImageForLLM,
32
+ pointToBbox: ()=>pointToBbox,
33
+ adaptGeminiBbox: ()=>adaptGeminiBbox,
34
+ parseActionParam: ()=>parseActionParam,
35
+ adaptBboxToRect: ()=>adaptBboxToRect,
36
+ getReadableTimeString: ()=>getReadableTimeString,
37
+ expandSearchArea: ()=>expandSearchArea,
38
+ mergeRects: ()=>mergeRects,
39
+ normalized01000: ()=>normalized01000,
40
+ SizeSchema: ()=>SizeSchema,
41
+ findAllMidsceneLocatorField: ()=>findAllMidsceneLocatorField,
42
+ adaptQwen2_5Bbox: ()=>adaptQwen2_5Bbox,
43
+ getMidsceneLocationSchema: ()=>getMidsceneLocationSchema,
44
+ adaptDoubaoBbox: ()=>adaptDoubaoBbox,
45
+ fillBboxParam: ()=>fillBboxParam,
46
+ finalizeActionName: ()=>finalizeActionName,
47
+ TMultimodalPromptSchema: ()=>TMultimodalPromptSchema,
48
+ PointSchema: ()=>PointSchema,
49
+ RectSchema: ()=>RectSchema,
50
+ dumpActionParam: ()=>dumpActionParam,
51
+ ifMidsceneLocatorField: ()=>ifMidsceneLocatorField,
52
+ adaptBbox: ()=>adaptBbox
53
+ });
54
+ const utils_namespaceObject = require("@godscene/shared/utils");
55
+ const util_js_namespaceObject = require("./ai-model/auto-glm/util.js");
56
+ const constants_namespaceObject = require("@godscene/shared/constants");
57
+ const extractor_namespaceObject = require("@godscene/shared/extractor");
58
+ const img_namespaceObject = require("@godscene/shared/img");
59
+ const logger_namespaceObject = require("@godscene/shared/logger");
60
+ const external_zod_namespaceObject = require("zod");
// Fallback box edge length: used as the default `bboxSize` of pointToBbox and to
// synthesise missing corners in adaptQwen2_5Bbox / adaptDoubaoBbox.
61
+ const defaultBboxSize = 20;
// Debug logging function created by the shared logger's getDebug('ai:common').
62
+ const debugInspectUtils = (0, logger_namespaceObject.getDebug)('ai:common');
/**
 * Builds a square box centred on a point, clamped to the [0, 1000] range.
 *
 * @param {number} x - Horizontal centre of the box.
 * @param {number} y - Vertical centre of the box.
 * @param {number} [bboxSize=defaultBboxSize] - Edge length of the box.
 * @returns {number[]} `[x1, y1, x2, y2]`, each edge limited to 0 (low side) or 1000 (high side).
 */
function pointToBbox(x, y, bboxSize = defaultBboxSize) {
    const reach = bboxSize / 2;
    const lowEdge = (centre) => Math.max(centre - reach, 0);
    const highEdge = (centre) => Math.min(centre + reach, 1000);
    return [lowEdge(x), lowEdge(y), highEdge(x), highEdge(y)];
}
/**
 * Normalises the bounding-box field of a locate result in place.
 *
 * If only `bbox_2d` is present it is moved to `bbox` (and `bbox_2d` is deleted);
 * any resulting `bbox` is then converted through `adaptBbox`.
 *
 * A null/undefined `locate` is returned untouched instead of raising a TypeError,
 * which matches the optional chaining the original already used on `locate?.bbox`.
 *
 * @param {object|null|undefined} locate - Locate result; mutated when it is an object.
 * @param {number} width - Forwarded to `adaptBbox`.
 * @param {number} height - Forwarded to `adaptBbox`.
 * @param {string} [modelFamily] - Forwarded to `adaptBbox`.
 * @returns {object|null|undefined} The same `locate` reference that was passed in.
 */
function fillBboxParam(locate, width, height, modelFamily) {
    // Nothing to normalise on a missing result; hand it back as-is.
    if (!locate) return locate;
    if (locate.bbox_2d && !locate.bbox) {
        locate.bbox = locate.bbox_2d;
        delete locate.bbox_2d;
    }
    if (locate.bbox) locate.bbox = adaptBbox(locate.bbox, width, height, modelFamily);
    return locate;
}
/**
 * Rounds a qwen-vl style box to integers, synthesising missing far corners.
 *
 * @param {Array} bbox - At least two entries; entries 2 and 3 are optional.
 * @returns {number[]} `[x1, y1, x2, y2]`; when entry 2 (or 3) is not a number it is
 *   replaced by entry 0 (or 1) plus `defaultBboxSize`.
 * @throws {Error} When `bbox` has fewer than two entries.
 */
function adaptQwen2_5Bbox(bbox) {
    if (bbox.length < 2) {
        throw new Error(`invalid bbox data for qwen-vl mode: ${JSON.stringify(bbox)} `);
    }
    // Use the supplied far edge when numeric, otherwise offset the near edge.
    const farEdge = (candidate, nearEdge) => Math.round('number' === typeof candidate ? candidate : nearEdge + defaultBboxSize);
    return [
        Math.round(bbox[0]),
        Math.round(bbox[1]),
        farEdge(bbox[2], bbox[0]),
        farEdge(bbox[3], bbox[1])
    ];
}
/**
 * Validates a gpt-5 style box and returns a fresh copy of its four values.
 *
 * @param {*} bbox - Expected to be an array of exactly four finite numbers.
 * @returns {number[]} New array `[bbox[0], bbox[1], bbox[2], bbox[3]]`; no scaling is applied.
 * @throws {Error} When `bbox` is not an array of four finite numbers.
 */
function adaptGpt5Bbox(bbox) {
    const isFiniteNumber = (value) => 'number' === typeof value && Number.isFinite(value);
    const wellFormed = Array.isArray(bbox) && 4 === bbox.length && bbox.every(isFiniteNumber);
    if (!wellFormed) {
        throw new Error(`invalid bbox data for gpt-5 mode: ${JSON.stringify(bbox)} `);
    }
    const [first, second, third, fourth] = bbox;
    return [first, second, third, fourth];
}
/**
 * Converts a doubao-vision style box into pixel coordinates.
 *
 * Every input value is scaled by `width / 1000` (x) or `height / 1000` (y) and rounded.
 * Accepted shapes, decided by the number of flattened values:
 *  - string `"x1 y1 x2 y2"` (surrounding whitespace tolerated);
 *  - 4 or 5 values: first four are `[x1, y1, x2, y2]`;
 *  - 2, 3, 6 or 7 values: first two are a centre point, expanded by
 *    `defaultBboxSize / 2` on each side and clamped to `[0, width] x [0, height]`;
 *  - 8 values: values 0,1 and 4,5 are used as the two opposite corners.
 * Array entries given as strings may hold `"x,y"` or `"x y"` pairs, which are flattened.
 *
 * Fixes over the previous version:
 *  - the string form is split on the same trimmed text the pattern was checked against,
 *    so padded or tab-separated input that passes validation no longer throws;
 *  - the eight-value case looks at the flattened list, so four `"x,y"` corner strings work;
 *  - a null/undefined `bbox` raises the descriptive error instead of a TypeError.
 *
 * @param {string|Array|null|undefined} bbox - Raw model output.
 * @param {number} width - Target width in pixels; must be > 0.
 * @param {number} height - Target height in pixels; must be > 0.
 * @returns {number[]} `[left, top, right, bottom]` in pixels.
 * @throws {Error} When the input does not match any accepted shape.
 */
function adaptDoubaoBbox(bbox, width, height) {
    (0, utils_namespaceObject.assert)(width > 0 && height > 0, 'width and height must be greater than 0 in doubao mode');
    const toPixelX = (value) => Math.round(value * width / 1000);
    const toPixelY = (value) => Math.round(value * height / 1000);
    if ('string' == typeof bbox) {
        const trimmed = bbox.trim();
        (0, utils_namespaceObject.assert)(/^(\d+)\s(\d+)\s(\d+)\s(\d+)$/.test(trimmed), `invalid bbox data string for doubao-vision mode: ${bbox}`);
        // Split with the same separator class the pattern accepts.
        const parts = trimmed.split(/\s/);
        // Defensive: only reachable if the assert helper does not throw.
        if (4 !== parts.length) throw new Error(`invalid bbox data string for doubao-vision mode: ${bbox}`);
        return [
            toPixelX(Number(parts[0])),
            toPixelY(Number(parts[1])),
            toPixelX(Number(parts[2])),
            toPixelY(Number(parts[3]))
        ];
    }
    let bboxList = [];
    if (Array.isArray(bbox) && 'string' == typeof bbox[0]) bbox.forEach((item)=>{
        if ('string' == typeof item && item.includes(',')) {
            const [x, y] = item.split(',');
            bboxList.push(Number(x.trim()), Number(y.trim()));
        } else if ('string' == typeof item && item.includes(' ')) {
            const [x, y] = item.split(' ');
            bboxList.push(Number(x.trim()), Number(y.trim()));
        } else bboxList.push(Number(item));
    });
    else if (null !== bbox && void 0 !== bbox) bboxList = bbox;
    const count = bboxList.length;
    if (4 === count || 5 === count) return [
        toPixelX(bboxList[0]),
        toPixelY(bboxList[1]),
        toPixelX(bboxList[2]),
        toPixelY(bboxList[3])
    ];
    if (2 === count || 3 === count || 6 === count || 7 === count) {
        const half = defaultBboxSize / 2;
        const centreX = toPixelX(bboxList[0]);
        const centreY = toPixelY(bboxList[1]);
        return [
            Math.max(0, centreX - half),
            Math.max(0, centreY - half),
            Math.min(width, centreX + half),
            Math.min(height, centreY + half)
        ];
    }
    // The raw-length test is kept so inputs accepted by the previous version still are.
    if (8 === count || 8 === bbox?.length) return [
        toPixelX(bboxList[0]),
        toPixelY(bboxList[1]),
        toPixelX(bboxList[4]),
        toPixelY(bboxList[5])
    ];
    throw new Error(`invalid bbox data for doubao-vision mode: ${JSON.stringify(bbox)} `);
}
/**
 * Unwraps a box that arrives nested one level deep.
 *
 * @param {*} bbox - Any value.
 * @returns {*} `bbox[0]` when `bbox` is an array whose first entry is itself an array;
 *   otherwise `bbox` unchanged.
 */
function normalizeBboxInput(bbox) {
    const isNested = Array.isArray(bbox) && Array.isArray(bbox[0]);
    return isNested ? bbox[0] : bbox;
}
/**
 * Dispatches a raw model box to the converter that matches the model family.
 *
 * The input is first passed through `normalizeBboxInput`. Families are checked in
 * this order: 'doubao-vision' / 'doubao-seed' / anything `isUITars` accepts, then
 * 'gemini', 'qwen2.5-vl', 'gpt-5'; every other value falls back to `normalized01000`.
 *
 * @param {*} bbox - Raw box as produced by the model.
 * @param {number} width - Forwarded to converters that scale by width.
 * @param {number} height - Forwarded to converters that scale by height.
 * @param {string} [modelFamily] - Model family identifier.
 * @returns {number[]} The selected converter's result.
 */
function adaptBbox(bbox, width, height, modelFamily) {
    const flatBbox = normalizeBboxInput(bbox);
    const usesDoubaoLayout = 'doubao-vision' === modelFamily || 'doubao-seed' === modelFamily || (0, util_js_namespaceObject.isUITars)(modelFamily);
    if (usesDoubaoLayout) return adaptDoubaoBbox(flatBbox, width, height);
    switch(modelFamily){
        case 'gemini':
            return adaptGeminiBbox(flatBbox, width, height);
        case 'qwen2.5-vl':
            return adaptQwen2_5Bbox(flatBbox);
        case 'gpt-5':
            return adaptGpt5Bbox(flatBbox);
        default:
            return normalized01000(flatBbox, width, height);
    }
}
/**
 * Scales a four-value box by `extent / 1000` and rounds each value.
 *
 * @param {number[]} bbox - `[x1, y1, x2, y2]`; x values are scaled by `width`, y values by `height`.
 * @param {number} width - Horizontal extent.
 * @param {number} height - Vertical extent.
 * @returns {number[]} Rounded `[x1, y1, x2, y2]`.
 */
function normalized01000(bbox, width, height) {
    const project = (value, extent) => Math.round(value * extent / 1000);
    return [
        project(bbox[0], width),
        project(bbox[1], height),
        project(bbox[2], width),
        project(bbox[3], height)
    ];
}
/**
 * Converts a gemini style box, whose values are read as `[top, left, bottom, right]`,
 * into `[left, top, right, bottom]` scaled by `extent / 1000` and rounded.
 *
 * @param {number[]} bbox - Entries 0 and 2 are scaled by `height`, entries 1 and 3 by `width`.
 * @param {number} width - Horizontal extent.
 * @param {number} height - Vertical extent.
 * @returns {number[]} `[left, top, right, bottom]`.
 */
function adaptGeminiBbox(bbox, width, height) {
    const alongWidth = (value) => Math.round(value * width / 1000);
    const alongHeight = (value) => Math.round(value * height / 1000);
    return [
        alongWidth(bbox[1]),
        alongHeight(bbox[0]),
        alongWidth(bbox[3]),
        alongHeight(bbox[2])
    ];
}
/**
 * Converts a raw model box into a `{ left, top, width, height }` rectangle.
 *
 * Steps: convert through `adaptBbox`; clamp left/top to >= 0 and right/bottom to the
 * given limits; compute inclusive width/height (`far - near + 1`); when `scale` is not 1
 * divide every value by `scale` and round; finally add the offsets to left/top.
 * Inputs and the result are passed to the module's debug logger.
 *
 * @param {*} bbox - Raw box forwarded to `adaptBbox`.
 * @param {number} width - Forwarded to `adaptBbox`; default for `rightLimit`.
 * @param {number} height - Forwarded to `adaptBbox`; default for `bottomLimit`.
 * @param {number} [offsetX=0] - Added to the final left.
 * @param {number} [offsetY=0] - Added to the final top.
 * @param {number} [rightLimit=width] - Upper bound for the right edge.
 * @param {number} [bottomLimit=height] - Upper bound for the bottom edge.
 * @param {string} [modelFamily] - Forwarded to `adaptBbox`.
 * @param {number} [scale=1] - Divisor applied before the offsets.
 * @returns {{left: number, top: number, width: number, height: number}}
 */
function adaptBboxToRect(bbox, width, height, offsetX = 0, offsetY = 0, rightLimit = width, bottomLimit = height, modelFamily, scale = 1) {
    debugInspectUtils('adaptBboxToRect', bbox, width, height, 'offset', offsetX, offsetY, 'limit', rightLimit, bottomLimit, 'modelFamily', modelFamily, 'scale', scale);
    const [left, top, right, bottom] = adaptBbox(bbox, width, height, modelFamily);
    const clampedLeft = Math.max(0, left);
    const clampedTop = Math.max(0, top);
    const spanX = Math.min(right, rightLimit) - clampedLeft + 1;
    const spanY = Math.min(bottom, bottomLimit) - clampedTop + 1;
    // A scale of exactly 1 leaves values untouched (no rounding).
    const unscale = 1 !== scale ? (value) => Math.round(value / scale) : (value) => value;
    const rect = {
        left: unscale(clampedLeft) + offsetX,
        top: unscale(clampedTop) + offsetY,
        width: unscale(spanX),
        height: unscale(spanY)
    };
    debugInspectUtils('adaptBboxToRect, result=', rect);
    return rect;
}
/**
 * Computes the smallest rectangle that contains every rectangle in the list.
 *
 * The bounds are accumulated in a single pass rather than spreading four mapped
 * arrays into `Math.min` / `Math.max`, so very long lists cannot exceed the engine's
 * argument-count limit. An empty list yields a zero-size rectangle at the origin
 * instead of one made of `Infinity` / `-Infinity`.
 *
 * @param {Array<{left: number, top: number, width: number, height: number}>} rects
 * @returns {{left: number, top: number, width: number, height: number}}
 */
function mergeRects(rects) {
    if (0 === rects.length) return {
        left: 0,
        top: 0,
        width: 0,
        height: 0
    };
    let minLeft = Infinity;
    let minTop = Infinity;
    let maxRight = -Infinity;
    let maxBottom = -Infinity;
    for (const rect of rects){
        // Math.min/Math.max keep the NaN propagation of the variadic form.
        minLeft = Math.min(minLeft, rect.left);
        minTop = Math.min(minTop, rect.top);
        maxRight = Math.max(maxRight, rect.left + rect.width);
        maxBottom = Math.max(maxBottom, rect.top + rect.height);
    }
    return {
        left: minLeft,
        top: minTop,
        width: maxRight - minLeft,
        height: maxBottom - minTop
    };
}
/**
 * Grows a rectangle into a search area clipped to the screen.
 *
 * First pads the rectangle by 100 on every side (left/top floored at 0, width/height
 * capped so the area stays inside `screenSize`). If the padded area is at least
 * 160000 it is returned. Otherwise the padded area is scaled about its centre by
 * `sqrt(160000 / area)`, rounded, and clipped to the screen again.
 *
 * @param {{left: number, top: number, width: number, height: number}} rect
 * @param {{width: number, height: number}} screenSize
 * @returns {{left: number, top: number, width: number, height: number}}
 */
function expandSearchArea(rect, screenSize) {
    const MIN_AREA = 160000;
    const PADDING = 100;
    const paddedLeft = Math.max(rect.left - PADDING, 0);
    const paddedTop = Math.max(rect.top - PADDING, 0);
    const padded = {
        left: paddedLeft,
        top: paddedTop,
        width: Math.min(rect.left - paddedLeft + rect.width + PADDING, screenSize.width - paddedLeft),
        height: Math.min(rect.top - paddedTop + rect.height + PADDING, screenSize.height - paddedTop)
    };
    const paddedArea = padded.width * padded.height;
    if (paddedArea >= MIN_AREA) return padded;
    // Too small: enlarge both sides by the same factor around the centre point.
    const growth = Math.sqrt(MIN_AREA / paddedArea);
    const grownWidth = Math.round(padded.width * growth);
    const grownHeight = Math.round(padded.height * growth);
    const midX = padded.left + padded.width / 2;
    const midY = padded.top + padded.height / 2;
    const left = Math.max(Math.round(midX - grownWidth / 2), 0);
    const top = Math.max(Math.round(midY - grownHeight / 2), 0);
    return {
        left,
        top,
        width: Math.min(grownWidth, screenSize.width - left),
        height: Math.min(grownHeight, screenSize.height - top)
    };
}
/**
 * Produces a marked-up screenshot from an element tree.
 *
 * The tree is flattened with `treeToList`, elements whose `attributes.nodeType`
 * equals `NodeType.TEXT` are dropped, and the remainder is handed to
 * `compositeElementInfoImg` together with the screenshot and size.
 *
 * @param {string} screenshotBase64 - Passed through as `inputImgBase64`.
 * @param {*} tree - Element tree accepted by `treeToList`.
 * @param {*} size - Passed through unchanged.
 * @returns {Promise<*>} Whatever `compositeElementInfoImg` resolves to.
 */
async function markupImageForLLM(screenshotBase64, tree, size) {
    const nonTextElements = (0, extractor_namespaceObject.treeToList)(tree).filter((element)=>element.attributes.nodeType !== constants_namespaceObject.NodeType.TEXT);
    return await (0, img_namespaceObject.compositeElementInfoImg)({
        inputImgBase64: screenshotBase64,
        elementsPositionInfo: nonTextElements,
        size
    });
}
/**
 * Translates planned actions into YAML flow items.
 *
 * For each plan the action whose `name` equals `plan.type` is looked up in
 * `actionSpace`; plans without a match are skipped with a console warning. The item
 * key is the action's `interfaceAlias` (or the plan type). Parameters are dumped with
 * `dumpActionParam` when the action has a `paramSchema`.
 *
 * Launch / Terminate (field `uri`) and RunAdbShell (field `command`) are written in
 * the short form `{ key: value }` when that field is the only dumped parameter and is
 * a string; every other item is `{ key: '', ...params }`.
 *
 * @param {Iterable<{type: string, param?: object}>} plans
 * @param {Array<{name: string, interfaceAlias?: string, paramSchema?: *}>} actionSpace
 * @returns {object[]} Flow items in plan order.
 */
function buildYamlFlowFromPlans(plans, actionSpace) {
    // Field that may be inlined as the item's value, if the action supports it.
    const shortcutFieldFor = (candidate)=>{
        if ('Launch' === candidate.name || 'launch' === candidate.interfaceAlias) return 'uri';
        if ('Terminate' === candidate.name || 'terminate' === candidate.interfaceAlias) return 'uri';
        if ('RunAdbShell' === candidate.name || 'runAdbShell' === candidate.interfaceAlias) return 'command';
        return void 0;
    };
    const flow = [];
    for (const plan of plans){
        const verb = plan.type;
        const matched = actionSpace.find((candidate)=>candidate.name === verb);
        if (!matched) {
            console.warn(`Cannot convert action ${verb} to yaml flow. Will ignore it.`);
            continue;
        }
        const flowKey = matched.interfaceAlias || verb;
        const flowParam = matched.paramSchema ? dumpActionParam(plan.param || {}, matched.paramSchema) : {};
        const shortcutField = shortcutFieldFor(matched);
        const paramKeys = shortcutField ? Object.keys(flowParam) : [];
        const inlineable = shortcutField && 1 === paramKeys.length && paramKeys[0] === shortcutField && 'string' === typeof flowParam[shortcutField];
        if (inlineable) {
            flow.push({
                [flowKey]: flowParam[shortcutField]
            });
        } else {
            flow.push({
                [flowKey]: '',
                ...flowParam
            });
        }
    }
    return flow;
}
/** Zod schema for a point: numeric `left` and `top`. */
const PointSchema = external_zod_namespaceObject.z.object({
    left: external_zod_namespaceObject.z.number(),
    top: external_zod_namespaceObject.z.number()
});

/** Zod schema for a size: numeric `width` and `height`. */
const SizeSchema = external_zod_namespaceObject.z.object({
    width: external_zod_namespaceObject.z.number(),
    height: external_zod_namespaceObject.z.number()
});

/** Intersection of PointSchema and SizeSchema plus an optional numeric `zoom`. */
const RectSchema = PointSchema.and(SizeSchema).and(external_zod_namespaceObject.z.object({
    zoom: external_zod_namespaceObject.z.number().optional()
}));

/** Optional multimodal extras: a list of `{ name, url }` images and a boolean flag. */
const TMultimodalPromptSchema = external_zod_namespaceObject.z.object({
    images: external_zod_namespaceObject.z.array(external_zod_namespaceObject.z.object({
        name: external_zod_namespaceObject.z.string(),
        url: external_zod_namespaceObject.z.string()
    })).optional(),
    convertHttpImage2Base64: external_zod_namespaceObject.z.boolean().optional()
});

/** A user prompt: either a plain string or `{ prompt }` combined with the multimodal extras. */
const TUserPromptSchema = external_zod_namespaceObject.z.union([
    external_zod_namespaceObject.z.string(),
    external_zod_namespaceObject.z.object({
        prompt: external_zod_namespaceObject.z.string()
    }).and(TMultimodalPromptSchema.partial())
]);

// Shape key that ifMidsceneLocatorField treats as marking a locator field.
const locateFieldFlagName = 'midscene_location_field_flag';

/**
 * Input schema for a locator parameter. Unknown keys are kept (`passthrough`).
 * `deepThink` is described in the schema itself as deprecated in favour of `deepLocate`.
 */
const MidsceneLocationInput = external_zod_namespaceObject.z.object({
    prompt: TUserPromptSchema,
    deepLocate: external_zod_namespaceObject.z.boolean().optional(),
    deepThink: external_zod_namespaceObject.z.boolean().optional().describe('@deprecated Use `deepLocate` instead.'),
    cacheable: external_zod_namespaceObject.z.boolean().optional(),
    xpath: external_zod_namespaceObject.z.union([
        external_zod_namespaceObject.z.string(),
        external_zod_namespaceObject.z.boolean()
    ]).optional()
}).passthrough();

/** Returns the shared locator input schema (the same instance on every call). */
const getMidsceneLocationSchema = ()=>MidsceneLocationInput;
/**
 * Tells whether a zod field definition describes a locator parameter.
 *
 * A `ZodOptional` wrapper is unwrapped once. The field counts as a locator when it
 * is a `ZodObject` whose shape contains the locator flag key, or a truthy `prompt`
 * entry.
 *
 * Missing input (null/undefined, or an optional wrapper without an inner type) now
 * yields `false` instead of a TypeError.
 *
 * @param {*} field - Zod type (inspected through its `_def`), or any other value.
 * @returns {boolean}
 */
const ifMidsceneLocatorField = (field)=>{
    let actualField = field;
    if (actualField?._def?.typeName === 'ZodOptional') actualField = actualField._def.innerType;
    if (actualField?._def?.typeName !== 'ZodObject') return false;
    const shape = actualField._def.shape();
    if (locateFieldFlagName in shape) return true;
    return 'prompt' in shape && Boolean(shape.prompt);
};
/**
 * Renders a prompt object as text, noting how many images accompany it.
 *
 * @param {{prompt: string, images?: Array}} promptObj
 * @returns {string} `prompt` alone, or `prompt` followed by ` (with N image[s])` when
 *   `images` is a non-empty array.
 */
const formatPromptWithImages = (promptObj)=>{
    const { prompt, images } = promptObj;
    if (!Array.isArray(images) || 0 === images.length) return prompt;
    const plural = images.length > 1 ? 's' : '';
    return prompt + ` (with ${images.length} image${plural})`;
};
/**
 * Renders a locator field as a string.
 *
 * After asserting `ifMidsceneLocatorField(field)`, returns: the field itself when it
 * is a string; `field.prompt` when that is a non-empty string; the formatted prompt
 * when `field.prompt` is an object carrying its own `prompt`; otherwise `String(field)`.
 *
 * NOTE(review): the guard inspects `field._def` (a schema-like object) while the
 * branches below treat `field` as a value — confirm which kind of input callers pass.
 *
 * @param {*} field
 * @returns {string}
 */
const dumpMidsceneLocatorField = (field)=>{
    (0, utils_namespaceObject.assert)(ifMidsceneLocatorField(field), 'field is not a midscene locator field');
    if ('string' === typeof field) return field;
    const prompt = field && 'object' === typeof field ? field.prompt : void 0;
    // An empty-string prompt is skipped on purpose and falls through to String(field).
    if (prompt && 'string' === typeof prompt) return prompt;
    if (prompt && 'object' === typeof prompt && prompt.prompt) return formatPromptWithImages(prompt);
    return String(field);
};
/**
 * Lists the keys of a zod object schema whose fields are locator fields.
 *
 * @param {*} zodType - Expected to be a `ZodObject` exposing `shape`; anything else gives `[]`.
 * @param {boolean} [requiredOnly] - When truthy, fields whose type name is
 *   `ZodOptional` are left out.
 * @returns {string[]} Matching keys in shape order.
 */
const findAllMidsceneLocatorField = (zodType, requiredOnly)=>{
    const isObjectSchema = Boolean(zodType) && zodType._def?.typeName === 'ZodObject' && Boolean(zodType.shape);
    if (!isObjectSchema) return [];
    const { shape } = zodType;
    return Object.keys(shape).filter((key)=>{
        const field = shape[key];
        return ifMidsceneLocatorField(field) && (!requiredOnly || field._def?.typeName !== 'ZodOptional');
    });
};
/**
 * Produces a shallow copy of an action parameter object with locator fields
 * flattened to prompt strings.
 *
 * For each locator key reported by `findAllMidsceneLocatorField(zodSchema)`: an object
 * value with a string `prompt` is replaced by that string; an object value whose
 * `prompt` is itself an object with a `prompt` is replaced by the formatted prompt.
 * Everything else (strings, objects without a prompt, falsy values) is kept as is.
 *
 * @param {*} jsonObject - Parameters; anything that is not a plain object yields `{}`.
 * @param {*} zodSchema - Schema used to discover locator keys.
 * @returns {object} New object; the input is not modified.
 */
const dumpActionParam = (jsonObject, zodSchema)=>{
    if (!(0, utils_namespaceObject.isPlainObject)(jsonObject)) return {};
    const locatorFields = findAllMidsceneLocatorField(zodSchema);
    const dumped = {
        ...jsonObject
    };
    for (const fieldName of locatorFields){
        const value = dumped[fieldName];
        if (!value || 'object' !== typeof value || !value.prompt) continue;
        const { prompt } = value;
        if ('string' === typeof prompt) dumped[fieldName] = prompt;
        else if ('object' === typeof prompt && prompt.prompt) dumped[fieldName] = formatPromptWithImages(prompt);
    }
    return dumped;
};
/**
 * Validates action parameters against a zod schema while keeping locator values intact.
 *
 * Without a schema nothing is returned. When the schema has no locator fields the
 * parameters are parsed directly. Otherwise each locator field present in the input is
 * swapped for a `{ prompt: '_dummy_' }` placeholder during parsing and its original
 * value is written back onto the parsed result afterwards.
 *
 * When `options.shrunkShotToLogicalRatio` is defined and not 1, restored values that
 * are objects with both `center` and `rect` get a copy whose `center` and
 * `rect.left/top/width/height` are divided by the ratio and rounded.
 *
 * @param {object|null|undefined} rawParam - Raw parameters; nullish is treated as `{}`.
 * @param {*} zodSchema - Schema exposing `parse`.
 * @param {{shrunkShotToLogicalRatio?: number}} [options]
 * @returns {*} Parsed parameters, or `undefined` when no schema is given.
 */
const parseActionParam = (rawParam, zodSchema, options)=>{
    if (!zodSchema) return;
    const param = rawParam ?? {};
    const locateFields = findAllMidsceneLocatorField(zodSchema);
    if (0 === locateFields.length) return zodSchema.parse(param);

    // Remember the caller's locator values before they are replaced by placeholders.
    const originals = {};
    for (const fieldName of locateFields){
        if (fieldName in param) originals[fieldName] = param[fieldName];
    }
    const sanitized = {};
    for(const key in param){
        sanitized[key] = locateFields.includes(key) ? {
            prompt: '_dummy_'
        } : param[key];
    }
    const validated = zodSchema.parse(sanitized);

    const ratio = options?.shrunkShotToLogicalRatio;
    const shouldRescale = void 0 !== ratio && 1 !== ratio;
    const shrink = (n)=>Math.round(n / ratio);
    const rescale = (located)=>({
            ...located,
            center: [
                shrink(located.center[0]),
                shrink(located.center[1])
            ],
            rect: {
                ...located.rect,
                left: shrink(located.rect.left),
                top: shrink(located.rect.top),
                width: shrink(located.rect.width),
                height: shrink(located.rect.height)
            }
        });
    for(const fieldName in originals){
        const original = originals[fieldName];
        const hasGeometry = original && 'object' === typeof original && original.center && original.rect;
        validated[fieldName] = shouldRescale && hasGeometry ? rescale(original) : original;
    }
    return validated;
};
// Exported string constant; its value is the literal 'Finalize'.
423
+ const finalizeActionName = 'Finalize';
/**
 * Formats a moment in local time and appends the format string used.
 *
 * The first occurrence of each token is substituted, in this order: `YYYY`, `MM`,
 * `DD`, `HH`, `mm`, `ss` (all but the year zero-padded to two digits). The result is
 * followed by a space and the original format in parentheses.
 *
 * @param {string} [format='YYYY-MM-DD HH:mm:ss'] - Template containing the tokens above.
 * @param {number|string|Date} [timestamp] - Passed to `new Date`; the current time when omitted.
 * @returns {string} For example `2024-01-05 03:04:09 (YYYY-MM-DD HH:mm:ss)`.
 */
const getReadableTimeString = (format = 'YYYY-MM-DD HH:mm:ss', timestamp)=>{
    const moment = void 0 !== timestamp ? new Date(timestamp) : new Date();
    const twoDigits = (value)=>String(value).padStart(2, '0');
    const substitutions = [
        ['YYYY', String(moment.getFullYear())],
        ['MM', twoDigits(moment.getMonth() + 1)],
        ['DD', twoDigits(moment.getDate())],
        ['HH', twoDigits(moment.getHours())],
        ['mm', twoDigits(moment.getMinutes())],
        ['ss', twoDigits(moment.getSeconds())]
    ];
    let rendered = format;
    for (const [token, text] of substitutions)rendered = rendered.replace(token, text);
    return `${rendered} (${format})`;
};
435
+ exports.PointSchema = __webpack_exports__.PointSchema;
436
+ exports.RectSchema = __webpack_exports__.RectSchema;
437
+ exports.SizeSchema = __webpack_exports__.SizeSchema;
438
+ exports.TMultimodalPromptSchema = __webpack_exports__.TMultimodalPromptSchema;
439
+ exports.TUserPromptSchema = __webpack_exports__.TUserPromptSchema;
440
+ exports.adaptBbox = __webpack_exports__.adaptBbox;
441
+ exports.adaptBboxToRect = __webpack_exports__.adaptBboxToRect;
442
+ exports.adaptDoubaoBbox = __webpack_exports__.adaptDoubaoBbox;
443
+ exports.adaptGeminiBbox = __webpack_exports__.adaptGeminiBbox;
444
+ exports.adaptGpt5Bbox = __webpack_exports__.adaptGpt5Bbox;
445
+ exports.adaptQwen2_5Bbox = __webpack_exports__.adaptQwen2_5Bbox;
446
+ exports.buildYamlFlowFromPlans = __webpack_exports__.buildYamlFlowFromPlans;
447
+ exports.dumpActionParam = __webpack_exports__.dumpActionParam;
448
+ exports.dumpMidsceneLocatorField = __webpack_exports__.dumpMidsceneLocatorField;
449
+ exports.expandSearchArea = __webpack_exports__.expandSearchArea;
450
+ exports.fillBboxParam = __webpack_exports__.fillBboxParam;
451
+ exports.finalizeActionName = __webpack_exports__.finalizeActionName;
452
+ exports.findAllMidsceneLocatorField = __webpack_exports__.findAllMidsceneLocatorField;
453
+ exports.getMidsceneLocationSchema = __webpack_exports__.getMidsceneLocationSchema;
454
+ exports.getReadableTimeString = __webpack_exports__.getReadableTimeString;
455
+ exports.ifMidsceneLocatorField = __webpack_exports__.ifMidsceneLocatorField;
456
+ exports.markupImageForLLM = __webpack_exports__.markupImageForLLM;
457
+ exports.mergeRects = __webpack_exports__.mergeRects;
458
+ exports.normalized01000 = __webpack_exports__.normalized01000;
459
+ exports.parseActionParam = __webpack_exports__.parseActionParam;
460
+ exports.pointToBbox = __webpack_exports__.pointToBbox;
461
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
462
+ "PointSchema",
463
+ "RectSchema",
464
+ "SizeSchema",
465
+ "TMultimodalPromptSchema",
466
+ "TUserPromptSchema",
467
+ "adaptBbox",
468
+ "adaptBboxToRect",
469
+ "adaptDoubaoBbox",
470
+ "adaptGeminiBbox",
471
+ "adaptGpt5Bbox",
472
+ "adaptQwen2_5Bbox",
473
+ "buildYamlFlowFromPlans",
474
+ "dumpActionParam",
475
+ "dumpMidsceneLocatorField",
476
+ "expandSearchArea",
477
+ "fillBboxParam",
478
+ "finalizeActionName",
479
+ "findAllMidsceneLocatorField",
480
+ "getMidsceneLocationSchema",
481
+ "getReadableTimeString",
482
+ "ifMidsceneLocatorField",
483
+ "markupImageForLLM",
484
+ "mergeRects",
485
+ "normalized01000",
486
+ "parseActionParam",
487
+ "pointToBbox"
488
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
489
+ Object.defineProperty(exports, '__esModule', {
490
+ value: true
491
+ });
@@ -0,0 +1,18 @@
1
+ "use strict";
// Generated CommonJS stub: it defines only the runtime helper `r`, applies it to an
// empty export object, and then copies that object's (zero) keys onto `exports`.
2
+ var __webpack_require__ = {};
// Runtime helper `r`: marks `exports1` as an ES module. `"u" > typeof Symbol` is a
// compact check that `typeof Symbol` is not "undefined".
3
+ (()=>{
4
+ __webpack_require__.r = (exports1)=>{
5
+ if ("u" > typeof Symbol && Symbol.toStringTag) Object.defineProperty(exports1, Symbol.toStringTag, {
6
+ value: 'Module'
7
+ });
8
+ Object.defineProperty(exports1, '__esModule', {
9
+ value: true
10
+ });
11
+ };
12
+ })();
13
+ var __webpack_exports__ = {};
14
+ __webpack_require__.r(__webpack_exports__);
// __webpack_exports__ has no enumerable keys at this point, so this loop copies nothing.
15
+ for(var __rspack_i in __webpack_exports__)exports[__rspack_i] = __webpack_exports__[__rspack_i];
16
+ Object.defineProperty(exports, '__esModule', {
17
+ value: true
18
+ });