@godscene/core 1.7.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. package/LICENSE +21 -0
  2. package/README.md +9 -0
  3. package/dist/es/agent/agent.mjs +767 -0
  4. package/dist/es/agent/common.mjs +0 -0
  5. package/dist/es/agent/execution-session.mjs +39 -0
  6. package/dist/es/agent/index.mjs +6 -0
  7. package/dist/es/agent/task-builder.mjs +343 -0
  8. package/dist/es/agent/task-cache.mjs +212 -0
  9. package/dist/es/agent/tasks.mjs +428 -0
  10. package/dist/es/agent/ui-utils.mjs +101 -0
  11. package/dist/es/agent/utils.mjs +167 -0
  12. package/dist/es/ai-model/auto-glm/actions.mjs +237 -0
  13. package/dist/es/ai-model/auto-glm/index.mjs +6 -0
  14. package/dist/es/ai-model/auto-glm/parser.mjs +237 -0
  15. package/dist/es/ai-model/auto-glm/planning.mjs +69 -0
  16. package/dist/es/ai-model/auto-glm/prompt.mjs +220 -0
  17. package/dist/es/ai-model/auto-glm/util.mjs +7 -0
  18. package/dist/es/ai-model/connectivity.mjs +136 -0
  19. package/dist/es/ai-model/conversation-history.mjs +193 -0
  20. package/dist/es/ai-model/index.mjs +12 -0
  21. package/dist/es/ai-model/inspect.mjs +395 -0
  22. package/dist/es/ai-model/llm-planning.mjs +231 -0
  23. package/dist/es/ai-model/prompt/common.mjs +5 -0
  24. package/dist/es/ai-model/prompt/describe.mjs +64 -0
  25. package/dist/es/ai-model/prompt/extraction.mjs +129 -0
  26. package/dist/es/ai-model/prompt/llm-locator.mjs +49 -0
  27. package/dist/es/ai-model/prompt/llm-planning.mjs +584 -0
  28. package/dist/es/ai-model/prompt/llm-section-locator.mjs +42 -0
  29. package/dist/es/ai-model/prompt/order-sensitive-judge.mjs +33 -0
  30. package/dist/es/ai-model/prompt/playwright-generator.mjs +115 -0
  31. package/dist/es/ai-model/prompt/ui-tars-planning.mjs +34 -0
  32. package/dist/es/ai-model/prompt/util.mjs +57 -0
  33. package/dist/es/ai-model/prompt/yaml-generator.mjs +201 -0
  34. package/dist/es/ai-model/service-caller/codex-app-server.mjs +573 -0
  35. package/dist/es/ai-model/service-caller/image-detail.mjs +4 -0
  36. package/dist/es/ai-model/service-caller/index.mjs +648 -0
  37. package/dist/es/ai-model/service-caller/request-timeout.mjs +47 -0
  38. package/dist/es/ai-model/ui-tars-planning.mjs +247 -0
  39. package/dist/es/common.mjs +382 -0
  40. package/dist/es/device/device-options.mjs +0 -0
  41. package/dist/es/device/index.mjs +340 -0
  42. package/dist/es/dump/html-utils.mjs +290 -0
  43. package/dist/es/dump/index.mjs +3 -0
  44. package/dist/es/dump/screenshot-restoration.mjs +30 -0
  45. package/dist/es/dump/screenshot-store.mjs +125 -0
  46. package/dist/es/index.mjs +17 -0
  47. package/dist/es/report-cli.mjs +149 -0
  48. package/dist/es/report-generator.mjs +203 -0
  49. package/dist/es/report-markdown.mjs +216 -0
  50. package/dist/es/report.mjs +287 -0
  51. package/dist/es/screenshot-item.mjs +120 -0
  52. package/dist/es/service/index.mjs +272 -0
  53. package/dist/es/service/utils.mjs +13 -0
  54. package/dist/es/skill/index.mjs +35 -0
  55. package/dist/es/task-runner.mjs +261 -0
  56. package/dist/es/task-timing.mjs +10 -0
  57. package/dist/es/tree.mjs +11 -0
  58. package/dist/es/types.mjs +202 -0
  59. package/dist/es/utils.mjs +232 -0
  60. package/dist/es/yaml/builder.mjs +11 -0
  61. package/dist/es/yaml/index.mjs +4 -0
  62. package/dist/es/yaml/player.mjs +425 -0
  63. package/dist/es/yaml/utils.mjs +100 -0
  64. package/dist/es/yaml.mjs +0 -0
  65. package/dist/lib/agent/agent.js +815 -0
  66. package/dist/lib/agent/common.js +5 -0
  67. package/dist/lib/agent/execution-session.js +73 -0
  68. package/dist/lib/agent/index.js +76 -0
  69. package/dist/lib/agent/task-builder.js +380 -0
  70. package/dist/lib/agent/task-cache.js +264 -0
  71. package/dist/lib/agent/tasks.js +471 -0
  72. package/dist/lib/agent/ui-utils.js +153 -0
  73. package/dist/lib/agent/utils.js +238 -0
  74. package/dist/lib/ai-model/auto-glm/actions.js +271 -0
  75. package/dist/lib/ai-model/auto-glm/index.js +64 -0
  76. package/dist/lib/ai-model/auto-glm/parser.js +280 -0
  77. package/dist/lib/ai-model/auto-glm/planning.js +103 -0
  78. package/dist/lib/ai-model/auto-glm/prompt.js +257 -0
  79. package/dist/lib/ai-model/auto-glm/util.js +44 -0
  80. package/dist/lib/ai-model/connectivity.js +180 -0
  81. package/dist/lib/ai-model/conversation-history.js +227 -0
  82. package/dist/lib/ai-model/index.js +127 -0
  83. package/dist/lib/ai-model/inspect.js +441 -0
  84. package/dist/lib/ai-model/llm-planning.js +268 -0
  85. package/dist/lib/ai-model/prompt/common.js +39 -0
  86. package/dist/lib/ai-model/prompt/describe.js +98 -0
  87. package/dist/lib/ai-model/prompt/extraction.js +169 -0
  88. package/dist/lib/ai-model/prompt/llm-locator.js +86 -0
  89. package/dist/lib/ai-model/prompt/llm-planning.js +621 -0
  90. package/dist/lib/ai-model/prompt/llm-section-locator.js +79 -0
  91. package/dist/lib/ai-model/prompt/order-sensitive-judge.js +70 -0
  92. package/dist/lib/ai-model/prompt/playwright-generator.js +176 -0
  93. package/dist/lib/ai-model/prompt/ui-tars-planning.js +71 -0
  94. package/dist/lib/ai-model/prompt/util.js +103 -0
  95. package/dist/lib/ai-model/prompt/yaml-generator.js +262 -0
  96. package/dist/lib/ai-model/service-caller/codex-app-server.js +622 -0
  97. package/dist/lib/ai-model/service-caller/image-detail.js +38 -0
  98. package/dist/lib/ai-model/service-caller/index.js +716 -0
  99. package/dist/lib/ai-model/service-caller/request-timeout.js +93 -0
  100. package/dist/lib/ai-model/ui-tars-planning.js +281 -0
  101. package/dist/lib/common.js +491 -0
  102. package/dist/lib/device/device-options.js +18 -0
  103. package/dist/lib/device/index.js +467 -0
  104. package/dist/lib/dump/html-utils.js +366 -0
  105. package/dist/lib/dump/index.js +58 -0
  106. package/dist/lib/dump/screenshot-restoration.js +64 -0
  107. package/dist/lib/dump/screenshot-store.js +165 -0
  108. package/dist/lib/index.js +184 -0
  109. package/dist/lib/report-cli.js +189 -0
  110. package/dist/lib/report-generator.js +244 -0
  111. package/dist/lib/report-markdown.js +253 -0
  112. package/dist/lib/report.js +333 -0
  113. package/dist/lib/screenshot-item.js +154 -0
  114. package/dist/lib/service/index.js +306 -0
  115. package/dist/lib/service/utils.js +47 -0
  116. package/dist/lib/skill/index.js +69 -0
  117. package/dist/lib/task-runner.js +298 -0
  118. package/dist/lib/task-timing.js +44 -0
  119. package/dist/lib/tree.js +51 -0
  120. package/dist/lib/types.js +298 -0
  121. package/dist/lib/utils.js +314 -0
  122. package/dist/lib/yaml/builder.js +55 -0
  123. package/dist/lib/yaml/index.js +79 -0
  124. package/dist/lib/yaml/player.js +459 -0
  125. package/dist/lib/yaml/utils.js +153 -0
  126. package/dist/lib/yaml.js +18 -0
  127. package/dist/types/agent/agent.d.ts +220 -0
  128. package/dist/types/agent/common.d.ts +0 -0
  129. package/dist/types/agent/execution-session.d.ts +36 -0
  130. package/dist/types/agent/index.d.ts +9 -0
  131. package/dist/types/agent/task-builder.d.ts +34 -0
  132. package/dist/types/agent/task-cache.d.ts +49 -0
  133. package/dist/types/agent/tasks.d.ts +70 -0
  134. package/dist/types/agent/ui-utils.d.ts +14 -0
  135. package/dist/types/agent/utils.d.ts +25 -0
  136. package/dist/types/ai-model/auto-glm/actions.d.ts +78 -0
  137. package/dist/types/ai-model/auto-glm/index.d.ts +6 -0
  138. package/dist/types/ai-model/auto-glm/parser.d.ts +18 -0
  139. package/dist/types/ai-model/auto-glm/planning.d.ts +12 -0
  140. package/dist/types/ai-model/auto-glm/prompt.d.ts +27 -0
  141. package/dist/types/ai-model/auto-glm/util.d.ts +13 -0
  142. package/dist/types/ai-model/connectivity.d.ts +20 -0
  143. package/dist/types/ai-model/conversation-history.d.ts +105 -0
  144. package/dist/types/ai-model/index.d.ts +16 -0
  145. package/dist/types/ai-model/inspect.d.ts +67 -0
  146. package/dist/types/ai-model/llm-planning.d.ts +19 -0
  147. package/dist/types/ai-model/prompt/common.d.ts +2 -0
  148. package/dist/types/ai-model/prompt/describe.d.ts +1 -0
  149. package/dist/types/ai-model/prompt/extraction.d.ts +7 -0
  150. package/dist/types/ai-model/prompt/llm-locator.d.ts +3 -0
  151. package/dist/types/ai-model/prompt/llm-planning.d.ts +10 -0
  152. package/dist/types/ai-model/prompt/llm-section-locator.d.ts +3 -0
  153. package/dist/types/ai-model/prompt/order-sensitive-judge.d.ts +2 -0
  154. package/dist/types/ai-model/prompt/playwright-generator.d.ts +26 -0
  155. package/dist/types/ai-model/prompt/ui-tars-planning.d.ts +2 -0
  156. package/dist/types/ai-model/prompt/util.d.ts +33 -0
  157. package/dist/types/ai-model/prompt/yaml-generator.d.ts +102 -0
  158. package/dist/types/ai-model/service-caller/codex-app-server.d.ts +42 -0
  159. package/dist/types/ai-model/service-caller/image-detail.d.ts +2 -0
  160. package/dist/types/ai-model/service-caller/index.d.ts +60 -0
  161. package/dist/types/ai-model/service-caller/request-timeout.d.ts +32 -0
  162. package/dist/types/ai-model/ui-tars-planning.d.ts +72 -0
  163. package/dist/types/common.d.ts +288 -0
  164. package/dist/types/device/device-options.d.ts +155 -0
  165. package/dist/types/device/index.d.ts +2565 -0
  166. package/dist/types/dump/html-utils.d.ts +75 -0
  167. package/dist/types/dump/index.d.ts +5 -0
  168. package/dist/types/dump/screenshot-restoration.d.ts +8 -0
  169. package/dist/types/dump/screenshot-store.d.ts +49 -0
  170. package/dist/types/index.d.ts +21 -0
  171. package/dist/types/report-cli.d.ts +36 -0
  172. package/dist/types/report-generator.d.ts +88 -0
  173. package/dist/types/report-markdown.d.ts +24 -0
  174. package/dist/types/report.d.ts +52 -0
  175. package/dist/types/screenshot-item.d.ts +67 -0
  176. package/dist/types/service/index.d.ts +24 -0
  177. package/dist/types/service/utils.d.ts +2 -0
  178. package/dist/types/skill/index.d.ts +25 -0
  179. package/dist/types/task-runner.d.ts +50 -0
  180. package/dist/types/task-timing.d.ts +8 -0
  181. package/dist/types/tree.d.ts +4 -0
  182. package/dist/types/types.d.ts +684 -0
  183. package/dist/types/utils.d.ts +45 -0
  184. package/dist/types/yaml/builder.d.ts +2 -0
  185. package/dist/types/yaml/index.d.ts +4 -0
  186. package/dist/types/yaml/player.d.ts +34 -0
  187. package/dist/types/yaml/utils.d.ts +9 -0
  188. package/dist/types/yaml.d.ts +215 -0
  189. package/package.json +130 -0
@@ -0,0 +1,272 @@
1
+ import { isAutoGLM } from "../ai-model/auto-glm/util.mjs";
2
+ import { AIResponseParseError, AiExtractElementInfo, AiLocateElement, callAIWithObjectResponse } from "../ai-model/index.mjs";
3
+ import { AiLocateSection, buildSearchAreaConfig } from "../ai-model/inspect.mjs";
4
+ import { elementDescriberInstruction } from "../ai-model/prompt/describe.mjs";
5
+ import { expandSearchArea } from "../common.mjs";
6
+ import { ServiceError } from "../types.mjs";
7
+ import { compositeElementInfoImg, cropByRect } from "@godscene/shared/img";
8
+ import { getDebug } from "@godscene/shared/logger";
9
+ import { assert } from "@godscene/shared/utils";
10
+ import { createServiceDump } from "./utils.mjs";
11
/**
 * Sets `obj[key]` to `value` and returns `obj`.
 *
 * When `key` is already visible on `obj` (own or inherited) the property is
 * (re)defined as an own, enumerable, configurable, writable data property;
 * otherwise a plain assignment is used.
 */
function _define_property(obj, key, value) {
    if (!(key in obj)) {
        obj[key] = value;
        return obj;
    }
    const descriptor = {
        value,
        enumerable: true,
        configurable: true,
        writable: true,
    };
    Object.defineProperty(obj, key, descriptor);
    return obj;
}
21
+ const debug = getDebug('ai:service'); // debug logger; Service.describe uses it to report the crop area
22
/**
 * AI-backed service offering three operations on a UI context: locating an
 * element (`locate`), extracting data (`extract`) and describing an element
 * (`describe`).
 *
 * The UI context is fetched through `contextRetrieverFn`, which is derived
 * from the constructor argument.
 */
class Service {
    /**
     * @param context Either a UI context object or a function returning (a
     *   promise of) one. A non-function value is wrapped so that
     *   `contextRetrieverFn()` always returns a promise of it.
     * @param opt Optional settings. `opt.taskInfo`, when defined, is merged
     *   into the `taskInfo` of the dumps built by `locate` and `extract`.
     */
    constructor(context, opt) {
        _define_property(this, "contextRetrieverFn", void 0);
        _define_property(this, "taskInfo", void 0);
        assert(context, 'context is required for Service');
        this.contextRetrieverFn = typeof context === 'function' ? context : () => Promise.resolve(context);
        if (opt?.taskInfo !== undefined) this.taskInfo = opt.taskInfo;
    }

    /**
     * Locate the element described by `query.prompt`.
     *
     * With `query.deepLocate` set, the search is first narrowed to an area:
     * either one built around `opt.planLocatedElement.rect` (no extra model
     * call for section lookup) or one found by `AiLocateSection`. The latter is
     * skipped, with a console warning, when `modelConfig.modelFamily` is
     * missing or is an AutoGLM family.
     *
     * @param query Object with `prompt` and optional `deepLocate`.
     * @param opt Optional `{ context, planLocatedElement }`.
     * @param modelConfig Model configuration; `modelFamily` is read from it.
     * @param abortSignal Forwarded to the AI calls.
     * @returns `{ element, rect, dump }`; `element` is `null` when nothing matched.
     * @throws ServiceError when the model reports errors or more than one
     *   element is returned. Assertion errors for invalid input or when no
     *   search area can be found.
     */
    async locate(query, opt, modelConfig, abortSignal) {
        const queryPrompt = typeof query === 'string' ? query : query.prompt;
        assert(queryPrompt, 'query is required for locate');
        assert(typeof query === 'object', 'query should be an object for locate');
        const hasPlanLocatedElement = !!opt?.planLocatedElement?.rect;
        let searchAreaPrompt;
        if (query.deepLocate && !hasPlanLocatedElement) searchAreaPrompt = query.prompt;
        const { modelFamily } = modelConfig;
        if (searchAreaPrompt && !modelFamily) {
            console.warn('The "deepLocate" feature is not supported with multimodal LLM. Please config VL model for Midscene. https://midscenejs.com/model-config');
            searchAreaPrompt = void 0;
        }
        if (searchAreaPrompt && isAutoGLM(modelFamily)) {
            console.warn('The "deepLocate" feature is not supported with AutoGLM.');
            searchAreaPrompt = void 0;
        }
        const context = opt?.context || await this.contextRetrieverFn();
        let searchArea;
        let searchAreaRawResponse;
        let searchAreaUsage;
        let searchAreaResponse;
        if (query.deepLocate && hasPlanLocatedElement) {
            // Reuse the rect found during planning instead of asking the model for a section.
            const searchAreaConfig = await buildSearchAreaConfig({
                context,
                baseRect: opt.planLocatedElement.rect,
                modelFamily
            });
            searchArea = searchAreaConfig.rect;
            searchAreaRawResponse = JSON.stringify({
                source: 'plan-located-element',
                rect: opt.planLocatedElement.rect
            });
            searchAreaResponse = {
                rect: searchArea,
                imageBase64: searchAreaConfig.imageBase64,
                scale: searchAreaConfig.scale,
                rawResponse: searchAreaRawResponse
            };
        } else if (searchAreaPrompt) {
            searchAreaResponse = await AiLocateSection({
                context,
                sectionDescription: searchAreaPrompt,
                modelConfig,
                abortSignal
            });
            assert(searchAreaResponse.rect, `cannot find search area for "${searchAreaPrompt}"${searchAreaResponse.error ? `: ${searchAreaResponse.error}` : ''}`);
            searchAreaRawResponse = searchAreaResponse.rawResponse;
            searchAreaUsage = searchAreaResponse.usage;
            searchArea = searchAreaResponse.rect;
        }
        const startTime = Date.now();
        const { parseResult, rect, rawResponse, usage, reasoning_content } = await AiLocateElement({
            context,
            targetElementDescription: queryPrompt,
            searchConfig: searchAreaResponse,
            modelConfig,
            abortSignal
        });
        const timeCost = Date.now() - startTime;
        const taskInfo = {
            ...(this.taskInfo ? this.taskInfo : {}),
            durationMs: timeCost,
            rawResponse: JSON.stringify(rawResponse),
            formatResponse: JSON.stringify(parseResult),
            usage,
            searchArea,
            searchAreaRawResponse,
            searchAreaUsage,
            reasoning_content
        };
        let errorLog;
        if (parseResult.errors?.length) errorLog = `failed to locate element: \n${parseResult.errors.join('\n')}`;
        const elements = parseResult.elements || [];
        const dump = createServiceDump({
            type: 'locate',
            userQuery: {
                element: queryPrompt
            },
            matchedElement: elements,
            matchedRect: rect,
            data: null,
            taskInfo,
            deepLocate: !!searchArea,
            error: errorLog
        });
        if (errorLog) throw new ServiceError(errorLog, dump);
        if (elements.length > 1) throw new ServiceError(`locate: multiple elements found, length = ${elements.length}`, dump);
        if (elements.length === 1) {
            const [matched] = elements;
            return {
                element: {
                    center: matched.center,
                    rect: matched.rect,
                    description: matched.description
                },
                rect,
                dump
            };
        }
        return {
            element: null,
            rect,
            dump
        };
    }

    /**
     * Extract data from `context` according to `dataDemand`.
     *
     * @param dataDemand Object or string describing the wanted data.
     * @param modelConfig Model configuration forwarded to the AI call.
     * @param opt Forwarded as `extractOption`.
     * @param pageDescription Forwarded to the AI call.
     * @param multimodalPrompt Forwarded to the AI call.
     * @param context UI context; required.
     * @returns `{ data, thought, usage, reasoning_content, dump }`.
     * @throws ServiceError when the AI response cannot be parsed
     *   (`AIResponseParseError`) or when errors are reported and no data came
     *   back. Any other error from the AI call is rethrown unchanged.
     */
    async extract(dataDemand, modelConfig, opt, pageDescription, multimodalPrompt, context) {
        assert(context, 'context is required for extract');
        assert(typeof dataDemand === 'object' || typeof dataDemand === 'string', `dataDemand should be object or string, but get ${typeof dataDemand}`);
        const startTime = Date.now();
        let parseResult;
        let rawResponse;
        let usage;
        let reasoning_content;
        try {
            const result = await AiExtractElementInfo({
                context,
                dataQuery: dataDemand,
                multimodalPrompt,
                extractOption: opt,
                modelConfig,
                pageDescription
            });
            parseResult = result.parseResult;
            rawResponse = result.rawResponse;
            usage = result.usage;
            reasoning_content = result.reasoning_content;
        } catch (error) {
            if (error instanceof AIResponseParseError) {
                // Attach a dump so the caller can inspect the unparsable response.
                const failedTaskInfo = {
                    ...(this.taskInfo ? this.taskInfo : {}),
                    durationMs: Date.now() - startTime,
                    rawResponse: error.rawResponse,
                    usage: error.usage
                };
                const failedDump = createServiceDump({
                    type: 'extract',
                    userQuery: {
                        dataDemand
                    },
                    matchedElement: [],
                    data: null,
                    taskInfo: failedTaskInfo,
                    error: error.message
                });
                throw new ServiceError(error.message, failedDump);
            }
            throw error;
        }
        const timeCost = Date.now() - startTime;
        const taskInfo = {
            ...(this.taskInfo ? this.taskInfo : {}),
            durationMs: timeCost,
            rawResponse,
            formatResponse: JSON.stringify(parseResult),
            usage,
            reasoning_content
        };
        let errorLog;
        // parseResult is treated as optional below (`parseResult || {}`), so it
        // must be guarded here too rather than crashing with a TypeError.
        if (parseResult?.errors?.length) errorLog = `AI response error: \n${parseResult.errors.join('\n')}`;
        const { data, thought } = parseResult || {};
        const dump = createServiceDump({
            type: 'extract',
            userQuery: {
                dataDemand
            },
            matchedElement: [],
            data,
            taskInfo,
            error: errorLog
        });
        // Errors are tolerated as long as some data was produced.
        if (errorLog && !data) throw new ServiceError(errorLog, dump);
        return {
            data,
            thought,
            usage,
            reasoning_content,
            dump
        };
    }

    /**
     * Ask the model to describe the element at `target`.
     *
     * @param target Either a rect (`{ left, top, width, height }`) or an array
     *   whose first two entries are used as the centre of a 30x30 rect.
     * @param modelConfig Model configuration; `modelFamily` is read from it.
     * @param opt Optional; with `opt.deepLocate` the annotated screenshot is
     *   cropped to an expanded area around the target before being sent.
     * @returns The `content` object of the model response.
     * @throws Assertion errors when the target or screenshot is missing, when
     *   the response carries an `error`, or when it has no `description`.
     */
    async describe(target, modelConfig, opt) {
        assert(target, 'target is required for service.describe');
        const context = await this.contextRetrieverFn();
        const { shotSize } = context;
        const screenshotBase64 = context.screenshot.base64;
        assert(screenshotBase64, 'screenshot is required for service.describe');
        const { modelFamily } = modelConfig;
        const systemPrompt = elementDescriberInstruction();
        const defaultRectSize = 30;
        const targetRect = Array.isArray(target) ? {
            left: Math.floor(target[0] - defaultRectSize / 2),
            top: Math.floor(target[1] - defaultRectSize / 2),
            width: defaultRectSize,
            height: defaultRectSize
        } : target;
        let imagePayload = await compositeElementInfoImg({
            inputImgBase64: screenshotBase64,
            size: shotSize,
            elementsPositionInfo: [
                {
                    rect: targetRect
                }
            ],
            borderThickness: 3
        });
        if (opt?.deepLocate) {
            const searchArea = expandSearchArea(targetRect, shotSize);
            debug('describe: cropping to searchArea', searchArea);
            const croppedResult = await cropByRect(imagePayload, searchArea, modelFamily === 'qwen2.5-vl');
            imagePayload = croppedResult.imageBase64;
        }
        const msgs = [
            {
                role: 'system',
                content: systemPrompt
            },
            {
                role: 'user',
                content: [
                    {
                        type: 'image_url',
                        image_url: {
                            url: imagePayload,
                            detail: 'high'
                        }
                    }
                ]
            }
        ];
        const res = await callAIWithObjectResponse(msgs, modelConfig);
        const { content } = res;
        assert(!content.error, `describe failed: ${content.error}`);
        assert(content.description, 'failed to describe the element');
        return content;
    }
}
272
+ export { Service as default };
@@ -0,0 +1,13 @@
1
+ import { uuid } from "@godscene/shared/utils";
2
/**
 * Build a service dump record: a generated `logId`, the current `logTime`,
 * followed by every field of `data` (which may override either).
 */
function createServiceDump(data) {
    const logTime = Date.now();
    const logId = uuid();
    return { logId, logTime, ...data };
}
13
+ export { createServiceDump };
@@ -0,0 +1,35 @@
1
+ import { reportCLIError, runToolsCLI } from "@godscene/shared/cli";
2
+ import { BaseMidsceneTools } from "@godscene/shared/mcp/base-tools";
3
+ import { Agent } from "../agent/agent.mjs";
4
/**
 * Stores `value` under `key` on `obj` and returns `obj`.
 *
 * A key that already exists on the object or its prototype chain is defined
 * as an own enumerable/configurable/writable data property; a new key is
 * simply assigned.
 */
function _define_property(obj, key, value) {
    const alreadyPresent = key in obj;
    if (alreadyPresent) {
        Object.defineProperty(obj, key, {
            value,
            enumerable: true,
            configurable: true,
            writable: true,
        });
    } else {
        obj[key] = value;
    }
    return obj;
}
14
/**
 * Tools implementation that creates its device (and agent) from a device
 * class supplied at construction time.
 */
class SkillMidsceneTools extends BaseMidsceneTools {
    /**
     * @param DeviceClass Constructor invoked with no arguments whenever a
     *   device is needed.
     */
    constructor(DeviceClass) {
        super();
        _define_property(this, "DeviceClass", void 0);
        this.DeviceClass = DeviceClass;
    }

    /** Create a fresh device instance; nothing is cached. */
    createTemporaryDevice() {
        const { DeviceClass } = this;
        return new DeviceClass();
    }

    /**
     * Return `this.agent`, creating it on first use from a new device.
     */
    async ensureAgent() {
        if (this.agent) return this.agent;
        const { DeviceClass } = this;
        this.agent = new Agent(new DeviceClass());
        return this.agent;
    }
}
29
/**
 * Run the tools CLI for the device class given in `options`.
 *
 * @param options `{ DeviceClass, scriptName }`.
 * @returns The promise from `runToolsCLI`; on rejection the error is passed
 *   to `reportCLIError` and the process exits with the code it returns.
 */
function runSkillCLI(options) {
    const exitWithReportedError = (cliError) => {
        const exitCode = reportCLIError(cliError);
        process.exit(exitCode);
    };
    const skillTools = new SkillMidsceneTools(options.DeviceClass);
    const pendingRun = runToolsCLI(skillTools, options.scriptName);
    return pendingRun.catch(exitWithReportedError);
}
35
+ export { runSkillCLI };
@@ -0,0 +1,261 @@
1
+ import { setTimingFieldOnce } from "./task-timing.mjs";
2
+ import { ExecutionDump } from "./types.mjs";
3
+ import { getDebug } from "@godscene/shared/logger";
4
+ import { assert, uuid } from "@godscene/shared/utils";
5
/**
 * Write `value` to `obj[key]` and hand `obj` back.
 *
 * Keys not yet reachable on `obj` are assigned directly. Keys that are
 * already reachable (own or inherited) are defined as an own data property
 * that is enumerable, configurable and writable.
 */
function _define_property(obj, key, value) {
    const isNewKey = !(key in obj);
    if (isNewKey) {
        obj[key] = value;
    } else {
        const attributes = { value, enumerable: true, configurable: true, writable: true };
        Object.defineProperty(obj, key, attributes);
    }
    return obj;
}
15
+ const debug = getDebug('task-runner'); // debug logger; TaskRunner.getUiContext logs cache reuse through it
16
+ const UI_CONTEXT_CACHE_TTL_MS = 300; // maximum age (ms) of a cached UI context that getUiContext will still reuse
17
/**
 * Runs a queue of tasks in order, tracking a status for the runner
 * ('init' | 'pending' | 'running' | 'completed' | 'error') and for each task
 * ('pending' | 'running' | 'finished' | 'failed' | 'cancelled').
 */
class TaskRunner {
    /**
     * @param name Name reported in `dump()`.
     * @param uiContextBuilder Async function producing the UI context handed
     *   to task executors.
     * @param options Optional `{ tasks, onTaskStart, onTaskUpdate }`.
     */
    constructor(name, uiContextBuilder, options) {
        _define_property(this, "id", void 0);
        _define_property(this, "name", void 0);
        _define_property(this, "tasks", void 0);
        _define_property(this, "status", void 0);
        _define_property(this, "onTaskStart", void 0);
        _define_property(this, "uiContextBuilder", void 0);
        _define_property(this, "onTaskUpdate", void 0);
        _define_property(this, "executionLogTime", void 0);
        _define_property(this, "lastUiContext", void 0);
        this.id = uuid();
        this.status = options?.tasks && options.tasks.length > 0 ? 'pending' : 'init';
        this.name = name;
        this.tasks = (options?.tasks || []).map((item) => this.markTaskAsPending(item));
        this.onTaskStart = options?.onTaskStart;
        this.uiContextBuilder = uiContextBuilder;
        this.onTaskUpdate = options?.onTaskUpdate;
        this.executionLogTime = Date.now();
    }

    /** Invoke the `onTaskUpdate` callback, if any, with this runner and an optional error. */
    async emitOnTaskUpdate(error) {
        if (!this.onTaskUpdate) return;
        await this.onTaskUpdate(this, error);
    }

    /**
     * Return a UI context, reusing the last one when it is at most
     * `UI_CONTEXT_CACHE_TTL_MS` old and `options.forceRefresh` is not set.
     * The cache is cleared when the builder returns a falsy value or throws.
     */
    async getUiContext(options) {
        const now = Date.now();
        const cached = this.lastUiContext;
        const shouldReuse = !options?.forceRefresh && cached && now - cached.capturedAt <= UI_CONTEXT_CACHE_TTL_MS;
        if (shouldReuse && cached?.context) {
            debug(`reuse cached uiContext captured ${now - cached.capturedAt}ms ago`);
            return cached.context;
        }
        try {
            const uiContext = await this.uiContextBuilder();
            this.lastUiContext = uiContext ? {
                context: uiContext,
                capturedAt: Date.now()
            } : void 0;
            return uiContext;
        } catch (error) {
            this.lastUiContext = void 0;
            throw error;
        }
    }

    /**
     * Best-effort screenshot capture from a freshly built UI context.
     * Failures are logged and `undefined` is returned instead of throwing.
     */
    async captureScreenshot() {
        try {
            const uiContext = await this.getUiContext({
                forceRefresh: true
            });
            return uiContext?.screenshot;
        } catch (error) {
            console.error('error while capturing screenshot', error);
        }
    }

    /** Append a screenshot recorder item to `task.recorder`; no-op without a phase or screenshot. */
    attachRecorderItem(task, screenshot, phase) {
        if (!phase || !screenshot) return;
        const recorderItem = {
            type: 'screenshot',
            ts: Date.now(),
            screenshot,
            timing: phase
        };
        if (!task.recorder) {
            task.recorder = [
                recorderItem
            ];
            return;
        }
        task.recorder.push(recorderItem);
    }

    /** Copy `task` with a generated `taskId` and 'pending' status as defaults (fields on `task` win). */
    markTaskAsPending(task) {
        return {
            taskId: uuid(),
            status: 'pending',
            ...task
        };
    }

    /**
     * When the runner is in the 'error' state, require `options.allowWhenError`
     * (assertion failure otherwise) and reset the status so work can continue.
     */
    normalizeStatusFromError(options, errorMessage) {
        if (this.status !== 'error') return;
        assert(options?.allowWhenError, errorMessage || `task runner is in error state, cannot proceed\nerror=${this.latestErrorTask()?.error}\n${this.latestErrorTask()?.errorStack}`);
        this.status = this.tasks.length > 0 ? 'pending' : 'init';
    }

    /** Queue one task or an array of tasks as pending and notify listeners. */
    async append(task, options) {
        this.normalizeStatusFromError(options, `task runner is in error state, cannot append task\nerror=${this.latestErrorTask()?.error}\n${this.latestErrorTask()?.errorStack}`);
        if (Array.isArray(task)) this.tasks.push(...task.map((item) => this.markTaskAsPending(item)));
        else this.tasks.push(this.markTaskAsPending(task));
        if (this.status !== 'running') this.status = 'pending';
        await this.emitOnTaskUpdate();
    }

    /** `append` followed by `flush`, with the same options. */
    async appendAndFlush(task, options) {
        await this.append(task, options);
        return this.flush(options);
    }

    /**
     * Execute pending tasks in order, starting at the first pending one.
     *
     * Execution stops at the first failing task; the tasks after it are
     * marked 'cancelled'.
     *
     * @returns `{ thought, output }` of the last task, or `undefined` when
     *   there was nothing pending.
     * @throws TaskExecutionError when a task fails. Assertion errors when the
     *   runner is already running/completed, or in error state without
     *   `options.allowWhenError`.
     */
    async flush(options) {
        if (this.status === 'init' && this.tasks.length > 0) console.warn('illegal state for task runner, status is init but tasks are not empty');
        this.normalizeStatusFromError(options, 'task runner is in error state');
        assert(this.status !== 'running', 'task runner is already running');
        assert(this.status !== 'completed', 'task runner is already completed');
        const nextPendingIndex = this.tasks.findIndex((task) => task.status === 'pending');
        if (nextPendingIndex < 0) return;
        this.status = 'running';
        await this.emitOnTaskUpdate();
        const supportedTypes = ['Insight', 'Action Space', 'Planning'];
        const insightSubTypes = ['Query', 'Assert', 'WaitFor', 'Boolean', 'Number', 'String'];
        let taskIndex = nextPendingIndex;
        let successfullyCompleted = true;
        let previousFindOutput;
        while (taskIndex < this.tasks.length) {
            const task = this.tasks[taskIndex];
            assert(task.status === 'pending', `task status should be pending, but got: ${task.status}`);
            task.timing = {
                start: Date.now()
            };
            try {
                task.status = 'running';
                await this.emitOnTaskUpdate();
                try {
                    if (this.onTaskStart) await this.onTaskStart(task);
                } catch (e) {
                    // A failing start hook must not fail the task itself.
                    console.error('error in onTaskStart', e);
                }
                assert(supportedTypes.includes(task.type), `unsupported task type: ${task.type}`);
                const { executor, param } = task;
                assert(executor, `executor is required for task type: ${task.type}`);
                // Insight tasks always get a fresh UI context; others may reuse the cache.
                const forceRefresh = task.type === 'Insight';
                setTimingFieldOnce(task.timing, 'getUiContextStart');
                const uiContext = await this.getUiContext({
                    forceRefresh
                });
                setTimingFieldOnce(task.timing, 'getUiContextEnd');
                task.uiContext = uiContext;
                const executorContext = {
                    task,
                    element: previousFindOutput?.element,
                    uiContext
                };
                if (task.type === 'Insight') assert(insightSubTypes.includes(task.subType), `unsupported service subType: ${task.subType}`);
                const returnValue = await task.executor(param, executorContext);
                // The output of a Locate step is exposed as `element` to the following tasks.
                if (task.type === 'Planning' && task.subType === 'Locate') previousFindOutput = returnValue?.output;
                const isLastTask = taskIndex === this.tasks.length - 1;
                if (isLastTask) {
                    setTimingFieldOnce(task.timing, 'captureAfterCallingSnapshotStart');
                    const screenshot = await this.captureScreenshot();
                    this.attachRecorderItem(task, screenshot, 'after-calling');
                    setTimingFieldOnce(task.timing, 'captureAfterCallingSnapshotEnd');
                }
                Object.assign(task, returnValue);
                task.status = 'finished';
                task.timing.end = Date.now();
                task.timing.cost = task.timing.end - task.timing.start;
                await this.emitOnTaskUpdate();
                taskIndex++;
            } catch (e) {
                successfullyCompleted = false;
                task.error = e;
                task.errorMessage = e?.message || (typeof e === 'string' ? e : 'error-without-message');
                // `e` may be null/undefined (e.g. `throw undefined`); reading `.stack`
                // unguarded would crash this handler and leave the runner 'running'.
                task.errorStack = e?.stack;
                task.status = 'failed';
                task.timing.end = Date.now();
                task.timing.cost = task.timing.end - task.timing.start;
                await this.emitOnTaskUpdate();
                break;
            }
        }
        // After a failure, taskIndex points at the failed task; everything after it is cancelled.
        for (let i = taskIndex + 1; i < this.tasks.length; i++) this.tasks[i].status = 'cancelled';
        if (taskIndex + 1 < this.tasks.length) await this.emitOnTaskUpdate();
        if (!successfullyCompleted) {
            this.status = 'error';
            const errorTask = this.latestErrorTask();
            const messageBase = errorTask?.errorMessage || (errorTask?.error ? String(errorTask.error) : 'Task execution failed');
            const stack = errorTask?.errorStack;
            const message = stack ? `${messageBase}\n${stack}` : messageBase;
            const finalizeError = new TaskExecutionError(message, this, errorTask, {
                cause: errorTask?.error
            });
            await this.emitOnTaskUpdate(finalizeError);
            throw finalizeError;
        }
        this.status = 'completed';
        await this.emitOnTaskUpdate();
        if (this.tasks.length) {
            const outputIndex = Math.min(taskIndex, this.tasks.length - 1);
            const { thought, output } = this.tasks[outputIndex];
            return {
                thought,
                output
            };
        }
    }

    /** Whether the runner status is 'error'. */
    isInErrorState() {
        return this.status === 'error';
    }

    /** The most recently queued task with status 'failed', or `null` (also `null` unless in error state). */
    latestErrorTask() {
        if (this.status !== 'error') return null;
        for (let i = this.tasks.length - 1; i >= 0; i--) {
            if (this.tasks[i].status === 'failed') return this.tasks[i];
        }
        return null;
    }

    /** Snapshot of this runner (id, log time, name, tasks) as an `ExecutionDump`. */
    dump() {
        return new ExecutionDump({
            id: this.id,
            logTime: this.executionLogTime,
            name: this.name,
            tasks: this.tasks
        });
    }

    /**
     * Queue and run a task whose executor always throws `errorMsg`, so the
     * failure is recorded like any other task failure. Because the executor
     * throws, the `appendAndFlush` call normally rejects before the return
     * statement is reached.
     */
    async appendErrorPlan(errorMsg) {
        const errorTask = {
            type: 'Action Space',
            subType: 'Error',
            param: {
                thought: errorMsg
            },
            thought: errorMsg,
            executor: async () => {
                throw new Error(errorMsg || 'error without thought');
            }
        };
        await this.appendAndFlush(errorTask);
        return {
            output: void 0,
            runner: this
        };
    }
}
254
/**
 * Error carrying the runner it came from and the task that failed.
 *
 * `options` is passed straight to the `Error` constructor (e.g. `{ cause }`).
 */
class TaskExecutionError extends Error {
    constructor(message, runner, errorTask, options) {
        super(message, options);
        _define_property(this, "runner", void 0);
        _define_property(this, "errorTask", void 0);
        this.runner = runner;
        this.errorTask = errorTask;
    }
}
261
+ export { TaskExecutionError, TaskRunner };
@@ -0,0 +1,10 @@
1
+ import { getDebug } from "@godscene/shared/logger";
2
+ const debugTiming = getDebug('task-timing'); // logger for the warnings emitted by setTimingFieldOnce
3
/**
 * Record the current time in `timing[field]` unless that field already holds
 * a value other than `undefined`.
 *
 * A missing `timing` object or an already-set field only produces a debug
 * warning; nothing is thrown and nothing is returned.
 */
function setTimingFieldOnce(timing, field) {
    if (!timing) {
        debugTiming(`[warning] timing object missing, skip set. field=${field}`);
        return;
    }
    const now = Date.now();
    const previous = timing[field];
    if (previous === undefined) {
        timing[field] = now;
        return;
    }
    debugTiming(`[warning] duplicate timing field set ignored. field=${field}, existing=${previous}, incoming=${now}`);
}
10
+ export { setTimingFieldOnce };
@@ -0,0 +1,11 @@
1
+ import { descriptionOfTree, treeToList, trimAttributes, truncateText } from "@godscene/shared/extractor";
2
+ const ELEMENT_COUNT_WARNING_THRESHOLD = 5000; // flattened element count at or above which tree_descriptionOfTree prints a size warning
3
+ const TREE_SIZE_WARNING_MESSAGE = 'The number of elements is too large, it may cause the prompt to be too long, please use domIncluded: "visible-only" to reduce the number of elements'; // text passed to console.warn for that warning
4
/**
 * Wrapper around the shared `descriptionOfTree` that first warns on the
 * console when invisible elements are included (`visibleOnly` is false) and
 * the flattened tree has at least `ELEMENT_COUNT_WARNING_THRESHOLD` elements.
 *
 * All four arguments are forwarded unchanged and the shared function's
 * result is returned.
 */
function tree_descriptionOfTree(tree, truncateTextLength, filterNonTextContent = false, visibleOnly = true) {
    const isOversized = !visibleOnly && treeToList(tree).length >= ELEMENT_COUNT_WARNING_THRESHOLD;
    if (isOversized) {
        console.warn(TREE_SIZE_WARNING_MESSAGE);
    }
    return descriptionOfTree(tree, truncateTextLength, filterNonTextContent, visibleOnly);
}
11
+ export { tree_descriptionOfTree as descriptionOfTree, trimAttributes, truncateText };