@godscene/core 1.7.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +9 -0
- package/dist/es/agent/agent.mjs +767 -0
- package/dist/es/agent/common.mjs +0 -0
- package/dist/es/agent/execution-session.mjs +39 -0
- package/dist/es/agent/index.mjs +6 -0
- package/dist/es/agent/task-builder.mjs +343 -0
- package/dist/es/agent/task-cache.mjs +212 -0
- package/dist/es/agent/tasks.mjs +428 -0
- package/dist/es/agent/ui-utils.mjs +101 -0
- package/dist/es/agent/utils.mjs +167 -0
- package/dist/es/ai-model/auto-glm/actions.mjs +237 -0
- package/dist/es/ai-model/auto-glm/index.mjs +6 -0
- package/dist/es/ai-model/auto-glm/parser.mjs +237 -0
- package/dist/es/ai-model/auto-glm/planning.mjs +69 -0
- package/dist/es/ai-model/auto-glm/prompt.mjs +220 -0
- package/dist/es/ai-model/auto-glm/util.mjs +7 -0
- package/dist/es/ai-model/connectivity.mjs +136 -0
- package/dist/es/ai-model/conversation-history.mjs +193 -0
- package/dist/es/ai-model/index.mjs +12 -0
- package/dist/es/ai-model/inspect.mjs +395 -0
- package/dist/es/ai-model/llm-planning.mjs +231 -0
- package/dist/es/ai-model/prompt/common.mjs +5 -0
- package/dist/es/ai-model/prompt/describe.mjs +64 -0
- package/dist/es/ai-model/prompt/extraction.mjs +129 -0
- package/dist/es/ai-model/prompt/llm-locator.mjs +49 -0
- package/dist/es/ai-model/prompt/llm-planning.mjs +584 -0
- package/dist/es/ai-model/prompt/llm-section-locator.mjs +42 -0
- package/dist/es/ai-model/prompt/order-sensitive-judge.mjs +33 -0
- package/dist/es/ai-model/prompt/playwright-generator.mjs +115 -0
- package/dist/es/ai-model/prompt/ui-tars-planning.mjs +34 -0
- package/dist/es/ai-model/prompt/util.mjs +57 -0
- package/dist/es/ai-model/prompt/yaml-generator.mjs +201 -0
- package/dist/es/ai-model/service-caller/codex-app-server.mjs +573 -0
- package/dist/es/ai-model/service-caller/image-detail.mjs +4 -0
- package/dist/es/ai-model/service-caller/index.mjs +648 -0
- package/dist/es/ai-model/service-caller/request-timeout.mjs +47 -0
- package/dist/es/ai-model/ui-tars-planning.mjs +247 -0
- package/dist/es/common.mjs +382 -0
- package/dist/es/device/device-options.mjs +0 -0
- package/dist/es/device/index.mjs +340 -0
- package/dist/es/dump/html-utils.mjs +290 -0
- package/dist/es/dump/index.mjs +3 -0
- package/dist/es/dump/screenshot-restoration.mjs +30 -0
- package/dist/es/dump/screenshot-store.mjs +125 -0
- package/dist/es/index.mjs +17 -0
- package/dist/es/report-cli.mjs +149 -0
- package/dist/es/report-generator.mjs +203 -0
- package/dist/es/report-markdown.mjs +216 -0
- package/dist/es/report.mjs +287 -0
- package/dist/es/screenshot-item.mjs +120 -0
- package/dist/es/service/index.mjs +272 -0
- package/dist/es/service/utils.mjs +13 -0
- package/dist/es/skill/index.mjs +35 -0
- package/dist/es/task-runner.mjs +261 -0
- package/dist/es/task-timing.mjs +10 -0
- package/dist/es/tree.mjs +11 -0
- package/dist/es/types.mjs +202 -0
- package/dist/es/utils.mjs +232 -0
- package/dist/es/yaml/builder.mjs +11 -0
- package/dist/es/yaml/index.mjs +4 -0
- package/dist/es/yaml/player.mjs +425 -0
- package/dist/es/yaml/utils.mjs +100 -0
- package/dist/es/yaml.mjs +0 -0
- package/dist/lib/agent/agent.js +815 -0
- package/dist/lib/agent/common.js +5 -0
- package/dist/lib/agent/execution-session.js +73 -0
- package/dist/lib/agent/index.js +76 -0
- package/dist/lib/agent/task-builder.js +380 -0
- package/dist/lib/agent/task-cache.js +264 -0
- package/dist/lib/agent/tasks.js +471 -0
- package/dist/lib/agent/ui-utils.js +153 -0
- package/dist/lib/agent/utils.js +238 -0
- package/dist/lib/ai-model/auto-glm/actions.js +271 -0
- package/dist/lib/ai-model/auto-glm/index.js +64 -0
- package/dist/lib/ai-model/auto-glm/parser.js +280 -0
- package/dist/lib/ai-model/auto-glm/planning.js +103 -0
- package/dist/lib/ai-model/auto-glm/prompt.js +257 -0
- package/dist/lib/ai-model/auto-glm/util.js +44 -0
- package/dist/lib/ai-model/connectivity.js +180 -0
- package/dist/lib/ai-model/conversation-history.js +227 -0
- package/dist/lib/ai-model/index.js +127 -0
- package/dist/lib/ai-model/inspect.js +441 -0
- package/dist/lib/ai-model/llm-planning.js +268 -0
- package/dist/lib/ai-model/prompt/common.js +39 -0
- package/dist/lib/ai-model/prompt/describe.js +98 -0
- package/dist/lib/ai-model/prompt/extraction.js +169 -0
- package/dist/lib/ai-model/prompt/llm-locator.js +86 -0
- package/dist/lib/ai-model/prompt/llm-planning.js +621 -0
- package/dist/lib/ai-model/prompt/llm-section-locator.js +79 -0
- package/dist/lib/ai-model/prompt/order-sensitive-judge.js +70 -0
- package/dist/lib/ai-model/prompt/playwright-generator.js +176 -0
- package/dist/lib/ai-model/prompt/ui-tars-planning.js +71 -0
- package/dist/lib/ai-model/prompt/util.js +103 -0
- package/dist/lib/ai-model/prompt/yaml-generator.js +262 -0
- package/dist/lib/ai-model/service-caller/codex-app-server.js +622 -0
- package/dist/lib/ai-model/service-caller/image-detail.js +38 -0
- package/dist/lib/ai-model/service-caller/index.js +716 -0
- package/dist/lib/ai-model/service-caller/request-timeout.js +93 -0
- package/dist/lib/ai-model/ui-tars-planning.js +281 -0
- package/dist/lib/common.js +491 -0
- package/dist/lib/device/device-options.js +18 -0
- package/dist/lib/device/index.js +467 -0
- package/dist/lib/dump/html-utils.js +366 -0
- package/dist/lib/dump/index.js +58 -0
- package/dist/lib/dump/screenshot-restoration.js +64 -0
- package/dist/lib/dump/screenshot-store.js +165 -0
- package/dist/lib/index.js +184 -0
- package/dist/lib/report-cli.js +189 -0
- package/dist/lib/report-generator.js +244 -0
- package/dist/lib/report-markdown.js +253 -0
- package/dist/lib/report.js +333 -0
- package/dist/lib/screenshot-item.js +154 -0
- package/dist/lib/service/index.js +306 -0
- package/dist/lib/service/utils.js +47 -0
- package/dist/lib/skill/index.js +69 -0
- package/dist/lib/task-runner.js +298 -0
- package/dist/lib/task-timing.js +44 -0
- package/dist/lib/tree.js +51 -0
- package/dist/lib/types.js +298 -0
- package/dist/lib/utils.js +314 -0
- package/dist/lib/yaml/builder.js +55 -0
- package/dist/lib/yaml/index.js +79 -0
- package/dist/lib/yaml/player.js +459 -0
- package/dist/lib/yaml/utils.js +153 -0
- package/dist/lib/yaml.js +18 -0
- package/dist/types/agent/agent.d.ts +220 -0
- package/dist/types/agent/common.d.ts +0 -0
- package/dist/types/agent/execution-session.d.ts +36 -0
- package/dist/types/agent/index.d.ts +9 -0
- package/dist/types/agent/task-builder.d.ts +34 -0
- package/dist/types/agent/task-cache.d.ts +49 -0
- package/dist/types/agent/tasks.d.ts +70 -0
- package/dist/types/agent/ui-utils.d.ts +14 -0
- package/dist/types/agent/utils.d.ts +25 -0
- package/dist/types/ai-model/auto-glm/actions.d.ts +78 -0
- package/dist/types/ai-model/auto-glm/index.d.ts +6 -0
- package/dist/types/ai-model/auto-glm/parser.d.ts +18 -0
- package/dist/types/ai-model/auto-glm/planning.d.ts +12 -0
- package/dist/types/ai-model/auto-glm/prompt.d.ts +27 -0
- package/dist/types/ai-model/auto-glm/util.d.ts +13 -0
- package/dist/types/ai-model/connectivity.d.ts +20 -0
- package/dist/types/ai-model/conversation-history.d.ts +105 -0
- package/dist/types/ai-model/index.d.ts +16 -0
- package/dist/types/ai-model/inspect.d.ts +67 -0
- package/dist/types/ai-model/llm-planning.d.ts +19 -0
- package/dist/types/ai-model/prompt/common.d.ts +2 -0
- package/dist/types/ai-model/prompt/describe.d.ts +1 -0
- package/dist/types/ai-model/prompt/extraction.d.ts +7 -0
- package/dist/types/ai-model/prompt/llm-locator.d.ts +3 -0
- package/dist/types/ai-model/prompt/llm-planning.d.ts +10 -0
- package/dist/types/ai-model/prompt/llm-section-locator.d.ts +3 -0
- package/dist/types/ai-model/prompt/order-sensitive-judge.d.ts +2 -0
- package/dist/types/ai-model/prompt/playwright-generator.d.ts +26 -0
- package/dist/types/ai-model/prompt/ui-tars-planning.d.ts +2 -0
- package/dist/types/ai-model/prompt/util.d.ts +33 -0
- package/dist/types/ai-model/prompt/yaml-generator.d.ts +102 -0
- package/dist/types/ai-model/service-caller/codex-app-server.d.ts +42 -0
- package/dist/types/ai-model/service-caller/image-detail.d.ts +2 -0
- package/dist/types/ai-model/service-caller/index.d.ts +60 -0
- package/dist/types/ai-model/service-caller/request-timeout.d.ts +32 -0
- package/dist/types/ai-model/ui-tars-planning.d.ts +72 -0
- package/dist/types/common.d.ts +288 -0
- package/dist/types/device/device-options.d.ts +155 -0
- package/dist/types/device/index.d.ts +2565 -0
- package/dist/types/dump/html-utils.d.ts +75 -0
- package/dist/types/dump/index.d.ts +5 -0
- package/dist/types/dump/screenshot-restoration.d.ts +8 -0
- package/dist/types/dump/screenshot-store.d.ts +49 -0
- package/dist/types/index.d.ts +21 -0
- package/dist/types/report-cli.d.ts +36 -0
- package/dist/types/report-generator.d.ts +88 -0
- package/dist/types/report-markdown.d.ts +24 -0
- package/dist/types/report.d.ts +52 -0
- package/dist/types/screenshot-item.d.ts +67 -0
- package/dist/types/service/index.d.ts +24 -0
- package/dist/types/service/utils.d.ts +2 -0
- package/dist/types/skill/index.d.ts +25 -0
- package/dist/types/task-runner.d.ts +50 -0
- package/dist/types/task-timing.d.ts +8 -0
- package/dist/types/tree.d.ts +4 -0
- package/dist/types/types.d.ts +684 -0
- package/dist/types/utils.d.ts +45 -0
- package/dist/types/yaml/builder.d.ts +2 -0
- package/dist/types/yaml/index.d.ts +4 -0
- package/dist/types/yaml/player.d.ts +34 -0
- package/dist/types/yaml/utils.d.ts +9 -0
- package/dist/types/yaml.d.ts +215 -0
- package/package.json +130 -0
|
@@ -0,0 +1,272 @@
|
|
|
1
|
+
import { isAutoGLM } from "../ai-model/auto-glm/util.mjs";
|
|
2
|
+
import { AIResponseParseError, AiExtractElementInfo, AiLocateElement, callAIWithObjectResponse } from "../ai-model/index.mjs";
|
|
3
|
+
import { AiLocateSection, buildSearchAreaConfig } from "../ai-model/inspect.mjs";
|
|
4
|
+
import { elementDescriberInstruction } from "../ai-model/prompt/describe.mjs";
|
|
5
|
+
import { expandSearchArea } from "../common.mjs";
|
|
6
|
+
import { ServiceError } from "../types.mjs";
|
|
7
|
+
import { compositeElementInfoImg, cropByRect } from "@godscene/shared/img";
|
|
8
|
+
import { getDebug } from "@godscene/shared/logger";
|
|
9
|
+
import { assert } from "@godscene/shared/utils";
|
|
10
|
+
import { createServiceDump } from "./utils.mjs";
|
|
11
|
+
/**
 * Assign `value` to `obj[key]`.
 *
 * When `key` is already reachable on `obj` (own or inherited), the property is
 * (re)defined as an own, enumerable, configurable, writable data property;
 * otherwise a plain assignment is used.
 *
 * @param {object} obj - target object
 * @param {string|symbol} key - property name
 * @param {*} value - value to store
 * @returns {object} the same `obj`
 */
function _define_property(obj, key, value) {
    if (!(key in obj)) {
        obj[key] = value;
        return obj;
    }
    Object.defineProperty(obj, key, {
        configurable: true,
        enumerable: true,
        writable: true,
        value
    });
    return obj;
}
|
|
21
|
+
// Logger obtained from getDebug under the 'ai:service' label; Service.describe uses it to trace search-area cropping.
const debug = getDebug('ai:service');
|
|
22
|
+
/**
 * Wraps the AI model calls used to locate an element, extract data and
 * describe an element, and packages each result together with a service dump.
 */
class Service {
    /**
     * Locate a single element that matches `query`.
     *
     * @param query - object with a `prompt` and an optional `deepLocate` flag
     *   (a bare string fails the "should be an object" assertion).
     * @param opt - optional; `opt.context` overrides the retrieved context and
     *   `opt.planLocatedElement.rect` seeds the search area for deep locate.
     * @param modelConfig - model configuration; `modelFamily` is read from it.
     * @param abortSignal - forwarded to the AI calls.
     * @returns `{ element, rect, dump }`; `element` is `null` when nothing matched.
     * @throws ServiceError when the model reports errors or more than one
     *   element is returned.
     */
    async locate(query, opt, modelConfig, abortSignal) {
        const queryPrompt = typeof query === 'string' ? query : query.prompt;
        assert(queryPrompt, 'query is required for locate');
        assert(typeof query === 'object', 'query should be an object for locate');

        const hasPlanLocatedElement = Boolean(opt?.planLocatedElement?.rect);
        // A section search is only needed when deep locate has no planned rect to start from.
        let searchAreaPrompt = query.deepLocate && !hasPlanLocatedElement ? query.prompt : undefined;

        const modelFamily = modelConfig.modelFamily;
        if (searchAreaPrompt) {
            if (!modelFamily) {
                console.warn('The "deepLocate" feature is not supported with multimodal LLM. Please config VL model for Midscene. https://midscenejs.com/model-config');
                searchAreaPrompt = undefined;
            } else if (isAutoGLM(modelFamily)) {
                console.warn('The "deepLocate" feature is not supported with AutoGLM.');
                searchAreaPrompt = undefined;
            }
        }

        let context = opt?.context;
        if (!context) {
            context = await this.contextRetrieverFn();
        }

        let searchArea;
        let searchAreaRawResponse;
        let searchAreaUsage;
        let searchAreaResponse;
        if (query.deepLocate && hasPlanLocatedElement) {
            // Build the search area around the rect produced by planning.
            const areaConfig = await buildSearchAreaConfig({
                context,
                baseRect: opt.planLocatedElement.rect,
                modelFamily
            });
            searchArea = areaConfig.rect;
            searchAreaRawResponse = JSON.stringify({
                source: 'plan-located-element',
                rect: opt.planLocatedElement.rect
            });
            searchAreaResponse = {
                rect: searchArea,
                imageBase64: areaConfig.imageBase64,
                scale: areaConfig.scale,
                rawResponse: searchAreaRawResponse
            };
        } else if (searchAreaPrompt) {
            // Ask the model for the section that contains the target first.
            searchAreaResponse = await AiLocateSection({
                context,
                sectionDescription: searchAreaPrompt,
                modelConfig,
                abortSignal
            });
            const errorSuffix = searchAreaResponse.error ? `: ${searchAreaResponse.error}` : '';
            assert(searchAreaResponse.rect, `cannot find search area for "${searchAreaPrompt}"${errorSuffix}`);
            ({
                rawResponse: searchAreaRawResponse,
                usage: searchAreaUsage,
                rect: searchArea
            } = searchAreaResponse);
        }

        const startedAt = Date.now();
        const located = await AiLocateElement({
            context,
            targetElementDescription: queryPrompt,
            searchConfig: searchAreaResponse,
            modelConfig,
            abortSignal
        });
        const elapsedMs = Date.now() - startedAt;
        const { parseResult, rect, rawResponse, usage, reasoning_content } = located;

        const taskInfo = {
            ...(this.taskInfo || {}),
            durationMs: elapsedMs,
            rawResponse: JSON.stringify(rawResponse),
            formatResponse: JSON.stringify(parseResult),
            usage,
            searchArea,
            searchAreaRawResponse,
            searchAreaUsage,
            reasoning_content
        };
        const errorLog = parseResult.errors?.length
            ? `failed to locate element: \n${parseResult.errors.join('\n')}`
            : undefined;
        const elements = parseResult.elements || [];
        const dump = createServiceDump({
            type: 'locate',
            userQuery: {
                element: queryPrompt
            },
            matchedElement: elements,
            matchedRect: rect,
            data: null,
            taskInfo,
            deepLocate: Boolean(searchArea),
            error: errorLog
        });

        if (errorLog) {
            throw new ServiceError(errorLog, dump);
        }
        if (elements.length > 1) {
            throw new ServiceError(`locate: multiple elements found, length = ${elements.length}`, dump);
        }

        const [only] = elements;
        const element = elements.length === 1
            ? {
                center: only.center,
                rect: only.rect,
                description: only.description
            }
            : null;
        return {
            element,
            rect,
            dump
        };
    }

    /**
     * Extract data described by `dataDemand` from the given context.
     *
     * @param dataDemand - object or string describing the data to extract.
     * @param modelConfig - model configuration forwarded to the AI call.
     * @param opt - forwarded as `extractOption`.
     * @param pageDescription - forwarded to the AI call.
     * @param multimodalPrompt - forwarded to the AI call.
     * @param context - required UI context.
     * @returns `{ data, thought, usage, reasoning_content, dump }`.
     * @throws ServiceError when the response cannot be parsed, or when the
     *   model reports errors and returns no data. Other errors propagate as-is.
     */
    async extract(dataDemand, modelConfig, opt, pageDescription, multimodalPrompt, context) {
        assert(context, 'context is required for extract');
        const demandType = typeof dataDemand;
        assert(demandType === 'object' || demandType === 'string', `dataDemand should be object or string, but get ${typeof dataDemand}`);

        const startedAt = Date.now();
        let result;
        try {
            result = await AiExtractElementInfo({
                context,
                dataQuery: dataDemand,
                multimodalPrompt,
                extractOption: opt,
                modelConfig,
                pageDescription
            });
        } catch (error) {
            // Only parse failures are converted into a ServiceError carrying a dump.
            if (!(error instanceof AIResponseParseError)) {
                throw error;
            }
            const failedTaskInfo = {
                ...(this.taskInfo || {}),
                durationMs: Date.now() - startedAt,
                rawResponse: error.rawResponse,
                usage: error.usage
            };
            const failedDump = createServiceDump({
                type: 'extract',
                userQuery: {
                    dataDemand
                },
                matchedElement: [],
                data: null,
                taskInfo: failedTaskInfo,
                error: error.message
            });
            throw new ServiceError(error.message, failedDump);
        }
        const { parseResult, rawResponse, usage, reasoning_content } = result;

        const elapsedMs = Date.now() - startedAt;
        const taskInfo = {
            ...(this.taskInfo || {}),
            durationMs: elapsedMs,
            rawResponse,
            formatResponse: JSON.stringify(parseResult),
            usage,
            reasoning_content
        };
        const errorLog = parseResult.errors?.length
            ? `AI response error: \n${parseResult.errors.join('\n')}`
            : undefined;
        const { data, thought } = parseResult || {};
        const dump = createServiceDump({
            type: 'extract',
            userQuery: {
                dataDemand
            },
            matchedElement: [],
            data,
            taskInfo,
            error: errorLog
        });

        // Reported errors are tolerated as long as some data came back.
        if (errorLog && !data) {
            throw new ServiceError(errorLog, dump);
        }
        return {
            data,
            thought,
            usage,
            reasoning_content,
            dump
        };
    }

    /**
     * Ask the model to describe the element at `target`.
     *
     * @param target - a rect object, or an `[x, y]` array that is expanded to a
     *   30x30 rect centred on the point.
     * @param modelConfig - model configuration; `modelFamily` is read from it.
     * @param opt - optional; when `opt.deepLocate` is set the marked screenshot
     *   is cropped to an expanded search area before being sent.
     * @returns the parsed response content (asserted to contain `description`).
     */
    async describe(target, modelConfig, opt) {
        assert(target, 'target is required for service.describe');
        const context = await this.contextRetrieverFn();
        const shotSize = context.shotSize;
        const screenshotBase64 = context.screenshot.base64;
        assert(screenshotBase64, 'screenshot is required for service.describe');
        const modelFamily = modelConfig.modelFamily;
        const systemPrompt = elementDescriberInstruction();

        const defaultRectSize = 30;
        let targetRect = target;
        if (Array.isArray(target)) {
            const half = defaultRectSize / 2;
            targetRect = {
                left: Math.floor(target[0] - half),
                top: Math.floor(target[1] - half),
                width: defaultRectSize,
                height: defaultRectSize
            };
        }

        // Draw the target rect onto the screenshot so the model can see what to describe.
        let imagePayload = await compositeElementInfoImg({
            inputImgBase64: screenshotBase64,
            size: shotSize,
            elementsPositionInfo: [
                {
                    rect: targetRect
                }
            ],
            borderThickness: 3
        });
        if (opt?.deepLocate) {
            const searchArea = expandSearchArea(targetRect, shotSize);
            debug('describe: cropping to searchArea', searchArea);
            const cropped = await cropByRect(imagePayload, searchArea, modelFamily === 'qwen2.5-vl');
            imagePayload = cropped.imageBase64;
        }

        const systemMessage = {
            role: 'system',
            content: systemPrompt
        };
        const userMessage = {
            role: 'user',
            content: [
                {
                    type: 'image_url',
                    image_url: {
                        url: imagePayload,
                        detail: 'high'
                    }
                }
            ]
        };
        const response = await callAIWithObjectResponse([systemMessage, userMessage], modelConfig);
        const content = response.content;
        assert(!content.error, `describe failed: ${content.error}`);
        assert(content.description, 'failed to describe the element');
        return content;
    }

    /**
     * @param context - a UI context object, or a function returning one (or a
     *   promise of one); stored as `contextRetrieverFn`.
     * @param opt - optional; `opt.taskInfo` is merged into every dump's task info.
     */
    constructor(context, opt) {
        _define_property(this, "contextRetrieverFn", void 0);
        _define_property(this, "taskInfo", void 0);
        assert(context, 'context is required for Service');
        this.contextRetrieverFn = typeof context === 'function'
            ? context
            : () => Promise.resolve(context);
        if (opt?.taskInfo !== undefined) {
            this.taskInfo = opt.taskInfo;
        }
    }
}
|
|
272
|
+
export { Service as default };
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { uuid } from "@godscene/shared/utils";
|
|
2
|
+
/**
 * Build a service dump record from `data`.
 *
 * The result starts with a generated `logId` and the current `logTime`;
 * fields in `data` are spread last, so they override both when present.
 *
 * @param {object} data - dump fields supplied by the caller
 * @returns {object} a new object combining the generated fields and `data`
 */
function createServiceDump(data) {
    const logTime = Date.now();
    return {
        logId: uuid(),
        logTime,
        ...data
    };
}
|
|
13
|
+
export { createServiceDump };
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import { reportCLIError, runToolsCLI } from "@godscene/shared/cli";
|
|
2
|
+
import { BaseMidsceneTools } from "@godscene/shared/mcp/base-tools";
|
|
3
|
+
import { Agent } from "../agent/agent.mjs";
|
|
4
|
+
/**
 * Store `value` under `key` on `obj` and return `obj`.
 *
 * If `key` already exists on `obj` or its prototype chain, an own data
 * property (enumerable, configurable, writable) is defined; otherwise the
 * value is assigned directly.
 */
function _define_property(obj, key, value) {
    const alreadyPresent = key in obj;
    if (alreadyPresent) {
        const descriptor = {
            value,
            enumerable: true,
            configurable: true,
            writable: true
        };
        Object.defineProperty(obj, key, descriptor);
    } else {
        obj[key] = value;
    }
    return obj;
}
|
|
14
|
+
/**
 * Tools implementation that creates its devices and agent from a
 * caller-supplied device class.
 */
class SkillMidsceneTools extends BaseMidsceneTools {
    /** Create a fresh device instance from the configured device class. */
    createTemporaryDevice() {
        const { DeviceClass } = this;
        return new DeviceClass();
    }

    /**
     * Return the agent, creating it (with a new device) on first use.
     */
    async ensureAgent() {
        if (this.agent) {
            return this.agent;
        }
        const { DeviceClass } = this;
        this.agent = new Agent(new DeviceClass());
        return this.agent;
    }

    /**
     * @param DeviceClass - constructor invoked with no arguments to create devices
     */
    constructor(DeviceClass) {
        super();
        _define_property(this, "DeviceClass", void 0);
        this.DeviceClass = DeviceClass;
    }
}
|
|
29
|
+
/**
 * Run the tools CLI for the given device class.
 *
 * @param options - `options.DeviceClass` is passed to SkillMidsceneTools and
 *   `options.scriptName` to runToolsCLI.
 * @returns the promise from runToolsCLI; on rejection the error is reported
 *   and the process exits with the code returned by reportCLIError.
 */
function runSkillCLI(options) {
    const exitWithReportedError = (e) => {
        process.exit(reportCLIError(e));
    };
    const tools = new SkillMidsceneTools(options.DeviceClass);
    const running = runToolsCLI(tools, options.scriptName);
    return running.catch(exitWithReportedError);
}
|
|
35
|
+
export { runSkillCLI };
|
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
import { setTimingFieldOnce } from "./task-timing.mjs";
|
|
2
|
+
import { ExecutionDump } from "./types.mjs";
|
|
3
|
+
import { getDebug } from "@godscene/shared/logger";
|
|
4
|
+
import { assert, uuid } from "@godscene/shared/utils";
|
|
5
|
+
/**
 * Set `obj[key]` to `value`, returning `obj`.
 *
 * Keys already visible on `obj` (own or inherited) are written with
 * Object.defineProperty as an enumerable, configurable, writable own
 * property; new keys are written by plain assignment.
 */
function _define_property(obj, key, value) {
    if (key in obj) {
        Object.defineProperty(obj, key, {
            writable: true,
            configurable: true,
            enumerable: true,
            value
        });
        return obj;
    }
    obj[key] = value;
    return obj;
}
|
|
15
|
+
// Logger obtained from getDebug under the 'task-runner' label; TaskRunner.getUiContext uses it to report cache reuse.
const debug = getDebug('task-runner');
|
|
16
|
+
// Maximum age, in milliseconds, of a captured UI context that TaskRunner.getUiContext will reuse instead of rebuilding.
const UI_CONTEXT_CACHE_TTL_MS = 300;
|
|
17
|
+
/**
 * Sequentially executes a queue of tasks, tracking per-task status and
 * timing, caching the UI context briefly between tasks, and notifying
 * optional `onTaskStart` / `onTaskUpdate` callbacks.
 *
 * Runner status values used here: 'init', 'pending', 'running',
 * 'completed', 'error'. Task status values: 'pending', 'running',
 * 'finished', 'failed', 'cancelled'.
 */
class TaskRunner {
    /**
     * Invoke the `onTaskUpdate` callback (if any) with this runner and an optional error.
     */
    async emitOnTaskUpdate(error) {
        if (!this.onTaskUpdate) return;
        await this.onTaskUpdate(this, error);
    }

    /**
     * Return the UI context, reusing the last captured one when it is at most
     * UI_CONTEXT_CACHE_TTL_MS old and `options.forceRefresh` is not set.
     * The cache is cleared when the builder returns a falsy value or throws.
     */
    async getUiContext(options) {
        const now = Date.now();
        const shouldReuse = !options?.forceRefresh && this.lastUiContext && now - this.lastUiContext.capturedAt <= UI_CONTEXT_CACHE_TTL_MS;
        if (shouldReuse && this.lastUiContext?.context) {
            debug(`reuse cached uiContext captured ${now - this.lastUiContext.capturedAt}ms ago`);
            return this.lastUiContext?.context;
        }
        try {
            const uiContext = await this.uiContextBuilder();
            if (uiContext) this.lastUiContext = {
                context: uiContext,
                capturedAt: Date.now()
            };
            else this.lastUiContext = void 0;
            return uiContext;
        } catch (error) {
            this.lastUiContext = void 0;
            throw error;
        }
    }

    /**
     * Capture a fresh screenshot via a forced UI context refresh.
     * Best-effort: failures are logged and `undefined` is returned.
     */
    async captureScreenshot() {
        try {
            const uiContext = await this.getUiContext({
                forceRefresh: true
            });
            return uiContext?.screenshot;
        } catch (error) {
            console.error('error while capturing screenshot', error);
        }
    }

    /**
     * Append a screenshot recorder item to `task.recorder`, creating the
     * array if needed. Does nothing when `phase` or `screenshot` is missing.
     */
    attachRecorderItem(task, screenshot, phase) {
        if (!phase || !screenshot) return;
        const recorderItem = {
            type: 'screenshot',
            ts: Date.now(),
            screenshot,
            timing: phase
        };
        if (!task.recorder) {
            task.recorder = [
                recorderItem
            ];
            return;
        }
        task.recorder.push(recorderItem);
    }

    /**
     * Return a copy of `task` with a generated `taskId` and 'pending' status;
     * fields already present on `task` take precedence.
     */
    markTaskAsPending(task) {
        return {
            taskId: uuid(),
            status: 'pending',
            ...task
        };
    }

    /**
     * When the runner is in the error state, require `options.allowWhenError`
     * (asserting with `errorMessage` otherwise) and reset the status to
     * 'pending' (tasks exist) or 'init' (no tasks).
     */
    normalizeStatusFromError(options, errorMessage) {
        if ('error' !== this.status) return;
        assert(options?.allowWhenError, errorMessage || `task runner is in error state, cannot proceed\nerror=${this.latestErrorTask()?.error}\n${this.latestErrorTask()?.errorStack}`);
        this.status = this.tasks.length > 0 ? 'pending' : 'init';
    }

    /**
     * Queue one task or an array of tasks as pending and notify listeners.
     */
    async append(task, options) {
        this.normalizeStatusFromError(options, `task runner is in error state, cannot append task\nerror=${this.latestErrorTask()?.error}\n${this.latestErrorTask()?.errorStack}`);
        if (Array.isArray(task)) this.tasks.push(...task.map((item)=>this.markTaskAsPending(item)));
        else this.tasks.push(this.markTaskAsPending(task));
        if ('running' !== this.status) this.status = 'pending';
        await this.emitOnTaskUpdate();
    }

    /** Append the task(s) and immediately run the queue. */
    async appendAndFlush(task, options) {
        await this.append(task, options);
        return this.flush(options);
    }

    /**
     * Run every pending task in order, starting from the first pending one.
     *
     * On the first failure the task is marked 'failed', the remaining tasks
     * are marked 'cancelled', the runner enters the 'error' state and a
     * TaskExecutionError is thrown. On success the runner becomes
     * 'completed'.
     *
     * @returns `{ thought, output }` of the last task, or `undefined` when
     *   there was nothing pending.
     * @throws TaskExecutionError when a task fails.
     */
    async flush(options) {
        if ('init' === this.status && this.tasks.length > 0) console.warn('illegal state for task runner, status is init but tasks are not empty');
        this.normalizeStatusFromError(options, 'task runner is in error state');
        assert('running' !== this.status, 'task runner is already running');
        assert('completed' !== this.status, 'task runner is already completed');
        const nextPendingIndex = this.tasks.findIndex((task)=>'pending' === task.status);
        if (nextPendingIndex < 0) return;
        this.status = 'running';
        await this.emitOnTaskUpdate();
        let taskIndex = nextPendingIndex;
        let successfullyCompleted = true;
        // Output of the most recent Planning/Locate task; its element is handed to later executors.
        let previousFindOutput;
        while(taskIndex < this.tasks.length){
            const task = this.tasks[taskIndex];
            assert('pending' === task.status, `task status should be pending, but got: ${task.status}`);
            task.timing = {
                start: Date.now()
            };
            try {
                task.status = 'running';
                await this.emitOnTaskUpdate();
                // A failing onTaskStart hook must not fail the task itself.
                try {
                    if (this.onTaskStart) await this.onTaskStart(task);
                } catch (e) {
                    console.error('error in onTaskStart', e);
                }
                assert([
                    'Insight',
                    'Action Space',
                    'Planning'
                ].indexOf(task.type) >= 0, `unsupported task type: ${task.type}`);
                const { executor, param } = task;
                assert(executor, `executor is required for task type: ${task.type}`);
                // Insight tasks always look at a freshly captured context.
                const forceRefresh = 'Insight' === task.type;
                setTimingFieldOnce(task.timing, 'getUiContextStart');
                const uiContext = await this.getUiContext({
                    forceRefresh
                });
                setTimingFieldOnce(task.timing, 'getUiContextEnd');
                task.uiContext = uiContext;
                const executorContext = {
                    task,
                    element: previousFindOutput?.element,
                    uiContext
                };
                const taskType = task.type;
                if ('Insight' === taskType) {
                    assert([
                        'Query',
                        'Assert',
                        'WaitFor',
                        'Boolean',
                        'Number',
                        'String'
                    ].includes(task.subType), `unsupported service subType: ${task.subType}`);
                }
                // Every supported task type runs its executor the same way; the
                // type assertion above already rejects anything else.
                const returnValue = await task.executor(param, executorContext);
                if ('Planning' === taskType && 'Locate' === task.subType) previousFindOutput = returnValue?.output;
                const isLastTask = taskIndex === this.tasks.length - 1;
                if (isLastTask) {
                    setTimingFieldOnce(task.timing, 'captureAfterCallingSnapshotStart');
                    const screenshot = await this.captureScreenshot();
                    this.attachRecorderItem(task, screenshot, 'after-calling');
                    setTimingFieldOnce(task.timing, 'captureAfterCallingSnapshotEnd');
                }
                Object.assign(task, returnValue);
                task.status = 'finished';
                task.timing.end = Date.now();
                task.timing.cost = task.timing.end - task.timing.start;
                await this.emitOnTaskUpdate();
                taskIndex++;
            } catch (e) {
                successfullyCompleted = false;
                task.error = e;
                task.errorMessage = e?.message || ('string' == typeof e ? e : 'error-without-message');
                // `e` may be null/undefined (e.g. `throw undefined`); guard the
                // stack access so the failure bookkeeping below still runs.
                task.errorStack = e?.stack;
                task.status = 'failed';
                task.timing.end = Date.now();
                task.timing.cost = task.timing.end - task.timing.start;
                await this.emitOnTaskUpdate();
                break;
            }
        }
        // After a failure, everything queued behind the failed task is cancelled.
        for(let i = taskIndex + 1; i < this.tasks.length; i++)this.tasks[i].status = 'cancelled';
        if (taskIndex + 1 < this.tasks.length) await this.emitOnTaskUpdate();
        let finalizeError;
        if (successfullyCompleted) {
            this.status = 'completed';
            await this.emitOnTaskUpdate();
        } else {
            this.status = 'error';
            const errorTask = this.latestErrorTask();
            const messageBase = errorTask?.errorMessage || (errorTask?.error ? String(errorTask.error) : 'Task execution failed');
            const stack = errorTask?.errorStack;
            const message = stack ? `${messageBase}\n${stack}` : messageBase;
            finalizeError = new TaskExecutionError(message, this, errorTask, {
                cause: errorTask?.error
            });
            await this.emitOnTaskUpdate(finalizeError);
        }
        if (finalizeError) throw finalizeError;
        if (this.tasks.length) {
            // On success taskIndex is one past the end, so clamp to the last task.
            const outputIndex = Math.min(taskIndex, this.tasks.length - 1);
            const { thought, output } = this.tasks[outputIndex];
            return {
                thought,
                output
            };
        }
    }

    /** Whether the runner is currently in the 'error' state. */
    isInErrorState() {
        return 'error' === this.status;
    }

    /**
     * Return the most recently queued task whose status is 'failed', or
     * `null` when the runner is not in the error state or none is found.
     */
    latestErrorTask() {
        if ('error' !== this.status) return null;
        for(let i = this.tasks.length - 1; i >= 0; i--)if ('failed' === this.tasks[i].status) return this.tasks[i];
        return null;
    }

    /** Build an ExecutionDump from the runner's id, log time, name and tasks. */
    dump() {
        return new ExecutionDump({
            id: this.id,
            logTime: this.executionLogTime,
            name: this.name,
            tasks: this.tasks
        });
    }

    /**
     * Queue and run an 'Action Space' / 'Error' task whose executor throws
     * `errorMsg`, so the failure goes through the normal flush error path.
     */
    async appendErrorPlan(errorMsg) {
        const errorTask = {
            type: 'Action Space',
            subType: 'Error',
            param: {
                thought: errorMsg
            },
            thought: errorMsg,
            executor: async ()=>{
                throw new Error(errorMsg || 'error without thought');
            }
        };
        await this.appendAndFlush(errorTask);
        return {
            output: void 0,
            runner: this
        };
    }

    /**
     * @param name - runner name, reported in dumps
     * @param uiContextBuilder - function producing the UI context (may be async)
     * @param options - optional `tasks` (queued as pending), `onTaskStart`
     *   and `onTaskUpdate` callbacks
     */
    constructor(name, uiContextBuilder, options){
        _define_property(this, "id", void 0);
        _define_property(this, "name", void 0);
        _define_property(this, "tasks", void 0);
        _define_property(this, "status", void 0);
        _define_property(this, "onTaskStart", void 0);
        _define_property(this, "uiContextBuilder", void 0);
        _define_property(this, "onTaskUpdate", void 0);
        _define_property(this, "executionLogTime", void 0);
        _define_property(this, "lastUiContext", void 0);
        this.id = uuid();
        this.status = options?.tasks && options.tasks.length > 0 ? 'pending' : 'init';
        this.name = name;
        this.tasks = (options?.tasks || []).map((item)=>this.markTaskAsPending(item));
        this.onTaskStart = options?.onTaskStart;
        this.uiContextBuilder = uiContextBuilder;
        this.onTaskUpdate = options?.onTaskUpdate;
        this.executionLogTime = Date.now();
    }
}
|
|
254
|
+
/**
 * Error thrown when a task run fails; carries the runner and the failed task.
 */
class TaskExecutionError extends Error {
    /**
     * @param message - error message
     * @param runner - the runner that was executing
     * @param errorTask - the task that failed
     * @param options - forwarded to the Error constructor (e.g. `{ cause }`)
     */
    constructor(message, runner, errorTask, options) {
        super(message, options);
        _define_property(this, "runner", void 0);
        _define_property(this, "errorTask", void 0);
        this.runner = runner;
        this.errorTask = errorTask;
    }
}
|
|
261
|
+
export { TaskExecutionError, TaskRunner };
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import { getDebug } from "@godscene/shared/logger";
|
|
2
|
+
// Logger obtained from getDebug under the 'task-timing' label; setTimingFieldOnce uses it for its warnings.
const debugTiming = getDebug('task-timing');
|
|
3
|
+
/**
 * Record the current timestamp on `timing[field]`, but only the first time.
 *
 * A missing `timing` object or an already-set field is reported through the
 * timing debug logger and leaves everything unchanged.
 *
 * @param {object|undefined} timing - timing record to update
 * @param {string} field - name of the timestamp field
 * @returns {undefined}
 */
function setTimingFieldOnce(timing, field) {
    if (!timing) {
        debugTiming(`[warning] timing object missing, skip set. field=${field}`);
        return;
    }
    const value = Date.now();
    const existingValue = timing[field];
    if (existingValue === undefined) {
        timing[field] = value;
        return;
    }
    debugTiming(`[warning] duplicate timing field set ignored. field=${field}, existing=${existingValue}, incoming=${value}`);
}
|
|
10
|
+
export { setTimingFieldOnce };
|
package/dist/es/tree.mjs
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { descriptionOfTree, treeToList, trimAttributes, truncateText } from "@godscene/shared/extractor";
|
|
2
|
+
// Flattened element count at or above which tree_descriptionOfTree logs a size warning (checked only when visibleOnly is false).
const ELEMENT_COUNT_WARNING_THRESHOLD = 5000;
|
|
3
|
+
// Text passed to console.warn by tree_descriptionOfTree when the element count reaches the warning threshold.
const TREE_SIZE_WARNING_MESSAGE = 'The number of elements is too large, it may cause the prompt to be too long, please use domIncluded: "visible-only" to reduce the number of elements';
|
|
4
|
+
/**
 * Describe an element tree, warning first when a non-visible-only tree is
 * very large.
 *
 * When `visibleOnly` is false the tree is flattened and, if it holds at least
 * ELEMENT_COUNT_WARNING_THRESHOLD elements, a warning is logged. All
 * arguments are then forwarded unchanged to the shared `descriptionOfTree`.
 *
 * @param tree - element tree to describe
 * @param truncateTextLength - forwarded to `descriptionOfTree`
 * @param {boolean} [filterNonTextContent=false] - forwarded to `descriptionOfTree`
 * @param {boolean} [visibleOnly=true] - forwarded; also disables the size check when true
 * @returns whatever `descriptionOfTree` returns
 */
function tree_descriptionOfTree(tree, truncateTextLength, filterNonTextContent = false, visibleOnly = true) {
    const shouldCheckSize = !visibleOnly;
    if (shouldCheckSize && treeToList(tree).length >= ELEMENT_COUNT_WARNING_THRESHOLD) {
        console.warn(TREE_SIZE_WARNING_MESSAGE);
    }
    return descriptionOfTree(tree, truncateTextLength, filterNonTextContent, visibleOnly);
}
|
|
11
|
+
export { tree_descriptionOfTree as descriptionOfTree, trimAttributes, truncateText };
|