@godscene/core 1.7.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +9 -0
- package/dist/es/agent/agent.mjs +767 -0
- package/dist/es/agent/common.mjs +0 -0
- package/dist/es/agent/execution-session.mjs +39 -0
- package/dist/es/agent/index.mjs +6 -0
- package/dist/es/agent/task-builder.mjs +343 -0
- package/dist/es/agent/task-cache.mjs +212 -0
- package/dist/es/agent/tasks.mjs +428 -0
- package/dist/es/agent/ui-utils.mjs +101 -0
- package/dist/es/agent/utils.mjs +167 -0
- package/dist/es/ai-model/auto-glm/actions.mjs +237 -0
- package/dist/es/ai-model/auto-glm/index.mjs +6 -0
- package/dist/es/ai-model/auto-glm/parser.mjs +237 -0
- package/dist/es/ai-model/auto-glm/planning.mjs +69 -0
- package/dist/es/ai-model/auto-glm/prompt.mjs +220 -0
- package/dist/es/ai-model/auto-glm/util.mjs +7 -0
- package/dist/es/ai-model/connectivity.mjs +136 -0
- package/dist/es/ai-model/conversation-history.mjs +193 -0
- package/dist/es/ai-model/index.mjs +12 -0
- package/dist/es/ai-model/inspect.mjs +395 -0
- package/dist/es/ai-model/llm-planning.mjs +231 -0
- package/dist/es/ai-model/prompt/common.mjs +5 -0
- package/dist/es/ai-model/prompt/describe.mjs +64 -0
- package/dist/es/ai-model/prompt/extraction.mjs +129 -0
- package/dist/es/ai-model/prompt/llm-locator.mjs +49 -0
- package/dist/es/ai-model/prompt/llm-planning.mjs +584 -0
- package/dist/es/ai-model/prompt/llm-section-locator.mjs +42 -0
- package/dist/es/ai-model/prompt/order-sensitive-judge.mjs +33 -0
- package/dist/es/ai-model/prompt/playwright-generator.mjs +115 -0
- package/dist/es/ai-model/prompt/ui-tars-planning.mjs +34 -0
- package/dist/es/ai-model/prompt/util.mjs +57 -0
- package/dist/es/ai-model/prompt/yaml-generator.mjs +201 -0
- package/dist/es/ai-model/service-caller/codex-app-server.mjs +573 -0
- package/dist/es/ai-model/service-caller/image-detail.mjs +4 -0
- package/dist/es/ai-model/service-caller/index.mjs +648 -0
- package/dist/es/ai-model/service-caller/request-timeout.mjs +47 -0
- package/dist/es/ai-model/ui-tars-planning.mjs +247 -0
- package/dist/es/common.mjs +382 -0
- package/dist/es/device/device-options.mjs +0 -0
- package/dist/es/device/index.mjs +340 -0
- package/dist/es/dump/html-utils.mjs +290 -0
- package/dist/es/dump/index.mjs +3 -0
- package/dist/es/dump/screenshot-restoration.mjs +30 -0
- package/dist/es/dump/screenshot-store.mjs +125 -0
- package/dist/es/index.mjs +17 -0
- package/dist/es/report-cli.mjs +149 -0
- package/dist/es/report-generator.mjs +203 -0
- package/dist/es/report-markdown.mjs +216 -0
- package/dist/es/report.mjs +287 -0
- package/dist/es/screenshot-item.mjs +120 -0
- package/dist/es/service/index.mjs +272 -0
- package/dist/es/service/utils.mjs +13 -0
- package/dist/es/skill/index.mjs +35 -0
- package/dist/es/task-runner.mjs +261 -0
- package/dist/es/task-timing.mjs +10 -0
- package/dist/es/tree.mjs +11 -0
- package/dist/es/types.mjs +202 -0
- package/dist/es/utils.mjs +232 -0
- package/dist/es/yaml/builder.mjs +11 -0
- package/dist/es/yaml/index.mjs +4 -0
- package/dist/es/yaml/player.mjs +425 -0
- package/dist/es/yaml/utils.mjs +100 -0
- package/dist/es/yaml.mjs +0 -0
- package/dist/lib/agent/agent.js +815 -0
- package/dist/lib/agent/common.js +5 -0
- package/dist/lib/agent/execution-session.js +73 -0
- package/dist/lib/agent/index.js +76 -0
- package/dist/lib/agent/task-builder.js +380 -0
- package/dist/lib/agent/task-cache.js +264 -0
- package/dist/lib/agent/tasks.js +471 -0
- package/dist/lib/agent/ui-utils.js +153 -0
- package/dist/lib/agent/utils.js +238 -0
- package/dist/lib/ai-model/auto-glm/actions.js +271 -0
- package/dist/lib/ai-model/auto-glm/index.js +64 -0
- package/dist/lib/ai-model/auto-glm/parser.js +280 -0
- package/dist/lib/ai-model/auto-glm/planning.js +103 -0
- package/dist/lib/ai-model/auto-glm/prompt.js +257 -0
- package/dist/lib/ai-model/auto-glm/util.js +44 -0
- package/dist/lib/ai-model/connectivity.js +180 -0
- package/dist/lib/ai-model/conversation-history.js +227 -0
- package/dist/lib/ai-model/index.js +127 -0
- package/dist/lib/ai-model/inspect.js +441 -0
- package/dist/lib/ai-model/llm-planning.js +268 -0
- package/dist/lib/ai-model/prompt/common.js +39 -0
- package/dist/lib/ai-model/prompt/describe.js +98 -0
- package/dist/lib/ai-model/prompt/extraction.js +169 -0
- package/dist/lib/ai-model/prompt/llm-locator.js +86 -0
- package/dist/lib/ai-model/prompt/llm-planning.js +621 -0
- package/dist/lib/ai-model/prompt/llm-section-locator.js +79 -0
- package/dist/lib/ai-model/prompt/order-sensitive-judge.js +70 -0
- package/dist/lib/ai-model/prompt/playwright-generator.js +176 -0
- package/dist/lib/ai-model/prompt/ui-tars-planning.js +71 -0
- package/dist/lib/ai-model/prompt/util.js +103 -0
- package/dist/lib/ai-model/prompt/yaml-generator.js +262 -0
- package/dist/lib/ai-model/service-caller/codex-app-server.js +622 -0
- package/dist/lib/ai-model/service-caller/image-detail.js +38 -0
- package/dist/lib/ai-model/service-caller/index.js +716 -0
- package/dist/lib/ai-model/service-caller/request-timeout.js +93 -0
- package/dist/lib/ai-model/ui-tars-planning.js +281 -0
- package/dist/lib/common.js +491 -0
- package/dist/lib/device/device-options.js +18 -0
- package/dist/lib/device/index.js +467 -0
- package/dist/lib/dump/html-utils.js +366 -0
- package/dist/lib/dump/index.js +58 -0
- package/dist/lib/dump/screenshot-restoration.js +64 -0
- package/dist/lib/dump/screenshot-store.js +165 -0
- package/dist/lib/index.js +184 -0
- package/dist/lib/report-cli.js +189 -0
- package/dist/lib/report-generator.js +244 -0
- package/dist/lib/report-markdown.js +253 -0
- package/dist/lib/report.js +333 -0
- package/dist/lib/screenshot-item.js +154 -0
- package/dist/lib/service/index.js +306 -0
- package/dist/lib/service/utils.js +47 -0
- package/dist/lib/skill/index.js +69 -0
- package/dist/lib/task-runner.js +298 -0
- package/dist/lib/task-timing.js +44 -0
- package/dist/lib/tree.js +51 -0
- package/dist/lib/types.js +298 -0
- package/dist/lib/utils.js +314 -0
- package/dist/lib/yaml/builder.js +55 -0
- package/dist/lib/yaml/index.js +79 -0
- package/dist/lib/yaml/player.js +459 -0
- package/dist/lib/yaml/utils.js +153 -0
- package/dist/lib/yaml.js +18 -0
- package/dist/types/agent/agent.d.ts +220 -0
- package/dist/types/agent/common.d.ts +0 -0
- package/dist/types/agent/execution-session.d.ts +36 -0
- package/dist/types/agent/index.d.ts +9 -0
- package/dist/types/agent/task-builder.d.ts +34 -0
- package/dist/types/agent/task-cache.d.ts +49 -0
- package/dist/types/agent/tasks.d.ts +70 -0
- package/dist/types/agent/ui-utils.d.ts +14 -0
- package/dist/types/agent/utils.d.ts +25 -0
- package/dist/types/ai-model/auto-glm/actions.d.ts +78 -0
- package/dist/types/ai-model/auto-glm/index.d.ts +6 -0
- package/dist/types/ai-model/auto-glm/parser.d.ts +18 -0
- package/dist/types/ai-model/auto-glm/planning.d.ts +12 -0
- package/dist/types/ai-model/auto-glm/prompt.d.ts +27 -0
- package/dist/types/ai-model/auto-glm/util.d.ts +13 -0
- package/dist/types/ai-model/connectivity.d.ts +20 -0
- package/dist/types/ai-model/conversation-history.d.ts +105 -0
- package/dist/types/ai-model/index.d.ts +16 -0
- package/dist/types/ai-model/inspect.d.ts +67 -0
- package/dist/types/ai-model/llm-planning.d.ts +19 -0
- package/dist/types/ai-model/prompt/common.d.ts +2 -0
- package/dist/types/ai-model/prompt/describe.d.ts +1 -0
- package/dist/types/ai-model/prompt/extraction.d.ts +7 -0
- package/dist/types/ai-model/prompt/llm-locator.d.ts +3 -0
- package/dist/types/ai-model/prompt/llm-planning.d.ts +10 -0
- package/dist/types/ai-model/prompt/llm-section-locator.d.ts +3 -0
- package/dist/types/ai-model/prompt/order-sensitive-judge.d.ts +2 -0
- package/dist/types/ai-model/prompt/playwright-generator.d.ts +26 -0
- package/dist/types/ai-model/prompt/ui-tars-planning.d.ts +2 -0
- package/dist/types/ai-model/prompt/util.d.ts +33 -0
- package/dist/types/ai-model/prompt/yaml-generator.d.ts +102 -0
- package/dist/types/ai-model/service-caller/codex-app-server.d.ts +42 -0
- package/dist/types/ai-model/service-caller/image-detail.d.ts +2 -0
- package/dist/types/ai-model/service-caller/index.d.ts +60 -0
- package/dist/types/ai-model/service-caller/request-timeout.d.ts +32 -0
- package/dist/types/ai-model/ui-tars-planning.d.ts +72 -0
- package/dist/types/common.d.ts +288 -0
- package/dist/types/device/device-options.d.ts +155 -0
- package/dist/types/device/index.d.ts +2565 -0
- package/dist/types/dump/html-utils.d.ts +75 -0
- package/dist/types/dump/index.d.ts +5 -0
- package/dist/types/dump/screenshot-restoration.d.ts +8 -0
- package/dist/types/dump/screenshot-store.d.ts +49 -0
- package/dist/types/index.d.ts +21 -0
- package/dist/types/report-cli.d.ts +36 -0
- package/dist/types/report-generator.d.ts +88 -0
- package/dist/types/report-markdown.d.ts +24 -0
- package/dist/types/report.d.ts +52 -0
- package/dist/types/screenshot-item.d.ts +67 -0
- package/dist/types/service/index.d.ts +24 -0
- package/dist/types/service/utils.d.ts +2 -0
- package/dist/types/skill/index.d.ts +25 -0
- package/dist/types/task-runner.d.ts +50 -0
- package/dist/types/task-timing.d.ts +8 -0
- package/dist/types/tree.d.ts +4 -0
- package/dist/types/types.d.ts +684 -0
- package/dist/types/utils.d.ts +45 -0
- package/dist/types/yaml/builder.d.ts +2 -0
- package/dist/types/yaml/index.d.ts +4 -0
- package/dist/types/yaml/player.d.ts +34 -0
- package/dist/types/yaml/utils.d.ts +9 -0
- package/dist/types/yaml.d.ts +215 -0
- package/package.json +130 -0
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
import { ScreenshotItem } from "../screenshot-item.mjs";
|
|
2
|
+
import { uploadTestInfoToServer } from "../utils.mjs";
|
|
3
|
+
import { MIDSCENE_REPORT_QUIET, MIDSCENE_REPORT_TAG_NAME, globalConfigManager } from "@godscene/shared/env";
|
|
4
|
+
import { generateElementByRect } from "@godscene/shared/extractor";
|
|
5
|
+
import { imageInfoOfBase64, resizeImgBase64 } from "@godscene/shared/img";
|
|
6
|
+
import { getDebug } from "@godscene/shared/logger";
|
|
7
|
+
import { assert, logMsg, uuid } from "@godscene/shared/utils";
|
|
8
|
+
import dayjs from "dayjs";
|
|
9
|
+
import { debug as external_task_cache_mjs_debug } from "./task-cache.mjs";
|
|
10
|
+
/**
 * Collects the size and a screenshot of an interface and derives the scale
 * factors relating the (optionally shrunk) screenshot to logical coordinates.
 *
 * @param interfaceInstance - Object exposing `size()` and `screenshotBase64()`;
 *   `describe()` is optional and its result is forwarded as `testUrl`.
 * @param _opt - Options; `uploadServerUrl` and `screenshotShrinkFactor`
 *   (defaults to 1, must be a finite number >= 1) are read from it.
 * @returns An object with `shotSize`, `deprecatedDpr`, `screenshot` and
 *   `shrunkShotToLogicalRatio`.
 * @throws {Error} When the reported size, the screenshot dimensions or the
 *   shrink factor are not usable numbers.
 */
async function commonContextParser(interfaceInstance, _opt) {
  const log = getDebug('commonContextParser');

  // Shared guard for both the logical size and the screenshot dimensions.
  const ensureUsableDimensions = (subject, width, height) => {
    if (!Number.isFinite(width) || !Number.isFinite(height)) {
      throw new Error(`Invalid ${subject}: width and height must be finite numbers. Received width: ${width}, height: ${height}`);
    }
    if (width <= 0 || height <= 0) {
      throw new Error(`Invalid ${subject}: width and height must be positive numbers. Received width: ${width}, height: ${height}`);
    }
  };

  assert(interfaceInstance, 'interfaceInstance is required');

  log("Getting interface description");
  const description = interfaceInstance.describe?.() || '';
  log("Interface description end");

  log('Uploading test info to server');
  uploadTestInfoToServer({
    testUrl: description,
    serverUrl: _opt.uploadServerUrl,
  });
  log('UploadTestInfoToServer end');

  log('will get size');
  const interfaceSize = await interfaceInstance.size();
  const { width: logicalWidth, height: logicalHeight } = interfaceSize;
  if (interfaceSize.dpr) {
    console.warn('Warning: return value of interface.size() include a dpr property, which is not expected and ignored. ');
  }
  ensureUsableDimensions('interface size', logicalWidth, logicalHeight);
  log(`size: ${logicalWidth}x${logicalHeight}`);

  const screenshotBase64 = await interfaceInstance.screenshotBase64();
  const screenshotCapturedAt = Date.now();
  assert(screenshotBase64, 'screenshotBase64 is required');

  log('will get screenshot dimensions');
  const { width: imgWidth, height: imgHeight } = await imageInfoOfBase64(screenshotBase64);
  ensureUsableDimensions('screenshot dimensions', imgWidth, imgHeight);
  log('screenshot dimensions', imgWidth, 'x', imgHeight);

  // When the screenshot and the logical size disagree on orientation, the
  // logical height is used as the width for the ratio computation.
  const logicalIsPortrait = logicalWidth < logicalHeight;
  const screenshotIsPortrait = imgWidth < imgHeight;
  const orientationMismatch = logicalIsPortrait !== screenshotIsPortrait;
  if (orientationMismatch) {
    log(`Orientation mismatch detected: logical size ${logicalWidth}x${logicalHeight} (${logicalIsPortrait ? 'portrait' : 'landscape'}) vs screenshot ${imgWidth}x${imgHeight} (${screenshotIsPortrait ? 'portrait' : 'landscape'}). Swapping logical dimensions.`);
  }
  const finalLogicalWidth = orientationMismatch ? logicalHeight : logicalWidth;

  const userShrinkFactor = _opt.screenshotShrinkFactor ?? 1;
  if (!Number.isFinite(userShrinkFactor) || userShrinkFactor < 1) {
    throw new Error(`Invalid screenshotShrinkFactor: must be a finite number >= 1. Received: ${userShrinkFactor}`);
  }

  const dpr = imgWidth / finalLogicalWidth;
  log('calculated dpr:', dpr);
  const shrunkShotToLogicalRatio = dpr / userShrinkFactor;
  log('shrunkShotToLogicalRatio', shrunkShotToLogicalRatio);

  // Default to the untouched screenshot; only resize when a shrink was requested.
  let outputWidth = imgWidth;
  let outputHeight = imgHeight;
  let outputBase64 = screenshotBase64;
  if (userShrinkFactor !== 1) {
    outputWidth = Math.round(imgWidth / userShrinkFactor);
    outputHeight = Math.round(imgHeight / userShrinkFactor);
    log(`Applying screenshot shrink factor: ${userShrinkFactor} (physical: ${imgWidth}x${imgHeight} -> target: ${outputWidth}x${outputHeight})`);
    outputBase64 = await resizeImgBase64(screenshotBase64, {
      width: outputWidth,
      height: outputHeight,
    });
  }

  return {
    shotSize: {
      width: outputWidth,
      height: outputHeight,
    },
    deprecatedDpr: dpr,
    screenshot: ScreenshotItem.create(outputBase64, screenshotCapturedAt),
    shrunkShotToLogicalRatio,
  };
}
|
|
78
|
+
/**
 * Builds a report file name of the form `<tag>-<timestamp>-<8-char id>`.
 *
 * @param {string} [tag='web'] - Fallback prefix, used when the
 *   MIDSCENE_REPORT_TAG_NAME config value is empty.
 * @returns {string} The generated name (no extension).
 */
function getReportFileName(tag = 'web') {
  const prefix = globalConfigManager.getEnvConfigValue(MIDSCENE_REPORT_TAG_NAME) || tag;
  const timestamp = dayjs().format('YYYY-MM-DD_HH-mm-ss');
  const shortId = uuid().substring(0, 8);
  return `${prefix}-${timestamp}-${shortId}`;
}
|
|
84
|
+
/**
 * Logs the location of an updated report file unless the
 * MIDSCENE_REPORT_QUIET config flag is enabled.
 *
 * @param filepath - Path interpolated into the log message.
 */
function printReportMsg(filepath) {
  const quiet = globalConfigManager.getEnvConfigInBoolean(MIDSCENE_REPORT_QUIET);
  if (!quiet) {
    logMsg(`Midscene - report file updated: ${filepath}`);
  }
}
|
|
88
|
+
/**
 * Tells whether a plan locate parameter carries a four-element `bbox` array.
 *
 * @param planLocateParam - Object whose `bbox` property is inspected.
 * @returns {boolean} `true` only when `bbox` is an array of length 4.
 */
function ifPlanLocateParamIsBbox(planLocateParam) {
  const { bbox } = planLocateParam;
  return Array.isArray(bbox) && bbox.length === 4;
}
|
|
91
|
+
/**
 * Turns the `bbox` of a plan locate parameter into an element via
 * `generateElementByRect`.
 *
 * The bbox is read as `[x1, y1, x2, y2]`; width and height are computed
 * inclusively (`x2 - x1 + 1`, `y2 - y1 + 1`).
 *
 * @param planLocateParam - May be nullish; `bbox` and `prompt` are read.
 * @returns The generated element, or `undefined` when there is no parameter
 *   or no `bbox`.
 */
function matchElementFromPlan(planLocateParam) {
  const bbox = planLocateParam?.bbox;
  if (!bbox) return;

  const rect = {
    left: bbox[0],
    top: bbox[1],
    width: bbox[2] - bbox[0] + 1,
    height: bbox[3] - bbox[1] + 1,
  };

  // The prompt is either a plain string or an object wrapping one.
  const { prompt } = planLocateParam;
  let description;
  if (typeof prompt === 'string') {
    description = prompt;
  } else {
    description = prompt?.prompt || '';
  }

  return generateElementByRect(rect, description);
}
|
|
104
|
+
/**
 * Tries to resolve an element from a cached locate entry.
 *
 * @param context - Must provide `interfaceInstance`; `taskCache` is optional.
 *   The cache is only consulted when `taskCache.isCacheResultUsed` is truthy
 *   and the interface implements `rectMatchesCacheFeature`.
 * @param cacheEntry - Cached feature passed to `rectMatchesCacheFeature`.
 * @param cachePrompt - Prompt used as the element description; either a string
 *   or an object with a `prompt` field. May be nullish.
 * @param cacheable - Pass `false` to bypass the cache for this prompt.
 * @returns The element (`center`, `rect`, `description`), or `undefined` when
 *   the cache is skipped or the lookup throws.
 */
async function matchElementFromCache(context, cacheEntry, cachePrompt, cacheable) {
  if (!cacheEntry) return;
  if (cacheable === false) {
    external_task_cache_mjs_debug('cache disabled for prompt: %s', cachePrompt);
    return;
  }
  if (!context.taskCache?.isCacheResultUsed) return;
  if (!context.interfaceInstance.rectMatchesCacheFeature) {
    external_task_cache_mjs_debug('interface does not implement rectMatchesCacheFeature, skip cache');
    return;
  }

  try {
    const rect = await context.interfaceInstance.rectMatchesCacheFeature(cacheEntry);
    // Optional chaining keeps a missing prompt from throwing inside this try
    // block, which would otherwise discard a valid cache hit and log it as a
    // rectMatchesCacheFeature failure.
    const description = typeof cachePrompt === 'string' ? cachePrompt : cachePrompt?.prompt || '';
    const element = {
      center: [
        Math.round(rect.left + rect.width / 2),
        Math.round(rect.top + rect.height / 2),
      ],
      rect,
      description,
    };
    external_task_cache_mjs_debug('cache hit, prompt: %s', cachePrompt);
    return element;
  } catch (error) {
    // Best effort: a failed cache lookup falls back to a normal locate.
    external_task_cache_mjs_debug('rectMatchesCacheFeature error: %s', error);
    return;
  }
}
|
|
126
|
+
/**
 * Returns the version string baked into this build.
 *
 * NOTE(review): the value is hard-coded; confirm it matches the published
 * package version.
 *
 * @returns {string}
 */
const getMidsceneVersion = () => {
  return "1.7.10";
};
|
|
127
|
+
/**
 * Normalizes a prompt into its text part and optional multimodal part.
 *
 * @param prompt - Either a string, or an object with `prompt` and optionally
 *   `images` / `convertHttpImage2Base64`.
 * @returns {{textPrompt: *, multimodalPrompt: (object|undefined)}}
 *   `multimodalPrompt` is only set when the object form carries `images`.
 */
const parsePrompt = (prompt) => {
  const isPlainText = typeof prompt === 'string';
  const textPrompt = isPlainText ? prompt : prompt.prompt;

  let multimodalPrompt;
  if (!isPlainText && prompt.images) {
    multimodalPrompt = {
      images: prompt.images,
      convertHttpImage2Base64: Boolean(prompt.convertHttpImage2Base64),
    };
  }

  return { textPrompt, multimodalPrompt };
};
|
|
140
|
+
/**
 * Scales an element's `center` and `rect` by the given ratio, rounding each
 * value to the nearest integer.
 *
 * @param element - Object with `center` ([x, y]) and `rect`
 *   (`left`, `top`, `width`, `height`); other properties are copied through.
 * @param {number} shrunkShotToLogicalRatio - Multiplier applied to every value.
 * @returns The same element when the ratio is exactly 1, otherwise a scaled copy.
 */
const transformLogicalElementToScreenshot = (element, shrunkShotToLogicalRatio) => {
  if (shrunkShotToLogicalRatio === 1) return element;

  const scale = (value) => Math.round(value * shrunkShotToLogicalRatio);
  const { center, rect } = element;

  return {
    ...element,
    center: [scale(center[0]), scale(center[1])],
    rect: {
      ...rect,
      left: scale(rect.left),
      top: scale(rect.top),
      width: scale(rect.width),
      height: scale(rect.height),
    },
  };
};
|
|
157
|
+
/**
 * Scales a rect's `left`, `top`, `width` and `height` by the given ratio,
 * rounding each value to the nearest integer.
 *
 * @param rect - Rect to scale; other properties are copied through.
 * @param {number} shrunkShotToLogicalRatio - Multiplier applied to every value.
 * @returns The same rect when the ratio is exactly 1, otherwise a scaled copy.
 */
const transformLogicalRectToScreenshotRect = (rect, shrunkShotToLogicalRatio) => {
  if (shrunkShotToLogicalRatio === 1) return rect;

  const scale = (value) => Math.round(value * shrunkShotToLogicalRatio);
  const { left, top, width, height } = rect;

  return {
    ...rect,
    left: scale(left),
    top: scale(top),
    width: scale(width),
    height: scale(height),
  };
};
|
|
167
|
+
export { commonContextParser, getMidsceneVersion, getReportFileName, ifPlanLocateParamIsBbox, matchElementFromCache, matchElementFromPlan, parsePrompt, printReportMsg, transformLogicalElementToScreenshot, transformLogicalRectToScreenshotRect };
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
import { adaptBbox, pointToBbox } from "../../common.mjs";
|
|
2
|
+
import { getDebug } from "@godscene/shared/logger";
|
|
3
|
+
// Debug logger for this module, created under the 'auto-glm-actions' name.
const debug = getDebug('auto-glm-actions');
|
|
4
|
+
// Divisor used when converting auto-glm swipe deltas into pixel distances
// (delta * size / AUTO_GLM_COORDINATE_MAX). Presumably the upper bound of the
// model's normalized coordinate range — TODO confirm.
const AUTO_GLM_COORDINATE_MAX = 1000;
|
|
5
|
+
/**
 * Converts an auto-glm point into a bbox by expanding it with `pointToBbox`
 * (using the value 10) and adapting the result with `adaptBbox` for the
 * 'auto-glm' model family.
 *
 * @param x - Point x coordinate as produced by the model.
 * @param y - Point y coordinate as produced by the model.
 * @param width - Width forwarded to `adaptBbox`.
 * @param height - Height forwarded to `adaptBbox`.
 * @returns Whatever `adaptBbox` returns for the expanded point.
 */
function autoGLMCoordinateToBbox(x, y, width, height) {
  return adaptBbox(pointToBbox(x, y, 10), width, height, 'auto-glm');
}
|
|
9
|
+
// Action names recognized as a "back" button when searching an action space.
const BACK_BUTTON_NAMES = ['AndroidBackButton', 'HarmonyBackButton'];
|
|
13
|
+
// Action names recognized as a "home" button when searching an action space.
const HOME_BUTTON_NAMES = ['AndroidHomeButton', 'HarmonyHomeButton'];
|
|
17
|
+
/**
 * Picks the name of the first action in `actionSpace` whose name appears in
 * `knownNames`.
 *
 * @param actionSpace - Array of objects with a `name`; may be nullish.
 * @param {string[]} knownNames - Acceptable action names.
 * @param {string} defaultName - Returned when there is no action space or no match.
 * @returns {string} The matched action name or `defaultName`.
 */
function findActionName(actionSpace, knownNames, defaultName) {
  if (!actionSpace) return defaultName;

  const matched = actionSpace.find((entry) => knownNames.includes(entry.name));
  if (!matched) return defaultName;
  return matched.name;
}
|
|
22
|
+
/**
 * Converts a parsed auto-glm action into a list of planner actions.
 *
 * Mapping performed:
 * - `finish`               -> `Finished` (thought = finish message)
 * - `do` / Tap             -> `Tap`
 * - `do` / Double Tap      -> `DoubleClick`
 * - `do` / Long Press      -> `LongPress`
 * - `do` / Type            -> `Input` (value = text)
 * - `do` / Swipe           -> `Scroll` (located at the swipe start)
 * - `do` / Back, Home      -> name resolved through `findActionName`
 * - `do` / Wait            -> `Sleep` (timeMs = durationMs)
 * - `do` / Launch          -> `Launch` (uri = app)
 *
 * Every `do` action forwards the model's `think` text as `thought`
 * (empty string when absent).
 *
 * @param action - Parsed action; `_metadata` selects `finish` vs `do`.
 * @param size - Object with `width` and `height`, used to convert coordinates.
 * @param actionSpace - Optional list of available actions, used to pick the
 *   platform-specific Back/Home action name.
 * @returns {Array<object>} A single-element array with the transformed action.
 * @throws {Error} `Failed to transform action: ...` for unsupported or unknown
 *   actions and for any failure while transforming; the original error is
 *   attached as `cause`.
 */
function transformAutoGLMAction(action, size, actionSpace) {
  // Builds the locate parameter for a model coordinate pair.
  const locateAt = (point) => {
    const [x1, y1, x2, y2] = autoGLMCoordinateToBbox(point[0], point[1], size.width, size.height);
    return {
      prompt: '',
      bbox: [x1, y1, x2, y2],
    };
  };
  const thoughtOf = (source) => source.think || '';

  try {
    switch (action._metadata) {
      case 'finish': {
        debug('Transform finish action:', action);
        return [
          {
            type: 'Finished',
            param: {},
            thought: action.message,
          },
        ];
      }
      case 'do': {
        const doAction = action;
        switch (doAction.action) {
          case 'Tap': {
            debug('Transform Tap action:', doAction);
            return [
              {
                type: 'Tap',
                param: { locate: locateAt(doAction.element) },
                thought: thoughtOf(doAction),
              },
            ];
          }
          case 'Double Tap': {
            debug('Transform Double Tap action:', doAction);
            return [
              {
                type: 'DoubleClick',
                param: { locate: locateAt(doAction.element) },
                thought: thoughtOf(doAction),
              },
            ];
          }
          case 'Type': {
            debug('Transform Type action:', doAction);
            return [
              {
                type: 'Input',
                param: { value: doAction.text },
                thought: thoughtOf(doAction),
              },
            ];
          }
          case 'Swipe': {
            debug('Transform Swipe action:', doAction);
            const locate = locateAt(doAction.start);
            const deltaX = doAction.end[0] - doAction.start[0];
            const deltaY = doAction.end[1] - doAction.start[1];
            const absDeltaX = Math.abs(deltaX);
            const absDeltaY = Math.abs(deltaY);
            let direction;
            let distance;
            // The dominant axis decides the scroll direction; ties go horizontal.
            // A positive delta maps to 'up'/'left', a non-positive one to
            // 'down'/'right'.
            if (absDeltaY > absDeltaX) {
              distance = Math.round(absDeltaY * size.height / AUTO_GLM_COORDINATE_MAX);
              direction = deltaY > 0 ? 'up' : 'down';
            } else {
              distance = Math.round(absDeltaX * size.width / AUTO_GLM_COORDINATE_MAX);
              direction = deltaX > 0 ? 'left' : 'right';
            }
            debug(`Calculate swipe direction: ${direction}, distance: ${distance}`);
            return [
              {
                type: 'Scroll',
                param: {
                  locate,
                  distance,
                  direction,
                },
                thought: thoughtOf(doAction),
              },
            ];
          }
          case 'Long Press': {
            debug('Transform Long Press action:', doAction);
            return [
              {
                type: 'LongPress',
                param: { locate: locateAt(doAction.element) },
                thought: thoughtOf(doAction),
              },
            ];
          }
          case 'Back': {
            debug('Transform Back action:', doAction);
            return [
              {
                type: findActionName(actionSpace, BACK_BUTTON_NAMES, 'AndroidBackButton'),
                param: {},
                thought: thoughtOf(doAction),
              },
            ];
          }
          case 'Home': {
            debug('Transform Home action:', doAction);
            return [
              {
                type: findActionName(actionSpace, HOME_BUTTON_NAMES, 'AndroidHomeButton'),
                param: {},
                thought: thoughtOf(doAction),
              },
            ];
          }
          case 'Wait': {
            debug('Transform Wait action:', doAction);
            return [
              {
                type: 'Sleep',
                param: { timeMs: doAction.durationMs },
                thought: thoughtOf(doAction),
              },
            ];
          }
          case 'Launch': {
            debug('Transform Launch action:', doAction);
            return [
              {
                type: 'Launch',
                param: { uri: doAction.app },
                thought: thoughtOf(doAction),
              },
            ];
          }
          case 'Interact':
            throw new Error('Action "Interact" from auto-glm is not supported in the current implementation.');
          case 'Call_API':
            throw new Error('Action "Call_API" from auto-glm is not supported in the current implementation.');
          case 'Take_over':
            throw new Error('Action "Take_over" from auto-glm is not supported in the current implementation.');
          case 'Note':
            throw new Error('Action "Note" from auto-glm is not supported in the current implementation.');
          default:
            throw new Error(`Unknown do() action type: ${doAction.action}`);
        }
      }
      default:
        throw new Error(`Unknown action metadata: ${action._metadata}`);
    }
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    debug('Transform error:', errorMessage);
    // Keep the original error reachable for callers that inspect `cause`.
    throw new Error(`Failed to transform action: ${errorMessage}`, { cause: error });
  }
}
|
|
237
|
+
export { transformAutoGLMAction };
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import { getAutoGLMLocatePrompt, getAutoGLMPlanPrompt } from "./prompt.mjs";
|
|
2
|
+
import { parseAction, parseAutoGLMLocateResponse, parseAutoGLMResponse } from "./parser.mjs";
|
|
3
|
+
import { autoGLMPlanning } from "./planning.mjs";
|
|
4
|
+
import { transformAutoGLMAction } from "./actions.mjs";
|
|
5
|
+
import { isAutoGLM, isUITars } from "./util.mjs";
|
|
6
|
+
export { autoGLMPlanning, getAutoGLMLocatePrompt, getAutoGLMPlanPrompt, isAutoGLM, isUITars, parseAction, parseAutoGLMLocateResponse, parseAutoGLMResponse, transformAutoGLMAction };
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
import { getDebug } from "@godscene/shared/logger";
|
|
2
|
+
// Debug logger for this module, created under the 'auto-glm-parser' name.
const debug = getDebug('auto-glm-parser');
|
|
3
|
+
/**
 * Returns the text that follows the first occurrence of `key` in `src`,
 * trimmed, with one trailing `")` removed when present.
 *
 * @param {string} src - Raw action payload.
 * @param {string} key - Marker to search for (e.g. `text="`).
 * @returns {string} The extracted value.
 * @throws {Error} When `key` does not occur in `src`.
 */
const extractValueAfter = (src, key) => {
  const keyStart = src.indexOf(key);
  if (keyStart === -1) {
    throw new Error(`Missing key ${key} in action payload ${src}`);
  }
  const tail = src.slice(keyStart + key.length).trim();
  return tail.endsWith('")') ? tail.slice(0, -2) : tail;
};
|
|
10
|
+
/**
 * Parses a single auto-glm action call (`do(...)` or `finish(...)`) into a
 * structured action object.
 *
 * @param response - Object with `content` (the raw action text) and `think`
 *   (copied onto the result).
 * @returns {object} An action with `_metadata` of `'do'` or `'finish'` plus the
 *   fields of the specific action (`element`, `start`/`end`, `text`, `app`,
 *   `durationMs`, `instruction`, `message`).
 * @throws {Error} `Failed to parse action: ...; raw="..."` when the content is
 *   not a recognized action or a required argument is missing; the original
 *   error is attached as `cause`.
 */
function parseAction(response) {
  debug('Parsing action:', response);
  let trimmedResponse = '';
  try {
    trimmedResponse = response.content.trim();

    // Type is handled first because its text may contain arbitrary characters.
    if (trimmedResponse.startsWith('do(action="Type"') || trimmedResponse.startsWith('do(action="Type_Name"')) {
      return {
        _metadata: 'do',
        action: 'Type',
        text: extractValueAfter(trimmedResponse, 'text="'),
        think: response.think,
      };
    }

    if (trimmedResponse.startsWith('finish(message=')) {
      const finishKey = 'finish(message="';
      let message = extractValueAfter(trimmedResponse, finishKey);
      const rawTail = trimmedResponse.slice(trimmedResponse.indexOf(finishKey) + finishKey.length).trim();
      // Only strip a lone trailing ')' when the closing `")` was missing.
      // Otherwise a message that legitimately ends with ')' would lose its
      // final character.
      if (!rawTail.endsWith('")') && message.endsWith(')')) {
        message = message.slice(0, -1);
      }
      return {
        _metadata: 'finish',
        message,
        think: response.think,
      };
    }

    if (trimmedResponse.startsWith('do(')) {
      const actionMatch = trimmedResponse.match(/do\(action="([^"]+)"/);
      if (!actionMatch) {
        throw new Error(`Failed to extract action type from do() call; raw="${trimmedResponse}"`);
      }
      const actionType = actionMatch[1];
      const baseAction = {
        _metadata: 'do',
        think: response.think,
      };

      switch (actionType) {
        case 'Tap':
        case 'Double Tap':
        case 'Long Press': {
          const element = readAutoGLMCoordinatePair(trimmedResponse, 'element');
          if (!element) {
            throw new Error(`Failed to extract element coordinates for ${actionType}; raw="${trimmedResponse}"`);
          }
          return {
            ...baseAction,
            action: actionType,
            element,
          };
        }
        case 'Swipe': {
          const start = readAutoGLMCoordinatePair(trimmedResponse, 'start');
          const end = readAutoGLMCoordinatePair(trimmedResponse, 'end');
          if (!start || !end) {
            throw new Error(`Failed to extract start/end coordinates for Swipe; raw="${trimmedResponse}"`);
          }
          return {
            ...baseAction,
            action: 'Swipe',
            start,
            end,
          };
        }
        case 'Launch':
          return {
            ...baseAction,
            action: 'Launch',
            app: extractValueAfter(trimmedResponse, 'app="'),
          };
        case 'Back':
        case 'Home':
        case 'Interact':
          return {
            ...baseAction,
            action: actionType,
          };
        case 'Wait': {
          const durationMatch = trimmedResponse.match(/duration=(?:["\[])?(\d+)/);
          if (!durationMatch) {
            throw new Error(`Failed to extract duration for Wait; raw="${trimmedResponse}"`);
          }
          // The parsed number is treated as seconds and converted to milliseconds.
          const seconds = Number.parseInt(durationMatch[1], 10);
          return {
            ...baseAction,
            action: 'Wait',
            durationMs: 1000 * seconds,
          };
        }
        case 'Call_API':
          return {
            ...baseAction,
            action: 'Call_API',
            instruction: extractValueAfter(trimmedResponse, 'instruction="'),
          };
        case 'Take_over':
        case 'Note':
          return {
            ...baseAction,
            action: actionType,
            message: extractValueAfter(trimmedResponse, 'message="'),
          };
        default:
          throw new Error(`Unknown action type: ${actionType}; raw="${trimmedResponse}"`);
      }
    }

    throw new Error(`Failed to parse action: ${trimmedResponse}`);
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    // Keep the original error reachable for callers that inspect `cause`.
    throw new Error(`Failed to parse action: ${errorMessage}; raw="${trimmedResponse}"`, { cause: error });
  }
}

/**
 * Reads a `<field>=[x,y]` integer pair from a raw action string, tolerating
 * whitespace inside the brackets.
 *
 * @param {string} raw - Raw action text.
 * @param {string} field - Argument name (`element`, `start` or `end`).
 * @returns {number[]|null} `[x, y]`, or `null` when the pair is absent.
 */
function readAutoGLMCoordinatePair(raw, field) {
  const match = raw.match(new RegExp(`${field}=\\[\\s*(\\d+)\\s*,\\s*(\\d+)\\s*\\]`));
  return match ? [Number(match[1]), Number(match[2])] : null;
}
|
|
172
|
+
/**
 * Split a raw AutoGLM model reply into its reasoning text and its action text.
 *
 * The rules are tried in order and the first one that applies wins:
 *   1. The reply contains `finish(message=`: everything before the FIRST
 *      occurrence (trimmed) is the reasoning; everything from that occurrence
 *      to the end of the reply is the action, returned untrimmed.
 *   2. The reply contains `do(action=`: same split, on that marker.
 *   3. The reply contains `<answer>`: the text before the first `<answer>` is
 *      the reasoning, with `<think>` / `</think>` tags removed and trimmed; the
 *      text after it is the action, with `</answer>` tags removed and trimmed.
 *   4. Otherwise the whole reply is the action and the reasoning is empty.
 *
 * @param {string} content - Raw text returned by the model.
 * @returns {{ think: string, content: string }} Reasoning and action text.
 */
function parseAutoGLMResponse(content) {
  // Order matters: a `finish(...)` call takes precedence over a `do(...)` call.
  const callMarkers = ['finish(message=', 'do(action='];
  for (const marker of callMarkers) {
    const markerIndex = content.indexOf(marker);
    if (markerIndex !== -1) {
      // Cut once at the first occurrence. Splitting on every occurrence and
      // keeping only the second piece would drop the tail of the action
      // whenever the marker text shows up again inside the action payload.
      return {
        think: content.slice(0, markerIndex).trim(),
        content: content.slice(markerIndex)
      };
    }
  }

  const answerTag = '<answer>';
  const answerIndex = content.indexOf(answerTag);
  if (answerIndex !== -1) {
    const think = content
      .slice(0, answerIndex)
      .replace(/<think>/g, '')
      .replace(/<\/think>/g, '')
      .trim();
    const actionContent = content
      .slice(answerIndex + answerTag.length)
      .replace(/<\/answer>/g, '')
      .trim();
    return {
      think,
      content: actionContent
    };
  }

  // No recognised structure: treat the entire reply as the action.
  return {
    think: '',
    content
  };
}
|
|
205
|
+
/**
 * Extract tap coordinates from a raw AutoGLM reply to a locate request.
 *
 * The reply is first split into reasoning and action text by
 * `parseAutoGLMResponse`. The action must start with `do(action="Tap"` and
 * carry an `element=[x,y]` pair of non-negative integers; whitespace around
 * the numbers and the comma is tolerated (e.g. `element=[100, 200]`).
 *
 * Failures are reported in the returned object rather than thrown.
 *
 * @param {string} rawResponse - Raw text returned by the model.
 * @returns {{ think: string, coordinates: { x: number, y: number } | null, error?: string }}
 *   On success `coordinates` holds the parsed numbers and `error` is absent;
 *   on failure `coordinates` is `null` and `error` describes the problem.
 */
function parseAutoGLMLocateResponse(rawResponse) {
  const { think, content: actionContent } = parseAutoGLMResponse(rawResponse);

  if (!actionContent.startsWith('do(action="Tap"')) {
    return {
      think,
      coordinates: null,
      error: `Unexpected action type in auto-glm locate response: ${actionContent}`
    };
  }

  // Optional whitespace is accepted so "[100, 200]" parses like "[100,200]".
  const elementMatch = actionContent.match(/element=\[\s*(\d+)\s*,\s*(\d+)\s*\]/);
  if (!elementMatch) {
    return {
      think,
      coordinates: null,
      error: `Failed to extract element coordinates from auto-glm response: ${actionContent}`
    };
  }

  // Both capture groups are digit-only, so Number() always yields a finite integer.
  return {
    think,
    coordinates: {
      x: Number(elementMatch[1]),
      y: Number(elementMatch[2])
    }
  };
}
|
|
237
|
+
export { extractValueAfter, parseAction, parseAutoGLMLocateResponse, parseAutoGLMResponse };
|