@midscene/core 1.2.1-beta-20260112114129.0 → 1.2.1-beta-20260114072539.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/agent/agent.mjs +5 -2
- package/dist/es/agent/agent.mjs.map +1 -1
- package/dist/es/agent/tasks.mjs +4 -2
- package/dist/es/agent/tasks.mjs.map +1 -1
- package/dist/es/agent/utils.mjs +1 -1
- package/dist/es/ai-model/auto-glm/actions.mjs +217 -0
- package/dist/es/ai-model/auto-glm/actions.mjs.map +1 -0
- package/dist/es/ai-model/auto-glm/index.mjs +5 -0
- package/dist/es/ai-model/auto-glm/parser.mjs +239 -0
- package/dist/es/ai-model/auto-glm/parser.mjs.map +1 -0
- package/dist/es/ai-model/auto-glm/planning.mjs +63 -0
- package/dist/es/ai-model/auto-glm/planning.mjs.map +1 -0
- package/dist/es/ai-model/auto-glm/prompt.mjs +222 -0
- package/dist/es/ai-model/auto-glm/prompt.mjs.map +1 -0
- package/dist/es/ai-model/auto-glm/util.mjs +22 -0
- package/dist/es/ai-model/auto-glm/util.mjs.map +1 -0
- package/dist/es/ai-model/index.mjs +2 -1
- package/dist/es/ai-model/inspect.mjs +68 -3
- package/dist/es/ai-model/inspect.mjs.map +1 -1
- package/dist/es/ai-model/latest-locate-recorder.mjs +29 -0
- package/dist/es/ai-model/latest-locate-recorder.mjs.map +1 -0
- package/dist/es/ai-model/service-caller/index.mjs +5 -0
- package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
- package/dist/es/ai-model/ui-tars-planning.mjs +41 -29
- package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -1
- package/dist/es/common.mjs +2 -1
- package/dist/es/common.mjs.map +1 -1
- package/dist/es/service/index.mjs +5 -0
- package/dist/es/service/index.mjs.map +1 -1
- package/dist/es/utils.mjs +2 -2
- package/dist/es/yaml/player.mjs +3 -4
- package/dist/es/yaml/player.mjs.map +1 -1
- package/dist/lib/agent/agent.js +5 -2
- package/dist/lib/agent/agent.js.map +1 -1
- package/dist/lib/agent/tasks.js +3 -1
- package/dist/lib/agent/tasks.js.map +1 -1
- package/dist/lib/agent/utils.js +1 -1
- package/dist/lib/ai-model/auto-glm/actions.js +251 -0
- package/dist/lib/ai-model/auto-glm/actions.js.map +1 -0
- package/dist/lib/ai-model/auto-glm/index.js +59 -0
- package/dist/lib/ai-model/auto-glm/index.js.map +1 -0
- package/dist/lib/ai-model/auto-glm/parser.js +282 -0
- package/dist/lib/ai-model/auto-glm/parser.js.map +1 -0
- package/dist/lib/ai-model/auto-glm/planning.js +97 -0
- package/dist/lib/ai-model/auto-glm/planning.js.map +1 -0
- package/dist/lib/ai-model/auto-glm/prompt.js +259 -0
- package/dist/lib/ai-model/auto-glm/prompt.js.map +1 -0
- package/dist/lib/ai-model/auto-glm/util.js +62 -0
- package/dist/lib/ai-model/auto-glm/util.js.map +1 -0
- package/dist/lib/ai-model/index.js +15 -11
- package/dist/lib/ai-model/inspect.js +67 -2
- package/dist/lib/ai-model/inspect.js.map +1 -1
- package/dist/lib/ai-model/latest-locate-recorder.js +63 -0
- package/dist/lib/ai-model/latest-locate-recorder.js.map +1 -0
- package/dist/lib/ai-model/service-caller/index.js +5 -0
- package/dist/lib/ai-model/service-caller/index.js.map +1 -1
- package/dist/lib/ai-model/ui-tars-planning.js +41 -29
- package/dist/lib/ai-model/ui-tars-planning.js.map +1 -1
- package/dist/lib/common.js +1 -0
- package/dist/lib/common.js.map +1 -1
- package/dist/lib/service/index.js +5 -0
- package/dist/lib/service/index.js.map +1 -1
- package/dist/lib/utils.js +2 -2
- package/dist/lib/yaml/player.js +3 -4
- package/dist/lib/yaml/player.js.map +1 -1
- package/dist/types/ai-model/auto-glm/actions.d.ts +77 -0
- package/dist/types/ai-model/auto-glm/index.d.ts +5 -0
- package/dist/types/ai-model/auto-glm/parser.d.ts +18 -0
- package/dist/types/ai-model/auto-glm/planning.d.ts +9 -0
- package/dist/types/ai-model/auto-glm/prompt.d.ts +27 -0
- package/dist/types/ai-model/auto-glm/util.d.ts +16 -0
- package/dist/types/ai-model/index.d.ts +1 -0
- package/dist/types/ai-model/latest-locate-recorder.d.ts +14 -0
- package/dist/types/yaml.d.ts +1 -5
- package/package.json +2 -2
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __webpack_require__ = {};
|
|
3
|
+
(()=>{
|
|
4
|
+
__webpack_require__.d = (exports1, definition)=>{
|
|
5
|
+
for(var key in definition)if (__webpack_require__.o(definition, key) && !__webpack_require__.o(exports1, key)) Object.defineProperty(exports1, key, {
|
|
6
|
+
enumerable: true,
|
|
7
|
+
get: definition[key]
|
|
8
|
+
});
|
|
9
|
+
};
|
|
10
|
+
})();
|
|
11
|
+
(()=>{
|
|
12
|
+
__webpack_require__.o = (obj, prop)=>Object.prototype.hasOwnProperty.call(obj, prop);
|
|
13
|
+
})();
|
|
14
|
+
(()=>{
|
|
15
|
+
__webpack_require__.r = (exports1)=>{
|
|
16
|
+
if ('undefined' != typeof Symbol && Symbol.toStringTag) Object.defineProperty(exports1, Symbol.toStringTag, {
|
|
17
|
+
value: 'Module'
|
|
18
|
+
});
|
|
19
|
+
Object.defineProperty(exports1, '__esModule', {
|
|
20
|
+
value: true
|
|
21
|
+
});
|
|
22
|
+
};
|
|
23
|
+
})();
|
|
24
|
+
var __webpack_exports__ = {};
|
|
25
|
+
__webpack_require__.r(__webpack_exports__);
|
|
26
|
+
__webpack_require__.d(__webpack_exports__, {
|
|
27
|
+
transformAutoGLMAction: ()=>transformAutoGLMAction
|
|
28
|
+
});
|
|
29
|
+
const logger_namespaceObject = require("@midscene/shared/logger");
|
|
30
|
+
const external_latest_locate_recorder_js_namespaceObject = require("../latest-locate-recorder.js");
|
|
31
|
+
const external_util_js_namespaceObject = require("./util.js");
|
|
32
|
+
const debug = (0, logger_namespaceObject.getDebug)('auto-glm-actions');
|
|
33
|
+
const lastLocateRecorder = new external_latest_locate_recorder_js_namespaceObject.LatestLocateRecorder();
|
|
34
|
+
/**
 * Build the `locate` payload for a single auto-glm point.
 *
 * The point is handed to `autoGLMCoordinateToBbox` together with the screen
 * size; the first four values it returns become the bbox. The prompt is
 * always the empty string.
 *
 * @param {[number, number]} point - `[x, y]` as emitted by auto-glm.
 * @param {{ width: number, height: number }} size - Screen size.
 * @returns {{ prompt: string, bbox: [number, number, number, number] }}
 */
function buildAutoGLMPointLocate(point, size) {
    const [x1, y1, x2, y2] = (0, external_util_js_namespaceObject.autoGLMCoordinateToBbox)(point[0], point[1], size.width, size.height);
    return {
        prompt: '',
        bbox: [
            x1,
            y1,
            x2,
            y2
        ]
    };
}

/**
 * Derive the scroll direction and distance for an auto-glm swipe.
 *
 * Deltas are measured in the [0, AUTO_GLM_COORDINATE_MAX] coordinate system
 * and the dominant axis is scaled by the matching screen dimension. The
 * direction names which side of the page's content will appear on screen,
 * so a swipe towards larger y yields 'up' and towards larger x yields 'left'.
 * When both deltas have the same magnitude (including a zero-length swipe)
 * the horizontal branch is taken.
 *
 * @param {[number, number]} start - Swipe start point.
 * @param {[number, number]} end - Swipe end point.
 * @param {{ width: number, height: number }} size - Screen size.
 * @returns {{ direction: 'up' | 'down' | 'left' | 'right', distance: number }}
 */
function resolveAutoGLMSwipeScroll(start, end, size) {
    const deltaX = end[0] - start[0];
    const deltaY = end[1] - start[1];
    const absDeltaX = Math.abs(deltaX);
    const absDeltaY = Math.abs(deltaY);
    const coordinateMax = external_util_js_namespaceObject.AUTO_GLM_COORDINATE_MAX;
    if (absDeltaY > absDeltaX) {
        return {
            direction: deltaY > 0 ? 'up' : 'down',
            distance: Math.round(absDeltaY * size.height / coordinateMax)
        };
    }
    return {
        direction: deltaX > 0 ? 'left' : 'right',
        distance: Math.round(absDeltaX * size.width / coordinateMax)
    };
}

/**
 * Translate one parsed auto-glm action into a list of planning actions.
 *
 * Mapping:
 *   finish       -> Finished (thought = action.message)
 *   Tap          -> Tap
 *   Double Tap   -> DoubleClick
 *   Type         -> Input, targeting the most recently recorded locate
 *   Swipe        -> Scroll (locate taken from the swipe start point)
 *   Long Press   -> AndroidLongPress
 *   Back / Home  -> AndroidBackButton / AndroidHomeButton
 *   Wait         -> Sleep (timeMs = action.durationMs)
 *
 * Tap, Double Tap and Long Press record their locate in the module-level
 * `lastLocateRecorder`; Swipe does not. Launch, Interact, Call_API,
 * Take_over and Note are rejected as unsupported.
 *
 * @param {object} action - Parsed action; `_metadata` is 'finish' or 'do'.
 * @param {{ width: number, height: number }} size - Screen size.
 * @returns {Array<object>} A single-element array with the planning action.
 * @throws {Error} `Failed to transform action: <reason>` for unsupported or
 *   unknown actions and for any failure while transforming. The underlying
 *   error is attached as `cause` so its stack is not lost.
 */
function transformAutoGLMAction(action, size) {
    try {
        if ('finish' === action._metadata) {
            debug('Transform finish action:', action);
            return [
                {
                    type: 'Finished',
                    param: {},
                    thought: action.message
                }
            ];
        }
        if ('do' !== action._metadata) throw new Error(`Unknown action metadata: ${action._metadata}`);
        const doAction = action;
        switch(doAction.action){
            case 'Tap':
                {
                    debug('Transform Tap action:', doAction);
                    const locate = buildAutoGLMPointLocate(doAction.element, size);
                    lastLocateRecorder.recordLocate(locate, 'Tap');
                    return [
                        {
                            type: 'Tap',
                            param: {
                                locate
                            }
                        }
                    ];
                }
            case 'Double Tap':
                {
                    debug('Transform Double Tap action:', doAction);
                    const locate = buildAutoGLMPointLocate(doAction.element, size);
                    lastLocateRecorder.recordLocate(locate, 'Double Tap');
                    return [
                        {
                            type: 'DoubleClick',
                            param: {
                                locate
                            }
                        }
                    ];
                }
            case 'Type':
                {
                    debug('Transform Type action:', doAction);
                    // Type carries no coordinates of its own, so the input is
                    // aimed at whatever element was located last.
                    const { locate: latestLocate, source } = lastLocateRecorder.getLatestLocate();
                    debug(`use latestLocate from ${source} as locate when Input`, latestLocate);
                    return [
                        {
                            type: 'Input',
                            param: {
                                value: doAction.text,
                                locate: latestLocate
                            }
                        }
                    ];
                }
            case 'Swipe':
                {
                    debug('Transform Swipe action:', doAction);
                    const locate = buildAutoGLMPointLocate(doAction.start, size);
                    const { direction, distance } = resolveAutoGLMSwipeScroll(doAction.start, doAction.end, size);
                    debug(`Calculate swipe direction: ${direction}, distance: ${distance}`);
                    return [
                        {
                            type: 'Scroll',
                            param: {
                                locate,
                                distance,
                                direction
                            },
                            thought: doAction.think || ''
                        }
                    ];
                }
            case 'Long Press':
                {
                    debug('Transform Long Press action:', doAction);
                    const locate = buildAutoGLMPointLocate(doAction.element, size);
                    lastLocateRecorder.recordLocate(locate, 'Long Press');
                    return [
                        {
                            type: 'AndroidLongPress',
                            param: {
                                locate
                            },
                            thought: doAction.think || ''
                        }
                    ];
                }
            case 'Back':
                debug('Transform Back action:', doAction);
                return [
                    {
                        type: 'AndroidBackButton',
                        param: {},
                        thought: doAction.think || ''
                    }
                ];
            case 'Home':
                debug('Transform Home action:', doAction);
                return [
                    {
                        type: 'AndroidHomeButton',
                        param: {},
                        thought: doAction.think || ''
                    }
                ];
            case 'Wait':
                debug('Transform Wait action:', doAction);
                return [
                    {
                        type: 'Sleep',
                        param: {
                            timeMs: doAction.durationMs
                        },
                        thought: doAction.think || ''
                    }
                ];
            case 'Launch':
            case 'Interact':
            case 'Call_API':
            case 'Take_over':
            case 'Note':
                throw new Error(`Action "${doAction.action}" from auto-glm is not supported in the current implementation.`);
            default:
                throw new Error(`Unknown do() action type: ${doAction.action}`);
        }
    } catch (error) {
        const errorMessage = error instanceof Error ? error.message : String(error);
        debug('Transform error:', errorMessage);
        // Keep the original error reachable; the message alone drops its stack.
        throw new Error(`Failed to transform action: ${errorMessage}`, {
            cause: error
        });
    }
}
|
|
243
|
+
exports.transformAutoGLMAction = __webpack_exports__.transformAutoGLMAction;
|
|
244
|
+
for(var __rspack_i in __webpack_exports__)if (-1 === [
|
|
245
|
+
"transformAutoGLMAction"
|
|
246
|
+
].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
|
|
247
|
+
Object.defineProperty(exports, '__esModule', {
|
|
248
|
+
value: true
|
|
249
|
+
});
|
|
250
|
+
|
|
251
|
+
//# sourceMappingURL=actions.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ai-model/auto-glm/actions.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../src/ai-model/auto-glm/actions.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type { PlanningAction } from '@/types';\nimport { getDebug } from '@midscene/shared/logger';\nimport { LatestLocateRecorder } from '../latest-locate-recorder';\nimport { AUTO_GLM_COORDINATE_MAX, autoGLMCoordinateToBbox } from './util';\n\nconst debug = getDebug('auto-glm-actions');\nconst lastLocateRecorder = new LatestLocateRecorder();\n\nexport interface BaseAction {\n _metadata: string;\n think?: string;\n}\n\nexport interface TapAction extends BaseAction {\n _metadata: 'do';\n action: 'Tap';\n element: [number, number];\n}\n\nexport interface DoubleTapAction extends BaseAction {\n _metadata: 'do';\n action: 'Double Tap';\n element: [number, number];\n}\n\nexport interface TypeAction extends BaseAction {\n _metadata: 'do';\n action: 'Type';\n text: string;\n}\n\nexport interface SwipeAction extends BaseAction {\n _metadata: 'do';\n action: 'Swipe';\n start: [number, number];\n end: [number, number];\n}\n\nexport interface LongPressAction extends BaseAction {\n _metadata: 'do';\n action: 'Long Press';\n element: [number, number];\n}\n\nexport interface LaunchAction extends 
BaseAction {\n _metadata: 'do';\n action: 'Launch';\n app: string;\n}\n\nexport interface BackAction extends BaseAction {\n _metadata: 'do';\n action: 'Back';\n}\n\nexport interface HomeAction extends BaseAction {\n _metadata: 'do';\n action: 'Home';\n}\n\nexport interface WaitAction extends BaseAction {\n _metadata: 'do';\n action: 'Wait';\n durationMs: number;\n}\n\nexport interface InteractAction extends BaseAction {\n _metadata: 'do';\n action: 'Interact';\n}\n\nexport interface CallAPIAction extends BaseAction {\n _metadata: 'do';\n action: 'Call_API';\n instruction: string;\n}\n\nexport interface TakeoverAction extends BaseAction {\n _metadata: 'do';\n action: 'Take_over';\n message: string;\n}\n\nexport interface NoteAction extends BaseAction {\n _metadata: 'do';\n action: 'Note';\n message: string;\n}\n\nexport interface FinishAction extends BaseAction {\n _metadata: 'finish';\n message: string;\n}\n\nexport type ParsedAction =\n | TapAction\n | DoubleTapAction\n | TypeAction\n | SwipeAction\n | LongPressAction\n | LaunchAction\n | BackAction\n | HomeAction\n | WaitAction\n | InteractAction\n | CallAPIAction\n | TakeoverAction\n | NoteAction\n | FinishAction;\n\nexport function transformAutoGLMAction(\n action: ParsedAction,\n size: { width: number; height: number },\n): PlanningAction[] {\n try {\n switch (action._metadata) {\n case 'finish': {\n const finishAction = action as FinishAction;\n debug('Transform finish action:', finishAction);\n return [\n {\n type: 'Finished',\n param: {},\n thought: finishAction.message,\n },\n ];\n }\n case 'do': {\n const doAction = action as\n | TapAction\n | DoubleTapAction\n | TypeAction\n | SwipeAction\n | LongPressAction\n | LaunchAction\n | BackAction\n | HomeAction\n | WaitAction\n | InteractAction\n | CallAPIAction\n | TakeoverAction\n | NoteAction;\n\n switch ((doAction as any).action) {\n case 'Tap': {\n const tapAction = doAction as TapAction;\n debug('Transform Tap action:', tapAction);\n const [x1, y1, x2, 
y2] = autoGLMCoordinateToBbox(\n tapAction.element[0],\n tapAction.element[1],\n size.width,\n size.height,\n );\n\n const locate: {\n prompt: string;\n bbox: [number, number, number, number];\n } = {\n prompt: '',\n bbox: [x1, y1, x2, y2],\n };\n lastLocateRecorder.recordLocate(locate, 'Tap');\n\n return [\n {\n type: 'Tap',\n param: {\n locate,\n },\n },\n ];\n }\n case 'Double Tap': {\n const doubleTapAction = doAction as DoubleTapAction;\n debug('Transform Double Tap action:', doubleTapAction);\n const [x1, y1, x2, y2] = autoGLMCoordinateToBbox(\n doubleTapAction.element[0],\n doubleTapAction.element[1],\n size.width,\n size.height,\n );\n\n const locate: {\n prompt: string;\n bbox: [number, number, number, number];\n } = {\n prompt: '',\n bbox: [x1, y1, x2, y2],\n };\n lastLocateRecorder.recordLocate(locate, 'Double Tap');\n\n return [\n {\n type: 'DoubleClick',\n param: {\n locate,\n },\n },\n ];\n }\n case 'Type': {\n const typeAction = doAction as TypeAction;\n debug('Transform Type action:', typeAction);\n const { locate: latestLocate, source } =\n lastLocateRecorder.getLatestLocate();\n debug(\n `use latestLocate from ${source} as locate when Input`,\n latestLocate,\n );\n\n return [\n {\n type: 'Input',\n param: {\n value: typeAction.text,\n locate: latestLocate,\n },\n },\n ];\n }\n case 'Swipe': {\n const swipeAction = doAction as SwipeAction;\n debug('Transform Swipe action:', swipeAction);\n\n // Calculate locate using start coordinate\n const [x1, y1, x2, y2] = autoGLMCoordinateToBbox(\n swipeAction.start[0],\n swipeAction.start[1],\n size.width,\n size.height,\n );\n\n const locate: {\n prompt: string;\n bbox: [number, number, number, number];\n } = {\n prompt: '',\n bbox: [x1, y1, x2, y2],\n };\n\n // Calculate horizontal and vertical delta in [0,AUTO_GLM_COORDINATE_MAX] coordinate system\n const deltaX = swipeAction.end[0] - swipeAction.start[0];\n const deltaY = swipeAction.end[1] - swipeAction.start[1];\n\n // Determine direction and distance\n 
let direction: 'up' | 'down' | 'left' | 'right';\n let distance: number;\n\n const absDeltaX = Math.abs(deltaX);\n const absDeltaY = Math.abs(deltaY);\n\n if (absDeltaY > absDeltaX) {\n // Vertical scroll\n distance = Math.round(\n (absDeltaY * size.height) / AUTO_GLM_COORDINATE_MAX,\n );\n direction = deltaY > 0 ? 'up' : 'down';\n } else {\n // Horizontal scroll\n distance = Math.round(\n (absDeltaX * size.width) / AUTO_GLM_COORDINATE_MAX,\n );\n direction = deltaX > 0 ? 'left' : 'right';\n }\n\n debug(\n `Calculate swipe direction: ${direction}, distance: ${distance}`,\n );\n\n return [\n {\n type: 'Scroll',\n param: {\n locate,\n // The scrolling direction here all refers to which direction of the page's content will appear on the screen.\n distance,\n direction,\n },\n thought: swipeAction.think || '',\n },\n ];\n }\n case 'Long Press': {\n const longPressAction = doAction as LongPressAction;\n debug('Transform Long Press action:', longPressAction);\n const [x1, y1, x2, y2] = autoGLMCoordinateToBbox(\n longPressAction.element[0],\n longPressAction.element[1],\n size.width,\n size.height,\n );\n\n const locate: {\n prompt: string;\n bbox: [number, number, number, number];\n } = {\n prompt: '',\n bbox: [x1, y1, x2, y2],\n };\n lastLocateRecorder.recordLocate(locate, 'Long Press');\n\n return [\n {\n type: 'AndroidLongPress',\n param: {\n locate,\n },\n thought: longPressAction.think || '',\n },\n ];\n }\n case 'Back': {\n const backAction = doAction as BackAction;\n debug('Transform Back action:', backAction);\n return [\n {\n type: 'AndroidBackButton',\n param: {},\n thought: backAction.think || '',\n },\n ];\n }\n case 'Home': {\n const homeAction = doAction as HomeAction;\n debug('Transform Home action:', homeAction);\n return [\n {\n type: 'AndroidHomeButton',\n param: {},\n thought: homeAction.think || '',\n },\n ];\n }\n case 'Wait': {\n const waitAction = doAction as WaitAction;\n debug('Transform Wait action:', waitAction);\n return [\n {\n type: 
'Sleep',\n param: {\n timeMs: waitAction.durationMs,\n },\n thought: waitAction.think || '',\n },\n ];\n }\n case 'Launch': {\n throw new Error(\n `Action \"Launch\" from auto-glm is not supported in the current implementation.`,\n );\n }\n case 'Interact': {\n throw new Error(\n `Action \"Interact\" from auto-glm is not supported in the current implementation.`,\n );\n }\n case 'Call_API': {\n throw new Error(\n `Action \"Call_API\" from auto-glm is not supported in the current implementation.`,\n );\n }\n case 'Take_over': {\n throw new Error(\n `Action \"Take_over\" from auto-glm is not supported in the current implementation.`,\n );\n }\n case 'Note': {\n throw new Error(\n `Action \"Note\" from auto-glm is not supported in the current implementation.`,\n );\n }\n default:\n throw new Error(\n `Unknown do() action type: ${(doAction as any).action}`,\n );\n }\n }\n default:\n throw new Error(\n `Unknown action metadata: ${(action as any)._metadata}`,\n );\n }\n } catch (error) {\n const errorMessage = error instanceof Error ? 
error.message : String(error);\n debug('Transform error:', errorMessage);\n throw new Error(`Failed to transform action: ${errorMessage}`);\n }\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debug","getDebug","lastLocateRecorder","LatestLocateRecorder","transformAutoGLMAction","action","size","finishAction","doAction","tapAction","x1","y1","x2","y2","autoGLMCoordinateToBbox","locate","doubleTapAction","typeAction","latestLocate","source","swipeAction","deltaX","deltaY","direction","distance","absDeltaX","Math","absDeltaY","AUTO_GLM_COORDINATE_MAX","longPressAction","backAction","homeAction","waitAction","Error","error","errorMessage","String"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;ACDA,MAAMI,QAAQC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AACvB,MAAMC,qBAAqB,IAAIC,mDAAAA,oBAAoBA;AAwG5C,SAASC,uBACdC,MAAoB,EACpBC,IAAuC;IAEvC,IAAI;QACF,OAAQD,OAAO,SAAS;YACtB,KAAK;gBAAU;oBACb,MAAME,eAAeF;oBACrBL,MAAM,4BAA4BO;oBAClC,OAAO;wBACL;4BACE,MAAM;4BACN,OAAO,CAAC;4BACR,SAASA,aAAa,OAAO;wBAC/B;qBACD;gBACH;YACA,KAAK;gBAAM;oBACT,MAAMC,WAAWH;oBAejB,OAASG,SAAiB,MAAM;wBAC9B,KAAK;4BAAO;gCACV,MAAMC,YAAYD;gCAClBR,MAAM,yBAAyBS;gCAC/B,MAAM,CAACC,IAAIC,IAAIC,IAAIC,GAAG,GAAGC,AAAAA,IAAAA,iCAAAA,uBAAAA,AAAAA,EACvBL,UAAU,OAAO,CAAC,EAAE,EACpBA,UAAU,OAAO,CAAC,EAAE,EACpBH,KAAK,KAAK,EACVA,KAAK,MAAM;gCAGb,MAAMS,SAGF;oCACF,QAAQ;oCACR,MAAM;wCAACL;wCAAIC;wCAAIC;wCAAIC;qCAAG;gCACxB;gCACAX,mBAAmB,YAAY,CAACa,QAAQ;gCAExC,OAAO;oCACL;wCACE,MAAM;wCACN,OAAO;4CACLA;wCACF;oCACF;iCACD;4BACH;wBACA,KAAK;4BAAc;gCACjB,MAAMC,kBAA
kBR;gCACxBR,MAAM,gCAAgCgB;gCACtC,MAAM,CAACN,IAAIC,IAAIC,IAAIC,GAAG,GAAGC,AAAAA,IAAAA,iCAAAA,uBAAAA,AAAAA,EACvBE,gBAAgB,OAAO,CAAC,EAAE,EAC1BA,gBAAgB,OAAO,CAAC,EAAE,EAC1BV,KAAK,KAAK,EACVA,KAAK,MAAM;gCAGb,MAAMS,SAGF;oCACF,QAAQ;oCACR,MAAM;wCAACL;wCAAIC;wCAAIC;wCAAIC;qCAAG;gCACxB;gCACAX,mBAAmB,YAAY,CAACa,QAAQ;gCAExC,OAAO;oCACL;wCACE,MAAM;wCACN,OAAO;4CACLA;wCACF;oCACF;iCACD;4BACH;wBACA,KAAK;4BAAQ;gCACX,MAAME,aAAaT;gCACnBR,MAAM,0BAA0BiB;gCAChC,MAAM,EAAE,QAAQC,YAAY,EAAEC,MAAM,EAAE,GACpCjB,mBAAmB,eAAe;gCACpCF,MACE,CAAC,sBAAsB,EAAEmB,OAAO,qBAAqB,CAAC,EACtDD;gCAGF,OAAO;oCACL;wCACE,MAAM;wCACN,OAAO;4CACL,OAAOD,WAAW,IAAI;4CACtB,QAAQC;wCACV;oCACF;iCACD;4BACH;wBACA,KAAK;4BAAS;gCACZ,MAAME,cAAcZ;gCACpBR,MAAM,2BAA2BoB;gCAGjC,MAAM,CAACV,IAAIC,IAAIC,IAAIC,GAAG,GAAGC,AAAAA,IAAAA,iCAAAA,uBAAAA,AAAAA,EACvBM,YAAY,KAAK,CAAC,EAAE,EACpBA,YAAY,KAAK,CAAC,EAAE,EACpBd,KAAK,KAAK,EACVA,KAAK,MAAM;gCAGb,MAAMS,SAGF;oCACF,QAAQ;oCACR,MAAM;wCAACL;wCAAIC;wCAAIC;wCAAIC;qCAAG;gCACxB;gCAGA,MAAMQ,SAASD,YAAY,GAAG,CAAC,EAAE,GAAGA,YAAY,KAAK,CAAC,EAAE;gCACxD,MAAME,SAASF,YAAY,GAAG,CAAC,EAAE,GAAGA,YAAY,KAAK,CAAC,EAAE;gCAGxD,IAAIG;gCACJ,IAAIC;gCAEJ,MAAMC,YAAYC,KAAK,GAAG,CAACL;gCAC3B,MAAMM,YAAYD,KAAK,GAAG,CAACJ;gCAE3B,IAAIK,YAAYF,WAAW;oCAEzBD,WAAWE,KAAK,KAAK,CAClBC,YAAYrB,KAAK,MAAM,GAAIsB,iCAAAA,uBAAuBA;oCAErDL,YAAYD,SAAS,IAAI,OAAO;gCAClC,OAAO;oCAELE,WAAWE,KAAK,KAAK,CAClBD,YAAYnB,KAAK,KAAK,GAAIsB,iCAAAA,uBAAuBA;oCAEpDL,YAAYF,SAAS,IAAI,SAAS;gCACpC;gCAEArB,MACE,CAAC,2BAA2B,EAAEuB,UAAU,YAAY,EAAEC,UAAU;gCAGlE,OAAO;oCACL;wCACE,MAAM;wCACN,OAAO;4CACLT;4CAEAS;4CACAD;wCACF;wCACA,SAASH,YAAY,KAAK,IAAI;oCAChC;iCACD;4BACH;wBACA,KAAK;4BAAc;gCACjB,MAAMS,kBAAkBrB;gCACxBR,MAAM,gCAAgC6B;gCACtC,MAAM,CAACnB,IAAIC,IAAIC,IAAIC,GAAG,GAAGC,AAAAA,IAAAA,iCAAAA,uBAAAA,AAAAA,EACvBe,gBAAgB,OAAO,CAAC,EAAE,EAC1BA,gBAAgB,OAAO,CAAC,EAAE,EAC1BvB,KAAK,KAAK,EACVA,KAAK,MAAM;gCAGb,MAAMS,SAGF;oCACF,QAAQ;oCACR,MAAM;wCAACL;wCAAIC;wCAAIC;wCAAIC;qCAAG;gCACxB;gCACAX,mBAAmB,YAAY,CAACa,QAAQ;gCAExC,OAAO;oCACL;wCACE,MAAM;wCACN,OAAO;4CACLA;wCACF;wCACA,SAASc,gBAAgB,K
AAK,IAAI;oCACpC;iCACD;4BACH;wBACA,KAAK;4BAAQ;gCACX,MAAMC,aAAatB;gCACnBR,MAAM,0BAA0B8B;gCAChC,OAAO;oCACL;wCACE,MAAM;wCACN,OAAO,CAAC;wCACR,SAASA,WAAW,KAAK,IAAI;oCAC/B;iCACD;4BACH;wBACA,KAAK;4BAAQ;gCACX,MAAMC,aAAavB;gCACnBR,MAAM,0BAA0B+B;gCAChC,OAAO;oCACL;wCACE,MAAM;wCACN,OAAO,CAAC;wCACR,SAASA,WAAW,KAAK,IAAI;oCAC/B;iCACD;4BACH;wBACA,KAAK;4BAAQ;gCACX,MAAMC,aAAaxB;gCACnBR,MAAM,0BAA0BgC;gCAChC,OAAO;oCACL;wCACE,MAAM;wCACN,OAAO;4CACL,QAAQA,WAAW,UAAU;wCAC/B;wCACA,SAASA,WAAW,KAAK,IAAI;oCAC/B;iCACD;4BACH;wBACA,KAAK;4BACH,MAAM,IAAIC,MACR;wBAGJ,KAAK;4BACH,MAAM,IAAIA,MACR;wBAGJ,KAAK;4BACH,MAAM,IAAIA,MACR;wBAGJ,KAAK;4BACH,MAAM,IAAIA,MACR;wBAGJ,KAAK;4BACH,MAAM,IAAIA,MACR;wBAGJ;4BACE,MAAM,IAAIA,MACR,CAAC,0BAA0B,EAAGzB,SAAiB,MAAM,EAAE;oBAE7D;gBACF;YACA;gBACE,MAAM,IAAIyB,MACR,CAAC,yBAAyB,EAAG5B,OAAe,SAAS,EAAE;QAE7D;IACF,EAAE,OAAO6B,OAAO;QACd,MAAMC,eAAeD,iBAAiBD,QAAQC,MAAM,OAAO,GAAGE,OAAOF;QACrElC,MAAM,oBAAoBmC;QAC1B,MAAM,IAAIF,MAAM,CAAC,4BAA4B,EAAEE,cAAc;IAC/D;AACF"}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __webpack_require__ = {};
|
|
3
|
+
(()=>{
|
|
4
|
+
__webpack_require__.d = (exports1, definition)=>{
|
|
5
|
+
for(var key in definition)if (__webpack_require__.o(definition, key) && !__webpack_require__.o(exports1, key)) Object.defineProperty(exports1, key, {
|
|
6
|
+
enumerable: true,
|
|
7
|
+
get: definition[key]
|
|
8
|
+
});
|
|
9
|
+
};
|
|
10
|
+
})();
|
|
11
|
+
(()=>{
|
|
12
|
+
__webpack_require__.o = (obj, prop)=>Object.prototype.hasOwnProperty.call(obj, prop);
|
|
13
|
+
})();
|
|
14
|
+
(()=>{
|
|
15
|
+
__webpack_require__.r = (exports1)=>{
|
|
16
|
+
if ('undefined' != typeof Symbol && Symbol.toStringTag) Object.defineProperty(exports1, Symbol.toStringTag, {
|
|
17
|
+
value: 'Module'
|
|
18
|
+
});
|
|
19
|
+
Object.defineProperty(exports1, '__esModule', {
|
|
20
|
+
value: true
|
|
21
|
+
});
|
|
22
|
+
};
|
|
23
|
+
})();
|
|
24
|
+
var __webpack_exports__ = {};
|
|
25
|
+
__webpack_require__.r(__webpack_exports__);
|
|
26
|
+
__webpack_require__.d(__webpack_exports__, {
|
|
27
|
+
getAutoGLMLocatePrompt: ()=>external_prompt_js_namespaceObject.getAutoGLMLocatePrompt,
|
|
28
|
+
autoGLMCoordinateToBbox: ()=>external_util_js_namespaceObject.autoGLMCoordinateToBbox,
|
|
29
|
+
parseAction: ()=>external_parser_js_namespaceObject.parseAction,
|
|
30
|
+
parseAutoGLMLocateResponse: ()=>external_parser_js_namespaceObject.parseAutoGLMLocateResponse,
|
|
31
|
+
parseAutoGLMResponse: ()=>external_parser_js_namespaceObject.parseAutoGLMResponse,
|
|
32
|
+
autoGLMPlanning: ()=>external_planning_js_namespaceObject.autoGLMPlanning,
|
|
33
|
+
getAutoGLMPlanPrompt: ()=>external_prompt_js_namespaceObject.getAutoGLMPlanPrompt
|
|
34
|
+
});
|
|
35
|
+
const external_prompt_js_namespaceObject = require("./prompt.js");
|
|
36
|
+
const external_parser_js_namespaceObject = require("./parser.js");
|
|
37
|
+
const external_planning_js_namespaceObject = require("./planning.js");
|
|
38
|
+
const external_util_js_namespaceObject = require("./util.js");
|
|
39
|
+
exports.autoGLMCoordinateToBbox = __webpack_exports__.autoGLMCoordinateToBbox;
|
|
40
|
+
exports.autoGLMPlanning = __webpack_exports__.autoGLMPlanning;
|
|
41
|
+
exports.getAutoGLMLocatePrompt = __webpack_exports__.getAutoGLMLocatePrompt;
|
|
42
|
+
exports.getAutoGLMPlanPrompt = __webpack_exports__.getAutoGLMPlanPrompt;
|
|
43
|
+
exports.parseAction = __webpack_exports__.parseAction;
|
|
44
|
+
exports.parseAutoGLMLocateResponse = __webpack_exports__.parseAutoGLMLocateResponse;
|
|
45
|
+
exports.parseAutoGLMResponse = __webpack_exports__.parseAutoGLMResponse;
|
|
46
|
+
for(var __rspack_i in __webpack_exports__)if (-1 === [
|
|
47
|
+
"autoGLMCoordinateToBbox",
|
|
48
|
+
"autoGLMPlanning",
|
|
49
|
+
"getAutoGLMLocatePrompt",
|
|
50
|
+
"getAutoGLMPlanPrompt",
|
|
51
|
+
"parseAction",
|
|
52
|
+
"parseAutoGLMLocateResponse",
|
|
53
|
+
"parseAutoGLMResponse"
|
|
54
|
+
].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
|
|
55
|
+
Object.defineProperty(exports, '__esModule', {
|
|
56
|
+
value: true
|
|
57
|
+
});
|
|
58
|
+
|
|
59
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ai-model/auto-glm/index.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D"}
|
|
@@ -0,0 +1,282 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Container for the small set of bundler runtime helpers used by this module.
var __webpack_require__ = {};

// d(target, getters): for every own key of `getters` that `target` does not
// already own, define an enumerable accessor on `target` backed by that getter.
__webpack_require__.d = (target, getters) => {
    for (const name in getters) {
        const isOwnGetter = __webpack_require__.o(getters, name);
        if (!isOwnGetter || __webpack_require__.o(target, name)) {
            continue;
        }
        Object.defineProperty(target, name, {
            enumerable: true,
            get: getters[name]
        });
    }
};

// o(owner, propertyName): own-property check that does not rely on the
// object's own `hasOwnProperty` method.
__webpack_require__.o = (owner, propertyName) => {
    return Object.prototype.hasOwnProperty.call(owner, propertyName);
};

// r(target): tag `target` as an ES module namespace object.
__webpack_require__.r = (target) => {
    const canTag = 'undefined' != typeof Symbol && Symbol.toStringTag;
    if (canTag) {
        Object.defineProperty(target, Symbol.toStringTag, { value: 'Module' });
    }
    Object.defineProperty(target, '__esModule', { value: true });
};

// Namespace object for this module. Each export is a lazy getter, so the
// bindings are only read when a property is accessed.
var __webpack_exports__ = {};
__webpack_require__.r(__webpack_exports__);
__webpack_require__.d(__webpack_exports__, {
    extractValueAfter: () => extractValueAfter,
    parseAction: () => parseAction,
    parseAutoGLMLocateResponse: () => parseAutoGLMLocateResponse,
    parseAutoGLMResponse: () => parseAutoGLMResponse
});
|
|
32
|
+
const logger_namespaceObject = require("@midscene/shared/logger");
|
|
33
|
+
const debug = (0, logger_namespaceObject.getDebug)('auto-glm-parser');
|
|
34
|
+
/**
 * Returns everything that follows the first occurrence of `key` in `src`.
 *
 * The remainder is trimmed, and a trailing `")` (closing quote plus closing
 * parenthesis of the action call) is removed when present.
 *
 * @param {string} src - Raw action call text.
 * @param {string} key - Marker to search for, e.g. `text="`.
 * @returns {string} The text after `key`, without a trailing `")`.
 * @throws {Error} When `key` does not occur in `src`.
 */
const extractValueAfter = (src, key) => {
    const keyStart = src.indexOf(key);
    if (keyStart === -1) {
        throw new Error(`Missing key ${key} in action payload ${src}`);
    }
    const tail = src.slice(keyStart + key.length).trim();
    const closing = '")';
    return tail.endsWith(closing) ? tail.slice(0, -closing.length) : tail;
};
|
|
41
|
+
// Patterns for `<name>=[x,y]` integer pairs. Whitespace around the numbers
// and the comma is tolerated, so `[120, 340]` parses like `[120,340]`.
const AUTO_GLM_POINT_PATTERNS = {
    element: /element=\[\s*(\d+)\s*,\s*(\d+)\s*\]/,
    start: /start=\[\s*(\d+)\s*,\s*(\d+)\s*\]/,
    end: /end=\[\s*(\d+)\s*,\s*(\d+)\s*\]/
};

/**
 * Reads the `<argName>=[x,y]` coordinate pair from an action call.
 *
 * @param {string} actionText - Trimmed action call text.
 * @param {'element'|'start'|'end'} argName - Which argument to read.
 * @returns {number[]|null} `[x, y]`, or `null` when the argument is missing or malformed.
 */
function readAutoGLMPoint(actionText, argName) {
    const found = actionText.match(AUTO_GLM_POINT_PATTERNS[argName]);
    return found ? [Number(found[1]), Number(found[2])] : null;
}

/**
 * Converts one auto-glm action call into a structured action object.
 *
 * Recognised inputs (after trimming `response.content`):
 *  - `do(action="Type" ...` / `do(action="Type_Name" ...` -> `{ _metadata: 'do', action: 'Type', text, think }`
 *  - `finish(message="...")`                              -> `{ _metadata: 'finish', message, think }`
 *  - `do(action="<Name>", ...)` for Tap, Double Tap, Long Press, Swipe,
 *    Launch, Back, Home, Wait, Interact, Call_API, Take_over and Note.
 *
 * @param {{content: string, think: *}} response - `content` holds the action
 *   call text; `think` is copied onto the result unchanged.
 * @returns {object} The parsed action.
 * @throws {Error} With a message starting `Failed to parse action:` whenever
 *   the text cannot be parsed; the underlying error is attached as `cause`.
 */
function parseAction(response) {
    debug('Parsing action:', response);
    let trimmedResponse = '';
    try {
        trimmedResponse = response.content.trim();

        // Type is handled before the generic do() branch because its text may
        // itself contain quotes; everything after `text="` is taken verbatim.
        if (trimmedResponse.startsWith('do(action="Type"') || trimmedResponse.startsWith('do(action="Type_Name"')) {
            const text = extractValueAfter(trimmedResponse, 'text="');
            return {
                _metadata: 'do',
                action: 'Type',
                text,
                think: response.think
            };
        }

        if (trimmedResponse.startsWith('finish(message=')) {
            const finishKey = 'finish(message="';
            let message = extractValueAfter(trimmedResponse, finishKey);
            // extractValueAfter removes a well-formed closing `")` itself. Only
            // drop a lone trailing `)` when that did not happen (closing quote
            // missing); otherwise a message that legitimately ends in `)`
            // would lose its last character.
            const rawTail = trimmedResponse.slice(trimmedResponse.indexOf(finishKey) + finishKey.length).trim();
            const closingAlreadyStripped = message.length < rawTail.length;
            if (!closingAlreadyStripped && message.endsWith(')')) {
                message = message.slice(0, -1);
            }
            return {
                _metadata: 'finish',
                message,
                think: response.think
            };
        }

        if (trimmedResponse.startsWith('do(')) {
            const actionMatch = trimmedResponse.match(/do\(action="([^"]+)"/);
            if (!actionMatch) throw new Error(`Failed to extract action type from do() call; raw="${trimmedResponse}"`);
            const actionType = actionMatch[1];
            const baseAction = {
                _metadata: 'do',
                think: response.think
            };
            switch (actionType) {
                // These three actions share the same single-point payload.
                case 'Tap':
                case 'Double Tap':
                case 'Long Press': {
                    const element = readAutoGLMPoint(trimmedResponse, 'element');
                    if (!element) throw new Error(`Failed to extract element coordinates for ${actionType}; raw="${trimmedResponse}"`);
                    return {
                        ...baseAction,
                        action: actionType,
                        element
                    };
                }
                case 'Swipe': {
                    const start = readAutoGLMPoint(trimmedResponse, 'start');
                    const end = readAutoGLMPoint(trimmedResponse, 'end');
                    if (!start || !end) throw new Error(`Failed to extract start/end coordinates for Swipe; raw="${trimmedResponse}"`);
                    return {
                        ...baseAction,
                        action: 'Swipe',
                        start,
                        end
                    };
                }
                case 'Launch': {
                    const app = extractValueAfter(trimmedResponse, 'app="');
                    return {
                        ...baseAction,
                        action: 'Launch',
                        app
                    };
                }
                case 'Back':
                    return {
                        ...baseAction,
                        action: 'Back'
                    };
                case 'Home':
                    return {
                        ...baseAction,
                        action: 'Home'
                    };
                case 'Wait': {
                    // The duration is given in seconds, optionally preceded by
                    // a quote or `[`. A fractional part is honoured instead of
                    // being truncated; the result is rounded to whole ms.
                    const durationMatch = trimmedResponse.match(/duration=(?:["\[])?(\d+(?:\.\d+)?)/);
                    if (!durationMatch) throw new Error(`Failed to extract duration for Wait; raw="${trimmedResponse}"`);
                    const durationMs = Math.round(1000 * Number(durationMatch[1]));
                    return {
                        ...baseAction,
                        action: 'Wait',
                        durationMs
                    };
                }
                case 'Interact':
                    return {
                        ...baseAction,
                        action: 'Interact'
                    };
                case 'Call_API': {
                    const instruction = extractValueAfter(trimmedResponse, 'instruction="');
                    return {
                        ...baseAction,
                        action: 'Call_API',
                        instruction
                    };
                }
                case 'Take_over': {
                    const message = extractValueAfter(trimmedResponse, 'message="');
                    return {
                        ...baseAction,
                        action: 'Take_over',
                        message
                    };
                }
                case 'Note': {
                    const message = extractValueAfter(trimmedResponse, 'message="');
                    return {
                        ...baseAction,
                        action: 'Note',
                        message
                    };
                }
                default:
                    throw new Error(`Unknown action type: ${actionType}; raw="${trimmedResponse}"`);
            }
        }

        throw new Error(`Failed to parse action: ${trimmedResponse}`);
    } catch (error) {
        const errorMessage = error instanceof Error ? error.message : String(error);
        // Keep the original error reachable for debugging; runtimes without
        // support for the `cause` option simply ignore the second argument.
        throw new Error(`Failed to parse action: ${errorMessage}; raw="${trimmedResponse}"`, {
            cause: error
        });
    }
}
|
|
203
|
+
/**
 * Splits a raw auto-glm model reply into its reasoning text and its action text.
 *
 * Checked in this order:
 *  1. `finish(message=` - text before the first occurrence is the reasoning;
 *     the action is everything from that occurrence to the end.
 *  2. `do(action=`      - same rule.
 *  3. `<answer>`        - reasoning is the text before the tag with
 *     `<think>`/`</think>` removed; the action is the text after the tag with
 *     `</answer>` removed.
 *  4. otherwise         - empty reasoning, the whole reply is the action.
 *
 * The reply is cut at the FIRST occurrence of the marker only, so a marker
 * that reappears later (for example inside a typed text or a finish message)
 * stays part of the action instead of truncating it.
 *
 * @param {string} content - Raw model reply.
 * @returns {{think: string, content: string}} Reasoning and action text.
 */
function parseAutoGLMResponse(content) {
    for (const marker of ['finish(message=', 'do(action=']) {
        const markerAt = content.indexOf(marker);
        if (markerAt !== -1) {
            return {
                think: content.slice(0, markerAt).trim(),
                content: content.slice(markerAt)
            };
        }
    }

    const answerTag = '<answer>';
    const answerAt = content.indexOf(answerTag);
    if (answerAt !== -1) {
        const think = content.slice(0, answerAt).replace(/<think>/g, '').replace(/<\/think>/g, '').trim();
        const actionContent = content.slice(answerAt + answerTag.length).replace(/<\/answer>/g, '').trim();
        return {
            think,
            content: actionContent
        };
    }

    return {
        think: '',
        content
    };
}
|
|
236
|
+
/**
 * Extracts the tapped point from an auto-glm locate reply.
 *
 * The reply is first split with `parseAutoGLMResponse`. The action part must
 * start with `do(action="Tap"` and contain `element=[x,y]`; whitespace around
 * the numbers and the comma is accepted.
 *
 * Failures are reported through the returned `error` string rather than by
 * throwing. The matching and number conversion below cannot throw, so no
 * try/catch is needed around them.
 *
 * @param {string} rawResponse - Raw model reply.
 * @returns {{think: string, coordinates: ({x: number, y: number}|null), error?: string}}
 *   `coordinates` is `null` and `error` is set when the reply is not a Tap
 *   action or carries no parsable `element` argument.
 */
function parseAutoGLMLocateResponse(rawResponse) {
    const { think, content: actionContent } = parseAutoGLMResponse(rawResponse);

    if (!actionContent.startsWith('do(action="Tap"')) {
        return {
            think,
            coordinates: null,
            error: `Unexpected action type in auto-glm locate response: ${actionContent}`
        };
    }

    const elementMatch = actionContent.match(/element=\[\s*(\d+)\s*,\s*(\d+)\s*\]/);
    if (!elementMatch) {
        return {
            think,
            coordinates: null,
            error: `Failed to extract element coordinates from auto-glm response: ${actionContent}`
        };
    }

    return {
        think,
        coordinates: {
            x: Number(elementMatch[1]),
            y: Number(elementMatch[2])
        }
    };
}
|
|
268
|
+
// Publish the named entry points on the CommonJS `exports` object. Reading
// each property runs its getter on `__webpack_exports__`.
exports.extractValueAfter = __webpack_exports__.extractValueAfter;
exports.parseAction = __webpack_exports__.parseAction;
exports.parseAutoGLMLocateResponse = __webpack_exports__.parseAutoGLMLocateResponse;
exports.parseAutoGLMResponse = __webpack_exports__.parseAutoGLMResponse;

// Copy any remaining enumerable key that was not assigned explicitly above.
for (var __rspack_i in __webpack_exports__) {
    const alreadyPublished = [
        "extractValueAfter",
        "parseAction",
        "parseAutoGLMLocateResponse",
        "parseAutoGLMResponse"
    ].indexOf(__rspack_i) !== -1;
    if (alreadyPublished) {
        continue;
    }
    exports[__rspack_i] = __webpack_exports__[__rspack_i];
}

// Flag the CommonJS module as a transpiled ES module.
Object.defineProperty(exports, '__esModule', { value: true });
|
|
281
|
+
|
|
282
|
+
//# sourceMappingURL=parser.js.map
|