@midscene/core 0.30.6-beta-20251022093704.0 → 1.0.1-beta-20251022061922.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/agent/agent.mjs +41 -33
- package/dist/es/agent/agent.mjs.map +1 -1
- package/dist/es/agent/execution-session.mjs +41 -0
- package/dist/es/agent/execution-session.mjs.map +1 -0
- package/dist/es/agent/task-builder.mjs +315 -0
- package/dist/es/agent/task-builder.mjs.map +1 -0
- package/dist/es/agent/tasks.mjs +80 -405
- package/dist/es/agent/tasks.mjs.map +1 -1
- package/dist/es/agent/ui-utils.mjs.map +1 -1
- package/dist/es/agent/utils.mjs +6 -6
- package/dist/es/agent/utils.mjs.map +1 -1
- package/dist/es/ai-model/common.mjs +1 -15
- package/dist/es/ai-model/common.mjs.map +1 -1
- package/dist/es/ai-model/inspect.mjs +2 -3
- package/dist/es/ai-model/inspect.mjs.map +1 -1
- package/dist/es/ai-model/llm-planning.mjs +11 -30
- package/dist/es/ai-model/llm-planning.mjs.map +1 -1
- package/dist/es/ai-model/prompt/llm-locator.mjs +3 -204
- package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -1
- package/dist/es/ai-model/service-caller/index.mjs +101 -231
- package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
- package/dist/es/index.mjs +3 -2
- package/dist/es/index.mjs.map +1 -1
- package/dist/es/insight/index.mjs +18 -19
- package/dist/es/insight/index.mjs.map +1 -1
- package/dist/es/insight/utils.mjs +3 -3
- package/dist/es/insight/utils.mjs.map +1 -1
- package/dist/es/report.mjs.map +1 -1
- package/dist/es/{ai-model/action-executor.mjs → task-runner.mjs} +81 -10
- package/dist/es/task-runner.mjs.map +1 -0
- package/dist/es/types.mjs +18 -1
- package/dist/es/types.mjs.map +1 -1
- package/dist/es/utils.mjs +2 -2
- package/dist/es/yaml/player.mjs +18 -14
- package/dist/es/yaml/player.mjs.map +1 -1
- package/dist/lib/agent/agent.js +41 -33
- package/dist/lib/agent/agent.js.map +1 -1
- package/dist/lib/agent/execution-session.js +75 -0
- package/dist/lib/agent/execution-session.js.map +1 -0
- package/dist/lib/agent/task-builder.js +352 -0
- package/dist/lib/agent/task-builder.js.map +1 -0
- package/dist/lib/agent/tasks.js +80 -405
- package/dist/lib/agent/tasks.js.map +1 -1
- package/dist/lib/agent/ui-utils.js.map +1 -1
- package/dist/lib/agent/utils.js +6 -6
- package/dist/lib/agent/utils.js.map +1 -1
- package/dist/lib/ai-model/common.js +2 -19
- package/dist/lib/ai-model/common.js.map +1 -1
- package/dist/lib/ai-model/inspect.js +1 -2
- package/dist/lib/ai-model/inspect.js.map +1 -1
- package/dist/lib/ai-model/llm-planning.js +10 -29
- package/dist/lib/ai-model/llm-planning.js.map +1 -1
- package/dist/lib/ai-model/prompt/llm-locator.js +2 -206
- package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -1
- package/dist/lib/ai-model/service-caller/index.js +236 -384
- package/dist/lib/ai-model/service-caller/index.js.map +1 -1
- package/dist/lib/index.js +9 -5
- package/dist/lib/index.js.map +1 -1
- package/dist/lib/insight/index.js +17 -18
- package/dist/lib/insight/index.js.map +1 -1
- package/dist/lib/insight/utils.js +5 -5
- package/dist/lib/insight/utils.js.map +1 -1
- package/dist/lib/report.js.map +1 -1
- package/dist/lib/{ai-model/action-executor.js → task-runner.js} +83 -12
- package/dist/lib/task-runner.js.map +1 -0
- package/dist/lib/types.js +22 -1
- package/dist/lib/types.js.map +1 -1
- package/dist/lib/utils.js +2 -2
- package/dist/lib/yaml/player.js +18 -14
- package/dist/lib/yaml/player.js.map +1 -1
- package/dist/types/agent/agent.d.ts +16 -0
- package/dist/types/agent/execution-session.d.ts +27 -0
- package/dist/types/agent/task-builder.d.ts +34 -0
- package/dist/types/agent/tasks.d.ts +14 -13
- package/dist/types/agent/ui-utils.d.ts +2 -2
- package/dist/types/agent/utils.d.ts +6 -2
- package/dist/types/ai-model/common.d.ts +0 -1
- package/dist/types/ai-model/prompt/llm-locator.d.ts +0 -2
- package/dist/types/device/index.d.ts +20 -20
- package/dist/types/index.d.ts +4 -3
- package/dist/types/insight/index.d.ts +5 -10
- package/dist/types/insight/utils.d.ts +2 -2
- package/dist/types/task-runner.d.ts +31 -0
- package/dist/types/types.d.ts +53 -14
- package/dist/types/yaml.d.ts +3 -1
- package/package.json +4 -7
- package/dist/es/ai-model/action-executor.mjs.map +0 -1
- package/dist/lib/ai-model/action-executor.js.map +0 -1
- package/dist/types/ai-model/action-executor.d.ts +0 -19
package/dist/lib/agent/tasks.js
CHANGED
|
@@ -24,17 +24,18 @@ var __webpack_require__ = {};
|
|
|
24
24
|
var __webpack_exports__ = {};
|
|
25
25
|
__webpack_require__.r(__webpack_exports__);
|
|
26
26
|
__webpack_require__.d(__webpack_exports__, {
|
|
27
|
-
locatePlanForLocate: ()=>locatePlanForLocate,
|
|
27
|
+
locatePlanForLocate: ()=>external_task_builder_js_namespaceObject.locatePlanForLocate,
|
|
28
28
|
TaskExecutor: ()=>TaskExecutor
|
|
29
29
|
});
|
|
30
30
|
const index_js_namespaceObject = require("../ai-model/index.js");
|
|
31
|
-
const
|
|
32
|
-
const external_utils_js_namespaceObject = require("../utils.js");
|
|
31
|
+
const external_types_js_namespaceObject = require("../types.js");
|
|
33
32
|
const env_namespaceObject = require("@midscene/shared/env");
|
|
34
33
|
const logger_namespaceObject = require("@midscene/shared/logger");
|
|
35
34
|
const utils_namespaceObject = require("@midscene/shared/utils");
|
|
35
|
+
const external_execution_session_js_namespaceObject = require("./execution-session.js");
|
|
36
|
+
const external_task_builder_js_namespaceObject = require("./task-builder.js");
|
|
36
37
|
const external_ui_utils_js_namespaceObject = require("./ui-utils.js");
|
|
37
|
-
const
|
|
38
|
+
const external_utils_js_namespaceObject = require("./utils.js");
|
|
38
39
|
function _define_property(obj, key, value) {
|
|
39
40
|
if (key in obj) Object.defineProperty(obj, key, {
|
|
40
41
|
value: value,
|
|
@@ -48,325 +49,21 @@ function _define_property(obj, key, value) {
|
|
|
48
49
|
const debug = (0, logger_namespaceObject.getDebug)('device-task-executor');
|
|
49
50
|
const defaultReplanningCycleLimit = 10;
|
|
50
51
|
const defaultVlmUiTarsReplanningCycleLimit = 40;
|
|
51
|
-
function locatePlanForLocate(param) {
|
|
52
|
-
const locate = 'string' == typeof param ? {
|
|
53
|
-
prompt: param
|
|
54
|
-
} : param;
|
|
55
|
-
const locatePlan = {
|
|
56
|
-
type: 'Locate',
|
|
57
|
-
locate,
|
|
58
|
-
param: locate,
|
|
59
|
-
thought: ''
|
|
60
|
-
};
|
|
61
|
-
return locatePlan;
|
|
62
|
-
}
|
|
63
52
|
class TaskExecutor {
|
|
64
53
|
get page() {
|
|
65
54
|
return this.interface;
|
|
66
55
|
}
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
ts: Date.now(),
|
|
72
|
-
screenshot: base64,
|
|
73
|
-
timing
|
|
74
|
-
};
|
|
75
|
-
return item;
|
|
76
|
-
}
|
|
77
|
-
prependExecutorWithScreenshot(taskApply, appendAfterExecution = false) {
|
|
78
|
-
const taskWithScreenshot = {
|
|
79
|
-
...taskApply,
|
|
80
|
-
executor: async (param, context, ...args)=>{
|
|
81
|
-
const recorder = [];
|
|
82
|
-
const { task } = context;
|
|
83
|
-
task.recorder = recorder;
|
|
84
|
-
const shot = await this.recordScreenshot(`before ${task.type}`);
|
|
85
|
-
recorder.push(shot);
|
|
86
|
-
const result = await taskApply.executor(param, context, ...args);
|
|
87
|
-
if (appendAfterExecution) {
|
|
88
|
-
const shot2 = await this.recordScreenshot('after Action');
|
|
89
|
-
recorder.push(shot2);
|
|
90
|
-
}
|
|
91
|
-
return result;
|
|
92
|
-
}
|
|
93
|
-
};
|
|
94
|
-
return taskWithScreenshot;
|
|
95
|
-
}
|
|
96
|
-
async convertPlanToExecutable(plans, modelConfig, cacheable) {
|
|
97
|
-
const tasks = [];
|
|
98
|
-
const taskForLocatePlan = (plan, detailedLocateParam, onResult)=>{
|
|
99
|
-
if ('string' == typeof detailedLocateParam) detailedLocateParam = {
|
|
100
|
-
prompt: detailedLocateParam
|
|
101
|
-
};
|
|
102
|
-
if (void 0 !== cacheable) detailedLocateParam = {
|
|
103
|
-
...detailedLocateParam,
|
|
104
|
-
cacheable
|
|
105
|
-
};
|
|
106
|
-
const taskFind = {
|
|
107
|
-
type: 'Insight',
|
|
108
|
-
subType: 'Locate',
|
|
109
|
-
param: detailedLocateParam,
|
|
110
|
-
thought: plan.thought,
|
|
111
|
-
executor: async (param, taskContext)=>{
|
|
112
|
-
var _this_taskCache, _locateCacheRecord_cacheContent;
|
|
113
|
-
const { task } = taskContext;
|
|
114
|
-
(0, utils_namespaceObject.assert)((null == param ? void 0 : param.prompt) || (null == param ? void 0 : param.id) || (null == param ? void 0 : param.bbox), `No prompt or id or position or bbox to locate, param=${JSON.stringify(param)}`);
|
|
115
|
-
let insightDump;
|
|
116
|
-
let usage;
|
|
117
|
-
const dumpCollector = (dump)=>{
|
|
118
|
-
var _dump_taskInfo, _dump_taskInfo1;
|
|
119
|
-
insightDump = dump;
|
|
120
|
-
usage = null == dump ? void 0 : null == (_dump_taskInfo = dump.taskInfo) ? void 0 : _dump_taskInfo.usage;
|
|
121
|
-
task.log = {
|
|
122
|
-
dump: insightDump
|
|
123
|
-
};
|
|
124
|
-
task.usage = usage;
|
|
125
|
-
if (null == dump ? void 0 : null == (_dump_taskInfo1 = dump.taskInfo) ? void 0 : _dump_taskInfo1.searchAreaUsage) task.searchAreaUsage = dump.taskInfo.searchAreaUsage;
|
|
126
|
-
};
|
|
127
|
-
this.insight.onceDumpUpdatedFn = dumpCollector;
|
|
128
|
-
const shotTime = Date.now();
|
|
129
|
-
const uiContext = await this.insight.contextRetrieverFn('locate');
|
|
130
|
-
task.uiContext = uiContext;
|
|
131
|
-
const recordItem = {
|
|
132
|
-
type: 'screenshot',
|
|
133
|
-
ts: shotTime,
|
|
134
|
-
screenshot: uiContext.screenshotBase64,
|
|
135
|
-
timing: 'before Insight'
|
|
136
|
-
};
|
|
137
|
-
task.recorder = [
|
|
138
|
-
recordItem
|
|
139
|
-
];
|
|
140
|
-
const elementFromXpath = param.xpath && this.interface.getElementInfoByXpath ? await this.interface.getElementInfoByXpath(param.xpath) : void 0;
|
|
141
|
-
const userExpectedPathHitFlag = !!elementFromXpath;
|
|
142
|
-
const cachePrompt = param.prompt;
|
|
143
|
-
const locateCacheRecord = null == (_this_taskCache = this.taskCache) ? void 0 : _this_taskCache.matchLocateCache(cachePrompt);
|
|
144
|
-
const cacheEntry = null == locateCacheRecord ? void 0 : null == (_locateCacheRecord_cacheContent = locateCacheRecord.cacheContent) ? void 0 : _locateCacheRecord_cacheContent.cache;
|
|
145
|
-
const elementFromCache = userExpectedPathHitFlag ? null : await (0, external_utils_js_namespaceObject_1.matchElementFromCache)(this, cacheEntry, cachePrompt, param.cacheable);
|
|
146
|
-
const cacheHitFlag = !!elementFromCache;
|
|
147
|
-
const elementFromPlan = userExpectedPathHitFlag || cacheHitFlag ? void 0 : (0, external_utils_js_namespaceObject_1.matchElementFromPlan)(param, uiContext.tree);
|
|
148
|
-
const planHitFlag = !!elementFromPlan;
|
|
149
|
-
const elementFromAiLocate = userExpectedPathHitFlag || cacheHitFlag || planHitFlag ? void 0 : (await this.insight.locate(param, {
|
|
150
|
-
context: uiContext
|
|
151
|
-
}, modelConfig)).element;
|
|
152
|
-
const aiLocateHitFlag = !!elementFromAiLocate;
|
|
153
|
-
const element = elementFromXpath || elementFromCache || elementFromPlan || elementFromAiLocate;
|
|
154
|
-
let currentCacheEntry;
|
|
155
|
-
if (element && this.taskCache && !cacheHitFlag && (null == param ? void 0 : param.cacheable) !== false) if (this.interface.cacheFeatureForRect) try {
|
|
156
|
-
const feature = await this.interface.cacheFeatureForRect(element.rect, void 0 !== element.isOrderSensitive ? {
|
|
157
|
-
_orderSensitive: element.isOrderSensitive
|
|
158
|
-
} : void 0);
|
|
159
|
-
if (feature && Object.keys(feature).length > 0) {
|
|
160
|
-
debug('update cache, prompt: %s, cache: %o', cachePrompt, feature);
|
|
161
|
-
currentCacheEntry = feature;
|
|
162
|
-
this.taskCache.updateOrAppendCacheRecord({
|
|
163
|
-
type: 'locate',
|
|
164
|
-
prompt: cachePrompt,
|
|
165
|
-
cache: feature
|
|
166
|
-
}, locateCacheRecord);
|
|
167
|
-
} else debug('no cache data returned, skip cache update, prompt: %s', cachePrompt);
|
|
168
|
-
} catch (error) {
|
|
169
|
-
debug('cacheFeatureForRect failed: %s', error);
|
|
170
|
-
}
|
|
171
|
-
else debug('cacheFeatureForRect is not supported, skip cache update');
|
|
172
|
-
if (!element) throw new Error(`Element not found: ${param.prompt}`);
|
|
173
|
-
let hitBy;
|
|
174
|
-
if (userExpectedPathHitFlag) hitBy = {
|
|
175
|
-
from: 'User expected path',
|
|
176
|
-
context: {
|
|
177
|
-
xpath: param.xpath
|
|
178
|
-
}
|
|
179
|
-
};
|
|
180
|
-
else if (cacheHitFlag) hitBy = {
|
|
181
|
-
from: 'Cache',
|
|
182
|
-
context: {
|
|
183
|
-
cacheEntry,
|
|
184
|
-
cacheToSave: currentCacheEntry
|
|
185
|
-
}
|
|
186
|
-
};
|
|
187
|
-
else if (planHitFlag) hitBy = {
|
|
188
|
-
from: 'Planning',
|
|
189
|
-
context: {
|
|
190
|
-
id: null == elementFromPlan ? void 0 : elementFromPlan.id,
|
|
191
|
-
bbox: null == elementFromPlan ? void 0 : elementFromPlan.bbox
|
|
192
|
-
}
|
|
193
|
-
};
|
|
194
|
-
else if (aiLocateHitFlag) hitBy = {
|
|
195
|
-
from: 'AI model',
|
|
196
|
-
context: {
|
|
197
|
-
prompt: param.prompt
|
|
198
|
-
}
|
|
199
|
-
};
|
|
200
|
-
null == onResult || onResult(element);
|
|
201
|
-
return {
|
|
202
|
-
output: {
|
|
203
|
-
element
|
|
204
|
-
},
|
|
205
|
-
uiContext,
|
|
206
|
-
hitBy
|
|
207
|
-
};
|
|
208
|
-
}
|
|
209
|
-
};
|
|
210
|
-
return taskFind;
|
|
211
|
-
};
|
|
212
|
-
for (const plan of plans)if ('Locate' === plan.type) {
|
|
213
|
-
var _plan_locate, _plan_locate1;
|
|
214
|
-
if (!plan.locate || null === plan.locate || (null == (_plan_locate = plan.locate) ? void 0 : _plan_locate.id) === null || (null == (_plan_locate1 = plan.locate) ? void 0 : _plan_locate1.id) === 'null') {
|
|
215
|
-
debug('Locate action with id is null, will be ignored', plan);
|
|
216
|
-
continue;
|
|
217
|
-
}
|
|
218
|
-
const taskLocate = taskForLocatePlan(plan, plan.locate);
|
|
219
|
-
tasks.push(taskLocate);
|
|
220
|
-
} else if ('Error' === plan.type) {
|
|
221
|
-
var _plan_param;
|
|
222
|
-
const taskActionError = {
|
|
223
|
-
type: 'Action',
|
|
224
|
-
subType: 'Error',
|
|
225
|
-
param: plan.param,
|
|
226
|
-
thought: plan.thought || (null == (_plan_param = plan.param) ? void 0 : _plan_param.thought),
|
|
227
|
-
locate: plan.locate,
|
|
228
|
-
executor: async ()=>{
|
|
229
|
-
var _plan_param;
|
|
230
|
-
throw new Error((null == plan ? void 0 : plan.thought) || (null == (_plan_param = plan.param) ? void 0 : _plan_param.thought) || 'error without thought');
|
|
231
|
-
}
|
|
232
|
-
};
|
|
233
|
-
tasks.push(taskActionError);
|
|
234
|
-
} else if ('Finished' === plan.type) {
|
|
235
|
-
const taskActionFinished = {
|
|
236
|
-
type: 'Action',
|
|
237
|
-
subType: 'Finished',
|
|
238
|
-
param: null,
|
|
239
|
-
thought: plan.thought,
|
|
240
|
-
locate: plan.locate,
|
|
241
|
-
executor: async (param)=>{}
|
|
242
|
-
};
|
|
243
|
-
tasks.push(taskActionFinished);
|
|
244
|
-
} else if ('Sleep' === plan.type) {
|
|
245
|
-
const taskActionSleep = {
|
|
246
|
-
type: 'Action',
|
|
247
|
-
subType: 'Sleep',
|
|
248
|
-
param: plan.param,
|
|
249
|
-
thought: plan.thought,
|
|
250
|
-
locate: plan.locate,
|
|
251
|
-
executor: async (taskParam)=>{
|
|
252
|
-
await (0, external_utils_js_namespaceObject.sleep)((null == taskParam ? void 0 : taskParam.timeMs) || 3000);
|
|
253
|
-
}
|
|
254
|
-
};
|
|
255
|
-
tasks.push(taskActionSleep);
|
|
256
|
-
} else {
|
|
257
|
-
const planType = plan.type;
|
|
258
|
-
const actionSpace = await this.interface.actionSpace();
|
|
259
|
-
const action = actionSpace.find((action)=>action.name === planType);
|
|
260
|
-
const param = plan.param;
|
|
261
|
-
if (!action) throw new Error(`Action type '${planType}' not found`);
|
|
262
|
-
const locateFields = action ? (0, index_js_namespaceObject.findAllMidsceneLocatorField)(action.paramSchema) : [];
|
|
263
|
-
const requiredLocateFields = action ? (0, index_js_namespaceObject.findAllMidsceneLocatorField)(action.paramSchema, true) : [];
|
|
264
|
-
locateFields.forEach((field)=>{
|
|
265
|
-
if (param[field]) {
|
|
266
|
-
const locatePlan = locatePlanForLocate(param[field]);
|
|
267
|
-
debug('will prepend locate param for field', `action.type=${planType}`, `param=${JSON.stringify(param[field])}`, `locatePlan=${JSON.stringify(locatePlan)}`);
|
|
268
|
-
const locateTask = taskForLocatePlan(locatePlan, param[field], (result)=>{
|
|
269
|
-
param[field] = result;
|
|
270
|
-
});
|
|
271
|
-
tasks.push(locateTask);
|
|
272
|
-
} else {
|
|
273
|
-
(0, utils_namespaceObject.assert)(!requiredLocateFields.includes(field), `Required locate field '${field}' is not provided for action ${planType}`);
|
|
274
|
-
debug(`field '${field}' is not provided for action ${planType}`);
|
|
275
|
-
}
|
|
276
|
-
});
|
|
277
|
-
const task = {
|
|
278
|
-
type: 'Action',
|
|
279
|
-
subType: planType,
|
|
280
|
-
thought: plan.thought,
|
|
281
|
-
param: plan.param,
|
|
282
|
-
executor: async (param, context)=>{
|
|
283
|
-
var _context_element;
|
|
284
|
-
debug('executing action', planType, param, `context.element.center: ${null == (_context_element = context.element) ? void 0 : _context_element.center}`);
|
|
285
|
-
const uiContext = await this.insight.contextRetrieverFn('locate');
|
|
286
|
-
context.task.uiContext = uiContext;
|
|
287
|
-
requiredLocateFields.forEach((field)=>{
|
|
288
|
-
(0, utils_namespaceObject.assert)(param[field], `field '${field}' is required for action ${planType} but not provided. Cannot execute action ${planType}.`);
|
|
289
|
-
});
|
|
290
|
-
try {
|
|
291
|
-
await Promise.all([
|
|
292
|
-
(async ()=>{
|
|
293
|
-
if (this.interface.beforeInvokeAction) {
|
|
294
|
-
debug('will call "beforeInvokeAction" for interface');
|
|
295
|
-
await this.interface.beforeInvokeAction(action.name, param);
|
|
296
|
-
debug('called "beforeInvokeAction" for interface');
|
|
297
|
-
}
|
|
298
|
-
})(),
|
|
299
|
-
(0, external_utils_js_namespaceObject.sleep)(200)
|
|
300
|
-
]);
|
|
301
|
-
} catch (originalError) {
|
|
302
|
-
const originalMessage = (null == originalError ? void 0 : originalError.message) || String(originalError);
|
|
303
|
-
throw new Error(`error in running beforeInvokeAction for ${action.name}: ${originalMessage}`, {
|
|
304
|
-
cause: originalError
|
|
305
|
-
});
|
|
306
|
-
}
|
|
307
|
-
if (action.paramSchema) try {
|
|
308
|
-
param = (0, index_js_namespaceObject.parseActionParam)(param, action.paramSchema);
|
|
309
|
-
} catch (error) {
|
|
310
|
-
throw new Error(`Invalid parameters for action ${action.name}: ${error.message}\nParameters: ${JSON.stringify(param)}`, {
|
|
311
|
-
cause: error
|
|
312
|
-
});
|
|
313
|
-
}
|
|
314
|
-
debug('calling action', action.name);
|
|
315
|
-
const actionFn = action.call.bind(this.interface);
|
|
316
|
-
await actionFn(param, context);
|
|
317
|
-
debug('called action', action.name);
|
|
318
|
-
try {
|
|
319
|
-
if (this.interface.afterInvokeAction) {
|
|
320
|
-
debug('will call "afterInvokeAction" for interface');
|
|
321
|
-
await this.interface.afterInvokeAction(action.name, param);
|
|
322
|
-
debug('called "afterInvokeAction" for interface');
|
|
323
|
-
}
|
|
324
|
-
} catch (originalError) {
|
|
325
|
-
const originalMessage = (null == originalError ? void 0 : originalError.message) || String(originalError);
|
|
326
|
-
throw new Error(`error in running afterInvokeAction for ${action.name}: ${originalMessage}`, {
|
|
327
|
-
cause: originalError
|
|
328
|
-
});
|
|
329
|
-
}
|
|
330
|
-
return {
|
|
331
|
-
output: {
|
|
332
|
-
success: true,
|
|
333
|
-
action: planType,
|
|
334
|
-
param: param
|
|
335
|
-
}
|
|
336
|
-
};
|
|
337
|
-
}
|
|
338
|
-
};
|
|
339
|
-
tasks.push(task);
|
|
340
|
-
}
|
|
341
|
-
const wrappedTasks = tasks.map((task, index)=>{
|
|
342
|
-
if ('Action' === task.type) return this.prependExecutorWithScreenshot(task, index === tasks.length - 1);
|
|
343
|
-
return task;
|
|
56
|
+
createExecutionSession(title, options) {
|
|
57
|
+
return new external_execution_session_js_namespaceObject.ExecutionSession(title, ()=>Promise.resolve(this.insight.contextRetrieverFn()), {
|
|
58
|
+
onTaskStart: this.onTaskStartCallback,
|
|
59
|
+
tasks: null == options ? void 0 : options.tasks
|
|
344
60
|
});
|
|
345
|
-
return {
|
|
346
|
-
tasks: wrappedTasks
|
|
347
|
-
};
|
|
348
61
|
}
|
|
349
|
-
async
|
|
350
|
-
|
|
351
|
-
const uiContext = await this.insight.contextRetrieverFn('locate');
|
|
352
|
-
const recordItem = {
|
|
353
|
-
type: 'screenshot',
|
|
354
|
-
ts: shotTime,
|
|
355
|
-
screenshot: uiContext.screenshotBase64,
|
|
356
|
-
timing: 'before Planning'
|
|
357
|
-
};
|
|
358
|
-
executorContext.task.recorder = [
|
|
359
|
-
recordItem
|
|
360
|
-
];
|
|
361
|
-
executorContext.task.uiContext = uiContext;
|
|
362
|
-
return {
|
|
363
|
-
uiContext
|
|
364
|
-
};
|
|
62
|
+
async convertPlanToExecutable(plans, modelConfig, options) {
|
|
63
|
+
return this.taskBuilder.build(plans, modelConfig, options);
|
|
365
64
|
}
|
|
366
65
|
async loadYamlFlowAsPlanning(userInstruction, yamlString) {
|
|
367
|
-
const
|
|
368
|
-
onTaskStart: this.onTaskStartCallback
|
|
369
|
-
});
|
|
66
|
+
const session = this.createExecutionSession((0, external_ui_utils_js_namespaceObject.taskTitleStr)('Action', userInstruction));
|
|
370
67
|
const task = {
|
|
371
68
|
type: 'Planning',
|
|
372
69
|
subType: 'LoadYaml',
|
|
@@ -375,7 +72,8 @@ class TaskExecutor {
|
|
|
375
72
|
userInstruction
|
|
376
73
|
},
|
|
377
74
|
executor: async (param, executorContext)=>{
|
|
378
|
-
|
|
75
|
+
const { uiContext } = executorContext;
|
|
76
|
+
(0, utils_namespaceObject.assert)(uiContext, 'uiContext is required for Planning task');
|
|
379
77
|
return {
|
|
380
78
|
output: {
|
|
381
79
|
actions: [],
|
|
@@ -395,10 +93,9 @@ class TaskExecutor {
|
|
|
395
93
|
};
|
|
396
94
|
}
|
|
397
95
|
};
|
|
398
|
-
await
|
|
399
|
-
await taskExecutor.flush();
|
|
96
|
+
await session.appendAndRun(task);
|
|
400
97
|
return {
|
|
401
|
-
|
|
98
|
+
runner: session.getRunner()
|
|
402
99
|
};
|
|
403
100
|
}
|
|
404
101
|
createPlanningTask(userInstruction, actionContext, modelConfig) {
|
|
@@ -411,7 +108,8 @@ class TaskExecutor {
|
|
|
411
108
|
},
|
|
412
109
|
executor: async (param, executorContext)=>{
|
|
413
110
|
const startTime = Date.now();
|
|
414
|
-
const { uiContext } =
|
|
111
|
+
const { uiContext } = executorContext;
|
|
112
|
+
(0, utils_namespaceObject.assert)(uiContext, 'uiContext is required for Planning task');
|
|
415
113
|
const { vlMode } = modelConfig;
|
|
416
114
|
const uiTarsModelVersion = 'vlm-ui-tars' === vlMode ? modelConfig.uiTarsModelVersion : void 0;
|
|
417
115
|
(0, utils_namespaceObject.assert)(this.interface.actionSpace, 'actionSpace for device is not implemented');
|
|
@@ -438,13 +136,7 @@ class TaskExecutor {
|
|
|
438
136
|
if (sleep) {
|
|
439
137
|
const timeNow = Date.now();
|
|
440
138
|
const timeRemaining = sleep - (timeNow - startTime);
|
|
441
|
-
if (timeRemaining > 0) finalActions.push(
|
|
442
|
-
type: 'Sleep',
|
|
443
|
-
param: {
|
|
444
|
-
timeMs: timeRemaining
|
|
445
|
-
},
|
|
446
|
-
locate: null
|
|
447
|
-
});
|
|
139
|
+
if (timeRemaining > 0) finalActions.push(this.sleepPlan(timeRemaining));
|
|
448
140
|
}
|
|
449
141
|
if (0 === finalActions.length) (0, utils_namespaceObject.assert)(!more_actions_needed_by_instruction || sleep, error ? `Failed to plan: ${error}` : 'No plan found');
|
|
450
142
|
return {
|
|
@@ -464,16 +156,13 @@ class TaskExecutor {
|
|
|
464
156
|
return task;
|
|
465
157
|
}
|
|
466
158
|
async runPlans(title, plans, modelConfig) {
|
|
467
|
-
const
|
|
468
|
-
onTaskStart: this.onTaskStartCallback
|
|
469
|
-
});
|
|
159
|
+
const session = this.createExecutionSession(title);
|
|
470
160
|
const { tasks } = await this.convertPlanToExecutable(plans, modelConfig);
|
|
471
|
-
await
|
|
472
|
-
const result = await taskExecutor.flush();
|
|
161
|
+
const result = await session.appendAndRun(tasks);
|
|
473
162
|
const { output } = result;
|
|
474
163
|
return {
|
|
475
164
|
output,
|
|
476
|
-
|
|
165
|
+
runner: session.getRunner()
|
|
477
166
|
};
|
|
478
167
|
}
|
|
479
168
|
getReplanningCycleLimit(isVlmUiTars) {
|
|
@@ -481,38 +170,38 @@ class TaskExecutor {
|
|
|
481
170
|
}
|
|
482
171
|
async action(userPrompt, modelConfig, actionContext, cacheable) {
|
|
483
172
|
this.conversationHistory.reset();
|
|
484
|
-
const
|
|
485
|
-
|
|
486
|
-
});
|
|
173
|
+
const session = this.createExecutionSession((0, external_ui_utils_js_namespaceObject.taskTitleStr)('Action', userPrompt));
|
|
174
|
+
const runner = session.getRunner();
|
|
487
175
|
let replanCount = 0;
|
|
488
176
|
const yamlFlow = [];
|
|
489
177
|
const replanningCycleLimit = this.getReplanningCycleLimit('vlm-ui-tars' === modelConfig.vlMode);
|
|
490
178
|
while(true){
|
|
491
179
|
if (replanCount > replanningCycleLimit) {
|
|
492
180
|
const errorMsg = `Replanning ${replanningCycleLimit} times, which is more than the limit, please split the task into multiple steps`;
|
|
493
|
-
return
|
|
181
|
+
return session.appendErrorPlan(errorMsg);
|
|
494
182
|
}
|
|
495
183
|
const planningTask = this.createPlanningTask(userPrompt, actionContext, modelConfig);
|
|
496
|
-
await
|
|
497
|
-
const result = await taskExecutor.flush();
|
|
184
|
+
const result = await session.appendAndRun(planningTask);
|
|
498
185
|
const planResult = null == result ? void 0 : result.output;
|
|
499
|
-
if (
|
|
186
|
+
if (session.isInErrorState()) return {
|
|
500
187
|
output: planResult,
|
|
501
|
-
|
|
188
|
+
runner
|
|
502
189
|
};
|
|
503
190
|
const plans = planResult.actions || [];
|
|
504
191
|
yamlFlow.push(...planResult.yamlFlow || []);
|
|
505
192
|
let executables;
|
|
506
193
|
try {
|
|
507
|
-
executables = await this.convertPlanToExecutable(plans, modelConfig,
|
|
508
|
-
|
|
194
|
+
executables = await this.convertPlanToExecutable(plans, modelConfig, {
|
|
195
|
+
cacheable,
|
|
196
|
+
subTask: true
|
|
197
|
+
});
|
|
198
|
+
await session.appendAndRun(executables.tasks);
|
|
509
199
|
} catch (error) {
|
|
510
|
-
return
|
|
200
|
+
return session.appendErrorPlan(`Error converting plans to executable tasks: ${error}, plans: ${JSON.stringify(plans)}`);
|
|
511
201
|
}
|
|
512
|
-
|
|
513
|
-
if (taskExecutor.isInErrorState()) return {
|
|
202
|
+
if (session.isInErrorState()) return {
|
|
514
203
|
output: void 0,
|
|
515
|
-
|
|
204
|
+
runner
|
|
516
205
|
};
|
|
517
206
|
if (!planResult.more_actions_needed_by_instruction) break;
|
|
518
207
|
replanCount++;
|
|
@@ -521,7 +210,7 @@ class TaskExecutor {
|
|
|
521
210
|
output: {
|
|
522
211
|
yamlFlow
|
|
523
212
|
},
|
|
524
|
-
|
|
213
|
+
runner
|
|
525
214
|
};
|
|
526
215
|
}
|
|
527
216
|
createTypeQueryTask(type, demand, modelConfig, opt, multimodalPrompt) {
|
|
@@ -537,23 +226,15 @@ class TaskExecutor {
|
|
|
537
226
|
},
|
|
538
227
|
executor: async (param, taskContext)=>{
|
|
539
228
|
const { task } = taskContext;
|
|
540
|
-
let
|
|
541
|
-
const
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
const uiContext = await this.insight.contextRetrieverFn('extract');
|
|
547
|
-
task.uiContext = uiContext;
|
|
548
|
-
const recordItem = {
|
|
549
|
-
type: 'screenshot',
|
|
550
|
-
ts: shotTime,
|
|
551
|
-
screenshot: uiContext.screenshotBase64,
|
|
552
|
-
timing: 'before Extract'
|
|
229
|
+
let queryDump;
|
|
230
|
+
const applyDump = (dump)=>{
|
|
231
|
+
queryDump = dump;
|
|
232
|
+
task.log = {
|
|
233
|
+
dump
|
|
234
|
+
};
|
|
553
235
|
};
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
];
|
|
236
|
+
const uiContext = taskContext.uiContext;
|
|
237
|
+
(0, utils_namespaceObject.assert)(uiContext, 'uiContext is required for Query task');
|
|
557
238
|
const ifTypeRestricted = 'Query' !== type;
|
|
558
239
|
let demandInput = demand;
|
|
559
240
|
let keyOfResult = 'result';
|
|
@@ -566,7 +247,15 @@ class TaskExecutor {
|
|
|
566
247
|
} else if (ifTypeRestricted) demandInput = {
|
|
567
248
|
[keyOfResult]: `${type}, ${demand}`
|
|
568
249
|
};
|
|
569
|
-
|
|
250
|
+
let extractResult;
|
|
251
|
+
try {
|
|
252
|
+
extractResult = await this.insight.extract(demandInput, modelConfig, opt, multimodalPrompt);
|
|
253
|
+
} catch (error) {
|
|
254
|
+
if (error instanceof external_types_js_namespaceObject.InsightError) applyDump(error.dump);
|
|
255
|
+
throw error;
|
|
256
|
+
}
|
|
257
|
+
const { data, usage, thought, dump } = extractResult;
|
|
258
|
+
applyDump(dump);
|
|
570
259
|
let outputResult = data;
|
|
571
260
|
if (ifTypeRestricted) if ('string' == typeof data) outputResult = data;
|
|
572
261
|
else {
|
|
@@ -575,7 +264,7 @@ class TaskExecutor {
|
|
|
575
264
|
}
|
|
576
265
|
return {
|
|
577
266
|
output: outputResult,
|
|
578
|
-
log:
|
|
267
|
+
log: queryDump,
|
|
579
268
|
usage,
|
|
580
269
|
thought
|
|
581
270
|
};
|
|
@@ -584,57 +273,36 @@ class TaskExecutor {
|
|
|
584
273
|
return queryTask;
|
|
585
274
|
}
|
|
586
275
|
async createTypeQueryExecution(type, demand, modelConfig, opt, multimodalPrompt) {
|
|
587
|
-
const
|
|
588
|
-
onTaskStart: this.onTaskStartCallback
|
|
589
|
-
});
|
|
276
|
+
const session = this.createExecutionSession((0, external_ui_utils_js_namespaceObject.taskTitleStr)(type, 'string' == typeof demand ? demand : JSON.stringify(demand)));
|
|
590
277
|
const queryTask = await this.createTypeQueryTask(type, demand, modelConfig, opt, multimodalPrompt);
|
|
591
|
-
await
|
|
592
|
-
const result = await taskExecutor.flush();
|
|
278
|
+
const result = await session.appendAndRun(queryTask);
|
|
593
279
|
if (!result) throw new Error('result of taskExecutor.flush() is undefined in function createTypeQueryTask');
|
|
594
280
|
const { output, thought } = result;
|
|
595
281
|
return {
|
|
596
282
|
output,
|
|
597
283
|
thought,
|
|
598
|
-
|
|
284
|
+
runner: session.getRunner()
|
|
599
285
|
};
|
|
600
286
|
}
|
|
601
|
-
|
|
602
|
-
const errorPlan = {
|
|
603
|
-
type: 'Error',
|
|
604
|
-
param: {
|
|
605
|
-
thought: errorMsg
|
|
606
|
-
},
|
|
607
|
-
locate: null
|
|
608
|
-
};
|
|
609
|
-
const { tasks } = await this.convertPlanToExecutable([
|
|
610
|
-
errorPlan
|
|
611
|
-
], modelConfig);
|
|
612
|
-
await taskExecutor.append(this.prependExecutorWithScreenshot(tasks[0]));
|
|
613
|
-
await taskExecutor.flush();
|
|
287
|
+
sleepPlan(timeMs) {
|
|
614
288
|
return {
|
|
615
|
-
output: void 0,
|
|
616
|
-
executor: taskExecutor
|
|
617
|
-
};
|
|
618
|
-
}
|
|
619
|
-
async taskForSleep(timeMs, modelConfig) {
|
|
620
|
-
const sleepPlan = {
|
|
621
289
|
type: 'Sleep',
|
|
622
290
|
param: {
|
|
623
291
|
timeMs
|
|
624
292
|
},
|
|
625
293
|
locate: null
|
|
626
294
|
};
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
295
|
+
}
|
|
296
|
+
async taskForSleep(timeMs, _modelConfig) {
|
|
297
|
+
return this.taskBuilder.createSleepTask({
|
|
298
|
+
timeMs
|
|
299
|
+
});
|
|
631
300
|
}
|
|
632
301
|
async waitFor(assertion, opt, modelConfig) {
|
|
633
|
-
const { textPrompt, multimodalPrompt } = (0,
|
|
302
|
+
const { textPrompt, multimodalPrompt } = (0, external_utils_js_namespaceObject.parsePrompt)(assertion);
|
|
634
303
|
const description = `waitFor: ${textPrompt}`;
|
|
635
|
-
const
|
|
636
|
-
|
|
637
|
-
});
|
|
304
|
+
const session = this.createExecutionSession((0, external_ui_utils_js_namespaceObject.taskTitleStr)('WaitFor', description));
|
|
305
|
+
const runner = session.getRunner();
|
|
638
306
|
const { timeoutMs, checkIntervalMs } = opt;
|
|
639
307
|
(0, utils_namespaceObject.assert)(assertion, 'No assertion for waitFor');
|
|
640
308
|
(0, utils_namespaceObject.assert)(timeoutMs, 'No timeoutMs for waitFor');
|
|
@@ -648,26 +316,28 @@ class TaskExecutor {
|
|
|
648
316
|
const queryTask = await this.createTypeQueryTask('WaitFor', textPrompt, modelConfig, {
|
|
649
317
|
doNotThrowError: true
|
|
650
318
|
}, multimodalPrompt);
|
|
651
|
-
await
|
|
652
|
-
const result = await taskExecutor.flush();
|
|
319
|
+
const result = await session.appendAndRun(queryTask);
|
|
653
320
|
if (null == result ? void 0 : result.output) return {
|
|
654
321
|
output: void 0,
|
|
655
|
-
|
|
322
|
+
runner
|
|
656
323
|
};
|
|
657
324
|
errorThought = (null == result ? void 0 : result.thought) || !result && `No result from assertion: ${textPrompt}` || `unknown error when waiting for assertion: ${textPrompt}`;
|
|
658
325
|
const now = Date.now();
|
|
659
326
|
if (now - startTime < checkIntervalMs) {
|
|
660
327
|
const timeRemaining = checkIntervalMs - (now - startTime);
|
|
661
|
-
const sleepTask =
|
|
662
|
-
|
|
328
|
+
const sleepTask = this.taskBuilder.createSleepTask({
|
|
329
|
+
timeMs: timeRemaining
|
|
330
|
+
});
|
|
331
|
+
await session.append(sleepTask);
|
|
663
332
|
}
|
|
664
333
|
}
|
|
665
|
-
return
|
|
334
|
+
return session.appendErrorPlan(`waitFor timeout: ${errorThought}`);
|
|
666
335
|
}
|
|
667
336
|
constructor(interfaceInstance, insight, opts){
|
|
668
337
|
_define_property(this, "interface", void 0);
|
|
669
338
|
_define_property(this, "insight", void 0);
|
|
670
339
|
_define_property(this, "taskCache", void 0);
|
|
340
|
+
_define_property(this, "taskBuilder", void 0);
|
|
671
341
|
_define_property(this, "conversationHistory", void 0);
|
|
672
342
|
_define_property(this, "onTaskStartCallback", void 0);
|
|
673
343
|
_define_property(this, "replanningCycleLimit", void 0);
|
|
@@ -677,6 +347,11 @@ class TaskExecutor {
|
|
|
677
347
|
this.onTaskStartCallback = null == opts ? void 0 : opts.onTaskStart;
|
|
678
348
|
this.replanningCycleLimit = opts.replanningCycleLimit;
|
|
679
349
|
this.conversationHistory = new index_js_namespaceObject.ConversationHistory();
|
|
350
|
+
this.taskBuilder = new external_task_builder_js_namespaceObject.TaskBuilder({
|
|
351
|
+
interfaceInstance,
|
|
352
|
+
insight,
|
|
353
|
+
taskCache: opts.taskCache
|
|
354
|
+
});
|
|
680
355
|
}
|
|
681
356
|
}
|
|
682
357
|
exports.TaskExecutor = __webpack_exports__.TaskExecutor;
|