@midscene/core 0.30.5 → 1.0.1-beta-20251021060907.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/agent/agent.mjs +41 -33
- package/dist/es/agent/agent.mjs.map +1 -1
- package/dist/es/agent/execution-session.mjs +41 -0
- package/dist/es/agent/execution-session.mjs.map +1 -0
- package/dist/es/agent/task-builder.mjs +303 -0
- package/dist/es/agent/task-builder.mjs.map +1 -0
- package/dist/es/agent/tasks.mjs +68 -391
- package/dist/es/agent/tasks.mjs.map +1 -1
- package/dist/es/agent/ui-utils.mjs.map +1 -1
- package/dist/es/agent/utils.mjs +6 -6
- package/dist/es/agent/utils.mjs.map +1 -1
- package/dist/es/ai-model/common.mjs +1 -15
- package/dist/es/ai-model/common.mjs.map +1 -1
- package/dist/es/ai-model/inspect.mjs +2 -3
- package/dist/es/ai-model/inspect.mjs.map +1 -1
- package/dist/es/ai-model/llm-planning.mjs +6 -24
- package/dist/es/ai-model/llm-planning.mjs.map +1 -1
- package/dist/es/ai-model/prompt/llm-locator.mjs +3 -204
- package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -1
- package/dist/es/ai-model/service-caller/index.mjs +101 -231
- package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
- package/dist/es/index.mjs +3 -2
- package/dist/es/index.mjs.map +1 -1
- package/dist/es/insight/index.mjs +18 -19
- package/dist/es/insight/index.mjs.map +1 -1
- package/dist/es/insight/utils.mjs +3 -3
- package/dist/es/insight/utils.mjs.map +1 -1
- package/dist/es/report.mjs.map +1 -1
- package/dist/es/{ai-model/action-executor.mjs → task-runner.mjs} +69 -10
- package/dist/es/task-runner.mjs.map +1 -0
- package/dist/es/types.mjs +18 -1
- package/dist/es/types.mjs.map +1 -1
- package/dist/es/utils.mjs +2 -2
- package/dist/es/yaml/player.mjs +18 -14
- package/dist/es/yaml/player.mjs.map +1 -1
- package/dist/lib/agent/agent.js +41 -33
- package/dist/lib/agent/agent.js.map +1 -1
- package/dist/lib/agent/execution-session.js +75 -0
- package/dist/lib/agent/execution-session.js.map +1 -0
- package/dist/lib/agent/task-builder.js +340 -0
- package/dist/lib/agent/task-builder.js.map +1 -0
- package/dist/lib/agent/tasks.js +68 -391
- package/dist/lib/agent/tasks.js.map +1 -1
- package/dist/lib/agent/ui-utils.js.map +1 -1
- package/dist/lib/agent/utils.js +6 -6
- package/dist/lib/agent/utils.js.map +1 -1
- package/dist/lib/ai-model/common.js +2 -19
- package/dist/lib/ai-model/common.js.map +1 -1
- package/dist/lib/ai-model/inspect.js +1 -2
- package/dist/lib/ai-model/inspect.js.map +1 -1
- package/dist/lib/ai-model/llm-planning.js +5 -23
- package/dist/lib/ai-model/llm-planning.js.map +1 -1
- package/dist/lib/ai-model/prompt/llm-locator.js +2 -206
- package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -1
- package/dist/lib/ai-model/service-caller/index.js +236 -384
- package/dist/lib/ai-model/service-caller/index.js.map +1 -1
- package/dist/lib/index.js +9 -5
- package/dist/lib/index.js.map +1 -1
- package/dist/lib/insight/index.js +17 -18
- package/dist/lib/insight/index.js.map +1 -1
- package/dist/lib/insight/utils.js +5 -5
- package/dist/lib/insight/utils.js.map +1 -1
- package/dist/lib/report.js.map +1 -1
- package/dist/lib/{ai-model/action-executor.js → task-runner.js} +71 -12
- package/dist/lib/task-runner.js.map +1 -0
- package/dist/lib/types.js +22 -1
- package/dist/lib/types.js.map +1 -1
- package/dist/lib/utils.js +2 -2
- package/dist/lib/yaml/player.js +18 -14
- package/dist/lib/yaml/player.js.map +1 -1
- package/dist/types/agent/agent.d.ts +16 -0
- package/dist/types/agent/execution-session.d.ts +27 -0
- package/dist/types/agent/task-builder.d.ts +24 -0
- package/dist/types/agent/tasks.d.ts +8 -11
- package/dist/types/agent/ui-utils.d.ts +2 -2
- package/dist/types/agent/utils.d.ts +5 -2
- package/dist/types/ai-model/common.d.ts +0 -1
- package/dist/types/ai-model/prompt/llm-locator.d.ts +0 -2
- package/dist/types/index.d.ts +4 -3
- package/dist/types/insight/index.d.ts +5 -10
- package/dist/types/insight/utils.d.ts +2 -2
- package/dist/types/{ai-model/action-executor.d.ts → task-runner.d.ts} +14 -3
- package/dist/types/types.d.ts +47 -4
- package/dist/types/yaml.d.ts +3 -1
- package/package.json +4 -7
- package/dist/es/ai-model/action-executor.mjs.map +0 -1
- package/dist/lib/ai-model/action-executor.js.map +0 -1
package/dist/lib/agent/tasks.js
CHANGED
|
@@ -24,17 +24,18 @@ var __webpack_require__ = {};
|
|
|
24
24
|
var __webpack_exports__ = {};
|
|
25
25
|
__webpack_require__.r(__webpack_exports__);
|
|
26
26
|
__webpack_require__.d(__webpack_exports__, {
|
|
27
|
-
locatePlanForLocate: ()=>locatePlanForLocate,
|
|
27
|
+
locatePlanForLocate: ()=>external_task_builder_js_namespaceObject.locatePlanForLocate,
|
|
28
28
|
TaskExecutor: ()=>TaskExecutor
|
|
29
29
|
});
|
|
30
30
|
const index_js_namespaceObject = require("../ai-model/index.js");
|
|
31
|
-
const
|
|
32
|
-
const external_utils_js_namespaceObject = require("../utils.js");
|
|
31
|
+
const external_types_js_namespaceObject = require("../types.js");
|
|
33
32
|
const env_namespaceObject = require("@midscene/shared/env");
|
|
34
33
|
const logger_namespaceObject = require("@midscene/shared/logger");
|
|
35
34
|
const utils_namespaceObject = require("@midscene/shared/utils");
|
|
35
|
+
const external_execution_session_js_namespaceObject = require("./execution-session.js");
|
|
36
|
+
const external_task_builder_js_namespaceObject = require("./task-builder.js");
|
|
36
37
|
const external_ui_utils_js_namespaceObject = require("./ui-utils.js");
|
|
37
|
-
const
|
|
38
|
+
const external_utils_js_namespaceObject = require("./utils.js");
|
|
38
39
|
function _define_property(obj, key, value) {
|
|
39
40
|
if (key in obj) Object.defineProperty(obj, key, {
|
|
40
41
|
value: value,
|
|
@@ -48,325 +49,23 @@ function _define_property(obj, key, value) {
|
|
|
48
49
|
const debug = (0, logger_namespaceObject.getDebug)('device-task-executor');
|
|
49
50
|
const defaultReplanningCycleLimit = 10;
|
|
50
51
|
const defaultVlmUiTarsReplanningCycleLimit = 40;
|
|
51
|
-
function locatePlanForLocate(param) {
|
|
52
|
-
const locate = 'string' == typeof param ? {
|
|
53
|
-
prompt: param
|
|
54
|
-
} : param;
|
|
55
|
-
const locatePlan = {
|
|
56
|
-
type: 'Locate',
|
|
57
|
-
locate,
|
|
58
|
-
param: locate,
|
|
59
|
-
thought: ''
|
|
60
|
-
};
|
|
61
|
-
return locatePlan;
|
|
62
|
-
}
|
|
63
52
|
class TaskExecutor {
|
|
64
53
|
get page() {
|
|
65
54
|
return this.interface;
|
|
66
55
|
}
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
screenshot: base64,
|
|
73
|
-
timing
|
|
74
|
-
};
|
|
75
|
-
return item;
|
|
76
|
-
}
|
|
77
|
-
prependExecutorWithScreenshot(taskApply, appendAfterExecution = false) {
|
|
78
|
-
const taskWithScreenshot = {
|
|
79
|
-
...taskApply,
|
|
80
|
-
executor: async (param, context, ...args)=>{
|
|
81
|
-
const recorder = [];
|
|
82
|
-
const { task } = context;
|
|
83
|
-
task.recorder = recorder;
|
|
84
|
-
const shot = await this.recordScreenshot(`before ${task.type}`);
|
|
85
|
-
recorder.push(shot);
|
|
86
|
-
const result = await taskApply.executor(param, context, ...args);
|
|
87
|
-
if (appendAfterExecution) {
|
|
88
|
-
const shot2 = await this.recordScreenshot('after Action');
|
|
89
|
-
recorder.push(shot2);
|
|
90
|
-
}
|
|
91
|
-
return result;
|
|
92
|
-
}
|
|
93
|
-
};
|
|
94
|
-
return taskWithScreenshot;
|
|
56
|
+
createExecutionSession(title, options) {
|
|
57
|
+
return new external_execution_session_js_namespaceObject.ExecutionSession(title, ()=>Promise.resolve(this.insight.contextRetrieverFn()), {
|
|
58
|
+
onTaskStart: this.onTaskStartCallback,
|
|
59
|
+
tasks: null == options ? void 0 : options.tasks
|
|
60
|
+
});
|
|
95
61
|
}
|
|
96
62
|
async convertPlanToExecutable(plans, modelConfig, cacheable) {
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
if ('string' == typeof detailedLocateParam) detailedLocateParam = {
|
|
100
|
-
prompt: detailedLocateParam
|
|
101
|
-
};
|
|
102
|
-
if (void 0 !== cacheable) detailedLocateParam = {
|
|
103
|
-
...detailedLocateParam,
|
|
104
|
-
cacheable
|
|
105
|
-
};
|
|
106
|
-
const taskFind = {
|
|
107
|
-
type: 'Insight',
|
|
108
|
-
subType: 'Locate',
|
|
109
|
-
param: detailedLocateParam,
|
|
110
|
-
thought: plan.thought,
|
|
111
|
-
executor: async (param, taskContext)=>{
|
|
112
|
-
var _this_taskCache, _locateCacheRecord_cacheContent;
|
|
113
|
-
const { task } = taskContext;
|
|
114
|
-
(0, utils_namespaceObject.assert)((null == param ? void 0 : param.prompt) || (null == param ? void 0 : param.id) || (null == param ? void 0 : param.bbox), `No prompt or id or position or bbox to locate, param=${JSON.stringify(param)}`);
|
|
115
|
-
let insightDump;
|
|
116
|
-
let usage;
|
|
117
|
-
const dumpCollector = (dump)=>{
|
|
118
|
-
var _dump_taskInfo, _dump_taskInfo1;
|
|
119
|
-
insightDump = dump;
|
|
120
|
-
usage = null == dump ? void 0 : null == (_dump_taskInfo = dump.taskInfo) ? void 0 : _dump_taskInfo.usage;
|
|
121
|
-
task.log = {
|
|
122
|
-
dump: insightDump
|
|
123
|
-
};
|
|
124
|
-
task.usage = usage;
|
|
125
|
-
if (null == dump ? void 0 : null == (_dump_taskInfo1 = dump.taskInfo) ? void 0 : _dump_taskInfo1.searchAreaUsage) task.searchAreaUsage = dump.taskInfo.searchAreaUsage;
|
|
126
|
-
};
|
|
127
|
-
this.insight.onceDumpUpdatedFn = dumpCollector;
|
|
128
|
-
const shotTime = Date.now();
|
|
129
|
-
const uiContext = await this.insight.contextRetrieverFn('locate');
|
|
130
|
-
task.uiContext = uiContext;
|
|
131
|
-
const recordItem = {
|
|
132
|
-
type: 'screenshot',
|
|
133
|
-
ts: shotTime,
|
|
134
|
-
screenshot: uiContext.screenshotBase64,
|
|
135
|
-
timing: 'before Insight'
|
|
136
|
-
};
|
|
137
|
-
task.recorder = [
|
|
138
|
-
recordItem
|
|
139
|
-
];
|
|
140
|
-
const elementFromXpath = param.xpath && this.interface.getElementInfoByXpath ? await this.interface.getElementInfoByXpath(param.xpath) : void 0;
|
|
141
|
-
const userExpectedPathHitFlag = !!elementFromXpath;
|
|
142
|
-
const cachePrompt = param.prompt;
|
|
143
|
-
const locateCacheRecord = null == (_this_taskCache = this.taskCache) ? void 0 : _this_taskCache.matchLocateCache(cachePrompt);
|
|
144
|
-
const cacheEntry = null == locateCacheRecord ? void 0 : null == (_locateCacheRecord_cacheContent = locateCacheRecord.cacheContent) ? void 0 : _locateCacheRecord_cacheContent.cache;
|
|
145
|
-
const elementFromCache = userExpectedPathHitFlag ? null : await (0, external_utils_js_namespaceObject_1.matchElementFromCache)(this, cacheEntry, cachePrompt, param.cacheable);
|
|
146
|
-
const cacheHitFlag = !!elementFromCache;
|
|
147
|
-
const elementFromPlan = userExpectedPathHitFlag || cacheHitFlag ? void 0 : (0, external_utils_js_namespaceObject_1.matchElementFromPlan)(param, uiContext.tree);
|
|
148
|
-
const planHitFlag = !!elementFromPlan;
|
|
149
|
-
const elementFromAiLocate = userExpectedPathHitFlag || cacheHitFlag || planHitFlag ? void 0 : (await this.insight.locate(param, {
|
|
150
|
-
context: uiContext
|
|
151
|
-
}, modelConfig)).element;
|
|
152
|
-
const aiLocateHitFlag = !!elementFromAiLocate;
|
|
153
|
-
const element = elementFromXpath || elementFromCache || elementFromPlan || elementFromAiLocate;
|
|
154
|
-
let currentCacheEntry;
|
|
155
|
-
if (element && this.taskCache && !cacheHitFlag && (null == param ? void 0 : param.cacheable) !== false) if (this.interface.cacheFeatureForRect) try {
|
|
156
|
-
const feature = await this.interface.cacheFeatureForRect(element.rect, void 0 !== element.isOrderSensitive ? {
|
|
157
|
-
_orderSensitive: element.isOrderSensitive
|
|
158
|
-
} : void 0);
|
|
159
|
-
if (feature && Object.keys(feature).length > 0) {
|
|
160
|
-
debug('update cache, prompt: %s, cache: %o', cachePrompt, feature);
|
|
161
|
-
currentCacheEntry = feature;
|
|
162
|
-
this.taskCache.updateOrAppendCacheRecord({
|
|
163
|
-
type: 'locate',
|
|
164
|
-
prompt: cachePrompt,
|
|
165
|
-
cache: feature
|
|
166
|
-
}, locateCacheRecord);
|
|
167
|
-
} else debug('no cache data returned, skip cache update, prompt: %s', cachePrompt);
|
|
168
|
-
} catch (error) {
|
|
169
|
-
debug('cacheFeatureForRect failed: %s', error);
|
|
170
|
-
}
|
|
171
|
-
else debug('cacheFeatureForRect is not supported, skip cache update');
|
|
172
|
-
if (!element) throw new Error(`Element not found: ${param.prompt}`);
|
|
173
|
-
let hitBy;
|
|
174
|
-
if (userExpectedPathHitFlag) hitBy = {
|
|
175
|
-
from: 'User expected path',
|
|
176
|
-
context: {
|
|
177
|
-
xpath: param.xpath
|
|
178
|
-
}
|
|
179
|
-
};
|
|
180
|
-
else if (cacheHitFlag) hitBy = {
|
|
181
|
-
from: 'Cache',
|
|
182
|
-
context: {
|
|
183
|
-
cacheEntry,
|
|
184
|
-
cacheToSave: currentCacheEntry
|
|
185
|
-
}
|
|
186
|
-
};
|
|
187
|
-
else if (planHitFlag) hitBy = {
|
|
188
|
-
from: 'Planning',
|
|
189
|
-
context: {
|
|
190
|
-
id: null == elementFromPlan ? void 0 : elementFromPlan.id,
|
|
191
|
-
bbox: null == elementFromPlan ? void 0 : elementFromPlan.bbox
|
|
192
|
-
}
|
|
193
|
-
};
|
|
194
|
-
else if (aiLocateHitFlag) hitBy = {
|
|
195
|
-
from: 'AI model',
|
|
196
|
-
context: {
|
|
197
|
-
prompt: param.prompt
|
|
198
|
-
}
|
|
199
|
-
};
|
|
200
|
-
null == onResult || onResult(element);
|
|
201
|
-
return {
|
|
202
|
-
output: {
|
|
203
|
-
element
|
|
204
|
-
},
|
|
205
|
-
uiContext,
|
|
206
|
-
hitBy
|
|
207
|
-
};
|
|
208
|
-
}
|
|
209
|
-
};
|
|
210
|
-
return taskFind;
|
|
211
|
-
};
|
|
212
|
-
for (const plan of plans)if ('Locate' === plan.type) {
|
|
213
|
-
var _plan_locate, _plan_locate1;
|
|
214
|
-
if (!plan.locate || null === plan.locate || (null == (_plan_locate = plan.locate) ? void 0 : _plan_locate.id) === null || (null == (_plan_locate1 = plan.locate) ? void 0 : _plan_locate1.id) === 'null') {
|
|
215
|
-
debug('Locate action with id is null, will be ignored', plan);
|
|
216
|
-
continue;
|
|
217
|
-
}
|
|
218
|
-
const taskLocate = taskForLocatePlan(plan, plan.locate);
|
|
219
|
-
tasks.push(taskLocate);
|
|
220
|
-
} else if ('Error' === plan.type) {
|
|
221
|
-
var _plan_param;
|
|
222
|
-
const taskActionError = {
|
|
223
|
-
type: 'Action',
|
|
224
|
-
subType: 'Error',
|
|
225
|
-
param: plan.param,
|
|
226
|
-
thought: plan.thought || (null == (_plan_param = plan.param) ? void 0 : _plan_param.thought),
|
|
227
|
-
locate: plan.locate,
|
|
228
|
-
executor: async ()=>{
|
|
229
|
-
var _plan_param;
|
|
230
|
-
throw new Error((null == plan ? void 0 : plan.thought) || (null == (_plan_param = plan.param) ? void 0 : _plan_param.thought) || 'error without thought');
|
|
231
|
-
}
|
|
232
|
-
};
|
|
233
|
-
tasks.push(taskActionError);
|
|
234
|
-
} else if ('Finished' === plan.type) {
|
|
235
|
-
const taskActionFinished = {
|
|
236
|
-
type: 'Action',
|
|
237
|
-
subType: 'Finished',
|
|
238
|
-
param: null,
|
|
239
|
-
thought: plan.thought,
|
|
240
|
-
locate: plan.locate,
|
|
241
|
-
executor: async (param)=>{}
|
|
242
|
-
};
|
|
243
|
-
tasks.push(taskActionFinished);
|
|
244
|
-
} else if ('Sleep' === plan.type) {
|
|
245
|
-
const taskActionSleep = {
|
|
246
|
-
type: 'Action',
|
|
247
|
-
subType: 'Sleep',
|
|
248
|
-
param: plan.param,
|
|
249
|
-
thought: plan.thought,
|
|
250
|
-
locate: plan.locate,
|
|
251
|
-
executor: async (taskParam)=>{
|
|
252
|
-
await (0, external_utils_js_namespaceObject.sleep)((null == taskParam ? void 0 : taskParam.timeMs) || 3000);
|
|
253
|
-
}
|
|
254
|
-
};
|
|
255
|
-
tasks.push(taskActionSleep);
|
|
256
|
-
} else {
|
|
257
|
-
const planType = plan.type;
|
|
258
|
-
const actionSpace = await this.interface.actionSpace();
|
|
259
|
-
const action = actionSpace.find((action)=>action.name === planType);
|
|
260
|
-
const param = plan.param;
|
|
261
|
-
if (!action) throw new Error(`Action type '${planType}' not found`);
|
|
262
|
-
const locateFields = action ? (0, index_js_namespaceObject.findAllMidsceneLocatorField)(action.paramSchema) : [];
|
|
263
|
-
const requiredLocateFields = action ? (0, index_js_namespaceObject.findAllMidsceneLocatorField)(action.paramSchema, true) : [];
|
|
264
|
-
locateFields.forEach((field)=>{
|
|
265
|
-
if (param[field]) {
|
|
266
|
-
const locatePlan = locatePlanForLocate(param[field]);
|
|
267
|
-
debug('will prepend locate param for field', `action.type=${planType}`, `param=${JSON.stringify(param[field])}`, `locatePlan=${JSON.stringify(locatePlan)}`);
|
|
268
|
-
const locateTask = taskForLocatePlan(locatePlan, param[field], (result)=>{
|
|
269
|
-
param[field] = result;
|
|
270
|
-
});
|
|
271
|
-
tasks.push(locateTask);
|
|
272
|
-
} else {
|
|
273
|
-
(0, utils_namespaceObject.assert)(!requiredLocateFields.includes(field), `Required locate field '${field}' is not provided for action ${planType}`);
|
|
274
|
-
debug(`field '${field}' is not provided for action ${planType}`);
|
|
275
|
-
}
|
|
276
|
-
});
|
|
277
|
-
const task = {
|
|
278
|
-
type: 'Action',
|
|
279
|
-
subType: planType,
|
|
280
|
-
thought: plan.thought,
|
|
281
|
-
param: plan.param,
|
|
282
|
-
executor: async (param, context)=>{
|
|
283
|
-
var _context_element;
|
|
284
|
-
debug('executing action', planType, param, `context.element.center: ${null == (_context_element = context.element) ? void 0 : _context_element.center}`);
|
|
285
|
-
const uiContext = await this.insight.contextRetrieverFn('locate');
|
|
286
|
-
context.task.uiContext = uiContext;
|
|
287
|
-
requiredLocateFields.forEach((field)=>{
|
|
288
|
-
(0, utils_namespaceObject.assert)(param[field], `field '${field}' is required for action ${planType} but not provided. Cannot execute action ${planType}.`);
|
|
289
|
-
});
|
|
290
|
-
try {
|
|
291
|
-
await Promise.all([
|
|
292
|
-
(async ()=>{
|
|
293
|
-
if (this.interface.beforeInvokeAction) {
|
|
294
|
-
debug('will call "beforeInvokeAction" for interface');
|
|
295
|
-
await this.interface.beforeInvokeAction(action.name, param);
|
|
296
|
-
debug('called "beforeInvokeAction" for interface');
|
|
297
|
-
}
|
|
298
|
-
})(),
|
|
299
|
-
(0, external_utils_js_namespaceObject.sleep)(200)
|
|
300
|
-
]);
|
|
301
|
-
} catch (originalError) {
|
|
302
|
-
const originalMessage = (null == originalError ? void 0 : originalError.message) || String(originalError);
|
|
303
|
-
throw new Error(`error in running beforeInvokeAction for ${action.name}: ${originalMessage}`, {
|
|
304
|
-
cause: originalError
|
|
305
|
-
});
|
|
306
|
-
}
|
|
307
|
-
if (action.paramSchema) try {
|
|
308
|
-
param = (0, index_js_namespaceObject.parseActionParam)(param, action.paramSchema);
|
|
309
|
-
} catch (error) {
|
|
310
|
-
throw new Error(`Invalid parameters for action ${action.name}: ${error.message}\nParameters: ${JSON.stringify(param)}`, {
|
|
311
|
-
cause: error
|
|
312
|
-
});
|
|
313
|
-
}
|
|
314
|
-
debug('calling action', action.name);
|
|
315
|
-
const actionFn = action.call.bind(this.interface);
|
|
316
|
-
await actionFn(param, context);
|
|
317
|
-
debug('called action', action.name);
|
|
318
|
-
try {
|
|
319
|
-
if (this.interface.afterInvokeAction) {
|
|
320
|
-
debug('will call "afterInvokeAction" for interface');
|
|
321
|
-
await this.interface.afterInvokeAction(action.name, param);
|
|
322
|
-
debug('called "afterInvokeAction" for interface');
|
|
323
|
-
}
|
|
324
|
-
} catch (originalError) {
|
|
325
|
-
const originalMessage = (null == originalError ? void 0 : originalError.message) || String(originalError);
|
|
326
|
-
throw new Error(`error in running afterInvokeAction for ${action.name}: ${originalMessage}`, {
|
|
327
|
-
cause: originalError
|
|
328
|
-
});
|
|
329
|
-
}
|
|
330
|
-
return {
|
|
331
|
-
output: {
|
|
332
|
-
success: true,
|
|
333
|
-
action: planType,
|
|
334
|
-
param: param
|
|
335
|
-
}
|
|
336
|
-
};
|
|
337
|
-
}
|
|
338
|
-
};
|
|
339
|
-
tasks.push(task);
|
|
340
|
-
}
|
|
341
|
-
const wrappedTasks = tasks.map((task, index)=>{
|
|
342
|
-
if ('Action' === task.type) return this.prependExecutorWithScreenshot(task, index === tasks.length - 1);
|
|
343
|
-
return task;
|
|
63
|
+
return this.taskBuilder.build(plans, modelConfig, {
|
|
64
|
+
cacheable
|
|
344
65
|
});
|
|
345
|
-
return {
|
|
346
|
-
tasks: wrappedTasks
|
|
347
|
-
};
|
|
348
|
-
}
|
|
349
|
-
async setupPlanningContext(executorContext) {
|
|
350
|
-
const shotTime = Date.now();
|
|
351
|
-
const uiContext = await this.insight.contextRetrieverFn('locate');
|
|
352
|
-
const recordItem = {
|
|
353
|
-
type: 'screenshot',
|
|
354
|
-
ts: shotTime,
|
|
355
|
-
screenshot: uiContext.screenshotBase64,
|
|
356
|
-
timing: 'before Planning'
|
|
357
|
-
};
|
|
358
|
-
executorContext.task.recorder = [
|
|
359
|
-
recordItem
|
|
360
|
-
];
|
|
361
|
-
executorContext.task.uiContext = uiContext;
|
|
362
|
-
return {
|
|
363
|
-
uiContext
|
|
364
|
-
};
|
|
365
66
|
}
|
|
366
67
|
async loadYamlFlowAsPlanning(userInstruction, yamlString) {
|
|
367
|
-
const
|
|
368
|
-
onTaskStart: this.onTaskStartCallback
|
|
369
|
-
});
|
|
68
|
+
const session = this.createExecutionSession((0, external_ui_utils_js_namespaceObject.taskTitleStr)('Action', userInstruction));
|
|
370
69
|
const task = {
|
|
371
70
|
type: 'Planning',
|
|
372
71
|
subType: 'LoadYaml',
|
|
@@ -375,7 +74,8 @@ class TaskExecutor {
|
|
|
375
74
|
userInstruction
|
|
376
75
|
},
|
|
377
76
|
executor: async (param, executorContext)=>{
|
|
378
|
-
|
|
77
|
+
const { uiContext } = executorContext;
|
|
78
|
+
(0, utils_namespaceObject.assert)(uiContext, 'uiContext is required for Planning task');
|
|
379
79
|
return {
|
|
380
80
|
output: {
|
|
381
81
|
actions: [],
|
|
@@ -395,10 +95,9 @@ class TaskExecutor {
|
|
|
395
95
|
};
|
|
396
96
|
}
|
|
397
97
|
};
|
|
398
|
-
await
|
|
399
|
-
await taskExecutor.flush();
|
|
98
|
+
await session.appendAndRun(task);
|
|
400
99
|
return {
|
|
401
|
-
|
|
100
|
+
runner: session.getRunner()
|
|
402
101
|
};
|
|
403
102
|
}
|
|
404
103
|
createPlanningTask(userInstruction, actionContext, modelConfig) {
|
|
@@ -411,7 +110,8 @@ class TaskExecutor {
|
|
|
411
110
|
},
|
|
412
111
|
executor: async (param, executorContext)=>{
|
|
413
112
|
const startTime = Date.now();
|
|
414
|
-
const { uiContext } =
|
|
113
|
+
const { uiContext } = executorContext;
|
|
114
|
+
(0, utils_namespaceObject.assert)(uiContext, 'uiContext is required for Planning task');
|
|
415
115
|
const { vlMode } = modelConfig;
|
|
416
116
|
const uiTarsModelVersion = 'vlm-ui-tars' === vlMode ? modelConfig.uiTarsModelVersion : void 0;
|
|
417
117
|
(0, utils_namespaceObject.assert)(this.interface.actionSpace, 'actionSpace for device is not implemented');
|
|
@@ -464,16 +164,13 @@ class TaskExecutor {
|
|
|
464
164
|
return task;
|
|
465
165
|
}
|
|
466
166
|
async runPlans(title, plans, modelConfig) {
|
|
467
|
-
const
|
|
468
|
-
onTaskStart: this.onTaskStartCallback
|
|
469
|
-
});
|
|
167
|
+
const session = this.createExecutionSession(title);
|
|
470
168
|
const { tasks } = await this.convertPlanToExecutable(plans, modelConfig);
|
|
471
|
-
await
|
|
472
|
-
const result = await taskExecutor.flush();
|
|
169
|
+
const result = await session.appendAndRun(tasks);
|
|
473
170
|
const { output } = result;
|
|
474
171
|
return {
|
|
475
172
|
output,
|
|
476
|
-
|
|
173
|
+
runner: session.getRunner()
|
|
477
174
|
};
|
|
478
175
|
}
|
|
479
176
|
getReplanningCycleLimit(isVlmUiTars) {
|
|
@@ -481,38 +178,35 @@ class TaskExecutor {
|
|
|
481
178
|
}
|
|
482
179
|
async action(userPrompt, modelConfig, actionContext, cacheable) {
|
|
483
180
|
this.conversationHistory.reset();
|
|
484
|
-
const
|
|
485
|
-
|
|
486
|
-
});
|
|
181
|
+
const session = this.createExecutionSession((0, external_ui_utils_js_namespaceObject.taskTitleStr)('Action', userPrompt));
|
|
182
|
+
const runner = session.getRunner();
|
|
487
183
|
let replanCount = 0;
|
|
488
184
|
const yamlFlow = [];
|
|
489
185
|
const replanningCycleLimit = this.getReplanningCycleLimit('vlm-ui-tars' === modelConfig.vlMode);
|
|
490
186
|
while(true){
|
|
491
187
|
if (replanCount > replanningCycleLimit) {
|
|
492
188
|
const errorMsg = `Replanning ${replanningCycleLimit} times, which is more than the limit, please split the task into multiple steps`;
|
|
493
|
-
return
|
|
189
|
+
return session.appendErrorPlan(errorMsg);
|
|
494
190
|
}
|
|
495
191
|
const planningTask = this.createPlanningTask(userPrompt, actionContext, modelConfig);
|
|
496
|
-
await
|
|
497
|
-
const result = await taskExecutor.flush();
|
|
192
|
+
const result = await session.appendAndRun(planningTask);
|
|
498
193
|
const planResult = null == result ? void 0 : result.output;
|
|
499
|
-
if (
|
|
194
|
+
if (session.isInErrorState()) return {
|
|
500
195
|
output: planResult,
|
|
501
|
-
|
|
196
|
+
runner
|
|
502
197
|
};
|
|
503
198
|
const plans = planResult.actions || [];
|
|
504
199
|
yamlFlow.push(...planResult.yamlFlow || []);
|
|
505
200
|
let executables;
|
|
506
201
|
try {
|
|
507
202
|
executables = await this.convertPlanToExecutable(plans, modelConfig, cacheable);
|
|
508
|
-
|
|
203
|
+
await session.appendAndRun(executables.tasks);
|
|
509
204
|
} catch (error) {
|
|
510
|
-
return
|
|
205
|
+
return session.appendErrorPlan(`Error converting plans to executable tasks: ${error}, plans: ${JSON.stringify(plans)}`);
|
|
511
206
|
}
|
|
512
|
-
|
|
513
|
-
if (taskExecutor.isInErrorState()) return {
|
|
207
|
+
if (session.isInErrorState()) return {
|
|
514
208
|
output: void 0,
|
|
515
|
-
|
|
209
|
+
runner
|
|
516
210
|
};
|
|
517
211
|
if (!planResult.more_actions_needed_by_instruction) break;
|
|
518
212
|
replanCount++;
|
|
@@ -521,7 +215,7 @@ class TaskExecutor {
|
|
|
521
215
|
output: {
|
|
522
216
|
yamlFlow
|
|
523
217
|
},
|
|
524
|
-
|
|
218
|
+
runner
|
|
525
219
|
};
|
|
526
220
|
}
|
|
527
221
|
createTypeQueryTask(type, demand, modelConfig, opt, multimodalPrompt) {
|
|
@@ -537,23 +231,15 @@ class TaskExecutor {
|
|
|
537
231
|
},
|
|
538
232
|
executor: async (param, taskContext)=>{
|
|
539
233
|
const { task } = taskContext;
|
|
540
|
-
let
|
|
541
|
-
const
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
const uiContext = await this.insight.contextRetrieverFn('extract');
|
|
547
|
-
task.uiContext = uiContext;
|
|
548
|
-
const recordItem = {
|
|
549
|
-
type: 'screenshot',
|
|
550
|
-
ts: shotTime,
|
|
551
|
-
screenshot: uiContext.screenshotBase64,
|
|
552
|
-
timing: 'before Extract'
|
|
234
|
+
let queryDump;
|
|
235
|
+
const applyDump = (dump)=>{
|
|
236
|
+
queryDump = dump;
|
|
237
|
+
task.log = {
|
|
238
|
+
dump
|
|
239
|
+
};
|
|
553
240
|
};
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
];
|
|
241
|
+
const uiContext = taskContext.uiContext;
|
|
242
|
+
(0, utils_namespaceObject.assert)(uiContext, 'uiContext is required for Query task');
|
|
557
243
|
const ifTypeRestricted = 'Query' !== type;
|
|
558
244
|
let demandInput = demand;
|
|
559
245
|
let keyOfResult = 'result';
|
|
@@ -566,7 +252,15 @@ class TaskExecutor {
|
|
|
566
252
|
} else if (ifTypeRestricted) demandInput = {
|
|
567
253
|
[keyOfResult]: `${type}, ${demand}`
|
|
568
254
|
};
|
|
569
|
-
|
|
255
|
+
let extractResult;
|
|
256
|
+
try {
|
|
257
|
+
extractResult = await this.insight.extract(demandInput, modelConfig, opt, multimodalPrompt);
|
|
258
|
+
} catch (error) {
|
|
259
|
+
if (error instanceof external_types_js_namespaceObject.InsightError) applyDump(error.dump);
|
|
260
|
+
throw error;
|
|
261
|
+
}
|
|
262
|
+
const { data, usage, thought, dump } = extractResult;
|
|
263
|
+
applyDump(dump);
|
|
570
264
|
let outputResult = data;
|
|
571
265
|
if (ifTypeRestricted) if ('string' == typeof data) outputResult = data;
|
|
572
266
|
else {
|
|
@@ -575,7 +269,7 @@ class TaskExecutor {
|
|
|
575
269
|
}
|
|
576
270
|
return {
|
|
577
271
|
output: outputResult,
|
|
578
|
-
log:
|
|
272
|
+
log: queryDump,
|
|
579
273
|
usage,
|
|
580
274
|
thought
|
|
581
275
|
};
|
|
@@ -584,36 +278,15 @@ class TaskExecutor {
|
|
|
584
278
|
return queryTask;
|
|
585
279
|
}
|
|
586
280
|
async createTypeQueryExecution(type, demand, modelConfig, opt, multimodalPrompt) {
|
|
587
|
-
const
|
|
588
|
-
onTaskStart: this.onTaskStartCallback
|
|
589
|
-
});
|
|
281
|
+
const session = this.createExecutionSession((0, external_ui_utils_js_namespaceObject.taskTitleStr)(type, 'string' == typeof demand ? demand : JSON.stringify(demand)));
|
|
590
282
|
const queryTask = await this.createTypeQueryTask(type, demand, modelConfig, opt, multimodalPrompt);
|
|
591
|
-
await
|
|
592
|
-
const result = await taskExecutor.flush();
|
|
283
|
+
const result = await session.appendAndRun(queryTask);
|
|
593
284
|
if (!result) throw new Error('result of taskExecutor.flush() is undefined in function createTypeQueryTask');
|
|
594
285
|
const { output, thought } = result;
|
|
595
286
|
return {
|
|
596
287
|
output,
|
|
597
288
|
thought,
|
|
598
|
-
|
|
599
|
-
};
|
|
600
|
-
}
|
|
601
|
-
async appendErrorPlan(taskExecutor, errorMsg, modelConfig) {
|
|
602
|
-
const errorPlan = {
|
|
603
|
-
type: 'Error',
|
|
604
|
-
param: {
|
|
605
|
-
thought: errorMsg
|
|
606
|
-
},
|
|
607
|
-
locate: null
|
|
608
|
-
};
|
|
609
|
-
const { tasks } = await this.convertPlanToExecutable([
|
|
610
|
-
errorPlan
|
|
611
|
-
], modelConfig);
|
|
612
|
-
await taskExecutor.append(this.prependExecutorWithScreenshot(tasks[0]));
|
|
613
|
-
await taskExecutor.flush();
|
|
614
|
-
return {
|
|
615
|
-
output: void 0,
|
|
616
|
-
executor: taskExecutor
|
|
289
|
+
runner: session.getRunner()
|
|
617
290
|
};
|
|
618
291
|
}
|
|
619
292
|
async taskForSleep(timeMs, modelConfig) {
|
|
@@ -627,14 +300,13 @@ class TaskExecutor {
|
|
|
627
300
|
const { tasks: sleepTasks } = await this.convertPlanToExecutable([
|
|
628
301
|
sleepPlan
|
|
629
302
|
], modelConfig);
|
|
630
|
-
return
|
|
303
|
+
return sleepTasks[0];
|
|
631
304
|
}
|
|
632
305
|
async waitFor(assertion, opt, modelConfig) {
|
|
633
|
-
const { textPrompt, multimodalPrompt } = (0,
|
|
306
|
+
const { textPrompt, multimodalPrompt } = (0, external_utils_js_namespaceObject.parsePrompt)(assertion);
|
|
634
307
|
const description = `waitFor: ${textPrompt}`;
|
|
635
|
-
const
|
|
636
|
-
|
|
637
|
-
});
|
|
308
|
+
const session = this.createExecutionSession((0, external_ui_utils_js_namespaceObject.taskTitleStr)('WaitFor', description));
|
|
309
|
+
const runner = session.getRunner();
|
|
638
310
|
const { timeoutMs, checkIntervalMs } = opt;
|
|
639
311
|
(0, utils_namespaceObject.assert)(assertion, 'No assertion for waitFor');
|
|
640
312
|
(0, utils_namespaceObject.assert)(timeoutMs, 'No timeoutMs for waitFor');
|
|
@@ -648,26 +320,26 @@ class TaskExecutor {
|
|
|
648
320
|
const queryTask = await this.createTypeQueryTask('WaitFor', textPrompt, modelConfig, {
|
|
649
321
|
doNotThrowError: true
|
|
650
322
|
}, multimodalPrompt);
|
|
651
|
-
await
|
|
652
|
-
const result = await taskExecutor.flush();
|
|
323
|
+
const result = await session.appendAndRun(queryTask);
|
|
653
324
|
if (null == result ? void 0 : result.output) return {
|
|
654
325
|
output: void 0,
|
|
655
|
-
|
|
326
|
+
runner
|
|
656
327
|
};
|
|
657
328
|
errorThought = (null == result ? void 0 : result.thought) || !result && `No result from assertion: ${textPrompt}` || `unknown error when waiting for assertion: ${textPrompt}`;
|
|
658
329
|
const now = Date.now();
|
|
659
330
|
if (now - startTime < checkIntervalMs) {
|
|
660
331
|
const timeRemaining = checkIntervalMs - (now - startTime);
|
|
661
332
|
const sleepTask = await this.taskForSleep(timeRemaining, modelConfig);
|
|
662
|
-
await
|
|
333
|
+
await session.append(sleepTask);
|
|
663
334
|
}
|
|
664
335
|
}
|
|
665
|
-
return
|
|
336
|
+
return session.appendErrorPlan(`waitFor timeout: ${errorThought}`);
|
|
666
337
|
}
|
|
667
338
|
constructor(interfaceInstance, insight, opts){
|
|
668
339
|
_define_property(this, "interface", void 0);
|
|
669
340
|
_define_property(this, "insight", void 0);
|
|
670
341
|
_define_property(this, "taskCache", void 0);
|
|
342
|
+
_define_property(this, "taskBuilder", void 0);
|
|
671
343
|
_define_property(this, "conversationHistory", void 0);
|
|
672
344
|
_define_property(this, "onTaskStartCallback", void 0);
|
|
673
345
|
_define_property(this, "replanningCycleLimit", void 0);
|
|
@@ -677,6 +349,11 @@ class TaskExecutor {
|
|
|
677
349
|
this.onTaskStartCallback = null == opts ? void 0 : opts.onTaskStart;
|
|
678
350
|
this.replanningCycleLimit = opts.replanningCycleLimit;
|
|
679
351
|
this.conversationHistory = new index_js_namespaceObject.ConversationHistory();
|
|
352
|
+
this.taskBuilder = new external_task_builder_js_namespaceObject.TaskBuilder({
|
|
353
|
+
interfaceInstance,
|
|
354
|
+
insight,
|
|
355
|
+
taskCache: opts.taskCache
|
|
356
|
+
});
|
|
680
357
|
}
|
|
681
358
|
}
|
|
682
359
|
exports.TaskExecutor = __webpack_exports__.TaskExecutor;
|