@midscene/core 0.30.6-beta-20251022093704.0 → 1.0.1-beta-20251022061922.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. package/dist/es/agent/agent.mjs +41 -33
  2. package/dist/es/agent/agent.mjs.map +1 -1
  3. package/dist/es/agent/execution-session.mjs +41 -0
  4. package/dist/es/agent/execution-session.mjs.map +1 -0
  5. package/dist/es/agent/task-builder.mjs +315 -0
  6. package/dist/es/agent/task-builder.mjs.map +1 -0
  7. package/dist/es/agent/tasks.mjs +80 -405
  8. package/dist/es/agent/tasks.mjs.map +1 -1
  9. package/dist/es/agent/ui-utils.mjs.map +1 -1
  10. package/dist/es/agent/utils.mjs +6 -6
  11. package/dist/es/agent/utils.mjs.map +1 -1
  12. package/dist/es/ai-model/common.mjs +1 -15
  13. package/dist/es/ai-model/common.mjs.map +1 -1
  14. package/dist/es/ai-model/inspect.mjs +2 -3
  15. package/dist/es/ai-model/inspect.mjs.map +1 -1
  16. package/dist/es/ai-model/llm-planning.mjs +11 -30
  17. package/dist/es/ai-model/llm-planning.mjs.map +1 -1
  18. package/dist/es/ai-model/prompt/llm-locator.mjs +3 -204
  19. package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -1
  20. package/dist/es/ai-model/service-caller/index.mjs +101 -231
  21. package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
  22. package/dist/es/index.mjs +3 -2
  23. package/dist/es/index.mjs.map +1 -1
  24. package/dist/es/insight/index.mjs +18 -19
  25. package/dist/es/insight/index.mjs.map +1 -1
  26. package/dist/es/insight/utils.mjs +3 -3
  27. package/dist/es/insight/utils.mjs.map +1 -1
  28. package/dist/es/report.mjs.map +1 -1
  29. package/dist/es/{ai-model/action-executor.mjs → task-runner.mjs} +81 -10
  30. package/dist/es/task-runner.mjs.map +1 -0
  31. package/dist/es/types.mjs +18 -1
  32. package/dist/es/types.mjs.map +1 -1
  33. package/dist/es/utils.mjs +2 -2
  34. package/dist/es/yaml/player.mjs +18 -14
  35. package/dist/es/yaml/player.mjs.map +1 -1
  36. package/dist/lib/agent/agent.js +41 -33
  37. package/dist/lib/agent/agent.js.map +1 -1
  38. package/dist/lib/agent/execution-session.js +75 -0
  39. package/dist/lib/agent/execution-session.js.map +1 -0
  40. package/dist/lib/agent/task-builder.js +352 -0
  41. package/dist/lib/agent/task-builder.js.map +1 -0
  42. package/dist/lib/agent/tasks.js +80 -405
  43. package/dist/lib/agent/tasks.js.map +1 -1
  44. package/dist/lib/agent/ui-utils.js.map +1 -1
  45. package/dist/lib/agent/utils.js +6 -6
  46. package/dist/lib/agent/utils.js.map +1 -1
  47. package/dist/lib/ai-model/common.js +2 -19
  48. package/dist/lib/ai-model/common.js.map +1 -1
  49. package/dist/lib/ai-model/inspect.js +1 -2
  50. package/dist/lib/ai-model/inspect.js.map +1 -1
  51. package/dist/lib/ai-model/llm-planning.js +10 -29
  52. package/dist/lib/ai-model/llm-planning.js.map +1 -1
  53. package/dist/lib/ai-model/prompt/llm-locator.js +2 -206
  54. package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -1
  55. package/dist/lib/ai-model/service-caller/index.js +236 -384
  56. package/dist/lib/ai-model/service-caller/index.js.map +1 -1
  57. package/dist/lib/index.js +9 -5
  58. package/dist/lib/index.js.map +1 -1
  59. package/dist/lib/insight/index.js +17 -18
  60. package/dist/lib/insight/index.js.map +1 -1
  61. package/dist/lib/insight/utils.js +5 -5
  62. package/dist/lib/insight/utils.js.map +1 -1
  63. package/dist/lib/report.js.map +1 -1
  64. package/dist/lib/{ai-model/action-executor.js → task-runner.js} +83 -12
  65. package/dist/lib/task-runner.js.map +1 -0
  66. package/dist/lib/types.js +22 -1
  67. package/dist/lib/types.js.map +1 -1
  68. package/dist/lib/utils.js +2 -2
  69. package/dist/lib/yaml/player.js +18 -14
  70. package/dist/lib/yaml/player.js.map +1 -1
  71. package/dist/types/agent/agent.d.ts +16 -0
  72. package/dist/types/agent/execution-session.d.ts +27 -0
  73. package/dist/types/agent/task-builder.d.ts +34 -0
  74. package/dist/types/agent/tasks.d.ts +14 -13
  75. package/dist/types/agent/ui-utils.d.ts +2 -2
  76. package/dist/types/agent/utils.d.ts +6 -2
  77. package/dist/types/ai-model/common.d.ts +0 -1
  78. package/dist/types/ai-model/prompt/llm-locator.d.ts +0 -2
  79. package/dist/types/device/index.d.ts +20 -20
  80. package/dist/types/index.d.ts +4 -3
  81. package/dist/types/insight/index.d.ts +5 -10
  82. package/dist/types/insight/utils.d.ts +2 -2
  83. package/dist/types/task-runner.d.ts +31 -0
  84. package/dist/types/types.d.ts +53 -14
  85. package/dist/types/yaml.d.ts +3 -1
  86. package/package.json +4 -7
  87. package/dist/es/ai-model/action-executor.mjs.map +0 -1
  88. package/dist/lib/ai-model/action-executor.js.map +0 -1
  89. package/dist/types/ai-model/action-executor.d.ts +0 -19
@@ -24,17 +24,18 @@ var __webpack_require__ = {};
24
24
  var __webpack_exports__ = {};
25
25
  __webpack_require__.r(__webpack_exports__);
26
26
  __webpack_require__.d(__webpack_exports__, {
27
- locatePlanForLocate: ()=>locatePlanForLocate,
27
+ locatePlanForLocate: ()=>external_task_builder_js_namespaceObject.locatePlanForLocate,
28
28
  TaskExecutor: ()=>TaskExecutor
29
29
  });
30
30
  const index_js_namespaceObject = require("../ai-model/index.js");
31
- const action_executor_js_namespaceObject = require("../ai-model/action-executor.js");
32
- const external_utils_js_namespaceObject = require("../utils.js");
31
+ const external_types_js_namespaceObject = require("../types.js");
33
32
  const env_namespaceObject = require("@midscene/shared/env");
34
33
  const logger_namespaceObject = require("@midscene/shared/logger");
35
34
  const utils_namespaceObject = require("@midscene/shared/utils");
35
+ const external_execution_session_js_namespaceObject = require("./execution-session.js");
36
+ const external_task_builder_js_namespaceObject = require("./task-builder.js");
36
37
  const external_ui_utils_js_namespaceObject = require("./ui-utils.js");
37
- const external_utils_js_namespaceObject_1 = require("./utils.js");
38
+ const external_utils_js_namespaceObject = require("./utils.js");
38
39
  function _define_property(obj, key, value) {
39
40
  if (key in obj) Object.defineProperty(obj, key, {
40
41
  value: value,
@@ -48,325 +49,21 @@ function _define_property(obj, key, value) {
48
49
  const debug = (0, logger_namespaceObject.getDebug)('device-task-executor');
49
50
  const defaultReplanningCycleLimit = 10;
50
51
  const defaultVlmUiTarsReplanningCycleLimit = 40;
51
- function locatePlanForLocate(param) {
52
- const locate = 'string' == typeof param ? {
53
- prompt: param
54
- } : param;
55
- const locatePlan = {
56
- type: 'Locate',
57
- locate,
58
- param: locate,
59
- thought: ''
60
- };
61
- return locatePlan;
62
- }
63
52
  class TaskExecutor {
64
53
  get page() {
65
54
  return this.interface;
66
55
  }
67
- async recordScreenshot(timing) {
68
- const base64 = await this.interface.screenshotBase64();
69
- const item = {
70
- type: 'screenshot',
71
- ts: Date.now(),
72
- screenshot: base64,
73
- timing
74
- };
75
- return item;
76
- }
77
- prependExecutorWithScreenshot(taskApply, appendAfterExecution = false) {
78
- const taskWithScreenshot = {
79
- ...taskApply,
80
- executor: async (param, context, ...args)=>{
81
- const recorder = [];
82
- const { task } = context;
83
- task.recorder = recorder;
84
- const shot = await this.recordScreenshot(`before ${task.type}`);
85
- recorder.push(shot);
86
- const result = await taskApply.executor(param, context, ...args);
87
- if (appendAfterExecution) {
88
- const shot2 = await this.recordScreenshot('after Action');
89
- recorder.push(shot2);
90
- }
91
- return result;
92
- }
93
- };
94
- return taskWithScreenshot;
95
- }
96
- async convertPlanToExecutable(plans, modelConfig, cacheable) {
97
- const tasks = [];
98
- const taskForLocatePlan = (plan, detailedLocateParam, onResult)=>{
99
- if ('string' == typeof detailedLocateParam) detailedLocateParam = {
100
- prompt: detailedLocateParam
101
- };
102
- if (void 0 !== cacheable) detailedLocateParam = {
103
- ...detailedLocateParam,
104
- cacheable
105
- };
106
- const taskFind = {
107
- type: 'Insight',
108
- subType: 'Locate',
109
- param: detailedLocateParam,
110
- thought: plan.thought,
111
- executor: async (param, taskContext)=>{
112
- var _this_taskCache, _locateCacheRecord_cacheContent;
113
- const { task } = taskContext;
114
- (0, utils_namespaceObject.assert)((null == param ? void 0 : param.prompt) || (null == param ? void 0 : param.id) || (null == param ? void 0 : param.bbox), `No prompt or id or position or bbox to locate, param=${JSON.stringify(param)}`);
115
- let insightDump;
116
- let usage;
117
- const dumpCollector = (dump)=>{
118
- var _dump_taskInfo, _dump_taskInfo1;
119
- insightDump = dump;
120
- usage = null == dump ? void 0 : null == (_dump_taskInfo = dump.taskInfo) ? void 0 : _dump_taskInfo.usage;
121
- task.log = {
122
- dump: insightDump
123
- };
124
- task.usage = usage;
125
- if (null == dump ? void 0 : null == (_dump_taskInfo1 = dump.taskInfo) ? void 0 : _dump_taskInfo1.searchAreaUsage) task.searchAreaUsage = dump.taskInfo.searchAreaUsage;
126
- };
127
- this.insight.onceDumpUpdatedFn = dumpCollector;
128
- const shotTime = Date.now();
129
- const uiContext = await this.insight.contextRetrieverFn('locate');
130
- task.uiContext = uiContext;
131
- const recordItem = {
132
- type: 'screenshot',
133
- ts: shotTime,
134
- screenshot: uiContext.screenshotBase64,
135
- timing: 'before Insight'
136
- };
137
- task.recorder = [
138
- recordItem
139
- ];
140
- const elementFromXpath = param.xpath && this.interface.getElementInfoByXpath ? await this.interface.getElementInfoByXpath(param.xpath) : void 0;
141
- const userExpectedPathHitFlag = !!elementFromXpath;
142
- const cachePrompt = param.prompt;
143
- const locateCacheRecord = null == (_this_taskCache = this.taskCache) ? void 0 : _this_taskCache.matchLocateCache(cachePrompt);
144
- const cacheEntry = null == locateCacheRecord ? void 0 : null == (_locateCacheRecord_cacheContent = locateCacheRecord.cacheContent) ? void 0 : _locateCacheRecord_cacheContent.cache;
145
- const elementFromCache = userExpectedPathHitFlag ? null : await (0, external_utils_js_namespaceObject_1.matchElementFromCache)(this, cacheEntry, cachePrompt, param.cacheable);
146
- const cacheHitFlag = !!elementFromCache;
147
- const elementFromPlan = userExpectedPathHitFlag || cacheHitFlag ? void 0 : (0, external_utils_js_namespaceObject_1.matchElementFromPlan)(param, uiContext.tree);
148
- const planHitFlag = !!elementFromPlan;
149
- const elementFromAiLocate = userExpectedPathHitFlag || cacheHitFlag || planHitFlag ? void 0 : (await this.insight.locate(param, {
150
- context: uiContext
151
- }, modelConfig)).element;
152
- const aiLocateHitFlag = !!elementFromAiLocate;
153
- const element = elementFromXpath || elementFromCache || elementFromPlan || elementFromAiLocate;
154
- let currentCacheEntry;
155
- if (element && this.taskCache && !cacheHitFlag && (null == param ? void 0 : param.cacheable) !== false) if (this.interface.cacheFeatureForRect) try {
156
- const feature = await this.interface.cacheFeatureForRect(element.rect, void 0 !== element.isOrderSensitive ? {
157
- _orderSensitive: element.isOrderSensitive
158
- } : void 0);
159
- if (feature && Object.keys(feature).length > 0) {
160
- debug('update cache, prompt: %s, cache: %o', cachePrompt, feature);
161
- currentCacheEntry = feature;
162
- this.taskCache.updateOrAppendCacheRecord({
163
- type: 'locate',
164
- prompt: cachePrompt,
165
- cache: feature
166
- }, locateCacheRecord);
167
- } else debug('no cache data returned, skip cache update, prompt: %s', cachePrompt);
168
- } catch (error) {
169
- debug('cacheFeatureForRect failed: %s', error);
170
- }
171
- else debug('cacheFeatureForRect is not supported, skip cache update');
172
- if (!element) throw new Error(`Element not found: ${param.prompt}`);
173
- let hitBy;
174
- if (userExpectedPathHitFlag) hitBy = {
175
- from: 'User expected path',
176
- context: {
177
- xpath: param.xpath
178
- }
179
- };
180
- else if (cacheHitFlag) hitBy = {
181
- from: 'Cache',
182
- context: {
183
- cacheEntry,
184
- cacheToSave: currentCacheEntry
185
- }
186
- };
187
- else if (planHitFlag) hitBy = {
188
- from: 'Planning',
189
- context: {
190
- id: null == elementFromPlan ? void 0 : elementFromPlan.id,
191
- bbox: null == elementFromPlan ? void 0 : elementFromPlan.bbox
192
- }
193
- };
194
- else if (aiLocateHitFlag) hitBy = {
195
- from: 'AI model',
196
- context: {
197
- prompt: param.prompt
198
- }
199
- };
200
- null == onResult || onResult(element);
201
- return {
202
- output: {
203
- element
204
- },
205
- uiContext,
206
- hitBy
207
- };
208
- }
209
- };
210
- return taskFind;
211
- };
212
- for (const plan of plans)if ('Locate' === plan.type) {
213
- var _plan_locate, _plan_locate1;
214
- if (!plan.locate || null === plan.locate || (null == (_plan_locate = plan.locate) ? void 0 : _plan_locate.id) === null || (null == (_plan_locate1 = plan.locate) ? void 0 : _plan_locate1.id) === 'null') {
215
- debug('Locate action with id is null, will be ignored', plan);
216
- continue;
217
- }
218
- const taskLocate = taskForLocatePlan(plan, plan.locate);
219
- tasks.push(taskLocate);
220
- } else if ('Error' === plan.type) {
221
- var _plan_param;
222
- const taskActionError = {
223
- type: 'Action',
224
- subType: 'Error',
225
- param: plan.param,
226
- thought: plan.thought || (null == (_plan_param = plan.param) ? void 0 : _plan_param.thought),
227
- locate: plan.locate,
228
- executor: async ()=>{
229
- var _plan_param;
230
- throw new Error((null == plan ? void 0 : plan.thought) || (null == (_plan_param = plan.param) ? void 0 : _plan_param.thought) || 'error without thought');
231
- }
232
- };
233
- tasks.push(taskActionError);
234
- } else if ('Finished' === plan.type) {
235
- const taskActionFinished = {
236
- type: 'Action',
237
- subType: 'Finished',
238
- param: null,
239
- thought: plan.thought,
240
- locate: plan.locate,
241
- executor: async (param)=>{}
242
- };
243
- tasks.push(taskActionFinished);
244
- } else if ('Sleep' === plan.type) {
245
- const taskActionSleep = {
246
- type: 'Action',
247
- subType: 'Sleep',
248
- param: plan.param,
249
- thought: plan.thought,
250
- locate: plan.locate,
251
- executor: async (taskParam)=>{
252
- await (0, external_utils_js_namespaceObject.sleep)((null == taskParam ? void 0 : taskParam.timeMs) || 3000);
253
- }
254
- };
255
- tasks.push(taskActionSleep);
256
- } else {
257
- const planType = plan.type;
258
- const actionSpace = await this.interface.actionSpace();
259
- const action = actionSpace.find((action)=>action.name === planType);
260
- const param = plan.param;
261
- if (!action) throw new Error(`Action type '${planType}' not found`);
262
- const locateFields = action ? (0, index_js_namespaceObject.findAllMidsceneLocatorField)(action.paramSchema) : [];
263
- const requiredLocateFields = action ? (0, index_js_namespaceObject.findAllMidsceneLocatorField)(action.paramSchema, true) : [];
264
- locateFields.forEach((field)=>{
265
- if (param[field]) {
266
- const locatePlan = locatePlanForLocate(param[field]);
267
- debug('will prepend locate param for field', `action.type=${planType}`, `param=${JSON.stringify(param[field])}`, `locatePlan=${JSON.stringify(locatePlan)}`);
268
- const locateTask = taskForLocatePlan(locatePlan, param[field], (result)=>{
269
- param[field] = result;
270
- });
271
- tasks.push(locateTask);
272
- } else {
273
- (0, utils_namespaceObject.assert)(!requiredLocateFields.includes(field), `Required locate field '${field}' is not provided for action ${planType}`);
274
- debug(`field '${field}' is not provided for action ${planType}`);
275
- }
276
- });
277
- const task = {
278
- type: 'Action',
279
- subType: planType,
280
- thought: plan.thought,
281
- param: plan.param,
282
- executor: async (param, context)=>{
283
- var _context_element;
284
- debug('executing action', planType, param, `context.element.center: ${null == (_context_element = context.element) ? void 0 : _context_element.center}`);
285
- const uiContext = await this.insight.contextRetrieverFn('locate');
286
- context.task.uiContext = uiContext;
287
- requiredLocateFields.forEach((field)=>{
288
- (0, utils_namespaceObject.assert)(param[field], `field '${field}' is required for action ${planType} but not provided. Cannot execute action ${planType}.`);
289
- });
290
- try {
291
- await Promise.all([
292
- (async ()=>{
293
- if (this.interface.beforeInvokeAction) {
294
- debug('will call "beforeInvokeAction" for interface');
295
- await this.interface.beforeInvokeAction(action.name, param);
296
- debug('called "beforeInvokeAction" for interface');
297
- }
298
- })(),
299
- (0, external_utils_js_namespaceObject.sleep)(200)
300
- ]);
301
- } catch (originalError) {
302
- const originalMessage = (null == originalError ? void 0 : originalError.message) || String(originalError);
303
- throw new Error(`error in running beforeInvokeAction for ${action.name}: ${originalMessage}`, {
304
- cause: originalError
305
- });
306
- }
307
- if (action.paramSchema) try {
308
- param = (0, index_js_namespaceObject.parseActionParam)(param, action.paramSchema);
309
- } catch (error) {
310
- throw new Error(`Invalid parameters for action ${action.name}: ${error.message}\nParameters: ${JSON.stringify(param)}`, {
311
- cause: error
312
- });
313
- }
314
- debug('calling action', action.name);
315
- const actionFn = action.call.bind(this.interface);
316
- await actionFn(param, context);
317
- debug('called action', action.name);
318
- try {
319
- if (this.interface.afterInvokeAction) {
320
- debug('will call "afterInvokeAction" for interface');
321
- await this.interface.afterInvokeAction(action.name, param);
322
- debug('called "afterInvokeAction" for interface');
323
- }
324
- } catch (originalError) {
325
- const originalMessage = (null == originalError ? void 0 : originalError.message) || String(originalError);
326
- throw new Error(`error in running afterInvokeAction for ${action.name}: ${originalMessage}`, {
327
- cause: originalError
328
- });
329
- }
330
- return {
331
- output: {
332
- success: true,
333
- action: planType,
334
- param: param
335
- }
336
- };
337
- }
338
- };
339
- tasks.push(task);
340
- }
341
- const wrappedTasks = tasks.map((task, index)=>{
342
- if ('Action' === task.type) return this.prependExecutorWithScreenshot(task, index === tasks.length - 1);
343
- return task;
56
+ createExecutionSession(title, options) {
57
+ return new external_execution_session_js_namespaceObject.ExecutionSession(title, ()=>Promise.resolve(this.insight.contextRetrieverFn()), {
58
+ onTaskStart: this.onTaskStartCallback,
59
+ tasks: null == options ? void 0 : options.tasks
344
60
  });
345
- return {
346
- tasks: wrappedTasks
347
- };
348
61
  }
349
- async setupPlanningContext(executorContext) {
350
- const shotTime = Date.now();
351
- const uiContext = await this.insight.contextRetrieverFn('locate');
352
- const recordItem = {
353
- type: 'screenshot',
354
- ts: shotTime,
355
- screenshot: uiContext.screenshotBase64,
356
- timing: 'before Planning'
357
- };
358
- executorContext.task.recorder = [
359
- recordItem
360
- ];
361
- executorContext.task.uiContext = uiContext;
362
- return {
363
- uiContext
364
- };
62
+ async convertPlanToExecutable(plans, modelConfig, options) {
63
+ return this.taskBuilder.build(plans, modelConfig, options);
365
64
  }
366
65
  async loadYamlFlowAsPlanning(userInstruction, yamlString) {
367
- const taskExecutor = new action_executor_js_namespaceObject.Executor((0, external_ui_utils_js_namespaceObject.taskTitleStr)('Action', userInstruction), {
368
- onTaskStart: this.onTaskStartCallback
369
- });
66
+ const session = this.createExecutionSession((0, external_ui_utils_js_namespaceObject.taskTitleStr)('Action', userInstruction));
370
67
  const task = {
371
68
  type: 'Planning',
372
69
  subType: 'LoadYaml',
@@ -375,7 +72,8 @@ class TaskExecutor {
375
72
  userInstruction
376
73
  },
377
74
  executor: async (param, executorContext)=>{
378
- await this.setupPlanningContext(executorContext);
75
+ const { uiContext } = executorContext;
76
+ (0, utils_namespaceObject.assert)(uiContext, 'uiContext is required for Planning task');
379
77
  return {
380
78
  output: {
381
79
  actions: [],
@@ -395,10 +93,9 @@ class TaskExecutor {
395
93
  };
396
94
  }
397
95
  };
398
- await taskExecutor.append(task);
399
- await taskExecutor.flush();
96
+ await session.appendAndRun(task);
400
97
  return {
401
- executor: taskExecutor
98
+ runner: session.getRunner()
402
99
  };
403
100
  }
404
101
  createPlanningTask(userInstruction, actionContext, modelConfig) {
@@ -411,7 +108,8 @@ class TaskExecutor {
411
108
  },
412
109
  executor: async (param, executorContext)=>{
413
110
  const startTime = Date.now();
414
- const { uiContext } = await this.setupPlanningContext(executorContext);
111
+ const { uiContext } = executorContext;
112
+ (0, utils_namespaceObject.assert)(uiContext, 'uiContext is required for Planning task');
415
113
  const { vlMode } = modelConfig;
416
114
  const uiTarsModelVersion = 'vlm-ui-tars' === vlMode ? modelConfig.uiTarsModelVersion : void 0;
417
115
  (0, utils_namespaceObject.assert)(this.interface.actionSpace, 'actionSpace for device is not implemented');
@@ -438,13 +136,7 @@ class TaskExecutor {
438
136
  if (sleep) {
439
137
  const timeNow = Date.now();
440
138
  const timeRemaining = sleep - (timeNow - startTime);
441
- if (timeRemaining > 0) finalActions.push({
442
- type: 'Sleep',
443
- param: {
444
- timeMs: timeRemaining
445
- },
446
- locate: null
447
- });
139
+ if (timeRemaining > 0) finalActions.push(this.sleepPlan(timeRemaining));
448
140
  }
449
141
  if (0 === finalActions.length) (0, utils_namespaceObject.assert)(!more_actions_needed_by_instruction || sleep, error ? `Failed to plan: ${error}` : 'No plan found');
450
142
  return {
@@ -464,16 +156,13 @@ class TaskExecutor {
464
156
  return task;
465
157
  }
466
158
  async runPlans(title, plans, modelConfig) {
467
- const taskExecutor = new action_executor_js_namespaceObject.Executor(title, {
468
- onTaskStart: this.onTaskStartCallback
469
- });
159
+ const session = this.createExecutionSession(title);
470
160
  const { tasks } = await this.convertPlanToExecutable(plans, modelConfig);
471
- await taskExecutor.append(tasks);
472
- const result = await taskExecutor.flush();
161
+ const result = await session.appendAndRun(tasks);
473
162
  const { output } = result;
474
163
  return {
475
164
  output,
476
- executor: taskExecutor
165
+ runner: session.getRunner()
477
166
  };
478
167
  }
479
168
  getReplanningCycleLimit(isVlmUiTars) {
@@ -481,38 +170,38 @@ class TaskExecutor {
481
170
  }
482
171
  async action(userPrompt, modelConfig, actionContext, cacheable) {
483
172
  this.conversationHistory.reset();
484
- const taskExecutor = new action_executor_js_namespaceObject.Executor((0, external_ui_utils_js_namespaceObject.taskTitleStr)('Action', userPrompt), {
485
- onTaskStart: this.onTaskStartCallback
486
- });
173
+ const session = this.createExecutionSession((0, external_ui_utils_js_namespaceObject.taskTitleStr)('Action', userPrompt));
174
+ const runner = session.getRunner();
487
175
  let replanCount = 0;
488
176
  const yamlFlow = [];
489
177
  const replanningCycleLimit = this.getReplanningCycleLimit('vlm-ui-tars' === modelConfig.vlMode);
490
178
  while(true){
491
179
  if (replanCount > replanningCycleLimit) {
492
180
  const errorMsg = `Replanning ${replanningCycleLimit} times, which is more than the limit, please split the task into multiple steps`;
493
- return this.appendErrorPlan(taskExecutor, errorMsg, modelConfig);
181
+ return session.appendErrorPlan(errorMsg);
494
182
  }
495
183
  const planningTask = this.createPlanningTask(userPrompt, actionContext, modelConfig);
496
- await taskExecutor.append(planningTask);
497
- const result = await taskExecutor.flush();
184
+ const result = await session.appendAndRun(planningTask);
498
185
  const planResult = null == result ? void 0 : result.output;
499
- if (taskExecutor.isInErrorState()) return {
186
+ if (session.isInErrorState()) return {
500
187
  output: planResult,
501
- executor: taskExecutor
188
+ runner
502
189
  };
503
190
  const plans = planResult.actions || [];
504
191
  yamlFlow.push(...planResult.yamlFlow || []);
505
192
  let executables;
506
193
  try {
507
- executables = await this.convertPlanToExecutable(plans, modelConfig, cacheable);
508
- taskExecutor.append(executables.tasks);
194
+ executables = await this.convertPlanToExecutable(plans, modelConfig, {
195
+ cacheable,
196
+ subTask: true
197
+ });
198
+ await session.appendAndRun(executables.tasks);
509
199
  } catch (error) {
510
- return this.appendErrorPlan(taskExecutor, `Error converting plans to executable tasks: ${error}, plans: ${JSON.stringify(plans)}`, modelConfig);
200
+ return session.appendErrorPlan(`Error converting plans to executable tasks: ${error}, plans: ${JSON.stringify(plans)}`);
511
201
  }
512
- await taskExecutor.flush();
513
- if (taskExecutor.isInErrorState()) return {
202
+ if (session.isInErrorState()) return {
514
203
  output: void 0,
515
- executor: taskExecutor
204
+ runner
516
205
  };
517
206
  if (!planResult.more_actions_needed_by_instruction) break;
518
207
  replanCount++;
@@ -521,7 +210,7 @@ class TaskExecutor {
521
210
  output: {
522
211
  yamlFlow
523
212
  },
524
- executor: taskExecutor
213
+ runner
525
214
  };
526
215
  }
527
216
  createTypeQueryTask(type, demand, modelConfig, opt, multimodalPrompt) {
@@ -537,23 +226,15 @@ class TaskExecutor {
537
226
  },
538
227
  executor: async (param, taskContext)=>{
539
228
  const { task } = taskContext;
540
- let insightDump;
541
- const dumpCollector = (dump)=>{
542
- insightDump = dump;
543
- };
544
- this.insight.onceDumpUpdatedFn = dumpCollector;
545
- const shotTime = Date.now();
546
- const uiContext = await this.insight.contextRetrieverFn('extract');
547
- task.uiContext = uiContext;
548
- const recordItem = {
549
- type: 'screenshot',
550
- ts: shotTime,
551
- screenshot: uiContext.screenshotBase64,
552
- timing: 'before Extract'
229
+ let queryDump;
230
+ const applyDump = (dump)=>{
231
+ queryDump = dump;
232
+ task.log = {
233
+ dump
234
+ };
553
235
  };
554
- task.recorder = [
555
- recordItem
556
- ];
236
+ const uiContext = taskContext.uiContext;
237
+ (0, utils_namespaceObject.assert)(uiContext, 'uiContext is required for Query task');
557
238
  const ifTypeRestricted = 'Query' !== type;
558
239
  let demandInput = demand;
559
240
  let keyOfResult = 'result';
@@ -566,7 +247,15 @@ class TaskExecutor {
566
247
  } else if (ifTypeRestricted) demandInput = {
567
248
  [keyOfResult]: `${type}, ${demand}`
568
249
  };
569
- const { data, usage, thought } = await this.insight.extract(demandInput, modelConfig, opt, multimodalPrompt);
250
+ let extractResult;
251
+ try {
252
+ extractResult = await this.insight.extract(demandInput, modelConfig, opt, multimodalPrompt);
253
+ } catch (error) {
254
+ if (error instanceof external_types_js_namespaceObject.InsightError) applyDump(error.dump);
255
+ throw error;
256
+ }
257
+ const { data, usage, thought, dump } = extractResult;
258
+ applyDump(dump);
570
259
  let outputResult = data;
571
260
  if (ifTypeRestricted) if ('string' == typeof data) outputResult = data;
572
261
  else {
@@ -575,7 +264,7 @@ class TaskExecutor {
575
264
  }
576
265
  return {
577
266
  output: outputResult,
578
- log: insightDump,
267
+ log: queryDump,
579
268
  usage,
580
269
  thought
581
270
  };
@@ -584,57 +273,36 @@ class TaskExecutor {
584
273
  return queryTask;
585
274
  }
586
275
  async createTypeQueryExecution(type, demand, modelConfig, opt, multimodalPrompt) {
587
- const taskExecutor = new action_executor_js_namespaceObject.Executor((0, external_ui_utils_js_namespaceObject.taskTitleStr)(type, 'string' == typeof demand ? demand : JSON.stringify(demand)), {
588
- onTaskStart: this.onTaskStartCallback
589
- });
276
+ const session = this.createExecutionSession((0, external_ui_utils_js_namespaceObject.taskTitleStr)(type, 'string' == typeof demand ? demand : JSON.stringify(demand)));
590
277
  const queryTask = await this.createTypeQueryTask(type, demand, modelConfig, opt, multimodalPrompt);
591
- await taskExecutor.append(this.prependExecutorWithScreenshot(queryTask));
592
- const result = await taskExecutor.flush();
278
+ const result = await session.appendAndRun(queryTask);
593
279
  if (!result) throw new Error('result of taskExecutor.flush() is undefined in function createTypeQueryTask');
594
280
  const { output, thought } = result;
595
281
  return {
596
282
  output,
597
283
  thought,
598
- executor: taskExecutor
284
+ runner: session.getRunner()
599
285
  };
600
286
  }
601
- async appendErrorPlan(taskExecutor, errorMsg, modelConfig) {
602
- const errorPlan = {
603
- type: 'Error',
604
- param: {
605
- thought: errorMsg
606
- },
607
- locate: null
608
- };
609
- const { tasks } = await this.convertPlanToExecutable([
610
- errorPlan
611
- ], modelConfig);
612
- await taskExecutor.append(this.prependExecutorWithScreenshot(tasks[0]));
613
- await taskExecutor.flush();
287
+ sleepPlan(timeMs) {
614
288
  return {
615
- output: void 0,
616
- executor: taskExecutor
617
- };
618
- }
619
- async taskForSleep(timeMs, modelConfig) {
620
- const sleepPlan = {
621
289
  type: 'Sleep',
622
290
  param: {
623
291
  timeMs
624
292
  },
625
293
  locate: null
626
294
  };
627
- const { tasks: sleepTasks } = await this.convertPlanToExecutable([
628
- sleepPlan
629
- ], modelConfig);
630
- return this.prependExecutorWithScreenshot(sleepTasks[0]);
295
+ }
296
+ async taskForSleep(timeMs, _modelConfig) {
297
+ return this.taskBuilder.createSleepTask({
298
+ timeMs
299
+ });
631
300
  }
632
301
  async waitFor(assertion, opt, modelConfig) {
633
- const { textPrompt, multimodalPrompt } = (0, external_utils_js_namespaceObject_1.parsePrompt)(assertion);
302
+ const { textPrompt, multimodalPrompt } = (0, external_utils_js_namespaceObject.parsePrompt)(assertion);
634
303
  const description = `waitFor: ${textPrompt}`;
635
- const taskExecutor = new action_executor_js_namespaceObject.Executor((0, external_ui_utils_js_namespaceObject.taskTitleStr)('WaitFor', description), {
636
- onTaskStart: this.onTaskStartCallback
637
- });
304
+ const session = this.createExecutionSession((0, external_ui_utils_js_namespaceObject.taskTitleStr)('WaitFor', description));
305
+ const runner = session.getRunner();
638
306
  const { timeoutMs, checkIntervalMs } = opt;
639
307
  (0, utils_namespaceObject.assert)(assertion, 'No assertion for waitFor');
640
308
  (0, utils_namespaceObject.assert)(timeoutMs, 'No timeoutMs for waitFor');
@@ -648,26 +316,28 @@ class TaskExecutor {
648
316
  const queryTask = await this.createTypeQueryTask('WaitFor', textPrompt, modelConfig, {
649
317
  doNotThrowError: true
650
318
  }, multimodalPrompt);
651
- await taskExecutor.append(this.prependExecutorWithScreenshot(queryTask));
652
- const result = await taskExecutor.flush();
319
+ const result = await session.appendAndRun(queryTask);
653
320
  if (null == result ? void 0 : result.output) return {
654
321
  output: void 0,
655
- executor: taskExecutor
322
+ runner
656
323
  };
657
324
  errorThought = (null == result ? void 0 : result.thought) || !result && `No result from assertion: ${textPrompt}` || `unknown error when waiting for assertion: ${textPrompt}`;
658
325
  const now = Date.now();
659
326
  if (now - startTime < checkIntervalMs) {
660
327
  const timeRemaining = checkIntervalMs - (now - startTime);
661
- const sleepTask = await this.taskForSleep(timeRemaining, modelConfig);
662
- await taskExecutor.append(sleepTask);
328
+ const sleepTask = this.taskBuilder.createSleepTask({
329
+ timeMs: timeRemaining
330
+ });
331
+ await session.append(sleepTask);
663
332
  }
664
333
  }
665
- return this.appendErrorPlan(taskExecutor, `waitFor timeout: ${errorThought}`, modelConfig);
334
+ return session.appendErrorPlan(`waitFor timeout: ${errorThought}`);
666
335
  }
667
336
  constructor(interfaceInstance, insight, opts){
668
337
  _define_property(this, "interface", void 0);
669
338
  _define_property(this, "insight", void 0);
670
339
  _define_property(this, "taskCache", void 0);
340
+ _define_property(this, "taskBuilder", void 0);
671
341
  _define_property(this, "conversationHistory", void 0);
672
342
  _define_property(this, "onTaskStartCallback", void 0);
673
343
  _define_property(this, "replanningCycleLimit", void 0);
@@ -677,6 +347,11 @@ class TaskExecutor {
677
347
  this.onTaskStartCallback = null == opts ? void 0 : opts.onTaskStart;
678
348
  this.replanningCycleLimit = opts.replanningCycleLimit;
679
349
  this.conversationHistory = new index_js_namespaceObject.ConversationHistory();
350
+ this.taskBuilder = new external_task_builder_js_namespaceObject.TaskBuilder({
351
+ interfaceInstance,
352
+ insight,
353
+ taskCache: opts.taskCache
354
+ });
680
355
  }
681
356
  }
682
357
  exports.TaskExecutor = __webpack_exports__.TaskExecutor;