@midscene/core 0.30.5 → 1.0.1-beta-20251021060907.0

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (87)
  1. package/dist/es/agent/agent.mjs +41 -33
  2. package/dist/es/agent/agent.mjs.map +1 -1
  3. package/dist/es/agent/execution-session.mjs +41 -0
  4. package/dist/es/agent/execution-session.mjs.map +1 -0
  5. package/dist/es/agent/task-builder.mjs +303 -0
  6. package/dist/es/agent/task-builder.mjs.map +1 -0
  7. package/dist/es/agent/tasks.mjs +68 -391
  8. package/dist/es/agent/tasks.mjs.map +1 -1
  9. package/dist/es/agent/ui-utils.mjs.map +1 -1
  10. package/dist/es/agent/utils.mjs +6 -6
  11. package/dist/es/agent/utils.mjs.map +1 -1
  12. package/dist/es/ai-model/common.mjs +1 -15
  13. package/dist/es/ai-model/common.mjs.map +1 -1
  14. package/dist/es/ai-model/inspect.mjs +2 -3
  15. package/dist/es/ai-model/inspect.mjs.map +1 -1
  16. package/dist/es/ai-model/llm-planning.mjs +6 -24
  17. package/dist/es/ai-model/llm-planning.mjs.map +1 -1
  18. package/dist/es/ai-model/prompt/llm-locator.mjs +3 -204
  19. package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -1
  20. package/dist/es/ai-model/service-caller/index.mjs +101 -231
  21. package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
  22. package/dist/es/index.mjs +3 -2
  23. package/dist/es/index.mjs.map +1 -1
  24. package/dist/es/insight/index.mjs +18 -19
  25. package/dist/es/insight/index.mjs.map +1 -1
  26. package/dist/es/insight/utils.mjs +3 -3
  27. package/dist/es/insight/utils.mjs.map +1 -1
  28. package/dist/es/report.mjs.map +1 -1
  29. package/dist/es/{ai-model/action-executor.mjs → task-runner.mjs} +69 -10
  30. package/dist/es/task-runner.mjs.map +1 -0
  31. package/dist/es/types.mjs +18 -1
  32. package/dist/es/types.mjs.map +1 -1
  33. package/dist/es/utils.mjs +2 -2
  34. package/dist/es/yaml/player.mjs +18 -14
  35. package/dist/es/yaml/player.mjs.map +1 -1
  36. package/dist/lib/agent/agent.js +41 -33
  37. package/dist/lib/agent/agent.js.map +1 -1
  38. package/dist/lib/agent/execution-session.js +75 -0
  39. package/dist/lib/agent/execution-session.js.map +1 -0
  40. package/dist/lib/agent/task-builder.js +340 -0
  41. package/dist/lib/agent/task-builder.js.map +1 -0
  42. package/dist/lib/agent/tasks.js +68 -391
  43. package/dist/lib/agent/tasks.js.map +1 -1
  44. package/dist/lib/agent/ui-utils.js.map +1 -1
  45. package/dist/lib/agent/utils.js +6 -6
  46. package/dist/lib/agent/utils.js.map +1 -1
  47. package/dist/lib/ai-model/common.js +2 -19
  48. package/dist/lib/ai-model/common.js.map +1 -1
  49. package/dist/lib/ai-model/inspect.js +1 -2
  50. package/dist/lib/ai-model/inspect.js.map +1 -1
  51. package/dist/lib/ai-model/llm-planning.js +5 -23
  52. package/dist/lib/ai-model/llm-planning.js.map +1 -1
  53. package/dist/lib/ai-model/prompt/llm-locator.js +2 -206
  54. package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -1
  55. package/dist/lib/ai-model/service-caller/index.js +236 -384
  56. package/dist/lib/ai-model/service-caller/index.js.map +1 -1
  57. package/dist/lib/index.js +9 -5
  58. package/dist/lib/index.js.map +1 -1
  59. package/dist/lib/insight/index.js +17 -18
  60. package/dist/lib/insight/index.js.map +1 -1
  61. package/dist/lib/insight/utils.js +5 -5
  62. package/dist/lib/insight/utils.js.map +1 -1
  63. package/dist/lib/report.js.map +1 -1
  64. package/dist/lib/{ai-model/action-executor.js → task-runner.js} +71 -12
  65. package/dist/lib/task-runner.js.map +1 -0
  66. package/dist/lib/types.js +22 -1
  67. package/dist/lib/types.js.map +1 -1
  68. package/dist/lib/utils.js +2 -2
  69. package/dist/lib/yaml/player.js +18 -14
  70. package/dist/lib/yaml/player.js.map +1 -1
  71. package/dist/types/agent/agent.d.ts +16 -0
  72. package/dist/types/agent/execution-session.d.ts +27 -0
  73. package/dist/types/agent/task-builder.d.ts +24 -0
  74. package/dist/types/agent/tasks.d.ts +8 -11
  75. package/dist/types/agent/ui-utils.d.ts +2 -2
  76. package/dist/types/agent/utils.d.ts +5 -2
  77. package/dist/types/ai-model/common.d.ts +0 -1
  78. package/dist/types/ai-model/prompt/llm-locator.d.ts +0 -2
  79. package/dist/types/index.d.ts +4 -3
  80. package/dist/types/insight/index.d.ts +5 -10
  81. package/dist/types/insight/utils.d.ts +2 -2
  82. package/dist/types/{ai-model/action-executor.d.ts → task-runner.d.ts} +14 -3
  83. package/dist/types/types.d.ts +47 -4
  84. package/dist/types/yaml.d.ts +3 -1
  85. package/package.json +4 -7
  86. package/dist/es/ai-model/action-executor.mjs.map +0 -1
  87. package/dist/lib/ai-model/action-executor.js.map +0 -1
@@ -24,17 +24,18 @@ var __webpack_require__ = {};
24
24
  var __webpack_exports__ = {};
25
25
  __webpack_require__.r(__webpack_exports__);
26
26
  __webpack_require__.d(__webpack_exports__, {
27
- locatePlanForLocate: ()=>locatePlanForLocate,
27
+ locatePlanForLocate: ()=>external_task_builder_js_namespaceObject.locatePlanForLocate,
28
28
  TaskExecutor: ()=>TaskExecutor
29
29
  });
30
30
  const index_js_namespaceObject = require("../ai-model/index.js");
31
- const action_executor_js_namespaceObject = require("../ai-model/action-executor.js");
32
- const external_utils_js_namespaceObject = require("../utils.js");
31
+ const external_types_js_namespaceObject = require("../types.js");
33
32
  const env_namespaceObject = require("@midscene/shared/env");
34
33
  const logger_namespaceObject = require("@midscene/shared/logger");
35
34
  const utils_namespaceObject = require("@midscene/shared/utils");
35
+ const external_execution_session_js_namespaceObject = require("./execution-session.js");
36
+ const external_task_builder_js_namespaceObject = require("./task-builder.js");
36
37
  const external_ui_utils_js_namespaceObject = require("./ui-utils.js");
37
- const external_utils_js_namespaceObject_1 = require("./utils.js");
38
+ const external_utils_js_namespaceObject = require("./utils.js");
38
39
  function _define_property(obj, key, value) {
39
40
  if (key in obj) Object.defineProperty(obj, key, {
40
41
  value: value,
@@ -48,325 +49,23 @@ function _define_property(obj, key, value) {
48
49
  const debug = (0, logger_namespaceObject.getDebug)('device-task-executor');
49
50
  const defaultReplanningCycleLimit = 10;
50
51
  const defaultVlmUiTarsReplanningCycleLimit = 40;
51
- function locatePlanForLocate(param) {
52
- const locate = 'string' == typeof param ? {
53
- prompt: param
54
- } : param;
55
- const locatePlan = {
56
- type: 'Locate',
57
- locate,
58
- param: locate,
59
- thought: ''
60
- };
61
- return locatePlan;
62
- }
63
52
  class TaskExecutor {
64
53
  get page() {
65
54
  return this.interface;
66
55
  }
67
- async recordScreenshot(timing) {
68
- const base64 = await this.interface.screenshotBase64();
69
- const item = {
70
- type: 'screenshot',
71
- ts: Date.now(),
72
- screenshot: base64,
73
- timing
74
- };
75
- return item;
76
- }
77
- prependExecutorWithScreenshot(taskApply, appendAfterExecution = false) {
78
- const taskWithScreenshot = {
79
- ...taskApply,
80
- executor: async (param, context, ...args)=>{
81
- const recorder = [];
82
- const { task } = context;
83
- task.recorder = recorder;
84
- const shot = await this.recordScreenshot(`before ${task.type}`);
85
- recorder.push(shot);
86
- const result = await taskApply.executor(param, context, ...args);
87
- if (appendAfterExecution) {
88
- const shot2 = await this.recordScreenshot('after Action');
89
- recorder.push(shot2);
90
- }
91
- return result;
92
- }
93
- };
94
- return taskWithScreenshot;
56
+ createExecutionSession(title, options) {
57
+ return new external_execution_session_js_namespaceObject.ExecutionSession(title, ()=>Promise.resolve(this.insight.contextRetrieverFn()), {
58
+ onTaskStart: this.onTaskStartCallback,
59
+ tasks: null == options ? void 0 : options.tasks
60
+ });
95
61
  }
96
62
  async convertPlanToExecutable(plans, modelConfig, cacheable) {
97
- const tasks = [];
98
- const taskForLocatePlan = (plan, detailedLocateParam, onResult)=>{
99
- if ('string' == typeof detailedLocateParam) detailedLocateParam = {
100
- prompt: detailedLocateParam
101
- };
102
- if (void 0 !== cacheable) detailedLocateParam = {
103
- ...detailedLocateParam,
104
- cacheable
105
- };
106
- const taskFind = {
107
- type: 'Insight',
108
- subType: 'Locate',
109
- param: detailedLocateParam,
110
- thought: plan.thought,
111
- executor: async (param, taskContext)=>{
112
- var _this_taskCache, _locateCacheRecord_cacheContent;
113
- const { task } = taskContext;
114
- (0, utils_namespaceObject.assert)((null == param ? void 0 : param.prompt) || (null == param ? void 0 : param.id) || (null == param ? void 0 : param.bbox), `No prompt or id or position or bbox to locate, param=${JSON.stringify(param)}`);
115
- let insightDump;
116
- let usage;
117
- const dumpCollector = (dump)=>{
118
- var _dump_taskInfo, _dump_taskInfo1;
119
- insightDump = dump;
120
- usage = null == dump ? void 0 : null == (_dump_taskInfo = dump.taskInfo) ? void 0 : _dump_taskInfo.usage;
121
- task.log = {
122
- dump: insightDump
123
- };
124
- task.usage = usage;
125
- if (null == dump ? void 0 : null == (_dump_taskInfo1 = dump.taskInfo) ? void 0 : _dump_taskInfo1.searchAreaUsage) task.searchAreaUsage = dump.taskInfo.searchAreaUsage;
126
- };
127
- this.insight.onceDumpUpdatedFn = dumpCollector;
128
- const shotTime = Date.now();
129
- const uiContext = await this.insight.contextRetrieverFn('locate');
130
- task.uiContext = uiContext;
131
- const recordItem = {
132
- type: 'screenshot',
133
- ts: shotTime,
134
- screenshot: uiContext.screenshotBase64,
135
- timing: 'before Insight'
136
- };
137
- task.recorder = [
138
- recordItem
139
- ];
140
- const elementFromXpath = param.xpath && this.interface.getElementInfoByXpath ? await this.interface.getElementInfoByXpath(param.xpath) : void 0;
141
- const userExpectedPathHitFlag = !!elementFromXpath;
142
- const cachePrompt = param.prompt;
143
- const locateCacheRecord = null == (_this_taskCache = this.taskCache) ? void 0 : _this_taskCache.matchLocateCache(cachePrompt);
144
- const cacheEntry = null == locateCacheRecord ? void 0 : null == (_locateCacheRecord_cacheContent = locateCacheRecord.cacheContent) ? void 0 : _locateCacheRecord_cacheContent.cache;
145
- const elementFromCache = userExpectedPathHitFlag ? null : await (0, external_utils_js_namespaceObject_1.matchElementFromCache)(this, cacheEntry, cachePrompt, param.cacheable);
146
- const cacheHitFlag = !!elementFromCache;
147
- const elementFromPlan = userExpectedPathHitFlag || cacheHitFlag ? void 0 : (0, external_utils_js_namespaceObject_1.matchElementFromPlan)(param, uiContext.tree);
148
- const planHitFlag = !!elementFromPlan;
149
- const elementFromAiLocate = userExpectedPathHitFlag || cacheHitFlag || planHitFlag ? void 0 : (await this.insight.locate(param, {
150
- context: uiContext
151
- }, modelConfig)).element;
152
- const aiLocateHitFlag = !!elementFromAiLocate;
153
- const element = elementFromXpath || elementFromCache || elementFromPlan || elementFromAiLocate;
154
- let currentCacheEntry;
155
- if (element && this.taskCache && !cacheHitFlag && (null == param ? void 0 : param.cacheable) !== false) if (this.interface.cacheFeatureForRect) try {
156
- const feature = await this.interface.cacheFeatureForRect(element.rect, void 0 !== element.isOrderSensitive ? {
157
- _orderSensitive: element.isOrderSensitive
158
- } : void 0);
159
- if (feature && Object.keys(feature).length > 0) {
160
- debug('update cache, prompt: %s, cache: %o', cachePrompt, feature);
161
- currentCacheEntry = feature;
162
- this.taskCache.updateOrAppendCacheRecord({
163
- type: 'locate',
164
- prompt: cachePrompt,
165
- cache: feature
166
- }, locateCacheRecord);
167
- } else debug('no cache data returned, skip cache update, prompt: %s', cachePrompt);
168
- } catch (error) {
169
- debug('cacheFeatureForRect failed: %s', error);
170
- }
171
- else debug('cacheFeatureForRect is not supported, skip cache update');
172
- if (!element) throw new Error(`Element not found: ${param.prompt}`);
173
- let hitBy;
174
- if (userExpectedPathHitFlag) hitBy = {
175
- from: 'User expected path',
176
- context: {
177
- xpath: param.xpath
178
- }
179
- };
180
- else if (cacheHitFlag) hitBy = {
181
- from: 'Cache',
182
- context: {
183
- cacheEntry,
184
- cacheToSave: currentCacheEntry
185
- }
186
- };
187
- else if (planHitFlag) hitBy = {
188
- from: 'Planning',
189
- context: {
190
- id: null == elementFromPlan ? void 0 : elementFromPlan.id,
191
- bbox: null == elementFromPlan ? void 0 : elementFromPlan.bbox
192
- }
193
- };
194
- else if (aiLocateHitFlag) hitBy = {
195
- from: 'AI model',
196
- context: {
197
- prompt: param.prompt
198
- }
199
- };
200
- null == onResult || onResult(element);
201
- return {
202
- output: {
203
- element
204
- },
205
- uiContext,
206
- hitBy
207
- };
208
- }
209
- };
210
- return taskFind;
211
- };
212
- for (const plan of plans)if ('Locate' === plan.type) {
213
- var _plan_locate, _plan_locate1;
214
- if (!plan.locate || null === plan.locate || (null == (_plan_locate = plan.locate) ? void 0 : _plan_locate.id) === null || (null == (_plan_locate1 = plan.locate) ? void 0 : _plan_locate1.id) === 'null') {
215
- debug('Locate action with id is null, will be ignored', plan);
216
- continue;
217
- }
218
- const taskLocate = taskForLocatePlan(plan, plan.locate);
219
- tasks.push(taskLocate);
220
- } else if ('Error' === plan.type) {
221
- var _plan_param;
222
- const taskActionError = {
223
- type: 'Action',
224
- subType: 'Error',
225
- param: plan.param,
226
- thought: plan.thought || (null == (_plan_param = plan.param) ? void 0 : _plan_param.thought),
227
- locate: plan.locate,
228
- executor: async ()=>{
229
- var _plan_param;
230
- throw new Error((null == plan ? void 0 : plan.thought) || (null == (_plan_param = plan.param) ? void 0 : _plan_param.thought) || 'error without thought');
231
- }
232
- };
233
- tasks.push(taskActionError);
234
- } else if ('Finished' === plan.type) {
235
- const taskActionFinished = {
236
- type: 'Action',
237
- subType: 'Finished',
238
- param: null,
239
- thought: plan.thought,
240
- locate: plan.locate,
241
- executor: async (param)=>{}
242
- };
243
- tasks.push(taskActionFinished);
244
- } else if ('Sleep' === plan.type) {
245
- const taskActionSleep = {
246
- type: 'Action',
247
- subType: 'Sleep',
248
- param: plan.param,
249
- thought: plan.thought,
250
- locate: plan.locate,
251
- executor: async (taskParam)=>{
252
- await (0, external_utils_js_namespaceObject.sleep)((null == taskParam ? void 0 : taskParam.timeMs) || 3000);
253
- }
254
- };
255
- tasks.push(taskActionSleep);
256
- } else {
257
- const planType = plan.type;
258
- const actionSpace = await this.interface.actionSpace();
259
- const action = actionSpace.find((action)=>action.name === planType);
260
- const param = plan.param;
261
- if (!action) throw new Error(`Action type '${planType}' not found`);
262
- const locateFields = action ? (0, index_js_namespaceObject.findAllMidsceneLocatorField)(action.paramSchema) : [];
263
- const requiredLocateFields = action ? (0, index_js_namespaceObject.findAllMidsceneLocatorField)(action.paramSchema, true) : [];
264
- locateFields.forEach((field)=>{
265
- if (param[field]) {
266
- const locatePlan = locatePlanForLocate(param[field]);
267
- debug('will prepend locate param for field', `action.type=${planType}`, `param=${JSON.stringify(param[field])}`, `locatePlan=${JSON.stringify(locatePlan)}`);
268
- const locateTask = taskForLocatePlan(locatePlan, param[field], (result)=>{
269
- param[field] = result;
270
- });
271
- tasks.push(locateTask);
272
- } else {
273
- (0, utils_namespaceObject.assert)(!requiredLocateFields.includes(field), `Required locate field '${field}' is not provided for action ${planType}`);
274
- debug(`field '${field}' is not provided for action ${planType}`);
275
- }
276
- });
277
- const task = {
278
- type: 'Action',
279
- subType: planType,
280
- thought: plan.thought,
281
- param: plan.param,
282
- executor: async (param, context)=>{
283
- var _context_element;
284
- debug('executing action', planType, param, `context.element.center: ${null == (_context_element = context.element) ? void 0 : _context_element.center}`);
285
- const uiContext = await this.insight.contextRetrieverFn('locate');
286
- context.task.uiContext = uiContext;
287
- requiredLocateFields.forEach((field)=>{
288
- (0, utils_namespaceObject.assert)(param[field], `field '${field}' is required for action ${planType} but not provided. Cannot execute action ${planType}.`);
289
- });
290
- try {
291
- await Promise.all([
292
- (async ()=>{
293
- if (this.interface.beforeInvokeAction) {
294
- debug('will call "beforeInvokeAction" for interface');
295
- await this.interface.beforeInvokeAction(action.name, param);
296
- debug('called "beforeInvokeAction" for interface');
297
- }
298
- })(),
299
- (0, external_utils_js_namespaceObject.sleep)(200)
300
- ]);
301
- } catch (originalError) {
302
- const originalMessage = (null == originalError ? void 0 : originalError.message) || String(originalError);
303
- throw new Error(`error in running beforeInvokeAction for ${action.name}: ${originalMessage}`, {
304
- cause: originalError
305
- });
306
- }
307
- if (action.paramSchema) try {
308
- param = (0, index_js_namespaceObject.parseActionParam)(param, action.paramSchema);
309
- } catch (error) {
310
- throw new Error(`Invalid parameters for action ${action.name}: ${error.message}\nParameters: ${JSON.stringify(param)}`, {
311
- cause: error
312
- });
313
- }
314
- debug('calling action', action.name);
315
- const actionFn = action.call.bind(this.interface);
316
- await actionFn(param, context);
317
- debug('called action', action.name);
318
- try {
319
- if (this.interface.afterInvokeAction) {
320
- debug('will call "afterInvokeAction" for interface');
321
- await this.interface.afterInvokeAction(action.name, param);
322
- debug('called "afterInvokeAction" for interface');
323
- }
324
- } catch (originalError) {
325
- const originalMessage = (null == originalError ? void 0 : originalError.message) || String(originalError);
326
- throw new Error(`error in running afterInvokeAction for ${action.name}: ${originalMessage}`, {
327
- cause: originalError
328
- });
329
- }
330
- return {
331
- output: {
332
- success: true,
333
- action: planType,
334
- param: param
335
- }
336
- };
337
- }
338
- };
339
- tasks.push(task);
340
- }
341
- const wrappedTasks = tasks.map((task, index)=>{
342
- if ('Action' === task.type) return this.prependExecutorWithScreenshot(task, index === tasks.length - 1);
343
- return task;
63
+ return this.taskBuilder.build(plans, modelConfig, {
64
+ cacheable
344
65
  });
345
- return {
346
- tasks: wrappedTasks
347
- };
348
- }
349
- async setupPlanningContext(executorContext) {
350
- const shotTime = Date.now();
351
- const uiContext = await this.insight.contextRetrieverFn('locate');
352
- const recordItem = {
353
- type: 'screenshot',
354
- ts: shotTime,
355
- screenshot: uiContext.screenshotBase64,
356
- timing: 'before Planning'
357
- };
358
- executorContext.task.recorder = [
359
- recordItem
360
- ];
361
- executorContext.task.uiContext = uiContext;
362
- return {
363
- uiContext
364
- };
365
66
  }
366
67
  async loadYamlFlowAsPlanning(userInstruction, yamlString) {
367
- const taskExecutor = new action_executor_js_namespaceObject.Executor((0, external_ui_utils_js_namespaceObject.taskTitleStr)('Action', userInstruction), {
368
- onTaskStart: this.onTaskStartCallback
369
- });
68
+ const session = this.createExecutionSession((0, external_ui_utils_js_namespaceObject.taskTitleStr)('Action', userInstruction));
370
69
  const task = {
371
70
  type: 'Planning',
372
71
  subType: 'LoadYaml',
@@ -375,7 +74,8 @@ class TaskExecutor {
375
74
  userInstruction
376
75
  },
377
76
  executor: async (param, executorContext)=>{
378
- await this.setupPlanningContext(executorContext);
77
+ const { uiContext } = executorContext;
78
+ (0, utils_namespaceObject.assert)(uiContext, 'uiContext is required for Planning task');
379
79
  return {
380
80
  output: {
381
81
  actions: [],
@@ -395,10 +95,9 @@ class TaskExecutor {
395
95
  };
396
96
  }
397
97
  };
398
- await taskExecutor.append(task);
399
- await taskExecutor.flush();
98
+ await session.appendAndRun(task);
400
99
  return {
401
- executor: taskExecutor
100
+ runner: session.getRunner()
402
101
  };
403
102
  }
404
103
  createPlanningTask(userInstruction, actionContext, modelConfig) {
@@ -411,7 +110,8 @@ class TaskExecutor {
411
110
  },
412
111
  executor: async (param, executorContext)=>{
413
112
  const startTime = Date.now();
414
- const { uiContext } = await this.setupPlanningContext(executorContext);
113
+ const { uiContext } = executorContext;
114
+ (0, utils_namespaceObject.assert)(uiContext, 'uiContext is required for Planning task');
415
115
  const { vlMode } = modelConfig;
416
116
  const uiTarsModelVersion = 'vlm-ui-tars' === vlMode ? modelConfig.uiTarsModelVersion : void 0;
417
117
  (0, utils_namespaceObject.assert)(this.interface.actionSpace, 'actionSpace for device is not implemented');
@@ -464,16 +164,13 @@ class TaskExecutor {
464
164
  return task;
465
165
  }
466
166
  async runPlans(title, plans, modelConfig) {
467
- const taskExecutor = new action_executor_js_namespaceObject.Executor(title, {
468
- onTaskStart: this.onTaskStartCallback
469
- });
167
+ const session = this.createExecutionSession(title);
470
168
  const { tasks } = await this.convertPlanToExecutable(plans, modelConfig);
471
- await taskExecutor.append(tasks);
472
- const result = await taskExecutor.flush();
169
+ const result = await session.appendAndRun(tasks);
473
170
  const { output } = result;
474
171
  return {
475
172
  output,
476
- executor: taskExecutor
173
+ runner: session.getRunner()
477
174
  };
478
175
  }
479
176
  getReplanningCycleLimit(isVlmUiTars) {
@@ -481,38 +178,35 @@ class TaskExecutor {
481
178
  }
482
179
  async action(userPrompt, modelConfig, actionContext, cacheable) {
483
180
  this.conversationHistory.reset();
484
- const taskExecutor = new action_executor_js_namespaceObject.Executor((0, external_ui_utils_js_namespaceObject.taskTitleStr)('Action', userPrompt), {
485
- onTaskStart: this.onTaskStartCallback
486
- });
181
+ const session = this.createExecutionSession((0, external_ui_utils_js_namespaceObject.taskTitleStr)('Action', userPrompt));
182
+ const runner = session.getRunner();
487
183
  let replanCount = 0;
488
184
  const yamlFlow = [];
489
185
  const replanningCycleLimit = this.getReplanningCycleLimit('vlm-ui-tars' === modelConfig.vlMode);
490
186
  while(true){
491
187
  if (replanCount > replanningCycleLimit) {
492
188
  const errorMsg = `Replanning ${replanningCycleLimit} times, which is more than the limit, please split the task into multiple steps`;
493
- return this.appendErrorPlan(taskExecutor, errorMsg, modelConfig);
189
+ return session.appendErrorPlan(errorMsg);
494
190
  }
495
191
  const planningTask = this.createPlanningTask(userPrompt, actionContext, modelConfig);
496
- await taskExecutor.append(planningTask);
497
- const result = await taskExecutor.flush();
192
+ const result = await session.appendAndRun(planningTask);
498
193
  const planResult = null == result ? void 0 : result.output;
499
- if (taskExecutor.isInErrorState()) return {
194
+ if (session.isInErrorState()) return {
500
195
  output: planResult,
501
- executor: taskExecutor
196
+ runner
502
197
  };
503
198
  const plans = planResult.actions || [];
504
199
  yamlFlow.push(...planResult.yamlFlow || []);
505
200
  let executables;
506
201
  try {
507
202
  executables = await this.convertPlanToExecutable(plans, modelConfig, cacheable);
508
- taskExecutor.append(executables.tasks);
203
+ await session.appendAndRun(executables.tasks);
509
204
  } catch (error) {
510
- return this.appendErrorPlan(taskExecutor, `Error converting plans to executable tasks: ${error}, plans: ${JSON.stringify(plans)}`, modelConfig);
205
+ return session.appendErrorPlan(`Error converting plans to executable tasks: ${error}, plans: ${JSON.stringify(plans)}`);
511
206
  }
512
- await taskExecutor.flush();
513
- if (taskExecutor.isInErrorState()) return {
207
+ if (session.isInErrorState()) return {
514
208
  output: void 0,
515
- executor: taskExecutor
209
+ runner
516
210
  };
517
211
  if (!planResult.more_actions_needed_by_instruction) break;
518
212
  replanCount++;
@@ -521,7 +215,7 @@ class TaskExecutor {
521
215
  output: {
522
216
  yamlFlow
523
217
  },
524
- executor: taskExecutor
218
+ runner
525
219
  };
526
220
  }
527
221
  createTypeQueryTask(type, demand, modelConfig, opt, multimodalPrompt) {
@@ -537,23 +231,15 @@ class TaskExecutor {
537
231
  },
538
232
  executor: async (param, taskContext)=>{
539
233
  const { task } = taskContext;
540
- let insightDump;
541
- const dumpCollector = (dump)=>{
542
- insightDump = dump;
543
- };
544
- this.insight.onceDumpUpdatedFn = dumpCollector;
545
- const shotTime = Date.now();
546
- const uiContext = await this.insight.contextRetrieverFn('extract');
547
- task.uiContext = uiContext;
548
- const recordItem = {
549
- type: 'screenshot',
550
- ts: shotTime,
551
- screenshot: uiContext.screenshotBase64,
552
- timing: 'before Extract'
234
+ let queryDump;
235
+ const applyDump = (dump)=>{
236
+ queryDump = dump;
237
+ task.log = {
238
+ dump
239
+ };
553
240
  };
554
- task.recorder = [
555
- recordItem
556
- ];
241
+ const uiContext = taskContext.uiContext;
242
+ (0, utils_namespaceObject.assert)(uiContext, 'uiContext is required for Query task');
557
243
  const ifTypeRestricted = 'Query' !== type;
558
244
  let demandInput = demand;
559
245
  let keyOfResult = 'result';
@@ -566,7 +252,15 @@ class TaskExecutor {
566
252
  } else if (ifTypeRestricted) demandInput = {
567
253
  [keyOfResult]: `${type}, ${demand}`
568
254
  };
569
- const { data, usage, thought } = await this.insight.extract(demandInput, modelConfig, opt, multimodalPrompt);
255
+ let extractResult;
256
+ try {
257
+ extractResult = await this.insight.extract(demandInput, modelConfig, opt, multimodalPrompt);
258
+ } catch (error) {
259
+ if (error instanceof external_types_js_namespaceObject.InsightError) applyDump(error.dump);
260
+ throw error;
261
+ }
262
+ const { data, usage, thought, dump } = extractResult;
263
+ applyDump(dump);
570
264
  let outputResult = data;
571
265
  if (ifTypeRestricted) if ('string' == typeof data) outputResult = data;
572
266
  else {
@@ -575,7 +269,7 @@ class TaskExecutor {
575
269
  }
576
270
  return {
577
271
  output: outputResult,
578
- log: insightDump,
272
+ log: queryDump,
579
273
  usage,
580
274
  thought
581
275
  };
@@ -584,36 +278,15 @@ class TaskExecutor {
584
278
  return queryTask;
585
279
  }
586
280
  async createTypeQueryExecution(type, demand, modelConfig, opt, multimodalPrompt) {
587
- const taskExecutor = new action_executor_js_namespaceObject.Executor((0, external_ui_utils_js_namespaceObject.taskTitleStr)(type, 'string' == typeof demand ? demand : JSON.stringify(demand)), {
588
- onTaskStart: this.onTaskStartCallback
589
- });
281
+ const session = this.createExecutionSession((0, external_ui_utils_js_namespaceObject.taskTitleStr)(type, 'string' == typeof demand ? demand : JSON.stringify(demand)));
590
282
  const queryTask = await this.createTypeQueryTask(type, demand, modelConfig, opt, multimodalPrompt);
591
- await taskExecutor.append(this.prependExecutorWithScreenshot(queryTask));
592
- const result = await taskExecutor.flush();
283
+ const result = await session.appendAndRun(queryTask);
593
284
  if (!result) throw new Error('result of taskExecutor.flush() is undefined in function createTypeQueryTask');
594
285
  const { output, thought } = result;
595
286
  return {
596
287
  output,
597
288
  thought,
598
- executor: taskExecutor
599
- };
600
- }
601
- async appendErrorPlan(taskExecutor, errorMsg, modelConfig) {
602
- const errorPlan = {
603
- type: 'Error',
604
- param: {
605
- thought: errorMsg
606
- },
607
- locate: null
608
- };
609
- const { tasks } = await this.convertPlanToExecutable([
610
- errorPlan
611
- ], modelConfig);
612
- await taskExecutor.append(this.prependExecutorWithScreenshot(tasks[0]));
613
- await taskExecutor.flush();
614
- return {
615
- output: void 0,
616
- executor: taskExecutor
289
+ runner: session.getRunner()
617
290
  };
618
291
  }
619
292
  async taskForSleep(timeMs, modelConfig) {
@@ -627,14 +300,13 @@ class TaskExecutor {
627
300
  const { tasks: sleepTasks } = await this.convertPlanToExecutable([
628
301
  sleepPlan
629
302
  ], modelConfig);
630
- return this.prependExecutorWithScreenshot(sleepTasks[0]);
303
+ return sleepTasks[0];
631
304
  }
632
305
  async waitFor(assertion, opt, modelConfig) {
633
- const { textPrompt, multimodalPrompt } = (0, external_utils_js_namespaceObject_1.parsePrompt)(assertion);
306
+ const { textPrompt, multimodalPrompt } = (0, external_utils_js_namespaceObject.parsePrompt)(assertion);
634
307
  const description = `waitFor: ${textPrompt}`;
635
- const taskExecutor = new action_executor_js_namespaceObject.Executor((0, external_ui_utils_js_namespaceObject.taskTitleStr)('WaitFor', description), {
636
- onTaskStart: this.onTaskStartCallback
637
- });
308
+ const session = this.createExecutionSession((0, external_ui_utils_js_namespaceObject.taskTitleStr)('WaitFor', description));
309
+ const runner = session.getRunner();
638
310
  const { timeoutMs, checkIntervalMs } = opt;
639
311
  (0, utils_namespaceObject.assert)(assertion, 'No assertion for waitFor');
640
312
  (0, utils_namespaceObject.assert)(timeoutMs, 'No timeoutMs for waitFor');
@@ -648,26 +320,26 @@ class TaskExecutor {
648
320
  const queryTask = await this.createTypeQueryTask('WaitFor', textPrompt, modelConfig, {
649
321
  doNotThrowError: true
650
322
  }, multimodalPrompt);
651
- await taskExecutor.append(this.prependExecutorWithScreenshot(queryTask));
652
- const result = await taskExecutor.flush();
323
+ const result = await session.appendAndRun(queryTask);
653
324
  if (null == result ? void 0 : result.output) return {
654
325
  output: void 0,
655
- executor: taskExecutor
326
+ runner
656
327
  };
657
328
  errorThought = (null == result ? void 0 : result.thought) || !result && `No result from assertion: ${textPrompt}` || `unknown error when waiting for assertion: ${textPrompt}`;
658
329
  const now = Date.now();
659
330
  if (now - startTime < checkIntervalMs) {
660
331
  const timeRemaining = checkIntervalMs - (now - startTime);
661
332
  const sleepTask = await this.taskForSleep(timeRemaining, modelConfig);
662
- await taskExecutor.append(sleepTask);
333
+ await session.append(sleepTask);
663
334
  }
664
335
  }
665
- return this.appendErrorPlan(taskExecutor, `waitFor timeout: ${errorThought}`, modelConfig);
336
+ return session.appendErrorPlan(`waitFor timeout: ${errorThought}`);
666
337
  }
667
338
  constructor(interfaceInstance, insight, opts){
668
339
  _define_property(this, "interface", void 0);
669
340
  _define_property(this, "insight", void 0);
670
341
  _define_property(this, "taskCache", void 0);
342
+ _define_property(this, "taskBuilder", void 0);
671
343
  _define_property(this, "conversationHistory", void 0);
672
344
  _define_property(this, "onTaskStartCallback", void 0);
673
345
  _define_property(this, "replanningCycleLimit", void 0);
@@ -677,6 +349,11 @@ class TaskExecutor {
677
349
  this.onTaskStartCallback = null == opts ? void 0 : opts.onTaskStart;
678
350
  this.replanningCycleLimit = opts.replanningCycleLimit;
679
351
  this.conversationHistory = new index_js_namespaceObject.ConversationHistory();
352
+ this.taskBuilder = new external_task_builder_js_namespaceObject.TaskBuilder({
353
+ interfaceInstance,
354
+ insight,
355
+ taskCache: opts.taskCache
356
+ });
680
357
  }
681
358
  }
682
359
  exports.TaskExecutor = __webpack_exports__.TaskExecutor;