@midscene/core 0.30.10 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (203) hide show
  1. package/dist/es/agent/agent.mjs +233 -144
  2. package/dist/es/agent/agent.mjs.map +1 -1
  3. package/dist/es/agent/execution-session.mjs +41 -0
  4. package/dist/es/agent/execution-session.mjs.map +1 -0
  5. package/dist/es/agent/index.mjs +3 -3
  6. package/dist/es/agent/task-builder.mjs +319 -0
  7. package/dist/es/agent/task-builder.mjs.map +1 -0
  8. package/dist/es/agent/task-cache.mjs +4 -4
  9. package/dist/es/agent/task-cache.mjs.map +1 -1
  10. package/dist/es/agent/tasks.mjs +197 -504
  11. package/dist/es/agent/tasks.mjs.map +1 -1
  12. package/dist/es/agent/ui-utils.mjs +54 -35
  13. package/dist/es/agent/ui-utils.mjs.map +1 -1
  14. package/dist/es/agent/utils.mjs +16 -58
  15. package/dist/es/agent/utils.mjs.map +1 -1
  16. package/dist/es/ai-model/conversation-history.mjs +25 -13
  17. package/dist/es/ai-model/conversation-history.mjs.map +1 -1
  18. package/dist/es/ai-model/index.mjs +4 -4
  19. package/dist/es/ai-model/inspect.mjs +45 -54
  20. package/dist/es/ai-model/inspect.mjs.map +1 -1
  21. package/dist/es/ai-model/llm-planning.mjs +47 -65
  22. package/dist/es/ai-model/llm-planning.mjs.map +1 -1
  23. package/dist/es/ai-model/prompt/assertion.mjs.map +1 -1
  24. package/dist/es/ai-model/prompt/common.mjs.map +1 -1
  25. package/dist/es/ai-model/prompt/describe.mjs.map +1 -1
  26. package/dist/es/ai-model/prompt/extraction.mjs.map +1 -1
  27. package/dist/es/ai-model/prompt/llm-locator.mjs +11 -235
  28. package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -1
  29. package/dist/es/ai-model/prompt/llm-planning.mjs +76 -322
  30. package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -1
  31. package/dist/es/ai-model/prompt/llm-section-locator.mjs +15 -14
  32. package/dist/es/ai-model/prompt/llm-section-locator.mjs.map +1 -1
  33. package/dist/es/ai-model/prompt/order-sensitive-judge.mjs +35 -0
  34. package/dist/es/ai-model/prompt/order-sensitive-judge.mjs.map +1 -0
  35. package/dist/es/ai-model/prompt/playwright-generator.mjs +2 -2
  36. package/dist/es/ai-model/prompt/playwright-generator.mjs.map +1 -1
  37. package/dist/es/ai-model/prompt/ui-tars-locator.mjs.map +1 -1
  38. package/dist/es/ai-model/prompt/ui-tars-planning.mjs.map +1 -1
  39. package/dist/es/ai-model/prompt/util.mjs +3 -88
  40. package/dist/es/ai-model/prompt/util.mjs.map +1 -1
  41. package/dist/es/ai-model/prompt/yaml-generator.mjs +10 -10
  42. package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -1
  43. package/dist/es/ai-model/service-caller/index.mjs +182 -274
  44. package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
  45. package/dist/es/ai-model/ui-tars-planning.mjs +69 -8
  46. package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -1
  47. package/dist/es/{ai-model/common.mjs → common.mjs} +18 -30
  48. package/dist/es/common.mjs.map +1 -0
  49. package/dist/es/device/device-options.mjs +0 -0
  50. package/dist/es/device/index.mjs +29 -12
  51. package/dist/es/device/index.mjs.map +1 -1
  52. package/dist/es/index.mjs +5 -4
  53. package/dist/es/index.mjs.map +1 -1
  54. package/dist/es/report.mjs.map +1 -1
  55. package/dist/es/{insight → service}/index.mjs +38 -51
  56. package/dist/es/service/index.mjs.map +1 -0
  57. package/dist/es/{insight → service}/utils.mjs +3 -3
  58. package/dist/es/service/utils.mjs.map +1 -0
  59. package/dist/es/task-runner.mjs +264 -0
  60. package/dist/es/task-runner.mjs.map +1 -0
  61. package/dist/es/tree.mjs +13 -2
  62. package/dist/es/tree.mjs.map +1 -0
  63. package/dist/es/types.mjs +18 -1
  64. package/dist/es/types.mjs.map +1 -1
  65. package/dist/es/utils.mjs +6 -7
  66. package/dist/es/utils.mjs.map +1 -1
  67. package/dist/es/yaml/builder.mjs.map +1 -1
  68. package/dist/es/yaml/player.mjs +121 -98
  69. package/dist/es/yaml/player.mjs.map +1 -1
  70. package/dist/es/yaml/utils.mjs +1 -1
  71. package/dist/es/yaml/utils.mjs.map +1 -1
  72. package/dist/lib/agent/agent.js +231 -142
  73. package/dist/lib/agent/agent.js.map +1 -1
  74. package/dist/lib/agent/common.js +1 -1
  75. package/dist/lib/agent/execution-session.js +75 -0
  76. package/dist/lib/agent/execution-session.js.map +1 -0
  77. package/dist/lib/agent/index.js +14 -14
  78. package/dist/lib/agent/index.js.map +1 -1
  79. package/dist/lib/agent/task-builder.js +356 -0
  80. package/dist/lib/agent/task-builder.js.map +1 -0
  81. package/dist/lib/agent/task-cache.js +8 -8
  82. package/dist/lib/agent/task-cache.js.map +1 -1
  83. package/dist/lib/agent/tasks.js +202 -506
  84. package/dist/lib/agent/tasks.js.map +1 -1
  85. package/dist/lib/agent/ui-utils.js +58 -36
  86. package/dist/lib/agent/ui-utils.js.map +1 -1
  87. package/dist/lib/agent/utils.js +26 -68
  88. package/dist/lib/agent/utils.js.map +1 -1
  89. package/dist/lib/ai-model/conversation-history.js +27 -15
  90. package/dist/lib/ai-model/conversation-history.js.map +1 -1
  91. package/dist/lib/ai-model/index.js +27 -27
  92. package/dist/lib/ai-model/index.js.map +1 -1
  93. package/dist/lib/ai-model/inspect.js +51 -57
  94. package/dist/lib/ai-model/inspect.js.map +1 -1
  95. package/dist/lib/ai-model/llm-planning.js +49 -67
  96. package/dist/lib/ai-model/llm-planning.js.map +1 -1
  97. package/dist/lib/ai-model/prompt/assertion.js +2 -2
  98. package/dist/lib/ai-model/prompt/assertion.js.map +1 -1
  99. package/dist/lib/ai-model/prompt/common.js +2 -2
  100. package/dist/lib/ai-model/prompt/common.js.map +1 -1
  101. package/dist/lib/ai-model/prompt/describe.js +2 -2
  102. package/dist/lib/ai-model/prompt/describe.js.map +1 -1
  103. package/dist/lib/ai-model/prompt/extraction.js +2 -2
  104. package/dist/lib/ai-model/prompt/extraction.js.map +1 -1
  105. package/dist/lib/ai-model/prompt/llm-locator.js +14 -241
  106. package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -1
  107. package/dist/lib/ai-model/prompt/llm-planning.js +79 -328
  108. package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -1
  109. package/dist/lib/ai-model/prompt/llm-section-locator.js +17 -16
  110. package/dist/lib/ai-model/prompt/llm-section-locator.js.map +1 -1
  111. package/dist/lib/ai-model/prompt/order-sensitive-judge.js +72 -0
  112. package/dist/lib/ai-model/prompt/order-sensitive-judge.js.map +1 -0
  113. package/dist/lib/ai-model/prompt/playwright-generator.js +11 -11
  114. package/dist/lib/ai-model/prompt/playwright-generator.js.map +1 -1
  115. package/dist/lib/ai-model/prompt/ui-tars-locator.js +2 -2
  116. package/dist/lib/ai-model/prompt/ui-tars-locator.js.map +1 -1
  117. package/dist/lib/ai-model/prompt/ui-tars-planning.js +2 -2
  118. package/dist/lib/ai-model/prompt/ui-tars-planning.js.map +1 -1
  119. package/dist/lib/ai-model/prompt/util.js +7 -95
  120. package/dist/lib/ai-model/prompt/util.js.map +1 -1
  121. package/dist/lib/ai-model/prompt/yaml-generator.js +18 -18
  122. package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -1
  123. package/dist/lib/ai-model/service-caller/index.js +288 -401
  124. package/dist/lib/ai-model/service-caller/index.js.map +1 -1
  125. package/dist/lib/ai-model/ui-tars-planning.js +71 -10
  126. package/dist/lib/ai-model/ui-tars-planning.js.map +1 -1
  127. package/dist/lib/{ai-model/common.js → common.js} +40 -55
  128. package/dist/lib/common.js.map +1 -0
  129. package/dist/lib/device/device-options.js +20 -0
  130. package/dist/lib/device/device-options.js.map +1 -0
  131. package/dist/lib/device/index.js +63 -40
  132. package/dist/lib/device/index.js.map +1 -1
  133. package/dist/lib/image/index.js +5 -5
  134. package/dist/lib/image/index.js.map +1 -1
  135. package/dist/lib/index.js +24 -20
  136. package/dist/lib/index.js.map +1 -1
  137. package/dist/lib/report.js +2 -2
  138. package/dist/lib/report.js.map +1 -1
  139. package/dist/lib/{insight → service}/index.js +41 -54
  140. package/dist/lib/service/index.js.map +1 -0
  141. package/dist/lib/{insight → service}/utils.js +7 -7
  142. package/dist/lib/service/utils.js.map +1 -0
  143. package/dist/lib/task-runner.js +301 -0
  144. package/dist/lib/task-runner.js.map +1 -0
  145. package/dist/lib/tree.js +13 -4
  146. package/dist/lib/tree.js.map +1 -1
  147. package/dist/lib/types.js +31 -12
  148. package/dist/lib/types.js.map +1 -1
  149. package/dist/lib/utils.js +16 -17
  150. package/dist/lib/utils.js.map +1 -1
  151. package/dist/lib/yaml/builder.js +2 -2
  152. package/dist/lib/yaml/builder.js.map +1 -1
  153. package/dist/lib/yaml/index.js +16 -22
  154. package/dist/lib/yaml/index.js.map +1 -1
  155. package/dist/lib/yaml/player.js +123 -100
  156. package/dist/lib/yaml/player.js.map +1 -1
  157. package/dist/lib/yaml/utils.js +6 -6
  158. package/dist/lib/yaml/utils.js.map +1 -1
  159. package/dist/lib/yaml.js +1 -1
  160. package/dist/lib/yaml.js.map +1 -1
  161. package/dist/types/agent/agent.d.ts +62 -17
  162. package/dist/types/agent/execution-session.d.ts +36 -0
  163. package/dist/types/agent/index.d.ts +3 -2
  164. package/dist/types/agent/task-builder.d.ts +35 -0
  165. package/dist/types/agent/tasks.d.ts +32 -23
  166. package/dist/types/agent/ui-utils.d.ts +9 -2
  167. package/dist/types/agent/utils.d.ts +9 -35
  168. package/dist/types/ai-model/conversation-history.d.ts +8 -4
  169. package/dist/types/ai-model/index.d.ts +5 -5
  170. package/dist/types/ai-model/inspect.d.ts +20 -12
  171. package/dist/types/ai-model/llm-planning.d.ts +3 -1
  172. package/dist/types/ai-model/prompt/llm-locator.d.ts +1 -6
  173. package/dist/types/ai-model/prompt/llm-planning.d.ts +2 -3
  174. package/dist/types/ai-model/prompt/llm-section-locator.d.ts +1 -3
  175. package/dist/types/ai-model/prompt/order-sensitive-judge.d.ts +2 -0
  176. package/dist/types/ai-model/prompt/util.d.ts +2 -34
  177. package/dist/types/ai-model/service-caller/index.d.ts +2 -3
  178. package/dist/types/ai-model/ui-tars-planning.d.ts +15 -2
  179. package/dist/types/{ai-model/common.d.ts → common.d.ts} +6 -6
  180. package/dist/types/device/device-options.d.ts +57 -0
  181. package/dist/types/device/index.d.ts +55 -39
  182. package/dist/types/index.d.ts +7 -6
  183. package/dist/types/service/index.d.ts +26 -0
  184. package/dist/types/service/utils.d.ts +2 -0
  185. package/dist/types/task-runner.d.ts +49 -0
  186. package/dist/types/tree.d.ts +4 -1
  187. package/dist/types/types.d.ts +103 -66
  188. package/dist/types/yaml/utils.d.ts +1 -1
  189. package/dist/types/yaml.d.ts +68 -43
  190. package/package.json +9 -12
  191. package/dist/es/ai-model/action-executor.mjs +0 -129
  192. package/dist/es/ai-model/action-executor.mjs.map +0 -1
  193. package/dist/es/ai-model/common.mjs.map +0 -1
  194. package/dist/es/insight/index.mjs.map +0 -1
  195. package/dist/es/insight/utils.mjs.map +0 -1
  196. package/dist/lib/ai-model/action-executor.js +0 -163
  197. package/dist/lib/ai-model/action-executor.js.map +0 -1
  198. package/dist/lib/ai-model/common.js.map +0 -1
  199. package/dist/lib/insight/index.js.map +0 -1
  200. package/dist/lib/insight/utils.js.map +0 -1
  201. package/dist/types/ai-model/action-executor.d.ts +0 -19
  202. package/dist/types/insight/index.d.ts +0 -31
  203. package/dist/types/insight/utils.d.ts +0 -2
@@ -24,17 +24,20 @@ var __webpack_require__ = {};
24
24
  var __webpack_exports__ = {};
25
25
  __webpack_require__.r(__webpack_exports__);
26
26
  __webpack_require__.d(__webpack_exports__, {
27
- locatePlanForLocate: ()=>locatePlanForLocate,
27
+ TaskExecutionError: ()=>external_task_runner_js_namespaceObject.TaskExecutionError,
28
+ locatePlanForLocate: ()=>external_task_builder_js_namespaceObject.locatePlanForLocate,
28
29
  TaskExecutor: ()=>TaskExecutor
29
30
  });
30
31
  const index_js_namespaceObject = require("../ai-model/index.js");
31
- const action_executor_js_namespaceObject = require("../ai-model/action-executor.js");
32
- const external_utils_js_namespaceObject = require("../utils.js");
33
- const env_namespaceObject = require("@midscene/shared/env");
32
+ const external_task_runner_js_namespaceObject = require("../task-runner.js");
33
+ const external_types_js_namespaceObject = require("../types.js");
34
34
  const logger_namespaceObject = require("@midscene/shared/logger");
35
35
  const utils_namespaceObject = require("@midscene/shared/utils");
36
+ const external_execution_session_js_namespaceObject = require("./execution-session.js");
37
+ const external_task_builder_js_namespaceObject = require("./task-builder.js");
38
+ const extractor_namespaceObject = require("@midscene/shared/extractor");
36
39
  const external_ui_utils_js_namespaceObject = require("./ui-utils.js");
37
- const external_utils_js_namespaceObject_1 = require("./utils.js");
40
+ const external_utils_js_namespaceObject = require("./utils.js");
38
41
  function _define_property(obj, key, value) {
39
42
  if (key in obj) Object.defineProperty(obj, key, {
40
43
  value: value,
@@ -46,337 +49,35 @@ function _define_property(obj, key, value) {
46
49
  return obj;
47
50
  }
48
51
  const debug = (0, logger_namespaceObject.getDebug)('device-task-executor');
49
- const defaultReplanningCycleLimit = 10;
50
- const defaultVlmUiTarsReplanningCycleLimit = 40;
51
- function locatePlanForLocate(param) {
52
- const locate = 'string' == typeof param ? {
53
- prompt: param
54
- } : param;
55
- const locatePlan = {
56
- type: 'Locate',
57
- locate,
58
- param: locate,
59
- thought: ''
60
- };
61
- return locatePlan;
62
- }
52
+ const maxErrorCountAllowedInOnePlanningLoop = 5;
63
53
  class TaskExecutor {
64
54
  get page() {
65
55
  return this.interface;
66
56
  }
67
- async recordScreenshot(timing) {
68
- const base64 = await this.interface.screenshotBase64();
69
- const item = {
70
- type: 'screenshot',
71
- ts: Date.now(),
72
- screenshot: base64,
73
- timing
74
- };
75
- return item;
76
- }
77
- prependExecutorWithScreenshot(taskApply, appendAfterExecution = false) {
78
- const taskWithScreenshot = {
79
- ...taskApply,
80
- executor: async (param, context, ...args)=>{
81
- const recorder = [];
82
- const { task } = context;
83
- task.recorder = recorder;
84
- const shot = await this.recordScreenshot(`before ${task.type}`);
85
- recorder.push(shot);
86
- const result = await taskApply.executor(param, context, ...args);
87
- if (appendAfterExecution) {
88
- const shot2 = await this.recordScreenshot('after Action');
89
- recorder.push(shot2);
90
- }
91
- return result;
92
- }
93
- };
94
- return taskWithScreenshot;
95
- }
96
- async convertPlanToExecutable(plans, modelConfig, cacheable) {
97
- const tasks = [];
98
- const taskForLocatePlan = (plan, detailedLocateParam, onResult)=>{
99
- if ('string' == typeof detailedLocateParam) detailedLocateParam = {
100
- prompt: detailedLocateParam
101
- };
102
- if (void 0 !== cacheable) detailedLocateParam = {
103
- ...detailedLocateParam,
104
- cacheable
105
- };
106
- const taskFind = {
107
- type: 'Insight',
108
- subType: 'Locate',
109
- param: detailedLocateParam,
110
- thought: plan.thought,
111
- executor: async (param, taskContext)=>{
112
- var _this_taskCache, _locateCacheRecord_cacheContent;
113
- const { task } = taskContext;
114
- (0, utils_namespaceObject.assert)((null == param ? void 0 : param.prompt) || (null == param ? void 0 : param.id) || (null == param ? void 0 : param.bbox), `No prompt or id or position or bbox to locate, param=${JSON.stringify(param)}`);
115
- let insightDump;
116
- let usage;
117
- const dumpCollector = (dump)=>{
118
- var _dump_taskInfo, _dump_taskInfo1;
119
- insightDump = dump;
120
- usage = null == dump ? void 0 : null == (_dump_taskInfo = dump.taskInfo) ? void 0 : _dump_taskInfo.usage;
121
- task.log = {
122
- dump: insightDump
123
- };
124
- task.usage = usage;
125
- if (null == dump ? void 0 : null == (_dump_taskInfo1 = dump.taskInfo) ? void 0 : _dump_taskInfo1.searchAreaUsage) task.searchAreaUsage = dump.taskInfo.searchAreaUsage;
126
- };
127
- this.insight.onceDumpUpdatedFn = dumpCollector;
128
- const shotTime = Date.now();
129
- const uiContext = await this.insight.contextRetrieverFn('locate');
130
- task.uiContext = uiContext;
131
- const recordItem = {
132
- type: 'screenshot',
133
- ts: shotTime,
134
- screenshot: uiContext.screenshotBase64,
135
- timing: 'before Insight'
136
- };
137
- task.recorder = [
138
- recordItem
139
- ];
140
- const elementFromXpath = param.xpath && this.interface.getElementInfoByXpath ? await this.interface.getElementInfoByXpath(param.xpath) : void 0;
141
- const userExpectedPathHitFlag = !!elementFromXpath;
142
- const cachePrompt = param.prompt;
143
- const locateCacheRecord = null == (_this_taskCache = this.taskCache) ? void 0 : _this_taskCache.matchLocateCache(cachePrompt);
144
- const cacheEntry = null == locateCacheRecord ? void 0 : null == (_locateCacheRecord_cacheContent = locateCacheRecord.cacheContent) ? void 0 : _locateCacheRecord_cacheContent.cache;
145
- const elementFromCache = userExpectedPathHitFlag ? null : await (0, external_utils_js_namespaceObject_1.matchElementFromCache)(this, cacheEntry, cachePrompt, param.cacheable);
146
- const cacheHitFlag = !!elementFromCache;
147
- const elementFromPlan = userExpectedPathHitFlag || cacheHitFlag ? void 0 : (0, external_utils_js_namespaceObject_1.matchElementFromPlan)(param, uiContext.tree);
148
- const planHitFlag = !!elementFromPlan;
149
- const elementFromAiLocate = userExpectedPathHitFlag || cacheHitFlag || planHitFlag ? void 0 : (await this.insight.locate(param, {
150
- context: uiContext
151
- }, modelConfig)).element;
152
- const aiLocateHitFlag = !!elementFromAiLocate;
153
- const element = elementFromXpath || elementFromCache || elementFromPlan || elementFromAiLocate;
154
- let currentCacheEntry;
155
- if (element && this.taskCache && !cacheHitFlag && (null == param ? void 0 : param.cacheable) !== false) if (this.interface.cacheFeatureForRect) try {
156
- const feature = await this.interface.cacheFeatureForRect(element.rect, void 0 !== element.isOrderSensitive ? {
157
- _orderSensitive: element.isOrderSensitive
158
- } : void 0);
159
- if (feature && Object.keys(feature).length > 0) {
160
- debug('update cache, prompt: %s, cache: %o', cachePrompt, feature);
161
- currentCacheEntry = feature;
162
- this.taskCache.updateOrAppendCacheRecord({
163
- type: 'locate',
164
- prompt: cachePrompt,
165
- cache: feature
166
- }, locateCacheRecord);
167
- } else debug('no cache data returned, skip cache update, prompt: %s', cachePrompt);
168
- } catch (error) {
169
- debug('cacheFeatureForRect failed: %s', error);
170
- }
171
- else debug('cacheFeatureForRect is not supported, skip cache update');
172
- if (!element) throw new Error(`Element not found: ${param.prompt}`);
173
- let hitBy;
174
- if (userExpectedPathHitFlag) hitBy = {
175
- from: 'User expected path',
176
- context: {
177
- xpath: param.xpath
178
- }
179
- };
180
- else if (cacheHitFlag) hitBy = {
181
- from: 'Cache',
182
- context: {
183
- cacheEntry,
184
- cacheToSave: currentCacheEntry
185
- }
186
- };
187
- else if (planHitFlag) hitBy = {
188
- from: 'Planning',
189
- context: {
190
- id: null == elementFromPlan ? void 0 : elementFromPlan.id,
191
- bbox: null == elementFromPlan ? void 0 : elementFromPlan.bbox
192
- }
193
- };
194
- else if (aiLocateHitFlag) hitBy = {
195
- from: 'AI model',
196
- context: {
197
- prompt: param.prompt
198
- }
199
- };
200
- null == onResult || onResult(element);
201
- return {
202
- output: {
203
- element
204
- },
205
- uiContext,
206
- hitBy
207
- };
208
- }
209
- };
210
- return taskFind;
211
- };
212
- for (const plan of plans)if ('Locate' === plan.type) {
213
- var _plan_locate, _plan_locate1;
214
- if (!plan.locate || null === plan.locate || (null == (_plan_locate = plan.locate) ? void 0 : _plan_locate.id) === null || (null == (_plan_locate1 = plan.locate) ? void 0 : _plan_locate1.id) === 'null') {
215
- debug('Locate action with id is null, will be ignored', plan);
216
- continue;
217
- }
218
- const taskLocate = taskForLocatePlan(plan, plan.locate);
219
- tasks.push(taskLocate);
220
- } else if ('Error' === plan.type) {
221
- var _plan_param;
222
- const taskActionError = {
223
- type: 'Action',
224
- subType: 'Error',
225
- param: plan.param,
226
- thought: plan.thought || (null == (_plan_param = plan.param) ? void 0 : _plan_param.thought),
227
- locate: plan.locate,
228
- executor: async ()=>{
229
- var _plan_param;
230
- throw new Error((null == plan ? void 0 : plan.thought) || (null == (_plan_param = plan.param) ? void 0 : _plan_param.thought) || 'error without thought');
231
- }
232
- };
233
- tasks.push(taskActionError);
234
- } else if ('Finished' === plan.type) {
235
- const taskActionFinished = {
236
- type: 'Action',
237
- subType: 'Finished',
238
- param: null,
239
- thought: plan.thought,
240
- locate: plan.locate,
241
- executor: async (param)=>{}
242
- };
243
- tasks.push(taskActionFinished);
244
- } else if ('Sleep' === plan.type) {
245
- const taskActionSleep = {
246
- type: 'Action',
247
- subType: 'Sleep',
248
- param: plan.param,
249
- thought: plan.thought,
250
- locate: plan.locate,
251
- executor: async (taskParam)=>{
252
- await (0, external_utils_js_namespaceObject.sleep)((null == taskParam ? void 0 : taskParam.timeMs) || 3000);
253
- }
254
- };
255
- tasks.push(taskActionSleep);
256
- } else {
257
- const planType = plan.type;
258
- const actionSpace = await this.interface.actionSpace();
259
- const action = actionSpace.find((action)=>action.name === planType);
260
- const param = plan.param;
261
- if (!action) throw new Error(`Action type '${planType}' not found`);
262
- const locateFields = action ? (0, index_js_namespaceObject.findAllMidsceneLocatorField)(action.paramSchema) : [];
263
- const requiredLocateFields = action ? (0, index_js_namespaceObject.findAllMidsceneLocatorField)(action.paramSchema, true) : [];
264
- locateFields.forEach((field)=>{
265
- if (param[field]) {
266
- const locatePlan = locatePlanForLocate(param[field]);
267
- debug('will prepend locate param for field', `action.type=${planType}`, `param=${JSON.stringify(param[field])}`, `locatePlan=${JSON.stringify(locatePlan)}`);
268
- const locateTask = taskForLocatePlan(locatePlan, param[field], (result)=>{
269
- param[field] = result;
270
- });
271
- tasks.push(locateTask);
272
- } else {
273
- (0, utils_namespaceObject.assert)(!requiredLocateFields.includes(field), `Required locate field '${field}' is not provided for action ${planType}`);
274
- debug(`field '${field}' is not provided for action ${planType}`);
275
- }
276
- });
277
- const task = {
278
- type: 'Action',
279
- subType: planType,
280
- thought: plan.thought,
281
- param: plan.param,
282
- executor: async (param, context)=>{
283
- var _context_element;
284
- debug('executing action', planType, param, `context.element.center: ${null == (_context_element = context.element) ? void 0 : _context_element.center}`);
285
- const uiContext = await this.insight.contextRetrieverFn('locate');
286
- context.task.uiContext = uiContext;
287
- requiredLocateFields.forEach((field)=>{
288
- (0, utils_namespaceObject.assert)(param[field], `field '${field}' is required for action ${planType} but not provided. Cannot execute action ${planType}.`);
289
- });
290
- try {
291
- await Promise.all([
292
- (async ()=>{
293
- if (this.interface.beforeInvokeAction) {
294
- debug('will call "beforeInvokeAction" for interface');
295
- await this.interface.beforeInvokeAction(action.name, param);
296
- debug('called "beforeInvokeAction" for interface');
297
- }
298
- })(),
299
- (0, external_utils_js_namespaceObject.sleep)(200)
300
- ]);
301
- } catch (originalError) {
302
- const originalMessage = (null == originalError ? void 0 : originalError.message) || String(originalError);
303
- throw new Error(`error in running beforeInvokeAction for ${action.name}: ${originalMessage}`, {
304
- cause: originalError
305
- });
306
- }
307
- if (action.paramSchema) try {
308
- param = (0, index_js_namespaceObject.parseActionParam)(param, action.paramSchema);
309
- } catch (error) {
310
- throw new Error(`Invalid parameters for action ${action.name}: ${error.message}\nParameters: ${JSON.stringify(param)}`, {
311
- cause: error
312
- });
313
- }
314
- debug('calling action', action.name);
315
- const actionFn = action.call.bind(this.interface);
316
- await actionFn(param, context);
317
- debug('called action', action.name);
318
- await (0, external_utils_js_namespaceObject.sleep)(300);
319
- try {
320
- if (this.interface.afterInvokeAction) {
321
- debug('will call "afterInvokeAction" for interface');
322
- await this.interface.afterInvokeAction(action.name, param);
323
- debug('called "afterInvokeAction" for interface');
324
- }
325
- } catch (originalError) {
326
- const originalMessage = (null == originalError ? void 0 : originalError.message) || String(originalError);
327
- throw new Error(`error in running afterInvokeAction for ${action.name}: ${originalMessage}`, {
328
- cause: originalError
329
- });
330
- }
331
- return {
332
- output: {
333
- success: true,
334
- action: planType,
335
- param: param
336
- }
337
- };
338
- }
339
- };
340
- tasks.push(task);
341
- }
342
- const wrappedTasks = tasks.map((task, index)=>{
343
- if ('Action' === task.type) return this.prependExecutorWithScreenshot(task, index === tasks.length - 1);
344
- return task;
57
+ createExecutionSession(title, options) {
58
+ return new external_execution_session_js_namespaceObject.ExecutionSession(title, ()=>Promise.resolve(this.service.contextRetrieverFn()), {
59
+ onTaskStart: this.onTaskStartCallback,
60
+ tasks: options?.tasks,
61
+ onTaskUpdate: this.hooks?.onTaskUpdate
345
62
  });
346
- return {
347
- tasks: wrappedTasks
348
- };
349
63
  }
350
- async setupPlanningContext(executorContext) {
351
- const shotTime = Date.now();
352
- const uiContext = await this.insight.contextRetrieverFn('locate');
353
- const recordItem = {
354
- type: 'screenshot',
355
- ts: shotTime,
356
- screenshot: uiContext.screenshotBase64,
357
- timing: 'before Planning'
358
- };
359
- executorContext.task.recorder = [
360
- recordItem
361
- ];
362
- executorContext.task.uiContext = uiContext;
363
- return {
364
- uiContext
365
- };
64
+ getActionSpace() {
65
+ return this.providedActionSpace;
66
+ }
67
+ async convertPlanToExecutable(plans, modelConfigForPlanning, modelConfigForDefaultIntent, options) {
68
+ return this.taskBuilder.build(plans, modelConfigForPlanning, modelConfigForDefaultIntent, options);
366
69
  }
367
70
  async loadYamlFlowAsPlanning(userInstruction, yamlString) {
368
- const taskExecutor = new action_executor_js_namespaceObject.Executor((0, external_ui_utils_js_namespaceObject.taskTitleStr)('Action', userInstruction), {
369
- onTaskStart: this.onTaskStartCallback
370
- });
71
+ const session = this.createExecutionSession((0, external_ui_utils_js_namespaceObject.taskTitleStr)('Action', userInstruction));
371
72
  const task = {
372
73
  type: 'Planning',
373
74
  subType: 'LoadYaml',
374
- locate: null,
375
75
  param: {
376
76
  userInstruction
377
77
  },
378
78
  executor: async (param, executorContext)=>{
379
- await this.setupPlanningContext(executorContext);
79
+ const { uiContext } = executorContext;
80
+ (0, utils_namespaceObject.assert)(uiContext, 'uiContext is required for Planning task');
380
81
  return {
381
82
  output: {
382
83
  actions: [],
@@ -396,140 +97,137 @@ class TaskExecutor {
396
97
  };
397
98
  }
398
99
  };
399
- await taskExecutor.append(task);
400
- await taskExecutor.flush();
100
+ const runner = session.getRunner();
101
+ await session.appendAndRun(task);
401
102
  return {
402
- executor: taskExecutor
403
- };
404
- }
405
- createPlanningTask(userInstruction, actionContext, modelConfig) {
406
- const task = {
407
- type: 'Planning',
408
- subType: 'Plan',
409
- locate: null,
410
- param: {
411
- userInstruction
412
- },
413
- executor: async (param, executorContext)=>{
414
- const startTime = Date.now();
415
- const { uiContext } = await this.setupPlanningContext(executorContext);
416
- const { vlMode } = modelConfig;
417
- const uiTarsModelVersion = 'vlm-ui-tars' === vlMode ? modelConfig.uiTarsModelVersion : void 0;
418
- (0, utils_namespaceObject.assert)(this.interface.actionSpace, 'actionSpace for device is not implemented');
419
- const actionSpace = await this.interface.actionSpace();
420
- debug('actionSpace for this interface is:', actionSpace.map((action)=>action.name).join(', '));
421
- (0, utils_namespaceObject.assert)(Array.isArray(actionSpace), 'actionSpace must be an array');
422
- if (0 === actionSpace.length) console.warn(`ActionSpace for ${this.interface.interfaceType} is empty. This may lead to unexpected behavior.`);
423
- const planResult = await (uiTarsModelVersion ? index_js_namespaceObject.uiTarsPlanning : index_js_namespaceObject.plan)(param.userInstruction, {
424
- context: uiContext,
425
- actionContext,
426
- interfaceType: this.interface.interfaceType,
427
- actionSpace,
428
- modelConfig,
429
- conversationHistory: this.conversationHistory
430
- });
431
- debug('planResult', JSON.stringify(planResult, null, 2));
432
- const { actions, log, more_actions_needed_by_instruction, error, usage, rawResponse, sleep } = planResult;
433
- executorContext.task.log = {
434
- ...executorContext.task.log || {},
435
- rawResponse
436
- };
437
- executorContext.task.usage = usage;
438
- const finalActions = actions || [];
439
- if (sleep) {
440
- const timeNow = Date.now();
441
- const timeRemaining = sleep - (timeNow - startTime);
442
- if (timeRemaining > 0) finalActions.push({
443
- type: 'Sleep',
444
- param: {
445
- timeMs: timeRemaining
446
- },
447
- locate: null
448
- });
449
- }
450
- if (0 === finalActions.length) (0, utils_namespaceObject.assert)(!more_actions_needed_by_instruction || sleep, error ? `Failed to plan: ${error}` : 'No plan found');
451
- return {
452
- output: {
453
- actions: finalActions,
454
- more_actions_needed_by_instruction,
455
- log,
456
- yamlFlow: planResult.yamlFlow
457
- },
458
- cache: {
459
- hit: false
460
- },
461
- uiContext
462
- };
463
- }
103
+ runner
464
104
  };
465
- return task;
466
105
  }
467
- async runPlans(title, plans, modelConfig) {
468
- const taskExecutor = new action_executor_js_namespaceObject.Executor(title, {
469
- onTaskStart: this.onTaskStartCallback
470
- });
471
- const { tasks } = await this.convertPlanToExecutable(plans, modelConfig);
472
- await taskExecutor.append(tasks);
473
- const result = await taskExecutor.flush();
474
- const { output } = result;
106
+ async runPlans(title, plans, modelConfigForPlanning, modelConfigForDefaultIntent) {
107
+ const session = this.createExecutionSession(title);
108
+ const { tasks } = await this.convertPlanToExecutable(plans, modelConfigForPlanning, modelConfigForDefaultIntent);
109
+ const runner = session.getRunner();
110
+ const result = await session.appendAndRun(tasks);
111
+ const { output } = result ?? {};
475
112
  return {
476
113
  output,
477
- executor: taskExecutor
114
+ runner
478
115
  };
479
116
  }
480
- getReplanningCycleLimit(isVlmUiTars) {
481
- return this.replanningCycleLimit || env_namespaceObject.globalConfigManager.getEnvConfigInNumber(env_namespaceObject.MIDSCENE_REPLANNING_CYCLE_LIMIT) || (isVlmUiTars ? defaultVlmUiTarsReplanningCycleLimit : defaultReplanningCycleLimit);
482
- }
483
- async action(userPrompt, modelConfig, actionContext, cacheable) {
117
+ async action(userPrompt, modelConfigForPlanning, modelConfigForDefaultIntent, includeBboxInPlanning, aiActContext, cacheable, replanningCycleLimitOverride, imagesIncludeCount) {
484
118
  this.conversationHistory.reset();
485
- const taskExecutor = new action_executor_js_namespaceObject.Executor((0, external_ui_utils_js_namespaceObject.taskTitleStr)('Action', userPrompt), {
486
- onTaskStart: this.onTaskStartCallback
487
- });
119
+ const session = this.createExecutionSession((0, external_ui_utils_js_namespaceObject.taskTitleStr)('Action', userPrompt));
120
+ const runner = session.getRunner();
488
121
  let replanCount = 0;
489
122
  const yamlFlow = [];
490
- const replanningCycleLimit = this.getReplanningCycleLimit('vlm-ui-tars' === modelConfig.vlMode);
123
+ const replanningCycleLimit = replanningCycleLimitOverride ?? this.replanningCycleLimit;
124
+ (0, utils_namespaceObject.assert)(void 0 !== replanningCycleLimit, 'replanningCycleLimit is required for TaskExecutor.action');
125
+ let errorCountInOnePlanningLoop = 0;
491
126
  while(true){
492
- if (replanCount > replanningCycleLimit) {
493
- const errorMsg = `Replanning ${replanningCycleLimit} times, which is more than the limit, please split the task into multiple steps`;
494
- return this.appendErrorPlan(taskExecutor, errorMsg, modelConfig);
495
- }
496
- const planningTask = this.createPlanningTask(userPrompt, actionContext, modelConfig);
497
- await taskExecutor.append(planningTask);
498
- const result = await taskExecutor.flush();
499
- const planResult = null == result ? void 0 : result.output;
500
- if (taskExecutor.isInErrorState()) return {
501
- output: planResult,
502
- executor: taskExecutor
503
- };
504
- const plans = planResult.actions || [];
505
- yamlFlow.push(...planResult.yamlFlow || []);
127
+ const result = await session.appendAndRun({
128
+ type: 'Planning',
129
+ subType: 'Plan',
130
+ param: {
131
+ userInstruction: userPrompt,
132
+ aiActContext,
133
+ imagesIncludeCount
134
+ },
135
+ executor: async (param, executorContext)=>{
136
+ const startTime = Date.now();
137
+ const { uiContext } = executorContext;
138
+ (0, utils_namespaceObject.assert)(uiContext, 'uiContext is required for Planning task');
139
+ const { vlMode } = modelConfigForPlanning;
140
+ const uiTarsModelVersion = 'vlm-ui-tars' === vlMode ? modelConfigForPlanning.uiTarsModelVersion : void 0;
141
+ const actionSpace = this.getActionSpace();
142
+ debug('actionSpace for this interface is:', actionSpace.map((action)=>action.name).join(', '));
143
+ (0, utils_namespaceObject.assert)(Array.isArray(actionSpace), 'actionSpace must be an array');
144
+ if (0 === actionSpace.length) console.warn(`ActionSpace for ${this.interface.interfaceType} is empty. This may lead to unexpected behavior.`);
145
+ const planResult = await (uiTarsModelVersion ? index_js_namespaceObject.uiTarsPlanning : index_js_namespaceObject.plan)(param.userInstruction, {
146
+ context: uiContext,
147
+ actionContext: param.aiActContext,
148
+ interfaceType: this.interface.interfaceType,
149
+ actionSpace,
150
+ modelConfig: modelConfigForPlanning,
151
+ conversationHistory: this.conversationHistory,
152
+ includeBbox: includeBboxInPlanning,
153
+ imagesIncludeCount
154
+ });
155
+ debug('planResult', JSON.stringify(planResult, null, 2));
156
+ const { actions, log, more_actions_needed_by_instruction, error, usage, rawResponse, sleep } = planResult;
157
+ executorContext.task.log = {
158
+ ...executorContext.task.log || {},
159
+ rawResponse
160
+ };
161
+ executorContext.task.usage = usage;
162
+ executorContext.task.output = {
163
+ actions: actions || [],
164
+ more_actions_needed_by_instruction,
165
+ log,
166
+ yamlFlow: planResult.yamlFlow
167
+ };
168
+ executorContext.uiContext = uiContext;
169
+ const finalActions = [
170
+ ...actions || []
171
+ ];
172
+ if (sleep) {
173
+ const timeNow = Date.now();
174
+ const timeRemaining = sleep - (timeNow - startTime);
175
+ if (timeRemaining > 0) finalActions.push(this.sleepPlan(timeRemaining));
176
+ }
177
+ (0, utils_namespaceObject.assert)(!error, `Failed to continue: ${error}\n${log || ''}`);
178
+ return {
179
+ cache: {
180
+ hit: false
181
+ }
182
+ };
183
+ }
184
+ }, {
185
+ allowWhenError: true
186
+ });
187
+ const planResult = result?.output;
188
+ const plans = planResult?.actions || [];
189
+ yamlFlow.push(...planResult?.yamlFlow || []);
506
190
  let executables;
507
191
  try {
508
- executables = await this.convertPlanToExecutable(plans, modelConfig, cacheable);
509
- taskExecutor.append(executables.tasks);
192
+ executables = await this.convertPlanToExecutable(plans, modelConfigForPlanning, modelConfigForDefaultIntent, {
193
+ cacheable,
194
+ subTask: true
195
+ });
510
196
  } catch (error) {
511
- return this.appendErrorPlan(taskExecutor, `Error converting plans to executable tasks: ${error}, plans: ${JSON.stringify(plans)}`, modelConfig);
197
+ return session.appendErrorPlan(`Error converting plans to executable tasks: ${error}, plans: ${JSON.stringify(plans)}`);
512
198
  }
513
- await taskExecutor.flush();
514
- if (taskExecutor.isInErrorState()) return {
515
- output: void 0,
516
- executor: taskExecutor
517
- };
518
- if (!planResult.more_actions_needed_by_instruction) break;
519
- replanCount++;
199
+ if (this.conversationHistory.pendingFeedbackMessage) console.warn('unconsumed pending feedback message detected, this may lead to unexpected planning result:', this.conversationHistory.pendingFeedbackMessage);
200
+ let errorFlag = false;
201
+ try {
202
+ await session.appendAndRun(executables.tasks);
203
+ } catch (error) {
204
+ errorFlag = true;
205
+ errorCountInOnePlanningLoop++;
206
+ this.conversationHistory.pendingFeedbackMessage = `Error executing running tasks: ${error?.message || String(error)}`;
207
+ debug('error when executing running tasks, but continue to run if it is not too many errors:', error instanceof Error ? error.message : String(error), 'current error count in one planning loop:', errorCountInOnePlanningLoop);
208
+ }
209
+ if (errorCountInOnePlanningLoop > maxErrorCountAllowedInOnePlanningLoop) return session.appendErrorPlan('Too many errors in one planning loop');
210
+ if (!planResult?.more_actions_needed_by_instruction) if (errorFlag) debug('more_actions_needed_by_instruction is false, but there are errors in one planning loop, continue to run');
211
+ else break;
212
+ ++replanCount;
213
+ if (replanCount > replanningCycleLimit) {
214
+ const errorMsg = `Replanned ${replanningCycleLimit} times, exceeding the limit. Please configure a larger value for replanningCycleLimit (or use MIDSCENE_REPLANNING_CYCLE_LIMIT) to handle more complex tasks.`;
215
+ return session.appendErrorPlan(errorMsg);
216
+ }
217
+ if (!this.conversationHistory.pendingFeedbackMessage) this.conversationHistory.pendingFeedbackMessage = 'I have finished the action previously planned.';
520
218
  }
521
- return {
219
+ const finalResult = {
522
220
  output: {
523
221
  yamlFlow
524
222
  },
525
- executor: taskExecutor
223
+ runner
526
224
  };
225
+ return finalResult;
527
226
  }
528
227
  createTypeQueryTask(type, demand, modelConfig, opt, multimodalPrompt) {
529
228
  const queryTask = {
530
229
  type: 'Insight',
531
230
  subType: type,
532
- locate: null,
533
231
  param: {
534
232
  dataDemand: multimodalPrompt ? {
535
233
  demand,
@@ -538,23 +236,15 @@ class TaskExecutor {
538
236
  },
539
237
  executor: async (param, taskContext)=>{
540
238
  const { task } = taskContext;
541
- let insightDump;
542
- const dumpCollector = (dump)=>{
543
- insightDump = dump;
544
- };
545
- this.insight.onceDumpUpdatedFn = dumpCollector;
546
- const shotTime = Date.now();
547
- const uiContext = await this.insight.contextRetrieverFn('extract');
548
- task.uiContext = uiContext;
549
- const recordItem = {
550
- type: 'screenshot',
551
- ts: shotTime,
552
- screenshot: uiContext.screenshotBase64,
553
- timing: 'before Extract'
239
+ let queryDump;
240
+ const applyDump = (dump)=>{
241
+ queryDump = dump;
242
+ task.log = {
243
+ dump
244
+ };
554
245
  };
555
- task.recorder = [
556
- recordItem
557
- ];
246
+ const uiContext = taskContext.uiContext;
247
+ (0, utils_namespaceObject.assert)(uiContext, 'uiContext is required for Query task');
558
248
  const ifTypeRestricted = 'Query' !== type;
559
249
  let demandInput = demand;
560
250
  let keyOfResult = 'result';
@@ -567,13 +257,27 @@ class TaskExecutor {
567
257
  } else if (ifTypeRestricted) demandInput = {
568
258
  [keyOfResult]: `${type}, ${demand}`
569
259
  };
570
- const { data, usage, thought } = await this.insight.extract(demandInput, modelConfig, opt, multimodalPrompt);
260
+ let extractResult;
261
+ let extraPageDescription = '';
262
+ if (opt?.domIncluded && this.interface.getElementsNodeTree) {
263
+ debug('appending tree info for page');
264
+ const tree = await this.interface.getElementsNodeTree();
265
+ extraPageDescription = await (0, extractor_namespaceObject.descriptionOfTree)(tree, 200, false, opt?.domIncluded === 'visible-only');
266
+ }
267
+ try {
268
+ extractResult = await this.service.extract(demandInput, modelConfig, opt, extraPageDescription, multimodalPrompt);
269
+ } catch (error) {
270
+ if (error instanceof external_types_js_namespaceObject.ServiceError) applyDump(error.dump);
271
+ throw error;
272
+ }
273
+ const { data, usage, thought, dump } = extractResult;
274
+ applyDump(dump);
571
275
  let outputResult = data;
572
276
  if (ifTypeRestricted) if ('string' == typeof data) outputResult = data;
573
277
  else if ('WaitFor' === type) outputResult = null == data ? false : data[keyOfResult];
574
278
  else if (null == data) outputResult = null;
575
279
  else {
576
- (0, utils_namespaceObject.assert)((null == data ? void 0 : data[keyOfResult]) !== void 0, 'No result in query data');
280
+ (0, utils_namespaceObject.assert)(data?.[keyOfResult] !== void 0, 'No result in query data');
577
281
  outputResult = data[keyOfResult];
578
282
  }
579
283
  if ('Assert' === type && !outputResult) {
@@ -583,7 +287,7 @@ class TaskExecutor {
583
287
  }
584
288
  return {
585
289
  output: outputResult,
586
- log: insightDump,
290
+ log: queryDump,
587
291
  usage,
588
292
  thought
589
293
  };
@@ -592,107 +296,99 @@ class TaskExecutor {
592
296
  return queryTask;
593
297
  }
594
298
  async createTypeQueryExecution(type, demand, modelConfig, opt, multimodalPrompt) {
595
- const taskExecutor = new action_executor_js_namespaceObject.Executor((0, external_ui_utils_js_namespaceObject.taskTitleStr)(type, 'string' == typeof demand ? demand : JSON.stringify(demand)), {
596
- onTaskStart: this.onTaskStartCallback
597
- });
299
+ const session = this.createExecutionSession((0, external_ui_utils_js_namespaceObject.taskTitleStr)(type, 'string' == typeof demand ? demand : JSON.stringify(demand)));
598
300
  const queryTask = await this.createTypeQueryTask(type, demand, modelConfig, opt, multimodalPrompt);
599
- await taskExecutor.append(this.prependExecutorWithScreenshot(queryTask));
600
- const result = await taskExecutor.flush();
301
+ const runner = session.getRunner();
302
+ const result = await session.appendAndRun(queryTask);
601
303
  if (!result) throw new Error('result of taskExecutor.flush() is undefined in function createTypeQueryTask');
602
304
  const { output, thought } = result;
603
305
  return {
604
306
  output,
605
307
  thought,
606
- executor: taskExecutor
308
+ runner
607
309
  };
608
310
  }
609
- async appendErrorPlan(taskExecutor, errorMsg, modelConfig) {
610
- const errorPlan = {
611
- type: 'Error',
612
- param: {
613
- thought: errorMsg
614
- },
615
- locate: null
616
- };
617
- const { tasks } = await this.convertPlanToExecutable([
618
- errorPlan
619
- ], modelConfig);
620
- await taskExecutor.append(this.prependExecutorWithScreenshot(tasks[0]));
621
- await taskExecutor.flush();
311
+ sleepPlan(timeMs) {
622
312
  return {
623
- output: void 0,
624
- executor: taskExecutor
625
- };
626
- }
627
- async taskForSleep(timeMs, modelConfig) {
628
- const sleepPlan = {
629
313
  type: 'Sleep',
630
314
  param: {
631
315
  timeMs
632
- },
633
- locate: null
316
+ }
634
317
  };
635
- const { tasks: sleepTasks } = await this.convertPlanToExecutable([
636
- sleepPlan
637
- ], modelConfig);
638
- return this.prependExecutorWithScreenshot(sleepTasks[0]);
318
+ }
319
+ async taskForSleep(timeMs, _modelConfig) {
320
+ return this.taskBuilder.createSleepTask({
321
+ timeMs
322
+ });
639
323
  }
640
324
  async waitFor(assertion, opt, modelConfig) {
641
- const { textPrompt, multimodalPrompt } = (0, external_utils_js_namespaceObject_1.parsePrompt)(assertion);
325
+ const { textPrompt, multimodalPrompt } = (0, external_utils_js_namespaceObject.parsePrompt)(assertion);
642
326
  const description = `waitFor: ${textPrompt}`;
643
- const taskExecutor = new action_executor_js_namespaceObject.Executor((0, external_ui_utils_js_namespaceObject.taskTitleStr)('WaitFor', description), {
644
- onTaskStart: this.onTaskStartCallback
645
- });
327
+ const session = this.createExecutionSession((0, external_ui_utils_js_namespaceObject.taskTitleStr)('WaitFor', description));
328
+ const runner = session.getRunner();
646
329
  const { timeoutMs, checkIntervalMs } = opt;
647
330
  (0, utils_namespaceObject.assert)(assertion, 'No assertion for waitFor');
648
331
  (0, utils_namespaceObject.assert)(timeoutMs, 'No timeoutMs for waitFor');
649
332
  (0, utils_namespaceObject.assert)(checkIntervalMs, 'No checkIntervalMs for waitFor');
650
333
  (0, utils_namespaceObject.assert)(checkIntervalMs <= timeoutMs, `wrong config for waitFor: checkIntervalMs must be less than timeoutMs, config: {checkIntervalMs: ${checkIntervalMs}, timeoutMs: ${timeoutMs}}`);
651
334
  const overallStartTime = Date.now();
652
- let startTime = Date.now();
335
+ let lastCheckStart = overallStartTime;
653
336
  let errorThought = '';
654
- while(Date.now() - overallStartTime < timeoutMs){
655
- startTime = Date.now();
656
- const queryTask = await this.createTypeQueryTask('WaitFor', textPrompt, modelConfig, {
657
- doNotThrowError: true
658
- }, multimodalPrompt);
659
- await taskExecutor.append(this.prependExecutorWithScreenshot(queryTask));
660
- const result = await taskExecutor.flush();
661
- if (null == result ? void 0 : result.output) return {
337
+ while(lastCheckStart - overallStartTime <= timeoutMs){
338
+ const currentCheckStart = Date.now();
339
+ lastCheckStart = currentCheckStart;
340
+ const queryTask = await this.createTypeQueryTask('WaitFor', textPrompt, modelConfig, void 0, multimodalPrompt);
341
+ const result = await session.appendAndRun(queryTask);
342
+ if (result?.output) return {
662
343
  output: void 0,
663
- executor: taskExecutor
344
+ runner
664
345
  };
665
- errorThought = (null == result ? void 0 : result.thought) || !result && `No result from assertion: ${textPrompt}` || `unknown error when waiting for assertion: ${textPrompt}`;
346
+ errorThought = result?.thought || !result && `No result from assertion: ${textPrompt}` || `unknown error when waiting for assertion: ${textPrompt}`;
666
347
  const now = Date.now();
667
- if (now - startTime < checkIntervalMs) {
668
- const timeRemaining = checkIntervalMs - (now - startTime);
669
- const sleepTask = await this.taskForSleep(timeRemaining, modelConfig);
670
- await taskExecutor.append(sleepTask);
348
+ if (now - currentCheckStart < checkIntervalMs) {
349
+ const timeRemaining = checkIntervalMs - (now - currentCheckStart);
350
+ const sleepTask = this.taskBuilder.createSleepTask({
351
+ timeMs: timeRemaining
352
+ });
353
+ await session.append(sleepTask);
671
354
  }
672
355
  }
673
- return this.appendErrorPlan(taskExecutor, `waitFor timeout: ${errorThought}`, modelConfig);
356
+ return session.appendErrorPlan(`waitFor timeout: ${errorThought}`);
674
357
  }
675
- constructor(interfaceInstance, insight, opts){
358
+ constructor(interfaceInstance, service, opts){
676
359
  _define_property(this, "interface", void 0);
677
- _define_property(this, "insight", void 0);
360
+ _define_property(this, "service", void 0);
678
361
  _define_property(this, "taskCache", void 0);
362
+ _define_property(this, "providedActionSpace", void 0);
363
+ _define_property(this, "taskBuilder", void 0);
679
364
  _define_property(this, "conversationHistory", void 0);
680
365
  _define_property(this, "onTaskStartCallback", void 0);
366
+ _define_property(this, "hooks", void 0);
681
367
  _define_property(this, "replanningCycleLimit", void 0);
682
368
  this.interface = interfaceInstance;
683
- this.insight = insight;
369
+ this.service = service;
684
370
  this.taskCache = opts.taskCache;
685
- this.onTaskStartCallback = null == opts ? void 0 : opts.onTaskStart;
371
+ this.onTaskStartCallback = opts?.onTaskStart;
686
372
  this.replanningCycleLimit = opts.replanningCycleLimit;
373
+ this.hooks = opts.hooks;
687
374
  this.conversationHistory = new index_js_namespaceObject.ConversationHistory();
375
+ this.providedActionSpace = opts.actionSpace;
376
+ this.taskBuilder = new external_task_builder_js_namespaceObject.TaskBuilder({
377
+ interfaceInstance,
378
+ service,
379
+ taskCache: opts.taskCache,
380
+ actionSpace: this.getActionSpace()
381
+ });
688
382
  }
689
383
  }
384
+ exports.TaskExecutionError = __webpack_exports__.TaskExecutionError;
690
385
  exports.TaskExecutor = __webpack_exports__.TaskExecutor;
691
386
  exports.locatePlanForLocate = __webpack_exports__.locatePlanForLocate;
692
- for(var __webpack_i__ in __webpack_exports__)if (-1 === [
387
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
388
+ "TaskExecutionError",
693
389
  "TaskExecutor",
694
390
  "locatePlanForLocate"
695
- ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
391
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
696
392
  Object.defineProperty(exports, '__esModule', {
697
393
  value: true
698
394
  });