@midscene/core 0.30.10 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (203) hide show
  1. package/dist/es/agent/agent.mjs +233 -144
  2. package/dist/es/agent/agent.mjs.map +1 -1
  3. package/dist/es/agent/execution-session.mjs +41 -0
  4. package/dist/es/agent/execution-session.mjs.map +1 -0
  5. package/dist/es/agent/index.mjs +3 -3
  6. package/dist/es/agent/task-builder.mjs +319 -0
  7. package/dist/es/agent/task-builder.mjs.map +1 -0
  8. package/dist/es/agent/task-cache.mjs +4 -4
  9. package/dist/es/agent/task-cache.mjs.map +1 -1
  10. package/dist/es/agent/tasks.mjs +197 -504
  11. package/dist/es/agent/tasks.mjs.map +1 -1
  12. package/dist/es/agent/ui-utils.mjs +54 -35
  13. package/dist/es/agent/ui-utils.mjs.map +1 -1
  14. package/dist/es/agent/utils.mjs +16 -58
  15. package/dist/es/agent/utils.mjs.map +1 -1
  16. package/dist/es/ai-model/conversation-history.mjs +25 -13
  17. package/dist/es/ai-model/conversation-history.mjs.map +1 -1
  18. package/dist/es/ai-model/index.mjs +4 -4
  19. package/dist/es/ai-model/inspect.mjs +45 -54
  20. package/dist/es/ai-model/inspect.mjs.map +1 -1
  21. package/dist/es/ai-model/llm-planning.mjs +47 -65
  22. package/dist/es/ai-model/llm-planning.mjs.map +1 -1
  23. package/dist/es/ai-model/prompt/assertion.mjs.map +1 -1
  24. package/dist/es/ai-model/prompt/common.mjs.map +1 -1
  25. package/dist/es/ai-model/prompt/describe.mjs.map +1 -1
  26. package/dist/es/ai-model/prompt/extraction.mjs.map +1 -1
  27. package/dist/es/ai-model/prompt/llm-locator.mjs +11 -235
  28. package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -1
  29. package/dist/es/ai-model/prompt/llm-planning.mjs +76 -322
  30. package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -1
  31. package/dist/es/ai-model/prompt/llm-section-locator.mjs +15 -14
  32. package/dist/es/ai-model/prompt/llm-section-locator.mjs.map +1 -1
  33. package/dist/es/ai-model/prompt/order-sensitive-judge.mjs +35 -0
  34. package/dist/es/ai-model/prompt/order-sensitive-judge.mjs.map +1 -0
  35. package/dist/es/ai-model/prompt/playwright-generator.mjs +2 -2
  36. package/dist/es/ai-model/prompt/playwright-generator.mjs.map +1 -1
  37. package/dist/es/ai-model/prompt/ui-tars-locator.mjs.map +1 -1
  38. package/dist/es/ai-model/prompt/ui-tars-planning.mjs.map +1 -1
  39. package/dist/es/ai-model/prompt/util.mjs +3 -88
  40. package/dist/es/ai-model/prompt/util.mjs.map +1 -1
  41. package/dist/es/ai-model/prompt/yaml-generator.mjs +10 -10
  42. package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -1
  43. package/dist/es/ai-model/service-caller/index.mjs +182 -274
  44. package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
  45. package/dist/es/ai-model/ui-tars-planning.mjs +69 -8
  46. package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -1
  47. package/dist/es/{ai-model/common.mjs → common.mjs} +18 -30
  48. package/dist/es/common.mjs.map +1 -0
  49. package/dist/es/device/device-options.mjs +0 -0
  50. package/dist/es/device/index.mjs +29 -12
  51. package/dist/es/device/index.mjs.map +1 -1
  52. package/dist/es/index.mjs +5 -4
  53. package/dist/es/index.mjs.map +1 -1
  54. package/dist/es/report.mjs.map +1 -1
  55. package/dist/es/{insight → service}/index.mjs +38 -51
  56. package/dist/es/service/index.mjs.map +1 -0
  57. package/dist/es/{insight → service}/utils.mjs +3 -3
  58. package/dist/es/service/utils.mjs.map +1 -0
  59. package/dist/es/task-runner.mjs +264 -0
  60. package/dist/es/task-runner.mjs.map +1 -0
  61. package/dist/es/tree.mjs +13 -2
  62. package/dist/es/tree.mjs.map +1 -0
  63. package/dist/es/types.mjs +18 -1
  64. package/dist/es/types.mjs.map +1 -1
  65. package/dist/es/utils.mjs +6 -7
  66. package/dist/es/utils.mjs.map +1 -1
  67. package/dist/es/yaml/builder.mjs.map +1 -1
  68. package/dist/es/yaml/player.mjs +121 -98
  69. package/dist/es/yaml/player.mjs.map +1 -1
  70. package/dist/es/yaml/utils.mjs +1 -1
  71. package/dist/es/yaml/utils.mjs.map +1 -1
  72. package/dist/lib/agent/agent.js +231 -142
  73. package/dist/lib/agent/agent.js.map +1 -1
  74. package/dist/lib/agent/common.js +1 -1
  75. package/dist/lib/agent/execution-session.js +75 -0
  76. package/dist/lib/agent/execution-session.js.map +1 -0
  77. package/dist/lib/agent/index.js +14 -14
  78. package/dist/lib/agent/index.js.map +1 -1
  79. package/dist/lib/agent/task-builder.js +356 -0
  80. package/dist/lib/agent/task-builder.js.map +1 -0
  81. package/dist/lib/agent/task-cache.js +8 -8
  82. package/dist/lib/agent/task-cache.js.map +1 -1
  83. package/dist/lib/agent/tasks.js +202 -506
  84. package/dist/lib/agent/tasks.js.map +1 -1
  85. package/dist/lib/agent/ui-utils.js +58 -36
  86. package/dist/lib/agent/ui-utils.js.map +1 -1
  87. package/dist/lib/agent/utils.js +26 -68
  88. package/dist/lib/agent/utils.js.map +1 -1
  89. package/dist/lib/ai-model/conversation-history.js +27 -15
  90. package/dist/lib/ai-model/conversation-history.js.map +1 -1
  91. package/dist/lib/ai-model/index.js +27 -27
  92. package/dist/lib/ai-model/index.js.map +1 -1
  93. package/dist/lib/ai-model/inspect.js +51 -57
  94. package/dist/lib/ai-model/inspect.js.map +1 -1
  95. package/dist/lib/ai-model/llm-planning.js +49 -67
  96. package/dist/lib/ai-model/llm-planning.js.map +1 -1
  97. package/dist/lib/ai-model/prompt/assertion.js +2 -2
  98. package/dist/lib/ai-model/prompt/assertion.js.map +1 -1
  99. package/dist/lib/ai-model/prompt/common.js +2 -2
  100. package/dist/lib/ai-model/prompt/common.js.map +1 -1
  101. package/dist/lib/ai-model/prompt/describe.js +2 -2
  102. package/dist/lib/ai-model/prompt/describe.js.map +1 -1
  103. package/dist/lib/ai-model/prompt/extraction.js +2 -2
  104. package/dist/lib/ai-model/prompt/extraction.js.map +1 -1
  105. package/dist/lib/ai-model/prompt/llm-locator.js +14 -241
  106. package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -1
  107. package/dist/lib/ai-model/prompt/llm-planning.js +79 -328
  108. package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -1
  109. package/dist/lib/ai-model/prompt/llm-section-locator.js +17 -16
  110. package/dist/lib/ai-model/prompt/llm-section-locator.js.map +1 -1
  111. package/dist/lib/ai-model/prompt/order-sensitive-judge.js +72 -0
  112. package/dist/lib/ai-model/prompt/order-sensitive-judge.js.map +1 -0
  113. package/dist/lib/ai-model/prompt/playwright-generator.js +11 -11
  114. package/dist/lib/ai-model/prompt/playwright-generator.js.map +1 -1
  115. package/dist/lib/ai-model/prompt/ui-tars-locator.js +2 -2
  116. package/dist/lib/ai-model/prompt/ui-tars-locator.js.map +1 -1
  117. package/dist/lib/ai-model/prompt/ui-tars-planning.js +2 -2
  118. package/dist/lib/ai-model/prompt/ui-tars-planning.js.map +1 -1
  119. package/dist/lib/ai-model/prompt/util.js +7 -95
  120. package/dist/lib/ai-model/prompt/util.js.map +1 -1
  121. package/dist/lib/ai-model/prompt/yaml-generator.js +18 -18
  122. package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -1
  123. package/dist/lib/ai-model/service-caller/index.js +288 -401
  124. package/dist/lib/ai-model/service-caller/index.js.map +1 -1
  125. package/dist/lib/ai-model/ui-tars-planning.js +71 -10
  126. package/dist/lib/ai-model/ui-tars-planning.js.map +1 -1
  127. package/dist/lib/{ai-model/common.js → common.js} +40 -55
  128. package/dist/lib/common.js.map +1 -0
  129. package/dist/lib/device/device-options.js +20 -0
  130. package/dist/lib/device/device-options.js.map +1 -0
  131. package/dist/lib/device/index.js +63 -40
  132. package/dist/lib/device/index.js.map +1 -1
  133. package/dist/lib/image/index.js +5 -5
  134. package/dist/lib/image/index.js.map +1 -1
  135. package/dist/lib/index.js +24 -20
  136. package/dist/lib/index.js.map +1 -1
  137. package/dist/lib/report.js +2 -2
  138. package/dist/lib/report.js.map +1 -1
  139. package/dist/lib/{insight → service}/index.js +41 -54
  140. package/dist/lib/service/index.js.map +1 -0
  141. package/dist/lib/{insight → service}/utils.js +7 -7
  142. package/dist/lib/service/utils.js.map +1 -0
  143. package/dist/lib/task-runner.js +301 -0
  144. package/dist/lib/task-runner.js.map +1 -0
  145. package/dist/lib/tree.js +13 -4
  146. package/dist/lib/tree.js.map +1 -1
  147. package/dist/lib/types.js +31 -12
  148. package/dist/lib/types.js.map +1 -1
  149. package/dist/lib/utils.js +16 -17
  150. package/dist/lib/utils.js.map +1 -1
  151. package/dist/lib/yaml/builder.js +2 -2
  152. package/dist/lib/yaml/builder.js.map +1 -1
  153. package/dist/lib/yaml/index.js +16 -22
  154. package/dist/lib/yaml/index.js.map +1 -1
  155. package/dist/lib/yaml/player.js +123 -100
  156. package/dist/lib/yaml/player.js.map +1 -1
  157. package/dist/lib/yaml/utils.js +6 -6
  158. package/dist/lib/yaml/utils.js.map +1 -1
  159. package/dist/lib/yaml.js +1 -1
  160. package/dist/lib/yaml.js.map +1 -1
  161. package/dist/types/agent/agent.d.ts +62 -17
  162. package/dist/types/agent/execution-session.d.ts +36 -0
  163. package/dist/types/agent/index.d.ts +3 -2
  164. package/dist/types/agent/task-builder.d.ts +35 -0
  165. package/dist/types/agent/tasks.d.ts +32 -23
  166. package/dist/types/agent/ui-utils.d.ts +9 -2
  167. package/dist/types/agent/utils.d.ts +9 -35
  168. package/dist/types/ai-model/conversation-history.d.ts +8 -4
  169. package/dist/types/ai-model/index.d.ts +5 -5
  170. package/dist/types/ai-model/inspect.d.ts +20 -12
  171. package/dist/types/ai-model/llm-planning.d.ts +3 -1
  172. package/dist/types/ai-model/prompt/llm-locator.d.ts +1 -6
  173. package/dist/types/ai-model/prompt/llm-planning.d.ts +2 -3
  174. package/dist/types/ai-model/prompt/llm-section-locator.d.ts +1 -3
  175. package/dist/types/ai-model/prompt/order-sensitive-judge.d.ts +2 -0
  176. package/dist/types/ai-model/prompt/util.d.ts +2 -34
  177. package/dist/types/ai-model/service-caller/index.d.ts +2 -3
  178. package/dist/types/ai-model/ui-tars-planning.d.ts +15 -2
  179. package/dist/types/{ai-model/common.d.ts → common.d.ts} +6 -6
  180. package/dist/types/device/device-options.d.ts +57 -0
  181. package/dist/types/device/index.d.ts +55 -39
  182. package/dist/types/index.d.ts +7 -6
  183. package/dist/types/service/index.d.ts +26 -0
  184. package/dist/types/service/utils.d.ts +2 -0
  185. package/dist/types/task-runner.d.ts +49 -0
  186. package/dist/types/tree.d.ts +4 -1
  187. package/dist/types/types.d.ts +103 -66
  188. package/dist/types/yaml/utils.d.ts +1 -1
  189. package/dist/types/yaml.d.ts +68 -43
  190. package/package.json +9 -12
  191. package/dist/es/ai-model/action-executor.mjs +0 -129
  192. package/dist/es/ai-model/action-executor.mjs.map +0 -1
  193. package/dist/es/ai-model/common.mjs.map +0 -1
  194. package/dist/es/insight/index.mjs.map +0 -1
  195. package/dist/es/insight/utils.mjs.map +0 -1
  196. package/dist/lib/ai-model/action-executor.js +0 -163
  197. package/dist/lib/ai-model/action-executor.js.map +0 -1
  198. package/dist/lib/ai-model/common.js.map +0 -1
  199. package/dist/lib/insight/index.js.map +0 -1
  200. package/dist/lib/insight/utils.js.map +0 -1
  201. package/dist/types/ai-model/action-executor.d.ts +0 -19
  202. package/dist/types/insight/index.d.ts +0 -31
  203. package/dist/types/insight/utils.d.ts +0 -2
@@ -1,11 +1,13 @@
1
- import { ConversationHistory, findAllMidsceneLocatorField, parseActionParam, plan as index_mjs_plan, uiTarsPlanning } from "../ai-model/index.mjs";
2
- import { Executor } from "../ai-model/action-executor.mjs";
3
- import { sleep as external_utils_mjs_sleep } from "../utils.mjs";
4
- import { MIDSCENE_REPLANNING_CYCLE_LIMIT, globalConfigManager } from "@midscene/shared/env";
1
+ import { ConversationHistory, plan, uiTarsPlanning } from "../ai-model/index.mjs";
2
+ import { TaskExecutionError } from "../task-runner.mjs";
3
+ import { ServiceError } from "../types.mjs";
5
4
  import { getDebug } from "@midscene/shared/logger";
6
5
  import { assert } from "@midscene/shared/utils";
6
+ import { ExecutionSession } from "./execution-session.mjs";
7
+ import { TaskBuilder, locatePlanForLocate } from "./task-builder.mjs";
8
+ import { descriptionOfTree } from "@midscene/shared/extractor";
7
9
  import { taskTitleStr } from "./ui-utils.mjs";
8
- import { matchElementFromCache, matchElementFromPlan, parsePrompt } from "./utils.mjs";
10
+ import { parsePrompt } from "./utils.mjs";
9
11
  function _define_property(obj, key, value) {
10
12
  if (key in obj) Object.defineProperty(obj, key, {
11
13
  value: value,
@@ -17,337 +19,35 @@ function _define_property(obj, key, value) {
17
19
  return obj;
18
20
  }
19
21
  const debug = getDebug('device-task-executor');
20
- const defaultReplanningCycleLimit = 10;
21
- const defaultVlmUiTarsReplanningCycleLimit = 40;
22
- function locatePlanForLocate(param) {
23
- const locate = 'string' == typeof param ? {
24
- prompt: param
25
- } : param;
26
- const locatePlan = {
27
- type: 'Locate',
28
- locate,
29
- param: locate,
30
- thought: ''
31
- };
32
- return locatePlan;
33
- }
22
+ const maxErrorCountAllowedInOnePlanningLoop = 5;
34
23
  class TaskExecutor {
35
24
  get page() {
36
25
  return this.interface;
37
26
  }
38
- async recordScreenshot(timing) {
39
- const base64 = await this.interface.screenshotBase64();
40
- const item = {
41
- type: 'screenshot',
42
- ts: Date.now(),
43
- screenshot: base64,
44
- timing
45
- };
46
- return item;
47
- }
48
- prependExecutorWithScreenshot(taskApply, appendAfterExecution = false) {
49
- const taskWithScreenshot = {
50
- ...taskApply,
51
- executor: async (param, context, ...args)=>{
52
- const recorder = [];
53
- const { task } = context;
54
- task.recorder = recorder;
55
- const shot = await this.recordScreenshot(`before ${task.type}`);
56
- recorder.push(shot);
57
- const result = await taskApply.executor(param, context, ...args);
58
- if (appendAfterExecution) {
59
- const shot2 = await this.recordScreenshot('after Action');
60
- recorder.push(shot2);
61
- }
62
- return result;
63
- }
64
- };
65
- return taskWithScreenshot;
66
- }
67
- async convertPlanToExecutable(plans, modelConfig, cacheable) {
68
- const tasks = [];
69
- const taskForLocatePlan = (plan, detailedLocateParam, onResult)=>{
70
- if ('string' == typeof detailedLocateParam) detailedLocateParam = {
71
- prompt: detailedLocateParam
72
- };
73
- if (void 0 !== cacheable) detailedLocateParam = {
74
- ...detailedLocateParam,
75
- cacheable
76
- };
77
- const taskFind = {
78
- type: 'Insight',
79
- subType: 'Locate',
80
- param: detailedLocateParam,
81
- thought: plan.thought,
82
- executor: async (param, taskContext)=>{
83
- var _this_taskCache, _locateCacheRecord_cacheContent;
84
- const { task } = taskContext;
85
- assert((null == param ? void 0 : param.prompt) || (null == param ? void 0 : param.id) || (null == param ? void 0 : param.bbox), `No prompt or id or position or bbox to locate, param=${JSON.stringify(param)}`);
86
- let insightDump;
87
- let usage;
88
- const dumpCollector = (dump)=>{
89
- var _dump_taskInfo, _dump_taskInfo1;
90
- insightDump = dump;
91
- usage = null == dump ? void 0 : null == (_dump_taskInfo = dump.taskInfo) ? void 0 : _dump_taskInfo.usage;
92
- task.log = {
93
- dump: insightDump
94
- };
95
- task.usage = usage;
96
- if (null == dump ? void 0 : null == (_dump_taskInfo1 = dump.taskInfo) ? void 0 : _dump_taskInfo1.searchAreaUsage) task.searchAreaUsage = dump.taskInfo.searchAreaUsage;
97
- };
98
- this.insight.onceDumpUpdatedFn = dumpCollector;
99
- const shotTime = Date.now();
100
- const uiContext = await this.insight.contextRetrieverFn('locate');
101
- task.uiContext = uiContext;
102
- const recordItem = {
103
- type: 'screenshot',
104
- ts: shotTime,
105
- screenshot: uiContext.screenshotBase64,
106
- timing: 'before Insight'
107
- };
108
- task.recorder = [
109
- recordItem
110
- ];
111
- const elementFromXpath = param.xpath && this.interface.getElementInfoByXpath ? await this.interface.getElementInfoByXpath(param.xpath) : void 0;
112
- const userExpectedPathHitFlag = !!elementFromXpath;
113
- const cachePrompt = param.prompt;
114
- const locateCacheRecord = null == (_this_taskCache = this.taskCache) ? void 0 : _this_taskCache.matchLocateCache(cachePrompt);
115
- const cacheEntry = null == locateCacheRecord ? void 0 : null == (_locateCacheRecord_cacheContent = locateCacheRecord.cacheContent) ? void 0 : _locateCacheRecord_cacheContent.cache;
116
- const elementFromCache = userExpectedPathHitFlag ? null : await matchElementFromCache(this, cacheEntry, cachePrompt, param.cacheable);
117
- const cacheHitFlag = !!elementFromCache;
118
- const elementFromPlan = userExpectedPathHitFlag || cacheHitFlag ? void 0 : matchElementFromPlan(param, uiContext.tree);
119
- const planHitFlag = !!elementFromPlan;
120
- const elementFromAiLocate = userExpectedPathHitFlag || cacheHitFlag || planHitFlag ? void 0 : (await this.insight.locate(param, {
121
- context: uiContext
122
- }, modelConfig)).element;
123
- const aiLocateHitFlag = !!elementFromAiLocate;
124
- const element = elementFromXpath || elementFromCache || elementFromPlan || elementFromAiLocate;
125
- let currentCacheEntry;
126
- if (element && this.taskCache && !cacheHitFlag && (null == param ? void 0 : param.cacheable) !== false) if (this.interface.cacheFeatureForRect) try {
127
- const feature = await this.interface.cacheFeatureForRect(element.rect, void 0 !== element.isOrderSensitive ? {
128
- _orderSensitive: element.isOrderSensitive
129
- } : void 0);
130
- if (feature && Object.keys(feature).length > 0) {
131
- debug('update cache, prompt: %s, cache: %o', cachePrompt, feature);
132
- currentCacheEntry = feature;
133
- this.taskCache.updateOrAppendCacheRecord({
134
- type: 'locate',
135
- prompt: cachePrompt,
136
- cache: feature
137
- }, locateCacheRecord);
138
- } else debug('no cache data returned, skip cache update, prompt: %s', cachePrompt);
139
- } catch (error) {
140
- debug('cacheFeatureForRect failed: %s', error);
141
- }
142
- else debug('cacheFeatureForRect is not supported, skip cache update');
143
- if (!element) throw new Error(`Element not found: ${param.prompt}`);
144
- let hitBy;
145
- if (userExpectedPathHitFlag) hitBy = {
146
- from: 'User expected path',
147
- context: {
148
- xpath: param.xpath
149
- }
150
- };
151
- else if (cacheHitFlag) hitBy = {
152
- from: 'Cache',
153
- context: {
154
- cacheEntry,
155
- cacheToSave: currentCacheEntry
156
- }
157
- };
158
- else if (planHitFlag) hitBy = {
159
- from: 'Planning',
160
- context: {
161
- id: null == elementFromPlan ? void 0 : elementFromPlan.id,
162
- bbox: null == elementFromPlan ? void 0 : elementFromPlan.bbox
163
- }
164
- };
165
- else if (aiLocateHitFlag) hitBy = {
166
- from: 'AI model',
167
- context: {
168
- prompt: param.prompt
169
- }
170
- };
171
- null == onResult || onResult(element);
172
- return {
173
- output: {
174
- element
175
- },
176
- uiContext,
177
- hitBy
178
- };
179
- }
180
- };
181
- return taskFind;
182
- };
183
- for (const plan of plans)if ('Locate' === plan.type) {
184
- var _plan_locate, _plan_locate1;
185
- if (!plan.locate || null === plan.locate || (null == (_plan_locate = plan.locate) ? void 0 : _plan_locate.id) === null || (null == (_plan_locate1 = plan.locate) ? void 0 : _plan_locate1.id) === 'null') {
186
- debug('Locate action with id is null, will be ignored', plan);
187
- continue;
188
- }
189
- const taskLocate = taskForLocatePlan(plan, plan.locate);
190
- tasks.push(taskLocate);
191
- } else if ('Error' === plan.type) {
192
- var _plan_param;
193
- const taskActionError = {
194
- type: 'Action',
195
- subType: 'Error',
196
- param: plan.param,
197
- thought: plan.thought || (null == (_plan_param = plan.param) ? void 0 : _plan_param.thought),
198
- locate: plan.locate,
199
- executor: async ()=>{
200
- var _plan_param;
201
- throw new Error((null == plan ? void 0 : plan.thought) || (null == (_plan_param = plan.param) ? void 0 : _plan_param.thought) || 'error without thought');
202
- }
203
- };
204
- tasks.push(taskActionError);
205
- } else if ('Finished' === plan.type) {
206
- const taskActionFinished = {
207
- type: 'Action',
208
- subType: 'Finished',
209
- param: null,
210
- thought: plan.thought,
211
- locate: plan.locate,
212
- executor: async (param)=>{}
213
- };
214
- tasks.push(taskActionFinished);
215
- } else if ('Sleep' === plan.type) {
216
- const taskActionSleep = {
217
- type: 'Action',
218
- subType: 'Sleep',
219
- param: plan.param,
220
- thought: plan.thought,
221
- locate: plan.locate,
222
- executor: async (taskParam)=>{
223
- await external_utils_mjs_sleep((null == taskParam ? void 0 : taskParam.timeMs) || 3000);
224
- }
225
- };
226
- tasks.push(taskActionSleep);
227
- } else {
228
- const planType = plan.type;
229
- const actionSpace = await this.interface.actionSpace();
230
- const action = actionSpace.find((action)=>action.name === planType);
231
- const param = plan.param;
232
- if (!action) throw new Error(`Action type '${planType}' not found`);
233
- const locateFields = action ? findAllMidsceneLocatorField(action.paramSchema) : [];
234
- const requiredLocateFields = action ? findAllMidsceneLocatorField(action.paramSchema, true) : [];
235
- locateFields.forEach((field)=>{
236
- if (param[field]) {
237
- const locatePlan = locatePlanForLocate(param[field]);
238
- debug('will prepend locate param for field', `action.type=${planType}`, `param=${JSON.stringify(param[field])}`, `locatePlan=${JSON.stringify(locatePlan)}`);
239
- const locateTask = taskForLocatePlan(locatePlan, param[field], (result)=>{
240
- param[field] = result;
241
- });
242
- tasks.push(locateTask);
243
- } else {
244
- assert(!requiredLocateFields.includes(field), `Required locate field '${field}' is not provided for action ${planType}`);
245
- debug(`field '${field}' is not provided for action ${planType}`);
246
- }
247
- });
248
- const task = {
249
- type: 'Action',
250
- subType: planType,
251
- thought: plan.thought,
252
- param: plan.param,
253
- executor: async (param, context)=>{
254
- var _context_element;
255
- debug('executing action', planType, param, `context.element.center: ${null == (_context_element = context.element) ? void 0 : _context_element.center}`);
256
- const uiContext = await this.insight.contextRetrieverFn('locate');
257
- context.task.uiContext = uiContext;
258
- requiredLocateFields.forEach((field)=>{
259
- assert(param[field], `field '${field}' is required for action ${planType} but not provided. Cannot execute action ${planType}.`);
260
- });
261
- try {
262
- await Promise.all([
263
- (async ()=>{
264
- if (this.interface.beforeInvokeAction) {
265
- debug('will call "beforeInvokeAction" for interface');
266
- await this.interface.beforeInvokeAction(action.name, param);
267
- debug('called "beforeInvokeAction" for interface');
268
- }
269
- })(),
270
- external_utils_mjs_sleep(200)
271
- ]);
272
- } catch (originalError) {
273
- const originalMessage = (null == originalError ? void 0 : originalError.message) || String(originalError);
274
- throw new Error(`error in running beforeInvokeAction for ${action.name}: ${originalMessage}`, {
275
- cause: originalError
276
- });
277
- }
278
- if (action.paramSchema) try {
279
- param = parseActionParam(param, action.paramSchema);
280
- } catch (error) {
281
- throw new Error(`Invalid parameters for action ${action.name}: ${error.message}\nParameters: ${JSON.stringify(param)}`, {
282
- cause: error
283
- });
284
- }
285
- debug('calling action', action.name);
286
- const actionFn = action.call.bind(this.interface);
287
- await actionFn(param, context);
288
- debug('called action', action.name);
289
- await external_utils_mjs_sleep(300);
290
- try {
291
- if (this.interface.afterInvokeAction) {
292
- debug('will call "afterInvokeAction" for interface');
293
- await this.interface.afterInvokeAction(action.name, param);
294
- debug('called "afterInvokeAction" for interface');
295
- }
296
- } catch (originalError) {
297
- const originalMessage = (null == originalError ? void 0 : originalError.message) || String(originalError);
298
- throw new Error(`error in running afterInvokeAction for ${action.name}: ${originalMessage}`, {
299
- cause: originalError
300
- });
301
- }
302
- return {
303
- output: {
304
- success: true,
305
- action: planType,
306
- param: param
307
- }
308
- };
309
- }
310
- };
311
- tasks.push(task);
312
- }
313
- const wrappedTasks = tasks.map((task, index)=>{
314
- if ('Action' === task.type) return this.prependExecutorWithScreenshot(task, index === tasks.length - 1);
315
- return task;
27
+ createExecutionSession(title, options) {
28
+ return new ExecutionSession(title, ()=>Promise.resolve(this.service.contextRetrieverFn()), {
29
+ onTaskStart: this.onTaskStartCallback,
30
+ tasks: options?.tasks,
31
+ onTaskUpdate: this.hooks?.onTaskUpdate
316
32
  });
317
- return {
318
- tasks: wrappedTasks
319
- };
320
33
  }
321
- async setupPlanningContext(executorContext) {
322
- const shotTime = Date.now();
323
- const uiContext = await this.insight.contextRetrieverFn('locate');
324
- const recordItem = {
325
- type: 'screenshot',
326
- ts: shotTime,
327
- screenshot: uiContext.screenshotBase64,
328
- timing: 'before Planning'
329
- };
330
- executorContext.task.recorder = [
331
- recordItem
332
- ];
333
- executorContext.task.uiContext = uiContext;
334
- return {
335
- uiContext
336
- };
34
+ getActionSpace() {
35
+ return this.providedActionSpace;
36
+ }
37
+ async convertPlanToExecutable(plans, modelConfigForPlanning, modelConfigForDefaultIntent, options) {
38
+ return this.taskBuilder.build(plans, modelConfigForPlanning, modelConfigForDefaultIntent, options);
337
39
  }
338
40
  async loadYamlFlowAsPlanning(userInstruction, yamlString) {
339
- const taskExecutor = new Executor(taskTitleStr('Action', userInstruction), {
340
- onTaskStart: this.onTaskStartCallback
341
- });
41
+ const session = this.createExecutionSession(taskTitleStr('Action', userInstruction));
342
42
  const task = {
343
43
  type: 'Planning',
344
44
  subType: 'LoadYaml',
345
- locate: null,
346
45
  param: {
347
46
  userInstruction
348
47
  },
349
48
  executor: async (param, executorContext)=>{
350
- await this.setupPlanningContext(executorContext);
49
+ const { uiContext } = executorContext;
50
+ assert(uiContext, 'uiContext is required for Planning task');
351
51
  return {
352
52
  output: {
353
53
  actions: [],
@@ -367,140 +67,137 @@ class TaskExecutor {
367
67
  };
368
68
  }
369
69
  };
370
- await taskExecutor.append(task);
371
- await taskExecutor.flush();
70
+ const runner = session.getRunner();
71
+ await session.appendAndRun(task);
372
72
  return {
373
- executor: taskExecutor
73
+ runner
374
74
  };
375
75
  }
376
- createPlanningTask(userInstruction, actionContext, modelConfig) {
377
- const task = {
378
- type: 'Planning',
379
- subType: 'Plan',
380
- locate: null,
381
- param: {
382
- userInstruction
383
- },
384
- executor: async (param, executorContext)=>{
385
- const startTime = Date.now();
386
- const { uiContext } = await this.setupPlanningContext(executorContext);
387
- const { vlMode } = modelConfig;
388
- const uiTarsModelVersion = 'vlm-ui-tars' === vlMode ? modelConfig.uiTarsModelVersion : void 0;
389
- assert(this.interface.actionSpace, 'actionSpace for device is not implemented');
390
- const actionSpace = await this.interface.actionSpace();
391
- debug('actionSpace for this interface is:', actionSpace.map((action)=>action.name).join(', '));
392
- assert(Array.isArray(actionSpace), 'actionSpace must be an array');
393
- if (0 === actionSpace.length) console.warn(`ActionSpace for ${this.interface.interfaceType} is empty. This may lead to unexpected behavior.`);
394
- const planResult = await (uiTarsModelVersion ? uiTarsPlanning : index_mjs_plan)(param.userInstruction, {
395
- context: uiContext,
396
- actionContext,
397
- interfaceType: this.interface.interfaceType,
398
- actionSpace,
399
- modelConfig,
400
- conversationHistory: this.conversationHistory
401
- });
402
- debug('planResult', JSON.stringify(planResult, null, 2));
403
- const { actions, log, more_actions_needed_by_instruction, error, usage, rawResponse, sleep } = planResult;
404
- executorContext.task.log = {
405
- ...executorContext.task.log || {},
406
- rawResponse
407
- };
408
- executorContext.task.usage = usage;
409
- const finalActions = actions || [];
410
- if (sleep) {
411
- const timeNow = Date.now();
412
- const timeRemaining = sleep - (timeNow - startTime);
413
- if (timeRemaining > 0) finalActions.push({
414
- type: 'Sleep',
415
- param: {
416
- timeMs: timeRemaining
417
- },
418
- locate: null
419
- });
420
- }
421
- if (0 === finalActions.length) assert(!more_actions_needed_by_instruction || sleep, error ? `Failed to plan: ${error}` : 'No plan found');
422
- return {
423
- output: {
424
- actions: finalActions,
425
- more_actions_needed_by_instruction,
426
- log,
427
- yamlFlow: planResult.yamlFlow
428
- },
429
- cache: {
430
- hit: false
431
- },
432
- uiContext
433
- };
434
- }
435
- };
436
- return task;
437
- }
438
- async runPlans(title, plans, modelConfig) {
439
- const taskExecutor = new Executor(title, {
440
- onTaskStart: this.onTaskStartCallback
441
- });
442
- const { tasks } = await this.convertPlanToExecutable(plans, modelConfig);
443
- await taskExecutor.append(tasks);
444
- const result = await taskExecutor.flush();
445
- const { output } = result;
76
+ async runPlans(title, plans, modelConfigForPlanning, modelConfigForDefaultIntent) {
77
+ const session = this.createExecutionSession(title);
78
+ const { tasks } = await this.convertPlanToExecutable(plans, modelConfigForPlanning, modelConfigForDefaultIntent);
79
+ const runner = session.getRunner();
80
+ const result = await session.appendAndRun(tasks);
81
+ const { output } = result ?? {};
446
82
  return {
447
83
  output,
448
- executor: taskExecutor
84
+ runner
449
85
  };
450
86
  }
451
- getReplanningCycleLimit(isVlmUiTars) {
452
- return this.replanningCycleLimit || globalConfigManager.getEnvConfigInNumber(MIDSCENE_REPLANNING_CYCLE_LIMIT) || (isVlmUiTars ? defaultVlmUiTarsReplanningCycleLimit : defaultReplanningCycleLimit);
453
- }
454
- async action(userPrompt, modelConfig, actionContext, cacheable) {
87
+ async action(userPrompt, modelConfigForPlanning, modelConfigForDefaultIntent, includeBboxInPlanning, aiActContext, cacheable, replanningCycleLimitOverride, imagesIncludeCount) {
455
88
  this.conversationHistory.reset();
456
- const taskExecutor = new Executor(taskTitleStr('Action', userPrompt), {
457
- onTaskStart: this.onTaskStartCallback
458
- });
89
+ const session = this.createExecutionSession(taskTitleStr('Action', userPrompt));
90
+ const runner = session.getRunner();
459
91
  let replanCount = 0;
460
92
  const yamlFlow = [];
461
- const replanningCycleLimit = this.getReplanningCycleLimit('vlm-ui-tars' === modelConfig.vlMode);
93
+ const replanningCycleLimit = replanningCycleLimitOverride ?? this.replanningCycleLimit;
94
+ assert(void 0 !== replanningCycleLimit, 'replanningCycleLimit is required for TaskExecutor.action');
95
+ let errorCountInOnePlanningLoop = 0;
462
96
  while(true){
463
- if (replanCount > replanningCycleLimit) {
464
- const errorMsg = `Replanning ${replanningCycleLimit} times, which is more than the limit, please split the task into multiple steps`;
465
- return this.appendErrorPlan(taskExecutor, errorMsg, modelConfig);
466
- }
467
- const planningTask = this.createPlanningTask(userPrompt, actionContext, modelConfig);
468
- await taskExecutor.append(planningTask);
469
- const result = await taskExecutor.flush();
470
- const planResult = null == result ? void 0 : result.output;
471
- if (taskExecutor.isInErrorState()) return {
472
- output: planResult,
473
- executor: taskExecutor
474
- };
475
- const plans = planResult.actions || [];
476
- yamlFlow.push(...planResult.yamlFlow || []);
97
+ const result = await session.appendAndRun({
98
+ type: 'Planning',
99
+ subType: 'Plan',
100
+ param: {
101
+ userInstruction: userPrompt,
102
+ aiActContext,
103
+ imagesIncludeCount
104
+ },
105
+ executor: async (param, executorContext)=>{
106
+ const startTime = Date.now();
107
+ const { uiContext } = executorContext;
108
+ assert(uiContext, 'uiContext is required for Planning task');
109
+ const { vlMode } = modelConfigForPlanning;
110
+ const uiTarsModelVersion = 'vlm-ui-tars' === vlMode ? modelConfigForPlanning.uiTarsModelVersion : void 0;
111
+ const actionSpace = this.getActionSpace();
112
+ debug('actionSpace for this interface is:', actionSpace.map((action)=>action.name).join(', '));
113
+ assert(Array.isArray(actionSpace), 'actionSpace must be an array');
114
+ if (0 === actionSpace.length) console.warn(`ActionSpace for ${this.interface.interfaceType} is empty. This may lead to unexpected behavior.`);
115
+ const planResult = await (uiTarsModelVersion ? uiTarsPlanning : plan)(param.userInstruction, {
116
+ context: uiContext,
117
+ actionContext: param.aiActContext,
118
+ interfaceType: this.interface.interfaceType,
119
+ actionSpace,
120
+ modelConfig: modelConfigForPlanning,
121
+ conversationHistory: this.conversationHistory,
122
+ includeBbox: includeBboxInPlanning,
123
+ imagesIncludeCount
124
+ });
125
+ debug('planResult', JSON.stringify(planResult, null, 2));
126
+ const { actions, log, more_actions_needed_by_instruction, error, usage, rawResponse, sleep } = planResult;
127
+ executorContext.task.log = {
128
+ ...executorContext.task.log || {},
129
+ rawResponse
130
+ };
131
+ executorContext.task.usage = usage;
132
+ executorContext.task.output = {
133
+ actions: actions || [],
134
+ more_actions_needed_by_instruction,
135
+ log,
136
+ yamlFlow: planResult.yamlFlow
137
+ };
138
+ executorContext.uiContext = uiContext;
139
+ const finalActions = [
140
+ ...actions || []
141
+ ];
142
+ if (sleep) {
143
+ const timeNow = Date.now();
144
+ const timeRemaining = sleep - (timeNow - startTime);
145
+ if (timeRemaining > 0) finalActions.push(this.sleepPlan(timeRemaining));
146
+ }
147
+ assert(!error, `Failed to continue: ${error}\n${log || ''}`);
148
+ return {
149
+ cache: {
150
+ hit: false
151
+ }
152
+ };
153
+ }
154
+ }, {
155
+ allowWhenError: true
156
+ });
157
+ const planResult = result?.output;
158
+ const plans = planResult?.actions || [];
159
+ yamlFlow.push(...planResult?.yamlFlow || []);
477
160
  let executables;
478
161
  try {
479
- executables = await this.convertPlanToExecutable(plans, modelConfig, cacheable);
480
- taskExecutor.append(executables.tasks);
162
+ executables = await this.convertPlanToExecutable(plans, modelConfigForPlanning, modelConfigForDefaultIntent, {
163
+ cacheable,
164
+ subTask: true
165
+ });
481
166
  } catch (error) {
482
- return this.appendErrorPlan(taskExecutor, `Error converting plans to executable tasks: ${error}, plans: ${JSON.stringify(plans)}`, modelConfig);
167
+ return session.appendErrorPlan(`Error converting plans to executable tasks: ${error}, plans: ${JSON.stringify(plans)}`);
483
168
  }
484
- await taskExecutor.flush();
485
- if (taskExecutor.isInErrorState()) return {
486
- output: void 0,
487
- executor: taskExecutor
488
- };
489
- if (!planResult.more_actions_needed_by_instruction) break;
490
- replanCount++;
169
+ if (this.conversationHistory.pendingFeedbackMessage) console.warn('unconsumed pending feedback message detected, this may lead to unexpected planning result:', this.conversationHistory.pendingFeedbackMessage);
170
+ let errorFlag = false;
171
+ try {
172
+ await session.appendAndRun(executables.tasks);
173
+ } catch (error) {
174
+ errorFlag = true;
175
+ errorCountInOnePlanningLoop++;
176
+ this.conversationHistory.pendingFeedbackMessage = `Error executing running tasks: ${error?.message || String(error)}`;
177
+ debug('error when executing running tasks, but continue to run if it is not too many errors:', error instanceof Error ? error.message : String(error), 'current error count in one planning loop:', errorCountInOnePlanningLoop);
178
+ }
179
+ if (errorCountInOnePlanningLoop > maxErrorCountAllowedInOnePlanningLoop) return session.appendErrorPlan('Too many errors in one planning loop');
180
+ if (!planResult?.more_actions_needed_by_instruction) if (errorFlag) debug('more_actions_needed_by_instruction is false, but there are errors in one planning loop, continue to run');
181
+ else break;
182
+ ++replanCount;
183
+ if (replanCount > replanningCycleLimit) {
184
+ const errorMsg = `Replanned ${replanningCycleLimit} times, exceeding the limit. Please configure a larger value for replanningCycleLimit (or use MIDSCENE_REPLANNING_CYCLE_LIMIT) to handle more complex tasks.`;
185
+ return session.appendErrorPlan(errorMsg);
186
+ }
187
+ if (!this.conversationHistory.pendingFeedbackMessage) this.conversationHistory.pendingFeedbackMessage = 'I have finished the action previously planned.';
491
188
  }
492
- return {
189
+ const finalResult = {
493
190
  output: {
494
191
  yamlFlow
495
192
  },
496
- executor: taskExecutor
193
+ runner
497
194
  };
195
+ return finalResult;
498
196
  }
499
197
  createTypeQueryTask(type, demand, modelConfig, opt, multimodalPrompt) {
500
198
  const queryTask = {
501
199
  type: 'Insight',
502
200
  subType: type,
503
- locate: null,
504
201
  param: {
505
202
  dataDemand: multimodalPrompt ? {
506
203
  demand,
@@ -509,23 +206,15 @@ class TaskExecutor {
509
206
  },
510
207
  executor: async (param, taskContext)=>{
511
208
  const { task } = taskContext;
512
- let insightDump;
513
- const dumpCollector = (dump)=>{
514
- insightDump = dump;
515
- };
516
- this.insight.onceDumpUpdatedFn = dumpCollector;
517
- const shotTime = Date.now();
518
- const uiContext = await this.insight.contextRetrieverFn('extract');
519
- task.uiContext = uiContext;
520
- const recordItem = {
521
- type: 'screenshot',
522
- ts: shotTime,
523
- screenshot: uiContext.screenshotBase64,
524
- timing: 'before Extract'
209
+ let queryDump;
210
+ const applyDump = (dump)=>{
211
+ queryDump = dump;
212
+ task.log = {
213
+ dump
214
+ };
525
215
  };
526
- task.recorder = [
527
- recordItem
528
- ];
216
+ const uiContext = taskContext.uiContext;
217
+ assert(uiContext, 'uiContext is required for Query task');
529
218
  const ifTypeRestricted = 'Query' !== type;
530
219
  let demandInput = demand;
531
220
  let keyOfResult = 'result';
@@ -538,13 +227,27 @@ class TaskExecutor {
538
227
  } else if (ifTypeRestricted) demandInput = {
539
228
  [keyOfResult]: `${type}, ${demand}`
540
229
  };
541
- const { data, usage, thought } = await this.insight.extract(demandInput, modelConfig, opt, multimodalPrompt);
230
+ let extractResult;
231
+ let extraPageDescription = '';
232
+ if (opt?.domIncluded && this.interface.getElementsNodeTree) {
233
+ debug('appending tree info for page');
234
+ const tree = await this.interface.getElementsNodeTree();
235
+ extraPageDescription = await descriptionOfTree(tree, 200, false, opt?.domIncluded === 'visible-only');
236
+ }
237
+ try {
238
+ extractResult = await this.service.extract(demandInput, modelConfig, opt, extraPageDescription, multimodalPrompt);
239
+ } catch (error) {
240
+ if (error instanceof ServiceError) applyDump(error.dump);
241
+ throw error;
242
+ }
243
+ const { data, usage, thought, dump } = extractResult;
244
+ applyDump(dump);
542
245
  let outputResult = data;
543
246
  if (ifTypeRestricted) if ('string' == typeof data) outputResult = data;
544
247
  else if ('WaitFor' === type) outputResult = null == data ? false : data[keyOfResult];
545
248
  else if (null == data) outputResult = null;
546
249
  else {
547
- assert((null == data ? void 0 : data[keyOfResult]) !== void 0, 'No result in query data');
250
+ assert(data?.[keyOfResult] !== void 0, 'No result in query data');
548
251
  outputResult = data[keyOfResult];
549
252
  }
550
253
  if ('Assert' === type && !outputResult) {
@@ -554,7 +257,7 @@ class TaskExecutor {
554
257
  }
555
258
  return {
556
259
  output: outputResult,
557
- log: insightDump,
260
+ log: queryDump,
558
261
  usage,
559
262
  thought
560
263
  };
@@ -563,101 +266,91 @@ class TaskExecutor {
563
266
  return queryTask;
564
267
  }
565
268
  async createTypeQueryExecution(type, demand, modelConfig, opt, multimodalPrompt) {
566
- const taskExecutor = new Executor(taskTitleStr(type, 'string' == typeof demand ? demand : JSON.stringify(demand)), {
567
- onTaskStart: this.onTaskStartCallback
568
- });
269
+ const session = this.createExecutionSession(taskTitleStr(type, 'string' == typeof demand ? demand : JSON.stringify(demand)));
569
270
  const queryTask = await this.createTypeQueryTask(type, demand, modelConfig, opt, multimodalPrompt);
570
- await taskExecutor.append(this.prependExecutorWithScreenshot(queryTask));
571
- const result = await taskExecutor.flush();
271
+ const runner = session.getRunner();
272
+ const result = await session.appendAndRun(queryTask);
572
273
  if (!result) throw new Error('result of taskExecutor.flush() is undefined in function createTypeQueryTask');
573
274
  const { output, thought } = result;
574
275
  return {
575
276
  output,
576
277
  thought,
577
- executor: taskExecutor
278
+ runner
578
279
  };
579
280
  }
580
- async appendErrorPlan(taskExecutor, errorMsg, modelConfig) {
581
- const errorPlan = {
582
- type: 'Error',
583
- param: {
584
- thought: errorMsg
585
- },
586
- locate: null
587
- };
588
- const { tasks } = await this.convertPlanToExecutable([
589
- errorPlan
590
- ], modelConfig);
591
- await taskExecutor.append(this.prependExecutorWithScreenshot(tasks[0]));
592
- await taskExecutor.flush();
281
+ sleepPlan(timeMs) {
593
282
  return {
594
- output: void 0,
595
- executor: taskExecutor
596
- };
597
- }
598
- async taskForSleep(timeMs, modelConfig) {
599
- const sleepPlan = {
600
283
  type: 'Sleep',
601
284
  param: {
602
285
  timeMs
603
- },
604
- locate: null
286
+ }
605
287
  };
606
- const { tasks: sleepTasks } = await this.convertPlanToExecutable([
607
- sleepPlan
608
- ], modelConfig);
609
- return this.prependExecutorWithScreenshot(sleepTasks[0]);
288
+ }
289
+ async taskForSleep(timeMs, _modelConfig) {
290
+ return this.taskBuilder.createSleepTask({
291
+ timeMs
292
+ });
610
293
  }
611
294
  async waitFor(assertion, opt, modelConfig) {
612
295
  const { textPrompt, multimodalPrompt } = parsePrompt(assertion);
613
296
  const description = `waitFor: ${textPrompt}`;
614
- const taskExecutor = new Executor(taskTitleStr('WaitFor', description), {
615
- onTaskStart: this.onTaskStartCallback
616
- });
297
+ const session = this.createExecutionSession(taskTitleStr('WaitFor', description));
298
+ const runner = session.getRunner();
617
299
  const { timeoutMs, checkIntervalMs } = opt;
618
300
  assert(assertion, 'No assertion for waitFor');
619
301
  assert(timeoutMs, 'No timeoutMs for waitFor');
620
302
  assert(checkIntervalMs, 'No checkIntervalMs for waitFor');
621
303
  assert(checkIntervalMs <= timeoutMs, `wrong config for waitFor: checkIntervalMs must be less than timeoutMs, config: {checkIntervalMs: ${checkIntervalMs}, timeoutMs: ${timeoutMs}}`);
622
304
  const overallStartTime = Date.now();
623
- let startTime = Date.now();
305
+ let lastCheckStart = overallStartTime;
624
306
  let errorThought = '';
625
- while(Date.now() - overallStartTime < timeoutMs){
626
- startTime = Date.now();
627
- const queryTask = await this.createTypeQueryTask('WaitFor', textPrompt, modelConfig, {
628
- doNotThrowError: true
629
- }, multimodalPrompt);
630
- await taskExecutor.append(this.prependExecutorWithScreenshot(queryTask));
631
- const result = await taskExecutor.flush();
632
- if (null == result ? void 0 : result.output) return {
307
+ while(lastCheckStart - overallStartTime <= timeoutMs){
308
+ const currentCheckStart = Date.now();
309
+ lastCheckStart = currentCheckStart;
310
+ const queryTask = await this.createTypeQueryTask('WaitFor', textPrompt, modelConfig, void 0, multimodalPrompt);
311
+ const result = await session.appendAndRun(queryTask);
312
+ if (result?.output) return {
633
313
  output: void 0,
634
- executor: taskExecutor
314
+ runner
635
315
  };
636
- errorThought = (null == result ? void 0 : result.thought) || !result && `No result from assertion: ${textPrompt}` || `unknown error when waiting for assertion: ${textPrompt}`;
316
+ errorThought = result?.thought || !result && `No result from assertion: ${textPrompt}` || `unknown error when waiting for assertion: ${textPrompt}`;
637
317
  const now = Date.now();
638
- if (now - startTime < checkIntervalMs) {
639
- const timeRemaining = checkIntervalMs - (now - startTime);
640
- const sleepTask = await this.taskForSleep(timeRemaining, modelConfig);
641
- await taskExecutor.append(sleepTask);
318
+ if (now - currentCheckStart < checkIntervalMs) {
319
+ const timeRemaining = checkIntervalMs - (now - currentCheckStart);
320
+ const sleepTask = this.taskBuilder.createSleepTask({
321
+ timeMs: timeRemaining
322
+ });
323
+ await session.append(sleepTask);
642
324
  }
643
325
  }
644
- return this.appendErrorPlan(taskExecutor, `waitFor timeout: ${errorThought}`, modelConfig);
326
+ return session.appendErrorPlan(`waitFor timeout: ${errorThought}`);
645
327
  }
646
- constructor(interfaceInstance, insight, opts){
328
+ constructor(interfaceInstance, service, opts){
647
329
  _define_property(this, "interface", void 0);
648
- _define_property(this, "insight", void 0);
330
+ _define_property(this, "service", void 0);
649
331
  _define_property(this, "taskCache", void 0);
332
+ _define_property(this, "providedActionSpace", void 0);
333
+ _define_property(this, "taskBuilder", void 0);
650
334
  _define_property(this, "conversationHistory", void 0);
651
335
  _define_property(this, "onTaskStartCallback", void 0);
336
+ _define_property(this, "hooks", void 0);
652
337
  _define_property(this, "replanningCycleLimit", void 0);
653
338
  this.interface = interfaceInstance;
654
- this.insight = insight;
339
+ this.service = service;
655
340
  this.taskCache = opts.taskCache;
656
- this.onTaskStartCallback = null == opts ? void 0 : opts.onTaskStart;
341
+ this.onTaskStartCallback = opts?.onTaskStart;
657
342
  this.replanningCycleLimit = opts.replanningCycleLimit;
343
+ this.hooks = opts.hooks;
658
344
  this.conversationHistory = new ConversationHistory();
345
+ this.providedActionSpace = opts.actionSpace;
346
+ this.taskBuilder = new TaskBuilder({
347
+ interfaceInstance,
348
+ service,
349
+ taskCache: opts.taskCache,
350
+ actionSpace: this.getActionSpace()
351
+ });
659
352
  }
660
353
  }
661
- export { TaskExecutor, locatePlanForLocate };
354
+ export { TaskExecutionError, TaskExecutor, locatePlanForLocate };
662
355
 
663
356
  //# sourceMappingURL=tasks.mjs.map