@godscene/core 1.7.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. package/LICENSE +21 -0
  2. package/README.md +9 -0
  3. package/dist/es/agent/agent.mjs +767 -0
  4. package/dist/es/agent/common.mjs +0 -0
  5. package/dist/es/agent/execution-session.mjs +39 -0
  6. package/dist/es/agent/index.mjs +6 -0
  7. package/dist/es/agent/task-builder.mjs +343 -0
  8. package/dist/es/agent/task-cache.mjs +212 -0
  9. package/dist/es/agent/tasks.mjs +428 -0
  10. package/dist/es/agent/ui-utils.mjs +101 -0
  11. package/dist/es/agent/utils.mjs +167 -0
  12. package/dist/es/ai-model/auto-glm/actions.mjs +237 -0
  13. package/dist/es/ai-model/auto-glm/index.mjs +6 -0
  14. package/dist/es/ai-model/auto-glm/parser.mjs +237 -0
  15. package/dist/es/ai-model/auto-glm/planning.mjs +69 -0
  16. package/dist/es/ai-model/auto-glm/prompt.mjs +220 -0
  17. package/dist/es/ai-model/auto-glm/util.mjs +7 -0
  18. package/dist/es/ai-model/connectivity.mjs +136 -0
  19. package/dist/es/ai-model/conversation-history.mjs +193 -0
  20. package/dist/es/ai-model/index.mjs +12 -0
  21. package/dist/es/ai-model/inspect.mjs +395 -0
  22. package/dist/es/ai-model/llm-planning.mjs +231 -0
  23. package/dist/es/ai-model/prompt/common.mjs +5 -0
  24. package/dist/es/ai-model/prompt/describe.mjs +64 -0
  25. package/dist/es/ai-model/prompt/extraction.mjs +129 -0
  26. package/dist/es/ai-model/prompt/llm-locator.mjs +49 -0
  27. package/dist/es/ai-model/prompt/llm-planning.mjs +584 -0
  28. package/dist/es/ai-model/prompt/llm-section-locator.mjs +42 -0
  29. package/dist/es/ai-model/prompt/order-sensitive-judge.mjs +33 -0
  30. package/dist/es/ai-model/prompt/playwright-generator.mjs +115 -0
  31. package/dist/es/ai-model/prompt/ui-tars-planning.mjs +34 -0
  32. package/dist/es/ai-model/prompt/util.mjs +57 -0
  33. package/dist/es/ai-model/prompt/yaml-generator.mjs +201 -0
  34. package/dist/es/ai-model/service-caller/codex-app-server.mjs +573 -0
  35. package/dist/es/ai-model/service-caller/image-detail.mjs +4 -0
  36. package/dist/es/ai-model/service-caller/index.mjs +648 -0
  37. package/dist/es/ai-model/service-caller/request-timeout.mjs +47 -0
  38. package/dist/es/ai-model/ui-tars-planning.mjs +247 -0
  39. package/dist/es/common.mjs +382 -0
  40. package/dist/es/device/device-options.mjs +0 -0
  41. package/dist/es/device/index.mjs +340 -0
  42. package/dist/es/dump/html-utils.mjs +290 -0
  43. package/dist/es/dump/index.mjs +3 -0
  44. package/dist/es/dump/screenshot-restoration.mjs +30 -0
  45. package/dist/es/dump/screenshot-store.mjs +125 -0
  46. package/dist/es/index.mjs +17 -0
  47. package/dist/es/report-cli.mjs +149 -0
  48. package/dist/es/report-generator.mjs +203 -0
  49. package/dist/es/report-markdown.mjs +216 -0
  50. package/dist/es/report.mjs +287 -0
  51. package/dist/es/screenshot-item.mjs +120 -0
  52. package/dist/es/service/index.mjs +272 -0
  53. package/dist/es/service/utils.mjs +13 -0
  54. package/dist/es/skill/index.mjs +35 -0
  55. package/dist/es/task-runner.mjs +261 -0
  56. package/dist/es/task-timing.mjs +10 -0
  57. package/dist/es/tree.mjs +11 -0
  58. package/dist/es/types.mjs +202 -0
  59. package/dist/es/utils.mjs +232 -0
  60. package/dist/es/yaml/builder.mjs +11 -0
  61. package/dist/es/yaml/index.mjs +4 -0
  62. package/dist/es/yaml/player.mjs +425 -0
  63. package/dist/es/yaml/utils.mjs +100 -0
  64. package/dist/es/yaml.mjs +0 -0
  65. package/dist/lib/agent/agent.js +815 -0
  66. package/dist/lib/agent/common.js +5 -0
  67. package/dist/lib/agent/execution-session.js +73 -0
  68. package/dist/lib/agent/index.js +76 -0
  69. package/dist/lib/agent/task-builder.js +380 -0
  70. package/dist/lib/agent/task-cache.js +264 -0
  71. package/dist/lib/agent/tasks.js +471 -0
  72. package/dist/lib/agent/ui-utils.js +153 -0
  73. package/dist/lib/agent/utils.js +238 -0
  74. package/dist/lib/ai-model/auto-glm/actions.js +271 -0
  75. package/dist/lib/ai-model/auto-glm/index.js +64 -0
  76. package/dist/lib/ai-model/auto-glm/parser.js +280 -0
  77. package/dist/lib/ai-model/auto-glm/planning.js +103 -0
  78. package/dist/lib/ai-model/auto-glm/prompt.js +257 -0
  79. package/dist/lib/ai-model/auto-glm/util.js +44 -0
  80. package/dist/lib/ai-model/connectivity.js +180 -0
  81. package/dist/lib/ai-model/conversation-history.js +227 -0
  82. package/dist/lib/ai-model/index.js +127 -0
  83. package/dist/lib/ai-model/inspect.js +441 -0
  84. package/dist/lib/ai-model/llm-planning.js +268 -0
  85. package/dist/lib/ai-model/prompt/common.js +39 -0
  86. package/dist/lib/ai-model/prompt/describe.js +98 -0
  87. package/dist/lib/ai-model/prompt/extraction.js +169 -0
  88. package/dist/lib/ai-model/prompt/llm-locator.js +86 -0
  89. package/dist/lib/ai-model/prompt/llm-planning.js +621 -0
  90. package/dist/lib/ai-model/prompt/llm-section-locator.js +79 -0
  91. package/dist/lib/ai-model/prompt/order-sensitive-judge.js +70 -0
  92. package/dist/lib/ai-model/prompt/playwright-generator.js +176 -0
  93. package/dist/lib/ai-model/prompt/ui-tars-planning.js +71 -0
  94. package/dist/lib/ai-model/prompt/util.js +103 -0
  95. package/dist/lib/ai-model/prompt/yaml-generator.js +262 -0
  96. package/dist/lib/ai-model/service-caller/codex-app-server.js +622 -0
  97. package/dist/lib/ai-model/service-caller/image-detail.js +38 -0
  98. package/dist/lib/ai-model/service-caller/index.js +716 -0
  99. package/dist/lib/ai-model/service-caller/request-timeout.js +93 -0
  100. package/dist/lib/ai-model/ui-tars-planning.js +281 -0
  101. package/dist/lib/common.js +491 -0
  102. package/dist/lib/device/device-options.js +18 -0
  103. package/dist/lib/device/index.js +467 -0
  104. package/dist/lib/dump/html-utils.js +366 -0
  105. package/dist/lib/dump/index.js +58 -0
  106. package/dist/lib/dump/screenshot-restoration.js +64 -0
  107. package/dist/lib/dump/screenshot-store.js +165 -0
  108. package/dist/lib/index.js +184 -0
  109. package/dist/lib/report-cli.js +189 -0
  110. package/dist/lib/report-generator.js +244 -0
  111. package/dist/lib/report-markdown.js +253 -0
  112. package/dist/lib/report.js +333 -0
  113. package/dist/lib/screenshot-item.js +154 -0
  114. package/dist/lib/service/index.js +306 -0
  115. package/dist/lib/service/utils.js +47 -0
  116. package/dist/lib/skill/index.js +69 -0
  117. package/dist/lib/task-runner.js +298 -0
  118. package/dist/lib/task-timing.js +44 -0
  119. package/dist/lib/tree.js +51 -0
  120. package/dist/lib/types.js +298 -0
  121. package/dist/lib/utils.js +314 -0
  122. package/dist/lib/yaml/builder.js +55 -0
  123. package/dist/lib/yaml/index.js +79 -0
  124. package/dist/lib/yaml/player.js +459 -0
  125. package/dist/lib/yaml/utils.js +153 -0
  126. package/dist/lib/yaml.js +18 -0
  127. package/dist/types/agent/agent.d.ts +220 -0
  128. package/dist/types/agent/common.d.ts +0 -0
  129. package/dist/types/agent/execution-session.d.ts +36 -0
  130. package/dist/types/agent/index.d.ts +9 -0
  131. package/dist/types/agent/task-builder.d.ts +34 -0
  132. package/dist/types/agent/task-cache.d.ts +49 -0
  133. package/dist/types/agent/tasks.d.ts +70 -0
  134. package/dist/types/agent/ui-utils.d.ts +14 -0
  135. package/dist/types/agent/utils.d.ts +25 -0
  136. package/dist/types/ai-model/auto-glm/actions.d.ts +78 -0
  137. package/dist/types/ai-model/auto-glm/index.d.ts +6 -0
  138. package/dist/types/ai-model/auto-glm/parser.d.ts +18 -0
  139. package/dist/types/ai-model/auto-glm/planning.d.ts +12 -0
  140. package/dist/types/ai-model/auto-glm/prompt.d.ts +27 -0
  141. package/dist/types/ai-model/auto-glm/util.d.ts +13 -0
  142. package/dist/types/ai-model/connectivity.d.ts +20 -0
  143. package/dist/types/ai-model/conversation-history.d.ts +105 -0
  144. package/dist/types/ai-model/index.d.ts +16 -0
  145. package/dist/types/ai-model/inspect.d.ts +67 -0
  146. package/dist/types/ai-model/llm-planning.d.ts +19 -0
  147. package/dist/types/ai-model/prompt/common.d.ts +2 -0
  148. package/dist/types/ai-model/prompt/describe.d.ts +1 -0
  149. package/dist/types/ai-model/prompt/extraction.d.ts +7 -0
  150. package/dist/types/ai-model/prompt/llm-locator.d.ts +3 -0
  151. package/dist/types/ai-model/prompt/llm-planning.d.ts +10 -0
  152. package/dist/types/ai-model/prompt/llm-section-locator.d.ts +3 -0
  153. package/dist/types/ai-model/prompt/order-sensitive-judge.d.ts +2 -0
  154. package/dist/types/ai-model/prompt/playwright-generator.d.ts +26 -0
  155. package/dist/types/ai-model/prompt/ui-tars-planning.d.ts +2 -0
  156. package/dist/types/ai-model/prompt/util.d.ts +33 -0
  157. package/dist/types/ai-model/prompt/yaml-generator.d.ts +102 -0
  158. package/dist/types/ai-model/service-caller/codex-app-server.d.ts +42 -0
  159. package/dist/types/ai-model/service-caller/image-detail.d.ts +2 -0
  160. package/dist/types/ai-model/service-caller/index.d.ts +60 -0
  161. package/dist/types/ai-model/service-caller/request-timeout.d.ts +32 -0
  162. package/dist/types/ai-model/ui-tars-planning.d.ts +72 -0
  163. package/dist/types/common.d.ts +288 -0
  164. package/dist/types/device/device-options.d.ts +155 -0
  165. package/dist/types/device/index.d.ts +2565 -0
  166. package/dist/types/dump/html-utils.d.ts +75 -0
  167. package/dist/types/dump/index.d.ts +5 -0
  168. package/dist/types/dump/screenshot-restoration.d.ts +8 -0
  169. package/dist/types/dump/screenshot-store.d.ts +49 -0
  170. package/dist/types/index.d.ts +21 -0
  171. package/dist/types/report-cli.d.ts +36 -0
  172. package/dist/types/report-generator.d.ts +88 -0
  173. package/dist/types/report-markdown.d.ts +24 -0
  174. package/dist/types/report.d.ts +52 -0
  175. package/dist/types/screenshot-item.d.ts +67 -0
  176. package/dist/types/service/index.d.ts +24 -0
  177. package/dist/types/service/utils.d.ts +2 -0
  178. package/dist/types/skill/index.d.ts +25 -0
  179. package/dist/types/task-runner.d.ts +50 -0
  180. package/dist/types/task-timing.d.ts +8 -0
  181. package/dist/types/tree.d.ts +4 -0
  182. package/dist/types/types.d.ts +684 -0
  183. package/dist/types/utils.d.ts +45 -0
  184. package/dist/types/yaml/builder.d.ts +2 -0
  185. package/dist/types/yaml/index.d.ts +4 -0
  186. package/dist/types/yaml/player.d.ts +34 -0
  187. package/dist/types/yaml/utils.d.ts +9 -0
  188. package/dist/types/yaml.d.ts +215 -0
  189. package/package.json +130 -0
@@ -0,0 +1,428 @@
1
+ import { AIResponseParseError, ConversationHistory, autoGLMPlanning, plan, uiTarsPlanning } from "../ai-model/index.mjs";
2
+ import { isAutoGLM, isUITars } from "../ai-model/auto-glm/util.mjs";
3
+ import { getReadableTimeString } from "../common.mjs";
4
+ import { TaskExecutionError } from "../task-runner.mjs";
5
+ import { ServiceError } from "../types.mjs";
6
+ import { getDebug } from "@godscene/shared/logger";
7
+ import { assert } from "@godscene/shared/utils";
8
+ import { ExecutionSession } from "./execution-session.mjs";
9
+ import { TaskBuilder, locatePlanForLocate } from "./task-builder.mjs";
10
+ import { setTimingFieldOnce } from "../task-timing.mjs";
11
+ import { descriptionOfTree } from "@godscene/shared/extractor";
12
+ import { taskTitleStr } from "./ui-utils.mjs";
13
+ import { parsePrompt } from "./utils.mjs";
14
/**
 * Sets `obj[key]` to `value` and returns `obj`.
 *
 * When `key` is already reachable on `obj` (own or inherited), the property is
 * (re)defined as an own, enumerable, configurable, writable data property;
 * otherwise a plain assignment is used.
 */
function _define_property(obj, key, value) {
    const alreadyPresent = key in obj;
    if (!alreadyPresent) {
        obj[key] = value;
        return obj;
    }
    Object.defineProperty(obj, key, {
        value,
        enumerable: true,
        configurable: true,
        writable: true
    });
    return obj;
}
24
// Planning loops abort once the number of task-execution errors exceeds this value.
const maxErrorCountAllowedInOnePlanningLoop = 5;
// Debug logger for this module, under the 'device-task-executor' namespace.
const debug = getDebug('device-task-executor');
// Logger for the same namespace, created with the `console: true` option.
const warnLog = getDebug('device-task-executor', { console: true });
29
/**
 * Plans and runs tasks against an interface instance (a page or device).
 *
 * Every public operation creates its own ExecutionSession, appends tasks to it
 * and returns the session's runner together with the operation's output.
 */
class TaskExecutor {
    /** The interface instance this executor was constructed with. */
    get page() {
        return this.interface;
    }
    /**
     * Creates an ExecutionSession whose context is fetched through
     * `service.contextRetrieverFn()`.
     *
     * @param title Session title.
     * @param options Optional; `options.tasks` is forwarded to the session.
     */
    createExecutionSession(title, options) {
        return new ExecutionSession(title, ()=>Promise.resolve(this.service.contextRetrieverFn()), {
            onTaskStart: this.onTaskStartCallback,
            tasks: options?.tasks,
            onTaskUpdate: this.hooks?.onTaskUpdate
        });
    }
    /** Returns the action space supplied via the constructor options. */
    getActionSpace() {
        return this.providedActionSpace;
    }
    /**
     * Returns a readable time string.
     *
     * When `useDeviceTime` is set and the interface implements
     * `getDeviceLocalTimeString`, the device time is used; if that call fails
     * or is not implemented, a warning is logged and the runtime clock is used.
     *
     * @param format Passed through to the underlying formatter.
     */
    async getTimeString(format) {
        if (this.useDeviceTime) {
            if (this.interface.getDeviceLocalTimeString) {
                try {
                    return await this.interface.getDeviceLocalTimeString(format);
                } catch (error) {
                    warnLog(`Failed to get device time string, falling back to runtime time: ${error}`);
                }
            } else {
                warnLog('useDeviceTime is enabled but getDeviceLocalTimeString is not implemented, falling back to runtime time.');
            }
        }
        return getReadableTimeString(format);
    }
    /** Delegates to `TaskBuilder.build` to turn plans into executable tasks. */
    async convertPlanToExecutable(plans, modelConfigForPlanning, modelConfigForDefaultIntent, options) {
        return this.taskBuilder.build(plans, modelConfigForPlanning, modelConfigForDefaultIntent, options);
    }
    /**
     * Runs a single 'Planning'/'LoadYaml' task that performs no model call and
     * simply reports the given YAML string as a cache hit.
     *
     * @returns `{ runner }` for the session that ran the task.
     */
    async loadYamlFlowAsPlanning(userInstruction, yamlString) {
        const session = this.createExecutionSession(taskTitleStr('Act', userInstruction));
        const task = {
            type: 'Planning',
            subType: 'LoadYaml',
            param: {
                userInstruction
            },
            executor: async (param, executorContext)=>{
                const { uiContext } = executorContext;
                assert(uiContext, 'uiContext is required for Planning task');
                return {
                    output: {
                        actions: [],
                        shouldContinuePlanning: false,
                        log: '',
                        yamlString
                    },
                    cache: {
                        hit: true
                    },
                    hitBy: {
                        from: 'Cache',
                        context: {
                            yamlString
                        }
                    }
                };
            }
        };
        const runner = session.getRunner();
        await session.appendAndRun(task);
        return {
            runner
        };
    }
    /**
     * Converts the given plans to tasks and runs them in a fresh session.
     *
     * @returns `{ output, runner }`; `output` is undefined when the run yields no result.
     */
    async runPlans(title, plans, modelConfigForPlanning, modelConfigForDefaultIntent) {
        const session = this.createExecutionSession(title);
        const { tasks } = await this.convertPlanToExecutable(plans, modelConfigForPlanning, modelConfigForDefaultIntent);
        const runner = session.getRunner();
        const result = await session.appendAndRun(tasks);
        const { output } = result ?? {};
        return {
            output,
            runner
        };
    }
    /**
     * Runs `runAction` wrapped in `withFileChooser`, so that a file chooser
     * listener accepting `fileChooserAccept` is registered for the duration of
     * the action when that list is non-empty. All other arguments are forwarded
     * to `runAction` unchanged.
     */
    async action(userPrompt, modelConfigForPlanning, modelConfigForDefaultIntent, includeBboxInPlanning, aiActContext, cacheable, replanningCycleLimitOverride, imagesIncludeCount, deepThink, fileChooserAccept, deepLocate, abortSignal) {
        return withFileChooser(this.interface, fileChooserAccept, async ()=>this.runAction(userPrompt, modelConfigForPlanning, modelConfigForDefaultIntent, includeBboxInPlanning, aiActContext, cacheable, replanningCycleLimitOverride, imagesIncludeCount, deepThink, deepLocate, abortSignal));
    }
    /**
     * Plan/execute loop for a user instruction.
     *
     * Each iteration runs a 'Planning' task (model call), converts the planned
     * actions into executable tasks, runs them, and continues while the plan
     * result asks for further planning. The loop ends with an error plan when
     * the abort signal fires, plan conversion fails, too many execution errors
     * accumulate, or the replanning limit is exceeded.
     *
     * @returns `{ output: { yamlFlow, output }, runner }` on normal completion,
     *          otherwise whatever `session.appendErrorPlan` returns.
     */
    async runAction(userPrompt, modelConfigForPlanning, modelConfigForDefaultIntent, includeBboxInPlanning, aiActContext, cacheable, replanningCycleLimitOverride, imagesIncludeCount, deepThink, deepLocate, abortSignal) {
        const conversationHistory = new ConversationHistory();
        const session = this.createExecutionSession(taskTitleStr('Act', userPrompt));
        const runner = session.getRunner();
        let replanCount = 0;
        const yamlFlow = [];
        const replanningCycleLimit = replanningCycleLimitOverride ?? this.replanningCycleLimit;
        assert(void 0 !== replanningCycleLimit, 'replanningCycleLimit is required for TaskExecutor.action');
        let errorCountInOnePlanningLoop = 0;
        let outputString;
        while(true){
            if (abortSignal?.aborted) return session.appendErrorPlan(`Task aborted: ${abortSignal.reason || 'abort signal received'}`);
            const subGoalStatus = conversationHistory.subGoalsToText() || void 0;
            const memoriesStatus = conversationHistory.memoriesToText() || void 0;
            const result = await session.appendAndRun({
                type: 'Planning',
                subType: 'Plan',
                param: {
                    userInstruction: userPrompt,
                    aiActContext,
                    imagesIncludeCount,
                    deepThink,
                    ...subGoalStatus ? {
                        subGoalStatus
                    } : {},
                    ...memoriesStatus ? {
                        memoriesStatus
                    } : {}
                },
                executor: async (param, executorContext)=>{
                    const { uiContext } = executorContext;
                    assert(uiContext, 'uiContext is required for Planning task');
                    const { modelFamily } = modelConfigForPlanning;
                    const timing = executorContext.task.timing;
                    const actionSpace = this.getActionSpace();
                    // Validate before the first use so a bad action space fails with this message
                    // instead of a TypeError from `.map`.
                    assert(Array.isArray(actionSpace), 'actionSpace must be an array');
                    debug('actionSpace for this interface is:', actionSpace.map((action)=>action.name).join(', '));
                    if (0 === actionSpace.length) console.warn(`ActionSpace for ${this.interface.interfaceType} is empty. This may lead to unexpected behavior.`);
                    const planImpl = isUITars(modelFamily) ? uiTarsPlanning : isAutoGLM(modelFamily) ? autoGLMPlanning : plan;
                    let planResult;
                    try {
                        setTimingFieldOnce(timing, 'callAiStart');
                        planResult = await planImpl(param.userInstruction, {
                            context: uiContext,
                            actionContext: param.aiActContext,
                            interfaceType: this.interface.interfaceType,
                            actionSpace,
                            modelConfig: modelConfigForPlanning,
                            conversationHistory,
                            includeBbox: includeBboxInPlanning,
                            imagesIncludeCount,
                            deepThink,
                            abortSignal
                        });
                    } catch (planError) {
                        // Keep usage and the raw response on the task even when parsing failed.
                        if (planError instanceof AIResponseParseError) {
                            executorContext.task.usage = planError.usage;
                            executorContext.task.log = {
                                ...executorContext.task.log || {},
                                rawResponse: planError.rawResponse
                            };
                        }
                        throw planError;
                    } finally{
                        setTimingFieldOnce(timing, 'callAiEnd');
                    }
                    debug('planResult', JSON.stringify(planResult, null, 2));
                    const { actions, thought, log, memory, error, usage, rawResponse, reasoning_content, finalizeSuccess, finalizeMessage, updateSubGoals, markFinishedIndexes } = planResult;
                    outputString = finalizeMessage;
                    executorContext.task.log = {
                        ...executorContext.task.log || {},
                        rawResponse
                    };
                    executorContext.task.usage = usage;
                    executorContext.task.reasoning_content = reasoning_content;
                    // The output is recorded on the task before the assertions below can throw.
                    executorContext.task.output = {
                        actions: actions || [],
                        log,
                        thought,
                        memory,
                        yamlFlow: planResult.yamlFlow,
                        output: finalizeMessage,
                        shouldContinuePlanning: planResult.shouldContinuePlanning,
                        updateSubGoals,
                        markFinishedIndexes
                    };
                    executorContext.uiContext = uiContext;
                    assert(!error, `Failed to continue: ${error}\n${log || ''}`);
                    if (false === finalizeSuccess) assert(false, `Task failed: ${finalizeMessage || 'No error message provided'}\n${log || ''}`);
                    return {
                        cache: {
                            hit: false
                        }
                    };
                }
            }, {
                allowWhenError: true
            });
            const planResult = result?.output;
            const plans = planResult?.actions || [];
            yamlFlow.push(...planResult?.yamlFlow || []);
            let executables;
            try {
                executables = await this.convertPlanToExecutable(plans, modelConfigForPlanning, modelConfigForDefaultIntent, {
                    cacheable,
                    deepLocate,
                    abortSignal
                });
            } catch (error) {
                return session.appendErrorPlan(`Error converting plans to executable tasks: ${error}, plans: ${JSON.stringify(plans)}`);
            }
            if (conversationHistory.pendingFeedbackMessage) console.warn('unconsumed pending feedback message detected, this may lead to unexpected planning result:', conversationHistory.pendingFeedbackMessage);
            const initialTimeString = await this.getTimeString();
            conversationHistory.pendingFeedbackMessage += `Current time: ${initialTimeString}`;
            try {
                await session.appendAndRun(executables.tasks);
            } catch (error) {
                // Execution errors are fed back to the next planning round instead of aborting immediately.
                errorCountInOnePlanningLoop++;
                const timeString = await this.getTimeString();
                conversationHistory.pendingFeedbackMessage = `Time: ${timeString}, Error executing running tasks: ${error?.message || String(error)}`;
                debug('error when executing running tasks, but continue to run if it is not too many errors:', error instanceof Error ? error.message : String(error), 'current error count in one planning loop:', errorCountInOnePlanningLoop);
            }
            if (errorCountInOnePlanningLoop > maxErrorCountAllowedInOnePlanningLoop) return session.appendErrorPlan('Too many errors in one planning loop');
            if (abortSignal?.aborted) return session.appendErrorPlan(`Task aborted: ${abortSignal.reason || 'abort signal received'}`);
            if (!planResult?.shouldContinuePlanning) break;
            ++replanCount;
            if (replanCount > replanningCycleLimit) {
                const errorMsg = `Replanned ${replanningCycleLimit} times, exceeding the limit. Please configure a larger value for replanningCycleLimit (or use MIDSCENE_REPLANNING_CYCLE_LIMIT) to handle more complex tasks.`;
                return session.appendErrorPlan(errorMsg);
            }
            if (!conversationHistory.pendingFeedbackMessage) {
                const timeString = await this.getTimeString();
                conversationHistory.pendingFeedbackMessage = `Time: ${timeString}, I have finished the action previously planned.`;
            }
        }
        return {
            output: {
                yamlFlow,
                output: outputString
            },
            runner
        };
    }
    /**
     * Builds (but does not run) an 'Insight' task that extracts data through
     * `service.extract`.
     *
     * For types other than 'Query' the demand is wrapped into a single-key
     * object: 'Assert' and 'WaitFor' ask for a boolean under
     * 'StatementIsTruthy', any other type uses the type name as the key.
     * An 'Assert' task throws when the extracted value is falsy.
     *
     * @param type Task sub type, e.g. 'Query', 'Assert', 'WaitFor'.
     * @param demand The data demand / statement.
     * @param modelConfig Forwarded to `service.extract`.
     * @param opt Extract options; `opt.domIncluded` additionally controls whether a
     *            description of the element tree is appended.
     * @param multimodalPrompt Optional; stored with the demand and forwarded to `service.extract`.
     */
    createTypeQueryTask(type, demand, modelConfig, opt, multimodalPrompt) {
        const queryTask = {
            type: 'Insight',
            subType: type,
            param: {
                domIncluded: opt?.domIncluded,
                dataDemand: multimodalPrompt ? {
                    demand,
                    multimodalPrompt
                } : demand
            },
            executor: async (param, taskContext)=>{
                const { task } = taskContext;
                let queryDump;
                // Copies dump details (log, usage, reasoning) onto the task.
                const applyDump = (dump)=>{
                    queryDump = dump;
                    task.log = {
                        dump,
                        rawResponse: dump.taskInfo?.rawResponse
                    };
                    task.usage = dump.taskInfo?.usage;
                    if (dump.taskInfo?.reasoning_content) task.reasoning_content = dump.taskInfo.reasoning_content;
                };
                const uiContext = taskContext.uiContext;
                assert(uiContext, 'uiContext is required for Query task');
                const ifTypeRestricted = 'Query' !== type;
                let demandInput = demand;
                let keyOfResult = 'result';
                if (ifTypeRestricted && ('Assert' === type || 'WaitFor' === type)) {
                    keyOfResult = 'StatementIsTruthy';
                    const booleanPrompt = 'Assert' === type ? `Boolean, whether the following statement is true: ${demand}` : `Boolean, the user wants to do some 'wait for' operation, please check whether the following statement is true: ${demand}`;
                    demandInput = {
                        [keyOfResult]: booleanPrompt
                    };
                } else if (ifTypeRestricted) {
                    keyOfResult = type;
                    demandInput = {
                        [keyOfResult]: `${type}, ${demand}`
                    };
                }
                let extractResult;
                let extraPageDescription = '';
                if (opt?.domIncluded && this.interface.getElementsNodeTree) {
                    debug('appending tree info for page');
                    const tree = await this.interface.getElementsNodeTree();
                    extraPageDescription = await descriptionOfTree(tree, 200, false, opt?.domIncluded === 'visible-only');
                }
                try {
                    extractResult = await this.service.extract(demandInput, modelConfig, opt, extraPageDescription, multimodalPrompt, uiContext);
                } catch (error) {
                    // Only record the dump when one is attached, so a dump-less ServiceError
                    // is rethrown as-is rather than masked by a TypeError.
                    if (error instanceof ServiceError && error.dump) applyDump(error.dump);
                    throw error;
                }
                const { data, thought, dump } = extractResult;
                applyDump(dump);
                let outputResult = data;
                if (ifTypeRestricted) {
                    if ('string' == typeof data) {
                        outputResult = data;
                    } else if ('WaitFor' === type) {
                        outputResult = null == data ? false : data[keyOfResult];
                    } else if (null == data) {
                        outputResult = null;
                    } else if (data?.[keyOfResult] !== void 0) {
                        outputResult = data[keyOfResult];
                    } else if (data?.result !== void 0) {
                        outputResult = data.result;
                    } else {
                        assert(false, 'No result in query data');
                    }
                }
                if ('Assert' === type && !outputResult) {
                    task.thought = thought;
                    throw new Error(`Assertion failed: ${thought}`);
                }
                return {
                    output: outputResult,
                    log: queryDump,
                    thought
                };
            }
        };
        return queryTask;
    }
    /**
     * Creates a query task via `createTypeQueryTask` and runs it in a fresh session.
     *
     * @returns `{ output, thought, runner }`.
     * @throws Error when the session run yields no result.
     */
    async createTypeQueryExecution(type, demand, modelConfig, opt, multimodalPrompt) {
        const session = this.createExecutionSession(taskTitleStr(type, 'string' == typeof demand ? demand : JSON.stringify(demand)));
        const queryTask = await this.createTypeQueryTask(type, demand, modelConfig, opt, multimodalPrompt);
        const runner = session.getRunner();
        const result = await session.appendAndRun(queryTask);
        if (!result) throw new Error('result of taskExecutor.flush() is undefined in function createTypeQueryTask');
        const { output, thought } = result;
        return {
            output,
            thought,
            runner
        };
    }
    /**
     * Repeatedly runs a 'WaitFor' query until it yields a truthy output or the
     * timeout is reached.
     *
     * @param assertion Prompt describing the condition; parsed with `parsePrompt`.
     * @param opt Must provide `timeoutMs` and `checkIntervalMs`; the remaining
     *            fields are forwarded as extract options.
     * @param modelConfig Model configuration used for the query and the sleep plan.
     * @returns `{ output: undefined, runner }` on success, otherwise the result of
     *          appending a 'waitFor timeout' error plan.
     */
    async waitFor(assertion, opt, modelConfig) {
        const { textPrompt, multimodalPrompt } = parsePrompt(assertion);
        const description = `waitFor: ${textPrompt}`;
        const session = this.createExecutionSession(taskTitleStr('WaitFor', description));
        const runner = session.getRunner();
        const { timeoutMs, checkIntervalMs, domIncluded, screenshotIncluded, ...restOpt } = opt;
        const serviceExtractOpt = {
            domIncluded,
            screenshotIncluded,
            ...restOpt
        };
        assert(assertion, 'No assertion for waitFor');
        assert(timeoutMs, 'No timeoutMs for waitFor');
        assert(checkIntervalMs, 'No checkIntervalMs for waitFor');
        assert(checkIntervalMs <= timeoutMs, `wrong config for waitFor: checkIntervalMs must be less than timeoutMs, config: {checkIntervalMs: ${checkIntervalMs}, timeoutMs: ${timeoutMs}}`);
        const overallStartTime = Date.now();
        let lastCheckStart = overallStartTime;
        let errorThought = '';
        // The loop condition compares the start time of the most recent check
        // (not the current time) against the timeout.
        while(lastCheckStart - overallStartTime <= timeoutMs){
            const currentCheckStart = Date.now();
            lastCheckStart = currentCheckStart;
            const queryTask = await this.createTypeQueryTask('WaitFor', textPrompt, modelConfig, serviceExtractOpt, multimodalPrompt);
            const result = await session.appendAndRun(queryTask);
            if (result?.output) return {
                output: void 0,
                runner
            };
            errorThought = result?.thought || !result && `No result from assertion: ${textPrompt}` || `unknown error when waiting for assertion: ${textPrompt}`;
            const now = Date.now();
            // Sleep out the remainder of the check interval when the check finished early.
            if (now - currentCheckStart < checkIntervalMs) {
                const elapsed = now - currentCheckStart;
                const timeRemaining = checkIntervalMs - elapsed;
                const thought = `Check interval is ${checkIntervalMs}ms, ${elapsed}ms elapsed since last check, sleeping for ${timeRemaining}ms`;
                const { tasks: sleepTasks } = await this.convertPlanToExecutable([
                    {
                        type: 'Sleep',
                        param: {
                            timeMs: timeRemaining
                        },
                        thought
                    }
                ], modelConfig, modelConfig);
                if (sleepTasks[0]) await session.appendAndRun(sleepTasks[0]);
            }
        }
        return session.appendErrorPlan(`waitFor timeout: ${errorThought}`);
    }
    /**
     * @param interfaceInstance Interface (page/device) the tasks operate on.
     * @param service Service providing `contextRetrieverFn` and `extract`.
     * @param opts Required options object: `taskCache`, `onTaskStart`,
     *             `replanningCycleLimit`, `waitAfterAction`, `useDeviceTime`,
     *             `hooks`, `actionSpace`.
     */
    constructor(interfaceInstance, service, opts){
        _define_property(this, "interface", void 0);
        _define_property(this, "service", void 0);
        _define_property(this, "taskCache", void 0);
        _define_property(this, "providedActionSpace", void 0);
        _define_property(this, "taskBuilder", void 0);
        _define_property(this, "onTaskStartCallback", void 0);
        _define_property(this, "hooks", void 0);
        _define_property(this, "replanningCycleLimit", void 0);
        _define_property(this, "waitAfterAction", void 0);
        _define_property(this, "useDeviceTime", void 0);
        this.interface = interfaceInstance;
        this.service = service;
        this.taskCache = opts.taskCache;
        this.onTaskStartCallback = opts.onTaskStart;
        this.replanningCycleLimit = opts.replanningCycleLimit;
        this.waitAfterAction = opts.waitAfterAction;
        this.useDeviceTime = opts.useDeviceTime;
        this.hooks = opts.hooks;
        // Must be assigned before the TaskBuilder is created, which reads it via getActionSpace().
        this.providedActionSpace = opts.actionSpace;
        this.taskBuilder = new TaskBuilder({
            interfaceInstance,
            service,
            taskCache: opts.taskCache,
            actionSpace: this.getActionSpace(),
            waitAfterAction: opts.waitAfterAction
        });
    }
}
412
/**
 * Runs `action` while a file chooser listener is registered on the interface.
 *
 * With no files to accept, `action` is invoked directly. Otherwise the
 * interface must implement `registerFileChooserListener`; the registered
 * handler accepts `fileChooserAccept` on every chooser it receives. After the
 * action resolves, an error reported by the listener is rethrown. The listener
 * is always disposed.
 *
 * @throws Error when files are given but the interface cannot register a listener.
 */
async function withFileChooser(interfaceInstance, fileChooserAccept, action) {
    const hasFiles = Boolean(fileChooserAccept?.length);
    if (!hasFiles) {
        return action();
    }
    if (!interfaceInstance.registerFileChooserListener) {
        throw new Error(`File upload is not supported on ${interfaceInstance.interfaceType}`);
    }
    const acceptFiles = async (chooser)=>{
        await chooser.accept(fileChooserAccept);
    };
    const { dispose: release, getError: readListenerError } = await interfaceInstance.registerFileChooserListener(acceptFiles);
    try {
        const outcome = await action();
        const listenerError = readListenerError();
        if (listenerError) {
            throw listenerError;
        }
        return outcome;
    } finally{
        release();
    }
}
428
+ export { TaskExecutionError, TaskExecutor, locatePlanForLocate, withFileChooser };
@@ -0,0 +1,101 @@
1
/** Returns the task's `subType` when it is truthy, otherwise its `type`. */
function typeStr(task) {
    const specific = task.subType;
    return specific ? specific : task.type;
}
4
/**
 * Produces a display string for a locate parameter.
 *
 * Accepts a plain string or an object. For objects the first match wins:
 * a truthy nested `prompt.prompt`, a string `prompt`, then a string
 * `description`. Anything else yields an empty string.
 */
function locateParamStr(locate) {
    if (!locate) return '';
    const kind = typeof locate;
    if ('string' === kind) return locate;
    if ('object' !== kind) return '';
    const { prompt, description } = locate;
    if ('string' === typeof prompt) return prompt;
    if (null !== prompt && 'object' === typeof prompt && prompt.prompt) return prompt.prompt;
    return 'string' === typeof description ? description : '';
}
17
/**
 * Formats a scroll parameter as "direction, scrollType, distance", substituting
 * 'down', 'singleAction' and 'distance-not-set' for falsy fields.
 * Returns an empty string when no parameter is given.
 */
function scrollParamStr(scrollParam) {
    if (!scrollParam) return '';
    const direction = scrollParam.direction || 'down';
    const scrollType = scrollParam.scrollType || 'singleAction';
    const distance = scrollParam.distance || 'distance-not-set';
    return [
        direction,
        scrollType,
        distance
    ].join(', ');
}
21
/**
 * Formats a pull-gesture parameter as a comma-separated list. The direction
 * (default 'down') is always present; distance and duration are included only
 * when truthy. Returns an empty string when no parameter is given.
 */
function pullParamStr(pullParam) {
    if (!pullParam) return '';
    const { direction, distance, duration } = pullParam;
    const segments = [
        `direction: ${direction || 'down'}`
    ];
    if (distance) {
        segments.push(`distance: ${distance}`);
    }
    if (duration) {
        segments.push(`duration: ${duration}ms`);
    }
    return segments.join(', ');
}
29
/**
 * Derives `{ content, images }` from an Insight task parameter.
 *
 * The content comes from the first truthy field among `demand`, `assertion`
 * and `dataDemand`; non-string values are JSON-stringified. `images` is the
 * `multimodalPrompt.images` array of the object the content was taken from
 * (undefined when absent or not an array). A string `dataDemand` yields a
 * result without an `images` key. Falls back to `{ content: '' }`.
 */
function extractInsightParam(taskParam) {
    if (!taskParam) {
        return {
            content: ''
        };
    }
    const imagesOf = (source)=>{
        const candidate = source?.multimodalPrompt?.images;
        return Array.isArray(candidate) ? candidate : void 0;
    };
    const stringify = (value)=>{
        if ('string' == typeof value) return value;
        return JSON.stringify(value);
    };
    const direct = taskParam.demand || taskParam.assertion;
    if (direct) {
        return {
            content: stringify(direct),
            images: imagesOf(taskParam)
        };
    }
    const dataDemand = taskParam.dataDemand;
    if (dataDemand) {
        switch(typeof dataDemand){
            case 'string':
                return {
                    content: dataDemand
                };
            case 'object':
                return {
                    content: stringify(dataDemand.demand || dataDemand),
                    images: imagesOf(dataDemand)
                };
            default:
                break;
        }
    }
    return {
        content: ''
    };
}
57
/** Builds a task title: "type - prompt" when a prompt is given, otherwise just `type`. */
function taskTitleStr(type, prompt) {
    return prompt ? `${type} - ${prompt}` : type;
}
61
/**
 * Produces a short, human-readable description of a task's parameters.
 *
 * - 'Planning' tasks: the locate string for 'Locate', otherwise the output log
 *   or the user instruction.
 * - 'Insight' tasks: the content extracted by `extractInsightParam`.
 * - 'Action Space' tasks: a value chosen from the param (sleep time, scroll or
 *   pull description, `param.value`, or the whole param object), falling back
 *   to the task's thought, optionally prefixed by the locate string.
 *
 * @param task Task object with `type`, and optionally `subType`, `param`,
 *             `output` and `thought`.
 * @returns The description, or '' when nothing applies. A `null` value is
 *          treated like a missing one instead of raising a TypeError.
 */
function paramStr(task) {
    let value;
    if ('Planning' === task.type) {
        if ('Locate' === task.subType) {
            value = locateParamStr(task?.param);
        } else {
            value = task.output?.log || task.param?.userInstruction;
        }
    }
    if ('Insight' === task.type) value = extractInsightParam(task?.param).content;
    if ('Action Space' === task.type) {
        const param = task?.param;
        const locate = param?.locate;
        const locateStr = locate ? locateParamStr(locate) : '';
        value = task.thought || '';
        if ('number' == typeof param?.timeMs) value = `${param.timeMs}ms`;
        else if ('string' == typeof param?.scrollType) value = scrollParamStr(param);
        else if ('string' == typeof param?.direction && task?.subType === 'PullGesture') value = pullParamStr(param);
        else if (void 0 !== param?.value) value = param.value;
        else if (param && 'object' == typeof param && Object.keys(param).length > 0) value = param;
        // Object values cannot be appended meaningfully, so the locate string replaces them.
        if (locateStr) value = value && 'object' != typeof value ? `${locateStr} - ${value}` : locateStr;
    }
    // `typeof null` is 'object'; without this guard a null value would reach Object.entries and throw.
    if (null == value) return '';
    if ('string' == typeof value) return value;
    if ('object' == typeof value) {
        const locateStr = locateParamStr(value);
        if (locateStr) return locateStr;
        const formatValue = (v)=>{
            if ('string' == typeof v) return v;
            if (null == v) return String(v);
            if ('object' == typeof v) return JSON.stringify(v);
            return String(v);
        };
        // An empty object yields '' because joining no entries produces an empty string.
        return Object.entries(value).map(([key, v])=>`${key}: ${formatValue(v)}`).join(', ');
    }
    return String(value);
}
101
+ export { extractInsightParam, locateParamStr, paramStr, pullParamStr, scrollParamStr, taskTitleStr, typeStr };