@rpascene/core 0.30.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +9 -0
  3. package/dist/es/agent/agent.mjs +636 -0
  4. package/dist/es/agent/agent.mjs.map +1 -0
  5. package/dist/es/agent/common.mjs +0 -0
  6. package/dist/es/agent/index.mjs +6 -0
  7. package/dist/es/agent/task-cache.mjs +184 -0
  8. package/dist/es/agent/task-cache.mjs.map +1 -0
  9. package/dist/es/agent/tasks.mjs +666 -0
  10. package/dist/es/agent/tasks.mjs.map +1 -0
  11. package/dist/es/agent/ui-utils.mjs +72 -0
  12. package/dist/es/agent/ui-utils.mjs.map +1 -0
  13. package/dist/es/agent/utils.mjs +162 -0
  14. package/dist/es/agent/utils.mjs.map +1 -0
  15. package/dist/es/ai-model/action-executor.mjs +129 -0
  16. package/dist/es/ai-model/action-executor.mjs.map +1 -0
  17. package/dist/es/ai-model/common.mjs +355 -0
  18. package/dist/es/ai-model/common.mjs.map +1 -0
  19. package/dist/es/ai-model/conversation-history.mjs +58 -0
  20. package/dist/es/ai-model/conversation-history.mjs.map +1 -0
  21. package/dist/es/ai-model/index.mjs +11 -0
  22. package/dist/es/ai-model/inspect.mjs +286 -0
  23. package/dist/es/ai-model/inspect.mjs.map +1 -0
  24. package/dist/es/ai-model/llm-planning.mjs +140 -0
  25. package/dist/es/ai-model/llm-planning.mjs.map +1 -0
  26. package/dist/es/ai-model/prompt/assertion.mjs +31 -0
  27. package/dist/es/ai-model/prompt/assertion.mjs.map +1 -0
  28. package/dist/es/ai-model/prompt/common.mjs +7 -0
  29. package/dist/es/ai-model/prompt/common.mjs.map +1 -0
  30. package/dist/es/ai-model/prompt/describe.mjs +44 -0
  31. package/dist/es/ai-model/prompt/describe.mjs.map +1 -0
  32. package/dist/es/ai-model/prompt/extraction.mjs +140 -0
  33. package/dist/es/ai-model/prompt/extraction.mjs.map +1 -0
  34. package/dist/es/ai-model/prompt/llm-locator.mjs +275 -0
  35. package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -0
  36. package/dist/es/ai-model/prompt/llm-planning.mjs +367 -0
  37. package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -0
  38. package/dist/es/ai-model/prompt/llm-section-locator.mjs +47 -0
  39. package/dist/es/ai-model/prompt/llm-section-locator.mjs.map +1 -0
  40. package/dist/es/ai-model/prompt/playwright-generator.mjs +117 -0
  41. package/dist/es/ai-model/prompt/playwright-generator.mjs.map +1 -0
  42. package/dist/es/ai-model/prompt/ui-tars-locator.mjs +34 -0
  43. package/dist/es/ai-model/prompt/ui-tars-locator.mjs.map +1 -0
  44. package/dist/es/ai-model/prompt/ui-tars-planning.mjs +36 -0
  45. package/dist/es/ai-model/prompt/ui-tars-planning.mjs.map +1 -0
  46. package/dist/es/ai-model/prompt/util.mjs +124 -0
  47. package/dist/es/ai-model/prompt/util.mjs.map +1 -0
  48. package/dist/es/ai-model/prompt/yaml-generator.mjs +219 -0
  49. package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -0
  50. package/dist/es/ai-model/service-caller/index.mjs +537 -0
  51. package/dist/es/ai-model/service-caller/index.mjs.map +1 -0
  52. package/dist/es/ai-model/ui-tars-planning.mjs +201 -0
  53. package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -0
  54. package/dist/es/device/index.mjs +152 -0
  55. package/dist/es/device/index.mjs.map +1 -0
  56. package/dist/es/image/index.mjs +2 -0
  57. package/dist/es/index.mjs +11 -0
  58. package/dist/es/index.mjs.map +1 -0
  59. package/dist/es/insight/index.mjs +233 -0
  60. package/dist/es/insight/index.mjs.map +1 -0
  61. package/dist/es/insight/utils.mjs +15 -0
  62. package/dist/es/insight/utils.mjs.map +1 -0
  63. package/dist/es/report.mjs +88 -0
  64. package/dist/es/report.mjs.map +1 -0
  65. package/dist/es/tree.mjs +2 -0
  66. package/dist/es/types.mjs +11 -0
  67. package/dist/es/types.mjs.map +1 -0
  68. package/dist/es/utils.mjs +204 -0
  69. package/dist/es/utils.mjs.map +1 -0
  70. package/dist/es/yaml/builder.mjs +13 -0
  71. package/dist/es/yaml/builder.mjs.map +1 -0
  72. package/dist/es/yaml/index.mjs +3 -0
  73. package/dist/es/yaml/player.mjs +372 -0
  74. package/dist/es/yaml/player.mjs.map +1 -0
  75. package/dist/es/yaml/utils.mjs +73 -0
  76. package/dist/es/yaml/utils.mjs.map +1 -0
  77. package/dist/es/yaml.mjs +0 -0
  78. package/dist/lib/agent/agent.js +683 -0
  79. package/dist/lib/agent/agent.js.map +1 -0
  80. package/dist/lib/agent/common.js +5 -0
  81. package/dist/lib/agent/index.js +81 -0
  82. package/dist/lib/agent/index.js.map +1 -0
  83. package/dist/lib/agent/task-cache.js +236 -0
  84. package/dist/lib/agent/task-cache.js.map +1 -0
  85. package/dist/lib/agent/tasks.js +703 -0
  86. package/dist/lib/agent/tasks.js.map +1 -0
  87. package/dist/lib/agent/ui-utils.js +121 -0
  88. package/dist/lib/agent/ui-utils.js.map +1 -0
  89. package/dist/lib/agent/utils.js +233 -0
  90. package/dist/lib/agent/utils.js.map +1 -0
  91. package/dist/lib/ai-model/action-executor.js +163 -0
  92. package/dist/lib/ai-model/action-executor.js.map +1 -0
  93. package/dist/lib/ai-model/common.js +461 -0
  94. package/dist/lib/ai-model/common.js.map +1 -0
  95. package/dist/lib/ai-model/conversation-history.js +92 -0
  96. package/dist/lib/ai-model/conversation-history.js.map +1 -0
  97. package/dist/lib/ai-model/index.js +131 -0
  98. package/dist/lib/ai-model/index.js.map +1 -0
  99. package/dist/lib/ai-model/inspect.js +326 -0
  100. package/dist/lib/ai-model/inspect.js.map +1 -0
  101. package/dist/lib/ai-model/llm-planning.js +174 -0
  102. package/dist/lib/ai-model/llm-planning.js.map +1 -0
  103. package/dist/lib/ai-model/prompt/assertion.js +65 -0
  104. package/dist/lib/ai-model/prompt/assertion.js.map +1 -0
  105. package/dist/lib/ai-model/prompt/common.js +41 -0
  106. package/dist/lib/ai-model/prompt/common.js.map +1 -0
  107. package/dist/lib/ai-model/prompt/describe.js +78 -0
  108. package/dist/lib/ai-model/prompt/describe.js.map +1 -0
  109. package/dist/lib/ai-model/prompt/extraction.js +180 -0
  110. package/dist/lib/ai-model/prompt/extraction.js.map +1 -0
  111. package/dist/lib/ai-model/prompt/llm-locator.js +315 -0
  112. package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -0
  113. package/dist/lib/ai-model/prompt/llm-planning.js +407 -0
  114. package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -0
  115. package/dist/lib/ai-model/prompt/llm-section-locator.js +84 -0
  116. package/dist/lib/ai-model/prompt/llm-section-locator.js.map +1 -0
  117. package/dist/lib/ai-model/prompt/playwright-generator.js +178 -0
  118. package/dist/lib/ai-model/prompt/playwright-generator.js.map +1 -0
  119. package/dist/lib/ai-model/prompt/ui-tars-locator.js +68 -0
  120. package/dist/lib/ai-model/prompt/ui-tars-locator.js.map +1 -0
  121. package/dist/lib/ai-model/prompt/ui-tars-planning.js +73 -0
  122. package/dist/lib/ai-model/prompt/ui-tars-planning.js.map +1 -0
  123. package/dist/lib/ai-model/prompt/util.js +176 -0
  124. package/dist/lib/ai-model/prompt/util.js.map +1 -0
  125. package/dist/lib/ai-model/prompt/yaml-generator.js +280 -0
  126. package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -0
  127. package/dist/lib/ai-model/service-caller/index.js +623 -0
  128. package/dist/lib/ai-model/service-caller/index.js.map +1 -0
  129. package/dist/lib/ai-model/ui-tars-planning.js +238 -0
  130. package/dist/lib/ai-model/ui-tars-planning.js.map +1 -0
  131. package/dist/lib/device/index.js +255 -0
  132. package/dist/lib/device/index.js.map +1 -0
  133. package/dist/lib/image/index.js +56 -0
  134. package/dist/lib/image/index.js.map +1 -0
  135. package/dist/lib/index.js +103 -0
  136. package/dist/lib/index.js.map +1 -0
  137. package/dist/lib/insight/index.js +267 -0
  138. package/dist/lib/insight/index.js.map +1 -0
  139. package/dist/lib/insight/utils.js +49 -0
  140. package/dist/lib/insight/utils.js.map +1 -0
  141. package/dist/lib/report.js +122 -0
  142. package/dist/lib/report.js.map +1 -0
  143. package/dist/lib/tree.js +44 -0
  144. package/dist/lib/tree.js.map +1 -0
  145. package/dist/lib/types.js +82 -0
  146. package/dist/lib/types.js.map +1 -0
  147. package/dist/lib/utils.js +283 -0
  148. package/dist/lib/utils.js.map +1 -0
  149. package/dist/lib/yaml/builder.js +57 -0
  150. package/dist/lib/yaml/builder.js.map +1 -0
  151. package/dist/lib/yaml/index.js +80 -0
  152. package/dist/lib/yaml/index.js.map +1 -0
  153. package/dist/lib/yaml/player.js +406 -0
  154. package/dist/lib/yaml/player.js.map +1 -0
  155. package/dist/lib/yaml/utils.js +126 -0
  156. package/dist/lib/yaml/utils.js.map +1 -0
  157. package/dist/lib/yaml.js +20 -0
  158. package/dist/lib/yaml.js.map +1 -0
  159. package/dist/types/agent/agent.d.ts +156 -0
  160. package/dist/types/agent/common.d.ts +0 -0
  161. package/dist/types/agent/index.d.ts +9 -0
  162. package/dist/types/agent/task-cache.d.ts +48 -0
  163. package/dist/types/agent/tasks.d.ts +48 -0
  164. package/dist/types/agent/ui-utils.d.ts +7 -0
  165. package/dist/types/agent/utils.d.ts +52 -0
  166. package/dist/types/ai-model/action-executor.d.ts +19 -0
  167. package/dist/types/ai-model/common.d.ts +569 -0
  168. package/dist/types/ai-model/conversation-history.d.ts +18 -0
  169. package/dist/types/ai-model/index.d.ts +13 -0
  170. package/dist/types/ai-model/inspect.d.ts +46 -0
  171. package/dist/types/ai-model/llm-planning.d.ts +11 -0
  172. package/dist/types/ai-model/prompt/assertion.d.ts +2 -0
  173. package/dist/types/ai-model/prompt/common.d.ts +2 -0
  174. package/dist/types/ai-model/prompt/describe.d.ts +1 -0
  175. package/dist/types/ai-model/prompt/extraction.d.ts +4 -0
  176. package/dist/types/ai-model/prompt/llm-locator.d.ts +9 -0
  177. package/dist/types/ai-model/prompt/llm-planning.d.ts +9 -0
  178. package/dist/types/ai-model/prompt/llm-section-locator.d.ts +6 -0
  179. package/dist/types/ai-model/prompt/playwright-generator.d.ts +26 -0
  180. package/dist/types/ai-model/prompt/ui-tars-locator.d.ts +1 -0
  181. package/dist/types/ai-model/prompt/ui-tars-planning.d.ts +2 -0
  182. package/dist/types/ai-model/prompt/util.d.ts +47 -0
  183. package/dist/types/ai-model/prompt/yaml-generator.d.ts +100 -0
  184. package/dist/types/ai-model/service-caller/index.d.ts +48 -0
  185. package/dist/types/ai-model/ui-tars-planning.d.ts +59 -0
  186. package/dist/types/device/index.d.ts +2158 -0
  187. package/dist/types/image/index.d.ts +1 -0
  188. package/dist/types/index.d.ts +12 -0
  189. package/dist/types/insight/index.d.ts +31 -0
  190. package/dist/types/insight/utils.d.ts +2 -0
  191. package/dist/types/report.d.ts +12 -0
  192. package/dist/types/tree.d.ts +1 -0
  193. package/dist/types/types.d.ts +414 -0
  194. package/dist/types/utils.d.ts +40 -0
  195. package/dist/types/yaml/builder.d.ts +2 -0
  196. package/dist/types/yaml/index.d.ts +3 -0
  197. package/dist/types/yaml/player.d.ts +34 -0
  198. package/dist/types/yaml/utils.d.ts +9 -0
  199. package/dist/types/yaml.d.ts +178 -0
  200. package/package.json +108 -0
@@ -0,0 +1,666 @@
1
+ import { ConversationHistory, findAllRpasceneLocatorField, parseActionParam, plan as index_mjs_plan, uiTarsPlanning } from "../ai-model/index.mjs";
2
+ import { Executor } from "../ai-model/action-executor.mjs";
3
+ import { sleep as external_utils_mjs_sleep } from "../utils.mjs";
4
+ import { RPASCENE_REPLANNING_CYCLE_LIMIT, globalConfigManager } from "@rpascene/shared/env";
5
+ import { getDebug } from "@rpascene/shared/logger";
6
+ import { assert, ifInNode } from "@rpascene/shared/utils";
7
+ import { taskTitleStr } from "./ui-utils.mjs";
8
+ import { matchElementFromCache, matchElementFromPlan, parsePrompt } from "./utils.mjs";
9
/**
 * Sets `obj[key]` to `value` and returns `obj`.
 *
 * When `key` is already reachable on `obj` (own or inherited), the property
 * is (re)defined as an enumerable, configurable, writable data property;
 * otherwise a plain assignment is used.
 *
 * @param {object} obj - Target object.
 * @param {PropertyKey} key - Property name.
 * @param {*} value - Value to store.
 * @returns {object} The same `obj` that was passed in.
 */
function _define_property(obj, key, value) {
    if (!(key in obj)) {
        obj[key] = value;
        return obj;
    }
    const descriptor = {
        value,
        enumerable: true,
        configurable: true,
        writable: true
    };
    Object.defineProperty(obj, key, descriptor);
    return obj;
}
19
// Debug logger for this module, obtained from getDebug with the 'device-task-executor' topic.
+ const debug = getDebug('device-task-executor');
20
// Fallback replanning limit used by getReplanningCycleLimit when neither the
// instance nor the environment config supplies one.
+ const defaultReplanningCycleLimit = 10;
21
// Same fallback, used instead when the caller reports the 'vlm-ui-tars' mode.
+ const defaultVlmUiTarsReplanningCycleLimit = 40;
22
/**
 * Wraps a locate parameter in a 'Locate' plan object.
 *
 * A string argument is treated as the prompt (`{ prompt }`); any other value
 * is used as the locate parameter unchanged. The same value is exposed under
 * both `locate` and `param`.
 *
 * @param {string|object} param - Prompt text or a locate parameter object.
 * @returns {{type: string, locate: *, param: *, thought: string}} The plan.
 */
function locatePlanForLocate(param) {
    let locate = param;
    if ('string' == typeof param) {
        locate = { prompt: param };
    }
    return {
        type: 'Locate',
        locate,
        param: locate,
        thought: ''
    };
}
34
+ class TaskExecutor {
35
/**
 * Alias that returns `this.interface`.
 */
+ get page() {
36
+ return this.interface;
37
+ }
38
+ async recordScreenshot(timing) {
39
+ const base64 = await this.interface.screenshotBase64();
40
+ const item = {
41
+ type: 'screenshot',
42
+ ts: Date.now(),
43
+ screenshot: base64,
44
+ timing
45
+ };
46
+ return item;
47
+ }
48
+ prependExecutorWithScreenshot(taskApply, appendAfterExecution = false) {
49
+ const taskWithScreenshot = {
50
+ ...taskApply,
51
+ executor: async (param, context, ...args)=>{
52
+ const recorder = [];
53
+ const { task } = context;
54
+ task.recorder = recorder;
55
+ const shot = await this.recordScreenshot(`before ${task.type}`);
56
+ recorder.push(shot);
57
+ const result = await taskApply.executor(param, context, ...args);
58
+ if (appendAfterExecution) {
59
+ const shot2 = await this.recordScreenshot('after Action');
60
+ recorder.push(shot2);
61
+ }
62
+ return result;
63
+ }
64
+ };
65
+ return taskWithScreenshot;
66
+ }
67
+ async convertPlanToExecutable(plans, modelConfig, cacheable) {
68
+ const tasks = [];
69
+ const taskForLocatePlan = (plan, detailedLocateParam, onResult)=>{
70
+ if ('string' == typeof detailedLocateParam) detailedLocateParam = {
71
+ prompt: detailedLocateParam
72
+ };
73
+ if (void 0 !== cacheable) detailedLocateParam = {
74
+ ...detailedLocateParam,
75
+ cacheable
76
+ };
77
+ const taskFind = {
78
+ type: 'Insight',
79
+ subType: 'Locate',
80
+ param: detailedLocateParam,
81
+ thought: plan.thought,
82
+ executor: async (param, taskContext)=>{
83
+ var _this_taskCache, _locateCacheRecord_cacheContent;
84
+ const { task } = taskContext;
85
+ assert((null == param ? void 0 : param.prompt) || (null == param ? void 0 : param.id) || (null == param ? void 0 : param.bbox), `No prompt or id or position or bbox to locate, param=${JSON.stringify(param)}`);
86
+ let insightDump;
87
+ let usage;
88
+ const dumpCollector = (dump)=>{
89
+ var _dump_taskInfo, _dump_taskInfo1;
90
+ insightDump = dump;
91
+ usage = null == dump ? void 0 : null == (_dump_taskInfo = dump.taskInfo) ? void 0 : _dump_taskInfo.usage;
92
+ task.log = {
93
+ dump: insightDump
94
+ };
95
+ task.usage = usage;
96
+ if (null == dump ? void 0 : null == (_dump_taskInfo1 = dump.taskInfo) ? void 0 : _dump_taskInfo1.searchAreaUsage) task.searchAreaUsage = dump.taskInfo.searchAreaUsage;
97
+ };
98
+ this.insight.onceDumpUpdatedFn = dumpCollector;
99
+ const shotTime = Date.now();
100
+ const uiContext = await this.insight.contextRetrieverFn('locate');
101
+ task.uiContext = uiContext;
102
+ const recordItem = {
103
+ type: 'screenshot',
104
+ ts: shotTime,
105
+ screenshot: uiContext.screenshotBase64,
106
+ timing: 'before Insight'
107
+ };
108
+ task.recorder = [
109
+ recordItem
110
+ ];
111
+ const elementFromXpath = param.xpath && this.interface.getElementInfoByXpath ? await this.interface.getElementInfoByXpath(param.xpath) : void 0;
112
+ const userExpectedPathHitFlag = !!elementFromXpath;
113
+ const cachePrompt = param.prompt;
114
+ const locateCacheRecord = null == (_this_taskCache = this.taskCache) ? void 0 : _this_taskCache.matchLocateCache(cachePrompt);
115
+ const cacheEntry = null == locateCacheRecord ? void 0 : null == (_locateCacheRecord_cacheContent = locateCacheRecord.cacheContent) ? void 0 : _locateCacheRecord_cacheContent.cache;
116
+ const elementFromCache = userExpectedPathHitFlag ? null : await matchElementFromCache(this, cacheEntry, cachePrompt, param.cacheable);
117
+ const cacheHitFlag = !!elementFromCache;
118
+ const elementFromPlan = userExpectedPathHitFlag || cacheHitFlag ? void 0 : matchElementFromPlan(param, uiContext.tree);
119
+ const planHitFlag = !!elementFromPlan;
120
+ const elementFromAiLocate = userExpectedPathHitFlag || cacheHitFlag || planHitFlag ? void 0 : (await this.insight.locate(param, {
121
+ context: uiContext
122
+ }, modelConfig)).element;
123
+ const aiLocateHitFlag = !!elementFromAiLocate;
124
+ const element = elementFromXpath || elementFromCache || elementFromPlan || elementFromAiLocate;
125
+ let currentCacheEntry;
126
+ if (element && this.taskCache && !cacheHitFlag && (null == param ? void 0 : param.cacheable) !== false) if (this.interface.cacheFeatureForRect) try {
127
+ const feature = await this.interface.cacheFeatureForRect(element.rect, void 0 !== element.isOrderSensitive ? {
128
+ _orderSensitive: element.isOrderSensitive
129
+ } : void 0);
130
+ if (feature && Object.keys(feature).length > 0) {
131
+ debug('update cache, prompt: %s, cache: %o', cachePrompt, feature);
132
+ currentCacheEntry = feature;
133
+ this.taskCache.updateOrAppendCacheRecord({
134
+ type: 'locate',
135
+ prompt: cachePrompt,
136
+ cache: feature
137
+ }, locateCacheRecord);
138
+ } else debug('no cache data returned, skip cache update, prompt: %s', cachePrompt);
139
+ } catch (error) {
140
+ debug('cacheFeatureForRect failed: %s', error);
141
+ }
142
+ else debug('cacheFeatureForRect is not supported, skip cache update');
143
+ if (!element) throw new Error(`Element not found: ${param.prompt}`);
144
+ let hitBy;
145
+ if (userExpectedPathHitFlag) hitBy = {
146
+ from: 'User expected path',
147
+ context: {
148
+ xpath: param.xpath
149
+ }
150
+ };
151
+ else if (cacheHitFlag) hitBy = {
152
+ from: 'Cache',
153
+ context: {
154
+ cacheEntry,
155
+ cacheToSave: currentCacheEntry
156
+ }
157
+ };
158
+ else if (planHitFlag) hitBy = {
159
+ from: 'Planning',
160
+ context: {
161
+ id: null == elementFromPlan ? void 0 : elementFromPlan.id,
162
+ bbox: null == elementFromPlan ? void 0 : elementFromPlan.bbox
163
+ }
164
+ };
165
+ else if (aiLocateHitFlag) hitBy = {
166
+ from: 'AI model',
167
+ context: {
168
+ prompt: param.prompt
169
+ }
170
+ };
171
+ null == onResult || onResult(element);
172
+ return {
173
+ output: {
174
+ element
175
+ },
176
+ uiContext,
177
+ hitBy
178
+ };
179
+ }
180
+ };
181
+ return taskFind;
182
+ };
183
+ for (const plan of plans)if ('Locate' === plan.type) {
184
+ var _plan_locate, _plan_locate1;
185
+ if (!plan.locate || null === plan.locate || (null == (_plan_locate = plan.locate) ? void 0 : _plan_locate.id) === null || (null == (_plan_locate1 = plan.locate) ? void 0 : _plan_locate1.id) === 'null') {
186
+ debug('Locate action with id is null, will be ignored', plan);
187
+ continue;
188
+ }
189
+ const taskLocate = taskForLocatePlan(plan, plan.locate);
190
+ tasks.push(taskLocate);
191
+ } else if ('Error' === plan.type) {
192
+ var _plan_param;
193
+ const taskActionError = {
194
+ type: 'Action',
195
+ subType: 'Error',
196
+ param: plan.param,
197
+ thought: plan.thought || (null == (_plan_param = plan.param) ? void 0 : _plan_param.thought),
198
+ locate: plan.locate,
199
+ executor: async ()=>{
200
+ var _plan_param;
201
+ throw new Error((null == plan ? void 0 : plan.thought) || (null == (_plan_param = plan.param) ? void 0 : _plan_param.thought) || 'error without thought');
202
+ }
203
+ };
204
+ tasks.push(taskActionError);
205
+ } else if ('Finished' === plan.type) {
206
+ const taskActionFinished = {
207
+ type: 'Action',
208
+ subType: 'Finished',
209
+ param: null,
210
+ thought: plan.thought,
211
+ locate: plan.locate,
212
+ executor: async (param)=>{}
213
+ };
214
+ tasks.push(taskActionFinished);
215
+ } else if ('Sleep' === plan.type) {
216
+ const taskActionSleep = {
217
+ type: 'Action',
218
+ subType: 'Sleep',
219
+ param: plan.param,
220
+ thought: plan.thought,
221
+ locate: plan.locate,
222
+ executor: async (taskParam)=>{
223
+ await external_utils_mjs_sleep((null == taskParam ? void 0 : taskParam.timeMs) || 3000);
224
+ }
225
+ };
226
+ tasks.push(taskActionSleep);
227
+ } else {
228
+ const planType = plan.type;
229
+ const actionSpace = await this.interface.actionSpace();
230
+ const action = actionSpace.find((action)=>action.name === planType);
231
+ const param = plan.param;
232
+ if (!action) throw new Error(`Action type '${planType}' not found`);
233
+ const locateFields = action ? findAllRpasceneLocatorField(action.paramSchema) : [];
234
+ const requiredLocateFields = action ? findAllRpasceneLocatorField(action.paramSchema, true) : [];
235
+ locateFields.forEach((field)=>{
236
+ if (param[field]) {
237
+ const locatePlan = locatePlanForLocate(param[field]);
238
+ debug('will prepend locate param for field', `action.type=${planType}`, `param=${JSON.stringify(param[field])}`, `locatePlan=${JSON.stringify(locatePlan)}`);
239
+ const locateTask = taskForLocatePlan(locatePlan, param[field], (result)=>{
240
+ param[field] = result;
241
+ });
242
+ tasks.push(locateTask);
243
+ } else {
244
+ assert(!requiredLocateFields.includes(field), `Required locate field '${field}' is not provided for action ${planType}`);
245
+ debug(`field '${field}' is not provided for action ${planType}`);
246
+ }
247
+ });
248
+ const task = {
249
+ type: 'Action',
250
+ subType: planType,
251
+ thought: plan.thought,
252
+ param: plan.param,
253
+ executor: async (param, context)=>{
254
+ var _context_element;
255
+ debug('executing action', planType, param, `context.element.center: ${null == (_context_element = context.element) ? void 0 : _context_element.center}`);
256
+ const uiContext = await this.insight.contextRetrieverFn('locate');
257
+ context.task.uiContext = uiContext;
258
+ requiredLocateFields.forEach((field)=>{
259
+ assert(param[field], `field '${field}' is required for action ${planType} but not provided. Cannot execute action ${planType}.`);
260
+ });
261
+ try {
262
+ await Promise.all([
263
+ (async ()=>{
264
+ if (this.interface.beforeInvokeAction) {
265
+ debug('will call "beforeInvokeAction" for interface');
266
+ await this.interface.beforeInvokeAction(action.name, param);
267
+ debug('called "beforeInvokeAction" for interface');
268
+ }
269
+ })(),
270
+ external_utils_mjs_sleep(200)
271
+ ]);
272
+ } catch (originalError) {
273
+ const originalMessage = (null == originalError ? void 0 : originalError.message) || String(originalError);
274
+ throw new Error(`error in running beforeInvokeAction for ${action.name}: ${originalMessage}`, {
275
+ cause: originalError
276
+ });
277
+ }
278
+ if (action.paramSchema) try {
279
+ param = parseActionParam(param, action.paramSchema);
280
+ } catch (error) {
281
+ throw new Error(`Invalid parameters for action ${action.name}: ${error.message}\nParameters: ${JSON.stringify(param)}`, {
282
+ cause: error
283
+ });
284
+ }
285
+ debug('calling action', action.name);
286
+ if (ifInNode) {
287
+ const actionFn = action.call.bind(this.interface);
288
+ await actionFn(param, context);
289
+ }
290
+ debug('called action', action.name);
291
+ await external_utils_mjs_sleep(300);
292
+ try {
293
+ if (this.interface.afterInvokeAction) {
294
+ debug('will call "afterInvokeAction" for interface');
295
+ await this.interface.afterInvokeAction(action.name, param);
296
+ debug('called "afterInvokeAction" for interface');
297
+ }
298
+ } catch (originalError) {
299
+ const originalMessage = (null == originalError ? void 0 : originalError.message) || String(originalError);
300
+ throw new Error(`error in running afterInvokeAction for ${action.name}: ${originalMessage}`, {
301
+ cause: originalError
302
+ });
303
+ }
304
+ return {
305
+ output: {
306
+ success: true,
307
+ action: planType,
308
+ param: param
309
+ }
310
+ };
311
+ }
312
+ };
313
+ tasks.push(task);
314
+ }
315
+ const wrappedTasks = tasks.map((task, index)=>{
316
+ if ('Action' === task.type) return this.prependExecutorWithScreenshot(task, index === tasks.length - 1);
317
+ return task;
318
+ });
319
+ return {
320
+ tasks: wrappedTasks
321
+ };
322
+ }
323
+ async setupPlanningContext(executorContext) {
324
+ const shotTime = Date.now();
325
+ const uiContext = await this.insight.contextRetrieverFn('locate');
326
+ const recordItem = {
327
+ type: 'screenshot',
328
+ ts: shotTime,
329
+ screenshot: uiContext.screenshotBase64,
330
+ timing: 'before Planning'
331
+ };
332
+ executorContext.task.recorder = [
333
+ recordItem
334
+ ];
335
+ executorContext.task.uiContext = uiContext;
336
+ return {
337
+ uiContext
338
+ };
339
+ }
340
+ async loadYamlFlowAsPlanning(userInstruction, yamlString) {
341
+ const taskExecutor = new Executor(taskTitleStr('Action', userInstruction), {
342
+ onTaskStart: this.onTaskStartCallback
343
+ });
344
+ const task = {
345
+ type: 'Planning',
346
+ subType: 'LoadYaml',
347
+ locate: null,
348
+ param: {
349
+ userInstruction
350
+ },
351
+ executor: async (param, executorContext)=>{
352
+ await this.setupPlanningContext(executorContext);
353
+ return {
354
+ output: {
355
+ actions: [],
356
+ more_actions_needed_by_instruction: false,
357
+ log: '',
358
+ yamlString
359
+ },
360
+ cache: {
361
+ hit: true
362
+ },
363
+ hitBy: {
364
+ from: 'Cache',
365
+ context: {
366
+ yamlString
367
+ }
368
+ }
369
+ };
370
+ }
371
+ };
372
+ await taskExecutor.append(task);
373
+ await taskExecutor.flush();
374
+ return {
375
+ executor: taskExecutor
376
+ };
377
+ }
378
+ createPlanningTask(userInstruction, actionContext, modelConfig) {
379
+ const task = {
380
+ type: 'Planning',
381
+ subType: 'Plan',
382
+ locate: null,
383
+ param: {
384
+ userInstruction
385
+ },
386
+ executor: async (param, executorContext)=>{
387
+ const startTime = Date.now();
388
+ const { uiContext } = await this.setupPlanningContext(executorContext);
389
+ const { vlMode } = modelConfig;
390
+ const uiTarsModelVersion = 'vlm-ui-tars' === vlMode ? modelConfig.uiTarsModelVersion : void 0;
391
+ assert(this.interface.actionSpace, 'actionSpace for device is not implemented');
392
+ const actionSpace = await this.interface.actionSpace();
393
+ debug('actionSpace for this interface is:', actionSpace.map((action)=>action.name).join(', '));
394
+ assert(Array.isArray(actionSpace), 'actionSpace must be an array');
395
+ if (0 === actionSpace.length) console.warn(`ActionSpace for ${this.interface.interfaceType} is empty. This may lead to unexpected behavior.`);
396
+ const planResult = await (uiTarsModelVersion ? uiTarsPlanning : index_mjs_plan)(param.userInstruction, {
397
+ context: uiContext,
398
+ actionContext,
399
+ interfaceType: this.interface.interfaceType,
400
+ actionSpace,
401
+ modelConfig,
402
+ conversationHistory: this.conversationHistory
403
+ });
404
+ debug('planResult', JSON.stringify(planResult, null, 2));
405
+ const { actions, log, more_actions_needed_by_instruction, error, usage, rawResponse, sleep } = planResult;
406
+ executorContext.task.log = {
407
+ ...executorContext.task.log || {},
408
+ rawResponse
409
+ };
410
+ executorContext.task.usage = usage;
411
+ const finalActions = actions || [];
412
+ if (sleep) {
413
+ const timeNow = Date.now();
414
+ const timeRemaining = sleep - (timeNow - startTime);
415
+ if (timeRemaining > 0) finalActions.push({
416
+ type: 'Sleep',
417
+ param: {
418
+ timeMs: timeRemaining
419
+ },
420
+ locate: null
421
+ });
422
+ }
423
+ if (0 === finalActions.length) assert(!more_actions_needed_by_instruction || sleep, error ? `Failed to plan: ${error}` : 'No plan found');
424
+ return {
425
+ output: {
426
+ actions: finalActions,
427
+ more_actions_needed_by_instruction,
428
+ log,
429
+ yamlFlow: planResult.yamlFlow
430
+ },
431
+ cache: {
432
+ hit: false
433
+ },
434
+ uiContext
435
+ };
436
+ }
437
+ };
438
+ return task;
439
+ }
440
+ async runPlans(title, plans, modelConfig) {
441
+ const taskExecutor = new Executor(title, {
442
+ onTaskStart: this.onTaskStartCallback
443
+ });
444
+ const { tasks } = await this.convertPlanToExecutable(plans, modelConfig);
445
+ await taskExecutor.append(tasks);
446
+ const result = await taskExecutor.flush();
447
+ const { output } = result;
448
+ return {
449
+ output,
450
+ executor: taskExecutor
451
+ };
452
+ }
453
+ getReplanningCycleLimit(isVlmUiTars) {
454
+ return this.replanningCycleLimit || globalConfigManager.getEnvConfigInNumber(RPASCENE_REPLANNING_CYCLE_LIMIT) || (isVlmUiTars ? defaultVlmUiTarsReplanningCycleLimit : defaultReplanningCycleLimit);
455
+ }
456
+ async action(userPrompt, modelConfig, actionContext, cacheable) {
457
+ this.conversationHistory.reset();
458
+ const taskExecutor = new Executor(taskTitleStr('Action', userPrompt), {
459
+ onTaskStart: this.onTaskStartCallback
460
+ });
461
+ let replanCount = 0;
462
+ const yamlFlow = [];
463
+ const replanningCycleLimit = this.getReplanningCycleLimit('vlm-ui-tars' === modelConfig.vlMode);
464
+ while(true){
465
+ if (replanCount > replanningCycleLimit) {
466
+ const errorMsg = `Replanning ${replanningCycleLimit} times, which is more than the limit, please split the task into multiple steps`;
467
+ return this.appendErrorPlan(taskExecutor, errorMsg, modelConfig);
468
+ }
469
+ const planningTask = this.createPlanningTask(userPrompt, actionContext, modelConfig);
470
+ await taskExecutor.append(planningTask);
471
+ const result = await taskExecutor.flush();
472
+ console.dir(result.output.actions, 'result');
473
+ const planResult = null == result ? void 0 : result.output;
474
+ if (taskExecutor.isInErrorState()) return {
475
+ output: planResult,
476
+ executor: taskExecutor
477
+ };
478
+ const plans = planResult.actions || [];
479
+ yamlFlow.push(...planResult.yamlFlow || []);
480
+ let executables;
481
+ try {
482
+ executables = await this.convertPlanToExecutable(plans, modelConfig, cacheable);
483
+ taskExecutor.append(executables.tasks);
484
+ } catch (error) {
485
+ return this.appendErrorPlan(taskExecutor, `Error converting plans to executable tasks: ${error}, plans: ${JSON.stringify(plans)}`, modelConfig);
486
+ }
487
+ await taskExecutor.flush();
488
+ if (taskExecutor.isInErrorState()) return {
489
+ output: void 0,
490
+ executor: taskExecutor
491
+ };
492
+ if (!planResult.more_actions_needed_by_instruction) break;
493
+ replanCount++;
494
+ }
495
+ return {
496
+ output: {
497
+ yamlFlow
498
+ },
499
+ executor: taskExecutor
500
+ };
501
+ }
502
+ createTypeQueryTask(type, demand, modelConfig, opt, multimodalPrompt) {
503
+ const queryTask = {
504
+ type: 'Insight',
505
+ subType: type,
506
+ locate: null,
507
+ param: {
508
+ dataDemand: multimodalPrompt ? {
509
+ demand,
510
+ multimodalPrompt
511
+ } : demand
512
+ },
513
+ executor: async (param, taskContext)=>{
514
+ const { task } = taskContext;
515
+ let insightDump;
516
+ const dumpCollector = (dump)=>{
517
+ insightDump = dump;
518
+ };
519
+ this.insight.onceDumpUpdatedFn = dumpCollector;
520
+ const shotTime = Date.now();
521
+ const uiContext = await this.insight.contextRetrieverFn('extract');
522
+ task.uiContext = uiContext;
523
+ const recordItem = {
524
+ type: 'screenshot',
525
+ ts: shotTime,
526
+ screenshot: uiContext.screenshotBase64,
527
+ timing: 'before Extract'
528
+ };
529
+ task.recorder = [
530
+ recordItem
531
+ ];
532
+ const ifTypeRestricted = 'Query' !== type;
533
+ let demandInput = demand;
534
+ let keyOfResult = 'result';
535
+ if (ifTypeRestricted && ('Assert' === type || 'WaitFor' === type)) {
536
+ keyOfResult = 'StatementIsTruthy';
537
+ const booleanPrompt = 'Assert' === type ? `Boolean, whether the following statement is true: ${demand}` : `Boolean, the user wants to do some 'wait for' operation, please check whether the following statement is true: ${demand}`;
538
+ demandInput = {
539
+ [keyOfResult]: booleanPrompt
540
+ };
541
+ } else if (ifTypeRestricted) demandInput = {
542
+ [keyOfResult]: `${type}, ${demand}`
543
+ };
544
+ const { data, usage, thought } = await this.insight.extract(demandInput, modelConfig, opt, multimodalPrompt);
545
+ let outputResult = data;
546
+ if (ifTypeRestricted) if ('string' == typeof data) outputResult = data;
547
+ else if ('WaitFor' === type) outputResult = null == data ? false : data[keyOfResult];
548
+ else if (null == data) outputResult = null;
549
+ else {
550
+ assert((null == data ? void 0 : data[keyOfResult]) !== void 0, 'No result in query data');
551
+ outputResult = data[keyOfResult];
552
+ }
553
+ if ('Assert' === type && !outputResult) {
554
+ task.usage = usage;
555
+ task.thought = thought;
556
+ throw new Error(`Assertion failed: ${thought}`);
557
+ }
558
+ return {
559
+ output: outputResult,
560
+ log: insightDump,
561
+ usage,
562
+ thought
563
+ };
564
+ }
565
+ };
566
+ return queryTask;
567
+ }
568
+ async createTypeQueryExecution(type, demand, modelConfig, opt, multimodalPrompt) {
569
+ const taskExecutor = new Executor(taskTitleStr(type, 'string' == typeof demand ? demand : JSON.stringify(demand)), {
570
+ onTaskStart: this.onTaskStartCallback
571
+ });
572
+ const queryTask = await this.createTypeQueryTask(type, demand, modelConfig, opt, multimodalPrompt);
573
+ await taskExecutor.append(this.prependExecutorWithScreenshot(queryTask));
574
+ const result = await taskExecutor.flush();
575
+ if (!result) throw new Error('result of taskExecutor.flush() is undefined in function createTypeQueryTask');
576
+ const { output, thought } = result;
577
+ return {
578
+ output,
579
+ thought,
580
+ executor: taskExecutor
581
+ };
582
+ }
583
+ async appendErrorPlan(taskExecutor, errorMsg, modelConfig) {
584
+ const errorPlan = {
585
+ type: 'Error',
586
+ param: {
587
+ thought: errorMsg
588
+ },
589
+ locate: null
590
+ };
591
+ const { tasks } = await this.convertPlanToExecutable([
592
+ errorPlan
593
+ ], modelConfig);
594
+ await taskExecutor.append(this.prependExecutorWithScreenshot(tasks[0]));
595
+ await taskExecutor.flush();
596
+ return {
597
+ output: void 0,
598
+ executor: taskExecutor
599
+ };
600
+ }
601
+ async taskForSleep(timeMs, modelConfig) {
602
+ const sleepPlan = {
603
+ type: 'Sleep',
604
+ param: {
605
+ timeMs
606
+ },
607
+ locate: null
608
+ };
609
+ const { tasks: sleepTasks } = await this.convertPlanToExecutable([
610
+ sleepPlan
611
+ ], modelConfig);
612
+ return this.prependExecutorWithScreenshot(sleepTasks[0]);
613
+ }
614
+ async waitFor(assertion, opt, modelConfig) {
615
+ const { textPrompt, multimodalPrompt } = parsePrompt(assertion);
616
+ const description = `waitFor: ${textPrompt}`;
617
+ const taskExecutor = new Executor(taskTitleStr('WaitFor', description), {
618
+ onTaskStart: this.onTaskStartCallback
619
+ });
620
+ const { timeoutMs, checkIntervalMs } = opt;
621
+ assert(assertion, 'No assertion for waitFor');
622
+ assert(timeoutMs, 'No timeoutMs for waitFor');
623
+ assert(checkIntervalMs, 'No checkIntervalMs for waitFor');
624
+ assert(checkIntervalMs <= timeoutMs, `wrong config for waitFor: checkIntervalMs must be less than timeoutMs, config: {checkIntervalMs: ${checkIntervalMs}, timeoutMs: ${timeoutMs}}`);
625
+ const overallStartTime = Date.now();
626
+ let startTime = Date.now();
627
+ let errorThought = '';
628
+ while(Date.now() - overallStartTime < timeoutMs){
629
+ startTime = Date.now();
630
+ const queryTask = await this.createTypeQueryTask('WaitFor', textPrompt, modelConfig, {
631
+ doNotThrowError: true
632
+ }, multimodalPrompt);
633
+ await taskExecutor.append(this.prependExecutorWithScreenshot(queryTask));
634
+ const result = await taskExecutor.flush();
635
+ if (null == result ? void 0 : result.output) return {
636
+ output: void 0,
637
+ executor: taskExecutor
638
+ };
639
+ errorThought = (null == result ? void 0 : result.thought) || !result && `No result from assertion: ${textPrompt}` || `unknown error when waiting for assertion: ${textPrompt}`;
640
+ const now = Date.now();
641
+ if (now - startTime < checkIntervalMs) {
642
+ const timeRemaining = checkIntervalMs - (now - startTime);
643
+ const sleepTask = await this.taskForSleep(timeRemaining, modelConfig);
644
+ await taskExecutor.append(sleepTask);
645
+ }
646
+ }
647
+ return this.appendErrorPlan(taskExecutor, `waitFor timeout: ${errorThought}`, modelConfig);
648
+ }
649
+ constructor(interfaceInstance, insight, opts){
650
+ _define_property(this, "interface", void 0);
651
+ _define_property(this, "insight", void 0);
652
+ _define_property(this, "taskCache", void 0);
653
+ _define_property(this, "conversationHistory", void 0);
654
+ _define_property(this, "onTaskStartCallback", void 0);
655
+ _define_property(this, "replanningCycleLimit", void 0);
656
+ this.interface = interfaceInstance;
657
+ this.insight = insight;
658
+ this.taskCache = opts.taskCache;
659
+ this.onTaskStartCallback = null == opts ? void 0 : opts.onTaskStart;
660
+ this.replanningCycleLimit = opts.replanningCycleLimit;
661
+ this.conversationHistory = new ConversationHistory();
662
+ }
663
+ }
664
+ export { TaskExecutor, locatePlanForLocate };
665
+
666
+ //# sourceMappingURL=tasks.mjs.map