@rpascene/core 0.30.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200)
  1. package/LICENSE +21 -0
  2. package/README.md +9 -0
  3. package/dist/es/agent/agent.mjs +636 -0
  4. package/dist/es/agent/agent.mjs.map +1 -0
  5. package/dist/es/agent/common.mjs +0 -0
  6. package/dist/es/agent/index.mjs +6 -0
  7. package/dist/es/agent/task-cache.mjs +184 -0
  8. package/dist/es/agent/task-cache.mjs.map +1 -0
  9. package/dist/es/agent/tasks.mjs +666 -0
  10. package/dist/es/agent/tasks.mjs.map +1 -0
  11. package/dist/es/agent/ui-utils.mjs +72 -0
  12. package/dist/es/agent/ui-utils.mjs.map +1 -0
  13. package/dist/es/agent/utils.mjs +162 -0
  14. package/dist/es/agent/utils.mjs.map +1 -0
  15. package/dist/es/ai-model/action-executor.mjs +129 -0
  16. package/dist/es/ai-model/action-executor.mjs.map +1 -0
  17. package/dist/es/ai-model/common.mjs +355 -0
  18. package/dist/es/ai-model/common.mjs.map +1 -0
  19. package/dist/es/ai-model/conversation-history.mjs +58 -0
  20. package/dist/es/ai-model/conversation-history.mjs.map +1 -0
  21. package/dist/es/ai-model/index.mjs +11 -0
  22. package/dist/es/ai-model/inspect.mjs +286 -0
  23. package/dist/es/ai-model/inspect.mjs.map +1 -0
  24. package/dist/es/ai-model/llm-planning.mjs +140 -0
  25. package/dist/es/ai-model/llm-planning.mjs.map +1 -0
  26. package/dist/es/ai-model/prompt/assertion.mjs +31 -0
  27. package/dist/es/ai-model/prompt/assertion.mjs.map +1 -0
  28. package/dist/es/ai-model/prompt/common.mjs +7 -0
  29. package/dist/es/ai-model/prompt/common.mjs.map +1 -0
  30. package/dist/es/ai-model/prompt/describe.mjs +44 -0
  31. package/dist/es/ai-model/prompt/describe.mjs.map +1 -0
  32. package/dist/es/ai-model/prompt/extraction.mjs +140 -0
  33. package/dist/es/ai-model/prompt/extraction.mjs.map +1 -0
  34. package/dist/es/ai-model/prompt/llm-locator.mjs +275 -0
  35. package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -0
  36. package/dist/es/ai-model/prompt/llm-planning.mjs +367 -0
  37. package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -0
  38. package/dist/es/ai-model/prompt/llm-section-locator.mjs +47 -0
  39. package/dist/es/ai-model/prompt/llm-section-locator.mjs.map +1 -0
  40. package/dist/es/ai-model/prompt/playwright-generator.mjs +117 -0
  41. package/dist/es/ai-model/prompt/playwright-generator.mjs.map +1 -0
  42. package/dist/es/ai-model/prompt/ui-tars-locator.mjs +34 -0
  43. package/dist/es/ai-model/prompt/ui-tars-locator.mjs.map +1 -0
  44. package/dist/es/ai-model/prompt/ui-tars-planning.mjs +36 -0
  45. package/dist/es/ai-model/prompt/ui-tars-planning.mjs.map +1 -0
  46. package/dist/es/ai-model/prompt/util.mjs +124 -0
  47. package/dist/es/ai-model/prompt/util.mjs.map +1 -0
  48. package/dist/es/ai-model/prompt/yaml-generator.mjs +219 -0
  49. package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -0
  50. package/dist/es/ai-model/service-caller/index.mjs +537 -0
  51. package/dist/es/ai-model/service-caller/index.mjs.map +1 -0
  52. package/dist/es/ai-model/ui-tars-planning.mjs +201 -0
  53. package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -0
  54. package/dist/es/device/index.mjs +152 -0
  55. package/dist/es/device/index.mjs.map +1 -0
  56. package/dist/es/image/index.mjs +2 -0
  57. package/dist/es/index.mjs +11 -0
  58. package/dist/es/index.mjs.map +1 -0
  59. package/dist/es/insight/index.mjs +233 -0
  60. package/dist/es/insight/index.mjs.map +1 -0
  61. package/dist/es/insight/utils.mjs +15 -0
  62. package/dist/es/insight/utils.mjs.map +1 -0
  63. package/dist/es/report.mjs +88 -0
  64. package/dist/es/report.mjs.map +1 -0
  65. package/dist/es/tree.mjs +2 -0
  66. package/dist/es/types.mjs +11 -0
  67. package/dist/es/types.mjs.map +1 -0
  68. package/dist/es/utils.mjs +204 -0
  69. package/dist/es/utils.mjs.map +1 -0
  70. package/dist/es/yaml/builder.mjs +13 -0
  71. package/dist/es/yaml/builder.mjs.map +1 -0
  72. package/dist/es/yaml/index.mjs +3 -0
  73. package/dist/es/yaml/player.mjs +372 -0
  74. package/dist/es/yaml/player.mjs.map +1 -0
  75. package/dist/es/yaml/utils.mjs +73 -0
  76. package/dist/es/yaml/utils.mjs.map +1 -0
  77. package/dist/es/yaml.mjs +0 -0
  78. package/dist/lib/agent/agent.js +683 -0
  79. package/dist/lib/agent/agent.js.map +1 -0
  80. package/dist/lib/agent/common.js +5 -0
  81. package/dist/lib/agent/index.js +81 -0
  82. package/dist/lib/agent/index.js.map +1 -0
  83. package/dist/lib/agent/task-cache.js +236 -0
  84. package/dist/lib/agent/task-cache.js.map +1 -0
  85. package/dist/lib/agent/tasks.js +703 -0
  86. package/dist/lib/agent/tasks.js.map +1 -0
  87. package/dist/lib/agent/ui-utils.js +121 -0
  88. package/dist/lib/agent/ui-utils.js.map +1 -0
  89. package/dist/lib/agent/utils.js +233 -0
  90. package/dist/lib/agent/utils.js.map +1 -0
  91. package/dist/lib/ai-model/action-executor.js +163 -0
  92. package/dist/lib/ai-model/action-executor.js.map +1 -0
  93. package/dist/lib/ai-model/common.js +461 -0
  94. package/dist/lib/ai-model/common.js.map +1 -0
  95. package/dist/lib/ai-model/conversation-history.js +92 -0
  96. package/dist/lib/ai-model/conversation-history.js.map +1 -0
  97. package/dist/lib/ai-model/index.js +131 -0
  98. package/dist/lib/ai-model/index.js.map +1 -0
  99. package/dist/lib/ai-model/inspect.js +326 -0
  100. package/dist/lib/ai-model/inspect.js.map +1 -0
  101. package/dist/lib/ai-model/llm-planning.js +174 -0
  102. package/dist/lib/ai-model/llm-planning.js.map +1 -0
  103. package/dist/lib/ai-model/prompt/assertion.js +65 -0
  104. package/dist/lib/ai-model/prompt/assertion.js.map +1 -0
  105. package/dist/lib/ai-model/prompt/common.js +41 -0
  106. package/dist/lib/ai-model/prompt/common.js.map +1 -0
  107. package/dist/lib/ai-model/prompt/describe.js +78 -0
  108. package/dist/lib/ai-model/prompt/describe.js.map +1 -0
  109. package/dist/lib/ai-model/prompt/extraction.js +180 -0
  110. package/dist/lib/ai-model/prompt/extraction.js.map +1 -0
  111. package/dist/lib/ai-model/prompt/llm-locator.js +315 -0
  112. package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -0
  113. package/dist/lib/ai-model/prompt/llm-planning.js +407 -0
  114. package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -0
  115. package/dist/lib/ai-model/prompt/llm-section-locator.js +84 -0
  116. package/dist/lib/ai-model/prompt/llm-section-locator.js.map +1 -0
  117. package/dist/lib/ai-model/prompt/playwright-generator.js +178 -0
  118. package/dist/lib/ai-model/prompt/playwright-generator.js.map +1 -0
  119. package/dist/lib/ai-model/prompt/ui-tars-locator.js +68 -0
  120. package/dist/lib/ai-model/prompt/ui-tars-locator.js.map +1 -0
  121. package/dist/lib/ai-model/prompt/ui-tars-planning.js +73 -0
  122. package/dist/lib/ai-model/prompt/ui-tars-planning.js.map +1 -0
  123. package/dist/lib/ai-model/prompt/util.js +176 -0
  124. package/dist/lib/ai-model/prompt/util.js.map +1 -0
  125. package/dist/lib/ai-model/prompt/yaml-generator.js +280 -0
  126. package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -0
  127. package/dist/lib/ai-model/service-caller/index.js +623 -0
  128. package/dist/lib/ai-model/service-caller/index.js.map +1 -0
  129. package/dist/lib/ai-model/ui-tars-planning.js +238 -0
  130. package/dist/lib/ai-model/ui-tars-planning.js.map +1 -0
  131. package/dist/lib/device/index.js +255 -0
  132. package/dist/lib/device/index.js.map +1 -0
  133. package/dist/lib/image/index.js +56 -0
  134. package/dist/lib/image/index.js.map +1 -0
  135. package/dist/lib/index.js +103 -0
  136. package/dist/lib/index.js.map +1 -0
  137. package/dist/lib/insight/index.js +267 -0
  138. package/dist/lib/insight/index.js.map +1 -0
  139. package/dist/lib/insight/utils.js +49 -0
  140. package/dist/lib/insight/utils.js.map +1 -0
  141. package/dist/lib/report.js +122 -0
  142. package/dist/lib/report.js.map +1 -0
  143. package/dist/lib/tree.js +44 -0
  144. package/dist/lib/tree.js.map +1 -0
  145. package/dist/lib/types.js +82 -0
  146. package/dist/lib/types.js.map +1 -0
  147. package/dist/lib/utils.js +283 -0
  148. package/dist/lib/utils.js.map +1 -0
  149. package/dist/lib/yaml/builder.js +57 -0
  150. package/dist/lib/yaml/builder.js.map +1 -0
  151. package/dist/lib/yaml/index.js +80 -0
  152. package/dist/lib/yaml/index.js.map +1 -0
  153. package/dist/lib/yaml/player.js +406 -0
  154. package/dist/lib/yaml/player.js.map +1 -0
  155. package/dist/lib/yaml/utils.js +126 -0
  156. package/dist/lib/yaml/utils.js.map +1 -0
  157. package/dist/lib/yaml.js +20 -0
  158. package/dist/lib/yaml.js.map +1 -0
  159. package/dist/types/agent/agent.d.ts +156 -0
  160. package/dist/types/agent/common.d.ts +0 -0
  161. package/dist/types/agent/index.d.ts +9 -0
  162. package/dist/types/agent/task-cache.d.ts +48 -0
  163. package/dist/types/agent/tasks.d.ts +48 -0
  164. package/dist/types/agent/ui-utils.d.ts +7 -0
  165. package/dist/types/agent/utils.d.ts +52 -0
  166. package/dist/types/ai-model/action-executor.d.ts +19 -0
  167. package/dist/types/ai-model/common.d.ts +569 -0
  168. package/dist/types/ai-model/conversation-history.d.ts +18 -0
  169. package/dist/types/ai-model/index.d.ts +13 -0
  170. package/dist/types/ai-model/inspect.d.ts +46 -0
  171. package/dist/types/ai-model/llm-planning.d.ts +11 -0
  172. package/dist/types/ai-model/prompt/assertion.d.ts +2 -0
  173. package/dist/types/ai-model/prompt/common.d.ts +2 -0
  174. package/dist/types/ai-model/prompt/describe.d.ts +1 -0
  175. package/dist/types/ai-model/prompt/extraction.d.ts +4 -0
  176. package/dist/types/ai-model/prompt/llm-locator.d.ts +9 -0
  177. package/dist/types/ai-model/prompt/llm-planning.d.ts +9 -0
  178. package/dist/types/ai-model/prompt/llm-section-locator.d.ts +6 -0
  179. package/dist/types/ai-model/prompt/playwright-generator.d.ts +26 -0
  180. package/dist/types/ai-model/prompt/ui-tars-locator.d.ts +1 -0
  181. package/dist/types/ai-model/prompt/ui-tars-planning.d.ts +2 -0
  182. package/dist/types/ai-model/prompt/util.d.ts +47 -0
  183. package/dist/types/ai-model/prompt/yaml-generator.d.ts +100 -0
  184. package/dist/types/ai-model/service-caller/index.d.ts +48 -0
  185. package/dist/types/ai-model/ui-tars-planning.d.ts +59 -0
  186. package/dist/types/device/index.d.ts +2158 -0
  187. package/dist/types/image/index.d.ts +1 -0
  188. package/dist/types/index.d.ts +12 -0
  189. package/dist/types/insight/index.d.ts +31 -0
  190. package/dist/types/insight/utils.d.ts +2 -0
  191. package/dist/types/report.d.ts +12 -0
  192. package/dist/types/tree.d.ts +1 -0
  193. package/dist/types/types.d.ts +414 -0
  194. package/dist/types/utils.d.ts +40 -0
  195. package/dist/types/yaml/builder.d.ts +2 -0
  196. package/dist/types/yaml/index.d.ts +3 -0
  197. package/dist/types/yaml/player.d.ts +34 -0
  198. package/dist/types/yaml/utils.d.ts +9 -0
  199. package/dist/types/yaml.d.ts +178 -0
  200. package/package.json +108 -0
@@ -0,0 +1,703 @@
1
+ "use strict";
2
// Minimal bundler runtime: a bag of helpers used to build this module's
// CommonJS export object.
var __webpack_require__ = {};
(()=>{
    // d(exports, definition): for every own key of `definition` that is not
    // already an own key of `exports`, define an enumerable getter on
    // `exports` that delegates to `definition[key]`.
    __webpack_require__.d = (exports1, definition)=>{
        for(var key in definition){
            if (__webpack_require__.o(definition, key) && !__webpack_require__.o(exports1, key)) {
                Object.defineProperty(exports1, key, {
                    enumerable: true,
                    get: definition[key]
                });
            }
        }
    };
})();
(()=>{
    // o(obj, prop): own-property check that does not rely on `obj`'s prototype.
    __webpack_require__.o = (obj, prop)=>Object.prototype.hasOwnProperty.call(obj, prop);
})();
(()=>{
    // r(exports): tag `exports` as an ES module (`Symbol.toStringTag` when
    // available, plus the `__esModule` flag).
    __webpack_require__.r = (exports1)=>{
        if ('undefined' != typeof Symbol && Symbol.toStringTag) {
            Object.defineProperty(exports1, Symbol.toStringTag, {
                value: 'Module'
            });
        }
        Object.defineProperty(exports1, '__esModule', {
            value: true
        });
    };
})();
var __webpack_exports__ = {};
__webpack_require__.r(__webpack_exports__);
// Exports are registered as getters, so the bindings below are only read when
// a consumer accesses them.
__webpack_require__.d(__webpack_exports__, {
    locatePlanForLocate: ()=>locatePlanForLocate,
    TaskExecutor: ()=>TaskExecutor
});
30
+ const index_js_namespaceObject = require("../ai-model/index.js");
31
+ const action_executor_js_namespaceObject = require("../ai-model/action-executor.js");
32
+ const external_utils_js_namespaceObject = require("../utils.js");
33
+ const env_namespaceObject = require("@rpascene/shared/env");
34
+ const logger_namespaceObject = require("@rpascene/shared/logger");
35
+ const utils_namespaceObject = require("@rpascene/shared/utils");
36
+ const external_ui_utils_js_namespaceObject = require("./ui-utils.js");
37
+ const external_utils_js_namespaceObject_1 = require("./utils.js");
38
/**
 * Sets `obj[key]` to `value` and returns `obj`.
 *
 * When `key` is already reachable on `obj` (own or inherited), the property is
 * (re)defined as an own, enumerable, configurable, writable data property, so
 * inherited accessors or read-only properties do not intercept the write.
 * Otherwise a plain assignment is used.
 *
 * @param {object} obj - Target object.
 * @param {string|symbol} key - Property key.
 * @param {*} value - Value to store.
 * @returns {object} The same `obj`.
 */
function _define_property(obj, key, value) {
    if (key in obj) {
        Object.defineProperty(obj, key, {
            value: value,
            enumerable: true,
            configurable: true,
            writable: true
        });
    } else {
        obj[key] = value;
    }
    return obj;
}
48
// Debug logger for this module, created under the 'device-task-executor' name.
const debug = (0, logger_namespaceObject.getDebug)('device-task-executor');
// Fallback replanning-cycle limits used by `getReplanningCycleLimit` when
// neither the instance nor the environment configuration provides one.
const defaultReplanningCycleLimit = 10;
// Fallback used instead of the one above when the caller reports 'vlm-ui-tars' mode.
const defaultVlmUiTarsReplanningCycleLimit = 40;
51
/**
 * Builds a `Locate` plan from a locate parameter.
 *
 * @param {string|object} param - Either a prompt string, which is wrapped as
 *   `{ prompt: param }`, or a locate-parameter object, which is used as-is.
 * @returns {{type: 'Locate', locate: object, param: object, thought: string}}
 *   A plan whose `locate` and `param` fields reference the same object and
 *   whose `thought` is an empty string.
 */
function locatePlanForLocate(param) {
    const locate = typeof param === 'string' ? { prompt: param } : param;
    return {
        type: 'Locate',
        locate,
        param: locate,
        thought: ''
    };
}
63
+ class TaskExecutor {
64
+ get page() {
65
+ return this.interface;
66
+ }
67
+ async recordScreenshot(timing) {
68
+ const base64 = await this.interface.screenshotBase64();
69
+ const item = {
70
+ type: 'screenshot',
71
+ ts: Date.now(),
72
+ screenshot: base64,
73
+ timing
74
+ };
75
+ return item;
76
+ }
77
+ prependExecutorWithScreenshot(taskApply, appendAfterExecution = false) {
78
+ const taskWithScreenshot = {
79
+ ...taskApply,
80
+ executor: async (param, context, ...args)=>{
81
+ const recorder = [];
82
+ const { task } = context;
83
+ task.recorder = recorder;
84
+ const shot = await this.recordScreenshot(`before ${task.type}`);
85
+ recorder.push(shot);
86
+ const result = await taskApply.executor(param, context, ...args);
87
+ if (appendAfterExecution) {
88
+ const shot2 = await this.recordScreenshot('after Action');
89
+ recorder.push(shot2);
90
+ }
91
+ return result;
92
+ }
93
+ };
94
+ return taskWithScreenshot;
95
+ }
96
+ async convertPlanToExecutable(plans, modelConfig, cacheable) {
97
+ const tasks = [];
98
+ const taskForLocatePlan = (plan, detailedLocateParam, onResult)=>{
99
+ if ('string' == typeof detailedLocateParam) detailedLocateParam = {
100
+ prompt: detailedLocateParam
101
+ };
102
+ if (void 0 !== cacheable) detailedLocateParam = {
103
+ ...detailedLocateParam,
104
+ cacheable
105
+ };
106
+ const taskFind = {
107
+ type: 'Insight',
108
+ subType: 'Locate',
109
+ param: detailedLocateParam,
110
+ thought: plan.thought,
111
+ executor: async (param, taskContext)=>{
112
+ var _this_taskCache, _locateCacheRecord_cacheContent;
113
+ const { task } = taskContext;
114
+ (0, utils_namespaceObject.assert)((null == param ? void 0 : param.prompt) || (null == param ? void 0 : param.id) || (null == param ? void 0 : param.bbox), `No prompt or id or position or bbox to locate, param=${JSON.stringify(param)}`);
115
+ let insightDump;
116
+ let usage;
117
+ const dumpCollector = (dump)=>{
118
+ var _dump_taskInfo, _dump_taskInfo1;
119
+ insightDump = dump;
120
+ usage = null == dump ? void 0 : null == (_dump_taskInfo = dump.taskInfo) ? void 0 : _dump_taskInfo.usage;
121
+ task.log = {
122
+ dump: insightDump
123
+ };
124
+ task.usage = usage;
125
+ if (null == dump ? void 0 : null == (_dump_taskInfo1 = dump.taskInfo) ? void 0 : _dump_taskInfo1.searchAreaUsage) task.searchAreaUsage = dump.taskInfo.searchAreaUsage;
126
+ };
127
+ this.insight.onceDumpUpdatedFn = dumpCollector;
128
+ const shotTime = Date.now();
129
+ const uiContext = await this.insight.contextRetrieverFn('locate');
130
+ task.uiContext = uiContext;
131
+ const recordItem = {
132
+ type: 'screenshot',
133
+ ts: shotTime,
134
+ screenshot: uiContext.screenshotBase64,
135
+ timing: 'before Insight'
136
+ };
137
+ task.recorder = [
138
+ recordItem
139
+ ];
140
+ const elementFromXpath = param.xpath && this.interface.getElementInfoByXpath ? await this.interface.getElementInfoByXpath(param.xpath) : void 0;
141
+ const userExpectedPathHitFlag = !!elementFromXpath;
142
+ const cachePrompt = param.prompt;
143
+ const locateCacheRecord = null == (_this_taskCache = this.taskCache) ? void 0 : _this_taskCache.matchLocateCache(cachePrompt);
144
+ const cacheEntry = null == locateCacheRecord ? void 0 : null == (_locateCacheRecord_cacheContent = locateCacheRecord.cacheContent) ? void 0 : _locateCacheRecord_cacheContent.cache;
145
+ const elementFromCache = userExpectedPathHitFlag ? null : await (0, external_utils_js_namespaceObject_1.matchElementFromCache)(this, cacheEntry, cachePrompt, param.cacheable);
146
+ const cacheHitFlag = !!elementFromCache;
147
+ const elementFromPlan = userExpectedPathHitFlag || cacheHitFlag ? void 0 : (0, external_utils_js_namespaceObject_1.matchElementFromPlan)(param, uiContext.tree);
148
+ const planHitFlag = !!elementFromPlan;
149
+ const elementFromAiLocate = userExpectedPathHitFlag || cacheHitFlag || planHitFlag ? void 0 : (await this.insight.locate(param, {
150
+ context: uiContext
151
+ }, modelConfig)).element;
152
+ const aiLocateHitFlag = !!elementFromAiLocate;
153
+ const element = elementFromXpath || elementFromCache || elementFromPlan || elementFromAiLocate;
154
+ let currentCacheEntry;
155
+ if (element && this.taskCache && !cacheHitFlag && (null == param ? void 0 : param.cacheable) !== false) if (this.interface.cacheFeatureForRect) try {
156
+ const feature = await this.interface.cacheFeatureForRect(element.rect, void 0 !== element.isOrderSensitive ? {
157
+ _orderSensitive: element.isOrderSensitive
158
+ } : void 0);
159
+ if (feature && Object.keys(feature).length > 0) {
160
+ debug('update cache, prompt: %s, cache: %o', cachePrompt, feature);
161
+ currentCacheEntry = feature;
162
+ this.taskCache.updateOrAppendCacheRecord({
163
+ type: 'locate',
164
+ prompt: cachePrompt,
165
+ cache: feature
166
+ }, locateCacheRecord);
167
+ } else debug('no cache data returned, skip cache update, prompt: %s', cachePrompt);
168
+ } catch (error) {
169
+ debug('cacheFeatureForRect failed: %s', error);
170
+ }
171
+ else debug('cacheFeatureForRect is not supported, skip cache update');
172
+ if (!element) throw new Error(`Element not found: ${param.prompt}`);
173
+ let hitBy;
174
+ if (userExpectedPathHitFlag) hitBy = {
175
+ from: 'User expected path',
176
+ context: {
177
+ xpath: param.xpath
178
+ }
179
+ };
180
+ else if (cacheHitFlag) hitBy = {
181
+ from: 'Cache',
182
+ context: {
183
+ cacheEntry,
184
+ cacheToSave: currentCacheEntry
185
+ }
186
+ };
187
+ else if (planHitFlag) hitBy = {
188
+ from: 'Planning',
189
+ context: {
190
+ id: null == elementFromPlan ? void 0 : elementFromPlan.id,
191
+ bbox: null == elementFromPlan ? void 0 : elementFromPlan.bbox
192
+ }
193
+ };
194
+ else if (aiLocateHitFlag) hitBy = {
195
+ from: 'AI model',
196
+ context: {
197
+ prompt: param.prompt
198
+ }
199
+ };
200
+ null == onResult || onResult(element);
201
+ return {
202
+ output: {
203
+ element
204
+ },
205
+ uiContext,
206
+ hitBy
207
+ };
208
+ }
209
+ };
210
+ return taskFind;
211
+ };
212
+ for (const plan of plans)if ('Locate' === plan.type) {
213
+ var _plan_locate, _plan_locate1;
214
+ if (!plan.locate || null === plan.locate || (null == (_plan_locate = plan.locate) ? void 0 : _plan_locate.id) === null || (null == (_plan_locate1 = plan.locate) ? void 0 : _plan_locate1.id) === 'null') {
215
+ debug('Locate action with id is null, will be ignored', plan);
216
+ continue;
217
+ }
218
+ const taskLocate = taskForLocatePlan(plan, plan.locate);
219
+ tasks.push(taskLocate);
220
+ } else if ('Error' === plan.type) {
221
+ var _plan_param;
222
+ const taskActionError = {
223
+ type: 'Action',
224
+ subType: 'Error',
225
+ param: plan.param,
226
+ thought: plan.thought || (null == (_plan_param = plan.param) ? void 0 : _plan_param.thought),
227
+ locate: plan.locate,
228
+ executor: async ()=>{
229
+ var _plan_param;
230
+ throw new Error((null == plan ? void 0 : plan.thought) || (null == (_plan_param = plan.param) ? void 0 : _plan_param.thought) || 'error without thought');
231
+ }
232
+ };
233
+ tasks.push(taskActionError);
234
+ } else if ('Finished' === plan.type) {
235
+ const taskActionFinished = {
236
+ type: 'Action',
237
+ subType: 'Finished',
238
+ param: null,
239
+ thought: plan.thought,
240
+ locate: plan.locate,
241
+ executor: async (param)=>{}
242
+ };
243
+ tasks.push(taskActionFinished);
244
+ } else if ('Sleep' === plan.type) {
245
+ const taskActionSleep = {
246
+ type: 'Action',
247
+ subType: 'Sleep',
248
+ param: plan.param,
249
+ thought: plan.thought,
250
+ locate: plan.locate,
251
+ executor: async (taskParam)=>{
252
+ await (0, external_utils_js_namespaceObject.sleep)((null == taskParam ? void 0 : taskParam.timeMs) || 3000);
253
+ }
254
+ };
255
+ tasks.push(taskActionSleep);
256
+ } else {
257
+ const planType = plan.type;
258
+ const actionSpace = await this.interface.actionSpace();
259
+ const action = actionSpace.find((action)=>action.name === planType);
260
+ const param = plan.param;
261
+ if (!action) throw new Error(`Action type '${planType}' not found`);
262
+ const locateFields = action ? (0, index_js_namespaceObject.findAllRpasceneLocatorField)(action.paramSchema) : [];
263
+ const requiredLocateFields = action ? (0, index_js_namespaceObject.findAllRpasceneLocatorField)(action.paramSchema, true) : [];
264
+ locateFields.forEach((field)=>{
265
+ if (param[field]) {
266
+ const locatePlan = locatePlanForLocate(param[field]);
267
+ debug('will prepend locate param for field', `action.type=${planType}`, `param=${JSON.stringify(param[field])}`, `locatePlan=${JSON.stringify(locatePlan)}`);
268
+ const locateTask = taskForLocatePlan(locatePlan, param[field], (result)=>{
269
+ param[field] = result;
270
+ });
271
+ tasks.push(locateTask);
272
+ } else {
273
+ (0, utils_namespaceObject.assert)(!requiredLocateFields.includes(field), `Required locate field '${field}' is not provided for action ${planType}`);
274
+ debug(`field '${field}' is not provided for action ${planType}`);
275
+ }
276
+ });
277
+ const task = {
278
+ type: 'Action',
279
+ subType: planType,
280
+ thought: plan.thought,
281
+ param: plan.param,
282
+ executor: async (param, context)=>{
283
+ var _context_element;
284
+ debug('executing action', planType, param, `context.element.center: ${null == (_context_element = context.element) ? void 0 : _context_element.center}`);
285
+ const uiContext = await this.insight.contextRetrieverFn('locate');
286
+ context.task.uiContext = uiContext;
287
+ requiredLocateFields.forEach((field)=>{
288
+ (0, utils_namespaceObject.assert)(param[field], `field '${field}' is required for action ${planType} but not provided. Cannot execute action ${planType}.`);
289
+ });
290
+ try {
291
+ await Promise.all([
292
+ (async ()=>{
293
+ if (this.interface.beforeInvokeAction) {
294
+ debug('will call "beforeInvokeAction" for interface');
295
+ await this.interface.beforeInvokeAction(action.name, param);
296
+ debug('called "beforeInvokeAction" for interface');
297
+ }
298
+ })(),
299
+ (0, external_utils_js_namespaceObject.sleep)(200)
300
+ ]);
301
+ } catch (originalError) {
302
+ const originalMessage = (null == originalError ? void 0 : originalError.message) || String(originalError);
303
+ throw new Error(`error in running beforeInvokeAction for ${action.name}: ${originalMessage}`, {
304
+ cause: originalError
305
+ });
306
+ }
307
+ if (action.paramSchema) try {
308
+ param = (0, index_js_namespaceObject.parseActionParam)(param, action.paramSchema);
309
+ } catch (error) {
310
+ throw new Error(`Invalid parameters for action ${action.name}: ${error.message}\nParameters: ${JSON.stringify(param)}`, {
311
+ cause: error
312
+ });
313
+ }
314
+ debug('calling action', action.name);
315
+ if (utils_namespaceObject.ifInNode) {
316
+ const actionFn = action.call.bind(this.interface);
317
+ await actionFn(param, context);
318
+ }
319
+ debug('called action', action.name);
320
+ await (0, external_utils_js_namespaceObject.sleep)(300);
321
+ try {
322
+ if (this.interface.afterInvokeAction) {
323
+ debug('will call "afterInvokeAction" for interface');
324
+ await this.interface.afterInvokeAction(action.name, param);
325
+ debug('called "afterInvokeAction" for interface');
326
+ }
327
+ } catch (originalError) {
328
+ const originalMessage = (null == originalError ? void 0 : originalError.message) || String(originalError);
329
+ throw new Error(`error in running afterInvokeAction for ${action.name}: ${originalMessage}`, {
330
+ cause: originalError
331
+ });
332
+ }
333
+ return {
334
+ output: {
335
+ success: true,
336
+ action: planType,
337
+ param: param
338
+ }
339
+ };
340
+ }
341
+ };
342
+ tasks.push(task);
343
+ }
344
+ const wrappedTasks = tasks.map((task, index)=>{
345
+ if ('Action' === task.type) return this.prependExecutorWithScreenshot(task, index === tasks.length - 1);
346
+ return task;
347
+ });
348
+ return {
349
+ tasks: wrappedTasks
350
+ };
351
+ }
352
+ async setupPlanningContext(executorContext) {
353
+ const shotTime = Date.now();
354
+ const uiContext = await this.insight.contextRetrieverFn('locate');
355
+ const recordItem = {
356
+ type: 'screenshot',
357
+ ts: shotTime,
358
+ screenshot: uiContext.screenshotBase64,
359
+ timing: 'before Planning'
360
+ };
361
+ executorContext.task.recorder = [
362
+ recordItem
363
+ ];
364
+ executorContext.task.uiContext = uiContext;
365
+ return {
366
+ uiContext
367
+ };
368
+ }
369
+ async loadYamlFlowAsPlanning(userInstruction, yamlString) {
370
+ const taskExecutor = new action_executor_js_namespaceObject.Executor((0, external_ui_utils_js_namespaceObject.taskTitleStr)('Action', userInstruction), {
371
+ onTaskStart: this.onTaskStartCallback
372
+ });
373
+ const task = {
374
+ type: 'Planning',
375
+ subType: 'LoadYaml',
376
+ locate: null,
377
+ param: {
378
+ userInstruction
379
+ },
380
+ executor: async (param, executorContext)=>{
381
+ await this.setupPlanningContext(executorContext);
382
+ return {
383
+ output: {
384
+ actions: [],
385
+ more_actions_needed_by_instruction: false,
386
+ log: '',
387
+ yamlString
388
+ },
389
+ cache: {
390
+ hit: true
391
+ },
392
+ hitBy: {
393
+ from: 'Cache',
394
+ context: {
395
+ yamlString
396
+ }
397
+ }
398
+ };
399
+ }
400
+ };
401
+ await taskExecutor.append(task);
402
+ await taskExecutor.flush();
403
+ return {
404
+ executor: taskExecutor
405
+ };
406
+ }
407
+ createPlanningTask(userInstruction, actionContext, modelConfig) {
408
+ const task = {
409
+ type: 'Planning',
410
+ subType: 'Plan',
411
+ locate: null,
412
+ param: {
413
+ userInstruction
414
+ },
415
+ executor: async (param, executorContext)=>{
416
+ const startTime = Date.now();
417
+ const { uiContext } = await this.setupPlanningContext(executorContext);
418
+ const { vlMode } = modelConfig;
419
+ const uiTarsModelVersion = 'vlm-ui-tars' === vlMode ? modelConfig.uiTarsModelVersion : void 0;
420
+ (0, utils_namespaceObject.assert)(this.interface.actionSpace, 'actionSpace for device is not implemented');
421
+ const actionSpace = await this.interface.actionSpace();
422
+ debug('actionSpace for this interface is:', actionSpace.map((action)=>action.name).join(', '));
423
+ (0, utils_namespaceObject.assert)(Array.isArray(actionSpace), 'actionSpace must be an array');
424
+ if (0 === actionSpace.length) console.warn(`ActionSpace for ${this.interface.interfaceType} is empty. This may lead to unexpected behavior.`);
425
+ const planResult = await (uiTarsModelVersion ? index_js_namespaceObject.uiTarsPlanning : index_js_namespaceObject.plan)(param.userInstruction, {
426
+ context: uiContext,
427
+ actionContext,
428
+ interfaceType: this.interface.interfaceType,
429
+ actionSpace,
430
+ modelConfig,
431
+ conversationHistory: this.conversationHistory
432
+ });
433
+ debug('planResult', JSON.stringify(planResult, null, 2));
434
+ const { actions, log, more_actions_needed_by_instruction, error, usage, rawResponse, sleep } = planResult;
435
+ executorContext.task.log = {
436
+ ...executorContext.task.log || {},
437
+ rawResponse
438
+ };
439
+ executorContext.task.usage = usage;
440
+ const finalActions = actions || [];
441
+ if (sleep) {
442
+ const timeNow = Date.now();
443
+ const timeRemaining = sleep - (timeNow - startTime);
444
+ if (timeRemaining > 0) finalActions.push({
445
+ type: 'Sleep',
446
+ param: {
447
+ timeMs: timeRemaining
448
+ },
449
+ locate: null
450
+ });
451
+ }
452
+ if (0 === finalActions.length) (0, utils_namespaceObject.assert)(!more_actions_needed_by_instruction || sleep, error ? `Failed to plan: ${error}` : 'No plan found');
453
+ return {
454
+ output: {
455
+ actions: finalActions,
456
+ more_actions_needed_by_instruction,
457
+ log,
458
+ yamlFlow: planResult.yamlFlow
459
+ },
460
+ cache: {
461
+ hit: false
462
+ },
463
+ uiContext
464
+ };
465
+ }
466
+ };
467
+ return task;
468
+ }
469
+ async runPlans(title, plans, modelConfig) {
470
+ const taskExecutor = new action_executor_js_namespaceObject.Executor(title, {
471
+ onTaskStart: this.onTaskStartCallback
472
+ });
473
+ const { tasks } = await this.convertPlanToExecutable(plans, modelConfig);
474
+ await taskExecutor.append(tasks);
475
+ const result = await taskExecutor.flush();
476
+ const { output } = result;
477
+ return {
478
+ output,
479
+ executor: taskExecutor
480
+ };
481
+ }
482
+ getReplanningCycleLimit(isVlmUiTars) {
483
+ return this.replanningCycleLimit || env_namespaceObject.globalConfigManager.getEnvConfigInNumber(env_namespaceObject.RPASCENE_REPLANNING_CYCLE_LIMIT) || (isVlmUiTars ? defaultVlmUiTarsReplanningCycleLimit : defaultReplanningCycleLimit);
484
+ }
485
+ async action(userPrompt, modelConfig, actionContext, cacheable) {
486
+ this.conversationHistory.reset();
487
+ const taskExecutor = new action_executor_js_namespaceObject.Executor((0, external_ui_utils_js_namespaceObject.taskTitleStr)('Action', userPrompt), {
488
+ onTaskStart: this.onTaskStartCallback
489
+ });
490
+ let replanCount = 0;
491
+ const yamlFlow = [];
492
+ const replanningCycleLimit = this.getReplanningCycleLimit('vlm-ui-tars' === modelConfig.vlMode);
493
+ while(true){
494
+ if (replanCount > replanningCycleLimit) {
495
+ const errorMsg = `Replanning ${replanningCycleLimit} times, which is more than the limit, please split the task into multiple steps`;
496
+ return this.appendErrorPlan(taskExecutor, errorMsg, modelConfig);
497
+ }
498
+ const planningTask = this.createPlanningTask(userPrompt, actionContext, modelConfig);
499
+ await taskExecutor.append(planningTask);
500
+ const result = await taskExecutor.flush();
501
+ console.dir(result.output.actions, 'result');
502
+ const planResult = null == result ? void 0 : result.output;
503
+ if (taskExecutor.isInErrorState()) return {
504
+ output: planResult,
505
+ executor: taskExecutor
506
+ };
507
+ const plans = planResult.actions || [];
508
+ yamlFlow.push(...planResult.yamlFlow || []);
509
+ let executables;
510
+ try {
511
+ executables = await this.convertPlanToExecutable(plans, modelConfig, cacheable);
512
+ taskExecutor.append(executables.tasks);
513
+ } catch (error) {
514
+ return this.appendErrorPlan(taskExecutor, `Error converting plans to executable tasks: ${error}, plans: ${JSON.stringify(plans)}`, modelConfig);
515
+ }
516
+ await taskExecutor.flush();
517
+ if (taskExecutor.isInErrorState()) return {
518
+ output: void 0,
519
+ executor: taskExecutor
520
+ };
521
+ if (!planResult.more_actions_needed_by_instruction) break;
522
+ replanCount++;
523
+ }
524
+ return {
525
+ output: {
526
+ yamlFlow
527
+ },
528
+ executor: taskExecutor
529
+ };
530
+ }
531
+ createTypeQueryTask(type, demand, modelConfig, opt, multimodalPrompt) {
532
+ const queryTask = {
533
+ type: 'Insight',
534
+ subType: type,
535
+ locate: null,
536
+ param: {
537
+ dataDemand: multimodalPrompt ? {
538
+ demand,
539
+ multimodalPrompt
540
+ } : demand
541
+ },
542
+ executor: async (param, taskContext)=>{
543
+ const { task } = taskContext;
544
+ let insightDump;
545
+ const dumpCollector = (dump)=>{
546
+ insightDump = dump;
547
+ };
548
+ this.insight.onceDumpUpdatedFn = dumpCollector;
549
+ const shotTime = Date.now();
550
+ const uiContext = await this.insight.contextRetrieverFn('extract');
551
+ task.uiContext = uiContext;
552
+ const recordItem = {
553
+ type: 'screenshot',
554
+ ts: shotTime,
555
+ screenshot: uiContext.screenshotBase64,
556
+ timing: 'before Extract'
557
+ };
558
+ task.recorder = [
559
+ recordItem
560
+ ];
561
+ const ifTypeRestricted = 'Query' !== type;
562
+ let demandInput = demand;
563
+ let keyOfResult = 'result';
564
+ if (ifTypeRestricted && ('Assert' === type || 'WaitFor' === type)) {
565
+ keyOfResult = 'StatementIsTruthy';
566
+ const booleanPrompt = 'Assert' === type ? `Boolean, whether the following statement is true: ${demand}` : `Boolean, the user wants to do some 'wait for' operation, please check whether the following statement is true: ${demand}`;
567
+ demandInput = {
568
+ [keyOfResult]: booleanPrompt
569
+ };
570
+ } else if (ifTypeRestricted) demandInput = {
571
+ [keyOfResult]: `${type}, ${demand}`
572
+ };
573
+ const { data, usage, thought } = await this.insight.extract(demandInput, modelConfig, opt, multimodalPrompt);
574
+ let outputResult = data;
575
+ if (ifTypeRestricted) if ('string' == typeof data) outputResult = data;
576
+ else if ('WaitFor' === type) outputResult = null == data ? false : data[keyOfResult];
577
+ else if (null == data) outputResult = null;
578
+ else {
579
+ (0, utils_namespaceObject.assert)((null == data ? void 0 : data[keyOfResult]) !== void 0, 'No result in query data');
580
+ outputResult = data[keyOfResult];
581
+ }
582
+ if ('Assert' === type && !outputResult) {
583
+ task.usage = usage;
584
+ task.thought = thought;
585
+ throw new Error(`Assertion failed: ${thought}`);
586
+ }
587
+ return {
588
+ output: outputResult,
589
+ log: insightDump,
590
+ usage,
591
+ thought
592
+ };
593
+ }
594
+ };
595
+ return queryTask;
596
+ }
597
+ async createTypeQueryExecution(type, demand, modelConfig, opt, multimodalPrompt) {
598
+ const taskExecutor = new action_executor_js_namespaceObject.Executor((0, external_ui_utils_js_namespaceObject.taskTitleStr)(type, 'string' == typeof demand ? demand : JSON.stringify(demand)), {
599
+ onTaskStart: this.onTaskStartCallback
600
+ });
601
+ const queryTask = await this.createTypeQueryTask(type, demand, modelConfig, opt, multimodalPrompt);
602
+ await taskExecutor.append(this.prependExecutorWithScreenshot(queryTask));
603
+ const result = await taskExecutor.flush();
604
+ if (!result) throw new Error('result of taskExecutor.flush() is undefined in function createTypeQueryTask');
605
+ const { output, thought } = result;
606
+ return {
607
+ output,
608
+ thought,
609
+ executor: taskExecutor
610
+ };
611
+ }
612
+ async appendErrorPlan(taskExecutor, errorMsg, modelConfig) {
613
+ const errorPlan = {
614
+ type: 'Error',
615
+ param: {
616
+ thought: errorMsg
617
+ },
618
+ locate: null
619
+ };
620
+ const { tasks } = await this.convertPlanToExecutable([
621
+ errorPlan
622
+ ], modelConfig);
623
+ await taskExecutor.append(this.prependExecutorWithScreenshot(tasks[0]));
624
+ await taskExecutor.flush();
625
+ return {
626
+ output: void 0,
627
+ executor: taskExecutor
628
+ };
629
+ }
630
+ async taskForSleep(timeMs, modelConfig) {
631
+ const sleepPlan = {
632
+ type: 'Sleep',
633
+ param: {
634
+ timeMs
635
+ },
636
+ locate: null
637
+ };
638
+ const { tasks: sleepTasks } = await this.convertPlanToExecutable([
639
+ sleepPlan
640
+ ], modelConfig);
641
+ return this.prependExecutorWithScreenshot(sleepTasks[0]);
642
+ }
643
+ async waitFor(assertion, opt, modelConfig) {
644
+ const { textPrompt, multimodalPrompt } = (0, external_utils_js_namespaceObject_1.parsePrompt)(assertion);
645
+ const description = `waitFor: ${textPrompt}`;
646
+ const taskExecutor = new action_executor_js_namespaceObject.Executor((0, external_ui_utils_js_namespaceObject.taskTitleStr)('WaitFor', description), {
647
+ onTaskStart: this.onTaskStartCallback
648
+ });
649
+ const { timeoutMs, checkIntervalMs } = opt;
650
+ (0, utils_namespaceObject.assert)(assertion, 'No assertion for waitFor');
651
+ (0, utils_namespaceObject.assert)(timeoutMs, 'No timeoutMs for waitFor');
652
+ (0, utils_namespaceObject.assert)(checkIntervalMs, 'No checkIntervalMs for waitFor');
653
+ (0, utils_namespaceObject.assert)(checkIntervalMs <= timeoutMs, `wrong config for waitFor: checkIntervalMs must be less than timeoutMs, config: {checkIntervalMs: ${checkIntervalMs}, timeoutMs: ${timeoutMs}}`);
654
+ const overallStartTime = Date.now();
655
+ let startTime = Date.now();
656
+ let errorThought = '';
657
+ while(Date.now() - overallStartTime < timeoutMs){
658
+ startTime = Date.now();
659
+ const queryTask = await this.createTypeQueryTask('WaitFor', textPrompt, modelConfig, {
660
+ doNotThrowError: true
661
+ }, multimodalPrompt);
662
+ await taskExecutor.append(this.prependExecutorWithScreenshot(queryTask));
663
+ const result = await taskExecutor.flush();
664
+ if (null == result ? void 0 : result.output) return {
665
+ output: void 0,
666
+ executor: taskExecutor
667
+ };
668
+ errorThought = (null == result ? void 0 : result.thought) || !result && `No result from assertion: ${textPrompt}` || `unknown error when waiting for assertion: ${textPrompt}`;
669
+ const now = Date.now();
670
+ if (now - startTime < checkIntervalMs) {
671
+ const timeRemaining = checkIntervalMs - (now - startTime);
672
+ const sleepTask = await this.taskForSleep(timeRemaining, modelConfig);
673
+ await taskExecutor.append(sleepTask);
674
+ }
675
+ }
676
+ return this.appendErrorPlan(taskExecutor, `waitFor timeout: ${errorThought}`, modelConfig);
677
+ }
678
+ constructor(interfaceInstance, insight, opts){
679
+ _define_property(this, "interface", void 0);
680
+ _define_property(this, "insight", void 0);
681
+ _define_property(this, "taskCache", void 0);
682
+ _define_property(this, "conversationHistory", void 0);
683
+ _define_property(this, "onTaskStartCallback", void 0);
684
+ _define_property(this, "replanningCycleLimit", void 0);
685
+ this.interface = interfaceInstance;
686
+ this.insight = insight;
687
+ this.taskCache = opts.taskCache;
688
+ this.onTaskStartCallback = null == opts ? void 0 : opts.onTaskStart;
689
+ this.replanningCycleLimit = opts.replanningCycleLimit;
690
+ this.conversationHistory = new index_js_namespaceObject.ConversationHistory();
691
+ }
692
+ }
693
// CommonJS export wiring: publish the two named exports explicitly, copy any
// other enumerable key of __webpack_exports__, then mark the module as an
// ES-module interop object.
exports.TaskExecutor = __webpack_exports__.TaskExecutor;
exports.locatePlanForLocate = __webpack_exports__.locatePlanForLocate;
for (var __webpack_i__ in __webpack_exports__) {
    const isNamedExport = "TaskExecutor" === __webpack_i__ || "locatePlanForLocate" === __webpack_i__;
    if (!isNamedExport) {
        exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
    }
}
Object.defineProperty(exports, '__esModule', {
    value: true
});
702
+
703
+ //# sourceMappingURL=tasks.js.map