@rpascene/core 0.30.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +9 -0
  3. package/dist/es/agent/agent.mjs +636 -0
  4. package/dist/es/agent/agent.mjs.map +1 -0
  5. package/dist/es/agent/common.mjs +0 -0
  6. package/dist/es/agent/index.mjs +6 -0
  7. package/dist/es/agent/task-cache.mjs +184 -0
  8. package/dist/es/agent/task-cache.mjs.map +1 -0
  9. package/dist/es/agent/tasks.mjs +666 -0
  10. package/dist/es/agent/tasks.mjs.map +1 -0
  11. package/dist/es/agent/ui-utils.mjs +72 -0
  12. package/dist/es/agent/ui-utils.mjs.map +1 -0
  13. package/dist/es/agent/utils.mjs +162 -0
  14. package/dist/es/agent/utils.mjs.map +1 -0
  15. package/dist/es/ai-model/action-executor.mjs +129 -0
  16. package/dist/es/ai-model/action-executor.mjs.map +1 -0
  17. package/dist/es/ai-model/common.mjs +355 -0
  18. package/dist/es/ai-model/common.mjs.map +1 -0
  19. package/dist/es/ai-model/conversation-history.mjs +58 -0
  20. package/dist/es/ai-model/conversation-history.mjs.map +1 -0
  21. package/dist/es/ai-model/index.mjs +11 -0
  22. package/dist/es/ai-model/inspect.mjs +286 -0
  23. package/dist/es/ai-model/inspect.mjs.map +1 -0
  24. package/dist/es/ai-model/llm-planning.mjs +140 -0
  25. package/dist/es/ai-model/llm-planning.mjs.map +1 -0
  26. package/dist/es/ai-model/prompt/assertion.mjs +31 -0
  27. package/dist/es/ai-model/prompt/assertion.mjs.map +1 -0
  28. package/dist/es/ai-model/prompt/common.mjs +7 -0
  29. package/dist/es/ai-model/prompt/common.mjs.map +1 -0
  30. package/dist/es/ai-model/prompt/describe.mjs +44 -0
  31. package/dist/es/ai-model/prompt/describe.mjs.map +1 -0
  32. package/dist/es/ai-model/prompt/extraction.mjs +140 -0
  33. package/dist/es/ai-model/prompt/extraction.mjs.map +1 -0
  34. package/dist/es/ai-model/prompt/llm-locator.mjs +275 -0
  35. package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -0
  36. package/dist/es/ai-model/prompt/llm-planning.mjs +367 -0
  37. package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -0
  38. package/dist/es/ai-model/prompt/llm-section-locator.mjs +47 -0
  39. package/dist/es/ai-model/prompt/llm-section-locator.mjs.map +1 -0
  40. package/dist/es/ai-model/prompt/playwright-generator.mjs +117 -0
  41. package/dist/es/ai-model/prompt/playwright-generator.mjs.map +1 -0
  42. package/dist/es/ai-model/prompt/ui-tars-locator.mjs +34 -0
  43. package/dist/es/ai-model/prompt/ui-tars-locator.mjs.map +1 -0
  44. package/dist/es/ai-model/prompt/ui-tars-planning.mjs +36 -0
  45. package/dist/es/ai-model/prompt/ui-tars-planning.mjs.map +1 -0
  46. package/dist/es/ai-model/prompt/util.mjs +124 -0
  47. package/dist/es/ai-model/prompt/util.mjs.map +1 -0
  48. package/dist/es/ai-model/prompt/yaml-generator.mjs +219 -0
  49. package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -0
  50. package/dist/es/ai-model/service-caller/index.mjs +537 -0
  51. package/dist/es/ai-model/service-caller/index.mjs.map +1 -0
  52. package/dist/es/ai-model/ui-tars-planning.mjs +201 -0
  53. package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -0
  54. package/dist/es/device/index.mjs +152 -0
  55. package/dist/es/device/index.mjs.map +1 -0
  56. package/dist/es/image/index.mjs +2 -0
  57. package/dist/es/index.mjs +11 -0
  58. package/dist/es/index.mjs.map +1 -0
  59. package/dist/es/insight/index.mjs +233 -0
  60. package/dist/es/insight/index.mjs.map +1 -0
  61. package/dist/es/insight/utils.mjs +15 -0
  62. package/dist/es/insight/utils.mjs.map +1 -0
  63. package/dist/es/report.mjs +88 -0
  64. package/dist/es/report.mjs.map +1 -0
  65. package/dist/es/tree.mjs +2 -0
  66. package/dist/es/types.mjs +11 -0
  67. package/dist/es/types.mjs.map +1 -0
  68. package/dist/es/utils.mjs +204 -0
  69. package/dist/es/utils.mjs.map +1 -0
  70. package/dist/es/yaml/builder.mjs +13 -0
  71. package/dist/es/yaml/builder.mjs.map +1 -0
  72. package/dist/es/yaml/index.mjs +3 -0
  73. package/dist/es/yaml/player.mjs +372 -0
  74. package/dist/es/yaml/player.mjs.map +1 -0
  75. package/dist/es/yaml/utils.mjs +73 -0
  76. package/dist/es/yaml/utils.mjs.map +1 -0
  77. package/dist/es/yaml.mjs +0 -0
  78. package/dist/lib/agent/agent.js +683 -0
  79. package/dist/lib/agent/agent.js.map +1 -0
  80. package/dist/lib/agent/common.js +5 -0
  81. package/dist/lib/agent/index.js +81 -0
  82. package/dist/lib/agent/index.js.map +1 -0
  83. package/dist/lib/agent/task-cache.js +236 -0
  84. package/dist/lib/agent/task-cache.js.map +1 -0
  85. package/dist/lib/agent/tasks.js +703 -0
  86. package/dist/lib/agent/tasks.js.map +1 -0
  87. package/dist/lib/agent/ui-utils.js +121 -0
  88. package/dist/lib/agent/ui-utils.js.map +1 -0
  89. package/dist/lib/agent/utils.js +233 -0
  90. package/dist/lib/agent/utils.js.map +1 -0
  91. package/dist/lib/ai-model/action-executor.js +163 -0
  92. package/dist/lib/ai-model/action-executor.js.map +1 -0
  93. package/dist/lib/ai-model/common.js +461 -0
  94. package/dist/lib/ai-model/common.js.map +1 -0
  95. package/dist/lib/ai-model/conversation-history.js +92 -0
  96. package/dist/lib/ai-model/conversation-history.js.map +1 -0
  97. package/dist/lib/ai-model/index.js +131 -0
  98. package/dist/lib/ai-model/index.js.map +1 -0
  99. package/dist/lib/ai-model/inspect.js +326 -0
  100. package/dist/lib/ai-model/inspect.js.map +1 -0
  101. package/dist/lib/ai-model/llm-planning.js +174 -0
  102. package/dist/lib/ai-model/llm-planning.js.map +1 -0
  103. package/dist/lib/ai-model/prompt/assertion.js +65 -0
  104. package/dist/lib/ai-model/prompt/assertion.js.map +1 -0
  105. package/dist/lib/ai-model/prompt/common.js +41 -0
  106. package/dist/lib/ai-model/prompt/common.js.map +1 -0
  107. package/dist/lib/ai-model/prompt/describe.js +78 -0
  108. package/dist/lib/ai-model/prompt/describe.js.map +1 -0
  109. package/dist/lib/ai-model/prompt/extraction.js +180 -0
  110. package/dist/lib/ai-model/prompt/extraction.js.map +1 -0
  111. package/dist/lib/ai-model/prompt/llm-locator.js +315 -0
  112. package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -0
  113. package/dist/lib/ai-model/prompt/llm-planning.js +407 -0
  114. package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -0
  115. package/dist/lib/ai-model/prompt/llm-section-locator.js +84 -0
  116. package/dist/lib/ai-model/prompt/llm-section-locator.js.map +1 -0
  117. package/dist/lib/ai-model/prompt/playwright-generator.js +178 -0
  118. package/dist/lib/ai-model/prompt/playwright-generator.js.map +1 -0
  119. package/dist/lib/ai-model/prompt/ui-tars-locator.js +68 -0
  120. package/dist/lib/ai-model/prompt/ui-tars-locator.js.map +1 -0
  121. package/dist/lib/ai-model/prompt/ui-tars-planning.js +73 -0
  122. package/dist/lib/ai-model/prompt/ui-tars-planning.js.map +1 -0
  123. package/dist/lib/ai-model/prompt/util.js +176 -0
  124. package/dist/lib/ai-model/prompt/util.js.map +1 -0
  125. package/dist/lib/ai-model/prompt/yaml-generator.js +280 -0
  126. package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -0
  127. package/dist/lib/ai-model/service-caller/index.js +623 -0
  128. package/dist/lib/ai-model/service-caller/index.js.map +1 -0
  129. package/dist/lib/ai-model/ui-tars-planning.js +238 -0
  130. package/dist/lib/ai-model/ui-tars-planning.js.map +1 -0
  131. package/dist/lib/device/index.js +255 -0
  132. package/dist/lib/device/index.js.map +1 -0
  133. package/dist/lib/image/index.js +56 -0
  134. package/dist/lib/image/index.js.map +1 -0
  135. package/dist/lib/index.js +103 -0
  136. package/dist/lib/index.js.map +1 -0
  137. package/dist/lib/insight/index.js +267 -0
  138. package/dist/lib/insight/index.js.map +1 -0
  139. package/dist/lib/insight/utils.js +49 -0
  140. package/dist/lib/insight/utils.js.map +1 -0
  141. package/dist/lib/report.js +122 -0
  142. package/dist/lib/report.js.map +1 -0
  143. package/dist/lib/tree.js +44 -0
  144. package/dist/lib/tree.js.map +1 -0
  145. package/dist/lib/types.js +82 -0
  146. package/dist/lib/types.js.map +1 -0
  147. package/dist/lib/utils.js +283 -0
  148. package/dist/lib/utils.js.map +1 -0
  149. package/dist/lib/yaml/builder.js +57 -0
  150. package/dist/lib/yaml/builder.js.map +1 -0
  151. package/dist/lib/yaml/index.js +80 -0
  152. package/dist/lib/yaml/index.js.map +1 -0
  153. package/dist/lib/yaml/player.js +406 -0
  154. package/dist/lib/yaml/player.js.map +1 -0
  155. package/dist/lib/yaml/utils.js +126 -0
  156. package/dist/lib/yaml/utils.js.map +1 -0
  157. package/dist/lib/yaml.js +20 -0
  158. package/dist/lib/yaml.js.map +1 -0
  159. package/dist/types/agent/agent.d.ts +156 -0
  160. package/dist/types/agent/common.d.ts +0 -0
  161. package/dist/types/agent/index.d.ts +9 -0
  162. package/dist/types/agent/task-cache.d.ts +48 -0
  163. package/dist/types/agent/tasks.d.ts +48 -0
  164. package/dist/types/agent/ui-utils.d.ts +7 -0
  165. package/dist/types/agent/utils.d.ts +52 -0
  166. package/dist/types/ai-model/action-executor.d.ts +19 -0
  167. package/dist/types/ai-model/common.d.ts +569 -0
  168. package/dist/types/ai-model/conversation-history.d.ts +18 -0
  169. package/dist/types/ai-model/index.d.ts +13 -0
  170. package/dist/types/ai-model/inspect.d.ts +46 -0
  171. package/dist/types/ai-model/llm-planning.d.ts +11 -0
  172. package/dist/types/ai-model/prompt/assertion.d.ts +2 -0
  173. package/dist/types/ai-model/prompt/common.d.ts +2 -0
  174. package/dist/types/ai-model/prompt/describe.d.ts +1 -0
  175. package/dist/types/ai-model/prompt/extraction.d.ts +4 -0
  176. package/dist/types/ai-model/prompt/llm-locator.d.ts +9 -0
  177. package/dist/types/ai-model/prompt/llm-planning.d.ts +9 -0
  178. package/dist/types/ai-model/prompt/llm-section-locator.d.ts +6 -0
  179. package/dist/types/ai-model/prompt/playwright-generator.d.ts +26 -0
  180. package/dist/types/ai-model/prompt/ui-tars-locator.d.ts +1 -0
  181. package/dist/types/ai-model/prompt/ui-tars-planning.d.ts +2 -0
  182. package/dist/types/ai-model/prompt/util.d.ts +47 -0
  183. package/dist/types/ai-model/prompt/yaml-generator.d.ts +100 -0
  184. package/dist/types/ai-model/service-caller/index.d.ts +48 -0
  185. package/dist/types/ai-model/ui-tars-planning.d.ts +59 -0
  186. package/dist/types/device/index.d.ts +2158 -0
  187. package/dist/types/image/index.d.ts +1 -0
  188. package/dist/types/index.d.ts +12 -0
  189. package/dist/types/insight/index.d.ts +31 -0
  190. package/dist/types/insight/utils.d.ts +2 -0
  191. package/dist/types/report.d.ts +12 -0
  192. package/dist/types/tree.d.ts +1 -0
  193. package/dist/types/types.d.ts +414 -0
  194. package/dist/types/utils.d.ts +40 -0
  195. package/dist/types/yaml/builder.d.ts +2 -0
  196. package/dist/types/yaml/index.d.ts +3 -0
  197. package/dist/types/yaml/player.d.ts +34 -0
  198. package/dist/types/yaml/utils.d.ts +9 -0
  199. package/dist/types/yaml.d.ts +178 -0
  200. package/package.json +108 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024-present Bytedance, Inc. and its affiliates.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,9 @@
1
+ ## Documentation
2
+
3
+ Automate browser actions, extract data, and perform assertions using AI. It offers a JavaScript SDK, a Chrome extension, and support for scripting in YAML.
4
+
5
+ See https://rpascenejs.com/ for details.
6
+
7
+ ## License
8
+
9
+ Rpascene is MIT licensed.
package/dist/es/agent/agent.mjs ADDED
@@ -0,0 +1,636 @@
1
+ import { Insight } from "../index.mjs";
2
+ import js_yaml from "js-yaml";
3
+ import { getVersion, groupedActionDumpFileExt, processCacheConfig, reportHTMLContent, stringifyDumpData, writeLogFile } from "../utils.mjs";
4
+ import { ScriptPlayer, buildDetailedLocateParam, parseYamlScript } from "../yaml/index.mjs";
5
+ import { ModelConfigManager, globalModelConfigManager } from "@rpascene/shared/env";
6
+ import { imageInfoOfBase64, resizeImgBase64 } from "@rpascene/shared/img";
7
+ import { getDebug } from "@rpascene/shared/logger";
8
+ import { assert } from "@rpascene/shared/utils";
9
+ import { TaskCache } from "./task-cache.mjs";
10
+ import { TaskExecutor, locatePlanForLocate } from "./tasks.mjs";
11
+ import { locateParamStr, paramStr, taskTitleStr, typeStr } from "./ui-utils.mjs";
12
+ import { commonContextParser, getReportFileName, parsePrompt, printReportMsg, trimContextByViewport } from "./utils.mjs";
13
/**
 * Compiler helper that sets `obj[key]` to `value`.
 *
 * When `key` is already reachable on `obj` (own or inherited) the property is
 * (re)defined as an own, enumerable, configurable, writable data property;
 * otherwise a plain assignment is used.
 *
 * @param {object} obj - target object (mutated).
 * @param {PropertyKey} key - property name.
 * @param {*} value - value to store.
 * @returns {object} the same `obj`.
 */
function _define_property(obj, key, value) {
    if (!(key in obj)) {
        obj[key] = value;
        return obj;
    }
    const descriptor = {
        value,
        enumerable: true,
        configurable: true,
        writable: true
    };
    Object.defineProperty(obj, key, descriptor);
    return obj;
}
23
+ const debug = getDebug('agent');
24
/**
 * Euclidean distance between two `[x, y]` points, rounded to the nearest integer.
 *
 * @param {[number, number]} p1 - first point.
 * @param {[number, number]} p2 - second point.
 * @returns {number} rounded distance.
 */
const distanceOfTwoPoints = ([x1, y1], [x2, y2]) => {
    const squaredDistance = (x1 - x2) ** 2 + (y1 - y2) ** 2;
    return Math.round(Math.sqrt(squaredDistance));
};
29
/**
 * Tests whether an `[x, y]` point lies inside a rectangle, edges included.
 *
 * @param {[number, number]} point - point to test.
 * @param {{left: number, top: number, width: number, height: number}} rect - rectangle.
 * @returns {boolean} true when the point is within or on the rectangle's bounds.
 */
const includedInRect = (point, rect) => {
    const [x, y] = point;
    const { left, top, width, height } = rect;
    const withinHorizontal = x >= left && x <= left + width;
    return withinHorizontal && y >= top && y <= top + height;
};
34
// Default options for the insight-extraction methods (aiQuery, aiBoolean, ...):
// screenshot only, no DOM.
const defaultInsightExtractOption = { domIncluded: false, screenshotIncluded: true };

// Accepted cache strategy names.
const CACHE_STRATEGIES = ['read-only', 'read-write', 'write-only'];

/**
 * @param {*} strategy - candidate value.
 * @returns {boolean} true when `strategy` is one of CACHE_STRATEGIES.
 */
const isValidCacheStrategy = (strategy) => CACHE_STRATEGIES.includes(strategy);

// Human-readable list of the accepted strategies, each wrapped in double quotes.
const CACHE_STRATEGY_VALUES = CACHE_STRATEGIES
    .map((name) => `"${name}"`)
    .join(', ');
45
+ class Agent {
46
+ get page() {
47
+ return this.interface;
48
+ }
49
+ ensureVLModelWarning() {
50
+ if (!this.hasWarnedNonVLModel && 'puppeteer' !== this.interface.interfaceType && 'playwright' !== this.interface.interfaceType && 'static' !== this.interface.interfaceType && 'chrome-extension-proxy' !== this.interface.interfaceType && 'page-over-chrome-extension-bridge' !== this.interface.interfaceType) {
51
+ this.modelConfigManager.throwErrorIfNonVLModel();
52
+ this.hasWarnedNonVLModel = true;
53
+ }
54
+ }
55
+ async getScreenshotScale(context) {
56
+ if (void 0 !== this.screenshotScale) return this.screenshotScale;
57
+ if (!this.screenshotScalePromise) this.screenshotScalePromise = (async ()=>{
58
+ var _context_size;
59
+ const pageWidth = null == (_context_size = context.size) ? void 0 : _context_size.width;
60
+ assert(pageWidth && pageWidth > 0, `Invalid page width when computing screenshot scale: ${pageWidth}`);
61
+ const { width: screenshotWidth } = await imageInfoOfBase64(context.screenshotBase64);
62
+ assert(Number.isFinite(screenshotWidth) && screenshotWidth > 0, `Invalid screenshot width when computing screenshot scale: ${screenshotWidth}`);
63
+ const computedScale = screenshotWidth / pageWidth;
64
+ assert(Number.isFinite(computedScale) && computedScale > 0, `Invalid computed screenshot scale: ${computedScale}`);
65
+ debug(`Computed screenshot scale ${computedScale} from screenshot width ${screenshotWidth} and page width ${pageWidth}`);
66
+ return computedScale;
67
+ })();
68
+ try {
69
+ this.screenshotScale = await this.screenshotScalePromise;
70
+ return this.screenshotScale;
71
+ } finally{
72
+ this.screenshotScalePromise = void 0;
73
+ }
74
+ }
75
+ async getActionSpace() {
76
+ return this.interface.actionSpace();
77
+ }
78
+ async getUIContext(action) {
79
+ this.ensureVLModelWarning();
80
+ if (this.frozenUIContext) {
81
+ debug('Using frozen page context for action:', action);
82
+ return this.frozenUIContext;
83
+ }
84
+ let context;
85
+ if (this.interface.getContext) {
86
+ debug('Using page.getContext for action:', action);
87
+ context = await this.interface.getContext();
88
+ } else {
89
+ debug('Using commonContextParser for action:', action);
90
+ context = await commonContextParser(this.interface, {
91
+ uploadServerUrl: this.modelConfigManager.getUploadTestServerUrl()
92
+ });
93
+ }
94
+ const computedScreenshotScale = await this.getScreenshotScale(context);
95
+ if (1 !== computedScreenshotScale) {
96
+ const scaleForLog = Number.parseFloat(computedScreenshotScale.toFixed(4));
97
+ debug(`Applying computed screenshot scale: ${scaleForLog} (resize to logical size)`);
98
+ const targetWidth = Math.round(context.size.width);
99
+ const targetHeight = Math.round(context.size.height);
100
+ debug(`Resizing screenshot to ${targetWidth}x${targetHeight}`);
101
+ context.screenshotBase64 = await resizeImgBase64(context.screenshotBase64, {
102
+ width: targetWidth,
103
+ height: targetHeight
104
+ });
105
+ } else debug(`screenshot scale=${computedScreenshotScale}`);
106
+ return context;
107
+ }
108
+ async _snapshotContext() {
109
+ return await this.getUIContext('locate');
110
+ }
111
+ async setAIActionContext(prompt) {
112
+ if (this.opts.aiActionContext) console.warn('aiActionContext is already set, and it is called again, will override the previous setting');
113
+ this.opts.aiActionContext = prompt;
114
+ }
115
+ resetDump() {
116
+ this.dump = {
117
+ sdkVersion: getVersion(),
118
+ groupName: this.opts.groupName,
119
+ groupDescription: this.opts.groupDescription,
120
+ executions: [],
121
+ modelBriefs: []
122
+ };
123
+ return this.dump;
124
+ }
125
+ appendExecutionDump(execution) {
126
+ const trimmedExecution = trimContextByViewport(execution);
127
+ const currentDump = this.dump;
128
+ currentDump.executions.push(trimmedExecution);
129
+ }
130
+ dumpDataString() {
131
+ this.dump.groupName = this.opts.groupName;
132
+ this.dump.groupDescription = this.opts.groupDescription;
133
+ return stringifyDumpData(this.dump);
134
+ }
135
+ reportHTMLString() {
136
+ return reportHTMLContent(this.dumpDataString());
137
+ }
138
+ writeOutActionDumps() {
139
+ if (this.destroyed) throw new Error('PageAgent has been destroyed. Cannot update report file.');
140
+ const { generateReport, autoPrintReportMsg } = this.opts;
141
+ this.reportFile = writeLogFile({
142
+ fileName: this.reportFileName,
143
+ fileExt: groupedActionDumpFileExt,
144
+ fileContent: this.dumpDataString(),
145
+ type: 'dump',
146
+ generateReport
147
+ });
148
+ debug('writeOutActionDumps', this.reportFile);
149
+ if (generateReport && autoPrintReportMsg && this.reportFile) printReportMsg(this.reportFile);
150
+ }
151
+ async callbackOnTaskStartTip(task) {
152
+ const param = paramStr(task);
153
+ const tip = param ? `${typeStr(task)} - ${param}` : typeStr(task);
154
+ if (this.onTaskStartTip) await this.onTaskStartTip(tip);
155
+ }
156
+ async afterTaskRunning(executor, doNotThrowError = false) {
157
+ const executionDump = executor.dump();
158
+ if (this.opts.aiActionContext) executionDump.aiActionContext = this.opts.aiActionContext;
159
+ this.appendExecutionDump(executionDump);
160
+ try {
161
+ if (this.onDumpUpdate) this.onDumpUpdate(this.dumpDataString());
162
+ } catch (error) {
163
+ console.error('Error in onDumpUpdate', error);
164
+ }
165
+ this.writeOutActionDumps();
166
+ if (executor.isInErrorState() && !doNotThrowError) {
167
+ const errorTask = executor.latestErrorTask();
168
+ throw new Error(`${null == errorTask ? void 0 : errorTask.errorMessage}\n${null == errorTask ? void 0 : errorTask.errorStack}`, {
169
+ cause: null == errorTask ? void 0 : errorTask.error
170
+ });
171
+ }
172
+ }
173
+ async callActionInActionSpace(type, opt) {
174
+ debug('callActionInActionSpace', type, ',', opt);
175
+ const actionPlan = {
176
+ type: type,
177
+ param: opt || {},
178
+ thought: ''
179
+ };
180
+ debug('actionPlan', actionPlan);
181
+ const plans = [
182
+ actionPlan
183
+ ].filter(Boolean);
184
+ const title = taskTitleStr(type, locateParamStr((null == opt ? void 0 : opt.locate) || {}));
185
+ const modelConfig = this.modelConfigManager.getModelConfig('grounding');
186
+ const { output, executor } = await this.taskExecutor.runPlans(title, plans, modelConfig);
187
+ await this.afterTaskRunning(executor);
188
+ return output;
189
+ }
190
+ async aiTap(locatePrompt, opt) {
191
+ assert(locatePrompt, 'missing locate prompt for tap');
192
+ const detailedLocateParam = buildDetailedLocateParam(locatePrompt, opt);
193
+ return this.callActionInActionSpace('Tap', {
194
+ locate: detailedLocateParam
195
+ });
196
+ }
197
+ async aiRightClick(locatePrompt, opt) {
198
+ assert(locatePrompt, 'missing locate prompt for right click');
199
+ const detailedLocateParam = buildDetailedLocateParam(locatePrompt, opt);
200
+ return this.callActionInActionSpace('RightClick', {
201
+ locate: detailedLocateParam
202
+ });
203
+ }
204
+ async aiDoubleClick(locatePrompt, opt) {
205
+ assert(locatePrompt, 'missing locate prompt for double click');
206
+ const detailedLocateParam = buildDetailedLocateParam(locatePrompt, opt);
207
+ return this.callActionInActionSpace('DoubleClick', {
208
+ locate: detailedLocateParam
209
+ });
210
+ }
211
+ async aiHover(locatePrompt, opt) {
212
+ assert(locatePrompt, 'missing locate prompt for hover');
213
+ const detailedLocateParam = buildDetailedLocateParam(locatePrompt, opt);
214
+ return this.callActionInActionSpace('Hover', {
215
+ locate: detailedLocateParam
216
+ });
217
+ }
218
+ async aiInput(locatePromptOrValue, locatePromptOrOpt, optOrUndefined) {
219
+ let value;
220
+ let locatePrompt;
221
+ let opt;
222
+ if ('object' == typeof locatePromptOrOpt && null !== locatePromptOrOpt && 'value' in locatePromptOrOpt) {
223
+ locatePrompt = locatePromptOrValue;
224
+ const optWithValue = locatePromptOrOpt;
225
+ value = optWithValue.value;
226
+ opt = optWithValue;
227
+ } else {
228
+ value = locatePromptOrValue;
229
+ locatePrompt = locatePromptOrOpt;
230
+ opt = {
231
+ ...optOrUndefined,
232
+ value
233
+ };
234
+ }
235
+ assert('string' == typeof value || 'number' == typeof value, 'input value must be a string or number, use empty string if you want to clear the input');
236
+ assert(locatePrompt, 'missing locate prompt for input');
237
+ const detailedLocateParam = buildDetailedLocateParam(locatePrompt, opt);
238
+ return this.callActionInActionSpace('Input', {
239
+ ...opt || {},
240
+ locate: detailedLocateParam
241
+ });
242
+ }
243
+ async aiKeyboardPress(locatePromptOrKeyName, locatePromptOrOpt, optOrUndefined) {
244
+ let keyName;
245
+ let locatePrompt;
246
+ let opt;
247
+ if ('object' == typeof locatePromptOrOpt && null !== locatePromptOrOpt && 'keyName' in locatePromptOrOpt) {
248
+ locatePrompt = locatePromptOrKeyName;
249
+ opt = locatePromptOrOpt;
250
+ } else {
251
+ keyName = locatePromptOrKeyName;
252
+ locatePrompt = locatePromptOrOpt;
253
+ opt = {
254
+ ...optOrUndefined || {},
255
+ keyName
256
+ };
257
+ }
258
+ assert(null == opt ? void 0 : opt.keyName, 'missing keyName for keyboard press');
259
+ const detailedLocateParam = locatePrompt ? buildDetailedLocateParam(locatePrompt, opt) : void 0;
260
+ return this.callActionInActionSpace('KeyboardPress', {
261
+ ...opt || {},
262
+ locate: detailedLocateParam
263
+ });
264
+ }
265
+ async aiScroll(locatePromptOrScrollParam, locatePromptOrOpt, optOrUndefined) {
266
+ let scrollParam;
267
+ let locatePrompt;
268
+ let opt;
269
+ if ('object' == typeof locatePromptOrOpt && ('direction' in locatePromptOrOpt || 'scrollType' in locatePromptOrOpt || 'distance' in locatePromptOrOpt)) {
270
+ locatePrompt = locatePromptOrScrollParam;
271
+ opt = locatePromptOrOpt;
272
+ } else {
273
+ scrollParam = locatePromptOrScrollParam;
274
+ locatePrompt = locatePromptOrOpt;
275
+ opt = {
276
+ ...optOrUndefined || {},
277
+ ...scrollParam || {}
278
+ };
279
+ }
280
+ const detailedLocateParam = buildDetailedLocateParam(locatePrompt || '', opt);
281
+ return this.callActionInActionSpace('Scroll', {
282
+ ...opt || {},
283
+ locate: detailedLocateParam
284
+ });
285
+ }
286
+ async aiAction(taskPrompt, opt) {
287
+ var _this_taskCache, _this_taskCache1;
288
+ const modelConfig = this.modelConfigManager.getModelConfig('planning');
289
+ const cacheable = null == opt ? void 0 : opt.cacheable;
290
+ const isVlmUiTars = 'vlm-ui-tars' === modelConfig.vlMode;
291
+ const matchedCache = isVlmUiTars || false === cacheable ? void 0 : null == (_this_taskCache = this.taskCache) ? void 0 : _this_taskCache.matchPlanCache(taskPrompt);
292
+ if (matchedCache && (null == (_this_taskCache1 = this.taskCache) ? void 0 : _this_taskCache1.isCacheResultUsed)) {
293
+ var _matchedCache_cacheContent, _matchedCache_cacheContent1;
294
+ const { executor } = await this.taskExecutor.loadYamlFlowAsPlanning(taskPrompt, null == (_matchedCache_cacheContent = matchedCache.cacheContent) ? void 0 : _matchedCache_cacheContent.yamlWorkflow);
295
+ await this.afterTaskRunning(executor);
296
+ debug('matched cache, will call .runYaml to run the action');
297
+ const yaml = null == (_matchedCache_cacheContent1 = matchedCache.cacheContent) ? void 0 : _matchedCache_cacheContent1.yamlWorkflow;
298
+ return this.runYaml(yaml);
299
+ }
300
+ const { output, executor } = await this.taskExecutor.action(taskPrompt, modelConfig, this.opts.aiActionContext, cacheable);
301
+ console.log(output, 'output');
302
+ if (this.taskCache && (null == output ? void 0 : output.yamlFlow) && false !== cacheable) {
303
+ const yamlContent = {
304
+ tasks: [
305
+ {
306
+ name: taskPrompt,
307
+ flow: output.yamlFlow
308
+ }
309
+ ]
310
+ };
311
+ const yamlFlowStr = js_yaml.dump(yamlContent);
312
+ this.taskCache.updateOrAppendCacheRecord({
313
+ type: 'plan',
314
+ prompt: taskPrompt,
315
+ yamlWorkflow: yamlFlowStr
316
+ }, matchedCache);
317
+ }
318
+ await this.afterTaskRunning(executor);
319
+ return output;
320
+ }
321
+ async aiQuery(demand, opt = defaultInsightExtractOption) {
322
+ const modelConfig = this.modelConfigManager.getModelConfig('VQA');
323
+ const { output, executor } = await this.taskExecutor.createTypeQueryExecution('Query', demand, modelConfig, opt);
324
+ await this.afterTaskRunning(executor);
325
+ return output;
326
+ }
327
+ async aiBoolean(prompt, opt = defaultInsightExtractOption) {
328
+ const modelConfig = this.modelConfigManager.getModelConfig('VQA');
329
+ const { textPrompt, multimodalPrompt } = parsePrompt(prompt);
330
+ const { output, executor } = await this.taskExecutor.createTypeQueryExecution('Boolean', textPrompt, modelConfig, opt, multimodalPrompt);
331
+ await this.afterTaskRunning(executor);
332
+ return output;
333
+ }
334
+ async aiNumber(prompt, opt = defaultInsightExtractOption) {
335
+ const modelConfig = this.modelConfigManager.getModelConfig('VQA');
336
+ const { textPrompt, multimodalPrompt } = parsePrompt(prompt);
337
+ const { output, executor } = await this.taskExecutor.createTypeQueryExecution('Number', textPrompt, modelConfig, opt, multimodalPrompt);
338
+ await this.afterTaskRunning(executor);
339
+ return output;
340
+ }
341
+ async aiString(prompt, opt = defaultInsightExtractOption) {
342
+ const modelConfig = this.modelConfigManager.getModelConfig('VQA');
343
+ const { textPrompt, multimodalPrompt } = parsePrompt(prompt);
344
+ const { output, executor } = await this.taskExecutor.createTypeQueryExecution('String', textPrompt, modelConfig, opt, multimodalPrompt);
345
+ await this.afterTaskRunning(executor);
346
+ return output;
347
+ }
348
+ async aiAsk(prompt, opt = defaultInsightExtractOption) {
349
+ return this.aiString(prompt, opt);
350
+ }
351
+ async describeElementAtPoint(center, opt) {
352
+ const { verifyPrompt = true, retryLimit = 3 } = opt || {};
353
+ let success = false;
354
+ let retryCount = 0;
355
+ let resultPrompt = '';
356
+ let deepThink = (null == opt ? void 0 : opt.deepThink) || false;
357
+ let verifyResult;
358
+ while(!success && retryCount < retryLimit){
359
+ if (retryCount >= 2) deepThink = true;
360
+ debug('aiDescribe', center, 'verifyPrompt', verifyPrompt, 'retryCount', retryCount, 'deepThink', deepThink);
361
+ const modelConfig = this.modelConfigManager.getModelConfig('grounding');
362
+ const text = await this.insight.describe(center, modelConfig, {
363
+ deepThink
364
+ });
365
+ debug('aiDescribe text', text);
366
+ assert(text.description, `failed to describe element at [${center}]`);
367
+ resultPrompt = text.description;
368
+ verifyResult = await this.verifyLocator(resultPrompt, deepThink ? {
369
+ deepThink: true
370
+ } : void 0, center, opt);
371
+ if (verifyResult.pass) success = true;
372
+ else retryCount++;
373
+ }
374
+ return {
375
+ prompt: resultPrompt,
376
+ deepThink,
377
+ verifyResult
378
+ };
379
+ }
380
+ async verifyLocator(prompt, locateOpt, expectCenter, verifyLocateOption) {
381
+ debug('verifyLocator', prompt, locateOpt, expectCenter, verifyLocateOption);
382
+ const { center: verifyCenter, rect: verifyRect } = await this.aiLocate(prompt, locateOpt);
383
+ const distance = distanceOfTwoPoints(expectCenter, verifyCenter);
384
+ const included = includedInRect(expectCenter, verifyRect);
385
+ const pass = distance <= ((null == verifyLocateOption ? void 0 : verifyLocateOption.centerDistanceThreshold) || 20) || included;
386
+ const verifyResult = {
387
+ pass,
388
+ rect: verifyRect,
389
+ center: verifyCenter,
390
+ centerDistance: distance
391
+ };
392
+ debug('aiDescribe verifyResult', verifyResult);
393
+ return verifyResult;
394
+ }
395
+ async aiLocate(prompt, opt) {
396
+ const locateParam = buildDetailedLocateParam(prompt, opt);
397
+ assert(locateParam, 'cannot get locate param for aiLocate');
398
+ const locatePlan = locatePlanForLocate(locateParam);
399
+ const plans = [
400
+ locatePlan
401
+ ];
402
+ const modelConfig = this.modelConfigManager.getModelConfig('grounding');
403
+ const { executor, output } = await this.taskExecutor.runPlans(taskTitleStr('Locate', locateParamStr(locateParam)), plans, modelConfig);
404
+ await this.afterTaskRunning(executor);
405
+ const { element } = output;
406
+ const dprValue = await this.interface.size().dpr;
407
+ const dprEntry = dprValue ? {
408
+ dpr: dprValue
409
+ } : {};
410
+ return {
411
+ rect: null == element ? void 0 : element.rect,
412
+ center: null == element ? void 0 : element.center,
413
+ ...dprEntry
414
+ };
415
+ }
416
+ async aiAssert(assertion, msg, opt) {
417
+ var _executor_latestErrorTask;
418
+ const modelConfig = this.modelConfigManager.getModelConfig('VQA');
419
+ const insightOpt = {
420
+ domIncluded: (null == opt ? void 0 : opt.domIncluded) ?? defaultInsightExtractOption.domIncluded,
421
+ screenshotIncluded: (null == opt ? void 0 : opt.screenshotIncluded) ?? defaultInsightExtractOption.screenshotIncluded,
422
+ doNotThrowError: null == opt ? void 0 : opt.doNotThrowError
423
+ };
424
+ const { textPrompt, multimodalPrompt } = parsePrompt(assertion);
425
+ const { output, executor, thought } = await this.taskExecutor.createTypeQueryExecution('Assert', textPrompt, modelConfig, insightOpt, multimodalPrompt);
426
+ await this.afterTaskRunning(executor, true);
427
+ const message = output ? void 0 : `Assertion failed: ${msg || ('string' == typeof assertion ? assertion : assertion.prompt)}\nReason: ${thought || (null == (_executor_latestErrorTask = executor.latestErrorTask()) ? void 0 : _executor_latestErrorTask.error) || '(no_reason)'}`;
428
+ if (null == opt ? void 0 : opt.keepRawResponse) return {
429
+ pass: output,
430
+ thought,
431
+ message
432
+ };
433
+ if (!output) throw new Error(message);
434
+ }
435
+ async aiWaitFor(assertion, opt) {
436
+ const modelConfig = this.modelConfigManager.getModelConfig('VQA');
437
+ const { executor } = await this.taskExecutor.waitFor(assertion, {
438
+ timeoutMs: (null == opt ? void 0 : opt.timeoutMs) || 15000,
439
+ checkIntervalMs: (null == opt ? void 0 : opt.checkIntervalMs) || 3000
440
+ }, modelConfig);
441
+ await this.afterTaskRunning(executor, true);
442
+ if (executor.isInErrorState()) {
443
+ const errorTask = executor.latestErrorTask();
444
+ throw new Error(`${null == errorTask ? void 0 : errorTask.error}\n${null == errorTask ? void 0 : errorTask.errorStack}`);
445
+ }
446
+ }
447
+ async ai(taskPrompt, type = 'action') {
448
+ if ('action' === type) return this.aiAction(taskPrompt);
449
+ if ('query' === type) return this.aiQuery(taskPrompt);
450
+ if ('assert' === type) return this.aiAssert(taskPrompt);
451
+ if ('tap' === type) return this.aiTap(taskPrompt);
452
+ if ('rightClick' === type) return this.aiRightClick(taskPrompt);
453
+ if ('doubleClick' === type) return this.aiDoubleClick(taskPrompt);
454
+ throw new Error(`Unknown type: ${type}, only support 'action', 'query', 'assert', 'tap', 'rightClick', 'doubleClick'`);
455
+ }
456
+ async runYaml(yamlScriptContent) {
457
+ const script = parseYamlScript(yamlScriptContent, 'yaml');
458
+ const player = new ScriptPlayer(script, async ()=>({
459
+ agent: this,
460
+ freeFn: []
461
+ }));
462
+ await player.run();
463
+ if ('error' === player.status) {
464
+ const errors = player.taskStatusList.filter((task)=>'error' === task.status).map((task)=>{
465
+ var _task_error;
466
+ return `task - ${task.name}: ${null == (_task_error = task.error) ? void 0 : _task_error.message}`;
467
+ }).join('\n');
468
+ throw new Error(`Error(s) occurred in running yaml script:\n${errors}`);
469
+ }
470
+ return {
471
+ result: player.result
472
+ };
473
+ }
474
+ async evaluateJavaScript(script) {
475
+ assert(this.interface.evaluateJavaScript, 'evaluateJavaScript is not supported in current agent');
476
+ return this.interface.evaluateJavaScript(script);
477
+ }
478
+ async destroy() {
479
+ var _this_interface_destroy, _this_interface;
480
+ if (this.destroyed) return;
481
+ await (null == (_this_interface_destroy = (_this_interface = this.interface).destroy) ? void 0 : _this_interface_destroy.call(_this_interface));
482
+ this.resetDump();
483
+ this.destroyed = true;
484
+ }
485
+ async logScreenshot(title, opt) {
486
+ const base64 = await this.interface.screenshotBase64();
487
+ const now = Date.now();
488
+ const recorder = [
489
+ {
490
+ type: 'screenshot',
491
+ ts: now,
492
+ screenshot: base64
493
+ }
494
+ ];
495
+ const task = {
496
+ type: 'Log',
497
+ subType: 'Screenshot',
498
+ status: 'finished',
499
+ recorder,
500
+ timing: {
501
+ start: now,
502
+ end: now,
503
+ cost: 0
504
+ },
505
+ param: {
506
+ content: (null == opt ? void 0 : opt.content) || ''
507
+ },
508
+ executor: async ()=>{}
509
+ };
510
+ const executionDump = {
511
+ logTime: now,
512
+ name: `Log - ${title || 'untitled'}`,
513
+ description: (null == opt ? void 0 : opt.content) || '',
514
+ tasks: [
515
+ task
516
+ ]
517
+ };
518
+ if (this.opts.aiActionContext) executionDump.aiActionContext = this.opts.aiActionContext;
519
+ this.appendExecutionDump(executionDump);
520
+ try {
521
+ var _this_onDumpUpdate, _this;
522
+ null == (_this_onDumpUpdate = (_this = this).onDumpUpdate) || _this_onDumpUpdate.call(_this, this.dumpDataString());
523
+ } catch (error) {
524
+ console.error('Failed to update dump', error);
525
+ }
526
+ this.writeOutActionDumps();
527
+ }
528
+ _unstableLogContent() {
529
+ const { groupName, groupDescription, executions } = this.dump;
530
+ const newExecutions = Array.isArray(executions) ? executions.map((execution)=>{
531
+ const { tasks, ...restExecution } = execution;
532
+ let newTasks = tasks;
533
+ if (Array.isArray(tasks)) newTasks = tasks.map((task)=>{
534
+ const { uiContext, log, ...restTask } = task;
535
+ return restTask;
536
+ });
537
+ return {
538
+ ...restExecution,
539
+ ...newTasks ? {
540
+ tasks: newTasks
541
+ } : {}
542
+ };
543
+ }) : [];
544
+ return {
545
+ groupName,
546
+ groupDescription,
547
+ executions: newExecutions
548
+ };
549
+ }
550
+ async freezePageContext() {
551
+ debug('Freezing page context');
552
+ const context = await this._snapshotContext();
553
+ context._isFrozen = true;
554
+ this.frozenUIContext = context;
555
+ debug('Page context frozen successfully');
556
+ }
557
+ async unfreezePageContext() {
558
+ debug('Unfreezing page context');
559
+ this.frozenUIContext = void 0;
560
+ debug('Page context unfrozen successfully');
561
+ }
562
+ processCacheConfig(opts) {
563
+ if (true === opts.cache) throw new Error('cache: true requires an explicit cache ID. Please provide:\nExample: cache: { id: "my-cache-id" }');
564
+ if (opts.cache && 'object' == typeof opts.cache && null !== opts.cache && !opts.cache.id) throw new Error('cache configuration requires an explicit id.\nExample: cache: { id: "my-cache-id" }');
565
+ const cacheConfig = processCacheConfig(opts.cache, opts.cacheId || opts.testId || 'default');
566
+ if (!cacheConfig) return null;
567
+ if ('object' == typeof cacheConfig && null !== cacheConfig) {
568
+ const id = cacheConfig.id;
569
+ const rawStrategy = cacheConfig.strategy;
570
+ let strategyValue;
571
+ if (void 0 === rawStrategy) strategyValue = 'read-write';
572
+ else if ('string' == typeof rawStrategy) strategyValue = rawStrategy;
573
+ else throw new Error(`cache.strategy must be a string when provided, but received type ${typeof rawStrategy}`);
574
+ if (!isValidCacheStrategy(strategyValue)) throw new Error(`cache.strategy must be one of ${CACHE_STRATEGY_VALUES}, but received "${strategyValue}"`);
575
+ const isReadOnly = 'read-only' === strategyValue;
576
+ const isWriteOnly = 'write-only' === strategyValue;
577
+ return {
578
+ id,
579
+ enabled: !isWriteOnly,
580
+ readOnly: isReadOnly,
581
+ writeOnly: isWriteOnly
582
+ };
583
+ }
584
+ return null;
585
+ }
586
+ async flushCache(options) {
587
+ if (!this.taskCache) throw new Error('Cache is not configured');
588
+ this.taskCache.flushCacheToFile(options);
589
+ }
590
+ constructor(interfaceInstance, opts){
591
+ _define_property(this, "interface", void 0);
592
+ _define_property(this, "insight", void 0);
593
+ _define_property(this, "dump", void 0);
594
+ _define_property(this, "reportFile", void 0);
595
+ _define_property(this, "reportFileName", void 0);
596
+ _define_property(this, "taskExecutor", void 0);
597
+ _define_property(this, "opts", void 0);
598
+ _define_property(this, "dryMode", false);
599
+ _define_property(this, "onTaskStartTip", void 0);
600
+ _define_property(this, "taskCache", void 0);
601
+ _define_property(this, "onDumpUpdate", void 0);
602
+ _define_property(this, "destroyed", false);
603
+ _define_property(this, "modelConfigManager", void 0);
604
+ _define_property(this, "frozenUIContext", void 0);
605
+ _define_property(this, "hasWarnedNonVLModel", false);
606
+ _define_property(this, "screenshotScale", void 0);
607
+ _define_property(this, "screenshotScalePromise", void 0);
608
+ this.interface = interfaceInstance;
609
+ this.opts = Object.assign({
610
+ generateReport: true,
611
+ autoPrintReportMsg: true,
612
+ groupName: 'Rpascene Report',
613
+ groupDescription: ''
614
+ }, opts || {});
615
+ if ((null == opts ? void 0 : opts.modelConfig) && 'function' != typeof (null == opts ? void 0 : opts.modelConfig)) throw new Error(`opts.modelConfig must be one of function or undefined, but got ${typeof (null == opts ? void 0 : opts.modelConfig)}`);
616
+ this.modelConfigManager = (null == opts ? void 0 : opts.modelConfig) ? new ModelConfigManager(opts.modelConfig) : globalModelConfigManager;
617
+ this.onTaskStartTip = this.opts.onTaskStartTip;
618
+ this.insight = new Insight(async (action)=>this.getUIContext(action));
619
+ const cacheConfigObj = this.processCacheConfig(opts || {});
620
+ if (cacheConfigObj) this.taskCache = new TaskCache(cacheConfigObj.id, cacheConfigObj.enabled, void 0, {
621
+ readOnly: cacheConfigObj.readOnly,
622
+ writeOnly: cacheConfigObj.writeOnly
623
+ });
624
+ this.taskExecutor = new TaskExecutor(this.interface, this.insight, {
625
+ taskCache: this.taskCache,
626
+ onTaskStart: this.callbackOnTaskStartTip.bind(this),
627
+ replanningCycleLimit: this.opts.replanningCycleLimit
628
+ });
629
+ this.dump = this.resetDump();
630
+ this.reportFileName = (null == opts ? void 0 : opts.reportFileName) || getReportFileName((null == opts ? void 0 : opts.testId) || this.interface.interfaceType || 'web');
631
+ }
632
+ }
633
/**
 * Factory wrapper around `new Agent(interfaceInstance, opts)`.
 *
 * @param {object} interfaceInstance - Passed to the Agent constructor.
 * @param {object} [opts] - Passed to the Agent constructor.
 * @returns {Agent} The newly constructed agent.
 */
const createAgent = (interfaceInstance, opts) => {
  return new Agent(interfaceInstance, opts);
};
634
+ export { Agent, createAgent };
635
+
636
+ //# sourceMappingURL=agent.mjs.map