misoai-web 1.0.3 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. package/README.md +349 -5
  2. package/dist/es/agent.js +2676 -0
  3. package/dist/es/agent.js.map +1 -0
  4. package/dist/es/bridge-mode-browser.js +955 -0
  5. package/dist/es/bridge-mode-browser.js.map +1 -0
  6. package/dist/es/bridge-mode.js +3037 -0
  7. package/dist/es/bridge-mode.js.map +1 -0
  8. package/dist/es/chrome-extension.js +3424 -0
  9. package/dist/es/chrome-extension.js.map +1 -0
  10. package/dist/es/index.js +3292 -0
  11. package/dist/es/index.js.map +1 -0
  12. package/dist/es/midscene-playground.js +3012 -0
  13. package/dist/es/midscene-playground.js.map +1 -0
  14. package/dist/es/midscene-server.js +247 -0
  15. package/dist/es/midscene-server.js.map +1 -0
  16. package/dist/es/playground.js +2783 -0
  17. package/dist/es/playground.js.map +1 -0
  18. package/dist/es/playwright-report.js +120 -0
  19. package/dist/es/playwright-report.js.map +1 -0
  20. package/dist/es/playwright.js +3237 -0
  21. package/dist/es/playwright.js.map +1 -0
  22. package/dist/es/puppeteer-agent-launcher.js +3187 -0
  23. package/dist/es/puppeteer-agent-launcher.js.map +1 -0
  24. package/dist/es/puppeteer.js +3034 -0
  25. package/dist/es/puppeteer.js.map +1 -0
  26. package/dist/es/ui-utils.js +106 -0
  27. package/dist/es/ui-utils.js.map +1 -0
  28. package/dist/es/utils.js +197 -0
  29. package/dist/es/utils.js.map +1 -0
  30. package/dist/es/yaml.js +351 -0
  31. package/dist/es/yaml.js.map +1 -0
  32. package/dist/lib/agent.js +2691 -0
  33. package/dist/lib/agent.js.map +1 -0
  34. package/dist/lib/bridge-mode-browser.js +989 -0
  35. package/dist/lib/bridge-mode-browser.js.map +1 -0
  36. package/dist/lib/bridge-mode.js +3057 -0
  37. package/dist/lib/bridge-mode.js.map +1 -0
  38. package/dist/lib/chrome-extension.js +3441 -0
  39. package/dist/lib/chrome-extension.js.map +1 -0
  40. package/dist/lib/index.js +3308 -0
  41. package/dist/lib/index.js.map +1 -0
  42. package/dist/lib/midscene-playground.js +3016 -0
  43. package/dist/lib/midscene-playground.js.map +1 -0
  44. package/dist/lib/midscene-server.js +273 -0
  45. package/dist/lib/midscene-server.js.map +1 -0
  46. package/dist/lib/playground.js +2802 -0
  47. package/dist/lib/playground.js.map +1 -0
  48. package/dist/lib/playwright-report.js +148 -0
  49. package/dist/lib/playwright-report.js.map +1 -0
  50. package/dist/lib/playwright.js +3254 -0
  51. package/dist/lib/playwright.js.map +1 -0
  52. package/dist/lib/puppeteer-agent-launcher.js +3200 -0
  53. package/dist/lib/puppeteer-agent-launcher.js.map +1 -0
  54. package/dist/lib/puppeteer.js +3045 -0
  55. package/dist/lib/puppeteer.js.map +1 -0
  56. package/dist/lib/ui-utils.js +137 -0
  57. package/dist/lib/ui-utils.js.map +1 -0
  58. package/dist/lib/utils.js +235 -0
  59. package/dist/lib/utils.js.map +1 -0
  60. package/dist/lib/yaml.js +390 -0
  61. package/dist/lib/yaml.js.map +1 -0
  62. package/dist/types/agent.d.ts +290 -0
  63. package/dist/types/bridge-mode-browser.d.ts +9 -0
  64. package/dist/types/bridge-mode.d.ts +40 -0
  65. package/dist/types/browser-a1877d18.d.ts +37 -0
  66. package/dist/types/chrome-extension.d.ts +18 -0
  67. package/dist/types/index.d.ts +16 -0
  68. package/dist/types/midscene-playground.d.ts +2 -0
  69. package/dist/types/midscene-server.d.ts +31 -0
  70. package/dist/types/page-663ece08.d.ts +333 -0
  71. package/dist/types/playground.d.ts +17 -0
  72. package/dist/types/playwright-report.d.ts +11 -0
  73. package/dist/types/playwright.d.ts +87 -0
  74. package/dist/types/puppeteer-agent-launcher.d.ts +40 -0
  75. package/dist/types/puppeteer.d.ts +17 -0
  76. package/dist/types/ui-utils.d.ts +14 -0
  77. package/dist/types/utils-badc824e.d.ts +34 -0
  78. package/dist/types/utils.d.ts +8 -0
  79. package/dist/types/yaml.d.ts +15 -0
  80. package/iife-script/htmlElement.js +99 -37
  81. package/iife-script/htmlElementDebug.js +92 -9
  82. package/package.json +2 -2
@@ -0,0 +1,3016 @@
1
+ "use strict";
2
// Bundler-generated CommonJS/ESM interop prelude: short aliases for the
// Object reflection helpers used below.
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;

// Defines a live getter on `to` for every own property name of `from` that
// `to` does not already own (and that is not `except`). Enumerability is taken
// from the source descriptor; a missing descriptor counts as enumerable.
var __copyProps = (to, from, except, desc) => {
  const isCopySource = from && typeof from === "object" || typeof from === "function";
  if (!isCopySource) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      get: () => from[key],
      enumerable: !desc || desc.enumerable
    });
  }
  return to;
};

// Wraps a required module in a namespace-like object. When the importer is in
// node compatibility mode, or the module was not produced by a Babel-compatible
// ESM->CJS transform (no "__esModule" flag), "default" is set to the module
// itself; the module's own properties are then mirrored through getters.
var __toESM = (mod, isNodeMode, target) => {
  target = mod != null ? __create(__getProtoOf(mod)) : {};
  const needsDefault = isNodeMode || !mod || !mod.__esModule;
  const base = needsDefault ? __defProp(target, "default", { value: mod, enumerable: true }) : target;
  return __copyProps(base, mod);
};
24
+
25
+ // src/common/utils.ts
26
+ var import_ai_model = require("misoai-core/ai-model");
27
+ var import_utils = require("misoai-core/utils");
28
+ var import_env = require("misoai-shared/env");
29
+ var import_extractor = require("misoai-shared/extractor");
30
+ var import_img = require("misoai-shared/img");
31
+ var import_utils2 = require("misoai-shared/utils");
32
+ var import_dayjs = __toESM(require("dayjs"));
33
+
34
+ // src/web-element.ts
35
// Plain record describing one element extracted from a page. `center` is
// derived from `rect` as the floored midpoint [x, y]; every other field is
// stored exactly as passed in.
var WebElementInfo = class {
  constructor(init) {
    const { content, rect, locator, id, attributes, indexId, xpaths } = init;
    const centerX = Math.floor(rect.left + rect.width / 2);
    const centerY = Math.floor(rect.top + rect.height / 2);
    Object.assign(this, {
      content,
      rect,
      center: [centerX, centerY],
      locator,
      id,
      attributes,
      indexId,
      xpaths
    });
  }
};
59
+
60
+ // src/common/utils.ts
61
/**
 * Collects the UI context of a page: screenshot, element tree, flattened
 * element list, viewport size and URL.
 *
 * If the page exposes `_forceUsePageContext`, its result is returned as-is and
 * nothing else is collected.
 *
 * @param page  page object; must be truthy.
 * @param _opt  unused.
 * @returns {Promise<object>} `{ content, tree, size, screenshotBase64, url }`.
 */
async function parseContextFromWebPage(page, _opt) {
  (0, import_utils2.assert)(page, "page is required");
  if (page._forceUsePageContext) {
    return await page._forceUsePageContext();
  }
  const url = await page.url();
  (0, import_utils.uploadTestInfoToServer)({ testUrl: url });

  // Screenshot and element tree are fetched concurrently.
  const [capturedShot, rawTree] = await Promise.all([
    page.screenshotBase64(),
    page.getElementsNodeTree()
  ]);

  // Rebuild every tree node as a WebElementInfo (only these six fields are kept).
  const toWebElement = ({ rect, id, content, attributes, locator, indexId }) => new WebElementInfo({ rect, locator, id, content, attributes, indexId });
  const webTree = (0, import_extractor.traverseTree)(rawTree, toWebElement);

  (0, import_utils2.assert)(capturedShot, "screenshotBase64 is required");
  const elementsInfo = (0, import_extractor.treeToList)(webTree);
  const size = await page.size();

  // When the reported device pixel ratio is above 1, resize the screenshot
  // to the reported width/height.
  const needsResize = size.dpr && size.dpr > 1;
  const screenshotBase64 = needsResize ? await (0, import_img.resizeImgBase64)(capturedShot, {
    width: size.width,
    height: size.height
  }) : capturedShot;

  return {
    content: elementsInfo,
    tree: webTree,
    size,
    screenshotBase64,
    url
  };
}
106
/**
 * Builds a report file base name of the form `<tag>-<timestamp>-<8-char id>`.
 * The tag read from the MIDSCENE_REPORT_TAG_NAME config wins over the `tag`
 * argument when it is truthy.
 *
 * @param {string} [tag="web"] fallback tag.
 * @returns {string}
 */
function reportFileName(tag = "web") {
  const configuredTag = (0, import_env.getAIConfig)(import_env.MIDSCENE_REPORT_TAG_NAME);
  const prefix = configuredTag || tag;
  const timestamp = (0, import_dayjs.default)().format("YYYY-MM-DD_HH-mm-ss");
  const shortId = (0, import_utils2.uuid)().substring(0, 8);
  return `${prefix}-${timestamp}-${shortId}`;
}
112
/**
 * Logs a one-line notice that the report file was updated.
 *
 * @param {string} filepath path included in the message.
 */
function printReportMsg(filepath) {
  const message = `Midscene - report file updated: ${filepath}`;
  (0, import_utils2.logMsg)(message);
}
115
+ var ERROR_CODE_NOT_IMPLEMENTED_AS_DESIGNED = "NOT_IMPLEMENTED_AS_DESIGNED";
116
/**
 * Replaces each of the characters / \ : * ? " < > | and space with "-".
 *
 * @param {string} str input text.
 * @returns {string} text with every listed character replaced.
 */
function replaceIllegalPathCharsAndSpace(str) {
  const illegalChars = /[/\\:*?"<>| ]/g;
  const sanitized = str.replace(illegalChars, "-");
  return sanitized;
}
119
/**
 * Resolves the element referenced by a plan's locate parameter.
 *
 * - With `id`: looks the node up in the cache list.
 * - With `bbox` ([x1, y1, x2, y2] by the indexing below): takes the floored
 *   box center, finds the element at that position in `tree`, and falls back
 *   to an element generated from the position itself.
 * - Otherwise (or without a locate parameter): returns undefined.
 *
 * @param planLocateParam locate parameter from the plan; may be nullish.
 * @param tree            element tree searched by position.
 */
function matchElementFromPlan(planLocateParam, tree) {
  if (!planLocateParam) {
    return void 0;
  }
  if (planLocateParam.id) {
    return (0, import_extractor.getNodeFromCacheList)(planLocateParam.id);
  }
  if (!planLocateParam.bbox) {
    return void 0;
  }
  const midpoint = (a, b) => Math.floor((a + b) / 2);
  const centerPosition = {
    x: midpoint(planLocateParam.bbox[0], planLocateParam.bbox[2]),
    y: midpoint(planLocateParam.bbox[1], planLocateParam.bbox[3])
  };
  const located = (0, import_ai_model.elementByPositionWithElementInfo)(tree, centerPosition);
  return located || (0, import_extractor.generateElementByPosition)(centerPosition);
}
139
+
140
+ // src/common/agent.ts
141
+ var import_misoai_core2 = require("misoai-core");
142
+ var import_js_yaml4 = __toESM(require("js-yaml"));
143
+
144
+ // src/yaml/player.ts
145
+ var import_node_fs = require("fs");
146
+ var import_node_path = require("path");
147
+ var import_utils3 = require("misoai-shared/utils");
148
+ var import_common = require("misoai-shared/common");
149
/**
 * Plays the tasks of a parsed script against an agent and tracks progress.
 *
 * State kept on the instance:
 * - `status` / `errorInSetup`: overall player status ("init", "running",
 *   "done", "error") and the error passed to setPlayerStatus, if any.
 * - `taskStatusList`: one entry per script task (task fields plus `index`,
 *   `status`, `totalSteps`, and later `currentStep` / `error`).
 * - `result`: values collected from query-like flow items, flushed to
 *   `output` as JSON after every update (no file output in the browser).
 * - `reportFile`, `agentStatusTip`, `pageAgent`: taken from / pointing to the
 *   agent returned by `setupAgent`.
 */
var ScriptPlayer = class {
  /**
   * @param script             parsed script (`target`/`web`/`android`, `tasks`).
   * @param setupAgent         async factory called with the platform config;
   *                           resolves to `{ agent, freeFn }`.
   * @param onTaskStatusChange optional callback invoked with a task status
   *                           entry whenever it changes.
   */
  constructor(script, setupAgent, onTaskStatusChange) {
    this.script = script;
    this.setupAgent = setupAgent;
    this.onTaskStatusChange = onTaskStatusChange;
    this.taskStatusList = [];
    this.status = "init";
    this.unnamedResultIndex = 0;
    this.pageAgent = null;
    this.result = {};
    if (import_utils3.ifInBrowser) {
      this.output = void 0;
    } else if (script.target?.output) {
      this.output = (0, import_node_path.resolve)(process.cwd(), script.target.output);
    } else {
      this.output = (0, import_node_path.join)((0, import_common.getMidsceneRunSubDir)("output"), `${process.pid}.json`);
    }
    this.taskStatusList = (script.tasks || []).map((task, taskIndex) => ({
      ...task,
      index: taskIndex,
      status: "init",
      totalSteps: task.flow?.length || 0
    }));
  }

  /**
   * Stores a result under `key` (or under the next unnamed index when `key`
   * is falsy) and flushes all results to the output file.
   */
  setResult(key, value) {
    const keyToUse = key || this.unnamedResultIndex++;
    // Own-property check: a stored falsy value (0, false, "", null) still
    // counts as existing, and inherited names such as "toString" do not.
    if (Object.prototype.hasOwnProperty.call(this.result, keyToUse)) {
      console.warn(`result key ${keyToUse} already exists, will overwrite`);
    }
    this.result[keyToUse] = value;
    this.flushResult();
  }

  /** Sets the overall status; `error` is recorded as `errorInSetup`. */
  setPlayerStatus(status, error) {
    this.status = status;
    this.errorInSetup = error;
  }

  /**
   * Invokes `onTaskStatusChange` with the status entry at `taskIndex`, or at
   * the current task index when `taskIndex` is not a number.
   */
  notifyCurrentTaskStatusChange(taskIndex) {
    const taskIndexToNotify = typeof taskIndex === "number" ? taskIndex : this.currentTaskIndex;
    if (typeof taskIndexToNotify !== "number") {
      return;
    }
    const taskStatus = this.taskStatusList[taskIndexToNotify];
    if (this.onTaskStatusChange) {
      this.onTaskStatusChange(taskStatus);
    }
  }

  /** Updates status (and error, when given) of one task and notifies. */
  async setTaskStatus(index, statusValue, error) {
    this.taskStatusList[index].status = statusValue;
    if (error) {
      this.taskStatusList[index].error = error;
    }
    this.notifyCurrentTaskStatusChange(index);
  }

  setTaskIndex(taskIndex) {
    this.currentTaskIndex = taskIndex;
  }

  /**
   * Writes `result` as pretty-printed JSON to `output`, creating the parent
   * directory when needed. Does nothing without results or an output path.
   */
  flushResult() {
    if (Object.keys(this.result).length && this.output) {
      const output = (0, import_node_path.resolve)(process.cwd(), this.output);
      const outputDir = (0, import_node_path.dirname)(output);
      if (!(0, import_node_fs.existsSync)(outputDir)) {
        (0, import_node_fs.mkdirSync)(outputDir, { recursive: true });
      }
      (0, import_node_fs.writeFileSync)(output, JSON.stringify(this.result, void 0, 2));
    }
  }

  /**
   * Executes every flow item of one task in order, dispatching on the key
   * present in the item. Query-like items store their result via setResult.
   *
   * @throws when the task has no flow, the flow is not an array, a prompt is
   *         missing or not a string, a sleep duration is not positive, or a
   *         flow item has no recognised key; agent errors propagate.
   */
  async playTask(taskStatus, agent) {
    const { flow } = taskStatus;
    (0, import_utils3.assert)(flow, "missing flow in task");
    (0, import_utils3.assert)(Array.isArray(flow), "flow in task must be an array");

    // Shared prompt validation; the two labels reproduce the per-item messages.
    const requirePrompt = (prompt, label, missingLabel = label) => {
      (0, import_utils3.assert)(prompt, `missing prompt for ${missingLabel}`);
      (0, import_utils3.assert)(
        typeof prompt === "string",
        `prompt for ${label} must be a string`
      );
      return prompt;
    };

    // Index loop instead of for...in: numeric steps, array order, no
    // inherited enumerable keys.
    for (let currentStep = 0; currentStep < flow.length; currentStep++) {
      taskStatus.currentStep = currentStep;
      const flowItem = flow[currentStep];
      if ("aiAction" in flowItem || "ai" in flowItem) {
        const prompt = requirePrompt(
          flowItem.aiAction || flowItem.ai,
          "aiAction",
          "ai (aiAction)"
        );
        await agent.aiAction(prompt, {
          cacheable: flowItem.cacheable
        });
      } else if ("aiAssert" in flowItem) {
        const prompt = requirePrompt(flowItem.aiAssert, "aiAssert");
        await agent.aiAssert(prompt);
      } else if ("aiQuery" in flowItem) {
        const prompt = requirePrompt(flowItem.aiQuery, "aiQuery");
        const queryResult = await agent.aiQuery(prompt);
        this.setResult(flowItem.name, queryResult);
      } else if ("aiNumber" in flowItem) {
        const prompt = requirePrompt(flowItem.aiNumber, "number");
        const numberResult = await agent.aiNumber(prompt);
        this.setResult(flowItem.name, numberResult);
      } else if ("aiString" in flowItem) {
        const prompt = requirePrompt(flowItem.aiString, "string");
        const stringResult = await agent.aiString(prompt);
        this.setResult(flowItem.name, stringResult);
      } else if ("aiBoolean" in flowItem) {
        const prompt = requirePrompt(flowItem.aiBoolean, "boolean");
        const booleanResult = await agent.aiBoolean(prompt);
        this.setResult(flowItem.name, booleanResult);
      } else if ("aiLocate" in flowItem) {
        const prompt = requirePrompt(flowItem.aiLocate, "aiLocate");
        const locateResult = await agent.aiLocate(prompt);
        this.setResult(flowItem.name, locateResult);
      } else if ("aiWaitFor" in flowItem) {
        const prompt = requirePrompt(flowItem.aiWaitFor, "aiWaitFor");
        await agent.aiWaitFor(prompt, { timeoutMs: flowItem.timeout });
      } else if ("sleep" in flowItem) {
        const ms = flowItem.sleep;
        const msNumber = typeof ms === "string" ? Number.parseInt(ms, 10) : ms;
        (0, import_utils3.assert)(
          msNumber && msNumber > 0,
          `ms for sleep must be greater than 0, but got ${ms}`
        );
        await new Promise((resolve2) => setTimeout(resolve2, msNumber));
      } else if ("aiTap" in flowItem) {
        await agent.aiTap(flowItem.aiTap, flowItem);
      } else if ("aiHover" in flowItem) {
        await agent.aiHover(flowItem.aiHover, flowItem);
      } else if ("aiInput" in flowItem) {
        await agent.aiInput(flowItem.aiInput, flowItem.locate, flowItem);
      } else if ("aiKeyboardPress" in flowItem) {
        await agent.aiKeyboardPress(
          flowItem.aiKeyboardPress,
          flowItem.locate,
          flowItem
        );
      } else if ("aiScroll" in flowItem) {
        // The whole item is passed as the scroll parameter.
        await agent.aiScroll(flowItem, flowItem.locate, flowItem);
      } else if ("javascript" in flowItem) {
        const result = await agent.evaluateJavaScript(flowItem.javascript);
        this.setResult(flowItem.name, result);
      } else {
        throw new Error(`unknown flowItem: ${JSON.stringify(flowItem)}`);
      }
    }
    this.reportFile = agent.reportFile;
  }

  /**
   * Sets up the agent, plays every task in order, then runs the cleanup
   * functions. A failing task stops the run unless it sets `continueOnError`.
   * Setup failures set status "error" (with the error) and return early.
   */
  async run() {
    const { target, web, android } = this.script;
    const webEnv = web || target;
    const platform = webEnv || android;
    this.setPlayerStatus("running");
    let agent = null;
    let freeFn = [];
    try {
      const { agent: newAgent, freeFn: newFreeFn } = await this.setupAgent(
        platform
      );
      agent = newAgent;
      // Mirror the agent's task tips into agentStatusTip while running, and
      // register a cleanup step that restores the original handler.
      const originalOnTaskStartTip = agent.onTaskStartTip;
      agent.onTaskStartTip = (tip) => {
        if (this.status === "running") {
          this.agentStatusTip = tip;
        }
        originalOnTaskStartTip?.(tip);
      };
      freeFn = [
        ...newFreeFn || [],
        {
          name: "restore-agent-onTaskStartTip",
          fn: () => {
            if (agent) {
              agent.onTaskStartTip = originalOnTaskStartTip;
            }
          }
        }
      ];
    } catch (e) {
      this.setPlayerStatus("error", e);
      return;
    }
    this.pageAgent = agent;
    this.setPlayerStatus("running");
    let errorFlag = false;
    // Iterate the status list built in the constructor: it is always an array
    // (even when the script has no `tasks`) and is what setTaskStatus indexes.
    for (let taskIndex = 0; taskIndex < this.taskStatusList.length; taskIndex++) {
      const taskStatus = this.taskStatusList[taskIndex];
      this.setTaskStatus(taskIndex, "running");
      this.setTaskIndex(taskIndex);
      try {
        await this.playTask(taskStatus, this.pageAgent);
        this.setTaskStatus(taskIndex, "done");
      } catch (e) {
        this.setTaskStatus(taskIndex, "error", e);
        if (!taskStatus.continueOnError) {
          this.reportFile = agent.reportFile;
          errorFlag = true;
          break;
        }
      }
      this.reportFile = agent.reportFile;
    }
    this.setPlayerStatus(errorFlag ? "error" : "done");
    this.agentStatusTip = "";
    // Cleanup stays best-effort: one failing step must not prevent the rest,
    // but the failure is reported instead of being dropped silently.
    for (const fn of freeFn) {
      try {
        await fn.fn();
      } catch (e) {
        console.warn(`failed to run cleanup step "${fn.name}"`, e);
      }
    }
  }
};
417
+
418
+ // src/yaml/builder.ts
419
+ var import_js_yaml = __toESM(require("js-yaml"));
420
+
421
+ // src/yaml/utils.ts
422
+ var import_utils4 = require("misoai-shared/utils");
423
+ var import_js_yaml2 = __toESM(require("js-yaml"));
424
/**
 * Substitutes every `${NAME}` placeholder with `process.env[NAME]`
 * (the name is trimmed before lookup).
 *
 * @param {string} content text containing placeholders.
 * @returns {string} text with all placeholders replaced.
 * @throws {Error} when a referenced environment variable is undefined.
 */
function interpolateEnvVars(content) {
  const placeholder = /\$\{([^}]+)\}/g;
  const lookup = (_, envVar) => {
    const name = envVar.trim();
    const value = process.env[name];
    if (value !== void 0) {
      return value;
    }
    throw new Error(`Environment variable "${name}" is not defined`);
  };
  return content.replace(placeholder, lookup);
}
433
/**
 * Parses a YAML script into an object and validates its top-level shape.
 *
 * Steps: quote bare numeric `deviceId` values (only when the text mentions
 * "android"), interpolate `${ENV}` placeholders, load with the JSON schema,
 * then assert the presence of a platform section and a `tasks` array.
 *
 * @param {string} content               raw YAML text.
 * @param {string} [filePath]            only used in error messages.
 * @param {boolean} [ignoreCheckingTarget] skip the target/web/android checks.
 * @returns {object} the parsed script object.
 * @throws when an environment variable is missing, the YAML does not parse to
 *         an object, or a shape assertion fails.
 */
function parseYamlScript(content, filePath, ignoreCheckingTarget) {
  let processedContent = content;
  if (content.indexOf("android") !== -1 && content.match(/deviceId:\s*(\d+)/)) {
    let matchedDeviceId;
    processedContent = content.replace(
      /deviceId:\s*(\d+)/g,
      (match, deviceId) => {
        matchedDeviceId = deviceId;
        return `deviceId: '${deviceId}'`;
      }
    );
    console.warn(
      `please use string-style deviceId in yaml script, for example: deviceId: "${matchedDeviceId}"`
    );
  }
  const interpolatedContent = interpolateEnvVars(processedContent);
  const obj = import_js_yaml2.default.load(interpolatedContent, {
    schema: import_js_yaml2.default.JSON_SCHEMA
  });
  const pathTip = filePath ? `, failed to load ${filePath}` : "";
  // An empty document or a bare scalar does not load as an object; fail with
  // a readable message instead of a TypeError on the property reads below.
  (0, import_utils4.assert)(
    obj !== null && typeof obj === "object",
    `yaml script is empty or not an object${pathTip}`
  );
  const android = typeof obj.android !== "undefined" ? Object.assign({}, obj.android || {}) : void 0;
  const webConfig = obj.web || obj.target;
  const web = typeof webConfig !== "undefined" ? Object.assign({}, webConfig || {}) : void 0;
  if (!ignoreCheckingTarget) {
    (0, import_utils4.assert)(
      web || android,
      `at least one of "target", "web", or "android" properties is required in yaml script${pathTip}`
    );
    (0, import_utils4.assert)(
      web && !android || !web && android,
      `only one of "target", "web", or "android" properties is allowed in yaml script${pathTip}`
    );
    if (web || android) {
      // NOTE(review): `web`/`android` are Object.assign copies, so this check
      // looks like it can never fail — confirm whether the raw values were
      // meant to be validated instead.
      (0, import_utils4.assert)(
        typeof web === "object" || typeof android === "object",
        `property "target/web/android" must be an object${pathTip}`
      );
    }
  }
  (0, import_utils4.assert)(obj.tasks, `property "tasks" is required in yaml script ${pathTip}`);
  (0, import_utils4.assert)(
    Array.isArray(obj.tasks),
    `property "tasks" must be an array in yaml script, but got ${obj.tasks}`
  );
  return obj;
}
479
+
480
+ // src/common/agent.ts
481
+ var import_utils11 = require("misoai-core/utils");
482
+ var import_constants2 = require("misoai-shared/constants");
483
+ var import_env2 = require("misoai-shared/env");
484
+ var import_logger4 = require("misoai-shared/logger");
485
+ var import_utils12 = require("misoai-shared/utils");
486
+
487
+ // src/common/tasks.ts
488
+ var import_misoai_core = require("misoai-core");
489
+ var import_ai_model2 = require("misoai-core/ai-model");
490
+ var import_utils5 = require("misoai-core/utils");
491
+ var import_constants = require("misoai-shared/constants");
492
+ var import_logger = require("misoai-shared/logger");
493
+ var import_utils6 = require("misoai-shared/utils");
494
+
495
+ // src/common/ui-utils.ts
496
/**
 * Formats a task's type for display: "<type> / <subType>" when a sub-type
 * other than "Plan" is set, otherwise just the type.
 */
function typeStr(task) {
  const hasDisplayableSubType = task.subType && task.subType !== "Plan";
  if (!hasDisplayableSubType) {
    return task.type;
  }
  return `${task.type} / ${task.subType || ""}`;
}
499
/**
 * Converts a key or list of keys into `{ key, command? }` entries.
 * When the list contains "Meta" or "Control", the letters a/c/v (either case)
 * additionally get the command SelectAll / Copy / Paste.
 *
 * @param {string|string[]} value single key or key combination.
 * @returns {Array<{key: string, command?: string}>}
 */
function getKeyCommands(value) {
  const keys = Array.isArray(value) ? value : [value];
  const hasModifier = keys.includes("Meta") || keys.includes("Control");
  const shortcutCommands = new Map([
    ["a", "SelectAll"],
    ["A", "SelectAll"],
    ["c", "Copy"],
    ["C", "Copy"],
    ["v", "Paste"],
    ["V", "Paste"]
  ]);
  return keys.map((k) => {
    const command = hasModifier ? shortcutCommands.get(k) : void 0;
    return command ? { key: k, command } : { key: k };
  });
}
515
/**
 * Returns the text of a locate parameter: the string itself, the `prompt`
 * of an object, or "" when the parameter is falsy.
 */
function locateParamStr(locate) {
  if (!locate) {
    return "";
  }
  return typeof locate === "string" ? locate : locate.prompt;
}
524
/**
 * Summarises a scroll parameter as "<direction>, <scrollType>, <distance>",
 * substituting "down", "once" and "distance-not-set" for falsy fields.
 * Returns "" when no parameter is given.
 */
function scrollParamStr(scrollParam) {
  if (!scrollParam) {
    return "";
  }
  const direction = scrollParam.direction || "down";
  const scrollType = scrollParam.scrollType || "once";
  const distance = scrollParam.distance || "distance-not-set";
  return `${direction}, ${scrollType}, ${distance}`;
}
530
/**
 * Builds a task title: "<type> - <prompt>" when a prompt is given,
 * otherwise the type alone.
 */
function taskTitleStr(type, prompt) {
  return prompt ? `${type} - ${prompt}` : type;
}
536
/**
 * Produces a display string for a task's parameters, depending on its type:
 * - "Planning": the user instruction.
 * - "Insight": the first truthy of prompt, id, dataDemand, assertion.
 * - "Action": the thought, overridden by "<timeMs>ms", the scroll summary, or
 *   the param value (in that priority), prefixed with the locate text if any.
 * Non-string values are JSON-stringified with 2-space indent; an undefined
 * value yields "".
 */
function paramStr(task) {
  let value;
  switch (task.type) {
    case "Planning": {
      value = task?.param?.userInstruction;
      break;
    }
    case "Insight": {
      const param = task?.param;
      value = param?.prompt || param?.id || param?.dataDemand || param?.assertion;
      break;
    }
    case "Action": {
      const locate = task?.locate;
      const locateStr = locate ? locateParamStr(locate) : "";
      const param = task?.param;
      let detail = task.thought || "";
      if (typeof param?.timeMs === "number") {
        detail = `${param?.timeMs}ms`;
      } else if (typeof param?.scrollType === "string") {
        detail = scrollParamStr(param);
      } else if (typeof param?.value !== "undefined") {
        detail = param?.value;
      }
      if (locateStr) {
        detail = detail ? `${locateStr} - ${detail}` : locateStr;
      }
      value = detail;
      break;
    }
    default:
      break;
  }
  if (typeof value === "undefined") {
    return "";
  }
  if (typeof value === "string") {
    return value;
  }
  return JSON.stringify(value, void 0, 2);
}
567
+
568
+ // src/common/tasks.ts
569
+ var debug = (0, import_logger.getDebug)("page-task-executor");
570
+ var replanningCountLimit = 10;
571
// True when the page reports its pageType as "android".
var isAndroidPage = (page) => page.pageType === "android";
574
+ var PageTaskExecutor = class {
575
+ constructor(page, insight, opts) {
576
+ this.conversationHistory = [];
577
+ this.page = page;
578
+ this.insight = insight;
579
+ this.taskCache = opts.taskCache;
580
+ this.onTaskStartCallback = opts?.onTaskStart;
581
+ }
582
+ async recordScreenshot(timing) {
583
+ const base64 = await this.page.screenshotBase64();
584
+ const item = {
585
+ type: "screenshot",
586
+ ts: Date.now(),
587
+ screenshot: base64,
588
+ timing
589
+ };
590
+ return item;
591
+ }
592
+ async getElementXpath(pageContext, element) {
593
+ let elementId = element?.id;
594
+ if (element?.attributes?.nodeType === import_constants.NodeType.POSITION) {
595
+ await this.insight.contextRetrieverFn("locate");
596
+ const info = (0, import_ai_model2.elementByPositionWithElementInfo)(
597
+ pageContext.tree,
598
+ {
599
+ x: element.center[0],
600
+ y: element.center[1]
601
+ },
602
+ {
603
+ requireStrictDistance: false,
604
+ filterPositionElements: true
605
+ }
606
+ );
607
+ if (info?.id) {
608
+ elementId = info.id;
609
+ } else {
610
+ debug(
611
+ "no element id found for position node, will not update cache",
612
+ element
613
+ );
614
+ }
615
+ }
616
+ if (!elementId) {
617
+ return void 0;
618
+ }
619
+ try {
620
+ const result = await this.page.getXpathsById(elementId);
621
+ return result;
622
+ } catch (error) {
623
+ debug("getXpathsById error: ", error);
624
+ }
625
+ }
626
+ prependExecutorWithScreenshot(taskApply, appendAfterExecution = false) {
627
+ const taskWithScreenshot = {
628
+ ...taskApply,
629
+ executor: async (param, context, ...args) => {
630
+ const recorder = [];
631
+ const { task } = context;
632
+ task.recorder = recorder;
633
+ const shot = await this.recordScreenshot(`before ${task.type}`);
634
+ recorder.push(shot);
635
+ const result = await taskApply.executor(param, context, ...args);
636
+ if (taskApply.type === "Action") {
637
+ await Promise.all([
638
+ (async () => {
639
+ await (0, import_utils5.sleep)(100);
640
+ if (this.page.waitUntilNetworkIdle) {
641
+ try {
642
+ await this.page.waitUntilNetworkIdle();
643
+ } catch (error) {
644
+ }
645
+ }
646
+ })(),
647
+ (0, import_utils5.sleep)(200)
648
+ ]);
649
+ }
650
+ if (appendAfterExecution) {
651
+ const shot2 = await this.recordScreenshot("after Action");
652
+ recorder.push(shot2);
653
+ }
654
+ return result;
655
+ }
656
+ };
657
+ return taskWithScreenshot;
658
+ }
659
+ async convertPlanToExecutable(plans, opts) {
660
+ const tasks = [];
661
+ plans.forEach((plan2) => {
662
+ if (plan2.type === "Locate") {
663
+ if (plan2.locate === null || plan2.locate?.id === null || plan2.locate?.id === "null") {
664
+ return;
665
+ }
666
+ const taskFind = {
667
+ type: "Insight",
668
+ subType: "Locate",
669
+ param: plan2.locate ? {
670
+ ...plan2.locate,
671
+ cacheable: opts?.cacheable
672
+ } : void 0,
673
+ thought: plan2.thought,
674
+ locate: plan2.locate,
675
+ executor: async (param, taskContext) => {
676
+ const { task } = taskContext;
677
+ (0, import_utils6.assert)(
678
+ param?.prompt || param?.id || param?.bbox,
679
+ "No prompt or id or position or bbox to locate"
680
+ );
681
+ let insightDump;
682
+ let usage;
683
+ const dumpCollector = (dump) => {
684
+ insightDump = dump;
685
+ usage = dump?.taskInfo?.usage;
686
+ task.log = {
687
+ dump: insightDump
688
+ };
689
+ task.usage = usage;
690
+ };
691
+ this.insight.onceDumpUpdatedFn = dumpCollector;
692
+ const shotTime = Date.now();
693
+ const pageContext = await this.insight.contextRetrieverFn("locate");
694
+ task.pageContext = pageContext;
695
+ const recordItem = {
696
+ type: "screenshot",
697
+ ts: shotTime,
698
+ screenshot: pageContext.screenshotBase64,
699
+ timing: "before locate"
700
+ };
701
+ task.recorder = [recordItem];
702
+ let cacheHitFlag = false;
703
+ const cachePrompt = param.prompt;
704
+ const locateCacheRecord = this.taskCache?.matchLocateCache(cachePrompt);
705
+ const xpaths = locateCacheRecord?.cacheContent?.xpaths;
706
+ let elementFromCache = null;
707
+ try {
708
+ if (xpaths?.length && this.taskCache?.isCacheResultUsed && param?.cacheable !== false) {
709
+ for (let i = 0; i < xpaths.length; i++) {
710
+ const element2 = await this.page.getElementInfoByXpath(
711
+ xpaths[i]
712
+ );
713
+ if (element2?.id) {
714
+ elementFromCache = element2;
715
+ debug("cache hit, prompt: %s", cachePrompt);
716
+ cacheHitFlag = true;
717
+ debug(
718
+ "found a new new element with same xpath, xpath: %s, id: %s",
719
+ xpaths[i],
720
+ element2?.id
721
+ );
722
+ break;
723
+ }
724
+ }
725
+ }
726
+ } catch (error) {
727
+ debug("get element info by xpath error: ", error);
728
+ }
729
+ const startTime = Date.now();
730
+ const element = elementFromCache || // try to match element from cache
731
+ matchElementFromPlan(param, pageContext.tree) || // try to match element from plan
732
+ (await this.insight.locate(param, {
733
+ context: pageContext
734
+ })).element;
735
+ const aiCost = Date.now() - startTime;
736
+ let currentXpaths;
737
+ if (element && this.taskCache && !cacheHitFlag && param?.cacheable !== false) {
738
+ const elementXpaths = await this.getElementXpath(
739
+ pageContext,
740
+ element
741
+ );
742
+ if (elementXpaths?.length) {
743
+ currentXpaths = elementXpaths;
744
+ this.taskCache.updateOrAppendCacheRecord(
745
+ {
746
+ type: "locate",
747
+ prompt: cachePrompt,
748
+ xpaths: elementXpaths
749
+ },
750
+ locateCacheRecord
751
+ );
752
+ } else {
753
+ debug(
754
+ "no xpaths found, will not update cache",
755
+ cachePrompt,
756
+ elementXpaths
757
+ );
758
+ }
759
+ }
760
+ if (!element) {
761
+ throw new Error(`Element not found: ${param.prompt}`);
762
+ }
763
+ return {
764
+ output: {
765
+ element
766
+ },
767
+ pageContext,
768
+ cache: {
769
+ hit: cacheHitFlag,
770
+ originalXpaths: xpaths,
771
+ currentXpaths
772
+ },
773
+ aiCost
774
+ };
775
+ }
776
+ };
777
+ tasks.push(taskFind);
778
+ } else if (plan2.type === "Assert" || plan2.type === "AssertWithoutThrow") {
779
+ const assertPlan = plan2;
780
+ const taskAssert = {
781
+ type: "Insight",
782
+ subType: "Assert",
783
+ param: assertPlan.param,
784
+ thought: assertPlan.thought,
785
+ locate: assertPlan.locate,
786
+ executor: async (param, taskContext) => {
787
+ const { task } = taskContext;
788
+ let insightDump;
789
+ const dumpCollector = (dump) => {
790
+ insightDump = dump;
791
+ };
792
+ this.insight.onceDumpUpdatedFn = dumpCollector;
793
+ const assertion = await this.insight.assert(
794
+ assertPlan.param.assertion
795
+ );
796
+ if (!assertion.pass) {
797
+ if (plan2.type === "Assert") {
798
+ task.output = assertion;
799
+ task.log = {
800
+ dump: insightDump
801
+ };
802
+ throw new Error(
803
+ assertion.thought || "Assertion failed without reason"
804
+ );
805
+ }
806
+ task.error = assertion.thought;
807
+ }
808
+ return {
809
+ output: assertion,
810
+ log: {
811
+ dump: insightDump
812
+ },
813
+ usage: assertion.usage
814
+ };
815
+ }
816
+ };
817
+ tasks.push(taskAssert);
818
+ } else if (plan2.type === "Input") {
819
+ const taskActionInput = {
820
+ type: "Action",
821
+ subType: "Input",
822
+ param: plan2.param,
823
+ thought: plan2.thought,
824
+ locate: plan2.locate,
825
+ executor: async (taskParam, { element }) => {
826
+ if (element) {
827
+ await this.page.clearInput(element);
828
+ if (!taskParam || !taskParam.value) {
829
+ return;
830
+ }
831
+ await this.page.keyboard.type(taskParam.value);
832
+ } else {
833
+ await this.page.keyboard.type(taskParam.value);
834
+ }
835
+ }
836
+ };
837
+ tasks.push(taskActionInput);
838
+ } else if (plan2.type === "KeyboardPress") {
839
+ const taskActionKeyboardPress = {
840
+ type: "Action",
841
+ subType: "KeyboardPress",
842
+ param: plan2.param,
843
+ thought: plan2.thought,
844
+ locate: plan2.locate,
845
+ executor: async (taskParam) => {
846
+ const keys = getKeyCommands(taskParam.value);
847
+ await this.page.keyboard.press(keys);
848
+ }
849
+ };
850
+ tasks.push(taskActionKeyboardPress);
851
+ } else if (plan2.type === "Tap") {
852
+ const taskActionTap = {
853
+ type: "Action",
854
+ subType: "Tap",
855
+ thought: plan2.thought,
856
+ locate: plan2.locate,
857
+ executor: async (param, { element }) => {
858
+ (0, import_utils6.assert)(element, "Element not found, cannot tap");
859
+ await this.page.mouse.click(element.center[0], element.center[1]);
860
+ }
861
+ };
862
+ tasks.push(taskActionTap);
863
+ } else if (plan2.type === "Drag") {
864
+ const taskActionDrag = {
865
+ type: "Action",
866
+ subType: "Drag",
867
+ param: plan2.param,
868
+ thought: plan2.thought,
869
+ locate: plan2.locate,
870
+ executor: async (taskParam) => {
871
+ (0, import_utils6.assert)(
872
+ taskParam?.start_box && taskParam?.end_box,
873
+ "No start_box or end_box to drag"
874
+ );
875
+ await this.page.mouse.drag(taskParam.start_box, taskParam.end_box);
876
+ }
877
+ };
878
+ tasks.push(taskActionDrag);
879
+ } else if (plan2.type === "Hover") {
880
+ const taskActionHover = {
881
+ type: "Action",
882
+ subType: "Hover",
883
+ thought: plan2.thought,
884
+ locate: plan2.locate,
885
+ executor: async (param, { element }) => {
886
+ (0, import_utils6.assert)(element, "Element not found, cannot hover");
887
+ await this.page.mouse.move(element.center[0], element.center[1]);
888
+ }
889
+ };
890
+ tasks.push(taskActionHover);
891
+ } else if (plan2.type === "Scroll") {
892
+ const taskActionScroll = {
893
+ type: "Action",
894
+ subType: "Scroll",
895
+ param: plan2.param,
896
+ thought: plan2.thought,
897
+ locate: plan2.locate,
898
+ executor: async (taskParam, { element }) => {
899
+ const startingPoint = element ? {
900
+ left: element.center[0],
901
+ top: element.center[1]
902
+ } : void 0;
903
+ const scrollToEventName = taskParam?.scrollType;
904
+ if (scrollToEventName === "untilTop") {
905
+ await this.page.scrollUntilTop(startingPoint);
906
+ } else if (scrollToEventName === "untilBottom") {
907
+ await this.page.scrollUntilBottom(startingPoint);
908
+ } else if (scrollToEventName === "untilRight") {
909
+ await this.page.scrollUntilRight(startingPoint);
910
+ } else if (scrollToEventName === "untilLeft") {
911
+ await this.page.scrollUntilLeft(startingPoint);
912
+ } else if (scrollToEventName === "once" || !scrollToEventName) {
913
+ if (taskParam?.direction === "down" || !taskParam || !taskParam.direction) {
914
+ await this.page.scrollDown(
915
+ taskParam?.distance || void 0,
916
+ startingPoint
917
+ );
918
+ } else if (taskParam.direction === "up") {
919
+ await this.page.scrollUp(
920
+ taskParam.distance || void 0,
921
+ startingPoint
922
+ );
923
+ } else if (taskParam.direction === "left") {
924
+ await this.page.scrollLeft(
925
+ taskParam.distance || void 0,
926
+ startingPoint
927
+ );
928
+ } else if (taskParam.direction === "right") {
929
+ await this.page.scrollRight(
930
+ taskParam.distance || void 0,
931
+ startingPoint
932
+ );
933
+ } else {
934
+ throw new Error(
935
+ `Unknown scroll direction: ${taskParam.direction}`
936
+ );
937
+ }
938
+ await (0, import_utils5.sleep)(500);
939
+ } else {
940
+ throw new Error(
941
+ `Unknown scroll event type: ${scrollToEventName}, taskParam: ${JSON.stringify(
942
+ taskParam
943
+ )}`
944
+ );
945
+ }
946
+ }
947
+ };
948
+ tasks.push(taskActionScroll);
949
+ } else if (plan2.type === "Sleep") {
950
+ const taskActionSleep = {
951
+ type: "Action",
952
+ subType: "Sleep",
953
+ param: plan2.param,
954
+ thought: plan2.thought,
955
+ locate: plan2.locate,
956
+ executor: async (taskParam) => {
957
+ await (0, import_utils5.sleep)(taskParam?.timeMs || 3e3);
958
+ }
959
+ };
960
+ tasks.push(taskActionSleep);
961
+ } else if (plan2.type === "Error") {
962
+ const taskActionError = {
963
+ type: "Action",
964
+ subType: "Error",
965
+ param: plan2.param,
966
+ thought: plan2.thought || plan2.param?.thought,
967
+ locate: plan2.locate,
968
+ executor: async () => {
969
+ throw new Error(
970
+ plan2?.thought || plan2.param?.thought || "error without thought"
971
+ );
972
+ }
973
+ };
974
+ tasks.push(taskActionError);
975
+ } else if (plan2.type === "ExpectedFalsyCondition") {
976
+ const taskActionFalsyConditionStatement = {
977
+ type: "Action",
978
+ subType: "ExpectedFalsyCondition",
979
+ param: null,
980
+ thought: plan2.param?.reason,
981
+ locate: plan2.locate,
982
+ executor: async () => {
983
+ }
984
+ };
985
+ tasks.push(taskActionFalsyConditionStatement);
986
+ } else if (plan2.type === "Finished") {
987
+ const taskActionFinished = {
988
+ type: "Action",
989
+ subType: "Finished",
990
+ param: null,
991
+ thought: plan2.thought,
992
+ locate: plan2.locate,
993
+ executor: async (param) => {
994
+ }
995
+ };
996
+ tasks.push(taskActionFinished);
997
+ } else if (plan2.type === "AndroidHomeButton") {
998
+ const taskActionAndroidHomeButton = {
999
+ type: "Action",
1000
+ subType: "AndroidHomeButton",
1001
+ param: null,
1002
+ thought: plan2.thought,
1003
+ locate: plan2.locate,
1004
+ executor: async (param) => {
1005
+ (0, import_utils6.assert)(
1006
+ isAndroidPage(this.page),
1007
+ "Cannot use home button on non-Android devices"
1008
+ );
1009
+ await this.page.home();
1010
+ }
1011
+ };
1012
+ tasks.push(taskActionAndroidHomeButton);
1013
+ } else if (plan2.type === "AndroidBackButton") {
1014
+ const taskActionAndroidBackButton = {
1015
+ type: "Action",
1016
+ subType: "AndroidBackButton",
1017
+ param: null,
1018
+ thought: plan2.thought,
1019
+ locate: plan2.locate,
1020
+ executor: async (param) => {
1021
+ (0, import_utils6.assert)(
1022
+ isAndroidPage(this.page),
1023
+ "Cannot use back button on non-Android devices"
1024
+ );
1025
+ await this.page.back();
1026
+ }
1027
+ };
1028
+ tasks.push(taskActionAndroidBackButton);
1029
+ } else if (plan2.type === "AndroidRecentAppsButton") {
1030
+ const taskActionAndroidRecentAppsButton = {
1031
+ type: "Action",
1032
+ subType: "AndroidRecentAppsButton",
1033
+ param: null,
1034
+ thought: plan2.thought,
1035
+ locate: plan2.locate,
1036
+ executor: async (param) => {
1037
+ (0, import_utils6.assert)(
1038
+ isAndroidPage(this.page),
1039
+ "Cannot use recent apps button on non-Android devices"
1040
+ );
1041
+ await this.page.recentApps();
1042
+ }
1043
+ };
1044
+ tasks.push(taskActionAndroidRecentAppsButton);
1045
+ } else {
1046
+ throw new Error(`Unknown or unsupported task type: ${plan2.type}`);
1047
+ }
1048
+ });
1049
+ const wrappedTasks = tasks.map(
1050
+ (task, index) => {
1051
+ if (task.type === "Action") {
1052
+ return this.prependExecutorWithScreenshot(
1053
+ task,
1054
+ index === tasks.length - 1
1055
+ );
1056
+ }
1057
+ return task;
1058
+ }
1059
+ );
1060
+ return {
1061
+ tasks: wrappedTasks
1062
+ };
1063
+ }
1064
+ async setupPlanningContext(executorContext) {
1065
+ const shotTime = Date.now();
1066
+ const pageContext = await this.insight.contextRetrieverFn("locate");
1067
+ const recordItem = {
1068
+ type: "screenshot",
1069
+ ts: shotTime,
1070
+ screenshot: pageContext.screenshotBase64,
1071
+ timing: "before planning"
1072
+ };
1073
+ executorContext.task.recorder = [recordItem];
1074
+ executorContext.task.pageContext = pageContext;
1075
+ return {
1076
+ pageContext
1077
+ };
1078
+ }
1079
+ async loadYamlFlowAsPlanning(userInstruction, yamlString) {
1080
+ const taskExecutor = new import_misoai_core.Executor(taskTitleStr("Action", userInstruction), {
1081
+ onTaskStart: this.onTaskStartCallback
1082
+ });
1083
+ const task = {
1084
+ type: "Planning",
1085
+ subType: "LoadYaml",
1086
+ locate: null,
1087
+ param: {
1088
+ userInstruction
1089
+ },
1090
+ executor: async (param, executorContext) => {
1091
+ await this.setupPlanningContext(executorContext);
1092
+ return {
1093
+ output: {
1094
+ actions: [],
1095
+ more_actions_needed_by_instruction: false,
1096
+ log: "",
1097
+ yamlString
1098
+ },
1099
+ cache: {
1100
+ hit: true
1101
+ }
1102
+ };
1103
+ }
1104
+ };
1105
+ await taskExecutor.append(task);
1106
+ await taskExecutor.flush();
1107
+ return {
1108
+ executor: taskExecutor
1109
+ };
1110
+ }
1111
+ planningTaskFromPrompt(userInstruction, log, actionContext) {
1112
+ const task = {
1113
+ type: "Planning",
1114
+ subType: "Plan",
1115
+ locate: null,
1116
+ param: {
1117
+ userInstruction,
1118
+ log
1119
+ },
1120
+ executor: async (param, executorContext) => {
1121
+ const startTime = Date.now();
1122
+ const { pageContext } = await this.setupPlanningContext(executorContext);
1123
+ const planResult = await (0, import_misoai_core.plan)(param.userInstruction, {
1124
+ context: pageContext,
1125
+ log: param.log,
1126
+ actionContext,
1127
+ pageType: this.page.pageType
1128
+ });
1129
+ const {
1130
+ actions,
1131
+ log: log2,
1132
+ more_actions_needed_by_instruction,
1133
+ error,
1134
+ usage,
1135
+ rawResponse,
1136
+ sleep: sleep2
1137
+ } = planResult;
1138
+ executorContext.task.log = {
1139
+ ...executorContext.task.log || {},
1140
+ rawResponse
1141
+ };
1142
+ executorContext.task.usage = usage;
1143
+ let stopCollecting = false;
1144
+ let bboxCollected = false;
1145
+ let planParsingError = "";
1146
+ const finalActions = (actions || []).reduce(
1147
+ (acc, planningAction) => {
1148
+ if (stopCollecting) {
1149
+ return acc;
1150
+ }
1151
+ if (planningAction.locate) {
1152
+ if (bboxCollected && planningAction.locate.bbox) {
1153
+ delete planningAction.locate.bbox;
1154
+ }
1155
+ if (planningAction.locate.bbox) {
1156
+ bboxCollected = true;
1157
+ }
1158
+ acc.push({
1159
+ type: "Locate",
1160
+ locate: planningAction.locate,
1161
+ param: null,
1162
+ thought: planningAction.locate.prompt
1163
+ });
1164
+ } else if (["Tap", "Hover", "Input"].includes(planningAction.type)) {
1165
+ planParsingError = `invalid planning response: ${JSON.stringify(planningAction)}`;
1166
+ stopCollecting = true;
1167
+ return acc;
1168
+ }
1169
+ acc.push(planningAction);
1170
+ return acc;
1171
+ },
1172
+ []
1173
+ );
1174
+ if (sleep2) {
1175
+ const timeNow = Date.now();
1176
+ const timeRemaining = sleep2 - (timeNow - startTime);
1177
+ if (timeRemaining > 0) {
1178
+ finalActions.push({
1179
+ type: "Sleep",
1180
+ param: {
1181
+ timeMs: timeRemaining
1182
+ },
1183
+ locate: null
1184
+ });
1185
+ }
1186
+ }
1187
+ if (finalActions.length === 0) {
1188
+ (0, import_utils6.assert)(
1189
+ !more_actions_needed_by_instruction || sleep2,
1190
+ error ? `Failed to plan: ${error}` : planParsingError || "No plan found"
1191
+ );
1192
+ }
1193
+ return {
1194
+ output: {
1195
+ actions: finalActions,
1196
+ more_actions_needed_by_instruction,
1197
+ log: log2,
1198
+ yamlFlow: planResult.yamlFlow
1199
+ },
1200
+ cache: {
1201
+ hit: false
1202
+ },
1203
+ pageContext
1204
+ };
1205
+ }
1206
+ };
1207
+ return task;
1208
+ }
1209
+ planningTaskToGoal(userInstruction) {
1210
+ const task = {
1211
+ type: "Planning",
1212
+ subType: "Plan",
1213
+ locate: null,
1214
+ param: {
1215
+ userInstruction
1216
+ },
1217
+ executor: async (param, executorContext) => {
1218
+ const { pageContext } = await this.setupPlanningContext(executorContext);
1219
+ const imagePayload = await (0, import_ai_model2.resizeImageForUiTars)(
1220
+ pageContext.screenshotBase64,
1221
+ pageContext.size
1222
+ );
1223
+ this.appendConversationHistory({
1224
+ role: "user",
1225
+ content: [
1226
+ {
1227
+ type: "image_url",
1228
+ image_url: {
1229
+ url: imagePayload
1230
+ }
1231
+ }
1232
+ ]
1233
+ });
1234
+ const startTime = Date.now();
1235
+ const planResult = await (0, import_ai_model2.vlmPlanning)({
1236
+ userInstruction: param.userInstruction,
1237
+ conversationHistory: this.conversationHistory,
1238
+ size: pageContext.size
1239
+ });
1240
+ const aiCost = Date.now() - startTime;
1241
+ const { actions, action_summary } = planResult;
1242
+ this.appendConversationHistory({
1243
+ role: "assistant",
1244
+ content: action_summary
1245
+ });
1246
+ return {
1247
+ output: {
1248
+ actions,
1249
+ thought: actions[0]?.thought,
1250
+ actionType: actions[0].type,
1251
+ more_actions_needed_by_instruction: true,
1252
+ log: "",
1253
+ yamlFlow: planResult.yamlFlow
1254
+ },
1255
+ cache: {
1256
+ hit: false
1257
+ },
1258
+ aiCost
1259
+ };
1260
+ }
1261
+ };
1262
+ return task;
1263
+ }
1264
+ async runPlans(title, plans, opts) {
1265
+ const taskExecutor = new import_misoai_core.Executor(title, {
1266
+ onTaskStart: this.onTaskStartCallback
1267
+ });
1268
+ const { tasks } = await this.convertPlanToExecutable(plans, opts);
1269
+ await taskExecutor.append(tasks);
1270
+ const result = await taskExecutor.flush();
1271
+ return {
1272
+ output: result,
1273
+ executor: taskExecutor
1274
+ };
1275
+ }
1276
+ async action(userPrompt, actionContext, opts) {
1277
+ const taskExecutor = new import_misoai_core.Executor(taskTitleStr("Action", userPrompt), {
1278
+ onTaskStart: this.onTaskStartCallback
1279
+ });
1280
+ let planningTask = this.planningTaskFromPrompt(userPrompt, void 0, actionContext);
1281
+ let replanCount = 0;
1282
+ const logList = [];
1283
+ const yamlFlow = [];
1284
+ while (planningTask) {
1285
+ if (replanCount > replanningCountLimit) {
1286
+ const errorMsg = "Replanning too many times, please split the task into multiple steps";
1287
+ return this.appendErrorPlan(taskExecutor, errorMsg);
1288
+ }
1289
+ await taskExecutor.append(planningTask);
1290
+ const planResult = await taskExecutor.flush();
1291
+ if (taskExecutor.isInErrorState()) {
1292
+ return {
1293
+ output: planResult,
1294
+ executor: taskExecutor
1295
+ };
1296
+ }
1297
+ const plans = planResult.actions || [];
1298
+ yamlFlow.push(...planResult.yamlFlow || []);
1299
+ let executables;
1300
+ try {
1301
+ executables = await this.convertPlanToExecutable(plans, opts);
1302
+ taskExecutor.append(executables.tasks);
1303
+ } catch (error) {
1304
+ return this.appendErrorPlan(
1305
+ taskExecutor,
1306
+ `Error converting plans to executable tasks: ${error}, plans: ${JSON.stringify(
1307
+ plans
1308
+ )}`
1309
+ );
1310
+ }
1311
+ await taskExecutor.flush();
1312
+ if (taskExecutor.isInErrorState()) {
1313
+ return {
1314
+ output: void 0,
1315
+ executor: taskExecutor
1316
+ };
1317
+ }
1318
+ if (planResult?.log) {
1319
+ logList.push(planResult.log);
1320
+ }
1321
+ if (!planResult.more_actions_needed_by_instruction) {
1322
+ planningTask = null;
1323
+ break;
1324
+ }
1325
+ planningTask = this.planningTaskFromPrompt(
1326
+ userPrompt,
1327
+ logList.length > 0 ? `- ${logList.join("\n- ")}` : void 0,
1328
+ actionContext
1329
+ );
1330
+ replanCount++;
1331
+ }
1332
+ return {
1333
+ output: {
1334
+ yamlFlow
1335
+ },
1336
+ executor: taskExecutor
1337
+ };
1338
+ }
1339
+ async actionToGoal(userPrompt, opts) {
1340
+ const taskExecutor = new import_misoai_core.Executor(taskTitleStr("Action", userPrompt), {
1341
+ onTaskStart: this.onTaskStartCallback
1342
+ });
1343
+ this.conversationHistory = [];
1344
+ const isCompleted = false;
1345
+ let currentActionNumber = 0;
1346
+ const maxActionNumber = 40;
1347
+ const yamlFlow = [];
1348
+ while (!isCompleted && currentActionNumber < maxActionNumber) {
1349
+ currentActionNumber++;
1350
+ const planningTask = this.planningTaskToGoal(userPrompt);
1351
+ await taskExecutor.append(planningTask);
1352
+ const output = await taskExecutor.flush();
1353
+ if (taskExecutor.isInErrorState()) {
1354
+ return {
1355
+ output: void 0,
1356
+ executor: taskExecutor
1357
+ };
1358
+ }
1359
+ const plans = output.actions;
1360
+ yamlFlow.push(...output.yamlFlow || []);
1361
+ let executables;
1362
+ try {
1363
+ executables = await this.convertPlanToExecutable(plans, opts);
1364
+ taskExecutor.append(executables.tasks);
1365
+ } catch (error) {
1366
+ return this.appendErrorPlan(
1367
+ taskExecutor,
1368
+ `Error converting plans to executable tasks: ${error}, plans: ${JSON.stringify(
1369
+ plans
1370
+ )}`
1371
+ );
1372
+ }
1373
+ await taskExecutor.flush();
1374
+ if (taskExecutor.isInErrorState()) {
1375
+ return {
1376
+ output: void 0,
1377
+ executor: taskExecutor
1378
+ };
1379
+ }
1380
+ if (plans[0].type === "Finished") {
1381
+ break;
1382
+ }
1383
+ }
1384
+ return {
1385
+ output: {
1386
+ yamlFlow
1387
+ },
1388
+ executor: taskExecutor
1389
+ };
1390
+ }
1391
+ async createTypeQueryTask(type, demand) {
1392
+ const taskExecutor = new import_misoai_core.Executor(
1393
+ taskTitleStr(
1394
+ type,
1395
+ typeof demand === "string" ? demand : JSON.stringify(demand)
1396
+ ),
1397
+ {
1398
+ onTaskStart: this.onTaskStartCallback
1399
+ }
1400
+ );
1401
+ const queryTask = {
1402
+ type: "Insight",
1403
+ subType: type,
1404
+ locate: null,
1405
+ param: {
1406
+ dataDemand: demand
1407
+ // for user param presentation in report right sidebar
1408
+ },
1409
+ executor: async (param) => {
1410
+ let insightDump;
1411
+ const dumpCollector = (dump) => {
1412
+ insightDump = dump;
1413
+ };
1414
+ this.insight.onceDumpUpdatedFn = dumpCollector;
1415
+ const ifTypeRestricted = type !== "Query";
1416
+ let demandInput = demand;
1417
+ if (ifTypeRestricted) {
1418
+ demandInput = {
1419
+ result: `${type}, ${demand}`
1420
+ };
1421
+ }
1422
+ const { data, usage } = await this.insight.extract(demandInput);
1423
+ let outputResult = data;
1424
+ if (ifTypeRestricted) {
1425
+ (0, import_utils6.assert)(data?.result !== void 0, "No result in query data");
1426
+ outputResult = data.result;
1427
+ }
1428
+ return {
1429
+ output: outputResult,
1430
+ log: { dump: insightDump },
1431
+ usage
1432
+ };
1433
+ }
1434
+ };
1435
+ await taskExecutor.append(this.prependExecutorWithScreenshot(queryTask));
1436
+ const output = await taskExecutor.flush();
1437
+ return {
1438
+ output,
1439
+ executor: taskExecutor
1440
+ };
1441
+ }
1442
+ async query(demand) {
1443
+ return this.createTypeQueryTask("Query", demand);
1444
+ }
1445
+ async boolean(prompt) {
1446
+ return this.createTypeQueryTask("Boolean", prompt);
1447
+ }
1448
+ async number(prompt) {
1449
+ return this.createTypeQueryTask("Number", prompt);
1450
+ }
1451
+ async string(prompt) {
1452
+ return this.createTypeQueryTask("String", prompt);
1453
+ }
1454
+ async assert(assertion) {
1455
+ const description = `assert: ${assertion}`;
1456
+ const taskExecutor = new import_misoai_core.Executor(taskTitleStr("Assert", description), {
1457
+ onTaskStart: this.onTaskStartCallback
1458
+ });
1459
+ const assertionPlan = {
1460
+ type: "Assert",
1461
+ param: {
1462
+ assertion
1463
+ },
1464
+ locate: null
1465
+ };
1466
+ const { tasks } = await this.convertPlanToExecutable([assertionPlan]);
1467
+ await taskExecutor.append(this.prependExecutorWithScreenshot(tasks[0]));
1468
+ const output = await taskExecutor.flush();
1469
+ return {
1470
+ output,
1471
+ executor: taskExecutor
1472
+ };
1473
+ }
1474
+ /**
1475
+ * Append a message to the conversation history
1476
+ * For user messages with images:
1477
+ * - Keep max 4 user image messages in history
1478
+ * - Remove oldest user image message when limit reached
1479
+ * For assistant messages:
1480
+ * - Simply append to history
1481
+ * @param conversationHistory Message to append
1482
+ */
1483
+ appendConversationHistory(conversationHistory) {
1484
+ if (conversationHistory.role === "user") {
1485
+ const userImgItems = this.conversationHistory.filter(
1486
+ (item) => item.role === "user"
1487
+ );
1488
+ if (userImgItems.length >= 4 && conversationHistory.role === "user") {
1489
+ const firstUserImgIndex = this.conversationHistory.findIndex(
1490
+ (item) => item.role === "user"
1491
+ );
1492
+ if (firstUserImgIndex >= 0) {
1493
+ this.conversationHistory.splice(firstUserImgIndex, 1);
1494
+ }
1495
+ }
1496
+ }
1497
+ this.conversationHistory.push(conversationHistory);
1498
+ }
1499
+ async appendErrorPlan(taskExecutor, errorMsg) {
1500
+ const errorPlan = {
1501
+ type: "Error",
1502
+ param: {
1503
+ thought: errorMsg
1504
+ },
1505
+ locate: null
1506
+ };
1507
+ const { tasks } = await this.convertPlanToExecutable([errorPlan]);
1508
+ await taskExecutor.append(this.prependExecutorWithScreenshot(tasks[0]));
1509
+ await taskExecutor.flush();
1510
+ return {
1511
+ output: void 0,
1512
+ executor: taskExecutor
1513
+ };
1514
+ }
1515
+ async waitFor(assertion, opt) {
1516
+ const description = `waitFor: ${assertion}`;
1517
+ const taskExecutor = new import_misoai_core.Executor(taskTitleStr("WaitFor", description), {
1518
+ onTaskStart: this.onTaskStartCallback
1519
+ });
1520
+ const { timeoutMs, checkIntervalMs } = opt;
1521
+ (0, import_utils6.assert)(assertion, "No assertion for waitFor");
1522
+ (0, import_utils6.assert)(timeoutMs, "No timeoutMs for waitFor");
1523
+ (0, import_utils6.assert)(checkIntervalMs, "No checkIntervalMs for waitFor");
1524
+ const overallStartTime = Date.now();
1525
+ let startTime = Date.now();
1526
+ let errorThought = "";
1527
+ while (Date.now() - overallStartTime < timeoutMs) {
1528
+ startTime = Date.now();
1529
+ const assertPlan = {
1530
+ type: "AssertWithoutThrow",
1531
+ param: {
1532
+ assertion
1533
+ },
1534
+ locate: null
1535
+ };
1536
+ const { tasks: assertTasks } = await this.convertPlanToExecutable([
1537
+ assertPlan
1538
+ ]);
1539
+ await taskExecutor.append(
1540
+ this.prependExecutorWithScreenshot(assertTasks[0])
1541
+ );
1542
+ const output = await taskExecutor.flush();
1543
+ if (output?.pass) {
1544
+ return {
1545
+ output: void 0,
1546
+ executor: taskExecutor
1547
+ };
1548
+ }
1549
+ errorThought = output?.thought || `unknown error when waiting for assertion: ${assertion}`;
1550
+ const now = Date.now();
1551
+ if (now - startTime < checkIntervalMs) {
1552
+ const timeRemaining = checkIntervalMs - (now - startTime);
1553
+ const sleepPlan = {
1554
+ type: "Sleep",
1555
+ param: {
1556
+ timeMs: timeRemaining
1557
+ },
1558
+ locate: null
1559
+ };
1560
+ const { tasks: sleepTasks } = await this.convertPlanToExecutable([
1561
+ sleepPlan
1562
+ ]);
1563
+ await taskExecutor.append(
1564
+ this.prependExecutorWithScreenshot(sleepTasks[0])
1565
+ );
1566
+ await taskExecutor.flush();
1567
+ }
1568
+ }
1569
+ return this.appendErrorPlan(
1570
+ taskExecutor,
1571
+ `waitFor timeout: ${errorThought}`
1572
+ );
1573
+ }
1574
+ };
1575
+
1576
+ // src/common/plan-builder.ts
1577
+ var import_logger2 = require("misoai-shared/logger");
1578
+ var import_utils8 = require("misoai-shared/utils");
1579
+ var debug2 = (0, import_logger2.getDebug)("plan-builder");
1580
/**
 * Build the plan list for a single action.
 *
 * Supported types: "Tap", "Hover", "Input", "KeyboardPress", "Scroll", "Sleep"
 * and "Locate". When a locate param is given for an action that accepts one,
 * a "Locate" plan is placed before the action plan.
 *
 * @param type action type
 * @param locateParam locate info; required for "Tap", "Hover", "Input" and "Locate"
 * @param param action param; required for "Input", "KeyboardPress", "Scroll" and "Sleep"
 * @returns non-empty array of plans
 * @throws Error when a required argument is missing (via `assert`) or when
 *   `type` is not one of the supported types
 */
function buildPlans(type, locateParam, param) {
  let returnPlans = [];
  const locatePlan = locateParam ? {
    type: "Locate",
    locate: locateParam,
    param: locateParam,
    thought: ""
  } : null;
  if (type === "Tap" || type === "Hover") {
    (0, import_utils8.assert)(locateParam, `missing locate info for action "${type}"`);
    (0, import_utils8.assert)(locatePlan, `missing locate info for action "${type}"`);
    const tapPlan = {
      type,
      param: null,
      thought: "",
      locate: locateParam
    };
    returnPlans = [locatePlan, tapPlan];
  }
  if (type === "Input" || type === "KeyboardPress") {
    // Only "Input" needs a target element; a key press may go to the page.
    if (type === "Input") {
      (0, import_utils8.assert)(locateParam, `missing locate info for action "${type}"`);
    }
    (0, import_utils8.assert)(param, `missing param for action "${type}"`);
    const inputPlan = {
      type,
      param,
      thought: "",
      locate: locateParam
    };
    returnPlans = locatePlan ? [locatePlan, inputPlan] : [inputPlan];
  }
  if (type === "Scroll") {
    (0, import_utils8.assert)(param, `missing param for action "${type}"`);
    const scrollPlan = {
      type,
      param,
      thought: "",
      locate: locateParam
    };
    returnPlans = locatePlan ? [locatePlan, scrollPlan] : [scrollPlan];
  }
  if (type === "Sleep") {
    (0, import_utils8.assert)(param, `missing param for action "${type}"`);
    const sleepPlan = {
      type,
      param,
      thought: "",
      locate: null
    };
    returnPlans = [sleepPlan];
  }
  if (type === "Locate") {
    (0, import_utils8.assert)(locateParam, `missing locate info for action "${type}"`);
    const locatePlan2 = {
      type,
      param: locateParam,
      locate: locateParam,
      thought: ""
    };
    returnPlans = [locatePlan2];
  }
  // An empty array is truthy, so the length must be checked: every supported
  // type yields at least one plan, and an empty list means `type` matched nothing.
  if (returnPlans.length > 0) {
    debug2("buildPlans", returnPlans);
    return returnPlans;
  }
  throw new Error(`Not supported type: ${type}`);
}
1656
+
1657
+ // src/common/task-cache.ts
1658
+ var import_node_assert = __toESM(require("assert"));
1659
+ var import_node_fs2 = require("fs");
1660
+ var import_node_path2 = require("path");
1661
+ var import_common2 = require("misoai-shared/common");
1662
+ var import_logger3 = require("misoai-shared/logger");
1663
+ var import_utils9 = require("misoai-shared/utils");
1664
+ var import_js_yaml3 = __toESM(require("js-yaml"));
1665
+ var import_semver = __toESM(require("semver"));
1666
+
1667
+ // package.json
1668
+ var version = "1.0.5";
1669
+
1670
+ // src/common/task-cache.ts
1671
+ var debug3 = (0, import_logger3.getDebug)("cache");
1672
+ var lowestSupportedMidsceneVersion = "0.16.10";
1673
+ var cacheFileExt = ".cache.yaml";
1674
/**
 * YAML-backed cache of "plan" and "locate" records, keyed by prompt.
 *
 * Records loaded at construction time can each be matched once per instance;
 * records appended later are written to the file but are not matchable by the
 * same instance (matching is bounded by `cacheOriginalLength`).
 */
var TaskCache = class {
  /**
   * @param cacheId required id; sanitised with `replaceIllegalPathCharsAndSpace`
   * @param isCacheResultUsed stored as-is; consulted when a legacy `.json` cache file exists
   * @param cacheFilePath optional explicit file path; ignored when `ifInBrowser` is set
   */
  constructor(cacheId, isCacheResultUsed, cacheFilePath) {
    // Keys ("type:prompt:index") of records already handed out by matchCache.
    this.matchedCacheIndices = new Set();
    (0, import_node_assert.default)(cacheId, "cacheId is required");
    this.cacheId = replaceIllegalPathCharsAndSpace(cacheId);
    if (import_utils9.ifInBrowser) {
      this.cacheFilePath = void 0;
    } else {
      this.cacheFilePath = cacheFilePath || (0, import_node_path2.join)(
        (0, import_common2.getMidsceneRunSubDir)("cache"),
        `${this.cacheId}${cacheFileExt}`
      );
    }
    this.isCacheResultUsed = isCacheResultUsed;
    const loaded = this.cacheFilePath ? this.loadCacheFromFile() : void 0;
    this.cache = loaded || {
      midsceneVersion: version,
      cacheId: this.cacheId,
      caches: []
    };
    this.cacheOriginalLength = this.cache.caches.length;
  }

  /**
   * Find the first not-yet-matched record with the given type and prompt
   * among the records present at construction time, and mark it as used.
   *
   * @returns `{ cacheContent, updateFn }`, or `undefined` when nothing matches;
   *   `updateFn(cb)` calls `cb(record)` and then flushes the cache to file
   */
  matchCache(prompt, type) {
    for (let index = 0; index < this.cacheOriginalLength; index++) {
      const record = this.cache.caches[index];
      const usedKey = `${type}:${prompt}:${index}`;
      const sameRecord = record.type === type && record.prompt === prompt;
      if (!sameRecord || this.matchedCacheIndices.has(usedKey)) {
        continue;
      }
      this.matchedCacheIndices.add(usedKey);
      debug3(
        "cache found and marked as used, type: %s, prompt: %s, index: %d",
        type,
        prompt,
        index
      );
      const updateFn = (cb) => {
        debug3(
          "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
          type,
          prompt,
          index
        );
        cb(record);
        debug3(
          "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
          type,
          prompt,
          index
        );
        this.flushCacheToFile();
      };
      return { cacheContent: record, updateFn };
    }
    debug3("no unused cache found, type: %s, prompt: %s", type, prompt);
    return void 0;
  }

  /** Shortcut for `matchCache(prompt, "plan")`. */
  matchPlanCache(prompt) {
    return this.matchCache(prompt, "plan");
  }

  /** Shortcut for `matchCache(prompt, "locate")`. */
  matchLocateCache(prompt) {
    return this.matchCache(prompt, "locate");
  }

  /** Add a record to the end of the cache and flush the cache to file. */
  appendCache(cache) {
    debug3("will append cache", cache);
    this.cache.caches.push(cache);
    this.flushCacheToFile();
  }

  /**
   * Read and validate the cache file at `this.cacheFilePath`.
   *
   * @returns the parsed content with `midsceneVersion` overwritten by the
   *   current `version`, or `undefined` when the file is missing, a legacy
   *   `.json` cache exists while cached results are in use, the file's version
   *   is below the supported minimum, or reading/parsing fails
   */
  loadCacheFromFile() {
    const filePath = this.cacheFilePath;
    (0, import_node_assert.default)(filePath, "cache file path is required");
    if (!(0, import_node_fs2.existsSync)(filePath)) {
      debug3("no cache file found, path: %s", filePath);
      return void 0;
    }
    const legacyJsonPath = filePath.replace(cacheFileExt, ".json");
    if ((0, import_node_fs2.existsSync)(legacyJsonPath) && this.isCacheResultUsed) {
      console.warn(
        `An outdated cache file from an earlier version of Midscene has been detected. Since version 0.17, we have implemented an improved caching strategy. Please delete the old file located at: ${legacyJsonPath}.`
      );
      return void 0;
    }
    try {
      const rawYaml = (0, import_node_fs2.readFileSync)(filePath, "utf8");
      const parsed = import_js_yaml3.default.load(rawYaml);
      if (!version) {
        debug3("no midscene version info, will not read cache from file");
        return void 0;
      }
      const tooOld = import_semver.default.lt(parsed.midsceneVersion, lowestSupportedMidsceneVersion) && !parsed.midsceneVersion.includes("beta");
      if (tooOld) {
        console.warn(
          `You are using an old version of Midscene cache file, and we cannot match any info from it. Starting from Midscene v0.17, we changed our strategy to use xpath for cache info, providing better performance.
Please delete the existing cache and rebuild it. Sorry for the inconvenience.
cache file: ${filePath}`
        );
        return void 0;
      }
      debug3(
        "cache loaded from file, path: %s, cache version: %s, record length: %s",
        filePath,
        parsed.midsceneVersion,
        parsed.caches.length
      );
      parsed.midsceneVersion = version;
      return parsed;
    } catch (err) {
      // Any read/parse/shape failure is treated as "no usable cache".
      debug3(
        "cache file exists but load failed, path: %s, error: %s",
        filePath,
        err
      );
      return void 0;
    }
  }

  /**
   * Serialise `this.cache` as YAML to `this.cacheFilePath`.
   * Does nothing without a version or a file path; write errors are only logged.
   */
  flushCacheToFile() {
    if (!version) {
      debug3("no midscene version info, will not write cache to file");
      return;
    }
    if (!this.cacheFilePath) {
      debug3("no cache file path, will not write cache to file");
      return;
    }
    try {
      (0, import_node_fs2.writeFileSync)(this.cacheFilePath, import_js_yaml3.default.dump(this.cache));
    } catch (err) {
      debug3(
        "write cache to file failed, path: %s, error: %s",
        this.cacheFilePath,
        err
      );
    }
  }

  /**
   * Update a previously matched record in place, or append a new one.
   *
   * @param newRecord record holding the fresh `yamlWorkflow` (type "plan") or `xpaths` (other types)
   * @param cachedRecord result of an earlier `matchCache` call, if any
   */
  updateOrAppendCacheRecord(newRecord, cachedRecord) {
    if (!cachedRecord) {
      this.appendCache(newRecord);
      return;
    }
    if (newRecord.type === "plan") {
      cachedRecord.updateFn((cache) => {
        cache.yamlWorkflow = newRecord.yamlWorkflow;
      });
      return;
    }
    cachedRecord.updateFn((cache) => {
      cache.xpaths = newRecord.xpaths;
    });
  }
};
1825
+
1826
+ // src/common/agent.ts
1827
+ var debug4 = (0, import_logger4.getDebug)("web-integration");
1828
/**
 * Euclidean distance between two `[x, y]` points, rounded to the nearest integer.
 */
var distanceOfTwoPoints = (p1, p2) => {
  const [fromX, fromY] = p1;
  const [toX, toY] = p2;
  const deltaX = fromX - toX;
  const deltaY = fromY - toY;
  const squaredDistance = deltaX ** 2 + deltaY ** 2;
  return Math.round(Math.sqrt(squaredDistance));
};
1833
/**
 * Whether an `[x, y]` point lies inside (or on the border of) a
 * `{ left, top, width, height }` rectangle.
 */
var includedInRect = ([px, py], { left, top, width, height }) => {
  const withinHorizontal = px >= left && px <= left + width;
  const withinVertical = py >= top && py <= top + height;
  return withinHorizontal && withinVertical;
};
1838
+ var PageAgent = class {
1839
+ constructor(page, opts) {
1840
+ /**
1841
+ * If true, the agent will not perform any actions
1842
+ */
1843
+ this.dryMode = false;
1844
+ this.page = page;
1845
+ this.opts = Object.assign(
1846
+ {
1847
+ generateReport: true,
1848
+ autoPrintReportMsg: true,
1849
+ groupName: "Midscene Report",
1850
+ groupDescription: "",
1851
+ enableCumulativeContext: true,
1852
+ autoClearContext: false
1853
+ },
1854
+ opts || {}
1855
+ );
1856
+ this.initializeContextStore();
1857
+ if (this.page.pageType === "puppeteer" || this.page.pageType === "playwright") {
1858
+ this.page.waitForNavigationTimeout = this.opts.waitForNavigationTimeout || import_constants2.DEFAULT_WAIT_FOR_NAVIGATION_TIMEOUT;
1859
+ this.page.waitForNetworkIdleTimeout = this.opts.waitForNetworkIdleTimeout || import_constants2.DEFAULT_WAIT_FOR_NETWORK_IDLE_TIMEOUT;
1860
+ }
1861
+ this.onTaskStartTip = this.opts.onTaskStartTip;
1862
+ this.insight = new import_misoai_core2.Insight(
1863
+ async (action) => {
1864
+ return this.getUIContext(action);
1865
+ }
1866
+ );
1867
+ if (opts?.cacheId && this.page.pageType !== "android") {
1868
+ this.taskCache = new TaskCache(
1869
+ opts.cacheId,
1870
+ (0, import_env2.getAIConfigInBoolean)("MIDSCENE_CACHE")
1871
+ // if we should use cache to match the element
1872
+ );
1873
+ }
1874
+ this.taskExecutor = new PageTaskExecutor(this.page, this.insight, {
1875
+ taskCache: this.taskCache,
1876
+ onTaskStart: this.callbackOnTaskStartTip.bind(this)
1877
+ });
1878
+ this.dump = this.resetDump();
1879
+ this.reportFileName = reportFileName(
1880
+ opts?.testId || this.page.pageType || "web"
1881
+ );
1882
+ }
1883
+ /**
1884
+ * Initialize context store for cumulative context functionality
1885
+ */
1886
+ async initializeContextStore() {
1887
+ if (!this.opts.enableCumulativeContext) {
1888
+ debug4("Cumulative context disabled via options");
1889
+ return;
1890
+ }
1891
+ try {
1892
+ const aiModel = await import("misoai-core/ai-model");
1893
+ this.contextStore = aiModel.getContextStore();
1894
+ debug4("Context store initialized successfully", {
1895
+ autoClearContext: this.opts.autoClearContext,
1896
+ testId: this.opts.testId
1897
+ });
1898
+ if (this.opts.autoClearContext) {
1899
+ this.contextStore.clear();
1900
+ debug4("Context store cleared due to autoClearContext option");
1901
+ } else {
1902
+ const existingData = this.contextStore.getAllData();
1903
+ const existingSteps = this.contextStore.getRecentSteps(100).length;
1904
+ debug4("Context store preserving existing data", {
1905
+ existingDataKeys: Object.keys(existingData),
1906
+ existingStepsCount: existingSteps
1907
+ });
1908
+ }
1909
+ } catch (error) {
1910
+ debug4("Failed to initialize context store:", error);
1911
+ console.warn("⚠️ Could not initialize context store:", error);
1912
+ }
1913
+ }
1914
+ /**
1915
+ * Get the context store instance
1916
+ */
1917
+ getContextStore() {
1918
+ return this.contextStore;
1919
+ }
1920
+ /**
1921
+ * Clear the context store
1922
+ */
1923
+ clearContext() {
1924
+ if (this.contextStore) {
1925
+ this.contextStore.clear();
1926
+ }
1927
+ }
1928
+ /**
1929
+ * Get all stored data from context store
1930
+ */
1931
+ getStoredData() {
1932
+ if (this.contextStore) {
1933
+ return this.contextStore.getAllData();
1934
+ }
1935
+ return {};
1936
+ }
1937
+ /**
1938
+ * Get step summary from context store
1939
+ */
1940
+ getStepSummary() {
1941
+ if (this.contextStore) {
1942
+ return this.contextStore.getStepSummary();
1943
+ }
1944
+ return "";
1945
+ }
1946
+ async getUIContext(action) {
1947
+ if (action && (action === "extract" || action === "assert" || action === "captcha")) {
1948
+ return await parseContextFromWebPage(this.page, {
1949
+ ignoreMarker: true
1950
+ });
1951
+ }
1952
+ return await parseContextFromWebPage(this.page, {
1953
+ ignoreMarker: !!(0, import_env2.vlLocateMode)()
1954
+ });
1955
+ }
1956
+ // Helper method to call the insight.captcha method
1957
+ async _callInsightCaptcha(options) {
1958
+ const context = await this.getUIContext();
1959
+ if (this.page.url) {
1960
+ const url = await this.page.url();
1961
+ context.url = url;
1962
+ }
1963
+ return this.insight.captcha(context, options);
1964
+ }
1965
+ async setAIActionContext(prompt) {
1966
+ this.opts.aiActionContext = prompt;
1967
+ }
1968
+ resetDump() {
1969
+ this.dump = {
1970
+ groupName: this.opts.groupName,
1971
+ groupDescription: this.opts.groupDescription,
1972
+ executions: []
1973
+ };
1974
+ return this.dump;
1975
+ }
1976
+ appendExecutionDump(execution) {
1977
+ const currentDump = this.dump;
1978
+ currentDump.executions.push(execution);
1979
+ }
1980
+ dumpDataString() {
1981
+ this.dump.groupName = this.opts.groupName;
1982
+ this.dump.groupDescription = this.opts.groupDescription;
1983
+ return (0, import_utils11.stringifyDumpData)(this.dump);
1984
+ }
1985
+ reportHTMLString() {
1986
+ return (0, import_utils11.reportHTMLContent)(this.dumpDataString());
1987
+ }
1988
+ writeOutActionDumps() {
1989
+ const { generateReport, autoPrintReportMsg } = this.opts;
1990
+ this.reportFile = (0, import_utils11.writeLogFile)({
1991
+ fileName: this.reportFileName,
1992
+ fileExt: import_utils11.groupedActionDumpFileExt,
1993
+ fileContent: this.dumpDataString(),
1994
+ type: "dump",
1995
+ generateReport
1996
+ });
1997
+ debug4("writeOutActionDumps", this.reportFile);
1998
+ if (generateReport && autoPrintReportMsg && this.reportFile) {
1999
+ printReportMsg(this.reportFile);
2000
+ }
2001
+ }
2002
+ async callbackOnTaskStartTip(task) {
2003
+ const param = paramStr(task);
2004
+ const tip = param ? `${typeStr(task)} - ${param}` : typeStr(task);
2005
+ if (this.onTaskStartTip) {
2006
+ await this.onTaskStartTip(tip);
2007
+ }
2008
+ }
2009
+ afterTaskRunning(executor, doNotThrowError = false) {
2010
+ this.appendExecutionDump(executor.dump());
2011
+ this.writeOutActionDumps();
2012
+ if (executor.isInErrorState() && !doNotThrowError) {
2013
+ const errorTask = executor.latestErrorTask();
2014
+ throw new Error(`${errorTask?.error}`);
2015
+ }
2016
+ const lastTask = executor.tasks[executor.tasks.length - 1];
2017
+ const allThoughts = executor.tasks.filter((task) => task.thought).map((task) => task.thought);
2018
+ const allLocates = executor.tasks.filter((task) => task.locate).map((task) => task.locate);
2019
+ const allPlans = executor.tasks.filter((task) => task.param?.plans).map((task) => task.param?.plans);
2020
+ const planningTasks = executor.tasks.filter((task) => task.type === "Planning");
2021
+ const insightTasks = executor.tasks.filter((task) => task.type === "Insight");
2022
+ const actionTasks = executor.tasks.filter((task) => task.type === "Action");
2023
+ const planning = planningTasks.length > 0 ? {
2024
+ type: "Planning",
2025
+ description: `Planning for task execution`,
2026
+ steps: planningTasks.map((task) => task.thought || "Planning step")
2027
+ } : void 0;
2028
+ const insight = insightTasks.length > 0 ? {
2029
+ type: "Insight",
2030
+ description: `Insight for task execution`,
2031
+ elements: insightTasks.map((task) => task.thought || "Insight element")
2032
+ } : void 0;
2033
+ const action = actionTasks.length > 0 ? {
2034
+ type: "Action",
2035
+ description: `Action for task execution`,
2036
+ result: lastTask?.output
2037
+ } : void 0;
2038
+ const actionDetails = executor.tasks.map((task) => ({
2039
+ type: task.type,
2040
+ subType: task.subType,
2041
+ status: task.status,
2042
+ thought: task.thought
2043
+ }));
2044
+ const metadata = {
2045
+ status: lastTask?.status,
2046
+ start: lastTask?.timing?.start,
2047
+ end: lastTask?.timing?.end,
2048
+ totalTime: lastTask?.timing?.cost,
2049
+ cache: lastTask?.cache,
2050
+ usage: lastTask?.usage,
2051
+ thought: allThoughts.length > 0 ? allThoughts.join("\n") : lastTask?.thought,
2052
+ locate: allLocates.length > 0 ? allLocates : lastTask?.locate,
2053
+ plan: allPlans.length > 0 ? allPlans : lastTask?.param?.plans,
2054
+ // Add planning, insight, and action information
2055
+ planning,
2056
+ insight,
2057
+ action,
2058
+ actionDetails,
2059
+ // Include raw tasks for debugging
2060
+ tasks: executor.tasks.map((task) => ({
2061
+ type: task.type,
2062
+ subType: task.subType,
2063
+ status: task.status,
2064
+ thought: task.thought,
2065
+ locate: task.locate,
2066
+ timing: task.timing,
2067
+ usage: task.usage,
2068
+ cache: task.cache,
2069
+ error: task.error
2070
+ }))
2071
+ };
2072
+ return metadata;
2073
+ }
2074
+ buildDetailedLocateParam(locatePrompt, opt) {
2075
+ (0, import_utils12.assert)(locatePrompt, "missing locate prompt");
2076
+ if (typeof opt === "object") {
2077
+ const prompt = opt.prompt ?? locatePrompt;
2078
+ const deepThink = opt.deepThink ?? false;
2079
+ const cacheable = opt.cacheable ?? true;
2080
+ return {
2081
+ prompt,
2082
+ deepThink,
2083
+ cacheable
2084
+ };
2085
+ }
2086
+ return {
2087
+ prompt: locatePrompt
2088
+ };
2089
+ }
2090
+ async aiTap(locatePrompt, opt) {
2091
+ const detailedLocateParam = this.buildDetailedLocateParam(
2092
+ locatePrompt,
2093
+ opt
2094
+ );
2095
+ const plans = buildPlans("Tap", detailedLocateParam);
2096
+ const { executor, output } = await this.taskExecutor.runPlans(
2097
+ taskTitleStr("Tap", locateParamStr(detailedLocateParam)),
2098
+ plans,
2099
+ { cacheable: opt?.cacheable }
2100
+ );
2101
+ const metadata = this.afterTaskRunning(executor);
2102
+ return {
2103
+ result: output,
2104
+ metadata
2105
+ };
2106
+ }
2107
+ async aiHover(locatePrompt, opt) {
2108
+ const detailedLocateParam = this.buildDetailedLocateParam(
2109
+ locatePrompt,
2110
+ opt
2111
+ );
2112
+ const plans = buildPlans("Hover", detailedLocateParam);
2113
+ const { executor, output } = await this.taskExecutor.runPlans(
2114
+ taskTitleStr("Hover", locateParamStr(detailedLocateParam)),
2115
+ plans,
2116
+ { cacheable: opt?.cacheable }
2117
+ );
2118
+ const metadata = this.afterTaskRunning(executor);
2119
+ return {
2120
+ result: output,
2121
+ metadata
2122
+ };
2123
+ }
2124
+ async aiInput(value, locatePrompt, opt) {
2125
+ (0, import_utils12.assert)(
2126
+ typeof value === "string",
2127
+ "input value must be a string, use empty string if you want to clear the input"
2128
+ );
2129
+ (0, import_utils12.assert)(locatePrompt, "missing locate prompt for input");
2130
+ const detailedLocateParam = this.buildDetailedLocateParam(
2131
+ locatePrompt,
2132
+ opt
2133
+ );
2134
+ const plans = buildPlans("Input", detailedLocateParam, {
2135
+ value
2136
+ });
2137
+ const { executor, output } = await this.taskExecutor.runPlans(
2138
+ taskTitleStr("Input", locateParamStr(detailedLocateParam)),
2139
+ plans,
2140
+ { cacheable: opt?.cacheable }
2141
+ );
2142
+ const metadata = this.afterTaskRunning(executor);
2143
+ return {
2144
+ result: output,
2145
+ metadata
2146
+ };
2147
+ }
2148
+ async aiKeyboardPress(keyName, locatePrompt, opt) {
2149
+ (0, import_utils12.assert)(keyName, "missing keyName for keyboard press");
2150
+ const detailedLocateParam = locatePrompt ? this.buildDetailedLocateParam(locatePrompt, opt) : void 0;
2151
+ const plans = buildPlans("KeyboardPress", detailedLocateParam, {
2152
+ value: keyName
2153
+ });
2154
+ const { executor, output } = await this.taskExecutor.runPlans(
2155
+ taskTitleStr("KeyboardPress", locateParamStr(detailedLocateParam)),
2156
+ plans,
2157
+ { cacheable: opt?.cacheable }
2158
+ );
2159
+ const metadata = this.afterTaskRunning(executor);
2160
+ return {
2161
+ result: output,
2162
+ metadata
2163
+ };
2164
+ }
2165
+ async aiScroll(scrollParam, locatePrompt, opt) {
2166
+ const detailedLocateParam = locatePrompt ? this.buildDetailedLocateParam(locatePrompt, opt) : void 0;
2167
+ const plans = buildPlans("Scroll", detailedLocateParam, scrollParam);
2168
+ const paramInTitle = locatePrompt ? `${locateParamStr(detailedLocateParam)} - ${scrollParamStr(scrollParam)}` : scrollParamStr(scrollParam);
2169
+ const { executor, output } = await this.taskExecutor.runPlans(
2170
+ taskTitleStr("Scroll", paramInTitle),
2171
+ plans,
2172
+ { cacheable: opt?.cacheable }
2173
+ );
2174
+ const metadata = this.afterTaskRunning(executor);
2175
+ return {
2176
+ result: output,
2177
+ metadata
2178
+ };
2179
+ }
2180
+ async aiAction(taskPrompt, opt) {
2181
+ if (this.opts.enableCumulativeContext && this.contextStore) {
2182
+ try {
2183
+ const originalPrompt = taskPrompt;
2184
+ const processedPrompt = this.contextStore.replaceAllReferences(taskPrompt, "action");
2185
+ if (originalPrompt !== processedPrompt) {
2186
+ debug4("Context replacement in aiAction:", {
2187
+ original: originalPrompt,
2188
+ processed: processedPrompt,
2189
+ storedData: this.contextStore.getAllData()
2190
+ });
2191
+ }
2192
+ this.contextStore.addStep({
2193
+ type: "action",
2194
+ summary: `Action: ${processedPrompt}`,
2195
+ prompt: processedPrompt
2196
+ });
2197
+ debug4("Added action step to context store:", {
2198
+ stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2199
+ totalSteps: this.contextStore.getRecentSteps(100).length
2200
+ });
2201
+ taskPrompt = processedPrompt;
2202
+ } catch (error) {
2203
+ debug4("Context store operation failed:", error);
2204
+ }
2205
+ }
2206
+ const cacheable = opt?.cacheable;
2207
+ const isVlmUiTars = (0, import_env2.vlLocateMode)() === "vlm-ui-tars";
2208
+ const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt);
2209
+ if (matchedCache && this.taskCache?.isCacheResultUsed) {
2210
+ const { executor: executor2 } = await this.taskExecutor.loadYamlFlowAsPlanning(
2211
+ taskPrompt,
2212
+ matchedCache.cacheContent?.yamlWorkflow
2213
+ );
2214
+ const metadata2 = this.afterTaskRunning(executor2);
2215
+ debug4("matched cache, will call .runYaml to run the action");
2216
+ const yaml5 = matchedCache.cacheContent?.yamlWorkflow;
2217
+ const result = await this.runYaml(yaml5);
2218
+ return {
2219
+ result: result.result,
2220
+ metadata: metadata2
2221
+ };
2222
+ }
2223
+ const { output, executor } = await (isVlmUiTars ? this.taskExecutor.actionToGoal(taskPrompt, { cacheable }) : this.taskExecutor.action(taskPrompt, this.opts.aiActionContext, {
2224
+ cacheable
2225
+ }));
2226
+ if (this.taskCache && output?.yamlFlow && cacheable !== false) {
2227
+ const yamlContent = {
2228
+ tasks: [
2229
+ {
2230
+ name: taskPrompt,
2231
+ flow: output.yamlFlow
2232
+ }
2233
+ ]
2234
+ };
2235
+ const yamlFlowStr = import_js_yaml4.default.dump(yamlContent);
2236
+ this.taskCache.updateOrAppendCacheRecord(
2237
+ {
2238
+ type: "plan",
2239
+ prompt: taskPrompt,
2240
+ yamlWorkflow: yamlFlowStr
2241
+ },
2242
+ matchedCache
2243
+ );
2244
+ }
2245
+ const metadata = this.afterTaskRunning(executor);
2246
+ return {
2247
+ result: output,
2248
+ metadata
2249
+ };
2250
+ }
2251
+ async aiQuery(demand) {
2252
+ let processedDemand = demand;
2253
+ let storageKey;
2254
+ try {
2255
+ const aiModel = await import("misoai-core/ai-model");
2256
+ const contextStore = aiModel.getContextStore();
2257
+ if (typeof demand === "string") {
2258
+ const storageInstruction = contextStore.parseStorageInstruction(demand);
2259
+ if (storageInstruction) {
2260
+ storageKey = storageInstruction.key;
2261
+ processedDemand = storageInstruction.cleanText;
2262
+ contextStore._pendingAliases = storageInstruction.aliases;
2263
+ } else {
2264
+ const storageMatch = demand.match(/store\s+(?:as\s+)?(\w+)/i);
2265
+ if (storageMatch) {
2266
+ storageKey = storageMatch[1];
2267
+ processedDemand = demand.replace(/,?\s*store\s+(?:as\s+)?\w+/i, "").trim();
2268
+ }
2269
+ }
2270
+ }
2271
+ } catch (error) {
2272
+ debug4("Context store not available:", error);
2273
+ }
2274
+ const { output, executor } = await this.taskExecutor.query(processedDemand);
2275
+ if (this.opts.enableCumulativeContext && this.contextStore) {
2276
+ if (storageKey && output) {
2277
+ try {
2278
+ const pendingAliases = this.contextStore._pendingAliases;
2279
+ if (pendingAliases) {
2280
+ this.contextStore.storeDataWithAliases(storageKey, output, pendingAliases, typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand));
2281
+ delete this.contextStore._pendingAliases;
2282
+ debug4("Stored query result with aliases:", {
2283
+ key: storageKey,
2284
+ value: output,
2285
+ aliases: pendingAliases
2286
+ });
2287
+ } else {
2288
+ this.contextStore.storeData(storageKey, output);
2289
+ debug4("Stored query result:", {
2290
+ key: storageKey,
2291
+ value: output
2292
+ });
2293
+ }
2294
+ this.contextStore.addStep({
2295
+ type: "query",
2296
+ summary: `Query: ${typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)} (stored as ${storageKey})`,
2297
+ data: output,
2298
+ prompt: typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)
2299
+ });
2300
+ debug4("Added query step to context store:", {
2301
+ storageKey,
2302
+ totalStoredItems: Object.keys(this.contextStore.getAllData()).length,
2303
+ totalSteps: this.contextStore.getRecentSteps(100).length
2304
+ });
2305
+ } catch (error) {
2306
+ debug4("Failed to store query result:", error);
2307
+ }
2308
+ } else {
2309
+ try {
2310
+ this.contextStore.addStep({
2311
+ type: "query",
2312
+ summary: `Query: ${typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)}`,
2313
+ data: output,
2314
+ prompt: typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)
2315
+ });
2316
+ } catch (error) {
2317
+ debug4("Failed to add query step:", error);
2318
+ }
2319
+ }
2320
+ }
2321
+ const metadata = this.afterTaskRunning(executor);
2322
+ return {
2323
+ result: output,
2324
+ metadata
2325
+ };
2326
+ }
2327
+ async aiBoolean(prompt) {
2328
+ const { output, executor } = await this.taskExecutor.boolean(prompt);
2329
+ const metadata = this.afterTaskRunning(executor);
2330
+ return {
2331
+ result: output,
2332
+ metadata
2333
+ };
2334
+ }
2335
+ async aiNumber(prompt) {
2336
+ const { output, executor } = await this.taskExecutor.number(prompt);
2337
+ const metadata = this.afterTaskRunning(executor);
2338
+ return {
2339
+ result: output,
2340
+ metadata
2341
+ };
2342
+ }
2343
+ async aiString(prompt) {
2344
+ const { output, executor } = await this.taskExecutor.string(prompt);
2345
+ const metadata = this.afterTaskRunning(executor);
2346
+ return {
2347
+ result: output,
2348
+ metadata
2349
+ };
2350
+ }
2351
+ async describeElementAtPoint(center, opt) {
2352
+ const { verifyPrompt = true, retryLimit = 3 } = opt || {};
2353
+ let success = false;
2354
+ let retryCount = 0;
2355
+ let resultPrompt = "";
2356
+ let deepThink = opt?.deepThink || false;
2357
+ let verifyResult;
2358
+ while (!success && retryCount < retryLimit) {
2359
+ if (retryCount >= 2) {
2360
+ deepThink = true;
2361
+ }
2362
+ debug4(
2363
+ "aiDescribe",
2364
+ center,
2365
+ "verifyPrompt",
2366
+ verifyPrompt,
2367
+ "retryCount",
2368
+ retryCount,
2369
+ "deepThink",
2370
+ deepThink
2371
+ );
2372
+ const text = await this.insight.describe(center, { deepThink });
2373
+ debug4("aiDescribe text", text);
2374
+ (0, import_utils12.assert)(text.description, `failed to describe element at [${center}]`);
2375
+ resultPrompt = text.description;
2376
+ verifyResult = await this.verifyLocator(
2377
+ resultPrompt,
2378
+ deepThink ? { deepThink: true } : void 0,
2379
+ center,
2380
+ opt
2381
+ );
2382
+ if (verifyResult.pass) {
2383
+ success = true;
2384
+ } else {
2385
+ retryCount++;
2386
+ }
2387
+ }
2388
+ return {
2389
+ prompt: resultPrompt,
2390
+ deepThink,
2391
+ verifyResult
2392
+ };
2393
+ }
2394
+ async verifyLocator(prompt, locateOpt, expectCenter, verifyLocateOption) {
2395
+ debug4("verifyLocator", prompt, locateOpt, expectCenter, verifyLocateOption);
2396
+ const locateResult = await this.aiLocate(prompt, locateOpt);
2397
+ const { center: verifyCenter, rect: verifyRect } = locateResult.result;
2398
+ const distance = distanceOfTwoPoints(expectCenter, verifyCenter);
2399
+ const included = includedInRect(expectCenter, verifyRect);
2400
+ const pass = distance <= (verifyLocateOption?.centerDistanceThreshold || 20) || included;
2401
+ const verifyResult = {
2402
+ pass,
2403
+ rect: verifyRect,
2404
+ center: verifyCenter,
2405
+ centerDistance: distance
2406
+ };
2407
+ debug4("aiDescribe verifyResult", verifyResult);
2408
+ return verifyResult;
2409
+ }
2410
+ async aiLocate(prompt, opt) {
2411
+ const detailedLocateParam = this.buildDetailedLocateParam(prompt, opt);
2412
+ const plans = buildPlans("Locate", detailedLocateParam);
2413
+ const { executor, output } = await this.taskExecutor.runPlans(
2414
+ taskTitleStr("Locate", locateParamStr(detailedLocateParam)),
2415
+ plans,
2416
+ { cacheable: opt?.cacheable }
2417
+ );
2418
+ const metadata = this.afterTaskRunning(executor);
2419
+ const { element } = output;
2420
+ const result = {
2421
+ rect: element?.rect,
2422
+ center: element?.center
2423
+ };
2424
+ return {
2425
+ result,
2426
+ metadata
2427
+ };
2428
+ }
2429
+ async aiAssert(assertion, msg, opt) {
2430
+ let processedAssertion = assertion;
2431
+ if (this.opts.enableCumulativeContext && this.contextStore) {
2432
+ try {
2433
+ const originalAssertion = assertion;
2434
+ processedAssertion = this.contextStore.replaceAllReferences(assertion, "assertion");
2435
+ if (originalAssertion !== processedAssertion) {
2436
+ debug4("Context replacement in aiAssert:", {
2437
+ original: originalAssertion,
2438
+ processed: processedAssertion,
2439
+ context: "assertion",
2440
+ storedData: this.contextStore.getAllData()
2441
+ });
2442
+ }
2443
+ this.contextStore.addStep({
2444
+ type: "assertion",
2445
+ summary: `Assertion: ${processedAssertion}`,
2446
+ prompt: processedAssertion
2447
+ });
2448
+ debug4("Added assertion step to context store:", {
2449
+ totalSteps: this.contextStore.getRecentSteps(100).length
2450
+ });
2451
+ } catch (error) {
2452
+ debug4("Context store operation failed:", error);
2453
+ }
2454
+ }
2455
+ let currentUrl = "";
2456
+ if (this.page.url) {
2457
+ try {
2458
+ currentUrl = await this.page.url();
2459
+ } catch (e) {
2460
+ }
2461
+ }
2462
+ const assertionWithContext = currentUrl ? `For the page at URL "${currentUrl}", ${processedAssertion}` : processedAssertion;
2463
+ const { output, executor } = await this.taskExecutor.assert(assertionWithContext);
2464
+ const metadata = this.afterTaskRunning(executor, true);
2465
+ if (output && opt?.keepRawResponse) {
2466
+ return {
2467
+ result: output,
2468
+ metadata
2469
+ };
2470
+ }
2471
+ if (!output?.pass) {
2472
+ const errMsg = msg || `Assertion failed: ${assertion}`;
2473
+ const reasonMsg = `Reason: ${output?.thought || executor.latestErrorTask()?.error || "(no_reason)"}`;
2474
+ throw new Error(`${errMsg}
2475
+ ${reasonMsg}`);
2476
+ }
2477
+ return {
2478
+ result: true,
2479
+ metadata
2480
+ };
2481
+ }
2482
+ async aiCaptcha(options) {
2483
+ const { deepThink = false, autoDetectComplexity = true } = options || {};
2484
+ let shouldUseDeepThink = deepThink;
2485
+ if (autoDetectComplexity && !deepThink) {
2486
+ const context = await this.getUIContext();
2487
+ const { screenshotBase64 } = context;
2488
+ try {
2489
+ const complexityAnalysisPrompt = `
2490
+ Analyze this screenshot and determine if it contains a complex CAPTCHA that would benefit from deep thinking.
2491
+ A complex CAPTCHA typically has one or more of these characteristics:
2492
+ - Distorted or overlapping text that is hard to read
2493
+ - Multiple images that need to be selected based on a specific criteria
2494
+ - Puzzles that require spatial reasoning
2495
+ - Multiple steps or verification methods
2496
+ - Small or hard-to-distinguish elements
2497
+
2498
+ Return only "complex" or "simple" based on your analysis.
2499
+ `;
2500
+ const complexityMsgs = [
2501
+ { role: "system", content: "You are an AI assistant that analyzes screenshots to determine CAPTCHA complexity." },
2502
+ {
2503
+ role: "user",
2504
+ content: [
2505
+ {
2506
+ type: "image_url",
2507
+ image_url: {
2508
+ url: screenshotBase64,
2509
+ detail: "high"
2510
+ }
2511
+ },
2512
+ {
2513
+ type: "text",
2514
+ text: complexityAnalysisPrompt
2515
+ }
2516
+ ]
2517
+ }
2518
+ ];
2519
+ const complexityResult = await this.insight.aiVendorFn(
2520
+ complexityMsgs,
2521
+ { type: "extract_data" }
2522
+ );
2523
+ const responseText = typeof complexityResult.content === "string" ? complexityResult.content.toLowerCase() : JSON.stringify(complexityResult.content).toLowerCase();
2524
+ shouldUseDeepThink = responseText.includes("complex");
2525
+ debug4("CAPTCHA complexity analysis:", responseText, "Using deep think:", shouldUseDeepThink);
2526
+ } catch (error) {
2527
+ debug4("Failed to analyze CAPTCHA complexity:", error);
2528
+ }
2529
+ }
2530
+ const captchaResponse = await this._callInsightCaptcha({
2531
+ deepThink: shouldUseDeepThink
2532
+ });
2533
+ const captchaResult = captchaResponse.content;
2534
+ const usage = captchaResponse.usage;
2535
+ const actualDeepThink = captchaResponse.deepThink || false;
2536
+ if (captchaResult.captchaType === "text") {
2537
+ for (const action of captchaResult.actions) {
2538
+ if (action.type === "click" && action.target) {
2539
+ await this.aiTap(action.target, { deepThink: shouldUseDeepThink });
2540
+ } else if (action.type === "input" && action.value) {
2541
+ if (action.target) {
2542
+ await this.aiInput(action.value, action.target, { deepThink: shouldUseDeepThink });
2543
+ }
2544
+ } else if (action.type === "verify" && action.target) {
2545
+ await this.aiTap(action.target, { deepThink: shouldUseDeepThink });
2546
+ }
2547
+ }
2548
+ } else if (captchaResult.captchaType === "image") {
2549
+ for (const action of captchaResult.actions) {
2550
+ if (action.type === "click") {
2551
+ if (action.coordinates) {
2552
+ const x = action.coordinates[0];
2553
+ const y = action.coordinates[1];
2554
+ await this.aiTap(`element at coordinates (${x}, ${y})`, { deepThink: shouldUseDeepThink });
2555
+ } else if (action.target) {
2556
+ await this.aiTap(action.target, { deepThink: shouldUseDeepThink });
2557
+ }
2558
+ } else if (action.type === "verify" && action.target) {
2559
+ await this.aiTap(action.target, { deepThink: shouldUseDeepThink });
2560
+ }
2561
+ }
2562
+ }
2563
+ await new Promise((resolve2) => setTimeout(resolve2, 3e3));
2564
+ const metadata = {
2565
+ status: "finished",
2566
+ usage,
2567
+ thought: captchaResult.thought
2568
+ };
2569
+ metadata.deepThink = actualDeepThink;
2570
+ if (autoDetectComplexity && !deepThink) {
2571
+ metadata.autoDetectedComplexity = shouldUseDeepThink;
2572
+ }
2573
+ return {
2574
+ result: captchaResult,
2575
+ metadata
2576
+ };
2577
+ }
2578
+ async aiWaitFor(assertion, opt) {
2579
+ const startTime = Date.now();
2580
+ const { executor } = await this.taskExecutor.waitFor(assertion, {
2581
+ timeoutMs: opt?.timeoutMs || 15 * 1e3,
2582
+ checkIntervalMs: opt?.checkIntervalMs || 3 * 1e3,
2583
+ assertion
2584
+ });
2585
+ const metadata = {
2586
+ status: executor.isInErrorState() ? "failed" : "finished",
2587
+ start: startTime,
2588
+ end: Date.now(),
2589
+ totalTime: Date.now() - startTime,
2590
+ thought: executor.latestErrorTask()?.thought,
2591
+ actionDetails: executor.tasks.map((task) => ({
2592
+ type: task.type,
2593
+ subType: task.subType,
2594
+ status: task.status,
2595
+ thought: task.thought
2596
+ }))
2597
+ };
2598
+ this.appendExecutionDump(executor.dump());
2599
+ this.writeOutActionDumps();
2600
+ if (executor.isInErrorState()) {
2601
+ const errorTask = executor.latestErrorTask();
2602
+ throw new Error(`${errorTask?.error}
2603
+ ${errorTask?.errorStack}`);
2604
+ }
2605
+ return {
2606
+ result: true,
2607
+ // Successfully waited
2608
+ metadata
2609
+ };
2610
+ }
2611
+ async ai(taskPrompt, type = "action", options) {
2612
+ if (type === "action") {
2613
+ return this.aiAction(taskPrompt);
2614
+ }
2615
+ if (type === "query") {
2616
+ return this.aiQuery(taskPrompt);
2617
+ }
2618
+ if (type === "assert") {
2619
+ return this.aiAssert(taskPrompt);
2620
+ }
2621
+ if (type === "tap") {
2622
+ return this.aiTap(taskPrompt, options);
2623
+ }
2624
+ if (type === "captcha") {
2625
+ return this.aiCaptcha(options);
2626
+ }
2627
+ throw new Error(
2628
+ `Unknown type: ${type}, only support 'action', 'query', 'assert', 'tap', 'captcha'`
2629
+ );
2630
+ }
2631
+ async runYaml(yamlScriptContent) {
2632
+ const startTime = Date.now();
2633
+ const script = parseYamlScript(yamlScriptContent, "yaml", true);
2634
+ const player = new ScriptPlayer(script, async () => {
2635
+ return { agent: this, freeFn: [] };
2636
+ });
2637
+ await player.run();
2638
+ const endTime = Date.now();
2639
+ const metadata = {
2640
+ status: player.status,
2641
+ start: startTime,
2642
+ end: endTime,
2643
+ totalTime: endTime - startTime,
2644
+ tasks: player.taskStatusList.map((task) => ({
2645
+ type: "yaml-task",
2646
+ subType: task.name,
2647
+ status: task.status,
2648
+ error: task.error?.message
2649
+ }))
2650
+ };
2651
+ if (player.status === "error") {
2652
+ const errors = player.taskStatusList.filter((task) => task.status === "error").map((task) => {
2653
+ return `task - ${task.name}: ${task.error?.message}`;
2654
+ }).join("\n");
2655
+ throw new Error(`Error(s) occurred in running yaml script:
2656
+ ${errors}`);
2657
+ }
2658
+ return {
2659
+ result: player.result,
2660
+ metadata
2661
+ };
2662
+ }
2663
+ async evaluateJavaScript(script) {
2664
+ (0, import_utils12.assert)(
2665
+ this.page.evaluateJavaScript,
2666
+ "evaluateJavaScript is not supported in current agent"
2667
+ );
2668
+ if (this.page.evaluateJavaScript) {
2669
+ return this.page.evaluateJavaScript(script);
2670
+ }
2671
+ throw new Error("evaluateJavaScript is not supported in current agent");
2672
+ }
2673
+ async destroy() {
2674
+ await this.page.destroy();
2675
+ }
2676
+ };
2677
+
2678
+ // src/playground/agent.ts
2679
/**
 * PageAgent variant used by the playground: built with empty options and
 * with `dryMode` switched on.
 */
var StaticPageAgent = class extends PageAgent {
  constructor(page) {
    const noOptions = {};
    super(page, noOptions);
    this.dryMode = true;
  }
};
2685
+
2686
+ // src/playground/static-page.ts
2687
/**
 * Always throws: marks a page capability that a static UI context does not provide.
 * @param methodName name reported in the error message
 */
var ThrowNotImplemented = (methodName) => {
  const message = `The method "${methodName}" is not implemented as designed since this is a static UI context. (${ERROR_CODE_NOT_IMPLEMENTED_AS_DESIGNED})`;
  throw new Error(message);
};
2692
/**
 * Page implementation backed by a pre-captured UI context.
 * It answers size, screenshot, URL and context requests from that snapshot;
 * every interaction or DOM query throws via ThrowNotImplemented.
 */
var StaticPage = class {
  /**
   * @param uiContext captured context; when it has no `tree`, a flat tree is
   *                  built from `uiContext.content` and assigned onto the same object
   */
  constructor(uiContext) {
    const notImplemented = (name) => ThrowNotImplemented.bind(null, name);
    this.pageType = "static";
    this.mouse = {
      click: notImplemented("mouse.click"),
      wheel: notImplemented("mouse.wheel"),
      move: notImplemented("mouse.move"),
      drag: notImplemented("mouse.drag")
    };
    this.keyboard = {
      type: notImplemented("keyboard.type"),
      press: notImplemented("keyboard.press")
    };
    if (uiContext.tree) {
      this.uiContext = uiContext;
      return;
    }
    const flatContents = uiContext.content || [];
    const syntheticTree = {
      node: null,
      children: flatContents.map((content) => ({ node: content, children: [] }))
    };
    this.uiContext = Object.assign(uiContext, { tree: syntheticTree });
  }
  // --- Unsupported in a static context: each of these rejects ---
  async evaluateJavaScript(script) {
    return ThrowNotImplemented("evaluateJavaScript");
  }
  async getElementsInfo() {
    return ThrowNotImplemented("getElementsInfo");
  }
  async getElementsNodeTree() {
    return ThrowNotImplemented("getElementsNodeTree");
  }
  async getXpathsById(id) {
    return ThrowNotImplemented("getXpathsById");
  }
  async getElementInfoByXpath(xpath) {
    return ThrowNotImplemented("getElementInfoByXpath");
  }
  // --- Answered from the captured snapshot ---
  async size() {
    const { size } = this.uiContext;
    return size;
  }
  async screenshotBase64() {
    const snapshot = this.uiContext.screenshotBase64;
    if (snapshot) {
      return snapshot;
    }
    throw new Error("screenshot base64 is empty");
  }
  async url() {
    const { url } = this.uiContext;
    return url;
  }
  // --- Scrolling and input are unsupported in a static context ---
  async scrollUntilTop(startingPoint) {
    return ThrowNotImplemented("scrollUntilTop");
  }
  async scrollUntilBottom(startingPoint) {
    return ThrowNotImplemented("scrollUntilBottom");
  }
  async scrollUntilLeft(startingPoint) {
    return ThrowNotImplemented("scrollUntilLeft");
  }
  async scrollUntilRight(startingPoint) {
    return ThrowNotImplemented("scrollUntilRight");
  }
  async scrollUp(distance, startingPoint) {
    return ThrowNotImplemented("scrollUp");
  }
  async scrollDown(distance, startingPoint) {
    return ThrowNotImplemented("scrollDown");
  }
  async scrollLeft(distance, startingPoint) {
    return ThrowNotImplemented("scrollLeft");
  }
  async scrollRight(distance, startingPoint) {
    return ThrowNotImplemented("scrollRight");
  }
  async clearInput() {
    return ThrowNotImplemented("clearInput");
  }
  /** Return the captured context as-is. */
  async _forceUsePageContext() {
    return this.uiContext;
  }
  /** Nothing to release for a static page. */
  async destroy() {
  }
};
2781
+
2782
+ // src/playground/server.ts
2783
+ var import_node_crypto = require("crypto");
2784
+ var import_node_fs3 = require("fs");
2785
+ var import_node_path3 = require("path");
2786
+ var import_utils18 = require("misoai-core/utils");
2787
+ var import_constants3 = require("misoai-shared/constants");
2788
+ var import_env3 = require("misoai-shared/env");
2789
+ var import_utils19 = require("misoai-shared/utils");
2790
+ var import_cors = __toESM(require("cors"));
2791
+ var import_dotenv = __toESM(require("dotenv"));
2792
+ var import_express = __toESM(require("express"));
2793
+ var defaultPort = import_constants3.PLAYGROUND_SERVER_PORT;
2794
/**
 * Express error-handling middleware that logs the error and answers with a
 * JSON body of the form `{ error: <message> }`.
 *
 * - If the response has already started (`res.headersSent`), nothing more can
 *   be written safely, so the error is passed on via `next(err)`.
 * - The status code is taken from `err.status` / `err.statusCode` when that is
 *   an integer in the 4xx-5xx range (body parsers attach such codes); anything
 *   else falls back to 500.
 * - Values that are not Error objects (strings, null, ...) are stringified
 *   instead of crashing on a missing `.message`.
 *
 * @param err  value passed to `next(err)` or thrown by a handler.
 * @param req  incoming request (unused).
 * @param res  response used to send the JSON error.
 * @param next continuation, called only when headers were already sent.
 */
var errorHandler = (err, req, res, next) => {
  console.error(err);

  if (res.headersSent) {
    next(err);
    return;
  }

  const declaredStatus = err ? err.status || err.statusCode : void 0;
  const isHttpError = Number.isInteger(declaredStatus) && declaredStatus >= 400 && declaredStatus < 600;
  const status = isHttpError ? declaredStatus : 500;
  const message = err && err.message !== void 0 ? err.message : String(err);

  res.status(status).json({
    error: message
  });
};
2800
/**
 * One-time environment bootstrap: outside the browser, runs dotenv's
 * `config()` and, when it yields parsed values, feeds them to
 * `overrideAIConfig`. Does nothing when `ifInBrowser` is truthy.
 */
var setup = async () => {
  if (import_utils19.ifInBrowser) {
    return;
  }
  const envResult = import_dotenv.default.config();
  const envValues = envResult.parsed;
  if (!envValues) {
    return;
  }
  (0, import_env3.overrideAIConfig)(envValues);
};
2808
/**
 * Express-based HTTP server behind the playground.
 *
 * Routes registered by `launch()`:
 *   GET  /status                     liveness probe
 *   GET  /context/:uuid              read back a context saved on disk
 *   GET  /task-progress/:requestId   latest progress tip of a running request
 *   POST /playground-with-context    persist a context, answer with its uuid
 *   POST /execute                    run aiQuery / aiAction / aiAssert / aiTap
 *   POST /config                     override the AI configuration
 *   GET  / and GET *                 static files (only when staticPath is set)
 */
var PlaygroundServer = class {
  /**
   * @param pageClass  constructor called as `new pageClass(context)` per request.
   * @param agentClass constructor called as `new agentClass(page)` per request.
   * @param staticPath optional directory served as static files.
   */
  constructor(pageClass, agentClass, staticPath) {
    this.app = (0, import_express.default)();
    this.tmpDir = (0, import_utils18.getTmpDir)();
    this.pageClass = pageClass;
    this.agentClass = agentClass;
    this.staticPath = staticPath;
    // requestId -> latest progress tip; entries live only while /execute runs.
    this.taskProgressTips = {};
    setup();
  }

  /** Path of the JSON file that stores the context identified by `uuid2`. */
  filePathForUuid(uuid2) {
    return (0, import_node_path3.join)(this.tmpDir, `${uuid2}.json`);
  }

  /**
   * Writes `context` to the file for `uuid2` and returns that file's path.
   */
  saveContextFile(uuid2, context) {
    const tmpFile = this.filePathForUuid(uuid2);
    console.log(`save context file: ${tmpFile}`);
    (0, import_node_fs3.writeFileSync)(tmpFile, context);
    return tmpFile;
  }

  /**
   * Registers all routes and starts listening.
   *
   * @param port port to listen on; falls back to `defaultPort` when falsy.
   * @returns promise resolving to this server once it is listening, or
   *   rejecting with the listen error (for example a port already in use).
   */
  async launch(port) {
    this.port = port || defaultPort;

    // True when `candidate` resolves to a location strictly inside `rootDir`.
    // Guards the file-serving routes against "../" path traversal.
    const isInside = (rootDir, candidate) => {
      const root = (0, import_node_path3.resolve)(rootDir);
      const prefix = root.endsWith(import_node_path3.sep) ? root : root + import_node_path3.sep;
      return (0, import_node_path3.resolve)(candidate).startsWith(prefix);
    };
    // Thrown values are not guaranteed to be Error objects.
    const messageOf = (error) => error != null && typeof error.message === "string" ? error.message : String(error);

    // NOTE(review): browsers refuse credentialed requests when the allowed
    // origin is the "*" wildcard, so `credentials: true` has no practical
    // effect here; kept as-is to avoid changing the CORS policy.
    this.app.use(
      (0, import_cors.default)({
        origin: "*",
        credentials: true
      })
    );

    this.app.get("/status", (0, import_cors.default)(), async (req, res) => {
      res.send({
        status: "ok"
      });
    });

    this.app.get("/context/:uuid", async (req, res) => {
      const { uuid: uuid2 } = req.params;
      const contextFile = this.filePathForUuid(uuid2);
      // The uuid comes from the URL: never read a file outside tmpDir.
      if (!isInside(this.tmpDir, contextFile) || !(0, import_node_fs3.existsSync)(contextFile)) {
        return res.status(404).json({
          error: "Context not found"
        });
      }
      const context = (0, import_node_fs3.readFileSync)(contextFile, "utf8");
      res.json({
        context
      });
    });

    this.app.get("/task-progress/:requestId", (0, import_cors.default)(), async (req, res) => {
      const { requestId } = req.params;
      res.json({
        tip: this.taskProgressTips[requestId] || ""
      });
    });

    this.app.post(
      "/playground-with-context",
      import_express.default.json({ limit: "50mb" }),
      async (req, res) => {
        const context = req.body.context;
        if (!context) {
          return res.status(400).json({
            error: "context is required"
          });
        }
        const uuid2 = (0, import_node_crypto.randomUUID)();
        this.saveContextFile(uuid2, context);
        return res.json({
          location: `/playground/${uuid2}`,
          uuid: uuid2
        });
      }
    );

    this.app.post(
      "/execute",
      import_express.default.json({ limit: "30mb" }),
      async (req, res) => {
        const { context, type, prompt, requestId, deepThink } = req.body;
        if (!context) {
          return res.status(400).json({
            error: "context is required"
          });
        }
        if (!type) {
          return res.status(400).json({
            error: "type is required"
          });
        }
        if (!prompt) {
          return res.status(400).json({
            error: "prompt is required"
          });
        }
        const page = new this.pageClass(context);
        const agent = new this.agentClass(page);
        if (requestId) {
          this.taskProgressTips[requestId] = "";
          agent.onTaskStartTip = (tip) => {
            this.taskProgressTips[requestId] = tip;
          };
        }
        const response = {
          result: null,
          dump: null,
          error: null,
          reportHTML: null,
          requestId
        };
        const startTime = Date.now();
        try {
          if (type === "aiQuery") {
            response.result = await agent.aiQuery(prompt);
          } else if (type === "aiAction") {
            response.result = await agent.aiAction(prompt);
          } else if (type === "aiAssert") {
            response.result = await agent.aiAssert(prompt, void 0, {
              keepRawResponse: true
            });
          } else if (type === "aiTap") {
            response.result = await agent.aiTap(prompt, {
              deepThink
            });
          } else {
            response.error = `Unknown type: ${type}`;
          }
        } catch (error) {
          // "Not implemented as designed" failures are expected on a static
          // page and are deliberately not reported as errors.
          const message = messageOf(error);
          if (!message.includes(ERROR_CODE_NOT_IMPLEMENTED_AS_DESIGNED)) {
            response.error = message;
          }
        }
        try {
          response.dump = JSON.parse(agent.dumpDataString());
          response.reportHTML = agent.reportHTMLString() || null;
          agent.writeOutActionDumps();
        } catch (error) {
          console.error(
            `write out dump failed: requestId: ${requestId}, ${messageOf(error)}`
          );
        }
        res.send(response);
        // The request is finished: drop its progress entry so the map does
        // not grow for the lifetime of the process.
        if (requestId) {
          delete this.taskProgressTips[requestId];
        }
        const timeCost = Date.now() - startTime;
        if (response.error) {
          console.error(
            `handle request failed after ${timeCost}ms: requestId: ${requestId}, ${response.error}`
          );
        } else {
          console.log(
            `handle request done after ${timeCost}ms: requestId: ${requestId}`
          );
        }
      }
    );

    this.app.post(
      "/config",
      import_express.default.json({ limit: "1mb" }),
      async (req, res) => {
        const { aiConfig } = req.body;
        if (!aiConfig || typeof aiConfig !== "object") {
          return res.status(400).json({
            error: "aiConfig is required and must be an object"
          });
        }
        try {
          (0, import_env3.overrideAIConfig)(aiConfig);
          return res.json({
            status: "ok",
            message: "AI config updated successfully"
          });
        } catch (error) {
          console.error(`Failed to update AI config: ${messageOf(error)}`);
          return res.status(500).json({
            error: `Failed to update AI config: ${messageOf(error)}`
          });
        }
      }
    );

    if (this.staticPath) {
      // Resolve once so sendFile always receives an absolute path.
      const staticRoot = (0, import_node_path3.resolve)(this.staticPath);
      const indexFile = (0, import_node_path3.join)(staticRoot, "index.html");
      this.app.get("/", (req, res) => {
        res.redirect("/index.html");
      });
      this.app.get("*", (req, res) => {
        const requestedPath = (0, import_node_path3.join)(staticRoot, req.path);
        // Anything missing, or resolving outside the static root, gets
        // index.html instead.
        if (isInside(staticRoot, requestedPath) && (0, import_node_fs3.existsSync)(requestedPath)) {
          res.sendFile(requestedPath);
        } else {
          res.sendFile(indexFile);
        }
      });
    }

    // Error-handling middleware only sees errors raised by what was registered
    // before it, so it has to be added after every route.
    this.app.use(errorHandler);

    return new Promise((resolve2, reject) => {
      const onListenError = (error) => reject(error);
      this.server = this.app.listen(this.port, (error) => {
        this.server.removeListener("error", onListenError);
        // Presumably only some Express versions pass listen errors to this
        // callback; handling the argument is harmless when it is absent.
        if (error) {
          reject(error);
          return;
        }
        resolve2(this);
      });
      // Without this, a failed listen (e.g. port in use) would leave the
      // promise pending forever.
      this.server.once("error", onListenError);
    });
  }

  /** Stops the HTTP server if it was started; returns `server.close()`'s result. */
  close() {
    if (this.server) {
      return this.server.close();
    }
  }
};
3007
+
3008
+ // src/playground/bin.ts
3009
// Entry point: start a playground server backed by the static page/agent
// pair and report where it is listening.
var server = new PlaygroundServer(StaticPage, StaticPageAgent);
Promise.resolve().then(() => server.launch()).then(() => {
  console.log(
    `Midscene playground server is running on http://localhost:${server.port}`
  );
}).catch((error) => {
  // Surface start-up failures explicitly instead of leaving an unhandled
  // rejection, and make the process exit with a failure status.
  console.error("Failed to start Midscene playground server:", error);
  process.exitCode = 1;
});
3015
+
3016
+ //# sourceMappingURL=midscene-playground.js.map