@midscene/web 0.19.1 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. package/dist/es/agent.js +299 -247
  2. package/dist/es/agent.js.map +1 -1
  3. package/dist/es/bridge-mode-browser.js +3 -3
  4. package/dist/es/bridge-mode.js +301 -249
  5. package/dist/es/bridge-mode.js.map +1 -1
  6. package/dist/es/chrome-extension.js +342 -290
  7. package/dist/es/chrome-extension.js.map +1 -1
  8. package/dist/es/index.js +307 -247
  9. package/dist/es/index.js.map +1 -1
  10. package/dist/es/midscene-playground.js +341 -289
  11. package/dist/es/midscene-playground.js.map +1 -1
  12. package/dist/es/midscene-server.js +25 -12
  13. package/dist/es/midscene-server.js.map +1 -1
  14. package/dist/es/playground.js +341 -289
  15. package/dist/es/playground.js.map +1 -1
  16. package/dist/es/playwright-report.js +14 -1
  17. package/dist/es/playwright-report.js.map +1 -1
  18. package/dist/es/playwright-reporter.js +14 -1
  19. package/dist/es/playwright-reporter.js.map +1 -1
  20. package/dist/es/playwright.js +307 -247
  21. package/dist/es/playwright.js.map +1 -1
  22. package/dist/es/puppeteer-agent-launcher.js +299 -247
  23. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  24. package/dist/es/puppeteer.js +299 -247
  25. package/dist/es/puppeteer.js.map +1 -1
  26. package/dist/es/utils.js +42 -8
  27. package/dist/es/utils.js.map +1 -1
  28. package/dist/es/yaml.js +11 -4
  29. package/dist/es/yaml.js.map +1 -1
  30. package/dist/lib/agent.js +308 -256
  31. package/dist/lib/agent.js.map +1 -1
  32. package/dist/lib/bridge-mode-browser.js +3 -3
  33. package/dist/lib/bridge-mode.js +310 -258
  34. package/dist/lib/bridge-mode.js.map +1 -1
  35. package/dist/lib/chrome-extension.js +355 -303
  36. package/dist/lib/chrome-extension.js.map +1 -1
  37. package/dist/lib/index.js +316 -256
  38. package/dist/lib/index.js.map +1 -1
  39. package/dist/lib/midscene-playground.js +354 -302
  40. package/dist/lib/midscene-playground.js.map +1 -1
  41. package/dist/lib/midscene-server.js +28 -15
  42. package/dist/lib/midscene-server.js.map +1 -1
  43. package/dist/lib/playground.js +354 -302
  44. package/dist/lib/playground.js.map +1 -1
  45. package/dist/lib/playwright-report.js +20 -7
  46. package/dist/lib/playwright-report.js.map +1 -1
  47. package/dist/lib/playwright-reporter.js +20 -7
  48. package/dist/lib/playwright-reporter.js.map +1 -1
  49. package/dist/lib/playwright.js +316 -256
  50. package/dist/lib/playwright.js.map +1 -1
  51. package/dist/lib/puppeteer-agent-launcher.js +308 -256
  52. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  53. package/dist/lib/puppeteer.js +308 -256
  54. package/dist/lib/puppeteer.js.map +1 -1
  55. package/dist/lib/utils.js +48 -13
  56. package/dist/lib/utils.js.map +1 -1
  57. package/dist/lib/yaml.js +11 -4
  58. package/dist/lib/yaml.js.map +1 -1
  59. package/dist/types/agent.d.ts +6 -102
  60. package/dist/types/bridge-mode-browser.d.ts +3 -2
  61. package/dist/types/bridge-mode.d.ts +4 -4
  62. package/dist/types/{browser-5dbb4bfb.d.ts → browser-118d886d.d.ts} +1 -1
  63. package/dist/types/chrome-extension.d.ts +2 -2
  64. package/dist/types/index.d.ts +1 -1
  65. package/dist/types/midscene-server.d.ts +2 -2
  66. package/dist/types/{page-90e9f9a7.d.ts → page-471361cd.d.ts} +102 -3
  67. package/dist/types/playground.d.ts +2 -2
  68. package/dist/types/playwright.d.ts +6 -2
  69. package/dist/types/puppeteer-agent-launcher.d.ts +1 -1
  70. package/dist/types/puppeteer.d.ts +3 -3
  71. package/dist/types/utils.d.ts +2 -1
  72. package/dist/types/yaml.d.ts +1 -1
  73. package/package.json +3 -3
@@ -8,7 +8,7 @@ import {
8
8
  traverseTree
9
9
  } from "@midscene/shared/extractor";
10
10
  import { resizeImgBase64 } from "@midscene/shared/img";
11
- import { assert, logMsg, uuid } from "@midscene/shared/utils";
11
+ import { assert as assert2, logMsg, uuid } from "@midscene/shared/utils";
12
12
  import dayjs from "dayjs";
13
13
 
14
14
  // src/web-element.ts
@@ -36,9 +36,184 @@ var WebElementInfo = class {
36
36
  }
37
37
  };
38
38
 
39
+ // src/common/task-cache.ts
40
+ import assert from "assert";
41
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
42
+ import { dirname, join } from "path";
43
+ import { getMidsceneRunSubDir } from "@midscene/shared/common";
44
+ import { getDebug } from "@midscene/shared/logger";
45
+ import { ifInBrowser } from "@midscene/shared/utils";
46
+ import yaml from "js-yaml";
47
+ import semver from "semver";
48
+
49
+ // package.json
50
+ var version = "0.20.0";
51
+
52
+ // src/common/task-cache.ts
53
+ var debug = getDebug("cache");
54
+ var lowestSupportedMidsceneVersion = "0.16.10";
55
+ var cacheFileExt = ".cache.yaml";
56
+ var TaskCache = class {
57
+ // Track matched records
58
+ constructor(cacheId, isCacheResultUsed, cacheFilePath) {
59
+ this.matchedCacheIndices = /* @__PURE__ */ new Set();
60
+ assert(cacheId, "cacheId is required");
61
+ this.cacheId = replaceIllegalPathCharsAndSpace(cacheId);
62
+ this.cacheFilePath = ifInBrowser ? void 0 : cacheFilePath || join(getMidsceneRunSubDir("cache"), `${this.cacheId}${cacheFileExt}`);
63
+ this.isCacheResultUsed = isCacheResultUsed;
64
+ let cacheContent;
65
+ if (this.cacheFilePath) {
66
+ cacheContent = this.loadCacheFromFile();
67
+ }
68
+ if (!cacheContent) {
69
+ cacheContent = {
70
+ midsceneVersion: version,
71
+ cacheId: this.cacheId,
72
+ caches: []
73
+ };
74
+ }
75
+ this.cache = cacheContent;
76
+ this.cacheOriginalLength = this.cache.caches.length;
77
+ }
78
+ matchCache(prompt, type) {
79
+ for (let i = 0; i < this.cacheOriginalLength; i++) {
80
+ const item = this.cache.caches[i];
81
+ const key = `${type}:${prompt}:${i}`;
82
+ if (item.type === type && item.prompt === prompt && !this.matchedCacheIndices.has(key)) {
83
+ this.matchedCacheIndices.add(key);
84
+ debug(
85
+ "cache found and marked as used, type: %s, prompt: %s, index: %d",
86
+ type,
87
+ prompt,
88
+ i
89
+ );
90
+ return {
91
+ cacheContent: item,
92
+ updateFn: (cb) => {
93
+ debug(
94
+ "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
95
+ type,
96
+ prompt,
97
+ i
98
+ );
99
+ cb(item);
100
+ debug(
101
+ "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
102
+ type,
103
+ prompt,
104
+ i
105
+ );
106
+ this.flushCacheToFile();
107
+ }
108
+ };
109
+ }
110
+ }
111
+ debug("no unused cache found, type: %s, prompt: %s", type, prompt);
112
+ return void 0;
113
+ }
114
+ matchPlanCache(prompt) {
115
+ return this.matchCache(prompt, "plan");
116
+ }
117
+ matchLocateCache(prompt) {
118
+ return this.matchCache(prompt, "locate");
119
+ }
120
+ appendCache(cache) {
121
+ debug("will append cache", cache);
122
+ this.cache.caches.push(cache);
123
+ this.flushCacheToFile();
124
+ }
125
+ loadCacheFromFile() {
126
+ const cacheFile = this.cacheFilePath;
127
+ assert(cacheFile, "cache file path is required");
128
+ if (!existsSync(cacheFile)) {
129
+ debug("no cache file found, path: %s", cacheFile);
130
+ return void 0;
131
+ }
132
+ const jsonTypeCacheFile = cacheFile.replace(cacheFileExt, ".json");
133
+ if (existsSync(jsonTypeCacheFile) && this.isCacheResultUsed) {
134
+ console.warn(
135
+ `An outdated cache file from an earlier version of Midscene has been detected. Since version 0.17, we have implemented an improved caching strategy. Please delete the old file located at: ${jsonTypeCacheFile}.`
136
+ );
137
+ return void 0;
138
+ }
139
+ try {
140
+ const data = readFileSync(cacheFile, "utf8");
141
+ const jsonData = yaml.load(data);
142
+ if (!version) {
143
+ debug("no midscene version info, will not read cache from file");
144
+ return void 0;
145
+ }
146
+ if (semver.lt(jsonData.midsceneVersion, lowestSupportedMidsceneVersion) && !jsonData.midsceneVersion.includes("beta")) {
147
+ console.warn(
148
+ `You are using an old version of Midscene cache file, and we cannot match any info from it. Starting from Midscene v0.17, we changed our strategy to use xpath for cache info, providing better performance.
149
+ Please delete the existing cache and rebuild it. Sorry for the inconvenience.
150
+ cache file: ${cacheFile}`
151
+ );
152
+ return void 0;
153
+ }
154
+ debug(
155
+ "cache loaded from file, path: %s, cache version: %s, record length: %s",
156
+ cacheFile,
157
+ jsonData.midsceneVersion,
158
+ jsonData.caches.length
159
+ );
160
+ jsonData.midsceneVersion = version;
161
+ return jsonData;
162
+ } catch (err) {
163
+ debug(
164
+ "cache file exists but load failed, path: %s, error: %s",
165
+ cacheFile,
166
+ err
167
+ );
168
+ return void 0;
169
+ }
170
+ }
171
+ flushCacheToFile() {
172
+ if (!version) {
173
+ debug("no midscene version info, will not write cache to file");
174
+ return;
175
+ }
176
+ if (!this.cacheFilePath) {
177
+ debug("no cache file path, will not write cache to file");
178
+ return;
179
+ }
180
+ try {
181
+ const dir = dirname(this.cacheFilePath);
182
+ if (!existsSync(dir)) {
183
+ mkdirSync(dir, { recursive: true });
184
+ debug("created cache directory: %s", dir);
185
+ }
186
+ const yamlData = yaml.dump(this.cache);
187
+ writeFileSync(this.cacheFilePath, yamlData);
188
+ debug("cache flushed to file: %s", this.cacheFilePath);
189
+ } catch (err) {
190
+ debug(
191
+ "write cache to file failed, path: %s, error: %s",
192
+ this.cacheFilePath,
193
+ err
194
+ );
195
+ }
196
+ }
197
+ updateOrAppendCacheRecord(newRecord, cachedRecord) {
198
+ if (cachedRecord) {
199
+ if (newRecord.type === "plan") {
200
+ cachedRecord.updateFn((cache) => {
201
+ cache.yamlWorkflow = newRecord.yamlWorkflow;
202
+ });
203
+ } else {
204
+ cachedRecord.updateFn((cache) => {
205
+ cache.xpaths = newRecord.xpaths;
206
+ });
207
+ }
208
+ } else {
209
+ this.appendCache(newRecord);
210
+ }
211
+ }
212
+ };
213
+
39
214
  // src/common/utils.ts
40
215
  async function parseContextFromWebPage(page, _opt) {
41
- assert(page, "page is required");
216
+ assert2(page, "page is required");
42
217
  if (page._forceUsePageContext) {
43
218
  return await page._forceUsePageContext();
44
219
  }
@@ -65,7 +240,7 @@ async function parseContextFromWebPage(page, _opt) {
65
240
  isVisible
66
241
  });
67
242
  });
68
- assert(screenshotBase64, "screenshotBase64 is required");
243
+ assert2(screenshotBase64, "screenshotBase64 is required");
69
244
  const size = await page.size();
70
245
  if (size.dpr && size.dpr > 1) {
71
246
  screenshotBase64 = await resizeImgBase64(screenshotBase64, {
@@ -113,6 +288,28 @@ function matchElementFromPlan(planLocateParam, tree) {
113
288
  }
114
289
  return void 0;
115
290
  }
291
+ async function matchElementFromCache(taskExecutor, xpaths, cachePrompt, cacheable) {
292
+ try {
293
+ if (xpaths?.length && taskExecutor.taskCache?.isCacheResultUsed && cacheable !== false) {
294
+ for (let i = 0; i < xpaths.length; i++) {
295
+ const element = await taskExecutor.page.getElementInfoByXpath(
296
+ xpaths[i]
297
+ );
298
+ if (element?.id) {
299
+ debug("cache hit, prompt: %s", cachePrompt);
300
+ debug(
301
+ "found a new new element with same xpath, xpath: %s, id: %s",
302
+ xpaths[i],
303
+ element?.id
304
+ );
305
+ return element;
306
+ }
307
+ }
308
+ }
309
+ } catch (error) {
310
+ debug("get element info by xpath error: ", error);
311
+ }
312
+ }
116
313
  function trimContextByViewport(execution) {
117
314
  function filterVisibleTree(node) {
118
315
  if (!node)
@@ -157,10 +354,10 @@ import {
157
354
  import yaml4 from "js-yaml";
158
355
 
159
356
  // src/yaml/player.ts
160
- import { existsSync, mkdirSync, writeFileSync } from "fs";
161
- import { dirname, join, resolve } from "path";
162
- import { assert as assert2, ifInBrowser } from "@midscene/shared/utils";
163
- import { getMidsceneRunSubDir } from "@midscene/shared/common";
357
+ import { existsSync as existsSync2, mkdirSync as mkdirSync2, writeFileSync as writeFileSync2 } from "fs";
358
+ import { dirname as dirname2, join as join2, resolve } from "path";
359
+ import { assert as assert3, ifInBrowser as ifInBrowser2 } from "@midscene/shared/utils";
360
+ import { getMidsceneRunSubDir as getMidsceneRunSubDir2 } from "@midscene/shared/common";
164
361
  var ScriptPlayer = class {
165
362
  constructor(script, setupAgent, onTaskStatusChange) {
166
363
  this.script = script;
@@ -172,14 +369,14 @@ var ScriptPlayer = class {
172
369
  this.pageAgent = null;
173
370
  this.result = {};
174
371
  this.target = script.target || script.web || script.android;
175
- if (ifInBrowser) {
372
+ if (ifInBrowser2) {
176
373
  this.output = void 0;
177
374
  } else if (this.target?.output) {
178
375
  this.output = resolve(process.cwd(), this.target.output);
179
376
  } else {
180
- this.output = join(getMidsceneRunSubDir("output"), `${process.pid}.json`);
377
+ this.output = join2(getMidsceneRunSubDir2("output"), `${process.pid}.json`);
181
378
  }
182
- if (ifInBrowser) {
379
+ if (ifInBrowser2) {
183
380
  this.unstableLogContent = void 0;
184
381
  } else if (typeof this.target?.unstableLogContent === "string") {
185
382
  this.unstableLogContent = resolve(
@@ -187,8 +384,8 @@ var ScriptPlayer = class {
187
384
  this.target.unstableLogContent
188
385
  );
189
386
  } else if (this.target?.unstableLogContent === true) {
190
- this.unstableLogContent = join(
191
- getMidsceneRunSubDir("output"),
387
+ this.unstableLogContent = join2(
388
+ getMidsceneRunSubDir2("output"),
192
389
  "unstableLogContent.json"
193
390
  );
194
391
  }
@@ -234,27 +431,27 @@ var ScriptPlayer = class {
234
431
  flushResult() {
235
432
  if (Object.keys(this.result).length && this.output) {
236
433
  const output = resolve(process.cwd(), this.output);
237
- const outputDir = dirname(output);
238
- if (!existsSync(outputDir)) {
239
- mkdirSync(outputDir, { recursive: true });
434
+ const outputDir = dirname2(output);
435
+ if (!existsSync2(outputDir)) {
436
+ mkdirSync2(outputDir, { recursive: true });
240
437
  }
241
- writeFileSync(output, JSON.stringify(this.result, void 0, 2));
438
+ writeFileSync2(output, JSON.stringify(this.result, void 0, 2));
242
439
  }
243
440
  }
244
441
  flushUnstableLogContent() {
245
442
  if (this.unstableLogContent) {
246
443
  const content = this.pageAgent?._unstableLogContent();
247
444
  const filePath = resolve(process.cwd(), this.unstableLogContent);
248
- const outputDir = dirname(filePath);
249
- if (!existsSync(outputDir)) {
250
- mkdirSync(outputDir, { recursive: true });
445
+ const outputDir = dirname2(filePath);
446
+ if (!existsSync2(outputDir)) {
447
+ mkdirSync2(outputDir, { recursive: true });
251
448
  }
252
- writeFileSync(filePath, JSON.stringify(content, null, 2));
449
+ writeFileSync2(filePath, JSON.stringify(content, null, 2));
253
450
  }
254
451
  }
255
452
  async playTask(taskStatus, agent) {
256
453
  const { flow } = taskStatus;
257
- assert2(flow, "missing flow in task");
454
+ assert3(flow, "missing flow in task");
258
455
  for (const flowItemIndex in flow) {
259
456
  const currentStep = Number.parseInt(flowItemIndex, 10);
260
457
  taskStatus.currentStep = currentStep;
@@ -262,8 +459,8 @@ var ScriptPlayer = class {
262
459
  if ("aiAction" in flowItem || "ai" in flowItem) {
263
460
  const actionTask = flowItem;
264
461
  const prompt = actionTask.aiAction || actionTask.ai;
265
- assert2(prompt, "missing prompt for ai (aiAction)");
266
- assert2(
462
+ assert3(prompt, "missing prompt for ai (aiAction)");
463
+ assert3(
267
464
  typeof prompt === "string",
268
465
  "prompt for aiAction must be a string"
269
466
  );
@@ -274,8 +471,8 @@ var ScriptPlayer = class {
274
471
  const assertTask = flowItem;
275
472
  const prompt = assertTask.aiAssert;
276
473
  const msg = assertTask.errorMessage;
277
- assert2(prompt, "missing prompt for aiAssert");
278
- assert2(
474
+ assert3(prompt, "missing prompt for aiAssert");
475
+ assert3(
279
476
  typeof prompt === "string",
280
477
  "prompt for aiAssert must be a string"
281
478
  );
@@ -287,8 +484,8 @@ var ScriptPlayer = class {
287
484
  domIncluded: queryTask.domIncluded,
288
485
  screenshotIncluded: queryTask.screenshotIncluded
289
486
  };
290
- assert2(prompt, "missing prompt for aiQuery");
291
- assert2(
487
+ assert3(prompt, "missing prompt for aiQuery");
488
+ assert3(
292
489
  typeof prompt === "string",
293
490
  "prompt for aiQuery must be a string"
294
491
  );
@@ -301,8 +498,8 @@ var ScriptPlayer = class {
301
498
  domIncluded: numberTask.domIncluded,
302
499
  screenshotIncluded: numberTask.screenshotIncluded
303
500
  };
304
- assert2(prompt, "missing prompt for number");
305
- assert2(
501
+ assert3(prompt, "missing prompt for aiNumber");
502
+ assert3(
306
503
  typeof prompt === "string",
307
504
  "prompt for number must be a string"
308
505
  );
@@ -315,8 +512,8 @@ var ScriptPlayer = class {
315
512
  domIncluded: stringTask.domIncluded,
316
513
  screenshotIncluded: stringTask.screenshotIncluded
317
514
  };
318
- assert2(prompt, "missing prompt for string");
319
- assert2(
515
+ assert3(prompt, "missing prompt for aiNumber");
516
+ assert3(
320
517
  typeof prompt === "string",
321
518
  "prompt for string must be a string"
322
519
  );
@@ -329,28 +526,35 @@ var ScriptPlayer = class {
329
526
  domIncluded: booleanTask.domIncluded,
330
527
  screenshotIncluded: booleanTask.screenshotIncluded
331
528
  };
332
- assert2(prompt, "missing prompt for boolean");
333
- assert2(
529
+ assert3(prompt, "missing prompt for aiBoolean");
530
+ assert3(
334
531
  typeof prompt === "string",
335
532
  "prompt for boolean must be a string"
336
533
  );
337
534
  const booleanResult = await agent.aiBoolean(prompt, options);
338
535
  this.setResult(booleanTask.name, booleanResult);
536
+ } else if ("aiAsk" in flowItem) {
537
+ const askTask = flowItem;
538
+ const prompt = askTask.aiAsk;
539
+ assert3(prompt, "missing prompt for aiAsk");
540
+ assert3(typeof prompt === "string", "prompt for aiAsk must be a string");
541
+ const askResult = await agent.aiAsk(prompt);
542
+ this.setResult(askTask.name, askResult);
339
543
  } else if ("aiLocate" in flowItem) {
340
544
  const locateTask = flowItem;
341
545
  const prompt = locateTask.aiLocate;
342
- assert2(prompt, "missing prompt for aiLocate");
343
- assert2(
546
+ assert3(prompt, "missing prompt for aiLocate");
547
+ assert3(
344
548
  typeof prompt === "string",
345
549
  "prompt for aiLocate must be a string"
346
550
  );
347
- const locateResult = await agent.aiLocate(prompt);
551
+ const locateResult = await agent.aiLocate(prompt, locateTask);
348
552
  this.setResult(locateTask.name, locateResult);
349
553
  } else if ("aiWaitFor" in flowItem) {
350
554
  const waitForTask = flowItem;
351
555
  const prompt = waitForTask.aiWaitFor;
352
- assert2(prompt, "missing prompt for aiWaitFor");
353
- assert2(
556
+ assert3(prompt, "missing prompt for aiWaitFor");
557
+ assert3(
354
558
  typeof prompt === "string",
355
559
  "prompt for aiWaitFor must be a string"
356
560
  );
@@ -363,7 +567,7 @@ var ScriptPlayer = class {
363
567
  if (typeof ms === "string") {
364
568
  msNumber = Number.parseInt(ms, 10);
365
569
  }
366
- assert2(
570
+ assert3(
367
571
  msNumber && msNumber > 0,
368
572
  `ms for sleep must be greater than 0, but got ${ms}`
369
573
  );
@@ -482,11 +686,11 @@ var ScriptPlayer = class {
482
686
  };
483
687
 
484
688
  // src/yaml/builder.ts
485
- import yaml from "js-yaml";
689
+ import yaml2 from "js-yaml";
486
690
 
487
691
  // src/yaml/utils.ts
488
- import { assert as assert3 } from "@midscene/shared/utils";
489
- import yaml2 from "js-yaml";
692
+ import { assert as assert4 } from "@midscene/shared/utils";
693
+ import yaml3 from "js-yaml";
490
694
  function interpolateEnvVars(content) {
491
695
  return content.replace(/\$\{([^}]+)\}/g, (_, envVar) => {
492
696
  const value = process.env[envVar.trim()];
@@ -512,31 +716,31 @@ function parseYamlScript(content, filePath, ignoreCheckingTarget) {
512
716
  );
513
717
  }
514
718
  const interpolatedContent = interpolateEnvVars(processedContent);
515
- const obj = yaml2.load(interpolatedContent, {
516
- schema: yaml2.JSON_SCHEMA
719
+ const obj = yaml3.load(interpolatedContent, {
720
+ schema: yaml3.JSON_SCHEMA
517
721
  });
518
722
  const pathTip = filePath ? `, failed to load ${filePath}` : "";
519
723
  const android = typeof obj.android !== "undefined" ? Object.assign({}, obj.android || {}) : void 0;
520
724
  const webConfig = obj.web || obj.target;
521
725
  const web = typeof webConfig !== "undefined" ? Object.assign({}, webConfig || {}) : void 0;
522
726
  if (!ignoreCheckingTarget) {
523
- assert3(
727
+ assert4(
524
728
  web || android,
525
729
  `at least one of "target", "web", or "android" properties is required in yaml script${pathTip}`
526
730
  );
527
- assert3(
731
+ assert4(
528
732
  web && !android || !web && android,
529
733
  `only one of "target", "web", or "android" properties is allowed in yaml script${pathTip}`
530
734
  );
531
735
  if (web || android) {
532
- assert3(
736
+ assert4(
533
737
  typeof web === "object" || typeof android === "object",
534
738
  `property "target/web/android" must be an object${pathTip}`
535
739
  );
536
740
  }
537
741
  }
538
- assert3(obj.tasks, `property "tasks" is required in yaml script ${pathTip}`);
539
- assert3(
742
+ assert4(obj.tasks, `property "tasks" is required in yaml script ${pathTip}`);
743
+ assert4(
540
744
  Array.isArray(obj.tasks),
541
745
  `property "tasks" must be an array in yaml script, but got ${obj.tasks}`
542
746
  );
@@ -570,8 +774,8 @@ import {
570
774
  } from "@midscene/core/ai-model";
571
775
  import { sleep } from "@midscene/core/utils";
572
776
  import { NodeType } from "@midscene/shared/constants";
573
- import { getDebug } from "@midscene/shared/logger";
574
- import { assert as assert4 } from "@midscene/shared/utils";
777
+ import { getDebug as getDebug2 } from "@midscene/shared/logger";
778
+ import { assert as assert5 } from "@midscene/shared/utils";
575
779
 
576
780
  // src/common/ui-utils.ts
577
781
  function typeStr(task) {
@@ -647,7 +851,7 @@ function paramStr(task) {
647
851
  }
648
852
 
649
853
  // src/common/tasks.ts
650
- var debug = getDebug("page-task-executor");
854
+ var debug2 = getDebug2("page-task-executor");
651
855
  var replanningCountLimit = 10;
652
856
  var isAndroidPage = (page) => {
653
857
  return page.pageType === "android";
@@ -688,7 +892,7 @@ var PageTaskExecutor = class {
688
892
  if (info?.id) {
689
893
  elementId = info.id;
690
894
  } else {
691
- debug(
895
+ debug2(
692
896
  "no element id found for position node, will not update cache",
693
897
  element
694
898
  );
@@ -701,7 +905,7 @@ var PageTaskExecutor = class {
701
905
  const result = await this.page.getXpathsById(elementId);
702
906
  return result;
703
907
  } catch (error) {
704
- debug("getXpathsById error: ", error);
908
+ debug2("getXpathsById error: ", error);
705
909
  }
706
910
  }
707
911
  prependExecutorWithScreenshot(taskApply, appendAfterExecution = false) {
@@ -755,7 +959,7 @@ var PageTaskExecutor = class {
755
959
  locate: plan2.locate,
756
960
  executor: async (param, taskContext) => {
757
961
  const { task } = taskContext;
758
- assert4(
962
+ assert5(
759
963
  param?.prompt || param?.id || param?.bbox,
760
964
  "No prompt or id or position or bbox to locate"
761
965
  );
@@ -780,39 +984,29 @@ var PageTaskExecutor = class {
780
984
  timing: "before Insight"
781
985
  };
782
986
  task.recorder = [recordItem];
783
- let cacheHitFlag = false;
987
+ const elementFromXpath = param.xpath ? await this.page.getElementInfoByXpath(param.xpath) : void 0;
988
+ const userExpectedPathHitFlag = !!elementFromXpath;
784
989
  const cachePrompt = param.prompt;
785
990
  const locateCacheRecord = this.taskCache?.matchLocateCache(cachePrompt);
786
991
  const xpaths = locateCacheRecord?.cacheContent?.xpaths;
787
- let elementFromCache = null;
788
- try {
789
- if (xpaths?.length && this.taskCache?.isCacheResultUsed && param?.cacheable !== false) {
790
- for (let i = 0; i < xpaths.length; i++) {
791
- const element2 = await this.page.getElementInfoByXpath(
792
- xpaths[i]
793
- );
794
- if (element2?.id) {
795
- elementFromCache = element2;
796
- debug("cache hit, prompt: %s", cachePrompt);
797
- cacheHitFlag = true;
798
- debug(
799
- "found a new new element with same xpath, xpath: %s, id: %s",
800
- xpaths[i],
801
- element2?.id
802
- );
803
- break;
804
- }
805
- }
806
- }
807
- } catch (error) {
808
- debug("get element info by xpath error: ", error);
809
- }
810
- const startTime = Date.now();
811
- const element = elementFromCache || // try to match element from cache
812
- matchElementFromPlan(param, pageContext.tree) || // try to match element from plan
813
- (await this.insight.locate(param, {
992
+ const elementFromCache = userExpectedPathHitFlag ? null : await matchElementFromCache(
993
+ this,
994
+ xpaths,
995
+ cachePrompt,
996
+ param.cacheable
997
+ );
998
+ const cacheHitFlag = !!elementFromCache;
999
+ const elementFromPlan = !userExpectedPathHitFlag && !cacheHitFlag ? matchElementFromPlan(param, pageContext.tree) : void 0;
1000
+ const planHitFlag = !!elementFromPlan;
1001
+ const elementFromAiLocate = !userExpectedPathHitFlag && !cacheHitFlag && !planHitFlag ? (await this.insight.locate(param, {
1002
+ // fallback to ai locate
814
1003
  context: pageContext
815
- })).element;
1004
+ })).element : void 0;
1005
+ const aiLocateHitFlag = !!elementFromAiLocate;
1006
+ const element = elementFromXpath || // highest priority
1007
+ elementFromCache || // second priority
1008
+ elementFromPlan || // third priority
1009
+ elementFromAiLocate;
816
1010
  let currentXpaths;
817
1011
  if (element && this.taskCache && !cacheHitFlag && param?.cacheable !== false) {
818
1012
  const elementXpaths = await this.getElementXpath(
@@ -830,7 +1024,7 @@ var PageTaskExecutor = class {
830
1024
  locateCacheRecord
831
1025
  );
832
1026
  } else {
833
- debug(
1027
+ debug2(
834
1028
  "no xpaths found, will not update cache",
835
1029
  cachePrompt,
836
1030
  elementXpaths
@@ -840,16 +1034,44 @@ var PageTaskExecutor = class {
840
1034
  if (!element) {
841
1035
  throw new Error(`Element not found: ${param.prompt}`);
842
1036
  }
1037
+ let hitBy;
1038
+ if (userExpectedPathHitFlag) {
1039
+ hitBy = {
1040
+ from: "User expected path",
1041
+ context: {
1042
+ xpath: param.xpath
1043
+ }
1044
+ };
1045
+ } else if (cacheHitFlag) {
1046
+ hitBy = {
1047
+ from: "Cache",
1048
+ context: {
1049
+ xpathsFromCache: xpaths,
1050
+ xpathsToSave: currentXpaths
1051
+ }
1052
+ };
1053
+ } else if (planHitFlag) {
1054
+ hitBy = {
1055
+ from: "Planning",
1056
+ context: {
1057
+ id: elementFromPlan?.id,
1058
+ bbox: elementFromPlan?.bbox
1059
+ }
1060
+ };
1061
+ } else if (aiLocateHitFlag) {
1062
+ hitBy = {
1063
+ from: "AI model",
1064
+ context: {
1065
+ prompt: param.prompt
1066
+ }
1067
+ };
1068
+ }
843
1069
  return {
844
1070
  output: {
845
1071
  element
846
1072
  },
847
1073
  pageContext,
848
- cache: {
849
- hit: cacheHitFlag,
850
- originalXpaths: xpaths,
851
- currentXpaths
852
- }
1074
+ hitBy
853
1075
  };
854
1076
  }
855
1077
  };
@@ -945,7 +1167,7 @@ var PageTaskExecutor = class {
945
1167
  thought: plan2.thought,
946
1168
  locate: plan2.locate,
947
1169
  executor: async (param, { element }) => {
948
- assert4(element, "Element not found, cannot tap");
1170
+ assert5(element, "Element not found, cannot tap");
949
1171
  await this.page.mouse.click(element.center[0], element.center[1]);
950
1172
  }
951
1173
  };
@@ -957,7 +1179,7 @@ var PageTaskExecutor = class {
957
1179
  thought: plan2.thought,
958
1180
  locate: plan2.locate,
959
1181
  executor: async (param, { element }) => {
960
- assert4(element, "Element not found, cannot right click");
1182
+ assert5(element, "Element not found, cannot right click");
961
1183
  await this.page.mouse.click(
962
1184
  element.center[0],
963
1185
  element.center[1],
@@ -974,7 +1196,7 @@ var PageTaskExecutor = class {
974
1196
  thought: plan2.thought,
975
1197
  locate: plan2.locate,
976
1198
  executor: async (taskParam) => {
977
- assert4(
1199
+ assert5(
978
1200
  taskParam?.start_box && taskParam?.end_box,
979
1201
  "No start_box or end_box to drag"
980
1202
  );
@@ -989,7 +1211,7 @@ var PageTaskExecutor = class {
989
1211
  thought: plan2.thought,
990
1212
  locate: plan2.locate,
991
1213
  executor: async (param, { element }) => {
992
- assert4(element, "Element not found, cannot hover");
1214
+ assert5(element, "Element not found, cannot hover");
993
1215
  await this.page.mouse.move(element.center[0], element.center[1]);
994
1216
  }
995
1217
  };
@@ -1108,7 +1330,7 @@ var PageTaskExecutor = class {
1108
1330
  thought: plan2.thought,
1109
1331
  locate: plan2.locate,
1110
1332
  executor: async (param) => {
1111
- assert4(
1333
+ assert5(
1112
1334
  isAndroidPage(this.page),
1113
1335
  "Cannot use home button on non-Android devices"
1114
1336
  );
@@ -1124,7 +1346,7 @@ var PageTaskExecutor = class {
1124
1346
  thought: plan2.thought,
1125
1347
  locate: plan2.locate,
1126
1348
  executor: async (param) => {
1127
- assert4(
1349
+ assert5(
1128
1350
  isAndroidPage(this.page),
1129
1351
  "Cannot use back button on non-Android devices"
1130
1352
  );
@@ -1140,7 +1362,7 @@ var PageTaskExecutor = class {
1140
1362
  thought: plan2.thought,
1141
1363
  locate: plan2.locate,
1142
1364
  executor: async (param) => {
1143
- assert4(
1365
+ assert5(
1144
1366
  isAndroidPage(this.page),
1145
1367
  "Cannot use recent apps button on non-Android devices"
1146
1368
  );
@@ -1291,7 +1513,7 @@ var PageTaskExecutor = class {
1291
1513
  }
1292
1514
  }
1293
1515
  if (finalActions.length === 0) {
1294
- assert4(
1516
+ assert5(
1295
1517
  !more_actions_needed_by_instruction || sleep2,
1296
1518
  error ? `Failed to plan: ${error}` : planParsingError || "No plan found"
1297
1519
  );
@@ -1529,7 +1751,7 @@ var PageTaskExecutor = class {
1529
1751
  );
1530
1752
  let outputResult = data;
1531
1753
  if (ifTypeRestricted) {
1532
- assert4(data?.result !== void 0, "No result in query data");
1754
+ assert5(data?.result !== void 0, "No result in query data");
1533
1755
  outputResult = data.result;
1534
1756
  }
1535
1757
  return {
@@ -1625,9 +1847,9 @@ var PageTaskExecutor = class {
1625
1847
  onTaskStart: this.onTaskStartCallback
1626
1848
  });
1627
1849
  const { timeoutMs, checkIntervalMs } = opt;
1628
- assert4(assertion, "No assertion for waitFor");
1629
- assert4(timeoutMs, "No timeoutMs for waitFor");
1630
- assert4(checkIntervalMs, "No checkIntervalMs for waitFor");
1850
+ assert5(assertion, "No assertion for waitFor");
1851
+ assert5(timeoutMs, "No timeoutMs for waitFor");
1852
+ assert5(checkIntervalMs, "No checkIntervalMs for waitFor");
1631
1853
  const overallStartTime = Date.now();
1632
1854
  let startTime = Date.now();
1633
1855
  let errorThought = "";
@@ -1681,9 +1903,9 @@ var PageTaskExecutor = class {
1681
1903
  };
1682
1904
 
1683
1905
  // src/common/plan-builder.ts
1684
- import { getDebug as getDebug2 } from "@midscene/shared/logger";
1685
- import { assert as assert5 } from "@midscene/shared/utils";
1686
- var debug2 = getDebug2("plan-builder");
1906
+ import { getDebug as getDebug3 } from "@midscene/shared/logger";
1907
+ import { assert as assert6 } from "@midscene/shared/utils";
1908
+ var debug3 = getDebug3("plan-builder");
1687
1909
  function buildPlans(type, locateParam, param) {
1688
1910
  let returnPlans = [];
1689
1911
  const locatePlan = locateParam ? {
@@ -1693,8 +1915,8 @@ function buildPlans(type, locateParam, param) {
1693
1915
  thought: ""
1694
1916
  } : null;
1695
1917
  if (type === "Tap" || type === "Hover" || type === "RightClick") {
1696
- assert5(locateParam, `missing locate info for action "${type}"`);
1697
- assert5(locatePlan, `missing locate info for action "${type}"`);
1918
+ assert6(locateParam, `missing locate info for action "${type}"`);
1919
+ assert6(locatePlan, `missing locate info for action "${type}"`);
1698
1920
  const tapPlan = {
1699
1921
  type,
1700
1922
  param: null,
@@ -1705,9 +1927,9 @@ function buildPlans(type, locateParam, param) {
1705
1927
  }
1706
1928
  if (type === "Input" || type === "KeyboardPress") {
1707
1929
  if (type === "Input") {
1708
- assert5(locateParam, `missing locate info for action "${type}"`);
1930
+ assert6(locateParam, `missing locate info for action "${type}"`);
1709
1931
  }
1710
- assert5(param, `missing param for action "${type}"`);
1932
+ assert6(param, `missing param for action "${type}"`);
1711
1933
  const inputPlan = {
1712
1934
  type,
1713
1935
  param,
@@ -1721,7 +1943,7 @@ function buildPlans(type, locateParam, param) {
1721
1943
  }
1722
1944
  }
1723
1945
  if (type === "Scroll") {
1724
- assert5(param, `missing param for action "${type}"`);
1946
+ assert6(param, `missing param for action "${type}"`);
1725
1947
  const scrollPlan = {
1726
1948
  type,
1727
1949
  param,
@@ -1735,7 +1957,7 @@ function buildPlans(type, locateParam, param) {
1735
1957
  }
1736
1958
  }
1737
1959
  if (type === "Sleep") {
1738
- assert5(param, `missing param for action "${type}"`);
1960
+ assert6(param, `missing param for action "${type}"`);
1739
1961
  const sleepPlan = {
1740
1962
  type,
1741
1963
  param,
@@ -1745,7 +1967,7 @@ function buildPlans(type, locateParam, param) {
1745
1967
  returnPlans = [sleepPlan];
1746
1968
  }
1747
1969
  if (type === "Locate") {
1748
- assert5(locateParam, `missing locate info for action "${type}"`);
1970
+ assert6(locateParam, `missing locate info for action "${type}"`);
1749
1971
  const locatePlan2 = {
1750
1972
  type,
1751
1973
  param: locateParam,
@@ -1755,187 +1977,12 @@ function buildPlans(type, locateParam, param) {
1755
1977
  returnPlans = [locatePlan2];
1756
1978
  }
1757
1979
  if (returnPlans) {
1758
- debug2("buildPlans", returnPlans);
1980
+ debug3("buildPlans", returnPlans);
1759
1981
  return returnPlans;
1760
1982
  }
1761
1983
  throw new Error(`Not supported type: ${type}`);
1762
1984
  }
1763
1985
 
1764
- // src/common/task-cache.ts
1765
- import assert6 from "assert";
1766
- import { existsSync as existsSync2, mkdirSync as mkdirSync2, readFileSync, writeFileSync as writeFileSync2 } from "fs";
1767
- import { dirname as dirname2, join as join2 } from "path";
1768
- import { getMidsceneRunSubDir as getMidsceneRunSubDir2 } from "@midscene/shared/common";
1769
- import { getDebug as getDebug3 } from "@midscene/shared/logger";
1770
- import { ifInBrowser as ifInBrowser2 } from "@midscene/shared/utils";
1771
- import yaml3 from "js-yaml";
1772
- import semver from "semver";
1773
-
1774
- // package.json
1775
- var version = "0.19.1";
1776
-
1777
- // src/common/task-cache.ts
1778
- var debug3 = getDebug3("cache");
1779
- var lowestSupportedMidsceneVersion = "0.16.10";
1780
- var cacheFileExt = ".cache.yaml";
1781
- var TaskCache = class {
1782
- // Track matched records
1783
- constructor(cacheId, isCacheResultUsed, cacheFilePath) {
1784
- this.matchedCacheIndices = /* @__PURE__ */ new Set();
1785
- assert6(cacheId, "cacheId is required");
1786
- this.cacheId = replaceIllegalPathCharsAndSpace(cacheId);
1787
- this.cacheFilePath = ifInBrowser2 ? void 0 : cacheFilePath || join2(getMidsceneRunSubDir2("cache"), `${this.cacheId}${cacheFileExt}`);
1788
- this.isCacheResultUsed = isCacheResultUsed;
1789
- let cacheContent;
1790
- if (this.cacheFilePath) {
1791
- cacheContent = this.loadCacheFromFile();
1792
- }
1793
- if (!cacheContent) {
1794
- cacheContent = {
1795
- midsceneVersion: version,
1796
- cacheId: this.cacheId,
1797
- caches: []
1798
- };
1799
- }
1800
- this.cache = cacheContent;
1801
- this.cacheOriginalLength = this.cache.caches.length;
1802
- }
1803
- matchCache(prompt, type) {
1804
- for (let i = 0; i < this.cacheOriginalLength; i++) {
1805
- const item = this.cache.caches[i];
1806
- const key = `${type}:${prompt}:${i}`;
1807
- if (item.type === type && item.prompt === prompt && !this.matchedCacheIndices.has(key)) {
1808
- this.matchedCacheIndices.add(key);
1809
- debug3(
1810
- "cache found and marked as used, type: %s, prompt: %s, index: %d",
1811
- type,
1812
- prompt,
1813
- i
1814
- );
1815
- return {
1816
- cacheContent: item,
1817
- updateFn: (cb) => {
1818
- debug3(
1819
- "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1820
- type,
1821
- prompt,
1822
- i
1823
- );
1824
- cb(item);
1825
- debug3(
1826
- "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1827
- type,
1828
- prompt,
1829
- i
1830
- );
1831
- this.flushCacheToFile();
1832
- }
1833
- };
1834
- }
1835
- }
1836
- debug3("no unused cache found, type: %s, prompt: %s", type, prompt);
1837
- return void 0;
1838
- }
1839
- matchPlanCache(prompt) {
1840
- return this.matchCache(prompt, "plan");
1841
- }
1842
- matchLocateCache(prompt) {
1843
- return this.matchCache(prompt, "locate");
1844
- }
1845
- appendCache(cache) {
1846
- debug3("will append cache", cache);
1847
- this.cache.caches.push(cache);
1848
- this.flushCacheToFile();
1849
- }
1850
- loadCacheFromFile() {
1851
- const cacheFile = this.cacheFilePath;
1852
- assert6(cacheFile, "cache file path is required");
1853
- if (!existsSync2(cacheFile)) {
1854
- debug3("no cache file found, path: %s", cacheFile);
1855
- return void 0;
1856
- }
1857
- const jsonTypeCacheFile = cacheFile.replace(cacheFileExt, ".json");
1858
- if (existsSync2(jsonTypeCacheFile) && this.isCacheResultUsed) {
1859
- console.warn(
1860
- `An outdated cache file from an earlier version of Midscene has been detected. Since version 0.17, we have implemented an improved caching strategy. Please delete the old file located at: ${jsonTypeCacheFile}.`
1861
- );
1862
- return void 0;
1863
- }
1864
- try {
1865
- const data = readFileSync(cacheFile, "utf8");
1866
- const jsonData = yaml3.load(data);
1867
- if (!version) {
1868
- debug3("no midscene version info, will not read cache from file");
1869
- return void 0;
1870
- }
1871
- if (semver.lt(jsonData.midsceneVersion, lowestSupportedMidsceneVersion) && !jsonData.midsceneVersion.includes("beta")) {
1872
- console.warn(
1873
- `You are using an old version of Midscene cache file, and we cannot match any info from it. Starting from Midscene v0.17, we changed our strategy to use xpath for cache info, providing better performance.
1874
- Please delete the existing cache and rebuild it. Sorry for the inconvenience.
1875
- cache file: ${cacheFile}`
1876
- );
1877
- return void 0;
1878
- }
1879
- debug3(
1880
- "cache loaded from file, path: %s, cache version: %s, record length: %s",
1881
- cacheFile,
1882
- jsonData.midsceneVersion,
1883
- jsonData.caches.length
1884
- );
1885
- jsonData.midsceneVersion = version;
1886
- return jsonData;
1887
- } catch (err) {
1888
- debug3(
1889
- "cache file exists but load failed, path: %s, error: %s",
1890
- cacheFile,
1891
- err
1892
- );
1893
- return void 0;
1894
- }
1895
- }
1896
- flushCacheToFile() {
1897
- if (!version) {
1898
- debug3("no midscene version info, will not write cache to file");
1899
- return;
1900
- }
1901
- if (!this.cacheFilePath) {
1902
- debug3("no cache file path, will not write cache to file");
1903
- return;
1904
- }
1905
- try {
1906
- const dir = dirname2(this.cacheFilePath);
1907
- if (!existsSync2(dir)) {
1908
- mkdirSync2(dir, { recursive: true });
1909
- debug3("created cache directory: %s", dir);
1910
- }
1911
- const yamlData = yaml3.dump(this.cache);
1912
- writeFileSync2(this.cacheFilePath, yamlData);
1913
- debug3("cache flushed to file: %s", this.cacheFilePath);
1914
- } catch (err) {
1915
- debug3(
1916
- "write cache to file failed, path: %s, error: %s",
1917
- this.cacheFilePath,
1918
- err
1919
- );
1920
- }
1921
- }
1922
- updateOrAppendCacheRecord(newRecord, cachedRecord) {
1923
- if (cachedRecord) {
1924
- if (newRecord.type === "plan") {
1925
- cachedRecord.updateFn((cache) => {
1926
- cache.yamlWorkflow = newRecord.yamlWorkflow;
1927
- });
1928
- } else {
1929
- cachedRecord.updateFn((cache) => {
1930
- cache.xpaths = newRecord.xpaths;
1931
- });
1932
- }
1933
- } else {
1934
- this.appendCache(newRecord);
1935
- }
1936
- }
1937
- };
1938
-
1939
1986
  // src/common/agent.ts
1940
1987
  var debug4 = getDebug4("web-integration");
1941
1988
  var distanceOfTwoPoints = (p1, p2) => {
@@ -2064,10 +2111,12 @@ ${errorTask?.errorStack}`);
2064
2111
  const prompt = opt.prompt ?? locatePrompt;
2065
2112
  const deepThink = opt.deepThink ?? false;
2066
2113
  const cacheable = opt.cacheable ?? true;
2114
+ const xpath = opt.xpath;
2067
2115
  return {
2068
2116
  prompt,
2069
2117
  deepThink,
2070
- cacheable
2118
+ cacheable,
2119
+ xpath
2071
2120
  };
2072
2121
  }
2073
2122
  return {
@@ -2225,6 +2274,9 @@ ${errorTask?.errorStack}`);
2225
2274
  this.afterTaskRunning(executor);
2226
2275
  return output;
2227
2276
  }
2277
+ async aiAsk(prompt, opt = defaultInsightExtractOption) {
2278
+ return this.aiString(prompt, opt);
2279
+ }
2228
2280
  async describeElementAtPoint(center, opt) {
2229
2281
  const { verifyPrompt = true, retryLimit = 3 } = opt || {};
2230
2282
  let success = false;