@midscene/web 0.19.1 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. package/dist/es/agent.js +299 -247
  2. package/dist/es/agent.js.map +1 -1
  3. package/dist/es/bridge-mode-browser.js +3 -3
  4. package/dist/es/bridge-mode.js +301 -249
  5. package/dist/es/bridge-mode.js.map +1 -1
  6. package/dist/es/chrome-extension.js +342 -290
  7. package/dist/es/chrome-extension.js.map +1 -1
  8. package/dist/es/index.js +307 -247
  9. package/dist/es/index.js.map +1 -1
  10. package/dist/es/midscene-playground.js +341 -289
  11. package/dist/es/midscene-playground.js.map +1 -1
  12. package/dist/es/midscene-server.js +25 -12
  13. package/dist/es/midscene-server.js.map +1 -1
  14. package/dist/es/playground.js +341 -289
  15. package/dist/es/playground.js.map +1 -1
  16. package/dist/es/playwright-report.js +14 -1
  17. package/dist/es/playwright-report.js.map +1 -1
  18. package/dist/es/playwright-reporter.js +14 -1
  19. package/dist/es/playwright-reporter.js.map +1 -1
  20. package/dist/es/playwright.js +307 -247
  21. package/dist/es/playwright.js.map +1 -1
  22. package/dist/es/puppeteer-agent-launcher.js +299 -247
  23. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  24. package/dist/es/puppeteer.js +299 -247
  25. package/dist/es/puppeteer.js.map +1 -1
  26. package/dist/es/utils.js +42 -8
  27. package/dist/es/utils.js.map +1 -1
  28. package/dist/es/yaml.js +11 -4
  29. package/dist/es/yaml.js.map +1 -1
  30. package/dist/lib/agent.js +308 -256
  31. package/dist/lib/agent.js.map +1 -1
  32. package/dist/lib/bridge-mode-browser.js +3 -3
  33. package/dist/lib/bridge-mode.js +310 -258
  34. package/dist/lib/bridge-mode.js.map +1 -1
  35. package/dist/lib/chrome-extension.js +355 -303
  36. package/dist/lib/chrome-extension.js.map +1 -1
  37. package/dist/lib/index.js +316 -256
  38. package/dist/lib/index.js.map +1 -1
  39. package/dist/lib/midscene-playground.js +354 -302
  40. package/dist/lib/midscene-playground.js.map +1 -1
  41. package/dist/lib/midscene-server.js +28 -15
  42. package/dist/lib/midscene-server.js.map +1 -1
  43. package/dist/lib/playground.js +354 -302
  44. package/dist/lib/playground.js.map +1 -1
  45. package/dist/lib/playwright-report.js +20 -7
  46. package/dist/lib/playwright-report.js.map +1 -1
  47. package/dist/lib/playwright-reporter.js +20 -7
  48. package/dist/lib/playwright-reporter.js.map +1 -1
  49. package/dist/lib/playwright.js +316 -256
  50. package/dist/lib/playwright.js.map +1 -1
  51. package/dist/lib/puppeteer-agent-launcher.js +308 -256
  52. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  53. package/dist/lib/puppeteer.js +308 -256
  54. package/dist/lib/puppeteer.js.map +1 -1
  55. package/dist/lib/utils.js +48 -13
  56. package/dist/lib/utils.js.map +1 -1
  57. package/dist/lib/yaml.js +11 -4
  58. package/dist/lib/yaml.js.map +1 -1
  59. package/dist/types/agent.d.ts +6 -102
  60. package/dist/types/bridge-mode-browser.d.ts +3 -2
  61. package/dist/types/bridge-mode.d.ts +4 -4
  62. package/dist/types/{browser-5dbb4bfb.d.ts → browser-118d886d.d.ts} +1 -1
  63. package/dist/types/chrome-extension.d.ts +2 -2
  64. package/dist/types/index.d.ts +1 -1
  65. package/dist/types/midscene-server.d.ts +2 -2
  66. package/dist/types/{page-90e9f9a7.d.ts → page-471361cd.d.ts} +102 -3
  67. package/dist/types/playground.d.ts +2 -2
  68. package/dist/types/playwright.d.ts +6 -2
  69. package/dist/types/puppeteer-agent-launcher.d.ts +1 -1
  70. package/dist/types/puppeteer.d.ts +3 -3
  71. package/dist/types/utils.d.ts +2 -1
  72. package/dist/types/yaml.d.ts +1 -1
  73. package/package.json +3 -3
@@ -31,7 +31,7 @@ import {
31
31
  traverseTree
32
32
  } from "@midscene/shared/extractor";
33
33
  import { resizeImgBase64 } from "@midscene/shared/img";
34
- import { assert, logMsg, uuid } from "@midscene/shared/utils";
34
+ import { assert as assert2, logMsg, uuid } from "@midscene/shared/utils";
35
35
  import dayjs from "dayjs";
36
36
 
37
37
  // src/web-element.ts
@@ -59,9 +59,184 @@ var WebElementInfo = class {
59
59
  }
60
60
  };
61
61
 
62
+ // src/common/task-cache.ts
63
+ import assert from "assert";
64
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
65
+ import { dirname, join } from "path";
66
+ import { getMidsceneRunSubDir } from "@midscene/shared/common";
67
+ import { getDebug } from "@midscene/shared/logger";
68
+ import { ifInBrowser } from "@midscene/shared/utils";
69
+ import yaml from "js-yaml";
70
+ import semver from "semver";
71
+
72
+ // package.json
73
+ var version = "0.20.0";
74
+
75
+ // src/common/task-cache.ts
76
+ var debug = getDebug("cache");
77
+ var lowestSupportedMidsceneVersion = "0.16.10";
78
+ var cacheFileExt = ".cache.yaml";
79
/**
 * File-backed cache of "plan" and "locate" records, matched by task type and prompt.
 *
 * Records that were present when the instance was created can each be matched
 * once per instance; records appended later are persisted but are not matched
 * by this instance (matching only scans the first `cacheOriginalLength` items).
 */
var TaskCache = class {
  /**
   * @param cacheId - required identifier; sanitized with
   *   `replaceIllegalPathCharsAndSpace` and used to build the default file name.
   * @param isCacheResultUsed - stored on the instance; when truthy, the presence
   *   of a legacy `.json` cache next to the default-named file disables loading.
   * @param cacheFilePath - optional explicit cache file path. Ignored when
   *   `ifInBrowser` is truthy (no file is used at all in that case).
   */
  constructor(cacheId, isCacheResultUsed, cacheFilePath) {
    // Keys of the records that matchCache has already handed out.
    this.matchedCacheIndices = /* @__PURE__ */ new Set();
    assert(cacheId, "cacheId is required");
    this.cacheId = replaceIllegalPathCharsAndSpace(cacheId);
    this.cacheFilePath = ifInBrowser ? void 0 : cacheFilePath || join(getMidsceneRunSubDir("cache"), `${this.cacheId}${cacheFileExt}`);
    this.isCacheResultUsed = isCacheResultUsed;
    const loadedCache = this.cacheFilePath ? this.loadCacheFromFile() : void 0;
    // Fall back to an empty cache when nothing usable could be loaded.
    this.cache = loadedCache || {
      midsceneVersion: version,
      cacheId: this.cacheId,
      caches: []
    };
    // Snapshot of the record count at load time; matchCache never looks past it.
    this.cacheOriginalLength = this.cache.caches.length;
  }
  /**
   * Find the first not-yet-matched record with the given type and prompt.
   *
   * @param prompt - prompt text to compare with `record.prompt` (strict equality)
   * @param type - record type to compare with `record.type`
   * @returns `{ cacheContent, updateFn }` for the matched record, or `undefined`.
   *   `updateFn(cb)` calls `cb(record)` and then flushes the cache to file.
   */
  matchCache(prompt, type) {
    for (let i = 0; i < this.cacheOriginalLength; i++) {
      const item = this.cache.caches[i];
      // The index is part of the key, so identical records can each match once.
      const key = `${type}:${prompt}:${i}`;
      if (item.type !== type || item.prompt !== prompt || this.matchedCacheIndices.has(key)) {
        continue;
      }
      this.matchedCacheIndices.add(key);
      debug(
        "cache found and marked as used, type: %s, prompt: %s, index: %d",
        type,
        prompt,
        i
      );
      return {
        cacheContent: item,
        updateFn: (cb) => {
          debug(
            "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
            type,
            prompt,
            i
          );
          cb(item);
          debug(
            "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
            type,
            prompt,
            i
          );
          this.flushCacheToFile();
        }
      };
    }
    debug("no unused cache found, type: %s, prompt: %s", type, prompt);
    return void 0;
  }
  /** Shorthand for `matchCache(prompt, "plan")`. */
  matchPlanCache(prompt) {
    return this.matchCache(prompt, "plan");
  }
  /** Shorthand for `matchCache(prompt, "locate")`. */
  matchLocateCache(prompt) {
    return this.matchCache(prompt, "locate");
  }
  /**
   * Append a record and flush the whole cache to file.
   *
   * @param cache - record to push onto `this.cache.caches`
   */
  appendCache(cache) {
    debug("will append cache", cache);
    this.cache.caches.push(cache);
    this.flushCacheToFile();
  }
  /**
   * Read and validate the cache file at `this.cacheFilePath`.
   *
   * @returns the parsed cache object with `midsceneVersion` overwritten by the
   *   current `version`, or `undefined` when the file is missing, shadowed by a
   *   legacy `.json` cache, older than `lowestSupportedMidsceneVersion`
   *   (non-beta), malformed, or unreadable.
   * @throws AssertionError when `this.cacheFilePath` is not set.
   */
  loadCacheFromFile() {
    const cacheFile = this.cacheFilePath;
    assert(cacheFile, "cache file path is required");
    if (!existsSync(cacheFile)) {
      debug("no cache file found, path: %s", cacheFile);
      return void 0;
    }
    // Only derive the legacy path when the file really carries the cache
    // extension. A blind string replace is a no-op for custom paths, which made
    // the "legacy" path equal to the cache file itself and rejected valid caches.
    if (this.isCacheResultUsed && cacheFile.endsWith(cacheFileExt)) {
      const jsonTypeCacheFile = `${cacheFile.slice(0, -cacheFileExt.length)}.json`;
      if (existsSync(jsonTypeCacheFile)) {
        console.warn(
          `An outdated cache file from an earlier version of Midscene has been detected. Since version 0.17, we have implemented an improved caching strategy. Please delete the old file located at: ${jsonTypeCacheFile}.`
        );
        return void 0;
      }
    }
    if (!version) {
      // Checked before touching the file: the result is the same either way.
      debug("no midscene version info, will not read cache from file");
      return void 0;
    }
    try {
      const data = readFileSync(cacheFile, "utf8");
      // NOTE(review): yaml.load parses a file from disk; confirm the bundled
      // js-yaml version uses a safe default schema.
      const jsonData = yaml.load(data);
      if (!jsonData || typeof jsonData.midsceneVersion !== "string" || !Array.isArray(jsonData.caches)) {
        debug(
          "cache file exists but load failed, path: %s, error: %s",
          cacheFile,
          "unexpected cache file structure"
        );
        return void 0;
      }
      if (semver.lt(jsonData.midsceneVersion, lowestSupportedMidsceneVersion) && !jsonData.midsceneVersion.includes("beta")) {
        console.warn(
          `You are using an old version of Midscene cache file, and we cannot match any info from it. Starting from Midscene v0.17, we changed our strategy to use xpath for cache info, providing better performance.
Please delete the existing cache and rebuild it. Sorry for the inconvenience.
cache file: ${cacheFile}`
        );
        return void 0;
      }
      debug(
        "cache loaded from file, path: %s, cache version: %s, record length: %s",
        cacheFile,
        jsonData.midsceneVersion,
        jsonData.caches.length
      );
      // Stamp the in-memory copy with the running version; it is written back on flush.
      jsonData.midsceneVersion = version;
      return jsonData;
    } catch (err) {
      debug(
        "cache file exists but load failed, path: %s, error: %s",
        cacheFile,
        err
      );
      return void 0;
    }
  }
  /**
   * Serialize `this.cache` as YAML and write it to `this.cacheFilePath`,
   * creating the parent directory when needed. Best-effort: write failures
   * are logged through `debug` and not rethrown.
   */
  flushCacheToFile() {
    if (!version) {
      debug("no midscene version info, will not write cache to file");
      return;
    }
    if (!this.cacheFilePath) {
      debug("no cache file path, will not write cache to file");
      return;
    }
    try {
      const dir = dirname(this.cacheFilePath);
      if (!existsSync(dir)) {
        mkdirSync(dir, { recursive: true });
        debug("created cache directory: %s", dir);
      }
      const yamlData = yaml.dump(this.cache);
      writeFileSync(this.cacheFilePath, yamlData);
      debug("cache flushed to file: %s", this.cacheFilePath);
    } catch (err) {
      debug(
        "write cache to file failed, path: %s, error: %s",
        this.cacheFilePath,
        err
      );
    }
  }
  /**
   * Update a previously matched record in place, or append a new one.
   *
   * @param newRecord - record carrying the fresh data (`yamlWorkflow` for
   *   type "plan", `xpaths` otherwise)
   * @param cachedRecord - result of a prior `matchCache` call, if any; when
   *   absent, `newRecord` is appended instead
   */
  updateOrAppendCacheRecord(newRecord, cachedRecord) {
    if (!cachedRecord) {
      this.appendCache(newRecord);
      return;
    }
    if (newRecord.type === "plan") {
      cachedRecord.updateFn((cache) => {
        cache.yamlWorkflow = newRecord.yamlWorkflow;
      });
    } else {
      cachedRecord.updateFn((cache) => {
        cache.xpaths = newRecord.xpaths;
      });
    }
  }
};
236
+
62
237
  // src/common/utils.ts
63
238
  async function parseContextFromWebPage(page, _opt) {
64
- assert(page, "page is required");
239
+ assert2(page, "page is required");
65
240
  if (page._forceUsePageContext) {
66
241
  return await page._forceUsePageContext();
67
242
  }
@@ -88,7 +263,7 @@ async function parseContextFromWebPage(page, _opt) {
88
263
  isVisible
89
264
  });
90
265
  });
91
- assert(screenshotBase64, "screenshotBase64 is required");
266
+ assert2(screenshotBase64, "screenshotBase64 is required");
92
267
  const size = await page.size();
93
268
  if (size.dpr && size.dpr > 1) {
94
269
  screenshotBase64 = await resizeImgBase64(screenshotBase64, {
@@ -136,6 +311,28 @@ function matchElementFromPlan(planLocateParam, tree) {
136
311
  }
137
312
  return void 0;
138
313
  }
314
/**
 * Try to resolve an element from cached xpaths.
 *
 * The xpaths are tried in order. A lookup that throws is logged and skipped so
 * that one failing xpath does not prevent the remaining ones from being tried.
 *
 * @param taskExecutor - object exposing `page.getElementInfoByXpath(xpath)` and
 *   an optional `taskCache`; the lookup only runs when
 *   `taskCache.isCacheResultUsed` is truthy
 * @param xpaths - candidate xpaths; nothing is looked up when empty or missing
 * @param cachePrompt - prompt text, used only in debug output
 * @param cacheable - pass `false` to skip the lookup entirely
 * @returns the first element info with a truthy `id`, otherwise `undefined`
 */
async function matchElementFromCache(taskExecutor, xpaths, cachePrompt, cacheable) {
  if (!xpaths?.length || !taskExecutor?.taskCache?.isCacheResultUsed || cacheable === false) {
    return void 0;
  }
  for (const xpath of xpaths) {
    try {
      const element = await taskExecutor.page.getElementInfoByXpath(xpath);
      if (element?.id) {
        debug("cache hit, prompt: %s", cachePrompt);
        debug(
          "found a new new element with same xpath, xpath: %s, id: %s",
          xpath,
          element.id
        );
        return element;
      }
    } catch (error) {
      // Keep going: later xpaths may still resolve.
      debug("get element info by xpath error: ", error);
    }
  }
  return void 0;
}
139
336
  function trimContextByViewport(execution) {
140
337
  function filterVisibleTree(node) {
141
338
  if (!node)
@@ -180,10 +377,10 @@ import {
180
377
  import yaml4 from "js-yaml";
181
378
 
182
379
  // src/yaml/player.ts
183
- import { existsSync, mkdirSync, writeFileSync } from "fs";
184
- import { dirname, join, resolve } from "path";
185
- import { assert as assert2, ifInBrowser } from "@midscene/shared/utils";
186
- import { getMidsceneRunSubDir } from "@midscene/shared/common";
380
+ import { existsSync as existsSync2, mkdirSync as mkdirSync2, writeFileSync as writeFileSync2 } from "fs";
381
+ import { dirname as dirname2, join as join2, resolve } from "path";
382
+ import { assert as assert3, ifInBrowser as ifInBrowser2 } from "@midscene/shared/utils";
383
+ import { getMidsceneRunSubDir as getMidsceneRunSubDir2 } from "@midscene/shared/common";
187
384
  var ScriptPlayer = class {
188
385
  constructor(script, setupAgent, onTaskStatusChange) {
189
386
  this.script = script;
@@ -195,14 +392,14 @@ var ScriptPlayer = class {
195
392
  this.pageAgent = null;
196
393
  this.result = {};
197
394
  this.target = script.target || script.web || script.android;
198
- if (ifInBrowser) {
395
+ if (ifInBrowser2) {
199
396
  this.output = void 0;
200
397
  } else if (this.target?.output) {
201
398
  this.output = resolve(process.cwd(), this.target.output);
202
399
  } else {
203
- this.output = join(getMidsceneRunSubDir("output"), `${process.pid}.json`);
400
+ this.output = join2(getMidsceneRunSubDir2("output"), `${process.pid}.json`);
204
401
  }
205
- if (ifInBrowser) {
402
+ if (ifInBrowser2) {
206
403
  this.unstableLogContent = void 0;
207
404
  } else if (typeof this.target?.unstableLogContent === "string") {
208
405
  this.unstableLogContent = resolve(
@@ -210,8 +407,8 @@ var ScriptPlayer = class {
210
407
  this.target.unstableLogContent
211
408
  );
212
409
  } else if (this.target?.unstableLogContent === true) {
213
- this.unstableLogContent = join(
214
- getMidsceneRunSubDir("output"),
410
+ this.unstableLogContent = join2(
411
+ getMidsceneRunSubDir2("output"),
215
412
  "unstableLogContent.json"
216
413
  );
217
414
  }
@@ -257,27 +454,27 @@ var ScriptPlayer = class {
257
454
  flushResult() {
258
455
  if (Object.keys(this.result).length && this.output) {
259
456
  const output = resolve(process.cwd(), this.output);
260
- const outputDir = dirname(output);
261
- if (!existsSync(outputDir)) {
262
- mkdirSync(outputDir, { recursive: true });
457
+ const outputDir = dirname2(output);
458
+ if (!existsSync2(outputDir)) {
459
+ mkdirSync2(outputDir, { recursive: true });
263
460
  }
264
- writeFileSync(output, JSON.stringify(this.result, void 0, 2));
461
+ writeFileSync2(output, JSON.stringify(this.result, void 0, 2));
265
462
  }
266
463
  }
267
464
  flushUnstableLogContent() {
268
465
  if (this.unstableLogContent) {
269
466
  const content = this.pageAgent?._unstableLogContent();
270
467
  const filePath = resolve(process.cwd(), this.unstableLogContent);
271
- const outputDir = dirname(filePath);
272
- if (!existsSync(outputDir)) {
273
- mkdirSync(outputDir, { recursive: true });
468
+ const outputDir = dirname2(filePath);
469
+ if (!existsSync2(outputDir)) {
470
+ mkdirSync2(outputDir, { recursive: true });
274
471
  }
275
- writeFileSync(filePath, JSON.stringify(content, null, 2));
472
+ writeFileSync2(filePath, JSON.stringify(content, null, 2));
276
473
  }
277
474
  }
278
475
  async playTask(taskStatus, agent) {
279
476
  const { flow } = taskStatus;
280
- assert2(flow, "missing flow in task");
477
+ assert3(flow, "missing flow in task");
281
478
  for (const flowItemIndex in flow) {
282
479
  const currentStep = Number.parseInt(flowItemIndex, 10);
283
480
  taskStatus.currentStep = currentStep;
@@ -285,8 +482,8 @@ var ScriptPlayer = class {
285
482
  if ("aiAction" in flowItem || "ai" in flowItem) {
286
483
  const actionTask = flowItem;
287
484
  const prompt = actionTask.aiAction || actionTask.ai;
288
- assert2(prompt, "missing prompt for ai (aiAction)");
289
- assert2(
485
+ assert3(prompt, "missing prompt for ai (aiAction)");
486
+ assert3(
290
487
  typeof prompt === "string",
291
488
  "prompt for aiAction must be a string"
292
489
  );
@@ -297,8 +494,8 @@ var ScriptPlayer = class {
297
494
  const assertTask = flowItem;
298
495
  const prompt = assertTask.aiAssert;
299
496
  const msg = assertTask.errorMessage;
300
- assert2(prompt, "missing prompt for aiAssert");
301
- assert2(
497
+ assert3(prompt, "missing prompt for aiAssert");
498
+ assert3(
302
499
  typeof prompt === "string",
303
500
  "prompt for aiAssert must be a string"
304
501
  );
@@ -310,8 +507,8 @@ var ScriptPlayer = class {
310
507
  domIncluded: queryTask.domIncluded,
311
508
  screenshotIncluded: queryTask.screenshotIncluded
312
509
  };
313
- assert2(prompt, "missing prompt for aiQuery");
314
- assert2(
510
+ assert3(prompt, "missing prompt for aiQuery");
511
+ assert3(
315
512
  typeof prompt === "string",
316
513
  "prompt for aiQuery must be a string"
317
514
  );
@@ -324,8 +521,8 @@ var ScriptPlayer = class {
324
521
  domIncluded: numberTask.domIncluded,
325
522
  screenshotIncluded: numberTask.screenshotIncluded
326
523
  };
327
- assert2(prompt, "missing prompt for number");
328
- assert2(
524
+ assert3(prompt, "missing prompt for aiNumber");
525
+ assert3(
329
526
  typeof prompt === "string",
330
527
  "prompt for number must be a string"
331
528
  );
@@ -338,8 +535,8 @@ var ScriptPlayer = class {
338
535
  domIncluded: stringTask.domIncluded,
339
536
  screenshotIncluded: stringTask.screenshotIncluded
340
537
  };
341
- assert2(prompt, "missing prompt for string");
342
- assert2(
538
+ assert3(prompt, "missing prompt for aiNumber");
539
+ assert3(
343
540
  typeof prompt === "string",
344
541
  "prompt for string must be a string"
345
542
  );
@@ -352,28 +549,35 @@ var ScriptPlayer = class {
352
549
  domIncluded: booleanTask.domIncluded,
353
550
  screenshotIncluded: booleanTask.screenshotIncluded
354
551
  };
355
- assert2(prompt, "missing prompt for boolean");
356
- assert2(
552
+ assert3(prompt, "missing prompt for aiBoolean");
553
+ assert3(
357
554
  typeof prompt === "string",
358
555
  "prompt for boolean must be a string"
359
556
  );
360
557
  const booleanResult = await agent.aiBoolean(prompt, options);
361
558
  this.setResult(booleanTask.name, booleanResult);
559
+ } else if ("aiAsk" in flowItem) {
560
+ const askTask = flowItem;
561
+ const prompt = askTask.aiAsk;
562
+ assert3(prompt, "missing prompt for aiAsk");
563
+ assert3(typeof prompt === "string", "prompt for aiAsk must be a string");
564
+ const askResult = await agent.aiAsk(prompt);
565
+ this.setResult(askTask.name, askResult);
362
566
  } else if ("aiLocate" in flowItem) {
363
567
  const locateTask = flowItem;
364
568
  const prompt = locateTask.aiLocate;
365
- assert2(prompt, "missing prompt for aiLocate");
366
- assert2(
569
+ assert3(prompt, "missing prompt for aiLocate");
570
+ assert3(
367
571
  typeof prompt === "string",
368
572
  "prompt for aiLocate must be a string"
369
573
  );
370
- const locateResult = await agent.aiLocate(prompt);
574
+ const locateResult = await agent.aiLocate(prompt, locateTask);
371
575
  this.setResult(locateTask.name, locateResult);
372
576
  } else if ("aiWaitFor" in flowItem) {
373
577
  const waitForTask = flowItem;
374
578
  const prompt = waitForTask.aiWaitFor;
375
- assert2(prompt, "missing prompt for aiWaitFor");
376
- assert2(
579
+ assert3(prompt, "missing prompt for aiWaitFor");
580
+ assert3(
377
581
  typeof prompt === "string",
378
582
  "prompt for aiWaitFor must be a string"
379
583
  );
@@ -386,7 +590,7 @@ var ScriptPlayer = class {
386
590
  if (typeof ms === "string") {
387
591
  msNumber = Number.parseInt(ms, 10);
388
592
  }
389
- assert2(
593
+ assert3(
390
594
  msNumber && msNumber > 0,
391
595
  `ms for sleep must be greater than 0, but got ${ms}`
392
596
  );
@@ -505,11 +709,11 @@ var ScriptPlayer = class {
505
709
  };
506
710
 
507
711
  // src/yaml/builder.ts
508
- import yaml from "js-yaml";
712
+ import yaml2 from "js-yaml";
509
713
 
510
714
  // src/yaml/utils.ts
511
- import { assert as assert3 } from "@midscene/shared/utils";
512
- import yaml2 from "js-yaml";
715
+ import { assert as assert4 } from "@midscene/shared/utils";
716
+ import yaml3 from "js-yaml";
513
717
  function interpolateEnvVars(content) {
514
718
  return content.replace(/\$\{([^}]+)\}/g, (_, envVar) => {
515
719
  const value = process.env[envVar.trim()];
@@ -535,31 +739,31 @@ function parseYamlScript(content, filePath, ignoreCheckingTarget) {
535
739
  );
536
740
  }
537
741
  const interpolatedContent = interpolateEnvVars(processedContent);
538
- const obj = yaml2.load(interpolatedContent, {
539
- schema: yaml2.JSON_SCHEMA
742
+ const obj = yaml3.load(interpolatedContent, {
743
+ schema: yaml3.JSON_SCHEMA
540
744
  });
541
745
  const pathTip = filePath ? `, failed to load ${filePath}` : "";
542
746
  const android = typeof obj.android !== "undefined" ? Object.assign({}, obj.android || {}) : void 0;
543
747
  const webConfig = obj.web || obj.target;
544
748
  const web = typeof webConfig !== "undefined" ? Object.assign({}, webConfig || {}) : void 0;
545
749
  if (!ignoreCheckingTarget) {
546
- assert3(
750
+ assert4(
547
751
  web || android,
548
752
  `at least one of "target", "web", or "android" properties is required in yaml script${pathTip}`
549
753
  );
550
- assert3(
754
+ assert4(
551
755
  web && !android || !web && android,
552
756
  `only one of "target", "web", or "android" properties is allowed in yaml script${pathTip}`
553
757
  );
554
758
  if (web || android) {
555
- assert3(
759
+ assert4(
556
760
  typeof web === "object" || typeof android === "object",
557
761
  `property "target/web/android" must be an object${pathTip}`
558
762
  );
559
763
  }
560
764
  }
561
- assert3(obj.tasks, `property "tasks" is required in yaml script ${pathTip}`);
562
- assert3(
765
+ assert4(obj.tasks, `property "tasks" is required in yaml script ${pathTip}`);
766
+ assert4(
563
767
  Array.isArray(obj.tasks),
564
768
  `property "tasks" must be an array in yaml script, but got ${obj.tasks}`
565
769
  );
@@ -593,8 +797,8 @@ import {
593
797
  } from "@midscene/core/ai-model";
594
798
  import { sleep } from "@midscene/core/utils";
595
799
  import { NodeType } from "@midscene/shared/constants";
596
- import { getDebug } from "@midscene/shared/logger";
597
- import { assert as assert4 } from "@midscene/shared/utils";
800
+ import { getDebug as getDebug2 } from "@midscene/shared/logger";
801
+ import { assert as assert5 } from "@midscene/shared/utils";
598
802
 
599
803
  // src/common/ui-utils.ts
600
804
  function typeStr(task) {
@@ -693,7 +897,7 @@ if (!window.__MIDSCENE_NEW_TAB_INTERCEPTOR_INITIALIZED__) {
693
897
  `;
694
898
 
695
899
  // src/common/tasks.ts
696
- var debug = getDebug("page-task-executor");
900
+ var debug2 = getDebug2("page-task-executor");
697
901
  var replanningCountLimit = 10;
698
902
  var isAndroidPage = (page) => {
699
903
  return page.pageType === "android";
@@ -734,7 +938,7 @@ var PageTaskExecutor = class {
734
938
  if (info?.id) {
735
939
  elementId = info.id;
736
940
  } else {
737
- debug(
941
+ debug2(
738
942
  "no element id found for position node, will not update cache",
739
943
  element
740
944
  );
@@ -747,7 +951,7 @@ var PageTaskExecutor = class {
747
951
  const result = await this.page.getXpathsById(elementId);
748
952
  return result;
749
953
  } catch (error) {
750
- debug("getXpathsById error: ", error);
954
+ debug2("getXpathsById error: ", error);
751
955
  }
752
956
  }
753
957
  prependExecutorWithScreenshot(taskApply, appendAfterExecution = false) {
@@ -801,7 +1005,7 @@ var PageTaskExecutor = class {
801
1005
  locate: plan2.locate,
802
1006
  executor: async (param, taskContext) => {
803
1007
  const { task } = taskContext;
804
- assert4(
1008
+ assert5(
805
1009
  param?.prompt || param?.id || param?.bbox,
806
1010
  "No prompt or id or position or bbox to locate"
807
1011
  );
@@ -826,39 +1030,29 @@ var PageTaskExecutor = class {
826
1030
  timing: "before Insight"
827
1031
  };
828
1032
  task.recorder = [recordItem];
829
- let cacheHitFlag = false;
1033
+ const elementFromXpath = param.xpath ? await this.page.getElementInfoByXpath(param.xpath) : void 0;
1034
+ const userExpectedPathHitFlag = !!elementFromXpath;
830
1035
  const cachePrompt = param.prompt;
831
1036
  const locateCacheRecord = this.taskCache?.matchLocateCache(cachePrompt);
832
1037
  const xpaths = locateCacheRecord?.cacheContent?.xpaths;
833
- let elementFromCache = null;
834
- try {
835
- if (xpaths?.length && this.taskCache?.isCacheResultUsed && param?.cacheable !== false) {
836
- for (let i = 0; i < xpaths.length; i++) {
837
- const element2 = await this.page.getElementInfoByXpath(
838
- xpaths[i]
839
- );
840
- if (element2?.id) {
841
- elementFromCache = element2;
842
- debug("cache hit, prompt: %s", cachePrompt);
843
- cacheHitFlag = true;
844
- debug(
845
- "found a new new element with same xpath, xpath: %s, id: %s",
846
- xpaths[i],
847
- element2?.id
848
- );
849
- break;
850
- }
851
- }
852
- }
853
- } catch (error) {
854
- debug("get element info by xpath error: ", error);
855
- }
856
- const startTime = Date.now();
857
- const element = elementFromCache || // try to match element from cache
858
- matchElementFromPlan(param, pageContext.tree) || // try to match element from plan
859
- (await this.insight.locate(param, {
1038
+ const elementFromCache = userExpectedPathHitFlag ? null : await matchElementFromCache(
1039
+ this,
1040
+ xpaths,
1041
+ cachePrompt,
1042
+ param.cacheable
1043
+ );
1044
+ const cacheHitFlag = !!elementFromCache;
1045
+ const elementFromPlan = !userExpectedPathHitFlag && !cacheHitFlag ? matchElementFromPlan(param, pageContext.tree) : void 0;
1046
+ const planHitFlag = !!elementFromPlan;
1047
+ const elementFromAiLocate = !userExpectedPathHitFlag && !cacheHitFlag && !planHitFlag ? (await this.insight.locate(param, {
1048
+ // fallback to ai locate
860
1049
  context: pageContext
861
- })).element;
1050
+ })).element : void 0;
1051
+ const aiLocateHitFlag = !!elementFromAiLocate;
1052
+ const element = elementFromXpath || // highest priority
1053
+ elementFromCache || // second priority
1054
+ elementFromPlan || // third priority
1055
+ elementFromAiLocate;
862
1056
  let currentXpaths;
863
1057
  if (element && this.taskCache && !cacheHitFlag && param?.cacheable !== false) {
864
1058
  const elementXpaths = await this.getElementXpath(
@@ -876,7 +1070,7 @@ var PageTaskExecutor = class {
876
1070
  locateCacheRecord
877
1071
  );
878
1072
  } else {
879
- debug(
1073
+ debug2(
880
1074
  "no xpaths found, will not update cache",
881
1075
  cachePrompt,
882
1076
  elementXpaths
@@ -886,16 +1080,44 @@ var PageTaskExecutor = class {
886
1080
  if (!element) {
887
1081
  throw new Error(`Element not found: ${param.prompt}`);
888
1082
  }
1083
+ let hitBy;
1084
+ if (userExpectedPathHitFlag) {
1085
+ hitBy = {
1086
+ from: "User expected path",
1087
+ context: {
1088
+ xpath: param.xpath
1089
+ }
1090
+ };
1091
+ } else if (cacheHitFlag) {
1092
+ hitBy = {
1093
+ from: "Cache",
1094
+ context: {
1095
+ xpathsFromCache: xpaths,
1096
+ xpathsToSave: currentXpaths
1097
+ }
1098
+ };
1099
+ } else if (planHitFlag) {
1100
+ hitBy = {
1101
+ from: "Planning",
1102
+ context: {
1103
+ id: elementFromPlan?.id,
1104
+ bbox: elementFromPlan?.bbox
1105
+ }
1106
+ };
1107
+ } else if (aiLocateHitFlag) {
1108
+ hitBy = {
1109
+ from: "AI model",
1110
+ context: {
1111
+ prompt: param.prompt
1112
+ }
1113
+ };
1114
+ }
889
1115
  return {
890
1116
  output: {
891
1117
  element
892
1118
  },
893
1119
  pageContext,
894
- cache: {
895
- hit: cacheHitFlag,
896
- originalXpaths: xpaths,
897
- currentXpaths
898
- }
1120
+ hitBy
899
1121
  };
900
1122
  }
901
1123
  };
@@ -991,7 +1213,7 @@ var PageTaskExecutor = class {
991
1213
  thought: plan2.thought,
992
1214
  locate: plan2.locate,
993
1215
  executor: async (param, { element }) => {
994
- assert4(element, "Element not found, cannot tap");
1216
+ assert5(element, "Element not found, cannot tap");
995
1217
  await this.page.mouse.click(element.center[0], element.center[1]);
996
1218
  }
997
1219
  };
@@ -1003,7 +1225,7 @@ var PageTaskExecutor = class {
1003
1225
  thought: plan2.thought,
1004
1226
  locate: plan2.locate,
1005
1227
  executor: async (param, { element }) => {
1006
- assert4(element, "Element not found, cannot right click");
1228
+ assert5(element, "Element not found, cannot right click");
1007
1229
  await this.page.mouse.click(
1008
1230
  element.center[0],
1009
1231
  element.center[1],
@@ -1020,7 +1242,7 @@ var PageTaskExecutor = class {
1020
1242
  thought: plan2.thought,
1021
1243
  locate: plan2.locate,
1022
1244
  executor: async (taskParam) => {
1023
- assert4(
1245
+ assert5(
1024
1246
  taskParam?.start_box && taskParam?.end_box,
1025
1247
  "No start_box or end_box to drag"
1026
1248
  );
@@ -1035,7 +1257,7 @@ var PageTaskExecutor = class {
1035
1257
  thought: plan2.thought,
1036
1258
  locate: plan2.locate,
1037
1259
  executor: async (param, { element }) => {
1038
- assert4(element, "Element not found, cannot hover");
1260
+ assert5(element, "Element not found, cannot hover");
1039
1261
  await this.page.mouse.move(element.center[0], element.center[1]);
1040
1262
  }
1041
1263
  };
@@ -1154,7 +1376,7 @@ var PageTaskExecutor = class {
1154
1376
  thought: plan2.thought,
1155
1377
  locate: plan2.locate,
1156
1378
  executor: async (param) => {
1157
- assert4(
1379
+ assert5(
1158
1380
  isAndroidPage(this.page),
1159
1381
  "Cannot use home button on non-Android devices"
1160
1382
  );
@@ -1170,7 +1392,7 @@ var PageTaskExecutor = class {
1170
1392
  thought: plan2.thought,
1171
1393
  locate: plan2.locate,
1172
1394
  executor: async (param) => {
1173
- assert4(
1395
+ assert5(
1174
1396
  isAndroidPage(this.page),
1175
1397
  "Cannot use back button on non-Android devices"
1176
1398
  );
@@ -1186,7 +1408,7 @@ var PageTaskExecutor = class {
1186
1408
  thought: plan2.thought,
1187
1409
  locate: plan2.locate,
1188
1410
  executor: async (param) => {
1189
- assert4(
1411
+ assert5(
1190
1412
  isAndroidPage(this.page),
1191
1413
  "Cannot use recent apps button on non-Android devices"
1192
1414
  );
@@ -1337,7 +1559,7 @@ var PageTaskExecutor = class {
1337
1559
  }
1338
1560
  }
1339
1561
  if (finalActions.length === 0) {
1340
- assert4(
1562
+ assert5(
1341
1563
  !more_actions_needed_by_instruction || sleep3,
1342
1564
  error ? `Failed to plan: ${error}` : planParsingError || "No plan found"
1343
1565
  );
@@ -1575,7 +1797,7 @@ var PageTaskExecutor = class {
1575
1797
  );
1576
1798
  let outputResult = data;
1577
1799
  if (ifTypeRestricted) {
1578
- assert4(data?.result !== void 0, "No result in query data");
1800
+ assert5(data?.result !== void 0, "No result in query data");
1579
1801
  outputResult = data.result;
1580
1802
  }
1581
1803
  return {
@@ -1671,9 +1893,9 @@ var PageTaskExecutor = class {
1671
1893
  onTaskStart: this.onTaskStartCallback
1672
1894
  });
1673
1895
  const { timeoutMs, checkIntervalMs } = opt;
1674
- assert4(assertion, "No assertion for waitFor");
1675
- assert4(timeoutMs, "No timeoutMs for waitFor");
1676
- assert4(checkIntervalMs, "No checkIntervalMs for waitFor");
1896
+ assert5(assertion, "No assertion for waitFor");
1897
+ assert5(timeoutMs, "No timeoutMs for waitFor");
1898
+ assert5(checkIntervalMs, "No checkIntervalMs for waitFor");
1677
1899
  const overallStartTime = Date.now();
1678
1900
  let startTime = Date.now();
1679
1901
  let errorThought = "";
@@ -1727,9 +1949,9 @@ var PageTaskExecutor = class {
1727
1949
  };
1728
1950
 
1729
1951
  // src/common/plan-builder.ts
1730
- import { getDebug as getDebug2 } from "@midscene/shared/logger";
1731
- import { assert as assert5 } from "@midscene/shared/utils";
1732
- var debug2 = getDebug2("plan-builder");
1952
+ import { getDebug as getDebug3 } from "@midscene/shared/logger";
1953
+ import { assert as assert6 } from "@midscene/shared/utils";
1954
+ var debug3 = getDebug3("plan-builder");
1733
1955
  function buildPlans(type, locateParam, param) {
1734
1956
  let returnPlans = [];
1735
1957
  const locatePlan = locateParam ? {
@@ -1739,8 +1961,8 @@ function buildPlans(type, locateParam, param) {
1739
1961
  thought: ""
1740
1962
  } : null;
1741
1963
  if (type === "Tap" || type === "Hover" || type === "RightClick") {
1742
- assert5(locateParam, `missing locate info for action "${type}"`);
1743
- assert5(locatePlan, `missing locate info for action "${type}"`);
1964
+ assert6(locateParam, `missing locate info for action "${type}"`);
1965
+ assert6(locatePlan, `missing locate info for action "${type}"`);
1744
1966
  const tapPlan = {
1745
1967
  type,
1746
1968
  param: null,
@@ -1751,9 +1973,9 @@ function buildPlans(type, locateParam, param) {
1751
1973
  }
1752
1974
  if (type === "Input" || type === "KeyboardPress") {
1753
1975
  if (type === "Input") {
1754
- assert5(locateParam, `missing locate info for action "${type}"`);
1976
+ assert6(locateParam, `missing locate info for action "${type}"`);
1755
1977
  }
1756
- assert5(param, `missing param for action "${type}"`);
1978
+ assert6(param, `missing param for action "${type}"`);
1757
1979
  const inputPlan = {
1758
1980
  type,
1759
1981
  param,
@@ -1767,7 +1989,7 @@ function buildPlans(type, locateParam, param) {
1767
1989
  }
1768
1990
  }
1769
1991
  if (type === "Scroll") {
1770
- assert5(param, `missing param for action "${type}"`);
1992
+ assert6(param, `missing param for action "${type}"`);
1771
1993
  const scrollPlan = {
1772
1994
  type,
1773
1995
  param,
@@ -1781,7 +2003,7 @@ function buildPlans(type, locateParam, param) {
1781
2003
  }
1782
2004
  }
1783
2005
  if (type === "Sleep") {
1784
- assert5(param, `missing param for action "${type}"`);
2006
+ assert6(param, `missing param for action "${type}"`);
1785
2007
  const sleepPlan = {
1786
2008
  type,
1787
2009
  param,
@@ -1791,7 +2013,7 @@ function buildPlans(type, locateParam, param) {
1791
2013
  returnPlans = [sleepPlan];
1792
2014
  }
1793
2015
  if (type === "Locate") {
1794
- assert5(locateParam, `missing locate info for action "${type}"`);
2016
+ assert6(locateParam, `missing locate info for action "${type}"`);
1795
2017
  const locatePlan2 = {
1796
2018
  type,
1797
2019
  param: locateParam,
@@ -1801,187 +2023,12 @@ function buildPlans(type, locateParam, param) {
1801
2023
  returnPlans = [locatePlan2];
1802
2024
  }
1803
2025
  if (returnPlans) {
1804
- debug2("buildPlans", returnPlans);
2026
+ debug3("buildPlans", returnPlans);
1805
2027
  return returnPlans;
1806
2028
  }
1807
2029
  throw new Error(`Not supported type: ${type}`);
1808
2030
  }
1809
2031
 
1810
- // src/common/task-cache.ts
1811
- import assert6 from "assert";
1812
- import { existsSync as existsSync2, mkdirSync as mkdirSync2, readFileSync, writeFileSync as writeFileSync2 } from "fs";
1813
- import { dirname as dirname2, join as join2 } from "path";
1814
- import { getMidsceneRunSubDir as getMidsceneRunSubDir2 } from "@midscene/shared/common";
1815
- import { getDebug as getDebug3 } from "@midscene/shared/logger";
1816
- import { ifInBrowser as ifInBrowser2 } from "@midscene/shared/utils";
1817
- import yaml3 from "js-yaml";
1818
- import semver from "semver";
1819
-
1820
- // package.json
1821
- var version = "0.19.1";
1822
-
1823
- // src/common/task-cache.ts
1824
- var debug3 = getDebug3("cache");
1825
- var lowestSupportedMidsceneVersion = "0.16.10";
1826
- var cacheFileExt = ".cache.yaml";
1827
- var TaskCache = class {
1828
- // Track matched records
1829
- constructor(cacheId, isCacheResultUsed, cacheFilePath) {
1830
- this.matchedCacheIndices = /* @__PURE__ */ new Set();
1831
- assert6(cacheId, "cacheId is required");
1832
- this.cacheId = replaceIllegalPathCharsAndSpace(cacheId);
1833
- this.cacheFilePath = ifInBrowser2 ? void 0 : cacheFilePath || join2(getMidsceneRunSubDir2("cache"), `${this.cacheId}${cacheFileExt}`);
1834
- this.isCacheResultUsed = isCacheResultUsed;
1835
- let cacheContent;
1836
- if (this.cacheFilePath) {
1837
- cacheContent = this.loadCacheFromFile();
1838
- }
1839
- if (!cacheContent) {
1840
- cacheContent = {
1841
- midsceneVersion: version,
1842
- cacheId: this.cacheId,
1843
- caches: []
1844
- };
1845
- }
1846
- this.cache = cacheContent;
1847
- this.cacheOriginalLength = this.cache.caches.length;
1848
- }
1849
- matchCache(prompt, type) {
1850
- for (let i = 0; i < this.cacheOriginalLength; i++) {
1851
- const item = this.cache.caches[i];
1852
- const key = `${type}:${prompt}:${i}`;
1853
- if (item.type === type && item.prompt === prompt && !this.matchedCacheIndices.has(key)) {
1854
- this.matchedCacheIndices.add(key);
1855
- debug3(
1856
- "cache found and marked as used, type: %s, prompt: %s, index: %d",
1857
- type,
1858
- prompt,
1859
- i
1860
- );
1861
- return {
1862
- cacheContent: item,
1863
- updateFn: (cb) => {
1864
- debug3(
1865
- "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1866
- type,
1867
- prompt,
1868
- i
1869
- );
1870
- cb(item);
1871
- debug3(
1872
- "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1873
- type,
1874
- prompt,
1875
- i
1876
- );
1877
- this.flushCacheToFile();
1878
- }
1879
- };
1880
- }
1881
- }
1882
- debug3("no unused cache found, type: %s, prompt: %s", type, prompt);
1883
- return void 0;
1884
- }
1885
- matchPlanCache(prompt) {
1886
- return this.matchCache(prompt, "plan");
1887
- }
1888
- matchLocateCache(prompt) {
1889
- return this.matchCache(prompt, "locate");
1890
- }
1891
- appendCache(cache) {
1892
- debug3("will append cache", cache);
1893
- this.cache.caches.push(cache);
1894
- this.flushCacheToFile();
1895
- }
1896
- loadCacheFromFile() {
1897
- const cacheFile = this.cacheFilePath;
1898
- assert6(cacheFile, "cache file path is required");
1899
- if (!existsSync2(cacheFile)) {
1900
- debug3("no cache file found, path: %s", cacheFile);
1901
- return void 0;
1902
- }
1903
- const jsonTypeCacheFile = cacheFile.replace(cacheFileExt, ".json");
1904
- if (existsSync2(jsonTypeCacheFile) && this.isCacheResultUsed) {
1905
- console.warn(
1906
- `An outdated cache file from an earlier version of Midscene has been detected. Since version 0.17, we have implemented an improved caching strategy. Please delete the old file located at: ${jsonTypeCacheFile}.`
1907
- );
1908
- return void 0;
1909
- }
1910
- try {
1911
- const data = readFileSync(cacheFile, "utf8");
1912
- const jsonData = yaml3.load(data);
1913
- if (!version) {
1914
- debug3("no midscene version info, will not read cache from file");
1915
- return void 0;
1916
- }
1917
- if (semver.lt(jsonData.midsceneVersion, lowestSupportedMidsceneVersion) && !jsonData.midsceneVersion.includes("beta")) {
1918
- console.warn(
1919
- `You are using an old version of Midscene cache file, and we cannot match any info from it. Starting from Midscene v0.17, we changed our strategy to use xpath for cache info, providing better performance.
1920
- Please delete the existing cache and rebuild it. Sorry for the inconvenience.
1921
- cache file: ${cacheFile}`
1922
- );
1923
- return void 0;
1924
- }
1925
- debug3(
1926
- "cache loaded from file, path: %s, cache version: %s, record length: %s",
1927
- cacheFile,
1928
- jsonData.midsceneVersion,
1929
- jsonData.caches.length
1930
- );
1931
- jsonData.midsceneVersion = version;
1932
- return jsonData;
1933
- } catch (err) {
1934
- debug3(
1935
- "cache file exists but load failed, path: %s, error: %s",
1936
- cacheFile,
1937
- err
1938
- );
1939
- return void 0;
1940
- }
1941
- }
1942
- flushCacheToFile() {
1943
- if (!version) {
1944
- debug3("no midscene version info, will not write cache to file");
1945
- return;
1946
- }
1947
- if (!this.cacheFilePath) {
1948
- debug3("no cache file path, will not write cache to file");
1949
- return;
1950
- }
1951
- try {
1952
- const dir = dirname2(this.cacheFilePath);
1953
- if (!existsSync2(dir)) {
1954
- mkdirSync2(dir, { recursive: true });
1955
- debug3("created cache directory: %s", dir);
1956
- }
1957
- const yamlData = yaml3.dump(this.cache);
1958
- writeFileSync2(this.cacheFilePath, yamlData);
1959
- debug3("cache flushed to file: %s", this.cacheFilePath);
1960
- } catch (err) {
1961
- debug3(
1962
- "write cache to file failed, path: %s, error: %s",
1963
- this.cacheFilePath,
1964
- err
1965
- );
1966
- }
1967
- }
1968
- updateOrAppendCacheRecord(newRecord, cachedRecord) {
1969
- if (cachedRecord) {
1970
- if (newRecord.type === "plan") {
1971
- cachedRecord.updateFn((cache) => {
1972
- cache.yamlWorkflow = newRecord.yamlWorkflow;
1973
- });
1974
- } else {
1975
- cachedRecord.updateFn((cache) => {
1976
- cache.xpaths = newRecord.xpaths;
1977
- });
1978
- }
1979
- } else {
1980
- this.appendCache(newRecord);
1981
- }
1982
- }
1983
- };
1984
-
1985
2032
  // src/common/agent.ts
1986
2033
  var debug4 = getDebug4("web-integration");
1987
2034
  var distanceOfTwoPoints = (p1, p2) => {
@@ -2110,10 +2157,12 @@ ${errorTask?.errorStack}`);
2110
2157
  const prompt = opt.prompt ?? locatePrompt;
2111
2158
  const deepThink = opt.deepThink ?? false;
2112
2159
  const cacheable = opt.cacheable ?? true;
2160
+ const xpath = opt.xpath;
2113
2161
  return {
2114
2162
  prompt,
2115
2163
  deepThink,
2116
- cacheable
2164
+ cacheable,
2165
+ xpath
2117
2166
  };
2118
2167
  }
2119
2168
  return {
@@ -2271,6 +2320,9 @@ ${errorTask?.errorStack}`);
2271
2320
  this.afterTaskRunning(executor);
2272
2321
  return output;
2273
2322
  }
2323
+ async aiAsk(prompt, opt = defaultInsightExtractOption) {
2324
+ return this.aiString(prompt, opt);
2325
+ }
2274
2326
  async describeElementAtPoint(center, opt) {
2275
2327
  const { verifyPrompt = true, retryLimit = 3 } = opt || {};
2276
2328
  let success = false;
@@ -2697,7 +2749,7 @@ function sleep2(ms) {
2697
2749
  var ChromeExtensionProxyPage = class {
2698
2750
  constructor(forceSameTabNavigation) {
2699
2751
  this.pageType = "chrome-extension-proxy";
2700
- this.version = "0.19.1";
2752
+ this.version = "0.20.0";
2701
2753
  this.activeTabId = null;
2702
2754
  this.tabIdOfDebuggerAttached = null;
2703
2755
  this.attachingDebugger = null;