@midscene/web 0.19.1 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. package/dist/es/agent.js +299 -247
  2. package/dist/es/agent.js.map +1 -1
  3. package/dist/es/bridge-mode-browser.js +3 -3
  4. package/dist/es/bridge-mode.js +301 -249
  5. package/dist/es/bridge-mode.js.map +1 -1
  6. package/dist/es/chrome-extension.js +342 -290
  7. package/dist/es/chrome-extension.js.map +1 -1
  8. package/dist/es/index.js +307 -247
  9. package/dist/es/index.js.map +1 -1
  10. package/dist/es/midscene-playground.js +341 -289
  11. package/dist/es/midscene-playground.js.map +1 -1
  12. package/dist/es/midscene-server.js +25 -12
  13. package/dist/es/midscene-server.js.map +1 -1
  14. package/dist/es/playground.js +341 -289
  15. package/dist/es/playground.js.map +1 -1
  16. package/dist/es/playwright-report.js +14 -1
  17. package/dist/es/playwright-report.js.map +1 -1
  18. package/dist/es/playwright-reporter.js +14 -1
  19. package/dist/es/playwright-reporter.js.map +1 -1
  20. package/dist/es/playwright.js +307 -247
  21. package/dist/es/playwright.js.map +1 -1
  22. package/dist/es/puppeteer-agent-launcher.js +299 -247
  23. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  24. package/dist/es/puppeteer.js +299 -247
  25. package/dist/es/puppeteer.js.map +1 -1
  26. package/dist/es/utils.js +42 -8
  27. package/dist/es/utils.js.map +1 -1
  28. package/dist/es/yaml.js +11 -4
  29. package/dist/es/yaml.js.map +1 -1
  30. package/dist/lib/agent.js +308 -256
  31. package/dist/lib/agent.js.map +1 -1
  32. package/dist/lib/bridge-mode-browser.js +3 -3
  33. package/dist/lib/bridge-mode.js +310 -258
  34. package/dist/lib/bridge-mode.js.map +1 -1
  35. package/dist/lib/chrome-extension.js +355 -303
  36. package/dist/lib/chrome-extension.js.map +1 -1
  37. package/dist/lib/index.js +316 -256
  38. package/dist/lib/index.js.map +1 -1
  39. package/dist/lib/midscene-playground.js +354 -302
  40. package/dist/lib/midscene-playground.js.map +1 -1
  41. package/dist/lib/midscene-server.js +28 -15
  42. package/dist/lib/midscene-server.js.map +1 -1
  43. package/dist/lib/playground.js +354 -302
  44. package/dist/lib/playground.js.map +1 -1
  45. package/dist/lib/playwright-report.js +20 -7
  46. package/dist/lib/playwright-report.js.map +1 -1
  47. package/dist/lib/playwright-reporter.js +20 -7
  48. package/dist/lib/playwright-reporter.js.map +1 -1
  49. package/dist/lib/playwright.js +316 -256
  50. package/dist/lib/playwright.js.map +1 -1
  51. package/dist/lib/puppeteer-agent-launcher.js +308 -256
  52. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  53. package/dist/lib/puppeteer.js +308 -256
  54. package/dist/lib/puppeteer.js.map +1 -1
  55. package/dist/lib/utils.js +48 -13
  56. package/dist/lib/utils.js.map +1 -1
  57. package/dist/lib/yaml.js +11 -4
  58. package/dist/lib/yaml.js.map +1 -1
  59. package/dist/types/agent.d.ts +6 -102
  60. package/dist/types/bridge-mode-browser.d.ts +3 -2
  61. package/dist/types/bridge-mode.d.ts +4 -4
  62. package/dist/types/{browser-5dbb4bfb.d.ts → browser-118d886d.d.ts} +1 -1
  63. package/dist/types/chrome-extension.d.ts +2 -2
  64. package/dist/types/index.d.ts +1 -1
  65. package/dist/types/midscene-server.d.ts +2 -2
  66. package/dist/types/{page-90e9f9a7.d.ts → page-471361cd.d.ts} +102 -3
  67. package/dist/types/playground.d.ts +2 -2
  68. package/dist/types/playwright.d.ts +6 -2
  69. package/dist/types/puppeteer-agent-launcher.d.ts +1 -1
  70. package/dist/types/puppeteer.d.ts +3 -3
  71. package/dist/types/utils.d.ts +2 -1
  72. package/dist/types/yaml.d.ts +1 -1
  73. package/package.json +3 -3
@@ -186,7 +186,7 @@ var ScriptPlayer = class {
186
186
  domIncluded: numberTask.domIncluded,
187
187
  screenshotIncluded: numberTask.screenshotIncluded
188
188
  };
189
- (0, import_utils.assert)(prompt, "missing prompt for number");
189
+ (0, import_utils.assert)(prompt, "missing prompt for aiNumber");
190
190
  (0, import_utils.assert)(
191
191
  typeof prompt === "string",
192
192
  "prompt for number must be a string"
@@ -200,7 +200,7 @@ var ScriptPlayer = class {
200
200
  domIncluded: stringTask.domIncluded,
201
201
  screenshotIncluded: stringTask.screenshotIncluded
202
202
  };
203
- (0, import_utils.assert)(prompt, "missing prompt for string");
203
+ (0, import_utils.assert)(prompt, "missing prompt for aiNumber");
204
204
  (0, import_utils.assert)(
205
205
  typeof prompt === "string",
206
206
  "prompt for string must be a string"
@@ -214,13 +214,20 @@ var ScriptPlayer = class {
214
214
  domIncluded: booleanTask.domIncluded,
215
215
  screenshotIncluded: booleanTask.screenshotIncluded
216
216
  };
217
- (0, import_utils.assert)(prompt, "missing prompt for boolean");
217
+ (0, import_utils.assert)(prompt, "missing prompt for aiBoolean");
218
218
  (0, import_utils.assert)(
219
219
  typeof prompt === "string",
220
220
  "prompt for boolean must be a string"
221
221
  );
222
222
  const booleanResult = await agent.aiBoolean(prompt, options);
223
223
  this.setResult(booleanTask.name, booleanResult);
224
+ } else if ("aiAsk" in flowItem) {
225
+ const askTask = flowItem;
226
+ const prompt = askTask.aiAsk;
227
+ (0, import_utils.assert)(prompt, "missing prompt for aiAsk");
228
+ (0, import_utils.assert)(typeof prompt === "string", "prompt for aiAsk must be a string");
229
+ const askResult = await agent.aiAsk(prompt);
230
+ this.setResult(askTask.name, askResult);
224
231
  } else if ("aiLocate" in flowItem) {
225
232
  const locateTask = flowItem;
226
233
  const prompt = locateTask.aiLocate;
@@ -229,7 +236,7 @@ var ScriptPlayer = class {
229
236
  typeof prompt === "string",
230
237
  "prompt for aiLocate must be a string"
231
238
  );
232
- const locateResult = await agent.aiLocate(prompt);
239
+ const locateResult = await agent.aiLocate(prompt, locateTask);
233
240
  this.setResult(locateTask.name, locateResult);
234
241
  } else if ("aiWaitFor" in flowItem) {
235
242
  const waitForTask = flowItem;
@@ -438,10 +445,10 @@ var import_utils12 = require("@midscene/shared/utils");
438
445
  // src/common/tasks.ts
439
446
  var import_core = require("@midscene/core");
440
447
  var import_ai_model2 = require("@midscene/core/ai-model");
441
- var import_utils5 = require("@midscene/core/utils");
448
+ var import_utils7 = require("@midscene/core/utils");
442
449
  var import_constants = require("@midscene/shared/constants");
443
- var import_logger = require("@midscene/shared/logger");
444
- var import_utils6 = require("@midscene/shared/utils");
450
+ var import_logger2 = require("@midscene/shared/logger");
451
+ var import_utils8 = require("@midscene/shared/utils");
445
452
 
446
453
  // src/common/ui-utils.ts
447
454
  function typeStr(task) {
@@ -518,11 +525,11 @@ function paramStr(task) {
518
525
 
519
526
  // src/common/utils.ts
520
527
  var import_ai_model = require("@midscene/core/ai-model");
521
- var import_utils3 = require("@midscene/core/utils");
528
+ var import_utils5 = require("@midscene/core/utils");
522
529
  var import_env = require("@midscene/shared/env");
523
530
  var import_extractor = require("@midscene/shared/extractor");
524
531
  var import_img = require("@midscene/shared/img");
525
- var import_utils4 = require("@midscene/shared/utils");
532
+ var import_utils6 = require("@midscene/shared/utils");
526
533
  var import_dayjs = __toESM(require("dayjs"));
527
534
 
528
535
  // src/web-element.ts
@@ -550,14 +557,189 @@ var WebElementInfo = class {
550
557
  }
551
558
  };
552
559
 
560
+ // src/common/task-cache.ts
561
+ var import_node_assert = __toESM(require("assert"));
562
+ var import_node_fs2 = require("fs");
563
+ var import_node_path2 = require("path");
564
+ var import_common2 = require("@midscene/shared/common");
565
+ var import_logger = require("@midscene/shared/logger");
566
+ var import_utils3 = require("@midscene/shared/utils");
567
+ var import_js_yaml3 = __toESM(require("js-yaml"));
568
+ var import_semver = __toESM(require("semver"));
569
+
570
+ // package.json
571
+ var version = "0.20.0";
572
+
573
+ // src/common/task-cache.ts
574
+ var debug = (0, import_logger.getDebug)("cache");
575
+ var lowestSupportedMidsceneVersion = "0.16.10";
576
+ var cacheFileExt = ".cache.yaml";
577
+ var TaskCache = class {
578
+ // Track matched records
579
+ constructor(cacheId, isCacheResultUsed, cacheFilePath) {
580
+ this.matchedCacheIndices = /* @__PURE__ */ new Set();
581
+ (0, import_node_assert.default)(cacheId, "cacheId is required");
582
+ this.cacheId = replaceIllegalPathCharsAndSpace(cacheId);
583
+ this.cacheFilePath = import_utils3.ifInBrowser ? void 0 : cacheFilePath || (0, import_node_path2.join)((0, import_common2.getMidsceneRunSubDir)("cache"), `${this.cacheId}${cacheFileExt}`);
584
+ this.isCacheResultUsed = isCacheResultUsed;
585
+ let cacheContent;
586
+ if (this.cacheFilePath) {
587
+ cacheContent = this.loadCacheFromFile();
588
+ }
589
+ if (!cacheContent) {
590
+ cacheContent = {
591
+ midsceneVersion: version,
592
+ cacheId: this.cacheId,
593
+ caches: []
594
+ };
595
+ }
596
+ this.cache = cacheContent;
597
+ this.cacheOriginalLength = this.cache.caches.length;
598
+ }
599
+ matchCache(prompt, type) {
600
+ for (let i = 0; i < this.cacheOriginalLength; i++) {
601
+ const item = this.cache.caches[i];
602
+ const key = `${type}:${prompt}:${i}`;
603
+ if (item.type === type && item.prompt === prompt && !this.matchedCacheIndices.has(key)) {
604
+ this.matchedCacheIndices.add(key);
605
+ debug(
606
+ "cache found and marked as used, type: %s, prompt: %s, index: %d",
607
+ type,
608
+ prompt,
609
+ i
610
+ );
611
+ return {
612
+ cacheContent: item,
613
+ updateFn: (cb) => {
614
+ debug(
615
+ "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
616
+ type,
617
+ prompt,
618
+ i
619
+ );
620
+ cb(item);
621
+ debug(
622
+ "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
623
+ type,
624
+ prompt,
625
+ i
626
+ );
627
+ this.flushCacheToFile();
628
+ }
629
+ };
630
+ }
631
+ }
632
+ debug("no unused cache found, type: %s, prompt: %s", type, prompt);
633
+ return void 0;
634
+ }
635
+ matchPlanCache(prompt) {
636
+ return this.matchCache(prompt, "plan");
637
+ }
638
+ matchLocateCache(prompt) {
639
+ return this.matchCache(prompt, "locate");
640
+ }
641
+ appendCache(cache) {
642
+ debug("will append cache", cache);
643
+ this.cache.caches.push(cache);
644
+ this.flushCacheToFile();
645
+ }
646
+ loadCacheFromFile() {
647
+ const cacheFile = this.cacheFilePath;
648
+ (0, import_node_assert.default)(cacheFile, "cache file path is required");
649
+ if (!(0, import_node_fs2.existsSync)(cacheFile)) {
650
+ debug("no cache file found, path: %s", cacheFile);
651
+ return void 0;
652
+ }
653
+ const jsonTypeCacheFile = cacheFile.replace(cacheFileExt, ".json");
654
+ if ((0, import_node_fs2.existsSync)(jsonTypeCacheFile) && this.isCacheResultUsed) {
655
+ console.warn(
656
+ `An outdated cache file from an earlier version of Midscene has been detected. Since version 0.17, we have implemented an improved caching strategy. Please delete the old file located at: ${jsonTypeCacheFile}.`
657
+ );
658
+ return void 0;
659
+ }
660
+ try {
661
+ const data = (0, import_node_fs2.readFileSync)(cacheFile, "utf8");
662
+ const jsonData = import_js_yaml3.default.load(data);
663
+ if (!version) {
664
+ debug("no midscene version info, will not read cache from file");
665
+ return void 0;
666
+ }
667
+ if (import_semver.default.lt(jsonData.midsceneVersion, lowestSupportedMidsceneVersion) && !jsonData.midsceneVersion.includes("beta")) {
668
+ console.warn(
669
+ `You are using an old version of Midscene cache file, and we cannot match any info from it. Starting from Midscene v0.17, we changed our strategy to use xpath for cache info, providing better performance.
670
+ Please delete the existing cache and rebuild it. Sorry for the inconvenience.
671
+ cache file: ${cacheFile}`
672
+ );
673
+ return void 0;
674
+ }
675
+ debug(
676
+ "cache loaded from file, path: %s, cache version: %s, record length: %s",
677
+ cacheFile,
678
+ jsonData.midsceneVersion,
679
+ jsonData.caches.length
680
+ );
681
+ jsonData.midsceneVersion = version;
682
+ return jsonData;
683
+ } catch (err) {
684
+ debug(
685
+ "cache file exists but load failed, path: %s, error: %s",
686
+ cacheFile,
687
+ err
688
+ );
689
+ return void 0;
690
+ }
691
+ }
692
+ flushCacheToFile() {
693
+ if (!version) {
694
+ debug("no midscene version info, will not write cache to file");
695
+ return;
696
+ }
697
+ if (!this.cacheFilePath) {
698
+ debug("no cache file path, will not write cache to file");
699
+ return;
700
+ }
701
+ try {
702
+ const dir = (0, import_node_path2.dirname)(this.cacheFilePath);
703
+ if (!(0, import_node_fs2.existsSync)(dir)) {
704
+ (0, import_node_fs2.mkdirSync)(dir, { recursive: true });
705
+ debug("created cache directory: %s", dir);
706
+ }
707
+ const yamlData = import_js_yaml3.default.dump(this.cache);
708
+ (0, import_node_fs2.writeFileSync)(this.cacheFilePath, yamlData);
709
+ debug("cache flushed to file: %s", this.cacheFilePath);
710
+ } catch (err) {
711
+ debug(
712
+ "write cache to file failed, path: %s, error: %s",
713
+ this.cacheFilePath,
714
+ err
715
+ );
716
+ }
717
+ }
718
+ updateOrAppendCacheRecord(newRecord, cachedRecord) {
719
+ if (cachedRecord) {
720
+ if (newRecord.type === "plan") {
721
+ cachedRecord.updateFn((cache) => {
722
+ cache.yamlWorkflow = newRecord.yamlWorkflow;
723
+ });
724
+ } else {
725
+ cachedRecord.updateFn((cache) => {
726
+ cache.xpaths = newRecord.xpaths;
727
+ });
728
+ }
729
+ } else {
730
+ this.appendCache(newRecord);
731
+ }
732
+ }
733
+ };
734
+
553
735
  // src/common/utils.ts
554
736
  async function parseContextFromWebPage(page, _opt) {
555
- (0, import_utils4.assert)(page, "page is required");
737
+ (0, import_utils6.assert)(page, "page is required");
556
738
  if (page._forceUsePageContext) {
557
739
  return await page._forceUsePageContext();
558
740
  }
559
741
  const url = await page.url();
560
- (0, import_utils3.uploadTestInfoToServer)({ testUrl: url });
742
+ (0, import_utils5.uploadTestInfoToServer)({ testUrl: url });
561
743
  let screenshotBase64;
562
744
  let tree;
563
745
  await Promise.all([
@@ -579,7 +761,7 @@ async function parseContextFromWebPage(page, _opt) {
579
761
  isVisible
580
762
  });
581
763
  });
582
- (0, import_utils4.assert)(screenshotBase64, "screenshotBase64 is required");
764
+ (0, import_utils6.assert)(screenshotBase64, "screenshotBase64 is required");
583
765
  const size = await page.size();
584
766
  if (size.dpr && size.dpr > 1) {
585
767
  screenshotBase64 = await (0, import_img.resizeImgBase64)(screenshotBase64, {
@@ -597,11 +779,11 @@ async function parseContextFromWebPage(page, _opt) {
597
779
  function reportFileName(tag = "web") {
598
780
  const reportTagName = (0, import_env.getAIConfig)(import_env.MIDSCENE_REPORT_TAG_NAME);
599
781
  const dateTimeInFileName = (0, import_dayjs.default)().format("YYYY-MM-DD_HH-mm-ss");
600
- const uniqueId = (0, import_utils4.uuid)().substring(0, 8);
782
+ const uniqueId = (0, import_utils6.uuid)().substring(0, 8);
601
783
  return `${reportTagName || tag}-${dateTimeInFileName}-${uniqueId}`;
602
784
  }
603
785
  function printReportMsg(filepath) {
604
- (0, import_utils4.logMsg)(`Midscene - report file updated: ${filepath}`);
786
+ (0, import_utils6.logMsg)(`Midscene - report file updated: ${filepath}`);
605
787
  }
606
788
  function replaceIllegalPathCharsAndSpace(str) {
607
789
  return str.replace(/[:*?"<>| ]/g, "-");
@@ -654,6 +836,28 @@ function matchElementFromPlan(planLocateParam, tree) {
654
836
  }
655
837
  return void 0;
656
838
  }
839
+ async function matchElementFromCache(taskExecutor, xpaths, cachePrompt, cacheable) {
840
+ try {
841
+ if (xpaths?.length && taskExecutor.taskCache?.isCacheResultUsed && cacheable !== false) {
842
+ for (let i = 0; i < xpaths.length; i++) {
843
+ const element = await taskExecutor.page.getElementInfoByXpath(
844
+ xpaths[i]
845
+ );
846
+ if (element?.id) {
847
+ debug("cache hit, prompt: %s", cachePrompt);
848
+ debug(
849
+ "found a new new element with same xpath, xpath: %s, id: %s",
850
+ xpaths[i],
851
+ element?.id
852
+ );
853
+ return element;
854
+ }
855
+ }
856
+ }
857
+ } catch (error) {
858
+ debug("get element info by xpath error: ", error);
859
+ }
860
+ }
657
861
  function trimContextByViewport(execution) {
658
862
  function filterVisibleTree(node) {
659
863
  if (!node)
@@ -692,7 +896,7 @@ function trimContextByViewport(execution) {
692
896
  }
693
897
 
694
898
  // src/common/tasks.ts
695
- var debug = (0, import_logger.getDebug)("page-task-executor");
899
+ var debug2 = (0, import_logger2.getDebug)("page-task-executor");
696
900
  var replanningCountLimit = 10;
697
901
  var isAndroidPage = (page) => {
698
902
  return page.pageType === "android";
@@ -733,7 +937,7 @@ var PageTaskExecutor = class {
733
937
  if (info?.id) {
734
938
  elementId = info.id;
735
939
  } else {
736
- debug(
940
+ debug2(
737
941
  "no element id found for position node, will not update cache",
738
942
  element
739
943
  );
@@ -746,7 +950,7 @@ var PageTaskExecutor = class {
746
950
  const result = await this.page.getXpathsById(elementId);
747
951
  return result;
748
952
  } catch (error) {
749
- debug("getXpathsById error: ", error);
953
+ debug2("getXpathsById error: ", error);
750
954
  }
751
955
  }
752
956
  prependExecutorWithScreenshot(taskApply, appendAfterExecution = false) {
@@ -762,7 +966,7 @@ var PageTaskExecutor = class {
762
966
  if (taskApply.type === "Action") {
763
967
  await Promise.all([
764
968
  (async () => {
765
- await (0, import_utils5.sleep)(100);
969
+ await (0, import_utils7.sleep)(100);
766
970
  if (this.page.waitUntilNetworkIdle) {
767
971
  try {
768
972
  await this.page.waitUntilNetworkIdle();
@@ -770,7 +974,7 @@ var PageTaskExecutor = class {
770
974
  }
771
975
  }
772
976
  })(),
773
- (0, import_utils5.sleep)(200)
977
+ (0, import_utils7.sleep)(200)
774
978
  ]);
775
979
  }
776
980
  if (appendAfterExecution) {
@@ -800,7 +1004,7 @@ var PageTaskExecutor = class {
800
1004
  locate: plan2.locate,
801
1005
  executor: async (param, taskContext) => {
802
1006
  const { task } = taskContext;
803
- (0, import_utils6.assert)(
1007
+ (0, import_utils8.assert)(
804
1008
  param?.prompt || param?.id || param?.bbox,
805
1009
  "No prompt or id or position or bbox to locate"
806
1010
  );
@@ -825,39 +1029,29 @@ var PageTaskExecutor = class {
825
1029
  timing: "before Insight"
826
1030
  };
827
1031
  task.recorder = [recordItem];
828
- let cacheHitFlag = false;
1032
+ const elementFromXpath = param.xpath ? await this.page.getElementInfoByXpath(param.xpath) : void 0;
1033
+ const userExpectedPathHitFlag = !!elementFromXpath;
829
1034
  const cachePrompt = param.prompt;
830
1035
  const locateCacheRecord = this.taskCache?.matchLocateCache(cachePrompt);
831
1036
  const xpaths = locateCacheRecord?.cacheContent?.xpaths;
832
- let elementFromCache = null;
833
- try {
834
- if (xpaths?.length && this.taskCache?.isCacheResultUsed && param?.cacheable !== false) {
835
- for (let i = 0; i < xpaths.length; i++) {
836
- const element2 = await this.page.getElementInfoByXpath(
837
- xpaths[i]
838
- );
839
- if (element2?.id) {
840
- elementFromCache = element2;
841
- debug("cache hit, prompt: %s", cachePrompt);
842
- cacheHitFlag = true;
843
- debug(
844
- "found a new new element with same xpath, xpath: %s, id: %s",
845
- xpaths[i],
846
- element2?.id
847
- );
848
- break;
849
- }
850
- }
851
- }
852
- } catch (error) {
853
- debug("get element info by xpath error: ", error);
854
- }
855
- const startTime = Date.now();
856
- const element = elementFromCache || // try to match element from cache
857
- matchElementFromPlan(param, pageContext.tree) || // try to match element from plan
858
- (await this.insight.locate(param, {
1037
+ const elementFromCache = userExpectedPathHitFlag ? null : await matchElementFromCache(
1038
+ this,
1039
+ xpaths,
1040
+ cachePrompt,
1041
+ param.cacheable
1042
+ );
1043
+ const cacheHitFlag = !!elementFromCache;
1044
+ const elementFromPlan = !userExpectedPathHitFlag && !cacheHitFlag ? matchElementFromPlan(param, pageContext.tree) : void 0;
1045
+ const planHitFlag = !!elementFromPlan;
1046
+ const elementFromAiLocate = !userExpectedPathHitFlag && !cacheHitFlag && !planHitFlag ? (await this.insight.locate(param, {
1047
+ // fallback to ai locate
859
1048
  context: pageContext
860
- })).element;
1049
+ })).element : void 0;
1050
+ const aiLocateHitFlag = !!elementFromAiLocate;
1051
+ const element = elementFromXpath || // highest priority
1052
+ elementFromCache || // second priority
1053
+ elementFromPlan || // third priority
1054
+ elementFromAiLocate;
861
1055
  let currentXpaths;
862
1056
  if (element && this.taskCache && !cacheHitFlag && param?.cacheable !== false) {
863
1057
  const elementXpaths = await this.getElementXpath(
@@ -875,7 +1069,7 @@ var PageTaskExecutor = class {
875
1069
  locateCacheRecord
876
1070
  );
877
1071
  } else {
878
- debug(
1072
+ debug2(
879
1073
  "no xpaths found, will not update cache",
880
1074
  cachePrompt,
881
1075
  elementXpaths
@@ -885,16 +1079,44 @@ var PageTaskExecutor = class {
885
1079
  if (!element) {
886
1080
  throw new Error(`Element not found: ${param.prompt}`);
887
1081
  }
1082
+ let hitBy;
1083
+ if (userExpectedPathHitFlag) {
1084
+ hitBy = {
1085
+ from: "User expected path",
1086
+ context: {
1087
+ xpath: param.xpath
1088
+ }
1089
+ };
1090
+ } else if (cacheHitFlag) {
1091
+ hitBy = {
1092
+ from: "Cache",
1093
+ context: {
1094
+ xpathsFromCache: xpaths,
1095
+ xpathsToSave: currentXpaths
1096
+ }
1097
+ };
1098
+ } else if (planHitFlag) {
1099
+ hitBy = {
1100
+ from: "Planning",
1101
+ context: {
1102
+ id: elementFromPlan?.id,
1103
+ bbox: elementFromPlan?.bbox
1104
+ }
1105
+ };
1106
+ } else if (aiLocateHitFlag) {
1107
+ hitBy = {
1108
+ from: "AI model",
1109
+ context: {
1110
+ prompt: param.prompt
1111
+ }
1112
+ };
1113
+ }
888
1114
  return {
889
1115
  output: {
890
1116
  element
891
1117
  },
892
1118
  pageContext,
893
- cache: {
894
- hit: cacheHitFlag,
895
- originalXpaths: xpaths,
896
- currentXpaths
897
- }
1119
+ hitBy
898
1120
  };
899
1121
  }
900
1122
  };
@@ -990,7 +1212,7 @@ var PageTaskExecutor = class {
990
1212
  thought: plan2.thought,
991
1213
  locate: plan2.locate,
992
1214
  executor: async (param, { element }) => {
993
- (0, import_utils6.assert)(element, "Element not found, cannot tap");
1215
+ (0, import_utils8.assert)(element, "Element not found, cannot tap");
994
1216
  await this.page.mouse.click(element.center[0], element.center[1]);
995
1217
  }
996
1218
  };
@@ -1002,7 +1224,7 @@ var PageTaskExecutor = class {
1002
1224
  thought: plan2.thought,
1003
1225
  locate: plan2.locate,
1004
1226
  executor: async (param, { element }) => {
1005
- (0, import_utils6.assert)(element, "Element not found, cannot right click");
1227
+ (0, import_utils8.assert)(element, "Element not found, cannot right click");
1006
1228
  await this.page.mouse.click(
1007
1229
  element.center[0],
1008
1230
  element.center[1],
@@ -1019,7 +1241,7 @@ var PageTaskExecutor = class {
1019
1241
  thought: plan2.thought,
1020
1242
  locate: plan2.locate,
1021
1243
  executor: async (taskParam) => {
1022
- (0, import_utils6.assert)(
1244
+ (0, import_utils8.assert)(
1023
1245
  taskParam?.start_box && taskParam?.end_box,
1024
1246
  "No start_box or end_box to drag"
1025
1247
  );
@@ -1034,7 +1256,7 @@ var PageTaskExecutor = class {
1034
1256
  thought: plan2.thought,
1035
1257
  locate: plan2.locate,
1036
1258
  executor: async (param, { element }) => {
1037
- (0, import_utils6.assert)(element, "Element not found, cannot hover");
1259
+ (0, import_utils8.assert)(element, "Element not found, cannot hover");
1038
1260
  await this.page.mouse.move(element.center[0], element.center[1]);
1039
1261
  }
1040
1262
  };
@@ -1086,7 +1308,7 @@ var PageTaskExecutor = class {
1086
1308
  `Unknown scroll direction: ${taskParam.direction}`
1087
1309
  );
1088
1310
  }
1089
- await (0, import_utils5.sleep)(500);
1311
+ await (0, import_utils7.sleep)(500);
1090
1312
  } else {
1091
1313
  throw new Error(
1092
1314
  `Unknown scroll event type: ${scrollToEventName}, taskParam: ${JSON.stringify(
@@ -1105,7 +1327,7 @@ var PageTaskExecutor = class {
1105
1327
  thought: plan2.thought,
1106
1328
  locate: plan2.locate,
1107
1329
  executor: async (taskParam) => {
1108
- await (0, import_utils5.sleep)(taskParam?.timeMs || 3e3);
1330
+ await (0, import_utils7.sleep)(taskParam?.timeMs || 3e3);
1109
1331
  }
1110
1332
  };
1111
1333
  tasks.push(taskActionSleep);
@@ -1153,7 +1375,7 @@ var PageTaskExecutor = class {
1153
1375
  thought: plan2.thought,
1154
1376
  locate: plan2.locate,
1155
1377
  executor: async (param) => {
1156
- (0, import_utils6.assert)(
1378
+ (0, import_utils8.assert)(
1157
1379
  isAndroidPage(this.page),
1158
1380
  "Cannot use home button on non-Android devices"
1159
1381
  );
@@ -1169,7 +1391,7 @@ var PageTaskExecutor = class {
1169
1391
  thought: plan2.thought,
1170
1392
  locate: plan2.locate,
1171
1393
  executor: async (param) => {
1172
- (0, import_utils6.assert)(
1394
+ (0, import_utils8.assert)(
1173
1395
  isAndroidPage(this.page),
1174
1396
  "Cannot use back button on non-Android devices"
1175
1397
  );
@@ -1185,7 +1407,7 @@ var PageTaskExecutor = class {
1185
1407
  thought: plan2.thought,
1186
1408
  locate: plan2.locate,
1187
1409
  executor: async (param) => {
1188
- (0, import_utils6.assert)(
1410
+ (0, import_utils8.assert)(
1189
1411
  isAndroidPage(this.page),
1190
1412
  "Cannot use recent apps button on non-Android devices"
1191
1413
  );
@@ -1336,7 +1558,7 @@ var PageTaskExecutor = class {
1336
1558
  }
1337
1559
  }
1338
1560
  if (finalActions.length === 0) {
1339
- (0, import_utils6.assert)(
1561
+ (0, import_utils8.assert)(
1340
1562
  !more_actions_needed_by_instruction || sleep3,
1341
1563
  error ? `Failed to plan: ${error}` : planParsingError || "No plan found"
1342
1564
  );
@@ -1574,7 +1796,7 @@ var PageTaskExecutor = class {
1574
1796
  );
1575
1797
  let outputResult = data;
1576
1798
  if (ifTypeRestricted) {
1577
- (0, import_utils6.assert)(data?.result !== void 0, "No result in query data");
1799
+ (0, import_utils8.assert)(data?.result !== void 0, "No result in query data");
1578
1800
  outputResult = data.result;
1579
1801
  }
1580
1802
  return {
@@ -1670,9 +1892,9 @@ var PageTaskExecutor = class {
1670
1892
  onTaskStart: this.onTaskStartCallback
1671
1893
  });
1672
1894
  const { timeoutMs, checkIntervalMs } = opt;
1673
- (0, import_utils6.assert)(assertion, "No assertion for waitFor");
1674
- (0, import_utils6.assert)(timeoutMs, "No timeoutMs for waitFor");
1675
- (0, import_utils6.assert)(checkIntervalMs, "No checkIntervalMs for waitFor");
1895
+ (0, import_utils8.assert)(assertion, "No assertion for waitFor");
1896
+ (0, import_utils8.assert)(timeoutMs, "No timeoutMs for waitFor");
1897
+ (0, import_utils8.assert)(checkIntervalMs, "No checkIntervalMs for waitFor");
1676
1898
  const overallStartTime = Date.now();
1677
1899
  let startTime = Date.now();
1678
1900
  let errorThought = "";
@@ -1726,9 +1948,9 @@ var PageTaskExecutor = class {
1726
1948
  };
1727
1949
 
1728
1950
  // src/common/plan-builder.ts
1729
- var import_logger2 = require("@midscene/shared/logger");
1730
- var import_utils8 = require("@midscene/shared/utils");
1731
- var debug2 = (0, import_logger2.getDebug)("plan-builder");
1951
+ var import_logger3 = require("@midscene/shared/logger");
1952
+ var import_utils10 = require("@midscene/shared/utils");
1953
+ var debug3 = (0, import_logger3.getDebug)("plan-builder");
1732
1954
  function buildPlans(type, locateParam, param) {
1733
1955
  let returnPlans = [];
1734
1956
  const locatePlan = locateParam ? {
@@ -1738,8 +1960,8 @@ function buildPlans(type, locateParam, param) {
1738
1960
  thought: ""
1739
1961
  } : null;
1740
1962
  if (type === "Tap" || type === "Hover" || type === "RightClick") {
1741
- (0, import_utils8.assert)(locateParam, `missing locate info for action "${type}"`);
1742
- (0, import_utils8.assert)(locatePlan, `missing locate info for action "${type}"`);
1963
+ (0, import_utils10.assert)(locateParam, `missing locate info for action "${type}"`);
1964
+ (0, import_utils10.assert)(locatePlan, `missing locate info for action "${type}"`);
1743
1965
  const tapPlan = {
1744
1966
  type,
1745
1967
  param: null,
@@ -1750,9 +1972,9 @@ function buildPlans(type, locateParam, param) {
1750
1972
  }
1751
1973
  if (type === "Input" || type === "KeyboardPress") {
1752
1974
  if (type === "Input") {
1753
- (0, import_utils8.assert)(locateParam, `missing locate info for action "${type}"`);
1975
+ (0, import_utils10.assert)(locateParam, `missing locate info for action "${type}"`);
1754
1976
  }
1755
- (0, import_utils8.assert)(param, `missing param for action "${type}"`);
1977
+ (0, import_utils10.assert)(param, `missing param for action "${type}"`);
1756
1978
  const inputPlan = {
1757
1979
  type,
1758
1980
  param,
@@ -1766,7 +1988,7 @@ function buildPlans(type, locateParam, param) {
1766
1988
  }
1767
1989
  }
1768
1990
  if (type === "Scroll") {
1769
- (0, import_utils8.assert)(param, `missing param for action "${type}"`);
1991
+ (0, import_utils10.assert)(param, `missing param for action "${type}"`);
1770
1992
  const scrollPlan = {
1771
1993
  type,
1772
1994
  param,
@@ -1780,7 +2002,7 @@ function buildPlans(type, locateParam, param) {
1780
2002
  }
1781
2003
  }
1782
2004
  if (type === "Sleep") {
1783
- (0, import_utils8.assert)(param, `missing param for action "${type}"`);
2005
+ (0, import_utils10.assert)(param, `missing param for action "${type}"`);
1784
2006
  const sleepPlan = {
1785
2007
  type,
1786
2008
  param,
@@ -1790,7 +2012,7 @@ function buildPlans(type, locateParam, param) {
1790
2012
  returnPlans = [sleepPlan];
1791
2013
  }
1792
2014
  if (type === "Locate") {
1793
- (0, import_utils8.assert)(locateParam, `missing locate info for action "${type}"`);
2015
+ (0, import_utils10.assert)(locateParam, `missing locate info for action "${type}"`);
1794
2016
  const locatePlan2 = {
1795
2017
  type,
1796
2018
  param: locateParam,
@@ -1800,187 +2022,12 @@ function buildPlans(type, locateParam, param) {
1800
2022
  returnPlans = [locatePlan2];
1801
2023
  }
1802
2024
  if (returnPlans) {
1803
- debug2("buildPlans", returnPlans);
2025
+ debug3("buildPlans", returnPlans);
1804
2026
  return returnPlans;
1805
2027
  }
1806
2028
  throw new Error(`Not supported type: ${type}`);
1807
2029
  }
1808
2030
 
1809
- // src/common/task-cache.ts
1810
- var import_node_assert = __toESM(require("assert"));
1811
- var import_node_fs2 = require("fs");
1812
- var import_node_path2 = require("path");
1813
- var import_common2 = require("@midscene/shared/common");
1814
- var import_logger3 = require("@midscene/shared/logger");
1815
- var import_utils9 = require("@midscene/shared/utils");
1816
- var import_js_yaml3 = __toESM(require("js-yaml"));
1817
- var import_semver = __toESM(require("semver"));
1818
-
1819
- // package.json
1820
- var version = "0.19.1";
1821
-
1822
- // src/common/task-cache.ts
1823
- var debug3 = (0, import_logger3.getDebug)("cache");
1824
- var lowestSupportedMidsceneVersion = "0.16.10";
1825
- var cacheFileExt = ".cache.yaml";
1826
- var TaskCache = class {
1827
- // Track matched records
1828
- constructor(cacheId, isCacheResultUsed, cacheFilePath) {
1829
- this.matchedCacheIndices = /* @__PURE__ */ new Set();
1830
- (0, import_node_assert.default)(cacheId, "cacheId is required");
1831
- this.cacheId = replaceIllegalPathCharsAndSpace(cacheId);
1832
- this.cacheFilePath = import_utils9.ifInBrowser ? void 0 : cacheFilePath || (0, import_node_path2.join)((0, import_common2.getMidsceneRunSubDir)("cache"), `${this.cacheId}${cacheFileExt}`);
1833
- this.isCacheResultUsed = isCacheResultUsed;
1834
- let cacheContent;
1835
- if (this.cacheFilePath) {
1836
- cacheContent = this.loadCacheFromFile();
1837
- }
1838
- if (!cacheContent) {
1839
- cacheContent = {
1840
- midsceneVersion: version,
1841
- cacheId: this.cacheId,
1842
- caches: []
1843
- };
1844
- }
1845
- this.cache = cacheContent;
1846
- this.cacheOriginalLength = this.cache.caches.length;
1847
- }
1848
- matchCache(prompt, type) {
1849
- for (let i = 0; i < this.cacheOriginalLength; i++) {
1850
- const item = this.cache.caches[i];
1851
- const key = `${type}:${prompt}:${i}`;
1852
- if (item.type === type && item.prompt === prompt && !this.matchedCacheIndices.has(key)) {
1853
- this.matchedCacheIndices.add(key);
1854
- debug3(
1855
- "cache found and marked as used, type: %s, prompt: %s, index: %d",
1856
- type,
1857
- prompt,
1858
- i
1859
- );
1860
- return {
1861
- cacheContent: item,
1862
- updateFn: (cb) => {
1863
- debug3(
1864
- "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1865
- type,
1866
- prompt,
1867
- i
1868
- );
1869
- cb(item);
1870
- debug3(
1871
- "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1872
- type,
1873
- prompt,
1874
- i
1875
- );
1876
- this.flushCacheToFile();
1877
- }
1878
- };
1879
- }
1880
- }
1881
- debug3("no unused cache found, type: %s, prompt: %s", type, prompt);
1882
- return void 0;
1883
- }
1884
- matchPlanCache(prompt) {
1885
- return this.matchCache(prompt, "plan");
1886
- }
1887
- matchLocateCache(prompt) {
1888
- return this.matchCache(prompt, "locate");
1889
- }
1890
- appendCache(cache) {
1891
- debug3("will append cache", cache);
1892
- this.cache.caches.push(cache);
1893
- this.flushCacheToFile();
1894
- }
1895
- loadCacheFromFile() {
1896
- const cacheFile = this.cacheFilePath;
1897
- (0, import_node_assert.default)(cacheFile, "cache file path is required");
1898
- if (!(0, import_node_fs2.existsSync)(cacheFile)) {
1899
- debug3("no cache file found, path: %s", cacheFile);
1900
- return void 0;
1901
- }
1902
- const jsonTypeCacheFile = cacheFile.replace(cacheFileExt, ".json");
1903
- if ((0, import_node_fs2.existsSync)(jsonTypeCacheFile) && this.isCacheResultUsed) {
1904
- console.warn(
1905
- `An outdated cache file from an earlier version of Midscene has been detected. Since version 0.17, we have implemented an improved caching strategy. Please delete the old file located at: ${jsonTypeCacheFile}.`
1906
- );
1907
- return void 0;
1908
- }
1909
- try {
1910
- const data = (0, import_node_fs2.readFileSync)(cacheFile, "utf8");
1911
- const jsonData = import_js_yaml3.default.load(data);
1912
- if (!version) {
1913
- debug3("no midscene version info, will not read cache from file");
1914
- return void 0;
1915
- }
1916
- if (import_semver.default.lt(jsonData.midsceneVersion, lowestSupportedMidsceneVersion) && !jsonData.midsceneVersion.includes("beta")) {
1917
- console.warn(
1918
- `You are using an old version of Midscene cache file, and we cannot match any info from it. Starting from Midscene v0.17, we changed our strategy to use xpath for cache info, providing better performance.
1919
- Please delete the existing cache and rebuild it. Sorry for the inconvenience.
1920
- cache file: ${cacheFile}`
1921
- );
1922
- return void 0;
1923
- }
1924
- debug3(
1925
- "cache loaded from file, path: %s, cache version: %s, record length: %s",
1926
- cacheFile,
1927
- jsonData.midsceneVersion,
1928
- jsonData.caches.length
1929
- );
1930
- jsonData.midsceneVersion = version;
1931
- return jsonData;
1932
- } catch (err) {
1933
- debug3(
1934
- "cache file exists but load failed, path: %s, error: %s",
1935
- cacheFile,
1936
- err
1937
- );
1938
- return void 0;
1939
- }
1940
- }
1941
- flushCacheToFile() {
1942
- if (!version) {
1943
- debug3("no midscene version info, will not write cache to file");
1944
- return;
1945
- }
1946
- if (!this.cacheFilePath) {
1947
- debug3("no cache file path, will not write cache to file");
1948
- return;
1949
- }
1950
- try {
1951
- const dir = (0, import_node_path2.dirname)(this.cacheFilePath);
1952
- if (!(0, import_node_fs2.existsSync)(dir)) {
1953
- (0, import_node_fs2.mkdirSync)(dir, { recursive: true });
1954
- debug3("created cache directory: %s", dir);
1955
- }
1956
- const yamlData = import_js_yaml3.default.dump(this.cache);
1957
- (0, import_node_fs2.writeFileSync)(this.cacheFilePath, yamlData);
1958
- debug3("cache flushed to file: %s", this.cacheFilePath);
1959
- } catch (err) {
1960
- debug3(
1961
- "write cache to file failed, path: %s, error: %s",
1962
- this.cacheFilePath,
1963
- err
1964
- );
1965
- }
1966
- }
1967
- updateOrAppendCacheRecord(newRecord, cachedRecord) {
1968
- if (cachedRecord) {
1969
- if (newRecord.type === "plan") {
1970
- cachedRecord.updateFn((cache) => {
1971
- cache.yamlWorkflow = newRecord.yamlWorkflow;
1972
- });
1973
- } else {
1974
- cachedRecord.updateFn((cache) => {
1975
- cache.xpaths = newRecord.xpaths;
1976
- });
1977
- }
1978
- } else {
1979
- this.appendCache(newRecord);
1980
- }
1981
- }
1982
- };
1983
-
1984
2031
  // src/common/agent.ts
1985
2032
  var debug4 = (0, import_logger4.getDebug)("web-integration");
1986
2033
  var distanceOfTwoPoints = (p1, p2) => {
@@ -2109,10 +2156,12 @@ ${errorTask?.errorStack}`);
2109
2156
  const prompt = opt.prompt ?? locatePrompt;
2110
2157
  const deepThink = opt.deepThink ?? false;
2111
2158
  const cacheable = opt.cacheable ?? true;
2159
+ const xpath = opt.xpath;
2112
2160
  return {
2113
2161
  prompt,
2114
2162
  deepThink,
2115
- cacheable
2163
+ cacheable,
2164
+ xpath
2116
2165
  };
2117
2166
  }
2118
2167
  return {
@@ -2270,6 +2319,9 @@ ${errorTask?.errorStack}`);
2270
2319
  this.afterTaskRunning(executor);
2271
2320
  return output;
2272
2321
  }
2322
+ async aiAsk(prompt, opt = defaultInsightExtractOption) {
2323
+ return this.aiString(prompt, opt);
2324
+ }
2273
2325
  async describeElementAtPoint(center, opt) {
2274
2326
  const { verifyPrompt = true, retryLimit = 3 } = opt || {};
2275
2327
  let success = false;
@@ -2992,6 +3044,14 @@ var PlaywrightAiFixture = (options) => {
2992
3044
  use,
2993
3045
  aiActionType: "aiBoolean"
2994
3046
  });
3047
+ },
3048
+ aiAsk: async ({ page }, use, testInfo) => {
3049
+ await generateAiFunction({
3050
+ page,
3051
+ testInfo,
3052
+ use,
3053
+ aiActionType: "aiAsk"
3054
+ });
2995
3055
  }
2996
3056
  };
2997
3057
  };