@midscene/web 0.19.1 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. package/dist/es/agent.js +299 -247
  2. package/dist/es/agent.js.map +1 -1
  3. package/dist/es/bridge-mode-browser.js +3 -3
  4. package/dist/es/bridge-mode.js +301 -249
  5. package/dist/es/bridge-mode.js.map +1 -1
  6. package/dist/es/chrome-extension.js +342 -290
  7. package/dist/es/chrome-extension.js.map +1 -1
  8. package/dist/es/index.js +307 -247
  9. package/dist/es/index.js.map +1 -1
  10. package/dist/es/midscene-playground.js +341 -289
  11. package/dist/es/midscene-playground.js.map +1 -1
  12. package/dist/es/midscene-server.js +25 -12
  13. package/dist/es/midscene-server.js.map +1 -1
  14. package/dist/es/playground.js +341 -289
  15. package/dist/es/playground.js.map +1 -1
  16. package/dist/es/playwright-report.js +14 -1
  17. package/dist/es/playwright-report.js.map +1 -1
  18. package/dist/es/playwright-reporter.js +14 -1
  19. package/dist/es/playwright-reporter.js.map +1 -1
  20. package/dist/es/playwright.js +307 -247
  21. package/dist/es/playwright.js.map +1 -1
  22. package/dist/es/puppeteer-agent-launcher.js +299 -247
  23. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  24. package/dist/es/puppeteer.js +299 -247
  25. package/dist/es/puppeteer.js.map +1 -1
  26. package/dist/es/utils.js +42 -8
  27. package/dist/es/utils.js.map +1 -1
  28. package/dist/es/yaml.js +11 -4
  29. package/dist/es/yaml.js.map +1 -1
  30. package/dist/lib/agent.js +308 -256
  31. package/dist/lib/agent.js.map +1 -1
  32. package/dist/lib/bridge-mode-browser.js +3 -3
  33. package/dist/lib/bridge-mode.js +310 -258
  34. package/dist/lib/bridge-mode.js.map +1 -1
  35. package/dist/lib/chrome-extension.js +355 -303
  36. package/dist/lib/chrome-extension.js.map +1 -1
  37. package/dist/lib/index.js +316 -256
  38. package/dist/lib/index.js.map +1 -1
  39. package/dist/lib/midscene-playground.js +354 -302
  40. package/dist/lib/midscene-playground.js.map +1 -1
  41. package/dist/lib/midscene-server.js +28 -15
  42. package/dist/lib/midscene-server.js.map +1 -1
  43. package/dist/lib/playground.js +354 -302
  44. package/dist/lib/playground.js.map +1 -1
  45. package/dist/lib/playwright-report.js +20 -7
  46. package/dist/lib/playwright-report.js.map +1 -1
  47. package/dist/lib/playwright-reporter.js +20 -7
  48. package/dist/lib/playwright-reporter.js.map +1 -1
  49. package/dist/lib/playwright.js +316 -256
  50. package/dist/lib/playwright.js.map +1 -1
  51. package/dist/lib/puppeteer-agent-launcher.js +308 -256
  52. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  53. package/dist/lib/puppeteer.js +308 -256
  54. package/dist/lib/puppeteer.js.map +1 -1
  55. package/dist/lib/utils.js +48 -13
  56. package/dist/lib/utils.js.map +1 -1
  57. package/dist/lib/yaml.js +11 -4
  58. package/dist/lib/yaml.js.map +1 -1
  59. package/dist/types/agent.d.ts +6 -102
  60. package/dist/types/bridge-mode-browser.d.ts +3 -2
  61. package/dist/types/bridge-mode.d.ts +4 -4
  62. package/dist/types/{browser-5dbb4bfb.d.ts → browser-118d886d.d.ts} +1 -1
  63. package/dist/types/chrome-extension.d.ts +2 -2
  64. package/dist/types/index.d.ts +1 -1
  65. package/dist/types/midscene-server.d.ts +2 -2
  66. package/dist/types/{page-90e9f9a7.d.ts → page-471361cd.d.ts} +102 -3
  67. package/dist/types/playground.d.ts +2 -2
  68. package/dist/types/playwright.d.ts +6 -2
  69. package/dist/types/puppeteer-agent-launcher.d.ts +1 -1
  70. package/dist/types/puppeteer.d.ts +3 -3
  71. package/dist/types/utils.d.ts +2 -1
  72. package/dist/types/yaml.d.ts +1 -1
  73. package/package.json +3 -3
@@ -185,7 +185,7 @@ var ScriptPlayer = class {
185
185
  domIncluded: numberTask.domIncluded,
186
186
  screenshotIncluded: numberTask.screenshotIncluded
187
187
  };
188
- (0, import_utils.assert)(prompt, "missing prompt for number");
188
+ (0, import_utils.assert)(prompt, "missing prompt for aiNumber");
189
189
  (0, import_utils.assert)(
190
190
  typeof prompt === "string",
191
191
  "prompt for number must be a string"
@@ -199,7 +199,7 @@ var ScriptPlayer = class {
199
199
  domIncluded: stringTask.domIncluded,
200
200
  screenshotIncluded: stringTask.screenshotIncluded
201
201
  };
202
- (0, import_utils.assert)(prompt, "missing prompt for string");
202
+ (0, import_utils.assert)(prompt, "missing prompt for aiNumber");
203
203
  (0, import_utils.assert)(
204
204
  typeof prompt === "string",
205
205
  "prompt for string must be a string"
@@ -213,13 +213,20 @@ var ScriptPlayer = class {
213
213
  domIncluded: booleanTask.domIncluded,
214
214
  screenshotIncluded: booleanTask.screenshotIncluded
215
215
  };
216
- (0, import_utils.assert)(prompt, "missing prompt for boolean");
216
+ (0, import_utils.assert)(prompt, "missing prompt for aiBoolean");
217
217
  (0, import_utils.assert)(
218
218
  typeof prompt === "string",
219
219
  "prompt for boolean must be a string"
220
220
  );
221
221
  const booleanResult = await agent.aiBoolean(prompt, options);
222
222
  this.setResult(booleanTask.name, booleanResult);
223
+ } else if ("aiAsk" in flowItem) {
224
+ const askTask = flowItem;
225
+ const prompt = askTask.aiAsk;
226
+ (0, import_utils.assert)(prompt, "missing prompt for aiAsk");
227
+ (0, import_utils.assert)(typeof prompt === "string", "prompt for aiAsk must be a string");
228
+ const askResult = await agent.aiAsk(prompt);
229
+ this.setResult(askTask.name, askResult);
223
230
  } else if ("aiLocate" in flowItem) {
224
231
  const locateTask = flowItem;
225
232
  const prompt = locateTask.aiLocate;
@@ -228,7 +235,7 @@ var ScriptPlayer = class {
228
235
  typeof prompt === "string",
229
236
  "prompt for aiLocate must be a string"
230
237
  );
231
- const locateResult = await agent.aiLocate(prompt);
238
+ const locateResult = await agent.aiLocate(prompt, locateTask);
232
239
  this.setResult(locateTask.name, locateResult);
233
240
  } else if ("aiWaitFor" in flowItem) {
234
241
  const waitForTask = flowItem;
@@ -437,10 +444,10 @@ var import_utils12 = require("@midscene/shared/utils");
437
444
  // src/common/tasks.ts
438
445
  var import_core = require("@midscene/core");
439
446
  var import_ai_model2 = require("@midscene/core/ai-model");
440
- var import_utils5 = require("@midscene/core/utils");
447
+ var import_utils7 = require("@midscene/core/utils");
441
448
  var import_constants = require("@midscene/shared/constants");
442
- var import_logger = require("@midscene/shared/logger");
443
- var import_utils6 = require("@midscene/shared/utils");
449
+ var import_logger2 = require("@midscene/shared/logger");
450
+ var import_utils8 = require("@midscene/shared/utils");
444
451
 
445
452
  // src/common/ui-utils.ts
446
453
  function typeStr(task) {
@@ -517,11 +524,11 @@ function paramStr(task) {
517
524
 
518
525
  // src/common/utils.ts
519
526
  var import_ai_model = require("@midscene/core/ai-model");
520
- var import_utils3 = require("@midscene/core/utils");
527
+ var import_utils5 = require("@midscene/core/utils");
521
528
  var import_env = require("@midscene/shared/env");
522
529
  var import_extractor = require("@midscene/shared/extractor");
523
530
  var import_img = require("@midscene/shared/img");
524
- var import_utils4 = require("@midscene/shared/utils");
531
+ var import_utils6 = require("@midscene/shared/utils");
525
532
  var import_dayjs = __toESM(require("dayjs"));
526
533
 
527
534
  // src/web-element.ts
@@ -549,14 +556,189 @@ var WebElementInfo = class {
549
556
  }
550
557
  };
551
558
 
559
+ // src/common/task-cache.ts
560
+ var import_node_assert = __toESM(require("assert"));
561
+ var import_node_fs2 = require("fs");
562
+ var import_node_path2 = require("path");
563
+ var import_common2 = require("@midscene/shared/common");
564
+ var import_logger = require("@midscene/shared/logger");
565
+ var import_utils3 = require("@midscene/shared/utils");
566
+ var import_js_yaml3 = __toESM(require("js-yaml"));
567
+ var import_semver = __toESM(require("semver"));
568
+
569
+ // package.json
570
+ var version = "0.20.0";
571
+
572
+ // src/common/task-cache.ts
573
+ var debug = (0, import_logger.getDebug)("cache");
574
+ var lowestSupportedMidsceneVersion = "0.16.10";
575
+ var cacheFileExt = ".cache.yaml";
576
+ var TaskCache = class {
577
+ // Track matched records
578
+ constructor(cacheId, isCacheResultUsed, cacheFilePath) {
579
+ this.matchedCacheIndices = /* @__PURE__ */ new Set();
580
+ (0, import_node_assert.default)(cacheId, "cacheId is required");
581
+ this.cacheId = replaceIllegalPathCharsAndSpace(cacheId);
582
+ this.cacheFilePath = import_utils3.ifInBrowser ? void 0 : cacheFilePath || (0, import_node_path2.join)((0, import_common2.getMidsceneRunSubDir)("cache"), `${this.cacheId}${cacheFileExt}`);
583
+ this.isCacheResultUsed = isCacheResultUsed;
584
+ let cacheContent;
585
+ if (this.cacheFilePath) {
586
+ cacheContent = this.loadCacheFromFile();
587
+ }
588
+ if (!cacheContent) {
589
+ cacheContent = {
590
+ midsceneVersion: version,
591
+ cacheId: this.cacheId,
592
+ caches: []
593
+ };
594
+ }
595
+ this.cache = cacheContent;
596
+ this.cacheOriginalLength = this.cache.caches.length;
597
+ }
598
+ matchCache(prompt, type) {
599
+ for (let i = 0; i < this.cacheOriginalLength; i++) {
600
+ const item = this.cache.caches[i];
601
+ const key = `${type}:${prompt}:${i}`;
602
+ if (item.type === type && item.prompt === prompt && !this.matchedCacheIndices.has(key)) {
603
+ this.matchedCacheIndices.add(key);
604
+ debug(
605
+ "cache found and marked as used, type: %s, prompt: %s, index: %d",
606
+ type,
607
+ prompt,
608
+ i
609
+ );
610
+ return {
611
+ cacheContent: item,
612
+ updateFn: (cb) => {
613
+ debug(
614
+ "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
615
+ type,
616
+ prompt,
617
+ i
618
+ );
619
+ cb(item);
620
+ debug(
621
+ "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
622
+ type,
623
+ prompt,
624
+ i
625
+ );
626
+ this.flushCacheToFile();
627
+ }
628
+ };
629
+ }
630
+ }
631
+ debug("no unused cache found, type: %s, prompt: %s", type, prompt);
632
+ return void 0;
633
+ }
634
+ matchPlanCache(prompt) {
635
+ return this.matchCache(prompt, "plan");
636
+ }
637
+ matchLocateCache(prompt) {
638
+ return this.matchCache(prompt, "locate");
639
+ }
640
+ appendCache(cache) {
641
+ debug("will append cache", cache);
642
+ this.cache.caches.push(cache);
643
+ this.flushCacheToFile();
644
+ }
645
+ loadCacheFromFile() {
646
+ const cacheFile = this.cacheFilePath;
647
+ (0, import_node_assert.default)(cacheFile, "cache file path is required");
648
+ if (!(0, import_node_fs2.existsSync)(cacheFile)) {
649
+ debug("no cache file found, path: %s", cacheFile);
650
+ return void 0;
651
+ }
652
+ const jsonTypeCacheFile = cacheFile.replace(cacheFileExt, ".json");
653
+ if ((0, import_node_fs2.existsSync)(jsonTypeCacheFile) && this.isCacheResultUsed) {
654
+ console.warn(
655
+ `An outdated cache file from an earlier version of Midscene has been detected. Since version 0.17, we have implemented an improved caching strategy. Please delete the old file located at: ${jsonTypeCacheFile}.`
656
+ );
657
+ return void 0;
658
+ }
659
+ try {
660
+ const data = (0, import_node_fs2.readFileSync)(cacheFile, "utf8");
661
+ const jsonData = import_js_yaml3.default.load(data);
662
+ if (!version) {
663
+ debug("no midscene version info, will not read cache from file");
664
+ return void 0;
665
+ }
666
+ if (import_semver.default.lt(jsonData.midsceneVersion, lowestSupportedMidsceneVersion) && !jsonData.midsceneVersion.includes("beta")) {
667
+ console.warn(
668
+ `You are using an old version of Midscene cache file, and we cannot match any info from it. Starting from Midscene v0.17, we changed our strategy to use xpath for cache info, providing better performance.
669
+ Please delete the existing cache and rebuild it. Sorry for the inconvenience.
670
+ cache file: ${cacheFile}`
671
+ );
672
+ return void 0;
673
+ }
674
+ debug(
675
+ "cache loaded from file, path: %s, cache version: %s, record length: %s",
676
+ cacheFile,
677
+ jsonData.midsceneVersion,
678
+ jsonData.caches.length
679
+ );
680
+ jsonData.midsceneVersion = version;
681
+ return jsonData;
682
+ } catch (err) {
683
+ debug(
684
+ "cache file exists but load failed, path: %s, error: %s",
685
+ cacheFile,
686
+ err
687
+ );
688
+ return void 0;
689
+ }
690
+ }
691
+ flushCacheToFile() {
692
+ if (!version) {
693
+ debug("no midscene version info, will not write cache to file");
694
+ return;
695
+ }
696
+ if (!this.cacheFilePath) {
697
+ debug("no cache file path, will not write cache to file");
698
+ return;
699
+ }
700
+ try {
701
+ const dir = (0, import_node_path2.dirname)(this.cacheFilePath);
702
+ if (!(0, import_node_fs2.existsSync)(dir)) {
703
+ (0, import_node_fs2.mkdirSync)(dir, { recursive: true });
704
+ debug("created cache directory: %s", dir);
705
+ }
706
+ const yamlData = import_js_yaml3.default.dump(this.cache);
707
+ (0, import_node_fs2.writeFileSync)(this.cacheFilePath, yamlData);
708
+ debug("cache flushed to file: %s", this.cacheFilePath);
709
+ } catch (err) {
710
+ debug(
711
+ "write cache to file failed, path: %s, error: %s",
712
+ this.cacheFilePath,
713
+ err
714
+ );
715
+ }
716
+ }
717
+ updateOrAppendCacheRecord(newRecord, cachedRecord) {
718
+ if (cachedRecord) {
719
+ if (newRecord.type === "plan") {
720
+ cachedRecord.updateFn((cache) => {
721
+ cache.yamlWorkflow = newRecord.yamlWorkflow;
722
+ });
723
+ } else {
724
+ cachedRecord.updateFn((cache) => {
725
+ cache.xpaths = newRecord.xpaths;
726
+ });
727
+ }
728
+ } else {
729
+ this.appendCache(newRecord);
730
+ }
731
+ }
732
+ };
733
+
552
734
  // src/common/utils.ts
553
735
  async function parseContextFromWebPage(page, _opt) {
554
- (0, import_utils4.assert)(page, "page is required");
736
+ (0, import_utils6.assert)(page, "page is required");
555
737
  if (page._forceUsePageContext) {
556
738
  return await page._forceUsePageContext();
557
739
  }
558
740
  const url = await page.url();
559
- (0, import_utils3.uploadTestInfoToServer)({ testUrl: url });
741
+ (0, import_utils5.uploadTestInfoToServer)({ testUrl: url });
560
742
  let screenshotBase64;
561
743
  let tree;
562
744
  await Promise.all([
@@ -578,7 +760,7 @@ async function parseContextFromWebPage(page, _opt) {
578
760
  isVisible
579
761
  });
580
762
  });
581
- (0, import_utils4.assert)(screenshotBase64, "screenshotBase64 is required");
763
+ (0, import_utils6.assert)(screenshotBase64, "screenshotBase64 is required");
582
764
  const size = await page.size();
583
765
  if (size.dpr && size.dpr > 1) {
584
766
  screenshotBase64 = await (0, import_img.resizeImgBase64)(screenshotBase64, {
@@ -596,11 +778,11 @@ async function parseContextFromWebPage(page, _opt) {
596
778
  function reportFileName(tag = "web") {
597
779
  const reportTagName = (0, import_env.getAIConfig)(import_env.MIDSCENE_REPORT_TAG_NAME);
598
780
  const dateTimeInFileName = (0, import_dayjs.default)().format("YYYY-MM-DD_HH-mm-ss");
599
- const uniqueId = (0, import_utils4.uuid)().substring(0, 8);
781
+ const uniqueId = (0, import_utils6.uuid)().substring(0, 8);
600
782
  return `${reportTagName || tag}-${dateTimeInFileName}-${uniqueId}`;
601
783
  }
602
784
  function printReportMsg(filepath) {
603
- (0, import_utils4.logMsg)(`Midscene - report file updated: ${filepath}`);
785
+ (0, import_utils6.logMsg)(`Midscene - report file updated: ${filepath}`);
604
786
  }
605
787
  function replaceIllegalPathCharsAndSpace(str) {
606
788
  return str.replace(/[:*?"<>| ]/g, "-");
@@ -653,6 +835,28 @@ function matchElementFromPlan(planLocateParam, tree) {
653
835
  }
654
836
  return void 0;
655
837
  }
838
+ async function matchElementFromCache(taskExecutor, xpaths, cachePrompt, cacheable) {
839
+ try {
840
+ if (xpaths?.length && taskExecutor.taskCache?.isCacheResultUsed && cacheable !== false) {
841
+ for (let i = 0; i < xpaths.length; i++) {
842
+ const element = await taskExecutor.page.getElementInfoByXpath(
843
+ xpaths[i]
844
+ );
845
+ if (element?.id) {
846
+ debug("cache hit, prompt: %s", cachePrompt);
847
+ debug(
848
+ "found a new new element with same xpath, xpath: %s, id: %s",
849
+ xpaths[i],
850
+ element?.id
851
+ );
852
+ return element;
853
+ }
854
+ }
855
+ }
856
+ } catch (error) {
857
+ debug("get element info by xpath error: ", error);
858
+ }
859
+ }
656
860
  function trimContextByViewport(execution) {
657
861
  function filterVisibleTree(node) {
658
862
  if (!node)
@@ -691,7 +895,7 @@ function trimContextByViewport(execution) {
691
895
  }
692
896
 
693
897
  // src/common/tasks.ts
694
- var debug = (0, import_logger.getDebug)("page-task-executor");
898
+ var debug2 = (0, import_logger2.getDebug)("page-task-executor");
695
899
  var replanningCountLimit = 10;
696
900
  var isAndroidPage = (page) => {
697
901
  return page.pageType === "android";
@@ -732,7 +936,7 @@ var PageTaskExecutor = class {
732
936
  if (info?.id) {
733
937
  elementId = info.id;
734
938
  } else {
735
- debug(
939
+ debug2(
736
940
  "no element id found for position node, will not update cache",
737
941
  element
738
942
  );
@@ -745,7 +949,7 @@ var PageTaskExecutor = class {
745
949
  const result = await this.page.getXpathsById(elementId);
746
950
  return result;
747
951
  } catch (error) {
748
- debug("getXpathsById error: ", error);
952
+ debug2("getXpathsById error: ", error);
749
953
  }
750
954
  }
751
955
  prependExecutorWithScreenshot(taskApply, appendAfterExecution = false) {
@@ -761,7 +965,7 @@ var PageTaskExecutor = class {
761
965
  if (taskApply.type === "Action") {
762
966
  await Promise.all([
763
967
  (async () => {
764
- await (0, import_utils5.sleep)(100);
968
+ await (0, import_utils7.sleep)(100);
765
969
  if (this.page.waitUntilNetworkIdle) {
766
970
  try {
767
971
  await this.page.waitUntilNetworkIdle();
@@ -769,7 +973,7 @@ var PageTaskExecutor = class {
769
973
  }
770
974
  }
771
975
  })(),
772
- (0, import_utils5.sleep)(200)
976
+ (0, import_utils7.sleep)(200)
773
977
  ]);
774
978
  }
775
979
  if (appendAfterExecution) {
@@ -799,7 +1003,7 @@ var PageTaskExecutor = class {
799
1003
  locate: plan2.locate,
800
1004
  executor: async (param, taskContext) => {
801
1005
  const { task } = taskContext;
802
- (0, import_utils6.assert)(
1006
+ (0, import_utils8.assert)(
803
1007
  param?.prompt || param?.id || param?.bbox,
804
1008
  "No prompt or id or position or bbox to locate"
805
1009
  );
@@ -824,39 +1028,29 @@ var PageTaskExecutor = class {
824
1028
  timing: "before Insight"
825
1029
  };
826
1030
  task.recorder = [recordItem];
827
- let cacheHitFlag = false;
1031
+ const elementFromXpath = param.xpath ? await this.page.getElementInfoByXpath(param.xpath) : void 0;
1032
+ const userExpectedPathHitFlag = !!elementFromXpath;
828
1033
  const cachePrompt = param.prompt;
829
1034
  const locateCacheRecord = this.taskCache?.matchLocateCache(cachePrompt);
830
1035
  const xpaths = locateCacheRecord?.cacheContent?.xpaths;
831
- let elementFromCache = null;
832
- try {
833
- if (xpaths?.length && this.taskCache?.isCacheResultUsed && param?.cacheable !== false) {
834
- for (let i = 0; i < xpaths.length; i++) {
835
- const element2 = await this.page.getElementInfoByXpath(
836
- xpaths[i]
837
- );
838
- if (element2?.id) {
839
- elementFromCache = element2;
840
- debug("cache hit, prompt: %s", cachePrompt);
841
- cacheHitFlag = true;
842
- debug(
843
- "found a new new element with same xpath, xpath: %s, id: %s",
844
- xpaths[i],
845
- element2?.id
846
- );
847
- break;
848
- }
849
- }
850
- }
851
- } catch (error) {
852
- debug("get element info by xpath error: ", error);
853
- }
854
- const startTime = Date.now();
855
- const element = elementFromCache || // try to match element from cache
856
- matchElementFromPlan(param, pageContext.tree) || // try to match element from plan
857
- (await this.insight.locate(param, {
1036
+ const elementFromCache = userExpectedPathHitFlag ? null : await matchElementFromCache(
1037
+ this,
1038
+ xpaths,
1039
+ cachePrompt,
1040
+ param.cacheable
1041
+ );
1042
+ const cacheHitFlag = !!elementFromCache;
1043
+ const elementFromPlan = !userExpectedPathHitFlag && !cacheHitFlag ? matchElementFromPlan(param, pageContext.tree) : void 0;
1044
+ const planHitFlag = !!elementFromPlan;
1045
+ const elementFromAiLocate = !userExpectedPathHitFlag && !cacheHitFlag && !planHitFlag ? (await this.insight.locate(param, {
1046
+ // fallback to ai locate
858
1047
  context: pageContext
859
- })).element;
1048
+ })).element : void 0;
1049
+ const aiLocateHitFlag = !!elementFromAiLocate;
1050
+ const element = elementFromXpath || // highest priority
1051
+ elementFromCache || // second priority
1052
+ elementFromPlan || // third priority
1053
+ elementFromAiLocate;
860
1054
  let currentXpaths;
861
1055
  if (element && this.taskCache && !cacheHitFlag && param?.cacheable !== false) {
862
1056
  const elementXpaths = await this.getElementXpath(
@@ -874,7 +1068,7 @@ var PageTaskExecutor = class {
874
1068
  locateCacheRecord
875
1069
  );
876
1070
  } else {
877
- debug(
1071
+ debug2(
878
1072
  "no xpaths found, will not update cache",
879
1073
  cachePrompt,
880
1074
  elementXpaths
@@ -884,16 +1078,44 @@ var PageTaskExecutor = class {
884
1078
  if (!element) {
885
1079
  throw new Error(`Element not found: ${param.prompt}`);
886
1080
  }
1081
+ let hitBy;
1082
+ if (userExpectedPathHitFlag) {
1083
+ hitBy = {
1084
+ from: "User expected path",
1085
+ context: {
1086
+ xpath: param.xpath
1087
+ }
1088
+ };
1089
+ } else if (cacheHitFlag) {
1090
+ hitBy = {
1091
+ from: "Cache",
1092
+ context: {
1093
+ xpathsFromCache: xpaths,
1094
+ xpathsToSave: currentXpaths
1095
+ }
1096
+ };
1097
+ } else if (planHitFlag) {
1098
+ hitBy = {
1099
+ from: "Planning",
1100
+ context: {
1101
+ id: elementFromPlan?.id,
1102
+ bbox: elementFromPlan?.bbox
1103
+ }
1104
+ };
1105
+ } else if (aiLocateHitFlag) {
1106
+ hitBy = {
1107
+ from: "AI model",
1108
+ context: {
1109
+ prompt: param.prompt
1110
+ }
1111
+ };
1112
+ }
887
1113
  return {
888
1114
  output: {
889
1115
  element
890
1116
  },
891
1117
  pageContext,
892
- cache: {
893
- hit: cacheHitFlag,
894
- originalXpaths: xpaths,
895
- currentXpaths
896
- }
1118
+ hitBy
897
1119
  };
898
1120
  }
899
1121
  };
@@ -989,7 +1211,7 @@ var PageTaskExecutor = class {
989
1211
  thought: plan2.thought,
990
1212
  locate: plan2.locate,
991
1213
  executor: async (param, { element }) => {
992
- (0, import_utils6.assert)(element, "Element not found, cannot tap");
1214
+ (0, import_utils8.assert)(element, "Element not found, cannot tap");
993
1215
  await this.page.mouse.click(element.center[0], element.center[1]);
994
1216
  }
995
1217
  };
@@ -1001,7 +1223,7 @@ var PageTaskExecutor = class {
1001
1223
  thought: plan2.thought,
1002
1224
  locate: plan2.locate,
1003
1225
  executor: async (param, { element }) => {
1004
- (0, import_utils6.assert)(element, "Element not found, cannot right click");
1226
+ (0, import_utils8.assert)(element, "Element not found, cannot right click");
1005
1227
  await this.page.mouse.click(
1006
1228
  element.center[0],
1007
1229
  element.center[1],
@@ -1018,7 +1240,7 @@ var PageTaskExecutor = class {
1018
1240
  thought: plan2.thought,
1019
1241
  locate: plan2.locate,
1020
1242
  executor: async (taskParam) => {
1021
- (0, import_utils6.assert)(
1243
+ (0, import_utils8.assert)(
1022
1244
  taskParam?.start_box && taskParam?.end_box,
1023
1245
  "No start_box or end_box to drag"
1024
1246
  );
@@ -1033,7 +1255,7 @@ var PageTaskExecutor = class {
1033
1255
  thought: plan2.thought,
1034
1256
  locate: plan2.locate,
1035
1257
  executor: async (param, { element }) => {
1036
- (0, import_utils6.assert)(element, "Element not found, cannot hover");
1258
+ (0, import_utils8.assert)(element, "Element not found, cannot hover");
1037
1259
  await this.page.mouse.move(element.center[0], element.center[1]);
1038
1260
  }
1039
1261
  };
@@ -1085,7 +1307,7 @@ var PageTaskExecutor = class {
1085
1307
  `Unknown scroll direction: ${taskParam.direction}`
1086
1308
  );
1087
1309
  }
1088
- await (0, import_utils5.sleep)(500);
1310
+ await (0, import_utils7.sleep)(500);
1089
1311
  } else {
1090
1312
  throw new Error(
1091
1313
  `Unknown scroll event type: ${scrollToEventName}, taskParam: ${JSON.stringify(
@@ -1104,7 +1326,7 @@ var PageTaskExecutor = class {
1104
1326
  thought: plan2.thought,
1105
1327
  locate: plan2.locate,
1106
1328
  executor: async (taskParam) => {
1107
- await (0, import_utils5.sleep)(taskParam?.timeMs || 3e3);
1329
+ await (0, import_utils7.sleep)(taskParam?.timeMs || 3e3);
1108
1330
  }
1109
1331
  };
1110
1332
  tasks.push(taskActionSleep);
@@ -1152,7 +1374,7 @@ var PageTaskExecutor = class {
1152
1374
  thought: plan2.thought,
1153
1375
  locate: plan2.locate,
1154
1376
  executor: async (param) => {
1155
- (0, import_utils6.assert)(
1377
+ (0, import_utils8.assert)(
1156
1378
  isAndroidPage(this.page),
1157
1379
  "Cannot use home button on non-Android devices"
1158
1380
  );
@@ -1168,7 +1390,7 @@ var PageTaskExecutor = class {
1168
1390
  thought: plan2.thought,
1169
1391
  locate: plan2.locate,
1170
1392
  executor: async (param) => {
1171
- (0, import_utils6.assert)(
1393
+ (0, import_utils8.assert)(
1172
1394
  isAndroidPage(this.page),
1173
1395
  "Cannot use back button on non-Android devices"
1174
1396
  );
@@ -1184,7 +1406,7 @@ var PageTaskExecutor = class {
1184
1406
  thought: plan2.thought,
1185
1407
  locate: plan2.locate,
1186
1408
  executor: async (param) => {
1187
- (0, import_utils6.assert)(
1409
+ (0, import_utils8.assert)(
1188
1410
  isAndroidPage(this.page),
1189
1411
  "Cannot use recent apps button on non-Android devices"
1190
1412
  );
@@ -1335,7 +1557,7 @@ var PageTaskExecutor = class {
1335
1557
  }
1336
1558
  }
1337
1559
  if (finalActions.length === 0) {
1338
- (0, import_utils6.assert)(
1560
+ (0, import_utils8.assert)(
1339
1561
  !more_actions_needed_by_instruction || sleep3,
1340
1562
  error ? `Failed to plan: ${error}` : planParsingError || "No plan found"
1341
1563
  );
@@ -1573,7 +1795,7 @@ var PageTaskExecutor = class {
1573
1795
  );
1574
1796
  let outputResult = data;
1575
1797
  if (ifTypeRestricted) {
1576
- (0, import_utils6.assert)(data?.result !== void 0, "No result in query data");
1798
+ (0, import_utils8.assert)(data?.result !== void 0, "No result in query data");
1577
1799
  outputResult = data.result;
1578
1800
  }
1579
1801
  return {
@@ -1669,9 +1891,9 @@ var PageTaskExecutor = class {
1669
1891
  onTaskStart: this.onTaskStartCallback
1670
1892
  });
1671
1893
  const { timeoutMs, checkIntervalMs } = opt;
1672
- (0, import_utils6.assert)(assertion, "No assertion for waitFor");
1673
- (0, import_utils6.assert)(timeoutMs, "No timeoutMs for waitFor");
1674
- (0, import_utils6.assert)(checkIntervalMs, "No checkIntervalMs for waitFor");
1894
+ (0, import_utils8.assert)(assertion, "No assertion for waitFor");
1895
+ (0, import_utils8.assert)(timeoutMs, "No timeoutMs for waitFor");
1896
+ (0, import_utils8.assert)(checkIntervalMs, "No checkIntervalMs for waitFor");
1675
1897
  const overallStartTime = Date.now();
1676
1898
  let startTime = Date.now();
1677
1899
  let errorThought = "";
@@ -1725,9 +1947,9 @@ var PageTaskExecutor = class {
1725
1947
  };
1726
1948
 
1727
1949
  // src/common/plan-builder.ts
1728
- var import_logger2 = require("@midscene/shared/logger");
1729
- var import_utils8 = require("@midscene/shared/utils");
1730
- var debug2 = (0, import_logger2.getDebug)("plan-builder");
1950
+ var import_logger3 = require("@midscene/shared/logger");
1951
+ var import_utils10 = require("@midscene/shared/utils");
1952
+ var debug3 = (0, import_logger3.getDebug)("plan-builder");
1731
1953
  function buildPlans(type, locateParam, param) {
1732
1954
  let returnPlans = [];
1733
1955
  const locatePlan = locateParam ? {
@@ -1737,8 +1959,8 @@ function buildPlans(type, locateParam, param) {
1737
1959
  thought: ""
1738
1960
  } : null;
1739
1961
  if (type === "Tap" || type === "Hover" || type === "RightClick") {
1740
- (0, import_utils8.assert)(locateParam, `missing locate info for action "${type}"`);
1741
- (0, import_utils8.assert)(locatePlan, `missing locate info for action "${type}"`);
1962
+ (0, import_utils10.assert)(locateParam, `missing locate info for action "${type}"`);
1963
+ (0, import_utils10.assert)(locatePlan, `missing locate info for action "${type}"`);
1742
1964
  const tapPlan = {
1743
1965
  type,
1744
1966
  param: null,
@@ -1749,9 +1971,9 @@ function buildPlans(type, locateParam, param) {
1749
1971
  }
1750
1972
  if (type === "Input" || type === "KeyboardPress") {
1751
1973
  if (type === "Input") {
1752
- (0, import_utils8.assert)(locateParam, `missing locate info for action "${type}"`);
1974
+ (0, import_utils10.assert)(locateParam, `missing locate info for action "${type}"`);
1753
1975
  }
1754
- (0, import_utils8.assert)(param, `missing param for action "${type}"`);
1976
+ (0, import_utils10.assert)(param, `missing param for action "${type}"`);
1755
1977
  const inputPlan = {
1756
1978
  type,
1757
1979
  param,
@@ -1765,7 +1987,7 @@ function buildPlans(type, locateParam, param) {
1765
1987
  }
1766
1988
  }
1767
1989
  if (type === "Scroll") {
1768
- (0, import_utils8.assert)(param, `missing param for action "${type}"`);
1990
+ (0, import_utils10.assert)(param, `missing param for action "${type}"`);
1769
1991
  const scrollPlan = {
1770
1992
  type,
1771
1993
  param,
@@ -1779,7 +2001,7 @@ function buildPlans(type, locateParam, param) {
1779
2001
  }
1780
2002
  }
1781
2003
  if (type === "Sleep") {
1782
- (0, import_utils8.assert)(param, `missing param for action "${type}"`);
2004
+ (0, import_utils10.assert)(param, `missing param for action "${type}"`);
1783
2005
  const sleepPlan = {
1784
2006
  type,
1785
2007
  param,
@@ -1789,7 +2011,7 @@ function buildPlans(type, locateParam, param) {
1789
2011
  returnPlans = [sleepPlan];
1790
2012
  }
1791
2013
  if (type === "Locate") {
1792
- (0, import_utils8.assert)(locateParam, `missing locate info for action "${type}"`);
2014
+ (0, import_utils10.assert)(locateParam, `missing locate info for action "${type}"`);
1793
2015
  const locatePlan2 = {
1794
2016
  type,
1795
2017
  param: locateParam,
@@ -1799,187 +2021,12 @@ function buildPlans(type, locateParam, param) {
1799
2021
  returnPlans = [locatePlan2];
1800
2022
  }
1801
2023
  if (returnPlans) {
1802
- debug2("buildPlans", returnPlans);
2024
+ debug3("buildPlans", returnPlans);
1803
2025
  return returnPlans;
1804
2026
  }
1805
2027
  throw new Error(`Not supported type: ${type}`);
1806
2028
  }
1807
2029
 
1808
- // src/common/task-cache.ts
1809
- var import_node_assert = __toESM(require("assert"));
1810
- var import_node_fs2 = require("fs");
1811
- var import_node_path2 = require("path");
1812
- var import_common2 = require("@midscene/shared/common");
1813
- var import_logger3 = require("@midscene/shared/logger");
1814
- var import_utils9 = require("@midscene/shared/utils");
1815
- var import_js_yaml3 = __toESM(require("js-yaml"));
1816
- var import_semver = __toESM(require("semver"));
1817
-
1818
- // package.json
1819
- var version = "0.19.1";
1820
-
1821
- // src/common/task-cache.ts
1822
- var debug3 = (0, import_logger3.getDebug)("cache");
1823
- var lowestSupportedMidsceneVersion = "0.16.10";
1824
- var cacheFileExt = ".cache.yaml";
1825
- var TaskCache = class {
1826
- // Track matched records
1827
- constructor(cacheId, isCacheResultUsed, cacheFilePath) {
1828
- this.matchedCacheIndices = /* @__PURE__ */ new Set();
1829
- (0, import_node_assert.default)(cacheId, "cacheId is required");
1830
- this.cacheId = replaceIllegalPathCharsAndSpace(cacheId);
1831
- this.cacheFilePath = import_utils9.ifInBrowser ? void 0 : cacheFilePath || (0, import_node_path2.join)((0, import_common2.getMidsceneRunSubDir)("cache"), `${this.cacheId}${cacheFileExt}`);
1832
- this.isCacheResultUsed = isCacheResultUsed;
1833
- let cacheContent;
1834
- if (this.cacheFilePath) {
1835
- cacheContent = this.loadCacheFromFile();
1836
- }
1837
- if (!cacheContent) {
1838
- cacheContent = {
1839
- midsceneVersion: version,
1840
- cacheId: this.cacheId,
1841
- caches: []
1842
- };
1843
- }
1844
- this.cache = cacheContent;
1845
- this.cacheOriginalLength = this.cache.caches.length;
1846
- }
1847
- matchCache(prompt, type) {
1848
- for (let i = 0; i < this.cacheOriginalLength; i++) {
1849
- const item = this.cache.caches[i];
1850
- const key = `${type}:${prompt}:${i}`;
1851
- if (item.type === type && item.prompt === prompt && !this.matchedCacheIndices.has(key)) {
1852
- this.matchedCacheIndices.add(key);
1853
- debug3(
1854
- "cache found and marked as used, type: %s, prompt: %s, index: %d",
1855
- type,
1856
- prompt,
1857
- i
1858
- );
1859
- return {
1860
- cacheContent: item,
1861
- updateFn: (cb) => {
1862
- debug3(
1863
- "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1864
- type,
1865
- prompt,
1866
- i
1867
- );
1868
- cb(item);
1869
- debug3(
1870
- "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1871
- type,
1872
- prompt,
1873
- i
1874
- );
1875
- this.flushCacheToFile();
1876
- }
1877
- };
1878
- }
1879
- }
1880
- debug3("no unused cache found, type: %s, prompt: %s", type, prompt);
1881
- return void 0;
1882
- }
1883
- matchPlanCache(prompt) {
1884
- return this.matchCache(prompt, "plan");
1885
- }
1886
- matchLocateCache(prompt) {
1887
- return this.matchCache(prompt, "locate");
1888
- }
1889
- appendCache(cache) {
1890
- debug3("will append cache", cache);
1891
- this.cache.caches.push(cache);
1892
- this.flushCacheToFile();
1893
- }
1894
- loadCacheFromFile() {
1895
- const cacheFile = this.cacheFilePath;
1896
- (0, import_node_assert.default)(cacheFile, "cache file path is required");
1897
- if (!(0, import_node_fs2.existsSync)(cacheFile)) {
1898
- debug3("no cache file found, path: %s", cacheFile);
1899
- return void 0;
1900
- }
1901
- const jsonTypeCacheFile = cacheFile.replace(cacheFileExt, ".json");
1902
- if ((0, import_node_fs2.existsSync)(jsonTypeCacheFile) && this.isCacheResultUsed) {
1903
- console.warn(
1904
- `An outdated cache file from an earlier version of Midscene has been detected. Since version 0.17, we have implemented an improved caching strategy. Please delete the old file located at: ${jsonTypeCacheFile}.`
1905
- );
1906
- return void 0;
1907
- }
1908
- try {
1909
- const data = (0, import_node_fs2.readFileSync)(cacheFile, "utf8");
1910
- const jsonData = import_js_yaml3.default.load(data);
1911
- if (!version) {
1912
- debug3("no midscene version info, will not read cache from file");
1913
- return void 0;
1914
- }
1915
- if (import_semver.default.lt(jsonData.midsceneVersion, lowestSupportedMidsceneVersion) && !jsonData.midsceneVersion.includes("beta")) {
1916
- console.warn(
1917
- `You are using an old version of Midscene cache file, and we cannot match any info from it. Starting from Midscene v0.17, we changed our strategy to use xpath for cache info, providing better performance.
1918
- Please delete the existing cache and rebuild it. Sorry for the inconvenience.
1919
- cache file: ${cacheFile}`
1920
- );
1921
- return void 0;
1922
- }
1923
- debug3(
1924
- "cache loaded from file, path: %s, cache version: %s, record length: %s",
1925
- cacheFile,
1926
- jsonData.midsceneVersion,
1927
- jsonData.caches.length
1928
- );
1929
- jsonData.midsceneVersion = version;
1930
- return jsonData;
1931
- } catch (err) {
1932
- debug3(
1933
- "cache file exists but load failed, path: %s, error: %s",
1934
- cacheFile,
1935
- err
1936
- );
1937
- return void 0;
1938
- }
1939
- }
1940
- flushCacheToFile() {
1941
- if (!version) {
1942
- debug3("no midscene version info, will not write cache to file");
1943
- return;
1944
- }
1945
- if (!this.cacheFilePath) {
1946
- debug3("no cache file path, will not write cache to file");
1947
- return;
1948
- }
1949
- try {
1950
- const dir = (0, import_node_path2.dirname)(this.cacheFilePath);
1951
- if (!(0, import_node_fs2.existsSync)(dir)) {
1952
- (0, import_node_fs2.mkdirSync)(dir, { recursive: true });
1953
- debug3("created cache directory: %s", dir);
1954
- }
1955
- const yamlData = import_js_yaml3.default.dump(this.cache);
1956
- (0, import_node_fs2.writeFileSync)(this.cacheFilePath, yamlData);
1957
- debug3("cache flushed to file: %s", this.cacheFilePath);
1958
- } catch (err) {
1959
- debug3(
1960
- "write cache to file failed, path: %s, error: %s",
1961
- this.cacheFilePath,
1962
- err
1963
- );
1964
- }
1965
- }
1966
- updateOrAppendCacheRecord(newRecord, cachedRecord) {
1967
- if (cachedRecord) {
1968
- if (newRecord.type === "plan") {
1969
- cachedRecord.updateFn((cache) => {
1970
- cache.yamlWorkflow = newRecord.yamlWorkflow;
1971
- });
1972
- } else {
1973
- cachedRecord.updateFn((cache) => {
1974
- cache.xpaths = newRecord.xpaths;
1975
- });
1976
- }
1977
- } else {
1978
- this.appendCache(newRecord);
1979
- }
1980
- }
1981
- };
1982
-
1983
2030
  // src/common/agent.ts
1984
2031
  var debug4 = (0, import_logger4.getDebug)("web-integration");
1985
2032
  var distanceOfTwoPoints = (p1, p2) => {
@@ -2108,10 +2155,12 @@ ${errorTask?.errorStack}`);
2108
2155
  const prompt = opt.prompt ?? locatePrompt;
2109
2156
  const deepThink = opt.deepThink ?? false;
2110
2157
  const cacheable = opt.cacheable ?? true;
2158
+ const xpath = opt.xpath;
2111
2159
  return {
2112
2160
  prompt,
2113
2161
  deepThink,
2114
- cacheable
2162
+ cacheable,
2163
+ xpath
2115
2164
  };
2116
2165
  }
2117
2166
  return {
@@ -2269,6 +2318,9 @@ ${errorTask?.errorStack}`);
2269
2318
  this.afterTaskRunning(executor);
2270
2319
  return output;
2271
2320
  }
2321
+ async aiAsk(prompt, opt = defaultInsightExtractOption) {
2322
+ return this.aiString(prompt, opt);
2323
+ }
2272
2324
  async describeElementAtPoint(center, opt) {
2273
2325
  const { verifyPrompt = true, retryLimit = 3 } = opt || {};
2274
2326
  let success = false;