@midscene/web 0.19.1 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. package/dist/es/agent.js +299 -247
  2. package/dist/es/agent.js.map +1 -1
  3. package/dist/es/bridge-mode-browser.js +3 -3
  4. package/dist/es/bridge-mode.js +301 -249
  5. package/dist/es/bridge-mode.js.map +1 -1
  6. package/dist/es/chrome-extension.js +342 -290
  7. package/dist/es/chrome-extension.js.map +1 -1
  8. package/dist/es/index.js +307 -247
  9. package/dist/es/index.js.map +1 -1
  10. package/dist/es/midscene-playground.js +341 -289
  11. package/dist/es/midscene-playground.js.map +1 -1
  12. package/dist/es/midscene-server.js +25 -12
  13. package/dist/es/midscene-server.js.map +1 -1
  14. package/dist/es/playground.js +341 -289
  15. package/dist/es/playground.js.map +1 -1
  16. package/dist/es/playwright-report.js +14 -1
  17. package/dist/es/playwright-report.js.map +1 -1
  18. package/dist/es/playwright-reporter.js +14 -1
  19. package/dist/es/playwright-reporter.js.map +1 -1
  20. package/dist/es/playwright.js +307 -247
  21. package/dist/es/playwright.js.map +1 -1
  22. package/dist/es/puppeteer-agent-launcher.js +299 -247
  23. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  24. package/dist/es/puppeteer.js +299 -247
  25. package/dist/es/puppeteer.js.map +1 -1
  26. package/dist/es/utils.js +42 -8
  27. package/dist/es/utils.js.map +1 -1
  28. package/dist/es/yaml.js +11 -4
  29. package/dist/es/yaml.js.map +1 -1
  30. package/dist/lib/agent.js +308 -256
  31. package/dist/lib/agent.js.map +1 -1
  32. package/dist/lib/bridge-mode-browser.js +3 -3
  33. package/dist/lib/bridge-mode.js +310 -258
  34. package/dist/lib/bridge-mode.js.map +1 -1
  35. package/dist/lib/chrome-extension.js +355 -303
  36. package/dist/lib/chrome-extension.js.map +1 -1
  37. package/dist/lib/index.js +316 -256
  38. package/dist/lib/index.js.map +1 -1
  39. package/dist/lib/midscene-playground.js +354 -302
  40. package/dist/lib/midscene-playground.js.map +1 -1
  41. package/dist/lib/midscene-server.js +28 -15
  42. package/dist/lib/midscene-server.js.map +1 -1
  43. package/dist/lib/playground.js +354 -302
  44. package/dist/lib/playground.js.map +1 -1
  45. package/dist/lib/playwright-report.js +20 -7
  46. package/dist/lib/playwright-report.js.map +1 -1
  47. package/dist/lib/playwright-reporter.js +20 -7
  48. package/dist/lib/playwright-reporter.js.map +1 -1
  49. package/dist/lib/playwright.js +316 -256
  50. package/dist/lib/playwright.js.map +1 -1
  51. package/dist/lib/puppeteer-agent-launcher.js +308 -256
  52. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  53. package/dist/lib/puppeteer.js +308 -256
  54. package/dist/lib/puppeteer.js.map +1 -1
  55. package/dist/lib/utils.js +48 -13
  56. package/dist/lib/utils.js.map +1 -1
  57. package/dist/lib/yaml.js +11 -4
  58. package/dist/lib/yaml.js.map +1 -1
  59. package/dist/types/agent.d.ts +6 -102
  60. package/dist/types/bridge-mode-browser.d.ts +3 -2
  61. package/dist/types/bridge-mode.d.ts +4 -4
  62. package/dist/types/{browser-5dbb4bfb.d.ts → browser-118d886d.d.ts} +1 -1
  63. package/dist/types/chrome-extension.d.ts +2 -2
  64. package/dist/types/index.d.ts +1 -1
  65. package/dist/types/midscene-server.d.ts +2 -2
  66. package/dist/types/{page-90e9f9a7.d.ts → page-471361cd.d.ts} +102 -3
  67. package/dist/types/playground.d.ts +2 -2
  68. package/dist/types/playwright.d.ts +6 -2
  69. package/dist/types/puppeteer-agent-launcher.d.ts +1 -1
  70. package/dist/types/puppeteer.d.ts +3 -3
  71. package/dist/types/utils.d.ts +2 -1
  72. package/dist/types/yaml.d.ts +1 -1
  73. package/package.json +3 -3
@@ -192,7 +192,7 @@ var ScriptPlayer = class {
192
192
  domIncluded: numberTask.domIncluded,
193
193
  screenshotIncluded: numberTask.screenshotIncluded
194
194
  };
195
- (0, import_utils.assert)(prompt, "missing prompt for number");
195
+ (0, import_utils.assert)(prompt, "missing prompt for aiNumber");
196
196
  (0, import_utils.assert)(
197
197
  typeof prompt === "string",
198
198
  "prompt for number must be a string"
@@ -206,7 +206,7 @@ var ScriptPlayer = class {
206
206
  domIncluded: stringTask.domIncluded,
207
207
  screenshotIncluded: stringTask.screenshotIncluded
208
208
  };
209
- (0, import_utils.assert)(prompt, "missing prompt for string");
209
+ (0, import_utils.assert)(prompt, "missing prompt for aiNumber");
210
210
  (0, import_utils.assert)(
211
211
  typeof prompt === "string",
212
212
  "prompt for string must be a string"
@@ -220,13 +220,20 @@ var ScriptPlayer = class {
220
220
  domIncluded: booleanTask.domIncluded,
221
221
  screenshotIncluded: booleanTask.screenshotIncluded
222
222
  };
223
- (0, import_utils.assert)(prompt, "missing prompt for boolean");
223
+ (0, import_utils.assert)(prompt, "missing prompt for aiBoolean");
224
224
  (0, import_utils.assert)(
225
225
  typeof prompt === "string",
226
226
  "prompt for boolean must be a string"
227
227
  );
228
228
  const booleanResult = await agent.aiBoolean(prompt, options);
229
229
  this.setResult(booleanTask.name, booleanResult);
230
+ } else if ("aiAsk" in flowItem) {
231
+ const askTask = flowItem;
232
+ const prompt = askTask.aiAsk;
233
+ (0, import_utils.assert)(prompt, "missing prompt for aiAsk");
234
+ (0, import_utils.assert)(typeof prompt === "string", "prompt for aiAsk must be a string");
235
+ const askResult = await agent.aiAsk(prompt);
236
+ this.setResult(askTask.name, askResult);
230
237
  } else if ("aiLocate" in flowItem) {
231
238
  const locateTask = flowItem;
232
239
  const prompt = locateTask.aiLocate;
@@ -235,7 +242,7 @@ var ScriptPlayer = class {
235
242
  typeof prompt === "string",
236
243
  "prompt for aiLocate must be a string"
237
244
  );
238
- const locateResult = await agent.aiLocate(prompt);
245
+ const locateResult = await agent.aiLocate(prompt, locateTask);
239
246
  this.setResult(locateTask.name, locateResult);
240
247
  } else if ("aiWaitFor" in flowItem) {
241
248
  const waitForTask = flowItem;
@@ -444,10 +451,10 @@ var import_utils12 = require("@midscene/shared/utils");
444
451
  // src/common/tasks.ts
445
452
  var import_core = require("@midscene/core");
446
453
  var import_ai_model2 = require("@midscene/core/ai-model");
447
- var import_utils5 = require("@midscene/core/utils");
454
+ var import_utils7 = require("@midscene/core/utils");
448
455
  var import_constants = require("@midscene/shared/constants");
449
- var import_logger = require("@midscene/shared/logger");
450
- var import_utils6 = require("@midscene/shared/utils");
456
+ var import_logger2 = require("@midscene/shared/logger");
457
+ var import_utils8 = require("@midscene/shared/utils");
451
458
 
452
459
  // src/common/ui-utils.ts
453
460
  function typeStr(task) {
@@ -524,11 +531,11 @@ function paramStr(task) {
524
531
 
525
532
  // src/common/utils.ts
526
533
  var import_ai_model = require("@midscene/core/ai-model");
527
- var import_utils3 = require("@midscene/core/utils");
534
+ var import_utils5 = require("@midscene/core/utils");
528
535
  var import_env = require("@midscene/shared/env");
529
536
  var import_extractor = require("@midscene/shared/extractor");
530
537
  var import_img = require("@midscene/shared/img");
531
- var import_utils4 = require("@midscene/shared/utils");
538
+ var import_utils6 = require("@midscene/shared/utils");
532
539
  var import_dayjs = __toESM(require("dayjs"));
533
540
 
534
541
  // src/web-element.ts
@@ -556,14 +563,189 @@ var WebElementInfo = class {
556
563
  }
557
564
  };
558
565
 
566
+ // src/common/task-cache.ts
567
+ var import_node_assert = __toESM(require("assert"));
568
+ var import_node_fs2 = require("fs");
569
+ var import_node_path2 = require("path");
570
+ var import_common2 = require("@midscene/shared/common");
571
+ var import_logger = require("@midscene/shared/logger");
572
+ var import_utils3 = require("@midscene/shared/utils");
573
+ var import_js_yaml3 = __toESM(require("js-yaml"));
574
+ var import_semver = __toESM(require("semver"));
575
+
576
+ // package.json
577
+ var version = "0.20.0";
578
+
579
+ // src/common/task-cache.ts
580
+ var debug = (0, import_logger.getDebug)("cache");
581
+ var lowestSupportedMidsceneVersion = "0.16.10";
582
+ var cacheFileExt = ".cache.yaml";
583
+ var TaskCache = class {
584
+ // Track matched records
585
+ constructor(cacheId, isCacheResultUsed, cacheFilePath) {
586
+ this.matchedCacheIndices = /* @__PURE__ */ new Set();
587
+ (0, import_node_assert.default)(cacheId, "cacheId is required");
588
+ this.cacheId = replaceIllegalPathCharsAndSpace(cacheId);
589
+ this.cacheFilePath = import_utils3.ifInBrowser ? void 0 : cacheFilePath || (0, import_node_path2.join)((0, import_common2.getMidsceneRunSubDir)("cache"), `${this.cacheId}${cacheFileExt}`);
590
+ this.isCacheResultUsed = isCacheResultUsed;
591
+ let cacheContent;
592
+ if (this.cacheFilePath) {
593
+ cacheContent = this.loadCacheFromFile();
594
+ }
595
+ if (!cacheContent) {
596
+ cacheContent = {
597
+ midsceneVersion: version,
598
+ cacheId: this.cacheId,
599
+ caches: []
600
+ };
601
+ }
602
+ this.cache = cacheContent;
603
+ this.cacheOriginalLength = this.cache.caches.length;
604
+ }
605
+ matchCache(prompt, type) {
606
+ for (let i = 0; i < this.cacheOriginalLength; i++) {
607
+ const item = this.cache.caches[i];
608
+ const key = `${type}:${prompt}:${i}`;
609
+ if (item.type === type && item.prompt === prompt && !this.matchedCacheIndices.has(key)) {
610
+ this.matchedCacheIndices.add(key);
611
+ debug(
612
+ "cache found and marked as used, type: %s, prompt: %s, index: %d",
613
+ type,
614
+ prompt,
615
+ i
616
+ );
617
+ return {
618
+ cacheContent: item,
619
+ updateFn: (cb) => {
620
+ debug(
621
+ "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
622
+ type,
623
+ prompt,
624
+ i
625
+ );
626
+ cb(item);
627
+ debug(
628
+ "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
629
+ type,
630
+ prompt,
631
+ i
632
+ );
633
+ this.flushCacheToFile();
634
+ }
635
+ };
636
+ }
637
+ }
638
+ debug("no unused cache found, type: %s, prompt: %s", type, prompt);
639
+ return void 0;
640
+ }
641
+ matchPlanCache(prompt) {
642
+ return this.matchCache(prompt, "plan");
643
+ }
644
+ matchLocateCache(prompt) {
645
+ return this.matchCache(prompt, "locate");
646
+ }
647
+ appendCache(cache) {
648
+ debug("will append cache", cache);
649
+ this.cache.caches.push(cache);
650
+ this.flushCacheToFile();
651
+ }
652
+ loadCacheFromFile() {
653
+ const cacheFile = this.cacheFilePath;
654
+ (0, import_node_assert.default)(cacheFile, "cache file path is required");
655
+ if (!(0, import_node_fs2.existsSync)(cacheFile)) {
656
+ debug("no cache file found, path: %s", cacheFile);
657
+ return void 0;
658
+ }
659
+ const jsonTypeCacheFile = cacheFile.replace(cacheFileExt, ".json");
660
+ if ((0, import_node_fs2.existsSync)(jsonTypeCacheFile) && this.isCacheResultUsed) {
661
+ console.warn(
662
+ `An outdated cache file from an earlier version of Midscene has been detected. Since version 0.17, we have implemented an improved caching strategy. Please delete the old file located at: ${jsonTypeCacheFile}.`
663
+ );
664
+ return void 0;
665
+ }
666
+ try {
667
+ const data = (0, import_node_fs2.readFileSync)(cacheFile, "utf8");
668
+ const jsonData = import_js_yaml3.default.load(data);
669
+ if (!version) {
670
+ debug("no midscene version info, will not read cache from file");
671
+ return void 0;
672
+ }
673
+ if (import_semver.default.lt(jsonData.midsceneVersion, lowestSupportedMidsceneVersion) && !jsonData.midsceneVersion.includes("beta")) {
674
+ console.warn(
675
+ `You are using an old version of Midscene cache file, and we cannot match any info from it. Starting from Midscene v0.17, we changed our strategy to use xpath for cache info, providing better performance.
676
+ Please delete the existing cache and rebuild it. Sorry for the inconvenience.
677
+ cache file: ${cacheFile}`
678
+ );
679
+ return void 0;
680
+ }
681
+ debug(
682
+ "cache loaded from file, path: %s, cache version: %s, record length: %s",
683
+ cacheFile,
684
+ jsonData.midsceneVersion,
685
+ jsonData.caches.length
686
+ );
687
+ jsonData.midsceneVersion = version;
688
+ return jsonData;
689
+ } catch (err) {
690
+ debug(
691
+ "cache file exists but load failed, path: %s, error: %s",
692
+ cacheFile,
693
+ err
694
+ );
695
+ return void 0;
696
+ }
697
+ }
698
+ flushCacheToFile() {
699
+ if (!version) {
700
+ debug("no midscene version info, will not write cache to file");
701
+ return;
702
+ }
703
+ if (!this.cacheFilePath) {
704
+ debug("no cache file path, will not write cache to file");
705
+ return;
706
+ }
707
+ try {
708
+ const dir = (0, import_node_path2.dirname)(this.cacheFilePath);
709
+ if (!(0, import_node_fs2.existsSync)(dir)) {
710
+ (0, import_node_fs2.mkdirSync)(dir, { recursive: true });
711
+ debug("created cache directory: %s", dir);
712
+ }
713
+ const yamlData = import_js_yaml3.default.dump(this.cache);
714
+ (0, import_node_fs2.writeFileSync)(this.cacheFilePath, yamlData);
715
+ debug("cache flushed to file: %s", this.cacheFilePath);
716
+ } catch (err) {
717
+ debug(
718
+ "write cache to file failed, path: %s, error: %s",
719
+ this.cacheFilePath,
720
+ err
721
+ );
722
+ }
723
+ }
724
+ updateOrAppendCacheRecord(newRecord, cachedRecord) {
725
+ if (cachedRecord) {
726
+ if (newRecord.type === "plan") {
727
+ cachedRecord.updateFn((cache) => {
728
+ cache.yamlWorkflow = newRecord.yamlWorkflow;
729
+ });
730
+ } else {
731
+ cachedRecord.updateFn((cache) => {
732
+ cache.xpaths = newRecord.xpaths;
733
+ });
734
+ }
735
+ } else {
736
+ this.appendCache(newRecord);
737
+ }
738
+ }
739
+ };
740
+
559
741
  // src/common/utils.ts
560
742
  async function parseContextFromWebPage(page, _opt) {
561
- (0, import_utils4.assert)(page, "page is required");
743
+ (0, import_utils6.assert)(page, "page is required");
562
744
  if (page._forceUsePageContext) {
563
745
  return await page._forceUsePageContext();
564
746
  }
565
747
  const url = await page.url();
566
- (0, import_utils3.uploadTestInfoToServer)({ testUrl: url });
748
+ (0, import_utils5.uploadTestInfoToServer)({ testUrl: url });
567
749
  let screenshotBase64;
568
750
  let tree;
569
751
  await Promise.all([
@@ -585,7 +767,7 @@ async function parseContextFromWebPage(page, _opt) {
585
767
  isVisible
586
768
  });
587
769
  });
588
- (0, import_utils4.assert)(screenshotBase64, "screenshotBase64 is required");
770
+ (0, import_utils6.assert)(screenshotBase64, "screenshotBase64 is required");
589
771
  const size = await page.size();
590
772
  if (size.dpr && size.dpr > 1) {
591
773
  screenshotBase64 = await (0, import_img.resizeImgBase64)(screenshotBase64, {
@@ -603,11 +785,11 @@ async function parseContextFromWebPage(page, _opt) {
603
785
  function reportFileName(tag = "web") {
604
786
  const reportTagName = (0, import_env.getAIConfig)(import_env.MIDSCENE_REPORT_TAG_NAME);
605
787
  const dateTimeInFileName = (0, import_dayjs.default)().format("YYYY-MM-DD_HH-mm-ss");
606
- const uniqueId = (0, import_utils4.uuid)().substring(0, 8);
788
+ const uniqueId = (0, import_utils6.uuid)().substring(0, 8);
607
789
  return `${reportTagName || tag}-${dateTimeInFileName}-${uniqueId}`;
608
790
  }
609
791
  function printReportMsg(filepath) {
610
- (0, import_utils4.logMsg)(`Midscene - report file updated: ${filepath}`);
792
+ (0, import_utils6.logMsg)(`Midscene - report file updated: ${filepath}`);
611
793
  }
612
794
  function replaceIllegalPathCharsAndSpace(str) {
613
795
  return str.replace(/[:*?"<>| ]/g, "-");
@@ -660,6 +842,28 @@ function matchElementFromPlan(planLocateParam, tree) {
660
842
  }
661
843
  return void 0;
662
844
  }
845
+ async function matchElementFromCache(taskExecutor, xpaths, cachePrompt, cacheable) {
846
+ try {
847
+ if (xpaths?.length && taskExecutor.taskCache?.isCacheResultUsed && cacheable !== false) {
848
+ for (let i = 0; i < xpaths.length; i++) {
849
+ const element = await taskExecutor.page.getElementInfoByXpath(
850
+ xpaths[i]
851
+ );
852
+ if (element?.id) {
853
+ debug("cache hit, prompt: %s", cachePrompt);
854
+ debug(
855
+ "found a new new element with same xpath, xpath: %s, id: %s",
856
+ xpaths[i],
857
+ element?.id
858
+ );
859
+ return element;
860
+ }
861
+ }
862
+ }
863
+ } catch (error) {
864
+ debug("get element info by xpath error: ", error);
865
+ }
866
+ }
663
867
  function trimContextByViewport(execution) {
664
868
  function filterVisibleTree(node) {
665
869
  if (!node)
@@ -698,7 +902,7 @@ function trimContextByViewport(execution) {
698
902
  }
699
903
 
700
904
  // src/common/tasks.ts
701
- var debug = (0, import_logger.getDebug)("page-task-executor");
905
+ var debug2 = (0, import_logger2.getDebug)("page-task-executor");
702
906
  var replanningCountLimit = 10;
703
907
  var isAndroidPage = (page) => {
704
908
  return page.pageType === "android";
@@ -739,7 +943,7 @@ var PageTaskExecutor = class {
739
943
  if (info?.id) {
740
944
  elementId = info.id;
741
945
  } else {
742
- debug(
946
+ debug2(
743
947
  "no element id found for position node, will not update cache",
744
948
  element
745
949
  );
@@ -752,7 +956,7 @@ var PageTaskExecutor = class {
752
956
  const result = await this.page.getXpathsById(elementId);
753
957
  return result;
754
958
  } catch (error) {
755
- debug("getXpathsById error: ", error);
959
+ debug2("getXpathsById error: ", error);
756
960
  }
757
961
  }
758
962
  prependExecutorWithScreenshot(taskApply, appendAfterExecution = false) {
@@ -768,7 +972,7 @@ var PageTaskExecutor = class {
768
972
  if (taskApply.type === "Action") {
769
973
  await Promise.all([
770
974
  (async () => {
771
- await (0, import_utils5.sleep)(100);
975
+ await (0, import_utils7.sleep)(100);
772
976
  if (this.page.waitUntilNetworkIdle) {
773
977
  try {
774
978
  await this.page.waitUntilNetworkIdle();
@@ -776,7 +980,7 @@ var PageTaskExecutor = class {
776
980
  }
777
981
  }
778
982
  })(),
779
- (0, import_utils5.sleep)(200)
983
+ (0, import_utils7.sleep)(200)
780
984
  ]);
781
985
  }
782
986
  if (appendAfterExecution) {
@@ -806,7 +1010,7 @@ var PageTaskExecutor = class {
806
1010
  locate: plan2.locate,
807
1011
  executor: async (param, taskContext) => {
808
1012
  const { task } = taskContext;
809
- (0, import_utils6.assert)(
1013
+ (0, import_utils8.assert)(
810
1014
  param?.prompt || param?.id || param?.bbox,
811
1015
  "No prompt or id or position or bbox to locate"
812
1016
  );
@@ -831,39 +1035,29 @@ var PageTaskExecutor = class {
831
1035
  timing: "before Insight"
832
1036
  };
833
1037
  task.recorder = [recordItem];
834
- let cacheHitFlag = false;
1038
+ const elementFromXpath = param.xpath ? await this.page.getElementInfoByXpath(param.xpath) : void 0;
1039
+ const userExpectedPathHitFlag = !!elementFromXpath;
835
1040
  const cachePrompt = param.prompt;
836
1041
  const locateCacheRecord = this.taskCache?.matchLocateCache(cachePrompt);
837
1042
  const xpaths = locateCacheRecord?.cacheContent?.xpaths;
838
- let elementFromCache = null;
839
- try {
840
- if (xpaths?.length && this.taskCache?.isCacheResultUsed && param?.cacheable !== false) {
841
- for (let i = 0; i < xpaths.length; i++) {
842
- const element2 = await this.page.getElementInfoByXpath(
843
- xpaths[i]
844
- );
845
- if (element2?.id) {
846
- elementFromCache = element2;
847
- debug("cache hit, prompt: %s", cachePrompt);
848
- cacheHitFlag = true;
849
- debug(
850
- "found a new new element with same xpath, xpath: %s, id: %s",
851
- xpaths[i],
852
- element2?.id
853
- );
854
- break;
855
- }
856
- }
857
- }
858
- } catch (error) {
859
- debug("get element info by xpath error: ", error);
860
- }
861
- const startTime = Date.now();
862
- const element = elementFromCache || // try to match element from cache
863
- matchElementFromPlan(param, pageContext.tree) || // try to match element from plan
864
- (await this.insight.locate(param, {
1043
+ const elementFromCache = userExpectedPathHitFlag ? null : await matchElementFromCache(
1044
+ this,
1045
+ xpaths,
1046
+ cachePrompt,
1047
+ param.cacheable
1048
+ );
1049
+ const cacheHitFlag = !!elementFromCache;
1050
+ const elementFromPlan = !userExpectedPathHitFlag && !cacheHitFlag ? matchElementFromPlan(param, pageContext.tree) : void 0;
1051
+ const planHitFlag = !!elementFromPlan;
1052
+ const elementFromAiLocate = !userExpectedPathHitFlag && !cacheHitFlag && !planHitFlag ? (await this.insight.locate(param, {
1053
+ // fallback to ai locate
865
1054
  context: pageContext
866
- })).element;
1055
+ })).element : void 0;
1056
+ const aiLocateHitFlag = !!elementFromAiLocate;
1057
+ const element = elementFromXpath || // highest priority
1058
+ elementFromCache || // second priority
1059
+ elementFromPlan || // third priority
1060
+ elementFromAiLocate;
867
1061
  let currentXpaths;
868
1062
  if (element && this.taskCache && !cacheHitFlag && param?.cacheable !== false) {
869
1063
  const elementXpaths = await this.getElementXpath(
@@ -881,7 +1075,7 @@ var PageTaskExecutor = class {
881
1075
  locateCacheRecord
882
1076
  );
883
1077
  } else {
884
- debug(
1078
+ debug2(
885
1079
  "no xpaths found, will not update cache",
886
1080
  cachePrompt,
887
1081
  elementXpaths
@@ -891,16 +1085,44 @@ var PageTaskExecutor = class {
891
1085
  if (!element) {
892
1086
  throw new Error(`Element not found: ${param.prompt}`);
893
1087
  }
1088
+ let hitBy;
1089
+ if (userExpectedPathHitFlag) {
1090
+ hitBy = {
1091
+ from: "User expected path",
1092
+ context: {
1093
+ xpath: param.xpath
1094
+ }
1095
+ };
1096
+ } else if (cacheHitFlag) {
1097
+ hitBy = {
1098
+ from: "Cache",
1099
+ context: {
1100
+ xpathsFromCache: xpaths,
1101
+ xpathsToSave: currentXpaths
1102
+ }
1103
+ };
1104
+ } else if (planHitFlag) {
1105
+ hitBy = {
1106
+ from: "Planning",
1107
+ context: {
1108
+ id: elementFromPlan?.id,
1109
+ bbox: elementFromPlan?.bbox
1110
+ }
1111
+ };
1112
+ } else if (aiLocateHitFlag) {
1113
+ hitBy = {
1114
+ from: "AI model",
1115
+ context: {
1116
+ prompt: param.prompt
1117
+ }
1118
+ };
1119
+ }
894
1120
  return {
895
1121
  output: {
896
1122
  element
897
1123
  },
898
1124
  pageContext,
899
- cache: {
900
- hit: cacheHitFlag,
901
- originalXpaths: xpaths,
902
- currentXpaths
903
- }
1125
+ hitBy
904
1126
  };
905
1127
  }
906
1128
  };
@@ -996,7 +1218,7 @@ var PageTaskExecutor = class {
996
1218
  thought: plan2.thought,
997
1219
  locate: plan2.locate,
998
1220
  executor: async (param, { element }) => {
999
- (0, import_utils6.assert)(element, "Element not found, cannot tap");
1221
+ (0, import_utils8.assert)(element, "Element not found, cannot tap");
1000
1222
  await this.page.mouse.click(element.center[0], element.center[1]);
1001
1223
  }
1002
1224
  };
@@ -1008,7 +1230,7 @@ var PageTaskExecutor = class {
1008
1230
  thought: plan2.thought,
1009
1231
  locate: plan2.locate,
1010
1232
  executor: async (param, { element }) => {
1011
- (0, import_utils6.assert)(element, "Element not found, cannot right click");
1233
+ (0, import_utils8.assert)(element, "Element not found, cannot right click");
1012
1234
  await this.page.mouse.click(
1013
1235
  element.center[0],
1014
1236
  element.center[1],
@@ -1025,7 +1247,7 @@ var PageTaskExecutor = class {
1025
1247
  thought: plan2.thought,
1026
1248
  locate: plan2.locate,
1027
1249
  executor: async (taskParam) => {
1028
- (0, import_utils6.assert)(
1250
+ (0, import_utils8.assert)(
1029
1251
  taskParam?.start_box && taskParam?.end_box,
1030
1252
  "No start_box or end_box to drag"
1031
1253
  );
@@ -1040,7 +1262,7 @@ var PageTaskExecutor = class {
1040
1262
  thought: plan2.thought,
1041
1263
  locate: plan2.locate,
1042
1264
  executor: async (param, { element }) => {
1043
- (0, import_utils6.assert)(element, "Element not found, cannot hover");
1265
+ (0, import_utils8.assert)(element, "Element not found, cannot hover");
1044
1266
  await this.page.mouse.move(element.center[0], element.center[1]);
1045
1267
  }
1046
1268
  };
@@ -1092,7 +1314,7 @@ var PageTaskExecutor = class {
1092
1314
  `Unknown scroll direction: ${taskParam.direction}`
1093
1315
  );
1094
1316
  }
1095
- await (0, import_utils5.sleep)(500);
1317
+ await (0, import_utils7.sleep)(500);
1096
1318
  } else {
1097
1319
  throw new Error(
1098
1320
  `Unknown scroll event type: ${scrollToEventName}, taskParam: ${JSON.stringify(
@@ -1111,7 +1333,7 @@ var PageTaskExecutor = class {
1111
1333
  thought: plan2.thought,
1112
1334
  locate: plan2.locate,
1113
1335
  executor: async (taskParam) => {
1114
- await (0, import_utils5.sleep)(taskParam?.timeMs || 3e3);
1336
+ await (0, import_utils7.sleep)(taskParam?.timeMs || 3e3);
1115
1337
  }
1116
1338
  };
1117
1339
  tasks.push(taskActionSleep);
@@ -1159,7 +1381,7 @@ var PageTaskExecutor = class {
1159
1381
  thought: plan2.thought,
1160
1382
  locate: plan2.locate,
1161
1383
  executor: async (param) => {
1162
- (0, import_utils6.assert)(
1384
+ (0, import_utils8.assert)(
1163
1385
  isAndroidPage(this.page),
1164
1386
  "Cannot use home button on non-Android devices"
1165
1387
  );
@@ -1175,7 +1397,7 @@ var PageTaskExecutor = class {
1175
1397
  thought: plan2.thought,
1176
1398
  locate: plan2.locate,
1177
1399
  executor: async (param) => {
1178
- (0, import_utils6.assert)(
1400
+ (0, import_utils8.assert)(
1179
1401
  isAndroidPage(this.page),
1180
1402
  "Cannot use back button on non-Android devices"
1181
1403
  );
@@ -1191,7 +1413,7 @@ var PageTaskExecutor = class {
1191
1413
  thought: plan2.thought,
1192
1414
  locate: plan2.locate,
1193
1415
  executor: async (param) => {
1194
- (0, import_utils6.assert)(
1416
+ (0, import_utils8.assert)(
1195
1417
  isAndroidPage(this.page),
1196
1418
  "Cannot use recent apps button on non-Android devices"
1197
1419
  );
@@ -1342,7 +1564,7 @@ var PageTaskExecutor = class {
1342
1564
  }
1343
1565
  }
1344
1566
  if (finalActions.length === 0) {
1345
- (0, import_utils6.assert)(
1567
+ (0, import_utils8.assert)(
1346
1568
  !more_actions_needed_by_instruction || sleep3,
1347
1569
  error ? `Failed to plan: ${error}` : planParsingError || "No plan found"
1348
1570
  );
@@ -1580,7 +1802,7 @@ var PageTaskExecutor = class {
1580
1802
  );
1581
1803
  let outputResult = data;
1582
1804
  if (ifTypeRestricted) {
1583
- (0, import_utils6.assert)(data?.result !== void 0, "No result in query data");
1805
+ (0, import_utils8.assert)(data?.result !== void 0, "No result in query data");
1584
1806
  outputResult = data.result;
1585
1807
  }
1586
1808
  return {
@@ -1676,9 +1898,9 @@ var PageTaskExecutor = class {
1676
1898
  onTaskStart: this.onTaskStartCallback
1677
1899
  });
1678
1900
  const { timeoutMs, checkIntervalMs } = opt;
1679
- (0, import_utils6.assert)(assertion, "No assertion for waitFor");
1680
- (0, import_utils6.assert)(timeoutMs, "No timeoutMs for waitFor");
1681
- (0, import_utils6.assert)(checkIntervalMs, "No checkIntervalMs for waitFor");
1901
+ (0, import_utils8.assert)(assertion, "No assertion for waitFor");
1902
+ (0, import_utils8.assert)(timeoutMs, "No timeoutMs for waitFor");
1903
+ (0, import_utils8.assert)(checkIntervalMs, "No checkIntervalMs for waitFor");
1682
1904
  const overallStartTime = Date.now();
1683
1905
  let startTime = Date.now();
1684
1906
  let errorThought = "";
@@ -1732,9 +1954,9 @@ var PageTaskExecutor = class {
1732
1954
  };
1733
1955
 
1734
1956
  // src/common/plan-builder.ts
1735
- var import_logger2 = require("@midscene/shared/logger");
1736
- var import_utils8 = require("@midscene/shared/utils");
1737
- var debug2 = (0, import_logger2.getDebug)("plan-builder");
1957
+ var import_logger3 = require("@midscene/shared/logger");
1958
+ var import_utils10 = require("@midscene/shared/utils");
1959
+ var debug3 = (0, import_logger3.getDebug)("plan-builder");
1738
1960
  function buildPlans(type, locateParam, param) {
1739
1961
  let returnPlans = [];
1740
1962
  const locatePlan = locateParam ? {
@@ -1744,8 +1966,8 @@ function buildPlans(type, locateParam, param) {
1744
1966
  thought: ""
1745
1967
  } : null;
1746
1968
  if (type === "Tap" || type === "Hover" || type === "RightClick") {
1747
- (0, import_utils8.assert)(locateParam, `missing locate info for action "${type}"`);
1748
- (0, import_utils8.assert)(locatePlan, `missing locate info for action "${type}"`);
1969
+ (0, import_utils10.assert)(locateParam, `missing locate info for action "${type}"`);
1970
+ (0, import_utils10.assert)(locatePlan, `missing locate info for action "${type}"`);
1749
1971
  const tapPlan = {
1750
1972
  type,
1751
1973
  param: null,
@@ -1756,9 +1978,9 @@ function buildPlans(type, locateParam, param) {
1756
1978
  }
1757
1979
  if (type === "Input" || type === "KeyboardPress") {
1758
1980
  if (type === "Input") {
1759
- (0, import_utils8.assert)(locateParam, `missing locate info for action "${type}"`);
1981
+ (0, import_utils10.assert)(locateParam, `missing locate info for action "${type}"`);
1760
1982
  }
1761
- (0, import_utils8.assert)(param, `missing param for action "${type}"`);
1983
+ (0, import_utils10.assert)(param, `missing param for action "${type}"`);
1762
1984
  const inputPlan = {
1763
1985
  type,
1764
1986
  param,
@@ -1772,7 +1994,7 @@ function buildPlans(type, locateParam, param) {
1772
1994
  }
1773
1995
  }
1774
1996
  if (type === "Scroll") {
1775
- (0, import_utils8.assert)(param, `missing param for action "${type}"`);
1997
+ (0, import_utils10.assert)(param, `missing param for action "${type}"`);
1776
1998
  const scrollPlan = {
1777
1999
  type,
1778
2000
  param,
@@ -1786,7 +2008,7 @@ function buildPlans(type, locateParam, param) {
1786
2008
  }
1787
2009
  }
1788
2010
  if (type === "Sleep") {
1789
- (0, import_utils8.assert)(param, `missing param for action "${type}"`);
2011
+ (0, import_utils10.assert)(param, `missing param for action "${type}"`);
1790
2012
  const sleepPlan = {
1791
2013
  type,
1792
2014
  param,
@@ -1796,7 +2018,7 @@ function buildPlans(type, locateParam, param) {
1796
2018
  returnPlans = [sleepPlan];
1797
2019
  }
1798
2020
  if (type === "Locate") {
1799
- (0, import_utils8.assert)(locateParam, `missing locate info for action "${type}"`);
2021
+ (0, import_utils10.assert)(locateParam, `missing locate info for action "${type}"`);
1800
2022
  const locatePlan2 = {
1801
2023
  type,
1802
2024
  param: locateParam,
@@ -1806,187 +2028,12 @@ function buildPlans(type, locateParam, param) {
1806
2028
  returnPlans = [locatePlan2];
1807
2029
  }
1808
2030
  if (returnPlans) {
1809
- debug2("buildPlans", returnPlans);
2031
+ debug3("buildPlans", returnPlans);
1810
2032
  return returnPlans;
1811
2033
  }
1812
2034
  throw new Error(`Not supported type: ${type}`);
1813
2035
  }
1814
2036
 
1815
- // src/common/task-cache.ts
1816
- var import_node_assert = __toESM(require("assert"));
1817
- var import_node_fs2 = require("fs");
1818
- var import_node_path2 = require("path");
1819
- var import_common2 = require("@midscene/shared/common");
1820
- var import_logger3 = require("@midscene/shared/logger");
1821
- var import_utils9 = require("@midscene/shared/utils");
1822
- var import_js_yaml3 = __toESM(require("js-yaml"));
1823
- var import_semver = __toESM(require("semver"));
1824
-
1825
- // package.json
1826
- var version = "0.19.1";
1827
-
1828
- // src/common/task-cache.ts
1829
- var debug3 = (0, import_logger3.getDebug)("cache");
1830
- var lowestSupportedMidsceneVersion = "0.16.10";
1831
- var cacheFileExt = ".cache.yaml";
1832
- var TaskCache = class {
1833
- // Track matched records
1834
- constructor(cacheId, isCacheResultUsed, cacheFilePath) {
1835
- this.matchedCacheIndices = /* @__PURE__ */ new Set();
1836
- (0, import_node_assert.default)(cacheId, "cacheId is required");
1837
- this.cacheId = replaceIllegalPathCharsAndSpace(cacheId);
1838
- this.cacheFilePath = import_utils9.ifInBrowser ? void 0 : cacheFilePath || (0, import_node_path2.join)((0, import_common2.getMidsceneRunSubDir)("cache"), `${this.cacheId}${cacheFileExt}`);
1839
- this.isCacheResultUsed = isCacheResultUsed;
1840
- let cacheContent;
1841
- if (this.cacheFilePath) {
1842
- cacheContent = this.loadCacheFromFile();
1843
- }
1844
- if (!cacheContent) {
1845
- cacheContent = {
1846
- midsceneVersion: version,
1847
- cacheId: this.cacheId,
1848
- caches: []
1849
- };
1850
- }
1851
- this.cache = cacheContent;
1852
- this.cacheOriginalLength = this.cache.caches.length;
1853
- }
1854
- matchCache(prompt, type) {
1855
- for (let i = 0; i < this.cacheOriginalLength; i++) {
1856
- const item = this.cache.caches[i];
1857
- const key = `${type}:${prompt}:${i}`;
1858
- if (item.type === type && item.prompt === prompt && !this.matchedCacheIndices.has(key)) {
1859
- this.matchedCacheIndices.add(key);
1860
- debug3(
1861
- "cache found and marked as used, type: %s, prompt: %s, index: %d",
1862
- type,
1863
- prompt,
1864
- i
1865
- );
1866
- return {
1867
- cacheContent: item,
1868
- updateFn: (cb) => {
1869
- debug3(
1870
- "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1871
- type,
1872
- prompt,
1873
- i
1874
- );
1875
- cb(item);
1876
- debug3(
1877
- "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1878
- type,
1879
- prompt,
1880
- i
1881
- );
1882
- this.flushCacheToFile();
1883
- }
1884
- };
1885
- }
1886
- }
1887
- debug3("no unused cache found, type: %s, prompt: %s", type, prompt);
1888
- return void 0;
1889
- }
1890
- matchPlanCache(prompt) {
1891
- return this.matchCache(prompt, "plan");
1892
- }
1893
- matchLocateCache(prompt) {
1894
- return this.matchCache(prompt, "locate");
1895
- }
1896
- appendCache(cache) {
1897
- debug3("will append cache", cache);
1898
- this.cache.caches.push(cache);
1899
- this.flushCacheToFile();
1900
- }
1901
- loadCacheFromFile() {
1902
- const cacheFile = this.cacheFilePath;
1903
- (0, import_node_assert.default)(cacheFile, "cache file path is required");
1904
- if (!(0, import_node_fs2.existsSync)(cacheFile)) {
1905
- debug3("no cache file found, path: %s", cacheFile);
1906
- return void 0;
1907
- }
1908
- const jsonTypeCacheFile = cacheFile.replace(cacheFileExt, ".json");
1909
- if ((0, import_node_fs2.existsSync)(jsonTypeCacheFile) && this.isCacheResultUsed) {
1910
- console.warn(
1911
- `An outdated cache file from an earlier version of Midscene has been detected. Since version 0.17, we have implemented an improved caching strategy. Please delete the old file located at: ${jsonTypeCacheFile}.`
1912
- );
1913
- return void 0;
1914
- }
1915
- try {
1916
- const data = (0, import_node_fs2.readFileSync)(cacheFile, "utf8");
1917
- const jsonData = import_js_yaml3.default.load(data);
1918
- if (!version) {
1919
- debug3("no midscene version info, will not read cache from file");
1920
- return void 0;
1921
- }
1922
- if (import_semver.default.lt(jsonData.midsceneVersion, lowestSupportedMidsceneVersion) && !jsonData.midsceneVersion.includes("beta")) {
1923
- console.warn(
1924
- `You are using an old version of Midscene cache file, and we cannot match any info from it. Starting from Midscene v0.17, we changed our strategy to use xpath for cache info, providing better performance.
1925
- Please delete the existing cache and rebuild it. Sorry for the inconvenience.
1926
- cache file: ${cacheFile}`
1927
- );
1928
- return void 0;
1929
- }
1930
- debug3(
1931
- "cache loaded from file, path: %s, cache version: %s, record length: %s",
1932
- cacheFile,
1933
- jsonData.midsceneVersion,
1934
- jsonData.caches.length
1935
- );
1936
- jsonData.midsceneVersion = version;
1937
- return jsonData;
1938
- } catch (err) {
1939
- debug3(
1940
- "cache file exists but load failed, path: %s, error: %s",
1941
- cacheFile,
1942
- err
1943
- );
1944
- return void 0;
1945
- }
1946
- }
1947
- flushCacheToFile() {
1948
- if (!version) {
1949
- debug3("no midscene version info, will not write cache to file");
1950
- return;
1951
- }
1952
- if (!this.cacheFilePath) {
1953
- debug3("no cache file path, will not write cache to file");
1954
- return;
1955
- }
1956
- try {
1957
- const dir = (0, import_node_path2.dirname)(this.cacheFilePath);
1958
- if (!(0, import_node_fs2.existsSync)(dir)) {
1959
- (0, import_node_fs2.mkdirSync)(dir, { recursive: true });
1960
- debug3("created cache directory: %s", dir);
1961
- }
1962
- const yamlData = import_js_yaml3.default.dump(this.cache);
1963
- (0, import_node_fs2.writeFileSync)(this.cacheFilePath, yamlData);
1964
- debug3("cache flushed to file: %s", this.cacheFilePath);
1965
- } catch (err) {
1966
- debug3(
1967
- "write cache to file failed, path: %s, error: %s",
1968
- this.cacheFilePath,
1969
- err
1970
- );
1971
- }
1972
- }
1973
- updateOrAppendCacheRecord(newRecord, cachedRecord) {
1974
- if (cachedRecord) {
1975
- if (newRecord.type === "plan") {
1976
- cachedRecord.updateFn((cache) => {
1977
- cache.yamlWorkflow = newRecord.yamlWorkflow;
1978
- });
1979
- } else {
1980
- cachedRecord.updateFn((cache) => {
1981
- cache.xpaths = newRecord.xpaths;
1982
- });
1983
- }
1984
- } else {
1985
- this.appendCache(newRecord);
1986
- }
1987
- }
1988
- };
1989
-
1990
2037
  // src/common/agent.ts
1991
2038
  var debug4 = (0, import_logger4.getDebug)("web-integration");
1992
2039
  var distanceOfTwoPoints = (p1, p2) => {
@@ -2115,10 +2162,12 @@ ${errorTask?.errorStack}`);
2115
2162
  const prompt = opt.prompt ?? locatePrompt;
2116
2163
  const deepThink = opt.deepThink ?? false;
2117
2164
  const cacheable = opt.cacheable ?? true;
2165
+ const xpath = opt.xpath;
2118
2166
  return {
2119
2167
  prompt,
2120
2168
  deepThink,
2121
- cacheable
2169
+ cacheable,
2170
+ xpath
2122
2171
  };
2123
2172
  }
2124
2173
  return {
@@ -2276,6 +2325,9 @@ ${errorTask?.errorStack}`);
2276
2325
  this.afterTaskRunning(executor);
2277
2326
  return output;
2278
2327
  }
2328
+ async aiAsk(prompt, opt = defaultInsightExtractOption) {
2329
+ return this.aiString(prompt, opt);
2330
+ }
2279
2331
  async describeElementAtPoint(center, opt) {
2280
2332
  const { verifyPrompt = true, retryLimit = 3 } = opt || {};
2281
2333
  let success = false;