@midscene/web 0.19.1 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. package/dist/es/agent.js +299 -247
  2. package/dist/es/agent.js.map +1 -1
  3. package/dist/es/bridge-mode-browser.js +3 -3
  4. package/dist/es/bridge-mode.js +301 -249
  5. package/dist/es/bridge-mode.js.map +1 -1
  6. package/dist/es/chrome-extension.js +342 -290
  7. package/dist/es/chrome-extension.js.map +1 -1
  8. package/dist/es/index.js +307 -247
  9. package/dist/es/index.js.map +1 -1
  10. package/dist/es/midscene-playground.js +341 -289
  11. package/dist/es/midscene-playground.js.map +1 -1
  12. package/dist/es/midscene-server.js +25 -12
  13. package/dist/es/midscene-server.js.map +1 -1
  14. package/dist/es/playground.js +341 -289
  15. package/dist/es/playground.js.map +1 -1
  16. package/dist/es/playwright-report.js +14 -1
  17. package/dist/es/playwright-report.js.map +1 -1
  18. package/dist/es/playwright-reporter.js +14 -1
  19. package/dist/es/playwright-reporter.js.map +1 -1
  20. package/dist/es/playwright.js +307 -247
  21. package/dist/es/playwright.js.map +1 -1
  22. package/dist/es/puppeteer-agent-launcher.js +299 -247
  23. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  24. package/dist/es/puppeteer.js +299 -247
  25. package/dist/es/puppeteer.js.map +1 -1
  26. package/dist/es/utils.js +42 -8
  27. package/dist/es/utils.js.map +1 -1
  28. package/dist/es/yaml.js +11 -4
  29. package/dist/es/yaml.js.map +1 -1
  30. package/dist/lib/agent.js +308 -256
  31. package/dist/lib/agent.js.map +1 -1
  32. package/dist/lib/bridge-mode-browser.js +3 -3
  33. package/dist/lib/bridge-mode.js +310 -258
  34. package/dist/lib/bridge-mode.js.map +1 -1
  35. package/dist/lib/chrome-extension.js +355 -303
  36. package/dist/lib/chrome-extension.js.map +1 -1
  37. package/dist/lib/index.js +316 -256
  38. package/dist/lib/index.js.map +1 -1
  39. package/dist/lib/midscene-playground.js +354 -302
  40. package/dist/lib/midscene-playground.js.map +1 -1
  41. package/dist/lib/midscene-server.js +28 -15
  42. package/dist/lib/midscene-server.js.map +1 -1
  43. package/dist/lib/playground.js +354 -302
  44. package/dist/lib/playground.js.map +1 -1
  45. package/dist/lib/playwright-report.js +20 -7
  46. package/dist/lib/playwright-report.js.map +1 -1
  47. package/dist/lib/playwright-reporter.js +20 -7
  48. package/dist/lib/playwright-reporter.js.map +1 -1
  49. package/dist/lib/playwright.js +316 -256
  50. package/dist/lib/playwright.js.map +1 -1
  51. package/dist/lib/puppeteer-agent-launcher.js +308 -256
  52. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  53. package/dist/lib/puppeteer.js +308 -256
  54. package/dist/lib/puppeteer.js.map +1 -1
  55. package/dist/lib/utils.js +48 -13
  56. package/dist/lib/utils.js.map +1 -1
  57. package/dist/lib/yaml.js +11 -4
  58. package/dist/lib/yaml.js.map +1 -1
  59. package/dist/types/agent.d.ts +6 -102
  60. package/dist/types/bridge-mode-browser.d.ts +3 -2
  61. package/dist/types/bridge-mode.d.ts +4 -4
  62. package/dist/types/{browser-5dbb4bfb.d.ts → browser-118d886d.d.ts} +1 -1
  63. package/dist/types/chrome-extension.d.ts +2 -2
  64. package/dist/types/index.d.ts +1 -1
  65. package/dist/types/midscene-server.d.ts +2 -2
  66. package/dist/types/{page-90e9f9a7.d.ts → page-471361cd.d.ts} +102 -3
  67. package/dist/types/playground.d.ts +2 -2
  68. package/dist/types/playwright.d.ts +6 -2
  69. package/dist/types/puppeteer-agent-launcher.d.ts +1 -1
  70. package/dist/types/puppeteer.d.ts +3 -3
  71. package/dist/types/utils.d.ts +2 -1
  72. package/dist/types/yaml.d.ts +1 -1
  73. package/package.json +3 -3
package/dist/lib/agent.js CHANGED
@@ -181,7 +181,7 @@ var ScriptPlayer = class {
181
181
  domIncluded: numberTask.domIncluded,
182
182
  screenshotIncluded: numberTask.screenshotIncluded
183
183
  };
184
- (0, import_utils.assert)(prompt, "missing prompt for number");
184
+ (0, import_utils.assert)(prompt, "missing prompt for aiNumber");
185
185
  (0, import_utils.assert)(
186
186
  typeof prompt === "string",
187
187
  "prompt for number must be a string"
@@ -195,7 +195,7 @@ var ScriptPlayer = class {
195
195
  domIncluded: stringTask.domIncluded,
196
196
  screenshotIncluded: stringTask.screenshotIncluded
197
197
  };
198
- (0, import_utils.assert)(prompt, "missing prompt for string");
198
+ (0, import_utils.assert)(prompt, "missing prompt for aiNumber");
199
199
  (0, import_utils.assert)(
200
200
  typeof prompt === "string",
201
201
  "prompt for string must be a string"
@@ -209,13 +209,20 @@ var ScriptPlayer = class {
209
209
  domIncluded: booleanTask.domIncluded,
210
210
  screenshotIncluded: booleanTask.screenshotIncluded
211
211
  };
212
- (0, import_utils.assert)(prompt, "missing prompt for boolean");
212
+ (0, import_utils.assert)(prompt, "missing prompt for aiBoolean");
213
213
  (0, import_utils.assert)(
214
214
  typeof prompt === "string",
215
215
  "prompt for boolean must be a string"
216
216
  );
217
217
  const booleanResult = await agent.aiBoolean(prompt, options);
218
218
  this.setResult(booleanTask.name, booleanResult);
219
+ } else if ("aiAsk" in flowItem) {
220
+ const askTask = flowItem;
221
+ const prompt = askTask.aiAsk;
222
+ (0, import_utils.assert)(prompt, "missing prompt for aiAsk");
223
+ (0, import_utils.assert)(typeof prompt === "string", "prompt for aiAsk must be a string");
224
+ const askResult = await agent.aiAsk(prompt);
225
+ this.setResult(askTask.name, askResult);
219
226
  } else if ("aiLocate" in flowItem) {
220
227
  const locateTask = flowItem;
221
228
  const prompt = locateTask.aiLocate;
@@ -224,7 +231,7 @@ var ScriptPlayer = class {
224
231
  typeof prompt === "string",
225
232
  "prompt for aiLocate must be a string"
226
233
  );
227
- const locateResult = await agent.aiLocate(prompt);
234
+ const locateResult = await agent.aiLocate(prompt, locateTask);
228
235
  this.setResult(locateTask.name, locateResult);
229
236
  } else if ("aiWaitFor" in flowItem) {
230
237
  const waitForTask = flowItem;
@@ -433,10 +440,10 @@ var import_utils12 = require("@midscene/shared/utils");
433
440
  // src/common/tasks.ts
434
441
  var import_core = require("@midscene/core");
435
442
  var import_ai_model2 = require("@midscene/core/ai-model");
436
- var import_utils5 = require("@midscene/core/utils");
443
+ var import_utils7 = require("@midscene/core/utils");
437
444
  var import_constants = require("@midscene/shared/constants");
438
- var import_logger = require("@midscene/shared/logger");
439
- var import_utils6 = require("@midscene/shared/utils");
445
+ var import_logger2 = require("@midscene/shared/logger");
446
+ var import_utils8 = require("@midscene/shared/utils");
440
447
 
441
448
  // src/common/ui-utils.ts
442
449
  function typeStr(task) {
@@ -513,11 +520,11 @@ function paramStr(task) {
513
520
 
514
521
  // src/common/utils.ts
515
522
  var import_ai_model = require("@midscene/core/ai-model");
516
- var import_utils3 = require("@midscene/core/utils");
523
+ var import_utils5 = require("@midscene/core/utils");
517
524
  var import_env = require("@midscene/shared/env");
518
525
  var import_extractor = require("@midscene/shared/extractor");
519
526
  var import_img = require("@midscene/shared/img");
520
- var import_utils4 = require("@midscene/shared/utils");
527
+ var import_utils6 = require("@midscene/shared/utils");
521
528
  var import_dayjs = __toESM(require("dayjs"));
522
529
 
523
530
  // src/web-element.ts
@@ -545,14 +552,189 @@ var WebElementInfo = class {
545
552
  }
546
553
  };
547
554
 
555
+ // src/common/task-cache.ts
556
+ var import_node_assert = __toESM(require("assert"));
557
+ var import_node_fs2 = require("fs");
558
+ var import_node_path2 = require("path");
559
+ var import_common2 = require("@midscene/shared/common");
560
+ var import_logger = require("@midscene/shared/logger");
561
+ var import_utils3 = require("@midscene/shared/utils");
562
+ var import_js_yaml3 = __toESM(require("js-yaml"));
563
+ var import_semver = __toESM(require("semver"));
564
+
565
+ // package.json
566
+ var version = "0.20.0";
567
+
568
+ // src/common/task-cache.ts
569
+ var debug = (0, import_logger.getDebug)("cache");
570
+ var lowestSupportedMidsceneVersion = "0.16.10";
571
+ var cacheFileExt = ".cache.yaml";
572
+ var TaskCache = class {
573
+ // Track matched records
574
+ constructor(cacheId, isCacheResultUsed, cacheFilePath) {
575
+ this.matchedCacheIndices = /* @__PURE__ */ new Set();
576
+ (0, import_node_assert.default)(cacheId, "cacheId is required");
577
+ this.cacheId = replaceIllegalPathCharsAndSpace(cacheId);
578
+ this.cacheFilePath = import_utils3.ifInBrowser ? void 0 : cacheFilePath || (0, import_node_path2.join)((0, import_common2.getMidsceneRunSubDir)("cache"), `${this.cacheId}${cacheFileExt}`);
579
+ this.isCacheResultUsed = isCacheResultUsed;
580
+ let cacheContent;
581
+ if (this.cacheFilePath) {
582
+ cacheContent = this.loadCacheFromFile();
583
+ }
584
+ if (!cacheContent) {
585
+ cacheContent = {
586
+ midsceneVersion: version,
587
+ cacheId: this.cacheId,
588
+ caches: []
589
+ };
590
+ }
591
+ this.cache = cacheContent;
592
+ this.cacheOriginalLength = this.cache.caches.length;
593
+ }
594
+ matchCache(prompt, type) {
595
+ for (let i = 0; i < this.cacheOriginalLength; i++) {
596
+ const item = this.cache.caches[i];
597
+ const key = `${type}:${prompt}:${i}`;
598
+ if (item.type === type && item.prompt === prompt && !this.matchedCacheIndices.has(key)) {
599
+ this.matchedCacheIndices.add(key);
600
+ debug(
601
+ "cache found and marked as used, type: %s, prompt: %s, index: %d",
602
+ type,
603
+ prompt,
604
+ i
605
+ );
606
+ return {
607
+ cacheContent: item,
608
+ updateFn: (cb) => {
609
+ debug(
610
+ "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
611
+ type,
612
+ prompt,
613
+ i
614
+ );
615
+ cb(item);
616
+ debug(
617
+ "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
618
+ type,
619
+ prompt,
620
+ i
621
+ );
622
+ this.flushCacheToFile();
623
+ }
624
+ };
625
+ }
626
+ }
627
+ debug("no unused cache found, type: %s, prompt: %s", type, prompt);
628
+ return void 0;
629
+ }
630
+ matchPlanCache(prompt) {
631
+ return this.matchCache(prompt, "plan");
632
+ }
633
+ matchLocateCache(prompt) {
634
+ return this.matchCache(prompt, "locate");
635
+ }
636
+ appendCache(cache) {
637
+ debug("will append cache", cache);
638
+ this.cache.caches.push(cache);
639
+ this.flushCacheToFile();
640
+ }
641
+ loadCacheFromFile() {
642
+ const cacheFile = this.cacheFilePath;
643
+ (0, import_node_assert.default)(cacheFile, "cache file path is required");
644
+ if (!(0, import_node_fs2.existsSync)(cacheFile)) {
645
+ debug("no cache file found, path: %s", cacheFile);
646
+ return void 0;
647
+ }
648
+ const jsonTypeCacheFile = cacheFile.replace(cacheFileExt, ".json");
649
+ if ((0, import_node_fs2.existsSync)(jsonTypeCacheFile) && this.isCacheResultUsed) {
650
+ console.warn(
651
+ `An outdated cache file from an earlier version of Midscene has been detected. Since version 0.17, we have implemented an improved caching strategy. Please delete the old file located at: ${jsonTypeCacheFile}.`
652
+ );
653
+ return void 0;
654
+ }
655
+ try {
656
+ const data = (0, import_node_fs2.readFileSync)(cacheFile, "utf8");
657
+ const jsonData = import_js_yaml3.default.load(data);
658
+ if (!version) {
659
+ debug("no midscene version info, will not read cache from file");
660
+ return void 0;
661
+ }
662
+ if (import_semver.default.lt(jsonData.midsceneVersion, lowestSupportedMidsceneVersion) && !jsonData.midsceneVersion.includes("beta")) {
663
+ console.warn(
664
+ `You are using an old version of Midscene cache file, and we cannot match any info from it. Starting from Midscene v0.17, we changed our strategy to use xpath for cache info, providing better performance.
665
+ Please delete the existing cache and rebuild it. Sorry for the inconvenience.
666
+ cache file: ${cacheFile}`
667
+ );
668
+ return void 0;
669
+ }
670
+ debug(
671
+ "cache loaded from file, path: %s, cache version: %s, record length: %s",
672
+ cacheFile,
673
+ jsonData.midsceneVersion,
674
+ jsonData.caches.length
675
+ );
676
+ jsonData.midsceneVersion = version;
677
+ return jsonData;
678
+ } catch (err) {
679
+ debug(
680
+ "cache file exists but load failed, path: %s, error: %s",
681
+ cacheFile,
682
+ err
683
+ );
684
+ return void 0;
685
+ }
686
+ }
687
+ flushCacheToFile() {
688
+ if (!version) {
689
+ debug("no midscene version info, will not write cache to file");
690
+ return;
691
+ }
692
+ if (!this.cacheFilePath) {
693
+ debug("no cache file path, will not write cache to file");
694
+ return;
695
+ }
696
+ try {
697
+ const dir = (0, import_node_path2.dirname)(this.cacheFilePath);
698
+ if (!(0, import_node_fs2.existsSync)(dir)) {
699
+ (0, import_node_fs2.mkdirSync)(dir, { recursive: true });
700
+ debug("created cache directory: %s", dir);
701
+ }
702
+ const yamlData = import_js_yaml3.default.dump(this.cache);
703
+ (0, import_node_fs2.writeFileSync)(this.cacheFilePath, yamlData);
704
+ debug("cache flushed to file: %s", this.cacheFilePath);
705
+ } catch (err) {
706
+ debug(
707
+ "write cache to file failed, path: %s, error: %s",
708
+ this.cacheFilePath,
709
+ err
710
+ );
711
+ }
712
+ }
713
+ updateOrAppendCacheRecord(newRecord, cachedRecord) {
714
+ if (cachedRecord) {
715
+ if (newRecord.type === "plan") {
716
+ cachedRecord.updateFn((cache) => {
717
+ cache.yamlWorkflow = newRecord.yamlWorkflow;
718
+ });
719
+ } else {
720
+ cachedRecord.updateFn((cache) => {
721
+ cache.xpaths = newRecord.xpaths;
722
+ });
723
+ }
724
+ } else {
725
+ this.appendCache(newRecord);
726
+ }
727
+ }
728
+ };
729
+
548
730
  // src/common/utils.ts
549
731
  async function parseContextFromWebPage(page, _opt) {
550
- (0, import_utils4.assert)(page, "page is required");
732
+ (0, import_utils6.assert)(page, "page is required");
551
733
  if (page._forceUsePageContext) {
552
734
  return await page._forceUsePageContext();
553
735
  }
554
736
  const url = await page.url();
555
- (0, import_utils3.uploadTestInfoToServer)({ testUrl: url });
737
+ (0, import_utils5.uploadTestInfoToServer)({ testUrl: url });
556
738
  let screenshotBase64;
557
739
  let tree;
558
740
  await Promise.all([
@@ -574,7 +756,7 @@ async function parseContextFromWebPage(page, _opt) {
574
756
  isVisible
575
757
  });
576
758
  });
577
- (0, import_utils4.assert)(screenshotBase64, "screenshotBase64 is required");
759
+ (0, import_utils6.assert)(screenshotBase64, "screenshotBase64 is required");
578
760
  const size = await page.size();
579
761
  if (size.dpr && size.dpr > 1) {
580
762
  screenshotBase64 = await (0, import_img.resizeImgBase64)(screenshotBase64, {
@@ -592,11 +774,11 @@ async function parseContextFromWebPage(page, _opt) {
592
774
  function reportFileName(tag = "web") {
593
775
  const reportTagName = (0, import_env.getAIConfig)(import_env.MIDSCENE_REPORT_TAG_NAME);
594
776
  const dateTimeInFileName = (0, import_dayjs.default)().format("YYYY-MM-DD_HH-mm-ss");
595
- const uniqueId = (0, import_utils4.uuid)().substring(0, 8);
777
+ const uniqueId = (0, import_utils6.uuid)().substring(0, 8);
596
778
  return `${reportTagName || tag}-${dateTimeInFileName}-${uniqueId}`;
597
779
  }
598
780
  function printReportMsg(filepath) {
599
- (0, import_utils4.logMsg)(`Midscene - report file updated: ${filepath}`);
781
+ (0, import_utils6.logMsg)(`Midscene - report file updated: ${filepath}`);
600
782
  }
601
783
  function replaceIllegalPathCharsAndSpace(str) {
602
784
  return str.replace(/[:*?"<>| ]/g, "-");
@@ -621,6 +803,28 @@ function matchElementFromPlan(planLocateParam, tree) {
621
803
  }
622
804
  return void 0;
623
805
  }
806
+ async function matchElementFromCache(taskExecutor, xpaths, cachePrompt, cacheable) {
807
+ try {
808
+ if (xpaths?.length && taskExecutor.taskCache?.isCacheResultUsed && cacheable !== false) {
809
+ for (let i = 0; i < xpaths.length; i++) {
810
+ const element = await taskExecutor.page.getElementInfoByXpath(
811
+ xpaths[i]
812
+ );
813
+ if (element?.id) {
814
+ debug("cache hit, prompt: %s", cachePrompt);
815
+ debug(
816
+ "found a new new element with same xpath, xpath: %s, id: %s",
817
+ xpaths[i],
818
+ element?.id
819
+ );
820
+ return element;
821
+ }
822
+ }
823
+ }
824
+ } catch (error) {
825
+ debug("get element info by xpath error: ", error);
826
+ }
827
+ }
624
828
  function trimContextByViewport(execution) {
625
829
  function filterVisibleTree(node) {
626
830
  if (!node)
@@ -659,7 +863,7 @@ function trimContextByViewport(execution) {
659
863
  }
660
864
 
661
865
  // src/common/tasks.ts
662
- var debug = (0, import_logger.getDebug)("page-task-executor");
866
+ var debug2 = (0, import_logger2.getDebug)("page-task-executor");
663
867
  var replanningCountLimit = 10;
664
868
  var isAndroidPage = (page) => {
665
869
  return page.pageType === "android";
@@ -700,7 +904,7 @@ var PageTaskExecutor = class {
700
904
  if (info?.id) {
701
905
  elementId = info.id;
702
906
  } else {
703
- debug(
907
+ debug2(
704
908
  "no element id found for position node, will not update cache",
705
909
  element
706
910
  );
@@ -713,7 +917,7 @@ var PageTaskExecutor = class {
713
917
  const result = await this.page.getXpathsById(elementId);
714
918
  return result;
715
919
  } catch (error) {
716
- debug("getXpathsById error: ", error);
920
+ debug2("getXpathsById error: ", error);
717
921
  }
718
922
  }
719
923
  prependExecutorWithScreenshot(taskApply, appendAfterExecution = false) {
@@ -729,7 +933,7 @@ var PageTaskExecutor = class {
729
933
  if (taskApply.type === "Action") {
730
934
  await Promise.all([
731
935
  (async () => {
732
- await (0, import_utils5.sleep)(100);
936
+ await (0, import_utils7.sleep)(100);
733
937
  if (this.page.waitUntilNetworkIdle) {
734
938
  try {
735
939
  await this.page.waitUntilNetworkIdle();
@@ -737,7 +941,7 @@ var PageTaskExecutor = class {
737
941
  }
738
942
  }
739
943
  })(),
740
- (0, import_utils5.sleep)(200)
944
+ (0, import_utils7.sleep)(200)
741
945
  ]);
742
946
  }
743
947
  if (appendAfterExecution) {
@@ -767,7 +971,7 @@ var PageTaskExecutor = class {
767
971
  locate: plan2.locate,
768
972
  executor: async (param, taskContext) => {
769
973
  const { task } = taskContext;
770
- (0, import_utils6.assert)(
974
+ (0, import_utils8.assert)(
771
975
  param?.prompt || param?.id || param?.bbox,
772
976
  "No prompt or id or position or bbox to locate"
773
977
  );
@@ -792,39 +996,29 @@ var PageTaskExecutor = class {
792
996
  timing: "before Insight"
793
997
  };
794
998
  task.recorder = [recordItem];
795
- let cacheHitFlag = false;
999
+ const elementFromXpath = param.xpath ? await this.page.getElementInfoByXpath(param.xpath) : void 0;
1000
+ const userExpectedPathHitFlag = !!elementFromXpath;
796
1001
  const cachePrompt = param.prompt;
797
1002
  const locateCacheRecord = this.taskCache?.matchLocateCache(cachePrompt);
798
1003
  const xpaths = locateCacheRecord?.cacheContent?.xpaths;
799
- let elementFromCache = null;
800
- try {
801
- if (xpaths?.length && this.taskCache?.isCacheResultUsed && param?.cacheable !== false) {
802
- for (let i = 0; i < xpaths.length; i++) {
803
- const element2 = await this.page.getElementInfoByXpath(
804
- xpaths[i]
805
- );
806
- if (element2?.id) {
807
- elementFromCache = element2;
808
- debug("cache hit, prompt: %s", cachePrompt);
809
- cacheHitFlag = true;
810
- debug(
811
- "found a new new element with same xpath, xpath: %s, id: %s",
812
- xpaths[i],
813
- element2?.id
814
- );
815
- break;
816
- }
817
- }
818
- }
819
- } catch (error) {
820
- debug("get element info by xpath error: ", error);
821
- }
822
- const startTime = Date.now();
823
- const element = elementFromCache || // try to match element from cache
824
- matchElementFromPlan(param, pageContext.tree) || // try to match element from plan
825
- (await this.insight.locate(param, {
1004
+ const elementFromCache = userExpectedPathHitFlag ? null : await matchElementFromCache(
1005
+ this,
1006
+ xpaths,
1007
+ cachePrompt,
1008
+ param.cacheable
1009
+ );
1010
+ const cacheHitFlag = !!elementFromCache;
1011
+ const elementFromPlan = !userExpectedPathHitFlag && !cacheHitFlag ? matchElementFromPlan(param, pageContext.tree) : void 0;
1012
+ const planHitFlag = !!elementFromPlan;
1013
+ const elementFromAiLocate = !userExpectedPathHitFlag && !cacheHitFlag && !planHitFlag ? (await this.insight.locate(param, {
1014
+ // fallback to ai locate
826
1015
  context: pageContext
827
- })).element;
1016
+ })).element : void 0;
1017
+ const aiLocateHitFlag = !!elementFromAiLocate;
1018
+ const element = elementFromXpath || // highest priority
1019
+ elementFromCache || // second priority
1020
+ elementFromPlan || // third priority
1021
+ elementFromAiLocate;
828
1022
  let currentXpaths;
829
1023
  if (element && this.taskCache && !cacheHitFlag && param?.cacheable !== false) {
830
1024
  const elementXpaths = await this.getElementXpath(
@@ -842,7 +1036,7 @@ var PageTaskExecutor = class {
842
1036
  locateCacheRecord
843
1037
  );
844
1038
  } else {
845
- debug(
1039
+ debug2(
846
1040
  "no xpaths found, will not update cache",
847
1041
  cachePrompt,
848
1042
  elementXpaths
@@ -852,16 +1046,44 @@ var PageTaskExecutor = class {
852
1046
  if (!element) {
853
1047
  throw new Error(`Element not found: ${param.prompt}`);
854
1048
  }
1049
+ let hitBy;
1050
+ if (userExpectedPathHitFlag) {
1051
+ hitBy = {
1052
+ from: "User expected path",
1053
+ context: {
1054
+ xpath: param.xpath
1055
+ }
1056
+ };
1057
+ } else if (cacheHitFlag) {
1058
+ hitBy = {
1059
+ from: "Cache",
1060
+ context: {
1061
+ xpathsFromCache: xpaths,
1062
+ xpathsToSave: currentXpaths
1063
+ }
1064
+ };
1065
+ } else if (planHitFlag) {
1066
+ hitBy = {
1067
+ from: "Planning",
1068
+ context: {
1069
+ id: elementFromPlan?.id,
1070
+ bbox: elementFromPlan?.bbox
1071
+ }
1072
+ };
1073
+ } else if (aiLocateHitFlag) {
1074
+ hitBy = {
1075
+ from: "AI model",
1076
+ context: {
1077
+ prompt: param.prompt
1078
+ }
1079
+ };
1080
+ }
855
1081
  return {
856
1082
  output: {
857
1083
  element
858
1084
  },
859
1085
  pageContext,
860
- cache: {
861
- hit: cacheHitFlag,
862
- originalXpaths: xpaths,
863
- currentXpaths
864
- }
1086
+ hitBy
865
1087
  };
866
1088
  }
867
1089
  };
@@ -957,7 +1179,7 @@ var PageTaskExecutor = class {
957
1179
  thought: plan2.thought,
958
1180
  locate: plan2.locate,
959
1181
  executor: async (param, { element }) => {
960
- (0, import_utils6.assert)(element, "Element not found, cannot tap");
1182
+ (0, import_utils8.assert)(element, "Element not found, cannot tap");
961
1183
  await this.page.mouse.click(element.center[0], element.center[1]);
962
1184
  }
963
1185
  };
@@ -969,7 +1191,7 @@ var PageTaskExecutor = class {
969
1191
  thought: plan2.thought,
970
1192
  locate: plan2.locate,
971
1193
  executor: async (param, { element }) => {
972
- (0, import_utils6.assert)(element, "Element not found, cannot right click");
1194
+ (0, import_utils8.assert)(element, "Element not found, cannot right click");
973
1195
  await this.page.mouse.click(
974
1196
  element.center[0],
975
1197
  element.center[1],
@@ -986,7 +1208,7 @@ var PageTaskExecutor = class {
986
1208
  thought: plan2.thought,
987
1209
  locate: plan2.locate,
988
1210
  executor: async (taskParam) => {
989
- (0, import_utils6.assert)(
1211
+ (0, import_utils8.assert)(
990
1212
  taskParam?.start_box && taskParam?.end_box,
991
1213
  "No start_box or end_box to drag"
992
1214
  );
@@ -1001,7 +1223,7 @@ var PageTaskExecutor = class {
1001
1223
  thought: plan2.thought,
1002
1224
  locate: plan2.locate,
1003
1225
  executor: async (param, { element }) => {
1004
- (0, import_utils6.assert)(element, "Element not found, cannot hover");
1226
+ (0, import_utils8.assert)(element, "Element not found, cannot hover");
1005
1227
  await this.page.mouse.move(element.center[0], element.center[1]);
1006
1228
  }
1007
1229
  };
@@ -1053,7 +1275,7 @@ var PageTaskExecutor = class {
1053
1275
  `Unknown scroll direction: ${taskParam.direction}`
1054
1276
  );
1055
1277
  }
1056
- await (0, import_utils5.sleep)(500);
1278
+ await (0, import_utils7.sleep)(500);
1057
1279
  } else {
1058
1280
  throw new Error(
1059
1281
  `Unknown scroll event type: ${scrollToEventName}, taskParam: ${JSON.stringify(
@@ -1072,7 +1294,7 @@ var PageTaskExecutor = class {
1072
1294
  thought: plan2.thought,
1073
1295
  locate: plan2.locate,
1074
1296
  executor: async (taskParam) => {
1075
- await (0, import_utils5.sleep)(taskParam?.timeMs || 3e3);
1297
+ await (0, import_utils7.sleep)(taskParam?.timeMs || 3e3);
1076
1298
  }
1077
1299
  };
1078
1300
  tasks.push(taskActionSleep);
@@ -1120,7 +1342,7 @@ var PageTaskExecutor = class {
1120
1342
  thought: plan2.thought,
1121
1343
  locate: plan2.locate,
1122
1344
  executor: async (param) => {
1123
- (0, import_utils6.assert)(
1345
+ (0, import_utils8.assert)(
1124
1346
  isAndroidPage(this.page),
1125
1347
  "Cannot use home button on non-Android devices"
1126
1348
  );
@@ -1136,7 +1358,7 @@ var PageTaskExecutor = class {
1136
1358
  thought: plan2.thought,
1137
1359
  locate: plan2.locate,
1138
1360
  executor: async (param) => {
1139
- (0, import_utils6.assert)(
1361
+ (0, import_utils8.assert)(
1140
1362
  isAndroidPage(this.page),
1141
1363
  "Cannot use back button on non-Android devices"
1142
1364
  );
@@ -1152,7 +1374,7 @@ var PageTaskExecutor = class {
1152
1374
  thought: plan2.thought,
1153
1375
  locate: plan2.locate,
1154
1376
  executor: async (param) => {
1155
- (0, import_utils6.assert)(
1377
+ (0, import_utils8.assert)(
1156
1378
  isAndroidPage(this.page),
1157
1379
  "Cannot use recent apps button on non-Android devices"
1158
1380
  );
@@ -1303,7 +1525,7 @@ var PageTaskExecutor = class {
1303
1525
  }
1304
1526
  }
1305
1527
  if (finalActions.length === 0) {
1306
- (0, import_utils6.assert)(
1528
+ (0, import_utils8.assert)(
1307
1529
  !more_actions_needed_by_instruction || sleep2,
1308
1530
  error ? `Failed to plan: ${error}` : planParsingError || "No plan found"
1309
1531
  );
@@ -1541,7 +1763,7 @@ var PageTaskExecutor = class {
1541
1763
  );
1542
1764
  let outputResult = data;
1543
1765
  if (ifTypeRestricted) {
1544
- (0, import_utils6.assert)(data?.result !== void 0, "No result in query data");
1766
+ (0, import_utils8.assert)(data?.result !== void 0, "No result in query data");
1545
1767
  outputResult = data.result;
1546
1768
  }
1547
1769
  return {
@@ -1637,9 +1859,9 @@ var PageTaskExecutor = class {
1637
1859
  onTaskStart: this.onTaskStartCallback
1638
1860
  });
1639
1861
  const { timeoutMs, checkIntervalMs } = opt;
1640
- (0, import_utils6.assert)(assertion, "No assertion for waitFor");
1641
- (0, import_utils6.assert)(timeoutMs, "No timeoutMs for waitFor");
1642
- (0, import_utils6.assert)(checkIntervalMs, "No checkIntervalMs for waitFor");
1862
+ (0, import_utils8.assert)(assertion, "No assertion for waitFor");
1863
+ (0, import_utils8.assert)(timeoutMs, "No timeoutMs for waitFor");
1864
+ (0, import_utils8.assert)(checkIntervalMs, "No checkIntervalMs for waitFor");
1643
1865
  const overallStartTime = Date.now();
1644
1866
  let startTime = Date.now();
1645
1867
  let errorThought = "";
@@ -1693,9 +1915,9 @@ var PageTaskExecutor = class {
1693
1915
  };
1694
1916
 
1695
1917
  // src/common/plan-builder.ts
1696
- var import_logger2 = require("@midscene/shared/logger");
1697
- var import_utils8 = require("@midscene/shared/utils");
1698
- var debug2 = (0, import_logger2.getDebug)("plan-builder");
1918
+ var import_logger3 = require("@midscene/shared/logger");
1919
+ var import_utils10 = require("@midscene/shared/utils");
1920
+ var debug3 = (0, import_logger3.getDebug)("plan-builder");
1699
1921
  function buildPlans(type, locateParam, param) {
1700
1922
  let returnPlans = [];
1701
1923
  const locatePlan = locateParam ? {
@@ -1705,8 +1927,8 @@ function buildPlans(type, locateParam, param) {
1705
1927
  thought: ""
1706
1928
  } : null;
1707
1929
  if (type === "Tap" || type === "Hover" || type === "RightClick") {
1708
- (0, import_utils8.assert)(locateParam, `missing locate info for action "${type}"`);
1709
- (0, import_utils8.assert)(locatePlan, `missing locate info for action "${type}"`);
1930
+ (0, import_utils10.assert)(locateParam, `missing locate info for action "${type}"`);
1931
+ (0, import_utils10.assert)(locatePlan, `missing locate info for action "${type}"`);
1710
1932
  const tapPlan = {
1711
1933
  type,
1712
1934
  param: null,
@@ -1717,9 +1939,9 @@ function buildPlans(type, locateParam, param) {
1717
1939
  }
1718
1940
  if (type === "Input" || type === "KeyboardPress") {
1719
1941
  if (type === "Input") {
1720
- (0, import_utils8.assert)(locateParam, `missing locate info for action "${type}"`);
1942
+ (0, import_utils10.assert)(locateParam, `missing locate info for action "${type}"`);
1721
1943
  }
1722
- (0, import_utils8.assert)(param, `missing param for action "${type}"`);
1944
+ (0, import_utils10.assert)(param, `missing param for action "${type}"`);
1723
1945
  const inputPlan = {
1724
1946
  type,
1725
1947
  param,
@@ -1733,7 +1955,7 @@ function buildPlans(type, locateParam, param) {
1733
1955
  }
1734
1956
  }
1735
1957
  if (type === "Scroll") {
1736
- (0, import_utils8.assert)(param, `missing param for action "${type}"`);
1958
+ (0, import_utils10.assert)(param, `missing param for action "${type}"`);
1737
1959
  const scrollPlan = {
1738
1960
  type,
1739
1961
  param,
@@ -1747,7 +1969,7 @@ function buildPlans(type, locateParam, param) {
1747
1969
  }
1748
1970
  }
1749
1971
  if (type === "Sleep") {
1750
- (0, import_utils8.assert)(param, `missing param for action "${type}"`);
1972
+ (0, import_utils10.assert)(param, `missing param for action "${type}"`);
1751
1973
  const sleepPlan = {
1752
1974
  type,
1753
1975
  param,
@@ -1757,7 +1979,7 @@ function buildPlans(type, locateParam, param) {
1757
1979
  returnPlans = [sleepPlan];
1758
1980
  }
1759
1981
  if (type === "Locate") {
1760
- (0, import_utils8.assert)(locateParam, `missing locate info for action "${type}"`);
1982
+ (0, import_utils10.assert)(locateParam, `missing locate info for action "${type}"`);
1761
1983
  const locatePlan2 = {
1762
1984
  type,
1763
1985
  param: locateParam,
@@ -1767,187 +1989,12 @@ function buildPlans(type, locateParam, param) {
1767
1989
  returnPlans = [locatePlan2];
1768
1990
  }
1769
1991
  if (returnPlans) {
1770
- debug2("buildPlans", returnPlans);
1992
+ debug3("buildPlans", returnPlans);
1771
1993
  return returnPlans;
1772
1994
  }
1773
1995
  throw new Error(`Not supported type: ${type}`);
1774
1996
  }
1775
1997
 
1776
- // src/common/task-cache.ts
1777
- var import_node_assert = __toESM(require("assert"));
1778
- var import_node_fs2 = require("fs");
1779
- var import_node_path2 = require("path");
1780
- var import_common2 = require("@midscene/shared/common");
1781
- var import_logger3 = require("@midscene/shared/logger");
1782
- var import_utils9 = require("@midscene/shared/utils");
1783
- var import_js_yaml3 = __toESM(require("js-yaml"));
1784
- var import_semver = __toESM(require("semver"));
1785
-
1786
- // package.json
1787
- var version = "0.19.1";
1788
-
1789
- // src/common/task-cache.ts
1790
- var debug3 = (0, import_logger3.getDebug)("cache");
1791
- var lowestSupportedMidsceneVersion = "0.16.10";
1792
- var cacheFileExt = ".cache.yaml";
1793
- var TaskCache = class {
1794
- // Track matched records
1795
- constructor(cacheId, isCacheResultUsed, cacheFilePath) {
1796
- this.matchedCacheIndices = /* @__PURE__ */ new Set();
1797
- (0, import_node_assert.default)(cacheId, "cacheId is required");
1798
- this.cacheId = replaceIllegalPathCharsAndSpace(cacheId);
1799
- this.cacheFilePath = import_utils9.ifInBrowser ? void 0 : cacheFilePath || (0, import_node_path2.join)((0, import_common2.getMidsceneRunSubDir)("cache"), `${this.cacheId}${cacheFileExt}`);
1800
- this.isCacheResultUsed = isCacheResultUsed;
1801
- let cacheContent;
1802
- if (this.cacheFilePath) {
1803
- cacheContent = this.loadCacheFromFile();
1804
- }
1805
- if (!cacheContent) {
1806
- cacheContent = {
1807
- midsceneVersion: version,
1808
- cacheId: this.cacheId,
1809
- caches: []
1810
- };
1811
- }
1812
- this.cache = cacheContent;
1813
- this.cacheOriginalLength = this.cache.caches.length;
1814
- }
1815
- matchCache(prompt, type) {
1816
- for (let i = 0; i < this.cacheOriginalLength; i++) {
1817
- const item = this.cache.caches[i];
1818
- const key = `${type}:${prompt}:${i}`;
1819
- if (item.type === type && item.prompt === prompt && !this.matchedCacheIndices.has(key)) {
1820
- this.matchedCacheIndices.add(key);
1821
- debug3(
1822
- "cache found and marked as used, type: %s, prompt: %s, index: %d",
1823
- type,
1824
- prompt,
1825
- i
1826
- );
1827
- return {
1828
- cacheContent: item,
1829
- updateFn: (cb) => {
1830
- debug3(
1831
- "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1832
- type,
1833
- prompt,
1834
- i
1835
- );
1836
- cb(item);
1837
- debug3(
1838
- "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1839
- type,
1840
- prompt,
1841
- i
1842
- );
1843
- this.flushCacheToFile();
1844
- }
1845
- };
1846
- }
1847
- }
1848
- debug3("no unused cache found, type: %s, prompt: %s", type, prompt);
1849
- return void 0;
1850
- }
1851
- matchPlanCache(prompt) {
1852
- return this.matchCache(prompt, "plan");
1853
- }
1854
- matchLocateCache(prompt) {
1855
- return this.matchCache(prompt, "locate");
1856
- }
1857
- appendCache(cache) {
1858
- debug3("will append cache", cache);
1859
- this.cache.caches.push(cache);
1860
- this.flushCacheToFile();
1861
- }
1862
- loadCacheFromFile() {
1863
- const cacheFile = this.cacheFilePath;
1864
- (0, import_node_assert.default)(cacheFile, "cache file path is required");
1865
- if (!(0, import_node_fs2.existsSync)(cacheFile)) {
1866
- debug3("no cache file found, path: %s", cacheFile);
1867
- return void 0;
1868
- }
1869
- const jsonTypeCacheFile = cacheFile.replace(cacheFileExt, ".json");
1870
- if ((0, import_node_fs2.existsSync)(jsonTypeCacheFile) && this.isCacheResultUsed) {
1871
- console.warn(
1872
- `An outdated cache file from an earlier version of Midscene has been detected. Since version 0.17, we have implemented an improved caching strategy. Please delete the old file located at: ${jsonTypeCacheFile}.`
1873
- );
1874
- return void 0;
1875
- }
1876
- try {
1877
- const data = (0, import_node_fs2.readFileSync)(cacheFile, "utf8");
1878
- const jsonData = import_js_yaml3.default.load(data);
1879
- if (!version) {
1880
- debug3("no midscene version info, will not read cache from file");
1881
- return void 0;
1882
- }
1883
- if (import_semver.default.lt(jsonData.midsceneVersion, lowestSupportedMidsceneVersion) && !jsonData.midsceneVersion.includes("beta")) {
1884
- console.warn(
1885
- `You are using an old version of Midscene cache file, and we cannot match any info from it. Starting from Midscene v0.17, we changed our strategy to use xpath for cache info, providing better performance.
1886
- Please delete the existing cache and rebuild it. Sorry for the inconvenience.
1887
- cache file: ${cacheFile}`
1888
- );
1889
- return void 0;
1890
- }
1891
- debug3(
1892
- "cache loaded from file, path: %s, cache version: %s, record length: %s",
1893
- cacheFile,
1894
- jsonData.midsceneVersion,
1895
- jsonData.caches.length
1896
- );
1897
- jsonData.midsceneVersion = version;
1898
- return jsonData;
1899
- } catch (err) {
1900
- debug3(
1901
- "cache file exists but load failed, path: %s, error: %s",
1902
- cacheFile,
1903
- err
1904
- );
1905
- return void 0;
1906
- }
1907
- }
1908
- flushCacheToFile() {
1909
- if (!version) {
1910
- debug3("no midscene version info, will not write cache to file");
1911
- return;
1912
- }
1913
- if (!this.cacheFilePath) {
1914
- debug3("no cache file path, will not write cache to file");
1915
- return;
1916
- }
1917
- try {
1918
- const dir = (0, import_node_path2.dirname)(this.cacheFilePath);
1919
- if (!(0, import_node_fs2.existsSync)(dir)) {
1920
- (0, import_node_fs2.mkdirSync)(dir, { recursive: true });
1921
- debug3("created cache directory: %s", dir);
1922
- }
1923
- const yamlData = import_js_yaml3.default.dump(this.cache);
1924
- (0, import_node_fs2.writeFileSync)(this.cacheFilePath, yamlData);
1925
- debug3("cache flushed to file: %s", this.cacheFilePath);
1926
- } catch (err) {
1927
- debug3(
1928
- "write cache to file failed, path: %s, error: %s",
1929
- this.cacheFilePath,
1930
- err
1931
- );
1932
- }
1933
- }
1934
- updateOrAppendCacheRecord(newRecord, cachedRecord) {
1935
- if (cachedRecord) {
1936
- if (newRecord.type === "plan") {
1937
- cachedRecord.updateFn((cache) => {
1938
- cache.yamlWorkflow = newRecord.yamlWorkflow;
1939
- });
1940
- } else {
1941
- cachedRecord.updateFn((cache) => {
1942
- cache.xpaths = newRecord.xpaths;
1943
- });
1944
- }
1945
- } else {
1946
- this.appendCache(newRecord);
1947
- }
1948
- }
1949
- };
1950
-
1951
1998
  // src/common/agent.ts
1952
1999
  var debug4 = (0, import_logger4.getDebug)("web-integration");
1953
2000
  var distanceOfTwoPoints = (p1, p2) => {
@@ -2076,10 +2123,12 @@ ${errorTask?.errorStack}`);
2076
2123
  const prompt = opt.prompt ?? locatePrompt;
2077
2124
  const deepThink = opt.deepThink ?? false;
2078
2125
  const cacheable = opt.cacheable ?? true;
2126
+ const xpath = opt.xpath;
2079
2127
  return {
2080
2128
  prompt,
2081
2129
  deepThink,
2082
- cacheable
2130
+ cacheable,
2131
+ xpath
2083
2132
  };
2084
2133
  }
2085
2134
  return {
@@ -2237,6 +2286,9 @@ ${errorTask?.errorStack}`);
2237
2286
  this.afterTaskRunning(executor);
2238
2287
  return output;
2239
2288
  }
2289
+ async aiAsk(prompt, opt = defaultInsightExtractOption) {
2290
+ return this.aiString(prompt, opt);
2291
+ }
2240
2292
  async describeElementAtPoint(center, opt) {
2241
2293
  const { verifyPrompt = true, retryLimit = 3 } = opt || {};
2242
2294
  let success = false;