@midscene/web 0.19.1 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. package/dist/es/agent.js +299 -247
  2. package/dist/es/agent.js.map +1 -1
  3. package/dist/es/bridge-mode-browser.js +3 -3
  4. package/dist/es/bridge-mode.js +301 -249
  5. package/dist/es/bridge-mode.js.map +1 -1
  6. package/dist/es/chrome-extension.js +342 -290
  7. package/dist/es/chrome-extension.js.map +1 -1
  8. package/dist/es/index.js +307 -247
  9. package/dist/es/index.js.map +1 -1
  10. package/dist/es/midscene-playground.js +341 -289
  11. package/dist/es/midscene-playground.js.map +1 -1
  12. package/dist/es/midscene-server.js +25 -12
  13. package/dist/es/midscene-server.js.map +1 -1
  14. package/dist/es/playground.js +341 -289
  15. package/dist/es/playground.js.map +1 -1
  16. package/dist/es/playwright-report.js +14 -1
  17. package/dist/es/playwright-report.js.map +1 -1
  18. package/dist/es/playwright-reporter.js +14 -1
  19. package/dist/es/playwright-reporter.js.map +1 -1
  20. package/dist/es/playwright.js +307 -247
  21. package/dist/es/playwright.js.map +1 -1
  22. package/dist/es/puppeteer-agent-launcher.js +299 -247
  23. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  24. package/dist/es/puppeteer.js +299 -247
  25. package/dist/es/puppeteer.js.map +1 -1
  26. package/dist/es/utils.js +42 -8
  27. package/dist/es/utils.js.map +1 -1
  28. package/dist/es/yaml.js +11 -4
  29. package/dist/es/yaml.js.map +1 -1
  30. package/dist/lib/agent.js +308 -256
  31. package/dist/lib/agent.js.map +1 -1
  32. package/dist/lib/bridge-mode-browser.js +3 -3
  33. package/dist/lib/bridge-mode.js +310 -258
  34. package/dist/lib/bridge-mode.js.map +1 -1
  35. package/dist/lib/chrome-extension.js +355 -303
  36. package/dist/lib/chrome-extension.js.map +1 -1
  37. package/dist/lib/index.js +316 -256
  38. package/dist/lib/index.js.map +1 -1
  39. package/dist/lib/midscene-playground.js +354 -302
  40. package/dist/lib/midscene-playground.js.map +1 -1
  41. package/dist/lib/midscene-server.js +28 -15
  42. package/dist/lib/midscene-server.js.map +1 -1
  43. package/dist/lib/playground.js +354 -302
  44. package/dist/lib/playground.js.map +1 -1
  45. package/dist/lib/playwright-report.js +20 -7
  46. package/dist/lib/playwright-report.js.map +1 -1
  47. package/dist/lib/playwright-reporter.js +20 -7
  48. package/dist/lib/playwright-reporter.js.map +1 -1
  49. package/dist/lib/playwright.js +316 -256
  50. package/dist/lib/playwright.js.map +1 -1
  51. package/dist/lib/puppeteer-agent-launcher.js +308 -256
  52. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  53. package/dist/lib/puppeteer.js +308 -256
  54. package/dist/lib/puppeteer.js.map +1 -1
  55. package/dist/lib/utils.js +48 -13
  56. package/dist/lib/utils.js.map +1 -1
  57. package/dist/lib/yaml.js +11 -4
  58. package/dist/lib/yaml.js.map +1 -1
  59. package/dist/types/agent.d.ts +6 -102
  60. package/dist/types/bridge-mode-browser.d.ts +3 -2
  61. package/dist/types/bridge-mode.d.ts +4 -4
  62. package/dist/types/{browser-5dbb4bfb.d.ts → browser-118d886d.d.ts} +1 -1
  63. package/dist/types/chrome-extension.d.ts +2 -2
  64. package/dist/types/index.d.ts +1 -1
  65. package/dist/types/midscene-server.d.ts +2 -2
  66. package/dist/types/{page-90e9f9a7.d.ts → page-471361cd.d.ts} +102 -3
  67. package/dist/types/playground.d.ts +2 -2
  68. package/dist/types/playwright.d.ts +6 -2
  69. package/dist/types/puppeteer-agent-launcher.d.ts +1 -1
  70. package/dist/types/puppeteer.d.ts +3 -3
  71. package/dist/types/utils.d.ts +2 -1
  72. package/dist/types/yaml.d.ts +1 -1
  73. package/package.json +3 -3
@@ -186,7 +186,7 @@ var ScriptPlayer = class {
186
186
  domIncluded: numberTask.domIncluded,
187
187
  screenshotIncluded: numberTask.screenshotIncluded
188
188
  };
189
- (0, import_utils.assert)(prompt, "missing prompt for number");
189
+ (0, import_utils.assert)(prompt, "missing prompt for aiNumber");
190
190
  (0, import_utils.assert)(
191
191
  typeof prompt === "string",
192
192
  "prompt for number must be a string"
@@ -200,7 +200,7 @@ var ScriptPlayer = class {
200
200
  domIncluded: stringTask.domIncluded,
201
201
  screenshotIncluded: stringTask.screenshotIncluded
202
202
  };
203
- (0, import_utils.assert)(prompt, "missing prompt for string");
203
+ (0, import_utils.assert)(prompt, "missing prompt for aiNumber");
204
204
  (0, import_utils.assert)(
205
205
  typeof prompt === "string",
206
206
  "prompt for string must be a string"
@@ -214,13 +214,20 @@ var ScriptPlayer = class {
214
214
  domIncluded: booleanTask.domIncluded,
215
215
  screenshotIncluded: booleanTask.screenshotIncluded
216
216
  };
217
- (0, import_utils.assert)(prompt, "missing prompt for boolean");
217
+ (0, import_utils.assert)(prompt, "missing prompt for aiBoolean");
218
218
  (0, import_utils.assert)(
219
219
  typeof prompt === "string",
220
220
  "prompt for boolean must be a string"
221
221
  );
222
222
  const booleanResult = await agent.aiBoolean(prompt, options);
223
223
  this.setResult(booleanTask.name, booleanResult);
224
+ } else if ("aiAsk" in flowItem) {
225
+ const askTask = flowItem;
226
+ const prompt = askTask.aiAsk;
227
+ (0, import_utils.assert)(prompt, "missing prompt for aiAsk");
228
+ (0, import_utils.assert)(typeof prompt === "string", "prompt for aiAsk must be a string");
229
+ const askResult = await agent.aiAsk(prompt);
230
+ this.setResult(askTask.name, askResult);
224
231
  } else if ("aiLocate" in flowItem) {
225
232
  const locateTask = flowItem;
226
233
  const prompt = locateTask.aiLocate;
@@ -229,7 +236,7 @@ var ScriptPlayer = class {
229
236
  typeof prompt === "string",
230
237
  "prompt for aiLocate must be a string"
231
238
  );
232
- const locateResult = await agent.aiLocate(prompt);
239
+ const locateResult = await agent.aiLocate(prompt, locateTask);
233
240
  this.setResult(locateTask.name, locateResult);
234
241
  } else if ("aiWaitFor" in flowItem) {
235
242
  const waitForTask = flowItem;
@@ -438,10 +445,10 @@ var import_utils12 = require("@midscene/shared/utils");
438
445
  // src/common/tasks.ts
439
446
  var import_core = require("@midscene/core");
440
447
  var import_ai_model2 = require("@midscene/core/ai-model");
441
- var import_utils5 = require("@midscene/core/utils");
448
+ var import_utils7 = require("@midscene/core/utils");
442
449
  var import_constants = require("@midscene/shared/constants");
443
- var import_logger = require("@midscene/shared/logger");
444
- var import_utils6 = require("@midscene/shared/utils");
450
+ var import_logger2 = require("@midscene/shared/logger");
451
+ var import_utils8 = require("@midscene/shared/utils");
445
452
 
446
453
  // src/common/ui-utils.ts
447
454
  function typeStr(task) {
@@ -518,11 +525,11 @@ function paramStr(task) {
518
525
 
519
526
  // src/common/utils.ts
520
527
  var import_ai_model = require("@midscene/core/ai-model");
521
- var import_utils3 = require("@midscene/core/utils");
528
+ var import_utils5 = require("@midscene/core/utils");
522
529
  var import_env = require("@midscene/shared/env");
523
530
  var import_extractor = require("@midscene/shared/extractor");
524
531
  var import_img = require("@midscene/shared/img");
525
- var import_utils4 = require("@midscene/shared/utils");
532
+ var import_utils6 = require("@midscene/shared/utils");
526
533
  var import_dayjs = __toESM(require("dayjs"));
527
534
 
528
535
  // src/web-element.ts
@@ -550,14 +557,189 @@ var WebElementInfo = class {
550
557
  }
551
558
  };
552
559
 
560
+ // src/common/task-cache.ts
561
+ var import_node_assert = __toESM(require("assert"));
562
+ var import_node_fs2 = require("fs");
563
+ var import_node_path2 = require("path");
564
+ var import_common2 = require("@midscene/shared/common");
565
+ var import_logger = require("@midscene/shared/logger");
566
+ var import_utils3 = require("@midscene/shared/utils");
567
+ var import_js_yaml3 = __toESM(require("js-yaml"));
568
+ var import_semver = __toESM(require("semver"));
569
+
570
+ // package.json
571
+ var version = "0.20.0";
572
+
573
+ // src/common/task-cache.ts
574
+ var debug = (0, import_logger.getDebug)("cache");
575
+ var lowestSupportedMidsceneVersion = "0.16.10";
576
+ var cacheFileExt = ".cache.yaml";
577
+ var TaskCache = class {
578
+ // Track matched records
579
+ constructor(cacheId, isCacheResultUsed, cacheFilePath) {
580
+ this.matchedCacheIndices = /* @__PURE__ */ new Set();
581
+ (0, import_node_assert.default)(cacheId, "cacheId is required");
582
+ this.cacheId = replaceIllegalPathCharsAndSpace(cacheId);
583
+ this.cacheFilePath = import_utils3.ifInBrowser ? void 0 : cacheFilePath || (0, import_node_path2.join)((0, import_common2.getMidsceneRunSubDir)("cache"), `${this.cacheId}${cacheFileExt}`);
584
+ this.isCacheResultUsed = isCacheResultUsed;
585
+ let cacheContent;
586
+ if (this.cacheFilePath) {
587
+ cacheContent = this.loadCacheFromFile();
588
+ }
589
+ if (!cacheContent) {
590
+ cacheContent = {
591
+ midsceneVersion: version,
592
+ cacheId: this.cacheId,
593
+ caches: []
594
+ };
595
+ }
596
+ this.cache = cacheContent;
597
+ this.cacheOriginalLength = this.cache.caches.length;
598
+ }
599
+ matchCache(prompt, type) {
600
+ for (let i = 0; i < this.cacheOriginalLength; i++) {
601
+ const item = this.cache.caches[i];
602
+ const key = `${type}:${prompt}:${i}`;
603
+ if (item.type === type && item.prompt === prompt && !this.matchedCacheIndices.has(key)) {
604
+ this.matchedCacheIndices.add(key);
605
+ debug(
606
+ "cache found and marked as used, type: %s, prompt: %s, index: %d",
607
+ type,
608
+ prompt,
609
+ i
610
+ );
611
+ return {
612
+ cacheContent: item,
613
+ updateFn: (cb) => {
614
+ debug(
615
+ "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
616
+ type,
617
+ prompt,
618
+ i
619
+ );
620
+ cb(item);
621
+ debug(
622
+ "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
623
+ type,
624
+ prompt,
625
+ i
626
+ );
627
+ this.flushCacheToFile();
628
+ }
629
+ };
630
+ }
631
+ }
632
+ debug("no unused cache found, type: %s, prompt: %s", type, prompt);
633
+ return void 0;
634
+ }
635
+ matchPlanCache(prompt) {
636
+ return this.matchCache(prompt, "plan");
637
+ }
638
+ matchLocateCache(prompt) {
639
+ return this.matchCache(prompt, "locate");
640
+ }
641
+ appendCache(cache) {
642
+ debug("will append cache", cache);
643
+ this.cache.caches.push(cache);
644
+ this.flushCacheToFile();
645
+ }
646
+ loadCacheFromFile() {
647
+ const cacheFile = this.cacheFilePath;
648
+ (0, import_node_assert.default)(cacheFile, "cache file path is required");
649
+ if (!(0, import_node_fs2.existsSync)(cacheFile)) {
650
+ debug("no cache file found, path: %s", cacheFile);
651
+ return void 0;
652
+ }
653
+ const jsonTypeCacheFile = cacheFile.replace(cacheFileExt, ".json");
654
+ if ((0, import_node_fs2.existsSync)(jsonTypeCacheFile) && this.isCacheResultUsed) {
655
+ console.warn(
656
+ `An outdated cache file from an earlier version of Midscene has been detected. Since version 0.17, we have implemented an improved caching strategy. Please delete the old file located at: ${jsonTypeCacheFile}.`
657
+ );
658
+ return void 0;
659
+ }
660
+ try {
661
+ const data = (0, import_node_fs2.readFileSync)(cacheFile, "utf8");
662
+ const jsonData = import_js_yaml3.default.load(data);
663
+ if (!version) {
664
+ debug("no midscene version info, will not read cache from file");
665
+ return void 0;
666
+ }
667
+ if (import_semver.default.lt(jsonData.midsceneVersion, lowestSupportedMidsceneVersion) && !jsonData.midsceneVersion.includes("beta")) {
668
+ console.warn(
669
+ `You are using an old version of Midscene cache file, and we cannot match any info from it. Starting from Midscene v0.17, we changed our strategy to use xpath for cache info, providing better performance.
670
+ Please delete the existing cache and rebuild it. Sorry for the inconvenience.
671
+ cache file: ${cacheFile}`
672
+ );
673
+ return void 0;
674
+ }
675
+ debug(
676
+ "cache loaded from file, path: %s, cache version: %s, record length: %s",
677
+ cacheFile,
678
+ jsonData.midsceneVersion,
679
+ jsonData.caches.length
680
+ );
681
+ jsonData.midsceneVersion = version;
682
+ return jsonData;
683
+ } catch (err) {
684
+ debug(
685
+ "cache file exists but load failed, path: %s, error: %s",
686
+ cacheFile,
687
+ err
688
+ );
689
+ return void 0;
690
+ }
691
+ }
692
+ flushCacheToFile() {
693
+ if (!version) {
694
+ debug("no midscene version info, will not write cache to file");
695
+ return;
696
+ }
697
+ if (!this.cacheFilePath) {
698
+ debug("no cache file path, will not write cache to file");
699
+ return;
700
+ }
701
+ try {
702
+ const dir = (0, import_node_path2.dirname)(this.cacheFilePath);
703
+ if (!(0, import_node_fs2.existsSync)(dir)) {
704
+ (0, import_node_fs2.mkdirSync)(dir, { recursive: true });
705
+ debug("created cache directory: %s", dir);
706
+ }
707
+ const yamlData = import_js_yaml3.default.dump(this.cache);
708
+ (0, import_node_fs2.writeFileSync)(this.cacheFilePath, yamlData);
709
+ debug("cache flushed to file: %s", this.cacheFilePath);
710
+ } catch (err) {
711
+ debug(
712
+ "write cache to file failed, path: %s, error: %s",
713
+ this.cacheFilePath,
714
+ err
715
+ );
716
+ }
717
+ }
718
+ updateOrAppendCacheRecord(newRecord, cachedRecord) {
719
+ if (cachedRecord) {
720
+ if (newRecord.type === "plan") {
721
+ cachedRecord.updateFn((cache) => {
722
+ cache.yamlWorkflow = newRecord.yamlWorkflow;
723
+ });
724
+ } else {
725
+ cachedRecord.updateFn((cache) => {
726
+ cache.xpaths = newRecord.xpaths;
727
+ });
728
+ }
729
+ } else {
730
+ this.appendCache(newRecord);
731
+ }
732
+ }
733
+ };
734
+
553
735
  // src/common/utils.ts
554
736
  async function parseContextFromWebPage(page, _opt) {
555
- (0, import_utils4.assert)(page, "page is required");
737
+ (0, import_utils6.assert)(page, "page is required");
556
738
  if (page._forceUsePageContext) {
557
739
  return await page._forceUsePageContext();
558
740
  }
559
741
  const url = await page.url();
560
- (0, import_utils3.uploadTestInfoToServer)({ testUrl: url });
742
+ (0, import_utils5.uploadTestInfoToServer)({ testUrl: url });
561
743
  let screenshotBase64;
562
744
  let tree;
563
745
  await Promise.all([
@@ -579,7 +761,7 @@ async function parseContextFromWebPage(page, _opt) {
579
761
  isVisible
580
762
  });
581
763
  });
582
- (0, import_utils4.assert)(screenshotBase64, "screenshotBase64 is required");
764
+ (0, import_utils6.assert)(screenshotBase64, "screenshotBase64 is required");
583
765
  const size = await page.size();
584
766
  if (size.dpr && size.dpr > 1) {
585
767
  screenshotBase64 = await (0, import_img.resizeImgBase64)(screenshotBase64, {
@@ -597,11 +779,11 @@ async function parseContextFromWebPage(page, _opt) {
597
779
  function reportFileName(tag = "web") {
598
780
  const reportTagName = (0, import_env.getAIConfig)(import_env.MIDSCENE_REPORT_TAG_NAME);
599
781
  const dateTimeInFileName = (0, import_dayjs.default)().format("YYYY-MM-DD_HH-mm-ss");
600
- const uniqueId = (0, import_utils4.uuid)().substring(0, 8);
782
+ const uniqueId = (0, import_utils6.uuid)().substring(0, 8);
601
783
  return `${reportTagName || tag}-${dateTimeInFileName}-${uniqueId}`;
602
784
  }
603
785
  function printReportMsg(filepath) {
604
- (0, import_utils4.logMsg)(`Midscene - report file updated: ${filepath}`);
786
+ (0, import_utils6.logMsg)(`Midscene - report file updated: ${filepath}`);
605
787
  }
606
788
  function replaceIllegalPathCharsAndSpace(str) {
607
789
  return str.replace(/[:*?"<>| ]/g, "-");
@@ -626,6 +808,28 @@ function matchElementFromPlan(planLocateParam, tree) {
626
808
  }
627
809
  return void 0;
628
810
  }
811
+ async function matchElementFromCache(taskExecutor, xpaths, cachePrompt, cacheable) {
812
+ try {
813
+ if (xpaths?.length && taskExecutor.taskCache?.isCacheResultUsed && cacheable !== false) {
814
+ for (let i = 0; i < xpaths.length; i++) {
815
+ const element = await taskExecutor.page.getElementInfoByXpath(
816
+ xpaths[i]
817
+ );
818
+ if (element?.id) {
819
+ debug("cache hit, prompt: %s", cachePrompt);
820
+ debug(
821
+ "found a new new element with same xpath, xpath: %s, id: %s",
822
+ xpaths[i],
823
+ element?.id
824
+ );
825
+ return element;
826
+ }
827
+ }
828
+ }
829
+ } catch (error) {
830
+ debug("get element info by xpath error: ", error);
831
+ }
832
+ }
629
833
  function trimContextByViewport(execution) {
630
834
  function filterVisibleTree(node) {
631
835
  if (!node)
@@ -664,7 +868,7 @@ function trimContextByViewport(execution) {
664
868
  }
665
869
 
666
870
  // src/common/tasks.ts
667
- var debug = (0, import_logger.getDebug)("page-task-executor");
871
+ var debug2 = (0, import_logger2.getDebug)("page-task-executor");
668
872
  var replanningCountLimit = 10;
669
873
  var isAndroidPage = (page) => {
670
874
  return page.pageType === "android";
@@ -705,7 +909,7 @@ var PageTaskExecutor = class {
705
909
  if (info?.id) {
706
910
  elementId = info.id;
707
911
  } else {
708
- debug(
912
+ debug2(
709
913
  "no element id found for position node, will not update cache",
710
914
  element
711
915
  );
@@ -718,7 +922,7 @@ var PageTaskExecutor = class {
718
922
  const result = await this.page.getXpathsById(elementId);
719
923
  return result;
720
924
  } catch (error) {
721
- debug("getXpathsById error: ", error);
925
+ debug2("getXpathsById error: ", error);
722
926
  }
723
927
  }
724
928
  prependExecutorWithScreenshot(taskApply, appendAfterExecution = false) {
@@ -734,7 +938,7 @@ var PageTaskExecutor = class {
734
938
  if (taskApply.type === "Action") {
735
939
  await Promise.all([
736
940
  (async () => {
737
- await (0, import_utils5.sleep)(100);
941
+ await (0, import_utils7.sleep)(100);
738
942
  if (this.page.waitUntilNetworkIdle) {
739
943
  try {
740
944
  await this.page.waitUntilNetworkIdle();
@@ -742,7 +946,7 @@ var PageTaskExecutor = class {
742
946
  }
743
947
  }
744
948
  })(),
745
- (0, import_utils5.sleep)(200)
949
+ (0, import_utils7.sleep)(200)
746
950
  ]);
747
951
  }
748
952
  if (appendAfterExecution) {
@@ -772,7 +976,7 @@ var PageTaskExecutor = class {
772
976
  locate: plan2.locate,
773
977
  executor: async (param, taskContext) => {
774
978
  const { task } = taskContext;
775
- (0, import_utils6.assert)(
979
+ (0, import_utils8.assert)(
776
980
  param?.prompt || param?.id || param?.bbox,
777
981
  "No prompt or id or position or bbox to locate"
778
982
  );
@@ -797,39 +1001,29 @@ var PageTaskExecutor = class {
797
1001
  timing: "before Insight"
798
1002
  };
799
1003
  task.recorder = [recordItem];
800
- let cacheHitFlag = false;
1004
+ const elementFromXpath = param.xpath ? await this.page.getElementInfoByXpath(param.xpath) : void 0;
1005
+ const userExpectedPathHitFlag = !!elementFromXpath;
801
1006
  const cachePrompt = param.prompt;
802
1007
  const locateCacheRecord = this.taskCache?.matchLocateCache(cachePrompt);
803
1008
  const xpaths = locateCacheRecord?.cacheContent?.xpaths;
804
- let elementFromCache = null;
805
- try {
806
- if (xpaths?.length && this.taskCache?.isCacheResultUsed && param?.cacheable !== false) {
807
- for (let i = 0; i < xpaths.length; i++) {
808
- const element2 = await this.page.getElementInfoByXpath(
809
- xpaths[i]
810
- );
811
- if (element2?.id) {
812
- elementFromCache = element2;
813
- debug("cache hit, prompt: %s", cachePrompt);
814
- cacheHitFlag = true;
815
- debug(
816
- "found a new new element with same xpath, xpath: %s, id: %s",
817
- xpaths[i],
818
- element2?.id
819
- );
820
- break;
821
- }
822
- }
823
- }
824
- } catch (error) {
825
- debug("get element info by xpath error: ", error);
826
- }
827
- const startTime = Date.now();
828
- const element = elementFromCache || // try to match element from cache
829
- matchElementFromPlan(param, pageContext.tree) || // try to match element from plan
830
- (await this.insight.locate(param, {
1009
+ const elementFromCache = userExpectedPathHitFlag ? null : await matchElementFromCache(
1010
+ this,
1011
+ xpaths,
1012
+ cachePrompt,
1013
+ param.cacheable
1014
+ );
1015
+ const cacheHitFlag = !!elementFromCache;
1016
+ const elementFromPlan = !userExpectedPathHitFlag && !cacheHitFlag ? matchElementFromPlan(param, pageContext.tree) : void 0;
1017
+ const planHitFlag = !!elementFromPlan;
1018
+ const elementFromAiLocate = !userExpectedPathHitFlag && !cacheHitFlag && !planHitFlag ? (await this.insight.locate(param, {
1019
+ // fallback to ai locate
831
1020
  context: pageContext
832
- })).element;
1021
+ })).element : void 0;
1022
+ const aiLocateHitFlag = !!elementFromAiLocate;
1023
+ const element = elementFromXpath || // highest priority
1024
+ elementFromCache || // second priority
1025
+ elementFromPlan || // third priority
1026
+ elementFromAiLocate;
833
1027
  let currentXpaths;
834
1028
  if (element && this.taskCache && !cacheHitFlag && param?.cacheable !== false) {
835
1029
  const elementXpaths = await this.getElementXpath(
@@ -847,7 +1041,7 @@ var PageTaskExecutor = class {
847
1041
  locateCacheRecord
848
1042
  );
849
1043
  } else {
850
- debug(
1044
+ debug2(
851
1045
  "no xpaths found, will not update cache",
852
1046
  cachePrompt,
853
1047
  elementXpaths
@@ -857,16 +1051,44 @@ var PageTaskExecutor = class {
857
1051
  if (!element) {
858
1052
  throw new Error(`Element not found: ${param.prompt}`);
859
1053
  }
1054
+ let hitBy;
1055
+ if (userExpectedPathHitFlag) {
1056
+ hitBy = {
1057
+ from: "User expected path",
1058
+ context: {
1059
+ xpath: param.xpath
1060
+ }
1061
+ };
1062
+ } else if (cacheHitFlag) {
1063
+ hitBy = {
1064
+ from: "Cache",
1065
+ context: {
1066
+ xpathsFromCache: xpaths,
1067
+ xpathsToSave: currentXpaths
1068
+ }
1069
+ };
1070
+ } else if (planHitFlag) {
1071
+ hitBy = {
1072
+ from: "Planning",
1073
+ context: {
1074
+ id: elementFromPlan?.id,
1075
+ bbox: elementFromPlan?.bbox
1076
+ }
1077
+ };
1078
+ } else if (aiLocateHitFlag) {
1079
+ hitBy = {
1080
+ from: "AI model",
1081
+ context: {
1082
+ prompt: param.prompt
1083
+ }
1084
+ };
1085
+ }
860
1086
  return {
861
1087
  output: {
862
1088
  element
863
1089
  },
864
1090
  pageContext,
865
- cache: {
866
- hit: cacheHitFlag,
867
- originalXpaths: xpaths,
868
- currentXpaths
869
- }
1091
+ hitBy
870
1092
  };
871
1093
  }
872
1094
  };
@@ -962,7 +1184,7 @@ var PageTaskExecutor = class {
962
1184
  thought: plan2.thought,
963
1185
  locate: plan2.locate,
964
1186
  executor: async (param, { element }) => {
965
- (0, import_utils6.assert)(element, "Element not found, cannot tap");
1187
+ (0, import_utils8.assert)(element, "Element not found, cannot tap");
966
1188
  await this.page.mouse.click(element.center[0], element.center[1]);
967
1189
  }
968
1190
  };
@@ -974,7 +1196,7 @@ var PageTaskExecutor = class {
974
1196
  thought: plan2.thought,
975
1197
  locate: plan2.locate,
976
1198
  executor: async (param, { element }) => {
977
- (0, import_utils6.assert)(element, "Element not found, cannot right click");
1199
+ (0, import_utils8.assert)(element, "Element not found, cannot right click");
978
1200
  await this.page.mouse.click(
979
1201
  element.center[0],
980
1202
  element.center[1],
@@ -991,7 +1213,7 @@ var PageTaskExecutor = class {
991
1213
  thought: plan2.thought,
992
1214
  locate: plan2.locate,
993
1215
  executor: async (taskParam) => {
994
- (0, import_utils6.assert)(
1216
+ (0, import_utils8.assert)(
995
1217
  taskParam?.start_box && taskParam?.end_box,
996
1218
  "No start_box or end_box to drag"
997
1219
  );
@@ -1006,7 +1228,7 @@ var PageTaskExecutor = class {
1006
1228
  thought: plan2.thought,
1007
1229
  locate: plan2.locate,
1008
1230
  executor: async (param, { element }) => {
1009
- (0, import_utils6.assert)(element, "Element not found, cannot hover");
1231
+ (0, import_utils8.assert)(element, "Element not found, cannot hover");
1010
1232
  await this.page.mouse.move(element.center[0], element.center[1]);
1011
1233
  }
1012
1234
  };
@@ -1058,7 +1280,7 @@ var PageTaskExecutor = class {
1058
1280
  `Unknown scroll direction: ${taskParam.direction}`
1059
1281
  );
1060
1282
  }
1061
- await (0, import_utils5.sleep)(500);
1283
+ await (0, import_utils7.sleep)(500);
1062
1284
  } else {
1063
1285
  throw new Error(
1064
1286
  `Unknown scroll event type: ${scrollToEventName}, taskParam: ${JSON.stringify(
@@ -1077,7 +1299,7 @@ var PageTaskExecutor = class {
1077
1299
  thought: plan2.thought,
1078
1300
  locate: plan2.locate,
1079
1301
  executor: async (taskParam) => {
1080
- await (0, import_utils5.sleep)(taskParam?.timeMs || 3e3);
1302
+ await (0, import_utils7.sleep)(taskParam?.timeMs || 3e3);
1081
1303
  }
1082
1304
  };
1083
1305
  tasks.push(taskActionSleep);
@@ -1125,7 +1347,7 @@ var PageTaskExecutor = class {
1125
1347
  thought: plan2.thought,
1126
1348
  locate: plan2.locate,
1127
1349
  executor: async (param) => {
1128
- (0, import_utils6.assert)(
1350
+ (0, import_utils8.assert)(
1129
1351
  isAndroidPage(this.page),
1130
1352
  "Cannot use home button on non-Android devices"
1131
1353
  );
@@ -1141,7 +1363,7 @@ var PageTaskExecutor = class {
1141
1363
  thought: plan2.thought,
1142
1364
  locate: plan2.locate,
1143
1365
  executor: async (param) => {
1144
- (0, import_utils6.assert)(
1366
+ (0, import_utils8.assert)(
1145
1367
  isAndroidPage(this.page),
1146
1368
  "Cannot use back button on non-Android devices"
1147
1369
  );
@@ -1157,7 +1379,7 @@ var PageTaskExecutor = class {
1157
1379
  thought: plan2.thought,
1158
1380
  locate: plan2.locate,
1159
1381
  executor: async (param) => {
1160
- (0, import_utils6.assert)(
1382
+ (0, import_utils8.assert)(
1161
1383
  isAndroidPage(this.page),
1162
1384
  "Cannot use recent apps button on non-Android devices"
1163
1385
  );
@@ -1308,7 +1530,7 @@ var PageTaskExecutor = class {
1308
1530
  }
1309
1531
  }
1310
1532
  if (finalActions.length === 0) {
1311
- (0, import_utils6.assert)(
1533
+ (0, import_utils8.assert)(
1312
1534
  !more_actions_needed_by_instruction || sleep4,
1313
1535
  error ? `Failed to plan: ${error}` : planParsingError || "No plan found"
1314
1536
  );
@@ -1546,7 +1768,7 @@ var PageTaskExecutor = class {
1546
1768
  );
1547
1769
  let outputResult = data;
1548
1770
  if (ifTypeRestricted) {
1549
- (0, import_utils6.assert)(data?.result !== void 0, "No result in query data");
1771
+ (0, import_utils8.assert)(data?.result !== void 0, "No result in query data");
1550
1772
  outputResult = data.result;
1551
1773
  }
1552
1774
  return {
@@ -1642,9 +1864,9 @@ var PageTaskExecutor = class {
1642
1864
  onTaskStart: this.onTaskStartCallback
1643
1865
  });
1644
1866
  const { timeoutMs, checkIntervalMs } = opt;
1645
- (0, import_utils6.assert)(assertion, "No assertion for waitFor");
1646
- (0, import_utils6.assert)(timeoutMs, "No timeoutMs for waitFor");
1647
- (0, import_utils6.assert)(checkIntervalMs, "No checkIntervalMs for waitFor");
1867
+ (0, import_utils8.assert)(assertion, "No assertion for waitFor");
1868
+ (0, import_utils8.assert)(timeoutMs, "No timeoutMs for waitFor");
1869
+ (0, import_utils8.assert)(checkIntervalMs, "No checkIntervalMs for waitFor");
1648
1870
  const overallStartTime = Date.now();
1649
1871
  let startTime = Date.now();
1650
1872
  let errorThought = "";
@@ -1698,9 +1920,9 @@ var PageTaskExecutor = class {
1698
1920
  };
1699
1921
 
1700
1922
  // src/common/plan-builder.ts
1701
- var import_logger2 = require("@midscene/shared/logger");
1702
- var import_utils8 = require("@midscene/shared/utils");
1703
- var debug2 = (0, import_logger2.getDebug)("plan-builder");
1923
+ var import_logger3 = require("@midscene/shared/logger");
1924
+ var import_utils10 = require("@midscene/shared/utils");
1925
+ var debug3 = (0, import_logger3.getDebug)("plan-builder");
1704
1926
  function buildPlans(type, locateParam, param) {
1705
1927
  let returnPlans = [];
1706
1928
  const locatePlan = locateParam ? {
@@ -1710,8 +1932,8 @@ function buildPlans(type, locateParam, param) {
1710
1932
  thought: ""
1711
1933
  } : null;
1712
1934
  if (type === "Tap" || type === "Hover" || type === "RightClick") {
1713
- (0, import_utils8.assert)(locateParam, `missing locate info for action "${type}"`);
1714
- (0, import_utils8.assert)(locatePlan, `missing locate info for action "${type}"`);
1935
+ (0, import_utils10.assert)(locateParam, `missing locate info for action "${type}"`);
1936
+ (0, import_utils10.assert)(locatePlan, `missing locate info for action "${type}"`);
1715
1937
  const tapPlan = {
1716
1938
  type,
1717
1939
  param: null,
@@ -1722,9 +1944,9 @@ function buildPlans(type, locateParam, param) {
1722
1944
  }
1723
1945
  if (type === "Input" || type === "KeyboardPress") {
1724
1946
  if (type === "Input") {
1725
- (0, import_utils8.assert)(locateParam, `missing locate info for action "${type}"`);
1947
+ (0, import_utils10.assert)(locateParam, `missing locate info for action "${type}"`);
1726
1948
  }
1727
- (0, import_utils8.assert)(param, `missing param for action "${type}"`);
1949
+ (0, import_utils10.assert)(param, `missing param for action "${type}"`);
1728
1950
  const inputPlan = {
1729
1951
  type,
1730
1952
  param,
@@ -1738,7 +1960,7 @@ function buildPlans(type, locateParam, param) {
1738
1960
  }
1739
1961
  }
1740
1962
  if (type === "Scroll") {
1741
- (0, import_utils8.assert)(param, `missing param for action "${type}"`);
1963
+ (0, import_utils10.assert)(param, `missing param for action "${type}"`);
1742
1964
  const scrollPlan = {
1743
1965
  type,
1744
1966
  param,
@@ -1752,7 +1974,7 @@ function buildPlans(type, locateParam, param) {
1752
1974
  }
1753
1975
  }
1754
1976
  if (type === "Sleep") {
1755
- (0, import_utils8.assert)(param, `missing param for action "${type}"`);
1977
+ (0, import_utils10.assert)(param, `missing param for action "${type}"`);
1756
1978
  const sleepPlan = {
1757
1979
  type,
1758
1980
  param,
@@ -1762,7 +1984,7 @@ function buildPlans(type, locateParam, param) {
1762
1984
  returnPlans = [sleepPlan];
1763
1985
  }
1764
1986
  if (type === "Locate") {
1765
- (0, import_utils8.assert)(locateParam, `missing locate info for action "${type}"`);
1987
+ (0, import_utils10.assert)(locateParam, `missing locate info for action "${type}"`);
1766
1988
  const locatePlan2 = {
1767
1989
  type,
1768
1990
  param: locateParam,
@@ -1772,187 +1994,12 @@ function buildPlans(type, locateParam, param) {
1772
1994
  returnPlans = [locatePlan2];
1773
1995
  }
1774
1996
  if (returnPlans) {
1775
- debug2("buildPlans", returnPlans);
1997
+ debug3("buildPlans", returnPlans);
1776
1998
  return returnPlans;
1777
1999
  }
1778
2000
  throw new Error(`Not supported type: ${type}`);
1779
2001
  }
1780
2002
 
1781
- // src/common/task-cache.ts
1782
- var import_node_assert = __toESM(require("assert"));
1783
- var import_node_fs2 = require("fs");
1784
- var import_node_path2 = require("path");
1785
- var import_common2 = require("@midscene/shared/common");
1786
- var import_logger3 = require("@midscene/shared/logger");
1787
- var import_utils9 = require("@midscene/shared/utils");
1788
- var import_js_yaml3 = __toESM(require("js-yaml"));
1789
- var import_semver = __toESM(require("semver"));
1790
-
1791
- // package.json
1792
- var version = "0.19.1";
1793
-
1794
- // src/common/task-cache.ts
1795
- var debug3 = (0, import_logger3.getDebug)("cache");
1796
- var lowestSupportedMidsceneVersion = "0.16.10";
1797
- var cacheFileExt = ".cache.yaml";
1798
- var TaskCache = class {
1799
- // Track matched records
1800
- constructor(cacheId, isCacheResultUsed, cacheFilePath) {
1801
- this.matchedCacheIndices = /* @__PURE__ */ new Set();
1802
- (0, import_node_assert.default)(cacheId, "cacheId is required");
1803
- this.cacheId = replaceIllegalPathCharsAndSpace(cacheId);
1804
- this.cacheFilePath = import_utils9.ifInBrowser ? void 0 : cacheFilePath || (0, import_node_path2.join)((0, import_common2.getMidsceneRunSubDir)("cache"), `${this.cacheId}${cacheFileExt}`);
1805
- this.isCacheResultUsed = isCacheResultUsed;
1806
- let cacheContent;
1807
- if (this.cacheFilePath) {
1808
- cacheContent = this.loadCacheFromFile();
1809
- }
1810
- if (!cacheContent) {
1811
- cacheContent = {
1812
- midsceneVersion: version,
1813
- cacheId: this.cacheId,
1814
- caches: []
1815
- };
1816
- }
1817
- this.cache = cacheContent;
1818
- this.cacheOriginalLength = this.cache.caches.length;
1819
- }
1820
- matchCache(prompt, type) {
1821
- for (let i = 0; i < this.cacheOriginalLength; i++) {
1822
- const item = this.cache.caches[i];
1823
- const key = `${type}:${prompt}:${i}`;
1824
- if (item.type === type && item.prompt === prompt && !this.matchedCacheIndices.has(key)) {
1825
- this.matchedCacheIndices.add(key);
1826
- debug3(
1827
- "cache found and marked as used, type: %s, prompt: %s, index: %d",
1828
- type,
1829
- prompt,
1830
- i
1831
- );
1832
- return {
1833
- cacheContent: item,
1834
- updateFn: (cb) => {
1835
- debug3(
1836
- "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1837
- type,
1838
- prompt,
1839
- i
1840
- );
1841
- cb(item);
1842
- debug3(
1843
- "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1844
- type,
1845
- prompt,
1846
- i
1847
- );
1848
- this.flushCacheToFile();
1849
- }
1850
- };
1851
- }
1852
- }
1853
- debug3("no unused cache found, type: %s, prompt: %s", type, prompt);
1854
- return void 0;
1855
- }
1856
- matchPlanCache(prompt) {
1857
- return this.matchCache(prompt, "plan");
1858
- }
1859
- matchLocateCache(prompt) {
1860
- return this.matchCache(prompt, "locate");
1861
- }
1862
- appendCache(cache) {
1863
- debug3("will append cache", cache);
1864
- this.cache.caches.push(cache);
1865
- this.flushCacheToFile();
1866
- }
1867
- loadCacheFromFile() {
1868
- const cacheFile = this.cacheFilePath;
1869
- (0, import_node_assert.default)(cacheFile, "cache file path is required");
1870
- if (!(0, import_node_fs2.existsSync)(cacheFile)) {
1871
- debug3("no cache file found, path: %s", cacheFile);
1872
- return void 0;
1873
- }
1874
- const jsonTypeCacheFile = cacheFile.replace(cacheFileExt, ".json");
1875
- if ((0, import_node_fs2.existsSync)(jsonTypeCacheFile) && this.isCacheResultUsed) {
1876
- console.warn(
1877
- `An outdated cache file from an earlier version of Midscene has been detected. Since version 0.17, we have implemented an improved caching strategy. Please delete the old file located at: ${jsonTypeCacheFile}.`
1878
- );
1879
- return void 0;
1880
- }
1881
- try {
1882
- const data = (0, import_node_fs2.readFileSync)(cacheFile, "utf8");
1883
- const jsonData = import_js_yaml3.default.load(data);
1884
- if (!version) {
1885
- debug3("no midscene version info, will not read cache from file");
1886
- return void 0;
1887
- }
1888
- if (import_semver.default.lt(jsonData.midsceneVersion, lowestSupportedMidsceneVersion) && !jsonData.midsceneVersion.includes("beta")) {
1889
- console.warn(
1890
- `You are using an old version of Midscene cache file, and we cannot match any info from it. Starting from Midscene v0.17, we changed our strategy to use xpath for cache info, providing better performance.
1891
- Please delete the existing cache and rebuild it. Sorry for the inconvenience.
1892
- cache file: ${cacheFile}`
1893
- );
1894
- return void 0;
1895
- }
1896
- debug3(
1897
- "cache loaded from file, path: %s, cache version: %s, record length: %s",
1898
- cacheFile,
1899
- jsonData.midsceneVersion,
1900
- jsonData.caches.length
1901
- );
1902
- jsonData.midsceneVersion = version;
1903
- return jsonData;
1904
- } catch (err) {
1905
- debug3(
1906
- "cache file exists but load failed, path: %s, error: %s",
1907
- cacheFile,
1908
- err
1909
- );
1910
- return void 0;
1911
- }
1912
- }
1913
- flushCacheToFile() {
1914
- if (!version) {
1915
- debug3("no midscene version info, will not write cache to file");
1916
- return;
1917
- }
1918
- if (!this.cacheFilePath) {
1919
- debug3("no cache file path, will not write cache to file");
1920
- return;
1921
- }
1922
- try {
1923
- const dir = (0, import_node_path2.dirname)(this.cacheFilePath);
1924
- if (!(0, import_node_fs2.existsSync)(dir)) {
1925
- (0, import_node_fs2.mkdirSync)(dir, { recursive: true });
1926
- debug3("created cache directory: %s", dir);
1927
- }
1928
- const yamlData = import_js_yaml3.default.dump(this.cache);
1929
- (0, import_node_fs2.writeFileSync)(this.cacheFilePath, yamlData);
1930
- debug3("cache flushed to file: %s", this.cacheFilePath);
1931
- } catch (err) {
1932
- debug3(
1933
- "write cache to file failed, path: %s, error: %s",
1934
- this.cacheFilePath,
1935
- err
1936
- );
1937
- }
1938
- }
1939
- updateOrAppendCacheRecord(newRecord, cachedRecord) {
1940
- if (cachedRecord) {
1941
- if (newRecord.type === "plan") {
1942
- cachedRecord.updateFn((cache) => {
1943
- cache.yamlWorkflow = newRecord.yamlWorkflow;
1944
- });
1945
- } else {
1946
- cachedRecord.updateFn((cache) => {
1947
- cache.xpaths = newRecord.xpaths;
1948
- });
1949
- }
1950
- } else {
1951
- this.appendCache(newRecord);
1952
- }
1953
- }
1954
- };
1955
-
1956
2003
  // src/common/agent.ts
1957
2004
  var debug4 = (0, import_logger4.getDebug)("web-integration");
1958
2005
  var distanceOfTwoPoints = (p1, p2) => {
@@ -2081,10 +2128,12 @@ ${errorTask?.errorStack}`);
2081
2128
  const prompt = opt.prompt ?? locatePrompt;
2082
2129
  const deepThink = opt.deepThink ?? false;
2083
2130
  const cacheable = opt.cacheable ?? true;
2131
+ const xpath = opt.xpath;
2084
2132
  return {
2085
2133
  prompt,
2086
2134
  deepThink,
2087
- cacheable
2135
+ cacheable,
2136
+ xpath
2088
2137
  };
2089
2138
  }
2090
2139
  return {
@@ -2242,6 +2291,9 @@ ${errorTask?.errorStack}`);
2242
2291
  this.afterTaskRunning(executor);
2243
2292
  return output;
2244
2293
  }
2294
+ async aiAsk(prompt, opt = defaultInsightExtractOption) {
2295
+ return this.aiString(prompt, opt);
2296
+ }
2245
2297
  async describeElementAtPoint(center, opt) {
2246
2298
  const { verifyPrompt = true, retryLimit = 3 } = opt || {};
2247
2299
  let success = false;
@@ -2561,7 +2613,7 @@ var BridgeServer = class {
2561
2613
  this.socket = socket;
2562
2614
  const clientVersion = socket.handshake.query.version;
2563
2615
  (0, import_utils17.logMsg)(
2564
- `Bridge connected, cli-side version v${"0.19.1"}, browser-side version v${clientVersion}`
2616
+ `Bridge connected, cli-side version v${"0.20.0"}, browser-side version v${clientVersion}`
2565
2617
  );
2566
2618
  socket.on("bridge-call-response" /* CallResponse */, (params) => {
2567
2619
  const id = params.id;
@@ -2592,7 +2644,7 @@ var BridgeServer = class {
2592
2644
  setTimeout(() => {
2593
2645
  this.onConnect?.();
2594
2646
  const payload = {
2595
- version: "0.19.1"
2647
+ version: "0.20.0"
2596
2648
  };
2597
2649
  socket.emit("bridge-connected" /* Connected */, payload);
2598
2650
  Promise.resolve().then(() => {