@midscene/web 0.19.1 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. package/dist/es/agent.js +299 -247
  2. package/dist/es/agent.js.map +1 -1
  3. package/dist/es/bridge-mode-browser.js +3 -3
  4. package/dist/es/bridge-mode.js +301 -249
  5. package/dist/es/bridge-mode.js.map +1 -1
  6. package/dist/es/chrome-extension.js +342 -290
  7. package/dist/es/chrome-extension.js.map +1 -1
  8. package/dist/es/index.js +307 -247
  9. package/dist/es/index.js.map +1 -1
  10. package/dist/es/midscene-playground.js +341 -289
  11. package/dist/es/midscene-playground.js.map +1 -1
  12. package/dist/es/midscene-server.js +25 -12
  13. package/dist/es/midscene-server.js.map +1 -1
  14. package/dist/es/playground.js +341 -289
  15. package/dist/es/playground.js.map +1 -1
  16. package/dist/es/playwright-report.js +14 -1
  17. package/dist/es/playwright-report.js.map +1 -1
  18. package/dist/es/playwright-reporter.js +14 -1
  19. package/dist/es/playwright-reporter.js.map +1 -1
  20. package/dist/es/playwright.js +307 -247
  21. package/dist/es/playwright.js.map +1 -1
  22. package/dist/es/puppeteer-agent-launcher.js +299 -247
  23. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  24. package/dist/es/puppeteer.js +299 -247
  25. package/dist/es/puppeteer.js.map +1 -1
  26. package/dist/es/utils.js +42 -8
  27. package/dist/es/utils.js.map +1 -1
  28. package/dist/es/yaml.js +11 -4
  29. package/dist/es/yaml.js.map +1 -1
  30. package/dist/lib/agent.js +308 -256
  31. package/dist/lib/agent.js.map +1 -1
  32. package/dist/lib/bridge-mode-browser.js +3 -3
  33. package/dist/lib/bridge-mode.js +310 -258
  34. package/dist/lib/bridge-mode.js.map +1 -1
  35. package/dist/lib/chrome-extension.js +355 -303
  36. package/dist/lib/chrome-extension.js.map +1 -1
  37. package/dist/lib/index.js +316 -256
  38. package/dist/lib/index.js.map +1 -1
  39. package/dist/lib/midscene-playground.js +354 -302
  40. package/dist/lib/midscene-playground.js.map +1 -1
  41. package/dist/lib/midscene-server.js +28 -15
  42. package/dist/lib/midscene-server.js.map +1 -1
  43. package/dist/lib/playground.js +354 -302
  44. package/dist/lib/playground.js.map +1 -1
  45. package/dist/lib/playwright-report.js +20 -7
  46. package/dist/lib/playwright-report.js.map +1 -1
  47. package/dist/lib/playwright-reporter.js +20 -7
  48. package/dist/lib/playwright-reporter.js.map +1 -1
  49. package/dist/lib/playwright.js +316 -256
  50. package/dist/lib/playwright.js.map +1 -1
  51. package/dist/lib/puppeteer-agent-launcher.js +308 -256
  52. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  53. package/dist/lib/puppeteer.js +308 -256
  54. package/dist/lib/puppeteer.js.map +1 -1
  55. package/dist/lib/utils.js +48 -13
  56. package/dist/lib/utils.js.map +1 -1
  57. package/dist/lib/yaml.js +11 -4
  58. package/dist/lib/yaml.js.map +1 -1
  59. package/dist/types/agent.d.ts +6 -102
  60. package/dist/types/bridge-mode-browser.d.ts +3 -2
  61. package/dist/types/bridge-mode.d.ts +4 -4
  62. package/dist/types/{browser-5dbb4bfb.d.ts → browser-118d886d.d.ts} +1 -1
  63. package/dist/types/chrome-extension.d.ts +2 -2
  64. package/dist/types/index.d.ts +1 -1
  65. package/dist/types/midscene-server.d.ts +2 -2
  66. package/dist/types/{page-90e9f9a7.d.ts → page-471361cd.d.ts} +102 -3
  67. package/dist/types/playground.d.ts +2 -2
  68. package/dist/types/playwright.d.ts +6 -2
  69. package/dist/types/puppeteer-agent-launcher.d.ts +1 -1
  70. package/dist/types/puppeteer.d.ts +3 -3
  71. package/dist/types/utils.d.ts +2 -1
  72. package/dist/types/yaml.d.ts +1 -1
  73. package/package.json +3 -3
package/dist/lib/index.js CHANGED
@@ -190,7 +190,7 @@ var ScriptPlayer = class {
190
190
  domIncluded: numberTask.domIncluded,
191
191
  screenshotIncluded: numberTask.screenshotIncluded
192
192
  };
193
- (0, import_utils.assert)(prompt, "missing prompt for number");
193
+ (0, import_utils.assert)(prompt, "missing prompt for aiNumber");
194
194
  (0, import_utils.assert)(
195
195
  typeof prompt === "string",
196
196
  "prompt for number must be a string"
@@ -204,7 +204,7 @@ var ScriptPlayer = class {
204
204
  domIncluded: stringTask.domIncluded,
205
205
  screenshotIncluded: stringTask.screenshotIncluded
206
206
  };
207
- (0, import_utils.assert)(prompt, "missing prompt for string");
207
+ (0, import_utils.assert)(prompt, "missing prompt for aiNumber");
208
208
  (0, import_utils.assert)(
209
209
  typeof prompt === "string",
210
210
  "prompt for string must be a string"
@@ -218,13 +218,20 @@ var ScriptPlayer = class {
218
218
  domIncluded: booleanTask.domIncluded,
219
219
  screenshotIncluded: booleanTask.screenshotIncluded
220
220
  };
221
- (0, import_utils.assert)(prompt, "missing prompt for boolean");
221
+ (0, import_utils.assert)(prompt, "missing prompt for aiBoolean");
222
222
  (0, import_utils.assert)(
223
223
  typeof prompt === "string",
224
224
  "prompt for boolean must be a string"
225
225
  );
226
226
  const booleanResult = await agent.aiBoolean(prompt, options);
227
227
  this.setResult(booleanTask.name, booleanResult);
228
+ } else if ("aiAsk" in flowItem) {
229
+ const askTask = flowItem;
230
+ const prompt = askTask.aiAsk;
231
+ (0, import_utils.assert)(prompt, "missing prompt for aiAsk");
232
+ (0, import_utils.assert)(typeof prompt === "string", "prompt for aiAsk must be a string");
233
+ const askResult = await agent.aiAsk(prompt);
234
+ this.setResult(askTask.name, askResult);
228
235
  } else if ("aiLocate" in flowItem) {
229
236
  const locateTask = flowItem;
230
237
  const prompt = locateTask.aiLocate;
@@ -233,7 +240,7 @@ var ScriptPlayer = class {
233
240
  typeof prompt === "string",
234
241
  "prompt for aiLocate must be a string"
235
242
  );
236
- const locateResult = await agent.aiLocate(prompt);
243
+ const locateResult = await agent.aiLocate(prompt, locateTask);
237
244
  this.setResult(locateTask.name, locateResult);
238
245
  } else if ("aiWaitFor" in flowItem) {
239
246
  const waitForTask = flowItem;
@@ -442,10 +449,10 @@ var import_utils12 = require("@midscene/shared/utils");
442
449
  // src/common/tasks.ts
443
450
  var import_core = require("@midscene/core");
444
451
  var import_ai_model2 = require("@midscene/core/ai-model");
445
- var import_utils5 = require("@midscene/core/utils");
452
+ var import_utils7 = require("@midscene/core/utils");
446
453
  var import_constants = require("@midscene/shared/constants");
447
- var import_logger = require("@midscene/shared/logger");
448
- var import_utils6 = require("@midscene/shared/utils");
454
+ var import_logger2 = require("@midscene/shared/logger");
455
+ var import_utils8 = require("@midscene/shared/utils");
449
456
 
450
457
  // src/common/ui-utils.ts
451
458
  function typeStr(task) {
@@ -522,11 +529,11 @@ function paramStr(task) {
522
529
 
523
530
  // src/common/utils.ts
524
531
  var import_ai_model = require("@midscene/core/ai-model");
525
- var import_utils3 = require("@midscene/core/utils");
532
+ var import_utils5 = require("@midscene/core/utils");
526
533
  var import_env = require("@midscene/shared/env");
527
534
  var import_extractor = require("@midscene/shared/extractor");
528
535
  var import_img = require("@midscene/shared/img");
529
- var import_utils4 = require("@midscene/shared/utils");
536
+ var import_utils6 = require("@midscene/shared/utils");
530
537
  var import_dayjs = __toESM(require("dayjs"));
531
538
 
532
539
  // src/web-element.ts
@@ -554,14 +561,189 @@ var WebElementInfo = class {
554
561
  }
555
562
  };
556
563
 
564
+ // src/common/task-cache.ts
565
+ var import_node_assert = __toESM(require("assert"));
566
+ var import_node_fs2 = require("fs");
567
+ var import_node_path2 = require("path");
568
+ var import_common2 = require("@midscene/shared/common");
569
+ var import_logger = require("@midscene/shared/logger");
570
+ var import_utils3 = require("@midscene/shared/utils");
571
+ var import_js_yaml3 = __toESM(require("js-yaml"));
572
+ var import_semver = __toESM(require("semver"));
573
+
574
+ // package.json
575
+ var version = "0.20.0";
576
+
577
+ // src/common/task-cache.ts
578
+ var debug = (0, import_logger.getDebug)("cache");
579
+ var lowestSupportedMidsceneVersion = "0.16.10";
580
+ var cacheFileExt = ".cache.yaml";
581
+ var TaskCache = class {
582
+ // Track matched records
583
+ constructor(cacheId, isCacheResultUsed, cacheFilePath) {
584
+ this.matchedCacheIndices = /* @__PURE__ */ new Set();
585
+ (0, import_node_assert.default)(cacheId, "cacheId is required");
586
+ this.cacheId = replaceIllegalPathCharsAndSpace(cacheId);
587
+ this.cacheFilePath = import_utils3.ifInBrowser ? void 0 : cacheFilePath || (0, import_node_path2.join)((0, import_common2.getMidsceneRunSubDir)("cache"), `${this.cacheId}${cacheFileExt}`);
588
+ this.isCacheResultUsed = isCacheResultUsed;
589
+ let cacheContent;
590
+ if (this.cacheFilePath) {
591
+ cacheContent = this.loadCacheFromFile();
592
+ }
593
+ if (!cacheContent) {
594
+ cacheContent = {
595
+ midsceneVersion: version,
596
+ cacheId: this.cacheId,
597
+ caches: []
598
+ };
599
+ }
600
+ this.cache = cacheContent;
601
+ this.cacheOriginalLength = this.cache.caches.length;
602
+ }
603
+ matchCache(prompt, type) {
604
+ for (let i = 0; i < this.cacheOriginalLength; i++) {
605
+ const item = this.cache.caches[i];
606
+ const key = `${type}:${prompt}:${i}`;
607
+ if (item.type === type && item.prompt === prompt && !this.matchedCacheIndices.has(key)) {
608
+ this.matchedCacheIndices.add(key);
609
+ debug(
610
+ "cache found and marked as used, type: %s, prompt: %s, index: %d",
611
+ type,
612
+ prompt,
613
+ i
614
+ );
615
+ return {
616
+ cacheContent: item,
617
+ updateFn: (cb) => {
618
+ debug(
619
+ "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
620
+ type,
621
+ prompt,
622
+ i
623
+ );
624
+ cb(item);
625
+ debug(
626
+ "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
627
+ type,
628
+ prompt,
629
+ i
630
+ );
631
+ this.flushCacheToFile();
632
+ }
633
+ };
634
+ }
635
+ }
636
+ debug("no unused cache found, type: %s, prompt: %s", type, prompt);
637
+ return void 0;
638
+ }
639
+ matchPlanCache(prompt) {
640
+ return this.matchCache(prompt, "plan");
641
+ }
642
+ matchLocateCache(prompt) {
643
+ return this.matchCache(prompt, "locate");
644
+ }
645
+ appendCache(cache) {
646
+ debug("will append cache", cache);
647
+ this.cache.caches.push(cache);
648
+ this.flushCacheToFile();
649
+ }
650
+ loadCacheFromFile() {
651
+ const cacheFile = this.cacheFilePath;
652
+ (0, import_node_assert.default)(cacheFile, "cache file path is required");
653
+ if (!(0, import_node_fs2.existsSync)(cacheFile)) {
654
+ debug("no cache file found, path: %s", cacheFile);
655
+ return void 0;
656
+ }
657
+ const jsonTypeCacheFile = cacheFile.replace(cacheFileExt, ".json");
658
+ if ((0, import_node_fs2.existsSync)(jsonTypeCacheFile) && this.isCacheResultUsed) {
659
+ console.warn(
660
+ `An outdated cache file from an earlier version of Midscene has been detected. Since version 0.17, we have implemented an improved caching strategy. Please delete the old file located at: ${jsonTypeCacheFile}.`
661
+ );
662
+ return void 0;
663
+ }
664
+ try {
665
+ const data = (0, import_node_fs2.readFileSync)(cacheFile, "utf8");
666
+ const jsonData = import_js_yaml3.default.load(data);
667
+ if (!version) {
668
+ debug("no midscene version info, will not read cache from file");
669
+ return void 0;
670
+ }
671
+ if (import_semver.default.lt(jsonData.midsceneVersion, lowestSupportedMidsceneVersion) && !jsonData.midsceneVersion.includes("beta")) {
672
+ console.warn(
673
+ `You are using an old version of Midscene cache file, and we cannot match any info from it. Starting from Midscene v0.17, we changed our strategy to use xpath for cache info, providing better performance.
674
+ Please delete the existing cache and rebuild it. Sorry for the inconvenience.
675
+ cache file: ${cacheFile}`
676
+ );
677
+ return void 0;
678
+ }
679
+ debug(
680
+ "cache loaded from file, path: %s, cache version: %s, record length: %s",
681
+ cacheFile,
682
+ jsonData.midsceneVersion,
683
+ jsonData.caches.length
684
+ );
685
+ jsonData.midsceneVersion = version;
686
+ return jsonData;
687
+ } catch (err) {
688
+ debug(
689
+ "cache file exists but load failed, path: %s, error: %s",
690
+ cacheFile,
691
+ err
692
+ );
693
+ return void 0;
694
+ }
695
+ }
696
+ flushCacheToFile() {
697
+ if (!version) {
698
+ debug("no midscene version info, will not write cache to file");
699
+ return;
700
+ }
701
+ if (!this.cacheFilePath) {
702
+ debug("no cache file path, will not write cache to file");
703
+ return;
704
+ }
705
+ try {
706
+ const dir = (0, import_node_path2.dirname)(this.cacheFilePath);
707
+ if (!(0, import_node_fs2.existsSync)(dir)) {
708
+ (0, import_node_fs2.mkdirSync)(dir, { recursive: true });
709
+ debug("created cache directory: %s", dir);
710
+ }
711
+ const yamlData = import_js_yaml3.default.dump(this.cache);
712
+ (0, import_node_fs2.writeFileSync)(this.cacheFilePath, yamlData);
713
+ debug("cache flushed to file: %s", this.cacheFilePath);
714
+ } catch (err) {
715
+ debug(
716
+ "write cache to file failed, path: %s, error: %s",
717
+ this.cacheFilePath,
718
+ err
719
+ );
720
+ }
721
+ }
722
+ updateOrAppendCacheRecord(newRecord, cachedRecord) {
723
+ if (cachedRecord) {
724
+ if (newRecord.type === "plan") {
725
+ cachedRecord.updateFn((cache) => {
726
+ cache.yamlWorkflow = newRecord.yamlWorkflow;
727
+ });
728
+ } else {
729
+ cachedRecord.updateFn((cache) => {
730
+ cache.xpaths = newRecord.xpaths;
731
+ });
732
+ }
733
+ } else {
734
+ this.appendCache(newRecord);
735
+ }
736
+ }
737
+ };
738
+
557
739
  // src/common/utils.ts
558
740
  async function parseContextFromWebPage(page, _opt) {
559
- (0, import_utils4.assert)(page, "page is required");
741
+ (0, import_utils6.assert)(page, "page is required");
560
742
  if (page._forceUsePageContext) {
561
743
  return await page._forceUsePageContext();
562
744
  }
563
745
  const url = await page.url();
564
- (0, import_utils3.uploadTestInfoToServer)({ testUrl: url });
746
+ (0, import_utils5.uploadTestInfoToServer)({ testUrl: url });
565
747
  let screenshotBase64;
566
748
  let tree;
567
749
  await Promise.all([
@@ -583,7 +765,7 @@ async function parseContextFromWebPage(page, _opt) {
583
765
  isVisible
584
766
  });
585
767
  });
586
- (0, import_utils4.assert)(screenshotBase64, "screenshotBase64 is required");
768
+ (0, import_utils6.assert)(screenshotBase64, "screenshotBase64 is required");
587
769
  const size = await page.size();
588
770
  if (size.dpr && size.dpr > 1) {
589
771
  screenshotBase64 = await (0, import_img.resizeImgBase64)(screenshotBase64, {
@@ -601,11 +783,11 @@ async function parseContextFromWebPage(page, _opt) {
601
783
  function reportFileName(tag = "web") {
602
784
  const reportTagName = (0, import_env.getAIConfig)(import_env.MIDSCENE_REPORT_TAG_NAME);
603
785
  const dateTimeInFileName = (0, import_dayjs.default)().format("YYYY-MM-DD_HH-mm-ss");
604
- const uniqueId = (0, import_utils4.uuid)().substring(0, 8);
786
+ const uniqueId = (0, import_utils6.uuid)().substring(0, 8);
605
787
  return `${reportTagName || tag}-${dateTimeInFileName}-${uniqueId}`;
606
788
  }
607
789
  function printReportMsg(filepath) {
608
- (0, import_utils4.logMsg)(`Midscene - report file updated: ${filepath}`);
790
+ (0, import_utils6.logMsg)(`Midscene - report file updated: ${filepath}`);
609
791
  }
610
792
  function replaceIllegalPathCharsAndSpace(str) {
611
793
  return str.replace(/[:*?"<>| ]/g, "-");
@@ -658,6 +840,28 @@ function matchElementFromPlan(planLocateParam, tree) {
658
840
  }
659
841
  return void 0;
660
842
  }
843
+ async function matchElementFromCache(taskExecutor, xpaths, cachePrompt, cacheable) {
844
+ try {
845
+ if (xpaths?.length && taskExecutor.taskCache?.isCacheResultUsed && cacheable !== false) {
846
+ for (let i = 0; i < xpaths.length; i++) {
847
+ const element = await taskExecutor.page.getElementInfoByXpath(
848
+ xpaths[i]
849
+ );
850
+ if (element?.id) {
851
+ debug("cache hit, prompt: %s", cachePrompt);
852
+ debug(
853
+ "found a new new element with same xpath, xpath: %s, id: %s",
854
+ xpaths[i],
855
+ element?.id
856
+ );
857
+ return element;
858
+ }
859
+ }
860
+ }
861
+ } catch (error) {
862
+ debug("get element info by xpath error: ", error);
863
+ }
864
+ }
661
865
  function trimContextByViewport(execution) {
662
866
  function filterVisibleTree(node) {
663
867
  if (!node)
@@ -696,7 +900,7 @@ function trimContextByViewport(execution) {
696
900
  }
697
901
 
698
902
  // src/common/tasks.ts
699
- var debug = (0, import_logger.getDebug)("page-task-executor");
903
+ var debug2 = (0, import_logger2.getDebug)("page-task-executor");
700
904
  var replanningCountLimit = 10;
701
905
  var isAndroidPage = (page) => {
702
906
  return page.pageType === "android";
@@ -737,7 +941,7 @@ var PageTaskExecutor = class {
737
941
  if (info?.id) {
738
942
  elementId = info.id;
739
943
  } else {
740
- debug(
944
+ debug2(
741
945
  "no element id found for position node, will not update cache",
742
946
  element
743
947
  );
@@ -750,7 +954,7 @@ var PageTaskExecutor = class {
750
954
  const result = await this.page.getXpathsById(elementId);
751
955
  return result;
752
956
  } catch (error) {
753
- debug("getXpathsById error: ", error);
957
+ debug2("getXpathsById error: ", error);
754
958
  }
755
959
  }
756
960
  prependExecutorWithScreenshot(taskApply, appendAfterExecution = false) {
@@ -766,7 +970,7 @@ var PageTaskExecutor = class {
766
970
  if (taskApply.type === "Action") {
767
971
  await Promise.all([
768
972
  (async () => {
769
- await (0, import_utils5.sleep)(100);
973
+ await (0, import_utils7.sleep)(100);
770
974
  if (this.page.waitUntilNetworkIdle) {
771
975
  try {
772
976
  await this.page.waitUntilNetworkIdle();
@@ -774,7 +978,7 @@ var PageTaskExecutor = class {
774
978
  }
775
979
  }
776
980
  })(),
777
- (0, import_utils5.sleep)(200)
981
+ (0, import_utils7.sleep)(200)
778
982
  ]);
779
983
  }
780
984
  if (appendAfterExecution) {
@@ -804,7 +1008,7 @@ var PageTaskExecutor = class {
804
1008
  locate: plan2.locate,
805
1009
  executor: async (param, taskContext) => {
806
1010
  const { task } = taskContext;
807
- (0, import_utils6.assert)(
1011
+ (0, import_utils8.assert)(
808
1012
  param?.prompt || param?.id || param?.bbox,
809
1013
  "No prompt or id or position or bbox to locate"
810
1014
  );
@@ -829,39 +1033,29 @@ var PageTaskExecutor = class {
829
1033
  timing: "before Insight"
830
1034
  };
831
1035
  task.recorder = [recordItem];
832
- let cacheHitFlag = false;
1036
+ const elementFromXpath = param.xpath ? await this.page.getElementInfoByXpath(param.xpath) : void 0;
1037
+ const userExpectedPathHitFlag = !!elementFromXpath;
833
1038
  const cachePrompt = param.prompt;
834
1039
  const locateCacheRecord = this.taskCache?.matchLocateCache(cachePrompt);
835
1040
  const xpaths = locateCacheRecord?.cacheContent?.xpaths;
836
- let elementFromCache = null;
837
- try {
838
- if (xpaths?.length && this.taskCache?.isCacheResultUsed && param?.cacheable !== false) {
839
- for (let i = 0; i < xpaths.length; i++) {
840
- const element2 = await this.page.getElementInfoByXpath(
841
- xpaths[i]
842
- );
843
- if (element2?.id) {
844
- elementFromCache = element2;
845
- debug("cache hit, prompt: %s", cachePrompt);
846
- cacheHitFlag = true;
847
- debug(
848
- "found a new new element with same xpath, xpath: %s, id: %s",
849
- xpaths[i],
850
- element2?.id
851
- );
852
- break;
853
- }
854
- }
855
- }
856
- } catch (error) {
857
- debug("get element info by xpath error: ", error);
858
- }
859
- const startTime = Date.now();
860
- const element = elementFromCache || // try to match element from cache
861
- matchElementFromPlan(param, pageContext.tree) || // try to match element from plan
862
- (await this.insight.locate(param, {
1041
+ const elementFromCache = userExpectedPathHitFlag ? null : await matchElementFromCache(
1042
+ this,
1043
+ xpaths,
1044
+ cachePrompt,
1045
+ param.cacheable
1046
+ );
1047
+ const cacheHitFlag = !!elementFromCache;
1048
+ const elementFromPlan = !userExpectedPathHitFlag && !cacheHitFlag ? matchElementFromPlan(param, pageContext.tree) : void 0;
1049
+ const planHitFlag = !!elementFromPlan;
1050
+ const elementFromAiLocate = !userExpectedPathHitFlag && !cacheHitFlag && !planHitFlag ? (await this.insight.locate(param, {
1051
+ // fallback to ai locate
863
1052
  context: pageContext
864
- })).element;
1053
+ })).element : void 0;
1054
+ const aiLocateHitFlag = !!elementFromAiLocate;
1055
+ const element = elementFromXpath || // highest priority
1056
+ elementFromCache || // second priority
1057
+ elementFromPlan || // third priority
1058
+ elementFromAiLocate;
865
1059
  let currentXpaths;
866
1060
  if (element && this.taskCache && !cacheHitFlag && param?.cacheable !== false) {
867
1061
  const elementXpaths = await this.getElementXpath(
@@ -879,7 +1073,7 @@ var PageTaskExecutor = class {
879
1073
  locateCacheRecord
880
1074
  );
881
1075
  } else {
882
- debug(
1076
+ debug2(
883
1077
  "no xpaths found, will not update cache",
884
1078
  cachePrompt,
885
1079
  elementXpaths
@@ -889,16 +1083,44 @@ var PageTaskExecutor = class {
889
1083
  if (!element) {
890
1084
  throw new Error(`Element not found: ${param.prompt}`);
891
1085
  }
1086
+ let hitBy;
1087
+ if (userExpectedPathHitFlag) {
1088
+ hitBy = {
1089
+ from: "User expected path",
1090
+ context: {
1091
+ xpath: param.xpath
1092
+ }
1093
+ };
1094
+ } else if (cacheHitFlag) {
1095
+ hitBy = {
1096
+ from: "Cache",
1097
+ context: {
1098
+ xpathsFromCache: xpaths,
1099
+ xpathsToSave: currentXpaths
1100
+ }
1101
+ };
1102
+ } else if (planHitFlag) {
1103
+ hitBy = {
1104
+ from: "Planning",
1105
+ context: {
1106
+ id: elementFromPlan?.id,
1107
+ bbox: elementFromPlan?.bbox
1108
+ }
1109
+ };
1110
+ } else if (aiLocateHitFlag) {
1111
+ hitBy = {
1112
+ from: "AI model",
1113
+ context: {
1114
+ prompt: param.prompt
1115
+ }
1116
+ };
1117
+ }
892
1118
  return {
893
1119
  output: {
894
1120
  element
895
1121
  },
896
1122
  pageContext,
897
- cache: {
898
- hit: cacheHitFlag,
899
- originalXpaths: xpaths,
900
- currentXpaths
901
- }
1123
+ hitBy
902
1124
  };
903
1125
  }
904
1126
  };
@@ -994,7 +1216,7 @@ var PageTaskExecutor = class {
994
1216
  thought: plan2.thought,
995
1217
  locate: plan2.locate,
996
1218
  executor: async (param, { element }) => {
997
- (0, import_utils6.assert)(element, "Element not found, cannot tap");
1219
+ (0, import_utils8.assert)(element, "Element not found, cannot tap");
998
1220
  await this.page.mouse.click(element.center[0], element.center[1]);
999
1221
  }
1000
1222
  };
@@ -1006,7 +1228,7 @@ var PageTaskExecutor = class {
1006
1228
  thought: plan2.thought,
1007
1229
  locate: plan2.locate,
1008
1230
  executor: async (param, { element }) => {
1009
- (0, import_utils6.assert)(element, "Element not found, cannot right click");
1231
+ (0, import_utils8.assert)(element, "Element not found, cannot right click");
1010
1232
  await this.page.mouse.click(
1011
1233
  element.center[0],
1012
1234
  element.center[1],
@@ -1023,7 +1245,7 @@ var PageTaskExecutor = class {
1023
1245
  thought: plan2.thought,
1024
1246
  locate: plan2.locate,
1025
1247
  executor: async (taskParam) => {
1026
- (0, import_utils6.assert)(
1248
+ (0, import_utils8.assert)(
1027
1249
  taskParam?.start_box && taskParam?.end_box,
1028
1250
  "No start_box or end_box to drag"
1029
1251
  );
@@ -1038,7 +1260,7 @@ var PageTaskExecutor = class {
1038
1260
  thought: plan2.thought,
1039
1261
  locate: plan2.locate,
1040
1262
  executor: async (param, { element }) => {
1041
- (0, import_utils6.assert)(element, "Element not found, cannot hover");
1263
+ (0, import_utils8.assert)(element, "Element not found, cannot hover");
1042
1264
  await this.page.mouse.move(element.center[0], element.center[1]);
1043
1265
  }
1044
1266
  };
@@ -1090,7 +1312,7 @@ var PageTaskExecutor = class {
1090
1312
  `Unknown scroll direction: ${taskParam.direction}`
1091
1313
  );
1092
1314
  }
1093
- await (0, import_utils5.sleep)(500);
1315
+ await (0, import_utils7.sleep)(500);
1094
1316
  } else {
1095
1317
  throw new Error(
1096
1318
  `Unknown scroll event type: ${scrollToEventName}, taskParam: ${JSON.stringify(
@@ -1109,7 +1331,7 @@ var PageTaskExecutor = class {
1109
1331
  thought: plan2.thought,
1110
1332
  locate: plan2.locate,
1111
1333
  executor: async (taskParam) => {
1112
- await (0, import_utils5.sleep)(taskParam?.timeMs || 3e3);
1334
+ await (0, import_utils7.sleep)(taskParam?.timeMs || 3e3);
1113
1335
  }
1114
1336
  };
1115
1337
  tasks.push(taskActionSleep);
@@ -1157,7 +1379,7 @@ var PageTaskExecutor = class {
1157
1379
  thought: plan2.thought,
1158
1380
  locate: plan2.locate,
1159
1381
  executor: async (param) => {
1160
- (0, import_utils6.assert)(
1382
+ (0, import_utils8.assert)(
1161
1383
  isAndroidPage(this.page),
1162
1384
  "Cannot use home button on non-Android devices"
1163
1385
  );
@@ -1173,7 +1395,7 @@ var PageTaskExecutor = class {
1173
1395
  thought: plan2.thought,
1174
1396
  locate: plan2.locate,
1175
1397
  executor: async (param) => {
1176
- (0, import_utils6.assert)(
1398
+ (0, import_utils8.assert)(
1177
1399
  isAndroidPage(this.page),
1178
1400
  "Cannot use back button on non-Android devices"
1179
1401
  );
@@ -1189,7 +1411,7 @@ var PageTaskExecutor = class {
1189
1411
  thought: plan2.thought,
1190
1412
  locate: plan2.locate,
1191
1413
  executor: async (param) => {
1192
- (0, import_utils6.assert)(
1414
+ (0, import_utils8.assert)(
1193
1415
  isAndroidPage(this.page),
1194
1416
  "Cannot use recent apps button on non-Android devices"
1195
1417
  );
@@ -1340,7 +1562,7 @@ var PageTaskExecutor = class {
1340
1562
  }
1341
1563
  }
1342
1564
  if (finalActions.length === 0) {
1343
- (0, import_utils6.assert)(
1565
+ (0, import_utils8.assert)(
1344
1566
  !more_actions_needed_by_instruction || sleep3,
1345
1567
  error ? `Failed to plan: ${error}` : planParsingError || "No plan found"
1346
1568
  );
@@ -1578,7 +1800,7 @@ var PageTaskExecutor = class {
1578
1800
  );
1579
1801
  let outputResult = data;
1580
1802
  if (ifTypeRestricted) {
1581
- (0, import_utils6.assert)(data?.result !== void 0, "No result in query data");
1803
+ (0, import_utils8.assert)(data?.result !== void 0, "No result in query data");
1582
1804
  outputResult = data.result;
1583
1805
  }
1584
1806
  return {
@@ -1674,9 +1896,9 @@ var PageTaskExecutor = class {
1674
1896
  onTaskStart: this.onTaskStartCallback
1675
1897
  });
1676
1898
  const { timeoutMs, checkIntervalMs } = opt;
1677
- (0, import_utils6.assert)(assertion, "No assertion for waitFor");
1678
- (0, import_utils6.assert)(timeoutMs, "No timeoutMs for waitFor");
1679
- (0, import_utils6.assert)(checkIntervalMs, "No checkIntervalMs for waitFor");
1899
+ (0, import_utils8.assert)(assertion, "No assertion for waitFor");
1900
+ (0, import_utils8.assert)(timeoutMs, "No timeoutMs for waitFor");
1901
+ (0, import_utils8.assert)(checkIntervalMs, "No checkIntervalMs for waitFor");
1680
1902
  const overallStartTime = Date.now();
1681
1903
  let startTime = Date.now();
1682
1904
  let errorThought = "";
@@ -1730,9 +1952,9 @@ var PageTaskExecutor = class {
1730
1952
  };
1731
1953
 
1732
1954
  // src/common/plan-builder.ts
1733
- var import_logger2 = require("@midscene/shared/logger");
1734
- var import_utils8 = require("@midscene/shared/utils");
1735
- var debug2 = (0, import_logger2.getDebug)("plan-builder");
1955
+ var import_logger3 = require("@midscene/shared/logger");
1956
+ var import_utils10 = require("@midscene/shared/utils");
1957
+ var debug3 = (0, import_logger3.getDebug)("plan-builder");
1736
1958
  function buildPlans(type, locateParam, param) {
1737
1959
  let returnPlans = [];
1738
1960
  const locatePlan = locateParam ? {
@@ -1742,8 +1964,8 @@ function buildPlans(type, locateParam, param) {
1742
1964
  thought: ""
1743
1965
  } : null;
1744
1966
  if (type === "Tap" || type === "Hover" || type === "RightClick") {
1745
- (0, import_utils8.assert)(locateParam, `missing locate info for action "${type}"`);
1746
- (0, import_utils8.assert)(locatePlan, `missing locate info for action "${type}"`);
1967
+ (0, import_utils10.assert)(locateParam, `missing locate info for action "${type}"`);
1968
+ (0, import_utils10.assert)(locatePlan, `missing locate info for action "${type}"`);
1747
1969
  const tapPlan = {
1748
1970
  type,
1749
1971
  param: null,
@@ -1754,9 +1976,9 @@ function buildPlans(type, locateParam, param) {
1754
1976
  }
1755
1977
  if (type === "Input" || type === "KeyboardPress") {
1756
1978
  if (type === "Input") {
1757
- (0, import_utils8.assert)(locateParam, `missing locate info for action "${type}"`);
1979
+ (0, import_utils10.assert)(locateParam, `missing locate info for action "${type}"`);
1758
1980
  }
1759
- (0, import_utils8.assert)(param, `missing param for action "${type}"`);
1981
+ (0, import_utils10.assert)(param, `missing param for action "${type}"`);
1760
1982
  const inputPlan = {
1761
1983
  type,
1762
1984
  param,
@@ -1770,7 +1992,7 @@ function buildPlans(type, locateParam, param) {
1770
1992
  }
1771
1993
  }
1772
1994
  if (type === "Scroll") {
1773
- (0, import_utils8.assert)(param, `missing param for action "${type}"`);
1995
+ (0, import_utils10.assert)(param, `missing param for action "${type}"`);
1774
1996
  const scrollPlan = {
1775
1997
  type,
1776
1998
  param,
@@ -1784,7 +2006,7 @@ function buildPlans(type, locateParam, param) {
1784
2006
  }
1785
2007
  }
1786
2008
  if (type === "Sleep") {
1787
- (0, import_utils8.assert)(param, `missing param for action "${type}"`);
2009
+ (0, import_utils10.assert)(param, `missing param for action "${type}"`);
1788
2010
  const sleepPlan = {
1789
2011
  type,
1790
2012
  param,
@@ -1794,7 +2016,7 @@ function buildPlans(type, locateParam, param) {
1794
2016
  returnPlans = [sleepPlan];
1795
2017
  }
1796
2018
  if (type === "Locate") {
1797
- (0, import_utils8.assert)(locateParam, `missing locate info for action "${type}"`);
2019
+ (0, import_utils10.assert)(locateParam, `missing locate info for action "${type}"`);
1798
2020
  const locatePlan2 = {
1799
2021
  type,
1800
2022
  param: locateParam,
@@ -1804,187 +2026,12 @@ function buildPlans(type, locateParam, param) {
1804
2026
  returnPlans = [locatePlan2];
1805
2027
  }
1806
2028
  if (returnPlans) {
1807
- debug2("buildPlans", returnPlans);
2029
+ debug3("buildPlans", returnPlans);
1808
2030
  return returnPlans;
1809
2031
  }
1810
2032
  throw new Error(`Not supported type: ${type}`);
1811
2033
  }
1812
2034
 
1813
- // src/common/task-cache.ts
1814
- var import_node_assert = __toESM(require("assert"));
1815
- var import_node_fs2 = require("fs");
1816
- var import_node_path2 = require("path");
1817
- var import_common2 = require("@midscene/shared/common");
1818
- var import_logger3 = require("@midscene/shared/logger");
1819
- var import_utils9 = require("@midscene/shared/utils");
1820
- var import_js_yaml3 = __toESM(require("js-yaml"));
1821
- var import_semver = __toESM(require("semver"));
1822
-
1823
- // package.json
1824
- var version = "0.19.1";
1825
-
1826
- // src/common/task-cache.ts
1827
- var debug3 = (0, import_logger3.getDebug)("cache");
1828
- var lowestSupportedMidsceneVersion = "0.16.10";
1829
- var cacheFileExt = ".cache.yaml";
1830
- var TaskCache = class {
1831
- // Track matched records
1832
- constructor(cacheId, isCacheResultUsed, cacheFilePath) {
1833
- this.matchedCacheIndices = /* @__PURE__ */ new Set();
1834
- (0, import_node_assert.default)(cacheId, "cacheId is required");
1835
- this.cacheId = replaceIllegalPathCharsAndSpace(cacheId);
1836
- this.cacheFilePath = import_utils9.ifInBrowser ? void 0 : cacheFilePath || (0, import_node_path2.join)((0, import_common2.getMidsceneRunSubDir)("cache"), `${this.cacheId}${cacheFileExt}`);
1837
- this.isCacheResultUsed = isCacheResultUsed;
1838
- let cacheContent;
1839
- if (this.cacheFilePath) {
1840
- cacheContent = this.loadCacheFromFile();
1841
- }
1842
- if (!cacheContent) {
1843
- cacheContent = {
1844
- midsceneVersion: version,
1845
- cacheId: this.cacheId,
1846
- caches: []
1847
- };
1848
- }
1849
- this.cache = cacheContent;
1850
- this.cacheOriginalLength = this.cache.caches.length;
1851
- }
1852
- matchCache(prompt, type) {
1853
- for (let i = 0; i < this.cacheOriginalLength; i++) {
1854
- const item = this.cache.caches[i];
1855
- const key = `${type}:${prompt}:${i}`;
1856
- if (item.type === type && item.prompt === prompt && !this.matchedCacheIndices.has(key)) {
1857
- this.matchedCacheIndices.add(key);
1858
- debug3(
1859
- "cache found and marked as used, type: %s, prompt: %s, index: %d",
1860
- type,
1861
- prompt,
1862
- i
1863
- );
1864
- return {
1865
- cacheContent: item,
1866
- updateFn: (cb) => {
1867
- debug3(
1868
- "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1869
- type,
1870
- prompt,
1871
- i
1872
- );
1873
- cb(item);
1874
- debug3(
1875
- "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1876
- type,
1877
- prompt,
1878
- i
1879
- );
1880
- this.flushCacheToFile();
1881
- }
1882
- };
1883
- }
1884
- }
1885
- debug3("no unused cache found, type: %s, prompt: %s", type, prompt);
1886
- return void 0;
1887
- }
1888
- matchPlanCache(prompt) {
1889
- return this.matchCache(prompt, "plan");
1890
- }
1891
- matchLocateCache(prompt) {
1892
- return this.matchCache(prompt, "locate");
1893
- }
1894
- appendCache(cache) {
1895
- debug3("will append cache", cache);
1896
- this.cache.caches.push(cache);
1897
- this.flushCacheToFile();
1898
- }
1899
- loadCacheFromFile() {
1900
- const cacheFile = this.cacheFilePath;
1901
- (0, import_node_assert.default)(cacheFile, "cache file path is required");
1902
- if (!(0, import_node_fs2.existsSync)(cacheFile)) {
1903
- debug3("no cache file found, path: %s", cacheFile);
1904
- return void 0;
1905
- }
1906
- const jsonTypeCacheFile = cacheFile.replace(cacheFileExt, ".json");
1907
- if ((0, import_node_fs2.existsSync)(jsonTypeCacheFile) && this.isCacheResultUsed) {
1908
- console.warn(
1909
- `An outdated cache file from an earlier version of Midscene has been detected. Since version 0.17, we have implemented an improved caching strategy. Please delete the old file located at: ${jsonTypeCacheFile}.`
1910
- );
1911
- return void 0;
1912
- }
1913
- try {
1914
- const data = (0, import_node_fs2.readFileSync)(cacheFile, "utf8");
1915
- const jsonData = import_js_yaml3.default.load(data);
1916
- if (!version) {
1917
- debug3("no midscene version info, will not read cache from file");
1918
- return void 0;
1919
- }
1920
- if (import_semver.default.lt(jsonData.midsceneVersion, lowestSupportedMidsceneVersion) && !jsonData.midsceneVersion.includes("beta")) {
1921
- console.warn(
1922
- `You are using an old version of Midscene cache file, and we cannot match any info from it. Starting from Midscene v0.17, we changed our strategy to use xpath for cache info, providing better performance.
1923
- Please delete the existing cache and rebuild it. Sorry for the inconvenience.
1924
- cache file: ${cacheFile}`
1925
- );
1926
- return void 0;
1927
- }
1928
- debug3(
1929
- "cache loaded from file, path: %s, cache version: %s, record length: %s",
1930
- cacheFile,
1931
- jsonData.midsceneVersion,
1932
- jsonData.caches.length
1933
- );
1934
- jsonData.midsceneVersion = version;
1935
- return jsonData;
1936
- } catch (err) {
1937
- debug3(
1938
- "cache file exists but load failed, path: %s, error: %s",
1939
- cacheFile,
1940
- err
1941
- );
1942
- return void 0;
1943
- }
1944
- }
1945
- flushCacheToFile() {
1946
- if (!version) {
1947
- debug3("no midscene version info, will not write cache to file");
1948
- return;
1949
- }
1950
- if (!this.cacheFilePath) {
1951
- debug3("no cache file path, will not write cache to file");
1952
- return;
1953
- }
1954
- try {
1955
- const dir = (0, import_node_path2.dirname)(this.cacheFilePath);
1956
- if (!(0, import_node_fs2.existsSync)(dir)) {
1957
- (0, import_node_fs2.mkdirSync)(dir, { recursive: true });
1958
- debug3("created cache directory: %s", dir);
1959
- }
1960
- const yamlData = import_js_yaml3.default.dump(this.cache);
1961
- (0, import_node_fs2.writeFileSync)(this.cacheFilePath, yamlData);
1962
- debug3("cache flushed to file: %s", this.cacheFilePath);
1963
- } catch (err) {
1964
- debug3(
1965
- "write cache to file failed, path: %s, error: %s",
1966
- this.cacheFilePath,
1967
- err
1968
- );
1969
- }
1970
- }
1971
- updateOrAppendCacheRecord(newRecord, cachedRecord) {
1972
- if (cachedRecord) {
1973
- if (newRecord.type === "plan") {
1974
- cachedRecord.updateFn((cache) => {
1975
- cache.yamlWorkflow = newRecord.yamlWorkflow;
1976
- });
1977
- } else {
1978
- cachedRecord.updateFn((cache) => {
1979
- cache.xpaths = newRecord.xpaths;
1980
- });
1981
- }
1982
- } else {
1983
- this.appendCache(newRecord);
1984
- }
1985
- }
1986
- };
1987
-
1988
2035
  // src/common/agent.ts
1989
2036
  var debug4 = (0, import_logger4.getDebug)("web-integration");
1990
2037
  var distanceOfTwoPoints = (p1, p2) => {
@@ -2113,10 +2160,12 @@ ${errorTask?.errorStack}`);
2113
2160
  const prompt = opt.prompt ?? locatePrompt;
2114
2161
  const deepThink = opt.deepThink ?? false;
2115
2162
  const cacheable = opt.cacheable ?? true;
2163
+ const xpath = opt.xpath;
2116
2164
  return {
2117
2165
  prompt,
2118
2166
  deepThink,
2119
- cacheable
2167
+ cacheable,
2168
+ xpath
2120
2169
  };
2121
2170
  }
2122
2171
  return {
@@ -2274,6 +2323,9 @@ ${errorTask?.errorStack}`);
2274
2323
  this.afterTaskRunning(executor);
2275
2324
  return output;
2276
2325
  }
2326
+ async aiAsk(prompt, opt = defaultInsightExtractOption) {
2327
+ return this.aiString(prompt, opt);
2328
+ }
2277
2329
  async describeElementAtPoint(center, opt) {
2278
2330
  const { verifyPrompt = true, retryLimit = 3 } = opt || {};
2279
2331
  let success = false;
@@ -2996,6 +3048,14 @@ var PlaywrightAiFixture = (options) => {
2996
3048
  use,
2997
3049
  aiActionType: "aiBoolean"
2998
3050
  });
3051
+ },
3052
+ aiAsk: async ({ page }, use, testInfo) => {
3053
+ await generateAiFunction({
3054
+ page,
3055
+ testInfo,
3056
+ use,
3057
+ aiActionType: "aiAsk"
3058
+ });
2999
3059
  }
3000
3060
  };
3001
3061
  };