@midscene/web 0.19.1 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. package/dist/es/agent.js +299 -247
  2. package/dist/es/agent.js.map +1 -1
  3. package/dist/es/bridge-mode-browser.js +3 -3
  4. package/dist/es/bridge-mode.js +301 -249
  5. package/dist/es/bridge-mode.js.map +1 -1
  6. package/dist/es/chrome-extension.js +342 -290
  7. package/dist/es/chrome-extension.js.map +1 -1
  8. package/dist/es/index.js +307 -247
  9. package/dist/es/index.js.map +1 -1
  10. package/dist/es/midscene-playground.js +341 -289
  11. package/dist/es/midscene-playground.js.map +1 -1
  12. package/dist/es/midscene-server.js +25 -12
  13. package/dist/es/midscene-server.js.map +1 -1
  14. package/dist/es/playground.js +341 -289
  15. package/dist/es/playground.js.map +1 -1
  16. package/dist/es/playwright-report.js +14 -1
  17. package/dist/es/playwright-report.js.map +1 -1
  18. package/dist/es/playwright-reporter.js +14 -1
  19. package/dist/es/playwright-reporter.js.map +1 -1
  20. package/dist/es/playwright.js +307 -247
  21. package/dist/es/playwright.js.map +1 -1
  22. package/dist/es/puppeteer-agent-launcher.js +299 -247
  23. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  24. package/dist/es/puppeteer.js +299 -247
  25. package/dist/es/puppeteer.js.map +1 -1
  26. package/dist/es/utils.js +42 -8
  27. package/dist/es/utils.js.map +1 -1
  28. package/dist/es/yaml.js +11 -4
  29. package/dist/es/yaml.js.map +1 -1
  30. package/dist/lib/agent.js +308 -256
  31. package/dist/lib/agent.js.map +1 -1
  32. package/dist/lib/bridge-mode-browser.js +3 -3
  33. package/dist/lib/bridge-mode.js +310 -258
  34. package/dist/lib/bridge-mode.js.map +1 -1
  35. package/dist/lib/chrome-extension.js +355 -303
  36. package/dist/lib/chrome-extension.js.map +1 -1
  37. package/dist/lib/index.js +316 -256
  38. package/dist/lib/index.js.map +1 -1
  39. package/dist/lib/midscene-playground.js +354 -302
  40. package/dist/lib/midscene-playground.js.map +1 -1
  41. package/dist/lib/midscene-server.js +28 -15
  42. package/dist/lib/midscene-server.js.map +1 -1
  43. package/dist/lib/playground.js +354 -302
  44. package/dist/lib/playground.js.map +1 -1
  45. package/dist/lib/playwright-report.js +20 -7
  46. package/dist/lib/playwright-report.js.map +1 -1
  47. package/dist/lib/playwright-reporter.js +20 -7
  48. package/dist/lib/playwright-reporter.js.map +1 -1
  49. package/dist/lib/playwright.js +316 -256
  50. package/dist/lib/playwright.js.map +1 -1
  51. package/dist/lib/puppeteer-agent-launcher.js +308 -256
  52. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  53. package/dist/lib/puppeteer.js +308 -256
  54. package/dist/lib/puppeteer.js.map +1 -1
  55. package/dist/lib/utils.js +48 -13
  56. package/dist/lib/utils.js.map +1 -1
  57. package/dist/lib/yaml.js +11 -4
  58. package/dist/lib/yaml.js.map +1 -1
  59. package/dist/types/agent.d.ts +6 -102
  60. package/dist/types/bridge-mode-browser.d.ts +3 -2
  61. package/dist/types/bridge-mode.d.ts +4 -4
  62. package/dist/types/{browser-5dbb4bfb.d.ts → browser-118d886d.d.ts} +1 -1
  63. package/dist/types/chrome-extension.d.ts +2 -2
  64. package/dist/types/index.d.ts +1 -1
  65. package/dist/types/midscene-server.d.ts +2 -2
  66. package/dist/types/{page-90e9f9a7.d.ts → page-471361cd.d.ts} +102 -3
  67. package/dist/types/playground.d.ts +2 -2
  68. package/dist/types/playwright.d.ts +6 -2
  69. package/dist/types/puppeteer-agent-launcher.d.ts +1 -1
  70. package/dist/types/puppeteer.d.ts +3 -3
  71. package/dist/types/utils.d.ts +2 -1
  72. package/dist/types/yaml.d.ts +1 -1
  73. package/package.json +3 -3
@@ -154,7 +154,7 @@ var ScriptPlayer = class {
154
154
  domIncluded: numberTask.domIncluded,
155
155
  screenshotIncluded: numberTask.screenshotIncluded
156
156
  };
157
- assert(prompt, "missing prompt for number");
157
+ assert(prompt, "missing prompt for aiNumber");
158
158
  assert(
159
159
  typeof prompt === "string",
160
160
  "prompt for number must be a string"
@@ -168,7 +168,7 @@ var ScriptPlayer = class {
168
168
  domIncluded: stringTask.domIncluded,
169
169
  screenshotIncluded: stringTask.screenshotIncluded
170
170
  };
171
- assert(prompt, "missing prompt for string");
171
+ assert(prompt, "missing prompt for aiString");
172
172
  assert(
173
173
  typeof prompt === "string",
174
174
  "prompt for string must be a string"
@@ -182,13 +182,20 @@ var ScriptPlayer = class {
182
182
  domIncluded: booleanTask.domIncluded,
183
183
  screenshotIncluded: booleanTask.screenshotIncluded
184
184
  };
185
- assert(prompt, "missing prompt for boolean");
185
+ assert(prompt, "missing prompt for aiBoolean");
186
186
  assert(
187
187
  typeof prompt === "string",
188
188
  "prompt for boolean must be a string"
189
189
  );
190
190
  const booleanResult = await agent.aiBoolean(prompt, options);
191
191
  this.setResult(booleanTask.name, booleanResult);
192
+ } else if ("aiAsk" in flowItem) {
193
+ const askTask = flowItem;
194
+ const prompt = askTask.aiAsk;
195
+ assert(prompt, "missing prompt for aiAsk");
196
+ assert(typeof prompt === "string", "prompt for aiAsk must be a string");
197
+ const askResult = await agent.aiAsk(prompt);
198
+ this.setResult(askTask.name, askResult);
192
199
  } else if ("aiLocate" in flowItem) {
193
200
  const locateTask = flowItem;
194
201
  const prompt = locateTask.aiLocate;
@@ -197,7 +204,7 @@ var ScriptPlayer = class {
197
204
  typeof prompt === "string",
198
205
  "prompt for aiLocate must be a string"
199
206
  );
200
- const locateResult = await agent.aiLocate(prompt);
207
+ const locateResult = await agent.aiLocate(prompt, locateTask);
201
208
  this.setResult(locateTask.name, locateResult);
202
209
  } else if ("aiWaitFor" in flowItem) {
203
210
  const waitForTask = flowItem;
@@ -423,8 +430,8 @@ import {
423
430
  } from "@midscene/core/ai-model";
424
431
  import { sleep } from "@midscene/core/utils";
425
432
  import { NodeType } from "@midscene/shared/constants";
426
- import { getDebug } from "@midscene/shared/logger";
427
- import { assert as assert4 } from "@midscene/shared/utils";
433
+ import { getDebug as getDebug2 } from "@midscene/shared/logger";
434
+ import { assert as assert5 } from "@midscene/shared/utils";
428
435
 
429
436
  // src/common/ui-utils.ts
430
437
  function typeStr(task) {
@@ -509,7 +516,7 @@ import {
509
516
  traverseTree
510
517
  } from "@midscene/shared/extractor";
511
518
  import { resizeImgBase64 } from "@midscene/shared/img";
512
- import { assert as assert3, logMsg, uuid } from "@midscene/shared/utils";
519
+ import { assert as assert4, logMsg, uuid } from "@midscene/shared/utils";
513
520
  import dayjs from "dayjs";
514
521
 
515
522
  // src/web-element.ts
@@ -537,9 +544,184 @@ var WebElementInfo = class {
537
544
  }
538
545
  };
539
546
 
547
+ // src/common/task-cache.ts
548
+ import assert3 from "assert";
549
+ import { existsSync as existsSync2, mkdirSync as mkdirSync2, readFileSync, writeFileSync as writeFileSync2 } from "fs";
550
+ import { dirname as dirname2, join as join2 } from "path";
551
+ import { getMidsceneRunSubDir as getMidsceneRunSubDir2 } from "@midscene/shared/common";
552
+ import { getDebug } from "@midscene/shared/logger";
553
+ import { ifInBrowser as ifInBrowser2 } from "@midscene/shared/utils";
554
+ import yaml3 from "js-yaml";
555
+ import semver from "semver";
556
+
557
+ // package.json
558
+ var version = "0.20.0";
559
+
560
+ // src/common/task-cache.ts
561
+ var debug = getDebug("cache");
562
+ var lowestSupportedMidsceneVersion = "0.16.10";
563
+ var cacheFileExt = ".cache.yaml";
564
+ var TaskCache = class {
565
+ // Track matched records
566
+ constructor(cacheId, isCacheResultUsed, cacheFilePath) {
567
+ this.matchedCacheIndices = /* @__PURE__ */ new Set();
568
+ assert3(cacheId, "cacheId is required");
569
+ this.cacheId = replaceIllegalPathCharsAndSpace(cacheId);
570
+ this.cacheFilePath = ifInBrowser2 ? void 0 : cacheFilePath || join2(getMidsceneRunSubDir2("cache"), `${this.cacheId}${cacheFileExt}`);
571
+ this.isCacheResultUsed = isCacheResultUsed;
572
+ let cacheContent;
573
+ if (this.cacheFilePath) {
574
+ cacheContent = this.loadCacheFromFile();
575
+ }
576
+ if (!cacheContent) {
577
+ cacheContent = {
578
+ midsceneVersion: version,
579
+ cacheId: this.cacheId,
580
+ caches: []
581
+ };
582
+ }
583
+ this.cache = cacheContent;
584
+ this.cacheOriginalLength = this.cache.caches.length;
585
+ }
586
+ matchCache(prompt, type) {
587
+ for (let i = 0; i < this.cacheOriginalLength; i++) {
588
+ const item = this.cache.caches[i];
589
+ const key = `${type}:${prompt}:${i}`;
590
+ if (item.type === type && item.prompt === prompt && !this.matchedCacheIndices.has(key)) {
591
+ this.matchedCacheIndices.add(key);
592
+ debug(
593
+ "cache found and marked as used, type: %s, prompt: %s, index: %d",
594
+ type,
595
+ prompt,
596
+ i
597
+ );
598
+ return {
599
+ cacheContent: item,
600
+ updateFn: (cb) => {
601
+ debug(
602
+ "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
603
+ type,
604
+ prompt,
605
+ i
606
+ );
607
+ cb(item);
608
+ debug(
609
+ "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
610
+ type,
611
+ prompt,
612
+ i
613
+ );
614
+ this.flushCacheToFile();
615
+ }
616
+ };
617
+ }
618
+ }
619
+ debug("no unused cache found, type: %s, prompt: %s", type, prompt);
620
+ return void 0;
621
+ }
622
+ matchPlanCache(prompt) {
623
+ return this.matchCache(prompt, "plan");
624
+ }
625
+ matchLocateCache(prompt) {
626
+ return this.matchCache(prompt, "locate");
627
+ }
628
+ appendCache(cache) {
629
+ debug("will append cache", cache);
630
+ this.cache.caches.push(cache);
631
+ this.flushCacheToFile();
632
+ }
633
+ loadCacheFromFile() {
634
+ const cacheFile = this.cacheFilePath;
635
+ assert3(cacheFile, "cache file path is required");
636
+ if (!existsSync2(cacheFile)) {
637
+ debug("no cache file found, path: %s", cacheFile);
638
+ return void 0;
639
+ }
640
+ const jsonTypeCacheFile = cacheFile.replace(cacheFileExt, ".json");
641
+ if (existsSync2(jsonTypeCacheFile) && this.isCacheResultUsed) {
642
+ console.warn(
643
+ `An outdated cache file from an earlier version of Midscene has been detected. Since version 0.17, we have implemented an improved caching strategy. Please delete the old file located at: ${jsonTypeCacheFile}.`
644
+ );
645
+ return void 0;
646
+ }
647
+ try {
648
+ const data = readFileSync(cacheFile, "utf8");
649
+ const jsonData = yaml3.load(data);
650
+ if (!version) {
651
+ debug("no midscene version info, will not read cache from file");
652
+ return void 0;
653
+ }
654
+ if (semver.lt(jsonData.midsceneVersion, lowestSupportedMidsceneVersion) && !jsonData.midsceneVersion.includes("beta")) {
655
+ console.warn(
656
+ `You are using an old version of Midscene cache file, and we cannot match any info from it. Starting from Midscene v0.17, we changed our strategy to use xpath for cache info, providing better performance.
657
+ Please delete the existing cache and rebuild it. Sorry for the inconvenience.
658
+ cache file: ${cacheFile}`
659
+ );
660
+ return void 0;
661
+ }
662
+ debug(
663
+ "cache loaded from file, path: %s, cache version: %s, record length: %s",
664
+ cacheFile,
665
+ jsonData.midsceneVersion,
666
+ jsonData.caches.length
667
+ );
668
+ jsonData.midsceneVersion = version;
669
+ return jsonData;
670
+ } catch (err) {
671
+ debug(
672
+ "cache file exists but load failed, path: %s, error: %s",
673
+ cacheFile,
674
+ err
675
+ );
676
+ return void 0;
677
+ }
678
+ }
679
+ flushCacheToFile() {
680
+ if (!version) {
681
+ debug("no midscene version info, will not write cache to file");
682
+ return;
683
+ }
684
+ if (!this.cacheFilePath) {
685
+ debug("no cache file path, will not write cache to file");
686
+ return;
687
+ }
688
+ try {
689
+ const dir = dirname2(this.cacheFilePath);
690
+ if (!existsSync2(dir)) {
691
+ mkdirSync2(dir, { recursive: true });
692
+ debug("created cache directory: %s", dir);
693
+ }
694
+ const yamlData = yaml3.dump(this.cache);
695
+ writeFileSync2(this.cacheFilePath, yamlData);
696
+ debug("cache flushed to file: %s", this.cacheFilePath);
697
+ } catch (err) {
698
+ debug(
699
+ "write cache to file failed, path: %s, error: %s",
700
+ this.cacheFilePath,
701
+ err
702
+ );
703
+ }
704
+ }
705
+ updateOrAppendCacheRecord(newRecord, cachedRecord) {
706
+ if (cachedRecord) {
707
+ if (newRecord.type === "plan") {
708
+ cachedRecord.updateFn((cache) => {
709
+ cache.yamlWorkflow = newRecord.yamlWorkflow;
710
+ });
711
+ } else {
712
+ cachedRecord.updateFn((cache) => {
713
+ cache.xpaths = newRecord.xpaths;
714
+ });
715
+ }
716
+ } else {
717
+ this.appendCache(newRecord);
718
+ }
719
+ }
720
+ };
721
+
540
722
  // src/common/utils.ts
541
723
  async function parseContextFromWebPage(page, _opt) {
542
- assert3(page, "page is required");
724
+ assert4(page, "page is required");
543
725
  if (page._forceUsePageContext) {
544
726
  return await page._forceUsePageContext();
545
727
  }
@@ -566,7 +748,7 @@ async function parseContextFromWebPage(page, _opt) {
566
748
  isVisible
567
749
  });
568
750
  });
569
- assert3(screenshotBase64, "screenshotBase64 is required");
751
+ assert4(screenshotBase64, "screenshotBase64 is required");
570
752
  const size = await page.size();
571
753
  if (size.dpr && size.dpr > 1) {
572
754
  screenshotBase64 = await resizeImgBase64(screenshotBase64, {
@@ -641,6 +823,28 @@ function matchElementFromPlan(planLocateParam, tree) {
641
823
  }
642
824
  return void 0;
643
825
  }
826
+ async function matchElementFromCache(taskExecutor, xpaths, cachePrompt, cacheable) {
827
+ try {
828
+ if (xpaths?.length && taskExecutor.taskCache?.isCacheResultUsed && cacheable !== false) {
829
+ for (let i = 0; i < xpaths.length; i++) {
830
+ const element = await taskExecutor.page.getElementInfoByXpath(
831
+ xpaths[i]
832
+ );
833
+ if (element?.id) {
834
+ debug("cache hit, prompt: %s", cachePrompt);
835
+ debug(
836
+ "found a new new element with same xpath, xpath: %s, id: %s",
837
+ xpaths[i],
838
+ element?.id
839
+ );
840
+ return element;
841
+ }
842
+ }
843
+ }
844
+ } catch (error) {
845
+ debug("get element info by xpath error: ", error);
846
+ }
847
+ }
644
848
  function trimContextByViewport(execution) {
645
849
  function filterVisibleTree(node) {
646
850
  if (!node)
@@ -679,7 +883,7 @@ function trimContextByViewport(execution) {
679
883
  }
680
884
 
681
885
  // src/common/tasks.ts
682
- var debug = getDebug("page-task-executor");
886
+ var debug2 = getDebug2("page-task-executor");
683
887
  var replanningCountLimit = 10;
684
888
  var isAndroidPage = (page) => {
685
889
  return page.pageType === "android";
@@ -720,7 +924,7 @@ var PageTaskExecutor = class {
720
924
  if (info?.id) {
721
925
  elementId = info.id;
722
926
  } else {
723
- debug(
927
+ debug2(
724
928
  "no element id found for position node, will not update cache",
725
929
  element
726
930
  );
@@ -733,7 +937,7 @@ var PageTaskExecutor = class {
733
937
  const result = await this.page.getXpathsById(elementId);
734
938
  return result;
735
939
  } catch (error) {
736
- debug("getXpathsById error: ", error);
940
+ debug2("getXpathsById error: ", error);
737
941
  }
738
942
  }
739
943
  prependExecutorWithScreenshot(taskApply, appendAfterExecution = false) {
@@ -787,7 +991,7 @@ var PageTaskExecutor = class {
787
991
  locate: plan2.locate,
788
992
  executor: async (param, taskContext) => {
789
993
  const { task } = taskContext;
790
- assert4(
994
+ assert5(
791
995
  param?.prompt || param?.id || param?.bbox,
792
996
  "No prompt or id or position or bbox to locate"
793
997
  );
@@ -812,39 +1016,29 @@ var PageTaskExecutor = class {
812
1016
  timing: "before Insight"
813
1017
  };
814
1018
  task.recorder = [recordItem];
815
- let cacheHitFlag = false;
1019
+ const elementFromXpath = param.xpath ? await this.page.getElementInfoByXpath(param.xpath) : void 0;
1020
+ const userExpectedPathHitFlag = !!elementFromXpath;
816
1021
  const cachePrompt = param.prompt;
817
1022
  const locateCacheRecord = this.taskCache?.matchLocateCache(cachePrompt);
818
1023
  const xpaths = locateCacheRecord?.cacheContent?.xpaths;
819
- let elementFromCache = null;
820
- try {
821
- if (xpaths?.length && this.taskCache?.isCacheResultUsed && param?.cacheable !== false) {
822
- for (let i = 0; i < xpaths.length; i++) {
823
- const element2 = await this.page.getElementInfoByXpath(
824
- xpaths[i]
825
- );
826
- if (element2?.id) {
827
- elementFromCache = element2;
828
- debug("cache hit, prompt: %s", cachePrompt);
829
- cacheHitFlag = true;
830
- debug(
831
- "found a new new element with same xpath, xpath: %s, id: %s",
832
- xpaths[i],
833
- element2?.id
834
- );
835
- break;
836
- }
837
- }
838
- }
839
- } catch (error) {
840
- debug("get element info by xpath error: ", error);
841
- }
842
- const startTime = Date.now();
843
- const element = elementFromCache || // try to match element from cache
844
- matchElementFromPlan(param, pageContext.tree) || // try to match element from plan
845
- (await this.insight.locate(param, {
1024
+ const elementFromCache = userExpectedPathHitFlag ? null : await matchElementFromCache(
1025
+ this,
1026
+ xpaths,
1027
+ cachePrompt,
1028
+ param.cacheable
1029
+ );
1030
+ const cacheHitFlag = !!elementFromCache;
1031
+ const elementFromPlan = !userExpectedPathHitFlag && !cacheHitFlag ? matchElementFromPlan(param, pageContext.tree) : void 0;
1032
+ const planHitFlag = !!elementFromPlan;
1033
+ const elementFromAiLocate = !userExpectedPathHitFlag && !cacheHitFlag && !planHitFlag ? (await this.insight.locate(param, {
1034
+ // fallback to ai locate
846
1035
  context: pageContext
847
- })).element;
1036
+ })).element : void 0;
1037
+ const aiLocateHitFlag = !!elementFromAiLocate;
1038
+ const element = elementFromXpath || // highest priority
1039
+ elementFromCache || // second priority
1040
+ elementFromPlan || // third priority
1041
+ elementFromAiLocate;
848
1042
  let currentXpaths;
849
1043
  if (element && this.taskCache && !cacheHitFlag && param?.cacheable !== false) {
850
1044
  const elementXpaths = await this.getElementXpath(
@@ -862,7 +1056,7 @@ var PageTaskExecutor = class {
862
1056
  locateCacheRecord
863
1057
  );
864
1058
  } else {
865
- debug(
1059
+ debug2(
866
1060
  "no xpaths found, will not update cache",
867
1061
  cachePrompt,
868
1062
  elementXpaths
@@ -872,16 +1066,44 @@ var PageTaskExecutor = class {
872
1066
  if (!element) {
873
1067
  throw new Error(`Element not found: ${param.prompt}`);
874
1068
  }
1069
+ let hitBy;
1070
+ if (userExpectedPathHitFlag) {
1071
+ hitBy = {
1072
+ from: "User expected path",
1073
+ context: {
1074
+ xpath: param.xpath
1075
+ }
1076
+ };
1077
+ } else if (cacheHitFlag) {
1078
+ hitBy = {
1079
+ from: "Cache",
1080
+ context: {
1081
+ xpathsFromCache: xpaths,
1082
+ xpathsToSave: currentXpaths
1083
+ }
1084
+ };
1085
+ } else if (planHitFlag) {
1086
+ hitBy = {
1087
+ from: "Planning",
1088
+ context: {
1089
+ id: elementFromPlan?.id,
1090
+ bbox: elementFromPlan?.bbox
1091
+ }
1092
+ };
1093
+ } else if (aiLocateHitFlag) {
1094
+ hitBy = {
1095
+ from: "AI model",
1096
+ context: {
1097
+ prompt: param.prompt
1098
+ }
1099
+ };
1100
+ }
875
1101
  return {
876
1102
  output: {
877
1103
  element
878
1104
  },
879
1105
  pageContext,
880
- cache: {
881
- hit: cacheHitFlag,
882
- originalXpaths: xpaths,
883
- currentXpaths
884
- }
1106
+ hitBy
885
1107
  };
886
1108
  }
887
1109
  };
@@ -977,7 +1199,7 @@ var PageTaskExecutor = class {
977
1199
  thought: plan2.thought,
978
1200
  locate: plan2.locate,
979
1201
  executor: async (param, { element }) => {
980
- assert4(element, "Element not found, cannot tap");
1202
+ assert5(element, "Element not found, cannot tap");
981
1203
  await this.page.mouse.click(element.center[0], element.center[1]);
982
1204
  }
983
1205
  };
@@ -989,7 +1211,7 @@ var PageTaskExecutor = class {
989
1211
  thought: plan2.thought,
990
1212
  locate: plan2.locate,
991
1213
  executor: async (param, { element }) => {
992
- assert4(element, "Element not found, cannot right click");
1214
+ assert5(element, "Element not found, cannot right click");
993
1215
  await this.page.mouse.click(
994
1216
  element.center[0],
995
1217
  element.center[1],
@@ -1006,7 +1228,7 @@ var PageTaskExecutor = class {
1006
1228
  thought: plan2.thought,
1007
1229
  locate: plan2.locate,
1008
1230
  executor: async (taskParam) => {
1009
- assert4(
1231
+ assert5(
1010
1232
  taskParam?.start_box && taskParam?.end_box,
1011
1233
  "No start_box or end_box to drag"
1012
1234
  );
@@ -1021,7 +1243,7 @@ var PageTaskExecutor = class {
1021
1243
  thought: plan2.thought,
1022
1244
  locate: plan2.locate,
1023
1245
  executor: async (param, { element }) => {
1024
- assert4(element, "Element not found, cannot hover");
1246
+ assert5(element, "Element not found, cannot hover");
1025
1247
  await this.page.mouse.move(element.center[0], element.center[1]);
1026
1248
  }
1027
1249
  };
@@ -1140,7 +1362,7 @@ var PageTaskExecutor = class {
1140
1362
  thought: plan2.thought,
1141
1363
  locate: plan2.locate,
1142
1364
  executor: async (param) => {
1143
- assert4(
1365
+ assert5(
1144
1366
  isAndroidPage(this.page),
1145
1367
  "Cannot use home button on non-Android devices"
1146
1368
  );
@@ -1156,7 +1378,7 @@ var PageTaskExecutor = class {
1156
1378
  thought: plan2.thought,
1157
1379
  locate: plan2.locate,
1158
1380
  executor: async (param) => {
1159
- assert4(
1381
+ assert5(
1160
1382
  isAndroidPage(this.page),
1161
1383
  "Cannot use back button on non-Android devices"
1162
1384
  );
@@ -1172,7 +1394,7 @@ var PageTaskExecutor = class {
1172
1394
  thought: plan2.thought,
1173
1395
  locate: plan2.locate,
1174
1396
  executor: async (param) => {
1175
- assert4(
1397
+ assert5(
1176
1398
  isAndroidPage(this.page),
1177
1399
  "Cannot use recent apps button on non-Android devices"
1178
1400
  );
@@ -1323,7 +1545,7 @@ var PageTaskExecutor = class {
1323
1545
  }
1324
1546
  }
1325
1547
  if (finalActions.length === 0) {
1326
- assert4(
1548
+ assert5(
1327
1549
  !more_actions_needed_by_instruction || sleep3,
1328
1550
  error ? `Failed to plan: ${error}` : planParsingError || "No plan found"
1329
1551
  );
@@ -1561,7 +1783,7 @@ var PageTaskExecutor = class {
1561
1783
  );
1562
1784
  let outputResult = data;
1563
1785
  if (ifTypeRestricted) {
1564
- assert4(data?.result !== void 0, "No result in query data");
1786
+ assert5(data?.result !== void 0, "No result in query data");
1565
1787
  outputResult = data.result;
1566
1788
  }
1567
1789
  return {
@@ -1657,9 +1879,9 @@ var PageTaskExecutor = class {
1657
1879
  onTaskStart: this.onTaskStartCallback
1658
1880
  });
1659
1881
  const { timeoutMs, checkIntervalMs } = opt;
1660
- assert4(assertion, "No assertion for waitFor");
1661
- assert4(timeoutMs, "No timeoutMs for waitFor");
1662
- assert4(checkIntervalMs, "No checkIntervalMs for waitFor");
1882
+ assert5(assertion, "No assertion for waitFor");
1883
+ assert5(timeoutMs, "No timeoutMs for waitFor");
1884
+ assert5(checkIntervalMs, "No checkIntervalMs for waitFor");
1663
1885
  const overallStartTime = Date.now();
1664
1886
  let startTime = Date.now();
1665
1887
  let errorThought = "";
@@ -1713,9 +1935,9 @@ var PageTaskExecutor = class {
1713
1935
  };
1714
1936
 
1715
1937
  // src/common/plan-builder.ts
1716
- import { getDebug as getDebug2 } from "@midscene/shared/logger";
1717
- import { assert as assert5 } from "@midscene/shared/utils";
1718
- var debug2 = getDebug2("plan-builder");
1938
+ import { getDebug as getDebug3 } from "@midscene/shared/logger";
1939
+ import { assert as assert6 } from "@midscene/shared/utils";
1940
+ var debug3 = getDebug3("plan-builder");
1719
1941
  function buildPlans(type, locateParam, param) {
1720
1942
  let returnPlans = [];
1721
1943
  const locatePlan = locateParam ? {
@@ -1725,8 +1947,8 @@ function buildPlans(type, locateParam, param) {
1725
1947
  thought: ""
1726
1948
  } : null;
1727
1949
  if (type === "Tap" || type === "Hover" || type === "RightClick") {
1728
- assert5(locateParam, `missing locate info for action "${type}"`);
1729
- assert5(locatePlan, `missing locate info for action "${type}"`);
1950
+ assert6(locateParam, `missing locate info for action "${type}"`);
1951
+ assert6(locatePlan, `missing locate info for action "${type}"`);
1730
1952
  const tapPlan = {
1731
1953
  type,
1732
1954
  param: null,
@@ -1737,9 +1959,9 @@ function buildPlans(type, locateParam, param) {
1737
1959
  }
1738
1960
  if (type === "Input" || type === "KeyboardPress") {
1739
1961
  if (type === "Input") {
1740
- assert5(locateParam, `missing locate info for action "${type}"`);
1962
+ assert6(locateParam, `missing locate info for action "${type}"`);
1741
1963
  }
1742
- assert5(param, `missing param for action "${type}"`);
1964
+ assert6(param, `missing param for action "${type}"`);
1743
1965
  const inputPlan = {
1744
1966
  type,
1745
1967
  param,
@@ -1753,7 +1975,7 @@ function buildPlans(type, locateParam, param) {
1753
1975
  }
1754
1976
  }
1755
1977
  if (type === "Scroll") {
1756
- assert5(param, `missing param for action "${type}"`);
1978
+ assert6(param, `missing param for action "${type}"`);
1757
1979
  const scrollPlan = {
1758
1980
  type,
1759
1981
  param,
@@ -1767,7 +1989,7 @@ function buildPlans(type, locateParam, param) {
1767
1989
  }
1768
1990
  }
1769
1991
  if (type === "Sleep") {
1770
- assert5(param, `missing param for action "${type}"`);
1992
+ assert6(param, `missing param for action "${type}"`);
1771
1993
  const sleepPlan = {
1772
1994
  type,
1773
1995
  param,
@@ -1777,7 +1999,7 @@ function buildPlans(type, locateParam, param) {
1777
1999
  returnPlans = [sleepPlan];
1778
2000
  }
1779
2001
  if (type === "Locate") {
1780
- assert5(locateParam, `missing locate info for action "${type}"`);
2002
+ assert6(locateParam, `missing locate info for action "${type}"`);
1781
2003
  const locatePlan2 = {
1782
2004
  type,
1783
2005
  param: locateParam,
@@ -1787,187 +2009,12 @@ function buildPlans(type, locateParam, param) {
1787
2009
  returnPlans = [locatePlan2];
1788
2010
  }
1789
2011
  if (returnPlans) {
1790
- debug2("buildPlans", returnPlans);
2012
+ debug3("buildPlans", returnPlans);
1791
2013
  return returnPlans;
1792
2014
  }
1793
2015
  throw new Error(`Not supported type: ${type}`);
1794
2016
  }
1795
2017
 
1796
- // src/common/task-cache.ts
1797
- import assert6 from "assert";
1798
- import { existsSync as existsSync2, mkdirSync as mkdirSync2, readFileSync, writeFileSync as writeFileSync2 } from "fs";
1799
- import { dirname as dirname2, join as join2 } from "path";
1800
- import { getMidsceneRunSubDir as getMidsceneRunSubDir2 } from "@midscene/shared/common";
1801
- import { getDebug as getDebug3 } from "@midscene/shared/logger";
1802
- import { ifInBrowser as ifInBrowser2 } from "@midscene/shared/utils";
1803
- import yaml3 from "js-yaml";
1804
- import semver from "semver";
1805
-
1806
- // package.json
1807
- var version = "0.19.1";
1808
-
1809
- // src/common/task-cache.ts
1810
- var debug3 = getDebug3("cache");
1811
- var lowestSupportedMidsceneVersion = "0.16.10";
1812
- var cacheFileExt = ".cache.yaml";
1813
- var TaskCache = class {
1814
- // Track matched records
1815
- constructor(cacheId, isCacheResultUsed, cacheFilePath) {
1816
- this.matchedCacheIndices = /* @__PURE__ */ new Set();
1817
- assert6(cacheId, "cacheId is required");
1818
- this.cacheId = replaceIllegalPathCharsAndSpace(cacheId);
1819
- this.cacheFilePath = ifInBrowser2 ? void 0 : cacheFilePath || join2(getMidsceneRunSubDir2("cache"), `${this.cacheId}${cacheFileExt}`);
1820
- this.isCacheResultUsed = isCacheResultUsed;
1821
- let cacheContent;
1822
- if (this.cacheFilePath) {
1823
- cacheContent = this.loadCacheFromFile();
1824
- }
1825
- if (!cacheContent) {
1826
- cacheContent = {
1827
- midsceneVersion: version,
1828
- cacheId: this.cacheId,
1829
- caches: []
1830
- };
1831
- }
1832
- this.cache = cacheContent;
1833
- this.cacheOriginalLength = this.cache.caches.length;
1834
- }
1835
- matchCache(prompt, type) {
1836
- for (let i = 0; i < this.cacheOriginalLength; i++) {
1837
- const item = this.cache.caches[i];
1838
- const key = `${type}:${prompt}:${i}`;
1839
- if (item.type === type && item.prompt === prompt && !this.matchedCacheIndices.has(key)) {
1840
- this.matchedCacheIndices.add(key);
1841
- debug3(
1842
- "cache found and marked as used, type: %s, prompt: %s, index: %d",
1843
- type,
1844
- prompt,
1845
- i
1846
- );
1847
- return {
1848
- cacheContent: item,
1849
- updateFn: (cb) => {
1850
- debug3(
1851
- "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1852
- type,
1853
- prompt,
1854
- i
1855
- );
1856
- cb(item);
1857
- debug3(
1858
- "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1859
- type,
1860
- prompt,
1861
- i
1862
- );
1863
- this.flushCacheToFile();
1864
- }
1865
- };
1866
- }
1867
- }
1868
- debug3("no unused cache found, type: %s, prompt: %s", type, prompt);
1869
- return void 0;
1870
- }
1871
- matchPlanCache(prompt) {
1872
- return this.matchCache(prompt, "plan");
1873
- }
1874
- matchLocateCache(prompt) {
1875
- return this.matchCache(prompt, "locate");
1876
- }
1877
- appendCache(cache) {
1878
- debug3("will append cache", cache);
1879
- this.cache.caches.push(cache);
1880
- this.flushCacheToFile();
1881
- }
1882
- loadCacheFromFile() {
1883
- const cacheFile = this.cacheFilePath;
1884
- assert6(cacheFile, "cache file path is required");
1885
- if (!existsSync2(cacheFile)) {
1886
- debug3("no cache file found, path: %s", cacheFile);
1887
- return void 0;
1888
- }
1889
- const jsonTypeCacheFile = cacheFile.replace(cacheFileExt, ".json");
1890
- if (existsSync2(jsonTypeCacheFile) && this.isCacheResultUsed) {
1891
- console.warn(
1892
- `An outdated cache file from an earlier version of Midscene has been detected. Since version 0.17, we have implemented an improved caching strategy. Please delete the old file located at: ${jsonTypeCacheFile}.`
1893
- );
1894
- return void 0;
1895
- }
1896
- try {
1897
- const data = readFileSync(cacheFile, "utf8");
1898
- const jsonData = yaml3.load(data);
1899
- if (!version) {
1900
- debug3("no midscene version info, will not read cache from file");
1901
- return void 0;
1902
- }
1903
- if (semver.lt(jsonData.midsceneVersion, lowestSupportedMidsceneVersion) && !jsonData.midsceneVersion.includes("beta")) {
1904
- console.warn(
1905
- `You are using an old version of Midscene cache file, and we cannot match any info from it. Starting from Midscene v0.17, we changed our strategy to use xpath for cache info, providing better performance.
1906
- Please delete the existing cache and rebuild it. Sorry for the inconvenience.
1907
- cache file: ${cacheFile}`
1908
- );
1909
- return void 0;
1910
- }
1911
- debug3(
1912
- "cache loaded from file, path: %s, cache version: %s, record length: %s",
1913
- cacheFile,
1914
- jsonData.midsceneVersion,
1915
- jsonData.caches.length
1916
- );
1917
- jsonData.midsceneVersion = version;
1918
- return jsonData;
1919
- } catch (err) {
1920
- debug3(
1921
- "cache file exists but load failed, path: %s, error: %s",
1922
- cacheFile,
1923
- err
1924
- );
1925
- return void 0;
1926
- }
1927
- }
1928
- flushCacheToFile() {
1929
- if (!version) {
1930
- debug3("no midscene version info, will not write cache to file");
1931
- return;
1932
- }
1933
- if (!this.cacheFilePath) {
1934
- debug3("no cache file path, will not write cache to file");
1935
- return;
1936
- }
1937
- try {
1938
- const dir = dirname2(this.cacheFilePath);
1939
- if (!existsSync2(dir)) {
1940
- mkdirSync2(dir, { recursive: true });
1941
- debug3("created cache directory: %s", dir);
1942
- }
1943
- const yamlData = yaml3.dump(this.cache);
1944
- writeFileSync2(this.cacheFilePath, yamlData);
1945
- debug3("cache flushed to file: %s", this.cacheFilePath);
1946
- } catch (err) {
1947
- debug3(
1948
- "write cache to file failed, path: %s, error: %s",
1949
- this.cacheFilePath,
1950
- err
1951
- );
1952
- }
1953
- }
1954
- updateOrAppendCacheRecord(newRecord, cachedRecord) {
1955
- if (cachedRecord) {
1956
- if (newRecord.type === "plan") {
1957
- cachedRecord.updateFn((cache) => {
1958
- cache.yamlWorkflow = newRecord.yamlWorkflow;
1959
- });
1960
- } else {
1961
- cachedRecord.updateFn((cache) => {
1962
- cache.xpaths = newRecord.xpaths;
1963
- });
1964
- }
1965
- } else {
1966
- this.appendCache(newRecord);
1967
- }
1968
- }
1969
- };
1970
-
1971
2018
  // src/common/agent.ts
1972
2019
  var debug4 = getDebug4("web-integration");
1973
2020
  var distanceOfTwoPoints = (p1, p2) => {
@@ -2096,10 +2143,12 @@ ${errorTask?.errorStack}`);
2096
2143
  const prompt = opt.prompt ?? locatePrompt;
2097
2144
  const deepThink = opt.deepThink ?? false;
2098
2145
  const cacheable = opt.cacheable ?? true;
2146
+ const xpath = opt.xpath;
2099
2147
  return {
2100
2148
  prompt,
2101
2149
  deepThink,
2102
- cacheable
2150
+ cacheable,
2151
+ xpath
2103
2152
  };
2104
2153
  }
2105
2154
  return {
@@ -2257,6 +2306,9 @@ ${errorTask?.errorStack}`);
2257
2306
  this.afterTaskRunning(executor);
2258
2307
  return output;
2259
2308
  }
2309
+ async aiAsk(prompt, opt = defaultInsightExtractOption) {
2310
+ return this.aiString(prompt, opt);
2311
+ }
2260
2312
  async describeElementAtPoint(center, opt) {
2261
2313
  const { verifyPrompt = true, retryLimit = 3 } = opt || {};
2262
2314
  let success = false;