@midscene/web 0.19.1 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. package/dist/es/agent.js +299 -247
  2. package/dist/es/agent.js.map +1 -1
  3. package/dist/es/bridge-mode-browser.js +3 -3
  4. package/dist/es/bridge-mode.js +301 -249
  5. package/dist/es/bridge-mode.js.map +1 -1
  6. package/dist/es/chrome-extension.js +342 -290
  7. package/dist/es/chrome-extension.js.map +1 -1
  8. package/dist/es/index.js +307 -247
  9. package/dist/es/index.js.map +1 -1
  10. package/dist/es/midscene-playground.js +341 -289
  11. package/dist/es/midscene-playground.js.map +1 -1
  12. package/dist/es/midscene-server.js +25 -12
  13. package/dist/es/midscene-server.js.map +1 -1
  14. package/dist/es/playground.js +341 -289
  15. package/dist/es/playground.js.map +1 -1
  16. package/dist/es/playwright-report.js +14 -1
  17. package/dist/es/playwright-report.js.map +1 -1
  18. package/dist/es/playwright-reporter.js +14 -1
  19. package/dist/es/playwright-reporter.js.map +1 -1
  20. package/dist/es/playwright.js +307 -247
  21. package/dist/es/playwright.js.map +1 -1
  22. package/dist/es/puppeteer-agent-launcher.js +299 -247
  23. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  24. package/dist/es/puppeteer.js +299 -247
  25. package/dist/es/puppeteer.js.map +1 -1
  26. package/dist/es/utils.js +42 -8
  27. package/dist/es/utils.js.map +1 -1
  28. package/dist/es/yaml.js +11 -4
  29. package/dist/es/yaml.js.map +1 -1
  30. package/dist/lib/agent.js +308 -256
  31. package/dist/lib/agent.js.map +1 -1
  32. package/dist/lib/bridge-mode-browser.js +3 -3
  33. package/dist/lib/bridge-mode.js +310 -258
  34. package/dist/lib/bridge-mode.js.map +1 -1
  35. package/dist/lib/chrome-extension.js +355 -303
  36. package/dist/lib/chrome-extension.js.map +1 -1
  37. package/dist/lib/index.js +316 -256
  38. package/dist/lib/index.js.map +1 -1
  39. package/dist/lib/midscene-playground.js +354 -302
  40. package/dist/lib/midscene-playground.js.map +1 -1
  41. package/dist/lib/midscene-server.js +28 -15
  42. package/dist/lib/midscene-server.js.map +1 -1
  43. package/dist/lib/playground.js +354 -302
  44. package/dist/lib/playground.js.map +1 -1
  45. package/dist/lib/playwright-report.js +20 -7
  46. package/dist/lib/playwright-report.js.map +1 -1
  47. package/dist/lib/playwright-reporter.js +20 -7
  48. package/dist/lib/playwright-reporter.js.map +1 -1
  49. package/dist/lib/playwright.js +316 -256
  50. package/dist/lib/playwright.js.map +1 -1
  51. package/dist/lib/puppeteer-agent-launcher.js +308 -256
  52. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  53. package/dist/lib/puppeteer.js +308 -256
  54. package/dist/lib/puppeteer.js.map +1 -1
  55. package/dist/lib/utils.js +48 -13
  56. package/dist/lib/utils.js.map +1 -1
  57. package/dist/lib/yaml.js +11 -4
  58. package/dist/lib/yaml.js.map +1 -1
  59. package/dist/types/agent.d.ts +6 -102
  60. package/dist/types/bridge-mode-browser.d.ts +3 -2
  61. package/dist/types/bridge-mode.d.ts +4 -4
  62. package/dist/types/{browser-5dbb4bfb.d.ts → browser-118d886d.d.ts} +1 -1
  63. package/dist/types/chrome-extension.d.ts +2 -2
  64. package/dist/types/index.d.ts +1 -1
  65. package/dist/types/midscene-server.d.ts +2 -2
  66. package/dist/types/{page-90e9f9a7.d.ts → page-471361cd.d.ts} +102 -3
  67. package/dist/types/playground.d.ts +2 -2
  68. package/dist/types/playwright.d.ts +6 -2
  69. package/dist/types/puppeteer-agent-launcher.d.ts +1 -1
  70. package/dist/types/puppeteer.d.ts +3 -3
  71. package/dist/types/utils.d.ts +2 -1
  72. package/dist/types/yaml.d.ts +1 -1
  73. package/package.json +3 -3
package/dist/es/agent.js CHANGED
@@ -149,7 +149,7 @@ var ScriptPlayer = class {
149
149
  domIncluded: numberTask.domIncluded,
150
150
  screenshotIncluded: numberTask.screenshotIncluded
151
151
  };
152
- assert(prompt, "missing prompt for number");
152
+ assert(prompt, "missing prompt for aiNumber");
153
153
  assert(
154
154
  typeof prompt === "string",
155
155
  "prompt for number must be a string"
@@ -163,7 +163,7 @@ var ScriptPlayer = class {
163
163
  domIncluded: stringTask.domIncluded,
164
164
  screenshotIncluded: stringTask.screenshotIncluded
165
165
  };
166
- assert(prompt, "missing prompt for string");
166
+ assert(prompt, "missing prompt for aiNumber");
167
167
  assert(
168
168
  typeof prompt === "string",
169
169
  "prompt for string must be a string"
@@ -177,13 +177,20 @@ var ScriptPlayer = class {
177
177
  domIncluded: booleanTask.domIncluded,
178
178
  screenshotIncluded: booleanTask.screenshotIncluded
179
179
  };
180
- assert(prompt, "missing prompt for boolean");
180
+ assert(prompt, "missing prompt for aiBoolean");
181
181
  assert(
182
182
  typeof prompt === "string",
183
183
  "prompt for boolean must be a string"
184
184
  );
185
185
  const booleanResult = await agent.aiBoolean(prompt, options);
186
186
  this.setResult(booleanTask.name, booleanResult);
187
+ } else if ("aiAsk" in flowItem) {
188
+ const askTask = flowItem;
189
+ const prompt = askTask.aiAsk;
190
+ assert(prompt, "missing prompt for aiAsk");
191
+ assert(typeof prompt === "string", "prompt for aiAsk must be a string");
192
+ const askResult = await agent.aiAsk(prompt);
193
+ this.setResult(askTask.name, askResult);
187
194
  } else if ("aiLocate" in flowItem) {
188
195
  const locateTask = flowItem;
189
196
  const prompt = locateTask.aiLocate;
@@ -192,7 +199,7 @@ var ScriptPlayer = class {
192
199
  typeof prompt === "string",
193
200
  "prompt for aiLocate must be a string"
194
201
  );
195
- const locateResult = await agent.aiLocate(prompt);
202
+ const locateResult = await agent.aiLocate(prompt, locateTask);
196
203
  this.setResult(locateTask.name, locateResult);
197
204
  } else if ("aiWaitFor" in flowItem) {
198
205
  const waitForTask = flowItem;
@@ -418,8 +425,8 @@ import {
418
425
  } from "@midscene/core/ai-model";
419
426
  import { sleep } from "@midscene/core/utils";
420
427
  import { NodeType } from "@midscene/shared/constants";
421
- import { getDebug } from "@midscene/shared/logger";
422
- import { assert as assert4 } from "@midscene/shared/utils";
428
+ import { getDebug as getDebug2 } from "@midscene/shared/logger";
429
+ import { assert as assert5 } from "@midscene/shared/utils";
423
430
 
424
431
  // src/common/ui-utils.ts
425
432
  function typeStr(task) {
@@ -504,7 +511,7 @@ import {
504
511
  traverseTree
505
512
  } from "@midscene/shared/extractor";
506
513
  import { resizeImgBase64 } from "@midscene/shared/img";
507
- import { assert as assert3, logMsg, uuid } from "@midscene/shared/utils";
514
+ import { assert as assert4, logMsg, uuid } from "@midscene/shared/utils";
508
515
  import dayjs from "dayjs";
509
516
 
510
517
  // src/web-element.ts
@@ -532,9 +539,184 @@ var WebElementInfo = class {
532
539
  }
533
540
  };
534
541
 
542
+ // src/common/task-cache.ts
543
+ import assert3 from "assert";
544
+ import { existsSync as existsSync2, mkdirSync as mkdirSync2, readFileSync, writeFileSync as writeFileSync2 } from "fs";
545
+ import { dirname as dirname2, join as join2 } from "path";
546
+ import { getMidsceneRunSubDir as getMidsceneRunSubDir2 } from "@midscene/shared/common";
547
+ import { getDebug } from "@midscene/shared/logger";
548
+ import { ifInBrowser as ifInBrowser2 } from "@midscene/shared/utils";
549
+ import yaml3 from "js-yaml";
550
+ import semver from "semver";
551
+
552
+ // package.json
553
+ var version = "0.20.0";
554
+
555
+ // src/common/task-cache.ts
556
+ var debug = getDebug("cache");
557
+ var lowestSupportedMidsceneVersion = "0.16.10";
558
+ var cacheFileExt = ".cache.yaml";
559
+ var TaskCache = class {
560
+ // Track matched records
561
+ constructor(cacheId, isCacheResultUsed, cacheFilePath) {
562
+ this.matchedCacheIndices = /* @__PURE__ */ new Set();
563
+ assert3(cacheId, "cacheId is required");
564
+ this.cacheId = replaceIllegalPathCharsAndSpace(cacheId);
565
+ this.cacheFilePath = ifInBrowser2 ? void 0 : cacheFilePath || join2(getMidsceneRunSubDir2("cache"), `${this.cacheId}${cacheFileExt}`);
566
+ this.isCacheResultUsed = isCacheResultUsed;
567
+ let cacheContent;
568
+ if (this.cacheFilePath) {
569
+ cacheContent = this.loadCacheFromFile();
570
+ }
571
+ if (!cacheContent) {
572
+ cacheContent = {
573
+ midsceneVersion: version,
574
+ cacheId: this.cacheId,
575
+ caches: []
576
+ };
577
+ }
578
+ this.cache = cacheContent;
579
+ this.cacheOriginalLength = this.cache.caches.length;
580
+ }
581
+ matchCache(prompt, type) {
582
+ for (let i = 0; i < this.cacheOriginalLength; i++) {
583
+ const item = this.cache.caches[i];
584
+ const key = `${type}:${prompt}:${i}`;
585
+ if (item.type === type && item.prompt === prompt && !this.matchedCacheIndices.has(key)) {
586
+ this.matchedCacheIndices.add(key);
587
+ debug(
588
+ "cache found and marked as used, type: %s, prompt: %s, index: %d",
589
+ type,
590
+ prompt,
591
+ i
592
+ );
593
+ return {
594
+ cacheContent: item,
595
+ updateFn: (cb) => {
596
+ debug(
597
+ "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
598
+ type,
599
+ prompt,
600
+ i
601
+ );
602
+ cb(item);
603
+ debug(
604
+ "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
605
+ type,
606
+ prompt,
607
+ i
608
+ );
609
+ this.flushCacheToFile();
610
+ }
611
+ };
612
+ }
613
+ }
614
+ debug("no unused cache found, type: %s, prompt: %s", type, prompt);
615
+ return void 0;
616
+ }
617
+ matchPlanCache(prompt) {
618
+ return this.matchCache(prompt, "plan");
619
+ }
620
+ matchLocateCache(prompt) {
621
+ return this.matchCache(prompt, "locate");
622
+ }
623
+ appendCache(cache) {
624
+ debug("will append cache", cache);
625
+ this.cache.caches.push(cache);
626
+ this.flushCacheToFile();
627
+ }
628
+ loadCacheFromFile() {
629
+ const cacheFile = this.cacheFilePath;
630
+ assert3(cacheFile, "cache file path is required");
631
+ if (!existsSync2(cacheFile)) {
632
+ debug("no cache file found, path: %s", cacheFile);
633
+ return void 0;
634
+ }
635
+ const jsonTypeCacheFile = cacheFile.replace(cacheFileExt, ".json");
636
+ if (existsSync2(jsonTypeCacheFile) && this.isCacheResultUsed) {
637
+ console.warn(
638
+ `An outdated cache file from an earlier version of Midscene has been detected. Since version 0.17, we have implemented an improved caching strategy. Please delete the old file located at: ${jsonTypeCacheFile}.`
639
+ );
640
+ return void 0;
641
+ }
642
+ try {
643
+ const data = readFileSync(cacheFile, "utf8");
644
+ const jsonData = yaml3.load(data);
645
+ if (!version) {
646
+ debug("no midscene version info, will not read cache from file");
647
+ return void 0;
648
+ }
649
+ if (semver.lt(jsonData.midsceneVersion, lowestSupportedMidsceneVersion) && !jsonData.midsceneVersion.includes("beta")) {
650
+ console.warn(
651
+ `You are using an old version of Midscene cache file, and we cannot match any info from it. Starting from Midscene v0.17, we changed our strategy to use xpath for cache info, providing better performance.
652
+ Please delete the existing cache and rebuild it. Sorry for the inconvenience.
653
+ cache file: ${cacheFile}`
654
+ );
655
+ return void 0;
656
+ }
657
+ debug(
658
+ "cache loaded from file, path: %s, cache version: %s, record length: %s",
659
+ cacheFile,
660
+ jsonData.midsceneVersion,
661
+ jsonData.caches.length
662
+ );
663
+ jsonData.midsceneVersion = version;
664
+ return jsonData;
665
+ } catch (err) {
666
+ debug(
667
+ "cache file exists but load failed, path: %s, error: %s",
668
+ cacheFile,
669
+ err
670
+ );
671
+ return void 0;
672
+ }
673
+ }
674
+ flushCacheToFile() {
675
+ if (!version) {
676
+ debug("no midscene version info, will not write cache to file");
677
+ return;
678
+ }
679
+ if (!this.cacheFilePath) {
680
+ debug("no cache file path, will not write cache to file");
681
+ return;
682
+ }
683
+ try {
684
+ const dir = dirname2(this.cacheFilePath);
685
+ if (!existsSync2(dir)) {
686
+ mkdirSync2(dir, { recursive: true });
687
+ debug("created cache directory: %s", dir);
688
+ }
689
+ const yamlData = yaml3.dump(this.cache);
690
+ writeFileSync2(this.cacheFilePath, yamlData);
691
+ debug("cache flushed to file: %s", this.cacheFilePath);
692
+ } catch (err) {
693
+ debug(
694
+ "write cache to file failed, path: %s, error: %s",
695
+ this.cacheFilePath,
696
+ err
697
+ );
698
+ }
699
+ }
700
+ updateOrAppendCacheRecord(newRecord, cachedRecord) {
701
+ if (cachedRecord) {
702
+ if (newRecord.type === "plan") {
703
+ cachedRecord.updateFn((cache) => {
704
+ cache.yamlWorkflow = newRecord.yamlWorkflow;
705
+ });
706
+ } else {
707
+ cachedRecord.updateFn((cache) => {
708
+ cache.xpaths = newRecord.xpaths;
709
+ });
710
+ }
711
+ } else {
712
+ this.appendCache(newRecord);
713
+ }
714
+ }
715
+ };
716
+
535
717
  // src/common/utils.ts
536
718
  async function parseContextFromWebPage(page, _opt) {
537
- assert3(page, "page is required");
719
+ assert4(page, "page is required");
538
720
  if (page._forceUsePageContext) {
539
721
  return await page._forceUsePageContext();
540
722
  }
@@ -561,7 +743,7 @@ async function parseContextFromWebPage(page, _opt) {
561
743
  isVisible
562
744
  });
563
745
  });
564
- assert3(screenshotBase64, "screenshotBase64 is required");
746
+ assert4(screenshotBase64, "screenshotBase64 is required");
565
747
  const size = await page.size();
566
748
  if (size.dpr && size.dpr > 1) {
567
749
  screenshotBase64 = await resizeImgBase64(screenshotBase64, {
@@ -608,6 +790,28 @@ function matchElementFromPlan(planLocateParam, tree) {
608
790
  }
609
791
  return void 0;
610
792
  }
793
+ async function matchElementFromCache(taskExecutor, xpaths, cachePrompt, cacheable) {
794
+ try {
795
+ if (xpaths?.length && taskExecutor.taskCache?.isCacheResultUsed && cacheable !== false) {
796
+ for (let i = 0; i < xpaths.length; i++) {
797
+ const element = await taskExecutor.page.getElementInfoByXpath(
798
+ xpaths[i]
799
+ );
800
+ if (element?.id) {
801
+ debug("cache hit, prompt: %s", cachePrompt);
802
+ debug(
803
+ "found a new new element with same xpath, xpath: %s, id: %s",
804
+ xpaths[i],
805
+ element?.id
806
+ );
807
+ return element;
808
+ }
809
+ }
810
+ }
811
+ } catch (error) {
812
+ debug("get element info by xpath error: ", error);
813
+ }
814
+ }
611
815
  function trimContextByViewport(execution) {
612
816
  function filterVisibleTree(node) {
613
817
  if (!node)
@@ -646,7 +850,7 @@ function trimContextByViewport(execution) {
646
850
  }
647
851
 
648
852
  // src/common/tasks.ts
649
- var debug = getDebug("page-task-executor");
853
+ var debug2 = getDebug2("page-task-executor");
650
854
  var replanningCountLimit = 10;
651
855
  var isAndroidPage = (page) => {
652
856
  return page.pageType === "android";
@@ -687,7 +891,7 @@ var PageTaskExecutor = class {
687
891
  if (info?.id) {
688
892
  elementId = info.id;
689
893
  } else {
690
- debug(
894
+ debug2(
691
895
  "no element id found for position node, will not update cache",
692
896
  element
693
897
  );
@@ -700,7 +904,7 @@ var PageTaskExecutor = class {
700
904
  const result = await this.page.getXpathsById(elementId);
701
905
  return result;
702
906
  } catch (error) {
703
- debug("getXpathsById error: ", error);
907
+ debug2("getXpathsById error: ", error);
704
908
  }
705
909
  }
706
910
  prependExecutorWithScreenshot(taskApply, appendAfterExecution = false) {
@@ -754,7 +958,7 @@ var PageTaskExecutor = class {
754
958
  locate: plan2.locate,
755
959
  executor: async (param, taskContext) => {
756
960
  const { task } = taskContext;
757
- assert4(
961
+ assert5(
758
962
  param?.prompt || param?.id || param?.bbox,
759
963
  "No prompt or id or position or bbox to locate"
760
964
  );
@@ -779,39 +983,29 @@ var PageTaskExecutor = class {
779
983
  timing: "before Insight"
780
984
  };
781
985
  task.recorder = [recordItem];
782
- let cacheHitFlag = false;
986
+ const elementFromXpath = param.xpath ? await this.page.getElementInfoByXpath(param.xpath) : void 0;
987
+ const userExpectedPathHitFlag = !!elementFromXpath;
783
988
  const cachePrompt = param.prompt;
784
989
  const locateCacheRecord = this.taskCache?.matchLocateCache(cachePrompt);
785
990
  const xpaths = locateCacheRecord?.cacheContent?.xpaths;
786
- let elementFromCache = null;
787
- try {
788
- if (xpaths?.length && this.taskCache?.isCacheResultUsed && param?.cacheable !== false) {
789
- for (let i = 0; i < xpaths.length; i++) {
790
- const element2 = await this.page.getElementInfoByXpath(
791
- xpaths[i]
792
- );
793
- if (element2?.id) {
794
- elementFromCache = element2;
795
- debug("cache hit, prompt: %s", cachePrompt);
796
- cacheHitFlag = true;
797
- debug(
798
- "found a new new element with same xpath, xpath: %s, id: %s",
799
- xpaths[i],
800
- element2?.id
801
- );
802
- break;
803
- }
804
- }
805
- }
806
- } catch (error) {
807
- debug("get element info by xpath error: ", error);
808
- }
809
- const startTime = Date.now();
810
- const element = elementFromCache || // try to match element from cache
811
- matchElementFromPlan(param, pageContext.tree) || // try to match element from plan
812
- (await this.insight.locate(param, {
991
+ const elementFromCache = userExpectedPathHitFlag ? null : await matchElementFromCache(
992
+ this,
993
+ xpaths,
994
+ cachePrompt,
995
+ param.cacheable
996
+ );
997
+ const cacheHitFlag = !!elementFromCache;
998
+ const elementFromPlan = !userExpectedPathHitFlag && !cacheHitFlag ? matchElementFromPlan(param, pageContext.tree) : void 0;
999
+ const planHitFlag = !!elementFromPlan;
1000
+ const elementFromAiLocate = !userExpectedPathHitFlag && !cacheHitFlag && !planHitFlag ? (await this.insight.locate(param, {
1001
+ // fallback to ai locate
813
1002
  context: pageContext
814
- })).element;
1003
+ })).element : void 0;
1004
+ const aiLocateHitFlag = !!elementFromAiLocate;
1005
+ const element = elementFromXpath || // highest priority
1006
+ elementFromCache || // second priority
1007
+ elementFromPlan || // third priority
1008
+ elementFromAiLocate;
815
1009
  let currentXpaths;
816
1010
  if (element && this.taskCache && !cacheHitFlag && param?.cacheable !== false) {
817
1011
  const elementXpaths = await this.getElementXpath(
@@ -829,7 +1023,7 @@ var PageTaskExecutor = class {
829
1023
  locateCacheRecord
830
1024
  );
831
1025
  } else {
832
- debug(
1026
+ debug2(
833
1027
  "no xpaths found, will not update cache",
834
1028
  cachePrompt,
835
1029
  elementXpaths
@@ -839,16 +1033,44 @@ var PageTaskExecutor = class {
839
1033
  if (!element) {
840
1034
  throw new Error(`Element not found: ${param.prompt}`);
841
1035
  }
1036
+ let hitBy;
1037
+ if (userExpectedPathHitFlag) {
1038
+ hitBy = {
1039
+ from: "User expected path",
1040
+ context: {
1041
+ xpath: param.xpath
1042
+ }
1043
+ };
1044
+ } else if (cacheHitFlag) {
1045
+ hitBy = {
1046
+ from: "Cache",
1047
+ context: {
1048
+ xpathsFromCache: xpaths,
1049
+ xpathsToSave: currentXpaths
1050
+ }
1051
+ };
1052
+ } else if (planHitFlag) {
1053
+ hitBy = {
1054
+ from: "Planning",
1055
+ context: {
1056
+ id: elementFromPlan?.id,
1057
+ bbox: elementFromPlan?.bbox
1058
+ }
1059
+ };
1060
+ } else if (aiLocateHitFlag) {
1061
+ hitBy = {
1062
+ from: "AI model",
1063
+ context: {
1064
+ prompt: param.prompt
1065
+ }
1066
+ };
1067
+ }
842
1068
  return {
843
1069
  output: {
844
1070
  element
845
1071
  },
846
1072
  pageContext,
847
- cache: {
848
- hit: cacheHitFlag,
849
- originalXpaths: xpaths,
850
- currentXpaths
851
- }
1073
+ hitBy
852
1074
  };
853
1075
  }
854
1076
  };
@@ -944,7 +1166,7 @@ var PageTaskExecutor = class {
944
1166
  thought: plan2.thought,
945
1167
  locate: plan2.locate,
946
1168
  executor: async (param, { element }) => {
947
- assert4(element, "Element not found, cannot tap");
1169
+ assert5(element, "Element not found, cannot tap");
948
1170
  await this.page.mouse.click(element.center[0], element.center[1]);
949
1171
  }
950
1172
  };
@@ -956,7 +1178,7 @@ var PageTaskExecutor = class {
956
1178
  thought: plan2.thought,
957
1179
  locate: plan2.locate,
958
1180
  executor: async (param, { element }) => {
959
- assert4(element, "Element not found, cannot right click");
1181
+ assert5(element, "Element not found, cannot right click");
960
1182
  await this.page.mouse.click(
961
1183
  element.center[0],
962
1184
  element.center[1],
@@ -973,7 +1195,7 @@ var PageTaskExecutor = class {
973
1195
  thought: plan2.thought,
974
1196
  locate: plan2.locate,
975
1197
  executor: async (taskParam) => {
976
- assert4(
1198
+ assert5(
977
1199
  taskParam?.start_box && taskParam?.end_box,
978
1200
  "No start_box or end_box to drag"
979
1201
  );
@@ -988,7 +1210,7 @@ var PageTaskExecutor = class {
988
1210
  thought: plan2.thought,
989
1211
  locate: plan2.locate,
990
1212
  executor: async (param, { element }) => {
991
- assert4(element, "Element not found, cannot hover");
1213
+ assert5(element, "Element not found, cannot hover");
992
1214
  await this.page.mouse.move(element.center[0], element.center[1]);
993
1215
  }
994
1216
  };
@@ -1107,7 +1329,7 @@ var PageTaskExecutor = class {
1107
1329
  thought: plan2.thought,
1108
1330
  locate: plan2.locate,
1109
1331
  executor: async (param) => {
1110
- assert4(
1332
+ assert5(
1111
1333
  isAndroidPage(this.page),
1112
1334
  "Cannot use home button on non-Android devices"
1113
1335
  );
@@ -1123,7 +1345,7 @@ var PageTaskExecutor = class {
1123
1345
  thought: plan2.thought,
1124
1346
  locate: plan2.locate,
1125
1347
  executor: async (param) => {
1126
- assert4(
1348
+ assert5(
1127
1349
  isAndroidPage(this.page),
1128
1350
  "Cannot use back button on non-Android devices"
1129
1351
  );
@@ -1139,7 +1361,7 @@ var PageTaskExecutor = class {
1139
1361
  thought: plan2.thought,
1140
1362
  locate: plan2.locate,
1141
1363
  executor: async (param) => {
1142
- assert4(
1364
+ assert5(
1143
1365
  isAndroidPage(this.page),
1144
1366
  "Cannot use recent apps button on non-Android devices"
1145
1367
  );
@@ -1290,7 +1512,7 @@ var PageTaskExecutor = class {
1290
1512
  }
1291
1513
  }
1292
1514
  if (finalActions.length === 0) {
1293
- assert4(
1515
+ assert5(
1294
1516
  !more_actions_needed_by_instruction || sleep2,
1295
1517
  error ? `Failed to plan: ${error}` : planParsingError || "No plan found"
1296
1518
  );
@@ -1528,7 +1750,7 @@ var PageTaskExecutor = class {
1528
1750
  );
1529
1751
  let outputResult = data;
1530
1752
  if (ifTypeRestricted) {
1531
- assert4(data?.result !== void 0, "No result in query data");
1753
+ assert5(data?.result !== void 0, "No result in query data");
1532
1754
  outputResult = data.result;
1533
1755
  }
1534
1756
  return {
@@ -1624,9 +1846,9 @@ var PageTaskExecutor = class {
1624
1846
  onTaskStart: this.onTaskStartCallback
1625
1847
  });
1626
1848
  const { timeoutMs, checkIntervalMs } = opt;
1627
- assert4(assertion, "No assertion for waitFor");
1628
- assert4(timeoutMs, "No timeoutMs for waitFor");
1629
- assert4(checkIntervalMs, "No checkIntervalMs for waitFor");
1849
+ assert5(assertion, "No assertion for waitFor");
1850
+ assert5(timeoutMs, "No timeoutMs for waitFor");
1851
+ assert5(checkIntervalMs, "No checkIntervalMs for waitFor");
1630
1852
  const overallStartTime = Date.now();
1631
1853
  let startTime = Date.now();
1632
1854
  let errorThought = "";
@@ -1680,9 +1902,9 @@ var PageTaskExecutor = class {
1680
1902
  };
1681
1903
 
1682
1904
  // src/common/plan-builder.ts
1683
- import { getDebug as getDebug2 } from "@midscene/shared/logger";
1684
- import { assert as assert5 } from "@midscene/shared/utils";
1685
- var debug2 = getDebug2("plan-builder");
1905
+ import { getDebug as getDebug3 } from "@midscene/shared/logger";
1906
+ import { assert as assert6 } from "@midscene/shared/utils";
1907
+ var debug3 = getDebug3("plan-builder");
1686
1908
  function buildPlans(type, locateParam, param) {
1687
1909
  let returnPlans = [];
1688
1910
  const locatePlan = locateParam ? {
@@ -1692,8 +1914,8 @@ function buildPlans(type, locateParam, param) {
1692
1914
  thought: ""
1693
1915
  } : null;
1694
1916
  if (type === "Tap" || type === "Hover" || type === "RightClick") {
1695
- assert5(locateParam, `missing locate info for action "${type}"`);
1696
- assert5(locatePlan, `missing locate info for action "${type}"`);
1917
+ assert6(locateParam, `missing locate info for action "${type}"`);
1918
+ assert6(locatePlan, `missing locate info for action "${type}"`);
1697
1919
  const tapPlan = {
1698
1920
  type,
1699
1921
  param: null,
@@ -1704,9 +1926,9 @@ function buildPlans(type, locateParam, param) {
1704
1926
  }
1705
1927
  if (type === "Input" || type === "KeyboardPress") {
1706
1928
  if (type === "Input") {
1707
- assert5(locateParam, `missing locate info for action "${type}"`);
1929
+ assert6(locateParam, `missing locate info for action "${type}"`);
1708
1930
  }
1709
- assert5(param, `missing param for action "${type}"`);
1931
+ assert6(param, `missing param for action "${type}"`);
1710
1932
  const inputPlan = {
1711
1933
  type,
1712
1934
  param,
@@ -1720,7 +1942,7 @@ function buildPlans(type, locateParam, param) {
1720
1942
  }
1721
1943
  }
1722
1944
  if (type === "Scroll") {
1723
- assert5(param, `missing param for action "${type}"`);
1945
+ assert6(param, `missing param for action "${type}"`);
1724
1946
  const scrollPlan = {
1725
1947
  type,
1726
1948
  param,
@@ -1734,7 +1956,7 @@ function buildPlans(type, locateParam, param) {
1734
1956
  }
1735
1957
  }
1736
1958
  if (type === "Sleep") {
1737
- assert5(param, `missing param for action "${type}"`);
1959
+ assert6(param, `missing param for action "${type}"`);
1738
1960
  const sleepPlan = {
1739
1961
  type,
1740
1962
  param,
@@ -1744,7 +1966,7 @@ function buildPlans(type, locateParam, param) {
1744
1966
  returnPlans = [sleepPlan];
1745
1967
  }
1746
1968
  if (type === "Locate") {
1747
- assert5(locateParam, `missing locate info for action "${type}"`);
1969
+ assert6(locateParam, `missing locate info for action "${type}"`);
1748
1970
  const locatePlan2 = {
1749
1971
  type,
1750
1972
  param: locateParam,
@@ -1754,187 +1976,12 @@ function buildPlans(type, locateParam, param) {
1754
1976
  returnPlans = [locatePlan2];
1755
1977
  }
1756
1978
  if (returnPlans) {
1757
- debug2("buildPlans", returnPlans);
1979
+ debug3("buildPlans", returnPlans);
1758
1980
  return returnPlans;
1759
1981
  }
1760
1982
  throw new Error(`Not supported type: ${type}`);
1761
1983
  }
1762
1984
 
1763
- // src/common/task-cache.ts
1764
- import assert6 from "assert";
1765
- import { existsSync as existsSync2, mkdirSync as mkdirSync2, readFileSync, writeFileSync as writeFileSync2 } from "fs";
1766
- import { dirname as dirname2, join as join2 } from "path";
1767
- import { getMidsceneRunSubDir as getMidsceneRunSubDir2 } from "@midscene/shared/common";
1768
- import { getDebug as getDebug3 } from "@midscene/shared/logger";
1769
- import { ifInBrowser as ifInBrowser2 } from "@midscene/shared/utils";
1770
- import yaml3 from "js-yaml";
1771
- import semver from "semver";
1772
-
1773
- // package.json
1774
- var version = "0.19.1";
1775
-
1776
- // src/common/task-cache.ts
1777
- var debug3 = getDebug3("cache");
1778
- var lowestSupportedMidsceneVersion = "0.16.10";
1779
- var cacheFileExt = ".cache.yaml";
1780
- var TaskCache = class {
1781
- // Track matched records
1782
- constructor(cacheId, isCacheResultUsed, cacheFilePath) {
1783
- this.matchedCacheIndices = /* @__PURE__ */ new Set();
1784
- assert6(cacheId, "cacheId is required");
1785
- this.cacheId = replaceIllegalPathCharsAndSpace(cacheId);
1786
- this.cacheFilePath = ifInBrowser2 ? void 0 : cacheFilePath || join2(getMidsceneRunSubDir2("cache"), `${this.cacheId}${cacheFileExt}`);
1787
- this.isCacheResultUsed = isCacheResultUsed;
1788
- let cacheContent;
1789
- if (this.cacheFilePath) {
1790
- cacheContent = this.loadCacheFromFile();
1791
- }
1792
- if (!cacheContent) {
1793
- cacheContent = {
1794
- midsceneVersion: version,
1795
- cacheId: this.cacheId,
1796
- caches: []
1797
- };
1798
- }
1799
- this.cache = cacheContent;
1800
- this.cacheOriginalLength = this.cache.caches.length;
1801
- }
1802
- matchCache(prompt, type) {
1803
- for (let i = 0; i < this.cacheOriginalLength; i++) {
1804
- const item = this.cache.caches[i];
1805
- const key = `${type}:${prompt}:${i}`;
1806
- if (item.type === type && item.prompt === prompt && !this.matchedCacheIndices.has(key)) {
1807
- this.matchedCacheIndices.add(key);
1808
- debug3(
1809
- "cache found and marked as used, type: %s, prompt: %s, index: %d",
1810
- type,
1811
- prompt,
1812
- i
1813
- );
1814
- return {
1815
- cacheContent: item,
1816
- updateFn: (cb) => {
1817
- debug3(
1818
- "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1819
- type,
1820
- prompt,
1821
- i
1822
- );
1823
- cb(item);
1824
- debug3(
1825
- "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1826
- type,
1827
- prompt,
1828
- i
1829
- );
1830
- this.flushCacheToFile();
1831
- }
1832
- };
1833
- }
1834
- }
1835
- debug3("no unused cache found, type: %s, prompt: %s", type, prompt);
1836
- return void 0;
1837
- }
1838
- matchPlanCache(prompt) {
1839
- return this.matchCache(prompt, "plan");
1840
- }
1841
- matchLocateCache(prompt) {
1842
- return this.matchCache(prompt, "locate");
1843
- }
1844
- appendCache(cache) {
1845
- debug3("will append cache", cache);
1846
- this.cache.caches.push(cache);
1847
- this.flushCacheToFile();
1848
- }
1849
- loadCacheFromFile() {
1850
- const cacheFile = this.cacheFilePath;
1851
- assert6(cacheFile, "cache file path is required");
1852
- if (!existsSync2(cacheFile)) {
1853
- debug3("no cache file found, path: %s", cacheFile);
1854
- return void 0;
1855
- }
1856
- const jsonTypeCacheFile = cacheFile.replace(cacheFileExt, ".json");
1857
- if (existsSync2(jsonTypeCacheFile) && this.isCacheResultUsed) {
1858
- console.warn(
1859
- `An outdated cache file from an earlier version of Midscene has been detected. Since version 0.17, we have implemented an improved caching strategy. Please delete the old file located at: ${jsonTypeCacheFile}.`
1860
- );
1861
- return void 0;
1862
- }
1863
- try {
1864
- const data = readFileSync(cacheFile, "utf8");
1865
- const jsonData = yaml3.load(data);
1866
- if (!version) {
1867
- debug3("no midscene version info, will not read cache from file");
1868
- return void 0;
1869
- }
1870
- if (semver.lt(jsonData.midsceneVersion, lowestSupportedMidsceneVersion) && !jsonData.midsceneVersion.includes("beta")) {
1871
- console.warn(
1872
- `You are using an old version of Midscene cache file, and we cannot match any info from it. Starting from Midscene v0.17, we changed our strategy to use xpath for cache info, providing better performance.
1873
- Please delete the existing cache and rebuild it. Sorry for the inconvenience.
1874
- cache file: ${cacheFile}`
1875
- );
1876
- return void 0;
1877
- }
1878
- debug3(
1879
- "cache loaded from file, path: %s, cache version: %s, record length: %s",
1880
- cacheFile,
1881
- jsonData.midsceneVersion,
1882
- jsonData.caches.length
1883
- );
1884
- jsonData.midsceneVersion = version;
1885
- return jsonData;
1886
- } catch (err) {
1887
- debug3(
1888
- "cache file exists but load failed, path: %s, error: %s",
1889
- cacheFile,
1890
- err
1891
- );
1892
- return void 0;
1893
- }
1894
- }
1895
- flushCacheToFile() {
1896
- if (!version) {
1897
- debug3("no midscene version info, will not write cache to file");
1898
- return;
1899
- }
1900
- if (!this.cacheFilePath) {
1901
- debug3("no cache file path, will not write cache to file");
1902
- return;
1903
- }
1904
- try {
1905
- const dir = dirname2(this.cacheFilePath);
1906
- if (!existsSync2(dir)) {
1907
- mkdirSync2(dir, { recursive: true });
1908
- debug3("created cache directory: %s", dir);
1909
- }
1910
- const yamlData = yaml3.dump(this.cache);
1911
- writeFileSync2(this.cacheFilePath, yamlData);
1912
- debug3("cache flushed to file: %s", this.cacheFilePath);
1913
- } catch (err) {
1914
- debug3(
1915
- "write cache to file failed, path: %s, error: %s",
1916
- this.cacheFilePath,
1917
- err
1918
- );
1919
- }
1920
- }
1921
- updateOrAppendCacheRecord(newRecord, cachedRecord) {
1922
- if (cachedRecord) {
1923
- if (newRecord.type === "plan") {
1924
- cachedRecord.updateFn((cache) => {
1925
- cache.yamlWorkflow = newRecord.yamlWorkflow;
1926
- });
1927
- } else {
1928
- cachedRecord.updateFn((cache) => {
1929
- cache.xpaths = newRecord.xpaths;
1930
- });
1931
- }
1932
- } else {
1933
- this.appendCache(newRecord);
1934
- }
1935
- }
1936
- };
1937
-
1938
1985
  // src/common/agent.ts
1939
1986
  var debug4 = getDebug4("web-integration");
1940
1987
  var distanceOfTwoPoints = (p1, p2) => {
@@ -2063,10 +2110,12 @@ ${errorTask?.errorStack}`);
2063
2110
  const prompt = opt.prompt ?? locatePrompt;
2064
2111
  const deepThink = opt.deepThink ?? false;
2065
2112
  const cacheable = opt.cacheable ?? true;
2113
+ const xpath = opt.xpath;
2066
2114
  return {
2067
2115
  prompt,
2068
2116
  deepThink,
2069
- cacheable
2117
+ cacheable,
2118
+ xpath
2070
2119
  };
2071
2120
  }
2072
2121
  return {
@@ -2224,6 +2273,9 @@ ${errorTask?.errorStack}`);
2224
2273
  this.afterTaskRunning(executor);
2225
2274
  return output;
2226
2275
  }
2276
+ async aiAsk(prompt, opt = defaultInsightExtractOption) {
2277
+ return this.aiString(prompt, opt);
2278
+ }
2227
2279
  async describeElementAtPoint(center, opt) {
2228
2280
  const { verifyPrompt = true, retryLimit = 3 } = opt || {};
2229
2281
  let success = false;