misoai-web 1.0.6 → 1.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. package/README.md +5 -349
  2. package/dist/es/agent.js +165 -428
  3. package/dist/es/agent.js.map +1 -1
  4. package/dist/es/bridge-mode-browser.js +10 -9
  5. package/dist/es/bridge-mode-browser.js.map +1 -1
  6. package/dist/es/bridge-mode.js +167 -430
  7. package/dist/es/bridge-mode.js.map +1 -1
  8. package/dist/es/chrome-extension.js +173 -435
  9. package/dist/es/chrome-extension.js.map +1 -1
  10. package/dist/es/index.js +185 -432
  11. package/dist/es/index.js.map +1 -1
  12. package/dist/es/midscene-playground.js +165 -428
  13. package/dist/es/midscene-playground.js.map +1 -1
  14. package/dist/es/midscene-server.js.map +1 -1
  15. package/dist/es/playground.js +165 -428
  16. package/dist/es/playground.js.map +1 -1
  17. package/dist/es/playwright-report.js +1 -1
  18. package/dist/es/playwright-report.js.map +1 -1
  19. package/dist/es/playwright.js +182 -429
  20. package/dist/es/playwright.js.map +1 -1
  21. package/dist/es/puppeteer-agent-launcher.js +169 -432
  22. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  23. package/dist/es/puppeteer.js +169 -432
  24. package/dist/es/puppeteer.js.map +1 -1
  25. package/dist/es/ui-utils.js.map +1 -1
  26. package/dist/es/utils.js +7 -4
  27. package/dist/es/utils.js.map +1 -1
  28. package/dist/es/yaml.js +29 -3
  29. package/dist/es/yaml.js.map +1 -1
  30. package/dist/lib/agent.js +163 -426
  31. package/dist/lib/agent.js.map +1 -1
  32. package/dist/lib/bridge-mode-browser.js +10 -9
  33. package/dist/lib/bridge-mode-browser.js.map +1 -1
  34. package/dist/lib/bridge-mode.js +165 -428
  35. package/dist/lib/bridge-mode.js.map +1 -1
  36. package/dist/lib/chrome-extension.js +171 -433
  37. package/dist/lib/chrome-extension.js.map +1 -1
  38. package/dist/lib/index.js +183 -430
  39. package/dist/lib/index.js.map +1 -1
  40. package/dist/lib/midscene-playground.js +163 -426
  41. package/dist/lib/midscene-playground.js.map +1 -1
  42. package/dist/lib/midscene-server.js.map +1 -1
  43. package/dist/lib/playground.js +163 -426
  44. package/dist/lib/playground.js.map +1 -1
  45. package/dist/lib/playwright-report.js +1 -1
  46. package/dist/lib/playwright-report.js.map +1 -1
  47. package/dist/lib/playwright.js +180 -427
  48. package/dist/lib/playwright.js.map +1 -1
  49. package/dist/lib/puppeteer-agent-launcher.js +167 -430
  50. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  51. package/dist/lib/puppeteer.js +167 -430
  52. package/dist/lib/puppeteer.js.map +1 -1
  53. package/dist/lib/ui-utils.js.map +1 -1
  54. package/dist/lib/utils.js +7 -4
  55. package/dist/lib/utils.js.map +1 -1
  56. package/dist/lib/yaml.js +29 -3
  57. package/dist/lib/yaml.js.map +1 -1
  58. package/dist/types/agent.d.ts +13 -51
  59. package/dist/types/bridge-mode-browser.d.ts +2 -3
  60. package/dist/types/bridge-mode.d.ts +2 -3
  61. package/dist/types/{browser-aec1055d.d.ts → browser-9b472ffb.d.ts} +1 -1
  62. package/dist/types/chrome-extension.d.ts +2 -3
  63. package/dist/types/index.d.ts +1 -2
  64. package/dist/types/midscene-server.d.ts +1 -2
  65. package/dist/types/{page-86ab0fe1.d.ts → page-ed0ecb44.d.ts} +19 -9
  66. package/dist/types/playground.d.ts +2 -3
  67. package/dist/types/playwright.d.ts +9 -2
  68. package/dist/types/puppeteer-agent-launcher.d.ts +1 -2
  69. package/dist/types/puppeteer.d.ts +6 -5
  70. package/dist/types/ui-utils.d.ts +1 -1
  71. package/dist/types/utils.d.ts +1 -2
  72. package/dist/types/yaml.d.ts +1 -2
  73. package/iife-script/htmlElement.js +53 -75
  74. package/iife-script/htmlElementDebug.js +35 -56
  75. package/package.json +24 -24
  76. package/LICENSE +0 -21
@@ -62,10 +62,11 @@ var ScriptPlayer = class {
62
62
  this.unnamedResultIndex = 0;
63
63
  this.pageAgent = null;
64
64
  this.result = {};
65
+ const target = script.target || script.web || script.android;
65
66
  if (import_utils.ifInBrowser) {
66
67
  this.output = void 0;
67
- } else if (script.target?.output) {
68
- this.output = (0, import_node_path.resolve)(process.cwd(), script.target.output);
68
+ } else if (target?.output) {
69
+ this.output = (0, import_node_path.resolve)(process.cwd(), target.output);
69
70
  } else {
70
71
  this.output = (0, import_node_path.join)((0, import_common.getMidsceneRunSubDir)("output"), `${process.pid}.json`);
71
72
  }
@@ -139,15 +140,20 @@ var ScriptPlayer = class {
139
140
  } else if ("aiAssert" in flowItem) {
140
141
  const assertTask = flowItem;
141
142
  const prompt = assertTask.aiAssert;
143
+ const msg = assertTask.errorMessage;
142
144
  (0, import_utils.assert)(prompt, "missing prompt for aiAssert");
143
145
  (0, import_utils.assert)(
144
146
  typeof prompt === "string",
145
147
  "prompt for aiAssert must be a string"
146
148
  );
147
- await agent.aiAssert(prompt);
149
+ await agent.aiAssert(prompt, msg);
148
150
  } else if ("aiQuery" in flowItem) {
149
151
  const queryTask = flowItem;
150
152
  const prompt = queryTask.aiQuery;
153
+ const options = {
154
+ domIncluded: queryTask.domIncluded,
155
+ screenshotIncluded: queryTask.screenshotIncluded
156
+ };
151
157
  (0, import_utils.assert)(prompt, "missing prompt for aiQuery");
152
158
  (0, import_utils.assert)(
153
159
  typeof prompt === "string",
@@ -158,6 +164,10 @@ var ScriptPlayer = class {
158
164
  } else if ("aiNumber" in flowItem) {
159
165
  const numberTask = flowItem;
160
166
  const prompt = numberTask.aiNumber;
167
+ const options = {
168
+ domIncluded: numberTask.domIncluded,
169
+ screenshotIncluded: numberTask.screenshotIncluded
170
+ };
161
171
  (0, import_utils.assert)(prompt, "missing prompt for number");
162
172
  (0, import_utils.assert)(
163
173
  typeof prompt === "string",
@@ -168,6 +178,10 @@ var ScriptPlayer = class {
168
178
  } else if ("aiString" in flowItem) {
169
179
  const stringTask = flowItem;
170
180
  const prompt = stringTask.aiString;
181
+ const options = {
182
+ domIncluded: stringTask.domIncluded,
183
+ screenshotIncluded: stringTask.screenshotIncluded
184
+ };
171
185
  (0, import_utils.assert)(prompt, "missing prompt for string");
172
186
  (0, import_utils.assert)(
173
187
  typeof prompt === "string",
@@ -178,6 +192,10 @@ var ScriptPlayer = class {
178
192
  } else if ("aiBoolean" in flowItem) {
179
193
  const booleanTask = flowItem;
180
194
  const prompt = booleanTask.aiBoolean;
195
+ const options = {
196
+ domIncluded: booleanTask.domIncluded,
197
+ screenshotIncluded: booleanTask.screenshotIncluded
198
+ };
181
199
  (0, import_utils.assert)(prompt, "missing prompt for boolean");
182
200
  (0, import_utils.assert)(
183
201
  typeof prompt === "string",
@@ -220,6 +238,9 @@ var ScriptPlayer = class {
220
238
  } else if ("aiTap" in flowItem) {
221
239
  const tapTask = flowItem;
222
240
  await agent.aiTap(tapTask.aiTap, tapTask);
241
+ } else if ("aiRightClick" in flowItem) {
242
+ const rightClickTask = flowItem;
243
+ await agent.aiRightClick(rightClickTask.aiRightClick, rightClickTask);
223
244
  } else if ("aiHover" in flowItem) {
224
245
  const hoverTask = flowItem;
225
246
  await agent.aiHover(hoverTask.aiHover, hoverTask);
@@ -242,6 +263,11 @@ var ScriptPlayer = class {
242
263
  evaluateJavaScriptTask.javascript
243
264
  );
244
265
  this.setResult(evaluateJavaScriptTask.name, result);
266
+ } else if ("logScreenshot" in flowItem) {
267
+ const logScreenshotTask = flowItem;
268
+ await agent.logScreenshot(logScreenshotTask.logScreenshot, {
269
+ content: logScreenshotTask.content || ""
270
+ });
245
271
  } else {
246
272
  throw new Error(`unknown flowItem: ${JSON.stringify(flowItem)}`);
247
273
  }
@@ -490,7 +516,8 @@ var WebElementInfo = class {
490
516
  id,
491
517
  attributes,
492
518
  indexId,
493
- xpaths
519
+ xpaths,
520
+ isVisible
494
521
  }) {
495
522
  this.content = content;
496
523
  this.rect = rect;
@@ -503,6 +530,7 @@ var WebElementInfo = class {
503
530
  this.attributes = attributes;
504
531
  this.indexId = indexId;
505
532
  this.xpaths = xpaths;
533
+ this.isVisible = isVisible;
506
534
  }
507
535
  };
508
536
 
@@ -525,14 +553,15 @@ async function parseContextFromWebPage(page, _opt) {
525
553
  })
526
554
  ]);
527
555
  const webTree = (0, import_extractor.traverseTree)(tree, (elementInfo) => {
528
- const { rect, id, content, attributes, locator, indexId } = elementInfo;
556
+ const { rect, id, content, attributes, locator, indexId, isVisible } = elementInfo;
529
557
  return new WebElementInfo({
530
558
  rect,
531
559
  locator,
532
560
  id,
533
561
  content,
534
562
  attributes,
535
- indexId
563
+ indexId,
564
+ isVisible
536
565
  });
537
566
  });
538
567
  (0, import_utils4.assert)(screenshotBase64, "screenshotBase64 is required");
@@ -562,7 +591,7 @@ function printReportMsg(filepath) {
562
591
  (0, import_utils4.logMsg)(`Midscene - report file updated: ${filepath}`);
563
592
  }
564
593
  function replaceIllegalPathCharsAndSpace(str) {
565
- return str.replace(/[/\\:*?"<>| ]/g, "-");
594
+ return str.replace(/[:*?"<>| ]/g, "-");
566
595
  }
567
596
  function forceClosePopup(page, debug6) {
568
597
  page.on("popup", async (popup) => {
@@ -876,10 +905,10 @@ var PageTaskExecutor = class {
876
905
  if (!taskParam || !taskParam.value) {
877
906
  return;
878
907
  }
879
- await this.page.keyboard.type(taskParam.value);
880
- } else {
881
- await this.page.keyboard.type(taskParam.value);
882
908
  }
909
+ await this.page.keyboard.type(taskParam.value, {
910
+ autoDismissKeyboard: taskParam.autoDismissKeyboard
911
+ });
883
912
  }
884
913
  };
885
914
  tasks.push(taskActionInput);
@@ -908,6 +937,22 @@ var PageTaskExecutor = class {
908
937
  }
909
938
  };
910
939
  tasks.push(taskActionTap);
940
+ } else if (plan2.type === "RightClick") {
941
+ const taskActionRightClick = {
942
+ type: "Action",
943
+ subType: "RightClick",
944
+ thought: plan2.thought,
945
+ locate: plan2.locate,
946
+ executor: async (param, { element }) => {
947
+ (0, import_utils6.assert)(element, "Element not found, cannot right click");
948
+ await this.page.mouse.click(
949
+ element.center[0],
950
+ element.center[1],
951
+ { button: "right" }
952
+ );
953
+ }
954
+ };
955
+ tasks.push(taskActionRightClick);
911
956
  } else if (plan2.type === "Drag") {
912
957
  const taskActionDrag = {
913
958
  type: "Action",
@@ -1436,7 +1481,7 @@ var PageTaskExecutor = class {
1436
1481
  executor: taskExecutor
1437
1482
  };
1438
1483
  }
1439
- async createTypeQueryTask(type, demand) {
1484
+ async createTypeQueryTask(type, demand, opt) {
1440
1485
  const taskExecutor = new import_misoai_core.Executor(
1441
1486
  taskTitleStr(
1442
1487
  type,
@@ -1467,7 +1512,10 @@ var PageTaskExecutor = class {
1467
1512
  result: `${type}, ${demand}`
1468
1513
  };
1469
1514
  }
1470
- const { data, usage } = await this.insight.extract(demandInput);
1515
+ const { data, usage } = await this.insight.extract(
1516
+ demandInput,
1517
+ opt
1518
+ );
1471
1519
  let outputResult = data;
1472
1520
  if (ifTypeRestricted) {
1473
1521
  (0, import_utils6.assert)(data?.result !== void 0, "No result in query data");
@@ -1487,17 +1535,17 @@ var PageTaskExecutor = class {
1487
1535
  executor: taskExecutor
1488
1536
  };
1489
1537
  }
1490
- async query(demand) {
1491
- return this.createTypeQueryTask("Query", demand);
1538
+ async query(demand, opt) {
1539
+ return this.createTypeQueryTask("Query", demand, opt);
1492
1540
  }
1493
- async boolean(prompt) {
1494
- return this.createTypeQueryTask("Boolean", prompt);
1541
+ async boolean(prompt, opt) {
1542
+ return this.createTypeQueryTask("Boolean", prompt, opt);
1495
1543
  }
1496
- async number(prompt) {
1497
- return this.createTypeQueryTask("Number", prompt);
1544
+ async number(prompt, opt) {
1545
+ return this.createTypeQueryTask("Number", prompt, opt);
1498
1546
  }
1499
- async string(prompt) {
1500
- return this.createTypeQueryTask("String", prompt);
1547
+ async string(prompt, opt) {
1548
+ return this.createTypeQueryTask("String", prompt, opt);
1501
1549
  }
1502
1550
  async assert(assertion) {
1503
1551
  const description = `assert: ${assertion}`;
@@ -1633,7 +1681,7 @@ function buildPlans(type, locateParam, param) {
1633
1681
  param: locateParam,
1634
1682
  thought: ""
1635
1683
  } : null;
1636
- if (type === "Tap" || type === "Hover") {
1684
+ if (type === "Tap" || type === "Hover" || type === "RightClick") {
1637
1685
  (0, import_utils8.assert)(locateParam, `missing locate info for action "${type}"`);
1638
1686
  (0, import_utils8.assert)(locatePlan, `missing locate info for action "${type}"`);
1639
1687
  const tapPlan = {
@@ -1713,7 +1761,7 @@ var import_js_yaml3 = __toESM(require("js-yaml"));
1713
1761
  var import_semver = __toESM(require("semver"));
1714
1762
 
1715
1763
  // package.json
1716
- var version = "1.0.5";
1764
+ var version = "1.0.3";
1717
1765
 
1718
1766
  // src/common/task-cache.ts
1719
1767
  var debug3 = (0, import_logger3.getDebug)("cache");
@@ -1741,70 +1789,44 @@ var TaskCache = class {
1741
1789
  this.cache = cacheContent;
1742
1790
  this.cacheOriginalLength = this.cache.caches.length;
1743
1791
  }
1744
- matchCache(prompt, type, contextData) {
1745
- const contextHash = contextData ? this.generateContextHash(contextData) : void 0;
1792
+ matchCache(prompt, type) {
1746
1793
  for (let i = 0; i < this.cacheOriginalLength; i++) {
1747
1794
  const item = this.cache.caches[i];
1748
1795
  const key = `${type}:${prompt}:${i}`;
1749
- if (item.type !== type || item.prompt !== prompt || this.matchedCacheIndices.has(key)) {
1750
- continue;
1751
- }
1752
- if (type === "plan" && item.type === "plan") {
1753
- const planItem = item;
1754
- if (contextHash && planItem.contextHash) {
1755
- if (contextHash !== planItem.contextHash) {
1756
- debug3("cache context mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1757
- continue;
1796
+ if (item.type === type && item.prompt === prompt && !this.matchedCacheIndices.has(key)) {
1797
+ this.matchedCacheIndices.add(key);
1798
+ debug3(
1799
+ "cache found and marked as used, type: %s, prompt: %s, index: %d",
1800
+ type,
1801
+ prompt,
1802
+ i
1803
+ );
1804
+ return {
1805
+ cacheContent: item,
1806
+ updateFn: (cb) => {
1807
+ debug3(
1808
+ "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1809
+ type,
1810
+ prompt,
1811
+ i
1812
+ );
1813
+ cb(item);
1814
+ debug3(
1815
+ "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1816
+ type,
1817
+ prompt,
1818
+ i
1819
+ );
1820
+ this.flushCacheToFile();
1758
1821
  }
1759
- } else if (contextHash || planItem.contextHash) {
1760
- debug3("cache context availability mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1761
- continue;
1762
- }
1822
+ };
1763
1823
  }
1764
- this.matchedCacheIndices.add(key);
1765
- debug3(
1766
- "cache found and marked as used, type: %s, prompt: %s, index: %d, contextMatch: %s",
1767
- type,
1768
- prompt,
1769
- i,
1770
- contextHash ? "yes" : "no-context"
1771
- );
1772
- return {
1773
- cacheContent: item,
1774
- updateFn: (cb) => {
1775
- debug3(
1776
- "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1777
- type,
1778
- prompt,
1779
- i
1780
- );
1781
- cb(item);
1782
- debug3(
1783
- "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1784
- type,
1785
- prompt,
1786
- i
1787
- );
1788
- this.flushCacheToFile();
1789
- }
1790
- };
1791
1824
  }
1792
- debug3("no unused cache found, type: %s, prompt: %s, contextHash: %s", type, prompt, contextHash);
1825
+ debug3("no unused cache found, type: %s, prompt: %s", type, prompt);
1793
1826
  return void 0;
1794
1827
  }
1795
- generateContextHash(contextData) {
1796
- const sortedKeys = Object.keys(contextData).sort();
1797
- const stableString = sortedKeys.map((key) => `${key}:${JSON.stringify(contextData[key])}`).join("|");
1798
- let hash = 0;
1799
- for (let i = 0; i < stableString.length; i++) {
1800
- const char = stableString.charCodeAt(i);
1801
- hash = (hash << 5) - hash + char;
1802
- hash = hash & hash;
1803
- }
1804
- return hash.toString(36);
1805
- }
1806
- matchPlanCache(prompt, contextData) {
1807
- return this.matchCache(prompt, "plan", contextData);
1828
+ matchPlanCache(prompt) {
1829
+ return this.matchCache(prompt, "plan");
1808
1830
  }
1809
1831
  matchLocateCache(prompt) {
1810
1832
  return this.matchCache(prompt, "locate");
@@ -1870,8 +1892,14 @@ cache file: ${cacheFile}`
1870
1892
  return;
1871
1893
  }
1872
1894
  try {
1895
+ const dir = (0, import_node_path2.dirname)(this.cacheFilePath);
1896
+ if (!(0, import_node_fs2.existsSync)(dir)) {
1897
+ (0, import_node_fs2.mkdirSync)(dir, { recursive: true });
1898
+ debug3("created cache directory: %s", dir);
1899
+ }
1873
1900
  const yamlData = import_js_yaml3.default.dump(this.cache);
1874
1901
  (0, import_node_fs2.writeFileSync)(this.cacheFilePath, yamlData);
1902
+ debug3("cache flushed to file: %s", this.cacheFilePath);
1875
1903
  } catch (err) {
1876
1904
  debug3(
1877
1905
  "write cache to file failed, path: %s, error: %s",
@@ -1880,16 +1908,11 @@ cache file: ${cacheFile}`
1880
1908
  );
1881
1909
  }
1882
1910
  }
1883
- updateOrAppendCacheRecord(newRecord, cachedRecord, contextData) {
1911
+ updateOrAppendCacheRecord(newRecord, cachedRecord) {
1884
1912
  if (cachedRecord) {
1885
1913
  if (newRecord.type === "plan") {
1886
1914
  cachedRecord.updateFn((cache) => {
1887
- const planCache = cache;
1888
- planCache.yamlWorkflow = newRecord.yamlWorkflow;
1889
- if (contextData) {
1890
- planCache.contextHash = this.generateContextHash(contextData);
1891
- planCache.contextData = { ...contextData };
1892
- }
1915
+ cache.yamlWorkflow = newRecord.yamlWorkflow;
1893
1916
  });
1894
1917
  } else {
1895
1918
  cachedRecord.updateFn((cache) => {
@@ -1897,11 +1920,6 @@ cache file: ${cacheFile}`
1897
1920
  });
1898
1921
  }
1899
1922
  } else {
1900
- if (newRecord.type === "plan" && contextData) {
1901
- const planRecord = newRecord;
1902
- planRecord.contextHash = this.generateContextHash(contextData);
1903
- planRecord.contextData = { ...contextData };
1904
- }
1905
1923
  this.appendCache(newRecord);
1906
1924
  }
1907
1925
  }
@@ -1931,13 +1949,10 @@ var PageAgent = class {
1931
1949
  generateReport: true,
1932
1950
  autoPrintReportMsg: true,
1933
1951
  groupName: "Midscene Report",
1934
- groupDescription: "",
1935
- enableCumulativeContext: true,
1936
- autoClearContext: false
1952
+ groupDescription: ""
1937
1953
  },
1938
1954
  opts || {}
1939
1955
  );
1940
- this.initializeContextStore();
1941
1956
  if (this.page.pageType === "puppeteer" || this.page.pageType === "playwright") {
1942
1957
  this.page.waitForNavigationTimeout = this.opts.waitForNavigationTimeout || import_constants2.DEFAULT_WAIT_FOR_NAVIGATION_TIMEOUT;
1943
1958
  this.page.waitForNetworkIdleTimeout = this.opts.waitForNetworkIdleTimeout || import_constants2.DEFAULT_WAIT_FOR_NETWORK_IDLE_TIMEOUT;
@@ -1964,69 +1979,6 @@ var PageAgent = class {
1964
1979
  opts?.testId || this.page.pageType || "web"
1965
1980
  );
1966
1981
  }
1967
- /**
1968
- * Initialize context store for cumulative context functionality
1969
- */
1970
- async initializeContextStore() {
1971
- if (!this.opts.enableCumulativeContext) {
1972
- debug4("Cumulative context disabled via options");
1973
- return;
1974
- }
1975
- try {
1976
- const aiModel = await import("misoai-core/ai-model");
1977
- this.contextStore = aiModel.getContextStore();
1978
- debug4("Context store initialized successfully", {
1979
- autoClearContext: this.opts.autoClearContext,
1980
- testId: this.opts.testId
1981
- });
1982
- if (this.opts.autoClearContext) {
1983
- this.contextStore.clear();
1984
- debug4("Context store cleared due to autoClearContext option");
1985
- } else {
1986
- const existingData = this.contextStore.getAllData();
1987
- const existingSteps = this.contextStore.getRecentSteps(100).length;
1988
- debug4("Context store preserving existing data", {
1989
- existingDataKeys: Object.keys(existingData),
1990
- existingStepsCount: existingSteps
1991
- });
1992
- }
1993
- } catch (error) {
1994
- debug4("Failed to initialize context store:", error);
1995
- console.warn("⚠️ Could not initialize context store:", error);
1996
- }
1997
- }
1998
- /**
1999
- * Get the context store instance
2000
- */
2001
- getContextStore() {
2002
- return this.contextStore;
2003
- }
2004
- /**
2005
- * Clear the context store
2006
- */
2007
- clearContext() {
2008
- if (this.contextStore) {
2009
- this.contextStore.clear();
2010
- }
2011
- }
2012
- /**
2013
- * Get all stored data from context store
2014
- */
2015
- getStoredData() {
2016
- if (this.contextStore) {
2017
- return this.contextStore.getAllData();
2018
- }
2019
- return {};
2020
- }
2021
- /**
2022
- * Get step summary from context store
2023
- */
2024
- getStepSummary() {
2025
- if (this.contextStore) {
2026
- return this.contextStore.getStepSummary();
2027
- }
2028
- return "";
2029
- }
2030
1982
  async getUIContext(action) {
2031
1983
  if (action && (action === "extract" || action === "assert" || action === "captcha")) {
2032
1984
  return await parseContextFromWebPage(this.page, {
@@ -2205,6 +2157,23 @@ var PageAgent = class {
2205
2157
  metadata
2206
2158
  };
2207
2159
  }
2160
+ async aiRightClick(locatePrompt, opt) {
2161
+ const detailedLocateParam = this.buildDetailedLocateParam(
2162
+ locatePrompt,
2163
+ opt
2164
+ );
2165
+ const plans = buildPlans("RightClick", detailedLocateParam);
2166
+ const { executor, output } = await this.taskExecutor.runPlans(
2167
+ taskTitleStr("RightClick", locateParamStr(detailedLocateParam)),
2168
+ plans,
2169
+ { cacheable: opt?.cacheable }
2170
+ );
2171
+ const metadata = this.afterTaskRunning(executor);
2172
+ return {
2173
+ result: output,
2174
+ metadata
2175
+ };
2176
+ }
2208
2177
  async aiInput(value, locatePrompt, opt) {
2209
2178
  (0, import_utils12.assert)(
2210
2179
  typeof value === "string",
@@ -2262,35 +2231,9 @@ var PageAgent = class {
2262
2231
  };
2263
2232
  }
2264
2233
  async aiAction(taskPrompt, opt) {
2265
- const originalPrompt = taskPrompt;
2266
- let processedPrompt = taskPrompt;
2267
- if (this.opts.enableCumulativeContext && this.contextStore) {
2268
- try {
2269
- const storedData = this.contextStore.getAllData();
2270
- if (Object.keys(storedData).length > 0) {
2271
- debug4("Available data for aiAction:", {
2272
- prompt: taskPrompt,
2273
- availableData: storedData
2274
- });
2275
- }
2276
- } catch (error) {
2277
- debug4("Context store operation failed:", error);
2278
- }
2279
- }
2280
2234
  const cacheable = opt?.cacheable;
2281
2235
  const isVlmUiTars = (0, import_env2.vlLocateMode)() === "vlm-ui-tars";
2282
- let contextData;
2283
- if (this.opts.enableCumulativeContext && this.contextStore) {
2284
- try {
2285
- contextData = this.contextStore.getAllData();
2286
- if (contextData && Object.keys(contextData).length === 0) {
2287
- contextData = void 0;
2288
- }
2289
- } catch (error) {
2290
- debug4("Failed to get context data for cache:", error);
2291
- }
2292
- }
2293
- const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt, contextData);
2236
+ const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt);
2294
2237
  if (matchedCache && this.taskCache?.isCacheResultUsed) {
2295
2238
  const { executor: executor2 } = await this.taskExecutor.loadYamlFlowAsPlanning(
2296
2239
  taskPrompt,
@@ -2300,28 +2243,6 @@ var PageAgent = class {
2300
2243
  debug4("matched cache, will call .runYaml to run the action");
2301
2244
  const yaml5 = matchedCache.cacheContent?.yamlWorkflow;
2302
2245
  const result = await this.runYaml(yaml5);
2303
- if (this.opts.enableCumulativeContext && this.contextStore) {
2304
- try {
2305
- const executionResult = {
2306
- success: true,
2307
- actionType: "cached",
2308
- description: `Executed cached action: ${processedPrompt}`,
2309
- timing: result.metadata?.totalTime
2310
- };
2311
- this.contextStore.addStep({
2312
- type: "action",
2313
- summary: `Action: ${processedPrompt} (cached)`,
2314
- prompt: processedPrompt,
2315
- executionResult
2316
- });
2317
- debug4("Added cached action step to context store:", {
2318
- stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2319
- totalSteps: this.contextStore.getRecentSteps(100).length
2320
- });
2321
- } catch (error) {
2322
- debug4("Failed to add cached action step:", error);
2323
- }
2324
- }
2325
2246
  return {
2326
2247
  result: result.result,
2327
2248
  metadata: metadata2
@@ -2346,114 +2267,17 @@ var PageAgent = class {
2346
2267
  prompt: taskPrompt,
2347
2268
  yamlWorkflow: yamlFlowStr
2348
2269
  },
2349
- matchedCache,
2350
- contextData
2351
- // Pass context data for cache creation
2270
+ matchedCache
2352
2271
  );
2353
2272
  }
2354
2273
  const metadata = this.afterTaskRunning(executor);
2355
- if (this.opts.enableCumulativeContext && this.contextStore) {
2356
- try {
2357
- const executionResult = this.analyzeExecutionResults(executor, originalPrompt);
2358
- this.contextStore.addStep({
2359
- type: "action",
2360
- summary: `Action: ${processedPrompt}`,
2361
- prompt: processedPrompt,
2362
- executionResult
2363
- });
2364
- debug4("Added action step with execution result to context store:", {
2365
- stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2366
- totalSteps: this.contextStore.getRecentSteps(100).length,
2367
- executionResult
2368
- });
2369
- } catch (error) {
2370
- debug4("Failed to analyze execution results, adding step without execution result:", error);
2371
- try {
2372
- this.contextStore.addStep({
2373
- type: "action",
2374
- summary: `Action: ${processedPrompt}`,
2375
- prompt: processedPrompt
2376
- });
2377
- } catch (stepError) {
2378
- debug4("Failed to add action step:", stepError);
2379
- }
2380
- }
2381
- }
2382
2274
  return {
2383
2275
  result: output,
2384
2276
  metadata
2385
2277
  };
2386
2278
  }
2387
2279
  async aiQuery(demand) {
2388
- let processedDemand = demand;
2389
- let storageKey;
2390
- try {
2391
- const aiModel = await import("misoai-core/ai-model");
2392
- const contextStore = aiModel.getContextStore();
2393
- if (typeof demand === "string") {
2394
- const storageInstruction = contextStore.parseStorageInstruction(demand);
2395
- if (storageInstruction) {
2396
- storageKey = storageInstruction.key;
2397
- processedDemand = storageInstruction.cleanText;
2398
- contextStore._pendingAliases = storageInstruction.aliases;
2399
- } else {
2400
- const storageMatch = demand.match(/store\s+(?:as\s+)?(\w+)/i);
2401
- if (storageMatch) {
2402
- storageKey = storageMatch[1];
2403
- processedDemand = demand.replace(/,?\s*store\s+(?:as\s+)?\w+/i, "").trim();
2404
- }
2405
- }
2406
- }
2407
- } catch (error) {
2408
- debug4("Context store not available:", error);
2409
- }
2410
- const { output, executor } = await this.taskExecutor.query(processedDemand);
2411
- if (this.opts.enableCumulativeContext && this.contextStore) {
2412
- if (storageKey && output) {
2413
- try {
2414
- const pendingAliases = this.contextStore._pendingAliases;
2415
- if (pendingAliases) {
2416
- this.contextStore.storeDataWithAliases(storageKey, output, pendingAliases, typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand));
2417
- delete this.contextStore._pendingAliases;
2418
- debug4("Stored query result with aliases:", {
2419
- key: storageKey,
2420
- value: output,
2421
- aliases: pendingAliases
2422
- });
2423
- } else {
2424
- this.contextStore.storeData(storageKey, output);
2425
- debug4("Stored query result:", {
2426
- key: storageKey,
2427
- value: output
2428
- });
2429
- }
2430
- this.contextStore.addStep({
2431
- type: "query",
2432
- summary: `Query: ${typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)} (stored as ${storageKey})`,
2433
- data: output,
2434
- prompt: typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)
2435
- });
2436
- debug4("Added query step to context store:", {
2437
- storageKey,
2438
- totalStoredItems: Object.keys(this.contextStore.getAllData()).length,
2439
- totalSteps: this.contextStore.getRecentSteps(100).length
2440
- });
2441
- } catch (error) {
2442
- debug4("Failed to store query result:", error);
2443
- }
2444
- } else {
2445
- try {
2446
- this.contextStore.addStep({
2447
- type: "query",
2448
- summary: `Query: ${typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)}`,
2449
- data: output,
2450
- prompt: typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)
2451
- });
2452
- } catch (error) {
2453
- debug4("Failed to add query step:", error);
2454
- }
2455
- }
2456
- }
2280
+ const { output, executor } = await this.taskExecutor.query(demand);
2457
2281
  const metadata = this.afterTaskRunning(executor);
2458
2282
  return {
2459
2283
  result: output,
@@ -2563,48 +2387,6 @@ var PageAgent = class {
2563
2387
  };
2564
2388
  }
2565
2389
  async aiAssert(assertion, msg, opt) {
2566
- let executionContext = "";
2567
- if (this.opts.enableCumulativeContext && this.contextStore) {
2568
- try {
2569
- const recentSteps = this.contextStore.getRecentSteps(3);
2570
- const stepsWithExecutionResults = recentSteps.filter((step) => step.executionResult);
2571
- const storedData = this.contextStore.getAllData();
2572
- if (stepsWithExecutionResults.length > 0) {
2573
- const recentActions = stepsWithExecutionResults.map((step) => {
2574
- const result = step.executionResult;
2575
- return `- ${result.description}${result.success ? "" : " (FAILED)"}`;
2576
- }).join("\n");
2577
- executionContext = `
2578
-
2579
- Recent actions performed:
2580
- ${recentActions}
2581
-
2582
- This context may help verify the assertion.`;
2583
- }
2584
- if (storedData && Object.keys(storedData).length > 0) {
2585
- executionContext += `
2586
-
2587
- Available data for reference:
2588
- ${JSON.stringify(storedData, null, 2)}
2589
-
2590
- Note: If the assertion references any data keys or natural language equivalents, consider the stored values when verifying.`;
2591
- debug4("Available data for aiAssert:", {
2592
- assertion,
2593
- availableData: storedData
2594
- });
2595
- }
2596
- this.contextStore.addStep({
2597
- type: "assertion",
2598
- summary: `Assertion: ${assertion}`,
2599
- prompt: assertion
2600
- });
2601
- debug4("Added assertion step to context store:", {
2602
- totalSteps: this.contextStore.getRecentSteps(100).length
2603
- });
2604
- } catch (error) {
2605
- debug4("Context store operation failed:", error);
2606
- }
2607
- }
2608
2390
  let currentUrl = "";
2609
2391
  if (this.page.url) {
2610
2392
  try {
@@ -2612,13 +2394,7 @@ Note: If the assertion references any data keys or natural language equivalents,
2612
2394
  } catch (e) {
2613
2395
  }
2614
2396
  }
2615
- let assertionWithContext = assertion;
2616
- if (currentUrl) {
2617
- assertionWithContext = `For the page at URL "${currentUrl}", ${assertion}`;
2618
- }
2619
- if (executionContext) {
2620
- assertionWithContext += executionContext;
2621
- }
2397
+ const assertionWithContext = currentUrl ? `For the page at URL "${currentUrl}", ${assertion}` : assertion;
2622
2398
  const { output, executor } = await this.taskExecutor.assert(assertionWithContext);
2623
2399
  const metadata = this.afterTaskRunning(executor, true);
2624
2400
  if (output && opt?.keepRawResponse) {
@@ -2829,81 +2605,42 @@ ${errors}`);
2829
2605
  }
2830
2606
  throw new Error("evaluateJavaScript is not supported in current agent");
2831
2607
  }
2832
- async destroy() {
2833
- await this.page.destroy();
2834
- }
2835
- /**
2836
- * Analyze execution results from executor to generate meaningful descriptions
2837
- */
2838
- analyzeExecutionResults(executor, originalPrompt) {
2839
- const tasks = executor.tasks;
2840
- const success = !executor.isInErrorState();
2841
- if (!success) {
2842
- const errorTask = executor.latestErrorTask();
2843
- return {
2844
- success: false,
2845
- actionType: "error",
2846
- description: `Failed to execute: ${originalPrompt}`,
2847
- error: errorTask?.error
2848
- };
2849
- }
2850
- const actionTasks = tasks.filter((t) => t.type === "Action" && t.status === "finished");
2851
- const locateTasks = tasks.filter((t) => t.type === "Insight" && t.subType === "Locate");
2852
- const lastAction = actionTasks[actionTasks.length - 1];
2853
- const lastLocate = locateTasks[locateTasks.length - 1];
2854
- if (!lastAction) {
2855
- return {
2856
- success: true,
2857
- actionType: "unknown",
2858
- description: `Completed: ${originalPrompt}`
2608
+ async logScreenshot(title, options) {
2609
+ const screenshotTitle = title || "untitled";
2610
+ const content = options?.content || "";
2611
+ const screenshot = await this.page.screenshotBase64?.();
2612
+ if (screenshot) {
2613
+ const executionDump = {
2614
+ name: screenshotTitle,
2615
+ description: content,
2616
+ tasks: [{
2617
+ type: "Screenshot",
2618
+ subType: "log",
2619
+ status: "finished",
2620
+ executor: null,
2621
+ param: {
2622
+ title: screenshotTitle,
2623
+ content
2624
+ },
2625
+ output: {
2626
+ screenshot
2627
+ },
2628
+ thought: `Logged screenshot: ${screenshotTitle}`,
2629
+ timing: {
2630
+ start: Date.now(),
2631
+ end: Date.now(),
2632
+ cost: 0
2633
+ }
2634
+ }],
2635
+ sdkVersion: "1.0.0",
2636
+ logTime: Date.now(),
2637
+ model_name: "screenshot"
2859
2638
  };
2639
+ this.appendExecutionDump(executionDump);
2860
2640
  }
2861
- const actionType = lastAction.subType || "unknown";
2862
- const elementInfo = this.extractElementInfo(lastLocate, lastAction);
2863
- const description = this.generateActionDescription(actionType, lastAction.param, elementInfo);
2864
- return {
2865
- success: true,
2866
- actionType,
2867
- description,
2868
- elementInfo,
2869
- timing: lastAction.timing?.cost
2870
- };
2871
- }
2872
- /**
2873
- * Extract element information from locate task
2874
- */
2875
- extractElementInfo(locateTask, _actionTask) {
2876
- if (!locateTask?.output?.element)
2877
- return void 0;
2878
- const element = locateTask.output.element;
2879
- return {
2880
- type: element.attributes?.nodeType || "unknown",
2881
- text: element.content || element.attributes?.placeholder || element.attributes?.title || "",
2882
- location: `(${element.center[0]}, ${element.center[1]})`
2883
- };
2884
2641
  }
2885
- /**
2886
- * Generate natural language description for actions
2887
- */
2888
- generateActionDescription(actionType, param, elementInfo) {
2889
- const elementDesc = elementInfo ? `'${elementInfo.text || elementInfo.type}' element` : "element";
2890
- switch (actionType) {
2891
- case "Tap":
2892
- return `Clicked on ${elementDesc}`;
2893
- case "Input":
2894
- const inputValue = param?.value || "";
2895
- return `Entered "${inputValue}" into ${elementDesc}`;
2896
- case "KeyboardPress":
2897
- return `Pressed ${param?.value || "key"}`;
2898
- case "Scroll":
2899
- return `Scrolled ${param?.direction || "on page"}`;
2900
- case "Hover":
2901
- return `Hovered over ${elementDesc}`;
2902
- case "Drag":
2903
- return `Dragged ${elementDesc}`;
2904
- default:
2905
- return `Performed ${actionType} action on ${elementDesc}`;
2906
- }
2642
+ async destroy() {
2643
+ await this.page.destroy();
2907
2644
  }
2908
2645
  };
2909
2646
 
@@ -2926,7 +2663,7 @@ var Page = class {
2926
2663
  this.everMoved = false;
2927
2664
  this.underlyingPage = underlyingPage;
2928
2665
  this.pageType = pageType;
2929
- this.waitForNavigationTimeout = opts?.waitForNavigationTimeout || import_constants3.DEFAULT_WAIT_FOR_NAVIGATION_TIMEOUT;
2666
+ this.waitForNavigationTimeout = opts?.waitForNavigationTimeout ?? import_constants3.DEFAULT_WAIT_FOR_NAVIGATION_TIMEOUT;
2930
2667
  }
2931
2668
  async evaluate(pageFunction, arg) {
2932
2669
  let result;
@@ -3206,9 +2943,9 @@ var WebPage = class extends Page {
3206
2943
  }
3207
2944
  async waitUntilNetworkIdle(options) {
3208
2945
  await this.underlyingPage.waitForNetworkIdle({
3209
- idleTime: options?.idleTime || import_constants4.DEFAULT_WAIT_FOR_NETWORK_IDLE_TIME,
3210
- concurrency: options?.concurrency || import_constants4.DEFAULT_WAIT_FOR_NETWORK_IDLE_CONCURRENCY,
3211
- timeout: options?.timeout || this.waitForNetworkIdleTimeout
2946
+ idleTime: options?.idleTime ?? import_constants4.DEFAULT_WAIT_FOR_NETWORK_IDLE_TIME,
2947
+ concurrency: options?.concurrency ?? import_constants4.DEFAULT_WAIT_FOR_NETWORK_IDLE_CONCURRENCY,
2948
+ timeout: options?.timeout ?? this.waitForNetworkIdleTimeout
3212
2949
  });
3213
2950
  }
3214
2951
  };