misoai-web 1.0.6 → 1.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. package/README.md +5 -349
  2. package/dist/es/agent.js +165 -428
  3. package/dist/es/agent.js.map +1 -1
  4. package/dist/es/bridge-mode-browser.js +10 -9
  5. package/dist/es/bridge-mode-browser.js.map +1 -1
  6. package/dist/es/bridge-mode.js +167 -430
  7. package/dist/es/bridge-mode.js.map +1 -1
  8. package/dist/es/chrome-extension.js +173 -435
  9. package/dist/es/chrome-extension.js.map +1 -1
  10. package/dist/es/index.js +185 -432
  11. package/dist/es/index.js.map +1 -1
  12. package/dist/es/midscene-playground.js +165 -428
  13. package/dist/es/midscene-playground.js.map +1 -1
  14. package/dist/es/midscene-server.js.map +1 -1
  15. package/dist/es/playground.js +165 -428
  16. package/dist/es/playground.js.map +1 -1
  17. package/dist/es/playwright-report.js +1 -1
  18. package/dist/es/playwright-report.js.map +1 -1
  19. package/dist/es/playwright.js +182 -429
  20. package/dist/es/playwright.js.map +1 -1
  21. package/dist/es/puppeteer-agent-launcher.js +169 -432
  22. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  23. package/dist/es/puppeteer.js +169 -432
  24. package/dist/es/puppeteer.js.map +1 -1
  25. package/dist/es/ui-utils.js.map +1 -1
  26. package/dist/es/utils.js +7 -4
  27. package/dist/es/utils.js.map +1 -1
  28. package/dist/es/yaml.js +29 -3
  29. package/dist/es/yaml.js.map +1 -1
  30. package/dist/lib/agent.js +163 -426
  31. package/dist/lib/agent.js.map +1 -1
  32. package/dist/lib/bridge-mode-browser.js +10 -9
  33. package/dist/lib/bridge-mode-browser.js.map +1 -1
  34. package/dist/lib/bridge-mode.js +165 -428
  35. package/dist/lib/bridge-mode.js.map +1 -1
  36. package/dist/lib/chrome-extension.js +171 -433
  37. package/dist/lib/chrome-extension.js.map +1 -1
  38. package/dist/lib/index.js +183 -430
  39. package/dist/lib/index.js.map +1 -1
  40. package/dist/lib/midscene-playground.js +163 -426
  41. package/dist/lib/midscene-playground.js.map +1 -1
  42. package/dist/lib/midscene-server.js.map +1 -1
  43. package/dist/lib/playground.js +163 -426
  44. package/dist/lib/playground.js.map +1 -1
  45. package/dist/lib/playwright-report.js +1 -1
  46. package/dist/lib/playwright-report.js.map +1 -1
  47. package/dist/lib/playwright.js +180 -427
  48. package/dist/lib/playwright.js.map +1 -1
  49. package/dist/lib/puppeteer-agent-launcher.js +167 -430
  50. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  51. package/dist/lib/puppeteer.js +167 -430
  52. package/dist/lib/puppeteer.js.map +1 -1
  53. package/dist/lib/ui-utils.js.map +1 -1
  54. package/dist/lib/utils.js +7 -4
  55. package/dist/lib/utils.js.map +1 -1
  56. package/dist/lib/yaml.js +29 -3
  57. package/dist/lib/yaml.js.map +1 -1
  58. package/dist/types/agent.d.ts +13 -51
  59. package/dist/types/bridge-mode-browser.d.ts +2 -3
  60. package/dist/types/bridge-mode.d.ts +2 -3
  61. package/dist/types/{browser-aec1055d.d.ts → browser-9b472ffb.d.ts} +1 -1
  62. package/dist/types/chrome-extension.d.ts +2 -3
  63. package/dist/types/index.d.ts +1 -2
  64. package/dist/types/midscene-server.d.ts +1 -2
  65. package/dist/types/{page-86ab0fe1.d.ts → page-ed0ecb44.d.ts} +19 -9
  66. package/dist/types/playground.d.ts +2 -3
  67. package/dist/types/playwright.d.ts +9 -2
  68. package/dist/types/puppeteer-agent-launcher.d.ts +1 -2
  69. package/dist/types/puppeteer.d.ts +6 -5
  70. package/dist/types/ui-utils.d.ts +1 -1
  71. package/dist/types/utils.d.ts +1 -2
  72. package/dist/types/yaml.d.ts +1 -2
  73. package/iife-script/htmlElement.js +53 -75
  74. package/iife-script/htmlElementDebug.js +35 -56
  75. package/package.json +24 -24
  76. package/LICENSE +0 -21
@@ -17,10 +17,11 @@ var ScriptPlayer = class {
17
17
  this.unnamedResultIndex = 0;
18
18
  this.pageAgent = null;
19
19
  this.result = {};
20
+ const target = script.target || script.web || script.android;
20
21
  if (ifInBrowser) {
21
22
  this.output = void 0;
22
- } else if (script.target?.output) {
23
- this.output = resolve(process.cwd(), script.target.output);
23
+ } else if (target?.output) {
24
+ this.output = resolve(process.cwd(), target.output);
24
25
  } else {
25
26
  this.output = join(getMidsceneRunSubDir("output"), `${process.pid}.json`);
26
27
  }
@@ -94,15 +95,20 @@ var ScriptPlayer = class {
94
95
  } else if ("aiAssert" in flowItem) {
95
96
  const assertTask = flowItem;
96
97
  const prompt = assertTask.aiAssert;
98
+ const msg = assertTask.errorMessage;
97
99
  assert(prompt, "missing prompt for aiAssert");
98
100
  assert(
99
101
  typeof prompt === "string",
100
102
  "prompt for aiAssert must be a string"
101
103
  );
102
- await agent.aiAssert(prompt);
104
+ await agent.aiAssert(prompt, msg);
103
105
  } else if ("aiQuery" in flowItem) {
104
106
  const queryTask = flowItem;
105
107
  const prompt = queryTask.aiQuery;
108
+ const options = {
109
+ domIncluded: queryTask.domIncluded,
110
+ screenshotIncluded: queryTask.screenshotIncluded
111
+ };
106
112
  assert(prompt, "missing prompt for aiQuery");
107
113
  assert(
108
114
  typeof prompt === "string",
@@ -113,6 +119,10 @@ var ScriptPlayer = class {
113
119
  } else if ("aiNumber" in flowItem) {
114
120
  const numberTask = flowItem;
115
121
  const prompt = numberTask.aiNumber;
122
+ const options = {
123
+ domIncluded: numberTask.domIncluded,
124
+ screenshotIncluded: numberTask.screenshotIncluded
125
+ };
116
126
  assert(prompt, "missing prompt for number");
117
127
  assert(
118
128
  typeof prompt === "string",
@@ -123,6 +133,10 @@ var ScriptPlayer = class {
123
133
  } else if ("aiString" in flowItem) {
124
134
  const stringTask = flowItem;
125
135
  const prompt = stringTask.aiString;
136
+ const options = {
137
+ domIncluded: stringTask.domIncluded,
138
+ screenshotIncluded: stringTask.screenshotIncluded
139
+ };
126
140
  assert(prompt, "missing prompt for string");
127
141
  assert(
128
142
  typeof prompt === "string",
@@ -133,6 +147,10 @@ var ScriptPlayer = class {
133
147
  } else if ("aiBoolean" in flowItem) {
134
148
  const booleanTask = flowItem;
135
149
  const prompt = booleanTask.aiBoolean;
150
+ const options = {
151
+ domIncluded: booleanTask.domIncluded,
152
+ screenshotIncluded: booleanTask.screenshotIncluded
153
+ };
136
154
  assert(prompt, "missing prompt for boolean");
137
155
  assert(
138
156
  typeof prompt === "string",
@@ -175,6 +193,9 @@ var ScriptPlayer = class {
175
193
  } else if ("aiTap" in flowItem) {
176
194
  const tapTask = flowItem;
177
195
  await agent.aiTap(tapTask.aiTap, tapTask);
196
+ } else if ("aiRightClick" in flowItem) {
197
+ const rightClickTask = flowItem;
198
+ await agent.aiRightClick(rightClickTask.aiRightClick, rightClickTask);
178
199
  } else if ("aiHover" in flowItem) {
179
200
  const hoverTask = flowItem;
180
201
  await agent.aiHover(hoverTask.aiHover, hoverTask);
@@ -197,6 +218,11 @@ var ScriptPlayer = class {
197
218
  evaluateJavaScriptTask.javascript
198
219
  );
199
220
  this.setResult(evaluateJavaScriptTask.name, result);
221
+ } else if ("logScreenshot" in flowItem) {
222
+ const logScreenshotTask = flowItem;
223
+ await agent.logScreenshot(logScreenshotTask.logScreenshot, {
224
+ content: logScreenshotTask.content || ""
225
+ });
200
226
  } else {
201
227
  throw new Error(`unknown flowItem: ${JSON.stringify(flowItem)}`);
202
228
  }
@@ -465,7 +491,8 @@ var WebElementInfo = class {
465
491
  id,
466
492
  attributes,
467
493
  indexId,
468
- xpaths
494
+ xpaths,
495
+ isVisible
469
496
  }) {
470
497
  this.content = content;
471
498
  this.rect = rect;
@@ -478,6 +505,7 @@ var WebElementInfo = class {
478
505
  this.attributes = attributes;
479
506
  this.indexId = indexId;
480
507
  this.xpaths = xpaths;
508
+ this.isVisible = isVisible;
481
509
  }
482
510
  };
483
511
 
@@ -500,14 +528,15 @@ async function parseContextFromWebPage(page, _opt) {
500
528
  })
501
529
  ]);
502
530
  const webTree = traverseTree(tree, (elementInfo) => {
503
- const { rect, id, content, attributes, locator, indexId } = elementInfo;
531
+ const { rect, id, content, attributes, locator, indexId, isVisible } = elementInfo;
504
532
  return new WebElementInfo({
505
533
  rect,
506
534
  locator,
507
535
  id,
508
536
  content,
509
537
  attributes,
510
- indexId
538
+ indexId,
539
+ isVisible
511
540
  });
512
541
  });
513
542
  assert3(screenshotBase64, "screenshotBase64 is required");
@@ -537,7 +566,7 @@ function printReportMsg(filepath) {
537
566
  logMsg(`Midscene - report file updated: ${filepath}`);
538
567
  }
539
568
  function replaceIllegalPathCharsAndSpace(str) {
540
- return str.replace(/[/\\:*?"<>| ]/g, "-");
569
+ return str.replace(/[:*?"<>| ]/g, "-");
541
570
  }
542
571
  function forceClosePopup(page, debug6) {
543
572
  page.on("popup", async (popup) => {
@@ -851,10 +880,10 @@ var PageTaskExecutor = class {
851
880
  if (!taskParam || !taskParam.value) {
852
881
  return;
853
882
  }
854
- await this.page.keyboard.type(taskParam.value);
855
- } else {
856
- await this.page.keyboard.type(taskParam.value);
857
883
  }
884
+ await this.page.keyboard.type(taskParam.value, {
885
+ autoDismissKeyboard: taskParam.autoDismissKeyboard
886
+ });
858
887
  }
859
888
  };
860
889
  tasks.push(taskActionInput);
@@ -883,6 +912,22 @@ var PageTaskExecutor = class {
883
912
  }
884
913
  };
885
914
  tasks.push(taskActionTap);
915
+ } else if (plan2.type === "RightClick") {
916
+ const taskActionRightClick = {
917
+ type: "Action",
918
+ subType: "RightClick",
919
+ thought: plan2.thought,
920
+ locate: plan2.locate,
921
+ executor: async (param, { element }) => {
922
+ assert4(element, "Element not found, cannot right click");
923
+ await this.page.mouse.click(
924
+ element.center[0],
925
+ element.center[1],
926
+ { button: "right" }
927
+ );
928
+ }
929
+ };
930
+ tasks.push(taskActionRightClick);
886
931
  } else if (plan2.type === "Drag") {
887
932
  const taskActionDrag = {
888
933
  type: "Action",
@@ -1411,7 +1456,7 @@ var PageTaskExecutor = class {
1411
1456
  executor: taskExecutor
1412
1457
  };
1413
1458
  }
1414
- async createTypeQueryTask(type, demand) {
1459
+ async createTypeQueryTask(type, demand, opt) {
1415
1460
  const taskExecutor = new Executor(
1416
1461
  taskTitleStr(
1417
1462
  type,
@@ -1442,7 +1487,10 @@ var PageTaskExecutor = class {
1442
1487
  result: `${type}, ${demand}`
1443
1488
  };
1444
1489
  }
1445
- const { data, usage } = await this.insight.extract(demandInput);
1490
+ const { data, usage } = await this.insight.extract(
1491
+ demandInput,
1492
+ opt
1493
+ );
1446
1494
  let outputResult = data;
1447
1495
  if (ifTypeRestricted) {
1448
1496
  assert4(data?.result !== void 0, "No result in query data");
@@ -1462,17 +1510,17 @@ var PageTaskExecutor = class {
1462
1510
  executor: taskExecutor
1463
1511
  };
1464
1512
  }
1465
- async query(demand) {
1466
- return this.createTypeQueryTask("Query", demand);
1513
+ async query(demand, opt) {
1514
+ return this.createTypeQueryTask("Query", demand, opt);
1467
1515
  }
1468
- async boolean(prompt) {
1469
- return this.createTypeQueryTask("Boolean", prompt);
1516
+ async boolean(prompt, opt) {
1517
+ return this.createTypeQueryTask("Boolean", prompt, opt);
1470
1518
  }
1471
- async number(prompt) {
1472
- return this.createTypeQueryTask("Number", prompt);
1519
+ async number(prompt, opt) {
1520
+ return this.createTypeQueryTask("Number", prompt, opt);
1473
1521
  }
1474
- async string(prompt) {
1475
- return this.createTypeQueryTask("String", prompt);
1522
+ async string(prompt, opt) {
1523
+ return this.createTypeQueryTask("String", prompt, opt);
1476
1524
  }
1477
1525
  async assert(assertion) {
1478
1526
  const description = `assert: ${assertion}`;
@@ -1608,7 +1656,7 @@ function buildPlans(type, locateParam, param) {
1608
1656
  param: locateParam,
1609
1657
  thought: ""
1610
1658
  } : null;
1611
- if (type === "Tap" || type === "Hover") {
1659
+ if (type === "Tap" || type === "Hover" || type === "RightClick") {
1612
1660
  assert5(locateParam, `missing locate info for action "${type}"`);
1613
1661
  assert5(locatePlan, `missing locate info for action "${type}"`);
1614
1662
  const tapPlan = {
@@ -1679,8 +1727,8 @@ function buildPlans(type, locateParam, param) {
1679
1727
 
1680
1728
  // src/common/task-cache.ts
1681
1729
  import assert6 from "assert";
1682
- import { existsSync as existsSync2, readFileSync, writeFileSync as writeFileSync2 } from "fs";
1683
- import { join as join2 } from "path";
1730
+ import { existsSync as existsSync2, mkdirSync as mkdirSync2, readFileSync, writeFileSync as writeFileSync2 } from "fs";
1731
+ import { dirname as dirname2, join as join2 } from "path";
1684
1732
  import { getMidsceneRunSubDir as getMidsceneRunSubDir2 } from "misoai-shared/common";
1685
1733
  import { getDebug as getDebug3 } from "misoai-shared/logger";
1686
1734
  import { ifInBrowser as ifInBrowser2 } from "misoai-shared/utils";
@@ -1688,7 +1736,7 @@ import yaml3 from "js-yaml";
1688
1736
  import semver from "semver";
1689
1737
 
1690
1738
  // package.json
1691
- var version = "1.0.5";
1739
+ var version = "1.0.3";
1692
1740
 
1693
1741
  // src/common/task-cache.ts
1694
1742
  var debug3 = getDebug3("cache");
@@ -1716,70 +1764,44 @@ var TaskCache = class {
1716
1764
  this.cache = cacheContent;
1717
1765
  this.cacheOriginalLength = this.cache.caches.length;
1718
1766
  }
1719
- matchCache(prompt, type, contextData) {
1720
- const contextHash = contextData ? this.generateContextHash(contextData) : void 0;
1767
+ matchCache(prompt, type) {
1721
1768
  for (let i = 0; i < this.cacheOriginalLength; i++) {
1722
1769
  const item = this.cache.caches[i];
1723
1770
  const key = `${type}:${prompt}:${i}`;
1724
- if (item.type !== type || item.prompt !== prompt || this.matchedCacheIndices.has(key)) {
1725
- continue;
1726
- }
1727
- if (type === "plan" && item.type === "plan") {
1728
- const planItem = item;
1729
- if (contextHash && planItem.contextHash) {
1730
- if (contextHash !== planItem.contextHash) {
1731
- debug3("cache context mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1732
- continue;
1771
+ if (item.type === type && item.prompt === prompt && !this.matchedCacheIndices.has(key)) {
1772
+ this.matchedCacheIndices.add(key);
1773
+ debug3(
1774
+ "cache found and marked as used, type: %s, prompt: %s, index: %d",
1775
+ type,
1776
+ prompt,
1777
+ i
1778
+ );
1779
+ return {
1780
+ cacheContent: item,
1781
+ updateFn: (cb) => {
1782
+ debug3(
1783
+ "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1784
+ type,
1785
+ prompt,
1786
+ i
1787
+ );
1788
+ cb(item);
1789
+ debug3(
1790
+ "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1791
+ type,
1792
+ prompt,
1793
+ i
1794
+ );
1795
+ this.flushCacheToFile();
1733
1796
  }
1734
- } else if (contextHash || planItem.contextHash) {
1735
- debug3("cache context availability mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1736
- continue;
1737
- }
1797
+ };
1738
1798
  }
1739
- this.matchedCacheIndices.add(key);
1740
- debug3(
1741
- "cache found and marked as used, type: %s, prompt: %s, index: %d, contextMatch: %s",
1742
- type,
1743
- prompt,
1744
- i,
1745
- contextHash ? "yes" : "no-context"
1746
- );
1747
- return {
1748
- cacheContent: item,
1749
- updateFn: (cb) => {
1750
- debug3(
1751
- "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1752
- type,
1753
- prompt,
1754
- i
1755
- );
1756
- cb(item);
1757
- debug3(
1758
- "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1759
- type,
1760
- prompt,
1761
- i
1762
- );
1763
- this.flushCacheToFile();
1764
- }
1765
- };
1766
1799
  }
1767
- debug3("no unused cache found, type: %s, prompt: %s, contextHash: %s", type, prompt, contextHash);
1800
+ debug3("no unused cache found, type: %s, prompt: %s", type, prompt);
1768
1801
  return void 0;
1769
1802
  }
1770
- generateContextHash(contextData) {
1771
- const sortedKeys = Object.keys(contextData).sort();
1772
- const stableString = sortedKeys.map((key) => `${key}:${JSON.stringify(contextData[key])}`).join("|");
1773
- let hash = 0;
1774
- for (let i = 0; i < stableString.length; i++) {
1775
- const char = stableString.charCodeAt(i);
1776
- hash = (hash << 5) - hash + char;
1777
- hash = hash & hash;
1778
- }
1779
- return hash.toString(36);
1780
- }
1781
- matchPlanCache(prompt, contextData) {
1782
- return this.matchCache(prompt, "plan", contextData);
1803
+ matchPlanCache(prompt) {
1804
+ return this.matchCache(prompt, "plan");
1783
1805
  }
1784
1806
  matchLocateCache(prompt) {
1785
1807
  return this.matchCache(prompt, "locate");
@@ -1845,8 +1867,14 @@ cache file: ${cacheFile}`
1845
1867
  return;
1846
1868
  }
1847
1869
  try {
1870
+ const dir = dirname2(this.cacheFilePath);
1871
+ if (!existsSync2(dir)) {
1872
+ mkdirSync2(dir, { recursive: true });
1873
+ debug3("created cache directory: %s", dir);
1874
+ }
1848
1875
  const yamlData = yaml3.dump(this.cache);
1849
1876
  writeFileSync2(this.cacheFilePath, yamlData);
1877
+ debug3("cache flushed to file: %s", this.cacheFilePath);
1850
1878
  } catch (err) {
1851
1879
  debug3(
1852
1880
  "write cache to file failed, path: %s, error: %s",
@@ -1855,16 +1883,11 @@ cache file: ${cacheFile}`
1855
1883
  );
1856
1884
  }
1857
1885
  }
1858
- updateOrAppendCacheRecord(newRecord, cachedRecord, contextData) {
1886
+ updateOrAppendCacheRecord(newRecord, cachedRecord) {
1859
1887
  if (cachedRecord) {
1860
1888
  if (newRecord.type === "plan") {
1861
1889
  cachedRecord.updateFn((cache) => {
1862
- const planCache = cache;
1863
- planCache.yamlWorkflow = newRecord.yamlWorkflow;
1864
- if (contextData) {
1865
- planCache.contextHash = this.generateContextHash(contextData);
1866
- planCache.contextData = { ...contextData };
1867
- }
1890
+ cache.yamlWorkflow = newRecord.yamlWorkflow;
1868
1891
  });
1869
1892
  } else {
1870
1893
  cachedRecord.updateFn((cache) => {
@@ -1872,11 +1895,6 @@ cache file: ${cacheFile}`
1872
1895
  });
1873
1896
  }
1874
1897
  } else {
1875
- if (newRecord.type === "plan" && contextData) {
1876
- const planRecord = newRecord;
1877
- planRecord.contextHash = this.generateContextHash(contextData);
1878
- planRecord.contextData = { ...contextData };
1879
- }
1880
1898
  this.appendCache(newRecord);
1881
1899
  }
1882
1900
  }
@@ -1906,13 +1924,10 @@ var PageAgent = class {
1906
1924
  generateReport: true,
1907
1925
  autoPrintReportMsg: true,
1908
1926
  groupName: "Midscene Report",
1909
- groupDescription: "",
1910
- enableCumulativeContext: true,
1911
- autoClearContext: false
1927
+ groupDescription: ""
1912
1928
  },
1913
1929
  opts || {}
1914
1930
  );
1915
- this.initializeContextStore();
1916
1931
  if (this.page.pageType === "puppeteer" || this.page.pageType === "playwright") {
1917
1932
  this.page.waitForNavigationTimeout = this.opts.waitForNavigationTimeout || DEFAULT_WAIT_FOR_NAVIGATION_TIMEOUT;
1918
1933
  this.page.waitForNetworkIdleTimeout = this.opts.waitForNetworkIdleTimeout || DEFAULT_WAIT_FOR_NETWORK_IDLE_TIMEOUT;
@@ -1939,69 +1954,6 @@ var PageAgent = class {
1939
1954
  opts?.testId || this.page.pageType || "web"
1940
1955
  );
1941
1956
  }
1942
- /**
1943
- * Initialize context store for cumulative context functionality
1944
- */
1945
- async initializeContextStore() {
1946
- if (!this.opts.enableCumulativeContext) {
1947
- debug4("Cumulative context disabled via options");
1948
- return;
1949
- }
1950
- try {
1951
- const aiModel = await import("misoai-core/ai-model");
1952
- this.contextStore = aiModel.getContextStore();
1953
- debug4("Context store initialized successfully", {
1954
- autoClearContext: this.opts.autoClearContext,
1955
- testId: this.opts.testId
1956
- });
1957
- if (this.opts.autoClearContext) {
1958
- this.contextStore.clear();
1959
- debug4("Context store cleared due to autoClearContext option");
1960
- } else {
1961
- const existingData = this.contextStore.getAllData();
1962
- const existingSteps = this.contextStore.getRecentSteps(100).length;
1963
- debug4("Context store preserving existing data", {
1964
- existingDataKeys: Object.keys(existingData),
1965
- existingStepsCount: existingSteps
1966
- });
1967
- }
1968
- } catch (error) {
1969
- debug4("Failed to initialize context store:", error);
1970
- console.warn("⚠️ Could not initialize context store:", error);
1971
- }
1972
- }
1973
- /**
1974
- * Get the context store instance
1975
- */
1976
- getContextStore() {
1977
- return this.contextStore;
1978
- }
1979
- /**
1980
- * Clear the context store
1981
- */
1982
- clearContext() {
1983
- if (this.contextStore) {
1984
- this.contextStore.clear();
1985
- }
1986
- }
1987
- /**
1988
- * Get all stored data from context store
1989
- */
1990
- getStoredData() {
1991
- if (this.contextStore) {
1992
- return this.contextStore.getAllData();
1993
- }
1994
- return {};
1995
- }
1996
- /**
1997
- * Get step summary from context store
1998
- */
1999
- getStepSummary() {
2000
- if (this.contextStore) {
2001
- return this.contextStore.getStepSummary();
2002
- }
2003
- return "";
2004
- }
2005
1957
  async getUIContext(action) {
2006
1958
  if (action && (action === "extract" || action === "assert" || action === "captcha")) {
2007
1959
  return await parseContextFromWebPage(this.page, {
@@ -2180,6 +2132,23 @@ var PageAgent = class {
2180
2132
  metadata
2181
2133
  };
2182
2134
  }
2135
+ async aiRightClick(locatePrompt, opt) {
2136
+ const detailedLocateParam = this.buildDetailedLocateParam(
2137
+ locatePrompt,
2138
+ opt
2139
+ );
2140
+ const plans = buildPlans("RightClick", detailedLocateParam);
2141
+ const { executor, output } = await this.taskExecutor.runPlans(
2142
+ taskTitleStr("RightClick", locateParamStr(detailedLocateParam)),
2143
+ plans,
2144
+ { cacheable: opt?.cacheable }
2145
+ );
2146
+ const metadata = this.afterTaskRunning(executor);
2147
+ return {
2148
+ result: output,
2149
+ metadata
2150
+ };
2151
+ }
2183
2152
  async aiInput(value, locatePrompt, opt) {
2184
2153
  assert7(
2185
2154
  typeof value === "string",
@@ -2237,35 +2206,9 @@ var PageAgent = class {
2237
2206
  };
2238
2207
  }
2239
2208
  async aiAction(taskPrompt, opt) {
2240
- const originalPrompt = taskPrompt;
2241
- let processedPrompt = taskPrompt;
2242
- if (this.opts.enableCumulativeContext && this.contextStore) {
2243
- try {
2244
- const storedData = this.contextStore.getAllData();
2245
- if (Object.keys(storedData).length > 0) {
2246
- debug4("Available data for aiAction:", {
2247
- prompt: taskPrompt,
2248
- availableData: storedData
2249
- });
2250
- }
2251
- } catch (error) {
2252
- debug4("Context store operation failed:", error);
2253
- }
2254
- }
2255
2209
  const cacheable = opt?.cacheable;
2256
2210
  const isVlmUiTars = vlLocateMode() === "vlm-ui-tars";
2257
- let contextData;
2258
- if (this.opts.enableCumulativeContext && this.contextStore) {
2259
- try {
2260
- contextData = this.contextStore.getAllData();
2261
- if (contextData && Object.keys(contextData).length === 0) {
2262
- contextData = void 0;
2263
- }
2264
- } catch (error) {
2265
- debug4("Failed to get context data for cache:", error);
2266
- }
2267
- }
2268
- const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt, contextData);
2211
+ const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt);
2269
2212
  if (matchedCache && this.taskCache?.isCacheResultUsed) {
2270
2213
  const { executor: executor2 } = await this.taskExecutor.loadYamlFlowAsPlanning(
2271
2214
  taskPrompt,
@@ -2275,28 +2218,6 @@ var PageAgent = class {
2275
2218
  debug4("matched cache, will call .runYaml to run the action");
2276
2219
  const yaml5 = matchedCache.cacheContent?.yamlWorkflow;
2277
2220
  const result = await this.runYaml(yaml5);
2278
- if (this.opts.enableCumulativeContext && this.contextStore) {
2279
- try {
2280
- const executionResult = {
2281
- success: true,
2282
- actionType: "cached",
2283
- description: `Executed cached action: ${processedPrompt}`,
2284
- timing: result.metadata?.totalTime
2285
- };
2286
- this.contextStore.addStep({
2287
- type: "action",
2288
- summary: `Action: ${processedPrompt} (cached)`,
2289
- prompt: processedPrompt,
2290
- executionResult
2291
- });
2292
- debug4("Added cached action step to context store:", {
2293
- stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2294
- totalSteps: this.contextStore.getRecentSteps(100).length
2295
- });
2296
- } catch (error) {
2297
- debug4("Failed to add cached action step:", error);
2298
- }
2299
- }
2300
2221
  return {
2301
2222
  result: result.result,
2302
2223
  metadata: metadata2
@@ -2321,114 +2242,17 @@ var PageAgent = class {
2321
2242
  prompt: taskPrompt,
2322
2243
  yamlWorkflow: yamlFlowStr
2323
2244
  },
2324
- matchedCache,
2325
- contextData
2326
- // Pass context data for cache creation
2245
+ matchedCache
2327
2246
  );
2328
2247
  }
2329
2248
  const metadata = this.afterTaskRunning(executor);
2330
- if (this.opts.enableCumulativeContext && this.contextStore) {
2331
- try {
2332
- const executionResult = this.analyzeExecutionResults(executor, originalPrompt);
2333
- this.contextStore.addStep({
2334
- type: "action",
2335
- summary: `Action: ${processedPrompt}`,
2336
- prompt: processedPrompt,
2337
- executionResult
2338
- });
2339
- debug4("Added action step with execution result to context store:", {
2340
- stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2341
- totalSteps: this.contextStore.getRecentSteps(100).length,
2342
- executionResult
2343
- });
2344
- } catch (error) {
2345
- debug4("Failed to analyze execution results, adding step without execution result:", error);
2346
- try {
2347
- this.contextStore.addStep({
2348
- type: "action",
2349
- summary: `Action: ${processedPrompt}`,
2350
- prompt: processedPrompt
2351
- });
2352
- } catch (stepError) {
2353
- debug4("Failed to add action step:", stepError);
2354
- }
2355
- }
2356
- }
2357
2249
  return {
2358
2250
  result: output,
2359
2251
  metadata
2360
2252
  };
2361
2253
  }
2362
2254
  async aiQuery(demand) {
2363
- let processedDemand = demand;
2364
- let storageKey;
2365
- try {
2366
- const aiModel = await import("misoai-core/ai-model");
2367
- const contextStore = aiModel.getContextStore();
2368
- if (typeof demand === "string") {
2369
- const storageInstruction = contextStore.parseStorageInstruction(demand);
2370
- if (storageInstruction) {
2371
- storageKey = storageInstruction.key;
2372
- processedDemand = storageInstruction.cleanText;
2373
- contextStore._pendingAliases = storageInstruction.aliases;
2374
- } else {
2375
- const storageMatch = demand.match(/store\s+(?:as\s+)?(\w+)/i);
2376
- if (storageMatch) {
2377
- storageKey = storageMatch[1];
2378
- processedDemand = demand.replace(/,?\s*store\s+(?:as\s+)?\w+/i, "").trim();
2379
- }
2380
- }
2381
- }
2382
- } catch (error) {
2383
- debug4("Context store not available:", error);
2384
- }
2385
- const { output, executor } = await this.taskExecutor.query(processedDemand);
2386
- if (this.opts.enableCumulativeContext && this.contextStore) {
2387
- if (storageKey && output) {
2388
- try {
2389
- const pendingAliases = this.contextStore._pendingAliases;
2390
- if (pendingAliases) {
2391
- this.contextStore.storeDataWithAliases(storageKey, output, pendingAliases, typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand));
2392
- delete this.contextStore._pendingAliases;
2393
- debug4("Stored query result with aliases:", {
2394
- key: storageKey,
2395
- value: output,
2396
- aliases: pendingAliases
2397
- });
2398
- } else {
2399
- this.contextStore.storeData(storageKey, output);
2400
- debug4("Stored query result:", {
2401
- key: storageKey,
2402
- value: output
2403
- });
2404
- }
2405
- this.contextStore.addStep({
2406
- type: "query",
2407
- summary: `Query: ${typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)} (stored as ${storageKey})`,
2408
- data: output,
2409
- prompt: typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)
2410
- });
2411
- debug4("Added query step to context store:", {
2412
- storageKey,
2413
- totalStoredItems: Object.keys(this.contextStore.getAllData()).length,
2414
- totalSteps: this.contextStore.getRecentSteps(100).length
2415
- });
2416
- } catch (error) {
2417
- debug4("Failed to store query result:", error);
2418
- }
2419
- } else {
2420
- try {
2421
- this.contextStore.addStep({
2422
- type: "query",
2423
- summary: `Query: ${typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)}`,
2424
- data: output,
2425
- prompt: typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)
2426
- });
2427
- } catch (error) {
2428
- debug4("Failed to add query step:", error);
2429
- }
2430
- }
2431
- }
2255
+ const { output, executor } = await this.taskExecutor.query(demand);
2432
2256
  const metadata = this.afterTaskRunning(executor);
2433
2257
  return {
2434
2258
  result: output,
@@ -2538,48 +2362,6 @@ var PageAgent = class {
2538
2362
  };
2539
2363
  }
2540
2364
  async aiAssert(assertion, msg, opt) {
2541
- let executionContext = "";
2542
- if (this.opts.enableCumulativeContext && this.contextStore) {
2543
- try {
2544
- const recentSteps = this.contextStore.getRecentSteps(3);
2545
- const stepsWithExecutionResults = recentSteps.filter((step) => step.executionResult);
2546
- const storedData = this.contextStore.getAllData();
2547
- if (stepsWithExecutionResults.length > 0) {
2548
- const recentActions = stepsWithExecutionResults.map((step) => {
2549
- const result = step.executionResult;
2550
- return `- ${result.description}${result.success ? "" : " (FAILED)"}`;
2551
- }).join("\n");
2552
- executionContext = `
2553
-
2554
- Recent actions performed:
2555
- ${recentActions}
2556
-
2557
- This context may help verify the assertion.`;
2558
- }
2559
- if (storedData && Object.keys(storedData).length > 0) {
2560
- executionContext += `
2561
-
2562
- Available data for reference:
2563
- ${JSON.stringify(storedData, null, 2)}
2564
-
2565
- Note: If the assertion references any data keys or natural language equivalents, consider the stored values when verifying.`;
2566
- debug4("Available data for aiAssert:", {
2567
- assertion,
2568
- availableData: storedData
2569
- });
2570
- }
2571
- this.contextStore.addStep({
2572
- type: "assertion",
2573
- summary: `Assertion: ${assertion}`,
2574
- prompt: assertion
2575
- });
2576
- debug4("Added assertion step to context store:", {
2577
- totalSteps: this.contextStore.getRecentSteps(100).length
2578
- });
2579
- } catch (error) {
2580
- debug4("Context store operation failed:", error);
2581
- }
2582
- }
2583
2365
  let currentUrl = "";
2584
2366
  if (this.page.url) {
2585
2367
  try {
@@ -2587,13 +2369,7 @@ Note: If the assertion references any data keys or natural language equivalents,
2587
2369
  } catch (e) {
2588
2370
  }
2589
2371
  }
2590
- let assertionWithContext = assertion;
2591
- if (currentUrl) {
2592
- assertionWithContext = `For the page at URL "${currentUrl}", ${assertion}`;
2593
- }
2594
- if (executionContext) {
2595
- assertionWithContext += executionContext;
2596
- }
2372
+ const assertionWithContext = currentUrl ? `For the page at URL "${currentUrl}", ${assertion}` : assertion;
2597
2373
  const { output, executor } = await this.taskExecutor.assert(assertionWithContext);
2598
2374
  const metadata = this.afterTaskRunning(executor, true);
2599
2375
  if (output && opt?.keepRawResponse) {
@@ -2804,81 +2580,42 @@ ${errors}`);
2804
2580
  }
2805
2581
  throw new Error("evaluateJavaScript is not supported in current agent");
2806
2582
  }
2807
- async destroy() {
2808
- await this.page.destroy();
2809
- }
2810
- /**
2811
- * Analyze execution results from executor to generate meaningful descriptions
2812
- */
2813
- analyzeExecutionResults(executor, originalPrompt) {
2814
- const tasks = executor.tasks;
2815
- const success = !executor.isInErrorState();
2816
- if (!success) {
2817
- const errorTask = executor.latestErrorTask();
2818
- return {
2819
- success: false,
2820
- actionType: "error",
2821
- description: `Failed to execute: ${originalPrompt}`,
2822
- error: errorTask?.error
2823
- };
2824
- }
2825
- const actionTasks = tasks.filter((t) => t.type === "Action" && t.status === "finished");
2826
- const locateTasks = tasks.filter((t) => t.type === "Insight" && t.subType === "Locate");
2827
- const lastAction = actionTasks[actionTasks.length - 1];
2828
- const lastLocate = locateTasks[locateTasks.length - 1];
2829
- if (!lastAction) {
2830
- return {
2831
- success: true,
2832
- actionType: "unknown",
2833
- description: `Completed: ${originalPrompt}`
2583
+ async logScreenshot(title, options) {
2584
+ const screenshotTitle = title || "untitled";
2585
+ const content = options?.content || "";
2586
+ const screenshot = await this.page.screenshotBase64?.();
2587
+ if (screenshot) {
2588
+ const executionDump = {
2589
+ name: screenshotTitle,
2590
+ description: content,
2591
+ tasks: [{
2592
+ type: "Screenshot",
2593
+ subType: "log",
2594
+ status: "finished",
2595
+ executor: null,
2596
+ param: {
2597
+ title: screenshotTitle,
2598
+ content
2599
+ },
2600
+ output: {
2601
+ screenshot
2602
+ },
2603
+ thought: `Logged screenshot: ${screenshotTitle}`,
2604
+ timing: {
2605
+ start: Date.now(),
2606
+ end: Date.now(),
2607
+ cost: 0
2608
+ }
2609
+ }],
2610
+ sdkVersion: "1.0.0",
2611
+ logTime: Date.now(),
2612
+ model_name: "screenshot"
2834
2613
  };
2614
+ this.appendExecutionDump(executionDump);
2835
2615
  }
2836
- const actionType = lastAction.subType || "unknown";
2837
- const elementInfo = this.extractElementInfo(lastLocate, lastAction);
2838
- const description = this.generateActionDescription(actionType, lastAction.param, elementInfo);
2839
- return {
2840
- success: true,
2841
- actionType,
2842
- description,
2843
- elementInfo,
2844
- timing: lastAction.timing?.cost
2845
- };
2846
- }
2847
- /**
2848
- * Extract element information from locate task
2849
- */
2850
- extractElementInfo(locateTask, _actionTask) {
2851
- if (!locateTask?.output?.element)
2852
- return void 0;
2853
- const element = locateTask.output.element;
2854
- return {
2855
- type: element.attributes?.nodeType || "unknown",
2856
- text: element.content || element.attributes?.placeholder || element.attributes?.title || "",
2857
- location: `(${element.center[0]}, ${element.center[1]})`
2858
- };
2859
2616
  }
2860
- /**
2861
- * Generate natural language description for actions
2862
- */
2863
- generateActionDescription(actionType, param, elementInfo) {
2864
- const elementDesc = elementInfo ? `'${elementInfo.text || elementInfo.type}' element` : "element";
2865
- switch (actionType) {
2866
- case "Tap":
2867
- return `Clicked on ${elementDesc}`;
2868
- case "Input":
2869
- const inputValue = param?.value || "";
2870
- return `Entered "${inputValue}" into ${elementDesc}`;
2871
- case "KeyboardPress":
2872
- return `Pressed ${param?.value || "key"}`;
2873
- case "Scroll":
2874
- return `Scrolled ${param?.direction || "on page"}`;
2875
- case "Hover":
2876
- return `Hovered over ${elementDesc}`;
2877
- case "Drag":
2878
- return `Dragged ${elementDesc}`;
2879
- default:
2880
- return `Performed ${actionType} action on ${elementDesc}`;
2881
- }
2617
+ async destroy() {
2618
+ await this.page.destroy();
2882
2619
  }
2883
2620
  };
2884
2621
 
@@ -2909,7 +2646,7 @@ var Page = class {
2909
2646
  this.everMoved = false;
2910
2647
  this.underlyingPage = underlyingPage;
2911
2648
  this.pageType = pageType;
2912
- this.waitForNavigationTimeout = opts?.waitForNavigationTimeout || DEFAULT_WAIT_FOR_NAVIGATION_TIMEOUT2;
2649
+ this.waitForNavigationTimeout = opts?.waitForNavigationTimeout ?? DEFAULT_WAIT_FOR_NAVIGATION_TIMEOUT2;
2913
2650
  }
2914
2651
  async evaluate(pageFunction, arg) {
2915
2652
  let result;
@@ -3189,9 +2926,9 @@ var WebPage = class extends Page {
3189
2926
  }
3190
2927
  async waitUntilNetworkIdle(options) {
3191
2928
  await this.underlyingPage.waitForNetworkIdle({
3192
- idleTime: options?.idleTime || DEFAULT_WAIT_FOR_NETWORK_IDLE_TIME,
3193
- concurrency: options?.concurrency || DEFAULT_WAIT_FOR_NETWORK_IDLE_CONCURRENCY,
3194
- timeout: options?.timeout || this.waitForNetworkIdleTimeout
2929
+ idleTime: options?.idleTime ?? DEFAULT_WAIT_FOR_NETWORK_IDLE_TIME,
2930
+ concurrency: options?.concurrency ?? DEFAULT_WAIT_FOR_NETWORK_IDLE_CONCURRENCY,
2931
+ timeout: options?.timeout ?? this.waitForNetworkIdleTimeout
3195
2932
  });
3196
2933
  }
3197
2934
  };