misoai-web 1.0.6 → 1.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. package/README.md +5 -349
  2. package/dist/es/agent.js +165 -428
  3. package/dist/es/agent.js.map +1 -1
  4. package/dist/es/bridge-mode-browser.js +10 -9
  5. package/dist/es/bridge-mode-browser.js.map +1 -1
  6. package/dist/es/bridge-mode.js +167 -430
  7. package/dist/es/bridge-mode.js.map +1 -1
  8. package/dist/es/chrome-extension.js +173 -435
  9. package/dist/es/chrome-extension.js.map +1 -1
  10. package/dist/es/index.js +185 -432
  11. package/dist/es/index.js.map +1 -1
  12. package/dist/es/midscene-playground.js +165 -428
  13. package/dist/es/midscene-playground.js.map +1 -1
  14. package/dist/es/midscene-server.js.map +1 -1
  15. package/dist/es/playground.js +165 -428
  16. package/dist/es/playground.js.map +1 -1
  17. package/dist/es/playwright-report.js +1 -1
  18. package/dist/es/playwright-report.js.map +1 -1
  19. package/dist/es/playwright.js +182 -429
  20. package/dist/es/playwright.js.map +1 -1
  21. package/dist/es/puppeteer-agent-launcher.js +169 -432
  22. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  23. package/dist/es/puppeteer.js +169 -432
  24. package/dist/es/puppeteer.js.map +1 -1
  25. package/dist/es/ui-utils.js.map +1 -1
  26. package/dist/es/utils.js +7 -4
  27. package/dist/es/utils.js.map +1 -1
  28. package/dist/es/yaml.js +29 -3
  29. package/dist/es/yaml.js.map +1 -1
  30. package/dist/lib/agent.js +163 -426
  31. package/dist/lib/agent.js.map +1 -1
  32. package/dist/lib/bridge-mode-browser.js +10 -9
  33. package/dist/lib/bridge-mode-browser.js.map +1 -1
  34. package/dist/lib/bridge-mode.js +165 -428
  35. package/dist/lib/bridge-mode.js.map +1 -1
  36. package/dist/lib/chrome-extension.js +171 -433
  37. package/dist/lib/chrome-extension.js.map +1 -1
  38. package/dist/lib/index.js +183 -430
  39. package/dist/lib/index.js.map +1 -1
  40. package/dist/lib/midscene-playground.js +163 -426
  41. package/dist/lib/midscene-playground.js.map +1 -1
  42. package/dist/lib/midscene-server.js.map +1 -1
  43. package/dist/lib/playground.js +163 -426
  44. package/dist/lib/playground.js.map +1 -1
  45. package/dist/lib/playwright-report.js +1 -1
  46. package/dist/lib/playwright-report.js.map +1 -1
  47. package/dist/lib/playwright.js +180 -427
  48. package/dist/lib/playwright.js.map +1 -1
  49. package/dist/lib/puppeteer-agent-launcher.js +167 -430
  50. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  51. package/dist/lib/puppeteer.js +167 -430
  52. package/dist/lib/puppeteer.js.map +1 -1
  53. package/dist/lib/ui-utils.js.map +1 -1
  54. package/dist/lib/utils.js +7 -4
  55. package/dist/lib/utils.js.map +1 -1
  56. package/dist/lib/yaml.js +29 -3
  57. package/dist/lib/yaml.js.map +1 -1
  58. package/dist/types/agent.d.ts +13 -51
  59. package/dist/types/bridge-mode-browser.d.ts +2 -3
  60. package/dist/types/bridge-mode.d.ts +2 -3
  61. package/dist/types/{browser-aec1055d.d.ts → browser-9b472ffb.d.ts} +1 -1
  62. package/dist/types/chrome-extension.d.ts +2 -3
  63. package/dist/types/index.d.ts +1 -2
  64. package/dist/types/midscene-server.d.ts +1 -2
  65. package/dist/types/{page-86ab0fe1.d.ts → page-ed0ecb44.d.ts} +19 -9
  66. package/dist/types/playground.d.ts +2 -3
  67. package/dist/types/playwright.d.ts +9 -2
  68. package/dist/types/puppeteer-agent-launcher.d.ts +1 -2
  69. package/dist/types/puppeteer.d.ts +6 -5
  70. package/dist/types/ui-utils.d.ts +1 -1
  71. package/dist/types/utils.d.ts +1 -2
  72. package/dist/types/yaml.d.ts +1 -2
  73. package/iife-script/htmlElement.js +53 -75
  74. package/iife-script/htmlElementDebug.js +35 -56
  75. package/package.json +24 -24
  76. package/LICENSE +0 -21
@@ -22,7 +22,8 @@ var WebElementInfo = class {
22
22
  id,
23
23
  attributes,
24
24
  indexId,
25
- xpaths
25
+ xpaths,
26
+ isVisible
26
27
  }) {
27
28
  this.content = content;
28
29
  this.rect = rect;
@@ -35,6 +36,7 @@ var WebElementInfo = class {
35
36
  this.attributes = attributes;
36
37
  this.indexId = indexId;
37
38
  this.xpaths = xpaths;
39
+ this.isVisible = isVisible;
38
40
  }
39
41
  };
40
42
 
@@ -57,14 +59,15 @@ async function parseContextFromWebPage(page, _opt) {
57
59
  })
58
60
  ]);
59
61
  const webTree = traverseTree(tree, (elementInfo) => {
60
- const { rect, id, content, attributes, locator, indexId } = elementInfo;
62
+ const { rect, id, content, attributes, locator, indexId, isVisible } = elementInfo;
61
63
  return new WebElementInfo({
62
64
  rect,
63
65
  locator,
64
66
  id,
65
67
  content,
66
68
  attributes,
67
- indexId
69
+ indexId,
70
+ isVisible
68
71
  });
69
72
  });
70
73
  assert(screenshotBase64, "screenshotBase64 is required");
@@ -95,7 +98,7 @@ function printReportMsg(filepath) {
95
98
  }
96
99
  var ERROR_CODE_NOT_IMPLEMENTED_AS_DESIGNED = "NOT_IMPLEMENTED_AS_DESIGNED";
97
100
  function replaceIllegalPathCharsAndSpace(str) {
98
- return str.replace(/[/\\:*?"<>| ]/g, "-");
101
+ return str.replace(/[:*?"<>| ]/g, "-");
99
102
  }
100
103
  function matchElementFromPlan(planLocateParam, tree) {
101
104
  if (!planLocateParam) {
@@ -137,10 +140,11 @@ var ScriptPlayer = class {
137
140
  this.unnamedResultIndex = 0;
138
141
  this.pageAgent = null;
139
142
  this.result = {};
143
+ const target = script.target || script.web || script.android;
140
144
  if (ifInBrowser) {
141
145
  this.output = void 0;
142
- } else if (script.target?.output) {
143
- this.output = resolve(process.cwd(), script.target.output);
146
+ } else if (target?.output) {
147
+ this.output = resolve(process.cwd(), target.output);
144
148
  } else {
145
149
  this.output = join(getMidsceneRunSubDir("output"), `${process.pid}.json`);
146
150
  }
@@ -214,15 +218,20 @@ var ScriptPlayer = class {
214
218
  } else if ("aiAssert" in flowItem) {
215
219
  const assertTask = flowItem;
216
220
  const prompt = assertTask.aiAssert;
221
+ const msg = assertTask.errorMessage;
217
222
  assert2(prompt, "missing prompt for aiAssert");
218
223
  assert2(
219
224
  typeof prompt === "string",
220
225
  "prompt for aiAssert must be a string"
221
226
  );
222
- await agent.aiAssert(prompt);
227
+ await agent.aiAssert(prompt, msg);
223
228
  } else if ("aiQuery" in flowItem) {
224
229
  const queryTask = flowItem;
225
230
  const prompt = queryTask.aiQuery;
231
+ const options = {
232
+ domIncluded: queryTask.domIncluded,
233
+ screenshotIncluded: queryTask.screenshotIncluded
234
+ };
226
235
  assert2(prompt, "missing prompt for aiQuery");
227
236
  assert2(
228
237
  typeof prompt === "string",
@@ -233,6 +242,10 @@ var ScriptPlayer = class {
233
242
  } else if ("aiNumber" in flowItem) {
234
243
  const numberTask = flowItem;
235
244
  const prompt = numberTask.aiNumber;
245
+ const options = {
246
+ domIncluded: numberTask.domIncluded,
247
+ screenshotIncluded: numberTask.screenshotIncluded
248
+ };
236
249
  assert2(prompt, "missing prompt for number");
237
250
  assert2(
238
251
  typeof prompt === "string",
@@ -243,6 +256,10 @@ var ScriptPlayer = class {
243
256
  } else if ("aiString" in flowItem) {
244
257
  const stringTask = flowItem;
245
258
  const prompt = stringTask.aiString;
259
+ const options = {
260
+ domIncluded: stringTask.domIncluded,
261
+ screenshotIncluded: stringTask.screenshotIncluded
262
+ };
246
263
  assert2(prompt, "missing prompt for string");
247
264
  assert2(
248
265
  typeof prompt === "string",
@@ -253,6 +270,10 @@ var ScriptPlayer = class {
253
270
  } else if ("aiBoolean" in flowItem) {
254
271
  const booleanTask = flowItem;
255
272
  const prompt = booleanTask.aiBoolean;
273
+ const options = {
274
+ domIncluded: booleanTask.domIncluded,
275
+ screenshotIncluded: booleanTask.screenshotIncluded
276
+ };
256
277
  assert2(prompt, "missing prompt for boolean");
257
278
  assert2(
258
279
  typeof prompt === "string",
@@ -295,6 +316,9 @@ var ScriptPlayer = class {
295
316
  } else if ("aiTap" in flowItem) {
296
317
  const tapTask = flowItem;
297
318
  await agent.aiTap(tapTask.aiTap, tapTask);
319
+ } else if ("aiRightClick" in flowItem) {
320
+ const rightClickTask = flowItem;
321
+ await agent.aiRightClick(rightClickTask.aiRightClick, rightClickTask);
298
322
  } else if ("aiHover" in flowItem) {
299
323
  const hoverTask = flowItem;
300
324
  await agent.aiHover(hoverTask.aiHover, hoverTask);
@@ -317,6 +341,11 @@ var ScriptPlayer = class {
317
341
  evaluateJavaScriptTask.javascript
318
342
  );
319
343
  this.setResult(evaluateJavaScriptTask.name, result);
344
+ } else if ("logScreenshot" in flowItem) {
345
+ const logScreenshotTask = flowItem;
346
+ await agent.logScreenshot(logScreenshotTask.logScreenshot, {
347
+ content: logScreenshotTask.content || ""
348
+ });
320
349
  } else {
321
350
  throw new Error(`unknown flowItem: ${JSON.stringify(flowItem)}`);
322
351
  }
@@ -824,10 +853,10 @@ var PageTaskExecutor = class {
824
853
  if (!taskParam || !taskParam.value) {
825
854
  return;
826
855
  }
827
- await this.page.keyboard.type(taskParam.value);
828
- } else {
829
- await this.page.keyboard.type(taskParam.value);
830
856
  }
857
+ await this.page.keyboard.type(taskParam.value, {
858
+ autoDismissKeyboard: taskParam.autoDismissKeyboard
859
+ });
831
860
  }
832
861
  };
833
862
  tasks.push(taskActionInput);
@@ -856,6 +885,22 @@ var PageTaskExecutor = class {
856
885
  }
857
886
  };
858
887
  tasks.push(taskActionTap);
888
+ } else if (plan2.type === "RightClick") {
889
+ const taskActionRightClick = {
890
+ type: "Action",
891
+ subType: "RightClick",
892
+ thought: plan2.thought,
893
+ locate: plan2.locate,
894
+ executor: async (param, { element }) => {
895
+ assert4(element, "Element not found, cannot right click");
896
+ await this.page.mouse.click(
897
+ element.center[0],
898
+ element.center[1],
899
+ { button: "right" }
900
+ );
901
+ }
902
+ };
903
+ tasks.push(taskActionRightClick);
859
904
  } else if (plan2.type === "Drag") {
860
905
  const taskActionDrag = {
861
906
  type: "Action",
@@ -1384,7 +1429,7 @@ var PageTaskExecutor = class {
1384
1429
  executor: taskExecutor
1385
1430
  };
1386
1431
  }
1387
- async createTypeQueryTask(type, demand) {
1432
+ async createTypeQueryTask(type, demand, opt) {
1388
1433
  const taskExecutor = new Executor(
1389
1434
  taskTitleStr(
1390
1435
  type,
@@ -1415,7 +1460,10 @@ var PageTaskExecutor = class {
1415
1460
  result: `${type}, ${demand}`
1416
1461
  };
1417
1462
  }
1418
- const { data, usage } = await this.insight.extract(demandInput);
1463
+ const { data, usage } = await this.insight.extract(
1464
+ demandInput,
1465
+ opt
1466
+ );
1419
1467
  let outputResult = data;
1420
1468
  if (ifTypeRestricted) {
1421
1469
  assert4(data?.result !== void 0, "No result in query data");
@@ -1435,17 +1483,17 @@ var PageTaskExecutor = class {
1435
1483
  executor: taskExecutor
1436
1484
  };
1437
1485
  }
1438
- async query(demand) {
1439
- return this.createTypeQueryTask("Query", demand);
1486
+ async query(demand, opt) {
1487
+ return this.createTypeQueryTask("Query", demand, opt);
1440
1488
  }
1441
- async boolean(prompt) {
1442
- return this.createTypeQueryTask("Boolean", prompt);
1489
+ async boolean(prompt, opt) {
1490
+ return this.createTypeQueryTask("Boolean", prompt, opt);
1443
1491
  }
1444
- async number(prompt) {
1445
- return this.createTypeQueryTask("Number", prompt);
1492
+ async number(prompt, opt) {
1493
+ return this.createTypeQueryTask("Number", prompt, opt);
1446
1494
  }
1447
- async string(prompt) {
1448
- return this.createTypeQueryTask("String", prompt);
1495
+ async string(prompt, opt) {
1496
+ return this.createTypeQueryTask("String", prompt, opt);
1449
1497
  }
1450
1498
  async assert(assertion) {
1451
1499
  const description = `assert: ${assertion}`;
@@ -1581,7 +1629,7 @@ function buildPlans(type, locateParam, param) {
1581
1629
  param: locateParam,
1582
1630
  thought: ""
1583
1631
  } : null;
1584
- if (type === "Tap" || type === "Hover") {
1632
+ if (type === "Tap" || type === "Hover" || type === "RightClick") {
1585
1633
  assert5(locateParam, `missing locate info for action "${type}"`);
1586
1634
  assert5(locatePlan, `missing locate info for action "${type}"`);
1587
1635
  const tapPlan = {
@@ -1652,8 +1700,8 @@ function buildPlans(type, locateParam, param) {
1652
1700
 
1653
1701
  // src/common/task-cache.ts
1654
1702
  import assert6 from "assert";
1655
- import { existsSync as existsSync2, readFileSync, writeFileSync as writeFileSync2 } from "fs";
1656
- import { join as join2 } from "path";
1703
+ import { existsSync as existsSync2, mkdirSync as mkdirSync2, readFileSync, writeFileSync as writeFileSync2 } from "fs";
1704
+ import { dirname as dirname2, join as join2 } from "path";
1657
1705
  import { getMidsceneRunSubDir as getMidsceneRunSubDir2 } from "misoai-shared/common";
1658
1706
  import { getDebug as getDebug3 } from "misoai-shared/logger";
1659
1707
  import { ifInBrowser as ifInBrowser2 } from "misoai-shared/utils";
@@ -1661,7 +1709,7 @@ import yaml3 from "js-yaml";
1661
1709
  import semver from "semver";
1662
1710
 
1663
1711
  // package.json
1664
- var version = "1.0.5";
1712
+ var version = "1.0.3";
1665
1713
 
1666
1714
  // src/common/task-cache.ts
1667
1715
  var debug3 = getDebug3("cache");
@@ -1689,70 +1737,44 @@ var TaskCache = class {
1689
1737
  this.cache = cacheContent;
1690
1738
  this.cacheOriginalLength = this.cache.caches.length;
1691
1739
  }
1692
- matchCache(prompt, type, contextData) {
1693
- const contextHash = contextData ? this.generateContextHash(contextData) : void 0;
1740
+ matchCache(prompt, type) {
1694
1741
  for (let i = 0; i < this.cacheOriginalLength; i++) {
1695
1742
  const item = this.cache.caches[i];
1696
1743
  const key = `${type}:${prompt}:${i}`;
1697
- if (item.type !== type || item.prompt !== prompt || this.matchedCacheIndices.has(key)) {
1698
- continue;
1699
- }
1700
- if (type === "plan" && item.type === "plan") {
1701
- const planItem = item;
1702
- if (contextHash && planItem.contextHash) {
1703
- if (contextHash !== planItem.contextHash) {
1704
- debug3("cache context mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1705
- continue;
1744
+ if (item.type === type && item.prompt === prompt && !this.matchedCacheIndices.has(key)) {
1745
+ this.matchedCacheIndices.add(key);
1746
+ debug3(
1747
+ "cache found and marked as used, type: %s, prompt: %s, index: %d",
1748
+ type,
1749
+ prompt,
1750
+ i
1751
+ );
1752
+ return {
1753
+ cacheContent: item,
1754
+ updateFn: (cb) => {
1755
+ debug3(
1756
+ "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1757
+ type,
1758
+ prompt,
1759
+ i
1760
+ );
1761
+ cb(item);
1762
+ debug3(
1763
+ "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1764
+ type,
1765
+ prompt,
1766
+ i
1767
+ );
1768
+ this.flushCacheToFile();
1706
1769
  }
1707
- } else if (contextHash || planItem.contextHash) {
1708
- debug3("cache context availability mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1709
- continue;
1710
- }
1770
+ };
1711
1771
  }
1712
- this.matchedCacheIndices.add(key);
1713
- debug3(
1714
- "cache found and marked as used, type: %s, prompt: %s, index: %d, contextMatch: %s",
1715
- type,
1716
- prompt,
1717
- i,
1718
- contextHash ? "yes" : "no-context"
1719
- );
1720
- return {
1721
- cacheContent: item,
1722
- updateFn: (cb) => {
1723
- debug3(
1724
- "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1725
- type,
1726
- prompt,
1727
- i
1728
- );
1729
- cb(item);
1730
- debug3(
1731
- "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1732
- type,
1733
- prompt,
1734
- i
1735
- );
1736
- this.flushCacheToFile();
1737
- }
1738
- };
1739
1772
  }
1740
- debug3("no unused cache found, type: %s, prompt: %s, contextHash: %s", type, prompt, contextHash);
1773
+ debug3("no unused cache found, type: %s, prompt: %s", type, prompt);
1741
1774
  return void 0;
1742
1775
  }
1743
- generateContextHash(contextData) {
1744
- const sortedKeys = Object.keys(contextData).sort();
1745
- const stableString = sortedKeys.map((key) => `${key}:${JSON.stringify(contextData[key])}`).join("|");
1746
- let hash = 0;
1747
- for (let i = 0; i < stableString.length; i++) {
1748
- const char = stableString.charCodeAt(i);
1749
- hash = (hash << 5) - hash + char;
1750
- hash = hash & hash;
1751
- }
1752
- return hash.toString(36);
1753
- }
1754
- matchPlanCache(prompt, contextData) {
1755
- return this.matchCache(prompt, "plan", contextData);
1776
+ matchPlanCache(prompt) {
1777
+ return this.matchCache(prompt, "plan");
1756
1778
  }
1757
1779
  matchLocateCache(prompt) {
1758
1780
  return this.matchCache(prompt, "locate");
@@ -1818,8 +1840,14 @@ cache file: ${cacheFile}`
1818
1840
  return;
1819
1841
  }
1820
1842
  try {
1843
+ const dir = dirname2(this.cacheFilePath);
1844
+ if (!existsSync2(dir)) {
1845
+ mkdirSync2(dir, { recursive: true });
1846
+ debug3("created cache directory: %s", dir);
1847
+ }
1821
1848
  const yamlData = yaml3.dump(this.cache);
1822
1849
  writeFileSync2(this.cacheFilePath, yamlData);
1850
+ debug3("cache flushed to file: %s", this.cacheFilePath);
1823
1851
  } catch (err) {
1824
1852
  debug3(
1825
1853
  "write cache to file failed, path: %s, error: %s",
@@ -1828,16 +1856,11 @@ cache file: ${cacheFile}`
1828
1856
  );
1829
1857
  }
1830
1858
  }
1831
- updateOrAppendCacheRecord(newRecord, cachedRecord, contextData) {
1859
+ updateOrAppendCacheRecord(newRecord, cachedRecord) {
1832
1860
  if (cachedRecord) {
1833
1861
  if (newRecord.type === "plan") {
1834
1862
  cachedRecord.updateFn((cache) => {
1835
- const planCache = cache;
1836
- planCache.yamlWorkflow = newRecord.yamlWorkflow;
1837
- if (contextData) {
1838
- planCache.contextHash = this.generateContextHash(contextData);
1839
- planCache.contextData = { ...contextData };
1840
- }
1863
+ cache.yamlWorkflow = newRecord.yamlWorkflow;
1841
1864
  });
1842
1865
  } else {
1843
1866
  cachedRecord.updateFn((cache) => {
@@ -1845,11 +1868,6 @@ cache file: ${cacheFile}`
1845
1868
  });
1846
1869
  }
1847
1870
  } else {
1848
- if (newRecord.type === "plan" && contextData) {
1849
- const planRecord = newRecord;
1850
- planRecord.contextHash = this.generateContextHash(contextData);
1851
- planRecord.contextData = { ...contextData };
1852
- }
1853
1871
  this.appendCache(newRecord);
1854
1872
  }
1855
1873
  }
@@ -1879,13 +1897,10 @@ var PageAgent = class {
1879
1897
  generateReport: true,
1880
1898
  autoPrintReportMsg: true,
1881
1899
  groupName: "Midscene Report",
1882
- groupDescription: "",
1883
- enableCumulativeContext: true,
1884
- autoClearContext: false
1900
+ groupDescription: ""
1885
1901
  },
1886
1902
  opts || {}
1887
1903
  );
1888
- this.initializeContextStore();
1889
1904
  if (this.page.pageType === "puppeteer" || this.page.pageType === "playwright") {
1890
1905
  this.page.waitForNavigationTimeout = this.opts.waitForNavigationTimeout || DEFAULT_WAIT_FOR_NAVIGATION_TIMEOUT;
1891
1906
  this.page.waitForNetworkIdleTimeout = this.opts.waitForNetworkIdleTimeout || DEFAULT_WAIT_FOR_NETWORK_IDLE_TIMEOUT;
@@ -1912,69 +1927,6 @@ var PageAgent = class {
1912
1927
  opts?.testId || this.page.pageType || "web"
1913
1928
  );
1914
1929
  }
1915
- /**
1916
- * Initialize context store for cumulative context functionality
1917
- */
1918
- async initializeContextStore() {
1919
- if (!this.opts.enableCumulativeContext) {
1920
- debug4("Cumulative context disabled via options");
1921
- return;
1922
- }
1923
- try {
1924
- const aiModel = await import("misoai-core/ai-model");
1925
- this.contextStore = aiModel.getContextStore();
1926
- debug4("Context store initialized successfully", {
1927
- autoClearContext: this.opts.autoClearContext,
1928
- testId: this.opts.testId
1929
- });
1930
- if (this.opts.autoClearContext) {
1931
- this.contextStore.clear();
1932
- debug4("Context store cleared due to autoClearContext option");
1933
- } else {
1934
- const existingData = this.contextStore.getAllData();
1935
- const existingSteps = this.contextStore.getRecentSteps(100).length;
1936
- debug4("Context store preserving existing data", {
1937
- existingDataKeys: Object.keys(existingData),
1938
- existingStepsCount: existingSteps
1939
- });
1940
- }
1941
- } catch (error) {
1942
- debug4("Failed to initialize context store:", error);
1943
- console.warn("⚠️ Could not initialize context store:", error);
1944
- }
1945
- }
1946
- /**
1947
- * Get the context store instance
1948
- */
1949
- getContextStore() {
1950
- return this.contextStore;
1951
- }
1952
- /**
1953
- * Clear the context store
1954
- */
1955
- clearContext() {
1956
- if (this.contextStore) {
1957
- this.contextStore.clear();
1958
- }
1959
- }
1960
- /**
1961
- * Get all stored data from context store
1962
- */
1963
- getStoredData() {
1964
- if (this.contextStore) {
1965
- return this.contextStore.getAllData();
1966
- }
1967
- return {};
1968
- }
1969
- /**
1970
- * Get step summary from context store
1971
- */
1972
- getStepSummary() {
1973
- if (this.contextStore) {
1974
- return this.contextStore.getStepSummary();
1975
- }
1976
- return "";
1977
- }
1978
1930
  async getUIContext(action) {
1979
1931
  if (action && (action === "extract" || action === "assert" || action === "captcha")) {
1980
1932
  return await parseContextFromWebPage(this.page, {
@@ -2153,6 +2105,23 @@ var PageAgent = class {
2153
2105
  metadata
2154
2106
  };
2155
2107
  }
2108
+ async aiRightClick(locatePrompt, opt) {
2109
+ const detailedLocateParam = this.buildDetailedLocateParam(
2110
+ locatePrompt,
2111
+ opt
2112
+ );
2113
+ const plans = buildPlans("RightClick", detailedLocateParam);
2114
+ const { executor, output } = await this.taskExecutor.runPlans(
2115
+ taskTitleStr("RightClick", locateParamStr(detailedLocateParam)),
2116
+ plans,
2117
+ { cacheable: opt?.cacheable }
2118
+ );
2119
+ const metadata = this.afterTaskRunning(executor);
2120
+ return {
2121
+ result: output,
2122
+ metadata
2123
+ };
2124
+ }
2156
2125
  async aiInput(value, locatePrompt, opt) {
2157
2126
  assert7(
2158
2127
  typeof value === "string",
@@ -2210,35 +2179,9 @@ var PageAgent = class {
2210
2179
  };
2211
2180
  }
2212
2181
  async aiAction(taskPrompt, opt) {
2213
- const originalPrompt = taskPrompt;
2214
- let processedPrompt = taskPrompt;
2215
- if (this.opts.enableCumulativeContext && this.contextStore) {
2216
- try {
2217
- const storedData = this.contextStore.getAllData();
2218
- if (Object.keys(storedData).length > 0) {
2219
- debug4("Available data for aiAction:", {
2220
- prompt: taskPrompt,
2221
- availableData: storedData
2222
- });
2223
- }
2224
- } catch (error) {
2225
- debug4("Context store operation failed:", error);
2226
- }
2227
- }
2228
2182
  const cacheable = opt?.cacheable;
2229
2183
  const isVlmUiTars = vlLocateMode() === "vlm-ui-tars";
2230
- let contextData;
2231
- if (this.opts.enableCumulativeContext && this.contextStore) {
2232
- try {
2233
- contextData = this.contextStore.getAllData();
2234
- if (contextData && Object.keys(contextData).length === 0) {
2235
- contextData = void 0;
2236
- }
2237
- } catch (error) {
2238
- debug4("Failed to get context data for cache:", error);
2239
- }
2240
- }
2241
- const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt, contextData);
2184
+ const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt);
2242
2185
  if (matchedCache && this.taskCache?.isCacheResultUsed) {
2243
2186
  const { executor: executor2 } = await this.taskExecutor.loadYamlFlowAsPlanning(
2244
2187
  taskPrompt,
@@ -2248,28 +2191,6 @@ var PageAgent = class {
2248
2191
  debug4("matched cache, will call .runYaml to run the action");
2249
2192
  const yaml5 = matchedCache.cacheContent?.yamlWorkflow;
2250
2193
  const result = await this.runYaml(yaml5);
2251
- if (this.opts.enableCumulativeContext && this.contextStore) {
2252
- try {
2253
- const executionResult = {
2254
- success: true,
2255
- actionType: "cached",
2256
- description: `Executed cached action: ${processedPrompt}`,
2257
- timing: result.metadata?.totalTime
2258
- };
2259
- this.contextStore.addStep({
2260
- type: "action",
2261
- summary: `Action: ${processedPrompt} (cached)`,
2262
- prompt: processedPrompt,
2263
- executionResult
2264
- });
2265
- debug4("Added cached action step to context store:", {
2266
- stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2267
- totalSteps: this.contextStore.getRecentSteps(100).length
2268
- });
2269
- } catch (error) {
2270
- debug4("Failed to add cached action step:", error);
2271
- }
2272
- }
2273
2194
  return {
2274
2195
  result: result.result,
2275
2196
  metadata: metadata2
@@ -2294,114 +2215,17 @@ var PageAgent = class {
2294
2215
  prompt: taskPrompt,
2295
2216
  yamlWorkflow: yamlFlowStr
2296
2217
  },
2297
- matchedCache,
2298
- contextData
2299
- // Pass context data for cache creation
2218
+ matchedCache
2300
2219
  );
2301
2220
  }
2302
2221
  const metadata = this.afterTaskRunning(executor);
2303
- if (this.opts.enableCumulativeContext && this.contextStore) {
2304
- try {
2305
- const executionResult = this.analyzeExecutionResults(executor, originalPrompt);
2306
- this.contextStore.addStep({
2307
- type: "action",
2308
- summary: `Action: ${processedPrompt}`,
2309
- prompt: processedPrompt,
2310
- executionResult
2311
- });
2312
- debug4("Added action step with execution result to context store:", {
2313
- stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2314
- totalSteps: this.contextStore.getRecentSteps(100).length,
2315
- executionResult
2316
- });
2317
- } catch (error) {
2318
- debug4("Failed to analyze execution results, adding step without execution result:", error);
2319
- try {
2320
- this.contextStore.addStep({
2321
- type: "action",
2322
- summary: `Action: ${processedPrompt}`,
2323
- prompt: processedPrompt
2324
- });
2325
- } catch (stepError) {
2326
- debug4("Failed to add action step:", stepError);
2327
- }
2328
- }
2329
- }
2330
2222
  return {
2331
2223
  result: output,
2332
2224
  metadata
2333
2225
  };
2334
2226
  }
2335
2227
  async aiQuery(demand) {
2336
- let processedDemand = demand;
2337
- let storageKey;
2338
- try {
2339
- const aiModel = await import("misoai-core/ai-model");
2340
- const contextStore = aiModel.getContextStore();
2341
- if (typeof demand === "string") {
2342
- const storageInstruction = contextStore.parseStorageInstruction(demand);
2343
- if (storageInstruction) {
2344
- storageKey = storageInstruction.key;
2345
- processedDemand = storageInstruction.cleanText;
2346
- contextStore._pendingAliases = storageInstruction.aliases;
2347
- } else {
2348
- const storageMatch = demand.match(/store\s+(?:as\s+)?(\w+)/i);
2349
- if (storageMatch) {
2350
- storageKey = storageMatch[1];
2351
- processedDemand = demand.replace(/,?\s*store\s+(?:as\s+)?\w+/i, "").trim();
2352
- }
2353
- }
2354
- }
2355
- } catch (error) {
2356
- debug4("Context store not available:", error);
2357
- }
2358
- const { output, executor } = await this.taskExecutor.query(processedDemand);
2359
- if (this.opts.enableCumulativeContext && this.contextStore) {
2360
- if (storageKey && output) {
2361
- try {
2362
- const pendingAliases = this.contextStore._pendingAliases;
2363
- if (pendingAliases) {
2364
- this.contextStore.storeDataWithAliases(storageKey, output, pendingAliases, typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand));
2365
- delete this.contextStore._pendingAliases;
2366
- debug4("Stored query result with aliases:", {
2367
- key: storageKey,
2368
- value: output,
2369
- aliases: pendingAliases
2370
- });
2371
- } else {
2372
- this.contextStore.storeData(storageKey, output);
2373
- debug4("Stored query result:", {
2374
- key: storageKey,
2375
- value: output
2376
- });
2377
- }
2378
- this.contextStore.addStep({
2379
- type: "query",
2380
- summary: `Query: ${typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)} (stored as ${storageKey})`,
2381
- data: output,
2382
- prompt: typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)
2383
- });
2384
- debug4("Added query step to context store:", {
2385
- storageKey,
2386
- totalStoredItems: Object.keys(this.contextStore.getAllData()).length,
2387
- totalSteps: this.contextStore.getRecentSteps(100).length
2388
- });
2389
- } catch (error) {
2390
- debug4("Failed to store query result:", error);
2391
- }
2392
- } else {
2393
- try {
2394
- this.contextStore.addStep({
2395
- type: "query",
2396
- summary: `Query: ${typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)}`,
2397
- data: output,
2398
- prompt: typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)
2399
- });
2400
- } catch (error) {
2401
- debug4("Failed to add query step:", error);
2402
- }
2403
- }
2404
- }
2228
+ const { output, executor } = await this.taskExecutor.query(demand);
2405
2229
  const metadata = this.afterTaskRunning(executor);
2406
2230
  return {
2407
2231
  result: output,
@@ -2511,48 +2335,6 @@ var PageAgent = class {
2511
2335
  };
2512
2336
  }
2513
2337
  async aiAssert(assertion, msg, opt) {
2514
- let executionContext = "";
2515
- if (this.opts.enableCumulativeContext && this.contextStore) {
2516
- try {
2517
- const recentSteps = this.contextStore.getRecentSteps(3);
2518
- const stepsWithExecutionResults = recentSteps.filter((step) => step.executionResult);
2519
- const storedData = this.contextStore.getAllData();
2520
- if (stepsWithExecutionResults.length > 0) {
2521
- const recentActions = stepsWithExecutionResults.map((step) => {
2522
- const result = step.executionResult;
2523
- return `- ${result.description}${result.success ? "" : " (FAILED)"}`;
2524
- }).join("\n");
2525
- executionContext = `
2526
-
2527
- Recent actions performed:
2528
- ${recentActions}
2529
-
2530
- This context may help verify the assertion.`;
2531
- }
2532
- if (storedData && Object.keys(storedData).length > 0) {
2533
- executionContext += `
2534
-
2535
- Available data for reference:
2536
- ${JSON.stringify(storedData, null, 2)}
2537
-
2538
- Note: If the assertion references any data keys or natural language equivalents, consider the stored values when verifying.`;
2539
- debug4("Available data for aiAssert:", {
2540
- assertion,
2541
- availableData: storedData
2542
- });
2543
- }
2544
- this.contextStore.addStep({
2545
- type: "assertion",
2546
- summary: `Assertion: ${assertion}`,
2547
- prompt: assertion
2548
- });
2549
- debug4("Added assertion step to context store:", {
2550
- totalSteps: this.contextStore.getRecentSteps(100).length
2551
- });
2552
- } catch (error) {
2553
- debug4("Context store operation failed:", error);
2554
- }
2555
- }
2556
2338
  let currentUrl = "";
2557
2339
  if (this.page.url) {
2558
2340
  try {
@@ -2560,13 +2342,7 @@ Note: If the assertion references any data keys or natural language equivalents,
2560
2342
  } catch (e) {
2561
2343
  }
2562
2344
  }
2563
- let assertionWithContext = assertion;
2564
- if (currentUrl) {
2565
- assertionWithContext = `For the page at URL "${currentUrl}", ${assertion}`;
2566
- }
2567
- if (executionContext) {
2568
- assertionWithContext += executionContext;
2569
- }
2345
+ const assertionWithContext = currentUrl ? `For the page at URL "${currentUrl}", ${assertion}` : assertion;
2570
2346
  const { output, executor } = await this.taskExecutor.assert(assertionWithContext);
2571
2347
  const metadata = this.afterTaskRunning(executor, true);
2572
2348
  if (output && opt?.keepRawResponse) {
@@ -2777,81 +2553,42 @@ ${errors}`);
2777
2553
  }
2778
2554
  throw new Error("evaluateJavaScript is not supported in current agent");
2779
2555
  }
2780
- async destroy() {
2781
- await this.page.destroy();
2782
- }
2783
- /**
2784
- * Analyze execution results from executor to generate meaningful descriptions
2785
- */
2786
- analyzeExecutionResults(executor, originalPrompt) {
2787
- const tasks = executor.tasks;
2788
- const success = !executor.isInErrorState();
2789
- if (!success) {
2790
- const errorTask = executor.latestErrorTask();
2791
- return {
2792
- success: false,
2793
- actionType: "error",
2794
- description: `Failed to execute: ${originalPrompt}`,
2795
- error: errorTask?.error
2796
- };
2797
- }
2798
- const actionTasks = tasks.filter((t) => t.type === "Action" && t.status === "finished");
2799
- const locateTasks = tasks.filter((t) => t.type === "Insight" && t.subType === "Locate");
2800
- const lastAction = actionTasks[actionTasks.length - 1];
2801
- const lastLocate = locateTasks[locateTasks.length - 1];
2802
- if (!lastAction) {
2803
- return {
2804
- success: true,
2805
- actionType: "unknown",
2806
- description: `Completed: ${originalPrompt}`
2556
+ async logScreenshot(title, options) {
2557
+ const screenshotTitle = title || "untitled";
2558
+ const content = options?.content || "";
2559
+ const screenshot = await this.page.screenshotBase64?.();
2560
+ if (screenshot) {
2561
+ const executionDump = {
2562
+ name: screenshotTitle,
2563
+ description: content,
2564
+ tasks: [{
2565
+ type: "Screenshot",
2566
+ subType: "log",
2567
+ status: "finished",
2568
+ executor: null,
2569
+ param: {
2570
+ title: screenshotTitle,
2571
+ content
2572
+ },
2573
+ output: {
2574
+ screenshot
2575
+ },
2576
+ thought: `Logged screenshot: ${screenshotTitle}`,
2577
+ timing: {
2578
+ start: Date.now(),
2579
+ end: Date.now(),
2580
+ cost: 0
2581
+ }
2582
+ }],
2583
+ sdkVersion: "1.0.0",
2584
+ logTime: Date.now(),
2585
+ model_name: "screenshot"
2807
2586
  };
2587
+ this.appendExecutionDump(executionDump);
2808
2588
  }
2809
- const actionType = lastAction.subType || "unknown";
2810
- const elementInfo = this.extractElementInfo(lastLocate, lastAction);
2811
- const description = this.generateActionDescription(actionType, lastAction.param, elementInfo);
2812
- return {
2813
- success: true,
2814
- actionType,
2815
- description,
2816
- elementInfo,
2817
- timing: lastAction.timing?.cost
2818
- };
2819
2589
  }
2820
- /**
2821
- * Extract element information from locate task
2822
- */
2823
- extractElementInfo(locateTask, _actionTask) {
2824
- if (!locateTask?.output?.element)
2825
- return void 0;
2826
- const element = locateTask.output.element;
2827
- return {
2828
- type: element.attributes?.nodeType || "unknown",
2829
- text: element.content || element.attributes?.placeholder || element.attributes?.title || "",
2830
- location: `(${element.center[0]}, ${element.center[1]})`
2831
- };
2832
- }
2833
- /**
2834
- * Generate natural language description for actions
2835
- */
2836
- generateActionDescription(actionType, param, elementInfo) {
2837
- const elementDesc = elementInfo ? `'${elementInfo.text || elementInfo.type}' element` : "element";
2838
- switch (actionType) {
2839
- case "Tap":
2840
- return `Clicked on ${elementDesc}`;
2841
- case "Input":
2842
- const inputValue = param?.value || "";
2843
- return `Entered "${inputValue}" into ${elementDesc}`;
2844
- case "KeyboardPress":
2845
- return `Pressed ${param?.value || "key"}`;
2846
- case "Scroll":
2847
- return `Scrolled ${param?.direction || "on page"}`;
2848
- case "Hover":
2849
- return `Hovered over ${elementDesc}`;
2850
- case "Drag":
2851
- return `Dragged ${elementDesc}`;
2852
- default:
2853
- return `Performed ${actionType} action on ${elementDesc}`;
2854
- }
2590
+ async destroy() {
2591
+ await this.page.destroy();
2855
2592
  }
2856
2593
  };
2857
2594