misoai-web 1.0.6 → 1.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. package/README.md +5 -349
  2. package/dist/es/agent.js +165 -428
  3. package/dist/es/agent.js.map +1 -1
  4. package/dist/es/bridge-mode-browser.js +10 -9
  5. package/dist/es/bridge-mode-browser.js.map +1 -1
  6. package/dist/es/bridge-mode.js +167 -430
  7. package/dist/es/bridge-mode.js.map +1 -1
  8. package/dist/es/chrome-extension.js +173 -435
  9. package/dist/es/chrome-extension.js.map +1 -1
  10. package/dist/es/index.js +185 -432
  11. package/dist/es/index.js.map +1 -1
  12. package/dist/es/midscene-playground.js +165 -428
  13. package/dist/es/midscene-playground.js.map +1 -1
  14. package/dist/es/midscene-server.js.map +1 -1
  15. package/dist/es/playground.js +165 -428
  16. package/dist/es/playground.js.map +1 -1
  17. package/dist/es/playwright-report.js +1 -1
  18. package/dist/es/playwright-report.js.map +1 -1
  19. package/dist/es/playwright.js +182 -429
  20. package/dist/es/playwright.js.map +1 -1
  21. package/dist/es/puppeteer-agent-launcher.js +169 -432
  22. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  23. package/dist/es/puppeteer.js +169 -432
  24. package/dist/es/puppeteer.js.map +1 -1
  25. package/dist/es/ui-utils.js.map +1 -1
  26. package/dist/es/utils.js +7 -4
  27. package/dist/es/utils.js.map +1 -1
  28. package/dist/es/yaml.js +29 -3
  29. package/dist/es/yaml.js.map +1 -1
  30. package/dist/lib/agent.js +163 -426
  31. package/dist/lib/agent.js.map +1 -1
  32. package/dist/lib/bridge-mode-browser.js +10 -9
  33. package/dist/lib/bridge-mode-browser.js.map +1 -1
  34. package/dist/lib/bridge-mode.js +165 -428
  35. package/dist/lib/bridge-mode.js.map +1 -1
  36. package/dist/lib/chrome-extension.js +171 -433
  37. package/dist/lib/chrome-extension.js.map +1 -1
  38. package/dist/lib/index.js +183 -430
  39. package/dist/lib/index.js.map +1 -1
  40. package/dist/lib/midscene-playground.js +163 -426
  41. package/dist/lib/midscene-playground.js.map +1 -1
  42. package/dist/lib/midscene-server.js.map +1 -1
  43. package/dist/lib/playground.js +163 -426
  44. package/dist/lib/playground.js.map +1 -1
  45. package/dist/lib/playwright-report.js +1 -1
  46. package/dist/lib/playwright-report.js.map +1 -1
  47. package/dist/lib/playwright.js +180 -427
  48. package/dist/lib/playwright.js.map +1 -1
  49. package/dist/lib/puppeteer-agent-launcher.js +167 -430
  50. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  51. package/dist/lib/puppeteer.js +167 -430
  52. package/dist/lib/puppeteer.js.map +1 -1
  53. package/dist/lib/ui-utils.js.map +1 -1
  54. package/dist/lib/utils.js +7 -4
  55. package/dist/lib/utils.js.map +1 -1
  56. package/dist/lib/yaml.js +29 -3
  57. package/dist/lib/yaml.js.map +1 -1
  58. package/dist/types/agent.d.ts +13 -51
  59. package/dist/types/bridge-mode-browser.d.ts +2 -3
  60. package/dist/types/bridge-mode.d.ts +2 -3
  61. package/dist/types/{browser-aec1055d.d.ts → browser-9b472ffb.d.ts} +1 -1
  62. package/dist/types/chrome-extension.d.ts +2 -3
  63. package/dist/types/index.d.ts +1 -2
  64. package/dist/types/midscene-server.d.ts +1 -2
  65. package/dist/types/{page-86ab0fe1.d.ts → page-ed0ecb44.d.ts} +19 -9
  66. package/dist/types/playground.d.ts +2 -3
  67. package/dist/types/playwright.d.ts +9 -2
  68. package/dist/types/puppeteer-agent-launcher.d.ts +1 -2
  69. package/dist/types/puppeteer.d.ts +6 -5
  70. package/dist/types/ui-utils.d.ts +1 -1
  71. package/dist/types/utils.d.ts +1 -2
  72. package/dist/types/yaml.d.ts +1 -2
  73. package/iife-script/htmlElement.js +53 -75
  74. package/iife-script/htmlElementDebug.js +35 -56
  75. package/package.json +24 -24
  76. package/LICENSE +0 -21
package/dist/es/agent.js CHANGED
@@ -17,10 +17,11 @@ var ScriptPlayer = class {
17
17
  this.unnamedResultIndex = 0;
18
18
  this.pageAgent = null;
19
19
  this.result = {};
20
+ const target = script.target || script.web || script.android;
20
21
  if (ifInBrowser) {
21
22
  this.output = void 0;
22
- } else if (script.target?.output) {
23
- this.output = resolve(process.cwd(), script.target.output);
23
+ } else if (target?.output) {
24
+ this.output = resolve(process.cwd(), target.output);
24
25
  } else {
25
26
  this.output = join(getMidsceneRunSubDir("output"), `${process.pid}.json`);
26
27
  }
@@ -94,15 +95,20 @@ var ScriptPlayer = class {
94
95
  } else if ("aiAssert" in flowItem) {
95
96
  const assertTask = flowItem;
96
97
  const prompt = assertTask.aiAssert;
98
+ const msg = assertTask.errorMessage;
97
99
  assert(prompt, "missing prompt for aiAssert");
98
100
  assert(
99
101
  typeof prompt === "string",
100
102
  "prompt for aiAssert must be a string"
101
103
  );
102
- await agent.aiAssert(prompt);
104
+ await agent.aiAssert(prompt, msg);
103
105
  } else if ("aiQuery" in flowItem) {
104
106
  const queryTask = flowItem;
105
107
  const prompt = queryTask.aiQuery;
108
+ const options = {
109
+ domIncluded: queryTask.domIncluded,
110
+ screenshotIncluded: queryTask.screenshotIncluded
111
+ };
106
112
  assert(prompt, "missing prompt for aiQuery");
107
113
  assert(
108
114
  typeof prompt === "string",
@@ -113,6 +119,10 @@ var ScriptPlayer = class {
113
119
  } else if ("aiNumber" in flowItem) {
114
120
  const numberTask = flowItem;
115
121
  const prompt = numberTask.aiNumber;
122
+ const options = {
123
+ domIncluded: numberTask.domIncluded,
124
+ screenshotIncluded: numberTask.screenshotIncluded
125
+ };
116
126
  assert(prompt, "missing prompt for number");
117
127
  assert(
118
128
  typeof prompt === "string",
@@ -123,6 +133,10 @@ var ScriptPlayer = class {
123
133
  } else if ("aiString" in flowItem) {
124
134
  const stringTask = flowItem;
125
135
  const prompt = stringTask.aiString;
136
+ const options = {
137
+ domIncluded: stringTask.domIncluded,
138
+ screenshotIncluded: stringTask.screenshotIncluded
139
+ };
126
140
  assert(prompt, "missing prompt for string");
127
141
  assert(
128
142
  typeof prompt === "string",
@@ -133,6 +147,10 @@ var ScriptPlayer = class {
133
147
  } else if ("aiBoolean" in flowItem) {
134
148
  const booleanTask = flowItem;
135
149
  const prompt = booleanTask.aiBoolean;
150
+ const options = {
151
+ domIncluded: booleanTask.domIncluded,
152
+ screenshotIncluded: booleanTask.screenshotIncluded
153
+ };
136
154
  assert(prompt, "missing prompt for boolean");
137
155
  assert(
138
156
  typeof prompt === "string",
@@ -175,6 +193,9 @@ var ScriptPlayer = class {
175
193
  } else if ("aiTap" in flowItem) {
176
194
  const tapTask = flowItem;
177
195
  await agent.aiTap(tapTask.aiTap, tapTask);
196
+ } else if ("aiRightClick" in flowItem) {
197
+ const rightClickTask = flowItem;
198
+ await agent.aiRightClick(rightClickTask.aiRightClick, rightClickTask);
178
199
  } else if ("aiHover" in flowItem) {
179
200
  const hoverTask = flowItem;
180
201
  await agent.aiHover(hoverTask.aiHover, hoverTask);
@@ -197,6 +218,11 @@ var ScriptPlayer = class {
197
218
  evaluateJavaScriptTask.javascript
198
219
  );
199
220
  this.setResult(evaluateJavaScriptTask.name, result);
221
+ } else if ("logScreenshot" in flowItem) {
222
+ const logScreenshotTask = flowItem;
223
+ await agent.logScreenshot(logScreenshotTask.logScreenshot, {
224
+ content: logScreenshotTask.content || ""
225
+ });
200
226
  } else {
201
227
  throw new Error(`unknown flowItem: ${JSON.stringify(flowItem)}`);
202
228
  }
@@ -465,7 +491,8 @@ var WebElementInfo = class {
465
491
  id,
466
492
  attributes,
467
493
  indexId,
468
- xpaths
494
+ xpaths,
495
+ isVisible
469
496
  }) {
470
497
  this.content = content;
471
498
  this.rect = rect;
@@ -478,6 +505,7 @@ var WebElementInfo = class {
478
505
  this.attributes = attributes;
479
506
  this.indexId = indexId;
480
507
  this.xpaths = xpaths;
508
+ this.isVisible = isVisible;
481
509
  }
482
510
  };
483
511
 
@@ -500,14 +528,15 @@ async function parseContextFromWebPage(page, _opt) {
500
528
  })
501
529
  ]);
502
530
  const webTree = traverseTree(tree, (elementInfo) => {
503
- const { rect, id, content, attributes, locator, indexId } = elementInfo;
531
+ const { rect, id, content, attributes, locator, indexId, isVisible } = elementInfo;
504
532
  return new WebElementInfo({
505
533
  rect,
506
534
  locator,
507
535
  id,
508
536
  content,
509
537
  attributes,
510
- indexId
538
+ indexId,
539
+ isVisible
511
540
  });
512
541
  });
513
542
  assert3(screenshotBase64, "screenshotBase64 is required");
@@ -537,7 +566,7 @@ function printReportMsg(filepath) {
537
566
  logMsg(`Midscene - report file updated: ${filepath}`);
538
567
  }
539
568
  function replaceIllegalPathCharsAndSpace(str) {
540
- return str.replace(/[/\\:*?"<>| ]/g, "-");
569
+ return str.replace(/[:*?"<>| ]/g, "-");
541
570
  }
542
571
  function matchElementFromPlan(planLocateParam, tree) {
543
572
  if (!planLocateParam) {
@@ -823,10 +852,10 @@ var PageTaskExecutor = class {
823
852
  if (!taskParam || !taskParam.value) {
824
853
  return;
825
854
  }
826
- await this.page.keyboard.type(taskParam.value);
827
- } else {
828
- await this.page.keyboard.type(taskParam.value);
829
855
  }
856
+ await this.page.keyboard.type(taskParam.value, {
857
+ autoDismissKeyboard: taskParam.autoDismissKeyboard
858
+ });
830
859
  }
831
860
  };
832
861
  tasks.push(taskActionInput);
@@ -855,6 +884,22 @@ var PageTaskExecutor = class {
855
884
  }
856
885
  };
857
886
  tasks.push(taskActionTap);
887
+ } else if (plan2.type === "RightClick") {
888
+ const taskActionRightClick = {
889
+ type: "Action",
890
+ subType: "RightClick",
891
+ thought: plan2.thought,
892
+ locate: plan2.locate,
893
+ executor: async (param, { element }) => {
894
+ assert4(element, "Element not found, cannot right click");
895
+ await this.page.mouse.click(
896
+ element.center[0],
897
+ element.center[1],
898
+ { button: "right" }
899
+ );
900
+ }
901
+ };
902
+ tasks.push(taskActionRightClick);
858
903
  } else if (plan2.type === "Drag") {
859
904
  const taskActionDrag = {
860
905
  type: "Action",
@@ -1383,7 +1428,7 @@ var PageTaskExecutor = class {
1383
1428
  executor: taskExecutor
1384
1429
  };
1385
1430
  }
1386
- async createTypeQueryTask(type, demand) {
1431
+ async createTypeQueryTask(type, demand, opt) {
1387
1432
  const taskExecutor = new Executor(
1388
1433
  taskTitleStr(
1389
1434
  type,
@@ -1414,7 +1459,10 @@ var PageTaskExecutor = class {
1414
1459
  result: `${type}, ${demand}`
1415
1460
  };
1416
1461
  }
1417
- const { data, usage } = await this.insight.extract(demandInput);
1462
+ const { data, usage } = await this.insight.extract(
1463
+ demandInput,
1464
+ opt
1465
+ );
1418
1466
  let outputResult = data;
1419
1467
  if (ifTypeRestricted) {
1420
1468
  assert4(data?.result !== void 0, "No result in query data");
@@ -1434,17 +1482,17 @@ var PageTaskExecutor = class {
1434
1482
  executor: taskExecutor
1435
1483
  };
1436
1484
  }
1437
- async query(demand) {
1438
- return this.createTypeQueryTask("Query", demand);
1485
+ async query(demand, opt) {
1486
+ return this.createTypeQueryTask("Query", demand, opt);
1439
1487
  }
1440
- async boolean(prompt) {
1441
- return this.createTypeQueryTask("Boolean", prompt);
1488
+ async boolean(prompt, opt) {
1489
+ return this.createTypeQueryTask("Boolean", prompt, opt);
1442
1490
  }
1443
- async number(prompt) {
1444
- return this.createTypeQueryTask("Number", prompt);
1491
+ async number(prompt, opt) {
1492
+ return this.createTypeQueryTask("Number", prompt, opt);
1445
1493
  }
1446
- async string(prompt) {
1447
- return this.createTypeQueryTask("String", prompt);
1494
+ async string(prompt, opt) {
1495
+ return this.createTypeQueryTask("String", prompt, opt);
1448
1496
  }
1449
1497
  async assert(assertion) {
1450
1498
  const description = `assert: ${assertion}`;
@@ -1580,7 +1628,7 @@ function buildPlans(type, locateParam, param) {
1580
1628
  param: locateParam,
1581
1629
  thought: ""
1582
1630
  } : null;
1583
- if (type === "Tap" || type === "Hover") {
1631
+ if (type === "Tap" || type === "Hover" || type === "RightClick") {
1584
1632
  assert5(locateParam, `missing locate info for action "${type}"`);
1585
1633
  assert5(locatePlan, `missing locate info for action "${type}"`);
1586
1634
  const tapPlan = {
@@ -1651,8 +1699,8 @@ function buildPlans(type, locateParam, param) {
1651
1699
 
1652
1700
  // src/common/task-cache.ts
1653
1701
  import assert6 from "assert";
1654
- import { existsSync as existsSync2, readFileSync, writeFileSync as writeFileSync2 } from "fs";
1655
- import { join as join2 } from "path";
1702
+ import { existsSync as existsSync2, mkdirSync as mkdirSync2, readFileSync, writeFileSync as writeFileSync2 } from "fs";
1703
+ import { dirname as dirname2, join as join2 } from "path";
1656
1704
  import { getMidsceneRunSubDir as getMidsceneRunSubDir2 } from "misoai-shared/common";
1657
1705
  import { getDebug as getDebug3 } from "misoai-shared/logger";
1658
1706
  import { ifInBrowser as ifInBrowser2 } from "misoai-shared/utils";
@@ -1660,7 +1708,7 @@ import yaml3 from "js-yaml";
1660
1708
  import semver from "semver";
1661
1709
 
1662
1710
  // package.json
1663
- var version = "1.0.5";
1711
+ var version = "1.0.3";
1664
1712
 
1665
1713
  // src/common/task-cache.ts
1666
1714
  var debug3 = getDebug3("cache");
@@ -1688,70 +1736,44 @@ var TaskCache = class {
1688
1736
  this.cache = cacheContent;
1689
1737
  this.cacheOriginalLength = this.cache.caches.length;
1690
1738
  }
1691
- matchCache(prompt, type, contextData) {
1692
- const contextHash = contextData ? this.generateContextHash(contextData) : void 0;
1739
+ matchCache(prompt, type) {
1693
1740
  for (let i = 0; i < this.cacheOriginalLength; i++) {
1694
1741
  const item = this.cache.caches[i];
1695
1742
  const key = `${type}:${prompt}:${i}`;
1696
- if (item.type !== type || item.prompt !== prompt || this.matchedCacheIndices.has(key)) {
1697
- continue;
1698
- }
1699
- if (type === "plan" && item.type === "plan") {
1700
- const planItem = item;
1701
- if (contextHash && planItem.contextHash) {
1702
- if (contextHash !== planItem.contextHash) {
1703
- debug3("cache context mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1704
- continue;
1743
+ if (item.type === type && item.prompt === prompt && !this.matchedCacheIndices.has(key)) {
1744
+ this.matchedCacheIndices.add(key);
1745
+ debug3(
1746
+ "cache found and marked as used, type: %s, prompt: %s, index: %d",
1747
+ type,
1748
+ prompt,
1749
+ i
1750
+ );
1751
+ return {
1752
+ cacheContent: item,
1753
+ updateFn: (cb) => {
1754
+ debug3(
1755
+ "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1756
+ type,
1757
+ prompt,
1758
+ i
1759
+ );
1760
+ cb(item);
1761
+ debug3(
1762
+ "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1763
+ type,
1764
+ prompt,
1765
+ i
1766
+ );
1767
+ this.flushCacheToFile();
1705
1768
  }
1706
- } else if (contextHash || planItem.contextHash) {
1707
- debug3("cache context availability mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1708
- continue;
1709
- }
1769
+ };
1710
1770
  }
1711
- this.matchedCacheIndices.add(key);
1712
- debug3(
1713
- "cache found and marked as used, type: %s, prompt: %s, index: %d, contextMatch: %s",
1714
- type,
1715
- prompt,
1716
- i,
1717
- contextHash ? "yes" : "no-context"
1718
- );
1719
- return {
1720
- cacheContent: item,
1721
- updateFn: (cb) => {
1722
- debug3(
1723
- "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1724
- type,
1725
- prompt,
1726
- i
1727
- );
1728
- cb(item);
1729
- debug3(
1730
- "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1731
- type,
1732
- prompt,
1733
- i
1734
- );
1735
- this.flushCacheToFile();
1736
- }
1737
- };
1738
1771
  }
1739
- debug3("no unused cache found, type: %s, prompt: %s, contextHash: %s", type, prompt, contextHash);
1772
+ debug3("no unused cache found, type: %s, prompt: %s", type, prompt);
1740
1773
  return void 0;
1741
1774
  }
1742
- generateContextHash(contextData) {
1743
- const sortedKeys = Object.keys(contextData).sort();
1744
- const stableString = sortedKeys.map((key) => `${key}:${JSON.stringify(contextData[key])}`).join("|");
1745
- let hash = 0;
1746
- for (let i = 0; i < stableString.length; i++) {
1747
- const char = stableString.charCodeAt(i);
1748
- hash = (hash << 5) - hash + char;
1749
- hash = hash & hash;
1750
- }
1751
- return hash.toString(36);
1752
- }
1753
- matchPlanCache(prompt, contextData) {
1754
- return this.matchCache(prompt, "plan", contextData);
1775
+ matchPlanCache(prompt) {
1776
+ return this.matchCache(prompt, "plan");
1755
1777
  }
1756
1778
  matchLocateCache(prompt) {
1757
1779
  return this.matchCache(prompt, "locate");
@@ -1817,8 +1839,14 @@ cache file: ${cacheFile}`
1817
1839
  return;
1818
1840
  }
1819
1841
  try {
1842
+ const dir = dirname2(this.cacheFilePath);
1843
+ if (!existsSync2(dir)) {
1844
+ mkdirSync2(dir, { recursive: true });
1845
+ debug3("created cache directory: %s", dir);
1846
+ }
1820
1847
  const yamlData = yaml3.dump(this.cache);
1821
1848
  writeFileSync2(this.cacheFilePath, yamlData);
1849
+ debug3("cache flushed to file: %s", this.cacheFilePath);
1822
1850
  } catch (err) {
1823
1851
  debug3(
1824
1852
  "write cache to file failed, path: %s, error: %s",
@@ -1827,16 +1855,11 @@ cache file: ${cacheFile}`
1827
1855
  );
1828
1856
  }
1829
1857
  }
1830
- updateOrAppendCacheRecord(newRecord, cachedRecord, contextData) {
1858
+ updateOrAppendCacheRecord(newRecord, cachedRecord) {
1831
1859
  if (cachedRecord) {
1832
1860
  if (newRecord.type === "plan") {
1833
1861
  cachedRecord.updateFn((cache) => {
1834
- const planCache = cache;
1835
- planCache.yamlWorkflow = newRecord.yamlWorkflow;
1836
- if (contextData) {
1837
- planCache.contextHash = this.generateContextHash(contextData);
1838
- planCache.contextData = { ...contextData };
1839
- }
1862
+ cache.yamlWorkflow = newRecord.yamlWorkflow;
1840
1863
  });
1841
1864
  } else {
1842
1865
  cachedRecord.updateFn((cache) => {
@@ -1844,11 +1867,6 @@ cache file: ${cacheFile}`
1844
1867
  });
1845
1868
  }
1846
1869
  } else {
1847
- if (newRecord.type === "plan" && contextData) {
1848
- const planRecord = newRecord;
1849
- planRecord.contextHash = this.generateContextHash(contextData);
1850
- planRecord.contextData = { ...contextData };
1851
- }
1852
1870
  this.appendCache(newRecord);
1853
1871
  }
1854
1872
  }
@@ -1878,13 +1896,10 @@ var PageAgent = class {
1878
1896
  generateReport: true,
1879
1897
  autoPrintReportMsg: true,
1880
1898
  groupName: "Midscene Report",
1881
- groupDescription: "",
1882
- enableCumulativeContext: true,
1883
- autoClearContext: false
1899
+ groupDescription: ""
1884
1900
  },
1885
1901
  opts || {}
1886
1902
  );
1887
- this.initializeContextStore();
1888
1903
  if (this.page.pageType === "puppeteer" || this.page.pageType === "playwright") {
1889
1904
  this.page.waitForNavigationTimeout = this.opts.waitForNavigationTimeout || DEFAULT_WAIT_FOR_NAVIGATION_TIMEOUT;
1890
1905
  this.page.waitForNetworkIdleTimeout = this.opts.waitForNetworkIdleTimeout || DEFAULT_WAIT_FOR_NETWORK_IDLE_TIMEOUT;
@@ -1911,69 +1926,6 @@ var PageAgent = class {
1911
1926
  opts?.testId || this.page.pageType || "web"
1912
1927
  );
1913
1928
  }
1914
- /**
1915
- * Initialize context store for cumulative context functionality
1916
- */
1917
- async initializeContextStore() {
1918
- if (!this.opts.enableCumulativeContext) {
1919
- debug4("Cumulative context disabled via options");
1920
- return;
1921
- }
1922
- try {
1923
- const aiModel = await import("misoai-core/ai-model");
1924
- this.contextStore = aiModel.getContextStore();
1925
- debug4("Context store initialized successfully", {
1926
- autoClearContext: this.opts.autoClearContext,
1927
- testId: this.opts.testId
1928
- });
1929
- if (this.opts.autoClearContext) {
1930
- this.contextStore.clear();
1931
- debug4("Context store cleared due to autoClearContext option");
1932
- } else {
1933
- const existingData = this.contextStore.getAllData();
1934
- const existingSteps = this.contextStore.getRecentSteps(100).length;
1935
- debug4("Context store preserving existing data", {
1936
- existingDataKeys: Object.keys(existingData),
1937
- existingStepsCount: existingSteps
1938
- });
1939
- }
1940
- } catch (error) {
1941
- debug4("Failed to initialize context store:", error);
1942
- console.warn("⚠️ Could not initialize context store:", error);
1943
- }
1944
- }
1945
- /**
1946
- * Get the context store instance
1947
- */
1948
- getContextStore() {
1949
- return this.contextStore;
1950
- }
1951
- /**
1952
- * Clear the context store
1953
- */
1954
- clearContext() {
1955
- if (this.contextStore) {
1956
- this.contextStore.clear();
1957
- }
1958
- }
1959
- /**
1960
- * Get all stored data from context store
1961
- */
1962
- getStoredData() {
1963
- if (this.contextStore) {
1964
- return this.contextStore.getAllData();
1965
- }
1966
- return {};
1967
- }
1968
- /**
1969
- * Get step summary from context store
1970
- */
1971
- getStepSummary() {
1972
- if (this.contextStore) {
1973
- return this.contextStore.getStepSummary();
1974
- }
1975
- return "";
1976
- }
1977
1929
  async getUIContext(action) {
1978
1930
  if (action && (action === "extract" || action === "assert" || action === "captcha")) {
1979
1931
  return await parseContextFromWebPage(this.page, {
@@ -2152,6 +2104,23 @@ var PageAgent = class {
2152
2104
  metadata
2153
2105
  };
2154
2106
  }
2107
+ async aiRightClick(locatePrompt, opt) {
2108
+ const detailedLocateParam = this.buildDetailedLocateParam(
2109
+ locatePrompt,
2110
+ opt
2111
+ );
2112
+ const plans = buildPlans("RightClick", detailedLocateParam);
2113
+ const { executor, output } = await this.taskExecutor.runPlans(
2114
+ taskTitleStr("RightClick", locateParamStr(detailedLocateParam)),
2115
+ plans,
2116
+ { cacheable: opt?.cacheable }
2117
+ );
2118
+ const metadata = this.afterTaskRunning(executor);
2119
+ return {
2120
+ result: output,
2121
+ metadata
2122
+ };
2123
+ }
2155
2124
  async aiInput(value, locatePrompt, opt) {
2156
2125
  assert7(
2157
2126
  typeof value === "string",
@@ -2209,35 +2178,9 @@ var PageAgent = class {
2209
2178
  };
2210
2179
  }
2211
2180
  async aiAction(taskPrompt, opt) {
2212
- const originalPrompt = taskPrompt;
2213
- let processedPrompt = taskPrompt;
2214
- if (this.opts.enableCumulativeContext && this.contextStore) {
2215
- try {
2216
- const storedData = this.contextStore.getAllData();
2217
- if (Object.keys(storedData).length > 0) {
2218
- debug4("Available data for aiAction:", {
2219
- prompt: taskPrompt,
2220
- availableData: storedData
2221
- });
2222
- }
2223
- } catch (error) {
2224
- debug4("Context store operation failed:", error);
2225
- }
2226
- }
2227
2181
  const cacheable = opt?.cacheable;
2228
2182
  const isVlmUiTars = vlLocateMode() === "vlm-ui-tars";
2229
- let contextData;
2230
- if (this.opts.enableCumulativeContext && this.contextStore) {
2231
- try {
2232
- contextData = this.contextStore.getAllData();
2233
- if (contextData && Object.keys(contextData).length === 0) {
2234
- contextData = void 0;
2235
- }
2236
- } catch (error) {
2237
- debug4("Failed to get context data for cache:", error);
2238
- }
2239
- }
2240
- const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt, contextData);
2183
+ const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt);
2241
2184
  if (matchedCache && this.taskCache?.isCacheResultUsed) {
2242
2185
  const { executor: executor2 } = await this.taskExecutor.loadYamlFlowAsPlanning(
2243
2186
  taskPrompt,
@@ -2247,28 +2190,6 @@ var PageAgent = class {
2247
2190
  debug4("matched cache, will call .runYaml to run the action");
2248
2191
  const yaml5 = matchedCache.cacheContent?.yamlWorkflow;
2249
2192
  const result = await this.runYaml(yaml5);
2250
- if (this.opts.enableCumulativeContext && this.contextStore) {
2251
- try {
2252
- const executionResult = {
2253
- success: true,
2254
- actionType: "cached",
2255
- description: `Executed cached action: ${processedPrompt}`,
2256
- timing: result.metadata?.totalTime
2257
- };
2258
- this.contextStore.addStep({
2259
- type: "action",
2260
- summary: `Action: ${processedPrompt} (cached)`,
2261
- prompt: processedPrompt,
2262
- executionResult
2263
- });
2264
- debug4("Added cached action step to context store:", {
2265
- stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2266
- totalSteps: this.contextStore.getRecentSteps(100).length
2267
- });
2268
- } catch (error) {
2269
- debug4("Failed to add cached action step:", error);
2270
- }
2271
- }
2272
2193
  return {
2273
2194
  result: result.result,
2274
2195
  metadata: metadata2
@@ -2293,114 +2214,17 @@ var PageAgent = class {
2293
2214
  prompt: taskPrompt,
2294
2215
  yamlWorkflow: yamlFlowStr
2295
2216
  },
2296
- matchedCache,
2297
- contextData
2298
- // Pass context data for cache creation
2217
+ matchedCache
2299
2218
  );
2300
2219
  }
2301
2220
  const metadata = this.afterTaskRunning(executor);
2302
- if (this.opts.enableCumulativeContext && this.contextStore) {
2303
- try {
2304
- const executionResult = this.analyzeExecutionResults(executor, originalPrompt);
2305
- this.contextStore.addStep({
2306
- type: "action",
2307
- summary: `Action: ${processedPrompt}`,
2308
- prompt: processedPrompt,
2309
- executionResult
2310
- });
2311
- debug4("Added action step with execution result to context store:", {
2312
- stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2313
- totalSteps: this.contextStore.getRecentSteps(100).length,
2314
- executionResult
2315
- });
2316
- } catch (error) {
2317
- debug4("Failed to analyze execution results, adding step without execution result:", error);
2318
- try {
2319
- this.contextStore.addStep({
2320
- type: "action",
2321
- summary: `Action: ${processedPrompt}`,
2322
- prompt: processedPrompt
2323
- });
2324
- } catch (stepError) {
2325
- debug4("Failed to add action step:", stepError);
2326
- }
2327
- }
2328
- }
2329
2221
  return {
2330
2222
  result: output,
2331
2223
  metadata
2332
2224
  };
2333
2225
  }
2334
2226
  async aiQuery(demand) {
2335
- let processedDemand = demand;
2336
- let storageKey;
2337
- try {
2338
- const aiModel = await import("misoai-core/ai-model");
2339
- const contextStore = aiModel.getContextStore();
2340
- if (typeof demand === "string") {
2341
- const storageInstruction = contextStore.parseStorageInstruction(demand);
2342
- if (storageInstruction) {
2343
- storageKey = storageInstruction.key;
2344
- processedDemand = storageInstruction.cleanText;
2345
- contextStore._pendingAliases = storageInstruction.aliases;
2346
- } else {
2347
- const storageMatch = demand.match(/store\s+(?:as\s+)?(\w+)/i);
2348
- if (storageMatch) {
2349
- storageKey = storageMatch[1];
2350
- processedDemand = demand.replace(/,?\s*store\s+(?:as\s+)?\w+/i, "").trim();
2351
- }
2352
- }
2353
- }
2354
- } catch (error) {
2355
- debug4("Context store not available:", error);
2356
- }
2357
- const { output, executor } = await this.taskExecutor.query(processedDemand);
2358
- if (this.opts.enableCumulativeContext && this.contextStore) {
2359
- if (storageKey && output) {
2360
- try {
2361
- const pendingAliases = this.contextStore._pendingAliases;
2362
- if (pendingAliases) {
2363
- this.contextStore.storeDataWithAliases(storageKey, output, pendingAliases, typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand));
2364
- delete this.contextStore._pendingAliases;
2365
- debug4("Stored query result with aliases:", {
2366
- key: storageKey,
2367
- value: output,
2368
- aliases: pendingAliases
2369
- });
2370
- } else {
2371
- this.contextStore.storeData(storageKey, output);
2372
- debug4("Stored query result:", {
2373
- key: storageKey,
2374
- value: output
2375
- });
2376
- }
2377
- this.contextStore.addStep({
2378
- type: "query",
2379
- summary: `Query: ${typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)} (stored as ${storageKey})`,
2380
- data: output,
2381
- prompt: typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)
2382
- });
2383
- debug4("Added query step to context store:", {
2384
- storageKey,
2385
- totalStoredItems: Object.keys(this.contextStore.getAllData()).length,
2386
- totalSteps: this.contextStore.getRecentSteps(100).length
2387
- });
2388
- } catch (error) {
2389
- debug4("Failed to store query result:", error);
2390
- }
2391
- } else {
2392
- try {
2393
- this.contextStore.addStep({
2394
- type: "query",
2395
- summary: `Query: ${typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)}`,
2396
- data: output,
2397
- prompt: typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)
2398
- });
2399
- } catch (error) {
2400
- debug4("Failed to add query step:", error);
2401
- }
2402
- }
2403
- }
2227
+ const { output, executor } = await this.taskExecutor.query(demand);
2404
2228
  const metadata = this.afterTaskRunning(executor);
2405
2229
  return {
2406
2230
  result: output,
@@ -2510,48 +2334,6 @@ var PageAgent = class {
2510
2334
  };
2511
2335
  }
2512
2336
  async aiAssert(assertion, msg, opt) {
2513
- let executionContext = "";
2514
- if (this.opts.enableCumulativeContext && this.contextStore) {
2515
- try {
2516
- const recentSteps = this.contextStore.getRecentSteps(3);
2517
- const stepsWithExecutionResults = recentSteps.filter((step) => step.executionResult);
2518
- const storedData = this.contextStore.getAllData();
2519
- if (stepsWithExecutionResults.length > 0) {
2520
- const recentActions = stepsWithExecutionResults.map((step) => {
2521
- const result = step.executionResult;
2522
- return `- ${result.description}${result.success ? "" : " (FAILED)"}`;
2523
- }).join("\n");
2524
- executionContext = `
2525
-
2526
- Recent actions performed:
2527
- ${recentActions}
2528
-
2529
- This context may help verify the assertion.`;
2530
- }
2531
- if (storedData && Object.keys(storedData).length > 0) {
2532
- executionContext += `
2533
-
2534
- Available data for reference:
2535
- ${JSON.stringify(storedData, null, 2)}
2536
-
2537
- Note: If the assertion references any data keys or natural language equivalents, consider the stored values when verifying.`;
2538
- debug4("Available data for aiAssert:", {
2539
- assertion,
2540
- availableData: storedData
2541
- });
2542
- }
2543
- this.contextStore.addStep({
2544
- type: "assertion",
2545
- summary: `Assertion: ${assertion}`,
2546
- prompt: assertion
2547
- });
2548
- debug4("Added assertion step to context store:", {
2549
- totalSteps: this.contextStore.getRecentSteps(100).length
2550
- });
2551
- } catch (error) {
2552
- debug4("Context store operation failed:", error);
2553
- }
2554
- }
2555
2337
  let currentUrl = "";
2556
2338
  if (this.page.url) {
2557
2339
  try {
@@ -2559,13 +2341,7 @@ Note: If the assertion references any data keys or natural language equivalents,
2559
2341
  } catch (e) {
2560
2342
  }
2561
2343
  }
2562
- let assertionWithContext = assertion;
2563
- if (currentUrl) {
2564
- assertionWithContext = `For the page at URL "${currentUrl}", ${assertion}`;
2565
- }
2566
- if (executionContext) {
2567
- assertionWithContext += executionContext;
2568
- }
2344
+ const assertionWithContext = currentUrl ? `For the page at URL "${currentUrl}", ${assertion}` : assertion;
2569
2345
  const { output, executor } = await this.taskExecutor.assert(assertionWithContext);
2570
2346
  const metadata = this.afterTaskRunning(executor, true);
2571
2347
  if (output && opt?.keepRawResponse) {
@@ -2776,81 +2552,42 @@ ${errors}`);
2776
2552
  }
2777
2553
  throw new Error("evaluateJavaScript is not supported in current agent");
2778
2554
  }
2779
- async destroy() {
2780
- await this.page.destroy();
2781
- }
2782
- /**
2783
- * Analyze execution results from executor to generate meaningful descriptions
2784
- */
2785
- analyzeExecutionResults(executor, originalPrompt) {
2786
- const tasks = executor.tasks;
2787
- const success = !executor.isInErrorState();
2788
- if (!success) {
2789
- const errorTask = executor.latestErrorTask();
2790
- return {
2791
- success: false,
2792
- actionType: "error",
2793
- description: `Failed to execute: ${originalPrompt}`,
2794
- error: errorTask?.error
2795
- };
2796
- }
2797
- const actionTasks = tasks.filter((t) => t.type === "Action" && t.status === "finished");
2798
- const locateTasks = tasks.filter((t) => t.type === "Insight" && t.subType === "Locate");
2799
- const lastAction = actionTasks[actionTasks.length - 1];
2800
- const lastLocate = locateTasks[locateTasks.length - 1];
2801
- if (!lastAction) {
2802
- return {
2803
- success: true,
2804
- actionType: "unknown",
2805
- description: `Completed: ${originalPrompt}`
2555
+ async logScreenshot(title, options) {
2556
+ const screenshotTitle = title || "untitled";
2557
+ const content = options?.content || "";
2558
+ const screenshot = await this.page.screenshotBase64?.();
2559
+ if (screenshot) {
2560
+ const executionDump = {
2561
+ name: screenshotTitle,
2562
+ description: content,
2563
+ tasks: [{
2564
+ type: "Screenshot",
2565
+ subType: "log",
2566
+ status: "finished",
2567
+ executor: null,
2568
+ param: {
2569
+ title: screenshotTitle,
2570
+ content
2571
+ },
2572
+ output: {
2573
+ screenshot
2574
+ },
2575
+ thought: `Logged screenshot: ${screenshotTitle}`,
2576
+ timing: {
2577
+ start: Date.now(),
2578
+ end: Date.now(),
2579
+ cost: 0
2580
+ }
2581
+ }],
2582
+ sdkVersion: "1.0.0",
2583
+ logTime: Date.now(),
2584
+ model_name: "screenshot"
2806
2585
  };
2586
+ this.appendExecutionDump(executionDump);
2807
2587
  }
2808
- const actionType = lastAction.subType || "unknown";
2809
- const elementInfo = this.extractElementInfo(lastLocate, lastAction);
2810
- const description = this.generateActionDescription(actionType, lastAction.param, elementInfo);
2811
- return {
2812
- success: true,
2813
- actionType,
2814
- description,
2815
- elementInfo,
2816
- timing: lastAction.timing?.cost
2817
- };
2818
2588
  }
2819
- /**
2820
- * Extract element information from locate task
2821
- */
2822
- extractElementInfo(locateTask, _actionTask) {
2823
- if (!locateTask?.output?.element)
2824
- return void 0;
2825
- const element = locateTask.output.element;
2826
- return {
2827
- type: element.attributes?.nodeType || "unknown",
2828
- text: element.content || element.attributes?.placeholder || element.attributes?.title || "",
2829
- location: `(${element.center[0]}, ${element.center[1]})`
2830
- };
2831
- }
2832
- /**
2833
- * Generate natural language description for actions
2834
- */
2835
- generateActionDescription(actionType, param, elementInfo) {
2836
- const elementDesc = elementInfo ? `'${elementInfo.text || elementInfo.type}' element` : "element";
2837
- switch (actionType) {
2838
- case "Tap":
2839
- return `Clicked on ${elementDesc}`;
2840
- case "Input":
2841
- const inputValue = param?.value || "";
2842
- return `Entered "${inputValue}" into ${elementDesc}`;
2843
- case "KeyboardPress":
2844
- return `Pressed ${param?.value || "key"}`;
2845
- case "Scroll":
2846
- return `Scrolled ${param?.direction || "on page"}`;
2847
- case "Hover":
2848
- return `Hovered over ${elementDesc}`;
2849
- case "Drag":
2850
- return `Dragged ${elementDesc}`;
2851
- default:
2852
- return `Performed ${actionType} action on ${elementDesc}`;
2853
- }
2589
+ async destroy() {
2590
+ await this.page.destroy();
2854
2591
  }
2855
2592
  };
2856
2593
  export {