misoai-web 1.0.6 → 1.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. package/README.md +5 -349
  2. package/dist/es/agent.js +165 -428
  3. package/dist/es/agent.js.map +1 -1
  4. package/dist/es/bridge-mode-browser.js +10 -9
  5. package/dist/es/bridge-mode-browser.js.map +1 -1
  6. package/dist/es/bridge-mode.js +167 -430
  7. package/dist/es/bridge-mode.js.map +1 -1
  8. package/dist/es/chrome-extension.js +173 -435
  9. package/dist/es/chrome-extension.js.map +1 -1
  10. package/dist/es/index.js +185 -432
  11. package/dist/es/index.js.map +1 -1
  12. package/dist/es/midscene-playground.js +165 -428
  13. package/dist/es/midscene-playground.js.map +1 -1
  14. package/dist/es/midscene-server.js.map +1 -1
  15. package/dist/es/playground.js +165 -428
  16. package/dist/es/playground.js.map +1 -1
  17. package/dist/es/playwright-report.js +1 -1
  18. package/dist/es/playwright-report.js.map +1 -1
  19. package/dist/es/playwright.js +182 -429
  20. package/dist/es/playwright.js.map +1 -1
  21. package/dist/es/puppeteer-agent-launcher.js +169 -432
  22. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  23. package/dist/es/puppeteer.js +169 -432
  24. package/dist/es/puppeteer.js.map +1 -1
  25. package/dist/es/ui-utils.js.map +1 -1
  26. package/dist/es/utils.js +7 -4
  27. package/dist/es/utils.js.map +1 -1
  28. package/dist/es/yaml.js +29 -3
  29. package/dist/es/yaml.js.map +1 -1
  30. package/dist/lib/agent.js +163 -426
  31. package/dist/lib/agent.js.map +1 -1
  32. package/dist/lib/bridge-mode-browser.js +10 -9
  33. package/dist/lib/bridge-mode-browser.js.map +1 -1
  34. package/dist/lib/bridge-mode.js +165 -428
  35. package/dist/lib/bridge-mode.js.map +1 -1
  36. package/dist/lib/chrome-extension.js +171 -433
  37. package/dist/lib/chrome-extension.js.map +1 -1
  38. package/dist/lib/index.js +183 -430
  39. package/dist/lib/index.js.map +1 -1
  40. package/dist/lib/midscene-playground.js +163 -426
  41. package/dist/lib/midscene-playground.js.map +1 -1
  42. package/dist/lib/midscene-server.js.map +1 -1
  43. package/dist/lib/playground.js +163 -426
  44. package/dist/lib/playground.js.map +1 -1
  45. package/dist/lib/playwright-report.js +1 -1
  46. package/dist/lib/playwright-report.js.map +1 -1
  47. package/dist/lib/playwright.js +180 -427
  48. package/dist/lib/playwright.js.map +1 -1
  49. package/dist/lib/puppeteer-agent-launcher.js +167 -430
  50. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  51. package/dist/lib/puppeteer.js +167 -430
  52. package/dist/lib/puppeteer.js.map +1 -1
  53. package/dist/lib/ui-utils.js.map +1 -1
  54. package/dist/lib/utils.js +7 -4
  55. package/dist/lib/utils.js.map +1 -1
  56. package/dist/lib/yaml.js +29 -3
  57. package/dist/lib/yaml.js.map +1 -1
  58. package/dist/types/agent.d.ts +13 -51
  59. package/dist/types/bridge-mode-browser.d.ts +2 -3
  60. package/dist/types/bridge-mode.d.ts +2 -3
  61. package/dist/types/{browser-aec1055d.d.ts → browser-9b472ffb.d.ts} +1 -1
  62. package/dist/types/chrome-extension.d.ts +2 -3
  63. package/dist/types/index.d.ts +1 -2
  64. package/dist/types/midscene-server.d.ts +1 -2
  65. package/dist/types/{page-86ab0fe1.d.ts → page-ed0ecb44.d.ts} +19 -9
  66. package/dist/types/playground.d.ts +2 -3
  67. package/dist/types/playwright.d.ts +9 -2
  68. package/dist/types/puppeteer-agent-launcher.d.ts +1 -2
  69. package/dist/types/puppeteer.d.ts +6 -5
  70. package/dist/types/ui-utils.d.ts +1 -1
  71. package/dist/types/utils.d.ts +1 -2
  72. package/dist/types/yaml.d.ts +1 -2
  73. package/iife-script/htmlElement.js +53 -75
  74. package/iife-script/htmlElementDebug.js +35 -56
  75. package/package.json +24 -24
  76. package/LICENSE +0 -21
package/dist/lib/agent.js CHANGED
@@ -51,10 +51,11 @@ var ScriptPlayer = class {
51
51
  this.unnamedResultIndex = 0;
52
52
  this.pageAgent = null;
53
53
  this.result = {};
54
+ const target = script.target || script.web || script.android;
54
55
  if (import_utils.ifInBrowser) {
55
56
  this.output = void 0;
56
- } else if (script.target?.output) {
57
- this.output = (0, import_node_path.resolve)(process.cwd(), script.target.output);
57
+ } else if (target?.output) {
58
+ this.output = (0, import_node_path.resolve)(process.cwd(), target.output);
58
59
  } else {
59
60
  this.output = (0, import_node_path.join)((0, import_common.getMidsceneRunSubDir)("output"), `${process.pid}.json`);
60
61
  }
@@ -128,15 +129,20 @@ var ScriptPlayer = class {
128
129
  } else if ("aiAssert" in flowItem) {
129
130
  const assertTask = flowItem;
130
131
  const prompt = assertTask.aiAssert;
132
+ const msg = assertTask.errorMessage;
131
133
  (0, import_utils.assert)(prompt, "missing prompt for aiAssert");
132
134
  (0, import_utils.assert)(
133
135
  typeof prompt === "string",
134
136
  "prompt for aiAssert must be a string"
135
137
  );
136
- await agent.aiAssert(prompt);
138
+ await agent.aiAssert(prompt, msg);
137
139
  } else if ("aiQuery" in flowItem) {
138
140
  const queryTask = flowItem;
139
141
  const prompt = queryTask.aiQuery;
142
+ const options = {
143
+ domIncluded: queryTask.domIncluded,
144
+ screenshotIncluded: queryTask.screenshotIncluded
145
+ };
140
146
  (0, import_utils.assert)(prompt, "missing prompt for aiQuery");
141
147
  (0, import_utils.assert)(
142
148
  typeof prompt === "string",
@@ -147,6 +153,10 @@ var ScriptPlayer = class {
147
153
  } else if ("aiNumber" in flowItem) {
148
154
  const numberTask = flowItem;
149
155
  const prompt = numberTask.aiNumber;
156
+ const options = {
157
+ domIncluded: numberTask.domIncluded,
158
+ screenshotIncluded: numberTask.screenshotIncluded
159
+ };
150
160
  (0, import_utils.assert)(prompt, "missing prompt for number");
151
161
  (0, import_utils.assert)(
152
162
  typeof prompt === "string",
@@ -157,6 +167,10 @@ var ScriptPlayer = class {
157
167
  } else if ("aiString" in flowItem) {
158
168
  const stringTask = flowItem;
159
169
  const prompt = stringTask.aiString;
170
+ const options = {
171
+ domIncluded: stringTask.domIncluded,
172
+ screenshotIncluded: stringTask.screenshotIncluded
173
+ };
160
174
  (0, import_utils.assert)(prompt, "missing prompt for string");
161
175
  (0, import_utils.assert)(
162
176
  typeof prompt === "string",
@@ -167,6 +181,10 @@ var ScriptPlayer = class {
167
181
  } else if ("aiBoolean" in flowItem) {
168
182
  const booleanTask = flowItem;
169
183
  const prompt = booleanTask.aiBoolean;
184
+ const options = {
185
+ domIncluded: booleanTask.domIncluded,
186
+ screenshotIncluded: booleanTask.screenshotIncluded
187
+ };
170
188
  (0, import_utils.assert)(prompt, "missing prompt for boolean");
171
189
  (0, import_utils.assert)(
172
190
  typeof prompt === "string",
@@ -209,6 +227,9 @@ var ScriptPlayer = class {
209
227
  } else if ("aiTap" in flowItem) {
210
228
  const tapTask = flowItem;
211
229
  await agent.aiTap(tapTask.aiTap, tapTask);
230
+ } else if ("aiRightClick" in flowItem) {
231
+ const rightClickTask = flowItem;
232
+ await agent.aiRightClick(rightClickTask.aiRightClick, rightClickTask);
212
233
  } else if ("aiHover" in flowItem) {
213
234
  const hoverTask = flowItem;
214
235
  await agent.aiHover(hoverTask.aiHover, hoverTask);
@@ -231,6 +252,11 @@ var ScriptPlayer = class {
231
252
  evaluateJavaScriptTask.javascript
232
253
  );
233
254
  this.setResult(evaluateJavaScriptTask.name, result);
255
+ } else if ("logScreenshot" in flowItem) {
256
+ const logScreenshotTask = flowItem;
257
+ await agent.logScreenshot(logScreenshotTask.logScreenshot, {
258
+ content: logScreenshotTask.content || ""
259
+ });
234
260
  } else {
235
261
  throw new Error(`unknown flowItem: ${JSON.stringify(flowItem)}`);
236
262
  }
@@ -479,7 +505,8 @@ var WebElementInfo = class {
479
505
  id,
480
506
  attributes,
481
507
  indexId,
482
- xpaths
508
+ xpaths,
509
+ isVisible
483
510
  }) {
484
511
  this.content = content;
485
512
  this.rect = rect;
@@ -492,6 +519,7 @@ var WebElementInfo = class {
492
519
  this.attributes = attributes;
493
520
  this.indexId = indexId;
494
521
  this.xpaths = xpaths;
522
+ this.isVisible = isVisible;
495
523
  }
496
524
  };
497
525
 
@@ -514,14 +542,15 @@ async function parseContextFromWebPage(page, _opt) {
514
542
  })
515
543
  ]);
516
544
  const webTree = (0, import_extractor.traverseTree)(tree, (elementInfo) => {
517
- const { rect, id, content, attributes, locator, indexId } = elementInfo;
545
+ const { rect, id, content, attributes, locator, indexId, isVisible } = elementInfo;
518
546
  return new WebElementInfo({
519
547
  rect,
520
548
  locator,
521
549
  id,
522
550
  content,
523
551
  attributes,
524
- indexId
552
+ indexId,
553
+ isVisible
525
554
  });
526
555
  });
527
556
  (0, import_utils4.assert)(screenshotBase64, "screenshotBase64 is required");
@@ -551,7 +580,7 @@ function printReportMsg(filepath) {
551
580
  (0, import_utils4.logMsg)(`Midscene - report file updated: ${filepath}`);
552
581
  }
553
582
  function replaceIllegalPathCharsAndSpace(str) {
554
- return str.replace(/[/\\:*?"<>| ]/g, "-");
583
+ return str.replace(/[:*?"<>| ]/g, "-");
555
584
  }
556
585
  function matchElementFromPlan(planLocateParam, tree) {
557
586
  if (!planLocateParam) {
@@ -837,10 +866,10 @@ var PageTaskExecutor = class {
837
866
  if (!taskParam || !taskParam.value) {
838
867
  return;
839
868
  }
840
- await this.page.keyboard.type(taskParam.value);
841
- } else {
842
- await this.page.keyboard.type(taskParam.value);
843
869
  }
870
+ await this.page.keyboard.type(taskParam.value, {
871
+ autoDismissKeyboard: taskParam.autoDismissKeyboard
872
+ });
844
873
  }
845
874
  };
846
875
  tasks.push(taskActionInput);
@@ -869,6 +898,22 @@ var PageTaskExecutor = class {
869
898
  }
870
899
  };
871
900
  tasks.push(taskActionTap);
901
+ } else if (plan2.type === "RightClick") {
902
+ const taskActionRightClick = {
903
+ type: "Action",
904
+ subType: "RightClick",
905
+ thought: plan2.thought,
906
+ locate: plan2.locate,
907
+ executor: async (param, { element }) => {
908
+ (0, import_utils6.assert)(element, "Element not found, cannot right click");
909
+ await this.page.mouse.click(
910
+ element.center[0],
911
+ element.center[1],
912
+ { button: "right" }
913
+ );
914
+ }
915
+ };
916
+ tasks.push(taskActionRightClick);
872
917
  } else if (plan2.type === "Drag") {
873
918
  const taskActionDrag = {
874
919
  type: "Action",
@@ -1397,7 +1442,7 @@ var PageTaskExecutor = class {
1397
1442
  executor: taskExecutor
1398
1443
  };
1399
1444
  }
1400
- async createTypeQueryTask(type, demand) {
1445
+ async createTypeQueryTask(type, demand, opt) {
1401
1446
  const taskExecutor = new import_misoai_core.Executor(
1402
1447
  taskTitleStr(
1403
1448
  type,
@@ -1428,7 +1473,10 @@ var PageTaskExecutor = class {
1428
1473
  result: `${type}, ${demand}`
1429
1474
  };
1430
1475
  }
1431
- const { data, usage } = await this.insight.extract(demandInput);
1476
+ const { data, usage } = await this.insight.extract(
1477
+ demandInput,
1478
+ opt
1479
+ );
1432
1480
  let outputResult = data;
1433
1481
  if (ifTypeRestricted) {
1434
1482
  (0, import_utils6.assert)(data?.result !== void 0, "No result in query data");
@@ -1448,17 +1496,17 @@ var PageTaskExecutor = class {
1448
1496
  executor: taskExecutor
1449
1497
  };
1450
1498
  }
1451
- async query(demand) {
1452
- return this.createTypeQueryTask("Query", demand);
1499
+ async query(demand, opt) {
1500
+ return this.createTypeQueryTask("Query", demand, opt);
1453
1501
  }
1454
- async boolean(prompt) {
1455
- return this.createTypeQueryTask("Boolean", prompt);
1502
+ async boolean(prompt, opt) {
1503
+ return this.createTypeQueryTask("Boolean", prompt, opt);
1456
1504
  }
1457
- async number(prompt) {
1458
- return this.createTypeQueryTask("Number", prompt);
1505
+ async number(prompt, opt) {
1506
+ return this.createTypeQueryTask("Number", prompt, opt);
1459
1507
  }
1460
- async string(prompt) {
1461
- return this.createTypeQueryTask("String", prompt);
1508
+ async string(prompt, opt) {
1509
+ return this.createTypeQueryTask("String", prompt, opt);
1462
1510
  }
1463
1511
  async assert(assertion) {
1464
1512
  const description = `assert: ${assertion}`;
@@ -1594,7 +1642,7 @@ function buildPlans(type, locateParam, param) {
1594
1642
  param: locateParam,
1595
1643
  thought: ""
1596
1644
  } : null;
1597
- if (type === "Tap" || type === "Hover") {
1645
+ if (type === "Tap" || type === "Hover" || type === "RightClick") {
1598
1646
  (0, import_utils8.assert)(locateParam, `missing locate info for action "${type}"`);
1599
1647
  (0, import_utils8.assert)(locatePlan, `missing locate info for action "${type}"`);
1600
1648
  const tapPlan = {
@@ -1674,7 +1722,7 @@ var import_js_yaml3 = __toESM(require("js-yaml"));
1674
1722
  var import_semver = __toESM(require("semver"));
1675
1723
 
1676
1724
  // package.json
1677
- var version = "1.0.5";
1725
+ var version = "1.0.3";
1678
1726
 
1679
1727
  // src/common/task-cache.ts
1680
1728
  var debug3 = (0, import_logger3.getDebug)("cache");
@@ -1702,70 +1750,44 @@ var TaskCache = class {
1702
1750
  this.cache = cacheContent;
1703
1751
  this.cacheOriginalLength = this.cache.caches.length;
1704
1752
  }
1705
- matchCache(prompt, type, contextData) {
1706
- const contextHash = contextData ? this.generateContextHash(contextData) : void 0;
1753
+ matchCache(prompt, type) {
1707
1754
  for (let i = 0; i < this.cacheOriginalLength; i++) {
1708
1755
  const item = this.cache.caches[i];
1709
1756
  const key = `${type}:${prompt}:${i}`;
1710
- if (item.type !== type || item.prompt !== prompt || this.matchedCacheIndices.has(key)) {
1711
- continue;
1712
- }
1713
- if (type === "plan" && item.type === "plan") {
1714
- const planItem = item;
1715
- if (contextHash && planItem.contextHash) {
1716
- if (contextHash !== planItem.contextHash) {
1717
- debug3("cache context mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1718
- continue;
1757
+ if (item.type === type && item.prompt === prompt && !this.matchedCacheIndices.has(key)) {
1758
+ this.matchedCacheIndices.add(key);
1759
+ debug3(
1760
+ "cache found and marked as used, type: %s, prompt: %s, index: %d",
1761
+ type,
1762
+ prompt,
1763
+ i
1764
+ );
1765
+ return {
1766
+ cacheContent: item,
1767
+ updateFn: (cb) => {
1768
+ debug3(
1769
+ "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1770
+ type,
1771
+ prompt,
1772
+ i
1773
+ );
1774
+ cb(item);
1775
+ debug3(
1776
+ "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1777
+ type,
1778
+ prompt,
1779
+ i
1780
+ );
1781
+ this.flushCacheToFile();
1719
1782
  }
1720
- } else if (contextHash || planItem.contextHash) {
1721
- debug3("cache context availability mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1722
- continue;
1723
- }
1783
+ };
1724
1784
  }
1725
- this.matchedCacheIndices.add(key);
1726
- debug3(
1727
- "cache found and marked as used, type: %s, prompt: %s, index: %d, contextMatch: %s",
1728
- type,
1729
- prompt,
1730
- i,
1731
- contextHash ? "yes" : "no-context"
1732
- );
1733
- return {
1734
- cacheContent: item,
1735
- updateFn: (cb) => {
1736
- debug3(
1737
- "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1738
- type,
1739
- prompt,
1740
- i
1741
- );
1742
- cb(item);
1743
- debug3(
1744
- "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1745
- type,
1746
- prompt,
1747
- i
1748
- );
1749
- this.flushCacheToFile();
1750
- }
1751
- };
1752
1785
  }
1753
- debug3("no unused cache found, type: %s, prompt: %s, contextHash: %s", type, prompt, contextHash);
1786
+ debug3("no unused cache found, type: %s, prompt: %s", type, prompt);
1754
1787
  return void 0;
1755
1788
  }
1756
- generateContextHash(contextData) {
1757
- const sortedKeys = Object.keys(contextData).sort();
1758
- const stableString = sortedKeys.map((key) => `${key}:${JSON.stringify(contextData[key])}`).join("|");
1759
- let hash = 0;
1760
- for (let i = 0; i < stableString.length; i++) {
1761
- const char = stableString.charCodeAt(i);
1762
- hash = (hash << 5) - hash + char;
1763
- hash = hash & hash;
1764
- }
1765
- return hash.toString(36);
1766
- }
1767
- matchPlanCache(prompt, contextData) {
1768
- return this.matchCache(prompt, "plan", contextData);
1789
+ matchPlanCache(prompt) {
1790
+ return this.matchCache(prompt, "plan");
1769
1791
  }
1770
1792
  matchLocateCache(prompt) {
1771
1793
  return this.matchCache(prompt, "locate");
@@ -1831,8 +1853,14 @@ cache file: ${cacheFile}`
1831
1853
  return;
1832
1854
  }
1833
1855
  try {
1856
+ const dir = (0, import_node_path2.dirname)(this.cacheFilePath);
1857
+ if (!(0, import_node_fs2.existsSync)(dir)) {
1858
+ (0, import_node_fs2.mkdirSync)(dir, { recursive: true });
1859
+ debug3("created cache directory: %s", dir);
1860
+ }
1834
1861
  const yamlData = import_js_yaml3.default.dump(this.cache);
1835
1862
  (0, import_node_fs2.writeFileSync)(this.cacheFilePath, yamlData);
1863
+ debug3("cache flushed to file: %s", this.cacheFilePath);
1836
1864
  } catch (err) {
1837
1865
  debug3(
1838
1866
  "write cache to file failed, path: %s, error: %s",
@@ -1841,16 +1869,11 @@ cache file: ${cacheFile}`
1841
1869
  );
1842
1870
  }
1843
1871
  }
1844
- updateOrAppendCacheRecord(newRecord, cachedRecord, contextData) {
1872
+ updateOrAppendCacheRecord(newRecord, cachedRecord) {
1845
1873
  if (cachedRecord) {
1846
1874
  if (newRecord.type === "plan") {
1847
1875
  cachedRecord.updateFn((cache) => {
1848
- const planCache = cache;
1849
- planCache.yamlWorkflow = newRecord.yamlWorkflow;
1850
- if (contextData) {
1851
- planCache.contextHash = this.generateContextHash(contextData);
1852
- planCache.contextData = { ...contextData };
1853
- }
1876
+ cache.yamlWorkflow = newRecord.yamlWorkflow;
1854
1877
  });
1855
1878
  } else {
1856
1879
  cachedRecord.updateFn((cache) => {
@@ -1858,11 +1881,6 @@ cache file: ${cacheFile}`
1858
1881
  });
1859
1882
  }
1860
1883
  } else {
1861
- if (newRecord.type === "plan" && contextData) {
1862
- const planRecord = newRecord;
1863
- planRecord.contextHash = this.generateContextHash(contextData);
1864
- planRecord.contextData = { ...contextData };
1865
- }
1866
1884
  this.appendCache(newRecord);
1867
1885
  }
1868
1886
  }
@@ -1892,13 +1910,10 @@ var PageAgent = class {
1892
1910
  generateReport: true,
1893
1911
  autoPrintReportMsg: true,
1894
1912
  groupName: "Midscene Report",
1895
- groupDescription: "",
1896
- enableCumulativeContext: true,
1897
- autoClearContext: false
1913
+ groupDescription: ""
1898
1914
  },
1899
1915
  opts || {}
1900
1916
  );
1901
- this.initializeContextStore();
1902
1917
  if (this.page.pageType === "puppeteer" || this.page.pageType === "playwright") {
1903
1918
  this.page.waitForNavigationTimeout = this.opts.waitForNavigationTimeout || import_constants2.DEFAULT_WAIT_FOR_NAVIGATION_TIMEOUT;
1904
1919
  this.page.waitForNetworkIdleTimeout = this.opts.waitForNetworkIdleTimeout || import_constants2.DEFAULT_WAIT_FOR_NETWORK_IDLE_TIMEOUT;
@@ -1925,69 +1940,6 @@ var PageAgent = class {
1925
1940
  opts?.testId || this.page.pageType || "web"
1926
1941
  );
1927
1942
  }
1928
- /**
1929
- * Initialize context store for cumulative context functionality
1930
- */
1931
- async initializeContextStore() {
1932
- if (!this.opts.enableCumulativeContext) {
1933
- debug4("Cumulative context disabled via options");
1934
- return;
1935
- }
1936
- try {
1937
- const aiModel = await import("misoai-core/ai-model");
1938
- this.contextStore = aiModel.getContextStore();
1939
- debug4("Context store initialized successfully", {
1940
- autoClearContext: this.opts.autoClearContext,
1941
- testId: this.opts.testId
1942
- });
1943
- if (this.opts.autoClearContext) {
1944
- this.contextStore.clear();
1945
- debug4("Context store cleared due to autoClearContext option");
1946
- } else {
1947
- const existingData = this.contextStore.getAllData();
1948
- const existingSteps = this.contextStore.getRecentSteps(100).length;
1949
- debug4("Context store preserving existing data", {
1950
- existingDataKeys: Object.keys(existingData),
1951
- existingStepsCount: existingSteps
1952
- });
1953
- }
1954
- } catch (error) {
1955
- debug4("Failed to initialize context store:", error);
1956
- console.warn("⚠️ Could not initialize context store:", error);
1957
- }
1958
- }
1959
- /**
1960
- * Get the context store instance
1961
- */
1962
- getContextStore() {
1963
- return this.contextStore;
1964
- }
1965
- /**
1966
- * Clear the context store
1967
- */
1968
- clearContext() {
1969
- if (this.contextStore) {
1970
- this.contextStore.clear();
1971
- }
1972
- }
1973
- /**
1974
- * Get all stored data from context store
1975
- */
1976
- getStoredData() {
1977
- if (this.contextStore) {
1978
- return this.contextStore.getAllData();
1979
- }
1980
- return {};
1981
- }
1982
- /**
1983
- * Get step summary from context store
1984
- */
1985
- getStepSummary() {
1986
- if (this.contextStore) {
1987
- return this.contextStore.getStepSummary();
1988
- }
1989
- return "";
1990
- }
1991
1943
  async getUIContext(action) {
1992
1944
  if (action && (action === "extract" || action === "assert" || action === "captcha")) {
1993
1945
  return await parseContextFromWebPage(this.page, {
@@ -2166,6 +2118,23 @@ var PageAgent = class {
2166
2118
  metadata
2167
2119
  };
2168
2120
  }
2121
+ async aiRightClick(locatePrompt, opt) {
2122
+ const detailedLocateParam = this.buildDetailedLocateParam(
2123
+ locatePrompt,
2124
+ opt
2125
+ );
2126
+ const plans = buildPlans("RightClick", detailedLocateParam);
2127
+ const { executor, output } = await this.taskExecutor.runPlans(
2128
+ taskTitleStr("RightClick", locateParamStr(detailedLocateParam)),
2129
+ plans,
2130
+ { cacheable: opt?.cacheable }
2131
+ );
2132
+ const metadata = this.afterTaskRunning(executor);
2133
+ return {
2134
+ result: output,
2135
+ metadata
2136
+ };
2137
+ }
2169
2138
  async aiInput(value, locatePrompt, opt) {
2170
2139
  (0, import_utils12.assert)(
2171
2140
  typeof value === "string",
@@ -2223,35 +2192,9 @@ var PageAgent = class {
2223
2192
  };
2224
2193
  }
2225
2194
  async aiAction(taskPrompt, opt) {
2226
- const originalPrompt = taskPrompt;
2227
- let processedPrompt = taskPrompt;
2228
- if (this.opts.enableCumulativeContext && this.contextStore) {
2229
- try {
2230
- const storedData = this.contextStore.getAllData();
2231
- if (Object.keys(storedData).length > 0) {
2232
- debug4("Available data for aiAction:", {
2233
- prompt: taskPrompt,
2234
- availableData: storedData
2235
- });
2236
- }
2237
- } catch (error) {
2238
- debug4("Context store operation failed:", error);
2239
- }
2240
- }
2241
2195
  const cacheable = opt?.cacheable;
2242
2196
  const isVlmUiTars = (0, import_env2.vlLocateMode)() === "vlm-ui-tars";
2243
- let contextData;
2244
- if (this.opts.enableCumulativeContext && this.contextStore) {
2245
- try {
2246
- contextData = this.contextStore.getAllData();
2247
- if (contextData && Object.keys(contextData).length === 0) {
2248
- contextData = void 0;
2249
- }
2250
- } catch (error) {
2251
- debug4("Failed to get context data for cache:", error);
2252
- }
2253
- }
2254
- const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt, contextData);
2197
+ const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt);
2255
2198
  if (matchedCache && this.taskCache?.isCacheResultUsed) {
2256
2199
  const { executor: executor2 } = await this.taskExecutor.loadYamlFlowAsPlanning(
2257
2200
  taskPrompt,
@@ -2261,28 +2204,6 @@ var PageAgent = class {
2261
2204
  debug4("matched cache, will call .runYaml to run the action");
2262
2205
  const yaml5 = matchedCache.cacheContent?.yamlWorkflow;
2263
2206
  const result = await this.runYaml(yaml5);
2264
- if (this.opts.enableCumulativeContext && this.contextStore) {
2265
- try {
2266
- const executionResult = {
2267
- success: true,
2268
- actionType: "cached",
2269
- description: `Executed cached action: ${processedPrompt}`,
2270
- timing: result.metadata?.totalTime
2271
- };
2272
- this.contextStore.addStep({
2273
- type: "action",
2274
- summary: `Action: ${processedPrompt} (cached)`,
2275
- prompt: processedPrompt,
2276
- executionResult
2277
- });
2278
- debug4("Added cached action step to context store:", {
2279
- stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2280
- totalSteps: this.contextStore.getRecentSteps(100).length
2281
- });
2282
- } catch (error) {
2283
- debug4("Failed to add cached action step:", error);
2284
- }
2285
- }
2286
2207
  return {
2287
2208
  result: result.result,
2288
2209
  metadata: metadata2
@@ -2307,114 +2228,17 @@ var PageAgent = class {
2307
2228
  prompt: taskPrompt,
2308
2229
  yamlWorkflow: yamlFlowStr
2309
2230
  },
2310
- matchedCache,
2311
- contextData
2312
- // Pass context data for cache creation
2231
+ matchedCache
2313
2232
  );
2314
2233
  }
2315
2234
  const metadata = this.afterTaskRunning(executor);
2316
- if (this.opts.enableCumulativeContext && this.contextStore) {
2317
- try {
2318
- const executionResult = this.analyzeExecutionResults(executor, originalPrompt);
2319
- this.contextStore.addStep({
2320
- type: "action",
2321
- summary: `Action: ${processedPrompt}`,
2322
- prompt: processedPrompt,
2323
- executionResult
2324
- });
2325
- debug4("Added action step with execution result to context store:", {
2326
- stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2327
- totalSteps: this.contextStore.getRecentSteps(100).length,
2328
- executionResult
2329
- });
2330
- } catch (error) {
2331
- debug4("Failed to analyze execution results, adding step without execution result:", error);
2332
- try {
2333
- this.contextStore.addStep({
2334
- type: "action",
2335
- summary: `Action: ${processedPrompt}`,
2336
- prompt: processedPrompt
2337
- });
2338
- } catch (stepError) {
2339
- debug4("Failed to add action step:", stepError);
2340
- }
2341
- }
2342
- }
2343
2235
  return {
2344
2236
  result: output,
2345
2237
  metadata
2346
2238
  };
2347
2239
  }
2348
2240
  async aiQuery(demand) {
2349
- let processedDemand = demand;
2350
- let storageKey;
2351
- try {
2352
- const aiModel = await import("misoai-core/ai-model");
2353
- const contextStore = aiModel.getContextStore();
2354
- if (typeof demand === "string") {
2355
- const storageInstruction = contextStore.parseStorageInstruction(demand);
2356
- if (storageInstruction) {
2357
- storageKey = storageInstruction.key;
2358
- processedDemand = storageInstruction.cleanText;
2359
- contextStore._pendingAliases = storageInstruction.aliases;
2360
- } else {
2361
- const storageMatch = demand.match(/store\s+(?:as\s+)?(\w+)/i);
2362
- if (storageMatch) {
2363
- storageKey = storageMatch[1];
2364
- processedDemand = demand.replace(/,?\s*store\s+(?:as\s+)?\w+/i, "").trim();
2365
- }
2366
- }
2367
- }
2368
- } catch (error) {
2369
- debug4("Context store not available:", error);
2370
- }
2371
- const { output, executor } = await this.taskExecutor.query(processedDemand);
2372
- if (this.opts.enableCumulativeContext && this.contextStore) {
2373
- if (storageKey && output) {
2374
- try {
2375
- const pendingAliases = this.contextStore._pendingAliases;
2376
- if (pendingAliases) {
2377
- this.contextStore.storeDataWithAliases(storageKey, output, pendingAliases, typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand));
2378
- delete this.contextStore._pendingAliases;
2379
- debug4("Stored query result with aliases:", {
2380
- key: storageKey,
2381
- value: output,
2382
- aliases: pendingAliases
2383
- });
2384
- } else {
2385
- this.contextStore.storeData(storageKey, output);
2386
- debug4("Stored query result:", {
2387
- key: storageKey,
2388
- value: output
2389
- });
2390
- }
2391
- this.contextStore.addStep({
2392
- type: "query",
2393
- summary: `Query: ${typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)} (stored as ${storageKey})`,
2394
- data: output,
2395
- prompt: typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)
2396
- });
2397
- debug4("Added query step to context store:", {
2398
- storageKey,
2399
- totalStoredItems: Object.keys(this.contextStore.getAllData()).length,
2400
- totalSteps: this.contextStore.getRecentSteps(100).length
2401
- });
2402
- } catch (error) {
2403
- debug4("Failed to store query result:", error);
2404
- }
2405
- } else {
2406
- try {
2407
- this.contextStore.addStep({
2408
- type: "query",
2409
- summary: `Query: ${typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)}`,
2410
- data: output,
2411
- prompt: typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)
2412
- });
2413
- } catch (error) {
2414
- debug4("Failed to add query step:", error);
2415
- }
2416
- }
2417
- }
2241
+ const { output, executor } = await this.taskExecutor.query(demand);
2418
2242
  const metadata = this.afterTaskRunning(executor);
2419
2243
  return {
2420
2244
  result: output,
@@ -2524,48 +2348,6 @@ var PageAgent = class {
2524
2348
  };
2525
2349
  }
2526
2350
  async aiAssert(assertion, msg, opt) {
2527
- let executionContext = "";
2528
- if (this.opts.enableCumulativeContext && this.contextStore) {
2529
- try {
2530
- const recentSteps = this.contextStore.getRecentSteps(3);
2531
- const stepsWithExecutionResults = recentSteps.filter((step) => step.executionResult);
2532
- const storedData = this.contextStore.getAllData();
2533
- if (stepsWithExecutionResults.length > 0) {
2534
- const recentActions = stepsWithExecutionResults.map((step) => {
2535
- const result = step.executionResult;
2536
- return `- ${result.description}${result.success ? "" : " (FAILED)"}`;
2537
- }).join("\n");
2538
- executionContext = `
2539
-
2540
- Recent actions performed:
2541
- ${recentActions}
2542
-
2543
- This context may help verify the assertion.`;
2544
- }
2545
- if (storedData && Object.keys(storedData).length > 0) {
2546
- executionContext += `
2547
-
2548
- Available data for reference:
2549
- ${JSON.stringify(storedData, null, 2)}
2550
-
2551
- Note: If the assertion references any data keys or natural language equivalents, consider the stored values when verifying.`;
2552
- debug4("Available data for aiAssert:", {
2553
- assertion,
2554
- availableData: storedData
2555
- });
2556
- }
2557
- this.contextStore.addStep({
2558
- type: "assertion",
2559
- summary: `Assertion: ${assertion}`,
2560
- prompt: assertion
2561
- });
2562
- debug4("Added assertion step to context store:", {
2563
- totalSteps: this.contextStore.getRecentSteps(100).length
2564
- });
2565
- } catch (error) {
2566
- debug4("Context store operation failed:", error);
2567
- }
2568
- }
2569
2351
  let currentUrl = "";
2570
2352
  if (this.page.url) {
2571
2353
  try {
@@ -2573,13 +2355,7 @@ Note: If the assertion references any data keys or natural language equivalents,
2573
2355
  } catch (e) {
2574
2356
  }
2575
2357
  }
2576
- let assertionWithContext = assertion;
2577
- if (currentUrl) {
2578
- assertionWithContext = `For the page at URL "${currentUrl}", ${assertion}`;
2579
- }
2580
- if (executionContext) {
2581
- assertionWithContext += executionContext;
2582
- }
2358
+ const assertionWithContext = currentUrl ? `For the page at URL "${currentUrl}", ${assertion}` : assertion;
2583
2359
  const { output, executor } = await this.taskExecutor.assert(assertionWithContext);
2584
2360
  const metadata = this.afterTaskRunning(executor, true);
2585
2361
  if (output && opt?.keepRawResponse) {
@@ -2790,81 +2566,42 @@ ${errors}`);
2790
2566
  }
2791
2567
  throw new Error("evaluateJavaScript is not supported in current agent");
2792
2568
  }
2793
- async destroy() {
2794
- await this.page.destroy();
2795
- }
2796
- /**
2797
- * Analyze execution results from executor to generate meaningful descriptions
2798
- */
2799
- analyzeExecutionResults(executor, originalPrompt) {
2800
- const tasks = executor.tasks;
2801
- const success = !executor.isInErrorState();
2802
- if (!success) {
2803
- const errorTask = executor.latestErrorTask();
2804
- return {
2805
- success: false,
2806
- actionType: "error",
2807
- description: `Failed to execute: ${originalPrompt}`,
2808
- error: errorTask?.error
2809
- };
2810
- }
2811
- const actionTasks = tasks.filter((t) => t.type === "Action" && t.status === "finished");
2812
- const locateTasks = tasks.filter((t) => t.type === "Insight" && t.subType === "Locate");
2813
- const lastAction = actionTasks[actionTasks.length - 1];
2814
- const lastLocate = locateTasks[locateTasks.length - 1];
2815
- if (!lastAction) {
2816
- return {
2817
- success: true,
2818
- actionType: "unknown",
2819
- description: `Completed: ${originalPrompt}`
2569
+ async logScreenshot(title, options) {
2570
+ const screenshotTitle = title || "untitled";
2571
+ const content = options?.content || "";
2572
+ const screenshot = await this.page.screenshotBase64?.();
2573
+ if (screenshot) {
2574
+ const executionDump = {
2575
+ name: screenshotTitle,
2576
+ description: content,
2577
+ tasks: [{
2578
+ type: "Screenshot",
2579
+ subType: "log",
2580
+ status: "finished",
2581
+ executor: null,
2582
+ param: {
2583
+ title: screenshotTitle,
2584
+ content
2585
+ },
2586
+ output: {
2587
+ screenshot
2588
+ },
2589
+ thought: `Logged screenshot: ${screenshotTitle}`,
2590
+ timing: {
2591
+ start: Date.now(),
2592
+ end: Date.now(),
2593
+ cost: 0
2594
+ }
2595
+ }],
2596
+ sdkVersion: "1.0.0",
2597
+ logTime: Date.now(),
2598
+ model_name: "screenshot"
2820
2599
  };
2600
+ this.appendExecutionDump(executionDump);
2821
2601
  }
2822
- const actionType = lastAction.subType || "unknown";
2823
- const elementInfo = this.extractElementInfo(lastLocate, lastAction);
2824
- const description = this.generateActionDescription(actionType, lastAction.param, elementInfo);
2825
- return {
2826
- success: true,
2827
- actionType,
2828
- description,
2829
- elementInfo,
2830
- timing: lastAction.timing?.cost
2831
- };
2832
2602
  }
2833
- /**
2834
- * Extract element information from locate task
2835
- */
2836
- extractElementInfo(locateTask, _actionTask) {
2837
- if (!locateTask?.output?.element)
2838
- return void 0;
2839
- const element = locateTask.output.element;
2840
- return {
2841
- type: element.attributes?.nodeType || "unknown",
2842
- text: element.content || element.attributes?.placeholder || element.attributes?.title || "",
2843
- location: `(${element.center[0]}, ${element.center[1]})`
2844
- };
2845
- }
2846
- /**
2847
- * Generate natural language description for actions
2848
- */
2849
- generateActionDescription(actionType, param, elementInfo) {
2850
- const elementDesc = elementInfo ? `'${elementInfo.text || elementInfo.type}' element` : "element";
2851
- switch (actionType) {
2852
- case "Tap":
2853
- return `Clicked on ${elementDesc}`;
2854
- case "Input":
2855
- const inputValue = param?.value || "";
2856
- return `Entered "${inputValue}" into ${elementDesc}`;
2857
- case "KeyboardPress":
2858
- return `Pressed ${param?.value || "key"}`;
2859
- case "Scroll":
2860
- return `Scrolled ${param?.direction || "on page"}`;
2861
- case "Hover":
2862
- return `Hovered over ${elementDesc}`;
2863
- case "Drag":
2864
- return `Dragged ${elementDesc}`;
2865
- default:
2866
- return `Performed ${actionType} action on ${elementDesc}`;
2867
- }
2603
+ async destroy() {
2604
+ await this.page.destroy();
2868
2605
  }
2869
2606
  };
2870
2607
  // Annotate the CommonJS export names for ESM import in node: