misoai-web 1.0.6 → 1.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. package/README.md +5 -349
  2. package/dist/es/agent.js +165 -428
  3. package/dist/es/agent.js.map +1 -1
  4. package/dist/es/bridge-mode-browser.js +10 -9
  5. package/dist/es/bridge-mode-browser.js.map +1 -1
  6. package/dist/es/bridge-mode.js +167 -430
  7. package/dist/es/bridge-mode.js.map +1 -1
  8. package/dist/es/chrome-extension.js +173 -435
  9. package/dist/es/chrome-extension.js.map +1 -1
  10. package/dist/es/index.js +185 -432
  11. package/dist/es/index.js.map +1 -1
  12. package/dist/es/midscene-playground.js +165 -428
  13. package/dist/es/midscene-playground.js.map +1 -1
  14. package/dist/es/midscene-server.js.map +1 -1
  15. package/dist/es/playground.js +165 -428
  16. package/dist/es/playground.js.map +1 -1
  17. package/dist/es/playwright-report.js +1 -1
  18. package/dist/es/playwright-report.js.map +1 -1
  19. package/dist/es/playwright.js +182 -429
  20. package/dist/es/playwright.js.map +1 -1
  21. package/dist/es/puppeteer-agent-launcher.js +169 -432
  22. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  23. package/dist/es/puppeteer.js +169 -432
  24. package/dist/es/puppeteer.js.map +1 -1
  25. package/dist/es/ui-utils.js.map +1 -1
  26. package/dist/es/utils.js +7 -4
  27. package/dist/es/utils.js.map +1 -1
  28. package/dist/es/yaml.js +29 -3
  29. package/dist/es/yaml.js.map +1 -1
  30. package/dist/lib/agent.js +163 -426
  31. package/dist/lib/agent.js.map +1 -1
  32. package/dist/lib/bridge-mode-browser.js +10 -9
  33. package/dist/lib/bridge-mode-browser.js.map +1 -1
  34. package/dist/lib/bridge-mode.js +165 -428
  35. package/dist/lib/bridge-mode.js.map +1 -1
  36. package/dist/lib/chrome-extension.js +171 -433
  37. package/dist/lib/chrome-extension.js.map +1 -1
  38. package/dist/lib/index.js +183 -430
  39. package/dist/lib/index.js.map +1 -1
  40. package/dist/lib/midscene-playground.js +163 -426
  41. package/dist/lib/midscene-playground.js.map +1 -1
  42. package/dist/lib/midscene-server.js.map +1 -1
  43. package/dist/lib/playground.js +163 -426
  44. package/dist/lib/playground.js.map +1 -1
  45. package/dist/lib/playwright-report.js +1 -1
  46. package/dist/lib/playwright-report.js.map +1 -1
  47. package/dist/lib/playwright.js +180 -427
  48. package/dist/lib/playwright.js.map +1 -1
  49. package/dist/lib/puppeteer-agent-launcher.js +167 -430
  50. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  51. package/dist/lib/puppeteer.js +167 -430
  52. package/dist/lib/puppeteer.js.map +1 -1
  53. package/dist/lib/ui-utils.js.map +1 -1
  54. package/dist/lib/utils.js +7 -4
  55. package/dist/lib/utils.js.map +1 -1
  56. package/dist/lib/yaml.js +29 -3
  57. package/dist/lib/yaml.js.map +1 -1
  58. package/dist/types/agent.d.ts +13 -51
  59. package/dist/types/bridge-mode-browser.d.ts +2 -3
  60. package/dist/types/bridge-mode.d.ts +2 -3
  61. package/dist/types/{browser-aec1055d.d.ts → browser-9b472ffb.d.ts} +1 -1
  62. package/dist/types/chrome-extension.d.ts +2 -3
  63. package/dist/types/index.d.ts +1 -2
  64. package/dist/types/midscene-server.d.ts +1 -2
  65. package/dist/types/{page-86ab0fe1.d.ts → page-ed0ecb44.d.ts} +19 -9
  66. package/dist/types/playground.d.ts +2 -3
  67. package/dist/types/playwright.d.ts +9 -2
  68. package/dist/types/puppeteer-agent-launcher.d.ts +1 -2
  69. package/dist/types/puppeteer.d.ts +6 -5
  70. package/dist/types/ui-utils.d.ts +1 -1
  71. package/dist/types/utils.d.ts +1 -2
  72. package/dist/types/yaml.d.ts +1 -2
  73. package/iife-script/htmlElement.js +53 -75
  74. package/iife-script/htmlElementDebug.js +35 -56
  75. package/package.json +24 -24
  76. package/LICENSE +0 -21
@@ -55,7 +55,8 @@ var WebElementInfo = class {
55
55
  id,
56
56
  attributes,
57
57
  indexId,
58
- xpaths
58
+ xpaths,
59
+ isVisible
59
60
  }) {
60
61
  this.content = content;
61
62
  this.rect = rect;
@@ -68,6 +69,7 @@ var WebElementInfo = class {
68
69
  this.attributes = attributes;
69
70
  this.indexId = indexId;
70
71
  this.xpaths = xpaths;
72
+ this.isVisible = isVisible;
71
73
  }
72
74
  };
73
75
 
@@ -90,14 +92,15 @@ async function parseContextFromWebPage(page, _opt) {
90
92
  })
91
93
  ]);
92
94
  const webTree = (0, import_extractor.traverseTree)(tree, (elementInfo) => {
93
- const { rect, id, content, attributes, locator, indexId } = elementInfo;
95
+ const { rect, id, content, attributes, locator, indexId, isVisible } = elementInfo;
94
96
  return new WebElementInfo({
95
97
  rect,
96
98
  locator,
97
99
  id,
98
100
  content,
99
101
  attributes,
100
- indexId
102
+ indexId,
103
+ isVisible
101
104
  });
102
105
  });
103
106
  (0, import_utils2.assert)(screenshotBase64, "screenshotBase64 is required");
@@ -128,7 +131,7 @@ function printReportMsg(filepath) {
128
131
  }
129
132
  var ERROR_CODE_NOT_IMPLEMENTED_AS_DESIGNED = "NOT_IMPLEMENTED_AS_DESIGNED";
130
133
  function replaceIllegalPathCharsAndSpace(str) {
131
- return str.replace(/[/\\:*?"<>| ]/g, "-");
134
+ return str.replace(/[:*?"<>| ]/g, "-");
132
135
  }
133
136
  function matchElementFromPlan(planLocateParam, tree) {
134
137
  if (!planLocateParam) {
@@ -170,10 +173,11 @@ var ScriptPlayer = class {
170
173
  this.unnamedResultIndex = 0;
171
174
  this.pageAgent = null;
172
175
  this.result = {};
176
+ const target = script.target || script.web || script.android;
173
177
  if (import_utils3.ifInBrowser) {
174
178
  this.output = void 0;
175
- } else if (script.target?.output) {
176
- this.output = (0, import_node_path.resolve)(process.cwd(), script.target.output);
179
+ } else if (target?.output) {
180
+ this.output = (0, import_node_path.resolve)(process.cwd(), target.output);
177
181
  } else {
178
182
  this.output = (0, import_node_path.join)((0, import_common.getMidsceneRunSubDir)("output"), `${process.pid}.json`);
179
183
  }
@@ -247,15 +251,20 @@ var ScriptPlayer = class {
247
251
  } else if ("aiAssert" in flowItem) {
248
252
  const assertTask = flowItem;
249
253
  const prompt = assertTask.aiAssert;
254
+ const msg = assertTask.errorMessage;
250
255
  (0, import_utils3.assert)(prompt, "missing prompt for aiAssert");
251
256
  (0, import_utils3.assert)(
252
257
  typeof prompt === "string",
253
258
  "prompt for aiAssert must be a string"
254
259
  );
255
- await agent.aiAssert(prompt);
260
+ await agent.aiAssert(prompt, msg);
256
261
  } else if ("aiQuery" in flowItem) {
257
262
  const queryTask = flowItem;
258
263
  const prompt = queryTask.aiQuery;
264
+ const options = {
265
+ domIncluded: queryTask.domIncluded,
266
+ screenshotIncluded: queryTask.screenshotIncluded
267
+ };
259
268
  (0, import_utils3.assert)(prompt, "missing prompt for aiQuery");
260
269
  (0, import_utils3.assert)(
261
270
  typeof prompt === "string",
@@ -266,6 +275,10 @@ var ScriptPlayer = class {
266
275
  } else if ("aiNumber" in flowItem) {
267
276
  const numberTask = flowItem;
268
277
  const prompt = numberTask.aiNumber;
278
+ const options = {
279
+ domIncluded: numberTask.domIncluded,
280
+ screenshotIncluded: numberTask.screenshotIncluded
281
+ };
269
282
  (0, import_utils3.assert)(prompt, "missing prompt for number");
270
283
  (0, import_utils3.assert)(
271
284
  typeof prompt === "string",
@@ -276,6 +289,10 @@ var ScriptPlayer = class {
276
289
  } else if ("aiString" in flowItem) {
277
290
  const stringTask = flowItem;
278
291
  const prompt = stringTask.aiString;
292
+ const options = {
293
+ domIncluded: stringTask.domIncluded,
294
+ screenshotIncluded: stringTask.screenshotIncluded
295
+ };
279
296
  (0, import_utils3.assert)(prompt, "missing prompt for string");
280
297
  (0, import_utils3.assert)(
281
298
  typeof prompt === "string",
@@ -286,6 +303,10 @@ var ScriptPlayer = class {
286
303
  } else if ("aiBoolean" in flowItem) {
287
304
  const booleanTask = flowItem;
288
305
  const prompt = booleanTask.aiBoolean;
306
+ const options = {
307
+ domIncluded: booleanTask.domIncluded,
308
+ screenshotIncluded: booleanTask.screenshotIncluded
309
+ };
289
310
  (0, import_utils3.assert)(prompt, "missing prompt for boolean");
290
311
  (0, import_utils3.assert)(
291
312
  typeof prompt === "string",
@@ -328,6 +349,9 @@ var ScriptPlayer = class {
328
349
  } else if ("aiTap" in flowItem) {
329
350
  const tapTask = flowItem;
330
351
  await agent.aiTap(tapTask.aiTap, tapTask);
352
+ } else if ("aiRightClick" in flowItem) {
353
+ const rightClickTask = flowItem;
354
+ await agent.aiRightClick(rightClickTask.aiRightClick, rightClickTask);
331
355
  } else if ("aiHover" in flowItem) {
332
356
  const hoverTask = flowItem;
333
357
  await agent.aiHover(hoverTask.aiHover, hoverTask);
@@ -350,6 +374,11 @@ var ScriptPlayer = class {
350
374
  evaluateJavaScriptTask.javascript
351
375
  );
352
376
  this.setResult(evaluateJavaScriptTask.name, result);
377
+ } else if ("logScreenshot" in flowItem) {
378
+ const logScreenshotTask = flowItem;
379
+ await agent.logScreenshot(logScreenshotTask.logScreenshot, {
380
+ content: logScreenshotTask.content || ""
381
+ });
353
382
  } else {
354
383
  throw new Error(`unknown flowItem: ${JSON.stringify(flowItem)}`);
355
384
  }
@@ -842,10 +871,10 @@ var PageTaskExecutor = class {
842
871
  if (!taskParam || !taskParam.value) {
843
872
  return;
844
873
  }
845
- await this.page.keyboard.type(taskParam.value);
846
- } else {
847
- await this.page.keyboard.type(taskParam.value);
848
874
  }
875
+ await this.page.keyboard.type(taskParam.value, {
876
+ autoDismissKeyboard: taskParam.autoDismissKeyboard
877
+ });
849
878
  }
850
879
  };
851
880
  tasks.push(taskActionInput);
@@ -874,6 +903,22 @@ var PageTaskExecutor = class {
874
903
  }
875
904
  };
876
905
  tasks.push(taskActionTap);
906
+ } else if (plan2.type === "RightClick") {
907
+ const taskActionRightClick = {
908
+ type: "Action",
909
+ subType: "RightClick",
910
+ thought: plan2.thought,
911
+ locate: plan2.locate,
912
+ executor: async (param, { element }) => {
913
+ (0, import_utils6.assert)(element, "Element not found, cannot right click");
914
+ await this.page.mouse.click(
915
+ element.center[0],
916
+ element.center[1],
917
+ { button: "right" }
918
+ );
919
+ }
920
+ };
921
+ tasks.push(taskActionRightClick);
877
922
  } else if (plan2.type === "Drag") {
878
923
  const taskActionDrag = {
879
924
  type: "Action",
@@ -1402,7 +1447,7 @@ var PageTaskExecutor = class {
1402
1447
  executor: taskExecutor
1403
1448
  };
1404
1449
  }
1405
- async createTypeQueryTask(type, demand) {
1450
+ async createTypeQueryTask(type, demand, opt) {
1406
1451
  const taskExecutor = new import_misoai_core.Executor(
1407
1452
  taskTitleStr(
1408
1453
  type,
@@ -1433,7 +1478,10 @@ var PageTaskExecutor = class {
1433
1478
  result: `${type}, ${demand}`
1434
1479
  };
1435
1480
  }
1436
- const { data, usage } = await this.insight.extract(demandInput);
1481
+ const { data, usage } = await this.insight.extract(
1482
+ demandInput,
1483
+ opt
1484
+ );
1437
1485
  let outputResult = data;
1438
1486
  if (ifTypeRestricted) {
1439
1487
  (0, import_utils6.assert)(data?.result !== void 0, "No result in query data");
@@ -1453,17 +1501,17 @@ var PageTaskExecutor = class {
1453
1501
  executor: taskExecutor
1454
1502
  };
1455
1503
  }
1456
- async query(demand) {
1457
- return this.createTypeQueryTask("Query", demand);
1504
+ async query(demand, opt) {
1505
+ return this.createTypeQueryTask("Query", demand, opt);
1458
1506
  }
1459
- async boolean(prompt) {
1460
- return this.createTypeQueryTask("Boolean", prompt);
1507
+ async boolean(prompt, opt) {
1508
+ return this.createTypeQueryTask("Boolean", prompt, opt);
1461
1509
  }
1462
- async number(prompt) {
1463
- return this.createTypeQueryTask("Number", prompt);
1510
+ async number(prompt, opt) {
1511
+ return this.createTypeQueryTask("Number", prompt, opt);
1464
1512
  }
1465
- async string(prompt) {
1466
- return this.createTypeQueryTask("String", prompt);
1513
+ async string(prompt, opt) {
1514
+ return this.createTypeQueryTask("String", prompt, opt);
1467
1515
  }
1468
1516
  async assert(assertion) {
1469
1517
  const description = `assert: ${assertion}`;
@@ -1599,7 +1647,7 @@ function buildPlans(type, locateParam, param) {
1599
1647
  param: locateParam,
1600
1648
  thought: ""
1601
1649
  } : null;
1602
- if (type === "Tap" || type === "Hover") {
1650
+ if (type === "Tap" || type === "Hover" || type === "RightClick") {
1603
1651
  (0, import_utils8.assert)(locateParam, `missing locate info for action "${type}"`);
1604
1652
  (0, import_utils8.assert)(locatePlan, `missing locate info for action "${type}"`);
1605
1653
  const tapPlan = {
@@ -1679,7 +1727,7 @@ var import_js_yaml3 = __toESM(require("js-yaml"));
1679
1727
  var import_semver = __toESM(require("semver"));
1680
1728
 
1681
1729
  // package.json
1682
- var version = "1.0.5";
1730
+ var version = "1.0.3";
1683
1731
 
1684
1732
  // src/common/task-cache.ts
1685
1733
  var debug3 = (0, import_logger3.getDebug)("cache");
@@ -1707,70 +1755,44 @@ var TaskCache = class {
1707
1755
  this.cache = cacheContent;
1708
1756
  this.cacheOriginalLength = this.cache.caches.length;
1709
1757
  }
1710
- matchCache(prompt, type, contextData) {
1711
- const contextHash = contextData ? this.generateContextHash(contextData) : void 0;
1758
+ matchCache(prompt, type) {
1712
1759
  for (let i = 0; i < this.cacheOriginalLength; i++) {
1713
1760
  const item = this.cache.caches[i];
1714
1761
  const key = `${type}:${prompt}:${i}`;
1715
- if (item.type !== type || item.prompt !== prompt || this.matchedCacheIndices.has(key)) {
1716
- continue;
1717
- }
1718
- if (type === "plan" && item.type === "plan") {
1719
- const planItem = item;
1720
- if (contextHash && planItem.contextHash) {
1721
- if (contextHash !== planItem.contextHash) {
1722
- debug3("cache context mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1723
- continue;
1762
+ if (item.type === type && item.prompt === prompt && !this.matchedCacheIndices.has(key)) {
1763
+ this.matchedCacheIndices.add(key);
1764
+ debug3(
1765
+ "cache found and marked as used, type: %s, prompt: %s, index: %d",
1766
+ type,
1767
+ prompt,
1768
+ i
1769
+ );
1770
+ return {
1771
+ cacheContent: item,
1772
+ updateFn: (cb) => {
1773
+ debug3(
1774
+ "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1775
+ type,
1776
+ prompt,
1777
+ i
1778
+ );
1779
+ cb(item);
1780
+ debug3(
1781
+ "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1782
+ type,
1783
+ prompt,
1784
+ i
1785
+ );
1786
+ this.flushCacheToFile();
1724
1787
  }
1725
- } else if (contextHash || planItem.contextHash) {
1726
- debug3("cache context availability mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1727
- continue;
1728
- }
1788
+ };
1729
1789
  }
1730
- this.matchedCacheIndices.add(key);
1731
- debug3(
1732
- "cache found and marked as used, type: %s, prompt: %s, index: %d, contextMatch: %s",
1733
- type,
1734
- prompt,
1735
- i,
1736
- contextHash ? "yes" : "no-context"
1737
- );
1738
- return {
1739
- cacheContent: item,
1740
- updateFn: (cb) => {
1741
- debug3(
1742
- "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1743
- type,
1744
- prompt,
1745
- i
1746
- );
1747
- cb(item);
1748
- debug3(
1749
- "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1750
- type,
1751
- prompt,
1752
- i
1753
- );
1754
- this.flushCacheToFile();
1755
- }
1756
- };
1757
1790
  }
1758
- debug3("no unused cache found, type: %s, prompt: %s, contextHash: %s", type, prompt, contextHash);
1791
+ debug3("no unused cache found, type: %s, prompt: %s", type, prompt);
1759
1792
  return void 0;
1760
1793
  }
1761
- generateContextHash(contextData) {
1762
- const sortedKeys = Object.keys(contextData).sort();
1763
- const stableString = sortedKeys.map((key) => `${key}:${JSON.stringify(contextData[key])}`).join("|");
1764
- let hash = 0;
1765
- for (let i = 0; i < stableString.length; i++) {
1766
- const char = stableString.charCodeAt(i);
1767
- hash = (hash << 5) - hash + char;
1768
- hash = hash & hash;
1769
- }
1770
- return hash.toString(36);
1771
- }
1772
- matchPlanCache(prompt, contextData) {
1773
- return this.matchCache(prompt, "plan", contextData);
1794
+ matchPlanCache(prompt) {
1795
+ return this.matchCache(prompt, "plan");
1774
1796
  }
1775
1797
  matchLocateCache(prompt) {
1776
1798
  return this.matchCache(prompt, "locate");
@@ -1836,8 +1858,14 @@ cache file: ${cacheFile}`
1836
1858
  return;
1837
1859
  }
1838
1860
  try {
1861
+ const dir = (0, import_node_path2.dirname)(this.cacheFilePath);
1862
+ if (!(0, import_node_fs2.existsSync)(dir)) {
1863
+ (0, import_node_fs2.mkdirSync)(dir, { recursive: true });
1864
+ debug3("created cache directory: %s", dir);
1865
+ }
1839
1866
  const yamlData = import_js_yaml3.default.dump(this.cache);
1840
1867
  (0, import_node_fs2.writeFileSync)(this.cacheFilePath, yamlData);
1868
+ debug3("cache flushed to file: %s", this.cacheFilePath);
1841
1869
  } catch (err) {
1842
1870
  debug3(
1843
1871
  "write cache to file failed, path: %s, error: %s",
@@ -1846,16 +1874,11 @@ cache file: ${cacheFile}`
1846
1874
  );
1847
1875
  }
1848
1876
  }
1849
- updateOrAppendCacheRecord(newRecord, cachedRecord, contextData) {
1877
+ updateOrAppendCacheRecord(newRecord, cachedRecord) {
1850
1878
  if (cachedRecord) {
1851
1879
  if (newRecord.type === "plan") {
1852
1880
  cachedRecord.updateFn((cache) => {
1853
- const planCache = cache;
1854
- planCache.yamlWorkflow = newRecord.yamlWorkflow;
1855
- if (contextData) {
1856
- planCache.contextHash = this.generateContextHash(contextData);
1857
- planCache.contextData = { ...contextData };
1858
- }
1881
+ cache.yamlWorkflow = newRecord.yamlWorkflow;
1859
1882
  });
1860
1883
  } else {
1861
1884
  cachedRecord.updateFn((cache) => {
@@ -1863,11 +1886,6 @@ cache file: ${cacheFile}`
1863
1886
  });
1864
1887
  }
1865
1888
  } else {
1866
- if (newRecord.type === "plan" && contextData) {
1867
- const planRecord = newRecord;
1868
- planRecord.contextHash = this.generateContextHash(contextData);
1869
- planRecord.contextData = { ...contextData };
1870
- }
1871
1889
  this.appendCache(newRecord);
1872
1890
  }
1873
1891
  }
@@ -1897,13 +1915,10 @@ var PageAgent = class {
1897
1915
  generateReport: true,
1898
1916
  autoPrintReportMsg: true,
1899
1917
  groupName: "Midscene Report",
1900
- groupDescription: "",
1901
- enableCumulativeContext: true,
1902
- autoClearContext: false
1918
+ groupDescription: ""
1903
1919
  },
1904
1920
  opts || {}
1905
1921
  );
1906
- this.initializeContextStore();
1907
1922
  if (this.page.pageType === "puppeteer" || this.page.pageType === "playwright") {
1908
1923
  this.page.waitForNavigationTimeout = this.opts.waitForNavigationTimeout || import_constants2.DEFAULT_WAIT_FOR_NAVIGATION_TIMEOUT;
1909
1924
  this.page.waitForNetworkIdleTimeout = this.opts.waitForNetworkIdleTimeout || import_constants2.DEFAULT_WAIT_FOR_NETWORK_IDLE_TIMEOUT;
@@ -1930,69 +1945,6 @@ var PageAgent = class {
1930
1945
  opts?.testId || this.page.pageType || "web"
1931
1946
  );
1932
1947
  }
1933
- /**
1934
- * Initialize context store for cumulative context functionality
1935
- */
1936
- async initializeContextStore() {
1937
- if (!this.opts.enableCumulativeContext) {
1938
- debug4("Cumulative context disabled via options");
1939
- return;
1940
- }
1941
- try {
1942
- const aiModel = await import("misoai-core/ai-model");
1943
- this.contextStore = aiModel.getContextStore();
1944
- debug4("Context store initialized successfully", {
1945
- autoClearContext: this.opts.autoClearContext,
1946
- testId: this.opts.testId
1947
- });
1948
- if (this.opts.autoClearContext) {
1949
- this.contextStore.clear();
1950
- debug4("Context store cleared due to autoClearContext option");
1951
- } else {
1952
- const existingData = this.contextStore.getAllData();
1953
- const existingSteps = this.contextStore.getRecentSteps(100).length;
1954
- debug4("Context store preserving existing data", {
1955
- existingDataKeys: Object.keys(existingData),
1956
- existingStepsCount: existingSteps
1957
- });
1958
- }
1959
- } catch (error) {
1960
- debug4("Failed to initialize context store:", error);
1961
- console.warn("⚠️ Could not initialize context store:", error);
1962
- }
1963
- }
1964
- /**
1965
- * Get the context store instance
1966
- */
1967
- getContextStore() {
1968
- return this.contextStore;
1969
- }
1970
- /**
1971
- * Clear the context store
1972
- */
1973
- clearContext() {
1974
- if (this.contextStore) {
1975
- this.contextStore.clear();
1976
- }
1977
- }
1978
- /**
1979
- * Get all stored data from context store
1980
- */
1981
- getStoredData() {
1982
- if (this.contextStore) {
1983
- return this.contextStore.getAllData();
1984
- }
1985
- return {};
1986
- }
1987
- /**
1988
- * Get step summary from context store
1989
- */
1990
- getStepSummary() {
1991
- if (this.contextStore) {
1992
- return this.contextStore.getStepSummary();
1993
- }
1994
- return "";
1995
- }
1996
1948
  async getUIContext(action) {
1997
1949
  if (action && (action === "extract" || action === "assert" || action === "captcha")) {
1998
1950
  return await parseContextFromWebPage(this.page, {
@@ -2171,6 +2123,23 @@ var PageAgent = class {
2171
2123
  metadata
2172
2124
  };
2173
2125
  }
2126
+ async aiRightClick(locatePrompt, opt) {
2127
+ const detailedLocateParam = this.buildDetailedLocateParam(
2128
+ locatePrompt,
2129
+ opt
2130
+ );
2131
+ const plans = buildPlans("RightClick", detailedLocateParam);
2132
+ const { executor, output } = await this.taskExecutor.runPlans(
2133
+ taskTitleStr("RightClick", locateParamStr(detailedLocateParam)),
2134
+ plans,
2135
+ { cacheable: opt?.cacheable }
2136
+ );
2137
+ const metadata = this.afterTaskRunning(executor);
2138
+ return {
2139
+ result: output,
2140
+ metadata
2141
+ };
2142
+ }
2174
2143
  async aiInput(value, locatePrompt, opt) {
2175
2144
  (0, import_utils12.assert)(
2176
2145
  typeof value === "string",
@@ -2228,35 +2197,9 @@ var PageAgent = class {
2228
2197
  };
2229
2198
  }
2230
2199
  async aiAction(taskPrompt, opt) {
2231
- const originalPrompt = taskPrompt;
2232
- let processedPrompt = taskPrompt;
2233
- if (this.opts.enableCumulativeContext && this.contextStore) {
2234
- try {
2235
- const storedData = this.contextStore.getAllData();
2236
- if (Object.keys(storedData).length > 0) {
2237
- debug4("Available data for aiAction:", {
2238
- prompt: taskPrompt,
2239
- availableData: storedData
2240
- });
2241
- }
2242
- } catch (error) {
2243
- debug4("Context store operation failed:", error);
2244
- }
2245
- }
2246
2200
  const cacheable = opt?.cacheable;
2247
2201
  const isVlmUiTars = (0, import_env2.vlLocateMode)() === "vlm-ui-tars";
2248
- let contextData;
2249
- if (this.opts.enableCumulativeContext && this.contextStore) {
2250
- try {
2251
- contextData = this.contextStore.getAllData();
2252
- if (contextData && Object.keys(contextData).length === 0) {
2253
- contextData = void 0;
2254
- }
2255
- } catch (error) {
2256
- debug4("Failed to get context data for cache:", error);
2257
- }
2258
- }
2259
- const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt, contextData);
2202
+ const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt);
2260
2203
  if (matchedCache && this.taskCache?.isCacheResultUsed) {
2261
2204
  const { executor: executor2 } = await this.taskExecutor.loadYamlFlowAsPlanning(
2262
2205
  taskPrompt,
@@ -2266,28 +2209,6 @@ var PageAgent = class {
2266
2209
  debug4("matched cache, will call .runYaml to run the action");
2267
2210
  const yaml5 = matchedCache.cacheContent?.yamlWorkflow;
2268
2211
  const result = await this.runYaml(yaml5);
2269
- if (this.opts.enableCumulativeContext && this.contextStore) {
2270
- try {
2271
- const executionResult = {
2272
- success: true,
2273
- actionType: "cached",
2274
- description: `Executed cached action: ${processedPrompt}`,
2275
- timing: result.metadata?.totalTime
2276
- };
2277
- this.contextStore.addStep({
2278
- type: "action",
2279
- summary: `Action: ${processedPrompt} (cached)`,
2280
- prompt: processedPrompt,
2281
- executionResult
2282
- });
2283
- debug4("Added cached action step to context store:", {
2284
- stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2285
- totalSteps: this.contextStore.getRecentSteps(100).length
2286
- });
2287
- } catch (error) {
2288
- debug4("Failed to add cached action step:", error);
2289
- }
2290
- }
2291
2212
  return {
2292
2213
  result: result.result,
2293
2214
  metadata: metadata2
@@ -2312,114 +2233,17 @@ var PageAgent = class {
2312
2233
  prompt: taskPrompt,
2313
2234
  yamlWorkflow: yamlFlowStr
2314
2235
  },
2315
- matchedCache,
2316
- contextData
2317
- // Pass context data for cache creation
2236
+ matchedCache
2318
2237
  );
2319
2238
  }
2320
2239
  const metadata = this.afterTaskRunning(executor);
2321
- if (this.opts.enableCumulativeContext && this.contextStore) {
2322
- try {
2323
- const executionResult = this.analyzeExecutionResults(executor, originalPrompt);
2324
- this.contextStore.addStep({
2325
- type: "action",
2326
- summary: `Action: ${processedPrompt}`,
2327
- prompt: processedPrompt,
2328
- executionResult
2329
- });
2330
- debug4("Added action step with execution result to context store:", {
2331
- stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2332
- totalSteps: this.contextStore.getRecentSteps(100).length,
2333
- executionResult
2334
- });
2335
- } catch (error) {
2336
- debug4("Failed to analyze execution results, adding step without execution result:", error);
2337
- try {
2338
- this.contextStore.addStep({
2339
- type: "action",
2340
- summary: `Action: ${processedPrompt}`,
2341
- prompt: processedPrompt
2342
- });
2343
- } catch (stepError) {
2344
- debug4("Failed to add action step:", stepError);
2345
- }
2346
- }
2347
- }
2348
2240
  return {
2349
2241
  result: output,
2350
2242
  metadata
2351
2243
  };
2352
2244
  }
2353
2245
  async aiQuery(demand) {
2354
- let processedDemand = demand;
2355
- let storageKey;
2356
- try {
2357
- const aiModel = await import("misoai-core/ai-model");
2358
- const contextStore = aiModel.getContextStore();
2359
- if (typeof demand === "string") {
2360
- const storageInstruction = contextStore.parseStorageInstruction(demand);
2361
- if (storageInstruction) {
2362
- storageKey = storageInstruction.key;
2363
- processedDemand = storageInstruction.cleanText;
2364
- contextStore._pendingAliases = storageInstruction.aliases;
2365
- } else {
2366
- const storageMatch = demand.match(/store\s+(?:as\s+)?(\w+)/i);
2367
- if (storageMatch) {
2368
- storageKey = storageMatch[1];
2369
- processedDemand = demand.replace(/,?\s*store\s+(?:as\s+)?\w+/i, "").trim();
2370
- }
2371
- }
2372
- }
2373
- } catch (error) {
2374
- debug4("Context store not available:", error);
2375
- }
2376
- const { output, executor } = await this.taskExecutor.query(processedDemand);
2377
- if (this.opts.enableCumulativeContext && this.contextStore) {
2378
- if (storageKey && output) {
2379
- try {
2380
- const pendingAliases = this.contextStore._pendingAliases;
2381
- if (pendingAliases) {
2382
- this.contextStore.storeDataWithAliases(storageKey, output, pendingAliases, typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand));
2383
- delete this.contextStore._pendingAliases;
2384
- debug4("Stored query result with aliases:", {
2385
- key: storageKey,
2386
- value: output,
2387
- aliases: pendingAliases
2388
- });
2389
- } else {
2390
- this.contextStore.storeData(storageKey, output);
2391
- debug4("Stored query result:", {
2392
- key: storageKey,
2393
- value: output
2394
- });
2395
- }
2396
- this.contextStore.addStep({
2397
- type: "query",
2398
- summary: `Query: ${typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)} (stored as ${storageKey})`,
2399
- data: output,
2400
- prompt: typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)
2401
- });
2402
- debug4("Added query step to context store:", {
2403
- storageKey,
2404
- totalStoredItems: Object.keys(this.contextStore.getAllData()).length,
2405
- totalSteps: this.contextStore.getRecentSteps(100).length
2406
- });
2407
- } catch (error) {
2408
- debug4("Failed to store query result:", error);
2409
- }
2410
- } else {
2411
- try {
2412
- this.contextStore.addStep({
2413
- type: "query",
2414
- summary: `Query: ${typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)}`,
2415
- data: output,
2416
- prompt: typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)
2417
- });
2418
- } catch (error) {
2419
- debug4("Failed to add query step:", error);
2420
- }
2421
- }
2422
- }
2246
+ const { output, executor } = await this.taskExecutor.query(demand);
2423
2247
  const metadata = this.afterTaskRunning(executor);
2424
2248
  return {
2425
2249
  result: output,
@@ -2529,48 +2353,6 @@ var PageAgent = class {
2529
2353
  };
2530
2354
  }
2531
2355
  async aiAssert(assertion, msg, opt) {
2532
- let executionContext = "";
2533
- if (this.opts.enableCumulativeContext && this.contextStore) {
2534
- try {
2535
- const recentSteps = this.contextStore.getRecentSteps(3);
2536
- const stepsWithExecutionResults = recentSteps.filter((step) => step.executionResult);
2537
- const storedData = this.contextStore.getAllData();
2538
- if (stepsWithExecutionResults.length > 0) {
2539
- const recentActions = stepsWithExecutionResults.map((step) => {
2540
- const result = step.executionResult;
2541
- return `- ${result.description}${result.success ? "" : " (FAILED)"}`;
2542
- }).join("\n");
2543
- executionContext = `
2544
-
2545
- Recent actions performed:
2546
- ${recentActions}
2547
-
2548
- This context may help verify the assertion.`;
2549
- }
2550
- if (storedData && Object.keys(storedData).length > 0) {
2551
- executionContext += `
2552
-
2553
- Available data for reference:
2554
- ${JSON.stringify(storedData, null, 2)}
2555
-
2556
- Note: If the assertion references any data keys or natural language equivalents, consider the stored values when verifying.`;
2557
- debug4("Available data for aiAssert:", {
2558
- assertion,
2559
- availableData: storedData
2560
- });
2561
- }
2562
- this.contextStore.addStep({
2563
- type: "assertion",
2564
- summary: `Assertion: ${assertion}`,
2565
- prompt: assertion
2566
- });
2567
- debug4("Added assertion step to context store:", {
2568
- totalSteps: this.contextStore.getRecentSteps(100).length
2569
- });
2570
- } catch (error) {
2571
- debug4("Context store operation failed:", error);
2572
- }
2573
- }
2574
2356
  let currentUrl = "";
2575
2357
  if (this.page.url) {
2576
2358
  try {
@@ -2578,13 +2360,7 @@ Note: If the assertion references any data keys or natural language equivalents,
2578
2360
  } catch (e) {
2579
2361
  }
2580
2362
  }
2581
- let assertionWithContext = assertion;
2582
- if (currentUrl) {
2583
- assertionWithContext = `For the page at URL "${currentUrl}", ${assertion}`;
2584
- }
2585
- if (executionContext) {
2586
- assertionWithContext += executionContext;
2587
- }
2363
+ const assertionWithContext = currentUrl ? `For the page at URL "${currentUrl}", ${assertion}` : assertion;
2588
2364
  const { output, executor } = await this.taskExecutor.assert(assertionWithContext);
2589
2365
  const metadata = this.afterTaskRunning(executor, true);
2590
2366
  if (output && opt?.keepRawResponse) {
@@ -2795,81 +2571,42 @@ ${errors}`);
2795
2571
  }
2796
2572
  throw new Error("evaluateJavaScript is not supported in current agent");
2797
2573
  }
2798
- async destroy() {
2799
- await this.page.destroy();
2800
- }
2801
- /**
2802
- * Analyze execution results from executor to generate meaningful descriptions
2803
- */
2804
- analyzeExecutionResults(executor, originalPrompt) {
2805
- const tasks = executor.tasks;
2806
- const success = !executor.isInErrorState();
2807
- if (!success) {
2808
- const errorTask = executor.latestErrorTask();
2809
- return {
2810
- success: false,
2811
- actionType: "error",
2812
- description: `Failed to execute: ${originalPrompt}`,
2813
- error: errorTask?.error
2814
- };
2815
- }
2816
- const actionTasks = tasks.filter((t) => t.type === "Action" && t.status === "finished");
2817
- const locateTasks = tasks.filter((t) => t.type === "Insight" && t.subType === "Locate");
2818
- const lastAction = actionTasks[actionTasks.length - 1];
2819
- const lastLocate = locateTasks[locateTasks.length - 1];
2820
- if (!lastAction) {
2821
- return {
2822
- success: true,
2823
- actionType: "unknown",
2824
- description: `Completed: ${originalPrompt}`
2574
+ async logScreenshot(title, options) {
2575
+ const screenshotTitle = title || "untitled";
2576
+ const content = options?.content || "";
2577
+ const screenshot = await this.page.screenshotBase64?.();
2578
+ if (screenshot) {
2579
+ const executionDump = {
2580
+ name: screenshotTitle,
2581
+ description: content,
2582
+ tasks: [{
2583
+ type: "Screenshot",
2584
+ subType: "log",
2585
+ status: "finished",
2586
+ executor: null,
2587
+ param: {
2588
+ title: screenshotTitle,
2589
+ content
2590
+ },
2591
+ output: {
2592
+ screenshot
2593
+ },
2594
+ thought: `Logged screenshot: ${screenshotTitle}`,
2595
+ timing: {
2596
+ start: Date.now(),
2597
+ end: Date.now(),
2598
+ cost: 0
2599
+ }
2600
+ }],
2601
+ sdkVersion: "1.0.0",
2602
+ logTime: Date.now(),
2603
+ model_name: "screenshot"
2825
2604
  };
2605
+ this.appendExecutionDump(executionDump);
2826
2606
  }
2827
- const actionType = lastAction.subType || "unknown";
2828
- const elementInfo = this.extractElementInfo(lastLocate, lastAction);
2829
- const description = this.generateActionDescription(actionType, lastAction.param, elementInfo);
2830
- return {
2831
- success: true,
2832
- actionType,
2833
- description,
2834
- elementInfo,
2835
- timing: lastAction.timing?.cost
2836
- };
2837
2607
  }
2838
- /**
2839
- * Extract element information from locate task
2840
- */
2841
- extractElementInfo(locateTask, _actionTask) {
2842
- if (!locateTask?.output?.element)
2843
- return void 0;
2844
- const element = locateTask.output.element;
2845
- return {
2846
- type: element.attributes?.nodeType || "unknown",
2847
- text: element.content || element.attributes?.placeholder || element.attributes?.title || "",
2848
- location: `(${element.center[0]}, ${element.center[1]})`
2849
- };
2850
- }
2851
- /**
2852
- * Generate natural language description for actions
2853
- */
2854
- generateActionDescription(actionType, param, elementInfo) {
2855
- const elementDesc = elementInfo ? `'${elementInfo.text || elementInfo.type}' element` : "element";
2856
- switch (actionType) {
2857
- case "Tap":
2858
- return `Clicked on ${elementDesc}`;
2859
- case "Input":
2860
- const inputValue = param?.value || "";
2861
- return `Entered "${inputValue}" into ${elementDesc}`;
2862
- case "KeyboardPress":
2863
- return `Pressed ${param?.value || "key"}`;
2864
- case "Scroll":
2865
- return `Scrolled ${param?.direction || "on page"}`;
2866
- case "Hover":
2867
- return `Hovered over ${elementDesc}`;
2868
- case "Drag":
2869
- return `Dragged ${elementDesc}`;
2870
- default:
2871
- return `Performed ${actionType} action on ${elementDesc}`;
2872
- }
2608
+ async destroy() {
2609
+ await this.page.destroy();
2873
2610
  }
2874
2611
  };
2875
2612