misoai-web 1.0.6 → 1.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. package/README.md +5 -349
  2. package/dist/es/agent.js +165 -428
  3. package/dist/es/agent.js.map +1 -1
  4. package/dist/es/bridge-mode-browser.js +10 -9
  5. package/dist/es/bridge-mode-browser.js.map +1 -1
  6. package/dist/es/bridge-mode.js +167 -430
  7. package/dist/es/bridge-mode.js.map +1 -1
  8. package/dist/es/chrome-extension.js +173 -435
  9. package/dist/es/chrome-extension.js.map +1 -1
  10. package/dist/es/index.js +185 -432
  11. package/dist/es/index.js.map +1 -1
  12. package/dist/es/midscene-playground.js +165 -428
  13. package/dist/es/midscene-playground.js.map +1 -1
  14. package/dist/es/midscene-server.js.map +1 -1
  15. package/dist/es/playground.js +165 -428
  16. package/dist/es/playground.js.map +1 -1
  17. package/dist/es/playwright-report.js +1 -1
  18. package/dist/es/playwright-report.js.map +1 -1
  19. package/dist/es/playwright.js +182 -429
  20. package/dist/es/playwright.js.map +1 -1
  21. package/dist/es/puppeteer-agent-launcher.js +169 -432
  22. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  23. package/dist/es/puppeteer.js +169 -432
  24. package/dist/es/puppeteer.js.map +1 -1
  25. package/dist/es/ui-utils.js.map +1 -1
  26. package/dist/es/utils.js +7 -4
  27. package/dist/es/utils.js.map +1 -1
  28. package/dist/es/yaml.js +29 -3
  29. package/dist/es/yaml.js.map +1 -1
  30. package/dist/lib/agent.js +163 -426
  31. package/dist/lib/agent.js.map +1 -1
  32. package/dist/lib/bridge-mode-browser.js +10 -9
  33. package/dist/lib/bridge-mode-browser.js.map +1 -1
  34. package/dist/lib/bridge-mode.js +165 -428
  35. package/dist/lib/bridge-mode.js.map +1 -1
  36. package/dist/lib/chrome-extension.js +171 -433
  37. package/dist/lib/chrome-extension.js.map +1 -1
  38. package/dist/lib/index.js +183 -430
  39. package/dist/lib/index.js.map +1 -1
  40. package/dist/lib/midscene-playground.js +163 -426
  41. package/dist/lib/midscene-playground.js.map +1 -1
  42. package/dist/lib/midscene-server.js.map +1 -1
  43. package/dist/lib/playground.js +163 -426
  44. package/dist/lib/playground.js.map +1 -1
  45. package/dist/lib/playwright-report.js +1 -1
  46. package/dist/lib/playwright-report.js.map +1 -1
  47. package/dist/lib/playwright.js +180 -427
  48. package/dist/lib/playwright.js.map +1 -1
  49. package/dist/lib/puppeteer-agent-launcher.js +167 -430
  50. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  51. package/dist/lib/puppeteer.js +167 -430
  52. package/dist/lib/puppeteer.js.map +1 -1
  53. package/dist/lib/ui-utils.js.map +1 -1
  54. package/dist/lib/utils.js +7 -4
  55. package/dist/lib/utils.js.map +1 -1
  56. package/dist/lib/yaml.js +29 -3
  57. package/dist/lib/yaml.js.map +1 -1
  58. package/dist/types/agent.d.ts +13 -51
  59. package/dist/types/bridge-mode-browser.d.ts +2 -3
  60. package/dist/types/bridge-mode.d.ts +2 -3
  61. package/dist/types/{browser-aec1055d.d.ts → browser-9b472ffb.d.ts} +1 -1
  62. package/dist/types/chrome-extension.d.ts +2 -3
  63. package/dist/types/index.d.ts +1 -2
  64. package/dist/types/midscene-server.d.ts +1 -2
  65. package/dist/types/{page-86ab0fe1.d.ts → page-ed0ecb44.d.ts} +19 -9
  66. package/dist/types/playground.d.ts +2 -3
  67. package/dist/types/playwright.d.ts +9 -2
  68. package/dist/types/puppeteer-agent-launcher.d.ts +1 -2
  69. package/dist/types/puppeteer.d.ts +6 -5
  70. package/dist/types/ui-utils.d.ts +1 -1
  71. package/dist/types/utils.d.ts +1 -2
  72. package/dist/types/yaml.d.ts +1 -2
  73. package/iife-script/htmlElement.js +53 -75
  74. package/iife-script/htmlElementDebug.js +35 -56
  75. package/package.json +24 -24
  76. package/LICENSE +0 -21
@@ -41,7 +41,8 @@ var WebElementInfo = class {
41
41
  id,
42
42
  attributes,
43
43
  indexId,
44
- xpaths
44
+ xpaths,
45
+ isVisible
45
46
  }) {
46
47
  this.content = content;
47
48
  this.rect = rect;
@@ -54,6 +55,7 @@ var WebElementInfo = class {
54
55
  this.attributes = attributes;
55
56
  this.indexId = indexId;
56
57
  this.xpaths = xpaths;
58
+ this.isVisible = isVisible;
57
59
  }
58
60
  };
59
61
 
@@ -76,14 +78,15 @@ async function parseContextFromWebPage(page, _opt) {
76
78
  })
77
79
  ]);
78
80
  const webTree = (0, import_extractor.traverseTree)(tree, (elementInfo) => {
79
- const { rect, id, content, attributes, locator, indexId } = elementInfo;
81
+ const { rect, id, content, attributes, locator, indexId, isVisible } = elementInfo;
80
82
  return new WebElementInfo({
81
83
  rect,
82
84
  locator,
83
85
  id,
84
86
  content,
85
87
  attributes,
86
- indexId
88
+ indexId,
89
+ isVisible
87
90
  });
88
91
  });
89
92
  (0, import_utils2.assert)(screenshotBase64, "screenshotBase64 is required");
@@ -114,7 +117,7 @@ function printReportMsg(filepath) {
114
117
  }
115
118
  var ERROR_CODE_NOT_IMPLEMENTED_AS_DESIGNED = "NOT_IMPLEMENTED_AS_DESIGNED";
116
119
  function replaceIllegalPathCharsAndSpace(str) {
117
- return str.replace(/[/\\:*?"<>| ]/g, "-");
120
+ return str.replace(/[:*?"<>| ]/g, "-");
118
121
  }
119
122
  function matchElementFromPlan(planLocateParam, tree) {
120
123
  if (!planLocateParam) {
@@ -156,10 +159,11 @@ var ScriptPlayer = class {
156
159
  this.unnamedResultIndex = 0;
157
160
  this.pageAgent = null;
158
161
  this.result = {};
162
+ const target = script.target || script.web || script.android;
159
163
  if (import_utils3.ifInBrowser) {
160
164
  this.output = void 0;
161
- } else if (script.target?.output) {
162
- this.output = (0, import_node_path.resolve)(process.cwd(), script.target.output);
165
+ } else if (target?.output) {
166
+ this.output = (0, import_node_path.resolve)(process.cwd(), target.output);
163
167
  } else {
164
168
  this.output = (0, import_node_path.join)((0, import_common.getMidsceneRunSubDir)("output"), `${process.pid}.json`);
165
169
  }
@@ -233,15 +237,20 @@ var ScriptPlayer = class {
233
237
  } else if ("aiAssert" in flowItem) {
234
238
  const assertTask = flowItem;
235
239
  const prompt = assertTask.aiAssert;
240
+ const msg = assertTask.errorMessage;
236
241
  (0, import_utils3.assert)(prompt, "missing prompt for aiAssert");
237
242
  (0, import_utils3.assert)(
238
243
  typeof prompt === "string",
239
244
  "prompt for aiAssert must be a string"
240
245
  );
241
- await agent.aiAssert(prompt);
246
+ await agent.aiAssert(prompt, msg);
242
247
  } else if ("aiQuery" in flowItem) {
243
248
  const queryTask = flowItem;
244
249
  const prompt = queryTask.aiQuery;
250
+ const options = {
251
+ domIncluded: queryTask.domIncluded,
252
+ screenshotIncluded: queryTask.screenshotIncluded
253
+ };
245
254
  (0, import_utils3.assert)(prompt, "missing prompt for aiQuery");
246
255
  (0, import_utils3.assert)(
247
256
  typeof prompt === "string",
@@ -252,6 +261,10 @@ var ScriptPlayer = class {
252
261
  } else if ("aiNumber" in flowItem) {
253
262
  const numberTask = flowItem;
254
263
  const prompt = numberTask.aiNumber;
264
+ const options = {
265
+ domIncluded: numberTask.domIncluded,
266
+ screenshotIncluded: numberTask.screenshotIncluded
267
+ };
255
268
  (0, import_utils3.assert)(prompt, "missing prompt for number");
256
269
  (0, import_utils3.assert)(
257
270
  typeof prompt === "string",
@@ -262,6 +275,10 @@ var ScriptPlayer = class {
262
275
  } else if ("aiString" in flowItem) {
263
276
  const stringTask = flowItem;
264
277
  const prompt = stringTask.aiString;
278
+ const options = {
279
+ domIncluded: stringTask.domIncluded,
280
+ screenshotIncluded: stringTask.screenshotIncluded
281
+ };
265
282
  (0, import_utils3.assert)(prompt, "missing prompt for string");
266
283
  (0, import_utils3.assert)(
267
284
  typeof prompt === "string",
@@ -272,6 +289,10 @@ var ScriptPlayer = class {
272
289
  } else if ("aiBoolean" in flowItem) {
273
290
  const booleanTask = flowItem;
274
291
  const prompt = booleanTask.aiBoolean;
292
+ const options = {
293
+ domIncluded: booleanTask.domIncluded,
294
+ screenshotIncluded: booleanTask.screenshotIncluded
295
+ };
275
296
  (0, import_utils3.assert)(prompt, "missing prompt for boolean");
276
297
  (0, import_utils3.assert)(
277
298
  typeof prompt === "string",
@@ -314,6 +335,9 @@ var ScriptPlayer = class {
314
335
  } else if ("aiTap" in flowItem) {
315
336
  const tapTask = flowItem;
316
337
  await agent.aiTap(tapTask.aiTap, tapTask);
338
+ } else if ("aiRightClick" in flowItem) {
339
+ const rightClickTask = flowItem;
340
+ await agent.aiRightClick(rightClickTask.aiRightClick, rightClickTask);
317
341
  } else if ("aiHover" in flowItem) {
318
342
  const hoverTask = flowItem;
319
343
  await agent.aiHover(hoverTask.aiHover, hoverTask);
@@ -336,6 +360,11 @@ var ScriptPlayer = class {
336
360
  evaluateJavaScriptTask.javascript
337
361
  );
338
362
  this.setResult(evaluateJavaScriptTask.name, result);
363
+ } else if ("logScreenshot" in flowItem) {
364
+ const logScreenshotTask = flowItem;
365
+ await agent.logScreenshot(logScreenshotTask.logScreenshot, {
366
+ content: logScreenshotTask.content || ""
367
+ });
339
368
  } else {
340
369
  throw new Error(`unknown flowItem: ${JSON.stringify(flowItem)}`);
341
370
  }
@@ -828,10 +857,10 @@ var PageTaskExecutor = class {
828
857
  if (!taskParam || !taskParam.value) {
829
858
  return;
830
859
  }
831
- await this.page.keyboard.type(taskParam.value);
832
- } else {
833
- await this.page.keyboard.type(taskParam.value);
834
860
  }
861
+ await this.page.keyboard.type(taskParam.value, {
862
+ autoDismissKeyboard: taskParam.autoDismissKeyboard
863
+ });
835
864
  }
836
865
  };
837
866
  tasks.push(taskActionInput);
@@ -860,6 +889,22 @@ var PageTaskExecutor = class {
860
889
  }
861
890
  };
862
891
  tasks.push(taskActionTap);
892
+ } else if (plan2.type === "RightClick") {
893
+ const taskActionRightClick = {
894
+ type: "Action",
895
+ subType: "RightClick",
896
+ thought: plan2.thought,
897
+ locate: plan2.locate,
898
+ executor: async (param, { element }) => {
899
+ (0, import_utils6.assert)(element, "Element not found, cannot right click");
900
+ await this.page.mouse.click(
901
+ element.center[0],
902
+ element.center[1],
903
+ { button: "right" }
904
+ );
905
+ }
906
+ };
907
+ tasks.push(taskActionRightClick);
863
908
  } else if (plan2.type === "Drag") {
864
909
  const taskActionDrag = {
865
910
  type: "Action",
@@ -1388,7 +1433,7 @@ var PageTaskExecutor = class {
1388
1433
  executor: taskExecutor
1389
1434
  };
1390
1435
  }
1391
- async createTypeQueryTask(type, demand) {
1436
+ async createTypeQueryTask(type, demand, opt) {
1392
1437
  const taskExecutor = new import_misoai_core.Executor(
1393
1438
  taskTitleStr(
1394
1439
  type,
@@ -1419,7 +1464,10 @@ var PageTaskExecutor = class {
1419
1464
  result: `${type}, ${demand}`
1420
1465
  };
1421
1466
  }
1422
- const { data, usage } = await this.insight.extract(demandInput);
1467
+ const { data, usage } = await this.insight.extract(
1468
+ demandInput,
1469
+ opt
1470
+ );
1423
1471
  let outputResult = data;
1424
1472
  if (ifTypeRestricted) {
1425
1473
  (0, import_utils6.assert)(data?.result !== void 0, "No result in query data");
@@ -1439,17 +1487,17 @@ var PageTaskExecutor = class {
1439
1487
  executor: taskExecutor
1440
1488
  };
1441
1489
  }
1442
- async query(demand) {
1443
- return this.createTypeQueryTask("Query", demand);
1490
+ async query(demand, opt) {
1491
+ return this.createTypeQueryTask("Query", demand, opt);
1444
1492
  }
1445
- async boolean(prompt) {
1446
- return this.createTypeQueryTask("Boolean", prompt);
1493
+ async boolean(prompt, opt) {
1494
+ return this.createTypeQueryTask("Boolean", prompt, opt);
1447
1495
  }
1448
- async number(prompt) {
1449
- return this.createTypeQueryTask("Number", prompt);
1496
+ async number(prompt, opt) {
1497
+ return this.createTypeQueryTask("Number", prompt, opt);
1450
1498
  }
1451
- async string(prompt) {
1452
- return this.createTypeQueryTask("String", prompt);
1499
+ async string(prompt, opt) {
1500
+ return this.createTypeQueryTask("String", prompt, opt);
1453
1501
  }
1454
1502
  async assert(assertion) {
1455
1503
  const description = `assert: ${assertion}`;
@@ -1585,7 +1633,7 @@ function buildPlans(type, locateParam, param) {
1585
1633
  param: locateParam,
1586
1634
  thought: ""
1587
1635
  } : null;
1588
- if (type === "Tap" || type === "Hover") {
1636
+ if (type === "Tap" || type === "Hover" || type === "RightClick") {
1589
1637
  (0, import_utils8.assert)(locateParam, `missing locate info for action "${type}"`);
1590
1638
  (0, import_utils8.assert)(locatePlan, `missing locate info for action "${type}"`);
1591
1639
  const tapPlan = {
@@ -1665,7 +1713,7 @@ var import_js_yaml3 = __toESM(require("js-yaml"));
1665
1713
  var import_semver = __toESM(require("semver"));
1666
1714
 
1667
1715
  // package.json
1668
- var version = "1.0.5";
1716
+ var version = "1.0.3";
1669
1717
 
1670
1718
  // src/common/task-cache.ts
1671
1719
  var debug3 = (0, import_logger3.getDebug)("cache");
@@ -1693,70 +1741,44 @@ var TaskCache = class {
1693
1741
  this.cache = cacheContent;
1694
1742
  this.cacheOriginalLength = this.cache.caches.length;
1695
1743
  }
1696
- matchCache(prompt, type, contextData) {
1697
- const contextHash = contextData ? this.generateContextHash(contextData) : void 0;
1744
+ matchCache(prompt, type) {
1698
1745
  for (let i = 0; i < this.cacheOriginalLength; i++) {
1699
1746
  const item = this.cache.caches[i];
1700
1747
  const key = `${type}:${prompt}:${i}`;
1701
- if (item.type !== type || item.prompt !== prompt || this.matchedCacheIndices.has(key)) {
1702
- continue;
1703
- }
1704
- if (type === "plan" && item.type === "plan") {
1705
- const planItem = item;
1706
- if (contextHash && planItem.contextHash) {
1707
- if (contextHash !== planItem.contextHash) {
1708
- debug3("cache context mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1709
- continue;
1748
+ if (item.type === type && item.prompt === prompt && !this.matchedCacheIndices.has(key)) {
1749
+ this.matchedCacheIndices.add(key);
1750
+ debug3(
1751
+ "cache found and marked as used, type: %s, prompt: %s, index: %d",
1752
+ type,
1753
+ prompt,
1754
+ i
1755
+ );
1756
+ return {
1757
+ cacheContent: item,
1758
+ updateFn: (cb) => {
1759
+ debug3(
1760
+ "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1761
+ type,
1762
+ prompt,
1763
+ i
1764
+ );
1765
+ cb(item);
1766
+ debug3(
1767
+ "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1768
+ type,
1769
+ prompt,
1770
+ i
1771
+ );
1772
+ this.flushCacheToFile();
1710
1773
  }
1711
- } else if (contextHash || planItem.contextHash) {
1712
- debug3("cache context availability mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1713
- continue;
1714
- }
1774
+ };
1715
1775
  }
1716
- this.matchedCacheIndices.add(key);
1717
- debug3(
1718
- "cache found and marked as used, type: %s, prompt: %s, index: %d, contextMatch: %s",
1719
- type,
1720
- prompt,
1721
- i,
1722
- contextHash ? "yes" : "no-context"
1723
- );
1724
- return {
1725
- cacheContent: item,
1726
- updateFn: (cb) => {
1727
- debug3(
1728
- "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1729
- type,
1730
- prompt,
1731
- i
1732
- );
1733
- cb(item);
1734
- debug3(
1735
- "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1736
- type,
1737
- prompt,
1738
- i
1739
- );
1740
- this.flushCacheToFile();
1741
- }
1742
- };
1743
1776
  }
1744
- debug3("no unused cache found, type: %s, prompt: %s, contextHash: %s", type, prompt, contextHash);
1777
+ debug3("no unused cache found, type: %s, prompt: %s", type, prompt);
1745
1778
  return void 0;
1746
1779
  }
1747
- generateContextHash(contextData) {
1748
- const sortedKeys = Object.keys(contextData).sort();
1749
- const stableString = sortedKeys.map((key) => `${key}:${JSON.stringify(contextData[key])}`).join("|");
1750
- let hash = 0;
1751
- for (let i = 0; i < stableString.length; i++) {
1752
- const char = stableString.charCodeAt(i);
1753
- hash = (hash << 5) - hash + char;
1754
- hash = hash & hash;
1755
- }
1756
- return hash.toString(36);
1757
- }
1758
- matchPlanCache(prompt, contextData) {
1759
- return this.matchCache(prompt, "plan", contextData);
1780
+ matchPlanCache(prompt) {
1781
+ return this.matchCache(prompt, "plan");
1760
1782
  }
1761
1783
  matchLocateCache(prompt) {
1762
1784
  return this.matchCache(prompt, "locate");
@@ -1822,8 +1844,14 @@ cache file: ${cacheFile}`
1822
1844
  return;
1823
1845
  }
1824
1846
  try {
1847
+ const dir = (0, import_node_path2.dirname)(this.cacheFilePath);
1848
+ if (!(0, import_node_fs2.existsSync)(dir)) {
1849
+ (0, import_node_fs2.mkdirSync)(dir, { recursive: true });
1850
+ debug3("created cache directory: %s", dir);
1851
+ }
1825
1852
  const yamlData = import_js_yaml3.default.dump(this.cache);
1826
1853
  (0, import_node_fs2.writeFileSync)(this.cacheFilePath, yamlData);
1854
+ debug3("cache flushed to file: %s", this.cacheFilePath);
1827
1855
  } catch (err) {
1828
1856
  debug3(
1829
1857
  "write cache to file failed, path: %s, error: %s",
@@ -1832,16 +1860,11 @@ cache file: ${cacheFile}`
1832
1860
  );
1833
1861
  }
1834
1862
  }
1835
- updateOrAppendCacheRecord(newRecord, cachedRecord, contextData) {
1863
+ updateOrAppendCacheRecord(newRecord, cachedRecord) {
1836
1864
  if (cachedRecord) {
1837
1865
  if (newRecord.type === "plan") {
1838
1866
  cachedRecord.updateFn((cache) => {
1839
- const planCache = cache;
1840
- planCache.yamlWorkflow = newRecord.yamlWorkflow;
1841
- if (contextData) {
1842
- planCache.contextHash = this.generateContextHash(contextData);
1843
- planCache.contextData = { ...contextData };
1844
- }
1867
+ cache.yamlWorkflow = newRecord.yamlWorkflow;
1845
1868
  });
1846
1869
  } else {
1847
1870
  cachedRecord.updateFn((cache) => {
@@ -1849,11 +1872,6 @@ cache file: ${cacheFile}`
1849
1872
  });
1850
1873
  }
1851
1874
  } else {
1852
- if (newRecord.type === "plan" && contextData) {
1853
- const planRecord = newRecord;
1854
- planRecord.contextHash = this.generateContextHash(contextData);
1855
- planRecord.contextData = { ...contextData };
1856
- }
1857
1875
  this.appendCache(newRecord);
1858
1876
  }
1859
1877
  }
@@ -1883,13 +1901,10 @@ var PageAgent = class {
1883
1901
  generateReport: true,
1884
1902
  autoPrintReportMsg: true,
1885
1903
  groupName: "Midscene Report",
1886
- groupDescription: "",
1887
- enableCumulativeContext: true,
1888
- autoClearContext: false
1904
+ groupDescription: ""
1889
1905
  },
1890
1906
  opts || {}
1891
1907
  );
1892
- this.initializeContextStore();
1893
1908
  if (this.page.pageType === "puppeteer" || this.page.pageType === "playwright") {
1894
1909
  this.page.waitForNavigationTimeout = this.opts.waitForNavigationTimeout || import_constants2.DEFAULT_WAIT_FOR_NAVIGATION_TIMEOUT;
1895
1910
  this.page.waitForNetworkIdleTimeout = this.opts.waitForNetworkIdleTimeout || import_constants2.DEFAULT_WAIT_FOR_NETWORK_IDLE_TIMEOUT;
@@ -1916,69 +1931,6 @@ var PageAgent = class {
1916
1931
  opts?.testId || this.page.pageType || "web"
1917
1932
  );
1918
1933
  }
1919
- /**
1920
- * Initialize context store for cumulative context functionality
1921
- */
1922
- async initializeContextStore() {
1923
- if (!this.opts.enableCumulativeContext) {
1924
- debug4("Cumulative context disabled via options");
1925
- return;
1926
- }
1927
- try {
1928
- const aiModel = await import("misoai-core/ai-model");
1929
- this.contextStore = aiModel.getContextStore();
1930
- debug4("Context store initialized successfully", {
1931
- autoClearContext: this.opts.autoClearContext,
1932
- testId: this.opts.testId
1933
- });
1934
- if (this.opts.autoClearContext) {
1935
- this.contextStore.clear();
1936
- debug4("Context store cleared due to autoClearContext option");
1937
- } else {
1938
- const existingData = this.contextStore.getAllData();
1939
- const existingSteps = this.contextStore.getRecentSteps(100).length;
1940
- debug4("Context store preserving existing data", {
1941
- existingDataKeys: Object.keys(existingData),
1942
- existingStepsCount: existingSteps
1943
- });
1944
- }
1945
- } catch (error) {
1946
- debug4("Failed to initialize context store:", error);
1947
- console.warn("⚠️ Could not initialize context store:", error);
1948
- }
1949
- }
1950
- /**
1951
- * Get the context store instance
1952
- */
1953
- getContextStore() {
1954
- return this.contextStore;
1955
- }
1956
- /**
1957
- * Clear the context store
1958
- */
1959
- clearContext() {
1960
- if (this.contextStore) {
1961
- this.contextStore.clear();
1962
- }
1963
- }
1964
- /**
1965
- * Get all stored data from context store
1966
- */
1967
- getStoredData() {
1968
- if (this.contextStore) {
1969
- return this.contextStore.getAllData();
1970
- }
1971
- return {};
1972
- }
1973
- /**
1974
- * Get step summary from context store
1975
- */
1976
- getStepSummary() {
1977
- if (this.contextStore) {
1978
- return this.contextStore.getStepSummary();
1979
- }
1980
- return "";
1981
- }
1982
1934
  async getUIContext(action) {
1983
1935
  if (action && (action === "extract" || action === "assert" || action === "captcha")) {
1984
1936
  return await parseContextFromWebPage(this.page, {
@@ -2157,6 +2109,23 @@ var PageAgent = class {
2157
2109
  metadata
2158
2110
  };
2159
2111
  }
2112
+ async aiRightClick(locatePrompt, opt) {
2113
+ const detailedLocateParam = this.buildDetailedLocateParam(
2114
+ locatePrompt,
2115
+ opt
2116
+ );
2117
+ const plans = buildPlans("RightClick", detailedLocateParam);
2118
+ const { executor, output } = await this.taskExecutor.runPlans(
2119
+ taskTitleStr("RightClick", locateParamStr(detailedLocateParam)),
2120
+ plans,
2121
+ { cacheable: opt?.cacheable }
2122
+ );
2123
+ const metadata = this.afterTaskRunning(executor);
2124
+ return {
2125
+ result: output,
2126
+ metadata
2127
+ };
2128
+ }
2160
2129
  async aiInput(value, locatePrompt, opt) {
2161
2130
  (0, import_utils12.assert)(
2162
2131
  typeof value === "string",
@@ -2214,35 +2183,9 @@ var PageAgent = class {
2214
2183
  };
2215
2184
  }
2216
2185
  async aiAction(taskPrompt, opt) {
2217
- const originalPrompt = taskPrompt;
2218
- let processedPrompt = taskPrompt;
2219
- if (this.opts.enableCumulativeContext && this.contextStore) {
2220
- try {
2221
- const storedData = this.contextStore.getAllData();
2222
- if (Object.keys(storedData).length > 0) {
2223
- debug4("Available data for aiAction:", {
2224
- prompt: taskPrompt,
2225
- availableData: storedData
2226
- });
2227
- }
2228
- } catch (error) {
2229
- debug4("Context store operation failed:", error);
2230
- }
2231
- }
2232
2186
  const cacheable = opt?.cacheable;
2233
2187
  const isVlmUiTars = (0, import_env2.vlLocateMode)() === "vlm-ui-tars";
2234
- let contextData;
2235
- if (this.opts.enableCumulativeContext && this.contextStore) {
2236
- try {
2237
- contextData = this.contextStore.getAllData();
2238
- if (contextData && Object.keys(contextData).length === 0) {
2239
- contextData = void 0;
2240
- }
2241
- } catch (error) {
2242
- debug4("Failed to get context data for cache:", error);
2243
- }
2244
- }
2245
- const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt, contextData);
2188
+ const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt);
2246
2189
  if (matchedCache && this.taskCache?.isCacheResultUsed) {
2247
2190
  const { executor: executor2 } = await this.taskExecutor.loadYamlFlowAsPlanning(
2248
2191
  taskPrompt,
@@ -2252,28 +2195,6 @@ var PageAgent = class {
2252
2195
  debug4("matched cache, will call .runYaml to run the action");
2253
2196
  const yaml5 = matchedCache.cacheContent?.yamlWorkflow;
2254
2197
  const result = await this.runYaml(yaml5);
2255
- if (this.opts.enableCumulativeContext && this.contextStore) {
2256
- try {
2257
- const executionResult = {
2258
- success: true,
2259
- actionType: "cached",
2260
- description: `Executed cached action: ${processedPrompt}`,
2261
- timing: result.metadata?.totalTime
2262
- };
2263
- this.contextStore.addStep({
2264
- type: "action",
2265
- summary: `Action: ${processedPrompt} (cached)`,
2266
- prompt: processedPrompt,
2267
- executionResult
2268
- });
2269
- debug4("Added cached action step to context store:", {
2270
- stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2271
- totalSteps: this.contextStore.getRecentSteps(100).length
2272
- });
2273
- } catch (error) {
2274
- debug4("Failed to add cached action step:", error);
2275
- }
2276
- }
2277
2198
  return {
2278
2199
  result: result.result,
2279
2200
  metadata: metadata2
@@ -2298,114 +2219,17 @@ var PageAgent = class {
2298
2219
  prompt: taskPrompt,
2299
2220
  yamlWorkflow: yamlFlowStr
2300
2221
  },
2301
- matchedCache,
2302
- contextData
2303
- // Pass context data for cache creation
2222
+ matchedCache
2304
2223
  );
2305
2224
  }
2306
2225
  const metadata = this.afterTaskRunning(executor);
2307
- if (this.opts.enableCumulativeContext && this.contextStore) {
2308
- try {
2309
- const executionResult = this.analyzeExecutionResults(executor, originalPrompt);
2310
- this.contextStore.addStep({
2311
- type: "action",
2312
- summary: `Action: ${processedPrompt}`,
2313
- prompt: processedPrompt,
2314
- executionResult
2315
- });
2316
- debug4("Added action step with execution result to context store:", {
2317
- stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2318
- totalSteps: this.contextStore.getRecentSteps(100).length,
2319
- executionResult
2320
- });
2321
- } catch (error) {
2322
- debug4("Failed to analyze execution results, adding step without execution result:", error);
2323
- try {
2324
- this.contextStore.addStep({
2325
- type: "action",
2326
- summary: `Action: ${processedPrompt}`,
2327
- prompt: processedPrompt
2328
- });
2329
- } catch (stepError) {
2330
- debug4("Failed to add action step:", stepError);
2331
- }
2332
- }
2333
- }
2334
2226
  return {
2335
2227
  result: output,
2336
2228
  metadata
2337
2229
  };
2338
2230
  }
2339
2231
  async aiQuery(demand) {
2340
- let processedDemand = demand;
2341
- let storageKey;
2342
- try {
2343
- const aiModel = await import("misoai-core/ai-model");
2344
- const contextStore = aiModel.getContextStore();
2345
- if (typeof demand === "string") {
2346
- const storageInstruction = contextStore.parseStorageInstruction(demand);
2347
- if (storageInstruction) {
2348
- storageKey = storageInstruction.key;
2349
- processedDemand = storageInstruction.cleanText;
2350
- contextStore._pendingAliases = storageInstruction.aliases;
2351
- } else {
2352
- const storageMatch = demand.match(/store\s+(?:as\s+)?(\w+)/i);
2353
- if (storageMatch) {
2354
- storageKey = storageMatch[1];
2355
- processedDemand = demand.replace(/,?\s*store\s+(?:as\s+)?\w+/i, "").trim();
2356
- }
2357
- }
2358
- }
2359
- } catch (error) {
2360
- debug4("Context store not available:", error);
2361
- }
2362
- const { output, executor } = await this.taskExecutor.query(processedDemand);
2363
- if (this.opts.enableCumulativeContext && this.contextStore) {
2364
- if (storageKey && output) {
2365
- try {
2366
- const pendingAliases = this.contextStore._pendingAliases;
2367
- if (pendingAliases) {
2368
- this.contextStore.storeDataWithAliases(storageKey, output, pendingAliases, typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand));
2369
- delete this.contextStore._pendingAliases;
2370
- debug4("Stored query result with aliases:", {
2371
- key: storageKey,
2372
- value: output,
2373
- aliases: pendingAliases
2374
- });
2375
- } else {
2376
- this.contextStore.storeData(storageKey, output);
2377
- debug4("Stored query result:", {
2378
- key: storageKey,
2379
- value: output
2380
- });
2381
- }
2382
- this.contextStore.addStep({
2383
- type: "query",
2384
- summary: `Query: ${typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)} (stored as ${storageKey})`,
2385
- data: output,
2386
- prompt: typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)
2387
- });
2388
- debug4("Added query step to context store:", {
2389
- storageKey,
2390
- totalStoredItems: Object.keys(this.contextStore.getAllData()).length,
2391
- totalSteps: this.contextStore.getRecentSteps(100).length
2392
- });
2393
- } catch (error) {
2394
- debug4("Failed to store query result:", error);
2395
- }
2396
- } else {
2397
- try {
2398
- this.contextStore.addStep({
2399
- type: "query",
2400
- summary: `Query: ${typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)}`,
2401
- data: output,
2402
- prompt: typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)
2403
- });
2404
- } catch (error) {
2405
- debug4("Failed to add query step:", error);
2406
- }
2407
- }
2408
- }
2232
+ const { output, executor } = await this.taskExecutor.query(demand);
2409
2233
  const metadata = this.afterTaskRunning(executor);
2410
2234
  return {
2411
2235
  result: output,
@@ -2515,48 +2339,6 @@ var PageAgent = class {
2515
2339
  };
2516
2340
  }
2517
2341
  async aiAssert(assertion, msg, opt) {
2518
- let executionContext = "";
2519
- if (this.opts.enableCumulativeContext && this.contextStore) {
2520
- try {
2521
- const recentSteps = this.contextStore.getRecentSteps(3);
2522
- const stepsWithExecutionResults = recentSteps.filter((step) => step.executionResult);
2523
- const storedData = this.contextStore.getAllData();
2524
- if (stepsWithExecutionResults.length > 0) {
2525
- const recentActions = stepsWithExecutionResults.map((step) => {
2526
- const result = step.executionResult;
2527
- return `- ${result.description}${result.success ? "" : " (FAILED)"}`;
2528
- }).join("\n");
2529
- executionContext = `
2530
-
2531
- Recent actions performed:
2532
- ${recentActions}
2533
-
2534
- This context may help verify the assertion.`;
2535
- }
2536
- if (storedData && Object.keys(storedData).length > 0) {
2537
- executionContext += `
2538
-
2539
- Available data for reference:
2540
- ${JSON.stringify(storedData, null, 2)}
2541
-
2542
- Note: If the assertion references any data keys or natural language equivalents, consider the stored values when verifying.`;
2543
- debug4("Available data for aiAssert:", {
2544
- assertion,
2545
- availableData: storedData
2546
- });
2547
- }
2548
- this.contextStore.addStep({
2549
- type: "assertion",
2550
- summary: `Assertion: ${assertion}`,
2551
- prompt: assertion
2552
- });
2553
- debug4("Added assertion step to context store:", {
2554
- totalSteps: this.contextStore.getRecentSteps(100).length
2555
- });
2556
- } catch (error) {
2557
- debug4("Context store operation failed:", error);
2558
- }
2559
- }
2560
2342
  let currentUrl = "";
2561
2343
  if (this.page.url) {
2562
2344
  try {
@@ -2564,13 +2346,7 @@ Note: If the assertion references any data keys or natural language equivalents,
2564
2346
  } catch (e) {
2565
2347
  }
2566
2348
  }
2567
- let assertionWithContext = assertion;
2568
- if (currentUrl) {
2569
- assertionWithContext = `For the page at URL "${currentUrl}", ${assertion}`;
2570
- }
2571
- if (executionContext) {
2572
- assertionWithContext += executionContext;
2573
- }
2349
+ const assertionWithContext = currentUrl ? `For the page at URL "${currentUrl}", ${assertion}` : assertion;
2574
2350
  const { output, executor } = await this.taskExecutor.assert(assertionWithContext);
2575
2351
  const metadata = this.afterTaskRunning(executor, true);
2576
2352
  if (output && opt?.keepRawResponse) {
@@ -2781,81 +2557,42 @@ ${errors}`);
2781
2557
  }
2782
2558
  throw new Error("evaluateJavaScript is not supported in current agent");
2783
2559
  }
2784
- async destroy() {
2785
- await this.page.destroy();
2786
- }
2787
- /**
2788
- * Analyze execution results from executor to generate meaningful descriptions
2789
- */
2790
- analyzeExecutionResults(executor, originalPrompt) {
2791
- const tasks = executor.tasks;
2792
- const success = !executor.isInErrorState();
2793
- if (!success) {
2794
- const errorTask = executor.latestErrorTask();
2795
- return {
2796
- success: false,
2797
- actionType: "error",
2798
- description: `Failed to execute: ${originalPrompt}`,
2799
- error: errorTask?.error
2800
- };
2801
- }
2802
- const actionTasks = tasks.filter((t) => t.type === "Action" && t.status === "finished");
2803
- const locateTasks = tasks.filter((t) => t.type === "Insight" && t.subType === "Locate");
2804
- const lastAction = actionTasks[actionTasks.length - 1];
2805
- const lastLocate = locateTasks[locateTasks.length - 1];
2806
- if (!lastAction) {
2807
- return {
2808
- success: true,
2809
- actionType: "unknown",
2810
- description: `Completed: ${originalPrompt}`
2560
+ async logScreenshot(title, options) {
2561
+ const screenshotTitle = title || "untitled";
2562
+ const content = options?.content || "";
2563
+ const screenshot = await this.page.screenshotBase64?.();
2564
+ if (screenshot) {
2565
+ const executionDump = {
2566
+ name: screenshotTitle,
2567
+ description: content,
2568
+ tasks: [{
2569
+ type: "Screenshot",
2570
+ subType: "log",
2571
+ status: "finished",
2572
+ executor: null,
2573
+ param: {
2574
+ title: screenshotTitle,
2575
+ content
2576
+ },
2577
+ output: {
2578
+ screenshot
2579
+ },
2580
+ thought: `Logged screenshot: ${screenshotTitle}`,
2581
+ timing: {
2582
+ start: Date.now(),
2583
+ end: Date.now(),
2584
+ cost: 0
2585
+ }
2586
+ }],
2587
+ sdkVersion: "1.0.0",
2588
+ logTime: Date.now(),
2589
+ model_name: "screenshot"
2811
2590
  };
2591
+ this.appendExecutionDump(executionDump);
2812
2592
  }
2813
- const actionType = lastAction.subType || "unknown";
2814
- const elementInfo = this.extractElementInfo(lastLocate, lastAction);
2815
- const description = this.generateActionDescription(actionType, lastAction.param, elementInfo);
2816
- return {
2817
- success: true,
2818
- actionType,
2819
- description,
2820
- elementInfo,
2821
- timing: lastAction.timing?.cost
2822
- };
2823
2593
  }
2824
- /**
2825
- * Extract element information from locate task
2826
- */
2827
- extractElementInfo(locateTask, _actionTask) {
2828
- if (!locateTask?.output?.element)
2829
- return void 0;
2830
- const element = locateTask.output.element;
2831
- return {
2832
- type: element.attributes?.nodeType || "unknown",
2833
- text: element.content || element.attributes?.placeholder || element.attributes?.title || "",
2834
- location: `(${element.center[0]}, ${element.center[1]})`
2835
- };
2836
- }
2837
- /**
2838
- * Generate natural language description for actions
2839
- */
2840
- generateActionDescription(actionType, param, elementInfo) {
2841
- const elementDesc = elementInfo ? `'${elementInfo.text || elementInfo.type}' element` : "element";
2842
- switch (actionType) {
2843
- case "Tap":
2844
- return `Clicked on ${elementDesc}`;
2845
- case "Input":
2846
- const inputValue = param?.value || "";
2847
- return `Entered "${inputValue}" into ${elementDesc}`;
2848
- case "KeyboardPress":
2849
- return `Pressed ${param?.value || "key"}`;
2850
- case "Scroll":
2851
- return `Scrolled ${param?.direction || "on page"}`;
2852
- case "Hover":
2853
- return `Hovered over ${elementDesc}`;
2854
- case "Drag":
2855
- return `Dragged ${elementDesc}`;
2856
- default:
2857
- return `Performed ${actionType} action on ${elementDesc}`;
2858
- }
2594
+ async destroy() {
2595
+ await this.page.destroy();
2859
2596
  }
2860
2597
  };
2861
2598