misoai-web 1.0.6 → 1.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. package/README.md +5 -349
  2. package/dist/es/agent.js +165 -428
  3. package/dist/es/agent.js.map +1 -1
  4. package/dist/es/bridge-mode-browser.js +10 -9
  5. package/dist/es/bridge-mode-browser.js.map +1 -1
  6. package/dist/es/bridge-mode.js +167 -430
  7. package/dist/es/bridge-mode.js.map +1 -1
  8. package/dist/es/chrome-extension.js +173 -435
  9. package/dist/es/chrome-extension.js.map +1 -1
  10. package/dist/es/index.js +185 -432
  11. package/dist/es/index.js.map +1 -1
  12. package/dist/es/midscene-playground.js +165 -428
  13. package/dist/es/midscene-playground.js.map +1 -1
  14. package/dist/es/midscene-server.js.map +1 -1
  15. package/dist/es/playground.js +165 -428
  16. package/dist/es/playground.js.map +1 -1
  17. package/dist/es/playwright-report.js +1 -1
  18. package/dist/es/playwright-report.js.map +1 -1
  19. package/dist/es/playwright.js +182 -429
  20. package/dist/es/playwright.js.map +1 -1
  21. package/dist/es/puppeteer-agent-launcher.js +169 -432
  22. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  23. package/dist/es/puppeteer.js +169 -432
  24. package/dist/es/puppeteer.js.map +1 -1
  25. package/dist/es/ui-utils.js.map +1 -1
  26. package/dist/es/utils.js +7 -4
  27. package/dist/es/utils.js.map +1 -1
  28. package/dist/es/yaml.js +29 -3
  29. package/dist/es/yaml.js.map +1 -1
  30. package/dist/lib/agent.js +163 -426
  31. package/dist/lib/agent.js.map +1 -1
  32. package/dist/lib/bridge-mode-browser.js +10 -9
  33. package/dist/lib/bridge-mode-browser.js.map +1 -1
  34. package/dist/lib/bridge-mode.js +165 -428
  35. package/dist/lib/bridge-mode.js.map +1 -1
  36. package/dist/lib/chrome-extension.js +171 -433
  37. package/dist/lib/chrome-extension.js.map +1 -1
  38. package/dist/lib/index.js +183 -430
  39. package/dist/lib/index.js.map +1 -1
  40. package/dist/lib/midscene-playground.js +163 -426
  41. package/dist/lib/midscene-playground.js.map +1 -1
  42. package/dist/lib/midscene-server.js.map +1 -1
  43. package/dist/lib/playground.js +163 -426
  44. package/dist/lib/playground.js.map +1 -1
  45. package/dist/lib/playwright-report.js +1 -1
  46. package/dist/lib/playwright-report.js.map +1 -1
  47. package/dist/lib/playwright.js +180 -427
  48. package/dist/lib/playwright.js.map +1 -1
  49. package/dist/lib/puppeteer-agent-launcher.js +167 -430
  50. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  51. package/dist/lib/puppeteer.js +167 -430
  52. package/dist/lib/puppeteer.js.map +1 -1
  53. package/dist/lib/ui-utils.js.map +1 -1
  54. package/dist/lib/utils.js +7 -4
  55. package/dist/lib/utils.js.map +1 -1
  56. package/dist/lib/yaml.js +29 -3
  57. package/dist/lib/yaml.js.map +1 -1
  58. package/dist/types/agent.d.ts +13 -51
  59. package/dist/types/bridge-mode-browser.d.ts +2 -3
  60. package/dist/types/bridge-mode.d.ts +2 -3
  61. package/dist/types/{browser-aec1055d.d.ts → browser-9b472ffb.d.ts} +1 -1
  62. package/dist/types/chrome-extension.d.ts +2 -3
  63. package/dist/types/index.d.ts +1 -2
  64. package/dist/types/midscene-server.d.ts +1 -2
  65. package/dist/types/{page-86ab0fe1.d.ts → page-ed0ecb44.d.ts} +19 -9
  66. package/dist/types/playground.d.ts +2 -3
  67. package/dist/types/playwright.d.ts +9 -2
  68. package/dist/types/puppeteer-agent-launcher.d.ts +1 -2
  69. package/dist/types/puppeteer.d.ts +6 -5
  70. package/dist/types/ui-utils.d.ts +1 -1
  71. package/dist/types/utils.d.ts +1 -2
  72. package/dist/types/yaml.d.ts +1 -2
  73. package/iife-script/htmlElement.js +53 -75
  74. package/iife-script/htmlElementDebug.js +35 -56
  75. package/package.json +24 -24
  76. package/LICENSE +0 -21
@@ -78,7 +78,8 @@ var WebElementInfo = class {
78
78
  id,
79
79
  attributes,
80
80
  indexId,
81
- xpaths
81
+ xpaths,
82
+ isVisible
82
83
  }) {
83
84
  this.content = content;
84
85
  this.rect = rect;
@@ -91,6 +92,7 @@ var WebElementInfo = class {
91
92
  this.attributes = attributes;
92
93
  this.indexId = indexId;
93
94
  this.xpaths = xpaths;
95
+ this.isVisible = isVisible;
94
96
  }
95
97
  };
96
98
 
@@ -113,14 +115,15 @@ async function parseContextFromWebPage(page, _opt) {
113
115
  })
114
116
  ]);
115
117
  const webTree = (0, import_extractor.traverseTree)(tree, (elementInfo) => {
116
- const { rect, id, content, attributes, locator, indexId } = elementInfo;
118
+ const { rect, id, content, attributes, locator, indexId, isVisible } = elementInfo;
117
119
  return new WebElementInfo({
118
120
  rect,
119
121
  locator,
120
122
  id,
121
123
  content,
122
124
  attributes,
123
- indexId
125
+ indexId,
126
+ isVisible
124
127
  });
125
128
  });
126
129
  (0, import_utils2.assert)(screenshotBase64, "screenshotBase64 is required");
@@ -151,7 +154,7 @@ function printReportMsg(filepath) {
151
154
  }
152
155
  var ERROR_CODE_NOT_IMPLEMENTED_AS_DESIGNED = "NOT_IMPLEMENTED_AS_DESIGNED";
153
156
  function replaceIllegalPathCharsAndSpace(str) {
154
- return str.replace(/[/\\:*?"<>| ]/g, "-");
157
+ return str.replace(/[:*?"<>| ]/g, "-");
155
158
  }
156
159
  function matchElementFromPlan(planLocateParam, tree) {
157
160
  if (!planLocateParam) {
@@ -193,10 +196,11 @@ var ScriptPlayer = class {
193
196
  this.unnamedResultIndex = 0;
194
197
  this.pageAgent = null;
195
198
  this.result = {};
199
+ const target = script.target || script.web || script.android;
196
200
  if (import_utils3.ifInBrowser) {
197
201
  this.output = void 0;
198
- } else if (script.target?.output) {
199
- this.output = (0, import_node_path.resolve)(process.cwd(), script.target.output);
202
+ } else if (target?.output) {
203
+ this.output = (0, import_node_path.resolve)(process.cwd(), target.output);
200
204
  } else {
201
205
  this.output = (0, import_node_path.join)((0, import_common.getMidsceneRunSubDir)("output"), `${process.pid}.json`);
202
206
  }
@@ -270,15 +274,20 @@ var ScriptPlayer = class {
270
274
  } else if ("aiAssert" in flowItem) {
271
275
  const assertTask = flowItem;
272
276
  const prompt = assertTask.aiAssert;
277
+ const msg = assertTask.errorMessage;
273
278
  (0, import_utils3.assert)(prompt, "missing prompt for aiAssert");
274
279
  (0, import_utils3.assert)(
275
280
  typeof prompt === "string",
276
281
  "prompt for aiAssert must be a string"
277
282
  );
278
- await agent.aiAssert(prompt);
283
+ await agent.aiAssert(prompt, msg);
279
284
  } else if ("aiQuery" in flowItem) {
280
285
  const queryTask = flowItem;
281
286
  const prompt = queryTask.aiQuery;
287
+ const options = {
288
+ domIncluded: queryTask.domIncluded,
289
+ screenshotIncluded: queryTask.screenshotIncluded
290
+ };
282
291
  (0, import_utils3.assert)(prompt, "missing prompt for aiQuery");
283
292
  (0, import_utils3.assert)(
284
293
  typeof prompt === "string",
@@ -289,6 +298,10 @@ var ScriptPlayer = class {
289
298
  } else if ("aiNumber" in flowItem) {
290
299
  const numberTask = flowItem;
291
300
  const prompt = numberTask.aiNumber;
301
+ const options = {
302
+ domIncluded: numberTask.domIncluded,
303
+ screenshotIncluded: numberTask.screenshotIncluded
304
+ };
292
305
  (0, import_utils3.assert)(prompt, "missing prompt for number");
293
306
  (0, import_utils3.assert)(
294
307
  typeof prompt === "string",
@@ -299,6 +312,10 @@ var ScriptPlayer = class {
299
312
  } else if ("aiString" in flowItem) {
300
313
  const stringTask = flowItem;
301
314
  const prompt = stringTask.aiString;
315
+ const options = {
316
+ domIncluded: stringTask.domIncluded,
317
+ screenshotIncluded: stringTask.screenshotIncluded
318
+ };
302
319
  (0, import_utils3.assert)(prompt, "missing prompt for string");
303
320
  (0, import_utils3.assert)(
304
321
  typeof prompt === "string",
@@ -309,6 +326,10 @@ var ScriptPlayer = class {
309
326
  } else if ("aiBoolean" in flowItem) {
310
327
  const booleanTask = flowItem;
311
328
  const prompt = booleanTask.aiBoolean;
329
+ const options = {
330
+ domIncluded: booleanTask.domIncluded,
331
+ screenshotIncluded: booleanTask.screenshotIncluded
332
+ };
312
333
  (0, import_utils3.assert)(prompt, "missing prompt for boolean");
313
334
  (0, import_utils3.assert)(
314
335
  typeof prompt === "string",
@@ -351,6 +372,9 @@ var ScriptPlayer = class {
351
372
  } else if ("aiTap" in flowItem) {
352
373
  const tapTask = flowItem;
353
374
  await agent.aiTap(tapTask.aiTap, tapTask);
375
+ } else if ("aiRightClick" in flowItem) {
376
+ const rightClickTask = flowItem;
377
+ await agent.aiRightClick(rightClickTask.aiRightClick, rightClickTask);
354
378
  } else if ("aiHover" in flowItem) {
355
379
  const hoverTask = flowItem;
356
380
  await agent.aiHover(hoverTask.aiHover, hoverTask);
@@ -373,6 +397,11 @@ var ScriptPlayer = class {
373
397
  evaluateJavaScriptTask.javascript
374
398
  );
375
399
  this.setResult(evaluateJavaScriptTask.name, result);
400
+ } else if ("logScreenshot" in flowItem) {
401
+ const logScreenshotTask = flowItem;
402
+ await agent.logScreenshot(logScreenshotTask.logScreenshot, {
403
+ content: logScreenshotTask.content || ""
404
+ });
376
405
  } else {
377
406
  throw new Error(`unknown flowItem: ${JSON.stringify(flowItem)}`);
378
407
  }
@@ -888,10 +917,10 @@ var PageTaskExecutor = class {
888
917
  if (!taskParam || !taskParam.value) {
889
918
  return;
890
919
  }
891
- await this.page.keyboard.type(taskParam.value);
892
- } else {
893
- await this.page.keyboard.type(taskParam.value);
894
920
  }
921
+ await this.page.keyboard.type(taskParam.value, {
922
+ autoDismissKeyboard: taskParam.autoDismissKeyboard
923
+ });
895
924
  }
896
925
  };
897
926
  tasks.push(taskActionInput);
@@ -920,6 +949,22 @@ var PageTaskExecutor = class {
920
949
  }
921
950
  };
922
951
  tasks.push(taskActionTap);
952
+ } else if (plan2.type === "RightClick") {
953
+ const taskActionRightClick = {
954
+ type: "Action",
955
+ subType: "RightClick",
956
+ thought: plan2.thought,
957
+ locate: plan2.locate,
958
+ executor: async (param, { element }) => {
959
+ (0, import_utils6.assert)(element, "Element not found, cannot right click");
960
+ await this.page.mouse.click(
961
+ element.center[0],
962
+ element.center[1],
963
+ { button: "right" }
964
+ );
965
+ }
966
+ };
967
+ tasks.push(taskActionRightClick);
923
968
  } else if (plan2.type === "Drag") {
924
969
  const taskActionDrag = {
925
970
  type: "Action",
@@ -1448,7 +1493,7 @@ var PageTaskExecutor = class {
1448
1493
  executor: taskExecutor
1449
1494
  };
1450
1495
  }
1451
- async createTypeQueryTask(type, demand) {
1496
+ async createTypeQueryTask(type, demand, opt) {
1452
1497
  const taskExecutor = new import_misoai_core.Executor(
1453
1498
  taskTitleStr(
1454
1499
  type,
@@ -1479,7 +1524,10 @@ var PageTaskExecutor = class {
1479
1524
  result: `${type}, ${demand}`
1480
1525
  };
1481
1526
  }
1482
- const { data, usage } = await this.insight.extract(demandInput);
1527
+ const { data, usage } = await this.insight.extract(
1528
+ demandInput,
1529
+ opt
1530
+ );
1483
1531
  let outputResult = data;
1484
1532
  if (ifTypeRestricted) {
1485
1533
  (0, import_utils6.assert)(data?.result !== void 0, "No result in query data");
@@ -1499,17 +1547,17 @@ var PageTaskExecutor = class {
1499
1547
  executor: taskExecutor
1500
1548
  };
1501
1549
  }
1502
- async query(demand) {
1503
- return this.createTypeQueryTask("Query", demand);
1550
+ async query(demand, opt) {
1551
+ return this.createTypeQueryTask("Query", demand, opt);
1504
1552
  }
1505
- async boolean(prompt) {
1506
- return this.createTypeQueryTask("Boolean", prompt);
1553
+ async boolean(prompt, opt) {
1554
+ return this.createTypeQueryTask("Boolean", prompt, opt);
1507
1555
  }
1508
- async number(prompt) {
1509
- return this.createTypeQueryTask("Number", prompt);
1556
+ async number(prompt, opt) {
1557
+ return this.createTypeQueryTask("Number", prompt, opt);
1510
1558
  }
1511
- async string(prompt) {
1512
- return this.createTypeQueryTask("String", prompt);
1559
+ async string(prompt, opt) {
1560
+ return this.createTypeQueryTask("String", prompt, opt);
1513
1561
  }
1514
1562
  async assert(assertion) {
1515
1563
  const description = `assert: ${assertion}`;
@@ -1645,7 +1693,7 @@ function buildPlans(type, locateParam, param) {
1645
1693
  param: locateParam,
1646
1694
  thought: ""
1647
1695
  } : null;
1648
- if (type === "Tap" || type === "Hover") {
1696
+ if (type === "Tap" || type === "Hover" || type === "RightClick") {
1649
1697
  (0, import_utils8.assert)(locateParam, `missing locate info for action "${type}"`);
1650
1698
  (0, import_utils8.assert)(locatePlan, `missing locate info for action "${type}"`);
1651
1699
  const tapPlan = {
@@ -1725,7 +1773,7 @@ var import_js_yaml3 = __toESM(require("js-yaml"));
1725
1773
  var import_semver = __toESM(require("semver"));
1726
1774
 
1727
1775
  // package.json
1728
- var version = "1.0.5";
1776
+ var version = "1.0.3";
1729
1777
 
1730
1778
  // src/common/task-cache.ts
1731
1779
  var debug3 = (0, import_logger3.getDebug)("cache");
@@ -1753,70 +1801,44 @@ var TaskCache = class {
1753
1801
  this.cache = cacheContent;
1754
1802
  this.cacheOriginalLength = this.cache.caches.length;
1755
1803
  }
1756
- matchCache(prompt, type, contextData) {
1757
- const contextHash = contextData ? this.generateContextHash(contextData) : void 0;
1804
+ matchCache(prompt, type) {
1758
1805
  for (let i = 0; i < this.cacheOriginalLength; i++) {
1759
1806
  const item = this.cache.caches[i];
1760
1807
  const key = `${type}:${prompt}:${i}`;
1761
- if (item.type !== type || item.prompt !== prompt || this.matchedCacheIndices.has(key)) {
1762
- continue;
1763
- }
1764
- if (type === "plan" && item.type === "plan") {
1765
- const planItem = item;
1766
- if (contextHash && planItem.contextHash) {
1767
- if (contextHash !== planItem.contextHash) {
1768
- debug3("cache context mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1769
- continue;
1808
+ if (item.type === type && item.prompt === prompt && !this.matchedCacheIndices.has(key)) {
1809
+ this.matchedCacheIndices.add(key);
1810
+ debug3(
1811
+ "cache found and marked as used, type: %s, prompt: %s, index: %d",
1812
+ type,
1813
+ prompt,
1814
+ i
1815
+ );
1816
+ return {
1817
+ cacheContent: item,
1818
+ updateFn: (cb) => {
1819
+ debug3(
1820
+ "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1821
+ type,
1822
+ prompt,
1823
+ i
1824
+ );
1825
+ cb(item);
1826
+ debug3(
1827
+ "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1828
+ type,
1829
+ prompt,
1830
+ i
1831
+ );
1832
+ this.flushCacheToFile();
1770
1833
  }
1771
- } else if (contextHash || planItem.contextHash) {
1772
- debug3("cache context availability mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1773
- continue;
1774
- }
1834
+ };
1775
1835
  }
1776
- this.matchedCacheIndices.add(key);
1777
- debug3(
1778
- "cache found and marked as used, type: %s, prompt: %s, index: %d, contextMatch: %s",
1779
- type,
1780
- prompt,
1781
- i,
1782
- contextHash ? "yes" : "no-context"
1783
- );
1784
- return {
1785
- cacheContent: item,
1786
- updateFn: (cb) => {
1787
- debug3(
1788
- "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1789
- type,
1790
- prompt,
1791
- i
1792
- );
1793
- cb(item);
1794
- debug3(
1795
- "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1796
- type,
1797
- prompt,
1798
- i
1799
- );
1800
- this.flushCacheToFile();
1801
- }
1802
- };
1803
1836
  }
1804
- debug3("no unused cache found, type: %s, prompt: %s, contextHash: %s", type, prompt, contextHash);
1837
+ debug3("no unused cache found, type: %s, prompt: %s", type, prompt);
1805
1838
  return void 0;
1806
1839
  }
1807
- generateContextHash(contextData) {
1808
- const sortedKeys = Object.keys(contextData).sort();
1809
- const stableString = sortedKeys.map((key) => `${key}:${JSON.stringify(contextData[key])}`).join("|");
1810
- let hash = 0;
1811
- for (let i = 0; i < stableString.length; i++) {
1812
- const char = stableString.charCodeAt(i);
1813
- hash = (hash << 5) - hash + char;
1814
- hash = hash & hash;
1815
- }
1816
- return hash.toString(36);
1817
- }
1818
- matchPlanCache(prompt, contextData) {
1819
- return this.matchCache(prompt, "plan", contextData);
1840
+ matchPlanCache(prompt) {
1841
+ return this.matchCache(prompt, "plan");
1820
1842
  }
1821
1843
  matchLocateCache(prompt) {
1822
1844
  return this.matchCache(prompt, "locate");
@@ -1882,8 +1904,14 @@ cache file: ${cacheFile}`
1882
1904
  return;
1883
1905
  }
1884
1906
  try {
1907
+ const dir = (0, import_node_path2.dirname)(this.cacheFilePath);
1908
+ if (!(0, import_node_fs2.existsSync)(dir)) {
1909
+ (0, import_node_fs2.mkdirSync)(dir, { recursive: true });
1910
+ debug3("created cache directory: %s", dir);
1911
+ }
1885
1912
  const yamlData = import_js_yaml3.default.dump(this.cache);
1886
1913
  (0, import_node_fs2.writeFileSync)(this.cacheFilePath, yamlData);
1914
+ debug3("cache flushed to file: %s", this.cacheFilePath);
1887
1915
  } catch (err) {
1888
1916
  debug3(
1889
1917
  "write cache to file failed, path: %s, error: %s",
@@ -1892,16 +1920,11 @@ cache file: ${cacheFile}`
1892
1920
  );
1893
1921
  }
1894
1922
  }
1895
- updateOrAppendCacheRecord(newRecord, cachedRecord, contextData) {
1923
+ updateOrAppendCacheRecord(newRecord, cachedRecord) {
1896
1924
  if (cachedRecord) {
1897
1925
  if (newRecord.type === "plan") {
1898
1926
  cachedRecord.updateFn((cache) => {
1899
- const planCache = cache;
1900
- planCache.yamlWorkflow = newRecord.yamlWorkflow;
1901
- if (contextData) {
1902
- planCache.contextHash = this.generateContextHash(contextData);
1903
- planCache.contextData = { ...contextData };
1904
- }
1927
+ cache.yamlWorkflow = newRecord.yamlWorkflow;
1905
1928
  });
1906
1929
  } else {
1907
1930
  cachedRecord.updateFn((cache) => {
@@ -1909,11 +1932,6 @@ cache file: ${cacheFile}`
1909
1932
  });
1910
1933
  }
1911
1934
  } else {
1912
- if (newRecord.type === "plan" && contextData) {
1913
- const planRecord = newRecord;
1914
- planRecord.contextHash = this.generateContextHash(contextData);
1915
- planRecord.contextData = { ...contextData };
1916
- }
1917
1935
  this.appendCache(newRecord);
1918
1936
  }
1919
1937
  }
@@ -1943,13 +1961,10 @@ var PageAgent = class {
1943
1961
  generateReport: true,
1944
1962
  autoPrintReportMsg: true,
1945
1963
  groupName: "Midscene Report",
1946
- groupDescription: "",
1947
- enableCumulativeContext: true,
1948
- autoClearContext: false
1964
+ groupDescription: ""
1949
1965
  },
1950
1966
  opts || {}
1951
1967
  );
1952
- this.initializeContextStore();
1953
1968
  if (this.page.pageType === "puppeteer" || this.page.pageType === "playwright") {
1954
1969
  this.page.waitForNavigationTimeout = this.opts.waitForNavigationTimeout || import_constants2.DEFAULT_WAIT_FOR_NAVIGATION_TIMEOUT;
1955
1970
  this.page.waitForNetworkIdleTimeout = this.opts.waitForNetworkIdleTimeout || import_constants2.DEFAULT_WAIT_FOR_NETWORK_IDLE_TIMEOUT;
@@ -1976,69 +1991,6 @@ var PageAgent = class {
1976
1991
  opts?.testId || this.page.pageType || "web"
1977
1992
  );
1978
1993
  }
1979
- /**
1980
- * Initialize context store for cumulative context functionality
1981
- */
1982
- async initializeContextStore() {
1983
- if (!this.opts.enableCumulativeContext) {
1984
- debug4("Cumulative context disabled via options");
1985
- return;
1986
- }
1987
- try {
1988
- const aiModel = await import("misoai-core/ai-model");
1989
- this.contextStore = aiModel.getContextStore();
1990
- debug4("Context store initialized successfully", {
1991
- autoClearContext: this.opts.autoClearContext,
1992
- testId: this.opts.testId
1993
- });
1994
- if (this.opts.autoClearContext) {
1995
- this.contextStore.clear();
1996
- debug4("Context store cleared due to autoClearContext option");
1997
- } else {
1998
- const existingData = this.contextStore.getAllData();
1999
- const existingSteps = this.contextStore.getRecentSteps(100).length;
2000
- debug4("Context store preserving existing data", {
2001
- existingDataKeys: Object.keys(existingData),
2002
- existingStepsCount: existingSteps
2003
- });
2004
- }
2005
- } catch (error) {
2006
- debug4("Failed to initialize context store:", error);
2007
- console.warn("⚠️ Could not initialize context store:", error);
2008
- }
2009
- }
2010
- /**
2011
- * Get the context store instance
2012
- */
2013
- getContextStore() {
2014
- return this.contextStore;
2015
- }
2016
- /**
2017
- * Clear the context store
2018
- */
2019
- clearContext() {
2020
- if (this.contextStore) {
2021
- this.contextStore.clear();
2022
- }
2023
- }
2024
- /**
2025
- * Get all stored data from context store
2026
- */
2027
- getStoredData() {
2028
- if (this.contextStore) {
2029
- return this.contextStore.getAllData();
2030
- }
2031
- return {};
2032
- }
2033
- /**
2034
- * Get step summary from context store
2035
- */
2036
- getStepSummary() {
2037
- if (this.contextStore) {
2038
- return this.contextStore.getStepSummary();
2039
- }
2040
- return "";
2041
- }
2042
1994
  async getUIContext(action) {
2043
1995
  if (action && (action === "extract" || action === "assert" || action === "captcha")) {
2044
1996
  return await parseContextFromWebPage(this.page, {
@@ -2217,6 +2169,23 @@ var PageAgent = class {
2217
2169
  metadata
2218
2170
  };
2219
2171
  }
2172
+ async aiRightClick(locatePrompt, opt) {
2173
+ const detailedLocateParam = this.buildDetailedLocateParam(
2174
+ locatePrompt,
2175
+ opt
2176
+ );
2177
+ const plans = buildPlans("RightClick", detailedLocateParam);
2178
+ const { executor, output } = await this.taskExecutor.runPlans(
2179
+ taskTitleStr("RightClick", locateParamStr(detailedLocateParam)),
2180
+ plans,
2181
+ { cacheable: opt?.cacheable }
2182
+ );
2183
+ const metadata = this.afterTaskRunning(executor);
2184
+ return {
2185
+ result: output,
2186
+ metadata
2187
+ };
2188
+ }
2220
2189
  async aiInput(value, locatePrompt, opt) {
2221
2190
  (0, import_utils12.assert)(
2222
2191
  typeof value === "string",
@@ -2274,35 +2243,9 @@ var PageAgent = class {
2274
2243
  };
2275
2244
  }
2276
2245
  async aiAction(taskPrompt, opt) {
2277
- const originalPrompt = taskPrompt;
2278
- let processedPrompt = taskPrompt;
2279
- if (this.opts.enableCumulativeContext && this.contextStore) {
2280
- try {
2281
- const storedData = this.contextStore.getAllData();
2282
- if (Object.keys(storedData).length > 0) {
2283
- debug4("Available data for aiAction:", {
2284
- prompt: taskPrompt,
2285
- availableData: storedData
2286
- });
2287
- }
2288
- } catch (error) {
2289
- debug4("Context store operation failed:", error);
2290
- }
2291
- }
2292
2246
  const cacheable = opt?.cacheable;
2293
2247
  const isVlmUiTars = (0, import_env2.vlLocateMode)() === "vlm-ui-tars";
2294
- let contextData;
2295
- if (this.opts.enableCumulativeContext && this.contextStore) {
2296
- try {
2297
- contextData = this.contextStore.getAllData();
2298
- if (contextData && Object.keys(contextData).length === 0) {
2299
- contextData = void 0;
2300
- }
2301
- } catch (error) {
2302
- debug4("Failed to get context data for cache:", error);
2303
- }
2304
- }
2305
- const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt, contextData);
2248
+ const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt);
2306
2249
  if (matchedCache && this.taskCache?.isCacheResultUsed) {
2307
2250
  const { executor: executor2 } = await this.taskExecutor.loadYamlFlowAsPlanning(
2308
2251
  taskPrompt,
@@ -2312,28 +2255,6 @@ var PageAgent = class {
2312
2255
  debug4("matched cache, will call .runYaml to run the action");
2313
2256
  const yaml5 = matchedCache.cacheContent?.yamlWorkflow;
2314
2257
  const result = await this.runYaml(yaml5);
2315
- if (this.opts.enableCumulativeContext && this.contextStore) {
2316
- try {
2317
- const executionResult = {
2318
- success: true,
2319
- actionType: "cached",
2320
- description: `Executed cached action: ${processedPrompt}`,
2321
- timing: result.metadata?.totalTime
2322
- };
2323
- this.contextStore.addStep({
2324
- type: "action",
2325
- summary: `Action: ${processedPrompt} (cached)`,
2326
- prompt: processedPrompt,
2327
- executionResult
2328
- });
2329
- debug4("Added cached action step to context store:", {
2330
- stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2331
- totalSteps: this.contextStore.getRecentSteps(100).length
2332
- });
2333
- } catch (error) {
2334
- debug4("Failed to add cached action step:", error);
2335
- }
2336
- }
2337
2258
  return {
2338
2259
  result: result.result,
2339
2260
  metadata: metadata2
@@ -2358,114 +2279,17 @@ var PageAgent = class {
2358
2279
  prompt: taskPrompt,
2359
2280
  yamlWorkflow: yamlFlowStr
2360
2281
  },
2361
- matchedCache,
2362
- contextData
2363
- // Pass context data for cache creation
2282
+ matchedCache
2364
2283
  );
2365
2284
  }
2366
2285
  const metadata = this.afterTaskRunning(executor);
2367
- if (this.opts.enableCumulativeContext && this.contextStore) {
2368
- try {
2369
- const executionResult = this.analyzeExecutionResults(executor, originalPrompt);
2370
- this.contextStore.addStep({
2371
- type: "action",
2372
- summary: `Action: ${processedPrompt}`,
2373
- prompt: processedPrompt,
2374
- executionResult
2375
- });
2376
- debug4("Added action step with execution result to context store:", {
2377
- stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2378
- totalSteps: this.contextStore.getRecentSteps(100).length,
2379
- executionResult
2380
- });
2381
- } catch (error) {
2382
- debug4("Failed to analyze execution results, adding step without execution result:", error);
2383
- try {
2384
- this.contextStore.addStep({
2385
- type: "action",
2386
- summary: `Action: ${processedPrompt}`,
2387
- prompt: processedPrompt
2388
- });
2389
- } catch (stepError) {
2390
- debug4("Failed to add action step:", stepError);
2391
- }
2392
- }
2393
- }
2394
2286
  return {
2395
2287
  result: output,
2396
2288
  metadata
2397
2289
  };
2398
2290
  }
2399
2291
  async aiQuery(demand) {
2400
- let processedDemand = demand;
2401
- let storageKey;
2402
- try {
2403
- const aiModel = await import("misoai-core/ai-model");
2404
- const contextStore = aiModel.getContextStore();
2405
- if (typeof demand === "string") {
2406
- const storageInstruction = contextStore.parseStorageInstruction(demand);
2407
- if (storageInstruction) {
2408
- storageKey = storageInstruction.key;
2409
- processedDemand = storageInstruction.cleanText;
2410
- contextStore._pendingAliases = storageInstruction.aliases;
2411
- } else {
2412
- const storageMatch = demand.match(/store\s+(?:as\s+)?(\w+)/i);
2413
- if (storageMatch) {
2414
- storageKey = storageMatch[1];
2415
- processedDemand = demand.replace(/,?\s*store\s+(?:as\s+)?\w+/i, "").trim();
2416
- }
2417
- }
2418
- }
2419
- } catch (error) {
2420
- debug4("Context store not available:", error);
2421
- }
2422
- const { output, executor } = await this.taskExecutor.query(processedDemand);
2423
- if (this.opts.enableCumulativeContext && this.contextStore) {
2424
- if (storageKey && output) {
2425
- try {
2426
- const pendingAliases = this.contextStore._pendingAliases;
2427
- if (pendingAliases) {
2428
- this.contextStore.storeDataWithAliases(storageKey, output, pendingAliases, typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand));
2429
- delete this.contextStore._pendingAliases;
2430
- debug4("Stored query result with aliases:", {
2431
- key: storageKey,
2432
- value: output,
2433
- aliases: pendingAliases
2434
- });
2435
- } else {
2436
- this.contextStore.storeData(storageKey, output);
2437
- debug4("Stored query result:", {
2438
- key: storageKey,
2439
- value: output
2440
- });
2441
- }
2442
- this.contextStore.addStep({
2443
- type: "query",
2444
- summary: `Query: ${typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)} (stored as ${storageKey})`,
2445
- data: output,
2446
- prompt: typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)
2447
- });
2448
- debug4("Added query step to context store:", {
2449
- storageKey,
2450
- totalStoredItems: Object.keys(this.contextStore.getAllData()).length,
2451
- totalSteps: this.contextStore.getRecentSteps(100).length
2452
- });
2453
- } catch (error) {
2454
- debug4("Failed to store query result:", error);
2455
- }
2456
- } else {
2457
- try {
2458
- this.contextStore.addStep({
2459
- type: "query",
2460
- summary: `Query: ${typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)}`,
2461
- data: output,
2462
- prompt: typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)
2463
- });
2464
- } catch (error) {
2465
- debug4("Failed to add query step:", error);
2466
- }
2467
- }
2468
- }
2292
+ const { output, executor } = await this.taskExecutor.query(demand);
2469
2293
  const metadata = this.afterTaskRunning(executor);
2470
2294
  return {
2471
2295
  result: output,
@@ -2575,48 +2399,6 @@ var PageAgent = class {
2575
2399
  };
2576
2400
  }
2577
2401
  async aiAssert(assertion, msg, opt) {
2578
- let executionContext = "";
2579
- if (this.opts.enableCumulativeContext && this.contextStore) {
2580
- try {
2581
- const recentSteps = this.contextStore.getRecentSteps(3);
2582
- const stepsWithExecutionResults = recentSteps.filter((step) => step.executionResult);
2583
- const storedData = this.contextStore.getAllData();
2584
- if (stepsWithExecutionResults.length > 0) {
2585
- const recentActions = stepsWithExecutionResults.map((step) => {
2586
- const result = step.executionResult;
2587
- return `- ${result.description}${result.success ? "" : " (FAILED)"}`;
2588
- }).join("\n");
2589
- executionContext = `
2590
-
2591
- Recent actions performed:
2592
- ${recentActions}
2593
-
2594
- This context may help verify the assertion.`;
2595
- }
2596
- if (storedData && Object.keys(storedData).length > 0) {
2597
- executionContext += `
2598
-
2599
- Available data for reference:
2600
- ${JSON.stringify(storedData, null, 2)}
2601
-
2602
- Note: If the assertion references any data keys or natural language equivalents, consider the stored values when verifying.`;
2603
- debug4("Available data for aiAssert:", {
2604
- assertion,
2605
- availableData: storedData
2606
- });
2607
- }
2608
- this.contextStore.addStep({
2609
- type: "assertion",
2610
- summary: `Assertion: ${assertion}`,
2611
- prompt: assertion
2612
- });
2613
- debug4("Added assertion step to context store:", {
2614
- totalSteps: this.contextStore.getRecentSteps(100).length
2615
- });
2616
- } catch (error) {
2617
- debug4("Context store operation failed:", error);
2618
- }
2619
- }
2620
2402
  let currentUrl = "";
2621
2403
  if (this.page.url) {
2622
2404
  try {
@@ -2624,13 +2406,7 @@ Note: If the assertion references any data keys or natural language equivalents,
2624
2406
  } catch (e) {
2625
2407
  }
2626
2408
  }
2627
- let assertionWithContext = assertion;
2628
- if (currentUrl) {
2629
- assertionWithContext = `For the page at URL "${currentUrl}", ${assertion}`;
2630
- }
2631
- if (executionContext) {
2632
- assertionWithContext += executionContext;
2633
- }
2409
+ const assertionWithContext = currentUrl ? `For the page at URL "${currentUrl}", ${assertion}` : assertion;
2634
2410
  const { output, executor } = await this.taskExecutor.assert(assertionWithContext);
2635
2411
  const metadata = this.afterTaskRunning(executor, true);
2636
2412
  if (output && opt?.keepRawResponse) {
@@ -2841,81 +2617,42 @@ ${errors}`);
2841
2617
  }
2842
2618
  throw new Error("evaluateJavaScript is not supported in current agent");
2843
2619
  }
2844
- async destroy() {
2845
- await this.page.destroy();
2846
- }
2847
- /**
2848
- * Analyze execution results from executor to generate meaningful descriptions
2849
- */
2850
- analyzeExecutionResults(executor, originalPrompt) {
2851
- const tasks = executor.tasks;
2852
- const success = !executor.isInErrorState();
2853
- if (!success) {
2854
- const errorTask = executor.latestErrorTask();
2855
- return {
2856
- success: false,
2857
- actionType: "error",
2858
- description: `Failed to execute: ${originalPrompt}`,
2859
- error: errorTask?.error
2860
- };
2861
- }
2862
- const actionTasks = tasks.filter((t) => t.type === "Action" && t.status === "finished");
2863
- const locateTasks = tasks.filter((t) => t.type === "Insight" && t.subType === "Locate");
2864
- const lastAction = actionTasks[actionTasks.length - 1];
2865
- const lastLocate = locateTasks[locateTasks.length - 1];
2866
- if (!lastAction) {
2867
- return {
2868
- success: true,
2869
- actionType: "unknown",
2870
- description: `Completed: ${originalPrompt}`
2620
+ async logScreenshot(title, options) {
2621
+ const screenshotTitle = title || "untitled";
2622
+ const content = options?.content || "";
2623
+ const screenshot = await this.page.screenshotBase64?.();
2624
+ if (screenshot) {
2625
+ const executionDump = {
2626
+ name: screenshotTitle,
2627
+ description: content,
2628
+ tasks: [{
2629
+ type: "Screenshot",
2630
+ subType: "log",
2631
+ status: "finished",
2632
+ executor: null,
2633
+ param: {
2634
+ title: screenshotTitle,
2635
+ content
2636
+ },
2637
+ output: {
2638
+ screenshot
2639
+ },
2640
+ thought: `Logged screenshot: ${screenshotTitle}`,
2641
+ timing: {
2642
+ start: Date.now(),
2643
+ end: Date.now(),
2644
+ cost: 0
2645
+ }
2646
+ }],
2647
+ sdkVersion: "1.0.0",
2648
+ logTime: Date.now(),
2649
+ model_name: "screenshot"
2871
2650
  };
2651
+ this.appendExecutionDump(executionDump);
2872
2652
  }
2873
- const actionType = lastAction.subType || "unknown";
2874
- const elementInfo = this.extractElementInfo(lastLocate, lastAction);
2875
- const description = this.generateActionDescription(actionType, lastAction.param, elementInfo);
2876
- return {
2877
- success: true,
2878
- actionType,
2879
- description,
2880
- elementInfo,
2881
- timing: lastAction.timing?.cost
2882
- };
2883
- }
2884
- /**
2885
- * Extract element information from locate task
2886
- */
2887
- extractElementInfo(locateTask, _actionTask) {
2888
- if (!locateTask?.output?.element)
2889
- return void 0;
2890
- const element = locateTask.output.element;
2891
- return {
2892
- type: element.attributes?.nodeType || "unknown",
2893
- text: element.content || element.attributes?.placeholder || element.attributes?.title || "",
2894
- location: `(${element.center[0]}, ${element.center[1]})`
2895
- };
2896
2653
  }
2897
- /**
2898
- * Generate natural language description for actions
2899
- */
2900
- generateActionDescription(actionType, param, elementInfo) {
2901
- const elementDesc = elementInfo ? `'${elementInfo.text || elementInfo.type}' element` : "element";
2902
- switch (actionType) {
2903
- case "Tap":
2904
- return `Clicked on ${elementDesc}`;
2905
- case "Input":
2906
- const inputValue = param?.value || "";
2907
- return `Entered "${inputValue}" into ${elementDesc}`;
2908
- case "KeyboardPress":
2909
- return `Pressed ${param?.value || "key"}`;
2910
- case "Scroll":
2911
- return `Scrolled ${param?.direction || "on page"}`;
2912
- case "Hover":
2913
- return `Hovered over ${elementDesc}`;
2914
- case "Drag":
2915
- return `Dragged ${elementDesc}`;
2916
- default:
2917
- return `Performed ${actionType} action on ${elementDesc}`;
2918
- }
2654
+ async destroy() {
2655
+ await this.page.destroy();
2919
2656
  }
2920
2657
  };
2921
2658
 
@@ -3134,7 +2871,7 @@ function sleep2(ms) {
3134
2871
  var ChromeExtensionProxyPage = class {
3135
2872
  constructor(forceSameTabNavigation) {
3136
2873
  this.pageType = "chrome-extension-proxy";
3137
- this.version = "1.0.5";
2874
+ this.version = "1.0.3";
3138
2875
  this.activeTabId = null;
3139
2876
  this.tabIdOfDebuggerAttached = null;
3140
2877
  this.attachingDebugger = null;
@@ -3143,7 +2880,8 @@ var ChromeExtensionProxyPage = class {
3143
2880
  this.latestMouseX = 100;
3144
2881
  this.latestMouseY = 100;
3145
2882
  this.mouse = {
3146
- click: async (x, y) => {
2883
+ click: async (x, y, options) => {
2884
+ const { button = "left", count = 1 } = options || {};
3147
2885
  await this.mouse.move(x, y);
3148
2886
  if (this.isMobileEmulation === null) {
3149
2887
  const result = await this.sendCommandToDebugger("Runtime.evaluate", {
@@ -3154,7 +2892,7 @@ var ChromeExtensionProxyPage = class {
3154
2892
  });
3155
2893
  this.isMobileEmulation = result?.result?.value;
3156
2894
  }
3157
- if (this.isMobileEmulation) {
2895
+ if (this.isMobileEmulation && button === "left") {
3158
2896
  const touchPoints = [{ x: Math.round(x), y: Math.round(y) }];
3159
2897
  await this.sendCommandToDebugger("Input.dispatchTouchEvent", {
3160
2898
  type: "touchStart",
@@ -3171,15 +2909,15 @@ var ChromeExtensionProxyPage = class {
3171
2909
  type: "mousePressed",
3172
2910
  x,
3173
2911
  y,
3174
- button: "left",
3175
- clickCount: 1
2912
+ button,
2913
+ clickCount: count
3176
2914
  });
3177
2915
  await this.sendCommandToDebugger("Input.dispatchMouseEvent", {
3178
2916
  type: "mouseReleased",
3179
2917
  x,
3180
2918
  y,
3181
- button: "left",
3182
- clickCount: 1
2919
+ button,
2920
+ clickCount: count
3183
2921
  });
3184
2922
  }
3185
2923
  },