misoai-web 1.0.6 → 1.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. package/README.md +5 -349
  2. package/dist/es/agent.js +165 -428
  3. package/dist/es/agent.js.map +1 -1
  4. package/dist/es/bridge-mode-browser.js +10 -9
  5. package/dist/es/bridge-mode-browser.js.map +1 -1
  6. package/dist/es/bridge-mode.js +167 -430
  7. package/dist/es/bridge-mode.js.map +1 -1
  8. package/dist/es/chrome-extension.js +173 -435
  9. package/dist/es/chrome-extension.js.map +1 -1
  10. package/dist/es/index.js +185 -432
  11. package/dist/es/index.js.map +1 -1
  12. package/dist/es/midscene-playground.js +165 -428
  13. package/dist/es/midscene-playground.js.map +1 -1
  14. package/dist/es/midscene-server.js.map +1 -1
  15. package/dist/es/playground.js +165 -428
  16. package/dist/es/playground.js.map +1 -1
  17. package/dist/es/playwright-report.js +1 -1
  18. package/dist/es/playwright-report.js.map +1 -1
  19. package/dist/es/playwright.js +182 -429
  20. package/dist/es/playwright.js.map +1 -1
  21. package/dist/es/puppeteer-agent-launcher.js +169 -432
  22. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  23. package/dist/es/puppeteer.js +169 -432
  24. package/dist/es/puppeteer.js.map +1 -1
  25. package/dist/es/ui-utils.js.map +1 -1
  26. package/dist/es/utils.js +7 -4
  27. package/dist/es/utils.js.map +1 -1
  28. package/dist/es/yaml.js +29 -3
  29. package/dist/es/yaml.js.map +1 -1
  30. package/dist/lib/agent.js +163 -426
  31. package/dist/lib/agent.js.map +1 -1
  32. package/dist/lib/bridge-mode-browser.js +10 -9
  33. package/dist/lib/bridge-mode-browser.js.map +1 -1
  34. package/dist/lib/bridge-mode.js +165 -428
  35. package/dist/lib/bridge-mode.js.map +1 -1
  36. package/dist/lib/chrome-extension.js +171 -433
  37. package/dist/lib/chrome-extension.js.map +1 -1
  38. package/dist/lib/index.js +183 -430
  39. package/dist/lib/index.js.map +1 -1
  40. package/dist/lib/midscene-playground.js +163 -426
  41. package/dist/lib/midscene-playground.js.map +1 -1
  42. package/dist/lib/midscene-server.js.map +1 -1
  43. package/dist/lib/playground.js +163 -426
  44. package/dist/lib/playground.js.map +1 -1
  45. package/dist/lib/playwright-report.js +1 -1
  46. package/dist/lib/playwright-report.js.map +1 -1
  47. package/dist/lib/playwright.js +180 -427
  48. package/dist/lib/playwright.js.map +1 -1
  49. package/dist/lib/puppeteer-agent-launcher.js +167 -430
  50. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  51. package/dist/lib/puppeteer.js +167 -430
  52. package/dist/lib/puppeteer.js.map +1 -1
  53. package/dist/lib/ui-utils.js.map +1 -1
  54. package/dist/lib/utils.js +7 -4
  55. package/dist/lib/utils.js.map +1 -1
  56. package/dist/lib/yaml.js +29 -3
  57. package/dist/lib/yaml.js.map +1 -1
  58. package/dist/types/agent.d.ts +13 -51
  59. package/dist/types/bridge-mode-browser.d.ts +2 -3
  60. package/dist/types/bridge-mode.d.ts +2 -3
  61. package/dist/types/{browser-aec1055d.d.ts → browser-9b472ffb.d.ts} +1 -1
  62. package/dist/types/chrome-extension.d.ts +2 -3
  63. package/dist/types/index.d.ts +1 -2
  64. package/dist/types/midscene-server.d.ts +1 -2
  65. package/dist/types/{page-86ab0fe1.d.ts → page-ed0ecb44.d.ts} +19 -9
  66. package/dist/types/playground.d.ts +2 -3
  67. package/dist/types/playwright.d.ts +9 -2
  68. package/dist/types/puppeteer-agent-launcher.d.ts +1 -2
  69. package/dist/types/puppeteer.d.ts +6 -5
  70. package/dist/types/ui-utils.d.ts +1 -1
  71. package/dist/types/utils.d.ts +1 -2
  72. package/dist/types/yaml.d.ts +1 -2
  73. package/iife-script/htmlElement.js +53 -75
  74. package/iife-script/htmlElementDebug.js +35 -56
  75. package/package.json +24 -24
  76. package/LICENSE +0 -21
@@ -45,7 +45,8 @@ var WebElementInfo = class {
45
45
  id,
46
46
  attributes,
47
47
  indexId,
48
- xpaths
48
+ xpaths,
49
+ isVisible
49
50
  }) {
50
51
  this.content = content;
51
52
  this.rect = rect;
@@ -58,6 +59,7 @@ var WebElementInfo = class {
58
59
  this.attributes = attributes;
59
60
  this.indexId = indexId;
60
61
  this.xpaths = xpaths;
62
+ this.isVisible = isVisible;
61
63
  }
62
64
  };
63
65
 
@@ -80,14 +82,15 @@ async function parseContextFromWebPage(page, _opt) {
80
82
  })
81
83
  ]);
82
84
  const webTree = traverseTree(tree, (elementInfo) => {
83
- const { rect, id, content, attributes, locator, indexId } = elementInfo;
85
+ const { rect, id, content, attributes, locator, indexId, isVisible } = elementInfo;
84
86
  return new WebElementInfo({
85
87
  rect,
86
88
  locator,
87
89
  id,
88
90
  content,
89
91
  attributes,
90
- indexId
92
+ indexId,
93
+ isVisible
91
94
  });
92
95
  });
93
96
  assert(screenshotBase64, "screenshotBase64 is required");
@@ -118,7 +121,7 @@ function printReportMsg(filepath) {
118
121
  }
119
122
  var ERROR_CODE_NOT_IMPLEMENTED_AS_DESIGNED = "NOT_IMPLEMENTED_AS_DESIGNED";
120
123
  function replaceIllegalPathCharsAndSpace(str) {
121
- return str.replace(/[/\\:*?"<>| ]/g, "-");
124
+ return str.replace(/[:*?"<>| ]/g, "-");
122
125
  }
123
126
  function matchElementFromPlan(planLocateParam, tree) {
124
127
  if (!planLocateParam) {
@@ -160,10 +163,11 @@ var ScriptPlayer = class {
160
163
  this.unnamedResultIndex = 0;
161
164
  this.pageAgent = null;
162
165
  this.result = {};
166
+ const target = script.target || script.web || script.android;
163
167
  if (ifInBrowser) {
164
168
  this.output = void 0;
165
- } else if (script.target?.output) {
166
- this.output = resolve(process.cwd(), script.target.output);
169
+ } else if (target?.output) {
170
+ this.output = resolve(process.cwd(), target.output);
167
171
  } else {
168
172
  this.output = join(getMidsceneRunSubDir("output"), `${process.pid}.json`);
169
173
  }
@@ -237,15 +241,20 @@ var ScriptPlayer = class {
237
241
  } else if ("aiAssert" in flowItem) {
238
242
  const assertTask = flowItem;
239
243
  const prompt = assertTask.aiAssert;
244
+ const msg = assertTask.errorMessage;
240
245
  assert2(prompt, "missing prompt for aiAssert");
241
246
  assert2(
242
247
  typeof prompt === "string",
243
248
  "prompt for aiAssert must be a string"
244
249
  );
245
- await agent.aiAssert(prompt);
250
+ await agent.aiAssert(prompt, msg);
246
251
  } else if ("aiQuery" in flowItem) {
247
252
  const queryTask = flowItem;
248
253
  const prompt = queryTask.aiQuery;
254
+ const options = {
255
+ domIncluded: queryTask.domIncluded,
256
+ screenshotIncluded: queryTask.screenshotIncluded
257
+ };
249
258
  assert2(prompt, "missing prompt for aiQuery");
250
259
  assert2(
251
260
  typeof prompt === "string",
@@ -256,6 +265,10 @@ var ScriptPlayer = class {
256
265
  } else if ("aiNumber" in flowItem) {
257
266
  const numberTask = flowItem;
258
267
  const prompt = numberTask.aiNumber;
268
+ const options = {
269
+ domIncluded: numberTask.domIncluded,
270
+ screenshotIncluded: numberTask.screenshotIncluded
271
+ };
259
272
  assert2(prompt, "missing prompt for number");
260
273
  assert2(
261
274
  typeof prompt === "string",
@@ -266,6 +279,10 @@ var ScriptPlayer = class {
266
279
  } else if ("aiString" in flowItem) {
267
280
  const stringTask = flowItem;
268
281
  const prompt = stringTask.aiString;
282
+ const options = {
283
+ domIncluded: stringTask.domIncluded,
284
+ screenshotIncluded: stringTask.screenshotIncluded
285
+ };
269
286
  assert2(prompt, "missing prompt for string");
270
287
  assert2(
271
288
  typeof prompt === "string",
@@ -276,6 +293,10 @@ var ScriptPlayer = class {
276
293
  } else if ("aiBoolean" in flowItem) {
277
294
  const booleanTask = flowItem;
278
295
  const prompt = booleanTask.aiBoolean;
296
+ const options = {
297
+ domIncluded: booleanTask.domIncluded,
298
+ screenshotIncluded: booleanTask.screenshotIncluded
299
+ };
279
300
  assert2(prompt, "missing prompt for boolean");
280
301
  assert2(
281
302
  typeof prompt === "string",
@@ -318,6 +339,9 @@ var ScriptPlayer = class {
318
339
  } else if ("aiTap" in flowItem) {
319
340
  const tapTask = flowItem;
320
341
  await agent.aiTap(tapTask.aiTap, tapTask);
342
+ } else if ("aiRightClick" in flowItem) {
343
+ const rightClickTask = flowItem;
344
+ await agent.aiRightClick(rightClickTask.aiRightClick, rightClickTask);
321
345
  } else if ("aiHover" in flowItem) {
322
346
  const hoverTask = flowItem;
323
347
  await agent.aiHover(hoverTask.aiHover, hoverTask);
@@ -340,6 +364,11 @@ var ScriptPlayer = class {
340
364
  evaluateJavaScriptTask.javascript
341
365
  );
342
366
  this.setResult(evaluateJavaScriptTask.name, result);
367
+ } else if ("logScreenshot" in flowItem) {
368
+ const logScreenshotTask = flowItem;
369
+ await agent.logScreenshot(logScreenshotTask.logScreenshot, {
370
+ content: logScreenshotTask.content || ""
371
+ });
343
372
  } else {
344
373
  throw new Error(`unknown flowItem: ${JSON.stringify(flowItem)}`);
345
374
  }
@@ -870,10 +899,10 @@ var PageTaskExecutor = class {
870
899
  if (!taskParam || !taskParam.value) {
871
900
  return;
872
901
  }
873
- await this.page.keyboard.type(taskParam.value);
874
- } else {
875
- await this.page.keyboard.type(taskParam.value);
876
902
  }
903
+ await this.page.keyboard.type(taskParam.value, {
904
+ autoDismissKeyboard: taskParam.autoDismissKeyboard
905
+ });
877
906
  }
878
907
  };
879
908
  tasks.push(taskActionInput);
@@ -902,6 +931,22 @@ var PageTaskExecutor = class {
902
931
  }
903
932
  };
904
933
  tasks.push(taskActionTap);
934
+ } else if (plan2.type === "RightClick") {
935
+ const taskActionRightClick = {
936
+ type: "Action",
937
+ subType: "RightClick",
938
+ thought: plan2.thought,
939
+ locate: plan2.locate,
940
+ executor: async (param, { element }) => {
941
+ assert4(element, "Element not found, cannot right click");
942
+ await this.page.mouse.click(
943
+ element.center[0],
944
+ element.center[1],
945
+ { button: "right" }
946
+ );
947
+ }
948
+ };
949
+ tasks.push(taskActionRightClick);
905
950
  } else if (plan2.type === "Drag") {
906
951
  const taskActionDrag = {
907
952
  type: "Action",
@@ -1430,7 +1475,7 @@ var PageTaskExecutor = class {
1430
1475
  executor: taskExecutor
1431
1476
  };
1432
1477
  }
1433
- async createTypeQueryTask(type, demand) {
1478
+ async createTypeQueryTask(type, demand, opt) {
1434
1479
  const taskExecutor = new Executor(
1435
1480
  taskTitleStr(
1436
1481
  type,
@@ -1461,7 +1506,10 @@ var PageTaskExecutor = class {
1461
1506
  result: `${type}, ${demand}`
1462
1507
  };
1463
1508
  }
1464
- const { data, usage } = await this.insight.extract(demandInput);
1509
+ const { data, usage } = await this.insight.extract(
1510
+ demandInput,
1511
+ opt
1512
+ );
1465
1513
  let outputResult = data;
1466
1514
  if (ifTypeRestricted) {
1467
1515
  assert4(data?.result !== void 0, "No result in query data");
@@ -1481,17 +1529,17 @@ var PageTaskExecutor = class {
1481
1529
  executor: taskExecutor
1482
1530
  };
1483
1531
  }
1484
- async query(demand) {
1485
- return this.createTypeQueryTask("Query", demand);
1532
+ async query(demand, opt) {
1533
+ return this.createTypeQueryTask("Query", demand, opt);
1486
1534
  }
1487
- async boolean(prompt) {
1488
- return this.createTypeQueryTask("Boolean", prompt);
1535
+ async boolean(prompt, opt) {
1536
+ return this.createTypeQueryTask("Boolean", prompt, opt);
1489
1537
  }
1490
- async number(prompt) {
1491
- return this.createTypeQueryTask("Number", prompt);
1538
+ async number(prompt, opt) {
1539
+ return this.createTypeQueryTask("Number", prompt, opt);
1492
1540
  }
1493
- async string(prompt) {
1494
- return this.createTypeQueryTask("String", prompt);
1541
+ async string(prompt, opt) {
1542
+ return this.createTypeQueryTask("String", prompt, opt);
1495
1543
  }
1496
1544
  async assert(assertion) {
1497
1545
  const description = `assert: ${assertion}`;
@@ -1627,7 +1675,7 @@ function buildPlans(type, locateParam, param) {
1627
1675
  param: locateParam,
1628
1676
  thought: ""
1629
1677
  } : null;
1630
- if (type === "Tap" || type === "Hover") {
1678
+ if (type === "Tap" || type === "Hover" || type === "RightClick") {
1631
1679
  assert5(locateParam, `missing locate info for action "${type}"`);
1632
1680
  assert5(locatePlan, `missing locate info for action "${type}"`);
1633
1681
  const tapPlan = {
@@ -1698,8 +1746,8 @@ function buildPlans(type, locateParam, param) {
1698
1746
 
1699
1747
  // src/common/task-cache.ts
1700
1748
  import assert6 from "assert";
1701
- import { existsSync as existsSync2, readFileSync, writeFileSync as writeFileSync2 } from "fs";
1702
- import { join as join2 } from "path";
1749
+ import { existsSync as existsSync2, mkdirSync as mkdirSync2, readFileSync, writeFileSync as writeFileSync2 } from "fs";
1750
+ import { dirname as dirname2, join as join2 } from "path";
1703
1751
  import { getMidsceneRunSubDir as getMidsceneRunSubDir2 } from "misoai-shared/common";
1704
1752
  import { getDebug as getDebug3 } from "misoai-shared/logger";
1705
1753
  import { ifInBrowser as ifInBrowser2 } from "misoai-shared/utils";
@@ -1707,7 +1755,7 @@ import yaml3 from "js-yaml";
1707
1755
  import semver from "semver";
1708
1756
 
1709
1757
  // package.json
1710
- var version = "1.0.5";
1758
+ var version = "1.0.3";
1711
1759
 
1712
1760
  // src/common/task-cache.ts
1713
1761
  var debug3 = getDebug3("cache");
@@ -1735,70 +1783,44 @@ var TaskCache = class {
1735
1783
  this.cache = cacheContent;
1736
1784
  this.cacheOriginalLength = this.cache.caches.length;
1737
1785
  }
1738
- matchCache(prompt, type, contextData) {
1739
- const contextHash = contextData ? this.generateContextHash(contextData) : void 0;
1786
+ matchCache(prompt, type) {
1740
1787
  for (let i = 0; i < this.cacheOriginalLength; i++) {
1741
1788
  const item = this.cache.caches[i];
1742
1789
  const key = `${type}:${prompt}:${i}`;
1743
- if (item.type !== type || item.prompt !== prompt || this.matchedCacheIndices.has(key)) {
1744
- continue;
1745
- }
1746
- if (type === "plan" && item.type === "plan") {
1747
- const planItem = item;
1748
- if (contextHash && planItem.contextHash) {
1749
- if (contextHash !== planItem.contextHash) {
1750
- debug3("cache context mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1751
- continue;
1790
+ if (item.type === type && item.prompt === prompt && !this.matchedCacheIndices.has(key)) {
1791
+ this.matchedCacheIndices.add(key);
1792
+ debug3(
1793
+ "cache found and marked as used, type: %s, prompt: %s, index: %d",
1794
+ type,
1795
+ prompt,
1796
+ i
1797
+ );
1798
+ return {
1799
+ cacheContent: item,
1800
+ updateFn: (cb) => {
1801
+ debug3(
1802
+ "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1803
+ type,
1804
+ prompt,
1805
+ i
1806
+ );
1807
+ cb(item);
1808
+ debug3(
1809
+ "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1810
+ type,
1811
+ prompt,
1812
+ i
1813
+ );
1814
+ this.flushCacheToFile();
1752
1815
  }
1753
- } else if (contextHash || planItem.contextHash) {
1754
- debug3("cache context availability mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1755
- continue;
1756
- }
1816
+ };
1757
1817
  }
1758
- this.matchedCacheIndices.add(key);
1759
- debug3(
1760
- "cache found and marked as used, type: %s, prompt: %s, index: %d, contextMatch: %s",
1761
- type,
1762
- prompt,
1763
- i,
1764
- contextHash ? "yes" : "no-context"
1765
- );
1766
- return {
1767
- cacheContent: item,
1768
- updateFn: (cb) => {
1769
- debug3(
1770
- "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1771
- type,
1772
- prompt,
1773
- i
1774
- );
1775
- cb(item);
1776
- debug3(
1777
- "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1778
- type,
1779
- prompt,
1780
- i
1781
- );
1782
- this.flushCacheToFile();
1783
- }
1784
- };
1785
1818
  }
1786
- debug3("no unused cache found, type: %s, prompt: %s, contextHash: %s", type, prompt, contextHash);
1819
+ debug3("no unused cache found, type: %s, prompt: %s", type, prompt);
1787
1820
  return void 0;
1788
1821
  }
1789
- generateContextHash(contextData) {
1790
- const sortedKeys = Object.keys(contextData).sort();
1791
- const stableString = sortedKeys.map((key) => `${key}:${JSON.stringify(contextData[key])}`).join("|");
1792
- let hash = 0;
1793
- for (let i = 0; i < stableString.length; i++) {
1794
- const char = stableString.charCodeAt(i);
1795
- hash = (hash << 5) - hash + char;
1796
- hash = hash & hash;
1797
- }
1798
- return hash.toString(36);
1799
- }
1800
- matchPlanCache(prompt, contextData) {
1801
- return this.matchCache(prompt, "plan", contextData);
1822
+ matchPlanCache(prompt) {
1823
+ return this.matchCache(prompt, "plan");
1802
1824
  }
1803
1825
  matchLocateCache(prompt) {
1804
1826
  return this.matchCache(prompt, "locate");
@@ -1864,8 +1886,14 @@ cache file: ${cacheFile}`
1864
1886
  return;
1865
1887
  }
1866
1888
  try {
1889
+ const dir = dirname2(this.cacheFilePath);
1890
+ if (!existsSync2(dir)) {
1891
+ mkdirSync2(dir, { recursive: true });
1892
+ debug3("created cache directory: %s", dir);
1893
+ }
1867
1894
  const yamlData = yaml3.dump(this.cache);
1868
1895
  writeFileSync2(this.cacheFilePath, yamlData);
1896
+ debug3("cache flushed to file: %s", this.cacheFilePath);
1869
1897
  } catch (err) {
1870
1898
  debug3(
1871
1899
  "write cache to file failed, path: %s, error: %s",
@@ -1874,16 +1902,11 @@ cache file: ${cacheFile}`
1874
1902
  );
1875
1903
  }
1876
1904
  }
1877
- updateOrAppendCacheRecord(newRecord, cachedRecord, contextData) {
1905
+ updateOrAppendCacheRecord(newRecord, cachedRecord) {
1878
1906
  if (cachedRecord) {
1879
1907
  if (newRecord.type === "plan") {
1880
1908
  cachedRecord.updateFn((cache) => {
1881
- const planCache = cache;
1882
- planCache.yamlWorkflow = newRecord.yamlWorkflow;
1883
- if (contextData) {
1884
- planCache.contextHash = this.generateContextHash(contextData);
1885
- planCache.contextData = { ...contextData };
1886
- }
1909
+ cache.yamlWorkflow = newRecord.yamlWorkflow;
1887
1910
  });
1888
1911
  } else {
1889
1912
  cachedRecord.updateFn((cache) => {
@@ -1891,11 +1914,6 @@ cache file: ${cacheFile}`
1891
1914
  });
1892
1915
  }
1893
1916
  } else {
1894
- if (newRecord.type === "plan" && contextData) {
1895
- const planRecord = newRecord;
1896
- planRecord.contextHash = this.generateContextHash(contextData);
1897
- planRecord.contextData = { ...contextData };
1898
- }
1899
1917
  this.appendCache(newRecord);
1900
1918
  }
1901
1919
  }
@@ -1925,13 +1943,10 @@ var PageAgent = class {
1925
1943
  generateReport: true,
1926
1944
  autoPrintReportMsg: true,
1927
1945
  groupName: "Midscene Report",
1928
- groupDescription: "",
1929
- enableCumulativeContext: true,
1930
- autoClearContext: false
1946
+ groupDescription: ""
1931
1947
  },
1932
1948
  opts || {}
1933
1949
  );
1934
- this.initializeContextStore();
1935
1950
  if (this.page.pageType === "puppeteer" || this.page.pageType === "playwright") {
1936
1951
  this.page.waitForNavigationTimeout = this.opts.waitForNavigationTimeout || DEFAULT_WAIT_FOR_NAVIGATION_TIMEOUT;
1937
1952
  this.page.waitForNetworkIdleTimeout = this.opts.waitForNetworkIdleTimeout || DEFAULT_WAIT_FOR_NETWORK_IDLE_TIMEOUT;
@@ -1958,69 +1973,6 @@ var PageAgent = class {
1958
1973
  opts?.testId || this.page.pageType || "web"
1959
1974
  );
1960
1975
  }
1961
- /**
1962
- * Initialize context store for cumulative context functionality
1963
- */
1964
- async initializeContextStore() {
1965
- if (!this.opts.enableCumulativeContext) {
1966
- debug4("Cumulative context disabled via options");
1967
- return;
1968
- }
1969
- try {
1970
- const aiModel = await import("misoai-core/ai-model");
1971
- this.contextStore = aiModel.getContextStore();
1972
- debug4("Context store initialized successfully", {
1973
- autoClearContext: this.opts.autoClearContext,
1974
- testId: this.opts.testId
1975
- });
1976
- if (this.opts.autoClearContext) {
1977
- this.contextStore.clear();
1978
- debug4("Context store cleared due to autoClearContext option");
1979
- } else {
1980
- const existingData = this.contextStore.getAllData();
1981
- const existingSteps = this.contextStore.getRecentSteps(100).length;
1982
- debug4("Context store preserving existing data", {
1983
- existingDataKeys: Object.keys(existingData),
1984
- existingStepsCount: existingSteps
1985
- });
1986
- }
1987
- } catch (error) {
1988
- debug4("Failed to initialize context store:", error);
1989
- console.warn("⚠️ Could not initialize context store:", error);
1990
- }
1991
- }
1992
- /**
1993
- * Get the context store instance
1994
- */
1995
- getContextStore() {
1996
- return this.contextStore;
1997
- }
1998
- /**
1999
- * Clear the context store
2000
- */
2001
- clearContext() {
2002
- if (this.contextStore) {
2003
- this.contextStore.clear();
2004
- }
2005
- }
2006
- /**
2007
- * Get all stored data from context store
2008
- */
2009
- getStoredData() {
2010
- if (this.contextStore) {
2011
- return this.contextStore.getAllData();
2012
- }
2013
- return {};
2014
- }
2015
- /**
2016
- * Get step summary from context store
2017
- */
2018
- getStepSummary() {
2019
- if (this.contextStore) {
2020
- return this.contextStore.getStepSummary();
2021
- }
2022
- return "";
2023
- }
2024
1976
  async getUIContext(action) {
2025
1977
  if (action && (action === "extract" || action === "assert" || action === "captcha")) {
2026
1978
  return await parseContextFromWebPage(this.page, {
@@ -2199,6 +2151,23 @@ var PageAgent = class {
2199
2151
  metadata
2200
2152
  };
2201
2153
  }
2154
+ async aiRightClick(locatePrompt, opt) {
2155
+ const detailedLocateParam = this.buildDetailedLocateParam(
2156
+ locatePrompt,
2157
+ opt
2158
+ );
2159
+ const plans = buildPlans("RightClick", detailedLocateParam);
2160
+ const { executor, output } = await this.taskExecutor.runPlans(
2161
+ taskTitleStr("RightClick", locateParamStr(detailedLocateParam)),
2162
+ plans,
2163
+ { cacheable: opt?.cacheable }
2164
+ );
2165
+ const metadata = this.afterTaskRunning(executor);
2166
+ return {
2167
+ result: output,
2168
+ metadata
2169
+ };
2170
+ }
2202
2171
  async aiInput(value, locatePrompt, opt) {
2203
2172
  assert7(
2204
2173
  typeof value === "string",
@@ -2256,35 +2225,9 @@ var PageAgent = class {
2256
2225
  };
2257
2226
  }
2258
2227
  async aiAction(taskPrompt, opt) {
2259
- const originalPrompt = taskPrompt;
2260
- let processedPrompt = taskPrompt;
2261
- if (this.opts.enableCumulativeContext && this.contextStore) {
2262
- try {
2263
- const storedData = this.contextStore.getAllData();
2264
- if (Object.keys(storedData).length > 0) {
2265
- debug4("Available data for aiAction:", {
2266
- prompt: taskPrompt,
2267
- availableData: storedData
2268
- });
2269
- }
2270
- } catch (error) {
2271
- debug4("Context store operation failed:", error);
2272
- }
2273
- }
2274
2228
  const cacheable = opt?.cacheable;
2275
2229
  const isVlmUiTars = vlLocateMode() === "vlm-ui-tars";
2276
- let contextData;
2277
- if (this.opts.enableCumulativeContext && this.contextStore) {
2278
- try {
2279
- contextData = this.contextStore.getAllData();
2280
- if (contextData && Object.keys(contextData).length === 0) {
2281
- contextData = void 0;
2282
- }
2283
- } catch (error) {
2284
- debug4("Failed to get context data for cache:", error);
2285
- }
2286
- }
2287
- const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt, contextData);
2230
+ const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt);
2288
2231
  if (matchedCache && this.taskCache?.isCacheResultUsed) {
2289
2232
  const { executor: executor2 } = await this.taskExecutor.loadYamlFlowAsPlanning(
2290
2233
  taskPrompt,
@@ -2294,28 +2237,6 @@ var PageAgent = class {
2294
2237
  debug4("matched cache, will call .runYaml to run the action");
2295
2238
  const yaml5 = matchedCache.cacheContent?.yamlWorkflow;
2296
2239
  const result = await this.runYaml(yaml5);
2297
- if (this.opts.enableCumulativeContext && this.contextStore) {
2298
- try {
2299
- const executionResult = {
2300
- success: true,
2301
- actionType: "cached",
2302
- description: `Executed cached action: ${processedPrompt}`,
2303
- timing: result.metadata?.totalTime
2304
- };
2305
- this.contextStore.addStep({
2306
- type: "action",
2307
- summary: `Action: ${processedPrompt} (cached)`,
2308
- prompt: processedPrompt,
2309
- executionResult
2310
- });
2311
- debug4("Added cached action step to context store:", {
2312
- stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2313
- totalSteps: this.contextStore.getRecentSteps(100).length
2314
- });
2315
- } catch (error) {
2316
- debug4("Failed to add cached action step:", error);
2317
- }
2318
- }
2319
2240
  return {
2320
2241
  result: result.result,
2321
2242
  metadata: metadata2
@@ -2340,114 +2261,17 @@ var PageAgent = class {
2340
2261
  prompt: taskPrompt,
2341
2262
  yamlWorkflow: yamlFlowStr
2342
2263
  },
2343
- matchedCache,
2344
- contextData
2345
- // Pass context data for cache creation
2264
+ matchedCache
2346
2265
  );
2347
2266
  }
2348
2267
  const metadata = this.afterTaskRunning(executor);
2349
- if (this.opts.enableCumulativeContext && this.contextStore) {
2350
- try {
2351
- const executionResult = this.analyzeExecutionResults(executor, originalPrompt);
2352
- this.contextStore.addStep({
2353
- type: "action",
2354
- summary: `Action: ${processedPrompt}`,
2355
- prompt: processedPrompt,
2356
- executionResult
2357
- });
2358
- debug4("Added action step with execution result to context store:", {
2359
- stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2360
- totalSteps: this.contextStore.getRecentSteps(100).length,
2361
- executionResult
2362
- });
2363
- } catch (error) {
2364
- debug4("Failed to analyze execution results, adding step without execution result:", error);
2365
- try {
2366
- this.contextStore.addStep({
2367
- type: "action",
2368
- summary: `Action: ${processedPrompt}`,
2369
- prompt: processedPrompt
2370
- });
2371
- } catch (stepError) {
2372
- debug4("Failed to add action step:", stepError);
2373
- }
2374
- }
2375
- }
2376
2268
  return {
2377
2269
  result: output,
2378
2270
  metadata
2379
2271
  };
2380
2272
  }
2381
2273
  async aiQuery(demand) {
2382
- let processedDemand = demand;
2383
- let storageKey;
2384
- try {
2385
- const aiModel = await import("misoai-core/ai-model");
2386
- const contextStore = aiModel.getContextStore();
2387
- if (typeof demand === "string") {
2388
- const storageInstruction = contextStore.parseStorageInstruction(demand);
2389
- if (storageInstruction) {
2390
- storageKey = storageInstruction.key;
2391
- processedDemand = storageInstruction.cleanText;
2392
- contextStore._pendingAliases = storageInstruction.aliases;
2393
- } else {
2394
- const storageMatch = demand.match(/store\s+(?:as\s+)?(\w+)/i);
2395
- if (storageMatch) {
2396
- storageKey = storageMatch[1];
2397
- processedDemand = demand.replace(/,?\s*store\s+(?:as\s+)?\w+/i, "").trim();
2398
- }
2399
- }
2400
- }
2401
- } catch (error) {
2402
- debug4("Context store not available:", error);
2403
- }
2404
- const { output, executor } = await this.taskExecutor.query(processedDemand);
2405
- if (this.opts.enableCumulativeContext && this.contextStore) {
2406
- if (storageKey && output) {
2407
- try {
2408
- const pendingAliases = this.contextStore._pendingAliases;
2409
- if (pendingAliases) {
2410
- this.contextStore.storeDataWithAliases(storageKey, output, pendingAliases, typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand));
2411
- delete this.contextStore._pendingAliases;
2412
- debug4("Stored query result with aliases:", {
2413
- key: storageKey,
2414
- value: output,
2415
- aliases: pendingAliases
2416
- });
2417
- } else {
2418
- this.contextStore.storeData(storageKey, output);
2419
- debug4("Stored query result:", {
2420
- key: storageKey,
2421
- value: output
2422
- });
2423
- }
2424
- this.contextStore.addStep({
2425
- type: "query",
2426
- summary: `Query: ${typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)} (stored as ${storageKey})`,
2427
- data: output,
2428
- prompt: typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)
2429
- });
2430
- debug4("Added query step to context store:", {
2431
- storageKey,
2432
- totalStoredItems: Object.keys(this.contextStore.getAllData()).length,
2433
- totalSteps: this.contextStore.getRecentSteps(100).length
2434
- });
2435
- } catch (error) {
2436
- debug4("Failed to store query result:", error);
2437
- }
2438
- } else {
2439
- try {
2440
- this.contextStore.addStep({
2441
- type: "query",
2442
- summary: `Query: ${typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)}`,
2443
- data: output,
2444
- prompt: typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)
2445
- });
2446
- } catch (error) {
2447
- debug4("Failed to add query step:", error);
2448
- }
2449
- }
2450
- }
2274
+ const { output, executor } = await this.taskExecutor.query(demand);
2451
2275
  const metadata = this.afterTaskRunning(executor);
2452
2276
  return {
2453
2277
  result: output,
@@ -2557,48 +2381,6 @@ var PageAgent = class {
2557
2381
  };
2558
2382
  }
2559
2383
  async aiAssert(assertion, msg, opt) {
2560
- let executionContext = "";
2561
- if (this.opts.enableCumulativeContext && this.contextStore) {
2562
- try {
2563
- const recentSteps = this.contextStore.getRecentSteps(3);
2564
- const stepsWithExecutionResults = recentSteps.filter((step) => step.executionResult);
2565
- const storedData = this.contextStore.getAllData();
2566
- if (stepsWithExecutionResults.length > 0) {
2567
- const recentActions = stepsWithExecutionResults.map((step) => {
2568
- const result = step.executionResult;
2569
- return `- ${result.description}${result.success ? "" : " (FAILED)"}`;
2570
- }).join("\n");
2571
- executionContext = `
2572
-
2573
- Recent actions performed:
2574
- ${recentActions}
2575
-
2576
- This context may help verify the assertion.`;
2577
- }
2578
- if (storedData && Object.keys(storedData).length > 0) {
2579
- executionContext += `
2580
-
2581
- Available data for reference:
2582
- ${JSON.stringify(storedData, null, 2)}
2583
-
2584
- Note: If the assertion references any data keys or natural language equivalents, consider the stored values when verifying.`;
2585
- debug4("Available data for aiAssert:", {
2586
- assertion,
2587
- availableData: storedData
2588
- });
2589
- }
2590
- this.contextStore.addStep({
2591
- type: "assertion",
2592
- summary: `Assertion: ${assertion}`,
2593
- prompt: assertion
2594
- });
2595
- debug4("Added assertion step to context store:", {
2596
- totalSteps: this.contextStore.getRecentSteps(100).length
2597
- });
2598
- } catch (error) {
2599
- debug4("Context store operation failed:", error);
2600
- }
2601
- }
2602
2384
  let currentUrl = "";
2603
2385
  if (this.page.url) {
2604
2386
  try {
@@ -2606,13 +2388,7 @@ Note: If the assertion references any data keys or natural language equivalents,
2606
2388
  } catch (e) {
2607
2389
  }
2608
2390
  }
2609
- let assertionWithContext = assertion;
2610
- if (currentUrl) {
2611
- assertionWithContext = `For the page at URL "${currentUrl}", ${assertion}`;
2612
- }
2613
- if (executionContext) {
2614
- assertionWithContext += executionContext;
2615
- }
2391
+ const assertionWithContext = currentUrl ? `For the page at URL "${currentUrl}", ${assertion}` : assertion;
2616
2392
  const { output, executor } = await this.taskExecutor.assert(assertionWithContext);
2617
2393
  const metadata = this.afterTaskRunning(executor, true);
2618
2394
  if (output && opt?.keepRawResponse) {
@@ -2823,81 +2599,42 @@ ${errors}`);
2823
2599
  }
2824
2600
  throw new Error("evaluateJavaScript is not supported in current agent");
2825
2601
  }
2826
- async destroy() {
2827
- await this.page.destroy();
2828
- }
2829
- /**
2830
- * Analyze execution results from executor to generate meaningful descriptions
2831
- */
2832
- analyzeExecutionResults(executor, originalPrompt) {
2833
- const tasks = executor.tasks;
2834
- const success = !executor.isInErrorState();
2835
- if (!success) {
2836
- const errorTask = executor.latestErrorTask();
2837
- return {
2838
- success: false,
2839
- actionType: "error",
2840
- description: `Failed to execute: ${originalPrompt}`,
2841
- error: errorTask?.error
2842
- };
2843
- }
2844
- const actionTasks = tasks.filter((t) => t.type === "Action" && t.status === "finished");
2845
- const locateTasks = tasks.filter((t) => t.type === "Insight" && t.subType === "Locate");
2846
- const lastAction = actionTasks[actionTasks.length - 1];
2847
- const lastLocate = locateTasks[locateTasks.length - 1];
2848
- if (!lastAction) {
2849
- return {
2850
- success: true,
2851
- actionType: "unknown",
2852
- description: `Completed: ${originalPrompt}`
2602
+ async logScreenshot(title, options) {
2603
+ const screenshotTitle = title || "untitled";
2604
+ const content = options?.content || "";
2605
+ const screenshot = await this.page.screenshotBase64?.();
2606
+ if (screenshot) {
2607
+ const executionDump = {
2608
+ name: screenshotTitle,
2609
+ description: content,
2610
+ tasks: [{
2611
+ type: "Screenshot",
2612
+ subType: "log",
2613
+ status: "finished",
2614
+ executor: null,
2615
+ param: {
2616
+ title: screenshotTitle,
2617
+ content
2618
+ },
2619
+ output: {
2620
+ screenshot
2621
+ },
2622
+ thought: `Logged screenshot: ${screenshotTitle}`,
2623
+ timing: {
2624
+ start: Date.now(),
2625
+ end: Date.now(),
2626
+ cost: 0
2627
+ }
2628
+ }],
2629
+ sdkVersion: "1.0.0",
2630
+ logTime: Date.now(),
2631
+ model_name: "screenshot"
2853
2632
  };
2633
+ this.appendExecutionDump(executionDump);
2854
2634
  }
2855
- const actionType = lastAction.subType || "unknown";
2856
- const elementInfo = this.extractElementInfo(lastLocate, lastAction);
2857
- const description = this.generateActionDescription(actionType, lastAction.param, elementInfo);
2858
- return {
2859
- success: true,
2860
- actionType,
2861
- description,
2862
- elementInfo,
2863
- timing: lastAction.timing?.cost
2864
- };
2865
- }
2866
- /**
2867
- * Extract element information from locate task
2868
- */
2869
- extractElementInfo(locateTask, _actionTask) {
2870
- if (!locateTask?.output?.element)
2871
- return void 0;
2872
- const element = locateTask.output.element;
2873
- return {
2874
- type: element.attributes?.nodeType || "unknown",
2875
- text: element.content || element.attributes?.placeholder || element.attributes?.title || "",
2876
- location: `(${element.center[0]}, ${element.center[1]})`
2877
- };
2878
2635
  }
2879
- /**
2880
- * Generate natural language description for actions
2881
- */
2882
- generateActionDescription(actionType, param, elementInfo) {
2883
- const elementDesc = elementInfo ? `'${elementInfo.text || elementInfo.type}' element` : "element";
2884
- switch (actionType) {
2885
- case "Tap":
2886
- return `Clicked on ${elementDesc}`;
2887
- case "Input":
2888
- const inputValue = param?.value || "";
2889
- return `Entered "${inputValue}" into ${elementDesc}`;
2890
- case "KeyboardPress":
2891
- return `Pressed ${param?.value || "key"}`;
2892
- case "Scroll":
2893
- return `Scrolled ${param?.direction || "on page"}`;
2894
- case "Hover":
2895
- return `Hovered over ${elementDesc}`;
2896
- case "Drag":
2897
- return `Dragged ${elementDesc}`;
2898
- default:
2899
- return `Performed ${actionType} action on ${elementDesc}`;
2900
- }
2636
+ async destroy() {
2637
+ await this.page.destroy();
2901
2638
  }
2902
2639
  };
2903
2640
 
@@ -3118,7 +2855,7 @@ function sleep2(ms) {
3118
2855
  var ChromeExtensionProxyPage = class {
3119
2856
  constructor(forceSameTabNavigation) {
3120
2857
  this.pageType = "chrome-extension-proxy";
3121
- this.version = "1.0.5";
2858
+ this.version = "1.0.3";
3122
2859
  this.activeTabId = null;
3123
2860
  this.tabIdOfDebuggerAttached = null;
3124
2861
  this.attachingDebugger = null;
@@ -3127,7 +2864,8 @@ var ChromeExtensionProxyPage = class {
3127
2864
  this.latestMouseX = 100;
3128
2865
  this.latestMouseY = 100;
3129
2866
  this.mouse = {
3130
- click: async (x, y) => {
2867
+ click: async (x, y, options) => {
2868
+ const { button = "left", count = 1 } = options || {};
3131
2869
  await this.mouse.move(x, y);
3132
2870
  if (this.isMobileEmulation === null) {
3133
2871
  const result = await this.sendCommandToDebugger("Runtime.evaluate", {
@@ -3138,7 +2876,7 @@ var ChromeExtensionProxyPage = class {
3138
2876
  });
3139
2877
  this.isMobileEmulation = result?.result?.value;
3140
2878
  }
3141
- if (this.isMobileEmulation) {
2879
+ if (this.isMobileEmulation && button === "left") {
3142
2880
  const touchPoints = [{ x: Math.round(x), y: Math.round(y) }];
3143
2881
  await this.sendCommandToDebugger("Input.dispatchTouchEvent", {
3144
2882
  type: "touchStart",
@@ -3155,15 +2893,15 @@ var ChromeExtensionProxyPage = class {
3155
2893
  type: "mousePressed",
3156
2894
  x,
3157
2895
  y,
3158
- button: "left",
3159
- clickCount: 1
2896
+ button,
2897
+ clickCount: count
3160
2898
  });
3161
2899
  await this.sendCommandToDebugger("Input.dispatchMouseEvent", {
3162
2900
  type: "mouseReleased",
3163
2901
  x,
3164
2902
  y,
3165
- button: "left",
3166
- clickCount: 1
2903
+ button,
2904
+ clickCount: count
3167
2905
  });
3168
2906
  }
3169
2907
  },