misoai-web 1.0.6 → 1.5.7

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. package/README.md +5 -349
  2. package/dist/es/agent.js +165 -428
  3. package/dist/es/agent.js.map +1 -1
  4. package/dist/es/bridge-mode-browser.js +10 -9
  5. package/dist/es/bridge-mode-browser.js.map +1 -1
  6. package/dist/es/bridge-mode.js +167 -430
  7. package/dist/es/bridge-mode.js.map +1 -1
  8. package/dist/es/chrome-extension.js +173 -435
  9. package/dist/es/chrome-extension.js.map +1 -1
  10. package/dist/es/index.js +185 -432
  11. package/dist/es/index.js.map +1 -1
  12. package/dist/es/midscene-playground.js +165 -428
  13. package/dist/es/midscene-playground.js.map +1 -1
  14. package/dist/es/midscene-server.js.map +1 -1
  15. package/dist/es/playground.js +165 -428
  16. package/dist/es/playground.js.map +1 -1
  17. package/dist/es/playwright-report.js +1 -1
  18. package/dist/es/playwright-report.js.map +1 -1
  19. package/dist/es/playwright.js +182 -429
  20. package/dist/es/playwright.js.map +1 -1
  21. package/dist/es/puppeteer-agent-launcher.js +169 -432
  22. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  23. package/dist/es/puppeteer.js +169 -432
  24. package/dist/es/puppeteer.js.map +1 -1
  25. package/dist/es/ui-utils.js.map +1 -1
  26. package/dist/es/utils.js +7 -4
  27. package/dist/es/utils.js.map +1 -1
  28. package/dist/es/yaml.js +29 -3
  29. package/dist/es/yaml.js.map +1 -1
  30. package/dist/lib/agent.js +163 -426
  31. package/dist/lib/agent.js.map +1 -1
  32. package/dist/lib/bridge-mode-browser.js +10 -9
  33. package/dist/lib/bridge-mode-browser.js.map +1 -1
  34. package/dist/lib/bridge-mode.js +165 -428
  35. package/dist/lib/bridge-mode.js.map +1 -1
  36. package/dist/lib/chrome-extension.js +171 -433
  37. package/dist/lib/chrome-extension.js.map +1 -1
  38. package/dist/lib/index.js +183 -430
  39. package/dist/lib/index.js.map +1 -1
  40. package/dist/lib/midscene-playground.js +163 -426
  41. package/dist/lib/midscene-playground.js.map +1 -1
  42. package/dist/lib/midscene-server.js.map +1 -1
  43. package/dist/lib/playground.js +163 -426
  44. package/dist/lib/playground.js.map +1 -1
  45. package/dist/lib/playwright-report.js +1 -1
  46. package/dist/lib/playwright-report.js.map +1 -1
  47. package/dist/lib/playwright.js +180 -427
  48. package/dist/lib/playwright.js.map +1 -1
  49. package/dist/lib/puppeteer-agent-launcher.js +167 -430
  50. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  51. package/dist/lib/puppeteer.js +167 -430
  52. package/dist/lib/puppeteer.js.map +1 -1
  53. package/dist/lib/ui-utils.js.map +1 -1
  54. package/dist/lib/utils.js +7 -4
  55. package/dist/lib/utils.js.map +1 -1
  56. package/dist/lib/yaml.js +29 -3
  57. package/dist/lib/yaml.js.map +1 -1
  58. package/dist/types/agent.d.ts +13 -51
  59. package/dist/types/bridge-mode-browser.d.ts +2 -3
  60. package/dist/types/bridge-mode.d.ts +2 -3
  61. package/dist/types/{browser-aec1055d.d.ts → browser-9b472ffb.d.ts} +1 -1
  62. package/dist/types/chrome-extension.d.ts +2 -3
  63. package/dist/types/index.d.ts +1 -2
  64. package/dist/types/midscene-server.d.ts +1 -2
  65. package/dist/types/{page-86ab0fe1.d.ts → page-ed0ecb44.d.ts} +19 -9
  66. package/dist/types/playground.d.ts +2 -3
  67. package/dist/types/playwright.d.ts +9 -2
  68. package/dist/types/puppeteer-agent-launcher.d.ts +1 -2
  69. package/dist/types/puppeteer.d.ts +6 -5
  70. package/dist/types/ui-utils.d.ts +1 -1
  71. package/dist/types/utils.d.ts +1 -2
  72. package/dist/types/yaml.d.ts +1 -2
  73. package/iife-script/htmlElement.js +53 -75
  74. package/iife-script/htmlElementDebug.js +35 -56
  75. package/package.json +24 -24
  76. package/LICENSE +0 -21
@@ -56,10 +56,11 @@ var ScriptPlayer = class {
56
56
  this.unnamedResultIndex = 0;
57
57
  this.pageAgent = null;
58
58
  this.result = {};
59
+ const target = script.target || script.web || script.android;
59
60
  if (import_utils.ifInBrowser) {
60
61
  this.output = void 0;
61
- } else if (script.target?.output) {
62
- this.output = (0, import_node_path.resolve)(process.cwd(), script.target.output);
62
+ } else if (target?.output) {
63
+ this.output = (0, import_node_path.resolve)(process.cwd(), target.output);
63
64
  } else {
64
65
  this.output = (0, import_node_path.join)((0, import_common.getMidsceneRunSubDir)("output"), `${process.pid}.json`);
65
66
  }
@@ -133,15 +134,20 @@ var ScriptPlayer = class {
133
134
  } else if ("aiAssert" in flowItem) {
134
135
  const assertTask = flowItem;
135
136
  const prompt = assertTask.aiAssert;
137
+ const msg = assertTask.errorMessage;
136
138
  (0, import_utils.assert)(prompt, "missing prompt for aiAssert");
137
139
  (0, import_utils.assert)(
138
140
  typeof prompt === "string",
139
141
  "prompt for aiAssert must be a string"
140
142
  );
141
- await agent.aiAssert(prompt);
143
+ await agent.aiAssert(prompt, msg);
142
144
  } else if ("aiQuery" in flowItem) {
143
145
  const queryTask = flowItem;
144
146
  const prompt = queryTask.aiQuery;
147
+ const options = {
148
+ domIncluded: queryTask.domIncluded,
149
+ screenshotIncluded: queryTask.screenshotIncluded
150
+ };
145
151
  (0, import_utils.assert)(prompt, "missing prompt for aiQuery");
146
152
  (0, import_utils.assert)(
147
153
  typeof prompt === "string",
@@ -152,6 +158,10 @@ var ScriptPlayer = class {
152
158
  } else if ("aiNumber" in flowItem) {
153
159
  const numberTask = flowItem;
154
160
  const prompt = numberTask.aiNumber;
161
+ const options = {
162
+ domIncluded: numberTask.domIncluded,
163
+ screenshotIncluded: numberTask.screenshotIncluded
164
+ };
155
165
  (0, import_utils.assert)(prompt, "missing prompt for number");
156
166
  (0, import_utils.assert)(
157
167
  typeof prompt === "string",
@@ -162,6 +172,10 @@ var ScriptPlayer = class {
162
172
  } else if ("aiString" in flowItem) {
163
173
  const stringTask = flowItem;
164
174
  const prompt = stringTask.aiString;
175
+ const options = {
176
+ domIncluded: stringTask.domIncluded,
177
+ screenshotIncluded: stringTask.screenshotIncluded
178
+ };
165
179
  (0, import_utils.assert)(prompt, "missing prompt for string");
166
180
  (0, import_utils.assert)(
167
181
  typeof prompt === "string",
@@ -172,6 +186,10 @@ var ScriptPlayer = class {
172
186
  } else if ("aiBoolean" in flowItem) {
173
187
  const booleanTask = flowItem;
174
188
  const prompt = booleanTask.aiBoolean;
189
+ const options = {
190
+ domIncluded: booleanTask.domIncluded,
191
+ screenshotIncluded: booleanTask.screenshotIncluded
192
+ };
175
193
  (0, import_utils.assert)(prompt, "missing prompt for boolean");
176
194
  (0, import_utils.assert)(
177
195
  typeof prompt === "string",
@@ -214,6 +232,9 @@ var ScriptPlayer = class {
214
232
  } else if ("aiTap" in flowItem) {
215
233
  const tapTask = flowItem;
216
234
  await agent.aiTap(tapTask.aiTap, tapTask);
235
+ } else if ("aiRightClick" in flowItem) {
236
+ const rightClickTask = flowItem;
237
+ await agent.aiRightClick(rightClickTask.aiRightClick, rightClickTask);
217
238
  } else if ("aiHover" in flowItem) {
218
239
  const hoverTask = flowItem;
219
240
  await agent.aiHover(hoverTask.aiHover, hoverTask);
@@ -236,6 +257,11 @@ var ScriptPlayer = class {
236
257
  evaluateJavaScriptTask.javascript
237
258
  );
238
259
  this.setResult(evaluateJavaScriptTask.name, result);
260
+ } else if ("logScreenshot" in flowItem) {
261
+ const logScreenshotTask = flowItem;
262
+ await agent.logScreenshot(logScreenshotTask.logScreenshot, {
263
+ content: logScreenshotTask.content || ""
264
+ });
239
265
  } else {
240
266
  throw new Error(`unknown flowItem: ${JSON.stringify(flowItem)}`);
241
267
  }
@@ -484,7 +510,8 @@ var WebElementInfo = class {
484
510
  id,
485
511
  attributes,
486
512
  indexId,
487
- xpaths
513
+ xpaths,
514
+ isVisible
488
515
  }) {
489
516
  this.content = content;
490
517
  this.rect = rect;
@@ -497,6 +524,7 @@ var WebElementInfo = class {
497
524
  this.attributes = attributes;
498
525
  this.indexId = indexId;
499
526
  this.xpaths = xpaths;
527
+ this.isVisible = isVisible;
500
528
  }
501
529
  };
502
530
 
@@ -519,14 +547,15 @@ async function parseContextFromWebPage(page, _opt) {
519
547
  })
520
548
  ]);
521
549
  const webTree = (0, import_extractor.traverseTree)(tree, (elementInfo) => {
522
- const { rect, id, content, attributes, locator, indexId } = elementInfo;
550
+ const { rect, id, content, attributes, locator, indexId, isVisible } = elementInfo;
523
551
  return new WebElementInfo({
524
552
  rect,
525
553
  locator,
526
554
  id,
527
555
  content,
528
556
  attributes,
529
- indexId
557
+ indexId,
558
+ isVisible
530
559
  });
531
560
  });
532
561
  (0, import_utils4.assert)(screenshotBase64, "screenshotBase64 is required");
@@ -556,7 +585,7 @@ function printReportMsg(filepath) {
556
585
  (0, import_utils4.logMsg)(`Midscene - report file updated: ${filepath}`);
557
586
  }
558
587
  function replaceIllegalPathCharsAndSpace(str) {
559
- return str.replace(/[/\\:*?"<>| ]/g, "-");
588
+ return str.replace(/[:*?"<>| ]/g, "-");
560
589
  }
561
590
  function matchElementFromPlan(planLocateParam, tree) {
562
591
  if (!planLocateParam) {
@@ -842,10 +871,10 @@ var PageTaskExecutor = class {
842
871
  if (!taskParam || !taskParam.value) {
843
872
  return;
844
873
  }
845
- await this.page.keyboard.type(taskParam.value);
846
- } else {
847
- await this.page.keyboard.type(taskParam.value);
848
874
  }
875
+ await this.page.keyboard.type(taskParam.value, {
876
+ autoDismissKeyboard: taskParam.autoDismissKeyboard
877
+ });
849
878
  }
850
879
  };
851
880
  tasks.push(taskActionInput);
@@ -874,6 +903,22 @@ var PageTaskExecutor = class {
874
903
  }
875
904
  };
876
905
  tasks.push(taskActionTap);
906
+ } else if (plan2.type === "RightClick") {
907
+ const taskActionRightClick = {
908
+ type: "Action",
909
+ subType: "RightClick",
910
+ thought: plan2.thought,
911
+ locate: plan2.locate,
912
+ executor: async (param, { element }) => {
913
+ (0, import_utils6.assert)(element, "Element not found, cannot right click");
914
+ await this.page.mouse.click(
915
+ element.center[0],
916
+ element.center[1],
917
+ { button: "right" }
918
+ );
919
+ }
920
+ };
921
+ tasks.push(taskActionRightClick);
877
922
  } else if (plan2.type === "Drag") {
878
923
  const taskActionDrag = {
879
924
  type: "Action",
@@ -1402,7 +1447,7 @@ var PageTaskExecutor = class {
1402
1447
  executor: taskExecutor
1403
1448
  };
1404
1449
  }
1405
- async createTypeQueryTask(type, demand) {
1450
+ async createTypeQueryTask(type, demand, opt) {
1406
1451
  const taskExecutor = new import_misoai_core.Executor(
1407
1452
  taskTitleStr(
1408
1453
  type,
@@ -1433,7 +1478,10 @@ var PageTaskExecutor = class {
1433
1478
  result: `${type}, ${demand}`
1434
1479
  };
1435
1480
  }
1436
- const { data, usage } = await this.insight.extract(demandInput);
1481
+ const { data, usage } = await this.insight.extract(
1482
+ demandInput,
1483
+ opt
1484
+ );
1437
1485
  let outputResult = data;
1438
1486
  if (ifTypeRestricted) {
1439
1487
  (0, import_utils6.assert)(data?.result !== void 0, "No result in query data");
@@ -1453,17 +1501,17 @@ var PageTaskExecutor = class {
1453
1501
  executor: taskExecutor
1454
1502
  };
1455
1503
  }
1456
- async query(demand) {
1457
- return this.createTypeQueryTask("Query", demand);
1504
+ async query(demand, opt) {
1505
+ return this.createTypeQueryTask("Query", demand, opt);
1458
1506
  }
1459
- async boolean(prompt) {
1460
- return this.createTypeQueryTask("Boolean", prompt);
1507
+ async boolean(prompt, opt) {
1508
+ return this.createTypeQueryTask("Boolean", prompt, opt);
1461
1509
  }
1462
- async number(prompt) {
1463
- return this.createTypeQueryTask("Number", prompt);
1510
+ async number(prompt, opt) {
1511
+ return this.createTypeQueryTask("Number", prompt, opt);
1464
1512
  }
1465
- async string(prompt) {
1466
- return this.createTypeQueryTask("String", prompt);
1513
+ async string(prompt, opt) {
1514
+ return this.createTypeQueryTask("String", prompt, opt);
1467
1515
  }
1468
1516
  async assert(assertion) {
1469
1517
  const description = `assert: ${assertion}`;
@@ -1599,7 +1647,7 @@ function buildPlans(type, locateParam, param) {
1599
1647
  param: locateParam,
1600
1648
  thought: ""
1601
1649
  } : null;
1602
- if (type === "Tap" || type === "Hover") {
1650
+ if (type === "Tap" || type === "Hover" || type === "RightClick") {
1603
1651
  (0, import_utils8.assert)(locateParam, `missing locate info for action "${type}"`);
1604
1652
  (0, import_utils8.assert)(locatePlan, `missing locate info for action "${type}"`);
1605
1653
  const tapPlan = {
@@ -1679,7 +1727,7 @@ var import_js_yaml3 = __toESM(require("js-yaml"));
1679
1727
  var import_semver = __toESM(require("semver"));
1680
1728
 
1681
1729
  // package.json
1682
- var version = "1.0.5";
1730
+ var version = "1.0.3";
1683
1731
 
1684
1732
  // src/common/task-cache.ts
1685
1733
  var debug3 = (0, import_logger3.getDebug)("cache");
@@ -1707,70 +1755,44 @@ var TaskCache = class {
1707
1755
  this.cache = cacheContent;
1708
1756
  this.cacheOriginalLength = this.cache.caches.length;
1709
1757
  }
1710
- matchCache(prompt, type, contextData) {
1711
- const contextHash = contextData ? this.generateContextHash(contextData) : void 0;
1758
+ matchCache(prompt, type) {
1712
1759
  for (let i = 0; i < this.cacheOriginalLength; i++) {
1713
1760
  const item = this.cache.caches[i];
1714
1761
  const key = `${type}:${prompt}:${i}`;
1715
- if (item.type !== type || item.prompt !== prompt || this.matchedCacheIndices.has(key)) {
1716
- continue;
1717
- }
1718
- if (type === "plan" && item.type === "plan") {
1719
- const planItem = item;
1720
- if (contextHash && planItem.contextHash) {
1721
- if (contextHash !== planItem.contextHash) {
1722
- debug3("cache context mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1723
- continue;
1762
+ if (item.type === type && item.prompt === prompt && !this.matchedCacheIndices.has(key)) {
1763
+ this.matchedCacheIndices.add(key);
1764
+ debug3(
1765
+ "cache found and marked as used, type: %s, prompt: %s, index: %d",
1766
+ type,
1767
+ prompt,
1768
+ i
1769
+ );
1770
+ return {
1771
+ cacheContent: item,
1772
+ updateFn: (cb) => {
1773
+ debug3(
1774
+ "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1775
+ type,
1776
+ prompt,
1777
+ i
1778
+ );
1779
+ cb(item);
1780
+ debug3(
1781
+ "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1782
+ type,
1783
+ prompt,
1784
+ i
1785
+ );
1786
+ this.flushCacheToFile();
1724
1787
  }
1725
- } else if (contextHash || planItem.contextHash) {
1726
- debug3("cache context availability mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1727
- continue;
1728
- }
1788
+ };
1729
1789
  }
1730
- this.matchedCacheIndices.add(key);
1731
- debug3(
1732
- "cache found and marked as used, type: %s, prompt: %s, index: %d, contextMatch: %s",
1733
- type,
1734
- prompt,
1735
- i,
1736
- contextHash ? "yes" : "no-context"
1737
- );
1738
- return {
1739
- cacheContent: item,
1740
- updateFn: (cb) => {
1741
- debug3(
1742
- "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1743
- type,
1744
- prompt,
1745
- i
1746
- );
1747
- cb(item);
1748
- debug3(
1749
- "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1750
- type,
1751
- prompt,
1752
- i
1753
- );
1754
- this.flushCacheToFile();
1755
- }
1756
- };
1757
1790
  }
1758
- debug3("no unused cache found, type: %s, prompt: %s, contextHash: %s", type, prompt, contextHash);
1791
+ debug3("no unused cache found, type: %s, prompt: %s", type, prompt);
1759
1792
  return void 0;
1760
1793
  }
1761
- generateContextHash(contextData) {
1762
- const sortedKeys = Object.keys(contextData).sort();
1763
- const stableString = sortedKeys.map((key) => `${key}:${JSON.stringify(contextData[key])}`).join("|");
1764
- let hash = 0;
1765
- for (let i = 0; i < stableString.length; i++) {
1766
- const char = stableString.charCodeAt(i);
1767
- hash = (hash << 5) - hash + char;
1768
- hash = hash & hash;
1769
- }
1770
- return hash.toString(36);
1771
- }
1772
- matchPlanCache(prompt, contextData) {
1773
- return this.matchCache(prompt, "plan", contextData);
1794
+ matchPlanCache(prompt) {
1795
+ return this.matchCache(prompt, "plan");
1774
1796
  }
1775
1797
  matchLocateCache(prompt) {
1776
1798
  return this.matchCache(prompt, "locate");
@@ -1836,8 +1858,14 @@ cache file: ${cacheFile}`
1836
1858
  return;
1837
1859
  }
1838
1860
  try {
1861
+ const dir = (0, import_node_path2.dirname)(this.cacheFilePath);
1862
+ if (!(0, import_node_fs2.existsSync)(dir)) {
1863
+ (0, import_node_fs2.mkdirSync)(dir, { recursive: true });
1864
+ debug3("created cache directory: %s", dir);
1865
+ }
1839
1866
  const yamlData = import_js_yaml3.default.dump(this.cache);
1840
1867
  (0, import_node_fs2.writeFileSync)(this.cacheFilePath, yamlData);
1868
+ debug3("cache flushed to file: %s", this.cacheFilePath);
1841
1869
  } catch (err) {
1842
1870
  debug3(
1843
1871
  "write cache to file failed, path: %s, error: %s",
@@ -1846,16 +1874,11 @@ cache file: ${cacheFile}`
1846
1874
  );
1847
1875
  }
1848
1876
  }
1849
- updateOrAppendCacheRecord(newRecord, cachedRecord, contextData) {
1877
+ updateOrAppendCacheRecord(newRecord, cachedRecord) {
1850
1878
  if (cachedRecord) {
1851
1879
  if (newRecord.type === "plan") {
1852
1880
  cachedRecord.updateFn((cache) => {
1853
- const planCache = cache;
1854
- planCache.yamlWorkflow = newRecord.yamlWorkflow;
1855
- if (contextData) {
1856
- planCache.contextHash = this.generateContextHash(contextData);
1857
- planCache.contextData = { ...contextData };
1858
- }
1881
+ cache.yamlWorkflow = newRecord.yamlWorkflow;
1859
1882
  });
1860
1883
  } else {
1861
1884
  cachedRecord.updateFn((cache) => {
@@ -1863,11 +1886,6 @@ cache file: ${cacheFile}`
1863
1886
  });
1864
1887
  }
1865
1888
  } else {
1866
- if (newRecord.type === "plan" && contextData) {
1867
- const planRecord = newRecord;
1868
- planRecord.contextHash = this.generateContextHash(contextData);
1869
- planRecord.contextData = { ...contextData };
1870
- }
1871
1889
  this.appendCache(newRecord);
1872
1890
  }
1873
1891
  }
@@ -1897,13 +1915,10 @@ var PageAgent = class {
1897
1915
  generateReport: true,
1898
1916
  autoPrintReportMsg: true,
1899
1917
  groupName: "Midscene Report",
1900
- groupDescription: "",
1901
- enableCumulativeContext: true,
1902
- autoClearContext: false
1918
+ groupDescription: ""
1903
1919
  },
1904
1920
  opts || {}
1905
1921
  );
1906
- this.initializeContextStore();
1907
1922
  if (this.page.pageType === "puppeteer" || this.page.pageType === "playwright") {
1908
1923
  this.page.waitForNavigationTimeout = this.opts.waitForNavigationTimeout || import_constants2.DEFAULT_WAIT_FOR_NAVIGATION_TIMEOUT;
1909
1924
  this.page.waitForNetworkIdleTimeout = this.opts.waitForNetworkIdleTimeout || import_constants2.DEFAULT_WAIT_FOR_NETWORK_IDLE_TIMEOUT;
@@ -1930,69 +1945,6 @@ var PageAgent = class {
1930
1945
  opts?.testId || this.page.pageType || "web"
1931
1946
  );
1932
1947
  }
1933
- /**
1934
- * Initialize context store for cumulative context functionality
1935
- */
1936
- async initializeContextStore() {
1937
- if (!this.opts.enableCumulativeContext) {
1938
- debug4("Cumulative context disabled via options");
1939
- return;
1940
- }
1941
- try {
1942
- const aiModel = await import("misoai-core/ai-model");
1943
- this.contextStore = aiModel.getContextStore();
1944
- debug4("Context store initialized successfully", {
1945
- autoClearContext: this.opts.autoClearContext,
1946
- testId: this.opts.testId
1947
- });
1948
- if (this.opts.autoClearContext) {
1949
- this.contextStore.clear();
1950
- debug4("Context store cleared due to autoClearContext option");
1951
- } else {
1952
- const existingData = this.contextStore.getAllData();
1953
- const existingSteps = this.contextStore.getRecentSteps(100).length;
1954
- debug4("Context store preserving existing data", {
1955
- existingDataKeys: Object.keys(existingData),
1956
- existingStepsCount: existingSteps
1957
- });
1958
- }
1959
- } catch (error) {
1960
- debug4("Failed to initialize context store:", error);
1961
- console.warn("⚠️ Could not initialize context store:", error);
1962
- }
1963
- }
1964
- /**
1965
- * Get the context store instance
1966
- */
1967
- getContextStore() {
1968
- return this.contextStore;
1969
- }
1970
- /**
1971
- * Clear the context store
1972
- */
1973
- clearContext() {
1974
- if (this.contextStore) {
1975
- this.contextStore.clear();
1976
- }
1977
- }
1978
- /**
1979
- * Get all stored data from context store
1980
- */
1981
- getStoredData() {
1982
- if (this.contextStore) {
1983
- return this.contextStore.getAllData();
1984
- }
1985
- return {};
1986
- }
1987
- /**
1988
- * Get step summary from context store
1989
- */
1990
- getStepSummary() {
1991
- if (this.contextStore) {
1992
- return this.contextStore.getStepSummary();
1993
- }
1994
- return "";
1995
- }
1996
1948
  async getUIContext(action) {
1997
1949
  if (action && (action === "extract" || action === "assert" || action === "captcha")) {
1998
1950
  return await parseContextFromWebPage(this.page, {
@@ -2171,6 +2123,23 @@ var PageAgent = class {
2171
2123
  metadata
2172
2124
  };
2173
2125
  }
2126
+ async aiRightClick(locatePrompt, opt) {
2127
+ const detailedLocateParam = this.buildDetailedLocateParam(
2128
+ locatePrompt,
2129
+ opt
2130
+ );
2131
+ const plans = buildPlans("RightClick", detailedLocateParam);
2132
+ const { executor, output } = await this.taskExecutor.runPlans(
2133
+ taskTitleStr("RightClick", locateParamStr(detailedLocateParam)),
2134
+ plans,
2135
+ { cacheable: opt?.cacheable }
2136
+ );
2137
+ const metadata = this.afterTaskRunning(executor);
2138
+ return {
2139
+ result: output,
2140
+ metadata
2141
+ };
2142
+ }
2174
2143
  async aiInput(value, locatePrompt, opt) {
2175
2144
  (0, import_utils12.assert)(
2176
2145
  typeof value === "string",
@@ -2228,35 +2197,9 @@ var PageAgent = class {
2228
2197
  };
2229
2198
  }
2230
2199
  async aiAction(taskPrompt, opt) {
2231
- const originalPrompt = taskPrompt;
2232
- let processedPrompt = taskPrompt;
2233
- if (this.opts.enableCumulativeContext && this.contextStore) {
2234
- try {
2235
- const storedData = this.contextStore.getAllData();
2236
- if (Object.keys(storedData).length > 0) {
2237
- debug4("Available data for aiAction:", {
2238
- prompt: taskPrompt,
2239
- availableData: storedData
2240
- });
2241
- }
2242
- } catch (error) {
2243
- debug4("Context store operation failed:", error);
2244
- }
2245
- }
2246
2200
  const cacheable = opt?.cacheable;
2247
2201
  const isVlmUiTars = (0, import_env2.vlLocateMode)() === "vlm-ui-tars";
2248
- let contextData;
2249
- if (this.opts.enableCumulativeContext && this.contextStore) {
2250
- try {
2251
- contextData = this.contextStore.getAllData();
2252
- if (contextData && Object.keys(contextData).length === 0) {
2253
- contextData = void 0;
2254
- }
2255
- } catch (error) {
2256
- debug4("Failed to get context data for cache:", error);
2257
- }
2258
- }
2259
- const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt, contextData);
2202
+ const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt);
2260
2203
  if (matchedCache && this.taskCache?.isCacheResultUsed) {
2261
2204
  const { executor: executor2 } = await this.taskExecutor.loadYamlFlowAsPlanning(
2262
2205
  taskPrompt,
@@ -2266,28 +2209,6 @@ var PageAgent = class {
2266
2209
  debug4("matched cache, will call .runYaml to run the action");
2267
2210
  const yaml5 = matchedCache.cacheContent?.yamlWorkflow;
2268
2211
  const result = await this.runYaml(yaml5);
2269
- if (this.opts.enableCumulativeContext && this.contextStore) {
2270
- try {
2271
- const executionResult = {
2272
- success: true,
2273
- actionType: "cached",
2274
- description: `Executed cached action: ${processedPrompt}`,
2275
- timing: result.metadata?.totalTime
2276
- };
2277
- this.contextStore.addStep({
2278
- type: "action",
2279
- summary: `Action: ${processedPrompt} (cached)`,
2280
- prompt: processedPrompt,
2281
- executionResult
2282
- });
2283
- debug4("Added cached action step to context store:", {
2284
- stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2285
- totalSteps: this.contextStore.getRecentSteps(100).length
2286
- });
2287
- } catch (error) {
2288
- debug4("Failed to add cached action step:", error);
2289
- }
2290
- }
2291
2212
  return {
2292
2213
  result: result.result,
2293
2214
  metadata: metadata2
@@ -2312,114 +2233,17 @@ var PageAgent = class {
2312
2233
  prompt: taskPrompt,
2313
2234
  yamlWorkflow: yamlFlowStr
2314
2235
  },
2315
- matchedCache,
2316
- contextData
2317
- // Pass context data for cache creation
2236
+ matchedCache
2318
2237
  );
2319
2238
  }
2320
2239
  const metadata = this.afterTaskRunning(executor);
2321
- if (this.opts.enableCumulativeContext && this.contextStore) {
2322
- try {
2323
- const executionResult = this.analyzeExecutionResults(executor, originalPrompt);
2324
- this.contextStore.addStep({
2325
- type: "action",
2326
- summary: `Action: ${processedPrompt}`,
2327
- prompt: processedPrompt,
2328
- executionResult
2329
- });
2330
- debug4("Added action step with execution result to context store:", {
2331
- stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2332
- totalSteps: this.contextStore.getRecentSteps(100).length,
2333
- executionResult
2334
- });
2335
- } catch (error) {
2336
- debug4("Failed to analyze execution results, adding step without execution result:", error);
2337
- try {
2338
- this.contextStore.addStep({
2339
- type: "action",
2340
- summary: `Action: ${processedPrompt}`,
2341
- prompt: processedPrompt
2342
- });
2343
- } catch (stepError) {
2344
- debug4("Failed to add action step:", stepError);
2345
- }
2346
- }
2347
- }
2348
2240
  return {
2349
2241
  result: output,
2350
2242
  metadata
2351
2243
  };
2352
2244
  }
2353
2245
  async aiQuery(demand) {
2354
- let processedDemand = demand;
2355
- let storageKey;
2356
- try {
2357
- const aiModel = await import("misoai-core/ai-model");
2358
- const contextStore = aiModel.getContextStore();
2359
- if (typeof demand === "string") {
2360
- const storageInstruction = contextStore.parseStorageInstruction(demand);
2361
- if (storageInstruction) {
2362
- storageKey = storageInstruction.key;
2363
- processedDemand = storageInstruction.cleanText;
2364
- contextStore._pendingAliases = storageInstruction.aliases;
2365
- } else {
2366
- const storageMatch = demand.match(/store\s+(?:as\s+)?(\w+)/i);
2367
- if (storageMatch) {
2368
- storageKey = storageMatch[1];
2369
- processedDemand = demand.replace(/,?\s*store\s+(?:as\s+)?\w+/i, "").trim();
2370
- }
2371
- }
2372
- }
2373
- } catch (error) {
2374
- debug4("Context store not available:", error);
2375
- }
2376
- const { output, executor } = await this.taskExecutor.query(processedDemand);
2377
- if (this.opts.enableCumulativeContext && this.contextStore) {
2378
- if (storageKey && output) {
2379
- try {
2380
- const pendingAliases = this.contextStore._pendingAliases;
2381
- if (pendingAliases) {
2382
- this.contextStore.storeDataWithAliases(storageKey, output, pendingAliases, typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand));
2383
- delete this.contextStore._pendingAliases;
2384
- debug4("Stored query result with aliases:", {
2385
- key: storageKey,
2386
- value: output,
2387
- aliases: pendingAliases
2388
- });
2389
- } else {
2390
- this.contextStore.storeData(storageKey, output);
2391
- debug4("Stored query result:", {
2392
- key: storageKey,
2393
- value: output
2394
- });
2395
- }
2396
- this.contextStore.addStep({
2397
- type: "query",
2398
- summary: `Query: ${typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)} (stored as ${storageKey})`,
2399
- data: output,
2400
- prompt: typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)
2401
- });
2402
- debug4("Added query step to context store:", {
2403
- storageKey,
2404
- totalStoredItems: Object.keys(this.contextStore.getAllData()).length,
2405
- totalSteps: this.contextStore.getRecentSteps(100).length
2406
- });
2407
- } catch (error) {
2408
- debug4("Failed to store query result:", error);
2409
- }
2410
- } else {
2411
- try {
2412
- this.contextStore.addStep({
2413
- type: "query",
2414
- summary: `Query: ${typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)}`,
2415
- data: output,
2416
- prompt: typeof processedDemand === "string" ? processedDemand : JSON.stringify(processedDemand)
2417
- });
2418
- } catch (error) {
2419
- debug4("Failed to add query step:", error);
2420
- }
2421
- }
2422
- }
2246
+ const { output, executor } = await this.taskExecutor.query(demand);
2423
2247
  const metadata = this.afterTaskRunning(executor);
2424
2248
  return {
2425
2249
  result: output,
@@ -2529,48 +2353,6 @@ var PageAgent = class {
2529
2353
  };
2530
2354
  }
2531
2355
  async aiAssert(assertion, msg, opt) {
2532
- let executionContext = "";
2533
- if (this.opts.enableCumulativeContext && this.contextStore) {
2534
- try {
2535
- const recentSteps = this.contextStore.getRecentSteps(3);
2536
- const stepsWithExecutionResults = recentSteps.filter((step) => step.executionResult);
2537
- const storedData = this.contextStore.getAllData();
2538
- if (stepsWithExecutionResults.length > 0) {
2539
- const recentActions = stepsWithExecutionResults.map((step) => {
2540
- const result = step.executionResult;
2541
- return `- ${result.description}${result.success ? "" : " (FAILED)"}`;
2542
- }).join("\n");
2543
- executionContext = `
2544
-
2545
- Recent actions performed:
2546
- ${recentActions}
2547
-
2548
- This context may help verify the assertion.`;
2549
- }
2550
- if (storedData && Object.keys(storedData).length > 0) {
2551
- executionContext += `
2552
-
2553
- Available data for reference:
2554
- ${JSON.stringify(storedData, null, 2)}
2555
-
2556
- Note: If the assertion references any data keys or natural language equivalents, consider the stored values when verifying.`;
2557
- debug4("Available data for aiAssert:", {
2558
- assertion,
2559
- availableData: storedData
2560
- });
2561
- }
2562
- this.contextStore.addStep({
2563
- type: "assertion",
2564
- summary: `Assertion: ${assertion}`,
2565
- prompt: assertion
2566
- });
2567
- debug4("Added assertion step to context store:", {
2568
- totalSteps: this.contextStore.getRecentSteps(100).length
2569
- });
2570
- } catch (error) {
2571
- debug4("Context store operation failed:", error);
2572
- }
2573
- }
2574
2356
  let currentUrl = "";
2575
2357
  if (this.page.url) {
2576
2358
  try {
@@ -2578,13 +2360,7 @@ Note: If the assertion references any data keys or natural language equivalents,
2578
2360
  } catch (e) {
2579
2361
  }
2580
2362
  }
2581
- let assertionWithContext = assertion;
2582
- if (currentUrl) {
2583
- assertionWithContext = `For the page at URL "${currentUrl}", ${assertion}`;
2584
- }
2585
- if (executionContext) {
2586
- assertionWithContext += executionContext;
2587
- }
2363
+ const assertionWithContext = currentUrl ? `For the page at URL "${currentUrl}", ${assertion}` : assertion;
2588
2364
  const { output, executor } = await this.taskExecutor.assert(assertionWithContext);
2589
2365
  const metadata = this.afterTaskRunning(executor, true);
2590
2366
  if (output && opt?.keepRawResponse) {
@@ -2795,81 +2571,42 @@ ${errors}`);
2795
2571
  }
2796
2572
  throw new Error("evaluateJavaScript is not supported in current agent");
2797
2573
  }
2798
- async destroy() {
2799
- await this.page.destroy();
2800
- }
2801
- /**
2802
- * Analyze execution results from executor to generate meaningful descriptions
2803
- */
2804
- analyzeExecutionResults(executor, originalPrompt) {
2805
- const tasks = executor.tasks;
2806
- const success = !executor.isInErrorState();
2807
- if (!success) {
2808
- const errorTask = executor.latestErrorTask();
2809
- return {
2810
- success: false,
2811
- actionType: "error",
2812
- description: `Failed to execute: ${originalPrompt}`,
2813
- error: errorTask?.error
2814
- };
2815
- }
2816
- const actionTasks = tasks.filter((t) => t.type === "Action" && t.status === "finished");
2817
- const locateTasks = tasks.filter((t) => t.type === "Insight" && t.subType === "Locate");
2818
- const lastAction = actionTasks[actionTasks.length - 1];
2819
- const lastLocate = locateTasks[locateTasks.length - 1];
2820
- if (!lastAction) {
2821
- return {
2822
- success: true,
2823
- actionType: "unknown",
2824
- description: `Completed: ${originalPrompt}`
2574
+ async logScreenshot(title, options) {
2575
+ const screenshotTitle = title || "untitled";
2576
+ const content = options?.content || "";
2577
+ const screenshot = await this.page.screenshotBase64?.();
2578
+ if (screenshot) {
2579
+ const executionDump = {
2580
+ name: screenshotTitle,
2581
+ description: content,
2582
+ tasks: [{
2583
+ type: "Screenshot",
2584
+ subType: "log",
2585
+ status: "finished",
2586
+ executor: null,
2587
+ param: {
2588
+ title: screenshotTitle,
2589
+ content
2590
+ },
2591
+ output: {
2592
+ screenshot
2593
+ },
2594
+ thought: `Logged screenshot: ${screenshotTitle}`,
2595
+ timing: {
2596
+ start: Date.now(),
2597
+ end: Date.now(),
2598
+ cost: 0
2599
+ }
2600
+ }],
2601
+ sdkVersion: "1.0.0",
2602
+ logTime: Date.now(),
2603
+ model_name: "screenshot"
2825
2604
  };
2605
+ this.appendExecutionDump(executionDump);
2826
2606
  }
2827
- const actionType = lastAction.subType || "unknown";
2828
- const elementInfo = this.extractElementInfo(lastLocate, lastAction);
2829
- const description = this.generateActionDescription(actionType, lastAction.param, elementInfo);
2830
- return {
2831
- success: true,
2832
- actionType,
2833
- description,
2834
- elementInfo,
2835
- timing: lastAction.timing?.cost
2836
- };
2837
2607
  }
2838
- /**
2839
- * Extract element information from locate task
2840
- */
2841
- extractElementInfo(locateTask, _actionTask) {
2842
- if (!locateTask?.output?.element)
2843
- return void 0;
2844
- const element = locateTask.output.element;
2845
- return {
2846
- type: element.attributes?.nodeType || "unknown",
2847
- text: element.content || element.attributes?.placeholder || element.attributes?.title || "",
2848
- location: `(${element.center[0]}, ${element.center[1]})`
2849
- };
2850
- }
2851
- /**
2852
- * Generate natural language description for actions
2853
- */
2854
- generateActionDescription(actionType, param, elementInfo) {
2855
- const elementDesc = elementInfo ? `'${elementInfo.text || elementInfo.type}' element` : "element";
2856
- switch (actionType) {
2857
- case "Tap":
2858
- return `Clicked on ${elementDesc}`;
2859
- case "Input":
2860
- const inputValue = param?.value || "";
2861
- return `Entered "${inputValue}" into ${elementDesc}`;
2862
- case "KeyboardPress":
2863
- return `Pressed ${param?.value || "key"}`;
2864
- case "Scroll":
2865
- return `Scrolled ${param?.direction || "on page"}`;
2866
- case "Hover":
2867
- return `Hovered over ${elementDesc}`;
2868
- case "Drag":
2869
- return `Dragged ${elementDesc}`;
2870
- default:
2871
- return `Performed ${actionType} action on ${elementDesc}`;
2872
- }
2608
+ async destroy() {
2609
+ await this.page.destroy();
2873
2610
  }
2874
2611
  };
2875
2612
 
@@ -2983,7 +2720,7 @@ var BridgeServer = class {
2983
2720
  this.socket = socket;
2984
2721
  const clientVersion = socket.handshake.query.version;
2985
2722
  (0, import_utils16.logMsg)(
2986
- `Bridge connected, cli-side version v${"1.0.5"}, browser-side version v${clientVersion}`
2723
+ `Bridge connected, cli-side version v${"1.0.3"}, browser-side version v${clientVersion}`
2987
2724
  );
2988
2725
  socket.on("bridge-call-response" /* CallResponse */, (params) => {
2989
2726
  const id = params.id;
@@ -3014,7 +2751,7 @@ var BridgeServer = class {
3014
2751
  setTimeout(() => {
3015
2752
  this.onConnect?.();
3016
2753
  const payload = {
3017
- version: "1.0.5"
2754
+ version: "1.0.3"
3018
2755
  };
3019
2756
  socket.emit("bridge-connected" /* Connected */, payload);
3020
2757
  Promise.resolve().then(() => {