misoai-web 1.5.6 → 1.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. package/dist/es/agent.js +124 -21
  2. package/dist/es/agent.js.map +1 -1
  3. package/dist/es/bridge-mode-browser.js +10 -9
  4. package/dist/es/bridge-mode-browser.js.map +1 -1
  5. package/dist/es/bridge-mode.js +126 -23
  6. package/dist/es/bridge-mode.js.map +1 -1
  7. package/dist/es/chrome-extension.js +132 -28
  8. package/dist/es/chrome-extension.js.map +1 -1
  9. package/dist/es/index.js +140 -21
  10. package/dist/es/index.js.map +1 -1
  11. package/dist/es/midscene-playground.js +124 -21
  12. package/dist/es/midscene-playground.js.map +1 -1
  13. package/dist/es/midscene-server.js.map +1 -1
  14. package/dist/es/playground.js +124 -21
  15. package/dist/es/playground.js.map +1 -1
  16. package/dist/es/playwright-report.js +1 -1
  17. package/dist/es/playwright-report.js.map +1 -1
  18. package/dist/es/playwright.js +140 -21
  19. package/dist/es/playwright.js.map +1 -1
  20. package/dist/es/puppeteer-agent-launcher.js +124 -21
  21. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  22. package/dist/es/puppeteer.js +124 -21
  23. package/dist/es/puppeteer.js.map +1 -1
  24. package/dist/es/ui-utils.js.map +1 -1
  25. package/dist/es/utils.js +7 -4
  26. package/dist/es/utils.js.map +1 -1
  27. package/dist/es/yaml.js +24 -0
  28. package/dist/es/yaml.js.map +1 -1
  29. package/dist/lib/agent.js +122 -19
  30. package/dist/lib/agent.js.map +1 -1
  31. package/dist/lib/bridge-mode-browser.js +10 -9
  32. package/dist/lib/bridge-mode-browser.js.map +1 -1
  33. package/dist/lib/bridge-mode.js +124 -21
  34. package/dist/lib/bridge-mode.js.map +1 -1
  35. package/dist/lib/chrome-extension.js +130 -26
  36. package/dist/lib/chrome-extension.js.map +1 -1
  37. package/dist/lib/index.js +138 -19
  38. package/dist/lib/index.js.map +1 -1
  39. package/dist/lib/midscene-playground.js +122 -19
  40. package/dist/lib/midscene-playground.js.map +1 -1
  41. package/dist/lib/midscene-server.js.map +1 -1
  42. package/dist/lib/playground.js +122 -19
  43. package/dist/lib/playground.js.map +1 -1
  44. package/dist/lib/playwright-report.js +1 -1
  45. package/dist/lib/playwright-report.js.map +1 -1
  46. package/dist/lib/playwright.js +138 -19
  47. package/dist/lib/playwright.js.map +1 -1
  48. package/dist/lib/puppeteer-agent-launcher.js +122 -19
  49. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  50. package/dist/lib/puppeteer.js +122 -19
  51. package/dist/lib/puppeteer.js.map +1 -1
  52. package/dist/lib/ui-utils.js.map +1 -1
  53. package/dist/lib/utils.js +7 -4
  54. package/dist/lib/utils.js.map +1 -1
  55. package/dist/lib/yaml.js +24 -0
  56. package/dist/lib/yaml.js.map +1 -1
  57. package/dist/types/agent.d.ts +10 -7
  58. package/dist/types/bridge-mode-browser.d.ts +2 -3
  59. package/dist/types/bridge-mode.d.ts +2 -3
  60. package/dist/types/{browser-aec1055d.d.ts → browser-9b472ffb.d.ts} +1 -1
  61. package/dist/types/chrome-extension.d.ts +2 -3
  62. package/dist/types/index.d.ts +1 -2
  63. package/dist/types/midscene-server.d.ts +1 -2
  64. package/dist/types/{page-86ab0fe1.d.ts → page-ed0ecb44.d.ts} +19 -9
  65. package/dist/types/playground.d.ts +2 -3
  66. package/dist/types/playwright.d.ts +9 -2
  67. package/dist/types/puppeteer-agent-launcher.d.ts +1 -2
  68. package/dist/types/puppeteer.d.ts +6 -5
  69. package/dist/types/ui-utils.d.ts +1 -1
  70. package/dist/types/utils.d.ts +1 -2
  71. package/dist/types/yaml.d.ts +1 -2
  72. package/iife-script/htmlElement.js +51 -73
  73. package/iife-script/htmlElementDebug.js +33 -54
  74. package/package.json +23 -23
  75. package/LICENSE +0 -21
@@ -45,7 +45,8 @@ var WebElementInfo = class {
45
45
  id,
46
46
  attributes,
47
47
  indexId,
48
- xpaths
48
+ xpaths,
49
+ isVisible
49
50
  }) {
50
51
  this.content = content;
51
52
  this.rect = rect;
@@ -58,6 +59,7 @@ var WebElementInfo = class {
58
59
  this.attributes = attributes;
59
60
  this.indexId = indexId;
60
61
  this.xpaths = xpaths;
62
+ this.isVisible = isVisible;
61
63
  }
62
64
  };
63
65
 
@@ -80,14 +82,15 @@ async function parseContextFromWebPage(page, _opt) {
80
82
  })
81
83
  ]);
82
84
  const webTree = traverseTree(tree, (elementInfo) => {
83
- const { rect, id, content, attributes, locator, indexId } = elementInfo;
85
+ const { rect, id, content, attributes, locator, indexId, isVisible } = elementInfo;
84
86
  return new WebElementInfo({
85
87
  rect,
86
88
  locator,
87
89
  id,
88
90
  content,
89
91
  attributes,
90
- indexId
92
+ indexId,
93
+ isVisible
91
94
  });
92
95
  });
93
96
  assert(screenshotBase64, "screenshotBase64 is required");
@@ -118,7 +121,7 @@ function printReportMsg(filepath) {
118
121
  }
119
122
  var ERROR_CODE_NOT_IMPLEMENTED_AS_DESIGNED = "NOT_IMPLEMENTED_AS_DESIGNED";
120
123
  function replaceIllegalPathCharsAndSpace(str) {
121
- return str.replace(/[/\\:*?"<>| ]/g, "-");
124
+ return str.replace(/[:*?"<>| ]/g, "-");
122
125
  }
123
126
  function matchElementFromPlan(planLocateParam, tree) {
124
127
  if (!planLocateParam) {
@@ -248,6 +251,10 @@ var ScriptPlayer = class {
248
251
  } else if ("aiQuery" in flowItem) {
249
252
  const queryTask = flowItem;
250
253
  const prompt = queryTask.aiQuery;
254
+ const options = {
255
+ domIncluded: queryTask.domIncluded,
256
+ screenshotIncluded: queryTask.screenshotIncluded
257
+ };
251
258
  assert2(prompt, "missing prompt for aiQuery");
252
259
  assert2(
253
260
  typeof prompt === "string",
@@ -258,6 +265,10 @@ var ScriptPlayer = class {
258
265
  } else if ("aiNumber" in flowItem) {
259
266
  const numberTask = flowItem;
260
267
  const prompt = numberTask.aiNumber;
268
+ const options = {
269
+ domIncluded: numberTask.domIncluded,
270
+ screenshotIncluded: numberTask.screenshotIncluded
271
+ };
261
272
  assert2(prompt, "missing prompt for number");
262
273
  assert2(
263
274
  typeof prompt === "string",
@@ -268,6 +279,10 @@ var ScriptPlayer = class {
268
279
  } else if ("aiString" in flowItem) {
269
280
  const stringTask = flowItem;
270
281
  const prompt = stringTask.aiString;
282
+ const options = {
283
+ domIncluded: stringTask.domIncluded,
284
+ screenshotIncluded: stringTask.screenshotIncluded
285
+ };
271
286
  assert2(prompt, "missing prompt for string");
272
287
  assert2(
273
288
  typeof prompt === "string",
@@ -278,6 +293,10 @@ var ScriptPlayer = class {
278
293
  } else if ("aiBoolean" in flowItem) {
279
294
  const booleanTask = flowItem;
280
295
  const prompt = booleanTask.aiBoolean;
296
+ const options = {
297
+ domIncluded: booleanTask.domIncluded,
298
+ screenshotIncluded: booleanTask.screenshotIncluded
299
+ };
281
300
  assert2(prompt, "missing prompt for boolean");
282
301
  assert2(
283
302
  typeof prompt === "string",
@@ -320,6 +339,9 @@ var ScriptPlayer = class {
320
339
  } else if ("aiTap" in flowItem) {
321
340
  const tapTask = flowItem;
322
341
  await agent.aiTap(tapTask.aiTap, tapTask);
342
+ } else if ("aiRightClick" in flowItem) {
343
+ const rightClickTask = flowItem;
344
+ await agent.aiRightClick(rightClickTask.aiRightClick, rightClickTask);
323
345
  } else if ("aiHover" in flowItem) {
324
346
  const hoverTask = flowItem;
325
347
  await agent.aiHover(hoverTask.aiHover, hoverTask);
@@ -342,6 +364,11 @@ var ScriptPlayer = class {
342
364
  evaluateJavaScriptTask.javascript
343
365
  );
344
366
  this.setResult(evaluateJavaScriptTask.name, result);
367
+ } else if ("logScreenshot" in flowItem) {
368
+ const logScreenshotTask = flowItem;
369
+ await agent.logScreenshot(logScreenshotTask.logScreenshot, {
370
+ content: logScreenshotTask.content || ""
371
+ });
345
372
  } else {
346
373
  throw new Error(`unknown flowItem: ${JSON.stringify(flowItem)}`);
347
374
  }
@@ -872,10 +899,10 @@ var PageTaskExecutor = class {
872
899
  if (!taskParam || !taskParam.value) {
873
900
  return;
874
901
  }
875
- await this.page.keyboard.type(taskParam.value);
876
- } else {
877
- await this.page.keyboard.type(taskParam.value);
878
902
  }
903
+ await this.page.keyboard.type(taskParam.value, {
904
+ autoDismissKeyboard: taskParam.autoDismissKeyboard
905
+ });
879
906
  }
880
907
  };
881
908
  tasks.push(taskActionInput);
@@ -904,6 +931,22 @@ var PageTaskExecutor = class {
904
931
  }
905
932
  };
906
933
  tasks.push(taskActionTap);
934
+ } else if (plan2.type === "RightClick") {
935
+ const taskActionRightClick = {
936
+ type: "Action",
937
+ subType: "RightClick",
938
+ thought: plan2.thought,
939
+ locate: plan2.locate,
940
+ executor: async (param, { element }) => {
941
+ assert4(element, "Element not found, cannot right click");
942
+ await this.page.mouse.click(
943
+ element.center[0],
944
+ element.center[1],
945
+ { button: "right" }
946
+ );
947
+ }
948
+ };
949
+ tasks.push(taskActionRightClick);
907
950
  } else if (plan2.type === "Drag") {
908
951
  const taskActionDrag = {
909
952
  type: "Action",
@@ -1432,7 +1475,7 @@ var PageTaskExecutor = class {
1432
1475
  executor: taskExecutor
1433
1476
  };
1434
1477
  }
1435
- async createTypeQueryTask(type, demand) {
1478
+ async createTypeQueryTask(type, demand, opt) {
1436
1479
  const taskExecutor = new Executor(
1437
1480
  taskTitleStr(
1438
1481
  type,
@@ -1463,7 +1506,10 @@ var PageTaskExecutor = class {
1463
1506
  result: `${type}, ${demand}`
1464
1507
  };
1465
1508
  }
1466
- const { data, usage } = await this.insight.extract(demandInput);
1509
+ const { data, usage } = await this.insight.extract(
1510
+ demandInput,
1511
+ opt
1512
+ );
1467
1513
  let outputResult = data;
1468
1514
  if (ifTypeRestricted) {
1469
1515
  assert4(data?.result !== void 0, "No result in query data");
@@ -1483,17 +1529,17 @@ var PageTaskExecutor = class {
1483
1529
  executor: taskExecutor
1484
1530
  };
1485
1531
  }
1486
- async query(demand) {
1487
- return this.createTypeQueryTask("Query", demand);
1532
+ async query(demand, opt) {
1533
+ return this.createTypeQueryTask("Query", demand, opt);
1488
1534
  }
1489
- async boolean(prompt) {
1490
- return this.createTypeQueryTask("Boolean", prompt);
1535
+ async boolean(prompt, opt) {
1536
+ return this.createTypeQueryTask("Boolean", prompt, opt);
1491
1537
  }
1492
- async number(prompt) {
1493
- return this.createTypeQueryTask("Number", prompt);
1538
+ async number(prompt, opt) {
1539
+ return this.createTypeQueryTask("Number", prompt, opt);
1494
1540
  }
1495
- async string(prompt) {
1496
- return this.createTypeQueryTask("String", prompt);
1541
+ async string(prompt, opt) {
1542
+ return this.createTypeQueryTask("String", prompt, opt);
1497
1543
  }
1498
1544
  async assert(assertion) {
1499
1545
  const description = `assert: ${assertion}`;
@@ -1629,7 +1675,7 @@ function buildPlans(type, locateParam, param) {
1629
1675
  param: locateParam,
1630
1676
  thought: ""
1631
1677
  } : null;
1632
- if (type === "Tap" || type === "Hover") {
1678
+ if (type === "Tap" || type === "Hover" || type === "RightClick") {
1633
1679
  assert5(locateParam, `missing locate info for action "${type}"`);
1634
1680
  assert5(locatePlan, `missing locate info for action "${type}"`);
1635
1681
  const tapPlan = {
@@ -1700,8 +1746,8 @@ function buildPlans(type, locateParam, param) {
1700
1746
 
1701
1747
  // src/common/task-cache.ts
1702
1748
  import assert6 from "assert";
1703
- import { existsSync as existsSync2, readFileSync, writeFileSync as writeFileSync2 } from "fs";
1704
- import { join as join2 } from "path";
1749
+ import { existsSync as existsSync2, mkdirSync as mkdirSync2, readFileSync, writeFileSync as writeFileSync2 } from "fs";
1750
+ import { dirname as dirname2, join as join2 } from "path";
1705
1751
  import { getMidsceneRunSubDir as getMidsceneRunSubDir2 } from "misoai-shared/common";
1706
1752
  import { getDebug as getDebug3 } from "misoai-shared/logger";
1707
1753
  import { ifInBrowser as ifInBrowser2 } from "misoai-shared/utils";
@@ -1709,7 +1755,7 @@ import yaml3 from "js-yaml";
1709
1755
  import semver from "semver";
1710
1756
 
1711
1757
  // package.json
1712
- var version = "1.5.6";
1758
+ var version = "1.0.3";
1713
1759
 
1714
1760
  // src/common/task-cache.ts
1715
1761
  var debug3 = getDebug3("cache");
@@ -1840,8 +1886,14 @@ cache file: ${cacheFile}`
1840
1886
  return;
1841
1887
  }
1842
1888
  try {
1889
+ const dir = dirname2(this.cacheFilePath);
1890
+ if (!existsSync2(dir)) {
1891
+ mkdirSync2(dir, { recursive: true });
1892
+ debug3("created cache directory: %s", dir);
1893
+ }
1843
1894
  const yamlData = yaml3.dump(this.cache);
1844
1895
  writeFileSync2(this.cacheFilePath, yamlData);
1896
+ debug3("cache flushed to file: %s", this.cacheFilePath);
1845
1897
  } catch (err) {
1846
1898
  debug3(
1847
1899
  "write cache to file failed, path: %s, error: %s",
@@ -2099,6 +2151,23 @@ var PageAgent = class {
2099
2151
  metadata
2100
2152
  };
2101
2153
  }
2154
+ async aiRightClick(locatePrompt, opt) {
2155
+ const detailedLocateParam = this.buildDetailedLocateParam(
2156
+ locatePrompt,
2157
+ opt
2158
+ );
2159
+ const plans = buildPlans("RightClick", detailedLocateParam);
2160
+ const { executor, output } = await this.taskExecutor.runPlans(
2161
+ taskTitleStr("RightClick", locateParamStr(detailedLocateParam)),
2162
+ plans,
2163
+ { cacheable: opt?.cacheable }
2164
+ );
2165
+ const metadata = this.afterTaskRunning(executor);
2166
+ return {
2167
+ result: output,
2168
+ metadata
2169
+ };
2170
+ }
2102
2171
  async aiInput(value, locatePrompt, opt) {
2103
2172
  assert7(
2104
2173
  typeof value === "string",
@@ -2530,6 +2599,40 @@ ${errors}`);
2530
2599
  }
2531
2600
  throw new Error("evaluateJavaScript is not supported in current agent");
2532
2601
  }
2602
+ async logScreenshot(title, options) {
2603
+ const screenshotTitle = title || "untitled";
2604
+ const content = options?.content || "";
2605
+ const screenshot = await this.page.screenshotBase64?.();
2606
+ if (screenshot) {
2607
+ const executionDump = {
2608
+ name: screenshotTitle,
2609
+ description: content,
2610
+ tasks: [{
2611
+ type: "Screenshot",
2612
+ subType: "log",
2613
+ status: "finished",
2614
+ executor: null,
2615
+ param: {
2616
+ title: screenshotTitle,
2617
+ content
2618
+ },
2619
+ output: {
2620
+ screenshot
2621
+ },
2622
+ thought: `Logged screenshot: ${screenshotTitle}`,
2623
+ timing: {
2624
+ start: Date.now(),
2625
+ end: Date.now(),
2626
+ cost: 0
2627
+ }
2628
+ }],
2629
+ sdkVersion: "1.0.0",
2630
+ logTime: Date.now(),
2631
+ model_name: "screenshot"
2632
+ };
2633
+ this.appendExecutionDump(executionDump);
2634
+ }
2635
+ }
2533
2636
  async destroy() {
2534
2637
  await this.page.destroy();
2535
2638
  }
@@ -2752,7 +2855,7 @@ function sleep2(ms) {
2752
2855
  var ChromeExtensionProxyPage = class {
2753
2856
  constructor(forceSameTabNavigation) {
2754
2857
  this.pageType = "chrome-extension-proxy";
2755
- this.version = "1.5.6";
2858
+ this.version = "1.0.3";
2756
2859
  this.activeTabId = null;
2757
2860
  this.tabIdOfDebuggerAttached = null;
2758
2861
  this.attachingDebugger = null;
@@ -2761,7 +2864,8 @@ var ChromeExtensionProxyPage = class {
2761
2864
  this.latestMouseX = 100;
2762
2865
  this.latestMouseY = 100;
2763
2866
  this.mouse = {
2764
- click: async (x, y) => {
2867
+ click: async (x, y, options) => {
2868
+ const { button = "left", count = 1 } = options || {};
2765
2869
  await this.mouse.move(x, y);
2766
2870
  if (this.isMobileEmulation === null) {
2767
2871
  const result = await this.sendCommandToDebugger("Runtime.evaluate", {
@@ -2772,7 +2876,7 @@ var ChromeExtensionProxyPage = class {
2772
2876
  });
2773
2877
  this.isMobileEmulation = result?.result?.value;
2774
2878
  }
2775
- if (this.isMobileEmulation) {
2879
+ if (this.isMobileEmulation && button === "left") {
2776
2880
  const touchPoints = [{ x: Math.round(x), y: Math.round(y) }];
2777
2881
  await this.sendCommandToDebugger("Input.dispatchTouchEvent", {
2778
2882
  type: "touchStart",
@@ -2789,15 +2893,15 @@ var ChromeExtensionProxyPage = class {
2789
2893
  type: "mousePressed",
2790
2894
  x,
2791
2895
  y,
2792
- button: "left",
2793
- clickCount: 1
2896
+ button,
2897
+ clickCount: count
2794
2898
  });
2795
2899
  await this.sendCommandToDebugger("Input.dispatchMouseEvent", {
2796
2900
  type: "mouseReleased",
2797
2901
  x,
2798
2902
  y,
2799
- button: "left",
2800
- clickCount: 1
2903
+ button,
2904
+ clickCount: count
2801
2905
  });
2802
2906
  }
2803
2907
  },