misoai-web 1.5.6 → 1.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. package/dist/es/agent.js +124 -21
  2. package/dist/es/agent.js.map +1 -1
  3. package/dist/es/bridge-mode-browser.js +10 -9
  4. package/dist/es/bridge-mode-browser.js.map +1 -1
  5. package/dist/es/bridge-mode.js +126 -23
  6. package/dist/es/bridge-mode.js.map +1 -1
  7. package/dist/es/chrome-extension.js +132 -28
  8. package/dist/es/chrome-extension.js.map +1 -1
  9. package/dist/es/index.js +140 -21
  10. package/dist/es/index.js.map +1 -1
  11. package/dist/es/midscene-playground.js +124 -21
  12. package/dist/es/midscene-playground.js.map +1 -1
  13. package/dist/es/midscene-server.js.map +1 -1
  14. package/dist/es/playground.js +124 -21
  15. package/dist/es/playground.js.map +1 -1
  16. package/dist/es/playwright-report.js +1 -1
  17. package/dist/es/playwright-report.js.map +1 -1
  18. package/dist/es/playwright.js +140 -21
  19. package/dist/es/playwright.js.map +1 -1
  20. package/dist/es/puppeteer-agent-launcher.js +124 -21
  21. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  22. package/dist/es/puppeteer.js +124 -21
  23. package/dist/es/puppeteer.js.map +1 -1
  24. package/dist/es/ui-utils.js.map +1 -1
  25. package/dist/es/utils.js +7 -4
  26. package/dist/es/utils.js.map +1 -1
  27. package/dist/es/yaml.js +24 -0
  28. package/dist/es/yaml.js.map +1 -1
  29. package/dist/lib/agent.js +122 -19
  30. package/dist/lib/agent.js.map +1 -1
  31. package/dist/lib/bridge-mode-browser.js +10 -9
  32. package/dist/lib/bridge-mode-browser.js.map +1 -1
  33. package/dist/lib/bridge-mode.js +124 -21
  34. package/dist/lib/bridge-mode.js.map +1 -1
  35. package/dist/lib/chrome-extension.js +130 -26
  36. package/dist/lib/chrome-extension.js.map +1 -1
  37. package/dist/lib/index.js +138 -19
  38. package/dist/lib/index.js.map +1 -1
  39. package/dist/lib/midscene-playground.js +122 -19
  40. package/dist/lib/midscene-playground.js.map +1 -1
  41. package/dist/lib/midscene-server.js.map +1 -1
  42. package/dist/lib/playground.js +122 -19
  43. package/dist/lib/playground.js.map +1 -1
  44. package/dist/lib/playwright-report.js +1 -1
  45. package/dist/lib/playwright-report.js.map +1 -1
  46. package/dist/lib/playwright.js +138 -19
  47. package/dist/lib/playwright.js.map +1 -1
  48. package/dist/lib/puppeteer-agent-launcher.js +122 -19
  49. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  50. package/dist/lib/puppeteer.js +122 -19
  51. package/dist/lib/puppeteer.js.map +1 -1
  52. package/dist/lib/ui-utils.js.map +1 -1
  53. package/dist/lib/utils.js +7 -4
  54. package/dist/lib/utils.js.map +1 -1
  55. package/dist/lib/yaml.js +24 -0
  56. package/dist/lib/yaml.js.map +1 -1
  57. package/dist/types/agent.d.ts +10 -7
  58. package/dist/types/bridge-mode-browser.d.ts +2 -3
  59. package/dist/types/bridge-mode.d.ts +2 -3
  60. package/dist/types/{browser-aec1055d.d.ts → browser-9b472ffb.d.ts} +1 -1
  61. package/dist/types/chrome-extension.d.ts +2 -3
  62. package/dist/types/index.d.ts +1 -2
  63. package/dist/types/midscene-server.d.ts +1 -2
  64. package/dist/types/{page-86ab0fe1.d.ts → page-ed0ecb44.d.ts} +19 -9
  65. package/dist/types/playground.d.ts +2 -3
  66. package/dist/types/playwright.d.ts +9 -2
  67. package/dist/types/puppeteer-agent-launcher.d.ts +1 -2
  68. package/dist/types/puppeteer.d.ts +6 -5
  69. package/dist/types/ui-utils.d.ts +1 -1
  70. package/dist/types/utils.d.ts +1 -2
  71. package/dist/types/yaml.d.ts +1 -2
  72. package/iife-script/htmlElement.js +51 -73
  73. package/iife-script/htmlElementDebug.js +33 -54
  74. package/package.json +23 -23
  75. package/LICENSE +0 -21
@@ -150,6 +150,10 @@ var ScriptPlayer = class {
150
150
  } else if ("aiQuery" in flowItem) {
151
151
  const queryTask = flowItem;
152
152
  const prompt = queryTask.aiQuery;
153
+ const options = {
154
+ domIncluded: queryTask.domIncluded,
155
+ screenshotIncluded: queryTask.screenshotIncluded
156
+ };
153
157
  (0, import_utils.assert)(prompt, "missing prompt for aiQuery");
154
158
  (0, import_utils.assert)(
155
159
  typeof prompt === "string",
@@ -160,6 +164,10 @@ var ScriptPlayer = class {
160
164
  } else if ("aiNumber" in flowItem) {
161
165
  const numberTask = flowItem;
162
166
  const prompt = numberTask.aiNumber;
167
+ const options = {
168
+ domIncluded: numberTask.domIncluded,
169
+ screenshotIncluded: numberTask.screenshotIncluded
170
+ };
163
171
  (0, import_utils.assert)(prompt, "missing prompt for number");
164
172
  (0, import_utils.assert)(
165
173
  typeof prompt === "string",
@@ -170,6 +178,10 @@ var ScriptPlayer = class {
170
178
  } else if ("aiString" in flowItem) {
171
179
  const stringTask = flowItem;
172
180
  const prompt = stringTask.aiString;
181
+ const options = {
182
+ domIncluded: stringTask.domIncluded,
183
+ screenshotIncluded: stringTask.screenshotIncluded
184
+ };
173
185
  (0, import_utils.assert)(prompt, "missing prompt for string");
174
186
  (0, import_utils.assert)(
175
187
  typeof prompt === "string",
@@ -180,6 +192,10 @@ var ScriptPlayer = class {
180
192
  } else if ("aiBoolean" in flowItem) {
181
193
  const booleanTask = flowItem;
182
194
  const prompt = booleanTask.aiBoolean;
195
+ const options = {
196
+ domIncluded: booleanTask.domIncluded,
197
+ screenshotIncluded: booleanTask.screenshotIncluded
198
+ };
183
199
  (0, import_utils.assert)(prompt, "missing prompt for boolean");
184
200
  (0, import_utils.assert)(
185
201
  typeof prompt === "string",
@@ -222,6 +238,9 @@ var ScriptPlayer = class {
222
238
  } else if ("aiTap" in flowItem) {
223
239
  const tapTask = flowItem;
224
240
  await agent.aiTap(tapTask.aiTap, tapTask);
241
+ } else if ("aiRightClick" in flowItem) {
242
+ const rightClickTask = flowItem;
243
+ await agent.aiRightClick(rightClickTask.aiRightClick, rightClickTask);
225
244
  } else if ("aiHover" in flowItem) {
226
245
  const hoverTask = flowItem;
227
246
  await agent.aiHover(hoverTask.aiHover, hoverTask);
@@ -244,6 +263,11 @@ var ScriptPlayer = class {
244
263
  evaluateJavaScriptTask.javascript
245
264
  );
246
265
  this.setResult(evaluateJavaScriptTask.name, result);
266
+ } else if ("logScreenshot" in flowItem) {
267
+ const logScreenshotTask = flowItem;
268
+ await agent.logScreenshot(logScreenshotTask.logScreenshot, {
269
+ content: logScreenshotTask.content || ""
270
+ });
247
271
  } else {
248
272
  throw new Error(`unknown flowItem: ${JSON.stringify(flowItem)}`);
249
273
  }
@@ -492,7 +516,8 @@ var WebElementInfo = class {
492
516
  id,
493
517
  attributes,
494
518
  indexId,
495
- xpaths
519
+ xpaths,
520
+ isVisible
496
521
  }) {
497
522
  this.content = content;
498
523
  this.rect = rect;
@@ -505,6 +530,7 @@ var WebElementInfo = class {
505
530
  this.attributes = attributes;
506
531
  this.indexId = indexId;
507
532
  this.xpaths = xpaths;
533
+ this.isVisible = isVisible;
508
534
  }
509
535
  };
510
536
 
@@ -527,14 +553,15 @@ async function parseContextFromWebPage(page, _opt) {
527
553
  })
528
554
  ]);
529
555
  const webTree = (0, import_extractor.traverseTree)(tree, (elementInfo) => {
530
- const { rect, id, content, attributes, locator, indexId } = elementInfo;
556
+ const { rect, id, content, attributes, locator, indexId, isVisible } = elementInfo;
531
557
  return new WebElementInfo({
532
558
  rect,
533
559
  locator,
534
560
  id,
535
561
  content,
536
562
  attributes,
537
- indexId
563
+ indexId,
564
+ isVisible
538
565
  });
539
566
  });
540
567
  (0, import_utils4.assert)(screenshotBase64, "screenshotBase64 is required");
@@ -564,7 +591,7 @@ function printReportMsg(filepath) {
564
591
  (0, import_utils4.logMsg)(`Midscene - report file updated: ${filepath}`);
565
592
  }
566
593
  function replaceIllegalPathCharsAndSpace(str) {
567
- return str.replace(/[/\\:*?"<>| ]/g, "-");
594
+ return str.replace(/[:*?"<>| ]/g, "-");
568
595
  }
569
596
  function forceClosePopup(page, debug6) {
570
597
  page.on("popup", async (popup) => {
@@ -878,10 +905,10 @@ var PageTaskExecutor = class {
878
905
  if (!taskParam || !taskParam.value) {
879
906
  return;
880
907
  }
881
- await this.page.keyboard.type(taskParam.value);
882
- } else {
883
- await this.page.keyboard.type(taskParam.value);
884
908
  }
909
+ await this.page.keyboard.type(taskParam.value, {
910
+ autoDismissKeyboard: taskParam.autoDismissKeyboard
911
+ });
885
912
  }
886
913
  };
887
914
  tasks.push(taskActionInput);
@@ -910,6 +937,22 @@ var PageTaskExecutor = class {
910
937
  }
911
938
  };
912
939
  tasks.push(taskActionTap);
940
+ } else if (plan2.type === "RightClick") {
941
+ const taskActionRightClick = {
942
+ type: "Action",
943
+ subType: "RightClick",
944
+ thought: plan2.thought,
945
+ locate: plan2.locate,
946
+ executor: async (param, { element }) => {
947
+ (0, import_utils6.assert)(element, "Element not found, cannot right click");
948
+ await this.page.mouse.click(
949
+ element.center[0],
950
+ element.center[1],
951
+ { button: "right" }
952
+ );
953
+ }
954
+ };
955
+ tasks.push(taskActionRightClick);
913
956
  } else if (plan2.type === "Drag") {
914
957
  const taskActionDrag = {
915
958
  type: "Action",
@@ -1438,7 +1481,7 @@ var PageTaskExecutor = class {
1438
1481
  executor: taskExecutor
1439
1482
  };
1440
1483
  }
1441
- async createTypeQueryTask(type, demand) {
1484
+ async createTypeQueryTask(type, demand, opt) {
1442
1485
  const taskExecutor = new import_misoai_core.Executor(
1443
1486
  taskTitleStr(
1444
1487
  type,
@@ -1469,7 +1512,10 @@ var PageTaskExecutor = class {
1469
1512
  result: `${type}, ${demand}`
1470
1513
  };
1471
1514
  }
1472
- const { data, usage } = await this.insight.extract(demandInput);
1515
+ const { data, usage } = await this.insight.extract(
1516
+ demandInput,
1517
+ opt
1518
+ );
1473
1519
  let outputResult = data;
1474
1520
  if (ifTypeRestricted) {
1475
1521
  (0, import_utils6.assert)(data?.result !== void 0, "No result in query data");
@@ -1489,17 +1535,17 @@ var PageTaskExecutor = class {
1489
1535
  executor: taskExecutor
1490
1536
  };
1491
1537
  }
1492
- async query(demand) {
1493
- return this.createTypeQueryTask("Query", demand);
1538
+ async query(demand, opt) {
1539
+ return this.createTypeQueryTask("Query", demand, opt);
1494
1540
  }
1495
- async boolean(prompt) {
1496
- return this.createTypeQueryTask("Boolean", prompt);
1541
+ async boolean(prompt, opt) {
1542
+ return this.createTypeQueryTask("Boolean", prompt, opt);
1497
1543
  }
1498
- async number(prompt) {
1499
- return this.createTypeQueryTask("Number", prompt);
1544
+ async number(prompt, opt) {
1545
+ return this.createTypeQueryTask("Number", prompt, opt);
1500
1546
  }
1501
- async string(prompt) {
1502
- return this.createTypeQueryTask("String", prompt);
1547
+ async string(prompt, opt) {
1548
+ return this.createTypeQueryTask("String", prompt, opt);
1503
1549
  }
1504
1550
  async assert(assertion) {
1505
1551
  const description = `assert: ${assertion}`;
@@ -1635,7 +1681,7 @@ function buildPlans(type, locateParam, param) {
1635
1681
  param: locateParam,
1636
1682
  thought: ""
1637
1683
  } : null;
1638
- if (type === "Tap" || type === "Hover") {
1684
+ if (type === "Tap" || type === "Hover" || type === "RightClick") {
1639
1685
  (0, import_utils8.assert)(locateParam, `missing locate info for action "${type}"`);
1640
1686
  (0, import_utils8.assert)(locatePlan, `missing locate info for action "${type}"`);
1641
1687
  const tapPlan = {
@@ -1715,7 +1761,7 @@ var import_js_yaml3 = __toESM(require("js-yaml"));
1715
1761
  var import_semver = __toESM(require("semver"));
1716
1762
 
1717
1763
  // package.json
1718
- var version = "1.5.6";
1764
+ var version = "1.0.3";
1719
1765
 
1720
1766
  // src/common/task-cache.ts
1721
1767
  var debug3 = (0, import_logger3.getDebug)("cache");
@@ -1846,8 +1892,14 @@ cache file: ${cacheFile}`
1846
1892
  return;
1847
1893
  }
1848
1894
  try {
1895
+ const dir = (0, import_node_path2.dirname)(this.cacheFilePath);
1896
+ if (!(0, import_node_fs2.existsSync)(dir)) {
1897
+ (0, import_node_fs2.mkdirSync)(dir, { recursive: true });
1898
+ debug3("created cache directory: %s", dir);
1899
+ }
1849
1900
  const yamlData = import_js_yaml3.default.dump(this.cache);
1850
1901
  (0, import_node_fs2.writeFileSync)(this.cacheFilePath, yamlData);
1902
+ debug3("cache flushed to file: %s", this.cacheFilePath);
1851
1903
  } catch (err) {
1852
1904
  debug3(
1853
1905
  "write cache to file failed, path: %s, error: %s",
@@ -2105,6 +2157,23 @@ var PageAgent = class {
2105
2157
  metadata
2106
2158
  };
2107
2159
  }
2160
+ async aiRightClick(locatePrompt, opt) {
2161
+ const detailedLocateParam = this.buildDetailedLocateParam(
2162
+ locatePrompt,
2163
+ opt
2164
+ );
2165
+ const plans = buildPlans("RightClick", detailedLocateParam);
2166
+ const { executor, output } = await this.taskExecutor.runPlans(
2167
+ taskTitleStr("RightClick", locateParamStr(detailedLocateParam)),
2168
+ plans,
2169
+ { cacheable: opt?.cacheable }
2170
+ );
2171
+ const metadata = this.afterTaskRunning(executor);
2172
+ return {
2173
+ result: output,
2174
+ metadata
2175
+ };
2176
+ }
2108
2177
  async aiInput(value, locatePrompt, opt) {
2109
2178
  (0, import_utils12.assert)(
2110
2179
  typeof value === "string",
@@ -2536,6 +2605,40 @@ ${errors}`);
2536
2605
  }
2537
2606
  throw new Error("evaluateJavaScript is not supported in current agent");
2538
2607
  }
2608
+ async logScreenshot(title, options) {
2609
+ const screenshotTitle = title || "untitled";
2610
+ const content = options?.content || "";
2611
+ const screenshot = await this.page.screenshotBase64?.();
2612
+ if (screenshot) {
2613
+ const executionDump = {
2614
+ name: screenshotTitle,
2615
+ description: content,
2616
+ tasks: [{
2617
+ type: "Screenshot",
2618
+ subType: "log",
2619
+ status: "finished",
2620
+ executor: null,
2621
+ param: {
2622
+ title: screenshotTitle,
2623
+ content
2624
+ },
2625
+ output: {
2626
+ screenshot
2627
+ },
2628
+ thought: `Logged screenshot: ${screenshotTitle}`,
2629
+ timing: {
2630
+ start: Date.now(),
2631
+ end: Date.now(),
2632
+ cost: 0
2633
+ }
2634
+ }],
2635
+ sdkVersion: "1.0.0",
2636
+ logTime: Date.now(),
2637
+ model_name: "screenshot"
2638
+ };
2639
+ this.appendExecutionDump(executionDump);
2640
+ }
2641
+ }
2539
2642
  async destroy() {
2540
2643
  await this.page.destroy();
2541
2644
  }