misoai-web 1.5.6 → 1.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. package/dist/es/agent.js +124 -21
  2. package/dist/es/agent.js.map +1 -1
  3. package/dist/es/bridge-mode-browser.js +10 -9
  4. package/dist/es/bridge-mode-browser.js.map +1 -1
  5. package/dist/es/bridge-mode.js +126 -23
  6. package/dist/es/bridge-mode.js.map +1 -1
  7. package/dist/es/chrome-extension.js +132 -28
  8. package/dist/es/chrome-extension.js.map +1 -1
  9. package/dist/es/index.js +140 -21
  10. package/dist/es/index.js.map +1 -1
  11. package/dist/es/midscene-playground.js +124 -21
  12. package/dist/es/midscene-playground.js.map +1 -1
  13. package/dist/es/midscene-server.js.map +1 -1
  14. package/dist/es/playground.js +124 -21
  15. package/dist/es/playground.js.map +1 -1
  16. package/dist/es/playwright-report.js +1 -1
  17. package/dist/es/playwright-report.js.map +1 -1
  18. package/dist/es/playwright.js +140 -21
  19. package/dist/es/playwright.js.map +1 -1
  20. package/dist/es/puppeteer-agent-launcher.js +124 -21
  21. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  22. package/dist/es/puppeteer.js +124 -21
  23. package/dist/es/puppeteer.js.map +1 -1
  24. package/dist/es/ui-utils.js.map +1 -1
  25. package/dist/es/utils.js +7 -4
  26. package/dist/es/utils.js.map +1 -1
  27. package/dist/es/yaml.js +24 -0
  28. package/dist/es/yaml.js.map +1 -1
  29. package/dist/lib/agent.js +122 -19
  30. package/dist/lib/agent.js.map +1 -1
  31. package/dist/lib/bridge-mode-browser.js +10 -9
  32. package/dist/lib/bridge-mode-browser.js.map +1 -1
  33. package/dist/lib/bridge-mode.js +124 -21
  34. package/dist/lib/bridge-mode.js.map +1 -1
  35. package/dist/lib/chrome-extension.js +130 -26
  36. package/dist/lib/chrome-extension.js.map +1 -1
  37. package/dist/lib/index.js +138 -19
  38. package/dist/lib/index.js.map +1 -1
  39. package/dist/lib/midscene-playground.js +122 -19
  40. package/dist/lib/midscene-playground.js.map +1 -1
  41. package/dist/lib/midscene-server.js.map +1 -1
  42. package/dist/lib/playground.js +122 -19
  43. package/dist/lib/playground.js.map +1 -1
  44. package/dist/lib/playwright-report.js +1 -1
  45. package/dist/lib/playwright-report.js.map +1 -1
  46. package/dist/lib/playwright.js +138 -19
  47. package/dist/lib/playwright.js.map +1 -1
  48. package/dist/lib/puppeteer-agent-launcher.js +122 -19
  49. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  50. package/dist/lib/puppeteer.js +122 -19
  51. package/dist/lib/puppeteer.js.map +1 -1
  52. package/dist/lib/ui-utils.js.map +1 -1
  53. package/dist/lib/utils.js +7 -4
  54. package/dist/lib/utils.js.map +1 -1
  55. package/dist/lib/yaml.js +24 -0
  56. package/dist/lib/yaml.js.map +1 -1
  57. package/dist/types/agent.d.ts +10 -7
  58. package/dist/types/bridge-mode-browser.d.ts +2 -3
  59. package/dist/types/bridge-mode.d.ts +2 -3
  60. package/dist/types/{browser-aec1055d.d.ts → browser-9b472ffb.d.ts} +1 -1
  61. package/dist/types/chrome-extension.d.ts +2 -3
  62. package/dist/types/index.d.ts +1 -2
  63. package/dist/types/midscene-server.d.ts +1 -2
  64. package/dist/types/{page-86ab0fe1.d.ts → page-ed0ecb44.d.ts} +19 -9
  65. package/dist/types/playground.d.ts +2 -3
  66. package/dist/types/playwright.d.ts +9 -2
  67. package/dist/types/puppeteer-agent-launcher.d.ts +1 -2
  68. package/dist/types/puppeteer.d.ts +6 -5
  69. package/dist/types/ui-utils.d.ts +1 -1
  70. package/dist/types/utils.d.ts +1 -2
  71. package/dist/types/yaml.d.ts +1 -2
  72. package/iife-script/htmlElement.js +51 -73
  73. package/iife-script/htmlElementDebug.js +33 -54
  74. package/package.json +23 -23
  75. package/LICENSE +0 -21
@@ -143,6 +143,10 @@ var ScriptPlayer = class {
143
143
  } else if ("aiQuery" in flowItem) {
144
144
  const queryTask = flowItem;
145
145
  const prompt = queryTask.aiQuery;
146
+ const options = {
147
+ domIncluded: queryTask.domIncluded,
148
+ screenshotIncluded: queryTask.screenshotIncluded
149
+ };
146
150
  (0, import_utils.assert)(prompt, "missing prompt for aiQuery");
147
151
  (0, import_utils.assert)(
148
152
  typeof prompt === "string",
@@ -153,6 +157,10 @@ var ScriptPlayer = class {
153
157
  } else if ("aiNumber" in flowItem) {
154
158
  const numberTask = flowItem;
155
159
  const prompt = numberTask.aiNumber;
160
+ const options = {
161
+ domIncluded: numberTask.domIncluded,
162
+ screenshotIncluded: numberTask.screenshotIncluded
163
+ };
156
164
  (0, import_utils.assert)(prompt, "missing prompt for number");
157
165
  (0, import_utils.assert)(
158
166
  typeof prompt === "string",
@@ -163,6 +171,10 @@ var ScriptPlayer = class {
163
171
  } else if ("aiString" in flowItem) {
164
172
  const stringTask = flowItem;
165
173
  const prompt = stringTask.aiString;
174
+ const options = {
175
+ domIncluded: stringTask.domIncluded,
176
+ screenshotIncluded: stringTask.screenshotIncluded
177
+ };
166
178
  (0, import_utils.assert)(prompt, "missing prompt for string");
167
179
  (0, import_utils.assert)(
168
180
  typeof prompt === "string",
@@ -173,6 +185,10 @@ var ScriptPlayer = class {
173
185
  } else if ("aiBoolean" in flowItem) {
174
186
  const booleanTask = flowItem;
175
187
  const prompt = booleanTask.aiBoolean;
188
+ const options = {
189
+ domIncluded: booleanTask.domIncluded,
190
+ screenshotIncluded: booleanTask.screenshotIncluded
191
+ };
176
192
  (0, import_utils.assert)(prompt, "missing prompt for boolean");
177
193
  (0, import_utils.assert)(
178
194
  typeof prompt === "string",
@@ -215,6 +231,9 @@ var ScriptPlayer = class {
215
231
  } else if ("aiTap" in flowItem) {
216
232
  const tapTask = flowItem;
217
233
  await agent.aiTap(tapTask.aiTap, tapTask);
234
+ } else if ("aiRightClick" in flowItem) {
235
+ const rightClickTask = flowItem;
236
+ await agent.aiRightClick(rightClickTask.aiRightClick, rightClickTask);
218
237
  } else if ("aiHover" in flowItem) {
219
238
  const hoverTask = flowItem;
220
239
  await agent.aiHover(hoverTask.aiHover, hoverTask);
@@ -237,6 +256,11 @@ var ScriptPlayer = class {
237
256
  evaluateJavaScriptTask.javascript
238
257
  );
239
258
  this.setResult(evaluateJavaScriptTask.name, result);
259
+ } else if ("logScreenshot" in flowItem) {
260
+ const logScreenshotTask = flowItem;
261
+ await agent.logScreenshot(logScreenshotTask.logScreenshot, {
262
+ content: logScreenshotTask.content || ""
263
+ });
240
264
  } else {
241
265
  throw new Error(`unknown flowItem: ${JSON.stringify(flowItem)}`);
242
266
  }
@@ -485,7 +509,8 @@ var WebElementInfo = class {
485
509
  id,
486
510
  attributes,
487
511
  indexId,
488
- xpaths
512
+ xpaths,
513
+ isVisible
489
514
  }) {
490
515
  this.content = content;
491
516
  this.rect = rect;
@@ -498,6 +523,7 @@ var WebElementInfo = class {
498
523
  this.attributes = attributes;
499
524
  this.indexId = indexId;
500
525
  this.xpaths = xpaths;
526
+ this.isVisible = isVisible;
501
527
  }
502
528
  };
503
529
 
@@ -520,14 +546,15 @@ async function parseContextFromWebPage(page, _opt) {
520
546
  })
521
547
  ]);
522
548
  const webTree = (0, import_extractor.traverseTree)(tree, (elementInfo) => {
523
- const { rect, id, content, attributes, locator, indexId } = elementInfo;
549
+ const { rect, id, content, attributes, locator, indexId, isVisible } = elementInfo;
524
550
  return new WebElementInfo({
525
551
  rect,
526
552
  locator,
527
553
  id,
528
554
  content,
529
555
  attributes,
530
- indexId
556
+ indexId,
557
+ isVisible
531
558
  });
532
559
  });
533
560
  (0, import_utils4.assert)(screenshotBase64, "screenshotBase64 is required");
@@ -557,7 +584,7 @@ function printReportMsg(filepath) {
557
584
  (0, import_utils4.logMsg)(`Midscene - report file updated: ${filepath}`);
558
585
  }
559
586
  function replaceIllegalPathCharsAndSpace(str) {
560
- return str.replace(/[/\\:*?"<>| ]/g, "-");
587
+ return str.replace(/[:*?"<>| ]/g, "-");
561
588
  }
562
589
  function forceClosePopup(page, debug6) {
563
590
  page.on("popup", async (popup) => {
@@ -871,10 +898,10 @@ var PageTaskExecutor = class {
871
898
  if (!taskParam || !taskParam.value) {
872
899
  return;
873
900
  }
874
- await this.page.keyboard.type(taskParam.value);
875
- } else {
876
- await this.page.keyboard.type(taskParam.value);
877
901
  }
902
+ await this.page.keyboard.type(taskParam.value, {
903
+ autoDismissKeyboard: taskParam.autoDismissKeyboard
904
+ });
878
905
  }
879
906
  };
880
907
  tasks.push(taskActionInput);
@@ -903,6 +930,22 @@ var PageTaskExecutor = class {
903
930
  }
904
931
  };
905
932
  tasks.push(taskActionTap);
933
+ } else if (plan2.type === "RightClick") {
934
+ const taskActionRightClick = {
935
+ type: "Action",
936
+ subType: "RightClick",
937
+ thought: plan2.thought,
938
+ locate: plan2.locate,
939
+ executor: async (param, { element }) => {
940
+ (0, import_utils6.assert)(element, "Element not found, cannot right click");
941
+ await this.page.mouse.click(
942
+ element.center[0],
943
+ element.center[1],
944
+ { button: "right" }
945
+ );
946
+ }
947
+ };
948
+ tasks.push(taskActionRightClick);
906
949
  } else if (plan2.type === "Drag") {
907
950
  const taskActionDrag = {
908
951
  type: "Action",
@@ -1431,7 +1474,7 @@ var PageTaskExecutor = class {
1431
1474
  executor: taskExecutor
1432
1475
  };
1433
1476
  }
1434
- async createTypeQueryTask(type, demand) {
1477
+ async createTypeQueryTask(type, demand, opt) {
1435
1478
  const taskExecutor = new import_misoai_core.Executor(
1436
1479
  taskTitleStr(
1437
1480
  type,
@@ -1462,7 +1505,10 @@ var PageTaskExecutor = class {
1462
1505
  result: `${type}, ${demand}`
1463
1506
  };
1464
1507
  }
1465
- const { data, usage } = await this.insight.extract(demandInput);
1508
+ const { data, usage } = await this.insight.extract(
1509
+ demandInput,
1510
+ opt
1511
+ );
1466
1512
  let outputResult = data;
1467
1513
  if (ifTypeRestricted) {
1468
1514
  (0, import_utils6.assert)(data?.result !== void 0, "No result in query data");
@@ -1482,17 +1528,17 @@ var PageTaskExecutor = class {
1482
1528
  executor: taskExecutor
1483
1529
  };
1484
1530
  }
1485
- async query(demand) {
1486
- return this.createTypeQueryTask("Query", demand);
1531
+ async query(demand, opt) {
1532
+ return this.createTypeQueryTask("Query", demand, opt);
1487
1533
  }
1488
- async boolean(prompt) {
1489
- return this.createTypeQueryTask("Boolean", prompt);
1534
+ async boolean(prompt, opt) {
1535
+ return this.createTypeQueryTask("Boolean", prompt, opt);
1490
1536
  }
1491
- async number(prompt) {
1492
- return this.createTypeQueryTask("Number", prompt);
1537
+ async number(prompt, opt) {
1538
+ return this.createTypeQueryTask("Number", prompt, opt);
1493
1539
  }
1494
- async string(prompt) {
1495
- return this.createTypeQueryTask("String", prompt);
1540
+ async string(prompt, opt) {
1541
+ return this.createTypeQueryTask("String", prompt, opt);
1496
1542
  }
1497
1543
  async assert(assertion) {
1498
1544
  const description = `assert: ${assertion}`;
@@ -1628,7 +1674,7 @@ function buildPlans(type, locateParam, param) {
1628
1674
  param: locateParam,
1629
1675
  thought: ""
1630
1676
  } : null;
1631
- if (type === "Tap" || type === "Hover") {
1677
+ if (type === "Tap" || type === "Hover" || type === "RightClick") {
1632
1678
  (0, import_utils8.assert)(locateParam, `missing locate info for action "${type}"`);
1633
1679
  (0, import_utils8.assert)(locatePlan, `missing locate info for action "${type}"`);
1634
1680
  const tapPlan = {
@@ -1708,7 +1754,7 @@ var import_js_yaml3 = __toESM(require("js-yaml"));
1708
1754
  var import_semver = __toESM(require("semver"));
1709
1755
 
1710
1756
  // package.json
1711
- var version = "1.5.6";
1757
+ var version = "1.0.3";
1712
1758
 
1713
1759
  // src/common/task-cache.ts
1714
1760
  var debug3 = (0, import_logger3.getDebug)("cache");
@@ -1839,8 +1885,14 @@ cache file: ${cacheFile}`
1839
1885
  return;
1840
1886
  }
1841
1887
  try {
1888
+ const dir = (0, import_node_path2.dirname)(this.cacheFilePath);
1889
+ if (!(0, import_node_fs2.existsSync)(dir)) {
1890
+ (0, import_node_fs2.mkdirSync)(dir, { recursive: true });
1891
+ debug3("created cache directory: %s", dir);
1892
+ }
1842
1893
  const yamlData = import_js_yaml3.default.dump(this.cache);
1843
1894
  (0, import_node_fs2.writeFileSync)(this.cacheFilePath, yamlData);
1895
+ debug3("cache flushed to file: %s", this.cacheFilePath);
1844
1896
  } catch (err) {
1845
1897
  debug3(
1846
1898
  "write cache to file failed, path: %s, error: %s",
@@ -2098,6 +2150,23 @@ var PageAgent = class {
2098
2150
  metadata
2099
2151
  };
2100
2152
  }
2153
+ async aiRightClick(locatePrompt, opt) {
2154
+ const detailedLocateParam = this.buildDetailedLocateParam(
2155
+ locatePrompt,
2156
+ opt
2157
+ );
2158
+ const plans = buildPlans("RightClick", detailedLocateParam);
2159
+ const { executor, output } = await this.taskExecutor.runPlans(
2160
+ taskTitleStr("RightClick", locateParamStr(detailedLocateParam)),
2161
+ plans,
2162
+ { cacheable: opt?.cacheable }
2163
+ );
2164
+ const metadata = this.afterTaskRunning(executor);
2165
+ return {
2166
+ result: output,
2167
+ metadata
2168
+ };
2169
+ }
2101
2170
  async aiInput(value, locatePrompt, opt) {
2102
2171
  (0, import_utils12.assert)(
2103
2172
  typeof value === "string",
@@ -2529,6 +2598,40 @@ ${errors}`);
2529
2598
  }
2530
2599
  throw new Error("evaluateJavaScript is not supported in current agent");
2531
2600
  }
2601
+ async logScreenshot(title, options) {
2602
+ const screenshotTitle = title || "untitled";
2603
+ const content = options?.content || "";
2604
+ const screenshot = await this.page.screenshotBase64?.();
2605
+ if (screenshot) {
2606
+ const executionDump = {
2607
+ name: screenshotTitle,
2608
+ description: content,
2609
+ tasks: [{
2610
+ type: "Screenshot",
2611
+ subType: "log",
2612
+ status: "finished",
2613
+ executor: null,
2614
+ param: {
2615
+ title: screenshotTitle,
2616
+ content
2617
+ },
2618
+ output: {
2619
+ screenshot
2620
+ },
2621
+ thought: `Logged screenshot: ${screenshotTitle}`,
2622
+ timing: {
2623
+ start: Date.now(),
2624
+ end: Date.now(),
2625
+ cost: 0
2626
+ }
2627
+ }],
2628
+ sdkVersion: "1.0.0",
2629
+ logTime: Date.now(),
2630
+ model_name: "screenshot"
2631
+ };
2632
+ this.appendExecutionDump(executionDump);
2633
+ }
2634
+ }
2532
2635
  async destroy() {
2533
2636
  await this.page.destroy();
2534
2637
  }