misoai-web 1.5.6 → 1.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. package/dist/es/agent.js +124 -21
  2. package/dist/es/agent.js.map +1 -1
  3. package/dist/es/bridge-mode-browser.js +10 -9
  4. package/dist/es/bridge-mode-browser.js.map +1 -1
  5. package/dist/es/bridge-mode.js +126 -23
  6. package/dist/es/bridge-mode.js.map +1 -1
  7. package/dist/es/chrome-extension.js +132 -28
  8. package/dist/es/chrome-extension.js.map +1 -1
  9. package/dist/es/index.js +140 -21
  10. package/dist/es/index.js.map +1 -1
  11. package/dist/es/midscene-playground.js +124 -21
  12. package/dist/es/midscene-playground.js.map +1 -1
  13. package/dist/es/midscene-server.js.map +1 -1
  14. package/dist/es/playground.js +124 -21
  15. package/dist/es/playground.js.map +1 -1
  16. package/dist/es/playwright-report.js +1 -1
  17. package/dist/es/playwright-report.js.map +1 -1
  18. package/dist/es/playwright.js +140 -21
  19. package/dist/es/playwright.js.map +1 -1
  20. package/dist/es/puppeteer-agent-launcher.js +124 -21
  21. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  22. package/dist/es/puppeteer.js +124 -21
  23. package/dist/es/puppeteer.js.map +1 -1
  24. package/dist/es/ui-utils.js.map +1 -1
  25. package/dist/es/utils.js +7 -4
  26. package/dist/es/utils.js.map +1 -1
  27. package/dist/es/yaml.js +24 -0
  28. package/dist/es/yaml.js.map +1 -1
  29. package/dist/lib/agent.js +122 -19
  30. package/dist/lib/agent.js.map +1 -1
  31. package/dist/lib/bridge-mode-browser.js +10 -9
  32. package/dist/lib/bridge-mode-browser.js.map +1 -1
  33. package/dist/lib/bridge-mode.js +124 -21
  34. package/dist/lib/bridge-mode.js.map +1 -1
  35. package/dist/lib/chrome-extension.js +130 -26
  36. package/dist/lib/chrome-extension.js.map +1 -1
  37. package/dist/lib/index.js +138 -19
  38. package/dist/lib/index.js.map +1 -1
  39. package/dist/lib/midscene-playground.js +122 -19
  40. package/dist/lib/midscene-playground.js.map +1 -1
  41. package/dist/lib/midscene-server.js.map +1 -1
  42. package/dist/lib/playground.js +122 -19
  43. package/dist/lib/playground.js.map +1 -1
  44. package/dist/lib/playwright-report.js +1 -1
  45. package/dist/lib/playwright-report.js.map +1 -1
  46. package/dist/lib/playwright.js +138 -19
  47. package/dist/lib/playwright.js.map +1 -1
  48. package/dist/lib/puppeteer-agent-launcher.js +122 -19
  49. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  50. package/dist/lib/puppeteer.js +122 -19
  51. package/dist/lib/puppeteer.js.map +1 -1
  52. package/dist/lib/ui-utils.js.map +1 -1
  53. package/dist/lib/utils.js +7 -4
  54. package/dist/lib/utils.js.map +1 -1
  55. package/dist/lib/yaml.js +24 -0
  56. package/dist/lib/yaml.js.map +1 -1
  57. package/dist/types/agent.d.ts +10 -7
  58. package/dist/types/bridge-mode-browser.d.ts +2 -3
  59. package/dist/types/bridge-mode.d.ts +2 -3
  60. package/dist/types/{browser-aec1055d.d.ts → browser-9b472ffb.d.ts} +1 -1
  61. package/dist/types/chrome-extension.d.ts +2 -3
  62. package/dist/types/index.d.ts +1 -2
  63. package/dist/types/midscene-server.d.ts +1 -2
  64. package/dist/types/{page-86ab0fe1.d.ts → page-ed0ecb44.d.ts} +19 -9
  65. package/dist/types/playground.d.ts +2 -3
  66. package/dist/types/playwright.d.ts +9 -2
  67. package/dist/types/puppeteer-agent-launcher.d.ts +1 -2
  68. package/dist/types/puppeteer.d.ts +6 -5
  69. package/dist/types/ui-utils.d.ts +1 -1
  70. package/dist/types/utils.d.ts +1 -2
  71. package/dist/types/yaml.d.ts +1 -2
  72. package/iife-script/htmlElement.js +51 -73
  73. package/iife-script/htmlElementDebug.js +33 -54
  74. package/package.json +23 -23
  75. package/LICENSE +0 -21
@@ -144,6 +144,10 @@ var ScriptPlayer = class {
144
144
  } else if ("aiQuery" in flowItem) {
145
145
  const queryTask = flowItem;
146
146
  const prompt = queryTask.aiQuery;
147
+ const options = {
148
+ domIncluded: queryTask.domIncluded,
149
+ screenshotIncluded: queryTask.screenshotIncluded
150
+ };
147
151
  (0, import_utils.assert)(prompt, "missing prompt for aiQuery");
148
152
  (0, import_utils.assert)(
149
153
  typeof prompt === "string",
@@ -154,6 +158,10 @@ var ScriptPlayer = class {
154
158
  } else if ("aiNumber" in flowItem) {
155
159
  const numberTask = flowItem;
156
160
  const prompt = numberTask.aiNumber;
161
+ const options = {
162
+ domIncluded: numberTask.domIncluded,
163
+ screenshotIncluded: numberTask.screenshotIncluded
164
+ };
157
165
  (0, import_utils.assert)(prompt, "missing prompt for number");
158
166
  (0, import_utils.assert)(
159
167
  typeof prompt === "string",
@@ -164,6 +172,10 @@ var ScriptPlayer = class {
164
172
  } else if ("aiString" in flowItem) {
165
173
  const stringTask = flowItem;
166
174
  const prompt = stringTask.aiString;
175
+ const options = {
176
+ domIncluded: stringTask.domIncluded,
177
+ screenshotIncluded: stringTask.screenshotIncluded
178
+ };
167
179
  (0, import_utils.assert)(prompt, "missing prompt for string");
168
180
  (0, import_utils.assert)(
169
181
  typeof prompt === "string",
@@ -174,6 +186,10 @@ var ScriptPlayer = class {
174
186
  } else if ("aiBoolean" in flowItem) {
175
187
  const booleanTask = flowItem;
176
188
  const prompt = booleanTask.aiBoolean;
189
+ const options = {
190
+ domIncluded: booleanTask.domIncluded,
191
+ screenshotIncluded: booleanTask.screenshotIncluded
192
+ };
177
193
  (0, import_utils.assert)(prompt, "missing prompt for boolean");
178
194
  (0, import_utils.assert)(
179
195
  typeof prompt === "string",
@@ -216,6 +232,9 @@ var ScriptPlayer = class {
216
232
  } else if ("aiTap" in flowItem) {
217
233
  const tapTask = flowItem;
218
234
  await agent.aiTap(tapTask.aiTap, tapTask);
235
+ } else if ("aiRightClick" in flowItem) {
236
+ const rightClickTask = flowItem;
237
+ await agent.aiRightClick(rightClickTask.aiRightClick, rightClickTask);
219
238
  } else if ("aiHover" in flowItem) {
220
239
  const hoverTask = flowItem;
221
240
  await agent.aiHover(hoverTask.aiHover, hoverTask);
@@ -238,6 +257,11 @@ var ScriptPlayer = class {
238
257
  evaluateJavaScriptTask.javascript
239
258
  );
240
259
  this.setResult(evaluateJavaScriptTask.name, result);
260
+ } else if ("logScreenshot" in flowItem) {
261
+ const logScreenshotTask = flowItem;
262
+ await agent.logScreenshot(logScreenshotTask.logScreenshot, {
263
+ content: logScreenshotTask.content || ""
264
+ });
241
265
  } else {
242
266
  throw new Error(`unknown flowItem: ${JSON.stringify(flowItem)}`);
243
267
  }
@@ -486,7 +510,8 @@ var WebElementInfo = class {
486
510
  id,
487
511
  attributes,
488
512
  indexId,
489
- xpaths
513
+ xpaths,
514
+ isVisible
490
515
  }) {
491
516
  this.content = content;
492
517
  this.rect = rect;
@@ -499,6 +524,7 @@ var WebElementInfo = class {
499
524
  this.attributes = attributes;
500
525
  this.indexId = indexId;
501
526
  this.xpaths = xpaths;
527
+ this.isVisible = isVisible;
502
528
  }
503
529
  };
504
530
 
@@ -521,14 +547,15 @@ async function parseContextFromWebPage(page, _opt) {
521
547
  })
522
548
  ]);
523
549
  const webTree = (0, import_extractor.traverseTree)(tree, (elementInfo) => {
524
- const { rect, id, content, attributes, locator, indexId } = elementInfo;
550
+ const { rect, id, content, attributes, locator, indexId, isVisible } = elementInfo;
525
551
  return new WebElementInfo({
526
552
  rect,
527
553
  locator,
528
554
  id,
529
555
  content,
530
556
  attributes,
531
- indexId
557
+ indexId,
558
+ isVisible
532
559
  });
533
560
  });
534
561
  (0, import_utils4.assert)(screenshotBase64, "screenshotBase64 is required");
@@ -558,7 +585,7 @@ function printReportMsg(filepath) {
558
585
  (0, import_utils4.logMsg)(`Midscene - report file updated: ${filepath}`);
559
586
  }
560
587
  function replaceIllegalPathCharsAndSpace(str) {
561
- return str.replace(/[/\\:*?"<>| ]/g, "-");
588
+ return str.replace(/[:*?"<>| ]/g, "-");
562
589
  }
563
590
  function matchElementFromPlan(planLocateParam, tree) {
564
591
  if (!planLocateParam) {
@@ -844,10 +871,10 @@ var PageTaskExecutor = class {
844
871
  if (!taskParam || !taskParam.value) {
845
872
  return;
846
873
  }
847
- await this.page.keyboard.type(taskParam.value);
848
- } else {
849
- await this.page.keyboard.type(taskParam.value);
850
874
  }
875
+ await this.page.keyboard.type(taskParam.value, {
876
+ autoDismissKeyboard: taskParam.autoDismissKeyboard
877
+ });
851
878
  }
852
879
  };
853
880
  tasks.push(taskActionInput);
@@ -876,6 +903,22 @@ var PageTaskExecutor = class {
876
903
  }
877
904
  };
878
905
  tasks.push(taskActionTap);
906
+ } else if (plan2.type === "RightClick") {
907
+ const taskActionRightClick = {
908
+ type: "Action",
909
+ subType: "RightClick",
910
+ thought: plan2.thought,
911
+ locate: plan2.locate,
912
+ executor: async (param, { element }) => {
913
+ (0, import_utils6.assert)(element, "Element not found, cannot right click");
914
+ await this.page.mouse.click(
915
+ element.center[0],
916
+ element.center[1],
917
+ { button: "right" }
918
+ );
919
+ }
920
+ };
921
+ tasks.push(taskActionRightClick);
879
922
  } else if (plan2.type === "Drag") {
880
923
  const taskActionDrag = {
881
924
  type: "Action",
@@ -1404,7 +1447,7 @@ var PageTaskExecutor = class {
1404
1447
  executor: taskExecutor
1405
1448
  };
1406
1449
  }
1407
- async createTypeQueryTask(type, demand) {
1450
+ async createTypeQueryTask(type, demand, opt) {
1408
1451
  const taskExecutor = new import_misoai_core.Executor(
1409
1452
  taskTitleStr(
1410
1453
  type,
@@ -1435,7 +1478,10 @@ var PageTaskExecutor = class {
1435
1478
  result: `${type}, ${demand}`
1436
1479
  };
1437
1480
  }
1438
- const { data, usage } = await this.insight.extract(demandInput);
1481
+ const { data, usage } = await this.insight.extract(
1482
+ demandInput,
1483
+ opt
1484
+ );
1439
1485
  let outputResult = data;
1440
1486
  if (ifTypeRestricted) {
1441
1487
  (0, import_utils6.assert)(data?.result !== void 0, "No result in query data");
@@ -1455,17 +1501,17 @@ var PageTaskExecutor = class {
1455
1501
  executor: taskExecutor
1456
1502
  };
1457
1503
  }
1458
- async query(demand) {
1459
- return this.createTypeQueryTask("Query", demand);
1504
+ async query(demand, opt) {
1505
+ return this.createTypeQueryTask("Query", demand, opt);
1460
1506
  }
1461
- async boolean(prompt) {
1462
- return this.createTypeQueryTask("Boolean", prompt);
1507
+ async boolean(prompt, opt) {
1508
+ return this.createTypeQueryTask("Boolean", prompt, opt);
1463
1509
  }
1464
- async number(prompt) {
1465
- return this.createTypeQueryTask("Number", prompt);
1510
+ async number(prompt, opt) {
1511
+ return this.createTypeQueryTask("Number", prompt, opt);
1466
1512
  }
1467
- async string(prompt) {
1468
- return this.createTypeQueryTask("String", prompt);
1513
+ async string(prompt, opt) {
1514
+ return this.createTypeQueryTask("String", prompt, opt);
1469
1515
  }
1470
1516
  async assert(assertion) {
1471
1517
  const description = `assert: ${assertion}`;
@@ -1601,7 +1647,7 @@ function buildPlans(type, locateParam, param) {
1601
1647
  param: locateParam,
1602
1648
  thought: ""
1603
1649
  } : null;
1604
- if (type === "Tap" || type === "Hover") {
1650
+ if (type === "Tap" || type === "Hover" || type === "RightClick") {
1605
1651
  (0, import_utils8.assert)(locateParam, `missing locate info for action "${type}"`);
1606
1652
  (0, import_utils8.assert)(locatePlan, `missing locate info for action "${type}"`);
1607
1653
  const tapPlan = {
@@ -1681,7 +1727,7 @@ var import_js_yaml3 = __toESM(require("js-yaml"));
1681
1727
  var import_semver = __toESM(require("semver"));
1682
1728
 
1683
1729
  // package.json
1684
- var version = "1.5.6";
1730
+ var version = "1.0.3";
1685
1731
 
1686
1732
  // src/common/task-cache.ts
1687
1733
  var debug3 = (0, import_logger3.getDebug)("cache");
@@ -1812,8 +1858,14 @@ cache file: ${cacheFile}`
1812
1858
  return;
1813
1859
  }
1814
1860
  try {
1861
+ const dir = (0, import_node_path2.dirname)(this.cacheFilePath);
1862
+ if (!(0, import_node_fs2.existsSync)(dir)) {
1863
+ (0, import_node_fs2.mkdirSync)(dir, { recursive: true });
1864
+ debug3("created cache directory: %s", dir);
1865
+ }
1815
1866
  const yamlData = import_js_yaml3.default.dump(this.cache);
1816
1867
  (0, import_node_fs2.writeFileSync)(this.cacheFilePath, yamlData);
1868
+ debug3("cache flushed to file: %s", this.cacheFilePath);
1817
1869
  } catch (err) {
1818
1870
  debug3(
1819
1871
  "write cache to file failed, path: %s, error: %s",
@@ -2071,6 +2123,23 @@ var PageAgent = class {
2071
2123
  metadata
2072
2124
  };
2073
2125
  }
2126
+ async aiRightClick(locatePrompt, opt) {
2127
+ const detailedLocateParam = this.buildDetailedLocateParam(
2128
+ locatePrompt,
2129
+ opt
2130
+ );
2131
+ const plans = buildPlans("RightClick", detailedLocateParam);
2132
+ const { executor, output } = await this.taskExecutor.runPlans(
2133
+ taskTitleStr("RightClick", locateParamStr(detailedLocateParam)),
2134
+ plans,
2135
+ { cacheable: opt?.cacheable }
2136
+ );
2137
+ const metadata = this.afterTaskRunning(executor);
2138
+ return {
2139
+ result: output,
2140
+ metadata
2141
+ };
2142
+ }
2074
2143
  async aiInput(value, locatePrompt, opt) {
2075
2144
  (0, import_utils12.assert)(
2076
2145
  typeof value === "string",
@@ -2502,6 +2571,40 @@ ${errors}`);
2502
2571
  }
2503
2572
  throw new Error("evaluateJavaScript is not supported in current agent");
2504
2573
  }
2574
+ async logScreenshot(title, options) {
2575
+ const screenshotTitle = title || "untitled";
2576
+ const content = options?.content || "";
2577
+ const screenshot = await this.page.screenshotBase64?.();
2578
+ if (screenshot) {
2579
+ const executionDump = {
2580
+ name: screenshotTitle,
2581
+ description: content,
2582
+ tasks: [{
2583
+ type: "Screenshot",
2584
+ subType: "log",
2585
+ status: "finished",
2586
+ executor: null,
2587
+ param: {
2588
+ title: screenshotTitle,
2589
+ content
2590
+ },
2591
+ output: {
2592
+ screenshot
2593
+ },
2594
+ thought: `Logged screenshot: ${screenshotTitle}`,
2595
+ timing: {
2596
+ start: Date.now(),
2597
+ end: Date.now(),
2598
+ cost: 0
2599
+ }
2600
+ }],
2601
+ sdkVersion: "1.0.0",
2602
+ logTime: Date.now(),
2603
+ model_name: "screenshot"
2604
+ };
2605
+ this.appendExecutionDump(executionDump);
2606
+ }
2607
+ }
2505
2608
  async destroy() {
2506
2609
  await this.page.destroy();
2507
2610
  }
@@ -2617,7 +2720,7 @@ var BridgeServer = class {
2617
2720
  this.socket = socket;
2618
2721
  const clientVersion = socket.handshake.query.version;
2619
2722
  (0, import_utils16.logMsg)(
2620
- `Bridge connected, cli-side version v${"1.5.6"}, browser-side version v${clientVersion}`
2723
+ `Bridge connected, cli-side version v${"1.0.3"}, browser-side version v${clientVersion}`
2621
2724
  );
2622
2725
  socket.on("bridge-call-response" /* CallResponse */, (params) => {
2623
2726
  const id = params.id;
@@ -2648,7 +2751,7 @@ var BridgeServer = class {
2648
2751
  setTimeout(() => {
2649
2752
  this.onConnect?.();
2650
2753
  const payload = {
2651
- version: "1.5.6"
2754
+ version: "1.0.3"
2652
2755
  };
2653
2756
  socket.emit("bridge-connected" /* Connected */, payload);
2654
2757
  Promise.resolve().then(() => {