misoai-web 1.5.6 → 1.5.7

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (75):
  1. package/dist/es/agent.js +124 -21
  2. package/dist/es/agent.js.map +1 -1
  3. package/dist/es/bridge-mode-browser.js +10 -9
  4. package/dist/es/bridge-mode-browser.js.map +1 -1
  5. package/dist/es/bridge-mode.js +126 -23
  6. package/dist/es/bridge-mode.js.map +1 -1
  7. package/dist/es/chrome-extension.js +132 -28
  8. package/dist/es/chrome-extension.js.map +1 -1
  9. package/dist/es/index.js +140 -21
  10. package/dist/es/index.js.map +1 -1
  11. package/dist/es/midscene-playground.js +124 -21
  12. package/dist/es/midscene-playground.js.map +1 -1
  13. package/dist/es/midscene-server.js.map +1 -1
  14. package/dist/es/playground.js +124 -21
  15. package/dist/es/playground.js.map +1 -1
  16. package/dist/es/playwright-report.js +1 -1
  17. package/dist/es/playwright-report.js.map +1 -1
  18. package/dist/es/playwright.js +140 -21
  19. package/dist/es/playwright.js.map +1 -1
  20. package/dist/es/puppeteer-agent-launcher.js +124 -21
  21. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  22. package/dist/es/puppeteer.js +124 -21
  23. package/dist/es/puppeteer.js.map +1 -1
  24. package/dist/es/ui-utils.js.map +1 -1
  25. package/dist/es/utils.js +7 -4
  26. package/dist/es/utils.js.map +1 -1
  27. package/dist/es/yaml.js +24 -0
  28. package/dist/es/yaml.js.map +1 -1
  29. package/dist/lib/agent.js +122 -19
  30. package/dist/lib/agent.js.map +1 -1
  31. package/dist/lib/bridge-mode-browser.js +10 -9
  32. package/dist/lib/bridge-mode-browser.js.map +1 -1
  33. package/dist/lib/bridge-mode.js +124 -21
  34. package/dist/lib/bridge-mode.js.map +1 -1
  35. package/dist/lib/chrome-extension.js +130 -26
  36. package/dist/lib/chrome-extension.js.map +1 -1
  37. package/dist/lib/index.js +138 -19
  38. package/dist/lib/index.js.map +1 -1
  39. package/dist/lib/midscene-playground.js +122 -19
  40. package/dist/lib/midscene-playground.js.map +1 -1
  41. package/dist/lib/midscene-server.js.map +1 -1
  42. package/dist/lib/playground.js +122 -19
  43. package/dist/lib/playground.js.map +1 -1
  44. package/dist/lib/playwright-report.js +1 -1
  45. package/dist/lib/playwright-report.js.map +1 -1
  46. package/dist/lib/playwright.js +138 -19
  47. package/dist/lib/playwright.js.map +1 -1
  48. package/dist/lib/puppeteer-agent-launcher.js +122 -19
  49. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  50. package/dist/lib/puppeteer.js +122 -19
  51. package/dist/lib/puppeteer.js.map +1 -1
  52. package/dist/lib/ui-utils.js.map +1 -1
  53. package/dist/lib/utils.js +7 -4
  54. package/dist/lib/utils.js.map +1 -1
  55. package/dist/lib/yaml.js +24 -0
  56. package/dist/lib/yaml.js.map +1 -1
  57. package/dist/types/agent.d.ts +10 -7
  58. package/dist/types/bridge-mode-browser.d.ts +2 -3
  59. package/dist/types/bridge-mode.d.ts +2 -3
  60. package/dist/types/{browser-aec1055d.d.ts → browser-9b472ffb.d.ts} +1 -1
  61. package/dist/types/chrome-extension.d.ts +2 -3
  62. package/dist/types/index.d.ts +1 -2
  63. package/dist/types/midscene-server.d.ts +1 -2
  64. package/dist/types/{page-86ab0fe1.d.ts → page-ed0ecb44.d.ts} +19 -9
  65. package/dist/types/playground.d.ts +2 -3
  66. package/dist/types/playwright.d.ts +9 -2
  67. package/dist/types/puppeteer-agent-launcher.d.ts +1 -2
  68. package/dist/types/puppeteer.d.ts +6 -5
  69. package/dist/types/ui-utils.d.ts +1 -1
  70. package/dist/types/utils.d.ts +1 -2
  71. package/dist/types/yaml.d.ts +1 -2
  72. package/iife-script/htmlElement.js +51 -73
  73. package/iife-script/htmlElementDebug.js +33 -54
  74. package/package.json +23 -23
  75. package/LICENSE +0 -21
@@ -22,7 +22,8 @@ var WebElementInfo = class {
22
22
  id,
23
23
  attributes,
24
24
  indexId,
25
- xpaths
25
+ xpaths,
26
+ isVisible
26
27
  }) {
27
28
  this.content = content;
28
29
  this.rect = rect;
@@ -35,6 +36,7 @@ var WebElementInfo = class {
35
36
  this.attributes = attributes;
36
37
  this.indexId = indexId;
37
38
  this.xpaths = xpaths;
39
+ this.isVisible = isVisible;
38
40
  }
39
41
  };
40
42
 
@@ -57,14 +59,15 @@ async function parseContextFromWebPage(page, _opt) {
57
59
  })
58
60
  ]);
59
61
  const webTree = traverseTree(tree, (elementInfo) => {
60
- const { rect, id, content, attributes, locator, indexId } = elementInfo;
62
+ const { rect, id, content, attributes, locator, indexId, isVisible } = elementInfo;
61
63
  return new WebElementInfo({
62
64
  rect,
63
65
  locator,
64
66
  id,
65
67
  content,
66
68
  attributes,
67
- indexId
69
+ indexId,
70
+ isVisible
68
71
  });
69
72
  });
70
73
  assert(screenshotBase64, "screenshotBase64 is required");
@@ -95,7 +98,7 @@ function printReportMsg(filepath) {
95
98
  }
96
99
  var ERROR_CODE_NOT_IMPLEMENTED_AS_DESIGNED = "NOT_IMPLEMENTED_AS_DESIGNED";
97
100
  function replaceIllegalPathCharsAndSpace(str) {
98
- return str.replace(/[/\\:*?"<>| ]/g, "-");
101
+ return str.replace(/[:*?"<>| ]/g, "-");
99
102
  }
100
103
  function matchElementFromPlan(planLocateParam, tree) {
101
104
  if (!planLocateParam) {
@@ -225,6 +228,10 @@ var ScriptPlayer = class {
225
228
  } else if ("aiQuery" in flowItem) {
226
229
  const queryTask = flowItem;
227
230
  const prompt = queryTask.aiQuery;
231
+ const options = {
232
+ domIncluded: queryTask.domIncluded,
233
+ screenshotIncluded: queryTask.screenshotIncluded
234
+ };
228
235
  assert2(prompt, "missing prompt for aiQuery");
229
236
  assert2(
230
237
  typeof prompt === "string",
@@ -235,6 +242,10 @@ var ScriptPlayer = class {
235
242
  } else if ("aiNumber" in flowItem) {
236
243
  const numberTask = flowItem;
237
244
  const prompt = numberTask.aiNumber;
245
+ const options = {
246
+ domIncluded: numberTask.domIncluded,
247
+ screenshotIncluded: numberTask.screenshotIncluded
248
+ };
238
249
  assert2(prompt, "missing prompt for number");
239
250
  assert2(
240
251
  typeof prompt === "string",
@@ -245,6 +256,10 @@ var ScriptPlayer = class {
245
256
  } else if ("aiString" in flowItem) {
246
257
  const stringTask = flowItem;
247
258
  const prompt = stringTask.aiString;
259
+ const options = {
260
+ domIncluded: stringTask.domIncluded,
261
+ screenshotIncluded: stringTask.screenshotIncluded
262
+ };
248
263
  assert2(prompt, "missing prompt for string");
249
264
  assert2(
250
265
  typeof prompt === "string",
@@ -255,6 +270,10 @@ var ScriptPlayer = class {
255
270
  } else if ("aiBoolean" in flowItem) {
256
271
  const booleanTask = flowItem;
257
272
  const prompt = booleanTask.aiBoolean;
273
+ const options = {
274
+ domIncluded: booleanTask.domIncluded,
275
+ screenshotIncluded: booleanTask.screenshotIncluded
276
+ };
258
277
  assert2(prompt, "missing prompt for boolean");
259
278
  assert2(
260
279
  typeof prompt === "string",
@@ -297,6 +316,9 @@ var ScriptPlayer = class {
297
316
  } else if ("aiTap" in flowItem) {
298
317
  const tapTask = flowItem;
299
318
  await agent.aiTap(tapTask.aiTap, tapTask);
319
+ } else if ("aiRightClick" in flowItem) {
320
+ const rightClickTask = flowItem;
321
+ await agent.aiRightClick(rightClickTask.aiRightClick, rightClickTask);
300
322
  } else if ("aiHover" in flowItem) {
301
323
  const hoverTask = flowItem;
302
324
  await agent.aiHover(hoverTask.aiHover, hoverTask);
@@ -319,6 +341,11 @@ var ScriptPlayer = class {
319
341
  evaluateJavaScriptTask.javascript
320
342
  );
321
343
  this.setResult(evaluateJavaScriptTask.name, result);
344
+ } else if ("logScreenshot" in flowItem) {
345
+ const logScreenshotTask = flowItem;
346
+ await agent.logScreenshot(logScreenshotTask.logScreenshot, {
347
+ content: logScreenshotTask.content || ""
348
+ });
322
349
  } else {
323
350
  throw new Error(`unknown flowItem: ${JSON.stringify(flowItem)}`);
324
351
  }
@@ -826,10 +853,10 @@ var PageTaskExecutor = class {
826
853
  if (!taskParam || !taskParam.value) {
827
854
  return;
828
855
  }
829
- await this.page.keyboard.type(taskParam.value);
830
- } else {
831
- await this.page.keyboard.type(taskParam.value);
832
856
  }
857
+ await this.page.keyboard.type(taskParam.value, {
858
+ autoDismissKeyboard: taskParam.autoDismissKeyboard
859
+ });
833
860
  }
834
861
  };
835
862
  tasks.push(taskActionInput);
@@ -858,6 +885,22 @@ var PageTaskExecutor = class {
858
885
  }
859
886
  };
860
887
  tasks.push(taskActionTap);
888
+ } else if (plan2.type === "RightClick") {
889
+ const taskActionRightClick = {
890
+ type: "Action",
891
+ subType: "RightClick",
892
+ thought: plan2.thought,
893
+ locate: plan2.locate,
894
+ executor: async (param, { element }) => {
895
+ assert4(element, "Element not found, cannot right click");
896
+ await this.page.mouse.click(
897
+ element.center[0],
898
+ element.center[1],
899
+ { button: "right" }
900
+ );
901
+ }
902
+ };
903
+ tasks.push(taskActionRightClick);
861
904
  } else if (plan2.type === "Drag") {
862
905
  const taskActionDrag = {
863
906
  type: "Action",
@@ -1386,7 +1429,7 @@ var PageTaskExecutor = class {
1386
1429
  executor: taskExecutor
1387
1430
  };
1388
1431
  }
1389
- async createTypeQueryTask(type, demand) {
1432
+ async createTypeQueryTask(type, demand, opt) {
1390
1433
  const taskExecutor = new Executor(
1391
1434
  taskTitleStr(
1392
1435
  type,
@@ -1417,7 +1460,10 @@ var PageTaskExecutor = class {
1417
1460
  result: `${type}, ${demand}`
1418
1461
  };
1419
1462
  }
1420
- const { data, usage } = await this.insight.extract(demandInput);
1463
+ const { data, usage } = await this.insight.extract(
1464
+ demandInput,
1465
+ opt
1466
+ );
1421
1467
  let outputResult = data;
1422
1468
  if (ifTypeRestricted) {
1423
1469
  assert4(data?.result !== void 0, "No result in query data");
@@ -1437,17 +1483,17 @@ var PageTaskExecutor = class {
1437
1483
  executor: taskExecutor
1438
1484
  };
1439
1485
  }
1440
- async query(demand) {
1441
- return this.createTypeQueryTask("Query", demand);
1486
+ async query(demand, opt) {
1487
+ return this.createTypeQueryTask("Query", demand, opt);
1442
1488
  }
1443
- async boolean(prompt) {
1444
- return this.createTypeQueryTask("Boolean", prompt);
1489
+ async boolean(prompt, opt) {
1490
+ return this.createTypeQueryTask("Boolean", prompt, opt);
1445
1491
  }
1446
- async number(prompt) {
1447
- return this.createTypeQueryTask("Number", prompt);
1492
+ async number(prompt, opt) {
1493
+ return this.createTypeQueryTask("Number", prompt, opt);
1448
1494
  }
1449
- async string(prompt) {
1450
- return this.createTypeQueryTask("String", prompt);
1495
+ async string(prompt, opt) {
1496
+ return this.createTypeQueryTask("String", prompt, opt);
1451
1497
  }
1452
1498
  async assert(assertion) {
1453
1499
  const description = `assert: ${assertion}`;
@@ -1583,7 +1629,7 @@ function buildPlans(type, locateParam, param) {
1583
1629
  param: locateParam,
1584
1630
  thought: ""
1585
1631
  } : null;
1586
- if (type === "Tap" || type === "Hover") {
1632
+ if (type === "Tap" || type === "Hover" || type === "RightClick") {
1587
1633
  assert5(locateParam, `missing locate info for action "${type}"`);
1588
1634
  assert5(locatePlan, `missing locate info for action "${type}"`);
1589
1635
  const tapPlan = {
@@ -1654,8 +1700,8 @@ function buildPlans(type, locateParam, param) {
1654
1700
 
1655
1701
  // src/common/task-cache.ts
1656
1702
  import assert6 from "assert";
1657
- import { existsSync as existsSync2, readFileSync, writeFileSync as writeFileSync2 } from "fs";
1658
- import { join as join2 } from "path";
1703
+ import { existsSync as existsSync2, mkdirSync as mkdirSync2, readFileSync, writeFileSync as writeFileSync2 } from "fs";
1704
+ import { dirname as dirname2, join as join2 } from "path";
1659
1705
  import { getMidsceneRunSubDir as getMidsceneRunSubDir2 } from "misoai-shared/common";
1660
1706
  import { getDebug as getDebug3 } from "misoai-shared/logger";
1661
1707
  import { ifInBrowser as ifInBrowser2 } from "misoai-shared/utils";
@@ -1663,7 +1709,7 @@ import yaml3 from "js-yaml";
1663
1709
  import semver from "semver";
1664
1710
 
1665
1711
  // package.json
1666
- var version = "1.5.6";
1712
+ var version = "1.0.3";
1667
1713
 
1668
1714
  // src/common/task-cache.ts
1669
1715
  var debug3 = getDebug3("cache");
@@ -1794,8 +1840,14 @@ cache file: ${cacheFile}`
1794
1840
  return;
1795
1841
  }
1796
1842
  try {
1843
+ const dir = dirname2(this.cacheFilePath);
1844
+ if (!existsSync2(dir)) {
1845
+ mkdirSync2(dir, { recursive: true });
1846
+ debug3("created cache directory: %s", dir);
1847
+ }
1797
1848
  const yamlData = yaml3.dump(this.cache);
1798
1849
  writeFileSync2(this.cacheFilePath, yamlData);
1850
+ debug3("cache flushed to file: %s", this.cacheFilePath);
1799
1851
  } catch (err) {
1800
1852
  debug3(
1801
1853
  "write cache to file failed, path: %s, error: %s",
@@ -2053,6 +2105,23 @@ var PageAgent = class {
2053
2105
  metadata
2054
2106
  };
2055
2107
  }
2108
+ async aiRightClick(locatePrompt, opt) {
2109
+ const detailedLocateParam = this.buildDetailedLocateParam(
2110
+ locatePrompt,
2111
+ opt
2112
+ );
2113
+ const plans = buildPlans("RightClick", detailedLocateParam);
2114
+ const { executor, output } = await this.taskExecutor.runPlans(
2115
+ taskTitleStr("RightClick", locateParamStr(detailedLocateParam)),
2116
+ plans,
2117
+ { cacheable: opt?.cacheable }
2118
+ );
2119
+ const metadata = this.afterTaskRunning(executor);
2120
+ return {
2121
+ result: output,
2122
+ metadata
2123
+ };
2124
+ }
2056
2125
  async aiInput(value, locatePrompt, opt) {
2057
2126
  assert7(
2058
2127
  typeof value === "string",
@@ -2484,6 +2553,40 @@ ${errors}`);
2484
2553
  }
2485
2554
  throw new Error("evaluateJavaScript is not supported in current agent");
2486
2555
  }
2556
+ async logScreenshot(title, options) {
2557
+ const screenshotTitle = title || "untitled";
2558
+ const content = options?.content || "";
2559
+ const screenshot = await this.page.screenshotBase64?.();
2560
+ if (screenshot) {
2561
+ const executionDump = {
2562
+ name: screenshotTitle,
2563
+ description: content,
2564
+ tasks: [{
2565
+ type: "Screenshot",
2566
+ subType: "log",
2567
+ status: "finished",
2568
+ executor: null,
2569
+ param: {
2570
+ title: screenshotTitle,
2571
+ content
2572
+ },
2573
+ output: {
2574
+ screenshot
2575
+ },
2576
+ thought: `Logged screenshot: ${screenshotTitle}`,
2577
+ timing: {
2578
+ start: Date.now(),
2579
+ end: Date.now(),
2580
+ cost: 0
2581
+ }
2582
+ }],
2583
+ sdkVersion: "1.0.0",
2584
+ logTime: Date.now(),
2585
+ model_name: "screenshot"
2586
+ };
2587
+ this.appendExecutionDump(executionDump);
2588
+ }
2589
+ }
2487
2590
  async destroy() {
2488
2591
  await this.page.destroy();
2489
2592
  }