misoai-web 1.5.6 → 1.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/dist/es/agent.js +124 -21
  2. package/dist/es/agent.js.map +1 -1
  3. package/dist/es/bridge-mode-browser.js +10 -9
  4. package/dist/es/bridge-mode-browser.js.map +1 -1
  5. package/dist/es/bridge-mode.js +126 -23
  6. package/dist/es/bridge-mode.js.map +1 -1
  7. package/dist/es/chrome-extension.js +132 -28
  8. package/dist/es/chrome-extension.js.map +1 -1
  9. package/dist/es/index.js +140 -21
  10. package/dist/es/index.js.map +1 -1
  11. package/dist/es/midscene-playground.js +124 -21
  12. package/dist/es/midscene-playground.js.map +1 -1
  13. package/dist/es/midscene-server.js.map +1 -1
  14. package/dist/es/playground.js +124 -21
  15. package/dist/es/playground.js.map +1 -1
  16. package/dist/es/playwright-report.js +1 -1
  17. package/dist/es/playwright-report.js.map +1 -1
  18. package/dist/es/playwright.js +140 -21
  19. package/dist/es/playwright.js.map +1 -1
  20. package/dist/es/puppeteer-agent-launcher.js +124 -21
  21. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  22. package/dist/es/puppeteer.js +124 -21
  23. package/dist/es/puppeteer.js.map +1 -1
  24. package/dist/es/ui-utils.js.map +1 -1
  25. package/dist/es/utils.js +7 -4
  26. package/dist/es/utils.js.map +1 -1
  27. package/dist/es/yaml.js +24 -0
  28. package/dist/es/yaml.js.map +1 -1
  29. package/dist/lib/agent.js +122 -19
  30. package/dist/lib/agent.js.map +1 -1
  31. package/dist/lib/bridge-mode-browser.js +10 -9
  32. package/dist/lib/bridge-mode-browser.js.map +1 -1
  33. package/dist/lib/bridge-mode.js +124 -21
  34. package/dist/lib/bridge-mode.js.map +1 -1
  35. package/dist/lib/chrome-extension.js +130 -26
  36. package/dist/lib/chrome-extension.js.map +1 -1
  37. package/dist/lib/index.js +138 -19
  38. package/dist/lib/index.js.map +1 -1
  39. package/dist/lib/midscene-playground.js +122 -19
  40. package/dist/lib/midscene-playground.js.map +1 -1
  41. package/dist/lib/midscene-server.js.map +1 -1
  42. package/dist/lib/playground.js +122 -19
  43. package/dist/lib/playground.js.map +1 -1
  44. package/dist/lib/playwright-report.js +1 -1
  45. package/dist/lib/playwright-report.js.map +1 -1
  46. package/dist/lib/playwright.js +138 -19
  47. package/dist/lib/playwright.js.map +1 -1
  48. package/dist/lib/puppeteer-agent-launcher.js +122 -19
  49. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  50. package/dist/lib/puppeteer.js +122 -19
  51. package/dist/lib/puppeteer.js.map +1 -1
  52. package/dist/lib/ui-utils.js.map +1 -1
  53. package/dist/lib/utils.js +7 -4
  54. package/dist/lib/utils.js.map +1 -1
  55. package/dist/lib/yaml.js +24 -0
  56. package/dist/lib/yaml.js.map +1 -1
  57. package/dist/types/agent.d.ts +10 -7
  58. package/dist/types/bridge-mode-browser.d.ts +2 -3
  59. package/dist/types/bridge-mode.d.ts +2 -3
  60. package/dist/types/{browser-aec1055d.d.ts → browser-9b472ffb.d.ts} +1 -1
  61. package/dist/types/chrome-extension.d.ts +2 -3
  62. package/dist/types/index.d.ts +1 -2
  63. package/dist/types/midscene-server.d.ts +1 -2
  64. package/dist/types/{page-86ab0fe1.d.ts → page-ed0ecb44.d.ts} +19 -9
  65. package/dist/types/playground.d.ts +2 -3
  66. package/dist/types/playwright.d.ts +9 -2
  67. package/dist/types/puppeteer-agent-launcher.d.ts +1 -2
  68. package/dist/types/puppeteer.d.ts +6 -5
  69. package/dist/types/ui-utils.d.ts +1 -1
  70. package/dist/types/utils.d.ts +1 -2
  71. package/dist/types/yaml.d.ts +1 -2
  72. package/iife-script/htmlElement.js +51 -73
  73. package/iife-script/htmlElementDebug.js +33 -54
  74. package/package.json +23 -23
  75. package/LICENSE +0 -21
@@ -110,6 +110,10 @@ var ScriptPlayer = class {
110
110
  } else if ("aiQuery" in flowItem) {
111
111
  const queryTask = flowItem;
112
112
  const prompt = queryTask.aiQuery;
113
+ const options = {
114
+ domIncluded: queryTask.domIncluded,
115
+ screenshotIncluded: queryTask.screenshotIncluded
116
+ };
113
117
  assert(prompt, "missing prompt for aiQuery");
114
118
  assert(
115
119
  typeof prompt === "string",
@@ -120,6 +124,10 @@ var ScriptPlayer = class {
120
124
  } else if ("aiNumber" in flowItem) {
121
125
  const numberTask = flowItem;
122
126
  const prompt = numberTask.aiNumber;
127
+ const options = {
128
+ domIncluded: numberTask.domIncluded,
129
+ screenshotIncluded: numberTask.screenshotIncluded
130
+ };
123
131
  assert(prompt, "missing prompt for number");
124
132
  assert(
125
133
  typeof prompt === "string",
@@ -130,6 +138,10 @@ var ScriptPlayer = class {
130
138
  } else if ("aiString" in flowItem) {
131
139
  const stringTask = flowItem;
132
140
  const prompt = stringTask.aiString;
141
+ const options = {
142
+ domIncluded: stringTask.domIncluded,
143
+ screenshotIncluded: stringTask.screenshotIncluded
144
+ };
133
145
  assert(prompt, "missing prompt for string");
134
146
  assert(
135
147
  typeof prompt === "string",
@@ -140,6 +152,10 @@ var ScriptPlayer = class {
140
152
  } else if ("aiBoolean" in flowItem) {
141
153
  const booleanTask = flowItem;
142
154
  const prompt = booleanTask.aiBoolean;
155
+ const options = {
156
+ domIncluded: booleanTask.domIncluded,
157
+ screenshotIncluded: booleanTask.screenshotIncluded
158
+ };
143
159
  assert(prompt, "missing prompt for boolean");
144
160
  assert(
145
161
  typeof prompt === "string",
@@ -182,6 +198,9 @@ var ScriptPlayer = class {
182
198
  } else if ("aiTap" in flowItem) {
183
199
  const tapTask = flowItem;
184
200
  await agent.aiTap(tapTask.aiTap, tapTask);
201
+ } else if ("aiRightClick" in flowItem) {
202
+ const rightClickTask = flowItem;
203
+ await agent.aiRightClick(rightClickTask.aiRightClick, rightClickTask);
185
204
  } else if ("aiHover" in flowItem) {
186
205
  const hoverTask = flowItem;
187
206
  await agent.aiHover(hoverTask.aiHover, hoverTask);
@@ -204,6 +223,11 @@ var ScriptPlayer = class {
204
223
  evaluateJavaScriptTask.javascript
205
224
  );
206
225
  this.setResult(evaluateJavaScriptTask.name, result);
226
+ } else if ("logScreenshot" in flowItem) {
227
+ const logScreenshotTask = flowItem;
228
+ await agent.logScreenshot(logScreenshotTask.logScreenshot, {
229
+ content: logScreenshotTask.content || ""
230
+ });
207
231
  } else {
208
232
  throw new Error(`unknown flowItem: ${JSON.stringify(flowItem)}`);
209
233
  }
@@ -472,7 +496,8 @@ var WebElementInfo = class {
472
496
  id,
473
497
  attributes,
474
498
  indexId,
475
- xpaths
499
+ xpaths,
500
+ isVisible
476
501
  }) {
477
502
  this.content = content;
478
503
  this.rect = rect;
@@ -485,6 +510,7 @@ var WebElementInfo = class {
485
510
  this.attributes = attributes;
486
511
  this.indexId = indexId;
487
512
  this.xpaths = xpaths;
513
+ this.isVisible = isVisible;
488
514
  }
489
515
  };
490
516
 
@@ -507,14 +533,15 @@ async function parseContextFromWebPage(page, _opt) {
507
533
  })
508
534
  ]);
509
535
  const webTree = traverseTree(tree, (elementInfo) => {
510
- const { rect, id, content, attributes, locator, indexId } = elementInfo;
536
+ const { rect, id, content, attributes, locator, indexId, isVisible } = elementInfo;
511
537
  return new WebElementInfo({
512
538
  rect,
513
539
  locator,
514
540
  id,
515
541
  content,
516
542
  attributes,
517
- indexId
543
+ indexId,
544
+ isVisible
518
545
  });
519
546
  });
520
547
  assert3(screenshotBase64, "screenshotBase64 is required");
@@ -544,7 +571,7 @@ function printReportMsg(filepath) {
544
571
  logMsg(`Midscene - report file updated: ${filepath}`);
545
572
  }
546
573
  function replaceIllegalPathCharsAndSpace(str) {
547
- return str.replace(/[/\\:*?"<>| ]/g, "-");
574
+ return str.replace(/[:*?"<>| ]/g, "-");
548
575
  }
549
576
  function forceClosePopup(page, debug6) {
550
577
  page.on("popup", async (popup) => {
@@ -858,10 +885,10 @@ var PageTaskExecutor = class {
858
885
  if (!taskParam || !taskParam.value) {
859
886
  return;
860
887
  }
861
- await this.page.keyboard.type(taskParam.value);
862
- } else {
863
- await this.page.keyboard.type(taskParam.value);
864
888
  }
889
+ await this.page.keyboard.type(taskParam.value, {
890
+ autoDismissKeyboard: taskParam.autoDismissKeyboard
891
+ });
865
892
  }
866
893
  };
867
894
  tasks.push(taskActionInput);
@@ -890,6 +917,22 @@ var PageTaskExecutor = class {
890
917
  }
891
918
  };
892
919
  tasks.push(taskActionTap);
920
+ } else if (plan2.type === "RightClick") {
921
+ const taskActionRightClick = {
922
+ type: "Action",
923
+ subType: "RightClick",
924
+ thought: plan2.thought,
925
+ locate: plan2.locate,
926
+ executor: async (param, { element }) => {
927
+ assert4(element, "Element not found, cannot right click");
928
+ await this.page.mouse.click(
929
+ element.center[0],
930
+ element.center[1],
931
+ { button: "right" }
932
+ );
933
+ }
934
+ };
935
+ tasks.push(taskActionRightClick);
893
936
  } else if (plan2.type === "Drag") {
894
937
  const taskActionDrag = {
895
938
  type: "Action",
@@ -1418,7 +1461,7 @@ var PageTaskExecutor = class {
1418
1461
  executor: taskExecutor
1419
1462
  };
1420
1463
  }
1421
- async createTypeQueryTask(type, demand) {
1464
+ async createTypeQueryTask(type, demand, opt) {
1422
1465
  const taskExecutor = new Executor(
1423
1466
  taskTitleStr(
1424
1467
  type,
@@ -1449,7 +1492,10 @@ var PageTaskExecutor = class {
1449
1492
  result: `${type}, ${demand}`
1450
1493
  };
1451
1494
  }
1452
- const { data, usage } = await this.insight.extract(demandInput);
1495
+ const { data, usage } = await this.insight.extract(
1496
+ demandInput,
1497
+ opt
1498
+ );
1453
1499
  let outputResult = data;
1454
1500
  if (ifTypeRestricted) {
1455
1501
  assert4(data?.result !== void 0, "No result in query data");
@@ -1469,17 +1515,17 @@ var PageTaskExecutor = class {
1469
1515
  executor: taskExecutor
1470
1516
  };
1471
1517
  }
1472
- async query(demand) {
1473
- return this.createTypeQueryTask("Query", demand);
1518
+ async query(demand, opt) {
1519
+ return this.createTypeQueryTask("Query", demand, opt);
1474
1520
  }
1475
- async boolean(prompt) {
1476
- return this.createTypeQueryTask("Boolean", prompt);
1521
+ async boolean(prompt, opt) {
1522
+ return this.createTypeQueryTask("Boolean", prompt, opt);
1477
1523
  }
1478
- async number(prompt) {
1479
- return this.createTypeQueryTask("Number", prompt);
1524
+ async number(prompt, opt) {
1525
+ return this.createTypeQueryTask("Number", prompt, opt);
1480
1526
  }
1481
- async string(prompt) {
1482
- return this.createTypeQueryTask("String", prompt);
1527
+ async string(prompt, opt) {
1528
+ return this.createTypeQueryTask("String", prompt, opt);
1483
1529
  }
1484
1530
  async assert(assertion) {
1485
1531
  const description = `assert: ${assertion}`;
@@ -1615,7 +1661,7 @@ function buildPlans(type, locateParam, param) {
1615
1661
  param: locateParam,
1616
1662
  thought: ""
1617
1663
  } : null;
1618
- if (type === "Tap" || type === "Hover") {
1664
+ if (type === "Tap" || type === "Hover" || type === "RightClick") {
1619
1665
  assert5(locateParam, `missing locate info for action "${type}"`);
1620
1666
  assert5(locatePlan, `missing locate info for action "${type}"`);
1621
1667
  const tapPlan = {
@@ -1686,8 +1732,8 @@ function buildPlans(type, locateParam, param) {
1686
1732
 
1687
1733
  // src/common/task-cache.ts
1688
1734
  import assert6 from "assert";
1689
- import { existsSync as existsSync2, readFileSync, writeFileSync as writeFileSync2 } from "fs";
1690
- import { join as join2 } from "path";
1735
+ import { existsSync as existsSync2, mkdirSync as mkdirSync2, readFileSync, writeFileSync as writeFileSync2 } from "fs";
1736
+ import { dirname as dirname2, join as join2 } from "path";
1691
1737
  import { getMidsceneRunSubDir as getMidsceneRunSubDir2 } from "misoai-shared/common";
1692
1738
  import { getDebug as getDebug3 } from "misoai-shared/logger";
1693
1739
  import { ifInBrowser as ifInBrowser2 } from "misoai-shared/utils";
@@ -1695,7 +1741,7 @@ import yaml3 from "js-yaml";
1695
1741
  import semver from "semver";
1696
1742
 
1697
1743
  // package.json
1698
- var version = "1.5.6";
1744
+ var version = "1.0.3";
1699
1745
 
1700
1746
  // src/common/task-cache.ts
1701
1747
  var debug3 = getDebug3("cache");
@@ -1826,8 +1872,14 @@ cache file: ${cacheFile}`
1826
1872
  return;
1827
1873
  }
1828
1874
  try {
1875
+ const dir = dirname2(this.cacheFilePath);
1876
+ if (!existsSync2(dir)) {
1877
+ mkdirSync2(dir, { recursive: true });
1878
+ debug3("created cache directory: %s", dir);
1879
+ }
1829
1880
  const yamlData = yaml3.dump(this.cache);
1830
1881
  writeFileSync2(this.cacheFilePath, yamlData);
1882
+ debug3("cache flushed to file: %s", this.cacheFilePath);
1831
1883
  } catch (err) {
1832
1884
  debug3(
1833
1885
  "write cache to file failed, path: %s, error: %s",
@@ -2085,6 +2137,23 @@ var PageAgent = class {
2085
2137
  metadata
2086
2138
  };
2087
2139
  }
2140
+ async aiRightClick(locatePrompt, opt) {
2141
+ const detailedLocateParam = this.buildDetailedLocateParam(
2142
+ locatePrompt,
2143
+ opt
2144
+ );
2145
+ const plans = buildPlans("RightClick", detailedLocateParam);
2146
+ const { executor, output } = await this.taskExecutor.runPlans(
2147
+ taskTitleStr("RightClick", locateParamStr(detailedLocateParam)),
2148
+ plans,
2149
+ { cacheable: opt?.cacheable }
2150
+ );
2151
+ const metadata = this.afterTaskRunning(executor);
2152
+ return {
2153
+ result: output,
2154
+ metadata
2155
+ };
2156
+ }
2088
2157
  async aiInput(value, locatePrompt, opt) {
2089
2158
  assert7(
2090
2159
  typeof value === "string",
@@ -2516,6 +2585,40 @@ ${errors}`);
2516
2585
  }
2517
2586
  throw new Error("evaluateJavaScript is not supported in current agent");
2518
2587
  }
2588
+ async logScreenshot(title, options) {
2589
+ const screenshotTitle = title || "untitled";
2590
+ const content = options?.content || "";
2591
+ const screenshot = await this.page.screenshotBase64?.();
2592
+ if (screenshot) {
2593
+ const executionDump = {
2594
+ name: screenshotTitle,
2595
+ description: content,
2596
+ tasks: [{
2597
+ type: "Screenshot",
2598
+ subType: "log",
2599
+ status: "finished",
2600
+ executor: null,
2601
+ param: {
2602
+ title: screenshotTitle,
2603
+ content
2604
+ },
2605
+ output: {
2606
+ screenshot
2607
+ },
2608
+ thought: `Logged screenshot: ${screenshotTitle}`,
2609
+ timing: {
2610
+ start: Date.now(),
2611
+ end: Date.now(),
2612
+ cost: 0
2613
+ }
2614
+ }],
2615
+ sdkVersion: "1.0.0",
2616
+ logTime: Date.now(),
2617
+ model_name: "screenshot"
2618
+ };
2619
+ this.appendExecutionDump(executionDump);
2620
+ }
2621
+ }
2519
2622
  async destroy() {
2520
2623
  await this.page.destroy();
2521
2624
  }