misoai-web 1.5.6 → 1.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/dist/es/agent.js +124 -21
  2. package/dist/es/agent.js.map +1 -1
  3. package/dist/es/bridge-mode-browser.js +10 -9
  4. package/dist/es/bridge-mode-browser.js.map +1 -1
  5. package/dist/es/bridge-mode.js +126 -23
  6. package/dist/es/bridge-mode.js.map +1 -1
  7. package/dist/es/chrome-extension.js +132 -28
  8. package/dist/es/chrome-extension.js.map +1 -1
  9. package/dist/es/index.js +140 -21
  10. package/dist/es/index.js.map +1 -1
  11. package/dist/es/midscene-playground.js +124 -21
  12. package/dist/es/midscene-playground.js.map +1 -1
  13. package/dist/es/midscene-server.js.map +1 -1
  14. package/dist/es/playground.js +124 -21
  15. package/dist/es/playground.js.map +1 -1
  16. package/dist/es/playwright-report.js +1 -1
  17. package/dist/es/playwright-report.js.map +1 -1
  18. package/dist/es/playwright.js +140 -21
  19. package/dist/es/playwright.js.map +1 -1
  20. package/dist/es/puppeteer-agent-launcher.js +124 -21
  21. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  22. package/dist/es/puppeteer.js +124 -21
  23. package/dist/es/puppeteer.js.map +1 -1
  24. package/dist/es/ui-utils.js.map +1 -1
  25. package/dist/es/utils.js +7 -4
  26. package/dist/es/utils.js.map +1 -1
  27. package/dist/es/yaml.js +24 -0
  28. package/dist/es/yaml.js.map +1 -1
  29. package/dist/lib/agent.js +122 -19
  30. package/dist/lib/agent.js.map +1 -1
  31. package/dist/lib/bridge-mode-browser.js +10 -9
  32. package/dist/lib/bridge-mode-browser.js.map +1 -1
  33. package/dist/lib/bridge-mode.js +124 -21
  34. package/dist/lib/bridge-mode.js.map +1 -1
  35. package/dist/lib/chrome-extension.js +130 -26
  36. package/dist/lib/chrome-extension.js.map +1 -1
  37. package/dist/lib/index.js +138 -19
  38. package/dist/lib/index.js.map +1 -1
  39. package/dist/lib/midscene-playground.js +122 -19
  40. package/dist/lib/midscene-playground.js.map +1 -1
  41. package/dist/lib/midscene-server.js.map +1 -1
  42. package/dist/lib/playground.js +122 -19
  43. package/dist/lib/playground.js.map +1 -1
  44. package/dist/lib/playwright-report.js +1 -1
  45. package/dist/lib/playwright-report.js.map +1 -1
  46. package/dist/lib/playwright.js +138 -19
  47. package/dist/lib/playwright.js.map +1 -1
  48. package/dist/lib/puppeteer-agent-launcher.js +122 -19
  49. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  50. package/dist/lib/puppeteer.js +122 -19
  51. package/dist/lib/puppeteer.js.map +1 -1
  52. package/dist/lib/ui-utils.js.map +1 -1
  53. package/dist/lib/utils.js +7 -4
  54. package/dist/lib/utils.js.map +1 -1
  55. package/dist/lib/yaml.js +24 -0
  56. package/dist/lib/yaml.js.map +1 -1
  57. package/dist/types/agent.d.ts +10 -7
  58. package/dist/types/bridge-mode-browser.d.ts +2 -3
  59. package/dist/types/bridge-mode.d.ts +2 -3
  60. package/dist/types/{browser-aec1055d.d.ts → browser-9b472ffb.d.ts} +1 -1
  61. package/dist/types/chrome-extension.d.ts +2 -3
  62. package/dist/types/index.d.ts +1 -2
  63. package/dist/types/midscene-server.d.ts +1 -2
  64. package/dist/types/{page-86ab0fe1.d.ts → page-ed0ecb44.d.ts} +19 -9
  65. package/dist/types/playground.d.ts +2 -3
  66. package/dist/types/playwright.d.ts +9 -2
  67. package/dist/types/puppeteer-agent-launcher.d.ts +1 -2
  68. package/dist/types/puppeteer.d.ts +6 -5
  69. package/dist/types/ui-utils.d.ts +1 -1
  70. package/dist/types/utils.d.ts +1 -2
  71. package/dist/types/yaml.d.ts +1 -2
  72. package/iife-script/htmlElement.js +51 -73
  73. package/iife-script/htmlElementDebug.js +33 -54
  74. package/package.json +23 -23
  75. package/LICENSE +0 -21
package/dist/es/agent.js CHANGED
@@ -105,6 +105,10 @@ var ScriptPlayer = class {
105
105
  } else if ("aiQuery" in flowItem) {
106
106
  const queryTask = flowItem;
107
107
  const prompt = queryTask.aiQuery;
108
+ const options = {
109
+ domIncluded: queryTask.domIncluded,
110
+ screenshotIncluded: queryTask.screenshotIncluded
111
+ };
108
112
  assert(prompt, "missing prompt for aiQuery");
109
113
  assert(
110
114
  typeof prompt === "string",
@@ -115,6 +119,10 @@ var ScriptPlayer = class {
115
119
  } else if ("aiNumber" in flowItem) {
116
120
  const numberTask = flowItem;
117
121
  const prompt = numberTask.aiNumber;
122
+ const options = {
123
+ domIncluded: numberTask.domIncluded,
124
+ screenshotIncluded: numberTask.screenshotIncluded
125
+ };
118
126
  assert(prompt, "missing prompt for number");
119
127
  assert(
120
128
  typeof prompt === "string",
@@ -125,6 +133,10 @@ var ScriptPlayer = class {
125
133
  } else if ("aiString" in flowItem) {
126
134
  const stringTask = flowItem;
127
135
  const prompt = stringTask.aiString;
136
+ const options = {
137
+ domIncluded: stringTask.domIncluded,
138
+ screenshotIncluded: stringTask.screenshotIncluded
139
+ };
128
140
  assert(prompt, "missing prompt for string");
129
141
  assert(
130
142
  typeof prompt === "string",
@@ -135,6 +147,10 @@ var ScriptPlayer = class {
135
147
  } else if ("aiBoolean" in flowItem) {
136
148
  const booleanTask = flowItem;
137
149
  const prompt = booleanTask.aiBoolean;
150
+ const options = {
151
+ domIncluded: booleanTask.domIncluded,
152
+ screenshotIncluded: booleanTask.screenshotIncluded
153
+ };
138
154
  assert(prompt, "missing prompt for boolean");
139
155
  assert(
140
156
  typeof prompt === "string",
@@ -177,6 +193,9 @@ var ScriptPlayer = class {
177
193
  } else if ("aiTap" in flowItem) {
178
194
  const tapTask = flowItem;
179
195
  await agent.aiTap(tapTask.aiTap, tapTask);
196
+ } else if ("aiRightClick" in flowItem) {
197
+ const rightClickTask = flowItem;
198
+ await agent.aiRightClick(rightClickTask.aiRightClick, rightClickTask);
180
199
  } else if ("aiHover" in flowItem) {
181
200
  const hoverTask = flowItem;
182
201
  await agent.aiHover(hoverTask.aiHover, hoverTask);
@@ -199,6 +218,11 @@ var ScriptPlayer = class {
199
218
  evaluateJavaScriptTask.javascript
200
219
  );
201
220
  this.setResult(evaluateJavaScriptTask.name, result);
221
+ } else if ("logScreenshot" in flowItem) {
222
+ const logScreenshotTask = flowItem;
223
+ await agent.logScreenshot(logScreenshotTask.logScreenshot, {
224
+ content: logScreenshotTask.content || ""
225
+ });
202
226
  } else {
203
227
  throw new Error(`unknown flowItem: ${JSON.stringify(flowItem)}`);
204
228
  }
@@ -467,7 +491,8 @@ var WebElementInfo = class {
467
491
  id,
468
492
  attributes,
469
493
  indexId,
470
- xpaths
494
+ xpaths,
495
+ isVisible
471
496
  }) {
472
497
  this.content = content;
473
498
  this.rect = rect;
@@ -480,6 +505,7 @@ var WebElementInfo = class {
480
505
  this.attributes = attributes;
481
506
  this.indexId = indexId;
482
507
  this.xpaths = xpaths;
508
+ this.isVisible = isVisible;
483
509
  }
484
510
  };
485
511
 
@@ -502,14 +528,15 @@ async function parseContextFromWebPage(page, _opt) {
502
528
  })
503
529
  ]);
504
530
  const webTree = traverseTree(tree, (elementInfo) => {
505
- const { rect, id, content, attributes, locator, indexId } = elementInfo;
531
+ const { rect, id, content, attributes, locator, indexId, isVisible } = elementInfo;
506
532
  return new WebElementInfo({
507
533
  rect,
508
534
  locator,
509
535
  id,
510
536
  content,
511
537
  attributes,
512
- indexId
538
+ indexId,
539
+ isVisible
513
540
  });
514
541
  });
515
542
  assert3(screenshotBase64, "screenshotBase64 is required");
@@ -539,7 +566,7 @@ function printReportMsg(filepath) {
539
566
  logMsg(`Midscene - report file updated: ${filepath}`);
540
567
  }
541
568
  function replaceIllegalPathCharsAndSpace(str) {
542
- return str.replace(/[/\\:*?"<>| ]/g, "-");
569
+ return str.replace(/[:*?"<>| ]/g, "-");
543
570
  }
544
571
  function matchElementFromPlan(planLocateParam, tree) {
545
572
  if (!planLocateParam) {
@@ -825,10 +852,10 @@ var PageTaskExecutor = class {
825
852
  if (!taskParam || !taskParam.value) {
826
853
  return;
827
854
  }
828
- await this.page.keyboard.type(taskParam.value);
829
- } else {
830
- await this.page.keyboard.type(taskParam.value);
831
855
  }
856
+ await this.page.keyboard.type(taskParam.value, {
857
+ autoDismissKeyboard: taskParam.autoDismissKeyboard
858
+ });
832
859
  }
833
860
  };
834
861
  tasks.push(taskActionInput);
@@ -857,6 +884,22 @@ var PageTaskExecutor = class {
857
884
  }
858
885
  };
859
886
  tasks.push(taskActionTap);
887
+ } else if (plan2.type === "RightClick") {
888
+ const taskActionRightClick = {
889
+ type: "Action",
890
+ subType: "RightClick",
891
+ thought: plan2.thought,
892
+ locate: plan2.locate,
893
+ executor: async (param, { element }) => {
894
+ assert4(element, "Element not found, cannot right click");
895
+ await this.page.mouse.click(
896
+ element.center[0],
897
+ element.center[1],
898
+ { button: "right" }
899
+ );
900
+ }
901
+ };
902
+ tasks.push(taskActionRightClick);
860
903
  } else if (plan2.type === "Drag") {
861
904
  const taskActionDrag = {
862
905
  type: "Action",
@@ -1385,7 +1428,7 @@ var PageTaskExecutor = class {
1385
1428
  executor: taskExecutor
1386
1429
  };
1387
1430
  }
1388
- async createTypeQueryTask(type, demand) {
1431
+ async createTypeQueryTask(type, demand, opt) {
1389
1432
  const taskExecutor = new Executor(
1390
1433
  taskTitleStr(
1391
1434
  type,
@@ -1416,7 +1459,10 @@ var PageTaskExecutor = class {
1416
1459
  result: `${type}, ${demand}`
1417
1460
  };
1418
1461
  }
1419
- const { data, usage } = await this.insight.extract(demandInput);
1462
+ const { data, usage } = await this.insight.extract(
1463
+ demandInput,
1464
+ opt
1465
+ );
1420
1466
  let outputResult = data;
1421
1467
  if (ifTypeRestricted) {
1422
1468
  assert4(data?.result !== void 0, "No result in query data");
@@ -1436,17 +1482,17 @@ var PageTaskExecutor = class {
1436
1482
  executor: taskExecutor
1437
1483
  };
1438
1484
  }
1439
- async query(demand) {
1440
- return this.createTypeQueryTask("Query", demand);
1485
+ async query(demand, opt) {
1486
+ return this.createTypeQueryTask("Query", demand, opt);
1441
1487
  }
1442
- async boolean(prompt) {
1443
- return this.createTypeQueryTask("Boolean", prompt);
1488
+ async boolean(prompt, opt) {
1489
+ return this.createTypeQueryTask("Boolean", prompt, opt);
1444
1490
  }
1445
- async number(prompt) {
1446
- return this.createTypeQueryTask("Number", prompt);
1491
+ async number(prompt, opt) {
1492
+ return this.createTypeQueryTask("Number", prompt, opt);
1447
1493
  }
1448
- async string(prompt) {
1449
- return this.createTypeQueryTask("String", prompt);
1494
+ async string(prompt, opt) {
1495
+ return this.createTypeQueryTask("String", prompt, opt);
1450
1496
  }
1451
1497
  async assert(assertion) {
1452
1498
  const description = `assert: ${assertion}`;
@@ -1582,7 +1628,7 @@ function buildPlans(type, locateParam, param) {
1582
1628
  param: locateParam,
1583
1629
  thought: ""
1584
1630
  } : null;
1585
- if (type === "Tap" || type === "Hover") {
1631
+ if (type === "Tap" || type === "Hover" || type === "RightClick") {
1586
1632
  assert5(locateParam, `missing locate info for action "${type}"`);
1587
1633
  assert5(locatePlan, `missing locate info for action "${type}"`);
1588
1634
  const tapPlan = {
@@ -1653,8 +1699,8 @@ function buildPlans(type, locateParam, param) {
1653
1699
 
1654
1700
  // src/common/task-cache.ts
1655
1701
  import assert6 from "assert";
1656
- import { existsSync as existsSync2, readFileSync, writeFileSync as writeFileSync2 } from "fs";
1657
- import { join as join2 } from "path";
1702
+ import { existsSync as existsSync2, mkdirSync as mkdirSync2, readFileSync, writeFileSync as writeFileSync2 } from "fs";
1703
+ import { dirname as dirname2, join as join2 } from "path";
1658
1704
  import { getMidsceneRunSubDir as getMidsceneRunSubDir2 } from "misoai-shared/common";
1659
1705
  import { getDebug as getDebug3 } from "misoai-shared/logger";
1660
1706
  import { ifInBrowser as ifInBrowser2 } from "misoai-shared/utils";
@@ -1662,7 +1708,7 @@ import yaml3 from "js-yaml";
1662
1708
  import semver from "semver";
1663
1709
 
1664
1710
  // package.json
1665
- var version = "1.5.6";
1711
+ var version = "1.0.3";
1666
1712
 
1667
1713
  // src/common/task-cache.ts
1668
1714
  var debug3 = getDebug3("cache");
@@ -1793,8 +1839,14 @@ cache file: ${cacheFile}`
1793
1839
  return;
1794
1840
  }
1795
1841
  try {
1842
+ const dir = dirname2(this.cacheFilePath);
1843
+ if (!existsSync2(dir)) {
1844
+ mkdirSync2(dir, { recursive: true });
1845
+ debug3("created cache directory: %s", dir);
1846
+ }
1796
1847
  const yamlData = yaml3.dump(this.cache);
1797
1848
  writeFileSync2(this.cacheFilePath, yamlData);
1849
+ debug3("cache flushed to file: %s", this.cacheFilePath);
1798
1850
  } catch (err) {
1799
1851
  debug3(
1800
1852
  "write cache to file failed, path: %s, error: %s",
@@ -2052,6 +2104,23 @@ var PageAgent = class {
2052
2104
  metadata
2053
2105
  };
2054
2106
  }
2107
+ async aiRightClick(locatePrompt, opt) {
2108
+ const detailedLocateParam = this.buildDetailedLocateParam(
2109
+ locatePrompt,
2110
+ opt
2111
+ );
2112
+ const plans = buildPlans("RightClick", detailedLocateParam);
2113
+ const { executor, output } = await this.taskExecutor.runPlans(
2114
+ taskTitleStr("RightClick", locateParamStr(detailedLocateParam)),
2115
+ plans,
2116
+ { cacheable: opt?.cacheable }
2117
+ );
2118
+ const metadata = this.afterTaskRunning(executor);
2119
+ return {
2120
+ result: output,
2121
+ metadata
2122
+ };
2123
+ }
2055
2124
  async aiInput(value, locatePrompt, opt) {
2056
2125
  assert7(
2057
2126
  typeof value === "string",
@@ -2483,6 +2552,40 @@ ${errors}`);
2483
2552
  }
2484
2553
  throw new Error("evaluateJavaScript is not supported in current agent");
2485
2554
  }
2555
+ async logScreenshot(title, options) {
2556
+ const screenshotTitle = title || "untitled";
2557
+ const content = options?.content || "";
2558
+ const screenshot = await this.page.screenshotBase64?.();
2559
+ if (screenshot) {
2560
+ const executionDump = {
2561
+ name: screenshotTitle,
2562
+ description: content,
2563
+ tasks: [{
2564
+ type: "Screenshot",
2565
+ subType: "log",
2566
+ status: "finished",
2567
+ executor: null,
2568
+ param: {
2569
+ title: screenshotTitle,
2570
+ content
2571
+ },
2572
+ output: {
2573
+ screenshot
2574
+ },
2575
+ thought: `Logged screenshot: ${screenshotTitle}`,
2576
+ timing: {
2577
+ start: Date.now(),
2578
+ end: Date.now(),
2579
+ cost: 0
2580
+ }
2581
+ }],
2582
+ sdkVersion: "1.0.0",
2583
+ logTime: Date.now(),
2584
+ model_name: "screenshot"
2585
+ };
2586
+ this.appendExecutionDump(executionDump);
2587
+ }
2588
+ }
2486
2589
  async destroy() {
2487
2590
  await this.page.destroy();
2488
2591
  }