@midscene/web 0.16.4 → 0.16.6-beta-20250429073247.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. package/dist/es/agent.js +118 -14
  2. package/dist/es/agent.js.map +1 -1
  3. package/dist/es/bridge-mode-browser.js +3 -3
  4. package/dist/es/bridge-mode-browser.js.map +1 -1
  5. package/dist/es/bridge-mode.js +125 -36
  6. package/dist/es/bridge-mode.js.map +1 -1
  7. package/dist/es/chrome-extension.js +119 -15
  8. package/dist/es/chrome-extension.js.map +1 -1
  9. package/dist/es/index.js +150 -14
  10. package/dist/es/index.js.map +1 -1
  11. package/dist/es/midscene-playground.js +118 -14
  12. package/dist/es/midscene-playground.js.map +1 -1
  13. package/dist/es/playground.js +118 -14
  14. package/dist/es/playground.js.map +1 -1
  15. package/dist/es/playwright.js +150 -14
  16. package/dist/es/playwright.js.map +1 -1
  17. package/dist/es/puppeteer-agent-launcher.js +118 -14
  18. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  19. package/dist/es/puppeteer.js +118 -14
  20. package/dist/es/puppeteer.js.map +1 -1
  21. package/dist/es/ui-utils.js.map +1 -1
  22. package/dist/es/yaml.js +40 -0
  23. package/dist/es/yaml.js.map +1 -1
  24. package/dist/lib/agent.js +117 -10
  25. package/dist/lib/agent.js.map +1 -1
  26. package/dist/lib/bridge-mode-browser.js +3 -3
  27. package/dist/lib/bridge-mode-browser.js.map +1 -1
  28. package/dist/lib/bridge-mode.js +125 -32
  29. package/dist/lib/bridge-mode.js.map +1 -1
  30. package/dist/lib/chrome-extension.js +118 -11
  31. package/dist/lib/chrome-extension.js.map +1 -1
  32. package/dist/lib/index.js +149 -10
  33. package/dist/lib/index.js.map +1 -1
  34. package/dist/lib/midscene-playground.js +117 -10
  35. package/dist/lib/midscene-playground.js.map +1 -1
  36. package/dist/lib/playground.js +117 -10
  37. package/dist/lib/playground.js.map +1 -1
  38. package/dist/lib/playwright.js +149 -10
  39. package/dist/lib/playwright.js.map +1 -1
  40. package/dist/lib/puppeteer-agent-launcher.js +117 -10
  41. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  42. package/dist/lib/puppeteer.js +117 -10
  43. package/dist/lib/puppeteer.js.map +1 -1
  44. package/dist/lib/ui-utils.js.map +1 -1
  45. package/dist/lib/yaml.js +40 -0
  46. package/dist/lib/yaml.js.map +1 -1
  47. package/dist/types/agent.d.ts +8 -0
  48. package/dist/types/bridge-mode-browser.d.ts +1 -1
  49. package/dist/types/bridge-mode.d.ts +2 -1
  50. package/dist/types/{browser-a8afbca5.d.ts → browser-0beaa7a7.d.ts} +2 -1
  51. package/dist/types/playwright.d.ts +16 -0
  52. package/dist/types/ui-utils.d.ts +1 -1
  53. package/package.json +3 -5
package/dist/es/agent.js CHANGED
@@ -109,6 +109,46 @@ var ScriptPlayer = class {
109
109
  );
110
110
  const queryResult = await agent.aiQuery(prompt);
111
111
  this.setResult(queryTask.name, queryResult);
112
+ } else if (flowItem.aiNumber) {
113
+ const numberTask = flowItem;
114
+ const prompt = numberTask.aiNumber;
115
+ assert(prompt, "missing prompt for number");
116
+ assert(
117
+ typeof prompt === "string",
118
+ "prompt for number must be a string"
119
+ );
120
+ const numberResult = await agent.aiNumber(prompt);
121
+ this.setResult(numberTask.name, numberResult);
122
+ } else if (flowItem.aiString) {
123
+ const stringTask = flowItem;
124
+ const prompt = stringTask.aiString;
125
+ assert(prompt, "missing prompt for string");
126
+ assert(
127
+ typeof prompt === "string",
128
+ "prompt for string must be a string"
129
+ );
130
+ const stringResult = await agent.aiString(prompt);
131
+ this.setResult(stringTask.name, stringResult);
132
+ } else if (flowItem.aiBoolean) {
133
+ const booleanTask = flowItem;
134
+ const prompt = booleanTask.aiBoolean;
135
+ assert(prompt, "missing prompt for boolean");
136
+ assert(
137
+ typeof prompt === "string",
138
+ "prompt for boolean must be a string"
139
+ );
140
+ const booleanResult = await agent.aiBoolean(prompt);
141
+ this.setResult(booleanTask.name, booleanResult);
142
+ } else if (flowItem.aiLocate) {
143
+ const locateTask = flowItem;
144
+ const prompt = locateTask.aiLocate;
145
+ assert(prompt, "missing prompt for aiLocate");
146
+ assert(
147
+ typeof prompt === "string",
148
+ "prompt for aiLocate must be a string"
149
+ );
150
+ const locateResult = await agent.aiLocate(prompt);
151
+ this.setResult(locateTask.name, locateResult);
112
152
  } else if (flowItem.aiWaitFor) {
113
153
  const waitForTask = flowItem;
114
154
  const prompt = waitForTask.aiWaitFor;
@@ -296,10 +336,7 @@ import { sleep } from "@midscene/core/utils";
296
336
  import { UITarsModelVersion } from "@midscene/shared/env";
297
337
  import { uiTarsModelVersion } from "@midscene/shared/env";
298
338
  import { vlLocateMode } from "@midscene/shared/env";
299
- import {
300
- imageInfo,
301
- resizeImgBase64 as resizeImgBase642
302
- } from "@midscene/shared/img";
339
+ import { imageInfo, resizeImgBase64 as resizeImgBase642 } from "@midscene/shared/img";
303
340
  import { getDebug as getDebug2 } from "@midscene/shared/logger";
304
341
  import { assert as assert4 } from "@midscene/shared/utils";
305
342
 
@@ -1485,17 +1522,23 @@ var PageTaskExecutor = class {
1485
1522
  executor: taskExecutor
1486
1523
  };
1487
1524
  }
1488
- async query(demand) {
1489
- const description = typeof demand === "string" ? demand : JSON.stringify(demand);
1490
- const taskExecutor = new Executor(taskTitleStr("Query", description), {
1491
- onTaskStart: this.onTaskStartCallback
1492
- });
1525
+ async createTypeQueryTask(type, demand) {
1526
+ const taskExecutor = new Executor(
1527
+ taskTitleStr(
1528
+ type,
1529
+ typeof demand === "string" ? demand : JSON.stringify(demand)
1530
+ ),
1531
+ {
1532
+ onTaskStart: this.onTaskStartCallback
1533
+ }
1534
+ );
1493
1535
  const queryTask = {
1494
1536
  type: "Insight",
1495
- subType: "Query",
1537
+ subType: type,
1496
1538
  locate: null,
1497
1539
  param: {
1498
1540
  dataDemand: demand
1541
+ // for user param presentation in report right sidebar
1499
1542
  },
1500
1543
  executor: async (param) => {
1501
1544
  let insightDump;
@@ -1503,11 +1546,21 @@ var PageTaskExecutor = class {
1503
1546
  insightDump = dump;
1504
1547
  };
1505
1548
  this.insight.onceDumpUpdatedFn = dumpCollector;
1506
- const { data, usage } = await this.insight.extract(
1507
- param.dataDemand
1508
- );
1549
+ const ifTypeRestricted = type !== "Query";
1550
+ let demandInput = demand;
1551
+ if (ifTypeRestricted) {
1552
+ demandInput = {
1553
+ result: `${type}, ${demand}`
1554
+ };
1555
+ }
1556
+ const { data, usage } = await this.insight.extract(demandInput);
1557
+ let outputResult = data;
1558
+ if (ifTypeRestricted) {
1559
+ assert4(data?.result !== void 0, "No result in query data");
1560
+ outputResult = data.result;
1561
+ }
1509
1562
  return {
1510
- output: data,
1563
+ output: outputResult,
1511
1564
  log: { dump: insightDump },
1512
1565
  usage
1513
1566
  };
@@ -1520,6 +1573,18 @@ var PageTaskExecutor = class {
1520
1573
  executor: taskExecutor
1521
1574
  };
1522
1575
  }
1576
+ async query(demand) {
1577
+ return this.createTypeQueryTask("Query", demand);
1578
+ }
1579
+ async boolean(prompt) {
1580
+ return this.createTypeQueryTask("Boolean", prompt);
1581
+ }
1582
+ async number(prompt) {
1583
+ return this.createTypeQueryTask("Number", prompt);
1584
+ }
1585
+ async string(prompt) {
1586
+ return this.createTypeQueryTask("String", prompt);
1587
+ }
1523
1588
  async assert(assertion) {
1524
1589
  const description = `assert: ${assertion}`;
1525
1590
  const taskExecutor = new Executor(taskTitleStr("Assert", description), {
@@ -1706,6 +1771,16 @@ function buildPlans(type, locateParam, param) {
1706
1771
  };
1707
1772
  returnPlans = [sleepPlan];
1708
1773
  }
1774
+ if (type === "Locate") {
1775
+ assert5(locateParam, `missing locate info for action "${type}"`);
1776
+ const locatePlan2 = {
1777
+ type,
1778
+ param: locateParam,
1779
+ locate: locateParam,
1780
+ thought: ""
1781
+ };
1782
+ returnPlans = [locatePlan2];
1783
+ }
1709
1784
  if (returnPlans) {
1710
1785
  debug3("buildPlans", returnPlans);
1711
1786
  return returnPlans;
@@ -1905,6 +1980,35 @@ ${errorTask?.errorStack}`);
1905
1980
  this.afterTaskRunning(executor);
1906
1981
  return output;
1907
1982
  }
1983
+ async aiBoolean(prompt) {
1984
+ const { output, executor } = await this.taskExecutor.boolean(prompt);
1985
+ this.afterTaskRunning(executor);
1986
+ return output;
1987
+ }
1988
+ async aiNumber(prompt) {
1989
+ const { output, executor } = await this.taskExecutor.number(prompt);
1990
+ this.afterTaskRunning(executor);
1991
+ return output;
1992
+ }
1993
+ async aiString(prompt) {
1994
+ const { output, executor } = await this.taskExecutor.string(prompt);
1995
+ this.afterTaskRunning(executor);
1996
+ return output;
1997
+ }
1998
+ async aiLocate(prompt, opt) {
1999
+ const detailedLocateParam = this.buildDetailedLocateParam(prompt, opt);
2000
+ const plans = buildPlans("Locate", detailedLocateParam);
2001
+ const { executor, output } = await this.taskExecutor.runPlans(
2002
+ taskTitleStr("Locate", locateParamStr(detailedLocateParam)),
2003
+ plans
2004
+ );
2005
+ this.afterTaskRunning(executor);
2006
+ const { element } = output;
2007
+ return {
2008
+ rect: element?.rect,
2009
+ center: element?.center
2010
+ };
2011
+ }
1908
2012
  async aiAssert(assertion, msg, opt) {
1909
2013
  const { output, executor } = await this.taskExecutor.assert(assertion);
1910
2014
  this.afterTaskRunning(executor, true);