@midscene/web 0.12.8 → 0.12.9-beta-20250320033304.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. package/dist/es/appium.js +161 -141
  2. package/dist/es/appium.js.map +1 -1
  3. package/dist/es/bridge-mode-browser.js +3 -3
  4. package/dist/es/bridge-mode-browser.js.map +1 -1
  5. package/dist/es/bridge-mode.js +165 -145
  6. package/dist/es/bridge-mode.js.map +1 -1
  7. package/dist/es/chrome-extension.js +146 -127
  8. package/dist/es/chrome-extension.js.map +1 -1
  9. package/dist/es/index.js +165 -181
  10. package/dist/es/index.js.map +1 -1
  11. package/dist/es/midscene-playground.js +143 -125
  12. package/dist/es/midscene-playground.js.map +1 -1
  13. package/dist/es/playground.js +137 -119
  14. package/dist/es/playground.js.map +1 -1
  15. package/dist/es/playwright-report.js +2 -1
  16. package/dist/es/playwright-report.js.map +1 -1
  17. package/dist/es/playwright.js +163 -143
  18. package/dist/es/playwright.js.map +1 -1
  19. package/dist/es/puppeteer.js +171 -151
  20. package/dist/es/puppeteer.js.map +1 -1
  21. package/dist/es/ui-utils.js +42 -12
  22. package/dist/es/ui-utils.js.map +1 -1
  23. package/dist/es/utils.js +3 -2
  24. package/dist/es/utils.js.map +1 -1
  25. package/dist/es/yaml.js +24 -36
  26. package/dist/es/yaml.js.map +1 -1
  27. package/dist/lib/appium.js +160 -141
  28. package/dist/lib/appium.js.map +1 -1
  29. package/dist/lib/bridge-mode-browser.js +3 -3
  30. package/dist/lib/bridge-mode-browser.js.map +1 -1
  31. package/dist/lib/bridge-mode.js +162 -143
  32. package/dist/lib/bridge-mode.js.map +1 -1
  33. package/dist/lib/chrome-extension.js +145 -127
  34. package/dist/lib/chrome-extension.js.map +1 -1
  35. package/dist/lib/index.js +168 -186
  36. package/dist/lib/index.js.map +1 -1
  37. package/dist/lib/midscene-playground.js +141 -124
  38. package/dist/lib/midscene-playground.js.map +1 -1
  39. package/dist/lib/playground.js +132 -115
  40. package/dist/lib/playground.js.map +1 -1
  41. package/dist/lib/playwright-report.js.map +1 -1
  42. package/dist/lib/playwright.js +163 -144
  43. package/dist/lib/playwright.js.map +1 -1
  44. package/dist/lib/puppeteer.js +172 -153
  45. package/dist/lib/puppeteer.js.map +1 -1
  46. package/dist/lib/ui-utils.js +45 -12
  47. package/dist/lib/ui-utils.js.map +1 -1
  48. package/dist/lib/utils.js +1 -1
  49. package/dist/lib/utils.js.map +1 -1
  50. package/dist/lib/yaml.js +24 -37
  51. package/dist/lib/yaml.js.map +1 -1
  52. package/dist/types/{agent-3fa82650.d.ts → agent-f85d75b2.d.ts} +14 -10
  53. package/dist/types/appium.d.ts +1 -1
  54. package/dist/types/bridge-mode.d.ts +1 -1
  55. package/dist/types/chrome-extension.d.ts +2 -2
  56. package/dist/types/index.d.ts +2 -2
  57. package/dist/types/playground.d.ts +1 -1
  58. package/dist/types/playwright.d.ts +1 -1
  59. package/dist/types/puppeteer.d.ts +1 -1
  60. package/dist/types/ui-utils.d.ts +5 -2
  61. package/dist/types/{utils-5b053eba.d.ts → utils-4381250e.d.ts} +4 -4
  62. package/dist/types/yaml.d.ts +2 -2
  63. package/package.json +3 -3
package/dist/es/appium.js CHANGED
@@ -2,7 +2,6 @@
2
2
  import {
3
3
  Insight
4
4
  } from "@midscene/core";
5
- import { NodeType as NodeType2 } from "@midscene/shared/constants";
6
5
 
7
6
  // src/yaml/player.ts
8
7
  import { existsSync, mkdirSync, writeFileSync } from "fs";
@@ -124,6 +123,24 @@ var ScriptPlayer = class {
124
123
  `ms for sleep must be greater than 0, but got ${ms}`
125
124
  );
126
125
  await new Promise((resolve) => setTimeout(resolve, msNumber));
126
+ } else if (flowItem.aiTap) {
127
+ const tapTask = flowItem;
128
+ await agent.aiTap(tapTask.aiTap);
129
+ } else if (flowItem.aiHover) {
130
+ const hoverTask = flowItem;
131
+ await agent.aiHover(hoverTask.aiHover);
132
+ } else if (flowItem.aiInput) {
133
+ const inputTask = flowItem;
134
+ await agent.aiInput(inputTask.aiInput, inputTask.locate);
135
+ } else if (flowItem.aiKeyboardPress) {
136
+ const keyboardPressTask = flowItem;
137
+ await agent.aiKeyboardPress(
138
+ keyboardPressTask.aiKeyboardPress,
139
+ keyboardPressTask.locate
140
+ );
141
+ } else if (flowItem.aiScroll) {
142
+ const scrollTask = flowItem;
143
+ await agent.aiScroll(scrollTask.aiScroll, scrollTask.locate);
127
144
  } else {
128
145
  throw new Error(`unknown flowItem: ${JSON.stringify(flowItem)}`);
129
146
  }
@@ -138,6 +155,11 @@ var ScriptPlayer = class {
138
155
  try {
139
156
  const { agent: newAgent, freeFn: newFreeFn } = await this.setupAgent(target);
140
157
  agent = newAgent;
158
+ agent.onTaskStartTip = (tip) => {
159
+ if (this.status === "running") {
160
+ this.agentStatusTip = tip;
161
+ }
162
+ };
141
163
  freeFn = newFreeFn;
142
164
  } catch (e) {
143
165
  this.setPlayerStatus("error", e);
@@ -171,6 +193,7 @@ var ScriptPlayer = class {
171
193
  } else {
172
194
  this.setPlayerStatus("done");
173
195
  }
196
+ this.agentStatusTip = "";
174
197
  for (const fn of freeFn) {
175
198
  try {
176
199
  await fn.fn();
@@ -220,7 +243,7 @@ function parseYamlScript(content, filePath, ignoreCheckingTarget) {
220
243
  // src/common/agent.ts
221
244
  import {
222
245
  MIDSCENE_USE_VLM_UI_TARS as MIDSCENE_USE_VLM_UI_TARS2,
223
- getAIConfig as getAIConfig3,
246
+ getAIConfigInBoolean as getAIConfigInBoolean3,
224
247
  vlLocateMode
225
248
  } from "@midscene/core/env";
226
249
  import {
@@ -229,6 +252,7 @@ import {
229
252
  stringifyDumpData as stringifyDumpData2,
230
253
  writeLogFile as writeLogFile2
231
254
  } from "@midscene/core/utils";
255
+ import { assert as assert6 } from "@midscene/shared/utils";
232
256
 
233
257
  // src/common/tasks.ts
234
258
  import {
@@ -469,6 +493,30 @@ function getKeyCommands(value) {
469
493
  return acc.concat([{ key: k }]);
470
494
  }, []);
471
495
  }
496
+ function locateParamStr(locate) {
497
+ if (!locate) {
498
+ return "";
499
+ }
500
+ if (typeof locate === "string") {
501
+ return locate;
502
+ }
503
+ if (!locate.searchArea) {
504
+ if (locate.deepThink) {
505
+ return `${locate.prompt} (deep think)`;
506
+ }
507
+ return locate.prompt;
508
+ }
509
+ return `${locate.prompt} @ ${locate.searchArea}`;
510
+ }
511
+ function scrollParamStr(scrollParam) {
512
+ if (!scrollParam) {
513
+ return "";
514
+ }
515
+ return `${scrollParam.direction || "down"}, ${scrollParam.scrollType || "once"}, ${scrollParam.distance || "distance-not-set"}`;
516
+ }
517
+ function taskTitleStr(type, prompt) {
518
+ return `${type} - ${prompt}`;
519
+ }
472
520
  function paramStr(task) {
473
521
  let value;
474
522
  if (task.type === "Planning") {
@@ -478,19 +526,22 @@ function paramStr(task) {
478
526
  value = task?.param?.prompt || task?.param?.id || task?.param?.dataDemand || task?.param?.assertion;
479
527
  }
480
528
  if (task.type === "Action") {
481
- const sleepMs = task?.param?.timeMs;
482
- const scrollType = task?.param?.scrollType;
483
- if (sleepMs) {
484
- value = `${sleepMs}ms`;
485
- } else if (scrollType) {
486
- const scrollDirection = task?.param?.direction;
487
- const scrollDistance = task?.param?.distance;
488
- value = `${scrollDirection || "down"}, ${scrollType || "once"}, ${scrollDistance || "distance-not-set"}`;
489
- } else {
490
- value = task?.param?.value || task?.param?.scrollType;
529
+ const locate = task?.locate;
530
+ const locateStr = locate ? locateParamStr(locate) : "";
531
+ value = task.thought || "";
532
+ if (typeof task?.param?.timeMs === "number") {
533
+ value = `${task?.param?.timeMs}ms`;
534
+ } else if (typeof task?.param?.scrollType === "string") {
535
+ value = scrollParamStr(task?.param);
536
+ } else if (typeof task?.param?.value !== "undefined") {
537
+ value = task?.param?.value;
491
538
  }
492
- if (!value) {
493
- value = task.thought;
539
+ if (locateStr) {
540
+ if (value) {
541
+ value = `${locateStr} - ${value}`;
542
+ } else {
543
+ value = locateStr;
544
+ }
494
545
  }
495
546
  }
496
547
  if (typeof value === "undefined")
@@ -508,6 +559,7 @@ var PageTaskExecutor = class {
508
559
  this.taskCache = new TaskCache({
509
560
  cacheId: opts?.cacheId
510
561
  });
562
+ this.onTaskStartCallback = opts?.onTaskStart;
511
563
  }
512
564
  async recordScreenshot(timing) {
513
565
  const base64 = await this.page.screenshotBase64();
@@ -606,7 +658,7 @@ var PageTaskExecutor = class {
606
658
  bbox: param?.bbox
607
659
  };
608
660
  const startTime = Date.now();
609
- const element = await this.insight.locate(param, {
661
+ const { element } = await this.insight.locate(param, {
610
662
  quickAnswer
611
663
  });
612
664
  const aiCost = Date.now() - startTime;
@@ -1097,9 +1149,9 @@ var PageTaskExecutor = class {
1097
1149
  };
1098
1150
  return task;
1099
1151
  }
1100
- async runPlans(title, plans, options) {
1152
+ async runPlans(title, plans) {
1101
1153
  const taskExecutor = new Executor(title, {
1102
- onTaskStart: options?.onTaskStart
1154
+ onTaskStart: this.onTaskStartCallback
1103
1155
  });
1104
1156
  const { tasks } = await this.convertPlanToExecutable(plans);
1105
1157
  await taskExecutor.append(tasks);
@@ -1109,9 +1161,9 @@ var PageTaskExecutor = class {
1109
1161
  executor: taskExecutor
1110
1162
  };
1111
1163
  }
1112
- async action(userPrompt, options) {
1113
- const taskExecutor = new Executor(userPrompt, {
1114
- onTaskStart: options?.onTaskStart
1164
+ async action(userPrompt) {
1165
+ const taskExecutor = new Executor(taskTitleStr("Action", userPrompt), {
1166
+ onTaskStart: this.onTaskStartCallback
1115
1167
  });
1116
1168
  const cacheGroup = this.taskCache.getCacheGroupByPrompt(userPrompt);
1117
1169
  let planningTask = this.planningTaskFromPrompt(userPrompt, cacheGroup);
@@ -1170,9 +1222,9 @@ var PageTaskExecutor = class {
1170
1222
  executor: taskExecutor
1171
1223
  };
1172
1224
  }
1173
- async actionToGoal(userPrompt, options) {
1174
- const taskExecutor = new Executor(userPrompt, {
1175
- onTaskStart: options?.onTaskStart
1225
+ async actionToGoal(userPrompt) {
1226
+ const taskExecutor = new Executor(taskTitleStr("Action", userPrompt), {
1227
+ onTaskStart: this.onTaskStartCallback
1176
1228
  });
1177
1229
  this.conversationHistory = [];
1178
1230
  const cacheGroup = this.taskCache.getCacheGroupByPrompt(userPrompt);
@@ -1222,10 +1274,10 @@ var PageTaskExecutor = class {
1222
1274
  executor: taskExecutor
1223
1275
  };
1224
1276
  }
1225
- async query(demand, options) {
1277
+ async query(demand) {
1226
1278
  const description = typeof demand === "string" ? demand : JSON.stringify(demand);
1227
- const taskExecutor = new Executor(description, {
1228
- onTaskStart: options?.onTaskStart
1279
+ const taskExecutor = new Executor(taskTitleStr("Query", description), {
1280
+ onTaskStart: this.onTaskStartCallback
1229
1281
  });
1230
1282
  const queryTask = {
1231
1283
  type: "Insight",
@@ -1257,10 +1309,10 @@ var PageTaskExecutor = class {
1257
1309
  executor: taskExecutor
1258
1310
  };
1259
1311
  }
1260
- async assert(assertion, options) {
1312
+ async assert(assertion) {
1261
1313
  const description = `assert: ${assertion}`;
1262
- const taskExecutor = new Executor(description, {
1263
- onTaskStart: options?.onTaskStart
1314
+ const taskExecutor = new Executor(taskTitleStr("Assert", description), {
1315
+ onTaskStart: this.onTaskStartCallback
1264
1316
  });
1265
1317
  const assertionPlan = {
1266
1318
  type: "Assert",
@@ -1320,8 +1372,8 @@ var PageTaskExecutor = class {
1320
1372
  }
1321
1373
  async waitFor(assertion, opt) {
1322
1374
  const description = `waitFor: ${assertion}`;
1323
- const taskExecutor = new Executor(description, {
1324
- onTaskStart: opt.onTaskStart
1375
+ const taskExecutor = new Executor(taskTitleStr("WaitFor", description), {
1376
+ onTaskStart: this.onTaskStartCallback
1325
1377
  });
1326
1378
  const { timeoutMs, checkIntervalMs } = opt;
1327
1379
  assert3(assertion, "No assertion for waitFor");
@@ -1379,61 +1431,39 @@ var PageTaskExecutor = class {
1379
1431
  }
1380
1432
  };
1381
1433
 
1382
- // src/web-element.ts
1383
- var WebElementInfo = class {
1384
- constructor({
1385
- content,
1386
- rect,
1387
- // page,
1388
- locator,
1389
- id,
1390
- attributes,
1391
- indexId
1392
- }) {
1393
- this.content = content;
1394
- this.rect = rect;
1395
- this.center = [
1396
- Math.floor(rect.left + rect.width / 2),
1397
- Math.floor(rect.top + rect.height / 2)
1398
- ];
1399
- this.locator = locator;
1400
- this.id = id;
1401
- this.attributes = attributes;
1402
- this.indexId = indexId;
1403
- }
1404
- };
1405
-
1406
1434
  // src/common/plan-builder.ts
1407
1435
  import { assert as assert4, getDebug as getDebug2 } from "@midscene/shared/utils";
1408
1436
  var debug2 = getDebug2("plan-builder");
1409
1437
  function buildPlans(type, locate, param) {
1410
1438
  let returnPlans = [];
1411
- const locatePlan = locate ? {
1439
+ const locateParam = typeof locate === "string" ? { prompt: locate } : locate ?? null;
1440
+ const locatePlan = locateParam ? {
1412
1441
  type: "Locate",
1413
- locate,
1414
- param: locate,
1442
+ locate: locateParam,
1443
+ param: locateParam,
1415
1444
  thought: ""
1416
1445
  } : null;
1417
1446
  if (type === "Tap" || type === "Hover") {
1447
+ assert4(locate && locateParam, `missing locate info for action "${type}"`);
1418
1448
  assert4(locatePlan, `missing locate info for action "${type}"`);
1419
1449
  const tapPlan = {
1420
1450
  type,
1421
1451
  param: null,
1422
1452
  thought: "",
1423
- locate
1453
+ locate: locateParam
1424
1454
  };
1425
1455
  returnPlans = [locatePlan, tapPlan];
1426
1456
  }
1427
1457
  if (type === "Input" || type === "KeyboardPress") {
1428
1458
  if (type === "Input") {
1429
- assert4(locatePlan, `missing locate info for action "${type}"`);
1459
+ assert4(locate && locateParam, `missing locate info for action "${type}"`);
1430
1460
  }
1431
1461
  assert4(param, `missing param for action "${type}"`);
1432
1462
  const inputPlan = {
1433
1463
  type,
1434
1464
  param,
1435
1465
  thought: "",
1436
- locate
1466
+ locate: locateParam
1437
1467
  };
1438
1468
  if (locatePlan) {
1439
1469
  returnPlans = [locatePlan, inputPlan];
@@ -1447,7 +1477,7 @@ function buildPlans(type, locate, param) {
1447
1477
  type,
1448
1478
  param,
1449
1479
  thought: "",
1450
- locate
1480
+ locate: locateParam
1451
1481
  };
1452
1482
  if (locatePlan) {
1453
1483
  returnPlans = [locatePlan, scrollPlan];
@@ -1476,7 +1506,8 @@ function buildPlans(type, locate, param) {
1476
1506
  import {
1477
1507
  MIDSCENE_REPORT_TAG_NAME,
1478
1508
  MIDSCENE_USE_VLM_UI_TARS,
1479
- getAIConfig as getAIConfig2
1509
+ getAIConfig as getAIConfig2,
1510
+ getAIConfigInBoolean as getAIConfigInBoolean2
1480
1511
  } from "@midscene/core/env";
1481
1512
  import { uploadTestInfoToServer } from "@midscene/core/utils";
1482
1513
  import { NodeType } from "@midscene/shared/constants";
@@ -1484,6 +1515,32 @@ import { traverseTree, treeToList } from "@midscene/shared/extractor";
1484
1515
  import { compositeElementInfoImg, resizeImgBase64 } from "@midscene/shared/img";
1485
1516
  import { assert as assert5, uuid } from "@midscene/shared/utils";
1486
1517
  import dayjs from "dayjs";
1518
+
1519
+ // src/web-element.ts
1520
+ var WebElementInfo = class {
1521
+ constructor({
1522
+ content,
1523
+ rect,
1524
+ // page,
1525
+ locator,
1526
+ id,
1527
+ attributes,
1528
+ indexId
1529
+ }) {
1530
+ this.content = content;
1531
+ this.rect = rect;
1532
+ this.center = [
1533
+ Math.floor(rect.left + rect.width / 2),
1534
+ Math.floor(rect.top + rect.height / 2)
1535
+ ];
1536
+ this.locator = locator;
1537
+ this.id = id;
1538
+ this.attributes = attributes;
1539
+ this.indexId = indexId;
1540
+ }
1541
+ };
1542
+
1543
+ // src/common/utils.ts
1487
1544
  async function parseContextFromWebPage(page, _opt) {
1488
1545
  assert5(page, "page is required");
1489
1546
  if (page._forceUsePageContext) {
@@ -1530,7 +1587,7 @@ async function parseContextFromWebPage(page, _opt) {
1530
1587
  });
1531
1588
  }
1532
1589
  let screenshotBase64WithElementMarker = screenshotBase64;
1533
- if (!getAIConfig2(MIDSCENE_USE_VLM_UI_TARS)) {
1590
+ if (!getAIConfigInBoolean2(MIDSCENE_USE_VLM_UI_TARS)) {
1534
1591
  if (_opt?.ignoreMarker) {
1535
1592
  screenshotBase64WithElementMarker = screenshotBase64;
1536
1593
  } else {
@@ -1576,24 +1633,15 @@ var PageAgent = class {
1576
1633
  },
1577
1634
  opts || {}
1578
1635
  );
1636
+ this.onTaskStartTip = this.opts.onTaskStartTip;
1579
1637
  this.insight = new Insight(
1580
1638
  async (action) => {
1581
1639
  return this.getUIContext(action);
1582
- },
1583
- {
1584
- generateElement: ({ content, rect }) => new WebElementInfo({
1585
- content: content || "",
1586
- rect,
1587
- id: "",
1588
- attributes: {
1589
- nodeType: NodeType2.CONTAINER
1590
- },
1591
- indexId: 0
1592
- })
1593
1640
  }
1594
1641
  );
1595
1642
  this.taskExecutor = new PageTaskExecutor(this.page, this.insight, {
1596
- cacheId: opts?.cacheId
1643
+ cacheId: opts?.cacheId,
1644
+ onTaskStart: this.callbackOnTaskStartTip.bind(this)
1597
1645
  });
1598
1646
  this.dump = this.resetDump();
1599
1647
  this.reportFileName = reportFileName(opts?.testId || "web");
@@ -1642,14 +1690,10 @@ var PageAgent = class {
1642
1690
  }
1643
1691
  }
1644
1692
  async callbackOnTaskStartTip(task) {
1645
- if (this.opts.onTaskStartTip) {
1646
- const param = paramStr(task);
1647
- if (param) {
1648
- const tip = `${typeStr(task)} - ${param}`;
1649
- await this.opts.onTaskStartTip(tip);
1650
- } else {
1651
- await this.opts.onTaskStartTip(typeStr(task));
1652
- }
1693
+ const param = paramStr(task);
1694
+ const tip = param ? `${typeStr(task)} - ${param}` : typeStr(task);
1695
+ if (this.onTaskStartTip) {
1696
+ await this.onTaskStartTip(tip);
1653
1697
  }
1654
1698
  }
1655
1699
  afterTaskRunning(executor, doNotThrowError = false) {
@@ -1661,95 +1705,72 @@ var PageAgent = class {
1661
1705
  ${errorTask?.errorStack}`);
1662
1706
  }
1663
1707
  }
1664
- async aiTap(targetPrompt, searchArea) {
1665
- const plans = buildPlans("Tap", {
1666
- prompt: targetPrompt,
1667
- searchArea
1668
- });
1708
+ async aiTap(locate) {
1709
+ const plans = buildPlans("Tap", locate);
1669
1710
  const { executor, output } = await this.taskExecutor.runPlans(
1670
- `Tap ${targetPrompt}`,
1711
+ taskTitleStr("Tap", locateParamStr(locate)),
1671
1712
  plans
1672
1713
  );
1673
1714
  this.afterTaskRunning(executor);
1674
1715
  return output;
1675
1716
  }
1676
- async aiHover(taskPrompt) {
1677
- const plans = buildPlans("Hover", {
1678
- prompt: taskPrompt
1679
- });
1717
+ async aiHover(locate) {
1718
+ const plans = buildPlans("Hover", locate);
1680
1719
  const { executor, output } = await this.taskExecutor.runPlans(
1681
- `Hover ${taskPrompt}`,
1720
+ taskTitleStr("Hover", locateParamStr(locate)),
1682
1721
  plans
1683
1722
  );
1684
1723
  this.afterTaskRunning(executor);
1685
1724
  return output;
1686
1725
  }
1687
- async aiInput(where, value) {
1688
- const plans = buildPlans(
1689
- "Input",
1690
- {
1691
- prompt: where
1692
- },
1693
- {
1694
- value
1695
- }
1726
+ async aiInput(value, locate) {
1727
+ assert6(
1728
+ typeof value === "string",
1729
+ "input value must be a string, use empty string if you want to clear the input"
1696
1730
  );
1731
+ const plans = buildPlans("Input", locate, {
1732
+ value
1733
+ });
1697
1734
  const { executor, output } = await this.taskExecutor.runPlans(
1698
- `Input ${where} - ${value}`,
1735
+ taskTitleStr("Input", `${locateParamStr(locate)} - ${value}`),
1699
1736
  plans
1700
1737
  );
1701
1738
  this.afterTaskRunning(executor);
1702
1739
  return output;
1703
1740
  }
1704
- async aiKeyboardPress(where, value) {
1705
- const plans = buildPlans(
1706
- "KeyboardPress",
1707
- {
1708
- prompt: where
1709
- },
1710
- {
1711
- value
1712
- }
1741
+ async aiKeyboardPress(keyName, locate) {
1742
+ assert6(keyName, "missing keyName for keyboard press");
1743
+ const plans = buildPlans("KeyboardPress", locate, {
1744
+ value: keyName
1745
+ });
1746
+ const { executor, output } = await this.taskExecutor.runPlans(
1747
+ taskTitleStr("KeyboardPress", `${locateParamStr(locate)} - ${keyName}`),
1748
+ plans
1713
1749
  );
1750
+ this.afterTaskRunning(executor);
1751
+ return output;
1752
+ }
1753
+ async aiScroll(scrollParam, locate) {
1754
+ const plans = buildPlans("Scroll", locate, scrollParam);
1755
+ const paramInTitle = locate ? `${locateParamStr(locate)} - ${scrollParamStr(scrollParam)}` : scrollParamStr(scrollParam);
1714
1756
  const { executor, output } = await this.taskExecutor.runPlans(
1715
- `KeyboardPress ${where} - ${value}`,
1757
+ taskTitleStr("Scroll", paramInTitle),
1716
1758
  plans
1717
1759
  );
1718
1760
  this.afterTaskRunning(executor);
1719
1761
  return output;
1720
1762
  }
1721
- // async aiScroll(where: string, param: PlanningActionParamScroll) {
1722
- // const plans = buildPlans(
1723
- // 'Scroll',
1724
- // {
1725
- // prompt: where,
1726
- // },
1727
- // param,
1728
- // );
1729
- // const { executor, output } = await this.taskExecutor.runPlans(
1730
- // `Scroll ${where} - ${paramStr(param)}`,
1731
- // plans,
1732
- // );
1733
- // }
1734
1763
  async aiAction(taskPrompt) {
1735
- const { executor } = await (getAIConfig3(MIDSCENE_USE_VLM_UI_TARS2) ? this.taskExecutor.actionToGoal(taskPrompt, {
1736
- onTaskStart: this.callbackOnTaskStartTip.bind(this)
1737
- }) : this.taskExecutor.action(taskPrompt, {
1738
- onTaskStart: this.callbackOnTaskStartTip.bind(this)
1739
- }));
1764
+ const { executor } = await (getAIConfigInBoolean3(MIDSCENE_USE_VLM_UI_TARS2) ? this.taskExecutor.actionToGoal(taskPrompt) : this.taskExecutor.action(taskPrompt));
1740
1765
  this.afterTaskRunning(executor);
1741
1766
  }
1742
1767
  async aiQuery(demand) {
1743
- const { output, executor } = await this.taskExecutor.query(demand, {
1744
- onTaskStart: this.callbackOnTaskStartTip.bind(this)
1745
- });
1768
+ const { output, executor } = await this.taskExecutor.query(demand);
1746
1769
  this.afterTaskRunning(executor);
1747
1770
  return output;
1748
1771
  }
1749
1772
  async aiAssert(assertion, msg, opt) {
1750
- const { output, executor } = await this.taskExecutor.assert(assertion, {
1751
- onTaskStart: this.callbackOnTaskStartTip.bind(this)
1752
- });
1773
+ const { output, executor } = await this.taskExecutor.assert(assertion);
1753
1774
  this.afterTaskRunning(executor, true);
1754
1775
  if (output && opt?.keepRawResponse) {
1755
1776
  return output;
@@ -1765,8 +1786,7 @@ ${reasonMsg}`);
1765
1786
  const { executor } = await this.taskExecutor.waitFor(assertion, {
1766
1787
  timeoutMs: opt?.timeoutMs || 15 * 1e3,
1767
1788
  checkIntervalMs: opt?.checkIntervalMs || 3 * 1e3,
1768
- assertion,
1769
- onTaskStart: this.callbackOnTaskStartTip.bind(this)
1789
+ assertion
1770
1790
  });
1771
1791
  this.appendExecutionDump(executor.dump());
1772
1792
  this.writeOutActionDumps();