@midscene/shared 1.8.5-beta-20260525033347.0 → 1.8.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -46,6 +46,7 @@ __webpack_require__.d(__webpack_exports__, {
46
46
  MIDSCENE_INSIGHT_MODEL_FAMILY: ()=>MIDSCENE_INSIGHT_MODEL_FAMILY,
47
47
  MIDSCENE_INSIGHT_MODEL_HTTP_PROXY: ()=>MIDSCENE_INSIGHT_MODEL_HTTP_PROXY,
48
48
  MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON: ()=>MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON,
49
+ MIDSCENE_INSIGHT_MODEL_MAX_TOKENS: ()=>MIDSCENE_INSIGHT_MODEL_MAX_TOKENS,
49
50
  MIDSCENE_INSIGHT_MODEL_NAME: ()=>MIDSCENE_INSIGHT_MODEL_NAME,
50
51
  MIDSCENE_INSIGHT_MODEL_REASONING_BUDGET: ()=>MIDSCENE_INSIGHT_MODEL_REASONING_BUDGET,
51
52
  MIDSCENE_INSIGHT_MODEL_REASONING_EFFORT: ()=>MIDSCENE_INSIGHT_MODEL_REASONING_EFFORT,
@@ -88,6 +89,7 @@ __webpack_require__.d(__webpack_exports__, {
88
89
  MIDSCENE_PLANNING_MODEL_FAMILY: ()=>MIDSCENE_PLANNING_MODEL_FAMILY,
89
90
  MIDSCENE_PLANNING_MODEL_HTTP_PROXY: ()=>MIDSCENE_PLANNING_MODEL_HTTP_PROXY,
90
91
  MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON: ()=>MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON,
92
+ MIDSCENE_PLANNING_MODEL_MAX_TOKENS: ()=>MIDSCENE_PLANNING_MODEL_MAX_TOKENS,
91
93
  MIDSCENE_PLANNING_MODEL_NAME: ()=>MIDSCENE_PLANNING_MODEL_NAME,
92
94
  MIDSCENE_PLANNING_MODEL_REASONING_BUDGET: ()=>MIDSCENE_PLANNING_MODEL_REASONING_BUDGET,
93
95
  MIDSCENE_PLANNING_MODEL_REASONING_EFFORT: ()=>MIDSCENE_PLANNING_MODEL_REASONING_EFFORT,
@@ -177,6 +179,7 @@ const MIDSCENE_INSIGHT_MODEL_BASE_URL = 'MIDSCENE_INSIGHT_MODEL_BASE_URL';
177
179
  const MIDSCENE_INSIGHT_MODEL_API_KEY = 'MIDSCENE_INSIGHT_MODEL_API_KEY';
178
180
  const MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON = 'MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON';
179
181
  const MIDSCENE_INSIGHT_MODEL_EXTRA_BODY_JSON = 'MIDSCENE_INSIGHT_MODEL_EXTRA_BODY_JSON';
182
+ const MIDSCENE_INSIGHT_MODEL_MAX_TOKENS = 'MIDSCENE_INSIGHT_MODEL_MAX_TOKENS';
180
183
  const MIDSCENE_INSIGHT_MODEL_TIMEOUT = 'MIDSCENE_INSIGHT_MODEL_TIMEOUT';
181
184
  const MIDSCENE_INSIGHT_MODEL_TEMPERATURE = 'MIDSCENE_INSIGHT_MODEL_TEMPERATURE';
182
185
  const MIDSCENE_INSIGHT_MODEL_RETRY_COUNT = 'MIDSCENE_INSIGHT_MODEL_RETRY_COUNT';
@@ -192,6 +195,7 @@ const MIDSCENE_PLANNING_MODEL_BASE_URL = 'MIDSCENE_PLANNING_MODEL_BASE_URL';
192
195
  const MIDSCENE_PLANNING_MODEL_API_KEY = 'MIDSCENE_PLANNING_MODEL_API_KEY';
193
196
  const MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON = 'MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON';
194
197
  const MIDSCENE_PLANNING_MODEL_EXTRA_BODY_JSON = 'MIDSCENE_PLANNING_MODEL_EXTRA_BODY_JSON';
198
+ const MIDSCENE_PLANNING_MODEL_MAX_TOKENS = 'MIDSCENE_PLANNING_MODEL_MAX_TOKENS';
195
199
  const MIDSCENE_PLANNING_MODEL_TIMEOUT = 'MIDSCENE_PLANNING_MODEL_TIMEOUT';
196
200
  const MIDSCENE_PLANNING_MODEL_TEMPERATURE = 'MIDSCENE_PLANNING_MODEL_TEMPERATURE';
197
201
  const MIDSCENE_PLANNING_MODEL_RETRY_COUNT = 'MIDSCENE_PLANNING_MODEL_RETRY_COUNT';
@@ -219,13 +223,10 @@ const BOOLEAN_ENV_KEYS = [
219
223
  MIDSCENE_REPORT_QUIET
220
224
  ];
221
225
  const NUMBER_ENV_KEYS = [
222
- MIDSCENE_MODEL_MAX_TOKENS,
223
226
  MIDSCENE_CACHE_MAX_FILENAME_LENGTH,
224
227
  MIDSCENE_REPLANNING_CYCLE_LIMIT
225
228
  ];
226
229
  const STRING_ENV_KEYS = [
227
- MIDSCENE_MODEL_MAX_TOKENS,
228
- OPENAI_MAX_TOKENS,
229
230
  MIDSCENE_ADB_PATH,
230
231
  MIDSCENE_ADB_REMOTE_HOST,
231
232
  MIDSCENE_ADB_REMOTE_PORT,
@@ -251,6 +252,7 @@ const MODEL_ENV_KEYS = [
251
252
  MIDSCENE_MODEL_BASE_URL,
252
253
  MIDSCENE_MODEL_SOCKS_PROXY,
253
254
  MIDSCENE_MODEL_HTTP_PROXY,
255
+ MIDSCENE_MODEL_MAX_TOKENS,
254
256
  MIDSCENE_MODEL_TIMEOUT,
255
257
  MIDSCENE_MODEL_TEMPERATURE,
256
258
  MIDSCENE_MODEL_RETRY_COUNT,
@@ -266,6 +268,7 @@ const MODEL_ENV_KEYS = [
266
268
  MIDSCENE_USE_VL_MODEL,
267
269
  OPENAI_API_KEY,
268
270
  OPENAI_BASE_URL,
271
+ OPENAI_MAX_TOKENS,
269
272
  MIDSCENE_OPENAI_INIT_CONFIG_JSON,
270
273
  MIDSCENE_OPENAI_HTTP_PROXY,
271
274
  MIDSCENE_OPENAI_SOCKS_PROXY,
@@ -276,6 +279,7 @@ const MODEL_ENV_KEYS = [
276
279
  MIDSCENE_INSIGHT_MODEL_API_KEY,
277
280
  MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON,
278
281
  MIDSCENE_INSIGHT_MODEL_EXTRA_BODY_JSON,
282
+ MIDSCENE_INSIGHT_MODEL_MAX_TOKENS,
279
283
  MIDSCENE_INSIGHT_MODEL_TIMEOUT,
280
284
  MIDSCENE_INSIGHT_MODEL_TEMPERATURE,
281
285
  MIDSCENE_INSIGHT_MODEL_RETRY_COUNT,
@@ -291,6 +295,7 @@ const MODEL_ENV_KEYS = [
291
295
  MIDSCENE_PLANNING_MODEL_API_KEY,
292
296
  MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON,
293
297
  MIDSCENE_PLANNING_MODEL_EXTRA_BODY_JSON,
298
+ MIDSCENE_PLANNING_MODEL_MAX_TOKENS,
294
299
  MIDSCENE_PLANNING_MODEL_TIMEOUT,
295
300
  MIDSCENE_PLANNING_MODEL_TEMPERATURE,
296
301
  MIDSCENE_PLANNING_MODEL_RETRY_COUNT,
@@ -352,6 +357,7 @@ exports.MIDSCENE_INSIGHT_MODEL_EXTRA_BODY_JSON = __webpack_exports__.MIDSCENE_IN
352
357
  exports.MIDSCENE_INSIGHT_MODEL_FAMILY = __webpack_exports__.MIDSCENE_INSIGHT_MODEL_FAMILY;
353
358
  exports.MIDSCENE_INSIGHT_MODEL_HTTP_PROXY = __webpack_exports__.MIDSCENE_INSIGHT_MODEL_HTTP_PROXY;
354
359
  exports.MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON = __webpack_exports__.MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON;
360
+ exports.MIDSCENE_INSIGHT_MODEL_MAX_TOKENS = __webpack_exports__.MIDSCENE_INSIGHT_MODEL_MAX_TOKENS;
355
361
  exports.MIDSCENE_INSIGHT_MODEL_NAME = __webpack_exports__.MIDSCENE_INSIGHT_MODEL_NAME;
356
362
  exports.MIDSCENE_INSIGHT_MODEL_REASONING_BUDGET = __webpack_exports__.MIDSCENE_INSIGHT_MODEL_REASONING_BUDGET;
357
363
  exports.MIDSCENE_INSIGHT_MODEL_REASONING_EFFORT = __webpack_exports__.MIDSCENE_INSIGHT_MODEL_REASONING_EFFORT;
@@ -394,6 +400,7 @@ exports.MIDSCENE_PLANNING_MODEL_EXTRA_BODY_JSON = __webpack_exports__.MIDSCENE_P
394
400
  exports.MIDSCENE_PLANNING_MODEL_FAMILY = __webpack_exports__.MIDSCENE_PLANNING_MODEL_FAMILY;
395
401
  exports.MIDSCENE_PLANNING_MODEL_HTTP_PROXY = __webpack_exports__.MIDSCENE_PLANNING_MODEL_HTTP_PROXY;
396
402
  exports.MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON = __webpack_exports__.MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON;
403
+ exports.MIDSCENE_PLANNING_MODEL_MAX_TOKENS = __webpack_exports__.MIDSCENE_PLANNING_MODEL_MAX_TOKENS;
397
404
  exports.MIDSCENE_PLANNING_MODEL_NAME = __webpack_exports__.MIDSCENE_PLANNING_MODEL_NAME;
398
405
  exports.MIDSCENE_PLANNING_MODEL_REASONING_BUDGET = __webpack_exports__.MIDSCENE_PLANNING_MODEL_REASONING_BUDGET;
399
406
  exports.MIDSCENE_PLANNING_MODEL_REASONING_EFFORT = __webpack_exports__.MIDSCENE_PLANNING_MODEL_REASONING_EFFORT;
@@ -446,6 +453,7 @@ for(var __rspack_i in __webpack_exports__)if (-1 === [
446
453
  "MIDSCENE_INSIGHT_MODEL_FAMILY",
447
454
  "MIDSCENE_INSIGHT_MODEL_HTTP_PROXY",
448
455
  "MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON",
456
+ "MIDSCENE_INSIGHT_MODEL_MAX_TOKENS",
449
457
  "MIDSCENE_INSIGHT_MODEL_NAME",
450
458
  "MIDSCENE_INSIGHT_MODEL_REASONING_BUDGET",
451
459
  "MIDSCENE_INSIGHT_MODEL_REASONING_EFFORT",
@@ -488,6 +496,7 @@ for(var __rspack_i in __webpack_exports__)if (-1 === [
488
496
  "MIDSCENE_PLANNING_MODEL_FAMILY",
489
497
  "MIDSCENE_PLANNING_MODEL_HTTP_PROXY",
490
498
  "MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON",
499
+ "MIDSCENE_PLANNING_MODEL_MAX_TOKENS",
491
500
  "MIDSCENE_PLANNING_MODEL_NAME",
492
501
  "MIDSCENE_PLANNING_MODEL_REASONING_BUDGET",
493
502
  "MIDSCENE_PLANNING_MODEL_REASONING_EFFORT",
@@ -24,13 +24,15 @@ var __webpack_require__ = {};
24
24
  var __webpack_exports__ = {};
25
25
  __webpack_require__.r(__webpack_exports__);
26
26
  __webpack_require__.d(__webpack_exports__, {
27
+ generateCommonTools: ()=>generateCommonTools,
27
28
  generateToolsFromActionSpace: ()=>generateToolsFromActionSpace,
28
- generateCommonTools: ()=>generateCommonTools
29
+ composeUserPrompt: ()=>external_user_prompt_js_namespaceObject.composeUserPrompt
29
30
  });
30
31
  const img_namespaceObject = require("@midscene/shared/img");
31
32
  const external_zod_namespaceObject = require("zod");
32
33
  const external_zod_schema_utils_js_namespaceObject = require("../zod-schema-utils.js");
33
34
  const external_error_formatter_js_namespaceObject = require("./error-formatter.js");
35
+ const external_user_prompt_js_namespaceObject = require("./user-prompt.js");
34
36
  function describeActionForMCP(action) {
35
37
  const actionDesc = action.description || `Execute ${action.name} action`;
36
38
  if (!action.paramSchema) return `${action.name} action, ${actionDesc}`;
@@ -365,6 +367,9 @@ function generateCommonTools(getAgent, initArgSchema = {}, initArgCliMetadata) {
365
367
  const agent = await getAgent(args);
366
368
  const screenshot = await agent.page?.screenshotBase64();
367
369
  if (!screenshot) return createErrorResult('Screenshot not available');
370
+ await agent.recordToReport?.('take_screenshot', {
371
+ screenshotBase64: screenshot
372
+ });
368
373
  const { mimeType, body } = (0, img_namespaceObject.parseBase64)(screenshot);
369
374
  return {
370
375
  content: [
@@ -398,15 +403,7 @@ function generateCommonTools(getAgent, initArgSchema = {}, initArgCliMetadata) {
398
403
  const result = await agent.aiAction(prompt, {
399
404
  deepThink: false
400
405
  });
401
- const screenshotResult = await captureScreenshotResult(agent, 'act');
402
- if (result) {
403
- const message = 'string' == typeof result ? result : JSON.stringify(result);
404
- screenshotResult.content.unshift({
405
- type: 'text',
406
- text: `Task finished, message: ${message}`
407
- });
408
- }
409
- return screenshotResult;
406
+ return await captureScreenshotResult(agent, 'act', result);
410
407
  } catch (error) {
411
408
  const errorMessage = (0, external_error_formatter_js_namespaceObject.getErrorMessage)(error);
412
409
  console.error('Error executing act:', errorMessage);
@@ -419,6 +416,7 @@ function generateCommonTools(getAgent, initArgSchema = {}, initArgCliMetadata) {
419
416
  description: 'Assert a natural language statement against the current page/screen.',
420
417
  schema: {
421
418
  prompt: external_zod_namespaceObject.z.string().describe('Natural language assertion to verify, e.g. "there is a login button visible"'),
419
+ ...external_user_prompt_js_namespaceObject.promptInputExtraSchema,
422
420
  ...initArgSchema
423
421
  },
424
422
  cli: mergeToolCliMetadata(void 0, initArgCliMetadata),
@@ -427,7 +425,13 @@ function generateCommonTools(getAgent, initArgSchema = {}, initArgCliMetadata) {
427
425
  try {
428
426
  const agent = await getAgent(args);
429
427
  if (!agent.aiAssert) return createErrorResult('assert is not supported by this agent');
430
- await agent.aiAssert(prompt);
428
+ const userPrompt = (0, external_user_prompt_js_namespaceObject.composeUserPrompt)({
429
+ prompt,
430
+ image: args.image,
431
+ imageName: args.imageName,
432
+ convertHttpImage2Base64: args.convertHttpImage2Base64
433
+ });
434
+ await agent.aiAssert(userPrompt);
431
435
  return {
432
436
  content: [
433
437
  {
@@ -445,9 +449,11 @@ function generateCommonTools(getAgent, initArgSchema = {}, initArgCliMetadata) {
445
449
  }
446
450
  ];
447
451
  }
452
+ exports.composeUserPrompt = __webpack_exports__.composeUserPrompt;
448
453
  exports.generateCommonTools = __webpack_exports__.generateCommonTools;
449
454
  exports.generateToolsFromActionSpace = __webpack_exports__.generateToolsFromActionSpace;
450
455
  for(var __rspack_i in __webpack_exports__)if (-1 === [
456
+ "composeUserPrompt",
451
457
  "generateCommonTools",
452
458
  "generateToolsFromActionSpace"
453
459
  ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
@@ -0,0 +1,103 @@
1
+ "use strict";
// Bundler runtime helpers for this module. `__webpack_require__` is a plain
// object used only as a holder for the three helpers (`d`, `o`, `r`) below.
2
+ var __webpack_require__ = {};
3
+ (()=>{
// `d(exports1, definition)`: for every own key of `definition` that is not
// already an own key of `exports1`, define an enumerable getter on `exports1`
// that delegates to `definition[key]`.
4
+ __webpack_require__.d = (exports1, definition)=>{
5
+ for(var key in definition)if (__webpack_require__.o(definition, key) && !__webpack_require__.o(exports1, key)) Object.defineProperty(exports1, key, {
6
+ enumerable: true,
7
+ get: definition[key]
8
+ });
9
+ };
10
+ })();
11
+ (()=>{
// `o(obj, prop)`: own-property check that does not rely on `obj` having its
// own `hasOwnProperty` method.
12
+ __webpack_require__.o = (obj, prop)=>Object.prototype.hasOwnProperty.call(obj, prop);
13
+ })();
14
+ (()=>{
// `r(exports1)`: tag the exports object as an ES module - sets
// `Symbol.toStringTag` to 'Module' when symbols are available, and always
// defines `__esModule: true`.
15
+ __webpack_require__.r = (exports1)=>{
16
+ if ('undefined' != typeof Symbol && Symbol.toStringTag) Object.defineProperty(exports1, Symbol.toStringTag, {
17
+ value: 'Module'
18
+ });
19
+ Object.defineProperty(exports1, '__esModule', {
20
+ value: true
21
+ });
22
+ };
23
+ })();
// Create the module's export object, mark it as an ES module, and register
// lazy getters for the two public bindings defined further down.
24
+ var __webpack_exports__ = {};
25
+ __webpack_require__.r(__webpack_exports__);
26
+ __webpack_require__.d(__webpack_exports__, {
27
+ promptInputExtraSchema: ()=>promptInputExtraSchema,
28
+ composeUserPrompt: ()=>composeUserPrompt
29
+ });
30
+ const external_zod_namespaceObject = require("zod");
/**
 * Normalize a "string or string[]" argument into an array of trimmed strings.
 *
 * - `null` / `undefined` -> `[]`
 * - a string -> `[trimmed]`, or `[]` when the trimmed string is empty
 * - an array -> every item trimmed, in order
 *
 * Note: blank items inside an array are kept as `''` (only the single-string
 * form drops blanks), so the array length is always preserved.
 *
 * @param raw - The raw value to normalize.
 * @param fieldName - Field name used as the prefix of error messages.
 * @returns An array of trimmed strings.
 * @throws {Error} If an array item is not a string, or if `raw` is neither
 *   nullish, a string, nor an array.
 */
31
+ function normalizeStringList(raw, fieldName) {
32
+ if (null == raw) return [];
33
+ if ('string' == typeof raw) {
34
+ const trimmed = raw.trim();
35
+ return trimmed ? [
36
+ trimmed
37
+ ] : [];
38
+ }
39
+ if (Array.isArray(raw)) return raw.map((item, index)=>{
40
+ if ('string' != typeof item) throw new Error(`${fieldName}[${index}]: expected a string.`);
41
+ return item.trim();
42
+ });
43
+ throw new Error(`${fieldName}: expected a string or string array, got ${typeof raw}.`);
44
+ }
/**
 * Pair image URLs with image names by position.
 *
 * Both `input.image` and `input.imageName` are normalized with
 * `normalizeStringList`; the two resulting lists must have the same length.
 *
 * @param input - Object carrying the raw `image` and `imageName` values.
 * @returns An array of `{ name, url }` objects, one per image, in input order.
 * @throws {Error} If the number of images differs from the number of names,
 *   or if either value fails normalization.
 */
45
+ function composeImages(input) {
46
+ const urls = normalizeStringList(input.image, 'image');
47
+ const names = normalizeStringList(input.imageName, 'imageName');
48
+ if (urls.length !== names.length) throw new Error(`image/imageName: expected the same number of --image and --image-name values, got ${urls.length} image(s) and ${names.length} image name(s).`);
49
+ return urls.map((url, index)=>({
50
+ name: names[index],
51
+ url
52
+ }));
53
+ }
/**
 * Coerce a boolean-like value into `true`, `false`, or `undefined`.
 *
 * - `null` / `undefined` -> `undefined`
 * - a boolean -> returned unchanged
 * - a string -> trimmed and lower-cased; `''` -> `undefined`,
 *   `'true'` / `'1'` -> `true`, `'false'` / `'0'` -> `false`
 *
 * The error messages are hard-coded to the `convertHttpImage2Base64` field.
 *
 * @param value - The raw value to coerce.
 * @returns The coerced boolean, or `undefined` when no value was supplied.
 * @throws {Error} If a non-blank string is not one of the accepted spellings,
 *   or if `value` is neither nullish, a boolean, nor a string.
 */
54
+ function coerceBoolean(value) {
55
+ if (null == value) return;
56
+ if ('boolean' == typeof value) return value;
57
+ if ('string' == typeof value) {
58
+ const trimmed = value.trim();
59
+ if (!trimmed) return;
60
+ const v = trimmed.toLowerCase();
61
+ if ('true' === v || '1' === v) return true;
62
+ if ('false' === v || '0' === v) return false;
// The message reports the original (untrimmed) value, JSON-quoted.
63
+ throw new Error(`convertHttpImage2Base64: expected "true", "false", "1", or "0"; got ${JSON.stringify(value)}.`);
64
+ }
65
+ throw new Error(`convertHttpImage2Base64: expected a boolean, got ${typeof value}.`);
66
+ }
/**
 * Build the user prompt value from a text prompt plus optional image inputs.
 *
 * When no images are supplied and `convertHttpImage2Base64` is absent (or
 * blank), `input.prompt` is returned unchanged. Otherwise an object is
 * returned with `prompt`, plus `images` (only when at least one image was
 * given) and `convertHttpImage2Base64` (only when the flag was supplied).
 *
 * @param input - Object with `prompt` and the optional `image`, `imageName`
 *   and `convertHttpImage2Base64` values.
 * @returns `input.prompt` itself, or a payload object as described above.
 * @throws {Error} Propagates validation errors from `composeImages` and
 *   `coerceBoolean`.
 */
67
+ function composeUserPrompt(input) {
68
+ const images = composeImages({
69
+ image: input.image,
70
+ imageName: input.imageName
71
+ });
72
+ const convertFlag = coerceBoolean(input.convertHttpImage2Base64);
// Nothing extra was supplied: hand the prompt back as-is rather than wrapping it.
73
+ if (0 === images.length && void 0 === convertFlag) return input.prompt;
74
+ const payload = {
75
+ prompt: input.prompt
76
+ };
77
+ if (images.length > 0) payload.images = images;
78
+ if (void 0 !== convertFlag) payload.convertHttpImage2Base64 = convertFlag;
79
+ return payload;
80
+ }
/**
 * Zod field definitions for the optional image-related inputs read by
 * `composeUserPrompt`. All three fields are optional:
 *
 * - `image` / `imageName`: a string or an array of strings
 * - `convertHttpImage2Base64`: a boolean or a string
 *
 * This is a plain object of zod schemas (not a `z.object`).
 */
81
+ const promptInputExtraSchema = {
82
+ image: external_zod_namespaceObject.z.union([
83
+ external_zod_namespaceObject.z.string(),
84
+ external_zod_namespaceObject.z.array(external_zod_namespaceObject.z.string())
85
+ ]).optional().describe('Reference image URL/path. Repeat --image for multiple images.'),
86
+ imageName: external_zod_namespaceObject.z.union([
87
+ external_zod_namespaceObject.z.string(),
88
+ external_zod_namespaceObject.z.array(external_zod_namespaceObject.z.string())
89
+ ]).optional().describe('Reference image name. Repeat --image-name; must align with --image order.'),
// Accepts strings as well as booleans; string values are interpreted by `coerceBoolean`.
90
+ convertHttpImage2Base64: external_zod_namespaceObject.z.union([
91
+ external_zod_namespaceObject.z.boolean(),
92
+ external_zod_namespaceObject.z.string()
93
+ ]).optional().describe('If true, convert http(s) image URLs to base64 before sending to the model.')
94
+ };
// CommonJS export tail: copy the two named bindings onto `exports`.
95
+ exports.composeUserPrompt = __webpack_exports__.composeUserPrompt;
96
+ exports.promptInputExtraSchema = __webpack_exports__.promptInputExtraSchema;
// Copy any remaining enumerable key of `__webpack_exports__` that is not one
// of the two names already assigned above.
97
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
98
+ "composeUserPrompt",
99
+ "promptInputExtraSchema"
100
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
// Mark the CommonJS `exports` object itself as an ES module.
101
+ Object.defineProperty(exports, '__esModule', {
102
+ value: true
103
+ });
@@ -6,3 +6,11 @@ export declare function getCliOptionDisplay(key: string, cliOption?: ToolCliOpti
6
6
  aliases: string[];
7
7
  };
8
8
  export declare function formatCliValidationError(scriptName: string, commandName: string, def: ToolDefinition, rawArgs: Record<string, unknown>): string | undefined;
9
+ /**
10
+ * Move CLI args parsed under accepted alias spellings (kebab-case, alternate
11
+ * casings, `cli.options.aliases` entries) onto the schema's canonical key so
12
+ * tool handlers can read them with a single field name regardless of which
13
+ * spelling the user typed. Throws `CLIError` on conflicting double-spellings
14
+ * (e.g. both `--imageName` and `--image-name`).
15
+ */
16
+ export declare function canonicalizeCliArgKeys(scriptName: string, commandName: string, def: ToolDefinition, rawArgs: Record<string, unknown>): Record<string, unknown>;
@@ -1,2 +1,2 @@
1
- export declare const PLAYWRIGHT_EXAMPLE_CODE = "\n// Reference the following code to generate Midscene test cases\n// The following is test code for Midscene AI, for reference\n// The following is Playwright syntax, you can use Playwright to assist in test generation\nIMPORTANT: Follow these exact type signatures for AI functions:\n\n// Type signatures for AI functions:\naiAct(prompt: string, options?: { cacheable?: boolean, deepThink?: 'unset' | true | false }): Promise<void>\naiInput(text: string, locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>\naiTap(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>\naiHover(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>\naiDoubleClick(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>\naiKeyboardPress(key: string, locate?: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>\naiScroll(locate: string | undefined, options: {\n direction?: 'up' | 'down' | 'left' | 'right',\n scrollType?: 'singleAction' | 'scrollToBottom' | 'scrollToTop' | 'scrollToRight' | 'scrollToLeft',\n distance?: number | null,\n deepLocate?: boolean,\n xpath?: string,\n cacheable?: boolean\n}): Promise<void>\naiAssert(assertion: string, options?: { errorMessage?: string }): Promise<void>\naiWaitFor(prompt: string, options?: { timeout?: number }): Promise<void>\naiQuery<T>(queryObject: Record<string, string>): Promise<T> // Extracts data from page based on descriptions\n\n// examples:\n// Reference the following code to generate Midscene test cases\n// The following is test code for Midscene AI, for reference\n// The following is Playwright syntax, you can use Playwright to assist in test generation\nimport { test as base } from '@playwright/test';\nimport type { PlayWrightAiFixtureType } from 
'@midscene/web/playwright';\nimport { PlaywrightAiFixture } from '@midscene/web/playwright';\n\nconst test = base.extend<PlayWrightAiFixtureType>(PlaywrightAiFixture({\n waitForNetworkIdleTimeout: 2000, // optional, the timeout for waiting for network idle between each action, default is 2000ms\n}));\n\n\ntest.beforeEach(async ({ page }) => {\n await page.goto('https://www.xxx.com/');\n await page.setViewportSize({ width: 1920, height: 1080 });\n});\n\ntest('ai shop', async ({\n aiAct,\n aiInput,\n aiAssert,\n aiQuery,\n aiKeyboardPress,\n aiHover,\n aiTap,\n aiWaitFor,\n agentForPage,\n page,\n}) => {\n // login\n await aiAssert('The page shows the login interface');\n await aiInput('user_name', 'in user name input');\n await aiInput('password', 'in password input');\n await aiKeyboardPress('Enter', 'Login Button');\n\n // check the login success\n await aiWaitFor('The page shows that the loading is complete');\n await aiAssert('The current page shows the product detail page');\n\n // check the product info\n const dataA = await aiQuery({\n userInfo: 'User information in the format {name: string}',\n theFirstProductInfo: 'The first product info in the format {name: string, price: number}',\n });\n expect(dataA.theFirstProductInfo.name).toBe('xxx');\n expect(dataA.theFirstProductInfo.price).toBe(100);\n\n\n // add to cart\n await aiTap('click add to cart button');\n \n await aiTap('click right top cart icon');\n await aiAssert('The cart icon shows the number 1');\n});\n";
1
+ export declare const PLAYWRIGHT_EXAMPLE_CODE = "\n// Reference the following code to generate Midscene test cases\n// The following is test code for Midscene AI, for reference\n// The following is Playwright syntax, you can use Playwright to assist in test generation\nIMPORTANT: Follow these exact type signatures for AI functions:\n\n// Type signatures for AI functions:\naiAct(prompt: string, options?: { cacheable?: boolean, deepThink?: 'unset' | true | false }): Promise<string | undefined>\naiInput(text: string, locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>\naiTap(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>\naiHover(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>\naiDoubleClick(locate: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>\naiKeyboardPress(key: string, locate?: string, options?: { deepLocate?: boolean, xpath?: string, cacheable?: boolean }): Promise<void>\naiScroll(locate: string | undefined, options: {\n direction?: 'up' | 'down' | 'left' | 'right',\n scrollType?: 'singleAction' | 'scrollToBottom' | 'scrollToTop' | 'scrollToRight' | 'scrollToLeft',\n distance?: number | null,\n deepLocate?: boolean,\n xpath?: string,\n cacheable?: boolean\n}): Promise<void>\naiAssert(assertion: string, options?: { errorMessage?: string }): Promise<void>\naiWaitFor(prompt: string, options?: { timeout?: number }): Promise<void>\naiQuery<T>(queryObject: Record<string, string>): Promise<T> // Extracts data from page based on descriptions\n\n// examples:\n// Reference the following code to generate Midscene test cases\n// The following is test code for Midscene AI, for reference\n// The following is Playwright syntax, you can use Playwright to assist in test generation\nimport { test as base } from '@playwright/test';\nimport type { 
PlayWrightAiFixtureType } from '@midscene/web/playwright';\nimport { PlaywrightAiFixture } from '@midscene/web/playwright';\n\nconst test = base.extend<PlayWrightAiFixtureType>(PlaywrightAiFixture({\n waitForNetworkIdleTimeout: 2000, // optional, the timeout for waiting for network idle between each action, default is 2000ms\n}));\n\n\ntest.beforeEach(async ({ page }) => {\n await page.goto('https://www.xxx.com/');\n await page.setViewportSize({ width: 1920, height: 1080 });\n});\n\ntest('ai shop', async ({\n aiAct,\n aiInput,\n aiAssert,\n aiQuery,\n aiKeyboardPress,\n aiHover,\n aiTap,\n aiWaitFor,\n agentForPage,\n page,\n}) => {\n // login\n await aiAssert('The page shows the login interface');\n await aiInput('user_name', 'in user name input');\n await aiInput('password', 'in password input');\n await aiKeyboardPress('Enter', 'Login Button');\n\n // check the login success\n await aiWaitFor('The page shows that the loading is complete');\n await aiAssert('The current page shows the product detail page');\n\n // check the product info\n const dataA = await aiQuery({\n userInfo: 'User information in the format {name: string}',\n theFirstProductInfo: 'The first product info in the format {name: string, price: number}',\n });\n expect(dataA.theFirstProductInfo.name).toBe('xxx');\n expect(dataA.theFirstProductInfo.price).toBe(100);\n\n\n // add to cart\n await aiTap('click add to cart button');\n \n await aiTap('click right top cart icon');\n await aiAssert('The cart icon shows the number 1');\n});\n";
2
2
  export declare const YAML_EXAMPLE_CODE = "\nCRITICAL - YAML Indentation Rules:\nFor actions with additional parameters (aiScroll, aiInput, aiKeyboardPress), the parameters must be SIBLING keys at the SAME indentation level as the action key, NOT nested children indented further.\nCORRECT (parameters align with the action key):\n - aiScroll:\n direction: 'down'\n scrollType: 'singleAction'\n distance: 500\n locate: \"main content area\"\n - aiInput: 'text value'\n locate: 'input field description'\nWRONG (parameters are indented further than the action key, DO NOT do this):\n - aiScroll:\n direction: 'down'\n scrollType: 'singleAction'\n - aiInput: 'text value'\n locate: 'input field description'\n\n1. Format:\n\nweb:\n url: \"starting_url\"\n viewportWidth: 1280\n viewportHeight: 960\n\ntasks:\n - name: \"descriptive task name\"\n flow:\n - aiTap: \"element description\"\n xpath: '/html/body/div[1]/button[1]'\n - aiInput: 'text value'\n locate: 'input field description'\n xpath: '/html/body/div[1]/input[1]'\n - aiScroll:\n direction: 'down'\n scrollType: 'singleAction'\n distance: 500\n locate: \"scrollable area description\"\n xpath: '/html/body/div[1]/main[1]'\n - aiAssert: \"expected state\"\n - sleep: 1000\n\n2. Action Types:\n- aiTap: for clicks (natural language targeting)\n- aiInput: for text input with 'locate' field\n- aiScroll: with direction and scrollType\n- aiAssert: for validations\n- sleep: for delays (milliseconds)\n\n\n\nYAML type\ntasks:\n - name: <name>\n continueOnError: <boolean> # Optional, whether to continue to the next task on error, defaults to false.\n flow:\n # Auto Planning (.ai)\n # ----------------\n\n # Perform an interaction. `ai` is a shorthand for `aiAct`.\n - ai: <prompt>\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. 
Defaults to True.\n\n # This usage is the same as `ai`.\n - aiAct: <prompt>\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Instant Action (.aiTap, .aiDoubleClick, .aiHover, .aiInput, .aiKeyboardPress, .aiScroll)\n # ----------------\n\n # Tap an element described by a prompt.\n - aiTap: <prompt>\n deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Double click an element described by a prompt.\n - aiDoubleClick: <prompt>\n deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Hover over an element described by a prompt.\n - aiHover: <prompt>\n deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. 
Defaults to True.\n\n # Input text into an element described by a prompt.\n - aiInput: <final text content of the input>\n locate: <prompt>\n deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Press a key (e.g., Enter, Tab, Escape) on an element described by a prompt.\n - aiKeyboardPress: <key>\n locate: <prompt>\n deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Scroll globally or on an element described by a prompt.\n - aiScroll:\n direction: 'up' # or 'down' | 'left' | 'right'\n scrollType: 'singleAction' # or 'scrollToTop' | 'scrollToBottom' | 'scrollToLeft' | 'scrollToRight'\n distance: <number> # Optional, the scroll distance in pixels.\n locate: <prompt> # Optional, the element to scroll on.\n deepLocate: <boolean> # Optional, whether to use deepLocate to precisely locate the element. Defaults to False.\n xpath: <xpath> # Optional, the xpath of the target element for the operation. If provided, Midscene will prioritize this xpath to find the element before using the cache and the AI model. 
Defaults to empty.\n cacheable: <boolean> # Optional, whether to cache the result of this API call when the [caching feature](./caching.mdx) is enabled. Defaults to True.\n\n # Record the current screenshot with a description in the report file.\n - recordToReport: <title> # Optional, the title of the screenshot. If not provided, the title will be 'untitled'.\n content: <content> # Optional, the description of the screenshot.\n\n # Data Extraction\n # ----------------\n\n # Perform a query that returns a JSON object.\n - aiQuery: <prompt> # Remember to describe the format of the result in the prompt.\n name: <name> # The key for the query result in the JSON output.\n\n # More APIs\n # ----------------\n\n # Wait for a condition to be met, with a timeout (in ms, optional, defaults to 30000).\n - aiWaitFor: <prompt>\n timeout: <ms>\n\n # Perform an assertion.\n - aiAssert: <prompt>\n errorMessage: <error-message> # Optional, the error message to print if the assertion fails.\n\n # Wait for a specified amount of time.\n - sleep: <ms>\n\n # Execute a piece of JavaScript code in the web page context.\n - javascript: <javascript>\n name: <name> # Optional, assign a name to the return value, which will be used as a key in the JSON output.\n\n - name: <name>\n flow:\n # ...\n";
@@ -12,6 +12,7 @@ interface IModelConfigKeys {
12
12
  openaiApiKey: string;
13
13
  openaiExtraConfig: string;
14
14
  extraBody: string;
15
+ maxTokens: string;
15
16
  /**
16
17
  * Extra
17
18
  */
@@ -75,6 +75,7 @@ export declare const MIDSCENE_INSIGHT_MODEL_BASE_URL = "MIDSCENE_INSIGHT_MODEL_B
75
75
  export declare const MIDSCENE_INSIGHT_MODEL_API_KEY = "MIDSCENE_INSIGHT_MODEL_API_KEY";
76
76
  export declare const MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON = "MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON";
77
77
  export declare const MIDSCENE_INSIGHT_MODEL_EXTRA_BODY_JSON = "MIDSCENE_INSIGHT_MODEL_EXTRA_BODY_JSON";
78
+ export declare const MIDSCENE_INSIGHT_MODEL_MAX_TOKENS = "MIDSCENE_INSIGHT_MODEL_MAX_TOKENS";
78
79
  export declare const MIDSCENE_INSIGHT_MODEL_TIMEOUT = "MIDSCENE_INSIGHT_MODEL_TIMEOUT";
79
80
  export declare const MIDSCENE_INSIGHT_MODEL_TEMPERATURE = "MIDSCENE_INSIGHT_MODEL_TEMPERATURE";
80
81
  export declare const MIDSCENE_INSIGHT_MODEL_RETRY_COUNT = "MIDSCENE_INSIGHT_MODEL_RETRY_COUNT";
@@ -90,6 +91,7 @@ export declare const MIDSCENE_PLANNING_MODEL_BASE_URL = "MIDSCENE_PLANNING_MODEL
90
91
  export declare const MIDSCENE_PLANNING_MODEL_API_KEY = "MIDSCENE_PLANNING_MODEL_API_KEY";
91
92
  export declare const MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON = "MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON";
92
93
  export declare const MIDSCENE_PLANNING_MODEL_EXTRA_BODY_JSON = "MIDSCENE_PLANNING_MODEL_EXTRA_BODY_JSON";
94
+ export declare const MIDSCENE_PLANNING_MODEL_MAX_TOKENS = "MIDSCENE_PLANNING_MODEL_MAX_TOKENS";
93
95
  export declare const MIDSCENE_PLANNING_MODEL_TIMEOUT = "MIDSCENE_PLANNING_MODEL_TIMEOUT";
94
96
  export declare const MIDSCENE_PLANNING_MODEL_TEMPERATURE = "MIDSCENE_PLANNING_MODEL_TEMPERATURE";
95
97
  export declare const MIDSCENE_PLANNING_MODEL_RETRY_COUNT = "MIDSCENE_PLANNING_MODEL_RETRY_COUNT";
@@ -109,21 +111,21 @@ export declare const UNUSED_ENV_KEYS: string[];
109
111
  */
110
112
  export declare const BASIC_ENV_KEYS: readonly ["MIDSCENE_DEBUG_MODE", "MIDSCENE_DEBUG_MODEL_PROFILE", "MIDSCENE_DEBUG_MODEL_RESPONSE", "MIDSCENE_RUN_DIR"];
111
113
  export declare const BOOLEAN_ENV_KEYS: readonly ["MIDSCENE_CACHE", "MIDSCENE_MCP_USE_PUPPETEER_MODE", "MIDSCENE_MCP_ANDROID_MODE", "MIDSCENE_LANGSMITH_DEBUG", "MIDSCENE_LANGFUSE_DEBUG", "MIDSCENE_REPORT_QUIET"];
112
- export declare const NUMBER_ENV_KEYS: readonly ["MIDSCENE_MODEL_MAX_TOKENS", "MIDSCENE_CACHE_MAX_FILENAME_LENGTH", "MIDSCENE_REPLANNING_CYCLE_LIMIT"];
113
- export declare const STRING_ENV_KEYS: readonly ["MIDSCENE_MODEL_MAX_TOKENS", "OPENAI_MAX_TOKENS", "MIDSCENE_ADB_PATH", "MIDSCENE_ADB_REMOTE_HOST", "MIDSCENE_ADB_REMOTE_PORT", "MIDSCENE_ANDROID_IME_STRATEGY", "MIDSCENE_IOS_DEVICE_UDID", "MIDSCENE_IOS_SIMULATOR_UDID", "MIDSCENE_REPORT_TAG_NAME", "MIDSCENE_PREFERRED_LANGUAGE", "MATCH_BY_POSITION", "MIDSCENE_MCP_CHROME_PATH", "DOCKER_CONTAINER"];
114
+ export declare const NUMBER_ENV_KEYS: readonly ["MIDSCENE_CACHE_MAX_FILENAME_LENGTH", "MIDSCENE_REPLANNING_CYCLE_LIMIT"];
115
+ export declare const STRING_ENV_KEYS: readonly ["MIDSCENE_ADB_PATH", "MIDSCENE_ADB_REMOTE_HOST", "MIDSCENE_ADB_REMOTE_PORT", "MIDSCENE_ANDROID_IME_STRATEGY", "MIDSCENE_IOS_DEVICE_UDID", "MIDSCENE_IOS_SIMULATOR_UDID", "MIDSCENE_REPORT_TAG_NAME", "MIDSCENE_PREFERRED_LANGUAGE", "MATCH_BY_POSITION", "MIDSCENE_MCP_CHROME_PATH", "DOCKER_CONTAINER"];
114
116
  /**
115
117
  * Non-model-related env keys, used for globally controlling the behavior of midscene
116
118
  * Cannot be overridden by agent.modelConfig but can be overridden by overrideAIConfig
117
119
  * Can be accessed at any time
118
120
  */
119
- export declare const GLOBAL_ENV_KEYS: readonly ["MIDSCENE_CACHE", "MIDSCENE_MCP_USE_PUPPETEER_MODE", "MIDSCENE_MCP_ANDROID_MODE", "MIDSCENE_LANGSMITH_DEBUG", "MIDSCENE_LANGFUSE_DEBUG", "MIDSCENE_REPORT_QUIET", "MIDSCENE_MODEL_MAX_TOKENS", "MIDSCENE_CACHE_MAX_FILENAME_LENGTH", "MIDSCENE_REPLANNING_CYCLE_LIMIT", "MIDSCENE_MODEL_MAX_TOKENS", "OPENAI_MAX_TOKENS", "MIDSCENE_ADB_PATH", "MIDSCENE_ADB_REMOTE_HOST", "MIDSCENE_ADB_REMOTE_PORT", "MIDSCENE_ANDROID_IME_STRATEGY", "MIDSCENE_IOS_DEVICE_UDID", "MIDSCENE_IOS_SIMULATOR_UDID", "MIDSCENE_REPORT_TAG_NAME", "MIDSCENE_PREFERRED_LANGUAGE", "MATCH_BY_POSITION", "MIDSCENE_MCP_CHROME_PATH", "DOCKER_CONTAINER"];
121
+ export declare const GLOBAL_ENV_KEYS: readonly ["MIDSCENE_CACHE", "MIDSCENE_MCP_USE_PUPPETEER_MODE", "MIDSCENE_MCP_ANDROID_MODE", "MIDSCENE_LANGSMITH_DEBUG", "MIDSCENE_LANGFUSE_DEBUG", "MIDSCENE_REPORT_QUIET", "MIDSCENE_CACHE_MAX_FILENAME_LENGTH", "MIDSCENE_REPLANNING_CYCLE_LIMIT", "MIDSCENE_ADB_PATH", "MIDSCENE_ADB_REMOTE_HOST", "MIDSCENE_ADB_REMOTE_PORT", "MIDSCENE_ANDROID_IME_STRATEGY", "MIDSCENE_IOS_DEVICE_UDID", "MIDSCENE_IOS_SIMULATOR_UDID", "MIDSCENE_REPORT_TAG_NAME", "MIDSCENE_PREFERRED_LANGUAGE", "MATCH_BY_POSITION", "MIDSCENE_MCP_CHROME_PATH", "DOCKER_CONTAINER"];
120
122
  /**
121
123
  * Model-related env keys, used to declare which model to use.
122
124
  * Can be overridden by both agent.modelConfig and overrideAIConfig
123
125
  * Can only be accessed after agent.constructor
124
126
  */
125
- export declare const MODEL_ENV_KEYS: readonly ["MIDSCENE_MODEL_NAME", "MIDSCENE_MODEL_INIT_CONFIG_JSON", "MIDSCENE_MODEL_EXTRA_BODY_JSON", "MIDSCENE_MODEL_API_KEY", "MIDSCENE_MODEL_BASE_URL", "MIDSCENE_MODEL_SOCKS_PROXY", "MIDSCENE_MODEL_HTTP_PROXY", "MIDSCENE_MODEL_TIMEOUT", "MIDSCENE_MODEL_TEMPERATURE", "MIDSCENE_MODEL_RETRY_COUNT", "MIDSCENE_MODEL_RETRY_INTERVAL", "MIDSCENE_MODEL_REASONING_EFFORT", "MIDSCENE_MODEL_REASONING_ENABLED", "MIDSCENE_MODEL_REASONING_BUDGET", "MIDSCENE_USE_VLM_UI_TARS", "MIDSCENE_USE_QWEN_VL", "MIDSCENE_USE_QWEN3_VL", "MIDSCENE_USE_DOUBAO_VISION", "MIDSCENE_USE_GEMINI", "MIDSCENE_USE_VL_MODEL", "OPENAI_API_KEY", "OPENAI_BASE_URL", "MIDSCENE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_OPENAI_HTTP_PROXY", "MIDSCENE_OPENAI_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_NAME", "MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_HTTP_PROXY", "MIDSCENE_INSIGHT_MODEL_BASE_URL", "MIDSCENE_INSIGHT_MODEL_API_KEY", "MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON", "MIDSCENE_INSIGHT_MODEL_EXTRA_BODY_JSON", "MIDSCENE_INSIGHT_MODEL_TIMEOUT", "MIDSCENE_INSIGHT_MODEL_TEMPERATURE", "MIDSCENE_INSIGHT_MODEL_RETRY_COUNT", "MIDSCENE_INSIGHT_MODEL_RETRY_INTERVAL", "MIDSCENE_INSIGHT_MODEL_FAMILY", "MIDSCENE_INSIGHT_MODEL_REASONING_EFFORT", "MIDSCENE_INSIGHT_MODEL_REASONING_ENABLED", "MIDSCENE_INSIGHT_MODEL_REASONING_BUDGET", "MIDSCENE_PLANNING_MODEL_NAME", "MIDSCENE_PLANNING_MODEL_SOCKS_PROXY", "MIDSCENE_PLANNING_MODEL_HTTP_PROXY", "MIDSCENE_PLANNING_MODEL_BASE_URL", "MIDSCENE_PLANNING_MODEL_API_KEY", "MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_MODEL_EXTRA_BODY_JSON", "MIDSCENE_PLANNING_MODEL_TIMEOUT", "MIDSCENE_PLANNING_MODEL_TEMPERATURE", "MIDSCENE_PLANNING_MODEL_RETRY_COUNT", "MIDSCENE_PLANNING_MODEL_RETRY_INTERVAL", "MIDSCENE_PLANNING_MODEL_FAMILY", "MIDSCENE_PLANNING_MODEL_REASONING_EFFORT", "MIDSCENE_PLANNING_MODEL_REASONING_ENABLED", "MIDSCENE_PLANNING_MODEL_REASONING_BUDGET", "MIDSCENE_MODEL_FAMILY"];
126
- export declare const ALL_ENV_KEYS: readonly [...string[], "MIDSCENE_DEBUG_MODE", "MIDSCENE_DEBUG_MODEL_PROFILE", "MIDSCENE_DEBUG_MODEL_RESPONSE", "MIDSCENE_RUN_DIR", "MIDSCENE_CACHE", "MIDSCENE_MCP_USE_PUPPETEER_MODE", "MIDSCENE_MCP_ANDROID_MODE", "MIDSCENE_LANGSMITH_DEBUG", "MIDSCENE_LANGFUSE_DEBUG", "MIDSCENE_REPORT_QUIET", "MIDSCENE_MODEL_MAX_TOKENS", "MIDSCENE_CACHE_MAX_FILENAME_LENGTH", "MIDSCENE_REPLANNING_CYCLE_LIMIT", "MIDSCENE_MODEL_MAX_TOKENS", "OPENAI_MAX_TOKENS", "MIDSCENE_ADB_PATH", "MIDSCENE_ADB_REMOTE_HOST", "MIDSCENE_ADB_REMOTE_PORT", "MIDSCENE_ANDROID_IME_STRATEGY", "MIDSCENE_IOS_DEVICE_UDID", "MIDSCENE_IOS_SIMULATOR_UDID", "MIDSCENE_REPORT_TAG_NAME", "MIDSCENE_PREFERRED_LANGUAGE", "MATCH_BY_POSITION", "MIDSCENE_MCP_CHROME_PATH", "DOCKER_CONTAINER", "MIDSCENE_MODEL_NAME", "MIDSCENE_MODEL_INIT_CONFIG_JSON", "MIDSCENE_MODEL_EXTRA_BODY_JSON", "MIDSCENE_MODEL_API_KEY", "MIDSCENE_MODEL_BASE_URL", "MIDSCENE_MODEL_SOCKS_PROXY", "MIDSCENE_MODEL_HTTP_PROXY", "MIDSCENE_MODEL_TIMEOUT", "MIDSCENE_MODEL_TEMPERATURE", "MIDSCENE_MODEL_RETRY_COUNT", "MIDSCENE_MODEL_RETRY_INTERVAL", "MIDSCENE_MODEL_REASONING_EFFORT", "MIDSCENE_MODEL_REASONING_ENABLED", "MIDSCENE_MODEL_REASONING_BUDGET", "MIDSCENE_USE_VLM_UI_TARS", "MIDSCENE_USE_QWEN_VL", "MIDSCENE_USE_QWEN3_VL", "MIDSCENE_USE_DOUBAO_VISION", "MIDSCENE_USE_GEMINI", "MIDSCENE_USE_VL_MODEL", "OPENAI_API_KEY", "OPENAI_BASE_URL", "MIDSCENE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_OPENAI_HTTP_PROXY", "MIDSCENE_OPENAI_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_NAME", "MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_HTTP_PROXY", "MIDSCENE_INSIGHT_MODEL_BASE_URL", "MIDSCENE_INSIGHT_MODEL_API_KEY", "MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON", "MIDSCENE_INSIGHT_MODEL_EXTRA_BODY_JSON", "MIDSCENE_INSIGHT_MODEL_TIMEOUT", "MIDSCENE_INSIGHT_MODEL_TEMPERATURE", "MIDSCENE_INSIGHT_MODEL_RETRY_COUNT", "MIDSCENE_INSIGHT_MODEL_RETRY_INTERVAL", "MIDSCENE_INSIGHT_MODEL_FAMILY", "MIDSCENE_INSIGHT_MODEL_REASONING_EFFORT", 
"MIDSCENE_INSIGHT_MODEL_REASONING_ENABLED", "MIDSCENE_INSIGHT_MODEL_REASONING_BUDGET", "MIDSCENE_PLANNING_MODEL_NAME", "MIDSCENE_PLANNING_MODEL_SOCKS_PROXY", "MIDSCENE_PLANNING_MODEL_HTTP_PROXY", "MIDSCENE_PLANNING_MODEL_BASE_URL", "MIDSCENE_PLANNING_MODEL_API_KEY", "MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_MODEL_EXTRA_BODY_JSON", "MIDSCENE_PLANNING_MODEL_TIMEOUT", "MIDSCENE_PLANNING_MODEL_TEMPERATURE", "MIDSCENE_PLANNING_MODEL_RETRY_COUNT", "MIDSCENE_PLANNING_MODEL_RETRY_INTERVAL", "MIDSCENE_PLANNING_MODEL_FAMILY", "MIDSCENE_PLANNING_MODEL_REASONING_EFFORT", "MIDSCENE_PLANNING_MODEL_REASONING_ENABLED", "MIDSCENE_PLANNING_MODEL_REASONING_BUDGET", "MIDSCENE_MODEL_FAMILY"];
127
+ export declare const MODEL_ENV_KEYS: readonly ["MIDSCENE_MODEL_NAME", "MIDSCENE_MODEL_INIT_CONFIG_JSON", "MIDSCENE_MODEL_EXTRA_BODY_JSON", "MIDSCENE_MODEL_API_KEY", "MIDSCENE_MODEL_BASE_URL", "MIDSCENE_MODEL_SOCKS_PROXY", "MIDSCENE_MODEL_HTTP_PROXY", "MIDSCENE_MODEL_MAX_TOKENS", "MIDSCENE_MODEL_TIMEOUT", "MIDSCENE_MODEL_TEMPERATURE", "MIDSCENE_MODEL_RETRY_COUNT", "MIDSCENE_MODEL_RETRY_INTERVAL", "MIDSCENE_MODEL_REASONING_EFFORT", "MIDSCENE_MODEL_REASONING_ENABLED", "MIDSCENE_MODEL_REASONING_BUDGET", "MIDSCENE_USE_VLM_UI_TARS", "MIDSCENE_USE_QWEN_VL", "MIDSCENE_USE_QWEN3_VL", "MIDSCENE_USE_DOUBAO_VISION", "MIDSCENE_USE_GEMINI", "MIDSCENE_USE_VL_MODEL", "OPENAI_API_KEY", "OPENAI_BASE_URL", "OPENAI_MAX_TOKENS", "MIDSCENE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_OPENAI_HTTP_PROXY", "MIDSCENE_OPENAI_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_NAME", "MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_HTTP_PROXY", "MIDSCENE_INSIGHT_MODEL_BASE_URL", "MIDSCENE_INSIGHT_MODEL_API_KEY", "MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON", "MIDSCENE_INSIGHT_MODEL_EXTRA_BODY_JSON", "MIDSCENE_INSIGHT_MODEL_MAX_TOKENS", "MIDSCENE_INSIGHT_MODEL_TIMEOUT", "MIDSCENE_INSIGHT_MODEL_TEMPERATURE", "MIDSCENE_INSIGHT_MODEL_RETRY_COUNT", "MIDSCENE_INSIGHT_MODEL_RETRY_INTERVAL", "MIDSCENE_INSIGHT_MODEL_FAMILY", "MIDSCENE_INSIGHT_MODEL_REASONING_EFFORT", "MIDSCENE_INSIGHT_MODEL_REASONING_ENABLED", "MIDSCENE_INSIGHT_MODEL_REASONING_BUDGET", "MIDSCENE_PLANNING_MODEL_NAME", "MIDSCENE_PLANNING_MODEL_SOCKS_PROXY", "MIDSCENE_PLANNING_MODEL_HTTP_PROXY", "MIDSCENE_PLANNING_MODEL_BASE_URL", "MIDSCENE_PLANNING_MODEL_API_KEY", "MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_MODEL_EXTRA_BODY_JSON", "MIDSCENE_PLANNING_MODEL_MAX_TOKENS", "MIDSCENE_PLANNING_MODEL_TIMEOUT", "MIDSCENE_PLANNING_MODEL_TEMPERATURE", "MIDSCENE_PLANNING_MODEL_RETRY_COUNT", "MIDSCENE_PLANNING_MODEL_RETRY_INTERVAL", "MIDSCENE_PLANNING_MODEL_FAMILY", "MIDSCENE_PLANNING_MODEL_REASONING_EFFORT", 
"MIDSCENE_PLANNING_MODEL_REASONING_ENABLED", "MIDSCENE_PLANNING_MODEL_REASONING_BUDGET", "MIDSCENE_MODEL_FAMILY"];
128
+ export declare const ALL_ENV_KEYS: readonly [...string[], "MIDSCENE_DEBUG_MODE", "MIDSCENE_DEBUG_MODEL_PROFILE", "MIDSCENE_DEBUG_MODEL_RESPONSE", "MIDSCENE_RUN_DIR", "MIDSCENE_CACHE", "MIDSCENE_MCP_USE_PUPPETEER_MODE", "MIDSCENE_MCP_ANDROID_MODE", "MIDSCENE_LANGSMITH_DEBUG", "MIDSCENE_LANGFUSE_DEBUG", "MIDSCENE_REPORT_QUIET", "MIDSCENE_CACHE_MAX_FILENAME_LENGTH", "MIDSCENE_REPLANNING_CYCLE_LIMIT", "MIDSCENE_ADB_PATH", "MIDSCENE_ADB_REMOTE_HOST", "MIDSCENE_ADB_REMOTE_PORT", "MIDSCENE_ANDROID_IME_STRATEGY", "MIDSCENE_IOS_DEVICE_UDID", "MIDSCENE_IOS_SIMULATOR_UDID", "MIDSCENE_REPORT_TAG_NAME", "MIDSCENE_PREFERRED_LANGUAGE", "MATCH_BY_POSITION", "MIDSCENE_MCP_CHROME_PATH", "DOCKER_CONTAINER", "MIDSCENE_MODEL_NAME", "MIDSCENE_MODEL_INIT_CONFIG_JSON", "MIDSCENE_MODEL_EXTRA_BODY_JSON", "MIDSCENE_MODEL_API_KEY", "MIDSCENE_MODEL_BASE_URL", "MIDSCENE_MODEL_SOCKS_PROXY", "MIDSCENE_MODEL_HTTP_PROXY", "MIDSCENE_MODEL_MAX_TOKENS", "MIDSCENE_MODEL_TIMEOUT", "MIDSCENE_MODEL_TEMPERATURE", "MIDSCENE_MODEL_RETRY_COUNT", "MIDSCENE_MODEL_RETRY_INTERVAL", "MIDSCENE_MODEL_REASONING_EFFORT", "MIDSCENE_MODEL_REASONING_ENABLED", "MIDSCENE_MODEL_REASONING_BUDGET", "MIDSCENE_USE_VLM_UI_TARS", "MIDSCENE_USE_QWEN_VL", "MIDSCENE_USE_QWEN3_VL", "MIDSCENE_USE_DOUBAO_VISION", "MIDSCENE_USE_GEMINI", "MIDSCENE_USE_VL_MODEL", "OPENAI_API_KEY", "OPENAI_BASE_URL", "OPENAI_MAX_TOKENS", "MIDSCENE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_OPENAI_HTTP_PROXY", "MIDSCENE_OPENAI_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_NAME", "MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_HTTP_PROXY", "MIDSCENE_INSIGHT_MODEL_BASE_URL", "MIDSCENE_INSIGHT_MODEL_API_KEY", "MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON", "MIDSCENE_INSIGHT_MODEL_EXTRA_BODY_JSON", "MIDSCENE_INSIGHT_MODEL_MAX_TOKENS", "MIDSCENE_INSIGHT_MODEL_TIMEOUT", "MIDSCENE_INSIGHT_MODEL_TEMPERATURE", "MIDSCENE_INSIGHT_MODEL_RETRY_COUNT", "MIDSCENE_INSIGHT_MODEL_RETRY_INTERVAL", "MIDSCENE_INSIGHT_MODEL_FAMILY", "MIDSCENE_INSIGHT_MODEL_REASONING_EFFORT", 
"MIDSCENE_INSIGHT_MODEL_REASONING_ENABLED", "MIDSCENE_INSIGHT_MODEL_REASONING_BUDGET", "MIDSCENE_PLANNING_MODEL_NAME", "MIDSCENE_PLANNING_MODEL_SOCKS_PROXY", "MIDSCENE_PLANNING_MODEL_HTTP_PROXY", "MIDSCENE_PLANNING_MODEL_BASE_URL", "MIDSCENE_PLANNING_MODEL_API_KEY", "MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_MODEL_EXTRA_BODY_JSON", "MIDSCENE_PLANNING_MODEL_MAX_TOKENS", "MIDSCENE_PLANNING_MODEL_TIMEOUT", "MIDSCENE_PLANNING_MODEL_TEMPERATURE", "MIDSCENE_PLANNING_MODEL_RETRY_COUNT", "MIDSCENE_PLANNING_MODEL_RETRY_INTERVAL", "MIDSCENE_PLANNING_MODEL_FAMILY", "MIDSCENE_PLANNING_MODEL_REASONING_EFFORT", "MIDSCENE_PLANNING_MODEL_REASONING_ENABLED", "MIDSCENE_PLANNING_MODEL_REASONING_BUDGET", "MIDSCENE_MODEL_FAMILY"];
127
129
  export type TEnvKeys = (typeof ALL_ENV_KEYS)[number];
128
130
  export type TGlobalConfig = Record<TEnvKeys, string | undefined>;
129
131
  /**
@@ -139,6 +141,7 @@ export interface IModelConfigForInsight {
139
141
  [MIDSCENE_INSIGHT_MODEL_API_KEY]?: string;
140
142
  [MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON]?: string;
141
143
  [MIDSCENE_INSIGHT_MODEL_EXTRA_BODY_JSON]?: string;
144
+ [MIDSCENE_INSIGHT_MODEL_MAX_TOKENS]?: string;
142
145
  [MIDSCENE_INSIGHT_MODEL_TIMEOUT]?: string;
143
146
  [MIDSCENE_INSIGHT_MODEL_TEMPERATURE]?: string;
144
147
  [MIDSCENE_INSIGHT_MODEL_FAMILY]?: TModelFamily;
@@ -151,6 +154,7 @@ export interface IModelConfigForPlanning {
151
154
  [MIDSCENE_PLANNING_MODEL_API_KEY]?: string;
152
155
  [MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON]?: string;
153
156
  [MIDSCENE_PLANNING_MODEL_EXTRA_BODY_JSON]?: string;
157
+ [MIDSCENE_PLANNING_MODEL_MAX_TOKENS]?: string;
154
158
  [MIDSCENE_PLANNING_MODEL_TIMEOUT]?: string;
155
159
  [MIDSCENE_PLANNING_MODEL_TEMPERATURE]?: string;
156
160
  [MIDSCENE_PLANNING_MODEL_FAMILY]?: TModelFamily;
@@ -171,6 +175,7 @@ export interface IModelConfigForDefault {
171
175
  [MIDSCENE_MODEL_API_KEY]?: string;
172
176
  [MIDSCENE_MODEL_INIT_CONFIG_JSON]?: string;
173
177
  [MIDSCENE_MODEL_EXTRA_BODY_JSON]?: string;
178
+ [MIDSCENE_MODEL_MAX_TOKENS]?: string;
174
179
  [MIDSCENE_MODEL_FAMILY]?: TModelFamily;
175
180
  [MIDSCENE_MODEL_TEMPERATURE]?: string;
176
181
  [MIDSCENE_MODEL_REASONING_EFFORT]?: string;
@@ -184,6 +189,7 @@ export interface IModelConfigForDefaultLegacy {
184
189
  [OPENAI_BASE_URL]?: string;
185
190
  [OPENAI_API_KEY]?: string;
186
191
  [MIDSCENE_OPENAI_INIT_CONFIG_JSON]?: string;
192
+ [OPENAI_MAX_TOKENS]?: string;
187
193
  }
188
194
  /**
189
195
  * - insight: Visual Question Answering and Visual Grounding (unified)
@@ -252,6 +258,10 @@ export interface IModelConfig {
252
258
  * Example: { "chat_template_kwargs": { "enable_thinking": true } }
253
259
  */
254
260
  extraBody?: Record<string, unknown>;
261
+ /**
262
+ * max_tokens for model responses.
263
+ */
264
+ maxTokens?: number;
255
265
  /**
256
266
  * Timeout for API calls in milliseconds.
257
267
  * If not set, uses OpenAI SDK default (10 minutes).
@@ -1,4 +1,6 @@
1
1
  import type { ActionSpaceItem, BaseAgent, ToolCliMetadata, ToolDefinition, ToolSchema } from './types';
2
+ import { composeUserPrompt } from './user-prompt';
3
+ export { composeUserPrompt };
2
4
  /**
3
5
  * Converts DeviceAction from actionSpace into MCP ToolDefinition
4
6
  * This is the core logic that removes the need for hardcoded tool definitions
@@ -80,6 +80,22 @@ export interface ActionSpaceItem {
80
80
  args?: Record<string, unknown>;
81
81
  paramSchema?: z.ZodTypeAny;
82
82
  }
83
+ /**
84
+ * Structural shape compatible with @midscene/core `TUserPrompt`.
85
+ * Declared locally to avoid a circular dep on `@midscene/core` from `@midscene/shared`.
86
+ *
87
+ * Currently consumed only by the `assert` tool in `generateCommonTools`.
88
+ * `aiAction` and `aiWaitFor` stay string-only at the CLI surface because the
89
+ * tools generator does not yet expose multimodal entry points for them.
90
+ */
91
+ export type UserPromptLike = string | {
92
+ prompt: string;
93
+ images?: Array<{
94
+ name: string;
95
+ url: string;
96
+ }>;
97
+ convertHttpImage2Base64?: boolean;
98
+ };
83
99
  /**
84
100
  * Base agent interface
85
101
  * Represents a platform-specific agent (Android, iOS, Web)
@@ -91,10 +107,14 @@ export interface BaseAgent {
91
107
  page?: {
92
108
  screenshotBase64(): Promise<string>;
93
109
  };
110
+ recordToReport?: (title?: string, opt?: {
111
+ content?: string;
112
+ screenshotBase64?: string;
113
+ }) => Promise<void>;
94
114
  callActionInActionSpace?: (actionName: string, params?: unknown) => Promise<unknown>;
95
115
  aiAction?: (description: string, params?: Record<string, unknown>) => Promise<unknown>;
96
116
  aiWaitFor?: (assertion: string, options: Record<string, unknown>) => Promise<unknown>;
97
- aiAssert?: (assertion: string, msg?: string, options?: Record<string, unknown>) => Promise<unknown>;
117
+ aiAssert?: (assertion: UserPromptLike, msg?: string, options?: Record<string, unknown>) => Promise<unknown>;
98
118
  }
99
119
  /**
100
120
  * Base device interface for temporary device instances
@@ -0,0 +1,13 @@
1
+ import { z } from 'zod';
2
+ import type { UserPromptLike } from './types';
3
+ export declare function composeUserPrompt(input: {
4
+ prompt: string;
5
+ image?: unknown;
6
+ imageName?: unknown;
7
+ convertHttpImage2Base64?: unknown;
8
+ }): UserPromptLike;
9
+ export declare const promptInputExtraSchema: {
10
+ image: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodArray<z.ZodString, "many">]>>;
11
+ imageName: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodArray<z.ZodString, "many">]>>;
12
+ convertHttpImage2Base64: z.ZodOptional<z.ZodUnion<[z.ZodBoolean, z.ZodString]>>;
13
+ };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@midscene/shared",
3
- "version": "1.8.5-beta-20260525033347.0",
3
+ "version": "1.8.5",
4
4
  "repository": "https://github.com/web-infra-dev/midscene",
5
5
  "homepage": "https://midscenejs.com/",
6
6
  "types": "./dist/types/index.d.ts",
@@ -1,6 +1,7 @@
1
1
  import { z } from 'zod';
2
2
  import { getKeyAliases } from '../key-alias-utils';
3
3
  import type { ToolCliOption, ToolDefinition } from '../mcp/types';
4
+ import { CLIError } from './cli-error';
4
5
 
5
6
  export function parseValue(raw: string): unknown {
6
7
  if (raw.startsWith('{') || raw.startsWith('[')) {
@@ -44,7 +45,19 @@ export function parseCliArgs(args: string[]): Record<string, unknown> {
44
45
  const result: Record<string, unknown> = {};
45
46
 
46
47
  walkCliArgs(args, (key, value) => {
47
- result[key] = value;
48
+ const existing = result[key];
49
+ if (existing === undefined) {
50
+ result[key] = value;
51
+ return;
52
+ }
53
+
54
+ if (Array.isArray(existing)) {
55
+ existing.push(value);
56
+ result[key] = existing;
57
+ return;
58
+ }
59
+
60
+ result[key] = [existing, value];
48
61
  });
49
62
 
50
63
  return result;
@@ -171,3 +184,54 @@ export function formatCliValidationError(
171
184
  typeof issue?.path[0] === 'string' ? `--${issue.path[0]}` : 'CLI arguments';
172
185
  return `Invalid value for "${optionName}" in ${scriptName} ${commandName}: ${issue?.message ?? parsed.error.message}`;
173
186
  }
187
+
188
+ /**
189
+ * Move CLI args parsed under accepted alias spellings (kebab-case, alternate
190
+ * casings, `cli.options.aliases` entries) onto the schema's canonical key so
191
+ * tool handlers can read them with a single field name regardless of which
192
+ * spelling the user typed. Throws `CLIError` on conflicting double-spellings
193
+ * (e.g. both `--imageName` and `--image-name`).
194
+ */
195
+ export function canonicalizeCliArgKeys(
196
+ scriptName: string,
197
+ commandName: string,
198
+ def: ToolDefinition,
199
+ rawArgs: Record<string, unknown>,
200
+ ): Record<string, unknown> {
201
+ if (Object.keys(def.schema).length === 0) return rawArgs;
202
+
203
+ const result: Record<string, unknown> = { ...rawArgs };
204
+
205
+ for (const schemaKey of Object.keys(def.schema)) {
206
+ const cliOption = def.cli?.options?.[schemaKey];
207
+ const acceptedSpellings = getAcceptedCliOptionNames(schemaKey, cliOption);
208
+
209
+ let chosenSpelling: string | undefined;
210
+ let chosenValue: unknown;
211
+
212
+ for (const spelling of acceptedSpellings) {
213
+ if (spelling === schemaKey) continue;
214
+ if (!(spelling in result)) continue;
215
+ if (chosenSpelling !== undefined) {
216
+ throw new CLIError(
217
+ `Conflicting CLI options "--${chosenSpelling}" and "--${spelling}" for ${scriptName} ${commandName}: both target "${schemaKey}". Use one spelling.`,
218
+ );
219
+ }
220
+ chosenSpelling = spelling;
221
+ chosenValue = result[spelling];
222
+ }
223
+
224
+ if (chosenSpelling === undefined) continue;
225
+
226
+ if (schemaKey in result && result[schemaKey] !== chosenValue) {
227
+ throw new CLIError(
228
+ `Conflicting CLI options "--${schemaKey}" and "--${chosenSpelling}" for ${scriptName} ${commandName}: both target "${schemaKey}". Use one spelling.`,
229
+ );
230
+ }
231
+
232
+ result[schemaKey] = chosenValue;
233
+ delete result[chosenSpelling];
234
+ }
235
+
236
+ return result;
237
+ }
@@ -10,6 +10,7 @@ import type {
10
10
  ToolResultContent,
11
11
  } from '../mcp/types';
12
12
  import {
13
+ canonicalizeCliArgKeys,
13
14
  formatCliValidationError,
14
15
  getCliOptionDisplay,
15
16
  parseCliArgs,
@@ -214,9 +215,16 @@ export async function runToolsCLI(
214
215
  throw new CLIError(cliValidationError);
215
216
  }
216
217
 
217
- debug('command: %s, args: %s', match.name, JSON.stringify(parsedArgs));
218
+ const handlerArgs = canonicalizeCliArgKeys(
219
+ scriptName,
220
+ match.name,
221
+ match.def,
222
+ parsedArgs,
223
+ );
224
+
225
+ debug('command: %s, args: %s', match.name, JSON.stringify(handlerArgs));
218
226
 
219
- const result = await match.def.handler(parsedArgs);
227
+ const result = await match.def.handler(handlerArgs);
220
228
  debug(
221
229
  'command %s completed, isError: %s',
222
230
  match.name,