@midscene/shared 1.9.7 → 1.9.8-beta-20260618014851.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. package/dist/es/agent-tools/agent-behavior-init-args.mjs +44 -0
  2. package/dist/es/{mcp → agent-tools}/base-tools.mjs +1 -8
  3. package/dist/es/{mcp → agent-tools}/chrome-path.mjs +14 -3
  4. package/dist/es/{mcp → agent-tools}/index.mjs +1 -3
  5. package/dist/es/{mcp → agent-tools}/tool-generator.mjs +6 -5
  6. package/dist/es/cli/cli-runner.mjs +1 -1
  7. package/dist/es/env/parse-model-config.mjs +1 -1
  8. package/dist/es/env/types.mjs +3 -5
  9. package/dist/es/img/index.mjs +2 -2
  10. package/dist/es/img/transform.mjs +18 -1
  11. package/dist/es/utils.mjs +2 -6
  12. package/dist/lib/agent-tools/agent-behavior-init-args.js +87 -0
  13. package/dist/lib/{mcp → agent-tools}/base-tools.js +1 -8
  14. package/dist/lib/{mcp → agent-tools}/chrome-path.js +13 -2
  15. package/dist/lib/{mcp → agent-tools}/index.js +10 -24
  16. package/dist/lib/{mcp → agent-tools}/tool-generator.js +6 -5
  17. package/dist/lib/cli/cli-runner.js +1 -1
  18. package/dist/lib/env/parse-model-config.js +1 -1
  19. package/dist/lib/env/types.js +5 -10
  20. package/dist/lib/img/index.js +3 -0
  21. package/dist/lib/img/transform.js +20 -0
  22. package/dist/lib/utils.js +8 -15
  23. package/dist/types/agent-tools/agent-behavior-init-args.d.ts +17 -0
  24. package/dist/types/{mcp → agent-tools}/base-tools.d.ts +7 -13
  25. package/dist/types/{mcp → agent-tools}/index.d.ts +1 -3
  26. package/dist/types/{mcp → agent-tools}/init-arg-utils.d.ts +3 -3
  27. package/dist/types/{mcp → agent-tools}/tool-defaults.d.ts +5 -6
  28. package/dist/types/{mcp → agent-tools}/tool-generator.d.ts +1 -1
  29. package/dist/types/{mcp → agent-tools}/types.d.ts +20 -13
  30. package/dist/types/cli/cli-args.d.ts +1 -1
  31. package/dist/types/cli/cli-runner.d.ts +2 -2
  32. package/dist/types/env/types.d.ts +8 -6
  33. package/dist/types/img/index.d.ts +1 -1
  34. package/dist/types/img/transform.d.ts +4 -0
  35. package/dist/types/key-alias-utils.d.ts +2 -2
  36. package/dist/types/utils.d.ts +0 -1
  37. package/package.json +15 -8
  38. package/src/agent-tools/agent-behavior-init-args.ts +109 -0
  39. package/src/{mcp → agent-tools}/base-tools.ts +8 -33
  40. package/src/{mcp → agent-tools}/chrome-path.ts +20 -3
  41. package/src/{mcp → agent-tools}/index.ts +1 -3
  42. package/src/{mcp → agent-tools}/init-arg-utils.ts +3 -3
  43. package/src/{mcp → agent-tools}/tool-defaults.ts +5 -6
  44. package/src/{mcp → agent-tools}/tool-generator.ts +14 -7
  45. package/src/{mcp → agent-tools}/types.ts +22 -10
  46. package/src/cli/cli-args.ts +1 -1
  47. package/src/cli/cli-runner.ts +4 -4
  48. package/src/env/types.ts +5 -5
  49. package/src/img/index.ts +2 -0
  50. package/src/img/transform.ts +45 -0
  51. package/src/key-alias-utils.ts +2 -2
  52. package/src/utils.ts +1 -10
  53. package/dist/es/mcp/base-server.mjs +0 -295
  54. package/dist/es/mcp/inject-report-html-plugin.mjs +0 -53
  55. package/dist/es/mcp/launcher-helper.mjs +0 -52
  56. package/dist/lib/mcp/base-server.js +0 -345
  57. package/dist/lib/mcp/inject-report-html-plugin.js +0 -98
  58. package/dist/lib/mcp/launcher-helper.js +0 -86
  59. package/dist/types/mcp/base-server.d.ts +0 -106
  60. package/dist/types/mcp/inject-report-html-plugin.d.ts +0 -18
  61. package/dist/types/mcp/launcher-helper.d.ts +0 -94
  62. package/src/mcp/base-server.ts +0 -529
  63. package/src/mcp/inject-report-html-plugin.ts +0 -119
  64. package/src/mcp/launcher-helper.ts +0 -200
  65. /package/dist/es/{mcp → agent-tools}/cli-report-session.mjs +0 -0
  66. /package/dist/es/{mcp → agent-tools}/error-formatter.mjs +0 -0
  67. /package/dist/es/{mcp → agent-tools}/init-arg-utils.mjs +0 -0
  68. /package/dist/es/{mcp → agent-tools}/tool-defaults.mjs +0 -0
  69. /package/dist/es/{mcp → agent-tools}/types.mjs +0 -0
  70. /package/dist/es/{mcp → agent-tools}/user-prompt.mjs +0 -0
  71. /package/dist/lib/{mcp → agent-tools}/cli-report-session.js +0 -0
  72. /package/dist/lib/{mcp → agent-tools}/error-formatter.js +0 -0
  73. /package/dist/lib/{mcp → agent-tools}/init-arg-utils.js +0 -0
  74. /package/dist/lib/{mcp → agent-tools}/tool-defaults.js +0 -0
  75. /package/dist/lib/{mcp → agent-tools}/types.js +0 -0
  76. /package/dist/lib/{mcp → agent-tools}/user-prompt.js +0 -0
  77. /package/dist/types/{mcp → agent-tools}/chrome-path.d.ts +0 -0
  78. /package/dist/types/{mcp → agent-tools}/cli-report-session.d.ts +0 -0
  79. /package/dist/types/{mcp → agent-tools}/error-formatter.d.ts +0 -0
  80. /package/dist/types/{mcp → agent-tools}/user-prompt.d.ts +0 -0
  81. /package/src/{mcp → agent-tools}/cli-report-session.ts +0 -0
  82. /package/src/{mcp → agent-tools}/error-formatter.ts +0 -0
  83. /package/src/{mcp → agent-tools}/user-prompt.ts +0 -0
package/dist/lib/env/types.js CHANGED
@@ -36,6 +36,7 @@ __webpack_require__.d(__webpack_exports__, {
36
36
  MIDSCENE_ANDROID_IME_STRATEGY: ()=>MIDSCENE_ANDROID_IME_STRATEGY,
37
37
  MIDSCENE_CACHE: ()=>MIDSCENE_CACHE,
38
38
  MIDSCENE_CACHE_MAX_FILENAME_LENGTH: ()=>MIDSCENE_CACHE_MAX_FILENAME_LENGTH,
39
+ MIDSCENE_CHROME_PATH: ()=>MIDSCENE_CHROME_PATH,
39
40
  MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG: ()=>MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG,
40
41
  MIDSCENE_DEBUG_MODE: ()=>MIDSCENE_DEBUG_MODE,
41
42
  MIDSCENE_DEBUG_MODEL_PROFILE: ()=>MIDSCENE_DEBUG_MODEL_PROFILE,
@@ -60,9 +61,7 @@ __webpack_require__.d(__webpack_exports__, {
60
61
  MIDSCENE_IOS_SIMULATOR_UDID: ()=>MIDSCENE_IOS_SIMULATOR_UDID,
61
62
  MIDSCENE_LANGFUSE_DEBUG: ()=>MIDSCENE_LANGFUSE_DEBUG,
62
63
  MIDSCENE_LANGSMITH_DEBUG: ()=>MIDSCENE_LANGSMITH_DEBUG,
63
- MIDSCENE_MCP_ANDROID_MODE: ()=>MIDSCENE_MCP_ANDROID_MODE,
64
64
  MIDSCENE_MCP_CHROME_PATH: ()=>MIDSCENE_MCP_CHROME_PATH,
65
- MIDSCENE_MCP_USE_PUPPETEER_MODE: ()=>MIDSCENE_MCP_USE_PUPPETEER_MODE,
66
65
  MIDSCENE_MODEL_API_KEY: ()=>MIDSCENE_MODEL_API_KEY,
67
66
  MIDSCENE_MODEL_BASE_URL: ()=>MIDSCENE_MODEL_BASE_URL,
68
67
  MIDSCENE_MODEL_EXTRA_BODY_JSON: ()=>MIDSCENE_MODEL_EXTRA_BODY_JSON,
@@ -123,9 +122,8 @@ const MIDSCENE_DEBUG_MODEL_PROFILE = 'MIDSCENE_DEBUG_MODEL_PROFILE';
123
122
  const MIDSCENE_DEBUG_MODEL_RESPONSE = 'MIDSCENE_DEBUG_MODEL_RESPONSE';
124
123
  const MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG = 'MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG';
125
124
  const MIDSCENE_DEBUG_MODE = 'MIDSCENE_DEBUG_MODE';
126
- const MIDSCENE_MCP_USE_PUPPETEER_MODE = 'MIDSCENE_MCP_USE_PUPPETEER_MODE';
125
+ const MIDSCENE_CHROME_PATH = 'MIDSCENE_CHROME_PATH';
127
126
  const MIDSCENE_MCP_CHROME_PATH = 'MIDSCENE_MCP_CHROME_PATH';
128
- const MIDSCENE_MCP_ANDROID_MODE = 'MIDSCENE_MCP_ANDROID_MODE';
129
127
  const DOCKER_CONTAINER = 'DOCKER_CONTAINER';
130
128
  const MIDSCENE_LANGSMITH_DEBUG = 'MIDSCENE_LANGSMITH_DEBUG';
131
129
  const MIDSCENE_LANGFUSE_DEBUG = 'MIDSCENE_LANGFUSE_DEBUG';
@@ -208,8 +206,6 @@ const BASIC_ENV_KEYS = [
208
206
  ];
209
207
  const BOOLEAN_ENV_KEYS = [
210
208
  MIDSCENE_CACHE,
211
- MIDSCENE_MCP_USE_PUPPETEER_MODE,
212
- MIDSCENE_MCP_ANDROID_MODE,
213
209
  MIDSCENE_LANGSMITH_DEBUG,
214
210
  MIDSCENE_LANGFUSE_DEBUG,
215
211
  MIDSCENE_REPORT_QUIET
@@ -228,6 +224,7 @@ const STRING_ENV_KEYS = [
228
224
  MIDSCENE_REPORT_TAG_NAME,
229
225
  MIDSCENE_PREFERRED_LANGUAGE,
230
226
  MATCH_BY_POSITION,
227
+ MIDSCENE_CHROME_PATH,
231
228
  MIDSCENE_MCP_CHROME_PATH,
232
229
  DOCKER_CONTAINER
233
230
  ];
@@ -338,6 +335,7 @@ exports.MIDSCENE_ADB_REMOTE_PORT = __webpack_exports__.MIDSCENE_ADB_REMOTE_PORT;
338
335
  exports.MIDSCENE_ANDROID_IME_STRATEGY = __webpack_exports__.MIDSCENE_ANDROID_IME_STRATEGY;
339
336
  exports.MIDSCENE_CACHE = __webpack_exports__.MIDSCENE_CACHE;
340
337
  exports.MIDSCENE_CACHE_MAX_FILENAME_LENGTH = __webpack_exports__.MIDSCENE_CACHE_MAX_FILENAME_LENGTH;
338
+ exports.MIDSCENE_CHROME_PATH = __webpack_exports__.MIDSCENE_CHROME_PATH;
341
339
  exports.MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG = __webpack_exports__.MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG;
342
340
  exports.MIDSCENE_DEBUG_MODE = __webpack_exports__.MIDSCENE_DEBUG_MODE;
343
341
  exports.MIDSCENE_DEBUG_MODEL_PROFILE = __webpack_exports__.MIDSCENE_DEBUG_MODEL_PROFILE;
@@ -362,9 +360,7 @@ exports.MIDSCENE_IOS_DEVICE_UDID = __webpack_exports__.MIDSCENE_IOS_DEVICE_UDID;
362
360
  exports.MIDSCENE_IOS_SIMULATOR_UDID = __webpack_exports__.MIDSCENE_IOS_SIMULATOR_UDID;
363
361
  exports.MIDSCENE_LANGFUSE_DEBUG = __webpack_exports__.MIDSCENE_LANGFUSE_DEBUG;
364
362
  exports.MIDSCENE_LANGSMITH_DEBUG = __webpack_exports__.MIDSCENE_LANGSMITH_DEBUG;
365
- exports.MIDSCENE_MCP_ANDROID_MODE = __webpack_exports__.MIDSCENE_MCP_ANDROID_MODE;
366
363
  exports.MIDSCENE_MCP_CHROME_PATH = __webpack_exports__.MIDSCENE_MCP_CHROME_PATH;
367
- exports.MIDSCENE_MCP_USE_PUPPETEER_MODE = __webpack_exports__.MIDSCENE_MCP_USE_PUPPETEER_MODE;
368
364
  exports.MIDSCENE_MODEL_API_KEY = __webpack_exports__.MIDSCENE_MODEL_API_KEY;
369
365
  exports.MIDSCENE_MODEL_BASE_URL = __webpack_exports__.MIDSCENE_MODEL_BASE_URL;
370
366
  exports.MIDSCENE_MODEL_EXTRA_BODY_JSON = __webpack_exports__.MIDSCENE_MODEL_EXTRA_BODY_JSON;
@@ -430,6 +426,7 @@ for(var __rspack_i in __webpack_exports__)if (-1 === [
430
426
  "MIDSCENE_ANDROID_IME_STRATEGY",
431
427
  "MIDSCENE_CACHE",
432
428
  "MIDSCENE_CACHE_MAX_FILENAME_LENGTH",
429
+ "MIDSCENE_CHROME_PATH",
433
430
  "MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG",
434
431
  "MIDSCENE_DEBUG_MODE",
435
432
  "MIDSCENE_DEBUG_MODEL_PROFILE",
@@ -454,9 +451,7 @@ for(var __rspack_i in __webpack_exports__)if (-1 === [
454
451
  "MIDSCENE_IOS_SIMULATOR_UDID",
455
452
  "MIDSCENE_LANGFUSE_DEBUG",
456
453
  "MIDSCENE_LANGSMITH_DEBUG",
457
- "MIDSCENE_MCP_ANDROID_MODE",
458
454
  "MIDSCENE_MCP_CHROME_PATH",
459
- "MIDSCENE_MCP_USE_PUPPETEER_MODE",
460
455
  "MIDSCENE_MODEL_API_KEY",
461
456
  "MIDSCENE_MODEL_BASE_URL",
462
457
  "MIDSCENE_MODEL_EXTRA_BODY_JSON",
package/dist/lib/img/index.js CHANGED
@@ -33,6 +33,7 @@ __webpack_require__.d(__webpack_exports__, {
33
33
  saveBase64Image: ()=>external_transform_js_namespaceObject.saveBase64Image,
34
34
  isValidImageBuffer: ()=>external_info_js_namespaceObject.isValidImageBuffer,
35
35
  validateScreenshotBuffer: ()=>external_info_js_namespaceObject.validateScreenshotBuffer,
36
+ normalizeScreenshotBase64: ()=>external_transform_js_namespaceObject.normalizeScreenshotBase64,
36
37
  preProcessImageUrl: ()=>external_transform_js_namespaceObject.preProcessImageUrl,
37
38
  annotateRects: ()=>external_box_select_js_namespaceObject.annotateRects,
38
39
  compositeElementInfoImg: ()=>external_box_select_js_namespaceObject.compositeElementInfoImg,
@@ -64,6 +65,7 @@ exports.isValidJPEGImageBuffer = __webpack_exports__.isValidJPEGImageBuffer;
64
65
  exports.isValidPNGImageBuffer = __webpack_exports__.isValidPNGImageBuffer;
65
66
  exports.localImg2Base64 = __webpack_exports__.localImg2Base64;
66
67
  exports.normalizeBase64Image = __webpack_exports__.normalizeBase64Image;
68
+ exports.normalizeScreenshotBase64 = __webpack_exports__.normalizeScreenshotBase64;
67
69
  exports.paddingToMatchBlockByBase64 = __webpack_exports__.paddingToMatchBlockByBase64;
68
70
  exports.parseBase64 = __webpack_exports__.parseBase64;
69
71
  exports.preProcessImageUrl = __webpack_exports__.preProcessImageUrl;
@@ -88,6 +90,7 @@ for(var __rspack_i in __webpack_exports__)if (-1 === [
88
90
  "isValidPNGImageBuffer",
89
91
  "localImg2Base64",
90
92
  "normalizeBase64Image",
93
+ "normalizeScreenshotBase64",
91
94
  "paddingToMatchBlockByBase64",
92
95
  "parseBase64",
93
96
  "preProcessImageUrl",
package/dist/lib/img/transform.js CHANGED
@@ -42,6 +42,7 @@ __webpack_require__.d(__webpack_exports__, {
42
42
  resizeAndConvertImgBuffer: ()=>resizeAndConvertImgBuffer,
43
43
  resizeImgBase64: ()=>resizeImgBase64,
44
44
  saveBase64Image: ()=>saveBase64Image,
45
+ normalizeScreenshotBase64: ()=>normalizeScreenshotBase64,
45
46
  preProcessImageUrl: ()=>preProcessImageUrl,
46
47
  cropByRect: ()=>cropByRect,
47
48
  scaleImage: ()=>scaleImage,
@@ -129,6 +130,8 @@ async function resizeAndConvertImgBuffer(inputFormat, inputData, newSize) {
129
130
  }
130
131
  const normalizeBase64Body = (body)=>body.replace(/\s/g, '');
131
132
  const base64ImageDataUrlPattern = /^data:image\/[a-zA-Z0-9.+-]+;base64,/i;
133
+ const supportedScreenshotDataUriPattern = /^data:image\/(png|jpe?g);base64,([\s\S]*)$/i;
134
+ const rawBase64BodyPattern = /^[A-Za-z0-9+/=\s]+$/;
132
135
  const inferBase64ImageFormat = (base64Body)=>{
133
136
  if (base64Body.startsWith('iVBORw0KGgo')) return 'png';
134
137
  return 'jpeg';
@@ -141,6 +144,21 @@ function detectImageMimeTypeFromBuffer(buffer) {
141
144
  if (buffer.length >= 2 && 0x42 === buffer[0] && 0x4d === buffer[1]) return 'image/bmp';
142
145
  }
143
146
  const createImgBase64ByFormat = (format, body)=>`data:image/${format};base64,${normalizeBase64Body(body)}`;
147
+ const normalizeScreenshotBase64 = (base64, options)=>{
148
+ const label = options?.label ?? 'screenshot base64';
149
+ const trimmedBase64 = base64.trim();
150
+ if (!trimmedBase64) throw new Error(`${label} cannot be empty`);
151
+ const dataUriMatch = trimmedBase64.match(supportedScreenshotDataUriPattern);
152
+ if (dataUriMatch) {
153
+ const imageFormat = 'jpg' === dataUriMatch[1].toLowerCase() ? 'jpeg' : dataUriMatch[1].toLowerCase();
154
+ const body = dataUriMatch[2];
155
+ if (!normalizeBase64Body(body)) throw new Error(`${label} cannot be empty`);
156
+ return createImgBase64ByFormat(imageFormat, body);
157
+ }
158
+ if (trimmedBase64.startsWith('data:')) throw new Error(`${label} must be a PNG/JPEG data URI or raw PNG base64 string`);
159
+ if (!rawBase64BodyPattern.test(trimmedBase64)) throw new Error(`${label} must be a PNG/JPEG data URI or raw PNG base64 string`);
160
+ return createImgBase64ByFormat('png', trimmedBase64);
161
+ };
144
162
  const normalizeBase64Image = (base64)=>{
145
163
  const trimmedBase64 = base64.trim();
146
164
  if (base64ImageDataUrlPattern.test(trimmedBase64)) return trimmedBase64;
@@ -358,6 +376,7 @@ exports.inferBase64ImageFormat = __webpack_exports__.inferBase64ImageFormat;
358
376
  exports.localImg2Base64 = __webpack_exports__.localImg2Base64;
359
377
  exports.normalizeBase64Body = __webpack_exports__.normalizeBase64Body;
360
378
  exports.normalizeBase64Image = __webpack_exports__.normalizeBase64Image;
379
+ exports.normalizeScreenshotBase64 = __webpack_exports__.normalizeScreenshotBase64;
361
380
  exports.paddingToMatchBlock = __webpack_exports__.paddingToMatchBlock;
362
381
  exports.paddingToMatchBlockByBase64 = __webpack_exports__.paddingToMatchBlockByBase64;
363
382
  exports.parseBase64 = __webpack_exports__.parseBase64;
@@ -377,6 +396,7 @@ for(var __rspack_i in __webpack_exports__)if (-1 === [
377
396
  "localImg2Base64",
378
397
  "normalizeBase64Body",
379
398
  "normalizeBase64Image",
399
+ "normalizeScreenshotBase64",
380
400
  "paddingToMatchBlock",
381
401
  "paddingToMatchBlockByBase64",
382
402
  "parseBase64",
package/dist/lib/utils.js CHANGED
@@ -24,20 +24,19 @@ var __webpack_require__ = {};
24
24
  var __webpack_exports__ = {};
25
25
  __webpack_require__.r(__webpack_exports__);
26
26
  __webpack_require__.d(__webpack_exports__, {
27
- uuid: ()=>uuid,
28
- isPlainObject: ()=>isPlainObject,
29
- ifInBrowser: ()=>ifInBrowser,
30
- repeat: ()=>repeat,
31
- mergeAndNormalizeAppNameMapping: ()=>mergeAndNormalizeAppNameMapping,
32
- ifInNode: ()=>ifInNode,
33
- assert: ()=>assert,
34
27
  escapeScriptTag: ()=>escapeScriptTag,
35
28
  logMsg: ()=>logMsg,
29
+ isPlainObject: ()=>isPlainObject,
36
30
  replaceIllegalPathCharsAndSpace: ()=>replaceIllegalPathCharsAndSpace,
31
+ uuid: ()=>uuid,
32
+ ifInBrowser: ()=>ifInBrowser,
37
33
  generateHashId: ()=>generateHashId,
38
34
  normalizeForComparison: ()=>normalizeForComparison,
35
+ mergeAndNormalizeAppNameMapping: ()=>mergeAndNormalizeAppNameMapping,
36
+ ifInNode: ()=>ifInNode,
39
37
  ifInWorker: ()=>ifInWorker,
40
- setIsMcp: ()=>setIsMcp,
38
+ repeat: ()=>repeat,
39
+ assert: ()=>assert,
41
40
  antiEscapeScriptTag: ()=>antiEscapeScriptTag
42
41
  });
43
42
  const external_js_sha256_namespaceObject = require("js-sha256");
@@ -76,12 +75,8 @@ function generateHashId(rect, content = '') {
76
75
  function assert(condition, message) {
77
76
  if (!condition) throw new Error(message || 'Assertion failed');
78
77
  }
79
- let isMcp = false;
80
- function setIsMcp(value) {
81
- isMcp = value;
82
- }
83
78
  function logMsg(...message) {
84
- if (!isMcp) console.log(...message);
79
+ console.log(...message);
85
80
  }
86
81
  async function repeat(times, fn) {
87
82
  for(let i = 0; i < times; i++)await fn(i);
@@ -124,7 +119,6 @@ exports.mergeAndNormalizeAppNameMapping = __webpack_exports__.mergeAndNormalizeA
124
119
  exports.normalizeForComparison = __webpack_exports__.normalizeForComparison;
125
120
  exports.repeat = __webpack_exports__.repeat;
126
121
  exports.replaceIllegalPathCharsAndSpace = __webpack_exports__.replaceIllegalPathCharsAndSpace;
127
- exports.setIsMcp = __webpack_exports__.setIsMcp;
128
122
  exports.uuid = __webpack_exports__.uuid;
129
123
  for(var __rspack_i in __webpack_exports__)if (-1 === [
130
124
  "antiEscapeScriptTag",
@@ -140,7 +134,6 @@ for(var __rspack_i in __webpack_exports__)if (-1 === [
140
134
  "normalizeForComparison",
141
135
  "repeat",
142
136
  "replaceIllegalPathCharsAndSpace",
143
- "setIsMcp",
144
137
  "uuid"
145
138
  ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
146
139
  Object.defineProperty(exports, '__esModule', {
@@ -0,0 +1,17 @@
1
+ import { z } from 'zod';
2
+ export interface AgentBehaviorInitArgs {
3
+ aiActContext?: string;
4
+ aiActionContext?: string;
5
+ replanningCycleLimit?: number;
6
+ waitAfterAction?: number;
7
+ screenshotShrinkFactor?: number;
8
+ }
9
+ export declare const agentBehaviorInitArgShape: {
10
+ aiActContext: z.ZodOptional<z.ZodString>;
11
+ replanningCycleLimit: z.ZodOptional<z.ZodNumber>;
12
+ waitAfterAction: z.ZodOptional<z.ZodNumber>;
13
+ screenshotShrinkFactor: z.ZodOptional<z.ZodNumber>;
14
+ };
15
+ export declare function extractAgentBehaviorInitArgs(extracted: Partial<AgentBehaviorInitArgs> | undefined): AgentBehaviorInitArgs | undefined;
16
+ export declare function getAgentInitArgsSignature(initArgs: object | undefined): string | undefined;
17
+ export declare function shouldRebuildAgentForInitArgs(currentSignature: string | undefined, nextSignature: string | undefined): boolean;
@@ -1,4 +1,3 @@
1
- import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
2
1
  import type { z } from 'zod';
3
2
  import { type CliReportSession } from './cli-report-session';
4
3
  import { type ToolDefaults } from './tool-defaults';
@@ -11,11 +10,11 @@ import type { BaseAgent, BaseDevice, IMidsceneTools, ToolCliMetadata, ToolDefini
11
10
  export interface InitArgSpec<TInitParam> {
12
11
  /** Arg namespace, e.g. `android`, `ios`. */
13
12
  namespace: string;
14
- /** Zod shape describing the init args. Field names drive the MCP schema. */
13
+ /** Zod shape describing the init args. Field names drive the tool schema. */
15
14
  shape: Record<string, z.ZodTypeAny>;
16
15
  /**
17
16
  * Optional CLI presentation hints. These affect `--help` output for
18
- * single-platform CLIs but do not alter MCP/YAML protocol keys.
17
+ * single-platform CLIs but do not alter YAML protocol keys.
19
18
  */
20
19
  cli?: {
21
20
  /** Prefer bare `--device-id`-style options in platform CLI help output. */
@@ -30,24 +29,23 @@ export interface InitArgSpec<TInitParam> {
30
29
  adapt?: (extracted: Record<string, unknown> | undefined) => TInitParam | undefined;
31
30
  }
32
31
  /**
33
- * Base class for platform-specific MCP tools.
32
+ * Base class for platform-specific Midscene tools.
34
33
  * @typeParam TAgent - Platform-specific agent type.
35
34
  * @typeParam TInitParam - Platform-specific init parameter consumed by
36
35
  * `ensureAgent`. Defaults to `undefined` for platforms that take no args.
37
36
  */
38
37
  export declare abstract class BaseMidsceneTools<TAgent extends BaseAgent = BaseAgent, TInitParam = unknown> implements IMidsceneTools {
39
- protected mcpServer?: McpServer;
40
38
  protected agent?: TAgent;
41
39
  protected toolDefinitions: ToolDefinition[];
42
40
  /**
43
41
  * Default options injected into every generated tool call (e.g. forced deep
44
- * locate / deep think). Set from server/CLI behavior flags before
42
+ * locate / deep think). Set from startup/CLI behavior flags before
45
43
  * `initTools()` so they are baked into the generated tool handlers.
46
44
  * See https://github.com/web-infra-dev/midscene/issues/2446.
47
45
  */
48
46
  protected toolDefaults: ToolDefaults;
49
47
  /**
50
- * Declarative init-arg spec. Subclasses that accept CLI/MCP init args should
48
+ * Declarative init-arg spec. Subclasses that accept CLI init args should
51
49
  * set this once and get `extractAgentInitParam` / `sanitizeToolArgs` /
52
50
  * `getAgentInitArgSchema` auto-implemented.
53
51
  *
@@ -66,7 +64,7 @@ export declare abstract class BaseMidsceneTools<TAgent extends BaseAgent = BaseA
66
64
  protected abstract ensureAgent(initParam?: TInitParam): Promise<TAgent>;
67
65
  private getInitArgKeys;
68
66
  /**
69
- * Extract a platform-specific agent init parameter from CLI/MCP tool args.
67
+ * Extract a platform-specific agent init parameter from CLI tool args.
70
68
  */
71
69
  protected extractAgentInitParam(args: Record<string, unknown>): TInitParam | undefined;
72
70
  /**
@@ -82,7 +80,7 @@ export declare abstract class BaseMidsceneTools<TAgent extends BaseAgent = BaseA
82
80
  * show ergonomic bare flags while the underlying schema stays namespaced.
83
81
  * When `preferBareKeys` is enabled, single-platform CLIs only accept the
84
82
  * bare spellings; namespaced dotted spellings remain available through the
85
- * MCP/YAML schema instead of the platform CLI surface.
83
+ * YAML schema instead of the platform CLI surface.
86
84
  */
87
85
  protected getAgentInitArgCliMetadata(): ToolCliMetadata | undefined;
88
86
  /**
@@ -109,10 +107,6 @@ export declare abstract class BaseMidsceneTools<TAgent extends BaseAgent = BaseA
109
107
  * 2. Create temporary device instance to read actionSpace (always succeeds)
110
108
  */
111
109
  initTools(): Promise<void>;
112
- /**
113
- * Attach to MCP server and register all tools
114
- */
115
- attachToServer(server: McpServer): void;
116
110
  /**
117
111
  * Cleanup method - destroy agent and release resources
118
112
  */
@@ -1,10 +1,8 @@
1
- export * from './base-server';
2
1
  export * from './base-tools';
3
2
  export * from './tool-defaults';
3
+ export * from './agent-behavior-init-args';
4
4
  export * from './init-arg-utils';
5
5
  export * from './error-formatter';
6
6
  export * from './tool-generator';
7
7
  export * from './types';
8
- export * from './inject-report-html-plugin';
9
- export * from './launcher-helper';
10
8
  export * from './chrome-path';
@@ -3,11 +3,11 @@ import type { ToolSchema } from './types';
3
3
  export declare function extractNamespacedArgs<TFieldName extends string, TArgs extends Record<string, unknown> = Record<string, unknown>>(args: Record<string, unknown>, namespace: string, keys: readonly TFieldName[]): TArgs | undefined;
4
4
  export declare function sanitizeNamespacedArgs(args: Record<string, unknown>, namespace: string, keys: readonly string[]): Record<string, unknown>;
5
5
  /**
6
- * Build a flat MCP tool schema whose keys are dotted `"<namespace>.<field>"`.
6
+ * Build a flat tool schema whose keys are dotted `"<namespace>.<field>"`.
7
7
  *
8
8
  * We intentionally stay flat (rather than `{ namespace: z.object({...}) }`) so
9
- * that CLI (`--android.device-id`), MCP clients, and `--help` output all share
10
- * the same spelling. `readNamespacedArg` understands all three input shapes:
9
+ * that CLI (`--android.device-id`) and `--help` output share the same spelling.
10
+ * `readNamespacedArg` understands all three input shapes:
11
11
  * nested namespace object, dotted flat key, and bare key fallback.
12
12
  */
13
13
  export declare function createNamespacedInitArgSchema(namespace: string, shape: Record<string, z.ZodTypeAny>): ToolSchema;
@@ -1,10 +1,10 @@
1
1
  /**
2
2
  * Unified, declarative mechanism for "force a default option on every tool
3
- * call" behaviors exposed by MCP servers and the device / Agent Skill CLIs.
3
+ * call" behaviors exposed by device and Agent Skill CLIs.
4
4
  *
5
5
  * Adding a new behavior flag (e.g. `--deep-search`) is a one-line change to
6
6
  * {@link TOOL_BEHAVIOR_FLAGS}: declare which default-option "bag" it fills.
7
- * The tool generator, servers, tools managers and CLI parsing are all generic
7
+ * The tool generator, tools managers and CLI parsing are all generic
8
8
  * over {@link ToolDefaults} and never need to learn about individual flags.
9
9
  *
10
10
  * See https://github.com/web-infra-dev/midscene/issues/2446.
@@ -50,13 +50,12 @@ export declare function resolveToolDefaults(isEnabled: (cli: string) => boolean)
50
50
  *
51
51
  * Behavior flags (e.g. `--deep-locate`) are global: they may appear anywhere
52
52
  * in argv and are not tied to a specific sub-command. They are recognized by
53
- * exact kebab-case match the same surface the MCP `parseArgs` config exposes
54
- * — and removed so a strict per-command parser never sees them. Every other
53
+ * exact kebab-case match and removed so a strict per-command parser never sees them. Every other
55
54
  * token is returned untouched and in order for that per-command parser.
56
55
  *
57
56
  * This is the single place that knows how a behavior flag looks on the command
58
- * line; both the device / Agent Skill CLI and the MCP launch path resolve their
59
- * defaults from {@link TOOL_BEHAVIOR_FLAGS} through here / {@link resolveToolDefaults}.
57
+ * line; the device / Agent Skill CLI resolves defaults from
58
+ * {@link TOOL_BEHAVIOR_FLAGS} through here / {@link resolveToolDefaults}.
60
59
  */
61
60
  export declare function stripBehaviorFlags(argv: readonly string[]): {
62
61
  rawArgs: string[];
@@ -3,7 +3,7 @@ import type { ActionSpaceItem, BaseAgent, ToolCliMetadata, ToolDefinition, ToolS
3
3
  import { composeUserPrompt } from './user-prompt';
4
4
  export { composeUserPrompt };
5
5
  /**
6
- * Converts DeviceAction from actionSpace into MCP ToolDefinition
6
+ * Converts DeviceAction from actionSpace into ToolDefinition.
7
7
  * This is the core logic that removes need for hardcoded tool definitions
8
8
  */
9
9
  export declare function generateToolsFromActionSpace(actionSpace: ActionSpaceItem[], getAgent: (args?: Record<string, unknown>) => Promise<BaseAgent>, sanitizeArgs?: (args: Record<string, unknown>) => Record<string, unknown>, initArgSchema?: ToolSchema, initArgCliMetadata?: ToolCliMetadata, toolDefaults?: ToolDefaults): ToolDefinition[];
@@ -1,4 +1,3 @@
1
- import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
2
1
  import type { z } from 'zod';
3
2
  import type { ToolDefaults } from './tool-defaults';
4
3
  /**
@@ -7,7 +6,7 @@ import type { ToolDefaults } from './tool-defaults';
7
6
  export declare const defaultAppLoadingTimeoutMs = 10000;
8
7
  export declare const defaultAppLoadingCheckIntervalMs = 2000;
9
8
  /**
10
- * Content item types for tool results (MCP compatible)
9
+ * Content item types for tool results.
11
10
  */
12
11
  export type ToolResultContent = {
13
12
  type: 'text';
@@ -33,7 +32,7 @@ export type ToolResultContent = {
33
32
  };
34
33
  };
35
34
  /**
36
- * Result type for tool execution (MCP compatible)
35
+ * Result type for tool execution.
37
36
  */
38
37
  export interface ToolResult {
39
38
  [x: string]: unknown;
@@ -58,7 +57,7 @@ export interface ToolCliMetadata {
58
57
  options?: Record<string, ToolCliOption>;
59
58
  }
60
59
  /**
61
- * Tool definition for MCP server
60
+ * Tool definition for Midscene CLI and Skill surfaces.
62
61
  */
63
62
  export interface ToolDefinition<T = Record<string, unknown>> {
64
63
  name: string;
@@ -67,9 +66,6 @@ export interface ToolDefinition<T = Record<string, unknown>> {
67
66
  handler: ToolHandler<T>;
68
67
  cli?: ToolCliMetadata;
69
68
  }
70
- /**
71
- * Tool type for mcpKitForAgent return value
72
- */
73
69
  export type Tool = ToolDefinition;
74
70
  /**
75
71
  * Action space item definition
@@ -97,6 +93,21 @@ export type UserPromptLike = string | {
97
93
  }>;
98
94
  convertHttpImage2Base64?: boolean;
99
95
  };
96
+ export interface RecordToReportScreenshot {
97
+ /**
98
+ * PNG/JPEG data URI, or raw PNG base64 body.
99
+ */
100
+ base64: string;
101
+ description?: string;
102
+ }
103
+ export interface RecordToReportOptions {
104
+ content?: string;
105
+ /**
106
+ * @deprecated Use `screenshots: [{ base64 }]` instead.
107
+ */
108
+ screenshotBase64?: string;
109
+ screenshots?: RecordToReportScreenshot[];
110
+ }
100
111
  /**
101
112
  * Base agent interface
102
113
  * Represents a platform-specific agent (Android, iOS, Web)
@@ -108,10 +119,7 @@ export interface BaseAgent {
108
119
  page?: {
109
120
  screenshotBase64(): Promise<string>;
110
121
  };
111
- recordToReport?: (title?: string, opt?: {
112
- content?: string;
113
- screenshotBase64?: string;
114
- }) => Promise<void>;
122
+ recordToReport?: (title?: string, opt?: RecordToReportOptions) => Promise<void>;
115
123
  callActionInActionSpace?: (actionName: string, params?: unknown) => Promise<unknown>;
116
124
  aiAction?: (description: string, params?: Record<string, unknown>) => Promise<unknown>;
117
125
  aiWaitFor?: (assertion: string, options: Record<string, unknown>) => Promise<unknown>;
@@ -125,10 +133,9 @@ export interface BaseDevice {
125
133
  destroy?(): Promise<void>;
126
134
  }
127
135
  /**
128
- * Interface for platform-specific MCP tools manager
136
+ * Interface for platform-specific tools manager.
129
137
  */
130
138
  export interface IMidsceneTools {
131
- attachToServer(server: McpServer): void;
132
139
  initTools(): Promise<void>;
133
140
  destroy?(): Promise<void>;
134
141
  setToolDefaults?(toolDefaults: ToolDefaults): void;
@@ -1,4 +1,4 @@
1
- import type { ToolCliOption, ToolDefinition } from '../mcp/types';
1
+ import type { ToolCliOption, ToolDefinition } from '../agent-tools/types';
2
2
  export declare function parseValue(raw: string): unknown;
3
3
  export declare function parseCliArgs(args: string[]): Record<string, unknown>;
4
4
  export declare function getCliOptionDisplay(key: string, cliOption?: ToolCliOption): {
@@ -1,5 +1,5 @@
1
- import type { BaseMidsceneTools } from '../mcp/base-tools';
2
- import type { ToolDefinition } from '../mcp/types';
1
+ import type { BaseMidsceneTools } from '../agent-tools/base-tools';
2
+ import type { ToolDefinition } from '../agent-tools/types';
3
3
  export interface CLIExtraCommand {
4
4
  name: string;
5
5
  def: ToolDefinition;
@@ -5,9 +5,11 @@ export declare const MIDSCENE_DEBUG_MODEL_PROFILE = "MIDSCENE_DEBUG_MODEL_PROFIL
5
5
  export declare const MIDSCENE_DEBUG_MODEL_RESPONSE = "MIDSCENE_DEBUG_MODEL_RESPONSE";
6
6
  export declare const MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG = "MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG";
7
7
  export declare const MIDSCENE_DEBUG_MODE = "MIDSCENE_DEBUG_MODE";
8
- export declare const MIDSCENE_MCP_USE_PUPPETEER_MODE = "MIDSCENE_MCP_USE_PUPPETEER_MODE";
8
+ export declare const MIDSCENE_CHROME_PATH = "MIDSCENE_CHROME_PATH";
9
+ /**
10
+ * @deprecated Use MIDSCENE_CHROME_PATH instead. This is kept for backward compatibility.
11
+ */
9
12
  export declare const MIDSCENE_MCP_CHROME_PATH = "MIDSCENE_MCP_CHROME_PATH";
10
- export declare const MIDSCENE_MCP_ANDROID_MODE = "MIDSCENE_MCP_ANDROID_MODE";
11
13
  export declare const DOCKER_CONTAINER = "DOCKER_CONTAINER";
12
14
  export declare const MIDSCENE_LANGSMITH_DEBUG = "MIDSCENE_LANGSMITH_DEBUG";
13
15
  export declare const MIDSCENE_LANGFUSE_DEBUG = "MIDSCENE_LANGFUSE_DEBUG";
@@ -104,22 +106,22 @@ export declare const UNUSED_ENV_KEYS: string[];
104
106
  * cannot be overridden by overrideAIConfig
105
107
  */
106
108
  export declare const BASIC_ENV_KEYS: readonly ["MIDSCENE_DEBUG_MODE", "MIDSCENE_DEBUG_MODEL_PROFILE", "MIDSCENE_DEBUG_MODEL_RESPONSE", "MIDSCENE_RUN_DIR"];
107
- export declare const BOOLEAN_ENV_KEYS: readonly ["MIDSCENE_CACHE", "MIDSCENE_MCP_USE_PUPPETEER_MODE", "MIDSCENE_MCP_ANDROID_MODE", "MIDSCENE_LANGSMITH_DEBUG", "MIDSCENE_LANGFUSE_DEBUG", "MIDSCENE_REPORT_QUIET"];
109
+ export declare const BOOLEAN_ENV_KEYS: readonly ["MIDSCENE_CACHE", "MIDSCENE_LANGSMITH_DEBUG", "MIDSCENE_LANGFUSE_DEBUG", "MIDSCENE_REPORT_QUIET"];
108
110
  export declare const NUMBER_ENV_KEYS: readonly ["MIDSCENE_CACHE_MAX_FILENAME_LENGTH", "MIDSCENE_REPLANNING_CYCLE_LIMIT"];
109
- export declare const STRING_ENV_KEYS: readonly ["MIDSCENE_ADB_PATH", "MIDSCENE_ADB_REMOTE_HOST", "MIDSCENE_ADB_REMOTE_PORT", "MIDSCENE_ANDROID_IME_STRATEGY", "MIDSCENE_IOS_DEVICE_UDID", "MIDSCENE_IOS_SIMULATOR_UDID", "MIDSCENE_REPORT_TAG_NAME", "MIDSCENE_PREFERRED_LANGUAGE", "MATCH_BY_POSITION", "MIDSCENE_MCP_CHROME_PATH", "DOCKER_CONTAINER"];
111
+ export declare const STRING_ENV_KEYS: readonly ["MIDSCENE_ADB_PATH", "MIDSCENE_ADB_REMOTE_HOST", "MIDSCENE_ADB_REMOTE_PORT", "MIDSCENE_ANDROID_IME_STRATEGY", "MIDSCENE_IOS_DEVICE_UDID", "MIDSCENE_IOS_SIMULATOR_UDID", "MIDSCENE_REPORT_TAG_NAME", "MIDSCENE_PREFERRED_LANGUAGE", "MATCH_BY_POSITION", "MIDSCENE_CHROME_PATH", "MIDSCENE_MCP_CHROME_PATH", "DOCKER_CONTAINER"];
110
112
  /**
111
113
  * Non model related env keys, used for globally controlling the behavior of midscene
112
114
  * Cannot be overridden by agent.modelConfig but can be overridden by overrideAIConfig
113
115
  * Can be accessed at any time
114
116
  */
115
- export declare const GLOBAL_ENV_KEYS: readonly ["MIDSCENE_CACHE", "MIDSCENE_MCP_USE_PUPPETEER_MODE", "MIDSCENE_MCP_ANDROID_MODE", "MIDSCENE_LANGSMITH_DEBUG", "MIDSCENE_LANGFUSE_DEBUG", "MIDSCENE_REPORT_QUIET", "MIDSCENE_CACHE_MAX_FILENAME_LENGTH", "MIDSCENE_REPLANNING_CYCLE_LIMIT", "MIDSCENE_ADB_PATH", "MIDSCENE_ADB_REMOTE_HOST", "MIDSCENE_ADB_REMOTE_PORT", "MIDSCENE_ANDROID_IME_STRATEGY", "MIDSCENE_IOS_DEVICE_UDID", "MIDSCENE_IOS_SIMULATOR_UDID", "MIDSCENE_REPORT_TAG_NAME", "MIDSCENE_PREFERRED_LANGUAGE", "MATCH_BY_POSITION", "MIDSCENE_MCP_CHROME_PATH", "DOCKER_CONTAINER"];
117
+ export declare const GLOBAL_ENV_KEYS: readonly ["MIDSCENE_CACHE", "MIDSCENE_LANGSMITH_DEBUG", "MIDSCENE_LANGFUSE_DEBUG", "MIDSCENE_REPORT_QUIET", "MIDSCENE_CACHE_MAX_FILENAME_LENGTH", "MIDSCENE_REPLANNING_CYCLE_LIMIT", "MIDSCENE_ADB_PATH", "MIDSCENE_ADB_REMOTE_HOST", "MIDSCENE_ADB_REMOTE_PORT", "MIDSCENE_ANDROID_IME_STRATEGY", "MIDSCENE_IOS_DEVICE_UDID", "MIDSCENE_IOS_SIMULATOR_UDID", "MIDSCENE_REPORT_TAG_NAME", "MIDSCENE_PREFERRED_LANGUAGE", "MATCH_BY_POSITION", "MIDSCENE_CHROME_PATH", "MIDSCENE_MCP_CHROME_PATH", "DOCKER_CONTAINER"];
116
118
  /**
117
119
  * Model-related env keys, used to declare which model to use.
118
120
  * Can be overridden by both agent.modelConfig and overrideAIConfig
119
121
  * Can only be accessed after agent.constructor
120
122
  */
121
123
  export declare const MODEL_ENV_KEYS: readonly ["MIDSCENE_MODEL_NAME", "MIDSCENE_MODEL_INIT_CONFIG_JSON", "MIDSCENE_MODEL_EXTRA_BODY_JSON", "MIDSCENE_MODEL_API_KEY", "MIDSCENE_MODEL_BASE_URL", "MIDSCENE_MODEL_SOCKS_PROXY", "MIDSCENE_MODEL_HTTP_PROXY", "MIDSCENE_MODEL_TIMEOUT", "MIDSCENE_MODEL_TEMPERATURE", "MIDSCENE_MODEL_RETRY_COUNT", "MIDSCENE_MODEL_RETRY_INTERVAL", "MIDSCENE_MODEL_REASONING_EFFORT", "MIDSCENE_MODEL_REASONING_ENABLED", "MIDSCENE_MODEL_REASONING_BUDGET", "MIDSCENE_USE_VLM_UI_TARS", "MIDSCENE_USE_QWEN_VL", "MIDSCENE_USE_QWEN3_VL", "MIDSCENE_USE_DOUBAO_VISION", "MIDSCENE_USE_GEMINI", "MIDSCENE_USE_VL_MODEL", "OPENAI_API_KEY", "OPENAI_BASE_URL", "MIDSCENE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_OPENAI_HTTP_PROXY", "MIDSCENE_OPENAI_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_NAME", "MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_HTTP_PROXY", "MIDSCENE_INSIGHT_MODEL_BASE_URL", "MIDSCENE_INSIGHT_MODEL_API_KEY", "MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON", "MIDSCENE_INSIGHT_MODEL_EXTRA_BODY_JSON", "MIDSCENE_INSIGHT_MODEL_TIMEOUT", "MIDSCENE_INSIGHT_MODEL_TEMPERATURE", "MIDSCENE_INSIGHT_MODEL_RETRY_COUNT", "MIDSCENE_INSIGHT_MODEL_RETRY_INTERVAL", "MIDSCENE_INSIGHT_MODEL_FAMILY", "MIDSCENE_INSIGHT_MODEL_REASONING_EFFORT", "MIDSCENE_INSIGHT_MODEL_REASONING_ENABLED", "MIDSCENE_INSIGHT_MODEL_REASONING_BUDGET", "MIDSCENE_PLANNING_MODEL_NAME", "MIDSCENE_PLANNING_MODEL_SOCKS_PROXY", "MIDSCENE_PLANNING_MODEL_HTTP_PROXY", "MIDSCENE_PLANNING_MODEL_BASE_URL", "MIDSCENE_PLANNING_MODEL_API_KEY", "MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_MODEL_EXTRA_BODY_JSON", "MIDSCENE_PLANNING_MODEL_TIMEOUT", "MIDSCENE_PLANNING_MODEL_TEMPERATURE", "MIDSCENE_PLANNING_MODEL_RETRY_COUNT", "MIDSCENE_PLANNING_MODEL_RETRY_INTERVAL", "MIDSCENE_PLANNING_MODEL_FAMILY", "MIDSCENE_PLANNING_MODEL_REASONING_EFFORT", "MIDSCENE_PLANNING_MODEL_REASONING_ENABLED", "MIDSCENE_PLANNING_MODEL_REASONING_BUDGET", "MIDSCENE_MODEL_FAMILY"];
122
- export declare const ALL_ENV_KEYS: readonly [...string[], "MIDSCENE_DEBUG_MODE", "MIDSCENE_DEBUG_MODEL_PROFILE", "MIDSCENE_DEBUG_MODEL_RESPONSE", "MIDSCENE_RUN_DIR", "MIDSCENE_CACHE", "MIDSCENE_MCP_USE_PUPPETEER_MODE", "MIDSCENE_MCP_ANDROID_MODE", "MIDSCENE_LANGSMITH_DEBUG", "MIDSCENE_LANGFUSE_DEBUG", "MIDSCENE_REPORT_QUIET", "MIDSCENE_CACHE_MAX_FILENAME_LENGTH", "MIDSCENE_REPLANNING_CYCLE_LIMIT", "MIDSCENE_ADB_PATH", "MIDSCENE_ADB_REMOTE_HOST", "MIDSCENE_ADB_REMOTE_PORT", "MIDSCENE_ANDROID_IME_STRATEGY", "MIDSCENE_IOS_DEVICE_UDID", "MIDSCENE_IOS_SIMULATOR_UDID", "MIDSCENE_REPORT_TAG_NAME", "MIDSCENE_PREFERRED_LANGUAGE", "MATCH_BY_POSITION", "MIDSCENE_MCP_CHROME_PATH", "DOCKER_CONTAINER", "MIDSCENE_MODEL_NAME", "MIDSCENE_MODEL_INIT_CONFIG_JSON", "MIDSCENE_MODEL_EXTRA_BODY_JSON", "MIDSCENE_MODEL_API_KEY", "MIDSCENE_MODEL_BASE_URL", "MIDSCENE_MODEL_SOCKS_PROXY", "MIDSCENE_MODEL_HTTP_PROXY", "MIDSCENE_MODEL_TIMEOUT", "MIDSCENE_MODEL_TEMPERATURE", "MIDSCENE_MODEL_RETRY_COUNT", "MIDSCENE_MODEL_RETRY_INTERVAL", "MIDSCENE_MODEL_REASONING_EFFORT", "MIDSCENE_MODEL_REASONING_ENABLED", "MIDSCENE_MODEL_REASONING_BUDGET", "MIDSCENE_USE_VLM_UI_TARS", "MIDSCENE_USE_QWEN_VL", "MIDSCENE_USE_QWEN3_VL", "MIDSCENE_USE_DOUBAO_VISION", "MIDSCENE_USE_GEMINI", "MIDSCENE_USE_VL_MODEL", "OPENAI_API_KEY", "OPENAI_BASE_URL", "MIDSCENE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_OPENAI_HTTP_PROXY", "MIDSCENE_OPENAI_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_NAME", "MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_HTTP_PROXY", "MIDSCENE_INSIGHT_MODEL_BASE_URL", "MIDSCENE_INSIGHT_MODEL_API_KEY", "MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON", "MIDSCENE_INSIGHT_MODEL_EXTRA_BODY_JSON", "MIDSCENE_INSIGHT_MODEL_TIMEOUT", "MIDSCENE_INSIGHT_MODEL_TEMPERATURE", "MIDSCENE_INSIGHT_MODEL_RETRY_COUNT", "MIDSCENE_INSIGHT_MODEL_RETRY_INTERVAL", "MIDSCENE_INSIGHT_MODEL_FAMILY", "MIDSCENE_INSIGHT_MODEL_REASONING_EFFORT", "MIDSCENE_INSIGHT_MODEL_REASONING_ENABLED", "MIDSCENE_INSIGHT_MODEL_REASONING_BUDGET", 
"MIDSCENE_PLANNING_MODEL_NAME", "MIDSCENE_PLANNING_MODEL_SOCKS_PROXY", "MIDSCENE_PLANNING_MODEL_HTTP_PROXY", "MIDSCENE_PLANNING_MODEL_BASE_URL", "MIDSCENE_PLANNING_MODEL_API_KEY", "MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_MODEL_EXTRA_BODY_JSON", "MIDSCENE_PLANNING_MODEL_TIMEOUT", "MIDSCENE_PLANNING_MODEL_TEMPERATURE", "MIDSCENE_PLANNING_MODEL_RETRY_COUNT", "MIDSCENE_PLANNING_MODEL_RETRY_INTERVAL", "MIDSCENE_PLANNING_MODEL_FAMILY", "MIDSCENE_PLANNING_MODEL_REASONING_EFFORT", "MIDSCENE_PLANNING_MODEL_REASONING_ENABLED", "MIDSCENE_PLANNING_MODEL_REASONING_BUDGET", "MIDSCENE_MODEL_FAMILY"];
124
+ export declare const ALL_ENV_KEYS: readonly [...string[], "MIDSCENE_DEBUG_MODE", "MIDSCENE_DEBUG_MODEL_PROFILE", "MIDSCENE_DEBUG_MODEL_RESPONSE", "MIDSCENE_RUN_DIR", "MIDSCENE_CACHE", "MIDSCENE_LANGSMITH_DEBUG", "MIDSCENE_LANGFUSE_DEBUG", "MIDSCENE_REPORT_QUIET", "MIDSCENE_CACHE_MAX_FILENAME_LENGTH", "MIDSCENE_REPLANNING_CYCLE_LIMIT", "MIDSCENE_ADB_PATH", "MIDSCENE_ADB_REMOTE_HOST", "MIDSCENE_ADB_REMOTE_PORT", "MIDSCENE_ANDROID_IME_STRATEGY", "MIDSCENE_IOS_DEVICE_UDID", "MIDSCENE_IOS_SIMULATOR_UDID", "MIDSCENE_REPORT_TAG_NAME", "MIDSCENE_PREFERRED_LANGUAGE", "MATCH_BY_POSITION", "MIDSCENE_CHROME_PATH", "MIDSCENE_MCP_CHROME_PATH", "DOCKER_CONTAINER", "MIDSCENE_MODEL_NAME", "MIDSCENE_MODEL_INIT_CONFIG_JSON", "MIDSCENE_MODEL_EXTRA_BODY_JSON", "MIDSCENE_MODEL_API_KEY", "MIDSCENE_MODEL_BASE_URL", "MIDSCENE_MODEL_SOCKS_PROXY", "MIDSCENE_MODEL_HTTP_PROXY", "MIDSCENE_MODEL_TIMEOUT", "MIDSCENE_MODEL_TEMPERATURE", "MIDSCENE_MODEL_RETRY_COUNT", "MIDSCENE_MODEL_RETRY_INTERVAL", "MIDSCENE_MODEL_REASONING_EFFORT", "MIDSCENE_MODEL_REASONING_ENABLED", "MIDSCENE_MODEL_REASONING_BUDGET", "MIDSCENE_USE_VLM_UI_TARS", "MIDSCENE_USE_QWEN_VL", "MIDSCENE_USE_QWEN3_VL", "MIDSCENE_USE_DOUBAO_VISION", "MIDSCENE_USE_GEMINI", "MIDSCENE_USE_VL_MODEL", "OPENAI_API_KEY", "OPENAI_BASE_URL", "MIDSCENE_OPENAI_INIT_CONFIG_JSON", "MIDSCENE_OPENAI_HTTP_PROXY", "MIDSCENE_OPENAI_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_NAME", "MIDSCENE_INSIGHT_MODEL_SOCKS_PROXY", "MIDSCENE_INSIGHT_MODEL_HTTP_PROXY", "MIDSCENE_INSIGHT_MODEL_BASE_URL", "MIDSCENE_INSIGHT_MODEL_API_KEY", "MIDSCENE_INSIGHT_MODEL_INIT_CONFIG_JSON", "MIDSCENE_INSIGHT_MODEL_EXTRA_BODY_JSON", "MIDSCENE_INSIGHT_MODEL_TIMEOUT", "MIDSCENE_INSIGHT_MODEL_TEMPERATURE", "MIDSCENE_INSIGHT_MODEL_RETRY_COUNT", "MIDSCENE_INSIGHT_MODEL_RETRY_INTERVAL", "MIDSCENE_INSIGHT_MODEL_FAMILY", "MIDSCENE_INSIGHT_MODEL_REASONING_EFFORT", "MIDSCENE_INSIGHT_MODEL_REASONING_ENABLED", "MIDSCENE_INSIGHT_MODEL_REASONING_BUDGET", "MIDSCENE_PLANNING_MODEL_NAME", 
"MIDSCENE_PLANNING_MODEL_SOCKS_PROXY", "MIDSCENE_PLANNING_MODEL_HTTP_PROXY", "MIDSCENE_PLANNING_MODEL_BASE_URL", "MIDSCENE_PLANNING_MODEL_API_KEY", "MIDSCENE_PLANNING_MODEL_INIT_CONFIG_JSON", "MIDSCENE_PLANNING_MODEL_EXTRA_BODY_JSON", "MIDSCENE_PLANNING_MODEL_TIMEOUT", "MIDSCENE_PLANNING_MODEL_TEMPERATURE", "MIDSCENE_PLANNING_MODEL_RETRY_COUNT", "MIDSCENE_PLANNING_MODEL_RETRY_INTERVAL", "MIDSCENE_PLANNING_MODEL_FAMILY", "MIDSCENE_PLANNING_MODEL_REASONING_EFFORT", "MIDSCENE_PLANNING_MODEL_REASONING_ENABLED", "MIDSCENE_PLANNING_MODEL_REASONING_BUDGET", "MIDSCENE_MODEL_FAMILY"];
123
125
  export type TEnvKeys = (typeof ALL_ENV_KEYS)[number];
124
126
  export type TGlobalConfig = Record<TEnvKeys, string | undefined>;
125
127
  /**
@@ -1,3 +1,3 @@
1
1
  export { imageInfoOfBase64, isValidPNGImageBuffer, isValidJPEGImageBuffer, isValidImageBuffer, validateScreenshotBuffer, type ValidateScreenshotBufferOptions, } from './info';
2
- export { resizeAndConvertImgBuffer, resizeImgBase64, zoomForGPT4o, saveBase64Image, paddingToMatchBlockByBase64, cropByRect, scaleImage, localImg2Base64, httpImg2Base64, preProcessImageUrl, parseBase64, createImgBase64ByFormat, inferBase64ImageFormat, normalizeBase64Image, } from './transform';
2
+ export { resizeAndConvertImgBuffer, resizeImgBase64, zoomForGPT4o, saveBase64Image, paddingToMatchBlockByBase64, cropByRect, scaleImage, localImg2Base64, httpImg2Base64, preProcessImageUrl, parseBase64, createImgBase64ByFormat, inferBase64ImageFormat, normalizeBase64Image, normalizeScreenshotBase64, type NormalizeScreenshotBase64Options, } from './transform';
3
3
  export { processImageElementInfo, compositeElementInfoImg, compositePointMarkerImg, annotateRects, } from './box-select';
@@ -29,6 +29,10 @@ export declare function resizeAndConvertImgBuffer(inputFormat: string, inputData
29
29
  export declare const normalizeBase64Body: (body: string) => string;
30
30
  export declare const inferBase64ImageFormat: (base64Body: string) => "jpeg" | "png";
31
31
  export declare const createImgBase64ByFormat: (format: string, body: string) => string;
32
+ export interface NormalizeScreenshotBase64Options {
33
+ label?: string;
34
+ }
35
+ export declare const normalizeScreenshotBase64: (base64: string, options?: NormalizeScreenshotBase64Options) => string;
32
36
  export declare const normalizeBase64Image: (base64: string) => string;
33
37
  export declare function resizeImgBase64(inputBase64: string, newSize: {
34
38
  width: number;
@@ -1,7 +1,7 @@
1
1
  /**
2
- * Internal-only helpers for CLI/MCP argument key aliasing.
2
+ * Internal-only helpers for CLI argument key aliasing.
3
3
  * Not re-exported from the package entry point — keep consumers within
4
- * `cli/` and `mcp/`.
4
+ * `cli/`.
5
5
  */
6
6
  export declare function kebabToCamel(str: string): string;
7
7
  export declare function camelToKebab(str: string): string;
@@ -11,7 +11,6 @@ export declare function generateHashId(rect: any, content?: string): string;
11
11
  * @throws Error with the provided message if the condition is false
12
12
  */
13
13
  export declare function assert(condition: any, message?: string): asserts condition;
14
- export declare function setIsMcp(value: boolean): void;
15
14
  export declare function logMsg(...message: Parameters<typeof console.log>): void;
16
15
  export declare function repeat(times: number, fn: (index: number) => Promise<void>): Promise<void>;
17
16
  export declare const escapeScriptTag: (html: string) => string;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@midscene/shared",
3
- "version": "1.9.7",
3
+ "version": "1.9.8-beta-20260618014851.0",
4
4
  "repository": "https://github.com/web-infra-dev/midscene",
5
5
  "homepage": "https://midscenejs.com/",
6
6
  "types": "./dist/types/index.d.ts",
@@ -57,16 +57,26 @@
57
57
  "import": "./dist/es/common.mjs",
58
58
  "require": "./dist/lib/common.js"
59
59
  },
60
- "./mcp": {
61
- "types": "./dist/types/mcp/index.d.ts",
62
- "import": "./dist/es/mcp/index.mjs",
63
- "require": "./dist/lib/mcp/index.js"
60
+ "./agent-tools": {
61
+ "types": "./dist/types/agent-tools/index.d.ts",
62
+ "import": "./dist/es/agent-tools/index.mjs",
63
+ "require": "./dist/lib/agent-tools/index.js"
64
+ },
65
+ "./agent-tools/*": {
66
+ "types": "./dist/types/agent-tools/*.d.ts",
67
+ "import": "./dist/es/agent-tools/*.mjs",
68
+ "require": "./dist/lib/agent-tools/*.js"
64
69
  },
65
70
  "./cli": {
66
71
  "types": "./dist/types/cli/index.d.ts",
67
72
  "import": "./dist/es/cli/index.mjs",
68
73
  "require": "./dist/lib/cli/index.js"
69
74
  },
75
+ "./recorder": {
76
+ "types": "./dist/types/recorder.d.ts",
77
+ "import": "./dist/es/recorder.mjs",
78
+ "require": "./dist/lib/recorder.js"
79
+ },
70
80
  "./logger": {
71
81
  "types": "./dist/types/logger.d.ts",
72
82
  "import": "./dist/es/logger.mjs",
@@ -84,11 +94,9 @@
84
94
  "README.md"
85
95
  ],
86
96
  "dependencies": {
87
- "@modelcontextprotocol/sdk": "1.10.2",
88
97
  "@silvia-odwyer/photon": "0.3.3",
89
98
  "@silvia-odwyer/photon-node": "0.3.3",
90
99
  "debug": "4.4.0",
91
- "express": "^4.21.2",
92
100
  "js-sha256": "0.11.0",
93
101
  "sharp": "^0.34.3",
94
102
  "dotenv": "^16.4.5",
@@ -98,7 +106,6 @@
98
106
  "devDependencies": {
99
107
  "@rslib/core": "^0.18.3",
100
108
  "@types/debug": "4.1.12",
101
- "@types/express": "^4.17.21",
102
109
  "@types/node": "^18.0.0",
103
110
  "@ui-tars/shared": "1.2.0",
104
111
  "openai": "6.3.0",