@midscene/shared 1.9.8-beta-20260618014851.0 → 1.9.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. package/dist/es/cli/cli-runner.mjs +1 -1
  2. package/dist/es/env/parse-model-config.mjs +1 -1
  3. package/dist/es/env/types.mjs +5 -3
  4. package/dist/es/mcp/base-server.mjs +295 -0
  5. package/dist/es/{agent-tools → mcp}/base-tools.mjs +8 -1
  6. package/dist/es/{agent-tools → mcp}/chrome-path.mjs +3 -14
  7. package/dist/es/{agent-tools → mcp}/index.mjs +3 -0
  8. package/dist/es/mcp/inject-report-html-plugin.mjs +53 -0
  9. package/dist/es/mcp/launcher-helper.mjs +52 -0
  10. package/dist/es/{agent-tools → mcp}/tool-generator.mjs +3 -3
  11. package/dist/es/utils.mjs +6 -2
  12. package/dist/lib/cli/cli-runner.js +1 -1
  13. package/dist/lib/env/parse-model-config.js +1 -1
  14. package/dist/lib/env/types.js +10 -5
  15. package/dist/lib/mcp/base-server.js +345 -0
  16. package/dist/lib/{agent-tools → mcp}/base-tools.js +8 -1
  17. package/dist/lib/{agent-tools → mcp}/chrome-path.js +2 -13
  18. package/dist/lib/{agent-tools → mcp}/index.js +37 -16
  19. package/dist/lib/mcp/inject-report-html-plugin.js +98 -0
  20. package/dist/lib/mcp/launcher-helper.js +86 -0
  21. package/dist/lib/{agent-tools → mcp}/tool-generator.js +3 -3
  22. package/dist/lib/utils.js +15 -8
  23. package/dist/types/cli/cli-args.d.ts +1 -1
  24. package/dist/types/cli/cli-runner.d.ts +2 -2
  25. package/dist/types/env/types.d.ts +6 -8
  26. package/dist/types/key-alias-utils.d.ts +2 -2
  27. package/dist/types/mcp/base-server.d.ts +106 -0
  28. package/dist/types/{agent-tools → mcp}/base-tools.d.ts +13 -7
  29. package/dist/types/{agent-tools → mcp}/index.d.ts +3 -0
  30. package/dist/types/{agent-tools → mcp}/init-arg-utils.d.ts +3 -3
  31. package/dist/types/mcp/inject-report-html-plugin.d.ts +18 -0
  32. package/dist/types/mcp/launcher-helper.d.ts +94 -0
  33. package/dist/types/{agent-tools → mcp}/tool-defaults.d.ts +6 -5
  34. package/dist/types/{agent-tools → mcp}/tool-generator.d.ts +1 -1
  35. package/dist/types/{agent-tools → mcp}/types.d.ts +9 -4
  36. package/dist/types/utils.d.ts +1 -0
  37. package/package.json +8 -15
  38. package/src/cli/cli-args.ts +1 -1
  39. package/src/cli/cli-runner.ts +4 -4
  40. package/src/env/types.ts +5 -5
  41. package/src/key-alias-utils.ts +2 -2
  42. package/src/mcp/base-server.ts +529 -0
  43. package/src/{agent-tools → mcp}/base-tools.ts +33 -8
  44. package/src/{agent-tools → mcp}/chrome-path.ts +3 -20
  45. package/src/{agent-tools → mcp}/index.ts +3 -0
  46. package/src/{agent-tools → mcp}/init-arg-utils.ts +3 -3
  47. package/src/mcp/inject-report-html-plugin.ts +119 -0
  48. package/src/mcp/launcher-helper.ts +200 -0
  49. package/src/{agent-tools → mcp}/tool-defaults.ts +6 -5
  50. package/src/{agent-tools → mcp}/tool-generator.ts +6 -6
  51. package/src/{agent-tools → mcp}/types.ts +9 -4
  52. package/src/utils.ts +10 -1
  53. /package/dist/es/{agent-tools → mcp}/agent-behavior-init-args.mjs +0 -0
  54. /package/dist/es/{agent-tools → mcp}/cli-report-session.mjs +0 -0
  55. /package/dist/es/{agent-tools → mcp}/error-formatter.mjs +0 -0
  56. /package/dist/es/{agent-tools → mcp}/init-arg-utils.mjs +0 -0
  57. /package/dist/es/{agent-tools → mcp}/tool-defaults.mjs +0 -0
  58. /package/dist/es/{agent-tools → mcp}/types.mjs +0 -0
  59. /package/dist/es/{agent-tools → mcp}/user-prompt.mjs +0 -0
  60. /package/dist/lib/{agent-tools → mcp}/agent-behavior-init-args.js +0 -0
  61. /package/dist/lib/{agent-tools → mcp}/cli-report-session.js +0 -0
  62. /package/dist/lib/{agent-tools → mcp}/error-formatter.js +0 -0
  63. /package/dist/lib/{agent-tools → mcp}/init-arg-utils.js +0 -0
  64. /package/dist/lib/{agent-tools → mcp}/tool-defaults.js +0 -0
  65. /package/dist/lib/{agent-tools → mcp}/types.js +0 -0
  66. /package/dist/lib/{agent-tools → mcp}/user-prompt.js +0 -0
  67. /package/dist/types/{agent-tools → mcp}/agent-behavior-init-args.d.ts +0 -0
  68. /package/dist/types/{agent-tools → mcp}/chrome-path.d.ts +0 -0
  69. /package/dist/types/{agent-tools → mcp}/cli-report-session.d.ts +0 -0
  70. /package/dist/types/{agent-tools → mcp}/error-formatter.d.ts +0 -0
  71. /package/dist/types/{agent-tools → mcp}/user-prompt.d.ts +0 -0
  72. /package/src/{agent-tools → mcp}/agent-behavior-init-args.ts +0 -0
  73. /package/src/{agent-tools → mcp}/cli-report-session.ts +0 -0
  74. /package/src/{agent-tools → mcp}/error-formatter.ts +0 -0
  75. /package/src/{agent-tools → mcp}/user-prompt.ts +0 -0
@@ -0,0 +1,94 @@
1
+ import type { BaseMCPServer } from './base-server';
2
+ import type { HttpLaunchOptions, LaunchMCPServerResult } from './base-server';
3
+ import type { IMidsceneTools } from './types';
4
+ export interface LaunchMCPServerOptions extends HttpLaunchOptions {
5
+ /**
6
+ * Whether to show server logs
7
+ * @default true
8
+ */
9
+ verbose?: boolean;
10
+ }
11
+ /**
12
+ * Generic agent type (avoid importing from @midscene/core to prevent circular deps)
13
+ */
14
+ export interface GenericAgent<TDevice = any> {
15
+ interface: TDevice;
16
+ constructor: {
17
+ name: string;
18
+ };
19
+ }
20
+ /**
21
+ * Additional information for logging server startup
22
+ */
23
+ export interface StartupInfo {
24
+ port?: number;
25
+ host?: string;
26
+ }
27
+ export interface MCPServerLauncherConfig<AgentType extends GenericAgent = GenericAgent, ToolsManagerType extends IMidsceneTools = IMidsceneTools> {
28
+ agent: AgentType;
29
+ platformName: string;
30
+ ToolsManagerClass: new (...args: any[]) => ToolsManagerType;
31
+ MCPServerClass: new (toolsManager?: ToolsManagerType) => BaseMCPServer;
32
+ }
33
+ /**
34
+ * Create a generic MCP server launcher for a given agent, tools manager, and MCP server.
35
+ *
36
+ * This helper centralizes the common wiring logic used by platform-specific launchers:
37
+ * it constructs a tools manager, attaches the provided `agent` to it, then instantiates
38
+ * the `MCPServerClass` and exposes convenience methods to start the server over stdio
39
+ * (`launch`) or HTTP (`launchHttp`).
40
+ *
41
+ * Use this helper when adding a new platform-specific launcher or when you want to
42
+ * avoid duplicating boilerplate code for starting an MCP server. Typically, callers
43
+ * provide:
44
+ * - an `agent` instance that contains the underlying device on its `interface` property
45
+ * - a `ToolsManagerClass` that knows how to expose tools for that agent
46
+ * - an `MCPServerClass` that implements the MCP protocol and supports `launch` and
47
+ * `launchHttp` methods.
48
+ *
49
+ * The returned object has two methods:
50
+ * - `launch(options?)` to start the server using stdio transport
51
+ * - `launchHttp(options)` to start the server using HTTP transport
52
+ * Both methods accept a `verbose` flag to control console logging.
53
+ *
54
+ * @param config Configuration describing the agent, platform name (for logging),
55
+ * tools manager implementation, and MCP server implementation.
56
+ *
57
+ * @returns An object with `launch` and `launchHttp` methods to start the MCP server.
58
+ *
59
+ * @example
60
+ * ```typescript
61
+ * import { createMCPServerLauncher } from '@midscene/shared/mcp';
62
+ * import { Agent } from '@midscene/core/agent';
63
+ * import { WebMidsceneTools } from './web-tools';
64
+ * import { WebMCPServer } from './server';
65
+ *
66
+ * const agent = new Agent();
67
+ * const launcher = createMCPServerLauncher({
68
+ * agent,
69
+ * platformName: 'Web',
70
+ * ToolsManagerClass: WebMidsceneTools,
71
+ * MCPServerClass: WebMCPServer,
72
+ * });
73
+ *
74
+ * // Start with stdio
75
+ * await launcher.launch({ verbose: true });
76
+ *
77
+ * // Or start with HTTP
78
+ * await launcher.launchHttp({ port: 3000, host: 'localhost' });
79
+ * ```
80
+ *
81
+ * @internal
82
+ */
83
+ export declare function createMCPServerLauncher<AgentType extends GenericAgent, ToolsManagerType extends IMidsceneTools>(config: MCPServerLauncherConfig<AgentType, ToolsManagerType>): {
84
+ /**
85
+ * Launch the MCP server with stdio transport
86
+ */
87
+ launch(options?: {
88
+ verbose?: boolean;
89
+ }): Promise<LaunchMCPServerResult>;
90
+ /**
91
+ * Launch the MCP server with HTTP transport
92
+ */
93
+ launchHttp(options: LaunchMCPServerOptions): Promise<LaunchMCPServerResult>;
94
+ };
@@ -1,10 +1,10 @@
1
1
  /**
2
2
  * Unified, declarative mechanism for "force a default option on every tool
3
- * call" behaviors exposed by device and Agent Skill CLIs.
3
+ * call" behaviors exposed by MCP servers and the device / Agent Skill CLIs.
4
4
  *
5
5
  * Adding a new behavior flag (e.g. `--deep-search`) is a one-line change to
6
6
  * {@link TOOL_BEHAVIOR_FLAGS}: declare which default-option "bag" it fills.
7
- * The tool generator, tools managers and CLI parsing are all generic
7
+ * The tool generator, servers, tools managers and CLI parsing are all generic
8
8
  * over {@link ToolDefaults} and never need to learn about individual flags.
9
9
  *
10
10
  * See https://github.com/web-infra-dev/midscene/issues/2446.
@@ -50,12 +50,13 @@ export declare function resolveToolDefaults(isEnabled: (cli: string) => boolean)
50
50
  *
51
51
  * Behavior flags (e.g. `--deep-locate`) are global: they may appear anywhere
52
52
  * in argv and are not tied to a specific sub-command. They are recognized by
53
- * exact kebab-case match and removed so a strict per-command parser never sees them. Every other
53
+ * exact kebab-case match — the same surface the MCP `parseArgs` config exposes
54
+ * — and removed so a strict per-command parser never sees them. Every other
54
55
  * token is returned untouched and in order for that per-command parser.
55
56
  *
56
57
  * This is the single place that knows how a behavior flag looks on the command
57
- * line; the device / Agent Skill CLI resolves defaults from
58
- * {@link TOOL_BEHAVIOR_FLAGS} through here / {@link resolveToolDefaults}.
58
+ * line; both the device / Agent Skill CLI and the MCP launch path resolve their
59
+ * defaults from {@link TOOL_BEHAVIOR_FLAGS} through here / {@link resolveToolDefaults}.
59
60
  */
60
61
  export declare function stripBehaviorFlags(argv: readonly string[]): {
61
62
  rawArgs: string[];
@@ -3,7 +3,7 @@ import type { ActionSpaceItem, BaseAgent, ToolCliMetadata, ToolDefinition, ToolS
3
3
  import { composeUserPrompt } from './user-prompt';
4
4
  export { composeUserPrompt };
5
5
  /**
6
- * Converts DeviceAction from actionSpace into ToolDefinition.
6
+ * Converts DeviceAction from actionSpace into MCP ToolDefinition
7
7
  * This is the core logic that removes need for hardcoded tool definitions
8
8
  */
9
9
  export declare function generateToolsFromActionSpace(actionSpace: ActionSpaceItem[], getAgent: (args?: Record<string, unknown>) => Promise<BaseAgent>, sanitizeArgs?: (args: Record<string, unknown>) => Record<string, unknown>, initArgSchema?: ToolSchema, initArgCliMetadata?: ToolCliMetadata, toolDefaults?: ToolDefaults): ToolDefinition[];
@@ -1,3 +1,4 @@
1
+ import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
1
2
  import type { z } from 'zod';
2
3
  import type { ToolDefaults } from './tool-defaults';
3
4
  /**
@@ -6,7 +7,7 @@ import type { ToolDefaults } from './tool-defaults';
6
7
  export declare const defaultAppLoadingTimeoutMs = 10000;
7
8
  export declare const defaultAppLoadingCheckIntervalMs = 2000;
8
9
  /**
9
- * Content item types for tool results.
10
+ * Content item types for tool results (MCP compatible)
10
11
  */
11
12
  export type ToolResultContent = {
12
13
  type: 'text';
@@ -32,7 +33,7 @@ export type ToolResultContent = {
32
33
  };
33
34
  };
34
35
  /**
35
- * Result type for tool execution.
36
+ * Result type for tool execution (MCP compatible)
36
37
  */
37
38
  export interface ToolResult {
38
39
  [x: string]: unknown;
@@ -57,7 +58,7 @@ export interface ToolCliMetadata {
57
58
  options?: Record<string, ToolCliOption>;
58
59
  }
59
60
  /**
60
- * Tool definition for Midscene CLI and Skill surfaces.
61
+ * Tool definition for MCP server
61
62
  */
62
63
  export interface ToolDefinition<T = Record<string, unknown>> {
63
64
  name: string;
@@ -66,6 +67,9 @@ export interface ToolDefinition<T = Record<string, unknown>> {
66
67
  handler: ToolHandler<T>;
67
68
  cli?: ToolCliMetadata;
68
69
  }
70
+ /**
71
+ * Tool type for mcpKitForAgent return value
72
+ */
69
73
  export type Tool = ToolDefinition;
70
74
  /**
71
75
  * Action space item definition
@@ -133,9 +137,10 @@ export interface BaseDevice {
133
137
  destroy?(): Promise<void>;
134
138
  }
135
139
  /**
136
- * Interface for platform-specific tools manager.
140
+ * Interface for platform-specific MCP tools manager
137
141
  */
138
142
  export interface IMidsceneTools {
143
+ attachToServer(server: McpServer): void;
139
144
  initTools(): Promise<void>;
140
145
  destroy?(): Promise<void>;
141
146
  setToolDefaults?(toolDefaults: ToolDefaults): void;
@@ -11,6 +11,7 @@ export declare function generateHashId(rect: any, content?: string): string;
11
11
  * @throws Error with the provided message if the condition is false
12
12
  */
13
13
  export declare function assert(condition: any, message?: string): asserts condition;
14
+ export declare function setIsMcp(value: boolean): void;
14
15
  export declare function logMsg(...message: Parameters<typeof console.log>): void;
15
16
  export declare function repeat(times: number, fn: (index: number) => Promise<void>): Promise<void>;
16
17
  export declare const escapeScriptTag: (html: string) => string;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@midscene/shared",
3
- "version": "1.9.8-beta-20260618014851.0",
3
+ "version": "1.9.8",
4
4
  "repository": "https://github.com/web-infra-dev/midscene",
5
5
  "homepage": "https://midscenejs.com/",
6
6
  "types": "./dist/types/index.d.ts",
@@ -57,26 +57,16 @@
57
57
  "import": "./dist/es/common.mjs",
58
58
  "require": "./dist/lib/common.js"
59
59
  },
60
- "./agent-tools": {
61
- "types": "./dist/types/agent-tools/index.d.ts",
62
- "import": "./dist/es/agent-tools/index.mjs",
63
- "require": "./dist/lib/agent-tools/index.js"
64
- },
65
- "./agent-tools/*": {
66
- "types": "./dist/types/agent-tools/*.d.ts",
67
- "import": "./dist/es/agent-tools/*.mjs",
68
- "require": "./dist/lib/agent-tools/*.js"
60
+ "./mcp": {
61
+ "types": "./dist/types/mcp/index.d.ts",
62
+ "import": "./dist/es/mcp/index.mjs",
63
+ "require": "./dist/lib/mcp/index.js"
69
64
  },
70
65
  "./cli": {
71
66
  "types": "./dist/types/cli/index.d.ts",
72
67
  "import": "./dist/es/cli/index.mjs",
73
68
  "require": "./dist/lib/cli/index.js"
74
69
  },
75
- "./recorder": {
76
- "types": "./dist/types/recorder.d.ts",
77
- "import": "./dist/es/recorder.mjs",
78
- "require": "./dist/lib/recorder.js"
79
- },
80
70
  "./logger": {
81
71
  "types": "./dist/types/logger.d.ts",
82
72
  "import": "./dist/es/logger.mjs",
@@ -94,9 +84,11 @@
94
84
  "README.md"
95
85
  ],
96
86
  "dependencies": {
87
+ "@modelcontextprotocol/sdk": "1.10.2",
97
88
  "@silvia-odwyer/photon": "0.3.3",
98
89
  "@silvia-odwyer/photon-node": "0.3.3",
99
90
  "debug": "4.4.0",
91
+ "express": "^4.21.2",
100
92
  "js-sha256": "0.11.0",
101
93
  "sharp": "^0.34.3",
102
94
  "dotenv": "^16.4.5",
@@ -106,6 +98,7 @@
106
98
  "devDependencies": {
107
99
  "@rslib/core": "^0.18.3",
108
100
  "@types/debug": "4.1.12",
101
+ "@types/express": "^4.17.21",
109
102
  "@types/node": "^18.0.0",
110
103
  "@ui-tars/shared": "1.2.0",
111
104
  "openai": "6.3.0",
@@ -1,6 +1,6 @@
1
1
  import { z } from 'zod';
2
- import type { ToolCliOption, ToolDefinition } from '../agent-tools/types';
3
2
  import { getKeyAliases } from '../key-alias-utils';
3
+ import type { ToolCliOption, ToolDefinition } from '../mcp/types';
4
4
  import { CLIError } from './cli-error';
5
5
 
6
6
  export function parseValue(raw: string): unknown {
@@ -2,14 +2,14 @@ import { existsSync, writeFileSync } from 'node:fs';
2
2
  import { tmpdir } from 'node:os';
3
3
  import { join } from 'node:path';
4
4
  import dotenv from 'dotenv';
5
- import type { BaseMidsceneTools } from '../agent-tools/base-tools';
6
- import { stripBehaviorFlags } from '../agent-tools/tool-defaults';
5
+ import { getDebug } from '../logger';
6
+ import type { BaseMidsceneTools } from '../mcp/base-tools';
7
+ import { stripBehaviorFlags } from '../mcp/tool-defaults';
7
8
  import type {
8
9
  ToolDefinition,
9
10
  ToolResult,
10
11
  ToolResultContent,
11
- } from '../agent-tools/types';
12
- import { getDebug } from '../logger';
12
+ } from '../mcp/types';
13
13
  import {
14
14
  canonicalizeCliArgKeys,
15
15
  formatCliValidationError,
package/src/env/types.ts CHANGED
@@ -8,11 +8,10 @@ export const MIDSCENE_DEBUG_MODEL_RESPONSE = 'MIDSCENE_DEBUG_MODEL_RESPONSE';
8
8
  export const MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG =
9
9
  'MIDSCENE_DANGEROUSLY_PRINT_ALL_CONFIG';
10
10
  export const MIDSCENE_DEBUG_MODE = 'MIDSCENE_DEBUG_MODE';
11
- export const MIDSCENE_CHROME_PATH = 'MIDSCENE_CHROME_PATH';
12
- /**
13
- * @deprecated Use MIDSCENE_CHROME_PATH instead. This is kept for backward compatibility.
14
- */
11
+ export const MIDSCENE_MCP_USE_PUPPETEER_MODE =
12
+ 'MIDSCENE_MCP_USE_PUPPETEER_MODE';
15
13
  export const MIDSCENE_MCP_CHROME_PATH = 'MIDSCENE_MCP_CHROME_PATH';
14
+ export const MIDSCENE_MCP_ANDROID_MODE = 'MIDSCENE_MCP_ANDROID_MODE';
16
15
  export const DOCKER_CONTAINER = 'DOCKER_CONTAINER';
17
16
 
18
17
  // Observability
@@ -167,6 +166,8 @@ export const BASIC_ENV_KEYS = [
167
166
 
168
167
  export const BOOLEAN_ENV_KEYS = [
169
168
  MIDSCENE_CACHE,
169
+ MIDSCENE_MCP_USE_PUPPETEER_MODE,
170
+ MIDSCENE_MCP_ANDROID_MODE,
170
171
  MIDSCENE_LANGSMITH_DEBUG,
171
172
  MIDSCENE_LANGFUSE_DEBUG,
172
173
  MIDSCENE_REPORT_QUIET,
@@ -187,7 +188,6 @@ export const STRING_ENV_KEYS = [
187
188
  MIDSCENE_REPORT_TAG_NAME,
188
189
  MIDSCENE_PREFERRED_LANGUAGE,
189
190
  MATCH_BY_POSITION,
190
- MIDSCENE_CHROME_PATH,
191
191
  MIDSCENE_MCP_CHROME_PATH,
192
192
  DOCKER_CONTAINER,
193
193
  ] as const;
@@ -1,7 +1,7 @@
1
1
  /**
2
- * Internal-only helpers for CLI argument key aliasing.
2
+ * Internal-only helpers for CLI/MCP argument key aliasing.
3
3
  * Not re-exported from the package entry point — keep consumers within
4
- * `cli/`.
4
+ * `cli/` and `mcp/`.
5
5
  */
6
6
 
7
7
  export function kebabToCamel(str: string): string {