@midscene/shared 0.30.10 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129)
  1. package/dist/es/build/rspack-config.mjs +4 -0
  2. package/dist/es/constants/example-code.mjs +4 -4
  3. package/dist/es/env/constants.mjs +27 -82
  4. package/dist/es/env/global-config-manager.mjs +2 -3
  5. package/dist/es/env/helper.mjs +12 -17
  6. package/dist/es/env/init-debug.mjs +6 -6
  7. package/dist/es/env/model-config-manager.mjs +45 -65
  8. package/dist/es/env/parse-model-config.mjs +112 -0
  9. package/dist/es/env/types.mjs +70 -162
  10. package/dist/es/extractor/dom-util.mjs +10 -18
  11. package/dist/es/extractor/index.mjs +2 -3
  12. package/dist/es/extractor/locator.mjs +8 -15
  13. package/dist/es/extractor/tree.mjs +2 -5
  14. package/dist/es/extractor/util.mjs +4 -28
  15. package/dist/es/extractor/web-extractor.mjs +7 -14
  16. package/dist/es/index.mjs +2 -1
  17. package/dist/es/mcp/base-server.mjs +250 -0
  18. package/dist/es/mcp/base-tools.mjs +84 -0
  19. package/dist/es/mcp/index.mjs +5 -0
  20. package/dist/es/mcp/inject-report-html-plugin.mjs +53 -0
  21. package/dist/es/mcp/tool-generator.mjs +207 -0
  22. package/dist/es/mcp/types.mjs +3 -0
  23. package/dist/es/node/fs.mjs +2 -2
  24. package/dist/es/utils.mjs +2 -3
  25. package/dist/es/zod-schema-utils.mjs +54 -0
  26. package/dist/lib/baseDB.js +2 -2
  27. package/dist/lib/build/copy-static.js +4 -4
  28. package/dist/lib/build/rspack-config.js +38 -0
  29. package/dist/lib/common.js +4 -4
  30. package/dist/lib/constants/example-code.js +6 -6
  31. package/dist/lib/constants/index.js +13 -13
  32. package/dist/lib/env/basic.js +2 -2
  33. package/dist/lib/env/constants.js +32 -90
  34. package/dist/lib/env/global-config-manager.js +4 -5
  35. package/dist/lib/env/helper.js +13 -22
  36. package/dist/lib/env/index.js +24 -28
  37. package/dist/lib/env/init-debug.js +7 -7
  38. package/dist/lib/env/model-config-manager.js +47 -67
  39. package/dist/lib/env/parse-model-config.js +155 -0
  40. package/dist/lib/env/types.js +146 -379
  41. package/dist/lib/env/utils.js +4 -4
  42. package/dist/lib/extractor/constants.js +4 -4
  43. package/dist/lib/extractor/debug.js +1 -1
  44. package/dist/lib/extractor/dom-util.js +18 -26
  45. package/dist/lib/extractor/index.js +11 -21
  46. package/dist/lib/extractor/locator.js +10 -20
  47. package/dist/lib/extractor/tree.js +4 -7
  48. package/dist/lib/extractor/util.js +17 -50
  49. package/dist/lib/extractor/web-extractor.js +12 -19
  50. package/dist/lib/img/box-select.js +4 -4
  51. package/dist/lib/img/draw-box.js +2 -2
  52. package/dist/lib/img/get-jimp.js +16 -34
  53. package/dist/lib/img/get-photon.js +24 -47
  54. package/dist/lib/img/get-sharp.js +16 -34
  55. package/dist/lib/img/index.js +18 -18
  56. package/dist/lib/img/info.js +4 -4
  57. package/dist/lib/img/transform.js +10 -10
  58. package/dist/lib/index.js +8 -4
  59. package/dist/lib/logger.js +4 -4
  60. package/dist/lib/mcp/base-server.js +300 -0
  61. package/dist/lib/mcp/base-tools.js +118 -0
  62. package/dist/lib/mcp/index.js +86 -0
  63. package/dist/lib/mcp/inject-report-html-plugin.js +98 -0
  64. package/dist/lib/mcp/tool-generator.js +244 -0
  65. package/dist/lib/mcp/types.js +40 -0
  66. package/dist/lib/node/fs.js +6 -6
  67. package/dist/lib/node/index.js +6 -8
  68. package/dist/lib/polyfills/async-hooks.js +2 -2
  69. package/dist/lib/polyfills/index.js +6 -8
  70. package/dist/lib/types/index.js +2 -2
  71. package/dist/lib/us-keyboard-layout.js +2 -2
  72. package/dist/lib/utils.js +13 -14
  73. package/dist/lib/zod-schema-utils.js +97 -0
  74. package/dist/types/build/rspack-config.d.ts +8 -0
  75. package/dist/types/constants/example-code.d.ts +1 -1
  76. package/dist/types/env/constants.d.ts +5 -18
  77. package/dist/types/env/global-config-manager.d.ts +1 -2
  78. package/dist/types/env/helper.d.ts +2 -4
  79. package/dist/types/env/model-config-manager.d.ts +8 -7
  80. package/dist/types/env/parse-model-config.d.ts +28 -0
  81. package/dist/types/env/types.d.ts +152 -191
  82. package/dist/types/extractor/dom-util.d.ts +2 -15
  83. package/dist/types/extractor/index.d.ts +1 -2
  84. package/dist/types/extractor/locator.d.ts +0 -1
  85. package/dist/types/extractor/tree.d.ts +1 -4
  86. package/dist/types/extractor/util.d.ts +0 -3
  87. package/dist/types/index.d.ts +1 -0
  88. package/dist/types/mcp/base-server.d.ts +77 -0
  89. package/dist/types/mcp/base-tools.d.ts +55 -0
  90. package/dist/types/mcp/index.d.ts +5 -0
  91. package/dist/types/mcp/inject-report-html-plugin.d.ts +18 -0
  92. package/dist/types/mcp/tool-generator.d.ts +11 -0
  93. package/dist/types/mcp/types.d.ts +100 -0
  94. package/dist/types/types/index.d.ts +5 -2
  95. package/dist/types/zod-schema-utils.d.ts +23 -0
  96. package/package.json +19 -4
  97. package/src/build/rspack-config.ts +12 -0
  98. package/src/constants/example-code.ts +4 -4
  99. package/src/env/constants.ts +58 -203
  100. package/src/env/global-config-manager.ts +7 -7
  101. package/src/env/helper.ts +10 -31
  102. package/src/env/init-debug.ts +11 -6
  103. package/src/env/model-config-manager.ts +91 -87
  104. package/src/env/parse-model-config.ts +265 -0
  105. package/src/env/types.ts +212 -344
  106. package/src/extractor/dom-util.ts +15 -12
  107. package/src/extractor/index.ts +0 -3
  108. package/src/extractor/locator.ts +3 -12
  109. package/src/extractor/tree.ts +4 -4
  110. package/src/extractor/util.ts +0 -32
  111. package/src/index.ts +2 -0
  112. package/src/mcp/base-server.ts +435 -0
  113. package/src/mcp/base-tools.ts +196 -0
  114. package/src/mcp/index.ts +5 -0
  115. package/src/mcp/inject-report-html-plugin.ts +119 -0
  116. package/src/mcp/tool-generator.ts +330 -0
  117. package/src/mcp/types.ts +108 -0
  118. package/src/node/fs.ts +1 -1
  119. package/src/types/index.ts +8 -2
  120. package/src/utils.ts +1 -1
  121. package/src/zod-schema-utils.ts +133 -0
  122. package/dist/es/env/decide-model-config.mjs +0 -172
  123. package/dist/es/env/parse.mjs +0 -69
  124. package/dist/lib/env/decide-model-config.js +0 -212
  125. package/dist/lib/env/parse.js +0 -106
  126. package/dist/types/env/decide-model-config.d.ts +0 -14
  127. package/dist/types/env/parse.d.ts +0 -12
  128. package/src/env/decide-model-config.ts +0 -319
  129. package/src/env/parse.ts +0 -131
@@ -0,0 +1,196 @@
1
+ import { parseBase64 } from '@midscene/shared/img';
2
+ import { getDebug } from '@midscene/shared/logger';
3
+ import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
4
+ import {
5
+ generateCommonTools,
6
+ generateToolsFromActionSpace,
7
+ } from './tool-generator';
8
+ import type {
9
+ ActionSpaceItem,
10
+ BaseAgent,
11
+ BaseDevice,
12
+ IMidsceneTools,
13
+ ToolDefinition,
14
+ } from './types';
15
+
16
const debug = getDebug('mcp:base-tools');

/**
 * Base class for platform-specific MCP tools.
 *
 * Collects tool definitions (platform tools, tools generated from the action
 * space, and common tools) and registers them on an MCP server.
 * The generic type TAgent allows subclasses to use their specific agent types.
 */
export abstract class BaseMidsceneTools<TAgent extends BaseAgent = BaseAgent>
  implements IMidsceneTools
{
  /** Server the tools were registered on; set by attachToServer(). */
  protected mcpServer?: McpServer;
  /**
   * Agent instance to clean up. This class only ever clears it; presumably
   * subclasses assign it inside ensureAgent() - verify against implementations.
   */
  protected agent?: TAgent;
  /** Tool definitions prepared by initTools() and consumed by attachToServer(). */
  protected toolDefinitions: ToolDefinition[] = [];

  /**
   * Ensure agent is initialized and ready for use.
   * Must be implemented by subclasses to create platform-specific agent.
   * @param initParam Optional initialization parameter (platform-specific, e.g., URL, device ID)
   * @returns Promise resolving to initialized agent instance
   * @throws Error if agent initialization fails
   */
  protected abstract ensureAgent(initParam?: string): Promise<TAgent>;

  /**
   * Optional: prepare platform-specific tools (e.g., device connection).
   * The default implementation contributes no tools.
   */
  protected preparePlatformTools(): ToolDefinition[] {
    return [];
  }

  /**
   * Must be implemented by subclasses to create a temporary device instance.
   * This allows getting real actionSpace without connecting to device.
   */
  protected abstract createTemporaryDevice(): BaseDevice;

  /**
   * Initialize all tools by querying actionSpace.
   * Uses a two-layer fallback strategy:
   * 1. Try to get actionSpace from the agent returned by ensureAgent()
   * 2. If that throws, create a temporary device instance and read its actionSpace
   *
   * Any previously prepared tool definitions are discarded first.
   */
  public async initTools(): Promise<void> {
    this.toolDefinitions = [];

    // 1. Add platform-specific tools first (device connection, etc.)
    // These don't require an agent and should always be available
    const platformTools = this.preparePlatformTools();
    this.toolDefinitions.push(...platformTools);

    // 2. Try to get agent and its action space (two-layer fallback)
    let actionSpace: ActionSpaceItem[];
    try {
      // Layer 1: Try to use connected agent
      const agent = await this.ensureAgent();
      actionSpace = await agent.getActionSpace();
      debug(
        'Action space from connected agent:',
        actionSpace.map((a) => a.name).join(', '),
      );
    } catch (error) {
      // Layer 2: Create temporary device instance to read actionSpace
      // This is expected behavior for bridge mode without URL or unconnected devices
      const errorMessage =
        error instanceof Error ? error.message : String(error);
      // The message is only inspected to pick a more helpful debug line.
      if (
        errorMessage.includes('requires a URL') ||
        errorMessage.includes('web_connect')
      ) {
        debug(
          'Bridge mode detected - agent will be initialized on first web_connect call',
        );
      } else {
        debug(
          'Agent not available yet, using temporary device for action space',
        );
      }
      const tempDevice = this.createTemporaryDevice();
      try {
        actionSpace = tempDevice.actionSpace();
        debug(
          'Action space from temporary device:',
          actionSpace.map((a) => a.name).join(', '),
        );
      } finally {
        // Always release the temporary instance, even when reading the
        // action space fails; destroy() is optional on the device.
        await tempDevice.destroy?.();
      }
    }

    // 3. Generate tools from action space (core innovation)
    const actionTools = generateToolsFromActionSpace(actionSpace, () =>
      this.ensureAgent(),
    );

    // 4. Add common tools (e.g. take_screenshot)
    const commonTools = generateCommonTools(() => this.ensureAgent());

    this.toolDefinitions.push(...actionTools, ...commonTools);

    debug('Total tools prepared:', this.toolDefinitions.length);
  }

  /**
   * Attach to MCP server and register all prepared tools.
   * Tools flagged with autoDestroy are wrapped so the agent is destroyed
   * after each call; all others are registered as-is.
   * @param server MCP server to register the tools on
   */
  public attachToServer(server: McpServer): void {
    this.mcpServer = server;

    if (this.toolDefinitions.length === 0) {
      debug('Warning: No tools to register. Tools may be initialized lazily.');
    }

    for (const toolDef of this.toolDefinitions) {
      if (toolDef.autoDestroy) {
        this.toolWithAutoDestroy(
          toolDef.name,
          toolDef.description,
          toolDef.schema,
          toolDef.handler,
        );
      } else {
        server.tool(
          toolDef.name,
          toolDef.description,
          toolDef.schema,
          toolDef.handler,
        );
      }
    }

    debug('Registered', this.toolDefinitions.length, 'tools');
  }

  /**
   * Register a tool whose handler is followed by agent cleanup.
   * After the handler settles (success or failure) the agent is destroyed
   * and the reference cleared, unless the environment variable
   * MIDSCENE_MCP_DISABLE_AGENT_AUTO_DESTROY is set to a non-empty value.
   * @throws Error if no MCP server has been attached
   */
  private toolWithAutoDestroy(
    name: string,
    description: string,
    schema: any,
    handler: (...args: any[]) => Promise<any>,
  ): void {
    if (!this.mcpServer) {
      throw new Error('MCP server not attached');
    }

    this.mcpServer.tool(name, description, schema, async (...args: any[]) => {
      try {
        return await handler(...args);
      } finally {
        if (!process.env.MIDSCENE_MCP_DISABLE_AGENT_AUTO_DESTROY) {
          try {
            await this.agent?.destroy?.();
          } catch (error) {
            // Cleanup is best-effort: never mask the handler's own result.
            debug('Failed to destroy agent during cleanup:', error);
          }
          this.agent = undefined;
        }
      }
    });
  }

  /**
   * Cleanup method - destroy agent and release resources.
   * The agent reference is cleared even when destroy() rejects, so a
   * destroyed agent is never left behind for later reuse. Errors from
   * destroy() still propagate to the caller.
   */
  public async closeBrowser(): Promise<void> {
    try {
      await this.agent?.destroy?.();
    } finally {
      this.agent = undefined;
    }
  }

  /**
   * Helper: Convert base64 screenshot to image content array.
   * @param screenshot Base64 screenshot string accepted by parseBase64
   * @returns Single-element array holding an image content item
   */
  protected buildScreenshotContent(screenshot: string) {
    const { mimeType, body } = parseBase64(screenshot);
    return [
      {
        type: 'image' as const,
        data: body,
        mimeType,
      },
    ];
  }
}
@@ -0,0 +1,5 @@
1
+ export * from './base-server';
2
+ export * from './base-tools';
3
+ export * from './tool-generator';
4
+ export * from './types';
5
+ export * from './inject-report-html-plugin';
@@ -0,0 +1,119 @@
1
+ import fs from 'node:fs';
2
+ import path from 'node:path';
3
+
4
// Placeholder (single-quoted in the bundle) that marks where the report HTML goes.
const MAGIC_STRING = 'REPLACE_ME_WITH_REPORT_HTML';
// Comment marker written in front of an already injected HTML string literal.
const REPLACED_MARK = '/*REPORT_HTML_REPLACED*/';
// Legacy fallback: matches the marker and everything after it up to end of line.
const REG_EXP_FOR_REPLACE = /\/\*REPORT_HTML_REPLACED\*\/.*/;

interface RslibPluginApi {
  onAfterBuild: (callback: () => void) => void;
}

/**
 * Find the first marker in `content` and the double-quoted string literal
 * that directly follows it.
 *
 * @returns Offsets of the marker start, the literal's opening quote and the
 * position just past the closing quote, or `undefined` when there is no
 * marker, the marker is not followed by `"`, or the literal is unterminated.
 */
function findInjectedLiteral(
  content: string,
): { start: number; literalStart: number; end: number } | undefined {
  const start = content.indexOf(REPLACED_MARK);
  if (start === -1) {
    return undefined;
  }

  const literalStart = start + REPLACED_MARK.length;
  if (content[literalStart] !== '"') {
    return undefined;
  }

  let cursor = literalStart + 1;
  while (cursor < content.length) {
    const ch = content[cursor];
    if (ch === '\\') {
      cursor += 2; // Skip the escaped character so \" does not end the literal
    } else if (ch === '"') {
      return { start, literalStart, end: cursor + 1 };
    } else {
      cursor += 1;
    }
  }

  // Ran off the end without a closing quote: the literal is truncated.
  return undefined;
}

/**
 * Rslib plugin to inject report HTML from @midscene/core dist into MCP bundle.
 * This runs after build and reads the already-injected HTML from core
 * (`<packageDir>/../core/dist/lib/utils.js`), then writes it into every
 * `.js` file directly inside `<packageDir>/dist` that contains either the
 * placeholder string or a previously injected literal.
 *
 * Prerequisites:
 * - @midscene/report must be in devDependencies to ensure correct build order
 * - @midscene/core dist must exist with injected HTML
 *
 * A missing core dist, missing marker, or an incomplete/unterminated HTML
 * literal only produces a warning; no file is modified in that case.
 *
 * @param packageDir - The directory of the MCP package (use __dirname)
 * @returns Plugin object with `name` and a `setup` hook registering the
 * after-build callback
 */
export function injectReportHtmlFromCore(packageDir: string) {
  return {
    name: 'inject-report-html-from-core',
    setup(api: RslibPluginApi) {
      api.onAfterBuild(() => {
        const coreUtilsPath = path.resolve(
          packageDir,
          '..',
          'core',
          'dist',
          'lib',
          'utils.js',
        );

        if (!fs.existsSync(coreUtilsPath)) {
          console.warn(
            '[inject-report-html] @midscene/core dist not found, skipping',
          );
          return;
        }

        const coreContent = fs.readFileSync(coreUtilsPath, 'utf-8');
        if (!coreContent.includes(REPLACED_MARK)) {
          console.warn(
            '[inject-report-html] HTML not found in core dist. Ensure report builds first.',
          );
          return;
        }

        // Extract the JSON string after the marker. Only a properly
        // terminated literal is accepted, so a truncated core file can
        // never be copied into the bundle.
        const coreLiteral = findInjectedLiteral(coreContent);
        const jsonString = coreLiteral
          ? coreContent.slice(coreLiteral.literalStart, coreLiteral.end)
          : '';
        if (!jsonString || jsonString.length < 10) {
          console.warn('[inject-report-html] Failed to extract HTML from core');
          return;
        }

        const finalContent = `${REPLACED_MARK}${jsonString}`;
        const distDir = path.join(packageDir, 'dist');

        if (!fs.existsSync(distDir)) return;

        const jsFiles = fs
          .readdirSync(distDir)
          .filter((f) => f.endsWith('.js'));
        let injectedCount = 0;

        for (const file of jsFiles) {
          const filePath = path.join(distDir, file);
          const content = fs.readFileSync(filePath, 'utf-8');

          if (content.includes(REPLACED_MARK)) {
            // Replace exactly the marker + old literal so that code following
            // it on the same line (or the rest of a minified bundle) survives.
            const previous = findInjectedLiteral(content);
            const updated = previous
              ? content.slice(0, previous.start) +
                finalContent +
                content.slice(previous.end)
              : // No well-formed literal after the marker: keep the legacy
                // behaviour of replacing the remainder of the line.
                content.replace(REG_EXP_FOR_REPLACE, () => finalContent);
            fs.writeFileSync(filePath, updated);
            console.log(`[inject-report-html] Updated: ${file}`);
            injectedCount++;
          } else if (content.includes(`'${MAGIC_STRING}'`)) {
            // A replacer function is used so `$` sequences inside the HTML
            // are not interpreted as replacement patterns.
            fs.writeFileSync(
              filePath,
              content.replace(`'${MAGIC_STRING}'`, () => finalContent),
            );
            console.log(`[inject-report-html] Injected: ${file}`);
            injectedCount++;
          }
        }

        if (injectedCount > 0) {
          console.log(
            `[inject-report-html] Completed: ${injectedCount} file(s)`,
          );
        }
      });
    },
  };
}
@@ -0,0 +1,330 @@
1
+ import { parseBase64 } from '@midscene/shared/img';
2
+ import { z } from 'zod';
3
+ import { getZodDescription, getZodTypeName } from '../zod-schema-utils';
4
+ import type {
5
+ ActionSpaceItem,
6
+ BaseAgent,
7
+ ToolDefinition,
8
+ ToolResult,
9
+ } from './types';
10
+
11
/**
 * Turn a caught value of unknown type into a printable message.
 * `Error` instances contribute their `message`; every other value is
 * converted with `String()`.
 */
function getErrorMessage(error: unknown): string {
  if (error instanceof Error) {
    return error.message;
  }
  return String(error);
}
17
+
18
/**
 * Build the MCP tool description text for one action-space entry.
 *
 * Shapes produced:
 * - no param schema (or an object schema with no describable fields):
 *   "name action, description"
 * - non-object schema: "name action, description. Parameter: type[ - desc]"
 * - object schema:
 *   "name action, description. Parameters: a (type) - desc; b? (type)"
 *
 * When the action has no description, "Execute <name> action" is used.
 */
function describeActionForMCP(action: ActionSpaceItem): string {
  const fallbackDesc = `Execute ${action.name} action`;
  const headline = `${action.name} action, ${action.description || fallbackDesc}`;

  if (!action.paramSchema) {
    return headline;
  }

  const schema = action.paramSchema as {
    _def?: { typeName?: string };
    shape?: Record<string, unknown>;
  };
  // Only a ZodObject that actually exposes a shape is listed field by field.
  const shape =
    schema._def?.typeName === 'ZodObject' ? schema.shape : undefined;

  if (!shape) {
    // Simple (non-object) schema: describe the single parameter
    const typeName = getZodTypeName(schema);
    const note = getZodDescription(schema as z.ZodTypeAny);
    return `${headline}. Parameter: ${note ? `${typeName} - ${note}` : typeName}`;
  }

  const fieldNotes: string[] = [];
  Object.entries(shape).forEach(([key, field]) => {
    if (!field || typeof field !== 'object') {
      return; // Not a schema-like value, nothing to describe
    }

    const candidate = field as { isOptional?: () => boolean };
    const optionalMark =
      typeof candidate.isOptional === 'function' && candidate.isOptional()
        ? '?'
        : '';
    const typeName = getZodTypeName(field);
    const note = getZodDescription(field as z.ZodTypeAny);

    const label = `${key}${optionalMark} (${typeName})`;
    fieldNotes.push(note ? `${label} - ${note}` : label);
  });

  return fieldNotes.length === 0
    ? headline
    : `${headline}. Parameters: ${fieldNotes.join('; ')}`;
}
67
+
68
/**
 * Type guard reporting whether the given Zod type is a ZodOptional wrapper,
 * decided by the `_def.typeName` tag.
 */
function isZodOptional(
  value: z.ZodTypeAny,
): value is z.ZodOptional<z.ZodTypeAny> {
  if (!('_def' in value)) {
    return false;
  }
  return value._def?.typeName === 'ZodOptional';
}
76
+
77
/**
 * Type guard reporting whether the given Zod type is a ZodObject: it must
 * carry the 'ZodObject' `_def.typeName` tag and expose a `shape` property.
 */
function isZodObject(value: z.ZodTypeAny): value is z.ZodObject<z.ZodRawShape> {
  if (!('_def' in value)) {
    return false;
  }
  if (value._def?.typeName !== 'ZodObject') {
    return false;
  }
  return 'shape' in value;
}
85
+
86
/**
 * Peel a single ZodOptional layer off a schema.
 * @returns The wrapped inner type with `isOptional: true`, or the value
 * itself with `isOptional: false` when it is not a ZodOptional
 */
function unwrapOptional(value: z.ZodTypeAny): {
  innerValue: z.ZodTypeAny;
  isOptional: boolean;
} {
  return isZodOptional(value)
    ? { innerValue: value._def.innerType, isOptional: true }
    : { innerValue: value, isOptional: false };
}
98
+
99
/**
 * Detect the "locate field" pattern: a Zod object schema whose shape has a
 * 'prompt' key. Any non-object schema yields false.
 */
function isLocateField(value: z.ZodTypeAny): boolean {
  return isZodObject(value) && 'prompt' in value.shape;
}
108
+
109
/**
 * Rebuild a locate-field object schema so that its 'prompt' entry becomes
 * optional. The rebuilt object is created with `.passthrough()`.
 * @param value Object schema containing a 'prompt' field
 * @param wrapInOptional When true, the rebuilt object itself is made optional
 */
function makePromptOptional(
  value: z.ZodObject<z.ZodRawShape>,
  wrapInOptional: boolean,
): z.ZodTypeAny {
  const relaxedShape = {
    ...value.shape,
    prompt: value.shape.prompt.optional(),
  };
  const relaxedObject: z.ZodTypeAny = z.object(relaxedShape).passthrough();

  return wrapInOptional ? relaxedObject.optional() : relaxedObject;
}
125
+
126
/**
 * Map one `[key, schema]` entry of an action's parameter shape.
 * Locate fields (optionally wrapped in ZodOptional) get their 'prompt' made
 * optional while keeping the outer optionality; every other field is
 * returned untouched.
 */
function transformSchemaField(
  key: string,
  value: z.ZodTypeAny,
): [string, z.ZodTypeAny] {
  const unwrapped = unwrapOptional(value);
  const candidate = unwrapped.innerValue;

  if (!isZodObject(candidate) || !isLocateField(candidate)) {
    return [key, value];
  }
  return [key, makePromptOptional(candidate, unwrapped.isOptional)];
}
140
+
141
/**
 * Derive the tool schema from an action's paramSchema.
 * - no schema: an empty object
 * - non-object schema: the schema itself, cast to the record type
 * - object schema: its shape, with each field passed through
 *   transformSchemaField
 */
function extractActionSchema(
  paramSchema: z.ZodTypeAny | undefined,
): Record<string, z.ZodTypeAny> {
  if (!paramSchema) {
    return {};
  }

  const schema = paramSchema as z.ZodTypeAny;
  if (!isZodObject(schema)) {
    return schema as unknown as Record<string, z.ZodTypeAny>;
  }

  const transformedEntries: Array<[string, z.ZodTypeAny]> = [];
  for (const [fieldName, fieldSchema] of Object.entries(schema.shape)) {
    transformedEntries.push(
      transformSchemaField(fieldName, fieldSchema as z.ZodTypeAny),
    );
  }
  return Object.fromEntries(transformedEntries);
}
162
+
163
/**
 * Render tool arguments as a comma-separated "key: value" list for use in
 * an AI action instruction.
 * Non-null objects are JSON-encoded (falling back to "[object]" when that
 * fails); every other value is interpolated inside double quotes.
 * If rendering throws, a bracketed failure note is returned instead.
 */
function serializeArgsToDescription(args: Record<string, unknown>): string {
  const renderEntry = (key: string, value: unknown): string => {
    if (typeof value !== 'object' || value === null) {
      return `${key}: "${value}"`;
    }
    try {
      return `${key}: ${JSON.stringify(value)}`;
    } catch {
      // Circular reference or non-serializable object
      return `${key}: [object]`;
    }
  };

  try {
    const rendered: string[] = [];
    for (const [key, value] of Object.entries(args)) {
      rendered.push(renderEntry(key, value));
    }
    return rendered.join(', ');
  } catch (error: unknown) {
    const errorMessage = getErrorMessage(error);
    console.error('Error serializing args:', errorMessage);
    return `[args serialization failed: ${errorMessage}]`;
  }
}
187
+
188
/**
 * Compose the natural-language instruction for an action.
 * The serialized arguments are appended after "with" only when the
 * serialization is non-empty.
 */
function buildActionInstruction(
  actionName: string,
  args: Record<string, unknown>,
): string {
  const renderedArgs = serializeArgsToDescription(args);
  if (!renderedArgs) {
    return `Use the action "${actionName}"`;
  }
  return `Use the action "${actionName}" with ${renderedArgs}`;
}
200
+
201
/**
 * Build the success result for a finished action.
 * The result always carries a completion text; when the agent's page yields
 * a screenshot, the parsed image is appended. A failure while capturing is
 * logged and reported inside the text instead of being thrown.
 */
async function captureScreenshotResult(
  agent: BaseAgent,
  actionName: string,
): Promise<ToolResult> {
  try {
    const completion = {
      type: 'text' as const,
      text: `Action "${actionName}" completed.`,
    };

    const screenshot = await agent.page?.screenshotBase64();
    if (!screenshot) {
      return { content: [completion] };
    }

    const parsed = parseBase64(screenshot);
    return {
      content: [
        completion,
        { type: 'image', data: parsed.body, mimeType: parsed.mimeType },
      ],
    };
  } catch (error: unknown) {
    const errorMessage = getErrorMessage(error);
    console.error('Error capturing screenshot:', errorMessage);
    return {
      content: [
        {
          type: 'text',
          text: `Action "${actionName}" completed (screenshot unavailable: ${errorMessage})`,
        },
      ],
    };
  }
}
236
+
237
/**
 * Wrap a message into a tool result flagged as an error, carrying the
 * message as its single text content item.
 */
function createErrorResult(message: string): ToolResult {
  const failure: ToolResult = {
    content: [{ type: 'text', text: message }],
    isError: true,
  };
  return failure;
}
246
+
247
/**
 * Convert every action of an action space into an MCP tool definition, so
 * tools do not have to be hardcoded.
 *
 * Each generated handler obtains an agent via `getAgent`, runs the action
 * through `agent.aiAction` with a natural-language instruction, and answers
 * with a completion result (including a screenshot when available).
 * Failures are returned as error results rather than thrown.
 * All generated tools are flagged `autoDestroy`.
 *
 * NOTE(review): when the agent has no `aiAction`, nothing is executed and
 * the handler still reports completion - confirm this is intended.
 *
 * @param actionSpace Actions to expose as tools
 * @param getAgent Factory resolving to the agent used to execute an action
 */
export function generateToolsFromActionSpace(
  actionSpace: ActionSpaceItem[],
  getAgent: () => Promise<BaseAgent>,
): ToolDefinition[] {
  const toToolDefinition = (action: ActionSpaceItem): ToolDefinition => {
    const toolSchema = extractActionSchema(action.paramSchema as z.ZodTypeAny);
    const toolDescription = describeActionForMCP(action);

    const runAction = async (args: Record<string, unknown>) => {
      try {
        const agent = await getAgent();

        if (agent.aiAction) {
          const instruction = buildActionInstruction(action.name, args);
          try {
            await agent.aiAction(instruction);
          } catch (error: unknown) {
            // Execution failure: reported separately from agent/setup errors
            const errorMessage = getErrorMessage(error);
            console.error(
              `Error executing action "${action.name}":`,
              errorMessage,
            );
            return createErrorResult(
              `Failed to execute action "${action.name}": ${errorMessage}`,
            );
          }
        }

        return await captureScreenshotResult(agent, action.name);
      } catch (error: unknown) {
        const errorMessage = getErrorMessage(error);
        console.error(`Error in handler for "${action.name}":`, errorMessage);
        return createErrorResult(
          `Failed to get agent or execute action "${action.name}": ${errorMessage}`,
        );
      }
    };

    return {
      name: action.name,
      description: toolDescription,
      schema: toolSchema,
      handler: runAction,
      autoDestroy: true,
    };
  };

  return actionSpace.map((action) => toToolDefinition(action));
}
295
+
296
/**
 * Generate the platform-independent helper tools.
 * Currently this is a single tool, `take_screenshot`, which takes no
 * parameters, returns the current screenshot as image content, and is
 * flagged `autoDestroy`. A missing screenshot or any thrown error is
 * returned as an error result.
 *
 * @param getAgent Factory resolving to the agent whose page is captured
 */
export function generateCommonTools(
  getAgent: () => Promise<BaseAgent>,
): ToolDefinition[] {
  const takeScreenshot = async (): Promise<ToolResult> => {
    try {
      const agent = await getAgent();
      const screenshot = await agent.page?.screenshotBase64();
      if (!screenshot) {
        return createErrorResult('Screenshot not available');
      }

      const parsed = parseBase64(screenshot);
      return {
        content: [
          { type: 'image', data: parsed.body, mimeType: parsed.mimeType },
        ],
      };
    } catch (error: unknown) {
      const errorMessage = getErrorMessage(error);
      console.error('Error taking screenshot:', errorMessage);
      return createErrorResult(`Failed to capture screenshot: ${errorMessage}`);
    }
  };

  return [
    {
      name: 'take_screenshot',
      description: 'Capture screenshot of current page/screen',
      schema: {},
      handler: takeScreenshot,
      autoDestroy: true,
    },
  ];
}