@midscene/shared 1.0.1-beta-20251208112226.0 → 1.0.1-beta-20251209024153.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/mcp/base-server.mjs +250 -0
- package/dist/es/mcp/base-tools.mjs +84 -0
- package/dist/es/mcp/index.mjs +5 -0
- package/dist/es/mcp/inject-report-html-plugin.mjs +53 -0
- package/dist/es/mcp/tool-generator.mjs +207 -0
- package/dist/es/mcp/types.mjs +3 -0
- package/dist/es/node/fs.mjs +1 -1
- package/dist/es/zod-schema-utils.mjs +54 -0
- package/dist/lib/baseDB.js +2 -2
- package/dist/lib/build/copy-static.js +2 -2
- package/dist/lib/build/rspack-config.js +2 -2
- package/dist/lib/common.js +2 -2
- package/dist/lib/constants/example-code.js +2 -2
- package/dist/lib/constants/index.js +2 -2
- package/dist/lib/env/basic.js +2 -2
- package/dist/lib/env/constants.js +2 -2
- package/dist/lib/env/global-config-manager.js +2 -2
- package/dist/lib/env/helper.js +2 -2
- package/dist/lib/env/index.js +6 -6
- package/dist/lib/env/init-debug.js +2 -2
- package/dist/lib/env/model-config-manager.js +2 -2
- package/dist/lib/env/parse-model-config.js +2 -2
- package/dist/lib/env/types.js +2 -2
- package/dist/lib/env/utils.js +2 -2
- package/dist/lib/extractor/constants.js +2 -2
- package/dist/lib/extractor/debug.js +1 -1
- package/dist/lib/extractor/dom-util.js +2 -2
- package/dist/lib/extractor/index.js +2 -2
- package/dist/lib/extractor/locator.js +2 -2
- package/dist/lib/extractor/tree.js +2 -2
- package/dist/lib/extractor/util.js +2 -2
- package/dist/lib/extractor/web-extractor.js +2 -2
- package/dist/lib/img/box-select.js +2 -2
- package/dist/lib/img/draw-box.js +2 -2
- package/dist/lib/img/get-jimp.js +2 -2
- package/dist/lib/img/get-photon.js +2 -2
- package/dist/lib/img/get-sharp.js +2 -2
- package/dist/lib/img/index.js +2 -2
- package/dist/lib/img/info.js +2 -2
- package/dist/lib/img/transform.js +2 -2
- package/dist/lib/index.js +2 -2
- package/dist/lib/logger.js +2 -2
- package/dist/lib/mcp/base-server.js +300 -0
- package/dist/lib/mcp/base-tools.js +118 -0
- package/dist/lib/mcp/index.js +86 -0
- package/dist/lib/mcp/inject-report-html-plugin.js +98 -0
- package/dist/lib/mcp/tool-generator.js +244 -0
- package/dist/lib/mcp/types.js +40 -0
- package/dist/lib/node/fs.js +3 -3
- package/dist/lib/node/index.js +2 -2
- package/dist/lib/polyfills/async-hooks.js +2 -2
- package/dist/lib/polyfills/index.js +2 -2
- package/dist/lib/types/index.js +2 -2
- package/dist/lib/us-keyboard-layout.js +2 -2
- package/dist/lib/utils.js +2 -2
- package/dist/lib/zod-schema-utils.js +97 -0
- package/dist/types/mcp/base-server.d.ts +77 -0
- package/dist/types/mcp/base-tools.d.ts +55 -0
- package/dist/types/mcp/index.d.ts +5 -0
- package/dist/types/mcp/inject-report-html-plugin.d.ts +18 -0
- package/dist/types/mcp/tool-generator.d.ts +11 -0
- package/dist/types/mcp/types.d.ts +100 -0
- package/dist/types/zod-schema-utils.d.ts +23 -0
- package/package.json +17 -3
- package/src/mcp/base-server.ts +435 -0
- package/src/mcp/base-tools.ts +196 -0
- package/src/mcp/index.ts +5 -0
- package/src/mcp/inject-report-html-plugin.ts +119 -0
- package/src/mcp/tool-generator.ts +330 -0
- package/src/mcp/types.ts +108 -0
- package/src/zod-schema-utils.ts +133 -0
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
import fs from 'node:fs';
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
|
|
4
|
+
// Placeholder that the bundle carries (as a single-quoted literal) until the
// report HTML is injected.
const MAGIC_STRING = 'REPLACE_ME_WITH_REPORT_HTML';
// Comment marker that immediately precedes an injected HTML string literal.
const REPLACED_MARK = '/*REPORT_HTML_REPLACED*/';
// Matches the marker and everything after it up to the end of that line.
const REG_EXP_FOR_REPLACE = /\/\*REPORT_HTML_REPLACED\*\/.*/;

/** Minimal slice of the build plugin API that this plugin relies on. */
interface RslibPluginApi {
  onAfterBuild: (callback: () => void) => void;
}

/**
 * Find the end of a double-quoted string literal.
 *
 * @param source - Text containing the literal.
 * @param start - Index where the opening `"` is expected.
 * @returns Index just past the closing quote, or `-1` when `source[start]`
 *   is not `"` or the literal is never closed.
 */
function findStringLiteralEnd(source: string, start: number): number {
  if (source[start] !== '"') {
    return -1;
  }
  let index = start + 1;
  while (index < source.length) {
    const ch = source[index];
    if (ch === '\\') {
      // A backslash escapes the next character, so it can never close the literal.
      index += 2;
    } else if (ch === '"') {
      return index + 1;
    } else {
      index += 1;
    }
  }
  // Ran off the end of the text without seeing the closing quote.
  return -1;
}

/**
 * Build plugin that copies the already-injected report HTML literal from the
 * sibling `core` package's `dist/lib/utils.js` into this package's bundle.
 *
 * After the build finishes it reads `<packageDir>/../core/dist/lib/utils.js`,
 * extracts the string literal following the `REPORT_HTML_REPLACED` marker and
 * writes it into every `.js` file directly under `<packageDir>/dist` that
 * contains either the marker (refresh) or the single-quoted placeholder
 * (first injection). Each skipped precondition is reported with
 * `console.warn` and leaves the bundle untouched.
 *
 * Prerequisites (from the original notes):
 * - @midscene/report must be in devDependencies to ensure correct build order
 * - @midscene/core dist must exist with injected HTML
 *
 * @param packageDir - The directory of the MCP package (use __dirname)
 * @returns A plugin object with `name` and `setup`.
 */
export function injectReportHtmlFromCore(packageDir: string) {
  return {
    name: 'inject-report-html-from-core',
    setup(api: RslibPluginApi) {
      api.onAfterBuild(() => {
        const coreUtilsPath = path.resolve(
          packageDir,
          '..',
          'core',
          'dist',
          'lib',
          'utils.js',
        );

        if (!fs.existsSync(coreUtilsPath)) {
          console.warn(
            '[inject-report-html] @midscene/core dist not found, skipping',
          );
          return;
        }

        const coreContent = fs.readFileSync(coreUtilsPath, 'utf-8');
        const markerIndex = coreContent.indexOf(REPLACED_MARK);
        if (markerIndex === -1) {
          console.warn(
            '[inject-report-html] HTML not found in core dist. Ensure report builds first.',
          );
          return;
        }

        // The HTML is stored as a JSON/JS string literal right after the marker.
        const jsonStart = markerIndex + REPLACED_MARK.length;
        const jsonEnd = findStringLiteralEnd(coreContent, jsonStart);
        // An unterminated or missing literal must not be injected: it would
        // leave the target bundle syntactically broken.
        const jsonString =
          jsonEnd === -1 ? '' : coreContent.slice(jsonStart, jsonEnd);
        if (jsonString.length < 10) {
          console.warn('[inject-report-html] Failed to extract HTML from core');
          return;
        }

        const finalContent = `${REPLACED_MARK}${jsonString}`;
        const distDir = path.join(packageDir, 'dist');

        if (!fs.existsSync(distDir)) return;

        const placeholder = `'${MAGIC_STRING}'`;
        const jsFiles = fs
          .readdirSync(distDir)
          .filter((f) => f.endsWith('.js'));
        let injectedCount = 0;

        for (const file of jsFiles) {
          const filePath = path.join(distDir, file);
          const content = fs.readFileSync(filePath, 'utf-8');

          if (content.includes(REPLACED_MARK)) {
            // Already injected once: swap the marker and the rest of its line.
            // Replacer functions keep `$` sequences in the HTML literal intact.
            fs.writeFileSync(
              filePath,
              content.replace(REG_EXP_FOR_REPLACE, () => finalContent),
            );
            console.log(`[inject-report-html] Updated: ${file}`);
            injectedCount++;
          } else if (content.includes(placeholder)) {
            fs.writeFileSync(
              filePath,
              content.replace(placeholder, () => finalContent),
            );
            console.log(`[inject-report-html] Injected: ${file}`);
            injectedCount++;
          }
        }

        if (injectedCount > 0) {
          console.log(
            `[inject-report-html] Completed: ${injectedCount} file(s)`,
          );
        }
      });
    },
  };
}
|
|
@@ -0,0 +1,330 @@
|
|
|
1
|
+
import { parseBase64 } from '@midscene/shared/img';
|
|
2
|
+
import { z } from 'zod';
|
|
3
|
+
import { getZodDescription, getZodTypeName } from '../zod-schema-utils';
|
|
4
|
+
import type {
|
|
5
|
+
ActionSpaceItem,
|
|
6
|
+
BaseAgent,
|
|
7
|
+
ToolDefinition,
|
|
8
|
+
ToolResult,
|
|
9
|
+
} from './types';
|
|
10
|
+
|
|
11
|
+
/**
 * Turn a caught value of unknown type into a printable message.
 *
 * @param error - Whatever was thrown.
 * @returns `error.message` for `Error` instances, otherwise `String(error)`.
 */
function getErrorMessage(error: unknown): string {
  if (error instanceof Error) {
    return error.message;
  }
  return String(error);
}
|
|
17
|
+
|
|
18
|
+
/**
 * Build the MCP tool description text for one action.
 *
 * Output shapes:
 * - no schema, or an object schema with no usable fields:
 *   `"<name> action, <description>"`
 * - non-object schema: `"<name> action, <description>. Parameter: <type>[ - <desc>]"`
 * - object schema:
 *   `"<name> action, <description>. Parameters: p1 (type) - desc; p2? (type)"`
 *
 * When the action has no description, `Execute <name> action` is used.
 */
function describeActionForMCP(action: ActionSpaceItem): string {
  const actionDesc = action.description || `Execute ${action.name} action`;
  const summary = `${action.name} action, ${actionDesc}`;

  if (!action.paramSchema) {
    return summary;
  }

  // Structural view of the schema; only `_def.typeName` and `shape` are read.
  const schema = action.paramSchema as {
    _def?: { typeName?: string };
    shape?: Record<string, unknown>;
  };

  if (schema._def?.typeName !== 'ZodObject' || !schema.shape) {
    // Single-value schema: describe it as one parameter.
    const typeName = getZodTypeName(schema);
    const description = getZodDescription(schema as z.ZodTypeAny);
    const paramDesc = description ? `${typeName} - ${description}` : typeName;
    return `${summary}. Parameter: ${paramDesc}`;
  }

  // Object schema: one entry per field, skipping anything that is not an object.
  const paramDescriptions = Object.entries(schema.shape)
    .filter(([, field]) => field && typeof field === 'object')
    .map(([key, field]) => {
      const candidate = field as { isOptional?: () => boolean };
      const optionalMark =
        typeof candidate.isOptional === 'function' && candidate.isOptional()
          ? '?'
          : '';
      const typeName = getZodTypeName(field);
      const description = getZodDescription(field as z.ZodTypeAny);
      const head = `${key}${optionalMark} (${typeName})`;
      return description ? `${head} - ${description}` : head;
    });

  return paramDescriptions.length === 0
    ? summary
    : `${summary}. Parameters: ${paramDescriptions.join('; ')}`;
}
|
|
67
|
+
|
|
68
|
+
/**
 * Type guard for optional wrappers: true when the value has a `_def` whose
 * `typeName` is `'ZodOptional'`.
 */
function isZodOptional(
  value: z.ZodTypeAny,
): value is z.ZodOptional<z.ZodTypeAny> {
  if (!('_def' in value)) {
    return false;
  }
  return value._def?.typeName === 'ZodOptional';
}
|
|
76
|
+
|
|
77
|
+
/**
 * Type guard for object schemas: true when the value has a `_def` whose
 * `typeName` is `'ZodObject'` and also exposes a `shape` property.
 */
function isZodObject(value: z.ZodTypeAny): value is z.ZodObject<z.ZodRawShape> {
  if (!('_def' in value)) {
    return false;
  }
  if (value._def?.typeName !== 'ZodObject') {
    return false;
  }
  return 'shape' in value;
}
|
|
85
|
+
|
|
86
|
+
/**
 * Peel one optional wrapper off a schema, if present.
 *
 * @returns The wrapped type with `isOptional: true` when `value` is an
 *   optional wrapper; otherwise `value` itself with `isOptional: false`.
 */
function unwrapOptional(value: z.ZodTypeAny): {
  innerValue: z.ZodTypeAny;
  isOptional: boolean;
} {
  return isZodOptional(value)
    ? { innerValue: value._def.innerType, isOptional: true }
    : { innerValue: value, isOptional: false };
}
|
|
98
|
+
|
|
99
|
+
/**
 * Detect the locate-field pattern: an object schema whose shape has a
 * `prompt` key.
 */
function isLocateField(value: z.ZodTypeAny): boolean {
  return isZodObject(value) ? 'prompt' in value.shape : false;
}
|
|
108
|
+
|
|
109
|
+
/**
 * Rebuild a locate-field object schema so that its `prompt` field is optional.
 *
 * The result is a new passthrough object schema built from a copy of the
 * original shape.
 *
 * @param value - Object schema that has a `prompt` field.
 * @param wrapInOptional - When true, the rebuilt schema is itself made optional.
 */
function makePromptOptional(
  value: z.ZodObject<z.ZodRawShape>,
  wrapInOptional: boolean,
): z.ZodTypeAny {
  const relaxedShape = {
    ...value.shape,
    prompt: value.shape.prompt.optional(),
  };
  const relaxed = z.object(relaxedShape).passthrough();
  return wrapInOptional ? relaxed.optional() : relaxed;
}
|
|
125
|
+
|
|
126
|
+
/**
 * Map one `[key, schema]` entry of an action's parameter shape.
 *
 * Locate fields (optionally wrapped in an optional) get their `prompt` made
 * optional, keeping the outer optionality; every other field is returned
 * unchanged.
 */
function transformSchemaField(
  key: string,
  value: z.ZodTypeAny,
): [string, z.ZodTypeAny] {
  const unwrapped = unwrapOptional(value);
  const candidate = unwrapped.innerValue;

  if (!isZodObject(candidate) || !isLocateField(candidate)) {
    return [key, value];
  }
  return [key, makePromptOptional(candidate, unwrapped.isOptional)];
}
|
|
140
|
+
|
|
141
|
+
/**
 * Derive the tool schema (field name -> schema) from an action's `paramSchema`.
 *
 * - No schema: an empty record.
 * - Object schema: its shape, with each field passed through
 *   `transformSchemaField`.
 * - Any other schema: returned as-is via a cast.
 *   NOTE(review): in that case the value is a schema, not a field record,
 *   despite the declared return type - confirm callers expect this.
 */
function extractActionSchema(
  paramSchema: z.ZodTypeAny | undefined,
): Record<string, z.ZodTypeAny> {
  if (!paramSchema) {
    return {};
  }

  const schema = paramSchema as z.ZodTypeAny;
  if (isZodObject(schema)) {
    const transformedEntries = Object.entries(schema.shape).map(
      ([key, value]) => transformSchemaField(key, value as z.ZodTypeAny),
    );
    return Object.fromEntries(transformedEntries);
  }

  return schema as unknown as Record<string, z.ZodTypeAny>;
}
|
|
162
|
+
|
|
163
|
+
/**
 * Render tool arguments as a comma-separated `key: value` list for use in an
 * AI instruction.
 *
 * Non-null objects are JSON-encoded (falling back to `[object]` when that
 * fails); every other value is interpolated inside double quotes. If the
 * rendering itself throws, the error is logged and a
 * `[args serialization failed: ...]` marker is returned instead.
 */
function serializeArgsToDescription(args: Record<string, unknown>): string {
  const describeEntry = (key: string, value: unknown): string => {
    if (typeof value !== 'object' || value === null) {
      return `${key}: "${value}"`;
    }
    try {
      return `${key}: ${JSON.stringify(value)}`;
    } catch {
      // Circular reference or non-serializable object
      return `${key}: [object]`;
    }
  };

  try {
    const parts = Object.entries(args).map(([key, value]) =>
      describeEntry(key, value),
    );
    return parts.join(', ');
  } catch (error: unknown) {
    const errorMessage = getErrorMessage(error);
    console.error('Error serializing args:', errorMessage);
    return `[args serialization failed: ${errorMessage}]`;
  }
}
|
|
187
|
+
|
|
188
|
+
/**
 * Compose the natural-language instruction for an action.
 *
 * @returns `Use the action "<name>"`, followed by ` with <args>` when the
 *   serialized argument list is non-empty.
 */
function buildActionInstruction(
  actionName: string,
  args: Record<string, unknown>,
): string {
  const argsDescription = serializeArgsToDescription(args);
  if (!argsDescription) {
    return `Use the action "${actionName}"`;
  }
  return `Use the action "${actionName}" with ${argsDescription}`;
}
|
|
200
|
+
|
|
201
|
+
/**
 * Build the success result for an action, attaching a screenshot when one is
 * available.
 *
 * The result always starts with an `Action "<name>" completed.` text item; an
 * image item follows when `agent.page` yields a screenshot. A failure while
 * capturing is logged and reported inside the text item; it is not surfaced
 * as an error result.
 */
async function captureScreenshotResult(
  agent: BaseAgent,
  actionName: string,
): Promise<ToolResult> {
  try {
    const screenshot = await agent.page?.screenshotBase64();
    const content: ToolResult['content'] = [
      { type: 'text', text: `Action "${actionName}" completed.` },
    ];

    if (screenshot) {
      const { mimeType, body } = parseBase64(screenshot);
      content.push({ type: 'image', data: body, mimeType });
    }
    return { content };
  } catch (error: unknown) {
    const errorMessage = getErrorMessage(error);
    console.error('Error capturing screenshot:', errorMessage);
    const text = `Action "${actionName}" completed (screenshot unavailable: ${errorMessage})`;
    return { content: [{ type: 'text', text }] };
  }
}
|
|
236
|
+
|
|
237
|
+
/**
 * Wrap a message in a tool result flagged as an error.
 *
 * @param message - Text to place in the single `text` content item.
 */
function createErrorResult(message: string): ToolResult {
  const content: ToolResult['content'] = [{ type: 'text', text: message }];
  return { content, isError: true };
}
|
|
246
|
+
|
|
247
|
+
/**
 * Turn every action-space entry into an MCP tool definition, so tools do not
 * have to be declared by hand.
 *
 * Each generated handler obtains an agent through `getAgent`, runs the action
 * via `agent.aiAction` with an instruction built from the action name and the
 * call arguments, and answers with a completion message plus screenshot.
 * Failures are logged and returned as error results rather than thrown.
 *
 * NOTE(review): when the agent has no `aiAction`, nothing is executed and the
 * handler still reports the action as completed - confirm this is intended.
 *
 * @param actionSpace - Actions to expose as tools.
 * @param getAgent - Factory resolving the agent to run an action on.
 */
export function generateToolsFromActionSpace(
  actionSpace: ActionSpaceItem[],
  getAgent: () => Promise<BaseAgent>,
): ToolDefinition[] {
  const toToolDefinition = (action: ActionSpaceItem): ToolDefinition => {
    const schema = extractActionSchema(action.paramSchema as z.ZodTypeAny);
    const description = describeActionForMCP(action);

    const handler = async (
      args: Record<string, unknown>,
    ): Promise<ToolResult> => {
      try {
        const agent = await getAgent();

        if (agent.aiAction) {
          const instruction = buildActionInstruction(action.name, args);
          try {
            await agent.aiAction(instruction);
          } catch (error: unknown) {
            const errorMessage = getErrorMessage(error);
            console.error(
              `Error executing action "${action.name}":`,
              errorMessage,
            );
            return createErrorResult(
              `Failed to execute action "${action.name}": ${errorMessage}`,
            );
          }
        }

        return await captureScreenshotResult(agent, action.name);
      } catch (error: unknown) {
        // Reached when the agent cannot be obtained or anything outside the
        // inner try fails.
        const errorMessage = getErrorMessage(error);
        console.error(`Error in handler for "${action.name}":`, errorMessage);
        return createErrorResult(
          `Failed to get agent or execute action "${action.name}": ${errorMessage}`,
        );
      }
    };

    return {
      name: action.name,
      description,
      schema,
      handler,
      autoDestroy: true,
    };
  };

  return actionSpace.map((action) => toToolDefinition(action));
}
|
|
295
|
+
|
|
296
|
+
/**
 * Produce the platform-independent helper tools.
 *
 * Currently this is a single `take_screenshot` tool: it obtains an agent via
 * `getAgent` and returns the page screenshot as an image item, or an error
 * result when no screenshot is available or capturing fails.
 *
 * @param getAgent - Factory resolving the agent to capture from.
 */
export function generateCommonTools(
  getAgent: () => Promise<BaseAgent>,
): ToolDefinition[] {
  const takeScreenshot = async (): Promise<ToolResult> => {
    try {
      const agent = await getAgent();
      const screenshot = await agent.page?.screenshotBase64();
      if (screenshot) {
        const { mimeType, body } = parseBase64(screenshot);
        return {
          content: [{ type: 'image', data: body, mimeType }],
        };
      }
      return createErrorResult('Screenshot not available');
    } catch (error: unknown) {
      const errorMessage = getErrorMessage(error);
      console.error('Error taking screenshot:', errorMessage);
      return createErrorResult(`Failed to capture screenshot: ${errorMessage}`);
    }
  };

  return [
    {
      name: 'take_screenshot',
      description: 'Capture screenshot of current page/screen',
      schema: {},
      handler: takeScreenshot,
      autoDestroy: true,
    },
  ];
}
|
package/src/mcp/types.ts
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
2
|
+
import type { z } from 'zod';
|
|
3
|
+
|
|
4
|
+
// Avoid circular dependency: don't import from @midscene/core
|
|
5
|
+
// Instead, use generic types that will be provided by implementation
|
|
6
|
+
|
|
7
|
+
/**
 * Defaults for app loading verification, in milliseconds: the overall timeout
 * and the interval between checks.
 */
export const defaultAppLoadingTimeoutMs = 10_000;
export const defaultAppLoadingCheckIntervalMs = 2_000;
|
|
12
|
+
|
|
13
|
+
/**
 * One content item of a tool result (MCP compatible).
 *
 * Discriminated by `type`: plain text, image or audio data with a MIME type,
 * or an embedded resource carrying either text or a blob.
 */
export type ToolResultContent =
  | { type: 'text'; text: string }
  | { type: 'image'; data: string; mimeType: string }
  | { type: 'audio'; data: string; mimeType: string }
  | {
      type: 'resource';
      resource:
        | { text: string; uri: string; mimeType?: string }
        | { uri: string; blob: string; mimeType?: string };
    };
|
|
26
|
+
|
|
27
|
+
/**
 * What a tool handler returns (MCP compatible).
 *
 * Carries the content items, an optional error flag and optional metadata;
 * the index signature allows additional keys.
 */
export interface ToolResult {
  [x: string]: unknown;
  content: ToolResultContent[];
  isError?: boolean;
  _meta?: Record<string, unknown>;
}
|
|
36
|
+
|
|
37
|
+
/**
 * Signature of a tool implementation: receives the parsed arguments and
 * resolves to a tool result.
 */
export type ToolHandler<T = Record<string, unknown>> = (
  args: T,
) => Promise<ToolResult>;
|
|
44
|
+
|
|
45
|
+
/**
 * Parameter schema of a tool: a record from parameter name to its Zod type.
 */
export type ToolSchema = Record<string, z.ZodTypeAny>;
|
|
49
|
+
|
|
50
|
+
/**
 * Everything needed to describe one tool for the MCP server: its name,
 * description, parameter schema and handler.
 *
 * `autoDestroy` is an optional flag; how it is acted on is decided by the
 * code consuming the definition.
 */
export interface ToolDefinition<T = Record<string, unknown>> {
  name: string;
  description: string;
  schema: ToolSchema;
  handler: ToolHandler<T>;
  autoDestroy?: boolean;
}
|
|
60
|
+
|
|
61
|
+
/**
 * One entry of an agent's or device's action space.
 *
 * Note: deliberately declared without an index signature to maintain
 * compatibility with DeviceAction.
 */
export interface ActionSpaceItem {
  name: string;
  description?: string;
  args?: Record<string, unknown>;
  paramSchema?: z.ZodTypeAny;
}
|
|
71
|
+
|
|
72
|
+
/**
 * Minimal agent contract shared by the platform-specific agents
 * (Android, iOS, Web).
 *
 * Only `getActionSpace` is required; teardown, page access and the AI helpers
 * are optional. Note: the AI helpers resolve to `unknown` for compatibility
 * with platform-specific implementations.
 */
export interface BaseAgent {
  getActionSpace(): Promise<ActionSpaceItem[]>;
  destroy?(): Promise<void>;
  page?: {
    screenshotBase64(): Promise<string>;
  };
  aiAction?: (
    description: string,
    params?: Record<string, unknown>,
  ) => Promise<unknown>;
  aiWaitFor?: (
    assertion: string,
    options: Record<string, unknown>,
  ) => Promise<unknown>;
}
|
|
92
|
+
|
|
93
|
+
/**
 * Contract for temporary device instances: synchronous access to the action
 * space plus an optional asynchronous teardown.
 */
export interface BaseDevice {
  actionSpace(): ActionSpaceItem[];
  destroy?(): Promise<void>;
}
|
|
100
|
+
|
|
101
|
+
/**
 * Contract for a platform-specific MCP tools manager: it can attach itself to
 * an MCP server, initialise its tools asynchronously and, optionally, close
 * the browser.
 */
export interface IMidsceneTools {
  attachToServer(server: McpServer): void;
  initTools(): Promise<void>;
  closeBrowser?(): Promise<void>;
}
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
import type { z } from 'zod';
|
|
2
|
+
|
|
3
|
+
/**
 * Strip optional, nullable, default and effects wrappers from a Zod field to
 * reach the innermost schema.
 *
 * Wrappers are recognised by `_def.typeName`: `ZodOptional`, `ZodNullable`
 * and `ZodDefault` are followed through `_def.innerType`, `ZodEffects`
 * through `_def.schema`.
 *
 * @param field - A Zod schema, or any other value.
 * @returns The innermost schema. Values without a `_def` (including `null`
 *   and `undefined`) are returned unchanged, and a wrapper whose inner schema
 *   is missing is returned as-is instead of throwing.
 */
export function unwrapZodField(field: unknown): unknown {
  type ZodLike = {
    _def?: { typeName?: string; innerType?: unknown; schema?: unknown };
  };

  let current: unknown = field;
  // Nullish values cannot be inspected; hand them back untouched.
  while (current !== null && current !== undefined) {
    const def = (current as ZodLike)._def;
    if (!def) break;

    const typeName = def.typeName;
    if (
      typeName === 'ZodOptional' ||
      typeName === 'ZodNullable' ||
      typeName === 'ZodDefault'
    ) {
      // Malformed wrapper without an inner type: stop here rather than crash.
      if (def.innerType === null || def.innerType === undefined) break;
      current = def.innerType;
    } else if (typeName === 'ZodEffects' && def.schema) {
      // Transformations, refinements and preprocessors wrap their base schema.
      current = def.schema;
    } else {
      break;
    }
  }
  return current;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Decide whether a field is a Midscene locator field.
 *
 * After unwrapping, the field must be an object schema whose shape either
 * 1. contains `midscene_location_field_flag` (result schema), or
 * 2. contains a truthy `prompt` entry (input schema).
 */
export function isMidsceneLocatorField(field: unknown): boolean {
  const unwrapped = unwrapZodField(field) as {
    _def?: { typeName?: string; shape?: () => Record<string, unknown> };
  };

  const def = unwrapped._def;
  if (def?.typeName !== 'ZodObject') {
    return false;
  }

  const shape = def.shape?.();
  if (!shape) {
    return false;
  }

  // Result schemas are tagged explicitly with the flag field.
  if ('midscene_location_field_flag' in shape) {
    return true;
  }
  // Input schemas are recognised by their `prompt` field.
  return 'prompt' in shape && Boolean(shape.prompt);
}
|
|
60
|
+
|
|
61
|
+
/**
 * Produce a short, human-readable type name for a Zod schema field.
 *
 * Wrappers are unwrapped first. Strings, numbers, booleans, arrays and
 * objects map to their plain names; enums render as `enum('a', 'b')`; unions
 * render their option types joined by ` | `; anything else is `unknown`.
 *
 * @param field - Zod schema field
 * @param locatorTypeDescription - Optional name to use instead of `object`
 *   for Midscene locator fields (used by core)
 */
export function getZodTypeName(
  field: unknown,
  locatorTypeDescription?: string,
): string {
  const actualField = unwrapZodField(field) as {
    _def?: { typeName?: string; values?: unknown[]; options?: unknown[] };
  };
  const def = actualField._def;

  switch (def?.typeName) {
    case 'ZodString':
      return 'string';
    case 'ZodNumber':
      return 'number';
    case 'ZodBoolean':
      return 'boolean';
    case 'ZodArray':
      return 'array';
    case 'ZodObject':
      // Locator fields may be given a more specific name by the caller.
      return isMidsceneLocatorField(actualField)
        ? locatorTypeDescription || 'object'
        : 'object';
    case 'ZodEnum': {
      const enumValues = def.values as unknown[] | undefined;
      const listed =
        enumValues?.map((option: unknown) => `'${option}'`).join(', ') ??
        'enum';
      return `enum(${listed})`;
    }
    case 'ZodUnion': {
      const options = def.options as unknown[] | undefined;
      if (!options || options.length === 0) {
        return 'union';
      }
      return options
        .map((opt: unknown) => getZodTypeName(opt, locatorTypeDescription))
        .join(' | ');
    }
    default:
      return 'unknown';
  }
}
|
|
107
|
+
|
|
108
|
+
/**
 * Resolve the description of a Zod schema field.
 *
 * Lookup order:
 * 1. a non-empty description on the field itself (a wrapper may carry one),
 * 2. a non-empty description on the unwrapped inner schema,
 * 3. a generic text for Midscene locator fields,
 * 4. otherwise `null`.
 *
 * The checks test the description's value, not the presence of the property:
 * Zod exposes `description` as an accessor on every schema, so an `in` test
 * is always true and would make steps 2 and 3 unreachable.
 *
 * @param field - Zod schema field.
 * @returns The description text, or `null` when none applies.
 */
export function getZodDescription(field: z.ZodTypeAny): string | null {
  const ownDescription = (field as { description?: string }).description;
  if (ownDescription) {
    return ownDescription;
  }

  const actualField = unwrapZodField(field) as {
    description?: string;
    _def?: { typeName?: string; shape?: () => Record<string, unknown> };
  };

  if (actualField.description) {
    return actualField.description;
  }

  // Locator fields without an explicit description get a generic one.
  if (isMidsceneLocatorField(actualField)) {
    return 'Location information for the target element';
  }

  return null;
}
|