@midscene/shared 1.6.1-beta-20260331083547.0 → 1.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/constants/example-code.mjs +25 -21
- package/dist/es/extractor/dom-util.mjs +9 -5
- package/dist/es/mcp/tool-generator.mjs +130 -56
- package/dist/es/node/fs.mjs +1 -1
- package/dist/lib/constants/example-code.js +25 -21
- package/dist/lib/extractor/dom-util.js +9 -5
- package/dist/lib/mcp/tool-generator.js +129 -55
- package/dist/lib/node/fs.js +1 -1
- package/dist/types/constants/example-code.d.ts +1 -1
- package/dist/types/extractor/dom-util.d.ts +5 -4
- package/dist/types/mcp/types.d.ts +1 -0
- package/package.json +1 -1
- package/src/constants/example-code.ts +25 -21
- package/src/extractor/dom-util.ts +10 -5
- package/src/mcp/tool-generator.ts +217 -66
- package/src/mcp/types.ts +4 -0
|
@@ -36,16 +36,15 @@ function getErrorMessage(error) {
|
|
|
36
36
|
function describeActionForMCP(action) {
|
|
37
37
|
const actionDesc = action.description || `Execute ${action.name} action`;
|
|
38
38
|
if (!action.paramSchema) return `${action.name} action, ${actionDesc}`;
|
|
39
|
-
const
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
const
|
|
43
|
-
const description = (0, external_zod_schema_utils_js_namespaceObject.getZodDescription)(schema);
|
|
39
|
+
const shape = getZodObjectShape(action.paramSchema);
|
|
40
|
+
if (!shape) {
|
|
41
|
+
const typeName = (0, external_zod_schema_utils_js_namespaceObject.getZodTypeName)(action.paramSchema);
|
|
42
|
+
const description = (0, external_zod_schema_utils_js_namespaceObject.getZodDescription)(action.paramSchema);
|
|
44
43
|
const paramDesc = description ? `${typeName} - ${description}` : typeName;
|
|
45
44
|
return `${action.name} action, ${actionDesc}. Parameter: ${paramDesc}`;
|
|
46
45
|
}
|
|
47
46
|
const paramDescriptions = [];
|
|
48
|
-
for (const [key, field] of Object.entries(
|
|
47
|
+
for (const [key, field] of Object.entries(shape))if (field && 'object' == typeof field) {
|
|
49
48
|
const isFieldOptional = 'function' == typeof field.isOptional && field.isOptional();
|
|
50
49
|
const typeName = (0, external_zod_schema_utils_js_namespaceObject.getZodTypeName)(field);
|
|
51
50
|
const description = (0, external_zod_schema_utils_js_namespaceObject.getZodDescription)(field);
|
|
@@ -59,9 +58,6 @@ function describeActionForMCP(action) {
|
|
|
59
58
|
function isZodOptional(value) {
|
|
60
59
|
return '_def' in value && value._def?.typeName === 'ZodOptional';
|
|
61
60
|
}
|
|
62
|
-
function isZodObject(value) {
|
|
63
|
-
return '_def' in value && value._def?.typeName === 'ZodObject' && 'shape' in value;
|
|
64
|
-
}
|
|
65
61
|
function unwrapOptional(value) {
|
|
66
62
|
if (isZodOptional(value)) return {
|
|
67
63
|
innerValue: value._def.innerType,
|
|
@@ -72,24 +68,31 @@ function unwrapOptional(value) {
|
|
|
72
68
|
isOptional: false
|
|
73
69
|
};
|
|
74
70
|
}
|
|
75
|
-
function
|
|
76
|
-
if (!
|
|
77
|
-
|
|
71
|
+
function getZodObjectShape(value) {
|
|
72
|
+
if (!value) return;
|
|
73
|
+
const actualValue = (0, external_zod_schema_utils_js_namespaceObject.unwrapZodField)(value);
|
|
74
|
+
if (actualValue._def?.typeName !== 'ZodObject') return;
|
|
75
|
+
if ('function' == typeof actualValue._def.shape) return actualValue._def.shape();
|
|
76
|
+
return actualValue.shape;
|
|
77
|
+
}
|
|
78
|
+
function isRecord(value) {
|
|
79
|
+
return 'object' == typeof value && null !== value && !Array.isArray(value);
|
|
78
80
|
}
|
|
79
|
-
function makePromptOptional(
|
|
81
|
+
function makePromptOptional(shape, wrapInOptional) {
|
|
80
82
|
const newShape = {
|
|
81
|
-
...
|
|
83
|
+
...shape
|
|
82
84
|
};
|
|
83
|
-
newShape.prompt =
|
|
85
|
+
newShape.prompt = shape.prompt.optional();
|
|
84
86
|
let newSchema = external_zod_namespaceObject.z.object(newShape).passthrough();
|
|
85
87
|
if (wrapInOptional) newSchema = newSchema.optional();
|
|
86
88
|
return newSchema;
|
|
87
89
|
}
|
|
88
90
|
function transformSchemaField(key, value) {
|
|
89
91
|
const { innerValue, isOptional } = unwrapOptional(value);
|
|
90
|
-
|
|
92
|
+
const shape = getZodObjectShape(innerValue);
|
|
93
|
+
if (shape && (0, external_zod_schema_utils_js_namespaceObject.isMidsceneLocatorField)(innerValue)) return [
|
|
91
94
|
key,
|
|
92
|
-
makePromptOptional(
|
|
95
|
+
makePromptOptional(shape, isOptional)
|
|
93
96
|
];
|
|
94
97
|
return [
|
|
95
98
|
key,
|
|
@@ -98,9 +101,66 @@ function transformSchemaField(key, value) {
|
|
|
98
101
|
}
|
|
99
102
|
function extractActionSchema(paramSchema) {
|
|
100
103
|
if (!paramSchema) return {};
|
|
101
|
-
const
|
|
102
|
-
if (!
|
|
103
|
-
return Object.fromEntries(Object.entries(
|
|
104
|
+
const shape = getZodObjectShape(paramSchema);
|
|
105
|
+
if (!shape) return paramSchema;
|
|
106
|
+
return Object.fromEntries(Object.entries(shape).map(([key, value])=>transformSchemaField(key, value)));
|
|
107
|
+
}
|
|
108
|
+
function getPromptText(prompt) {
|
|
109
|
+
if ('string' == typeof prompt) return prompt;
|
|
110
|
+
if (isRecord(prompt) && 'string' == typeof prompt.prompt) return prompt.prompt;
|
|
111
|
+
}
|
|
112
|
+
function moveLocateExtrasIntoPrompt(value, locateFieldKeys) {
|
|
113
|
+
const promptText = getPromptText(value.prompt);
|
|
114
|
+
if (!promptText) return value;
|
|
115
|
+
const normalizedPrompt = isRecord(value.prompt) ? {
|
|
116
|
+
...value.prompt
|
|
117
|
+
} : {
|
|
118
|
+
prompt: promptText
|
|
119
|
+
};
|
|
120
|
+
const normalizedLocate = {};
|
|
121
|
+
let movedExtraField = false;
|
|
122
|
+
for (const [key, fieldValue] of Object.entries(value))if ('prompt' !== key) {
|
|
123
|
+
if (locateFieldKeys.has(key)) {
|
|
124
|
+
normalizedLocate[key] = fieldValue;
|
|
125
|
+
continue;
|
|
126
|
+
}
|
|
127
|
+
movedExtraField = true;
|
|
128
|
+
if (!(key in normalizedPrompt)) normalizedPrompt[key] = fieldValue;
|
|
129
|
+
}
|
|
130
|
+
if (!movedExtraField) return value;
|
|
131
|
+
return {
|
|
132
|
+
...normalizedLocate,
|
|
133
|
+
prompt: normalizedPrompt
|
|
134
|
+
};
|
|
135
|
+
}
|
|
136
|
+
function normalizeLocateLikeArg(value, fieldSchema) {
|
|
137
|
+
if ('string' == typeof value) return {
|
|
138
|
+
prompt: value
|
|
139
|
+
};
|
|
140
|
+
if (!isRecord(value)) return value;
|
|
141
|
+
const shape = getZodObjectShape(fieldSchema);
|
|
142
|
+
if (!shape) return value;
|
|
143
|
+
return moveLocateExtrasIntoPrompt(value, new Set(Object.keys(shape)));
|
|
144
|
+
}
|
|
145
|
+
function normalizeActionArgs(args, paramSchema) {
|
|
146
|
+
if (!paramSchema) return args;
|
|
147
|
+
const shape = getZodObjectShape(paramSchema);
|
|
148
|
+
if (!shape) return args;
|
|
149
|
+
return Object.fromEntries(Object.entries(args).map(([key, value])=>{
|
|
150
|
+
const fieldSchema = shape[key];
|
|
151
|
+
if (!fieldSchema) return [
|
|
152
|
+
key,
|
|
153
|
+
value
|
|
154
|
+
];
|
|
155
|
+
if ((0, external_zod_schema_utils_js_namespaceObject.isMidsceneLocatorField)(fieldSchema)) return [
|
|
156
|
+
key,
|
|
157
|
+
normalizeLocateLikeArg(value, fieldSchema)
|
|
158
|
+
];
|
|
159
|
+
return [
|
|
160
|
+
key,
|
|
161
|
+
value
|
|
162
|
+
];
|
|
163
|
+
}));
|
|
104
164
|
}
|
|
105
165
|
function serializeArgsToDescription(args) {
|
|
106
166
|
try {
|
|
@@ -119,7 +179,7 @@ function serializeArgsToDescription(args) {
|
|
|
119
179
|
}
|
|
120
180
|
}
|
|
121
181
|
function buildActionInstruction(actionName, args) {
|
|
122
|
-
const locatePrompt = args.locate
|
|
182
|
+
const locatePrompt = isRecord(args.locate) ? getPromptText(args.locate.prompt) : void 0;
|
|
123
183
|
switch(actionName){
|
|
124
184
|
case 'Tap':
|
|
125
185
|
return locatePrompt ? `Tap on "${locatePrompt}"` : 'Tap';
|
|
@@ -147,44 +207,59 @@ function buildActionInstruction(actionName, args) {
|
|
|
147
207
|
}
|
|
148
208
|
}
|
|
149
209
|
}
|
|
150
|
-
async function
|
|
210
|
+
async function executeAction(agent, actionName, args) {
|
|
211
|
+
if (agent.callActionInActionSpace) return agent.callActionInActionSpace(actionName, args);
|
|
212
|
+
if (agent.aiAction) {
|
|
213
|
+
const instruction = buildActionInstruction(actionName, args);
|
|
214
|
+
return agent.aiAction(instruction);
|
|
215
|
+
}
|
|
216
|
+
throw new Error(`Action "${actionName}" is not supported by this agent`);
|
|
217
|
+
}
|
|
218
|
+
async function captureScreenshotResult(agent, actionName, actionResult) {
|
|
219
|
+
const content = [
|
|
220
|
+
{
|
|
221
|
+
type: 'text',
|
|
222
|
+
text: `Action "${actionName}" completed.`
|
|
223
|
+
}
|
|
224
|
+
];
|
|
225
|
+
if (void 0 !== actionResult) content.push({
|
|
226
|
+
type: 'text',
|
|
227
|
+
text: `Result: ${serializeActionResult(actionResult)}`
|
|
228
|
+
});
|
|
151
229
|
try {
|
|
152
230
|
const screenshot = await agent.page?.screenshotBase64();
|
|
153
231
|
if (!screenshot) return {
|
|
154
|
-
content
|
|
155
|
-
{
|
|
156
|
-
type: 'text',
|
|
157
|
-
text: `Action "${actionName}" completed.`
|
|
158
|
-
}
|
|
159
|
-
]
|
|
232
|
+
content
|
|
160
233
|
};
|
|
161
234
|
const { mimeType, body } = (0, img_namespaceObject.parseBase64)(screenshot);
|
|
235
|
+
content.push({
|
|
236
|
+
type: 'image',
|
|
237
|
+
data: body,
|
|
238
|
+
mimeType
|
|
239
|
+
});
|
|
162
240
|
return {
|
|
163
|
-
content
|
|
164
|
-
{
|
|
165
|
-
type: 'text',
|
|
166
|
-
text: `Action "${actionName}" completed.`
|
|
167
|
-
},
|
|
168
|
-
{
|
|
169
|
-
type: 'image',
|
|
170
|
-
data: body,
|
|
171
|
-
mimeType
|
|
172
|
-
}
|
|
173
|
-
]
|
|
241
|
+
content
|
|
174
242
|
};
|
|
175
243
|
} catch (error) {
|
|
176
244
|
const errorMessage = getErrorMessage(error);
|
|
177
245
|
console.error('Error capturing screenshot:', errorMessage);
|
|
246
|
+
content[0] = {
|
|
247
|
+
type: 'text',
|
|
248
|
+
text: `Action "${actionName}" completed (screenshot unavailable: ${errorMessage})`
|
|
249
|
+
};
|
|
178
250
|
return {
|
|
179
|
-
content
|
|
180
|
-
{
|
|
181
|
-
type: 'text',
|
|
182
|
-
text: `Action "${actionName}" completed (screenshot unavailable: ${errorMessage})`
|
|
183
|
-
}
|
|
184
|
-
]
|
|
251
|
+
content
|
|
185
252
|
};
|
|
186
253
|
}
|
|
187
254
|
}
|
|
255
|
+
function serializeActionResult(actionResult) {
|
|
256
|
+
if ('string' == typeof actionResult) return actionResult;
|
|
257
|
+
try {
|
|
258
|
+
return JSON.stringify(actionResult);
|
|
259
|
+
} catch {
|
|
260
|
+
return String(actionResult);
|
|
261
|
+
}
|
|
262
|
+
}
|
|
188
263
|
function createErrorResult(message) {
|
|
189
264
|
return {
|
|
190
265
|
content: [
|
|
@@ -243,17 +318,16 @@ function generateToolsFromActionSpace(actionSpace, getAgent) {
|
|
|
243
318
|
handler: async (args)=>{
|
|
244
319
|
try {
|
|
245
320
|
const agent = await getAgent();
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
}
|
|
321
|
+
const normalizedArgs = normalizeActionArgs(args, action.paramSchema);
|
|
322
|
+
let actionResult;
|
|
323
|
+
try {
|
|
324
|
+
actionResult = await executeAction(agent, action.name, normalizedArgs);
|
|
325
|
+
} catch (error) {
|
|
326
|
+
const errorMessage = getErrorMessage(error);
|
|
327
|
+
console.error(`Error executing action "${action.name}":`, errorMessage);
|
|
328
|
+
return await captureFailureResult(agent, action.name, errorMessage);
|
|
255
329
|
}
|
|
256
|
-
return await captureScreenshotResult(agent, action.name);
|
|
330
|
+
return await captureScreenshotResult(agent, action.name, actionResult);
|
|
257
331
|
} catch (error) {
|
|
258
332
|
const errorMessage = getErrorMessage(error);
|
|
259
333
|
console.error(`Error in handler for "${action.name}":`, errorMessage);
|