@midscene/shared 1.6.1-beta-20260331083547.0 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -36,16 +36,15 @@ function getErrorMessage(error) {
36
36
  function describeActionForMCP(action) {
37
37
  const actionDesc = action.description || `Execute ${action.name} action`;
38
38
  if (!action.paramSchema) return `${action.name} action, ${actionDesc}`;
39
- const schema = action.paramSchema;
40
- const isZodObjectType = schema._def?.typeName === 'ZodObject';
41
- if (!isZodObjectType || !schema.shape) {
42
- const typeName = (0, external_zod_schema_utils_js_namespaceObject.getZodTypeName)(schema);
43
- const description = (0, external_zod_schema_utils_js_namespaceObject.getZodDescription)(schema);
39
+ const shape = getZodObjectShape(action.paramSchema);
40
+ if (!shape) {
41
+ const typeName = (0, external_zod_schema_utils_js_namespaceObject.getZodTypeName)(action.paramSchema);
42
+ const description = (0, external_zod_schema_utils_js_namespaceObject.getZodDescription)(action.paramSchema);
44
43
  const paramDesc = description ? `${typeName} - ${description}` : typeName;
45
44
  return `${action.name} action, ${actionDesc}. Parameter: ${paramDesc}`;
46
45
  }
47
46
  const paramDescriptions = [];
48
- for (const [key, field] of Object.entries(schema.shape))if (field && 'object' == typeof field) {
47
+ for (const [key, field] of Object.entries(shape))if (field && 'object' == typeof field) {
49
48
  const isFieldOptional = 'function' == typeof field.isOptional && field.isOptional();
50
49
  const typeName = (0, external_zod_schema_utils_js_namespaceObject.getZodTypeName)(field);
51
50
  const description = (0, external_zod_schema_utils_js_namespaceObject.getZodDescription)(field);
@@ -59,9 +58,6 @@ function describeActionForMCP(action) {
59
58
  function isZodOptional(value) {
60
59
  return '_def' in value && value._def?.typeName === 'ZodOptional';
61
60
  }
62
- function isZodObject(value) {
63
- return '_def' in value && value._def?.typeName === 'ZodObject' && 'shape' in value;
64
- }
65
61
  function unwrapOptional(value) {
66
62
  if (isZodOptional(value)) return {
67
63
  innerValue: value._def.innerType,
@@ -72,24 +68,31 @@ function unwrapOptional(value) {
72
68
  isOptional: false
73
69
  };
74
70
  }
75
- function isLocateField(value) {
76
- if (!isZodObject(value)) return false;
77
- return 'prompt' in value.shape;
71
+ function getZodObjectShape(value) {
72
+ if (!value) return;
73
+ const actualValue = (0, external_zod_schema_utils_js_namespaceObject.unwrapZodField)(value);
74
+ if (actualValue._def?.typeName !== 'ZodObject') return;
75
+ if ('function' == typeof actualValue._def.shape) return actualValue._def.shape();
76
+ return actualValue.shape;
77
+ }
78
+ function isRecord(value) {
79
+ return 'object' == typeof value && null !== value && !Array.isArray(value);
78
80
  }
79
- function makePromptOptional(value, wrapInOptional) {
81
+ function makePromptOptional(shape, wrapInOptional) {
80
82
  const newShape = {
81
- ...value.shape
83
+ ...shape
82
84
  };
83
- newShape.prompt = value.shape.prompt.optional();
85
+ newShape.prompt = shape.prompt.optional();
84
86
  let newSchema = external_zod_namespaceObject.z.object(newShape).passthrough();
85
87
  if (wrapInOptional) newSchema = newSchema.optional();
86
88
  return newSchema;
87
89
  }
88
90
  function transformSchemaField(key, value) {
89
91
  const { innerValue, isOptional } = unwrapOptional(value);
90
- if (isZodObject(innerValue) && isLocateField(innerValue)) return [
92
+ const shape = getZodObjectShape(innerValue);
93
+ if (shape && (0, external_zod_schema_utils_js_namespaceObject.isMidsceneLocatorField)(innerValue)) return [
91
94
  key,
92
- makePromptOptional(innerValue, isOptional)
95
+ makePromptOptional(shape, isOptional)
93
96
  ];
94
97
  return [
95
98
  key,
@@ -98,9 +101,66 @@ function transformSchemaField(key, value) {
98
101
  }
99
102
  function extractActionSchema(paramSchema) {
100
103
  if (!paramSchema) return {};
101
- const schema = paramSchema;
102
- if (!isZodObject(schema)) return schema;
103
- return Object.fromEntries(Object.entries(schema.shape).map(([key, value])=>transformSchemaField(key, value)));
104
+ const shape = getZodObjectShape(paramSchema);
105
+ if (!shape) return paramSchema;
106
+ return Object.fromEntries(Object.entries(shape).map(([key, value])=>transformSchemaField(key, value)));
107
+ }
108
+ function getPromptText(prompt) {
109
+ if ('string' == typeof prompt) return prompt;
110
+ if (isRecord(prompt) && 'string' == typeof prompt.prompt) return prompt.prompt;
111
+ }
112
+ function moveLocateExtrasIntoPrompt(value, locateFieldKeys) {
113
+ const promptText = getPromptText(value.prompt);
114
+ if (!promptText) return value;
115
+ const normalizedPrompt = isRecord(value.prompt) ? {
116
+ ...value.prompt
117
+ } : {
118
+ prompt: promptText
119
+ };
120
+ const normalizedLocate = {};
121
+ let movedExtraField = false;
122
+ for (const [key, fieldValue] of Object.entries(value))if ('prompt' !== key) {
123
+ if (locateFieldKeys.has(key)) {
124
+ normalizedLocate[key] = fieldValue;
125
+ continue;
126
+ }
127
+ movedExtraField = true;
128
+ if (!(key in normalizedPrompt)) normalizedPrompt[key] = fieldValue;
129
+ }
130
+ if (!movedExtraField) return value;
131
+ return {
132
+ ...normalizedLocate,
133
+ prompt: normalizedPrompt
134
+ };
135
+ }
136
+ function normalizeLocateLikeArg(value, fieldSchema) {
137
+ if ('string' == typeof value) return {
138
+ prompt: value
139
+ };
140
+ if (!isRecord(value)) return value;
141
+ const shape = getZodObjectShape(fieldSchema);
142
+ if (!shape) return value;
143
+ return moveLocateExtrasIntoPrompt(value, new Set(Object.keys(shape)));
144
+ }
145
+ function normalizeActionArgs(args, paramSchema) {
146
+ if (!paramSchema) return args;
147
+ const shape = getZodObjectShape(paramSchema);
148
+ if (!shape) return args;
149
+ return Object.fromEntries(Object.entries(args).map(([key, value])=>{
150
+ const fieldSchema = shape[key];
151
+ if (!fieldSchema) return [
152
+ key,
153
+ value
154
+ ];
155
+ if ((0, external_zod_schema_utils_js_namespaceObject.isMidsceneLocatorField)(fieldSchema)) return [
156
+ key,
157
+ normalizeLocateLikeArg(value, fieldSchema)
158
+ ];
159
+ return [
160
+ key,
161
+ value
162
+ ];
163
+ }));
104
164
  }
105
165
  function serializeArgsToDescription(args) {
106
166
  try {
@@ -119,7 +179,7 @@ function serializeArgsToDescription(args) {
119
179
  }
120
180
  }
121
181
  function buildActionInstruction(actionName, args) {
122
- const locatePrompt = args.locate && 'object' == typeof args.locate ? args.locate.prompt : void 0;
182
+ const locatePrompt = isRecord(args.locate) ? getPromptText(args.locate.prompt) : void 0;
123
183
  switch(actionName){
124
184
  case 'Tap':
125
185
  return locatePrompt ? `Tap on "${locatePrompt}"` : 'Tap';
@@ -147,44 +207,59 @@ function buildActionInstruction(actionName, args) {
147
207
  }
148
208
  }
149
209
  }
150
- async function captureScreenshotResult(agent, actionName) {
210
+ async function executeAction(agent, actionName, args) {
211
+ if (agent.callActionInActionSpace) return agent.callActionInActionSpace(actionName, args);
212
+ if (agent.aiAction) {
213
+ const instruction = buildActionInstruction(actionName, args);
214
+ return agent.aiAction(instruction);
215
+ }
216
+ throw new Error(`Action "${actionName}" is not supported by this agent`);
217
+ }
218
+ async function captureScreenshotResult(agent, actionName, actionResult) {
219
+ const content = [
220
+ {
221
+ type: 'text',
222
+ text: `Action "${actionName}" completed.`
223
+ }
224
+ ];
225
+ if (void 0 !== actionResult) content.push({
226
+ type: 'text',
227
+ text: `Result: ${serializeActionResult(actionResult)}`
228
+ });
151
229
  try {
152
230
  const screenshot = await agent.page?.screenshotBase64();
153
231
  if (!screenshot) return {
154
- content: [
155
- {
156
- type: 'text',
157
- text: `Action "${actionName}" completed.`
158
- }
159
- ]
232
+ content
160
233
  };
161
234
  const { mimeType, body } = (0, img_namespaceObject.parseBase64)(screenshot);
235
+ content.push({
236
+ type: 'image',
237
+ data: body,
238
+ mimeType
239
+ });
162
240
  return {
163
- content: [
164
- {
165
- type: 'text',
166
- text: `Action "${actionName}" completed.`
167
- },
168
- {
169
- type: 'image',
170
- data: body,
171
- mimeType
172
- }
173
- ]
241
+ content
174
242
  };
175
243
  } catch (error) {
176
244
  const errorMessage = getErrorMessage(error);
177
245
  console.error('Error capturing screenshot:', errorMessage);
246
+ content[0] = {
247
+ type: 'text',
248
+ text: `Action "${actionName}" completed (screenshot unavailable: ${errorMessage})`
249
+ };
178
250
  return {
179
- content: [
180
- {
181
- type: 'text',
182
- text: `Action "${actionName}" completed (screenshot unavailable: ${errorMessage})`
183
- }
184
- ]
251
+ content
185
252
  };
186
253
  }
187
254
  }
255
+ function serializeActionResult(actionResult) {
256
+ if ('string' == typeof actionResult) return actionResult;
257
+ try {
258
+ return JSON.stringify(actionResult);
259
+ } catch {
260
+ return String(actionResult);
261
+ }
262
+ }
188
263
  function createErrorResult(message) {
189
264
  return {
190
265
  content: [
@@ -243,17 +318,16 @@ function generateToolsFromActionSpace(actionSpace, getAgent) {
243
318
  handler: async (args)=>{
244
319
  try {
245
320
  const agent = await getAgent();
246
- if (agent.aiAction) {
247
- const instruction = buildActionInstruction(action.name, args);
248
- try {
249
- await agent.aiAction(instruction);
250
- } catch (error) {
251
- const errorMessage = getErrorMessage(error);
252
- console.error(`Error executing action "${action.name}":`, errorMessage);
253
- return await captureFailureResult(agent, action.name, errorMessage);
254
- }
321
+ const normalizedArgs = normalizeActionArgs(args, action.paramSchema);
322
+ let actionResult;
323
+ try {
324
+ actionResult = await executeAction(agent, action.name, normalizedArgs);
325
+ } catch (error) {
326
+ const errorMessage = getErrorMessage(error);
327
+ console.error(`Error executing action "${action.name}":`, errorMessage);
328
+ return await captureFailureResult(agent, action.name, errorMessage);
255
329
  }
256
- return await captureScreenshotResult(agent, action.name);
330
+ return await captureScreenshotResult(agent, action.name, actionResult);
257
331
  } catch (error) {
258
332
  const errorMessage = getErrorMessage(error);
259
333
  console.error(`Error in handler for "${action.name}":`, errorMessage);