@godscene/shared 1.7.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (236) hide show
  1. package/README.md +9 -0
  2. package/dist/es/baseDB.mjs +109 -0
  3. package/dist/es/cli/cli-args.mjs +95 -0
  4. package/dist/es/cli/cli-error.mjs +24 -0
  5. package/dist/es/cli/cli-runner.mjs +122 -0
  6. package/dist/es/cli/index.mjs +4 -0
  7. package/dist/es/common.mjs +37 -0
  8. package/dist/es/constants/example-code.mjs +227 -0
  9. package/dist/es/constants/index.mjs +124 -0
  10. package/dist/es/env/basic.mjs +6 -0
  11. package/dist/es/env/constants.mjs +110 -0
  12. package/dist/es/env/global-config-manager.mjs +94 -0
  13. package/dist/es/env/helper.mjs +43 -0
  14. package/dist/es/env/index.mjs +5 -0
  15. package/dist/es/env/init-debug.mjs +18 -0
  16. package/dist/es/env/model-config-manager.mjs +79 -0
  17. package/dist/es/env/parse-model-config.mjs +165 -0
  18. package/dist/es/env/types.mjs +232 -0
  19. package/dist/es/env/utils.mjs +18 -0
  20. package/dist/es/extractor/constants.mjs +2 -0
  21. package/dist/es/extractor/cs_postmessage.mjs +61 -0
  22. package/dist/es/extractor/customLocator.mjs +641 -0
  23. package/dist/es/extractor/debug.mjs +6 -0
  24. package/dist/es/extractor/dom-util.mjs +96 -0
  25. package/dist/es/extractor/index.mjs +5 -0
  26. package/dist/es/extractor/locator.mjs +250 -0
  27. package/dist/es/extractor/tree.mjs +78 -0
  28. package/dist/es/extractor/util.mjs +245 -0
  29. package/dist/es/extractor/web-extractor.mjs +393 -0
  30. package/dist/es/img/box-select.mjs +824 -0
  31. package/dist/es/img/canvas-fallback.mjs +238 -0
  32. package/dist/es/img/get-photon.mjs +45 -0
  33. package/dist/es/img/get-sharp.mjs +11 -0
  34. package/dist/es/img/index.mjs +4 -0
  35. package/dist/es/img/info.mjs +35 -0
  36. package/dist/es/img/transform.mjs +275 -0
  37. package/dist/es/index.mjs +2 -0
  38. package/dist/es/key-alias-utils.mjs +19 -0
  39. package/dist/es/logger.mjs +64 -0
  40. package/dist/es/mcp/base-server.mjs +282 -0
  41. package/dist/es/mcp/base-tools.mjs +159 -0
  42. package/dist/es/mcp/chrome-path.mjs +35 -0
  43. package/dist/es/mcp/cli-report-session.mjs +78 -0
  44. package/dist/es/mcp/error-formatter.mjs +19 -0
  45. package/dist/es/mcp/index.mjs +9 -0
  46. package/dist/es/mcp/init-arg-utils.mjs +38 -0
  47. package/dist/es/mcp/inject-report-html-plugin.mjs +53 -0
  48. package/dist/es/mcp/launcher-helper.mjs +52 -0
  49. package/dist/es/mcp/tool-generator.mjs +419 -0
  50. package/dist/es/mcp/types.mjs +3 -0
  51. package/dist/es/node/fs.mjs +44 -0
  52. package/dist/es/node/index.mjs +2 -0
  53. package/dist/es/node/port.mjs +24 -0
  54. package/dist/es/polyfills/async-hooks.mjs +2 -0
  55. package/dist/es/polyfills/index.mjs +1 -0
  56. package/dist/es/types/index.mjs +3 -0
  57. package/dist/es/us-keyboard-layout.mjs +1414 -0
  58. package/dist/es/us-keyboard-layout.mjs.LICENSE.txt +5 -0
  59. package/dist/es/utils.mjs +72 -0
  60. package/dist/es/zod-schema-utils.mjs +54 -0
  61. package/dist/lib/baseDB.js +149 -0
  62. package/dist/lib/cli/cli-args.js +138 -0
  63. package/dist/lib/cli/cli-error.js +61 -0
  64. package/dist/lib/cli/cli-runner.js +181 -0
  65. package/dist/lib/cli/index.js +53 -0
  66. package/dist/lib/common.js +93 -0
  67. package/dist/lib/constants/example-code.js +264 -0
  68. package/dist/lib/constants/index.js +221 -0
  69. package/dist/lib/env/basic.js +40 -0
  70. package/dist/lib/env/constants.js +153 -0
  71. package/dist/lib/env/global-config-manager.js +128 -0
  72. package/dist/lib/env/helper.js +80 -0
  73. package/dist/lib/env/index.js +90 -0
  74. package/dist/lib/env/init-debug.js +52 -0
  75. package/dist/lib/env/model-config-manager.js +113 -0
  76. package/dist/lib/env/parse-model-config.js +211 -0
  77. package/dist/lib/env/types.js +572 -0
  78. package/dist/lib/env/utils.js +61 -0
  79. package/dist/lib/extractor/constants.js +42 -0
  80. package/dist/lib/extractor/cs_postmessage.js +98 -0
  81. package/dist/lib/extractor/customLocator.js +693 -0
  82. package/dist/lib/extractor/debug.js +12 -0
  83. package/dist/lib/extractor/dom-util.js +157 -0
  84. package/dist/lib/extractor/index.js +87 -0
  85. package/dist/lib/extractor/locator.js +296 -0
  86. package/dist/lib/extractor/tree.js +124 -0
  87. package/dist/lib/extractor/util.js +336 -0
  88. package/dist/lib/extractor/web-extractor.js +442 -0
  89. package/dist/lib/img/box-select.js +875 -0
  90. package/dist/lib/img/canvas-fallback.js +305 -0
  91. package/dist/lib/img/get-photon.js +82 -0
  92. package/dist/lib/img/get-sharp.js +45 -0
  93. package/dist/lib/img/index.js +95 -0
  94. package/dist/lib/img/info.js +92 -0
  95. package/dist/lib/img/transform.js +364 -0
  96. package/dist/lib/index.js +36 -0
  97. package/dist/lib/key-alias-utils.js +62 -0
  98. package/dist/lib/logger.js +114 -0
  99. package/dist/lib/mcp/base-server.js +332 -0
  100. package/dist/lib/mcp/base-tools.js +193 -0
  101. package/dist/lib/mcp/chrome-path.js +72 -0
  102. package/dist/lib/mcp/cli-report-session.js +121 -0
  103. package/dist/lib/mcp/error-formatter.js +53 -0
  104. package/dist/lib/mcp/index.js +114 -0
  105. package/dist/lib/mcp/init-arg-utils.js +78 -0
  106. package/dist/lib/mcp/inject-report-html-plugin.js +98 -0
  107. package/dist/lib/mcp/launcher-helper.js +86 -0
  108. package/dist/lib/mcp/tool-generator.js +456 -0
  109. package/dist/lib/mcp/types.js +40 -0
  110. package/dist/lib/node/fs.js +97 -0
  111. package/dist/lib/node/index.js +65 -0
  112. package/dist/lib/node/port.js +61 -0
  113. package/dist/lib/polyfills/async-hooks.js +36 -0
  114. package/dist/lib/polyfills/index.js +58 -0
  115. package/dist/lib/types/index.js +37 -0
  116. package/dist/lib/us-keyboard-layout.js +1457 -0
  117. package/dist/lib/us-keyboard-layout.js.LICENSE.txt +5 -0
  118. package/dist/lib/utils.js +148 -0
  119. package/dist/lib/zod-schema-utils.js +97 -0
  120. package/dist/types/baseDB.d.ts +25 -0
  121. package/dist/types/cli/cli-args.d.ts +8 -0
  122. package/dist/types/cli/cli-error.d.ts +5 -0
  123. package/dist/types/cli/cli-runner.d.ts +19 -0
  124. package/dist/types/cli/index.d.ts +4 -0
  125. package/dist/types/common.d.ts +12 -0
  126. package/dist/types/constants/example-code.d.ts +2 -0
  127. package/dist/types/constants/index.d.ts +61 -0
  128. package/dist/types/env/basic.d.ts +6 -0
  129. package/dist/types/env/constants.d.ts +50 -0
  130. package/dist/types/env/global-config-manager.d.ts +32 -0
  131. package/dist/types/env/helper.d.ts +4 -0
  132. package/dist/types/env/index.d.ts +4 -0
  133. package/dist/types/env/init-debug.d.ts +1 -0
  134. package/dist/types/env/model-config-manager.d.ts +25 -0
  135. package/dist/types/env/parse-model-config.d.ts +31 -0
  136. package/dist/types/env/types.d.ts +339 -0
  137. package/dist/types/env/utils.d.ts +7 -0
  138. package/dist/types/extractor/constants.d.ts +1 -0
  139. package/dist/types/extractor/cs_postmessage.d.ts +2 -0
  140. package/dist/types/extractor/customLocator.d.ts +69 -0
  141. package/dist/types/extractor/debug.d.ts +1 -0
  142. package/dist/types/extractor/dom-util.d.ts +57 -0
  143. package/dist/types/extractor/index.d.ts +33 -0
  144. package/dist/types/extractor/locator.d.ts +9 -0
  145. package/dist/types/extractor/tree.d.ts +6 -0
  146. package/dist/types/extractor/util.d.ts +47 -0
  147. package/dist/types/extractor/web-extractor.d.ts +24 -0
  148. package/dist/types/img/box-select.d.ts +26 -0
  149. package/dist/types/img/canvas-fallback.d.ts +105 -0
  150. package/dist/types/img/get-photon.d.ts +19 -0
  151. package/dist/types/img/get-sharp.d.ts +3 -0
  152. package/dist/types/img/index.d.ts +3 -0
  153. package/dist/types/img/info.d.ts +34 -0
  154. package/dist/types/img/transform.d.ts +98 -0
  155. package/dist/types/index.d.ts +2 -0
  156. package/dist/types/key-alias-utils.d.ts +9 -0
  157. package/dist/types/logger.d.ts +5 -0
  158. package/dist/types/mcp/base-server.d.ts +93 -0
  159. package/dist/types/mcp/base-tools.d.ts +148 -0
  160. package/dist/types/mcp/chrome-path.d.ts +2 -0
  161. package/dist/types/mcp/cli-report-session.d.ts +12 -0
  162. package/dist/types/mcp/error-formatter.d.ts +12 -0
  163. package/dist/types/mcp/index.d.ts +9 -0
  164. package/dist/types/mcp/init-arg-utils.d.ts +13 -0
  165. package/dist/types/mcp/inject-report-html-plugin.d.ts +18 -0
  166. package/dist/types/mcp/launcher-helper.d.ts +94 -0
  167. package/dist/types/mcp/tool-generator.d.ts +10 -0
  168. package/dist/types/mcp/types.d.ts +113 -0
  169. package/dist/types/node/fs.d.ts +15 -0
  170. package/dist/types/node/index.d.ts +2 -0
  171. package/dist/types/node/port.d.ts +8 -0
  172. package/dist/types/polyfills/async-hooks.d.ts +6 -0
  173. package/dist/types/polyfills/index.d.ts +4 -0
  174. package/dist/types/types/index.d.ts +36 -0
  175. package/dist/types/us-keyboard-layout.d.ts +32 -0
  176. package/dist/types/utils.d.ts +34 -0
  177. package/dist/types/zod-schema-utils.d.ts +23 -0
  178. package/package.json +125 -0
  179. package/src/baseDB.ts +158 -0
  180. package/src/cli/cli-args.ts +173 -0
  181. package/src/cli/cli-error.ts +24 -0
  182. package/src/cli/cli-runner.ts +230 -0
  183. package/src/cli/index.ts +4 -0
  184. package/src/common.ts +67 -0
  185. package/src/constants/example-code.ts +227 -0
  186. package/src/constants/index.ts +139 -0
  187. package/src/env/basic.ts +12 -0
  188. package/src/env/constants.ts +303 -0
  189. package/src/env/global-config-manager.ts +191 -0
  190. package/src/env/helper.ts +58 -0
  191. package/src/env/index.ts +4 -0
  192. package/src/env/init-debug.ts +34 -0
  193. package/src/env/model-config-manager.ts +149 -0
  194. package/src/env/parse-model-config.ts +357 -0
  195. package/src/env/types.ts +583 -0
  196. package/src/env/utils.ts +39 -0
  197. package/src/extractor/constants.ts +5 -0
  198. package/src/extractor/cs_postmessage.ts +136 -0
  199. package/src/extractor/customLocator.ts +1245 -0
  200. package/src/extractor/debug.ts +10 -0
  201. package/src/extractor/dom-util.ts +231 -0
  202. package/src/extractor/index.ts +50 -0
  203. package/src/extractor/locator.ts +469 -0
  204. package/src/extractor/tree.ts +179 -0
  205. package/src/extractor/util.ts +482 -0
  206. package/src/extractor/web-extractor.ts +617 -0
  207. package/src/img/box-select.ts +588 -0
  208. package/src/img/canvas-fallback.ts +393 -0
  209. package/src/img/get-photon.ts +108 -0
  210. package/src/img/get-sharp.ts +18 -0
  211. package/src/img/index.ts +27 -0
  212. package/src/img/info.ts +102 -0
  213. package/src/img/transform.ts +553 -0
  214. package/src/index.ts +1 -0
  215. package/src/key-alias-utils.ts +23 -0
  216. package/src/logger.ts +96 -0
  217. package/src/mcp/base-server.ts +500 -0
  218. package/src/mcp/base-tools.ts +391 -0
  219. package/src/mcp/chrome-path.ts +48 -0
  220. package/src/mcp/cli-report-session.ts +130 -0
  221. package/src/mcp/error-formatter.ts +52 -0
  222. package/src/mcp/index.ts +9 -0
  223. package/src/mcp/init-arg-utils.ts +105 -0
  224. package/src/mcp/inject-report-html-plugin.ts +119 -0
  225. package/src/mcp/launcher-helper.ts +200 -0
  226. package/src/mcp/tool-generator.ts +658 -0
  227. package/src/mcp/types.ts +131 -0
  228. package/src/node/fs.ts +84 -0
  229. package/src/node/index.ts +2 -0
  230. package/src/node/port.ts +37 -0
  231. package/src/polyfills/async-hooks.ts +6 -0
  232. package/src/polyfills/index.ts +4 -0
  233. package/src/types/index.ts +54 -0
  234. package/src/us-keyboard-layout.ts +723 -0
  235. package/src/utils.ts +149 -0
  236. package/src/zod-schema-utils.ts +133 -0
@@ -0,0 +1,658 @@
1
+ import { parseBase64 } from '@godscene/shared/img';
2
+ import { z } from 'zod';
3
+ import {
4
+ getZodDescription,
5
+ getZodTypeName,
6
+ isMidsceneLocatorField,
7
+ unwrapZodField,
8
+ } from '../zod-schema-utils';
9
+ import { getErrorMessage } from './error-formatter';
10
+ import type {
11
+ ActionSpaceItem,
12
+ BaseAgent,
13
+ ToolCliMetadata,
14
+ ToolDefinition,
15
+ ToolResult,
16
+ ToolSchema,
17
+ } from './types';
18
+
19
/**
 * Build the human-readable MCP tool description for an action.
 *
 * Possible outputs:
 * - no schema, or an object schema without describable fields:
 *   "<name> action, <description>"
 * - non-object schema:
 *   "<name> action, <description>. Parameter: <type>[ - <desc>]"
 * - object schema:
 *   "<name> action, <description>. Parameters: a (type) - desc; b? (type)"
 *
 * When the action has no description, "Execute <name> action" is used.
 */
function describeActionForMCP(action: ActionSpaceItem): string {
  const fallbackDescription = `Execute ${action.name} action`;
  const headline = `${action.name} action, ${action.description || fallbackDescription}`;

  if (!action.paramSchema) {
    return headline;
  }

  const shape = getZodObjectShape(action.paramSchema);
  if (!shape) {
    // Non-object schema: describe the single parameter by its type.
    const typeName = getZodTypeName(action.paramSchema);
    const description = getZodDescription(action.paramSchema as z.ZodTypeAny);
    const single = description ? `${typeName} - ${description}` : typeName;
    return `${headline}. Parameter: ${single}`;
  }

  // Object schema: one "name[?] (type)[ - description]" entry per field.
  const fieldSummaries = Object.entries(shape).flatMap(([key, field]) => {
    if (!field || typeof field !== 'object') {
      return [];
    }

    const candidate = field as { isOptional?: () => boolean };
    const optionalMark =
      typeof candidate.isOptional === 'function' && candidate.isOptional()
        ? '?'
        : '';
    const typeName = getZodTypeName(field);
    const description = getZodDescription(field as z.ZodTypeAny);
    const label = `${key}${optionalMark} (${typeName})`;

    return [description ? `${label} - ${description}` : label];
  });

  return fieldSummaries.length === 0
    ? headline
    : `${headline}. Parameters: ${fieldSummaries.join('; ')}`;
}
63
+
64
/**
 * Type guard that recognises a ZodOptional wrapper by inspecting
 * `_def.typeName` rather than relying on `instanceof`.
 */
function isZodOptional(
  value: z.ZodTypeAny,
): value is z.ZodOptional<z.ZodTypeAny> {
  if (!('_def' in value)) {
    return false;
  }
  return value._def?.typeName === 'ZodOptional';
}
72
+
73
/**
 * Type guard that recognises a ZodObject: `_def.typeName` must be
 * 'ZodObject' and the value must expose a `shape` member.
 */
function isZodObject(value: z.ZodTypeAny): value is z.ZodObject<z.ZodRawShape> {
  if (!('_def' in value) || value._def?.typeName !== 'ZodObject') {
    return false;
  }
  return 'shape' in value;
}
81
+
82
/**
 * Peel a single ZodOptional layer off a schema.
 *
 * @returns The inner schema (or the value itself when it is not optional)
 * together with a flag telling whether a wrapper was removed.
 */
function unwrapOptional(value: z.ZodTypeAny): {
  innerValue: z.ZodTypeAny;
  isOptional: boolean;
} {
  return isZodOptional(value)
    ? { innerValue: value._def.innerType, isOptional: true }
    : { innerValue: value, isOptional: false };
}
94
+
95
/**
 * Return the field map of a ZodObject schema, or `undefined` when the value
 * is missing or (after `unwrapZodField`) is not a ZodObject.
 *
 * The shape is read from `_def.shape()` when that is a function and from the
 * `shape` property otherwise.
 */
function getZodObjectShape(
  value: z.ZodTypeAny | undefined,
): Record<string, z.ZodTypeAny> | undefined {
  if (!value) {
    return undefined;
  }

  const unwrapped = unwrapZodField(value) as {
    _def?: { typeName?: string; shape?: () => Record<string, z.ZodTypeAny> };
    shape?: Record<string, z.ZodTypeAny>;
  };
  const def = unwrapped._def;

  if (def?.typeName !== 'ZodObject') {
    return undefined;
  }

  return typeof def.shape === 'function' ? def.shape() : unwrapped.shape;
}
117
+
118
/**
 * Type guard for plain object-like values: non-null objects that are not
 * arrays.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || typeof value !== 'object') {
    return false;
  }
  return !Array.isArray(value);
}
121
+
122
/**
 * Rebuild a locate field schema so that its `prompt` member is optional.
 *
 * The rebuilt object schema is marked `.passthrough()`, and is itself wrapped
 * in `.optional()` when `wrapInOptional` is true.
 */
function makePromptOptional(
  shape: Record<string, z.ZodTypeAny>,
  wrapInOptional: boolean,
): z.ZodTypeAny {
  const relaxedSchema: z.ZodTypeAny = z
    .object({ ...shape, prompt: shape.prompt.optional() })
    .passthrough();

  return wrapInOptional ? relaxedSchema.optional() : relaxedSchema;
}
138
+
139
/**
 * Map one schema entry to its exposed form: locator fields get an optional
 * `prompt` (keeping any outer optionality); every other field is returned
 * unchanged.
 */
function transformSchemaField(
  key: string,
  value: z.ZodTypeAny,
): [string, z.ZodTypeAny] {
  const unwrapped = unwrapOptional(value);
  const locateShape = getZodObjectShape(unwrapped.innerValue);

  if (!locateShape || !isMidsceneLocatorField(unwrapped.innerValue)) {
    return [key, value];
  }

  return [key, makePromptOptional(locateShape, unwrapped.isOptional)];
}
154
+
155
/**
 * Derive the tool schema (a map of named fields) from an action's
 * paramSchema.
 *
 * CLI and MCP expose parameters as named fields, so only two inputs are
 * accepted: `undefined` (no parameters, yields `{}`) or a ZodObject. A
 * primitive schema such as `z.string()` used to degrade silently into
 * exposing the ZodString prototype methods as CLI flags — see
 * https://github.com/web-infra-dev/midscene/issues/2313 — so any non-object
 * schema is rejected with an explicit error instead.
 *
 * @param paramSchema - The action's parameter schema, if any.
 * @param actionName - Action name, used only in the error message.
 * @throws Error when `paramSchema` is defined but is not a ZodObject.
 */
function extractActionSchema(
  paramSchema: z.ZodTypeAny | undefined,
  actionName: string,
): Record<string, z.ZodTypeAny> {
  if (!paramSchema) {
    return {};
  }

  const shape = getZodObjectShape(paramSchema);
  if (shape) {
    const transformed: [string, z.ZodTypeAny][] = [];
    for (const [key, value] of Object.entries(shape)) {
      transformed.push(transformSchemaField(key, value as z.ZodTypeAny));
    }
    return Object.fromEntries(transformed);
  }

  const def = (paramSchema as unknown as { _def?: { typeName?: string } })
    ?._def;
  const typeName = def?.typeName ?? 'unknown';
  throw new Error(
    `Action "${actionName}" declared a non-object paramSchema (${typeName}). CLI and MCP tool schemas must be a ZodObject (e.g. z.object({ uri: z.string() })) or undefined. Wrap primitive fields in an object schema.`,
  );
}
190
+
191
/**
 * Extract prompt text from either a bare string or an object whose `prompt`
 * member is a string. Any other input yields `undefined`.
 */
function getPromptText(prompt: unknown): string | undefined {
  if (typeof prompt === 'string') {
    return prompt;
  }

  const nested = isRecord(prompt) ? prompt.prompt : undefined;
  return typeof nested === 'string' ? nested : undefined;
}
202
+
203
/**
 * Fold undeclared keys of a locate-like argument into its nested `prompt`
 * object.
 *
 * Keys listed in `locateFieldKeys` stay at the top level. Every other key
 * (apart from `prompt` itself) is an "extra" and is copied into the prompt
 * object unless the prompt object already has its own value for that key.
 * The input is returned untouched when it has no usable prompt text or when
 * no extras were found.
 *
 * @param value - Raw locate argument.
 * @param locateFieldKeys - Field names declared by the locate schema.
 * @returns `value` itself, or a new `{ ...declaredFields, prompt }` object.
 */
function moveLocateExtrasIntoPrompt(
  value: Record<string, unknown>,
  locateFieldKeys: Set<string>,
): Record<string, unknown> {
  const promptText = getPromptText(value.prompt);
  if (!promptText) {
    return value;
  }

  const normalizedPrompt: Record<string, unknown> = isRecord(value.prompt)
    ? { ...value.prompt }
    : { prompt: promptText };
  const normalizedLocate: Record<string, unknown> = {};
  let movedExtraField = false;

  for (const [key, fieldValue] of Object.entries(value)) {
    if (key === 'prompt') {
      continue;
    }

    if (locateFieldKeys.has(key)) {
      normalizedLocate[key] = fieldValue;
      continue;
    }

    movedExtraField = true;
    // Use an own-property check: the `in` operator also matches inherited
    // Object.prototype members, which made extras named e.g. "constructor"
    // or "toString" look already present and silently dropped them.
    // "__proto__" is still skipped because assigning it would replace the
    // prompt object's prototype instead of adding a field.
    if (
      key !== '__proto__' &&
      !Object.prototype.hasOwnProperty.call(normalizedPrompt, key)
    ) {
      normalizedPrompt[key] = fieldValue;
    }
  }

  if (!movedExtraField) {
    return value;
  }

  return { ...normalizedLocate, prompt: normalizedPrompt };
}
240
+
241
/**
 * Bring a locate-like argument into object form.
 *
 * - A string becomes `{ prompt: <string> }`.
 * - A record whose field schema is a ZodObject has its undeclared keys moved
 *   into the nested prompt (see `moveLocateExtrasIntoPrompt`).
 * - Anything else is returned unchanged.
 */
function normalizeLocateLikeArg(
  value: unknown,
  fieldSchema: z.ZodTypeAny,
): unknown {
  if (typeof value === 'string') {
    return { prompt: value };
  }

  if (isRecord(value)) {
    const shape = getZodObjectShape(fieldSchema);
    if (shape) {
      return moveLocateExtrasIntoPrompt(value, new Set(Object.keys(shape)));
    }
  }

  return value;
}
260
+
261
/**
 * Normalize incoming tool arguments against the action's parameter schema.
 *
 * Only arguments whose schema field is a locator field are rewritten (via
 * `normalizeLocateLikeArg`); all other arguments pass through as-is. When
 * the schema is absent or is not an object schema, `args` is returned
 * unchanged.
 */
function normalizeActionArgs(
  args: Record<string, unknown>,
  paramSchema?: z.ZodTypeAny,
): Record<string, unknown> {
  const shape = paramSchema ? getZodObjectShape(paramSchema) : undefined;
  if (!shape) {
    return args;
  }

  const normalizedEntries: [string, unknown][] = [];
  for (const [key, value] of Object.entries(args)) {
    const fieldSchema = shape[key] as z.ZodTypeAny | undefined;
    const normalizedValue =
      fieldSchema && isMidsceneLocatorField(fieldSchema)
        ? normalizeLocateLikeArg(value, fieldSchema)
        : value;
    normalizedEntries.push([key, normalizedValue]);
  }

  return Object.fromEntries(normalizedEntries);
}
289
+
290
/**
 * Render arguments as a comma-separated `key: value` list for use in an AI
 * action instruction.
 *
 * Non-null objects are JSON-encoded (falling back to `[object]` when that
 * fails, e.g. on circular references); everything else is wrapped in double
 * quotes. If rendering fails as a whole, a placeholder naming the error is
 * returned instead of throwing.
 */
function serializeArgsToDescription(args: Record<string, unknown>): string {
  const renderEntry = (key: string, value: unknown): string => {
    if (value === null || typeof value !== 'object') {
      return `${key}: "${value}"`;
    }
    try {
      return `${key}: ${JSON.stringify(value)}`;
    } catch {
      // Circular reference or non-serializable object
      return `${key}: [object]`;
    }
  };

  try {
    const rendered: string[] = [];
    for (const [key, value] of Object.entries(args)) {
      rendered.push(renderEntry(key, value));
    }
    return rendered.join(', ');
  } catch (error: unknown) {
    const errorMessage = getErrorMessage(error);
    console.error('Error serializing args:', errorMessage);
    return `[args serialization failed: ${errorMessage}]`;
  }
}
314
+
315
/**
 * Phrase an action call as a natural-language instruction.
 *
 * Natural language gives the planner room to change strategy when it
 * replans, whereas rigid structured instructions tend to reproduce the same
 * failure. Tap, Input, Scroll, Hover and KeyboardPress have dedicated
 * phrasings; any other action is rendered as "<name>: <serialized args>".
 */
function buildActionInstruction(
  actionName: string,
  args: Record<string, unknown>,
): string {
  const target = isRecord(args.locate)
    ? getPromptText(args.locate.prompt)
    : undefined;

  if (actionName === 'Tap') {
    return target ? `Tap on "${target}"` : 'Tap';
  }

  if (actionName === 'Input') {
    const text = args.value ?? args.content ?? '';
    return target ? `Input "${text}" into "${target}"` : `Input "${text}"`;
  }

  if (actionName === 'Scroll') {
    const direction = args.direction ?? 'down';
    return target
      ? `Scroll ${direction} on "${target}"`
      : `Scroll ${direction}`;
  }

  if (actionName === 'Hover') {
    return target ? `Hover over "${target}"` : 'Hover';
  }

  if (actionName === 'KeyboardPress') {
    const pressedKey = args.value ?? args.key ?? '';
    return `Press key "${pressedKey}"`;
  }

  // Generic fallback: action name followed by its serialized arguments.
  const details = serializeArgsToDescription(args);
  return details ? `${actionName}: ${details}` : actionName;
}
355
+
356
/**
 * Run an action on the agent.
 *
 * Prefers the agent's direct `callActionInActionSpace` entry point; falls
 * back to `aiAction` with a natural-language instruction; throws when the
 * agent offers neither.
 */
async function executeAction(
  agent: BaseAgent,
  actionName: string,
  args: Record<string, unknown>,
): Promise<unknown> {
  if (agent.callActionInActionSpace) {
    return agent.callActionInActionSpace(actionName, args);
  }

  if (!agent.aiAction) {
    throw new Error(`Action "${actionName}" is not supported by this agent`);
  }

  return agent.aiAction(buildActionInstruction(actionName, args));
}
372
+
373
/**
 * Build the success result for an action: a completion message, the
 * serialized action result (when one was returned) and, if available, a
 * screenshot of the current page.
 *
 * A screenshot failure does not fail the tool call; it is logged and noted
 * in the completion message instead.
 */
async function captureScreenshotResult(
  agent: BaseAgent,
  actionName: string,
  actionResult?: unknown,
): Promise<ToolResult> {
  const content: ToolResult['content'] = [
    { type: 'text', text: `Action "${actionName}" completed.` },
  ];

  if (actionResult !== undefined) {
    content.push({
      type: 'text',
      text: `Result: ${serializeActionResult(actionResult)}`,
    });
  }

  try {
    const screenshot = await agent.page?.screenshotBase64();
    if (screenshot) {
      const parsed = parseBase64(screenshot);
      content.push({
        type: 'image',
        data: parsed.body,
        mimeType: parsed.mimeType,
      });
    }
  } catch (error: unknown) {
    const errorMessage = getErrorMessage(error);
    console.error('Error capturing screenshot:', errorMessage);
    // Replace the completion message so the caller knows why no image follows.
    content[0] = {
      type: 'text',
      text: `Action "${actionName}" completed (screenshot unavailable: ${errorMessage})`,
    };
  }

  return { content };
}
411
+
412
/**
 * Convert an action result to text for inclusion in a tool response.
 *
 * Strings are returned verbatim. Other values are JSON-encoded. When JSON
 * encoding throws (e.g. circular structures, BigInt) or yields no output
 * (`JSON.stringify` returns `undefined` for functions, symbols and
 * `undefined`), the value is converted with `String(...)` so the declared
 * `string` return type always holds at runtime.
 */
function serializeActionResult(actionResult: unknown): string {
  if (typeof actionResult === 'string') {
    return actionResult;
  }

  try {
    // The lib typings claim `string`, but the runtime may return undefined.
    const json: string | undefined = JSON.stringify(actionResult);
    return json === undefined ? String(actionResult) : json;
  } catch {
    return String(actionResult);
  }
}
423
+
424
/**
 * Wrap a message in a tool result flagged as an error.
 */
function createErrorResult(message: string): ToolResult {
  const result: ToolResult = {
    content: [{ type: 'text', text: message }],
    isError: true,
  };
  return result;
}
433
+
434
/**
 * Build a non-error result for a failed action: a warning message plus, when
 * available, a screenshot of the current page.
 *
 * The result is deliberately not flagged `isError`, so the AI agent can look
 * at the current state and decide how to recover instead of the failure
 * being treated as a hard error (exit code 1).
 *
 * If the screenshot itself cannot be captured, the failure is logged and
 * only the warning text is returned.
 *
 * @param agent - Agent whose page is captured.
 * @param actionName - Name of the action that failed.
 * @param errorMessage - Message describing the action failure.
 */
async function captureFailureResult(
  agent: BaseAgent,
  actionName: string,
  errorMessage: string,
): Promise<ToolResult> {
  const warningText = `Warning: Action "${actionName}" failed: ${errorMessage}. Check the screenshot below for the current page state and decide how to proceed.`;
  const content: ToolResult['content'] = [{ type: 'text', text: warningText }];

  try {
    const screenshot = await agent.page?.screenshotBase64();
    if (screenshot) {
      const { mimeType, body } = parseBase64(screenshot);
      content.push({ type: 'image', data: body, mimeType });
    }
  } catch (error: unknown) {
    // Best effort: keep returning the warning, but log the screenshot
    // failure rather than swallowing it silently.
    console.error(
      'Error capturing failure screenshot:',
      getErrorMessage(error),
    );
  }

  return { content };
}
465
+
466
/**
 * Merge the `options` of two CLI metadata objects; entries from `extra`
 * override those from `base`. Returns `undefined` when the merged option
 * map is empty.
 */
function mergeToolCliMetadata(
  base?: ToolCliMetadata,
  extra?: ToolCliMetadata,
): ToolCliMetadata | undefined {
  const mergedOptions = { ...base?.options, ...extra?.options };

  if (Object.keys(mergedOptions).length === 0) {
    return undefined;
  }
  return { options: mergedOptions };
}
477
+
478
/**
 * Turn every action of an action space into an MCP tool definition, so that
 * tools do not have to be declared by hand.
 *
 * @param actionSpace - Actions to expose as tools.
 * @param getAgent - Resolves the agent for a call; receives the raw args.
 * @param sanitizeArgs - Applied to the raw args before they are normalized
 * and passed to the action (identity by default).
 * @param initArgSchema - Extra schema fields merged into every tool schema.
 * @param initArgCliMetadata - CLI metadata attached to every tool.
 * @returns One tool definition per action. Action failures are reported as
 * a warning plus screenshot; failures to obtain the agent (or anything else
 * in the handler) are reported as error results.
 * @throws Error (from schema extraction) when an action declares a
 * non-object paramSchema.
 */
export function generateToolsFromActionSpace(
  actionSpace: ActionSpaceItem[],
  getAgent: (args?: Record<string, unknown>) => Promise<BaseAgent>,
  sanitizeArgs: (args: Record<string, unknown>) => Record<string, unknown> = (
    args,
  ) => args,
  initArgSchema: ToolSchema = {},
  initArgCliMetadata?: ToolCliMetadata,
): ToolDefinition[] {
  // Runs the action and reports the outcome; an action failure becomes a
  // warning + screenshot so the AI agent can retry or adjust its strategy.
  const runAndReport = async (
    action: ActionSpaceItem,
    args: Record<string, unknown>,
  ) => {
    const agent = await getAgent(args);
    const normalizedArgs = normalizeActionArgs(
      sanitizeArgs(args),
      action.paramSchema,
    );

    let actionResult: unknown;
    try {
      actionResult = await executeAction(agent, action.name, normalizedArgs);
    } catch (error: unknown) {
      const errorMessage = getErrorMessage(error);
      console.error(`Error executing action "${action.name}":`, errorMessage);
      return await captureFailureResult(agent, action.name, errorMessage);
    }

    return await captureScreenshotResult(agent, action.name, actionResult);
  };

  return actionSpace.map((action) => {
    const actionSchema = extractActionSchema(
      action.paramSchema as z.ZodTypeAny,
      action.name,
    );
    const schema = { ...actionSchema, ...initArgSchema };
    const description = describeActionForMCP(action);

    return {
      name: action.name,
      description,
      schema,
      cli: initArgCliMetadata,
      handler: async (args: Record<string, unknown>) => {
        try {
          return await runAndReport(action, args);
        } catch (error: unknown) {
          // Connection/agent errors are still hard errors
          const errorMessage = getErrorMessage(error);
          console.error(`Error in handler for "${action.name}":`, errorMessage);
          return createErrorResult(
            `Failed to get agent or execute action "${action.name}": ${errorMessage}`,
          );
        }
      },
    };
  });
}
545
+
546
/**
 * Create the tools shared by every agent type:
 * - `take_screenshot`: returns the current screenshot as an image.
 * - `act`: runs a natural-language action through `agent.aiAction`.
 * - `assert`: checks a natural-language statement through `agent.aiAssert`.
 *
 * @param getAgent - Resolves the agent for a call; receives the raw args.
 * @param initArgSchema - Extra schema fields merged into every tool schema.
 * @param initArgCliMetadata - CLI metadata attached to the tools.
 * @returns The three tool definitions, in the order listed above. Handlers
 * never throw; failures are returned as error results.
 */
export function generateCommonTools(
  getAgent: (args?: Record<string, unknown>) => Promise<BaseAgent>,
  initArgSchema: ToolSchema = {},
  initArgCliMetadata?: ToolCliMetadata,
): ToolDefinition[] {
  const screenshotTool: ToolDefinition = {
    name: 'take_screenshot',
    description: 'Capture screenshot of current page/screen',
    schema: { ...initArgSchema },
    cli: initArgCliMetadata,
    handler: async (
      args: Record<string, unknown> = {},
    ): Promise<ToolResult> => {
      try {
        const agent = await getAgent(args);
        const screenshot = await agent.page?.screenshotBase64();
        if (!screenshot) {
          return createErrorResult('Screenshot not available');
        }
        const parsed = parseBase64(screenshot);
        return {
          content: [
            { type: 'image', data: parsed.body, mimeType: parsed.mimeType },
          ],
        };
      } catch (error: unknown) {
        const errorMessage = getErrorMessage(error);
        console.error('Error taking screenshot:', errorMessage);
        return createErrorResult(
          `Failed to capture screenshot: ${errorMessage}`,
        );
      }
    },
  };

  const actTool: ToolDefinition = {
    name: 'act',
    description:
      'Execute a natural language action. The AI will plan and perform multi-step operations in a single invocation, useful for transient UI interactions (e.g., Spotlight, dropdown menus) that disappear between separate commands.',
    schema: {
      prompt: z
        .string()
        .describe(
          'Natural language description of the action to perform, e.g. "press Command+Space, type Safari, press Enter"',
        ),
      ...initArgSchema,
    },
    cli: mergeToolCliMetadata(undefined, initArgCliMetadata),
    handler: async (
      args: Record<string, unknown> = {},
    ): Promise<ToolResult> => {
      const prompt = args.prompt as string;
      try {
        const agent = await getAgent(args);
        if (!agent.aiAction) {
          return createErrorResult('act is not supported by this agent');
        }

        const outcome = await agent.aiAction(prompt, { deepThink: false });
        const response = await captureScreenshotResult(agent, 'act');
        if (!outcome) {
          return response;
        }

        // Put the task message in front of the completion text and image.
        const message =
          typeof outcome === 'string' ? outcome : JSON.stringify(outcome);
        response.content.unshift({
          type: 'text',
          text: `Task finished, message: ${message}`,
        });
        return response;
      } catch (error: unknown) {
        const errorMessage = getErrorMessage(error);
        console.error('Error executing act:', errorMessage);
        return createErrorResult(`Failed to execute act: ${errorMessage}`);
      }
    },
  };

  const assertTool: ToolDefinition = {
    name: 'assert',
    description:
      'Assert a natural language statement against the current page/screen.',
    schema: {
      prompt: z
        .string()
        .describe(
          'Natural language assertion to verify, e.g. "there is a login button visible"',
        ),
      ...initArgSchema,
    },
    cli: mergeToolCliMetadata(undefined, initArgCliMetadata),
    handler: async (
      args: Record<string, unknown> = {},
    ): Promise<ToolResult> => {
      const prompt = args.prompt as string;
      try {
        const agent = await getAgent(args);
        if (!agent.aiAssert) {
          return createErrorResult('assert is not supported by this agent');
        }
        await agent.aiAssert(prompt);
        return {
          content: [{ type: 'text', text: 'Assertion passed.' }],
        };
      } catch (error: unknown) {
        const errorMessage = getErrorMessage(error);
        console.error('Error executing assert:', errorMessage);
        return createErrorResult(`Failed to execute assert: ${errorMessage}`);
      }
    },
  };

  return [screenshotTool, actTool, assertTool];
}