@exreve/exk 1.0.60 → 1.0.62

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,326 +1,65 @@
1
1
  /**
2
- * Module MCP Server
2
+ * Module MCP Server (Claude Agent SDK)
3
3
  *
4
- * Provides built-in tools like analyze_image for vision capabilities
5
- * and send_file for displaying files to the user in chat.
4
+ * Thin wrapper that creates Claude Agent SDK MCP tools using the shared
5
+ * tool executors from sharedTools.ts. This ensures all backends share the
6
+ * same tool logic.
6
7
  */
7
8
  import { createSdkMcpServer, tool } from '@anthropic-ai/claude-agent-sdk';
8
9
  import { z } from 'zod';
9
- import * as fs from 'fs';
10
- import * as path from 'path';
11
- import * as os from 'os';
12
- import sharp from 'sharp';
13
- import { getOpenrouterApiKey, getApiUrl } from './agentSession.js';
14
- const MAX_FILE_SIZE = 10 * 1024 * 1024; // 10 MB
15
- /** Comprehensive MIME type map for file extension detection */
16
- const MIME_MAP = {
17
- // Images
18
- png: 'image/png', jpg: 'image/jpeg', jpeg: 'image/jpeg',
19
- gif: 'image/gif', webp: 'image/webp', bmp: 'image/bmp',
20
- svg: 'image/svg+xml', ico: 'image/x-icon', tiff: 'image/tiff', tif: 'image/tiff',
21
- avif: 'image/avif',
22
- // Audio
23
- mp3: 'audio/mpeg', wav: 'audio/wav', ogg: 'audio/ogg',
24
- m4a: 'audio/mp4', flac: 'audio/flac', aac: 'audio/aac',
25
- wma: 'audio/x-ms-wma', opus: 'audio/opus',
26
- // Video
27
- mp4: 'video/mp4', webm: 'video/webm', mkv: 'video/x-matroska',
28
- avi: 'video/x-msvideo', mov: 'video/quicktime', wmv: 'video/x-ms-wmv',
29
- m4v: 'video/mp4', '3gp': 'video/3gpp',
30
- // Documents
31
- pdf: 'application/pdf',
32
- // Text / Code
33
- txt: 'text/plain', md: 'text/markdown', csv: 'text/csv',
34
- json: 'application/json', xml: 'text/xml', yaml: 'text/yaml', yml: 'text/yaml',
35
- toml: 'text/plain', html: 'text/html', htm: 'text/html',
36
- css: 'text/css', scss: 'text/x-scss', less: 'text/x-less',
37
- js: 'text/javascript', mjs: 'text/javascript', cjs: 'text/javascript',
38
- ts: 'text/typescript', tsx: 'text/typescript',
39
- jsx: 'text/javascript', py: 'text/x-python', rs: 'text/x-rust',
40
- go: 'text/x-go', java: 'text/x-java', c: 'text/x-c', cpp: 'text/x-c++',
41
- h: 'text/x-c', hpp: 'text/x-c++', rb: 'text/x-ruby', php: 'text/x-php',
42
- sh: 'text/x-shellscript', bash: 'text/x-shellscript', zsh: 'text/x-shellscript',
43
- sql: 'text/x-sql', graphql: 'text/graphql', vue: 'text/x-vue',
44
- svelte: 'text/x-svelte', dart: 'text/x-dart', swift: 'text/x-swift',
45
- kt: 'text/x-kotlin', scala: 'text/x-scala', lua: 'text/x-lua',
46
- r: 'text/x-r', dockerfile: 'text/x-dockerfile',
47
- };
10
+ import { executeAnalyzeImage, executeSendFile, executeBrowserQuery } from './sharedTools.js';
48
11
  /**
49
- * Get MIME type from file extension
12
+ * Build the shared config from MCP server config.
50
13
  */
51
- function getMimeType(filePath) {
52
- const ext = path.extname(filePath).toLowerCase().replace('.', '');
53
- return MIME_MAP[ext] || 'application/octet-stream';
54
- }
55
- const IMAGE_EXTENSIONS = new Set(['png', 'jpg', 'jpeg', 'gif', 'webp', 'bmp', 'tiff', 'tif', 'avif']);
56
- const MAX_IMAGE_DIMENSION = 2048; // max width or height in pixels
57
- const MAX_IMAGE_BYTES = 2 * 1024 * 1024; // 2 MB target after compression
58
- function isImageFile(filePath) {
59
- const ext = path.extname(filePath).toLowerCase().replace('.', '');
60
- return IMAGE_EXTENSIONS.has(ext);
61
- }
62
- /**
63
- * Compress and resize an image buffer using sharp.
64
- * - Resizes so neither dimension exceeds MAX_IMAGE_DIMENSION (fit: inside, no upscale)
65
- * - Converts to JPEG quality 80 (or WebP for non-photo sources)
66
- * - If already small enough, returns the original buffer unchanged
67
- */
68
- async function compressImage(buf) {
69
- const metadata = await sharp(buf).metadata();
70
- const { width = 0, height = 0, size = 0 } = metadata;
71
- // If already under limits, keep as-is
72
- const needsResize = width > MAX_IMAGE_DIMENSION || height > MAX_IMAGE_DIMENSION;
73
- const needsCompress = (size || buf.length) > MAX_IMAGE_BYTES;
74
- if (!needsResize && !needsCompress) {
75
- // Keep original format
76
- const fmt = metadata.format || 'jpeg';
77
- const mime = fmt === 'png' ? 'image/png' : fmt === 'webp' ? 'image/webp' : 'image/jpeg';
78
- return { data: buf, mime };
79
- }
80
- let pipeline = sharp(buf)
81
- .resize(MAX_IMAGE_DIMENSION, MAX_IMAGE_DIMENSION, { fit: 'inside', withoutEnlargement: true });
82
- // Convert to JPEG for best compression on photos; use WebP for PNGs with alpha
83
- const hasAlpha = metadata.hasAlpha;
84
- if (hasAlpha) {
85
- pipeline = pipeline.webp({ quality: 80 });
86
- return { data: await pipeline.toBuffer(), mime: 'image/webp' };
87
- }
88
- pipeline = pipeline.jpeg({ quality: 80 });
89
- return { data: await pipeline.toBuffer(), mime: 'image/jpeg' };
90
- }
91
- /**
92
- * Convert a file to a data URI (base64 encoded).
93
- * Images are compressed and resized before encoding.
94
- */
95
- async function fileToDataUri(filePath) {
96
- try {
97
- const buf = fs.readFileSync(filePath);
98
- if (isImageFile(filePath)) {
99
- const { data, mime } = await compressImage(buf);
100
- return `data:${mime};base64,${data.toString('base64')}`;
101
- }
102
- const mime = getMimeType(filePath);
103
- return `data:${mime};base64,${buf.toString('base64')}`;
104
- }
105
- catch {
106
- return null;
107
- }
108
- }
109
- /**
110
- * Create the analyze_image tool for vision capabilities via OpenRouter
111
- */
112
- function createAnalyzeImageTool(attachmentDir) {
113
- const workDir = attachmentDir || os.tmpdir();
114
- return tool('analyze_image', 'Analyze one or more image files using a vision model. Pass the path to an image file and a question. Returns a detailed text answer about the image content.', {
115
- image_path: z.string().describe('Path to the image file to analyze (can be relative to working directory, e.g. "attachments/photo.jpg")'),
116
- question: z.string().describe('Question or instruction about the image. Be specific about what you want to know.'),
117
- }, async (args) => {
118
- const apiKey = getOpenrouterApiKey();
119
- if (!apiKey) {
120
- return { content: [{ type: 'text', text: 'Error: OPENROUTER_API_KEY not configured.' }], isError: true };
121
- }
122
- try {
123
- // Resolve relative paths against the attachment dir
124
- const imagePath = path.resolve(workDir, args.image_path);
125
- if (!fs.existsSync(imagePath)) {
126
- return { content: [{ type: 'text', text: `Error: Image file not found: ${args.image_path}` }], isError: true };
127
- }
128
- const dataUri = await fileToDataUri(imagePath);
129
- if (!dataUri) {
130
- return { content: [{ type: 'text', text: `Error: Could not read image file: ${args.image_path}` }], isError: true };
131
- }
132
- const OPENROUTER_ENDPOINT = 'https://openrouter.ai/api/v1/chat/completions';
133
- const OPENROUTER_MODEL = 'qwen/qwen3.5-27b';
134
- const res = await fetch(OPENROUTER_ENDPOINT, {
135
- method: 'POST',
136
- headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${apiKey}` },
137
- body: JSON.stringify({
138
- model: OPENROUTER_MODEL,
139
- messages: [{ role: 'user', content: [
140
- { type: 'text', text: args.question },
141
- { type: 'image_url', image_url: { url: dataUri } },
142
- ] }],
143
- }),
144
- signal: AbortSignal.timeout(60_000),
145
- });
146
- const raw = await res.text();
147
- if (!res.ok) {
148
- return { content: [{ type: 'text', text: `Error from vision API (${res.status}): ${raw.slice(0, 500)}` }], isError: true };
149
- }
150
- const parsed = JSON.parse(raw);
151
- return { content: [{ type: 'text', text: parsed.choices?.[0]?.message?.content || raw }] };
152
- }
153
- catch (error) {
154
- return { content: [{ type: 'text', text: `Error analyzing image: ${error.message}` }], isError: true };
155
- }
156
- });
157
- }
158
- /**
159
- * Create the send_file tool for displaying files to the user in chat.
160
- * Supports images, audio, video, PDFs, code, and other files.
161
- */
162
- function createSendFileTool(attachmentDir) {
163
- const workDir = attachmentDir || os.tmpdir();
164
- return tool('send_file', 'Send a file to the user for display in chat. Supports images (shown inline), audio/video (with player), PDFs, code files (syntax highlighted), and other files (download link). Use file_path for local files or data for base64-encoded content.', {
165
- file_path: z.string().optional().describe('Path to a local file on this device (absolute or relative to project directory)'),
166
- data: z.string().optional().describe('Base64-encoded file content (without data: prefix)'),
167
- mime_type: z.string().optional().describe('MIME type of the file (required when using data, auto-detected from file_path)'),
168
- filename: z.string().optional().describe('Display name for the file (auto-detected from file_path)'),
169
- }, async (args) => {
170
- try {
171
- let dataUri;
172
- let mimeType;
173
- let fileName;
174
- let fileSize;
175
- if (args.file_path) {
176
- // Read from local file
177
- const filePath = path.resolve(workDir, args.file_path);
178
- if (!fs.existsSync(filePath)) {
179
- return { content: [{ type: 'text', text: `Error: File not found: ${args.file_path}` }], isError: true };
180
- }
181
- const stat = fs.statSync(filePath);
182
- fileSize = stat.size;
183
- if (fileSize > MAX_FILE_SIZE) {
184
- return { content: [{ type: 'text', text: `Error: File too large (${(fileSize / (1024 * 1024)).toFixed(1)} MB). Maximum size is ${MAX_FILE_SIZE / (1024 * 1024)} MB.` }], isError: true };
185
- }
186
- const buf = fs.readFileSync(filePath);
187
- mimeType = args.mime_type || getMimeType(filePath);
188
- fileName = args.filename || path.basename(filePath);
189
- dataUri = `data:${mimeType};base64,${buf.toString('base64')}`;
190
- }
191
- else if (args.data) {
192
- // Use provided base64 data
193
- mimeType = args.mime_type || 'application/octet-stream';
194
- fileName = args.filename || 'file';
195
- const rawBase64 = args.data.replace(/^data:[^;]+;base64,/, '');
196
- fileSize = Math.floor(rawBase64.length * 0.75);
197
- if (fileSize > MAX_FILE_SIZE) {
198
- return { content: [{ type: 'text', text: `Error: Data too large (~${(fileSize / (1024 * 1024)).toFixed(1)} MB). Maximum size is ${MAX_FILE_SIZE / (1024 * 1024)} MB.` }], isError: true };
199
- }
200
- dataUri = `data:${mimeType};base64,${rawBase64}`;
201
- }
202
- else {
203
- return { content: [{ type: 'text', text: 'Error: Either file_path or data must be provided.' }], isError: true };
204
- }
205
- // Return structured result that the frontend will detect
206
- const result = JSON.stringify({
207
- _type: 'send_file',
208
- data: dataUri,
209
- mime_type: mimeType,
210
- filename: fileName,
211
- size: fileSize,
212
- });
213
- return { content: [{ type: 'text', text: result }] };
214
- }
215
- catch (error) {
216
- return { content: [{ type: 'text', text: `Error sending file: ${error.message}` }], isError: true };
217
- }
218
- });
219
- }
220
- /**
221
- * Create the browser_query tool for web automation via the backend.
222
- * The agent can fire multiple queries concurrently — screenshots stream
223
- * to the frontend independently while the tool blocks until completion.
224
- */
225
- function createBrowserQueryTool(config) {
226
- return tool('browser_query', 'Launch a headless browser to automate web tasks such as searching, reading pages, filling forms, extracting data, etc. ' +
227
- 'Returns the answer, optionally structured data, and step count. ' +
228
- 'IMPORTANT: This tool is slow (30-120 seconds per query). You CAN and SHOULD call browser_query multiple times concurrently ' +
229
- '— the browser handles each in a separate session. While waiting for results, continue with other work (file edits, analysis, etc.). ' +
230
- 'Do NOT wait for one browser query to finish before starting another if you need multiple lookups.', {
231
- query: z.string().describe('Natural language task for the browser agent (e.g. "Go to google.com and search for the price of Bitcoin")'),
232
- schema: z.string().optional().describe('JSON schema for structured output, as a JSON string (e.g. \'{"type":"object","properties":{"price":{"type":"number"}}}\')'),
233
- maxSteps: z.number().optional().describe('Max automation steps, default 20. Use lower values for simple tasks.'),
234
- country: z.string().optional().describe('2-letter country code for proxy and locale (e.g. "US", "GB", "DE"). Uses direct connection if omitted.'),
235
- mobile: z.boolean().optional().describe('If true, use mobile viewport (390x844 — iPhone 14 dimensions) instead of desktop.'),
236
- }, async (args) => {
237
- const apiUrl = getApiUrl();
238
- // Read device ID for CLI auth
239
- let deviceId = '';
240
- try {
241
- const deviceIdPath = path.join(os.homedir(), '.talk-to-code', 'device-id.json');
242
- const data = fs.readFileSync(deviceIdPath, 'utf-8');
243
- deviceId = JSON.parse(data).deviceId || '';
244
- }
245
- catch {
246
- // No device ID file — will still work if backend has relaxed auth
247
- }
248
- try {
249
- const body = {
250
- query: args.query,
251
- maxSteps: args.maxSteps || 20,
252
- };
253
- if (args.schema) {
254
- try {
255
- body.schema = JSON.parse(args.schema);
256
- }
257
- catch {
258
- body.schema = args.schema;
259
- }
260
- }
261
- if (args.country)
262
- body.country = args.country;
263
- if (args.mobile)
264
- body.mobile = args.mobile;
265
- if (config.sessionId)
266
- body.sessionId = config.sessionId;
267
- if (config.promptId)
268
- body.promptId = config.promptId;
269
- const res = await fetch(`${apiUrl}/api/browser/query`, {
270
- method: 'POST',
271
- headers: {
272
- 'Content-Type': 'application/json',
273
- ...(deviceId ? { 'X-Device-ID': deviceId } : {}),
274
- },
275
- body: JSON.stringify(body),
276
- signal: AbortSignal.timeout(10 * 60 * 1000), // 10 min timeout
277
- });
278
- const raw = await res.text();
279
- if (!res.ok) {
280
- return {
281
- content: [{ type: 'text', text: `Error from browser agent (${res.status}): ${raw.slice(0, 500)}` }],
282
- isError: true,
283
- };
284
- }
285
- const result = JSON.parse(raw);
286
- // Format a nice summary for the agent
287
- const summary = [
288
- `Browser query completed in ${result.steps} steps.`,
289
- result.answer ? `\n\n**Answer:** ${result.answer}` : '',
290
- result.data ? `\n\n**Structured Data:**\n\`\`\`json\n${JSON.stringify(result.data, null, 2)}\n\`\`\`` : '',
291
- result.logs?.length ? `\n\n**Log:**\n${result.logs.slice(-5).join('\n')}` : '',
292
- ].join('');
293
- return { content: [{ type: 'text', text: summary }] };
294
- }
295
- catch (error) {
296
- if (error.name === 'TimeoutError') {
297
- return {
298
- content: [{ type: 'text', text: 'Browser query timed out after 10 minutes. Try reducing maxSteps or simplifying the query.' }],
299
- isError: true,
300
- };
301
- }
302
- return {
303
- content: [{ type: 'text', text: `Error running browser query: ${error.message}` }],
304
- isError: true,
305
- };
306
- }
307
- });
14
+ function toSharedConfig(config) {
15
+ return {
16
+ attachmentDir: config.attachmentDir,
17
+ sessionId: config.sessionId,
18
+ promptId: config.promptId,
19
+ };
308
20
  }
309
21
  /**
310
- * Create the MCP server with built-in tools
22
+ * Create the MCP server with built-in tools.
311
23
  */
312
24
  export function createModuleMcpServer(config) {
313
- const tools = [];
314
- // Always add analyze_image tool (uses OpenRouter key from ai-config via backend)
315
- tools.push(createAnalyzeImageTool(config.attachmentDir));
316
- // Add send_file tool for displaying files to the user in chat
317
- tools.push(createSendFileTool(config.attachmentDir));
318
- // Add browser_query tool for web automation
319
- tools.push(createBrowserQueryTool(config));
320
- const server = createSdkMcpServer({
25
+ const sharedConfig = toSharedConfig(config);
26
+ const tools = [
27
+ tool('analyze_image', 'Analyze one or more image files using a vision model. Pass the path to an image file and a question. Returns a detailed text answer about the image content.', {
28
+ image_path: z.string().describe('Path to the image file to analyze (can be relative to working directory, e.g. "attachments/photo.jpg")'),
29
+ question: z.string().describe('Question or instruction about the image. Be specific about what you want to know.'),
30
+ }, async (args) => executeAnalyzeImage({ image_path: args.image_path, question: args.question }, sharedConfig)),
31
+ tool('send_file', 'Send a file to the user for display in chat. Supports images (shown inline), audio/video (with player), PDFs, code files (syntax highlighted), and other files (download link). Use file_path for local files or data for base64-encoded content.', {
32
+ file_path: z.string().optional().describe('Path to a local file on this device (absolute or relative to project directory)'),
33
+ data: z.string().optional().describe('Base64-encoded file content (without data: prefix)'),
34
+ mime_type: z.string().optional().describe('MIME type of the file (required when using data, auto-detected from file_path)'),
35
+ filename: z.string().optional().describe('Display name for the file (auto-detected from file_path)'),
36
+ }, async (args) => executeSendFile({
37
+ file_path: args.file_path,
38
+ data: args.data,
39
+ mime_type: args.mime_type,
40
+ filename: args.filename,
41
+ }, sharedConfig)),
42
+ tool('browser_query', 'Launch a headless browser to automate web tasks such as searching, reading pages, filling forms, extracting data, etc. ' +
43
+ 'Returns the answer, optionally structured data, and step count. ' +
44
+ 'IMPORTANT: This tool is slow (30-120 seconds per query). You CAN and SHOULD call browser_query multiple times concurrently ' +
45
+ '— the browser handles each in a separate session. While waiting for results, continue with other work (file edits, analysis, etc.). ' +
46
+ 'Do NOT wait for one browser query to finish before starting another if you need multiple lookups.', {
47
+ query: z.string().describe('Natural language task for the browser agent (e.g. "Go to google.com and search for the price of Bitcoin")'),
48
+ schema: z.string().optional().describe('JSON schema for structured output, as a JSON string (e.g. \'{"type":"object","properties":{"price":{"type":"number"}}}\')'),
49
+ maxSteps: z.number().optional().describe('Max automation steps, default 20. Use lower values for simple tasks.'),
50
+ country: z.string().optional().describe('2-letter country code for proxy and locale (e.g. "US", "GB", "DE"). Uses direct connection if omitted.'),
51
+ mobile: z.boolean().optional().describe('If true, use mobile viewport (390x844 — iPhone 14 dimensions) instead of desktop.'),
52
+ }, async (args) => executeBrowserQuery({
53
+ query: args.query,
54
+ schema: args.schema,
55
+ maxSteps: args.maxSteps,
56
+ country: args.country,
57
+ mobile: args.mobile,
58
+ }, sharedConfig)),
59
+ ];
60
+ return createSdkMcpServer({
321
61
  name: 'claude-voice-modules',
322
62
  version: '1.0.0',
323
- tools
63
+ tools,
324
64
  });
325
- return server;
326
65
  }