@ww_nero/audio 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/index.js +388 -0
  2. package/package.json +15 -0
package/index.js ADDED
@@ -0,0 +1,388 @@
+ #!/usr/bin/env node
+
+ const https = require('https');
+ const fs = require('fs');
+ const path = require('path');
+ const { Server } = require('@modelcontextprotocol/sdk/server/index.js');
+ const { StdioServerTransport } = require('@modelcontextprotocol/sdk/server/stdio.js');
+ const {
+   CallToolRequestSchema,
+   ListToolsRequestSchema,
+ } = require('@modelcontextprotocol/sdk/types.js');
+
+ const BASE_URL = (process.env.AUDIO_BASE_URL || '').replace(/\/+$/, '');
+ const API_KEY = process.env.AUDIO_API_KEY || '';
+
+ const TTS_MODEL = 'gpt-4o-mini-tts';
+ const ASR_MODEL = 'whisper-1';
+
+ const VOICE_MAP = {
+   female: 'marin',
+   male: 'cedar',
+ };
+
+ const MAX_FILE_SIZE = 20 * 1024 * 1024; // 20MB
+
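+ // Convert between WSL-style (/mnt/c/...) and Windows-style (C:\...) paths so the same input works in either environment.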
+ const convertPath = (filePath) => {
+   const wslMatch = filePath.match(/^\/mnt\/([a-zA-Z])\/(.*)$/);
+   if (wslMatch) {
+     const drive = wslMatch[1].toUpperCase();
+     const rest = wslMatch[2].replace(/\//g, '\\');
+     return `${drive}:\\${rest}`;
+   }
+
+   const winMatch = filePath.match(/^([a-zA-Z]):\\(.*)$/);
+   if (winMatch) {
+     const drive = winMatch[1].toLowerCase();
+     const rest = winMatch[2].replace(/\\/g, '/');
+     return `/mnt/${drive}/${rest}`;
+   }
+
+   return null;
+ };
+
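+ // Return the path if it exists as given; otherwise try its WSL/Windows conversion, or return null if neither exists.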
+ const resolveExistingPath = (inputPath) => {
+   if (fs.existsSync(inputPath)) {
+     return inputPath;
+   }
+
+   const converted = convertPath(inputPath);
+   if (converted && fs.existsSync(converted)) {
+     return converted;
+   }
+
+   return null;
+ };
+
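+ // Validate that working_directory is an absolute path to an existing directory and return its resolved form.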
+ const resolveWorkingDirectory = (rawPath) => {
+   if (!rawPath || typeof rawPath !== 'string') {
+     throw new Error('working_directory is required');
+   }
+
+   if (!path.isAbsolute(rawPath)) {
+     throw new Error('working_directory must be an absolute path');
+   }
+
+   const resolved = resolveExistingPath(rawPath);
+   if (!resolved) {
+     throw new Error('Working directory does not exist; please verify the path');
+   }
+
+   const stats = fs.statSync(resolved);
+   if (!stats.isDirectory()) {
+     throw new Error('working_directory must be a directory path');
+   }
+
+   return resolved;
+ };
+
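+ // Resolve an audio file path (absolute, or relative to the working directory) and enforce the 20MB size limit.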
+ const resolveAudioFile = (workingDir, rawPath) => {
+   if (!rawPath || typeof rawPath !== 'string') {
+     throw new Error('Audio file path must not be empty');
+   }
+
+   const trimmed = rawPath.trim();
+   if (!trimmed) {
+     throw new Error('Audio file path must not be empty');
+   }
+
+   const absolutePath = path.isAbsolute(trimmed)
+     ? trimmed
+     : path.join(workingDir, trimmed);
+
+   const resolved = resolveExistingPath(absolutePath);
+   if (!resolved) {
+     throw new Error(`Audio file does not exist: ${trimmed}`);
+   }
+
+   const stats = fs.statSync(resolved);
+   if (!stats.isFile()) {
+     throw new Error(`Audio path is not a file: ${trimmed}`);
+   }
+
+   if (stats.size > MAX_FILE_SIZE) {
+     throw new Error(`Audio file exceeds the size limit (max 20MB); current size: ${(stats.size / 1024 / 1024).toFixed(2)}MB`);
+   }
+
+   return resolved;
+ };
+
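+ // POST the text to /v1/audio/speech on BASE_URL and resolve with the raw audio bytes returned by the API.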
+ const postTTS = (text, voice, speed, instructions) =>
+   new Promise((resolve, reject) => {
+     if (!BASE_URL) {
+       reject(new Error('AUDIO_BASE_URL environment variable is not set'));
+       return;
+     }
+     if (!API_KEY) {
+       reject(new Error('AUDIO_API_KEY environment variable is not set'));
+       return;
+     }
+
+     const urlObj = new URL(`${BASE_URL}/v1/audio/speech`);
+     const body = {
+       model: TTS_MODEL,
+       voice,
+       input: text,
+       speed,
+     };
+
+     if (instructions) {
+       body.instructions = instructions;
+     }
+
+     const data = JSON.stringify(body);
+
+     const options = {
+       hostname: urlObj.hostname,
+       port: urlObj.port || 443,
+       path: urlObj.pathname,
+       method: 'POST',
+       headers: {
+         'Authorization': `Bearer ${API_KEY}`,
+         'Content-Type': 'application/json',
+         'Content-Length': Buffer.byteLength(data),
+       },
+     };
+
+     const req = https.request(options, (res) => {
+       const chunks = [];
+
+       res.on('data', (chunk) => {
+         chunks.push(chunk);
+       });
+
+       res.on('end', () => {
+         if (res.statusCode !== 200) {
+           const respData = Buffer.concat(chunks).toString();
+           reject(new Error(`Request failed with status code ${res.statusCode}, response: ${respData}`));
+           return;
+         }
+
+         resolve(Buffer.concat(chunks));
+       });
+     });
+
+     req.on('error', (err) => reject(new Error(`Request error: ${err.message}`)));
+
+     req.write(data);
+     req.end();
+   });
+
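+ // Build a multipart/form-data body by hand and POST the audio file to /v1/audio/transcriptions, requesting SRT output.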
+ const postASR = (filePath) =>
+   new Promise((resolve, reject) => {
+     if (!BASE_URL) {
+       reject(new Error('AUDIO_BASE_URL environment variable is not set'));
+       return;
+     }
+     if (!API_KEY) {
+       reject(new Error('AUDIO_API_KEY environment variable is not set'));
+       return;
+     }
+
+     const urlObj = new URL(`${BASE_URL}/v1/audio/transcriptions`);
+     const boundary = `----FormBoundary${Date.now()}`;
+     const fileName = path.basename(filePath);
+     const fileContent = fs.readFileSync(filePath);
+
+     const formParts = [];
+
+     // model field
+     formParts.push(
+       `--${boundary}\r\n`,
+       `Content-Disposition: form-data; name="model"\r\n\r\n`,
+       `${ASR_MODEL}\r\n`
+     );
+
+     // response_format field
+     formParts.push(
+       `--${boundary}\r\n`,
+       `Content-Disposition: form-data; name="response_format"\r\n\r\n`,
+       `srt\r\n`
+     );
+
+     // file field
+     formParts.push(
+       `--${boundary}\r\n`,
+       `Content-Disposition: form-data; name="file"; filename="${fileName}"\r\n`,
+       `Content-Type: application/octet-stream\r\n\r\n`
+     );
+
+     const formHeader = Buffer.from(formParts.join(''));
+     const formFooter = Buffer.from(`\r\n--${boundary}--\r\n`);
+     const body = Buffer.concat([formHeader, fileContent, formFooter]);
+
+     const options = {
+       hostname: urlObj.hostname,
+       port: urlObj.port || 443,
+       path: urlObj.pathname,
+       method: 'POST',
+       headers: {
+         'Authorization': `Bearer ${API_KEY}`,
+         'Content-Type': `multipart/form-data; boundary=${boundary}`,
+         'Content-Length': body.length,
+       },
+     };
+
+     const req = https.request(options, (res) => {
+       let respData = '';
+
+       res.on('data', (chunk) => {
+         respData += chunk;
+       });
+
+       res.on('end', () => {
+         if (res.statusCode !== 200) {
+           reject(new Error(`Request failed with status code ${res.statusCode}, response: ${respData}`));
+           return;
+         }
+
+         resolve(respData);
+       });
+     });
+
+     req.on('error', (err) => reject(new Error(`Request error: ${err.message}`)));
+
+     req.write(body);
+     req.end();
+   });
+
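+ // tts tool: synthesize speech and write the resulting mp3 into the working directory; returns the generated file name.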
+ const tts = async ({ text, gender, speed, instructions, working_directory }) => {
+   const workingDir = resolveWorkingDirectory(working_directory);
+
+   const voice = VOICE_MAP[gender] || VOICE_MAP.female;
+   const audioBuffer = await postTTS(text, voice, speed, instructions);
+
+   const filename = `audio-${Date.now()}.mp3`;
+   const outputPath = path.join(workingDir, filename);
+   fs.writeFileSync(outputPath, audioBuffer);
+
+   return filename;
+ };
+
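+ // asr tool: transcribe the audio file and write an .srt file with the same base name into the working directory.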
+ const asr = async ({ audio_file, working_directory }) => {
+   const workingDir = resolveWorkingDirectory(working_directory);
+   const audioPath = resolveAudioFile(workingDir, audio_file);
+
+   const srtContent = await postASR(audioPath);
+
+   const baseName = path.basename(audio_file, path.extname(audio_file));
+   const filename = `${baseName}.srt`;
+   const outputPath = path.join(workingDir, filename);
+   fs.writeFileSync(outputPath, srtContent);
+
+   return filename;
+ };
+
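+ // Create the MCP server and declare that it exposes tools.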
+ const server = new Server(
+   {
+     name: 'audio',
+     version: '1.0.1',
+   },
+   {
+     capabilities: {
+       tools: {},
+     },
+   }
+ );
+
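+ // Advertise the tts and asr tools and their input schemas to MCP clients.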
+ server.setRequestHandler(ListToolsRequestSchema, async () => ({
+   tools: [
+     {
+       name: 'tts',
+       description: 'Text-to-speech tool that converts text into an audio file.',
+       inputSchema: {
+         type: 'object',
+         properties: {
+           working_directory: {
+             type: 'string',
+             description: 'Absolute path of the working directory; the synthesized audio file is saved to this directory',
+           },
+           text: {
+             type: 'string',
+             description: 'Text content to synthesize into speech',
+           },
+           gender: {
+             type: 'string',
+             description: 'Voice gender: female for a female voice, male for a male voice; defaults to female',
+             enum: ['female', 'male'],
+           },
+           speed: {
+             type: 'number',
+             description: 'Speech speed, adjustable from 0.25 to 4; higher values are faster; defaults to 1',
+           },
+           instructions: {
+             type: 'string',
+             description: 'Guidance on tone and delivery, e.g. "Speak in a cheerful and positive tone."',
+           },
+         },
+         required: ['working_directory', 'text'],
+       },
+     },
+     {
+       name: 'asr',
+       description: 'Speech-recognition tool that converts an audio file into a timestamped SRT subtitle file.',
+       inputSchema: {
+         type: 'object',
+         properties: {
+           working_directory: {
+             type: 'string',
+             description: 'Absolute path of the working directory; the transcription result is saved to this directory',
+           },
+           audio_file: {
+             type: 'string',
+             description: 'Path of the audio file relative to the working directory; mp3/wav supported, up to 20MB',
+           },
+         },
+         required: ['working_directory', 'audio_file'],
+       },
+     },
+   ],
+ }));
+
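+ // Dispatch tool calls: validate arguments, clamp speed to 0.25-4, run the tool, and report failures as isError results.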
+ server.setRequestHandler(CallToolRequestSchema, async (request) => {
+   const { name, arguments: args } = request.params;
+
+   try {
+     if (name === 'tts') {
+       const { working_directory, text, gender, speed, instructions } = args;
+       if (!working_directory || !text) {
+         throw new Error('Both working_directory and text parameters are required');
+       }
+
+       const validSpeed = typeof speed === 'number' ? Math.max(0.25, Math.min(4, speed)) : 1;
+
+       const filename = await tts({
+         text,
+         gender: gender || 'female',
+         speed: validSpeed,
+         instructions,
+         working_directory,
+       });
+       return { content: [{ type: 'text', text: `Saved to the working directory as: ${filename}` }] };
+     }
+
+     if (name === 'asr') {
+       const { working_directory, audio_file } = args;
+       if (!working_directory || !audio_file) {
+         throw new Error('Both working_directory and audio_file parameters are required');
+       }
+       const filename = await asr({ audio_file, working_directory });
+       return { content: [{ type: 'text', text: `Saved to the working directory as: ${filename}` }] };
+     }
+
+     return {
+       content: [{ type: 'text', text: `Unknown tool: ${name}` }],
+       isError: true,
+     };
+   } catch (error) {
+     const message = error?.message || 'Unknown error';
+     return { content: [{ type: 'text', text: message }], isError: true };
+   }
+ });
+
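+ // Start the server over the stdio transport.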
+ const main = async () => {
+   const transport = new StdioServerTransport();
+   await server.connect(transport);
+ };
+
+ main().catch(console.error);
package/package.json ADDED
@@ -0,0 +1,15 @@
+ {
+   "name": "@ww_nero/audio",
+   "version": "1.0.1",
+   "description": "MCP server for TTS and ASR using OpenAI compatible API",
+   "main": "index.js",
+   "bin": {
+     "audio": "index.js"
+   },
+   "files": [
+     "index.js"
+   ],
+   "dependencies": {
+     "@modelcontextprotocol/sdk": "^1.22.0"
+   }
+ }