@ww_nero/media 1.0.9 → 1.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,6 +1,6 @@
  # Media MCP Server

- Media-processing MCP server that provides speech recognition and related features.
+ Media-processing MCP server that provides speech recognition and speech synthesis.

  ## Features

@@ -18,13 +18,27 @@
  - Audio over the limit must be split externally and recognized segment by segment

  **Output:**
- - Recognition results are saved to the `transcribe.srt` file in the working directory
+ - Recognition results are saved to an `asr_<timestamp>.srt` file in the working directory
+
+ ### tts - Speech Synthesis
+
+ Converts text into an audio file.
+
+ **Parameters:**
+ - `working_directory`: absolute path of the working directory; the synthesized audio file is saved here
+ - `text`: the text to synthesize
+ - `voice`: (optional) voice model, default `sambert-zhimiao-emo-v1`
+ - `format`: (optional) output audio format: mp3 (default), wav, or pcm
+ - `sample_rate`: (optional) sample rate, default 16000
+
+ **Output:**
+ - The synthesized audio is saved to a `tts_<timestamp>.<format>` file in the working directory

  ## Environment Variables

  | Variable | Description | Required |
  |----------|-------------|----------|
- | `ASR_API_KEY` | Alibaba Cloud DashScope API Key | Yes |
+ | `DASHSCOPE_API_KEY` | Alibaba Cloud DashScope API Key | Yes |

  ## Installation

@@ -41,7 +55,7 @@ npm install
        "command": "node",
        "args": ["/path/to/media/index.js"],
        "env": {
-         "ASR_API_KEY": "your-api-key"
+         "DASHSCOPE_API_KEY": "your-api-key"
        }
      }
    }
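For orientation, the arguments an MCP client would send when calling the new tts tool, as documented above, look roughly like the sketch below. The working directory and text are placeholders, not values shipped with the package:

```js
// Hypothetical `tools/call` payload for the tts tool described in the README.
// Only working_directory and text are required; the other fields show the documented defaults.
const exampleTtsCall = {
  method: 'tools/call',
  params: {
    name: 'tts',
    arguments: {
      working_directory: '/absolute/path/to/workdir', // placeholder
      text: 'Hello from the media MCP server.',       // placeholder
      voice: 'sambert-zhimiao-emo-v1',                 // optional, default
      format: 'mp3',                                   // optional: mp3 | wav | pcm
      sample_rate: 16000,                              // optional, default
    },
  },
};
```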
package/index.js CHANGED
@@ -9,185 +9,10 @@ const {
    ListToolsRequestSchema,
  } = require('@modelcontextprotocol/sdk/types.js');

- const ASR_API_KEY = process.env.ASR_API_KEY || '';
- const ASR_UPLOAD_URL = 'http://fsheep.com:10808/upload';
- const ASR_SERVICE_URL = 'https://dashscope.aliyuncs.com/api/v1/services/audio/asr/transcription';
+ const { asr, SUPPORTED_AUDIO_TYPES, MAX_FILE_SIZE_BYTES } = require('./utils/asr.js');
+ const { tts } = require('./utils/tts.js');

- const SUPPORTED_AUDIO_TYPES = ['.mp3', '.wav'];
- const MAX_FILE_SIZE_BYTES = 120 * 1024 * 1024; // 120MB
- const LANGUAGE_HINTS = ['zh', 'en', 'ja'];
-
- /**
-  * Convert milliseconds to the SRT time format HH:MM:SS,mmm
-  */
- const msToSrtTime = (ms) => {
-   const hours = Math.floor(ms / 3600000);
-   const minutes = Math.floor((ms % 3600000) / 60000);
-   const seconds = Math.floor((ms % 60000) / 1000);
-   const milliseconds = ms % 1000;
-   return `${String(hours).padStart(2, '0')}:${String(minutes).padStart(2, '0')}:${String(seconds).padStart(2, '0')},${String(milliseconds).padStart(3, '0')}`;
- };
-
- /**
-  * Convert ASR recognition results into SRT subtitle content
-  */
- const asrToSrt = (asrData) => {
-   const srtEntries = [];
-   let subtitleIndex = 1;
-
-   for (const item of asrData) {
-     const transcription = item.transcription || {};
-     const transcripts = transcription.transcripts || [];
-
-     for (const transcript of transcripts) {
-       const sentences = transcript.sentences || [];
-
-       for (const sentence of sentences) {
-         const beginTime = sentence.begin_time || 0;
-         const endTime = sentence.end_time || 0;
-         const text = (sentence.text || '').trim();
-
-         if (text) {
-           const startStr = msToSrtTime(beginTime);
-           const endStr = msToSrtTime(endTime);
-
-           srtEntries.push(`${subtitleIndex}\n${startStr} --> ${endStr}\n${text}\n`);
-           subtitleIndex++;
-         }
-       }
-     }
-   }
-
-   return srtEntries.join('\n');
- };
-
- /**
-  * Upload the audio file to the server
-  */
- const uploadAudio = async (uploadUrl, audioPath) => {
-   const fileBuffer = fs.readFileSync(audioPath);
-   const fileName = path.basename(audioPath);
-
-   const formData = new FormData();
-   formData.append('file', new Blob([fileBuffer]), fileName);
-
-   const response = await fetch(uploadUrl, {
-     method: 'POST',
-     body: formData,
-   });
-
-   if (!response.ok) {
-     throw new Error(`Upload failed: ${response.status} - ${await response.text()}`);
-   }
-
-   const data = await response.json();
-
-   if (!data.success || !data.data || !data.data.path) {
-     throw new Error(`Unexpected upload response format: ${JSON.stringify(data)}`);
-   }
-
-   return data.data.path;
- };
-
- /**
-  * Build the static-resource URL from the upload endpoint URL and the returned file path
-  */
- const getStaticUrl = (uploadUrl, filePath) => {
-   const url = new URL(uploadUrl);
-   return `${url.protocol}//${url.host}${filePath}`;
- };
-
- /**
-  * Submit an ASR transcription task
-  */
- const submitAsrTask = async (fileUrls, apiKey) => {
-   const response = await fetch(ASR_SERVICE_URL, {
-     method: 'POST',
-     headers: {
-       'Authorization': `Bearer ${apiKey}`,
-       'Content-Type': 'application/json',
-       'X-DashScope-Async': 'enable',
-     },
-     body: JSON.stringify({
-       model: 'paraformer-v2',
-       input: { file_urls: fileUrls },
-       parameters: {
-         channel_id: [0],
-         language_hints: LANGUAGE_HINTS,
-       },
-     }),
-   });
-
-   if (!response.ok) {
-     const errorText = await response.text();
-     throw new Error(`ASR task submission failed: ${response.status} - ${errorText}`);
-   }
-
-   const data = await response.json();
-
-   if (!data.output || !data.output.task_id) {
-     throw new Error(`Unexpected ASR response format: ${JSON.stringify(data)}`);
-   }
-
-   return data.output.task_id;
- };
-
- /**
-  * Poll until the ASR task completes
-  */
- const waitForTaskComplete = async (taskId, apiKey, timeoutMs = 5 * 60 * 1000) => {
-   const taskUrl = `https://dashscope.aliyuncs.com/api/v1/tasks/${taskId}`;
-   const startTime = Date.now();
-   const pollInterval = 500;
-
-   while (Date.now() - startTime < timeoutMs) {
-     const response = await fetch(taskUrl, {
-       method: 'GET',
-       headers: {
-         'Authorization': `Bearer ${apiKey}`,
-       },
-     });
-
-     if (!response.ok) {
-       throw new Error(`Failed to query task status: ${response.status}`);
-     }
-
-     const data = await response.json();
-     const status = data.output?.task_status;
-
-     if (status === 'SUCCEEDED') {
-       return data.output.results || [];
-     } else if (status === 'FAILED') {
-       throw new Error('ASR recognition task failed');
-     }
-
-     await new Promise(resolve => setTimeout(resolve, pollInterval));
-   }
-
-   throw new Error('ASR recognition timed out');
- };
-
- /**
-  * Fetch the transcription result details
-  */
- const fetchTranscriptionResults = async (results) => {
-   const allTranscriptions = [];
-
-   for (const result of results) {
-     if (result.subtask_status === 'SUCCEEDED' && result.transcription_url) {
-       const response = await fetch(result.transcription_url);
-       if (response.ok) {
-         const transcriptionData = await response.json();
-         allTranscriptions.push({
-           file_url: result.file_url,
-           transcription: transcriptionData,
-         });
-       }
-     }
-   }
-
-   return allTranscriptions;
- };
+ const DASHSCOPE_API_KEY = process.env.DASHSCOPE_API_KEY || '';

  /**
   * Convert a Windows path to a WSL path, or vice versa
@@ -286,59 +111,10 @@ const resolveAudioFile = (workingDir, rawPath) => {
    return resolved;
  };

- /**
-  * ASR speech recognition
-  */
- const asr = async ({ working_directory, audio_file }) => {
-   // Validate the environment variable
-   if (!ASR_API_KEY) {
-     throw new Error('Please set the ASR_API_KEY environment variable');
-   }
-
-   const workingDir = resolveWorkingDirectory(working_directory);
-   const audioPath = resolveAudioFile(workingDir, audio_file);
-
-   // Check the file size
-   const fileSize = fs.statSync(audioPath).size;
-   if (fileSize > MAX_FILE_SIZE_BYTES) {
-     throw new Error(
-       `Audio file size ${(fileSize / 1024 / 1024).toFixed(2)}MB exceeds the limit (max 120MB). ` +
-       `Please split the audio into segments and recognize them one by one.`
-     );
-   }
-
-   // 1. Upload the audio file
-   const filePath = await uploadAudio(ASR_UPLOAD_URL, audioPath);
-
-   // 2. Build the static-resource URL
-   const audioUrl = getStaticUrl(ASR_UPLOAD_URL, filePath);
-
-   // 3. Submit the ASR task
-   const taskId = await submitAsrTask([audioUrl], ASR_API_KEY);
-
-   // 4. Wait for the task to complete
-   const results = await waitForTaskComplete(taskId, ASR_API_KEY);
-
-   // 5. Fetch the transcription results
-   const transcriptions = await fetchTranscriptionResults(results);
-
-   if (!transcriptions.length) {
-     throw new Error('No recognition results were returned');
-   }
-
-   // 6. Convert to SRT format and save
-   const srtContent = asrToSrt(transcriptions);
-   const filename = `asr_${Date.now()}.srt`;
-   const outputPath = path.join(workingDir, filename);
-   fs.writeFileSync(outputPath, srtContent, 'utf-8');
-
-   return filename;
- };
-
  const server = new Server(
    {
      name: 'media',
-     version: '1.0.9',
+     version: '1.0.10',
    },
    {
      capabilities: {
@@ -367,6 +143,36 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({
          required: ['working_directory', 'audio_file'],
        },
      },
+     {
+       name: 'tts',
+       description: 'Speech synthesis tool that converts text into an audio file.',
+       inputSchema: {
+         type: 'object',
+         properties: {
+           working_directory: {
+             type: 'string',
+             description: 'Absolute path of the working directory; the synthesized audio file is saved here',
+           },
+           text: {
+             type: 'string',
+             description: 'The text to synthesize',
+           },
+           voice: {
+             type: 'string',
+             description: 'Voice model, e.g. sambert-zhimiao-emo-v1 (default), sambert-zhichu-v1, etc.',
+           },
+           format: {
+             type: 'string',
+             description: 'Output audio format: mp3 (default), wav, or pcm',
+           },
+           sample_rate: {
+             type: 'number',
+             description: 'Sample rate, default 16000',
+           },
+         },
+         required: ['working_directory', 'text'],
+       },
+     },
    ],
  }));

@@ -379,10 +185,29 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
      if (!working_directory || !audio_file) {
        throw new Error('Both working_directory and audio_file parameters are required');
      }
-     const filename = await asr({ working_directory, audio_file });
+     const workingDir = resolveWorkingDirectory(working_directory);
+     const audioPath = resolveAudioFile(workingDir, audio_file);
+     const filename = await asr({ workingDir, audioPath, apiKey: DASHSCOPE_API_KEY });
      return { content: [{ type: 'text', text: `Speech recognition complete. Subtitle file saved to the working directory: ${filename}` }] };
    }

+   if (name === 'tts') {
+     const { working_directory, text, voice, format, sample_rate } = args;
+     if (!working_directory || !text) {
+       throw new Error('Both working_directory and text parameters are required');
+     }
+     const workingDir = resolveWorkingDirectory(working_directory);
+     const filename = await tts({
+       workingDir,
+       text,
+       apiKey: DASHSCOPE_API_KEY,
+       voice: voice || 'sambert-zhimiao-emo-v1',
+       format: format || 'mp3',
+       sampleRate: sample_rate || 16000,
+     });
+     return { content: [{ type: 'text', text: `Speech synthesis complete. Audio file saved to the working directory: ${filename}` }] };
+   }
+
    return {
      content: [{ type: 'text', text: `Unknown tool: ${name}` }],
      isError: true,
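The refactored index.js now resolves paths and reads `DASHSCOPE_API_KEY` up front, then delegates to the utils modules. As a rough end-to-end sketch, the server could be exercised over stdio with the MCP SDK's documented client API; the paths and API key below are placeholders, not values shipped with the package:

```js
// Minimal sketch of driving this server over stdio with the MCP SDK client.
// Assumes the SDK's documented Client/StdioClientTransport API; paths and the
// API key are placeholders.
const { Client } = require('@modelcontextprotocol/sdk/client/index.js');
const { StdioClientTransport } = require('@modelcontextprotocol/sdk/client/stdio.js');

async function main() {
  const transport = new StdioClientTransport({
    command: 'node',
    args: ['/path/to/media/index.js'],
    env: { DASHSCOPE_API_KEY: 'your-api-key' },
  });
  const client = new Client({ name: 'media-smoke-test', version: '0.0.1' }, { capabilities: {} });
  await client.connect(transport);

  // List the two tools (asr, tts), then synthesize a short clip.
  console.log(await client.listTools());
  const result = await client.callTool({
    name: 'tts',
    arguments: { working_directory: '/absolute/path/to/workdir', text: 'Hello.' },
  });
  console.log(result.content);
  await client.close();
}

main().catch(console.error);
```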
package/package.json CHANGED
@@ -1,15 +1,18 @@
  {
    "name": "@ww_nero/media",
-   "version": "1.0.9",
-   "description": "MCP server for media processing, including ASR speech recognition",
+   "version": "1.0.10",
+   "description": "MCP server for media processing, including ASR speech recognition and TTS speech synthesis",
    "main": "index.js",
    "bin": {
      "media": "index.js"
    },
    "files": [
-     "index.js"
+     "index.js",
+     "utils"
    ],
    "dependencies": {
-     "@modelcontextprotocol/sdk": "^1.22.0"
+     "@modelcontextprotocol/sdk": "^1.22.0",
+     "uuid": "^13.0.0",
+     "ws": "^8.18.3"
    }
  }
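The published tarball now ships the `utils` directory and adds `ws` (for the DashScope WebSocket TTS endpoint) and `uuid` (for task IDs) as runtime dependencies. A quick, hypothetical smoke check that everything resolves after installing the package might look like:

```js
// Hypothetical smoke check after `npm install @ww_nero/media`: the utils modules
// and the new ws/uuid dependencies should all resolve from the published files.
const { asr } = require('@ww_nero/media/utils/asr.js');
const { tts } = require('@ww_nero/media/utils/tts.js');
const WebSocket = require('ws');
const { v4: uuidv4 } = require('uuid');

console.log(typeof asr, typeof tts, typeof WebSocket, typeof uuidv4); // function function function function
```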
package/utils/asr.js ADDED
@@ -0,0 +1,233 @@
+ const fs = require('fs');
+ const path = require('path');
+
+ const ASR_UPLOAD_URL = 'http://fsheep.com:10808/upload';
+ const ASR_SERVICE_URL = 'https://dashscope.aliyuncs.com/api/v1/services/audio/asr/transcription';
+
+ const SUPPORTED_AUDIO_TYPES = ['.mp3', '.wav'];
+ const MAX_FILE_SIZE_BYTES = 120 * 1024 * 1024; // 120MB
+ const LANGUAGE_HINTS = ['zh', 'en', 'ja'];
+
+ /**
+  * Convert milliseconds to the SRT time format HH:MM:SS,mmm
+  */
+ const msToSrtTime = (ms) => {
+   const hours = Math.floor(ms / 3600000);
+   const minutes = Math.floor((ms % 3600000) / 60000);
+   const seconds = Math.floor((ms % 60000) / 1000);
+   const milliseconds = ms % 1000;
+   return `${String(hours).padStart(2, '0')}:${String(minutes).padStart(2, '0')}:${String(seconds).padStart(2, '0')},${String(milliseconds).padStart(3, '0')}`;
+ };
+
+ /**
+  * Convert ASR recognition results into SRT subtitle content
+  */
+ const asrToSrt = (asrData) => {
+   const srtEntries = [];
+   let subtitleIndex = 1;
+
+   for (const item of asrData) {
+     const transcription = item.transcription || {};
+     const transcripts = transcription.transcripts || [];
+
+     for (const transcript of transcripts) {
+       const sentences = transcript.sentences || [];
+
+       for (const sentence of sentences) {
+         const beginTime = sentence.begin_time || 0;
+         const endTime = sentence.end_time || 0;
+         const text = (sentence.text || '').trim();
+
+         if (text) {
+           const startStr = msToSrtTime(beginTime);
+           const endStr = msToSrtTime(endTime);
+
+           srtEntries.push(`${subtitleIndex}\n${startStr} --> ${endStr}\n${text}\n`);
+           subtitleIndex++;
+         }
+       }
+     }
+   }
+
+   return srtEntries.join('\n');
+ };
+
+ /**
+  * Upload the audio file to the server
+  */
+ const uploadAudio = async (uploadUrl, audioPath) => {
+   const fileBuffer = fs.readFileSync(audioPath);
+   const fileName = path.basename(audioPath);
+
+   const formData = new FormData();
+   formData.append('file', new Blob([fileBuffer]), fileName);
+
+   const response = await fetch(uploadUrl, {
+     method: 'POST',
+     body: formData,
+   });
+
+   if (!response.ok) {
+     throw new Error(`Upload failed: ${response.status} - ${await response.text()}`);
+   }
+
+   const data = await response.json();
+
+   if (!data.success || !data.data || !data.data.path) {
+     throw new Error(`Unexpected upload response format: ${JSON.stringify(data)}`);
+   }
+
+   return data.data.path;
+ };
+
+ /**
+  * Build the static-resource URL from the upload endpoint URL and the returned file path
+  */
+ const getStaticUrl = (uploadUrl, filePath) => {
+   const url = new URL(uploadUrl);
+   return `${url.protocol}//${url.host}${filePath}`;
+ };
+
+ /**
+  * Submit an ASR transcription task
+  */
+ const submitAsrTask = async (fileUrls, apiKey) => {
+   const response = await fetch(ASR_SERVICE_URL, {
+     method: 'POST',
+     headers: {
+       'Authorization': `Bearer ${apiKey}`,
+       'Content-Type': 'application/json',
+       'X-DashScope-Async': 'enable',
+     },
+     body: JSON.stringify({
+       model: 'paraformer-v2',
+       input: { file_urls: fileUrls },
+       parameters: {
+         channel_id: [0],
+         language_hints: LANGUAGE_HINTS,
+       },
+     }),
+   });
+
+   if (!response.ok) {
+     const errorText = await response.text();
+     throw new Error(`ASR task submission failed: ${response.status} - ${errorText}`);
+   }
+
+   const data = await response.json();
+
+   if (!data.output || !data.output.task_id) {
+     throw new Error(`Unexpected ASR response format: ${JSON.stringify(data)}`);
+   }
+
+   return data.output.task_id;
+ };
+
+ /**
+  * Poll until the ASR task completes
+  */
+ const waitForTaskComplete = async (taskId, apiKey, timeoutMs = 5 * 60 * 1000) => {
+   const taskUrl = `https://dashscope.aliyuncs.com/api/v1/tasks/${taskId}`;
+   const startTime = Date.now();
+   const pollInterval = 500;
+
+   while (Date.now() - startTime < timeoutMs) {
+     const response = await fetch(taskUrl, {
+       method: 'GET',
+       headers: {
+         'Authorization': `Bearer ${apiKey}`,
+       },
+     });
+
+     if (!response.ok) {
+       throw new Error(`Failed to query task status: ${response.status}`);
+     }
+
+     const data = await response.json();
+     const status = data.output?.task_status;
+
+     if (status === 'SUCCEEDED') {
+       return data.output.results || [];
+     } else if (status === 'FAILED') {
+       throw new Error('ASR recognition task failed');
+     }
+
+     await new Promise(resolve => setTimeout(resolve, pollInterval));
+   }
+
+   throw new Error('ASR recognition timed out');
+ };
+
+ /**
+  * Fetch the transcription result details
+  */
+ const fetchTranscriptionResults = async (results) => {
+   const allTranscriptions = [];
+
+   for (const result of results) {
+     if (result.subtask_status === 'SUCCEEDED' && result.transcription_url) {
+       const response = await fetch(result.transcription_url);
+       if (response.ok) {
+         const transcriptionData = await response.json();
+         allTranscriptions.push({
+           file_url: result.file_url,
+           transcription: transcriptionData,
+         });
+       }
+     }
+   }
+
+   return allTranscriptions;
+ };
+
+ /**
+  * ASR speech recognition
+  */
+ const asr = async ({ workingDir, audioPath, apiKey }) => {
+   // Validate the API key
+   if (!apiKey) {
+     throw new Error('Please set the DASHSCOPE_API_KEY environment variable');
+   }
+
+   // Check the file size
+   const fileSize = fs.statSync(audioPath).size;
+   if (fileSize > MAX_FILE_SIZE_BYTES) {
+     throw new Error(
+       `Audio file size ${(fileSize / 1024 / 1024).toFixed(2)}MB exceeds the limit (max 120MB). ` +
+       `Please split the audio into segments and recognize them one by one.`
+     );
+   }
+
+   // 1. Upload the audio file
+   const filePath = await uploadAudio(ASR_UPLOAD_URL, audioPath);
+
+   // 2. Build the static-resource URL
+   const audioUrl = getStaticUrl(ASR_UPLOAD_URL, filePath);
+
+   // 3. Submit the ASR task
+   const taskId = await submitAsrTask([audioUrl], apiKey);
+
+   // 4. Wait for the task to complete
+   const results = await waitForTaskComplete(taskId, apiKey);
+
+   // 5. Fetch the transcription results
+   const transcriptions = await fetchTranscriptionResults(results);
+
+   if (!transcriptions.length) {
+     throw new Error('No recognition results were returned');
+   }
+
+   // 6. Convert to SRT format and save
+   const srtContent = asrToSrt(transcriptions);
+   const filename = `asr_${Date.now()}.srt`;
+   const outputPath = path.join(workingDir, filename);
+   fs.writeFileSync(outputPath, srtContent, 'utf-8');
+
+   return filename;
+ };
+
+ module.exports = {
+   asr,
+   SUPPORTED_AUDIO_TYPES,
+   MAX_FILE_SIZE_BYTES,
+ };
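Note that asr.js uses the global `fetch`, `FormData`, and `Blob`, so Node 18+ is assumed, and it expects already-resolved paths plus an API key from the caller. A minimal standalone sketch of the exported helper, with placeholder paths, could look like this:

```js
// Minimal standalone sketch of the exported asr() helper (Node 18+ assumed for
// global fetch/FormData/Blob). Paths are placeholders; the API key comes from the env.
const path = require('path');
const { asr, SUPPORTED_AUDIO_TYPES, MAX_FILE_SIZE_BYTES } = require('./utils/asr.js');

(async () => {
  const workingDir = '/absolute/path/to/workdir';       // placeholder
  const audioPath = path.join(workingDir, 'input.mp3'); // must be .mp3 or .wav per SUPPORTED_AUDIO_TYPES
  console.log('limit:', MAX_FILE_SIZE_BYTES, 'types:', SUPPORTED_AUDIO_TYPES);

  // Uploads the file, submits the paraformer-v2 task, polls, and writes asr_<timestamp>.srt.
  const srtName = await asr({ workingDir, audioPath, apiKey: process.env.DASHSCOPE_API_KEY });
  console.log('saved:', srtName);
})();
```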
package/utils/tts.js ADDED
@@ -0,0 +1,128 @@
+ const fs = require('fs');
+ const path = require('path');
+ const WebSocket = require('ws');
+ const { v4: uuidv4 } = require('uuid');
+
+ const TTS_WS_URL = 'wss://dashscope.aliyuncs.com/api-ws/v1/inference/';
+
+ /**
+  * TTS speech synthesis
+  */
+ const tts = async ({ workingDir, text, apiKey, voice = 'sambert-zhimiao-emo-v1', format = 'mp3', sampleRate = 16000 }) => {
+   // Validate the API key
+   if (!apiKey) {
+     throw new Error('Please set the DASHSCOPE_API_KEY environment variable');
+   }
+
+   // Validate the text
+   if (!text || typeof text !== 'string' || !text.trim()) {
+     throw new Error('The text to synthesize must not be empty');
+   }
+
+   const trimmedText = text.trim();
+
+   // Generate the output file name
+   const filename = `tts_${Date.now()}.${format}`;
+   const outputPath = path.join(workingDir, filename);
+
+   return new Promise((resolve, reject) => {
+     // Truncate or create the output file
+     fs.writeFileSync(outputPath, '');
+     const fileStream = fs.createWriteStream(outputPath, { flags: 'a' });
+
+     const ws = new WebSocket(TTS_WS_URL, {
+       headers: {
+         Authorization: `bearer ${apiKey}`,
+         'X-DashScope-DataInspection': 'enable'
+       }
+     });
+
+     let resolved = false;
+
+     const cleanup = () => {
+       if (!resolved) {
+         resolved = true;
+         fileStream.end();
+         if (ws.readyState === WebSocket.OPEN) {
+           ws.close();
+         }
+       }
+     };
+
+     ws.on('open', () => {
+       const taskId = uuidv4();
+       const runTaskMessage = {
+         header: {
+           action: 'run-task',
+           task_id: taskId,
+           streaming: 'out'
+         },
+         payload: {
+           model: voice,
+           task_group: 'audio',
+           task: 'tts',
+           function: 'SpeechSynthesizer',
+           input: {
+             text: trimmedText
+           },
+           parameters: {
+             text_type: 'PlainText',
+             format: format,
+             sample_rate: sampleRate,
+             volume: 50,
+             rate: 1,
+             pitch: 1,
+             word_timestamp_enabled: false,
+             phoneme_timestamp_enabled: false
+           }
+         }
+       };
+       ws.send(JSON.stringify(runTaskMessage));
+     });
+
+     ws.on('message', (data, isBinary) => {
+       if (isBinary) {
+         fileStream.write(data);
+       } else {
+         try {
+           const message = JSON.parse(data);
+           const event = message.header?.event;
+
+           if (event === 'task-finished') {
+             cleanup();
+             resolve(filename);
+           } else if (event === 'task-failed') {
+             cleanup();
+             reject(new Error(message.header?.error_message || 'TTS task failed'));
+           }
+         } catch (e) {
+           // Ignore JSON parse errors
+         }
+       }
+     });
+
+     ws.on('error', (error) => {
+       cleanup();
+       reject(new Error(`WebSocket error: ${error.message}`));
+     });
+
+     ws.on('close', () => {
+       if (!resolved) {
+         cleanup();
+         reject(new Error('WebSocket connection closed unexpectedly'));
+       }
+     });
+
+     // Timeout handling (2 minutes)
+     setTimeout(() => {
+       if (!resolved) {
+         cleanup();
+         reject(new Error('TTS task timed out'));
+       }
+     }, 2 * 60 * 1000);
+   });
+ };
+
+ module.exports = {
+   tts,
+ };
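The tts() helper streams binary WebSocket frames from DashScope straight into the output file and resolves with the generated filename. A minimal standalone sketch of the exported helper, with a placeholder working directory, could look like this:

```js
// Minimal standalone sketch of the exported tts() helper. The working directory
// is a placeholder; DASHSCOPE_API_KEY must be set in the environment.
const { tts } = require('./utils/tts.js');

(async () => {
  const filename = await tts({
    workingDir: '/absolute/path/to/workdir', // placeholder
    text: 'Synthesis test.',
    apiKey: process.env.DASHSCOPE_API_KEY,
    voice: 'sambert-zhimiao-emo-v1',         // default voice model
    format: 'mp3',                           // mp3 | wav | pcm
    sampleRate: 16000,
  });
  console.log('saved:', filename);           // e.g. tts_<timestamp>.mp3
})();
```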