@mingxy/ocosay 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +556 -0
- package/TECH_PLAN.md +352 -0
- package/__mocks__/@opencode-ai/plugin.ts +32 -0
- package/dist/config.d.ts +26 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +95 -0
- package/dist/config.js.map +1 -0
- package/dist/core/backends/afplay-backend.d.ts +33 -0
- package/dist/core/backends/afplay-backend.d.ts.map +1 -0
- package/dist/core/backends/afplay-backend.js +144 -0
- package/dist/core/backends/afplay-backend.js.map +1 -0
- package/dist/core/backends/aplay-backend.d.ts +33 -0
- package/dist/core/backends/aplay-backend.d.ts.map +1 -0
- package/dist/core/backends/aplay-backend.js +142 -0
- package/dist/core/backends/aplay-backend.js.map +1 -0
- package/dist/core/backends/base.d.ts +94 -0
- package/dist/core/backends/base.d.ts.map +1 -0
- package/dist/core/backends/base.js +6 -0
- package/dist/core/backends/base.js.map +1 -0
- package/dist/core/backends/index.d.ts +29 -0
- package/dist/core/backends/index.d.ts.map +1 -0
- package/dist/core/backends/index.js +114 -0
- package/dist/core/backends/index.js.map +1 -0
- package/dist/core/backends/naudiodon-backend.d.ts +52 -0
- package/dist/core/backends/naudiodon-backend.d.ts.map +1 -0
- package/dist/core/backends/naudiodon-backend.js +123 -0
- package/dist/core/backends/naudiodon-backend.js.map +1 -0
- package/dist/core/backends/powershell-backend.d.ts +34 -0
- package/dist/core/backends/powershell-backend.d.ts.map +1 -0
- package/dist/core/backends/powershell-backend.js +154 -0
- package/dist/core/backends/powershell-backend.js.map +1 -0
- package/dist/core/player.d.ts +97 -0
- package/dist/core/player.d.ts.map +1 -0
- package/dist/core/player.js +268 -0
- package/dist/core/player.js.map +1 -0
- package/dist/core/speaker.d.ts +97 -0
- package/dist/core/speaker.d.ts.map +1 -0
- package/dist/core/speaker.js +218 -0
- package/dist/core/speaker.js.map +1 -0
- package/dist/core/stream-player.d.ts +107 -0
- package/dist/core/stream-player.d.ts.map +1 -0
- package/dist/core/stream-player.js +272 -0
- package/dist/core/stream-player.js.map +1 -0
- package/dist/core/stream-reader.d.ts +86 -0
- package/dist/core/stream-reader.d.ts.map +1 -0
- package/dist/core/stream-reader.js +172 -0
- package/dist/core/stream-reader.js.map +1 -0
- package/dist/core/streaming-synthesizer.d.ts +51 -0
- package/dist/core/streaming-synthesizer.d.ts.map +1 -0
- package/dist/core/streaming-synthesizer.js +103 -0
- package/dist/core/streaming-synthesizer.js.map +1 -0
- package/dist/core/types.d.ts +141 -0
- package/dist/core/types.d.ts.map +1 -0
- package/dist/core/types.js +37 -0
- package/dist/core/types.js.map +1 -0
- package/dist/index.d.ts +40 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +179 -0
- package/dist/index.js.map +1 -0
- package/dist/plugin.d.ts +4 -0
- package/dist/plugin.d.ts.map +1 -0
- package/dist/plugin.js +151 -0
- package/dist/plugin.js.map +1 -0
- package/dist/providers/base.d.ts +55 -0
- package/dist/providers/base.d.ts.map +1 -0
- package/dist/providers/base.js +95 -0
- package/dist/providers/base.js.map +1 -0
- package/dist/providers/minimax.d.ts +84 -0
- package/dist/providers/minimax.d.ts.map +1 -0
- package/dist/providers/minimax.js +387 -0
- package/dist/providers/minimax.js.map +1 -0
- package/dist/tools/tts.d.ts +147 -0
- package/dist/tools/tts.d.ts.map +1 -0
- package/dist/tools/tts.js +232 -0
- package/dist/tools/tts.js.map +1 -0
- package/jest.config.js +15 -0
- package/package.json +49 -0
- package/src/config.ts +121 -0
- package/src/core/backends/afplay-backend.ts +162 -0
- package/src/core/backends/aplay-backend.ts +160 -0
- package/src/core/backends/base.ts +117 -0
- package/src/core/backends/index.ts +128 -0
- package/src/core/backends/naudiodon-backend.ts +164 -0
- package/src/core/backends/powershell-backend.ts +173 -0
- package/src/core/player.ts +322 -0
- package/src/core/speaker.ts +283 -0
- package/src/core/stream-player.ts +326 -0
- package/src/core/stream-reader.ts +190 -0
- package/src/core/streaming-synthesizer.ts +123 -0
- package/src/core/types.ts +185 -0
- package/src/index.ts +233 -0
- package/src/plugin.ts +166 -0
- package/src/providers/base.ts +150 -0
- package/src/providers/minimax.ts +515 -0
- package/src/tools/tts.ts +277 -0
- package/src/types/naudiodon.d.ts +19 -0
- package/tests/__mocks__/@opencode-ai/plugin.ts +32 -0
- package/tests/backends.test.ts +831 -0
- package/tests/index.test.ts +201 -0
- package/tests/integration-test.d.ts +6 -0
- package/tests/integration-test.d.ts.map +1 -0
- package/tests/integration-test.js +84 -0
- package/tests/integration-test.js.map +1 -0
- package/tests/integration-test.ts +93 -0
- package/tests/p1-fixes.test.ts +160 -0
- package/tests/plugin.test.ts +311 -0
- package/tests/provider.test.d.ts +2 -0
- package/tests/provider.test.d.ts.map +1 -0
- package/tests/provider.test.js +69 -0
- package/tests/provider.test.js.map +1 -0
- package/tests/provider.test.ts +87 -0
- package/tests/speaker.test.d.ts +2 -0
- package/tests/speaker.test.d.ts.map +1 -0
- package/tests/speaker.test.js +63 -0
- package/tests/speaker.test.js.map +1 -0
- package/tests/speaker.test.ts +232 -0
- package/tests/stream-player.test.ts +303 -0
- package/tests/stream-reader.test.ts +269 -0
- package/tests/streaming-synthesizer.test.ts +225 -0
- package/tests/tts-tools.test.ts +270 -0
- package/tests/types.test.d.ts +2 -0
- package/tests/types.test.d.ts.map +1 -0
- package/tests/types.test.js +61 -0
- package/tests/types.test.js.map +1 -0
- package/tests/types.test.ts +63 -0
- package/tsconfig.json +22 -0
|
@@ -0,0 +1,515 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* MiniMax TTS Provider
|
|
3
|
+
* 支持 T2A v2 (同步/流式HTTP)、T2A Async v2 (异步HTTP轮询)
|
|
4
|
+
* 端点可配置: baseURL (默认 https://api.minimaxi.com)
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { BaseTTSProvider } from './base'
|
|
8
|
+
import {
|
|
9
|
+
TTSError,
|
|
10
|
+
TTSErrorCode,
|
|
11
|
+
AudioResult,
|
|
12
|
+
Voice,
|
|
13
|
+
SpeakOptions
|
|
14
|
+
} from '../core/types'
|
|
15
|
+
import axios, { AxiosInstance } from 'axios'
|
|
16
|
+
import { WebSocket } from 'ws'
|
|
17
|
+
|
|
18
|
+
/** Synthesis transport the provider can use for a request. */
type MiniMaxSynthesisMode = 'sync' | 'async' | 'stream'

/** Audio container/codec names the provider forwards as `audio_setting.format`. */
type MiniMaxAudioFormat = 'mp3' | 'wav' | 'flac'

/**
 * Construction options for the MiniMax TTS provider.
 *
 * Only `apiKey` is required; every other field has a fallback applied by the
 * provider at construction or request time.
 */
export interface MiniMaxConfig {
  /** API key, sent as a `Bearer` token in the `Authorization` header. */
  apiKey: string

  /** API origin. The provider falls back to `https://api.minimaxi.com`. */
  baseURL?: string

  /** Default voice ID. The provider falls back to `male-qn-qingse`. */
  voiceId?: string

  /** Default synthesis mode. The provider falls back to `stream`. */
  model?: MiniMaxSynthesisMode

  /** Speech model name sent as `model`. The provider falls back to `speech-2.8-hd`. */
  ttsModel?: string

  /** Output audio format. The provider falls back to `mp3`. */
  audioFormat?: MiniMaxAudioFormat

  /** Default speaking speed, sent as `voice_setting.speed` (1.0 when unset). */
  speed?: number

  /** Default volume; divided by 10 before being sent as `voice_setting.vol`. */
  volume?: number

  /** Default pitch; sent as `Math.round((pitch - 1) * 12)` in `voice_setting.pitch`. */
  pitch?: number

  /** Account group ID. NOTE(review): not read by the provider in this file. */
  groupId?: string
}
|
|
30
|
+
|
|
31
|
+
/** API origin used when `config.baseURL` is not provided. */
const MINIMAX_DEFAULT_BASE_URL = 'https://api.minimaxi.com'

/** Voice used when neither the call nor the config names one. */
const MINIMAX_FALLBACK_VOICE_ID = 'male-qn-qingse'

/** Speech model used when `config.ttsModel` is not provided. */
const MINIMAX_DEFAULT_TTS_MODEL = 'speech-2.8-hd'

/** Audio parameters requested from the API on every synthesis call. */
const MINIMAX_SAMPLE_RATE_HZ = 32000
const MINIMAX_BITRATE_BPS = 128000

/** Async task polling: wait this long between polls, give up after this many. */
const MINIMAX_ASYNC_POLL_INTERVAL_MS = 2000
const MINIMAX_ASYNC_MAX_POLLS = 60

/** Low-level error codes that indicate the API could not be reached in time. */
const MINIMAX_NETWORK_ERROR_CODES = [
  'ECONNREFUSED',
  'ENOTFOUND',
  'ETIMEDOUT',
  'ECONNABORTED',
  'ECONNRESET'
]

/**
 * MiniMax TTS provider.
 *
 * Official site: https://www.minimax.io/
 * API docs: https://www.minimaxi.com/document/T2A
 *
 * Three synthesis modes are implemented, all over HTTP:
 *  - `stream`: POST `/v1/t2a_v2` with `stream: true`; the server-sent events
 *    are collected and returned as one buffer.
 *  - `sync`:   POST `/v1/t2a_v2` with `stream: false`; hex audio in the JSON body.
 *  - `async`:  POST `/v1/t2a_async_v2`, then poll the task and download the file.
 */
export class MiniMaxProvider extends BaseTTSProvider {
  name = 'minimax'
  capabilities = {
    speak: true,
    stream: true,
    sync: true,
    async: true,
    voiceList: true,
    voiceClone: true
  } as const

  private config: MiniMaxConfig
  private httpClient: AxiosInstance
  // Closed by destroy(); nothing in this file ever opens it.
  private wsConnection?: WebSocket
  private currentAudioData: Buffer[] = []
  private audioFormat: 'mp3' | 'wav' | 'flac' = 'mp3'

  /**
   * Stores the configuration, seeds the base-class defaults and creates the
   * shared axios client (30 s timeout, Bearer auth, JSON content type).
   *
   * @param config Provider configuration; only `apiKey` is required.
   */
  constructor(config: MiniMaxConfig) {
    super()
    this.config = config
    this.apiKey = config.apiKey
    this.defaultVoice = config.voiceId
    this.defaultModel = config.model || 'stream'
    this.audioFormat = config.audioFormat || 'mp3'

    this.httpClient = axios.create({
      baseURL: this.config.baseURL || MINIMAX_DEFAULT_BASE_URL,
      headers: {
        'Authorization': `Bearer ${this.apiKey}`,
        'Content-Type': 'application/json'
      },
      timeout: 30000
    })
  }

  /** Validates the API key; no network traffic happens here. */
  async initialize(): Promise<void> {
    this.validateApiKey()
  }

  /** Closes the WebSocket connection if one is held and drops the reference. */
  async destroy(): Promise<void> {
    if (this.wsConnection) {
      this.wsConnection.close()
      this.wsConnection = undefined
    }
  }

  /**
   * Dispatches a synthesis request to the implementation for `model`.
   * Any unrecognised mode is handled like `stream`.
   *
   * @param text    Text to synthesise.
   * @param voice   Voice ID, or undefined to use the configured default.
   * @param model   Synthesis mode.
   * @param options Per-call speed/volume/pitch overrides.
   * @returns The synthesised audio.
   * @throws TTSError on API, network or validation failures.
   */
  protected async doSpeak(
    text: string,
    voice: string | undefined,
    model: 'sync' | 'async' | 'stream',
    options?: SpeakOptions
  ): Promise<AudioResult> {
    this.validateApiKey()

    switch (model) {
      case 'sync':
        return this.syncSpeak(text, voice, options)
      case 'async':
        return this.asyncSpeak(text, voice, options)
      case 'stream':
      default:
        return this.streamingSpeak(text, voice, options)
    }
  }

  /**
   * Streaming synthesis (HTTP) - T2A v2 with `stream: true`.
   *
   * The response is consumed as server-sent events; audio fragments are
   * hex-decoded and concatenated, so the caller still receives one buffer.
   *
   * @throws TTSError when the request fails, the stream errors, the API
   *         reports a non-zero status code, or the stream ends without audio.
   */
  private async streamingSpeak(
    text: string,
    voice: string | undefined,
    options?: SpeakOptions
  ): Promise<AudioResult> {
    try {
      const response = await this.httpClient.post('/v1/t2a_v2', {
        model: this.config.ttsModel || MINIMAX_DEFAULT_TTS_MODEL,
        text,
        stream: true,
        voice_setting: this.buildVoiceSetting(voice, options),
        audio_setting: this.buildAudioSetting()
      }, {
        headers: this.authHeaders(true),
        responseType: 'stream'
      })

      return await this.collectStreamedAudio(response.data)
    } catch (error: unknown) {
      if (error instanceof TTSError) throw error
      throw this.mapError(error)
    }
  }

  /**
   * Synchronous synthesis (HTTP) - T2A v2, POST `/v1/t2a_v2` with `stream: false`.
   *
   * The audio arrives hex-encoded in `data.audio`. The duration is taken from
   * `extra_info.audio_length` (divided by 1000) when present, otherwise estimated.
   *
   * @throws TTSError when the API reports a non-zero status code, returns no
   *         audio, or the request itself fails.
   */
  private async syncSpeak(
    text: string,
    voice: string | undefined,
    options?: SpeakOptions
  ): Promise<AudioResult> {
    try {
      const response = await this.httpClient.post('/v1/t2a_v2', {
        model: this.config.ttsModel || MINIMAX_DEFAULT_TTS_MODEL,
        text,
        stream: false,
        output_format: 'hex',
        voice_setting: this.buildVoiceSetting(voice, options),
        audio_setting: this.buildAudioSetting()
      }, {
        headers: this.authHeaders(true)
      })

      // A missing base_resp is treated as a failure too (undefined !== 0).
      if (response.data.base_resp?.status_code !== 0) {
        throw new TTSError(
          response.data.base_resp?.status_msg || 'API request failed',
          TTSErrorCode.UNKNOWN,
          this.name,
          response.data.base_resp
        )
      }

      const audioHex = response.data.data?.audio
      if (!audioHex) {
        throw new TTSError(
          'No audio data in response',
          TTSErrorCode.UNKNOWN,
          this.name
        )
      }

      const audioBuffer = Buffer.from(audioHex, 'hex')
      const reportedLength = response.data.extra_info?.audio_length

      return {
        audioData: audioBuffer,
        format: this.audioFormat,
        isStream: false,
        duration: reportedLength
          ? reportedLength / 1000
          : this.estimateDuration(audioBuffer.length)
      }
    } catch (error: unknown) {
      if (error instanceof TTSError) throw error
      throw this.mapError(error)
    }
  }

  /**
   * Asynchronous synthesis (polling) - T2A Async v2.
   *
   * Creates a task, polls it every 2 s for at most 60 polls, then downloads
   * the resulting file. The returned `duration` is 0 because this path has no
   * duration information.
   *
   * @throws TTSError when no task/file ID is returned, the task fails or
   *         expires, polling times out, or any request fails.
   */
  private async asyncSpeak(
    text: string,
    voice: string | undefined,
    options?: SpeakOptions
  ): Promise<AudioResult> {
    try {
      const createResponse = await this.httpClient.post('/v1/t2a_async_v2', {
        model: this.config.ttsModel || MINIMAX_DEFAULT_TTS_MODEL,
        text,
        voice_setting: this.buildVoiceSetting(voice, options),
        audio_setting: this.buildAudioSetting()
      }, {
        headers: this.authHeaders(true)
      })

      const taskId = createResponse.data.task_id
      if (!taskId) {
        throw new TTSError(
          'No task_id in async response',
          TTSErrorCode.UNKNOWN,
          this.name,
          createResponse.data
        )
      }

      for (let attempt = 0; attempt < MINIMAX_ASYNC_MAX_POLLS; attempt++) {
        await this.delay(MINIMAX_ASYNC_POLL_INTERVAL_MS)

        const statusResponse = await this.httpClient.get(
          `/v1/query/t2a_async_query_v2?task_id=${encodeURIComponent(String(taskId))}`,
          { headers: this.authHeaders(false) }
        )

        // Compare case-insensitively so both "success" and "Success" are
        // recognised. NOTE(review): the exact casing and the "expired" state
        // are assumed from the vendor docs; confirm against the live API.
        const status = String(statusResponse.data.status ?? '').toLowerCase()

        if (status === 'success') {
          const fileId = statusResponse.data.file_id
          if (!fileId) {
            throw new TTSError(
              'No file_id in async response',
              TTSErrorCode.UNKNOWN,
              this.name,
              statusResponse.data
            )
          }

          const downloadResponse = await this.httpClient.get(
            `/v1/files/retrieve_content?file_id=${encodeURIComponent(String(fileId))}`,
            {
              headers: this.authHeaders(false),
              responseType: 'arraybuffer'
            }
          )

          return {
            audioData: Buffer.from(downloadResponse.data),
            format: this.audioFormat,
            isStream: false,
            duration: 0
          }
        }

        if (status === 'failed' || status === 'expired') {
          throw new TTSError(
            status === 'expired' ? 'Async TTS task expired' : 'Async TTS task failed',
            TTSErrorCode.UNKNOWN,
            this.name,
            statusResponse.data
          )
        }
      }

      throw new TTSError(
        'Async TTS task timeout',
        TTSErrorCode.NETWORK,
        this.name
      )
    } catch (error: unknown) {
      if (error instanceof TTSError) throw error
      throw this.mapError(error)
    }
  }

  /**
   * Voice cloning - synthesises `text` using a voice cloned from reference audio.
   *
   * NOTE(review): the endpoint path `/v1/t2a_v2/voice_clone` and its request
   * shape cannot be verified from this file; confirm against the API docs.
   *
   * @param audioUrl URL of the reference audio.
   * @param text     Text to synthesise.
   * @param voice    Voice ID to use; defaults to `custom_clone`.
   * @throws TTSError when the request fails.
   */
  async voiceClone(audioUrl: string, text: string, voice?: string): Promise<AudioResult> {
    this.validateApiKey()

    try {
      const response = await this.httpClient.post('/v1/t2a_v2/voice_clone', {
        audio_url: audioUrl,
        text,
        voice_id: voice || 'custom_clone'
      }, {
        headers: this.authHeaders(true),
        responseType: 'arraybuffer'
      })

      // Measure the decoded buffer so the estimate does not depend on whether
      // the HTTP layer returned a Buffer or an ArrayBuffer.
      const audioData = Buffer.from(response.data)

      return {
        audioData,
        format: this.audioFormat,
        isStream: false,
        duration: this.estimateDuration(audioData.length)
      }
    } catch (error: unknown) {
      throw this.mapError(error)
    }
  }

  /**
   * Lists available voices.
   *
   * Best effort: if the API call or the response mapping fails for any
   * reason, the predefined `MINIMAX_VOICES` list is returned instead.
   */
  async listVoices(): Promise<Voice[]> {
    this.validateApiKey()

    try {
      const response = await this.httpClient.get('/v1/t2a/voices', {
        headers: this.authHeaders(false)
      })

      return response.data.voices.map((v: any) => ({
        id: v.voice_id,
        name: v.name,
        language: v.language,
        gender: v.gender
      }))
    } catch {
      // Deliberate fallback: return the predefined voice list on failure.
      return MINIMAX_VOICES
    }
  }

  /** Builds `voice_setting`: per-call options win over config, config over defaults. */
  private buildVoiceSetting(voice: string | undefined, options?: SpeakOptions) {
    const volume = options?.volume !== undefined ? options.volume : this.config.volume
    const pitch = options?.pitch !== undefined ? options.pitch : this.config.pitch

    return {
      voice_id: voice || this.defaultVoice || MINIMAX_FALLBACK_VOICE_ID,
      // `||` on purpose: a speed of 0 is not usable and falls through to the next default.
      speed: options?.speed || this.config.speed || 1.0,
      // Caller-facing volume is divided by 10 for the API's `vol` field.
      vol: volume !== undefined ? volume / 10 : 1.0,
      // Caller-facing pitch is a multiplier around 1; the API receives an integer offset.
      pitch: pitch !== undefined ? Math.round((pitch - 1) * 12) : 0
    }
  }

  /** Builds `audio_setting` shared by every synthesis request. */
  private buildAudioSetting() {
    return {
      sample_rate: MINIMAX_SAMPLE_RATE_HZ,
      bitrate: MINIMAX_BITRATE_BPS,
      format: this.audioFormat,
      channel: 1
    }
  }

  /** Builds per-request headers; `withJsonBody` adds the JSON content type. */
  private authHeaders(withJsonBody: boolean): Record<string, string> {
    const headers: Record<string, string> = {
      'Authorization': `Bearer ${this.apiKey}`
    }
    if (withJsonBody) {
      headers['Content-Type'] = 'application/json'
    }
    return headers
  }

  /**
   * Consumes a server-sent-event stream and resolves with the collected audio.
   *
   * Bytes are buffered until a full line is available, because one `data:`
   * line can be split across several network chunks. Malformed lines are
   * skipped rather than being mixed into the audio.
   */
  private collectStreamedAudio(stream: NodeJS.ReadableStream): Promise<AudioResult> {
    return new Promise<AudioResult>((resolve, reject) => {
      const audioChunks: Buffer[] = []
      let pending: Buffer = Buffer.alloc(0)
      let settled = false

      const succeed = (): void => {
        if (settled) return
        settled = true
        const fullAudio = Buffer.concat(audioChunks)
        resolve({
          audioData: fullAudio,
          format: this.audioFormat,
          isStream: true,
          duration: this.estimateDuration(fullAudio.length)
        })
      }

      const fail = (err: TTSError): void => {
        if (settled) return
        settled = true
        reject(err)
      }

      const handleLine = (rawLine: string): void => {
        const line = rawLine.trim()
        if (settled || !line.startsWith('data:')) return

        let event: any
        try {
          event = JSON.parse(line.slice(5))
        } catch {
          return
        }

        const statusCode = event?.base_resp?.status_code
        if (typeof statusCode === 'number' && statusCode !== 0) {
          fail(new TTSError(
            event.base_resp.status_msg || 'API request failed',
            TTSErrorCode.UNKNOWN,
            this.name,
            event.base_resp
          ))
          return
        }

        // NOTE(review): if the final (status 2) event repeats the complete
        // audio, appending it here duplicates the audio; confirm with the API docs.
        if (event?.data?.audio) {
          audioChunks.push(Buffer.from(event.data.audio, 'hex'))
        }
        if (event?.data?.status === 2) {
          succeed()
        }
      }

      stream.on('data', (chunk: Buffer | string) => {
        const bytes = typeof chunk === 'string' ? Buffer.from(chunk) : chunk
        pending = pending.length > 0 ? Buffer.concat([pending, bytes]) : bytes

        // Split on the newline byte so multi-byte characters are never cut.
        let newlineAt = pending.indexOf(0x0a)
        while (newlineAt !== -1) {
          handleLine(pending.subarray(0, newlineAt).toString('utf8'))
          pending = pending.subarray(newlineAt + 1)
          newlineAt = pending.indexOf(0x0a)
        }
      })

      stream.on('error', (err: any) => {
        fail(new TTSError(
          'Stream error',
          TTSErrorCode.NETWORK,
          this.name,
          err
        ))
      })

      stream.on('end', () => {
        // The last line may arrive without a trailing newline.
        if (pending.length > 0) {
          handleLine(pending.toString('utf8'))
          pending = Buffer.alloc(0)
        }

        if (audioChunks.length > 0) {
          succeed()
        } else {
          // Settle instead of leaving the caller waiting forever.
          fail(new TTSError(
            'No audio data in response',
            TTSErrorCode.UNKNOWN,
            this.name
          ))
        }
      })
    })
  }

  /**
   * Maps a thrown value (typically an axios error) to a TTSError.
   *
   * HTTP 401 maps to AUTH, 429 to QUOTA, 5xx to NETWORK; connection and
   * timeout error codes map to NETWORK; everything else is UNKNOWN.
   */
  private mapError(error: any): TTSError {
    if (error?.response) {
      const status = error.response.status
      const code =
        status === 401 ? TTSErrorCode.AUTH :
        status === 429 ? TTSErrorCode.QUOTA :
        status >= 500 ? TTSErrorCode.NETWORK :
        TTSErrorCode.UNKNOWN

      return new TTSError(
        error.response.data?.message || 'API request failed',
        code,
        this.name,
        error.response.data
      )
    }

    if (MINIMAX_NETWORK_ERROR_CODES.includes(error?.code)) {
      return new TTSError(
        'Network error: Unable to connect to MiniMax API',
        TTSErrorCode.NETWORK,
        this.name,
        error.message
      )
    }

    return new TTSError(
      error?.message || 'Unknown error',
      TTSErrorCode.UNKNOWN,
      this.name,
      error
    )
  }

  /** Resolves after `ms` milliseconds. */
  private delay(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms))
  }

  /**
   * Estimates audio duration in seconds from the encoded size.
   *
   * Assumes a constant bitrate equal to the one requested from the API
   * (128 kbps). This is only a rough figure, in particular for formats that
   * are not encoded at that bitrate.
   */
  private estimateDuration(bytes: number): number {
    return (bytes * 8) / MINIMAX_BITRATE_BPS
  }
}
|
|
489
|
+
|
|
490
|
+
/**
 * Predefined MiniMax voice list (reference copy of the official voice IDs).
 *
 * Used as the fallback result of `listVoices()` when the API call fails.
 * Every entry is a zh-CN voice; the table below lists `[id, name, gender]`.
 *
 * NOTE(review): the ID `male-qn-kuang野` mixes Latin and CJK characters and
 * looks like a typo - verify it against the official voice list.
 */
export const MINIMAX_VOICES: Voice[] = (
  [
    ['male-qn-qingse', '青年清澈', 'male'],
    ['male-qn-qingse_2', '青年清澈v2', 'male'],
    ['female-shaonv', '少女', 'female'],
    ['male-baiming', '成熟男声', 'male'],
    ['female-tianmei', '甜美女声', 'female'],
    ['male-zhongnan', '中年男声', 'male'],
    ['female-yujie', '御姐音', 'female'],
    ['male-qn-xiaoao', '青年豪爽', 'male'],
    ['female-shandian', '甜心小娘', 'female'],
    ['male-qn-buke', '青年低沉', 'male'],
    ['male-qn-wenlv', '文绿青年', 'male'],
    ['female-tianmei-2', '甜美女声v2', 'female'],
    ['female-yujie-2', '御姐音v2', 'female'],
    ['male-shaonian', '少年音', 'male'],
    ['female-yunv', '温柔女声', 'female'],
    ['male-qn-jingdian', '经典男声', 'male'],
    ['male-qn-kuang野', '狂野青年', 'male'],
    ['female-yujie-old', '优雅低沉', 'female'],
    ['female-tianmei-old', '甜美女孩', 'female'],
    ['male-qn-taohua', '桃花青年', 'male']
  ] as const
).map(([id, name, gender]) => ({ id, name, language: 'zh-CN', gender }))
|