koishi-plugin-minimax-vits 1.0.2 → 1.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/index.d.ts +61 -7
- package/lib/index.js +399 -102
- package/package.json +7 -1
- package/readme.md +1 -1
package/lib/index.d.ts
CHANGED
|
@@ -1,17 +1,71 @@
|
|
|
1
1
|
import { Context, Schema } from 'koishi';
|
|
2
|
+
import { Tool } from '@langchain/core/tools';
|
|
3
|
+
/**
 * Module augmentation for `@koishijs/plugin-console`.
 *
 * Adds a `'minimax-vits'` entry, typed as {@link MinimaxVitsService}, to the
 * console's `Services` map.
 */
declare module '@koishijs/plugin-console' {
    namespace Console {
        interface Services {
            /** Console-side service exposed by this plugin. */
            'minimax-vits': MinimaxVitsService;
        }
    }
}
|
|
10
|
+
/**
 * Tool (from `@langchain/core/tools`) that turns text into MiniMax TTS audio.
 *
 * The implementation sends the generated audio through the session found in the
 * tool configuration and reports the outcome as a short status string.
 */
export declare class MinimaxVitsTool extends Tool {
    private ctx;
    private config;
    private cacheManager?;
    /** Tool identifier. */
    name: string;
    /** Usage instructions shown to the model. */
    description: string;
    /**
     * @param ctx - Koishi context.
     * @param config - Plugin configuration.
     * @param cacheManager - Optional audio cache shared with the rest of the plugin.
     */
    constructor(ctx: Context, config: Config, cacheManager?: AudioCacheManager | undefined);
    /**
     * @param input - Either a JSON string with `text` / `voice` / `speed`, or raw text.
     * @param _runManager - Unused by the implementation.
     * @param toolConfig - Expected to carry `configurable.session`.
     * @returns A status message; failures are reported in the string, not thrown.
     */
    _call(input: string, _runManager: any, toolConfig: any): Promise<string>;
}
|
|
19
|
+
/**
 * Console-facing service used to try out speech synthesis.
 */
declare class MinimaxVitsService {
    private ctx;
    private config;
    /**
     * @param ctx - Koishi context.
     * @param config - Plugin configuration.
     */
    constructor(ctx: Context, config: Config);
    /**
     * Synthesises `text` once and reports the outcome.
     *
     * The result is a discriminated union on `success`: the literal `true` / `false`
     * types let callers narrow to the `audio`/`size` arm or the `error` arm.
     *
     * @param text - Text to synthesise.
     * @param voice - Optional voice id.
     * @param speed - Optional speaking rate.
     * @returns On success a base64 data URL and its byte size; otherwise an error value.
     */
    testTTS(text: string, voice?: string, speed?: number): Promise<{
        success: true;
        audio: string;
        size: number;
        error?: undefined;
    } | {
        success: false;
        error: any;
        audio?: undefined;
        size?: undefined;
    }>;
}
|
|
2
35
|
/** Plugin name registered with Koishi. */
export declare const name = "minimax-vits";
/**
 * Plugin configuration.
 *
 * Only `ttsApiKey` is mandatory; every other field is optional at the type level.
 */
export interface Config {
    /** MiniMax TTS API key. */
    ttsApiKey: string;
    /** MiniMax group id. */
    groupId?: string;
    /** Base URL of the MiniMax API. */
    apiBase?: string;
    /** Voice id used when a request does not name one. */
    defaultVoice?: string;
    /** TTS model name. */
    speechModel?: string;
    /** Speaking rate. */
    speed?: number;
    /** Volume. */
    vol?: number;
    /** Pitch. */
    pitch?: number;
    /** Audio container format, e.g. `mp3` or `wav`. */
    audioFormat?: string;
    /** Sample rate. */
    sampleRate?: number;
    /** Bitrate. */
    bitrate?: number;
    /** Encoding of the API response payload. */
    outputFormat?: string;
    /** Language boost hint. */
    languageBoost?: string;
    /** Enables debug logging. */
    debug?: boolean;
    /** Enables the voice-clone commands. */
    voiceCloneEnabled?: boolean;
    /** Enables the local file cache. */
    cacheEnabled?: boolean;
    /** Directory holding cached audio. */
    cacheDir?: string;
    /** Cache validity period in milliseconds. */
    cacheMaxAge?: number;
    /** Maximum cache size in bytes. */
    cacheMaxSize?: number;
}
/** Koishi schema describing {@link Config}. */
export declare const Config: Schema<Config>;
|
|
58
|
+
/**
 * File-backed cache for synthesised audio, looked up by text, voice and format.
 */
declare class AudioCacheManager {
    private cacheDir;
    private logger;
    private enabled;
    private maxAge;
    private maxSize;
    /**
     * @param cacheDir - Directory that stores cached audio files.
     * @param logger - Logger used for cache warnings.
     * @param enabled - When `false`, every operation is a no-op.
     * @param maxAge - Configured validity period in milliseconds.
     * @param maxSize - Configured size limit in bytes.
     */
    constructor(cacheDir: string, logger: any, enabled: boolean, maxAge: number, maxSize: number);
    /** Prepares the cache directory. */
    initialize(): Promise<void>;
    /**
     * @returns The cached audio for this text/voice/format, or `null` on a miss.
     */
    getAudio(text: string, voice: string, format: string): Promise<Buffer | null>;
    /** Stores `buffer` under the key derived from text, voice and format. */
    saveAudio(buffer: Buffer, text: string, voice: string, format: string): Promise<void>;
    /** Releases resources held by the cache. */
    dispose(): void;
}
|
|
17
70
|
/**
 * Koishi plugin entry point.
 *
 * @param ctx - Koishi context the plugin is loaded into.
 * @param config - Validated plugin configuration.
 */
export declare function apply(ctx: Context, config: Config): void;
export {};
|
package/lib/index.js
CHANGED
|
@@ -1,121 +1,418 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
1
|
+
import { Schema, h } from 'koishi';
|
|
2
|
+
import { Tool } from '@langchain/core/tools';
|
|
3
|
+
import * as fs from 'fs';
|
|
4
|
+
import * as path from 'path';
|
|
5
|
+
import * as crypto from 'crypto';
|
|
6
|
+
import { ChatLunaPlugin } from 'koishi-plugin-chatluna/services/chat';
|
|
7
|
+
// --- Helper functions ---
/**
 * Reports whether `text` contains at least one of `keywords`, ignoring case.
 *
 * @param text - String to search in.
 * @param keywords - Candidate substrings.
 * @returns `true` as soon as one keyword is found; `false` otherwise, including
 *          for an empty keyword list.
 */
function fuzzyQuery(text, keywords) {
    const haystack = text.toLowerCase();
    for (const keyword of keywords) {
        if (haystack.includes(keyword.toLowerCase())) {
            return true;
        }
    }
    return false;
}
|
|
12
|
+
/**
 * Reduces a message `content` value to a string.
 *
 * - strings are returned unchanged;
 * - non-null objects yield their truthy `text`, else their truthy `content`,
 *   else their JSON serialisation;
 * - anything else (including `null`) goes through `String()`.
 *
 * @param content - Message content of unknown shape.
 * @returns The extracted text.
 */
function getMessageContent(content) {
    switch (typeof content) {
        case 'string':
            return content;
        case 'object':
            // typeof null is 'object'; null falls through to String() below.
            if (content) {
                return content.text || content.content || JSON.stringify(content);
            }
            break;
    }
    return String(content);
}
|
|
20
|
+
/**
 * Extracts the part of `text` that should be read aloud.
 *
 * The text is split into trimmed, non-empty lines. A line counts as dialogue when
 * it starts with a quote, contains a "说:" marker, or starts with a speaker label
 * such as `Alice:`. Lines that contain a parenthesised aside, or that start with
 * an opening bracket, are treated as stage directions and skipped. Once dialogue
 * has started, following unmarked lines are appended as continuation.
 *
 * Both ASCII and full-width (CJK) punctuation are recognised, as are straight and
 * curly quotes. The punctuation is written with `\u` escapes so the two forms
 * cannot collapse into one if the file is width-normalised.
 *
 * @param text - Raw text, possibly mixing narration and dialogue.
 * @returns The dialogue with each line terminated by "。"; the original `text`
 *          when no dialogue was found but it is short (≤ 150 chars) and contains
 *          no opening bracket; otherwise `null`.
 */
function extractDialogueContent(text) {
    // ':' / U+FF1A full-width colon.
    const speakerLabel = /^[A-Za-z\u4e00-\u9fff]+[:\uFF1A]\s*/;
    // Straight quotes plus U+201C/U+201D/U+2018/U+2019 curly quotes.
    const leadingQuote = /^["'\u201C\u201D\u2018\u2019]/;
    const trailingQuote = /["'\u201C\u201D\u2018\u2019]$/;
    // "说:" or "说道:" with either colon form.
    const saidMarker = /说道?[:\uFF1A]/g;
    // '[', '{', U+3010 '【', '(' and U+FF08 full-width '('.
    const openingBracket = /[\[{\u3010(\uFF08]/;
    const leadingBracket = /^\s*[\[{\u3010(\uFF08]/;

    const lines = text.split('\n').map(line => line.trim()).filter(line => line.length > 0);
    const spoken = [];
    for (const line of lines) {
        const isDialogueLine = leadingQuote.test(line) ||
            line.includes('说:') ||
            line.includes('说\uFF1A') ||
            speakerLabel.test(line);
        const isNonDialogue = (line.includes('(') && line.includes(')')) ||
            (line.includes('\uFF08') && line.includes('\uFF09')) ||
            leadingBracket.test(line);
        if (isDialogueLine && !isNonDialogue) {
            const cleanLine = line
                .replace(leadingQuote, '')
                .replace(trailingQuote, '')
                .replace(speakerLabel, '')
                .replace(saidMarker, '')
                .trim();
            if (cleanLine.length > 0) {
                spoken.push(cleanLine);
            }
        }
        else if (spoken.length > 0 && !isNonDialogue) {
            // Unmarked line after dialogue has started: treat as continuation.
            spoken.push(line);
        }
    }
    if (spoken.length > 0) {
        // Terminate each piece with "。" and collapse any doubled terminators.
        return spoken.map(piece => piece + '。').join('').replace(/。+/g, '。').trim();
    }
    if (text.length <= 150 && !openingBracket.test(text)) {
        return text;
    }
    return null;
}
|
|
54
|
+
// --- Helper: build the audio message element ---
/**
 * Wraps an audio buffer in an `audio` element whose `src` is a base64 data URL.
 *
 * @param buffer - Encoded audio bytes.
 * @param format - Audio format; `'wav'` maps to `audio/wav`, everything else to
 *                 `audio/mpeg`.
 * @returns The element produced by `h('audio', { src })`.
 */
function makeAudioElement(buffer, format) {
    let mimeType = 'audio/mpeg';
    if (format === 'wav') {
        mimeType = 'audio/wav';
    }
    const encoded = buffer.toString('base64');
    return h('audio', { src: `data:${mimeType};base64,${encoded}` });
}
|
|
60
|
+
// --- ChatLuna tool class ---
/**
 * Tool that synthesises speech with MiniMax TTS and sends it to the chat session.
 *
 * The input is expected to be a JSON object string (`text`, optional `voice`,
 * `speed`, `vol`, `pitch`); anything that is not a JSON object is treated as the
 * raw text to speak. Failures are reported through the returned string rather
 * than thrown.
 */
export class MinimaxVitsTool extends Tool {
    ctx;
    config;
    cacheManager;
    name = 'minimax_tts';
    description = `Use this tool to generate speech/audio from text using MiniMax TTS.
Input MUST be a JSON string: {"text": "required content", "voice": "optional_id", "speed": 1.0}`;
    /**
     * @param ctx - Koishi context.
     * @param config - Plugin configuration.
     * @param cacheManager - Optional audio cache.
     */
    constructor(ctx, config, cacheManager) {
        super();
        this.ctx = ctx;
        this.config = config;
        this.cacheManager = cacheManager;
    }
    /**
     * @param input - JSON object string or raw text.
     * @param _runManager - Unused.
     * @param toolConfig - Must carry `configurable.session`.
     * @returns A status message describing success or the failure reason.
     */
    async _call(input, _runManager, toolConfig) {
        try {
            const session = toolConfig?.configurable?.session;
            if (!session) {
                throw new Error('Session not found in tool config');
            }
            // JSON.parse also accepts scalars, arrays and `null`; only a plain object
            // can carry options, so everything else falls back to raw-text mode.
            let params = { text: input };
            try {
                const parsed = JSON.parse(input);
                if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
                    params = parsed;
                }
            }
            catch {
                // Not JSON: keep treating the input as raw text.
            }
            let text = params.text || input;
            if (typeof text !== 'string') {
                // The extractor needs a string; serialise structured or scalar values.
                text = typeof text === 'object' ? JSON.stringify(text) : String(text);
            }
            const voiceId = (params.voice || this.config.defaultVoice) ?? 'Chinese_female_gentle';
            const speed = params.speed ?? this.config.speed ?? 1.0;
            const dialogueText = extractDialogueContent(text);
            if (!dialogueText)
                return `未检测到有效对话内容。`;
            const audioBuffer = await generateSpeech(this.ctx, {
                ...this.config,
                speed,
                // Fall back to the configured values; spreading `undefined` here
                // would otherwise discard the user's vol/pitch settings.
                vol: params.vol ?? this.config.vol,
                pitch: params.pitch ?? this.config.pitch
            }, dialogueText, voiceId, this.cacheManager);
            if (!audioBuffer)
                return `TTS 生成失败。`;
            await session.send(makeAudioElement(audioBuffer, this.config.audioFormat ?? 'mp3'));
            return `Audio generated and sent.`;
        }
        catch (e) {
            // Thrown values are not guaranteed to be Error instances.
            return `Error: ${e instanceof Error ? e.message : String(e)}`;
        }
    }
}
|
|
111
|
+
// --- Console service ---
/**
 * Service object exposed to the Koishi console for trying out synthesis.
 */
class MinimaxVitsService {
    ctx;
    config;
    /**
     * @param ctx - Koishi context.
     * @param config - Plugin configuration.
     */
    constructor(ctx, config) {
        this.ctx = ctx;
        this.config = config;
    }
    /**
     * Synthesises `text` once, bypassing the audio cache.
     *
     * Voice and speed fall back to the plugin configuration before the built-in
     * defaults, matching the chat command and the ChatLuna tool.
     *
     * @param text - Text to synthesise.
     * @param voice - Optional voice id.
     * @param speed - Optional speaking rate.
     * @returns `{ success: true, audio, size }` with a base64 data URL, or
     *          `{ success: false, error }`.
     */
    async testTTS(text, voice, speed) {
        try {
            const audioBuffer = await generateSpeech(this.ctx, {
                ...this.config,
                speed: speed ?? this.config.speed ?? 1.0
            }, text, voice || this.config.defaultVoice || 'Chinese_female_gentle');
            if (audioBuffer) {
                // Label the data URL with the configured container, not always MPEG.
                const mimeType = this.config.audioFormat === 'wav' ? 'audio/wav' : 'audio/mpeg';
                return {
                    success: true,
                    audio: `data:${mimeType};base64,${audioBuffer.toString('base64')}`,
                    size: audioBuffer.length
                };
            }
            return { success: false, error: '生成失败' };
        }
        catch (error) {
            return { success: false, error: error instanceof Error ? error.message : String(error) };
        }
    }
}
|
|
139
|
+
/** Plugin name registered with Koishi. */
export const name = 'minimax-vits';
/**
 * Configuration schema.
 *
 * `vol` and `pitch` carry explicit bounds, like `speed`, so out-of-range values
 * are rejected when the configuration is validated instead of failing later at
 * the API. NOTE(review): bounds follow the MiniMax T2A v2 parameter ranges
 * (vol up to 10, pitch -12..12) — confirm against the current API reference.
 */
export const Config = Schema.object({
    ttsApiKey: Schema.string().required().description('MiniMax TTS API Key').role('secret'),
    groupId: Schema.string().description('MiniMax Group ID'),
    apiBase: Schema.string().default('https://api.minimax.io/v1').description('API 基础地址'),
    defaultVoice: Schema.string().default('Chinese_female_gentle').description('默认语音 ID'),
    speechModel: Schema.string().default('speech-01-turbo').description('TTS 模型 (推荐 speech-01-turbo)'),
    speed: Schema.number().default(1.0).min(0.5).max(2.0).description('语速'),
    vol: Schema.number().default(1.0).min(0).max(10).description('音量'),
    pitch: Schema.number().default(0).min(-12).max(12).description('音调'),
    audioFormat: Schema.string().default('mp3').description('音频格式 (mp3, wav)'),
    sampleRate: Schema.number().default(32000).description('采样率'),
    bitrate: Schema.number().default(128000).description('比特率'),
    outputFormat: Schema.string().default('hex').description('API输出编码 (必须是 hex)'),
    languageBoost: Schema.string().default('auto').description('语言增强'),
    debug: Schema.boolean().default(false).description('启用调试日志'),
    voiceCloneEnabled: Schema.boolean().default(false).description('启用语音克隆'),
    cacheEnabled: Schema.boolean().default(true).description('启用本地文件缓存'),
    cacheDir: Schema.string().default('./data/minimax-vits/cache').description('缓存路径'),
    cacheMaxAge: Schema.number().default(3600000).description('缓存有效期(ms)'),
    cacheMaxSize: Schema.number().default(104857600).description('缓存最大体积(bytes)'),
}).description('MiniMax VITS 配置');
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
161
|
+
// --- 缓存管理器 ---
|
|
162
|
+
class AudioCacheManager {
|
|
163
|
+
cacheDir;
|
|
164
|
+
logger;
|
|
165
|
+
enabled;
|
|
166
|
+
maxAge;
|
|
167
|
+
maxSize;
|
|
168
|
+
constructor(cacheDir, logger, enabled, maxAge, maxSize) {
|
|
169
|
+
this.cacheDir = cacheDir;
|
|
170
|
+
this.logger = logger;
|
|
171
|
+
this.enabled = enabled;
|
|
172
|
+
this.maxAge = maxAge;
|
|
173
|
+
this.maxSize = maxSize;
|
|
174
|
+
}
|
|
175
|
+
async initialize() {
|
|
176
|
+
if (!this.enabled)
|
|
177
|
+
return;
|
|
178
|
+
if (!fs.existsSync(this.cacheDir))
|
|
179
|
+
fs.mkdirSync(this.cacheDir, { recursive: true });
|
|
180
|
+
}
|
|
181
|
+
async getAudio(text, voice, format) {
|
|
182
|
+
if (!this.enabled)
|
|
183
|
+
return null;
|
|
184
|
+
try {
|
|
185
|
+
const hash = crypto.createHash('md5').update(`${text}-${voice}-${format}`).digest('hex');
|
|
186
|
+
const filePath = path.join(this.cacheDir, `${hash}.${format}`);
|
|
187
|
+
if (fs.existsSync(filePath))
|
|
188
|
+
return fs.readFileSync(filePath);
|
|
189
|
+
}
|
|
190
|
+
catch { }
|
|
191
|
+
return null;
|
|
192
|
+
}
|
|
193
|
+
async saveAudio(buffer, text, voice, format) {
|
|
194
|
+
if (!this.enabled || !buffer.length)
|
|
195
|
+
return;
|
|
196
|
+
try {
|
|
197
|
+
const hash = crypto.createHash('md5').update(`${text}-${voice}-${format}`).digest('hex');
|
|
198
|
+
const filePath = path.join(this.cacheDir, `${hash}.${format}`);
|
|
199
|
+
fs.writeFileSync(filePath, buffer);
|
|
200
|
+
}
|
|
201
|
+
catch (e) {
|
|
202
|
+
this.logger.warn('缓存写入失败', e);
|
|
203
|
+
}
|
|
204
|
+
}
|
|
205
|
+
dispose() { }
|
|
206
|
+
}
|
|
207
|
+
// --- 核心功能函数 ---
|
|
208
|
+
async function generateSpeech(ctx, config, text, voice, cacheManager) {
|
|
24
209
|
const logger = ctx.logger('minimax-vits');
|
|
25
|
-
const
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
210
|
+
const format = config.audioFormat ?? 'mp3';
|
|
211
|
+
if (cacheManager) {
|
|
212
|
+
const cached = await cacheManager.getAudio(text, voice, format);
|
|
213
|
+
if (cached) {
|
|
214
|
+
if (config.debug)
|
|
215
|
+
logger.debug('命中本地缓存');
|
|
216
|
+
return cached;
|
|
217
|
+
}
|
|
32
218
|
}
|
|
33
219
|
try {
|
|
34
|
-
const
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
headers
|
|
40
|
-
|
|
41
|
-
|
|
220
|
+
const headers = {
|
|
221
|
+
'Authorization': `Bearer ${config.ttsApiKey}`,
|
|
222
|
+
'Content-Type': 'application/json',
|
|
223
|
+
};
|
|
224
|
+
if (config.groupId)
|
|
225
|
+
headers['GroupId'] = config.groupId;
|
|
226
|
+
const payload = {
|
|
227
|
+
model: config.speechModel ?? 'speech-01-turbo',
|
|
228
|
+
text: text,
|
|
229
|
+
stream: false,
|
|
230
|
+
output_format: 'hex',
|
|
231
|
+
voice_setting: {
|
|
232
|
+
voice_id: voice,
|
|
233
|
+
speed: config.speed ?? 1.0,
|
|
234
|
+
vol: config.vol ?? 1.0,
|
|
235
|
+
pitch: config.pitch ?? 0
|
|
42
236
|
},
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
237
|
+
audio_setting: {
|
|
238
|
+
sample_rate: config.sampleRate ?? 32000,
|
|
239
|
+
bitrate: config.bitrate ?? 128000,
|
|
240
|
+
format: format,
|
|
241
|
+
channel: 1
|
|
242
|
+
}
|
|
243
|
+
};
|
|
244
|
+
if (config.languageBoost && config.languageBoost !== 'auto') {
|
|
245
|
+
payload.language_boost = config.languageBoost;
|
|
47
246
|
}
|
|
48
|
-
|
|
247
|
+
if (config.debug)
|
|
248
|
+
logger.debug(`调用 API: ${config.apiBase}/t2a_v2`);
|
|
249
|
+
const response = await ctx.http.post(`${config.apiBase}/t2a_v2`, payload, { headers, timeout: 60000 });
|
|
250
|
+
if (response?.base_resp && response.base_resp.status_code !== 0) {
|
|
251
|
+
logger.error(`API Error: ${response.base_resp.status_msg}`);
|
|
252
|
+
return null;
|
|
253
|
+
}
|
|
254
|
+
const audioHex = response?.data?.audio || response?.audio;
|
|
255
|
+
if (!audioHex) {
|
|
256
|
+
logger.error('API 返回数据中未找到 audio 字段');
|
|
257
|
+
return null;
|
|
258
|
+
}
|
|
259
|
+
const audioBuffer = Buffer.from(audioHex, 'hex');
|
|
260
|
+
if (audioBuffer.length === 0)
|
|
261
|
+
return null;
|
|
262
|
+
if (cacheManager) {
|
|
263
|
+
await cacheManager.saveAudio(audioBuffer, text, voice, format);
|
|
264
|
+
}
|
|
265
|
+
return audioBuffer;
|
|
49
266
|
}
|
|
50
267
|
catch (error) {
|
|
51
|
-
logger.error('TTS
|
|
52
|
-
if (config.debug) {
|
|
53
|
-
logger.error('错误详情:', error.response?.data || error.message);
|
|
54
|
-
}
|
|
268
|
+
logger.error('TTS 调用失败:', error);
|
|
55
269
|
return null;
|
|
56
270
|
}
|
|
57
271
|
}
|
|
58
|
-
|
|
272
|
+
// 修正:返回值类型改为 string | undefined,匹配调用处的类型
|
|
273
|
+
async function uploadFile(ctx, config, filePath, purpose) {
|
|
59
274
|
const logger = ctx.logger('minimax-vits');
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
275
|
+
try {
|
|
276
|
+
const headers = { 'Authorization': `Bearer ${config.ttsApiKey}` };
|
|
277
|
+
if (config.groupId)
|
|
278
|
+
headers['GroupId'] = config.groupId;
|
|
279
|
+
const fileRes = await ctx.http.file(filePath);
|
|
280
|
+
// 如果没有 Blob 类型(Node 低版本),需要 polyfill 或者忽略类型报错
|
|
281
|
+
// 此处假设环境支持,使用 new Blob 包装 buffer
|
|
282
|
+
const blob = new Blob([fileRes.data], { type: fileRes.mime });
|
|
283
|
+
const formData = new FormData();
|
|
284
|
+
formData.append('file', blob, fileRes.filename || 'upload.mp3');
|
|
285
|
+
formData.append('purpose', purpose);
|
|
286
|
+
const response = await ctx.http.post(`${config.apiBase}/files/upload`, formData, { headers });
|
|
287
|
+
return response.file?.file_id || undefined;
|
|
288
|
+
}
|
|
289
|
+
catch (error) {
|
|
290
|
+
logger.error(`文件上传失败:`, error);
|
|
291
|
+
return undefined;
|
|
292
|
+
}
|
|
293
|
+
}
|
|
294
|
+
// 语音克隆逻辑
|
|
295
|
+
async function cloneVoice(ctx, config, fileId, voiceId, promptAudioFileId, promptText, text) {
|
|
296
|
+
const logger = ctx.logger('minimax-vits');
|
|
297
|
+
try {
|
|
298
|
+
const payload = {
|
|
299
|
+
file_id: fileId,
|
|
300
|
+
voice_id: voiceId,
|
|
301
|
+
model: config.speechModel ?? 'speech-01-turbo',
|
|
302
|
+
audio_format: config.audioFormat ?? 'mp3',
|
|
303
|
+
};
|
|
304
|
+
if (text)
|
|
305
|
+
payload.text = text;
|
|
306
|
+
if (promptAudioFileId && promptText) {
|
|
307
|
+
payload.clone_prompt = { prompt_audio: promptAudioFileId, prompt_text: promptText };
|
|
87
308
|
}
|
|
88
|
-
const
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
309
|
+
const headers = {
|
|
310
|
+
'Authorization': `Bearer ${config.ttsApiKey}`,
|
|
311
|
+
'Content-Type': 'application/json',
|
|
312
|
+
};
|
|
313
|
+
if (config.groupId)
|
|
314
|
+
headers['GroupId'] = config.groupId;
|
|
315
|
+
const response = await ctx.http.post(`${config.apiBase}/voice_clone`, payload, { headers, responseType: 'arraybuffer' });
|
|
316
|
+
return Buffer.from(response);
|
|
317
|
+
}
|
|
318
|
+
catch (error) {
|
|
319
|
+
logger.error('语音克隆失败:', error);
|
|
320
|
+
return null;
|
|
321
|
+
}
|
|
322
|
+
}
|
|
323
|
+
// --- Plugin entry point ---
/**
 * Koishi plugin entry point.
 *
 * Sets up the optional audio cache, publishes the console service and the ChatLuna
 * tool once the app is ready, and registers the `minivits.*` commands. The
 * voice-clone commands are only registered when `config.voiceCloneEnabled` is set.
 *
 * ChatLuna integration is best-effort: a failure while constructing the ChatLuna
 * plugin handle or registering the tool is logged and the commands keep working.
 *
 * @param ctx - Koishi context.
 * @param config - Validated plugin configuration.
 */
export function apply(ctx, config) {
    const logger = ctx.logger('minimax-vits');
    // Thrown values are not guaranteed to be Error instances.
    const describeError = (e) => (e instanceof Error ? e.message : String(e));
    // Guarded so a missing or failing ChatLuna does not abort the whole plugin.
    let chatLunaPlugin;
    try {
        chatLunaPlugin = new ChatLunaPlugin(ctx, config, 'minimax-vits', false);
    }
    catch (e) {
        logger.warn('ChatLuna 插件初始化失败', describeError(e));
    }
    const cacheManager = config.cacheEnabled
        ? new AudioCacheManager(config.cacheDir ?? './data/minimax-vits/cache', logger, true, config.cacheMaxAge ?? 3600000, config.cacheMaxSize ?? 104857600)
        : undefined;
    ctx.on('ready', async () => {
        try {
            await cacheManager?.initialize();
        }
        catch (e) {
            // A cache that cannot be prepared must not block service/tool registration.
            logger.warn('缓存初始化失败', describeError(e));
        }
        if (ctx.console) {
            ctx.console.services['minimax-vits'] = new MinimaxVitsService(ctx, config);
        }
        if (!chatLunaPlugin)
            return;
        try {
            chatLunaPlugin.registerTool('minimax_tts', {
                // Offer the tool only when the conversation mentions speech-related keywords.
                selector: (history) => history.some((item) => fuzzyQuery(getMessageContent(item.content), ['语音', '朗读', 'tts', 'speak', 'say', 'voice'])),
                createTool: () => new MinimaxVitsTool(ctx, config, cacheManager),
                authorization: () => true
            });
            logger.info('ChatLuna Tool 已注册');
        }
        catch (e) {
            logger.warn('ChatLuna Tool 注册失败', describeError(e));
        }
    });
    ctx.on('dispose', () => cacheManager?.dispose());
    // --- Command registration ---
    ctx.command('minivits.test <text:text>', '测试 TTS')
        .option('voice', '-v <voice>')
        .option('speed', '-s <speed>', { type: 'number' })
        .action(async ({ session, options }, text) => {
        if (!text)
            return '请输入文本';
        await session?.send('生成中...');
        const buffer = await generateSpeech(ctx, {
            ...config,
            speed: options?.speed ?? config.speed
        }, text, options?.voice || config.defaultVoice || 'Chinese_female_gentle', cacheManager);
        if (!buffer)
            return '失败';
        return makeAudioElement(buffer, config.audioFormat ?? 'mp3');
    });
    ctx.command('minivits.debug', '查看插件配置').action(() => {
        return `API Base: ${config.apiBase}\nModel: ${config.speechModel}\nFormat: ${config.audioFormat}\nDebug: ${config.debug}`;
    });
    if (config.voiceCloneEnabled) {
        // NOTE(review): <filePath> / <sourceFile> / promptFile are passed straight to
        // ctx.http.file by uploadFile; presumably that accepts local paths as well as
        // URLs, so consider restricting who may run these commands.
        ctx.command('minivits.clone.upload <filePath> <purpose>', '上传文件')
            .action(async ({ session }, filePath, purpose) => {
            if (!session || !filePath || !purpose)
                return '缺少参数';
            if (purpose !== 'voice_clone' && purpose !== 'prompt_audio')
                return '用途错误';
            await session.send('上传中...');
            const fileId = await uploadFile(ctx, config, filePath, purpose);
            return fileId ? `上传成功: ${fileId}` : '上传失败';
        });
        ctx.command('minivits.clone.create <fileId> <voiceId> [text:text]', '创建语音克隆')
            .option('promptAudio', '-p <id>')
            .option('promptText', '-t <text>')
            .action(async ({ session, options }, fileId, voiceId, text) => {
            if (!session || !fileId || !voiceId)
                return '缺少参数';
            await session.send('克隆中...');
            const audioBuffer = await cloneVoice(ctx, config, fileId, voiceId, options?.promptAudio, options?.promptText, text);
            if (!audioBuffer)
                return '克隆失败';
            // Only reply with audio when preview text was supplied.
            if (text) {
                return makeAudioElement(audioBuffer, config.audioFormat ?? 'mp3');
            }
            return '克隆操作请求已发送';
        });
        ctx.command('minivits.clone.full <sourceFile> <voiceId> <text:text>', '完整克隆流程')
            .option('promptFile', '-p <file>')
            .option('promptText', '-t <text>')
            .action(async ({ session, options }, sourceFile, voiceId, text) => {
            if (!session || !sourceFile || !voiceId || !text)
                return '缺少参数';
            await session.send('1. 上传源文件...');
            const sourceFileId = await uploadFile(ctx, config, sourceFile, 'voice_clone');
            if (!sourceFileId)
                return '源文件上传失败';
            let promptAudioFileId;
            if (options?.promptFile) {
                await session.send('2. 上传提示音频...');
                promptAudioFileId = await uploadFile(ctx, config, options.promptFile, 'prompt_audio');
                if (!promptAudioFileId)
                    return '提示音频上传失败';
            }
            await session.send('3. 生成克隆语音...');
            const audioBuffer = await cloneVoice(ctx, config, sourceFileId, voiceId, promptAudioFileId, options?.promptText, text);
            if (!audioBuffer)
                return '语音克隆失败';
            return makeAudioElement(audioBuffer, config.audioFormat ?? 'mp3');
        });
    }
}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "koishi-plugin-minimax-vits",
|
|
3
3
|
"description": "使用 minimax 国际版生成语音,适配 chatluna",
|
|
4
|
-
"version": "1.
|
|
4
|
+
"version": "1.2.4",
|
|
5
5
|
"main": "lib/index.js",
|
|
6
6
|
"typings": "lib/index.d.ts",
|
|
7
7
|
"files": [
|
|
@@ -22,6 +22,12 @@
|
|
|
22
22
|
"peerDependencies": {
|
|
23
23
|
"koishi": "^4.18.10"
|
|
24
24
|
},
|
|
25
|
+
"inject": {
|
|
26
|
+
"optional": [
|
|
27
|
+
"console",
|
|
28
|
+
"chatluna"
|
|
29
|
+
]
|
|
30
|
+
},
|
|
25
31
|
"devDependencies": {
|
|
26
32
|
"@types/node": "^20.0.0",
|
|
27
33
|
"typescript": "^5.0.0"
|