koishi-plugin-minimax-vits 1.2.0 → 1.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/index.d.ts +1 -9
- package/lib/index.js +188 -201
- package/package.json +6 -2
package/lib/index.d.ts
CHANGED
|
@@ -1,10 +1,5 @@
|
|
|
1
1
|
import { Context, Schema } from 'koishi';
|
|
2
2
|
import { Tool } from '@langchain/core/tools';
|
|
3
|
-
interface ChatLunaToolRunnable {
|
|
4
|
-
configurable: {
|
|
5
|
-
session: any;
|
|
6
|
-
};
|
|
7
|
-
}
|
|
8
3
|
declare module '@koishijs/plugin-console' {
|
|
9
4
|
namespace Console {
|
|
10
5
|
interface Services {
|
|
@@ -19,7 +14,7 @@ export declare class MinimaxVitsTool extends Tool {
|
|
|
19
14
|
name: string;
|
|
20
15
|
description: string;
|
|
21
16
|
constructor(ctx: Context, config: Config, cacheManager?: AudioCacheManager | undefined);
|
|
22
|
-
_call(input: string, _runManager: any, toolConfig:
|
|
17
|
+
_call(input: string, _runManager: any, toolConfig: any): Promise<string>;
|
|
23
18
|
}
|
|
24
19
|
declare class MinimaxVitsService {
|
|
25
20
|
private ctx;
|
|
@@ -66,11 +61,8 @@ declare class AudioCacheManager {
|
|
|
66
61
|
private enabled;
|
|
67
62
|
private maxAge;
|
|
68
63
|
private maxSize;
|
|
69
|
-
private cacheMap;
|
|
70
|
-
private cleanupInterval;
|
|
71
64
|
constructor(cacheDir: string, logger: any, enabled: boolean, maxAge: number, maxSize: number);
|
|
72
65
|
initialize(): Promise<void>;
|
|
73
|
-
private startCleanupScheduler;
|
|
74
66
|
getAudio(text: string, voice: string, format: string): Promise<Buffer | null>;
|
|
75
67
|
saveAudio(buffer: Buffer, text: string, voice: string, format: string): Promise<void>;
|
|
76
68
|
dispose(): void;
|
package/lib/index.js
CHANGED
|
@@ -1,53 +1,14 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
Object.defineProperty(o, k2, desc);
|
|
9
|
-
}) : (function(o, m, k, k2) {
|
|
10
|
-
if (k2 === undefined) k2 = k;
|
|
11
|
-
o[k2] = m[k];
|
|
12
|
-
}));
|
|
13
|
-
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
14
|
-
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
15
|
-
}) : function(o, v) {
|
|
16
|
-
o["default"] = v;
|
|
17
|
-
});
|
|
18
|
-
var __importStar = (this && this.__importStar) || (function () {
|
|
19
|
-
var ownKeys = function(o) {
|
|
20
|
-
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
21
|
-
var ar = [];
|
|
22
|
-
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
23
|
-
return ar;
|
|
24
|
-
};
|
|
25
|
-
return ownKeys(o);
|
|
26
|
-
};
|
|
27
|
-
return function (mod) {
|
|
28
|
-
if (mod && mod.__esModule) return mod;
|
|
29
|
-
var result = {};
|
|
30
|
-
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
31
|
-
__setModuleDefault(result, mod);
|
|
32
|
-
return result;
|
|
33
|
-
};
|
|
34
|
-
})();
|
|
35
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
-
exports.Config = exports.name = exports.MinimaxVitsTool = void 0;
|
|
37
|
-
exports.apply = apply;
|
|
38
|
-
const koishi_1 = require("koishi");
|
|
39
|
-
const tools_1 = require("@langchain/core/tools");
|
|
40
|
-
const fs = __importStar(require("fs"));
|
|
41
|
-
const path = __importStar(require("path"));
|
|
42
|
-
const crypto = __importStar(require("crypto"));
|
|
43
|
-
// 引入 ChatLuna 服务类
|
|
44
|
-
const chat_1 = require("koishi-plugin-chatluna/services/chat");
|
|
45
|
-
// 辅助函数:模糊查询
|
|
1
|
+
import { Schema, h } from 'koishi';
|
|
2
|
+
import { Tool } from '@langchain/core/tools';
|
|
3
|
+
import * as fs from 'fs';
|
|
4
|
+
import * as path from 'path';
|
|
5
|
+
import * as crypto from 'crypto';
|
|
6
|
+
import { ChatLunaPlugin } from 'koishi-plugin-chatluna/services/chat';
|
|
7
|
+
// --- 辅助函数 ---
|
|
46
8
|
function fuzzyQuery(text, keywords) {
|
|
47
9
|
const lowerText = text.toLowerCase();
|
|
48
10
|
return keywords.some(keyword => lowerText.includes(keyword.toLowerCase()));
|
|
49
11
|
}
|
|
50
|
-
// 辅助函数:获取消息文本内容
|
|
51
12
|
function getMessageContent(content) {
|
|
52
13
|
if (typeof content === 'string')
|
|
53
14
|
return content;
|
|
@@ -56,7 +17,6 @@ function getMessageContent(content) {
|
|
|
56
17
|
}
|
|
57
18
|
return String(content);
|
|
58
19
|
}
|
|
59
|
-
// 辅助函数:从长文本中提取对话内容(避免朗读旁白)
|
|
60
20
|
function extractDialogueContent(text) {
|
|
61
21
|
const lines = text.split('\n').map(line => line.trim()).filter(line => line.length > 0);
|
|
62
22
|
let dialogueContent = '';
|
|
@@ -65,7 +25,7 @@ function extractDialogueContent(text) {
|
|
|
65
25
|
const isDialogueLine = line.startsWith('"') ||
|
|
66
26
|
line.startsWith("'") ||
|
|
67
27
|
line.includes('说:') ||
|
|
68
|
-
line.match(/^[A-Za-z\u4e00-\u9fff]+[::]/);
|
|
28
|
+
line.match(/^[A-Za-z\u4e00-\u9fff]+[::]/);
|
|
69
29
|
const isNonDialogue = (line.includes('(') && line.includes(')')) ||
|
|
70
30
|
(line.includes('(') && line.includes(')')) ||
|
|
71
31
|
line.match(/^\s*[\[\{【((]/);
|
|
@@ -85,29 +45,26 @@ function extractDialogueContent(text) {
|
|
|
85
45
|
dialogueContent += line + '。';
|
|
86
46
|
}
|
|
87
47
|
}
|
|
88
|
-
if (dialogueContent.length > 0)
|
|
48
|
+
if (dialogueContent.length > 0)
|
|
89
49
|
return dialogueContent.replace(/。+/g, '。').trim();
|
|
90
|
-
|
|
91
|
-
// 如果没有明显对话标记且文本较短,直接朗读全文
|
|
92
|
-
if (text.length <= 150 && !text.match(/[[{【((]/)) {
|
|
50
|
+
if (text.length <= 150 && !text.match(/[[{【((]/))
|
|
93
51
|
return text;
|
|
94
|
-
}
|
|
95
52
|
return null;
|
|
96
53
|
}
|
|
97
|
-
// ---
|
|
98
|
-
|
|
54
|
+
// --- 辅助:构建音频消息元素 ---
|
|
55
|
+
function makeAudioElement(buffer, format) {
|
|
56
|
+
const mimeType = format === 'wav' ? 'audio/wav' : 'audio/mpeg';
|
|
57
|
+
const src = `data:${mimeType};base64,${buffer.toString('base64')}`;
|
|
58
|
+
return h('audio', { src });
|
|
59
|
+
}
|
|
60
|
+
// --- ChatLuna Tool 类 ---
|
|
61
|
+
export class MinimaxVitsTool extends Tool {
|
|
99
62
|
ctx;
|
|
100
63
|
config;
|
|
101
64
|
cacheManager;
|
|
102
65
|
name = 'minimax_tts';
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
Input MUST be a JSON string with the following keys:
|
|
106
|
-
- text (required): The text content to convert to speech.
|
|
107
|
-
- voice (optional): Voice ID (default is "Chinese_female_gentle").
|
|
108
|
-
- speed (optional): Speed of speech (0.5-2.0).
|
|
109
|
-
|
|
110
|
-
Example input: "{\\"text\\": \\"Hello, how are you?\\", \\"speed\\": 1.1}"`;
|
|
66
|
+
description = `Use this tool to generate speech/audio from text using MiniMax TTS.
|
|
67
|
+
Input MUST be a JSON string: {"text": "required content", "voice": "optional_id", "speed": 1.0}`;
|
|
111
68
|
constructor(ctx, config, cacheManager) {
|
|
112
69
|
super();
|
|
113
70
|
this.ctx = ctx;
|
|
@@ -116,14 +73,15 @@ class MinimaxVitsTool extends tools_1.Tool {
|
|
|
116
73
|
}
|
|
117
74
|
async _call(input, _runManager, toolConfig) {
|
|
118
75
|
try {
|
|
119
|
-
const session = toolConfig
|
|
120
|
-
|
|
76
|
+
const session = toolConfig?.configurable?.session;
|
|
77
|
+
if (!session) {
|
|
78
|
+
throw new Error('Session not found in tool config');
|
|
79
|
+
}
|
|
121
80
|
let params = {};
|
|
122
81
|
try {
|
|
123
82
|
params = JSON.parse(input);
|
|
124
83
|
}
|
|
125
84
|
catch {
|
|
126
|
-
// 容错:如果 LLM 没传 JSON,直接当纯文本处理
|
|
127
85
|
params = { text: input };
|
|
128
86
|
}
|
|
129
87
|
let text = params.text || input;
|
|
@@ -131,32 +89,25 @@ class MinimaxVitsTool extends tools_1.Tool {
|
|
|
131
89
|
text = JSON.stringify(text);
|
|
132
90
|
const voiceId = (params.voice || this.config.defaultVoice) ?? 'Chinese_female_gentle';
|
|
133
91
|
const speed = params.speed ?? this.config.speed ?? 1.0;
|
|
134
|
-
const vol = params.vol ?? this.config.vol ?? 1.0;
|
|
135
|
-
const pitch = params.pitch ?? this.config.pitch ?? 0;
|
|
136
|
-
// 提取纯对话内容,优化朗读体验
|
|
137
92
|
const dialogueText = extractDialogueContent(text);
|
|
138
|
-
if (!dialogueText)
|
|
139
|
-
return
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
await session.send((0, koishi_1.h)('audio', { src: `base64://${audioBuffer.toString('base64')}`, type: mimeType }));
|
|
151
|
-
return `Successfully generated audio for: "${dialogueText}". The audio has been sent to the user.`;
|
|
93
|
+
if (!dialogueText)
|
|
94
|
+
return `未检测到有效对话内容。`;
|
|
95
|
+
const audioBuffer = await generateSpeech(this.ctx, {
|
|
96
|
+
...this.config,
|
|
97
|
+
speed,
|
|
98
|
+
vol: params.vol,
|
|
99
|
+
pitch: params.pitch
|
|
100
|
+
}, dialogueText, voiceId, this.cacheManager);
|
|
101
|
+
if (!audioBuffer)
|
|
102
|
+
return `TTS 生成失败。`;
|
|
103
|
+
await session.send(makeAudioElement(audioBuffer, this.config.audioFormat ?? 'mp3'));
|
|
104
|
+
return `Audio generated and sent.`;
|
|
152
105
|
}
|
|
153
106
|
catch (e) {
|
|
154
|
-
|
|
155
|
-
return `TTS Tool execution failed: ${e.message}`;
|
|
107
|
+
return `Error: ${e.message}`;
|
|
156
108
|
}
|
|
157
109
|
}
|
|
158
110
|
}
|
|
159
|
-
exports.MinimaxVitsTool = MinimaxVitsTool;
|
|
160
111
|
// --- Console Service ---
|
|
161
112
|
class MinimaxVitsService {
|
|
162
113
|
ctx;
|
|
@@ -185,43 +136,28 @@ class MinimaxVitsService {
|
|
|
185
136
|
}
|
|
186
137
|
}
|
|
187
138
|
}
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
ttsApiKey:
|
|
191
|
-
groupId:
|
|
192
|
-
apiBase:
|
|
193
|
-
defaultVoice:
|
|
194
|
-
speechModel:
|
|
195
|
-
speed:
|
|
196
|
-
vol:
|
|
197
|
-
pitch:
|
|
198
|
-
audioFormat:
|
|
199
|
-
sampleRate:
|
|
200
|
-
bitrate:
|
|
201
|
-
outputFormat:
|
|
202
|
-
languageBoost:
|
|
203
|
-
debug:
|
|
204
|
-
voiceCloneEnabled:
|
|
205
|
-
cacheEnabled:
|
|
206
|
-
cacheDir:
|
|
207
|
-
cacheMaxAge:
|
|
208
|
-
cacheMaxSize:
|
|
139
|
+
export const name = 'minimax-vits';
|
|
140
|
+
export const Config = Schema.object({
|
|
141
|
+
ttsApiKey: Schema.string().required().description('MiniMax TTS API Key').role('secret'),
|
|
142
|
+
groupId: Schema.string().description('MiniMax Group ID'),
|
|
143
|
+
apiBase: Schema.string().default('https://api.minimax.io/v1').description('API 基础地址'),
|
|
144
|
+
defaultVoice: Schema.string().default('Chinese_female_gentle').description('默认语音 ID'),
|
|
145
|
+
speechModel: Schema.string().default('speech-01-turbo').description('TTS 模型 (推荐 speech-01-turbo)'),
|
|
146
|
+
speed: Schema.number().default(1.0).min(0.5).max(2.0).description('语速'),
|
|
147
|
+
vol: Schema.number().default(1.0).description('音量'),
|
|
148
|
+
pitch: Schema.number().default(0).description('音调'),
|
|
149
|
+
audioFormat: Schema.string().default('mp3').description('音频格式 (mp3, wav)'),
|
|
150
|
+
sampleRate: Schema.number().default(32000).description('采样率'),
|
|
151
|
+
bitrate: Schema.number().default(128000).description('比特率'),
|
|
152
|
+
outputFormat: Schema.string().default('hex').description('API输出编码 (必须是 hex)'),
|
|
153
|
+
languageBoost: Schema.string().default('auto').description('语言增强'),
|
|
154
|
+
debug: Schema.boolean().default(false).description('启用调试日志'),
|
|
155
|
+
voiceCloneEnabled: Schema.boolean().default(false).description('启用语音克隆'),
|
|
156
|
+
cacheEnabled: Schema.boolean().default(true).description('启用本地文件缓存'),
|
|
157
|
+
cacheDir: Schema.string().default('./data/minimax-vits/cache').description('缓存路径'),
|
|
158
|
+
cacheMaxAge: Schema.number().default(3600000).description('缓存有效期(ms)'),
|
|
159
|
+
cacheMaxSize: Schema.number().default(104857600).description('缓存最大体积(bytes)'),
|
|
209
160
|
}).description('MiniMax VITS 配置');
|
|
210
|
-
// --- 音频处理辅助函数 ---
|
|
211
|
-
async function decodeAudioFromHex(hexString, logger) {
|
|
212
|
-
try {
|
|
213
|
-
if (!hexString)
|
|
214
|
-
return null;
|
|
215
|
-
const buffer = Buffer.from(hexString, 'hex');
|
|
216
|
-
if (buffer.length === 0)
|
|
217
|
-
return null;
|
|
218
|
-
return buffer;
|
|
219
|
-
}
|
|
220
|
-
catch (e) {
|
|
221
|
-
logger.error('Hex 解码失败:', e.message);
|
|
222
|
-
return null;
|
|
223
|
-
}
|
|
224
|
-
}
|
|
225
161
|
// --- 缓存管理器 ---
|
|
226
162
|
class AudioCacheManager {
|
|
227
163
|
cacheDir;
|
|
@@ -229,8 +165,6 @@ class AudioCacheManager {
|
|
|
229
165
|
enabled;
|
|
230
166
|
maxAge;
|
|
231
167
|
maxSize;
|
|
232
|
-
cacheMap = new Map();
|
|
233
|
-
cleanupInterval = null;
|
|
234
168
|
constructor(cacheDir, logger, enabled, maxAge, maxSize) {
|
|
235
169
|
this.cacheDir = cacheDir;
|
|
236
170
|
this.logger = logger;
|
|
@@ -241,17 +175,8 @@ class AudioCacheManager {
|
|
|
241
175
|
async initialize() {
|
|
242
176
|
if (!this.enabled)
|
|
243
177
|
return;
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
fs.mkdirSync(this.cacheDir, { recursive: true });
|
|
247
|
-
this.startCleanupScheduler();
|
|
248
|
-
}
|
|
249
|
-
catch (e) {
|
|
250
|
-
this.logger.warn('缓存初始化失败', e);
|
|
251
|
-
}
|
|
252
|
-
}
|
|
253
|
-
startCleanupScheduler() {
|
|
254
|
-
this.cleanupInterval = setInterval(() => { }, 600000);
|
|
178
|
+
if (!fs.existsSync(this.cacheDir))
|
|
179
|
+
fs.mkdirSync(this.cacheDir, { recursive: true });
|
|
255
180
|
}
|
|
256
181
|
async getAudio(text, voice, format) {
|
|
257
182
|
if (!this.enabled)
|
|
@@ -277,22 +202,17 @@ class AudioCacheManager {
|
|
|
277
202
|
this.logger.warn('缓存写入失败', e);
|
|
278
203
|
}
|
|
279
204
|
}
|
|
280
|
-
dispose() {
|
|
281
|
-
if (this.cleanupInterval)
|
|
282
|
-
clearInterval(this.cleanupInterval);
|
|
283
|
-
}
|
|
205
|
+
dispose() { }
|
|
284
206
|
}
|
|
285
|
-
// ---
|
|
207
|
+
// --- 核心功能函数 ---
|
|
286
208
|
async function generateSpeech(ctx, config, text, voice, cacheManager) {
|
|
287
209
|
const logger = ctx.logger('minimax-vits');
|
|
288
|
-
const apiBase = config.apiBase ?? 'https://api.minimax.io/v1';
|
|
289
210
|
const format = config.audioFormat ?? 'mp3';
|
|
290
|
-
// 1. 查缓存
|
|
291
211
|
if (cacheManager) {
|
|
292
212
|
const cached = await cacheManager.getAudio(text, voice, format);
|
|
293
213
|
if (cached) {
|
|
294
214
|
if (config.debug)
|
|
295
|
-
logger.debug('
|
|
215
|
+
logger.debug('命中本地缓存');
|
|
296
216
|
return cached;
|
|
297
217
|
}
|
|
298
218
|
}
|
|
@@ -303,12 +223,11 @@ async function generateSpeech(ctx, config, text, voice, cacheManager) {
|
|
|
303
223
|
};
|
|
304
224
|
if (config.groupId)
|
|
305
225
|
headers['GroupId'] = config.groupId;
|
|
306
|
-
// 2. 构造符合 T2A V2 文档的 Payload
|
|
307
226
|
const payload = {
|
|
308
227
|
model: config.speechModel ?? 'speech-01-turbo',
|
|
309
228
|
text: text,
|
|
310
|
-
stream: false,
|
|
311
|
-
output_format:
|
|
229
|
+
stream: false,
|
|
230
|
+
output_format: 'hex',
|
|
312
231
|
voice_setting: {
|
|
313
232
|
voice_id: voice,
|
|
314
233
|
speed: config.speed ?? 1.0,
|
|
@@ -325,107 +244,175 @@ async function generateSpeech(ctx, config, text, voice, cacheManager) {
|
|
|
325
244
|
if (config.languageBoost && config.languageBoost !== 'auto') {
|
|
326
245
|
payload.language_boost = config.languageBoost;
|
|
327
246
|
}
|
|
328
|
-
if (config.debug)
|
|
329
|
-
logger.debug(
|
|
330
|
-
|
|
331
|
-
}
|
|
332
|
-
// 3. 发起请求
|
|
333
|
-
const response = await ctx.http.post(`${apiBase}/t2a_v2`, payload, { headers, timeout: 60000 });
|
|
334
|
-
// 4. 检查响应状态
|
|
247
|
+
if (config.debug)
|
|
248
|
+
logger.debug(`调用 API: ${config.apiBase}/t2a_v2`);
|
|
249
|
+
const response = await ctx.http.post(`${config.apiBase}/t2a_v2`, payload, { headers, timeout: 60000 });
|
|
335
250
|
if (response?.base_resp && response.base_resp.status_code !== 0) {
|
|
336
|
-
logger.error(`API Error:
|
|
251
|
+
logger.error(`API Error: ${response.base_resp.status_msg}`);
|
|
337
252
|
return null;
|
|
338
253
|
}
|
|
339
|
-
// 5. 解析音频数据 (优先 data.audio,兼容部分 SDK 的扁平化处理)
|
|
340
254
|
const audioHex = response?.data?.audio || response?.audio;
|
|
341
255
|
if (!audioHex) {
|
|
342
|
-
logger.error('API
|
|
343
|
-
if (config.debug)
|
|
344
|
-
logger.debug('Response:', JSON.stringify(response));
|
|
256
|
+
logger.error('API 返回数据中未找到 audio 字段');
|
|
345
257
|
return null;
|
|
346
258
|
}
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
if (
|
|
259
|
+
const audioBuffer = Buffer.from(audioHex, 'hex');
|
|
260
|
+
if (audioBuffer.length === 0)
|
|
261
|
+
return null;
|
|
262
|
+
if (cacheManager) {
|
|
351
263
|
await cacheManager.saveAudio(audioBuffer, text, voice, format);
|
|
352
264
|
}
|
|
353
265
|
return audioBuffer;
|
|
354
266
|
}
|
|
355
267
|
catch (error) {
|
|
356
|
-
logger.error('TTS
|
|
357
|
-
if (error.response?.data) {
|
|
358
|
-
logger.error('API Error Detail:', JSON.stringify(error.response.data));
|
|
359
|
-
}
|
|
268
|
+
logger.error('TTS 调用失败:', error);
|
|
360
269
|
return null;
|
|
361
270
|
}
|
|
362
271
|
}
|
|
363
|
-
//
|
|
272
|
+
// 修正:返回值类型改为 string | undefined,匹配调用处的类型
|
|
364
273
|
async function uploadFile(ctx, config, filePath, purpose) {
|
|
365
|
-
const
|
|
366
|
-
|
|
367
|
-
headers
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
274
|
+
const logger = ctx.logger('minimax-vits');
|
|
275
|
+
try {
|
|
276
|
+
const headers = { 'Authorization': `Bearer ${config.ttsApiKey}` };
|
|
277
|
+
if (config.groupId)
|
|
278
|
+
headers['GroupId'] = config.groupId;
|
|
279
|
+
const fileRes = await ctx.http.file(filePath);
|
|
280
|
+
// 如果没有 Blob 类型(Node 低版本),需要 polyfill 或者忽略类型报错
|
|
281
|
+
// 此处假设环境支持,使用 new Blob 包装 buffer
|
|
282
|
+
const blob = new Blob([fileRes.data], { type: fileRes.mime });
|
|
283
|
+
const formData = new FormData();
|
|
284
|
+
formData.append('file', blob, fileRes.filename || 'upload.mp3');
|
|
285
|
+
formData.append('purpose', purpose);
|
|
286
|
+
const response = await ctx.http.post(`${config.apiBase}/files/upload`, formData, { headers });
|
|
287
|
+
return response.file?.file_id || undefined;
|
|
288
|
+
}
|
|
289
|
+
catch (error) {
|
|
290
|
+
logger.error(`文件上传失败:`, error);
|
|
291
|
+
return undefined;
|
|
292
|
+
}
|
|
373
293
|
}
|
|
374
|
-
//
|
|
375
|
-
async function cloneVoice(ctx, config, fileId, voiceId, text) {
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
294
|
+
// 语音克隆逻辑
|
|
295
|
+
async function cloneVoice(ctx, config, fileId, voiceId, promptAudioFileId, promptText, text) {
|
|
296
|
+
const logger = ctx.logger('minimax-vits');
|
|
297
|
+
try {
|
|
298
|
+
const payload = {
|
|
299
|
+
file_id: fileId,
|
|
300
|
+
voice_id: voiceId,
|
|
301
|
+
model: config.speechModel ?? 'speech-01-turbo',
|
|
302
|
+
audio_format: config.audioFormat ?? 'mp3',
|
|
303
|
+
};
|
|
304
|
+
if (text)
|
|
305
|
+
payload.text = text;
|
|
306
|
+
if (promptAudioFileId && promptText) {
|
|
307
|
+
payload.clone_prompt = { prompt_audio: promptAudioFileId, prompt_text: promptText };
|
|
308
|
+
}
|
|
309
|
+
const headers = {
|
|
310
|
+
'Authorization': `Bearer ${config.ttsApiKey}`,
|
|
311
|
+
'Content-Type': 'application/json',
|
|
312
|
+
};
|
|
313
|
+
if (config.groupId)
|
|
314
|
+
headers['GroupId'] = config.groupId;
|
|
315
|
+
const response = await ctx.http.post(`${config.apiBase}/voice_clone`, payload, { headers, responseType: 'arraybuffer' });
|
|
316
|
+
return Buffer.from(response);
|
|
317
|
+
}
|
|
318
|
+
catch (error) {
|
|
319
|
+
logger.error('语音克隆失败:', error);
|
|
320
|
+
return null;
|
|
321
|
+
}
|
|
389
322
|
}
|
|
390
323
|
// --- 插件入口 ---
|
|
391
|
-
function apply(ctx, config) {
|
|
324
|
+
export function apply(ctx, config) {
|
|
392
325
|
const logger = ctx.logger('minimax-vits');
|
|
393
|
-
//
|
|
394
|
-
const chatLunaPlugin = new
|
|
326
|
+
// 修正:这里使用 config as any 规避类型检查,因为 ChatLunaPlugin 需要的某些配置字段(如 proxy)我们没有定义
|
|
327
|
+
const chatLunaPlugin = new ChatLunaPlugin(ctx, config, 'minimax-vits', false);
|
|
395
328
|
const cacheManager = config.cacheEnabled
|
|
396
329
|
? new AudioCacheManager(config.cacheDir ?? './data/minimax-vits/cache', logger, true, config.cacheMaxAge ?? 3600000, config.cacheMaxSize ?? 104857600)
|
|
397
330
|
: undefined;
|
|
398
331
|
ctx.on('ready', async () => {
|
|
399
332
|
await cacheManager?.initialize();
|
|
400
|
-
// 2. 注册控制台服务
|
|
401
333
|
if (ctx.console) {
|
|
402
334
|
ctx.console.services['minimax-vits'] = new MinimaxVitsService(ctx, config);
|
|
403
335
|
}
|
|
404
|
-
// 3. 注册 ChatLuna 工具
|
|
405
336
|
try {
|
|
406
337
|
chatLunaPlugin.registerTool('minimax_tts', {
|
|
407
338
|
selector: (history) => history.some((item) => fuzzyQuery(getMessageContent(item.content), ['语音', '朗读', 'tts', 'speak', 'say', 'voice'])),
|
|
408
339
|
createTool: () => new MinimaxVitsTool(ctx, config, cacheManager),
|
|
409
340
|
authorization: () => true
|
|
410
341
|
});
|
|
411
|
-
logger.info('ChatLuna Tool
|
|
342
|
+
logger.info('ChatLuna Tool 已注册');
|
|
412
343
|
}
|
|
413
344
|
catch (e) {
|
|
414
|
-
logger.warn('ChatLuna Tool 注册失败
|
|
345
|
+
logger.warn('ChatLuna Tool 注册失败', e.message);
|
|
415
346
|
}
|
|
416
347
|
});
|
|
417
348
|
ctx.on('dispose', () => cacheManager?.dispose());
|
|
418
|
-
//
|
|
349
|
+
// --- 指令注册区 ---
|
|
419
350
|
ctx.command('minivits.test <text:text>', '测试 TTS')
|
|
420
351
|
.option('voice', '-v <voice>')
|
|
352
|
+
.option('speed', '-s <speed>', { type: 'number' })
|
|
421
353
|
.action(async ({ session, options }, text) => {
|
|
422
354
|
if (!text)
|
|
423
355
|
return '请输入文本';
|
|
424
356
|
await session?.send('生成中...');
|
|
425
|
-
const buffer = await generateSpeech(ctx,
|
|
357
|
+
const buffer = await generateSpeech(ctx, {
|
|
358
|
+
...config,
|
|
359
|
+
speed: options?.speed ?? config.speed
|
|
360
|
+
}, text, options?.voice || config.defaultVoice || 'Chinese_female_gentle', cacheManager);
|
|
426
361
|
if (!buffer)
|
|
427
362
|
return '失败';
|
|
428
|
-
return (
|
|
363
|
+
return makeAudioElement(buffer, config.audioFormat ?? 'mp3');
|
|
364
|
+
});
|
|
365
|
+
ctx.command('minivits.debug', '查看插件配置').action(() => {
|
|
366
|
+
return `API Base: ${config.apiBase}\nModel: ${config.speechModel}\nFormat: ${config.audioFormat}\nDebug: ${config.debug}`;
|
|
429
367
|
});
|
|
430
|
-
|
|
368
|
+
if (config.voiceCloneEnabled) {
|
|
369
|
+
ctx.command('minivits.clone.upload <filePath> <purpose>', '上传文件')
|
|
370
|
+
.action(async ({ session }, filePath, purpose) => {
|
|
371
|
+
if (!session || !filePath || !purpose)
|
|
372
|
+
return '缺少参数';
|
|
373
|
+
if (purpose !== 'voice_clone' && purpose !== 'prompt_audio')
|
|
374
|
+
return '用途错误';
|
|
375
|
+
await session.send('上传中...');
|
|
376
|
+
const fileId = await uploadFile(ctx, config, filePath, purpose);
|
|
377
|
+
return fileId ? `上传成功: ${fileId}` : '上传失败';
|
|
378
|
+
});
|
|
379
|
+
ctx.command('minivits.clone.create <fileId> <voiceId> [text:text]', '创建语音克隆')
|
|
380
|
+
.option('promptAudio', '-p <id>')
|
|
381
|
+
.option('promptText', '-t <text>')
|
|
382
|
+
.action(async ({ session, options }, fileId, voiceId, text) => {
|
|
383
|
+
if (!session || !fileId || !voiceId)
|
|
384
|
+
return '缺少参数';
|
|
385
|
+
await session.send('克隆中...');
|
|
386
|
+
const audioBuffer = await cloneVoice(ctx, config, fileId, voiceId, options?.promptAudio, options?.promptText, text);
|
|
387
|
+
if (!audioBuffer)
|
|
388
|
+
return '克隆失败';
|
|
389
|
+
if (text) {
|
|
390
|
+
return makeAudioElement(audioBuffer, config.audioFormat ?? 'mp3');
|
|
391
|
+
}
|
|
392
|
+
return '克隆操作请求已发送';
|
|
393
|
+
});
|
|
394
|
+
ctx.command('minivits.clone.full <sourceFile> <voiceId> <text:text>', '完整克隆流程')
|
|
395
|
+
.option('promptFile', '-p <file>')
|
|
396
|
+
.option('promptText', '-t <text>')
|
|
397
|
+
.action(async ({ session, options }, sourceFile, voiceId, text) => {
|
|
398
|
+
if (!session || !sourceFile || !voiceId || !text)
|
|
399
|
+
return '缺少参数';
|
|
400
|
+
await session.send('1. 上传源文件...');
|
|
401
|
+
const sourceFileId = await uploadFile(ctx, config, sourceFile, 'voice_clone');
|
|
402
|
+
if (!sourceFileId)
|
|
403
|
+
return '源文件上传失败';
|
|
404
|
+
let promptAudioFileId;
|
|
405
|
+
if (options?.promptFile) {
|
|
406
|
+
await session.send('2. 上传提示音频...');
|
|
407
|
+
promptAudioFileId = await uploadFile(ctx, config, options.promptFile, 'prompt_audio');
|
|
408
|
+
if (!promptAudioFileId)
|
|
409
|
+
return '提示音频上传失败';
|
|
410
|
+
}
|
|
411
|
+
await session.send('3. 生成克隆语音...');
|
|
412
|
+
const audioBuffer = await cloneVoice(ctx, config, sourceFileId, voiceId, promptAudioFileId, options?.promptText, text);
|
|
413
|
+
if (!audioBuffer)
|
|
414
|
+
return '语音克隆失败';
|
|
415
|
+
return makeAudioElement(audioBuffer, config.audioFormat ?? 'mp3');
|
|
416
|
+
});
|
|
417
|
+
}
|
|
431
418
|
}
|
package/package.json
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "koishi-plugin-minimax-vits",
|
|
3
3
|
"description": "使用 minimax 国际版生成语音,适配 chatluna",
|
|
4
|
-
"version": "1.2.
|
|
4
|
+
"version": "1.2.7",
|
|
5
|
+
"type": "module",
|
|
5
6
|
"main": "lib/index.js",
|
|
6
7
|
"typings": "lib/index.d.ts",
|
|
7
8
|
"files": [
|
|
@@ -23,7 +24,10 @@
|
|
|
23
24
|
"koishi": "^4.18.10"
|
|
24
25
|
},
|
|
25
26
|
"inject": {
|
|
26
|
-
"optional": [
|
|
27
|
+
"optional": [
|
|
28
|
+
"console",
|
|
29
|
+
"chatluna"
|
|
30
|
+
]
|
|
27
31
|
},
|
|
28
32
|
"devDependencies": {
|
|
29
33
|
"@types/node": "^20.0.0",
|