@ryantest/openclaw-qqbot 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +22 -0
- package/README.md +483 -0
- package/README.zh.md +478 -0
- package/bin/qqbot-cli.js +243 -0
- package/clawdbot.plugin.json +16 -0
- package/dist/index.d.ts +17 -0
- package/dist/index.js +26 -0
- package/dist/src/admin-resolver.d.ts +27 -0
- package/dist/src/admin-resolver.js +122 -0
- package/dist/src/api.d.ts +156 -0
- package/dist/src/api.js +599 -0
- package/dist/src/channel.d.ts +11 -0
- package/dist/src/channel.js +354 -0
- package/dist/src/config.d.ts +25 -0
- package/dist/src/config.js +161 -0
- package/dist/src/credential-backup.d.ts +31 -0
- package/dist/src/credential-backup.js +66 -0
- package/dist/src/gateway.d.ts +18 -0
- package/dist/src/gateway.js +1265 -0
- package/dist/src/image-server.d.ts +68 -0
- package/dist/src/image-server.js +462 -0
- package/dist/src/inbound-attachments.d.ts +58 -0
- package/dist/src/inbound-attachments.js +234 -0
- package/dist/src/known-users.d.ts +100 -0
- package/dist/src/known-users.js +263 -0
- package/dist/src/message-queue.d.ts +50 -0
- package/dist/src/message-queue.js +115 -0
- package/dist/src/onboarding.d.ts +10 -0
- package/dist/src/onboarding.js +203 -0
- package/dist/src/outbound-deliver.d.ts +48 -0
- package/dist/src/outbound-deliver.js +462 -0
- package/dist/src/outbound.d.ts +203 -0
- package/dist/src/outbound.js +1102 -0
- package/dist/src/proactive.d.ts +170 -0
- package/dist/src/proactive.js +399 -0
- package/dist/src/ref-index-store.d.ts +70 -0
- package/dist/src/ref-index-store.js +273 -0
- package/dist/src/reply-dispatcher.d.ts +35 -0
- package/dist/src/reply-dispatcher.js +311 -0
- package/dist/src/runtime.d.ts +3 -0
- package/dist/src/runtime.js +10 -0
- package/dist/src/session-store.d.ts +52 -0
- package/dist/src/session-store.js +254 -0
- package/dist/src/slash-commands.d.ts +71 -0
- package/dist/src/slash-commands.js +1179 -0
- package/dist/src/startup-greeting.d.ts +30 -0
- package/dist/src/startup-greeting.js +78 -0
- package/dist/src/stt.d.ts +21 -0
- package/dist/src/stt.js +70 -0
- package/dist/src/tools/channel.d.ts +16 -0
- package/dist/src/tools/channel.js +234 -0
- package/dist/src/tools/remind.d.ts +2 -0
- package/dist/src/tools/remind.js +247 -0
- package/dist/src/types.d.ts +175 -0
- package/dist/src/types.js +1 -0
- package/dist/src/typing-keepalive.d.ts +27 -0
- package/dist/src/typing-keepalive.js +64 -0
- package/dist/src/update-checker.d.ts +34 -0
- package/dist/src/update-checker.js +166 -0
- package/dist/src/user-messages.d.ts +8 -0
- package/dist/src/user-messages.js +8 -0
- package/dist/src/utils/audio-convert.d.ts +89 -0
- package/dist/src/utils/audio-convert.js +704 -0
- package/dist/src/utils/file-utils.d.ts +55 -0
- package/dist/src/utils/file-utils.js +150 -0
- package/dist/src/utils/image-size.d.ts +51 -0
- package/dist/src/utils/image-size.js +234 -0
- package/dist/src/utils/media-tags.d.ts +14 -0
- package/dist/src/utils/media-tags.js +164 -0
- package/dist/src/utils/payload.d.ts +112 -0
- package/dist/src/utils/payload.js +186 -0
- package/dist/src/utils/platform.d.ts +137 -0
- package/dist/src/utils/platform.js +390 -0
- package/dist/src/utils/text-parsing.d.ts +32 -0
- package/dist/src/utils/text-parsing.js +80 -0
- package/dist/src/utils/upload-cache.d.ts +34 -0
- package/dist/src/utils/upload-cache.js +93 -0
- package/index.ts +31 -0
- package/moltbot.plugin.json +16 -0
- package/node_modules/@eshaz/web-worker/LICENSE +201 -0
- package/node_modules/@eshaz/web-worker/README.md +134 -0
- package/node_modules/@eshaz/web-worker/browser.js +17 -0
- package/node_modules/@eshaz/web-worker/cjs/browser.js +16 -0
- package/node_modules/@eshaz/web-worker/cjs/node.js +219 -0
- package/node_modules/@eshaz/web-worker/index.d.ts +4 -0
- package/node_modules/@eshaz/web-worker/node.js +223 -0
- package/node_modules/@eshaz/web-worker/package.json +54 -0
- package/node_modules/@wasm-audio-decoders/common/index.js +5 -0
- package/node_modules/@wasm-audio-decoders/common/package.json +36 -0
- package/node_modules/@wasm-audio-decoders/common/src/WASMAudioDecoderCommon.js +231 -0
- package/node_modules/@wasm-audio-decoders/common/src/WASMAudioDecoderWorker.js +129 -0
- package/node_modules/@wasm-audio-decoders/common/src/puff/README +67 -0
- package/node_modules/@wasm-audio-decoders/common/src/puff/build_puff.js +31 -0
- package/node_modules/@wasm-audio-decoders/common/src/puff/puff.c +863 -0
- package/node_modules/@wasm-audio-decoders/common/src/puff/puff.h +35 -0
- package/node_modules/@wasm-audio-decoders/common/src/utilities.js +3 -0
- package/node_modules/@wasm-audio-decoders/common/types.d.ts +7 -0
- package/node_modules/mpg123-decoder/README.md +265 -0
- package/node_modules/mpg123-decoder/dist/mpg123-decoder.min.js +185 -0
- package/node_modules/mpg123-decoder/dist/mpg123-decoder.min.js.map +1 -0
- package/node_modules/mpg123-decoder/index.js +8 -0
- package/node_modules/mpg123-decoder/package.json +58 -0
- package/node_modules/mpg123-decoder/src/EmscriptenWasm.js +464 -0
- package/node_modules/mpg123-decoder/src/MPEGDecoder.js +200 -0
- package/node_modules/mpg123-decoder/src/MPEGDecoderWebWorker.js +21 -0
- package/node_modules/mpg123-decoder/types.d.ts +30 -0
- package/node_modules/silk-wasm/LICENSE +21 -0
- package/node_modules/silk-wasm/README.md +85 -0
- package/node_modules/silk-wasm/lib/index.cjs +16 -0
- package/node_modules/silk-wasm/lib/index.d.ts +70 -0
- package/node_modules/silk-wasm/lib/index.mjs +16 -0
- package/node_modules/silk-wasm/lib/silk.wasm +0 -0
- package/node_modules/silk-wasm/lib/utils.d.ts +4 -0
- package/node_modules/silk-wasm/package.json +39 -0
- package/node_modules/simple-yenc/.github/FUNDING.yml +1 -0
- package/node_modules/simple-yenc/.prettierignore +1 -0
- package/node_modules/simple-yenc/LICENSE +7 -0
- package/node_modules/simple-yenc/README.md +163 -0
- package/node_modules/simple-yenc/dist/esm.js +1 -0
- package/node_modules/simple-yenc/dist/index.js +1 -0
- package/node_modules/simple-yenc/package.json +50 -0
- package/node_modules/simple-yenc/rollup.config.js +27 -0
- package/node_modules/simple-yenc/src/simple-yenc.js +302 -0
- package/node_modules/ws/LICENSE +20 -0
- package/node_modules/ws/README.md +548 -0
- package/node_modules/ws/browser.js +8 -0
- package/node_modules/ws/index.js +13 -0
- package/node_modules/ws/lib/buffer-util.js +131 -0
- package/node_modules/ws/lib/constants.js +19 -0
- package/node_modules/ws/lib/event-target.js +292 -0
- package/node_modules/ws/lib/extension.js +203 -0
- package/node_modules/ws/lib/limiter.js +55 -0
- package/node_modules/ws/lib/permessage-deflate.js +528 -0
- package/node_modules/ws/lib/receiver.js +706 -0
- package/node_modules/ws/lib/sender.js +602 -0
- package/node_modules/ws/lib/stream.js +161 -0
- package/node_modules/ws/lib/subprotocol.js +62 -0
- package/node_modules/ws/lib/validation.js +152 -0
- package/node_modules/ws/lib/websocket-server.js +554 -0
- package/node_modules/ws/lib/websocket.js +1393 -0
- package/node_modules/ws/package.json +69 -0
- package/node_modules/ws/wrapper.mjs +8 -0
- package/openclaw.plugin.json +16 -0
- package/package.json +76 -0
- package/scripts/cleanup-legacy-plugins.sh +124 -0
- package/scripts/proactive-api-server.ts +369 -0
- package/scripts/send-proactive.ts +293 -0
- package/scripts/set-markdown.sh +156 -0
- package/scripts/test-sendmedia.ts +116 -0
- package/scripts/upgrade-via-alt-pkg.sh +307 -0
- package/scripts/upgrade-via-npm.ps1 +296 -0
- package/scripts/upgrade-via-npm.sh +301 -0
- package/scripts/upgrade-via-source.sh +774 -0
- package/skills/qqbot-channel/SKILL.md +263 -0
- package/skills/qqbot-channel/references/api_references.md +521 -0
- package/skills/qqbot-media/SKILL.md +56 -0
- package/skills/qqbot-remind/SKILL.md +149 -0
- package/src/admin-resolver.ts +140 -0
- package/src/api.ts +819 -0
- package/src/bot-logs-2026-03-21T11-21-47(2).txt +46 -0
- package/src/channel.ts +381 -0
- package/src/config.ts +187 -0
- package/src/credential-backup.ts +72 -0
- package/src/gateway.log +43 -0
- package/src/gateway.ts +1404 -0
- package/src/image-server.ts +539 -0
- package/src/inbound-attachments.ts +304 -0
- package/src/known-users.ts +353 -0
- package/src/message-queue.ts +169 -0
- package/src/onboarding.ts +274 -0
- package/src/openclaw-2026-03-21.log +3729 -0
- package/src/openclaw-plugin-sdk.d.ts +522 -0
- package/src/outbound-deliver.ts +552 -0
- package/src/outbound.ts +1266 -0
- package/src/proactive.ts +530 -0
- package/src/ref-index-store.ts +357 -0
- package/src/reply-dispatcher.ts +334 -0
- package/src/runtime.ts +14 -0
- package/src/session-store.ts +303 -0
- package/src/slash-commands.ts +1305 -0
- package/src/startup-greeting.ts +98 -0
- package/src/stt.ts +86 -0
- package/src/tools/channel.ts +281 -0
- package/src/tools/remind.ts +296 -0
- package/src/types.ts +183 -0
- package/src/typing-keepalive.ts +59 -0
- package/src/update-checker.ts +179 -0
- package/src/user-messages.ts +7 -0
- package/src/utils/audio-convert.ts +803 -0
- package/src/utils/file-utils.ts +167 -0
- package/src/utils/image-size.ts +266 -0
- package/src/utils/media-tags.ts +182 -0
- package/src/utils/payload.ts +265 -0
- package/src/utils/platform.ts +435 -0
- package/src/utils/text-parsing.ts +82 -0
- package/src/utils/upload-cache.ts +128 -0
- package/tsconfig.json +16 -0
|
@@ -0,0 +1,704 @@
|
|
|
1
|
+
import * as fs from "node:fs";
|
|
2
|
+
import * as path from "node:path";
|
|
3
|
+
import { execFile } from "node:child_process";
|
|
4
|
+
import { decode, encode, isSilk } from "silk-wasm";
|
|
5
|
+
import { detectFfmpeg, isWindows } from "./platform.js";
|
|
6
|
+
/**
 * Report whether the file at `filePath` contains SILK audio (the format
 * commonly used for QQ / WeChat voice messages).
 *
 * Notes carried over from the original author: QQ voice files are usually
 * saved with an `.amr` extension even though the payload may be SILK v3, and
 * a SILK file starts with the marker 0x02 "#!SILK_V3".
 *
 * The whole file is read and passed to silk-wasm's `isSilk`. Any error while
 * reading or checking (for example a missing file) yields `false`.
 *
 * @param {string} filePath Path of the file to inspect.
 * @returns {boolean} The result of `isSilk`, or `false` on any error.
 */
function isSilkFile(filePath) {
    try {
        const contents = fs.readFileSync(filePath);
        const bytes = new Uint8Array(contents.buffer, contents.byteOffset, contents.byteLength);
        return isSilk(bytes);
    }
    catch {
        // Unreadable or missing file: report "not SILK" instead of throwing.
        return false;
    }
}
|
|
20
|
+
/**
 * Wrap raw PCM samples in a canonical 44-byte RIFF/WAVE header.
 *
 * Layout: "RIFF" chunk, "WAVE" id, 16-byte "fmt " sub-chunk (format tag 1 =
 * PCM), then the "data" sub-chunk containing the samples unchanged.
 *
 * @param {Buffer|Uint8Array|Int16Array} pcmData Sample bytes (any typed-array
 *   view; its underlying bytes are copied verbatim).
 * @param {number} sampleRate Sample rate in Hz written to the header.
 * @param {number} [channels=1] Channel count written to the header.
 * @param {number} [bitsPerSample=16] Bit depth written to the header.
 * @returns {Buffer} A new buffer holding header + sample bytes.
 */
function pcmToWav(pcmData, sampleRate, channels = 1, bitsPerSample = 16) {
    const bytesPerSample = bitsPerSample / 8;
    const byteRate = sampleRate * channels * bytesPerSample;
    const blockAlign = channels * bytesPerSample;
    // Size must be measured in BYTES: `length` counts elements, which is
    // smaller than the byte count for views such as Int16Array and would
    // truncate the copied samples and corrupt the chunk sizes.
    const dataSize = pcmData.byteLength;
    const headerSize = 44;
    const fileSize = headerSize + dataSize;
    const buffer = Buffer.alloc(fileSize);
    // RIFF header
    buffer.write("RIFF", 0);
    buffer.writeUInt32LE(fileSize - 8, 4); // chunk size excludes "RIFF" + this field
    buffer.write("WAVE", 8);
    // fmt sub-chunk
    buffer.write("fmt ", 12);
    buffer.writeUInt32LE(16, 16); // sub-chunk size
    buffer.writeUInt16LE(1, 20); // PCM format
    buffer.writeUInt16LE(channels, 22);
    buffer.writeUInt32LE(sampleRate, 24);
    buffer.writeUInt32LE(byteRate, 28);
    buffer.writeUInt16LE(blockAlign, 32);
    buffer.writeUInt16LE(bitsPerSample, 34);
    // data sub-chunk
    buffer.write("data", 36);
    buffer.writeUInt32LE(dataSize, 40);
    Buffer.from(pcmData.buffer, pcmData.byteOffset, pcmData.byteLength).copy(buffer, headerSize);
    return buffer;
}
|
|
50
|
+
/**
 * Drop a leading "#!AMR\n" marker (6 bytes) from a voice buffer, if present.
 *
 * Per the original author's note, QQ `.amr` files may carry this marker in
 * front of the SILK payload and silk-wasm needs it removed before decoding.
 * A buffer that consists of the marker alone is returned untouched.
 *
 * @param {Buffer} buf Raw file contents.
 * @returns {Buffer} A view past the marker, or `buf` itself when no marker
 *   (followed by at least one byte) is found.
 */
function stripAmrHeader(buf) {
    const marker = Buffer.from("#!AMR\n");
    const startsWithMarker = buf.length > 6 && buf.subarray(0, 6).equals(marker);
    return startsWithMarker ? buf.subarray(6) : buf;
}
|
|
62
|
+
/**
 * Decode a SILK / AMR-wrapped voice file and write it next to (or into
 * `outputDir` as) a `.wav` file.
 *
 * Steps: read the file, strip an optional AMR marker, verify the payload
 * with `isSilk`, decode to PCM at 24000 Hz (the original author notes this
 * is the usual QQ voice rate), wrap as WAV and write `<basename>.wav`.
 *
 * @param {string} inputPath Input file path (.amr / .silk / .slk).
 * @param {string} [outputDir] Target directory; defaults to the directory of
 *   `inputPath`. Created recursively when missing.
 * @returns {Promise<{wavPath: string, duration: number} | null>} The written
 *   path plus the duration reported by the decoder, or `null` when the input
 *   does not exist or is not recognised as SILK.
 */
export async function convertSilkToWav(inputPath, outputDir) {
    if (!fs.existsSync(inputPath)) {
        return null;
    }
    // Remove a possible AMR marker, then expose the bytes as a Uint8Array
    // because that is the type silk-wasm expects.
    const payload = stripAmrHeader(fs.readFileSync(inputPath));
    const silkBytes = new Uint8Array(payload.buffer, payload.byteOffset, payload.byteLength);
    if (!isSilk(silkBytes)) {
        return null;
    }
    const sampleRate = 24000;
    const decoded = await decode(silkBytes, sampleRate);
    const wavBuffer = pcmToWav(decoded.data, sampleRate);
    const targetDir = outputDir || path.dirname(inputPath);
    if (!fs.existsSync(targetDir)) {
        fs.mkdirSync(targetDir, { recursive: true });
    }
    const stem = path.basename(inputPath, path.extname(inputPath));
    const wavPath = path.join(targetDir, `${stem}.wav`);
    fs.writeFileSync(wavPath, wavBuffer);
    return { wavPath, duration: decoded.duration };
}
|
|
98
|
+
/**
 * Decide whether an attachment is a voice message, using its `content_type`
 * first and its filename extension second.
 *
 * @param {{content_type?: string, filename?: string}} att Attachment record.
 * @returns {boolean} `true` when `content_type` is "voice" or starts with
 *   "audio/", or when the filename ends in .amr / .silk / .slk / .slac
 *   (case-insensitive).
 */
export function isVoiceAttachment(att) {
    const typeSaysVoice = att.content_type === "voice" || att.content_type?.startsWith("audio/");
    if (typeSaysVoice) {
        return true;
    }
    const voiceExtensions = [".amr", ".silk", ".slk", ".slac"];
    const extension = att.filename ? path.extname(att.filename).toLowerCase() : "";
    return voiceExtensions.includes(extension);
}
|
|
108
|
+
/**
 * Render a duration as a short human-readable Chinese string.
 *
 * The value is rounded to whole seconds first. Under one minute the result
 * is "<s>秒"; otherwise "<m>分<s>秒", or "<m>分钟" when no seconds remain.
 *
 * @param {number} durationMs Duration in milliseconds.
 * @returns {string} Formatted duration.
 */
export function formatDuration(durationMs) {
    const totalSeconds = Math.round(durationMs / 1000);
    if (totalSeconds < 60) {
        return `${totalSeconds}秒`;
    }
    const wholeMinutes = Math.floor(totalSeconds / 60);
    const leftoverSeconds = totalSeconds % 60;
    if (leftoverSeconds > 0) {
        return `${wholeMinutes}分${leftoverSeconds}秒`;
    }
    return `${wholeMinutes}分钟`;
}
|
|
120
|
+
/**
 * Decide whether a file should be treated as audio.
 *
 * The MIME type wins when supplied ("voice" or anything starting with
 * "audio/"), which covers files with a missing or misleading extension.
 * Otherwise the lower-cased extension is checked against a fixed list.
 *
 * @param {string} filePath File path whose extension is inspected.
 * @param {string} [mimeType] Optional MIME type.
 * @returns {boolean} `true` when either check identifies audio.
 */
export function isAudioFile(filePath, mimeType) {
    if (mimeType && (mimeType === "voice" || mimeType.startsWith("audio/"))) {
        return true;
    }
    const audioExtensions = [".silk", ".slk", ".amr", ".wav", ".mp3", ".ogg", ".opus", ".aac", ".flac", ".m4a", ".wma", ".pcm"];
    const extension = path.extname(filePath).toLowerCase();
    return audioExtensions.includes(extension);
}
|
|
129
|
+
/** Voice MIME types treated as natively supported by QQ (no transcoding). */
const QQ_NATIVE_VOICE_MIMES = new Set([
    "audio/silk",
    "audio/amr",
    "audio/wav",
    "audio/wave",
    "audio/x-wav",
    "audio/mpeg",
    "audio/mp3",
]);
/** Voice file extensions treated as natively supported by QQ (no transcoding). */
const QQ_NATIVE_VOICE_EXTS = new Set([
    ".silk",
    ".slk",
    ".amr",
    ".wav",
    ".mp3",
]);
/**
 * Decide whether a voice file has to be transcoded before it is sent.
 *
 * Both the MIME type and the extension are consulted so that an extension
 * alone cannot cause a misjudgement: a native MIME type or a native
 * extension means "send as-is". Anything else needs transcoding only if
 * `isAudioFile` recognises it as audio.
 *
 * @param {string} filePath Audio file path.
 * @param {string} [mimeType] Optional MIME type (compared case-insensitively).
 * @returns {boolean} `true` when transcoding is required, `false` when the
 *   file can be sent directly (or is not audio at all).
 */
export function shouldTranscodeVoice(filePath, mimeType) {
    // MIME first: a natively supported type never needs transcoding.
    const normalizedMime = mimeType ? mimeType.toLowerCase() : undefined;
    if (normalizedMime !== undefined && QQ_NATIVE_VOICE_MIMES.has(normalizedMime)) {
        return false;
    }
    // Then the extension.
    const extension = path.extname(filePath).toLowerCase();
    if (QQ_NATIVE_VOICE_EXTS.has(extension)) {
        return false;
    }
    // Audio, but not in a native format → transcode.
    return isAudioFile(filePath, mimeType);
}
|
|
161
|
+
/**
 * Merge a TTS config block with its provider config into one resolved
 * settings object.
 *
 * Block values take precedence over provider values for `baseUrl`, `apiKey`
 * and `authStyle`; `queryParams` from both are merged with block keys
 * winning. `model` defaults to "tts-1" and `voice` to "alloy". Trailing
 * slashes are stripped from `baseUrl`.
 *
 * @param {object} [block] TTS block (baseUrl, apiKey, model, voice,
 *   authStyle, queryParams, speed).
 * @param {object} [providerCfg] Provider-level fallback config.
 * @returns {object|null} Resolved settings, or `null` when no `baseUrl` or
 *   no `apiKey` can be determined. `queryParams` and `speed` are present
 *   only when non-empty / defined.
 */
function resolveTTSFromBlock(block, providerCfg) {
    const baseUrl = block?.baseUrl || providerCfg?.baseUrl;
    const apiKey = block?.apiKey || providerCfg?.apiKey;
    if (!baseUrl || !apiKey) {
        return null;
    }
    const requestedAuth = block?.authStyle || providerCfg?.authStyle;
    const resolved = {
        baseUrl: baseUrl.replace(/\/+$/, ""),
        apiKey,
        model: block?.model || "tts-1",
        voice: block?.voice || "alloy",
        // Anything other than the explicit "api-key" style means bearer auth.
        authStyle: requestedAuth === "api-key" ? "api-key" : "bearer",
    };
    const mergedQuery = { ...(providerCfg?.queryParams ?? {}), ...(block?.queryParams ?? {}) };
    if (Object.keys(mergedQuery).length > 0) {
        resolved.queryParams = mergedQuery;
    }
    const speed = block?.speed;
    if (speed !== undefined) {
        resolved.speed = speed;
    }
    return resolved;
}
|
|
181
|
+
/**
 * Resolve the effective TTS settings from the application config.
 *
 * Lookup order:
 *  1. `channels.qqbot.tts` (plugin-specific), unless `enabled === false`.
 *  2. `messages.tts` (framework-level), unless `auto === "disabled"`; the
 *     block used is `messages.tts[<provider>]`.
 * In both cases the provider id defaults to "openai" and the provider config
 * is read from `models.providers[<provider>]`.
 *
 * @param {object} [cfg] Application config.
 * @returns {object|null} The first result `resolveTTSFromBlock` produces, or
 *   `null` when neither source resolves.
 */
export function resolveTTSConfig(cfg) {
    const providers = cfg?.models?.providers;
    // Plugin-specific configuration has priority.
    const channelTts = cfg?.channels?.qqbot?.tts;
    if (channelTts && channelTts.enabled !== false) {
        const channelProvider = channelTts.provider || "openai";
        const fromChannel = resolveTTSFromBlock(channelTts, providers?.[channelProvider]);
        if (fromChannel) {
            return fromChannel;
        }
    }
    // Fall back to the framework-level configuration.
    const messageTts = cfg?.messages?.tts;
    if (messageTts && messageTts.auto !== "disabled") {
        const messageProvider = messageTts.provider || "openai";
        const fromMessages = resolveTTSFromBlock(messageTts[messageProvider] ?? {}, providers?.[messageProvider]);
        if (fromMessages) {
            return fromMessages;
        }
    }
    return null;
}
|
|
204
|
+
/**
 * Build the request URL and headers for a TTS call.
 *
 * The URL is `<baseUrl>/audio/speech` plus an optional query string built
 * from `queryParams`. Authentication uses an `api-key` header when
 * `authStyle` is "api-key" and an `Authorization: Bearer` header otherwise
 * (the original author describes these as the Azure OpenAI and standard
 * OpenAI styles).
 *
 * @param {{baseUrl: string, apiKey: string, authStyle?: string,
 *   queryParams?: Record<string, string>}} ttsCfg Resolved TTS settings.
 * @returns {{url: string, headers: Record<string, string>}} Request target.
 */
function buildTTSRequest(ttsCfg) {
    const { baseUrl, apiKey, authStyle, queryParams } = ttsCfg;
    const hasQuery = queryParams && Object.keys(queryParams).length > 0;
    const querySuffix = hasQuery ? `?${new URLSearchParams(queryParams).toString()}` : "";
    const url = `${baseUrl}/audio/speech${querySuffix}`;
    const authHeader = authStyle === "api-key"
        ? { "api-key": apiKey }
        : { "Authorization": `Bearer ${apiKey}` };
    return { url, headers: { "Content-Type": "application/json", ...authHeader } };
}
|
|
225
|
+
/**
 * Synthesize `text` via the configured `/audio/speech` endpoint and return
 * raw PCM audio.
 *
 * Two response formats are tried in order:
 *  1. "pcm"  – used directly, no further decoding.
 *  2. "mp3"  – decoded to PCM with ffmpeg when available, otherwise with the
 *     WASM mp3 decoder.
 * Any failure of the PCM attempt (HTTP 400/422, other HTTP errors, network
 * errors) falls through to mp3; a failure of the mp3 attempt is thrown.
 * Each request is aborted if no response arrives within 120 s.
 *
 * @param {string} text Text to synthesize.
 * @param {{model: string, voice: string, authStyle?: string, speed?: number}} ttsCfg
 *   Resolved TTS settings (also passed to `buildTTSRequest`).
 * @returns {Promise<{pcmBuffer: Buffer, sampleRate: number}>} PCM bytes and
 *   the sample rate (always 24000).
 * @throws {Error} When every format fails; the last error is rethrown.
 */
export async function textToSpeechPCM(text, ttsCfg) {
    const sampleRate = 24000;
    const { url, headers } = buildTTSRequest(ttsCfg);
    console.log(`[tts] Request: model=${ttsCfg.model}, voice=${ttsCfg.voice}, authStyle=${ttsCfg.authStyle ?? "bearer"}, url=${url}`);
    console.log(`[tts] Input text (${text.length} chars): "${text.slice(0, 80)}${text.length > 80 ? "..." : ""}"`);
    // Try PCM first (best quality, no second transcode), then mp3.
    const formats = [
        { format: "pcm", needsDecode: false },
        { format: "mp3", needsDecode: true },
    ];
    let lastError = null;
    const startTime = Date.now();
    for (const { format, needsDecode } of formats) {
        const controller = new AbortController();
        const ttsTimeout = setTimeout(() => controller.abort(), 120000);
        try {
            const body = {
                model: ttsCfg.model,
                input: text,
                voice: ttsCfg.voice,
                response_format: format,
                ...(format === "pcm" ? { sample_rate: sampleRate } : {}),
                ...(ttsCfg.speed !== undefined ? { speed: ttsCfg.speed } : {}),
            };
            console.log(`[tts] Trying format=${format}...`);
            const fetchStart = Date.now();
            const resp = await fetch(url, {
                method: "POST",
                headers,
                body: JSON.stringify(body),
                signal: controller.signal,
            }).finally(() => clearTimeout(ttsTimeout));
            const fetchMs = Date.now() - fetchStart;
            if (!resp.ok) {
                const detail = await resp.text().catch(() => "");
                console.log(`[tts] HTTP ${resp.status} for format=${format} (${fetchMs}ms): ${detail.slice(0, 200)}`);
                // Providers without PCM support (e.g. Azure) reject it; fall back to mp3.
                if (format === "pcm" && (resp.status === 400 || resp.status === 422)) {
                    console.log(`[tts] PCM format not supported, falling back to mp3`);
                    lastError = new Error(`TTS PCM not supported: ${detail.slice(0, 200)}`);
                    continue;
                }
                throw new Error(`TTS failed (HTTP ${resp.status}): ${detail.slice(0, 300)}`);
            }
            const arrayBuffer = await resp.arrayBuffer();
            const rawBuffer = Buffer.from(arrayBuffer);
            console.log(`[tts] Response OK: format=${format}, size=${rawBuffer.length} bytes, latency=${fetchMs}ms`);
            if (!needsDecode) {
                console.log(`[tts] Done: PCM direct, ${rawBuffer.length} bytes, total=${Date.now() - startTime}ms`);
                return { pcmBuffer: rawBuffer, sampleRate };
            }
            // mp3 has to be decoded to PCM.
            console.log(`[tts] Decoding mp3 response (${rawBuffer.length} bytes) to PCM...`);
            // This file is an ES module, where `require` is not defined, so
            // `node:os` is loaded with a dynamic import instead.
            const { tmpdir } = await import("node:os");
            const tmpDir = fs.mkdtempSync(path.join(tmpdir(), "tts-"));
            const tmpMp3 = path.join(tmpDir, "tts.mp3");
            try {
                // Written inside the try so the directory is removed even if the write fails.
                fs.writeFileSync(tmpMp3, rawBuffer);
                // Prefer ffmpeg.
                const ffmpegCmd = await checkFfmpeg();
                if (ffmpegCmd) {
                    const pcmBuf = await ffmpegToPCM(ffmpegCmd, tmpMp3, sampleRate);
                    console.log(`[tts] Done: mp3→PCM (ffmpeg), ${pcmBuf.length} bytes, total=${Date.now() - startTime}ms`);
                    return { pcmBuffer: pcmBuf, sampleRate };
                }
                // WASM fallback
                const pcmBuf = await wasmDecodeMp3ToPCM(rawBuffer, sampleRate);
                if (pcmBuf) {
                    console.log(`[tts] Done: mp3→PCM (wasm), ${pcmBuf.length} bytes, total=${Date.now() - startTime}ms`);
                    return { pcmBuffer: pcmBuf, sampleRate };
                }
                throw new Error("No decoder available for mp3 (install ffmpeg for best compatibility)");
            }
            finally {
                try {
                    // Removes the temp file and its directory in one call.
                    fs.rmSync(tmpDir, { recursive: true, force: true });
                }
                catch {
                    // Best-effort cleanup: a leftover temp dir must not mask the real result.
                }
            }
        }
        catch (err) {
            clearTimeout(ttsTimeout);
            lastError = err instanceof Error ? err : new Error(String(err));
            console.log(`[tts] Error for format=${format}: ${lastError.message.slice(0, 200)}`);
            if (format === "pcm") {
                // Do not give up on a PCM failure; try mp3 next.
                continue;
            }
            throw lastError;
        }
    }
    console.log(`[tts] All formats exhausted after ${Date.now() - startTime}ms`);
    throw lastError ?? new Error("TTS failed: all formats exhausted");
}
|
|
319
|
+
/**
 * Encode PCM audio to SILK with silk-wasm's `encode`.
 *
 * @param {Buffer|Uint8Array} pcmBuffer PCM bytes; handed to the encoder as a
 *   Uint8Array view over the same memory.
 * @param {number} sampleRate Sample rate passed through to `encode`.
 * @returns {Promise<{silkBuffer: Buffer, duration: number}>} The encoded
 *   bytes (a Buffer view over the encoder output) and the duration reported
 *   by the encoder.
 */
export async function pcmToSilk(pcmBuffer, sampleRate) {
    const { buffer, byteOffset, byteLength } = pcmBuffer;
    const encoded = await encode(new Uint8Array(buffer, byteOffset, byteLength), sampleRate);
    const { data, duration } = encoded;
    const silkBuffer = Buffer.from(data.buffer, data.byteOffset, data.byteLength);
    return { silkBuffer, duration };
}
|
|
327
|
+
/**
 * Synthesize `text` to speech, encode it as SILK and store it on disk.
 *
 * The file is written to `<outputDir>/tts-<timestamp>.silk`; the directory
 * is created recursively when it does not exist.
 *
 * @param {string} text Text to synthesize.
 * @param {object} ttsCfg Resolved TTS settings, forwarded to `textToSpeechPCM`.
 * @param {string} outputDir Directory that receives the `.silk` file.
 * @returns {Promise<{silkPath: string, silkBase64: string, duration: number}>}
 *   Path of the written file, its content as Base64, and the duration
 *   reported by the encoder.
 */
export async function textToSilk(text, ttsCfg, outputDir) {
    const speech = await textToSpeechPCM(text, ttsCfg);
    const encoded = await pcmToSilk(speech.pcmBuffer, speech.sampleRate);
    if (!fs.existsSync(outputDir)) {
        fs.mkdirSync(outputDir, { recursive: true });
    }
    const silkPath = path.join(outputDir, `tts-${Date.now()}.silk`);
    fs.writeFileSync(silkPath, encoded.silkBuffer);
    return {
        silkPath,
        silkBase64: encoded.silkBuffer.toString("base64"),
        duration: encoded.duration,
    };
}
|
|
336
|
+
// ============ 核心:任意音频 → SILK Base64 ============
|
|
337
|
+
/** Audio formats uploaded to the QQ Bot API as-is (no SILK conversion). */
const QQ_NATIVE_UPLOAD_FORMATS = [".wav", ".mp3", ".silk"];
/**
 * Turn a local audio file into Base64 that can be uploaded to QQ Bot.
 *
 * Strategy, in order:
 *  0. Extension is in the direct-upload list → return the file as Base64.
 *  1. `.slk` / `.slac` extension, or SILK detected from the file header
 *     (with or without an AMR marker) → return the file as Base64.
 *  2. ffmpeg available → decode to PCM s16le 24000 Hz, then encode to SILK.
 *     A failing ffmpeg run is logged and the fallbacks below are tried.
 *  3. Fallbacks without ffmpeg: `.pcm` is encoded directly (treated as
 *     s16le 24000 Hz mono per the original note), WAV/RIFF goes through
 *     `parseWavFallback`, `.mp3` / `.mpeg` through the WASM mp3 decoder.
 * If nothing applies, an "unsupported format" error with an ffmpeg install
 * hint is logged.
 *
 * @param {string} filePath Path of the audio file.
 * @param {string[]} [directUploadFormats] Custom direct-upload list; it is
 *   passed through `normalizeFormats`. When omitted,
 *   `QQ_NATIVE_UPLOAD_FORMATS` is used.
 * @returns {Promise<string|null>} Base64 payload, or `null` when the file is
 *   missing, empty, or cannot be converted.
 */
export async function audioFileToSilkBase64(filePath, directUploadFormats) {
    if (!fs.existsSync(filePath)) {
        return null;
    }
    const buf = fs.readFileSync(filePath);
    if (buf.length === 0) {
        console.error(`[audio-convert] file is empty: ${filePath}`);
        return null;
    }
    // silk-wasm works on Uint8Array views.
    const viewOf = (chunk) => new Uint8Array(chunk.buffer, chunk.byteOffset, chunk.byteLength);
    const ext = path.extname(filePath).toLowerCase();
    // 0. Direct upload for natively supported (or configured) formats.
    const uploadFormats = directUploadFormats ? normalizeFormats(directUploadFormats) : QQ_NATIVE_UPLOAD_FORMATS;
    if (uploadFormats.includes(ext)) {
        console.log(`[audio-convert] direct upload (QQ native format): ${ext} (${buf.length} bytes)`);
        return buf.toString("base64");
    }
    // 1. SILK-ish extensions: confirm via the SILK magic, then use as-is.
    if (ext === ".slk" || ext === ".slac") {
        if (isSilk(viewOf(stripAmrHeader(buf)))) {
            console.log(`[audio-convert] SILK file, direct use: ${filePath} (${buf.length} bytes)`);
            return buf.toString("base64");
        }
    }
    // Header-based SILK detection, independent of the extension.
    const wholeFile = viewOf(buf);
    const withoutAmr = viewOf(stripAmrHeader(buf));
    if (isSilk(wholeFile) || isSilk(withoutAmr)) {
        console.log(`[audio-convert] SILK detected by header: ${filePath} (${buf.length} bytes)`);
        return buf.toString("base64");
    }
    const targetRate = 24000;
    // 2. Prefer ffmpeg when it can be found.
    const ffmpegCmd = await checkFfmpeg();
    if (ffmpegCmd) {
        try {
            console.log(`[audio-convert] ffmpeg (${ffmpegCmd}): converting ${ext} (${buf.length} bytes) → PCM s16le ${targetRate}Hz`);
            const pcmFromFfmpeg = await ffmpegToPCM(ffmpegCmd, filePath, targetRate);
            if (pcmFromFfmpeg.length === 0) {
                console.error(`[audio-convert] ffmpeg produced empty PCM output`);
                return null;
            }
            const viaFfmpeg = await pcmToSilk(pcmFromFfmpeg, targetRate);
            console.log(`[audio-convert] ffmpeg: ${ext} → SILK done (${viaFfmpeg.silkBuffer.length} bytes)`);
            return viaFfmpeg.silkBuffer.toString("base64");
        }
        catch (err) {
            // Deliberately no return: continue with the fallbacks below.
            console.error(`[audio-convert] ffmpeg conversion failed: ${err instanceof Error ? err.message : String(err)}`);
        }
    }
    // 3. Fallbacks when ffmpeg is missing or failed.
    console.log(`[audio-convert] fallback: trying WASM decoders for ${ext}`);
    // 3a. Raw PCM.
    if (ext === ".pcm") {
        const rawPcm = Buffer.from(buf.buffer, buf.byteOffset, buf.byteLength);
        const fromPcm = await pcmToSilk(rawPcm, targetRate);
        return fromPcm.silkBuffer.toString("base64");
    }
    // 3b. WAV, parsed by parseWavFallback.
    if (ext === ".wav" || (buf.length >= 4 && buf.toString("ascii", 0, 4) === "RIFF")) {
        const wavPcm = parseWavFallback(buf);
        if (wavPcm) {
            const fromWav = await pcmToSilk(wavPcm, targetRate);
            return fromWav.silkBuffer.toString("base64");
        }
    }
    // 3c. MP3 via the WASM decoder.
    if (ext === ".mp3" || ext === ".mpeg") {
        const mp3Pcm = await wasmDecodeMp3ToPCM(buf, targetRate);
        if (mp3Pcm) {
            const fromMp3 = await pcmToSilk(mp3Pcm, targetRate);
            console.log(`[audio-convert] WASM: MP3 → SILK done (${fromMp3.silkBuffer.length} bytes)`);
            return fromMp3.silkBuffer.toString("base64");
        }
    }
    let installHint;
    if (isWindows()) {
        installHint = "安装方式: choco install ffmpeg 或 scoop install ffmpeg 或从 https://ffmpeg.org 下载";
    }
    else if (process.platform === "darwin") {
        installHint = "安装方式: brew install ffmpeg";
    }
    else {
        installHint = "安装方式: sudo apt install ffmpeg 或 sudo yum install ffmpeg";
    }
    console.error(`[audio-convert] unsupported format: ${ext} (no ffmpeg available). ${installHint}`);
    return null;
}
|
|
437
|
+
/**
 * Poll until a file exists and its size has stopped changing.
 *
 * Intended for waiting on a file that another step is still writing (the
 * original note mentions TTS output). The file counts as ready once the same
 * non-zero size has been observed on three consecutive polls.
 *
 * Early exits:
 *  - the file exists but stays at 0 bytes for more than 10 s → give up;
 *  - the file has never appeared after 15 s → give up;
 *  - `timeoutMs` is the overall limit; after it, one last check accepts any
 *    non-empty file.
 *
 * @param {string} filePath File to wait for.
 * @param {number} [timeoutMs=30000] Maximum total wait in milliseconds.
 * @param {number} [pollMs=500] Delay between polls in milliseconds.
 * @returns {Promise<number>} File size in bytes, or 0 when giving up.
 */
export async function waitForFile(filePath, timeoutMs = 30000, pollMs = 500) {
    // Give-up threshold for a file that exists but remains empty.
    const emptyGiveUpMs = 10000;
    // Give-up threshold for a file that never shows up.
    const noFileGiveUpMs = 15000;
    const startedAt = Date.now();
    const elapsed = () => Date.now() - startedAt;
    let previousSize = -1;
    let unchangedPolls = 0;
    let seenFile = false;
    let firstSeenAt = 0;
    let polls = 0;
    while (elapsed() < timeoutMs) {
        polls += 1;
        let info = null;
        try {
            info = fs.statSync(filePath);
        }
        catch {
            info = null;
        }
        if (info === null) {
            // Not there (yet): only bail out if it has never been seen.
            if (!seenFile && elapsed() > noFileGiveUpMs) {
                console.error(`[audio-convert] waitForFile: file never appeared after ${noFileGiveUpMs}ms, giving up: ${path.basename(filePath)}`);
                return 0;
            }
        }
        else {
            if (!seenFile) {
                seenFile = true;
                firstSeenAt = Date.now();
                console.log(`[audio-convert] waitForFile: file appeared (${info.size} bytes, after ${Date.now() - startedAt}ms): ${path.basename(filePath)}`);
            }
            if (info.size > 0) {
                unchangedPolls = info.size === previousSize ? unchangedPolls + 1 : 0;
                if (unchangedPolls >= 2) {
                    console.log(`[audio-convert] waitForFile: ready (${info.size} bytes, waited ${Date.now() - startedAt}ms, polls=${polls})`);
                    return info.size;
                }
                previousSize = info.size;
            }
            else if (Date.now() - firstSeenAt > emptyGiveUpMs) {
                console.error(`[audio-convert] waitForFile: file still empty after ${emptyGiveUpMs}ms, giving up: ${path.basename(filePath)}`);
                return 0;
            }
        }
        await new Promise((wake) => setTimeout(wake, pollMs));
    }
    // Timed out: take one final look before reporting failure.
    let finalInfo;
    try {
        finalInfo = fs.statSync(filePath);
    }
    catch {
        console.error(`[audio-convert] waitForFile: timeout after ${timeoutMs}ms, file never appeared: ${path.basename(filePath)}`);
        return 0;
    }
    if (finalInfo.size > 0) {
        console.warn(`[audio-convert] waitForFile: timeout but file has data (${finalInfo.size} bytes), using it`);
        return finalInfo.size;
    }
    console.error(`[audio-convert] waitForFile: timeout after ${timeoutMs}ms, file exists but empty (0 bytes): ${path.basename(filePath)}`);
    return 0;
}
|
|
515
|
+
// ============ ffmpeg 跨平台调用 ============
|
|
516
|
+
/**
 * Look up ffmpeg by delegating to detectFfmpeg().
 * (The original note says the detector is the cross-platform one from platform.ts.)
 * @returns whatever detectFfmpeg() yields; per the original note, the ffmpeg executable path or null
 */
async function checkFfmpeg() {
    const ffmpegPath = await detectFfmpeg();
    return ffmpegPath;
}
|
|
523
|
+
/**
 * Run ffmpeg on an audio file and collect raw PCM (s16le, mono) from its stdout.
 *
 * Cross-platform notes carried over from the original comment:
 * - stdout is requested as a Buffer (encoding: "buffer"); the original note says this
 *   avoids BOM problems with pipe:1 on Windows
 * - ffmpegCmd is expected to be the full path from detectFfmpeg(), so installs
 *   outside PATH work as well
 *
 * @param ffmpegCmd ffmpeg executable to run
 * @param inputPath path of the audio file to decode
 * @param sampleRate output sample rate in Hz (default 24000)
 * @returns Promise resolving to the PCM bytes; rejects with Error("ffmpeg failed: ...") when ffmpeg errors
 */
function ffmpegToPCM(ffmpegCmd, inputPath, sampleRate = 24000) {
    return new Promise((resolve, reject) => {
        const inputArgs = ["-i", inputPath];
        const outputArgs = [
            "-f", "s16le",
            "-ar", String(sampleRate),
            "-ac", "1",
            "-acodec", "pcm_s16le",
            "-v", "error",
            "pipe:1",
        ];
        const execOptions = {
            maxBuffer: 50 * 1024 * 1024,
            encoding: "buffer",
        };
        if (isWindows()) {
            // Windows: keep the spawned console window hidden
            execOptions.windowsHide = true;
        }
        const onDone = (failure, pcmBytes) => {
            if (!failure) {
                resolve(pcmBytes);
                return;
            }
            reject(new Error(`ffmpeg failed: ${failure.message}`));
        };
        execFile(ffmpegCmd, [...inputArgs, ...outputArgs], execOptions, onDone);
    });
}
|
|
555
|
+
// ============ WASM fallback: MP3 解码 ============
|
|
556
|
+
/**
 * Decode MP3 bytes to mono s16le PCM using the mpg123-decoder WASM module.
 * Per the original note, this is only the fallback for when ffmpeg is unavailable.
 *
 * Pipeline: decode -> average all channels into mono -> convert Float32 to
 * int16 -> linear-interpolation resample when the decoded rate differs from
 * targetRate.
 *
 * @param buf byte view (Buffer) holding the MP3 data
 * @param targetRate desired output sample rate in Hz
 * @returns Buffer of s16le mono PCM, or null when nothing was decoded or any
 *          step threw (failures are logged, never rethrown)
 */
async function wasmDecodeMp3ToPCM(buf, targetRate) {
    try {
        const { MPEGDecoder } = await import("mpg123-decoder");
        console.log(`[audio-convert] WASM MP3 decode: size=${buf.length} bytes`);
        const decoder = new MPEGDecoder();
        await decoder.ready;
        let decoded;
        try {
            decoded = decoder.decode(new Uint8Array(buf.buffer, buf.byteOffset, buf.byteLength));
        }
        finally {
            // Always release the decoder, even when decode() throws, so the WASM instance is not leaked.
            decoder.free();
        }
        if (decoded.samplesDecoded === 0 || decoded.channelData.length === 0) {
            console.error(`[audio-convert] WASM MP3 decode: no samples (samplesDecoded=${decoded.samplesDecoded})`);
            return null;
        }
        console.log(`[audio-convert] WASM MP3 decode: samples=${decoded.samplesDecoded}, sampleRate=${decoded.sampleRate}, channels=${decoded.channelData.length}`);
        // Downmix Float32 channels to mono by averaging
        let floatMono;
        if (decoded.channelData.length === 1) {
            floatMono = decoded.channelData[0];
        }
        else {
            floatMono = new Float32Array(decoded.samplesDecoded);
            const channels = decoded.channelData.length;
            for (let i = 0; i < decoded.samplesDecoded; i++) {
                let sum = 0;
                for (let ch = 0; ch < channels; ch++) {
                    sum += decoded.channelData[ch][i];
                }
                floatMono[i] = sum / channels;
            }
        }
        // Float32 -> s16le (clamp to [-1, 1]; negative side scales by 32768, positive by 32767)
        const s16 = new Uint8Array(floatMono.length * 2);
        const view = new DataView(s16.buffer);
        for (let i = 0; i < floatMono.length; i++) {
            const clamped = Math.max(-1, Math.min(1, floatMono[i]));
            const val = clamped < 0 ? clamped * 32768 : clamped * 32767;
            view.setInt16(i * 2, Math.round(val), true);
        }
        // Simple linear-interpolation resampling
        let pcm = s16;
        if (decoded.sampleRate !== targetRate) {
            const inputSamples = s16.length / 2;
            const outputSamples = Math.round(inputSamples * targetRate / decoded.sampleRate);
            const output = new Uint8Array(outputSamples * 2);
            const inView = new DataView(s16.buffer, s16.byteOffset, s16.byteLength);
            const outView = new DataView(output.buffer, output.byteOffset, output.byteLength);
            for (let i = 0; i < outputSamples; i++) {
                const srcIdx = i * decoded.sampleRate / targetRate;
                const idx0 = Math.floor(srcIdx);
                // Clamp the right-hand neighbour so the last output sample does not read past the input
                const idx1 = Math.min(idx0 + 1, inputSamples - 1);
                const frac = srcIdx - idx0;
                const s0 = inView.getInt16(idx0 * 2, true);
                const s1 = inView.getInt16(idx1 * 2, true);
                const sample = Math.round(s0 + (s1 - s0) * frac);
                outView.setInt16(i * 2, Math.max(-32768, Math.min(32767, sample)), true);
            }
            pcm = output;
        }
        return Buffer.from(pcm.buffer, pcm.byteOffset, pcm.byteLength);
    }
    catch (err) {
        console.error(`[audio-convert] WASM MP3 decode failed: ${err instanceof Error ? err.message : String(err)}`);
        if (err instanceof Error && err.stack) {
            console.error(`[audio-convert] stack: ${err.stack}`);
        }
        return null;
    }
}
|
|
627
|
+
/**
 * Canonicalize a list of format/extension names: each entry is lower-cased,
 * trimmed of surrounding whitespace, and given a leading "." if it lacks one.
 *
 * @param formats array of extension strings, e.g. ["MP3", ".wav"]
 * @returns new array with the normalized entries, in the same order
 */
function normalizeFormats(formats) {
    const withLeadingDot = (raw) => {
        const ext = raw.toLowerCase().trim();
        if (ext.startsWith(".")) {
            return ext;
        }
        return `.${ext}`;
    };
    return formats.map((entry) => withLeadingDot(entry));
}
|
|
636
|
+
/**
 * Minimal WAV parser used when ffmpeg is not available.
 *
 * Accepts only uncompressed PCM (format tag 1) with 16-bit samples whose
 * "fmt " chunk directly follows the RIFF/WAVE header. The audio is downmixed
 * to mono by averaging channels and, when the file's sample rate differs from
 * `targetRate`, resampled with simple linear interpolation.
 *
 * Notes:
 * - The search for the "data" chunk starts after the declared "fmt " chunk
 *   size (16, 18 or 40 bytes are all legal), not at a fixed offset.
 * - Odd-sized chunks are followed by a zero pad byte in RIFF; it is skipped
 *   when present.
 * - A truncated data chunk is trimmed to whole sample frames instead of
 *   reading past the end of the buffer.
 * - For mono input already at `targetRate` the returned Buffer is a view over
 *   the same memory as `buf` (no copy is made).
 *
 * @param buf Buffer holding the complete WAV file
 * @param targetRate output sample rate in Hz (default 24000)
 * @returns Buffer of mono s16le PCM, or null when the input is not a supported
 *          WAV file or has no "data" chunk
 */
function parseWavFallback(buf, targetRate = 24000) {
    if (buf.length < 44)
        return null;
    if (buf.toString("ascii", 0, 4) !== "RIFF")
        return null;
    if (buf.toString("ascii", 8, 12) !== "WAVE")
        return null;
    if (buf.toString("ascii", 12, 16) !== "fmt ")
        return null;
    const audioFormat = buf.readUInt16LE(20);
    if (audioFormat !== 1)
        return null;
    const channels = buf.readUInt16LE(22);
    const sampleRate = buf.readUInt32LE(24);
    const bitsPerSample = buf.readUInt16LE(34);
    if (bitsPerSample !== 16)
        return null;
    // A zero sample rate would make the resampling ratio infinite.
    if (sampleRate === 0)
        return null;
    // Offset of the chunk following a chunk body; skips the RIFF pad byte after
    // odd-sized bodies. Chunk IDs are ASCII, so a 0x00 here can only be padding.
    const nextChunkOffset = (bodyStart, bodySize) => {
        const bodyEnd = bodyStart + bodySize;
        return bodySize % 2 === 1 && buf[bodyEnd] === 0 ? bodyEnd + 1 : bodyEnd;
    };
    // Honour the declared fmt chunk size, but never go below the 16 bytes already read above.
    const fmtSize = Math.max(16, buf.readUInt32LE(16));
    // Walk the chunk list looking for "data"
    let offset = nextChunkOffset(20, fmtSize);
    while (offset < buf.length - 8) {
        const chunkId = buf.toString("ascii", offset, offset + 4);
        const chunkSize = buf.readUInt32LE(offset + 4);
        if (chunkId !== "data") {
            offset = nextChunkOffset(offset + 8, chunkSize);
            continue;
        }
        const dataStart = offset + 8;
        // The declared size may exceed what is actually present (truncated file)
        const dataEnd = Math.min(dataStart + chunkSize, buf.length);
        // Keep only whole frames so every 16-bit read below stays in bounds and aligned
        const frameBytes = 2 * Math.max(channels, 1);
        const frameCount = Math.floor((dataEnd - dataStart) / frameBytes);
        let pcm = new Uint8Array(buf.buffer, buf.byteOffset + dataStart, frameCount * frameBytes);
        // Downmix interleaved channels to mono by averaging
        if (channels > 1) {
            const mono = new Uint8Array(frameCount * 2);
            const inV = new DataView(pcm.buffer, pcm.byteOffset, pcm.byteLength);
            const outV = new DataView(mono.buffer, mono.byteOffset, mono.byteLength);
            for (let i = 0; i < frameCount; i++) {
                let sum = 0;
                for (let ch = 0; ch < channels; ch++) {
                    sum += inV.getInt16((i * channels + ch) * 2, true);
                }
                outV.setInt16(i * 2, Math.max(-32768, Math.min(32767, Math.round(sum / channels))), true);
            }
            pcm = mono;
        }
        // Simple linear-interpolation resampling
        if (sampleRate !== targetRate) {
            const inSamples = pcm.length / 2;
            const outSamples = Math.round(inSamples * targetRate / sampleRate);
            const out = new Uint8Array(outSamples * 2);
            const inV = new DataView(pcm.buffer, pcm.byteOffset, pcm.byteLength);
            const outV = new DataView(out.buffer, out.byteOffset, out.byteLength);
            for (let i = 0; i < outSamples; i++) {
                const src = i * sampleRate / targetRate;
                const i0 = Math.floor(src);
                // Clamp the right-hand neighbour at the last input sample
                const i1 = Math.min(i0 + 1, inSamples - 1);
                const f = src - i0;
                const s0 = inV.getInt16(i0 * 2, true);
                const s1 = inV.getInt16(i1 * 2, true);
                outV.setInt16(i * 2, Math.max(-32768, Math.min(32767, Math.round(s0 + (s1 - s0) * f))), true);
            }
            pcm = out;
        }
        return Buffer.from(pcm.buffer, pcm.byteOffset, pcm.byteLength);
    }
    return null;
}
|