@gakr-gakr/qqbot 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149) hide show
  1. package/api.ts +56 -0
  2. package/autobot.plugin.json +167 -0
  3. package/channel-plugin-api.ts +1 -0
  4. package/index.ts +33 -0
  5. package/package.json +64 -0
  6. package/runtime-api.ts +9 -0
  7. package/secret-contract-api.ts +5 -0
  8. package/setup-entry.ts +13 -0
  9. package/setup-plugin-api.ts +3 -0
  10. package/skills/qqbot-channel/SKILL.md +262 -0
  11. package/skills/qqbot-channel/references/api_references.md +521 -0
  12. package/skills/qqbot-media/SKILL.md +37 -0
  13. package/skills/qqbot-remind/SKILL.md +153 -0
  14. package/src/bridge/approval/capability.ts +225 -0
  15. package/src/bridge/approval/handler-runtime.ts +204 -0
  16. package/src/bridge/bootstrap.ts +135 -0
  17. package/src/bridge/channel-entry.ts +18 -0
  18. package/src/bridge/commands/framework-context-adapter.ts +60 -0
  19. package/src/bridge/commands/framework-registration.ts +66 -0
  20. package/src/bridge/commands/from-parser.ts +60 -0
  21. package/src/bridge/commands/result-dispatcher.ts +76 -0
  22. package/src/bridge/config-shared.ts +132 -0
  23. package/src/bridge/config.ts +176 -0
  24. package/src/bridge/gateway.ts +178 -0
  25. package/src/bridge/logger.ts +31 -0
  26. package/src/bridge/narrowing.ts +31 -0
  27. package/src/bridge/plugin-version.ts +102 -0
  28. package/src/bridge/runtime.ts +25 -0
  29. package/src/bridge/sdk-adapter.ts +164 -0
  30. package/src/bridge/setup/finalize.ts +144 -0
  31. package/src/bridge/setup/surface.ts +34 -0
  32. package/src/bridge/tools/channel.ts +58 -0
  33. package/src/bridge/tools/index.ts +15 -0
  34. package/src/bridge/tools/remind.ts +91 -0
  35. package/src/channel.setup.ts +33 -0
  36. package/src/channel.ts +399 -0
  37. package/src/config-schema.ts +84 -0
  38. package/src/engine/access/index.ts +2 -0
  39. package/src/engine/access/resolve-policy.ts +30 -0
  40. package/src/engine/access/sender-match.ts +55 -0
  41. package/src/engine/access/types.ts +2 -0
  42. package/src/engine/adapter/audio.port.ts +27 -0
  43. package/src/engine/adapter/commands.port.ts +22 -0
  44. package/src/engine/adapter/history.port.ts +52 -0
  45. package/src/engine/adapter/index.ts +76 -0
  46. package/src/engine/adapter/mention-gate.port.ts +50 -0
  47. package/src/engine/adapter/types.ts +38 -0
  48. package/src/engine/api/api-client.ts +212 -0
  49. package/src/engine/api/media-chunked.ts +644 -0
  50. package/src/engine/api/media.ts +218 -0
  51. package/src/engine/api/messages.ts +293 -0
  52. package/src/engine/api/retry.ts +217 -0
  53. package/src/engine/api/routes.ts +95 -0
  54. package/src/engine/api/token.ts +277 -0
  55. package/src/engine/approval/index.ts +224 -0
  56. package/src/engine/commands/builtin/log-helpers.ts +341 -0
  57. package/src/engine/commands/builtin/register-all.ts +17 -0
  58. package/src/engine/commands/builtin/register-approve.ts +201 -0
  59. package/src/engine/commands/builtin/register-basic.ts +95 -0
  60. package/src/engine/commands/builtin/register-clear-storage.ts +187 -0
  61. package/src/engine/commands/builtin/register-logs.ts +20 -0
  62. package/src/engine/commands/builtin/register-streaming.ts +138 -0
  63. package/src/engine/commands/builtin/state.ts +31 -0
  64. package/src/engine/commands/slash-command-auth.ts +88 -0
  65. package/src/engine/commands/slash-command-handler.ts +168 -0
  66. package/src/engine/commands/slash-command-test-support.ts +39 -0
  67. package/src/engine/commands/slash-commands-impl.ts +61 -0
  68. package/src/engine/commands/slash-commands.ts +202 -0
  69. package/src/engine/config/credential-backup.ts +108 -0
  70. package/src/engine/config/credentials.ts +76 -0
  71. package/src/engine/config/group.ts +227 -0
  72. package/src/engine/config/resolve.ts +283 -0
  73. package/src/engine/config/setup-logic.ts +84 -0
  74. package/src/engine/gateway/active-cfg.ts +52 -0
  75. package/src/engine/gateway/codec.ts +47 -0
  76. package/src/engine/gateway/constants.ts +117 -0
  77. package/src/engine/gateway/event-dispatcher.ts +177 -0
  78. package/src/engine/gateway/gateway-connection.ts +356 -0
  79. package/src/engine/gateway/gateway.ts +267 -0
  80. package/src/engine/gateway/inbound-attachments.ts +360 -0
  81. package/src/engine/gateway/inbound-context.ts +82 -0
  82. package/src/engine/gateway/inbound-pipeline.ts +171 -0
  83. package/src/engine/gateway/interaction-handler.ts +345 -0
  84. package/src/engine/gateway/message-queue.ts +404 -0
  85. package/src/engine/gateway/outbound-dispatch.ts +590 -0
  86. package/src/engine/gateway/reconnect.ts +199 -0
  87. package/src/engine/gateway/stages/access-stage.ts +99 -0
  88. package/src/engine/gateway/stages/assembly-stage.ts +156 -0
  89. package/src/engine/gateway/stages/content-stage.ts +77 -0
  90. package/src/engine/gateway/stages/envelope-stage.ts +144 -0
  91. package/src/engine/gateway/stages/group-gate-stage.ts +223 -0
  92. package/src/engine/gateway/stages/index.ts +18 -0
  93. package/src/engine/gateway/stages/quote-stage.ts +113 -0
  94. package/src/engine/gateway/stages/refidx-stage.ts +62 -0
  95. package/src/engine/gateway/stages/stub-contexts.ts +77 -0
  96. package/src/engine/gateway/types.ts +230 -0
  97. package/src/engine/gateway/typing-keepalive.ts +102 -0
  98. package/src/engine/gateway/ws-client.ts +16 -0
  99. package/src/engine/group/activation.ts +88 -0
  100. package/src/engine/group/history.ts +321 -0
  101. package/src/engine/group/mention.ts +114 -0
  102. package/src/engine/group/message-gating.ts +108 -0
  103. package/src/engine/messaging/decode-media-path.ts +82 -0
  104. package/src/engine/messaging/media-source.ts +210 -0
  105. package/src/engine/messaging/media-type-detect.ts +27 -0
  106. package/src/engine/messaging/outbound-audio-port.ts +38 -0
  107. package/src/engine/messaging/outbound-deliver.ts +810 -0
  108. package/src/engine/messaging/outbound-media-send.ts +658 -0
  109. package/src/engine/messaging/outbound-reply.ts +27 -0
  110. package/src/engine/messaging/outbound-result-helpers.ts +54 -0
  111. package/src/engine/messaging/outbound-types.ts +47 -0
  112. package/src/engine/messaging/outbound.ts +485 -0
  113. package/src/engine/messaging/reply-dispatcher.ts +597 -0
  114. package/src/engine/messaging/reply-limiter.ts +164 -0
  115. package/src/engine/messaging/sender.ts +741 -0
  116. package/src/engine/messaging/streaming-c2c.ts +1192 -0
  117. package/src/engine/messaging/streaming-media-send.ts +544 -0
  118. package/src/engine/messaging/target-parser.ts +104 -0
  119. package/src/engine/ref/format-message-ref.ts +142 -0
  120. package/src/engine/ref/format-ref-entry.ts +27 -0
  121. package/src/engine/ref/store.ts +211 -0
  122. package/src/engine/ref/types.ts +27 -0
  123. package/src/engine/session/known-users.ts +138 -0
  124. package/src/engine/session/session-store.ts +207 -0
  125. package/src/engine/tools/channel-api.ts +244 -0
  126. package/src/engine/tools/remind-logic.ts +377 -0
  127. package/src/engine/types.ts +313 -0
  128. package/src/engine/utils/attachment-tags.ts +174 -0
  129. package/src/engine/utils/audio.ts +525 -0
  130. package/src/engine/utils/data-paths.ts +38 -0
  131. package/src/engine/utils/diagnostics.ts +93 -0
  132. package/src/engine/utils/file-utils.ts +215 -0
  133. package/src/engine/utils/format.ts +70 -0
  134. package/src/engine/utils/image-size.ts +249 -0
  135. package/src/engine/utils/log.ts +77 -0
  136. package/src/engine/utils/media-tags.ts +177 -0
  137. package/src/engine/utils/payload.ts +157 -0
  138. package/src/engine/utils/platform.ts +265 -0
  139. package/src/engine/utils/request-context.ts +60 -0
  140. package/src/engine/utils/string-normalize.ts +91 -0
  141. package/src/engine/utils/stt.ts +103 -0
  142. package/src/engine/utils/text-parsing.ts +155 -0
  143. package/src/engine/utils/upload-cache.ts +96 -0
  144. package/src/engine/utils/voice-text.ts +15 -0
  145. package/src/exec-approvals.ts +237 -0
  146. package/src/qqbot-test-support.ts +29 -0
  147. package/src/secret-contract.ts +82 -0
  148. package/src/types.ts +210 -0
  149. package/tsconfig.json +16 -0
@@ -0,0 +1,174 @@
1
+ /**
2
+ * Single source of truth for rendering attachment summaries as
3
+ * human-readable tags that the LLM sees.
4
+ *
5
+ * There is exactly ONE vocabulary shared by every consumer:
6
+ *
7
+ * • Type labels: `image` / `voice` / `video` / `file` / `attachment`
8
+ * • Keyword for voice text: `transcript:` (never `content:`)
9
+ * • With source: `MEDIA:{source}` (no bracketed alias)
10
+ * • Without source: `[{type}]` or `[{type}: {filename}]`
11
+ *
12
+ * Both consumers (group history / current inbound event, and the ref-index
13
+ * quoted-message block) call the same function with the same vocabulary.
14
+ * They differ only on two orthogonal dimensions:
15
+ *
16
+ * 1. `transcriptSource` — ref mode appends `[source: local STT]` (or
17
+ * similar) after a voice transcript so the model knows where the
18
+ * text came from. Inline mode omits this (the current turn knows
19
+ * its own STT provenance).
20
+ *
21
+ * 2. Separator — inline joins with `\n` (history replay is multi-line),
22
+ * ref joins with a space (quoted block is rendered inline).
23
+ *
24
+ * These are the ONLY permitted differences between modes. Any new
25
+ * decoration must be added in both modes or behind an explicit option
26
+ * documented here, otherwise the model ends up learning two dialects.
27
+ *
28
+ * Zero external dependencies — pure string formatting.
29
+ */
30
+
31
+ import type { RefAttachmentSummary } from "../ref/types.js";
32
+
33
+ // ============ Types ============
34
+
35
/** Attachment summary shape used by both history entries and ref entries (alias of the ref type). */
export type AttachmentSummary = RefAttachmentSummary;

/**
 * How a tag list is rendered.
 *
 * - `"inline"` — current turn and history replay: no transcript-source
 *   tag, and tags are joined with newlines.
 * - `"ref"` — quoted-message block: a `[source: …]` tag follows a voice
 *   transcript whenever `transcriptSource` is set, and tags are joined
 *   with spaces so the whole block stays on one line.
 */
type RenderMode = "ref" | "inline";
48
+
49
/** Transcript provenance keys, derived from the attachment summary type. */
type TranscriptSourceKey = NonNullable<RefAttachmentSummary["transcriptSource"]>;

/**
 * Prompt-facing label for each transcript provenance value.
 *
 * These strings are part of the prompt contract: they are what the model
 * sees inside a `[source: …]` tag.
 */
export const TRANSCRIPT_SOURCE_LABELS: Record<TranscriptSourceKey, string> = {
  stt: "local STT",
  asr: "platform ASR",
  tts: "TTS source",
  fallback: "fallback text",
};
59
+
60
/** Rendering options accepted by `renderAttachmentTags`. */
interface RenderOptions {
  /** Selects the decorations and the default separator. */
  mode: RenderMode;
  /** String placed between tags; when omitted, inline uses `\n` and ref uses a single space. */
  separator?: string;
  /** Result for an empty or missing attachment list; when omitted, `""` is returned. */
  emptyFallback?: string;
}
68
+
69
+ // ============ Public API ============
70
+
71
/**
 * Turn a list of attachments into the tag string shown to the LLM.
 *
 * Grammar shared by both modes:
 *
 * ```
 * attachment_with_source := "MEDIA:" SOURCE [voice_suffix]
 * voice_suffix           := ' (transcript: "' TEXT '")' [source_suffix]
 * attachment_no_source   := "[" TYPE_LABEL [": " FILENAME] [voice_suffix_bare] "]" [source_suffix_bare]
 * voice_suffix_bare      := ' (transcript: "' TEXT '")'
 * source_suffix          := " [source: " LABEL "]"   ← ref mode only
 * source_suffix_bare     := " [source: " LABEL "]"   ← ref mode only
 * TYPE_LABEL             := "image" | "voice" | "video" | "file" | "attachment"
 * ```
 *
 * The `source_suffix` is the single mode-dependent decoration: `ref`
 * emits it, `inline` does not. All other tokens are the same in both modes.
 *
 * @param attachments - Attachments to render; may be empty or undefined.
 * @param options - Mode plus optional separator / empty-list fallback.
 * @returns The joined tag string, or `options.emptyFallback ?? ""` when
 *   there is nothing to render.
 */
export function renderAttachmentTags(
  attachments: readonly AttachmentSummary[] | undefined,
  options: RenderOptions,
): string {
  const list = attachments ?? [];
  if (!list.length) {
    return options.emptyFallback ?? "";
  }

  const tags = list.map((attachment) => renderOne(attachment, options.mode));

  // Explicit separator wins; otherwise ref is single-line and inline is multi-line.
  const defaultSeparator = options.mode === "ref" ? " " : "\n";
  return tags.join(options.separator ?? defaultSeparator);
}
105
+
106
/**
 * Inline-mode convenience wrapper around `renderAttachmentTags`.
 *
 * This is the main entry point for group history and current-turn
 * rendering, which always use the terse inline form.
 *
 * @param attachments - Attachments to render; may be empty or undefined.
 * @returns The newline-joined tag string, or `""` when there are none.
 */
export function formatAttachmentTags(attachments?: readonly AttachmentSummary[]): string {
  const inlineOptions: RenderOptions = { mode: "inline" };
  return renderAttachmentTags(attachments, inlineOptions);
}
115
+
116
+ // ============ Internal ============
117
+
118
/**
 * Render one attachment as a tag.
 *
 * The tag is the mode-agnostic body from `renderBody`, followed in ref
 * mode only by the `" [source: …]"` tail from `renderSourceSuffix`.
 */
function renderOne(att: AttachmentSummary, mode: RenderMode): string {
  const body = renderBody(att);
  if (mode !== "ref") {
    return body;
  }
  return `${body}${renderSourceSuffix(att)}`;
}
132
+
133
/**
 * Build the mode-independent part of a tag.
 *
 * With a source (`localPath`, else `url`) the result is `MEDIA:{source}`;
 * without one it is `[{label}]` or `[{label}: {filename}]`. A voice
 * attachment with a transcript additionally carries
 * ` (transcript: "…")` — after the source, or inside the brackets.
 */
function renderBody(att: AttachmentSummary): string {
  const transcriptPart =
    att.type === "voice" && att.transcript ? ` (transcript: "${att.transcript}")` : "";
  const source = att.localPath || att.url;

  if (!source) {
    const filenamePart = att.filename ? `: ${att.filename}` : "";
    return `[${labelForType(att.type)}${filenamePart}${transcriptPart}]`;
  }

  return `MEDIA:${source}${transcriptPart}`;
}
147
+
148
/**
 * Build the ref-mode tail that states where a voice transcript came from.
 *
 * @returns ` [source: {label}]` for a voice attachment that has both a
 *   transcript and a `transcriptSource`; otherwise `""`. The label comes
 *   from `TRANSCRIPT_SOURCE_LABELS`, falling back to the raw source key.
 */
function renderSourceSuffix(att: AttachmentSummary): string {
  const isTranscribedVoice = att.type === "voice" && Boolean(att.transcript);
  if (!isTranscribedVoice || !att.transcriptSource) {
    return "";
  }
  const sourceKey = att.transcriptSource;
  return ` [source: ${TRANSCRIPT_SOURCE_LABELS[sourceKey] ?? sourceKey}]`;
}
159
+
160
/**
 * Map an attachment type to its single-word tag label.
 *
 * The four known types label themselves; anything else is `"attachment"`.
 */
function labelForType(type: AttachmentSummary["type"]): string {
  if (type === "image" || type === "voice" || type === "video" || type === "file") {
    return type;
  }
  return "attachment";
}
@@ -0,0 +1,525 @@
1
+ /**
2
+ * Audio format conversion utilities.
3
+ * 音频格式转换工具。
4
+ *
5
+ * Handles SILK ↔ PCM ↔ WAV ↔ MP3 conversions for QQ Bot voice messaging.
6
+ * Uses WASM decoders (silk-wasm, mpg123-decoder) and direct QQ-native uploads
7
+ * without launching native subprocesses.
8
+ *
9
+ * Self-contained within engine/ — no framework SDK dependency.
10
+ */
11
+
12
+ import * as fs from "node:fs";
13
+ import * as path from "node:path";
14
+ import { readRegularFileSync } from "autobot/plugin-sdk/security-runtime";
15
+ import { formatErrorMessage } from "./format.js";
16
+ import { debugLog, debugError, debugWarn } from "./log.js";
17
+ import { normalizeLowercaseStringOrEmpty as normalizeLowercase } from "./string-normalize.js";
18
+
19
type SilkWasm = typeof import("silk-wasm");

/** Cached import promise; `null` until the first load attempt. */
let silkWasmPromise: Promise<SilkWasm | null> | null = null;

/**
 * Load the silk-wasm module on first use and reuse the same promise afterwards.
 *
 * @returns The module, or `null` when the import fails. The failure is
 *   logged as a warning and the `null` outcome stays cached, so the
 *   import is not retried.
 */
function loadSilkWasm(): Promise<SilkWasm | null> {
  if (silkWasmPromise === null) {
    silkWasmPromise = import("silk-wasm").catch((reason) => {
      debugWarn(
        `[audio-convert] silk-wasm not available; SILK encode/decode disabled (${formatErrorMessage(reason)})`,
      );
      return null;
    });
  }
  return silkWasmPromise;
}
35
+
36
/**
 * Prepend a 44-byte RIFF/WAVE header (format tag 1) to raw PCM data.
 *
 * @param pcmData - Raw PCM bytes, copied verbatim after the header.
 * @param sampleRate - Sample rate in Hz written to the header.
 * @param channels - Channel count written to the header (default 1).
 * @param bitsPerSample - Bits per sample written to the header (default 16).
 * @returns A new buffer holding the header followed by the PCM bytes.
 */
export function pcmToWav(
  pcmData: Uint8Array,
  sampleRate: number,
  channels: number = 1,
  bitsPerSample: number = 16,
): Buffer {
  const HEADER_BYTES = 44;
  const bytesPerSample = bitsPerSample / 8;
  const byteRate = sampleRate * channels * bytesPerSample;
  const blockAlign = channels * bytesPerSample;
  const payloadBytes = pcmData.length;
  const totalBytes = HEADER_BYTES + payloadBytes;

  const wav = Buffer.alloc(totalBytes);

  // `write` returns the byte count written; the numeric writers return the next offset.
  let cursor = 0;

  // RIFF container header.
  cursor += wav.write("RIFF", cursor);
  cursor = wav.writeUInt32LE(totalBytes - 8, cursor);
  cursor += wav.write("WAVE", cursor);

  // "fmt " chunk: 16-byte body, format tag 1.
  cursor += wav.write("fmt ", cursor);
  cursor = wav.writeUInt32LE(16, cursor);
  cursor = wav.writeUInt16LE(1, cursor);
  cursor = wav.writeUInt16LE(channels, cursor);
  cursor = wav.writeUInt32LE(sampleRate, cursor);
  cursor = wav.writeUInt32LE(byteRate, cursor);
  cursor = wav.writeUInt16LE(blockAlign, cursor);
  cursor = wav.writeUInt16LE(bitsPerSample, cursor);

  // "data" chunk header, then the samples themselves.
  cursor += wav.write("data", cursor);
  cursor = wav.writeUInt32LE(payloadBytes, cursor);

  const payload = Buffer.from(pcmData.buffer, pcmData.byteOffset, pcmData.byteLength);
  payload.copy(wav, HEADER_BYTES);

  return wav;
}
70
+
71
/**
 * Drop a leading `#!AMR\n` marker from a voice payload.
 *
 * @param buf - Voice payload that may start with the 6-byte marker.
 * @returns A view of `buf` after the marker when the marker is present
 *   and followed by at least one more byte; otherwise `buf` itself.
 */
export function stripAmrHeader(buf: Buffer): Buffer {
  const MARKER = "#!AMR\n";
  const hasMarker =
    buf.length > MARKER.length && buf.toString("latin1", 0, MARKER.length) === MARKER;
  return hasMarker ? buf.subarray(MARKER.length) : buf;
}
79
+
80
/**
 * Decode a SILK voice file (optionally prefixed with `#!AMR\n`) and write it as a WAV file.
 *
 * @param inputPath - Path of the voice file to read.
 * @param outputDir - Directory for the WAV file; defaults to the input
 *   file's directory. It is created when missing.
 * @returns The written WAV path and the decoded duration, or `null` when
 *   the file cannot be read, silk-wasm is unavailable, or the payload is
 *   not SILK. Decode and file-write errors are not caught here.
 */
export async function convertSilkToWav(
  inputPath: string,
  outputDir?: string,
): Promise<{ wavPath: string; duration: number } | null> {
  let source: Buffer;
  try {
    source = readRegularFileSync({ filePath: inputPath }).buffer;
  } catch {
    return null;
  }

  const payload = stripAmrHeader(source);
  const silkBytes = new Uint8Array(payload.buffer, payload.byteOffset, payload.byteLength);

  const silk = await loadSilkWasm();
  if (!silk || !silk.isSilk(silkBytes)) {
    return null;
  }

  const decodeRate = 24000;
  const { data, duration } = await silk.decode(silkBytes, decodeRate);
  const wavBuffer = pcmToWav(data, decodeRate);

  const targetDir = outputDir || path.dirname(inputPath);
  if (!fs.existsSync(targetDir)) {
    fs.mkdirSync(targetDir, { recursive: true });
  }

  // Same base name as the input, with the extension swapped for `.wav`.
  const stem = path.basename(inputPath, path.extname(inputPath));
  const wavPath = path.join(targetDir, `${stem}.wav`);
  fs.writeFileSync(wavPath, wavBuffer);

  return { wavPath, duration };
}
118
+
119
/**
 * Decide whether an attachment is a voice message.
 *
 * @param att - Attachment with an optional content type and filename.
 * @returns `true` when the content type is `"voice"` or starts with
 *   `audio/`, or when the filename extension (lower-cased) is `.amr`,
 *   `.silk`, `.slk` or `.slac`.
 */
export function isVoiceAttachment(att: { content_type?: string; filename?: string }): boolean {
  const { content_type: contentType, filename } = att;
  if (contentType === "voice" || contentType?.startsWith("audio/")) {
    return true;
  }
  if (!filename) {
    return false;
  }
  const ext = normalizeLowercase(path.extname(filename));
  return ext === ".amr" || ext === ".silk" || ext === ".slk" || ext === ".slac";
}
127
+
128
/**
 * Decide whether a file is audio, by MIME type first and extension second.
 *
 * @param filePath - Path whose extension is checked (lower-cased).
 * @param mimeType - Optional MIME type; `"voice"` or any `audio/*` value
 *   short-circuits to `true`.
 * @returns `true` for an audio MIME type or a known audio extension.
 */
export function isAudioFile(filePath: string, mimeType?: string): boolean {
  if (mimeType && (mimeType === "voice" || mimeType.startsWith("audio/"))) {
    return true;
  }

  const knownAudioExts = new Set([
    ".silk",
    ".slk",
    ".amr",
    ".wav",
    ".mp3",
    ".ogg",
    ".opus",
    ".aac",
    ".flac",
    ".m4a",
    ".wma",
    ".pcm",
  ]);
  return knownAudioExts.has(normalizeLowercase(path.extname(filePath)));
}
151
+
152
/** MIME types treated as QQ-native voice formats (no transcoding needed). */
const QQ_NATIVE_VOICE_MIMES = new Set([
  "audio/silk",
  "audio/amr",
  "audio/wav",
  "audio/wave",
  "audio/x-wav",
  "audio/mpeg",
  "audio/mp3",
]);

/** File extensions treated as QQ-native voice formats (no transcoding needed). */
const QQ_NATIVE_VOICE_EXTS = new Set([".silk", ".slk", ".amr", ".wav", ".mp3"]);

/**
 * Decide whether a voice file must be transcoded before upload.
 *
 * @param filePath - Path whose extension is checked (lower-cased).
 * @param mimeType - Optional MIME type, compared lower-cased.
 * @returns `false` when the MIME type or the extension is QQ-native;
 *   otherwise whatever `isAudioFile` reports for the same arguments.
 */
export function shouldTranscodeVoice(filePath: string, mimeType?: string): boolean {
  const nativeByMime = mimeType ? QQ_NATIVE_VOICE_MIMES.has(normalizeLowercase(mimeType)) : false;
  if (nativeByMime) {
    return false;
  }

  const nativeByExt = QQ_NATIVE_VOICE_EXTS.has(normalizeLowercase(path.extname(filePath)));
  return nativeByExt ? false : isAudioFile(filePath, mimeType);
}
175
+
176
/** Extensions uploaded as-is when the caller supplies no explicit list. */
const QQ_NATIVE_UPLOAD_FORMATS = [".wav", ".mp3", ".silk"];

/**
 * Normalize a list of format names to lower-case extensions with a leading dot.
 *
 * @param formats - Format names with or without the leading dot.
 * @returns A new array of normalized extensions, in the same order.
 */
function normalizeFormats(formats: string[]): string[] {
  const withLeadingDot = (value: string): string => (value.startsWith(".") ? value : `.${value}`);
  return formats.map((format) => withLeadingDot(normalizeLowercase(format)));
}
184
+
185
/**
 * Produce the Base64 payload for uploading a local audio file as QQ voice.
 *
 * Steps, in order:
 *   1. Extension is in the direct-upload list (`directUploadFormats`, or
 *      the QQ-native default) → file bytes are returned unchanged.
 *   2. Content is recognized as SILK (with or without a `#!AMR\n` prefix)
 *      → file bytes are returned unchanged.
 *   3. `.pcm` → encoded to SILK at 24 kHz.
 *   4. `.wav` extension or a `RIFF` signature → parsed, then encoded to SILK.
 *   5. `.mp3` / `.mpeg` → decoded via WASM, then encoded to SILK.
 *
 * @param filePath - Local audio file to read.
 * @param directUploadFormats - Optional override of the extensions that
 *   are uploaded without conversion.
 * @returns The Base64 string, or `null` when the file is unreadable,
 *   empty, or in an unsupported format. SILK encoding errors (including
 *   silk-wasm being unavailable) propagate from `pcmToSilk`.
 */
export async function audioFileToSilkBase64(
  filePath: string,
  directUploadFormats?: string[],
): Promise<string | null> {
  let buf: Buffer;
  try {
    buf = readRegularFileSync({ filePath }).buffer;
  } catch {
    return null;
  }

  if (buf.length === 0) {
    debugError(`[audio-convert] file is empty: ${filePath}`);
    return null;
  }

  const toBytes = (b: Buffer): Uint8Array => new Uint8Array(b.buffer, b.byteOffset, b.byteLength);

  const ext = normalizeLowercase(path.extname(filePath));
  const passthroughExts = directUploadFormats
    ? normalizeFormats(directUploadFormats)
    : QQ_NATIVE_UPLOAD_FORMATS;
  if (passthroughExts.includes(ext)) {
    debugLog(`[audio-convert] direct upload (QQ native format): ${ext} (${buf.length} bytes)`);
    return buf.toString("base64");
  }

  // The AMR-stripped view is needed by both SILK probes below.
  const headerlessBytes = toBytes(stripAmrHeader(buf));

  if (ext === ".slk" || ext === ".slac") {
    const silkByExt = await loadSilkWasm();
    if (silkByExt?.isSilk(headerlessBytes)) {
      debugLog(`[audio-convert] SILK file, direct use: ${filePath} (${buf.length} bytes)`);
      return buf.toString("base64");
    }
  }

  // Extension-independent probe: raw bytes first, then the stripped view.
  const silkProbe = await loadSilkWasm();
  if (silkProbe?.isSilk(toBytes(buf)) || silkProbe?.isSilk(headerlessBytes)) {
    debugLog(`[audio-convert] SILK detected by header: ${filePath} (${buf.length} bytes)`);
    return buf.toString("base64");
  }

  const targetRate = 24000;

  debugLog(`[audio-convert] fallback: trying WASM decoders for ${ext}`);

  if (ext === ".pcm") {
    const rawPcm = Buffer.from(buf.buffer, buf.byteOffset, buf.byteLength);
    const encoded = await pcmToSilk(rawPcm, targetRate);
    return encoded.silkBuffer.toString("base64");
  }

  const looksLikeRiff = buf.length >= 4 && buf.toString("ascii", 0, 4) === "RIFF";
  if (ext === ".wav" || looksLikeRiff) {
    const wavPcm = parseWavFallback(buf);
    if (wavPcm) {
      const encoded = await pcmToSilk(wavPcm, targetRate);
      return encoded.silkBuffer.toString("base64");
    }
  }

  if (ext === ".mp3" || ext === ".mpeg") {
    const mp3Pcm = await wasmDecodeMp3ToPCM(buf, targetRate);
    if (mp3Pcm) {
      const { silkBuffer } = await pcmToSilk(mp3Pcm, targetRate);
      debugLog(`[audio-convert] WASM: MP3 → SILK done (${silkBuffer.length} bytes)`);
      return silkBuffer.toString("base64");
    }
  }

  debugError(
    `[audio-convert] unsupported format without native subprocess conversion: ${ext}. Use QQ-native voice formats or WAV/MP3/PCM inputs.`,
  );
  return null;
}
270
+
271
/**
 * Poll until a file exists and its size stops changing, then report the size.
 *
 * The file counts as ready once the same non-zero size has been seen on
 * two consecutive polls after the first observation. Polling gives up
 * early when the file stays empty for more than 10 s after appearing, or
 * has never appeared after 15 s.
 *
 * @param filePath - File to watch.
 * @param timeoutMs - Overall polling budget in milliseconds (default 30000).
 * @param pollMs - Delay between polls in milliseconds (default 500).
 * @returns The file size in bytes. On timeout the current size is
 *   returned if it is non-zero; otherwise `0`.
 */
export async function waitForFile(
  filePath: string,
  timeoutMs: number = 30000,
  pollMs: number = 500,
): Promise<number> {
  const emptyGiveUpMs = 10000;
  const noFileGiveUpMs = 15000;

  const startedAt = Date.now();
  const elapsedMs = (): number => Date.now() - startedAt;
  const shortName = (): string => path.basename(filePath);

  let previousSize = -1;
  let unchangedPolls = 0;
  let appeared = false;
  let appearedAt = 0;
  let polls = 0;

  while (elapsedMs() < timeoutMs) {
    polls += 1;
    try {
      const { size } = fs.statSync(filePath);
      if (!appeared) {
        appeared = true;
        appearedAt = Date.now();
        debugLog(
          `[audio-convert] waitForFile: file appeared (${size} bytes, after ${elapsedMs()}ms): ${shortName()}`,
        );
      }
      if (size <= 0) {
        // Present but still empty: allow a grace period before giving up.
        if (Date.now() - appearedAt > emptyGiveUpMs) {
          debugError(
            `[audio-convert] waitForFile: file still empty after ${emptyGiveUpMs}ms, giving up: ${shortName()}`,
          );
          return 0;
        }
      } else {
        unchangedPolls = size === previousSize ? unchangedPolls + 1 : 0;
        if (unchangedPolls >= 2) {
          debugLog(
            `[audio-convert] waitForFile: ready (${size} bytes, waited ${elapsedMs()}ms, polls=${polls})`,
          );
          return size;
        }
        previousSize = size;
      }
    } catch {
      // stat failed; only give up early if the file has never been seen.
      if (!appeared && elapsedMs() > noFileGiveUpMs) {
        debugError(
          `[audio-convert] waitForFile: file never appeared after ${noFileGiveUpMs}ms, giving up: ${shortName()}`,
        );
        return 0;
      }
    }
    await new Promise((resolve) => setTimeout(resolve, pollMs));
  }

  // Budget exhausted: accept whatever data is on disk, if any.
  try {
    const { size: finalSize } = fs.statSync(filePath);
    if (finalSize > 0) {
      debugWarn(
        `[audio-convert] waitForFile: timeout but file has data (${finalSize} bytes), using it`,
      );
      return finalSize;
    }
    debugError(
      `[audio-convert] waitForFile: timeout after ${timeoutMs}ms, file exists but empty (0 bytes): ${shortName()}`,
    );
  } catch {
    debugError(
      `[audio-convert] waitForFile: timeout after ${timeoutMs}ms, file never appeared: ${shortName()}`,
    );
  }
  return 0;
}
352
+
353
/**
 * Encode PCM s16le audio to SILK with silk-wasm.
 *
 * @param pcmBuffer - PCM bytes to encode.
 * @param sampleRate - Sample rate of `pcmBuffer` in Hz.
 * @returns The SILK bytes and the duration reported by the encoder.
 * @throws Error when silk-wasm could not be loaded.
 */
async function pcmToSilk(
  pcmBuffer: Buffer,
  sampleRate: number,
): Promise<{ silkBuffer: Buffer; duration: number }> {
  const silk = await loadSilkWasm();
  if (!silk) {
    throw new Error("silk-wasm is not available; cannot encode PCM to SILK");
  }

  const input = new Uint8Array(pcmBuffer.buffer, pcmBuffer.byteOffset, pcmBuffer.byteLength);
  const { data, duration } = await silk.encode(input, sampleRate);

  // Wrap the encoder output as a Buffer over the same memory.
  const silkBuffer = Buffer.from(data.buffer, data.byteOffset, data.byteLength);
  return { silkBuffer, duration };
}
369
+
370
/**
 * Decode an MP3 buffer to mono PCM s16le at `targetRate` using the
 * mpg123-decoder WASM module.
 *
 * Multi-channel output is averaged to mono, float samples are clamped to
 * [-1, 1] and scaled to 16-bit, and the result is linearly resampled when
 * the decoded sample rate differs from `targetRate`.
 *
 * @param buf - Raw MP3 file bytes.
 * @param targetRate - Desired output sample rate in Hz.
 * @returns The PCM buffer, or `null` when the decoder cannot be loaded,
 *   decoding fails, or no samples were produced. Failures are logged.
 */
async function wasmDecodeMp3ToPCM(buf: Buffer, targetRate: number): Promise<Buffer | null> {
  try {
    const { MPEGDecoder } = await import("mpg123-decoder");
    debugLog(`[audio-convert] WASM MP3 decode: size=${buf.length} bytes`);
    const decoder = new MPEGDecoder();
    await decoder.ready;

    // Release the decoder even when decode() throws, so a corrupt file
    // cannot leak the WASM instance.
    let decoded: ReturnType<typeof decoder.decode>;
    try {
      decoded = decoder.decode(new Uint8Array(buf.buffer, buf.byteOffset, buf.byteLength));
    } finally {
      decoder.free();
    }

    if (decoded.samplesDecoded === 0 || decoded.channelData.length === 0) {
      debugError(
        `[audio-convert] WASM MP3 decode: no samples (samplesDecoded=${decoded.samplesDecoded})`,
      );
      return null;
    }

    debugLog(
      `[audio-convert] WASM MP3 decode: samples=${decoded.samplesDecoded}, sampleRate=${decoded.sampleRate}, channels=${decoded.channelData.length}`,
    );

    // Downmix to mono by averaging all channels sample-by-sample.
    let floatMono: Float32Array;
    if (decoded.channelData.length === 1) {
      floatMono = decoded.channelData[0];
    } else {
      floatMono = new Float32Array(decoded.samplesDecoded);
      const channels = decoded.channelData.length;
      for (let i = 0; i < decoded.samplesDecoded; i++) {
        let sum = 0;
        for (let ch = 0; ch < channels; ch++) {
          sum += decoded.channelData[ch][i];
        }
        floatMono[i] = sum / channels;
      }
    }

    // Float [-1, 1] → signed 16-bit little-endian. Negative values scale
    // by 32768 and positive by 32767 so both ends of the range are reachable.
    const s16 = new Uint8Array(floatMono.length * 2);
    const view = new DataView(s16.buffer);
    for (let i = 0; i < floatMono.length; i++) {
      const clamped = Math.max(-1, Math.min(1, floatMono[i]));
      const val = clamped < 0 ? clamped * 32768 : clamped * 32767;
      view.setInt16(i * 2, Math.round(val), true);
    }

    // Linear-interpolation resample when the source rate differs.
    let pcm: Uint8Array = s16;
    if (decoded.sampleRate !== targetRate) {
      const inputSamples = s16.length / 2;
      const outputSamples = Math.round((inputSamples * targetRate) / decoded.sampleRate);
      const output = new Uint8Array(outputSamples * 2);
      const inView = new DataView(s16.buffer, s16.byteOffset, s16.byteLength);
      const outView = new DataView(output.buffer, output.byteOffset, output.byteLength);
      for (let i = 0; i < outputSamples; i++) {
        const srcIdx = (i * decoded.sampleRate) / targetRate;
        const idx0 = Math.floor(srcIdx);
        // Clamp the right-hand neighbour so the last sample interpolates with itself.
        const idx1 = Math.min(idx0 + 1, inputSamples - 1);
        const frac = srcIdx - idx0;
        const s0 = inView.getInt16(idx0 * 2, true);
        const s1 = inView.getInt16(idx1 * 2, true);
        const sample = Math.round(s0 + (s1 - s0) * frac);
        outView.setInt16(i * 2, Math.max(-32768, Math.min(32767, sample)), true);
      }
      pcm = output;
    }

    return Buffer.from(pcm.buffer, pcm.byteOffset, pcm.byteLength);
  } catch (err) {
    debugError(`[audio-convert] WASM MP3 decode failed: ${formatErrorMessage(err)}`);
    if (err instanceof Error && err.stack) {
      debugError(`[audio-convert] stack: ${err.stack}`);
    }
    return null;
  }
}
444
+
445
/**
 * Parse a 16-bit PCM WAV file and return its audio as mono 24 kHz PCM s16le.
 *
 * Only format tag 1 with 16 bits per sample is accepted. Multi-channel
 * audio is averaged to mono and other sample rates are linearly resampled
 * to 24 kHz.
 *
 * The chunk walk follows the RIFF layout: it starts right after the
 * `fmt ` chunk using that chunk's declared size (so 18- or 40-byte `fmt `
 * bodies work), and skips the pad byte that follows odd-sized chunks.
 * Only whole sample frames are used, so a truncated data chunk yields the
 * complete frames that are present instead of throwing.
 *
 * @param buf - Complete WAV file contents.
 * @returns Mono 24 kHz PCM bytes, or `null` when the header is missing or
 *   malformed, the format is unsupported, or no complete frame of audio
 *   data is found.
 */
export function parseWavFallback(buf: Buffer): Buffer | null {
  if (buf.length < 44) {
    return null;
  }
  if (buf.toString("ascii", 0, 4) !== "RIFF") {
    return null;
  }
  if (buf.toString("ascii", 8, 12) !== "WAVE") {
    return null;
  }
  if (buf.toString("ascii", 12, 16) !== "fmt ") {
    return null;
  }

  // The fields read below live in the first 16 bytes of the fmt body.
  const fmtSize = buf.readUInt32LE(16);
  if (fmtSize < 16) {
    return null;
  }

  const audioFormat = buf.readUInt16LE(20);
  if (audioFormat !== 1) {
    return null;
  }

  const channels = buf.readUInt16LE(22);
  const sampleRate = buf.readUInt32LE(24);
  const bitsPerSample = buf.readUInt16LE(34);
  if (bitsPerSample !== 16) {
    return null;
  }
  // Zero channels or a zero rate would make the frame/resample math meaningless.
  if (channels === 0 || sampleRate === 0) {
    return null;
  }

  const targetRate = 24000;

  // First chunk after "fmt ": 12-byte RIFF/WAVE preamble + 8-byte chunk
  // header + declared body size, plus the pad byte for odd sizes.
  let offset = 20 + fmtSize + (fmtSize % 2);
  while (offset < buf.length - 8) {
    const chunkId = buf.toString("ascii", offset, offset + 4);
    const chunkSize = buf.readUInt32LE(offset + 4);
    if (chunkId !== "data") {
      // RIFF chunks are word-aligned: odd-sized bodies are followed by one pad byte.
      offset += 8 + chunkSize + (chunkSize % 2);
      continue;
    }

    const dataStart = offset + 8;
    const dataEnd = Math.min(dataStart + chunkSize, buf.length);
    const frameBytes = 2 * channels;
    // Ignore a trailing partial frame (truncated file or odd byte count).
    const frameCount = Math.floor((dataEnd - dataStart) / frameBytes);
    if (frameCount === 0) {
      return null;
    }

    let pcm = new Uint8Array(buf.buffer, buf.byteOffset + dataStart, frameCount * frameBytes);

    if (channels > 1) {
      // Downmix by averaging the interleaved channel samples of each frame.
      const mono = new Uint8Array(frameCount * 2);
      const inV = new DataView(pcm.buffer, pcm.byteOffset, pcm.byteLength);
      const outV = new DataView(mono.buffer, mono.byteOffset, mono.byteLength);
      for (let i = 0; i < frameCount; i++) {
        let sum = 0;
        for (let ch = 0; ch < channels; ch++) {
          sum += inV.getInt16((i * channels + ch) * 2, true);
        }
        outV.setInt16(i * 2, Math.max(-32768, Math.min(32767, Math.round(sum / channels))), true);
      }
      pcm = mono;
    }

    if (sampleRate !== targetRate) {
      // Linear-interpolation resample to the target rate.
      const inSamples = frameCount;
      const outSamples = Math.round((inSamples * targetRate) / sampleRate);
      const out = new Uint8Array(outSamples * 2);
      const inV = new DataView(pcm.buffer, pcm.byteOffset, pcm.byteLength);
      const outV = new DataView(out.buffer, out.byteOffset, out.byteLength);
      for (let i = 0; i < outSamples; i++) {
        const src = (i * sampleRate) / targetRate;
        const i0 = Math.floor(src);
        // Clamp the right-hand neighbour so the last sample interpolates with itself.
        const i1 = Math.min(i0 + 1, inSamples - 1);
        const f = src - i0;
        const s0 = inV.getInt16(i0 * 2, true);
        const s1 = inV.getInt16(i1 * 2, true);
        outV.setInt16(
          i * 2,
          Math.max(-32768, Math.min(32767, Math.round(s0 + (s1 - s0) * f))),
          true,
        );
      }
      pcm = out;
    }

    return Buffer.from(pcm.buffer, pcm.byteOffset, pcm.byteLength);
  }

  return null;
}