npm - opencode-tbot - Versions diffs - 0.1.14 → 0.1.16 - Mend

opencode-tbot 0.1.14 → 0.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/dist/plugin.js CHANGED

@@ -1,4 +1,5 @@
 import { c as loadAppConfig, i as preparePluginConfiguration, o as OPENCODE_TBOT_VERSION } from "./assets/plugin-config-DA71_jD3.js";
+import { createRequire } from "node:module";
 import { mkdir, readFile, rename, stat, writeFile } from "node:fs/promises";
 import { basename, dirname, extname, isAbsolute, join } from "node:path";
 import { parse, printParseErrorCode } from "jsonc-parser";
@@ -6,6 +7,7 @@ import { z } from "zod";
 import { OpenRouter } from "@openrouter/sdk";
 import { createOpencodeClient } from "@opencode-ai/sdk/v2/client";
 import { randomUUID } from "node:crypto";
+import { spawn } from "node:child_process";
 import { run } from "@grammyjs/runner";
 import { Bot, InlineKeyboard } from "grammy";
 //#region src/infra/utils/redact.ts
@@ -1178,6 +1180,145 @@ var NOOP_FOREGROUND_SESSION_TRACKER = {
 	}
 };
 //#endregion
+//#region src/services/voice-transcription/audio-transcoder.ts
+var OPENROUTER_SUPPORTED_AUDIO_FORMATS = ["mp3", "wav"];
+var VoiceTranscodingFailedError = class extends Error {
+	data;
+	constructor(message) {
+		super(message);
+		this.name = "VoiceTranscodingFailedError";
+		this.data = { message };
+	}
+};
+var DEFAULT_TRANSCODE_TIMEOUT_MS = 15e3;
+var FfmpegAudioTranscoder = class {
+	ffmpegPath;
+	spawnProcess;
+	timeoutMs;
+	constructor(options) {
+		this.ffmpegPath = options.ffmpegPath?.trim() || null;
+		this.spawnProcess = options.spawnProcess ?? defaultSpawnProcess;
+		this.timeoutMs = options.timeoutMs ?? DEFAULT_TRANSCODE_TIMEOUT_MS;
+	}
+	async transcode(input) {
+		if (!this.ffmpegPath) throw new VoiceTranscodingFailedError(buildTranscodingMessage(input.sourceFormat, input.targetFormat, "Bundled ffmpeg is unavailable."));
+		if (input.targetFormat !== "wav") throw new VoiceTranscodingFailedError(buildTranscodingMessage(input.sourceFormat, input.targetFormat, `Unsupported transcode target: ${input.targetFormat}.`));
+		return {
+			data: await runFfmpegTranscode({
+				data: toUint8Array$1(input.data),
+				ffmpegPath: this.ffmpegPath,
+				filename: input.filename,
+				sourceFormat: input.sourceFormat,
+				spawnProcess: this.spawnProcess,
+				timeoutMs: this.timeoutMs,
+				targetFormat: input.targetFormat
+			}),
+			filename: replaceExtension(input.filename, ".wav"),
+			format: "wav",
+			mimeType: "audio/wav"
+		};
+	}
+};
+async function runFfmpegTranscode(input) {
+	return await new Promise((resolve, reject) => {
+		const child = input.spawnProcess(input.ffmpegPath, buildFfmpegArgs(input.targetFormat), {
+			stdio: [
+				"pipe",
+				"pipe",
+				"pipe"
+			],
+			windowsHide: true
+		});
+		const stdoutChunks = [];
+		const stderrChunks = [];
+		let settled = false;
+		let timedOut = false;
+		const timer = setTimeout(() => {
+			timedOut = true;
+			child.kill();
+		}, input.timeoutMs);
+		const cleanup = () => {
+			clearTimeout(timer);
+		};
+		const rejectOnce = (message) => {
+			if (settled) return;
+			settled = true;
+			cleanup();
+			reject(new VoiceTranscodingFailedError(buildTranscodingMessage(input.sourceFormat, input.targetFormat, message)));
+		};
+		const resolveOnce = (value) => {
+			if (settled) return;
+			settled = true;
+			cleanup();
+			resolve(value);
+		};
+		child.stdout.on("data", (chunk) => {
+			stdoutChunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
+		});
+		child.stderr.on("data", (chunk) => {
+			stderrChunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
+		});
+		child.once("error", (error) => {
+			rejectOnce(`Failed to start bundled ffmpeg: ${error.message}`);
+		});
+		child.once("close", (code, signal) => {
+			if (timedOut) {
+				rejectOnce(`Bundled ffmpeg timed out after ${input.timeoutMs} ms.`);
+				return;
+			}
+			if (code !== 0) {
+				rejectOnce(Buffer.concat(stderrChunks).toString("utf8").trim() || `Bundled ffmpeg exited with code ${code}${signal ? ` (${signal})` : ""}.`);
+				return;
+			}
+			const output = Buffer.concat(stdoutChunks);
+			if (output.length === 0) {
+				rejectOnce("Bundled ffmpeg returned empty audio output.");
+				return;
+			}
+			resolveOnce(new Uint8Array(output));
+		});
+		child.stdin.on("error", (error) => {
+			rejectOnce(`Failed to write audio data to bundled ffmpeg: ${error.message}`);
+		});
+		child.stdin.write(Buffer.from(input.data));
+		child.stdin.end();
+	});
+}
+function buildFfmpegArgs(targetFormat) {
+	if (targetFormat !== "wav") throw new Error(`Unsupported target format: ${targetFormat}`);
+	return [
+		"-hide_banner",
+		"-loglevel",
+		"error",
+		"-i",
+		"pipe:0",
+		"-f",
+		"wav",
+		"-acodec",
+		"pcm_s16le",
+		"-ac",
+		"1",
+		"-ar",
+		"16000",
+		"pipe:1"
+	];
+}
+function buildTranscodingMessage(sourceFormat, targetFormat, reason) {
+	return `Failed to transcode audio from ${sourceFormat} to ${targetFormat}. ${reason}`;
+}
+function replaceExtension(filename, nextExtension) {
+	const trimmedFilename = basename(filename).trim();
+	if (!trimmedFilename) return `telegram-voice${nextExtension}`;
+	const currentExtension = extname(trimmedFilename);
+	return currentExtension ? `${trimmedFilename.slice(0, -currentExtension.length)}${nextExtension}` : `${trimmedFilename}${nextExtension}`;
+}
+function toUint8Array$1(data) {
+	return data instanceof Uint8Array ? data : new Uint8Array(data);
+}
+function defaultSpawnProcess(command, args, options) {
+	return spawn(command, args, options);
+}
+//#endregion
 //#region src/services/voice-transcription/openrouter-voice.client.ts
 var VoiceTranscriptionNotConfiguredError = class extends Error {
 	data;
@@ -1215,11 +1356,16 @@ var DisabledVoiceTranscriptionClient = class {
 	}
 };
 var OpenRouterVoiceTranscriptionClient = class {
+	audioTranscoder;
 	model;
 	sdk;
 	timeoutMs;
 	transcriptionPrompt;
-	constructor(options, sdk) {
+	constructor(options, sdk, audioTranscoder = new FfmpegAudioTranscoder({
+		ffmpegPath: null,
+		timeoutMs: options.timeoutMs
+	})) {
+		this.audioTranscoder = audioTranscoder;
 		this.model = options.model;
 		this.sdk = sdk;
 		this.timeoutMs = options.timeoutMs;
@@ -1232,8 +1378,8 @@ var OpenRouterVoiceTranscriptionClient = class {
 		};
 	}
 	async transcribe(input) {
-		const format = resolveAudioFormat(input.filename, input.mimeType);
-		const audioData = toBase64(input.data);
+		const preparedAudio = await prepareAudioForOpenRouter(input, resolveAudioFormat(input.filename, input.mimeType), this.audioTranscoder);
+		const audioData = toBase64(preparedAudio.data);
 		const prompt = buildTranscriptionPrompt(this.transcriptionPrompt);
 		let response;
 		try {
@@ -1247,7 +1393,7 @@ var OpenRouterVoiceTranscriptionClient = class {
 						type: "input_audio",
 						inputAudio: {
 							data: audioData,
-							format
+							format: preparedAudio.format
 						}
 					}]
 				}],
@@ -1257,13 +1403,29 @@ var OpenRouterVoiceTranscriptionClient = class {
 			} }, { timeoutMs: this.timeoutMs });
 		} catch (error) {
 			throw new VoiceTranscriptionFailedError(buildTranscriptionErrorMessage(error, {
-				format,
+				format: preparedAudio.format,
 				model: this.model
 			}));
 		}
 		return { text: extractTranscript(response) };
 	}
 };
+async function prepareAudioForOpenRouter(input, sourceFormat, audioTranscoder) {
+	if (isOpenRouterSupportedAudioFormat(sourceFormat)) return {
+		data: toUint8Array(input.data),
+		format: sourceFormat
+	};
+	const transcoded = await audioTranscoder.transcode({
+		data: input.data,
+		filename: input.filename,
+		sourceFormat,
+		targetFormat: "wav"
+	});
+	return {
+		data: transcoded.data,
+		format: transcoded.format
+	};
+}
 var MIME_TYPE_FORMAT_MAP = {
 	"audio/aac": "aac",
 	"audio/aiff": "aiff",
@@ -1301,9 +1463,15 @@ function resolveAudioFormat(filename, mimeType) {
 	return "ogg";
 }
 function toBase64(data) {
-	const bytes = data instanceof Uint8Array ? data : new Uint8Array(data);
+	const bytes = toUint8Array(data);
 	return Buffer.from(bytes).toString("base64");
 }
+function toUint8Array(data) {
+	return data instanceof Uint8Array ? data : new Uint8Array(data);
+}
+function isOpenRouterSupportedAudioFormat(format) {
+	return OPENROUTER_SUPPORTED_AUDIO_FORMATS.includes(format);
+}
 function buildTranscriptionPrompt(transcriptionPrompt) {
 	const basePrompt = [
 		"Transcribe the provided audio verbatim.",
@@ -2174,6 +2342,7 @@ function resolveExtension(mimeType) {
 }
 //#endregion
 //#region src/app/container.ts
+var require = createRequire(import.meta.url);
 function createAppContainer(config, client) {
 	const logger = createOpenCodeAppLogger(client, { level: config.logLevel });
 	return createContainer(config, createOpenCodeClientFromSdkClient(client), logger);
@@ -2250,8 +2419,19 @@ function createVoiceTranscriptionClient(config) {
 	}, new OpenRouter({
 		apiKey: config.apiKey,
 		timeoutMs: config.timeoutMs
+	}), new FfmpegAudioTranscoder({
+		ffmpegPath: loadBundledFfmpegPath(),
+		timeoutMs: config.timeoutMs
 	})) : new DisabledVoiceTranscriptionClient();
 }
+function loadBundledFfmpegPath() {
+	try {
+		const ffmpegInstaller = require("@ffmpeg-installer/ffmpeg");
+		return typeof ffmpegInstaller.path === "string" && ffmpegInstaller.path.trim().length > 0 ? ffmpegInstaller.path : null;
+	} catch {
+		return null;
+	}
+}
 //#endregion
 //#region src/app/bootstrap.ts
 function bootstrapPluginApp(client, configSource = {}, options = {}) {
@@ -2484,6 +2664,7 @@ var EN_BOT_COPY = {
 		structuredOutput: "Structured output validation failed.",
 		voiceNotConfigured: "Voice transcription is not configured.",
 		voiceDownload: "Failed to download the Telegram voice file.",
+		voiceTranscoding: "Voice audio preprocessing failed.",
 		voiceTranscription: "Voice transcription failed.",
 		voiceEmpty: "Voice transcription returned empty text.",
 		voiceUnsupported: "Voice message file is too large or unsupported.",
@@ -2693,6 +2874,7 @@ var ZH_CN_BOT_COPY = {
 		structuredOutput: "结构化输出校验失败。",
 		voiceNotConfigured: "未配置语音转写服务。",
 		voiceDownload: "下载 Telegram 语音文件失败。",
+		voiceTranscoding: "语音转码失败。",
 		voiceTranscription: "语音转写失败。",
 		voiceEmpty: "语音转写结果为空。",
 		voiceUnsupported: "语音文件过大或不受支持。",
@@ -3049,6 +3231,10 @@ function normalizeError(error, copy) {
 		message: copy.errors.voiceDownload,
 		cause: extractMessage(error.data) ?? null
 	};
+	if (isNamedError(error, "VoiceTranscodingFailedError")) return {
+		message: copy.errors.voiceTranscoding,
+		cause: extractMessage(error.data) ?? null
+	};
 	if (isNamedError(error, "VoiceTranscriptionFailedError")) return {
 		message: copy.errors.voiceTranscription,
 		cause: extractMessage(error.data) ?? null
@@ -3306,9 +3492,9 @@ function splitStatusLines(text) {
 function formatHealthBadge(healthy, layout) {
 	return healthy ? "🟢" : layout.errorStatus;
 }
-function formatVoiceRecognitionBadge(status, layout) {
-	if (status.status === "configured") return status.model ? `\uD83D\uDFE2 ${layout.voiceRecognitionConfiguredLabel} (${status.model})` : `\uD83D\uDFE2 ${layout.voiceRecognitionConfiguredLabel}`;
-	return `\u26AA ${layout.voiceRecognitionNotConfiguredLabel}`;
+function formatVoiceRecognitionBadge(status, _layout) {
+	if (status.status === "configured") return status.model ? `\uD83D\uDFE2 (${status.model})` : "🟡";
+	return "⚪";
 }
 function formatLspStatusBadge(status) {
 	switch (status.status) {
@@ -3378,9 +3564,7 @@ function getStatusLayoutCopy(copy) {
 		rootLabel: "Root",
 		statusLabel: "Status",
 		tbotVersionLabel: "opencode-tbot Version",
-		voiceRecognitionConfiguredLabel: "configured",
 		voiceRecognitionLabel: "Voice Recognition",
-		voiceRecognitionNotConfiguredLabel: "not configured",
 		workspaceTitle: "📁 Workspace"
 	};
 	return {
@@ -3403,9 +3587,7 @@ function getStatusLayoutCopy(copy) {
 		rootLabel: "根目录",
 		statusLabel: "状态",
 		tbotVersionLabel: "opencode-tbot版本",
-		voiceRecognitionConfiguredLabel: "已配置",
 		voiceRecognitionLabel: "语音识别",
-		voiceRecognitionNotConfiguredLabel: "未配置",
 		workspaceTitle: "📁 工作区"
 	};
 }