opencode-tbot 0.1.14 → 0.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/plugin.js CHANGED
@@ -1,4 +1,5 @@
1
1
  import { c as loadAppConfig, i as preparePluginConfiguration, o as OPENCODE_TBOT_VERSION } from "./assets/plugin-config-DA71_jD3.js";
2
+ import { createRequire } from "node:module";
2
3
  import { mkdir, readFile, rename, stat, writeFile } from "node:fs/promises";
3
4
  import { basename, dirname, extname, isAbsolute, join } from "node:path";
4
5
  import { parse, printParseErrorCode } from "jsonc-parser";
@@ -6,6 +7,7 @@ import { z } from "zod";
6
7
  import { OpenRouter } from "@openrouter/sdk";
7
8
  import { createOpencodeClient } from "@opencode-ai/sdk/v2/client";
8
9
  import { randomUUID } from "node:crypto";
10
+ import { spawn } from "node:child_process";
9
11
  import { run } from "@grammyjs/runner";
10
12
  import { Bot, InlineKeyboard } from "grammy";
11
13
  //#region src/infra/utils/redact.ts
@@ -1178,6 +1180,145 @@ var NOOP_FOREGROUND_SESSION_TRACKER = {
1178
1180
  }
1179
1181
  };
1180
1182
  //#endregion
1183
+ //#region src/services/voice-transcription/audio-transcoder.ts
1184
+ var OPENROUTER_SUPPORTED_AUDIO_FORMATS = ["mp3", "wav"];
1185
+ var VoiceTranscodingFailedError = class extends Error {
1186
+ data;
1187
+ constructor(message) {
1188
+ super(message);
1189
+ this.name = "VoiceTranscodingFailedError";
1190
+ this.data = { message };
1191
+ }
1192
+ };
1193
+ var DEFAULT_TRANSCODE_TIMEOUT_MS = 15e3;
1194
+ var FfmpegAudioTranscoder = class {
1195
+ ffmpegPath;
1196
+ spawnProcess;
1197
+ timeoutMs;
1198
+ constructor(options) {
1199
+ this.ffmpegPath = options.ffmpegPath?.trim() || null;
1200
+ this.spawnProcess = options.spawnProcess ?? defaultSpawnProcess;
1201
+ this.timeoutMs = options.timeoutMs ?? DEFAULT_TRANSCODE_TIMEOUT_MS;
1202
+ }
1203
+ async transcode(input) {
1204
+ if (!this.ffmpegPath) throw new VoiceTranscodingFailedError(buildTranscodingMessage(input.sourceFormat, input.targetFormat, "Bundled ffmpeg is unavailable."));
1205
+ if (input.targetFormat !== "wav") throw new VoiceTranscodingFailedError(buildTranscodingMessage(input.sourceFormat, input.targetFormat, `Unsupported transcode target: ${input.targetFormat}.`));
1206
+ return {
1207
+ data: await runFfmpegTranscode({
1208
+ data: toUint8Array$1(input.data),
1209
+ ffmpegPath: this.ffmpegPath,
1210
+ filename: input.filename,
1211
+ sourceFormat: input.sourceFormat,
1212
+ spawnProcess: this.spawnProcess,
1213
+ timeoutMs: this.timeoutMs,
1214
+ targetFormat: input.targetFormat
1215
+ }),
1216
+ filename: replaceExtension(input.filename, ".wav"),
1217
+ format: "wav",
1218
+ mimeType: "audio/wav"
1219
+ };
1220
+ }
1221
+ };
1222
+ async function runFfmpegTranscode(input) {
1223
+ return await new Promise((resolve, reject) => {
1224
+ const child = input.spawnProcess(input.ffmpegPath, buildFfmpegArgs(input.targetFormat), {
1225
+ stdio: [
1226
+ "pipe",
1227
+ "pipe",
1228
+ "pipe"
1229
+ ],
1230
+ windowsHide: true
1231
+ });
1232
+ const stdoutChunks = [];
1233
+ const stderrChunks = [];
1234
+ let settled = false;
1235
+ let timedOut = false;
1236
+ const timer = setTimeout(() => {
1237
+ timedOut = true;
1238
+ child.kill();
1239
+ }, input.timeoutMs);
1240
+ const cleanup = () => {
1241
+ clearTimeout(timer);
1242
+ };
1243
+ const rejectOnce = (message) => {
1244
+ if (settled) return;
1245
+ settled = true;
1246
+ cleanup();
1247
+ reject(new VoiceTranscodingFailedError(buildTranscodingMessage(input.sourceFormat, input.targetFormat, message)));
1248
+ };
1249
+ const resolveOnce = (value) => {
1250
+ if (settled) return;
1251
+ settled = true;
1252
+ cleanup();
1253
+ resolve(value);
1254
+ };
1255
+ child.stdout.on("data", (chunk) => {
1256
+ stdoutChunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
1257
+ });
1258
+ child.stderr.on("data", (chunk) => {
1259
+ stderrChunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
1260
+ });
1261
+ child.once("error", (error) => {
1262
+ rejectOnce(`Failed to start bundled ffmpeg: ${error.message}`);
1263
+ });
1264
+ child.once("close", (code, signal) => {
1265
+ if (timedOut) {
1266
+ rejectOnce(`Bundled ffmpeg timed out after ${input.timeoutMs} ms.`);
1267
+ return;
1268
+ }
1269
+ if (code !== 0) {
1270
+ rejectOnce(Buffer.concat(stderrChunks).toString("utf8").trim() || `Bundled ffmpeg exited with code ${code}${signal ? ` (${signal})` : ""}.`);
1271
+ return;
1272
+ }
1273
+ const output = Buffer.concat(stdoutChunks);
1274
+ if (output.length === 0) {
1275
+ rejectOnce("Bundled ffmpeg returned empty audio output.");
1276
+ return;
1277
+ }
1278
+ resolveOnce(new Uint8Array(output));
1279
+ });
1280
+ child.stdin.on("error", (error) => {
1281
+ rejectOnce(`Failed to write audio data to bundled ffmpeg: ${error.message}`);
1282
+ });
1283
+ child.stdin.write(Buffer.from(input.data));
1284
+ child.stdin.end();
1285
+ });
1286
+ }
1287
+ function buildFfmpegArgs(targetFormat) {
1288
+ if (targetFormat !== "wav") throw new Error(`Unsupported target format: ${targetFormat}`);
1289
+ return [
1290
+ "-hide_banner",
1291
+ "-loglevel",
1292
+ "error",
1293
+ "-i",
1294
+ "pipe:0",
1295
+ "-f",
1296
+ "wav",
1297
+ "-acodec",
1298
+ "pcm_s16le",
1299
+ "-ac",
1300
+ "1",
1301
+ "-ar",
1302
+ "16000",
1303
+ "pipe:1"
1304
+ ];
1305
+ }
1306
+ function buildTranscodingMessage(sourceFormat, targetFormat, reason) {
1307
+ return `Failed to transcode audio from ${sourceFormat} to ${targetFormat}. ${reason}`;
1308
+ }
1309
+ function replaceExtension(filename, nextExtension) {
1310
+ const trimmedFilename = basename(filename).trim();
1311
+ if (!trimmedFilename) return `telegram-voice${nextExtension}`;
1312
+ const currentExtension = extname(trimmedFilename);
1313
+ return currentExtension ? `${trimmedFilename.slice(0, -currentExtension.length)}${nextExtension}` : `${trimmedFilename}${nextExtension}`;
1314
+ }
1315
+ function toUint8Array$1(data) {
1316
+ return data instanceof Uint8Array ? data : new Uint8Array(data);
1317
+ }
1318
+ function defaultSpawnProcess(command, args, options) {
1319
+ return spawn(command, args, options);
1320
+ }
1321
+ //#endregion
1181
1322
  //#region src/services/voice-transcription/openrouter-voice.client.ts
1182
1323
  var VoiceTranscriptionNotConfiguredError = class extends Error {
1183
1324
  data;
@@ -1215,11 +1356,16 @@ var DisabledVoiceTranscriptionClient = class {
1215
1356
  }
1216
1357
  };
1217
1358
  var OpenRouterVoiceTranscriptionClient = class {
1359
+ audioTranscoder;
1218
1360
  model;
1219
1361
  sdk;
1220
1362
  timeoutMs;
1221
1363
  transcriptionPrompt;
1222
- constructor(options, sdk) {
1364
+ constructor(options, sdk, audioTranscoder = new FfmpegAudioTranscoder({
1365
+ ffmpegPath: null,
1366
+ timeoutMs: options.timeoutMs
1367
+ })) {
1368
+ this.audioTranscoder = audioTranscoder;
1223
1369
  this.model = options.model;
1224
1370
  this.sdk = sdk;
1225
1371
  this.timeoutMs = options.timeoutMs;
@@ -1232,8 +1378,8 @@ var OpenRouterVoiceTranscriptionClient = class {
1232
1378
  };
1233
1379
  }
1234
1380
  async transcribe(input) {
1235
- const format = resolveAudioFormat(input.filename, input.mimeType);
1236
- const audioData = toBase64(input.data);
1381
+ const preparedAudio = await prepareAudioForOpenRouter(input, resolveAudioFormat(input.filename, input.mimeType), this.audioTranscoder);
1382
+ const audioData = toBase64(preparedAudio.data);
1237
1383
  const prompt = buildTranscriptionPrompt(this.transcriptionPrompt);
1238
1384
  let response;
1239
1385
  try {
@@ -1247,7 +1393,7 @@ var OpenRouterVoiceTranscriptionClient = class {
1247
1393
  type: "input_audio",
1248
1394
  inputAudio: {
1249
1395
  data: audioData,
1250
- format
1396
+ format: preparedAudio.format
1251
1397
  }
1252
1398
  }]
1253
1399
  }],
@@ -1257,13 +1403,29 @@ var OpenRouterVoiceTranscriptionClient = class {
1257
1403
  } }, { timeoutMs: this.timeoutMs });
1258
1404
  } catch (error) {
1259
1405
  throw new VoiceTranscriptionFailedError(buildTranscriptionErrorMessage(error, {
1260
- format,
1406
+ format: preparedAudio.format,
1261
1407
  model: this.model
1262
1408
  }));
1263
1409
  }
1264
1410
  return { text: extractTranscript(response) };
1265
1411
  }
1266
1412
  };
1413
+ async function prepareAudioForOpenRouter(input, sourceFormat, audioTranscoder) {
1414
+ if (isOpenRouterSupportedAudioFormat(sourceFormat)) return {
1415
+ data: toUint8Array(input.data),
1416
+ format: sourceFormat
1417
+ };
1418
+ const transcoded = await audioTranscoder.transcode({
1419
+ data: input.data,
1420
+ filename: input.filename,
1421
+ sourceFormat,
1422
+ targetFormat: "wav"
1423
+ });
1424
+ return {
1425
+ data: transcoded.data,
1426
+ format: transcoded.format
1427
+ };
1428
+ }
1267
1429
  var MIME_TYPE_FORMAT_MAP = {
1268
1430
  "audio/aac": "aac",
1269
1431
  "audio/aiff": "aiff",
@@ -1301,9 +1463,15 @@ function resolveAudioFormat(filename, mimeType) {
1301
1463
  return "ogg";
1302
1464
  }
1303
1465
  function toBase64(data) {
1304
- const bytes = data instanceof Uint8Array ? data : new Uint8Array(data);
1466
+ const bytes = toUint8Array(data);
1305
1467
  return Buffer.from(bytes).toString("base64");
1306
1468
  }
1469
+ function toUint8Array(data) {
1470
+ return data instanceof Uint8Array ? data : new Uint8Array(data);
1471
+ }
1472
+ function isOpenRouterSupportedAudioFormat(format) {
1473
+ return OPENROUTER_SUPPORTED_AUDIO_FORMATS.includes(format);
1474
+ }
1307
1475
  function buildTranscriptionPrompt(transcriptionPrompt) {
1308
1476
  const basePrompt = [
1309
1477
  "Transcribe the provided audio verbatim.",
@@ -2174,6 +2342,7 @@ function resolveExtension(mimeType) {
2174
2342
  }
2175
2343
  //#endregion
2176
2344
  //#region src/app/container.ts
2345
+ var require = createRequire(import.meta.url);
2177
2346
  function createAppContainer(config, client) {
2178
2347
  const logger = createOpenCodeAppLogger(client, { level: config.logLevel });
2179
2348
  return createContainer(config, createOpenCodeClientFromSdkClient(client), logger);
@@ -2250,8 +2419,19 @@ function createVoiceTranscriptionClient(config) {
2250
2419
  }, new OpenRouter({
2251
2420
  apiKey: config.apiKey,
2252
2421
  timeoutMs: config.timeoutMs
2422
+ }), new FfmpegAudioTranscoder({
2423
+ ffmpegPath: loadBundledFfmpegPath(),
2424
+ timeoutMs: config.timeoutMs
2253
2425
  })) : new DisabledVoiceTranscriptionClient();
2254
2426
  }
2427
+ function loadBundledFfmpegPath() {
2428
+ try {
2429
+ const ffmpegInstaller = require("@ffmpeg-installer/ffmpeg");
2430
+ return typeof ffmpegInstaller.path === "string" && ffmpegInstaller.path.trim().length > 0 ? ffmpegInstaller.path : null;
2431
+ } catch {
2432
+ return null;
2433
+ }
2434
+ }
2255
2435
  //#endregion
2256
2436
  //#region src/app/bootstrap.ts
2257
2437
  function bootstrapPluginApp(client, configSource = {}, options = {}) {
@@ -2484,6 +2664,7 @@ var EN_BOT_COPY = {
2484
2664
  structuredOutput: "Structured output validation failed.",
2485
2665
  voiceNotConfigured: "Voice transcription is not configured.",
2486
2666
  voiceDownload: "Failed to download the Telegram voice file.",
2667
+ voiceTranscoding: "Voice audio preprocessing failed.",
2487
2668
  voiceTranscription: "Voice transcription failed.",
2488
2669
  voiceEmpty: "Voice transcription returned empty text.",
2489
2670
  voiceUnsupported: "Voice message file is too large or unsupported.",
@@ -2693,6 +2874,7 @@ var ZH_CN_BOT_COPY = {
2693
2874
  structuredOutput: "结构化输出校验失败。",
2694
2875
  voiceNotConfigured: "未配置语音转写服务。",
2695
2876
  voiceDownload: "下载 Telegram 语音文件失败。",
2877
+ voiceTranscoding: "语音转码失败。",
2696
2878
  voiceTranscription: "语音转写失败。",
2697
2879
  voiceEmpty: "语音转写结果为空。",
2698
2880
  voiceUnsupported: "语音文件过大或不受支持。",
@@ -3049,6 +3231,10 @@ function normalizeError(error, copy) {
3049
3231
  message: copy.errors.voiceDownload,
3050
3232
  cause: extractMessage(error.data) ?? null
3051
3233
  };
3234
+ if (isNamedError(error, "VoiceTranscodingFailedError")) return {
3235
+ message: copy.errors.voiceTranscoding,
3236
+ cause: extractMessage(error.data) ?? null
3237
+ };
3052
3238
  if (isNamedError(error, "VoiceTranscriptionFailedError")) return {
3053
3239
  message: copy.errors.voiceTranscription,
3054
3240
  cause: extractMessage(error.data) ?? null
@@ -3306,9 +3492,9 @@ function splitStatusLines(text) {
3306
3492
  function formatHealthBadge(healthy, layout) {
3307
3493
  return healthy ? "🟢" : layout.errorStatus;
3308
3494
  }
3309
- function formatVoiceRecognitionBadge(status, layout) {
3310
- if (status.status === "configured") return status.model ? `\uD83D\uDFE2 ${layout.voiceRecognitionConfiguredLabel} (${status.model})` : `\uD83D\uDFE2 ${layout.voiceRecognitionConfiguredLabel}`;
3311
- return `\u26AA ${layout.voiceRecognitionNotConfiguredLabel}`;
3495
+ function formatVoiceRecognitionBadge(status, _layout) {
3496
+ if (status.status === "configured") return status.model ? `\uD83D\uDFE2 (${status.model})` : "🟡";
3497
+ return "⚪";
3312
3498
  }
3313
3499
  function formatLspStatusBadge(status) {
3314
3500
  switch (status.status) {
@@ -3378,9 +3564,7 @@ function getStatusLayoutCopy(copy) {
3378
3564
  rootLabel: "Root",
3379
3565
  statusLabel: "Status",
3380
3566
  tbotVersionLabel: "opencode-tbot Version",
3381
- voiceRecognitionConfiguredLabel: "configured",
3382
3567
  voiceRecognitionLabel: "Voice Recognition",
3383
- voiceRecognitionNotConfiguredLabel: "not configured",
3384
3568
  workspaceTitle: "📁 Workspace"
3385
3569
  };
3386
3570
  return {
@@ -3403,9 +3587,7 @@ function getStatusLayoutCopy(copy) {
3403
3587
  rootLabel: "根目录",
3404
3588
  statusLabel: "状态",
3405
3589
  tbotVersionLabel: "opencode-tbot版本",
3406
- voiceRecognitionConfiguredLabel: "已配置",
3407
3590
  voiceRecognitionLabel: "语音识别",
3408
- voiceRecognitionNotConfiguredLabel: "未配置",
3409
3591
  workspaceTitle: "📁 工作区"
3410
3592
  };
3411
3593
  }