@inceptionstack/roundhouse 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/README.md +321 -9
  2. package/architecture.md +77 -8
  3. package/package.json +3 -1
  4. package/src/agents/pi.ts +433 -26
  5. package/src/agents/registry.ts +8 -0
  6. package/src/cli/cli.ts +384 -189
  7. package/src/cli/cron.ts +296 -0
  8. package/src/cli/doctor/checks/agent.ts +68 -0
  9. package/src/cli/doctor/checks/config.ts +88 -0
  10. package/src/cli/doctor/checks/credentials.ts +62 -0
  11. package/src/cli/doctor/checks/disk.ts +69 -0
  12. package/src/cli/doctor/checks/stt.ts +76 -0
  13. package/src/cli/doctor/checks/system.ts +86 -0
  14. package/src/cli/doctor/checks/systemd.ts +76 -0
  15. package/src/cli/doctor/output.ts +58 -0
  16. package/src/cli/doctor/runner.ts +142 -0
  17. package/src/cli/doctor/shell.ts +33 -0
  18. package/src/cli/doctor/types.ts +44 -0
  19. package/src/cli/doctor.ts +48 -0
  20. package/src/cli/setup-telegram.ts +148 -0
  21. package/src/cli/setup.ts +936 -0
  22. package/src/commands.ts +23 -0
  23. package/src/config.ts +188 -0
  24. package/src/cron/constants.ts +54 -0
  25. package/src/cron/durations.ts +33 -0
  26. package/src/cron/format.ts +139 -0
  27. package/src/cron/helpers.ts +30 -0
  28. package/src/cron/runner.ts +148 -0
  29. package/src/cron/schedule.ts +101 -0
  30. package/src/cron/scheduler.ts +295 -0
  31. package/src/cron/store.ts +125 -0
  32. package/src/cron/template.ts +89 -0
  33. package/src/cron/types.ts +76 -0
  34. package/src/gateway.ts +927 -18
  35. package/src/index.ts +1 -58
  36. package/src/memory/bootstrap.ts +98 -0
  37. package/src/memory/files.ts +100 -0
  38. package/src/memory/inject.ts +41 -0
  39. package/src/memory/lifecycle.ts +245 -0
  40. package/src/memory/policy.ts +122 -0
  41. package/src/memory/prompts.ts +42 -0
  42. package/src/memory/state.ts +43 -0
  43. package/src/memory/types.ts +90 -0
  44. package/src/notify/telegram.ts +48 -0
  45. package/src/types.ts +68 -1
  46. package/src/util.ts +28 -2
  47. package/src/voice/providers/whisper.ts +339 -0
  48. package/src/voice/stt-service.ts +284 -0
  49. package/src/voice/types.ts +63 -0
@@ -0,0 +1,339 @@
1
+ /**
2
+ * voice/providers/whisper.ts — Local Whisper STT provider
3
+ *
4
+ * Runs the whisper CLI via child_process. Auto-detects language.
5
+ * Can auto-install whisper via pip3 (only when autoInstall is enabled) and warm the model on first use.
6
+ */
7
+
8
+ import { execFile } from "node:child_process";
9
+ import { access, constants, rm, readFile, readdir, writeFile } from "node:fs/promises";
10
+ import { join } from "node:path";
11
+ import { homedir } from "node:os";
12
+ import { mkdirSync } from "node:fs";
13
+ import { randomBytes } from "node:crypto";
14
+ import type { SttProvider, SttInput, TranscriptionResult, SttProviderConfig } from "../types";
15
+
16
+ // ── Binary discovery ─────────────────────────────────
17
+
18
+ const WHISPER_PATHS = [ // candidate locations for the whisper executable, probed in this order
19
+ join(homedir(), ".local", "bin", "whisper"), // per-user location; presumably where `pip3 install --user` puts the script — TODO confirm
20
+ "/usr/local/bin/whisper",
21
+ "/usr/bin/whisper",
22
+ ];
23
+
24
+ let cachedBinaryPath: string | null | undefined; // undefined = not checked yet; null = checked, nothing found; string = path found
25
+
26
+ async function findWhisperBinary(): Promise<string | null> { // first executable entry of WHISPER_PATHS, or null; hit and miss are both memoized
27
+ if (cachedBinaryPath !== undefined) return cachedBinaryPath; // already scanned — reuse the cached answer
28
+
29
+ for (const p of WHISPER_PATHS) {
30
+ try {
31
+ await access(p, constants.X_OK); // rejects unless the path is accessible with execute permission
32
+ cachedBinaryPath = p;
33
+ return p;
34
+ } catch {} // not usable — fall through to the next candidate
35
+ }
36
+ cachedBinaryPath = null; // remember the miss so later calls skip the scan until invalidateCache()
37
+ return null;
38
+ }
39
+
40
+ /** Forget the cached lookup result (hit or miss) so the next findWhisperBinary() re-scans WHISPER_PATHS */
41
+ function invalidateCache(): void {
42
+ cachedBinaryPath = undefined;
43
+ }
44
+
45
+ // ── Auto-install ─────────────────────────────────────
46
+
47
+ let pipAvailable: boolean | undefined; // undefined = not probed yet; otherwise the cached probe result
48
+
49
+ async function checkPip(): Promise<boolean> { // resolves true when `pip3 --version` succeeds; the result is cached and the promise never rejects
50
+ if (pipAvailable !== undefined) return pipAvailable;
51
+ return new Promise<boolean>((resolve) => {
52
+ execFile("pip3", ["--version"], { timeout: 5000 }, (err) => {
53
+ pipAvailable = !err; // any execFile error (including the 5s timeout) counts as "pip3 unavailable"
54
+ resolve(pipAvailable);
55
+ });
56
+ });
57
+ }
58
+
59
+ /**
60
+ * Install openai-whisper via `pip3 install --user`, re-scan for the binary and validate it with `--help`. Resolves to the binary path, or null on any failure.
61
+ */
62
+ async function installWhisperWithPip(): Promise<string | null> {
63
+ if (!(await checkPip())) {
64
+ console.warn("[stt/whisper] pip3 not available — cannot auto-install whisper");
65
+ return null;
66
+ }
67
+
68
+ console.log("[stt/whisper] installing openai-whisper via pip3...");
69
+ return new Promise<string | null>((resolve) => {
70
+ execFile(
71
+ "pip3",
72
+ ["install", "--user", "openai-whisper"],
73
+ {
74
+ timeout: 300_000, // 5 min for install
75
+ maxBuffer: 10 * 1024 * 1024, // 10MB for pip output
76
+ env: { ...process.env },
77
+ },
78
+ async (err, stdout, stderr) => {
79
+ if (err) {
80
+ console.error("[stt/whisper] pip3 install failed:", err.message);
81
+ if (stderr) console.error("[stt/whisper] stderr:", stderr.slice(0, 500)); // log only the first 500 chars of pip's stderr
82
+ resolve(null);
83
+ return;
84
+ }
85
+ console.log("[stt/whisper] pip3 install succeeded");
86
+
87
+ // Re-discover binary (drop the cached "not found" result first)
88
+ invalidateCache();
89
+ const binary = await findWhisperBinary();
90
+ if (!binary) {
91
+ console.error("[stt/whisper] installed but binary not found in expected paths");
92
+ resolve(null);
93
+ return;
94
+ }
95
+
96
+ // Validate with --help (10s limit); a failing --help is treated as a failed install
97
+ execFile(binary, ["--help"], { timeout: 10_000 }, (helpErr) => {
98
+ if (helpErr) {
99
+ console.error("[stt/whisper] binary found but --help failed:", helpErr.message);
100
+ resolve(null);
101
+ } else {
102
+ console.log(`[stt/whisper] validated binary at ${binary}`);
103
+ resolve(binary);
104
+ }
105
+ });
106
+ },
107
+ );
108
+ });
109
+ }
110
+
111
+ /**
112
+ * Warm the whisper model by running a tiny transcription of a generated 1-second silent WAV.
113
+ * This forces the model download (~461MB for small). Resolves true if the whisper run succeeds, false if it fails.
114
+ */
115
+ async function warmWhisperModel(binary: string, model: string): Promise<boolean> {
116
+ const warmupDir = join(homedir(), ".roundhouse", "whisper-warmup", randomBytes(4).toString("hex")); // random per-run scratch dir
117
+ mkdirSync(warmupDir, { recursive: true }); // synchronous; throws if the directory cannot be created
118
+
119
+ // Generate a tiny silent WAV file (1 second, 16kHz, mono, 16-bit)
120
+ const sampleRate = 16000;
121
+ const numSamples = sampleRate; // 1 second
122
+ const dataSize = numSamples * 2; // 16-bit = 2 bytes per sample
123
+ const buf = Buffer.alloc(44 + dataSize); // 44 header bytes + zero-filled samples
124
+ // WAV header
125
+ buf.write("RIFF", 0);
126
+ buf.writeUInt32LE(36 + dataSize, 4);
127
+ buf.write("WAVE", 8);
128
+ buf.write("fmt ", 12);
129
+ buf.writeUInt32LE(16, 16); // PCM format chunk size
130
+ buf.writeUInt16LE(1, 20); // PCM format
131
+ buf.writeUInt16LE(1, 22); // mono
132
+ buf.writeUInt32LE(sampleRate, 24);
133
+ buf.writeUInt32LE(sampleRate * 2, 28); // byte rate
134
+ buf.writeUInt16LE(2, 32); // block align
135
+ buf.writeUInt16LE(16, 34); // bits per sample
136
+ buf.write("data", 36);
137
+ buf.writeUInt32LE(dataSize, 40);
138
+ // Data is all zeros (silence)
139
+
140
+ const wavPath = join(warmupDir, "silence.wav");
141
+ await writeFile(wavPath, buf); // NOTE(review): if this rejects, warmupDir is left behind — cleanup only runs in the execFile callback
142
+
143
+ console.log(`[stt/whisper] warming model '${model}' (may download ~461MB)...`); // NOTE(review): size text is fixed regardless of `model`
144
+
145
+ return new Promise<boolean>((resolve) => {
146
+ execFile(
147
+ binary,
148
+ [wavPath, "--model", model, "--output_format", "json", "--output_dir", warmupDir],
149
+ {
150
+ timeout: 600_000, // 10 min for model download + first run
151
+ env: {
152
+ ...process.env,
153
+ PATH: `${join(homedir(), ".local", "bin")}:${process.env.PATH}`, // put ~/.local/bin first on PATH for the child process
154
+ },
155
+ },
156
+ async (err) => {
157
+ // Clean up warmup files (best effort — removal errors are ignored)
158
+ try { await rm(warmupDir, { recursive: true }); } catch {}
159
+
160
+ if (err) {
161
+ console.warn(`[stt/whisper] model warmup failed: ${err.message}`);
162
+ resolve(false);
163
+ } else {
164
+ console.log(`[stt/whisper] model '${model}' ready`);
165
+ resolve(true);
166
+ }
167
+ },
168
+ );
169
+ });
170
+ }
171
+
172
+ // ── Provider ─────────────────────────────────────────
173
+
174
+ /** SttProvider extended with an install/warm-up step (see createWhisperProvider) */
175
+ export interface InstallableWhisperProvider extends SttProvider {
176
+ ensureInstalled(): Promise<boolean>; // in createWhisperProvider: true once the binary is found and the model is warmed
177
+ }
178
+
179
+ // Module-level install state, shared by every provider instance, so concurrent callers join one install
180
+ let installPromise: Promise<string | null> | null = null; // in-flight install, or null when none is running
181
+ let installFailed = false; // sticky failure to prevent retry spam (never reset in this module)
182
+
183
+ export function createWhisperProvider(config: SttProviderConfig): InstallableWhisperProvider { // builds a provider that shells out to the whisper CLI
184
+ const model = (config.model as string) ?? "small"; // defaults to the "small" model
185
+ const timeoutMs = config.timeoutMs ?? 30000; // per-transcription limit passed to execFile (default 30s)
186
+ const autoInstall = config.autoInstall === true; // explicit opt-in only
187
+ let modelWarmed = false; // set by a successful warmup, a cache hit, or a successful transcription
188
+ let warmFailed = false; // sticky failure to prevent warmup retry spam
189
+ let warmPromise: Promise<boolean> | null = null; // in-flight warmup for this provider instance, or null
190
+
191
+ const WHISPER_LANGS = new Set(["af","am","ar","as","az","ba","be","bg","bn","bo","br","bs","ca","cs","cy","da","de","el","en","es","et","eu","fa","fi","fo","fr","gl","gu","ha","haw","he","hi","hr","ht","hu","hy","id","is","it","ja","jw","ka","kk","km","kn","ko","la","lb","ln","lo","lt","lv","mg","mi","mk","ml","mn","mr","ms","mt","my","ne","nl","nn","no","oc","pa","pl","ps","pt","ro","ru","sa","sd","si","sk","sl","sn","so","sq","sr","su","sv","sw","ta","te","tg","th","tk","tl","tr","tt","uk","ur","uz","vi","yi","yo","yue","zh"]); // language hints outside this set are ignored in transcribe()
192
+
193
+ async function getBinary(): Promise<string | null> { // path to whisper, installing it first when allowed; null if unavailable
194
+ // Check if already available
195
+ const existing = await findWhisperBinary();
196
+ if (existing) return existing;
197
+
198
+ // Try auto-install
199
+ if (!autoInstall) return null;
200
+ if (installFailed) return null; // sticky failure — don't retry every message
201
+
202
+ // Singleton: join existing install or start new one
203
+ if (!installPromise) {
204
+ installPromise = installWhisperWithPip().then((result) => {
205
+ if (!result) installFailed = true;
206
+ return result;
207
+ }).finally(() => {
208
+ installPromise = null; // allow a later call to start over (blocked by installFailed after a failure)
209
+ });
210
+ }
211
+ return installPromise;
212
+ }
213
+
214
+ return {
215
+ name: `whisper-${model}`,
216
+
217
+ canTranscribe(input: SttInput): boolean {
218
+ return input.mime.startsWith("audio/"); // any audio/* MIME type is accepted
219
+ },
220
+
221
+ async ensureInstalled(): Promise<boolean> { // true only when the binary is present AND the model counts as warmed
222
+ const binary = await getBinary();
223
+ if (!binary) return false;
224
+
225
+ // Warm model with singleton promise
226
+ if (!modelWarmed && !warmFailed) {
227
+ if (!warmPromise) {
228
+ warmPromise = (async () => {
229
+ // Check if model already cached
230
+ const modelDir = join(homedir(), ".cache", "whisper");
231
+ try {
232
+ const files = await readdir(modelDir);
233
+ if (files.some((f) => f.startsWith(model) && f.includes("."))) { // NOTE(review): prefix match — "small" would also match e.g. "small.en.*"; confirm intended
234
+ modelWarmed = true;
235
+ return true;
236
+ }
237
+ } catch {} // cache dir missing or unreadable — fall through to a real warmup
238
+
239
+ // Run warmup — catch everything so it never rejects
240
+ try {
241
+ const ok = await warmWhisperModel(binary, model);
242
+ if (!ok) warmFailed = true;
243
+ modelWarmed = ok;
244
+ return ok;
245
+ } catch (err) {
246
+ console.warn(`[stt/whisper] warmup error: ${(err as Error).message}`);
247
+ warmFailed = true;
248
+ modelWarmed = false;
249
+ return false;
250
+ }
251
+ })().finally(() => { warmPromise = null; });
252
+ }
253
+ await warmPromise;
254
+ }
255
+ return modelWarmed; // stays false after a failed warmup until a transcription succeeds (warmFailed is sticky)
256
+ },
257
+
258
+ async transcribe(input: SttInput): Promise<TranscriptionResult> { // rejects on CLI failure, missing/invalid JSON output, or empty text
259
+ const binary = await getBinary();
260
+ if (!binary) {
261
+ throw new Error("whisper not available and auto-install failed"); // NOTE(review): also thrown when autoInstall is disabled, so the message can mislead
262
+ }
263
+
264
+ const outputDir = join(homedir(), ".roundhouse", "whisper-tmp", randomBytes(6).toString("hex")); // random per-call output dir
265
+ mkdirSync(outputDir, { recursive: true });
266
+
267
+ const audioPath = input.localPath.startsWith("-") ? `./${input.localPath}` : input.localPath; // presumably keeps a leading "-" from being parsed as a CLI option
268
+
269
+ const args = [
270
+ audioPath,
271
+ "--model", model,
272
+ "--output_format", "json",
273
+ "--output_dir", outputDir,
274
+ ];
275
+
276
+ if (input.hint?.language && WHISPER_LANGS.has(input.hint.language)) {
277
+ args.push("--language", input.hint.language); // only pass hints that are in WHISPER_LANGS
278
+ }
279
+
280
+ return new Promise<TranscriptionResult>((resolve, reject) => {
281
+ execFile(
282
+ binary,
283
+ args,
284
+ {
285
+ timeout: timeoutMs,
286
+ env: {
287
+ ...process.env,
288
+ PATH: `${join(homedir(), ".local", "bin")}:${process.env.PATH}`, // put ~/.local/bin first on PATH for the child process
289
+ },
290
+ },
291
+ async (error, _stdout, stderr) => {
292
+ const cleanup = async () => { // best-effort removal of the per-call output dir
293
+ try { await rm(outputDir, { recursive: true }); } catch {}
294
+ };
295
+
296
+ if (error) {
297
+ await cleanup();
298
+ reject(new Error(`whisper failed: ${error.message}`));
299
+ return;
300
+ }
301
+
302
+ try {
303
+ const files = await readdir(outputDir);
304
+ const jsonFile = files.find((f) => f.endsWith(".json")); // first .json file written to the output dir
305
+ if (!jsonFile) {
306
+ await cleanup();
307
+ reject(new Error("whisper produced no JSON output"));
308
+ return;
309
+ }
310
+ const raw = await readFile(join(outputDir, jsonFile), "utf8");
311
+ const result = JSON.parse(raw); // untyped; only .text and .language are read below
312
+
313
+ await cleanup();
314
+
315
+ let language: string | undefined;
316
+ const langMatch = stderr.match(/Detected language:\s*(\w+)/); // fallback: language reported on stderr
317
+ if (langMatch) language = langMatch[1].toLowerCase();
318
+ if (result.language) language = result.language; // the JSON value wins over the stderr match
319
+
320
+ const text = (result.text ?? "").trim();
321
+ if (!text) {
322
+ reject(new Error("whisper returned empty transcript"));
323
+ return;
324
+ }
325
+
326
+ // Mark model as warmed after successful transcription
327
+ modelWarmed = true;
328
+
329
+ resolve({ text, language, approximate: true });
330
+ } catch (err) {
331
+ await cleanup(); // may run a second time after the cleanup above; rm errors are swallowed
332
+ reject(new Error(`whisper output parse failed: ${(err as Error).message}`));
333
+ }
334
+ },
335
+ );
336
+ });
337
+ },
338
+ };
339
+ }
@@ -0,0 +1,284 @@
1
+ /**
2
+ * voice/stt-service.ts — Speech-to-text service
3
+ *
4
+ * Manages provider chain, timeouts, and graceful fallback.
5
+ * Never throws to callers — returns null when transcription is skipped, or a status "failed" transcript when every provider fails.
6
+ */
7
+
8
+ import type { SttProvider, SttInput, SttConfig, AttachmentTranscript } from "./types";
9
+ import type { MessageAttachment } from "../types";
10
+ import { createWhisperProvider, type InstallableWhisperProvider } from "./providers/whisper";
11
+
12
+ // Provider factory registry, keyed by the provider config's `type`
13
+ const PROVIDER_FACTORIES: Record<string, (config: any) => SttProvider> = {
14
+ whisper: createWhisperProvider,
15
+ };
16
+
17
+ export class SttService { // builds the configured provider chain lazily and runs transcriptions one at a time
18
+ private providers: SttProvider[] = []; // filled by doInit() in chain order
19
+ private config: SttConfig;
20
+ private initPromise: Promise<void> | null = null; // memoized doInit(); cleared again if it rejects
21
+ private activeStt: Promise<void> = Promise.resolve(); // tail of the queue — concurrency: 1 at a time per service instance
22
+ private installNoticeSent = false; // the "not available" notice is sent at most once
23
+
24
+ constructor(config: SttConfig) {
25
+ this.config = config;
26
+ }
27
+
28
+ private async ensureInitialized(): Promise<void> { // runs doInit() once; concurrent callers share the same promise
29
+ if (!this.initPromise) {
30
+ this.initPromise = this.doInit().catch((err) => {
31
+ this.initPromise = null; // retry on next call
32
+ throw err;
33
+ });
34
+ }
35
+ await this.initPromise;
36
+ }
37
+
38
+ private async doInit(): Promise<void> { // instantiates each provider named in config.chain, skipping unusable entries
39
+ for (const providerName of this.config.chain) {
40
+ const providerConfig = this.config.providers[providerName];
41
+ if (!providerConfig) {
42
+ console.warn(`[stt] provider "${providerName}" in chain but not configured, skipping`);
43
+ continue;
44
+ }
45
+
46
+ const type = providerConfig.type;
47
+ const factory = PROVIDER_FACTORIES[type];
48
+
49
+ if (!factory) {
50
+ console.warn(`[stt] unknown provider type "${type}", skipping`);
51
+ continue;
52
+ }
53
+
54
+ try {
55
+ // Provider-level autoInstall wins; otherwise inherit the service-level value, defaulting to false
56
+ const mergedProviderConfig = {
57
+ ...providerConfig,
58
+ autoInstall: providerConfig.autoInstall ?? this.config.autoInstall ?? false,
59
+ };
60
+ this.providers.push(factory(mergedProviderConfig));
61
+ console.log(`[stt] loaded provider: ${providerName} (${type})`);
62
+ } catch (err) {
63
+ console.warn(`[stt] failed to create provider "${providerName}":`, (err as Error).message);
64
+ }
65
+ }
66
+
67
+ if (this.providers.length === 0) {
68
+ console.warn(`[stt] no providers available — transcription disabled`);
69
+ }
70
+ }
71
+
72
+ /**
73
+ * Prepare providers in background (install + warm model).
74
+ * Called from gateway.start() — non-blocking, never throws.
75
+ */
76
+ async prepareInBackground(): Promise<void> {
77
+ try {
78
+ await this.ensureInitialized();
79
+ } catch {
80
+ return; // initialization failed — nothing to prepare
81
+ }
82
+
83
+ for (const provider of this.providers) {
84
+ if ("ensureInstalled" in provider && typeof (provider as any).ensureInstalled === "function") { // only providers that expose an install step
85
+ try {
86
+ const installable = provider as InstallableWhisperProvider;
87
+ const ok = await installable.ensureInstalled();
88
+ if (ok) {
89
+ console.log(`[stt] ${provider.name} ready (installed + model warmed)`);
90
+ } else {
91
+ console.warn(`[stt] ${provider.name} not available after prepare`);
92
+ }
93
+ } catch (err) {
94
+ console.warn(`[stt] ${provider.name} prepare failed:`, (err as Error).message);
95
+ }
96
+ }
97
+ }
98
+ }
99
+
100
+ /** Should this attachment be auto-transcribed? Depends on enabled/mode and the autoTranscribe voice/file switches. */
101
+ shouldTranscribe(attachment: MessageAttachment): boolean {
102
+ if (!this.config.enabled || this.config.mode === "off") return false;
103
+
104
+ const auto = this.config.autoTranscribe ?? { voiceMessages: true, audioFiles: false, maxDurationSec: 120 }; // fallback when autoTranscribe is absent
105
+
106
+ // Only audio
107
+ if (attachment.mediaType !== "audio") return false;
108
+ if (!attachment.mime.startsWith("audio/")) return false;
109
+
110
+ // Voice messages (ogg/opus from Telegram) vs general audio files
111
+ const isVoiceMessage = attachment.mime === "audio/ogg" && attachment.name.endsWith(".ogg");
112
+ if (isVoiceMessage && auto.voiceMessages) return true;
113
+ if (!isVoiceMessage && auto.audioFiles) return true;
114
+
115
+ return false;
116
+ }
117
+
118
+ /**
119
+ * Try to transcribe an attachment using the provider chain, one transcription at a time.
120
+ * Returns null when skipped (init failure, no providers, not eligible, too long); returns a status "failed" transcript when every provider fails — never throws to callers.
121
+ */
122
+ async tryTranscribe(
123
+ attachment: MessageAttachment,
124
+ languageHint?: string,
125
+ notify?: (text: string) => Promise<void>,
126
+ ): Promise<AttachmentTranscript | null> {
127
+ try {
128
+ await this.ensureInitialized();
129
+ } catch (err) {
130
+ console.warn(`[stt] initialization failed:`, (err as Error).message);
131
+ return null;
132
+ }
133
+
134
+ if (this.providers.length === 0) return null;
135
+ if (!this.shouldTranscribe(attachment)) return null;
136
+
137
+ // Check duration limit using ffprobe
138
+ const maxDuration = this.config.autoTranscribe?.maxDurationSec ?? 120;
139
+ if (maxDuration > 0) { // a limit of 0 or less disables the duration check
140
+ try {
141
+ const duration = await getAudioDuration(attachment.localPath);
142
+ if (duration !== null && duration > maxDuration) { // an unknown duration (null) does not block transcription
143
+ console.log(`[stt] skipping ${attachment.name}: duration ${duration.toFixed(1)}s exceeds ${maxDuration}s limit`);
144
+ return null;
145
+ }
146
+ } catch {} // a failed probe is ignored — transcription proceeds
147
+ }
148
+
149
+ const input: SttInput = {
150
+ localPath: attachment.localPath,
151
+ mime: attachment.mime,
152
+ sizeBytes: attachment.sizeBytes,
153
+ hint: {
154
+ language: languageHint,
155
+ isVoiceMessage: attachment.mime === "audio/ogg", // NOTE(review): looser than shouldTranscribe(), which also requires a ".ogg" file name
156
+ },
157
+ };
158
+
159
+ const startTime = Date.now(); // taken before queueing, so durationMs includes time spent waiting for earlier transcriptions
160
+
161
+ // Concurrency limit: one transcription at a time per service instance to prevent CPU stampede
162
+ // Promise executor runs synchronously per spec, so release is always assigned before await
163
+ const prev = this.activeStt;
164
+ let release: () => void;
165
+ this.activeStt = new Promise<void>((r) => { release = r; });
166
+ await prev; // wait for the previous transcription to call its release()
167
+
168
+ try {
169
+ for (const provider of this.providers) {
170
+ if (!provider.canTranscribe(input)) continue;
171
+
172
+ // Ensure provider is installed (with one-time user notification)
173
+ const installable = provider as InstallableWhisperProvider;
174
+ if (installable.ensureInstalled && typeof installable.ensureInstalled === "function") { // runtime check — the cast above is unchecked
175
+ try {
176
+ const isReady = await installable.ensureInstalled();
177
+ if (!isReady) {
178
+ if (!this.installNoticeSent && notify) {
179
+ this.installNoticeSent = true;
180
+ try { await notify("🎤 Voice transcription not available. Whisper install or model download failed."); } catch {}
181
+ }
182
+ continue;
183
+ }
184
+ } catch {
185
+ continue; // install check threw — skip this provider
186
+ }
187
+ }
188
+
189
+ try {
190
+ console.log(`[stt] trying ${provider.name} for ${attachment.name}...`);
191
+ const result = await provider.transcribe(input);
192
+ const durationMs = Date.now() - startTime;
193
+
194
+ console.log(`[stt] ${provider.name} succeeded in ${durationMs}ms: "${result.text.slice(0, 80)}"`); // logs at most the first 80 chars of the transcript
195
+
196
+ return {
197
+ text: result.text,
198
+ provider: provider.name,
199
+ language: result.language,
200
+ confidence: result.confidence,
201
+ approximate: true,
202
+ status: "completed" as const,
203
+ durationMs,
204
+ };
205
+ } catch (err) {
206
+ console.warn(`[stt] ${provider.name} failed:`, (err as Error).message);
207
+ continue; // fall back to the next provider in the chain
208
+ }
209
+ }
210
+
211
+ // All providers failed or were skipped
212
+ return {
213
+ text: "",
214
+ provider: "none",
215
+ approximate: true,
216
+ status: "failed" as const,
217
+ error: "All STT providers failed",
218
+ durationMs: Date.now() - startTime,
219
+ };
220
+ } finally {
221
+ release!(); // always unblock the next queued transcription
222
+ }
223
+ }
224
+ }
225
+
226
+ /**
227
+ * Enrich audio attachments with transcripts, processing them sequentially.
228
+ * Mutates the attachment objects in place by setting `transcript` (completed or failed); the array itself is not changed.
229
+ */
230
+ export async function enrichAttachmentsWithTranscripts(
231
+ attachments: MessageAttachment[],
232
+ sttService: SttService | null,
233
+ notify?: (text: string) => Promise<void>,
234
+ ): Promise<void> {
235
+ if (!sttService) return; // no STT service supplied — nothing to do
236
+
237
+ for (const att of attachments) {
238
+ try {
239
+ const transcript = await sttService.tryTranscribe(att, undefined, notify); // no language hint is passed
240
+ if (transcript) {
241
+ att.transcript = transcript;
242
+ }
243
+ } catch (err) {
244
+ console.error(`[stt] unexpected error transcribing ${att.name}:`, (err as Error).message);
245
+ }
246
+ }
247
+ }
248
+
249
+ /** Get audio duration in seconds using ffprobe. Returns null if ffprobe errors (missing, failure, 5s timeout) or prints a non-numeric value. */
250
+ async function getAudioDuration(filePath: string): Promise<number | null> {
251
+ const { execFile: exec } = await import("node:child_process"); // loaded on demand rather than at module top level
252
+ return new Promise((resolve) => {
253
+ exec(
254
+ "ffprobe",
255
+ ["-i", filePath, "-show_entries", "format=duration", "-v", "quiet", "-of", "csv=p=0"],
256
+ { timeout: 5000 },
257
+ (error, stdout) => {
258
+ if (error) return resolve(null);
259
+ const dur = parseFloat(stdout.trim());
260
+ resolve(isNaN(dur) ? null : dur); // unparsable output is treated as "duration unknown"
261
+ },
262
+ );
263
+ });
264
+ }
265
+
266
+ /** Default STT config: whisper "small" only, voice messages transcribed automatically */
267
+ export const DEFAULT_STT_CONFIG: SttConfig = {
268
+ enabled: true,
269
+ mode: "on",
270
+ autoInstall: true, // NOTE(review): on by default here, while the whisper provider describes autoInstall as explicit opt-in — confirm intended
271
+ chain: ["whisper"],
272
+ autoTranscribe: {
273
+ voiceMessages: true,
274
+ audioFiles: false,
275
+ maxDurationSec: 120,
276
+ },
277
+ providers: {
278
+ whisper: {
279
+ type: "whisper",
280
+ model: "small",
281
+ timeoutMs: 30000,
282
+ },
283
+ },
284
+ };
@@ -0,0 +1,63 @@
1
+ /**
2
+ * voice/types.ts — Shared types for voice STT/TTS
3
+ */
4
+
5
+ // ── STT (Speech-to-Text) ────────────────────────────
6
+
7
+ export interface SttProvider { // contract every STT backend in the provider chain implements
8
+ readonly name: string; // used in log lines and copied into AttachmentTranscript.provider
9
+ canTranscribe(input: SttInput): boolean; // providers returning false are skipped for this input
10
+ transcribe(input: SttInput): Promise<TranscriptionResult>; // may reject; SttService then tries the next provider
11
+ }
12
+
13
+ export interface SttInput { // what SttService hands to a provider for one attachment
14
+ localPath: string; // path of the audio file on local disk
15
+ mime: string;
16
+ sizeBytes: number;
17
+ hint?: {
18
+ language?: string; // optional language code; the whisper provider ignores codes it does not list
19
+ isVoiceMessage?: boolean;
20
+ };
21
+ }
22
+
23
+ export interface TranscriptionResult { // value a provider resolves with on success
24
+ text: string;
25
+ language?: string;
26
+ confidence?: number; // optional; the whisper provider does not set it
27
+ approximate: true; // literal type: always true
28
+ }
29
+
30
+ // ── Attachment transcript (stored on MessageAttachment) ──
31
+
32
+ export interface AttachmentTranscript { // result of SttService.tryTranscribe, stored as attachment.transcript
33
+ text: string; // empty string when status is "failed"
34
+ provider: string; // name of the provider that produced the text, or "none" on failure
35
+ language?: string;
36
+ confidence?: number;
37
+ approximate: true; // literal type: always true
38
+ status: "completed" | "failed";
39
+ error?: string; // set on the "failed" result
40
+ durationMs?: number; // wall-clock time measured by SttService
41
+ }
42
+
43
+ // ── STT config ───────────────────────────────────────
44
+
45
+ export interface SttProviderConfig { // per-provider settings passed to the provider factory
46
+ type: string; // selects the factory (e.g. "whisper")
47
+ timeoutMs?: number;
48
+ autoInstall?: boolean; // when unset, SttService falls back to SttConfig.autoInstall
49
+ [key: string]: unknown; // provider-specific extras, e.g. `model` for whisper
50
+ }
51
+
52
+ export interface SttConfig { // service-level STT settings
53
+ enabled: boolean;
54
+ mode: "on" | "off"; // "off" disables auto-transcription even when enabled is true
55
+ autoInstall?: boolean; // default for providers that do not set their own autoInstall
56
+ chain: string[]; // provider names (keys of `providers`) in the order they are tried
57
+ autoTranscribe: {
58
+ voiceMessages: boolean;
59
+ audioFiles: boolean;
60
+ maxDurationSec: number; // 0 or less disables the duration check
61
+ };
62
+ providers: Record<string, SttProviderConfig>;
63
+ }