@inceptionstack/roundhouse 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +321 -9
- package/architecture.md +77 -8
- package/package.json +3 -1
- package/src/agents/pi.ts +433 -26
- package/src/agents/registry.ts +8 -0
- package/src/cli/cli.ts +384 -189
- package/src/cli/cron.ts +296 -0
- package/src/cli/doctor/checks/agent.ts +68 -0
- package/src/cli/doctor/checks/config.ts +88 -0
- package/src/cli/doctor/checks/credentials.ts +62 -0
- package/src/cli/doctor/checks/disk.ts +69 -0
- package/src/cli/doctor/checks/stt.ts +76 -0
- package/src/cli/doctor/checks/system.ts +86 -0
- package/src/cli/doctor/checks/systemd.ts +76 -0
- package/src/cli/doctor/output.ts +58 -0
- package/src/cli/doctor/runner.ts +142 -0
- package/src/cli/doctor/shell.ts +33 -0
- package/src/cli/doctor/types.ts +44 -0
- package/src/cli/doctor.ts +48 -0
- package/src/cli/setup-telegram.ts +148 -0
- package/src/cli/setup.ts +936 -0
- package/src/commands.ts +23 -0
- package/src/config.ts +188 -0
- package/src/cron/constants.ts +54 -0
- package/src/cron/durations.ts +33 -0
- package/src/cron/format.ts +139 -0
- package/src/cron/helpers.ts +30 -0
- package/src/cron/runner.ts +148 -0
- package/src/cron/schedule.ts +101 -0
- package/src/cron/scheduler.ts +295 -0
- package/src/cron/store.ts +125 -0
- package/src/cron/template.ts +89 -0
- package/src/cron/types.ts +76 -0
- package/src/gateway.ts +927 -18
- package/src/index.ts +1 -58
- package/src/memory/bootstrap.ts +98 -0
- package/src/memory/files.ts +100 -0
- package/src/memory/inject.ts +41 -0
- package/src/memory/lifecycle.ts +245 -0
- package/src/memory/policy.ts +122 -0
- package/src/memory/prompts.ts +42 -0
- package/src/memory/state.ts +43 -0
- package/src/memory/types.ts +90 -0
- package/src/notify/telegram.ts +48 -0
- package/src/types.ts +68 -1
- package/src/util.ts +28 -2
- package/src/voice/providers/whisper.ts +339 -0
- package/src/voice/stt-service.ts +284 -0
- package/src/voice/types.ts +63 -0
|
@@ -0,0 +1,339 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* voice/providers/whisper.ts — Local Whisper STT provider
|
|
3
|
+
*
|
|
4
|
+
* Runs the whisper CLI via child_process. Auto-detects language.
|
|
5
|
+
* Can auto-install whisper via pip3 and warm the model on first use.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { execFile } from "node:child_process";
|
|
9
|
+
import { access, constants, rm, readFile, readdir, writeFile } from "node:fs/promises";
|
|
10
|
+
import { join } from "node:path";
|
|
11
|
+
import { homedir } from "node:os";
|
|
12
|
+
import { mkdirSync } from "node:fs";
|
|
13
|
+
import { randomBytes } from "node:crypto";
|
|
14
|
+
import type { SttProvider, SttInput, TranscriptionResult, SttProviderConfig } from "../types";
|
|
15
|
+
|
|
16
|
+
// ── Binary discovery ─────────────────────────────────
|
|
17
|
+
|
|
18
|
+
/** Locations probed for the whisper CLI, in priority order. */
const WHISPER_PATHS = [
  join(homedir(), ".local", "bin", "whisper"),
  "/usr/local/bin/whisper",
  "/usr/bin/whisper",
];

/**
 * Memoized discovery result.
 * `undefined` = not probed yet, `null` = probed and nothing found,
 * string = first candidate path that was executable.
 */
let cachedBinaryPath: string | null | undefined;

/**
 * Find the whisper executable among WHISPER_PATHS.
 *
 * The first candidate that passes an X_OK access check wins. The outcome
 * (including "not found") is cached until invalidateCache() is called.
 *
 * @returns the path of the binary, or `null` when no candidate is executable.
 */
async function findWhisperBinary(): Promise<string | null> {
  if (cachedBinaryPath === undefined) {
    let located: string | null = null;
    for (const candidate of WHISPER_PATHS) {
      // access() rejects when the file is missing or not executable.
      const executable = await access(candidate, constants.X_OK).then(
        () => true,
        () => false,
      );
      if (executable) {
        located = candidate;
        break;
      }
    }
    cachedBinaryPath = located;
  }
  return cachedBinaryPath;
}

/** Forget the memoized lookup so the next findWhisperBinary() probes the filesystem again. */
function invalidateCache(): void {
  cachedBinaryPath = undefined;
}
|
|
44
|
+
|
|
45
|
+
// ── Auto-install ─────────────────────────────────────
|
|
46
|
+
|
|
47
|
+
/** Memoized pip3 probe result; `undefined` until a probe has completed. */
let pipAvailable: boolean | undefined;

/**
 * Report whether `pip3 --version` runs successfully (5 second timeout).
 * The result of the probe is stored in `pipAvailable` and reused afterwards.
 */
async function checkPip(): Promise<boolean> {
  if (pipAvailable === undefined) {
    pipAvailable = await new Promise<boolean>((settle) => {
      // Any error (missing binary, non-zero exit, timeout) means "not available".
      execFile("pip3", ["--version"], { timeout: 5000 }, (failure) => settle(!failure));
    });
  }
  return pipAvailable;
}
|
|
58
|
+
|
|
59
|
+
/**
 * Auto-install the `openai-whisper` package with `pip3 install --user`.
 *
 * Steps, in order: confirm pip3 is usable, run the install (5 minute timeout),
 * drop the cached binary lookup and re-scan the known paths, then sanity-check
 * the discovered binary by running it with `--help` (10 second timeout).
 *
 * @returns the validated binary path, or `null` when any step fails.
 */
async function installWhisperWithPip(): Promise<string | null> {
  const pipReady = await checkPip();
  if (!pipReady) {
    console.warn("[stt/whisper] pip3 not available — cannot auto-install whisper");
    return null;
  }

  console.log("[stt/whisper] installing openai-whisper via pip3...");

  const install = await new Promise<{ failure: Error | null; stderr: string }>((settle) => {
    execFile(
      "pip3",
      ["install", "--user", "openai-whisper"],
      {
        timeout: 300_000, // 5 min for install
        maxBuffer: 10 * 1024 * 1024, // 10MB for pip output
        env: { ...process.env },
      },
      (failure, _stdout, stderr) => settle({ failure, stderr }),
    );
  });

  if (install.failure) {
    console.error("[stt/whisper] pip3 install failed:", install.failure.message);
    if (install.stderr) console.error("[stt/whisper] stderr:", install.stderr.slice(0, 500));
    return null;
  }
  console.log("[stt/whisper] pip3 install succeeded");

  // The earlier "not found" result is cached; clear it before looking again.
  invalidateCache();
  const installedPath = await findWhisperBinary();
  if (!installedPath) {
    console.error("[stt/whisper] installed but binary not found in expected paths");
    return null;
  }

  // A binary that cannot even print its help text is treated as unusable.
  const helpFailure = await new Promise<Error | null>((settle) => {
    execFile(installedPath, ["--help"], { timeout: 10_000 }, (failure) => settle(failure));
  });
  if (helpFailure) {
    console.error("[stt/whisper] binary found but --help failed:", helpFailure.message);
    return null;
  }

  console.log(`[stt/whisper] validated binary at ${installedPath}`);
  return installedPath;
}
|
|
110
|
+
|
|
111
|
+
/**
 * Build an in-memory WAV file containing silence (mono, 16-bit PCM).
 * The sample data is left as the zero bytes Buffer.alloc() provides.
 */
function buildSilentWav(sampleRate: number, seconds: number): Buffer {
  const bytesPerSample = 2; // 16-bit
  const dataSize = Math.floor(sampleRate * seconds) * bytesPerSample;
  const wav = Buffer.alloc(44 + dataSize);

  wav.write("RIFF", 0);
  wav.writeUInt32LE(36 + dataSize, 4);
  wav.write("WAVE", 8);
  wav.write("fmt ", 12);
  wav.writeUInt32LE(16, 16); // PCM format chunk size
  wav.writeUInt16LE(1, 20); // PCM format
  wav.writeUInt16LE(1, 22); // mono
  wav.writeUInt32LE(sampleRate, 24);
  wav.writeUInt32LE(sampleRate * bytesPerSample, 28); // byte rate
  wav.writeUInt16LE(bytesPerSample, 32); // block align
  wav.writeUInt16LE(16, 34); // bits per sample
  wav.write("data", 36);
  wav.writeUInt32LE(dataSize, 40);
  return wav;
}

/**
 * Warm the whisper model by transcribing one second of generated silence.
 * Running the CLI once forces the model download (~461MB for "small").
 *
 * Scratch files live in a random directory under ~/.roundhouse/whisper-warmup
 * that is always removed afterwards, including when setup itself fails.
 *
 * @param binary path of the whisper executable
 * @param model  model name passed to `--model`
 * @returns `true` when the CLI run succeeds, `false` on any failure
 *          (scratch-file creation, spawn error, non-zero exit, 10 minute timeout).
 */
async function warmWhisperModel(binary: string, model: string): Promise<boolean> {
  const warmupDir = join(homedir(), ".roundhouse", "whisper-warmup", randomBytes(4).toString("hex"));

  try {
    mkdirSync(warmupDir, { recursive: true });

    // 1 second, 16kHz, mono, 16-bit
    const wavPath = join(warmupDir, "silence.wav");
    await writeFile(wavPath, buildSilentWav(16000, 1));

    console.log(`[stt/whisper] warming model '${model}' (first run downloads the model; ~461MB for 'small')...`);

    const failure = await new Promise<Error | null>((settle) => {
      execFile(
        binary,
        [wavPath, "--model", model, "--output_format", "json", "--output_dir", warmupDir],
        {
          timeout: 600_000, // 10 min for model download + first run
          env: {
            ...process.env,
            // Prepend the pip --user bin directory; avoid a literal "undefined" entry.
            PATH: `${join(homedir(), ".local", "bin")}:${process.env.PATH ?? ""}`,
          },
        },
        (err) => settle(err),
      );
    });

    if (failure) {
      console.warn(`[stt/whisper] model warmup failed: ${failure.message}`);
      return false;
    }
    console.log(`[stt/whisper] model '${model}' ready`);
    return true;
  } catch (err) {
    // Scratch directory or WAV file could not be created.
    console.warn(`[stt/whisper] model warmup failed: ${err instanceof Error ? err.message : String(err)}`);
    return false;
  } finally {
    // Best-effort cleanup; `force` tolerates a directory that was never created.
    await rm(warmupDir, { recursive: true, force: true }).catch(() => {});
  }
}
|
|
171
|
+
|
|
172
|
+
// ── Provider ─────────────────────────────────────────
|
|
173
|
+
|
|
174
|
+
/**
 * An STT provider that can additionally prepare itself for use.
 */
export interface InstallableWhisperProvider extends SttProvider {
  /**
   * Make sure the provider is ready to transcribe.
   * @returns `true` when the provider is ready, `false` otherwise.
   */
  ensureInstalled(): Promise<boolean>;
}
|
|
178
|
+
|
|
179
|
+
// Install state is module-level so every provider instance joins the same
// in-flight pip install instead of starting a concurrent one.
let installPromise: Promise<string | null> | null = null;
let installFailed = false; // sticky failure to prevent retry spam

/** Language codes the provider will forward to whisper's `--language` flag. */
const WHISPER_LANGS: ReadonlySet<string> = new Set(["af","am","ar","as","az","ba","be","bg","bn","bo","br","bs","ca","cs","cy","da","de","el","en","es","et","eu","fa","fi","fo","fr","gl","gu","ha","haw","he","hi","hr","ht","hu","hy","id","is","it","ja","jw","ka","kk","km","kn","ko","la","lb","ln","lo","lt","lv","mg","mi","mk","ml","mn","mr","ms","mt","my","ne","nl","nn","no","oc","pa","pl","ps","pt","ro","ru","sa","sd","si","sk","sl","sn","so","sq","sr","su","sv","sw","ta","te","tg","th","tk","tl","tr","tt","uk","ur","uz","vi","yi","yo","yue","zh"]);

/** Human-readable message for a caught value of unknown type. */
function describeError(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Map a caller-supplied language hint onto a code in WHISPER_LANGS.
 * Accepts exact codes in any letter case and regional tags such as "pt-BR"
 * or "zh_TW" (reduced to their primary subtag). Returns `undefined` when the
 * hint is absent or unsupported, in which case whisper auto-detects.
 */
function normalizeLanguageHint(hint: string | undefined): string | undefined {
  if (!hint) return undefined;
  const lowered = hint.trim().toLowerCase();
  if (WHISPER_LANGS.has(lowered)) return lowered;
  const primary = lowered.split(/[-_]/)[0];
  return primary !== undefined && WHISPER_LANGS.has(primary) ? primary : undefined;
}

/**
 * Decide whether the model cache directory listing already holds `model`.
 * Matches `<model>.pt` and versioned files such as `large-v3.pt` for "large",
 * but not a different variant such as `small.en.pt` for "small".
 */
function isModelCached(files: readonly string[], model: string): boolean {
  return files.some(
    (file) => file.endsWith(".pt") && (file === `${model}.pt` || file.startsWith(`${model}-`)),
  );
}

/**
 * Read and parse the JSON file whisper wrote into `outputDir`.
 * @throws Error("whisper produced no JSON output") when no `.json` file exists,
 *         Error("whisper output parse failed: …") for read/parse problems.
 */
async function readWhisperJson(outputDir: string): Promise<Record<string, unknown>> {
  let jsonFile: string | undefined;
  try {
    jsonFile = (await readdir(outputDir)).find((f) => f.endsWith(".json"));
  } catch (err) {
    throw new Error(`whisper output parse failed: ${describeError(err)}`);
  }
  if (!jsonFile) throw new Error("whisper produced no JSON output");

  try {
    const parsed: unknown = JSON.parse(await readFile(join(outputDir, jsonFile), "utf8"));
    if (typeof parsed !== "object" || parsed === null) {
      throw new Error("expected a JSON object");
    }
    return parsed as Record<string, unknown>;
  } catch (err) {
    throw new Error(`whisper output parse failed: ${describeError(err)}`);
  }
}

/**
 * Create a speech-to-text provider backed by the local `whisper` CLI.
 *
 * Recognised config fields:
 * - `model`: whisper model name; defaults to "small" when missing, empty or not a string.
 * - `timeoutMs`: per-transcription CLI timeout; defaults to 30000.
 * - `autoInstall`: must be exactly `true` to allow installing whisper via pip3.
 *
 * The returned provider's `transcribe()` rejects when whisper is unavailable,
 * the CLI fails, its output cannot be read, or the transcript is empty.
 */
export function createWhisperProvider(config: SttProviderConfig): InstallableWhisperProvider {
  const model =
    typeof config.model === "string" && config.model.trim() !== "" ? config.model.trim() : "small";
  const timeoutMs = config.timeoutMs ?? 30000;
  const autoInstall = config.autoInstall === true; // explicit opt-in only
  let modelWarmed = false;
  let warmFailed = false; // sticky failure to prevent warmup retry spam
  let warmPromise: Promise<boolean> | null = null;

  /** Locate the whisper binary, installing it first when allowed. */
  async function getBinary(): Promise<string | null> {
    const existing = await findWhisperBinary();
    if (existing) return existing;

    if (!autoInstall) return null;
    if (installFailed) return null; // sticky failure — don't retry every message

    // Singleton: join the in-flight install or start a new one.
    if (!installPromise) {
      installPromise = (async () => {
        try {
          const installed = await installWhisperWithPip();
          if (!installed) installFailed = true;
          return installed;
        } finally {
          installPromise = null;
        }
      })();
    }
    return installPromise;
  }

  return {
    name: `whisper-${model}`,

    canTranscribe(input: SttInput): boolean {
      return input.mime.startsWith("audio/");
    },

    /**
     * Ensure the binary exists and the model is available.
     * Returns `false` when the binary is missing or warmup failed; a failed
     * warmup is remembered and not retried.
     */
    async ensureInstalled(): Promise<boolean> {
      const binary = await getBinary();
      if (!binary) return false;
      if (modelWarmed || warmFailed) return modelWarmed;

      // Singleton warmup: concurrent callers await the same promise.
      if (!warmPromise) {
        warmPromise = (async () => {
          try {
            const cached = await readdir(join(homedir(), ".cache", "whisper"));
            if (isModelCached(cached, model)) {
              modelWarmed = true;
              return true;
            }
          } catch {
            // Cache directory missing or unreadable: fall through to a real warmup.
          }

          // Catch everything so this promise never rejects.
          try {
            const ok = await warmWhisperModel(binary, model);
            if (!ok) warmFailed = true;
            modelWarmed = ok;
            return ok;
          } catch (err) {
            console.warn(`[stt/whisper] warmup error: ${describeError(err)}`);
            warmFailed = true;
            modelWarmed = false;
            return false;
          }
        })().finally(() => {
          warmPromise = null;
        });
      }
      await warmPromise;
      return modelWarmed;
    },

    async transcribe(input: SttInput): Promise<TranscriptionResult> {
      const binary = await getBinary();
      if (!binary) {
        throw new Error(
          autoInstall
            ? "whisper not available and auto-install failed"
            : "whisper not available (auto-install disabled)",
        );
      }

      const outputDir = join(homedir(), ".roundhouse", "whisper-tmp", randomBytes(6).toString("hex"));
      mkdirSync(outputDir, { recursive: true });

      try {
        // A leading "-" would be parsed as a CLI flag; make the path explicit.
        const audioPath = input.localPath.startsWith("-") ? `./${input.localPath}` : input.localPath;
        const args = [audioPath, "--model", model, "--output_format", "json", "--output_dir", outputDir];

        const languageHint = normalizeLanguageHint(input.hint?.language);
        if (languageHint) args.push("--language", languageHint);

        const stderr = await new Promise<string>((resolve, reject) => {
          execFile(
            binary,
            args,
            {
              timeout: timeoutMs,
              env: {
                ...process.env,
                PATH: `${join(homedir(), ".local", "bin")}:${process.env.PATH ?? ""}`,
              },
            },
            (error, _stdout, stderrText) => {
              if (error) reject(new Error(`whisper failed: ${error.message}`));
              else resolve(stderrText);
            },
          );
        });

        const result = await readWhisperJson(outputDir);

        const text = typeof result.text === "string" ? result.text.trim() : "";
        if (!text) throw new Error("whisper returned empty transcript");

        // Language from the JSON output wins; the stderr line is only a fallback.
        let language = stderr.match(/Detected language:\s*(\w+)/)?.[1]?.toLowerCase();
        if (typeof result.language === "string" && result.language) language = result.language;

        // A successful run proves the model is present and loadable.
        modelWarmed = true;

        return { text, language, approximate: true };
      } finally {
        // Always remove the per-call output directory, on success and failure alike.
        await rm(outputDir, { recursive: true, force: true }).catch(() => {});
      }
    },
  };
}
|
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* voice/stt-service.ts — Speech-to-text service
|
|
3
|
+
*
|
|
4
|
+
* Manages provider chain, timeouts, and graceful fallback.
|
|
5
|
+
* Never throws — returns null when transcription is skipped or unavailable, and a "failed" transcript when every provider fails.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { SttProvider, SttInput, SttConfig, AttachmentTranscript } from "./types";
|
|
9
|
+
import type { MessageAttachment } from "../types";
|
|
10
|
+
import { createWhisperProvider, type InstallableWhisperProvider } from "./providers/whisper";
|
|
11
|
+
|
|
12
|
+
/**
 * Provider factory registry: maps a provider config's `type` string to the
 * function that builds the provider. The config parameter type is derived
 * from SttConfig so factories are checked against the real config shape.
 */
const PROVIDER_FACTORIES: Record<
  string,
  (config: SttConfig["providers"][string]) => SttProvider
> = {
  whisper: createWhisperProvider,
};
|
|
16
|
+
|
|
17
|
+
/**
 * Speech-to-text service.
 *
 * Lazily builds the provider chain named in the config, then transcribes audio
 * attachments one at a time, trying each provider in chain order.
 */
export class SttService {
  private providers: SttProvider[] = [];
  private readonly config: SttConfig;
  private initPromise: Promise<void> | null = null;
  /** Tail of the transcription queue: global concurrency is one at a time. */
  private activeStt: Promise<void> = Promise.resolve();
  /** Whether the "not available" notice has already been sent to the user. */
  private installNoticeSent = false;

  constructor(config: SttConfig) {
    this.config = config;
  }

  /** Type guard for providers that expose `ensureInstalled()`. */
  private static isInstallable(provider: SttProvider): provider is InstallableWhisperProvider {
    return typeof (provider as Partial<InstallableWhisperProvider>).ensureInstalled === "function";
  }

  /** Run doInit() once; a failed init is forgotten so the next call retries. */
  private async ensureInitialized(): Promise<void> {
    if (!this.initPromise) {
      this.initPromise = this.doInit().catch((err) => {
        this.initPromise = null; // retry on next call
        throw err;
      });
    }
    await this.initPromise;
  }

  /**
   * Build providers for every chain entry that is configured and has a known
   * factory. Providers are collected locally and assigned at the end, so a
   * retried init can never leave duplicates behind.
   */
  private async doInit(): Promise<void> {
    const loaded: SttProvider[] = [];

    for (const providerName of this.config.chain) {
      const providerConfig = this.config.providers[providerName];
      if (!providerConfig) {
        console.warn(`[stt] provider "${providerName}" in chain but not configured, skipping`);
        continue;
      }

      const type = providerConfig.type;
      // Own-property check: a type such as "constructor" must not resolve to
      // something inherited from Object.prototype.
      const factory = Object.prototype.hasOwnProperty.call(PROVIDER_FACTORIES, type)
        ? PROVIDER_FACTORIES[type]
        : undefined;

      if (!factory) {
        console.warn(`[stt] unknown provider type "${type}", skipping`);
        continue;
      }

      try {
        // Pass autoInstall from service-level config into provider config
        const mergedProviderConfig = {
          ...providerConfig,
          autoInstall: providerConfig.autoInstall ?? this.config.autoInstall ?? false,
        };
        loaded.push(factory(mergedProviderConfig));
        console.log(`[stt] loaded provider: ${providerName} (${type})`);
      } catch (err) {
        console.warn(`[stt] failed to create provider "${providerName}":`, (err as Error).message);
      }
    }

    this.providers = loaded;
    if (loaded.length === 0) {
      console.warn(`[stt] no providers available — transcription disabled`);
    }
  }

  /**
   * Prepare providers (install + warm model) ahead of the first message.
   * Never throws; failures are logged.
   */
  async prepareInBackground(): Promise<void> {
    try {
      await this.ensureInitialized();
    } catch {
      return;
    }

    for (const provider of this.providers) {
      if (!SttService.isInstallable(provider)) continue;
      try {
        const ok = await provider.ensureInstalled();
        if (ok) {
          console.log(`[stt] ${provider.name} ready (installed + model warmed)`);
        } else {
          console.warn(`[stt] ${provider.name} not available after prepare`);
        }
      } catch (err) {
        console.warn(`[stt] ${provider.name} prepare failed:`, (err as Error).message);
      }
    }
  }

  /** Should this attachment be auto-transcribed? */
  shouldTranscribe(attachment: MessageAttachment): boolean {
    if (!this.config.enabled || this.config.mode === "off") return false;

    const auto = this.config.autoTranscribe ?? { voiceMessages: true, audioFiles: false, maxDurationSec: 120 };

    // Only audio
    if (attachment.mediaType !== "audio") return false;
    if (!attachment.mime.startsWith("audio/")) return false;

    // Voice messages (ogg/opus from Telegram) vs general audio files
    const isVoiceMessage = attachment.mime === "audio/ogg" && attachment.name.endsWith(".ogg");
    return isVoiceMessage ? auto.voiceMessages : auto.audioFiles;
  }

  /**
   * Try to transcribe an attachment using the provider chain.
   *
   * @param attachment   the attachment to transcribe
   * @param languageHint optional language code forwarded to providers
   * @param notify       optional callback used once to tell the user that
   *                     transcription is unavailable
   * @returns `null` when the attachment is skipped (STT disabled, not eligible
   *          audio, over the duration limit, no providers, or init failure);
   *          a "completed" transcript on success; a "failed" transcript when
   *          every provider was unavailable or failed.
   */
  async tryTranscribe(
    attachment: MessageAttachment,
    languageHint?: string,
    notify?: (text: string) => Promise<void>,
  ): Promise<AttachmentTranscript | null> {
    // Cheap filter first: don't build providers for attachments we would skip anyway.
    if (!this.shouldTranscribe(attachment)) return null;

    try {
      await this.ensureInitialized();
    } catch (err) {
      console.warn(`[stt] initialization failed:`, (err as Error).message);
      return null;
    }

    if (this.providers.length === 0) return null;

    // Check duration limit using ffprobe; an unknown duration does not block transcription.
    const maxDuration = this.config.autoTranscribe?.maxDurationSec ?? 120;
    if (maxDuration > 0) {
      try {
        const duration = await getAudioDuration(attachment.localPath);
        if (duration !== null && duration > maxDuration) {
          console.log(`[stt] skipping ${attachment.name}: duration ${duration.toFixed(1)}s exceeds ${maxDuration}s limit`);
          return null;
        }
      } catch {
        // Probe failure is treated like "duration unknown".
      }
    }

    const input: SttInput = {
      localPath: attachment.localPath,
      mime: attachment.mime,
      sizeBytes: attachment.sizeBytes,
      hint: {
        language: languageHint,
        isVoiceMessage: attachment.mime === "audio/ogg",
      },
    };

    // Taken before queueing, so reported durations include time spent waiting.
    const startTime = Date.now();

    // Global concurrency limit: one transcription at a time to prevent CPU stampede.
    // Each call waits for the previous tail and installs its own promise as the new tail.
    const previous = this.activeStt;
    let release: () => void = () => {};
    this.activeStt = new Promise<void>((resolve) => {
      release = resolve;
    });
    await previous;

    try {
      for (const provider of this.providers) {
        if (!provider.canTranscribe(input)) continue;

        // Ensure provider is installed (with one-time user notification)
        if (SttService.isInstallable(provider)) {
          let isReady: boolean;
          try {
            isReady = await provider.ensureInstalled();
          } catch (err) {
            console.warn(`[stt] ${provider.name} readiness check failed:`, (err as Error).message);
            continue;
          }
          if (!isReady) {
            if (!this.installNoticeSent && notify) {
              this.installNoticeSent = true;
              try {
                await notify("🎤 Voice transcription not available. Whisper install or model download failed.");
              } catch {
                // Notification is best-effort.
              }
            }
            continue;
          }
        }

        try {
          console.log(`[stt] trying ${provider.name} for ${attachment.name}...`);
          const result = await provider.transcribe(input);
          const durationMs = Date.now() - startTime;

          console.log(`[stt] ${provider.name} succeeded in ${durationMs}ms: "${result.text.slice(0, 80)}"`);

          return {
            text: result.text,
            provider: provider.name,
            language: result.language,
            confidence: result.confidence,
            approximate: true,
            status: "completed" as const,
            durationMs,
          };
        } catch (err) {
          console.warn(`[stt] ${provider.name} failed:`, (err as Error).message);
        }
      }

      // All providers failed
      return {
        text: "",
        provider: "none",
        approximate: true,
        status: "failed" as const,
        error: "All STT providers failed",
        durationMs: Date.now() - startTime,
      };
    } finally {
      // Let the next queued transcription start, whatever happened here.
      release();
    }
  }
}
|
|
225
|
+
|
|
226
|
+
/**
 * Attach transcripts to attachments, one attachment at a time.
 *
 * Every attachment is offered to `sttService.tryTranscribe`; a non-null result
 * is stored on that attachment's `transcript` field, so the array's elements
 * are mutated in place. An error for one attachment is logged and does not
 * stop the remaining ones. Without a service this is a no-op.
 */
export async function enrichAttachmentsWithTranscripts(
  attachments: MessageAttachment[],
  sttService: SttService | null,
  notify?: (text: string) => Promise<void>,
): Promise<void> {
  if (!sttService) return;
  const service = sttService;

  const transcribeOne = async (attachment: MessageAttachment): Promise<void> => {
    const result = await service.tryTranscribe(attachment, undefined, notify);
    if (result) attachment.transcript = result;
  };

  // Sequential on purpose: attachments are processed in array order.
  for (const attachment of attachments) {
    await transcribeOne(attachment).catch((err) => {
      console.error(`[stt] unexpected error transcribing ${attachment.name}:`, (err as Error).message);
    });
  }
}
|
|
248
|
+
|
|
249
|
+
/**
 * Probe an audio file's duration in seconds with `ffprobe`.
 *
 * Resolves `null` when ffprobe reports an error (for example it is not
 * installed, or the 5 second timeout is hit) or when its output does not
 * parse as a number.
 */
async function getAudioDuration(filePath: string): Promise<number | null> {
  const childProcess = await import("node:child_process");
  const probeArgs = ["-i", filePath, "-show_entries", "format=duration", "-v", "quiet", "-of", "csv=p=0"];

  return new Promise<number | null>((settle) => {
    childProcess.execFile("ffprobe", probeArgs, { timeout: 5000 }, (failure, stdout) => {
      if (failure) {
        settle(null);
        return;
      }
      const seconds = Number.parseFloat(stdout.trim());
      settle(Number.isNaN(seconds) ? null : seconds);
    });
  });
}
|
|
265
|
+
|
|
266
|
+
/**
 * Default STT configuration: transcription enabled, whisper as the only
 * provider in the chain, voice messages transcribed automatically and
 * general audio files left alone.
 */
export const DEFAULT_STT_CONFIG: SttConfig = {
  enabled: true,
  mode: "on",
  // Service-level default; a provider's own `autoInstall` takes precedence.
  autoInstall: true,
  chain: ["whisper"],
  autoTranscribe: {
    voiceMessages: true,
    audioFiles: false,
    // Attachments measured as longer than this many seconds are skipped.
    maxDurationSec: 120,
  },
  providers: {
    whisper: {
      type: "whisper",
      model: "small",
      timeoutMs: 30000,
    },
  },
};
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* voice/types.ts — Shared types for voice STT/TTS
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
// ── STT (Speech-to-Text) ────────────────────────────
|
|
6
|
+
|
|
7
|
+
/** A speech-to-text backend that can be placed in the provider chain. */
export interface SttProvider {
  /** Provider name, used in log lines and recorded on produced transcripts. */
  readonly name: string;
  /** Synchronous check whether this provider accepts the given input. */
  canTranscribe(input: SttInput): boolean;
  /** Transcribe the input; the promise rejects when transcription fails. */
  transcribe(input: SttInput): Promise<TranscriptionResult>;
}

/** What a provider receives for one transcription request. */
export interface SttInput {
  /** Path of the audio file on the local filesystem. */
  localPath: string;
  /** MIME type of the file, e.g. "audio/ogg". */
  mime: string;
  /** File size in bytes, copied from the attachment. */
  sizeBytes: number;
  /** Optional hints; providers are free to ignore them. */
  hint?: {
    /** Suggested language code for the audio. */
    language?: string;
    /** Whether the audio is believed to be a recorded voice message. */
    isVoiceMessage?: boolean;
  };
}

/** Successful output of a provider. */
export interface TranscriptionResult {
  /** The transcribed text. */
  text: string;
  /** Language of the audio, when the provider reports one. */
  language?: string;
  /** Provider-specific confidence value, when available. */
  confidence?: number;
  /** Always `true`: transcripts are machine-generated and may be inexact. */
  approximate: true;
}
|
|
29
|
+
|
|
30
|
+
// ── Attachment transcript (stored on MessageAttachment) ──
|
|
31
|
+
|
|
32
|
+
/** Transcription outcome stored on a message attachment. */
export interface AttachmentTranscript {
  /** Transcribed text; empty when `status` is "failed". */
  text: string;
  /** Name of the provider that produced the result, or "none" when all failed. */
  provider: string;
  /** Language of the audio, when known. */
  language?: string;
  /** Provider-specific confidence value, when available. */
  confidence?: number;
  /** Always `true`: transcripts are machine-generated and may be inexact. */
  approximate: true;
  /** Whether a transcript was produced. */
  status: "completed" | "failed";
  /** Failure description, set when `status` is "failed". */
  error?: string;
  /** Elapsed time for the attempt, in milliseconds. */
  durationMs?: number;
}
|
|
42
|
+
|
|
43
|
+
// ── STT config ───────────────────────────────────────
|
|
44
|
+
|
|
45
|
+
/** Configuration for a single provider entry. */
export interface SttProviderConfig {
  /** Provider type; selects which factory builds the provider. */
  type: string;
  /** Timeout for one transcription, in milliseconds. */
  timeoutMs?: number;
  /** Whether the provider may install its own dependencies. */
  autoInstall?: boolean;
  /** Provider-specific settings (for example a model name). */
  [key: string]: unknown;
}

/** Top-level speech-to-text configuration. */
export interface SttConfig {
  /** Master switch; when `false` nothing is transcribed. */
  enabled: boolean;
  /** "off" disables transcription even when `enabled` is true. */
  mode: "on" | "off";
  /** Default `autoInstall` for providers that do not set their own. */
  autoInstall?: boolean;
  /** Provider names to try, in order; each must be a key of `providers`. */
  chain: string[];
  /** Rules for which attachments are transcribed automatically. */
  autoTranscribe: {
    /** Transcribe voice messages. */
    voiceMessages: boolean;
    /** Transcribe other audio files. */
    audioFiles: boolean;
    /** Skip audio longer than this many seconds; values <= 0 disable the check. */
    maxDurationSec: number;
  };
  /** Provider configurations keyed by the names used in `chain`. */
  providers: Record<string, SttProviderConfig>;
}
|