@geravant/sinain 1.0.19 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -1
- package/cli.js +176 -0
- package/index.ts +4 -2
- package/install.js +89 -14
- package/launcher.js +622 -0
- package/openclaw.plugin.json +4 -0
- package/pack-prepare.js +48 -0
- package/package.json +24 -5
- package/sense_client/README.md +82 -0
- package/sense_client/__init__.py +1 -0
- package/sense_client/__main__.py +462 -0
- package/sense_client/app_detector.py +54 -0
- package/sense_client/app_detector_win.py +83 -0
- package/sense_client/capture.py +215 -0
- package/sense_client/capture_win.py +88 -0
- package/sense_client/change_detector.py +86 -0
- package/sense_client/config.py +64 -0
- package/sense_client/gate.py +145 -0
- package/sense_client/ocr.py +347 -0
- package/sense_client/privacy.py +65 -0
- package/sense_client/requirements.txt +13 -0
- package/sense_client/roi_extractor.py +84 -0
- package/sense_client/sender.py +173 -0
- package/sense_client/tests/__init__.py +0 -0
- package/sense_client/tests/test_stream1_optimizations.py +234 -0
- package/setup-overlay.js +82 -0
- package/sinain-agent/.env.example +17 -0
- package/sinain-agent/CLAUDE.md +87 -0
- package/sinain-agent/mcp-config.json +12 -0
- package/sinain-agent/run.sh +248 -0
- package/sinain-core/.env.example +93 -0
- package/sinain-core/package-lock.json +552 -0
- package/sinain-core/package.json +21 -0
- package/sinain-core/src/agent/analyzer.ts +366 -0
- package/sinain-core/src/agent/context-window.ts +172 -0
- package/sinain-core/src/agent/loop.ts +404 -0
- package/sinain-core/src/agent/situation-writer.ts +187 -0
- package/sinain-core/src/agent/traits.ts +520 -0
- package/sinain-core/src/audio/capture-spawner-macos.ts +44 -0
- package/sinain-core/src/audio/capture-spawner-win.ts +37 -0
- package/sinain-core/src/audio/capture-spawner.ts +14 -0
- package/sinain-core/src/audio/pipeline.ts +335 -0
- package/sinain-core/src/audio/transcription-local.ts +141 -0
- package/sinain-core/src/audio/transcription.ts +278 -0
- package/sinain-core/src/buffers/feed-buffer.ts +71 -0
- package/sinain-core/src/buffers/sense-buffer.ts +425 -0
- package/sinain-core/src/config.ts +245 -0
- package/sinain-core/src/escalation/escalation-slot.ts +136 -0
- package/sinain-core/src/escalation/escalator.ts +828 -0
- package/sinain-core/src/escalation/message-builder.ts +370 -0
- package/sinain-core/src/escalation/openclaw-ws.ts +726 -0
- package/sinain-core/src/escalation/scorer.ts +166 -0
- package/sinain-core/src/index.ts +537 -0
- package/sinain-core/src/learning/feedback-store.ts +253 -0
- package/sinain-core/src/learning/signal-collector.ts +218 -0
- package/sinain-core/src/log.ts +24 -0
- package/sinain-core/src/overlay/commands.ts +126 -0
- package/sinain-core/src/overlay/ws-handler.ts +267 -0
- package/sinain-core/src/privacy/index.ts +18 -0
- package/sinain-core/src/privacy/presets.ts +40 -0
- package/sinain-core/src/privacy/redact.ts +92 -0
- package/sinain-core/src/profiler.ts +181 -0
- package/sinain-core/src/recorder.ts +186 -0
- package/sinain-core/src/server.ts +456 -0
- package/sinain-core/src/trace/trace-store.ts +73 -0
- package/sinain-core/src/trace/tracer.ts +94 -0
- package/sinain-core/src/types.ts +427 -0
- package/sinain-core/src/util/dedup.ts +48 -0
- package/sinain-core/src/util/task-store.ts +84 -0
- package/sinain-core/tsconfig.json +18 -0
- package/sinain-knowledge/curation/engine.ts +137 -24
- package/sinain-knowledge/data/git-store.ts +26 -0
- package/sinain-knowledge/data/store.ts +117 -0
- package/sinain-mcp-server/index.ts +417 -0
- package/sinain-mcp-server/package.json +19 -0
- package/sinain-mcp-server/tsconfig.json +15 -0
- package/sinain-memory/graph_query.py +185 -0
- package/sinain-memory/knowledge_integrator.py +450 -0
- package/sinain-memory/memory-config.json +3 -1
- package/sinain-memory/session_distiller.py +162 -0
|
@@ -0,0 +1,335 @@
|
|
|
1
|
+
import { EventEmitter } from "node:events";
|
|
2
|
+
import type { ChildProcess } from "node:child_process";
|
|
3
|
+
import type { AudioPipelineConfig, AudioChunk, AudioSourceTag } from "../types.js";
|
|
4
|
+
import type { CaptureSpawner } from "./capture-spawner.js";
|
|
5
|
+
import type { Profiler } from "../profiler.js";
|
|
6
|
+
import { log, warn, error, debug } from "../log.js";
|
|
7
|
+
|
|
8
|
+
const TAG = "audio";
|
|
9
|
+
|
|
10
|
+
/**
 * Builds the canonical 44-byte RIFF/WAVE header that precedes raw PCM data.
 * The audio format tag is always 1 (PCM); channel count, sample rate and
 * sample width are taken from the arguments.
 *
 * @param dataLength    Size of the PCM payload in bytes.
 * @param sampleRate    Sample rate written to the header.
 * @param channels      Channel count written to the header.
 * @param bitsPerSample Bits per sample (defaults to 16).
 * @returns A freshly allocated 44-byte header buffer.
 */
function createWavHeader(
  dataLength: number,
  sampleRate: number,
  channels: number,
  bitsPerSample: number = 16
): Buffer {
  const bytesPerSample = bitsPerSample / 8;
  const byteRate = sampleRate * channels * bytesPerSample;
  const blockAlign = channels * bytesPerSample;

  const header = Buffer.alloc(44);
  // `cursor` tracks the next write position so no absolute offsets are repeated.
  let cursor = 0;

  // RIFF container chunk
  cursor += header.write("RIFF", cursor);
  cursor = header.writeUInt32LE(36 + dataLength, cursor); // bytes following this field
  cursor += header.write("WAVE", cursor);

  // "fmt " sub-chunk
  cursor += header.write("fmt ", cursor);
  cursor = header.writeUInt32LE(16, cursor); // sub-chunk size for PCM
  cursor = header.writeUInt16LE(1, cursor); // audio format: PCM
  cursor = header.writeUInt16LE(channels, cursor);
  cursor = header.writeUInt32LE(sampleRate, cursor);
  cursor = header.writeUInt32LE(byteRate, cursor);
  cursor = header.writeUInt16LE(blockAlign, cursor);
  cursor = header.writeUInt16LE(bitsPerSample, cursor);

  // "data" sub-chunk header
  cursor += header.write("data", cursor);
  header.writeUInt32LE(dataLength, cursor);

  return header;
}
|
|
42
|
+
|
|
43
|
+
/**
 * Computes the root-mean-square energy of little-endian 16-bit PCM samples.
 *
 * Each sample is normalised by 32768, so the result lies in the range 0.0–1.0.
 * A trailing odd byte is ignored, and a buffer holding less than one full
 * sample yields 0.
 *
 * @param pcmData Raw 16-bit little-endian PCM bytes.
 * @returns RMS energy of the samples.
 */
function calculateRmsEnergy(pcmData: Buffer): number {
  const sampleCount = Math.trunc(pcmData.length / 2);
  if (sampleCount === 0) {
    return 0;
  }

  const endOffset = sampleCount * 2;
  let energySum = 0;
  for (let offset = 0; offset < endOffset; offset += 2) {
    const level = pcmData.readInt16LE(offset) / 32768;
    energySum += level * level;
  }

  return Math.sqrt(energySum / sampleCount);
}
|
|
55
|
+
|
|
56
|
+
/**
 * Pre-allocated buffer size for audio accumulation.
 * 320KB is sufficient for 5s of 16-bit mono audio at 16kHz (160KB)
 * with 2x headroom for stereo or higher sample rates.
 */
const PREALLOC_BUFFER_SIZE = 320 * 1024;

/**
 * Audio capture pipeline.
 *
 * Obtains a capture child process from the injected CaptureSpawner,
 * accumulates the raw 16-bit PCM it writes to stdout, and emits WAV chunks
 * every `config.chunkDurationMs` milliseconds. When `config.vadEnabled` is
 * set, chunks whose RMS energy is below `config.vadThreshold` are dropped.
 *
 * Events: 'chunk' (AudioChunk), 'started', 'stopped', 'muted', 'unmuted', 'error' (Error)
 */
export class AudioPipeline extends EventEmitter {
  private config: AudioPipelineConfig;
  private audioSourceTag: AudioSourceTag;
  // May be undefined when the caller did not inject one; start() reports that.
  private captureSpawner: CaptureSpawner | undefined;
  private process: ChildProcess | null = null;
  // Pre-allocated buffer to reduce GC pressure (vs Buffer.concat per chunk)
  private preallocBuffer: Buffer = Buffer.allocUnsafe(PREALLOC_BUFFER_SIZE);
  private bufferWriteOffset: number = 0;
  private chunkTimer: ReturnType<typeof setInterval> | null = null;
  private running: boolean = false;
  private silentChunks: number = 0;
  private speechChunks: number = 0;
  private errorCount: number = 0;
  private muted: boolean = false;
  private profiler: Profiler | null = null;

  /** Attaches a profiler that receives the pipeline's gauges. */
  setProfiler(p: Profiler): void { this.profiler = p; }

  /**
   * @param config         Capture settings (device, sample rate, channels, chunking, VAD).
   * @param audioSourceTag Tag copied onto every emitted chunk (defaults to "system").
   * @param captureSpawner Factory for the capture child process. No platform
   *                       default is resolved here: when it is omitted,
   *                       start() emits an 'error' event instead of capturing.
   */
  constructor(config: AudioPipelineConfig, audioSourceTag: AudioSourceTag = "system", captureSpawner?: CaptureSpawner) {
    super();
    this.config = config;
    this.audioSourceTag = audioSourceTag;
    this.captureSpawner = captureSpawner;
  }

  /**
   * Spawns the capture process and starts the chunk timer.
   * A no-op (with a warning) when already running. A spawn failure is logged
   * and emitted as 'error'; the pipeline then stays stopped.
   */
  start(): void {
    if (this.running) {
      warn(TAG, "already running, ignoring start()");
      return;
    }

    log(TAG, `starting capture: device=${this.config.device} cmd=${this.config.captureCommand} rate=${this.config.sampleRate}`);

    try {
      this.spawnCaptureProcess();
    } catch (err) {
      error(TAG, "failed to spawn capture process:", err);
      this.emit("error", err);
      return;
    }

    this.muted = false;
    this.startChunkTimer();

    this.running = true;
    this.emit("started");
    log(TAG, "capture started");
  }

  /**
   * Stops capture: cancels the chunk timer, terminates the capture process
   * (SIGTERM, then SIGKILL after 2s if it is still alive), flushes any
   * buffered audio as a final chunk and emits 'stopped'.
   */
  stop(): void {
    if (!this.running) return;

    log(TAG, "stopping capture...");
    this.running = false;
    this.muted = false;
    this.clearChunkTimer();

    const proc = this.process;
    this.process = null;
    if (proc) {
      proc.removeAllListeners();
      // ChildProcess emits 'error' when a kill fails; with every listener
      // removed that would surface as an unhandled 'error' event.
      proc.on("error", () => { /* process is being torn down */ });
      proc.kill("SIGTERM");
      if (proc.exitCode === null && proc.signalCode === null) {
        const killTimer = setTimeout(() => {
          try { proc.kill("SIGKILL"); } catch { /* already dead */ }
        }, 2000);
        // No need to escalate once the process has actually exited.
        proc.once("exit", () => clearTimeout(killTimer));
      }
    }

    if (this.bufferWriteOffset > 0) {
      this.emitChunk();
    }

    this.bufferWriteOffset = 0;
    this.emit("stopped");
    log(TAG, "capture stopped");
  }

  isRunning(): boolean {
    return this.running;
  }

  /**
   * Pauses chunk emission and discards buffered audio while leaving the
   * capture process running. Emits 'muted'.
   */
  mute(): void {
    if (!this.running || this.muted) return;
    this.muted = true;
    this.clearChunkTimer();
    this.bufferWriteOffset = 0;
    log(TAG, `muted (${this.config.captureCommand} process still running)`);
    this.emit("muted");
  }

  /** Resumes chunk emission after mute(). Emits 'unmuted'. */
  unmute(): void {
    if (!this.running || !this.muted) return;
    this.muted = false;
    this.startChunkTimer();
    log(TAG, "unmuted");
    this.emit("unmuted");
  }

  isMuted(): boolean {
    return this.muted;
  }

  getDevice(): string {
    return this.config.device;
  }

  getCaptureCommand(): "sox" | "ffmpeg" | "screencapturekit" {
    return this.config.captureCommand;
  }

  /** Changes the capture device, restarting capture if it was running. */
  switchDevice(device: string): void {
    const wasRunning = this.running;
    if (wasRunning) this.stop();
    this.config = { ...this.config, device };
    log(TAG, `device switched to: ${device}`);
    if (wasRunning) this.start();
  }

  // ── Chunk timer ──

  private startChunkTimer(): void {
    this.chunkTimer = setInterval(() => {
      this.emitChunk();
    }, this.config.chunkDurationMs);
  }

  private clearChunkTimer(): void {
    if (this.chunkTimer) {
      clearInterval(this.chunkTimer);
      this.chunkTimer = null;
    }
  }

  // ── Capture process spawn (platform-dispatched via CaptureSpawner) ──

  /**
   * Spawns the capture process through the injected spawner and wires its events.
   * @throws Error when no CaptureSpawner was supplied to the constructor.
   */
  private spawnCaptureProcess(): void {
    if (!this.captureSpawner) {
      throw new Error("AudioPipeline has no CaptureSpawner; pass one to the constructor");
    }
    this.process = this.captureSpawner.spawn(this.config, this.audioSourceTag);
    const name = process.platform === "win32" ? "win-audio-capture" : "sck-capture";
    this.wireProcessEvents(name);
  }

  // ── Process event wiring ──

  /**
   * Attaches stdout/stderr/error/exit handlers to the current capture process.
   * @param name Label used in log messages and error texts.
   */
  private wireProcessEvents(name: string): void {
    const proc = this.process;
    if (!proc) return;

    // A 44-byte WAV header is stripped only for a process labelled "sox".
    // The labels passed by spawnCaptureProcess() never equal "sox", so this
    // path is currently inactive.
    let headerSkipped = name !== "sox";
    let headerBuf = Buffer.alloc(0);

    proc.stdout?.on("data", (data: Buffer) => {
      // Ignore output from a process that has already been replaced (e.g. by
      // switchDevice(), which stops and immediately restarts): its stdout
      // listener outlives stop() and must not leak into the new session.
      if (!this.running || this.process !== proc) return;

      if (!headerSkipped) {
        headerBuf = Buffer.concat([headerBuf, data]);
        if (headerBuf.length >= 44) {
          const remaining = headerBuf.subarray(44);
          headerSkipped = true;
          headerBuf = Buffer.alloc(0);
          if (remaining.length > 0) {
            this.writeToBuffer(remaining);
          }
        }
        return;
      }

      this.writeToBuffer(data);
    });

    proc.stderr?.on("data", (data: Buffer) => {
      const msg = data.toString().trim();
      // Lines matching "In:…Out:" are filtered out of the log.
      if (msg && !/^In:.*Out:/.test(msg)) {
        log(TAG, `${name} stderr: ${msg.slice(0, 200)}`);
      }
    });

    proc.on("error", (err) => {
      error(TAG, `${name} process error:`, err.message);
      this.errorCount++;
      this.profiler?.gauge("audio.errors", this.errorCount);
      this.emit("error", new Error(`${name} process error: ${err.message}`));
      if (this.running) this.stop();
    });

    proc.on("exit", (code, signal) => {
      log(TAG, `${name} exited: code=${code} signal=${signal}`);
      if (this.running && code !== 0) {
        this.errorCount++;
        this.profiler?.gauge("audio.errors", this.errorCount);
        warn(TAG, `${name} exited unexpectedly, stopping pipeline`);
        this.stop();
      }
    });
  }

  // ── Buffer management ──

  /**
   * Write data to pre-allocated buffer.
   * Falls back to growing buffer if needed (rare case for very long audio).
   * Data received while muted is discarded.
   */
  private writeToBuffer(data: Buffer): void {
    if (this.muted) return;
    // Check if we need to grow the buffer (rare case)
    if (this.bufferWriteOffset + data.length > this.preallocBuffer.length) {
      // Grow to at least 2x current size
      const newSize = Math.max(this.preallocBuffer.length * 2, this.bufferWriteOffset + data.length);
      const newBuffer = Buffer.allocUnsafe(newSize);
      this.preallocBuffer.copy(newBuffer, 0, 0, this.bufferWriteOffset);
      this.preallocBuffer = newBuffer;
    }

    data.copy(this.preallocBuffer, this.bufferWriteOffset);
    this.bufferWriteOffset += data.length;
    this.profiler?.gauge("audio.accumulatorKb", Math.round(this.bufferWriteOffset / 1024));
  }

  // ── Chunk emission ──

  /**
   * Emits the buffered PCM as one WAV 'chunk' event, unless the buffer holds
   * less than one whole frame or VAD classifies the chunk as silent.
   *
   * Only whole frames (2 bytes x channels) are emitted. A trailing partial
   * frame is carried over to the next chunk instead of being dropped, so the
   * byte stream stays sample-aligned across chunk boundaries.
   */
  private emitChunk(): void {
    const pendingBytes = this.bufferWriteOffset;
    if (pendingBytes === 0) return;

    // Guard against a non-positive channel count so the modulo stays valid.
    const channelCount = this.config.channels > 0 ? this.config.channels : 1;
    const frameBytes = 2 * channelCount;
    const alignedLength = pendingBytes - (pendingBytes % frameBytes);

    // Copy the aligned portion out, since preallocBuffer is reused.
    const alignedPcm = Buffer.from(this.preallocBuffer.subarray(0, alignedLength));

    // Move the partial trailing frame (if any) to the front of the buffer.
    const leftover = pendingBytes - alignedLength;
    if (leftover > 0 && alignedLength > 0) {
      this.preallocBuffer.copy(this.preallocBuffer, 0, alignedLength, pendingBytes);
    }
    this.bufferWriteOffset = leftover;

    if (alignedPcm.length === 0) return;

    const energy = calculateRmsEnergy(alignedPcm);
    this.profiler?.gauge("audio.lastChunkKb", Math.round(alignedPcm.length / 1024));

    if (this.config.vadEnabled && energy < this.config.vadThreshold) {
      this.silentChunks++;
      this.profiler?.gauge("audio.silentChunks", this.silentChunks);
      debug(TAG, `VAD: silent (energy=${energy.toFixed(4)} < ${this.config.vadThreshold}), ${this.silentChunks} silent chunk(s)`);
      return;
    }

    if (this.silentChunks > 0) {
      debug(TAG, `VAD: speech detected after ${this.silentChunks} silent chunk(s) (energy=${energy.toFixed(4)})`);
      this.silentChunks = 0;
    }

    this.speechChunks++;
    this.profiler?.gauge("audio.speechChunks", this.speechChunks);

    const wavHeader = createWavHeader(alignedPcm.length, this.config.sampleRate, this.config.channels, 16);
    const wavBuffer = Buffer.concat([wavHeader, alignedPcm]);

    const frameCount = alignedPcm.length / frameBytes;
    const durationMs = Math.round((frameCount / this.config.sampleRate) * 1000);

    const chunk: AudioChunk = {
      buffer: wavBuffer,
      source: this.config.device,
      ts: Date.now(),
      durationMs,
      energy,
      audioSource: this.audioSourceTag,
    };

    debug(TAG, `chunk: ${durationMs}ms, ${wavBuffer.length} bytes, energy=${energy.toFixed(4)}`);
    this.emit("chunk", chunk);
  }
}
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
import { spawn } from "node:child_process";
|
|
2
|
+
import { writeFile, unlink, rmdir, mkdtemp } from "node:fs/promises";
|
|
3
|
+
import { tmpdir } from "node:os";
|
|
4
|
+
import { join } from "node:path";
|
|
5
|
+
import type { AudioChunk, TranscriptResult } from "../types.js";
|
|
6
|
+
import { log, warn, error, debug } from "../log.js";
|
|
7
|
+
|
|
8
|
+
const TAG = "transcribe-local";
|
|
9
|
+
|
|
10
|
+
/** Settings consumed by the whisper.cpp-based local transcription backend. */
export interface LocalTranscriptionConfig {
  /**
   * Path to whisper-cpp binary
   * (default: "whisper-cli").
   */
  bin: string;

  /** Path to GGUF model file. */
  modelPath: string;

  /**
   * Language code, e.g. "en", "ru"
   * (default: "en").
   */
  language: string;

  /**
   * Timeout per chunk in ms
   * (default: 15000).
   */
  timeoutMs: number;
}
|
|
20
|
+
|
|
21
|
+
/**
 * Local transcription via whisper.cpp CLI.
 *
 * Writes each WAV chunk to a temp file, runs the configured whisper binary on
 * it and parses the text printed on stdout.
 * Fully isolated — does not touch the OpenRouter path.
 */
export class LocalTranscriptionBackend {
  private config: LocalTranscriptionConfig;
  private destroyed = false;
  /** Whisper processes currently running, so destroy() can terminate them. */
  private readonly activeProcs = new Set<ReturnType<typeof spawn>>();

  constructor(config: LocalTranscriptionConfig) {
    this.config = config;
    log(TAG, `initialized: bin=${config.bin} model=${config.modelPath} lang=${config.language}`);
  }

  /**
   * Transcribes one audio chunk.
   *
   * @param chunk Chunk whose `buffer` is written verbatim to a temporary WAV file.
   * @returns The transcript, or null when the backend has been destroyed or
   *          whisper produced no text.
   * @throws Error when the temp file cannot be written, or whisper fails to
   *         start, times out, or reports an error.
   */
  async transcribe(chunk: AudioChunk): Promise<TranscriptResult | null> {
    if (this.destroyed) return null;

    const tmpDir = await mkdtemp(join(tmpdir(), "sinain-whisper-"));
    const wavPath = join(tmpDir, "chunk.wav");

    try {
      await writeFile(wavPath, chunk.buffer);

      const startTs = Date.now();
      const text = await this.runWhisper(wavPath);
      const elapsed = Date.now() - startTs;

      if (!text) {
        debug(TAG, `empty result (${elapsed}ms)`);
        return null;
      }

      log(TAG, `transcript (${elapsed}ms): "${text.slice(0, 100)}${text.length > 100 ? "..." : ""}"`);

      return {
        text,
        source: "whisper",
        refined: false,
        // Fixed value: the stdout output parsed here carries no confidence score.
        confidence: 0.85,
        ts: Date.now(),
        audioSource: chunk.audioSource,
      };
    } catch (err) {
      error(TAG, "local transcription failed:", err instanceof Error ? err.message : err);
      throw err;
    } finally {
      // Best-effort cleanup of the temp file and its directory.
      await unlink(wavPath).catch(() => {});
      await rmdir(tmpDir).catch(() => {});
    }
  }

  /**
   * Runs the whisper binary on `wavPath` and resolves with the joined stdout
   * text (an empty string when nothing was recognised or the backend was
   * destroyed meanwhile). Rejects on spawn error, timeout, an error reported
   * on stderr, or a non-zero exit code.
   */
  private runWhisper(wavPath: string): Promise<string> {
    return new Promise((resolve, reject) => {
      // whisper-cli expects ISO 639-1 codes ("en"), not BCP-47 ("en-US") or
      // POSIX-style ("en_US") tags; an empty language falls back to "en".
      const lang = (this.config.language.split(/[-_]/)[0] ?? "").toLowerCase() || "en";
      const args = [
        "-m", this.config.modelPath,
        "-f", wavPath,
        "--no-timestamps",
        "-l", lang,
        "-np",
      ];

      debug(TAG, `exec: ${this.config.bin} ${args.join(" ")}`);

      const proc = spawn(this.config.bin, args, {
        stdio: ["ignore", "pipe", "pipe"],
      });
      this.activeProcs.add(proc);

      // Let the streams decode UTF-8: converting each chunk separately would
      // corrupt multi-byte characters split across chunk boundaries.
      proc.stdout.setEncoding("utf8");
      proc.stderr.setEncoding("utf8");

      let stdout = "";
      let stderr = "";
      proc.stdout.on("data", (d: string) => { stdout += d; });
      proc.stderr.on("data", (d: string) => { stderr += d; });

      // Ensures the promise is settled exactly once (a timeout or spawn error
      // may be followed by 'close') and releases per-run resources.
      let settled = false;
      const settle = (action: () => void): void => {
        if (settled) return;
        settled = true;
        clearTimeout(timer);
        this.activeProcs.delete(proc);
        action();
      };

      const timer = setTimeout(() => {
        proc.kill("SIGKILL");
        settle(() => reject(new Error(`whisper-cpp timed out after ${this.config.timeoutMs}ms`)));
      }, this.config.timeoutMs);

      proc.on("error", (err) => {
        settle(() => reject(new Error(`whisper-cpp spawn error: ${err.message}`)));
      });

      proc.on("close", (code) => {
        settle(() => {
          // The backend was torn down while whisper was running: no result.
          if (this.destroyed) {
            resolve("");
            return;
          }

          // whisper-cli may print errors to stderr but still exit 0
          if (stderr.includes("unknown language") || stderr.includes("error:")) {
            const msg = stderr.trim().slice(0, 300);
            reject(new Error(`whisper-cpp stderr: ${msg}`));
            return;
          }

          if (code !== 0) {
            const msg = stderr.trim().slice(0, 300) || `exit code ${code}`;
            reject(new Error(`whisper-cpp failed: ${msg}`));
            return;
          }

          // Trim every stdout line, drop empty lines and lines that start
          // with "[", and join the rest with single spaces.
          const text = stdout
            .split("\n")
            .map(l => l.trim())
            .filter(l => l.length > 0 && !l.startsWith("["))
            .join(" ")
            .trim();

          resolve(text);
        });
      });
    });
  }

  /**
   * Marks the backend as destroyed and kills any whisper process still
   * running. Pending and subsequent transcribe() calls resolve to null.
   */
  destroy(): void {
    this.destroyed = true;
    for (const proc of this.activeProcs) {
      proc.kill("SIGKILL");
    }
    this.activeProcs.clear();
    log(TAG, "destroyed");
  }
}
|