openbuilder 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +309 -0
- package/SKILL.md +276 -0
- package/bin/openbuilder.mjs +290 -0
- package/package.json +55 -0
- package/scripts/builder-auth.ts +265 -0
- package/scripts/builder-config.ts +166 -0
- package/scripts/builder-join.ts +1613 -0
- package/scripts/builder-report.ts +166 -0
- package/scripts/builder-screenshot.ts +80 -0
- package/scripts/builder-summarize.ts +142 -0
- package/scripts/builder-transcript.ts +62 -0
- package/src/ai/claude.ts +59 -0
- package/src/ai/openai.ts +54 -0
- package/src/ai/prompts.ts +95 -0
- package/src/ai/provider.ts +39 -0
- package/src/analytics/speaker-stats.ts +104 -0
- package/src/audio/capture.ts +374 -0
- package/src/audio/pipeline.ts +189 -0
- package/src/audio/transcriber.ts +126 -0
- package/src/report/generator.ts +149 -0
- package/src/utils/config.ts +102 -0
- package/src/utils/transcript-parser.ts +116 -0
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* speaker-stats.ts — Speaker talk-time analytics calculator
|
|
3
|
+
*
|
|
4
|
+
* Calculates per-speaker statistics from transcript data:
|
|
5
|
+
* - Talk time (estimated from caption timestamps + text length)
|
|
6
|
+
* - Percentage of meeting
|
|
7
|
+
* - Word count
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import type { ParsedTranscript, TranscriptLine } from "../utils/transcript-parser.js";
|
|
11
|
+
|
|
12
|
+
/** Per-speaker talk-time and word-count statistics for one meeting. */
export interface SpeakerStats {
  /** Speaker name exactly as it appeared in the transcript. */
  speaker: string;
  /** Estimated total talk time, in seconds. */
  talkTimeSeconds: number;
  /** Talk time rendered as M:SS or H:MM:SS (see formatDuration). */
  talkTimeFormatted: string;
  /** Share of total meeting duration, rounded to a whole percent (0–100). */
  percentage: number;
  /** Total words attributed to this speaker across all caption lines. */
  wordCount: number;
}
|
|
19
|
+
|
|
20
|
+
/** Aggregate meeting analytics derived from a parsed transcript. */
export interface MeetingAnalytics {
  /** Per-speaker stats, sorted by talk time descending. */
  speakers: SpeakerStats[];
  /** Meeting duration in seconds (max of transcript duration and summed talk time). */
  totalDurationSeconds: number;
  /** Meeting duration rendered as M:SS or H:MM:SS. */
  totalDurationFormatted: string;
  /** Number of distinct speakers seen in the transcript. */
  participantCount: number;
}
|
|
26
|
+
|
|
27
|
+
/**
|
|
28
|
+
* Estimate talk time for each caption line.
|
|
29
|
+
*
|
|
30
|
+
* Strategy: For each speaker's line, estimate duration as the gap until the
|
|
31
|
+
* next line (capped at 30s to avoid inflating during pauses). For the last
|
|
32
|
+
* line, estimate based on word count (~150 words/minute speaking rate).
|
|
33
|
+
*/
|
|
34
|
+
function estimateLineDuration(lines: TranscriptLine[], index: number): number {
|
|
35
|
+
const MAX_GAP_SECONDS = 30;
|
|
36
|
+
const WORDS_PER_SECOND = 2.5; // ~150 words/minute
|
|
37
|
+
|
|
38
|
+
if (index < lines.length - 1) {
|
|
39
|
+
const gap = lines[index + 1]!.totalSeconds - lines[index]!.totalSeconds;
|
|
40
|
+
return Math.min(Math.max(gap, 1), MAX_GAP_SECONDS);
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
// Last line: estimate from word count
|
|
44
|
+
const wordCount = lines[index]!.text.split(/\s+/).length;
|
|
45
|
+
return Math.max(Math.round(wordCount / WORDS_PER_SECOND), 1);
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
/** Format seconds as M:SS or H:MM:SS */
|
|
49
|
+
function formatDuration(totalSeconds: number): string {
|
|
50
|
+
const hours = Math.floor(totalSeconds / 3600);
|
|
51
|
+
const minutes = Math.floor((totalSeconds % 3600) / 60);
|
|
52
|
+
const seconds = Math.round(totalSeconds % 60);
|
|
53
|
+
|
|
54
|
+
if (hours > 0) {
|
|
55
|
+
return `${hours}:${String(minutes).padStart(2, "0")}:${String(seconds).padStart(2, "0")}`;
|
|
56
|
+
}
|
|
57
|
+
return `${minutes}:${String(seconds).padStart(2, "0")}`;
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
/** Calculate speaker statistics from a parsed transcript. */
|
|
61
|
+
export function calculateSpeakerStats(transcript: ParsedTranscript): MeetingAnalytics {
|
|
62
|
+
const speakerMap = new Map<string, { talkTimeSeconds: number; wordCount: number }>();
|
|
63
|
+
|
|
64
|
+
for (let i = 0; i < transcript.lines.length; i++) {
|
|
65
|
+
const line = transcript.lines[i]!;
|
|
66
|
+
const duration = estimateLineDuration(transcript.lines, i);
|
|
67
|
+
const wordCount = line.text.split(/\s+/).length;
|
|
68
|
+
|
|
69
|
+
const existing = speakerMap.get(line.speaker) ?? { talkTimeSeconds: 0, wordCount: 0 };
|
|
70
|
+
existing.talkTimeSeconds += duration;
|
|
71
|
+
existing.wordCount += wordCount;
|
|
72
|
+
speakerMap.set(line.speaker, existing);
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
// Calculate total talk time for percentage calculation
|
|
76
|
+
let totalTalkTime = 0;
|
|
77
|
+
for (const stats of speakerMap.values()) {
|
|
78
|
+
totalTalkTime += stats.talkTimeSeconds;
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
// Use the greater of transcript duration or total talk time for meeting duration
|
|
82
|
+
const totalDuration = Math.max(transcript.durationSeconds, totalTalkTime);
|
|
83
|
+
|
|
84
|
+
const speakers: SpeakerStats[] = [];
|
|
85
|
+
for (const [speaker, stats] of speakerMap.entries()) {
|
|
86
|
+
speakers.push({
|
|
87
|
+
speaker,
|
|
88
|
+
talkTimeSeconds: stats.talkTimeSeconds,
|
|
89
|
+
talkTimeFormatted: formatDuration(stats.talkTimeSeconds),
|
|
90
|
+
percentage: totalDuration > 0 ? Math.round((stats.talkTimeSeconds / totalDuration) * 100) : 0,
|
|
91
|
+
wordCount: stats.wordCount,
|
|
92
|
+
});
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
// Sort by talk time descending
|
|
96
|
+
speakers.sort((a, b) => b.talkTimeSeconds - a.talkTimeSeconds);
|
|
97
|
+
|
|
98
|
+
return {
|
|
99
|
+
speakers,
|
|
100
|
+
totalDurationSeconds: totalDuration,
|
|
101
|
+
totalDurationFormatted: formatDuration(totalDuration),
|
|
102
|
+
participantCount: speakers.length,
|
|
103
|
+
};
|
|
104
|
+
}
|
|
@@ -0,0 +1,374 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* capture.ts — PulseAudio audio capture via ffmpeg
|
|
3
|
+
*
|
|
4
|
+
* Sets up a PulseAudio virtual sink for isolating browser audio,
|
|
5
|
+
* then uses ffmpeg to capture audio from the sink monitor into
|
|
6
|
+
* WAV chunks suitable for Whisper transcription.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { spawn, execSync, type ChildProcess } from "node:child_process";
|
|
10
|
+
import { existsSync, mkdirSync, readdirSync, unlinkSync, rmSync } from "node:fs";
|
|
11
|
+
import { join } from "node:path";
|
|
12
|
+
import { tmpdir } from "node:os";
|
|
13
|
+
|
|
14
|
+
/** Options for startAudioCapture(). */
export interface AudioCaptureOptions {
  /** PulseAudio sink name to create for this capture session. */
  sinkName: string;
  /** Length of each recorded WAV chunk, in seconds (default 30). */
  chunkDurationSec?: number;
  /** Capture sample rate in Hz (default 16000). */
  sampleRate?: number;
  /** Directory for WAV chunks (default: auto-created temp dir). */
  outputDir?: string;
}
|
|
20
|
+
|
|
21
|
+
/**
 * Handle to a running audio-capture session.
 * Call stop() to end capture and remove the PulseAudio sink,
 * then cleanup() to delete the temp chunk directory.
 */
export interface AudioCapture {
  /** The PulseAudio sink name (set PULSE_SINK to this before launching browser) */
  sinkName: string;
  /** Directory where WAV chunks are written */
  outputDir: string;
  /** PulseAudio module index (for unloading) */
  moduleIndex: string;
  /** Returns list of completed chunk file paths (not currently being written) */
  getCompletedChunks(): string[];
  /** Stops ffmpeg and cleans up PulseAudio sink */
  stop(): void;
  /** Cleans up temp files */
  cleanup(): void;
}
|
|
35
|
+
|
|
36
|
+
/** Check if a command is available on PATH */
|
|
37
|
+
function commandExists(cmd: string): boolean {
|
|
38
|
+
try {
|
|
39
|
+
execSync(`which ${cmd}`, { stdio: "ignore" });
|
|
40
|
+
return true;
|
|
41
|
+
} catch {
|
|
42
|
+
return false;
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
/** Check if PulseAudio and ffmpeg are available */
|
|
47
|
+
export function checkAudioDeps(): { available: boolean; missing: string[] } {
|
|
48
|
+
const missing: string[] = [];
|
|
49
|
+
if (!commandExists("pulseaudio") && !commandExists("pactl")) missing.push("pulseaudio");
|
|
50
|
+
if (!commandExists("ffmpeg")) missing.push("ffmpeg");
|
|
51
|
+
return { available: missing.length === 0, missing };
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
/** Start PulseAudio if not already running */
|
|
55
|
+
function ensurePulseAudio(): void {
|
|
56
|
+
try {
|
|
57
|
+
execSync("pactl info", { stdio: "ignore", timeout: 5000 });
|
|
58
|
+
} catch {
|
|
59
|
+
// PulseAudio not running, start it
|
|
60
|
+
try {
|
|
61
|
+
execSync("pulseaudio --start --exit-idle-time=-1", { stdio: "ignore", timeout: 10000 });
|
|
62
|
+
// Wait a moment for it to be ready
|
|
63
|
+
execSync("sleep 1 && pactl info", { stdio: "ignore", timeout: 10000 });
|
|
64
|
+
} catch (err) {
|
|
65
|
+
throw new Error(
|
|
66
|
+
`Failed to start PulseAudio: ${err instanceof Error ? err.message : String(err)}`,
|
|
67
|
+
);
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
/** Create a PulseAudio null sink and return the module index */
|
|
73
|
+
function createSink(sinkName: string): string {
|
|
74
|
+
try {
|
|
75
|
+
const result = execSync(
|
|
76
|
+
`pactl load-module module-null-sink sink_name=${sinkName} sink_properties=device.description=OpenBuilderSink`,
|
|
77
|
+
{ encoding: "utf-8", timeout: 5000 },
|
|
78
|
+
).trim();
|
|
79
|
+
return result;
|
|
80
|
+
} catch (err) {
|
|
81
|
+
throw new Error(
|
|
82
|
+
`Failed to create PulseAudio sink "${sinkName}": ${err instanceof Error ? err.message : String(err)}`,
|
|
83
|
+
);
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
/** Remove a PulseAudio module by index */
|
|
88
|
+
function removeSink(moduleIndex: string): void {
|
|
89
|
+
try {
|
|
90
|
+
execSync(`pactl unload-module ${moduleIndex}`, { stdio: "ignore", timeout: 5000 });
|
|
91
|
+
} catch {
|
|
92
|
+
// Best-effort cleanup
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
/** Create a PulseAudio pipe sink and return the module index and pipe path */
|
|
97
|
+
function createPipeSink(sinkName: string): { moduleIndex: string; pipePath: string } {
|
|
98
|
+
const pipePath = `/tmp/${sinkName}-audio-pipe`;
|
|
99
|
+
|
|
100
|
+
try {
|
|
101
|
+
// Create FIFO pipe
|
|
102
|
+
execSync(`mkfifo ${pipePath}`, { timeout: 5000 });
|
|
103
|
+
|
|
104
|
+
// Load module-pipe-sink
|
|
105
|
+
const result = execSync(
|
|
106
|
+
`pactl load-module module-pipe-sink file=${pipePath} sink_name=${sinkName} format=s16le rate=16000 channels=1 sink_properties=device.description=OpenBuilderPipeSink`,
|
|
107
|
+
{ encoding: "utf-8", timeout: 5000 },
|
|
108
|
+
).trim();
|
|
109
|
+
|
|
110
|
+
// Set as default sink
|
|
111
|
+
execSync(`pactl set-default-sink ${sinkName}`, { timeout: 5000 });
|
|
112
|
+
|
|
113
|
+
return { moduleIndex: result, pipePath };
|
|
114
|
+
} catch (err) {
|
|
115
|
+
// Clean up pipe if it was created
|
|
116
|
+
try {
|
|
117
|
+
unlinkSync(pipePath);
|
|
118
|
+
} catch {}
|
|
119
|
+
throw new Error(
|
|
120
|
+
`Failed to create PulseAudio pipe sink "${sinkName}": ${err instanceof Error ? err.message : String(err)}`,
|
|
121
|
+
);
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
/** Test if pipe-sink approach works by playing a test sound and checking if data flows */
|
|
126
|
+
async function testPipeSink(pipePath: string, sampleRate = 16000): Promise<boolean> {
|
|
127
|
+
return new Promise((resolve) => {
|
|
128
|
+
let resolved = false;
|
|
129
|
+
const timeout = setTimeout(() => {
|
|
130
|
+
if (!resolved) {
|
|
131
|
+
resolved = true;
|
|
132
|
+
resolve(false);
|
|
133
|
+
}
|
|
134
|
+
}, 10000); // 10 second timeout
|
|
135
|
+
|
|
136
|
+
// Spawn ffmpeg to read from pipe - it should receive data if pipe works
|
|
137
|
+
const ffmpeg = spawn("ffmpeg", [
|
|
138
|
+
"-f", "s16le",
|
|
139
|
+
"-ar", String(sampleRate),
|
|
140
|
+
"-ac", "1",
|
|
141
|
+
"-i", pipePath,
|
|
142
|
+
"-t", "2", // Only capture for 2 seconds
|
|
143
|
+
"-f", "null",
|
|
144
|
+
"-"
|
|
145
|
+
], { stdio: ["ignore", "pipe", "pipe"] });
|
|
146
|
+
|
|
147
|
+
// Generate test audio in background
|
|
148
|
+
setTimeout(() => {
|
|
149
|
+
try {
|
|
150
|
+
execSync(`pactl play-sample bell`, { timeout: 3000, stdio: "ignore" });
|
|
151
|
+
} catch {
|
|
152
|
+
// Try alternative test sound
|
|
153
|
+
try {
|
|
154
|
+
execSync(`speaker-test -t sine -f 440 -l 1 -s 1 2>/dev/null`, { timeout: 3000, stdio: "ignore" });
|
|
155
|
+
} catch {
|
|
156
|
+
// No test sound available, but ffmpeg might still detect silence vs no pipe
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
}, 1000);
|
|
160
|
+
|
|
161
|
+
ffmpeg.on("exit", (code) => {
|
|
162
|
+
clearTimeout(timeout);
|
|
163
|
+
if (!resolved) {
|
|
164
|
+
resolved = true;
|
|
165
|
+
// If ffmpeg exits normally (even if no audio), pipe is working
|
|
166
|
+
resolve(code !== null);
|
|
167
|
+
}
|
|
168
|
+
});
|
|
169
|
+
|
|
170
|
+
ffmpeg.on("error", () => {
|
|
171
|
+
clearTimeout(timeout);
|
|
172
|
+
if (!resolved) {
|
|
173
|
+
resolved = true;
|
|
174
|
+
resolve(false);
|
|
175
|
+
}
|
|
176
|
+
});
|
|
177
|
+
});
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
/**
|
|
181
|
+
* Start audio capture from PulseAudio using pipe-sink approach with null-sink fallback.
|
|
182
|
+
*
|
|
183
|
+
* ffmpeg writes segmented WAV files (chunk_000.wav, chunk_001.wav, ...)
|
|
184
|
+
* of `chunkDurationSec` seconds each.
|
|
185
|
+
*/
|
|
186
|
+
export function startAudioCapture(options: AudioCaptureOptions): AudioCapture {
|
|
187
|
+
const {
|
|
188
|
+
sinkName,
|
|
189
|
+
chunkDurationSec = 30,
|
|
190
|
+
sampleRate = 16000,
|
|
191
|
+
outputDir: providedDir,
|
|
192
|
+
} = options;
|
|
193
|
+
|
|
194
|
+
// Ensure deps
|
|
195
|
+
const deps = checkAudioDeps();
|
|
196
|
+
if (!deps.available) {
|
|
197
|
+
throw new Error(`Missing audio dependencies: ${deps.missing.join(", ")}`);
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
// Start PulseAudio
|
|
201
|
+
ensurePulseAudio();
|
|
202
|
+
|
|
203
|
+
// Create output directory
|
|
204
|
+
const outputDir = providedDir ?? join(tmpdir(), `openbuilder-audio-${sinkName}-${Date.now()}`);
|
|
205
|
+
mkdirSync(outputDir, { recursive: true });
|
|
206
|
+
|
|
207
|
+
let moduleIndex: string;
|
|
208
|
+
let pipePath: string | null = null;
|
|
209
|
+
let ffmpeg: ChildProcess;
|
|
210
|
+
let audioMethod: "pipe-sink" | "null-sink";
|
|
211
|
+
|
|
212
|
+
// Try pipe-sink approach first
|
|
213
|
+
try {
|
|
214
|
+
const pipeResult = createPipeSink(sinkName);
|
|
215
|
+
moduleIndex = pipeResult.moduleIndex;
|
|
216
|
+
pipePath = pipeResult.pipePath;
|
|
217
|
+
|
|
218
|
+
console.log(` PulseAudio pipe-sink created: ${sinkName} (module ${moduleIndex})`);
|
|
219
|
+
|
|
220
|
+
// Start ffmpeg reading from the pipe
|
|
221
|
+
const chunkPattern = join(outputDir, "chunk_%03d.wav");
|
|
222
|
+
ffmpeg = spawn(
|
|
223
|
+
"ffmpeg",
|
|
224
|
+
[
|
|
225
|
+
"-f", "s16le",
|
|
226
|
+
"-ar", String(sampleRate),
|
|
227
|
+
"-ac", "1",
|
|
228
|
+
"-i", pipePath,
|
|
229
|
+
"-f", "segment",
|
|
230
|
+
"-segment_time", String(chunkDurationSec),
|
|
231
|
+
"-reset_timestamps", "1",
|
|
232
|
+
chunkPattern,
|
|
233
|
+
],
|
|
234
|
+
{
|
|
235
|
+
stdio: ["ignore", "pipe", "pipe"],
|
|
236
|
+
},
|
|
237
|
+
);
|
|
238
|
+
|
|
239
|
+
audioMethod = "pipe-sink";
|
|
240
|
+
console.log(` Using pipe-sink method with ffmpeg reading from ${pipePath}`);
|
|
241
|
+
|
|
242
|
+
} catch (pipeErr) {
|
|
243
|
+
console.log(` Pipe-sink failed: ${pipeErr instanceof Error ? pipeErr.message : String(pipeErr)}`);
|
|
244
|
+
console.log(` Falling back to null-sink method...`);
|
|
245
|
+
|
|
246
|
+
try {
|
|
247
|
+
// Fallback to null-sink approach (original method)
|
|
248
|
+
moduleIndex = createSink(sinkName);
|
|
249
|
+
pipePath = null;
|
|
250
|
+
console.log(` PulseAudio null-sink created: ${sinkName} (module ${moduleIndex})`);
|
|
251
|
+
|
|
252
|
+
// Start ffmpeg capturing from the sink monitor
|
|
253
|
+
const monitorSource = `${sinkName}.monitor`;
|
|
254
|
+
const chunkPattern = join(outputDir, "chunk_%03d.wav");
|
|
255
|
+
|
|
256
|
+
ffmpeg = spawn(
|
|
257
|
+
"ffmpeg",
|
|
258
|
+
[
|
|
259
|
+
"-f", "pulse",
|
|
260
|
+
"-i", monitorSource,
|
|
261
|
+
"-ac", "1",
|
|
262
|
+
"-ar", String(sampleRate),
|
|
263
|
+
"-f", "segment",
|
|
264
|
+
"-segment_time", String(chunkDurationSec),
|
|
265
|
+
"-reset_timestamps", "1",
|
|
266
|
+
chunkPattern,
|
|
267
|
+
],
|
|
268
|
+
{
|
|
269
|
+
stdio: ["ignore", "pipe", "pipe"],
|
|
270
|
+
},
|
|
271
|
+
);
|
|
272
|
+
|
|
273
|
+
audioMethod = "null-sink";
|
|
274
|
+
console.log(` Using null-sink method with PulseAudio monitor ${monitorSource}`);
|
|
275
|
+
|
|
276
|
+
} catch (nullErr) {
|
|
277
|
+
throw new Error(
|
|
278
|
+
`Both pipe-sink and null-sink methods failed. Pipe: ${pipeErr instanceof Error ? pipeErr.message : String(pipeErr)}. Null: ${nullErr instanceof Error ? nullErr.message : String(nullErr)}`,
|
|
279
|
+
);
|
|
280
|
+
}
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
let ffmpegRunning = true;
|
|
284
|
+
|
|
285
|
+
ffmpeg.on("exit", (code) => {
|
|
286
|
+
ffmpegRunning = false;
|
|
287
|
+
if (code && code !== 255) {
|
|
288
|
+
console.error(` ffmpeg exited with code ${code}`);
|
|
289
|
+
}
|
|
290
|
+
});
|
|
291
|
+
|
|
292
|
+
ffmpeg.on("error", (err) => {
|
|
293
|
+
ffmpegRunning = false;
|
|
294
|
+
console.error(` ffmpeg error: ${err.message}`);
|
|
295
|
+
});
|
|
296
|
+
|
|
297
|
+
// Track which chunk is currently being written
|
|
298
|
+
// ffmpeg writes chunk_NNN.wav sequentially; the latest one is in-progress
|
|
299
|
+
let lastSeenChunks = new Set<string>();
|
|
300
|
+
|
|
301
|
+
const getCompletedChunks = (): string[] => {
|
|
302
|
+
if (!existsSync(outputDir)) return [];
|
|
303
|
+
|
|
304
|
+
const allChunks = readdirSync(outputDir)
|
|
305
|
+
.filter((f) => f.startsWith("chunk_") && f.endsWith(".wav"))
|
|
306
|
+
.sort();
|
|
307
|
+
|
|
308
|
+
if (allChunks.length === 0) return [];
|
|
309
|
+
|
|
310
|
+
// The last file is potentially still being written by ffmpeg
|
|
311
|
+
// Only return chunks that are NOT the latest one (unless ffmpeg has stopped)
|
|
312
|
+
if (ffmpegRunning && allChunks.length > 0) {
|
|
313
|
+
const completed = allChunks.slice(0, -1);
|
|
314
|
+
// Filter to only return newly completed (not yet seen)
|
|
315
|
+
const newChunks = completed.filter((c) => !lastSeenChunks.has(c));
|
|
316
|
+
for (const c of newChunks) lastSeenChunks.add(c);
|
|
317
|
+
return newChunks.map((c) => join(outputDir, c));
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
// ffmpeg stopped — all chunks are complete
|
|
321
|
+
const newChunks = allChunks.filter((c) => !lastSeenChunks.has(c));
|
|
322
|
+
for (const c of newChunks) lastSeenChunks.add(c);
|
|
323
|
+
return newChunks.map((c) => join(outputDir, c));
|
|
324
|
+
};
|
|
325
|
+
|
|
326
|
+
const stop = (): void => {
|
|
327
|
+
if (ffmpegRunning && ffmpeg.pid) {
|
|
328
|
+
// Send SIGINT for graceful shutdown (ffmpeg finalizes the current segment)
|
|
329
|
+
ffmpeg.kill("SIGINT");
|
|
330
|
+
|
|
331
|
+
// Give ffmpeg a moment to finalize, then force kill if needed
|
|
332
|
+
setTimeout(() => {
|
|
333
|
+
if (ffmpegRunning) {
|
|
334
|
+
ffmpeg.kill("SIGKILL");
|
|
335
|
+
}
|
|
336
|
+
}, 3000);
|
|
337
|
+
}
|
|
338
|
+
|
|
339
|
+
removeSink(moduleIndex);
|
|
340
|
+
|
|
341
|
+
// Clean up pipe file if using pipe-sink
|
|
342
|
+
if (pipePath) {
|
|
343
|
+
try {
|
|
344
|
+
unlinkSync(pipePath);
|
|
345
|
+
} catch {
|
|
346
|
+
// Best-effort cleanup
|
|
347
|
+
}
|
|
348
|
+
}
|
|
349
|
+
|
|
350
|
+
ffmpegRunning = false;
|
|
351
|
+
};
|
|
352
|
+
|
|
353
|
+
const cleanup = (): void => {
|
|
354
|
+
try {
|
|
355
|
+
if (existsSync(outputDir)) {
|
|
356
|
+
rmSync(outputDir, { recursive: true, force: true });
|
|
357
|
+
}
|
|
358
|
+
} catch {
|
|
359
|
+
// Best-effort
|
|
360
|
+
}
|
|
361
|
+
};
|
|
362
|
+
|
|
363
|
+
console.log(` ffmpeg capturing audio → ${outputDir}`);
|
|
364
|
+
console.log(` Method: ${audioMethod}, Chunk duration: ${chunkDurationSec}s, Sample rate: ${sampleRate}Hz`);
|
|
365
|
+
|
|
366
|
+
return {
|
|
367
|
+
sinkName,
|
|
368
|
+
outputDir,
|
|
369
|
+
moduleIndex,
|
|
370
|
+
getCompletedChunks,
|
|
371
|
+
stop,
|
|
372
|
+
cleanup,
|
|
373
|
+
};
|
|
374
|
+
}
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pipeline.ts — Audio capture + transcription pipeline
|
|
3
|
+
*
|
|
4
|
+
* Orchestrates PulseAudio audio capture and Whisper transcription
|
|
5
|
+
* to produce a real-time transcript file in the same format as
|
|
6
|
+
* the caption scraping approach: [HH:MM:SS] Speaker: text
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { appendFileSync, writeFileSync } from "node:fs";
|
|
10
|
+
import { startAudioCapture, checkAudioDeps, type AudioCapture } from "./capture.js";
|
|
11
|
+
import { Transcriber, type TranscriptionResult } from "./transcriber.js";
|
|
12
|
+
|
|
13
|
+
/** Options for startAudioPipeline(). */
export interface AudioPipelineOptions {
  /** Unique sink name for this meeting (e.g. "openbuilder_abc-defg-hij") */
  sinkName: string;
  /** Path to write the transcript file */
  transcriptPath: string;
  /** OpenAI API key for Whisper */
  apiKey?: string;
  /** Whisper model (default "whisper-1") */
  whisperModel?: string;
  /** Audio chunk duration in seconds (default 30) */
  chunkDurationSec?: number;
  /** Whether to print transcript lines to stdout */
  verbose?: boolean;
}
|
|
27
|
+
|
|
28
|
+
/** Handle to a running capture + transcription pipeline. */
export interface AudioPipeline {
  /** Stop the pipeline and finalize transcript */
  stop(): Promise<void>;
  /** Clean up temp files */
  cleanup(): void;
  /** Returns timestamp of last transcribed text (for idle detection) */
  getLastTranscriptAt(): number;
}
|
|
36
|
+
|
|
37
|
+
/**
|
|
38
|
+
* Check whether audio capture mode is available on this system.
|
|
39
|
+
* Returns { available, missing } where missing lists absent dependencies.
|
|
40
|
+
*/
|
|
41
|
+
export function isAudioCaptureAvailable(): { available: boolean; missing: string[] } {
|
|
42
|
+
return checkAudioDeps();
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
/**
|
|
46
|
+
* Start the audio capture + transcription pipeline.
|
|
47
|
+
*
|
|
48
|
+
* This function:
|
|
49
|
+
* 1. Creates a PulseAudio sink for browser audio isolation
|
|
50
|
+
* 2. Starts ffmpeg to capture audio into WAV chunks
|
|
51
|
+
* 3. Polls for completed chunks and sends them to Whisper
|
|
52
|
+
* 4. Writes transcription results to the transcript file
|
|
53
|
+
*
|
|
54
|
+
* The PULSE_SINK env var must be set to `sinkName` BEFORE launching
|
|
55
|
+
* the browser, so browser audio is routed to our sink.
|
|
56
|
+
*/
|
|
57
|
+
export async function startAudioPipeline(
|
|
58
|
+
options: AudioPipelineOptions,
|
|
59
|
+
): Promise<AudioPipeline> {
|
|
60
|
+
const {
|
|
61
|
+
sinkName,
|
|
62
|
+
transcriptPath,
|
|
63
|
+
apiKey,
|
|
64
|
+
whisperModel,
|
|
65
|
+
chunkDurationSec = 30,
|
|
66
|
+
verbose = false,
|
|
67
|
+
} = options;
|
|
68
|
+
|
|
69
|
+
// Initialize transcript file
|
|
70
|
+
writeFileSync(transcriptPath, "");
|
|
71
|
+
|
|
72
|
+
// Start audio capture
|
|
73
|
+
const capture: AudioCapture = startAudioCapture({
|
|
74
|
+
sinkName,
|
|
75
|
+
chunkDurationSec,
|
|
76
|
+
});
|
|
77
|
+
|
|
78
|
+
// Initialize transcriber
|
|
79
|
+
const transcriber = new Transcriber({
|
|
80
|
+
apiKey,
|
|
81
|
+
model: whisperModel,
|
|
82
|
+
});
|
|
83
|
+
|
|
84
|
+
let lastTranscriptAt = Date.now();
|
|
85
|
+
let running = true;
|
|
86
|
+
let lastMinuteKey = "";
|
|
87
|
+
let chunkIndex = 0;
|
|
88
|
+
|
|
89
|
+
// Meeting start time — used to calculate absolute timestamps
|
|
90
|
+
const meetingStartTime = Date.now();
|
|
91
|
+
|
|
92
|
+
const writeTranscriptLine = (
|
|
93
|
+
offsetSec: number,
|
|
94
|
+
text: string,
|
|
95
|
+
): void => {
|
|
96
|
+
// Convert chunk offset to absolute time
|
|
97
|
+
const absoluteMs = meetingStartTime + offsetSec * 1000;
|
|
98
|
+
const d = new Date(absoluteMs);
|
|
99
|
+
const hh = String(d.getHours()).padStart(2, "0");
|
|
100
|
+
const mm = String(d.getMinutes()).padStart(2, "0");
|
|
101
|
+
const ss = String(d.getSeconds()).padStart(2, "0");
|
|
102
|
+
const minuteKey = `${hh}:${mm}`;
|
|
103
|
+
|
|
104
|
+
let prefix = "";
|
|
105
|
+
if (lastMinuteKey && minuteKey !== lastMinuteKey) {
|
|
106
|
+
prefix = "\n";
|
|
107
|
+
}
|
|
108
|
+
lastMinuteKey = minuteKey;
|
|
109
|
+
|
|
110
|
+
// MVP: no speaker diarization, use "Speaker" as default
|
|
111
|
+
const line = `[${hh}:${mm}:${ss}] Speaker: ${text}`;
|
|
112
|
+
try {
|
|
113
|
+
appendFileSync(transcriptPath, `${prefix}${line}\n`);
|
|
114
|
+
} catch {
|
|
115
|
+
// Ignore write errors
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
lastTranscriptAt = Date.now();
|
|
119
|
+
|
|
120
|
+
if (verbose) {
|
|
121
|
+
console.log(` [audio] ${line}`);
|
|
122
|
+
}
|
|
123
|
+
};
|
|
124
|
+
|
|
125
|
+
const processChunk = async (chunkPath: string): Promise<void> => {
|
|
126
|
+
const chunkStartSec = chunkIndex * chunkDurationSec;
|
|
127
|
+
chunkIndex++;
|
|
128
|
+
|
|
129
|
+
try {
|
|
130
|
+
const result: TranscriptionResult = await transcriber.transcribeChunk(chunkPath);
|
|
131
|
+
|
|
132
|
+
if (!result.text) return;
|
|
133
|
+
|
|
134
|
+
// Use segment timestamps if available, otherwise write as single block
|
|
135
|
+
if (result.segments.length > 0) {
|
|
136
|
+
for (const seg of result.segments) {
|
|
137
|
+
if (seg.text) {
|
|
138
|
+
writeTranscriptLine(chunkStartSec + seg.start, seg.text);
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
} else {
|
|
142
|
+
writeTranscriptLine(chunkStartSec, result.text);
|
|
143
|
+
}
|
|
144
|
+
} catch (err) {
|
|
145
|
+
console.error(
|
|
146
|
+
` Transcription error (chunk ${chunkIndex}): ${err instanceof Error ? err.message : String(err)}`,
|
|
147
|
+
);
|
|
148
|
+
}
|
|
149
|
+
};
|
|
150
|
+
|
|
151
|
+
// Polling loop: check for new completed chunks and transcribe them
|
|
152
|
+
const pollInterval = setInterval(async () => {
|
|
153
|
+
if (!running) return;
|
|
154
|
+
|
|
155
|
+
const chunks = capture.getCompletedChunks();
|
|
156
|
+
for (const chunkPath of chunks) {
|
|
157
|
+
await processChunk(chunkPath);
|
|
158
|
+
}
|
|
159
|
+
}, 5000); // Check every 5 seconds
|
|
160
|
+
|
|
161
|
+
const stop = async (): Promise<void> => {
|
|
162
|
+
running = false;
|
|
163
|
+
clearInterval(pollInterval);
|
|
164
|
+
|
|
165
|
+
// Stop ffmpeg (this finalizes the last chunk)
|
|
166
|
+
capture.stop();
|
|
167
|
+
|
|
168
|
+
// Wait a moment for the last chunk to be finalized
|
|
169
|
+
await new Promise((r) => setTimeout(r, 1000));
|
|
170
|
+
|
|
171
|
+
// Process any remaining chunks
|
|
172
|
+
const remaining = capture.getCompletedChunks();
|
|
173
|
+
for (const chunkPath of remaining) {
|
|
174
|
+
await processChunk(chunkPath);
|
|
175
|
+
}
|
|
176
|
+
};
|
|
177
|
+
|
|
178
|
+
const cleanup = (): void => {
|
|
179
|
+
capture.cleanup();
|
|
180
|
+
};
|
|
181
|
+
|
|
182
|
+
console.log(` Audio pipeline started (sink: ${sinkName})`);
|
|
183
|
+
|
|
184
|
+
return {
|
|
185
|
+
stop,
|
|
186
|
+
cleanup,
|
|
187
|
+
getLastTranscriptAt: () => lastTranscriptAt,
|
|
188
|
+
};
|
|
189
|
+
}
|