@acpfx/recorder 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md ADDED
@@ -0,0 +1,21 @@
1
+ # @acpfx/recorder
2
+
3
+ ## 0.2.0
4
+
5
+ ### Minor Changes
6
+
7
+ - d757640: Initial release: type-safe contracts, Rust orchestrator, manifest-driven event filtering
8
+
9
+ - Rust schema crate as canonical event type source of truth with codegen to TypeScript + Zod
10
+ - Node manifests (manifest.yaml) declaring consumes/emits contracts
11
+ - Orchestrator event filtering: nodes only receive declared events
12
+ - Rust orchestrator with ratatui TUI (--ui flag)
13
+ - node-sdk with structured logging helpers
14
+ - CI/CD with GitHub Actions and changesets
15
+ - Platform-specific npm packages for Rust binaries (esbuild-style distribution)
16
+
17
+ ### Patch Changes
18
+
19
+ - Updated dependencies [d757640]
20
+ - @acpfx/core@0.2.0
21
+ - @acpfx/node-sdk@0.2.0
package/manifest.yaml ADDED
@@ -0,0 +1,26 @@
1
+ name: recorder
2
+ # NOTE: Source has hardcoded _from === "mic"/"tts" checks (Phase 2 cleanup needed)
3
+ description: Records all events to JSONL and audio tracks to WAV files
4
+ consumes:
5
+ - audio.chunk
6
+ - audio.level
7
+ - speech.partial
8
+ - speech.delta
9
+ - speech.final
10
+ - speech.pause
11
+ - agent.submit
12
+ - agent.delta
13
+ - agent.complete
14
+ - agent.thinking
15
+ - agent.tool_start
16
+ - agent.tool_done
17
+ - control.interrupt
18
+ - control.state
19
+ - control.error
20
+ - lifecycle.ready
21
+ - lifecycle.done
22
+ - log
23
+ - player.status
24
+ emits:
25
+ - lifecycle.ready
26
+ - lifecycle.done
package/package.json ADDED
@@ -0,0 +1,16 @@
1
+ {
2
+ "name": "@acpfx/recorder",
3
+ "version": "0.2.0",
4
+ "type": "module",
5
+ "bin": {
6
+ "acpfx-recorder": "./dist/index.js"
7
+ },
8
+ "main": "./dist/index.js",
9
+ "dependencies": {
10
+ "@acpfx/core": "0.2.0",
11
+ "@acpfx/node-sdk": "0.2.0"
12
+ },
13
+ "scripts": {
14
+ "build": "esbuild src/index.ts --bundle --platform=node --format=esm --outfile=dist/index.js --packages=external"
15
+ }
16
+ }
package/src/index.ts ADDED
@@ -0,0 +1,361 @@
1
+ /**
2
+ * recorder node — captures all events to events.jsonl, writes audio tracks
3
+ * to WAV files, generates conversation.wav and timeline.html.
4
+ *
5
+ * Settings (via ACPFX_SETTINGS):
6
+ * outputDir?: string — base output directory (default: ./recordings); files are always written to a per-run subdirectory <outputDir>/<run-id>
7
+ */
8
+
9
+ import {
10
+ mkdirSync,
11
+ createWriteStream,
12
+ writeFileSync,
13
+ readFileSync,
14
+ type WriteStream,
15
+ } from "node:fs";
16
+ import { open } from "node:fs/promises";
17
+ import { join, resolve } from "node:path";
18
+ import { randomUUID } from "node:crypto";
19
+ import { emit, log, onEvent, handleManifestFlag } from "@acpfx/node-sdk";
20
+
21
// Let the SDK handle the manifest flag first (behaviour defined by @acpfx/node-sdk).
handleManifestFlag();

type Settings = {
  outputDir?: string;
};

/**
 * Parse and validate the JSON settings blob passed via ACPFX_SETTINGS.
 *
 * @param raw - Raw environment value; undefined or empty means "no settings".
 * @returns The validated settings object.
 * @throws Error if the value is not valid JSON, is not a JSON object, or has
 *   an `outputDir` that is not a string.
 */
function parseSettings(raw: string | undefined): Settings {
  if (!raw) return {};

  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    throw new Error(`ACPFX_SETTINGS is not valid JSON: ${reason}`);
  }
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    throw new Error("ACPFX_SETTINGS must be a JSON object");
  }

  const { outputDir } = parsed as Record<string, unknown>;
  // null is treated like "absent" so the default directory still applies.
  if (outputDir === undefined || outputDir === null) return {};
  if (typeof outputDir !== "string") {
    throw new Error("ACPFX_SETTINGS.outputDir must be a string");
  }
  return { outputDir };
}

const settings: Settings = parseSettings(process.env.ACPFX_SETTINGS);
// Short per-run identifier; every run gets its own subdirectory.
const RUN_ID = randomUUID().slice(0, 8);
const OUTPUT_DIR = resolve(settings.outputDir ?? "./recordings", RUN_ID);
30
+
31
// PCM format used for every WAV header and offset computation in this file.
const SAMPLE_RATE = 16_000;
const CHANNELS = 1;
const BYTES_PER_SAMPLE = 2;

/** Bookkeeping for one audio track that is being streamed to disk. */
interface TrackWriter {
  /** Open write stream for the track's WAV file. */
  stream: WriteStream;
  /** Path of the WAV file on disk. */
  path: string;
  /** PCM payload bytes written so far (the 44-byte header is not counted). */
  bytesWritten: number;
}

// Open audio tracks, keyed by track id.
const tracks: Map<string, TrackWriter> = new Map();

// Every event received so far, in arrival order.
const allEvents: Record<string, unknown>[] = [];

// events.jsonl stream; assigned in the main section before any event arrives.
let eventsStream: WriteStream;

// Wall-clock reference (ms); reset in the main section when recording starts.
let startTime: number = Date.now();
47
+
48
+
49
/**
 * Build the 44-byte RIFF/WAVE header for 16-bit PCM audio.
 *
 * @param dataSize - Length of the PCM payload in bytes.
 * @param sr - Sample rate in Hz.
 * @param ch - Number of channels.
 * @returns A 44-byte buffer containing the header.
 */
function createWavHeader(dataSize: number, sr: number, ch: number): Buffer {
  const BITS_PER_SAMPLE = 16;
  const bytesPerSecond = (sr * ch * BITS_PER_SAMPLE) / 8;
  const bytesPerFrame = (ch * BITS_PER_SAMPLE) / 8;

  const buf = Buffer.alloc(44);

  // RIFF chunk descriptor
  buf.write("RIFF", 0);
  buf.writeUInt32LE(dataSize + 36, 4); // size of everything after this field
  buf.write("WAVE", 8);

  // "fmt " sub-chunk
  buf.write("fmt ", 12);
  buf.writeUInt32LE(16, 16); // fmt chunk size
  buf.writeUInt16LE(1, 20); // audio format 1 = PCM
  buf.writeUInt16LE(ch, 22);
  buf.writeUInt32LE(sr, 24);
  buf.writeUInt32LE(bytesPerSecond, 28);
  buf.writeUInt16LE(bytesPerFrame, 32);
  buf.writeUInt16LE(BITS_PER_SAMPLE, 34);

  // "data" sub-chunk header
  buf.write("data", 36);
  buf.writeUInt32LE(dataSize, 40);

  return buf;
}
70
+
71
/**
 * Return the writer for `trackId`, creating its WAV file on first use.
 *
 * A new track's file starts with 44 zero bytes that reserve room for the WAV
 * header; the real header can only be written once the payload size is known.
 *
 * @param trackId - Track identifier taken from the incoming event.
 * @returns The existing or newly created track writer.
 */
function getOrCreateTrack(trackId: string): TrackWriter {
  const existing = tracks.get(trackId);
  if (existing) return existing;

  // trackId comes from event payloads, so it must not be able to pick a path
  // outside OUTPUT_DIR or produce an invalid file name. Path separators,
  // Windows-reserved characters and control characters are replaced.
  const cleaned = trackId.replace(/[<>:"/\\|?*\u0000-\u001f]/g, "_");
  // When the id had to be altered, add the track index so two different ids
  // that clean to the same text do not end up sharing one file.
  const stem = cleaned === trackId ? cleaned : `${cleaned}-${tracks.size}`;
  const path = join(OUTPUT_DIR, `${stem}.wav`);

  const stream = createWriteStream(path);
  // Without a listener, a stream "error" event is thrown as an uncaught exception.
  stream.on("error", (err) => {
    log.error(`Error writing track ${path}: ${err}`);
  });
  // Placeholder for the 44-byte header.
  stream.write(Buffer.alloc(44));

  const created: TrackWriter = { stream, path, bytesWritten: 0 };
  tracks.set(trackId, created);
  return created;
}
84
+
85
/**
 * Close a track's write stream and overwrite the placeholder at the start of
 * its file with a real WAV header sized from `tw.bytesWritten`.
 *
 * @param tw - The track writer to finalize.
 * @returns A promise that resolves once the header has been written and the
 *   file handle closed.
 * @throws Rejects if the stream emits an error while ending, or if opening or
 *   writing the file fails.
 */
async function finalizeTrack(tw: TrackWriter): Promise<void> {
  await new Promise<void>((res, rej) => {
    // Listen before calling end() so an error raised while flushing is not missed.
    tw.stream.once("error", rej);
    tw.stream.end(() => res());
  });

  const header = createWavHeader(tw.bytesWritten, SAMPLE_RATE, CHANNELS);
  const fd = await open(tw.path, "r+");
  try {
    // Position 0: replace the placeholder bytes written when the track was created.
    await fd.write(header, 0, header.length, 0);
  } finally {
    // Always release the handle, even when the header write fails.
    await fd.close();
  }
}
96
+
97
/**
 * Decide which conversation lane an event belongs to, using the same
 * `trackId`-then-`_from` precedence the recorder uses when writing tracks.
 */
function conversationLane(ev: Record<string, unknown>): "mic" | "tts" | undefined {
  const id = (ev.trackId as string) ?? (ev._from as string) ?? "";
  return id === "mic" || id === "tts" ? id : undefined;
}

/**
 * Write `conversation.wav`: the "mic" and "tts" audio chunks mixed into one
 * mono timeline.
 *
 * Each chunk is placed at the offset given by its `ts` relative to the
 * earliest chunk, and overlapping audio is summed with clamping to the int16
 * range. Does nothing when neither track exists or no usable chunk was
 * recorded.
 */
function generateConversationWav(): void {
  if (!tracks.has("mic") && !tracks.has("tts")) return;

  const bytesPerMs = (SAMPLE_RATE * CHANNELS * BYTES_PER_SAMPLE) / 1000;

  // Pass 1: collect the usable chunks and the time span they cover.
  const chunks: Array<{ ts: number; encoded: string }> = [];
  let globalStart = Infinity;
  let globalEnd = 0;

  for (const ev of allEvents) {
    if (ev.type !== "audio.chunk" || conversationLane(ev) === undefined) continue;

    // A chunk with no numeric timestamp cannot be placed on the timeline.
    // Treating it as 0 would stretch the buffer back to the epoch.
    const ts = ev.ts;
    if (typeof ts !== "number" || !Number.isFinite(ts)) continue;

    const rawData = ev.data;
    const encoded = typeof rawData === "string" ? rawData : "";
    const rawDuration = ev.durationMs;
    const declaredMs =
      typeof rawDuration === "number" && Number.isFinite(rawDuration) ? rawDuration : 0;
    // Also size by the actual payload so a chunk with no (or too small a)
    // durationMs is not cut off at the end of the buffer.
    const payloadMs = Buffer.byteLength(encoded, "base64") / bytesPerMs;

    globalStart = Math.min(globalStart, ts);
    globalEnd = Math.max(globalEnd, ts + Math.max(declaredMs, payloadMs));
    chunks.push({ ts, encoded });
  }
  if (chunks.length === 0) return;

  const totalDurationMs = globalEnd - globalStart;
  const totalSamples = Math.ceil((totalDurationMs / 1000) * SAMPLE_RATE);

  // Zero-filled buffer = silence for the whole span.
  const pcm = Buffer.alloc(totalSamples * CHANNELS * BYTES_PER_SAMPLE);

  // Pass 2: mix every chunk in at its timeline position.
  for (const { ts, encoded } of chunks) {
    const offsetSamples = Math.floor(((ts - globalStart) / 1000) * SAMPLE_RATE);
    const offsetBytes = offsetSamples * CHANNELS * BYTES_PER_SAMPLE;
    const data = Buffer.from(encoded, "base64");

    // `i + 1 < data.length` skips a trailing odd byte instead of letting
    // readInt16LE throw a RangeError on it.
    for (let i = 0; i + 1 < data.length && offsetBytes + i + 1 < pcm.length; i += 2) {
      const existing = pcm.readInt16LE(offsetBytes + i);
      const incoming = data.readInt16LE(i);
      const mixed = Math.max(-32768, Math.min(32767, existing + incoming));
      pcm.writeInt16LE(mixed, offsetBytes + i);
    }
  }

  const convPath = join(OUTPUT_DIR, "conversation.wav");
  const header = createWavHeader(pcm.length, SAMPLE_RATE, CHANNELS);
  writeFileSync(convPath, Buffer.concat([header, pcm]));
  log.info(`Wrote conversation.wav (${Math.round(totalDurationMs)}ms)`);
}
166
+
167
/**
 * Write `timeline.html`: a standalone page that embeds the mic and tts WAV
 * files as data URLs, renders them with WaveSurfer, and lists the speech,
 * agent and interrupt events with their time offset from `startTime`.
 *
 * Event text (transcripts, agent output) is treated as untrusted: the marker
 * JSON is escaped so it cannot close the surrounding script element, and the
 * page inserts text via `textContent`, never as HTML.
 *
 * The page loads wavesurfer.js from unpkg, so viewing the waveforms needs
 * network access.
 */
function generateTimelineHtml(): void {
  // Best effort: a track file that cannot be read is shown as "no audio recorded".
  let inputWavB64 = "";
  let outputWavB64 = "";
  try {
    inputWavB64 = readFileSync(join(OUTPUT_DIR, "mic.wav")).toString("base64");
  } catch {
    /* no readable mic.wav */
  }
  try {
    outputWavB64 = readFileSync(join(OUTPUT_DIR, "tts.wav")).toString("base64");
  } catch {
    /* no readable tts.wav */
  }

  // Event types that appear in the marker list.
  const markerTypes = new Set<unknown>([
    "speech.partial",
    "speech.delta",
    "speech.final",
    "speech.pause",
    "agent.submit",
    "agent.delta",
    "agent.complete",
    "control.interrupt",
  ]);

  const markers = allEvents
    .filter((ev) => markerTypes.has(ev.type))
    .map((ev) => {
      const ts = ev.ts;
      const rawText = ev.text ?? ev.delta ?? ev.pendingText ?? ev.reason ?? "";
      return {
        // null (not NaN) when the event has no timestamp; the page shows "?".
        time: typeof ts === "number" ? (ts - startTime) / 1000 : null,
        type: ev.type,
        text: typeof rawText === "string" ? rawText : JSON.stringify(rawText),
      };
    });

  // "<" is escaped so text containing "</script>" or "<!--" cannot end or
  // alter the inline script this JSON is embedded in.
  const markersJson = JSON.stringify(markers).replace(/</g, "\\u003c");

  const inputScript = inputWavB64
    ? `
wsInput = WaveSurfer.create({
  container: '#input-waveform',
  waveColor: '#4caf50',
  progressColor: '#2e7d32',
  height: 80,
  url: 'data:audio/wav;base64,${inputWavB64}',
});
`
    : `document.getElementById('input-waveform').textContent = 'No input audio recorded';`;

  const outputScript = outputWavB64
    ? `
wsOutput = WaveSurfer.create({
  container: '#output-waveform',
  waveColor: '#2196f3',
  progressColor: '#1565c0',
  height: 80,
  url: 'data:audio/wav;base64,${outputWavB64}',
});
`
    : `document.getElementById('output-waveform').textContent = 'No output audio recorded';`;

  const html = `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>acpfx Timeline - ${RUN_ID}</title>
<script src="https://unpkg.com/wavesurfer.js@7"></script>
<style>
* { box-sizing: border-box; margin: 0; padding: 0; }
body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif; background: #1a1a2e; color: #eee; padding: 20px; }
h1 { font-size: 18px; margin-bottom: 16px; color: #88f; }
h2 { font-size: 14px; margin: 16px 0 8px; color: #aaa; }
.track { background: #16213e; border-radius: 8px; padding: 12px; margin-bottom: 12px; }
.track-label { font-size: 12px; color: #888; margin-bottom: 4px; }
.markers { max-height: 300px; overflow-y: auto; font-size: 12px; font-family: monospace; }
.marker { padding: 2px 8px; border-left: 3px solid #444; margin-bottom: 2px; }
.marker.speech { border-color: #4caf50; }
.marker.agent { border-color: #2196f3; }
.marker.control { border-color: #f44336; }
.marker .time { color: #888; margin-right: 8px; }
.marker .type { color: #fff; margin-right: 8px; font-weight: bold; }
.marker .text { color: #ccc; }
.controls { margin: 12px 0; }
button { background: #333; color: #eee; border: 1px solid #555; padding: 6px 16px; border-radius: 4px; cursor: pointer; margin-right: 8px; }
button:hover { background: #444; }
</style>
</head>
<body>
<h1>acpfx Timeline - Run ${RUN_ID}</h1>

<div class="controls">
<button onclick="playPause()">Play / Pause</button>
</div>

<div class="track">
<div class="track-label">Input (Mic)</div>
<div id="input-waveform"></div>
</div>

<div class="track">
<div class="track-label">Output (TTS)</div>
<div id="output-waveform"></div>
</div>

<h2>Event Timeline</h2>
<div class="markers" id="markers"></div>

<script>
const MARKERS = ${markersJson};

// Build a span whose content is plain text (never parsed as HTML).
function makeSpan(cls, text) {
  const el = document.createElement('span');
  el.className = cls;
  el.textContent = text;
  return el;
}

// Render markers
const markersEl = document.getElementById('markers');
MARKERS.forEach(m => {
  const type = String(m.type);
  const cat = type.startsWith('speech') ? 'speech' : type.startsWith('agent') ? 'agent' : 'control';
  const div = document.createElement('div');
  div.className = 'marker ' + cat;
  div.appendChild(makeSpan('time', (typeof m.time === 'number' ? m.time.toFixed(2) : '?') + 's'));
  div.appendChild(makeSpan('type', type));
  div.appendChild(makeSpan('text', String(m.text || '').substring(0, 80)));
  markersEl.appendChild(div);
});

// WaveSurfer instances
let wsInput, wsOutput;

${inputScript}

${outputScript}

function playPause() {
  if (wsInput) wsInput.playPause();
  if (wsOutput) wsOutput.playPause();
}
</script>
</body>
</html>`;

  const htmlPath = join(OUTPUT_DIR, "timeline.html");
  writeFileSync(htmlPath, html);
  log.info(`Wrote timeline.html`);
}
299
+
300
// Promise of the single finalization run; set on the first finalize() call.
let finalizeRun: Promise<void> | undefined;

/**
 * Flush and close all outputs, then generate the derived artifacts
 * (conversation.wav and timeline.html).
 *
 * Safe to call more than once: every call shares the same run, so the files
 * are never finalized twice when several shutdown paths fire.
 *
 * @returns A promise that resolves when finalization has finished. Failures
 *   of individual steps are logged and do not reject it.
 */
async function finalize(): Promise<void> {
  if (!finalizeRun) finalizeRun = runFinalize();
  return finalizeRun;
}

/** Perform the actual finalization steps once, logging per-step failures. */
async function runFinalize(): Promise<void> {
  // Close events stream
  if (eventsStream) {
    await new Promise<void>((res) => eventsStream.end(() => res()));
  }

  // Finalize all audio tracks. One broken track must not stop the remaining
  // tracks or the derived files from being written.
  for (const tw of tracks.values()) {
    try {
      await finalizeTrack(tw);
    } catch (err) {
      log.error(`Error finalizing track ${tw.path}: ${err}`);
    }
  }

  // Generate conversation.wav
  try {
    generateConversationWav();
  } catch (err) {
    log.error(`Error generating conversation.wav: ${err}`);
  }

  // Generate timeline.html
  try {
    generateTimelineHtml();
  } catch (err) {
    log.error(`Error generating timeline.html: ${err}`);
  }

  log.info(`Recording saved to ${OUTPUT_DIR}`);
}
327
+
328
// --- Main ---

mkdirSync(OUTPUT_DIR, { recursive: true });
eventsStream = createWriteStream(join(OUTPUT_DIR, "events.jsonl"));
// Without a listener, a stream "error" event is thrown as an uncaught exception.
eventsStream.on("error", (err) => {
  log.error(`Error writing events.jsonl: ${err}`);
});
startTime = Date.now();

emit({ type: "lifecycle.ready", component: "recorder" });
log.info(`Recording to ${OUTPUT_DIR}`);

const rl = onEvent((event) => {
  // Record every event to events.jsonl (and keep it for the derived files)
  allEvents.push(event);
  eventsStream.write(JSON.stringify(event) + "\n");

  // Capture audio tracks: append the decoded payload to the track's WAV file
  if (event.type === "audio.chunk") {
    const trackId = (event.trackId as string) ?? (event._from as string) ?? "unknown";
    const tw = getOrCreateTrack(trackId);
    const pcm = Buffer.from((event.data as string) ?? "", "base64");
    tw.stream.write(pcm);
    tw.bytesWritten += pcm.length;
  }
});

// Set once shutdown has begun so a second trigger (input close after SIGTERM
// or the reverse) does not start another finalization.
let shuttingDown = false;

/**
 * Finalize the recording and exit the process.
 *
 * @param announceDone - Emit `lifecycle.done` before exiting.
 */
function shutdown(announceDone: boolean): void {
  if (shuttingDown) return;
  shuttingDown = true;
  finalize().then(
    () => {
      if (announceDone) {
        emit({ type: "lifecycle.done", component: "recorder" });
      }
      process.exit(0);
    },
    (err: unknown) => {
      // Do not leave the process hanging on an unhandled rejection.
      log.error(`Error finalizing recording: ${err}`);
      process.exit(1);
    },
  );
}

// Input closed: finalize and announce completion.
rl.on("close", () => shutdown(true));

// SIGTERM: finalize and exit without announcing lifecycle.done.
process.on("SIGTERM", () => shutdown(false));