escribano 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +297 -0
  3. package/dist/0_types.js +279 -0
  4. package/dist/actions/classify-session.js +77 -0
  5. package/dist/actions/create-contexts.js +44 -0
  6. package/dist/actions/create-topic-blocks.js +68 -0
  7. package/dist/actions/extract-metadata.js +24 -0
  8. package/dist/actions/generate-artifact-v3.js +296 -0
  9. package/dist/actions/generate-artifact.js +61 -0
  10. package/dist/actions/generate-summary-v3.js +260 -0
  11. package/dist/actions/outline-index.js +204 -0
  12. package/dist/actions/process-recording-v2.js +494 -0
  13. package/dist/actions/process-recording-v3.js +412 -0
  14. package/dist/actions/process-session.js +183 -0
  15. package/dist/actions/publish-summary-v3.js +303 -0
  16. package/dist/actions/sync-to-outline.js +196 -0
  17. package/dist/adapters/audio.silero.adapter.js +69 -0
  18. package/dist/adapters/cap.adapter.js +94 -0
  19. package/dist/adapters/capture.cap.adapter.js +107 -0
  20. package/dist/adapters/capture.filesystem.adapter.js +124 -0
  21. package/dist/adapters/embedding.ollama.adapter.js +141 -0
  22. package/dist/adapters/intelligence.adapter.js +202 -0
  23. package/dist/adapters/intelligence.mlx.adapter.js +395 -0
  24. package/dist/adapters/intelligence.ollama.adapter.js +741 -0
  25. package/dist/adapters/publishing.outline.adapter.js +75 -0
  26. package/dist/adapters/storage.adapter.js +81 -0
  27. package/dist/adapters/storage.fs.adapter.js +83 -0
  28. package/dist/adapters/transcription.whisper.adapter.js +206 -0
  29. package/dist/adapters/video.ffmpeg.adapter.js +405 -0
  30. package/dist/adapters/whisper.adapter.js +168 -0
  31. package/dist/batch-context.js +329 -0
  32. package/dist/db/helpers.js +50 -0
  33. package/dist/db/index.js +95 -0
  34. package/dist/db/migrate.js +80 -0
  35. package/dist/db/repositories/artifact.sqlite.js +77 -0
  36. package/dist/db/repositories/cluster.sqlite.js +92 -0
  37. package/dist/db/repositories/context.sqlite.js +75 -0
  38. package/dist/db/repositories/index.js +10 -0
  39. package/dist/db/repositories/observation.sqlite.js +70 -0
  40. package/dist/db/repositories/recording.sqlite.js +56 -0
  41. package/dist/db/repositories/subject.sqlite.js +64 -0
  42. package/dist/db/repositories/topic-block.sqlite.js +45 -0
  43. package/dist/db/types.js +4 -0
  44. package/dist/domain/classification.js +60 -0
  45. package/dist/domain/context.js +97 -0
  46. package/dist/domain/index.js +2 -0
  47. package/dist/domain/observation.js +17 -0
  48. package/dist/domain/recording.js +41 -0
  49. package/dist/domain/segment.js +93 -0
  50. package/dist/domain/session.js +93 -0
  51. package/dist/domain/time-range.js +38 -0
  52. package/dist/domain/transcript.js +79 -0
  53. package/dist/index.js +173 -0
  54. package/dist/pipeline/context.js +162 -0
  55. package/dist/pipeline/events.js +2 -0
  56. package/dist/prerequisites.js +226 -0
  57. package/dist/scripts/rebuild-index.js +53 -0
  58. package/dist/scripts/seed-fixtures.js +290 -0
  59. package/dist/services/activity-segmentation.js +333 -0
  60. package/dist/services/activity-segmentation.test.js +191 -0
  61. package/dist/services/app-normalization.js +212 -0
  62. package/dist/services/cluster-merge.js +69 -0
  63. package/dist/services/clustering.js +237 -0
  64. package/dist/services/debug.js +58 -0
  65. package/dist/services/frame-sampling.js +318 -0
  66. package/dist/services/signal-extraction.js +106 -0
  67. package/dist/services/subject-grouping.js +342 -0
  68. package/dist/services/temporal-alignment.js +99 -0
  69. package/dist/services/vlm-enrichment.js +84 -0
  70. package/dist/services/vlm-service.js +130 -0
  71. package/dist/stats/index.js +3 -0
  72. package/dist/stats/observer.js +65 -0
  73. package/dist/stats/repository.js +36 -0
  74. package/dist/stats/resource-tracker.js +86 -0
  75. package/dist/stats/types.js +1 -0
  76. package/dist/test-classification-prompts.js +181 -0
  77. package/dist/tests/cap.adapter.test.js +75 -0
  78. package/dist/tests/capture.cap.adapter.test.js +69 -0
  79. package/dist/tests/classify-session.test.js +140 -0
  80. package/dist/tests/db/repositories.test.js +243 -0
  81. package/dist/tests/domain/time-range.test.js +31 -0
  82. package/dist/tests/integration.test.js +84 -0
  83. package/dist/tests/intelligence.adapter.test.js +102 -0
  84. package/dist/tests/intelligence.ollama.adapter.test.js +178 -0
  85. package/dist/tests/process-v2.test.js +90 -0
  86. package/dist/tests/services/clustering.test.js +112 -0
  87. package/dist/tests/services/frame-sampling.test.js +152 -0
  88. package/dist/tests/utils/ocr.test.js +76 -0
  89. package/dist/tests/utils/parallel.test.js +57 -0
  90. package/dist/tests/visual-observer.test.js +175 -0
  91. package/dist/utils/id-normalization.js +15 -0
  92. package/dist/utils/index.js +9 -0
  93. package/dist/utils/model-detector.js +154 -0
  94. package/dist/utils/ocr.js +80 -0
  95. package/dist/utils/parallel.js +32 -0
  96. package/migrations/001_initial.sql +109 -0
  97. package/migrations/002_clusters.sql +41 -0
  98. package/migrations/003_observations_vlm_fields.sql +14 -0
  99. package/migrations/004_observations_unique.sql +18 -0
  100. package/migrations/005_processing_stats.sql +29 -0
  101. package/migrations/006_vlm_raw_response.sql +6 -0
  102. package/migrations/007_subjects.sql +23 -0
  103. package/migrations/008_artifacts_recording.sql +6 -0
  104. package/migrations/009_artifact_subjects.sql +10 -0
  105. package/package.json +82 -0
  106. package/prompts/action-items.md +55 -0
  107. package/prompts/blog-draft.md +54 -0
  108. package/prompts/blog-research.md +87 -0
  109. package/prompts/card.md +54 -0
  110. package/prompts/classify-segment.md +38 -0
  111. package/prompts/classify.md +37 -0
  112. package/prompts/code-snippets.md +163 -0
  113. package/prompts/extract-metadata.md +149 -0
  114. package/prompts/notes.md +83 -0
  115. package/prompts/runbook.md +123 -0
  116. package/prompts/standup.md +50 -0
  117. package/prompts/step-by-step.md +125 -0
  118. package/prompts/subject-grouping.md +31 -0
  119. package/prompts/summary-v3.md +89 -0
  120. package/prompts/summary.md +77 -0
  121. package/prompts/topic-classifier.md +24 -0
  122. package/prompts/topic-extract.md +13 -0
  123. package/prompts/vlm-batch.md +21 -0
  124. package/prompts/vlm-single.md +19 -0
@@ -0,0 +1,405 @@
1
+ /**
2
+ * FFmpeg Adapter
3
+ *
4
+ * Handles video manipulation using FFmpeg CLI.
5
+ * Used for extracting screenshots and detecting scene changes.
6
+ */
7
+ import { exec, spawn } from 'node:child_process';
8
+ import { mkdir, readdir, readFile, rm } from 'node:fs/promises';
9
+ import os from 'node:os';
10
+ import path from 'node:path';
11
+ import { promisify } from 'node:util';
12
+ import { debugLog } from './intelligence.ollama.adapter.js';
13
+ const execAsync = promisify(exec);
14
/**
 * Read a numeric setting from an environment variable.
 *
 * Unlike `Number(value) || fallback`, an explicit `0` is honored; the fallback
 * is used only when the variable is unset, blank, or not a finite number.
 *
 * @param name - Environment variable name
 * @param fallback - Value returned when the variable is missing or invalid
 * @returns The parsed number, or `fallback`
 */
function readNumericEnv(name, fallback) {
    const raw = process.env[name];
    if (raw === undefined || raw.trim() === '') {
        return fallback;
    }
    const parsed = Number(raw);
    return Number.isFinite(parsed) ? parsed : fallback;
}
// Scene detection configuration (with env var overrides)
// Lower threshold = more sensitive = more scene changes detected
// Examples: 0.3 (sensitive), 0.4 (default), 0.5 (conservative)
const SCENE_THRESHOLD = readNumericEnv('ESCRIBANO_SCENE_THRESHOLD', 0.4);
// Minimum seconds between detected scene changes
// Prevents rapid-fire scene changes from generating too many frames
// (0 keeps every detected scene change)
const SCENE_MIN_INTERVAL = readNumericEnv('ESCRIBANO_SCENE_MIN_INTERVAL', 2);
21
/**
 * Ask ffprobe for the container duration of a video.
 *
 * @param videoPath - Path to the video file
 * @returns Duration in seconds (0 when ffprobe reports no duration)
 * @throws If ffprobe fails or its output is not valid JSON
 */
async function probeDurationSeconds(videoPath) {
    const probeCmd = `ffprobe -v error -show_entries format=duration -of json "${videoPath}"`;
    const { stdout } = await execAsync(probeCmd);
    const data = JSON.parse(stdout);
    return Number.parseFloat(data.format?.duration || '0');
}
/**
 * Creates a VideoService that uses FFmpeg CLI
 *
 * NOTE(review): paths are interpolated into shell command strings inside
 * double quotes; a path containing `"`, `$` or backticks will break the
 * command. Callers should only pass trusted paths.
 */
export function createFfmpegVideoService() {
    // Child process of the spawn()-based operation currently running, if any.
    // Exposed through getPid(); reset to null when the process ends or fails.
    let currentProcess = null;
    return {
        /**
         * Extract frames at specific timestamps, one ffmpeg invocation at a time.
         * A failed extraction is logged and skipped; the remaining timestamps
         * are still processed.
         *
         * @deprecated Use extractFramesAtTimestampsBatch for parallel extraction with progress logging.
         * @param videoPath - Path to source video
         * @param timestamps - Timestamps (in seconds) to extract
         * @param outputDir - Directory to save extracted frames (created if missing)
         * @returns Paths of the frames that were extracted successfully
         */
        extractFramesAtTimestamps: async (videoPath, timestamps, outputDir) => {
            await mkdir(outputDir, { recursive: true });
            const outputPaths = [];
            for (const timestamp of timestamps) {
                // Format timestamp for filename (e.g., 123.45 -> 000123_450).
                // Round to whole milliseconds first: subtracting the integer part
                // and flooring is subject to float error (4.35 would yield 349).
                const totalMs = Math.round(timestamp * 1000);
                const seconds = Math.floor(totalMs / 1000);
                const ms = totalMs - seconds * 1000;
                const formattedTime = `${seconds.toString().padStart(6, '0')}_${ms.toString().padStart(3, '0')}`;
                const fileName = `frame_${formattedTime}.jpg`;
                const outputPath = path.join(outputDir, fileName);
                // -ss before -i is significantly faster for large files (input seeking)
                // -vframes 1 ensures we only extract one frame
                const command = `ffmpeg -ss ${timestamp} -i "${videoPath}" -vframes 1 -q:v 2 "${outputPath}" -y`;
                try {
                    await execAsync(command);
                    outputPaths.push(outputPath);
                }
                catch (error) {
                    console.warn(`Failed to extract frame at ${timestamp}s: ${error.message}`);
                    // Continue with other timestamps even if one fails
                }
            }
            return outputPaths;
        },
        /**
         * Extract frames at regular intervals.
         * The interval (seconds) comes from ESCRIBANO_FRAME_INTERVAL (default 2)
         * and the output width from ESCRIBANO_FRAME_WIDTH (default 1920).
         * The output directory is deleted and recreated before extraction.
         *
         * @deprecated Use extractFramesAtTimestampsBatch + calculateRequiredTimestamps for smart extraction.
         * This method extracts ALL frames, which is inefficient for long recordings.
         * @param videoPath - Path to source video
         * @param _threshold - Unused
         * @param outputDir - Directory to save extracted frames
         * @returns `{ imagePath, timestamp }` per frame, in filename order
         * @throws If ffmpeg exits non-zero or cannot be started
         */
        extractFramesAtInterval: async (videoPath, _threshold, outputDir) => {
            // Clean directory first (removes stale frames from previous runs)
            await rm(outputDir, { recursive: true, force: true });
            await mkdir(outputDir, { recursive: true });
            const frameInterval = Number(process.env.ESCRIBANO_FRAME_INTERVAL) || 2;
            const frameWidth = Number(process.env.ESCRIBANO_FRAME_WIDTH) || 1920;
            // Get expected frame count for progress calculation
            let expectedFrames = 0;
            try {
                const duration = await probeDurationSeconds(videoPath);
                if (duration > 0) {
                    expectedFrames = Math.ceil(duration / frameInterval);
                    console.log(`Expected frames: ${expectedFrames} (duration: ${Math.round(duration)}s, interval: ${frameInterval}s)`);
                }
            }
            catch {
                console.warn('Could not get video metadata, progress will show frame count only');
            }
            // Build FFmpeg command with clear sections
            const ffmpegParts = [
                'ffmpeg',
                '-progress pipe:2', // Structured progress output to stderr
                '-hwaccel videotoolbox', // VideoToolbox hardware decoding (Apple platforms)
                `-i "${videoPath}"`, // Input file
                `-vf "scale=${frameWidth}:-2,fps=1/${frameInterval}"`, // Scale + FPS filter
                '-an -q:v 5', // No audio, JPEG quality 5
                `"${outputDir}/scene_%04d.jpg"`, // Output pattern
                '-y', // Overwrite
            ];
            const command = ffmpegParts.join(' ');
            debugLog(`Running frame extraction: ${command}`);
            try {
                currentProcess = spawn('sh', ['-c', command]);
                await new Promise((resolve, reject) => {
                    let lastLoggedPercent = 0;
                    let firstProgressLogged = false;
                    currentProcess?.stderr?.on('data', (data) => {
                        const output = data.toString();
                        const frameMatch = output.match(/frame=(\d+)/);
                        const fpsMatch = output.match(/fps=\s*([\d.]+)/);
                        if (frameMatch && expectedFrames > 0) {
                            const frames = parseInt(frameMatch[1], 10);
                            const percent = Math.floor((frames / expectedFrames) * 100);
                            // log on the first frame and then every 5% increment
                            if (!firstProgressLogged) {
                                firstProgressLogged = true;
                                console.log(`Extracting frames: 0/${expectedFrames} (0%)`);
                            }
                            if (percent - lastLoggedPercent >= 5) {
                                lastLoggedPercent = percent;
                                let etaStr = '';
                                if (fpsMatch) {
                                    const fps = parseFloat(fpsMatch[1]);
                                    if (fps > 0) {
                                        const remainingFrames = expectedFrames - frames;
                                        const etaSeconds = Math.ceil(remainingFrames / fps);
                                        etaStr = ` - ETA: ${etaSeconds}s`;
                                    }
                                }
                                console.log(`Extracting frames: ${frames}/${expectedFrames} (${percent}%)${etaStr}`);
                            }
                        }
                    });
                    currentProcess?.on('close', (code) => {
                        currentProcess = null;
                        if (code === 0) {
                            if (expectedFrames > 0) {
                                console.log(`Extracting frames: ${expectedFrames}/${expectedFrames} (100%)`);
                            }
                            resolve();
                        }
                        else {
                            reject(new Error(`Frame extraction failed with code ${code}`));
                        }
                    });
                    currentProcess?.on('error', (err) => {
                        currentProcess = null;
                        reject(err);
                    });
                });
                const files = await readdir(outputDir);
                const framePaths = files
                    .filter((f) => f.startsWith('scene_') && f.endsWith('.jpg'))
                    .map((f) => path.join(outputDir, f))
                    .sort();
                console.log(`Extracted ${framePaths.length} frames`);
                // The fps filter emits one frame per interval, so the n-th file
                // is assigned the timestamp n * frameInterval.
                return framePaths.map((p, i) => ({
                    imagePath: p,
                    timestamp: i * frameInterval,
                }));
            }
            catch (error) {
                currentProcess = null;
                throw new Error(`Visual log extraction failed: ${error.message}`);
            }
        },
        /**
         * Extract frames at specific timestamps efficiently.
         * Uses parallel batch extraction with progress logging.
         *
         * This is the preferred method for smart extraction:
         * 1. Run scene detection first
         * 2. Calculate required timestamps via frame-sampling.calculateRequiredTimestamps()
         * 3. Extract only those frames (not all frames)
         *
         * The output directory is deleted and recreated before extraction.
         * Files are named by the timestamp's index in `timestamps`
         * (frame_000000.jpg, frame_000001.jpg, ...). If any single extraction
         * fails, the whole call rejects.
         *
         * @param videoPath - Path to source video
         * @param timestamps - Array of timestamps (in seconds) to extract
         * @param outputDir - Directory to save extracted frames
         * @param concurrency - Number of parallel extractions (default: 4);
         *   values below 1 or non-numeric values are treated as 1
         * @returns `{ imagePath, timestamp }` entries sorted by timestamp
         *
         * @example
         * // Extract frames at 0s, 10s, 20s, 30s with 4 parallel workers
         * const frames = await extractFramesAtTimestampsBatch(
         *   '/path/to/video.mp4',
         *   [0, 10, 20, 30],
         *   '/tmp/frames',
         *   4
         * );
         * // Returns: [{ imagePath: '/tmp/frames/frame_000000.jpg', timestamp: 0 }, ...]
         */
        extractFramesAtTimestampsBatch: async (videoPath, timestamps, outputDir, concurrency = 4) => {
            // Clean and create output directory
            await rm(outputDir, { recursive: true, force: true });
            await mkdir(outputDir, { recursive: true });
            const frameWidth = Number(process.env.ESCRIBANO_FRAME_WIDTH) || 1920;
            const total = timestamps.length;
            const results = [];
            if (total === 0) {
                console.log('No frames to extract');
                return results;
            }
            console.log(`Extracting ${total} frames at specific timestamps...`);
            console.log(`Output directory: ${outputDir}`);
            // The loop below advances by the batch size, so it must be a whole
            // number >= 1: zero or a negative value would never terminate and a
            // fractional value would extract some frames twice.
            const requestedBatchSize = Math.floor(Number(concurrency));
            const batchSize = requestedBatchSize >= 1 ? requestedBatchSize : 1;
            const startTime = Date.now();
            let lastLoggedPercent = 0;
            // Process in batches of `batchSize`
            for (let i = 0; i < timestamps.length; i += batchSize) {
                const batch = timestamps.slice(i, i + batchSize);
                const promises = batch.map(async (timestamp, batchIndex) => {
                    const frameIndex = i + batchIndex;
                    const fileName = `frame_${frameIndex.toString().padStart(6, '0')}.jpg`;
                    const outputPath = path.join(outputDir, fileName);
                    // Build FFmpeg command with clear sections
                    // -ss before -i for fast seeking (input seeking vs output seeking)
                    const ffmpegParts = [
                        'ffmpeg',
                        '-ss',
                        String(timestamp), // Seek position (before -i for speed)
                        '-hwaccel videotoolbox', // VideoToolbox hardware decoding (Apple platforms)
                        `-i "${videoPath}"`, // Input file
                        '-vframes 1', // Extract single frame
                        `-vf "scale=${frameWidth}:-2"`, // Scale width, auto height
                        '-q:v 5', // JPEG quality (2=best, 31=worst)
                        `"${outputPath}"`, // Output file
                        '-y', // Overwrite
                    ];
                    const command = ffmpegParts.join(' ');
                    await execAsync(command);
                    return { imagePath: outputPath, timestamp };
                });
                const batchResults = await Promise.all(promises);
                results.push(...batchResults);
                // Progress logging with ETA (every 5% or at completion)
                const processed = results.length;
                const percent = Math.floor((processed / total) * 100);
                if (percent - lastLoggedPercent >= 5 || processed === total) {
                    lastLoggedPercent = percent;
                    // Calculate ETA
                    const elapsed = (Date.now() - startTime) / 1000;
                    const rate = processed / elapsed; // frames per second
                    const remaining = total - processed;
                    const etaSeconds = rate > 0 ? Math.ceil(remaining / rate) : 0;
                    const etaStr = processed < total ? ` - ETA: ${etaSeconds}s` : '';
                    console.log(`Extracting frames: ${processed}/${total} (${percent}%)${etaStr}`);
                }
            }
            console.log(`Extracted ${results.length} frames`);
            return results.sort((a, b) => a.timestamp - b.timestamp);
        },
        /**
         * Get video metadata using ffprobe
         *
         * @param videoPath - Path to the video file
         * @returns `{ duration, width, height }`; each value is 0 when ffprobe
         *   does not report it. Width/height come from the first stream that
         *   has both.
         * @throws If ffprobe fails or its output is not valid JSON
         */
        getMetadata: async (videoPath) => {
            // -show_entries allows selective extraction of metadata
            // -of json returns machine-readable format
            const command = `ffprobe -v error -show_entries format=duration -show_entries stream=width,height -of json "${videoPath}"`;
            try {
                const { stdout } = await execAsync(command);
                const data = JSON.parse(stdout);
                const duration = data.format?.duration
                    ? Number.parseFloat(data.format.duration)
                    : 0;
                const videoStream = data.streams?.find((s) => s.width && s.height);
                return {
                    duration,
                    width: videoStream?.width || 0,
                    height: videoStream?.height || 0,
                };
            }
            catch (error) {
                throw new Error(`Failed to get video metadata: ${error.message}`);
            }
        },
        /**
         * Run visual indexing (OCR + CLIP) using the Python base script.
         * The script is launched via `uv run` with one worker per CPU core
         * and is resolved relative to the current working directory
         * (`src/scripts/visual_observer_base.py`).
         *
         * @param framesDir - Directory containing the extracted frames
         * @param outputPath - File the script writes its JSON result to
         * @returns The parsed JSON content of `outputPath`
         * @throws If the script fails or the output cannot be read/parsed
         */
        runVisualIndexing: async (framesDir, outputPath) => {
            const scriptPath = path.join(process.cwd(), 'src', 'scripts', 'visual_observer_base.py');
            const frameInterval = Number(process.env.ESCRIBANO_FRAME_INTERVAL) || 2;
            const workers = os.cpus().length;
            // Use uv run to execute the script with its environment
            // --workers enables parallel OCR processing
            const command = `uv run "${scriptPath}" --frames-dir "${framesDir}" --output "${outputPath}" --frame-interval ${frameInterval} --workers ${workers}`;
            try {
                await execAsync(command, {
                    cwd: path.join(process.cwd(), 'src', 'scripts'),
                });
                const content = await readFile(outputPath, 'utf-8');
                return JSON.parse(content);
            }
            catch (error) {
                throw new Error(`Visual indexing failed: ${error.message}`);
            }
        },
        /**
         * Detect scene changes in video using ffmpeg scene filter.
         * Returns timestamps of significant visual changes.
         * Never rejects: on failure a warning is logged and an empty array
         * is returned.
         *
         * Configuration via environment variables:
         * - ESCRIBANO_SCENE_THRESHOLD: Sensitivity (0.0-1.0, lower=more sensitive)
         * - ESCRIBANO_SCENE_MIN_INTERVAL: Min seconds between scene changes
         *
         * @param videoPath - Path to source video
         * @param config - Optional `{ threshold, minInterval }` overrides
         * @returns Ascending timestamps (seconds), each at least `minInterval`
         *   after the previously kept one
         */
        detectSceneChanges: async (videoPath, config = {}) => {
            // Use env vars as defaults, allow override via config parameter
            const threshold = config.threshold ?? SCENE_THRESHOLD;
            const minInterval = config.minInterval ?? SCENE_MIN_INTERVAL;
            // Get video duration for progress calculation
            let duration = 0;
            try {
                duration = await probeDurationSeconds(videoPath);
                console.log(`Scene detection: analyzing ${Math.round(duration)}s video (threshold=${threshold})`);
            }
            catch {
                console.warn('Could not get video duration, progress will not be shown');
            }
            // Build FFmpeg command with progress output
            const ffmpegParts = [
                'ffmpeg',
                '-skip_frame nokey', // Only decode I-frames (keyframes) for massive speedup
                '-hwaccel videotoolbox', // VideoToolbox hardware decoding (Apple platforms)
                '-progress pipe:2', // Structured progress output to stderr
                `-i "${videoPath}"`, // Input file
                `-vf "select='gt(scene,${threshold})',showinfo"`, // Scene detection filter
                '-vsync vfr', // Variable frame rate output
                '-f null', // Null output format
                '-', // Output to null
            ];
            const command = ffmpegParts.join(' ');
            debugLog(`Running scene detection: ${command}`);
            try {
                currentProcess = spawn('sh', ['-c', command]);
                const timestamps = [];
                const ptsTimeRegex = /pts_time:(\d+\.?\d*)/g;
                let lastLoggedPercent = 0;
                await new Promise((resolve, reject) => {
                    // showinfo output is accumulated and parsed once ffmpeg exits
                    let stderrBuffer = '';
                    currentProcess?.stderr?.on('data', (data) => {
                        const output = data.toString();
                        stderrBuffer += output;
                        // Parse progress from out_time_ms
                        if (duration > 0) {
                            const outTimeMatch = output.match(/out_time_ms=(\d+)/);
                            if (outTimeMatch) {
                                const outTimeMs = parseInt(outTimeMatch[1], 10);
                                // Despite its name, the value is divided by 1e6 to get seconds
                                const outTimeSec = outTimeMs / 1_000_000;
                                const percent = Math.floor((outTimeSec / duration) * 100);
                                // Log every 5%
                                if (percent - lastLoggedPercent >= 5) {
                                    lastLoggedPercent = percent;
                                    const remaining = duration - outTimeSec;
                                    const etaMin = Math.ceil(remaining / 60);
                                    console.log(`Scene detection: ${Math.round(outTimeSec)}s/${Math.round(duration)}s (${percent}%) - ETA: ${etaMin}m`);
                                }
                            }
                        }
                    });
                    currentProcess?.on('close', (code) => {
                        currentProcess = null;
                        if (code === 0) {
                            // Parse all pts_time values from accumulated stderr
                            const matches = stderrBuffer.matchAll(ptsTimeRegex);
                            for (const match of matches) {
                                const timestamp = Number.parseFloat(match[1] ?? '0');
                                if (!Number.isNaN(timestamp) && timestamp > 0) {
                                    timestamps.push(timestamp);
                                }
                            }
                            if (duration > 0) {
                                console.log(`Scene detection: ${Math.round(duration)}s/${Math.round(duration)}s (100%)`);
                            }
                            console.log(`Found ${timestamps.length} scene changes before deduplication`);
                            resolve();
                        }
                        else {
                            reject(new Error(`Scene detection failed with code ${code}`));
                        }
                    });
                    currentProcess?.on('error', (err) => {
                        currentProcess = null;
                        reject(err);
                    });
                });
                // Sort and deduplicate (remove timestamps within minInterval of each other)
                const sortedTimestamps = timestamps.sort((a, b) => a - b);
                const deduplicated = [];
                for (const ts of sortedTimestamps) {
                    // Check if this timestamp is at least minInterval seconds after the last one
                    const lastTs = deduplicated[deduplicated.length - 1];
                    if (lastTs === undefined || ts - lastTs >= minInterval) {
                        deduplicated.push(ts);
                    }
                }
                console.log(`Scene detection complete: ${deduplicated.length} scenes (after ${minInterval}s deduplication)`);
                return deduplicated;
            }
            catch (error) {
                currentProcess = null;
                console.warn(`Scene detection failed: ${error.message}`);
                return [];
            }
        },
        /** Name under which this service reports itself. */
        getResourceName() {
            return 'ffmpeg';
        },
        /**
         * PID of the spawn()-based ffmpeg process currently running
         * (interval extraction or scene detection), or null when none is.
         */
        getPid() {
            return currentProcess?.pid ?? null;
        },
    };
}
@@ -0,0 +1,168 @@
1
+ /**
2
+ * Whisper Adapter
3
+ *
4
+ * Transcribes audio using whisper.cpp or OpenAI's whisper CLI.
5
+ * Shells out to the whisper binary for simplicity.
6
+ *
7
+ * Prerequisites:
8
+ * - whisper.cpp installed: brew install whisper-cpp
9
+ * - ffmpeg installed: brew install ffmpeg (for audio format conversion)
10
+ * - Or Python whisper: pip install openai-whisper
11
+ */
12
+ import { exec } from 'node:child_process';
13
+ import { readFile, unlink } from 'node:fs/promises';
14
+ import { promisify } from 'node:util';
15
+ const execAsync = promisify(exec);
16
/**
 * Return a path to audio that can be handed to the whisper binary.
 *
 * Files whose extension is wav, flac or mp3 (case-insensitive) are returned
 * untouched. Anything else is converted with ffmpeg to a 16 kHz mono WAV
 * written next to the input as `<input>.converted.wav`.
 *
 * @param audioPath - Path to the source audio file
 * @returns `audioPath` itself, or the path of the converted WAV
 * @throws If the ffmpeg conversion fails or exceeds the 10 minute timeout
 */
async function convertToWavIfNeeded(audioPath) {
    const passthroughExtensions = new Set(['wav', 'flac', 'mp3']);
    const extension = audioPath.toLowerCase().split('.').pop() || '';
    if (passthroughExtensions.has(extension)) {
        return audioPath;
    }
    const wavPath = `${audioPath}.converted.wav`;
    const tenMinutesMs = 10 * 60 * 1000;
    console.log(`Converting ${audioPath} to WAV format...`);
    try {
        await execAsync(`ffmpeg -i "${audioPath}" -f wav -ar 16000 -ac 1 "${wavPath}" -y`, { timeout: tenMinutesMs });
    }
    catch (conversionError) {
        console.error(`Audio conversion failed for ${audioPath}`);
        throw new Error(`Failed to convert audio to WAV: ${conversionError.message}`);
    }
    console.log(`Conversion complete: ${wavPath}`);
    return wavPath;
}
33
/**
 * Creates a TranscriptionService that uses whisper CLI
 *
 * @param config - Optional settings:
 *   - binaryPath: whisper executable to run (default 'whisper-cpp')
 *   - model: value passed to the binary's `-m` flag (default 'base')
 *   - outputFormat: default 'json'
 *   - language: value passed to `-l`; omitted when not set
 *   - cwd: working directory for the whisper process; defaults to the
 *     current process's working directory when not set
 * @returns An object whose `transcribe(audioPath)` resolves to the transcript
 */
export function createWhisperTranscriber(config = {}) {
    const resolvedConfig = {
        binaryPath: config.binaryPath ?? 'whisper-cpp',
        model: config.model ?? 'base',
        outputFormat: config.outputFormat ?? 'json',
        language: config.language,
        // transcribeWithWhisper reads config.cwd, so it has to be carried over
        // here; otherwise a caller-supplied working directory is ignored.
        cwd: config.cwd,
    };
    return {
        transcribe: (audioPath) => transcribeWithWhisper(audioPath, resolvedConfig),
    };
}
47
/**
 * Transcribe audio file using whisper CLI
 *
 * The input is converted to WAV first when needed. The binary is run with
 * `-oj`, and the resulting `<input>.json` file is parsed; if that file cannot
 * be read or parsed, the process's stdout is parsed instead. Temporary files
 * (the JSON output and any converted WAV) are removed on every exit path.
 *
 * @param audioPath - Path to the audio file to transcribe
 * @param config - Resolved settings: binaryPath, model, language (optional), cwd (optional)
 * @returns The parsed transcript
 * @throws If conversion fails, the whisper process fails or times out, or
 *   its stderr contains an error marker
 */
async function transcribeWithWhisper(audioPath, config) {
    const audioToProcess = await convertToWavIfNeeded(audioPath);
    const args = [
        `-m ${config.model}`,
        `-f "${audioToProcess}"`,
        '-oj', // Output JSON
        config.language ? `-l ${config.language}` : '',
    ].filter(Boolean);
    const command = `${config.binaryPath} ${args.join(' ')}`;
    try {
        let stdout;
        let stderr;
        try {
            ({ stdout, stderr } = await execAsync(command, {
                cwd: config.cwd,
                maxBuffer: 50 * 1024 * 1024, // 50MB buffer for large transcripts
                timeout: 10 * 60 * 1000, // 10 minute timeout
            }));
        }
        catch (error) {
            throw new Error(`Whisper transcription failed: ${error.message}`);
        }
        // The binary can exit 0 while still reporting a failure on stderr
        const hasError = stderr.includes('error:') ||
            stderr.includes('Error:') ||
            stderr.includes('failed to');
        if (hasError) {
            // Thrown outside the exec try/catch so the prefix is not added twice
            throw new Error(`Whisper transcription failed:\n${stderr}`);
        }
        // whisper-cpp outputs JSON to a file named <input>.json
        const jsonOutputPath = `${audioToProcess}.json`;
        try {
            const jsonContent = await readFile(jsonOutputPath, 'utf-8');
            return parseWhisperOutput(JSON.parse(jsonContent));
        }
        catch {
            // Fallback: try to parse stdout as the transcript
            return parseWhisperStdout(stdout);
        }
        finally {
            // Best-effort removal of the temp JSON file (it may not exist)
            await unlink(jsonOutputPath).catch(() => { });
        }
    }
    finally {
        // Best-effort removal of the converted audio, whatever the outcome
        if (audioToProcess !== audioPath) {
            await unlink(audioToProcess).catch(() => { });
        }
    }
}
98
/**
 * Parse whisper.cpp JSON output into our Transcript format
 *
 * @param output - Parsed JSON written by the whisper binary; must contain a
 *   `transcription` array whose entries have `offsets.from` / `offsets.to`
 *   (milliseconds) and `text`
 * @returns `{ fullText, segments, language, duration }` where times are in
 *   seconds and `duration` is the end of the last segment (0 when empty)
 * @throws TypeError if `output.transcription` is missing (the caller relies
 *   on this to fall back to stdout parsing)
 */
function parseWhisperOutput(output) {
    const segments = output.transcription.map((seg, index) => ({
        id: `seg-${index}`,
        start: seg.offsets.from / 1000, // Convert ms to seconds
        end: seg.offsets.to / 1000,
        text: seg.text.trim(),
        speaker: null,
    }));
    const fullText = segments.map((s) => s.text).join(' ');
    const duration = segments.length > 0 ? segments[segments.length - 1].end : 0;
    // Prefer the language reported in the JSON (`result.language`); fall back
    // to 'en' because whisper.cpp doesn't always report language in JSON.
    const reportedLanguage = output.result?.language;
    const language = typeof reportedLanguage === 'string' && reportedLanguage !== ''
        ? reportedLanguage
        : 'en';
    return {
        fullText,
        segments,
        language,
        duration,
    };
}
118
/**
 * Fallback parser for whisper's plain-text stdout.
 *
 * Each line shaped like "[00:00:00.000 --> 00:00:05.000] Hello world" becomes
 * one segment. When no line carries timestamps but the output is not blank,
 * the whole trimmed output becomes a single segment spanning 0 to 0.
 *
 * @param stdout - Raw stdout of the whisper process
 * @returns `{ fullText, segments, language, duration }`; `language` is always
 *   'en' and `duration` is the end of the last segment (0 when empty)
 */
function parseWhisperStdout(stdout) {
    const segmentPattern = /\[(\d{2}:\d{2}:\d{2}\.\d{3})\s*-->\s*(\d{2}:\d{2}:\d{2}\.\d{3})\]\s*(.*)/;
    const segments = [];
    stdout.split('\n').forEach((rawLine) => {
        // Blank lines carry nothing
        if (!rawLine.trim()) {
            return;
        }
        const parts = segmentPattern.exec(rawLine);
        if (parts === null) {
            return;
        }
        const start = parseTimestamp(parts[1]);
        const end = parseTimestamp(parts[2]);
        segments.push({
            id: `seg-${segments.length}`,
            start,
            end,
            text: parts[3].trim(),
            speaker: null,
        });
    });
    // No timestamped lines: keep the text as one untimed segment
    const wholeText = stdout.trim();
    if (segments.length === 0 && wholeText) {
        segments.push({ id: 'seg-0', start: 0, end: 0, text: wholeText, speaker: null });
    }
    const lastSegment = segments[segments.length - 1];
    return {
        fullText: segments.map((segment) => segment.text).join(' '),
        segments,
        language: 'en',
        duration: lastSegment ? lastSegment.end : 0,
    };
}
158
/**
 * Convert an "HH:MM:SS.mmm" timestamp string into seconds.
 *
 * @param timestamp - Timestamp such as "00:01:02.500"
 * @returns The equivalent number of seconds (62.5 for the example above)
 */
function parseTimestamp(timestamp) {
    const toInt = (digits) => parseInt(digits, 10);
    const [hourPart, minutePart, secondPart] = timestamp.split(':');
    const [wholeSeconds, millis] = secondPart.split('.');
    const fromHours = toInt(hourPart) * 3600;
    const fromMinutes = toInt(minutePart) * 60;
    return fromHours + fromMinutes + toInt(wholeSeconds) + toInt(millis) / 1000;
}