escribano 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +297 -0
- package/dist/0_types.js +279 -0
- package/dist/actions/classify-session.js +77 -0
- package/dist/actions/create-contexts.js +44 -0
- package/dist/actions/create-topic-blocks.js +68 -0
- package/dist/actions/extract-metadata.js +24 -0
- package/dist/actions/generate-artifact-v3.js +296 -0
- package/dist/actions/generate-artifact.js +61 -0
- package/dist/actions/generate-summary-v3.js +260 -0
- package/dist/actions/outline-index.js +204 -0
- package/dist/actions/process-recording-v2.js +494 -0
- package/dist/actions/process-recording-v3.js +412 -0
- package/dist/actions/process-session.js +183 -0
- package/dist/actions/publish-summary-v3.js +303 -0
- package/dist/actions/sync-to-outline.js +196 -0
- package/dist/adapters/audio.silero.adapter.js +69 -0
- package/dist/adapters/cap.adapter.js +94 -0
- package/dist/adapters/capture.cap.adapter.js +107 -0
- package/dist/adapters/capture.filesystem.adapter.js +124 -0
- package/dist/adapters/embedding.ollama.adapter.js +141 -0
- package/dist/adapters/intelligence.adapter.js +202 -0
- package/dist/adapters/intelligence.mlx.adapter.js +395 -0
- package/dist/adapters/intelligence.ollama.adapter.js +741 -0
- package/dist/adapters/publishing.outline.adapter.js +75 -0
- package/dist/adapters/storage.adapter.js +81 -0
- package/dist/adapters/storage.fs.adapter.js +83 -0
- package/dist/adapters/transcription.whisper.adapter.js +206 -0
- package/dist/adapters/video.ffmpeg.adapter.js +405 -0
- package/dist/adapters/whisper.adapter.js +168 -0
- package/dist/batch-context.js +329 -0
- package/dist/db/helpers.js +50 -0
- package/dist/db/index.js +95 -0
- package/dist/db/migrate.js +80 -0
- package/dist/db/repositories/artifact.sqlite.js +77 -0
- package/dist/db/repositories/cluster.sqlite.js +92 -0
- package/dist/db/repositories/context.sqlite.js +75 -0
- package/dist/db/repositories/index.js +10 -0
- package/dist/db/repositories/observation.sqlite.js +70 -0
- package/dist/db/repositories/recording.sqlite.js +56 -0
- package/dist/db/repositories/subject.sqlite.js +64 -0
- package/dist/db/repositories/topic-block.sqlite.js +45 -0
- package/dist/db/types.js +4 -0
- package/dist/domain/classification.js +60 -0
- package/dist/domain/context.js +97 -0
- package/dist/domain/index.js +2 -0
- package/dist/domain/observation.js +17 -0
- package/dist/domain/recording.js +41 -0
- package/dist/domain/segment.js +93 -0
- package/dist/domain/session.js +93 -0
- package/dist/domain/time-range.js +38 -0
- package/dist/domain/transcript.js +79 -0
- package/dist/index.js +173 -0
- package/dist/pipeline/context.js +162 -0
- package/dist/pipeline/events.js +2 -0
- package/dist/prerequisites.js +226 -0
- package/dist/scripts/rebuild-index.js +53 -0
- package/dist/scripts/seed-fixtures.js +290 -0
- package/dist/services/activity-segmentation.js +333 -0
- package/dist/services/activity-segmentation.test.js +191 -0
- package/dist/services/app-normalization.js +212 -0
- package/dist/services/cluster-merge.js +69 -0
- package/dist/services/clustering.js +237 -0
- package/dist/services/debug.js +58 -0
- package/dist/services/frame-sampling.js +318 -0
- package/dist/services/signal-extraction.js +106 -0
- package/dist/services/subject-grouping.js +342 -0
- package/dist/services/temporal-alignment.js +99 -0
- package/dist/services/vlm-enrichment.js +84 -0
- package/dist/services/vlm-service.js +130 -0
- package/dist/stats/index.js +3 -0
- package/dist/stats/observer.js +65 -0
- package/dist/stats/repository.js +36 -0
- package/dist/stats/resource-tracker.js +86 -0
- package/dist/stats/types.js +1 -0
- package/dist/test-classification-prompts.js +181 -0
- package/dist/tests/cap.adapter.test.js +75 -0
- package/dist/tests/capture.cap.adapter.test.js +69 -0
- package/dist/tests/classify-session.test.js +140 -0
- package/dist/tests/db/repositories.test.js +243 -0
- package/dist/tests/domain/time-range.test.js +31 -0
- package/dist/tests/integration.test.js +84 -0
- package/dist/tests/intelligence.adapter.test.js +102 -0
- package/dist/tests/intelligence.ollama.adapter.test.js +178 -0
- package/dist/tests/process-v2.test.js +90 -0
- package/dist/tests/services/clustering.test.js +112 -0
- package/dist/tests/services/frame-sampling.test.js +152 -0
- package/dist/tests/utils/ocr.test.js +76 -0
- package/dist/tests/utils/parallel.test.js +57 -0
- package/dist/tests/visual-observer.test.js +175 -0
- package/dist/utils/id-normalization.js +15 -0
- package/dist/utils/index.js +9 -0
- package/dist/utils/model-detector.js +154 -0
- package/dist/utils/ocr.js +80 -0
- package/dist/utils/parallel.js +32 -0
- package/migrations/001_initial.sql +109 -0
- package/migrations/002_clusters.sql +41 -0
- package/migrations/003_observations_vlm_fields.sql +14 -0
- package/migrations/004_observations_unique.sql +18 -0
- package/migrations/005_processing_stats.sql +29 -0
- package/migrations/006_vlm_raw_response.sql +6 -0
- package/migrations/007_subjects.sql +23 -0
- package/migrations/008_artifacts_recording.sql +6 -0
- package/migrations/009_artifact_subjects.sql +10 -0
- package/package.json +82 -0
- package/prompts/action-items.md +55 -0
- package/prompts/blog-draft.md +54 -0
- package/prompts/blog-research.md +87 -0
- package/prompts/card.md +54 -0
- package/prompts/classify-segment.md +38 -0
- package/prompts/classify.md +37 -0
- package/prompts/code-snippets.md +163 -0
- package/prompts/extract-metadata.md +149 -0
- package/prompts/notes.md +83 -0
- package/prompts/runbook.md +123 -0
- package/prompts/standup.md +50 -0
- package/prompts/step-by-step.md +125 -0
- package/prompts/subject-grouping.md +31 -0
- package/prompts/summary-v3.md +89 -0
- package/prompts/summary.md +77 -0
- package/prompts/topic-classifier.md +24 -0
- package/prompts/topic-extract.md +13 -0
- package/prompts/vlm-batch.md +21 -0
- package/prompts/vlm-single.md +19 -0
|
@@ -0,0 +1,405 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* FFmpeg Adapter
|
|
3
|
+
*
|
|
4
|
+
* Handles video manipulation using FFmpeg CLI.
|
|
5
|
+
* Used for extracting screenshots and detecting scene changes.
|
|
6
|
+
*/
|
|
7
|
+
import { exec, spawn } from 'node:child_process';
|
|
8
|
+
import { mkdir, readdir, readFile, rm } from 'node:fs/promises';
|
|
9
|
+
import os from 'node:os';
|
|
10
|
+
import path from 'node:path';
|
|
11
|
+
import { promisify } from 'node:util';
|
|
12
|
+
import { debugLog } from './intelligence.ollama.adapter.js';
|
|
13
|
+
const execAsync = promisify(exec);
|
|
14
|
+
// Scene detection configuration (with env var overrides)
|
|
15
|
+
// Lower threshold = more sensitive = more scene changes detected
|
|
16
|
+
// Examples: 0.3 (sensitive), 0.4 (default), 0.5 (conservative)
|
|
17
|
+
const SCENE_THRESHOLD = Number(process.env.ESCRIBANO_SCENE_THRESHOLD) || 0.4;
|
|
18
|
+
// Minimum seconds between detected scene changes
|
|
19
|
+
// Prevents rapid-fire scene changes from generating too many frames
|
|
20
|
+
const SCENE_MIN_INTERVAL = Number(process.env.ESCRIBANO_SCENE_MIN_INTERVAL) || 2;
|
|
21
|
+
/**
|
|
22
|
+
* Creates a VideoService that uses FFmpeg CLI
|
|
23
|
+
*/
|
|
24
|
+
export function createFfmpegVideoService() {
|
|
25
|
+
let currentProcess = null;
|
|
26
|
+
return {
|
|
27
|
+
/**
|
|
28
|
+
* Extract frames at specific timestamps.
|
|
29
|
+
* @deprecated Use extractFramesAtTimestampsBatch for parallel extraction with progress logging.
|
|
30
|
+
*/
|
|
31
|
+
extractFramesAtTimestamps: async (videoPath, timestamps, outputDir) => {
|
|
32
|
+
await mkdir(outputDir, { recursive: true });
|
|
33
|
+
const outputPaths = [];
|
|
34
|
+
for (const timestamp of timestamps) {
|
|
35
|
+
// Format timestamp for filename (e.g., 123.45 -> 000123_450)
|
|
36
|
+
const seconds = Math.floor(timestamp);
|
|
37
|
+
const ms = Math.floor((timestamp - seconds) * 1000);
|
|
38
|
+
const formattedTime = `${seconds.toString().padStart(6, '0')}_${ms.toString().padStart(3, '0')}`;
|
|
39
|
+
const fileName = `frame_${formattedTime}.jpg`;
|
|
40
|
+
const outputPath = path.join(outputDir, fileName);
|
|
41
|
+
// -ss before -i is significantly faster for large files (input seeking)
|
|
42
|
+
// -vframes 1 ensures we only extract one frame
|
|
43
|
+
const command = `ffmpeg -ss ${timestamp} -i "${videoPath}" -vframes 1 -q:v 2 "${outputPath}" -y`;
|
|
44
|
+
try {
|
|
45
|
+
await execAsync(command);
|
|
46
|
+
outputPaths.push(outputPath);
|
|
47
|
+
}
|
|
48
|
+
catch (error) {
|
|
49
|
+
console.warn(`Failed to extract frame at ${timestamp}s: ${error.message}`);
|
|
50
|
+
// Continue with other timestamps even if one fails
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
return outputPaths;
|
|
54
|
+
},
|
|
55
|
+
/**
 * Extract frames at regular intervals across the whole video.
 *
 * Spawns one ffmpeg process (via `sh -c`) that decodes the entire input and
 * writes `scene_%04d.jpg` files into `outputDir`, logging progress parsed
 * from ffmpeg's stderr. Shares the module-closure `currentProcess` so
 * `getPid()` can report the running ffmpeg PID.
 *
 * @deprecated Use extractFramesAtTimestampsBatch + calculateRequiredTimestamps for smart extraction.
 * This method extracts ALL frames, which is inefficient for long recordings.
 *
 * @param videoPath - Path to source video
 * @param _threshold - Unused (kept for interface compatibility)
 * @param outputDir - Directory the JPEGs are written into (wiped first)
 * @returns Array of { imagePath, timestamp } sorted by filename;
 *          timestamps are approximated as index * frameInterval.
 * @throws Error wrapping any spawn/extraction failure.
 */
extractFramesAtInterval: async (videoPath, _threshold, outputDir) => {
    // Clean directory first (removes stale frames from previous runs)
    await rm(outputDir, { recursive: true, force: true });
    await mkdir(outputDir, { recursive: true });
    // Tunables via env; defaults: one frame every 2s, scaled to 1920px wide.
    const frameInterval = Number(process.env.ESCRIBANO_FRAME_INTERVAL) || 2;
    const frameWidth = Number(process.env.ESCRIBANO_FRAME_WIDTH) || 1920;
    // Probe the duration up front so progress can be reported as a percentage.
    let expectedFrames = 0;
    try {
        const probeCmd = `ffprobe -v error -show_entries format=duration -of json "${videoPath}"`;
        const { stdout } = await execAsync(probeCmd);
        const data = JSON.parse(stdout);
        const duration = Number.parseFloat(data.format?.duration || '0');
        if (duration > 0) {
            expectedFrames = Math.ceil(duration / frameInterval);
            console.log(`Expected frames: ${expectedFrames} (duration: ${Math.round(duration)}s, interval: ${frameInterval}s)`);
        }
    }
    catch {
        // Best-effort: without a duration we simply log raw frame counts.
        console.warn('Could not get video metadata, progress will show frame count only');
    }
    // Build FFmpeg command with clear sections
    const ffmpegParts = [
        'ffmpeg',
        '-progress pipe:2', // Structured progress output to stderr
        '-hwaccel videotoolbox', // macOS hardware acceleration (input option, must precede -i)
        `-i "${videoPath}"`, // Input file
        `-vf "scale=${frameWidth}:-2,fps=1/${frameInterval}"`, // Scale + FPS filter (-2 keeps even height)
        '-an -q:v 5', // No audio, JPEG quality 5
        `"${outputDir}/scene_%04d.jpg"`, // Output pattern
        '-y', // Overwrite
    ];
    const command = ffmpegParts.join(' ');
    debugLog(`Running frame extraction: ${command}`);
    try {
        // NOTE(review): paths are interpolated into a shell string — assumes no
        // double quotes in videoPath/outputDir; consider spawn with an arg array.
        currentProcess = spawn('sh', ['-c', command]);
        await new Promise((resolve, reject) => {
            let lastLoggedPercent = 0;
            let firstProgressLogged = false;
            currentProcess?.stderr?.on('data', (data) => {
                const output = data.toString();
                // `-progress pipe:2` emits key=value lines; pull frame count and fps.
                const frameMatch = output.match(/frame=(\d+)/);
                const fpsMatch = output.match(/fps=\s*([\d.]+)/);
                if (frameMatch && expectedFrames > 0) {
                    const frames = parseInt(frameMatch[1], 10);
                    const percent = Math.floor((frames / expectedFrames) * 100);
                    // log on the first frame and then every 5% increment
                    if (!firstProgressLogged) {
                        firstProgressLogged = true;
                        console.log(`Extracting frames: 0/${expectedFrames} (0%)`);
                    }
                    if (percent - lastLoggedPercent >= 5) {
                        lastLoggedPercent = percent;
                        // ETA derived from ffmpeg's reported processing fps.
                        let etaStr = '';
                        if (fpsMatch) {
                            const fps = parseFloat(fpsMatch[1]);
                            if (fps > 0) {
                                const remainingFrames = expectedFrames - frames;
                                const etaSeconds = Math.ceil(remainingFrames / fps);
                                etaStr = ` - ETA: ${etaSeconds}s`;
                            }
                        }
                        console.log(`Extracting frames: ${frames}/${expectedFrames} (${percent}%)${etaStr}`);
                    }
                }
            });
            currentProcess?.on('close', (code) => {
                // Clear the shared handle so getPid() stops reporting a dead process.
                currentProcess = null;
                if (code === 0) {
                    if (expectedFrames > 0) {
                        console.log(`Extracting frames: ${expectedFrames}/${expectedFrames} (100%)`);
                    }
                    resolve();
                }
                else {
                    reject(new Error(`Frame extraction failed with code ${code}`));
                }
            });
            currentProcess?.on('error', (err) => {
                currentProcess = null;
                reject(err);
            });
        });
        // Collect the frames ffmpeg wrote; %04d padding makes a plain sort correct.
        const files = await readdir(outputDir);
        const framePaths = files
            .filter((f) => f.startsWith('scene_') && f.endsWith('.jpg'))
            .map((f) => path.join(outputDir, f))
            .sort();
        console.log(`Extracted ${framePaths.length} frames`);
        // Timestamp is reconstructed from the index — an approximation of the
        // fps-filter's actual sampling instants.
        return framePaths.map((p, i) => ({
            imagePath: p,
            timestamp: i * frameInterval,
        }));
    }
    catch (error) {
        currentProcess = null;
        throw new Error(`Visual log extraction failed: ${error.message}`);
    }
},
|
|
159
|
+
/**
|
|
160
|
+
* Extract frames at specific timestamps efficiently.
|
|
161
|
+
* Uses parallel batch extraction with progress logging.
|
|
162
|
+
*
|
|
163
|
+
* This is the preferred method for smart extraction:
|
|
164
|
+
* 1. Run scene detection first
|
|
165
|
+
* 2. Calculate required timestamps via frame-sampling.calculateRequiredTimestamps()
|
|
166
|
+
* 3. Extract only those frames (not all frames)
|
|
167
|
+
*
|
|
168
|
+
* @param videoPath - Path to source video
|
|
169
|
+
* @param timestamps - Array of timestamps (in seconds) to extract
|
|
170
|
+
* @param outputDir - Directory to save extracted frames
|
|
171
|
+
* @param concurrency - Number of parallel extractions (default: 4)
|
|
172
|
+
*
|
|
173
|
+
* @example
|
|
174
|
+
* // Extract frames at 0s, 10s, 20s, 30s with 4 parallel workers
|
|
175
|
+
* const frames = await extractFramesAtTimestampsBatch(
|
|
176
|
+
* '/path/to/video.mp4',
|
|
177
|
+
* [0, 10, 20, 30],
|
|
178
|
+
* '/tmp/frames',
|
|
179
|
+
* 4
|
|
180
|
+
* );
|
|
181
|
+
* // Returns: [{ imagePath: '/tmp/frames/frame_000000.jpg', timestamp: 0 }, ...]
|
|
182
|
+
*/
|
|
183
|
+
extractFramesAtTimestampsBatch: async (videoPath, timestamps, outputDir, concurrency = 4) => {
|
|
184
|
+
// Clean and create output directory
|
|
185
|
+
await rm(outputDir, { recursive: true, force: true });
|
|
186
|
+
await mkdir(outputDir, { recursive: true });
|
|
187
|
+
const frameWidth = Number(process.env.ESCRIBANO_FRAME_WIDTH) || 1920;
|
|
188
|
+
const total = timestamps.length;
|
|
189
|
+
const results = [];
|
|
190
|
+
if (total === 0) {
|
|
191
|
+
console.log('No frames to extract');
|
|
192
|
+
return results;
|
|
193
|
+
}
|
|
194
|
+
console.log(`Extracting ${total} frames at specific timestamps...`);
|
|
195
|
+
console.log(`Output directory: ${outputDir}`);
|
|
196
|
+
const startTime = Date.now();
|
|
197
|
+
let lastLoggedPercent = 0;
|
|
198
|
+
// Process in batches of `concurrency`
|
|
199
|
+
for (let i = 0; i < timestamps.length; i += concurrency) {
|
|
200
|
+
const batch = timestamps.slice(i, i + concurrency);
|
|
201
|
+
const promises = batch.map(async (timestamp, batchIndex) => {
|
|
202
|
+
const frameIndex = i + batchIndex;
|
|
203
|
+
const fileName = `frame_${frameIndex.toString().padStart(6, '0')}.jpg`;
|
|
204
|
+
const outputPath = path.join(outputDir, fileName);
|
|
205
|
+
// Build FFmpeg command with clear sections
|
|
206
|
+
// -ss before -i for fast seeking (input seeking vs output seeking)
|
|
207
|
+
const ffmpegParts = [
|
|
208
|
+
'ffmpeg',
|
|
209
|
+
'-ss',
|
|
210
|
+
String(timestamp), // Seek position (before -i for speed)
|
|
211
|
+
'-hwaccel videotoolbox', // M4 hardware acceleration
|
|
212
|
+
`-i "${videoPath}"`, // Input file
|
|
213
|
+
'-vframes 1', // Extract single frame
|
|
214
|
+
`-vf "scale=${frameWidth}:-2"`, // Scale width, auto height
|
|
215
|
+
'-q:v 5', // JPEG quality (2=best, 31=worst)
|
|
216
|
+
`"${outputPath}"`, // Output file
|
|
217
|
+
'-y', // Overwrite
|
|
218
|
+
];
|
|
219
|
+
const command = ffmpegParts.join(' ');
|
|
220
|
+
await execAsync(command);
|
|
221
|
+
return { imagePath: outputPath, timestamp };
|
|
222
|
+
});
|
|
223
|
+
const batchResults = await Promise.all(promises);
|
|
224
|
+
results.push(...batchResults);
|
|
225
|
+
// Progress logging with ETA (every 5% or at completion)
|
|
226
|
+
const processed = results.length;
|
|
227
|
+
const percent = Math.floor((processed / total) * 100);
|
|
228
|
+
if (percent - lastLoggedPercent >= 5 || processed === total) {
|
|
229
|
+
lastLoggedPercent = percent;
|
|
230
|
+
// Calculate ETA
|
|
231
|
+
const elapsed = (Date.now() - startTime) / 1000;
|
|
232
|
+
const rate = processed / elapsed; // frames per second
|
|
233
|
+
const remaining = total - processed;
|
|
234
|
+
const etaSeconds = rate > 0 ? Math.ceil(remaining / rate) : 0;
|
|
235
|
+
const etaStr = processed < total ? ` - ETA: ${etaSeconds}s` : '';
|
|
236
|
+
console.log(`Extracting frames: ${processed}/${total} (${percent}%)${etaStr}`);
|
|
237
|
+
}
|
|
238
|
+
}
|
|
239
|
+
console.log(`Extracted ${results.length} frames`);
|
|
240
|
+
return results.sort((a, b) => a.timestamp - b.timestamp);
|
|
241
|
+
},
|
|
242
|
+
/**
|
|
243
|
+
* Get video metadata using ffprobe
|
|
244
|
+
*/
|
|
245
|
+
getMetadata: async (videoPath) => {
|
|
246
|
+
// -show_entries allows selective extraction of metadata
|
|
247
|
+
// -of json returns machine-readable format
|
|
248
|
+
const command = `ffprobe -v error -show_entries format=duration -show_entries stream=width,height -of json "${videoPath}"`;
|
|
249
|
+
try {
|
|
250
|
+
const { stdout } = await execAsync(command);
|
|
251
|
+
const data = JSON.parse(stdout);
|
|
252
|
+
const duration = data.format?.duration
|
|
253
|
+
? Number.parseFloat(data.format.duration)
|
|
254
|
+
: 0;
|
|
255
|
+
const videoStream = data.streams?.find((s) => s.width && s.height);
|
|
256
|
+
return {
|
|
257
|
+
duration,
|
|
258
|
+
width: videoStream?.width || 0,
|
|
259
|
+
height: videoStream?.height || 0,
|
|
260
|
+
};
|
|
261
|
+
}
|
|
262
|
+
catch (error) {
|
|
263
|
+
throw new Error(`Failed to get video metadata: ${error.message}`);
|
|
264
|
+
}
|
|
265
|
+
},
|
|
266
|
+
/**
|
|
267
|
+
* Run visual indexing (OCR + CLIP) using the Python base script.
|
|
268
|
+
* OCR is parallelized across all available CPU cores.
|
|
269
|
+
*/
|
|
270
|
+
runVisualIndexing: async (framesDir, outputPath) => {
|
|
271
|
+
const scriptPath = path.join(process.cwd(), 'src', 'scripts', 'visual_observer_base.py');
|
|
272
|
+
const frameInterval = Number(process.env.ESCRIBANO_FRAME_INTERVAL) || 2;
|
|
273
|
+
const workers = os.cpus().length;
|
|
274
|
+
// Use uv run to execute the script with its environment
|
|
275
|
+
// --workers enables parallel OCR processing
|
|
276
|
+
const command = `uv run "${scriptPath}" --frames-dir "${framesDir}" --output "${outputPath}" --frame-interval ${frameInterval} --workers ${workers}`;
|
|
277
|
+
try {
|
|
278
|
+
await execAsync(command, {
|
|
279
|
+
cwd: path.join(process.cwd(), 'src', 'scripts'),
|
|
280
|
+
});
|
|
281
|
+
const content = await readFile(outputPath, 'utf-8');
|
|
282
|
+
return JSON.parse(content);
|
|
283
|
+
}
|
|
284
|
+
catch (error) {
|
|
285
|
+
throw new Error(`Visual indexing failed: ${error.message}`);
|
|
286
|
+
}
|
|
287
|
+
},
|
|
288
|
+
/**
 * Detect scene changes in video using ffmpeg's scene filter.
 * Returns timestamps (seconds) of significant visual changes, sorted
 * ascending and deduplicated so consecutive hits are >= minInterval apart.
 *
 * Configuration via environment variables (overridable per-call via `config`):
 * - ESCRIBANO_SCENE_THRESHOLD: Sensitivity (0.0-1.0, lower=more sensitive)
 * - ESCRIBANO_SCENE_MIN_INTERVAL: Min seconds between scene changes
 *
 * On any failure this returns [] (best-effort) rather than throwing.
 *
 * @param videoPath - Path to source video
 * @param config - Optional { threshold, minInterval } overrides
 * @returns Sorted, deduplicated scene-change timestamps in seconds
 */
detectSceneChanges: async (videoPath, config = {}) => {
    // Use env vars as defaults, allow override via config parameter
    const threshold = config.threshold ?? SCENE_THRESHOLD;
    const minInterval = config.minInterval ?? SCENE_MIN_INTERVAL;
    // Get video duration for progress calculation
    let duration = 0;
    try {
        const probeCmd = `ffprobe -v error -show_entries format=duration -of json "${videoPath}"`;
        const { stdout } = await execAsync(probeCmd);
        const data = JSON.parse(stdout);
        duration = Number.parseFloat(data.format?.duration || '0');
        console.log(`Scene detection: analyzing ${Math.round(duration)}s video (threshold=${threshold})`);
    }
    catch {
        // Non-fatal: detection still runs, just without percentage progress.
        console.warn('Could not get video duration, progress will not be shown');
    }
    // Build FFmpeg command with progress output
    const ffmpegParts = [
        'ffmpeg',
        '-skip_frame nokey', // Only decode I-frames (keyframes) for massive speedup
        '-hwaccel videotoolbox', // macOS hardware acceleration
        '-progress pipe:2', // Structured progress output to stderr
        `-i "${videoPath}"`, // Input file
        `-vf "select='gt(scene,${threshold})',showinfo"`, // Scene filter; showinfo prints pts_time per kept frame
        '-vsync vfr', // Variable frame rate output
        '-f null', // Null output format (we only want stderr logs)
        '-', // Output to null
    ];
    const command = ffmpegParts.join(' ');
    debugLog(`Running scene detection: ${command}`);
    try {
        // Shared closure handle so getPid() can report the running ffmpeg.
        currentProcess = spawn('sh', ['-c', command]);
        const timestamps = [];
        // Matches showinfo's "pts_time:<seconds>" entries; /g for matchAll below.
        const ptsTimeRegex = /pts_time:(\d+\.?\d*)/g;
        let lastLoggedPercent = 0;
        await new Promise((resolve, reject) => {
            // Accumulate ALL stderr: pts_time values are parsed after close.
            let stderrBuffer = '';
            currentProcess?.stderr?.on('data', (data) => {
                const output = data.toString();
                stderrBuffer += output;
                // Parse progress from out_time_ms
                if (duration > 0) {
                    const outTimeMatch = output.match(/out_time_ms=(\d+)/);
                    if (outTimeMatch) {
                        const outTimeMs = parseInt(outTimeMatch[1], 10);
                        // ffmpeg's out_time_ms is microseconds despite the name,
                        // hence the division by 1,000,000.
                        const outTimeSec = outTimeMs / 1_000_000;
                        const percent = Math.floor((outTimeSec / duration) * 100);
                        // Log every 5%
                        if (percent - lastLoggedPercent >= 5) {
                            lastLoggedPercent = percent;
                            const remaining = duration - outTimeSec;
                            const etaMin = Math.ceil(remaining / 60);
                            console.log(`Scene detection: ${Math.round(outTimeSec)}s/${Math.round(duration)}s (${percent}%) - ETA: ${etaMin}m`);
                        }
                    }
                }
            });
            currentProcess?.on('close', (code) => {
                currentProcess = null;
                if (code === 0) {
                    // Parse all pts_time values from accumulated stderr
                    const matches = stderrBuffer.matchAll(ptsTimeRegex);
                    for (const match of matches) {
                        const timestamp = Number.parseFloat(match[1] ?? '0');
                        // Drop unparseable values and the 0s frame.
                        if (!Number.isNaN(timestamp) && timestamp > 0) {
                            timestamps.push(timestamp);
                        }
                    }
                    if (duration > 0) {
                        console.log(`Scene detection: ${Math.round(duration)}s/${Math.round(duration)}s (100%)`);
                    }
                    console.log(`Found ${timestamps.length} scene changes before deduplication`);
                    resolve();
                }
                else {
                    reject(new Error(`Scene detection failed with code ${code}`));
                }
            });
            currentProcess?.on('error', (err) => {
                currentProcess = null;
                reject(err);
            });
        });
        // Sort and deduplicate (remove timestamps within minInterval of each other)
        const sortedTimestamps = timestamps.sort((a, b) => a - b);
        const deduplicated = [];
        for (const ts of sortedTimestamps) {
            // Keep a timestamp only if it is >= minInterval after the last kept one.
            const lastTs = deduplicated[deduplicated.length - 1];
            if (lastTs === undefined || ts - lastTs >= minInterval) {
                deduplicated.push(ts);
            }
        }
        console.log(`Scene detection complete: ${deduplicated.length} scenes (after ${minInterval}s deduplication)`);
        return deduplicated;
    }
    catch (error) {
        currentProcess = null;
        // Deliberate best-effort: callers treat "no scenes" as a valid result.
        console.warn(`Scene detection failed: ${error.message}`);
        return [];
    }
},
|
|
398
|
+
getResourceName() {
|
|
399
|
+
return 'ffmpeg';
|
|
400
|
+
},
|
|
401
|
+
getPid() {
|
|
402
|
+
return currentProcess?.pid ?? null;
|
|
403
|
+
},
|
|
404
|
+
};
|
|
405
|
+
}
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Whisper Adapter
|
|
3
|
+
*
|
|
4
|
+
* Transcribes audio using whisper.cpp or OpenAI's whisper CLI.
|
|
5
|
+
* Shells out to the whisper binary for simplicity.
|
|
6
|
+
*
|
|
7
|
+
* Prerequisites:
|
|
8
|
+
* - whisper.cpp installed: brew install whisper-cpp
|
|
9
|
+
* - ffmpeg installed: brew install ffmpeg (for audio format conversion)
|
|
10
|
+
* - Or Python whisper: pip install openai-whisper
|
|
11
|
+
*/
|
|
12
|
+
import { exec } from 'node:child_process';
|
|
13
|
+
import { readFile, unlink } from 'node:fs/promises';
|
|
14
|
+
import { promisify } from 'node:util';
|
|
15
|
+
const execAsync = promisify(exec);
|
|
16
|
+
/**
 * Ensure the audio file is in a format whisper can consume.
 * wav/flac/mp3 files are passed through untouched; anything else is
 * transcoded with ffmpeg to 16 kHz mono WAV at `<input>.converted.wav`.
 * @param {string} audioPath - Path to the source audio file
 * @returns {Promise<string>} Path whisper should read (original or converted)
 * @throws {Error} If the ffmpeg conversion fails
 */
async function convertToWavIfNeeded(audioPath) {
    const extension = audioPath.toLowerCase().split('.').pop();
    const passthrough = ['wav', 'flac', 'mp3'];
    if (passthrough.includes(extension || '')) {
        return audioPath;
    }
    const convertedPath = `${audioPath}.converted.wav`;
    try {
        console.log(`Converting ${audioPath} to WAV format...`);
        // 16 kHz mono is whisper's expected input; 10-minute timeout guards hangs.
        const convertCmd = `ffmpeg -i "${audioPath}" -f wav -ar 16000 -ac 1 "${convertedPath}" -y`;
        await execAsync(convertCmd, { timeout: 10 * 60 * 1000 });
        console.log(`Conversion complete: ${convertedPath}`);
        return convertedPath;
    }
    catch (error) {
        console.error(`Audio conversion failed for ${audioPath}`);
        throw new Error(`Failed to convert audio to WAV: ${error.message}`);
    }
}
|
|
33
|
+
/**
 * Creates a TranscriptionService that uses the whisper CLI.
 * @param {object} [config] - Optional overrides:
 *   binaryPath (default 'whisper-cpp'), model (default 'base'),
 *   outputFormat (default 'json'), language, cwd (working directory for
 *   the whisper process).
 * @returns {{ transcribe: (audioPath: string) => Promise<object> }}
 */
export function createWhisperTranscriber(config = {}) {
    const resolvedConfig = {
        binaryPath: config.binaryPath ?? 'whisper-cpp',
        model: config.model ?? 'base',
        outputFormat: config.outputFormat ?? 'json',
        language: config.language,
        // Fix: previously dropped here, so a caller-supplied working directory
        // never reached execAsync inside transcribeWithWhisper (which reads
        // config.cwd).
        cwd: config.cwd,
    };
    return {
        transcribe: (audioPath) => transcribeWithWhisper(audioPath, resolvedConfig),
    };
}
|
|
47
|
+
/**
 * Transcribe an audio file by shelling out to the whisper CLI.
 *
 * Converts the input to WAV when needed, runs `whisper -oj`, then reads the
 * JSON sidecar file whisper writes next to the input. Falls back to parsing
 * stdout if the JSON file cannot be read. Temporary files (the converted WAV
 * and the JSON sidecar) are removed best-effort on every exit path.
 *
 * @param {string} audioPath - Path to the source audio file
 * @param {object} config - Resolved config: binaryPath, model, language, cwd
 * @returns {Promise<object>} Transcript: { fullText, segments, language, duration }
 * @throws {Error} If conversion or transcription fails
 */
async function transcribeWithWhisper(audioPath, config) {
    const audioToProcess = await convertToWavIfNeeded(audioPath);
    // Best-effort removal of the temporary converted WAV — never the caller's
    // original file. Safe to call more than once.
    const cleanupConverted = async () => {
        if (audioToProcess !== audioPath) {
            await unlink(audioToProcess).catch(() => { });
        }
    };
    const args = [
        `-m ${config.model}`,
        `-f "${audioToProcess}"`,
        '-oj', // Output JSON
        config.language ? `-l ${config.language}` : '',
    ].filter(Boolean);
    const command = `${config.binaryPath} ${args.join(' ')}`;
    try {
        const { stdout, stderr } = await execAsync(command, {
            cwd: config.cwd,
            maxBuffer: 50 * 1024 * 1024, // 50MB buffer for large transcripts
            timeout: 10 * 60 * 1000, // 10 minute timeout
        });
        // whisper may exit 0 yet report failures on stderr — detect those.
        const hasError = stderr.includes('error:') ||
            stderr.includes('Error:') ||
            stderr.includes('failed to');
        if (hasError) {
            await cleanupConverted();
            throw new Error(`Whisper transcription failed:\n${stderr}`);
        }
        // whisper-cpp outputs JSON to a file named <input>.json
        const jsonOutputPath = `${audioToProcess}.json`;
        try {
            const jsonContent = await readFile(jsonOutputPath, 'utf-8');
            const whisperOutput = JSON.parse(jsonContent);
            // Clean up the temp JSON file and converted audio
            await unlink(jsonOutputPath).catch(() => { });
            await cleanupConverted();
            return parseWhisperOutput(whisperOutput);
        }
        catch {
            // Fallback: try to parse stdout as the transcript.
            // Fix: previously this path leaked the converted WAV and any
            // half-written JSON sidecar; remove both best-effort first.
            await unlink(jsonOutputPath).catch(() => { });
            await cleanupConverted();
            return parseWhisperStdout(stdout);
        }
    }
    catch (error) {
        await cleanupConverted();
        throw new Error(`Whisper transcription failed: ${error.message}`);
    }
}
|
|
98
|
+
/**
 * Parse whisper.cpp JSON output (`-oj`) into the internal Transcript format.
 * @param {object} output - Parsed whisper.cpp JSON; expected shape includes
 *   `transcription: [{ offsets: { from, to }, text }]` and optionally
 *   `result.language`.
 * @returns {{fullText: string, segments: Array, language: string, duration: number}}
 */
function parseWhisperOutput(output) {
    // Robustness fix: tolerate a missing or empty `transcription` array
    // instead of throwing on `undefined.map`.
    const rawSegments = output?.transcription ?? [];
    const segments = rawSegments.map((seg, index) => ({
        id: `seg-${index}`,
        start: seg.offsets.from / 1000, // whisper.cpp reports offsets in ms
        end: seg.offsets.to / 1000,
        text: seg.text.trim(),
        speaker: null, // no diarization info in whisper.cpp output
    }));
    const fullText = segments.map((s) => s.text).join(' ');
    const duration = segments.length > 0 ? segments[segments.length - 1].end : 0;
    return {
        fullText,
        segments,
        // Prefer the detected language when whisper.cpp reports one
        // (result.language in -oj output); whisper.cpp doesn't always
        // include it, so fall back to 'en'.
        language: output?.result?.language ?? 'en',
        duration,
    };
}
|
|
118
|
+
/**
 * Fallback parser for whisper's plain-text stdout.
 * Lines look like: "[00:00:00.000 --> 00:00:05.000] Hello world".
 * If no timestamped lines are found, the whole output becomes one segment.
 * @param {string} stdout - Raw stdout captured from the whisper CLI
 * @returns {{fullText: string, segments: Array, language: string, duration: number}}
 */
function parseWhisperStdout(stdout) {
    const lineRegex = /\[(\d{2}:\d{2}:\d{2}\.\d{3})\s*-->\s*(\d{2}:\d{2}:\d{2}\.\d{3})\]\s*(.*)/;
    const segments = [];
    for (const rawLine of stdout.split('\n')) {
        if (!rawLine.trim()) {
            continue;
        }
        const parsed = rawLine.match(lineRegex);
        if (!parsed) {
            continue;
        }
        segments.push({
            id: `seg-${segments.length}`,
            start: parseTimestamp(parsed[1]),
            end: parseTimestamp(parsed[2]),
            text: parsed[3].trim(),
            speaker: null,
        });
    }
    // No timestamped lines at all: treat the entire output as one segment.
    if (segments.length === 0 && stdout.trim()) {
        segments.push({
            id: 'seg-0',
            start: 0,
            end: 0,
            text: stdout.trim(),
            speaker: null,
        });
    }
    const fullText = segments.map((segment) => segment.text).join(' ');
    return {
        fullText,
        segments,
        language: 'en',
        duration: segments.length > 0 ? segments[segments.length - 1].end : 0,
    };
}
/**
 * Convert an "HH:MM:SS.mmm" timestamp string to seconds.
 * @param {string} timestamp - e.g. "00:01:02.500"
 * @returns {number} Seconds as a float
 */
function parseTimestamp(timestamp) {
    const [hoursPart, minutesPart, secondsPart] = timestamp.split(':');
    const [wholeSeconds, millis] = secondsPart.split('.');
    const totalSeconds = parseInt(hoursPart, 10) * 3600 +
        parseInt(minutesPart, 10) * 60 +
        parseInt(wholeSeconds, 10) +
        parseInt(millis, 10) / 1000;
    return totalSeconds;
}
|