escribano 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +297 -0
- package/dist/0_types.js +279 -0
- package/dist/actions/classify-session.js +77 -0
- package/dist/actions/create-contexts.js +44 -0
- package/dist/actions/create-topic-blocks.js +68 -0
- package/dist/actions/extract-metadata.js +24 -0
- package/dist/actions/generate-artifact-v3.js +296 -0
- package/dist/actions/generate-artifact.js +61 -0
- package/dist/actions/generate-summary-v3.js +260 -0
- package/dist/actions/outline-index.js +204 -0
- package/dist/actions/process-recording-v2.js +494 -0
- package/dist/actions/process-recording-v3.js +412 -0
- package/dist/actions/process-session.js +183 -0
- package/dist/actions/publish-summary-v3.js +303 -0
- package/dist/actions/sync-to-outline.js +196 -0
- package/dist/adapters/audio.silero.adapter.js +69 -0
- package/dist/adapters/cap.adapter.js +94 -0
- package/dist/adapters/capture.cap.adapter.js +107 -0
- package/dist/adapters/capture.filesystem.adapter.js +124 -0
- package/dist/adapters/embedding.ollama.adapter.js +141 -0
- package/dist/adapters/intelligence.adapter.js +202 -0
- package/dist/adapters/intelligence.mlx.adapter.js +395 -0
- package/dist/adapters/intelligence.ollama.adapter.js +741 -0
- package/dist/adapters/publishing.outline.adapter.js +75 -0
- package/dist/adapters/storage.adapter.js +81 -0
- package/dist/adapters/storage.fs.adapter.js +83 -0
- package/dist/adapters/transcription.whisper.adapter.js +206 -0
- package/dist/adapters/video.ffmpeg.adapter.js +405 -0
- package/dist/adapters/whisper.adapter.js +168 -0
- package/dist/batch-context.js +329 -0
- package/dist/db/helpers.js +50 -0
- package/dist/db/index.js +95 -0
- package/dist/db/migrate.js +80 -0
- package/dist/db/repositories/artifact.sqlite.js +77 -0
- package/dist/db/repositories/cluster.sqlite.js +92 -0
- package/dist/db/repositories/context.sqlite.js +75 -0
- package/dist/db/repositories/index.js +10 -0
- package/dist/db/repositories/observation.sqlite.js +70 -0
- package/dist/db/repositories/recording.sqlite.js +56 -0
- package/dist/db/repositories/subject.sqlite.js +64 -0
- package/dist/db/repositories/topic-block.sqlite.js +45 -0
- package/dist/db/types.js +4 -0
- package/dist/domain/classification.js +60 -0
- package/dist/domain/context.js +97 -0
- package/dist/domain/index.js +2 -0
- package/dist/domain/observation.js +17 -0
- package/dist/domain/recording.js +41 -0
- package/dist/domain/segment.js +93 -0
- package/dist/domain/session.js +93 -0
- package/dist/domain/time-range.js +38 -0
- package/dist/domain/transcript.js +79 -0
- package/dist/index.js +173 -0
- package/dist/pipeline/context.js +162 -0
- package/dist/pipeline/events.js +2 -0
- package/dist/prerequisites.js +226 -0
- package/dist/scripts/rebuild-index.js +53 -0
- package/dist/scripts/seed-fixtures.js +290 -0
- package/dist/services/activity-segmentation.js +333 -0
- package/dist/services/activity-segmentation.test.js +191 -0
- package/dist/services/app-normalization.js +212 -0
- package/dist/services/cluster-merge.js +69 -0
- package/dist/services/clustering.js +237 -0
- package/dist/services/debug.js +58 -0
- package/dist/services/frame-sampling.js +318 -0
- package/dist/services/signal-extraction.js +106 -0
- package/dist/services/subject-grouping.js +342 -0
- package/dist/services/temporal-alignment.js +99 -0
- package/dist/services/vlm-enrichment.js +84 -0
- package/dist/services/vlm-service.js +130 -0
- package/dist/stats/index.js +3 -0
- package/dist/stats/observer.js +65 -0
- package/dist/stats/repository.js +36 -0
- package/dist/stats/resource-tracker.js +86 -0
- package/dist/stats/types.js +1 -0
- package/dist/test-classification-prompts.js +181 -0
- package/dist/tests/cap.adapter.test.js +75 -0
- package/dist/tests/capture.cap.adapter.test.js +69 -0
- package/dist/tests/classify-session.test.js +140 -0
- package/dist/tests/db/repositories.test.js +243 -0
- package/dist/tests/domain/time-range.test.js +31 -0
- package/dist/tests/integration.test.js +84 -0
- package/dist/tests/intelligence.adapter.test.js +102 -0
- package/dist/tests/intelligence.ollama.adapter.test.js +178 -0
- package/dist/tests/process-v2.test.js +90 -0
- package/dist/tests/services/clustering.test.js +112 -0
- package/dist/tests/services/frame-sampling.test.js +152 -0
- package/dist/tests/utils/ocr.test.js +76 -0
- package/dist/tests/utils/parallel.test.js +57 -0
- package/dist/tests/visual-observer.test.js +175 -0
- package/dist/utils/id-normalization.js +15 -0
- package/dist/utils/index.js +9 -0
- package/dist/utils/model-detector.js +154 -0
- package/dist/utils/ocr.js +80 -0
- package/dist/utils/parallel.js +32 -0
- package/migrations/001_initial.sql +109 -0
- package/migrations/002_clusters.sql +41 -0
- package/migrations/003_observations_vlm_fields.sql +14 -0
- package/migrations/004_observations_unique.sql +18 -0
- package/migrations/005_processing_stats.sql +29 -0
- package/migrations/006_vlm_raw_response.sql +6 -0
- package/migrations/007_subjects.sql +23 -0
- package/migrations/008_artifacts_recording.sql +6 -0
- package/migrations/009_artifact_subjects.sql +10 -0
- package/package.json +82 -0
- package/prompts/action-items.md +55 -0
- package/prompts/blog-draft.md +54 -0
- package/prompts/blog-research.md +87 -0
- package/prompts/card.md +54 -0
- package/prompts/classify-segment.md +38 -0
- package/prompts/classify.md +37 -0
- package/prompts/code-snippets.md +163 -0
- package/prompts/extract-metadata.md +149 -0
- package/prompts/notes.md +83 -0
- package/prompts/runbook.md +123 -0
- package/prompts/standup.md +50 -0
- package/prompts/step-by-step.md +125 -0
- package/prompts/subject-grouping.md +31 -0
- package/prompts/summary-v3.md +89 -0
- package/prompts/summary.md +77 -0
- package/prompts/topic-classifier.md +24 -0
- package/prompts/topic-extract.md +13 -0
- package/prompts/vlm-batch.md +21 -0
- package/prompts/vlm-single.md +19 -0
|
@@ -0,0 +1,494 @@
|
|
|
1
|
+
import os from 'node:os';
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
import { generateId } from '../db/helpers.js';
|
|
4
|
+
import { advanceStep, completeProcessing, failProcessing, startProcessing, } from '../domain/recording.js';
|
|
5
|
+
import { log, step } from '../pipeline/context.js';
|
|
6
|
+
import { findClusterMerges } from '../services/cluster-merge.js';
|
|
7
|
+
import { clusterObservations } from '../services/clustering.js';
|
|
8
|
+
import { extractSignals } from '../services/signal-extraction.js';
|
|
9
|
+
import { describeFrames, selectFramesForVLM, } from '../services/vlm-enrichment.js';
|
|
10
|
+
import { bufferToEmbedding, chunkArray, parallelMap } from '../utils/index.js';
|
|
11
|
+
import { cleanOcrText } from '../utils/ocr.js';
|
|
12
|
+
import { createContextsFromSignals } from './create-contexts.js';
|
|
13
|
+
import { createTopicBlockFromCluster } from './create-topic-blocks.js';
|
|
14
|
+
// Canonical ordering of the v2 pipeline's checkpointed steps. A recording's
// `processingStep` records the last step reached, so resume logic can compare
// positions in this list.
const STEP_ORDER = [
    'vad',
    'transcription',
    'frame_extraction',
    'ocr_processing',
    'embedding',
    'clustering',
    'vlm_enrichment',
    'signal_extraction',
    'cluster_merge',
    'context_creation',
    'block_formation',
    'complete',
];
/**
 * Decides whether `targetStep` was already completed in a previous run.
 *
 * @param currentStep - step recorded on the recording (null/undefined for a fresh run)
 * @param targetStep - step the pipeline is about to execute
 * @returns true when the pipeline has already advanced past `targetStep`
 */
function shouldSkipStep(currentStep, targetStep) {
    if (!currentStep) {
        return false;
    }
    if (currentStep === 'complete') {
        return true;
    }
    // Skip only steps that sit strictly before the checkpointed position.
    return STEP_ORDER.indexOf(targetStep) < STEP_ORDER.indexOf(currentStep);
}
|
|
38
|
+
/**
 * V2 processing pipeline for a single recording.
 *
 * Runs resumably through: audio VAD/transcription → frame extraction → OCR →
 * embeddings → clustering → VLM enrichment → signal extraction → audio/visual
 * cluster merge → context creation → topic-block formation. After each stage
 * the recording's `processingStep` is checkpointed to the DB via
 * `advanceStep` + `updateRecordingInDb`, so a crashed run resumes from the
 * last completed stage (see `shouldSkipStep`).
 *
 * NOTE(review): every stage from frame extraction through block formation is
 * nested under `if (recording.videoPath)`, so audio-only recordings never get
 * embeddings, clusters, contexts, or topic blocks — confirm that is intended.
 *
 * @param recordingId - id of the recording row to process
 * @param repos - repository bundle (recordings, observations, clusters, contexts, topicBlocks)
 * @param adapters - adapter bundle (video, embedding, intelligence, preprocessor, transcription)
 * @param options - { force?: boolean, parallel?: boolean }
 * @throws Error when the recording does not exist; rethrows any stage failure
 *         after marking the recording failed in the DB.
 */
export async function processRecordingV2(recordingId, repos, adapters, options = {}) {
    const dbRecording = repos.recordings.findById(recordingId);
    if (!dbRecording) {
        throw new Error(`Recording ${recordingId} not found`);
    }
    // Handle --force: delete existing observations so the run starts clean.
    if (options.force) {
        log('info', `Force flag set, deleting existing observations for ${recordingId}...`);
        repos.observations.deleteByRecording(recordingId);
    }
    // Map the snake_case DB row to the camelCase domain shape.
    let recording = {
        id: dbRecording.id,
        status: dbRecording.status,
        processingStep: dbRecording.processing_step,
        errorMessage: dbRecording.error_message,
        videoPath: dbRecording.video_path,
        audioMicPath: dbRecording.audio_mic_path,
        audioSystemPath: dbRecording.audio_system_path,
        capturedAt: dbRecording.captured_at,
        duration: dbRecording.duration,
    };
    // If forced, reset to raw state so no stage is skipped on this run.
    if (options.force) {
        recording = {
            ...recording,
            status: 'raw',
            processingStep: null,
            errorMessage: null,
        };
        updateRecordingInDb(repos, recording);
    }
    if (recording.processingStep) {
        log('info', `Resuming ${recording.id} from step: ${recording.processingStep}`);
    }
    try {
        // 1. Start processing (unless already processing/processed)
        if (!shouldSkipStep(recording.processingStep, 'vad')) {
            recording = startProcessing(recording);
            updateRecordingInDb(repos, recording);
        }
        // ============================================
        // AUDIO PIPELINE
        // ============================================
        if (!shouldSkipStep(recording.processingStep, 'transcription')) {
            log('info', 'Running audio pipeline...');
            const audioObservations = await processAudioPipeline(recording, adapters, options);
            // Save audio observations immediately (crash-safety: transcripts
            // survive even if a later stage fails).
            if (audioObservations.length > 0) {
                await step('save-audio-observations', async () => {
                    repos.observations.saveBatch(audioObservations);
                    log('info', `Saved ${audioObservations.length} audio observations`);
                });
            }
            recording = advanceStep(recording, 'transcription');
            updateRecordingInDb(repos, recording);
        }
        else {
            log('info', 'Skipping audio pipeline (already completed)');
        }
        // ============================================
        // VISUAL PIPELINE
        // ============================================
        if (recording.videoPath) {
            // Step: Frame Extraction
            if (!shouldSkipStep(recording.processingStep, 'frame_extraction')) {
                await step('frame-extraction', async () => {
                    // Checkpoint BEFORE the work: a crash mid-step resumes here.
                    recording = advanceStep(recording, 'frame_extraction');
                    updateRecordingInDb(repos, recording);
                    const intervalSeconds = Number(process.env.ESCRIBANO_FRAME_INTERVAL) || 2;
                    const framesDir = path.join(os.tmpdir(), 'escribano', recording.id, 'frames');
                    // NOTE(review): `intervalSeconds` is only used in the log below —
                    // it is never passed to the adapter, which receives 0.3 as a
                    // threshold instead. Confirm extractFramesAtInterval derives the
                    // interval itself, otherwise the log message is misleading.
                    const extractedFrames = await adapters.video.extractFramesAtInterval(recording.videoPath, 0.3, // threshold
                    framesDir);
                    log('info', `Extracted ${extractedFrames.length} frames (interval: ${intervalSeconds}s)`);
                });
            }
            else {
                log('info', 'Skipping frame extraction (already completed)');
            }
            // Step: OCR Processing
            if (!shouldSkipStep(recording.processingStep, 'ocr_processing')) {
                await step('ocr-processing', async () => {
                    recording = advanceStep(recording, 'ocr_processing');
                    updateRecordingInDb(repos, recording);
                    // Paths must match those used by the frame-extraction step above.
                    const framesDir = path.join(os.tmpdir(), 'escribano', recording.id, 'frames');
                    const outputPath = path.join(os.tmpdir(), 'escribano', recording.id, 'visual-index.json');
                    const visualIndex = await adapters.video.runVisualIndexing(framesDir, outputPath);
                    log('info', `OCR processed ${visualIndex.frames.length} frames`);
                    // Build one 'visual' observation per frame with cleaned OCR text;
                    // VLM/embedding fields stay null until their stages run.
                    const observations = [];
                    for (const frame of visualIndex.frames) {
                        const cleanedOcr = cleanOcrText(frame.ocrText);
                        observations.push({
                            id: generateId(),
                            recording_id: recording.id,
                            type: 'visual',
                            timestamp: frame.timestamp,
                            end_timestamp: frame.timestamp,
                            image_path: frame.imagePath,
                            ocr_text: cleanedOcr,
                            vlm_description: null,
                            vlm_raw_response: null,
                            activity_type: null,
                            apps: null,
                            topics: null,
                            embedding: null,
                            text: null,
                            audio_source: null,
                            audio_type: null,
                        });
                    }
                    // Save visual observations (without embeddings yet)
                    if (observations.length > 0) {
                        repos.observations.saveBatch(observations);
                        log('info', `Saved ${observations.length} visual observations (OCR only)`);
                    }
                });
            }
            else {
                log('info', 'Skipping OCR processing (already completed)');
            }
            // Step: Generate Embeddings (for BOTH visual and audio)
            // Uses parallel batching with immediate persistence for crash-safety
            if (!shouldSkipStep(recording.processingStep, 'embedding')) {
                await step('generate-embeddings', async () => {
                    recording = advanceStep(recording, 'embedding');
                    updateRecordingInDb(repos, recording);
                    // Get ALL observations that need embeddings; already-embedded
                    // rows are skipped so a resumed run only does remaining work.
                    const allObs = repos.observations.findByRecording(recording.id);
                    const obsNeedingEmbedding = allObs.filter((o) => !o.embedding);
                    if (obsNeedingEmbedding.length === 0) {
                        log('info', 'All observations already have embeddings');
                        return;
                    }
                    // Configuration from environment
                    const BATCH_SIZE = Number(process.env.ESCRIBANO_EMBED_BATCH_SIZE) || 64;
                    const CONCURRENCY = Number(process.env.ESCRIBANO_EMBED_CONCURRENCY) || 4;
                    // Chunk observations into batches
                    const chunks = chunkArray(obsNeedingEmbedding, BATCH_SIZE);
                    // Shared progress counters; safe despite parallelMap because JS
                    // callbacks run on a single thread.
                    let completedCount = 0;
                    let successCount = 0;
                    log('info', `Generating embeddings for ${obsNeedingEmbedding.length} observations ` +
                        `(${chunks.length} batches, ${CONCURRENCY} concurrent)...`);
                    // Process chunks in parallel with immediate saves
                    await parallelMap(chunks, async (chunk, chunkIndex) => {
                        const batchStart = Date.now();
                        log('info', `[Worker] Batch ${chunkIndex + 1}/${chunks.length} started (${chunk.length} items)...`);
                        // Extract text content based on observation type:
                        // visual → OCR text, audio → transcript.
                        const textsToEmbed = chunk.map((o) => {
                            if (o.type === 'visual') {
                                return o.ocr_text || '';
                            }
                            return o.text || ''; // Audio transcript
                        });
                        // Call embedding service for this batch
                        const embeddings = await adapters.embedding.embedBatch(textsToEmbed, 'clustering');
                        // IMMEDIATE SAVE - crash-safe persistence. Empty/missing
                        // embeddings are silently skipped (counted via batchSuccess).
                        let batchSuccess = 0;
                        const dbStart = Date.now();
                        for (let i = 0; i < chunk.length; i++) {
                            const embedding = embeddings[i];
                            if (embedding && embedding.length > 0) {
                                repos.observations.updateEmbedding(chunk[i].id, embedding);
                                batchSuccess++;
                            }
                        }
                        const batchDuration = (Date.now() - batchStart) / 1000;
                        const dbDuration = (Date.now() - dbStart) / 1000;
                        completedCount += chunk.length;
                        successCount += batchSuccess;
                        log('info', `[Worker] Batch ${chunkIndex + 1}/${chunks.length} saved in ${batchDuration.toFixed(1)}s (DB: ${dbDuration.toFixed(2)}s) - ` +
                            `Total: ${completedCount}/${obsNeedingEmbedding.length}`);
                    }, CONCURRENCY);
                    log('info', `Completed: ${successCount}/${obsNeedingEmbedding.length} embeddings saved`);
                });
            }
            else {
                log('info', 'Skipping embedding generation (already completed)');
            }
            // ============================================================================
            // CLUSTERING PIPELINE
            // ============================================================================
            // Step: Semantic Clustering
            if (!shouldSkipStep(recording.processingStep, 'clustering')) {
                await step('clustering', async () => {
                    recording = advanceStep(recording, 'clustering');
                    updateRecordingInDb(repos, recording);
                    // Delete existing clusters for this recording (idempotent re-run).
                    repos.clusters.deleteByRecording(recording.id);
                    // Cluster visual observations. NOTE(review): clusterObservations
                    // is called without await — presumably synchronous; confirm.
                    const visualObs = repos.observations.findByRecordingAndType(recording.id, 'visual');
                    const visualClusters = clusterObservations(visualObs, adapters.embedding, {
                        timeWindowSeconds: Number(process.env.ESCRIBANO_CLUSTER_TIME_WINDOW) || 600,
                        distanceThreshold: Number(process.env.ESCRIBANO_CLUSTER_DISTANCE_THRESHOLD) || 0.4,
                    });
                    log('info', `Created ${visualClusters.length} visual clusters`);
                    // Cluster audio observations (looser, non-configurable settings).
                    const audioObs = repos.observations.findByRecordingAndType(recording.id, 'audio');
                    const audioClusters = clusterObservations(audioObs, adapters.embedding, {
                        timeWindowSeconds: 3600, // Audio can span longer
                        distanceThreshold: 0.5,
                    });
                    log('info', `Created ${audioClusters.length} audio clusters`);
                    // Save clusters to database
                    for (const cluster of [...visualClusters, ...audioClusters]) {
                        // Identity check against the source array tags the type.
                        const isVisual = visualClusters.includes(cluster);
                        const clusterId = generateId();
                        repos.clusters.save({
                            id: clusterId,
                            recording_id: recording.id,
                            type: isVisual ? 'visual' : 'audio',
                            start_timestamp: cluster.startTimestamp,
                            end_timestamp: cluster.endTimestamp,
                            observation_count: cluster.observations.length,
                            // Centroid stored as raw little-endian float32 bytes.
                            centroid: Buffer.from(new Float32Array(cluster.centroid).buffer),
                            classification: null, // Filled in signal extraction
                            metadata: null,
                        });
                        // Link observations
                        const links = cluster.observations.map((obs) => ({
                            observationId: obs.id,
                            clusterId,
                            distance: 0, // TODO: compute actual distance
                        }));
                        repos.clusters.linkObservationsBatch(links);
                    }
                });
            }
            // Step: VLM Enrichment
            if (!shouldSkipStep(recording.processingStep, 'vlm_enrichment')) {
                await step('vlm-enrichment', async () => {
                    recording = advanceStep(recording, 'vlm_enrichment');
                    updateRecordingInDb(repos, recording);
                    // Only visual clusters have frames to describe.
                    const clusters = repos.clusters.findByRecordingAndType(recording.id, 'visual');
                    let totalDescribed = 0;
                    for (const cluster of clusters) {
                        const observations = repos.clusters.getObservations(cluster.id);
                        const frames = selectFramesForVLM(observations);
                        if (frames.length > 0) {
                            const descriptions = await describeFrames(frames, adapters.intelligence);
                            // Update observations with VLM descriptions
                            // (descriptions is iterated as [obsId, description] pairs).
                            for (const [obsId, description] of descriptions) {
                                repos.observations.updateVLMDescription(obsId, description);
                                totalDescribed++;
                            }
                        }
                    }
                    log('info', `VLM described ${totalDescribed} frames across ${clusters.length} clusters`);
                });
            }
            // Step: Signal Extraction
            if (!shouldSkipStep(recording.processingStep, 'signal_extraction')) {
                await step('signal-extraction', async () => {
                    recording = advanceStep(recording, 'signal_extraction');
                    updateRecordingInDb(repos, recording);
                    // Classify every cluster (visual AND audio); the serialized
                    // signals fill the `classification` column left null earlier.
                    const allClusters = repos.clusters.findByRecording(recording.id);
                    for (const cluster of allClusters) {
                        const observations = repos.clusters.getObservations(cluster.id);
                        const signals = await extractSignals(observations, adapters.intelligence);
                        repos.clusters.updateClassification(cluster.id, JSON.stringify(signals));
                    }
                    log('info', `Extracted signals for ${allClusters.length} clusters`);
                });
            }
            // Step: Cluster Merge (Audio → Visual)
            if (!shouldSkipStep(recording.processingStep, 'cluster_merge')) {
                await step('cluster-merge', async () => {
                    recording = advanceStep(recording, 'cluster_merge');
                    updateRecordingInDb(repos, recording);
                    const visualClusters = repos.clusters.findByRecordingAndType(recording.id, 'visual');
                    const audioClusters = repos.clusters.findByRecordingAndType(recording.id, 'audio');
                    if (audioClusters.length > 0 && visualClusters.length > 0) {
                        // Build cluster-with-signals for merging; centroids are decoded
                        // back from their BLOB form for similarity computation.
                        const visualWithSignals = visualClusters.map((c) => ({
                            cluster: c,
                            signals: JSON.parse(c.classification || '{}'),
                            centroid: bufferToEmbedding(c.centroid),
                        }));
                        const audioWithSignals = audioClusters.map((c) => ({
                            cluster: c,
                            signals: JSON.parse(c.classification || '{}'),
                            centroid: bufferToEmbedding(c.centroid),
                        }));
                        const merges = findClusterMerges(visualWithSignals, audioWithSignals, adapters.embedding);
                        for (const merge of merges) {
                            repos.clusters.saveMerge(merge.visualClusterId, merge.audioClusterId, merge.similarityScore, merge.mergeReason);
                        }
                        log('info', `Created ${merges.length} audio-visual cluster merges`);
                    }
                    else {
                        log('info', 'No audio clusters to merge');
                    }
                });
            }
            // Step: Context Creation
            if (!shouldSkipStep(recording.processingStep, 'context_creation')) {
                await step('context-creation', async () => {
                    recording = advanceStep(recording, 'context_creation');
                    updateRecordingInDb(repos, recording);
                    const clusters = repos.clusters.findByRecording(recording.id);
                    let totalContexts = 0;
                    for (const cluster of clusters) {
                        const observations = repos.clusters.getObservations(cluster.id);
                        const signals = JSON.parse(cluster.classification || '{}');
                        const result = createContextsFromSignals(signals, observations, repos.contexts);
                        totalContexts += result.contextIds.length;
                        // Link observations to contexts
                        for (const link of result.observationLinks) {
                            repos.contexts.linkObservation(link.observationId, link.contextId);
                        }
                    }
                    log('info', `Created/linked ${totalContexts} contexts`);
                });
            }
            // Step: TopicBlock Formation
            if (!shouldSkipStep(recording.processingStep, 'block_formation')) {
                await step('block-formation', async () => {
                    recording = advanceStep(recording, 'block_formation');
                    updateRecordingInDb(repos, recording);
                    // Delete existing topic blocks (idempotent re-run).
                    repos.topicBlocks.deleteByRecording(recording.id);
                    // Pre-load all context links for this recording to avoid N+1 queries
                    const allLinks = repos.contexts.getLinksByRecording(recording.id);
                    // Create TopicBlocks from visual clusters (audio merged in)
                    const visualClusters = repos.clusters.findByRecordingAndType(recording.id, 'visual');
                    for (const cluster of visualClusters) {
                        const signals = JSON.parse(cluster.classification || '{}');
                        const observations = repos.clusters.getObservations(cluster.id);
                        // Get context IDs from pre-loaded links
                        const obsIds = new Set(observations.map((o) => o.id));
                        const contextIds = new Set();
                        for (const link of allLinks) {
                            if (obsIds.has(link.observation_id)) {
                                contextIds.add(link.context_id);
                            }
                        }
                        // Get merged audio clusters
                        const mergedAudio = repos.clusters.getMergedAudioClusters(cluster.id);
                        createTopicBlockFromCluster({
                            cluster,
                            contextIds: Array.from(contextIds),
                            signals,
                            mergedAudioClusterIds: mergedAudio.map((a) => a.id),
                        }, repos.topicBlocks);
                    }
                    // Create standalone TopicBlocks for unmerged audio clusters
                    const audioClusters = repos.clusters.findByRecordingAndType(recording.id, 'audio');
                    for (const cluster of audioClusters) {
                        // Check if it was merged. NOTE(review): this re-queries merged
                        // audio per visual cluster inside a loop (O(visual × audio)
                        // DB calls) — consider pre-loading like `allLinks` above.
                        const isMerged = visualClusters.some((vc) => {
                            const mergedAudio = repos.clusters.getMergedAudioClusters(vc.id);
                            return mergedAudio.some((ma) => ma.id === cluster.id);
                        });
                        if (!isMerged) {
                            const signals = JSON.parse(cluster.classification || '{}');
                            const observations = repos.clusters.getObservations(cluster.id);
                            const obsIds = new Set(observations.map((o) => o.id));
                            const contextIds = new Set();
                            for (const link of allLinks) {
                                if (obsIds.has(link.observation_id)) {
                                    contextIds.add(link.context_id);
                                }
                            }
                            createTopicBlockFromCluster({
                                cluster,
                                contextIds: Array.from(contextIds),
                                signals,
                            }, repos.topicBlocks);
                        }
                    }
                    log('info', `Created topic blocks for ${recording.id}`);
                });
            }
        }
        // 4. Complete
        recording = completeProcessing(recording);
        updateRecordingInDb(repos, recording);
        log('info', `Successfully processed recording ${recording.id}`);
    }
    catch (error) {
        // Persist the failure state before rethrowing so the recording is
        // not left stuck in 'processing'.
        const message = error.message;
        log('error', `Processing v2 failed for ${recordingId}: ${message}`);
        recording = failProcessing(recording, message);
        updateRecordingInDb(repos, recording);
        throw error;
    }
}
|
|
425
|
+
/**
 * Runs the audio half of the v2 pipeline: for each available audio source
 * (mic and system), performs VAD, transcribes every speech segment, and
 * builds one 'audio' observation row per non-empty transcript.
 *
 * @param recording - domain recording (uses id, audioMicPath, audioSystemPath)
 * @param adapters - needs `preprocessor` (VAD + cleanup) and `transcription`
 * @param options - { parallel?: boolean } process both sources concurrently
 * @returns array of observation rows ready for repos.observations.saveBatch
 */
async function processAudioPipeline(recording, adapters, options) {
    const collected = [];
    // Full per-source pipeline: VAD → per-segment transcription → temp cleanup.
    const handleSource = async (audioPath, source) => {
        if (!audioPath) {
            return;
        }
        log('info', `Processing ${source} audio: ${audioPath}`);
        // Voice-activity detection yields speech segments plus a temp dir
        // holding the extracted per-segment audio files.
        const { segments, tempDir } = await step(`vad-${source}`, async () => adapters.preprocessor.extractSpeechSegments(audioPath, recording.id));
        if (segments.length === 0) {
            log('info', `No speech segments found in ${source} audio`);
            await adapters.preprocessor.cleanup(tempDir);
            return;
        }
        log('info', `Found ${segments.length} segments in ${source} audio`);
        await step(`transcription-${source}`, async () => {
            let transcribed = 0;
            for (const segment of segments) {
                try {
                    const text = await adapters.transcription.transcribeSegment(segment.audioPath);
                    // Empty transcripts are dropped rather than stored.
                    if (text.length > 0) {
                        transcribed++;
                        collected.push({
                            id: generateId(),
                            recording_id: recording.id,
                            type: 'audio',
                            timestamp: segment.start,
                            end_timestamp: segment.end,
                            text,
                            audio_source: source,
                            audio_type: 'speech',
                            image_path: null,
                            ocr_text: null,
                            vlm_description: null,
                            vlm_raw_response: null,
                            activity_type: null,
                            apps: null,
                            topics: null,
                            embedding: null,
                        });
                    }
                }
                catch (error) {
                    // One bad segment must not abort the rest of the source.
                    log('warn', `Failed to transcribe segment at ${segment.start}s: ${error.message}`);
                }
            }
            log('info', `Successfully transcribed ${transcribed}/${segments.length} segments for ${source}`);
        });
        // Remove the temp VAD artifacts once transcription is done.
        await step(`cleanup-${source}`, async () => {
            await adapters.preprocessor.cleanup(tempDir);
        });
    };
    const sources = [
        [recording.audioMicPath, 'mic'],
        [recording.audioSystemPath, 'system'],
    ];
    if (options.parallel) {
        await Promise.all(sources.map(([audioPath, source]) => handleSource(audioPath, source)));
    }
    else {
        for (const [audioPath, source] of sources) {
            await handleSource(audioPath, source);
        }
    }
    return collected;
}
|
|
492
|
+
/**
 * Persists a recording's lifecycle fields (status, current step, error)
 * back to its database row.
 *
 * @param repos - repository bundle exposing `recordings.updateStatus`
 * @param recording - domain recording whose state should be checkpointed
 */
function updateRecordingInDb(repos, recording) {
    const { id, status, processingStep, errorMessage } = recording;
    repos.recordings.updateStatus(id, status, processingStep, errorMessage);
}
|