escribano 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. package/LICENSE +21 -0
  2. package/README.md +297 -0
  3. package/dist/0_types.js +279 -0
  4. package/dist/actions/classify-session.js +77 -0
  5. package/dist/actions/create-contexts.js +44 -0
  6. package/dist/actions/create-topic-blocks.js +68 -0
  7. package/dist/actions/extract-metadata.js +24 -0
  8. package/dist/actions/generate-artifact-v3.js +296 -0
  9. package/dist/actions/generate-artifact.js +61 -0
  10. package/dist/actions/generate-summary-v3.js +260 -0
  11. package/dist/actions/outline-index.js +204 -0
  12. package/dist/actions/process-recording-v2.js +494 -0
  13. package/dist/actions/process-recording-v3.js +412 -0
  14. package/dist/actions/process-session.js +183 -0
  15. package/dist/actions/publish-summary-v3.js +303 -0
  16. package/dist/actions/sync-to-outline.js +196 -0
  17. package/dist/adapters/audio.silero.adapter.js +69 -0
  18. package/dist/adapters/cap.adapter.js +94 -0
  19. package/dist/adapters/capture.cap.adapter.js +107 -0
  20. package/dist/adapters/capture.filesystem.adapter.js +124 -0
  21. package/dist/adapters/embedding.ollama.adapter.js +141 -0
  22. package/dist/adapters/intelligence.adapter.js +202 -0
  23. package/dist/adapters/intelligence.mlx.adapter.js +395 -0
  24. package/dist/adapters/intelligence.ollama.adapter.js +741 -0
  25. package/dist/adapters/publishing.outline.adapter.js +75 -0
  26. package/dist/adapters/storage.adapter.js +81 -0
  27. package/dist/adapters/storage.fs.adapter.js +83 -0
  28. package/dist/adapters/transcription.whisper.adapter.js +206 -0
  29. package/dist/adapters/video.ffmpeg.adapter.js +405 -0
  30. package/dist/adapters/whisper.adapter.js +168 -0
  31. package/dist/batch-context.js +329 -0
  32. package/dist/db/helpers.js +50 -0
  33. package/dist/db/index.js +95 -0
  34. package/dist/db/migrate.js +80 -0
  35. package/dist/db/repositories/artifact.sqlite.js +77 -0
  36. package/dist/db/repositories/cluster.sqlite.js +92 -0
  37. package/dist/db/repositories/context.sqlite.js +75 -0
  38. package/dist/db/repositories/index.js +10 -0
  39. package/dist/db/repositories/observation.sqlite.js +70 -0
  40. package/dist/db/repositories/recording.sqlite.js +56 -0
  41. package/dist/db/repositories/subject.sqlite.js +64 -0
  42. package/dist/db/repositories/topic-block.sqlite.js +45 -0
  43. package/dist/db/types.js +4 -0
  44. package/dist/domain/classification.js +60 -0
  45. package/dist/domain/context.js +97 -0
  46. package/dist/domain/index.js +2 -0
  47. package/dist/domain/observation.js +17 -0
  48. package/dist/domain/recording.js +41 -0
  49. package/dist/domain/segment.js +93 -0
  50. package/dist/domain/session.js +93 -0
  51. package/dist/domain/time-range.js +38 -0
  52. package/dist/domain/transcript.js +79 -0
  53. package/dist/index.js +173 -0
  54. package/dist/pipeline/context.js +162 -0
  55. package/dist/pipeline/events.js +2 -0
  56. package/dist/prerequisites.js +226 -0
  57. package/dist/scripts/rebuild-index.js +53 -0
  58. package/dist/scripts/seed-fixtures.js +290 -0
  59. package/dist/services/activity-segmentation.js +333 -0
  60. package/dist/services/activity-segmentation.test.js +191 -0
  61. package/dist/services/app-normalization.js +212 -0
  62. package/dist/services/cluster-merge.js +69 -0
  63. package/dist/services/clustering.js +237 -0
  64. package/dist/services/debug.js +58 -0
  65. package/dist/services/frame-sampling.js +318 -0
  66. package/dist/services/signal-extraction.js +106 -0
  67. package/dist/services/subject-grouping.js +342 -0
  68. package/dist/services/temporal-alignment.js +99 -0
  69. package/dist/services/vlm-enrichment.js +84 -0
  70. package/dist/services/vlm-service.js +130 -0
  71. package/dist/stats/index.js +3 -0
  72. package/dist/stats/observer.js +65 -0
  73. package/dist/stats/repository.js +36 -0
  74. package/dist/stats/resource-tracker.js +86 -0
  75. package/dist/stats/types.js +1 -0
  76. package/dist/test-classification-prompts.js +181 -0
  77. package/dist/tests/cap.adapter.test.js +75 -0
  78. package/dist/tests/capture.cap.adapter.test.js +69 -0
  79. package/dist/tests/classify-session.test.js +140 -0
  80. package/dist/tests/db/repositories.test.js +243 -0
  81. package/dist/tests/domain/time-range.test.js +31 -0
  82. package/dist/tests/integration.test.js +84 -0
  83. package/dist/tests/intelligence.adapter.test.js +102 -0
  84. package/dist/tests/intelligence.ollama.adapter.test.js +178 -0
  85. package/dist/tests/process-v2.test.js +90 -0
  86. package/dist/tests/services/clustering.test.js +112 -0
  87. package/dist/tests/services/frame-sampling.test.js +152 -0
  88. package/dist/tests/utils/ocr.test.js +76 -0
  89. package/dist/tests/utils/parallel.test.js +57 -0
  90. package/dist/tests/visual-observer.test.js +175 -0
  91. package/dist/utils/id-normalization.js +15 -0
  92. package/dist/utils/index.js +9 -0
  93. package/dist/utils/model-detector.js +154 -0
  94. package/dist/utils/ocr.js +80 -0
  95. package/dist/utils/parallel.js +32 -0
  96. package/migrations/001_initial.sql +109 -0
  97. package/migrations/002_clusters.sql +41 -0
  98. package/migrations/003_observations_vlm_fields.sql +14 -0
  99. package/migrations/004_observations_unique.sql +18 -0
  100. package/migrations/005_processing_stats.sql +29 -0
  101. package/migrations/006_vlm_raw_response.sql +6 -0
  102. package/migrations/007_subjects.sql +23 -0
  103. package/migrations/008_artifacts_recording.sql +6 -0
  104. package/migrations/009_artifact_subjects.sql +10 -0
  105. package/package.json +82 -0
  106. package/prompts/action-items.md +55 -0
  107. package/prompts/blog-draft.md +54 -0
  108. package/prompts/blog-research.md +87 -0
  109. package/prompts/card.md +54 -0
  110. package/prompts/classify-segment.md +38 -0
  111. package/prompts/classify.md +37 -0
  112. package/prompts/code-snippets.md +163 -0
  113. package/prompts/extract-metadata.md +149 -0
  114. package/prompts/notes.md +83 -0
  115. package/prompts/runbook.md +123 -0
  116. package/prompts/standup.md +50 -0
  117. package/prompts/step-by-step.md +125 -0
  118. package/prompts/subject-grouping.md +31 -0
  119. package/prompts/summary-v3.md +89 -0
  120. package/prompts/summary.md +77 -0
  121. package/prompts/topic-classifier.md +24 -0
  122. package/prompts/topic-extract.md +13 -0
  123. package/prompts/vlm-batch.md +21 -0
  124. package/prompts/vlm-single.md +19 -0
@@ -0,0 +1,342 @@
1
+ /**
2
+ * Escribano - Subject Grouping Service
3
+ *
4
+ * Groups TopicBlocks into coherent subjects using LLM-based clustering.
5
+ * This is the foundation for the new artifact architecture.
6
+ */
7
+ import { readFileSync } from 'node:fs';
8
+ import { join } from 'node:path';
9
// Apps whose presence marks activity as personal rather than work. Used both
// by the LLM-grouping personal heuristic and by the deterministic fallback.
const PERSONAL_APPS = new Set([
    'WhatsApp',
    'Instagram',
    'TikTok',
    'Telegram',
    'Facebook',
    'Twitter',
    'Snapchat',
    'Discord',
    'Messenger',
    'Signal',
    'FaceTime',
    'iMessage',
    'Messages',
]);
// Fraction of a subject's distinct apps that must be personal for the whole
// subject to be flagged personal (see detectPersonalSubject).
const PERSONAL_APP_THRESHOLD = 0.5;
// Grouping model; overridable via the ESCRIBANO_SUBJECT_GROUPING_MODEL env var.
const SUBJECT_GROUPING_MODEL = process.env.ESCRIBANO_SUBJECT_GROUPING_MODEL || 'qwen3.5:27b';
26
/**
 * Group TopicBlocks into coherent subjects using an LLM.
 *
 * Builds a prompt describing each block (time range, activity, apps, topics),
 * asks the intelligence service to cluster them, then aggregates per-subject
 * duration / app / activity statistics. Falls back to a deterministic
 * time-ordered grouping when the LLM call or response parsing fails.
 *
 * @param topicBlocks blocks carrying a JSON string `classification` field
 * @param intelligence service exposing `generateText(prompt, options)`
 * @param recordingId used to derive stable subject IDs
 * @returns {Promise<{subjects, personalDuration, workDuration}>} durations in seconds
 */
export async function groupTopicBlocksIntoSubjects(topicBlocks, intelligence, recordingId) {
    if (topicBlocks.length === 0) {
        return {
            subjects: [],
            personalDuration: 0,
            workDuration: 0,
        };
    }
    const blocksForGrouping = topicBlocks.map(extractBlockForGrouping);
    const prompt = buildGroupingPrompt(blocksForGrouping);
    console.log(`[subject-grouping] Grouping ${topicBlocks.length} blocks into subjects (model: ${SUBJECT_GROUPING_MODEL})`);
    try {
        const response = await intelligence.generateText(prompt, {
            expectJson: false,
            model: SUBJECT_GROUPING_MODEL,
            numPredict: 2000,
            think: false,
        });
        console.log(`[subject-grouping] LLM response (${response.length} chars):\n${response.slice(0, 500)}${response.length > 500 ? '...' : ''}`);
        // Throws when no valid groups can be parsed; handled by the catch below.
        const grouping = parseGroupingResponse(response, topicBlocks);
        console.log(`[subject-grouping] Parsed ${grouping.groups.length} groups: ${grouping.groups.map((g) => g.label).join(', ')}`);
        const subjects = grouping.groups.map((group, index) => {
            const subjectId = `subject-${recordingId}-${index}`;
            // Resolve the LLM-echoed IDs back to blocks. The filter is
            // defensive — parseGroupingResponse already discards unknown IDs.
            const blocks = group.blockIds
                .map((id) => topicBlocks.find((b) => b.id === id))
                .filter((b) => b !== undefined);
            const totalDuration = blocks.reduce((sum, b) => {
                const classification = parseClassification(b);
                return sum + (classification?.duration ?? 0);
            }, 0);
            // Per-activity seconds plus the union of apps across the group.
            const activityBreakdown = {};
            const appsSet = new Set();
            for (const block of blocks) {
                const classification = parseClassification(block);
                if (classification) {
                    const activity = classification.activity_type || 'other';
                    activityBreakdown[activity] =
                        (activityBreakdown[activity] || 0) + (classification.duration ?? 0);
                    if (classification.apps) {
                        for (const app of classification.apps) {
                            appsSet.add(app);
                        }
                    }
                }
            }
            const isPersonal = detectPersonalSubject(appsSet, activityBreakdown);
            return {
                id: subjectId,
                recordingId,
                label: group.label,
                topicBlockIds: group.blockIds,
                totalDuration,
                activityBreakdown,
                apps: [...appsSet],
                isPersonal,
            };
        });
        const personalDuration = subjects
            .filter((s) => s.isPersonal)
            .reduce((sum, s) => sum + s.totalDuration, 0);
        const workDuration = subjects
            .filter((s) => !s.isPersonal)
            .reduce((sum, s) => sum + s.totalDuration, 0);
        return {
            subjects,
            personalDuration,
            workDuration,
        };
    }
    catch (error) {
        const err = error;
        const errorType = err.name || 'Error';
        const errorMessage = err.message || String(err);
        console.error(`[subject-grouping] LLM grouping failed (${errorType}): ${errorMessage}`);
        if (err.stack) {
            console.error(`[subject-grouping] Stack trace:`, err.stack.split('\n').slice(0, 3).join('\n'));
        }
        // Deterministic fallback keeps the pipeline moving without the LLM.
        return createFallbackGrouping(topicBlocks, recordingId);
    }
}
106
/**
 * Project a TopicBlock into the minimal summary shape the grouping prompt
 * needs. Missing or unparseable classification fields degrade to neutral
 * defaults (0 / '' / [] / 'other').
 */
function extractBlockForGrouping(block) {
    const c = parseClassification(block) ?? {};
    return {
        id: block.id,
        startTime: c.start_time ?? 0,
        endTime: c.end_time ?? 0,
        duration: c.duration ?? 0,
        activityType: c.activity_type || 'other',
        keyDescription: c.key_description ?? '',
        apps: c.apps ?? [],
        topics: c.topics ?? [],
    };
}
119
/**
 * Safely decode a TopicBlock's JSON `classification` payload.
 * Returns the parsed object, or null when the field is absent, empty,
 * or contains malformed JSON.
 */
function parseClassification(block) {
    const raw = block.classification;
    if (!raw) {
        return null;
    }
    try {
        return JSON.parse(raw);
    } catch {
        return null;
    }
}
129
/**
 * Build the subject-grouping prompt from extracted block summaries.
 *
 * Loads `prompts/subject-grouping.md` relative to the current working
 * directory (NOTE(review): cwd-relative lookup assumes the process runs from
 * the package root — confirm), falling back to an inline template when the
 * file cannot be read. Placeholders {{BLOCK_COUNT}}, {{BLOCK_DESCRIPTIONS}}
 * and {{EXAMPLE_BLOCK_IDS}} are then substituted.
 *
 * @param blocks summaries from extractBlockForGrouping; callers guard the
 *   empty case (with zero blocks, exampleBlockIds would render "undefined")
 * @returns the final prompt string
 */
function buildGroupingPrompt(blocks) {
    const blockDescriptions = blocks
        .map((b, i) => {
        return `BLOCK ${i + 1}:
Time: ${formatTime(b.startTime)} - ${formatTime(b.endTime)} (${formatDuration(b.duration)})
Activity: ${b.activityType}
Description: ${b.keyDescription}
Apps: ${b.apps.join(', ') || 'none'}
Topics: ${b.topics.join(', ') || 'none'}
ID: ${b.id}`;
    })
        .join('\n\n');
    const blockIdList = blocks.map((b) => b.id);
    // Show the model one or two real IDs so it copies the exact format.
    const exampleBlockIds = blockIdList.length >= 2
        ? `"${blockIdList[0]}", "${blockIdList[1]}"`
        : `"${blockIdList[0]}"`;
    let template;
    try {
        const promptPath = join(process.cwd(), 'prompts', 'subject-grouping.md');
        template = readFileSync(promptPath, 'utf-8');
    }
    catch {
        // Fallback inline prompt if file not found
        template = `You are analyzing a work session that has been divided into {{BLOCK_COUNT}} segments (TopicBlocks).

Your task is to group these segments into 1-6 coherent SUBJECTS. A subject represents a distinct thread of work (e.g., "Escribano pipeline optimization", "Personal time", "Email and admin", "Research on competitors").

GROUPING RULES:
1. Group segments that belong to the same work thread, even if they're not consecutive in time
2. Personal activities (WhatsApp, Instagram, social media, personal calls) should be grouped into a "Personal" subject
3. Email/calendar/admin is only its own group when email IS the primary activity — not just because an email app was open in the background
4. Deep work on the same project/codebase should be grouped together
5. Research sessions should be grouped separately from coding sessions unless clearly related

RULE PRIORITY (when in doubt):
- Classify by primary ACTIVITY TYPE and project context, not by which apps happened to be open
- If all segments are about the same project, one group is correct — do not invent artificial splits

SEGMENTS TO GROUP:
{{BLOCK_DESCRIPTIONS}}

For each group, output ONE line in this EXACT format:
Group 1: label: [Descriptive subject name] | blockIds: [uuid1, uuid2, uuid3]

Example output:
Group 1: label: Escribano VLM Integration | blockIds: [{{EXAMPLE_BLOCK_IDS}}]

CRITICAL REQUIREMENTS:
- Each group MUST have "label" and "blockIds"
- Block IDs are the UUIDs shown in each BLOCK above (copy them exactly)
- Include ALL {{BLOCK_COUNT}} block IDs across all groups (every block must be assigned exactly once)
- Create 1-6 groups (one group is fine if all work is the same project)
- Use clear, descriptive labels for each subject
- Output ONLY the group lines — no explanation, no preamble, no markdown`;
    }
    // Replace template variables. BLOCK_COUNT appears multiple times, so it
    // uses replaceAll; the other two placeholders occur once.
    return template
        .replaceAll('{{BLOCK_COUNT}}', String(blocks.length))
        .replace('{{BLOCK_DESCRIPTIONS}}', blockDescriptions)
        .replace('{{EXAMPLE_BLOCK_IDS}}', exampleBlockIds);
}
190
/**
 * Parse the LLM's "Group N: label: ... | blockIds: [...]" lines.
 *
 * IDs not present in `topicBlocks` are discarded; a group survives only when
 * it keeps a non-empty label and at least one valid block ID.
 *
 * @throws {Error} when no line yields a valid group
 * @returns {{ groups: Array<{label: string, blockIds: string[]}> }}
 */
function parseGroupingResponse(response, topicBlocks) {
    const validBlockIds = new Set(topicBlocks.map((b) => b.id));
    const groupRegex = /^Group\s+\d+:\s*label:\s*(.+?)\s*\|\s*blockIds:\s*\[(.+?)\]$/i;
    const lines = response.split('\n').filter((line) => line.trim());
    const groups = [];
    let matchedLines = 0;
    for (const line of lines) {
        const match = groupRegex.exec(line);
        if (match === null) {
            continue;
        }
        matchedLines += 1;
        const [, rawLabel, rawIds] = match;
        const label = rawLabel.trim();
        const blockIdsStr = rawIds.trim();
        // Strip surrounding quotes from each ID, then keep only known blocks.
        const blockIds = blockIdsStr
            .split(',')
            .map((id) => id.trim().replace(/^["']|["']$/g, ''))
            .filter((id) => validBlockIds.has(id));
        console.log(`[subject-grouping] Parsed group "${label}": ${blockIds.length}/${blockIdsStr.split(',').length} valid block IDs`);
        if (blockIds.length > 0 && label) {
            groups.push({ label, blockIds });
        }
    }
    if (groups.length === 0) {
        console.error(`[subject-grouping] Failed to parse any groups from ${lines.length} lines (${matchedLines} matched regex)`);
        throw new Error(`No valid groups found in response. Matched ${matchedLines}/${lines.length} lines.`);
    }
    return { groups };
}
218
/**
 * Heuristic: a subject is "personal" when at least PERSONAL_APP_THRESHOLD of
 * its distinct apps appear in PERSONAL_APPS. Subjects with no apps are never
 * personal. Note: activityBreakdown is accepted but not currently used by the
 * heuristic.
 */
function detectPersonalSubject(apps, activityBreakdown) {
    const appList = [...apps];
    if (appList.length === 0) {
        return false;
    }
    const personalCount = appList.filter((app) => PERSONAL_APPS.has(app)).length;
    return personalCount / appList.length >= PERSONAL_APP_THRESHOLD;
}
231
/**
 * Deterministic fallback grouping used when LLM grouping fails.
 *
 * Walks the blocks in chronological order and alternates between a 'Personal'
 * and a 'Work Session' subject whenever the personal/work character of
 * consecutive blocks flips (here a block is "personal" if ANY of its apps is
 * in PERSONAL_APPS). Blocks without a parseable classification are skipped.
 *
 * @returns same shape as groupTopicBlocksIntoSubjects (durations in seconds)
 */
function createFallbackGrouping(topicBlocks, recordingId) {
    if (topicBlocks.length === 0) {
        return {
            subjects: [],
            personalDuration: 0,
            workDuration: 0,
        };
    }
    const subjects = [];
    let currentSubject = null;
    // Chronological order by classified start_time (unclassified sort as 0).
    const sortedBlocks = [...topicBlocks].sort((a, b) => {
        const aClass = parseClassification(a);
        const bClass = parseClassification(b);
        return (aClass?.start_time ?? 0) - (bClass?.start_time ?? 0);
    });
    for (const block of sortedBlocks) {
        const classification = parseClassification(block);
        if (!classification)
            continue;
        const apps = classification.apps || [];
        const isPersonal = apps.some((app) => PERSONAL_APPS.has(app));
        if (!currentSubject) {
            currentSubject = {
                label: isPersonal ? 'Personal' : 'Work Session',
                blocks: [],
                apps: new Set(),
                activities: {},
            };
        }
        // Close the running subject when the personal/work character flips.
        const shouldStartNewSubject = (isPersonal && currentSubject.label !== 'Personal') ||
            (!isPersonal && currentSubject.label === 'Personal');
        if (shouldStartNewSubject) {
            subjects.push(finalizeSubject(currentSubject, recordingId, subjects.length));
            currentSubject = {
                label: isPersonal ? 'Personal' : 'Work Session',
                blocks: [],
                apps: new Set(),
                activities: {},
            };
        }
        currentSubject.blocks.push(block);
        for (const app of apps) {
            currentSubject.apps.add(app);
        }
        // Accumulate per-activity seconds for the subject's breakdown.
        const activity = classification.activity_type || 'other';
        currentSubject.activities[activity] =
            (currentSubject.activities[activity] || 0) +
                (classification.duration ?? 0);
    }
    // Flush the trailing subject, if it accumulated any blocks.
    if (currentSubject && currentSubject.blocks.length > 0) {
        subjects.push(finalizeSubject(currentSubject, recordingId, subjects.length));
    }
    const personalDuration = subjects
        .filter((s) => s.isPersonal)
        .reduce((sum, s) => sum + s.totalDuration, 0);
    const workDuration = subjects
        .filter((s) => !s.isPersonal)
        .reduce((sum, s) => sum + s.totalDuration, 0);
    return { subjects, personalDuration, workDuration };
}
291
/**
 * Materialize an accumulated fallback-subject draft into the Subject shape.
 * The subject ID is derived from the recording and its position in the list;
 * totalDuration sums the classified duration of every member block.
 */
function finalizeSubject(subject, recordingId, index) {
    let totalDuration = 0;
    for (const b of subject.blocks) {
        totalDuration += parseClassification(b)?.duration ?? 0;
    }
    return {
        id: `subject-${recordingId}-${index}`,
        recordingId,
        label: subject.label,
        topicBlockIds: subject.blocks.map((b) => b.id),
        totalDuration,
        activityBreakdown: subject.activities,
        apps: [...subject.apps],
        isPersonal: subject.label === 'Personal',
    };
}
307
/** Render a second offset as "M:SS"; minutes are not capped at 60. */
function formatTime(seconds) {
    const wholeMinutes = Math.floor(seconds / 60);
    const remainder = Math.floor(seconds % 60);
    return `${wholeMinutes}:${String(remainder).padStart(2, '0')}`;
}
312
/** Render a duration as "Xh Ym", "Ym", or "Zs" depending on magnitude. */
function formatDuration(seconds) {
    const hours = Math.floor(seconds / 3600);
    const minutes = Math.floor((seconds % 3600) / 60);
    if (hours > 0) {
        return `${hours}h ${minutes}m`;
    }
    return minutes > 0 ? `${minutes}m` : `${Math.floor(seconds)}s`;
}
323
/**
 * Persist subjects and their subject→TopicBlock links in two batch writes.
 * Booleans are stored as 0/1 and nested structures as JSON strings to match
 * the SQLite schema. Note: recordingId is accepted for interface symmetry but
 * the per-subject recordingId field is what gets written.
 */
export function saveSubjectsToDatabase(subjects, recordingId, repos) {
    const subjectInserts = subjects.map((subject) => ({
        id: subject.id,
        recording_id: subject.recordingId,
        label: subject.label,
        is_personal: subject.isPersonal ? 1 : 0,
        duration: subject.totalDuration,
        activity_breakdown: JSON.stringify(subject.activityBreakdown),
        metadata: JSON.stringify({ apps: subject.apps }),
    }));
    const links = subjects.flatMap((subject) => subject.topicBlockIds.map((topicBlockId) => ({
        subjectId: subject.id,
        topicBlockId,
    })));
    repos.subjects.saveBatch(subjectInserts);
    repos.subjects.linkTopicBlocksBatch(links);
}
@@ -0,0 +1,99 @@
1
+ /**
2
+ * Escribano - Temporal Audio Alignment Service
3
+ *
4
+ * Attaches audio transcripts to visual segments based on timestamp overlap.
5
+ * Replaces semantic similarity-based merge with deterministic temporal alignment.
6
+ */
7
// Defaults for alignAudioToSegments.
const DEFAULT_CONFIG = {
    // Minimum seconds a transcript must overlap a segment to be attached.
    minOverlapSeconds: 1,
    // NOTE(review): not currently read by alignAudioToSegments — confirm intent.
    preferredSource: 'mic',
};
11
/**
 * Check if two time ranges overlap.
 *
 * Ranges must share a positive-length intersection; merely touching
 * endpoints (end1 === start2) do not count as overlap.
 *
 * @param start1 - Start of range 1
 * @param end1 - End of range 1
 * @param start2 - Start of range 2
 * @param end2 - End of range 2
 * @returns True if ranges overlap
 */
function rangesOverlap(start1, end1, start2, end2) {
    const disjoint = end2 <= start1 || end1 <= start2;
    return !disjoint;
}
23
/**
 * Calculate overlap duration between two time ranges.
 *
 * Implemented as a single clamp — max(0, min(ends) - max(starts)) — which
 * removes the redundant separate overlap pre-check while returning exactly
 * the same values (disjoint or touching ranges yield 0).
 *
 * @param start1 - Start of range 1
 * @param end1 - End of range 1
 * @param start2 - Start of range 2
 * @param end2 - End of range 2
 * @returns Overlap duration in seconds (0 if no overlap)
 */
function calculateOverlap(start1, end1, start2, end2) {
    const overlapStart = Math.max(start1, start2);
    const overlapEnd = Math.min(end1, end2);
    return Math.max(0, overlapEnd - overlapStart);
}
40
/**
 * Align audio transcripts to visual segments based on temporal overlap.
 *
 * Audio observations are expected to carry `type`, `text`, `timestamp`,
 * optionally `end_timestamp`, and `audio_source` (presumably 'mic' |
 * 'system' — it is upper-cased as a speaker tag below; verify against the
 * capture adapter).
 *
 * NOTE(review): `preferredSource` from DEFAULT_CONFIG is merged into `cfg`
 * but never read here — confirm whether source preference should affect
 * transcript selection.
 *
 * @param segments - Visual segments from activity segmentation
 * @param audioObservations - Audio observations with transcripts
 * @param config - Alignment configuration
 * @returns Segments enriched with aligned transcripts
 */
export function alignAudioToSegments(segments, audioObservations, config = {}) {
    const cfg = { ...DEFAULT_CONFIG, ...config };
    // Filter to audio observations with transcripts
    const audioTranscripts = audioObservations
        .filter((o) => o.type === 'audio' && o.text && o.text.trim().length > 0)
        .map((o) => ({
        source: o.audio_source,
        text: o.text,
        startTime: o.timestamp,
        endTime: o.end_timestamp ?? o.timestamp + 5, // Default 5s if no end time
    }))
        .sort((a, b) => a.startTime - b.startTime);
    // Enrich each segment with overlapping transcripts
    return segments.map((segment) => {
        const overlappingTranscripts = [];
        for (const transcript of audioTranscripts) {
            const overlap = calculateOverlap(segment.startTime, segment.endTime, transcript.startTime, transcript.endTime);
            if (overlap >= cfg.minOverlapSeconds) {
                overlappingTranscripts.push(transcript);
            }
        }
        // Combine transcripts in chronological order
        const combinedTranscript = overlappingTranscripts
            .sort((a, b) => a.startTime - b.startTime)
            .map((t) => `[${t.source.toUpperCase()}] ${t.text}`)
            .join('\n');
        return {
            ...segment,
            transcripts: overlappingTranscripts,
            combinedTranscript,
        };
    });
}
81
/**
 * Summarize how audio transcripts were distributed across segments:
 * totals, per-source ('mic' / 'system') counts, and the average number
 * of transcripts per segment (0 for an empty input).
 */
export function getAlignmentStats(enrichedSegments) {
    let segmentsWithAudio = 0;
    let totalTranscriptSegments = 0;
    let micTranscriptCount = 0;
    let systemTranscriptCount = 0;
    for (const segment of enrichedSegments) {
        if (segment.transcripts.length > 0) {
            segmentsWithAudio += 1;
        }
        totalTranscriptSegments += segment.transcripts.length;
        for (const transcript of segment.transcripts) {
            if (transcript.source === 'mic') {
                micTranscriptCount += 1;
            } else if (transcript.source === 'system') {
                systemTranscriptCount += 1;
            }
        }
    }
    const totalSegments = enrichedSegments.length;
    return {
        totalSegments,
        segmentsWithAudio,
        totalTranscriptSegments,
        micTranscriptCount,
        systemTranscriptCount,
        avgTranscriptsPerSegment: totalSegments > 0
            ? totalTranscriptSegments / totalSegments
            : 0,
    };
}
@@ -0,0 +1,84 @@
1
+ /**
2
+ * Escribano - VLM Enrichment Service
3
+ * @deprecated V2 pipeline - uses old clustering approach. Use V3 pipeline instead.
4
+ *
5
+ * Selects representative frames from clusters and describes them with a vision model.
6
+ */
7
// Defaults for selectFramesForVLM.
const DEFAULT_CONFIG = {
    // Hard cap on frames selected per cluster.
    maxFramesPerCluster: 5,
    // Frames whose OCR text is shorter than this are prioritized for VLM.
    minOcrLength: 100,
    // NOTE(review): not referenced by the functions in this module — confirm.
    visionModel: 'qwen3-vl-8b',
};
12
/**
 * Select representative frames for VLM description.
 *
 * Strategy:
 * 1. Always include first and last frame (boundaries)
 * 2. Include frames with low OCR quality (< minOcrLength chars)
 * 3. Sample at regular intervals based on cluster duration
 * 4. Cap at maxFramesPerCluster
 *
 * @param observations cluster observations; only `type === 'visual'` entries
 *   with an `image_path` are considered
 * @param config overrides for DEFAULT_CONFIG
 * @returns array of { observation, reason } where reason is
 *   'boundary' | 'low_ocr' | 'interval'
 */
export function selectFramesForVLM(observations, config = {}) {
    const cfg = { ...DEFAULT_CONFIG, ...config };
    if (observations.length === 0)
        return [];
    const visualObs = observations
        .filter((o) => o.type === 'visual' && o.image_path)
        .sort((a, b) => a.timestamp - b.timestamp);
    if (visualObs.length === 0)
        return [];
    const selected = [];
    const selectedIds = new Set();
    // Dedupe helper: a frame may qualify under several rules but is added once.
    const addFrame = (obs, reason) => {
        if (!selectedIds.has(obs.id)) {
            selectedIds.add(obs.id);
            selected.push({ observation: obs, reason });
        }
    };
    // 1. Boundaries
    addFrame(visualObs[0], 'boundary');
    if (visualObs.length > 1) {
        addFrame(visualObs[visualObs.length - 1], 'boundary');
    }
    // 2. Low OCR quality frames (at most 3, leaving room for interval samples)
    const lowOcrFrames = visualObs.filter((o) => (o.ocr_text?.length ?? 0) < cfg.minOcrLength && !selectedIds.has(o.id));
    for (const frame of lowOcrFrames.slice(0, 3)) {
        addFrame(frame, 'low_ocr');
    }
    // 3. Interval sampling (if still below max)
    if (selected.length < cfg.maxFramesPerCluster && visualObs.length > 2) {
        const duration = visualObs[visualObs.length - 1].timestamp - visualObs[0].timestamp;
        const intervalSeconds = duration / (cfg.maxFramesPerCluster - selected.length + 1);
        let nextTarget = visualObs[0].timestamp + intervalSeconds;
        for (const obs of visualObs) {
            if (selected.length >= cfg.maxFramesPerCluster)
                break;
            if (obs.timestamp >= nextTarget && !selectedIds.has(obs.id)) {
                addFrame(obs, 'interval');
                nextTarget += intervalSeconds;
            }
        }
    }
    // Boundary + low-OCR picks can already exceed the cap; trim here.
    return selected.slice(0, cfg.maxFramesPerCluster);
}
64
/**
 * Describe selected frames using VLM.
 *
 * Returns a map of observation ID → description text; frames whose VLM
 * result has no `description` are omitted from the map.
 */
export async function describeFrames(frames, intelligence) {
    const results = new Map();
    if (frames.length === 0) {
        return results;
    }
    const images = frames.map(({ observation }) => ({
        imagePath: observation.image_path,
        clusterId: 0, // Not used in our case
        timestamp: observation.timestamp,
    }));
    const descriptions = await intelligence.describeImages(images);
    descriptions.forEach((desc, index) => {
        const frame = frames[index];
        if (frame && desc?.description) {
            results.set(frame.observation.id, desc.description);
        }
    });
    return results;
}
@@ -0,0 +1,130 @@
1
+ /**
2
+ * Escribano - VLM Service
3
+ *
4
+ * Orchestrates sequential VLM inference for frame descriptions.
5
+ * Each frame is processed individually for accurate image-description mapping.
6
+ */
7
// Default VLM model; overridable via ESCRIBANO_VLM_MODEL or per-call config.
const DEFAULT_CONFIG = {
    model: process.env.ESCRIBANO_VLM_MODEL || 'qwen3-vl:4b',
};
10
/**
 * Process sampled frames through VLM sequentially (one image at a time).
 *
 * @param frames - Sampled frames from adaptiveSample()
 * @param intelligence - Intelligence service with describeImages
 * @param config - Processing configuration; besides `model`, callers may pass
 *   `recordingId` and an `onImageProcessed` progress callback (neither has a
 *   default, so they are undefined unless supplied)
 * @returns Array of frame descriptions with VLM analysis
 */
export async function describeFrames(frames, intelligence, config = {}) {
    const cfg = { ...DEFAULT_CONFIG, ...config };
    if (frames.length === 0) {
        console.log('[VLM] No frames to process');
        return [];
    }
    const total = frames.length;
    console.log(`[VLM] Processing ${total} frames sequentially...`);
    console.log(`[VLM] Model: ${cfg.model}`);
    // Prepare input for intelligence service
    const images = frames.map((f) => ({
        imagePath: f.imagePath,
        timestamp: f.timestamp,
    }));
    // Call the sequential VLM API with per-image callback
    const results = await intelligence.describeImages(images, {
        model: cfg.model,
        recordingId: cfg.recordingId,
        onImageProcessed: cfg.onImageProcessed,
    });
    console.log(`\n[VLM] Completed ${results.length}/${total} frames`);
    return results;
}
41
/** @deprecated Use describeFrames instead (kept as an alias for old callers). */
export const batchDescribeFrames = describeFrames;
43
// Canonical activity synonyms, consulted first by exact match and then by
// substring containment in insertion order (order matters for partial
// matches). Hoisted to module scope and frozen so the table is not rebuilt
// on every normalizeActivity call.
const ACTIVITY_SYNONYMS = Object.freeze({
    // Debugging
    debugging: 'debugging',
    'fixing bug': 'debugging',
    'investigating error': 'debugging',
    troubleshooting: 'debugging',
    'reading error': 'debugging',
    'stack trace': 'debugging',
    // Coding
    coding: 'coding',
    'writing code': 'coding',
    implementing: 'coding',
    developing: 'coding',
    programming: 'coding',
    // Reading
    reading: 'reading',
    'reading docs': 'reading',
    documentation: 'reading',
    'reading documentation': 'reading',
    // Research
    research: 'research',
    browsing: 'research',
    searching: 'research',
    watching: 'research',
    'stack overflow': 'research',
    googling: 'research',
    // Meeting
    meeting: 'meeting',
    'video call': 'meeting',
    zoom: 'meeting',
    'google meet': 'meeting',
    'screen share': 'meeting',
    // Terminal
    terminal: 'terminal',
    'command line': 'terminal',
    cli: 'terminal',
    shell: 'terminal',
    // Code Review
    review: 'review',
    reviewing: 'review',
    'code review': 'review',
    'reviewing pr': 'review',
    'pull request': 'review',
});
/**
 * Normalize activity labels to canonical forms.
 * Allows VLM flexibility while maintaining consistency.
 *
 * @param {string} rawActivity - free-form activity label from the VLM
 * @returns {string} canonical activity, or the lower-cased input with
 *   whitespace collapsed to underscores when no synonym matches
 */
export function normalizeActivity(rawActivity) {
    const lower = rawActivity.toLowerCase().trim();
    // Check exact match
    if (ACTIVITY_SYNONYMS[lower]) {
        return ACTIVITY_SYNONYMS[lower];
    }
    // Check partial match (first pattern in insertion order wins)
    for (const [pattern, normalized] of Object.entries(ACTIVITY_SYNONYMS)) {
        if (lower.includes(pattern)) {
            return normalized;
        }
    }
    // Return as-is if no match (allows new activities to emerge)
    return lower.replace(/\s+/g, '_');
}
106
/**
 * Aggregate VLM frame descriptions into summary statistics:
 * per-activity counts (via normalizeActivity) plus the unique sets of apps
 * and topics seen across all frames.
 */
export function getVLMStats(descriptions) {
    const activityCounts = {};
    const apps = new Set();
    const topics = new Set();
    for (const desc of descriptions) {
        const activity = normalizeActivity(desc.activity);
        activityCounts[activity] = (activityCounts[activity] || 0) + 1;
        for (const app of desc.apps) {
            apps.add(app);
        }
        for (const topic of desc.topics) {
            topics.add(topic);
        }
    }
    return {
        totalFrames: descriptions.length,
        uniqueActivities: Object.keys(activityCounts),
        activityCounts,
        uniqueApps: [...apps],
        uniqueTopics: [...topics],
    };
}
@@ -0,0 +1,3 @@
1
// Public entry point for the stats module: re-exports the observer lifecycle
// helpers, the repository factory, and the resource tracker.
export { cancelCurrentRun, getCurrentRunId, setupStatsObserver, } from './observer.js';
export { createStatsRepository } from './repository.js';
export { ResourceTracker } from './resource-tracker.js';