escribano 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +297 -0
- package/dist/0_types.js +279 -0
- package/dist/actions/classify-session.js +77 -0
- package/dist/actions/create-contexts.js +44 -0
- package/dist/actions/create-topic-blocks.js +68 -0
- package/dist/actions/extract-metadata.js +24 -0
- package/dist/actions/generate-artifact-v3.js +296 -0
- package/dist/actions/generate-artifact.js +61 -0
- package/dist/actions/generate-summary-v3.js +260 -0
- package/dist/actions/outline-index.js +204 -0
- package/dist/actions/process-recording-v2.js +494 -0
- package/dist/actions/process-recording-v3.js +412 -0
- package/dist/actions/process-session.js +183 -0
- package/dist/actions/publish-summary-v3.js +303 -0
- package/dist/actions/sync-to-outline.js +196 -0
- package/dist/adapters/audio.silero.adapter.js +69 -0
- package/dist/adapters/cap.adapter.js +94 -0
- package/dist/adapters/capture.cap.adapter.js +107 -0
- package/dist/adapters/capture.filesystem.adapter.js +124 -0
- package/dist/adapters/embedding.ollama.adapter.js +141 -0
- package/dist/adapters/intelligence.adapter.js +202 -0
- package/dist/adapters/intelligence.mlx.adapter.js +395 -0
- package/dist/adapters/intelligence.ollama.adapter.js +741 -0
- package/dist/adapters/publishing.outline.adapter.js +75 -0
- package/dist/adapters/storage.adapter.js +81 -0
- package/dist/adapters/storage.fs.adapter.js +83 -0
- package/dist/adapters/transcription.whisper.adapter.js +206 -0
- package/dist/adapters/video.ffmpeg.adapter.js +405 -0
- package/dist/adapters/whisper.adapter.js +168 -0
- package/dist/batch-context.js +329 -0
- package/dist/db/helpers.js +50 -0
- package/dist/db/index.js +95 -0
- package/dist/db/migrate.js +80 -0
- package/dist/db/repositories/artifact.sqlite.js +77 -0
- package/dist/db/repositories/cluster.sqlite.js +92 -0
- package/dist/db/repositories/context.sqlite.js +75 -0
- package/dist/db/repositories/index.js +10 -0
- package/dist/db/repositories/observation.sqlite.js +70 -0
- package/dist/db/repositories/recording.sqlite.js +56 -0
- package/dist/db/repositories/subject.sqlite.js +64 -0
- package/dist/db/repositories/topic-block.sqlite.js +45 -0
- package/dist/db/types.js +4 -0
- package/dist/domain/classification.js +60 -0
- package/dist/domain/context.js +97 -0
- package/dist/domain/index.js +2 -0
- package/dist/domain/observation.js +17 -0
- package/dist/domain/recording.js +41 -0
- package/dist/domain/segment.js +93 -0
- package/dist/domain/session.js +93 -0
- package/dist/domain/time-range.js +38 -0
- package/dist/domain/transcript.js +79 -0
- package/dist/index.js +173 -0
- package/dist/pipeline/context.js +162 -0
- package/dist/pipeline/events.js +2 -0
- package/dist/prerequisites.js +226 -0
- package/dist/scripts/rebuild-index.js +53 -0
- package/dist/scripts/seed-fixtures.js +290 -0
- package/dist/services/activity-segmentation.js +333 -0
- package/dist/services/activity-segmentation.test.js +191 -0
- package/dist/services/app-normalization.js +212 -0
- package/dist/services/cluster-merge.js +69 -0
- package/dist/services/clustering.js +237 -0
- package/dist/services/debug.js +58 -0
- package/dist/services/frame-sampling.js +318 -0
- package/dist/services/signal-extraction.js +106 -0
- package/dist/services/subject-grouping.js +342 -0
- package/dist/services/temporal-alignment.js +99 -0
- package/dist/services/vlm-enrichment.js +84 -0
- package/dist/services/vlm-service.js +130 -0
- package/dist/stats/index.js +3 -0
- package/dist/stats/observer.js +65 -0
- package/dist/stats/repository.js +36 -0
- package/dist/stats/resource-tracker.js +86 -0
- package/dist/stats/types.js +1 -0
- package/dist/test-classification-prompts.js +181 -0
- package/dist/tests/cap.adapter.test.js +75 -0
- package/dist/tests/capture.cap.adapter.test.js +69 -0
- package/dist/tests/classify-session.test.js +140 -0
- package/dist/tests/db/repositories.test.js +243 -0
- package/dist/tests/domain/time-range.test.js +31 -0
- package/dist/tests/integration.test.js +84 -0
- package/dist/tests/intelligence.adapter.test.js +102 -0
- package/dist/tests/intelligence.ollama.adapter.test.js +178 -0
- package/dist/tests/process-v2.test.js +90 -0
- package/dist/tests/services/clustering.test.js +112 -0
- package/dist/tests/services/frame-sampling.test.js +152 -0
- package/dist/tests/utils/ocr.test.js +76 -0
- package/dist/tests/utils/parallel.test.js +57 -0
- package/dist/tests/visual-observer.test.js +175 -0
- package/dist/utils/id-normalization.js +15 -0
- package/dist/utils/index.js +9 -0
- package/dist/utils/model-detector.js +154 -0
- package/dist/utils/ocr.js +80 -0
- package/dist/utils/parallel.js +32 -0
- package/migrations/001_initial.sql +109 -0
- package/migrations/002_clusters.sql +41 -0
- package/migrations/003_observations_vlm_fields.sql +14 -0
- package/migrations/004_observations_unique.sql +18 -0
- package/migrations/005_processing_stats.sql +29 -0
- package/migrations/006_vlm_raw_response.sql +6 -0
- package/migrations/007_subjects.sql +23 -0
- package/migrations/008_artifacts_recording.sql +6 -0
- package/migrations/009_artifact_subjects.sql +10 -0
- package/package.json +82 -0
- package/prompts/action-items.md +55 -0
- package/prompts/blog-draft.md +54 -0
- package/prompts/blog-research.md +87 -0
- package/prompts/card.md +54 -0
- package/prompts/classify-segment.md +38 -0
- package/prompts/classify.md +37 -0
- package/prompts/code-snippets.md +163 -0
- package/prompts/extract-metadata.md +149 -0
- package/prompts/notes.md +83 -0
- package/prompts/runbook.md +123 -0
- package/prompts/standup.md +50 -0
- package/prompts/step-by-step.md +125 -0
- package/prompts/subject-grouping.md +31 -0
- package/prompts/summary-v3.md +89 -0
- package/prompts/summary.md +77 -0
- package/prompts/topic-classifier.md +24 -0
- package/prompts/topic-extract.md +13 -0
- package/prompts/vlm-batch.md +21 -0
- package/prompts/vlm-single.md +19 -0
|
@@ -0,0 +1,342 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Escribano - Subject Grouping Service
|
|
3
|
+
*
|
|
4
|
+
* Groups TopicBlocks into coherent subjects using LLM-based clustering.
|
|
5
|
+
* This is the foundation for the new artifact architecture.
|
|
6
|
+
*/
|
|
7
|
+
import { readFileSync } from 'node:fs';
|
|
8
|
+
import { join } from 'node:path';
|
|
9
|
+
// Apps whose presence marks activity as "personal" rather than work.
// Matched by exact app name against classification.apps entries.
const PERSONAL_APPS = new Set([
    'WhatsApp',
    'Instagram',
    'TikTok',
    'Telegram',
    'Facebook',
    'Twitter',
    'Snapchat',
    'Discord',
    'Messenger',
    'Signal',
    'FaceTime',
    'iMessage',
    'Messages',
]);
// Fraction of a subject's distinct apps that must be personal for the whole
// subject to be flagged personal (see detectPersonalSubject).
const PERSONAL_APP_THRESHOLD = 0.5;
// LLM used for the grouping prompt; overridable via environment variable.
const SUBJECT_GROUPING_MODEL = process.env.ESCRIBANO_SUBJECT_GROUPING_MODEL || 'qwen3.5:27b';
|
|
26
|
+
/**
 * Group TopicBlocks into coherent subjects using an LLM grouping prompt.
 *
 * Builds a prompt from the blocks, asks the intelligence adapter for a
 * grouping, then aggregates per-subject duration, activity breakdown and
 * app usage. Falls back to a deterministic time-ordered grouping when the
 * LLM call or response parsing fails.
 *
 * @param topicBlocks - TopicBlock rows (with JSON `classification` payloads)
 * @param intelligence - Adapter exposing generateText(prompt, options)
 * @param recordingId - Recording the blocks belong to (used in subject IDs)
 * @returns {subjects, personalDuration, workDuration}
 */
export async function groupTopicBlocksIntoSubjects(topicBlocks, intelligence, recordingId) {
    if (topicBlocks.length === 0) {
        return {
            subjects: [],
            personalDuration: 0,
            workDuration: 0,
        };
    }
    const blocksForGrouping = topicBlocks.map(extractBlockForGrouping);
    const prompt = buildGroupingPrompt(blocksForGrouping);
    console.log(`[subject-grouping] Grouping ${topicBlocks.length} blocks into subjects (model: ${SUBJECT_GROUPING_MODEL})`);
    try {
        const response = await intelligence.generateText(prompt, {
            expectJson: false,
            model: SUBJECT_GROUPING_MODEL,
            numPredict: 2000,
            think: false,
        });
        console.log(`[subject-grouping] LLM response (${response.length} chars):\n${response.slice(0, 500)}${response.length > 500 ? '...' : ''}`);
        const grouping = parseGroupingResponse(response, topicBlocks);
        console.log(`[subject-grouping] Parsed ${grouping.groups.length} groups: ${grouping.groups.map((g) => g.label).join(', ')}`);
        // O(1) id lookup instead of topicBlocks.find() per blockId (was O(n^2)).
        const blockById = new Map(topicBlocks.map((b) => [b.id, b]));
        const subjects = grouping.groups.map((group, index) => {
            const subjectId = `subject-${recordingId}-${index}`;
            const blocks = group.blockIds
                .map((id) => blockById.get(id))
                .filter((b) => b !== undefined);
            // Aggregate duration, per-activity breakdown and app set in a single
            // pass, parsing each block's classification JSON only once.
            let totalDuration = 0;
            const activityBreakdown = {};
            const appsSet = new Set();
            for (const block of blocks) {
                const classification = parseClassification(block);
                if (!classification)
                    continue;
                const duration = classification.duration ?? 0;
                totalDuration += duration;
                const activity = classification.activity_type || 'other';
                activityBreakdown[activity] = (activityBreakdown[activity] || 0) + duration;
                for (const app of classification.apps ?? []) {
                    appsSet.add(app);
                }
            }
            const isPersonal = detectPersonalSubject(appsSet, activityBreakdown);
            return {
                id: subjectId,
                recordingId,
                label: group.label,
                topicBlockIds: group.blockIds,
                totalDuration,
                activityBreakdown,
                apps: [...appsSet],
                isPersonal,
            };
        });
        const personalDuration = subjects
            .filter((s) => s.isPersonal)
            .reduce((sum, s) => sum + s.totalDuration, 0);
        const workDuration = subjects
            .filter((s) => !s.isPersonal)
            .reduce((sum, s) => sum + s.totalDuration, 0);
        return {
            subjects,
            personalDuration,
            workDuration,
        };
    }
    catch (error) {
        const err = error;
        const errorType = err.name || 'Error';
        const errorMessage = err.message || String(err);
        console.error(`[subject-grouping] LLM grouping failed (${errorType}): ${errorMessage}`);
        if (err.stack) {
            console.error(`[subject-grouping] Stack trace:`, err.stack.split('\n').slice(0, 3).join('\n'));
        }
        // Best-effort deterministic grouping so the pipeline can continue.
        return createFallbackGrouping(topicBlocks, recordingId);
    }
}
|
|
106
|
+
/**
 * Project a TopicBlock into the compact shape consumed by the grouping
 * prompt. Missing classification fields fall back to neutral defaults.
 */
function extractBlockForGrouping(block) {
    const meta = parseClassification(block);
    return {
        id: block.id,
        startTime: meta?.start_time ?? 0,
        endTime: meta?.end_time ?? 0,
        duration: meta?.duration ?? 0,
        activityType: meta?.activity_type || 'other',
        keyDescription: meta?.key_description ?? '',
        apps: meta?.apps ?? [],
        topics: meta?.topics ?? [],
    };
}
|
|
119
|
+
/**
 * Safely parse a block's JSON classification payload.
 * Returns null when the field is absent/empty or contains invalid JSON.
 */
function parseClassification(block) {
    const raw = block.classification;
    if (!raw) {
        return null;
    }
    try {
        return JSON.parse(raw);
    }
    catch {
        return null;
    }
}
|
|
129
|
+
/**
 * Build the grouping prompt sent to the LLM.
 *
 * Loads prompts/subject-grouping.md relative to process.cwd() when present
 * (NOTE(review): assumes the process is started from the package root —
 * confirm against how the CLI is launched), otherwise falls back to an
 * inline template. Placeholders {{BLOCK_COUNT}}, {{BLOCK_DESCRIPTIONS}} and
 * {{EXAMPLE_BLOCK_IDS}} are then substituted; {{BLOCK_COUNT}} can appear
 * multiple times, hence replaceAll for that one only.
 *
 * @param blocks - Output of extractBlockForGrouping; callers guard against
 *   an empty array (exampleBlockIds would render "undefined" otherwise).
 * @returns The fully substituted prompt string.
 */
function buildGroupingPrompt(blocks) {
    // Render each block as a human-readable stanza the model can cite by ID.
    const blockDescriptions = blocks
        .map((b, i) => {
            return `BLOCK ${i + 1}:
Time: ${formatTime(b.startTime)} - ${formatTime(b.endTime)} (${formatDuration(b.duration)})
Activity: ${b.activityType}
Description: ${b.keyDescription}
Apps: ${b.apps.join(', ') || 'none'}
Topics: ${b.topics.join(', ') || 'none'}
ID: ${b.id}`;
        })
        .join('\n\n');
    const blockIdList = blocks.map((b) => b.id);
    // Show one or two real IDs in the example line so the model copies the
    // exact quoting/format.
    const exampleBlockIds = blockIdList.length >= 2
        ? `"${blockIdList[0]}", "${blockIdList[1]}"`
        : `"${blockIdList[0]}"`;
    let template;
    try {
        const promptPath = join(process.cwd(), 'prompts', 'subject-grouping.md');
        template = readFileSync(promptPath, 'utf-8');
    }
    catch {
        // Fallback inline prompt if file not found
        template = `You are analyzing a work session that has been divided into {{BLOCK_COUNT}} segments (TopicBlocks).

Your task is to group these segments into 1-6 coherent SUBJECTS. A subject represents a distinct thread of work (e.g., "Escribano pipeline optimization", "Personal time", "Email and admin", "Research on competitors").

GROUPING RULES:
1. Group segments that belong to the same work thread, even if they're not consecutive in time
2. Personal activities (WhatsApp, Instagram, social media, personal calls) should be grouped into a "Personal" subject
3. Email/calendar/admin is only its own group when email IS the primary activity — not just because an email app was open in the background
4. Deep work on the same project/codebase should be grouped together
5. Research sessions should be grouped separately from coding sessions unless clearly related

RULE PRIORITY (when in doubt):
- Classify by primary ACTIVITY TYPE and project context, not by which apps happened to be open
- If all segments are about the same project, one group is correct — do not invent artificial splits

SEGMENTS TO GROUP:
{{BLOCK_DESCRIPTIONS}}

For each group, output ONE line in this EXACT format:
Group 1: label: [Descriptive subject name] | blockIds: [uuid1, uuid2, uuid3]

Example output:
Group 1: label: Escribano VLM Integration | blockIds: [{{EXAMPLE_BLOCK_IDS}}]

CRITICAL REQUIREMENTS:
- Each group MUST have "label" and "blockIds"
- Block IDs are the UUIDs shown in each BLOCK above (copy them exactly)
- Include ALL {{BLOCK_COUNT}} block IDs across all groups (every block must be assigned exactly once)
- Create 1-6 groups (one group is fine if all work is the same project)
- Use clear, descriptive labels for each subject
- Output ONLY the group lines — no explanation, no preamble, no markdown`;
    }
    // Replace template variables ({{BLOCK_COUNT}} may occur more than once).
    return template
        .replaceAll('{{BLOCK_COUNT}}', String(blocks.length))
        .replace('{{BLOCK_DESCRIPTIONS}}', blockDescriptions)
        .replace('{{EXAMPLE_BLOCK_IDS}}', exampleBlockIds);
}
|
|
190
|
+
/**
 * Parse "Group N: label: X | blockIds: [a, b]" lines out of the LLM response.
 * Block IDs not present in topicBlocks are dropped; throws when no parsable
 * group with at least one valid block ID remains.
 */
function parseGroupingResponse(response, topicBlocks) {
    const validBlockIds = new Set(topicBlocks.map((b) => b.id));
    const groupRegex = /^Group\s+\d+:\s*label:\s*(.+?)\s*\|\s*blockIds:\s*\[(.+?)\]$/i;
    const lines = response.split('\n').filter((line) => line.trim());
    const groups = [];
    let matchedLines = 0;
    for (const line of lines) {
        const match = groupRegex.exec(line);
        if (match === null) {
            continue;
        }
        matchedLines += 1;
        const label = match[1].trim();
        const blockIdsStr = match[2].trim();
        // Strip surrounding quotes from each ID and keep only known blocks.
        const blockIds = blockIdsStr
            .split(',')
            .map((id) => id.trim().replace(/^["']|["']$/g, ''))
            .filter((id) => validBlockIds.has(id));
        console.log(`[subject-grouping] Parsed group "${label}": ${blockIds.length}/${blockIdsStr.split(',').length} valid block IDs`);
        if (blockIds.length > 0 && label) {
            groups.push({ label, blockIds });
        }
    }
    if (groups.length === 0) {
        console.error(`[subject-grouping] Failed to parse any groups from ${lines.length} lines (${matchedLines} matched regex)`);
        throw new Error(`No valid groups found in response. Matched ${matchedLines}/${lines.length} lines.`);
    }
    return { groups };
}
|
|
218
|
+
/**
 * A subject counts as "personal" when at least PERSONAL_APP_THRESHOLD of its
 * distinct apps appear in PERSONAL_APPS. Empty app sets are never personal.
 * activityBreakdown is accepted for interface parity but not consulted.
 */
function detectPersonalSubject(apps, activityBreakdown) {
    const appList = [...apps];
    if (appList.length === 0) {
        return false;
    }
    const personalCount = appList.filter((app) => PERSONAL_APPS.has(app)).length;
    return personalCount / appList.length >= PERSONAL_APP_THRESHOLD;
}
|
|
231
|
+
/**
 * Deterministic fallback grouping used when the LLM grouping fails.
 *
 * Walks blocks in start-time order and alternates between a "Personal" and a
 * "Work Session" subject: a new subject is started whenever the personal/work
 * polarity of the current block differs from the running subject's label.
 * Blocks without a parsable classification are skipped entirely.
 *
 * @param topicBlocks - TopicBlock rows with JSON `classification` payloads
 * @param recordingId - Recording the blocks belong to (used in subject IDs)
 * @returns {subjects, personalDuration, workDuration}
 */
function createFallbackGrouping(topicBlocks, recordingId) {
    if (topicBlocks.length === 0) {
        return {
            subjects: [],
            personalDuration: 0,
            workDuration: 0,
        };
    }
    const subjects = [];
    let currentSubject = null;
    // Sort a copy by classification start_time; unparsable blocks sort as 0.
    const sortedBlocks = [...topicBlocks].sort((a, b) => {
        const aClass = parseClassification(a);
        const bClass = parseClassification(b);
        return (aClass?.start_time ?? 0) - (bClass?.start_time ?? 0);
    });
    for (const block of sortedBlocks) {
        const classification = parseClassification(block);
        if (!classification)
            continue;
        const apps = classification.apps || [];
        // A single personal app is enough to treat the block as personal here
        // (stricter ratio logic only applies in the LLM path).
        const isPersonal = apps.some((app) => PERSONAL_APPS.has(app));
        if (!currentSubject) {
            // First classified block seeds the running subject.
            currentSubject = {
                label: isPersonal ? 'Personal' : 'Work Session',
                blocks: [],
                apps: new Set(),
                activities: {},
            };
        }
        // Start a new subject on any personal/work polarity change.
        const shouldStartNewSubject = (isPersonal && currentSubject.label !== 'Personal') ||
            (!isPersonal && currentSubject.label === 'Personal');
        if (shouldStartNewSubject) {
            subjects.push(finalizeSubject(currentSubject, recordingId, subjects.length));
            currentSubject = {
                label: isPersonal ? 'Personal' : 'Work Session',
                blocks: [],
                apps: new Set(),
                activities: {},
            };
        }
        currentSubject.blocks.push(block);
        for (const app of apps) {
            currentSubject.apps.add(app);
        }
        // Accumulate per-activity duration for the running subject.
        const activity = classification.activity_type || 'other';
        currentSubject.activities[activity] =
            (currentSubject.activities[activity] || 0) +
                (classification.duration ?? 0);
    }
    // Flush the trailing subject, if it collected any blocks.
    if (currentSubject && currentSubject.blocks.length > 0) {
        subjects.push(finalizeSubject(currentSubject, recordingId, subjects.length));
    }
    const personalDuration = subjects
        .filter((s) => s.isPersonal)
        .reduce((sum, s) => sum + s.totalDuration, 0);
    const workDuration = subjects
        .filter((s) => !s.isPersonal)
        .reduce((sum, s) => sum + s.totalDuration, 0);
    return { subjects, personalDuration, workDuration };
}
|
|
291
|
+
/**
 * Convert a fallback-grouping accumulator into the Subject record shape.
 * A subject is personal exactly when its label is 'Personal'.
 */
function finalizeSubject(subject, recordingId, index) {
    let totalDuration = 0;
    for (const b of subject.blocks) {
        totalDuration += parseClassification(b)?.duration ?? 0;
    }
    return {
        id: `subject-${recordingId}-${index}`,
        recordingId,
        label: subject.label,
        topicBlockIds: subject.blocks.map((b) => b.id),
        totalDuration,
        activityBreakdown: subject.activities,
        apps: [...subject.apps],
        isPersonal: subject.label === 'Personal',
    };
}
|
|
307
|
+
/** Format a non-negative second count as m:ss (e.g. 125 -> "2:05"). */
function formatTime(seconds) {
    const whole = Math.floor(seconds);
    const mins = Math.floor(whole / 60);
    const secs = whole % 60;
    return `${mins}:${String(secs).padStart(2, '0')}`;
}
|
|
312
|
+
/** Human-readable duration: "2h 5m", "12m", or "40s". */
function formatDuration(seconds) {
    const hours = Math.floor(seconds / 3600);
    const mins = Math.floor((seconds % 3600) / 60);
    if (hours > 0) {
        return `${hours}h ${mins}m`;
    }
    return mins > 0 ? `${mins}m` : `${Math.floor(seconds)}s`;
}
|
|
323
|
+
/**
 * Persist subjects and their subject->topic-block links in two batch writes.
 * recordingId is retained for interface compatibility; each subject carries
 * its own recordingId which is what gets stored.
 */
export function saveSubjectsToDatabase(subjects, recordingId, repos) {
    const subjectInserts = subjects.map((subject) => ({
        id: subject.id,
        recording_id: subject.recordingId,
        label: subject.label,
        is_personal: subject.isPersonal ? 1 : 0,
        duration: subject.totalDuration,
        activity_breakdown: JSON.stringify(subject.activityBreakdown),
        metadata: JSON.stringify({ apps: subject.apps }),
    }));
    const links = subjects.flatMap((subject) =>
        subject.topicBlockIds.map((topicBlockId) => ({
            subjectId: subject.id,
            topicBlockId,
        })));
    repos.subjects.saveBatch(subjectInserts);
    repos.subjects.linkTopicBlocksBatch(links);
}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Escribano - Temporal Audio Alignment Service
|
|
3
|
+
*
|
|
4
|
+
* Attaches audio transcripts to visual segments based on timestamp overlap.
|
|
5
|
+
* Replaces semantic similarity-based merge with deterministic temporal alignment.
|
|
6
|
+
*/
|
|
7
|
+
// Default alignment configuration (overridable per call via the config arg).
const DEFAULT_CONFIG = {
    // Minimum transcript/segment overlap (seconds) required before a
    // transcript is attached to a segment.
    minOverlapSeconds: 1,
    // NOTE(review): preferredSource is defined but not read by
    // alignAudioToSegments in this file — confirm whether it is still needed.
    preferredSource: 'mic',
};
|
|
11
|
+
/**
 * Whether two time ranges [start1, end1) and [start2, end2) overlap.
 * Touching endpoints (end1 === start2) do not count as overlap.
 *
 * @param start1 - Start of range 1
 * @param end1 - End of range 1
 * @param start2 - Start of range 2
 * @param end2 - End of range 2
 * @returns True if ranges overlap
 */
function rangesOverlap(start1, end1, start2, end2) {
    const firstStartsBeforeSecondEnds = start1 < end2;
    const secondStartsBeforeFirstEnds = start2 < end1;
    return firstStartsBeforeSecondEnds && secondStartsBeforeFirstEnds;
}
|
|
23
|
+
/**
 * Overlap duration between two time ranges, in seconds.
 * Disjoint or merely touching ranges yield 0.
 *
 * @param start1 - Start of range 1
 * @param end1 - End of range 1
 * @param start2 - Start of range 2
 * @param end2 - End of range 2
 * @returns Overlap duration in seconds (0 if no overlap)
 */
function calculateOverlap(start1, end1, start2, end2) {
    const overlap = Math.min(end1, end2) - Math.max(start1, start2);
    return overlap > 0 ? overlap : 0;
}
|
|
40
|
+
/**
 * Attach audio transcripts to visual segments based on timestamp overlap.
 *
 * @param segments - Visual segments from activity segmentation
 * @param audioObservations - Audio observations with transcripts
 * @param config - Alignment configuration (merged over DEFAULT_CONFIG)
 * @returns Segments enriched with `transcripts` and `combinedTranscript`
 */
export function alignAudioToSegments(segments, audioObservations, config = {}) {
    const cfg = { ...DEFAULT_CONFIG, ...config };
    // Keep only audio observations carrying non-empty transcript text,
    // normalized into {source, text, startTime, endTime}.
    const audioTranscripts = [];
    for (const o of audioObservations) {
        if (o.type !== 'audio' || !o.text || o.text.trim().length === 0) {
            continue;
        }
        audioTranscripts.push({
            source: o.audio_source,
            text: o.text,
            startTime: o.timestamp,
            endTime: o.end_timestamp ?? o.timestamp + 5, // Default 5s if no end time
        });
    }
    // Chronological order; filtering below preserves it, so the combined
    // transcript comes out time-ordered.
    audioTranscripts.sort((a, b) => a.startTime - b.startTime);
    return segments.map((segment) => {
        const overlappingTranscripts = audioTranscripts.filter((t) =>
            calculateOverlap(segment.startTime, segment.endTime, t.startTime, t.endTime) >= cfg.minOverlapSeconds);
        const combinedTranscript = overlappingTranscripts
            .map((t) => `[${t.source.toUpperCase()}] ${t.text}`)
            .join('\n');
        return {
            ...segment,
            transcripts: overlappingTranscripts,
            combinedTranscript,
        };
    });
}
|
|
81
|
+
/**
 * Get statistics about audio alignment: how many segments received audio,
 * transcript counts per source, and the average transcripts per segment.
 */
export function getAlignmentStats(enrichedSegments) {
    const totalSegments = enrichedSegments.length;
    let segmentsWithAudio = 0;
    let totalTranscriptSegments = 0;
    let micTranscriptCount = 0;
    let systemTranscriptCount = 0;
    for (const { transcripts } of enrichedSegments) {
        if (transcripts.length > 0) {
            segmentsWithAudio += 1;
        }
        totalTranscriptSegments += transcripts.length;
        for (const t of transcripts) {
            if (t.source === 'mic') {
                micTranscriptCount += 1;
            } else if (t.source === 'system') {
                systemTranscriptCount += 1;
            }
        }
    }
    return {
        totalSegments,
        segmentsWithAudio,
        totalTranscriptSegments,
        micTranscriptCount,
        systemTranscriptCount,
        avgTranscriptsPerSegment: totalSegments > 0
            ? totalTranscriptSegments / totalSegments
            : 0,
    };
}
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Escribano - VLM Enrichment Service
|
|
3
|
+
* @deprecated V2 pipeline - uses old clustering approach. Use V3 pipeline instead.
|
|
4
|
+
*
|
|
5
|
+
* Selects representative frames from clusters and describes them with a vision model.
|
|
6
|
+
*/
|
|
7
|
+
// Defaults for representative-frame selection (V2 pipeline).
const DEFAULT_CONFIG = {
    maxFramesPerCluster: 5,
    minOcrLength: 100,
    visionModel: 'qwen3-vl-8b',
};
/**
 * Select representative frames for VLM description.
 *
 * Selection order (and therefore the order of reasons in the result):
 * 1. Boundaries — always the first and last visual frame
 * 2. Up to three frames whose OCR text is shorter than minOcrLength
 * 3. Frames sampled at regular time intervals, up to maxFramesPerCluster
 */
export function selectFramesForVLM(observations, config = {}) {
    const cfg = { ...DEFAULT_CONFIG, ...config };
    if (observations.length === 0) {
        return [];
    }
    const visualObs = observations
        .filter((o) => o.type === 'visual' && o.image_path)
        .sort((a, b) => a.timestamp - b.timestamp);
    if (visualObs.length === 0) {
        return [];
    }
    const selected = [];
    const selectedIds = new Set();
    const addFrame = (observation, reason) => {
        if (selectedIds.has(observation.id)) {
            return;
        }
        selectedIds.add(observation.id);
        selected.push({ observation, reason });
    };
    const first = visualObs[0];
    const last = visualObs[visualObs.length - 1];
    // 1. Boundaries
    addFrame(first, 'boundary');
    if (visualObs.length > 1) {
        addFrame(last, 'boundary');
    }
    // 2. Low OCR quality frames (at most three, excluding already-selected)
    const lowOcrFrames = visualObs.filter((o) =>
        (o.ocr_text?.length ?? 0) < cfg.minOcrLength && !selectedIds.has(o.id));
    lowOcrFrames.slice(0, 3).forEach((frame) => addFrame(frame, 'low_ocr'));
    // 3. Interval sampling to fill remaining capacity
    if (selected.length < cfg.maxFramesPerCluster && visualObs.length > 2) {
        const duration = last.timestamp - first.timestamp;
        const intervalSeconds = duration / (cfg.maxFramesPerCluster - selected.length + 1);
        let nextTarget = first.timestamp + intervalSeconds;
        for (const obs of visualObs) {
            if (selected.length >= cfg.maxFramesPerCluster) {
                break;
            }
            if (obs.timestamp >= nextTarget && !selectedIds.has(obs.id)) {
                addFrame(obs, 'interval');
                nextTarget += intervalSeconds;
            }
        }
    }
    return selected.slice(0, cfg.maxFramesPerCluster);
}
|
|
64
|
+
/**
 * Describe selected frames using the VLM.
 * Returns a map of observation id -> description; frames for which the VLM
 * produced no description are omitted from the map.
 */
export async function describeFrames(frames, intelligence) {
    const results = new Map();
    if (frames.length === 0) {
        return results;
    }
    const images = frames.map(({ observation }) => ({
        imagePath: observation.image_path,
        clusterId: 0, // Not used in our case
        timestamp: observation.timestamp,
    }));
    const descriptions = await intelligence.describeImages(images);
    frames.forEach((frame, index) => {
        const description = descriptions[index]?.description;
        if (description) {
            results.set(frame.observation.id, description);
        }
    });
    return results;
}
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Escribano - VLM Service
|
|
3
|
+
*
|
|
4
|
+
* Orchestrates sequential VLM inference for frame descriptions.
|
|
5
|
+
* Each frame is processed individually for accurate image-description mapping.
|
|
6
|
+
*/
|
|
7
|
+
// VLM defaults; the model is overridable via ESCRIBANO_VLM_MODEL.
const DEFAULT_CONFIG = {
    model: process.env.ESCRIBANO_VLM_MODEL || 'qwen3-vl:4b',
};
/**
 * Process sampled frames through the VLM sequentially (one image at a time).
 *
 * @param frames - Sampled frames from adaptiveSample()
 * @param intelligence - Intelligence service with describeImages
 * @param config - Processing configuration (model, recordingId, onImageProcessed)
 * @returns Array of frame descriptions with VLM analysis
 */
export async function describeFrames(frames, intelligence, config = {}) {
    const cfg = { ...DEFAULT_CONFIG, ...config };
    const total = frames.length;
    if (total === 0) {
        console.log('[VLM] No frames to process');
        return [];
    }
    console.log(`[VLM] Processing ${total} frames sequentially...`);
    console.log(`[VLM] Model: ${cfg.model}`);
    // Reduce frames to the shape the intelligence service expects.
    const images = frames.map(({ imagePath, timestamp }) => ({ imagePath, timestamp }));
    // Sequential VLM API with an optional per-image callback.
    const results = await intelligence.describeImages(images, {
        model: cfg.model,
        recordingId: cfg.recordingId,
        onImageProcessed: cfg.onImageProcessed,
    });
    console.log(`\n[VLM] Completed ${results.length}/${total} frames`);
    return results;
}
/** @deprecated Use describeFrames instead */
export const batchDescribeFrames = describeFrames;
|
|
43
|
+
/**
 * Normalize a free-form activity label to one of the canonical activities.
 * Tries an exact match first, then the first substring match in declaration
 * order; unknown labels are kept (lowercased, spaces -> underscores) so new
 * activities can emerge.
 */
export function normalizeActivity(rawActivity) {
    const lower = rawActivity.toLowerCase().trim();
    const synonyms = {
        // Debugging
        debugging: 'debugging',
        'fixing bug': 'debugging',
        'investigating error': 'debugging',
        troubleshooting: 'debugging',
        'reading error': 'debugging',
        'stack trace': 'debugging',
        // Coding
        coding: 'coding',
        'writing code': 'coding',
        implementing: 'coding',
        developing: 'coding',
        programming: 'coding',
        // Reading
        reading: 'reading',
        'reading docs': 'reading',
        documentation: 'reading',
        'reading documentation': 'reading',
        // Research
        research: 'research',
        browsing: 'research',
        searching: 'research',
        watching: 'research',
        'stack overflow': 'research',
        googling: 'research',
        // Meeting
        meeting: 'meeting',
        'video call': 'meeting',
        zoom: 'meeting',
        'google meet': 'meeting',
        'screen share': 'meeting',
        // Terminal
        terminal: 'terminal',
        'command line': 'terminal',
        cli: 'terminal',
        shell: 'terminal',
        // Code Review
        review: 'review',
        reviewing: 'review',
        'code review': 'review',
        'reviewing pr': 'review',
        'pull request': 'review',
    };
    // Exact canonical-or-synonym match.
    const exact = synonyms[lower];
    if (exact) {
        return exact;
    }
    // Otherwise the first substring match wins, in declaration order.
    const partial = Object.entries(synonyms).find(([pattern]) => lower.includes(pattern));
    if (partial) {
        return partial[1];
    }
    // Unknown activity: keep it, normalized to snake_case.
    return lower.replace(/\s+/g, '_');
}
|
|
106
|
+
/**
 * Summarize VLM frame descriptions: a histogram of normalized activity
 * labels plus the unique apps and topics seen across all frames.
 */
export function getVLMStats(descriptions) {
    const activityCounts = {};
    const apps = new Set();
    const topics = new Set();
    for (const { activity, apps: frameApps, topics: frameTopics } of descriptions) {
        const normalized = normalizeActivity(activity);
        activityCounts[normalized] = (activityCounts[normalized] || 0) + 1;
        for (const app of frameApps) {
            apps.add(app);
        }
        for (const topic of frameTopics) {
            topics.add(topic);
        }
    }
    return {
        totalFrames: descriptions.length,
        uniqueActivities: Object.keys(activityCounts),
        activityCounts,
        uniqueApps: [...apps],
        uniqueTopics: [...topics],
    };
}
|