escribano 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +297 -0
- package/dist/0_types.js +279 -0
- package/dist/actions/classify-session.js +77 -0
- package/dist/actions/create-contexts.js +44 -0
- package/dist/actions/create-topic-blocks.js +68 -0
- package/dist/actions/extract-metadata.js +24 -0
- package/dist/actions/generate-artifact-v3.js +296 -0
- package/dist/actions/generate-artifact.js +61 -0
- package/dist/actions/generate-summary-v3.js +260 -0
- package/dist/actions/outline-index.js +204 -0
- package/dist/actions/process-recording-v2.js +494 -0
- package/dist/actions/process-recording-v3.js +412 -0
- package/dist/actions/process-session.js +183 -0
- package/dist/actions/publish-summary-v3.js +303 -0
- package/dist/actions/sync-to-outline.js +196 -0
- package/dist/adapters/audio.silero.adapter.js +69 -0
- package/dist/adapters/cap.adapter.js +94 -0
- package/dist/adapters/capture.cap.adapter.js +107 -0
- package/dist/adapters/capture.filesystem.adapter.js +124 -0
- package/dist/adapters/embedding.ollama.adapter.js +141 -0
- package/dist/adapters/intelligence.adapter.js +202 -0
- package/dist/adapters/intelligence.mlx.adapter.js +395 -0
- package/dist/adapters/intelligence.ollama.adapter.js +741 -0
- package/dist/adapters/publishing.outline.adapter.js +75 -0
- package/dist/adapters/storage.adapter.js +81 -0
- package/dist/adapters/storage.fs.adapter.js +83 -0
- package/dist/adapters/transcription.whisper.adapter.js +206 -0
- package/dist/adapters/video.ffmpeg.adapter.js +405 -0
- package/dist/adapters/whisper.adapter.js +168 -0
- package/dist/batch-context.js +329 -0
- package/dist/db/helpers.js +50 -0
- package/dist/db/index.js +95 -0
- package/dist/db/migrate.js +80 -0
- package/dist/db/repositories/artifact.sqlite.js +77 -0
- package/dist/db/repositories/cluster.sqlite.js +92 -0
- package/dist/db/repositories/context.sqlite.js +75 -0
- package/dist/db/repositories/index.js +10 -0
- package/dist/db/repositories/observation.sqlite.js +70 -0
- package/dist/db/repositories/recording.sqlite.js +56 -0
- package/dist/db/repositories/subject.sqlite.js +64 -0
- package/dist/db/repositories/topic-block.sqlite.js +45 -0
- package/dist/db/types.js +4 -0
- package/dist/domain/classification.js +60 -0
- package/dist/domain/context.js +97 -0
- package/dist/domain/index.js +2 -0
- package/dist/domain/observation.js +17 -0
- package/dist/domain/recording.js +41 -0
- package/dist/domain/segment.js +93 -0
- package/dist/domain/session.js +93 -0
- package/dist/domain/time-range.js +38 -0
- package/dist/domain/transcript.js +79 -0
- package/dist/index.js +173 -0
- package/dist/pipeline/context.js +162 -0
- package/dist/pipeline/events.js +2 -0
- package/dist/prerequisites.js +226 -0
- package/dist/scripts/rebuild-index.js +53 -0
- package/dist/scripts/seed-fixtures.js +290 -0
- package/dist/services/activity-segmentation.js +333 -0
- package/dist/services/activity-segmentation.test.js +191 -0
- package/dist/services/app-normalization.js +212 -0
- package/dist/services/cluster-merge.js +69 -0
- package/dist/services/clustering.js +237 -0
- package/dist/services/debug.js +58 -0
- package/dist/services/frame-sampling.js +318 -0
- package/dist/services/signal-extraction.js +106 -0
- package/dist/services/subject-grouping.js +342 -0
- package/dist/services/temporal-alignment.js +99 -0
- package/dist/services/vlm-enrichment.js +84 -0
- package/dist/services/vlm-service.js +130 -0
- package/dist/stats/index.js +3 -0
- package/dist/stats/observer.js +65 -0
- package/dist/stats/repository.js +36 -0
- package/dist/stats/resource-tracker.js +86 -0
- package/dist/stats/types.js +1 -0
- package/dist/test-classification-prompts.js +181 -0
- package/dist/tests/cap.adapter.test.js +75 -0
- package/dist/tests/capture.cap.adapter.test.js +69 -0
- package/dist/tests/classify-session.test.js +140 -0
- package/dist/tests/db/repositories.test.js +243 -0
- package/dist/tests/domain/time-range.test.js +31 -0
- package/dist/tests/integration.test.js +84 -0
- package/dist/tests/intelligence.adapter.test.js +102 -0
- package/dist/tests/intelligence.ollama.adapter.test.js +178 -0
- package/dist/tests/process-v2.test.js +90 -0
- package/dist/tests/services/clustering.test.js +112 -0
- package/dist/tests/services/frame-sampling.test.js +152 -0
- package/dist/tests/utils/ocr.test.js +76 -0
- package/dist/tests/utils/parallel.test.js +57 -0
- package/dist/tests/visual-observer.test.js +175 -0
- package/dist/utils/id-normalization.js +15 -0
- package/dist/utils/index.js +9 -0
- package/dist/utils/model-detector.js +154 -0
- package/dist/utils/ocr.js +80 -0
- package/dist/utils/parallel.js +32 -0
- package/migrations/001_initial.sql +109 -0
- package/migrations/002_clusters.sql +41 -0
- package/migrations/003_observations_vlm_fields.sql +14 -0
- package/migrations/004_observations_unique.sql +18 -0
- package/migrations/005_processing_stats.sql +29 -0
- package/migrations/006_vlm_raw_response.sql +6 -0
- package/migrations/007_subjects.sql +23 -0
- package/migrations/008_artifacts_recording.sql +6 -0
- package/migrations/009_artifact_subjects.sql +10 -0
- package/package.json +82 -0
- package/prompts/action-items.md +55 -0
- package/prompts/blog-draft.md +54 -0
- package/prompts/blog-research.md +87 -0
- package/prompts/card.md +54 -0
- package/prompts/classify-segment.md +38 -0
- package/prompts/classify.md +37 -0
- package/prompts/code-snippets.md +163 -0
- package/prompts/extract-metadata.md +149 -0
- package/prompts/notes.md +83 -0
- package/prompts/runbook.md +123 -0
- package/prompts/standup.md +50 -0
- package/prompts/step-by-step.md +125 -0
- package/prompts/subject-grouping.md +31 -0
- package/prompts/summary-v3.md +89 -0
- package/prompts/summary.md +77 -0
- package/prompts/topic-classifier.md +24 -0
- package/prompts/topic-extract.md +13 -0
- package/prompts/vlm-batch.md +21 -0
- package/prompts/vlm-single.md +19 -0
|
@@ -0,0 +1,333 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Escribano - Activity Segmentation Service
|
|
3
|
+
*
|
|
4
|
+
* Groups consecutive VLM observations by activity continuity.
|
|
5
|
+
* Replaces embedding-based clustering with VLM-driven segmentation.
|
|
6
|
+
*/
|
|
7
|
+
import { normalizeActivity } from './vlm-service.js';
|
|
8
|
+
/**
 * Default segmentation settings. segmentByActivity spreads these into a fresh
 * object and lets the caller's `config` override individual keys, so the
 * defaults themselves are frozen to guard against accidental mutation.
 */
const DEFAULT_CONFIG = Object.freeze({
    // Segments whose (endTime - startTime) is below this are merged into a
    // neighbor. Same unit as observation timestamps (presumably seconds — confirm).
    minSegmentDuration: 30,
    // NOTE(review): not read anywhere in the visible code — confirm whether
    // gap handling is still planned or this key can be dropped.
    gapTolerance: 5,
});
|
|
12
|
+
/**
 * Phrase table used by extractActivityType, checked top to bottom.
 * Stored as ordered [activityType, phrases] pairs so the priority is explicit
 * instead of depending on object key order; more specific activities come first.
 */
const ACTIVITY_PHRASES = Object.freeze([
    // Debugging - very specific technical terms
    ['debugging', [
        'debugging',
        'troubleshooting',
        'investigating error',
        'reading error',
        'stack trace',
        'exception thrown',
        'error message',
        'fixing bug',
    ]],
    // Coding/Development
    ['coding', [
        'writing code',
        'implementing',
        'developing',
        'programming',
        'refactoring',
        'coding',
    ]],
    // Code Review - specific workflow
    ['review', ['reviewing pr', 'pull request', 'code review', 'reviewing code']],
    // Meeting/Collaboration
    ['meeting', [
        'in zoom',
        'in google meet',
        'in slack huddle',
        'video call',
        'screen sharing',
        'team meeting',
    ]],
    // Research/Information gathering
    ['research', [
        'browsing',
        'stack overflow',
        'googling',
        'researching',
        'searching for',
    ]],
    // Reading documentation
    ['reading', [
        'reading documentation',
        'reading docs',
        'reading manual',
        'reading guide',
    ]],
    // Terminal/CLI operations (only if explicitly mentioned)
    ['terminal', [
        'in terminal',
        'in iterm',
        'command line',
        'running git',
        'running npm',
    ]],
]);
/**
 * Fallback table: activity implied by the very first word of the description.
 * Only consulted when no phrase from ACTIVITY_PHRASES matched.
 */
const LEADING_WORD_ACTIVITIES = Object.freeze([
    ['debugging', ['debug', 'fix']],
    ['coding', ['writing', 'implementing', 'developing']],
    ['reading', ['reading']],
    ['research', ['researching', 'browsing']],
]);
/**
 * Extract an activity type from a VLM description.
 *
 * Matching is case-insensitive and done in two passes:
 *   1. substring search for each known phrase, in priority order;
 *   2. exact match of the first word against a small fallback table.
 *
 * @param vlmDescription - Free-text description produced by the VLM
 * @returns One of 'debugging', 'coding', 'review', 'meeting', 'research',
 *          'reading', 'terminal', or 'other' when nothing matches or the
 *          input is empty / not a string
 */
function extractActivityType(vlmDescription) {
    // Non-string input (null, undefined, numbers, ...) cannot be classified.
    if (typeof vlmDescription !== 'string' || !vlmDescription) {
        return 'other';
    }
    const normalized = vlmDescription.toLowerCase().trim();
    // Pass 1: first phrase hit wins, tables are ordered most specific first.
    for (const [activityType, phrases] of ACTIVITY_PHRASES) {
        if (phrases.some((phrase) => normalized.includes(phrase))) {
            return activityType;
        }
    }
    // Pass 2: single-word activities that appear at the start.
    const firstWord = normalized.split(/[\s,.]+/)[0];
    for (const [activityType, words] of LEADING_WORD_ACTIVITIES) {
        if (words.includes(firstWord)) {
            return activityType;
        }
    }
    return 'other';
}
|
|
101
|
+
/**
 * Parse a VLM description to extract apps and topics.
 * Expects format like: "Debugging Python error in VSCode, working on escribano project"
 *
 * Each pattern contributes at most its first match. App names are returned
 * lowercase with every run of whitespace replaced by a single underscore
 * (e.g. "Visual Studio Code" -> "visual_studio_code").
 *
 * @param vlmDescription - Free-text description produced by the VLM
 * @returns `{ apps, topics }`, both de-duplicated arrays of lowercase strings;
 *          both empty when the input is empty or not a string
 */
function extractContext(vlmDescription) {
    if (typeof vlmDescription !== 'string' || !vlmDescription) {
        return { apps: [], topics: [] };
    }
    // Lowercased once up front, so captures below are already lowercase.
    const text = vlmDescription.toLowerCase();
    // Common app patterns
    const appPatterns = [
        /in (vscode|vs code|visual studio code)/i,
        /in (terminal|iterm|alacritty|warp)/i,
        /in (chrome|safari|firefox|browser)/i,
        /in (slack|discord|teams|zoom)/i,
        /in (github|gitlab|bitbucket)/i,
        /in (intellij|webstorm|pycharm)/i,
        /using (vscode|terminal|chrome|slack)/i,
    ];
    // Potential project/topic names following "working on", "in/for ... project",
    // or an action verb.
    const topicPatterns = [
        /working on (?:the )?(\w+)/i,
        /(?:in|for) (?:the )?(\w+) project/i,
        /(?:implementing|fixing|debugging) (\w+)/i,
    ];
    const apps = new Set();
    for (const pattern of appPatterns) {
        const match = text.match(pattern);
        if (match && match[1]) {
            // A string pattern in replace() only touches the first space;
            // the global regex normalizes every gap in multi-word names.
            apps.add(match[1].replace(/\s+/g, '_'));
        }
    }
    const topics = new Set();
    for (const pattern of topicPatterns) {
        const match = text.match(pattern);
        if (match && match[1]) {
            topics.add(match[1]);
        }
    }
    return { apps: [...apps], topics: [...topics] };
}
|
|
141
|
+
/**
 * Patterns identifying app "names" that are really sentence fragments or
 * generic labels and should be discarded by cleanAppName.
 */
const NOISY_APP_PATTERNS = [
    /^and\s/i,
    /^a\s/i,
    /^the\s/i,
    /\.\.\.$/,
    /^\s*$/,
    /^file manager$/i,
    /^personal website$/i,
];
/**
 * Validate and tidy a single app name.
 *
 * @param app - Candidate app name (expected to be a string)
 * @returns The trimmed name, or `null` when the value is not a string, is
 *          shorter than 2 or longer than 50 characters after trimming, or
 *          matches one of NOISY_APP_PATTERNS
 */
function cleanAppName(app) {
    // Entries come from parsed JSON, so they are not guaranteed to be strings.
    if (typeof app !== 'string') {
        return null;
    }
    const cleaned = app.trim();
    if (cleaned.length < 2 || cleaned.length > 50) {
        return null;
    }
    const isNoise = NOISY_APP_PATTERNS.some((pattern) => pattern.test(cleaned));
    return isNoise ? null : cleaned;
}
|
|
160
|
+
/**
 * Decode a JSON-encoded array of strings.
 * Returns only the string entries; anything unparsable, any non-array value,
 * and any non-string entry is dropped rather than aborting the whole list.
 */
function parseJsonStringArray(raw) {
    if (!raw) {
        return [];
    }
    try {
        const parsed = JSON.parse(raw);
        return Array.isArray(parsed)
            ? parsed.filter((entry) => typeof entry === 'string')
            : [];
    }
    catch {
        // Invalid JSON: deliberately best-effort, treat as "no entries".
        return [];
    }
}
/**
 * Collect the distinct apps and topics recorded on a set of observations.
 *
 * Reads the JSON-encoded `apps` and `topics` fields of each observation.
 * App names are filtered through cleanAppName; topics are trimmed and kept
 * when 2 to 50 characters long. A malformed field or entry is skipped without
 * affecting the other entries.
 *
 * @param observations - Observations whose `apps` / `topics` fields are JSON
 *                       strings (or null/empty)
 * @returns `{ apps, topics }`, each a sorted array of unique strings
 */
function aggregateContextFromObservations(observations) {
    const appsSet = new Set();
    const topicsSet = new Set();
    for (const obs of observations) {
        for (const app of parseJsonStringArray(obs.apps)) {
            const cleaned = cleanAppName(app);
            if (cleaned) {
                appsSet.add(cleaned);
            }
        }
        for (const topic of parseJsonStringArray(obs.topics)) {
            const cleaned = topic.trim();
            if (cleaned.length >= 2 && cleaned.length <= 50) {
                topicsSet.add(cleaned);
            }
        }
    }
    return {
        apps: [...appsSet].sort(),
        topics: [...topicsSet].sort(),
    };
}
|
|
197
|
+
/**
 * Group consecutive observations by activity type.
 *
 * Pipeline:
 *   1. keep only `type === 'visual'` observations that have a VLM description,
 *      ordered by `timestamp` (the input array is not mutated);
 *   2. start a new raw segment whenever the normalized activity type changes;
 *   3. merge raw segments shorter than `minSegmentDuration` into a neighbor;
 *   4. attach ids plus the apps/topics aggregated from each segment's observations.
 *
 * NOTE(review): `config.gapTolerance` is accepted but not used by this function.
 *
 * @param observations - Visual observations with VLM descriptions, sorted by timestamp
 * @param config - Segmentation configuration (overrides DEFAULT_CONFIG per key)
 * @returns Array of segments grouped by activity continuity; empty when there
 *          are no usable visual observations
 */
export function segmentByActivity(observations, config = {}) {
    const cfg = { ...DEFAULT_CONFIG, ...config };
    // filter() returns a new array, so the in-place sort does not touch the input.
    const visualObs = observations
        .filter((o) => o.type === 'visual' && o.vlm_description)
        .sort((a, b) => a.timestamp - b.timestamp);
    if (visualObs.length === 0) {
        return [];
    }
    const rawSegments = [];
    let currentSegment = null;
    for (const obs of visualObs) {
        // Prefer the stored activity type; fall back to parsing the description.
        const rawActivity = obs.activity_type || extractActivityType(obs.vlm_description);
        const activityType = normalizeActivity(rawActivity);
        const obsEnd = obs.end_timestamp ?? obs.timestamp;
        if (currentSegment && currentSegment.activityType === activityType) {
            // Same activity: extend the running segment. Observations are
            // ordered by start, not end, so never let the end move backwards.
            currentSegment.endTime = Math.max(currentSegment.endTime, obsEnd);
            currentSegment.observations.push(obs);
            continue;
        }
        // Activity changed (or first observation): close the running segment.
        if (currentSegment) {
            rawSegments.push(currentSegment);
        }
        currentSegment = {
            activityType,
            startTime: obs.timestamp,
            endTime: obsEnd,
            observations: [obs],
        };
    }
    // Flush the segment still open after the loop.
    if (currentSegment) {
        rawSegments.push(currentSegment);
    }
    const mergedSegments = mergeShortSegments(rawSegments, cfg.minSegmentDuration);
    // Convert to the final Segment shape.
    return mergedSegments.map((seg, index) => {
        const { apps, topics } = aggregateContextFromObservations(seg.observations);
        const [firstObs] = seg.observations;
        return {
            id: `seg-${index}`,
            recordingId: firstObs?.recording_id || '',
            activityType: seg.activityType,
            startTime: seg.startTime,
            endTime: seg.endTime,
            duration: seg.endTime - seg.startTime,
            observationIds: seg.observations.map((o) => o.id),
            keyDescription: firstObs?.vlm_description || '',
            apps,
            topics,
        };
    });
}
|
|
260
|
+
/**
 * Merge segments shorter than minDuration into their longest neighbor.
 *
 * Strategy:
 * 1. For each short segment, find the longer of (previous, next) neighbor;
 *    ties go to the previous one
 * 2. Merge into that neighbor: extend its time range and splice the short
 *    segment's observations in on the matching side, so the merged
 *    observation list stays in the same order as the segments were
 * 3. If no neighbors exist (only segment), keep it as-is
 *
 * The absorbing segment keeps its own activityType. The segment objects that
 * absorb a neighbor are updated in place; the input array itself is not modified.
 *
 * @param segments - Raw segments in chronological order, each with
 *                   `startTime`, `endTime` and `observations`
 * @param minDuration - Minimum (endTime - startTime) a segment needs to survive
 * @returns The remaining segments, in order
 */
function mergeShortSegments(segments, minDuration) {
    if (segments.length <= 1) {
        return segments;
    }
    const result = [...segments];
    let i = 0;
    while (i < result.length) {
        const seg = result[i];
        if (seg.endTime - seg.startTime >= minDuration) {
            // Segment is long enough, keep it
            i++;
            continue;
        }
        const prev = i > 0 ? result[i - 1] : null;
        const next = i < result.length - 1 ? result[i + 1] : null;
        if (!prev && !next) {
            // Earlier merges left this as the only segment, keep it
            i++;
            continue;
        }
        // Choose the longer neighbor; with a single neighbor there is no choice.
        let mergeIntoPrev;
        if (!prev) {
            mergeIntoPrev = false;
        }
        else if (!next) {
            mergeIntoPrev = true;
        }
        else {
            mergeIntoPrev = prev.endTime - prev.startTime >= next.endTime - next.startTime;
        }
        const target = mergeIntoPrev ? prev : next;
        // The short segment sits after `prev` but before `next`: append in the
        // first case, prepend in the second, so observations[0] always belongs
        // to the earliest part of the merged range.
        target.observations = mergeIntoPrev
            ? target.observations.concat(seg.observations)
            : seg.observations.concat(target.observations);
        target.startTime = Math.min(target.startTime, seg.startTime);
        target.endTime = Math.max(target.endTime, seg.endTime);
        // Remove the short segment. `i` is left unchanged on purpose: it now
        // points at the following segment (or at the merged `next`), which
        // still needs to be checked.
        result.splice(i, 1);
    }
    return result;
}
|
|
316
|
+
/**
 * Get statistics about segments.
 *
 * @param segments - Segments carrying `activityType` and `duration`
 * @returns `totalSegments`, `totalDuration` (sum of `duration`),
 *          `activityTypeCounts` (plain object: activity type -> number of
 *          segments) and `avgSegmentDuration` (0 for an empty list)
 */
export function getSegmentStats(segments) {
    // Count in a Map: activity types are arbitrary strings, and looking them
    // up on a plain object would pick up inherited members such as
    // "constructor" or "toString" instead of starting from zero.
    const counts = new Map();
    let totalDuration = 0;
    for (const seg of segments) {
        counts.set(seg.activityType, (counts.get(seg.activityType) ?? 0) + 1);
        totalDuration += seg.duration;
    }
    const totalSegments = segments.length;
    return {
        totalSegments,
        totalDuration,
        activityTypeCounts: Object.fromEntries(counts),
        avgSegmentDuration: totalSegments > 0 ? totalDuration / totalSegments : 0,
    };
}
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for Activity Segmentation Service
|
|
3
|
+
*/
|
|
4
|
+
import { describe, expect, it } from 'vitest';
|
|
5
|
+
import { getSegmentStats, segmentByActivity, } from './activity-segmentation.js';
|
|
6
|
+
describe('segmentByActivity', () => {
    // Builds an observation row with every field present; only the fields the
    // segmentation reads are parameterized. `end_timestamp` is timestamp + duration.
    const createObservation = (id, timestamp, vlmDescription, type = 'visual', duration = 10) => ({
        id,
        recording_id: 'test-recording',
        type,
        timestamp,
        end_timestamp: timestamp + duration,
        image_path: type === 'visual' ? `/path/${id}.png` : null,
        ocr_text: null,
        vlm_description: vlmDescription,
        vlm_raw_response: null,
        activity_type: null,
        apps: null,
        topics: null,
        text: null,
        audio_source: null,
        audio_type: null,
        embedding: null,
        created_at: new Date().toISOString(),
    });
    it('should group consecutive same-activity observations', () => {
        // 60s observations keep every segment above the default minimum
        // duration, so nothing gets merged.
        const observations = [
            createObservation('1', 0, 'Debugging Python error in VSCode', 'visual', 60),
            createObservation('2', 60, 'Debugging stack trace in terminal', 'visual', 60),
            createObservation('3', 120, 'Debugging the issue', 'visual', 60),
            createObservation('4', 180, 'Writing code in VSCode', 'visual', 60),
            createObservation('5', 240, 'Implementing new feature', 'visual', 60),
            createObservation('6', 300, 'Reading documentation in Chrome', 'visual', 60),
        ];
        const segments = segmentByActivity(observations);
        expect(segments).toHaveLength(3);
        expect(segments[0].activityType).toBe('debugging');
        expect(segments[0].observationIds).toEqual(['1', '2', '3']);
        expect(segments[1].activityType).toBe('coding');
        expect(segments[1].observationIds).toEqual(['4', '5']);
        expect(segments[2].activityType).toBe('reading');
        expect(segments[2].observationIds).toEqual(['6']);
    });
    it('should handle single observation segments', () => {
        const observations = [
            createObservation('1', 0, 'Debugging Python error'),
            createObservation('2', 10, 'Writing new function'),
            createObservation('3', 20, 'Reading documentation'),
        ];
        // Disable merging to test grouping logic directly
        const segments = segmentByActivity(observations, { minSegmentDuration: 0 });
        expect(segments).toHaveLength(3);
        segments.forEach((seg, i) => {
            expect(seg.observationIds).toHaveLength(1);
            expect(seg.observationIds[0]).toBe(String(i + 1));
        });
    });
    it('should merge short segments into longest neighbor', () => {
        // Layout: debugging 0-60 (60s), coding 60-70 (10s), debugging 70-90 (20s).
        const observations = [
            createObservation('1', 0, 'Debugging error 1', 'visual', 10),
            createObservation('2', 10, 'Debugging error 2', 'visual', 10),
            createObservation('3', 20, 'Debugging error 3', 'visual', 10),
            createObservation('4', 30, 'Debugging error 4', 'visual', 10),
            createObservation('5', 40, 'Debugging error 5', 'visual', 10),
            createObservation('6', 50, 'Debugging error 6', 'visual', 10),
            // Short coding segment (10s) - will be merged into first debugging segment
            createObservation('7', 60, 'Implementing new feature in VSCode', 'visual', 10),
            // Second debugging segment is short too (20s), should also be merged
            createObservation('8', 70, 'Debugging more', 'visual', 10),
            createObservation('9', 80, 'Debugging final', 'visual', 10),
        ];
        const segments = segmentByActivity(observations, {
            minSegmentDuration: 30,
        });
        // The coding segment (60-70) is only 10s and joins the longer, first
        // debugging segment (now 70s). The trailing debugging segment (20s) is
        // still below the minimum, so it is merged as well.
        expect(segments).toHaveLength(1);
        expect(segments[0].activityType).toBe('debugging');
        // Should include all observations including the coding one
        expect(segments[0].observationIds).toHaveLength(9);
    });
    it('should extract apps and topics from observation fields', () => {
        const obs = createObservation('1', 0, 'Debugging Python error in VSCode, working on escribano project');
        obs.apps = JSON.stringify(['vscode']);
        obs.topics = JSON.stringify(['escribano']);
        const segments = segmentByActivity([obs]);
        expect(segments[0].apps).toContain('vscode');
        expect(segments[0].topics).toContain('escribano');
    });
    it('should filter out audio observations', () => {
        const observations = [
            createObservation('1', 0, 'Debugging error', 'visual'),
            createObservation('2', 10, 'This is audio transcript', 'audio'),
            createObservation('3', 20, 'Writing code', 'visual'),
        ];
        // Disable merging to test filtering logic
        const segments = segmentByActivity(observations, { minSegmentDuration: 0 });
        expect(segments).toHaveLength(2);
        expect(segments[0].activityType).toBe('debugging');
        expect(segments[1].activityType).toBe('coding');
    });
    it('should handle observations without VLM descriptions', () => {
        const observations = [
            createObservation('1', 0, 'Debugging error', 'visual', 60),
            // No description: filtered out before segmentation
            createObservation('2', 60, null, 'visual', 60),
            createObservation('3', 120, 'Debugging more', 'visual', 60),
        ];
        // Disable merging; only the two described debugging observations remain.
        const segments = segmentByActivity(observations, { minSegmentDuration: 0 });
        expect(segments.length).toBeGreaterThanOrEqual(1);
        expect(segments[0].activityType).toBe('debugging');
        // The observation without a description must not appear in any segment.
        expect(segments.flatMap((seg) => seg.observationIds)).toEqual(['1', '3']);
    });
    it('should sort observations by timestamp', () => {
        const observations = [
            createObservation('1', 30, 'Reading documentation'),
            createObservation('2', 0, 'Debugging error'),
            createObservation('3', 10, 'Writing code'),
        ];
        // Disable merging to test sorting
        const segments = segmentByActivity(observations, { minSegmentDuration: 0 });
        expect(segments[0].startTime).toBe(0);
        expect(segments[0].activityType).toBe('debugging');
        expect(segments[1].startTime).toBe(10);
        expect(segments[1].activityType).toBe('coding');
        expect(segments[2].startTime).toBe(30);
        expect(segments[2].activityType).toBe('reading');
    });
    it('should return empty array for empty observations', () => {
        const segments = segmentByActivity([]);
        expect(segments).toEqual([]);
    });
    it('should handle all other activities', () => {
        const observations = [
            createObservation('1', 0, 'Browsing Stack Overflow for solutions'),
            createObservation('2', 10, 'In Zoom meeting discussing project'),
            createObservation('3', 20, 'Reviewing pull request in GitHub'),
            createObservation('4', 30, 'Some generic unknown activity'),
        ];
        // Disable merging to test activity detection
        const segments = segmentByActivity(observations, { minSegmentDuration: 0 });
        expect(segments).toHaveLength(4);
        expect(segments[0].activityType).toBe('research');
        expect(segments[1].activityType).toBe('meeting');
        expect(segments[2].activityType).toBe('review');
        expect(segments[3].activityType).toBe('other');
    });
});
|
|
157
|
+
describe('getSegmentStats', () => {
    // Minimal segment fixture: starts at 0 and lasts `duration`, with no
    // observations, apps or topics attached.
    function createMockSegment(activityType, duration) {
        const segment = {
            id: `seg-${activityType}`,
            recordingId: 'test',
            activityType,
            startTime: 0,
            endTime: duration,
            duration,
            observationIds: [],
            keyDescription: '',
            apps: [],
            topics: [],
        };
        return segment;
    }
    it('should calculate correct statistics', () => {
        const fixtures = [
            ['debugging', 60],
            ['coding', 120],
            ['debugging', 45],
        ];
        const segments = fixtures.map(([kind, length]) => createMockSegment(kind, length));
        const { totalSegments, totalDuration, avgSegmentDuration, activityTypeCounts } = getSegmentStats(segments);
        expect(totalSegments).toBe(3);
        expect(totalDuration).toBe(225);
        expect(avgSegmentDuration).toBe(75);
        expect(activityTypeCounts).toEqual({
            debugging: 2,
            coding: 1,
        });
    });
    it('should handle empty segments', () => {
        const { totalSegments, totalDuration, avgSegmentDuration } = getSegmentStats([]);
        expect(totalSegments).toBe(0);
        expect(totalDuration).toBe(0);
        expect(avgSegmentDuration).toBe(0);
    });
});
|