escribano 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +297 -0
  3. package/dist/0_types.js +279 -0
  4. package/dist/actions/classify-session.js +77 -0
  5. package/dist/actions/create-contexts.js +44 -0
  6. package/dist/actions/create-topic-blocks.js +68 -0
  7. package/dist/actions/extract-metadata.js +24 -0
  8. package/dist/actions/generate-artifact-v3.js +296 -0
  9. package/dist/actions/generate-artifact.js +61 -0
  10. package/dist/actions/generate-summary-v3.js +260 -0
  11. package/dist/actions/outline-index.js +204 -0
  12. package/dist/actions/process-recording-v2.js +494 -0
  13. package/dist/actions/process-recording-v3.js +412 -0
  14. package/dist/actions/process-session.js +183 -0
  15. package/dist/actions/publish-summary-v3.js +303 -0
  16. package/dist/actions/sync-to-outline.js +196 -0
  17. package/dist/adapters/audio.silero.adapter.js +69 -0
  18. package/dist/adapters/cap.adapter.js +94 -0
  19. package/dist/adapters/capture.cap.adapter.js +107 -0
  20. package/dist/adapters/capture.filesystem.adapter.js +124 -0
  21. package/dist/adapters/embedding.ollama.adapter.js +141 -0
  22. package/dist/adapters/intelligence.adapter.js +202 -0
  23. package/dist/adapters/intelligence.mlx.adapter.js +395 -0
  24. package/dist/adapters/intelligence.ollama.adapter.js +741 -0
  25. package/dist/adapters/publishing.outline.adapter.js +75 -0
  26. package/dist/adapters/storage.adapter.js +81 -0
  27. package/dist/adapters/storage.fs.adapter.js +83 -0
  28. package/dist/adapters/transcription.whisper.adapter.js +206 -0
  29. package/dist/adapters/video.ffmpeg.adapter.js +405 -0
  30. package/dist/adapters/whisper.adapter.js +168 -0
  31. package/dist/batch-context.js +329 -0
  32. package/dist/db/helpers.js +50 -0
  33. package/dist/db/index.js +95 -0
  34. package/dist/db/migrate.js +80 -0
  35. package/dist/db/repositories/artifact.sqlite.js +77 -0
  36. package/dist/db/repositories/cluster.sqlite.js +92 -0
  37. package/dist/db/repositories/context.sqlite.js +75 -0
  38. package/dist/db/repositories/index.js +10 -0
  39. package/dist/db/repositories/observation.sqlite.js +70 -0
  40. package/dist/db/repositories/recording.sqlite.js +56 -0
  41. package/dist/db/repositories/subject.sqlite.js +64 -0
  42. package/dist/db/repositories/topic-block.sqlite.js +45 -0
  43. package/dist/db/types.js +4 -0
  44. package/dist/domain/classification.js +60 -0
  45. package/dist/domain/context.js +97 -0
  46. package/dist/domain/index.js +2 -0
  47. package/dist/domain/observation.js +17 -0
  48. package/dist/domain/recording.js +41 -0
  49. package/dist/domain/segment.js +93 -0
  50. package/dist/domain/session.js +93 -0
  51. package/dist/domain/time-range.js +38 -0
  52. package/dist/domain/transcript.js +79 -0
  53. package/dist/index.js +173 -0
  54. package/dist/pipeline/context.js +162 -0
  55. package/dist/pipeline/events.js +2 -0
  56. package/dist/prerequisites.js +226 -0
  57. package/dist/scripts/rebuild-index.js +53 -0
  58. package/dist/scripts/seed-fixtures.js +290 -0
  59. package/dist/services/activity-segmentation.js +333 -0
  60. package/dist/services/activity-segmentation.test.js +191 -0
  61. package/dist/services/app-normalization.js +212 -0
  62. package/dist/services/cluster-merge.js +69 -0
  63. package/dist/services/clustering.js +237 -0
  64. package/dist/services/debug.js +58 -0
  65. package/dist/services/frame-sampling.js +318 -0
  66. package/dist/services/signal-extraction.js +106 -0
  67. package/dist/services/subject-grouping.js +342 -0
  68. package/dist/services/temporal-alignment.js +99 -0
  69. package/dist/services/vlm-enrichment.js +84 -0
  70. package/dist/services/vlm-service.js +130 -0
  71. package/dist/stats/index.js +3 -0
  72. package/dist/stats/observer.js +65 -0
  73. package/dist/stats/repository.js +36 -0
  74. package/dist/stats/resource-tracker.js +86 -0
  75. package/dist/stats/types.js +1 -0
  76. package/dist/test-classification-prompts.js +181 -0
  77. package/dist/tests/cap.adapter.test.js +75 -0
  78. package/dist/tests/capture.cap.adapter.test.js +69 -0
  79. package/dist/tests/classify-session.test.js +140 -0
  80. package/dist/tests/db/repositories.test.js +243 -0
  81. package/dist/tests/domain/time-range.test.js +31 -0
  82. package/dist/tests/integration.test.js +84 -0
  83. package/dist/tests/intelligence.adapter.test.js +102 -0
  84. package/dist/tests/intelligence.ollama.adapter.test.js +178 -0
  85. package/dist/tests/process-v2.test.js +90 -0
  86. package/dist/tests/services/clustering.test.js +112 -0
  87. package/dist/tests/services/frame-sampling.test.js +152 -0
  88. package/dist/tests/utils/ocr.test.js +76 -0
  89. package/dist/tests/utils/parallel.test.js +57 -0
  90. package/dist/tests/visual-observer.test.js +175 -0
  91. package/dist/utils/id-normalization.js +15 -0
  92. package/dist/utils/index.js +9 -0
  93. package/dist/utils/model-detector.js +154 -0
  94. package/dist/utils/ocr.js +80 -0
  95. package/dist/utils/parallel.js +32 -0
  96. package/migrations/001_initial.sql +109 -0
  97. package/migrations/002_clusters.sql +41 -0
  98. package/migrations/003_observations_vlm_fields.sql +14 -0
  99. package/migrations/004_observations_unique.sql +18 -0
  100. package/migrations/005_processing_stats.sql +29 -0
  101. package/migrations/006_vlm_raw_response.sql +6 -0
  102. package/migrations/007_subjects.sql +23 -0
  103. package/migrations/008_artifacts_recording.sql +6 -0
  104. package/migrations/009_artifact_subjects.sql +10 -0
  105. package/package.json +82 -0
  106. package/prompts/action-items.md +55 -0
  107. package/prompts/blog-draft.md +54 -0
  108. package/prompts/blog-research.md +87 -0
  109. package/prompts/card.md +54 -0
  110. package/prompts/classify-segment.md +38 -0
  111. package/prompts/classify.md +37 -0
  112. package/prompts/code-snippets.md +163 -0
  113. package/prompts/extract-metadata.md +149 -0
  114. package/prompts/notes.md +83 -0
  115. package/prompts/runbook.md +123 -0
  116. package/prompts/standup.md +50 -0
  117. package/prompts/step-by-step.md +125 -0
  118. package/prompts/subject-grouping.md +31 -0
  119. package/prompts/summary-v3.md +89 -0
  120. package/prompts/summary.md +77 -0
  121. package/prompts/topic-classifier.md +24 -0
  122. package/prompts/topic-extract.md +13 -0
  123. package/prompts/vlm-batch.md +21 -0
  124. package/prompts/vlm-single.md +19 -0
@@ -0,0 +1,333 @@
1
+ /**
2
+ * Escribano - Activity Segmentation Service
3
+ *
4
+ * Groups consecutive VLM observations by activity continuity.
5
+ * Replaces embedding-based clustering with VLM-driven segmentation.
6
+ */
7
+ import { normalizeActivity } from './vlm-service.js';
8
/**
 * Default segmentation settings. `segmentByActivity` spreads these first and
 * then the caller's `config`, so any field can be overridden per call.
 *
 * Frozen so the shared defaults cannot be altered by accident; consumers
 * always work on a spread copy.
 */
const DEFAULT_CONFIG = Object.freeze({
    // Segments whose (endTime - startTime) is below this are merged into a
    // neighbor. Same unit as observation timestamps.
    minSegmentDuration: 30,
    // NOTE(review): not read by any code in this file — confirm whether it is
    // still needed or should drive gap-based splitting.
    gapTolerance: 5,
});
12
/**
 * Classify a free-text VLM description into a coarse activity label.
 *
 * The description is lower-cased and trimmed, then matched in two passes:
 *   1. Phrase pass - each category's phrases are tried as substrings, in the
 *      order the categories are listed; the first category with a hit wins.
 *   2. First-word pass - the leading word (split on whitespace, commas and
 *      periods) is compared against a short per-category verb list.
 *
 * @param vlmDescription - Description text; may be empty, null or undefined
 * @returns The matched category name, or 'other' when nothing matches
 */
function extractActivityType(vlmDescription) {
    if (!vlmDescription) {
        return 'other';
    }
    const text = vlmDescription.toLowerCase().trim();
    // Ordered [category, phrases] pairs: earlier categories take precedence.
    const phraseTable = [
        // Debugging - very specific technical terms
        [
            'debugging',
            [
                'debugging',
                'troubleshooting',
                'investigating error',
                'reading error',
                'stack trace',
                'exception thrown',
                'error message',
                'fixing bug',
            ],
        ],
        // Coding/Development
        [
            'coding',
            [
                'writing code',
                'implementing',
                'developing',
                'programming',
                'refactoring',
                'coding',
            ],
        ],
        // Code review - specific workflow
        ['review', ['reviewing pr', 'pull request', 'code review', 'reviewing code']],
        // Meeting/Collaboration
        [
            'meeting',
            [
                'in zoom',
                'in google meet',
                'in slack huddle',
                'video call',
                'screen sharing',
                'team meeting',
            ],
        ],
        // Research/Information gathering
        [
            'research',
            ['browsing', 'stack overflow', 'googling', 'researching', 'searching for'],
        ],
        // Reading documentation
        [
            'reading',
            ['reading documentation', 'reading docs', 'reading manual', 'reading guide'],
        ],
        // Terminal/CLI operations (only if explicitly mentioned)
        [
            'terminal',
            ['in terminal', 'in iterm', 'command line', 'running git', 'running npm'],
        ],
    ];
    const phraseHit = phraseTable.find(([, phrases]) => phrases.some((phrase) => text.includes(phrase)));
    if (phraseHit) {
        return phraseHit[0];
    }
    // Fallback: a bare verb at the very start of the description.
    const [leadingWord] = text.split(/[\s,.]+/);
    const leadingWordTable = [
        ['debugging', ['debug', 'fix']],
        ['coding', ['writing', 'implementing', 'developing']],
        ['reading', ['reading']],
        ['research', ['researching', 'browsing']],
    ];
    const wordHit = leadingWordTable.find(([, words]) => words.includes(leadingWord));
    return wordHit ? wordHit[0] : 'other';
}
101
/**
 * Parse a VLM description to extract application names and topic words.
 * Expects text like: "Debugging Python error in VSCode, working on escribano project".
 *
 * The description is lower-cased before matching, so every returned value is
 * lower-case. Each pattern contributes at most its first match. Spaces inside
 * multi-word app names are replaced with underscores
 * ("visual studio code" -> "visual_studio_code").
 *
 * @param vlmDescription - Description text; may be empty, null or undefined
 * @returns `{ apps, topics }`, each a de-duplicated array in pattern order
 */
function extractContext(vlmDescription) {
    if (!vlmDescription)
        return { apps: [], topics: [] };
    const text = vlmDescription.toLowerCase();
    // Common app patterns
    const appPatterns = [
        /in (vscode|vs code|visual studio code)/i,
        /in (terminal|iterm|alacritty|warp)/i,
        /in (chrome|safari|firefox|browser)/i,
        /in (slack|discord|teams|zoom)/i,
        /in (github|gitlab|bitbucket)/i,
        /in (intellij|webstorm|pycharm)/i,
        /using (vscode|terminal|chrome|slack)/i,
    ];
    // Candidate project/topic words: the word following "working on", the word
    // before "project", or the object of an implementing/fixing/debugging verb.
    const topicPatterns = [
        /working on (?:the )?(\w+)/i,
        /(?:in|for) (?:the )?(\w+) project/i,
        /(?:implementing|fixing|debugging) (\w+)/i,
    ];
    // First capture group of each pattern that matches, in pattern order.
    const firstCaptures = (patterns) => patterns
        .map((pattern) => text.match(pattern)?.[1])
        .filter(Boolean);
    // A global regex is required here: String.prototype.replace with a string
    // pattern only replaces the first occurrence.
    const apps = firstCaptures(appPatterns).map((name) => name.replace(/ /g, '_'));
    const topics = firstCaptures(topicPatterns);
    return { apps: [...new Set(apps)], topics: [...new Set(topics)] };
}
141
/**
 * Patterns that mark an app name as noise: leading "and"/"a"/"the" followed by
 * whitespace, a trailing ellipsis, whitespace-only text, or one of two known
 * non-app labels.
 */
const NOISY_APP_PATTERNS = [
    /^and\s/i,
    /^a\s/i,
    /^the\s/i,
    /\.\.\.$/,
    /^\s*$/,
    /^file manager$/i,
    /^personal website$/i,
];
/**
 * Normalize a single app name and reject noisy values.
 *
 * @param app - Candidate app name
 * @returns The trimmed name, or null when the value is not a string, is
 *          shorter than 2 or longer than 50 characters after trimming, or
 *          matches one of NOISY_APP_PATTERNS
 */
function cleanAppName(app) {
    // Non-string values (numbers, null, nested objects) are rejected instead
    // of throwing on `.trim()`.
    if (typeof app !== 'string') {
        return null;
    }
    const cleaned = app.trim();
    if (cleaned.length < 2 || cleaned.length > 50) {
        return null;
    }
    const isNoise = NOISY_APP_PATTERNS.some((pattern) => pattern.test(cleaned));
    return isNoise ? null : cleaned;
}
160
/**
 * Decode a JSON-encoded list and keep only its string entries.
 *
 * @param raw - JSON text expected to encode an array of strings
 * @returns The string entries, or [] when `raw` is falsy, is not valid JSON,
 *          or does not decode to an array
 */
function parseStringList(raw) {
    if (!raw) {
        return [];
    }
    let parsed;
    try {
        parsed = JSON.parse(raw);
    }
    catch {
        // Invalid JSON is treated as "no entries" (best-effort aggregation).
        return [];
    }
    return Array.isArray(parsed)
        ? parsed.filter((entry) => typeof entry === 'string')
        : [];
}
/**
 * Collect the distinct apps and topics recorded on a set of observations.
 *
 * Each observation's `apps` and `topics` fields are read as JSON-encoded
 * arrays. App names go through `cleanAppName`; topics are trimmed and kept
 * when 2-50 characters long. Malformed JSON and non-string entries are
 * skipped individually, so one bad entry does not discard the valid entries
 * that follow it.
 *
 * @param observations - Observations whose `apps` / `topics` fields are read
 * @returns `{ apps, topics }`, each de-duplicated and sorted
 */
function aggregateContextFromObservations(observations) {
    const appsSet = new Set();
    const topicsSet = new Set();
    for (const obs of observations) {
        for (const app of parseStringList(obs.apps)) {
            const cleaned = cleanAppName(app);
            if (cleaned) {
                appsSet.add(cleaned);
            }
        }
        for (const topic of parseStringList(obs.topics)) {
            const cleaned = topic.trim();
            if (cleaned.length >= 2 && cleaned.length <= 50) {
                topicsSet.add(cleaned);
            }
        }
    }
    return {
        apps: [...appsSet].sort(),
        topics: [...topicsSet].sort(),
    };
}
197
/**
 * Split a recording's visual observations into activity segments.
 *
 * Only observations with `type === 'visual'` and a non-empty
 * `vlm_description` are considered; they are ordered by `timestamp` here, so
 * the input does not need to be pre-sorted. Consecutive observations sharing
 * the same normalized activity type form one run, runs shorter than
 * `minSegmentDuration` are merged into a neighbor, and each remaining run is
 * converted into a segment record.
 *
 * @param observations - Observations of any type
 * @param config - Optional overrides for DEFAULT_CONFIG
 * @returns Segments in timeline order; [] when nothing qualifies
 */
export function segmentByActivity(observations, config = {}) {
    const { minSegmentDuration } = { ...DEFAULT_CONFIG, ...config };
    const timeline = observations
        .filter((candidate) => candidate.type === 'visual' && candidate.vlm_description)
        .sort((left, right) => left.timestamp - right.timestamp);
    if (timeline.length === 0) {
        return [];
    }
    // Build runs of consecutive observations with the same activity type.
    const runs = [];
    for (const obs of timeline) {
        // A stored activity_type wins; otherwise derive one from the description.
        const activityType = normalizeActivity(obs.activity_type || extractActivityType(obs.vlm_description));
        const finish = obs.end_timestamp ?? obs.timestamp;
        const openRun = runs[runs.length - 1];
        if (openRun && openRun.activityType === activityType) {
            // Same activity: extend the run that is currently open.
            openRun.endTime = finish;
            openRun.observations.push(obs);
        }
        else {
            // Activity changed (or first observation): open a new run.
            runs.push({
                activityType,
                startTime: obs.timestamp,
                endTime: finish,
                observations: [obs],
            });
        }
    }
    // Fold short runs into a neighbor, then shape the final segment records.
    return mergeShortSegments(runs, minSegmentDuration).map((run, position) => {
        const { apps, topics } = aggregateContextFromObservations(run.observations);
        const [firstObservation] = run.observations;
        return {
            id: `seg-${position}`,
            recordingId: firstObservation?.recording_id || '',
            activityType: run.activityType,
            startTime: run.startTime,
            endTime: run.endTime,
            duration: run.endTime - run.startTime,
            observationIds: run.observations.map((member) => member.id),
            keyDescription: firstObservation?.vlm_description || '',
            apps,
            topics,
        };
    });
}
260
/**
 * Merge segments shorter than `minDuration` into a neighboring segment.
 *
 * Strategy:
 * 1. Walk the list left to right; a segment whose (endTime - startTime) is at
 *    least `minDuration` is kept as-is.
 * 2. A short segment is merged into the longer of its previous/next
 *    neighbors (ties go to the previous one); at either end of the list the
 *    only available neighbor is used.
 * 3. The absorbing segment keeps its activityType, widens its time range to
 *    cover both, and gets the short segment's observations appended after
 *    its own - so `observations[0]` stays the absorbing segment's first
 *    observation.
 * 4. A segment that ends up alone is kept even if it is short.
 *
 * The input array and its segment objects are never modified; merging is done
 * on copies. With zero or one segments the input array is returned unchanged.
 *
 * @param segments - Segments in timeline order
 * @param minDuration - Minimum duration a segment needs to survive on its own
 * @returns The merged list of segments
 */
function mergeShortSegments(segments, minDuration) {
    if (segments.length <= 1) {
        return segments;
    }
    const durationOf = (segment) => segment.endTime - segment.startTime;
    // Work on copies so the caller's segment objects are left untouched.
    const result = segments.map((segment) => ({
        ...segment,
        observations: [...segment.observations],
    }));
    let i = 0;
    while (i < result.length) {
        const seg = result[i];
        // Keep segments that are long enough, and a lone remaining segment.
        if (durationOf(seg) >= minDuration || result.length === 1) {
            i++;
            continue;
        }
        const prev = i > 0 ? result[i - 1] : null;
        const next = i < result.length - 1 ? result[i + 1] : null;
        // At least one neighbor exists here; prefer the longer, ties go to prev.
        const mergeIntoPrev = !next || (prev !== null && durationOf(prev) >= durationOf(next));
        const target = mergeIntoPrev ? prev : next;
        target.observations = target.observations.concat(seg.observations);
        target.startTime = Math.min(target.startTime, seg.startTime);
        target.endTime = Math.max(target.endTime, seg.endTime);
        // Removing the current element shifts the following one into index i,
        // so `i` is deliberately not advanced: a `next` target is re-checked
        // with its new, longer duration.
        result.splice(i, 1);
    }
    return result;
}
316
/**
 * Summarize a list of segments.
 *
 * @param segments - Segments exposing `activityType` and `duration`
 * @returns `totalSegments` (count), `totalDuration` (sum of `duration`),
 *          `activityTypeCounts` (segments per activity type) and
 *          `avgSegmentDuration` (0 for an empty list)
 */
export function getSegmentStats(segments) {
    const totalSegments = segments.length;
    const totalDuration = segments.reduce((sum, { duration }) => sum + duration, 0);
    const activityTypeCounts = segments.reduce((counts, { activityType }) => {
        counts[activityType] = (counts[activityType] || 0) + 1;
        return counts;
    }, {});
    // Guard the division so an empty list reports 0 instead of NaN.
    const avgSegmentDuration = totalSegments > 0 ? totalDuration / totalSegments : 0;
    return {
        totalSegments,
        totalDuration,
        activityTypeCounts,
        avgSegmentDuration,
    };
}
@@ -0,0 +1,191 @@
1
+ /**
2
+ * Tests for Activity Segmentation Service
3
+ */
4
+ import { describe, expect, it } from 'vitest';
5
+ import { getSegmentStats, segmentByActivity, } from './activity-segmentation.js';
6
describe('segmentByActivity', () => {
    /**
     * Build an observation fixture. `end_timestamp` is `timestamp + duration`;
     * `activity_type`, `apps` and `topics` start as null so the service has to
     * derive the activity from `vlmDescription`.
     */
    const createObservation = (id, timestamp, vlmDescription, type = 'visual', duration = 10) => ({
        id,
        recording_id: 'test-recording',
        type,
        timestamp,
        end_timestamp: timestamp + duration,
        image_path: type === 'visual' ? `/path/${id}.png` : null,
        ocr_text: null,
        vlm_description: vlmDescription,
        vlm_raw_response: null,
        activity_type: null,
        apps: null,
        topics: null,
        text: null,
        audio_source: null,
        audio_type: null,
        embedding: null,
        created_at: new Date().toISOString(),
    });
    it('should group consecutive same-activity observations', () => {
        // Create longer observations (60s each) to avoid merging
        const observations = [
            createObservation('1', 0, 'Debugging Python error in VSCode', 'visual', 60),
            createObservation('2', 60, 'Debugging stack trace in terminal', 'visual', 60),
            createObservation('3', 120, 'Debugging the issue', 'visual', 60),
            createObservation('4', 180, 'Writing code in VSCode', 'visual', 60),
            createObservation('5', 240, 'Implementing new feature', 'visual', 60),
            createObservation('6', 300, 'Reading documentation in Chrome', 'visual', 60),
        ];
        const segments = segmentByActivity(observations);
        expect(segments).toHaveLength(3);
        expect(segments[0].activityType).toBe('debugging');
        expect(segments[0].observationIds).toEqual(['1', '2', '3']);
        expect(segments[1].activityType).toBe('coding');
        expect(segments[1].observationIds).toEqual(['4', '5']);
        expect(segments[2].activityType).toBe('reading');
        expect(segments[2].observationIds).toEqual(['6']);
    });
    it('should handle single observation segments', () => {
        const observations = [
            createObservation('1', 0, 'Debugging Python error'),
            createObservation('2', 10, 'Writing new function'),
            createObservation('3', 20, 'Reading documentation'),
        ];
        // Disable merging to test grouping logic directly
        const segments = segmentByActivity(observations, { minSegmentDuration: 0 });
        expect(segments).toHaveLength(3);
        segments.forEach((seg, i) => {
            expect(seg.observationIds).toHaveLength(1);
            expect(seg.observationIds[0]).toBe(String(i + 1));
        });
    });
    it('should merge short segments into longest neighbor', () => {
        // Scenario: first debugging run 60s, coding 10s, trailing debugging run 20s.
        const observations = [
            createObservation('1', 0, 'Debugging error 1', 'visual', 10),
            createObservation('2', 10, 'Debugging error 2', 'visual', 10),
            createObservation('3', 20, 'Debugging error 3', 'visual', 10),
            createObservation('4', 30, 'Debugging error 4', 'visual', 10),
            createObservation('5', 40, 'Debugging error 5', 'visual', 10),
            createObservation('6', 50, 'Debugging error 6', 'visual', 10),
            // Short coding segment (10s) - will be merged into first debugging segment
            createObservation('7', 60, 'Implementing new feature in VSCode', 'visual', 10),
            // Second debugging segment is short too (20s), should also be merged
            createObservation('8', 70, 'Debugging more', 'visual', 10),
            createObservation('9', 80, 'Debugging final', 'visual', 10),
        ];
        const segments = segmentByActivity(observations, {
            minSegmentDuration: 30,
        });
        // The coding segment (60-70s) is only 10s, so it is merged into its
        // longer neighbor, the first debugging segment (now 0-70s). The trailing
        // debugging segment (20s) is also below the minimum and its only
        // neighbor is that merged segment, so it gets merged too.
        expect(segments).toHaveLength(1);
        expect(segments[0].activityType).toBe('debugging');
        // Should include all observations including the coding one
        expect(segments[0].observationIds).toHaveLength(9);
    });
    it('should extract apps and topics from observation fields', () => {
        const obs = createObservation('1', 0, 'Debugging Python error in VSCode, working on escribano project');
        obs.apps = JSON.stringify(['vscode']);
        obs.topics = JSON.stringify(['escribano']);
        const segments = segmentByActivity([obs]);
        expect(segments[0].apps).toContain('vscode');
        expect(segments[0].topics).toContain('escribano');
    });
    it('should filter out audio observations', () => {
        const observations = [
            createObservation('1', 0, 'Debugging error', 'visual'),
            createObservation('2', 10, 'This is audio transcript', 'audio'),
            createObservation('3', 20, 'Writing code', 'visual'),
        ];
        // Disable merging to test filtering logic
        const segments = segmentByActivity(observations, { minSegmentDuration: 0 });
        expect(segments).toHaveLength(2);
        expect(segments[0].activityType).toBe('debugging');
        expect(segments[1].activityType).toBe('coding');
    });
    it('should handle observations without VLM descriptions', () => {
        const observations = [
            createObservation('1', 0, 'Debugging error', 'visual', 60),
            // Observation without a description is filtered out entirely
            createObservation('2', 60, null, 'visual', 60),
            createObservation('3', 120, 'Debugging more', 'visual', 60),
        ];
        const segments = segmentByActivity(observations, { minSegmentDuration: 0 });
        // Once observation 2 is dropped, the two debugging observations are
        // consecutive and share an activity type, so they form one segment.
        expect(segments).toHaveLength(1);
        expect(segments[0].activityType).toBe('debugging');
        expect(segments[0].observationIds).toEqual(['1', '3']);
    });
    it('should sort observations by timestamp', () => {
        const observations = [
            createObservation('1', 30, 'Reading documentation'),
            createObservation('2', 0, 'Debugging error'),
            createObservation('3', 10, 'Writing code'),
        ];
        // Disable merging to test sorting
        const segments = segmentByActivity(observations, { minSegmentDuration: 0 });
        expect(segments[0].startTime).toBe(0);
        expect(segments[0].activityType).toBe('debugging');
        expect(segments[1].startTime).toBe(10);
        expect(segments[1].activityType).toBe('coding');
        expect(segments[2].startTime).toBe(30);
        expect(segments[2].activityType).toBe('reading');
    });
    it('should return empty array for empty observations', () => {
        const segments = segmentByActivity([]);
        expect(segments).toEqual([]);
    });
    it('should handle all other activities', () => {
        const observations = [
            createObservation('1', 0, 'Browsing Stack Overflow for solutions'),
            createObservation('2', 10, 'In Zoom meeting discussing project'),
            createObservation('3', 20, 'Reviewing pull request in GitHub'),
            createObservation('4', 30, 'Some generic unknown activity'),
        ];
        // Disable merging to test activity detection
        const segments = segmentByActivity(observations, { minSegmentDuration: 0 });
        expect(segments).toHaveLength(4);
        expect(segments[0].activityType).toBe('research');
        expect(segments[1].activityType).toBe('meeting');
        expect(segments[2].activityType).toBe('review');
        expect(segments[3].activityType).toBe('other');
    });
});
157
describe('getSegmentStats', () => {
    /**
     * Minimal segment fixture spanning 0..duration for the given activity type.
     */
    function createMockSegment(activityType, duration) {
        return {
            id: `seg-${activityType}`,
            recordingId: 'test',
            activityType,
            startTime: 0,
            endTime: duration,
            duration,
            observationIds: [],
            keyDescription: '',
            apps: [],
            topics: [],
        };
    }
    it('should calculate correct statistics', () => {
        // 60 + 120 + 45 = 225 total, 75 on average; debugging appears twice.
        const fixtures = [
            ['debugging', 60],
            ['coding', 120],
            ['debugging', 45],
        ];
        const segments = fixtures.map(([activityType, duration]) => createMockSegment(activityType, duration));
        const { totalSegments, totalDuration, avgSegmentDuration, activityTypeCounts } = getSegmentStats(segments);
        expect(totalSegments).toBe(3);
        expect(totalDuration).toBe(225);
        expect(avgSegmentDuration).toBe(75);
        expect(activityTypeCounts).toEqual({
            debugging: 2,
            coding: 1,
        });
    });
    it('should handle empty segments', () => {
        const { totalSegments, totalDuration, avgSegmentDuration } = getSegmentStats([]);
        expect(totalSegments).toBe(0);
        expect(totalDuration).toBe(0);
        expect(avgSegmentDuration).toBe(0);
    });
});