escribano 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +297 -0
  3. package/dist/0_types.js +279 -0
  4. package/dist/actions/classify-session.js +77 -0
  5. package/dist/actions/create-contexts.js +44 -0
  6. package/dist/actions/create-topic-blocks.js +68 -0
  7. package/dist/actions/extract-metadata.js +24 -0
  8. package/dist/actions/generate-artifact-v3.js +296 -0
  9. package/dist/actions/generate-artifact.js +61 -0
  10. package/dist/actions/generate-summary-v3.js +260 -0
  11. package/dist/actions/outline-index.js +204 -0
  12. package/dist/actions/process-recording-v2.js +494 -0
  13. package/dist/actions/process-recording-v3.js +412 -0
  14. package/dist/actions/process-session.js +183 -0
  15. package/dist/actions/publish-summary-v3.js +303 -0
  16. package/dist/actions/sync-to-outline.js +196 -0
  17. package/dist/adapters/audio.silero.adapter.js +69 -0
  18. package/dist/adapters/cap.adapter.js +94 -0
  19. package/dist/adapters/capture.cap.adapter.js +107 -0
  20. package/dist/adapters/capture.filesystem.adapter.js +124 -0
  21. package/dist/adapters/embedding.ollama.adapter.js +141 -0
  22. package/dist/adapters/intelligence.adapter.js +202 -0
  23. package/dist/adapters/intelligence.mlx.adapter.js +395 -0
  24. package/dist/adapters/intelligence.ollama.adapter.js +741 -0
  25. package/dist/adapters/publishing.outline.adapter.js +75 -0
  26. package/dist/adapters/storage.adapter.js +81 -0
  27. package/dist/adapters/storage.fs.adapter.js +83 -0
  28. package/dist/adapters/transcription.whisper.adapter.js +206 -0
  29. package/dist/adapters/video.ffmpeg.adapter.js +405 -0
  30. package/dist/adapters/whisper.adapter.js +168 -0
  31. package/dist/batch-context.js +329 -0
  32. package/dist/db/helpers.js +50 -0
  33. package/dist/db/index.js +95 -0
  34. package/dist/db/migrate.js +80 -0
  35. package/dist/db/repositories/artifact.sqlite.js +77 -0
  36. package/dist/db/repositories/cluster.sqlite.js +92 -0
  37. package/dist/db/repositories/context.sqlite.js +75 -0
  38. package/dist/db/repositories/index.js +10 -0
  39. package/dist/db/repositories/observation.sqlite.js +70 -0
  40. package/dist/db/repositories/recording.sqlite.js +56 -0
  41. package/dist/db/repositories/subject.sqlite.js +64 -0
  42. package/dist/db/repositories/topic-block.sqlite.js +45 -0
  43. package/dist/db/types.js +4 -0
  44. package/dist/domain/classification.js +60 -0
  45. package/dist/domain/context.js +97 -0
  46. package/dist/domain/index.js +2 -0
  47. package/dist/domain/observation.js +17 -0
  48. package/dist/domain/recording.js +41 -0
  49. package/dist/domain/segment.js +93 -0
  50. package/dist/domain/session.js +93 -0
  51. package/dist/domain/time-range.js +38 -0
  52. package/dist/domain/transcript.js +79 -0
  53. package/dist/index.js +173 -0
  54. package/dist/pipeline/context.js +162 -0
  55. package/dist/pipeline/events.js +2 -0
  56. package/dist/prerequisites.js +226 -0
  57. package/dist/scripts/rebuild-index.js +53 -0
  58. package/dist/scripts/seed-fixtures.js +290 -0
  59. package/dist/services/activity-segmentation.js +333 -0
  60. package/dist/services/activity-segmentation.test.js +191 -0
  61. package/dist/services/app-normalization.js +212 -0
  62. package/dist/services/cluster-merge.js +69 -0
  63. package/dist/services/clustering.js +237 -0
  64. package/dist/services/debug.js +58 -0
  65. package/dist/services/frame-sampling.js +318 -0
  66. package/dist/services/signal-extraction.js +106 -0
  67. package/dist/services/subject-grouping.js +342 -0
  68. package/dist/services/temporal-alignment.js +99 -0
  69. package/dist/services/vlm-enrichment.js +84 -0
  70. package/dist/services/vlm-service.js +130 -0
  71. package/dist/stats/index.js +3 -0
  72. package/dist/stats/observer.js +65 -0
  73. package/dist/stats/repository.js +36 -0
  74. package/dist/stats/resource-tracker.js +86 -0
  75. package/dist/stats/types.js +1 -0
  76. package/dist/test-classification-prompts.js +181 -0
  77. package/dist/tests/cap.adapter.test.js +75 -0
  78. package/dist/tests/capture.cap.adapter.test.js +69 -0
  79. package/dist/tests/classify-session.test.js +140 -0
  80. package/dist/tests/db/repositories.test.js +243 -0
  81. package/dist/tests/domain/time-range.test.js +31 -0
  82. package/dist/tests/integration.test.js +84 -0
  83. package/dist/tests/intelligence.adapter.test.js +102 -0
  84. package/dist/tests/intelligence.ollama.adapter.test.js +178 -0
  85. package/dist/tests/process-v2.test.js +90 -0
  86. package/dist/tests/services/clustering.test.js +112 -0
  87. package/dist/tests/services/frame-sampling.test.js +152 -0
  88. package/dist/tests/utils/ocr.test.js +76 -0
  89. package/dist/tests/utils/parallel.test.js +57 -0
  90. package/dist/tests/visual-observer.test.js +175 -0
  91. package/dist/utils/id-normalization.js +15 -0
  92. package/dist/utils/index.js +9 -0
  93. package/dist/utils/model-detector.js +154 -0
  94. package/dist/utils/ocr.js +80 -0
  95. package/dist/utils/parallel.js +32 -0
  96. package/migrations/001_initial.sql +109 -0
  97. package/migrations/002_clusters.sql +41 -0
  98. package/migrations/003_observations_vlm_fields.sql +14 -0
  99. package/migrations/004_observations_unique.sql +18 -0
  100. package/migrations/005_processing_stats.sql +29 -0
  101. package/migrations/006_vlm_raw_response.sql +6 -0
  102. package/migrations/007_subjects.sql +23 -0
  103. package/migrations/008_artifacts_recording.sql +6 -0
  104. package/migrations/009_artifact_subjects.sql +10 -0
  105. package/package.json +82 -0
  106. package/prompts/action-items.md +55 -0
  107. package/prompts/blog-draft.md +54 -0
  108. package/prompts/blog-research.md +87 -0
  109. package/prompts/card.md +54 -0
  110. package/prompts/classify-segment.md +38 -0
  111. package/prompts/classify.md +37 -0
  112. package/prompts/code-snippets.md +163 -0
  113. package/prompts/extract-metadata.md +149 -0
  114. package/prompts/notes.md +83 -0
  115. package/prompts/runbook.md +123 -0
  116. package/prompts/standup.md +50 -0
  117. package/prompts/step-by-step.md +125 -0
  118. package/prompts/subject-grouping.md +31 -0
  119. package/prompts/summary-v3.md +89 -0
  120. package/prompts/summary.md +77 -0
  121. package/prompts/topic-classifier.md +24 -0
  122. package/prompts/topic-extract.md +13 -0
  123. package/prompts/vlm-batch.md +21 -0
  124. package/prompts/vlm-single.md +19 -0
@@ -0,0 +1,318 @@
1
+ /**
2
+ * Escribano - Adaptive Frame Sampling Service
3
+ *
4
+ * Reduces frame count while preserving important moments.
5
+ * Strategy: Base sampling (10s) + gap filling for large time jumps.
6
+ */
7
/**
 * Read a sampling interval (in seconds) from an environment variable.
 *
 * The fallback is used when the variable is unset, empty, not numeric,
 * non-finite, zero or negative. Zero and negative intervals are rejected
 * because the sampling loops advance by these values and would never
 * terminate with a non-positive step.
 *
 * @param name - Name of the environment variable to read
 * @param fallback - Value returned when the variable holds no usable number
 * @returns A finite number greater than zero, or `fallback`
 */
function readIntervalFromEnv(name, fallback) {
    const parsed = Number(process.env[name]);
    return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
}
/**
 * Default sampling configuration, overridable through environment variables.
 * Individual calls can override any key via their `config` argument.
 */
const DEFAULT_CONFIG = {
    // Seconds between regular ("base") samples
    baseIntervalSeconds: readIntervalFromEnv('ESCRIBANO_SAMPLE_INTERVAL', 10),
    // A distance between consecutive samples above this value counts as a gap
    gapThresholdSeconds: readIntervalFromEnv('ESCRIBANO_SAMPLE_GAP_THRESHOLD', 15),
    // Seconds between the extra samples inserted into a gap
    gapFillIntervalSeconds: readIntervalFromEnv('ESCRIBANO_SAMPLE_GAP_FILL', 3),
};
12
/**
 * Pick the frame whose timestamp lies closest to `targetTimestamp`.
 *
 * When several frames are equally close, the one that appears first in
 * `frames` wins.
 *
 * @param frames - Candidate frames; each exposes a numeric `timestamp`
 * @param targetTimestamp - Timestamp to match
 * @returns The closest frame, or `null` when `frames` is empty
 */
function findNearestFrame(frames, targetTimestamp) {
    if (frames.length === 0) {
        return null;
    }
    const distanceTo = (candidate) => Math.abs(candidate.timestamp - targetTimestamp);
    let best = frames[0];
    let bestDistance = distanceTo(best);
    for (let index = 1; index < frames.length; index += 1) {
        const candidate = frames[index];
        const distance = distanceTo(candidate);
        // Strict comparison keeps the earliest frame on ties
        if (distance < bestDistance) {
            best = candidate;
            bestDistance = distance;
        }
    }
    return best;
}
29
/**
 * Adaptively sample frames from a recording.
 *
 * Strategy:
 * 1. Walk the frames in timestamp order and keep one whenever at least
 *    `baseIntervalSeconds` have passed since the previously kept frame.
 *    The earliest frame is always kept.
 * 2. For each pair of consecutive base samples further apart than
 *    `gapThresholdSeconds`, probe the gap every `gapFillIntervalSeconds`
 *    and add the frame nearest to each probe, unless that frame has
 *    already been sampled.
 *
 * Gap filling is skipped when `gapFillIntervalSeconds` is not a positive
 * number, because the probe would otherwise never advance.
 * The input array is copied before sorting and is not mutated.
 *
 * @param allFrames - All extracted frames (typically at 2s intervals)
 * @param config - Partial sampling configuration; keys that are not
 *                 provided fall back to DEFAULT_CONFIG
 * @returns Sampled frames `{ imagePath, timestamp, reason }` sorted by
 *          timestamp, where `reason` is 'base' or 'gap_fill'
 */
export function adaptiveSample(allFrames, config = {}) {
    const cfg = { ...DEFAULT_CONFIG, ...config };
    if (allFrames.length === 0) {
        return [];
    }
    const byTimestamp = (a, b) => a.timestamp - b.timestamp;
    const sortedFrames = [...allFrames].sort(byTimestamp);
    const sampledTimestamps = new Set();
    const result = [];
    // Step 1: Base sampling - take frames at regular intervals
    let lastSampledTime = -Infinity;
    for (const frame of sortedFrames) {
        if (frame.timestamp - lastSampledTime >= cfg.baseIntervalSeconds) {
            result.push({
                imagePath: frame.imagePath,
                timestamp: frame.timestamp,
                reason: 'base',
            });
            sampledTimestamps.add(frame.timestamp);
            lastSampledTime = frame.timestamp;
        }
    }
    // Step 2: Detect and fill gaps between consecutive base samples.
    // Fill samples are appended after the base samples, so indices below
    // `baseCount` keep pointing at base samples while we push.
    const baseCount = result.length;
    if (cfg.gapFillIntervalSeconds > 0) {
        for (let i = 0; i < baseCount - 1; i++) {
            const currentTime = result[i].timestamp;
            const nextTime = result[i + 1].timestamp;
            if (nextTime - currentTime > cfg.gapThresholdSeconds) {
                const gapStart = currentTime + cfg.gapFillIntervalSeconds;
                const gapEnd = nextTime - cfg.gapFillIntervalSeconds;
                for (let t = gapStart; t <= gapEnd; t += cfg.gapFillIntervalSeconds) {
                    const nearestFrame = findNearestFrame(sortedFrames, t);
                    if (nearestFrame && !sampledTimestamps.has(nearestFrame.timestamp)) {
                        result.push({
                            imagePath: nearestFrame.imagePath,
                            timestamp: nearestFrame.timestamp,
                            reason: 'gap_fill',
                        });
                        sampledTimestamps.add(nearestFrame.timestamp);
                    }
                }
            }
        }
    }
    // Sort final result by timestamp
    return result.sort(byTimestamp);
}
92
/**
 * Choose the base sampling interval from the number of detected scene changes.
 *
 * When scene changes are dense they already provide good timeline coverage,
 * so the base interval is widened to avoid excessive frames. The configured
 * interval acts as a floor and is never reduced.
 *
 * Tiers:
 * - more than 50 scenes: at least 30s
 * - 21 to 50 scenes: at least 20s
 * - 20 scenes or fewer: the configured interval, unchanged
 *
 * @param sceneCount - Number of detected scene changes
 * @param configBaseInterval - User-configured base interval (used as minimum)
 * @returns Adjusted base interval in seconds
 */
export function calculateAdaptiveBaseInterval(sceneCount, configBaseInterval) {
    // [exclusive lower bound on scene count, minimum interval in seconds],
    // ordered from densest to sparsest so the first hit is the right tier.
    const densityTiers = [
        [50, 30],
        [20, 20],
    ];
    const tier = densityTiers.find(([moreThan]) => sceneCount > moreThan);
    return tier ? Math.max(configBaseInterval, tier[1]) : configBaseInterval;
}
114
/**
 * Adaptively sample frames with scene change awareness.
 *
 * Strategy:
 * 1. Always include the frame nearest to each scene change timestamp.
 * 2. Walk the frames in timestamp order and add a base sample whenever at
 *    least `baseIntervalSeconds` have passed since the previous sample.
 *    Every scene-change frame restarts that interval, and the earliest
 *    frame is always kept so the start of the recording is covered
 *    (matching adaptiveSample and calculateRequiredTimestamps).
 * 3. Detect gaps larger than `gapThresholdSeconds` between consecutive
 *    samples and fill them every `gapFillIntervalSeconds`.
 *
 * The base interval, gap threshold and gap fill interval are raised when
 * there are more than 20 / more than 50 scene changes. Gap filling is
 * skipped when the fill interval is not a positive number, because the
 * probe would otherwise never advance. The input array is not mutated.
 *
 * @param allFrames - All extracted frames (typically at 2s intervals)
 * @param sceneChanges - Timestamps of detected scene changes from ffmpeg;
 *                       defaults to no scene changes
 * @param config - Partial sampling configuration; keys that are not
 *                 provided fall back to DEFAULT_CONFIG
 * @returns Sampled frames `{ imagePath, timestamp, reason }` sorted by
 *          timestamp, where `reason` is 'scene_change', 'base' or 'gap_fill'
 */
export function adaptiveSampleWithScenes(allFrames, sceneChanges = [], config = {}) {
    const cfg = { ...DEFAULT_CONFIG, ...config };
    if (allFrames.length === 0) {
        return [];
    }
    // Adjust base interval based on scene density
    const sceneCount = sceneChanges.length;
    cfg.baseIntervalSeconds = calculateAdaptiveBaseInterval(sceneCount, cfg.baseIntervalSeconds);
    // When scene density is high, also increase gap threshold to prevent
    // gap filling between closely-spaced scene changes
    if (sceneCount > 50) {
        cfg.gapThresholdSeconds = Math.max(cfg.gapThresholdSeconds, 60);
        cfg.gapFillIntervalSeconds = Math.max(cfg.gapFillIntervalSeconds, 10);
    }
    else if (sceneCount > 20) {
        cfg.gapThresholdSeconds = Math.max(cfg.gapThresholdSeconds, 40);
        cfg.gapFillIntervalSeconds = Math.max(cfg.gapFillIntervalSeconds, 5);
    }
    const byTimestamp = (a, b) => a.timestamp - b.timestamp;
    const sortedFrames = [...allFrames].sort(byTimestamp);
    // Timestamps that are already part of the result
    const sampledTimestamps = new Set();
    const result = [];
    // Step 1: Always include frames nearest to scene changes
    for (const changeTime of sceneChanges) {
        const nearest = findNearestFrame(sortedFrames, changeTime);
        if (nearest && !sampledTimestamps.has(nearest.timestamp)) {
            result.push({
                imagePath: nearest.imagePath,
                timestamp: nearest.timestamp,
                reason: 'scene_change',
            });
            sampledTimestamps.add(nearest.timestamp);
        }
    }
    // Step 2: Base sampling in a single pass over the sorted frames.
    // Starting from -Infinity makes the earliest frame a sample.
    let lastSampleTime = -Infinity;
    for (const frame of sortedFrames) {
        if (sampledTimestamps.has(frame.timestamp)) {
            // Already sampled (scene change): the interval restarts here
            lastSampleTime = frame.timestamp;
            continue;
        }
        if (frame.timestamp - lastSampleTime >= cfg.baseIntervalSeconds) {
            result.push({
                imagePath: frame.imagePath,
                timestamp: frame.timestamp,
                reason: 'base',
            });
            sampledTimestamps.add(frame.timestamp);
            lastSampleTime = frame.timestamp;
        }
    }
    // Sort before gap filling so neighbours in the array are neighbours in time
    result.sort(byTimestamp);
    // Step 3: Fill large gaps between any samples. Fill samples are appended
    // after index `anchorCount - 1`, so the anchors keep their positions.
    const anchorCount = result.length;
    if (cfg.gapFillIntervalSeconds > 0) {
        for (let i = 0; i < anchorCount - 1; i++) {
            const currentTime = result[i].timestamp;
            const nextTime = result[i + 1].timestamp;
            if (nextTime - currentTime > cfg.gapThresholdSeconds) {
                const gapStart = currentTime + cfg.gapFillIntervalSeconds;
                const gapEnd = nextTime - cfg.gapFillIntervalSeconds;
                for (let t = gapStart; t <= gapEnd; t += cfg.gapFillIntervalSeconds) {
                    const nearestFrame = findNearestFrame(sortedFrames, t);
                    if (nearestFrame && !sampledTimestamps.has(nearestFrame.timestamp)) {
                        result.push({
                            imagePath: nearestFrame.imagePath,
                            timestamp: nearestFrame.timestamp,
                            reason: 'gap_fill',
                        });
                        sampledTimestamps.add(nearestFrame.timestamp);
                    }
                }
            }
        }
    }
    // Sort final result by timestamp
    return result.sort(byTimestamp);
}
240
/**
 * Summarize a sampling run for logging.
 *
 * @param original - Frames before sampling (only `length` is read)
 * @param sampled - Sampled frames, each carrying a `reason`
 * @returns Frame counts before and after sampling, the reduction as a whole
 *          percentage, and the number of samples per reason
 *          ('base', 'gap_fill', 'scene_change')
 */
export function getSamplingStats(original, sampled) {
    let baseCount = 0;
    let gapFillCount = 0;
    let sceneChangeCount = 0;
    for (const frame of sampled) {
        switch (frame.reason) {
            case 'base':
                baseCount += 1;
                break;
            case 'gap_fill':
                gapFillCount += 1;
                break;
            case 'scene_change':
                sceneChangeCount += 1;
                break;
            default:
                // Other reasons are not tallied
                break;
        }
    }
    // Avoid dividing by zero when there were no original frames
    const denominator = original.length || 1;
    const keptRatio = sampled.length / denominator;
    return {
        originalCount: original.length,
        sampledCount: sampled.length,
        reductionPercent: Math.round((1 - keptRatio) * 100),
        baseCount,
        gapFillCount,
        sceneChangeCount,
    };
}
256
/**
 * Calculate required frame timestamps WITHOUT extracting frames.
 * Used by the smart extraction pipeline to extract only needed frames.
 *
 * This is the inverse of adaptiveSampleWithScenes - instead of selecting
 * from existing frames, we calculate which timestamps we need:
 * 1. every scene change, rounded to the nearest second and kept only when
 *    it lies within [0, durationSeconds];
 * 2. one timestamp every `baseIntervalSeconds`, starting at 0;
 * 3. extra timestamps every `gapFillIntervalSeconds` inside any gap larger
 *    than `gapThresholdSeconds`.
 *
 * The intervals are widened for more than 20 / more than 50 scene changes,
 * using the same rules as adaptiveSampleWithScenes. Gap filling is skipped
 * when the fill interval is not a positive number, because the fill loop
 * would otherwise never advance.
 *
 * @param durationSeconds - Total video duration in seconds
 * @param sceneChanges - Timestamps of detected scene changes
 * @param config - Partial sampling configuration; keys that are not
 *                 provided fall back to DEFAULT_CONFIG
 * @returns Sorted array of unique whole-second timestamps that need frames;
 *          empty when `durationSeconds` is zero or negative
 * @throws {RangeError} When the effective base interval is zero or negative,
 *         since stepping through the video would never finish
 *
 * @example
 * // For a 60s video with 3 scene changes at 10s, 25s, 45s
 * // Default config: 10s base interval, 15s gap threshold, 3s gap fill
 * const timestamps = calculateRequiredTimestamps(60, [10, 25, 45]);
 * // Returns: [0, 10, 20, 25, 30, 40, 45, 50, 60] (9 frames)
 * // Instead of extracting 30 frames (every 2s), we only extract 9
 *
 * @example
 * // For a 132min (7920s) video with 50 scene changes the base interval is
 * // raised to 20s, giving 397 base timestamps plus up to 50 scene-change
 * // timestamps (about 400-450 frames) instead of ~3960 (one every 2s)
 * const timestamps = calculateRequiredTimestamps(7920, sceneChanges);
 */
export function calculateRequiredTimestamps(durationSeconds, sceneChanges = [], config = {}) {
    const cfg = { ...DEFAULT_CONFIG, ...config };
    if (durationSeconds <= 0) {
        return [];
    }
    // Adjust for scene density (same logic as adaptiveSampleWithScenes)
    const sceneCount = sceneChanges.length;
    cfg.baseIntervalSeconds = calculateAdaptiveBaseInterval(sceneCount, cfg.baseIntervalSeconds);
    // Adjust gap thresholds for high scene density
    if (sceneCount > 50) {
        cfg.gapThresholdSeconds = Math.max(cfg.gapThresholdSeconds, 60);
        cfg.gapFillIntervalSeconds = Math.max(cfg.gapFillIntervalSeconds, 10);
    }
    else if (sceneCount > 20) {
        cfg.gapThresholdSeconds = Math.max(cfg.gapThresholdSeconds, 40);
        cfg.gapFillIntervalSeconds = Math.max(cfg.gapFillIntervalSeconds, 5);
    }
    // Checked after the density adjustment, which may already have raised
    // the interval to a usable value.
    if (cfg.baseIntervalSeconds <= 0) {
        throw new RangeError(`baseIntervalSeconds must be greater than 0 (received ${cfg.baseIntervalSeconds})`);
    }
    const ascending = (a, b) => a - b;
    const timestamps = new Set();
    // Step 1: Add scene change timestamps (rounded to nearest second)
    for (const t of sceneChanges) {
        const rounded = Math.round(t);
        if (rounded >= 0 && rounded <= durationSeconds) {
            timestamps.add(rounded);
        }
    }
    // Step 2: Add base interval samples throughout video
    for (let t = 0; t <= durationSeconds; t += cfg.baseIntervalSeconds) {
        timestamps.add(Math.round(t));
    }
    // Step 3: Fill large gaps between samples. The gaps are measured on a
    // snapshot, so timestamps added here do not create further gaps.
    if (cfg.gapFillIntervalSeconds > 0) {
        const sorted = [...timestamps].sort(ascending);
        for (let i = 0; i < sorted.length - 1; i++) {
            const gap = sorted[i + 1] - sorted[i];
            if (gap > cfg.gapThresholdSeconds) {
                for (let t = sorted[i] + cfg.gapFillIntervalSeconds; t < sorted[i + 1]; t += cfg.gapFillIntervalSeconds) {
                    timestamps.add(Math.round(t));
                }
            }
        }
    }
    return [...timestamps].sort(ascending);
}
@@ -0,0 +1,106 @@
1
+ /**
2
+ * Escribano - Signal Extraction Service
3
+ *
4
+ * Extracts semantic signals (apps, urls, projects, topics) from cluster observations.
5
+ * Uses a tiered approach: regex for structured → patterns for semi-structured → LLM for semantic.
6
+ */
7
+ // ============================================================================
8
+ // TIER 1: REGEX-BASED EXTRACTION (URLs, Domains)
9
+ // ============================================================================
10
// Optional scheme and "www." prefix, then a dotted host name (capture
// group 1), then an optional path.
const URL_REGEX = /(?:https?:\/\/)?(?:www\.)?([a-zA-Z0-9][-a-zA-Z0-9]*(?:\.[a-zA-Z0-9][-a-zA-Z0-9]*)+)(?:\/[^\s]*)?/gi;
// Hosts that are never reported
const NOISE_DOMAINS = ['localhost', '127.0.0.1', '0.0.0.0', 'example.com'];
/**
 * Collect the domains mentioned in a list of texts.
 *
 * Every match of URL_REGEX contributes its lower-cased host. Hosts listed in
 * NOISE_DOMAINS and hosts starting with "192.168." are ignored.
 *
 * @param texts - Strings to scan
 * @returns Domains seen at least twice across all texts, most frequent first
 */
export function extractUrls(texts) {
    const occurrences = new Map();
    for (const text of texts) {
        for (const [, host] of text.matchAll(URL_REGEX)) {
            const domain = host.toLowerCase();
            const isNoise = NOISE_DOMAINS.includes(domain) || domain.startsWith('192.168.');
            if (isNoise) {
                continue;
            }
            occurrences.set(domain, (occurrences.get(domain) || 0) + 1);
        }
    }
    // Keep domains appearing at least twice, then order by frequency
    const frequent = [];
    for (const entry of occurrences) {
        if (entry[1] >= 2) {
            frequent.push(entry);
        }
    }
    frequent.sort((left, right) => right[1] - left[1]);
    return frequent.map(([domain]) => domain);
}
29
+ // ============================================================================
30
+ // TIER 2: PATTERN-BASED EXTRACTION (Apps, Projects)
31
+ // ============================================================================
32
// Application name -> pattern that identifies it in captured text.
// Declaration order determines the order of apps detected in the same text.
const APP_PATTERNS = {
    'VS Code': /(?:visual\s+studio\s+code|vscode|code\s+-|\[Code\]|\.vscode)/i,
    Chrome: /(?:google\s+chrome|chrome\s+-|\s+-\s+chrome)/i,
    Firefox: /(?:mozilla\s+firefox|firefox\s+-)/i,
    Safari: /(?:safari\s+-|apple\s+safari)/i,
    Terminal: /(?:terminal|iterm|iterm2|hyper)/i,
    Ghostty: /ghostty/i,
    Neovim: /(?:neovim|nvim|nvimtree)/i,
    Vim: /(?:\bvim\b(?!tree))/i,
    Slack: /(?:slack\s+-|\[Slack\])/i,
    Discord: /(?:discord\s+-|\[Discord\])/i,
    YouTube: /(?:youtube\.com|youtube\s+-)/i,
    GitHub: /(?:github\.com|github\s+-)/i,
    Figma: /(?:figma\.com|figma\s+-)/i,
    Notion: /(?:notion\.so|notion\s+-)/i,
    Obsidian: /(?:obsidian\s+-|\.obsidian)/i,
};
/**
 * Detect which known applications are mentioned in a list of texts.
 *
 * @param texts - Strings to scan
 * @returns Names of the apps whose pattern matched at least one text, each
 *          listed once, in order of first detection
 */
export function extractApps(texts) {
    const rules = Object.entries(APP_PATTERNS);
    const found = new Set();
    for (const text of texts) {
        rules
            .filter(([, pattern]) => pattern.test(text))
            .forEach(([appName]) => found.add(appName));
    }
    return [...found];
}
60
// Patterns whose first capture group is a candidate project name:
// a directory below a common workspace folder, a repository on
// github.com / gitlab.com, a package.json "name" field, and a home-relative
// directory that contains src/lib/packages.
const PROJECT_PATTERNS = [
    /(?:repos|projects|dev|src|code)\/([a-zA-Z0-9_-]+)/i,
    /(?:github\.com|gitlab\.com)\/[^/]+\/([a-zA-Z0-9_-]+)/i,
    /package\.json.*?"name":\s*"([^"]+)"/i,
    /~\/([a-zA-Z0-9_-]+)\/(?:src|lib|packages)/i,
];
/**
 * Guess project names from paths and repository references in the texts.
 *
 * For each text, only the first match of each pattern is considered.
 *
 * @param texts - Strings to scan
 * @returns Lower-cased project names, each listed once, in order of first
 *          appearance; common directory names are excluded
 */
export function extractProjects(texts) {
    // Common non-project names
    const ignoredNames = ['src', 'lib', 'dist', 'build', 'node_modules', 'packages'];
    const names = new Set();
    for (const text of texts) {
        for (const pattern of PROJECT_PATTERNS) {
            const found = text.match(pattern);
            if (!found || !found[1]) {
                continue;
            }
            const name = found[1].toLowerCase();
            if (ignoredNames.includes(name)) {
                continue;
            }
            names.add(name);
        }
    }
    return [...names];
}
82
+ // ============================================================================
83
+ // TIER 3: LLM-BASED EXTRACTION (Topics)
84
+ // ============================================================================
85
/**
 * Tier 3: delegate topic extraction to the intelligence service.
 *
 * @param observations - Observations, passed through unchanged
 * @param intelligence - Object exposing `extractTopics(observations)`
 * @returns Whatever `intelligence.extractTopics` resolves to; a rejection
 *          from the service propagates to the caller
 */
export async function extractTopics(observations, intelligence) {
    const topics = await intelligence.extractTopics(observations);
    return topics;
}
88
+ // ============================================================================
89
+ // COMBINED EXTRACTION
90
+ // ============================================================================
91
/**
 * Run all extraction tiers over a set of observations.
 *
 * Each observation is reduced to one text: for `type === 'visual'` the
 * `ocr_text` and `vlm_description` fields joined by a space, otherwise the
 * `text` field. Missing fields count as empty strings. URLs, apps and
 * projects are extracted from these texts; topics are extracted from the
 * observations themselves through the intelligence service.
 *
 * @param observations - Observations to analyze
 * @param intelligence - Service used for topic extraction
 * @returns `{ apps, urls, projects, topics }`
 */
export async function extractSignals(observations, intelligence) {
    const toText = (observation) => observation.type === 'visual'
        ? `${observation.ocr_text || ''} ${observation.vlm_description || ''}`
        : observation.text || '';
    const allTexts = observations.map(toText);
    // Tier 1 & 2: fast, synchronous extraction
    const urls = extractUrls(allTexts);
    const apps = extractApps(allTexts);
    const projects = extractProjects(allTexts);
    // Tier 3: LLM-based topics
    const topics = await extractTopics(observations, intelligence);
    return { apps, urls, projects, topics };
}