escribano 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. package/LICENSE +21 -0
  2. package/README.md +297 -0
  3. package/dist/0_types.js +279 -0
  4. package/dist/actions/classify-session.js +77 -0
  5. package/dist/actions/create-contexts.js +44 -0
  6. package/dist/actions/create-topic-blocks.js +68 -0
  7. package/dist/actions/extract-metadata.js +24 -0
  8. package/dist/actions/generate-artifact-v3.js +296 -0
  9. package/dist/actions/generate-artifact.js +61 -0
  10. package/dist/actions/generate-summary-v3.js +260 -0
  11. package/dist/actions/outline-index.js +204 -0
  12. package/dist/actions/process-recording-v2.js +494 -0
  13. package/dist/actions/process-recording-v3.js +412 -0
  14. package/dist/actions/process-session.js +183 -0
  15. package/dist/actions/publish-summary-v3.js +303 -0
  16. package/dist/actions/sync-to-outline.js +196 -0
  17. package/dist/adapters/audio.silero.adapter.js +69 -0
  18. package/dist/adapters/cap.adapter.js +94 -0
  19. package/dist/adapters/capture.cap.adapter.js +107 -0
  20. package/dist/adapters/capture.filesystem.adapter.js +124 -0
  21. package/dist/adapters/embedding.ollama.adapter.js +141 -0
  22. package/dist/adapters/intelligence.adapter.js +202 -0
  23. package/dist/adapters/intelligence.mlx.adapter.js +395 -0
  24. package/dist/adapters/intelligence.ollama.adapter.js +741 -0
  25. package/dist/adapters/publishing.outline.adapter.js +75 -0
  26. package/dist/adapters/storage.adapter.js +81 -0
  27. package/dist/adapters/storage.fs.adapter.js +83 -0
  28. package/dist/adapters/transcription.whisper.adapter.js +206 -0
  29. package/dist/adapters/video.ffmpeg.adapter.js +405 -0
  30. package/dist/adapters/whisper.adapter.js +168 -0
  31. package/dist/batch-context.js +329 -0
  32. package/dist/db/helpers.js +50 -0
  33. package/dist/db/index.js +95 -0
  34. package/dist/db/migrate.js +80 -0
  35. package/dist/db/repositories/artifact.sqlite.js +77 -0
  36. package/dist/db/repositories/cluster.sqlite.js +92 -0
  37. package/dist/db/repositories/context.sqlite.js +75 -0
  38. package/dist/db/repositories/index.js +10 -0
  39. package/dist/db/repositories/observation.sqlite.js +70 -0
  40. package/dist/db/repositories/recording.sqlite.js +56 -0
  41. package/dist/db/repositories/subject.sqlite.js +64 -0
  42. package/dist/db/repositories/topic-block.sqlite.js +45 -0
  43. package/dist/db/types.js +4 -0
  44. package/dist/domain/classification.js +60 -0
  45. package/dist/domain/context.js +97 -0
  46. package/dist/domain/index.js +2 -0
  47. package/dist/domain/observation.js +17 -0
  48. package/dist/domain/recording.js +41 -0
  49. package/dist/domain/segment.js +93 -0
  50. package/dist/domain/session.js +93 -0
  51. package/dist/domain/time-range.js +38 -0
  52. package/dist/domain/transcript.js +79 -0
  53. package/dist/index.js +173 -0
  54. package/dist/pipeline/context.js +162 -0
  55. package/dist/pipeline/events.js +2 -0
  56. package/dist/prerequisites.js +226 -0
  57. package/dist/scripts/rebuild-index.js +53 -0
  58. package/dist/scripts/seed-fixtures.js +290 -0
  59. package/dist/services/activity-segmentation.js +333 -0
  60. package/dist/services/activity-segmentation.test.js +191 -0
  61. package/dist/services/app-normalization.js +212 -0
  62. package/dist/services/cluster-merge.js +69 -0
  63. package/dist/services/clustering.js +237 -0
  64. package/dist/services/debug.js +58 -0
  65. package/dist/services/frame-sampling.js +318 -0
  66. package/dist/services/signal-extraction.js +106 -0
  67. package/dist/services/subject-grouping.js +342 -0
  68. package/dist/services/temporal-alignment.js +99 -0
  69. package/dist/services/vlm-enrichment.js +84 -0
  70. package/dist/services/vlm-service.js +130 -0
  71. package/dist/stats/index.js +3 -0
  72. package/dist/stats/observer.js +65 -0
  73. package/dist/stats/repository.js +36 -0
  74. package/dist/stats/resource-tracker.js +86 -0
  75. package/dist/stats/types.js +1 -0
  76. package/dist/test-classification-prompts.js +181 -0
  77. package/dist/tests/cap.adapter.test.js +75 -0
  78. package/dist/tests/capture.cap.adapter.test.js +69 -0
  79. package/dist/tests/classify-session.test.js +140 -0
  80. package/dist/tests/db/repositories.test.js +243 -0
  81. package/dist/tests/domain/time-range.test.js +31 -0
  82. package/dist/tests/integration.test.js +84 -0
  83. package/dist/tests/intelligence.adapter.test.js +102 -0
  84. package/dist/tests/intelligence.ollama.adapter.test.js +178 -0
  85. package/dist/tests/process-v2.test.js +90 -0
  86. package/dist/tests/services/clustering.test.js +112 -0
  87. package/dist/tests/services/frame-sampling.test.js +152 -0
  88. package/dist/tests/utils/ocr.test.js +76 -0
  89. package/dist/tests/utils/parallel.test.js +57 -0
  90. package/dist/tests/visual-observer.test.js +175 -0
  91. package/dist/utils/id-normalization.js +15 -0
  92. package/dist/utils/index.js +9 -0
  93. package/dist/utils/model-detector.js +154 -0
  94. package/dist/utils/ocr.js +80 -0
  95. package/dist/utils/parallel.js +32 -0
  96. package/migrations/001_initial.sql +109 -0
  97. package/migrations/002_clusters.sql +41 -0
  98. package/migrations/003_observations_vlm_fields.sql +14 -0
  99. package/migrations/004_observations_unique.sql +18 -0
  100. package/migrations/005_processing_stats.sql +29 -0
  101. package/migrations/006_vlm_raw_response.sql +6 -0
  102. package/migrations/007_subjects.sql +23 -0
  103. package/migrations/008_artifacts_recording.sql +6 -0
  104. package/migrations/009_artifact_subjects.sql +10 -0
  105. package/package.json +82 -0
  106. package/prompts/action-items.md +55 -0
  107. package/prompts/blog-draft.md +54 -0
  108. package/prompts/blog-research.md +87 -0
  109. package/prompts/card.md +54 -0
  110. package/prompts/classify-segment.md +38 -0
  111. package/prompts/classify.md +37 -0
  112. package/prompts/code-snippets.md +163 -0
  113. package/prompts/extract-metadata.md +149 -0
  114. package/prompts/notes.md +83 -0
  115. package/prompts/runbook.md +123 -0
  116. package/prompts/standup.md +50 -0
  117. package/prompts/step-by-step.md +125 -0
  118. package/prompts/subject-grouping.md +31 -0
  119. package/prompts/summary-v3.md +89 -0
  120. package/prompts/summary.md +77 -0
  121. package/prompts/topic-classifier.md +24 -0
  122. package/prompts/topic-extract.md +13 -0
  123. package/prompts/vlm-batch.md +21 -0
  124. package/prompts/vlm-single.md +19 -0
@@ -0,0 +1,741 @@
1
+ /**
2
+ * Escribano - Intelligence Adapter (Ollama)
3
+ *
4
+ * Implements IntelligenceService using Ollama REST API
5
+ */
6
+ import { readFileSync } from 'node:fs';
7
+ import { join } from 'node:path';
8
+ import { Agent, fetch as undiciFetch } from 'undici';
9
+ import { z } from 'zod';
10
+ import { classificationSchema, intelligenceConfigSchema, transcriptMetadataSchema, } from '../0_types.js';
11
// Verbose request/response logging, opt-in via ESCRIBANO_DEBUG_OLLAMA=true.
const DEBUG_OLLAMA = process.env.ESCRIBANO_DEBUG_OLLAMA === 'true';
// TODO: put in an util
/**
 * Log debug output with an "[Ollama]" prefix when debug mode is enabled.
 * No-op otherwise.
 * @param {...unknown} args - values forwarded to console.log
 */
export function debugLog(...args) {
    if (!DEBUG_OLLAMA) {
        return;
    }
    console.log('[Ollama]', ...args);
}
19
// Zod schema for VLM batch response validation.
// One item is expected per analyzed frame; `index` ties the item back to the
// position of its source image in the request batch.
const vlmBatchItemSchema = z.object({
    index: z.number(),          // frame position within the submitted batch
    description: z.string(),    // free-text description of on-screen content
    activity: z.string(),       // short activity label (e.g. "coding")
    apps: z.array(z.string()).default([]),   // visible application names
    topics: z.array(z.string()).default([]), // detected topics/projects
});
// The batch endpoint is expected to return an array of per-frame items.
const vlmBatchResponseSchema = z.array(vlmBatchItemSchema);
28
/**
 * Convert a Zod schema into the JSON-schema object accepted by Ollama's
 * `format` field. The top-level `$schema` marker is stripped because
 * Ollama does not expect it in structured-output requests.
 */
function toOllamaSchema(schema) {
    // biome-ignore lint/suspicious/noExplicitAny: needed for Zod schema conversion
    const { $schema: _ignored, ...ollamaSchema } = z.toJSONSchema(schema);
    return ollamaSchema;
}
37
// Model warm state - ensures model is loaded before first real request.
// Module-level cache shared by every service instance in this process.
const warmedModels = new Set();
// Warmup lock - prevents parallel warmup race condition: concurrent callers
// for the same model await one shared in-flight promise instead of each
// issuing a duplicate warmup request.
const warmupInProgress = new Map();
41
/**
 * Factory for the Ollama-backed IntelligenceService.
 *
 * Validates `config` against `intelligenceConfigSchema` (throws a ZodError on
 * invalid input) and returns an object of closures that all share the parsed
 * config. Each method delegates to a module-level implementation below.
 *
 * @param {object} [config] - raw adapter configuration (endpoint, model,
 *   timeouts, etc.); defaults applied by the schema.
 * @returns IntelligenceService-shaped object.
 */
export function createOllamaIntelligenceService(config = {}) {
    const parsedConfig = intelligenceConfigSchema.parse(config);
    return {
        classify: (transcript, visualLogs) => classifyWithOllama(transcript, parsedConfig, visualLogs),
        classifySegment: (segment, transcript) => classifySegmentWithOllama(segment, parsedConfig, transcript),
        extractMetadata: (transcript, classification, visualLogs) => extractMetadata(transcript, classification, parsedConfig, visualLogs),
        generate: (artifactType, context) => generateArtifact(artifactType, context, parsedConfig),
        describeImages: (images, options) => describeImagesWithOllama(images, parsedConfig, options),
        embedText: (texts, options) => embedTextWithOllama(texts, parsedConfig, options),
        extractTopics: (observations) => extractTopicsWithOllama(observations, parsedConfig),
        generateText: (prompt, options) => generateTextWithOllama(prompt, parsedConfig, options),
    };
}
54
/**
 * Compute embeddings for `texts` via Ollama's /embeddings endpoint.
 *
 * Requests are issued one text at a time (the chunking only bounds how the
 * input list is walked). Failures are best-effort: any empty text, non-OK
 * response, or network error yields an empty array at that position, so the
 * result always has the same length and order as `texts`.
 *
 * @param {string[]} texts - input strings to embed
 * @param {{ endpoint: string }} config - adapter config; endpoint is the
 *   chat/generate URL, rewritten here to the embeddings URL
 * @param {{ batchSize?: number }} [options] - chunk size (default 10)
 * @returns {Promise<number[][]>} one embedding (possibly []) per input
 */
async function embedTextWithOllama(texts, config, options = {}) {
    const chunkSize = options.batchSize ?? 10;
    const model = process.env.ESCRIBANO_EMBED_MODEL || 'nomic-embed-text';
    const baseUrl = config.endpoint.replace('/chat', '').replace('/generate', '');
    const endpoint = `${baseUrl}/embeddings`;
    // One request per text; never throws - maps failures to [].
    const embedOne = async (text) => {
        if (!text || text.trim().length === 0) {
            return []; // Empty embedding for empty text
        }
        try {
            const response = await fetch(endpoint, {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify({ model, prompt: text }),
            });
            if (!response.ok) {
                console.warn(`Embedding failed for text: ${text.substring(0, 50)}...`);
                return [];
            }
            const data = await response.json();
            return data.embedding || [];
        }
        catch (error) {
            console.warn(`Embedding request failed: ${error.message}`);
            return [];
        }
    };
    const embeddings = [];
    for (let offset = 0; offset < texts.length; offset += chunkSize) {
        for (const text of texts.slice(offset, offset + chunkSize)) {
            embeddings.push(await embedOne(text));
        }
    }
    return embeddings;
}
88
/**
 * Ensure `modelName` has been warmed (loaded into Ollama) before use.
 *
 * Three paths:
 *  1. already in `warmedModels` - return immediately;
 *  2. a warmup for this model is in flight - await that shared promise
 *     (prevents N parallel callers issuing N warmup requests);
 *  3. otherwise start a warmup, publish its promise in `warmupInProgress`,
 *     and clear the entry when it settles.
 *
 * Never rejects in practice: doModelWarmup swallows its own errors.
 */
async function ensureModelWarmed(modelName, config) {
    // Already warmed - fast path
    if (warmedModels.has(modelName)) {
        debugLog(`Model ${modelName} already warm`);
        return;
    }
    // Warmup already in progress - wait for it (prevents race condition)
    const existingWarmup = warmupInProgress.get(modelName);
    if (existingWarmup) {
        debugLog(`Waiting for existing warmup of ${modelName}...`);
        return existingWarmup;
    }
    // Start warmup and store the promise
    const warmupPromise = doModelWarmup(modelName, config);
    warmupInProgress.set(modelName, warmupPromise);
    try {
        await warmupPromise;
    }
    finally {
        // Always release the lock so a failed warmup can be retried later.
        warmupInProgress.delete(modelName);
    }
}
110
/**
 * Issue an empty /chat request so Ollama loads `modelName` into memory and
 * keeps it resident for `config.keepAlive`.
 *
 * On success the model is recorded in `warmedModels`. On a thrown error
 * (network failure, mocked fetch in tests) the model is ALSO marked warm to
 * avoid repeated attempts - warming is purely an optimization and the real
 * request will load the model anyway.
 *
 * NOTE(review): a non-OK HTTP response takes neither branch, so the model is
 * not marked warm and every subsequent call re-attempts warmup - confirm
 * whether that retry loop is intended.
 */
async function doModelWarmup(modelName, config) {
    try {
        console.log(`Warming up model: ${modelName}...`);
        // Endpoint may be configured as .../chat or .../generate; normalize to
        // the API root, then target /chat.
        const response = await fetch(`${config.endpoint.replace('/chat', '').replace('/generate', '')}/chat`, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({
                model: modelName,
                messages: [],
                keep_alive: config.keepAlive,
            }),
        });
        if (response.ok) {
            warmedModels.add(modelName);
            console.log(`✓ Model ${modelName} loaded and ready.`);
        }
    }
    catch (_error) {
        // In tests, model warming may fail - continue anyway
        // The real request will retry if needed
        console.log(` (Model warmup for ${modelName} skipped or failed, continuing...)`);
        warmedModels.add(modelName); // Mark as warmed to avoid repeated attempts
    }
}
134
/**
 * Best-effort connectivity probe against the Ollama HTTP API.
 *
 * Logs the number of locally available models when reachable. Never throws:
 * with Ollama down (or fetch mocked in tests) the failure is logged and
 * execution continues - health is purely informational for callers.
 *
 * Fix: previously the URL was hardcoded to http://localhost:11434, ignoring
 * any configured endpoint. The optional `config` parameter (backward
 * compatible - all existing callers pass nothing) derives the tags URL from
 * `config.endpoint` when available.
 *
 * @param {{ endpoint?: string }} [config] - optional adapter config
 */
async function checkOllamaHealth(config) {
    // Normalize a .../chat or .../generate endpoint down to the API root,
    // falling back to the default local Ollama instance.
    const apiBase = config?.endpoint
        ? config.endpoint.replace('/chat', '').replace('/generate', '')
        : 'http://localhost:11434/api';
    try {
        const response = await fetch(`${apiBase}/tags`);
        if (!response.ok) {
            throw new Error('Ollama API not accessible');
        }
        const data = await response.json();
        console.log('✓ Ollama is running and accessible');
        console.log(` Available models: ${data.models?.length || 0}`);
    }
    catch (_error) {
        // In tests with mocked fetch, this will fail - just log and continue
        console.log(' (Health check skipped or failed, continuing... )');
    }
}
149
/**
 * Calculate required context window size for the prompt.
 * @param promptLength - Length of the prompt string
 * @param maxContextSize - Maximum context size supported by the model
 * @returns Optimal context size (rounded to next power of 2, capped at the
 *   model's maximum)
 */
function calculateContextSize(promptLength, maxContextSize) {
    // ~4 characters per token is a reasonable estimate for English text.
    const estimatedTokens = Math.ceil(promptLength / 4);
    // Reserve headroom for the system prompt plus the model's response.
    const totalNeeded = estimatedTokens + 1024;
    // Snap up to the next supported window size.
    const ladder = [4096, 8192, 16384, 32768, 65536, 131072];
    const fit = ladder.find((size) => size >= totalNeeded);
    return fit === undefined ? maxContextSize : Math.min(fit, maxContextSize);
}
169
/**
 * Classify a full transcript with Ollama, printing a dot per second as a
 * progress indicator while the request is in flight.
 *
 * Fix: `clearInterval(tick)` now runs in a `finally` block. Previously a
 * thrown error from the health check, prompt loading, or the Ollama call
 * leaked the interval, which kept printing dots and held the process alive.
 *
 * @param transcript - transcript with `segments` and `fullText`
 * @param config - parsed intelligence config (endpoint, model, retries, ...)
 * @param visualLogs - optional visual observation logs merged into the prompt
 * @returns parsed classification JSON (validated by classificationSchema
 *   via Ollama structured output)
 */
async function classifyWithOllama(transcript, config, visualLogs) {
    console.log('Classifying transcript with Ollama...');
    const tick = setInterval(() => {
        process.stdout.write('.');
    }, 1000);
    try {
        await checkOllamaHealth();
        const prompt = loadClassifyPrompt(transcript, visualLogs);
        const raw = await callOllama(prompt, config, {
            expectJson: true,
            jsonSchema: toOllamaSchema(classificationSchema),
            model: config.model,
        });
        console.log('\nClassification completed.');
        return raw;
    }
    finally {
        // Always stop the progress ticker, success or failure.
        clearInterval(tick);
    }
}
185
/**
 * Classify a single activity segment with Ollama.
 *
 * Runs a (non-fatal) health check, builds the segment prompt from the
 * classify-segment.md template, and requests structured JSON constrained by
 * `classificationSchema`.
 *
 * @param segment - activity segment (timeRange, contexts, visualClusterIds)
 * @param config - parsed intelligence config
 * @param transcript - optional transcript providing `fullText` context
 * @returns parsed classification JSON from the model
 */
async function classifySegmentWithOllama(segment, config, transcript) {
    await checkOllamaHealth();
    const prompt = loadClassifySegmentPrompt(segment, transcript);
    const raw = await callOllama(prompt, config, {
        expectJson: true,
        jsonSchema: toOllamaSchema(classificationSchema),
        model: config.model,
    });
    return raw;
}
195
/**
 * Build the segment-classification prompt from prompts/classify-segment.md.
 *
 * Fix: substitutions now use replacer functions. With a plain replacement
 * string, `String.prototype.replace` interprets `$&`, `$'` and similar
 * sequences specially, so transcript or OCR text containing `$` could be
 * silently corrupted in the prompt.
 *
 * @param segment - segment with timeRange, contexts, visualClusterIds, and
 *   optionally transcriptSlice
 * @param transcript - optional transcript whose fullText takes precedence
 * @returns the filled-in prompt text
 * @throws if the template file cannot be read
 */
function loadClassifySegmentPrompt(segment, transcript) {
    const promptPath = join(process.cwd(), 'prompts', 'classify-segment.md');
    let prompt = readFileSync(promptPath, 'utf-8');
    const timeRangeStr = `[${segment.timeRange[0]}s - ${segment.timeRange[1]}s]`;
    const ocrContext = segment.contexts.map((c) => `${c.type}: ${c.value}`).join(', ') || 'None';
    // Prefer the session transcript, then the segment's own slice.
    const transcriptText = transcript?.fullText ||
        segment.transcriptSlice?.transcript.fullText ||
        'N/A';
    prompt = prompt.replace('{{TIME_RANGE}}', () => timeRangeStr);
    prompt = prompt.replace('{{VISUAL_CONTEXT}}', () => segment.visualClusterIds.length > 0 ? 'Multiple visual clusters' : 'N/A');
    prompt = prompt.replace('{{OCR_CONTEXT}}', () => ocrContext);
    prompt = prompt.replace('{{TRANSCRIPT_CONTENT}}', () => transcriptText);
    prompt = prompt.replace('{{VLM_DESCRIPTION}}', () => 'N/A'); // Placeholder for future integration
    return prompt;
}
210
/**
 * Build the full-transcript classification prompt from prompts/classify.md.
 *
 * Fix: template substitutions use replacer functions so that `$` sequences
 * inside the transcript text or visual log (e.g. "$&") are inserted
 * literally instead of being treated as String.replace special patterns.
 *
 * @param transcript - transcript with `segments` and `fullText`
 * @param visualLogs - optional array of visual logs; only the first log's
 *   entries are summarized into the prompt
 * @returns the filled-in prompt text
 * @throws if the template file cannot be read
 */
function loadClassifyPrompt(transcript, visualLogs) {
    const promptPath = join(process.cwd(), 'prompts', 'classify.md');
    let prompt = readFileSync(promptPath, 'utf-8');
    const segmentsText = transcript.segments
        .map((seg) => `[seg-${seg.id}] [${seg.start}s - ${seg.end}s] ${seg.text}`)
        .join('\n');
    // TODO: Implement robust transcript cleaning (Milestone 4)
    prompt = prompt.replace('{{TRANSCRIPT_ALL}}', () => transcript.fullText);
    prompt = prompt.replace('{{TRANSCRIPT_SEGMENTS}}', () => segmentsText);
    if (visualLogs && visualLogs.length > 0) {
        // One line per entry: [ts] [label]: description (OCR: ...)
        const visualSummary = visualLogs[0].entries
            .map((e) => {
                const timestamp = `[${e.timestamp}s]`;
                const label = e.heuristicLabel ? `[${e.heuristicLabel}]` : '';
                const description = e.description ? `: ${e.description}` : '';
                const ocr = e.ocrSummary
                    ? ` (OCR: ${e.ocrSummary.substring(0, 100)})`
                    : '';
                return `${timestamp} ${label}${description}${ocr}`;
            })
            .join('\n');
        prompt = prompt.replace('{{VISUAL_LOG}}', () => visualSummary);
    }
    else {
        prompt = prompt.replace('{{VISUAL_LOG}}', 'N/A');
    }
    return prompt;
}
238
/**
 * Build VLM prompt for single image analysis.
 * Reads prompts/vlm-single.md relative to the working directory; when the
 * file is missing (or unreadable) a built-in prompt is returned instead.
 */
function buildVLMSingleImagePrompt() {
    const inlineFallback = `Analyze this screenshot from a screen recording.

Provide:
- description: What's on screen? Be specific about content, text, and UI elements.
- activity: What is the user doing? (e.g., browsing, coding, reading, debugging)
- apps: Which applications are visible? (e.g., Chrome, VS Code, Terminal)
- topics: What topics, projects, or technical subjects? (e.g., Next.js, Bun, cloud services)

Output in this exact format:
description: ... | activity: ... | apps: [...] | topics: [...]`;
    try {
        return readFileSync(join(process.cwd(), 'prompts', 'vlm-single.md'), 'utf-8');
    }
    catch {
        // Fallback inline prompt if file not found
        return inlineFallback;
    }
}
261
/**
 * Parse a single-image VLM response of the form
 * "description: ... | activity: ... | apps: [...] | topics: [...]".
 *
 * Empty input yields a blank result; content that does not match the
 * expected pipe-delimited format is used verbatim as the description with
 * activity "unknown".
 */
function parseVLMResponse(content) {
    if (!content || content.trim().length === 0) {
        return { description: '', activity: 'unknown', apps: [], topics: [] };
    }
    const formatPattern = /^description:\s*(.+?)\s*\|\s*activity:\s*(.+?)\s*\|\s*apps:\s*(\[.+?\]|[^|]+)\s*\|\s*topics:\s*(.+)$/s;
    const match = content.match(formatPattern);
    if (!match) {
        debugLog('[parseVLMResponse] No match, using content as description');
        debugLog('[parseVLMResponse] Raw content:', content.substring(0, 500));
        return {
            description: content.trim(),
            activity: 'unknown',
            apps: [],
            topics: [],
        };
    }
    // "[a, b]" or "a, b" → ['a', 'b']; an empty list yields [].
    const toList = (raw) => {
        const inner = raw.replace(/^\[|\]$/g, '').trim();
        if (!inner) {
            return [];
        }
        return inner
            .split(',')
            .map((part) => part.trim())
            .filter(Boolean);
    };
    return {
        description: match[1].trim(),
        activity: match[2].trim(),
        apps: toList(match[3]),
        topics: toList(match[4]),
    };
}
300
/**
 * Describe images sequentially (one at a time).
 * Each image gets its own VLM request for accurate image-description mapping.
 *
 * Per image: read + base64-encode the file, POST to Ollama's /chat with the
 * single-image prompt, parse the pipe-delimited reply, and (on success)
 * append a result and invoke `options.onImageProcessed`. Up to 3 attempts
 * per image with linear backoff (1s, 2s); an image that fails all attempts
 * is skipped entirely - its index is absent from the returned array.
 *
 * NOTE(review): a fresh undici Agent is created per attempt and never
 * closed, and `clearTimeout` is skipped when the fetch throws - both leak
 * until GC / timer expiry. Consider one Agent per call plus a finally block.
 *
 * @param images - array of { imagePath, timestamp }
 * @param config - parsed config providing endpoint, timeout, keepAlive
 * @param options - { model?, onImageProcessed? }
 * @returns results for the successfully processed images, in input order
 */
async function describeImagesWithOllama(images, config, options = {}) {
    // Model priority: explicit option → env override → default VLM.
    const model = options.model ?? process.env.ESCRIBANO_VLM_MODEL ?? 'qwen3-vl:4b';
    // Normalize the configured endpoint down to the API root, then use /chat.
    const endpoint = `${config.endpoint.replace('/generate', '').replace('/chat', '')}/chat`;
    const { timeout, keepAlive } = config;
    const numPredict = Number(process.env.ESCRIBANO_VLM_NUM_PREDICT) || 30000;
    const allResults = [];
    const total = images.length;
    console.log(`[VLM] Processing ${total} images sequentially...`);
    console.log(`[VLM] Model: ${model}, num_predict: ${numPredict}`);
    const startTime = Date.now();
    for (let i = 0; i < images.length; i++) {
        const image = images[i];
        const current = i + 1;
        const imageStartTime = Date.now();
        let lastError = null;
        let success = false;
        // 3 retry attempts
        for (let attempt = 1; attempt <= 3 && !success; attempt++) {
            try {
                // Read and encode image
                let base64Image;
                try {
                    const buffer = readFileSync(image.imagePath);
                    base64Image = buffer.toString('base64');
                }
                catch (readError) {
                    // Wrap so retry logging shows what actually failed.
                    throw new Error(`Failed to read image: ${readError.message}`);
                }
                const prompt = buildVLMSingleImagePrompt();
                const controller = new AbortController();
                const timeoutId = setTimeout(() => controller.abort(), timeout);
                // Custom agent with extended headers timeout to prevent UND_ERR_HEADERS_TIMEOUT
                const agent = new Agent({
                    headersTimeout: timeout,
                    connectTimeout: timeout,
                });
                const response = await undiciFetch(endpoint, {
                    method: 'POST',
                    headers: { 'Content-Type': 'application/json' },
                    dispatcher: agent,
                    body: JSON.stringify({
                        model,
                        messages: [
                            {
                                role: 'user',
                                content: prompt,
                                images: [base64Image],
                            },
                        ],
                        stream: false,
                        keep_alive: keepAlive,
                        options: {
                            num_predict: numPredict,
                            temperature: 0.3,
                        },
                    }),
                    signal: controller.signal,
                });
                clearTimeout(timeoutId);
                if (!response.ok) {
                    throw new Error(`Ollama API error: ${response.status} ${response.statusText}`);
                }
                const data = (await response.json());
                debugLog('[VLM] Response data keys:', Object.keys(data).join(', '));
                // /chat replies use message.content; /generate-style use response.
                const content = data.message?.content || data.response || '';
                debugLog('[VLM] Raw content length:', content.length);
                debugLog('[VLM] Raw content preview:', content.substring(0, 500));
                const parsed = parseVLMResponse(content);
                if (parsed.activity === 'unknown' && parsed.description.length === 0) {
                    // Treat an unusable reply as a failure so the retry loop runs.
                    debugLog('[VLM] Parsed as empty/unknown, full response:', content);
                    throw new Error('VLM returned empty/unparseable response');
                }
                const result = {
                    index: i,
                    timestamp: image.timestamp,
                    imagePath: image.imagePath,
                    activity: parsed.activity,
                    description: parsed.description,
                    apps: parsed.apps,
                    topics: parsed.topics,
                };
                allResults.push(result);
                success = true;
                const duration = Date.now() - imageStartTime;
                // Log every 10 frames
                if (current % 10 === 0) {
                    console.log(`[VLM] [${current}/${total}] ✓ (${(duration / 1000).toFixed(1)}s)`);
                }
                // Call callback immediately after each image
                if (options.onImageProcessed) {
                    options.onImageProcessed(result, { current, total });
                }
            }
            catch (error) {
                lastError = error;
                if (attempt < 3) {
                    debugLog(`[VLM] [${current}/${total}] Attempt ${attempt}/3 failed: ${lastError.message}, retrying...`);
                    // Linear backoff: 1s after attempt 1, 2s after attempt 2.
                    await new Promise((resolve) => setTimeout(resolve, 1000 * attempt));
                }
            }
        }
        if (!success) {
            console.warn(`[VLM] [${current}/${total}] ✗ Failed after 3 attempts: ${lastError?.message}`);
            // Don't save - frame will be re-processed on next run
        }
    }
    const totalDuration = ((Date.now() - startTime) / 1000).toFixed(1);
    const successCount = allResults.length;
    console.log(`\n[VLM] Complete: ${successCount}/${total} frames in ${totalDuration}s`);
    return allResults;
}
415
/**
 * Derive 1-3 descriptive topic labels for a set of observations by
 * prompting the configured Ollama text model.
 *
 * Best-effort: returns [] when the first 20 observations contain no sample
 * longer than 10 characters, or when the Ollama request fails.
 *
 * @param observations - visual (vlm_description/ocr_text) or textual (text)
 * @param config - parsed intelligence config
 * @returns {Promise<string[]>} topic labels (possibly empty)
 */
async function extractTopicsWithOllama(observations, config) {
    // Pull a short text sample out of one observation.
    const sampleOf = (o) => {
        if (o.type === 'visual') {
            return o.vlm_description || o.ocr_text?.slice(0, 200) || '';
        }
        return o.text?.slice(0, 500) || '';
    };
    // Cap at 20 observations to bound prompt size; drop trivial snippets.
    const textSamples = observations
        .slice(0, 20)
        .map(sampleOf)
        .filter((t) => t.length > 10);
    if (textSamples.length === 0) {
        return [];
    }
    const joined = textSamples.join('\n---\n');
    let prompt;
    try {
        const template = readFileSync(join(process.cwd(), 'prompts', 'topic-extract.md'), 'utf-8');
        prompt = template.replace('{{OBSERVATIONS}}', joined);
    }
    catch {
        // Fallback inline prompt if file not found
        prompt = `Analyze these observations from a screen recording session and generate 1-3 descriptive topic labels.

Observations:
${joined}

Output ONLY a JSON object with this format:
{"topics": ["specific topic 1", "specific topic 2"]}

Rules:
- Be specific: "debugging TypeScript errors" not just "debugging"
- Be descriptive: "learning React hooks" not just "learning"
- Focus on what the user is DOING, not just what's visible
- Max 3 topics`;
    }
    try {
        const result = await callOllama(prompt, config, {
            expectJson: true,
            model: config.model,
        });
        return result.topics || [];
    }
    catch (error) {
        console.warn('Topic extraction failed:', error);
        return [];
    }
}
461
/**
 * Free-form text generation through callOllama.
 *
 * Model selection priority: explicit option → ESCRIBANO_LLM_MODEL env →
 * config.generationModel → config.model. When JSON mode is requested and the
 * call returns a parsed object, it is serialized back to a pretty-printed
 * string so the caller always receives text. Errors are logged and rethrown.
 *
 * @returns {Promise<string>} the generated text
 */
async function generateTextWithOllama(prompt, config, options) {
    const model = options?.model ||
        process.env.ESCRIBANO_LLM_MODEL ||
        config.generationModel ||
        config.model;
    const expectJson = options?.expectJson ?? false;
    try {
        const result = await callOllama(prompt, config, {
            expectJson,
            model,
            num_predict: options?.numPredict,
            think: options?.think,
        });
        // JSON mode may yield a parsed object - serialize; otherwise coerce.
        return expectJson && typeof result === 'object'
            ? JSON.stringify(result, null, 2)
            : String(result);
    }
    catch (error) {
        console.error('Text generation failed:', error.message);
        throw error;
    }
}
486
/**
 * Salvage a JSON object from a model's "thinking" text.
 *
 * Strategy:
 *  1. Try every ```json fenced block, returning the first that parses.
 *  2. Otherwise scan for balanced `{...}` spans (string- and escape-aware)
 *     and return the first that parses to a plain (non-array) object.
 *
 * Fix: the previous fallback used the lazy regex /\{[\s\S]*?\}/ which stops
 * at the FIRST closing brace, so any nested JSON (e.g. {"a":{"b":2},"c":3})
 * either failed entirely or returned only an inner sub-object. The balanced
 * scan below handles nesting correctly.
 *
 * @param {string} thinking - raw thinking text from the model
 * @returns {object|null} the recovered JSON value, or null if none found
 */
function extractJsonFromThinking(thinking) {
    // 1) Prefer explicit ```json fenced blocks.
    const fenceRegex = /```json\s*([\s\S]*?)```/g;
    let fence = fenceRegex.exec(thinking);
    while (fence !== null) {
        try {
            return JSON.parse(fence[1].trim());
        }
        catch {
            fence = fenceRegex.exec(thinking);
        }
    }
    // 2) Fallback: find balanced {...} spans and try each as JSON.
    for (let start = thinking.indexOf('{'); start !== -1; start = thinking.indexOf('{', start + 1)) {
        let depth = 0;
        let inString = false;
        let escaped = false;
        for (let i = start; i < thinking.length; i++) {
            const ch = thinking[i];
            if (escaped) {
                escaped = false;
                continue;
            }
            if (ch === '\\') {
                if (inString) {
                    escaped = true;
                }
                continue;
            }
            if (ch === '"') {
                inString = !inString;
                continue;
            }
            if (inString) {
                continue;
            }
            if (ch === '{') {
                depth++;
            }
            else if (ch === '}') {
                depth--;
                if (depth === 0) {
                    try {
                        const parsed = JSON.parse(thinking.slice(start, i + 1));
                        if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
                            return parsed;
                        }
                    }
                    catch {
                        // Not valid JSON - fall through to the next start position.
                    }
                    break;
                }
            }
        }
    }
    return null;
}
514
/**
 * Core Ollama /chat request with warmup, retries, timeout, structured-output
 * support, and thinking-block JSON salvage.
 *
 * Flow per attempt: POST the prompt (system + user messages) with a context
 * window sized from the prompt length; reject truncated/incomplete replies
 * (done !== true or done_reason !== 'stop'); in JSON mode parse the content,
 * falling back to extractJsonFromThinking; in text mode prefer `thinking`
 * when `content` is suspiciously short. Retries up to config.maxRetries with
 * linear backoff, then throws with the last error's message.
 *
 * NOTE(review): a fresh undici Agent is created per attempt and never
 * closed; in the text-mode short-content branch, `content.length` throws if
 * content is undefined (only `thinking` present) - confirm message.content
 * is always a string when done_reason === 'stop'.
 *
 * @param {string} prompt - user prompt text
 * @param config - { endpoint, maxRetries, timeout, keepAlive, maxContextSize }
 * @param options - { model, expectJson?, jsonSchema?, format?, think?,
 *   num_predict?, images? }
 * @returns parsed JSON (expectJson) or the response text
 * @throws Error after maxRetries failed attempts
 */
async function callOllama(prompt, config, options
// biome-ignore lint/suspicious/noExplicitAny: Ollama returns dynamic JSON or strings
) {
    // Short random id to correlate log lines for one logical request.
    const requestId = Math.random().toString(36).substring(2, 8);
    const requestStart = Date.now();
    // Model warm-up (errors handled gracefully, especially in tests)
    try {
        await ensureModelWarmed(options.model, config);
    }
    catch {
        // Continue even if warmup fails - model will load on first request
    }
    const { endpoint, maxRetries, timeout, keepAlive, maxContextSize } = config;
    // Calculate optimal context size for this prompt
    const contextSize = calculateContextSize(prompt.length, maxContextSize);
    debugLog(`[${requestId}] Request started`);
    debugLog(` Model: ${options.model}`);
    debugLog(` Prompt: ${prompt.length} chars (~${Math.ceil(prompt.length / 4)} tokens)`);
    debugLog(` Context: ${contextSize}, Timeout: ${timeout}ms`);
    debugLog(` Thinking: ${options.think ? 'enabled' : 'disabled'}`);
    debugLog(` Expect JSON: ${options.expectJson}`);
    debugLog(` Prompt:\n${prompt}`);
    let lastError = null;
    for (let attempt = 1; attempt <= maxRetries; attempt++) {
        const attemptStart = Date.now();
        try {
            const controller = new AbortController();
            const timeoutId = setTimeout(() => controller.abort(), timeout);
            debugLog(`[${requestId}] Attempt ${attempt}/${maxRetries}...`);
            // Custom agent with extended headers timeout to prevent UND_ERR_HEADERS_TIMEOUT
            // when models take a long time to generate the first token (thinking mode)
            const agent = new Agent({
                headersTimeout: timeout,
                connectTimeout: timeout,
            });
            const response = await undiciFetch(endpoint, {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                },
                dispatcher: agent,
                body: JSON.stringify({
                    model: options.model,
                    messages: [
                        {
                            role: 'system',
                            content: options.expectJson
                                ? 'You are a helpful assistant. Respond only with the requested JSON object, no other text.'
                                : 'You are a helpful assistant that generates high-quality markdown documentation.',
                        },
                        {
                            role: 'user',
                            content: prompt,
                            ...(options.images && { images: options.images }),
                        },
                    ],
                    stream: false,
                    keep_alive: keepAlive,
                    options: {
                        num_ctx: contextSize,
                        ...(options.num_predict && { num_predict: options.num_predict }),
                    },
                    // Structured output: a JSON schema when provided, else generic
                    // 'json' mode. An explicit options.format overrides both.
                    ...(options.expectJson && {
                        format: options.jsonSchema ?? 'json',
                    }),
                    ...(options.format && { format: options.format }),
                    ...(options.think !== undefined && { think: options.think }),
                }),
                signal: controller.signal,
            });
            clearTimeout(timeoutId);
            debugLog(`[${requestId}] response`, response);
            if (!response.ok) {
                throw new Error(`Ollama API error: ${response.status} ${response.statusText}`);
            }
            const data = (await response.json());
            debugLog(`[${requestId}] Response received in ${Date.now() - attemptStart}ms`, data);
            if (data.eval_count) {
                debugLog(` Tokens: ${data.eval_count} eval, ${data.prompt_eval_count || 0} prompt`);
            }
            debugLog(` Total request time: ${Date.now() - requestStart}ms`);
            if (!data.done || data.done_reason !== 'stop') {
                // Warn about truncation but don't throw - let caller decide
                if (data.done_reason === 'length') {
                    console.warn(`[Ollama] Response truncated (done_reason: length). ` +
                        `Used ${data.eval_count} tokens. Consider increasing num_predict.`);
                }
                // Throwing here feeds the retry loop below.
                throw new Error(`Incomplete response: done=${data.done}, reason=${data.done_reason}`);
            }
            if (options.expectJson) {
                const content = data.message.content;
                const thinking = data.message.thinking;
                try {
                    return JSON.parse(content);
                }
                catch {
                    // Some models leak the JSON into the thinking stream.
                    if (thinking) {
                        const extracted = extractJsonFromThinking(thinking);
                        if (extracted) {
                            debugLog(`[${requestId}] Extracted JSON from thinking block`);
                            return extracted;
                        }
                    }
                    throw new Error(`Failed to parse JSON response: ${content.slice(0, 100)}`);
                }
            }
            const content = data.message.content;
            const thinking = data.message.thinking;
            // Text mode: a very short content with a longer thinking block
            // usually means the answer ended up in the thinking stream.
            if (!content || content.length < 20) {
                if (thinking && thinking.length > content.length) {
                    debugLog(`[${requestId}] Using thinking content as fallback (${thinking.length} chars)`);
                    return thinking;
                }
            }
            return content;
        }
        catch (error) {
            lastError = error;
            if (error instanceof Error && error.name === 'AbortError') {
                console.error(`[Ollama] [${requestId}] Attempt ${attempt}/${maxRetries}: Request timed out after ${Date.now() - attemptStart}ms, retrying...`);
                debugLog(`[${requestId}] Timeout after ${Date.now() - attemptStart}ms`);
            }
            else {
                const errorMsg = lastError?.message || String(lastError);
                console.error(`[Ollama] [${requestId}] Attempt ${attempt}/${maxRetries}: Request failed: ${errorMsg} (retrying...)`);
                debugLog(`[${requestId}] Error:`, lastError);
            }
            if (attempt < maxRetries) {
                // Linear backoff between attempts.
                await new Promise((resolve) => setTimeout(resolve, 1000 * attempt));
            }
        }
    }
    debugLog(`[${requestId}] Failed after ${maxRetries} retries`);
    console.error(`[Ollama] [${requestId}] All ${maxRetries} attempts failed: ${lastError?.message}`);
    throw new Error(`Request failed after ${maxRetries} retries: ${lastError?.message}`);
}
650
/**
 * Extracts structured transcript metadata using the Ollama backend.
 *
 * Builds the metadata-extraction prompt, then issues a JSON-constrained
 * request against the generation model and returns the parsed result.
 *
 * @param {object} transcript - transcript with `segments` and `fullText`
 * @param {Record<string, number>} classification - session-type confidence scores
 * @param {object} config - adapter config; `generationModel` selects the model
 * @param {Array|undefined} visualLogs - optional visual scene logs
 * @returns {Promise<object>} parsed metadata matching `transcriptMetadataSchema`
 */
async function extractMetadata(transcript, classification, config, visualLogs) {
    const metadataPrompt = loadMetadataPrompt(transcript, classification, visualLogs);
    const requestOptions = {
        expectJson: true,
        jsonSchema: toOllamaSchema(transcriptMetadataSchema),
        // Metadata extraction benefits from larger model
        model: config.generationModel,
    };
    return callOllama(metadataPrompt, config, requestOptions);
}
659
/**
 * Builds the metadata-extraction prompt by filling placeholders in
 * `prompts/extract-metadata.md` (resolved against `process.cwd()`).
 *
 * Placeholders filled: {{CLASSIFICATION_SUMMARY}}, {{TRANSCRIPT_SEGMENTS}},
 * {{TRANSCRIPT_ALL}}, {{VISUAL_LOG}} (first occurrence of each).
 *
 * @param {{segments: Array<{start: number, end: number, text: string}>, fullText: string}} transcript
 * @param {Record<string, number>} classification - type -> confidence score (0-100)
 * @param {Array<{entries: Array}>|undefined} visualLogs - optional visual logs
 * @returns {string} the fully interpolated prompt
 * @throws if the prompt template file cannot be read
 */
function loadMetadataPrompt(transcript, classification, visualLogs) {
    const promptPath = join(process.cwd(), 'prompts', 'extract-metadata.md');
    let prompt = readFileSync(promptPath, 'utf-8');
    // Only surface classification types with meaningful confidence (>= 25%).
    const classificationSummary = Object.entries(classification)
        .filter(([_, score]) => score >= 25)
        .map(([type, score]) => `${type}: ${score}%`)
        .join(', ');
    const segmentsText = transcript.segments
        .map((seg) => `[${seg.start}s - ${seg.end}s] ${seg.text}`)
        .join('\n');
    // BUGFIX: use replacer functions so `$` sequences in the injected text
    // (e.g. "$&" in a transcript) are inserted literally instead of being
    // interpreted as special replacement patterns by String.prototype.replace.
    prompt = prompt.replace('{{CLASSIFICATION_SUMMARY}}', () => classificationSummary);
    prompt = prompt.replace('{{TRANSCRIPT_SEGMENTS}}', () => segmentsText);
    // TODO: Implement robust transcript cleaning (Milestone 4)
    prompt = prompt.replace('{{TRANSCRIPT_ALL}}', () => transcript.fullText);
    if (visualLogs && visualLogs.length > 0) {
        // NOTE(review): only the first visual log is summarized — confirm intent.
        const visualSummary = visualLogs[0].entries
            .map((e) => {
                const timestamp = `[${e.timestamp}s]`;
                const label = e.heuristicLabel ? `[${e.heuristicLabel}]` : '';
                const description = e.description ? `: ${e.description}` : '';
                // OCR text is capped at 100 chars to keep the prompt compact.
                const ocr = e.ocrSummary
                    ? ` (OCR: ${e.ocrSummary.substring(0, 100)})`
                    : '';
                return `${timestamp} ${label}${description}${ocr}`;
            })
            .join('\n');
        prompt = prompt.replace('{{VISUAL_LOG}}', () => visualSummary);
    }
    else {
        prompt = prompt.replace('{{VISUAL_LOG}}', 'N/A');
    }
    return prompt;
}
692
/**
 * Generates a free-form text artifact of the given type via the Ollama backend.
 *
 * Fills the artifact prompt template for `artifactType` from `context`, then
 * runs it through the generation model expecting plain text (no JSON).
 *
 * @param {string} artifactType - template name, resolved to `prompts/<type>.md`
 * @param {object} context - transcript, classification, and optional metadata/visual logs
 * @param {object} config - adapter config; `generationModel` selects the model
 * @returns {Promise<string>} the generated artifact text
 */
async function generateArtifact(artifactType, context, config) {
    const artifactPrompt = loadArtifactPrompt(artifactType, context);
    return callOllama(artifactPrompt, config, {
        expectJson: false,
        model: config.generationModel,
    });
}
700
/**
 * Builds an artifact-generation prompt by filling placeholders in
 * `prompts/<artifactType>.md` (resolved against `process.cwd()`).
 *
 * Placeholders filled (first occurrence of each): {{TRANSCRIPT_ALL}},
 * {{LANGUAGE}}, {{TRANSCRIPT_SEGMENTS}}, {{CLASSIFICATION_SUMMARY}},
 * {{VISUAL_LOG}}, {{METADATA}}, {{SPEAKERS}}, {{KEY_MOMENTS}},
 * {{ACTION_ITEMS}}, {{TECHNICAL_TERMS}}, {{CODE_SNIPPETS}}.
 *
 * @param {string} artifactType - template file name stem (internal value; not
 *   sanitized — do not pass untrusted input, it is used in a file path)
 * @param {{transcript: object, classification: Record<string, number>, visualLogs?: Array, metadata?: object}} context
 * @returns {string} the fully interpolated prompt
 * @throws if the prompt template file cannot be read
 */
function loadArtifactPrompt(artifactType, context) {
    const promptPath = join(process.cwd(), 'prompts', `${artifactType}.md`);
    let prompt = readFileSync(promptPath, 'utf-8');
    // BUGFIX: replace via a function so `$` sequences in the injected value
    // (e.g. "$&" in transcript text or JSON) are inserted literally instead of
    // being interpreted as special replacement patterns by String.prototype.replace.
    const fill = (token, value) => {
        prompt = prompt.replace(token, () => value);
    };
    // TODO: Implement robust transcript cleaning (Milestone 4)
    fill('{{TRANSCRIPT_ALL}}', context.transcript.fullText);
    fill('{{LANGUAGE}}', context.transcript.language || 'en');
    const segmentsText = context.transcript.segments
        .map((seg) => `[${seg.start}s - ${seg.end}s] ${seg.text}`)
        .join('\n');
    fill('{{TRANSCRIPT_SEGMENTS}}', segmentsText);
    // Only surface classification types with meaningful confidence (>= 25%).
    const classificationSummary = Object.entries(context.classification)
        .filter(([_, score]) => score >= 25)
        .map(([type, score]) => `${type}: ${score}%`)
        .join(', ');
    fill('{{CLASSIFICATION_SUMMARY}}', classificationSummary);
    if (context.visualLogs && context.visualLogs.length > 0) {
        // NOTE(review): only the first visual log is summarized — confirm intent.
        const visualSummary = context.visualLogs[0].entries
            .map((e, i) => `[Scene ${i}] at ${e.timestamp}s: ${e.description || 'Action on screen'}`)
            .join('\n');
        fill('{{VISUAL_LOG}}', visualSummary);
    }
    else {
        fill('{{VISUAL_LOG}}', 'N/A');
    }
    if (context.metadata) {
        const { metadata } = context;
        fill('{{METADATA}}', JSON.stringify(metadata, null, 2));
        fill('{{SPEAKERS}}', JSON.stringify(metadata.speakers || [], null, 2));
        fill('{{KEY_MOMENTS}}', JSON.stringify(metadata.keyMoments || [], null, 2));
        fill('{{ACTION_ITEMS}}', JSON.stringify(metadata.actionItems || [], null, 2));
        fill('{{TECHNICAL_TERMS}}', JSON.stringify(metadata.technicalTerms || [], null, 2));
        fill('{{CODE_SNIPPETS}}', JSON.stringify(metadata.codeSnippets || [], null, 2));
    }
    else {
        // No metadata available: blank out every metadata-derived placeholder.
        const metadataTokens = [
            '{{METADATA}}',
            '{{SPEAKERS}}',
            '{{KEY_MOMENTS}}',
            '{{ACTION_ITEMS}}',
            '{{TECHNICAL_TERMS}}',
            '{{CODE_SNIPPETS}}',
        ];
        for (const token of metadataTokens) {
            fill(token, 'N/A');
        }
    }
    return prompt;
}