escribano 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +297 -0
- package/dist/0_types.js +279 -0
- package/dist/actions/classify-session.js +77 -0
- package/dist/actions/create-contexts.js +44 -0
- package/dist/actions/create-topic-blocks.js +68 -0
- package/dist/actions/extract-metadata.js +24 -0
- package/dist/actions/generate-artifact-v3.js +296 -0
- package/dist/actions/generate-artifact.js +61 -0
- package/dist/actions/generate-summary-v3.js +260 -0
- package/dist/actions/outline-index.js +204 -0
- package/dist/actions/process-recording-v2.js +494 -0
- package/dist/actions/process-recording-v3.js +412 -0
- package/dist/actions/process-session.js +183 -0
- package/dist/actions/publish-summary-v3.js +303 -0
- package/dist/actions/sync-to-outline.js +196 -0
- package/dist/adapters/audio.silero.adapter.js +69 -0
- package/dist/adapters/cap.adapter.js +94 -0
- package/dist/adapters/capture.cap.adapter.js +107 -0
- package/dist/adapters/capture.filesystem.adapter.js +124 -0
- package/dist/adapters/embedding.ollama.adapter.js +141 -0
- package/dist/adapters/intelligence.adapter.js +202 -0
- package/dist/adapters/intelligence.mlx.adapter.js +395 -0
- package/dist/adapters/intelligence.ollama.adapter.js +741 -0
- package/dist/adapters/publishing.outline.adapter.js +75 -0
- package/dist/adapters/storage.adapter.js +81 -0
- package/dist/adapters/storage.fs.adapter.js +83 -0
- package/dist/adapters/transcription.whisper.adapter.js +206 -0
- package/dist/adapters/video.ffmpeg.adapter.js +405 -0
- package/dist/adapters/whisper.adapter.js +168 -0
- package/dist/batch-context.js +329 -0
- package/dist/db/helpers.js +50 -0
- package/dist/db/index.js +95 -0
- package/dist/db/migrate.js +80 -0
- package/dist/db/repositories/artifact.sqlite.js +77 -0
- package/dist/db/repositories/cluster.sqlite.js +92 -0
- package/dist/db/repositories/context.sqlite.js +75 -0
- package/dist/db/repositories/index.js +10 -0
- package/dist/db/repositories/observation.sqlite.js +70 -0
- package/dist/db/repositories/recording.sqlite.js +56 -0
- package/dist/db/repositories/subject.sqlite.js +64 -0
- package/dist/db/repositories/topic-block.sqlite.js +45 -0
- package/dist/db/types.js +4 -0
- package/dist/domain/classification.js +60 -0
- package/dist/domain/context.js +97 -0
- package/dist/domain/index.js +2 -0
- package/dist/domain/observation.js +17 -0
- package/dist/domain/recording.js +41 -0
- package/dist/domain/segment.js +93 -0
- package/dist/domain/session.js +93 -0
- package/dist/domain/time-range.js +38 -0
- package/dist/domain/transcript.js +79 -0
- package/dist/index.js +173 -0
- package/dist/pipeline/context.js +162 -0
- package/dist/pipeline/events.js +2 -0
- package/dist/prerequisites.js +226 -0
- package/dist/scripts/rebuild-index.js +53 -0
- package/dist/scripts/seed-fixtures.js +290 -0
- package/dist/services/activity-segmentation.js +333 -0
- package/dist/services/activity-segmentation.test.js +191 -0
- package/dist/services/app-normalization.js +212 -0
- package/dist/services/cluster-merge.js +69 -0
- package/dist/services/clustering.js +237 -0
- package/dist/services/debug.js +58 -0
- package/dist/services/frame-sampling.js +318 -0
- package/dist/services/signal-extraction.js +106 -0
- package/dist/services/subject-grouping.js +342 -0
- package/dist/services/temporal-alignment.js +99 -0
- package/dist/services/vlm-enrichment.js +84 -0
- package/dist/services/vlm-service.js +130 -0
- package/dist/stats/index.js +3 -0
- package/dist/stats/observer.js +65 -0
- package/dist/stats/repository.js +36 -0
- package/dist/stats/resource-tracker.js +86 -0
- package/dist/stats/types.js +1 -0
- package/dist/test-classification-prompts.js +181 -0
- package/dist/tests/cap.adapter.test.js +75 -0
- package/dist/tests/capture.cap.adapter.test.js +69 -0
- package/dist/tests/classify-session.test.js +140 -0
- package/dist/tests/db/repositories.test.js +243 -0
- package/dist/tests/domain/time-range.test.js +31 -0
- package/dist/tests/integration.test.js +84 -0
- package/dist/tests/intelligence.adapter.test.js +102 -0
- package/dist/tests/intelligence.ollama.adapter.test.js +178 -0
- package/dist/tests/process-v2.test.js +90 -0
- package/dist/tests/services/clustering.test.js +112 -0
- package/dist/tests/services/frame-sampling.test.js +152 -0
- package/dist/tests/utils/ocr.test.js +76 -0
- package/dist/tests/utils/parallel.test.js +57 -0
- package/dist/tests/visual-observer.test.js +175 -0
- package/dist/utils/id-normalization.js +15 -0
- package/dist/utils/index.js +9 -0
- package/dist/utils/model-detector.js +154 -0
- package/dist/utils/ocr.js +80 -0
- package/dist/utils/parallel.js +32 -0
- package/migrations/001_initial.sql +109 -0
- package/migrations/002_clusters.sql +41 -0
- package/migrations/003_observations_vlm_fields.sql +14 -0
- package/migrations/004_observations_unique.sql +18 -0
- package/migrations/005_processing_stats.sql +29 -0
- package/migrations/006_vlm_raw_response.sql +6 -0
- package/migrations/007_subjects.sql +23 -0
- package/migrations/008_artifacts_recording.sql +6 -0
- package/migrations/009_artifact_subjects.sql +10 -0
- package/package.json +82 -0
- package/prompts/action-items.md +55 -0
- package/prompts/blog-draft.md +54 -0
- package/prompts/blog-research.md +87 -0
- package/prompts/card.md +54 -0
- package/prompts/classify-segment.md +38 -0
- package/prompts/classify.md +37 -0
- package/prompts/code-snippets.md +163 -0
- package/prompts/extract-metadata.md +149 -0
- package/prompts/notes.md +83 -0
- package/prompts/runbook.md +123 -0
- package/prompts/standup.md +50 -0
- package/prompts/step-by-step.md +125 -0
- package/prompts/subject-grouping.md +31 -0
- package/prompts/summary-v3.md +89 -0
- package/prompts/summary.md +77 -0
- package/prompts/topic-classifier.md +24 -0
- package/prompts/topic-extract.md +13 -0
- package/prompts/vlm-batch.md +21 -0
- package/prompts/vlm-single.md +19 -0
|
@@ -0,0 +1,741 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Escribano - Intelligence Adapter (Ollama)
|
|
3
|
+
*
|
|
4
|
+
* Implements IntelligenceService using Ollama REST API
|
|
5
|
+
*/
|
|
6
|
+
import { readFileSync } from 'node:fs';
|
|
7
|
+
import { join } from 'node:path';
|
|
8
|
+
import { Agent, fetch as undiciFetch } from 'undici';
|
|
9
|
+
import { z } from 'zod';
|
|
10
|
+
import { classificationSchema, intelligenceConfigSchema, transcriptMetadataSchema, } from '../0_types.js';
|
|
11
|
+
// Debug logging controlled by environment variable
const DEBUG_OLLAMA = process.env.ESCRIBANO_DEBUG_OLLAMA === 'true';
// TODO: put in an util
/**
 * Log debug output prefixed with "[Ollama]".
 * No-op unless the ESCRIBANO_DEBUG_OLLAMA env var equals 'true'.
 */
export function debugLog(...args) {
    if (!DEBUG_OLLAMA) {
        return;
    }
    console.log('[Ollama]', ...args);
}
|
|
19
|
+
// Zod schema for VLM batch response validation
// NOTE(review): not referenced by the sequential single-image path below,
// which parses responses with a regex (parseVLMResponse) — presumably used
// by a batch mode elsewhere in the module; confirm before removing.
const vlmBatchItemSchema = z.object({
    index: z.number(),
    description: z.string(),
    activity: z.string(),
    // apps/topics default to [] so a VLM omitting them still validates.
    apps: z.array(z.string()).default([]),
    topics: z.array(z.string()).default([]),
});
const vlmBatchResponseSchema = z.array(vlmBatchItemSchema);
|
|
28
|
+
/**
 * Helper to convert Zod schema to Ollama-compatible JSON schema.
 * Ollama's structured-output `format` field does not accept the `$schema`
 * key, so it is stripped from the generated JSON schema.
 */
function toOllamaSchema(schema) {
    // biome-ignore lint/suspicious/noExplicitAny: needed for Zod schema conversion
    const { $schema, ...ollamaSchema } = z.toJSONSchema(schema);
    return ollamaSchema;
}
|
|
37
|
+
// Model warm state - ensures model is loaded before first real request
// (module-level cache of model names whose warmup call has completed)
const warmedModels = new Set();
// Warmup lock - prevents parallel warmup race condition
// (model name -> in-flight warmup promise; entry removed once it settles)
const warmupInProgress = new Map();
|
|
41
|
+
/**
 * Create an IntelligenceService backed by the Ollama REST API.
 * @param config - partial configuration; validated and defaulted through
 *   intelligenceConfigSchema before use.
 * @returns service object exposing classify/extract/generate/embed methods,
 *   each delegating to the corresponding Ollama helper with the parsed config.
 */
export function createOllamaIntelligenceService(config = {}) {
    const cfg = intelligenceConfigSchema.parse(config);
    return {
        classify(transcript, visualLogs) {
            return classifyWithOllama(transcript, cfg, visualLogs);
        },
        classifySegment(segment, transcript) {
            return classifySegmentWithOllama(segment, cfg, transcript);
        },
        extractMetadata(transcript, classification, visualLogs) {
            return extractMetadata(transcript, classification, cfg, visualLogs);
        },
        generate(artifactType, context) {
            return generateArtifact(artifactType, context, cfg);
        },
        describeImages(images, options) {
            return describeImagesWithOllama(images, cfg, options);
        },
        embedText(texts, options) {
            return embedTextWithOllama(texts, cfg, options);
        },
        extractTopics(observations) {
            return extractTopicsWithOllama(observations, cfg);
        },
        generateText(prompt, options) {
            return generateTextWithOllama(prompt, cfg, options);
        },
    };
}
|
|
54
|
+
/**
 * Embed texts via the Ollama embeddings endpoint.
 *
 * Requests within each batch run concurrently (Promise.all); batches run
 * sequentially, so at most `batchSize` requests are in flight at once.
 * Previously each text was awaited one-by-one, so `batchSize` had no effect.
 * Failures and empty inputs yield an empty array at the matching index, so
 * the result always has one entry per input text, in input order.
 *
 * @param texts - strings to embed
 * @param config - intelligence config (only `endpoint` is used here)
 * @param options - { batchSize?: number } max concurrent requests (default 10)
 * @returns array of embedding vectors; [] for empty or failed texts
 */
async function embedTextWithOllama(texts, config, options = {}) {
    const batchSize = options.batchSize ?? 10;
    const model = process.env.ESCRIBANO_EMBED_MODEL || 'nomic-embed-text';
    // Config endpoint points at /chat or /generate; rewrite it to /embeddings.
    const endpoint = `${config.endpoint.replace('/chat', '').replace('/generate', '')}/embeddings`;
    // Embed a single text; never throws (best-effort, [] on any failure).
    const embedOne = async (text) => {
        if (!text || text.trim().length === 0) {
            return []; // Empty embedding for empty text
        }
        try {
            const response = await fetch(endpoint, {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify({ model, prompt: text }),
            });
            if (!response.ok) {
                console.warn(`Embedding failed for text: ${text.substring(0, 50)}...`);
                return [];
            }
            const data = await response.json();
            return data.embedding || [];
        }
        catch (error) {
            console.warn(`Embedding request failed: ${error.message}`);
            return [];
        }
    };
    const embeddings = [];
    for (let i = 0; i < texts.length; i += batchSize) {
        const batch = texts.slice(i, i + batchSize);
        // Promise.all preserves input order within the batch.
        embeddings.push(...(await Promise.all(batch.map(embedOne))));
    }
    return embeddings;
}
|
|
88
|
+
/**
 * Ensure `modelName` has been warmed exactly once, deduplicating concurrent
 * callers: the first caller starts the warmup, later callers await the same
 * in-flight promise instead of issuing a second warmup request.
 */
async function ensureModelWarmed(modelName, config) {
    // Fast path: warmup already completed for this model.
    if (warmedModels.has(modelName)) {
        debugLog(`Model ${modelName} already warm`);
        return;
    }
    // Another caller is already warming this model - join its promise.
    const inFlight = warmupInProgress.get(modelName);
    if (inFlight) {
        debugLog(`Waiting for existing warmup of ${modelName}...`);
        return inFlight;
    }
    // First caller: start the warmup and publish the promise for others.
    const warmup = doModelWarmup(modelName, config);
    warmupInProgress.set(modelName, warmup);
    try {
        await warmup;
    }
    finally {
        // Always release the lock, even if warmup rejected.
        warmupInProgress.delete(modelName);
    }
}
|
|
110
|
+
/**
 * Issue one warmup request so Ollama loads `modelName` into memory before
 * the first real call. Errors are tolerated (e.g. in tests with a mocked
 * fetch); on failure the model is still marked warm so warmup is not
 * retried repeatedly - the real request will trigger a load if needed.
 */
async function doModelWarmup(modelName, config) {
    const baseUrl = config.endpoint.replace('/chat', '').replace('/generate', '');
    try {
        console.log(`Warming up model: ${modelName}...`);
        // Empty messages array = load-only request in the Ollama chat API.
        const warmupBody = JSON.stringify({
            model: modelName,
            messages: [],
            keep_alive: config.keepAlive,
        });
        const response = await fetch(`${baseUrl}/chat`, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: warmupBody,
        });
        if (response.ok) {
            warmedModels.add(modelName);
            console.log(`✓ Model ${modelName} loaded and ready.`);
        }
    }
    catch (_error) {
        // In tests, model warming may fail - continue anyway
        // The real request will retry if needed
        console.log(` (Model warmup for ${modelName} skipped or failed, continuing...)`);
        warmedModels.add(modelName); // Mark as warmed to avoid repeated attempts
    }
}
|
|
134
|
+
/**
 * Best-effort health check against the Ollama server: logs availability and
 * the number of installed models. Never throws - failures are logged and
 * swallowed so mocked/test environments keep working.
 *
 * Generalized: the base URL is now a parameter (defaulting to the local
 * daemon, preserving the previous hard-coded behavior) so callers with a
 * non-default endpoint can reuse this check.
 *
 * @param baseUrl - Ollama base URL, e.g. 'http://localhost:11434'
 */
async function checkOllamaHealth(baseUrl = 'http://localhost:11434') {
    try {
        const response = await fetch(`${baseUrl}/api/tags`);
        if (!response.ok) {
            throw new Error('Ollama API not accessible');
        }
        const data = await response.json();
        console.log('✓ Ollama is running and accessible');
        console.log(` Available models: ${data.models?.length || 0}`);
    }
    catch (_error) {
        // In tests with mocked fetch, this will fail - just log and continue
        console.log(' (Health check skipped or failed, continuing... )');
    }
}
|
|
149
|
+
/**
 * Calculate required context window size for the prompt
 * @param promptLength - Length of the prompt string
 * @param maxContextSize - Maximum context size supported by the model
 * @returns Optimal context size (rounded to next power of 2)
 */
function calculateContextSize(promptLength, maxContextSize) {
    // Rough estimate: ~4 chars per token for English text, plus at least
    // 1024 tokens of headroom for the system prompt and the response.
    const requiredTokens = Math.ceil(promptLength / 4) + 1024;
    // Smallest standard window that fits: 4096 -> 8192 -> ... -> 131072.
    const candidate = [4096, 8192, 16384, 32768, 65536, 131072]
        .find((size) => size >= requiredTokens);
    // Cap at the model's maximum; fall back to it when even 131072 is too small.
    return candidate === undefined ? maxContextSize : Math.min(candidate, maxContextSize);
}
|
|
169
|
+
/**
 * Classify a full transcript with Ollama, printing one dot per second as a
 * progress heartbeat while the request runs.
 *
 * Fix: the heartbeat interval is now cleared in `finally`. Previously, a
 * throwing health check or Ollama call leaked the timer, which kept printing
 * dots and held the event loop open.
 *
 * @param transcript - transcript object fed to the classify prompt
 * @param config - parsed intelligence config (model, endpoint, timeouts)
 * @param visualLogs - optional visual activity logs merged into the prompt
 * @returns the classification JSON produced by the model
 */
async function classifyWithOllama(transcript, config, visualLogs) {
    console.log('Classifying transcript with Ollama...');
    const tick = setInterval(() => {
        process.stdout.write('.');
    }, 1000);
    try {
        await checkOllamaHealth();
        const prompt = loadClassifyPrompt(transcript, visualLogs);
        const raw = await callOllama(prompt, config, {
            expectJson: true,
            jsonSchema: toOllamaSchema(classificationSchema),
            model: config.model,
        });
        console.log('\nClassification completed.');
        return raw;
    }
    finally {
        clearInterval(tick);
    }
}
|
|
185
|
+
/**
 * Classify a single session segment with Ollama, enforcing the
 * classification JSON schema on the structured response.
 */
async function classifySegmentWithOllama(segment, config, transcript) {
    await checkOllamaHealth();
    const prompt = loadClassifySegmentPrompt(segment, transcript);
    return callOllama(prompt, config, {
        expectJson: true,
        jsonSchema: toOllamaSchema(classificationSchema),
        model: config.model,
    });
}
|
|
195
|
+
/**
 * Build the segment-classification prompt from prompts/classify-segment.md,
 * substituting each template placeholder (first occurrence only) with data
 * derived from the segment and optional transcript.
 */
function loadClassifySegmentPrompt(segment, transcript) {
    const template = readFileSync(join(process.cwd(), 'prompts', 'classify-segment.md'), 'utf-8');
    const transcriptText = transcript?.fullText ||
        segment.transcriptSlice?.transcript.fullText ||
        'N/A';
    const replacements = [
        ['{{TIME_RANGE}}', `[${segment.timeRange[0]}s - ${segment.timeRange[1]}s]`],
        ['{{VISUAL_CONTEXT}}', segment.visualClusterIds.length > 0 ? 'Multiple visual clusters' : 'N/A'],
        ['{{OCR_CONTEXT}}', segment.contexts.map((c) => `${c.type}: ${c.value}`).join(', ') || 'None'],
        ['{{TRANSCRIPT_CONTENT}}', transcriptText],
        ['{{VLM_DESCRIPTION}}', 'N/A'], // Placeholder for future integration
    ];
    let prompt = template;
    for (const [placeholder, value] of replacements) {
        prompt = prompt.replace(placeholder, value);
    }
    return prompt;
}
|
|
210
|
+
/**
 * Build the full-transcript classification prompt from prompts/classify.md.
 * Substitutes the whole transcript, its timestamped segments, and - when
 * visual logs are provided - a per-entry visual summary built from the
 * first log's entries ('N/A' otherwise).
 */
function loadClassifyPrompt(transcript, visualLogs) {
    const template = readFileSync(join(process.cwd(), 'prompts', 'classify.md'), 'utf-8');
    const segmentsText = transcript.segments
        .map((seg) => `[seg-${seg.id}] [${seg.start}s - ${seg.end}s] ${seg.text}`)
        .join('\n');
    // TODO: Implement robust transcript cleaning (Milestone 4)
    let visualSummary = 'N/A';
    if (visualLogs && visualLogs.length > 0) {
        // One line per entry: "[<ts>s] [label]: description (OCR: ...)",
        // where label/description/OCR parts are included only when present.
        visualSummary = visualLogs[0].entries
            .map((e, _i) => [
                `[${e.timestamp}s]`,
                ' ',
                e.heuristicLabel ? `[${e.heuristicLabel}]` : '',
                e.description ? `: ${e.description}` : '',
                e.ocrSummary ? ` (OCR: ${e.ocrSummary.substring(0, 100)})` : '',
            ].join(''))
            .join('\n');
    }
    return template
        .replace('{{TRANSCRIPT_ALL}}', transcript.fullText)
        .replace('{{TRANSCRIPT_SEGMENTS}}', segmentsText)
        .replace('{{VISUAL_LOG}}', visualSummary);
}
|
|
238
|
+
/**
 * Build VLM prompt for single image analysis.
 * Loads from prompts/vlm-single.md with fallback to inline.
 */
function buildVLMSingleImagePrompt() {
    const promptPath = join(process.cwd(), 'prompts', 'vlm-single.md');
    try {
        return readFileSync(promptPath, 'utf-8');
    }
    catch {
        // Fallback inline prompt if file not found
        return `Analyze this screenshot from a screen recording.

Provide:
- description: What's on screen? Be specific about content, text, and UI elements.
- activity: What is the user doing? (e.g., browsing, coding, reading, debugging)
- apps: Which applications are visible? (e.g., Chrome, VS Code, Terminal)
- topics: What topics, projects, or technical subjects? (e.g., Next.js, Bun, cloud services)

Output in this exact format:
description: ... | activity: ... | apps: [...] | topics: [...]`;
    }
}
|
|
261
|
+
/**
 * Parse single-image VLM response.
 * Expects "description: ... | activity: ... | apps: [...] | topics: [...]".
 * On empty input returns blank fields; on any other unparseable shape the
 * whole content becomes the description with unknown/empty remaining fields.
 */
function parseVLMResponse(content) {
    if (!content || content.trim().length === 0) {
        return { description: '', activity: 'unknown', apps: [], topics: [] };
    }
    const regex = /^description:\s*(.+?)\s*\|\s*activity:\s*(.+?)\s*\|\s*apps:\s*(\[.+?\]|[^|]+)\s*\|\s*topics:\s*(.+)$/s;
    const match = content.match(regex);
    if (!match) {
        debugLog('[parseVLMResponse] No match, using content as description');
        debugLog('[parseVLMResponse] Raw content:', content.substring(0, 500));
        return {
            description: content.trim(),
            activity: 'unknown',
            apps: [],
            topics: [],
        };
    }
    // Turn "[a, b]" (or bare "a, b") into a trimmed, non-empty string array.
    const parseList = (raw) => {
        const inner = raw.replace(/^\[|\]$/g, '').trim();
        if (!inner) {
            return [];
        }
        return inner
            .split(',')
            .map((s) => s.trim())
            .filter(Boolean);
    };
    return {
        description: match[1].trim(),
        activity: match[2].trim(),
        apps: parseList(match[3]),
        topics: parseList(match[4]),
    };
}
|
|
300
|
+
/**
 * Describe images sequentially (one at a time).
 * Each image gets its own VLM request for accurate image-description mapping.
 *
 * Per image: read + base64-encode the file, POST it to the Ollama /chat
 * endpoint with the single-image prompt, parse the pipe-delimited response,
 * and retry up to 3 times (with 1s/2s backoff) on any failure. Frames that
 * still fail are skipped (not saved) so a later run can re-process them.
 * Returns only the successful results, in input order.
 */
async function describeImagesWithOllama(images, config, options = {}) {
    // Model and generation budget are env-overridable.
    const model = options.model ?? process.env.ESCRIBANO_VLM_MODEL ?? 'qwen3-vl:4b';
    // Config endpoint may point at /generate or /chat; normalize to /chat.
    const endpoint = `${config.endpoint.replace('/generate', '').replace('/chat', '')}/chat`;
    const { timeout, keepAlive } = config;
    const numPredict = Number(process.env.ESCRIBANO_VLM_NUM_PREDICT) || 30000;
    const allResults = [];
    const total = images.length;
    console.log(`[VLM] Processing ${total} images sequentially...`);
    console.log(`[VLM] Model: ${model}, num_predict: ${numPredict}`);
    const startTime = Date.now();
    for (let i = 0; i < images.length; i++) {
        const image = images[i];
        const current = i + 1;
        const imageStartTime = Date.now();
        let lastError = null;
        let success = false;
        // 3 retry attempts
        for (let attempt = 1; attempt <= 3 && !success; attempt++) {
            try {
                // Read and encode image
                let base64Image;
                try {
                    const buffer = readFileSync(image.imagePath);
                    base64Image = buffer.toString('base64');
                }
                catch (readError) {
                    // Wrap so the retry loop logs a useful message.
                    throw new Error(`Failed to read image: ${readError.message}`);
                }
                const prompt = buildVLMSingleImagePrompt();
                const controller = new AbortController();
                const timeoutId = setTimeout(() => controller.abort(), timeout);
                // Custom agent with extended headers timeout to prevent UND_ERR_HEADERS_TIMEOUT
                // NOTE(review): a new undici Agent is created per attempt and never
                // closed - consider hoisting/reusing one agent and closing it.
                const agent = new Agent({
                    headersTimeout: timeout,
                    connectTimeout: timeout,
                });
                const response = await undiciFetch(endpoint, {
                    method: 'POST',
                    headers: { 'Content-Type': 'application/json' },
                    dispatcher: agent,
                    body: JSON.stringify({
                        model,
                        messages: [
                            {
                                role: 'user',
                                content: prompt,
                                images: [base64Image],
                            },
                        ],
                        stream: false,
                        keep_alive: keepAlive,
                        options: {
                            num_predict: numPredict,
                            temperature: 0.3,
                        },
                    }),
                    signal: controller.signal,
                });
                // NOTE(review): not in a finally - a rejected fetch leaves the
                // abort timer pending until it fires (harmless but untidy).
                clearTimeout(timeoutId);
                if (!response.ok) {
                    throw new Error(`Ollama API error: ${response.status} ${response.statusText}`);
                }
                const data = (await response.json());
                debugLog('[VLM] Response data keys:', Object.keys(data).join(', '));
                // Chat responses use message.content; generate-style use response.
                const content = data.message?.content || data.response || '';
                debugLog('[VLM] Raw content length:', content.length);
                debugLog('[VLM] Raw content preview:', content.substring(0, 500));
                const parsed = parseVLMResponse(content);
                // Treat an empty/unparseable parse as a failure so it is retried.
                if (parsed.activity === 'unknown' && parsed.description.length === 0) {
                    debugLog('[VLM] Parsed as empty/unknown, full response:', content);
                    throw new Error('VLM returned empty/unparseable response');
                }
                const result = {
                    index: i,
                    timestamp: image.timestamp,
                    imagePath: image.imagePath,
                    activity: parsed.activity,
                    description: parsed.description,
                    apps: parsed.apps,
                    topics: parsed.topics,
                };
                allResults.push(result);
                success = true;
                const duration = Date.now() - imageStartTime;
                // Log every 10 frames
                if (current % 10 === 0) {
                    console.log(`[VLM] [${current}/${total}] ✓ (${(duration / 1000).toFixed(1)}s)`);
                }
                // Call callback immediately after each image
                if (options.onImageProcessed) {
                    options.onImageProcessed(result, { current, total });
                }
            }
            catch (error) {
                lastError = error;
                if (attempt < 3) {
                    debugLog(`[VLM] [${current}/${total}] Attempt ${attempt}/3 failed: ${lastError.message}, retrying...`);
                    // Linear backoff: 1s after attempt 1, 2s after attempt 2.
                    await new Promise((resolve) => setTimeout(resolve, 1000 * attempt));
                }
            }
        }
        if (!success) {
            console.warn(`[VLM] [${current}/${total}] ✗ Failed after 3 attempts: ${lastError?.message}`);
            // Don't save - frame will be re-processed on next run
        }
    }
    const totalDuration = ((Date.now() - startTime) / 1000).toFixed(1);
    const successCount = allResults.length;
    console.log(`\n[VLM] Complete: ${successCount}/${total} frames in ${totalDuration}s`);
    return allResults;
}
|
|
415
|
+
/**
 * Generate 1-3 descriptive topic labels from up to 20 observations.
 * Visual observations contribute the VLM description or an OCR snippet;
 * other observations contribute transcript text. Returns [] when nothing
 * usable is present or when the LLM call fails.
 */
async function extractTopicsWithOllama(observations, config) {
    // Pull a short text sample from one observation (may be empty).
    const sampleText = (o) => {
        if (o.type === 'visual') {
            return o.vlm_description || o.ocr_text?.slice(0, 200) || '';
        }
        return o.text?.slice(0, 500) || '';
    };
    const textSamples = observations
        .slice(0, 20)
        .map(sampleText)
        .filter((t) => t.length > 10);
    if (textSamples.length === 0) {
        return [];
    }
    const joined = textSamples.join('\n---\n');
    let prompt;
    try {
        const template = readFileSync(join(process.cwd(), 'prompts', 'topic-extract.md'), 'utf-8');
        prompt = template.replace('{{OBSERVATIONS}}', joined);
    }
    catch {
        // Fallback inline prompt if file not found
        prompt = `Analyze these observations from a screen recording session and generate 1-3 descriptive topic labels.

Observations:
${joined}

Output ONLY a JSON object with this format:
{"topics": ["specific topic 1", "specific topic 2"]}

Rules:
- Be specific: "debugging TypeScript errors" not just "debugging"
- Be descriptive: "learning React hooks" not just "learning"
- Focus on what the user is DOING, not just what's visible
- Max 3 topics`;
    }
    try {
        const result = await callOllama(prompt, config, {
            expectJson: true,
            model: config.model,
        });
        return result.topics || [];
    }
    catch (error) {
        console.warn('Topic extraction failed:', error);
        return [];
    }
}
|
|
461
|
+
/**
 * Free-form text generation via Ollama.
 * Model resolution order: explicit option -> ESCRIBANO_LLM_MODEL env ->
 * config.generationModel -> config.model. With expectJson, an object result
 * is pretty-printed to a JSON string; otherwise the result is stringified.
 * Errors are logged and rethrown.
 */
async function generateTextWithOllama(prompt, config, options) {
    const expectJson = options?.expectJson ?? false;
    const model = options?.model ||
        process.env.ESCRIBANO_LLM_MODEL ||
        config.generationModel ||
        config.model;
    try {
        const result = await callOllama(prompt, config, {
            expectJson,
            model,
            num_predict: options?.numPredict,
            think: options?.think,
        });
        const isObjectResult = expectJson && typeof result === 'object';
        // JSON mode may yield a parsed object - serialize it for the caller.
        return isObjectResult ? JSON.stringify(result, null, 2) : String(result);
    }
    catch (error) {
        console.error('Text generation failed:', error.message);
        throw error;
    }
}
|
|
486
|
+
/**
 * Best-effort extraction of JSON from a model's "thinking" text.
 *
 * Strategy:
 *   1. Try each fenced ```json ... ``` block and return the first one that
 *      parses (any JSON value, matching previous behavior).
 *   2. Otherwise scan for balanced `{...}` spans and return the first that
 *      parses to a non-empty plain (non-array) object. Balanced-brace
 *      scanning replaces the old lazy regex, which stopped at the first `}`
 *      and therefore could never capture objects with nested braces.
 *
 * Limitation: brace counting ignores braces inside JSON string values; such
 * spans simply fail to parse and are skipped.
 *
 * @param thinking - raw thinking/chain-of-thought text from the model
 * @returns the parsed JSON value, or null when none is found
 */
function extractJsonFromThinking(thinking) {
    const jsonCodeBlockRegex = /```json\s*([\s\S]*?)```/g;
    for (let m = jsonCodeBlockRegex.exec(thinking); m !== null; m = jsonCodeBlockRegex.exec(thinking)) {
        try {
            return JSON.parse(m[1].trim());
        }
        catch {
            // Not valid JSON - try the next fenced block.
        }
    }
    // Fallback: balanced-brace scan for inline objects.
    for (let start = thinking.indexOf('{'); start !== -1; start = thinking.indexOf('{', start + 1)) {
        let depth = 0;
        for (let i = start; i < thinking.length; i++) {
            if (thinking[i] === '{') {
                depth += 1;
            }
            else if (thinking[i] === '}') {
                depth -= 1;
                if (depth === 0) {
                    try {
                        const parsed = JSON.parse(thinking.slice(start, i + 1));
                        // Require a non-empty plain object, mirroring the old
                        // regex's demand for at least one quoted key.
                        if (parsed &&
                            typeof parsed === 'object' &&
                            !Array.isArray(parsed) &&
                            Object.keys(parsed).length > 0) {
                            return parsed;
                        }
                    }
                    catch {
                        // Unparseable span - keep scanning from the next '{'.
                    }
                    break;
                }
            }
        }
    }
    return null;
}
|
514
|
+
async function callOllama(prompt, config, options
|
|
515
|
+
// biome-ignore lint/suspicious/noExplicitAny: Ollama returns dynamic JSON or strings
|
|
516
|
+
) {
|
|
517
|
+
const requestId = Math.random().toString(36).substring(2, 8);
|
|
518
|
+
const requestStart = Date.now();
|
|
519
|
+
// Model warm-up (errors handled gracefully, especially in tests)
|
|
520
|
+
try {
|
|
521
|
+
await ensureModelWarmed(options.model, config);
|
|
522
|
+
}
|
|
523
|
+
catch {
|
|
524
|
+
// Continue even if warmup fails - model will load on first request
|
|
525
|
+
}
|
|
526
|
+
const { endpoint, maxRetries, timeout, keepAlive, maxContextSize } = config;
|
|
527
|
+
// Calculate optimal context size for this prompt
|
|
528
|
+
const contextSize = calculateContextSize(prompt.length, maxContextSize);
|
|
529
|
+
debugLog(`[${requestId}] Request started`);
|
|
530
|
+
debugLog(` Model: ${options.model}`);
|
|
531
|
+
debugLog(` Prompt: ${prompt.length} chars (~${Math.ceil(prompt.length / 4)} tokens)`);
|
|
532
|
+
debugLog(` Context: ${contextSize}, Timeout: ${timeout}ms`);
|
|
533
|
+
debugLog(` Thinking: ${options.think ? 'enabled' : 'disabled'}`);
|
|
534
|
+
debugLog(` Expect JSON: ${options.expectJson}`);
|
|
535
|
+
debugLog(` Prompt:\n${prompt}`);
|
|
536
|
+
let lastError = null;
|
|
537
|
+
for (let attempt = 1; attempt <= maxRetries; attempt++) {
|
|
538
|
+
const attemptStart = Date.now();
|
|
539
|
+
try {
|
|
540
|
+
const controller = new AbortController();
|
|
541
|
+
const timeoutId = setTimeout(() => controller.abort(), timeout);
|
|
542
|
+
debugLog(`[${requestId}] Attempt ${attempt}/${maxRetries}...`);
|
|
543
|
+
// Custom agent with extended headers timeout to prevent UND_ERR_HEADERS_TIMEOUT
|
|
544
|
+
// when models take a long time to generate the first token (thinking mode)
|
|
545
|
+
const agent = new Agent({
|
|
546
|
+
headersTimeout: timeout,
|
|
547
|
+
connectTimeout: timeout,
|
|
548
|
+
});
|
|
549
|
+
const response = await undiciFetch(endpoint, {
|
|
550
|
+
method: 'POST',
|
|
551
|
+
headers: {
|
|
552
|
+
'Content-Type': 'application/json',
|
|
553
|
+
},
|
|
554
|
+
dispatcher: agent,
|
|
555
|
+
body: JSON.stringify({
|
|
556
|
+
model: options.model,
|
|
557
|
+
messages: [
|
|
558
|
+
{
|
|
559
|
+
role: 'system',
|
|
560
|
+
content: options.expectJson
|
|
561
|
+
? 'You are a helpful assistant. Respond only with the requested JSON object, no other text.'
|
|
562
|
+
: 'You are a helpful assistant that generates high-quality markdown documentation.',
|
|
563
|
+
},
|
|
564
|
+
{
|
|
565
|
+
role: 'user',
|
|
566
|
+
content: prompt,
|
|
567
|
+
...(options.images && { images: options.images }),
|
|
568
|
+
},
|
|
569
|
+
],
|
|
570
|
+
stream: false,
|
|
571
|
+
keep_alive: keepAlive,
|
|
572
|
+
options: {
|
|
573
|
+
num_ctx: contextSize,
|
|
574
|
+
...(options.num_predict && { num_predict: options.num_predict }),
|
|
575
|
+
},
|
|
576
|
+
...(options.expectJson && {
|
|
577
|
+
format: options.jsonSchema ?? 'json',
|
|
578
|
+
}),
|
|
579
|
+
...(options.format && { format: options.format }),
|
|
580
|
+
...(options.think !== undefined && { think: options.think }),
|
|
581
|
+
}),
|
|
582
|
+
signal: controller.signal,
|
|
583
|
+
});
|
|
584
|
+
clearTimeout(timeoutId);
|
|
585
|
+
debugLog(`[${requestId}] response`, response);
|
|
586
|
+
if (!response.ok) {
|
|
587
|
+
throw new Error(`Ollama API error: ${response.status} ${response.statusText}`);
|
|
588
|
+
}
|
|
589
|
+
const data = (await response.json());
|
|
590
|
+
debugLog(`[${requestId}] Response received in ${Date.now() - attemptStart}ms`, data);
|
|
591
|
+
if (data.eval_count) {
|
|
592
|
+
debugLog(` Tokens: ${data.eval_count} eval, ${data.prompt_eval_count || 0} prompt`);
|
|
593
|
+
}
|
|
594
|
+
debugLog(` Total request time: ${Date.now() - requestStart}ms`);
|
|
595
|
+
if (!data.done || data.done_reason !== 'stop') {
|
|
596
|
+
// Warn about truncation but don't throw - let caller decide
|
|
597
|
+
if (data.done_reason === 'length') {
|
|
598
|
+
console.warn(`[Ollama] Response truncated (done_reason: length). ` +
|
|
599
|
+
`Used ${data.eval_count} tokens. Consider increasing num_predict.`);
|
|
600
|
+
}
|
|
601
|
+
throw new Error(`Incomplete response: done=${data.done}, reason=${data.done_reason}`);
|
|
602
|
+
}
|
|
603
|
+
if (options.expectJson) {
|
|
604
|
+
const content = data.message.content;
|
|
605
|
+
const thinking = data.message.thinking;
|
|
606
|
+
try {
|
|
607
|
+
return JSON.parse(content);
|
|
608
|
+
}
|
|
609
|
+
catch {
|
|
610
|
+
if (thinking) {
|
|
611
|
+
const extracted = extractJsonFromThinking(thinking);
|
|
612
|
+
if (extracted) {
|
|
613
|
+
debugLog(`[${requestId}] Extracted JSON from thinking block`);
|
|
614
|
+
return extracted;
|
|
615
|
+
}
|
|
616
|
+
}
|
|
617
|
+
throw new Error(`Failed to parse JSON response: ${content.slice(0, 100)}`);
|
|
618
|
+
}
|
|
619
|
+
}
|
|
620
|
+
const content = data.message.content;
|
|
621
|
+
const thinking = data.message.thinking;
|
|
622
|
+
if (!content || content.length < 20) {
|
|
623
|
+
if (thinking && thinking.length > content.length) {
|
|
624
|
+
debugLog(`[${requestId}] Using thinking content as fallback (${thinking.length} chars)`);
|
|
625
|
+
return thinking;
|
|
626
|
+
}
|
|
627
|
+
}
|
|
628
|
+
return content;
|
|
629
|
+
}
|
|
630
|
+
catch (error) {
|
|
631
|
+
lastError = error;
|
|
632
|
+
if (error instanceof Error && error.name === 'AbortError') {
|
|
633
|
+
console.error(`[Ollama] [${requestId}] Attempt ${attempt}/${maxRetries}: Request timed out after ${Date.now() - attemptStart}ms, retrying...`);
|
|
634
|
+
debugLog(`[${requestId}] Timeout after ${Date.now() - attemptStart}ms`);
|
|
635
|
+
}
|
|
636
|
+
else {
|
|
637
|
+
const errorMsg = lastError?.message || String(lastError);
|
|
638
|
+
console.error(`[Ollama] [${requestId}] Attempt ${attempt}/${maxRetries}: Request failed: ${errorMsg} (retrying...)`);
|
|
639
|
+
debugLog(`[${requestId}] Error:`, lastError);
|
|
640
|
+
}
|
|
641
|
+
if (attempt < maxRetries) {
|
|
642
|
+
await new Promise((resolve) => setTimeout(resolve, 1000 * attempt));
|
|
643
|
+
}
|
|
644
|
+
}
|
|
645
|
+
}
|
|
646
|
+
debugLog(`[${requestId}] Failed after ${maxRetries} retries`);
|
|
647
|
+
console.error(`[Ollama] [${requestId}] All ${maxRetries} attempts failed: ${lastError?.message}`);
|
|
648
|
+
throw new Error(`Request failed after ${maxRetries} retries: ${lastError?.message}`);
|
|
649
|
+
}
|
|
650
|
+
/**
 * Extract structured metadata from a transcript via the Ollama backend.
 *
 * Builds the metadata prompt, then asks the model for a JSON object that
 * conforms to `transcriptMetadataSchema`.
 *
 * @param {object} transcript - Transcript object consumed by loadMetadataPrompt.
 * @param {object} classification - Session-type classification scores.
 * @param {object} config - Adapter config; `generationModel` selects the model.
 * @param {Array|undefined} visualLogs - Optional visual log summaries.
 * @returns {Promise<object>} Parsed metadata JSON from the model.
 */
async function extractMetadata(transcript, classification, config, visualLogs) {
    const metadataPrompt = loadMetadataPrompt(transcript, classification, visualLogs);
    const callOptions = {
        expectJson: true,
        jsonSchema: toOllamaSchema(transcriptMetadataSchema),
        // Metadata extraction benefits from larger model
        model: config.generationModel,
    };
    return callOllama(metadataPrompt, config, callOptions);
}
|
|
659
|
+
/**
 * Build the metadata-extraction prompt by filling the template at
 * `prompts/extract-metadata.md` (resolved from the current working directory).
 *
 * @param {object} transcript - Transcript with `segments` ({start, end, text}) and `fullText`.
 * @param {Record<string, number>} classification - Session-type scores; only types scoring >= 25 are included.
 * @param {Array|undefined} visualLogs - Optional visual logs; only the first log's entries are summarized.
 * @returns {string} Prompt text with all {{...}} placeholders substituted ('N/A' where data is missing).
 * @throws If the template file cannot be read.
 */
function loadMetadataPrompt(transcript, classification, visualLogs) {
    const promptPath = join(process.cwd(), 'prompts', 'extract-metadata.md');
    let prompt = readFileSync(promptPath, 'utf-8');
    // Substitute via a replacer *function* so '$' sequences inside the inserted
    // text (e.g. "$&" or "$1" occurring in a transcript) are taken literally,
    // instead of being interpreted as String.prototype.replace special
    // replacement patterns — which would silently corrupt the prompt.
    const fill = (token, value) => {
        prompt = prompt.replace(token, () => value);
    };
    const classificationSummary = Object.entries(classification)
        .filter(([_, score]) => score >= 25)
        .map(([type, score]) => `${type}: ${score}%`)
        .join(', ');
    const segmentsText = transcript.segments
        .map((seg) => `[${seg.start}s - ${seg.end}s] ${seg.text}`)
        .join('\n');
    fill('{{CLASSIFICATION_SUMMARY}}', classificationSummary);
    fill('{{TRANSCRIPT_SEGMENTS}}', segmentsText);
    // TODO: Implement robust transcript cleaning (Milestone 4)
    fill('{{TRANSCRIPT_ALL}}', transcript.fullText);
    if (visualLogs && visualLogs.length > 0) {
        const visualSummary = visualLogs[0].entries
            .map((e) => {
                const timestamp = `[${e.timestamp}s]`;
                const label = e.heuristicLabel ? `[${e.heuristicLabel}]` : '';
                const description = e.description ? `: ${e.description}` : '';
                // Cap OCR text so a dense screen capture cannot blow up the prompt.
                const ocr = e.ocrSummary
                    ? ` (OCR: ${e.ocrSummary.substring(0, 100)})`
                    : '';
                return `${timestamp} ${label}${description}${ocr}`;
            })
            .join('\n');
        fill('{{VISUAL_LOG}}', visualSummary);
    }
    else {
        fill('{{VISUAL_LOG}}', 'N/A');
    }
    return prompt;
}
|
|
692
|
+
/**
 * Generate a markdown artifact of the given type from the session context.
 *
 * Loads the matching prompt template and asks the generation model for
 * free-form (non-JSON) markdown output.
 *
 * @param {string} artifactType - Template name under `prompts/` (without `.md`).
 * @param {object} context - Session context consumed by loadArtifactPrompt.
 * @param {object} config - Adapter config; `generationModel` selects the model.
 * @returns {Promise<string>} Generated markdown text.
 */
async function generateArtifact(artifactType, context, config) {
    const artifactPrompt = loadArtifactPrompt(artifactType, context);
    // Free-form markdown output, so JSON mode stays off.
    return callOllama(artifactPrompt, config, {
        model: config.generationModel,
        expectJson: false,
    });
}
|
|
700
|
+
/**
 * Build the artifact-generation prompt by filling the template at
 * `prompts/<artifactType>.md` (resolved from the current working directory).
 *
 * @param {string} artifactType - Template base name, e.g. 'summary'.
 * @param {object} context - Must contain `transcript` (with `fullText`,
 *   `segments`, optional `language`) and `classification`; `visualLogs` and
 *   `metadata` are optional.
 * @returns {string} Prompt text with all {{...}} placeholders substituted ('N/A' where data is missing).
 * @throws If the template file cannot be read.
 */
function loadArtifactPrompt(artifactType, context) {
    const promptPath = join(process.cwd(), 'prompts', `${artifactType}.md`);
    let prompt = readFileSync(promptPath, 'utf-8');
    // Substitute via a replacer *function* so '$' sequences inside the inserted
    // text (e.g. "$&" or "$1" occurring in a transcript) are taken literally,
    // instead of being interpreted as String.prototype.replace special
    // replacement patterns — which would silently corrupt the prompt.
    const fill = (token, value) => {
        prompt = prompt.replace(token, () => value);
    };
    // TODO: Implement robust transcript cleaning (Milestone 4)
    fill('{{TRANSCRIPT_ALL}}', context.transcript.fullText);
    fill('{{LANGUAGE}}', context.transcript.language || 'en');
    const segmentsText = context.transcript.segments
        .map((seg) => `[${seg.start}s - ${seg.end}s] ${seg.text}`)
        .join('\n');
    fill('{{TRANSCRIPT_SEGMENTS}}', segmentsText);
    const classificationSummary = Object.entries(context.classification)
        .filter(([_, score]) => score >= 25)
        .map(([type, score]) => `${type}: ${score}%`)
        .join(', ');
    fill('{{CLASSIFICATION_SUMMARY}}', classificationSummary);
    if (context.visualLogs && context.visualLogs.length > 0) {
        const visualSummary = context.visualLogs[0].entries
            .map((e, i) => `[Scene ${i}] at ${e.timestamp}s: ${e.description || 'Action on screen'}`)
            .join('\n');
        fill('{{VISUAL_LOG}}', visualSummary);
    }
    else {
        fill('{{VISUAL_LOG}}', 'N/A');
    }
    // Metadata placeholders: serialize each field, or 'N/A' when metadata is absent.
    const meta = context.metadata;
    const asJson = (value) => JSON.stringify(value || [], null, 2);
    fill('{{METADATA}}', meta ? JSON.stringify(meta, null, 2) : 'N/A');
    fill('{{SPEAKERS}}', meta ? asJson(meta.speakers) : 'N/A');
    fill('{{KEY_MOMENTS}}', meta ? asJson(meta.keyMoments) : 'N/A');
    fill('{{ACTION_ITEMS}}', meta ? asJson(meta.actionItems) : 'N/A');
    fill('{{TECHNICAL_TERMS}}', meta ? asJson(meta.technicalTerms) : 'N/A');
    fill('{{CODE_SNIPPETS}}', meta ? asJson(meta.codeSnippets) : 'N/A');
    return prompt;
}
|