escribano 0.4.5 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,81 @@
1
+ /**
2
+ * Escribano - Storage Adapter
3
+ *
4
+ * Saves and loads sessions from filesystem
5
+ */
6
+ import { mkdir, readdir, readFile, writeFile } from 'node:fs/promises';
7
+ import os from 'node:os';
8
+ import { join } from 'node:path';
9
+ const SESSIONS_DIR = join(os.homedir(), '.escribano', 'sessions');
10
/**
 * Build the filesystem-backed storage service.
 *
 * @returns {object} An object exposing `saveSession`, `loadSession`,
 *   `listSessions`, `saveArtifact` and `loadArtifacts`.
 */
export function createStorageService() {
    const service = {
        saveSession: saveSession,
        loadSession: loadSession,
        listSessions: listSessions,
        saveArtifact: saveArtifact,
        loadArtifacts: loadArtifacts,
    };
    return service;
}
19
/**
 * Make sure the sessions directory exists, creating any missing parent
 * directories along the way.
 *
 * @returns {Promise<void>}
 */
async function ensureSessionsDir() {
    const mkdirOptions = { recursive: true };
    await mkdir(SESSIONS_DIR, mkdirOptions);
}
22
/**
 * Persist a session as pretty-printed JSON at `<sessions dir>/<session.id>.json`.
 *
 * @param {object} session - Session to store; its `id` determines the file name.
 * @returns {Promise<void>}
 */
async function saveSession(session) {
    await ensureSessionsDir();
    const targetFile = join(SESSIONS_DIR, `${session.id}.json`);
    const serialized = JSON.stringify(session, null, 2);
    await writeFile(targetFile, serialized, 'utf-8');
}
27
/**
 * Read a stored session back from disk.
 *
 * @param {string} sessionId - Identifier used as the file name (without `.json`).
 * @returns {Promise<object|null>} The parsed session, or `null` when the file
 *   cannot be read or does not contain valid JSON.
 */
async function loadSession(sessionId) {
    await ensureSessionsDir();
    const sessionFile = join(SESSIONS_DIR, `${sessionId}.json`);
    return readFile(sessionFile, 'utf-8')
        .then((raw) => JSON.parse(raw))
        // Any read or parse failure is reported to the caller as "no session".
        .catch(() => null);
}
38
/**
 * Load every session stored in the sessions directory.
 *
 * Only entries whose name ends in `.json` are considered. A file that cannot
 * be read or parsed is skipped with a warning, so one corrupt session does not
 * make the whole listing fail.
 *
 * @returns {Promise<object[]>} Parsed sessions, in directory-listing order.
 */
async function listSessions() {
    await ensureSessionsDir();
    const entries = await readdir(SESSIONS_DIR);
    const sessions = [];
    for (const entry of entries) {
        if (!entry.endsWith('.json')) {
            continue;
        }
        const sessionPath = join(SESSIONS_DIR, entry);
        try {
            const content = await readFile(sessionPath, 'utf-8');
            sessions.push(JSON.parse(content));
        }
        catch (error) {
            console.warn(`Skipping unreadable session file ${sessionPath}: ${error.message}`);
        }
    }
    return sessions;
}
49
/**
 * Write an artifact into `<sessions dir>/<sessionId>/artifacts/`.
 *
 * The file is named `<type>-<timestamp>.<extension>`, where the timestamp is
 * the current UTC time with second precision and `:` replaced by `-`.
 * Artifacts whose format is `'markdown'` are written with the `.md`
 * extension, which is the extension `loadArtifacts` looks for; any other
 * format value is used as the extension unchanged.
 *
 * @param {string} sessionId - Session the artifact belongs to.
 * @param {{type: string, format: string, content: string}} artifact - Artifact to store.
 * @returns {Promise<void>}
 */
async function saveArtifact(sessionId, artifact) {
    const artifactsDir = join(SESSIONS_DIR, sessionId, 'artifacts');
    await mkdir(artifactsDir, { recursive: true });
    const timestamp = new Date().toISOString().replace(/:/g, '-').split('.')[0];
    // Without this mapping a 'markdown' artifact would be saved as
    // `*.markdown` and could not be found again by a `.md` lookup.
    const extension = artifact.format === 'markdown' ? 'md' : artifact.format;
    const filename = `${artifact.type}-${timestamp}.${extension}`;
    const artifactPath = join(artifactsDir, filename);
    await writeFile(artifactPath, artifact.content, 'utf-8');
}
57
/**
 * Load the markdown artifacts stored for a session.
 *
 * File names are expected to look like `<type>-<timestamp>.md` (a
 * `.markdown` extension is accepted as well). Entries that do not match are
 * ignored without being read, and a matching file that cannot be read is
 * skipped with a warning instead of discarding every other artifact.
 *
 * When the name carries a `YYYY-MM-DDTHH-MM-SS` timestamp, `createdAt` is
 * rebuilt from it (interpreted as UTC); otherwise it falls back to the time
 * of loading.
 *
 * @param {string} sessionId - Session whose artifacts should be loaded.
 * @returns {Promise<object[]>} Artifacts found, or `[]` when the artifacts
 *   directory cannot be listed.
 */
async function loadArtifacts(sessionId) {
    const artifactsDir = join(SESSIONS_DIR, sessionId, 'artifacts');
    let files;
    try {
        files = await readdir(artifactsDir);
    }
    catch {
        return [];
    }
    // Preferred shape: the type may itself contain hyphens because the
    // timestamp part is matched explicitly.
    const stampedName = /^(.+)-(\d{4}-\d{2}-\d{2})T(\d{2})-(\d{2})-(\d{2})\.(?:md|markdown)$/;
    // Fallback shape for names without a recognisable timestamp.
    const looseName = /^(\w+)-(.+)\.(?:md|markdown)$/;
    const artifacts = [];
    for (const file of files) {
        const stamped = stampedName.exec(file);
        const loose = stamped ? null : looseName.exec(file);
        if (!stamped && !loose) {
            continue;
        }
        let content;
        try {
            content = await readFile(join(artifactsDir, file), 'utf-8');
        }
        catch (error) {
            console.warn(`Skipping unreadable artifact ${file}: ${error.message}`);
            continue;
        }
        let createdAt = new Date();
        if (stamped) {
            const [, , day, hours, minutes, seconds] = stamped;
            const parsed = new Date(`${day}T${hours}:${minutes}:${seconds}Z`);
            if (!Number.isNaN(parsed.getTime())) {
                createdAt = parsed;
            }
        }
        artifacts.push({
            id: `${sessionId}-${file.replace(/\.(?:md|markdown)$/, '')}`,
            type: stamped ? stamped[1] : loose[1],
            content,
            format: 'markdown',
            createdAt,
        });
    }
    return artifacts;
}
@@ -0,0 +1,168 @@
1
+ /**
2
+ * Whisper Adapter
3
+ *
4
+ * Transcribes audio using whisper.cpp or OpenAI's whisper CLI.
5
+ * Shells out to the whisper binary for simplicity.
6
+ *
7
+ * Prerequisites:
8
+ * - whisper.cpp installed: brew install whisper-cpp
9
+ * - ffmpeg installed: brew install ffmpeg (for audio format conversion)
10
+ * - Or Python whisper: pip install openai-whisper
11
+ */
12
import { exec, execFile } from 'node:child_process';
import { readFile, unlink } from 'node:fs/promises';
import { promisify } from 'node:util';
15
+ const execAsync = promisify(exec);
16
/**
 * Convert an audio file to 16 kHz mono WAV with ffmpeg unless its extension
 * is already `wav`, `flac` or `mp3` (compared case-insensitively).
 *
 * ffmpeg is started with an argument array rather than a shell command
 * string, so paths containing quotes, `$`, backticks or other shell
 * metacharacters are passed through literally.
 *
 * @param {string} audioPath - Path of the input audio file.
 * @returns {Promise<string>} `audioPath` itself when no conversion is needed,
 *   otherwise the path of the new `<audioPath>.converted.wav` file.
 * @throws {Error} When ffmpeg fails or exceeds the 10 minute timeout; the
 *   original error is attached as `cause`.
 */
async function convertToWavIfNeeded(audioPath) {
    const ext = audioPath.toLowerCase().split('.').pop();
    if (['wav', 'flac', 'mp3'].includes(ext || '')) {
        return audioPath;
    }
    const outputPath = `${audioPath}.converted.wav`;
    const execFileAsync = promisify(execFile);
    const ffmpegArgs = ['-i', audioPath, '-f', 'wav', '-ar', '16000', '-ac', '1', outputPath, '-y'];
    try {
        console.log(`Converting ${audioPath} to WAV format...`);
        await execFileAsync('ffmpeg', ffmpegArgs, { timeout: 10 * 60 * 1000 });
        console.log(`Conversion complete: ${outputPath}`);
        return outputPath;
    }
    catch (error) {
        console.error(`Audio conversion failed for ${audioPath}`);
        // Do not leave a partially written WAV behind.
        await unlink(outputPath).catch(() => { });
        throw new Error(`Failed to convert audio to WAV: ${error.message}`, { cause: error });
    }
}
33
/**
 * Creates a TranscriptionService that uses the whisper CLI.
 *
 * @param {object} [config] - Optional settings.
 * @param {string} [config.binaryPath='whisper-cpp'] - Whisper executable to run.
 * @param {string} [config.model='base'] - Value passed to the `-m` option.
 * @param {string} [config.outputFormat='json'] - Requested output format.
 * @param {string} [config.language] - Value passed to `-l` when set.
 * @param {string} [config.cwd] - Working directory for the whisper process.
 * @returns {{transcribe: (audioPath: string) => Promise<object>}} Service
 *   whose `transcribe` runs whisper on the given audio file.
 */
export function createWhisperTranscriber(config = {}) {
    const resolvedConfig = {
        binaryPath: config.binaryPath ?? 'whisper-cpp',
        model: config.model ?? 'base',
        outputFormat: config.outputFormat ?? 'json',
        language: config.language,
        // transcribeWithWhisper reads `config.cwd`; it must be forwarded here
        // or a caller-supplied working directory is silently ignored.
        cwd: config.cwd,
    };
    return {
        transcribe: (audioPath) => transcribeWithWhisper(audioPath, resolvedConfig),
    };
}
47
/**
 * Transcribe an audio file using the whisper CLI.
 *
 * The input is first converted to WAV when necessary. Whisper is started with
 * an argument array (no shell), so the model path, audio path and language are
 * passed literally; `config.binaryPath` must therefore name the executable
 * only, without embedded arguments.
 *
 * The transcript is read from `<audio>.json`; if that file cannot be read or
 * parsed, the captured stdout is parsed instead. The JSON file and any
 * temporary converted WAV are removed on every exit path.
 *
 * @param {string} audioPath - Path of the audio file to transcribe.
 * @param {{binaryPath: string, model: string, language?: string, cwd?: string}} config
 *   Resolved transcriber configuration.
 * @returns {Promise<object>} Transcript with `fullText`, `segments`,
 *   `language` and `duration`.
 * @throws {Error} "Whisper transcription failed: ..." when the process fails,
 *   times out, or reports an error on stderr; the original error is attached
 *   as `cause`.
 */
async function transcribeWithWhisper(audioPath, config) {
    const audioToProcess = await convertToWavIfNeeded(audioPath);
    // whisper-cpp outputs JSON to a file named <input>.json
    const jsonOutputPath = `${audioToProcess}.json`;
    const args = ['-m', config.model, '-f', audioToProcess, '-oj'];
    if (config.language) {
        args.push('-l', config.language);
    }
    const execFileAsync = promisify(execFile);
    try {
        const { stdout, stderr } = await execFileAsync(config.binaryPath, args, {
            cwd: config.cwd,
            maxBuffer: 50 * 1024 * 1024, // 50MB buffer for large transcripts
            timeout: 10 * 60 * 1000, // 10 minute timeout
        });
        const hasError = stderr.includes('error:') ||
            stderr.includes('Error:') ||
            stderr.includes('failed to');
        if (hasError) {
            // The prefix is added once by the catch below.
            throw new Error(stderr.trim());
        }
        try {
            const jsonContent = await readFile(jsonOutputPath, 'utf-8');
            return parseWhisperOutput(JSON.parse(jsonContent));
        }
        catch {
            // Fallback: try to parse stdout as the transcript
            return parseWhisperStdout(stdout);
        }
    }
    catch (error) {
        throw new Error(`Whisper transcription failed: ${error.message}`, { cause: error });
    }
    finally {
        // Best-effort cleanup of temporary files, on success and failure alike.
        await unlink(jsonOutputPath).catch(() => { });
        if (audioToProcess !== audioPath) {
            await unlink(audioToProcess).catch(() => { });
        }
    }
}
98
/**
 * Parse whisper.cpp JSON output into our Transcript format.
 *
 * Segment offsets are converted from milliseconds to seconds, and the
 * transcript duration is the end time of the last segment (0 when there are
 * no segments). The language is taken from `output.result.language` when the
 * JSON provides it, and defaults to `'en'` otherwise.
 *
 * @param {object} output - Parsed whisper JSON; must contain a `transcription`
 *   array whose items have `offsets.from`, `offsets.to` and `text`.
 * @returns {{fullText: string, segments: object[], language: string, duration: number}}
 * @throws {TypeError} When `output` does not have the expected shape.
 */
function parseWhisperOutput(output) {
    const segments = output.transcription.map((seg, index) => ({
        id: `seg-${index}`,
        start: seg.offsets.from / 1000, // Convert ms to seconds
        end: seg.offsets.to / 1000,
        text: seg.text.trim(),
        speaker: null,
    }));
    const fullText = segments.map((s) => s.text).join(' ');
    const duration = segments.length > 0 ? segments[segments.length - 1].end : 0;
    return {
        fullText,
        segments,
        // whisper.cpp doesn't always report language in JSON
        language: output.result?.language ?? 'en',
        duration,
    };
}
118
/**
 * Fallback parser for whisper's plain-text stdout.
 *
 * Each non-blank line shaped like
 * "[00:00:00.000 --> 00:00:05.000] Hello world" becomes one segment. When no
 * line carries timestamps but the output is not blank, the whole trimmed
 * output becomes a single segment starting and ending at 0.
 *
 * @param {string} stdout - Text captured from the whisper process.
 * @returns {{fullText: string, segments: object[], language: string, duration: number}}
 */
function parseWhisperStdout(stdout) {
    const linePattern = /\[(\d{2}:\d{2}:\d{2}\.\d{3})\s*-->\s*(\d{2}:\d{2}:\d{2}\.\d{3})\]\s*(.*)/;
    const segments = [];
    stdout.split('\n').forEach((rawLine) => {
        if (!rawLine.trim()) {
            return;
        }
        const parsed = linePattern.exec(rawLine);
        if (parsed === null) {
            return;
        }
        segments.push({
            id: `seg-${segments.length}`,
            start: parseTimestamp(parsed[1]),
            end: parseTimestamp(parsed[2]),
            text: parsed[3].trim(),
            speaker: null,
        });
    });
    // No timestamped lines at all: keep the raw text as one segment.
    const wholeOutput = stdout.trim();
    if (segments.length === 0 && wholeOutput) {
        segments.push({ id: 'seg-0', start: 0, end: 0, text: wholeOutput, speaker: null });
    }
    const lastSegment = segments[segments.length - 1];
    return {
        fullText: segments.map((segment) => segment.text).join(' '),
        segments,
        language: 'en',
        duration: lastSegment ? lastSegment.end : 0,
    };
}
158
/**
 * Convert a timestamp string of the form "HH:MM:SS.mmm" into seconds.
 *
 * @param {string} timestamp - Timestamp such as "00:01:02.500".
 * @returns {number} The equivalent number of seconds (62.5 for the example).
 */
function parseTimestamp(timestamp) {
    const parts = timestamp.split(':');
    const secondsAndMillis = parts[2].split('.');
    const toInt = (digits) => parseInt(digits, 10);
    const wholeSeconds = toInt(parts[0]) * 3600 +
        toInt(parts[1]) * 60 +
        toInt(secondsAndMillis[0]);
    return wholeSeconds + toInt(secondsAndMillis[1]) / 1000;
}
@@ -29,7 +29,7 @@ import { createDefaultConfig, loadConfig, logConfig } from './config.js';
29
29
  import { getDbPath, getRepositories } from './db/index.js';
30
30
  import { log, setResourceTracker, step, withPipeline, } from './pipeline/context.js';
31
31
  import { ResourceTracker, setupStatsObserver, } from './stats/index.js';
32
- import { formatModelSelection, selectBestLLMModel, } from './utils/model-detector.js';
32
+ import { formatModelSelection, selectBestLLMModel, selectBestMLXModel, } from './utils/model-detector.js';
33
33
  const MODELS_DIR = path.join(homedir(), '.escribano', 'models');
34
34
  const MODEL_FILE = 'ggml-large-v3.bin';
35
35
  const MODEL_PATH = path.join(MODELS_DIR, MODEL_FILE);
@@ -50,15 +50,24 @@ export async function initializeSystem() {
50
50
  console.log('');
51
51
  // Setup stats observer to capture pipeline events
52
52
  setupStatsObserver(repos.stats);
53
- // Detect best LLM model
54
- const modelSelection = await selectBestLLMModel();
55
- console.log(formatModelSelection(modelSelection));
56
- console.log('');
57
- // Initialize adapters ONCE (config is now used by adapters)
58
- console.log('[VLM] Using MLX-VLM for image processing');
59
- const vlm = createMlxIntelligenceService(config);
60
- console.log('[LLM] Using Ollama for text generation');
61
- const llm = createOllamaIntelligenceService(config);
53
+ // Detect best LLM model based on configured backend
54
+ let llm;
55
+ let mlxService = null;
56
+ if (config.llmBackend === 'mlx') {
57
+ console.log('[LLM] Using MLX for text generation');
58
+ const mlxModelSelection = await selectBestMLXModel();
59
+ console.log(formatModelSelection(mlxModelSelection));
60
+ console.log('');
61
+ mlxService = createMlxIntelligenceService();
62
+ llm = mlxService;
63
+ }
64
+ else {
65
+ console.log('[LLM] Using Ollama for text generation');
66
+ const ollamaModelSelection = await selectBestLLMModel();
67
+ console.log(formatModelSelection(ollamaModelSelection));
68
+ console.log('');
69
+ llm = createOllamaIntelligenceService();
70
+ }
62
71
  const video = createFfmpegVideoService();
63
72
  const preprocessor = createSileroPreprocessor();
64
73
  const transcription = createWhisperTranscriptionService({
@@ -67,32 +76,42 @@ export async function initializeSystem() {
67
76
  cwd: MODELS_DIR,
68
77
  outputFormat: 'json',
69
78
  });
70
- // Setup resource tracking
71
79
  const resourceTracker = new ResourceTracker();
72
- resourceTracker.register(vlm);
73
80
  resourceTracker.register(video);
74
81
  resourceTracker.register(preprocessor);
75
- // Ollama runs as a daemon - special case
76
- resourceTracker.register({
77
- getResourceName: () => 'ollama',
78
- getPid: () => {
79
- try {
80
- const output = execSync('pgrep -f "ollama serve"').toString().trim();
81
- const pid = parseInt(output.split('\n')[0] ?? '0', 10);
82
- return pid > 0 ? pid : null;
83
- }
84
- catch {
85
- return null;
86
- }
87
- },
88
- });
82
+ if (config.llmBackend === 'ollama') {
83
+ resourceTracker.register({
84
+ getResourceName: () => 'ollama',
85
+ getPid: () => {
86
+ try {
87
+ const output = execSync('pgrep -f "ollama serve"').toString().trim();
88
+ const pid = parseInt(output.split('\n')[0] ?? '0', 10);
89
+ return pid > 0 ? pid : null;
90
+ }
91
+ catch {
92
+ return null;
93
+ }
94
+ },
95
+ });
96
+ }
97
+ else if (mlxService) {
98
+ resourceTracker.register(mlxService);
99
+ }
89
100
  setResourceTracker(resourceTracker);
90
101
  const outlineConfig = getOutlineConfig();
91
102
  return {
92
103
  repos,
93
- adapters: { vlm, llm, video, preprocessor, transcription },
104
+ adapters: {
105
+ vlm: null,
106
+ llm,
107
+ video,
108
+ preprocessor,
109
+ transcription,
110
+ },
94
111
  resourceTracker,
95
112
  outlineConfig,
113
+ config,
114
+ llmBackend: config.llmBackend,
96
115
  };
97
116
  }
98
117
  /**
@@ -105,7 +124,7 @@ export async function processVideo(videoPath, ctx, options = {}) {
105
124
  const startTime = Date.now();
106
125
  const { force = false, skipSummary = false, micAudioPath, systemAudioPath, format = 'card', includePersonal = false, copyToClipboard = false, printToStdout = false, } = options;
107
126
  const { repos, adapters, outlineConfig } = ctx;
108
- const { vlm, llm, video, preprocessor, transcription } = adapters;
127
+ const { llm, video, preprocessor, transcription } = adapters;
109
128
  // Load unified config for lifecycle management
110
129
  const config = loadConfig();
111
130
  try {
@@ -157,28 +176,60 @@ export async function processVideo(videoPath, ctx, options = {}) {
157
176
  const skipProcessing = dbRec &&
158
177
  (dbRec.status === 'processed' || dbRec.status === 'published') &&
159
178
  !force;
179
+ // Create VLM adapter lazily (only if needed)
180
+ let vlm = null;
181
+ if (!skipProcessing) {
182
+ // Reuse the same MLX service instance for VLM (unified adapter handles both)
183
+ // Check if LLM is MLX backend - if so, it's already a unified VLM+LLM service
184
+ if (ctx.config.llmBackend === 'mlx' && llm) {
185
+ vlm = llm;
186
+ }
187
+ else {
188
+ console.log('[VLM] Initializing MLX-VLM for frame analysis...');
189
+ vlm = createMlxIntelligenceService();
190
+ ctx.resourceTracker.register(vlm);
191
+ }
192
+ ctx.adapters.vlm = vlm;
193
+ }
160
194
  if (!skipProcessing) {
161
195
  const runType = force
162
196
  ? 'force'
163
197
  : dbRec?.processing_step
164
198
  ? 'resume'
165
199
  : 'initial';
166
- const runMetadata = collectRunMetadata(ctx.resourceTracker);
200
+ const runMetadata = collectRunMetadata(ctx.resourceTracker, ctx.config);
167
201
  await withPipeline(recording.id, runType, runMetadata, async () => {
202
+ if (!vlm)
203
+ throw new Error('[VLM] Internal error: VLM adapter expected but not initialized');
168
204
  await processRecordingV3(recording.id, repos, { preprocessor, transcription, video, intelligence: vlm }, { force });
169
205
  });
170
- // Free VLM memory after processing (good hygiene for all RAM tiers)
171
- console.log('[VLM] Freeing VLM memory...');
172
- cleanupMlxBridge();
206
+ // Clean up VLM bridge after processing to free memory for LLM
207
+ if (vlm) {
208
+ console.log('[VLM] Unloading VLM model to free memory...');
209
+ await vlm.unloadVlm?.();
210
+ // Note: We don't kill the bridge process here, just unload the model
211
+ // The bridge process will be reused for subsequent recordings if needed
212
+ }
173
213
  }
174
214
  // Generate artifact and publish (unless skipped), tracked as a pipeline run
175
215
  let artifact = null;
176
216
  let outlineUrl;
177
217
  if (!skipSummary) {
178
- const artifactRunMetadata = collectRunMetadata(ctx.resourceTracker);
218
+ // Guard: Ensure VLM is unloaded before LLM generation to prevent memory contention
219
+ if (ctx.adapters.vlm) {
220
+ console.log('[VLM] Warning: VLM bridge still loaded during artifact generation');
221
+ console.log('[VLM] Unloading to prevent memory contention with LLM...');
222
+ if ('unloadVlm' in ctx.adapters.vlm && ctx.adapters.vlm.unloadVlm) {
223
+ await ctx.adapters.vlm.unloadVlm();
224
+ }
225
+ ctx.adapters.vlm = null;
226
+ }
227
+ const artifactRunMetadata = collectRunMetadata(ctx.resourceTracker, ctx.config);
179
228
  const pipelineResult = await withPipeline(recording.id, 'artifact', artifactRunMetadata, async () => {
180
229
  console.log(`\nGenerating ${format} artifact...`);
181
230
  let generatedArtifact;
231
+ // LLM model loading is handled internally by generateText()
232
+ // No explicit load/unload calls needed here
182
233
  if (format === 'narrative') {
183
234
  // Route narrative through the corrected path
184
235
  generatedArtifact = await generateSummaryV3(recording.id, repos, llm, {
@@ -301,6 +352,11 @@ export async function processVideo(videoPath, ctx, options = {}) {
301
352
  };
302
353
  await unloadOllamaModel(config.llmModel, intelConfig);
303
354
  }
355
+ else if ('unloadLlm' in ctx.adapters.llm &&
356
+ ctx.adapters.llm.unloadLlm) {
357
+ console.log('[LLM] Unloading MLX model to free memory...');
358
+ await ctx.adapters.llm.unloadLlm();
359
+ }
304
360
  }
305
361
  console.log('\n✓ Complete!');
306
362
  return {
@@ -345,7 +401,7 @@ function getOutlineConfig() {
345
401
  /**
346
402
  * Collect metadata about the current run.
347
403
  */
348
- function collectRunMetadata(resourceTracker) {
404
+ function collectRunMetadata(resourceTracker, config) {
349
405
  let commitHash = 'unknown';
350
406
  try {
351
407
  commitHash = execSync('git rev-parse --short HEAD', {
@@ -359,6 +415,7 @@ function collectRunMetadata(resourceTracker) {
359
415
  vlm_model: process.env.ESCRIBANO_VLM_MODEL ??
360
416
  'mlx-community/Qwen3-VL-2B-Instruct-bf16',
361
417
  llm_model: process.env.ESCRIBANO_LLM_MODEL ?? 'auto-detected',
418
+ llm_backend: config?.llmBackend ?? 'ollama',
362
419
  commit_hash: commitHash,
363
420
  node_version: process.version,
364
421
  platform: process.platform,
package/dist/config.js CHANGED
@@ -27,12 +27,15 @@ const configSchema = z.object({
27
27
  vlmMaxTokens: z.number().int().min(500).max(8000).default(2000),
28
28
  // === MODELS ===
29
29
  llmModel: z.string().optional(),
30
+ llmBackend: z.enum(['mlx', 'ollama']).default('mlx'),
31
+ llmMlxModel: z.string().optional(),
30
32
  vlmModel: z.string().default('mlx-community/Qwen3-VL-2B-Instruct-4bit'),
31
33
  subjectGroupingModel: z.string().optional(),
32
34
  // === DEBUGGING ===
33
35
  verbose: z.boolean().default(false),
34
36
  debugOllama: z.boolean().default(false),
35
37
  debugVlm: z.boolean().default(false),
38
+ debugLlm: z.boolean().default(false),
36
39
  skipLlm: z.boolean().default(false),
37
40
  // === ADVANCED ===
38
41
  sceneMinInterval: z.number().int().min(1).max(10).default(2),
@@ -72,10 +75,12 @@ const BASE_DEFAULTS = {
72
75
  sampleInterval: 10,
73
76
  sceneThreshold: 0.4,
74
77
  vlmMaxTokens: 2000,
78
+ llmBackend: 'mlx',
75
79
  vlmModel: 'mlx-community/Qwen3-VL-2B-Instruct-4bit',
76
80
  verbose: false,
77
81
  debugOllama: false,
78
82
  debugVlm: false,
83
+ debugLlm: false,
79
84
  skipLlm: false,
80
85
  sceneMinInterval: 2,
81
86
  sampleGapThreshold: 15,
@@ -103,12 +108,15 @@ ESCRIBANO_SCENE_THRESHOLD=0.4 # Scene detection sensitivity (0.0-1.0)
103
108
  ESCRIBANO_VLM_MAX_TOKENS=2000 # Token budget per batch
104
109
 
105
110
  # === MODELS ===
106
- # ESCRIBANO_LLM_MODEL=qwen3.5:27b # Summary generation (auto-detected if not set)
111
+ # ESCRIBANO_LLM_BACKEND=mlx # LLM backend: 'mlx' (default) or 'ollama'
112
+ # ESCRIBANO_LLM_MODEL=qwen3.5:27b # Ollama model (only used if llmBackend='ollama')
113
+ # ESCRIBANO_LLM_MLX_MODEL= # MLX model (only used if llmBackend='mlx', auto-detected if not set)
107
114
  ESCRIBANO_VLM_MODEL=mlx-community/Qwen3-VL-2B-Instruct-4bit
108
115
 
109
116
  # === DEBUGGING ===
110
117
  ESCRIBANO_VERBOSE=false # Enable verbose logging
111
118
  ESCRIBANO_DEBUG_VLM=false # Debug VLM processing
119
+ ESCRIBANO_DEBUG_LLM=false # Log all LLM calls to debug table
112
120
 
113
121
  # === ADVANCED ===
114
122
  ESCRIBANO_SCENE_MIN_INTERVAL=2
@@ -199,12 +207,15 @@ export function loadConfig() {
199
207
  vlmMaxTokens: parseEnvNumberWithSource('ESCRIBANO_VLM_MAX_TOKENS', BASE_DEFAULTS.vlmMaxTokens, sources, 'vlmMaxTokens'),
200
208
  // === MODELS ===
201
209
  llmModel: parseEnvStringWithSource('ESCRIBANO_LLM_MODEL', undefined, sources, 'llmModel'),
210
+ llmBackend: (parseEnvStringWithSource('ESCRIBANO_LLM_BACKEND', BASE_DEFAULTS.llmBackend, sources, 'llmBackend') ?? 'mlx'),
211
+ llmMlxModel: parseEnvStringWithSource('ESCRIBANO_LLM_MLX_MODEL', undefined, sources, 'llmMlxModel'),
202
212
  vlmModel: parseEnvStringWithSource('ESCRIBANO_VLM_MODEL', BASE_DEFAULTS.vlmModel, sources, 'vlmModel'),
203
213
  subjectGroupingModel: parseEnvStringWithSource('ESCRIBANO_SUBJECT_GROUPING_MODEL', undefined, sources, 'subjectGroupingModel'),
204
214
  // === DEBUGGING ===
205
215
  verbose: parseEnvBooleanWithSource('ESCRIBANO_VERBOSE', BASE_DEFAULTS.verbose, sources, 'verbose'),
206
216
  debugOllama: parseEnvBooleanWithSource('ESCRIBANO_DEBUG_OLLAMA', BASE_DEFAULTS.debugOllama, sources, 'debugOllama'),
207
217
  debugVlm: parseEnvBooleanWithSource('ESCRIBANO_DEBUG_VLM', BASE_DEFAULTS.debugVlm, sources, 'debugVlm'),
218
+ debugLlm: parseEnvBooleanWithSource('ESCRIBANO_DEBUG_LLM', BASE_DEFAULTS.debugLlm, sources, 'debugLlm'),
208
219
  skipLlm: parseEnvBooleanWithSource('ESCRIBANO_SKIP_LLM', BASE_DEFAULTS.skipLlm, sources, 'skipLlm'),
209
220
  // === ADVANCED ===
210
221
  sceneMinInterval: parseEnvNumberWithSource('ESCRIBANO_SCENE_MIN_INTERVAL', BASE_DEFAULTS.sceneMinInterval, sources, 'sceneMinInterval'),
@@ -7,7 +7,7 @@ export function createSqliteSubjectRepository(db) {
7
7
  findById: db.prepare('SELECT * FROM subjects WHERE id = ?'),
8
8
  findByRecording: db.prepare('SELECT * FROM subjects WHERE recording_id = ? ORDER BY created_at ASC'),
9
9
  insert: db.prepare(`
10
- INSERT INTO subjects (id, recording_id, label, is_personal, duration, activity_breakdown, metadata, created_at)
10
+ INSERT OR IGNORE INTO subjects (id, recording_id, label, is_personal, duration, activity_breakdown, metadata, created_at)
11
11
  VALUES (?, ?, ?, ?, ?, ?, ?, ?)
12
12
  `),
13
13
  insertLink: db.prepare(`
@@ -0,0 +1,97 @@
1
+ /**
2
+ * Escribano - Context Extraction Domain Module
3
+ */
4
export const Context = {
    /**
     * Extract semantic contexts from raw OCR text using regex patterns.
     * This is a fast-path optimization for common applications and URLs.
     *
     * Four kinds of matches are produced, in this order: known application
     * names (`type: 'app'`), full http(s) URLs (`type: 'url'`), known domains
     * not already covered by a full URL (`type: 'url'`), and source-file paths
     * under `~/` or `/Users/` (`type: 'file'`).
     *
     * @param {string} ocrText - Raw OCR text.
     * @returns {Array<{type: string, value: string, confidence: number}>}
     *   Detected contexts; empty when the text is blank. Duplicates are not
     *   removed here.
     */
    extractFromOCR: (ocrText) => {
        const contexts = [];
        const text = ocrText.trim();
        if (!text) {
            return contexts;
        }
        // 1. App Detection
        // Word boundaries keep short names from matching inside ordinary
        // words (for example "Arc" inside "search").
        const apps = [
            { name: 'Ghostty', pattern: /\bGhostty\b/i },
            { name: 'VS Code', pattern: /\b(?:Visual Studio Code|VS Code)\b/i },
            { name: 'Chrome', pattern: /\bGoogle Chrome\b/i },
            { name: 'Arc', pattern: /\bArc\b/i },
            { name: 'Cursor', pattern: /\bCursor\b/i },
            { name: 'TablePlus', pattern: /\bTablePlus\b/i },
            { name: 'Slack', pattern: /\bSlack\b/i },
            { name: 'Spotify', pattern: /\bSpotify\b/i },
            { name: 'YouTube Music', pattern: /\bYouTube Music\b/i },
        ];
        for (const app of apps) {
            if (app.pattern.test(text)) {
                contexts.push({
                    type: 'app',
                    value: app.name,
                    confidence: 0.9,
                });
            }
        }
        // 2. URL Detection
        const urlPattern = /https?:\/\/[^\s]+/g;
        const urls = text.match(urlPattern);
        if (urls) {
            for (const url of urls) {
                contexts.push({
                    type: 'url',
                    // Clean trailing punctuation, including runs such as ")."
                    value: url.replace(/[,.)}>]+$/, ''),
                    confidence: 1.0,
                });
            }
        }
        // 3. Domain Detection (Specific known domains)
        const domains = [
            { name: 'github.com', pattern: /github\.com/i },
            { name: 'linkedin.com', pattern: /linkedin\.com/i },
            { name: 'stackoverflow.com', pattern: /stackoverflow\.com/i },
            { name: 'docs.rs', pattern: /docs\.rs/i },
            { name: 'ollama.com', pattern: /ollama\.com/i },
        ];
        for (const domain of domains) {
            if (!domain.pattern.test(text)) {
                continue;
            }
            // Only add if not already covered by a full URL
            const covered = contexts.some((c) => c.type === 'url' && c.value.includes(domain.name));
            if (!covered) {
                contexts.push({
                    type: 'url',
                    value: domain.name,
                    confidence: 0.8,
                });
            }
        }
        // 4. File Path Detection
        // The trailing \b stops a shorter extension from winning over a longer
        // one that shares its prefix (".js" inside ".json").
        const pathPattern = /(?:~\/|\/Users\/)[^\s]+\.(?:ts|js|py|rs|md|go|json|yml|yaml)\b/g;
        const paths = text.match(pathPattern);
        if (paths) {
            for (const path of paths) {
                contexts.push({
                    type: 'file',
                    value: path,
                    confidence: 0.9,
                });
            }
        }
        // TODO: Implement Step 2 - Embedding clustering for topic grouping
        // This will be used when regex patterns don't yield high-confidence results
        // or when we want to group related segments together.
        return contexts;
    },
    /**
     * Aggregate multiple contexts and remove duplicates.
     *
     * Two contexts are duplicates when they share both `type` and `value`;
     * the first occurrence is kept.
     *
     * @param {Array<{type: string, value: string}>} contexts - Contexts to filter.
     * @returns {Array<object>} A new array without duplicates, in original order.
     */
    unique: (contexts) => {
        const seen = new Set();
        return contexts.filter((c) => {
            const key = `${c.type}:${c.value}`;
            if (seen.has(key)) {
                return false;
            }
            seen.add(key);
            return true;
        });
    },
};
@@ -0,0 +1,2 @@
1
+ export * from './observation.js';
2
+ export * from './recording.js';
@@ -0,0 +1,17 @@
1
+ import { generateId } from '../db/helpers.js';
2
/**
 * Factory for audio observations.
 *
 * @param {object} params - Observation fields: `recordingId`, `timestamp`,
 *   `endTimestamp`, `text`, `audioSource` and optionally `confidence`.
 * @returns {object} A new observation with a generated `id`, `type` set to
 *   `'audio'`, `audioType` set to `'speech'`, and `confidence` defaulting to
 *   `null` when not provided.
 */
export function createAudioObservation(params) {
    const id = generateId();
    const { recordingId, timestamp, endTimestamp, text, audioSource, confidence } = params;
    return {
        id,
        recordingId,
        type: 'audio',
        timestamp,
        endTimestamp,
        text,
        audioSource,
        audioType: 'speech',
        confidence: confidence ?? null,
    };
}