escribano 0.4.4 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -132,6 +132,43 @@ async function doModelWarmup(modelName, config) {
132
132
  warmedModels.add(modelName); // Mark as warmed to avoid repeated attempts
133
133
  }
134
134
  }
135
/**
 * Unload an Ollama model from memory.
 *
 * Posts an empty prompt with `keep_alive: 0` to the `/generate` route derived
 * from `config.endpoint`, which tells Ollama to release the model immediately.
 * On an OK response the model is also removed from `warmedModels`.
 *
 * Unloading is best-effort: every failure (HTTP error or thrown exception) is
 * reported through `debugLog` and never propagated to the caller.
 *
 * @param {string} modelName - Name of the model to unload.
 * @param {{ endpoint: string }} config - Adapter config; only `endpoint` is read here.
 * @returns {Promise<void>}
 */
export async function unloadOllamaModel(modelName, config) {
    try {
        debugLog(`Unloading model: ${modelName}...`);
        // Strip only a trailing /chat or /generate route segment. A plain
        // String.replace('/chat', '') removes the first match anywhere in the
        // URL and would corrupt hosts such as http://chat.example.com/api/chat.
        const baseUrl = config.endpoint.replace(/\/(?:chat|generate)\/?$/, '');
        const response = await fetch(`${baseUrl}/generate`, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({
                model: modelName,
                prompt: '',
                keep_alive: 0, // Unload immediately
            }),
        });
        if (response.ok) {
            warmedModels.delete(modelName);
            debugLog(`Model ${modelName} unloaded.`);
            return;
        }
        let bodyText = '';
        try {
            bodyText = await response.text();
        }
        catch {
            // The body is only used to enrich the log line; ignore read failures.
        }
        debugLog(`Failed to unload model ${modelName}: HTTP ${response.status} ${response.statusText}` +
            (bodyText ? ` - Response body: ${bodyText}` : ''));
    }
    catch (error) {
        // Unload is best-effort - don't throw. Non-Error values can be thrown too,
        // so do not assume a `.message` property exists.
        const reason = error instanceof Error ? error.message : String(error);
        debugLog(`Failed to unload model ${modelName}: ${reason}`);
    }
}
135
172
  async function checkOllamaHealth() {
136
173
  try {
137
174
  const response = await fetch('http://localhost:11434/api/tags');
@@ -21,15 +21,15 @@ import { hasContentChanged, publishSummaryV3, updateRecordingOutlineMetadata, }
21
21
  import { createSileroPreprocessor } from './adapters/audio.silero.adapter.js';
22
22
  import { createFilesystemCaptureSource } from './adapters/capture.filesystem.adapter.js';
23
23
  import { cleanupMlxBridge, createMlxIntelligenceService, } from './adapters/intelligence.mlx.adapter.js';
24
- import { createOllamaIntelligenceService } from './adapters/intelligence.ollama.adapter.js';
24
+ import { createOllamaIntelligenceService, unloadOllamaModel, } from './adapters/intelligence.ollama.adapter.js';
25
25
  import { createOutlinePublishingService } from './adapters/publishing.outline.adapter.js';
26
26
  import { createWhisperTranscriptionService } from './adapters/transcription.whisper.adapter.js';
27
27
  import { createFfmpegVideoService } from './adapters/video.ffmpeg.adapter.js';
28
- import { createDefaultConfig } from './config.js';
28
+ import { createDefaultConfig, loadConfig, logConfig } from './config.js';
29
29
  import { getDbPath, getRepositories } from './db/index.js';
30
30
  import { log, setResourceTracker, step, withPipeline, } from './pipeline/context.js';
31
31
  import { ResourceTracker, setupStatsObserver, } from './stats/index.js';
32
- import { formatModelSelection, selectBestLLMModel, } from './utils/model-detector.js';
32
+ import { formatModelSelection, selectBestLLMModel, selectBestMLXModel, } from './utils/model-detector.js';
33
33
  const MODELS_DIR = path.join(homedir(), '.escribano', 'models');
34
34
  const MODEL_FILE = 'ggml-large-v3.bin';
35
35
  const MODEL_PATH = path.join(MODELS_DIR, MODEL_FILE);
@@ -40,21 +40,34 @@ const MODEL_PATH = path.join(MODELS_DIR, MODEL_FILE);
40
40
  export async function initializeSystem() {
41
41
  // Create default config file if it doesn't exist
42
42
  createDefaultConfig();
43
+ // Load and log unified configuration
44
+ const config = loadConfig();
45
+ logConfig();
46
+ console.log('');
43
47
  console.log('Initializing database...');
44
48
  const repos = getRepositories();
45
49
  console.log(`Database ready: ${getDbPath()}`);
46
50
  console.log('');
47
51
  // Setup stats observer to capture pipeline events
48
52
  setupStatsObserver(repos.stats);
49
- // Detect best LLM model
50
- const modelSelection = await selectBestLLMModel();
51
- console.log(formatModelSelection(modelSelection));
52
- console.log('');
53
- // Initialize adapters ONCE
54
- console.log('[VLM] Using MLX-VLM for image processing');
55
- const vlm = createMlxIntelligenceService();
56
- console.log('[LLM] Using Ollama for text generation');
57
- const llm = createOllamaIntelligenceService();
53
+ // Detect best LLM model based on configured backend
54
+ let llm;
55
+ let mlxService = null;
56
+ if (config.llmBackend === 'mlx') {
57
+ console.log('[LLM] Using MLX for text generation');
58
+ const mlxModelSelection = await selectBestMLXModel();
59
+ console.log(formatModelSelection(mlxModelSelection));
60
+ console.log('');
61
+ mlxService = createMlxIntelligenceService();
62
+ llm = mlxService;
63
+ }
64
+ else {
65
+ console.log('[LLM] Using Ollama for text generation');
66
+ const ollamaModelSelection = await selectBestLLMModel();
67
+ console.log(formatModelSelection(ollamaModelSelection));
68
+ console.log('');
69
+ llm = createOllamaIntelligenceService();
70
+ }
58
71
  const video = createFfmpegVideoService();
59
72
  const preprocessor = createSileroPreprocessor();
60
73
  const transcription = createWhisperTranscriptionService({
@@ -63,32 +76,42 @@ export async function initializeSystem() {
63
76
  cwd: MODELS_DIR,
64
77
  outputFormat: 'json',
65
78
  });
66
- // Setup resource tracking
67
79
  const resourceTracker = new ResourceTracker();
68
- resourceTracker.register(vlm);
69
80
  resourceTracker.register(video);
70
81
  resourceTracker.register(preprocessor);
71
- // Ollama runs as a daemon - special case
72
- resourceTracker.register({
73
- getResourceName: () => 'ollama',
74
- getPid: () => {
75
- try {
76
- const output = execSync('pgrep -f "ollama serve"').toString().trim();
77
- const pid = parseInt(output.split('\n')[0] ?? '0', 10);
78
- return pid > 0 ? pid : null;
79
- }
80
- catch {
81
- return null;
82
- }
83
- },
84
- });
82
+ if (config.llmBackend === 'ollama') {
83
+ resourceTracker.register({
84
+ getResourceName: () => 'ollama',
85
+ getPid: () => {
86
+ try {
87
+ const output = execSync('pgrep -f "ollama serve"').toString().trim();
88
+ const pid = parseInt(output.split('\n')[0] ?? '0', 10);
89
+ return pid > 0 ? pid : null;
90
+ }
91
+ catch {
92
+ return null;
93
+ }
94
+ },
95
+ });
96
+ }
97
+ else if (mlxService) {
98
+ resourceTracker.register(mlxService);
99
+ }
85
100
  setResourceTracker(resourceTracker);
86
101
  const outlineConfig = getOutlineConfig();
87
102
  return {
88
103
  repos,
89
- adapters: { vlm, llm, video, preprocessor, transcription },
104
+ adapters: {
105
+ vlm: null,
106
+ llm,
107
+ video,
108
+ preprocessor,
109
+ transcription,
110
+ },
90
111
  resourceTracker,
91
112
  outlineConfig,
113
+ config,
114
+ llmBackend: config.llmBackend,
92
115
  };
93
116
  }
94
117
  /**
@@ -101,7 +124,9 @@ export async function processVideo(videoPath, ctx, options = {}) {
101
124
  const startTime = Date.now();
102
125
  const { force = false, skipSummary = false, micAudioPath, systemAudioPath, format = 'card', includePersonal = false, copyToClipboard = false, printToStdout = false, } = options;
103
126
  const { repos, adapters, outlineConfig } = ctx;
104
- const { vlm, llm, video, preprocessor, transcription } = adapters;
127
+ const { llm, video, preprocessor, transcription } = adapters;
128
+ // Load unified config for lifecycle management
129
+ const config = loadConfig();
105
130
  try {
106
131
  // Create capture source for this specific file
107
132
  // Note: Hardcoded to filesystem source, not Cap recordings
@@ -151,25 +176,60 @@ export async function processVideo(videoPath, ctx, options = {}) {
151
176
  const skipProcessing = dbRec &&
152
177
  (dbRec.status === 'processed' || dbRec.status === 'published') &&
153
178
  !force;
179
+ // Create VLM adapter lazily (only if needed)
180
+ let vlm = null;
181
+ if (!skipProcessing) {
182
+ // Reuse the same MLX service instance for VLM (unified adapter handles both)
183
+ // Check if LLM is MLX backend - if so, it's already a unified VLM+LLM service
184
+ if (ctx.config.llmBackend === 'mlx' && llm) {
185
+ vlm = llm;
186
+ }
187
+ else {
188
+ console.log('[VLM] Initializing MLX-VLM for frame analysis...');
189
+ vlm = createMlxIntelligenceService();
190
+ ctx.resourceTracker.register(vlm);
191
+ }
192
+ ctx.adapters.vlm = vlm;
193
+ }
154
194
  if (!skipProcessing) {
155
195
  const runType = force
156
196
  ? 'force'
157
197
  : dbRec?.processing_step
158
198
  ? 'resume'
159
199
  : 'initial';
160
- const runMetadata = collectRunMetadata(ctx.resourceTracker);
200
+ const runMetadata = collectRunMetadata(ctx.resourceTracker, ctx.config);
161
201
  await withPipeline(recording.id, runType, runMetadata, async () => {
202
+ if (!vlm)
203
+ throw new Error('[VLM] Internal error: VLM adapter expected but not initialized');
162
204
  await processRecordingV3(recording.id, repos, { preprocessor, transcription, video, intelligence: vlm }, { force });
163
205
  });
206
+ // Clean up VLM bridge after processing to free memory for LLM
207
+ if (vlm) {
208
+ console.log('[VLM] Unloading VLM model to free memory...');
209
+ await vlm.unloadVlm?.();
210
+ // Note: We don't kill the bridge process here, just unload the model
211
+ // The bridge process will be reused for subsequent recordings if needed
212
+ }
164
213
  }
165
214
  // Generate artifact and publish (unless skipped), tracked as a pipeline run
166
215
  let artifact = null;
167
216
  let outlineUrl;
168
217
  if (!skipSummary) {
169
- const artifactRunMetadata = collectRunMetadata(ctx.resourceTracker);
218
+ // Guard: Ensure VLM is unloaded before LLM generation to prevent memory contention
219
+ if (ctx.adapters.vlm) {
220
+ console.log('[VLM] Warning: VLM bridge still loaded during artifact generation');
221
+ console.log('[VLM] Unloading to prevent memory contention with LLM...');
222
+ if ('unloadVlm' in ctx.adapters.vlm && ctx.adapters.vlm.unloadVlm) {
223
+ await ctx.adapters.vlm.unloadVlm();
224
+ }
225
+ ctx.adapters.vlm = null;
226
+ }
227
+ const artifactRunMetadata = collectRunMetadata(ctx.resourceTracker, ctx.config);
170
228
  const pipelineResult = await withPipeline(recording.id, 'artifact', artifactRunMetadata, async () => {
171
229
  console.log(`\nGenerating ${format} artifact...`);
172
230
  let generatedArtifact;
231
+ // LLM model loading is handled internally by generateText()
232
+ // No explicit load/unload calls needed here
173
233
  if (format === 'narrative') {
174
234
  // Route narrative through the corrected path
175
235
  generatedArtifact = await generateSummaryV3(recording.id, repos, llm, {
@@ -272,6 +332,31 @@ export async function processVideo(videoPath, ctx, options = {}) {
272
332
  });
273
333
  artifact = pipelineResult.artifact;
274
334
  outlineUrl = pipelineResult.outlineUrl;
335
+ // Unload LLM after artifact generation to free memory (good hygiene for all RAM tiers)
336
+ if (config.llmModel) {
337
+ console.log('[LLM] Unloading model to free memory...');
338
+ const intelConfig = {
339
+ provider: 'ollama',
340
+ endpoint: 'http://localhost:11434/api/chat',
341
+ model: config.llmModel,
342
+ generationModel: config.llmModel,
343
+ visionModel: config.vlmModel,
344
+ maxRetries: 3,
345
+ timeout: 600000,
346
+ keepAlive: '10m',
347
+ maxContextSize: 131072,
348
+ embedding: { model: 'nomic-embed-text', similarityThreshold: 0.75 },
349
+ vlmBatchSize: config.vlmBatchSize,
350
+ vlmMaxTokens: config.vlmMaxTokens,
351
+ mlxSocketPath: config.mlxSocketPath,
352
+ };
353
+ await unloadOllamaModel(config.llmModel, intelConfig);
354
+ }
355
+ else if ('unloadLlm' in ctx.adapters.llm &&
356
+ ctx.adapters.llm.unloadLlm) {
357
+ console.log('[LLM] Unloading MLX model to free memory...');
358
+ await ctx.adapters.llm.unloadLlm();
359
+ }
275
360
  }
276
361
  console.log('\n✓ Complete!');
277
362
  return {
@@ -316,7 +401,7 @@ function getOutlineConfig() {
316
401
  /**
317
402
  * Collect metadata about the current run.
318
403
  */
319
- function collectRunMetadata(resourceTracker) {
404
+ function collectRunMetadata(resourceTracker, config) {
320
405
  let commitHash = 'unknown';
321
406
  try {
322
407
  commitHash = execSync('git rev-parse --short HEAD', {
@@ -330,6 +415,7 @@ function collectRunMetadata(resourceTracker) {
330
415
  vlm_model: process.env.ESCRIBANO_VLM_MODEL ??
331
416
  'mlx-community/Qwen3-VL-2B-Instruct-bf16',
332
417
  llm_model: process.env.ESCRIBANO_LLM_MODEL ?? 'auto-detected',
418
+ llm_backend: config?.llmBackend ?? 'ollama',
333
419
  commit_hash: commitHash,
334
420
  node_version: process.version,
335
421
  platform: process.platform,
package/dist/config.js CHANGED
@@ -5,12 +5,12 @@
5
5
  * 1. CLI arguments
6
6
  * 2. Shell environment variables (export ESCRIBANO_*)
7
7
  * 3. ~/.escribano/.env file
8
- * 4. Default values
8
+ * 4. RAM-aware defaults (based on system memory)
9
9
  *
10
10
  * Note: Project-level .env is NOT loaded by default (only for development).
11
11
  */
12
12
  import { existsSync, mkdirSync, writeFileSync } from 'node:fs';
13
- import { homedir } from 'node:os';
13
+ import { homedir, totalmem } from 'node:os';
14
14
  import path from 'node:path';
15
15
  import { config as dotenvConfig } from 'dotenv';
16
16
  import { z } from 'zod';
@@ -27,19 +27,22 @@ const configSchema = z.object({
27
27
  vlmMaxTokens: z.number().int().min(500).max(8000).default(2000),
28
28
  // === MODELS ===
29
29
  llmModel: z.string().optional(),
30
+ llmBackend: z.enum(['mlx', 'ollama']).default('mlx'),
31
+ llmMlxModel: z.string().optional(),
30
32
  vlmModel: z.string().default('mlx-community/Qwen3-VL-2B-Instruct-4bit'),
31
33
  subjectGroupingModel: z.string().optional(),
32
34
  // === DEBUGGING ===
33
35
  verbose: z.boolean().default(false),
34
36
  debugOllama: z.boolean().default(false),
35
37
  debugVlm: z.boolean().default(false),
38
+ debugLlm: z.boolean().default(false),
36
39
  skipLlm: z.boolean().default(false),
37
40
  // === ADVANCED ===
38
41
  sceneMinInterval: z.number().int().min(1).max(10).default(2),
39
42
  sampleGapThreshold: z.number().int().min(5).max(60).default(15),
40
43
  sampleGapFill: z.number().int().min(1).max(10).default(3),
41
44
  mlxSocketPath: z.string().default('/tmp/escribano-mlx.sock'),
42
- mlxStartupTimeout: z.number().int().min(10000).default(60000),
45
+ mlxStartupTimeout: z.number().int().min(10000).default(120000),
43
46
  pythonPath: z.string().optional(),
44
47
  parallelTranscription: z.boolean().default(false),
45
48
  artifactThink: z.boolean().default(false),
@@ -49,24 +52,41 @@ const configSchema = z.object({
49
52
  outlineCollection: z.string().default('Escribano Sessions'),
50
53
  });
51
54
  // =============================================================================
55
+ // RAM DETECTION
56
+ // =============================================================================
57
/**
 * Report total system memory, as given by `os.totalmem()`, rounded to the
 * nearest whole gibibyte.
 *
 * @returns {number} Total memory in GiB (integer).
 */
function getSystemRamGB() {
    const BYTES_PER_GIB = 1024 ** 3;
    const totalBytes = totalmem();
    return Math.round(totalBytes / BYTES_PER_GIB);
}
60
/**
 * Classify a RAM size into a tier and pick the default frame width for it.
 *
 * - 32 GB or more: 'high', frame width 1024
 * - 16 GB up to (not including) 32 GB: 'medium', frame width 1024
 * - anything else: 'low', frame width 768
 *
 * @param {number} ramGB - System RAM in gigabytes.
 * @returns {{ tier: 'high' | 'medium' | 'low', frameWidth: number }}
 */
function getRamTier(ramGB) {
    const tier = ramGB >= 32 ? 'high' : ramGB >= 16 ? 'medium' : 'low';
    // Only the lowest tier gets a reduced frame width.
    const frameWidth = tier === 'low' ? 768 : 1024;
    return { tier, frameWidth };
}
69
+ // =============================================================================
52
70
  // DEFAULT CONFIG
53
71
  // =============================================================================
54
- const DEFAULT_CONFIG = {
72
+ const BASE_DEFAULTS = {
55
73
  frameWidth: 1024,
56
74
  vlmBatchSize: 2,
57
75
  sampleInterval: 10,
58
76
  sceneThreshold: 0.4,
59
77
  vlmMaxTokens: 2000,
78
+ llmBackend: 'mlx',
60
79
  vlmModel: 'mlx-community/Qwen3-VL-2B-Instruct-4bit',
61
80
  verbose: false,
62
81
  debugOllama: false,
63
82
  debugVlm: false,
83
+ debugLlm: false,
64
84
  skipLlm: false,
65
85
  sceneMinInterval: 2,
66
86
  sampleGapThreshold: 15,
67
87
  sampleGapFill: 3,
68
88
  mlxSocketPath: '/tmp/escribano-mlx.sock',
69
- mlxStartupTimeout: 60000,
89
+ mlxStartupTimeout: 120000,
70
90
  parallelTranscription: false,
71
91
  artifactThink: false,
72
92
  outlineCollection: 'Escribano Sessions',
@@ -79,30 +99,33 @@ const CONFIG_TEMPLATE = `# Escribano Configuration - ~/.escribano/.env
79
99
  # Full reference: https://github.com/eduardosanzb/escribano#configuration
80
100
 
81
101
  # === PERFORMANCE ===
82
- ESCRIBANO_FRAME_WIDTH=1024 # Lower = faster (1920, 1280, 1024, 640)
83
- ESCRIBANO_VLM_BATCH_SIZE=2 # 1-4 frames (lower = more reliable)
84
- ESCRIBANO_SAMPLE_INTERVAL=10 # Base frame sampling (seconds)
102
+ # ESCRIBANO_FRAME_WIDTH=1024 # Auto-adjusted based on RAM (1024 for 16GB+, 768 for <16GB)
103
+ # ESCRIBANO_VLM_BATCH_SIZE=2 # 1-4 frames (lower = more reliable)
104
+ ESCRIBANO_SAMPLE_INTERVAL=10 # Base frame sampling (seconds)
85
105
 
86
106
  # === QUALITY ===
87
- ESCRIBANO_SCENE_THRESHOLD=0.4 # Scene detection sensitivity (0.0-1.0)
88
- ESCRIBANO_VLM_MAX_TOKENS=2000 # Token budget per batch
107
+ ESCRIBANO_SCENE_THRESHOLD=0.4 # Scene detection sensitivity (0.0-1.0)
108
+ ESCRIBANO_VLM_MAX_TOKENS=2000 # Token budget per batch
89
109
 
90
110
  # === MODELS ===
91
- # ESCRIBANO_LLM_MODEL=qwen3.5:27b # Summary generation (auto-detected if not set)
111
+ # ESCRIBANO_LLM_BACKEND=mlx # LLM backend: 'mlx' (default) or 'ollama'
112
+ # ESCRIBANO_LLM_MODEL=qwen3.5:27b # Ollama model (only used if llmBackend='ollama')
113
+ # ESCRIBANO_LLM_MLX_MODEL= # MLX model (only used if llmBackend='mlx', auto-detected if not set)
92
114
  ESCRIBANO_VLM_MODEL=mlx-community/Qwen3-VL-2B-Instruct-4bit
93
115
 
94
116
  # === DEBUGGING ===
95
- ESCRIBANO_VERBOSE=false # Enable verbose logging
96
- ESCRIBANO_DEBUG_VLM=false # Debug VLM processing
117
+ ESCRIBANO_VERBOSE=false # Enable verbose logging
118
+ ESCRIBANO_DEBUG_VLM=false # Debug VLM processing
119
+ ESCRIBANO_DEBUG_LLM=false # Log all LLM calls to debug table
97
120
 
98
121
  # === ADVANCED ===
99
122
  ESCRIBANO_SCENE_MIN_INTERVAL=2
100
123
  ESCRIBANO_SAMPLE_GAP_THRESHOLD=15
101
124
  ESCRIBANO_SAMPLE_GAP_FILL=3
102
125
  ESCRIBANO_MLX_SOCKET_PATH=/tmp/escribano-mlx.sock
103
- ESCRIBANO_MLX_STARTUP_TIMEOUT=60000
104
- # ESCRIBANO_PYTHON_PATH= # Auto-detected if not set
105
- ESCRIBANO_ARTIFACT_THINK=false # Enable thinking for artifacts (slower)
126
+ ESCRIBANO_MLX_STARTUP_TIMEOUT=120000
127
+ # ESCRIBANO_PYTHON_PATH= # Auto-detected if not set
128
+ ESCRIBANO_ARTIFACT_THINK=false # Enable thinking for artifacts (slower)
106
129
 
107
130
  # === OPTIONAL (Outline publishing) ===
108
131
  # ESCRIBANO_OUTLINE_URL=
@@ -113,6 +136,7 @@ ESCRIBANO_ARTIFACT_THINK=false # Enable thinking for artifacts (slower)
113
136
  // CONFIG LOADER
114
137
  // =============================================================================
115
138
  let cachedConfig = null;
139
+ let cachedSources = [];
116
140
  export function getConfigPath() {
117
141
  return path.join(homedir(), '.escribano', '.env');
118
142
  }
@@ -133,97 +157,179 @@ export function createDefaultConfig() {
133
157
  console.error(`Failed to create config file at ${configPath}: ${error.message}`);
134
158
  }
135
159
  }
160
/**
 * Check if running in development mode.
 * Development mode = running via tsx from source (src/index.ts)
 * Production mode = running compiled code (dist/index.js)
 *
 * The decision is made on the module URL: the file must sit below a `/src/`
 * directory, and that `/src/` must be the innermost of `/src/`, `/dist/` and
 * `/node_modules/` in the path. A bare "contains /src/" test misclassifies
 * compiled installs that merely live under an ancestor directory named `src`
 * (for example `~/src/project/dist/config.js`).
 *
 * @param {string} [currentFile=import.meta.url] - Module URL or path to classify.
 * @returns {boolean} True when the file is considered to run from source.
 */
function isDevelopmentMode(currentFile = import.meta.url) {
    const srcIndex = currentFile.lastIndexOf('/src/');
    if (srcIndex === -1) {
        return false;
    }
    // A build or install directory appearing after /src/ means the /src/ segment
    // belongs to an ancestor folder, not to this package's source tree.
    const buildIndex = Math.max(currentFile.lastIndexOf('/dist/'), currentFile.lastIndexOf('/node_modules/'));
    return srcIndex > buildIndex;
}
136
170
  export function loadConfig() {
137
171
  if (cachedConfig) {
138
172
  return cachedConfig;
139
173
  }
140
- // 1. Load from config file (if exists)
141
- const configPath = getConfigPath();
142
- if (existsSync(configPath)) {
143
- try {
144
- const result = dotenvConfig({ path: configPath });
145
- if (result.error) {
146
- console.error(`Failed to parse config file ${configPath}: ${result.error.message}`);
147
- console.error('Using default configuration.');
174
+ const sources = [];
175
+ // 1. Load from user config file (PRODUCTION MODE ONLY)
176
+ // In development mode, we use project .env via tsx --env-file flag
177
+ if (!isDevelopmentMode()) {
178
+ const configPath = getConfigPath();
179
+ if (existsSync(configPath)) {
180
+ try {
181
+ const result = dotenvConfig({ path: configPath });
182
+ if (result.error) {
183
+ console.error(`Failed to parse config file ${configPath}: ${result.error.message}`);
184
+ console.error('Using default configuration.');
185
+ }
186
+ else if (result.parsed && Object.keys(result.parsed).length > 0) {
187
+ console.log(`Loaded config from ${configPath}`);
188
+ }
148
189
  }
149
- else if (result.parsed && Object.keys(result.parsed).length > 0) {
150
- console.log(`Loaded config from ${configPath}`);
190
+ catch (error) {
191
+ console.error(`Error reading config file ${configPath}: ${error.message}`);
192
+ console.error('Using default configuration.');
151
193
  }
152
194
  }
153
- catch (error) {
154
- console.error(`Error reading config file ${configPath}: ${error.message}`);
155
- console.error('Using default configuration.');
156
- }
157
195
  }
158
- // 2. Build config from environment variables
196
+ // 2. Get RAM-aware defaults
197
+ const ramGB = getSystemRamGB();
198
+ const ramTier = getRamTier(ramGB);
199
+ // 3. Build config with source tracking
159
200
  const config = {
160
201
  // === PERFORMANCE ===
161
- frameWidth: parseEnvNumber('ESCRIBANO_FRAME_WIDTH', DEFAULT_CONFIG.frameWidth),
162
- vlmBatchSize: parseEnvNumber('ESCRIBANO_VLM_BATCH_SIZE', DEFAULT_CONFIG.vlmBatchSize),
163
- sampleInterval: parseEnvNumber('ESCRIBANO_SAMPLE_INTERVAL', DEFAULT_CONFIG.sampleInterval),
202
+ frameWidth: parseEnvNumberWithSource('ESCRIBANO_FRAME_WIDTH', ramTier.frameWidth, sources, 'frameWidth'),
203
+ vlmBatchSize: parseEnvNumberWithSource('ESCRIBANO_VLM_BATCH_SIZE', BASE_DEFAULTS.vlmBatchSize, sources, 'vlmBatchSize'),
204
+ sampleInterval: parseEnvNumberWithSource('ESCRIBANO_SAMPLE_INTERVAL', BASE_DEFAULTS.sampleInterval, sources, 'sampleInterval'),
164
205
  // === QUALITY ===
165
- sceneThreshold: parseEnvNumber('ESCRIBANO_SCENE_THRESHOLD', DEFAULT_CONFIG.sceneThreshold),
166
- vlmMaxTokens: parseEnvNumber('ESCRIBANO_VLM_MAX_TOKENS', DEFAULT_CONFIG.vlmMaxTokens),
206
+ sceneThreshold: parseEnvNumberWithSource('ESCRIBANO_SCENE_THRESHOLD', BASE_DEFAULTS.sceneThreshold, sources, 'sceneThreshold'),
207
+ vlmMaxTokens: parseEnvNumberWithSource('ESCRIBANO_VLM_MAX_TOKENS', BASE_DEFAULTS.vlmMaxTokens, sources, 'vlmMaxTokens'),
167
208
  // === MODELS ===
168
- llmModel: process.env.ESCRIBANO_LLM_MODEL,
169
- vlmModel: process.env.ESCRIBANO_VLM_MODEL || DEFAULT_CONFIG.vlmModel,
170
- subjectGroupingModel: process.env.ESCRIBANO_SUBJECT_GROUPING_MODEL,
209
+ llmModel: parseEnvStringWithSource('ESCRIBANO_LLM_MODEL', undefined, sources, 'llmModel'),
210
+ llmBackend: (parseEnvStringWithSource('ESCRIBANO_LLM_BACKEND', BASE_DEFAULTS.llmBackend, sources, 'llmBackend') ?? 'mlx'),
211
+ llmMlxModel: parseEnvStringWithSource('ESCRIBANO_LLM_MLX_MODEL', undefined, sources, 'llmMlxModel'),
212
+ vlmModel: parseEnvStringWithSource('ESCRIBANO_VLM_MODEL', BASE_DEFAULTS.vlmModel, sources, 'vlmModel'),
213
+ subjectGroupingModel: parseEnvStringWithSource('ESCRIBANO_SUBJECT_GROUPING_MODEL', undefined, sources, 'subjectGroupingModel'),
171
214
  // === DEBUGGING ===
172
- verbose: parseEnvBoolean('ESCRIBANO_VERBOSE', DEFAULT_CONFIG.verbose),
173
- debugOllama: parseEnvBoolean('ESCRIBANO_DEBUG_OLLAMA', DEFAULT_CONFIG.debugOllama),
174
- debugVlm: parseEnvBoolean('ESCRIBANO_DEBUG_VLM', DEFAULT_CONFIG.debugVlm),
175
- skipLlm: parseEnvBoolean('ESCRIBANO_SKIP_LLM', DEFAULT_CONFIG.skipLlm),
215
+ verbose: parseEnvBooleanWithSource('ESCRIBANO_VERBOSE', BASE_DEFAULTS.verbose, sources, 'verbose'),
216
+ debugOllama: parseEnvBooleanWithSource('ESCRIBANO_DEBUG_OLLAMA', BASE_DEFAULTS.debugOllama, sources, 'debugOllama'),
217
+ debugVlm: parseEnvBooleanWithSource('ESCRIBANO_DEBUG_VLM', BASE_DEFAULTS.debugVlm, sources, 'debugVlm'),
218
+ debugLlm: parseEnvBooleanWithSource('ESCRIBANO_DEBUG_LLM', BASE_DEFAULTS.debugLlm, sources, 'debugLlm'),
219
+ skipLlm: parseEnvBooleanWithSource('ESCRIBANO_SKIP_LLM', BASE_DEFAULTS.skipLlm, sources, 'skipLlm'),
176
220
  // === ADVANCED ===
177
- sceneMinInterval: parseEnvNumber('ESCRIBANO_SCENE_MIN_INTERVAL', DEFAULT_CONFIG.sceneMinInterval),
178
- sampleGapThreshold: parseEnvNumber('ESCRIBANO_SAMPLE_GAP_THRESHOLD', DEFAULT_CONFIG.sampleGapThreshold),
179
- sampleGapFill: parseEnvNumber('ESCRIBANO_SAMPLE_GAP_FILL', DEFAULT_CONFIG.sampleGapFill),
180
- mlxSocketPath: process.env.ESCRIBANO_MLX_SOCKET_PATH || DEFAULT_CONFIG.mlxSocketPath,
181
- mlxStartupTimeout: parseEnvNumber('ESCRIBANO_MLX_STARTUP_TIMEOUT', DEFAULT_CONFIG.mlxStartupTimeout),
182
- pythonPath: process.env.ESCRIBANO_PYTHON_PATH,
183
- parallelTranscription: parseEnvBoolean('ESCRIBANO_PARALLEL_TRANSCRIPTION', DEFAULT_CONFIG.parallelTranscription),
184
- artifactThink: parseEnvBoolean('ESCRIBANO_ARTIFACT_THINK', DEFAULT_CONFIG.artifactThink),
221
+ sceneMinInterval: parseEnvNumberWithSource('ESCRIBANO_SCENE_MIN_INTERVAL', BASE_DEFAULTS.sceneMinInterval, sources, 'sceneMinInterval'),
222
+ sampleGapThreshold: parseEnvNumberWithSource('ESCRIBANO_SAMPLE_GAP_THRESHOLD', BASE_DEFAULTS.sampleGapThreshold, sources, 'sampleGapThreshold'),
223
+ sampleGapFill: parseEnvNumberWithSource('ESCRIBANO_SAMPLE_GAP_FILL', BASE_DEFAULTS.sampleGapFill, sources, 'sampleGapFill'),
224
+ mlxSocketPath: parseEnvStringWithSource('ESCRIBANO_MLX_SOCKET_PATH', BASE_DEFAULTS.mlxSocketPath, sources, 'mlxSocketPath'),
225
+ mlxStartupTimeout: parseEnvNumberWithSource('ESCRIBANO_MLX_STARTUP_TIMEOUT', BASE_DEFAULTS.mlxStartupTimeout, sources, 'mlxStartupTimeout'),
226
+ pythonPath: parseEnvStringWithSource('ESCRIBANO_PYTHON_PATH', undefined, sources, 'pythonPath'),
227
+ parallelTranscription: parseEnvBooleanWithSource('ESCRIBANO_PARALLEL_TRANSCRIPTION', BASE_DEFAULTS.parallelTranscription, sources, 'parallelTranscription'),
228
+ artifactThink: parseEnvBooleanWithSource('ESCRIBANO_ARTIFACT_THINK', BASE_DEFAULTS.artifactThink, sources, 'artifactThink'),
185
229
  // === OPTIONAL ===
186
- outlineUrl: process.env.ESCRIBANO_OUTLINE_URL,
187
- outlineToken: process.env.ESCRIBANO_OUTLINE_TOKEN,
188
- outlineCollection: process.env.ESCRIBANO_OUTLINE_COLLECTION ||
189
- DEFAULT_CONFIG.outlineCollection,
230
+ outlineUrl: parseEnvStringWithSource('ESCRIBANO_OUTLINE_URL', undefined, sources, 'outlineUrl'),
231
+ outlineToken: parseEnvStringWithSource('ESCRIBANO_OUTLINE_TOKEN', undefined, sources, 'outlineToken'),
232
+ outlineCollection: parseEnvStringWithSource('ESCRIBANO_OUTLINE_COLLECTION', BASE_DEFAULTS.outlineCollection, sources, 'outlineCollection'),
190
233
  };
191
- // 3. Validate with Zod
234
+ // 4. Validate with Zod
192
235
  const validated = configSchema.parse(config);
193
236
  cachedConfig = validated;
237
+ cachedSources = sources;
194
238
  return validated;
195
239
  }
240
/**
 * Expose the per-key source list recorded by the most recent config build.
 *
 * @returns {Array<{ key: string, source: string }>} The cached list itself (not a copy).
 */
export function getConfigSources() {
    const recordedSources = cachedSources;
    return recordedSources;
}
243
/**
 * Summarize the machine's memory: its size in GB and the tier it maps to.
 *
 * @returns {{ ramGB: number, tier: string }}
 */
export function getRamInfo() {
    const totalGB = getSystemRamGB();
    const { tier } = getRamTier(totalGB);
    return { ramGB: totalGB, tier };
}
196
248
  // =============================================================================
197
249
  // HELPERS
198
250
  // =============================================================================
199
- function parseEnvNumber(key, defaultValue) {
251
/**
 * Read a numeric setting from the environment and record where it came from.
 *
 * An unset, empty or whitespace-only variable counts as "not provided":
 * `Number('')` is 0, so without this guard a blank `KEY=` line would silently
 * turn into 0 instead of falling back. A value that is not a number logs a
 * warning and also falls back.
 *
 * @param {string} key - Environment variable name.
 * @param {number} defaultValue - Value returned when the variable is missing or invalid.
 * @param {Array<{ key: string, source: string }>} sources - Receives one entry per call.
 * @param {string} configKey - Config property name used in the source entry.
 * @returns {number} The parsed number or `defaultValue`.
 */
function parseEnvNumberWithSource(key, defaultValue, sources, configKey) {
    // frameWidth is the one setting whose fallback is labelled 'ram-aware';
    // use the same label on every fallback path for it.
    const fallbackSource = configKey === 'frameWidth' ? 'ram-aware' : 'default';
    const raw = process.env[key];
    if (raw === undefined || raw.trim() === '') {
        sources.push({ key: configKey, source: fallbackSource });
        return defaultValue;
    }
    const parsed = Number(raw);
    if (Number.isNaN(parsed)) {
        console.warn(`Invalid ${key}="${raw}", using default: ${defaultValue}`);
        sources.push({ key: configKey, source: fallbackSource });
        return defaultValue;
    }
    sources.push({ key: configKey, source: 'env' });
    return parsed;
}
210
- function parseEnvBoolean(key, defaultValue) {
270
/**
 * Read a string setting from the environment and record where it came from.
 *
 * An unset or empty variable counts as "not provided", so a blank `KEY=` line
 * neither replaces the default with an empty string nor shows up as a user
 * override.
 *
 * @param {string} key - Environment variable name.
 * @param {string | undefined} defaultValue - Value returned when the variable is missing or empty.
 * @param {Array<{ key: string, source: string }>} sources - Receives one entry per call.
 * @param {string} configKey - Config property name used in the source entry.
 * @returns {string | undefined} The environment value or `defaultValue`.
 */
function parseEnvStringWithSource(key, defaultValue, sources, configKey) {
    const value = process.env[key];
    if (value === undefined || value === '') {
        sources.push({ key: configKey, source: 'default' });
        return defaultValue;
    }
    sources.push({ key: configKey, source: 'env' });
    return value;
}
279
/**
 * Read a boolean setting from the environment and record where it came from.
 *
 * Only the exact string 'true' yields true; any other non-empty value yields
 * false. An unset or empty variable counts as "not provided" and falls back to
 * the default, rather than being treated as an explicit `false` override.
 *
 * @param {string} key - Environment variable name.
 * @param {boolean} defaultValue - Value returned when the variable is missing or empty.
 * @param {Array<{ key: string, source: string }>} sources - Receives one entry per call.
 * @param {string} configKey - Config property name used in the source entry.
 * @returns {boolean}
 */
function parseEnvBooleanWithSource(key, defaultValue, sources, configKey) {
    const value = process.env[key];
    if (value === undefined || value === '') {
        sources.push({ key: configKey, source: 'default' });
        return defaultValue;
    }
    sources.push({ key: configKey, source: 'env' });
    return value === 'true';
}
216
288
  // =============================================================================
289
+ // LOGGING
290
+ // =============================================================================
291
/**
 * Print a compact summary of the active configuration to the console:
 * an optional development-mode notice, RAM size and tier, one line each for
 * performance, quality and model settings, and the list of keys whose value
 * came from the environment (only when there is at least one).
 */
export function logConfig() {
    const cfg = loadConfig();
    const ram = getRamInfo();
    const overriddenKeys = getConfigSources()
        .filter((entry) => entry.source === 'env')
        .map((entry) => entry.key);

    // Compact one-liner per category
    const perfLine = [
        `frameWidth=${cfg.frameWidth}`,
        `vlmBatchSize=${cfg.vlmBatchSize}`,
        `sampleInterval=${cfg.sampleInterval}`,
    ].join(' ');
    const qualityLine = [
        `sceneThreshold=${cfg.sceneThreshold}`,
        `vlmMaxTokens=${cfg.vlmMaxTokens}`,
    ].join(' ');
    // Only the last path segment of the VLM model id is shown.
    const shortVlmName = cfg.vlmModel.split('/').pop();
    const modelsLine = `vlmModel=${shortVlmName} llmModel=${cfg.llmModel || 'auto'}`;

    const lines = [];
    if (isDevelopmentMode()) {
        lines.push('[Config] Mode: development (using project .env)');
    }
    lines.push(`[Config] RAM: ${ram.ramGB}GB (${ram.tier})`, `[Config] Performance: ${perfLine}`, `[Config] Quality: ${qualityLine}`, `[Config] Models: ${modelsLine}`);
    if (overriddenKeys.length > 0) {
        lines.push(`[Config] User overrides: ${overriddenKeys.join(', ')}`);
    }
    for (const line of lines) {
        console.log(line);
    }
}
312
+ // =============================================================================
217
313
  // CLI UTILITIES
218
314
  // =============================================================================
219
315
  export function showConfig() {
220
316
  const configPath = getConfigPath();
221
- // Create config file if it doesn't exist
317
+ // In dev mode, show that we're using project .env instead
318
+ if (isDevelopmentMode()) {
319
+ console.log('Development mode: Using project .env (not ~/.escribano/.env)\n');
320
+ console.log('Current configuration:');
321
+ const config = loadConfig();
322
+ console.log(JSON.stringify(config, null, 2));
323
+ return;
324
+ }
325
+ // Create config file if it doesn't exist (production mode)
222
326
  if (!existsSync(configPath)) {
223
327
  createDefaultConfig();
224
328
  }
225
329
  const config = loadConfig();
226
- console.log(`Config file: ${configPath}\n`);
330
+ const { ramGB, tier } = getRamInfo();
331
+ console.log(`Config file: ${configPath}`);
332
+ console.log(`System RAM: ${ramGB}GB (${tier} tier)\n`);
227
333
  console.log('Current configuration:');
228
334
  console.log(JSON.stringify(config, null, 2));
229
335
  }