npm - escribano - Versions diffs - 0.4.4 → 0.5.0 - Mend

escribano 0.4.4 → 0.5.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (20)

package/README.md +46 -26
package/dist/0_types.js +1 -1
package/dist/actions/generate-artifact-v3.js +5 -3
package/dist/actions/generate-summary-v3.js +81 -13
package/dist/adapters/intelligence.mlx.adapter.js +271 -197
package/dist/adapters/intelligence.ollama.adapter.js +37 -0
package/dist/batch-context.js +119 -33
package/dist/config.js +168 -62
package/dist/db/repositories/subject.sqlite.js +1 -1
package/dist/python-utils.js +28 -10
package/dist/services/subject-grouping.js +36 -9
package/dist/tests/index.test.js +25 -12
package/dist/tests/intelligence.mlx.adapter.test.js +13 -8
package/dist/tests/utils/env-logger.test.js +6 -6
package/dist/utils/model-detector.js +105 -2
package/migrations/010_llm_backend_metadata.sql +25 -0
package/migrations/011_llm_debug_log.sql +19 -0
package/migrations/012_llm_debug_log_prompt_result.sql +20 -0
package/package.json +1 -1
package/scripts/mlx_bridge.py +578 -78

package/dist/adapters/intelligence.ollama.adapter.js (changed)

@@ -132,6 +132,43 @@ async function doModelWarmup(modelName, config) {
         warmedModels.add(modelName); // Mark as warmed to avoid repeated attempts
     }
 }
+/**
+ * Unload an Ollama model from memory.
+ * Uses keep_alive: 0 to tell Ollama to release the model immediately.
+ */
+export async function unloadOllamaModel(modelName, config) {
+    try {
+        debugLog(`Unloading model: ${modelName}...`);
+        const response = await fetch(`${config.endpoint.replace('/chat', '').replace('/generate', '')}/generate`, {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify({
+                model: modelName,
+                prompt: '',
+                keep_alive: 0, // Unload immediately
+            }),
+        });
+        if (response.ok) {
+            warmedModels.delete(modelName);
+            debugLog(`Model ${modelName} unloaded.`);
+        }
+        else {
+            let bodyText = '';
+            try {
+                bodyText = await response.text();
+            }
+            catch {
+                // Ignore errors while reading response body for logging
+            }
+            debugLog(`Failed to unload model ${modelName}: HTTP ${response.status} ${response.statusText}` +
+                (bodyText ? ` - Response body: ${bodyText}` : ''));
+        }
+    }
+    catch (error) {
+        // Unload is best-effort - don't throw
+        debugLog(`Failed to unload model ${modelName}: ${error.message}`);
+    }
+}
 async function checkOllamaHealth() {
     try {
         const response = await fetch('http://localhost:11434/api/tags');

package/dist/batch-context.js (changed)

@@ -21,15 +21,15 @@ import { hasContentChanged, publishSummaryV3, updateRecordingOutlineMetadata, }
 import { createSileroPreprocessor } from './adapters/audio.silero.adapter.js';
 import { createFilesystemCaptureSource } from './adapters/capture.filesystem.adapter.js';
 import { cleanupMlxBridge, createMlxIntelligenceService, } from './adapters/intelligence.mlx.adapter.js';
-import { createOllamaIntelligenceService } from './adapters/intelligence.ollama.adapter.js';
+import { createOllamaIntelligenceService, unloadOllamaModel, } from './adapters/intelligence.ollama.adapter.js';
 import { createOutlinePublishingService } from './adapters/publishing.outline.adapter.js';
 import { createWhisperTranscriptionService } from './adapters/transcription.whisper.adapter.js';
 import { createFfmpegVideoService } from './adapters/video.ffmpeg.adapter.js';
-import { createDefaultConfig } from './config.js';
+import { createDefaultConfig, loadConfig, logConfig } from './config.js';
 import { getDbPath, getRepositories } from './db/index.js';
 import { log, setResourceTracker, step, withPipeline, } from './pipeline/context.js';
 import { ResourceTracker, setupStatsObserver, } from './stats/index.js';
-import { formatModelSelection, selectBestLLMModel, } from './utils/model-detector.js';
+import { formatModelSelection, selectBestLLMModel, selectBestMLXModel, } from './utils/model-detector.js';
 const MODELS_DIR = path.join(homedir(), '.escribano', 'models');
 const MODEL_FILE = 'ggml-large-v3.bin';
 const MODEL_PATH = path.join(MODELS_DIR, MODEL_FILE);
@@ -40,21 +40,34 @@ const MODEL_PATH = path.join(MODELS_DIR, MODEL_FILE);
 export async function initializeSystem() {
     // Create default config file if it doesn't exist
     createDefaultConfig();
+    // Load and log unified configuration
+    const config = loadConfig();
+    logConfig();
+    console.log('');
     console.log('Initializing database...');
     const repos = getRepositories();
     console.log(`Database ready: ${getDbPath()}`);
     console.log('');
     // Setup stats observer to capture pipeline events
     setupStatsObserver(repos.stats);
-    // Detect best LLM model
-    const modelSelection = await selectBestLLMModel();
-    console.log(formatModelSelection(modelSelection));
-    console.log('');
-    // Initialize adapters ONCE
-    console.log('[VLM] Using MLX-VLM for image processing');
-    const vlm = createMlxIntelligenceService();
-    console.log('[LLM] Using Ollama for text generation');
-    const llm = createOllamaIntelligenceService();
+    // Detect best LLM model based on configured backend
+    let llm;
+    let mlxService = null;
+    if (config.llmBackend === 'mlx') {
+        console.log('[LLM] Using MLX for text generation');
+        const mlxModelSelection = await selectBestMLXModel();
+        console.log(formatModelSelection(mlxModelSelection));
+        console.log('');
+        mlxService = createMlxIntelligenceService();
+        llm = mlxService;
+    }
+    else {
+        console.log('[LLM] Using Ollama for text generation');
+        const ollamaModelSelection = await selectBestLLMModel();
+        console.log(formatModelSelection(ollamaModelSelection));
+        console.log('');
+        llm = createOllamaIntelligenceService();
+    }
     const video = createFfmpegVideoService();
     const preprocessor = createSileroPreprocessor();
     const transcription = createWhisperTranscriptionService({
@@ -63,32 +76,42 @@ export async function initializeSystem() {
         cwd: MODELS_DIR,
         outputFormat: 'json',
     });
-    // Setup resource tracking
     const resourceTracker = new ResourceTracker();
-    resourceTracker.register(vlm);
     resourceTracker.register(video);
     resourceTracker.register(preprocessor);
-    // Ollama runs as a daemon - special case
-    resourceTracker.register({
-        getResourceName: () => 'ollama',
-        getPid: () => {
-            try {
-                const output = execSync('pgrep -f "ollama serve"').toString().trim();
-                const pid = parseInt(output.split('\n')[0] ?? '0', 10);
-                return pid > 0 ? pid : null;
-            }
-            catch {
-                return null;
-            }
-        },
-    });
+    if (config.llmBackend === 'ollama') {
+        resourceTracker.register({
+            getResourceName: () => 'ollama',
+            getPid: () => {
+                try {
+                    const output = execSync('pgrep -f "ollama serve"').toString().trim();
+                    const pid = parseInt(output.split('\n')[0] ?? '0', 10);
+                    return pid > 0 ? pid : null;
+                }
+                catch {
+                    return null;
+                }
+            },
+        });
+    }
+    else if (mlxService) {
+        resourceTracker.register(mlxService);
+    }
     setResourceTracker(resourceTracker);
     const outlineConfig = getOutlineConfig();
     return {
         repos,
-        adapters: { vlm, llm, video, preprocessor, transcription },
+        adapters: {
+            vlm: null,
+            llm,
+            video,
+            preprocessor,
+            transcription,
+        },
         resourceTracker,
         outlineConfig,
+        config,
+        llmBackend: config.llmBackend,
     };
 }
 /**
@@ -101,7 +124,9 @@ export async function processVideo(videoPath, ctx, options = {}) {
     const startTime = Date.now();
     const { force = false, skipSummary = false, micAudioPath, systemAudioPath, format = 'card', includePersonal = false, copyToClipboard = false, printToStdout = false, } = options;
     const { repos, adapters, outlineConfig } = ctx;
-    const { vlm, llm, video, preprocessor, transcription } = adapters;
+    const { llm, video, preprocessor, transcription } = adapters;
+    // Load unified config for lifecycle management
+    const config = loadConfig();
     try {
         // Create capture source for this specific file
         // Note: Hardcoded to filesystem source, not Cap recordings
@@ -151,25 +176,60 @@ export async function processVideo(videoPath, ctx, options = {}) {
         const skipProcessing = dbRec &&
             (dbRec.status === 'processed' || dbRec.status === 'published') &&
             !force;
+        // Create VLM adapter lazily (only if needed)
+        let vlm = null;
+        if (!skipProcessing) {
+            // Reuse the same MLX service instance for VLM (unified adapter handles both)
+            // Check if LLM is MLX backend - if so, it's already a unified VLM+LLM service
+            if (ctx.config.llmBackend === 'mlx' && llm) {
+                vlm = llm;
+            }
+            else {
+                console.log('[VLM] Initializing MLX-VLM for frame analysis...');
+                vlm = createMlxIntelligenceService();
+                ctx.resourceTracker.register(vlm);
+            }
+            ctx.adapters.vlm = vlm;
+        }
         if (!skipProcessing) {
             const runType = force
                 ? 'force'
                 : dbRec?.processing_step
                     ? 'resume'
                     : 'initial';
-            const runMetadata = collectRunMetadata(ctx.resourceTracker);
+            const runMetadata = collectRunMetadata(ctx.resourceTracker, ctx.config);
             await withPipeline(recording.id, runType, runMetadata, async () => {
+                if (!vlm)
+                    throw new Error('[VLM] Internal error: VLM adapter expected but not initialized');
                 await processRecordingV3(recording.id, repos, { preprocessor, transcription, video, intelligence: vlm }, { force });
             });
+            // Clean up VLM bridge after processing to free memory for LLM
+            if (vlm) {
+                console.log('[VLM] Unloading VLM model to free memory...');
+                await vlm.unloadVlm?.();
+                // Note: We don't kill the bridge process here, just unload the model
+                // The bridge process will be reused for subsequent recordings if needed
+            }
         }
         // Generate artifact and publish (unless skipped), tracked as a pipeline run
         let artifact = null;
         let outlineUrl;
         if (!skipSummary) {
-            const artifactRunMetadata = collectRunMetadata(ctx.resourceTracker);
+            // Guard: Ensure VLM is unloaded before LLM generation to prevent memory contention
+            if (ctx.adapters.vlm) {
+                console.log('[VLM] Warning: VLM bridge still loaded during artifact generation');
+                console.log('[VLM] Unloading to prevent memory contention with LLM...');
+                if ('unloadVlm' in ctx.adapters.vlm && ctx.adapters.vlm.unloadVlm) {
+                    await ctx.adapters.vlm.unloadVlm();
+                }
+                ctx.adapters.vlm = null;
+            }
+            const artifactRunMetadata = collectRunMetadata(ctx.resourceTracker, ctx.config);
             const pipelineResult = await withPipeline(recording.id, 'artifact', artifactRunMetadata, async () => {
                 console.log(`\nGenerating ${format} artifact...`);
                 let generatedArtifact;
+                // LLM model loading is handled internally by generateText()
+                // No explicit load/unload calls needed here
                 if (format === 'narrative') {
                     // Route narrative through the corrected path
                     generatedArtifact = await generateSummaryV3(recording.id, repos, llm, {
@@ -272,6 +332,31 @@ export async function processVideo(videoPath, ctx, options = {}) {
             });
             artifact = pipelineResult.artifact;
             outlineUrl = pipelineResult.outlineUrl;
+            // Unload LLM after artifact generation to free memory (good hygiene for all RAM tiers)
+            if (config.llmModel) {
+                console.log('[LLM] Unloading model to free memory...');
+                const intelConfig = {
+                    provider: 'ollama',
+                    endpoint: 'http://localhost:11434/api/chat',
+                    model: config.llmModel,
+                    generationModel: config.llmModel,
+                    visionModel: config.vlmModel,
+                    maxRetries: 3,
+                    timeout: 600000,
+                    keepAlive: '10m',
+                    maxContextSize: 131072,
+                    embedding: { model: 'nomic-embed-text', similarityThreshold: 0.75 },
+                    vlmBatchSize: config.vlmBatchSize,
+                    vlmMaxTokens: config.vlmMaxTokens,
+                    mlxSocketPath: config.mlxSocketPath,
+                };
+                await unloadOllamaModel(config.llmModel, intelConfig);
+            }
+            else if ('unloadLlm' in ctx.adapters.llm &&
+                ctx.adapters.llm.unloadLlm) {
+                console.log('[LLM] Unloading MLX model to free memory...');
+                await ctx.adapters.llm.unloadLlm();
+            }
         }
         console.log('\n✓ Complete!');
         return {
@@ -316,7 +401,7 @@ function getOutlineConfig() {
 /**
  * Collect metadata about the current run.
  */
-function collectRunMetadata(resourceTracker) {
+function collectRunMetadata(resourceTracker, config) {
     let commitHash = 'unknown';
     try {
         commitHash = execSync('git rev-parse --short HEAD', {
@@ -330,6 +415,7 @@ function collectRunMetadata(resourceTracker) {
         vlm_model: process.env.ESCRIBANO_VLM_MODEL ??
             'mlx-community/Qwen3-VL-2B-Instruct-bf16',
         llm_model: process.env.ESCRIBANO_LLM_MODEL ?? 'auto-detected',
+        llm_backend: config?.llmBackend ?? 'ollama',
         commit_hash: commitHash,
         node_version: process.version,
         platform: process.platform,

package/dist/config.js (changed)

@@ -5,12 +5,12 @@
  * 1. CLI arguments
  * 2. Shell environment variables (export ESCRIBANO_*)
  * 3. ~/.escribano/.env file
- * 4. Default values
+ * 4. RAM-aware defaults (based on system memory)
  *
  * Note: Project-level .env is NOT loaded by default (only for development).
  */
 import { existsSync, mkdirSync, writeFileSync } from 'node:fs';
-import { homedir } from 'node:os';
+import { homedir, totalmem } from 'node:os';
 import path from 'node:path';
 import { config as dotenvConfig } from 'dotenv';
 import { z } from 'zod';
@@ -27,19 +27,22 @@ const configSchema = z.object({
     vlmMaxTokens: z.number().int().min(500).max(8000).default(2000),
     // === MODELS ===
     llmModel: z.string().optional(),
+    llmBackend: z.enum(['mlx', 'ollama']).default('mlx'),
+    llmMlxModel: z.string().optional(),
     vlmModel: z.string().default('mlx-community/Qwen3-VL-2B-Instruct-4bit'),
     subjectGroupingModel: z.string().optional(),
     // === DEBUGGING ===
     verbose: z.boolean().default(false),
     debugOllama: z.boolean().default(false),
     debugVlm: z.boolean().default(false),
+    debugLlm: z.boolean().default(false),
     skipLlm: z.boolean().default(false),
     // === ADVANCED ===
     sceneMinInterval: z.number().int().min(1).max(10).default(2),
     sampleGapThreshold: z.number().int().min(5).max(60).default(15),
     sampleGapFill: z.number().int().min(1).max(10).default(3),
     mlxSocketPath: z.string().default('/tmp/escribano-mlx.sock'),
-    mlxStartupTimeout: z.number().int().min(10000).default(60000),
+    mlxStartupTimeout: z.number().int().min(10000).default(120000),
     pythonPath: z.string().optional(),
     parallelTranscription: z.boolean().default(false),
     artifactThink: z.boolean().default(false),
@@ -49,24 +52,41 @@ const configSchema = z.object({
     outlineCollection: z.string().default('Escribano Sessions'),
 });
 // =============================================================================
+// RAM DETECTION
+// =============================================================================
+function getSystemRamGB() {
+    return Math.round(totalmem() / (1024 * 1024 * 1024));
+}
+function getRamTier(ramGB) {
+    if (ramGB >= 32) {
+        return { tier: 'high', frameWidth: 1024 };
+    }
+    if (ramGB >= 16) {
+        return { tier: 'medium', frameWidth: 1024 };
+    }
+    return { tier: 'low', frameWidth: 768 };
+}
+// =============================================================================
 // DEFAULT CONFIG
 // =============================================================================
-const DEFAULT_CONFIG = {
+const BASE_DEFAULTS = {
     frameWidth: 1024,
     vlmBatchSize: 2,
     sampleInterval: 10,
     sceneThreshold: 0.4,
     vlmMaxTokens: 2000,
+    llmBackend: 'mlx',
     vlmModel: 'mlx-community/Qwen3-VL-2B-Instruct-4bit',
     verbose: false,
     debugOllama: false,
     debugVlm: false,
+    debugLlm: false,
     skipLlm: false,
     sceneMinInterval: 2,
     sampleGapThreshold: 15,
     sampleGapFill: 3,
     mlxSocketPath: '/tmp/escribano-mlx.sock',
-    mlxStartupTimeout: 60000,
+    mlxStartupTimeout: 120000,
     parallelTranscription: false,
     artifactThink: false,
     outlineCollection: 'Escribano Sessions',
@@ -79,30 +99,33 @@ const CONFIG_TEMPLATE = `# Escribano Configuration - ~/.escribano/.env
 # Full reference: https://github.com/eduardosanzb/escribano#configuration
 # === PERFORMANCE ===
-ESCRIBANO_FRAME_WIDTH=1024          # Lower = faster (1920, 1280, 1024, 640)
-ESCRIBANO_VLM_BATCH_SIZE=2          # 1-4 frames (lower = more reliable)
-ESCRIBANO_SAMPLE_INTERVAL=10        # Base frame sampling (seconds)
+# ESCRIBANO_FRAME_WIDTH=1024          # Auto-adjusted based on RAM (1024 for 16GB+, 768 for <16GB)
+# ESCRIBANO_VLM_BATCH_SIZE=2          # 1-4 frames (lower = more reliable)
+ESCRIBANO_SAMPLE_INTERVAL=10          # Base frame sampling (seconds)
 # === QUALITY ===
-ESCRIBANO_SCENE_THRESHOLD=0.4       # Scene detection sensitivity (0.0-1.0)
-ESCRIBANO_VLM_MAX_TOKENS=2000       # Token budget per batch
+ESCRIBANO_SCENE_THRESHOLD=0.4         # Scene detection sensitivity (0.0-1.0)
+ESCRIBANO_VLM_MAX_TOKENS=2000         # Token budget per batch
 # === MODELS ===
-# ESCRIBANO_LLM_MODEL=qwen3.5:27b   # Summary generation (auto-detected if not set)
+# ESCRIBANO_LLM_BACKEND=mlx             # LLM backend: 'mlx' (default) or 'ollama'
+# ESCRIBANO_LLM_MODEL=qwen3.5:27b       # Ollama model (only used if llmBackend='ollama')
+# ESCRIBANO_LLM_MLX_MODEL=              # MLX model (only used if llmBackend='mlx', auto-detected if not set)
 ESCRIBANO_VLM_MODEL=mlx-community/Qwen3-VL-2B-Instruct-4bit
 # === DEBUGGING ===
-ESCRIBANO_VERBOSE=false              # Enable verbose logging
-ESCRIBANO_DEBUG_VLM=false           # Debug VLM processing
+ESCRIBANO_VERBOSE=false               # Enable verbose logging
+ESCRIBANO_DEBUG_VLM=false             # Debug VLM processing
+ESCRIBANO_DEBUG_LLM=false             # Log all LLM calls to debug table
 # === ADVANCED ===
 ESCRIBANO_SCENE_MIN_INTERVAL=2
 ESCRIBANO_SAMPLE_GAP_THRESHOLD=15
 ESCRIBANO_SAMPLE_GAP_FILL=3
 ESCRIBANO_MLX_SOCKET_PATH=/tmp/escribano-mlx.sock
-ESCRIBANO_MLX_STARTUP_TIMEOUT=60000
-# ESCRIBANO_PYTHON_PATH=             # Auto-detected if not set
-ESCRIBANO_ARTIFACT_THINK=false      # Enable thinking for artifacts (slower)
+ESCRIBANO_MLX_STARTUP_TIMEOUT=120000
+# ESCRIBANO_PYTHON_PATH=              # Auto-detected if not set
+ESCRIBANO_ARTIFACT_THINK=false        # Enable thinking for artifacts (slower)
 # === OPTIONAL (Outline publishing) ===
 # ESCRIBANO_OUTLINE_URL=
@@ -113,6 +136,7 @@ ESCRIBANO_ARTIFACT_THINK=false      # Enable thinking for artifacts (slower)
 // CONFIG LOADER
 // =============================================================================
 let cachedConfig = null;
+let cachedSources = [];
 export function getConfigPath() {
     return path.join(homedir(), '.escribano', '.env');
 }
@@ -133,97 +157,179 @@ export function createDefaultConfig() {
         console.error(`Failed to create config file at ${configPath}: ${error.message}`);
     }
 }
+/**
+ * Check if running in development mode.
+ * Development mode = running via tsx from source (src/index.ts)
+ * Production mode = running compiled code (dist/index.js)
+ */
+function isDevelopmentMode() {
+    // Check if running from src directory via tsx
+    const currentFile = import.meta.url;
+    return currentFile.includes('/src/');
+}
 export function loadConfig() {
     if (cachedConfig) {
         return cachedConfig;
     }
-    // 1. Load from config file (if exists)
-    const configPath = getConfigPath();
-    if (existsSync(configPath)) {
-        try {
-            const result = dotenvConfig({ path: configPath });
-            if (result.error) {
-                console.error(`Failed to parse config file ${configPath}: ${result.error.message}`);
-                console.error('Using default configuration.');
+    const sources = [];
+    // 1. Load from user config file (PRODUCTION MODE ONLY)
+    // In development mode, we use project .env via tsx --env-file flag
+    if (!isDevelopmentMode()) {
+        const configPath = getConfigPath();
+        if (existsSync(configPath)) {
+            try {
+                const result = dotenvConfig({ path: configPath });
+                if (result.error) {
+                    console.error(`Failed to parse config file ${configPath}: ${result.error.message}`);
+                    console.error('Using default configuration.');
+                }
+                else if (result.parsed && Object.keys(result.parsed).length > 0) {
+                    console.log(`Loaded config from ${configPath}`);
+                }
             }
-            else if (result.parsed && Object.keys(result.parsed).length > 0) {
-                console.log(`Loaded config from ${configPath}`);
+            catch (error) {
+                console.error(`Error reading config file ${configPath}: ${error.message}`);
+                console.error('Using default configuration.');
             }
         }
-        catch (error) {
-            console.error(`Error reading config file ${configPath}: ${error.message}`);
-            console.error('Using default configuration.');
-        }
     }
-    // 2. Build config from environment variables
+    // 2. Get RAM-aware defaults
+    const ramGB = getSystemRamGB();
+    const ramTier = getRamTier(ramGB);
+    // 3. Build config with source tracking
     const config = {
         // === PERFORMANCE ===
-        frameWidth: parseEnvNumber('ESCRIBANO_FRAME_WIDTH', DEFAULT_CONFIG.frameWidth),
-        vlmBatchSize: parseEnvNumber('ESCRIBANO_VLM_BATCH_SIZE', DEFAULT_CONFIG.vlmBatchSize),
-        sampleInterval: parseEnvNumber('ESCRIBANO_SAMPLE_INTERVAL', DEFAULT_CONFIG.sampleInterval),
+        frameWidth: parseEnvNumberWithSource('ESCRIBANO_FRAME_WIDTH', ramTier.frameWidth, sources, 'frameWidth'),
+        vlmBatchSize: parseEnvNumberWithSource('ESCRIBANO_VLM_BATCH_SIZE', BASE_DEFAULTS.vlmBatchSize, sources, 'vlmBatchSize'),
+        sampleInterval: parseEnvNumberWithSource('ESCRIBANO_SAMPLE_INTERVAL', BASE_DEFAULTS.sampleInterval, sources, 'sampleInterval'),
         // === QUALITY ===
-        sceneThreshold: parseEnvNumber('ESCRIBANO_SCENE_THRESHOLD', DEFAULT_CONFIG.sceneThreshold),
-        vlmMaxTokens: parseEnvNumber('ESCRIBANO_VLM_MAX_TOKENS', DEFAULT_CONFIG.vlmMaxTokens),
+        sceneThreshold: parseEnvNumberWithSource('ESCRIBANO_SCENE_THRESHOLD', BASE_DEFAULTS.sceneThreshold, sources, 'sceneThreshold'),
+        vlmMaxTokens: parseEnvNumberWithSource('ESCRIBANO_VLM_MAX_TOKENS', BASE_DEFAULTS.vlmMaxTokens, sources, 'vlmMaxTokens'),
         // === MODELS ===
-        llmModel: process.env.ESCRIBANO_LLM_MODEL,
-        vlmModel: process.env.ESCRIBANO_VLM_MODEL || DEFAULT_CONFIG.vlmModel,
-        subjectGroupingModel: process.env.ESCRIBANO_SUBJECT_GROUPING_MODEL,
+        llmModel: parseEnvStringWithSource('ESCRIBANO_LLM_MODEL', undefined, sources, 'llmModel'),
+        llmBackend: (parseEnvStringWithSource('ESCRIBANO_LLM_BACKEND', BASE_DEFAULTS.llmBackend, sources, 'llmBackend') ?? 'mlx'),
+        llmMlxModel: parseEnvStringWithSource('ESCRIBANO_LLM_MLX_MODEL', undefined, sources, 'llmMlxModel'),
+        vlmModel: parseEnvStringWithSource('ESCRIBANO_VLM_MODEL', BASE_DEFAULTS.vlmModel, sources, 'vlmModel'),
+        subjectGroupingModel: parseEnvStringWithSource('ESCRIBANO_SUBJECT_GROUPING_MODEL', undefined, sources, 'subjectGroupingModel'),
         // === DEBUGGING ===
-        verbose: parseEnvBoolean('ESCRIBANO_VERBOSE', DEFAULT_CONFIG.verbose),
-        debugOllama: parseEnvBoolean('ESCRIBANO_DEBUG_OLLAMA', DEFAULT_CONFIG.debugOllama),
-        debugVlm: parseEnvBoolean('ESCRIBANO_DEBUG_VLM', DEFAULT_CONFIG.debugVlm),
-        skipLlm: parseEnvBoolean('ESCRIBANO_SKIP_LLM', DEFAULT_CONFIG.skipLlm),
+        verbose: parseEnvBooleanWithSource('ESCRIBANO_VERBOSE', BASE_DEFAULTS.verbose, sources, 'verbose'),
+        debugOllama: parseEnvBooleanWithSource('ESCRIBANO_DEBUG_OLLAMA', BASE_DEFAULTS.debugOllama, sources, 'debugOllama'),
+        debugVlm: parseEnvBooleanWithSource('ESCRIBANO_DEBUG_VLM', BASE_DEFAULTS.debugVlm, sources, 'debugVlm'),
+        debugLlm: parseEnvBooleanWithSource('ESCRIBANO_DEBUG_LLM', BASE_DEFAULTS.debugLlm, sources, 'debugLlm'),
+        skipLlm: parseEnvBooleanWithSource('ESCRIBANO_SKIP_LLM', BASE_DEFAULTS.skipLlm, sources, 'skipLlm'),
         // === ADVANCED ===
-        sceneMinInterval: parseEnvNumber('ESCRIBANO_SCENE_MIN_INTERVAL', DEFAULT_CONFIG.sceneMinInterval),
-        sampleGapThreshold: parseEnvNumber('ESCRIBANO_SAMPLE_GAP_THRESHOLD', DEFAULT_CONFIG.sampleGapThreshold),
-        sampleGapFill: parseEnvNumber('ESCRIBANO_SAMPLE_GAP_FILL', DEFAULT_CONFIG.sampleGapFill),
-        mlxSocketPath: process.env.ESCRIBANO_MLX_SOCKET_PATH || DEFAULT_CONFIG.mlxSocketPath,
-        mlxStartupTimeout: parseEnvNumber('ESCRIBANO_MLX_STARTUP_TIMEOUT', DEFAULT_CONFIG.mlxStartupTimeout),
-        pythonPath: process.env.ESCRIBANO_PYTHON_PATH,
-        parallelTranscription: parseEnvBoolean('ESCRIBANO_PARALLEL_TRANSCRIPTION', DEFAULT_CONFIG.parallelTranscription),
-        artifactThink: parseEnvBoolean('ESCRIBANO_ARTIFACT_THINK', DEFAULT_CONFIG.artifactThink),
+        sceneMinInterval: parseEnvNumberWithSource('ESCRIBANO_SCENE_MIN_INTERVAL', BASE_DEFAULTS.sceneMinInterval, sources, 'sceneMinInterval'),
+        sampleGapThreshold: parseEnvNumberWithSource('ESCRIBANO_SAMPLE_GAP_THRESHOLD', BASE_DEFAULTS.sampleGapThreshold, sources, 'sampleGapThreshold'),
+        sampleGapFill: parseEnvNumberWithSource('ESCRIBANO_SAMPLE_GAP_FILL', BASE_DEFAULTS.sampleGapFill, sources, 'sampleGapFill'),
+        mlxSocketPath: parseEnvStringWithSource('ESCRIBANO_MLX_SOCKET_PATH', BASE_DEFAULTS.mlxSocketPath, sources, 'mlxSocketPath'),
+        mlxStartupTimeout: parseEnvNumberWithSource('ESCRIBANO_MLX_STARTUP_TIMEOUT', BASE_DEFAULTS.mlxStartupTimeout, sources, 'mlxStartupTimeout'),
+        pythonPath: parseEnvStringWithSource('ESCRIBANO_PYTHON_PATH', undefined, sources, 'pythonPath'),
+        parallelTranscription: parseEnvBooleanWithSource('ESCRIBANO_PARALLEL_TRANSCRIPTION', BASE_DEFAULTS.parallelTranscription, sources, 'parallelTranscription'),
+        artifactThink: parseEnvBooleanWithSource('ESCRIBANO_ARTIFACT_THINK', BASE_DEFAULTS.artifactThink, sources, 'artifactThink'),
         // === OPTIONAL ===
-        outlineUrl: process.env.ESCRIBANO_OUTLINE_URL,
-        outlineToken: process.env.ESCRIBANO_OUTLINE_TOKEN,
-        outlineCollection: process.env.ESCRIBANO_OUTLINE_COLLECTION ||
-            DEFAULT_CONFIG.outlineCollection,
+        outlineUrl: parseEnvStringWithSource('ESCRIBANO_OUTLINE_URL', undefined, sources, 'outlineUrl'),
+        outlineToken: parseEnvStringWithSource('ESCRIBANO_OUTLINE_TOKEN', undefined, sources, 'outlineToken'),
+        outlineCollection: parseEnvStringWithSource('ESCRIBANO_OUTLINE_COLLECTION', BASE_DEFAULTS.outlineCollection, sources, 'outlineCollection'),
     };
-    // 3. Validate with Zod
+    // 4. Validate with Zod
     const validated = configSchema.parse(config);
     cachedConfig = validated;
+    cachedSources = sources;
     return validated;
 }
+export function getConfigSources() {
+    return cachedSources;
+}
+export function getRamInfo() {
+    const ramGB = getSystemRamGB();
+    const ramTier = getRamTier(ramGB);
+    return { ramGB, tier: ramTier.tier };
+}
 // =============================================================================
 // HELPERS
 // =============================================================================
-function parseEnvNumber(key, defaultValue) {
+function parseEnvNumberWithSource(key, defaultValue, sources, configKey) {
     const value = process.env[key];
-    if (!value)
+    if (value === undefined) {
+        const isRamAware = configKey === 'frameWidth';
+        sources.push({
+            key: configKey,
+            source: isRamAware ? 'ram-aware' : 'default',
+        });
         return defaultValue;
+    }
     const parsed = Number(value);
     if (Number.isNaN(parsed)) {
         console.warn(`Invalid ${key}="${value}", using default: ${defaultValue}`);
+        sources.push({ key: configKey, source: 'default' });
         return defaultValue;
     }
+    sources.push({ key: configKey, source: 'env' });
     return parsed;
 }
-function parseEnvBoolean(key, defaultValue) {
+function parseEnvStringWithSource(key, defaultValue, sources, configKey) {
     const value = process.env[key];
-    if (!value)
+    if (value === undefined) {
+        sources.push({ key: configKey, source: 'default' });
         return defaultValue;
+    }
+    sources.push({ key: configKey, source: 'env' });
+    return value;
+}
+function parseEnvBooleanWithSource(key, defaultValue, sources, configKey) {
+    const value = process.env[key];
+    if (value === undefined) {
+        sources.push({ key: configKey, source: 'default' });
+        return defaultValue;
+    }
+    sources.push({ key: configKey, source: 'env' });
     return value === 'true';
 }
 // =============================================================================
+// LOGGING
+// =============================================================================
+export function logConfig() {
+    const config = loadConfig();
+    const { ramGB, tier } = getRamInfo();
+    const sources = getConfigSources();
+    const userSetKeys = sources.filter((s) => s.source === 'env');
+    // Compact one-liner per category
+    const perf = `frameWidth=${config.frameWidth} vlmBatchSize=${config.vlmBatchSize} sampleInterval=${config.sampleInterval}`;
+    const quality = `sceneThreshold=${config.sceneThreshold} vlmMaxTokens=${config.vlmMaxTokens}`;
+    const models = `vlmModel=${config.vlmModel.split('/').pop()} llmModel=${config.llmModel || 'auto'}`;
+    // Show dev mode indicator if applicable
+    if (isDevelopmentMode()) {
+        console.log('[Config] Mode: development (using project .env)');
+    }
+    console.log(`[Config] RAM: ${ramGB}GB (${tier})`);
+    console.log(`[Config] Performance: ${perf}`);
+    console.log(`[Config] Quality: ${quality}`);
+    console.log(`[Config] Models: ${models}`);
+    if (userSetKeys.length > 0) {
+        console.log(`[Config] User overrides: ${userSetKeys.map((s) => s.key).join(', ')}`);
+    }
+}
+// =============================================================================
 // CLI UTILITIES
 // =============================================================================
 export function showConfig() {
     const configPath = getConfigPath();
-    // Create config file if it doesn't exist
+    // In dev mode, show that we're using project .env instead
+    if (isDevelopmentMode()) {
+        console.log('Development mode: Using project .env (not ~/.escribano/.env)\n');
+        console.log('Current configuration:');
+        const config = loadConfig();
+        console.log(JSON.stringify(config, null, 2));
+        return;
+    }
+    // Create config file if it doesn't exist (production mode)
     if (!existsSync(configPath)) {
         createDefaultConfig();
     }
     const config = loadConfig();
-    console.log(`Config file: ${configPath}\n`);
+    const { ramGB, tier } = getRamInfo();
+    console.log(`Config file: ${configPath}`);
+    console.log(`System RAM: ${ramGB}GB (${tier} tier)\n`);
     console.log('Current configuration:');
     console.log(JSON.stringify(config, null, 2));
 }