escribano 0.4.4 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +46 -26
- package/dist/0_types.js +1 -1
- package/dist/actions/generate-artifact-v3.js +5 -3
- package/dist/actions/generate-summary-v3.js +81 -13
- package/dist/adapters/intelligence.mlx.adapter.js +271 -197
- package/dist/adapters/intelligence.ollama.adapter.js +37 -0
- package/dist/batch-context.js +119 -33
- package/dist/config.js +168 -62
- package/dist/db/repositories/subject.sqlite.js +1 -1
- package/dist/python-utils.js +28 -10
- package/dist/services/subject-grouping.js +36 -9
- package/dist/tests/index.test.js +25 -12
- package/dist/tests/intelligence.mlx.adapter.test.js +13 -8
- package/dist/tests/utils/env-logger.test.js +6 -6
- package/dist/utils/model-detector.js +105 -2
- package/migrations/010_llm_backend_metadata.sql +25 -0
- package/migrations/011_llm_debug_log.sql +19 -0
- package/migrations/012_llm_debug_log_prompt_result.sql +20 -0
- package/package.json +1 -1
- package/scripts/mlx_bridge.py +578 -78
|
@@ -132,6 +132,43 @@ async function doModelWarmup(modelName, config) {
|
|
|
132
132
|
warmedModels.add(modelName); // Mark as warmed to avoid repeated attempts
|
|
133
133
|
}
|
|
134
134
|
}
|
|
135
|
+
/**
|
|
136
|
+
* Unload an Ollama model from memory.
|
|
137
|
+
* Uses keep_alive: 0 to tell Ollama to release the model immediately.
|
|
138
|
+
*/
|
|
139
|
+
export async function unloadOllamaModel(modelName, config) {
|
|
140
|
+
try {
|
|
141
|
+
debugLog(`Unloading model: ${modelName}...`);
|
|
142
|
+
const response = await fetch(`${config.endpoint.replace('/chat', '').replace('/generate', '')}/generate`, {
|
|
143
|
+
method: 'POST',
|
|
144
|
+
headers: { 'Content-Type': 'application/json' },
|
|
145
|
+
body: JSON.stringify({
|
|
146
|
+
model: modelName,
|
|
147
|
+
prompt: '',
|
|
148
|
+
keep_alive: 0, // Unload immediately
|
|
149
|
+
}),
|
|
150
|
+
});
|
|
151
|
+
if (response.ok) {
|
|
152
|
+
warmedModels.delete(modelName);
|
|
153
|
+
debugLog(`Model ${modelName} unloaded.`);
|
|
154
|
+
}
|
|
155
|
+
else {
|
|
156
|
+
let bodyText = '';
|
|
157
|
+
try {
|
|
158
|
+
bodyText = await response.text();
|
|
159
|
+
}
|
|
160
|
+
catch {
|
|
161
|
+
// Ignore errors while reading response body for logging
|
|
162
|
+
}
|
|
163
|
+
debugLog(`Failed to unload model ${modelName}: HTTP ${response.status} ${response.statusText}` +
|
|
164
|
+
(bodyText ? ` - Response body: ${bodyText}` : ''));
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
catch (error) {
|
|
168
|
+
// Unload is best-effort - don't throw
|
|
169
|
+
debugLog(`Failed to unload model ${modelName}: ${error.message}`);
|
|
170
|
+
}
|
|
171
|
+
}
|
|
135
172
|
async function checkOllamaHealth() {
|
|
136
173
|
try {
|
|
137
174
|
const response = await fetch('http://localhost:11434/api/tags');
|
package/dist/batch-context.js
CHANGED
|
@@ -21,15 +21,15 @@ import { hasContentChanged, publishSummaryV3, updateRecordingOutlineMetadata, }
|
|
|
21
21
|
import { createSileroPreprocessor } from './adapters/audio.silero.adapter.js';
|
|
22
22
|
import { createFilesystemCaptureSource } from './adapters/capture.filesystem.adapter.js';
|
|
23
23
|
import { cleanupMlxBridge, createMlxIntelligenceService, } from './adapters/intelligence.mlx.adapter.js';
|
|
24
|
-
import { createOllamaIntelligenceService } from './adapters/intelligence.ollama.adapter.js';
|
|
24
|
+
import { createOllamaIntelligenceService, unloadOllamaModel, } from './adapters/intelligence.ollama.adapter.js';
|
|
25
25
|
import { createOutlinePublishingService } from './adapters/publishing.outline.adapter.js';
|
|
26
26
|
import { createWhisperTranscriptionService } from './adapters/transcription.whisper.adapter.js';
|
|
27
27
|
import { createFfmpegVideoService } from './adapters/video.ffmpeg.adapter.js';
|
|
28
|
-
import { createDefaultConfig } from './config.js';
|
|
28
|
+
import { createDefaultConfig, loadConfig, logConfig } from './config.js';
|
|
29
29
|
import { getDbPath, getRepositories } from './db/index.js';
|
|
30
30
|
import { log, setResourceTracker, step, withPipeline, } from './pipeline/context.js';
|
|
31
31
|
import { ResourceTracker, setupStatsObserver, } from './stats/index.js';
|
|
32
|
-
import { formatModelSelection, selectBestLLMModel, } from './utils/model-detector.js';
|
|
32
|
+
import { formatModelSelection, selectBestLLMModel, selectBestMLXModel, } from './utils/model-detector.js';
|
|
33
33
|
const MODELS_DIR = path.join(homedir(), '.escribano', 'models');
|
|
34
34
|
const MODEL_FILE = 'ggml-large-v3.bin';
|
|
35
35
|
const MODEL_PATH = path.join(MODELS_DIR, MODEL_FILE);
|
|
@@ -40,21 +40,34 @@ const MODEL_PATH = path.join(MODELS_DIR, MODEL_FILE);
|
|
|
40
40
|
export async function initializeSystem() {
|
|
41
41
|
// Create default config file if it doesn't exist
|
|
42
42
|
createDefaultConfig();
|
|
43
|
+
// Load and log unified configuration
|
|
44
|
+
const config = loadConfig();
|
|
45
|
+
logConfig();
|
|
46
|
+
console.log('');
|
|
43
47
|
console.log('Initializing database...');
|
|
44
48
|
const repos = getRepositories();
|
|
45
49
|
console.log(`Database ready: ${getDbPath()}`);
|
|
46
50
|
console.log('');
|
|
47
51
|
// Setup stats observer to capture pipeline events
|
|
48
52
|
setupStatsObserver(repos.stats);
|
|
49
|
-
// Detect best LLM model
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
53
|
+
// Detect best LLM model based on configured backend
|
|
54
|
+
let llm;
|
|
55
|
+
let mlxService = null;
|
|
56
|
+
if (config.llmBackend === 'mlx') {
|
|
57
|
+
console.log('[LLM] Using MLX for text generation');
|
|
58
|
+
const mlxModelSelection = await selectBestMLXModel();
|
|
59
|
+
console.log(formatModelSelection(mlxModelSelection));
|
|
60
|
+
console.log('');
|
|
61
|
+
mlxService = createMlxIntelligenceService();
|
|
62
|
+
llm = mlxService;
|
|
63
|
+
}
|
|
64
|
+
else {
|
|
65
|
+
console.log('[LLM] Using Ollama for text generation');
|
|
66
|
+
const ollamaModelSelection = await selectBestLLMModel();
|
|
67
|
+
console.log(formatModelSelection(ollamaModelSelection));
|
|
68
|
+
console.log('');
|
|
69
|
+
llm = createOllamaIntelligenceService();
|
|
70
|
+
}
|
|
58
71
|
const video = createFfmpegVideoService();
|
|
59
72
|
const preprocessor = createSileroPreprocessor();
|
|
60
73
|
const transcription = createWhisperTranscriptionService({
|
|
@@ -63,32 +76,42 @@ export async function initializeSystem() {
|
|
|
63
76
|
cwd: MODELS_DIR,
|
|
64
77
|
outputFormat: 'json',
|
|
65
78
|
});
|
|
66
|
-
// Setup resource tracking
|
|
67
79
|
const resourceTracker = new ResourceTracker();
|
|
68
|
-
resourceTracker.register(vlm);
|
|
69
80
|
resourceTracker.register(video);
|
|
70
81
|
resourceTracker.register(preprocessor);
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
82
|
+
if (config.llmBackend === 'ollama') {
|
|
83
|
+
resourceTracker.register({
|
|
84
|
+
getResourceName: () => 'ollama',
|
|
85
|
+
getPid: () => {
|
|
86
|
+
try {
|
|
87
|
+
const output = execSync('pgrep -f "ollama serve"').toString().trim();
|
|
88
|
+
const pid = parseInt(output.split('\n')[0] ?? '0', 10);
|
|
89
|
+
return pid > 0 ? pid : null;
|
|
90
|
+
}
|
|
91
|
+
catch {
|
|
92
|
+
return null;
|
|
93
|
+
}
|
|
94
|
+
},
|
|
95
|
+
});
|
|
96
|
+
}
|
|
97
|
+
else if (mlxService) {
|
|
98
|
+
resourceTracker.register(mlxService);
|
|
99
|
+
}
|
|
85
100
|
setResourceTracker(resourceTracker);
|
|
86
101
|
const outlineConfig = getOutlineConfig();
|
|
87
102
|
return {
|
|
88
103
|
repos,
|
|
89
|
-
adapters: {
|
|
104
|
+
adapters: {
|
|
105
|
+
vlm: null,
|
|
106
|
+
llm,
|
|
107
|
+
video,
|
|
108
|
+
preprocessor,
|
|
109
|
+
transcription,
|
|
110
|
+
},
|
|
90
111
|
resourceTracker,
|
|
91
112
|
outlineConfig,
|
|
113
|
+
config,
|
|
114
|
+
llmBackend: config.llmBackend,
|
|
92
115
|
};
|
|
93
116
|
}
|
|
94
117
|
/**
|
|
@@ -101,7 +124,9 @@ export async function processVideo(videoPath, ctx, options = {}) {
|
|
|
101
124
|
const startTime = Date.now();
|
|
102
125
|
const { force = false, skipSummary = false, micAudioPath, systemAudioPath, format = 'card', includePersonal = false, copyToClipboard = false, printToStdout = false, } = options;
|
|
103
126
|
const { repos, adapters, outlineConfig } = ctx;
|
|
104
|
-
const {
|
|
127
|
+
const { llm, video, preprocessor, transcription } = adapters;
|
|
128
|
+
// Load unified config for lifecycle management
|
|
129
|
+
const config = loadConfig();
|
|
105
130
|
try {
|
|
106
131
|
// Create capture source for this specific file
|
|
107
132
|
// Note: Hardcoded to filesystem source, not Cap recordings
|
|
@@ -151,25 +176,60 @@ export async function processVideo(videoPath, ctx, options = {}) {
|
|
|
151
176
|
const skipProcessing = dbRec &&
|
|
152
177
|
(dbRec.status === 'processed' || dbRec.status === 'published') &&
|
|
153
178
|
!force;
|
|
179
|
+
// Create VLM adapter lazily (only if needed)
|
|
180
|
+
let vlm = null;
|
|
181
|
+
if (!skipProcessing) {
|
|
182
|
+
// Reuse the same MLX service instance for VLM (unified adapter handles both)
|
|
183
|
+
// Check if LLM is MLX backend - if so, it's already a unified VLM+LLM service
|
|
184
|
+
if (ctx.config.llmBackend === 'mlx' && llm) {
|
|
185
|
+
vlm = llm;
|
|
186
|
+
}
|
|
187
|
+
else {
|
|
188
|
+
console.log('[VLM] Initializing MLX-VLM for frame analysis...');
|
|
189
|
+
vlm = createMlxIntelligenceService();
|
|
190
|
+
ctx.resourceTracker.register(vlm);
|
|
191
|
+
}
|
|
192
|
+
ctx.adapters.vlm = vlm;
|
|
193
|
+
}
|
|
154
194
|
if (!skipProcessing) {
|
|
155
195
|
const runType = force
|
|
156
196
|
? 'force'
|
|
157
197
|
: dbRec?.processing_step
|
|
158
198
|
? 'resume'
|
|
159
199
|
: 'initial';
|
|
160
|
-
const runMetadata = collectRunMetadata(ctx.resourceTracker);
|
|
200
|
+
const runMetadata = collectRunMetadata(ctx.resourceTracker, ctx.config);
|
|
161
201
|
await withPipeline(recording.id, runType, runMetadata, async () => {
|
|
202
|
+
if (!vlm)
|
|
203
|
+
throw new Error('[VLM] Internal error: VLM adapter expected but not initialized');
|
|
162
204
|
await processRecordingV3(recording.id, repos, { preprocessor, transcription, video, intelligence: vlm }, { force });
|
|
163
205
|
});
|
|
206
|
+
// Clean up VLM bridge after processing to free memory for LLM
|
|
207
|
+
if (vlm) {
|
|
208
|
+
console.log('[VLM] Unloading VLM model to free memory...');
|
|
209
|
+
await vlm.unloadVlm?.();
|
|
210
|
+
// Note: We don't kill the bridge process here, just unload the model
|
|
211
|
+
// The bridge process will be reused for subsequent recordings if needed
|
|
212
|
+
}
|
|
164
213
|
}
|
|
165
214
|
// Generate artifact and publish (unless skipped), tracked as a pipeline run
|
|
166
215
|
let artifact = null;
|
|
167
216
|
let outlineUrl;
|
|
168
217
|
if (!skipSummary) {
|
|
169
|
-
|
|
218
|
+
// Guard: Ensure VLM is unloaded before LLM generation to prevent memory contention
|
|
219
|
+
if (ctx.adapters.vlm) {
|
|
220
|
+
console.log('[VLM] Warning: VLM bridge still loaded during artifact generation');
|
|
221
|
+
console.log('[VLM] Unloading to prevent memory contention with LLM...');
|
|
222
|
+
if ('unloadVlm' in ctx.adapters.vlm && ctx.adapters.vlm.unloadVlm) {
|
|
223
|
+
await ctx.adapters.vlm.unloadVlm();
|
|
224
|
+
}
|
|
225
|
+
ctx.adapters.vlm = null;
|
|
226
|
+
}
|
|
227
|
+
const artifactRunMetadata = collectRunMetadata(ctx.resourceTracker, ctx.config);
|
|
170
228
|
const pipelineResult = await withPipeline(recording.id, 'artifact', artifactRunMetadata, async () => {
|
|
171
229
|
console.log(`\nGenerating ${format} artifact...`);
|
|
172
230
|
let generatedArtifact;
|
|
231
|
+
// LLM model loading is handled internally by generateText()
|
|
232
|
+
// No explicit load/unload calls needed here
|
|
173
233
|
if (format === 'narrative') {
|
|
174
234
|
// Route narrative through the corrected path
|
|
175
235
|
generatedArtifact = await generateSummaryV3(recording.id, repos, llm, {
|
|
@@ -272,6 +332,31 @@ export async function processVideo(videoPath, ctx, options = {}) {
|
|
|
272
332
|
});
|
|
273
333
|
artifact = pipelineResult.artifact;
|
|
274
334
|
outlineUrl = pipelineResult.outlineUrl;
|
|
335
|
+
// Unload LLM after artifact generation to free memory (good hygiene for all RAM tiers)
|
|
336
|
+
if (config.llmModel) {
|
|
337
|
+
console.log('[LLM] Unloading model to free memory...');
|
|
338
|
+
const intelConfig = {
|
|
339
|
+
provider: 'ollama',
|
|
340
|
+
endpoint: 'http://localhost:11434/api/chat',
|
|
341
|
+
model: config.llmModel,
|
|
342
|
+
generationModel: config.llmModel,
|
|
343
|
+
visionModel: config.vlmModel,
|
|
344
|
+
maxRetries: 3,
|
|
345
|
+
timeout: 600000,
|
|
346
|
+
keepAlive: '10m',
|
|
347
|
+
maxContextSize: 131072,
|
|
348
|
+
embedding: { model: 'nomic-embed-text', similarityThreshold: 0.75 },
|
|
349
|
+
vlmBatchSize: config.vlmBatchSize,
|
|
350
|
+
vlmMaxTokens: config.vlmMaxTokens,
|
|
351
|
+
mlxSocketPath: config.mlxSocketPath,
|
|
352
|
+
};
|
|
353
|
+
await unloadOllamaModel(config.llmModel, intelConfig);
|
|
354
|
+
}
|
|
355
|
+
else if ('unloadLlm' in ctx.adapters.llm &&
|
|
356
|
+
ctx.adapters.llm.unloadLlm) {
|
|
357
|
+
console.log('[LLM] Unloading MLX model to free memory...');
|
|
358
|
+
await ctx.adapters.llm.unloadLlm();
|
|
359
|
+
}
|
|
275
360
|
}
|
|
276
361
|
console.log('\n✓ Complete!');
|
|
277
362
|
return {
|
|
@@ -316,7 +401,7 @@ function getOutlineConfig() {
|
|
|
316
401
|
/**
|
|
317
402
|
* Collect metadata about the current run.
|
|
318
403
|
*/
|
|
319
|
-
function collectRunMetadata(resourceTracker) {
|
|
404
|
+
function collectRunMetadata(resourceTracker, config) {
|
|
320
405
|
let commitHash = 'unknown';
|
|
321
406
|
try {
|
|
322
407
|
commitHash = execSync('git rev-parse --short HEAD', {
|
|
@@ -330,6 +415,7 @@ function collectRunMetadata(resourceTracker) {
|
|
|
330
415
|
vlm_model: process.env.ESCRIBANO_VLM_MODEL ??
|
|
331
416
|
'mlx-community/Qwen3-VL-2B-Instruct-bf16',
|
|
332
417
|
llm_model: process.env.ESCRIBANO_LLM_MODEL ?? 'auto-detected',
|
|
418
|
+
llm_backend: config?.llmBackend ?? 'ollama',
|
|
333
419
|
commit_hash: commitHash,
|
|
334
420
|
node_version: process.version,
|
|
335
421
|
platform: process.platform,
|
package/dist/config.js
CHANGED
|
@@ -5,12 +5,12 @@
|
|
|
5
5
|
* 1. CLI arguments
|
|
6
6
|
* 2. Shell environment variables (export ESCRIBANO_*)
|
|
7
7
|
* 3. ~/.escribano/.env file
|
|
8
|
-
* 4.
|
|
8
|
+
* 4. RAM-aware defaults (based on system memory)
|
|
9
9
|
*
|
|
10
10
|
* Note: Project-level .env is NOT loaded by default (only for development).
|
|
11
11
|
*/
|
|
12
12
|
import { existsSync, mkdirSync, writeFileSync } from 'node:fs';
|
|
13
|
-
import { homedir } from 'node:os';
|
|
13
|
+
import { homedir, totalmem } from 'node:os';
|
|
14
14
|
import path from 'node:path';
|
|
15
15
|
import { config as dotenvConfig } from 'dotenv';
|
|
16
16
|
import { z } from 'zod';
|
|
@@ -27,19 +27,22 @@ const configSchema = z.object({
|
|
|
27
27
|
vlmMaxTokens: z.number().int().min(500).max(8000).default(2000),
|
|
28
28
|
// === MODELS ===
|
|
29
29
|
llmModel: z.string().optional(),
|
|
30
|
+
llmBackend: z.enum(['mlx', 'ollama']).default('mlx'),
|
|
31
|
+
llmMlxModel: z.string().optional(),
|
|
30
32
|
vlmModel: z.string().default('mlx-community/Qwen3-VL-2B-Instruct-4bit'),
|
|
31
33
|
subjectGroupingModel: z.string().optional(),
|
|
32
34
|
// === DEBUGGING ===
|
|
33
35
|
verbose: z.boolean().default(false),
|
|
34
36
|
debugOllama: z.boolean().default(false),
|
|
35
37
|
debugVlm: z.boolean().default(false),
|
|
38
|
+
debugLlm: z.boolean().default(false),
|
|
36
39
|
skipLlm: z.boolean().default(false),
|
|
37
40
|
// === ADVANCED ===
|
|
38
41
|
sceneMinInterval: z.number().int().min(1).max(10).default(2),
|
|
39
42
|
sampleGapThreshold: z.number().int().min(5).max(60).default(15),
|
|
40
43
|
sampleGapFill: z.number().int().min(1).max(10).default(3),
|
|
41
44
|
mlxSocketPath: z.string().default('/tmp/escribano-mlx.sock'),
|
|
42
|
-
mlxStartupTimeout: z.number().int().min(10000).default(
|
|
45
|
+
mlxStartupTimeout: z.number().int().min(10000).default(120000),
|
|
43
46
|
pythonPath: z.string().optional(),
|
|
44
47
|
parallelTranscription: z.boolean().default(false),
|
|
45
48
|
artifactThink: z.boolean().default(false),
|
|
@@ -49,24 +52,41 @@ const configSchema = z.object({
|
|
|
49
52
|
outlineCollection: z.string().default('Escribano Sessions'),
|
|
50
53
|
});
|
|
51
54
|
// =============================================================================
|
|
55
|
+
// RAM DETECTION
|
|
56
|
+
// =============================================================================
|
|
57
|
+
function getSystemRamGB() {
|
|
58
|
+
return Math.round(totalmem() / (1024 * 1024 * 1024));
|
|
59
|
+
}
|
|
60
|
+
function getRamTier(ramGB) {
|
|
61
|
+
if (ramGB >= 32) {
|
|
62
|
+
return { tier: 'high', frameWidth: 1024 };
|
|
63
|
+
}
|
|
64
|
+
if (ramGB >= 16) {
|
|
65
|
+
return { tier: 'medium', frameWidth: 1024 };
|
|
66
|
+
}
|
|
67
|
+
return { tier: 'low', frameWidth: 768 };
|
|
68
|
+
}
|
|
69
|
+
// =============================================================================
|
|
52
70
|
// DEFAULT CONFIG
|
|
53
71
|
// =============================================================================
|
|
54
|
-
const
|
|
72
|
+
const BASE_DEFAULTS = {
|
|
55
73
|
frameWidth: 1024,
|
|
56
74
|
vlmBatchSize: 2,
|
|
57
75
|
sampleInterval: 10,
|
|
58
76
|
sceneThreshold: 0.4,
|
|
59
77
|
vlmMaxTokens: 2000,
|
|
78
|
+
llmBackend: 'mlx',
|
|
60
79
|
vlmModel: 'mlx-community/Qwen3-VL-2B-Instruct-4bit',
|
|
61
80
|
verbose: false,
|
|
62
81
|
debugOllama: false,
|
|
63
82
|
debugVlm: false,
|
|
83
|
+
debugLlm: false,
|
|
64
84
|
skipLlm: false,
|
|
65
85
|
sceneMinInterval: 2,
|
|
66
86
|
sampleGapThreshold: 15,
|
|
67
87
|
sampleGapFill: 3,
|
|
68
88
|
mlxSocketPath: '/tmp/escribano-mlx.sock',
|
|
69
|
-
mlxStartupTimeout:
|
|
89
|
+
mlxStartupTimeout: 120000,
|
|
70
90
|
parallelTranscription: false,
|
|
71
91
|
artifactThink: false,
|
|
72
92
|
outlineCollection: 'Escribano Sessions',
|
|
@@ -79,30 +99,33 @@ const CONFIG_TEMPLATE = `# Escribano Configuration - ~/.escribano/.env
|
|
|
79
99
|
# Full reference: https://github.com/eduardosanzb/escribano#configuration
|
|
80
100
|
|
|
81
101
|
# === PERFORMANCE ===
|
|
82
|
-
ESCRIBANO_FRAME_WIDTH=1024 #
|
|
83
|
-
ESCRIBANO_VLM_BATCH_SIZE=2 # 1-4 frames (lower = more reliable)
|
|
84
|
-
ESCRIBANO_SAMPLE_INTERVAL=10
|
|
102
|
+
# ESCRIBANO_FRAME_WIDTH=1024 # Auto-adjusted based on RAM (1024 for 16GB+, 768 for <16GB)
|
|
103
|
+
# ESCRIBANO_VLM_BATCH_SIZE=2 # 1-4 frames (lower = more reliable)
|
|
104
|
+
ESCRIBANO_SAMPLE_INTERVAL=10 # Base frame sampling (seconds)
|
|
85
105
|
|
|
86
106
|
# === QUALITY ===
|
|
87
|
-
ESCRIBANO_SCENE_THRESHOLD=0.4
|
|
88
|
-
ESCRIBANO_VLM_MAX_TOKENS=2000
|
|
107
|
+
ESCRIBANO_SCENE_THRESHOLD=0.4 # Scene detection sensitivity (0.0-1.0)
|
|
108
|
+
ESCRIBANO_VLM_MAX_TOKENS=2000 # Token budget per batch
|
|
89
109
|
|
|
90
110
|
# === MODELS ===
|
|
91
|
-
#
|
|
111
|
+
# ESCRIBANO_LLM_BACKEND=mlx # LLM backend: 'mlx' (default) or 'ollama'
|
|
112
|
+
# ESCRIBANO_LLM_MODEL=qwen3.5:27b # Ollama model (only used if llmBackend='ollama')
|
|
113
|
+
# ESCRIBANO_LLM_MLX_MODEL= # MLX model (only used if llmBackend='mlx', auto-detected if not set)
|
|
92
114
|
ESCRIBANO_VLM_MODEL=mlx-community/Qwen3-VL-2B-Instruct-4bit
|
|
93
115
|
|
|
94
116
|
# === DEBUGGING ===
|
|
95
|
-
ESCRIBANO_VERBOSE=false
|
|
96
|
-
ESCRIBANO_DEBUG_VLM=false
|
|
117
|
+
ESCRIBANO_VERBOSE=false # Enable verbose logging
|
|
118
|
+
ESCRIBANO_DEBUG_VLM=false # Debug VLM processing
|
|
119
|
+
ESCRIBANO_DEBUG_LLM=false # Log all LLM calls to debug table
|
|
97
120
|
|
|
98
121
|
# === ADVANCED ===
|
|
99
122
|
ESCRIBANO_SCENE_MIN_INTERVAL=2
|
|
100
123
|
ESCRIBANO_SAMPLE_GAP_THRESHOLD=15
|
|
101
124
|
ESCRIBANO_SAMPLE_GAP_FILL=3
|
|
102
125
|
ESCRIBANO_MLX_SOCKET_PATH=/tmp/escribano-mlx.sock
|
|
103
|
-
ESCRIBANO_MLX_STARTUP_TIMEOUT=
|
|
104
|
-
# ESCRIBANO_PYTHON_PATH=
|
|
105
|
-
ESCRIBANO_ARTIFACT_THINK=false
|
|
126
|
+
ESCRIBANO_MLX_STARTUP_TIMEOUT=120000
|
|
127
|
+
# ESCRIBANO_PYTHON_PATH= # Auto-detected if not set
|
|
128
|
+
ESCRIBANO_ARTIFACT_THINK=false # Enable thinking for artifacts (slower)
|
|
106
129
|
|
|
107
130
|
# === OPTIONAL (Outline publishing) ===
|
|
108
131
|
# ESCRIBANO_OUTLINE_URL=
|
|
@@ -113,6 +136,7 @@ ESCRIBANO_ARTIFACT_THINK=false # Enable thinking for artifacts (slower)
|
|
|
113
136
|
// CONFIG LOADER
|
|
114
137
|
// =============================================================================
|
|
115
138
|
let cachedConfig = null;
|
|
139
|
+
let cachedSources = [];
|
|
116
140
|
export function getConfigPath() {
|
|
117
141
|
return path.join(homedir(), '.escribano', '.env');
|
|
118
142
|
}
|
|
@@ -133,97 +157,179 @@ export function createDefaultConfig() {
|
|
|
133
157
|
console.error(`Failed to create config file at ${configPath}: ${error.message}`);
|
|
134
158
|
}
|
|
135
159
|
}
|
|
160
|
+
/**
|
|
161
|
+
* Check if running in development mode.
|
|
162
|
+
* Development mode = running via tsx from source (src/index.ts)
|
|
163
|
+
* Production mode = running compiled code (dist/index.js)
|
|
164
|
+
*/
|
|
165
|
+
function isDevelopmentMode() {
|
|
166
|
+
// Check if running from src directory via tsx
|
|
167
|
+
const currentFile = import.meta.url;
|
|
168
|
+
return currentFile.includes('/src/');
|
|
169
|
+
}
|
|
136
170
|
export function loadConfig() {
|
|
137
171
|
if (cachedConfig) {
|
|
138
172
|
return cachedConfig;
|
|
139
173
|
}
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
174
|
+
const sources = [];
|
|
175
|
+
// 1. Load from user config file (PRODUCTION MODE ONLY)
|
|
176
|
+
// In development mode, we use project .env via tsx --env-file flag
|
|
177
|
+
if (!isDevelopmentMode()) {
|
|
178
|
+
const configPath = getConfigPath();
|
|
179
|
+
if (existsSync(configPath)) {
|
|
180
|
+
try {
|
|
181
|
+
const result = dotenvConfig({ path: configPath });
|
|
182
|
+
if (result.error) {
|
|
183
|
+
console.error(`Failed to parse config file ${configPath}: ${result.error.message}`);
|
|
184
|
+
console.error('Using default configuration.');
|
|
185
|
+
}
|
|
186
|
+
else if (result.parsed && Object.keys(result.parsed).length > 0) {
|
|
187
|
+
console.log(`Loaded config from ${configPath}`);
|
|
188
|
+
}
|
|
148
189
|
}
|
|
149
|
-
|
|
150
|
-
console.
|
|
190
|
+
catch (error) {
|
|
191
|
+
console.error(`Error reading config file ${configPath}: ${error.message}`);
|
|
192
|
+
console.error('Using default configuration.');
|
|
151
193
|
}
|
|
152
194
|
}
|
|
153
|
-
catch (error) {
|
|
154
|
-
console.error(`Error reading config file ${configPath}: ${error.message}`);
|
|
155
|
-
console.error('Using default configuration.');
|
|
156
|
-
}
|
|
157
195
|
}
|
|
158
|
-
// 2.
|
|
196
|
+
// 2. Get RAM-aware defaults
|
|
197
|
+
const ramGB = getSystemRamGB();
|
|
198
|
+
const ramTier = getRamTier(ramGB);
|
|
199
|
+
// 3. Build config with source tracking
|
|
159
200
|
const config = {
|
|
160
201
|
// === PERFORMANCE ===
|
|
161
|
-
frameWidth:
|
|
162
|
-
vlmBatchSize:
|
|
163
|
-
sampleInterval:
|
|
202
|
+
frameWidth: parseEnvNumberWithSource('ESCRIBANO_FRAME_WIDTH', ramTier.frameWidth, sources, 'frameWidth'),
|
|
203
|
+
vlmBatchSize: parseEnvNumberWithSource('ESCRIBANO_VLM_BATCH_SIZE', BASE_DEFAULTS.vlmBatchSize, sources, 'vlmBatchSize'),
|
|
204
|
+
sampleInterval: parseEnvNumberWithSource('ESCRIBANO_SAMPLE_INTERVAL', BASE_DEFAULTS.sampleInterval, sources, 'sampleInterval'),
|
|
164
205
|
// === QUALITY ===
|
|
165
|
-
sceneThreshold:
|
|
166
|
-
vlmMaxTokens:
|
|
206
|
+
sceneThreshold: parseEnvNumberWithSource('ESCRIBANO_SCENE_THRESHOLD', BASE_DEFAULTS.sceneThreshold, sources, 'sceneThreshold'),
|
|
207
|
+
vlmMaxTokens: parseEnvNumberWithSource('ESCRIBANO_VLM_MAX_TOKENS', BASE_DEFAULTS.vlmMaxTokens, sources, 'vlmMaxTokens'),
|
|
167
208
|
// === MODELS ===
|
|
168
|
-
llmModel:
|
|
169
|
-
|
|
170
|
-
|
|
209
|
+
llmModel: parseEnvStringWithSource('ESCRIBANO_LLM_MODEL', undefined, sources, 'llmModel'),
|
|
210
|
+
llmBackend: (parseEnvStringWithSource('ESCRIBANO_LLM_BACKEND', BASE_DEFAULTS.llmBackend, sources, 'llmBackend') ?? 'mlx'),
|
|
211
|
+
llmMlxModel: parseEnvStringWithSource('ESCRIBANO_LLM_MLX_MODEL', undefined, sources, 'llmMlxModel'),
|
|
212
|
+
vlmModel: parseEnvStringWithSource('ESCRIBANO_VLM_MODEL', BASE_DEFAULTS.vlmModel, sources, 'vlmModel'),
|
|
213
|
+
subjectGroupingModel: parseEnvStringWithSource('ESCRIBANO_SUBJECT_GROUPING_MODEL', undefined, sources, 'subjectGroupingModel'),
|
|
171
214
|
// === DEBUGGING ===
|
|
172
|
-
verbose:
|
|
173
|
-
debugOllama:
|
|
174
|
-
debugVlm:
|
|
175
|
-
|
|
215
|
+
verbose: parseEnvBooleanWithSource('ESCRIBANO_VERBOSE', BASE_DEFAULTS.verbose, sources, 'verbose'),
|
|
216
|
+
debugOllama: parseEnvBooleanWithSource('ESCRIBANO_DEBUG_OLLAMA', BASE_DEFAULTS.debugOllama, sources, 'debugOllama'),
|
|
217
|
+
debugVlm: parseEnvBooleanWithSource('ESCRIBANO_DEBUG_VLM', BASE_DEFAULTS.debugVlm, sources, 'debugVlm'),
|
|
218
|
+
debugLlm: parseEnvBooleanWithSource('ESCRIBANO_DEBUG_LLM', BASE_DEFAULTS.debugLlm, sources, 'debugLlm'),
|
|
219
|
+
skipLlm: parseEnvBooleanWithSource('ESCRIBANO_SKIP_LLM', BASE_DEFAULTS.skipLlm, sources, 'skipLlm'),
|
|
176
220
|
// === ADVANCED ===
|
|
177
|
-
sceneMinInterval:
|
|
178
|
-
sampleGapThreshold:
|
|
179
|
-
sampleGapFill:
|
|
180
|
-
mlxSocketPath:
|
|
181
|
-
mlxStartupTimeout:
|
|
182
|
-
pythonPath:
|
|
183
|
-
parallelTranscription:
|
|
184
|
-
artifactThink:
|
|
221
|
+
sceneMinInterval: parseEnvNumberWithSource('ESCRIBANO_SCENE_MIN_INTERVAL', BASE_DEFAULTS.sceneMinInterval, sources, 'sceneMinInterval'),
|
|
222
|
+
sampleGapThreshold: parseEnvNumberWithSource('ESCRIBANO_SAMPLE_GAP_THRESHOLD', BASE_DEFAULTS.sampleGapThreshold, sources, 'sampleGapThreshold'),
|
|
223
|
+
sampleGapFill: parseEnvNumberWithSource('ESCRIBANO_SAMPLE_GAP_FILL', BASE_DEFAULTS.sampleGapFill, sources, 'sampleGapFill'),
|
|
224
|
+
mlxSocketPath: parseEnvStringWithSource('ESCRIBANO_MLX_SOCKET_PATH', BASE_DEFAULTS.mlxSocketPath, sources, 'mlxSocketPath'),
|
|
225
|
+
mlxStartupTimeout: parseEnvNumberWithSource('ESCRIBANO_MLX_STARTUP_TIMEOUT', BASE_DEFAULTS.mlxStartupTimeout, sources, 'mlxStartupTimeout'),
|
|
226
|
+
pythonPath: parseEnvStringWithSource('ESCRIBANO_PYTHON_PATH', undefined, sources, 'pythonPath'),
|
|
227
|
+
parallelTranscription: parseEnvBooleanWithSource('ESCRIBANO_PARALLEL_TRANSCRIPTION', BASE_DEFAULTS.parallelTranscription, sources, 'parallelTranscription'),
|
|
228
|
+
artifactThink: parseEnvBooleanWithSource('ESCRIBANO_ARTIFACT_THINK', BASE_DEFAULTS.artifactThink, sources, 'artifactThink'),
|
|
185
229
|
// === OPTIONAL ===
|
|
186
|
-
outlineUrl:
|
|
187
|
-
outlineToken:
|
|
188
|
-
outlineCollection:
|
|
189
|
-
DEFAULT_CONFIG.outlineCollection,
|
|
230
|
+
outlineUrl: parseEnvStringWithSource('ESCRIBANO_OUTLINE_URL', undefined, sources, 'outlineUrl'),
|
|
231
|
+
outlineToken: parseEnvStringWithSource('ESCRIBANO_OUTLINE_TOKEN', undefined, sources, 'outlineToken'),
|
|
232
|
+
outlineCollection: parseEnvStringWithSource('ESCRIBANO_OUTLINE_COLLECTION', BASE_DEFAULTS.outlineCollection, sources, 'outlineCollection'),
|
|
190
233
|
};
|
|
191
|
-
//
|
|
234
|
+
// 4. Validate with Zod
|
|
192
235
|
const validated = configSchema.parse(config);
|
|
193
236
|
cachedConfig = validated;
|
|
237
|
+
cachedSources = sources;
|
|
194
238
|
return validated;
|
|
195
239
|
}
|
|
240
|
+
export function getConfigSources() {
|
|
241
|
+
return cachedSources;
|
|
242
|
+
}
|
|
243
|
+
export function getRamInfo() {
|
|
244
|
+
const ramGB = getSystemRamGB();
|
|
245
|
+
const ramTier = getRamTier(ramGB);
|
|
246
|
+
return { ramGB, tier: ramTier.tier };
|
|
247
|
+
}
|
|
196
248
|
// =============================================================================
|
|
197
249
|
// HELPERS
|
|
198
250
|
// =============================================================================
|
|
199
|
-
function
|
|
251
|
+
function parseEnvNumberWithSource(key, defaultValue, sources, configKey) {
|
|
200
252
|
const value = process.env[key];
|
|
201
|
-
if (
|
|
253
|
+
if (value === undefined) {
|
|
254
|
+
const isRamAware = configKey === 'frameWidth';
|
|
255
|
+
sources.push({
|
|
256
|
+
key: configKey,
|
|
257
|
+
source: isRamAware ? 'ram-aware' : 'default',
|
|
258
|
+
});
|
|
202
259
|
return defaultValue;
|
|
260
|
+
}
|
|
203
261
|
const parsed = Number(value);
|
|
204
262
|
if (Number.isNaN(parsed)) {
|
|
205
263
|
console.warn(`Invalid ${key}="${value}", using default: ${defaultValue}`);
|
|
264
|
+
sources.push({ key: configKey, source: 'default' });
|
|
206
265
|
return defaultValue;
|
|
207
266
|
}
|
|
267
|
+
sources.push({ key: configKey, source: 'env' });
|
|
208
268
|
return parsed;
|
|
209
269
|
}
|
|
210
|
-
function
|
|
270
|
+
function parseEnvStringWithSource(key, defaultValue, sources, configKey) {
|
|
211
271
|
const value = process.env[key];
|
|
212
|
-
if (
|
|
272
|
+
if (value === undefined) {
|
|
273
|
+
sources.push({ key: configKey, source: 'default' });
|
|
213
274
|
return defaultValue;
|
|
275
|
+
}
|
|
276
|
+
sources.push({ key: configKey, source: 'env' });
|
|
277
|
+
return value;
|
|
278
|
+
}
|
|
279
|
+
function parseEnvBooleanWithSource(key, defaultValue, sources, configKey) {
|
|
280
|
+
const value = process.env[key];
|
|
281
|
+
if (value === undefined) {
|
|
282
|
+
sources.push({ key: configKey, source: 'default' });
|
|
283
|
+
return defaultValue;
|
|
284
|
+
}
|
|
285
|
+
sources.push({ key: configKey, source: 'env' });
|
|
214
286
|
return value === 'true';
|
|
215
287
|
}
|
|
216
288
|
// =============================================================================
|
|
289
|
+
// LOGGING
|
|
290
|
+
// =============================================================================
|
|
291
|
+
export function logConfig() {
|
|
292
|
+
const config = loadConfig();
|
|
293
|
+
const { ramGB, tier } = getRamInfo();
|
|
294
|
+
const sources = getConfigSources();
|
|
295
|
+
const userSetKeys = sources.filter((s) => s.source === 'env');
|
|
296
|
+
// Compact one-liner per category
|
|
297
|
+
const perf = `frameWidth=${config.frameWidth} vlmBatchSize=${config.vlmBatchSize} sampleInterval=${config.sampleInterval}`;
|
|
298
|
+
const quality = `sceneThreshold=${config.sceneThreshold} vlmMaxTokens=${config.vlmMaxTokens}`;
|
|
299
|
+
const models = `vlmModel=${config.vlmModel.split('/').pop()} llmModel=${config.llmModel || 'auto'}`;
|
|
300
|
+
// Show dev mode indicator if applicable
|
|
301
|
+
if (isDevelopmentMode()) {
|
|
302
|
+
console.log('[Config] Mode: development (using project .env)');
|
|
303
|
+
}
|
|
304
|
+
console.log(`[Config] RAM: ${ramGB}GB (${tier})`);
|
|
305
|
+
console.log(`[Config] Performance: ${perf}`);
|
|
306
|
+
console.log(`[Config] Quality: ${quality}`);
|
|
307
|
+
console.log(`[Config] Models: ${models}`);
|
|
308
|
+
if (userSetKeys.length > 0) {
|
|
309
|
+
console.log(`[Config] User overrides: ${userSetKeys.map((s) => s.key).join(', ')}`);
|
|
310
|
+
}
|
|
311
|
+
}
|
|
312
|
+
// =============================================================================
|
|
217
313
|
// CLI UTILITIES
|
|
218
314
|
// =============================================================================
|
|
219
315
|
export function showConfig() {
|
|
220
316
|
const configPath = getConfigPath();
|
|
221
|
-
//
|
|
317
|
+
// In dev mode, show that we're using project .env instead
|
|
318
|
+
if (isDevelopmentMode()) {
|
|
319
|
+
console.log('Development mode: Using project .env (not ~/.escribano/.env)\n');
|
|
320
|
+
console.log('Current configuration:');
|
|
321
|
+
const config = loadConfig();
|
|
322
|
+
console.log(JSON.stringify(config, null, 2));
|
|
323
|
+
return;
|
|
324
|
+
}
|
|
325
|
+
// Create config file if it doesn't exist (production mode)
|
|
222
326
|
if (!existsSync(configPath)) {
|
|
223
327
|
createDefaultConfig();
|
|
224
328
|
}
|
|
225
329
|
const config = loadConfig();
|
|
226
|
-
|
|
330
|
+
const { ramGB, tier } = getRamInfo();
|
|
331
|
+
console.log(`Config file: ${configPath}`);
|
|
332
|
+
console.log(`System RAM: ${ramGB}GB (${tier} tier)\n`);
|
|
227
333
|
console.log('Current configuration:');
|
|
228
334
|
console.log(JSON.stringify(config, null, 2));
|
|
229
335
|
}
|