escribano 0.2.2 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -212,6 +212,7 @@ Output: `~/.escribano/artifacts/`
212
212
  | Flag | What it does |
213
213
  |------|--------------|
214
214
  | `--file <path>` | Process a video file |
215
+ | `--latest <dir>` | Find and process latest video in directory |
215
216
  | `--mic-audio <path>` | External mic audio |
216
217
  | `--system-audio <path>` | External system audio |
217
218
  | `--format <format>` | `card`, `standup`, or `narrative` (default: card) |
@@ -222,6 +223,14 @@ Output: `~/.escribano/artifacts/`
222
223
  | `--stdout` | Print to stdout |
223
224
  | `--help` | Show all options |
224
225
 
226
+ ### Subcommands
227
+
228
+ | Command | What it does |
229
+ |---------|--------------|
230
+ | `doctor` | Check prerequisites and system requirements |
231
+ | `config` | Show current configuration (merged from all sources) |
232
+ | `config --path` | Show path to config file (`~/.escribano/.env`) |
233
+
225
234
  ### Formats
226
235
 
227
236
  | Format | Use for | Style |
@@ -236,11 +245,18 @@ Output: `~/.escribano/artifacts/`
236
245
  # Process and copy
237
246
  npx escribano --file "~/Desktop/Screen Recording.mov" --format standup --copy
238
247
 
248
+ # Find latest video in a directory
249
+ npx escribano --latest "~/Videos"
250
+
239
251
  # Narrative format
240
252
  npx escribano --file session.mp4 --format narrative --force
241
253
 
242
254
  # With external audio
243
255
  npx escribano --file recording.mov --mic-audio mic.wav
256
+
257
+ # View configuration
258
+ npx escribano config
259
+ npx escribano config --path
244
260
  ```
245
261
 
246
262
  ---
@@ -256,6 +272,35 @@ npx escribano --file recording.mov --mic-audio mic.wav
256
272
 
257
273
  ---
258
274
 
275
+ ## Configuration
276
+
277
+ On first run, Escribano auto-creates a config file that persists your settings:
278
+
279
+ ```bash
280
+ # View current configuration
281
+ npx escribano config
282
+
283
+ # Show path to config file
284
+ npx escribano config --path
285
+
286
+ # Edit manually
287
+ vim ~/.escribano/.env
288
+ ```
289
+
290
+ The config file (`~/.escribano/.env`) is organized by category with inline comments:
291
+
292
+ | Category | Examples |
293
+ |----------|----------|
294
+ | **Performance** | Frame width, batch size, sampling interval |
295
+ | **Quality** | Scene detection, token budget |
296
+ | **Models** | VLM model, LLM model, subject grouping model |
297
+ | **Debugging** | Verbose logging, VLM/Ollama debug output |
298
+ | **Advanced** | Socket path, timeouts, Python path |
299
+
300
+ Environment variables always take priority over the config file. For the full reference, see [AGENTS.md](AGENTS.md#configuration).
301
+
302
+ ---
303
+
259
304
  ## Architecture
260
305
 
261
306
  Clean architecture: domain entities, pure services, adapter interfaces for external systems (MLX-VLM, Ollama, Whisper, FFmpeg, SQLite).
package/dist/0_types.js CHANGED
@@ -267,11 +267,6 @@ export const intelligenceConfigSchema = z.object({
267
267
  mlxSocketPath: z.string().default('/tmp/escribano-mlx.sock'),
268
268
  });
269
269
  export const DEFAULT_INTELLIGENCE_CONFIG = intelligenceConfigSchema.parse({});
270
- const artifactConfigSchema = z.object({
271
- parallelGeneration: z.boolean().default(false),
272
- maxParallel: z.number().default(3),
273
- maxScreenshots: z.number().default(10),
274
- });
275
270
  export const outlineConfigSchema = z.object({
276
271
  url: z.string().url(),
277
272
  token: z.string(),
@@ -48,7 +48,7 @@ export async function generateArtifactV3(recordingId, repos, intelligence, optio
48
48
  for (const subject of subjects) {
49
49
  subject.apps = normalizeAppNames(subject.apps);
50
50
  }
51
- const filteredSubjects = options.includePersonal
51
+ const _filteredSubjects = options.includePersonal
52
52
  ? subjects
53
53
  : subjects.filter((s) => !s.isPersonal);
54
54
  log('info', `[Artifact V3.1] Generating ${format} with LLM...`);
@@ -166,7 +166,7 @@ function generateCardTemplate(subjects, groupingResult, sessionDate, sessionDura
166
166
  }
167
167
  return content;
168
168
  }
169
- function generateStandupTemplate(subjects, sessionDate, sessionDuration) {
169
+ function generateStandupTemplate(subjects, sessionDate, _sessionDuration) {
170
170
  let content = `## Standup - ${sessionDate}\n\n`;
171
171
  content += `**What I did:**\n`;
172
172
  const allActivities = [];
@@ -199,7 +199,7 @@ function generateNarrativeTemplate(subjects, sessionDate, sessionDuration) {
199
199
  }
200
200
  return content;
201
201
  }
202
- async function generateLlmArtifact(subjects, groupingResult, format, recording, intelligence, repos, allTopicBlocks) {
202
+ async function generateLlmArtifact(subjects, groupingResult, format, recording, intelligence, _repos, allTopicBlocks) {
203
203
  const ARTIFACT_THINK = process.env.ESCRIBANO_ARTIFACT_THINK === 'true';
204
204
  const promptFileName = format === 'card'
205
205
  ? 'card.md'
@@ -129,7 +129,7 @@ async function generateLlmSummary(sections, recording, intelligence) {
129
129
  // Build activity timeline
130
130
  const activityTimeline = sections
131
131
  .map((section, i) => {
132
- const startMin = Math.round(section.startTime / 60);
132
+ const _startMin = Math.round(section.startTime / 60);
133
133
  const durationMin = Math.round(section.duration / 60);
134
134
  const startTimeStr = `${Math.floor(section.startTime / 60)}:${Math.floor(section.startTime % 60)
135
135
  .toString()
@@ -99,6 +99,7 @@ export async function processRecordingV2(recordingId, repos, adapters, options =
99
99
  // VISUAL PIPELINE
100
100
  // ============================================
101
101
  if (recording.videoPath) {
102
+ const videoPath = recording.videoPath;
102
103
  // Step: Frame Extraction
103
104
  if (!shouldSkipStep(recording.processingStep, 'frame_extraction')) {
104
105
  await step('frame-extraction', async () => {
@@ -106,7 +107,7 @@ export async function processRecordingV2(recordingId, repos, adapters, options =
106
107
  updateRecordingInDb(repos, recording);
107
108
  const intervalSeconds = Number(process.env.ESCRIBANO_FRAME_INTERVAL) || 2;
108
109
  const framesDir = path.join(os.tmpdir(), 'escribano', recording.id, 'frames');
109
- const extractedFrames = await adapters.video.extractFramesAtInterval(recording.videoPath, 0.3, // threshold
110
+ const extractedFrames = await adapters.video.extractFramesAtInterval(videoPath, 0.3, // threshold
110
111
  framesDir);
111
112
  log('info', `Extracted ${extractedFrames.length} frames (interval: ${intervalSeconds}s)`);
112
113
  });
@@ -307,22 +308,29 @@ export async function processRecordingV2(recordingId, repos, adapters, options =
307
308
  const visualClusters = repos.clusters.findByRecordingAndType(recording.id, 'visual');
308
309
  const audioClusters = repos.clusters.findByRecordingAndType(recording.id, 'audio');
309
310
  if (audioClusters.length > 0 && visualClusters.length > 0) {
310
- // Build cluster-with-signals for merging
311
- const visualWithSignals = visualClusters.map((c) => ({
312
- cluster: c,
313
- signals: JSON.parse(c.classification || '{}'),
314
- centroid: bufferToEmbedding(c.centroid),
315
- }));
316
- const audioWithSignals = audioClusters.map((c) => ({
317
- cluster: c,
318
- signals: JSON.parse(c.classification || '{}'),
319
- centroid: bufferToEmbedding(c.centroid),
320
- }));
321
- const merges = findClusterMerges(visualWithSignals, audioWithSignals, adapters.embedding);
322
- for (const merge of merges) {
323
- repos.clusters.saveMerge(merge.visualClusterId, merge.audioClusterId, merge.similarityScore, merge.mergeReason);
311
+ const validVisualClusters = visualClusters.filter((c) => c.centroid !== null);
312
+ const validAudioClusters = audioClusters.filter((c) => c.centroid !== null);
313
+ if (validVisualClusters.length === 0 ||
314
+ validAudioClusters.length === 0) {
315
+ log('info', 'Skipping cluster merge - no clusters with embeddings');
316
+ }
317
+ else {
318
+ const visualWithSignals = validVisualClusters.map((c) => ({
319
+ cluster: c,
320
+ signals: JSON.parse(c.classification || '{}'),
321
+ centroid: bufferToEmbedding(c.centroid),
322
+ }));
323
+ const audioWithSignals = validAudioClusters.map((c) => ({
324
+ cluster: c,
325
+ signals: JSON.parse(c.classification || '{}'),
326
+ centroid: bufferToEmbedding(c.centroid),
327
+ }));
328
+ const merges = findClusterMerges(visualWithSignals, audioWithSignals, adapters.embedding);
329
+ for (const merge of merges) {
330
+ repos.clusters.saveMerge(merge.visualClusterId, merge.audioClusterId, merge.similarityScore, merge.mergeReason);
331
+ }
332
+ log('info', `Created ${merges.length} audio-visual cluster merges`);
324
333
  }
325
- log('info', `Created ${merges.length} audio-visual cluster merges`);
326
334
  }
327
335
  else {
328
336
  log('info', 'No audio clusters to merge');
@@ -490,5 +498,5 @@ async function processAudioPipeline(recording, adapters, options) {
490
498
  return observations;
491
499
  }
492
500
  function updateRecordingInDb(repos, recording) {
493
- repos.recordings.updateStatus(recording.id, recording.status, recording.processingStep, recording.errorMessage);
501
+ repos.recordings.updateStatus(recording.id, recording.status, recording.processingStep ?? undefined, recording.errorMessage);
494
502
  }
@@ -108,8 +108,9 @@ export async function processRecordingV3(recordingId, repos, adapters, options =
108
108
  // VISUAL PIPELINE (V3: Smart Extraction)
109
109
  // ============================================
110
110
  if (recording.videoPath) {
111
+ const videoPath = recording.videoPath;
111
112
  // Step 1: Get video metadata
112
- const metadata = await adapters.video.getMetadata(recording.videoPath);
113
+ const metadata = await adapters.video.getMetadata(videoPath);
113
114
  log('info', `[V3] Video: ${Math.round(metadata.duration)}s, ${metadata.width}x${metadata.height}`);
114
115
  // Step 2: Scene Detection FIRST (no frame extraction needed)
115
116
  let sceneChanges = [];
@@ -123,7 +124,7 @@ export async function processRecordingV3(recordingId, repos, adapters, options =
123
124
  }
124
125
  else {
125
126
  sceneChanges = await step('scene-detection', async () => {
126
- const changes = await adapters.video.detectSceneChanges(recording.videoPath);
127
+ const changes = await adapters.video.detectSceneChanges(videoPath);
127
128
  log('info', `[V3] Detected ${changes.length} scene changes`);
128
129
  // Save to DB for resume safety
129
130
  if (dbRecording) {
@@ -145,7 +146,7 @@ export async function processRecordingV3(recordingId, repos, adapters, options =
145
146
  if (!shouldSkipStep(recording.processingStep, 'frame_extraction')) {
146
147
  extractedFrames = await step('frame-extraction-batch', async () => {
147
148
  const framesDir = path.join(os.tmpdir(), 'escribano', recording.id, 'frames');
148
- const frames = await adapters.video.extractFramesAtTimestampsBatch(recording.videoPath, requiredTimestamps, framesDir);
149
+ const frames = await adapters.video.extractFramesAtTimestampsBatch(videoPath, requiredTimestamps, framesDir);
149
150
  log('info', `[V3] Extracted ${frames.length} frames`);
150
151
  recording = advanceStep(recording, 'frame_extraction');
151
152
  updateRecordingInDb(repos, recording);
@@ -408,5 +409,5 @@ async function processAudioPipeline(recording, adapters) {
408
409
  return observations;
409
410
  }
410
411
  function updateRecordingInDb(repos, recording) {
411
- repos.recordings.updateStatus(recording.id, recording.status, recording.processingStep, recording.errorMessage);
412
+ repos.recordings.updateStatus(recording.id, recording.status, recording.processingStep ?? undefined, recording.errorMessage);
412
413
  }
@@ -62,7 +62,7 @@ export function createSileroPreprocessor() {
62
62
  if (process.env.ESCRIBANO_VERBOSE === 'true' && stdout) {
63
63
  console.log(` Silero VAD stdout:\n${stdout
64
64
  .split('\n')
65
- .map((l) => ' ' + l)
65
+ .map((l) => ` ${l}`)
66
66
  .join('\n')}`);
67
67
  }
68
68
  resolve();
@@ -70,12 +70,12 @@ export function createSileroPreprocessor() {
70
70
  else {
71
71
  console.error(` Silero VAD stderr:\n${stderr
72
72
  .split('\n')
73
- .map((l) => ' ' + l)
73
+ .map((l) => ` ${l}`)
74
74
  .join('\n')}`);
75
75
  if (stdout) {
76
76
  console.error(` Silero VAD stdout:\n${stdout
77
77
  .split('\n')
78
- .map((l) => ' ' + l)
78
+ .map((l) => ` ${l}`)
79
79
  .join('\n')}`);
80
80
  }
81
81
  reject(new Error(`Silero VAD failed with code ${code}: ${stderr || stdout || 'No output captured'}`));
@@ -10,12 +10,12 @@
10
10
  * See docs/adr/006-mlx-vlm-adapter.md for full design.
11
11
  */
12
12
  import { spawn } from 'node:child_process';
13
- import { existsSync, unlinkSync } from 'node:fs';
13
+ import { existsSync, mkdirSync, unlinkSync } from 'node:fs';
14
14
  import { createConnection } from 'node:net';
15
- import { homedir } from 'node:os';
16
15
  import { dirname, resolve } from 'node:path';
17
16
  import { fileURLToPath } from 'node:url';
18
17
  const __dirname = dirname(fileURLToPath(import.meta.url));
18
+ import { ESCRIBANO_HOME, ESCRIBANO_VENV, ESCRIBANO_VENV_PYTHON, getPythonPath, } from '../python-utils.js';
19
19
  const DEBUG_MLX = process.env.ESCRIBANO_VERBOSE === 'true';
20
20
  function debugLog(...args) {
21
21
  if (DEBUG_MLX) {
@@ -29,28 +29,85 @@ const DEFAULT_CONFIG = {
29
29
  maxTokens: Number(process.env.ESCRIBANO_VLM_MAX_TOKENS) || 2000,
30
30
  socketPath: process.env.ESCRIBANO_MLX_SOCKET_PATH ?? '/tmp/escribano-mlx.sock',
31
31
  bridgeScript: resolve(__dirname, '../../scripts/mlx_bridge.py'),
32
- startupTimeout: Number(process.env.ESCRIBANO_MLX_STARTUP_TIMEOUT) || 60000,
32
+ startupTimeout: Number(process.env.ESCRIBANO_MLX_STARTUP_TIMEOUT) || 120000,
33
33
  };
34
+ /** pip binary inside Escribano's managed venv. */
35
+ const _ESCRIBANO_VENV_PIP = resolve(ESCRIBANO_VENV, 'bin', 'pip');
34
36
  /**
35
- * Get Python executable path.
36
- * Priority:
37
- * 1. ESCRIBANO_PYTHON_PATH env var (explicit override)
38
- * 2. Active virtual environment (VIRTUAL_ENV)
39
- * 3. System python3 (fallback)
37
+ * Run a command, streaming stdout/stderr directly to the terminal.
38
+ * Used for long-running setup tasks (venv creation, pip install) so the
39
+ * user can see progress in real time.
40
40
  */
41
- function getPythonPath() {
42
- if (process.env.ESCRIBANO_PYTHON_PATH) {
43
- return process.env.ESCRIBANO_PYTHON_PATH;
41
+ function runVisible(cmd, args) {
42
+ return new Promise((res, rej) => {
43
+ const proc = spawn(cmd, args, { stdio: 'inherit' });
44
+ proc.on('exit', (code) => code === 0 ? res() : rej(new Error(`${cmd} exited with code ${code}`)));
45
+ proc.on('error', rej);
46
+ });
47
+ }
48
+ /**
49
+ * Run a command silently (discard output). Used for quick probe checks.
50
+ */
51
+ function runSilent(cmd, args) {
52
+ return new Promise((res, rej) => {
53
+ const proc = spawn(cmd, args, { stdio: 'ignore' });
54
+ proc.on('exit', (code) => code === 0 ? res() : rej(new Error(`${cmd} exited with code ${code}`)));
55
+ proc.on('error', rej);
56
+ });
57
+ }
58
+ /**
59
+ * Ensure ~/.escribano/venv exists and has mlx-vlm installed.
60
+ * Uses plain `python3 -m venv` — no uv, no pip flags, no fuss.
61
+ * On first run this takes a few minutes; subsequent runs are instant.
62
+ */
63
+ async function ensureEscribanoVenv() {
64
+ if (!existsSync(ESCRIBANO_HOME)) {
65
+ mkdirSync(ESCRIBANO_HOME, { recursive: true });
66
+ }
67
+ if (!existsSync(ESCRIBANO_VENV_PYTHON)) {
68
+ console.log('[VLM] First-time setup: creating Python environment at ~/.escribano/venv');
69
+ await runVisible('python3', ['-m', 'venv', ESCRIBANO_VENV]);
44
70
  }
45
- if (process.env.VIRTUAL_ENV) {
46
- return resolve(process.env.VIRTUAL_ENV, 'bin', 'python3');
71
+ // Check whether mlx-vlm and required runtime deps are already importable (~0.3s probe)
72
+ let mlxReady = false;
73
+ try {
74
+ await runSilent(ESCRIBANO_VENV_PYTHON, [
75
+ '-c',
76
+ 'import mlx_vlm; import torch; import torchvision',
77
+ ]);
78
+ mlxReady = true;
47
79
  }
48
- // Check common uv venv location (typically ~/.venv)
49
- const uvHomeVenv = resolve(homedir(), '.venv', 'bin', 'python3');
50
- if (existsSync(uvHomeVenv)) {
51
- return uvHomeVenv;
80
+ catch {
81
+ // not installed yet
52
82
  }
53
- return 'python3';
83
+ if (!mlxReady) {
84
+ console.log('[VLM] Installing mlx-vlm into ~/.escribano/venv (first run — this may take a few minutes)...');
85
+ // Ensure pip is available in the venv; ignore failures if ensurepip is disabled.
86
+ try {
87
+ await runVisible(ESCRIBANO_VENV_PYTHON, ['-m', 'ensurepip', '--upgrade']);
88
+ }
89
+ catch {
90
+ // ensurepip may be unavailable; continue and rely on existing pip if present.
91
+ }
92
+ await runVisible(ESCRIBANO_VENV_PYTHON, [
93
+ '-m',
94
+ 'pip',
95
+ 'install',
96
+ 'mlx-vlm',
97
+ 'torch',
98
+ 'torchvision',
99
+ ]);
100
+ console.log('[VLM] mlx-vlm installed successfully.');
101
+ }
102
+ return ESCRIBANO_VENV_PYTHON;
103
+ }
104
+ /**
105
+ * Resolve the Python executable to use for the MLX bridge.
106
+ * If the user has configured an explicit environment, use it.
107
+ * Otherwise, transparently create and populate ~/.escribano/venv.
108
+ */
109
+ export async function resolvePythonPath() {
110
+ return getPythonPath() ?? ensureEscribanoVenv();
54
111
  }
55
112
  // Global cleanup function to track the current bridge instance
56
113
  let globalCleanup = null;
@@ -124,10 +181,11 @@ export function createMlxIntelligenceService(_config = {}) {
124
181
  if (bridge.process && bridge.ready) {
125
182
  return;
126
183
  }
184
+ debugLog('Starting MLX bridge...');
185
+ // Resolve (and if needed, auto-create) the Python environment before spawning.
186
+ const pythonPath = await resolvePythonPath();
187
+ debugLog(`Using Python: ${pythonPath}`);
127
188
  return new Promise((resolve, reject) => {
128
- debugLog('Starting MLX bridge...');
129
- const pythonPath = getPythonPath();
130
- debugLog(`Using Python: ${pythonPath}`);
131
189
  bridge.process = spawn(pythonPath, [mlxConfig.bridgeScript], {
132
190
  stdio: ['ignore', 'pipe', 'pipe'],
133
191
  env: {
@@ -26,12 +26,11 @@ const vlmBatchItemSchema = z.object({
26
26
  apps: z.array(z.string()).default([]),
27
27
  topics: z.array(z.string()).default([]),
28
28
  });
29
- const vlmBatchResponseSchema = z.array(vlmBatchItemSchema);
29
+ const _vlmBatchResponseSchema = z.array(vlmBatchItemSchema);
30
30
  /**
31
31
  * Helper to convert Zod schema to Ollama-compatible JSON schema
32
32
  */
33
33
  function toOllamaSchema(schema) {
34
- // biome-ignore lint/suspicious/noExplicitAny: needed for Zod schema conversion
35
34
  const jsonSchema = z.toJSONSchema(schema);
36
35
  const { $schema, ...rest } = jsonSchema;
37
36
  return rest;
@@ -513,9 +512,7 @@ function extractJsonFromThinking(thinking) {
513
512
  }
514
513
  return null;
515
514
  }
516
- async function callOllama(prompt, config, options
517
- // biome-ignore lint/suspicious/noExplicitAny: Ollama returns dynamic JSON or strings
518
- ) {
515
+ async function callOllama(prompt, config, options) {
519
516
  const requestId = Math.random().toString(36).substring(2, 8);
520
517
  const requestStart = Date.now();
521
518
  // Model warm-up (errors handled gracefully, especially in tests)
@@ -24,6 +24,7 @@ import { createOllamaIntelligenceService } from './adapters/intelligence.ollama.
24
24
  import { createOutlinePublishingService } from './adapters/publishing.outline.adapter.js';
25
25
  import { createWhisperTranscriptionService } from './adapters/transcription.whisper.adapter.js';
26
26
  import { createFfmpegVideoService } from './adapters/video.ffmpeg.adapter.js';
27
+ import { createDefaultConfig } from './config.js';
27
28
  import { getDbPath, getRepositories } from './db/index.js';
28
29
  import { log, setResourceTracker, step, withPipeline, } from './pipeline/context.js';
29
30
  import { ResourceTracker, setupStatsObserver, } from './stats/index.js';
@@ -36,6 +37,8 @@ const MODEL_PATH = path.join(MODELS_DIR, MODEL_FILE);
36
37
  * All adapters are created ONCE and reused across recordings.
37
38
  */
38
39
  export async function initializeSystem() {
40
+ // Create default config file if it doesn't exist
41
+ createDefaultConfig();
39
42
  console.log('Initializing database...');
40
43
  const repos = getRepositories();
41
44
  console.log(`Database ready: ${getDbPath()}`);