npm - escribano - Versions diffs - 0.2.2 → 0.4.1 - Mend

escribano 0.2.2 → 0.4.1

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (31) hide show

package/README.md +45 -0
package/dist/0_types.js +0 -5
package/dist/actions/generate-artifact-v3.js +3 -3
package/dist/actions/generate-summary-v3.js +1 -1
package/dist/actions/process-recording-v2.js +25 -17
package/dist/actions/process-recording-v3.js +5 -4
package/dist/adapters/audio.silero.adapter.js +3 -3
package/dist/adapters/intelligence.mlx.adapter.js +79 -21
package/dist/adapters/intelligence.ollama.adapter.js +2 -5
package/dist/batch-context.js +3 -0
package/dist/config.js +237 -0
package/dist/domain/segment.js +1 -3
package/dist/index.js +122 -8
package/dist/prerequisites.js +8 -18
package/dist/python-utils.js +64 -0
package/dist/services/activity-segmentation.js +3 -3
package/dist/services/signal-extraction.js +1 -1
package/dist/services/subject-grouping.js +2 -2
package/dist/services/temporal-alignment.js +1 -1
package/dist/services/vlm-enrichment.js +5 -2
package/dist/stats/observer.js +1 -1
package/dist/tests/db/repositories.test.js +8 -8
package/dist/tests/index.test.js +102 -0
package/dist/tests/intelligence.mlx.adapter.test.js +222 -0
package/dist/tests/intelligence.ollama.adapter.test.js +1 -0
package/dist/tests/services/clustering.test.js +1 -0
package/dist/tests/services/frame-sampling.test.js +1 -1
package/dist/tests/visual-observer.test.js +0 -1
package/package.json +2 -1
package/scripts/create-release.mjs +55 -9
package/scripts/mlx_bridge.py +26 -2

package/README.md CHANGED Viewed

@@ -212,6 +212,7 @@ Output: `~/.escribano/artifacts/`
 | Flag | What it does |
 |------|--------------|
 | `--file <path>` | Process a video file |
+| `--latest <dir>` | Find and process latest video in directory |
 | `--mic-audio <path>` | External mic audio |
 | `--system-audio <path>` | External system audio |
 | `--format <format>` | `card`, `standup`, or `narrative` (default: card) |
@@ -222,6 +223,14 @@ Output: `~/.escribano/artifacts/`
 | `--stdout` | Print to stdout |
 | `--help` | Show all options |
+### Subcommands
+| Command | What it does |
+|---------|--------------|
+| `doctor` | Check prerequisites and system requirements |
+| `config` | Show current configuration (merged from all sources) |
+| `config --path` | Show path to config file (`~/.escribano/.env`) |
 ### Formats
 | Format | Use for | Style |
@@ -236,11 +245,18 @@ Output: `~/.escribano/artifacts/`
 # Process and copy
 npx escribano --file "~/Desktop/Screen Recording.mov" --format standup --copy
+# Find latest video in a directory
+npx escribano --latest "~/Videos"
 # Narrative format
 npx escribano --file session.mp4 --format narrative --force
 # With external audio
 npx escribano --file recording.mov --mic-audio mic.wav
+# View configuration
+npx escribano config
+npx escribano config --path
 ```
 ---
@@ -256,6 +272,35 @@ npx escribano --file recording.mov --mic-audio mic.wav
 ---
+## Configuration
+Escribano auto-creates a config file on first run that persists your settings:
+```bash
+# View current configuration
+npx escribano config
+# Show path to config file
+npx escribano config --path
+# Edit manually
+vim ~/.escribano/.env
+```
+The config file (`~/.escribano/.env`) is organized by category with inline comments:
+| Category | Examples |
+|----------|----------|
+| **Performance** | Frame width, batch size, sampling interval |
+| **Quality** | Scene detection, token budget |
+| **Models** | VLM model, LLM model, subject grouping model |
+| **Debugging** | Verbose logging, VLM/Ollama debug output |
+| **Advanced** | Socket path, timeouts, Python path |
+Environment variables always take priority over the config file. For full reference, see [AGENTS.md](AGENTS.md#configuration).
+---
 ## Architecture
 Clean architecture: domain entities, pure services, adapter interfaces for external systems (MLX-VLM, Ollama, Whisper, FFmpeg, SQLite).

package/dist/0_types.js CHANGED Viewed

@@ -267,11 +267,6 @@ export const intelligenceConfigSchema = z.object({
     mlxSocketPath: z.string().default('/tmp/escribano-mlx.sock'),
 });
 export const DEFAULT_INTELLIGENCE_CONFIG = intelligenceConfigSchema.parse({});
-const artifactConfigSchema = z.object({
-    parallelGeneration: z.boolean().default(false),
-    maxParallel: z.number().default(3),
-    maxScreenshots: z.number().default(10),
-});
 export const outlineConfigSchema = z.object({
     url: z.string().url(),
     token: z.string(),

package/dist/actions/generate-artifact-v3.js CHANGED Viewed

@@ -48,7 +48,7 @@ export async function generateArtifactV3(recordingId, repos, intelligence, optio
     for (const subject of subjects) {
         subject.apps = normalizeAppNames(subject.apps);
     }
-    const filteredSubjects = options.includePersonal
+    const _filteredSubjects = options.includePersonal
         ? subjects
         : subjects.filter((s) => !s.isPersonal);
     log('info', `[Artifact V3.1] Generating ${format} with LLM...`);
@@ -166,7 +166,7 @@ function generateCardTemplate(subjects, groupingResult, sessionDate, sessionDura
     }
     return content;
 }
-function generateStandupTemplate(subjects, sessionDate, sessionDuration) {
+function generateStandupTemplate(subjects, sessionDate, _sessionDuration) {
     let content = `## Standup - ${sessionDate}\n\n`;
     content += `**What I did:**\n`;
     const allActivities = [];
@@ -199,7 +199,7 @@ function generateNarrativeTemplate(subjects, sessionDate, sessionDuration) {
     }
     return content;
 }
-async function generateLlmArtifact(subjects, groupingResult, format, recording, intelligence, repos, allTopicBlocks) {
+async function generateLlmArtifact(subjects, groupingResult, format, recording, intelligence, _repos, allTopicBlocks) {
     const ARTIFACT_THINK = process.env.ESCRIBANO_ARTIFACT_THINK === 'true';
     const promptFileName = format === 'card'
         ? 'card.md'

package/dist/actions/generate-summary-v3.js CHANGED Viewed

@@ -129,7 +129,7 @@ async function generateLlmSummary(sections, recording, intelligence) {
     // Build activity timeline
     const activityTimeline = sections
         .map((section, i) => {
-        const startMin = Math.round(section.startTime / 60);
+        const _startMin = Math.round(section.startTime / 60);
         const durationMin = Math.round(section.duration / 60);
         const startTimeStr = `${Math.floor(section.startTime / 60)}:${Math.floor(section.startTime % 60)
             .toString()

package/dist/actions/process-recording-v2.js CHANGED Viewed

@@ -99,6 +99,7 @@ export async function processRecordingV2(recordingId, repos, adapters, options =
         // VISUAL PIPELINE
         // ============================================
         if (recording.videoPath) {
+            const videoPath = recording.videoPath;
             // Step: Frame Extraction
             if (!shouldSkipStep(recording.processingStep, 'frame_extraction')) {
                 await step('frame-extraction', async () => {
@@ -106,7 +107,7 @@ export async function processRecordingV2(recordingId, repos, adapters, options =
                     updateRecordingInDb(repos, recording);
                     const intervalSeconds = Number(process.env.ESCRIBANO_FRAME_INTERVAL) || 2;
                     const framesDir = path.join(os.tmpdir(), 'escribano', recording.id, 'frames');
-                    const extractedFrames = await adapters.video.extractFramesAtInterval(recording.videoPath, 0.3, // threshold
+                    const extractedFrames = await adapters.video.extractFramesAtInterval(videoPath, 0.3, // threshold
                     framesDir);
                     log('info', `Extracted ${extractedFrames.length} frames (interval: ${intervalSeconds}s)`);
                 });
@@ -307,22 +308,29 @@ export async function processRecordingV2(recordingId, repos, adapters, options =
                     const visualClusters = repos.clusters.findByRecordingAndType(recording.id, 'visual');
                     const audioClusters = repos.clusters.findByRecordingAndType(recording.id, 'audio');
                     if (audioClusters.length > 0 && visualClusters.length > 0) {
-                        // Build cluster-with-signals for merging
-                        const visualWithSignals = visualClusters.map((c) => ({
-                            cluster: c,
-                            signals: JSON.parse(c.classification || '{}'),
-                            centroid: bufferToEmbedding(c.centroid),
-                        }));
-                        const audioWithSignals = audioClusters.map((c) => ({
-                            cluster: c,
-                            signals: JSON.parse(c.classification || '{}'),
-                            centroid: bufferToEmbedding(c.centroid),
-                        }));
-                        const merges = findClusterMerges(visualWithSignals, audioWithSignals, adapters.embedding);
-                        for (const merge of merges) {
-                            repos.clusters.saveMerge(merge.visualClusterId, merge.audioClusterId, merge.similarityScore, merge.mergeReason);
+                        const validVisualClusters = visualClusters.filter((c) => c.centroid !== null);
+                        const validAudioClusters = audioClusters.filter((c) => c.centroid !== null);
+                        if (validVisualClusters.length === 0 ||
+                            validAudioClusters.length === 0) {
+                            log('info', 'Skipping cluster merge - no clusters with embeddings');
+                        }
+                        else {
+                            const visualWithSignals = validVisualClusters.map((c) => ({
+                                cluster: c,
+                                signals: JSON.parse(c.classification || '{}'),
+                                centroid: bufferToEmbedding(c.centroid),
+                            }));
+                            const audioWithSignals = validAudioClusters.map((c) => ({
+                                cluster: c,
+                                signals: JSON.parse(c.classification || '{}'),
+                                centroid: bufferToEmbedding(c.centroid),
+                            }));
+                            const merges = findClusterMerges(visualWithSignals, audioWithSignals, adapters.embedding);
+                            for (const merge of merges) {
+                                repos.clusters.saveMerge(merge.visualClusterId, merge.audioClusterId, merge.similarityScore, merge.mergeReason);
+                            }
+                            log('info', `Created ${merges.length} audio-visual cluster merges`);
                         }
-                        log('info', `Created ${merges.length} audio-visual cluster merges`);
                     }
                     else {
                         log('info', 'No audio clusters to merge');
@@ -490,5 +498,5 @@ async function processAudioPipeline(recording, adapters, options) {
     return observations;
 }
 function updateRecordingInDb(repos, recording) {
-    repos.recordings.updateStatus(recording.id, recording.status, recording.processingStep, recording.errorMessage);
+    repos.recordings.updateStatus(recording.id, recording.status, recording.processingStep ?? undefined, recording.errorMessage);
 }

package/dist/actions/process-recording-v3.js CHANGED Viewed

@@ -108,8 +108,9 @@ export async function processRecordingV3(recordingId, repos, adapters, options =
         // VISUAL PIPELINE (V3: Smart Extraction)
         // ============================================
         if (recording.videoPath) {
+            const videoPath = recording.videoPath;
             // Step 1: Get video metadata
-            const metadata = await adapters.video.getMetadata(recording.videoPath);
+            const metadata = await adapters.video.getMetadata(videoPath);
             log('info', `[V3] Video: ${Math.round(metadata.duration)}s, ${metadata.width}x${metadata.height}`);
             // Step 2: Scene Detection FIRST (no frame extraction needed)
             let sceneChanges = [];
@@ -123,7 +124,7 @@ export async function processRecordingV3(recordingId, repos, adapters, options =
             }
             else {
                 sceneChanges = await step('scene-detection', async () => {
-                    const changes = await adapters.video.detectSceneChanges(recording.videoPath);
+                    const changes = await adapters.video.detectSceneChanges(videoPath);
                     log('info', `[V3] Detected ${changes.length} scene changes`);
                     // Save to DB for resume safety
                     if (dbRecording) {
@@ -145,7 +146,7 @@ export async function processRecordingV3(recordingId, repos, adapters, options =
             if (!shouldSkipStep(recording.processingStep, 'frame_extraction')) {
                 extractedFrames = await step('frame-extraction-batch', async () => {
                     const framesDir = path.join(os.tmpdir(), 'escribano', recording.id, 'frames');
-                    const frames = await adapters.video.extractFramesAtTimestampsBatch(recording.videoPath, requiredTimestamps, framesDir);
+                    const frames = await adapters.video.extractFramesAtTimestampsBatch(videoPath, requiredTimestamps, framesDir);
                     log('info', `[V3] Extracted ${frames.length} frames`);
                     recording = advanceStep(recording, 'frame_extraction');
                     updateRecordingInDb(repos, recording);
@@ -408,5 +409,5 @@ async function processAudioPipeline(recording, adapters) {
     return observations;
 }
 function updateRecordingInDb(repos, recording) {
-    repos.recordings.updateStatus(recording.id, recording.status, recording.processingStep, recording.errorMessage);
+    repos.recordings.updateStatus(recording.id, recording.status, recording.processingStep ?? undefined, recording.errorMessage);
 }

package/dist/adapters/audio.silero.adapter.js CHANGED Viewed

@@ -62,7 +62,7 @@ export function createSileroPreprocessor() {
                             if (process.env.ESCRIBANO_VERBOSE === 'true' && stdout) {
                                 console.log(`  Silero VAD stdout:\n${stdout
                                     .split('\n')
-                                    .map((l) => '    ' + l)
+                                    .map((l) => `    ${l}`)
                                     .join('\n')}`);
                             }
                             resolve();
@@ -70,12 +70,12 @@ export function createSileroPreprocessor() {
                         else {
                             console.error(`  Silero VAD stderr:\n${stderr
                                 .split('\n')
-                                .map((l) => '    ' + l)
+                                .map((l) => `    ${l}`)
                                 .join('\n')}`);
                             if (stdout) {
                                 console.error(`  Silero VAD stdout:\n${stdout
                                     .split('\n')
-                                    .map((l) => '    ' + l)
+                                    .map((l) => `    ${l}`)
                                     .join('\n')}`);
                             }
                             reject(new Error(`Silero VAD failed with code ${code}: ${stderr || stdout || 'No output captured'}`));

package/dist/adapters/intelligence.mlx.adapter.js CHANGED Viewed

@@ -10,12 +10,12 @@
  * See docs/adr/006-mlx-vlm-adapter.md for full design.
  */
 import { spawn } from 'node:child_process';
-import { existsSync, unlinkSync } from 'node:fs';
+import { existsSync, mkdirSync, unlinkSync } from 'node:fs';
 import { createConnection } from 'node:net';
-import { homedir } from 'node:os';
 import { dirname, resolve } from 'node:path';
 import { fileURLToPath } from 'node:url';
 const __dirname = dirname(fileURLToPath(import.meta.url));
+import { ESCRIBANO_HOME, ESCRIBANO_VENV, ESCRIBANO_VENV_PYTHON, getPythonPath, } from '../python-utils.js';
 const DEBUG_MLX = process.env.ESCRIBANO_VERBOSE === 'true';
 function debugLog(...args) {
     if (DEBUG_MLX) {
@@ -29,28 +29,85 @@ const DEFAULT_CONFIG = {
     maxTokens: Number(process.env.ESCRIBANO_VLM_MAX_TOKENS) || 2000,
     socketPath: process.env.ESCRIBANO_MLX_SOCKET_PATH ?? '/tmp/escribano-mlx.sock',
     bridgeScript: resolve(__dirname, '../../scripts/mlx_bridge.py'),
-    startupTimeout: Number(process.env.ESCRIBANO_MLX_STARTUP_TIMEOUT) || 60000,
+    startupTimeout: Number(process.env.ESCRIBANO_MLX_STARTUP_TIMEOUT) || 120000,
 };
+/** pip binary inside Escribano's managed venv. */
+const _ESCRIBANO_VENV_PIP = resolve(ESCRIBANO_VENV, 'bin', 'pip');
 /**
- * Get Python executable path.
- * Priority:
- * 1. ESCRIBANO_PYTHON_PATH env var (explicit override)
- * 2. Active virtual environment (VIRTUAL_ENV)
- * 3. System python3 (fallback)
+ * Run a command, streaming stdout/stderr directly to the terminal.
+ * Used for long-running setup tasks (venv creation, pip install) so the
+ * user can see progress in real time.
  */
-function getPythonPath() {
-    if (process.env.ESCRIBANO_PYTHON_PATH) {
-        return process.env.ESCRIBANO_PYTHON_PATH;
+function runVisible(cmd, args) {
+    return new Promise((res, rej) => {
+        const proc = spawn(cmd, args, { stdio: 'inherit' });
+        proc.on('exit', (code) => code === 0 ? res() : rej(new Error(`${cmd} exited with code ${code}`)));
+        proc.on('error', rej);
+    });
+}
+/**
+ * Run a command silently (discard output). Used for quick probe checks.
+ */
+function runSilent(cmd, args) {
+    return new Promise((res, rej) => {
+        const proc = spawn(cmd, args, { stdio: 'ignore' });
+        proc.on('exit', (code) => code === 0 ? res() : rej(new Error(`${cmd} exited with code ${code}`)));
+        proc.on('error', rej);
+    });
+}
+/**
+ * Ensure ~/.escribano/venv exists and has mlx-vlm installed.
+ * Uses plain `python3 -m venv` — no uv, no pip flags, no fuss.
+ * On first run this takes a few minutes; subsequent runs are instant.
+ */
+async function ensureEscribanoVenv() {
+    if (!existsSync(ESCRIBANO_HOME)) {
+        mkdirSync(ESCRIBANO_HOME, { recursive: true });
+    }
+    if (!existsSync(ESCRIBANO_VENV_PYTHON)) {
+        console.log('[VLM] First-time setup: creating Python environment at ~/.escribano/venv');
+        await runVisible('python3', ['-m', 'venv', ESCRIBANO_VENV]);
     }
-    if (process.env.VIRTUAL_ENV) {
-        return resolve(process.env.VIRTUAL_ENV, 'bin', 'python3');
+    // Check whether mlx-vlm and required runtime deps are already importable (~0.3s probe)
+    let mlxReady = false;
+    try {
+        await runSilent(ESCRIBANO_VENV_PYTHON, [
+            '-c',
+            'import mlx_vlm; import torch; import torchvision',
+        ]);
+        mlxReady = true;
     }
-    // Check common uv venv location (typically ~/.venv)
-    const uvHomeVenv = resolve(homedir(), '.venv', 'bin', 'python3');
-    if (existsSync(uvHomeVenv)) {
-        return uvHomeVenv;
+    catch {
+        // not installed yet
     }
-    return 'python3';
+    if (!mlxReady) {
+        console.log('[VLM] Installing mlx-vlm into ~/.escribano/venv (first run — this may take a few minutes)...');
+        // Ensure pip is available in the venv; ignore failures if ensurepip is disabled.
+        try {
+            await runVisible(ESCRIBANO_VENV_PYTHON, ['-m', 'ensurepip', '--upgrade']);
+        }
+        catch {
+            // ensurepip may be unavailable; continue and rely on existing pip if present.
+        }
+        await runVisible(ESCRIBANO_VENV_PYTHON, [
+            '-m',
+            'pip',
+            'install',
+            'mlx-vlm',
+            'torch',
+            'torchvision',
+        ]);
+        console.log('[VLM] mlx-vlm installed successfully.');
+    }
+    return ESCRIBANO_VENV_PYTHON;
+}
+/**
+ * Resolve the Python executable to use for the MLX bridge.
+ * If the user has configured an explicit environment, use it.
+ * Otherwise, transparently create and populate ~/.escribano/venv.
+ */
+export async function resolvePythonPath() {
+    return getPythonPath() ?? ensureEscribanoVenv();
 }
 // Global cleanup function to track the current bridge instance
 let globalCleanup = null;
@@ -124,10 +181,11 @@ export function createMlxIntelligenceService(_config = {}) {
         if (bridge.process && bridge.ready) {
             return;
         }
+        debugLog('Starting MLX bridge...');
+        // Resolve (and if needed, auto-create) the Python environment before spawning.
+        const pythonPath = await resolvePythonPath();
+        debugLog(`Using Python: ${pythonPath}`);
         return new Promise((resolve, reject) => {
-            debugLog('Starting MLX bridge...');
-            const pythonPath = getPythonPath();
-            debugLog(`Using Python: ${pythonPath}`);
             bridge.process = spawn(pythonPath, [mlxConfig.bridgeScript], {
                 stdio: ['ignore', 'pipe', 'pipe'],
                 env: {

package/dist/adapters/intelligence.ollama.adapter.js CHANGED Viewed

@@ -26,12 +26,11 @@ const vlmBatchItemSchema = z.object({
     apps: z.array(z.string()).default([]),
     topics: z.array(z.string()).default([]),
 });
-const vlmBatchResponseSchema = z.array(vlmBatchItemSchema);
+const _vlmBatchResponseSchema = z.array(vlmBatchItemSchema);
 /**
  * Helper to convert Zod schema to Ollama-compatible JSON schema
  */
 function toOllamaSchema(schema) {
-    // biome-ignore lint/suspicious/noExplicitAny: needed for Zod schema conversion
     const jsonSchema = z.toJSONSchema(schema);
     const { $schema, ...rest } = jsonSchema;
     return rest;
@@ -513,9 +512,7 @@ function extractJsonFromThinking(thinking) {
     }
     return null;
 }
-async function callOllama(prompt, config, options
-// biome-ignore lint/suspicious/noExplicitAny: Ollama returns dynamic JSON or strings
-) {
+async function callOllama(prompt, config, options) {
     const requestId = Math.random().toString(36).substring(2, 8);
     const requestStart = Date.now();
     // Model warm-up (errors handled gracefully, especially in tests)

package/dist/batch-context.js CHANGED Viewed

@@ -24,6 +24,7 @@ import { createOllamaIntelligenceService } from './adapters/intelligence.ollama.
 import { createOutlinePublishingService } from './adapters/publishing.outline.adapter.js';
 import { createWhisperTranscriptionService } from './adapters/transcription.whisper.adapter.js';
 import { createFfmpegVideoService } from './adapters/video.ffmpeg.adapter.js';
+import { createDefaultConfig } from './config.js';
 import { getDbPath, getRepositories } from './db/index.js';
 import { log, setResourceTracker, step, withPipeline, } from './pipeline/context.js';
 import { ResourceTracker, setupStatsObserver, } from './stats/index.js';
@@ -36,6 +37,8 @@ const MODEL_PATH = path.join(MODELS_DIR, MODEL_FILE);
  * All adapters are created ONCE and reused across recordings.
  */
 export async function initializeSystem() {
+    // Create default config file if it doesn't exist
+    createDefaultConfig();
     console.log('Initializing database...');
     const repos = getRepositories();
     console.log(`Database ready: ${getDbPath()}`);