npm - escribano - Versions diffs - 0.4.4 → 0.5.0 - Mend

escribano 0.4.4 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/README.md +46 -26
package/dist/0_types.js +1 -1
package/dist/actions/generate-artifact-v3.js +5 -3
package/dist/actions/generate-summary-v3.js +81 -13
package/dist/adapters/intelligence.mlx.adapter.js +271 -197
package/dist/adapters/intelligence.ollama.adapter.js +37 -0
package/dist/batch-context.js +119 -33
package/dist/config.js +168 -62
package/dist/db/repositories/subject.sqlite.js +1 -1
package/dist/python-utils.js +28 -10
package/dist/services/subject-grouping.js +36 -9
package/dist/tests/index.test.js +25 -12
package/dist/tests/intelligence.mlx.adapter.test.js +13 -8
package/dist/tests/utils/env-logger.test.js +6 -6
package/dist/utils/model-detector.js +105 -2
package/migrations/010_llm_backend_metadata.sql +25 -0
package/migrations/011_llm_debug_log.sql +19 -0
package/migrations/012_llm_debug_log_prompt_result.sql +20 -0
package/package.json +1 -1
package/scripts/mlx_bridge.py +578 -78

package/README.md CHANGED Viewed

@@ -95,19 +95,49 @@ Good for retrospectives or blog drafts.
 ## Benchmarks
-Ran the full pipeline on 11 real screen recordings:
+### Architecture Benefits (MLX Migration)
+| Improvement | Impact |
+|-------------|--------|
+| **Zero dependencies** | No external daemons required |
+| **Unified backend** | VLM + LLM use same MLX infrastructure |
+| **Native Metal** | Optimized for Apple Silicon |
+| **Memory efficient** | Sequential model loading (no OOM) |
+| **Auto-detection** | RAM-based model selection |
+### Production Run (March 2026)
+Processed **17 real screen recordings** with MLX backend:
 | Metric | Result |
 |--------|--------|
-| Videos processed | 11 |
-| Artifacts generated | 33 (3 formats × 11 videos) |
-| Success rate | 100% |
-| Total time | 1h 41m |
-| Avg per video | **~9 min** (pipeline + all 3 formats) |
+| Videos processed | 17 |
+| Successful | 15 (88%) |
+| Total video duration | 25.6 hours |
+| Artifacts generated | 45 (3 formats × 15 videos) |
+| **LLM generation** | **~2.2 min per video** |
+| Subject grouping | 78.7s avg |
+| Artifact generation | 53.6s avg |
+| LLM success rate | 100% (92 calls) |
 | Hardware | MacBook Pro M4 Max, 128GB |
+| Backend | MLX (Qwen3-VL-2B + Qwen3.5-27B) |
 Everything runs locally. No API keys. Nothing leaves your machine.
+### Hardware Tiers (March 2026)
+Performance varies by hardware:
+| Hardware | RAM | VLM Speed | LLM Model | LLM Speed | Total (1min video) |
+|----------|-----|-----------|-----------|-----------|-------------------|
+| **M4 Max** | 128GB | 0.7s/frame | Qwen3.5-27B | 53s avg | **~2.2 min** |
+| **M1/M2/M3 Pro** | 16-32GB | 1.5-3s/frame | Qwen3.5-9B | 80-120s | ~5-8 min |
+| **M1/M2 Air** | 16GB | 7-9s/frame | Qwen3.5-9B | 150-250s | ~12-15 min |
+**Minimum viable**: 16GB unified memory (slower but functional)
+**Recommended**: 32GB+ for comfortable use, 64GB+ for best quality
 ---
 ## Why this exists
@@ -141,7 +171,7 @@ Screen recording
 Activity segmentation → temporal audio alignment → TopicBlocks
      │
      ▼
-LLM summary (Ollama, auto-detected) → Markdown artifact
+LLM summary (MLX-LM, auto-detected) → Markdown artifact
 ```
 Uses VLM-first visual understanding, not OCR + text clustering. OCR fails for developer work because all code screens produce similar tokens. VLMs understand the *activity*, not just the text.
@@ -154,32 +184,22 @@ Uses VLM-first visual understanding, not OCR + text clustering. OCR fails for de
 ```bash
 # macOS (Homebrew)
-brew install ollama whisper-cpp ffmpeg
+brew install whisper-cpp ffmpeg
-# MLX-VLM for frame analysis (Apple Silicon)
-# Using uv (recommended, faster)
-uv pip install mlx-vlm
-# Or using pip
-pip install mlx-vlm
+# MLX for inference (Apple Silicon) - auto-installed on first run
+# Or pre-install with:
+pip install mlx-vlm mlx-lm
 ```
-### LLM Model Setup
+That's it. No external daemons required. MLX-VLM and MLX-LM run in-process.
-Escribano auto-detects the best model for your hardware:
+### (Optional) Ollama Backend
-| Your RAM | Auto-selected | Install command |
-|----------|---------------|-----------------|
-| 16GB | `qwen3:8b` | `ollama pull qwen3:8b` |
-| 32GB | `qwen3:14b` | `ollama pull qwen3:14b` |
-| 64GB+ | `qwen3.5:27b` | `ollama pull qwen3.5:27b` |
+If you prefer Ollama, set `ESCRIBANO_LLM_BACKEND=ollama`:
 ```bash
-# Minimum (16GB)
-ollama pull qwen3:8b
-# Or best quality (64GB+)
-ollama pull qwen3.5:27b
+brew install ollama
+ollama pull qwen3:8b  # or qwen3.5:27b for 64GB+ RAM
 ```
 ### Run

package/dist/0_types.js CHANGED Viewed

@@ -262,7 +262,7 @@ export const intelligenceConfigSchema = z.object({
         similarityThreshold: 0.75,
     }),
     // MLX-VLM specific config
-    vlmBatchSize: z.number().default(4),
+    vlmBatchSize: z.number().default(2),
     vlmMaxTokens: z.number().default(2000),
     mlxSocketPath: z.string().default('/tmp/escribano-mlx.sock'),
 });

package/dist/actions/generate-artifact-v3.js CHANGED Viewed

@@ -228,9 +228,11 @@ async function generateLlmArtifact(subjects, groupingResult, format, recording,
         .replace('{{SUBJECT_COUNT}}', String(subjects.length))
         .replace('{{SUBJECTS_DATA}}', subjectsData)
         .replace('{{WORK_SUBJECTS}}', subjectsData);
-    return intelligence.generateText(prompt, {
-        expectJson: false,
-        think: ARTIFACT_THINK,
+    return step('llm_artifact_generation', async () => {
+        return intelligence.generateText(prompt, {
+            expectJson: false,
+            think: ARTIFACT_THINK,
+        });
     });
 }
 function buildSubjectsDataForPrompt(subjects, allTopicBlocks) {

package/dist/actions/generate-summary-v3.js CHANGED Viewed

@@ -8,7 +8,7 @@ import { mkdir, readFile, writeFile } from 'node:fs/promises';
 import { homedir } from 'node:os';
 import path, { dirname, resolve } from 'node:path';
 import { fileURLToPath } from 'node:url';
-import { log } from '../pipeline/context.js';
+import { log, step } from '../pipeline/context.js';
 import { groupTopicBlocksIntoSubjects, saveSubjectsToDatabase, } from '../services/subject-grouping.js';
 const __dirname = dirname(fileURLToPath(import.meta.url));
 /**
@@ -33,14 +33,28 @@ export async function generateSummaryV3(recordingId, repos, intelligence, option
         throw new Error(`No TopicBlocks found for recording ${recordingId}. Run process-v3 first.`);
     }
     log('info', `[Summary V3] Found ${allTopicBlocks.length} TopicBlocks`);
-    // Group TopicBlocks into subjects
-    log('info', '[Summary V3] Grouping TopicBlocks into subjects...');
-    const groupingResult = await groupTopicBlocksIntoSubjects(allTopicBlocks, intelligence, recordingId);
-    const { subjects } = groupingResult;
-    const { personalDuration, workDuration } = groupingResult;
-    // Save subjects to database
-    log('info', `[Summary V3] Saving ${subjects.length} subjects to database...`);
-    saveSubjectsToDatabase(subjects, recordingId, repos);
+    // Check if subjects already exist for this recording
+    const existingSubjects = repos.subjects.findByRecording(recordingId);
+    let subjects;
+    let personalDuration;
+    let workDuration;
+    if (existingSubjects.length > 0) {
+        log('info', `[Summary V3] Reusing ${existingSubjects.length} existing subjects (no re-grouping needed)`);
+        const loaded = loadExistingSubjects(existingSubjects, repos);
+        subjects = loaded.subjects;
+        personalDuration = loaded.personalDuration;
+        workDuration = loaded.workDuration;
+    }
+    else {
+        // Group TopicBlocks into subjects
+        log('info', '[Summary V3] Grouping TopicBlocks into subjects...');
+        const groupingResult = await groupTopicBlocksIntoSubjects(allTopicBlocks, intelligence, recordingId);
+        log('info', `[Summary V3] Saving ${groupingResult.subjects.length} subjects to database...`);
+        saveSubjectsToDatabase(groupingResult.subjects, recordingId, repos);
+        subjects = groupingResult.subjects;
+        personalDuration = groupingResult.personalDuration;
+        workDuration = groupingResult.workDuration;
+    }
     // Filter TopicBlocks based on personal/work classification
     let topicBlocksToUse = allTopicBlocks;
     if (!options.includePersonal) {
@@ -48,7 +62,8 @@ export async function generateSummaryV3(recordingId, repos, intelligence, option
         const personalSubjectIds = new Set(subjects.filter((s) => s.isPersonal).map((s) => s.id));
         topicBlocksToUse = allTopicBlocks.filter((block) => {
             const subjectForBlock = subjects.find((s) => s.topicBlockIds.includes(block.id));
-            return !subjectForBlock?.isPersonal;
+            // Use the collected personalSubjectIds set for filtering
+            return !personalSubjectIds.has(subjectForBlock?.id ?? '');
         });
     }
     // Build sections from TopicBlocks
@@ -210,10 +225,35 @@ ${section.transcript ? `**Audio Transcript:**\n${section.transcript}` : '*No aud
         .replace('{{APPS_LIST}}', appsList)
         .replace('{{URLS_LIST}}', urlsList);
     // Call LLM
-    const result = await intelligence.generateText(prompt, {
-        expectJson: false,
+    const result = await step('llm_artifact_generation', async () => {
+        return intelligence.generateText(prompt, {
+            expectJson: false,
+            debugContext: {
+                recordingId: recording.id,
+                callType: 'artifact_generation',
+            },
+        });
     });
-    return result;
+    // Strip thinking leakage if present
+    let cleaned = result.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
+    if (cleaned.includes('</think>')) {
+        // Handle orphan </think> tag (Qwen3.5 behavior)
+        cleaned = cleaned.split('</think>')[1].trim();
+    }
+    // Strip "Thinking Process:" prose (Qwen3.5-OptiQ format)
+    const tpMatch = cleaned.match(/(?:^|\n)Thinking Process:/);
+    if (tpMatch !== null) {
+        const after = cleaned.slice((tpMatch.index ?? 0) + tpMatch[0].length);
+        const heading = after.match(/\n(#\s|\*\*)/);
+        cleaned =
+            heading?.index !== undefined ? after.slice(heading.index).trim() : '';
+    }
+    // If cleaning leaves nothing usable, fall back to template
+    if (cleaned.length > 50) {
+        return cleaned;
+    }
+    console.warn('[artifact-generation] Thinking leakage detected or response too short — falling back to template');
+    return formatSummary(sections, recording.duration, recording.id);
 }
 /**
  * Format sections into a readable markdown summary (template fallback).
@@ -312,3 +352,31 @@ ${section.transcript}
 `;
     return summary;
 }
+function loadExistingSubjects(existingSubjects, repos) { // Rebuilds in-memory subject objects from stored subject rows and totals personal vs. work duration.
+    const subjects = []; // filled in the same order as existingSubjects
+    for (const dbSubject of existingSubjects) {
+        const topicBlocks = repos.subjects.getTopicBlocks(dbSubject.id); // one repository lookup per stored subject
+        const activityBreakdown = dbSubject.activity_breakdown // parsed as JSON when truthy; JSON.parse throws on malformed text and nothing here catches it
+            ? JSON.parse(dbSubject.activity_breakdown)
+            : {}; // falsy column value falls back to an empty object
+        const metadata = dbSubject.metadata ? JSON.parse(dbSubject.metadata) : {}; // same parse-or-empty pattern as activity_breakdown
+        const apps = metadata.apps || []; // only the apps entry of metadata is read; defaults to an empty array
+        subjects.push({
+            id: dbSubject.id,
+            recordingId: topicBlocks[0]?.recording_id || '', // taken from the first topic block; empty string when the subject has no blocks
+            label: dbSubject.label,
+            topicBlockIds: topicBlocks.map((b) => b.id),
+            totalDuration: dbSubject.duration, // units are not visible here — TODO confirm against the subjects schema
+            activityBreakdown,
+            apps,
+            isPersonal: dbSubject.is_personal === 1, // strict check: only the number 1 counts as personal
+        });
+    }
+    const personalDuration = subjects // sum of totalDuration over subjects flagged personal
+        .filter((s) => s.isPersonal)
+        .reduce((sum, s) => sum + s.totalDuration, 0);
+    const workDuration = subjects // sum of totalDuration over all remaining (non-personal) subjects
+        .filter((s) => !s.isPersonal)
+        .reduce((sum, s) => sum + s.totalDuration, 0);
+    return { subjects, personalDuration, workDuration }; // the caller in generateSummaryV3 reads exactly these three fields
+}