escribano 0.4.4 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -95,19 +95,49 @@ Good for retrospectives or blog drafts.
95
95
 
96
96
  ## Benchmarks
97
97
 
98
- Ran the full pipeline on 11 real screen recordings:
98
+ ### Architecture Benefits (MLX Migration)
99
+
100
+ | Improvement | Impact |
101
+ |-------------|--------|
102
+ | **Zero dependencies** | No external daemons required |
103
+ | **Unified backend** | VLM + LLM use same MLX infrastructure |
104
+ | **Native Metal** | Optimized for Apple Silicon |
105
+ | **Memory efficient** | Sequential model loading (no OOM) |
106
+ | **Auto-detection** | RAM-based model selection |
107
+
108
+ ### Production Run (March 2026)
109
+
110
+ Processed **17 real screen recordings** with MLX backend:
99
111
 
100
112
  | Metric | Result |
101
113
  |--------|--------|
102
- | Videos processed | 11 |
103
- | Artifacts generated | 33 (3 formats × 11 videos) |
104
- | Success rate | 100% |
105
- | Total time | 1h 41m |
106
- | Avg per video | **~9 min** (pipeline + all 3 formats) |
114
+ | Videos processed | 17 |
115
+ | Successful | 15 (88%) |
116
+ | Total video duration | 25.6 hours |
117
+ | Artifacts generated | 45 (3 formats × 15 videos) |
118
+ | **LLM generation** | **~2.2 min per video** |
119
+ | Subject grouping | 78.7s avg |
120
+ | Artifact generation | 53.6s avg |
121
+ | LLM success rate | 100% (92 calls) |
107
122
  | Hardware | MacBook Pro M4 Max, 128GB |
123
+ | Backend | MLX (Qwen3-VL-2B + Qwen3.5-27B) |
108
124
 
109
125
  Everything runs locally. No API keys. Nothing leaves your machine.
110
126
 
127
+ ### Hardware Tiers (March 2026)
128
+
129
+ Performance varies by hardware:
130
+
131
+ | Hardware | RAM | VLM Speed | LLM Model | LLM Speed | Total (1min video) |
132
+ |----------|-----|-----------|-----------|-----------|-------------------|
133
+ | **M4 Max** | 128GB | 0.7s/frame | Qwen3.5-27B | 53s avg | **~2.2 min** |
134
+ | **M1/M2/M3 Pro** | 16-32GB | 1.5-3s/frame | Qwen3.5-9B | 80-120s | ~5-8 min |
135
+ | **M1/M2 Air** | 16GB | 7-9s/frame | Qwen3.5-9B | 150-250s | ~12-15 min |
136
+
137
+ **Minimum viable**: 16GB unified memory (slower but functional)
138
+
139
+ **Recommended**: 32GB+ for comfortable use, 64GB+ for best quality
140
+
111
141
  ---
112
142
 
113
143
  ## Why this exists
@@ -141,7 +171,7 @@ Screen recording
141
171
  Activity segmentation → temporal audio alignment → TopicBlocks
142
172
 
143
173
 
144
- LLM summary (Ollama, auto-detected) → Markdown artifact
174
+ LLM summary (MLX-LM, auto-detected) → Markdown artifact
145
175
  ```
146
176
 
147
177
  Uses VLM-first visual understanding, not OCR + text clustering. OCR fails for developer work because all code screens produce similar tokens. VLMs understand the *activity*, not just the text.
@@ -154,32 +184,22 @@ Uses VLM-first visual understanding, not OCR + text clustering. OCR fails for de
154
184
 
155
185
  ```bash
156
186
  # macOS (Homebrew)
157
- brew install ollama whisper-cpp ffmpeg
187
+ brew install whisper-cpp ffmpeg
158
188
 
159
- # MLX-VLM for frame analysis (Apple Silicon)
160
- # Using uv (recommended, faster)
161
- uv pip install mlx-vlm
162
-
163
- # Or using pip
164
- pip install mlx-vlm
189
+ # MLX for inference (Apple Silicon) - auto-installed on first run
190
+ # Or pre-install with:
191
+ pip install mlx-vlm mlx-lm
165
192
  ```
166
193
 
167
- ### LLM Model Setup
194
+ That's it. No external daemons required. MLX-VLM and MLX-LM run in-process.
168
195
 
169
- Escribano auto-detects the best model for your hardware:
196
+ ### (Optional) Ollama Backend
170
197
 
171
- | Your RAM | Auto-selected | Install command |
172
- |----------|---------------|-----------------|
173
- | 16GB | `qwen3:8b` | `ollama pull qwen3:8b` |
174
- | 32GB | `qwen3:14b` | `ollama pull qwen3:14b` |
175
- | 64GB+ | `qwen3.5:27b` | `ollama pull qwen3.5:27b` |
198
+ If you prefer Ollama, set `ESCRIBANO_LLM_BACKEND=ollama`:
176
199
 
177
200
  ```bash
178
- # Minimum (16GB)
179
- ollama pull qwen3:8b
180
-
181
- # Or best quality (64GB+)
182
- ollama pull qwen3.5:27b
201
+ brew install ollama
202
+ ollama pull qwen3:8b # or qwen3.5:27b for 64GB+ RAM
183
203
  ```
184
204
 
185
205
  ### Run
package/dist/0_types.js CHANGED
@@ -262,7 +262,7 @@ export const intelligenceConfigSchema = z.object({
262
262
  similarityThreshold: 0.75,
263
263
  }),
264
264
  // MLX-VLM specific config
265
- vlmBatchSize: z.number().default(4),
265
+ vlmBatchSize: z.number().default(2),
266
266
  vlmMaxTokens: z.number().default(2000),
267
267
  mlxSocketPath: z.string().default('/tmp/escribano-mlx.sock'),
268
268
  });
@@ -228,9 +228,11 @@ async function generateLlmArtifact(subjects, groupingResult, format, recording,
228
228
  .replace('{{SUBJECT_COUNT}}', String(subjects.length))
229
229
  .replace('{{SUBJECTS_DATA}}', subjectsData)
230
230
  .replace('{{WORK_SUBJECTS}}', subjectsData);
231
- return intelligence.generateText(prompt, {
232
- expectJson: false,
233
- think: ARTIFACT_THINK,
231
+ return step('llm_artifact_generation', async () => {
232
+ return intelligence.generateText(prompt, {
233
+ expectJson: false,
234
+ think: ARTIFACT_THINK,
235
+ });
234
236
  });
235
237
  }
236
238
  function buildSubjectsDataForPrompt(subjects, allTopicBlocks) {
@@ -8,7 +8,7 @@ import { mkdir, readFile, writeFile } from 'node:fs/promises';
8
8
  import { homedir } from 'node:os';
9
9
  import path, { dirname, resolve } from 'node:path';
10
10
  import { fileURLToPath } from 'node:url';
11
- import { log } from '../pipeline/context.js';
11
+ import { log, step } from '../pipeline/context.js';
12
12
  import { groupTopicBlocksIntoSubjects, saveSubjectsToDatabase, } from '../services/subject-grouping.js';
13
13
  const __dirname = dirname(fileURLToPath(import.meta.url));
14
14
  /**
@@ -33,14 +33,28 @@ export async function generateSummaryV3(recordingId, repos, intelligence, option
33
33
  throw new Error(`No TopicBlocks found for recording ${recordingId}. Run process-v3 first.`);
34
34
  }
35
35
  log('info', `[Summary V3] Found ${allTopicBlocks.length} TopicBlocks`);
36
- // Group TopicBlocks into subjects
37
- log('info', '[Summary V3] Grouping TopicBlocks into subjects...');
38
- const groupingResult = await groupTopicBlocksIntoSubjects(allTopicBlocks, intelligence, recordingId);
39
- const { subjects } = groupingResult;
40
- const { personalDuration, workDuration } = groupingResult;
41
- // Save subjects to database
42
- log('info', `[Summary V3] Saving ${subjects.length} subjects to database...`);
43
- saveSubjectsToDatabase(subjects, recordingId, repos);
36
+ // Check if subjects already exist for this recording
37
+ const existingSubjects = repos.subjects.findByRecording(recordingId);
38
+ let subjects;
39
+ let personalDuration;
40
+ let workDuration;
41
+ if (existingSubjects.length > 0) {
42
+ log('info', `[Summary V3] Reusing ${existingSubjects.length} existing subjects (no re-grouping needed)`);
43
+ const loaded = loadExistingSubjects(existingSubjects, repos);
44
+ subjects = loaded.subjects;
45
+ personalDuration = loaded.personalDuration;
46
+ workDuration = loaded.workDuration;
47
+ }
48
+ else {
49
+ // Group TopicBlocks into subjects
50
+ log('info', '[Summary V3] Grouping TopicBlocks into subjects...');
51
+ const groupingResult = await groupTopicBlocksIntoSubjects(allTopicBlocks, intelligence, recordingId);
52
+ log('info', `[Summary V3] Saving ${groupingResult.subjects.length} subjects to database...`);
53
+ saveSubjectsToDatabase(groupingResult.subjects, recordingId, repos);
54
+ subjects = groupingResult.subjects;
55
+ personalDuration = groupingResult.personalDuration;
56
+ workDuration = groupingResult.workDuration;
57
+ }
44
58
  // Filter TopicBlocks based on personal/work classification
45
59
  let topicBlocksToUse = allTopicBlocks;
46
60
  if (!options.includePersonal) {
@@ -48,7 +62,8 @@ export async function generateSummaryV3(recordingId, repos, intelligence, option
48
62
  const personalSubjectIds = new Set(subjects.filter((s) => s.isPersonal).map((s) => s.id));
49
63
  topicBlocksToUse = allTopicBlocks.filter((block) => {
50
64
  const subjectForBlock = subjects.find((s) => s.topicBlockIds.includes(block.id));
51
- return !subjectForBlock?.isPersonal;
65
+ // Use the collected personalSubjectIds set for filtering
66
+ return !personalSubjectIds.has(subjectForBlock?.id ?? '');
52
67
  });
53
68
  }
54
69
  // Build sections from TopicBlocks
@@ -210,10 +225,35 @@ ${section.transcript ? `**Audio Transcript:**\n${section.transcript}` : '*No aud
210
225
  .replace('{{APPS_LIST}}', appsList)
211
226
  .replace('{{URLS_LIST}}', urlsList);
212
227
  // Call LLM
213
- const result = await intelligence.generateText(prompt, {
214
- expectJson: false,
228
+ const result = await step('llm_artifact_generation', async () => {
229
+ return intelligence.generateText(prompt, {
230
+ expectJson: false,
231
+ debugContext: {
232
+ recordingId: recording.id,
233
+ callType: 'artifact_generation',
234
+ },
235
+ });
215
236
  });
216
- return result;
237
+ // Strip thinking leakage if present
238
+ let cleaned = result.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
239
+ if (cleaned.includes('</think>')) {
240
+ // Handle orphan </think> tag (Qwen3.5 behavior)
241
+ cleaned = cleaned.split('</think>')[1].trim();
242
+ }
243
+ // Strip "Thinking Process:" prose (Qwen3.5-OptiQ format)
244
+ const tpMatch = cleaned.match(/(?:^|\n)Thinking Process:/);
245
+ if (tpMatch !== null) {
246
+ const after = cleaned.slice((tpMatch.index ?? 0) + tpMatch[0].length);
247
+ const heading = after.match(/\n(#\s|\*\*)/);
248
+ cleaned =
249
+ heading?.index !== undefined ? after.slice(heading.index).trim() : '';
250
+ }
251
+ // If cleaning leaves nothing usable, fall back to template
252
+ if (cleaned.length > 50) {
253
+ return cleaned;
254
+ }
255
+ console.warn('[artifact-generation] Thinking leakage detected or response too short — falling back to template');
256
+ return formatSummary(sections, recording.duration, recording.id);
217
257
  }
218
258
  /**
219
259
  * Format sections into a readable markdown summary (template fallback).
@@ -312,3 +352,31 @@ ${section.transcript}
312
352
  `;
313
353
  return summary;
314
354
  }
/**
 * Rebuild in-memory subject objects from previously persisted subject rows,
 * so a summary can be regenerated without re-running subject grouping.
 *
 * For every row the linked TopicBlocks are fetched through
 * `repos.subjects.getTopicBlocks(row.id)`, and the JSON text columns
 * `activity_breakdown` and `metadata` are decoded. These two columns are
 * auxiliary: a malformed or non-object value is replaced by an empty default
 * (with a warning for malformed JSON) instead of aborting the whole summary.
 *
 * @param {Array<object>} existingSubjects - Persisted subject rows. Fields read:
 *   `id`, `label`, `duration`, `is_personal`, `activity_breakdown`, `metadata`.
 * @param {object} repos - Repository bundle; only `repos.subjects.getTopicBlocks`
 *   is used here.
 * @returns {{subjects: Array<object>, personalDuration: number, workDuration: number}}
 *   The rebuilt subjects plus the summed `totalDuration` of personal and
 *   non-personal subjects respectively.
 */
function loadExistingSubjects(existingSubjects, repos) {
    // Decode a JSON text column into an object; anything else becomes `{}`.
    const parseObjectColumn = (raw, column, subjectId) => {
        if (!raw) {
            return {};
        }
        let parsed;
        try {
            parsed = JSON.parse(raw);
        }
        catch (err) {
            console.warn(`[Summary V3] Ignoring malformed ${column} JSON for subject ${subjectId}: ${err.message}`);
            return {};
        }
        // JSON.parse('null') yields null and scalars are valid JSON too; neither
        // can be used as a key/value container, so fall back to an empty object.
        return parsed !== null && typeof parsed === 'object' ? parsed : {};
    };
    const subjects = [];
    let personalDuration = 0;
    let workDuration = 0;
    for (const dbSubject of existingSubjects) {
        const topicBlocks = repos.subjects.getTopicBlocks(dbSubject.id);
        const activityBreakdown = parseObjectColumn(dbSubject.activity_breakdown, 'activity_breakdown', dbSubject.id);
        const metadata = parseObjectColumn(dbSubject.metadata, 'metadata', dbSubject.id);
        // Only accept a real list of apps; any other shape is treated as "none".
        const apps = Array.isArray(metadata.apps) ? metadata.apps : [];
        // The comparison is strict: only the numeric value 1 marks a subject as personal.
        const isPersonal = dbSubject.is_personal === 1;
        subjects.push({
            id: dbSubject.id,
            // The recording id is taken from the first linked TopicBlock; a subject
            // without blocks gets an empty string.
            recordingId: topicBlocks[0]?.recording_id || '',
            label: dbSubject.label,
            topicBlockIds: topicBlocks.map((block) => block.id),
            totalDuration: dbSubject.duration,
            activityBreakdown,
            apps,
            isPersonal,
        });
        // Accumulate totals in the same pass instead of re-scanning `subjects`.
        if (isPersonal) {
            personalDuration += dbSubject.duration;
        }
        else {
            workDuration += dbSubject.duration;
        }
    }
    return { subjects, personalDuration, workDuration };
}