escribano 0.4.5 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +46 -26
- package/dist/actions/generate-artifact-v3.js +5 -3
- package/dist/actions/generate-summary-v3.js +29 -4
- package/dist/adapters/cap.adapter.js +94 -0
- package/dist/adapters/intelligence.adapter.js +202 -0
- package/dist/adapters/intelligence.mlx.adapter.js +258 -185
- package/dist/adapters/storage.adapter.js +81 -0
- package/dist/adapters/whisper.adapter.js +168 -0
- package/dist/batch-context.js +91 -34
- package/dist/config.js +12 -1
- package/dist/db/repositories/subject.sqlite.js +1 -1
- package/dist/domain/context.js +97 -0
- package/dist/domain/index.js +2 -0
- package/dist/domain/observation.js +17 -0
- package/dist/python-utils.js +28 -10
- package/dist/services/subject-grouping.js +36 -9
- package/dist/test-classification-prompts.js +181 -0
- package/dist/tests/cap.adapter.test.js +75 -0
- package/dist/tests/intelligence.adapter.test.js +102 -0
- package/dist/tests/intelligence.mlx.adapter.test.js +13 -8
- package/dist/utils/model-detector.js +105 -2
- package/migrations/010_llm_backend_metadata.sql +25 -0
- package/migrations/011_llm_debug_log.sql +19 -0
- package/migrations/012_llm_debug_log_prompt_result.sql +20 -0
- package/package.json +1 -1
- package/scripts/mlx_bridge.py +574 -74
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Escribano - Storage Adapter
|
|
3
|
+
*
|
|
4
|
+
* Saves and loads sessions from filesystem
|
|
5
|
+
*/
|
|
6
|
+
import { mkdir, readdir, readFile, writeFile } from 'node:fs/promises';
|
|
7
|
+
import os from 'node:os';
|
|
8
|
+
import { join } from 'node:path';
|
|
9
|
+
const SESSIONS_DIR = join(os.homedir(), '.escribano', 'sessions');
|
|
10
|
+
/**
 * Build the storage service backed by the local filesystem.
 *
 * @returns {object} An object exposing `saveSession`, `loadSession`,
 *   `listSessions`, `saveArtifact` and `loadArtifacts`.
 */
export function createStorageService() {
    const service = {
        saveSession: saveSession,
        loadSession: loadSession,
        listSessions: listSessions,
        saveArtifact: saveArtifact,
        loadArtifacts: loadArtifacts,
    };
    return service;
}
|
|
19
|
+
/**
 * Make sure the sessions directory exists, creating missing parent
 * directories along the way.
 *
 * @returns {Promise<void>}
 */
async function ensureSessionsDir() {
    const mkdirOptions = { recursive: true };
    await mkdir(SESSIONS_DIR, mkdirOptions);
}
|
|
22
|
+
/**
 * Persist a session as pretty-printed JSON in `<sessions dir>/<session.id>.json`.
 *
 * @param {object} session - Session to store; its `id` names the file.
 * @returns {Promise<void>}
 */
async function saveSession(session) {
    await ensureSessionsDir();
    const target = join(SESSIONS_DIR, `${session.id}.json`);
    const serialized = JSON.stringify(session, null, 2);
    await writeFile(target, serialized, 'utf-8');
}
|
|
27
|
+
/**
 * Read and parse the stored session with the given id.
 *
 * @param {string} sessionId - Id used to build the `<sessionId>.json` file name.
 * @returns {Promise<object|null>} The parsed session, or `null` when the
 *   file cannot be read or parsed.
 */
async function loadSession(sessionId) {
    await ensureSessionsDir();
    const file = join(SESSIONS_DIR, `${sessionId}.json`);
    let parsed;
    try {
        const raw = await readFile(file, 'utf-8');
        parsed = JSON.parse(raw);
    }
    catch {
        // Any read or parse failure is reported as "no such session".
        parsed = null;
    }
    return parsed;
}
|
|
38
|
+
/**
 * Load every session stored in the sessions directory.
 *
 * Only entries whose name ends with `.json` are considered. A file that
 * cannot be read or parsed is skipped with a warning instead of aborting
 * the whole listing, mirroring the best-effort behavior of `loadSession`.
 *
 * @returns {Promise<object[]>} Parsed sessions, in directory listing order.
 */
async function listSessions() {
    await ensureSessionsDir();
    const entries = await readdir(SESSIONS_DIR);
    const sessions = [];
    for (const file of entries) {
        if (!file.endsWith('.json')) {
            continue;
        }
        try {
            const content = await readFile(join(SESSIONS_DIR, file), 'utf-8');
            sessions.push(JSON.parse(content));
        }
        catch (error) {
            // One corrupt or half-written file must not hide the other sessions.
            console.warn(`Skipping unreadable session file ${file}: ${error.message}`);
        }
    }
    return sessions;
}
|
|
49
|
+
/**
 * Write an artifact into `<sessions dir>/<sessionId>/artifacts/`.
 *
 * The file is named `<type>-<timestamp>.<format>`, where the timestamp is the
 * current ISO time with colons replaced by dashes and the fractional part
 * dropped.
 *
 * @param {string} sessionId - Session the artifact belongs to.
 * @param {object} artifact - Provides `type`, `format` and `content`.
 * @returns {Promise<void>}
 */
async function saveArtifact(sessionId, artifact) {
    const targetDir = join(SESSIONS_DIR, sessionId, 'artifacts');
    await mkdir(targetDir, { recursive: true });
    const isoNow = new Date().toISOString();
    const [stamp] = isoNow.replace(/:/g, '-').split('.');
    const target = join(targetDir, `${artifact.type}-${stamp}.${artifact.format}`);
    await writeFile(target, artifact.content, 'utf-8');
}
|
|
57
|
+
/**
 * Load the markdown artifacts stored for a session.
 *
 * Only entries named `<type>-<rest>.md` are loaded. The name is checked
 * before the entry is read, so unrelated entries (sub-directories, other
 * formats) are skipped without touching them and cannot make the whole
 * listing fail.
 *
 * @param {string} sessionId - Session whose `artifacts` directory is scanned.
 * @returns {Promise<object[]>} One object per artifact with `id`, `type`,
 *   `content`, `format` ('markdown') and `createdAt`. `createdAt` is the time
 *   of loading, not the time the file was written. Returns an empty array
 *   when the directory or one of the artifact files cannot be read.
 */
async function loadArtifacts(sessionId) {
    const artifactsDir = join(SESSIONS_DIR, sessionId, 'artifacts');
    try {
        const files = await readdir(artifactsDir);
        const artifacts = [];
        for (const file of files) {
            const match = file.match(/^(\w+)-(.+)\.md$/);
            if (!match) {
                continue;
            }
            const [, type] = match;
            const content = await readFile(join(artifactsDir, file), 'utf-8');
            artifacts.push({
                // Strip the trailing extension only, not the first ".md" in the name.
                id: `${sessionId}-${file.slice(0, -'.md'.length)}`,
                type,
                content,
                format: 'markdown',
                createdAt: new Date(),
            });
        }
        return artifacts;
    }
    catch {
        return [];
    }
}
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Whisper Adapter
|
|
3
|
+
*
|
|
4
|
+
* Transcribes audio using whisper.cpp or OpenAI's whisper CLI.
|
|
5
|
+
* Shells out to the whisper binary for simplicity.
|
|
6
|
+
*
|
|
7
|
+
* Prerequisites:
|
|
8
|
+
* - whisper.cpp installed: brew install whisper-cpp
|
|
9
|
+
* - ffmpeg installed: brew install ffmpeg (for audio format conversion)
|
|
10
|
+
* - Or Python whisper: pip install openai-whisper
|
|
11
|
+
*/
|
|
12
|
+
import { exec } from 'node:child_process';
|
|
13
|
+
import { readFile, unlink } from 'node:fs/promises';
|
|
14
|
+
import { promisify } from 'node:util';
|
|
15
|
+
const execAsync = promisify(exec);
|
|
16
|
+
/**
 * Convert an audio file to 16 kHz mono WAV with ffmpeg, unless its extension
 * is already `wav`, `flac` or `mp3` (case-insensitive).
 *
 * @param {string} audioPath - Path of the input audio file.
 * @returns {Promise<string>} `audioPath` itself when no conversion is needed,
 *   otherwise the path of the new `<audioPath>.converted.wav` file.
 * @throws {Error} When ffmpeg fails or exceeds the 10 minute timeout; the
 *   underlying error is attached as `cause`.
 */
async function convertToWavIfNeeded(audioPath) {
    const ext = audioPath.toLowerCase().split('.').pop();
    if (['wav', 'flac', 'mp3'].includes(ext || '')) {
        return audioPath;
    }
    const outputPath = `${audioPath}.converted.wav`;
    try {
        console.log(`Converting ${audioPath} to WAV format...`);
        // NOTE(review): both paths are interpolated into a shell command line; a
        // path containing `"`, `$` or a backtick breaks the quoting. Consider
        // switching to execFile with an argument array.
        await execAsync(`ffmpeg -i "${audioPath}" -f wav -ar 16000 -ac 1 "${outputPath}" -y`, { timeout: 10 * 60 * 1000 });
        console.log(`Conversion complete: ${outputPath}`);
        return outputPath;
    }
    catch (error) {
        console.error(`Audio conversion failed for ${audioPath}`);
        // Do not leave a partially written output file behind.
        await unlink(outputPath).catch(() => { });
        throw new Error(`Failed to convert audio to WAV: ${error.message}`, { cause: error });
    }
}
|
|
33
|
+
/**
 * Creates a TranscriptionService that uses the whisper CLI.
 *
 * @param {object} [config] - Optional settings.
 * @param {string} [config.binaryPath='whisper-cpp'] - Whisper executable to run.
 * @param {string} [config.model='base'] - Value passed to the CLI `-m` flag.
 * @param {string} [config.outputFormat='json'] - Stored on the resolved config.
 * @param {string} [config.language] - Value passed to `-l` when set.
 * @param {string} [config.cwd] - Working directory for the whisper process.
 *   It is forwarded so that the `cwd` option read by the transcription step
 *   actually takes effect.
 * @returns {{ transcribe: (audioPath: string) => Promise<object> }} Service
 *   whose `transcribe` runs whisper on the given audio file.
 */
export function createWhisperTranscriber(config = {}) {
    const resolvedConfig = {
        binaryPath: config.binaryPath ?? 'whisper-cpp',
        model: config.model ?? 'base',
        outputFormat: config.outputFormat ?? 'json',
        language: config.language,
        cwd: config.cwd,
    };
    return {
        transcribe: (audioPath) => transcribeWithWhisper(audioPath, resolvedConfig),
    };
}
|
|
47
|
+
/**
 * Transcribe an audio file using the whisper CLI.
 *
 * The input is converted to WAV first when needed. Whisper is asked for JSON
 * output (`-oj`), which is read from `<audio>.json`; when that file cannot be
 * read or parsed, the process stdout is parsed instead. The JSON file and any
 * converted audio file are removed on every path, including failures.
 *
 * @param {string} audioPath - Path of the audio file to transcribe.
 * @param {object} config - Resolved configuration: `binaryPath`, `model`,
 *   optional `language` and optional `cwd`.
 * @returns {Promise<object>} Transcript built by `parseWhisperOutput` or
 *   `parseWhisperStdout`.
 * @throws {Error} With a message starting with "Whisper transcription failed"
 *   when the process fails or reports an error on stderr.
 */
async function transcribeWithWhisper(audioPath, config) {
    const audioToProcess = await convertToWavIfNeeded(audioPath);
    const args = [
        `-m ${config.model}`,
        `-f "${audioToProcess}"`,
        '-oj', // Output JSON
        config.language ? `-l ${config.language}` : '',
    ].filter(Boolean);
    const command = `${config.binaryPath} ${args.join(' ')}`;
    try {
        let result;
        try {
            result = await execAsync(command, {
                cwd: config.cwd,
                maxBuffer: 50 * 1024 * 1024, // 50MB buffer for large transcripts
                timeout: 10 * 60 * 1000, // 10 minute timeout
            });
        }
        catch (error) {
            throw new Error(`Whisper transcription failed: ${error.message}`, { cause: error });
        }
        const { stdout, stderr } = result;
        // Whisper may exit successfully while still reporting a failure on stderr.
        const hasError = ['error:', 'Error:', 'failed to'].some((marker) => stderr.includes(marker));
        if (hasError) {
            throw new Error(`Whisper transcription failed:\n${stderr}`);
        }
        // whisper-cpp outputs JSON to a file named <input>.json
        const jsonOutputPath = `${audioToProcess}.json`;
        try {
            const jsonContent = await readFile(jsonOutputPath, 'utf-8');
            return parseWhisperOutput(JSON.parse(jsonContent));
        }
        catch {
            // Fallback: try to parse stdout as the transcript
            return parseWhisperStdout(stdout);
        }
        finally {
            await unlink(jsonOutputPath).catch(() => { });
        }
    }
    finally {
        // The converted WAV is a temporary file owned by this call.
        if (audioToProcess !== audioPath) {
            await unlink(audioToProcess).catch(() => { });
        }
    }
}
|
|
98
|
+
/**
 * Turn whisper.cpp JSON output into the Transcript shape used here.
 *
 * @param {object} output - Parsed JSON; `output.transcription` is a list of
 *   entries with `offsets.from` / `offsets.to` in milliseconds and a `text`.
 * @returns {{ fullText: string, segments: object[], language: string, duration: number }}
 *   Segments with times in seconds, the space-joined text, a fixed language
 *   of 'en', and the end time of the last segment (0 when there is none).
 */
function parseWhisperOutput(output) {
    const segments = [];
    output.transcription.forEach((entry, position) => {
        segments.push({
            id: `seg-${position}`,
            start: entry.offsets.from / 1000, // Convert ms to seconds
            end: entry.offsets.to / 1000,
            text: entry.text.trim(),
            speaker: null,
        });
    });
    const texts = segments.map(({ text }) => text);
    const lastSegment = segments[segments.length - 1];
    return {
        fullText: texts.join(' '),
        segments,
        language: 'en', // whisper.cpp doesn't always report language in JSON
        duration: lastSegment ? lastSegment.end : 0,
    };
}
|
|
118
|
+
/**
 * Fallback parser for whisper's plain-text stdout.
 *
 * Lines shaped like "[00:00:00.000 --> 00:00:05.000] Hello world" become
 * timed segments. When no such line exists but the output is not blank, the
 * whole trimmed output becomes a single segment with start and end of 0.
 *
 * @param {string} stdout - Raw stdout of the whisper process.
 * @returns {{ fullText: string, segments: object[], language: string, duration: number }}
 *   Same shape as the JSON parser's result; language is always 'en'.
 */
function parseWhisperStdout(stdout) {
    const timestampRegex = /\[(\d{2}:\d{2}:\d{2}\.\d{3})\s*-->\s*(\d{2}:\d{2}:\d{2}\.\d{3})\]\s*(.*)/;
    const segments = [];
    stdout
        .split('\n')
        .filter((line) => line.trim())
        .map((line) => line.match(timestampRegex))
        .filter((found) => found !== null)
        .forEach(([, startStr, endStr, text]) => {
            segments.push({
                id: `seg-${segments.length}`,
                start: parseTimestamp(startStr),
                end: parseTimestamp(endStr),
                text: text.trim(),
                speaker: null,
            });
        });
    // If no timestamps found, treat entire output as single segment
    const wholeOutput = stdout.trim();
    if (segments.length === 0 && wholeOutput) {
        segments.push({
            id: 'seg-0',
            start: 0,
            end: 0,
            text: wholeOutput,
            speaker: null,
        });
    }
    const lastSegment = segments[segments.length - 1];
    return {
        fullText: segments.map(({ text }) => text).join(' '),
        segments,
        language: 'en',
        duration: lastSegment ? lastSegment.end : 0,
    };
}
|
|
158
|
+
/**
 * Parse a timestamp string such as "00:00:00.000" into seconds.
 *
 * The fractional part may have any number of digits, or be absent
 * ("00:00:05" is 5 seconds); it is scaled by its own length, so ".5" is half
 * a second and ".500" gives the same result.
 *
 * @param {string} timestamp - Text in `HH:MM:SS` or `HH:MM:SS.fff` form.
 * @returns {number} The time in seconds.
 */
function parseTimestamp(timestamp) {
    const [hours, minutes, rest] = timestamp.split(':');
    const [seconds, fraction = ''] = rest.split('.');
    // Scale by the number of digits actually present instead of assuming three.
    const fractionalSeconds = fraction
        ? parseInt(fraction, 10) / 10 ** fraction.length
        : 0;
    return (parseInt(hours, 10) * 3600 +
        parseInt(minutes, 10) * 60 +
        parseInt(seconds, 10) +
        fractionalSeconds);
}
|
package/dist/batch-context.js
CHANGED
|
@@ -29,7 +29,7 @@ import { createDefaultConfig, loadConfig, logConfig } from './config.js';
|
|
|
29
29
|
import { getDbPath, getRepositories } from './db/index.js';
|
|
30
30
|
import { log, setResourceTracker, step, withPipeline, } from './pipeline/context.js';
|
|
31
31
|
import { ResourceTracker, setupStatsObserver, } from './stats/index.js';
|
|
32
|
-
import { formatModelSelection, selectBestLLMModel, } from './utils/model-detector.js';
|
|
32
|
+
import { formatModelSelection, selectBestLLMModel, selectBestMLXModel, } from './utils/model-detector.js';
|
|
33
33
|
const MODELS_DIR = path.join(homedir(), '.escribano', 'models');
|
|
34
34
|
const MODEL_FILE = 'ggml-large-v3.bin';
|
|
35
35
|
const MODEL_PATH = path.join(MODELS_DIR, MODEL_FILE);
|
|
@@ -50,15 +50,24 @@ export async function initializeSystem() {
|
|
|
50
50
|
console.log('');
|
|
51
51
|
// Setup stats observer to capture pipeline events
|
|
52
52
|
setupStatsObserver(repos.stats);
|
|
53
|
-
// Detect best LLM model
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
53
|
+
// Detect best LLM model based on configured backend
|
|
54
|
+
let llm;
|
|
55
|
+
let mlxService = null;
|
|
56
|
+
if (config.llmBackend === 'mlx') {
|
|
57
|
+
console.log('[LLM] Using MLX for text generation');
|
|
58
|
+
const mlxModelSelection = await selectBestMLXModel();
|
|
59
|
+
console.log(formatModelSelection(mlxModelSelection));
|
|
60
|
+
console.log('');
|
|
61
|
+
mlxService = createMlxIntelligenceService();
|
|
62
|
+
llm = mlxService;
|
|
63
|
+
}
|
|
64
|
+
else {
|
|
65
|
+
console.log('[LLM] Using Ollama for text generation');
|
|
66
|
+
const ollamaModelSelection = await selectBestLLMModel();
|
|
67
|
+
console.log(formatModelSelection(ollamaModelSelection));
|
|
68
|
+
console.log('');
|
|
69
|
+
llm = createOllamaIntelligenceService();
|
|
70
|
+
}
|
|
62
71
|
const video = createFfmpegVideoService();
|
|
63
72
|
const preprocessor = createSileroPreprocessor();
|
|
64
73
|
const transcription = createWhisperTranscriptionService({
|
|
@@ -67,32 +76,42 @@ export async function initializeSystem() {
|
|
|
67
76
|
cwd: MODELS_DIR,
|
|
68
77
|
outputFormat: 'json',
|
|
69
78
|
});
|
|
70
|
-
// Setup resource tracking
|
|
71
79
|
const resourceTracker = new ResourceTracker();
|
|
72
|
-
resourceTracker.register(vlm);
|
|
73
80
|
resourceTracker.register(video);
|
|
74
81
|
resourceTracker.register(preprocessor);
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
82
|
+
if (config.llmBackend === 'ollama') {
|
|
83
|
+
resourceTracker.register({
|
|
84
|
+
getResourceName: () => 'ollama',
|
|
85
|
+
getPid: () => {
|
|
86
|
+
try {
|
|
87
|
+
const output = execSync('pgrep -f "ollama serve"').toString().trim();
|
|
88
|
+
const pid = parseInt(output.split('\n')[0] ?? '0', 10);
|
|
89
|
+
return pid > 0 ? pid : null;
|
|
90
|
+
}
|
|
91
|
+
catch {
|
|
92
|
+
return null;
|
|
93
|
+
}
|
|
94
|
+
},
|
|
95
|
+
});
|
|
96
|
+
}
|
|
97
|
+
else if (mlxService) {
|
|
98
|
+
resourceTracker.register(mlxService);
|
|
99
|
+
}
|
|
89
100
|
setResourceTracker(resourceTracker);
|
|
90
101
|
const outlineConfig = getOutlineConfig();
|
|
91
102
|
return {
|
|
92
103
|
repos,
|
|
93
|
-
adapters: {
|
|
104
|
+
adapters: {
|
|
105
|
+
vlm: null,
|
|
106
|
+
llm,
|
|
107
|
+
video,
|
|
108
|
+
preprocessor,
|
|
109
|
+
transcription,
|
|
110
|
+
},
|
|
94
111
|
resourceTracker,
|
|
95
112
|
outlineConfig,
|
|
113
|
+
config,
|
|
114
|
+
llmBackend: config.llmBackend,
|
|
96
115
|
};
|
|
97
116
|
}
|
|
98
117
|
/**
|
|
@@ -105,7 +124,7 @@ export async function processVideo(videoPath, ctx, options = {}) {
|
|
|
105
124
|
const startTime = Date.now();
|
|
106
125
|
const { force = false, skipSummary = false, micAudioPath, systemAudioPath, format = 'card', includePersonal = false, copyToClipboard = false, printToStdout = false, } = options;
|
|
107
126
|
const { repos, adapters, outlineConfig } = ctx;
|
|
108
|
-
const {
|
|
127
|
+
const { llm, video, preprocessor, transcription } = adapters;
|
|
109
128
|
// Load unified config for lifecycle management
|
|
110
129
|
const config = loadConfig();
|
|
111
130
|
try {
|
|
@@ -157,28 +176,60 @@ export async function processVideo(videoPath, ctx, options = {}) {
|
|
|
157
176
|
const skipProcessing = dbRec &&
|
|
158
177
|
(dbRec.status === 'processed' || dbRec.status === 'published') &&
|
|
159
178
|
!force;
|
|
179
|
+
// Create VLM adapter lazily (only if needed)
|
|
180
|
+
let vlm = null;
|
|
181
|
+
if (!skipProcessing) {
|
|
182
|
+
// Reuse the same MLX service instance for VLM (unified adapter handles both)
|
|
183
|
+
// Check if LLM is MLX backend - if so, it's already a unified VLM+LLM service
|
|
184
|
+
if (ctx.config.llmBackend === 'mlx' && llm) {
|
|
185
|
+
vlm = llm;
|
|
186
|
+
}
|
|
187
|
+
else {
|
|
188
|
+
console.log('[VLM] Initializing MLX-VLM for frame analysis...');
|
|
189
|
+
vlm = createMlxIntelligenceService();
|
|
190
|
+
ctx.resourceTracker.register(vlm);
|
|
191
|
+
}
|
|
192
|
+
ctx.adapters.vlm = vlm;
|
|
193
|
+
}
|
|
160
194
|
if (!skipProcessing) {
|
|
161
195
|
const runType = force
|
|
162
196
|
? 'force'
|
|
163
197
|
: dbRec?.processing_step
|
|
164
198
|
? 'resume'
|
|
165
199
|
: 'initial';
|
|
166
|
-
const runMetadata = collectRunMetadata(ctx.resourceTracker);
|
|
200
|
+
const runMetadata = collectRunMetadata(ctx.resourceTracker, ctx.config);
|
|
167
201
|
await withPipeline(recording.id, runType, runMetadata, async () => {
|
|
202
|
+
if (!vlm)
|
|
203
|
+
throw new Error('[VLM] Internal error: VLM adapter expected but not initialized');
|
|
168
204
|
await processRecordingV3(recording.id, repos, { preprocessor, transcription, video, intelligence: vlm }, { force });
|
|
169
205
|
});
|
|
170
|
-
//
|
|
171
|
-
|
|
172
|
-
|
|
206
|
+
// Clean up VLM bridge after processing to free memory for LLM
|
|
207
|
+
if (vlm) {
|
|
208
|
+
console.log('[VLM] Unloading VLM model to free memory...');
|
|
209
|
+
await vlm.unloadVlm?.();
|
|
210
|
+
// Note: We don't kill the bridge process here, just unload the model
|
|
211
|
+
// The bridge process will be reused for subsequent recordings if needed
|
|
212
|
+
}
|
|
173
213
|
}
|
|
174
214
|
// Generate artifact and publish (unless skipped), tracked as a pipeline run
|
|
175
215
|
let artifact = null;
|
|
176
216
|
let outlineUrl;
|
|
177
217
|
if (!skipSummary) {
|
|
178
|
-
|
|
218
|
+
// Guard: Ensure VLM is unloaded before LLM generation to prevent memory contention
|
|
219
|
+
if (ctx.adapters.vlm) {
|
|
220
|
+
console.log('[VLM] Warning: VLM bridge still loaded during artifact generation');
|
|
221
|
+
console.log('[VLM] Unloading to prevent memory contention with LLM...');
|
|
222
|
+
if ('unloadVlm' in ctx.adapters.vlm && ctx.adapters.vlm.unloadVlm) {
|
|
223
|
+
await ctx.adapters.vlm.unloadVlm();
|
|
224
|
+
}
|
|
225
|
+
ctx.adapters.vlm = null;
|
|
226
|
+
}
|
|
227
|
+
const artifactRunMetadata = collectRunMetadata(ctx.resourceTracker, ctx.config);
|
|
179
228
|
const pipelineResult = await withPipeline(recording.id, 'artifact', artifactRunMetadata, async () => {
|
|
180
229
|
console.log(`\nGenerating ${format} artifact...`);
|
|
181
230
|
let generatedArtifact;
|
|
231
|
+
// LLM model loading is handled internally by generateText()
|
|
232
|
+
// No explicit load/unload calls needed here
|
|
182
233
|
if (format === 'narrative') {
|
|
183
234
|
// Route narrative through the corrected path
|
|
184
235
|
generatedArtifact = await generateSummaryV3(recording.id, repos, llm, {
|
|
@@ -301,6 +352,11 @@ export async function processVideo(videoPath, ctx, options = {}) {
|
|
|
301
352
|
};
|
|
302
353
|
await unloadOllamaModel(config.llmModel, intelConfig);
|
|
303
354
|
}
|
|
355
|
+
else if ('unloadLlm' in ctx.adapters.llm &&
|
|
356
|
+
ctx.adapters.llm.unloadLlm) {
|
|
357
|
+
console.log('[LLM] Unloading MLX model to free memory...');
|
|
358
|
+
await ctx.adapters.llm.unloadLlm();
|
|
359
|
+
}
|
|
304
360
|
}
|
|
305
361
|
console.log('\n✓ Complete!');
|
|
306
362
|
return {
|
|
@@ -345,7 +401,7 @@ function getOutlineConfig() {
|
|
|
345
401
|
/**
|
|
346
402
|
* Collect metadata about the current run.
|
|
347
403
|
*/
|
|
348
|
-
function collectRunMetadata(resourceTracker) {
|
|
404
|
+
function collectRunMetadata(resourceTracker, config) {
|
|
349
405
|
let commitHash = 'unknown';
|
|
350
406
|
try {
|
|
351
407
|
commitHash = execSync('git rev-parse --short HEAD', {
|
|
@@ -359,6 +415,7 @@ function collectRunMetadata(resourceTracker) {
|
|
|
359
415
|
vlm_model: process.env.ESCRIBANO_VLM_MODEL ??
|
|
360
416
|
'mlx-community/Qwen3-VL-2B-Instruct-bf16',
|
|
361
417
|
llm_model: process.env.ESCRIBANO_LLM_MODEL ?? 'auto-detected',
|
|
418
|
+
llm_backend: config?.llmBackend ?? 'ollama',
|
|
362
419
|
commit_hash: commitHash,
|
|
363
420
|
node_version: process.version,
|
|
364
421
|
platform: process.platform,
|
package/dist/config.js
CHANGED
|
@@ -27,12 +27,15 @@ const configSchema = z.object({
|
|
|
27
27
|
vlmMaxTokens: z.number().int().min(500).max(8000).default(2000),
|
|
28
28
|
// === MODELS ===
|
|
29
29
|
llmModel: z.string().optional(),
|
|
30
|
+
llmBackend: z.enum(['mlx', 'ollama']).default('mlx'),
|
|
31
|
+
llmMlxModel: z.string().optional(),
|
|
30
32
|
vlmModel: z.string().default('mlx-community/Qwen3-VL-2B-Instruct-4bit'),
|
|
31
33
|
subjectGroupingModel: z.string().optional(),
|
|
32
34
|
// === DEBUGGING ===
|
|
33
35
|
verbose: z.boolean().default(false),
|
|
34
36
|
debugOllama: z.boolean().default(false),
|
|
35
37
|
debugVlm: z.boolean().default(false),
|
|
38
|
+
debugLlm: z.boolean().default(false),
|
|
36
39
|
skipLlm: z.boolean().default(false),
|
|
37
40
|
// === ADVANCED ===
|
|
38
41
|
sceneMinInterval: z.number().int().min(1).max(10).default(2),
|
|
@@ -72,10 +75,12 @@ const BASE_DEFAULTS = {
|
|
|
72
75
|
sampleInterval: 10,
|
|
73
76
|
sceneThreshold: 0.4,
|
|
74
77
|
vlmMaxTokens: 2000,
|
|
78
|
+
llmBackend: 'mlx',
|
|
75
79
|
vlmModel: 'mlx-community/Qwen3-VL-2B-Instruct-4bit',
|
|
76
80
|
verbose: false,
|
|
77
81
|
debugOllama: false,
|
|
78
82
|
debugVlm: false,
|
|
83
|
+
debugLlm: false,
|
|
79
84
|
skipLlm: false,
|
|
80
85
|
sceneMinInterval: 2,
|
|
81
86
|
sampleGapThreshold: 15,
|
|
@@ -103,12 +108,15 @@ ESCRIBANO_SCENE_THRESHOLD=0.4 # Scene detection sensitivity (0.0-1.0)
|
|
|
103
108
|
ESCRIBANO_VLM_MAX_TOKENS=2000 # Token budget per batch
|
|
104
109
|
|
|
105
110
|
# === MODELS ===
|
|
106
|
-
#
|
|
111
|
+
# ESCRIBANO_LLM_BACKEND=mlx # LLM backend: 'mlx' (default) or 'ollama'
|
|
112
|
+
# ESCRIBANO_LLM_MODEL=qwen3.5:27b # Ollama model (only used if llmBackend='ollama')
|
|
113
|
+
# ESCRIBANO_LLM_MLX_MODEL= # MLX model (only used if llmBackend='mlx', auto-detected if not set)
|
|
107
114
|
ESCRIBANO_VLM_MODEL=mlx-community/Qwen3-VL-2B-Instruct-4bit
|
|
108
115
|
|
|
109
116
|
# === DEBUGGING ===
|
|
110
117
|
ESCRIBANO_VERBOSE=false # Enable verbose logging
|
|
111
118
|
ESCRIBANO_DEBUG_VLM=false # Debug VLM processing
|
|
119
|
+
ESCRIBANO_DEBUG_LLM=false # Log all LLM calls to debug table
|
|
112
120
|
|
|
113
121
|
# === ADVANCED ===
|
|
114
122
|
ESCRIBANO_SCENE_MIN_INTERVAL=2
|
|
@@ -199,12 +207,15 @@ export function loadConfig() {
|
|
|
199
207
|
vlmMaxTokens: parseEnvNumberWithSource('ESCRIBANO_VLM_MAX_TOKENS', BASE_DEFAULTS.vlmMaxTokens, sources, 'vlmMaxTokens'),
|
|
200
208
|
// === MODELS ===
|
|
201
209
|
llmModel: parseEnvStringWithSource('ESCRIBANO_LLM_MODEL', undefined, sources, 'llmModel'),
|
|
210
|
+
llmBackend: (parseEnvStringWithSource('ESCRIBANO_LLM_BACKEND', BASE_DEFAULTS.llmBackend, sources, 'llmBackend') ?? 'mlx'),
|
|
211
|
+
llmMlxModel: parseEnvStringWithSource('ESCRIBANO_LLM_MLX_MODEL', undefined, sources, 'llmMlxModel'),
|
|
202
212
|
vlmModel: parseEnvStringWithSource('ESCRIBANO_VLM_MODEL', BASE_DEFAULTS.vlmModel, sources, 'vlmModel'),
|
|
203
213
|
subjectGroupingModel: parseEnvStringWithSource('ESCRIBANO_SUBJECT_GROUPING_MODEL', undefined, sources, 'subjectGroupingModel'),
|
|
204
214
|
// === DEBUGGING ===
|
|
205
215
|
verbose: parseEnvBooleanWithSource('ESCRIBANO_VERBOSE', BASE_DEFAULTS.verbose, sources, 'verbose'),
|
|
206
216
|
debugOllama: parseEnvBooleanWithSource('ESCRIBANO_DEBUG_OLLAMA', BASE_DEFAULTS.debugOllama, sources, 'debugOllama'),
|
|
207
217
|
debugVlm: parseEnvBooleanWithSource('ESCRIBANO_DEBUG_VLM', BASE_DEFAULTS.debugVlm, sources, 'debugVlm'),
|
|
218
|
+
debugLlm: parseEnvBooleanWithSource('ESCRIBANO_DEBUG_LLM', BASE_DEFAULTS.debugLlm, sources, 'debugLlm'),
|
|
208
219
|
skipLlm: parseEnvBooleanWithSource('ESCRIBANO_SKIP_LLM', BASE_DEFAULTS.skipLlm, sources, 'skipLlm'),
|
|
209
220
|
// === ADVANCED ===
|
|
210
221
|
sceneMinInterval: parseEnvNumberWithSource('ESCRIBANO_SCENE_MIN_INTERVAL', BASE_DEFAULTS.sceneMinInterval, sources, 'sceneMinInterval'),
|
|
@@ -7,7 +7,7 @@ export function createSqliteSubjectRepository(db) {
|
|
|
7
7
|
findById: db.prepare('SELECT * FROM subjects WHERE id = ?'),
|
|
8
8
|
findByRecording: db.prepare('SELECT * FROM subjects WHERE recording_id = ? ORDER BY created_at ASC'),
|
|
9
9
|
insert: db.prepare(`
|
|
10
|
-
INSERT INTO subjects (id, recording_id, label, is_personal, duration, activity_breakdown, metadata, created_at)
|
|
10
|
+
INSERT OR IGNORE INTO subjects (id, recording_id, label, is_personal, duration, activity_breakdown, metadata, created_at)
|
|
11
11
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
12
12
|
`),
|
|
13
13
|
insertLink: db.prepare(`
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Escribano - Context Extraction Domain Module
|
|
3
|
+
*/
|
|
4
|
+
export const Context = {
    /**
     * Extract semantic contexts from raw OCR text using regex patterns.
     * This is a fast-path optimization for common applications and URLs.
     *
     * @param {string} ocrText - Raw OCR text of a frame.
     * @returns {Array<{type: string, value: string, confidence: number}>}
     *   Detected contexts in this order: apps, full URLs, known domains not
     *   already covered by a URL, then file paths. Empty for blank input.
     */
    extractFromOCR: (ocrText) => {
        const contexts = [];
        const text = ocrText.trim();
        if (!text)
            return contexts;
        // 1. App Detection
        const apps = [
            { name: 'Ghostty', pattern: /Ghostty/i },
            { name: 'VS Code', pattern: /Visual Studio Code|VS Code/i },
            { name: 'Chrome', pattern: /Google Chrome/i },
            // Word boundaries keep "Arc" from matching inside words such as "Search".
            { name: 'Arc', pattern: /\bArc\b/i },
            { name: 'Cursor', pattern: /Cursor/i },
            { name: 'TablePlus', pattern: /TablePlus/i },
            { name: 'Slack', pattern: /Slack/i },
            { name: 'Spotify', pattern: /Spotify/i },
            { name: 'YouTube Music', pattern: /YouTube Music/i },
        ];
        for (const app of apps) {
            if (app.pattern.test(text)) {
                contexts.push({
                    type: 'app',
                    value: app.name,
                    confidence: 0.9,
                });
            }
        }
        // 2. URL Detection
        const urlPattern = /https?:\/\/[^\s]+/g;
        const urls = text.match(urlPattern);
        if (urls) {
            for (const url of urls) {
                contexts.push({
                    type: 'url',
                    // Strip the whole run of trailing punctuation, e.g. the ")." in "(https://x.y)."
                    value: url.replace(/[,.)}>]+$/, ''),
                    confidence: 1.0,
                });
            }
        }
        // 3. Domain Detection (Specific known domains)
        const domains = [
            { name: 'github.com', pattern: /github\.com/i },
            { name: 'linkedin.com', pattern: /linkedin\.com/i },
            { name: 'stackoverflow.com', pattern: /stackoverflow\.com/i },
            { name: 'docs.rs', pattern: /docs\.rs/i },
            { name: 'ollama.com', pattern: /ollama\.com/i },
        ];
        for (const domain of domains) {
            if (domain.pattern.test(text)) {
                // Only add if not already covered by a full URL
                if (!contexts.some((c) => c.type === 'url' && c.value.includes(domain.name))) {
                    contexts.push({
                        type: 'url',
                        value: domain.name,
                        confidence: 0.8,
                    });
                }
            }
        }
        // 4. File Path Detection
        // The trailing \b stops a shorter extension from winning inside a longer
        // one; without it "config.json" is reported as "config.js".
        const pathPattern = /(?:~\/|\/Users\/)[^\s]+\.(?:ts|js|py|rs|md|go|json|yml|yaml)\b/g;
        const paths = text.match(pathPattern);
        if (paths) {
            for (const path of paths) {
                contexts.push({
                    type: 'file',
                    value: path,
                    confidence: 0.9,
                });
            }
        }
        // TODO: Implement Step 2 - Embedding clustering for topic grouping
        // This will be used when regex patterns don't yield high-confidence results
        // or when we want to group related segments together.
        return contexts;
    },
    /**
     * Aggregate multiple contexts and remove duplicates.
     *
     * @param {Array<{type: string, value: string}>} contexts - Contexts to filter.
     * @returns {Array<object>} The first occurrence of each `type:value`
     *   pair, in the original order.
     */
    unique: (contexts) => {
        const seen = new Set();
        return contexts.filter((c) => {
            const key = `${c.type}:${c.value}`;
            if (seen.has(key))
                return false;
            seen.add(key);
            return true;
        });
    },
};
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import { generateId } from '../db/helpers.js';
|
|
2
|
+
/**
 * Build an audio observation record.
 *
 * @param {object} params - Source values: `recordingId`, `timestamp`,
 *   `endTimestamp`, `text`, `audioSource` and optional `confidence`.
 * @returns {object} Observation with a freshly generated `id`, `type` 'audio',
 *   `audioType` 'speech', and `confidence` set to `null` when it is not given.
 */
export function createAudioObservation(params) {
    const id = generateId();
    const { recordingId, timestamp, endTimestamp, text, audioSource } = params;
    const confidence = params.confidence ?? null;
    return {
        id,
        recordingId,
        type: 'audio',
        timestamp,
        endTimestamp,
        text,
        audioSource,
        audioType: 'speech',
        confidence,
    };
}
|