escribano 0.4.4 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +46 -26
- package/dist/0_types.js +1 -1
- package/dist/actions/generate-artifact-v3.js +5 -3
- package/dist/actions/generate-summary-v3.js +81 -13
- package/dist/adapters/intelligence.mlx.adapter.js +271 -197
- package/dist/adapters/intelligence.ollama.adapter.js +37 -0
- package/dist/batch-context.js +119 -33
- package/dist/config.js +168 -62
- package/dist/db/repositories/subject.sqlite.js +1 -1
- package/dist/python-utils.js +28 -10
- package/dist/services/subject-grouping.js +36 -9
- package/dist/tests/index.test.js +25 -12
- package/dist/tests/intelligence.mlx.adapter.test.js +13 -8
- package/dist/tests/utils/env-logger.test.js +6 -6
- package/dist/utils/model-detector.js +105 -2
- package/migrations/010_llm_backend_metadata.sql +25 -0
- package/migrations/011_llm_debug_log.sql +19 -0
- package/migrations/012_llm_debug_log_prompt_result.sql +20 -0
- package/package.json +1 -1
- package/scripts/mlx_bridge.py +578 -78
|
@@ -7,7 +7,7 @@ export function createSqliteSubjectRepository(db) {
|
|
|
7
7
|
findById: db.prepare('SELECT * FROM subjects WHERE id = ?'),
|
|
8
8
|
findByRecording: db.prepare('SELECT * FROM subjects WHERE recording_id = ? ORDER BY created_at ASC'),
|
|
9
9
|
insert: db.prepare(`
|
|
10
|
-
INSERT INTO subjects (id, recording_id, label, is_personal, duration, activity_breakdown, metadata, created_at)
|
|
10
|
+
INSERT OR IGNORE INTO subjects (id, recording_id, label, is_personal, duration, activity_breakdown, metadata, created_at)
|
|
11
11
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
12
12
|
`),
|
|
13
13
|
insertLink: db.prepare(`
|
package/dist/python-utils.js
CHANGED
|
@@ -10,6 +10,16 @@ import { resolve } from 'node:path';
|
|
|
10
10
|
export const ESCRIBANO_HOME = resolve(homedir(), '.escribano');
|
|
11
11
|
export const ESCRIBANO_VENV = resolve(ESCRIBANO_HOME, 'venv');
|
|
12
12
|
export const ESCRIBANO_VENV_PYTHON = resolve(ESCRIBANO_VENV, 'bin', 'python3');
|
|
13
|
+
/**
 * Check if a path is inside the current working directory (project-local).
 * Used to skip VIRTUAL_ENV/UV_PROJECT_ENVIRONMENT that are dev venvs for
 * the project itself, not suitable as Escribano's Python runtime.
 *
 * The path is resolved against the CWD first, so relative inputs are accepted.
 * The CWD itself is not considered to be "inside" the CWD.
 *
 * @param {string} path - Absolute or relative filesystem path.
 * @returns {boolean} True when the resolved path lies strictly below process.cwd().
 */
function isInsideCwd(path) {
    const absPath = resolve(path);
    const cwd = process.cwd();
    if (absPath === cwd) {
        return false;
    }
    // Strip trailing separators before appending one: for a root CWD ("/" or
    // "C:\") the naive prefix would be "//" or "C:\\", which no resolved path
    // starts with, so nothing would ever be detected as inside the root.
    const base = cwd.replace(/[\\/]+$/, '');
    return absPath.startsWith(`${base}/`) || absPath.startsWith(`${base}\\`);
}
|
|
13
23
|
/**
|
|
14
24
|
* Get explicitly configured Python path.
|
|
15
25
|
* Returns null when nothing is explicitly configured or found via well-known
|
|
@@ -19,29 +29,37 @@ export const ESCRIBANO_VENV_PYTHON = resolve(ESCRIBANO_VENV, 'bin', 'python3');
|
|
|
19
29
|
*
|
|
20
30
|
* Priority:
|
|
21
31
|
* 1. ESCRIBANO_PYTHON_PATH env var (explicit override)
|
|
22
|
-
* 2.
|
|
23
|
-
* 3.
|
|
24
|
-
* 4.
|
|
25
|
-
* 5.
|
|
26
|
-
* 6.
|
|
32
|
+
* 2. ~/.escribano/venv (managed venv, if it exists — preferred once created)
|
|
33
|
+
* 3. Active virtual environment (VIRTUAL_ENV, unless inside CWD)
|
|
34
|
+
* 4. UV_PROJECT_ENVIRONMENT (uv project-synced venv, unless inside CWD)
|
|
35
|
+
* 5. Project-local .venv (created by `uv venv` in CWD)
|
|
36
|
+
* 6. ~/.venv/bin/python3 (home-level venv)
|
|
37
|
+
* 7. null — no environment detected; auto-venv will be created
|
|
27
38
|
*/
|
|
28
39
|
export function getPythonPath() {
|
|
40
|
+
// 1. Explicit override always wins
|
|
29
41
|
if (process.env.ESCRIBANO_PYTHON_PATH) {
|
|
30
42
|
return process.env.ESCRIBANO_PYTHON_PATH;
|
|
31
43
|
}
|
|
32
|
-
|
|
44
|
+
// 2. Escribano's managed venv — preferred once it exists
|
|
45
|
+
if (existsSync(ESCRIBANO_VENV_PYTHON)) {
|
|
46
|
+
return ESCRIBANO_VENV_PYTHON;
|
|
47
|
+
}
|
|
48
|
+
// 3. Active virtual environment (skip if it's a project-local dev venv)
|
|
49
|
+
if (process.env.VIRTUAL_ENV && !isInsideCwd(process.env.VIRTUAL_ENV)) {
|
|
33
50
|
return resolve(process.env.VIRTUAL_ENV, 'bin', 'python3');
|
|
34
51
|
}
|
|
35
|
-
// UV_PROJECT_ENVIRONMENT
|
|
36
|
-
if (process.env.UV_PROJECT_ENVIRONMENT
|
|
52
|
+
// 4. UV_PROJECT_ENVIRONMENT (skip if inside CWD)
|
|
53
|
+
if (process.env.UV_PROJECT_ENVIRONMENT &&
|
|
54
|
+
!isInsideCwd(process.env.UV_PROJECT_ENVIRONMENT)) {
|
|
37
55
|
return resolve(process.env.UV_PROJECT_ENVIRONMENT, 'bin', 'python3');
|
|
38
56
|
}
|
|
39
|
-
//
|
|
57
|
+
// 5. Project-local .venv (created by `uv venv` in the current working directory)
|
|
40
58
|
const localVenv = resolve(process.cwd(), '.venv', 'bin', 'python3');
|
|
41
59
|
if (existsSync(localVenv)) {
|
|
42
60
|
return localVenv;
|
|
43
61
|
}
|
|
44
|
-
//
|
|
62
|
+
// 6. Home-level venv (e.g., `uv venv ~/.venv`)
|
|
45
63
|
const uvHomeVenv = resolve(homedir(), '.venv', 'bin', 'python3');
|
|
46
64
|
if (existsSync(uvHomeVenv)) {
|
|
47
65
|
return uvHomeVenv;
|
|
@@ -7,6 +7,7 @@
|
|
|
7
7
|
import { readFileSync } from 'node:fs';
|
|
8
8
|
import { dirname, resolve } from 'node:path';
|
|
9
9
|
import { fileURLToPath } from 'node:url';
|
|
10
|
+
import { step } from '../pipeline/context.js';
|
|
10
11
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
11
12
|
const PERSONAL_APPS = new Set([
|
|
12
13
|
'WhatsApp',
|
|
@@ -24,7 +25,7 @@ const PERSONAL_APPS = new Set([
|
|
|
24
25
|
'Messages',
|
|
25
26
|
]);
|
|
26
27
|
const PERSONAL_APP_THRESHOLD = 0.5;
|
|
27
|
-
const SUBJECT_GROUPING_MODEL = process.env.ESCRIBANO_SUBJECT_GROUPING_MODEL
|
|
28
|
+
const SUBJECT_GROUPING_MODEL = process.env.ESCRIBANO_SUBJECT_GROUPING_MODEL;
|
|
28
29
|
export async function groupTopicBlocksIntoSubjects(topicBlocks, intelligence, recordingId) {
|
|
29
30
|
if (topicBlocks.length === 0) {
|
|
30
31
|
return {
|
|
@@ -35,16 +36,42 @@ export async function groupTopicBlocksIntoSubjects(topicBlocks, intelligence, re
|
|
|
35
36
|
}
|
|
36
37
|
const blocksForGrouping = topicBlocks.map(extractBlockForGrouping);
|
|
37
38
|
const prompt = buildGroupingPrompt(blocksForGrouping);
|
|
38
|
-
|
|
39
|
+
const modelInfo = SUBJECT_GROUPING_MODEL
|
|
40
|
+
? ` (model: ${SUBJECT_GROUPING_MODEL})`
|
|
41
|
+
: ' (auto-detected)';
|
|
42
|
+
console.log(`[subject-grouping] Grouping ${topicBlocks.length} blocks into subjects${modelInfo}`);
|
|
39
43
|
try {
|
|
40
|
-
const response = await
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
44
|
+
const response = await step('llm_subject_grouping', async () => {
|
|
45
|
+
return intelligence.generateText(prompt, {
|
|
46
|
+
expectJson: false,
|
|
47
|
+
model: SUBJECT_GROUPING_MODEL || undefined,
|
|
48
|
+
numPredict: 2000,
|
|
49
|
+
think: false,
|
|
50
|
+
debugContext: {
|
|
51
|
+
recordingId,
|
|
52
|
+
callType: 'subject_grouping',
|
|
53
|
+
},
|
|
54
|
+
});
|
|
45
55
|
});
|
|
46
|
-
|
|
47
|
-
|
|
56
|
+
// Strip thinking leakage if present
|
|
57
|
+
let cleaned = response.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
|
|
58
|
+
if (cleaned.includes('</think>')) {
|
|
59
|
+
// Handle orphan </think> tag (Qwen3.5 behavior)
|
|
60
|
+
cleaned = cleaned.split('</think>')[1].trim();
|
|
61
|
+
}
|
|
62
|
+
// Strip "Thinking Process:" prose (Qwen3.5-OptiQ format)
|
|
63
|
+
const tpMatch = cleaned.match(/(?:^|\n)Thinking Process:/);
|
|
64
|
+
if (tpMatch !== null) {
|
|
65
|
+
const after = cleaned.slice((tpMatch.index ?? 0) + tpMatch[0].length);
|
|
66
|
+
const heading = after.match(/\n(#\s|\*\*|Group\s)/);
|
|
67
|
+
cleaned =
|
|
68
|
+
heading?.index !== undefined ? after.slice(heading.index).trim() : '';
|
|
69
|
+
}
|
|
70
|
+
if (cleaned.length < 10) {
|
|
71
|
+
console.warn('[subject-grouping] Thinking leakage detected or response too short — parseGroupingResponse will fall back');
|
|
72
|
+
}
|
|
73
|
+
console.log(`[subject-grouping] LLM response (${cleaned.length} chars after stripping):\n${cleaned.slice(0, 500)}${cleaned.length > 500 ? '...' : ''}`);
|
|
74
|
+
const grouping = parseGroupingResponse(cleaned || response, topicBlocks);
|
|
48
75
|
console.log(`[subject-grouping] Parsed ${grouping.groups.length} groups: ${grouping.groups.map((g) => g.label).join(', ')}`);
|
|
49
76
|
const subjects = grouping.groups.map((group, index) => {
|
|
50
77
|
const subjectId = `subject-${recordingId}-${index}`;
|
package/dist/tests/index.test.js
CHANGED
|
@@ -1,40 +1,53 @@
|
|
|
1
|
-
import { chmod, mkdir, rm, stat, symlink, writeFile } from 'node:fs/promises';
|
|
2
|
-
import { tmpdir } from 'node:os';
|
|
1
|
+
import { access, chmod, constants, mkdir, readdir, rm, stat, symlink, writeFile, } from 'node:fs/promises';
|
|
2
|
+
import { homedir, tmpdir } from 'node:os';
|
|
3
3
|
import path from 'node:path';
|
|
4
4
|
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
|
5
5
|
const VIDEO_EXTENSIONS = ['.mov', '.mp4', '.mkv', '.avi', '.webm'];
|
|
6
6
|
function expandPath(inputPath) {
|
|
7
|
+
if (!inputPath.startsWith('~')) {
|
|
8
|
+
return inputPath;
|
|
9
|
+
}
|
|
10
|
+
const homeDir = homedir();
|
|
11
|
+
if (!homeDir) {
|
|
12
|
+
return inputPath;
|
|
13
|
+
}
|
|
14
|
+
if (inputPath === '~' || inputPath === '~/') {
|
|
15
|
+
return homeDir;
|
|
16
|
+
}
|
|
7
17
|
if (inputPath.startsWith('~/')) {
|
|
8
|
-
const homeDir = process.env.HOME || process.env.USERPROFILE || '';
|
|
9
18
|
return path.join(homeDir, inputPath.slice(2));
|
|
10
19
|
}
|
|
11
20
|
return inputPath;
|
|
12
21
|
}
|
|
13
22
|
async function findLatestVideo(dirPath) {
|
|
14
23
|
const resolvedPath = expandPath(dirPath);
|
|
15
|
-
const { readdir } = await import('node:fs/promises');
|
|
16
24
|
const entries = await readdir(resolvedPath, { withFileTypes: true });
|
|
17
25
|
const videoFiles = entries.filter((entry) => entry.isFile() &&
|
|
18
26
|
VIDEO_EXTENSIONS.some((ext) => entry.name.toLowerCase().endsWith(ext)));
|
|
19
27
|
if (videoFiles.length === 0) {
|
|
20
28
|
throw new Error(`No video files found in: ${resolvedPath}`);
|
|
21
29
|
}
|
|
22
|
-
|
|
30
|
+
let latestFilePath = null;
|
|
31
|
+
let latestMtime = -Infinity;
|
|
32
|
+
for (const entry of videoFiles) {
|
|
23
33
|
const fullPath = path.join(resolvedPath, entry.name);
|
|
24
34
|
try {
|
|
35
|
+
await access(fullPath, constants.R_OK);
|
|
25
36
|
const fileStat = await stat(fullPath);
|
|
26
|
-
|
|
37
|
+
const mtimeMs = fileStat.mtime.getTime();
|
|
38
|
+
if (mtimeMs > latestMtime) {
|
|
39
|
+
latestMtime = mtimeMs;
|
|
40
|
+
latestFilePath = fullPath;
|
|
41
|
+
}
|
|
27
42
|
}
|
|
28
43
|
catch {
|
|
29
|
-
|
|
44
|
+
// Skip files that are inaccessible (permission denied, broken symlink, etc.)
|
|
30
45
|
}
|
|
31
|
-
}
|
|
32
|
-
|
|
33
|
-
if (validFiles.length === 0) {
|
|
46
|
+
}
|
|
47
|
+
if (!latestFilePath) {
|
|
34
48
|
throw new Error(`No accessible video files found in: ${resolvedPath}`);
|
|
35
49
|
}
|
|
36
|
-
|
|
37
|
-
return validFiles[0].path;
|
|
50
|
+
return latestFilePath;
|
|
38
51
|
}
|
|
39
52
|
describe('findLatestVideo', () => {
|
|
40
53
|
let testDir;
|
|
@@ -22,7 +22,7 @@ vi.mock('node:child_process', () => ({
|
|
|
22
22
|
kill: vi.fn(),
|
|
23
23
|
})),
|
|
24
24
|
}));
|
|
25
|
-
import { existsSync } from 'node:fs';
|
|
25
|
+
import { existsSync, mkdirSync } from 'node:fs';
|
|
26
26
|
import { resolvePythonPath } from '../adapters/intelligence.mlx.adapter.js';
|
|
27
27
|
import { getPythonPath } from '../python-utils.js';
|
|
28
28
|
const mockExistsSync = vi.mocked(existsSync);
|
|
@@ -178,8 +178,10 @@ describe('resolvePythonPath', () => {
|
|
|
178
178
|
});
|
|
179
179
|
it('installs mlx-vlm when the import probe fails', async () => {
|
|
180
180
|
const venvPython = resolve(homedir(), '.escribano', 'venv', 'bin', 'python3');
|
|
181
|
-
|
|
182
|
-
mockExistsSync.mockImplementation((p) => p ===
|
|
181
|
+
const escribanoHome = resolve(homedir(), '.escribano');
|
|
182
|
+
mockExistsSync.mockImplementation((p) => p === escribanoHome);
|
|
183
|
+
const mockMkdirSync = vi.mocked(mkdirSync);
|
|
184
|
+
mockMkdirSync.mockReturnValue(undefined);
|
|
183
185
|
const { spawn } = await import('node:child_process');
|
|
184
186
|
const mockSpawn = vi.mocked(spawn);
|
|
185
187
|
mockSpawn.mockClear();
|
|
@@ -189,9 +191,12 @@ describe('resolvePythonPath', () => {
|
|
|
189
191
|
const emitter = {
|
|
190
192
|
on: vi.fn((event, cb) => {
|
|
191
193
|
if (event === 'exit') {
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
194
|
+
if (thisCall === 1) {
|
|
195
|
+
cb(1);
|
|
196
|
+
}
|
|
197
|
+
else {
|
|
198
|
+
cb(0);
|
|
199
|
+
}
|
|
195
200
|
}
|
|
196
201
|
return emitter;
|
|
197
202
|
}),
|
|
@@ -202,8 +207,7 @@ describe('resolvePythonPath', () => {
|
|
|
202
207
|
return emitter;
|
|
203
208
|
});
|
|
204
209
|
await expect(resolvePythonPath()).resolves.toBe(venvPython);
|
|
205
|
-
expect(mockSpawn.mock.calls.length).toBeGreaterThanOrEqual(
|
|
206
|
-
// Find the pip install call regardless of its position (robust to ensurepip being inserted)
|
|
210
|
+
expect(mockSpawn.mock.calls.length).toBeGreaterThanOrEqual(3);
|
|
207
211
|
const installCall = mockSpawn.mock.calls.find(([_cmd, args]) => Array.isArray(args) &&
|
|
208
212
|
args.includes('-m') &&
|
|
209
213
|
args.includes('pip') &&
|
|
@@ -217,6 +221,7 @@ describe('resolvePythonPath', () => {
|
|
|
217
221
|
'mlx-vlm',
|
|
218
222
|
'torch',
|
|
219
223
|
'torchvision',
|
|
224
|
+
'mlx-lm',
|
|
220
225
|
]));
|
|
221
226
|
});
|
|
222
227
|
});
|
|
@@ -127,7 +127,7 @@ OTHER_VAR=value`;
|
|
|
127
127
|
});
|
|
128
128
|
it('marks custom values with [CUSTOM]', () => {
|
|
129
129
|
const mockContent = `# Default batch size
|
|
130
|
-
ESCRIBANO_VLM_BATCH_SIZE=
|
|
130
|
+
ESCRIBANO_VLM_BATCH_SIZE=2`;
|
|
131
131
|
vi.mocked(readFileSync).mockReturnValue(mockContent);
|
|
132
132
|
process.env.ESCRIBANO_VERBOSE = 'true';
|
|
133
133
|
process.env.ESCRIBANO_VLM_BATCH_SIZE = '8';
|
|
@@ -138,15 +138,15 @@ ESCRIBANO_VLM_BATCH_SIZE=4`;
|
|
|
138
138
|
.join('\n');
|
|
139
139
|
expect(output).toContain('[CUSTOM]');
|
|
140
140
|
expect(output).toContain('Current: 8');
|
|
141
|
-
expect(output).toContain('Default:
|
|
141
|
+
expect(output).toContain('Default: 2');
|
|
142
142
|
consoleSpy.mockRestore();
|
|
143
143
|
});
|
|
144
144
|
it('does not mark default values', () => {
|
|
145
145
|
const mockContent = `# Default batch size
|
|
146
|
-
ESCRIBANO_VLM_BATCH_SIZE=
|
|
146
|
+
ESCRIBANO_VLM_BATCH_SIZE=2`;
|
|
147
147
|
vi.mocked(readFileSync).mockReturnValue(mockContent);
|
|
148
148
|
process.env.ESCRIBANO_VERBOSE = 'true';
|
|
149
|
-
process.env.ESCRIBANO_VLM_BATCH_SIZE = '
|
|
149
|
+
process.env.ESCRIBANO_VLM_BATCH_SIZE = '2';
|
|
150
150
|
const consoleSpy = vi.spyOn(console, 'log').mockImplementation(() => { });
|
|
151
151
|
logEnvironmentVariables();
|
|
152
152
|
const output = consoleSpy.mock.calls
|
|
@@ -170,7 +170,7 @@ ESCRIBANO_VLM_BATCH_SIZE=4`;
|
|
|
170
170
|
consoleSpy.mockRestore();
|
|
171
171
|
});
|
|
172
172
|
it('does not mask non-secret values', () => {
|
|
173
|
-
const mockContent = `ESCRIBANO_VLM_BATCH_SIZE=
|
|
173
|
+
const mockContent = `ESCRIBANO_VLM_BATCH_SIZE=2`;
|
|
174
174
|
vi.mocked(readFileSync).mockReturnValue(mockContent);
|
|
175
175
|
process.env.ESCRIBANO_VERBOSE = 'true';
|
|
176
176
|
process.env.ESCRIBANO_VLM_BATCH_SIZE = '8';
|
|
@@ -184,7 +184,7 @@ ESCRIBANO_VLM_BATCH_SIZE=4`;
|
|
|
184
184
|
consoleSpy.mockRestore();
|
|
185
185
|
});
|
|
186
186
|
it('shows "not set" for undefined variables', () => {
|
|
187
|
-
const mockContent = `ESCRIBANO_VLM_BATCH_SIZE=
|
|
187
|
+
const mockContent = `ESCRIBANO_VLM_BATCH_SIZE=2`;
|
|
188
188
|
vi.mocked(readFileSync).mockReturnValue(mockContent);
|
|
189
189
|
process.env.ESCRIBANO_VERBOSE = 'true';
|
|
190
190
|
delete process.env.ESCRIBANO_VLM_BATCH_SIZE;
|
|
@@ -3,6 +3,11 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Detects the best available LLM model from installed Ollama models
|
|
5
5
|
* based on system RAM and model quality tiers.
|
|
6
|
+
*
|
|
7
|
+
* MLX Models Note:
|
|
8
|
+
* Uses lmstudio-community Instruct-2507 models for reliable inference.
|
|
9
|
+
* These models respect think=False and produce clean output without
|
|
10
|
+
* thinking leakage, unlike older Qwen3.5 models.
|
|
6
11
|
*/
|
|
7
12
|
import { totalmem } from 'node:os';
|
|
8
13
|
export const LLM_MODEL_TIERS = [
|
|
@@ -11,6 +16,26 @@ export const LLM_MODEL_TIERS = [
|
|
|
11
16
|
{ model: 'qwen3:8b', tier: 2, minRamGB: 10, label: 'good' },
|
|
12
17
|
{ model: 'qwen3:4b', tier: 1, minRamGB: 6, label: 'minimum' },
|
|
13
18
|
];
|
|
19
|
+
/**
 * MLX LLM candidates, listed from the highest tier to the lowest.
 * Each entry carries the model id, its tier number, the RAM threshold (in GB)
 * it is compared against, and a short quality label.
 */
export const MLX_LLM_MODEL_TIERS = [
    { model: 'lmstudio-community/Qwen3-30B-A3B-Instruct-2507-MLX-8bit', tier: 3, minRamGB: 64, label: 'best' },
    { model: 'lmstudio-community/Qwen3-30B-A3B-Instruct-2507-MLX-4bit', tier: 2, minRamGB: 32, label: 'good' },
    { model: 'lmstudio-community/Qwen3-4B-Instruct-2507-MLX-4bit', tier: 1, minRamGB: 8, label: 'minimum' },
];
|
|
14
39
|
const OLLAMA_ENDPOINT = process.env.OLLAMA_HOST || 'http://localhost:11434';
|
|
15
40
|
/**
|
|
16
41
|
* Fetch installed models from Ollama
|
|
@@ -135,6 +160,84 @@ export async function selectBestLLMModel() {
|
|
|
135
160
|
recommendation: `Install at least ${lowest.model}: ollama pull ${lowest.model}`,
|
|
136
161
|
};
|
|
137
162
|
}
|
|
163
|
+
/**
 * Choose the MLX LLM model to use, based on system RAM.
 *
 * An ESCRIBANO_LLM_MLX_MODEL override is always returned as-is (source 'env');
 * it is only looked up in MLX_LLM_MODEL_TIERS so that a warning and/or a
 * recommendation can be attached. Without an override, the first tier whose
 * minRamGB does not exceed the detected RAM is returned (source 'auto'); when
 * no tier qualifies, the last tier is returned together with a warning.
 *
 * @returns {Promise<object>} Selection with model, source, tier, label and
 *   ramGB, plus warning and/or recommendation depending on the branch taken.
 */
export async function selectBestMLXModel() {
    const ramGB = getSystemRamGB();
    const override = process.env.ESCRIBANO_LLM_MLX_MODEL;
    // First listed tier that outranks `current` and still fits in RAM, if any.
    const findUpgrade = (current) => MLX_LLM_MODEL_TIERS.find((candidate) => candidate.tier > current.tier && candidate.minRamGB <= ramGB);

    if (override) {
        // Explicit override: keep the user's choice, only annotate it.
        const known = MLX_LLM_MODEL_TIERS.find((candidate) => candidate.model.toLowerCase() === override.toLowerCase());
        if (!known) {
            const catalogue = MLX_LLM_MODEL_TIERS.map((candidate) => candidate.model).join(', ');
            return {
                model: override,
                source: 'env',
                tier: 0,
                label: 'unknown',
                ramGB,
                warning: `${override} is not a recognized MLX model.`,
                recommendation: `Consider using one of: ${catalogue}`,
            };
        }
        const selection = {
            model: override,
            source: 'env',
            tier: known.tier,
            label: known.label,
            ramGB,
        };
        if (known.minRamGB > ramGB) {
            // Requested model needs more RAM than detected: warn and point at one that fits.
            const fallback = MLX_LLM_MODEL_TIERS.find((candidate) => candidate.minRamGB <= ramGB);
            selection.warning = `${override} may be too large for your ${ramGB}GB RAM.`;
            selection.recommendation = fallback
                ? `Consider ${fallback.model} for stability`
                : undefined;
            return selection;
        }
        // Requested model fits: mention a higher tier if the machine could run one.
        const upgrade = findUpgrade(known);
        selection.recommendation = upgrade
            ? `${upgrade.model} would give better quality for your ${ramGB}GB RAM`
            : undefined;
        return selection;
    }

    // Auto-select: take the first listed tier that is not too large for RAM.
    const fitting = MLX_LLM_MODEL_TIERS.find((candidate) => !(candidate.minRamGB > ramGB));
    if (fitting) {
        const upgrade = findUpgrade(fitting);
        return {
            model: fitting.model,
            source: 'auto',
            tier: fitting.tier,
            label: fitting.label,
            ramGB,
            recommendation: upgrade
                ? `For better quality, consider ${upgrade.model} (install via mlx-vlm)`
                : undefined,
        };
    }

    // No tier fits: fall back to the last entry of the list and flag it.
    const lowest = MLX_LLM_MODEL_TIERS[MLX_LLM_MODEL_TIERS.length - 1];
    return {
        model: lowest.model,
        source: 'auto',
        tier: 0,
        label: 'minimum',
        ramGB,
        warning: `Selected minimum MLX model for ${ramGB}GB RAM.`,
    };
}
|
|
138
241
|
/**
|
|
139
242
|
* Format model selection for console output
|
|
140
243
|
*/
|
|
@@ -145,10 +248,10 @@ export function formatModelSelection(selection) {
|
|
|
145
248
|
: '(auto-detected)';
|
|
146
249
|
lines.push(`Using ${selection.model} ${sourceLabel}`);
|
|
147
250
|
if (selection.warning) {
|
|
148
|
-
lines.push(`
|
|
251
|
+
lines.push(` ! ${selection.warning}`);
|
|
149
252
|
}
|
|
150
253
|
if (selection.recommendation) {
|
|
151
|
-
lines.push(`
|
|
254
|
+
lines.push(` i ${selection.recommendation}`);
|
|
152
255
|
}
|
|
153
256
|
return lines.join('\n');
|
|
154
257
|
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
-- Migration 010: Add LLM backend tracking to existing runs
--
-- Marks all existing runs as having used Ollama backend (before MLX-LM migration).
-- This allows benchmarking comparisons between backends.
--
-- Since SQLite doesn't have native JSON functions in all versions, the metadata
-- text is updated directly. Rows of every status are covered by one statement;
-- rows whose metadata already mentions "llm_backend" are left untouched.
--
-- The key is spliced in before the FINAL closing brace only. Replacing every
-- '}' would also inject the key into nested objects inside the metadata.
-- NOTE(review): assumes metadata, when present, is a JSON object serialized
-- without trailing characters other than spaces; other text is left unchanged.
UPDATE processing_runs
SET metadata = CASE
    WHEN metadata IS NULL THEN '{"llm_backend":"ollama"}'
    -- Empty text or an empty object (possibly padded with spaces)
    WHEN replace(trim(metadata), ' ', '') IN ('', '{}') THEN '{"llm_backend":"ollama"}'
    -- Non-empty object: drop the last '}' and append the new key
    WHEN rtrim(metadata) LIKE '%}' THEN
        substr(rtrim(metadata), 1, length(rtrim(metadata)) - 1) || ',"llm_backend":"ollama"}'
    -- Not an object: do not guess, keep as-is
    ELSE metadata
END
WHERE metadata IS NULL OR metadata NOT LIKE '%"llm_backend"%';
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
-- Migration 011: LLM Debug Log Table
-- Stores raw LLM inputs/outputs for debugging thinking leakage
--
-- Every statement uses IF NOT EXISTS so that re-applying the migration against
-- a database that already has the table does not fail on index creation.

CREATE TABLE IF NOT EXISTS llm_debug_log (
    id TEXT PRIMARY KEY,
    recording_id TEXT,
    artifact_id TEXT,
    call_type TEXT NOT NULL,  -- 'subject_grouping' | 'artifact_generation'
    prompt TEXT,              -- raw user prompt (before apply_chat_template)
    result TEXT,              -- final processed response returned to caller
    created_at TEXT NOT NULL DEFAULT (datetime('now')),

    -- Everything else as JSON (model, tokens, timing, raw_response diff, etc.)
    metadata TEXT NOT NULL
);

CREATE INDEX IF NOT EXISTS idx_llm_debug_log_recording ON llm_debug_log(recording_id);
CREATE INDEX IF NOT EXISTS idx_llm_debug_log_call_type ON llm_debug_log(call_type);
CREATE INDEX IF NOT EXISTS idx_llm_debug_log_created ON llm_debug_log(created_at);
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
-- Migration 012: Rebuild llm_debug_log with prompt/result columns
--
-- The table is dropped and created again so that it is guaranteed to carry
-- first-class prompt and result columns. Existing rows are discarded, which is
-- acceptable because the table only holds debug data.

DROP TABLE IF EXISTS llm_debug_log;

CREATE TABLE IF NOT EXISTS llm_debug_log (
    id           TEXT PRIMARY KEY,
    recording_id TEXT,
    artifact_id  TEXT,
    call_type    TEXT NOT NULL,
    prompt       TEXT,
    result       TEXT,
    created_at   TEXT NOT NULL DEFAULT (datetime('now')),
    metadata     TEXT NOT NULL
);

-- Lookup indexes: by recording, by call type, and by creation time.
CREATE INDEX idx_llm_debug_log_recording ON llm_debug_log(recording_id);
CREATE INDEX idx_llm_debug_log_call_type ON llm_debug_log(call_type);
CREATE INDEX idx_llm_debug_log_created ON llm_debug_log(created_at);
|