escribano 0.4.5 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +46 -26
- package/dist/actions/generate-artifact-v3.js +5 -3
- package/dist/actions/generate-summary-v3.js +29 -4
- package/dist/adapters/cap.adapter.js +94 -0
- package/dist/adapters/intelligence.adapter.js +202 -0
- package/dist/adapters/intelligence.mlx.adapter.js +258 -185
- package/dist/adapters/storage.adapter.js +81 -0
- package/dist/adapters/whisper.adapter.js +168 -0
- package/dist/batch-context.js +91 -34
- package/dist/config.js +12 -1
- package/dist/db/repositories/subject.sqlite.js +1 -1
- package/dist/domain/context.js +97 -0
- package/dist/domain/index.js +2 -0
- package/dist/domain/observation.js +17 -0
- package/dist/python-utils.js +28 -10
- package/dist/services/subject-grouping.js +36 -9
- package/dist/test-classification-prompts.js +181 -0
- package/dist/tests/cap.adapter.test.js +75 -0
- package/dist/tests/intelligence.adapter.test.js +102 -0
- package/dist/tests/intelligence.mlx.adapter.test.js +13 -8
- package/dist/utils/model-detector.js +105 -2
- package/migrations/010_llm_backend_metadata.sql +25 -0
- package/migrations/011_llm_debug_log.sql +19 -0
- package/migrations/012_llm_debug_log_prompt_result.sql +20 -0
- package/package.json +1 -1
- package/scripts/mlx_bridge.py +574 -74
|
@@ -3,6 +3,11 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Detects the best available LLM model from installed Ollama models
|
|
5
5
|
* based on system RAM and model quality tiers.
|
|
6
|
+
*
|
|
7
|
+
* MLX Models Note:
|
|
8
|
+
* Uses lmstudio-community Instruct-2507 models for reliable inference.
|
|
9
|
+
* These models respect think=False and produce clean output without
|
|
10
|
+
* thinking leakage, unlike older Qwen3.5 models.
|
|
6
11
|
*/
|
|
7
12
|
import { totalmem } from 'node:os';
|
|
8
13
|
export const LLM_MODEL_TIERS = [
|
|
@@ -11,6 +16,26 @@ export const LLM_MODEL_TIERS = [
|
|
|
11
16
|
{ model: 'qwen3:8b', tier: 2, minRamGB: 10, label: 'good' },
|
|
12
17
|
{ model: 'qwen3:4b', tier: 1, minRamGB: 6, label: 'minimum' },
|
|
13
18
|
];
|
|
19
|
+
/**
 * Quality tiers for MLX-served LLM models, listed from the highest tier to
 * the lowest.
 *
 * Each entry carries:
 *   - model:    model identifier
 *   - tier:     numeric rank (higher is better)
 *   - minRamGB: smallest amount of system RAM the tier is offered for
 *   - label:    short human-readable quality name
 */
export const MLX_LLM_MODEL_TIERS = [
    ['lmstudio-community/Qwen3-30B-A3B-Instruct-2507-MLX-8bit', 3, 64, 'best'],
    ['lmstudio-community/Qwen3-30B-A3B-Instruct-2507-MLX-4bit', 2, 32, 'good'],
    ['lmstudio-community/Qwen3-4B-Instruct-2507-MLX-4bit', 1, 8, 'minimum'],
].map(([model, tier, minRamGB, label]) => ({ model, tier, minRamGB, label }));
|
|
14
39
|
// Base URL used for Ollama requests: the OLLAMA_HOST environment variable when
// it is set to a non-empty value, otherwise the local default.
const OLLAMA_ENDPOINT = (() => {
    const configuredHost = process.env.OLLAMA_HOST;
    return configuredHost ? configuredHost : 'http://localhost:11434';
})();
|
|
15
40
|
/**
|
|
16
41
|
* Fetch installed models from Ollama
|
|
@@ -135,6 +160,84 @@ export async function selectBestLLMModel() {
|
|
|
135
160
|
recommendation: `Install at least ${lowest.model}: ollama pull ${lowest.model}`,
|
|
136
161
|
};
|
|
137
162
|
}
|
|
163
|
+
/**
 * Return the highest-tier entry of MLX_LLM_MODEL_TIERS whose minRamGB fits
 * in the given RAM, or undefined when no entry fits.
 *
 * The whole table is scanned, so the result does not depend on the order in
 * which the tiers are declared.
 *
 * @param {number} ramGB - System RAM in GB.
 * @returns {object|undefined} The best fitting tier entry, if any.
 */
function findBestFittingMLXTier(ramGB) {
    let best;
    for (const candidate of MLX_LLM_MODEL_TIERS) {
        if (candidate.minRamGB > ramGB) {
            continue;
        }
        if (best === undefined || candidate.tier > best.tier) {
            best = candidate;
        }
    }
    return best;
}
/**
 * Select the best MLX LLM model based on system RAM.
 *
 * If ESCRIBANO_LLM_MLX_MODEL is set (surrounding whitespace is ignored; a
 * blank value counts as unset), that model is used, but it is still validated:
 *   - unknown model           -> tier 0 / 'unknown' plus a warning and a list
 *                                of the recognized models
 *   - known but too large     -> warning plus the best model that does fit
 *   - known and fits          -> recommendation only when a higher tier fits
 * Otherwise the highest tier that fits in RAM is auto-selected. When no tier
 * fits, the lowest tier is returned with tier 0 and a warning.
 *
 * @returns {Promise<object>} Selection with `model`, `source` ('env' | 'auto'),
 *   `tier`, `label`, `ramGB`, and optional `warning` / `recommendation`.
 */
export async function selectBestMLXModel() {
    const ramGB = getSystemRamGB();
    // Trim so a stray space in the environment does not defeat the lookup.
    const envModel = (process.env.ESCRIBANO_LLM_MLX_MODEL || '').trim();
    const bestFit = findBestFittingMLXTier(ramGB);
    // If env var is set, use it but validate
    if (envModel) {
        const wanted = envModel.toLowerCase();
        const tier = MLX_LLM_MODEL_TIERS.find((t) => t.model.toLowerCase() === wanted);
        if (!tier) {
            return {
                model: envModel,
                source: 'env',
                tier: 0,
                label: 'unknown',
                ramGB,
                warning: `${envModel} is not a recognized MLX model.`,
                recommendation: `Consider using one of: ${MLX_LLM_MODEL_TIERS.map((t) => t.model).join(', ')}`,
            };
        }
        if (tier.minRamGB > ramGB) {
            return {
                model: envModel,
                source: 'env',
                tier: tier.tier,
                label: tier.label,
                ramGB,
                warning: `${envModel} may be too large for your ${ramGB}GB RAM.`,
                recommendation: bestFit
                    ? `Consider ${bestFit.model} for stability`
                    : undefined,
            };
        }
        // Only suggest an upgrade when a strictly higher tier fits in RAM.
        const betterTier = bestFit && bestFit.tier > tier.tier ? bestFit : undefined;
        return {
            model: envModel,
            source: 'env',
            tier: tier.tier,
            label: tier.label,
            ramGB,
            recommendation: betterTier
                ? `${betterTier.model} would give better quality for your ${ramGB}GB RAM`
                : undefined,
        };
    }
    // Auto-select: the best fitting tier is by construction the top choice,
    // so there is never a better fitting model left to recommend.
    if (bestFit) {
        return {
            model: bestFit.model,
            source: 'auto',
            tier: bestFit.tier,
            label: bestFit.label,
            ramGB,
            recommendation: undefined,
        };
    }
    // Nothing fits - fall back to the lowest tier, found by value rather than
    // by position (ties resolve to the later entry).
    let lowest;
    for (const candidate of MLX_LLM_MODEL_TIERS) {
        if (lowest === undefined || candidate.tier <= lowest.tier) {
            lowest = candidate;
        }
    }
    return {
        model: lowest.model,
        source: 'auto',
        tier: 0,
        label: 'minimum',
        ramGB,
        warning: `Selected minimum MLX model for ${ramGB}GB RAM.`,
    };
}
|
|
138
241
|
/**
|
|
139
242
|
* Format model selection for console output
|
|
140
243
|
*/
|
|
@@ -145,10 +248,10 @@ export function formatModelSelection(selection) {
|
|
|
145
248
|
: '(auto-detected)';
|
|
146
249
|
lines.push(`Using ${selection.model} ${sourceLabel}`);
|
|
147
250
|
if (selection.warning) {
|
|
148
|
-
lines.push(`
|
|
251
|
+
lines.push(` ! ${selection.warning}`);
|
|
149
252
|
}
|
|
150
253
|
if (selection.recommendation) {
|
|
151
|
-
lines.push(`
|
|
254
|
+
lines.push(` i ${selection.recommendation}`);
|
|
152
255
|
}
|
|
153
256
|
return lines.join('\n');
|
|
154
257
|
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
-- Migration 010: Add LLM backend tracking to existing runs
--
-- Marks all existing runs as having used Ollama backend (before MLX-LM migration).
-- This allows benchmarking comparisons between backends.
--
-- SQLite doesn't have native JSON functions in all versions, so the metadata
-- text is edited directly. Only the FINAL closing brace of the stored object
-- is rewritten: a global REPLACE of '}' would also inject the key into every
-- nested object and corrupt any string value that contains '}'.
--
-- One statement covers every status (completed, failed, and possibly stale
-- running rows): any row whose metadata lacks "llm_backend" is updated, and
-- rows that already carry the key are not touched.
UPDATE processing_runs
SET metadata = CASE
    -- No metadata, blank text, or an empty object: store a fresh object.
    WHEN metadata IS NULL OR trim(metadata) IN ('', '{}', '{ }')
        THEN '{"llm_backend":"ollama"}'
    -- Non-empty object: drop the last '}' and append the new key.
    WHEN rtrim(metadata) LIKE '%}'
        THEN substr(rtrim(metadata), 1, length(rtrim(metadata)) - 1) || ',"llm_backend":"ollama"}'
    -- Text that does not end like an object: leave it alone rather than guess.
    ELSE metadata
END
WHERE metadata IS NULL OR metadata NOT LIKE '%"llm_backend"%';
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
-- Migration 011: LLM Debug Log Table
-- Stores raw LLM inputs/outputs for debugging thinking leakage.
--
-- Every statement uses IF NOT EXISTS so that re-running the migration (or
-- resuming after a partial run) does not fail on an object that already exists.

CREATE TABLE IF NOT EXISTS llm_debug_log (
  id TEXT PRIMARY KEY,
  recording_id TEXT,
  artifact_id TEXT,
  call_type TEXT NOT NULL,  -- 'subject_grouping' | 'artifact_generation'
  prompt TEXT,              -- raw user prompt (before apply_chat_template)
  result TEXT,              -- final processed response returned to caller
  created_at TEXT NOT NULL DEFAULT (datetime('now')),

  -- Everything else as JSON (model, tokens, timing, raw_response diff, etc.)
  metadata TEXT NOT NULL
);

CREATE INDEX IF NOT EXISTS idx_llm_debug_log_recording ON llm_debug_log(recording_id);
CREATE INDEX IF NOT EXISTS idx_llm_debug_log_call_type ON llm_debug_log(call_type);
CREATE INDEX IF NOT EXISTS idx_llm_debug_log_created ON llm_debug_log(created_at);
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
-- Migration 012: Rebuild llm_debug_log with prompt/result columns
--
-- The table is dropped and created again so that prompt and result exist as
-- first-class columns. Existing rows are discarded, which is acceptable
-- because this table only contains debug data.

DROP TABLE IF EXISTS llm_debug_log;

CREATE TABLE IF NOT EXISTS llm_debug_log (
  id           TEXT PRIMARY KEY,
  recording_id TEXT,
  artifact_id  TEXT,
  call_type    TEXT NOT NULL,
  prompt       TEXT,
  result       TEXT,
  created_at   TEXT NOT NULL DEFAULT (datetime('now')),
  metadata     TEXT NOT NULL
);

-- Dropping the table also removed its indexes, so they are created afresh.
CREATE INDEX idx_llm_debug_log_recording ON llm_debug_log(recording_id);
CREATE INDEX idx_llm_debug_log_call_type ON llm_debug_log(call_type);
CREATE INDEX idx_llm_debug_log_created ON llm_debug_log(created_at);
|