escribano 0.4.5 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,6 +3,11 @@
3
3
  *
4
4
  * Detects the best available LLM model from installed Ollama models
5
5
  * based on system RAM and model quality tiers.
6
+ *
7
+ * MLX Models Note:
8
+ * Uses lmstudio-community Instruct-2507 models for reliable inference.
9
+ * These models respect think=False and produce clean output without
10
+ * thinking leakage, unlike older Qwen3.5 models.
6
11
  */
7
12
  import { totalmem } from 'node:os';
8
13
  export const LLM_MODEL_TIERS = [
@@ -11,6 +16,26 @@ export const LLM_MODEL_TIERS = [
11
16
  { model: 'qwen3:8b', tier: 2, minRamGB: 10, label: 'good' },
12
17
  { model: 'qwen3:4b', tier: 1, minRamGB: 6, label: 'minimum' },
13
18
  ];
19
/**
 * MLX model candidates, listed from the highest quality tier to the lowest.
 *
 * Each entry carries:
 *  - model:    model identifier
 *  - tier:     quality rank (higher is better)
 *  - minRamGB: system-RAM threshold the selection logic compares against
 *  - label:    human-readable tier name
 */
export const MLX_LLM_MODEL_TIERS = [
  { model: 'lmstudio-community/Qwen3-30B-A3B-Instruct-2507-MLX-8bit', tier: 3, minRamGB: 64, label: 'best' },
  { model: 'lmstudio-community/Qwen3-30B-A3B-Instruct-2507-MLX-4bit', tier: 2, minRamGB: 32, label: 'good' },
  { model: 'lmstudio-community/Qwen3-4B-Instruct-2507-MLX-4bit', tier: 1, minRamGB: 8, label: 'minimum' },
];
14
39
  const OLLAMA_ENDPOINT = process.env.OLLAMA_HOST || 'http://localhost:11434';
15
40
  /**
16
41
  * Fetch installed models from Ollama
@@ -135,6 +160,84 @@ export async function selectBestLLMModel() {
135
160
  recommendation: `Install at least ${lowest.model}: ollama pull ${lowest.model}`,
136
161
  };
137
162
  }
163
/**
 * Select the MLX LLM model to use, based on system RAM.
 *
 * If ESCRIBANO_LLM_MLX_MODEL is set (ignoring surrounding whitespace), that
 * model is always returned with source 'env'. The result then carries a
 * warning when the model is unknown or needs more RAM than detected, and a
 * recommendation when another tier would be a better match.
 *
 * Otherwise the highest tier whose minRamGB fits the detected RAM is returned
 * with source 'auto'. When no tier fits, the entry with the smallest RAM
 * requirement is returned with tier 0 and a warning.
 *
 * Selection does not depend on the declaration order of MLX_LLM_MODEL_TIERS.
 *
 * @returns {Promise<{model: string, source: 'env'|'auto', tier: number,
 *   label: string, ramGB: number, warning?: string, recommendation?: string}>}
 */
export async function selectBestMLXModel() {
  const ramGB = getSystemRamGB();
  const envModel = (process.env.ESCRIBANO_LLM_MLX_MODEL || '').trim();

  // Highest tier first, so `find` yields the best model that fits in RAM
  // no matter how the tier list happens to be ordered.
  const byQuality = [...MLX_LLM_MODEL_TIERS].sort((a, b) => b.tier - a.tier);
  const bestFit = byQuality.find((t) => t.minRamGB <= ramGB);

  // If env var is set, use it but validate
  if (envModel) {
    const wanted = envModel.toLowerCase();
    const tier = MLX_LLM_MODEL_TIERS.find((t) => t.model.toLowerCase() === wanted);

    if (!tier) {
      return {
        model: envModel,
        source: 'env',
        tier: 0,
        label: 'unknown',
        ramGB,
        warning: `${envModel} is not a recognized MLX model.`,
        recommendation: `Consider using one of: ${MLX_LLM_MODEL_TIERS.map((t) => t.model).join(', ')}`,
      };
    }

    if (tier.minRamGB > ramGB) {
      return {
        model: envModel,
        source: 'env',
        tier: tier.tier,
        label: tier.label,
        ramGB,
        warning: `${envModel} may be too large for your ${ramGB}GB RAM.`,
        recommendation: bestFit
          ? `Consider ${bestFit.model} for stability`
          : undefined,
      };
    }

    // The requested model fits; point out a higher tier if one fits as well.
    const betterTier = bestFit && bestFit.tier > tier.tier ? bestFit : undefined;
    return {
      model: envModel,
      source: 'env',
      tier: tier.tier,
      label: tier.label,
      ramGB,
      recommendation: betterTier
        ? `${betterTier.model} would give better quality for your ${ramGB}GB RAM`
        : undefined,
    };
  }

  // Auto-select: the best model that fits in RAM
  if (bestFit) {
    return {
      model: bestFit.model,
      source: 'auto',
      tier: bestFit.tier,
      label: bestFit.label,
      ramGB,
      // bestFit is already the highest tier that fits, so there is nothing
      // better to recommend; the key is kept so the result shape is stable.
      recommendation: undefined,
    };
  }

  // Nothing fits - fall back to the entry with the smallest RAM requirement
  const lowest = MLX_LLM_MODEL_TIERS.reduce(
    (min, t) => (t.minRamGB < min.minRamGB ? t : min),
    MLX_LLM_MODEL_TIERS[0],
  );
  return {
    model: lowest.model,
    source: 'auto',
    tier: 0,
    label: 'minimum',
    ramGB,
    warning: `Selected minimum MLX model for ${ramGB}GB RAM.`,
  };
}
138
241
  /**
139
242
  * Format model selection for console output
140
243
  */
@@ -145,10 +248,10 @@ export function formatModelSelection(selection) {
145
248
  : '(auto-detected)';
146
249
  lines.push(`Using ${selection.model} ${sourceLabel}`);
147
250
  if (selection.warning) {
148
- lines.push(` ${selection.warning}`);
251
+ lines.push(` ! ${selection.warning}`);
149
252
  }
150
253
  if (selection.recommendation) {
151
- lines.push(` ${selection.recommendation}`);
254
+ lines.push(` i ${selection.recommendation}`);
152
255
  }
153
256
  return lines.join('\n');
154
257
  }
@@ -0,0 +1,25 @@
1
-- Migration 010: Add LLM backend tracking to existing runs
--
-- Marks all existing runs as having used Ollama backend (before MLX-LM migration).
-- This allows benchmarking comparisons between backends.
--
-- The metadata text is edited directly instead of with json_set(), because
-- SQLite's JSON functions are not available in every build.
--
-- One statement covers every status: any row whose metadata is NULL or has no
-- "llm_backend" key gets tagged, and rows that already carry the key are not touched.
UPDATE processing_runs
SET metadata = CASE
    WHEN metadata IS NULL OR metadata = '{}' THEN '{"llm_backend":"ollama"}'
    -- Splice the key in before the FINAL closing brace only. REPLACE(metadata, '}', ...)
    -- would rewrite every '}' and corrupt metadata that contains nested objects
    -- or a brace inside a string value.
    WHEN metadata LIKE '%}' THEN SUBSTR(metadata, 1, LENGTH(metadata) - 1) || ',"llm_backend":"ollama"}'
    -- Does not end in '}', so it cannot be extended safely - leave it unchanged.
    ELSE metadata
END
WHERE metadata IS NULL OR metadata NOT LIKE '%"llm_backend"%';
@@ -0,0 +1,19 @@
1
-- Migration 011: LLM Debug Log Table
-- Stores raw LLM inputs/outputs for debugging thinking leakage

CREATE TABLE IF NOT EXISTS llm_debug_log (
  id TEXT PRIMARY KEY,
  recording_id TEXT,
  artifact_id TEXT,
  call_type TEXT NOT NULL, -- 'subject_grouping' | 'artifact_generation'
  prompt TEXT, -- raw user prompt (before apply_chat_template)
  result TEXT, -- final processed response returned to caller
  created_at TEXT NOT NULL DEFAULT (datetime('now')),

  -- Everything else as JSON (model, tokens, timing, raw_response diff, etc.)
  metadata TEXT NOT NULL
);

-- IF NOT EXISTS makes index creation as re-runnable as the table creation above.
-- Without it, a second run fails on the first index even though the table step succeeds.
CREATE INDEX IF NOT EXISTS idx_llm_debug_log_recording ON llm_debug_log(recording_id);
CREATE INDEX IF NOT EXISTS idx_llm_debug_log_call_type ON llm_debug_log(call_type);
CREATE INDEX IF NOT EXISTS idx_llm_debug_log_created ON llm_debug_log(created_at);
@@ -0,0 +1,20 @@
1
-- Migration 012: Rebuild llm_debug_log with prompt/result columns
--
-- The table is dropped and created again so that prompt and result exist as
-- first-class columns. Existing rows are discarded by the drop. The original
-- migration notes this is safe because the table only contains debug data.

DROP TABLE IF EXISTS llm_debug_log;

CREATE TABLE IF NOT EXISTS llm_debug_log (
  id           TEXT PRIMARY KEY,
  recording_id TEXT,
  artifact_id  TEXT,
  call_type    TEXT NOT NULL,
  prompt       TEXT,
  result       TEXT,
  created_at   TEXT NOT NULL DEFAULT (datetime('now')),
  metadata     TEXT NOT NULL
);

-- Lookup indexes on recording, call type and creation time.
CREATE INDEX idx_llm_debug_log_recording
  ON llm_debug_log (recording_id);
CREATE INDEX idx_llm_debug_log_call_type
  ON llm_debug_log (call_type);
CREATE INDEX idx_llm_debug_log_created
  ON llm_debug_log (created_at);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "escribano",
3
- "version": "0.4.5",
3
+ "version": "0.5.0",
4
4
  "description": "AI-powered session intelligence tool — turn screen recordings into structured work summaries",
5
5
  "main": "dist/index.js",
6
6
  "type": "module",