llm-checker 3.5.0 → 3.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,368 @@
1
+ /**
2
+ * Roadmap command helpers for issue #48:
3
+ * - gpu-plan
4
+ * - verify-context
5
+ * - amd-guard
6
+ * - toolcheck
7
+ */
8
+
9
/**
 * Constrain a number to the inclusive range [min, max].
 *
 * @param {number} value - Number to constrain.
 * @param {number} min - Lower bound; wins if the bounds are inverted.
 * @param {number} max - Upper bound.
 * @returns {number} `value` capped at `max` and then raised to at least `min`.
 */
function clamp(value, min, max) {
    const capped = Math.min(max, value);
    return Math.max(min, capped);
}
12
+
13
/**
 * Round a number to one decimal place.
 *
 * @param {number} value - Number to round.
 * @returns {number} `value` scaled to tenths, rounded with `Math.round`, and scaled back.
 */
function round1(value) {
    const tenths = Math.round(value * 10);
    return tenths / 10;
}
16
+
17
/**
 * Normalize a model-size option into gigabytes.
 *
 * Accepted inputs:
 * - a finite positive number, returned unchanged;
 * - a string such as "8GB" or "8 g" (case-insensitive), read as gigabytes;
 * - a string such as "7B" or a bare "7", read as billions of parameters and
 *   converted with a rough Q4 footprint of 0.55 GB per billion.
 *
 * @param {number|string} value - Raw size supplied by the caller.
 * @returns {number|null} Size in GB, or null when the input is missing,
 *   malformed, non-finite, or not strictly positive.
 */
function parseModelSizeGB(value) {
    if (typeof value === 'number') {
        // NaN, +/-Infinity and non-positive numbers are all rejected.
        return Number.isFinite(value) && value > 0 ? value : null;
    }

    if (typeof value !== 'string') return null;

    const text = value.trim().toUpperCase();
    if (text === '') return null;

    const parts = /^(\d+\.?\d*)\s*(GB|G|B)?$/.exec(text);
    if (parts === null) return null;

    // A missing suffix is treated like "B" (billions of parameters).
    const [, digits, suffix = 'B'] = parts;
    const amount = parseFloat(digits);
    if (!(Number.isFinite(amount) && amount > 0)) return null;

    const isGigabytes = suffix === 'GB' || suffix === 'G';

    // Parameters in billions -> rough Q4 memory footprint.
    return isGigabytes ? amount : amount * 0.55;
}
37
+
38
/**
 * Collect the GPUs reported by every available hardware backend into one flat list.
 *
 * A backend contributes only when it is marked `available` and carries an `info`
 * object. Backends exposing a non-empty `info.gpus` array yield one entry per GPU;
 * the `metal` backend, which reports a single GPU directly on `info`, yields one
 * entry built from `info.chip` and `info.memory.unified`.
 *
 * Malformed detector data is tolerated: null or non-object GPU entries are skipped,
 * and VRAM / speed values are coerced to finite non-negative numbers so that
 * callers can safely sum and sort them.
 *
 * @param {object} [hardware={}] - Hardware description with an optional `backends` map.
 * @returns {Array<{backend: string, name: string, vramGB: number, speedCoefficient: number}>}
 *   One entry per detected GPU, in backend iteration order.
 */
function flattenGPUs(hardware = {}) {
    // Anything that is not a finite positive number (missing, NaN, negative,
    // unparseable text) collapses to 0; numeric strings are converted.
    const toAmount = (raw) => {
        const parsed = Number(raw);
        return Number.isFinite(parsed) && parsed > 0 ? parsed : 0;
    };

    const gpus = [];
    const backends = hardware?.backends || {};

    for (const [backend, data] of Object.entries(backends)) {
        if (!data || !data.available || !data.info) continue;

        if (Array.isArray(data.info.gpus) && data.info.gpus.length > 0) {
            for (const gpu of data.info.gpus) {
                // A hole or junk entry in the detector output must not abort the scan.
                if (!gpu || typeof gpu !== 'object') continue;

                gpus.push({
                    backend,
                    name: gpu.name || `${backend.toUpperCase()} GPU`,
                    vramGB: toAmount(gpu.memory?.total),
                    speedCoefficient: toAmount(gpu.speedCoefficient)
                });
            }
            continue;
        }

        // Apple Metal detector reports a single GPU differently.
        if (backend === 'metal') {
            gpus.push({
                backend,
                name: data.info.chip || 'Apple Silicon GPU',
                vramGB: toAmount(data.info.memory?.unified),
                speedCoefficient: toAmount(data.info.speedCoefficient)
            });
        }
    }

    return gpus;
}
70
+
71
/**
 * Build a GPU placement plan from detected hardware.
 *
 * GPUs are ranked by VRAM (largest first, speed coefficient as tie-breaker).
 * Two size envelopes are derived, each leaving 2 GB of headroom: one for the
 * strongest single GPU and one for the pooled VRAM of all GPUs. The plan also
 * carries a placement strategy, suggested OLLAMA_* environment values, an
 * optional fit verdict for a requested model size, and textual recommendations.
 *
 * @param {object} [hardware={}] - Hardware description (`backends`, `summary`).
 * @param {object} [options={}] - Options; `options.modelSizeGB` may be a number
 *   or a size string understood by `parseModelSizeGB`.
 * @returns {object} Plan with `backend`, `gpuCount`, `gpus`, `totalVRAM`,
 *   `strongestGPU`, `singleMaxModelGB`, `pooledMaxModelGB`, `strategy`,
 *   `strategyReason`, `env`, `fit` (null when no valid model size was given)
 *   and `recommendations`.
 */
function buildGpuPlan(hardware = {}, options = {}) {
    // Largest VRAM first; the speed coefficient only breaks ties.
    const byVramThenSpeed = (left, right) =>
        right.vramGB !== left.vramGB
            ? right.vramGB - left.vramGB
            : right.speedCoefficient - left.speedCoefficient;

    const modelSizeGB = parseModelSizeGB(options.modelSizeGB);
    const summary = hardware.summary || {};
    const gpus = flattenGPUs(hardware);
    gpus.sort(byVramThenSpeed);

    const gpuCount = gpus.length;

    let vramSum = 0;
    for (const { vramGB } of gpus) {
        vramSum += vramGB;
    }
    const totalVRAM = round1(vramSum);

    const strongest = gpus[0] || null;
    const strongestVRAM = strongest ? strongest.vramGB : 0;

    // Both envelopes keep 2 GB free and never go negative.
    const pooledMaxModelGB = clamp(totalVRAM - 2, 0, Number.MAX_SAFE_INTEGER);
    const singleMaxModelGB = clamp(strongestVRAM - 2, 0, Number.MAX_SAFE_INTEGER);
    const backend = summary.bestBackend || 'cpu';

    let strategy;
    let strategyReason;
    if (gpuCount > 1) {
        strategy = 'distributed';
        strategyReason = `${gpuCount} GPUs detected; use spread scheduling and keep one model shard per device class.`;
    } else if (gpuCount === 1) {
        strategy = 'single_gpu';
        strategyReason = `One ${backend.toUpperCase()} GPU detected; keep model weights on a single device.`;
    } else {
        strategy = 'cpu_fallback';
        strategyReason = 'No compatible GPU backend detected.';
    }

    // Concurrency tiers: 4+ GPUs, 2-3 GPUs, everything else.
    let recommendedParallel = 1;
    let maxLoadedModels = 1;
    if (gpuCount >= 4) {
        recommendedParallel = 4;
        maxLoadedModels = 3;
    } else if (gpuCount >= 2) {
        recommendedParallel = 2;
        maxLoadedModels = 2;
    }

    const env = {
        OLLAMA_SCHED_SPREAD: strategy === 'distributed' ? '1' : '0',
        OLLAMA_NUM_PARALLEL: String(recommendedParallel),
        OLLAMA_MAX_LOADED_MODELS: String(maxLoadedModels)
    };

    let fit = null;
    if (modelSizeGB !== null) {
        fit = {
            modelSizeGB,
            fitsSingleGPU: modelSizeGB <= singleMaxModelGB,
            fitsPooled: modelSizeGB <= pooledMaxModelGB
        };
    }

    const recommendations = [];
    if (gpuCount > 1) {
        recommendations.push(
            `Prefer model sizes <= ${round1(singleMaxModelGB)}GB for deterministic single-GPU residency.`,
            `Pooled envelope is ~${round1(pooledMaxModelGB)}GB if scheduling spreads the load.`
        );
    } else if (gpuCount === 1) {
        recommendations.push(`Keep model payload <= ${round1(singleMaxModelGB)}GB for stable inference.`);
    } else {
        recommendations.push('Use smaller quantized models and prioritize CPU-safe profiles.');
    }

    return {
        backend,
        gpuCount,
        gpus,
        totalVRAM,
        strongestGPU: strongest,
        singleMaxModelGB: round1(singleMaxModelGB),
        pooledMaxModelGB: round1(pooledMaxModelGB),
        strategy,
        strategyReason,
        env,
        fit,
        recommendations
    };
}
139
+
140
/**
 * Extract the model's context window (in tokens) from an Ollama `/api/show` payload.
 *
 * Lookup order:
 * 1. `model_info` keys that are exactly `context_length` or end in
 *    `.context_length` (case-insensitive).
 * 2. Any other `model_info` key that merely contains `context_length`, for
 *    example a `...rope.scaling.original_context_length` entry. This is used
 *    only as a fallback so it cannot shadow the real value based on key order.
 * 3. A `num_ctx <n>` entry inside the free-form `parameters` text.
 *
 * @param {object} [showPayload={}] - Parsed `/api/show` response.
 * @returns {number|null} Positive token count, or null when nothing usable is found
 *   or the payload is not an object.
 */
function extractContextWindow(showPayload = {}) {
    if (!showPayload || typeof showPayload !== 'object') return null;

    const toPositiveInt = (raw) => {
        const parsed = parseInt(raw, 10);
        return Number.isFinite(parsed) && parsed > 0 ? parsed : null;
    };

    // Typical `/api/show` values in newer Ollama builds.
    const modelInfo = showPayload.model_info || {};
    let substringMatch = null;
    for (const [key, value] of Object.entries(modelInfo)) {
        const normalizedKey = key.toLowerCase();
        if (!normalizedKey.includes('context_length')) continue;

        const parsed = toPositiveInt(value);
        if (parsed === null) continue;

        const isExactKey =
            normalizedKey === 'context_length' || normalizedKey.endsWith('.context_length');
        if (isExactKey) return parsed;

        // Remember the first loose match, but keep looking for an exact key.
        if (substringMatch === null) substringMatch = parsed;
    }
    if (substringMatch !== null) return substringMatch;

    // Older payloads often expose this in free-form parameters text.
    const paramsText = typeof showPayload.parameters === 'string' ? showPayload.parameters : '';
    const match = paramsText.match(/num_ctx\s+(\d+)/i);
    if (match) {
        const parsed = toPositiveInt(match[1]);
        if (parsed !== null) return parsed;
    }

    return null;
}
161
+
162
/**
 * Estimate the KV-cache memory cost, in GB, of 1,000 tokens of context.
 *
 * The estimate is a practical approximation that grows linearly with model
 * size (size / 90) and is bounded to the range 0.03-0.45 GB. The bounds keep
 * the estimate conservative for small models while avoiding huge
 * over-allocation for large ones.
 *
 * @param {number} [modelSizeGB=7] - Model weight footprint in GB.
 * @returns {number} Estimated GB of KV cache per 1k tokens.
 */
function estimateKvCachePer1kTokensGB(modelSizeGB = 7) {
    const FLOOR_GB = 0.03;
    const CEILING_GB = 0.45;
    const scaled = modelSizeGB / 90;
    return Math.max(FLOOR_GB, Math.min(CEILING_GB, scaled));
}
167
+
168
/**
 * Check whether a target context length is realistic for a model on this hardware.
 *
 * Two checks are produced:
 * - `declared_context`: the target against the model's declared context window
 *   (a warning when the model declares none);
 * - `memory_budget`: the target against a memory-derived limit. The limit is the
 *   KV-cache budget (effective memory minus model size minus 2 GB) divided by the
 *   per-1k-token KV estimate, and is never reported below 1024 tokens.
 *
 * Effective memory is `summary.effectiveMemory` when set, otherwise 70% of
 * `summary.systemRAM` (8 GB assumed when that is missing too).
 *
 * @param {object} [input={}] - Verification input.
 * @param {string} [input.modelName='unknown'] - Model label echoed in the result.
 * @param {number} [input.targetTokens=8192] - Context length the caller wants to run.
 * @param {number|null} [input.declaredContext=null] - Context window declared by the model.
 * @param {number} [input.modelSizeGB=7] - Model weight footprint in GB.
 * @param {object} [input.hardware={}] - Hardware description with an optional `summary`.
 * @returns {object} Result with the echoed inputs, `effectiveMemoryGB`,
 *   `memoryLimitedContext`, `recommendedContext`, overall `status`
 *   ('pass' | 'warn' | 'fail'), the individual `checks`, and `suggestions`.
 */
function buildContextVerification(input = {}) {
    const {
        modelName = 'unknown',
        targetTokens = 8192,
        declaredContext = null,
        modelSizeGB = 7,
        hardware = {}
    } = input;

    const hwSummary = hardware.summary || {};

    let effectiveMemoryGB = hwSummary.effectiveMemory;
    if (!effectiveMemoryGB) {
        effectiveMemoryGB = Math.round((hwSummary.systemRAM || 8) * 0.7);
    }

    const kvPer1k = estimateKvCachePer1kTokensGB(modelSizeGB);
    const kvBudgetGB = Math.max(0, effectiveMemoryGB - modelSizeGB - 2);
    const tokensThatFit = Math.floor((kvBudgetGB / kvPer1k) * 1000);
    const memoryLimitedContext = Math.max(1024, tokensThatFit);

    // The recommendation never exceeds what the model itself declares.
    const recommendedContext = declaredContext
        ? Math.min(memoryLimitedContext, declaredContext)
        : memoryLimitedContext;

    const declaredCheck = declaredContext
        ? {
            id: 'declared_context',
            status: targetTokens <= declaredContext ? 'pass' : 'fail',
            message: `Model-declared context window: ${declaredContext} tokens`
        }
        : {
            id: 'declared_context',
            status: 'warn',
            message: 'Model metadata does not expose an explicit context length.'
        };

    const memoryCheck = {
        id: 'memory_budget',
        status: targetTokens <= memoryLimitedContext ? 'pass' : 'warn',
        message: `Estimated memory-safe context: ~${memoryLimitedContext} tokens on this hardware`
    };

    const checks = [declaredCheck, memoryCheck];

    // Overall status is the worst individual outcome: fail > warn > pass.
    const outcomes = checks.map((item) => item.status);
    let status = 'pass';
    if (outcomes.includes('fail')) {
        status = 'fail';
    } else if (outcomes.includes('warn')) {
        status = 'warn';
    }

    const suggestions = [];
    switch (status) {
        case 'fail':
            suggestions.push(`Reduce target context to <= ${recommendedContext} tokens.`);
            break;
        case 'warn':
            suggestions.push(`Use ${recommendedContext} tokens as a safer runtime default.`);
            break;
        default:
            suggestions.push(`Target context (${targetTokens}) is within estimated safe limits.`);
    }

    // Weights taking more than 70% of effective memory leave little room for the KV cache.
    if (modelSizeGB > effectiveMemoryGB * 0.7) {
        suggestions.push('Consider a smaller quantization to preserve KV cache headroom.');
    }

    return {
        modelName,
        targetTokens,
        declaredContext,
        modelSizeGB: round1(modelSizeGB),
        effectiveMemoryGB: round1(effectiveMemoryGB),
        memoryLimitedContext,
        recommendedContext,
        status,
        checks,
        suggestions
    };
}
237
+
238
/**
 * Assess how healthy the AMD / ROCm inference path looks on this machine.
 *
 * An AMD GPU is considered present when the `rocm` backend is available, a ROCm
 * detection method was reported, or `summary.bestBackend` is 'rocm'. Up to three
 * checks are emitted (`amd_presence`, one runtime check, `backend_selection`);
 * the overall status is the worst of them, and recommendations are derived from
 * the same conditions.
 *
 * @param {object} [input={}] - Guard input.
 * @param {string} [input.platform=process.platform] - Platform identifier ('win32', 'linux', ...).
 * @param {object} [input.hardware={}] - Hardware description (`backends`, `summary`).
 * @param {boolean} [input.rocmAvailable=false] - Whether ROCm userspace tooling was found.
 * @param {string|null} [input.rocmDetectionMethod=null] - How the AMD GPU was detected, if at all.
 * @returns {object} Result with `status`, `platform`, `rocmAvailable`,
 *   `rocmDetectionMethod` ('none' when unset), `primaryBackend` ('cpu' when unset),
 *   `checks` and `recommendations`.
 */
function buildAmdGuard(input = {}) {
    const {
        platform = process.platform,
        hardware = {},
        rocmAvailable = false,
        rocmDetectionMethod = null
    } = input;

    const backends = hardware.backends || {};
    const summary = hardware.summary || {};
    const primary = summary.bestBackend;

    const hasRocmBackend = !!backends.rocm?.available;
    const hasAmdGPU = hasRocmBackend || !!rocmDetectionMethod || primary === 'rocm';

    const onWindows = platform === 'win32';
    const linuxWithoutRocm = platform === 'linux' && hasAmdGPU && !rocmAvailable;
    const cpuDespiteAmd = primary === 'cpu' && hasAmdGPU;

    const checks = [
        {
            id: 'amd_presence',
            status: hasAmdGPU ? 'pass' : 'warn',
            message: hasAmdGPU ? 'AMD GPU path detected.' : 'No AMD GPU backend detected.'
        }
    ];

    // Runtime check: only meaningful when an AMD GPU was found at all.
    if (hasAmdGPU) {
        if (onWindows && !hasRocmBackend) {
            checks.push({
                id: 'windows_runtime',
                status: 'warn',
                message: 'Windows AMD path may fall back to CPU unless ROCm-equivalent stack is configured.'
            });
        } else if (linuxWithoutRocm) {
            checks.push({
                id: 'linux_runtime',
                status: 'warn',
                message: `AMD GPU detected via ${rocmDetectionMethod || 'fallback'} without ROCm userspace tools.`
            });
        } else {
            checks.push({
                id: 'runtime_stack',
                status: 'pass',
                message: 'ROCm runtime path appears available.'
            });
        }
    }

    if (cpuDespiteAmd) {
        checks.push({
            id: 'backend_selection',
            status: 'warn',
            message: 'Primary backend resolved to CPU despite AMD detection.'
        });
    } else if (primary === 'rocm') {
        checks.push({
            id: 'backend_selection',
            status: 'pass',
            message: 'ROCm selected as primary backend.'
        });
    }

    // Overall status is the worst individual outcome: fail > warn > pass.
    const outcomes = checks.map((item) => item.status);
    let status = 'pass';
    if (outcomes.includes('fail')) {
        status = 'fail';
    } else if (outcomes.includes('warn')) {
        status = 'warn';
    }

    const recommendations = [];
    if (linuxWithoutRocm) {
        recommendations.push('Install ROCm runtime packages and verify `rocm-smi` availability.');
    }
    if (onWindows && hasAmdGPU) {
        recommendations.push('On Windows, validate latest Adrenalin driver or use WSL2 for ROCm workloads.');
    }
    if (cpuDespiteAmd) {
        recommendations.push('Force a small model profile until GPU backend is consistently selected.');
    }
    if (recommendations.length === 0) {
        recommendations.push('AMD path looks healthy for local LLM inference.');
    }

    return {
        status,
        platform,
        rocmAvailable: !!rocmAvailable,
        rocmDetectionMethod: rocmDetectionMethod || 'none',
        primaryBackend: primary || 'cpu',
        checks,
        recommendations
    };
}
321
+
322
/**
 * Classify a chat response from a tool-calling probe.
 *
 * Outcomes, in order of precedence:
 * - an `error` was supplied                        -> 'unsupported', score 0;
 * - `message.tool_calls` is a non-empty array      -> 'supported', score 100;
 * - the text content mentions '5', 'add_numbers'
 *   or 'tool' (case-insensitive)                   -> 'partial', score 50;
 * - anything else                                  -> 'unsupported', score 10.
 *
 * NOTE(review): the text markers presumably correspond to the probe prompt
 * issued by the caller (an `add_numbers` tool whose answer is 5); the prompt
 * is not visible from this function.
 *
 * Only string content is scanned. Non-string content (for example an array of
 * content parts, or a number) is treated as empty text instead of throwing.
 *
 * @param {object|null} [chatPayload=null] - Parsed chat response; `message` is read from it.
 * @param {Error|*} [error=null] - Failure raised while running the probe, if any.
 * @returns {{status: string, score: number, reason: string, toolCalls: Array}}
 *   Classification result.
 */
function evaluateToolCallingResult(chatPayload = null, error = null) {
    if (error) {
        return {
            status: 'unsupported',
            score: 0,
            reason: error.message || String(error),
            toolCalls: []
        };
    }

    const message = chatPayload?.message || {};
    const toolCalls = Array.isArray(message.tool_calls) ? message.tool_calls : [];
    if (toolCalls.length > 0) {
        return {
            status: 'supported',
            score: 100,
            reason: 'Model emitted structured tool_calls.',
            toolCalls
        };
    }

    // Guard against truthy non-string content, which has no toLowerCase().
    const content = typeof message.content === 'string' ? message.content.toLowerCase() : '';
    const markers = ['5', 'add_numbers', 'tool'];
    if (markers.some((marker) => content.includes(marker))) {
        return {
            status: 'partial',
            score: 50,
            reason: 'Model responded but did not emit structured tool_calls.',
            toolCalls: []
        };
    }

    return {
        status: 'unsupported',
        score: 10,
        reason: 'No tool-calling markers found in response.',
        toolCalls: []
    };
}
360
+
361
+ module.exports = {
362
+ buildAmdGuard,
363
+ buildContextVerification,
364
+ buildGpuPlan,
365
+ evaluateToolCallingResult,
366
+ extractContextWindow,
367
+ parseModelSizeGB
368
+ };
@@ -0,0 +1,17 @@
1
+ <claude-mem-context>
2
+ # Recent Activity
3
+
4
+ <!-- This section is auto-generated by claude-mem. Edit content outside the tags. -->
5
+
6
+ ### Feb 12, 2026
7
+
8
+ | ID | Time | T | Title | Read |
9
+ |----|------|---|-------|------|
10
+ | #3464 | 10:03 PM | 🔵 | SQL Database Schema - Indexed Model Repository with Benchmarks | ~555 |
11
+
12
+ ### Feb 14, 2026
13
+
14
+ | ID | Time | T | Title | Read |
15
+ |----|------|---|-------|------|
16
+ | #4339 | 6:49 PM | 🟣 | MCP server implementation and documentation added to llm-checker repository | ~457 |
17
+ </claude-mem-context>
@@ -0,0 +1,18 @@
1
+ <claude-mem-context>
2
+ # Recent Activity
3
+
4
+ <!-- This section is auto-generated by claude-mem. Edit content outside the tags. -->
5
+
6
+ ### Feb 12, 2026
7
+
8
+ | ID | Time | T | Title | Read |
9
+ |----|------|---|-------|------|
10
+ | #3490 | 10:24 PM | 🔵 | Hardware Detector Cache Implementation - 5-Minute TTL Without Force Refresh Option | ~536 |
11
+ | #3440 | 9:58 PM | 🔵 | Hardware Detection System - Multi-GPU Support with Intelligent Selection | ~611 |
12
+
13
+ ### Feb 14, 2026
14
+
15
+ | ID | Time | T | Title | Read |
16
+ |----|------|---|-------|------|
17
+ | #4339 | 6:49 PM | 🟣 | MCP server implementation and documentation added to llm-checker repository | ~457 |
18
+ </claude-mem-context>
@@ -0,0 +1,17 @@
1
+ <claude-mem-context>
2
+ # Recent Activity
3
+
4
+ <!-- This section is auto-generated by claude-mem. Edit content outside the tags. -->
5
+
6
+ ### Feb 12, 2026
7
+
8
+ | ID | Time | T | Title | Read |
9
+ |----|------|---|-------|------|
10
+ | #3453 | 10:01 PM | 🔵 | CUDA Detector Implementation - NVIDIA GPU Detection via nvidia-smi | ~497 |
11
+
12
+ ### Feb 14, 2026
13
+
14
+ | ID | Time | T | Title | Read |
15
+ |----|------|---|-------|------|
16
+ | #4339 | 6:49 PM | 🟣 | MCP server implementation and documentation added to llm-checker repository | ~457 |
17
+ </claude-mem-context>
@@ -99,9 +99,8 @@ class CPUDetector {
99
99
  }
100
100
  return coreIds.size || os.cpus().length;
101
101
  } else if (process.platform === 'win32') {
102
- const wmic = execSync('wmic cpu get NumberOfCores', { encoding: 'utf8', timeout: 5000 });
103
- const match = wmic.match(/\d+/);
104
- return match ? parseInt(match[0]) : os.cpus().length;
102
+ const physicalCores = this.getWindowsPhysicalCoreCount();
103
+ return physicalCores || os.cpus().length;
105
104
  }
106
105
  } catch (e) {
107
106
  return os.cpus().length;
@@ -125,9 +124,8 @@ class CPUDetector {
125
124
  );
126
125
  return Math.round(parseInt(maxFreq) / 1000); // kHz to MHz
127
126
  } else if (process.platform === 'win32') {
128
- const wmic = execSync('wmic cpu get MaxClockSpeed', { encoding: 'utf8', timeout: 5000 });
129
- const match = wmic.match(/\d+/);
130
- return match ? parseInt(match[0]) : 0;
127
+ const maxClock = this.getWindowsMaxClockSpeed();
128
+ return maxClock || (os.cpus()[0]?.speed || 0);
131
129
  }
132
130
  } catch (e) {
133
131
  return os.cpus()[0]?.speed || 0;
@@ -135,6 +133,68 @@ class CPUDetector {
135
133
  return 0;
136
134
  }
137
135
 
136
+ /**
137
+ * Execute shell command with consistent options.
138
+ */
139
+ runCommand(command) {
140
+ return execSync(command, { encoding: 'utf8', timeout: 5000 });
141
+ }
142
+
143
+ /**
144
+ * Extract first integer from command output.
145
+ */
146
+ extractFirstInteger(output) {
147
+ if (typeof output !== 'string') return null;
148
+ const match = output.match(/-?\d+/);
149
+ if (!match) return null;
150
+ const parsed = parseInt(match[0], 10);
151
+ return Number.isFinite(parsed) ? parsed : null;
152
+ }
153
+
154
+ /**
155
+ * Try multiple Windows commands and return first numeric value.
156
+ */
157
+ queryWindowsNumeric(commands) {
158
+ for (const command of commands) {
159
+ try {
160
+ const output = this.runCommand(command);
161
+ const parsed = this.extractFirstInteger(output);
162
+ if (parsed !== null) {
163
+ return parsed;
164
+ }
165
+ } catch (e) {
166
+ continue;
167
+ }
168
+ }
169
+ return null;
170
+ }
171
+
172
+ /**
173
+ * Get physical core count on Windows.
174
+ * WMIC can be absent on modern Windows 11, so we fallback to CIM.
175
+ */
176
+ getWindowsPhysicalCoreCount() {
177
+ const value = this.queryWindowsNumeric([
178
+ 'wmic cpu get NumberOfCores /value',
179
+ 'powershell -NoProfile -NonInteractive -Command "(Get-CimInstance Win32_Processor | Measure-Object -Property NumberOfCores -Sum).Sum"',
180
+ 'pwsh -NoProfile -NonInteractive -Command "(Get-CimInstance Win32_Processor | Measure-Object -Property NumberOfCores -Sum).Sum"'
181
+ ]);
182
+ return value && value > 0 ? value : null;
183
+ }
184
+
185
+ /**
186
+ * Get max clock speed on Windows (MHz).
187
+ * WMIC can be absent on modern Windows 11, so we fallback to CIM.
188
+ */
189
+ getWindowsMaxClockSpeed() {
190
+ const value = this.queryWindowsNumeric([
191
+ 'wmic cpu get MaxClockSpeed /value',
192
+ 'powershell -NoProfile -NonInteractive -Command "(Get-CimInstance Win32_Processor | Measure-Object -Property MaxClockSpeed -Maximum).Maximum"',
193
+ 'pwsh -NoProfile -NonInteractive -Command "(Get-CimInstance Win32_Processor | Measure-Object -Property MaxClockSpeed -Maximum).Maximum"'
194
+ ]);
195
+ return value && value > 0 ? value : null;
196
+ }
197
+
138
198
  /**
139
199
  * Get CPU cache information
140
200
  */