llm-checker 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +418 -0
  3. package/analyzer/compatibility.js +584 -0
  4. package/analyzer/performance.js +505 -0
  5. package/bin/CLAUDE.md +12 -0
  6. package/bin/enhanced_cli.js +3118 -0
  7. package/bin/test-deterministic.js +41 -0
  8. package/package.json +96 -0
  9. package/src/CLAUDE.md +12 -0
  10. package/src/ai/intelligent-selector.js +615 -0
  11. package/src/ai/model-selector.js +312 -0
  12. package/src/ai/multi-objective-selector.js +820 -0
  13. package/src/commands/check.js +58 -0
  14. package/src/data/CLAUDE.md +11 -0
  15. package/src/data/model-database.js +637 -0
  16. package/src/data/sync-manager.js +279 -0
  17. package/src/hardware/CLAUDE.md +12 -0
  18. package/src/hardware/backends/CLAUDE.md +11 -0
  19. package/src/hardware/backends/apple-silicon.js +318 -0
  20. package/src/hardware/backends/cpu-detector.js +490 -0
  21. package/src/hardware/backends/cuda-detector.js +417 -0
  22. package/src/hardware/backends/intel-detector.js +436 -0
  23. package/src/hardware/backends/rocm-detector.js +440 -0
  24. package/src/hardware/detector.js +573 -0
  25. package/src/hardware/pc-optimizer.js +635 -0
  26. package/src/hardware/specs.js +286 -0
  27. package/src/hardware/unified-detector.js +442 -0
  28. package/src/index.js +2289 -0
  29. package/src/models/CLAUDE.md +17 -0
  30. package/src/models/ai-check-selector.js +806 -0
  31. package/src/models/catalog.json +426 -0
  32. package/src/models/deterministic-selector.js +1145 -0
  33. package/src/models/expanded_database.js +1142 -0
  34. package/src/models/intelligent-selector.js +532 -0
  35. package/src/models/requirements.js +310 -0
  36. package/src/models/scoring-config.js +57 -0
  37. package/src/models/scoring-engine.js +715 -0
  38. package/src/ollama/.cache/README.md +33 -0
  39. package/src/ollama/CLAUDE.md +24 -0
  40. package/src/ollama/client.js +438 -0
  41. package/src/ollama/enhanced-client.js +113 -0
  42. package/src/ollama/enhanced-scraper.js +634 -0
  43. package/src/ollama/manager.js +357 -0
  44. package/src/ollama/native-scraper.js +776 -0
  45. package/src/plugins/CLAUDE.md +11 -0
  46. package/src/plugins/examples/custom_model_plugin.js +87 -0
  47. package/src/plugins/index.js +295 -0
  48. package/src/utils/CLAUDE.md +11 -0
  49. package/src/utils/config.js +359 -0
  50. package/src/utils/formatter.js +315 -0
  51. package/src/utils/logger.js +272 -0
  52. package/src/utils/model-classifier.js +167 -0
  53. package/src/utils/verbose-progress.js +266 -0
@@ -0,0 +1,634 @@
1
+ /**
2
+ * Enhanced Ollama Scraper
3
+ * Scrapes ALL models from ollama.com with ALL variants and quantizations
4
+ * No external API dependencies - pure HTML scraping
5
+ */
6
+
7
+ const https = require('https');
8
+ const path = require('path');
9
+ const os = require('os');
10
+ const fs = require('fs');
11
+
12
/**
 * Scrapes the public ollama.com library pages (plain HTML, no API) and turns
 * them into model records plus per-tag variant records.
 *
 * All network access goes through {@link EnhancedOllamaScraper#httpGet}, so a
 * caller (or a test) can replace that single method to feed canned HTML.
 */
class EnhancedOllamaScraper {
    /**
     * @param {Object} [options]
     * @param {number} [options.concurrency=5] - Models processed per batch (>= 1).
     * @param {number} [options.rateLimitMs=200] - Delay between requests; 0 disables it.
     * @param {number} [options.timeout=15000] - Socket timeout in milliseconds.
     * @param {number} [options.maxRetries=3] - Retries per request; 0 disables retries.
     * @param {number} [options.maxRedirects=5] - Redirect hops followed per request.
     * @param {Function} [options.onProgress] - Receives `{ phase, message, ... }` objects.
     * @param {Function} [options.onError] - Receives error message strings.
     */
    constructor(options = {}) {
        // Explicit numeric validation instead of `value || fallback`, so that a
        // legitimate 0 (no rate limit, no retries) is honoured.
        const numberOr = (value, fallback, min) => (
            typeof value === 'number' && Number.isFinite(value) && value >= min ? value : fallback
        );

        this.baseURL = 'https://ollama.com';
        this.concurrency = Math.floor(numberOr(options.concurrency, 5, 1));
        this.rateLimitMs = numberOr(options.rateLimitMs, 200, 0);
        this.timeout = numberOr(options.timeout, 15000, 1);
        this.maxRetries = Math.floor(numberOr(options.maxRetries, 3, 0));
        this.maxRedirects = Math.floor(numberOr(options.maxRedirects, 5, 0));

        // Progress tracking
        this.onProgress = options.onProgress || (() => {});
        this.onError = options.onError || console.error;
    }

    /**
     * GET a URL and resolve with the response body as a UTF-8 string.
     *
     * Retries on network errors, timeouts and HTTP 429 (up to `maxRetries`),
     * follows redirects (up to `maxRedirects`) and rejects bodies over 10MB.
     *
     * @param {string} url - Absolute URL to fetch.
     * @param {number} [retries=0] - Retry attempts already used (internal).
     * @param {number} [redirects=0] - Redirect hops already followed (internal).
     * @returns {Promise<string>} The response body.
     */
    async httpGet(url, retries = 0, redirects = 0) {
        return new Promise((resolve, reject) => {
            const urlObj = new URL(url);
            const maxBytes = 10 * 1024 * 1024; // 10MB limit

            const requestOptions = {
                hostname: urlObj.hostname,
                path: urlObj.pathname + urlObj.search,
                method: 'GET',
                headers: {
                    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
                    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                    'Accept-Language': 'en-US,en;q=0.5',
                    'Accept-Encoding': 'identity',
                    'Connection': 'keep-alive',
                    'Cache-Control': 'no-cache'
                },
                timeout: this.timeout
            };
            if (urlObj.port) {
                requestOptions.port = Number(urlObj.port);
            }

            // Exactly one outcome per attempt. Destroying a request also emits
            // 'error' on it, so without this guard a timeout would start two
            // retries (one from each handler).
            let settled = false;
            const finish = (settle, value) => {
                if (settled) return;
                settled = true;
                settle(value);
            };
            const handOff = (nextUrl, nextRetries, nextRedirects, delayMs) => {
                if (settled) return;
                settled = true;
                const go = () => {
                    this.httpGet(nextUrl, nextRetries, nextRedirects).then(resolve, reject);
                };
                if (delayMs > 0) {
                    setTimeout(go, delayMs);
                } else {
                    go();
                }
            };
            const onNetworkError = (err) => {
                if (settled) return;
                if (retries < this.maxRetries) {
                    handOff(url, retries + 1, redirects, (retries + 1) * 1000);
                } else {
                    finish(reject, err);
                }
            };

            const req = https.request(requestOptions, (res) => {
                // Decode as a stream so multi-byte characters split across
                // chunks are not corrupted by string concatenation.
                res.setEncoding('utf8');

                let data = '';
                let bytesReceived = 0;

                res.on('data', (chunk) => {
                    if (settled) return;
                    bytesReceived += Buffer.byteLength(chunk, 'utf8');
                    if (bytesReceived > maxBytes) {
                        finish(reject, new Error('Response too large'));
                        req.destroy();
                        return;
                    }
                    data += chunk;
                });

                res.on('error', onNetworkError);

                res.on('end', () => {
                    if (settled) return;
                    const status = res.statusCode;

                    if (status === 200) {
                        finish(resolve, data);
                    } else if (status === 429 && retries < this.maxRetries) {
                        // Rate limited, retry with backoff
                        handOff(url, retries + 1, redirects, (retries + 1) * 2000);
                    } else if (status >= 300 && status < 400) {
                        const location = res.headers.location;
                        if (!location) {
                            finish(reject, new Error(`Redirect without location: ${status}`));
                        } else if (redirects >= this.maxRedirects) {
                            finish(reject, new Error(`Too many redirects for ${url}`));
                        } else {
                            let target;
                            try {
                                // Resolve relative to the current URL so that
                                // "/x", "x" and "../x" forms all work.
                                target = new URL(location, url).toString();
                            } catch (err) {
                                finish(reject, new Error(`Invalid redirect location: ${location}`));
                                return;
                            }
                            handOff(target, retries, redirects + 1, 0);
                        }
                    } else {
                        finish(reject, new Error(`HTTP ${status} for ${url}`));
                    }
                });
            });

            req.on('error', onNetworkError);

            req.on('timeout', () => {
                if (settled) return;
                if (retries < this.maxRetries) {
                    handOff(url, retries + 1, redirects, 0);
                } else {
                    finish(reject, new Error('Request timeout'));
                }
                // Destroy after settling so the resulting 'error' is ignored.
                req.destroy();
            });

            req.end();
        });
    }

    /**
     * Resolve after `ms` milliseconds.
     * @param {number} ms
     * @returns {Promise<void>}
     */
    sleep(ms) {
        return new Promise((resolve) => setTimeout(resolve, ms));
    }

    /**
     * Escape a string for literal use inside a RegExp source.
     * @param {string} text
     * @returns {string}
     */
    escapeRegExp(text) {
        return String(text).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    }

    /**
     * Find a pull/download count in an HTML fragment.
     *
     * Tries the raw markup first, then the fragment with tags replaced by
     * spaces, so a count and its label in adjacent elements still match.
     *
     * @param {string} fragment - HTML to search.
     * @param {RegExp} pattern - Non-global pattern whose group 1 is the count.
     * @returns {number|null} Parsed count, or null when nothing matched.
     */
    matchPullCount(fragment, pattern) {
        const direct = fragment.match(pattern);
        if (direct) return this.parsePulls(direct[1]);

        const textOnly = fragment.replace(/<[^<>]*>/g, ' ');
        const loose = textOnly.match(pattern);
        return loose ? this.parsePulls(loose[1]) : null;
    }

    /**
     * Scrape the main library page to get all model identifiers.
     *
     * @returns {Promise<Array<{id: string, pulls?: number}>>} One entry per
     *   model id in page order; `pulls` is set when a model card was found.
     */
    async scrapeModelList() {
        this.onProgress({ phase: 'list', message: 'Fetching model list from ollama.com/library...' });

        const html = await this.httpGet(`${this.baseURL}/library`);

        // Keyed by id so the second pass can enrich entries from the first.
        const byId = new Map();
        let match;

        // Pass 1: direct library links
        const linkPattern = /href="\/library\/([^"\/]+)"/gi;
        while ((match = linkPattern.exec(html)) !== null) {
            const id = match[1].toLowerCase();
            if (!byId.has(id) && !id.includes('?') && !id.includes('#')) {
                byId.set(id, { id });
            }
        }

        // Pass 2: model cards with more info (bounded match to prevent ReDoS)
        const cardPattern = /<a[^>]*href="\/library\/([^"]+)"[^>]*>[\s\S]{0,5000}?<\/a>/gi;
        const pullsPattern = /(\d+(?:\.\d+)?[KMB]?)\s*(?:Pulls|pulls)/i;
        while ((match = cardPattern.exec(html)) !== null) {
            // Drop sub-paths, query strings and fragments from the id.
            const id = match[1].toLowerCase().split(/[\/?#]/)[0];
            if (!id) continue;

            const pulls = this.matchPullCount(match[0], pullsPattern);
            const known = byId.get(id);
            if (!known) {
                byId.set(id, { id, pulls: pulls === null ? 0 : pulls });
            } else if (known.pulls === undefined && pulls !== null) {
                known.pulls = pulls;
            }
        }

        const models = Array.from(byId.values());

        this.onProgress({ phase: 'list', message: `Found ${models.length} models` });

        return models;
    }

    /**
     * Parse pull count (e.g., "1.2M" -> 1200000). Suffixes are
     * case-insensitive; unparseable input yields 0.
     *
     * @param {string} pullStr
     * @returns {number}
     */
    parsePulls(pullStr) {
        if (!pullStr) return 0;

        const text = String(pullStr).trim().toUpperCase();
        const num = parseFloat(text);
        if (Number.isNaN(num)) return 0;

        if (text.includes('B')) return Math.round(num * 1e9);
        if (text.includes('M')) return Math.round(num * 1e6);
        if (text.includes('K')) return Math.round(num * 1e3);
        return Math.round(num);
    }

    /**
     * Scrape a model's detail page.
     *
     * @param {string} modelId
     * @returns {Promise<Object|null>} Model record, or null when the page
     *   could not be fetched (the failure is reported through `onError`).
     */
    async scrapeModelDetails(modelId) {
        const url = `${this.baseURL}/library/${modelId}`;

        try {
            const html = await this.httpGet(url);

            return {
                id: modelId,
                name: this.extractModelName(html, modelId),
                description: this.extractDescription(html),
                pulls: this.extractPulls(html),
                tags_count: this.extractTagsCount(html),
                capabilities: this.extractCapabilities(html, modelId),
                last_updated: this.extractLastUpdated(html),
                url: url,
                type: this.isOfficialModel(html) ? 'official' : 'community'
            };
        } catch (error) {
            this.onError(`Error scraping ${modelId}: ${error.message}`);
            return null;
        }
    }

    /**
     * Scrape all tags/variants for a model.
     *
     * @param {string} modelId
     * @returns {Promise<Array<Object>>} At least one variant; falls back to
     *   `<modelId>:latest` when nothing is found or the request fails.
     */
    async scrapeModelTags(modelId) {
        const url = `${this.baseURL}/library/${modelId}/tags`;

        try {
            const html = await this.httpGet(url);
            const variants = [];
            // The id is matched literally: "llama3.1" must not match "llama3x1".
            const idSource = this.escapeRegExp(modelId);
            let match;

            // Pattern 1: tag blocks with size info, e.g. "llama3.1:8b 4.9GB"
            const tagBlockPattern = new RegExp(
                `(${idSource}:[\\w\\d\\.\\-]+)\\s*[\\n\\r\\s]+([\\d\\.]+)\\s*(GB|MB|KB)`,
                'gi'
            );
            while ((match = tagBlockPattern.exec(html)) !== null) {
                const tag = match[1];
                const sizeNum = parseFloat(match[2]);
                const sizeUnit = match[3].toUpperCase();

                let sizeGB = sizeNum;
                if (sizeUnit === 'MB') sizeGB = sizeNum / 1024;
                if (sizeUnit === 'KB') sizeGB = sizeNum / (1024 * 1024);

                variants.push(this.parseVariant(modelId, tag, sizeGB));
            }

            // Pattern 2: every remaining tag mention
            const tagPattern = new RegExp(`${idSource}:([\\w\\d\\.\\-]+)`, 'gi');
            const seenTags = new Set(variants.map((v) => v.tag));
            while ((match = tagPattern.exec(html)) !== null) {
                const tag = `${modelId}:${match[1]}`;
                if (!seenTags.has(tag)) {
                    seenTags.add(tag);
                    variants.push(this.parseVariant(modelId, tag, null));
                }
            }

            // Pattern 3: loose "<size> ·" mentions, used to back-fill sizes
            const sizePattern = /(\d+(?:\.\d+)?)\s*(GB|MB)\s*·/gi;
            const sizes = [];
            while ((match = sizePattern.exec(html)) !== null) {
                const sizeNum = parseFloat(match[1]);
                const sizeUnit = match[2].toUpperCase();
                sizes.push(sizeUnit === 'MB' ? sizeNum / 1024 : sizeNum);
            }

            // Hand sizes, in page order, to variants that still lack one.
            let sizeIndex = 0;
            for (const variant of variants) {
                if (variant.size_gb === null && sizeIndex < sizes.length) {
                    variant.size_gb = sizes[sizeIndex++];
                }
            }

            // If no variants found, create a default one
            if (variants.length === 0) {
                variants.push(this.parseVariant(modelId, `${modelId}:latest`, null));
            }

            return variants;
        } catch (error) {
            this.onError(`Error scraping tags for ${modelId}: ${error.message}`);
            // Return at least a latest variant
            return [this.parseVariant(modelId, `${modelId}:latest`, null)];
        }
    }

    /**
     * Build a variant record from a full tag string ("model:tag").
     *
     * @param {string} modelId
     * @param {string} tag - Full tag including the model id.
     * @param {number|null} sizeGB - Known size in GB, or null to estimate.
     * @returns {Object} Variant record.
     */
    parseVariant(modelId, tag, sizeGB) {
        const variant = {
            model_id: modelId,
            tag: tag,
            params_b: this.extractParams(tag),
            quant: this.extractQuantization(tag),
            size_gb: sizeGB,
            context_length: this.extractContextLength(tag),
            input_types: this.extractInputTypes(tag, modelId),
            is_moe: this.isMoE(tag, modelId),
            expert_count: this.extractExpertCount(tag)
        };

        // Estimate size if not provided
        if (variant.size_gb === null && variant.params_b) {
            variant.size_gb = this.estimateSize(variant.params_b, variant.quant);
        }

        return variant;
    }

    /**
     * Extract parameter count (in billions) from a tag.
     *
     * @param {string} tag
     * @returns {number|null}
     */
    extractParams(tag) {
        // Match patterns like: 8b, 70b, 7b, 1.5b, 0.5b, 405b
        const match = tag.match(/(\d+\.?\d*)[bB](?:[^a-zA-Z]|$)/);
        if (match) {
            return parseFloat(match[1]);
        }

        // Fall back to size words in the name; first hit wins.
        const sizePatterns = [
            { pattern: /mini/i, size: 3.8 },
            { pattern: /tiny/i, size: 1.1 },
            { pattern: /small/i, size: 7 },
            { pattern: /medium/i, size: 13 },
            { pattern: /large/i, size: 34 },
            { pattern: /xl/i, size: 70 },
        ];

        for (const { pattern, size } of sizePatterns) {
            if (pattern.test(tag)) return size;
        }

        return null;
    }

    /**
     * Extract the quantization label from a tag.
     *
     * @param {string} tag - Full tag ("model:tag") or bare tag name.
     * @returns {string|null} Canonical label, 'Q4_0' for plain tags, or null.
     */
    extractQuantization(tag) {
        const quantPatterns = [
            // K-quant patterns
            { pattern: /q8[_-]?0/i, quant: 'Q8_0' },
            { pattern: /q6[_-]?k/i, quant: 'Q6_K' },
            { pattern: /q5[_-]?k[_-]?m/i, quant: 'Q5_K_M' },
            { pattern: /q5[_-]?k[_-]?s/i, quant: 'Q5_K_S' },
            { pattern: /q5[_-]?0/i, quant: 'Q5_0' },
            { pattern: /q4[_-]?k[_-]?m/i, quant: 'Q4_K_M' },
            { pattern: /q4[_-]?k[_-]?s/i, quant: 'Q4_K_S' },
            { pattern: /q4[_-]?0/i, quant: 'Q4_0' },
            { pattern: /q3[_-]?k[_-]?m/i, quant: 'Q3_K_M' },
            { pattern: /q3[_-]?k[_-]?s/i, quant: 'Q3_K_S' },
            { pattern: /q3[_-]?k[_-]?l/i, quant: 'Q3_K_L' },
            { pattern: /q2[_-]?k/i, quant: 'Q2_K' },
            // FP patterns
            { pattern: /fp16/i, quant: 'FP16' },
            { pattern: /fp32/i, quant: 'FP32' },
            { pattern: /f16/i, quant: 'FP16' },
            { pattern: /f32/i, quant: 'FP32' },
            // INT patterns
            { pattern: /int8/i, quant: 'INT8' },
            { pattern: /int4/i, quant: 'INT4' },
            // IQ patterns
            { pattern: /iq4[_-]?nl/i, quant: 'IQ4_NL' },
            { pattern: /iq4[_-]?xs/i, quant: 'IQ4_XS' },
            { pattern: /iq3[_-]?xxs/i, quant: 'IQ3_XXS' },
            { pattern: /iq3[_-]?xs/i, quant: 'IQ3_XS' },
            { pattern: /iq2[_-]?xxs/i, quant: 'IQ2_XXS' },
            { pattern: /iq2[_-]?xs/i, quant: 'IQ2_XS' },
            { pattern: /iq1[_-]?s/i, quant: 'IQ1_S' },
        ];

        for (const { pattern, quant } of quantPatterns) {
            if (pattern.test(tag)) return quant;
        }

        // Default to Q4_0 for standard tags. Only the part after the colon is
        // checked for '-', so a hyphen in the model id ("deepseek-r1:7b")
        // does not suppress the default.
        const colonAt = tag.indexOf(':');
        const tagName = colonAt === -1 ? tag : tag.slice(colonAt + 1);
        if (tag.includes(':latest') || !tagName.includes('-')) {
            return 'Q4_0';
        }

        return null;
    }

    /**
     * Extract context length (in tokens) from a tag.
     *
     * @param {string} tag
     * @returns {number} Explicit "<n>k" value, a known-family value, or 4096.
     */
    extractContextLength(tag) {
        // Match patterns like: 128k, 32k, 8k, 4k
        const match = tag.match(/(\d+)[kK](?:[^a-zA-Z]|$)/);
        if (match) {
            return parseInt(match[1], 10) * 1024;
        }

        // Known models with specific context lengths
        const longContextModels = [
            { pattern: /qwen2\.5|qwen2/i, ctx: 131072 },
            { pattern: /llama3\.1/i, ctx: 131072 },
            { pattern: /llama3\.2/i, ctx: 131072 },
            { pattern: /mistral/i, ctx: 32768 },
            { pattern: /gemma2/i, ctx: 8192 },
            { pattern: /phi-?3/i, ctx: 131072 },
        ];

        for (const { pattern, ctx } of longContextModels) {
            if (pattern.test(tag)) return ctx;
        }

        return 4096; // Default
    }

    /**
     * List the input modalities of a variant.
     *
     * @param {string} tag
     * @param {string} modelId
     * @returns {string[]} Always contains 'text'; adds 'image' for vision models.
     */
    extractInputTypes(tag, modelId) {
        const types = ['text'];
        const visionPattern = /llava|vision|minicpm-v|bakllava|moondream/i;

        // Vision/multimodal models
        if (visionPattern.test(tag) || visionPattern.test(modelId)) {
            types.push('image');
        }

        return types;
    }

    /**
     * Check if model is Mixture of Experts.
     *
     * @param {string} tag
     * @param {string} modelId
     * @returns {boolean}
     */
    isMoE(tag, modelId) {
        return /mixtral|moe|experts/i.test(tag) || /mixtral|moe/i.test(modelId);
    }

    /**
     * Extract expert count for MoE models (the "8" in "8x7b").
     *
     * @param {string} tag
     * @returns {number|null}
     */
    extractExpertCount(tag) {
        const match = tag.match(/(\d+)x\d+/i);
        if (match) return parseInt(match[1], 10);

        if (/mixtral/i.test(tag)) return 8;

        return null;
    }

    /**
     * Estimate size based on params and quantization.
     *
     * @param {number} paramsB - Parameter count in billions.
     * @param {string|null} quant - Quantization label; unknown labels use 0.5 bytes/param.
     * @returns {number} Size in GB, rounded to one decimal.
     */
    estimateSize(paramsB, quant) {
        const bytesPerParam = {
            'FP32': 4,
            'FP16': 2,
            'Q8_0': 1,
            'Q6_K': 0.75,
            'Q5_K_M': 0.625,
            'Q5_K_S': 0.625,
            'Q5_0': 0.625,
            'Q4_K_M': 0.5,
            'Q4_K_S': 0.5,
            'Q4_0': 0.5,
            'Q3_K_M': 0.4,
            'Q3_K_S': 0.375,
            'Q3_K_L': 0.4,
            'Q2_K': 0.3,
            'IQ4_NL': 0.5,
            'IQ4_XS': 0.45,
            'IQ3_XXS': 0.35,
            'IQ3_XS': 0.375,
            'IQ2_XXS': 0.25,
            'IQ2_XS': 0.28,
            'IQ1_S': 0.2,
            'INT8': 1,
            'INT4': 0.5,
        };

        const bpp = bytesPerParam[quant] || 0.5;
        return Math.round(paramsB * bpp * 10) / 10; // GB, rounded to 1 decimal
    }

    // ==================== HTML EXTRACTION HELPERS ====================

    /**
     * Derive a display name from the page title, the first h1, or the id.
     *
     * @param {string} html
     * @param {string} modelId
     * @returns {string}
     */
    extractModelName(html, modelId) {
        const titleMatch = html.match(/<title>([^<]+)<\/title>/i);
        if (titleMatch) {
            // Strip "· site" / " - site" suffixes. The dash must be surrounded
            // by whitespace so hyphenated names ("deepseek-r1") stay intact.
            const title = titleMatch[1].split('·')[0].split(/\s+-\s+/)[0].trim();
            if (title && title.toLowerCase() !== 'ollama') {
                return title;
            }
        }

        const h1Match = html.match(/<h1[^>]*>([^<]+)<\/h1>/i);
        if (h1Match) {
            return h1Match[1].trim();
        }

        // Capitalize the model ID
        return modelId.split('-').map((w) => w.charAt(0).toUpperCase() + w.slice(1)).join(' ');
    }

    /**
     * Extract a description from the meta tag or the first suitable paragraph.
     *
     * @param {string} html
     * @returns {string} Description (at most 500 characters) or ''.
     */
    extractDescription(html) {
        // Try meta description
        const metaMatch = html.match(/<meta[^>]*name=["']description["'][^>]*content=["']([^"']+)["']/i);
        if (metaMatch) {
            return metaMatch[1].substring(0, 500);
        }

        // Try first paragraph
        const pMatch = html.match(/<p[^>]*>([^<]{20,500})<\/p>/i);
        if (pMatch) {
            return pMatch[1].trim();
        }

        return '';
    }

    /**
     * Extract the pull/download count from a detail page.
     *
     * @param {string} html
     * @returns {number} Count, or 0 when none is found.
     */
    extractPulls(html) {
        const pulls = this.matchPullCount(
            html,
            /(\d+(?:\.\d+)?[KMB]?)\s*(?:Pulls|pulls|Downloads|downloads)/i
        );
        return pulls === null ? 0 : pulls;
    }

    /**
     * Extract the number of tags/versions from a detail page.
     *
     * @param {string} html
     * @returns {number} Count, or 1 when none is found.
     */
    extractTagsCount(html) {
        const match = html.match(/(\d+)\s*(?:Tags|tags|Versions|versions)/i);
        return match ? parseInt(match[1], 10) : 1;
    }

    /**
     * Infer capability labels from the model id and page text.
     *
     * @param {string} html
     * @param {string} modelId
     * @returns {string[]} De-duplicated labels; ['chat'] when nothing matched.
     */
    extractCapabilities(html, modelId) {
        const capabilities = [];

        // Detect by model ID patterns
        if (/code|coder|starcoder/i.test(modelId)) capabilities.push('coding');
        if (/llava|vision|minicpm-v|bakllava|moondream/i.test(modelId)) capabilities.push('multimodal');
        if (/embed|bge|nomic|gte|e5/i.test(modelId)) capabilities.push('embeddings');
        if (/deepseek-r1|qwq|reasoning/i.test(modelId)) capabilities.push('reasoning');
        if (/math|mathstral/i.test(modelId)) capabilities.push('math');
        if (/dolphin|wizard|uncensored/i.test(modelId)) capabilities.push('creative');
        if (/guard|shield|safety/i.test(modelId)) capabilities.push('safety');

        // Detect from HTML content
        if (/code generation|programming|coding/i.test(html)) capabilities.push('coding');
        if (/vision|image understanding|multimodal/i.test(html)) capabilities.push('multimodal');
        if (/embedding|semantic search/i.test(html)) capabilities.push('embeddings');
        if (/reasoning|chain.of.thought/i.test(html)) capabilities.push('reasoning');

        // Default capability
        if (capabilities.length === 0) capabilities.push('chat');

        return [...new Set(capabilities)];
    }

    /**
     * Extract the relative "updated N units ago" text.
     *
     * @param {string} html
     * @returns {string} e.g. "3 weeks ago", or '' when absent.
     */
    extractLastUpdated(html) {
        const match = html.match(/Updated?\s*(\d+\s*(?:days?|weeks?|months?|hours?)\s*ago)/i);
        return match ? match[1] : '';
    }

    /**
     * Heuristic: official when the page mentions "official"/"verified" or
     * does not mention "community".
     *
     * @param {string} html
     * @returns {boolean}
     */
    isOfficialModel(html) {
        return /official|verified/i.test(html) || !/community/i.test(html);
    }

    // ==================== MAIN SCRAPING METHOD ====================

    /**
     * Scrape all models with all variants.
     *
     * @param {Function} [onModelComplete] - Called as `(model, variants)` after each model.
     * @returns {Promise<{models: Object[], variants: Object[], stats: Object}>}
     */
    async scrapeAll(onModelComplete = null) {
        const startTime = Date.now();

        // Step 1: Get list of all models
        const modelList = await this.scrapeModelList();
        const totalModels = modelList.length;

        this.onProgress({
            phase: 'details',
            message: `Scraping ${totalModels} models...`,
            current: 0,
            total: totalModels
        });

        const allModels = [];
        const allVariants = [];
        // Guard against a zero/NaN step, which would never advance the loop.
        const batchSize = Math.max(1, Math.floor(this.concurrency) || 1);

        // Step 2: Process models in batches
        for (let i = 0; i < modelList.length; i += batchSize) {
            const batch = modelList.slice(i, i + batchSize);

            const batchResults = await Promise.all(batch.map(async ({ id }) => {
                try {
                    // Get model details
                    const model = await this.scrapeModelDetails(id);
                    if (!model) return null;

                    // Get all variants/tags
                    await this.sleep(this.rateLimitMs);
                    const variants = await this.scrapeModelTags(id);

                    return { model, variants };
                } catch (error) {
                    this.onError(`Error processing ${id}: ${error.message}`);
                    return null;
                }
            }));

            for (const result of batchResults) {
                if (!result) continue;

                allModels.push(result.model);
                allVariants.push(...result.variants);

                if (onModelComplete) {
                    onModelComplete(result.model, result.variants);
                }
            }

            const done = Math.min(i + batchSize, totalModels);
            this.onProgress({
                phase: 'details',
                message: `Scraped ${done}/${totalModels} models`,
                current: done,
                total: totalModels
            });

            // Rate limiting between batches (nothing to wait for after the last)
            if (done < totalModels) {
                await this.sleep(this.rateLimitMs * 2);
            }
        }

        const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);

        this.onProgress({
            phase: 'complete',
            message: `Scraped ${allModels.length} models with ${allVariants.length} variants in ${elapsed}s`
        });

        return {
            models: allModels,
            variants: allVariants,
            stats: {
                modelCount: allModels.length,
                variantCount: allVariants.length,
                elapsedSeconds: parseFloat(elapsed)
            }
        };
    }
}
633
+
634
+ module.exports = EnhancedOllamaScraper;