llm-checker 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. package/LICENSE +21 -0
  2. package/README.md +418 -0
  3. package/analyzer/compatibility.js +584 -0
  4. package/analyzer/performance.js +505 -0
  5. package/bin/CLAUDE.md +12 -0
  6. package/bin/enhanced_cli.js +3118 -0
  7. package/bin/test-deterministic.js +41 -0
  8. package/package.json +96 -0
  9. package/src/CLAUDE.md +12 -0
  10. package/src/ai/intelligent-selector.js +615 -0
  11. package/src/ai/model-selector.js +312 -0
  12. package/src/ai/multi-objective-selector.js +820 -0
  13. package/src/commands/check.js +58 -0
  14. package/src/data/CLAUDE.md +11 -0
  15. package/src/data/model-database.js +637 -0
  16. package/src/data/sync-manager.js +279 -0
  17. package/src/hardware/CLAUDE.md +12 -0
  18. package/src/hardware/backends/CLAUDE.md +11 -0
  19. package/src/hardware/backends/apple-silicon.js +318 -0
  20. package/src/hardware/backends/cpu-detector.js +490 -0
  21. package/src/hardware/backends/cuda-detector.js +417 -0
  22. package/src/hardware/backends/intel-detector.js +436 -0
  23. package/src/hardware/backends/rocm-detector.js +440 -0
  24. package/src/hardware/detector.js +573 -0
  25. package/src/hardware/pc-optimizer.js +635 -0
  26. package/src/hardware/specs.js +286 -0
  27. package/src/hardware/unified-detector.js +442 -0
  28. package/src/index.js +2289 -0
  29. package/src/models/CLAUDE.md +17 -0
  30. package/src/models/ai-check-selector.js +806 -0
  31. package/src/models/catalog.json +426 -0
  32. package/src/models/deterministic-selector.js +1145 -0
  33. package/src/models/expanded_database.js +1142 -0
  34. package/src/models/intelligent-selector.js +532 -0
  35. package/src/models/requirements.js +310 -0
  36. package/src/models/scoring-config.js +57 -0
  37. package/src/models/scoring-engine.js +715 -0
  38. package/src/ollama/.cache/README.md +33 -0
  39. package/src/ollama/CLAUDE.md +24 -0
  40. package/src/ollama/client.js +438 -0
  41. package/src/ollama/enhanced-client.js +113 -0
  42. package/src/ollama/enhanced-scraper.js +634 -0
  43. package/src/ollama/manager.js +357 -0
  44. package/src/ollama/native-scraper.js +776 -0
  45. package/src/plugins/CLAUDE.md +11 -0
  46. package/src/plugins/examples/custom_model_plugin.js +87 -0
  47. package/src/plugins/index.js +295 -0
  48. package/src/utils/CLAUDE.md +11 -0
  49. package/src/utils/config.js +359 -0
  50. package/src/utils/formatter.js +315 -0
  51. package/src/utils/logger.js +272 -0
  52. package/src/utils/model-classifier.js +167 -0
  53. package/src/utils/verbose-progress.js +266 -0
@@ -0,0 +1,776 @@
1
+ const https = require('https');
2
+ const fs = require('fs');
3
+ const path = require('path');
4
+ const os = require('os');
5
+ const { classifyAllModels } = require('../utils/model-classifier');
6
+
7
/**
 * Scrapes the public Ollama model library (HTML pages) and caches the result on disk.
 *
 * Uses only Node's built-in `https`, `fs`, `path` and `os` modules, all parsing is
 * regex-based, so every extracted field is best-effort.
 */
class OllamaNativeScraper {
    /**
     * Sets up endpoints and cache paths and tries to create the cache directory.
     */
    constructor() {
        this.baseURL = 'https://ollama.com';
        this.registryAPI = 'https://registry.ollama.ai';

        // Primary cache location, under the user's home directory.
        this.cacheDir = path.join(os.homedir(), '.llm-checker', 'cache', 'ollama');
        this.cacheFile = path.join(this.cacheDir, 'ollama-models.json');
        this.detailedCacheFile = path.join(this.cacheDir, 'ollama-detailed-models.json');

        // Legacy cache location next to this module; this class only reads from it.
        this.legacyCacheDir = path.join(__dirname, '.cache');
        this.legacyCacheFile = path.join(this.legacyCacheDir, 'ollama-models.json');
        this.legacyDetailedCacheFile = path.join(this.legacyCacheDir, 'ollama-detailed-models.json');

        // Cache entries are considered fresh for 6 hours.
        this.cacheExpiry = 6 * 60 * 60 * 1000;

        // `recursive: true` makes mkdirSync a no-op when the directory already exists.
        // Cache writes are best-effort everywhere else in this class, so an
        // unwritable home directory must not prevent the scraper from being built.
        try {
            fs.mkdirSync(this.cacheDir, { recursive: true });
        } catch (error) {
            console.warn(`Could not create cache directory ${this.cacheDir}: ${error.message}`);
        }
    }

    /**
     * Performs an HTTPS request and buffers the whole response body.
     *
     * @param {string} url - Absolute URL to request.
     * @param {object} [options]
     * @param {string} [options.method='GET'] - HTTP method.
     * @param {object} [options.headers] - Extra headers, merged over the defaults.
     * @param {string|Buffer} [options.body] - Request body to send.
     * @param {number} [options.timeout=15000] - Socket timeout in milliseconds.
     * @param {number} [options.maxBytes=5242880] - Maximum accepted response size in bytes.
     * @returns {Promise<{statusCode: number, data: string, headers: object}>}
     *   Resolves for 2xx responses only; rejects on any other status, on timeout,
     *   on oversized responses and on network errors. Redirects are not followed.
     */
    async httpRequest(url, options = {}) {
        return new Promise((resolve, reject) => {
            const urlObj = new URL(url);
            const requestOptions = {
                hostname: urlObj.hostname,
                port: urlObj.port || 443,
                path: urlObj.pathname + urlObj.search,
                method: options.method || 'GET',
                headers: {
                    'User-Agent': 'Mozilla/5.0',
                    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                    'Accept-Language': 'en-US,en;q=0.5',
                    // No Accept-Encoding: compressed responses are not handled here.
                    'Connection': 'keep-alive',
                    'Upgrade-Insecure-Requests': '1',
                    ...options.headers
                }
            };

            const timeoutMs = typeof options.timeout === 'number' ? options.timeout : 15000;
            const maxBytes = typeof options.maxBytes === 'number' ? options.maxBytes : 5 * 1024 * 1024; // 5MB

            const req = https.request(requestOptions, (res) => {
                // Collect raw Buffers and decode once at the end: decoding chunk by
                // chunk corrupts multi-byte UTF-8 sequences split across chunks.
                const chunks = [];
                let received = 0;

                res.on('data', (chunk) => {
                    received += chunk.length;
                    if (received > maxBytes) {
                        req.destroy(new Error('Response too large'));
                        return;
                    }
                    chunks.push(chunk);
                });

                res.on('end', () => {
                    if (res.statusCode >= 200 && res.statusCode < 300) {
                        const data = Buffer.concat(chunks).toString('utf8');
                        resolve({ statusCode: res.statusCode, data, headers: res.headers });
                    } else {
                        reject(new Error(`HTTP ${res.statusCode}: ${res.statusMessage}`));
                    }
                });

                res.on('error', reject);
            });

            // Socket/request timeout
            req.setTimeout(timeoutMs, () => {
                req.destroy(new Error('Request timeout'));
            });

            req.on('error', reject);
            if (options.body) req.write(options.body);
            req.end();
        });
    }

    /**
     * Extracts model cards from the library listing page.
     *
     * @param {string} html - HTML of the `/library` page.
     * @returns {Array<object>} One record per matched card; when the strict
     *   pattern matches nothing, the result of `parseModelsFallback(html)`.
     */
    parseModelFromHTML(html) {
        const models = [];
        // Capture groups: 1 identifier, 2 name, 3 description, 4 labels, 5 pulls,
        // 6 tag count, 7 "N units ago". The "Updated" part is tried first (bounded
        // and not allowed to cross the card's closing </a>); only if it is absent
        // does the pattern fall back to skipping straight to </a>. Having the lazy
        // filler outside the optional group made the group practically unreachable.
        const pattern = /<a[^>]*href="\/library\/([^"]*)"[^>]*>[\s\S]{0,5000}?<h3[^>]*>([^<]*)<\/h3>[\s\S]{0,2000}?<p[^>]*>([^<]*)<\/p>[\s\S]{0,2000}?(?:<span[^>]*>([^<]*)<\/span>)[\s\S]{0,2000}?(?:(\d+(?:\.\d+)?[KMB]?)\s*(?:Pulls|pulls))[\s\S]{0,1000}?(?:(\d+)\s*(?:Tags|tags))(?:(?:(?!<\/a>)[\s\S]){0,1000}?Updated\s*(\d+\s*\w+\s*ago)[\s\S]{0,500}?|[\s\S]{0,1500}?)<\/a>/gi;

        let match;
        while ((match = pattern.exec(html)) !== null) {
            const [, identifier, name, description, labels, pulls, tags, lastUpdated] = match;
            const isCommunity = identifier.includes('/');

            models.push({
                model_identifier: identifier,
                model_name: this.cleanText(name),
                description: this.cleanText(description),
                labels: labels ? labels.split(',').map(l => l.trim()) : [],
                pulls: this.parsePulls(pulls),
                tags: parseInt(tags, 10) || 0,
                last_updated: lastUpdated || 'Unknown',
                url: `${this.baseURL}/library/${identifier}`,
                namespace: isCommunity ? identifier.split('/')[0] : null,
                model_type: isCommunity ? 'community' : 'official'
            });
        }

        if (models.length === 0) {
            return this.parseModelsFallback(html);
        }

        return models;
    }

    /**
     * Looser parser used when the strict card pattern finds nothing: collects every
     * distinct `/library/...` link and inspects the 500 characters on either side
     * of its first occurrence for a heading, a paragraph and a pull count.
     *
     * @param {string} html - HTML of the `/library` page.
     * @returns {Array<object>} Records with the same keys as `parseModelFromHTML`.
     */
    parseModelsFallback(html) {
        const models = [];
        const libraryLinks = html.match(/href="\/library\/[^"]*"/g);

        if (!libraryLinks) {
            return models;
        }

        for (const link of new Set(libraryLinks)) {
            const identifier = link.match(/\/library\/([^"]*)/)[1];
            // A bare href="/library/" is the index link, not a model.
            if (!identifier) continue;

            const linkIndex = html.indexOf(link);
            const section = html.substring(Math.max(0, linkIndex - 500), linkIndex + 500);
            const nameMatch = section.match(/<h[2-4][^>]*>([^<]*)<\/h[2-4]>/);
            const descMatch = section.match(/<p[^>]*>([^<]*)<\/p>/);
            const pullsMatch = section.match(/(\d+(?:\.\d+)?[KMB]?)\s*(?:Pulls|pulls)/i);
            const isCommunity = identifier.includes('/');

            models.push({
                model_identifier: identifier,
                model_name: nameMatch ? this.cleanText(nameMatch[1]) : identifier,
                description: descMatch ? this.cleanText(descMatch[1]) : '',
                labels: [],
                pulls: pullsMatch ? this.parsePulls(pullsMatch[1]) : 0,
                tags: 0,
                last_updated: 'Unknown',
                url: `${this.baseURL}/library/${identifier}`,
                namespace: isCommunity ? identifier.split('/')[0] : null,
                model_type: isCommunity ? 'community' : 'official'
            });
        }

        return models;
    }

    /**
     * Decodes the five basic HTML entities and collapses whitespace.
     *
     * @param {string} text - Raw text taken from HTML.
     * @returns {string} Decoded, single-spaced, trimmed text ('' for non-strings).
     */
    cleanText(text) {
        if (typeof text !== 'string') return '';
        return text
            .replace(/&lt;/g, '<')
            .replace(/&gt;/g, '>')
            .replace(/&quot;/g, '"')
            .replace(/&#39;/g, "'")
            // '&amp;' must be decoded last, otherwise '&amp;lt;' is decoded twice.
            .replace(/&amp;/g, '&')
            .replace(/\s+/g, ' ')
            .trim();
    }

    /**
     * Converts a pull counter such as "1.5K", "2M" or "873" to an integer.
     *
     * @param {string} pullsStr - Counter text.
     * @returns {number} Integer count; 0 for empty or non-numeric input.
     */
    parsePulls(pullsStr) {
        if (!pullsStr) return 0;
        const num = parseFloat(pullsStr);
        if (Number.isNaN(num)) return 0;
        const str = String(pullsStr).toLowerCase();
        if (str.includes('k')) return Math.floor(num * 1000);
        if (str.includes('m')) return Math.floor(num * 1000000);
        if (str.includes('b')) return Math.floor(num * 1000000000);
        return Math.floor(num);
    }

    /**
     * Helper: returns whichever of the two cache paths exists (primary first), or null.
     */
    _pickExistingFile(primary, legacy) {
        if (fs.existsSync(primary)) return primary;
        if (fs.existsSync(legacy)) return legacy;
        return null;
    }

    /**
     * Helper: true when a cache file exists and its mtime is younger than `cacheExpiry`.
     */
    _isFresh(primary, legacy) {
        try {
            const file = this._pickExistingFile(primary, legacy);
            if (!file) return false;
            const age = Date.now() - fs.statSync(file).mtime.getTime();
            return age < this.cacheExpiry;
        } catch {
            // The file may vanish between existsSync and statSync; treat as stale.
            return false;
        }
    }

    /**
     * Helper: parses the first existing cache file as JSON; null when missing or invalid.
     */
    _readJsonCache(primary, legacy) {
        try {
            const file = this._pickExistingFile(primary, legacy);
            if (!file) return null;
            return JSON.parse(fs.readFileSync(file, 'utf8'));
        } catch {
            return null;
        }
    }

    /**
     * Helper: writes the cache envelope for `models` to `file`; false on any failure.
     */
    _writeJsonCache(file, models) {
        try {
            const data = {
                models,
                total_count: models.length,
                cached_at: new Date().toISOString(),
                expires_at: new Date(Date.now() + this.cacheExpiry).toISOString()
            };
            fs.writeFileSync(file, JSON.stringify(data, null, 2));
            return true;
        } catch {
            return false;
        }
    }

    /** @returns {boolean} Whether the basic model cache exists and is still fresh. */
    isCacheValid() {
        return this._isFresh(this.cacheFile, this.legacyCacheFile);
    }

    /** @returns {object|null} Parsed basic model cache, or null if unavailable. */
    readCache() {
        return this._readJsonCache(this.cacheFile, this.legacyCacheFile);
    }

    /**
     * @param {Array<object>} models - Models to cache.
     * @returns {boolean} True when the basic cache file was written.
     */
    writeCache(models) {
        return this._writeJsonCache(this.cacheFile, models);
    }

    /** @returns {boolean} Whether the detailed model cache exists and is still fresh. */
    isDetailedCacheValid() {
        return this._isFresh(this.detailedCacheFile, this.legacyDetailedCacheFile);
    }

    /** @returns {object|null} Parsed detailed model cache, or null if unavailable. */
    readDetailedCache() {
        return this._readJsonCache(this.detailedCacheFile, this.legacyDetailedCacheFile);
    }

    /**
     * @param {Array<object>} models - Models to cache.
     * @returns {boolean} True when the detailed cache file was written.
     */
    writeDetailedCache(models) {
        return this._writeJsonCache(this.detailedCacheFile, models);
    }

    /**
     * Fetches the detail page of every model, five at a time, pausing one second
     * between batches so the server is not flooded.
     *
     * @param {Array<object>} basicModels - Records from the listing page.
     * @returns {Promise<Array<object>>} One record per input, in input order; a
     *   model whose lookup fails keeps its basic record.
     */
    async getDetailedModelsInfo(basicModels) {
        const detailedModels = [];
        const batchSize = 5;
        const totalBatches = Math.ceil(basicModels.length / batchSize);

        for (let i = 0; i < basicModels.length; i += batchSize) {
            const batch = basicModels.slice(i, i + batchSize);
            console.log(`Processing batch ${Math.floor(i/batchSize) + 1}/${totalBatches}`);

            const batchResults = await Promise.allSettled(
                batch.map(model => this.getModelDetailedInfo(model))
            );

            batchResults.forEach((result, index) => {
                const succeeded = result.status === 'fulfilled' && result.value;
                // On failure keep at least the basic information.
                detailedModels.push(succeeded ? result.value : batch[index]);
            });

            // Short pause between batches.
            if (i + batchSize < basicModels.length) {
                await new Promise(resolve => setTimeout(resolve, 1000));
            }
        }

        return detailedModels;
    }

    /**
     * Fetches and parses one model's detail page.
     *
     * @param {object} basicModel - Record from the listing page.
     * @returns {Promise<object>} The basic record merged with the parsed details
     *   (detail fields win, so `tags` becomes an array of tag names). Returns the
     *   unmodified basic record on any failure; never rejects.
     */
    async getModelDetailedInfo(basicModel) {
        try {
            const modelUrl = `${this.baseURL}/library/${basicModel.model_identifier}`;
            const response = await this.httpRequest(modelUrl);

            if (response.statusCode !== 200) {
                return basicModel; // fall back to basic information
            }

            const detailedInfo = this.parseModelDetailPage(response.data, basicModel);

            return {
                ...basicModel,
                ...detailedInfo,
                // Prefer the values scraped from the detail page when present.
                pulls: detailedInfo.actual_pulls || basicModel.pulls || 0,
                main_size: detailedInfo.main_size || 'Unknown',
                detailed_scraped_at: new Date().toISOString()
            };
        } catch (error) {
            console.warn(`Failed to get details for ${basicModel.model_identifier}: ${error.message}`);
            return basicModel; // fall back to basic information
        }
    }

    /**
     * Helper: escapes every RegExp metacharacter in `text`.
     */
    _escapeRegExp(text) {
        return String(text).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    }

    /**
     * Helper: collects up to 50 distinct `name:tag` strings from a detail page,
     * from `ollama run|pull` commands inside <code> elements and from any
     * `<identifier>:<tag>` occurrence in the page.
     */
    _extractTagsFromDetailPage(html, identifier) {
        const found = [];

        const codeBlocks = html.match(/<code[^>]*>([^<]+)<\/code>/g) || [];
        for (const block of codeBlocks) {
            const content = block.replace(/<[^>]*>/g, '').trim();
            const command = content.match(/ollama (?:run|pull) ([^\s]+)/);
            if (command) found.push(command[1]);
        }

        // The identifier is data, not a pattern: escape it so that e.g. the dot
        // in "llama3.1" only matches a literal dot.
        const id = this._escapeRegExp(identifier);
        found.push(...(html.match(new RegExp(`${id}:[\\w\\d\\.-]+`, 'g')) || []));

        // Case-insensitive pass for tags carrying a quantization marker. The match
        // always runs to the end of the tag, so no truncated tags are collected.
        found.push(...(html.match(new RegExp(`${id}:[\\w.-]*(?:q\\d+|fp\\d+|int\\d+)[\\w.-]*`, 'gi')) || []));

        return [...new Set(found)]
            .filter(tag => tag && tag.includes(':'))
            .slice(0, 50);
    }

    /**
     * Helper: finds the largest context size mentioned on the page and formats it
     * as e.g. "128K" or "1.0M"; null when none is found.
     */
    _extractContextLength(html) {
        const mentions = html.match(/context\s*:?\s*(\d+[kmb]?)/gi) ||
            html.match(/(\d+[kmb]?)\s*context/gi) ||
            html.match(/context\s+length\s*:?\s*(\d+[kmb]?)/gi);
        if (!mentions) return null;

        const multipliers = { k: 1000, m: 1000000, b: 1000000000 };
        const values = mentions
            .map(mention => {
                const num = mention.match(/(\d+[kmb]?)/i);
                if (!num) return 0;
                const value = num[1].toLowerCase();
                return parseInt(value, 10) * (multipliers[value.slice(-1)] || 1);
            })
            .filter(n => n > 0);
        if (values.length === 0) return null;

        const maxContext = Math.max(...values);
        if (maxContext > 1000000) return `${(maxContext/1000000).toFixed(1)}M`;
        if (maxContext > 1000) return `${(maxContext/1000).toFixed(0)}K`;
        return maxContext.toString();
    }

    /**
     * Helper: guesses supported input types from keywords in the lower-cased page;
     * defaults to ['text'].
     */
    _detectInputTypes(htmlLower) {
        const keywordGroups = [
            ['text', ['text', 'chat']],
            ['image', ['image', 'vision', 'visual']],
            ['code', ['code', 'programming']],
            ['audio', ['audio', 'speech']]
        ];
        const inputTypes = keywordGroups
            .filter(([, keywords]) => keywords.some(k => htmlLower.includes(k)))
            .map(([type]) => type);
        return inputTypes.length > 0 ? inputTypes : ['text'];
    }

    /**
     * Helper: assigns a category and use cases from the model identifier and
     * display name. Rules are checked in order and the first hit wins.
     *
     * NOTE(review): matching is by plain substring, so later keywords can be
     * shadowed by earlier rules ("dolphin" contains "phi"; "openchat" and
     * "neural-chat" contain "chat"). The order is kept as it was — confirm the
     * intended priority before changing it.
     */
    _categorizeModel(identifier, displayName) {
        const id = String(identifier || '').toLowerCase();
        const text = `${id} ${String(displayName || '').toLowerCase()}`;

        // `anywhere` keywords are searched in identifier + display name,
        // `inId` keywords in the identifier only.
        const rules = [
            { category: 'coding', use_cases: ['coding', 'programming', 'development'],
              anywhere: ['coder', 'codellama', 'starcoder', 'codestral', 'code-'], inId: [] },
            { category: 'embeddings', use_cases: ['embeddings', 'search', 'similarity'],
              anywhere: ['embed', 'nomic', 'bge', 'e5'], inId: ['all-minilm'] },
            { category: 'multimodal', use_cases: ['vision', 'multimodal', 'image'],
              anywhere: ['llava', 'pixtral', 'vision', 'moondream'], inId: ['qwen-vl', 'qwen2.5vl'] },
            { category: 'reasoning', use_cases: ['reasoning', 'mathematics', 'logic'],
              anywhere: ['deepseek-r1', 'reasoning', 'math'], inId: ['o1-'] },
            { category: 'talking', use_cases: ['chat', 'conversation', 'assistant'],
              anywhere: ['llama', 'mistral', 'phi', 'gemma', 'qwen', 'chat', 'instruct'], inId: [] },
            { category: 'reading', use_cases: ['reading', 'analysis', 'comprehension'],
              anywhere: ['solar', 'openchat', 'neural-chat', 'vicuna'], inId: [] },
            { category: 'creative', use_cases: ['creative', 'writing', 'storytelling'],
              anywhere: ['dolphin', 'wizard', 'uncensored', 'airoboros'], inId: [] }
        ];

        const hit = rules.find(rule =>
            rule.anywhere.some(k => text.includes(k)) || rule.inId.some(k => id.includes(k))
        );

        return hit
            ? { category: hit.category, use_cases: [...hit.use_cases] }
            : { category: 'general', use_cases: ['general', 'assistant'] };
    }

    /**
     * Extracts everything this scraper knows how to read from a model detail page.
     *
     * @param {string} html - HTML of `/library/<identifier>`.
     * @param {object} basicModel - Listing record; `model_identifier` and
     *   `model_name` are read.
     * @returns {object} Details object. Parsing errors are logged and whatever was
     *   extracted before the error is returned; the rest keeps its default.
     */
    parseModelDetailPage(html, basicModel) {
        const details = {
            variants: [],
            tags: [],
            detailed_description: '',
            parameters: {},
            quantizations: [],
            model_sizes: [],
            category: 'general',
            use_cases: [],
            main_size: 'Unknown',
            actual_pulls: 0,
            context_length: 'Unknown',
            input_types: []
        };

        try {
            const htmlLower = html.toLowerCase();

            details.tags = this._extractTagsFromDetailPage(html, basicModel.model_identifier);

            const contextLength = this._extractContextLength(html);
            if (contextLength) {
                details.context_length = contextLength;
            }

            details.input_types = this._detectInputTypes(htmlLower);

            // Every "<number>B/G/GB" mention; the first one found on the page is
            // used as the main size.
            const sizeMatches = html.match(/\b(\d+(?:\.\d+)?)\s*[BG]B?\b/gi);
            if (sizeMatches) {
                details.model_sizes = [...new Set(sizeMatches.map(size => size.toLowerCase()))];
                details.main_size = details.model_sizes[0];
            }

            const pullsMatch = html.match(/(\d+(?:\.\d+)?[KMB]?)\s*pulls?/i);
            if (pullsMatch) {
                details.actual_pulls = this.parsePulls(pullsMatch[1]);
            }

            const quantMatches = html.match(/\b(Q\d+_[KM](?:_[MS])?|Q\d+|FP16|FP32|INT8|INT4)\b/gi);
            if (quantMatches) {
                details.quantizations = [...new Set(quantMatches.map(q => q.toUpperCase()))];
            }

            const { category, use_cases } = this._categorizeModel(
                basicModel.model_identifier,
                basicModel.model_name
            );
            details.category = category;
            details.use_cases = use_cases;

            // First pattern that matches provides the description.
            const descPatterns = [
                /<p[^>]*class="[^"]*description[^"]*"[^>]*>([^<]+)<\/p>/i,
                /<meta[^>]*name="description"[^>]*content="([^"]+)"/i,
                /<div[^>]*class="[^"]*desc[^"]*"[^>]*>([^<]+)<\/div>/i
            ];
            for (const pattern of descPatterns) {
                const match = html.match(pattern);
                if (match) {
                    details.detailed_description = this.cleanText(match[1]);
                    break;
                }
            }

            // One variant per tag, with the size shown on the page when it can be
            // found and the estimate from the tag name otherwise.
            details.variants = details.tags.map(tag => {
                const estimated = this.estimateModelSizeGB(tag);
                const realSizeGB = this.extractRealSizeFromHTML(html, tag);
                return {
                    tag: tag,
                    size: this.extractSizeFromTag(tag),
                    quantization: this.extractQuantizationFromTag(tag),
                    command: `ollama pull ${tag}`,
                    estimated_size_gb: estimated,
                    real_size_gb: realSizeGB || estimated
                };
            });
        } catch (error) {
            console.warn(`Error parsing detailed page: ${error.message}`);
        }

        return details;
    }

    /**
     * @param {string} tag - e.g. "llama3.1:8b".
     * @returns {string} First "<number>b" / "<number>g" token in the tag,
     *   lower-cased, or 'unknown'.
     */
    extractSizeFromTag(tag) {
        const sizeMatch = tag.match(/(\d+\.?\d*)[bg]/i);
        return sizeMatch ? sizeMatch[0].toLowerCase() : 'unknown';
    }

    /**
     * @param {string} tag - e.g. "llama3:8b-instruct-q4_K_M".
     * @returns {string} Upper-cased quantization token (Q4_0, Q8_0, Q4_K_M,
     *   Q3_K_L, FP16, ...); 'Q4_0' is assumed when the tag names none.
     */
    extractQuantizationFromTag(tag) {
        // "q<bits>_<digit>" or "q<bits>_K[_S|_M|_L]"; the former pattern could not
        // match q4_0 / q8_0 / q3_K_L and silently reported the default for them.
        const quantMatch = tag.match(/\b(q\d+_(?:k(?:_?[sml])?|\d+)|fp16|fp32|int8|int4)\b/i);
        return quantMatch ? quantMatch[0].toUpperCase() : 'Q4_0'; // Default assumption
    }

    /**
     * Looks up the download size shown on the page for one tag.
     *
     * @param {string} html - Detail page HTML.
     * @param {string} tag - Full tag, e.g. "llama3.1:8b".
     * @returns {number|null} Size in GB, or null when no size is found.
     */
    extractRealSizeFromHTML(html, tag) {
        const toGB = (value, unit) => {
            const num = parseFloat(value);
            return unit.toUpperCase() === 'MB' ? num / 1024 : num;
        };

        try {
            // Preferred form: "[tag]" followed by "<size>GB|MB", e.g.
            // [llama3.1:8b]\n4.9GB · 128K context window · Text · 8 months ago
            const escapedTag = tag.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
            const bracketed = html.match(
                new RegExp(`\\[${escapedTag}\\][\\s\\S]*?(\\d+(?:\\.\\d+)?)(GB|MB)`, 'i')
            );
            if (bracketed) {
                return toGB(bracketed[1], bracketed[2]);
            }

            // Fallback: first size within 500 characters after the tag's first occurrence.
            const tagIndex = html.indexOf(tag);
            if (tagIndex !== -1) {
                const surrounding = html.substring(tagIndex, tagIndex + 500);
                const nearby = surrounding.match(/(\d+(?:\.\d+)?)\s*(GB|MB)/i);
                if (nearby) {
                    return toGB(nearby[1], nearby[2]);
                }
            }

            return null;
        } catch (error) {
            console.warn(`Error extracting real size for ${tag}: ${error.message}`);
            return null;
        }
    }

    /**
     * Rough size estimate taken from the tag name.
     *
     * @param {string} tag - e.g. "llama3.1:8b".
     * @returns {number} The number in front of the first "b"/"g" suffix, used
     *   directly as GB; 1 when the tag has no such token.
     */
    estimateModelSizeGB(tag) {
        const sizeMatch = tag.match(/(\d+\.?\d*)[bg]/i);
        return sizeMatch ? parseFloat(sizeMatch[1]) : 1;
    }

    /**
     * Returns the detailed model list, from cache when fresh, otherwise by scraping.
     *
     * @param {boolean} [forceRefresh=false] - Skip the freshness check and scrape.
     * @returns {Promise<{models: Array<object>, total_count: number, cached_at: string, expires_at: string}>}
     * @throws {Error} When scraping fails and no usable cache exists.
     */
    async scrapeAllModels(forceRefresh = false) {
        const isUsable = (data) => Boolean(data) && Array.isArray(data.models);

        try {
            if (!forceRefresh && this.isDetailedCacheValid()) {
                const cached = this.readDetailedCache();
                // A fresh but corrupt cache file must not be handed to callers
                // as `null`; scrape again instead.
                if (isUsable(cached)) return cached;
            }

            console.log('Scraping ALL Ollama models with detailed information...');

            // First the basic list of models...
            const response = await this.httpRequest(`${this.baseURL}/library`);
            if (response.statusCode !== 200) throw new Error(`Failed to fetch: ${response.statusCode}`);
            const basicModels = this.parseModelFromHTML(response.data);

            console.log(`Found ${basicModels.length} models. Getting detailed information...`);

            // An empty listing means the page could not be parsed; prefer whatever
            // cache exists over replacing it with an empty one.
            if (basicModels.length === 0) {
                const stale = this.readDetailedCache();
                if (isUsable(stale)) return stale;
            }

            // ...then the detail page of each model.
            const detailedModels = await this.getDetailedModelsInfo(basicModels);

            // Apply classification to all models
            const classifiedData = classifyAllModels({
                models: detailedModels,
                total_count: detailedModels.length,
                cached_at: new Date().toISOString(),
                expires_at: new Date(Date.now() + this.cacheExpiry).toISOString()
            });

            if (classifiedData.models.length > 0) {
                this.writeDetailedCache(classifiedData.models);
            }

            return {
                models: classifiedData.models,
                total_count: classifiedData.models.length,
                cached_at: new Date().toISOString(),
                expires_at: new Date(Date.now() + this.cacheExpiry).toISOString()
            };
        } catch (error) {
            // Best effort: an expired cache is better than nothing.
            const cachedData = this.readDetailedCache();
            if (isUsable(cachedData)) return cachedData;
            throw error;
        }
    }

    /**
     * Case-insensitive substring search over name, description and identifier.
     *
     * @param {string} query - Search text; a falsy query returns every model.
     * @param {object} [options] - Currently unused.
     * @returns {Promise<{models: Array<object>, total_count: number, query?: string}>}
     */
    async searchModels(query, options = {}) {
        const data = await this.scrapeAllModels();
        const models = data.models;

        if (!query) return { models, total_count: models.length };

        const needle = String(query).toLowerCase();
        const filtered = models.filter(model => {
            const searchText = `${model.model_name} ${model.description} ${model.model_identifier}`.toLowerCase();
            return searchText.includes(needle);
        });

        return {
            models: filtered,
            total_count: filtered.length,
            query
        };
    }

    /**
     * Pairs locally installed models with their library entry.
     *
     * @param {Array<{name?: string, model?: string}>} localModels - Local models;
     *   entries without a name are skipped.
     * @returns {Promise<{total_local: number, total_compatible: number, compatible_models: Array<object>, all_available: number}>}
     *   `match_type` is 'exact' when the library identifier equals the local name
     *   without its tag, 'fuzzy' otherwise.
     */
    async findCompatibleModels(localModels) {
        const data = await this.scrapeAllModels();
        const cloudModels = data.models;
        const compatible = [];

        for (const localModel of localModels) {
            const localName = localModel.name || localModel.model;
            if (typeof localName !== 'string' || localName === '') continue;

            const [baseName] = localName.split(':');
            const baseLower = baseName.toLowerCase();

            // Look for an identifier match across the whole list first, so an
            // earlier fuzzy hit (e.g. "llama3.1" for "llama3") cannot win over it.
            const exact = cloudModels.find(cloudModel =>
                cloudModel.model_identifier === baseName ||
                cloudModel.model_identifier === localName
            );

            const match = exact || cloudModels.find(cloudModel => {
                const cloudId = String(cloudModel.model_identifier || '').toLowerCase();
                const cloudName = String(cloudModel.model_name || '').toLowerCase();
                // Empty strings are substrings of everything; never match on them.
                return (baseLower !== '' && cloudName.includes(baseLower)) ||
                    (cloudId !== '' && baseLower.includes(cloudId));
            });

            if (match) {
                compatible.push({
                    local: localModel,
                    cloud: match,
                    match_type: match.model_identifier === baseName ? 'exact' : 'fuzzy'
                });
            }
        }

        return {
            total_local: localModels.length,
            total_compatible: compatible.length,
            compatible_models: compatible,
            all_available: data.total_count
        };
    }

    /**
     * Aggregate statistics over the scraped library.
     *
     * @returns {Promise<object>} Counts by model type, summed pulls, the ten most
     *   pulled models and the timestamp of the data.
     */
    async getStats() {
        const data = await this.scrapeAllModels();
        const models = data.models;

        // Sort a copy: `models` is the array handed out by scrapeAllModels.
        const byPullsDesc = [...models].sort((a, b) => (b.pulls || 0) - (a.pulls || 0));

        return {
            total_models: models.length,
            official_models: models.filter(m => m.model_type === 'official').length,
            community_models: models.filter(m => m.model_type === 'community').length,
            total_pulls: models.reduce((sum, m) => sum + (m.pulls || 0), 0),
            most_popular: byPullsDesc
                .slice(0, 10)
                .map(m => ({ name: m.model_name, pulls: m.pulls })),
            last_updated: data.cached_at
        };
    }
}
727
+
728
/**
 * Convenience entry point: compares local models with the Ollama library, or,
 * when no local models are given, returns the first 20 library models as
 * recommendations.
 *
 * @param {Array<{name?: string, model?: string}>} [localModels=[]] - Locally
 *   installed models; anything that is not an array is treated as empty.
 * @returns {Promise<object>} Always resolves. On failure the result carries
 *   zeroed counters and an `error` message instead of rejecting.
 */
async function getOllamaModelsIntegration(localModels = []) {
    // Normalise once so that a `null` argument cannot throw in the try block and
    // then again inside the catch handler.
    const locals = Array.isArray(localModels) ? localModels : [];

    try {
        // Constructed inside the try so that a constructor failure is reported
        // through the same error-shaped result as a scraping failure.
        const scraper = new OllamaNativeScraper();

        if (locals.length > 0) {
            return await scraper.findCompatibleModels(locals);
        }

        const allModels = await scraper.scrapeAllModels();
        return {
            total_local: 0,
            total_compatible: 0,
            compatible_models: [],
            all_available: allModels.total_count,
            recommendations: allModels.models.slice(0, 20)
        };
    } catch (error) {
        return {
            total_local: locals.length,
            total_compatible: 0,
            compatible_models: [],
            all_available: 0,
            error: error.message
        };
    }
}
755
+
756
/**
 * Manual smoke test, run when this file is executed directly: looks up three
 * sample local models and prints the integration result as JSON.
 *
 * @returns {Promise<void>}
 */
async function testScraper() {
    // No scraper is created here: getOllamaModelsIntegration builds its own, and
    // an extra instance would only repeat the constructor's side effects.
    const localModels = [
        { name: 'mistral:latest' },
        { name: 'deepseek-coder:6.7b' },
        { name: 'deepseek-coder:1.3b' }
    ];

    const result = await getOllamaModelsIntegration(localModels);
    console.log(JSON.stringify(result, null, 2));
}
768
+
769
// Public API of this module.
module.exports = { OllamaNativeScraper, getOllamaModelsIntegration };

// When executed directly (not required by another module), run the smoke test
// and print any failure to stderr.
const isDirectRun = require.main === module;
if (isDirectRun) {
    testScraper().catch((error) => console.error(error));
}