llm-checker 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +418 -0
- package/analyzer/compatibility.js +584 -0
- package/analyzer/performance.js +505 -0
- package/bin/CLAUDE.md +12 -0
- package/bin/enhanced_cli.js +3118 -0
- package/bin/test-deterministic.js +41 -0
- package/package.json +96 -0
- package/src/CLAUDE.md +12 -0
- package/src/ai/intelligent-selector.js +615 -0
- package/src/ai/model-selector.js +312 -0
- package/src/ai/multi-objective-selector.js +820 -0
- package/src/commands/check.js +58 -0
- package/src/data/CLAUDE.md +11 -0
- package/src/data/model-database.js +637 -0
- package/src/data/sync-manager.js +279 -0
- package/src/hardware/CLAUDE.md +12 -0
- package/src/hardware/backends/CLAUDE.md +11 -0
- package/src/hardware/backends/apple-silicon.js +318 -0
- package/src/hardware/backends/cpu-detector.js +490 -0
- package/src/hardware/backends/cuda-detector.js +417 -0
- package/src/hardware/backends/intel-detector.js +436 -0
- package/src/hardware/backends/rocm-detector.js +440 -0
- package/src/hardware/detector.js +573 -0
- package/src/hardware/pc-optimizer.js +635 -0
- package/src/hardware/specs.js +286 -0
- package/src/hardware/unified-detector.js +442 -0
- package/src/index.js +2289 -0
- package/src/models/CLAUDE.md +17 -0
- package/src/models/ai-check-selector.js +806 -0
- package/src/models/catalog.json +426 -0
- package/src/models/deterministic-selector.js +1145 -0
- package/src/models/expanded_database.js +1142 -0
- package/src/models/intelligent-selector.js +532 -0
- package/src/models/requirements.js +310 -0
- package/src/models/scoring-config.js +57 -0
- package/src/models/scoring-engine.js +715 -0
- package/src/ollama/.cache/README.md +33 -0
- package/src/ollama/CLAUDE.md +24 -0
- package/src/ollama/client.js +438 -0
- package/src/ollama/enhanced-client.js +113 -0
- package/src/ollama/enhanced-scraper.js +634 -0
- package/src/ollama/manager.js +357 -0
- package/src/ollama/native-scraper.js +776 -0
- package/src/plugins/CLAUDE.md +11 -0
- package/src/plugins/examples/custom_model_plugin.js +87 -0
- package/src/plugins/index.js +295 -0
- package/src/utils/CLAUDE.md +11 -0
- package/src/utils/config.js +359 -0
- package/src/utils/formatter.js +315 -0
- package/src/utils/logger.js +272 -0
- package/src/utils/model-classifier.js +167 -0
- package/src/utils/verbose-progress.js +266 -0
|
@@ -0,0 +1,776 @@
|
|
|
1
|
+
const https = require('https');
|
|
2
|
+
const fs = require('fs');
|
|
3
|
+
const path = require('path');
|
|
4
|
+
const os = require('os');
|
|
5
|
+
const { classifyAllModels } = require('../utils/model-classifier');
|
|
6
|
+
|
|
7
|
+
/**
 * Scrapes the public Ollama model library (HTML pages served from
 * `baseURL`) and keeps the results in JSON cache files on disk.
 *
 * Two caches are maintained: a "basic" list (one entry per library card) and
 * a "detailed" list (each entry enriched from the model's own page). Both are
 * looked up first in the user's home directory and then in a legacy `.cache`
 * directory next to this file.
 */
class OllamaNativeScraper {
    /**
     * Configures endpoints and cache paths and makes a best-effort attempt to
     * create the cache directory.
     */
    constructor() {
        this.baseURL = 'https://ollama.com';
        this.registryAPI = 'https://registry.ollama.ai';

        // Primary cache location (user home)
        this.cacheDir = path.join(os.homedir(), '.llm-checker', 'cache', 'ollama');
        this.cacheFile = path.join(this.cacheDir, 'ollama-models.json');
        this.detailedCacheFile = path.join(this.cacheDir, 'ollama-detailed-models.json');

        // Legacy cache location (inside the package) kept for backward compatibility
        this.legacyCacheDir = path.join(__dirname, '.cache');
        this.legacyCacheFile = path.join(this.legacyCacheDir, 'ollama-models.json');
        this.legacyDetailedCacheFile = path.join(this.legacyCacheDir, 'ollama-detailed-models.json');

        this.cacheExpiry = 6 * 60 * 60 * 1000; // 6 hours, so data is refreshed fairly often

        // Caching is best-effort everywhere else (writes return false on failure),
        // so an unwritable home directory must not make the scraper unusable.
        try {
            fs.mkdirSync(this.cacheDir, { recursive: true });
        } catch (error) {
            console.warn(`Could not create cache directory ${this.cacheDir}: ${error.message}`);
        }
    }

    /**
     * Performs an HTTPS request and buffers the whole response body.
     *
     * @param {string} url - Absolute https URL.
     * @param {object} [options]
     * @param {string} [options.method='GET'] - HTTP method.
     * @param {object} [options.headers] - Extra headers; override the defaults.
     * @param {string|Buffer} [options.body] - Request body to send.
     * @param {number} [options.timeout=15000] - Socket inactivity timeout in ms.
     * @param {number} [options.maxBytes=5242880] - Maximum accepted body size in bytes.
     * @returns {Promise<{statusCode: number, data: string, headers: object}>}
     *   Resolves only for 2xx responses. Rejects with an Error for any other
     *   status, on timeout, on network errors, on an invalid URL, or when the
     *   body exceeds `maxBytes`.
     */
    async httpRequest(url, options = {}) {
        return new Promise((resolve, reject) => {
            const urlObj = new URL(url);
            const requestOptions = {
                hostname: urlObj.hostname,
                port: urlObj.port || 443,
                path: urlObj.pathname + urlObj.search,
                method: options.method || 'GET',
                headers: {
                    'User-Agent': 'Mozilla/5.0',
                    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                    'Accept-Language': 'en-US,en;q=0.5',
                    // Do not claim compression we don't handle here
                    'Connection': 'keep-alive',
                    'Upgrade-Insecure-Requests': '1',
                    ...options.headers
                }
            };

            const timeoutMs = typeof options.timeout === 'number' ? options.timeout : 15000;
            const maxBytes = typeof options.maxBytes === 'number' ? options.maxBytes : 5 * 1024 * 1024; // 5MB

            const req = https.request(requestOptions, (res) => {
                // Collect raw Buffers and decode once at the end: decoding chunk by
                // chunk corrupts multi-byte UTF-8 characters split across chunks.
                const chunks = [];
                let received = 0;

                res.on('data', (chunk) => {
                    received += chunk.length;
                    if (received > maxBytes) {
                        req.destroy(new Error('Response too large'));
                        return;
                    }
                    chunks.push(chunk);
                });

                res.on('error', reject);

                res.on('end', () => {
                    if (res.statusCode >= 200 && res.statusCode < 300) {
                        const data = Buffer.concat(chunks).toString('utf8');
                        resolve({ statusCode: res.statusCode, data, headers: res.headers });
                    } else {
                        reject(new Error(`HTTP ${res.statusCode}: ${res.statusMessage}`));
                    }
                });
            });

            // Socket/request timeout
            req.setTimeout(timeoutMs, () => {
                req.destroy(new Error('Request timeout'));
            });

            req.on('error', reject);
            if (options.body) req.write(options.body);
            req.end();
        });
    }

    /**
     * Escapes a string so it can be embedded literally inside a RegExp source.
     *
     * @param {string} value
     * @returns {string}
     */
    escapeRegExp(value) {
        return String(value).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    }

    /**
     * Builds the "basic model" record shared by both library-page parsers.
     *
     * @param {string} identifier - Path segment after `/library/`.
     * @param {object} fields - Already-parsed card fields.
     * @returns {object} Basic model record.
     */
    buildBasicModel(identifier, fields) {
        const hasNamespace = identifier.includes('/');
        return {
            model_identifier: identifier,
            model_name: fields.name,
            description: fields.description,
            labels: fields.labels,
            pulls: fields.pulls,
            tags: fields.tags,
            last_updated: fields.lastUpdated,
            url: `${this.baseURL}/library/${identifier}`,
            namespace: hasNamespace ? identifier.split('/')[0] : null,
            model_type: hasNamespace ? 'community' : 'official'
        };
    }

    /**
     * Extracts model cards from the library listing HTML.
     *
     * Tries a strict card pattern first and falls back to
     * {@link OllamaNativeScraper#parseModelsFallback} when it finds nothing.
     *
     * @param {string} html - Library listing page.
     * @returns {object[]} Basic model records.
     */
    parseModelFromHTML(html) {
        const models = [];
        const pattern = /<a[^>]*href="\/library\/([^"]*)"[^>]*>[\s\S]{0,5000}?<h3[^>]*>([^<]*)<\/h3>[\s\S]{0,2000}?<p[^>]*>([^<]*)<\/p>[\s\S]{0,2000}?(?:<span[^>]*>([^<]*)<\/span>)[\s\S]{0,2000}?(?:(\d+(?:\.\d+)?[KMB]?)\s*(?:Pulls|pulls))[\s\S]{0,1000}?(?:(\d+)\s*(?:Tags|tags))[\s\S]{0,1000}?(?:Updated\s*(\d+\s*\w+\s*ago))?[\s\S]{0,500}?<\/a>/gi;

        let match;
        while ((match = pattern.exec(html)) !== null) {
            const [, identifier, name, description, labels, pulls, tags, lastUpdated] = match;
            // A bare href="/library/" carries no model
            if (!identifier) continue;

            models.push(this.buildBasicModel(identifier, {
                name: this.cleanText(name),
                description: this.cleanText(description),
                labels: labels ? labels.split(',').map((label) => label.trim()) : [],
                pulls: this.parsePulls(pulls),
                tags: Number.parseInt(tags, 10) || 0,
                lastUpdated: lastUpdated || 'Unknown'
            }));
        }

        if (models.length === 0) {
            return this.parseModelsFallback(html);
        }

        return models;
    }

    /**
     * Looser parser: finds every distinct `/library/...` link and reads name,
     * description and pull count from the 500 characters on either side of the
     * link's first occurrence.
     *
     * @param {string} html - Library listing page.
     * @returns {object[]} Basic model records (labels/tags/last_updated are defaults).
     */
    parseModelsFallback(html) {
        const models = [];
        const libraryLinks = html.match(/href="\/library\/[^"]*"/g);

        if (libraryLinks) {
            const uniqueLinks = [...new Set(libraryLinks)];

            for (const link of uniqueLinks) {
                const identifier = link.match(/\/library\/([^"]*)/)[1];
                // A bare href="/library/" carries no model
                if (!identifier) continue;

                const linkIndex = html.indexOf(link);
                const section = html.substring(Math.max(0, linkIndex - 500), linkIndex + 500);
                const nameMatch = section.match(/<h[2-4][^>]*>([^<]*)<\/h[2-4]>/);
                const descMatch = section.match(/<p[^>]*>([^<]*)<\/p>/);
                const pullsMatch = section.match(/(\d+(?:\.\d+)?[KMB]?)\s*(?:Pulls|pulls)/i);

                models.push(this.buildBasicModel(identifier, {
                    name: nameMatch ? this.cleanText(nameMatch[1]) : identifier,
                    description: descMatch ? this.cleanText(descMatch[1]) : '',
                    labels: [],
                    pulls: pullsMatch ? this.parsePulls(pullsMatch[1]) : 0,
                    tags: 0,
                    lastUpdated: 'Unknown'
                }));
            }
        }

        return models;
    }

    /**
     * Decodes the common HTML entities and collapses whitespace.
     *
     * @param {string} text - Raw text captured from HTML.
     * @returns {string} Cleaned text; empty string for null/undefined input.
     */
    cleanText(text) {
        if (text == null) return '';
        return String(text)
            .replace(/&lt;/g, '<')
            .replace(/&gt;/g, '>')
            .replace(/&quot;/g, '"')
            .replace(/&#39;|&#x27;|&apos;/gi, "'")
            // Decode &amp; last so "&amp;lt;" yields "&lt;" rather than "<"
            .replace(/&amp;/g, '&')
            .replace(/\s+/g, ' ')
            .trim();
    }

    /**
     * Converts a pull-count label such as "12K", "1.5M" or "2B" to an integer.
     *
     * @param {string} pullsStr
     * @returns {number} Integer count; 0 when the input is empty or not numeric.
     */
    parsePulls(pullsStr) {
        if (!pullsStr) return 0;
        const num = parseFloat(pullsStr);
        if (Number.isNaN(num)) return 0;

        const str = String(pullsStr).toLowerCase();
        if (str.includes('k')) return Math.floor(num * 1000);
        if (str.includes('m')) return Math.floor(num * 1000000);
        if (str.includes('b')) return Math.floor(num * 1000000000);
        return Math.floor(num);
    }

    /**
     * Picks the cache file to use: the primary one if present, else the legacy one.
     *
     * @param {string} primaryFile
     * @param {string} legacyFile
     * @returns {string|null} Existing file path, or null when neither exists.
     */
    pickCacheFile(primaryFile, legacyFile) {
        if (fs.existsSync(primaryFile)) return primaryFile;
        if (fs.existsSync(legacyFile)) return legacyFile;
        return null;
    }

    /**
     * Tells whether a file was modified less than `cacheExpiry` ms ago.
     *
     * @param {string|null} file
     * @returns {boolean} False when the file is missing or cannot be stat'ed.
     */
    isFileFresh(file) {
        if (!file) return false;
        try {
            const age = Date.now() - fs.statSync(file).mtime.getTime();
            return age < this.cacheExpiry;
        } catch {
            return false;
        }
    }

    /**
     * Reads and parses a JSON cache file (primary first, then legacy).
     *
     * @param {string} primaryFile
     * @param {string} legacyFile
     * @returns {object|null} Parsed content, or null if missing/unreadable/invalid.
     */
    readJsonCache(primaryFile, legacyFile) {
        try {
            const file = this.pickCacheFile(primaryFile, legacyFile);
            if (!file) return null;
            return JSON.parse(fs.readFileSync(file, 'utf8'));
        } catch {
            return null;
        }
    }

    /**
     * Writes a models list plus cache metadata to `file`.
     *
     * @param {string} file
     * @param {object[]} models
     * @returns {boolean} True on success, false on any failure.
     */
    writeJsonCache(file, models) {
        try {
            const data = {
                models,
                total_count: models.length,
                cached_at: new Date().toISOString(),
                expires_at: new Date(Date.now() + this.cacheExpiry).toISOString()
            };
            fs.writeFileSync(file, JSON.stringify(data, null, 2));
            return true;
        } catch {
            return false;
        }
    }

    /** @returns {boolean} Whether the basic cache exists and is younger than `cacheExpiry`. */
    isCacheValid() {
        return this.isFileFresh(this.pickCacheFile(this.cacheFile, this.legacyCacheFile));
    }

    /** @returns {object|null} Parsed basic cache, or null when unavailable. */
    readCache() {
        return this.readJsonCache(this.cacheFile, this.legacyCacheFile);
    }

    /**
     * @param {object[]} models
     * @returns {boolean} True when the basic cache was written.
     */
    writeCache(models) {
        return this.writeJsonCache(this.cacheFile, models);
    }

    /** @returns {boolean} Whether the detailed cache exists and is younger than `cacheExpiry`. */
    isDetailedCacheValid() {
        return this.isFileFresh(this.pickCacheFile(this.detailedCacheFile, this.legacyDetailedCacheFile));
    }

    /** @returns {object|null} Parsed detailed cache, or null when unavailable. */
    readDetailedCache() {
        return this.readJsonCache(this.detailedCacheFile, this.legacyDetailedCacheFile);
    }

    /**
     * @param {object[]} models
     * @returns {boolean} True when the detailed cache was written.
     */
    writeDetailedCache(models) {
        return this.writeJsonCache(this.detailedCacheFile, models);
    }

    /**
     * Enriches every basic model with data from its detail page.
     *
     * Models are fetched in batches of 5 with a one-second pause between
     * batches so the server is not flooded. A model whose fetch fails is kept
     * with its basic information only.
     *
     * @param {object[]} basicModels
     * @returns {Promise<object[]>} One entry per input model, in input order.
     */
    async getDetailedModelsInfo(basicModels) {
        const detailedModels = [];
        const batchSize = 5;
        const totalBatches = Math.ceil(basicModels.length / batchSize);

        for (let i = 0; i < basicModels.length; i += batchSize) {
            const batch = basicModels.slice(i, i + batchSize);
            console.log(`Processing batch ${Math.floor(i/batchSize) + 1}/${totalBatches}`);

            const batchResults = await Promise.allSettled(
                batch.map((model) => this.getModelDetailedInfo(model))
            );

            batchResults.forEach((result, index) => {
                if (result.status === 'fulfilled' && result.value) {
                    detailedModels.push(result.value);
                } else {
                    // On failure keep at least the basic information
                    detailedModels.push(batch[index]);
                }
            });

            // Short pause between batches
            if (i + batchSize < basicModels.length) {
                await new Promise((resolve) => setTimeout(resolve, 1000));
            }
        }

        return detailedModels;
    }

    /**
     * Fetches one model's detail page and merges the parsed details into the
     * basic record.
     *
     * Note that the merged `tags` field is the array of tag names parsed from
     * the detail page and replaces the numeric tag count of the basic record.
     *
     * @param {object} basicModel
     * @returns {Promise<object>} Enriched model, or `basicModel` unchanged on any failure.
     */
    async getModelDetailedInfo(basicModel) {
        try {
            const modelUrl = `${this.baseURL}/library/${basicModel.model_identifier}`;
            const response = await this.httpRequest(modelUrl);

            if (response.statusCode !== 200) {
                return basicModel; // Fall back to basic information
            }

            const detailedInfo = this.parseModelDetailPage(response.data, basicModel);

            return {
                ...basicModel,
                ...detailedInfo,
                // Prefer the improved values when available
                pulls: detailedInfo.actual_pulls || basicModel.pulls || 0,
                main_size: detailedInfo.main_size || 'Unknown',
                detailed_scraped_at: new Date().toISOString()
            };
        } catch (error) {
            console.warn(`Failed to get details for ${basicModel.model_identifier}: ${error.message}`);
            return basicModel; // Fall back to basic information
        }
    }

    /**
     * Collects `identifier:tag` strings from a detail page.
     *
     * Sources: `ollama run|pull <tag>` commands inside `<code>` elements, and
     * any `identifier:...` occurrence in the raw HTML (plus case-insensitive
     * occurrences that contain a quantization marker).
     *
     * @param {string} html
     * @param {string} identifier - Model identifier; matched literally.
     * @returns {string[]} Up to 50 distinct tags, in order of first appearance.
     */
    extractTagsFromHTML(html, identifier) {
        const found = [];

        // Commands shown in code blocks
        const codeBlocks = html.match(/<code[^>]*>([^<]+)<\/code>/g) || [];
        for (const block of codeBlocks) {
            const content = block.replace(/<[^>]*>/g, '').trim();
            const modelMatch = content.match(/ollama (?:run|pull) ([^\s]+)/);
            if (modelMatch) {
                found.push(modelMatch[1]);
            }
        }

        if (identifier) {
            // Escape the identifier: names such as "llama3.1" contain regex metacharacters
            const id = this.escapeRegExp(identifier);

            // Plain-text occurrences (tags that are not inside code blocks)
            found.push(...(html.match(new RegExp(`${id}:[\\w.-]+`, 'g')) || []));

            // Quantized variants, matched case-insensitively. Each pattern consumes
            // the rest of the tag so results are never truncated mid-suffix.
            const quantPatterns = [
                new RegExp(`${id}:[\\w.-]*q\\d+[\\w.-]*`, 'gi'),
                new RegExp(`${id}:[\\w.-]*fp\\d+[\\w.-]*`, 'gi'),
                new RegExp(`${id}:[\\w.-]*int\\d+[\\w.-]*`, 'gi')
            ];
            for (const pattern of quantPatterns) {
                found.push(...(html.match(pattern) || []));
            }
        }

        return [...new Set(found)]
            .filter((tag) => tag && tag.includes(':'))
            .slice(0, 50);
    }

    /**
     * Finds the largest context size mentioned on the page.
     *
     * @param {string} html
     * @returns {string} e.g. "128K", "1.0M", "512", or "Unknown" when none is found.
     */
    extractContextLength(html) {
        const contextMatches = html.match(/context\s*:?\s*(\d+[kmb]?)/gi) ||
            html.match(/(\d+[kmb]?)\s*context/gi) ||
            html.match(/context\s+length\s*:?\s*(\d+[kmb]?)/gi);

        if (!contextMatches || contextMatches.length === 0) return 'Unknown';

        const contextNumbers = contextMatches.map((text) => {
            const num = text.match(/(\d+[kmb]?)/i);
            if (!num) return 0;

            const value = num[1].toLowerCase();
            const base = Number.parseInt(value, 10);
            if (value.includes('k')) return base * 1000;
            if (value.includes('m')) return base * 1000000;
            if (value.includes('b')) return base * 1000000000;
            return base;
        }).filter((n) => n > 0);

        if (contextNumbers.length === 0) return 'Unknown';

        const maxContext = Math.max(...contextNumbers);
        if (maxContext > 1000000) return `${(maxContext/1000000).toFixed(1)}M`;
        if (maxContext > 1000) return `${(maxContext/1000).toFixed(0)}K`;
        return maxContext.toString();
    }

    /**
     * Guesses supported input types from keywords present anywhere in the page.
     *
     * @param {string} htmlLower - Lower-cased page HTML.
     * @returns {string[]} Subset of text/image/code/audio; `['text']` when nothing matches.
     */
    detectInputTypes(htmlLower) {
        const keywordGroups = [
            ['text', ['text', 'chat']],
            ['image', ['image', 'vision', 'visual']],
            ['code', ['code', 'programming']],
            ['audio', ['audio', 'speech']]
        ];

        const inputTypes = keywordGroups
            .filter(([, keywords]) => keywords.some((keyword) => htmlLower.includes(keyword)))
            .map(([type]) => type);

        return inputTypes.length > 0 ? inputTypes : ['text'];
    }

    /**
     * Assigns a category and use cases from the model identifier and display name.
     *
     * Rules are evaluated from most specific to most generic. The generic
     * "talking" family keywords come last because they are substrings of more
     * specific names ("phi" in "dolphin", "chat" in "openchat").
     *
     * @param {string} identifier
     * @param {string} displayName
     * @returns {{category: string, useCases: string[]}}
     */
    categorizeModel(identifier, displayName) {
        const modelName = identifier.toLowerCase();
        const fullModelText = `${modelName} ${displayName.toLowerCase()}`;

        // inText keywords are searched in "identifier + display name";
        // inName keywords only in the identifier.
        const rules = [
            {
                category: 'coding',
                useCases: ['coding', 'programming', 'development'],
                inText: ['coder', 'codellama', 'starcoder', 'codestral', 'code-'],
                inName: []
            },
            {
                category: 'embeddings',
                useCases: ['embeddings', 'search', 'similarity'],
                inText: ['embed', 'nomic', 'bge', 'e5'],
                inName: ['all-minilm']
            },
            {
                category: 'multimodal',
                useCases: ['vision', 'multimodal', 'image'],
                inText: ['llava', 'pixtral', 'vision', 'moondream'],
                inName: ['qwen-vl', 'qwen2.5vl']
            },
            {
                category: 'reasoning',
                useCases: ['reasoning', 'mathematics', 'logic'],
                inText: ['deepseek-r1', 'reasoning', 'math'],
                inName: ['o1-']
            },
            {
                category: 'reading',
                useCases: ['reading', 'analysis', 'comprehension'],
                inText: ['solar', 'openchat', 'neural-chat', 'vicuna'],
                inName: []
            },
            {
                category: 'creative',
                useCases: ['creative', 'writing', 'storytelling'],
                inText: ['dolphin', 'wizard', 'uncensored', 'airoboros'],
                inName: []
            },
            {
                category: 'talking',
                useCases: ['chat', 'conversation', 'assistant'],
                inText: ['llama', 'mistral', 'phi', 'gemma', 'qwen', 'chat', 'instruct'],
                inName: []
            }
        ];

        const rule = rules.find(({ inText, inName }) =>
            inText.some((keyword) => fullModelText.includes(keyword)) ||
            inName.some((keyword) => modelName.includes(keyword))
        );

        return rule
            ? { category: rule.category, useCases: [...rule.useCases] }
            : { category: 'general', useCases: ['general', 'assistant'] };
    }

    /**
     * Extracts a description from the page (description paragraph, meta
     * description, or "desc" div - first one that matches wins).
     *
     * @param {string} html
     * @returns {string} Cleaned description, or '' when none is found.
     */
    extractDetailedDescription(html) {
        const descPatterns = [
            /<p[^>]*class="[^"]*description[^"]*"[^>]*>([^<]+)<\/p>/i,
            /<meta[^>]*name="description"[^>]*content="([^"]+)"/i,
            /<div[^>]*class="[^"]*desc[^"]*"[^>]*>([^<]+)<\/div>/i
        ];

        for (const pattern of descPatterns) {
            const match = html.match(pattern);
            if (match) return this.cleanText(match[1]);
        }
        return '';
    }

    /**
     * Parses a model detail page into tags, variants, sizes, quantizations,
     * context length, input types, category and description.
     *
     * Parsing is best-effort: if any step throws, a warning is logged and the
     * details gathered so far (defaults for the rest) are returned.
     *
     * @param {string} html - Detail page HTML.
     * @param {object} basicModel - Needs `model_identifier` and `model_name`.
     * @returns {object} Details record.
     */
    parseModelDetailPage(html, basicModel) {
        const details = {
            variants: [],
            tags: [],
            detailed_description: '',
            parameters: {},
            quantizations: [],
            model_sizes: [],
            category: 'general',
            use_cases: [],
            main_size: 'Unknown',
            actual_pulls: 0,
            context_length: 'Unknown',
            input_types: []
        };

        try {
            const identifier = String(basicModel.model_identifier ?? '');
            const displayName = String(basicModel.model_name ?? '');

            details.tags = this.extractTagsFromHTML(html, identifier);
            details.context_length = this.extractContextLength(html);
            details.input_types = this.detectInputTypes(html.toLowerCase());

            // Every "<number>B" / "<number>GB" style size mentioned on the page
            const sizeMatches = html.match(/\b(\d+(?:\.\d+)?)\s*[BG]B?\b/gi);
            if (sizeMatches) {
                details.model_sizes = [...new Set(sizeMatches.map((size) => size.toLowerCase()))];
                // The main size is simply the first one that appears on the page
                details.main_size = details.model_sizes[0];
            }

            // Pull count as shown on the detail page
            const pullsMatch = html.match(/(\d+(?:\.\d+)?[KMB]?)\s*pulls?/i);
            if (pullsMatch) {
                details.actual_pulls = this.parsePulls(pullsMatch[1]);
            }

            const quantMatches = html.match(/\b(Q\d+_[KM](?:_[MS])?|Q\d+|FP16|FP32|INT8|INT4)\b/gi);
            if (quantMatches) {
                details.quantizations = [...new Set(quantMatches.map((q) => q.toUpperCase()))];
            }

            const { category, useCases } = this.categorizeModel(identifier, displayName);
            details.category = category;
            details.use_cases = useCases;

            details.detailed_description = this.extractDetailedDescription(html);

            // One variant per tag, with the real download size when the page shows it
            details.variants = details.tags.map((tag) => {
                const estimatedSizeGB = this.estimateModelSizeGB(tag);
                return {
                    tag: tag,
                    size: this.extractSizeFromTag(tag),
                    quantization: this.extractQuantizationFromTag(tag),
                    command: `ollama pull ${tag}`,
                    estimated_size_gb: estimatedSizeGB,
                    real_size_gb: this.extractRealSizeFromHTML(html, tag) || estimatedSizeGB
                };
            });
        } catch (error) {
            console.warn(`Error parsing detailed page: ${error.message}`);
        }

        return details;
    }

    /**
     * Extracts the first "<number>b" / "<number>g" token from a tag.
     *
     * @param {string} tag - e.g. "llama3.1:8b".
     * @returns {string} Lower-cased token such as "8b", or 'unknown'.
     */
    extractSizeFromTag(tag) {
        const sizeMatch = tag.match(/(\d+\.?\d*)[bg]/i);
        return sizeMatch ? sizeMatch[0].toLowerCase() : 'unknown';
    }

    /**
     * Extracts the quantization marker from a tag.
     *
     * Recognizes `q<digits>` with any number of `_suffix` parts (q4_0, q8_0,
     * q4_K_M, q3_K_L, ...) as well as fp16/fp32/int8/int4.
     *
     * @param {string} tag
     * @returns {string} Upper-cased marker; 'Q4_0' is assumed when none is present.
     */
    extractQuantizationFromTag(tag) {
        const quantMatch = tag.match(/\b(q\d+(?:_[a-z0-9]+)*|fp16|fp32|int8|int4)\b/i);
        return quantMatch ? quantMatch[0].toUpperCase() : 'Q4_0'; // Default assumption
    }

    /**
     * Looks up the download size shown on the page for a given tag.
     *
     * First looks for "[tag] ... <size>GB|MB"
     * (e.g. "[llama3.1:8b]\n4.9GB · 128K context window · Text · 8 months ago"),
     * then for any size within 500 characters after the tag's first occurrence.
     *
     * @param {string} html
     * @param {string} tag
     * @returns {number|null} Size in GB, or null when not found.
     */
    extractRealSizeFromHTML(html, tag) {
        try {
            const toGB = (amount, unit) => (unit.toUpperCase() === 'MB' ? amount / 1024 : amount);

            const pattern = new RegExp(`\\[${this.escapeRegExp(tag)}\\][\\s\\S]*?(\\d+(?:\\.\\d+)?)(GB|MB)`, 'i');
            const match = html.match(pattern);
            if (match) {
                return toGB(parseFloat(match[1]), match[2]);
            }

            // Fallback: look for a size close to the tag
            const tagIndex = html.indexOf(tag);
            if (tagIndex !== -1) {
                const surrounding = html.substring(tagIndex, tagIndex + 500);
                const sizeMatch = surrounding.match(/(\d+(?:\.\d+)?)\s*(GB|MB)/i);
                if (sizeMatch) {
                    return toGB(parseFloat(sizeMatch[1]), sizeMatch[2]);
                }
            }

            return null; // No real size found
        } catch (error) {
            console.warn(`Error extracting real size for ${tag}: ${error.message}`);
            return null;
        }
    }

    /**
     * Rough size estimate: the number in the tag's "<number>b|g" token is used
     * directly as gigabytes.
     *
     * @param {string} tag
     * @returns {number} Estimated size in GB; 1 when the tag has no size token.
     */
    estimateModelSizeGB(tag) {
        const sizeMatch = tag.match(/(\d+\.?\d*)[bg]/i);
        return sizeMatch ? parseFloat(sizeMatch[1]) : 1;
    }

    /**
     * Returns the detailed model catalog, scraping it when the cache is stale.
     *
     * @param {boolean} [forceRefresh=false] - Skip the cache and scrape again.
     * @returns {Promise<{models: object[], total_count: number, cached_at: string, expires_at: string}>}
     * @throws {Error} When scraping fails and no usable cache exists.
     */
    async scrapeAllModels(forceRefresh = false) {
        try {
            if (!forceRefresh && this.isDetailedCacheValid()) {
                const cached = this.readDetailedCache();
                // A fresh but corrupt cache file must trigger a re-scrape, not return null
                if (cached && Array.isArray(cached.models)) return cached;
            }

            console.log('Scraping ALL Ollama models with detailed information...');

            // First get the basic list of models
            const response = await this.httpRequest(`${this.baseURL}/library`);
            if (response.statusCode !== 200) throw new Error(`Failed to fetch: ${response.statusCode}`);
            const basicModels = this.parseModelFromHTML(response.data);

            console.log(`Found ${basicModels.length} models. Getting detailed information...`);

            // Then fetch detailed information for each model
            const detailedModels = await this.getDetailedModelsInfo(basicModels);

            const cachedAt = new Date().toISOString();
            const expiresAt = new Date(Date.now() + this.cacheExpiry).toISOString();

            // Apply classification to all models
            const classifiedData = classifyAllModels({
                models: detailedModels,
                total_count: detailedModels.length,
                cached_at: cachedAt,
                expires_at: expiresAt
            });

            // Never replace a usable cache with an empty result (e.g. page layout changed)
            if (classifiedData.models.length > 0) {
                this.writeDetailedCache(classifiedData.models);
            }

            return {
                models: classifiedData.models,
                total_count: classifiedData.models.length,
                cached_at: cachedAt,
                expires_at: expiresAt
            };
        } catch (error) {
            // Scraping failed: serve stale cached data if there is any
            const cachedData = this.readDetailedCache();
            if (cachedData && Array.isArray(cachedData.models)) return cachedData;
            throw error;
        }
    }

    /**
     * Case-insensitive substring search over name, description and identifier.
     *
     * @param {string} query - Search text; a falsy query returns every model.
     * @param {object} [options] - Currently unused.
     * @returns {Promise<{models: object[], total_count: number, query?: string}>}
     */
    async searchModels(query, options = {}) {
        const data = await this.scrapeAllModels();
        const models = data.models;

        if (!query) return { models, total_count: models.length };

        const needle = query.toLowerCase();
        const filtered = models.filter((model) => {
            const searchText = `${model.model_name} ${model.description} ${model.model_identifier}`.toLowerCase();
            return searchText.includes(needle);
        });

        return {
            models: filtered,
            total_count: filtered.length,
            query
        };
    }

    /**
     * Matches locally installed models against the scraped catalog.
     *
     * For each local model an exact identifier match is preferred; only when
     * none exists is a fuzzy (substring) match used. Entries without a `name`
     * or `model` string are skipped.
     *
     * @param {Array<{name?: string, model?: string}>} localModels
     * @returns {Promise<{total_local: number, total_compatible: number, compatible_models: object[], all_available: number}>}
     */
    async findCompatibleModels(localModels) {
        const data = await this.scrapeAllModels();
        const cloudModels = data.models;
        const compatible = [];

        for (const localModel of localModels) {
            const localName = localModel.name || localModel.model;
            if (typeof localName !== 'string' || localName === '') continue;

            const [baseName] = localName.split(':');
            const baseLower = baseName.toLowerCase();

            const exactMatch = cloudModels.find((cloudModel) =>
                cloudModel.model_identifier === baseName ||
                cloudModel.model_identifier === localName
            );

            // Empty strings are substrings of everything, so they never count as fuzzy hits
            const fuzzyMatch = exactMatch || baseLower === '' ? undefined : cloudModels.find((cloudModel) => {
                const cloudName = String(cloudModel.model_name ?? '').toLowerCase();
                const cloudId = String(cloudModel.model_identifier ?? '').toLowerCase();
                return cloudName.includes(baseLower) || (cloudId !== '' && baseLower.includes(cloudId));
            });

            const match = exactMatch || fuzzyMatch;
            if (match) {
                compatible.push({
                    local: localModel,
                    cloud: match,
                    match_type: match.model_identifier === baseName ? 'exact' : 'fuzzy'
                });
            }
        }

        return {
            total_local: localModels.length,
            total_compatible: compatible.length,
            compatible_models: compatible,
            all_available: data.total_count
        };
    }

    /**
     * Aggregated statistics over the scraped catalog.
     *
     * @returns {Promise<{total_models: number, official_models: number, community_models: number, total_pulls: number, most_popular: Array<{name: string, pulls: number}>, last_updated: string}>}
     */
    async getStats() {
        const data = await this.scrapeAllModels();
        const models = data.models;

        return {
            total_models: models.length,
            official_models: models.filter((m) => m.model_type === 'official').length,
            community_models: models.filter((m) => m.model_type === 'community').length,
            total_pulls: models.reduce((sum, m) => sum + (m.pulls || 0), 0),
            // Sort a copy: the models array belongs to the catalog object
            most_popular: [...models]
                .sort((a, b) => (b.pulls || 0) - (a.pulls || 0))
                .slice(0, 10)
                .map((m) => ({ name: m.model_name, pulls: m.pulls })),
            last_updated: data.cached_at
        };
    }
}
|
|
727
|
+
|
|
728
|
+
/**
 * Convenience entry point around {@link OllamaNativeScraper}.
 *
 * With local models, returns the compatibility report produced by
 * `findCompatibleModels`. Without any, returns an empty report plus the first
 * 20 catalog entries as `recommendations`. Never rejects: on any scraping
 * error an empty report carrying the error message is returned instead.
 *
 * @param {Array<object>} [localModels=[]] - Locally installed models.
 * @returns {Promise<object>} Compatibility report.
 */
async function getOllamaModelsIntegration(localModels = []) {
    const scraper = new OllamaNativeScraper();

    try {
        const hasLocalModels = localModels.length > 0;
        if (hasLocalModels) {
            return await scraper.findCompatibleModels(localModels);
        }

        // Nothing installed locally: report the catalog size and suggest the first entries
        const catalog = await scraper.scrapeAllModels();
        const report = {
            total_local: 0,
            total_compatible: 0,
            compatible_models: [],
            all_available: catalog.total_count,
            recommendations: catalog.models.slice(0, 20)
        };
        return report;
    } catch (error) {
        const failureReport = {
            total_local: localModels.length,
            total_compatible: 0,
            compatible_models: [],
            all_available: 0,
            error: error.message
        };
        return failureReport;
    }
}
|
|
755
|
+
|
|
756
|
+
/**
 * Manual smoke test: runs the integration for a few sample local models and
 * prints the resulting report as pretty-printed JSON.
 *
 * @returns {Promise<void>}
 */
async function testScraper() {
    // No scraper is created here: getOllamaModelsIntegration builds its own.
    const localModels = [
        { name: 'mistral:latest' },
        { name: 'deepseek-coder:6.7b' },
        { name: 'deepseek-coder:1.3b' }
    ];

    const result = await getOllamaModelsIntegration(localModels);
    console.log(JSON.stringify(result, null, 2));
}
|
|
768
|
+
|
|
769
|
+
// Public API of this module.
module.exports = { OllamaNativeScraper, getOllamaModelsIntegration };

// When this file is executed directly (rather than required), run the smoke test
// and report any failure on stderr.
const isExecutedDirectly = require.main === module;
if (isExecutedDirectly) {
    testScraper().catch((error) => console.error(error));
}
|