llm-checker 3.6.1 → 3.7.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +45 -8
- package/bin/enhanced_cli.js +407 -5
- package/bin/mcp-server.mjs +5 -0
- package/package.json +7 -2
- package/src/data/model-database.js +452 -0
- package/src/data/registry-ingestors.js +765 -0
- package/src/data/registry-recommender.js +632 -0
- package/src/data/seed/README.md +11 -3
- package/src/data/seed/models.db +0 -0
- package/src/index.js +68 -4
- package/src/models/deterministic-selector.js +85 -39
- package/src/models/moe-assumptions.js +11 -0
|
@@ -0,0 +1,765 @@
|
|
|
1
|
+
const crypto = require('crypto');
|
|
2
|
+
const fetch = require('../utils/fetch');
|
|
3
|
+
|
|
4
|
+
// Registry sources known to this module, keyed by source id.
// Each row below is [id, display name, base URL, source type].
const SOURCE_DEFINITIONS = Object.fromEntries(
  [
    ['huggingface', 'Hugging Face Hub', 'https://huggingface.co', 'model_hub'],
    ['ollama', 'Ollama Library', 'https://ollama.com/library', 'runtime_registry'],
    ['gpt4all', 'GPT4All Catalog', 'https://github.com/nomic-ai/gpt4all', 'curated_catalog']
  ].map(([id, name, base_url, source_type]) => [id, { id, name, base_url, source_type }])
);

// Remote endpoints fetched by the ingestor.
const HUGGING_FACE_MODEL_API = 'https://huggingface.co/api/models';
const GPT4ALL_MODELS_URL = 'https://gpt4all.io/models/models3.json';
|
|
27
|
+
|
|
28
|
+
/**
 * Extract the target URL of the rel="next" entry from an HTTP Link header.
 *
 * Link targets are located by their angle brackets instead of by splitting the
 * header on commas, so a target URL that itself contains a comma is returned
 * whole. The rel parameter may be quoted or unquoted, may list several
 * relation types, and may appear after other parameters.
 *
 * @param {string} [linkHeader=''] Raw Link header value; null/undefined are treated as empty.
 * @returns {string|null} The next-page URL, or null when no rel="next" entry exists.
 */
function extractNextLink(linkHeader = '') {
  const header = String(linkHeader || '');
  // Group 1: the link target. Group 2: its parameters, up to the next "<".
  const linkPattern = /<([^>]+)>([^<]*)/g;
  for (const [, target, params] of header.matchAll(linkPattern)) {
    const rel = params.match(/;\s*rel\s*=\s*(?:"([^"]*)"|([^\s;,"]+))/i);
    if (!rel) continue;
    // A rel value can hold several space-separated relation types.
    const relations = (rel[1] ?? rel[2]).trim().toLowerCase().split(/\s+/);
    if (relations.includes('next')) return target;
  }
  return null;
}
|
|
36
|
+
|
|
37
|
+
/**
 * Coerce a value to an array.
 *
 * @param {*} value Any value.
 * @returns {Array} The value itself when it is an array, an empty array for
 *   any falsy value, otherwise a one-element array wrapping the value.
 */
function toArray(value) {
  if (Array.isArray(value)) return value;
  return value ? [value] : [];
}
|
|
41
|
+
|
|
42
|
+
/**
 * Normalize one segment of a scoped identifier.
 *
 * Trims the text, drops a leading lowercase http:// or https:// scheme,
 * collapses each run of characters outside [a-zA-Z0-9._:/@-] into a single
 * dash, strips leading and trailing dashes, and lowercases the result.
 *
 * @param {*} value Segment to normalize; falsy values become ''.
 * @returns {string} The normalized segment (possibly empty).
 */
function normalizeIdPart(value) {
  const trimmed = String(value || '').trim();
  const withoutScheme = trimmed.replace(/^https?:\/\//, '');
  const dashed = withoutScheme.replace(/[^a-zA-Z0-9._:/@-]+/g, '-');
  const withoutEdgeDashes = dashed.replace(/^-+|-+$/g, '');
  return withoutEdgeDashes.toLowerCase();
}
|
|
50
|
+
|
|
51
|
+
/**
 * Produce a short, stable digest of a value.
 *
 * @param {*} value Value to hash; falsy values are hashed as the empty string.
 * @returns {string} The first 12 hex characters of the SHA-1 digest.
 */
function hashShort(value) {
  const hasher = crypto.createHash('sha1');
  hasher.update(String(value || ''));
  const hex = hasher.digest('hex');
  return hex.slice(0, 12);
}
|
|
54
|
+
|
|
55
|
+
/**
 * Build a colon-separated identifier from the given parts.
 *
 * Each part is normalized with normalizeIdPart and empty results are skipped.
 * Identifiers longer than 180 characters are cut to their first 140 characters
 * and suffixed with a short hash of the full identifier.
 *
 * @param {...*} parts Identifier segments.
 * @returns {string} The scoped identifier.
 */
function makeScopedId(...parts) {
  const segments = [];
  for (const part of parts) {
    const segment = normalizeIdPart(part);
    if (segment) segments.push(segment);
  }

  const scopedId = segments.join(':');
  if (scopedId.length > 180) {
    return `${scopedId.slice(0, 140)}:${hashShort(scopedId)}`;
  }
  return scopedId;
}
|
|
60
|
+
|
|
61
|
+
/**
 * Build the identifier of a model artifact.
 *
 * A short hash of the raw artifact name is appended as a final segment after
 * the source, repository and artifact name.
 *
 * @param {string} sourceId Registry source id.
 * @param {string} repoId Repository id within that source.
 * @param {string} artifactName Artifact name (for example a filename or tag).
 * @returns {string} The scoped artifact identifier.
 */
function makeArtifactId(sourceId, repoId, artifactName) {
  const nameDigest = hashShort(artifactName);
  return makeScopedId(sourceId, repoId, artifactName, nameDigest);
}
|
|
64
|
+
|
|
65
|
+
/**
 * Convert a byte count to gibibytes (1024^3 bytes), rounded to three decimals.
 *
 * @param {*} bytes Byte count; coerced with Number().
 * @returns {number|null} The size in GB, or null when the input is not a
 *   finite positive number.
 */
function bytesToGB(bytes) {
  const byteCount = Number(bytes);
  const isUsable = Number.isFinite(byteCount) && byteCount > 0;
  if (!isUsable) return null;

  const gigabytes = byteCount / (1024 ** 3);
  return Math.round(gigabytes * 1000) / 1000;
}
|
|
70
|
+
|
|
71
|
+
/**
 * Parse a parameter count and express it in billions of parameters.
 *
 * Accepted inputs:
 *  - a finite number or purely numeric string. Values below one million are
 *    taken as already being in billions; larger values can only be a raw
 *    parameter count (e.g. 7241732096) and are divided by 1e9;
 *  - Mixture-of-Experts "NxMB" naming (e.g. "8x7B"), returned as N * M;
 *  - a number followed by trillion/billion/million or t/b/m.
 *
 * @param {*} rawValue Number or text to parse.
 * @returns {number|null} The count in billions, or null when nothing parses.
 */
function parseNumberWithUnit(rawValue) {
  if (rawValue === null || rawValue === undefined) return null;

  // A bare value this large cannot be "billions of parameters"; treat it as a
  // raw count. Previously it was returned unchanged and stored as billions.
  const RAW_COUNT_THRESHOLD = 1e6;
  const fromBareNumber = (value) => (value >= RAW_COUNT_THRESHOLD ? value / 1e9 : value);

  if (typeof rawValue === 'number' && Number.isFinite(rawValue)) {
    return fromBareNumber(rawValue);
  }

  const text = String(rawValue).replace(/,/g, '').trim().toLowerCase();
  if (!text) return null;

  if (/^\d+(?:\.\d+)?$/.test(text)) {
    return fromBareNumber(Number(text));
  }

  // Mixture-of-Experts "NxMB" naming (e.g. Mixtral 8x7B, 8x22B): the total
  // parameter footprint that must reside in memory is experts * per-expert
  // size. Without this, "8x7B" matches the bare "7b" below and is stored as 7B.
  const moe = text.match(/(\d+)\s*x\s*(\d+(?:\.\d+)?)\s*b\b/i);
  if (moe) {
    const experts = Number(moe[1]);
    const perExpert = Number(moe[2]);
    if (experts > 0 && Number.isFinite(perExpert) && perExpert > 0) {
      return experts * perExpert;
    }
  }

  // Note: 'k'/'thousand' are intentionally NOT parameter units. Parameter
  // counts are never expressed in thousands-of-billions, and tokens like
  // "128k" (a context length) were being misread as ~0.0001B and rounded to 0.
  const match = text.match(/(\d+(?:\.\d+)?)\s*(trillion|billion|million|[tmb])\b/i);
  if (!match) return null;

  const value = Number(match[1]);
  if (!Number.isFinite(value)) return null;
  const unit = (match[2] || '').toLowerCase();
  if (unit === 't' || unit === 'trillion') return value * 1000;
  if (unit === 'm' || unit === 'million') return value / 1000;
  return value;
}
|
|
107
|
+
|
|
108
|
+
/**
 * Read the total parameter count, in billions, from a safetensors summary.
 *
 * Uses `safetensors.total` when it is a positive finite number; otherwise sums
 * the finite values of `safetensors.parameters`. A null, empty or zero `total`
 * no longer short-circuits to 0 (Number(null) is a finite 0), so the
 * `parameters` breakdown is still consulted in that case.
 *
 * @param {object} safetensors Object with optional `total` and `parameters`.
 * @returns {number|null} Parameters in billions, or null when no positive
 *   count is available.
 */
function sumSafetensorsParams(safetensors) {
  if (!safetensors || typeof safetensors !== 'object') return null;

  const reportedTotal = Number(safetensors.total);
  if (Number.isFinite(reportedTotal) && reportedTotal > 0) {
    return reportedTotal / 1e9;
  }

  const parameters = safetensors.parameters;
  if (!parameters || typeof parameters !== 'object') return null;
  const total = Object.values(parameters).reduce((sum, value) => {
    const parsed = Number(value);
    return sum + (Number.isFinite(parsed) ? parsed : 0);
  }, 0);

  return total > 0 ? total / 1e9 : null;
}
|
|
123
|
+
|
|
124
|
+
/**
 * Return the first candidate that parses to a positive parameter count.
 *
 * Candidates are tried in order with parseNumberWithUnit and the winner is
 * rounded to three decimals. A candidate whose rounded value is 0 is skipped.
 *
 * @param {...*} values Candidate numbers or strings.
 * @returns {number|null} Parameters in billions, or null when none qualifies.
 */
function parseParamsB(...values) {
  for (const candidate of values) {
    const billions = parseNumberWithUnit(candidate);
    if (billions === null || !(billions > 0)) continue;

    const rounded = Math.round(billions * 1000) / 1000;
    // A tiny positive value can round down to 0; keep looking in that case.
    if (rounded > 0) return rounded;
  }
  return null;
}
|
|
135
|
+
|
|
136
|
+
/**
 * Find an active-parameter marker such as "A3B" or "a500m" in the given text.
 *
 * The marker must be delimited by the start/end of the joined text or by one
 * of "-", "_" or whitespace.
 *
 * @param {...*} values Text fragments, joined with spaces before matching.
 * @returns {number|null} Active parameters in billions ("m" values are
 *   converted and rounded to three decimals), or null when no marker is found.
 */
function parseActiveParamsB(...values) {
  const haystack = values.map((value) => String(value || '')).join(' ');
  const found = /(?:^|[-_\s])a(\d+(?:\.\d+)?)([bm])(?:[-_\s]|$)/i.exec(haystack);
  if (found === null) return null;

  const [, digits, unit] = found;
  const amount = Number(digits);
  if (!Number.isFinite(amount)) return null;

  if (unit.toLowerCase() === 'm') {
    return Math.round((amount / 1000) * 1000) / 1000;
  }
  return amount;
}
|
|
146
|
+
|
|
147
|
+
/**
 * Infer a quantization label from filenames, tags or similar text.
 *
 * GGUF-style labels (Q4_K_M, IQ2_XS, Q8_0, ...) are preferred and returned in
 * upper case; otherwise an "N-bit" marker (N in 2,3,4,5,6,8) yields "Nbit".
 * F16/FP16/BF16 are precisions rather than quantizations and are not matched
 * here, so a full-precision model is not labeled as quantized.
 *
 * @param {...*} values Text fragments, joined with spaces before matching.
 * @returns {string} The quantization label, or '' when none is found.
 */
function inferQuantization(...values) {
  const haystack = values.map((value) => String(value || '')).join(' ');

  const gguf = /\b(IQ\d(?:_[A-Z0-9]+)?|Q\d(?:_[A-Z0-9]+){0,2}|Q8_0)\b/i.exec(haystack);
  if (gguf) return gguf[1].toUpperCase();

  const bits = /\b([234568])\s*[-_ ]?bit\b/i.exec(haystack);
  return bits ? `${bits[1]}bit` : '';
}
|
|
159
|
+
|
|
160
|
+
/**
 * Infer a numeric precision label from filenames, tags or similar text.
 *
 * Rules are checked in order and the first match wins:
 * BF16, FP16 (fp16/f16), FP32 (fp32/f32), INT8 (int8/8bit), INT4 (int4/4bit).
 *
 * @param {...*} values Text fragments, joined with spaces and lowercased.
 * @returns {string} The precision label, or '' when none matches.
 */
function inferPrecision(...values) {
  const haystack = values.map((value) => String(value || '')).join(' ').toLowerCase();
  const rules = [
    [/\bbf16\b/, 'BF16'],
    [/\bfp16\b|\bf16\b/, 'FP16'],
    [/\bfp32\b|\bf32\b/, 'FP32'],
    [/\bint8\b|\b8bit\b/, 'INT8'],
    [/\bint4\b|\b4bit\b/, 'INT4']
  ];

  for (const [pattern, label] of rules) {
    if (pattern.test(haystack)) return label;
  }
  return '';
}
|
|
169
|
+
|
|
170
|
+
/**
 * Infer the weight format from a filename and optional tags.
 *
 * The filename extension decides first (.gguf, .safetensors, .bin, .pt/.pth).
 * A .safetensors file is reported as 'mlx' when either the tags or the
 * filename mention "mlx"; a .bin file is 'ggml' when the filename mentions
 * "ggml". Without a recognized extension, an "ollama" tag yields 'ollama'.
 *
 * @param {string} [filename=''] Artifact filename.
 * @param {Array|string} [tags=[]] Tags; a single value is treated as one tag.
 * @returns {string} One of 'gguf', 'mlx', 'safetensors', 'ggml',
 *   'pytorch_bin', 'pytorch', 'ollama' or 'unknown'.
 */
function inferFormat(filename = '', tags = []) {
  const name = String(filename || '').toLowerCase();
  const tagList = Array.isArray(tags) ? tags : (tags ? [tags] : []);
  const tagText = tagList.join(' ').toLowerCase();
  const hasSuffix = (...suffixes) => suffixes.some((suffix) => name.endsWith(suffix));

  if (hasSuffix('.gguf')) return 'gguf';
  if (hasSuffix('.safetensors')) {
    const isMlx = tagText.includes('mlx') || name.includes('mlx');
    return isMlx ? 'mlx' : 'safetensors';
  }
  if (hasSuffix('.bin')) {
    return name.includes('ggml') ? 'ggml' : 'pytorch_bin';
  }
  if (hasSuffix('.pt', '.pth')) return 'pytorch';
  return tagText.includes('ollama') ? 'ollama' : 'unknown';
}
|
|
180
|
+
|
|
181
|
+
/**
 * List the runtimes expected to load an artifact of the given format.
 *
 * Rules are applied in a fixed order and the result keeps first-insertion
 * order without duplicates. Tags and the source id are searched as one
 * lowercase string for the "mlx", "exl2" and "exllama" hints.
 *
 * @param {string} format Artifact format (as produced by inferFormat).
 * @param {Array|string} [tags=[]] Tags; a single value is treated as one tag.
 * @param {string} [sourceId=''] Registry source id, searched together with the tags.
 * @returns {string[]} Runtime names; empty when nothing applies.
 */
function inferRuntimeSupport(format, tags = [], sourceId = '') {
  const fmt = String(format || '').toLowerCase();
  const tagList = Array.isArray(tags) ? tags : (tags ? [tags] : []);
  const hints = `${tagList.join(' ')} ${sourceId}`.toLowerCase();

  // [applies, runtimes] pairs, evaluated top to bottom.
  const rules = [
    [fmt === 'gguf' || fmt === 'ggml', ['llama.cpp', 'ollama']],
    [fmt === 'ollama', ['ollama']],
    [fmt === 'mlx' || hints.includes('mlx'), ['mlx']],
    [fmt === 'safetensors' || fmt === 'pytorch' || fmt === 'pytorch_bin', ['transformers', 'vllm']],
    [hints.includes('exl2') || hints.includes('exllama'), ['exllama']]
  ];

  const supported = new Set();
  for (const [applies, runtimes] of rules) {
    if (!applies) continue;
    for (const runtime of runtimes) supported.add(runtime);
  }
  return Array.from(supported);
}
|
|
206
|
+
|
|
207
|
+
/**
 * Infer task labels for a model record.
 *
 * The pipeline tag (or primary_category / category) is kept verbatim when
 * present. Keyword patterns are then matched as plain substrings against the
 * lowercase concatenation of the model's identifiers, description and tags.
 * 'general' is returned only when nothing else was found.
 *
 * @param {object} [model={}] Model record; tags are read from the first truthy
 *   of `tags`, `capabilities`, `categories`, `use_cases`.
 * @returns {string[]} Task labels in first-insertion order, without duplicates.
 */
function inferTasks(model = {}) {
  const rawTags = model.tags || model.capabilities || model.categories || model.use_cases;
  const tags = Array.isArray(rawTags) ? rawTags : (rawTags ? [rawTags] : []);
  const found = new Set();

  const primary = model.pipeline_tag || model.primary_category || model.category;
  if (primary) found.add(String(primary));

  const haystack = [
    model.id,
    model.modelId,
    model.model_identifier,
    model.model_name,
    model.description,
    ...tags
  ].filter(Boolean).join(' ').toLowerCase();

  // [task, pattern] pairs, checked in this order.
  const keywordRules = [
    ['coding', /code|coder|programming/],
    ['chat', /chat|instruct|assistant|conversation/],
    ['reasoning', /reason|math|logic|r1|qwq/],
    ['embeddings', /embed|retrieval|bge|e5|nomic/],
    ['multimodal', /vision|vl|image|multimodal|llava/],
    ['creative', /creative|writing|story|roleplay/]
  ];
  for (const [task, pattern] of keywordRules) {
    if (pattern.test(haystack)) found.add(task);
  }

  if (found.size === 0) found.add('general');
  return Array.from(found);
}
|
|
231
|
+
|
|
232
|
+
/**
 * Infer the input modalities of a model or artifact.
 *
 * 'text' is always present. 'vision' and 'audio' are added when their keyword
 * patterns match, as plain substrings, the lowercase concatenation of the
 * model's identifiers, description, the filename and the tags.
 *
 * @param {object} [model={}] Model record; tags are read from the first truthy
 *   of `tags`, `capabilities`, `categories`.
 * @param {string} [filename=''] Artifact filename or tag to include in the search.
 * @returns {string[]} Modalities, starting with 'text'.
 */
function inferModalities(model = {}, filename = '') {
  const rawTags = model.tags || model.capabilities || model.categories;
  const tags = Array.isArray(rawTags) ? rawTags : (rawTags ? [rawTags] : []);
  const haystack = [
    model.id,
    model.modelId,
    model.model_identifier,
    model.model_name,
    model.description,
    filename,
    ...tags
  ].filter(Boolean).join(' ').toLowerCase();

  const found = ['text'];
  if (/vision|image|vl|multimodal|llava/.test(haystack)) found.push('vision');
  if (/audio|speech|whisper/.test(haystack)) found.push('audio');
  return found;
}
|
|
247
|
+
|
|
248
|
+
/**
 * Determine a model's license.
 *
 * Prefers the license from `cardData` (or `card_data`); an array of licenses
 * is joined with commas. Otherwise the first "license:<id>" tag is used.
 *
 * @param {object} [model={}] Model record.
 * @returns {string} The license identifier, or 'unknown'.
 */
function extractLicense(model = {}) {
  const card = model.cardData || model.card_data || {};
  const declared = card.license;
  if (declared) {
    return Array.isArray(declared) ? declared.join(',') : String(declared);
  }

  const tags = Array.isArray(model.tags) ? model.tags : (model.tags ? [model.tags] : []);
  for (const tag of tags) {
    const text = String(tag);
    if (text.startsWith('license:')) return text.replace(/^license:/, '');
  }
  return 'unknown';
}
|
|
254
|
+
|
|
255
|
+
/**
 * Read a sibling file's name.
 *
 * @param {object} [sibling={}] Sibling file record.
 * @returns {string} The first truthy of `rfilename`, `path`, `name`,
 *   `filename`, or '' when none is set.
 */
function getSiblingName(sibling = {}) {
  for (const key of ['rfilename', 'path', 'name', 'filename']) {
    if (sibling[key]) return sibling[key];
  }
  return '';
}
|
|
258
|
+
|
|
259
|
+
/**
 * Read a sibling file's size in bytes.
 *
 * Candidates are checked in order: `size`, `sizeBytes`, `lfs.size`, `blobSize`.
 *
 * @param {object} [sibling={}] Sibling file record.
 * @returns {number|null} The first candidate that is a finite positive number
 *   after Number() coercion, or null when there is none.
 */
function getSiblingSizeBytes(sibling = {}) {
  const sizes = [
    sibling.size,
    sibling.sizeBytes,
    sibling.lfs?.size,
    sibling.blobSize
  ].map((candidate) => Number(candidate));

  const usable = sizes.find((size) => Number.isFinite(size) && size > 0);
  return usable === undefined ? null : usable;
}
|
|
272
|
+
|
|
273
|
+
/**
 * Decide whether a repository file is a model weight artifact.
 *
 * Exclusions are checked first so that LoRA/PEFT adapter files and
 * optimizer/training-state files are not ingested as standalone models.
 * Matching is done on the lowercased filename.
 *
 * @param {string} filename Repository-relative filename.
 * @returns {boolean} True for .gguf and .safetensors files and for the
 *   recognized .bin/.pt/.pth weight naming patterns.
 */
function isModelArtifactFile(filename) {
  const name = String(filename || '').toLowerCase();
  if (name === '') return false;

  const excluded = [
    /(^|[/_-])adapter[_-]?(model|config)/,
    /(^|[/_-])(lora|optimizer|scheduler|rng_state|trainer_state|training_args)/
  ];
  if (excluded.some((pattern) => pattern.test(name))) return false;

  if (name.endsWith('.gguf') || name.endsWith('.safetensors')) return true;

  const included = [
    /pytorch_model.*\.(bin)$/,
    // Mistral-style consolidated weights, e.g. consolidated.00.pth.
    /(^|[/])consolidated.*\.(pt|pth|bin)$/,
    /model.*\.(bin|pt|pth)$/,
    /ggml.*\.bin$/
  ];
  return included.some((pattern) => pattern.test(name));
}
|
|
290
|
+
|
|
291
|
+
/**
 * Build the huggingface.co "resolve" URL for a file in a repository.
 *
 * Each path segment of the filename is percent-encoded separately, so the
 * "/" separators are kept. The repository id and revision are inserted as-is.
 *
 * @param {string} repoId Repository id, e.g. "org/name".
 * @param {string} filename Repository-relative file path.
 * @param {string} [revision='main'] Revision; falsy values fall back to 'main'.
 * @returns {string} The download URL.
 */
function buildHuggingFaceDownloadUrl(repoId, filename, revision = 'main') {
  const encodedSegments = [];
  for (const segment of String(filename || '').split('/')) {
    encodedSegments.push(encodeURIComponent(segment));
  }
  const encodedPath = encodedSegments.join('/');
  const ref = revision || 'main';
  return `https://huggingface.co/${repoId}/resolve/${ref}/${encodedPath}`;
}
|
|
298
|
+
|
|
299
|
+
/**
 * Convert one Hugging Face model record into registry rows.
 *
 * Produces one repo row plus one artifact row for every sibling file accepted
 * by isModelArtifactFile. The parameter count is the larger of the metadata
 * value (safetensors summary, config.num_parameters, cardData.params) and the
 * value parsed from the repo id and tags; each artifact additionally considers
 * its own filename.
 *
 * @param {object} model Model record as returned by the Hugging Face models API.
 * @returns {{source: object, repos: object[], artifacts: object[]}|null}
 *   The normalized collection, or null when the record is not an object or
 *   carries no repository id.
 */
function normalizeHuggingFaceModel(model) {
  // collectHuggingFace already filters out null results, so an unusable record
  // is rejected here instead of raising a TypeError mid-ingest.
  if (!model || typeof model !== 'object') return null;
  const rawRepoId = model.id || model.modelId || model.model_id;
  if (!rawRepoId) return null;
  // The id is used with string methods below; coerce non-string ids.
  const repoId = String(rawRepoId);

  const namespace = repoId.includes('/') ? repoId.split('/')[0] : '';
  const tags = toArray(model.tags);
  // Joined once and reused for every sibling below.
  const tagText = tags.join(' ');
  const tasks = inferTasks(model);
  const modalities = inferModalities(model);
  const license = extractLicense(model);
  const gated = Boolean(model.gated && model.gated !== 'false');
  const repoKey = makeScopedId('huggingface', repoId);
  const repo = {
    id: repoKey,
    source_id: 'huggingface',
    repo_id: repoId,
    namespace,
    canonical_model_id: repoId,
    display_name: model.modelId || repoId,
    url: `https://huggingface.co/${repoId}`,
    license,
    gated,
    requires_auth: gated,
    downloads: Number(model.downloads) || 0,
    likes: Number(model.likes) || 0,
    tags,
    tasks,
    modalities,
    last_modified: model.lastModified || model.last_modified || '',
    sha: model.sha || '',
    metadata: {
      pipeline_tag: model.pipeline_tag || '',
      library_name: model.library_name || '',
      cardData: model.cardData || null
    }
  };

  const nameTotalB = parseParamsB(repoId, tagText);
  const metadataParamsB =
    sumSafetensorsParams(model.safetensors) ||
    parseParamsB(model.config?.num_parameters, model.cardData?.params);
  // Prefer the larger of metadata vs the MoE-aware name total, so an MoE whose
  // safetensors/config under-reports (or is absent) still stores the full total.
  const repoParamsB = Math.max(metadataParamsB || 0, nameTotalB || 0) || null;
  const activeParamsB = parseActiveParamsB(repoId, tagText);
  const contextLength = Number(
    model.config?.max_position_embeddings ||
    model.config?.model_max_length ||
    model.config?.max_sequence_length ||
    model.cardData?.context_length ||
    0
  ) || null;
  // Pin download URLs to the repo's commit sha when one is reported.
  const revision = model.sha || 'main';
  const artifacts = [];

  for (const sibling of toArray(model.siblings)) {
    const filename = getSiblingName(sibling);
    if (!isModelArtifactFile(filename)) continue;

    const sizeBytes = getSiblingSizeBytes(sibling);
    const format = inferFormat(filename, tags);
    const quantization = inferQuantization(filename, tagText);
    const precision = inferPrecision(filename, tagText, quantization);
    const artifactName = filename;
    artifacts.push({
      id: makeArtifactId('huggingface', repoId, artifactName),
      source_id: 'huggingface',
      repo_key: repoKey,
      repo_id: repoId,
      canonical_model_id: repoId,
      artifact_name: artifactName,
      filename,
      format,
      quantization,
      precision,
      parameter_count_b: Math.max(parseParamsB(filename) || 0, repoParamsB || 0) || null,
      active_parameter_count_b: activeParamsB,
      size_bytes: sizeBytes,
      size_gb: bytesToGB(sizeBytes),
      context_length: contextLength,
      runtime_support: inferRuntimeSupport(format, tags, 'huggingface'),
      tasks,
      modalities: inferModalities(model, filename),
      download_url: buildHuggingFaceDownloadUrl(repoId, filename, revision),
      install_command: `hf download ${repoId} ${filename}`,
      sha256: sibling.lfs?.sha256 || '',
      etag: sibling.lfs?.oid || sibling.blobId || '',
      license,
      gated,
      requires_auth: gated,
      downloads: repo.downloads,
      likes: repo.likes,
      updated_at: repo.last_modified,
      metadata: {
        repo_sha: model.sha || '',
        sibling
      }
    });
  }

  return { source: SOURCE_DEFINITIONS.huggingface, repos: [repo], artifacts };
}
|
|
400
|
+
|
|
401
|
+
/**
 * Convert one GPT4All catalog entry into registry rows (one repo, one artifact).
 *
 * When the download URL is a Hugging Face "resolve" URL, the Hugging Face repo
 * id becomes the repo and canonical model id and the filename is taken from
 * the URL path; otherwise the entry is filed under "gpt4all/<name>".
 *
 * @param {object} entry Catalog entry (reads filename, url/downloadUrl/
 *   download_url, name, type, quant, parameters, filesize/fileSize/size,
 *   license, downloads, ramrequired, md5sum, sha256, description, promptTemplate).
 * @returns {{source: object, repos: object[], artifacts: object[]}|null}
 *   The normalized collection, or null when the entry is not an object or has
 *   no usable name or URL.
 */
function normalizeGpt4AllEntry(entry) {
  // collectGpt4All already filters out null results, so skip unusable entries.
  if (!entry || typeof entry !== 'object') return null;

  // Turn the path part of a Hugging Face resolve URL into a filename: drop any
  // query string or fragment, then percent-decode. A malformed escape sequence
  // falls back to the undecoded path instead of throwing and aborting ingest.
  const decodeRepoPath = (rawPath) => {
    const pathOnly = rawPath.replace(/[?#].*$/, '');
    try {
      return decodeURIComponent(pathOnly);
    } catch {
      return pathOnly;
    }
  };

  const filenameCandidate = entry.filename || '';
  const url = String(entry.url || entry.downloadUrl || entry.download_url ||
    (filenameCandidate ? `https://gpt4all.io/models/gguf/${encodeURIComponent(filenameCandidate)}` : ''));
  const name = entry.name || filenameCandidate || url.split('/').filter(Boolean).pop();
  if (!name || !url) return null;

  const repoMatch = url.match(/huggingface\.co\/([^/]+\/[^/]+)\/resolve\/([^/]+)\/(.+)$/);
  const repoId = repoMatch ? repoMatch[1] : `gpt4all/${name}`;
  // When the download points at a Hugging Face repo, use that repo id as the
  // canonical model id so the same model lines up across sources for dedup.
  const canonicalModelId = repoMatch ? repoMatch[1] : name;
  const filename = repoMatch
    ? decodeRepoPath(repoMatch[3])
    : (filenameCandidate || url.split('/').filter(Boolean).pop());
  const repoKey = makeScopedId('gpt4all', repoId);
  const tags = ['gpt4all', entry.type, entry.quant].filter(Boolean);
  const paramsB = parseParamsB(entry.parameters, name, filename);
  // Sizes can arrive as comma-formatted strings ("8,000,000,000"); strip non-digits.
  const sizeBytes = Number(String(entry.filesize ?? entry.fileSize ?? entry.size ?? 0).replace(/[^0-9.]/g, '')) || null;
  const format = inferFormat(filename, tags);
  // Computed once; the repo row and the artifact row carry the same tasks.
  const tasks = inferTasks({ model_name: name, tags });
  const license = entry.license || 'unknown';

  return {
    source: SOURCE_DEFINITIONS.gpt4all,
    repos: [{
      id: repoKey,
      source_id: 'gpt4all',
      repo_id: repoId,
      namespace: repoId.includes('/') ? repoId.split('/')[0] : 'gpt4all',
      canonical_model_id: canonicalModelId,
      display_name: name,
      url: repoMatch ? `https://huggingface.co/${repoId}` : url,
      license,
      gated: false,
      requires_auth: false,
      downloads: Number(entry.downloads) || 0,
      likes: 0,
      tags,
      tasks,
      modalities: ['text'],
      metadata: {
        ramrequired: entry.ramrequired || null,
        type: entry.type || null,
        md5sum: entry.md5sum || null
      }
    }],
    artifacts: [{
      id: makeArtifactId('gpt4all', repoId, filename || name),
      source_id: 'gpt4all',
      repo_key: repoKey,
      repo_id: repoId,
      canonical_model_id: canonicalModelId,
      artifact_name: filename || name,
      filename: filename || '',
      format,
      quantization: inferQuantization(entry.quant, filename),
      precision: inferPrecision(entry.quant, filename),
      parameter_count_b: paramsB,
      active_parameter_count_b: null,
      size_bytes: sizeBytes,
      size_gb: bytesToGB(sizeBytes),
      runtime_support: inferRuntimeSupport(format, tags, 'gpt4all'),
      tasks,
      modalities: ['text'],
      download_url: url,
      install_command: `curl -L ${url} -o ${filename || name}`,
      sha256: entry.sha256 || '',
      etag: entry.md5sum || '',
      license,
      gated: false,
      requires_auth: false,
      metadata: {
        ramrequired: entry.ramrequired || null,
        description: entry.description || '',
        promptTemplate: entry.promptTemplate || ''
      }
    }]
  };
}
|
|
478
|
+
|
|
479
|
+
/**
 * Convert one Ollama model row plus one of its variants into registry rows
 * (one repo, one artifact).
 *
 * `model.capabilities` may be a JSON-encoded string or an already-parsed
 * value. It is always normalized to an array: unparseable JSON yields [], and
 * a single non-array value is wrapped. Previously an array input was passed to
 * JSON.parse, which threw and silently discarded the capabilities.
 *
 * @param {object} model Model row (reads id/model_identifier, name,
 *   capabilities, namespace, url, pulls, tags_count, last_updated, updated_at).
 * @param {object} variant Variant row (reads tag, quant, params_b, size_gb,
 *   context_length, input_types, is_moe, expert_count).
 * @returns {{source: object, repos: object[], artifacts: object[]}} The
 *   normalized collection.
 */
function normalizeOllamaRows(model, variant) {
  const modelId = model.id || model.model_identifier;
  // A variant without its own tag falls back to the model id.
  const tag = variant.tag || modelId;
  const repoKey = makeScopedId('ollama', modelId);

  let parsedCapabilities = model.capabilities;
  if (typeof parsedCapabilities === 'string') {
    try {
      parsedCapabilities = JSON.parse(parsedCapabilities || '[]');
    } catch {
      // Deliberately best-effort: a corrupt column means "no capabilities".
      parsedCapabilities = [];
    }
  }
  const capabilities = toArray(parsedCapabilities);

  const tasks = inferTasks({
    model_identifier: modelId,
    model_name: model.name,
    capabilities,
    categories: capabilities
  });
  const modalities = inferModalities({ model_identifier: modelId, model_name: model.name, capabilities }, tag);

  return {
    source: SOURCE_DEFINITIONS.ollama,
    repos: [{
      id: repoKey,
      source_id: 'ollama',
      repo_id: modelId,
      namespace: model.namespace || '',
      canonical_model_id: modelId,
      display_name: model.name || modelId,
      url: model.url || `https://ollama.com/library/${modelId}`,
      license: 'unknown',
      gated: false,
      requires_auth: false,
      downloads: Number(model.pulls) || 0,
      likes: 0,
      tags: capabilities,
      tasks,
      modalities,
      last_modified: model.last_updated || '',
      metadata: {
        tags_count: model.tags_count || 0,
        source_updated_at: model.updated_at || ''
      }
    }],
    artifacts: [{
      id: makeArtifactId('ollama', modelId, tag),
      source_id: 'ollama',
      repo_key: repoKey,
      repo_id: modelId,
      canonical_model_id: modelId,
      artifact_name: tag,
      filename: '',
      format: 'ollama',
      quantization: variant.quant || inferQuantization(tag),
      precision: inferPrecision(variant.quant, tag),
      // Take the larger of the stored value and the value parsed from the tag.
      parameter_count_b: Math.max(Number(variant.params_b) || 0, parseParamsB(tag) || 0) || null,
      active_parameter_count_b: null,
      size_bytes: null,
      size_gb: Number(variant.size_gb) || null,
      context_length: Number(variant.context_length) || null,
      runtime_support: ['ollama'],
      tasks,
      modalities,
      download_url: `ollama://library/${tag}`,
      install_command: `ollama pull ${tag}`,
      license: 'unknown',
      gated: false,
      requires_auth: false,
      downloads: Number(model.pulls) || 0,
      updated_at: model.updated_at || model.last_updated || '',
      metadata: {
        input_types: variant.input_types || '["text"]',
        is_moe: Boolean(variant.is_moe),
        expert_count: variant.expert_count || null
      }
    }]
  };
}
|
|
556
|
+
|
|
557
|
+
/**
 * Collects model metadata from the supported registries (Hugging Face,
 * GPT4All, and Ollama rows already present in the database) and stores the
 * normalized rows through the supplied database object.
 */
class RegistryIngestor {
  /**
   * @param {object} [options]
   * @param {object} [options.database] Database object; required by ingest().
   *   This class calls all(), beginBatch(), endBatch(), upsertRegistrySource(),
   *   upsertRegistryRepo() and upsertModelArtifact() on it.
   * @param {Function} [options.fetchImpl] fetch-compatible function; defaults
   *   to the module's fetch.
   * @param {Function} [options.onProgress] Called with { source, message }.
   */
  constructor(options = {}) {
    this.database = options.database;
    this.fetchImpl = options.fetchImpl || fetch;
    this.onProgress = options.onProgress || (() => {});
  }

  /**
   * Collect from the requested sources and, unless dryRun is set, store the rows.
   *
   * @param {object} [options]
   * @param {string} [options.sources='ollama,huggingface,gpt4all']
   *   Comma-separated source names; 'hf' is accepted for 'huggingface'.
   * @param {number} [options.limit] Limit applied to every source without its own limit.
   * @param {number} [options.hfLimit] Hugging Face limit (alias: huggingfaceLimit; default 3000).
   * @param {number} [options.gpt4allLimit] GPT4All limit (default 1000).
   * @param {number} [options.ollamaLimit] Ollama limit (default 10000).
   * @param {string} [options.query] Hugging Face search text.
   * @param {string} [options.task] Hugging Face filter value.
   * @param {boolean} [options.dryRun] When truthy, nothing is written.
   * @returns {Promise<object>} Counts as produced by summarizeCollections().
   * @throws {Error} When no database was supplied or a source name is unsupported.
   */
  async ingest(options = {}) {
    if (!this.database) {
      throw new Error('RegistryIngestor requires a database instance');
    }

    const sources = String(options.sources || 'ollama,huggingface,gpt4all')
      .split(',')
      .map((source) => source.trim().toLowerCase())
      .filter(Boolean);

    // A limit counts only when it coerces to a positive number.
    const positiveOrNull = (value) => (Number(value) > 0 ? Number(value) : null);
    const genericLimit = positiveOrNull(options.limit);
    const limits = {
      huggingface: positiveOrNull(options.hfLimit || options.huggingfaceLimit) ?? (genericLimit || 3000),
      gpt4all: positiveOrNull(options.gpt4allLimit) ?? (genericLimit || 1000),
      ollama: positiveOrNull(options.ollamaLimit) ?? (genericLimit || 10000)
    };
    const collections = [];

    for (const source of sources) {
      if (source === 'huggingface' || source === 'hf') {
        collections.push(...await this.collectHuggingFace({
          limit: limits.huggingface,
          query: options.query,
          task: options.task
        }));
      } else if (source === 'gpt4all') {
        collections.push(...await this.collectGpt4All({ limit: limits.gpt4all }));
      } else if (source === 'ollama') {
        collections.push(...this.collectOllamaFromDatabase({ limit: limits.ollama }));
      } else {
        throw new Error(`Unsupported registry source: ${source}`);
      }
    }

    if (!options.dryRun) {
      this.storeCollections(collections);
    }

    return this.summarizeCollections(collections, { dryRun: Boolean(options.dryRun) });
  }

  /**
   * Fetch models from the Hugging Face models API, sorted by downloads, and
   * follow rel="next" Link headers until the requested limit is reached.
   *
   * @param {object} [options]
   * @param {number} [options.limit=1000] Maximum number of models.
   * @param {string} [options.query] Sent as the `search` parameter.
   * @param {string} [options.task] Sent as the `filter` parameter.
   * @returns {Promise<object[]>} One normalized collection per usable model.
   * @throws {Error} When a response is not OK.
   */
  async collectHuggingFace(options = {}) {
    const requestedLimit = Number(options.limit) > 0 ? Number(options.limit) : 1000;
    // Never request more than 1000 models in one page.
    const pageLimit = Math.min(1000, requestedLimit);
    const params = new URLSearchParams({
      sort: 'downloads',
      direction: '-1',
      limit: String(pageLimit),
      full: 'true',
      config: 'true'
    });
    if (options.query) params.set('search', options.query);
    if (options.task) params.set('filter', options.task);

    const models = [];
    let url = `${HUGGING_FACE_MODEL_API}?${params.toString()}`;
    while (url && models.length < requestedLimit) {
      this.onProgress({ source: 'huggingface', message: `Fetching ${url}` });
      const response = await this.fetchImpl(url, {
        headers: { 'Accept': 'application/json' }
      });

      if (!response.ok) {
        throw new Error(`Hugging Face request failed: HTTP ${response.status}`);
      }

      const payload = await response.json();
      const pageModels = toArray(payload);
      models.push(...pageModels);
      // An empty page ends pagination even if a next link is advertised.
      if (pageModels.length === 0) break;
      url = models.length < requestedLimit ? extractNextLink(response.headers?.get?.('link')) : null;
    }

    return models
      .slice(0, requestedLimit)
      .map(normalizeHuggingFaceModel)
      .filter(Boolean);
  }

  /**
   * Fetch the GPT4All model list and normalize its entries.
   *
   * The payload may be an array of entries or an object with a `models`
   * array; any other payload (including null) yields no entries instead of
   * throwing.
   *
   * @param {object} [options]
   * @param {number} [options.limit] Maximum number of entries; all when falsy.
   * @returns {Promise<object[]>} One normalized collection per usable entry.
   * @throws {Error} When the response is not OK.
   */
  async collectGpt4All(options = {}) {
    this.onProgress({ source: 'gpt4all', message: 'Fetching GPT4All metadata' });
    const response = await this.fetchImpl(GPT4ALL_MODELS_URL, {
      headers: { 'Accept': 'application/json' }
    });

    if (!response.ok) {
      throw new Error(`GPT4All request failed: HTTP ${response.status}`);
    }

    const payload = await response.json();
    let entries = [];
    if (Array.isArray(payload)) {
      entries = payload;
    } else if (Array.isArray(payload?.models)) {
      entries = payload.models;
    }
    return entries
      .slice(0, options.limit || entries.length)
      .map(normalizeGpt4AllEntry)
      .filter(Boolean);
  }

  /**
   * Read Ollama model/variant rows from the database and normalize them.
   *
   * @param {object} [options]
   * @param {number} [options.limit=1000] Maximum number of joined rows.
   * @returns {object[]} One normalized collection per model/variant row.
   */
  collectOllamaFromDatabase(options = {}) {
    const limit = Number(options.limit) > 0 ? Number(options.limit) : 1000;
    const rows = this.database.all(`
      SELECT
        m.*,
        v.tag,
        v.params_b,
        v.quant,
        v.size_gb,
        v.context_length,
        v.input_types,
        v.is_moe,
        v.expert_count
      FROM models m
      JOIN variants v ON v.model_id = m.id
      ORDER BY m.pulls DESC, v.params_b DESC, v.size_gb ASC
      LIMIT ?
    `, [limit]);

    return rows.map((row) => {
      const model = {
        id: row.id,
        name: row.name,
        capabilities: row.capabilities,
        namespace: row.namespace,
        url: row.url,
        pulls: row.pulls,
        tags_count: row.tags_count,
        last_updated: row.last_updated,
        updated_at: row.updated_at
      };
      const variant = {
        tag: row.tag,
        params_b: row.params_b,
        quant: row.quant,
        size_gb: row.size_gb,
        context_length: row.context_length,
        input_types: row.input_types,
        is_moe: row.is_moe,
        expert_count: row.expert_count
      };
      return normalizeOllamaRows(model, variant);
    });
  }

  /**
   * Write collections to the database between beginBatch() and endBatch().
   *
   * Each distinct source is upserted once per call, with one shared timestamp.
   * Every Hugging Face model is its own collection carrying the same source,
   * so upserting per collection repeated the identical source write for every
   * model. endBatch() runs even when an upsert throws.
   *
   * @param {object[]} collections Collections of { source, repos, artifacts }.
   */
  storeCollections(collections) {
    const ingestedAt = new Date().toISOString();
    const storedSources = new Set();

    this.database.beginBatch();
    try {
      for (const collection of collections) {
        const { source } = collection;
        if (source) {
          // Key by id when available, otherwise by object identity.
          const sourceKey = source.id ?? source;
          if (!storedSources.has(sourceKey)) {
            storedSources.add(sourceKey);
            this.database.upsertRegistrySource({
              ...source,
              last_ingested_at: ingestedAt
            });
          }
        }
        for (const repo of collection.repos || []) {
          this.database.upsertRegistryRepo(repo);
        }
        for (const artifact of collection.artifacts || []) {
          this.database.upsertModelArtifact(artifact);
        }
      }
    } finally {
      this.database.endBatch();
    }
  }

  /**
   * Count what a set of collections contains.
   *
   * @param {object[]} collections Collections of { source, repos, artifacts }.
   * @param {object} [options]
   * @param {boolean} [options.dryRun] Echoed back in the summary.
   * @returns {{dryRun: boolean, sources: number, repos: number, artifacts: number, collections: number}}
   *   Distinct source ids, distinct repo ids, total artifacts and number of collections.
   */
  summarizeCollections(collections, options = {}) {
    const sources = new Set();
    const repoIds = new Set();
    let artifacts = 0;
    for (const collection of collections) {
      if (collection.source?.id) sources.add(collection.source.id);
      for (const repo of collection.repos || []) repoIds.add(repo.id);
      artifacts += (collection.artifacts || []).length;
    }

    return {
      dryRun: Boolean(options.dryRun),
      sources: sources.size,
      repos: repoIds.size,
      artifacts,
      collections: collections.length
    };
  }
}
|
|
752
|
+
|
|
753
|
+
module.exports = {
|
|
754
|
+
RegistryIngestor,
|
|
755
|
+
SOURCE_DEFINITIONS,
|
|
756
|
+
normalizeHuggingFaceModel,
|
|
757
|
+
normalizeGpt4AllEntry,
|
|
758
|
+
normalizeOllamaRows,
|
|
759
|
+
inferFormat,
|
|
760
|
+
inferQuantization,
|
|
761
|
+
inferRuntimeSupport,
|
|
762
|
+
isModelArtifactFile,
|
|
763
|
+
parseParamsB,
|
|
764
|
+
buildHuggingFaceDownloadUrl
|
|
765
|
+
};
|