@aws/ml-container-creator 0.9.1 → 0.10.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE-THIRD-PARTY +9304 -0
- package/bin/cli.js +2 -0
- package/config/bootstrap-e2e-stack.json +341 -0
- package/config/bootstrap-stack.json +40 -3
- package/config/parameter-schema-v2.json +2049 -0
- package/config/tune-catalog.json +1781 -0
- package/infra/ci-harness/buildspec.yml +1 -0
- package/infra/ci-harness/lambda/path-prover/brain.ts +306 -0
- package/infra/ci-harness/lambda/path-prover/write-results.ts +152 -0
- package/infra/ci-harness/lib/ci-harness-stack.ts +837 -7
- package/infra/ci-harness/state-machines/path-prover.asl.json +496 -0
- package/package.json +53 -68
- package/servers/base-image-picker/index.js +121 -121
- package/servers/e2e-status/index.js +297 -0
- package/servers/e2e-status/manifest.json +14 -0
- package/servers/e2e-status/package.json +15 -0
- package/servers/endpoint-picker/LICENSE +202 -0
- package/servers/endpoint-picker/index.js +536 -0
- package/servers/endpoint-picker/manifest.json +14 -0
- package/servers/endpoint-picker/package.json +18 -0
- package/servers/hyperpod-cluster-picker/index.js +125 -125
- package/servers/instance-sizer/index.js +138 -138
- package/servers/instance-sizer/lib/instance-ranker.js +76 -76
- package/servers/instance-sizer/lib/model-resolver.js +61 -61
- package/servers/instance-sizer/lib/quota-resolver.js +113 -113
- package/servers/instance-sizer/lib/vram-estimator.js +31 -31
- package/servers/lib/bedrock-client.js +38 -38
- package/servers/lib/catalogs/jumpstart-public.json +101 -16
- package/servers/lib/catalogs/model-servers.json +201 -3
- package/servers/lib/catalogs/models.json +182 -26
- package/servers/lib/custom-validators.js +13 -13
- package/servers/lib/dynamic-resolver.js +4 -4
- package/servers/marketplace-picker/index.js +342 -0
- package/servers/marketplace-picker/manifest.json +14 -0
- package/servers/marketplace-picker/package.json +18 -0
- package/servers/model-picker/index.js +382 -382
- package/servers/region-picker/index.js +56 -56
- package/servers/workload-picker/LICENSE +202 -0
- package/servers/workload-picker/catalogs/workload-profiles.json +67 -0
- package/servers/workload-picker/index.js +171 -0
- package/servers/workload-picker/manifest.json +16 -0
- package/servers/workload-picker/package.json +16 -0
- package/src/app.js +4 -390
- package/src/lib/bootstrap-command-handler.js +710 -1148
- package/src/lib/bootstrap-config.js +36 -0
- package/src/lib/bootstrap-profile-manager.js +641 -0
- package/src/lib/bootstrap-provisioners.js +421 -0
- package/src/lib/ci-register-helpers.js +74 -0
- package/src/lib/config-loader.js +408 -0
- package/src/lib/config-manager.js +66 -1685
- package/src/lib/config-mcp-client.js +118 -0
- package/src/lib/config-validator.js +634 -0
- package/src/lib/cuda-resolver.js +149 -0
- package/src/lib/e2e-catalog-validator.js +251 -3
- package/src/lib/e2e-ci-recorder.js +103 -0
- package/src/lib/generated/cli-options.js +315 -311
- package/src/lib/generated/parameter-matrix.js +671 -0
- package/src/lib/generated/validation-rules.js +71 -71
- package/src/lib/marketplace-flow.js +276 -0
- package/src/lib/mcp-query-runner.js +768 -0
- package/src/lib/parameter-schema-validator.js +62 -18
- package/src/lib/path-prover-brain.js +607 -0
- package/src/lib/prompt-runner.js +41 -1504
- package/src/lib/prompts/feature-prompts.js +172 -0
- package/src/lib/prompts/index.js +48 -0
- package/src/lib/prompts/infrastructure-prompts.js +690 -0
- package/src/lib/prompts/model-prompts.js +552 -0
- package/src/lib/prompts/project-prompts.js +82 -0
- package/src/lib/prompts.js +2 -1446
- package/src/lib/registry-command-handler.js +135 -3
- package/src/lib/secrets-prompt-runner.js +251 -0
- package/src/lib/template-variable-resolver.js +422 -0
- package/src/lib/tune-catalog-validator.js +37 -4
- package/templates/Dockerfile +9 -0
- package/templates/code/adapter_sidecar.py +444 -0
- package/templates/code/serve +6 -0
- package/templates/code/serve.d/vllm.ejs +1 -1
- package/templates/do/.benchmark_writer.py +1476 -0
- package/templates/do/.tune_helper.py +982 -57
- package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
- package/templates/do/adapter +149 -0
- package/templates/do/benchmark +639 -85
- package/templates/do/config +108 -5
- package/templates/do/deploy.d/managed-inference.ejs +192 -11
- package/templates/do/optimize +106 -37
- package/templates/do/register +89 -0
- package/templates/do/test +13 -0
- package/templates/do/tune +378 -59
- package/templates/do/validate +44 -4
- package/config/parameter-schema.json +0 -88
|
@@ -24,7 +24,7 @@ const GPU_MEMORY_MAP = {
|
|
|
24
24
|
'NVIDIA H100': 80,
|
|
25
25
|
'AWS Inferentia2': 32,
|
|
26
26
|
'AWS Trainium': 32
|
|
27
|
-
}
|
|
27
|
+
};
|
|
28
28
|
|
|
29
29
|
/**
|
|
30
30
|
* Cost tier classification by instance family.
|
|
@@ -45,7 +45,7 @@ const COST_TIER_MAP = {
|
|
|
45
45
|
'p5e': 'high',
|
|
46
46
|
'p5en': 'high',
|
|
47
47
|
'p6': 'high'
|
|
48
|
-
}
|
|
48
|
+
};
|
|
49
49
|
|
|
50
50
|
/**
|
|
51
51
|
* Relative cost weight by tier for sorting within TP groups.
|
|
@@ -55,7 +55,7 @@ const COST_TIER_WEIGHT = {
|
|
|
55
55
|
'low': 1,
|
|
56
56
|
'medium': 2,
|
|
57
57
|
'high': 3
|
|
58
|
-
}
|
|
58
|
+
};
|
|
59
59
|
|
|
60
60
|
/**
|
|
61
61
|
* Generation weight by instance family.
|
|
@@ -77,13 +77,13 @@ const GENERATION_WEIGHT = {
|
|
|
77
77
|
'p3': 6,
|
|
78
78
|
'g4dn': 7,
|
|
79
79
|
'g4ad': 7
|
|
80
|
-
}
|
|
80
|
+
};
|
|
81
81
|
|
|
82
82
|
/**
|
|
83
83
|
* TP overhead penalty: 10% per additional GPU beyond the first.
|
|
84
84
|
* Effective VRAM = totalVram × (1 - 0.10 × (gpuCount - 1))
|
|
85
85
|
*/
|
|
86
|
-
const TP_OVERHEAD_PER_GPU = 0.10
|
|
86
|
+
const TP_OVERHEAD_PER_GPU = 0.10;
|
|
87
87
|
|
|
88
88
|
// ── Helper Functions ─────────────────────────────────────────────────────────
|
|
89
89
|
|
|
@@ -101,33 +101,33 @@ const TP_OVERHEAD_PER_GPU = 0.10
|
|
|
101
101
|
const getPerGpuMemoryGb = (instance) => {
|
|
102
102
|
// 1. Direct field
|
|
103
103
|
if (instance.gpuMemoryGb) {
|
|
104
|
-
return instance.gpuMemoryGb
|
|
104
|
+
return instance.gpuMemoryGb;
|
|
105
105
|
}
|
|
106
106
|
|
|
107
107
|
// 2. Parse from accelerator string
|
|
108
108
|
if (instance.accelerator) {
|
|
109
109
|
// Match patterns like "A10G 24GB", "4x A10G 96GB", "T4 16GB"
|
|
110
|
-
const totalMatch = instance.accelerator.match(/(\d+)GB/)
|
|
110
|
+
const totalMatch = instance.accelerator.match(/(\d+)GB/);
|
|
111
111
|
if (totalMatch) {
|
|
112
|
-
const totalGb = parseInt(totalMatch[1], 10)
|
|
113
|
-
const gpuCount = instance.gpus || 1
|
|
112
|
+
const totalGb = parseInt(totalMatch[1], 10);
|
|
113
|
+
const gpuCount = instance.gpus || 1;
|
|
114
114
|
// If the string has a multiplier prefix like "4x", the GB is total
|
|
115
|
-
const hasMultiplier = instance.accelerator.match(/^(\d+)x\s/)
|
|
115
|
+
const hasMultiplier = instance.accelerator.match(/^(\d+)x\s/);
|
|
116
116
|
if (hasMultiplier) {
|
|
117
|
-
return totalGb / gpuCount
|
|
117
|
+
return totalGb / gpuCount;
|
|
118
118
|
}
|
|
119
119
|
// Single GPU entry — the GB value is per-GPU
|
|
120
|
-
return totalGb
|
|
120
|
+
return totalGb;
|
|
121
121
|
}
|
|
122
122
|
}
|
|
123
123
|
|
|
124
124
|
// 3. Lookup by hardware type
|
|
125
125
|
if (instance.hardware && GPU_MEMORY_MAP[instance.hardware]) {
|
|
126
|
-
return GPU_MEMORY_MAP[instance.hardware]
|
|
126
|
+
return GPU_MEMORY_MAP[instance.hardware];
|
|
127
127
|
}
|
|
128
128
|
|
|
129
|
-
return null
|
|
130
|
-
}
|
|
129
|
+
return null;
|
|
130
|
+
};
|
|
131
131
|
|
|
132
132
|
/**
|
|
133
133
|
* Determine cost tier for an instance based on its family.
|
|
@@ -137,11 +137,11 @@ const getPerGpuMemoryGb = (instance) => {
|
|
|
137
137
|
*/
|
|
138
138
|
const getCostTier = (instance) => {
|
|
139
139
|
if (instance.costTier) {
|
|
140
|
-
return instance.costTier
|
|
140
|
+
return instance.costTier;
|
|
141
141
|
}
|
|
142
|
-
const family = instance.family || ''
|
|
143
|
-
return COST_TIER_MAP[family] || 'medium'
|
|
144
|
-
}
|
|
142
|
+
const family = instance.family || '';
|
|
143
|
+
return COST_TIER_MAP[family] || 'medium';
|
|
144
|
+
};
|
|
145
145
|
|
|
146
146
|
/**
|
|
147
147
|
* Calculate effective VRAM available after TP overhead penalty.
|
|
@@ -158,11 +158,11 @@ const getCostTier = (instance) => {
|
|
|
158
158
|
* @returns {number} Effective usable VRAM in GB
|
|
159
159
|
*/
|
|
160
160
|
const effectiveVram = (totalVramGb, gpuCount) => {
|
|
161
|
-
if (gpuCount <= 1) return totalVramGb
|
|
162
|
-
const perGpuMemory = totalVramGb / gpuCount
|
|
163
|
-
const overhead = perGpuMemory * TP_OVERHEAD_PER_GPU * (gpuCount - 1)
|
|
164
|
-
return totalVramGb - overhead
|
|
165
|
-
}
|
|
161
|
+
if (gpuCount <= 1) return totalVramGb;
|
|
162
|
+
const perGpuMemory = totalVramGb / gpuCount;
|
|
163
|
+
const overhead = perGpuMemory * TP_OVERHEAD_PER_GPU * (gpuCount - 1);
|
|
164
|
+
return totalVramGb - overhead;
|
|
165
|
+
};
|
|
166
166
|
|
|
167
167
|
// ── Main Function ────────────────────────────────────────────────────────────
|
|
168
168
|
|
|
@@ -177,33 +177,33 @@ const effectiveVram = (totalVramGb, gpuCount) => {
|
|
|
177
177
|
* @returns {object[]} Ranked list of compatible instances
|
|
178
178
|
*/
|
|
179
179
|
const filterAndRankInstances = (vramRequired, instanceCatalog, options = {}) => {
|
|
180
|
-
const { limit = 10, allowTensorParallelism = true } = options
|
|
180
|
+
const { limit = 10, allowTensorParallelism = true } = options;
|
|
181
181
|
|
|
182
182
|
if (!vramRequired || vramRequired <= 0) {
|
|
183
|
-
return []
|
|
183
|
+
return [];
|
|
184
184
|
}
|
|
185
185
|
|
|
186
186
|
if (!instanceCatalog || typeof instanceCatalog !== 'object') {
|
|
187
|
-
return []
|
|
187
|
+
return [];
|
|
188
188
|
}
|
|
189
189
|
|
|
190
|
-
const candidates = []
|
|
190
|
+
const candidates = [];
|
|
191
191
|
|
|
192
192
|
for (const [instanceType, meta] of Object.entries(instanceCatalog)) {
|
|
193
193
|
// Skip non-GPU instances
|
|
194
|
-
if (!meta.gpus || meta.gpus <= 0) continue
|
|
195
|
-
if (meta.category !== 'gpu') continue
|
|
194
|
+
if (!meta.gpus || meta.gpus <= 0) continue;
|
|
195
|
+
if (meta.category !== 'gpu') continue;
|
|
196
196
|
|
|
197
|
-
const perGpuMemory = getPerGpuMemoryGb(meta)
|
|
198
|
-
if (!perGpuMemory) continue
|
|
197
|
+
const perGpuMemory = getPerGpuMemoryGb(meta);
|
|
198
|
+
if (!perGpuMemory) continue;
|
|
199
199
|
|
|
200
|
-
const gpuCount = meta.gpus
|
|
201
|
-
const totalVramGb = perGpuMemory * gpuCount
|
|
200
|
+
const gpuCount = meta.gpus;
|
|
201
|
+
const totalVramGb = perGpuMemory * gpuCount;
|
|
202
202
|
|
|
203
203
|
// Determine if model fits on a single GPU
|
|
204
204
|
if (gpuCount === 1) {
|
|
205
205
|
if (perGpuMemory >= vramRequired) {
|
|
206
|
-
const utilizationPercent = Math.round((vramRequired / perGpuMemory) * 100)
|
|
206
|
+
const utilizationPercent = Math.round((vramRequired / perGpuMemory) * 100);
|
|
207
207
|
candidates.push({
|
|
208
208
|
instanceType,
|
|
209
209
|
gpuCount,
|
|
@@ -212,13 +212,13 @@ const filterAndRankInstances = (vramRequired, instanceCatalog, options = {}) =>
|
|
|
212
212
|
tensorParallelism: 1,
|
|
213
213
|
costTier: getCostTier(meta),
|
|
214
214
|
family: meta.family || ''
|
|
215
|
-
})
|
|
215
|
+
});
|
|
216
216
|
}
|
|
217
217
|
} else if (allowTensorParallelism) {
|
|
218
218
|
// Multi-GPU: check if model fits with TP across all GPUs
|
|
219
|
-
const effectiveTotal = effectiveVram(totalVramGb, gpuCount)
|
|
219
|
+
const effectiveTotal = effectiveVram(totalVramGb, gpuCount);
|
|
220
220
|
if (effectiveTotal >= vramRequired) {
|
|
221
|
-
const utilizationPercent = Math.round((vramRequired / effectiveTotal) * 100)
|
|
221
|
+
const utilizationPercent = Math.round((vramRequired / effectiveTotal) * 100);
|
|
222
222
|
candidates.push({
|
|
223
223
|
instanceType,
|
|
224
224
|
gpuCount,
|
|
@@ -227,7 +227,7 @@ const filterAndRankInstances = (vramRequired, instanceCatalog, options = {}) =>
|
|
|
227
227
|
tensorParallelism: gpuCount,
|
|
228
228
|
costTier: getCostTier(meta),
|
|
229
229
|
family: meta.family || ''
|
|
230
|
-
})
|
|
230
|
+
});
|
|
231
231
|
}
|
|
232
232
|
}
|
|
233
233
|
}
|
|
@@ -240,34 +240,34 @@ const filterAndRankInstances = (vramRequired, instanceCatalog, options = {}) =>
|
|
|
240
240
|
candidates.sort((a, b) => {
|
|
241
241
|
// Primary: TP degree (lower is better)
|
|
242
242
|
if (a.tensorParallelism !== b.tensorParallelism) {
|
|
243
|
-
return a.tensorParallelism - b.tensorParallelism
|
|
243
|
+
return a.tensorParallelism - b.tensorParallelism;
|
|
244
244
|
}
|
|
245
245
|
|
|
246
246
|
// Secondary: generation (newer is better — lower weight)
|
|
247
|
-
const genA = GENERATION_WEIGHT[a.family] || 4
|
|
248
|
-
const genB = GENERATION_WEIGHT[b.family] || 4
|
|
247
|
+
const genA = GENERATION_WEIGHT[a.family] || 4;
|
|
248
|
+
const genB = GENERATION_WEIGHT[b.family] || 4;
|
|
249
249
|
if (genA !== genB) {
|
|
250
|
-
return genA - genB
|
|
250
|
+
return genA - genB;
|
|
251
251
|
}
|
|
252
252
|
|
|
253
253
|
// Tertiary: cost tier (lower is better)
|
|
254
|
-
const costA = COST_TIER_WEIGHT[a.costTier] || 2
|
|
255
|
-
const costB = COST_TIER_WEIGHT[b.costTier] || 2
|
|
254
|
+
const costA = COST_TIER_WEIGHT[a.costTier] || 2;
|
|
255
|
+
const costB = COST_TIER_WEIGHT[b.costTier] || 2;
|
|
256
256
|
if (costA !== costB) {
|
|
257
|
-
return costA - costB
|
|
257
|
+
return costA - costB;
|
|
258
258
|
}
|
|
259
259
|
|
|
260
260
|
// Quaternary: prefer lower total VRAM (right-sized, less waste)
|
|
261
261
|
if (a.totalVramGb !== b.totalVramGb) {
|
|
262
|
-
return a.totalVramGb - b.totalVramGb
|
|
262
|
+
return a.totalVramGb - b.totalVramGb;
|
|
263
263
|
}
|
|
264
264
|
|
|
265
265
|
// Final tiebreaker: instance type name for deterministic ordering
|
|
266
|
-
return a.instanceType.localeCompare(b.instanceType)
|
|
267
|
-
})
|
|
266
|
+
return a.instanceType.localeCompare(b.instanceType);
|
|
267
|
+
});
|
|
268
268
|
|
|
269
|
-
return candidates.slice(0, limit)
|
|
270
|
-
}
|
|
269
|
+
return candidates.slice(0, limit);
|
|
270
|
+
};
|
|
271
271
|
|
|
272
272
|
// ── Availability Ranking ─────────────────────────────────────────────────────
|
|
273
273
|
|
|
@@ -279,7 +279,7 @@ const CAPACITY_TYPE_PRIORITY = {
|
|
|
279
279
|
reserved: 0,
|
|
280
280
|
ftp: 1,
|
|
281
281
|
'on-demand': 2
|
|
282
|
-
}
|
|
282
|
+
};
|
|
283
283
|
|
|
284
284
|
/**
|
|
285
285
|
* Annotate, filter, and re-rank instance recommendations based on
|
|
@@ -305,40 +305,40 @@ const CAPACITY_TYPE_PRIORITY = {
|
|
|
305
305
|
*/
|
|
306
306
|
const applyAvailabilityRanking = (recommendations, quotas, reservations, ftps) => {
|
|
307
307
|
if (!recommendations || recommendations.length === 0) {
|
|
308
|
-
return []
|
|
308
|
+
return [];
|
|
309
309
|
}
|
|
310
310
|
|
|
311
311
|
// If all signals are null (all API calls failed), return unmodified
|
|
312
312
|
if (!quotas && !reservations && !ftps) {
|
|
313
|
-
return recommendations
|
|
313
|
+
return recommendations;
|
|
314
314
|
}
|
|
315
315
|
|
|
316
316
|
// Annotate each recommendation with capacityType and quotaStatus
|
|
317
317
|
for (const rec of recommendations) {
|
|
318
|
-
rec.capacityType = 'on-demand'
|
|
319
|
-
rec.quotaStatus = 'available'
|
|
318
|
+
rec.capacityType = 'on-demand';
|
|
319
|
+
rec.quotaStatus = 'available';
|
|
320
320
|
|
|
321
321
|
if (reservations?.has(rec.instanceType)) {
|
|
322
|
-
rec.capacityType = 'reserved'
|
|
323
|
-
rec.reservationInfo = reservations.get(rec.instanceType)
|
|
324
|
-
rec.reservationType = 'training-plan'
|
|
322
|
+
rec.capacityType = 'reserved';
|
|
323
|
+
rec.reservationInfo = reservations.get(rec.instanceType);
|
|
324
|
+
rec.reservationType = 'training-plan';
|
|
325
325
|
} else if (ftps?.has(rec.instanceType)) {
|
|
326
|
-
rec.capacityType = 'ftp'
|
|
327
|
-
rec.ftpInfo = ftps.get(rec.instanceType)
|
|
326
|
+
rec.capacityType = 'ftp';
|
|
327
|
+
rec.ftpInfo = ftps.get(rec.instanceType);
|
|
328
328
|
}
|
|
329
329
|
|
|
330
330
|
// quotaStatus applies to all instances regardless of capacityType
|
|
331
331
|
if (quotas) {
|
|
332
|
-
const q = quotas.get(rec.instanceType)
|
|
332
|
+
const q = quotas.get(rec.instanceType);
|
|
333
333
|
if (q && q.headroom === 0) {
|
|
334
|
-
rec.quotaStatus = 'zero-quota'
|
|
334
|
+
rec.quotaStatus = 'zero-quota';
|
|
335
335
|
} else if (q && q.headroom < 2) {
|
|
336
|
-
rec.quotaStatus = 'limited'
|
|
336
|
+
rec.quotaStatus = 'limited';
|
|
337
337
|
}
|
|
338
338
|
if (q) {
|
|
339
|
-
rec.quotaHeadroom = q.headroom
|
|
340
|
-
rec.quotaDeployed = q.deployed
|
|
341
|
-
rec.quotaLimit = q.quota
|
|
339
|
+
rec.quotaHeadroom = q.headroom;
|
|
340
|
+
rec.quotaDeployed = q.deployed;
|
|
341
|
+
rec.quotaLimit = q.quota;
|
|
342
342
|
}
|
|
343
343
|
}
|
|
344
344
|
}
|
|
@@ -346,18 +346,18 @@ const applyAvailabilityRanking = (recommendations, quotas, reservations, ftps) =
|
|
|
346
346
|
// Filter out zero-quota instances (but never filter reserved/FTP — you have the capacity)
|
|
347
347
|
const filtered = recommendations.filter(r =>
|
|
348
348
|
r.quotaStatus !== 'zero-quota' || r.capacityType === 'reserved' || r.capacityType === 'ftp'
|
|
349
|
-
)
|
|
349
|
+
);
|
|
350
350
|
|
|
351
351
|
// Sort: reserved first, then FTP, then on-demand (preserve existing order within tier)
|
|
352
352
|
filtered.sort((a, b) => {
|
|
353
|
-
const pa = CAPACITY_TYPE_PRIORITY[a.capacityType] ?? 2
|
|
354
|
-
const pb = CAPACITY_TYPE_PRIORITY[b.capacityType] ?? 2
|
|
355
|
-
if (pa !== pb) return pa - pb
|
|
356
|
-
return 0
|
|
357
|
-
})
|
|
353
|
+
const pa = CAPACITY_TYPE_PRIORITY[a.capacityType] ?? 2;
|
|
354
|
+
const pb = CAPACITY_TYPE_PRIORITY[b.capacityType] ?? 2;
|
|
355
|
+
if (pa !== pb) return pa - pb;
|
|
356
|
+
return 0;
|
|
357
|
+
});
|
|
358
358
|
|
|
359
|
-
return filtered
|
|
360
|
-
}
|
|
359
|
+
return filtered;
|
|
360
|
+
};
|
|
361
361
|
|
|
362
362
|
export {
|
|
363
363
|
filterAndRankInstances,
|
|
@@ -371,4 +371,4 @@ export {
|
|
|
371
371
|
GENERATION_WEIGHT,
|
|
372
372
|
CAPACITY_TYPE_PRIORITY,
|
|
373
373
|
TP_OVERHEAD_PER_GPU
|
|
374
|
-
}
|
|
374
|
+
};
|
|
@@ -10,18 +10,18 @@
|
|
|
10
10
|
* 3. If neither available, return null (caller handles fallback)
|
|
11
11
|
*/
|
|
12
12
|
|
|
13
|
-
import { readFile } from 'node:fs/promises'
|
|
14
|
-
import { fileURLToPath } from 'node:url'
|
|
15
|
-
import { dirname, join } from 'node:path'
|
|
13
|
+
import { readFile } from 'node:fs/promises';
|
|
14
|
+
import { fileURLToPath } from 'node:url';
|
|
15
|
+
import { dirname, join } from 'node:path';
|
|
16
16
|
|
|
17
17
|
// ── Constants ────────────────────────────────────────────────────────────────
|
|
18
18
|
|
|
19
|
-
const __filename = fileURLToPath(import.meta.url)
|
|
20
|
-
const __dirname = dirname(__filename)
|
|
19
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
20
|
+
const __dirname = dirname(__filename);
|
|
21
21
|
|
|
22
|
-
const DEFAULT_CATALOG_PATH = join(__dirname, '..', '..', 'lib', 'catalogs', 'models.json')
|
|
23
|
-
const HUGGINGFACE_BASE_URL = 'https://huggingface.co'
|
|
24
|
-
const HUGGINGFACE_TIMEOUT_MS = 5000
|
|
22
|
+
const DEFAULT_CATALOG_PATH = join(__dirname, '..', '..', 'lib', 'catalogs', 'models.json');
|
|
23
|
+
const HUGGINGFACE_BASE_URL = 'https://huggingface.co';
|
|
24
|
+
const HUGGINGFACE_TIMEOUT_MS = 5000;
|
|
25
25
|
|
|
26
26
|
// ── Glob Pattern Matching ────────────────────────────────────────────────────
|
|
27
27
|
|
|
@@ -36,10 +36,10 @@ const HUGGINGFACE_TIMEOUT_MS = 5000
|
|
|
36
36
|
const globMatch = (pattern, text) => {
|
|
37
37
|
const regexStr = pattern
|
|
38
38
|
.replace(/[.+^${}()|[\]\\]/g, '\\$&')
|
|
39
|
-
.replace(/\*/g, '.*')
|
|
40
|
-
const regex = new RegExp(`^${regexStr}$`, 'i')
|
|
41
|
-
return regex.test(text)
|
|
42
|
-
}
|
|
39
|
+
.replace(/\*/g, '.*');
|
|
40
|
+
const regex = new RegExp(`^${regexStr}$`, 'i');
|
|
41
|
+
return regex.test(text);
|
|
42
|
+
};
|
|
43
43
|
|
|
44
44
|
// ── Catalog Lookup ───────────────────────────────────────────────────────────
|
|
45
45
|
|
|
@@ -51,12 +51,12 @@ const globMatch = (pattern, text) => {
|
|
|
51
51
|
*/
|
|
52
52
|
const loadCatalog = async (catalogPath) => {
|
|
53
53
|
try {
|
|
54
|
-
const raw = await readFile(catalogPath || DEFAULT_CATALOG_PATH, 'utf-8')
|
|
55
|
-
return JSON.parse(raw)
|
|
54
|
+
const raw = await readFile(catalogPath || DEFAULT_CATALOG_PATH, 'utf-8');
|
|
55
|
+
return JSON.parse(raw);
|
|
56
56
|
} catch {
|
|
57
|
-
return null
|
|
57
|
+
return null;
|
|
58
58
|
}
|
|
59
|
-
}
|
|
59
|
+
};
|
|
60
60
|
|
|
61
61
|
/**
|
|
62
62
|
* Look up a model in the catalog by exact match or glob pattern.
|
|
@@ -67,26 +67,26 @@ const loadCatalog = async (catalogPath) => {
|
|
|
67
67
|
*/
|
|
68
68
|
const catalogLookup = (modelName, catalog) => {
|
|
69
69
|
if (!catalog) {
|
|
70
|
-
return null
|
|
70
|
+
return null;
|
|
71
71
|
}
|
|
72
72
|
|
|
73
73
|
// Support both flat catalog (models.json) and wrapped format ({ models: {...} })
|
|
74
|
-
const models = catalog.models || catalog
|
|
74
|
+
const models = catalog.models || catalog;
|
|
75
75
|
|
|
76
76
|
// Try exact match first
|
|
77
77
|
if (models[modelName]) {
|
|
78
|
-
return models[modelName]
|
|
78
|
+
return models[modelName];
|
|
79
79
|
}
|
|
80
80
|
|
|
81
81
|
// Try glob pattern matching
|
|
82
82
|
for (const pattern of Object.keys(models)) {
|
|
83
83
|
if (globMatch(pattern, modelName)) {
|
|
84
|
-
return models[pattern]
|
|
84
|
+
return models[pattern];
|
|
85
85
|
}
|
|
86
86
|
}
|
|
87
87
|
|
|
88
|
-
return null
|
|
89
|
-
}
|
|
88
|
+
return null;
|
|
89
|
+
};
|
|
90
90
|
|
|
91
91
|
// ── HuggingFace API ──────────────────────────────────────────────────────────
|
|
92
92
|
|
|
@@ -97,28 +97,28 @@ const catalogLookup = (modelName, catalog) => {
|
|
|
97
97
|
* @returns {Promise<object|null>} Parsed config or null on failure
|
|
98
98
|
*/
|
|
99
99
|
const fetchHuggingFaceConfig = async (modelName) => {
|
|
100
|
-
const url = `${HUGGINGFACE_BASE_URL}/${modelName}/resolve/main/config.json`
|
|
100
|
+
const url = `${HUGGINGFACE_BASE_URL}/${modelName}/resolve/main/config.json`;
|
|
101
101
|
|
|
102
102
|
try {
|
|
103
|
-
const controller = new AbortController()
|
|
104
|
-
const timeout = setTimeout(() => controller.abort(), HUGGINGFACE_TIMEOUT_MS)
|
|
103
|
+
const controller = new AbortController();
|
|
104
|
+
const timeout = setTimeout(() => controller.abort(), HUGGINGFACE_TIMEOUT_MS);
|
|
105
105
|
|
|
106
106
|
const response = await fetch(url, {
|
|
107
107
|
signal: controller.signal,
|
|
108
108
|
headers: { 'Accept': 'application/json' }
|
|
109
|
-
})
|
|
109
|
+
});
|
|
110
110
|
|
|
111
|
-
clearTimeout(timeout)
|
|
111
|
+
clearTimeout(timeout);
|
|
112
112
|
|
|
113
113
|
if (!response.ok) {
|
|
114
|
-
return null
|
|
114
|
+
return null;
|
|
115
115
|
}
|
|
116
116
|
|
|
117
|
-
return await response.json()
|
|
117
|
+
return await response.json();
|
|
118
118
|
} catch {
|
|
119
|
-
return null
|
|
119
|
+
return null;
|
|
120
120
|
}
|
|
121
|
-
}
|
|
121
|
+
};
|
|
122
122
|
|
|
123
123
|
/**
|
|
124
124
|
* Estimate parameter count from architecture dimensions.
|
|
@@ -133,15 +133,15 @@ const fetchHuggingFaceConfig = async (modelName) => {
|
|
|
133
133
|
* @returns {number|null} Estimated parameter count or null if dimensions unavailable
|
|
134
134
|
*/
|
|
135
135
|
const estimateParamsFromConfig = (config) => {
|
|
136
|
-
const hiddenSize = config.hidden_size
|
|
137
|
-
const numLayers = config.num_hidden_layers
|
|
136
|
+
const hiddenSize = config.hidden_size;
|
|
137
|
+
const numLayers = config.num_hidden_layers;
|
|
138
138
|
|
|
139
139
|
if (!hiddenSize || !numLayers) {
|
|
140
|
-
return null
|
|
140
|
+
return null;
|
|
141
141
|
}
|
|
142
142
|
|
|
143
|
-
return hiddenSize * numLayers * 12
|
|
144
|
-
}
|
|
143
|
+
return hiddenSize * numLayers * 12;
|
|
144
|
+
};
|
|
145
145
|
|
|
146
146
|
/**
|
|
147
147
|
* Extract model metadata from a HuggingFace config.json.
|
|
@@ -151,11 +151,11 @@ const estimateParamsFromConfig = (config) => {
|
|
|
151
151
|
*/
|
|
152
152
|
const extractFromHuggingFaceConfig = (config) => {
|
|
153
153
|
const parameterCount = config.num_parameters
|
|
154
|
-
?? estimateParamsFromConfig(config)
|
|
154
|
+
?? estimateParamsFromConfig(config);
|
|
155
155
|
|
|
156
|
-
const dtype = config.torch_dtype || 'float16'
|
|
157
|
-
const architecture = config.architectures?.[0] || 'unknown'
|
|
158
|
-
const maxPositionEmbeddings = config.max_position_embeddings || 4096
|
|
156
|
+
const dtype = config.torch_dtype || 'float16';
|
|
157
|
+
const architecture = config.architectures?.[0] || 'unknown';
|
|
158
|
+
const maxPositionEmbeddings = config.max_position_embeddings || 4096;
|
|
159
159
|
|
|
160
160
|
return {
|
|
161
161
|
parameterCount,
|
|
@@ -163,16 +163,16 @@ const extractFromHuggingFaceConfig = (config) => {
|
|
|
163
163
|
architecture,
|
|
164
164
|
maxPositionEmbeddings,
|
|
165
165
|
source: 'huggingface_api'
|
|
166
|
-
}
|
|
167
|
-
}
|
|
166
|
+
};
|
|
167
|
+
};
|
|
168
168
|
|
|
169
169
|
// ── In-memory cache for discover mode ────────────────────────────────────────
|
|
170
170
|
|
|
171
|
-
const discoverCache = new Map()
|
|
171
|
+
const discoverCache = new Map();
|
|
172
172
|
|
|
173
173
|
// ── Protocol prefix detection ────────────────────────────────────────────────
|
|
174
174
|
|
|
175
|
-
const PROTOCOL_PREFIXES = ['jumpstart://', 'jumpstart-hub://', 's3://', 'registry://']
|
|
175
|
+
const PROTOCOL_PREFIXES = ['jumpstart://', 'jumpstart-hub://', 's3://', 'registry://'];
|
|
176
176
|
|
|
177
177
|
/**
|
|
178
178
|
* Check if a model name matches the HuggingFace org/model-name pattern.
|
|
@@ -182,13 +182,13 @@ const PROTOCOL_PREFIXES = ['jumpstart://', 'jumpstart-hub://', 's3://', 'registr
|
|
|
182
182
|
* @returns {boolean} True if it matches the HuggingFace pattern
|
|
183
183
|
*/
|
|
184
184
|
const isHuggingFacePattern = (modelName) => {
|
|
185
|
-
if (!modelName || typeof modelName !== 'string') return false
|
|
185
|
+
if (!modelName || typeof modelName !== 'string') return false;
|
|
186
186
|
// Must not have a protocol prefix
|
|
187
|
-
if (PROTOCOL_PREFIXES.some(prefix => modelName.startsWith(prefix))) return false
|
|
187
|
+
if (PROTOCOL_PREFIXES.some(prefix => modelName.startsWith(prefix))) return false;
|
|
188
188
|
// Must contain exactly one `/` (org/model-name)
|
|
189
|
-
const slashCount = (modelName.match(/\//g) || []).length
|
|
190
|
-
return slashCount === 1
|
|
191
|
-
}
|
|
189
|
+
const slashCount = (modelName.match(/\//g) || []).length;
|
|
190
|
+
return slashCount === 1;
|
|
191
|
+
};
|
|
192
192
|
|
|
193
193
|
// ── Main Resolver ────────────────────────────────────────────────────────────
|
|
194
194
|
|
|
@@ -207,11 +207,11 @@ const isHuggingFacePattern = (modelName) => {
|
|
|
207
207
|
* @returns {Promise<{ parameterCount: number, dtype: string, architecture: string, maxPositionEmbeddings: number, source: string } | null>}
|
|
208
208
|
*/
|
|
209
209
|
const resolveModelMetadata = async (modelName, options = {}) => {
|
|
210
|
-
const { discover = true, catalogPath } = options
|
|
210
|
+
const { discover = true, catalogPath } = options;
|
|
211
211
|
|
|
212
212
|
// Tier 1: Catalog lookup
|
|
213
|
-
const catalog = await loadCatalog(catalogPath)
|
|
214
|
-
const catalogEntry = catalogLookup(modelName, catalog)
|
|
213
|
+
const catalog = await loadCatalog(catalogPath);
|
|
214
|
+
const catalogEntry = catalogLookup(modelName, catalog);
|
|
215
215
|
|
|
216
216
|
if (catalogEntry) {
|
|
217
217
|
// Only use catalog entry if it has a usable parameterCount for VRAM estimation.
|
|
@@ -223,7 +223,7 @@ const resolveModelMetadata = async (modelName, options = {}) => {
|
|
|
223
223
|
architecture: catalogEntry.architecture,
|
|
224
224
|
maxPositionEmbeddings: catalogEntry.maxPositionEmbeddings,
|
|
225
225
|
source: 'catalog'
|
|
226
|
-
}
|
|
226
|
+
};
|
|
227
227
|
}
|
|
228
228
|
}
|
|
229
229
|
|
|
@@ -231,26 +231,26 @@ const resolveModelMetadata = async (modelName, options = {}) => {
|
|
|
231
231
|
if (discover && isHuggingFacePattern(modelName)) {
|
|
232
232
|
// Check in-memory cache first
|
|
233
233
|
if (discoverCache.has(modelName)) {
|
|
234
|
-
return discoverCache.get(modelName)
|
|
234
|
+
return discoverCache.get(modelName);
|
|
235
235
|
}
|
|
236
236
|
|
|
237
|
-
const config = await fetchHuggingFaceConfig(modelName)
|
|
237
|
+
const config = await fetchHuggingFaceConfig(modelName);
|
|
238
238
|
|
|
239
239
|
if (config) {
|
|
240
|
-
const metadata = extractFromHuggingFaceConfig(config)
|
|
240
|
+
const metadata = extractFromHuggingFaceConfig(config);
|
|
241
241
|
|
|
242
242
|
// Only return if we got a usable parameter count
|
|
243
243
|
if (metadata.parameterCount) {
|
|
244
244
|
// Cache for session duration
|
|
245
|
-
discoverCache.set(modelName, metadata)
|
|
246
|
-
return metadata
|
|
245
|
+
discoverCache.set(modelName, metadata);
|
|
246
|
+
return metadata;
|
|
247
247
|
}
|
|
248
248
|
}
|
|
249
249
|
}
|
|
250
250
|
|
|
251
251
|
// Tier 3: No metadata available
|
|
252
|
-
return null
|
|
253
|
-
}
|
|
252
|
+
return null;
|
|
253
|
+
};
|
|
254
254
|
|
|
255
255
|
export {
|
|
256
256
|
resolveModelMetadata,
|
|
@@ -266,4 +266,4 @@ export {
|
|
|
266
266
|
DEFAULT_CATALOG_PATH,
|
|
267
267
|
HUGGINGFACE_BASE_URL,
|
|
268
268
|
HUGGINGFACE_TIMEOUT_MS
|
|
269
|
-
}
|
|
269
|
+
};
|