@aws/ml-container-creator 0.10.0 → 0.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. package/LICENSE-THIRD-PARTY +9304 -0
  2. package/bin/cli.js +2 -0
  3. package/config/bootstrap-e2e-stack.json +341 -0
  4. package/config/bootstrap-stack.json +40 -3
  5. package/config/parameter-schema-v2.json +33 -22
  6. package/config/tune-catalog.json +1781 -0
  7. package/infra/ci-harness/buildspec.yml +1 -0
  8. package/infra/ci-harness/lambda/path-prover/brain.ts +306 -0
  9. package/infra/ci-harness/lambda/path-prover/write-results.ts +152 -0
  10. package/infra/ci-harness/lib/ci-harness-stack.ts +851 -7
  11. package/infra/ci-harness/state-machines/path-prover.asl.json +496 -0
  12. package/package.json +53 -67
  13. package/servers/base-image-picker/index.js +121 -121
  14. package/servers/e2e-status/index.js +297 -0
  15. package/servers/e2e-status/manifest.json +14 -0
  16. package/servers/e2e-status/package.json +15 -0
  17. package/servers/endpoint-picker/LICENSE +202 -0
  18. package/servers/endpoint-picker/index.js +536 -0
  19. package/servers/endpoint-picker/manifest.json +14 -0
  20. package/servers/endpoint-picker/package.json +18 -0
  21. package/servers/hyperpod-cluster-picker/index.js +125 -125
  22. package/servers/instance-sizer/index.js +166 -153
  23. package/servers/instance-sizer/lib/instance-ranker.js +120 -76
  24. package/servers/instance-sizer/lib/model-resolver.js +61 -61
  25. package/servers/instance-sizer/lib/quota-resolver.js +113 -113
  26. package/servers/instance-sizer/lib/vram-estimator.js +31 -31
  27. package/servers/lib/bedrock-client.js +38 -38
  28. package/servers/lib/catalogs/instances.json +27 -0
  29. package/servers/lib/catalogs/model-servers.json +201 -3
  30. package/servers/lib/custom-validators.js +13 -13
  31. package/servers/lib/dynamic-resolver.js +4 -4
  32. package/servers/marketplace-picker/index.js +342 -0
  33. package/servers/marketplace-picker/manifest.json +14 -0
  34. package/servers/marketplace-picker/package.json +18 -0
  35. package/servers/model-picker/index.js +382 -382
  36. package/servers/region-picker/index.js +56 -56
  37. package/servers/workload-picker/LICENSE +202 -0
  38. package/servers/workload-picker/catalogs/workload-profiles.json +67 -0
  39. package/servers/workload-picker/index.js +171 -0
  40. package/servers/workload-picker/manifest.json +16 -0
  41. package/servers/workload-picker/package.json +16 -0
  42. package/src/app.js +12 -3
  43. package/src/lib/bootstrap-command-handler.js +609 -15
  44. package/src/lib/bootstrap-config.js +36 -0
  45. package/src/lib/bootstrap-profile-manager.js +48 -41
  46. package/src/lib/ci-register-helpers.js +74 -0
  47. package/src/lib/config-loader.js +3 -0
  48. package/src/lib/config-manager.js +7 -0
  49. package/src/lib/config-validator.js +1 -1
  50. package/src/lib/cuda-resolver.js +17 -8
  51. package/src/lib/generated/cli-options.js +319 -314
  52. package/src/lib/generated/parameter-matrix.js +672 -661
  53. package/src/lib/generated/validation-rules.js +76 -72
  54. package/src/lib/path-prover-brain.js +664 -0
  55. package/src/lib/prompts/infrastructure-prompts.js +2 -2
  56. package/src/lib/prompts/model-prompts.js +6 -0
  57. package/src/lib/prompts/project-prompts.js +12 -0
  58. package/src/lib/secrets-prompt-runner.js +4 -0
  59. package/src/lib/template-manager.js +1 -1
  60. package/src/lib/template-variable-resolver.js +87 -1
  61. package/src/lib/tune-catalog-validator.js +37 -4
  62. package/templates/Dockerfile +9 -0
  63. package/templates/code/adapter_sidecar.py +444 -0
  64. package/templates/code/serve +6 -0
  65. package/templates/code/serve.d/vllm.ejs +1 -1
  66. package/templates/do/.benchmark_writer.py +1476 -0
  67. package/templates/do/.tune_helper.py +982 -57
  68. package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
  69. package/templates/do/adapter +154 -0
  70. package/templates/do/benchmark +639 -85
  71. package/templates/do/build +5 -0
  72. package/templates/do/clean.d/async-inference.ejs +5 -0
  73. package/templates/do/clean.d/batch-transform.ejs +5 -0
  74. package/templates/do/clean.d/hyperpod-eks.ejs +5 -0
  75. package/templates/do/clean.d/managed-inference.ejs +5 -0
  76. package/templates/do/config +115 -45
  77. package/templates/do/deploy.d/async-inference.ejs +30 -3
  78. package/templates/do/deploy.d/batch-transform.ejs +29 -3
  79. package/templates/do/deploy.d/hyperpod-eks.ejs +4 -0
  80. package/templates/do/deploy.d/managed-inference.ejs +216 -14
  81. package/templates/do/lib/endpoint-config.sh +1 -1
  82. package/templates/do/lib/profile.sh +44 -0
  83. package/templates/do/optimize +106 -37
  84. package/templates/do/push +5 -0
  85. package/templates/do/register +94 -0
  86. package/templates/do/stage +567 -0
  87. package/templates/do/submit +7 -0
  88. package/templates/do/test +14 -0
  89. package/templates/do/tune +382 -59
  90. package/templates/do/validate +44 -4
@@ -24,7 +24,7 @@ const GPU_MEMORY_MAP = {
24
24
  'NVIDIA H100': 80,
25
25
  'AWS Inferentia2': 32,
26
26
  'AWS Trainium': 32
27
- }
27
+ };
28
28
 
29
29
  /**
30
30
  * Cost tier classification by instance family.
@@ -45,7 +45,7 @@ const COST_TIER_MAP = {
45
45
  'p5e': 'high',
46
46
  'p5en': 'high',
47
47
  'p6': 'high'
48
- }
48
+ };
49
49
 
50
50
  /**
51
51
  * Relative cost weight by tier for sorting within TP groups.
@@ -55,7 +55,7 @@ const COST_TIER_WEIGHT = {
55
55
  'low': 1,
56
56
  'medium': 2,
57
57
  'high': 3
58
- }
58
+ };
59
59
 
60
60
  /**
61
61
  * Generation weight by instance family.
@@ -77,13 +77,13 @@ const GENERATION_WEIGHT = {
77
77
  'p3': 6,
78
78
  'g4dn': 7,
79
79
  'g4ad': 7
80
- }
80
+ };
81
81
 
82
82
  /**
83
83
  * TP overhead penalty: 10% per additional GPU beyond the first.
84
84
  * Effective VRAM = totalVram × (1 - 0.10 × (gpuCount - 1))
85
85
  */
86
- const TP_OVERHEAD_PER_GPU = 0.10
86
+ const TP_OVERHEAD_PER_GPU = 0.10;
87
87
 
88
88
  // ── Helper Functions ─────────────────────────────────────────────────────────
89
89
 
@@ -101,33 +101,33 @@ const TP_OVERHEAD_PER_GPU = 0.10
101
101
  const getPerGpuMemoryGb = (instance) => {
102
102
  // 1. Direct field
103
103
  if (instance.gpuMemoryGb) {
104
- return instance.gpuMemoryGb
104
+ return instance.gpuMemoryGb;
105
105
  }
106
106
 
107
107
  // 2. Parse from accelerator string
108
108
  if (instance.accelerator) {
109
109
  // Match patterns like "A10G 24GB", "4x A10G 96GB", "T4 16GB"
110
- const totalMatch = instance.accelerator.match(/(\d+)GB/)
110
+ const totalMatch = instance.accelerator.match(/(\d+)GB/);
111
111
  if (totalMatch) {
112
- const totalGb = parseInt(totalMatch[1], 10)
113
- const gpuCount = instance.gpus || 1
112
+ const totalGb = parseInt(totalMatch[1], 10);
113
+ const gpuCount = instance.gpus || 1;
114
114
  // If the string has a multiplier prefix like "4x", the GB is total
115
- const hasMultiplier = instance.accelerator.match(/^(\d+)x\s/)
115
+ const hasMultiplier = instance.accelerator.match(/^(\d+)x\s/);
116
116
  if (hasMultiplier) {
117
- return totalGb / gpuCount
117
+ return totalGb / gpuCount;
118
118
  }
119
119
  // Single GPU entry — the GB value is per-GPU
120
- return totalGb
120
+ return totalGb;
121
121
  }
122
122
  }
123
123
 
124
124
  // 3. Lookup by hardware type
125
125
  if (instance.hardware && GPU_MEMORY_MAP[instance.hardware]) {
126
- return GPU_MEMORY_MAP[instance.hardware]
126
+ return GPU_MEMORY_MAP[instance.hardware];
127
127
  }
128
128
 
129
- return null
130
- }
129
+ return null;
130
+ };
131
131
 
132
132
  /**
133
133
  * Determine cost tier for an instance based on its family.
@@ -137,11 +137,11 @@ const getPerGpuMemoryGb = (instance) => {
137
137
  */
138
138
  const getCostTier = (instance) => {
139
139
  if (instance.costTier) {
140
- return instance.costTier
140
+ return instance.costTier;
141
141
  }
142
- const family = instance.family || ''
143
- return COST_TIER_MAP[family] || 'medium'
144
- }
142
+ const family = instance.family || '';
143
+ return COST_TIER_MAP[family] || 'medium';
144
+ };
145
145
 
146
146
  /**
147
147
  * Calculate effective VRAM available after TP overhead penalty.
@@ -158,11 +158,11 @@ const getCostTier = (instance) => {
158
158
  * @returns {number} Effective usable VRAM in GB
159
159
  */
160
160
  const effectiveVram = (totalVramGb, gpuCount) => {
161
- if (gpuCount <= 1) return totalVramGb
162
- const perGpuMemory = totalVramGb / gpuCount
163
- const overhead = perGpuMemory * TP_OVERHEAD_PER_GPU * (gpuCount - 1)
164
- return totalVramGb - overhead
165
- }
161
+ if (gpuCount <= 1) return totalVramGb;
162
+ const perGpuMemory = totalVramGb / gpuCount;
163
+ const overhead = perGpuMemory * TP_OVERHEAD_PER_GPU * (gpuCount - 1);
164
+ return totalVramGb - overhead;
165
+ };
166
166
 
167
167
  // ── Main Function ────────────────────────────────────────────────────────────
168
168
 
@@ -177,33 +177,33 @@ const effectiveVram = (totalVramGb, gpuCount) => {
177
177
  * @returns {object[]} Ranked list of compatible instances
178
178
  */
179
179
  const filterAndRankInstances = (vramRequired, instanceCatalog, options = {}) => {
180
- const { limit = 10, allowTensorParallelism = true } = options
180
+ const { limit = 10, allowTensorParallelism = true } = options;
181
181
 
182
182
  if (!vramRequired || vramRequired <= 0) {
183
- return []
183
+ return [];
184
184
  }
185
185
 
186
186
  if (!instanceCatalog || typeof instanceCatalog !== 'object') {
187
- return []
187
+ return [];
188
188
  }
189
189
 
190
- const candidates = []
190
+ const candidates = [];
191
191
 
192
192
  for (const [instanceType, meta] of Object.entries(instanceCatalog)) {
193
193
  // Skip non-GPU instances
194
- if (!meta.gpus || meta.gpus <= 0) continue
195
- if (meta.category !== 'gpu') continue
194
+ if (!meta.gpus || meta.gpus <= 0) continue;
195
+ if (meta.category !== 'gpu') continue;
196
196
 
197
- const perGpuMemory = getPerGpuMemoryGb(meta)
198
- if (!perGpuMemory) continue
197
+ const perGpuMemory = getPerGpuMemoryGb(meta);
198
+ if (!perGpuMemory) continue;
199
199
 
200
- const gpuCount = meta.gpus
201
- const totalVramGb = perGpuMemory * gpuCount
200
+ const gpuCount = meta.gpus;
201
+ const totalVramGb = perGpuMemory * gpuCount;
202
202
 
203
203
  // Determine if model fits on a single GPU
204
204
  if (gpuCount === 1) {
205
205
  if (perGpuMemory >= vramRequired) {
206
- const utilizationPercent = Math.round((vramRequired / perGpuMemory) * 100)
206
+ const utilizationPercent = Math.round((vramRequired / perGpuMemory) * 100);
207
207
  candidates.push({
208
208
  instanceType,
209
209
  gpuCount,
@@ -212,13 +212,13 @@ const filterAndRankInstances = (vramRequired, instanceCatalog, options = {}) =>
212
212
  tensorParallelism: 1,
213
213
  costTier: getCostTier(meta),
214
214
  family: meta.family || ''
215
- })
215
+ });
216
216
  }
217
217
  } else if (allowTensorParallelism) {
218
218
  // Multi-GPU: check if model fits with TP across all GPUs
219
- const effectiveTotal = effectiveVram(totalVramGb, gpuCount)
219
+ const effectiveTotal = effectiveVram(totalVramGb, gpuCount);
220
220
  if (effectiveTotal >= vramRequired) {
221
- const utilizationPercent = Math.round((vramRequired / effectiveTotal) * 100)
221
+ const utilizationPercent = Math.round((vramRequired / effectiveTotal) * 100);
222
222
  candidates.push({
223
223
  instanceType,
224
224
  gpuCount,
@@ -227,7 +227,7 @@ const filterAndRankInstances = (vramRequired, instanceCatalog, options = {}) =>
227
227
  tensorParallelism: gpuCount,
228
228
  costTier: getCostTier(meta),
229
229
  family: meta.family || ''
230
- })
230
+ });
231
231
  }
232
232
  }
233
233
  }
@@ -240,34 +240,34 @@ const filterAndRankInstances = (vramRequired, instanceCatalog, options = {}) =>
240
240
  candidates.sort((a, b) => {
241
241
  // Primary: TP degree (lower is better)
242
242
  if (a.tensorParallelism !== b.tensorParallelism) {
243
- return a.tensorParallelism - b.tensorParallelism
243
+ return a.tensorParallelism - b.tensorParallelism;
244
244
  }
245
245
 
246
246
  // Secondary: generation (newer is better — lower weight)
247
- const genA = GENERATION_WEIGHT[a.family] || 4
248
- const genB = GENERATION_WEIGHT[b.family] || 4
247
+ const genA = GENERATION_WEIGHT[a.family] || 4;
248
+ const genB = GENERATION_WEIGHT[b.family] || 4;
249
249
  if (genA !== genB) {
250
- return genA - genB
250
+ return genA - genB;
251
251
  }
252
252
 
253
253
  // Tertiary: cost tier (lower is better)
254
- const costA = COST_TIER_WEIGHT[a.costTier] || 2
255
- const costB = COST_TIER_WEIGHT[b.costTier] || 2
254
+ const costA = COST_TIER_WEIGHT[a.costTier] || 2;
255
+ const costB = COST_TIER_WEIGHT[b.costTier] || 2;
256
256
  if (costA !== costB) {
257
- return costA - costB
257
+ return costA - costB;
258
258
  }
259
259
 
260
260
  // Quaternary: prefer lower total VRAM (right-sized, less waste)
261
261
  if (a.totalVramGb !== b.totalVramGb) {
262
- return a.totalVramGb - b.totalVramGb
262
+ return a.totalVramGb - b.totalVramGb;
263
263
  }
264
264
 
265
265
  // Final tiebreaker: instance type name for deterministic ordering
266
- return a.instanceType.localeCompare(b.instanceType)
267
- })
266
+ return a.instanceType.localeCompare(b.instanceType);
267
+ });
268
268
 
269
- return candidates.slice(0, limit)
270
- }
269
+ return candidates.slice(0, limit);
270
+ };
271
271
 
272
272
  // ── Availability Ranking ─────────────────────────────────────────────────────
273
273
 
@@ -279,7 +279,7 @@ const CAPACITY_TYPE_PRIORITY = {
279
279
  reserved: 0,
280
280
  ftp: 1,
281
281
  'on-demand': 2
282
- }
282
+ };
283
283
 
284
284
  /**
285
285
  * Annotate, filter, and re-rank instance recommendations based on
@@ -305,40 +305,84 @@ const CAPACITY_TYPE_PRIORITY = {
305
305
  */
306
306
  const applyAvailabilityRanking = (recommendations, quotas, reservations, ftps) => {
307
307
  if (!recommendations || recommendations.length === 0) {
308
- return []
308
+ return [];
309
309
  }
310
310
 
311
311
  // If all signals are null (all API calls failed), return unmodified
312
312
  if (!quotas && !reservations && !ftps) {
313
- return recommendations
313
+ return recommendations;
314
314
  }
315
315
 
316
316
  // Annotate each recommendation with capacityType and quotaStatus
317
317
  for (const rec of recommendations) {
318
- rec.capacityType = 'on-demand'
319
- rec.quotaStatus = 'available'
318
+ rec.capacityType = 'on-demand';
319
+ rec.quotaStatus = 'available';
320
320
 
321
321
  if (reservations?.has(rec.instanceType)) {
322
- rec.capacityType = 'reserved'
323
- rec.reservationInfo = reservations.get(rec.instanceType)
324
- rec.reservationType = 'training-plan'
322
+ rec.capacityType = 'reserved';
323
+ rec.reservationInfo = reservations.get(rec.instanceType);
324
+ rec.reservationType = 'training-plan';
325
325
  } else if (ftps?.has(rec.instanceType)) {
326
- rec.capacityType = 'ftp'
327
- rec.ftpInfo = ftps.get(rec.instanceType)
326
+ rec.capacityType = 'ftp';
327
+ rec.ftpInfo = ftps.get(rec.instanceType);
328
328
  }
329
329
 
330
330
  // quotaStatus applies to all instances regardless of capacityType
331
331
  if (quotas) {
332
- const q = quotas.get(rec.instanceType)
332
+ const q = quotas.get(rec.instanceType);
333
333
  if (q && q.headroom === 0) {
334
- rec.quotaStatus = 'zero-quota'
334
+ rec.quotaStatus = 'zero-quota';
335
335
  } else if (q && q.headroom < 2) {
336
- rec.quotaStatus = 'limited'
336
+ rec.quotaStatus = 'limited';
337
337
  }
338
338
  if (q) {
339
- rec.quotaHeadroom = q.headroom
340
- rec.quotaDeployed = q.deployed
341
- rec.quotaLimit = q.quota
339
+ rec.quotaHeadroom = q.headroom;
340
+ rec.quotaDeployed = q.deployed;
341
+ rec.quotaLimit = q.quota;
342
+ }
343
+ }
344
+ }
345
+
346
+ // Inject FTP/reserved instances that aren't already in the recommendation list.
347
+ // These instances may not be in the static catalog (e.g., ml.p6-b200.48xlarge)
348
+ // but are available via capacity reservation — always surface them.
349
+ const existingTypes = new Set(recommendations.map(r => r.instanceType));
350
+
351
+ if (reservations) {
352
+ for (const [instanceType, info] of reservations) {
353
+ if (!existingTypes.has(instanceType)) {
354
+ recommendations.push({
355
+ instanceType,
356
+ capacityType: 'reserved',
357
+ reservationInfo: info,
358
+ reservationType: 'training-plan',
359
+ quotaStatus: 'available',
360
+ gpuCount: null,
361
+ totalVramGb: null,
362
+ utilizationPercent: null,
363
+ tensorParallelism: null,
364
+ costTier: null,
365
+ injectedFromReservation: true
366
+ });
367
+ }
368
+ }
369
+ }
370
+
371
+ if (ftps) {
372
+ for (const [instanceType, info] of ftps) {
373
+ if (!existingTypes.has(instanceType)) {
374
+ recommendations.push({
375
+ instanceType,
376
+ capacityType: 'ftp',
377
+ ftpInfo: info,
378
+ quotaStatus: 'available',
379
+ gpuCount: null,
380
+ totalVramGb: null,
381
+ utilizationPercent: null,
382
+ tensorParallelism: null,
383
+ costTier: null,
384
+ injectedFromFtp: true
385
+ });
342
386
  }
343
387
  }
344
388
  }
@@ -346,18 +390,18 @@ const applyAvailabilityRanking = (recommendations, quotas, reservations, ftps) =
346
390
  // Filter out zero-quota instances (but never filter reserved/FTP — you have the capacity)
347
391
  const filtered = recommendations.filter(r =>
348
392
  r.quotaStatus !== 'zero-quota' || r.capacityType === 'reserved' || r.capacityType === 'ftp'
349
- )
393
+ );
350
394
 
351
395
  // Sort: reserved first, then FTP, then on-demand (preserve existing order within tier)
352
396
  filtered.sort((a, b) => {
353
- const pa = CAPACITY_TYPE_PRIORITY[a.capacityType] ?? 2
354
- const pb = CAPACITY_TYPE_PRIORITY[b.capacityType] ?? 2
355
- if (pa !== pb) return pa - pb
356
- return 0
357
- })
397
+ const pa = CAPACITY_TYPE_PRIORITY[a.capacityType] ?? 2;
398
+ const pb = CAPACITY_TYPE_PRIORITY[b.capacityType] ?? 2;
399
+ if (pa !== pb) return pa - pb;
400
+ return 0;
401
+ });
358
402
 
359
- return filtered
360
- }
403
+ return filtered;
404
+ };
361
405
 
362
406
  export {
363
407
  filterAndRankInstances,
@@ -371,4 +415,4 @@ export {
371
415
  GENERATION_WEIGHT,
372
416
  CAPACITY_TYPE_PRIORITY,
373
417
  TP_OVERHEAD_PER_GPU
374
- }
418
+ };
@@ -10,18 +10,18 @@
10
10
  * 3. If neither available, return null (caller handles fallback)
11
11
  */
12
12
 
13
- import { readFile } from 'node:fs/promises'
14
- import { fileURLToPath } from 'node:url'
15
- import { dirname, join } from 'node:path'
13
+ import { readFile } from 'node:fs/promises';
14
+ import { fileURLToPath } from 'node:url';
15
+ import { dirname, join } from 'node:path';
16
16
 
17
17
  // ── Constants ────────────────────────────────────────────────────────────────
18
18
 
19
- const __filename = fileURLToPath(import.meta.url)
20
- const __dirname = dirname(__filename)
19
+ const __filename = fileURLToPath(import.meta.url);
20
+ const __dirname = dirname(__filename);
21
21
 
22
- const DEFAULT_CATALOG_PATH = join(__dirname, '..', '..', 'lib', 'catalogs', 'models.json')
23
- const HUGGINGFACE_BASE_URL = 'https://huggingface.co'
24
- const HUGGINGFACE_TIMEOUT_MS = 5000
22
+ const DEFAULT_CATALOG_PATH = join(__dirname, '..', '..', 'lib', 'catalogs', 'models.json');
23
+ const HUGGINGFACE_BASE_URL = 'https://huggingface.co';
24
+ const HUGGINGFACE_TIMEOUT_MS = 5000;
25
25
 
26
26
  // ── Glob Pattern Matching ────────────────────────────────────────────────────
27
27
 
@@ -36,10 +36,10 @@ const HUGGINGFACE_TIMEOUT_MS = 5000
36
36
  const globMatch = (pattern, text) => {
37
37
  const regexStr = pattern
38
38
  .replace(/[.+^${}()|[\]\\]/g, '\\$&')
39
- .replace(/\*/g, '.*')
40
- const regex = new RegExp(`^${regexStr}$`, 'i')
41
- return regex.test(text)
42
- }
39
+ .replace(/\*/g, '.*');
40
+ const regex = new RegExp(`^${regexStr}$`, 'i');
41
+ return regex.test(text);
42
+ };
43
43
 
44
44
  // ── Catalog Lookup ───────────────────────────────────────────────────────────
45
45
 
@@ -51,12 +51,12 @@ const globMatch = (pattern, text) => {
51
51
  */
52
52
  const loadCatalog = async (catalogPath) => {
53
53
  try {
54
- const raw = await readFile(catalogPath || DEFAULT_CATALOG_PATH, 'utf-8')
55
- return JSON.parse(raw)
54
+ const raw = await readFile(catalogPath || DEFAULT_CATALOG_PATH, 'utf-8');
55
+ return JSON.parse(raw);
56
56
  } catch {
57
- return null
57
+ return null;
58
58
  }
59
- }
59
+ };
60
60
 
61
61
  /**
62
62
  * Look up a model in the catalog by exact match or glob pattern.
@@ -67,26 +67,26 @@ const loadCatalog = async (catalogPath) => {
67
67
  */
68
68
  const catalogLookup = (modelName, catalog) => {
69
69
  if (!catalog) {
70
- return null
70
+ return null;
71
71
  }
72
72
 
73
73
  // Support both flat catalog (models.json) and wrapped format ({ models: {...} })
74
- const models = catalog.models || catalog
74
+ const models = catalog.models || catalog;
75
75
 
76
76
  // Try exact match first
77
77
  if (models[modelName]) {
78
- return models[modelName]
78
+ return models[modelName];
79
79
  }
80
80
 
81
81
  // Try glob pattern matching
82
82
  for (const pattern of Object.keys(models)) {
83
83
  if (globMatch(pattern, modelName)) {
84
- return models[pattern]
84
+ return models[pattern];
85
85
  }
86
86
  }
87
87
 
88
- return null
89
- }
88
+ return null;
89
+ };
90
90
 
91
91
  // ── HuggingFace API ──────────────────────────────────────────────────────────
92
92
 
@@ -97,28 +97,28 @@ const catalogLookup = (modelName, catalog) => {
97
97
  * @returns {Promise<object|null>} Parsed config or null on failure
98
98
  */
99
99
  const fetchHuggingFaceConfig = async (modelName) => {
100
- const url = `${HUGGINGFACE_BASE_URL}/${modelName}/resolve/main/config.json`
100
+ const url = `${HUGGINGFACE_BASE_URL}/${modelName}/resolve/main/config.json`;
101
101
 
102
102
  try {
103
- const controller = new AbortController()
104
- const timeout = setTimeout(() => controller.abort(), HUGGINGFACE_TIMEOUT_MS)
103
+ const controller = new AbortController();
104
+ const timeout = setTimeout(() => controller.abort(), HUGGINGFACE_TIMEOUT_MS);
105
105
 
106
106
  const response = await fetch(url, {
107
107
  signal: controller.signal,
108
108
  headers: { 'Accept': 'application/json' }
109
- })
109
+ });
110
110
 
111
- clearTimeout(timeout)
111
+ clearTimeout(timeout);
112
112
 
113
113
  if (!response.ok) {
114
- return null
114
+ return null;
115
115
  }
116
116
 
117
- return await response.json()
117
+ return await response.json();
118
118
  } catch {
119
- return null
119
+ return null;
120
120
  }
121
- }
121
+ };
122
122
 
123
123
  /**
124
124
  * Estimate parameter count from architecture dimensions.
@@ -133,15 +133,15 @@ const fetchHuggingFaceConfig = async (modelName) => {
133
133
  * @returns {number|null} Estimated parameter count or null if dimensions unavailable
134
134
  */
135
135
  const estimateParamsFromConfig = (config) => {
136
- const hiddenSize = config.hidden_size
137
- const numLayers = config.num_hidden_layers
136
+ const hiddenSize = config.hidden_size;
137
+ const numLayers = config.num_hidden_layers;
138
138
 
139
139
  if (!hiddenSize || !numLayers) {
140
- return null
140
+ return null;
141
141
  }
142
142
 
143
- return hiddenSize * numLayers * 12
144
- }
143
+ return hiddenSize * numLayers * 12;
144
+ };
145
145
 
146
146
  /**
147
147
  * Extract model metadata from a HuggingFace config.json.
@@ -151,11 +151,11 @@ const estimateParamsFromConfig = (config) => {
151
151
  */
152
152
  const extractFromHuggingFaceConfig = (config) => {
153
153
  const parameterCount = config.num_parameters
154
- ?? estimateParamsFromConfig(config)
154
+ ?? estimateParamsFromConfig(config);
155
155
 
156
- const dtype = config.torch_dtype || 'float16'
157
- const architecture = config.architectures?.[0] || 'unknown'
158
- const maxPositionEmbeddings = config.max_position_embeddings || 4096
156
+ const dtype = config.torch_dtype || 'float16';
157
+ const architecture = config.architectures?.[0] || 'unknown';
158
+ const maxPositionEmbeddings = config.max_position_embeddings || 4096;
159
159
 
160
160
  return {
161
161
  parameterCount,
@@ -163,16 +163,16 @@ const extractFromHuggingFaceConfig = (config) => {
163
163
  architecture,
164
164
  maxPositionEmbeddings,
165
165
  source: 'huggingface_api'
166
- }
167
- }
166
+ };
167
+ };
168
168
 
169
169
  // ── In-memory cache for discover mode ────────────────────────────────────────
170
170
 
171
- const discoverCache = new Map()
171
+ const discoverCache = new Map();
172
172
 
173
173
  // ── Protocol prefix detection ────────────────────────────────────────────────
174
174
 
175
- const PROTOCOL_PREFIXES = ['jumpstart://', 'jumpstart-hub://', 's3://', 'registry://']
175
+ const PROTOCOL_PREFIXES = ['jumpstart://', 'jumpstart-hub://', 's3://', 'registry://'];
176
176
 
177
177
  /**
178
178
  * Check if a model name matches the HuggingFace org/model-name pattern.
@@ -182,13 +182,13 @@ const PROTOCOL_PREFIXES = ['jumpstart://', 'jumpstart-hub://', 's3://', 'registr
182
182
  * @returns {boolean} True if it matches the HuggingFace pattern
183
183
  */
184
184
  const isHuggingFacePattern = (modelName) => {
185
- if (!modelName || typeof modelName !== 'string') return false
185
+ if (!modelName || typeof modelName !== 'string') return false;
186
186
  // Must not have a protocol prefix
187
- if (PROTOCOL_PREFIXES.some(prefix => modelName.startsWith(prefix))) return false
187
+ if (PROTOCOL_PREFIXES.some(prefix => modelName.startsWith(prefix))) return false;
188
188
  // Must contain exactly one `/` (org/model-name)
189
- const slashCount = (modelName.match(/\//g) || []).length
190
- return slashCount === 1
191
- }
189
+ const slashCount = (modelName.match(/\//g) || []).length;
190
+ return slashCount === 1;
191
+ };
192
192
 
193
193
  // ── Main Resolver ────────────────────────────────────────────────────────────
194
194
 
@@ -207,11 +207,11 @@ const isHuggingFacePattern = (modelName) => {
207
207
  * @returns {Promise<{ parameterCount: number, dtype: string, architecture: string, maxPositionEmbeddings: number, source: string } | null>}
208
208
  */
209
209
  const resolveModelMetadata = async (modelName, options = {}) => {
210
- const { discover = true, catalogPath } = options
210
+ const { discover = true, catalogPath } = options;
211
211
 
212
212
  // Tier 1: Catalog lookup
213
- const catalog = await loadCatalog(catalogPath)
214
- const catalogEntry = catalogLookup(modelName, catalog)
213
+ const catalog = await loadCatalog(catalogPath);
214
+ const catalogEntry = catalogLookup(modelName, catalog);
215
215
 
216
216
  if (catalogEntry) {
217
217
  // Only use catalog entry if it has a usable parameterCount for VRAM estimation.
@@ -223,7 +223,7 @@ const resolveModelMetadata = async (modelName, options = {}) => {
223
223
  architecture: catalogEntry.architecture,
224
224
  maxPositionEmbeddings: catalogEntry.maxPositionEmbeddings,
225
225
  source: 'catalog'
226
- }
226
+ };
227
227
  }
228
228
  }
229
229
 
@@ -231,26 +231,26 @@ const resolveModelMetadata = async (modelName, options = {}) => {
231
231
  if (discover && isHuggingFacePattern(modelName)) {
232
232
  // Check in-memory cache first
233
233
  if (discoverCache.has(modelName)) {
234
- return discoverCache.get(modelName)
234
+ return discoverCache.get(modelName);
235
235
  }
236
236
 
237
- const config = await fetchHuggingFaceConfig(modelName)
237
+ const config = await fetchHuggingFaceConfig(modelName);
238
238
 
239
239
  if (config) {
240
- const metadata = extractFromHuggingFaceConfig(config)
240
+ const metadata = extractFromHuggingFaceConfig(config);
241
241
 
242
242
  // Only return if we got a usable parameter count
243
243
  if (metadata.parameterCount) {
244
244
  // Cache for session duration
245
- discoverCache.set(modelName, metadata)
246
- return metadata
245
+ discoverCache.set(modelName, metadata);
246
+ return metadata;
247
247
  }
248
248
  }
249
249
  }
250
250
 
251
251
  // Tier 3: No metadata available
252
- return null
253
- }
252
+ return null;
253
+ };
254
254
 
255
255
  export {
256
256
  resolveModelMetadata,
@@ -266,4 +266,4 @@ export {
266
266
  DEFAULT_CATALOG_PATH,
267
267
  HUGGINGFACE_BASE_URL,
268
268
  HUGGINGFACE_TIMEOUT_MS
269
- }
269
+ };