npm - @aws/ml-container-creator - Versions diffs - 0.10.0 → 0.12.1 - Mend

@aws/ml-container-creator 0.10.0 → 0.12.1

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.

Files changed (90) hide show

package/LICENSE-THIRD-PARTY +9304 -0
package/bin/cli.js +2 -0
package/config/bootstrap-e2e-stack.json +341 -0
package/config/bootstrap-stack.json +40 -3
package/config/parameter-schema-v2.json +33 -22
package/config/tune-catalog.json +1781 -0
package/infra/ci-harness/buildspec.yml +1 -0
package/infra/ci-harness/lambda/path-prover/brain.ts +306 -0
package/infra/ci-harness/lambda/path-prover/write-results.ts +152 -0
package/infra/ci-harness/lib/ci-harness-stack.ts +851 -7
package/infra/ci-harness/state-machines/path-prover.asl.json +496 -0
package/package.json +53 -67
package/servers/base-image-picker/index.js +121 -121
package/servers/e2e-status/index.js +297 -0
package/servers/e2e-status/manifest.json +14 -0
package/servers/e2e-status/package.json +15 -0
package/servers/endpoint-picker/LICENSE +202 -0
package/servers/endpoint-picker/index.js +536 -0
package/servers/endpoint-picker/manifest.json +14 -0
package/servers/endpoint-picker/package.json +18 -0
package/servers/hyperpod-cluster-picker/index.js +125 -125
package/servers/instance-sizer/index.js +166 -153
package/servers/instance-sizer/lib/instance-ranker.js +120 -76
package/servers/instance-sizer/lib/model-resolver.js +61 -61
package/servers/instance-sizer/lib/quota-resolver.js +113 -113
package/servers/instance-sizer/lib/vram-estimator.js +31 -31
package/servers/lib/bedrock-client.js +38 -38
package/servers/lib/catalogs/instances.json +27 -0
package/servers/lib/catalogs/model-servers.json +201 -3
package/servers/lib/custom-validators.js +13 -13
package/servers/lib/dynamic-resolver.js +4 -4
package/servers/marketplace-picker/index.js +342 -0
package/servers/marketplace-picker/manifest.json +14 -0
package/servers/marketplace-picker/package.json +18 -0
package/servers/model-picker/index.js +382 -382
package/servers/region-picker/index.js +56 -56
package/servers/workload-picker/LICENSE +202 -0
package/servers/workload-picker/catalogs/workload-profiles.json +67 -0
package/servers/workload-picker/index.js +171 -0
package/servers/workload-picker/manifest.json +16 -0
package/servers/workload-picker/package.json +16 -0
package/src/app.js +12 -3
package/src/lib/bootstrap-command-handler.js +609 -15
package/src/lib/bootstrap-config.js +36 -0
package/src/lib/bootstrap-profile-manager.js +48 -41
package/src/lib/ci-register-helpers.js +74 -0
package/src/lib/config-loader.js +3 -0
package/src/lib/config-manager.js +7 -0
package/src/lib/config-validator.js +1 -1
package/src/lib/cuda-resolver.js +17 -8
package/src/lib/generated/cli-options.js +319 -314
package/src/lib/generated/parameter-matrix.js +672 -661
package/src/lib/generated/validation-rules.js +76 -72
package/src/lib/path-prover-brain.js +664 -0
package/src/lib/prompts/infrastructure-prompts.js +2 -2
package/src/lib/prompts/model-prompts.js +6 -0
package/src/lib/prompts/project-prompts.js +12 -0
package/src/lib/secrets-prompt-runner.js +4 -0
package/src/lib/template-manager.js +1 -1
package/src/lib/template-variable-resolver.js +87 -1
package/src/lib/tune-catalog-validator.js +37 -4
package/templates/Dockerfile +9 -0
package/templates/code/adapter_sidecar.py +444 -0
package/templates/code/serve +6 -0
package/templates/code/serve.d/vllm.ejs +1 -1
package/templates/do/.benchmark_writer.py +1476 -0
package/templates/do/.tune_helper.py +982 -57
package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
package/templates/do/adapter +154 -0
package/templates/do/benchmark +639 -85
package/templates/do/build +5 -0
package/templates/do/clean.d/async-inference.ejs +5 -0
package/templates/do/clean.d/batch-transform.ejs +5 -0
package/templates/do/clean.d/hyperpod-eks.ejs +5 -0
package/templates/do/clean.d/managed-inference.ejs +5 -0
package/templates/do/config +115 -45
package/templates/do/deploy.d/async-inference.ejs +30 -3
package/templates/do/deploy.d/batch-transform.ejs +29 -3
package/templates/do/deploy.d/hyperpod-eks.ejs +4 -0
package/templates/do/deploy.d/managed-inference.ejs +216 -14
package/templates/do/lib/endpoint-config.sh +1 -1
package/templates/do/lib/profile.sh +44 -0
package/templates/do/optimize +106 -37
package/templates/do/push +5 -0
package/templates/do/register +94 -0
package/templates/do/stage +567 -0
package/templates/do/submit +7 -0
package/templates/do/test +14 -0
package/templates/do/tune +382 -59
package/templates/do/validate +44 -4

package/servers/instance-sizer/lib/instance-ranker.js CHANGED Viewed

@@ -24,7 +24,7 @@ const GPU_MEMORY_MAP = {
     'NVIDIA H100': 80,
     'AWS Inferentia2': 32,
     'AWS Trainium': 32
-}
+};
 /**
  * Cost tier classification by instance family.
@@ -45,7 +45,7 @@ const COST_TIER_MAP = {
     'p5e': 'high',
     'p5en': 'high',
     'p6': 'high'
-}
+};
 /**
  * Relative cost weight by tier for sorting within TP groups.
@@ -55,7 +55,7 @@ const COST_TIER_WEIGHT = {
     'low': 1,
     'medium': 2,
     'high': 3
-}
+};
 /**
  * Generation weight by instance family.
@@ -77,13 +77,13 @@ const GENERATION_WEIGHT = {
     'p3': 6,
     'g4dn': 7,
     'g4ad': 7
-}
+};
 /**
  * TP overhead penalty: 10% per additional GPU beyond the first.
  * Effective VRAM = totalVram × (1 - 0.10 × (gpuCount - 1))
  */
-const TP_OVERHEAD_PER_GPU = 0.10
+const TP_OVERHEAD_PER_GPU = 0.10;
 // ── Helper Functions ─────────────────────────────────────────────────────────
@@ -101,33 +101,33 @@ const TP_OVERHEAD_PER_GPU = 0.10
 const getPerGpuMemoryGb = (instance) => {
     // 1. Direct field
     if (instance.gpuMemoryGb) {
-        return instance.gpuMemoryGb
+        return instance.gpuMemoryGb;
     }
     // 2. Parse from accelerator string
     if (instance.accelerator) {
         // Match patterns like "A10G 24GB", "4x A10G 96GB", "T4 16GB"
-        const totalMatch = instance.accelerator.match(/(\d+)GB/)
+        const totalMatch = instance.accelerator.match(/(\d+)GB/);
         if (totalMatch) {
-            const totalGb = parseInt(totalMatch[1], 10)
-            const gpuCount = instance.gpus || 1
+            const totalGb = parseInt(totalMatch[1], 10);
+            const gpuCount = instance.gpus || 1;
             // If the string has a multiplier prefix like "4x", the GB is total
-            const hasMultiplier = instance.accelerator.match(/^(\d+)x\s/)
+            const hasMultiplier = instance.accelerator.match(/^(\d+)x\s/);
             if (hasMultiplier) {
-                return totalGb / gpuCount
+                return totalGb / gpuCount;
             }
             // Single GPU entry — the GB value is per-GPU
-            return totalGb
+            return totalGb;
         }
     }
     // 3. Lookup by hardware type
     if (instance.hardware && GPU_MEMORY_MAP[instance.hardware]) {
-        return GPU_MEMORY_MAP[instance.hardware]
+        return GPU_MEMORY_MAP[instance.hardware];
     }
-    return null
-}
+    return null;
+};
 /**
  * Determine cost tier for an instance based on its family.
@@ -137,11 +137,11 @@ const getPerGpuMemoryGb = (instance) => {
  */
 const getCostTier = (instance) => {
     if (instance.costTier) {
-        return instance.costTier
+        return instance.costTier;
     }
-    const family = instance.family || ''
-    return COST_TIER_MAP[family] || 'medium'
-}
+    const family = instance.family || '';
+    return COST_TIER_MAP[family] || 'medium';
+};
 /**
  * Calculate effective VRAM available after TP overhead penalty.
@@ -158,11 +158,11 @@ const getCostTier = (instance) => {
  * @returns {number} Effective usable VRAM in GB
  */
 const effectiveVram = (totalVramGb, gpuCount) => {
-    if (gpuCount <= 1) return totalVramGb
-    const perGpuMemory = totalVramGb / gpuCount
-    const overhead = perGpuMemory * TP_OVERHEAD_PER_GPU * (gpuCount - 1)
-    return totalVramGb - overhead
-}
+    if (gpuCount <= 1) return totalVramGb;
+    const perGpuMemory = totalVramGb / gpuCount;
+    const overhead = perGpuMemory * TP_OVERHEAD_PER_GPU * (gpuCount - 1);
+    return totalVramGb - overhead;
+};
 // ── Main Function ────────────────────────────────────────────────────────────
@@ -177,33 +177,33 @@ const effectiveVram = (totalVramGb, gpuCount) => {
  * @returns {object[]} Ranked list of compatible instances
  */
 const filterAndRankInstances = (vramRequired, instanceCatalog, options = {}) => {
-    const { limit = 10, allowTensorParallelism = true } = options
+    const { limit = 10, allowTensorParallelism = true } = options;
     if (!vramRequired || vramRequired <= 0) {
-        return []
+        return [];
     }
     if (!instanceCatalog || typeof instanceCatalog !== 'object') {
-        return []
+        return [];
     }
-    const candidates = []
+    const candidates = [];
     for (const [instanceType, meta] of Object.entries(instanceCatalog)) {
         // Skip non-GPU instances
-        if (!meta.gpus || meta.gpus <= 0) continue
-        if (meta.category !== 'gpu') continue
+        if (!meta.gpus || meta.gpus <= 0) continue;
+        if (meta.category !== 'gpu') continue;
-        const perGpuMemory = getPerGpuMemoryGb(meta)
-        if (!perGpuMemory) continue
+        const perGpuMemory = getPerGpuMemoryGb(meta);
+        if (!perGpuMemory) continue;
-        const gpuCount = meta.gpus
-        const totalVramGb = perGpuMemory * gpuCount
+        const gpuCount = meta.gpus;
+        const totalVramGb = perGpuMemory * gpuCount;
         // Determine if model fits on a single GPU
         if (gpuCount === 1) {
             if (perGpuMemory >= vramRequired) {
-                const utilizationPercent = Math.round((vramRequired / perGpuMemory) * 100)
+                const utilizationPercent = Math.round((vramRequired / perGpuMemory) * 100);
                 candidates.push({
                     instanceType,
                     gpuCount,
@@ -212,13 +212,13 @@ const filterAndRankInstances = (vramRequired, instanceCatalog, options = {}) =>
                     tensorParallelism: 1,
                     costTier: getCostTier(meta),
                     family: meta.family || ''
-                })
+                });
             }
         } else if (allowTensorParallelism) {
             // Multi-GPU: check if model fits with TP across all GPUs
-            const effectiveTotal = effectiveVram(totalVramGb, gpuCount)
+            const effectiveTotal = effectiveVram(totalVramGb, gpuCount);
             if (effectiveTotal >= vramRequired) {
-                const utilizationPercent = Math.round((vramRequired / effectiveTotal) * 100)
+                const utilizationPercent = Math.round((vramRequired / effectiveTotal) * 100);
                 candidates.push({
                     instanceType,
                     gpuCount,
@@ -227,7 +227,7 @@ const filterAndRankInstances = (vramRequired, instanceCatalog, options = {}) =>
                     tensorParallelism: gpuCount,
                     costTier: getCostTier(meta),
                     family: meta.family || ''
-                })
+                });
             }
         }
     }
@@ -240,34 +240,34 @@ const filterAndRankInstances = (vramRequired, instanceCatalog, options = {}) =>
     candidates.sort((a, b) => {
         // Primary: TP degree (lower is better)
         if (a.tensorParallelism !== b.tensorParallelism) {
-            return a.tensorParallelism - b.tensorParallelism
+            return a.tensorParallelism - b.tensorParallelism;
         }
         // Secondary: generation (newer is better — lower weight)
-        const genA = GENERATION_WEIGHT[a.family] || 4
-        const genB = GENERATION_WEIGHT[b.family] || 4
+        const genA = GENERATION_WEIGHT[a.family] || 4;
+        const genB = GENERATION_WEIGHT[b.family] || 4;
         if (genA !== genB) {
-            return genA - genB
+            return genA - genB;
         }
         // Tertiary: cost tier (lower is better)
-        const costA = COST_TIER_WEIGHT[a.costTier] || 2
-        const costB = COST_TIER_WEIGHT[b.costTier] || 2
+        const costA = COST_TIER_WEIGHT[a.costTier] || 2;
+        const costB = COST_TIER_WEIGHT[b.costTier] || 2;
         if (costA !== costB) {
-            return costA - costB
+            return costA - costB;
         }
         // Quaternary: prefer lower total VRAM (right-sized, less waste)
         if (a.totalVramGb !== b.totalVramGb) {
-            return a.totalVramGb - b.totalVramGb
+            return a.totalVramGb - b.totalVramGb;
         }
         // Final tiebreaker: instance type name for deterministic ordering
-        return a.instanceType.localeCompare(b.instanceType)
-    })
+        return a.instanceType.localeCompare(b.instanceType);
+    });
-    return candidates.slice(0, limit)
-}
+    return candidates.slice(0, limit);
+};
 // ── Availability Ranking ─────────────────────────────────────────────────────
@@ -279,7 +279,7 @@ const CAPACITY_TYPE_PRIORITY = {
     reserved: 0,
     ftp: 1,
     'on-demand': 2
-}
+};
 /**
  * Annotate, filter, and re-rank instance recommendations based on
@@ -305,40 +305,84 @@ const CAPACITY_TYPE_PRIORITY = {
  */
 const applyAvailabilityRanking = (recommendations, quotas, reservations, ftps) => {
     if (!recommendations || recommendations.length === 0) {
-        return []
+        return [];
     }
     // If all signals are null (all API calls failed), return unmodified
     if (!quotas && !reservations && !ftps) {
-        return recommendations
+        return recommendations;
     }
     // Annotate each recommendation with capacityType and quotaStatus
     for (const rec of recommendations) {
-        rec.capacityType = 'on-demand'
-        rec.quotaStatus = 'available'
+        rec.capacityType = 'on-demand';
+        rec.quotaStatus = 'available';
         if (reservations?.has(rec.instanceType)) {
-            rec.capacityType = 'reserved'
-            rec.reservationInfo = reservations.get(rec.instanceType)
-            rec.reservationType = 'training-plan'
+            rec.capacityType = 'reserved';
+            rec.reservationInfo = reservations.get(rec.instanceType);
+            rec.reservationType = 'training-plan';
         } else if (ftps?.has(rec.instanceType)) {
-            rec.capacityType = 'ftp'
-            rec.ftpInfo = ftps.get(rec.instanceType)
+            rec.capacityType = 'ftp';
+            rec.ftpInfo = ftps.get(rec.instanceType);
         }
         // quotaStatus applies to all instances regardless of capacityType
         if (quotas) {
-            const q = quotas.get(rec.instanceType)
+            const q = quotas.get(rec.instanceType);
             if (q && q.headroom === 0) {
-                rec.quotaStatus = 'zero-quota'
+                rec.quotaStatus = 'zero-quota';
             } else if (q && q.headroom < 2) {
-                rec.quotaStatus = 'limited'
+                rec.quotaStatus = 'limited';
             }
             if (q) {
-                rec.quotaHeadroom = q.headroom
-                rec.quotaDeployed = q.deployed
-                rec.quotaLimit = q.quota
+                rec.quotaHeadroom = q.headroom;
+                rec.quotaDeployed = q.deployed;
+                rec.quotaLimit = q.quota;
+            }
+        }
+    }
+    // Inject FTP/reserved instances that aren't already in the recommendation list.
+    // These instances may not be in the static catalog (e.g., ml.p6-b200.48xlarge)
+    // but are available via capacity reservation — always surface them.
+    const existingTypes = new Set(recommendations.map(r => r.instanceType));
+    if (reservations) {
+        for (const [instanceType, info] of reservations) {
+            if (!existingTypes.has(instanceType)) {
+                recommendations.push({
+                    instanceType,
+                    capacityType: 'reserved',
+                    reservationInfo: info,
+                    reservationType: 'training-plan',
+                    quotaStatus: 'available',
+                    gpuCount: null,
+                    totalVramGb: null,
+                    utilizationPercent: null,
+                    tensorParallelism: null,
+                    costTier: null,
+                    injectedFromReservation: true
+                });
+            }
+        }
+    }
+    if (ftps) {
+        for (const [instanceType, info] of ftps) {
+            if (!existingTypes.has(instanceType)) {
+                recommendations.push({
+                    instanceType,
+                    capacityType: 'ftp',
+                    ftpInfo: info,
+                    quotaStatus: 'available',
+                    gpuCount: null,
+                    totalVramGb: null,
+                    utilizationPercent: null,
+                    tensorParallelism: null,
+                    costTier: null,
+                    injectedFromFtp: true
+                });
             }
         }
     }
@@ -346,18 +390,18 @@ const applyAvailabilityRanking = (recommendations, quotas, reservations, ftps) =
     // Filter out zero-quota instances (but never filter reserved/FTP — you have the capacity)
     const filtered = recommendations.filter(r =>
         r.quotaStatus !== 'zero-quota' || r.capacityType === 'reserved' || r.capacityType === 'ftp'
-    )
+    );
     // Sort: reserved first, then FTP, then on-demand (preserve existing order within tier)
     filtered.sort((a, b) => {
-        const pa = CAPACITY_TYPE_PRIORITY[a.capacityType] ?? 2
-        const pb = CAPACITY_TYPE_PRIORITY[b.capacityType] ?? 2
-        if (pa !== pb) return pa - pb
-        return 0
-    })
+        const pa = CAPACITY_TYPE_PRIORITY[a.capacityType] ?? 2;
+        const pb = CAPACITY_TYPE_PRIORITY[b.capacityType] ?? 2;
+        if (pa !== pb) return pa - pb;
+        return 0;
+    });
-    return filtered
-}
+    return filtered;
+};
 export {
     filterAndRankInstances,
@@ -371,4 +415,4 @@ export {
     GENERATION_WEIGHT,
     CAPACITY_TYPE_PRIORITY,
     TP_OVERHEAD_PER_GPU
-}
+};

package/servers/instance-sizer/lib/model-resolver.js CHANGED Viewed

@@ -10,18 +10,18 @@
  * 3. If neither available, return null (caller handles fallback)
  */
-import { readFile } from 'node:fs/promises'
-import { fileURLToPath } from 'node:url'
-import { dirname, join } from 'node:path'
+import { readFile } from 'node:fs/promises';
+import { fileURLToPath } from 'node:url';
+import { dirname, join } from 'node:path';
 // ── Constants ────────────────────────────────────────────────────────────────
-const __filename = fileURLToPath(import.meta.url)
-const __dirname = dirname(__filename)
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = dirname(__filename);
-const DEFAULT_CATALOG_PATH = join(__dirname, '..', '..', 'lib', 'catalogs', 'models.json')
-const HUGGINGFACE_BASE_URL = 'https://huggingface.co'
-const HUGGINGFACE_TIMEOUT_MS = 5000
+const DEFAULT_CATALOG_PATH = join(__dirname, '..', '..', 'lib', 'catalogs', 'models.json');
+const HUGGINGFACE_BASE_URL = 'https://huggingface.co';
+const HUGGINGFACE_TIMEOUT_MS = 5000;
 // ── Glob Pattern Matching ────────────────────────────────────────────────────
@@ -36,10 +36,10 @@ const HUGGINGFACE_TIMEOUT_MS = 5000
 const globMatch = (pattern, text) => {
     const regexStr = pattern
         .replace(/[.+^${}()|[\]\\]/g, '\\$&')
-        .replace(/\*/g, '.*')
-    const regex = new RegExp(`^${regexStr}$`, 'i')
-    return regex.test(text)
-}
+        .replace(/\*/g, '.*');
+    const regex = new RegExp(`^${regexStr}$`, 'i');
+    return regex.test(text);
+};
 // ── Catalog Lookup ───────────────────────────────────────────────────────────
@@ -51,12 +51,12 @@ const globMatch = (pattern, text) => {
  */
 const loadCatalog = async (catalogPath) => {
     try {
-        const raw = await readFile(catalogPath || DEFAULT_CATALOG_PATH, 'utf-8')
-        return JSON.parse(raw)
+        const raw = await readFile(catalogPath || DEFAULT_CATALOG_PATH, 'utf-8');
+        return JSON.parse(raw);
     } catch {
-        return null
+        return null;
     }
-}
+};
 /**
  * Look up a model in the catalog by exact match or glob pattern.
@@ -67,26 +67,26 @@ const loadCatalog = async (catalogPath) => {
  */
 const catalogLookup = (modelName, catalog) => {
     if (!catalog) {
-        return null
+        return null;
     }
     // Support both flat catalog (models.json) and wrapped format ({ models: {...} })
-    const models = catalog.models || catalog
+    const models = catalog.models || catalog;
     // Try exact match first
     if (models[modelName]) {
-        return models[modelName]
+        return models[modelName];
     }
     // Try glob pattern matching
     for (const pattern of Object.keys(models)) {
         if (globMatch(pattern, modelName)) {
-            return models[pattern]
+            return models[pattern];
         }
     }
-    return null
-}
+    return null;
+};
 // ── HuggingFace API ──────────────────────────────────────────────────────────
@@ -97,28 +97,28 @@ const catalogLookup = (modelName, catalog) => {
  * @returns {Promise<object|null>} Parsed config or null on failure
  */
 const fetchHuggingFaceConfig = async (modelName) => {
-    const url = `${HUGGINGFACE_BASE_URL}/${modelName}/resolve/main/config.json`
+    const url = `${HUGGINGFACE_BASE_URL}/${modelName}/resolve/main/config.json`;
     try {
-        const controller = new AbortController()
-        const timeout = setTimeout(() => controller.abort(), HUGGINGFACE_TIMEOUT_MS)
+        const controller = new AbortController();
+        const timeout = setTimeout(() => controller.abort(), HUGGINGFACE_TIMEOUT_MS);
         const response = await fetch(url, {
             signal: controller.signal,
             headers: { 'Accept': 'application/json' }
-        })
+        });
-        clearTimeout(timeout)
+        clearTimeout(timeout);
         if (!response.ok) {
-            return null
+            return null;
         }
-        return await response.json()
+        return await response.json();
     } catch {
-        return null
+        return null;
     }
-}
+};
 /**
  * Estimate parameter count from architecture dimensions.
@@ -133,15 +133,15 @@ const fetchHuggingFaceConfig = async (modelName) => {
  * @returns {number|null} Estimated parameter count or null if dimensions unavailable
  */
 const estimateParamsFromConfig = (config) => {
-    const hiddenSize = config.hidden_size
-    const numLayers = config.num_hidden_layers
+    const hiddenSize = config.hidden_size;
+    const numLayers = config.num_hidden_layers;
     if (!hiddenSize || !numLayers) {
-        return null
+        return null;
     }
-    return hiddenSize * numLayers * 12
-}
+    return hiddenSize * numLayers * 12;
+};
 /**
  * Extract model metadata from a HuggingFace config.json.
@@ -151,11 +151,11 @@ const estimateParamsFromConfig = (config) => {
  */
 const extractFromHuggingFaceConfig = (config) => {
     const parameterCount = config.num_parameters
-        ?? estimateParamsFromConfig(config)
+        ?? estimateParamsFromConfig(config);
-    const dtype = config.torch_dtype || 'float16'
-    const architecture = config.architectures?.[0] || 'unknown'
-    const maxPositionEmbeddings = config.max_position_embeddings || 4096
+    const dtype = config.torch_dtype || 'float16';
+    const architecture = config.architectures?.[0] || 'unknown';
+    const maxPositionEmbeddings = config.max_position_embeddings || 4096;
     return {
         parameterCount,
@@ -163,16 +163,16 @@ const extractFromHuggingFaceConfig = (config) => {
         architecture,
         maxPositionEmbeddings,
         source: 'huggingface_api'
-    }
-}
+    };
+};
 // ── In-memory cache for discover mode ────────────────────────────────────────
-const discoverCache = new Map()
+const discoverCache = new Map();
 // ── Protocol prefix detection ────────────────────────────────────────────────
-const PROTOCOL_PREFIXES = ['jumpstart://', 'jumpstart-hub://', 's3://', 'registry://']
+const PROTOCOL_PREFIXES = ['jumpstart://', 'jumpstart-hub://', 's3://', 'registry://'];
 /**
  * Check if a model name matches the HuggingFace org/model-name pattern.
@@ -182,13 +182,13 @@ const PROTOCOL_PREFIXES = ['jumpstart://', 'jumpstart-hub://', 's3://', 'registr
  * @returns {boolean} True if it matches the HuggingFace pattern
  */
 const isHuggingFacePattern = (modelName) => {
-    if (!modelName || typeof modelName !== 'string') return false
+    if (!modelName || typeof modelName !== 'string') return false;
     // Must not have a protocol prefix
-    if (PROTOCOL_PREFIXES.some(prefix => modelName.startsWith(prefix))) return false
+    if (PROTOCOL_PREFIXES.some(prefix => modelName.startsWith(prefix))) return false;
     // Must contain exactly one `/` (org/model-name)
-    const slashCount = (modelName.match(/\//g) || []).length
-    return slashCount === 1
-}
+    const slashCount = (modelName.match(/\//g) || []).length;
+    return slashCount === 1;
+};
 // ── Main Resolver ────────────────────────────────────────────────────────────
@@ -207,11 +207,11 @@ const isHuggingFacePattern = (modelName) => {
  * @returns {Promise<{ parameterCount: number, dtype: string, architecture: string, maxPositionEmbeddings: number, source: string } | null>}
  */
 const resolveModelMetadata = async (modelName, options = {}) => {
-    const { discover = true, catalogPath } = options
+    const { discover = true, catalogPath } = options;
     // Tier 1: Catalog lookup
-    const catalog = await loadCatalog(catalogPath)
-    const catalogEntry = catalogLookup(modelName, catalog)
+    const catalog = await loadCatalog(catalogPath);
+    const catalogEntry = catalogLookup(modelName, catalog);
     if (catalogEntry) {
         // Only use catalog entry if it has a usable parameterCount for VRAM estimation.
@@ -223,7 +223,7 @@ const resolveModelMetadata = async (modelName, options = {}) => {
                 architecture: catalogEntry.architecture,
                 maxPositionEmbeddings: catalogEntry.maxPositionEmbeddings,
                 source: 'catalog'
-            }
+            };
         }
     }
@@ -231,26 +231,26 @@ const resolveModelMetadata = async (modelName, options = {}) => {
     if (discover && isHuggingFacePattern(modelName)) {
         // Check in-memory cache first
         if (discoverCache.has(modelName)) {
-            return discoverCache.get(modelName)
+            return discoverCache.get(modelName);
         }
-        const config = await fetchHuggingFaceConfig(modelName)
+        const config = await fetchHuggingFaceConfig(modelName);
         if (config) {
-            const metadata = extractFromHuggingFaceConfig(config)
+            const metadata = extractFromHuggingFaceConfig(config);
             // Only return if we got a usable parameter count
             if (metadata.parameterCount) {
                 // Cache for session duration
-                discoverCache.set(modelName, metadata)
-                return metadata
+                discoverCache.set(modelName, metadata);
+                return metadata;
             }
         }
     }
     // Tier 3: No metadata available
-    return null
-}
+    return null;
+};
 export {
     resolveModelMetadata,
@@ -266,4 +266,4 @@ export {
     DEFAULT_CATALOG_PATH,
     HUGGINGFACE_BASE_URL,
     HUGGINGFACE_TIMEOUT_MS
-}
+};