npm - @aws/ml-container-creator - Versions diffs - 0.10.0 → 0.12.1 - Mend

@aws/ml-container-creator 0.10.0 → 0.12.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (90) hide show

package/LICENSE-THIRD-PARTY +9304 -0
package/bin/cli.js +2 -0
package/config/bootstrap-e2e-stack.json +341 -0
package/config/bootstrap-stack.json +40 -3
package/config/parameter-schema-v2.json +33 -22
package/config/tune-catalog.json +1781 -0
package/infra/ci-harness/buildspec.yml +1 -0
package/infra/ci-harness/lambda/path-prover/brain.ts +306 -0
package/infra/ci-harness/lambda/path-prover/write-results.ts +152 -0
package/infra/ci-harness/lib/ci-harness-stack.ts +851 -7
package/infra/ci-harness/state-machines/path-prover.asl.json +496 -0
package/package.json +53 -67
package/servers/base-image-picker/index.js +121 -121
package/servers/e2e-status/index.js +297 -0
package/servers/e2e-status/manifest.json +14 -0
package/servers/e2e-status/package.json +15 -0
package/servers/endpoint-picker/LICENSE +202 -0
package/servers/endpoint-picker/index.js +536 -0
package/servers/endpoint-picker/manifest.json +14 -0
package/servers/endpoint-picker/package.json +18 -0
package/servers/hyperpod-cluster-picker/index.js +125 -125
package/servers/instance-sizer/index.js +166 -153
package/servers/instance-sizer/lib/instance-ranker.js +120 -76
package/servers/instance-sizer/lib/model-resolver.js +61 -61
package/servers/instance-sizer/lib/quota-resolver.js +113 -113
package/servers/instance-sizer/lib/vram-estimator.js +31 -31
package/servers/lib/bedrock-client.js +38 -38
package/servers/lib/catalogs/instances.json +27 -0
package/servers/lib/catalogs/model-servers.json +201 -3
package/servers/lib/custom-validators.js +13 -13
package/servers/lib/dynamic-resolver.js +4 -4
package/servers/marketplace-picker/index.js +342 -0
package/servers/marketplace-picker/manifest.json +14 -0
package/servers/marketplace-picker/package.json +18 -0
package/servers/model-picker/index.js +382 -382
package/servers/region-picker/index.js +56 -56
package/servers/workload-picker/LICENSE +202 -0
package/servers/workload-picker/catalogs/workload-profiles.json +67 -0
package/servers/workload-picker/index.js +171 -0
package/servers/workload-picker/manifest.json +16 -0
package/servers/workload-picker/package.json +16 -0
package/src/app.js +12 -3
package/src/lib/bootstrap-command-handler.js +609 -15
package/src/lib/bootstrap-config.js +36 -0
package/src/lib/bootstrap-profile-manager.js +48 -41
package/src/lib/ci-register-helpers.js +74 -0
package/src/lib/config-loader.js +3 -0
package/src/lib/config-manager.js +7 -0
package/src/lib/config-validator.js +1 -1
package/src/lib/cuda-resolver.js +17 -8
package/src/lib/generated/cli-options.js +319 -314
package/src/lib/generated/parameter-matrix.js +672 -661
package/src/lib/generated/validation-rules.js +76 -72
package/src/lib/path-prover-brain.js +664 -0
package/src/lib/prompts/infrastructure-prompts.js +2 -2
package/src/lib/prompts/model-prompts.js +6 -0
package/src/lib/prompts/project-prompts.js +12 -0
package/src/lib/secrets-prompt-runner.js +4 -0
package/src/lib/template-manager.js +1 -1
package/src/lib/template-variable-resolver.js +87 -1
package/src/lib/tune-catalog-validator.js +37 -4
package/templates/Dockerfile +9 -0
package/templates/code/adapter_sidecar.py +444 -0
package/templates/code/serve +6 -0
package/templates/code/serve.d/vllm.ejs +1 -1
package/templates/do/.benchmark_writer.py +1476 -0
package/templates/do/.tune_helper.py +982 -57
package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
package/templates/do/adapter +154 -0
package/templates/do/benchmark +639 -85
package/templates/do/build +5 -0
package/templates/do/clean.d/async-inference.ejs +5 -0
package/templates/do/clean.d/batch-transform.ejs +5 -0
package/templates/do/clean.d/hyperpod-eks.ejs +5 -0
package/templates/do/clean.d/managed-inference.ejs +5 -0
package/templates/do/config +115 -45
package/templates/do/deploy.d/async-inference.ejs +30 -3
package/templates/do/deploy.d/batch-transform.ejs +29 -3
package/templates/do/deploy.d/hyperpod-eks.ejs +4 -0
package/templates/do/deploy.d/managed-inference.ejs +216 -14
package/templates/do/lib/endpoint-config.sh +1 -1
package/templates/do/lib/profile.sh +44 -0
package/templates/do/optimize +106 -37
package/templates/do/push +5 -0
package/templates/do/register +94 -0
package/templates/do/stage +567 -0
package/templates/do/submit +7 -0
package/templates/do/test +14 -0
package/templates/do/tune +382 -59
package/templates/do/validate +44 -4

package/servers/instance-sizer/lib/quota-resolver.js CHANGED Viewed

@@ -11,20 +11,20 @@
  * All methods degrade gracefully — API failures return null and log to stderr.
  */
-import { ServiceQuotasClient, ListServiceQuotasCommand } from '@aws-sdk/client-service-quotas'
-import { SageMakerClient, ListEndpointsCommand, ListTrainingPlansCommand } from '@aws-sdk/client-sagemaker'
+import { ServiceQuotasClient, ListServiceQuotasCommand } from '@aws-sdk/client-service-quotas';
+import { SageMakerClient, ListEndpointsCommand, ListTrainingPlansCommand } from '@aws-sdk/client-sagemaker';
 // ── Constants ────────────────────────────────────────────────────────────────
-const SAGEMAKER_SERVICE_CODE = 'sagemaker'
-const DEFAULT_TIMEOUT_MS = 5000
-const DEFAULT_CACHE_TTL_MS = 300000 // 5 minutes
-const QUOTA_NAME_PATTERN = /^(ml\.[a-z0-9]+\.[a-z0-9]+) for endpoint usage$/
+const SAGEMAKER_SERVICE_CODE = 'sagemaker';
+const DEFAULT_TIMEOUT_MS = 5000;
+const DEFAULT_CACHE_TTL_MS = 300000; // 5 minutes
+const QUOTA_NAME_PATTERN = /^(ml\.[a-z0-9]+\.[a-z0-9]+) for endpoint usage$/;
 // ── Logging ──────────────────────────────────────────────────────────────────
 function log(message) {
-    process.stderr.write(`[quota-resolver] ${message}\n`)
+    process.stderr.write(`[quota-resolver] ${message}\n`);
 }
 // ── QuotaResolver Class ──────────────────────────────────────────────────────
@@ -37,20 +37,20 @@ class QuotaResolver {
      * @param {number} [options.cacheTtl=300000] - Cache TTL in ms (default 5 min)
      */
     constructor(region, options = {}) {
-        this.region = region
-        this.timeout = options.timeout || DEFAULT_TIMEOUT_MS
-        this.cacheTtl = options.cacheTtl || DEFAULT_CACHE_TTL_MS
-        this.cache = new Map()
+        this.region = region;
+        this.timeout = options.timeout || DEFAULT_TIMEOUT_MS;
+        this.cacheTtl = options.cacheTtl || DEFAULT_CACHE_TTL_MS;
+        this.cache = new Map();
         const clientConfig = {
             region: this.region,
             requestHandler: {
                 requestTimeout: this.timeout
             }
-        }
+        };
-        this.quotasClient = new ServiceQuotasClient(clientConfig)
-        this.sagemakerClient = new SageMakerClient(clientConfig)
+        this.quotasClient = new ServiceQuotasClient(clientConfig);
+        this.sagemakerClient = new SageMakerClient(clientConfig);
     }
     /**
@@ -59,13 +59,13 @@ class QuotaResolver {
      * @returns {*|null} Cached value or null
      */
     _getCached(key) {
-        const entry = this.cache.get(key)
-        if (!entry) return null
+        const entry = this.cache.get(key);
+        if (!entry) return null;
         if (Date.now() - entry.timestamp > this.cacheTtl) {
-            this.cache.delete(key)
-            return null
+            this.cache.delete(key);
+            return null;
         }
-        return entry.value
+        return entry.value;
     }
     /**
@@ -74,7 +74,7 @@ class QuotaResolver {
      * @param {*} value - Value to cache
      */
     _setCache(key, value) {
-        this.cache.set(key, { value, timestamp: Date.now() })
+        this.cache.set(key, { value, timestamp: Date.now() });
     }
     /**
@@ -85,8 +85,8 @@ class QuotaResolver {
      * @returns {string|null} Instance type or null if pattern doesn't match
      */
     _parseQuotaName(quotaName) {
-        const match = quotaName.match(QUOTA_NAME_PATTERN)
-        return match ? match[1] : null
+        const match = quotaName.match(QUOTA_NAME_PATTERN);
+        return match ? match[1] : null;
     }
     /**
@@ -100,50 +100,50 @@ class QuotaResolver {
      * @returns {Promise<Map|null>} Map: instanceType → { quota, deployed, headroom }, or null on failure
      */
     async getQuotaHeadroom(instanceTypes) {
-        const cacheKey = 'quotaHeadroom'
-        const cached = this._getCached(cacheKey)
-        if (cached) return cached
+        const cacheKey = 'quotaHeadroom';
+        const cached = this._getCached(cacheKey);
+        if (cached) return cached;
         try {
             const [quotaMap, deployedMap] = await Promise.allSettled([
                 this._fetchServiceQuotas(),
                 this._fetchDeployedCounts()
-            ])
+            ]);
-            const quotas = quotaMap.status === 'fulfilled' ? quotaMap.value : null
-            const deployed = deployedMap.status === 'fulfilled' ? deployedMap.value : null
+            const quotas = quotaMap.status === 'fulfilled' ? quotaMap.value : null;
+            const deployed = deployedMap.status === 'fulfilled' ? deployedMap.value : null;
             if (!quotas) {
-                return null
+                return null;
             }
-            const result = new Map()
-            const deployedCounts = deployed || new Map()
+            const result = new Map();
+            const deployedCounts = deployed || new Map();
             for (const instanceType of instanceTypes) {
-                const quota = quotas.get(instanceType)
-                if (quota != null) {
-                    const deployedCount = deployedCounts.get(instanceType) || 0
-                    const headroom = quota - deployedCount
+                const quota = quotas.get(instanceType);
+                if (quota !== null && quota !== undefined) {
+                    const deployedCount = deployedCounts.get(instanceType) || 0;
+                    const headroom = quota - deployedCount;
                     result.set(instanceType, {
                         quota,
                         deployed: deployedCount,
                         headroom
-                    })
+                    });
                 }
             }
-            this._setCache(cacheKey, result)
-            return result
+            this._setCache(cacheKey, result);
+            return result;
         } catch (err) {
             if (err.name === 'AccessDeniedException' || err.Code === 'AccessDeniedException') {
-                log(`AccessDenied: insufficient permissions for quota queries — skipping`)
+                log('AccessDenied: insufficient permissions for quota queries — skipping');
             } else if (err.name === 'ThrottlingException' || err.Code === 'ThrottlingException') {
-                log(`Throttled: Service Quotas API rate limit hit — skipping`)
+                log('Throttled: Service Quotas API rate limit hit — skipping');
             } else {
-                log(`Failed to get quota headroom: ${err.message}`)
+                log(`Failed to get quota headroom: ${err.message}`);
             }
-            return null
+            return null;
         }
     }
@@ -154,28 +154,28 @@ class QuotaResolver {
      * @returns {Promise<Map>} Map: instanceType → quota limit (number)
      */
     async _fetchServiceQuotas() {
-        const quotaMap = new Map()
-        let nextToken = undefined
+        const quotaMap = new Map();
+        let nextToken = undefined;
         do {
             const command = new ListServiceQuotasCommand({
                 ServiceCode: SAGEMAKER_SERVICE_CODE,
                 ...(nextToken && { NextToken: nextToken })
-            })
+            });
-            const response = await this.quotasClient.send(command)
+            const response = await this.quotasClient.send(command);
             for (const quota of (response.Quotas || [])) {
-                const instanceType = this._parseQuotaName(quota.QuotaName || '')
-                if (instanceType && quota.Value != null) {
-                    quotaMap.set(instanceType, quota.Value)
+                const instanceType = this._parseQuotaName(quota.QuotaName || '');
+                if (instanceType && quota.Value !== null && quota.Value !== undefined) {
+                    quotaMap.set(instanceType, quota.Value);
                 }
             }
-            nextToken = response.NextToken
-        } while (nextToken)
+            nextToken = response.NextToken;
+        } while (nextToken);
-        return quotaMap
+        return quotaMap;
     }
     /**
@@ -185,16 +185,16 @@ class QuotaResolver {
      * @returns {Promise<Map>} Map: instanceType → deployed count
      */
     async _fetchDeployedCounts() {
-        const deployedMap = new Map()
-        let nextToken = undefined
+        const deployedMap = new Map();
+        let nextToken = undefined;
         do {
             const command = new ListEndpointsCommand({
                 StatusEquals: 'InService',
                 ...(nextToken && { NextToken: nextToken })
-            })
+            });
-            const response = await this.sagemakerClient.send(command)
+            const response = await this.sagemakerClient.send(command);
             for (const endpoint of (response.Endpoints || [])) {
                 // ListEndpoints returns endpoint summaries; instance type info
@@ -208,18 +208,18 @@ class QuotaResolver {
                 if (endpoint.ProductionVariants) {
                     for (const variant of endpoint.ProductionVariants) {
                         if (variant.InstanceType) {
-                            const current = deployedMap.get(variant.InstanceType) || 0
-                            const count = variant.CurrentInstanceCount || 1
-                            deployedMap.set(variant.InstanceType, current + count)
+                            const current = deployedMap.get(variant.InstanceType) || 0;
+                            const count = variant.CurrentInstanceCount || 1;
+                            deployedMap.set(variant.InstanceType, current + count);
                         }
                     }
                 }
             }
-            nextToken = response.NextToken
-        } while (nextToken)
+            nextToken = response.NextToken;
+        } while (nextToken);
-        return deployedMap
+        return deployedMap;
     }
     /**
@@ -234,46 +234,46 @@ class QuotaResolver {
      * @returns {Promise<Map|null>} Map: instanceType → { planName, planArn, remainingCapacity, startDate, endDate }, or null on failure
      */
     async getCapacityReservations() {
-        const cacheKey = 'capacityReservations'
-        const cached = this._getCached(cacheKey)
-        if (cached) return cached
+        const cacheKey = 'capacityReservations';
+        const cached = this._getCached(cacheKey);
+        if (cached) return cached;
         try {
-            const result = new Map()
-            let nextToken = undefined
+            const result = new Map();
+            let nextToken = undefined;
             do {
                 const command = new ListTrainingPlansCommand({
                     StatusEquals: 'Active',
                     ...(nextToken && { NextToken: nextToken })
-                })
+                });
-                const response = await this.sagemakerClient.send(command)
-                const now = new Date()
+                const response = await this.sagemakerClient.send(command);
+                const now = new Date();
                 for (const plan of (response.TrainingPlanSummaries || [])) {
                     // Only include plans targeting inference endpoints
-                    const targetResources = plan.TargetResources || []
-                    if (!targetResources.includes('endpoint')) continue
+                    const targetResources = plan.TargetResources || [];
+                    if (!targetResources.includes('endpoint')) continue;
-                    const instanceType = plan.InstanceType || plan.ReservedCapacityInstanceType
-                    if (!instanceType) continue
+                    const instanceType = plan.InstanceType || plan.ReservedCapacityInstanceType;
+                    if (!instanceType) continue;
-                    const planArn = plan.TrainingPlanArn
-                    const planName = plan.TrainingPlanName || 'unknown'
+                    const planArn = plan.TrainingPlanArn;
+                    const planName = plan.TrainingPlanName || 'unknown';
                     const remainingCapacity = plan.AvailableInstanceCount
                         ?? plan.RemainingCapacity
                         ?? plan.TotalInstanceCount
-                        ?? 0
-                    const startDate = plan.StartTime || null
-                    const endDate = plan.EndTime || plan.ExpirationTime || null
+                        ?? 0;
+                    const startDate = plan.StartTime || null;
+                    const endDate = plan.EndTime || plan.ExpirationTime || null;
                     // Skip plans outside their time window
-                    if (startDate && new Date(startDate) > now) continue
-                    if (endDate && new Date(endDate) < now) continue
+                    if (startDate && new Date(startDate) > now) continue;
+                    if (endDate && new Date(endDate) < now) continue;
                     // Only include if there's remaining capacity
-                    if (remainingCapacity <= 0) continue
+                    if (remainingCapacity <= 0) continue;
                     result.set(instanceType, {
                         planName,
@@ -282,25 +282,25 @@ class QuotaResolver {
                         count: remainingCapacity,
                         startDate: startDate ? (startDate instanceof Date ? startDate.toISOString() : startDate) : null,
                         endDate: endDate ? (endDate instanceof Date ? endDate.toISOString() : endDate) : null
-                    })
+                    });
                 }
-                nextToken = response.NextToken
-            } while (nextToken)
+                nextToken = response.NextToken;
+            } while (nextToken);
-            this._setCache(cacheKey, result)
-            return result
+            this._setCache(cacheKey, result);
+            return result;
         } catch (err) {
             if (err.name === 'AccessDeniedException' || err.Code === 'AccessDeniedException') {
-                log(`AccessDenied: insufficient permissions for training plan queries — skipping`)
+                log('AccessDenied: insufficient permissions for training plan queries — skipping');
             } else if (err.name === 'ValidationException') {
-                log(`ListTrainingPlans not available in region ${this.region} — skipping`)
+                log(`ListTrainingPlans not available in region ${this.region} — skipping`);
             } else if (err.name === 'ThrottlingException' || err.Code === 'ThrottlingException') {
-                log(`Throttled: ListTrainingPlans rate limit hit — skipping`)
+                log('Throttled: ListTrainingPlans rate limit hit — skipping');
             } else {
-                log(`Failed to get capacity reservations: ${err.message}`)
+                log(`Failed to get capacity reservations: ${err.message}`);
             }
-            return null
+            return null;
         }
     }
@@ -313,56 +313,56 @@ class QuotaResolver {
      * @returns {Promise<Map|null>} Map: instanceType → { planName, remainingCapacity, expiresAt }, or null on failure
      */
     async getTrainingPlans() {
-        const cacheKey = 'trainingPlans'
-        const cached = this._getCached(cacheKey)
-        if (cached) return cached
+        const cacheKey = 'trainingPlans';
+        const cached = this._getCached(cacheKey);
+        if (cached) return cached;
         try {
-            const result = new Map()
-            let nextToken = undefined
+            const result = new Map();
+            let nextToken = undefined;
             do {
                 const command = new ListTrainingPlansCommand({
                     StatusEquals: 'Active',
                     ...(nextToken && { NextToken: nextToken })
-                })
+                });
-                const response = await this.sagemakerClient.send(command)
+                const response = await this.sagemakerClient.send(command);
                 for (const plan of (response.TrainingPlanSummaries || [])) {
-                    const instanceType = plan.InstanceType || plan.ReservedCapacityInstanceType
-                    const planName = plan.TrainingPlanName || plan.TrainingPlanArn || 'unknown'
+                    const instanceType = plan.InstanceType || plan.ReservedCapacityInstanceType;
+                    const planName = plan.TrainingPlanName || plan.TrainingPlanArn || 'unknown';
                     const remainingCapacity = plan.AvailableInstanceCount
                         ?? plan.RemainingCapacity
                         ?? plan.TotalInstanceCount
-                        ?? 0
-                    const expiresAt = plan.EndTime || plan.ExpirationTime || null
+                        ?? 0;
+                    const expiresAt = plan.EndTime || plan.ExpirationTime || null;
                     if (instanceType && remainingCapacity > 0) {
                         result.set(instanceType, {
                             planName,
                             remainingCapacity,
                             expiresAt
-                        })
+                        });
                     }
                 }
-                nextToken = response.NextToken
-            } while (nextToken)
+                nextToken = response.NextToken;
+            } while (nextToken);
-            this._setCache(cacheKey, result)
-            return result
+            this._setCache(cacheKey, result);
+            return result;
         } catch (err) {
             if (err.name === 'AccessDeniedException' || err.Code === 'AccessDeniedException') {
-                log(`AccessDenied: insufficient permissions for training plan queries — skipping`)
+                log('AccessDenied: insufficient permissions for training plan queries — skipping');
             } else if (err.name === 'ValidationException') {
-                log(`ListTrainingPlans not available in region ${this.region} — skipping`)
+                log(`ListTrainingPlans not available in region ${this.region} — skipping`);
             } else {
-                log(`Failed to get training plans: ${err.message}`)
+                log(`Failed to get training plans: ${err.message}`);
             }
-            return null
+            return null;
         }
     }
 }
-export { QuotaResolver, QUOTA_NAME_PATTERN, SAGEMAKER_SERVICE_CODE, DEFAULT_TIMEOUT_MS, DEFAULT_CACHE_TTL_MS }
+export { QuotaResolver, QUOTA_NAME_PATTERN, SAGEMAKER_SERVICE_CODE, DEFAULT_TIMEOUT_MS, DEFAULT_CACHE_TTL_MS };

package/servers/instance-sizer/lib/vram-estimator.js CHANGED Viewed

@@ -17,20 +17,20 @@ const BYTES_PER_PARAM = {
     bfloat16: 2.0,
     int8: 1.0,
     int4: 0.5
-}
+};
 const QUANTIZATION_BYTES = {
     'awq': 0.5,
     'gptq': 0.5,
     'bnb-4bit': 0.5,
     'bnb-8bit': 1.0
-}
+};
-const BYTES_IN_GB = 1024 ** 3
+const BYTES_IN_GB = 1024 ** 3;
-const DEFAULT_MAX_SEQUENCE_LENGTH = 4096
-const DEFAULT_BATCH_SIZE = 1
-const OVERHEAD_FACTOR = 0.1
+const DEFAULT_MAX_SEQUENCE_LENGTH = 4096;
+const DEFAULT_BATCH_SIZE = 1;
+const OVERHEAD_FACTOR = 0.1;
 // ── Helper Functions ─────────────────────────────────────────────────────────
@@ -44,10 +44,10 @@ const OVERHEAD_FACTOR = 0.1
  */
 const bytesPerParam = (dtype, quantization) => {
     if (quantization && QUANTIZATION_BYTES[quantization] !== undefined) {
-        return QUANTIZATION_BYTES[quantization]
+        return QUANTIZATION_BYTES[quantization];
     }
-    return BYTES_PER_PARAM[dtype] ?? BYTES_PER_PARAM.float16
-}
+    return BYTES_PER_PARAM[dtype] ?? BYTES_PER_PARAM.float16;
+};
 /**
  * Estimate KV cache memory usage.
@@ -66,16 +66,16 @@ const bytesPerParam = (dtype, quantization) => {
  * @returns {number} Estimated KV cache size in bytes
  */
 const estimateKvCache = (parameterCount, maxSequenceLength, batchSize) => {
-    const seqLength = maxSequenceLength ?? DEFAULT_MAX_SEQUENCE_LENGTH
-    const batch = batchSize ?? DEFAULT_BATCH_SIZE
+    const seqLength = maxSequenceLength ?? DEFAULT_MAX_SEQUENCE_LENGTH;
+    const batch = batchSize ?? DEFAULT_BATCH_SIZE;
     // Heuristic: KV cache ≈ parameterCount × (seqLength / 4096) × batch × 0.05 bytes
     // This gives ~5% of raw param count in bytes at default seq length and batch=1
     // For 7B params: 7e9 × 0.05 = 350MB at seq=4096, batch=1
     // Scales linearly with sequence length and batch size
-    const kvBytes = parameterCount * (seqLength / DEFAULT_MAX_SEQUENCE_LENGTH) * batch * 0.05
-    return kvBytes
-}
+    const kvBytes = parameterCount * (seqLength / DEFAULT_MAX_SEQUENCE_LENGTH) * batch * 0.05;
+    return kvBytes;
+};
 // ── Main Estimation Function ─────────────────────────────────────────────────
@@ -97,28 +97,28 @@ const estimateVram = (modelInfo) => {
         quantization,
         maxSequenceLength,
         batchSize
-    } = modelInfo
+    } = modelInfo;
     // Determine confidence based on what was explicitly provided
-    const confidence = determineConfidence(modelInfo)
+    const confidence = determineConfidence(modelInfo);
     // Calculate base weight bytes
-    const bpp = bytesPerParam(dtype, quantization)
-    const baseWeightBytes = parameterCount * bpp
+    const bpp = bytesPerParam(dtype, quantization);
+    const baseWeightBytes = parameterCount * bpp;
     // Calculate KV cache
     const kvCacheBytes = estimateKvCache(
         parameterCount,
         maxSequenceLength ?? DEFAULT_MAX_SEQUENCE_LENGTH,
         batchSize ?? DEFAULT_BATCH_SIZE
-    )
+    );
     // Calculate overhead (framework/CUDA)
-    const overheadBytes = baseWeightBytes * OVERHEAD_FACTOR
+    const overheadBytes = baseWeightBytes * OVERHEAD_FACTOR;
     // Total VRAM
-    const totalVramBytes = baseWeightBytes + kvCacheBytes + overheadBytes
-    const vramGb = totalVramBytes / BYTES_IN_GB
+    const totalVramBytes = baseWeightBytes + kvCacheBytes + overheadBytes;
+    const vramGb = totalVramBytes / BYTES_IN_GB;
     return {
         vramGb,
@@ -129,8 +129,8 @@ const estimateVram = (modelInfo) => {
         },
         confidence,
         source: 'estimate'
-    }
-}
+    };
+};
 /**
  * Determine confidence level based on which parameters were explicitly provided.
@@ -143,25 +143,25 @@ const estimateVram = (modelInfo) => {
  * @returns {'high' | 'medium' | 'low'}
  */
 const determineConfidence = (modelInfo) => {
-    const { parameterCount, dtype, maxSequenceLength, batchSize } = modelInfo
+    const { parameterCount, dtype, maxSequenceLength, batchSize } = modelInfo;
     if (!parameterCount || !dtype) {
-        return 'low'
+        return 'low';
     }
     // If dtype is not in our known list, confidence drops
     if (!BYTES_PER_PARAM[dtype]) {
-        return 'low'
+        return 'low';
     }
     // All key params explicitly provided
     if (maxSequenceLength !== undefined && batchSize !== undefined) {
-        return 'high'
+        return 'high';
     }
     // Core params present but some optional ones use defaults
-    return 'medium'
-}
+    return 'medium';
+};
 export {
     estimateVram,
@@ -174,4 +174,4 @@ export {
     DEFAULT_BATCH_SIZE,
     OVERHEAD_FACTOR,
     BYTES_IN_GB
-}
+};