npm - @aws/ml-container-creator - Versions diffs - 0.10.0 → 0.12.1 - Mend

@aws/ml-container-creator 0.10.0 → 0.12.1

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (90) hide show

package/LICENSE-THIRD-PARTY +9304 -0
package/bin/cli.js +2 -0
package/config/bootstrap-e2e-stack.json +341 -0
package/config/bootstrap-stack.json +40 -3
package/config/parameter-schema-v2.json +33 -22
package/config/tune-catalog.json +1781 -0
package/infra/ci-harness/buildspec.yml +1 -0
package/infra/ci-harness/lambda/path-prover/brain.ts +306 -0
package/infra/ci-harness/lambda/path-prover/write-results.ts +152 -0
package/infra/ci-harness/lib/ci-harness-stack.ts +851 -7
package/infra/ci-harness/state-machines/path-prover.asl.json +496 -0
package/package.json +53 -67
package/servers/base-image-picker/index.js +121 -121
package/servers/e2e-status/index.js +297 -0
package/servers/e2e-status/manifest.json +14 -0
package/servers/e2e-status/package.json +15 -0
package/servers/endpoint-picker/LICENSE +202 -0
package/servers/endpoint-picker/index.js +536 -0
package/servers/endpoint-picker/manifest.json +14 -0
package/servers/endpoint-picker/package.json +18 -0
package/servers/hyperpod-cluster-picker/index.js +125 -125
package/servers/instance-sizer/index.js +166 -153
package/servers/instance-sizer/lib/instance-ranker.js +120 -76
package/servers/instance-sizer/lib/model-resolver.js +61 -61
package/servers/instance-sizer/lib/quota-resolver.js +113 -113
package/servers/instance-sizer/lib/vram-estimator.js +31 -31
package/servers/lib/bedrock-client.js +38 -38
package/servers/lib/catalogs/instances.json +27 -0
package/servers/lib/catalogs/model-servers.json +201 -3
package/servers/lib/custom-validators.js +13 -13
package/servers/lib/dynamic-resolver.js +4 -4
package/servers/marketplace-picker/index.js +342 -0
package/servers/marketplace-picker/manifest.json +14 -0
package/servers/marketplace-picker/package.json +18 -0
package/servers/model-picker/index.js +382 -382
package/servers/region-picker/index.js +56 -56
package/servers/workload-picker/LICENSE +202 -0
package/servers/workload-picker/catalogs/workload-profiles.json +67 -0
package/servers/workload-picker/index.js +171 -0
package/servers/workload-picker/manifest.json +16 -0
package/servers/workload-picker/package.json +16 -0
package/src/app.js +12 -3
package/src/lib/bootstrap-command-handler.js +609 -15
package/src/lib/bootstrap-config.js +36 -0
package/src/lib/bootstrap-profile-manager.js +48 -41
package/src/lib/ci-register-helpers.js +74 -0
package/src/lib/config-loader.js +3 -0
package/src/lib/config-manager.js +7 -0
package/src/lib/config-validator.js +1 -1
package/src/lib/cuda-resolver.js +17 -8
package/src/lib/generated/cli-options.js +319 -314
package/src/lib/generated/parameter-matrix.js +672 -661
package/src/lib/generated/validation-rules.js +76 -72
package/src/lib/path-prover-brain.js +664 -0
package/src/lib/prompts/infrastructure-prompts.js +2 -2
package/src/lib/prompts/model-prompts.js +6 -0
package/src/lib/prompts/project-prompts.js +12 -0
package/src/lib/secrets-prompt-runner.js +4 -0
package/src/lib/template-manager.js +1 -1
package/src/lib/template-variable-resolver.js +87 -1
package/src/lib/tune-catalog-validator.js +37 -4
package/templates/Dockerfile +9 -0
package/templates/code/adapter_sidecar.py +444 -0
package/templates/code/serve +6 -0
package/templates/code/serve.d/vllm.ejs +1 -1
package/templates/do/.benchmark_writer.py +1476 -0
package/templates/do/.tune_helper.py +982 -57
package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
package/templates/do/adapter +154 -0
package/templates/do/benchmark +639 -85
package/templates/do/build +5 -0
package/templates/do/clean.d/async-inference.ejs +5 -0
package/templates/do/clean.d/batch-transform.ejs +5 -0
package/templates/do/clean.d/hyperpod-eks.ejs +5 -0
package/templates/do/clean.d/managed-inference.ejs +5 -0
package/templates/do/config +115 -45
package/templates/do/deploy.d/async-inference.ejs +30 -3
package/templates/do/deploy.d/batch-transform.ejs +29 -3
package/templates/do/deploy.d/hyperpod-eks.ejs +4 -0
package/templates/do/deploy.d/managed-inference.ejs +216 -14
package/templates/do/lib/endpoint-config.sh +1 -1
package/templates/do/lib/profile.sh +44 -0
package/templates/do/optimize +106 -37
package/templates/do/push +5 -0
package/templates/do/register +94 -0
package/templates/do/stage +567 -0
package/templates/do/submit +7 -0
package/templates/do/test +14 -0
package/templates/do/tune +382 -59
package/templates/do/validate +44 -4

package/servers/instance-sizer/index.js CHANGED Viewed

@@ -18,43 +18,43 @@
  *   Returns: { values, choices, metadata }
  */
-import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'
-import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'
-import { z } from 'zod'
-import { readFileSync } from 'node:fs'
-import { fileURLToPath } from 'node:url'
-import { resolve, dirname } from 'node:path'
-import { resolveModelMetadata } from './lib/model-resolver.js'
-import { estimateVram } from './lib/vram-estimator.js'
-import { filterAndRankInstances, applyAvailabilityRanking } from './lib/instance-ranker.js'
-import { QuotaResolver } from './lib/quota-resolver.js'
-import { queryBedrock } from '../lib/bedrock-client.js'
+import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
+import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
+import { z } from 'zod';
+import { readFileSync } from 'node:fs';
+import { fileURLToPath } from 'node:url';
+import { resolve, dirname } from 'node:path';
+import { resolveModelMetadata } from './lib/model-resolver.js';
+import { estimateVram } from './lib/vram-estimator.js';
+import { filterAndRankInstances, applyAvailabilityRanking } from './lib/instance-ranker.js';
+import { QuotaResolver } from './lib/quota-resolver.js';
+import { queryBedrock } from '../lib/bedrock-client.js';
 // ── Path setup ───────────────────────────────────────────────────────────────
-const __filename = fileURLToPath(import.meta.url)
-const __dirname = dirname(__filename)
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = dirname(__filename);
 // ── Load instance catalog from shared lib ────────────────────────────────────
-let INSTANCE_CATALOG
+let INSTANCE_CATALOG;
 try {
-    const catalogPath = resolve(__dirname, '../lib/catalogs/instances.json')
-    const raw = readFileSync(catalogPath, 'utf8')
-    const data = JSON.parse(raw)
-    INSTANCE_CATALOG = data.catalog
+    const catalogPath = resolve(__dirname, '../lib/catalogs/instances.json');
+    const raw = readFileSync(catalogPath, 'utf8');
+    const data = JSON.parse(raw);
+    INSTANCE_CATALOG = data.catalog;
 } catch (err) {
-    process.stderr.write(`[instance-sizer] Fatal: Failed to load instance catalog: ${err.message}\n`)
-    process.exit(1)
+    process.stderr.write(`[instance-sizer] Fatal: Failed to load instance catalog: ${err.message}\n`);
+    process.exit(1);
 }
 // ── Mode configuration ───────────────────────────────────────────────────────
-const DISCOVER_MODE = process.env.DISCOVER_MODE !== 'false' && !process.argv.includes('--no-discover')
-const SMART_MODE = process.env.BEDROCK_SMART === 'true'
-const BEDROCK_MODEL = process.env.BEDROCK_MODEL || 'global.anthropic.claude-sonnet-4-20250514-v1:0'
-const BEDROCK_REGION = process.env.BEDROCK_REGION || process.env.AWS_REGION || 'us-east-1'
+const DISCOVER_MODE = process.env.DISCOVER_MODE !== 'false' && !process.argv.includes('--no-discover');
+const SMART_MODE = process.env.BEDROCK_SMART === 'true';
+const BEDROCK_MODEL = process.env.BEDROCK_MODEL || 'global.anthropic.claude-sonnet-4-20250514-v1:0';
+const BEDROCK_REGION = process.env.BEDROCK_REGION || process.env.AWS_REGION || 'us-east-1';
 // ── Bedrock server config ─────────────────────────────────────────────────────
@@ -95,7 +95,7 @@ Rules:
     maxTokens: 1024,
     modelId: BEDROCK_MODEL,
     region: BEDROCK_REGION
-}
+};
 // ── Logging ──────────────────────────────────────────────────────────────────
@@ -103,7 +103,7 @@ Rules:
  * Log to stderr so it doesn't interfere with MCP stdio protocol on stdout.
  */
 function log(message) {
-    process.stderr.write(`[instance-sizer] ${message}\n`)
+    process.stderr.write(`[instance-sizer] ${message}\n`);
 }
 // ── Tag-based search filtering ───────────────────────────────────────────────
@@ -119,76 +119,76 @@ function log(message) {
  * @returns {string[]} Matching instance type names, sorted by relevance
  */
 function searchInstancesByTag(search, instanceCatalog, options = {}) {
-    const { limit = 10 } = options
-    const candidates = Object.entries(instanceCatalog)
+    const { limit = 10 } = options;
+    const candidates = Object.entries(instanceCatalog);
     // Tokenize search into lowercase keywords
-    const tokens = search.toLowerCase().split(/[\s,\-_]+/).filter(Boolean)
+    const tokens = search.toLowerCase().split(/[\s,\-_]+/).filter(Boolean);
     // Detect compound terms
-    const rawLower = search.toLowerCase()
-    const wantsMultiGpu = rawLower.includes('multi gpu') || rawLower.includes('multi-gpu') || rawLower.includes('multigpu')
+    const rawLower = search.toLowerCase();
+    const wantsMultiGpu = rawLower.includes('multi gpu') || rawLower.includes('multi-gpu') || rawLower.includes('multigpu');
     // Detect CUDA version requests: "cuda 12", "cuda 11.8", "cuda-12.1"
-    const cudaMatch = rawLower.match(/cuda[\s\-_]*(\d+(?:\.\d+)?)/)
-    const wantsCudaVersion = cudaMatch ? cudaMatch[1] : null
+    const cudaMatch = rawLower.match(/cuda[\s\-_]*(\d+(?:\.\d+)?)/);
+    const wantsCudaVersion = cudaMatch ? cudaMatch[1] : null;
     // Score each instance
     const scored = candidates.map(([name, meta]) => {
-        let score = 0
-        const cudaStr = meta.cudaVersions ? meta.cudaVersions.join(' ') : ''
-        const haystack = [...(meta.tags || []), (meta.accelerator || '').toLowerCase(), name, meta.category || '', cudaStr].join(' ')
+        let score = 0;
+        const cudaStr = meta.cudaVersions ? meta.cudaVersions.join(' ') : '';
+        const haystack = [...(meta.tags || []), (meta.accelerator || '').toLowerCase(), name, meta.category || '', cudaStr].join(' ');
         // Compound term: multi-gpu
         if (wantsMultiGpu) {
             if (meta.gpus > 1) {
-                score += 5
+                score += 5;
             } else {
-                return { name, meta, score: 0 }
+                return { name, meta, score: 0 };
             }
         }
         // Compound term: cuda version
         if (wantsCudaVersion) {
-            if (!meta.cudaVersions) return { name, meta, score: 0 }
-            const hasExact = meta.cudaVersions.includes(wantsCudaVersion)
-            const hasMajor = meta.cudaVersions.some(v => v.startsWith(wantsCudaVersion))
+            if (!meta.cudaVersions) return { name, meta, score: 0 };
+            const hasExact = meta.cudaVersions.includes(wantsCudaVersion);
+            const hasMajor = meta.cudaVersions.some(v => v.startsWith(wantsCudaVersion));
             if (hasExact) {
-                score += 4
+                score += 4;
             } else if (hasMajor) {
-                score += 3
+                score += 3;
             } else {
-                return { name, meta, score: 0 }
+                return { name, meta, score: 0 };
             }
         }
         for (const token of tokens) {
-            if (wantsMultiGpu && (token === 'multi' || token === 'gpu')) continue
-            if (wantsCudaVersion && (token === 'cuda' || token === wantsCudaVersion)) continue
+            if (wantsMultiGpu && (token === 'multi' || token === 'gpu')) continue;
+            if (wantsCudaVersion && (token === 'cuda' || token === wantsCudaVersion)) continue;
-            if (haystack.includes(token)) score += 1
-            if (meta.gpus > 1 && token === 'parallel') score += 2
-            if (token === 'gpu' && meta.gpus > 0) score += 1
-            if (token === 'cpu' && meta.gpus === 0) score += 1
+            if (haystack.includes(token)) score += 1;
+            if (meta.gpus > 1 && token === 'parallel') score += 2;
+            if (token === 'gpu' && meta.gpus > 0) score += 1;
+            if (token === 'cpu' && meta.gpus === 0) score += 1;
             if (token === 'cheap' || token === 'budget' || token === 'cost') {
-                if ((meta.tags || []).includes('budget') || (meta.tags || []).includes('cost-effective')) score += 1
+                if ((meta.tags || []).includes('budget') || (meta.tags || []).includes('cost-effective')) score += 1;
             }
             if (token === 'memory' || token === 'high-memory') {
-                if (meta.memGb >= 32) score += 1
+                if (meta.memGb >= 32) score += 1;
             }
-            if (token === 'large' && meta.vcpus >= 16) score += 1
-            if (meta.cudaVersions && meta.cudaVersions.includes(token)) score += 2
+            if (token === 'large' && meta.vcpus >= 16) score += 1;
+            if (meta.cudaVersions && meta.cudaVersions.includes(token)) score += 2;
         }
-        return { name, meta, score }
-    })
+        return { name, meta, score };
+    });
-    const matched = scored.filter(s => s.score > 0).sort((a, b) => b.score - a.score)
+    const matched = scored.filter(s => s.score > 0).sort((a, b) => b.score - a.score);
     if (matched.length === 0) {
-        return []
+        return [];
     }
-    return matched.slice(0, limit).map(s => s.name)
+    return matched.slice(0, limit).map(s => s.name);
 }
 // ── CUDA version filtering ───────────────────────────────────────────────────
@@ -201,22 +201,22 @@ function searchInstancesByTag(search, instanceCatalog, options = {}) {
  * @returns {object} Filtered instance catalog
  */
 function filterByCudaVersion(instanceCatalog, requiredCuda) {
-    const majorRequired = requiredCuda.split('.')[0]
-    const filtered = {}
+    const majorRequired = requiredCuda.split('.')[0];
+    const filtered = {};
     for (const [name, meta] of Object.entries(instanceCatalog)) {
-        if (!meta.cudaVersions || meta.cudaVersions.length === 0) continue
+        if (!meta.cudaVersions || meta.cudaVersions.length === 0) continue;
         const hasCompatible = meta.cudaVersions.some(v => {
-            if (v === requiredCuda) return true
-            if (v.startsWith(majorRequired + '.')) return true
-            return false
-        })
+            if (v === requiredCuda) return true;
+            if (v.startsWith(`${majorRequired  }.`)) return true;
+            return false;
+        });
         if (hasCompatible) {
-            filtered[name] = meta
+            filtered[name] = meta;
         }
     }
-    return filtered
+    return filtered;
 }
 // ── Tool handler ─────────────────────────────────────────────────────────────
@@ -239,26 +239,26 @@ async function handleGetInstanceRecommendation(params) {
         cudaVersion,
         limit = 10,
         context
-    } = params
+    } = params;
     // Apply profile ENV overrides to sequence length and batch size
-    let effectiveMaxSeqLen = maxSequenceLength
-    let effectiveBatchSize = batchSize
+    let effectiveMaxSeqLen = maxSequenceLength;
+    let effectiveBatchSize = batchSize;
     if (context?.profileEnvVars) {
         if (context.profileEnvVars.VLLM_MAX_MODEL_LEN) {
-            effectiveMaxSeqLen = parseInt(context.profileEnvVars.VLLM_MAX_MODEL_LEN, 10) || effectiveMaxSeqLen
+            effectiveMaxSeqLen = parseInt(context.profileEnvVars.VLLM_MAX_MODEL_LEN, 10) || effectiveMaxSeqLen;
         }
         if (context.profileEnvVars.VLLM_MAX_NUM_SEQS) {
-            effectiveBatchSize = parseInt(context.profileEnvVars.VLLM_MAX_NUM_SEQS, 10) || effectiveBatchSize
+            effectiveBatchSize = parseInt(context.profileEnvVars.VLLM_MAX_NUM_SEQS, 10) || effectiveBatchSize;
         }
     }
     // Apply CUDA version filtering to instance catalog
-    let effectiveCatalog = INSTANCE_CATALOG
+    let effectiveCatalog = INSTANCE_CATALOG;
     if (cudaVersion) {
-        effectiveCatalog = filterByCudaVersion(INSTANCE_CATALOG, cudaVersion)
+        effectiveCatalog = filterByCudaVersion(INSTANCE_CATALOG, cudaVersion);
         if (Object.keys(effectiveCatalog).length === 0) {
-            log(`CUDA version ${cudaVersion} filter eliminated all instances`)
+            log(`CUDA version ${cudaVersion} filter eliminated all instances`);
             return {
                 content: [{
                     type: 'text',
@@ -272,13 +272,13 @@ async function handleGetInstanceRecommendation(params) {
                         }
                     })
                 }]
-            }
+            };
         }
     }
     // Mode: tag-based search only (no model name)
     if (!modelName && instanceSearch) {
-        const searchResults = searchInstancesByTag(instanceSearch, effectiveCatalog, { limit })
+        const searchResults = searchInstancesByTag(instanceSearch, effectiveCatalog, { limit });
         return {
             content: [{
                 type: 'text',
@@ -293,14 +293,14 @@ async function handleGetInstanceRecommendation(params) {
                     }
                 })
             }]
-        }
+        };
     }
     // Mode: no model name and no search — return all GPU instances
     if (!modelName) {
         const allGpuInstances = Object.keys(effectiveCatalog)
             .filter(key => effectiveCatalog[key].category === 'gpu')
-            .slice(0, limit)
+            .slice(0, limit);
         return {
             content: [{
@@ -316,120 +316,133 @@ async function handleGetInstanceRecommendation(params) {
                     }
                 })
             }]
-        }
+        };
     }
     // Step 1: Resolve model metadata
     const modelMetadata = await resolveModelMetadata(modelName, {
         discover: DISCOVER_MODE
-    })
+    });
     // If model metadata cannot be resolved, return all GPU instances unfiltered
     if (!modelMetadata) {
-        log(`Model metadata not found for "${modelName}", returning unfiltered GPU instances`)
-        const allGpuInstances = Object.keys(effectiveCatalog)
+        log(`Model metadata not found for "${modelName}", returning unfiltered GPU instances`);
+        let unfilteredRecs = Object.keys(effectiveCatalog)
             .filter(key => effectiveCatalog[key].category === 'gpu')
             .slice(0, limit)
+            .map(instanceType => ({
+                instanceType,
+                gpuCount: effectiveCatalog[instanceType]?.gpus || 0,
+                totalVramGb: null,
+                utilizationPercent: null,
+                tensorParallelism: null,
+                costTier: null
+            }));
+        // Still apply availability ranking so quota/FTP info is displayed
+        if (DISCOVER_MODE && unfilteredRecs.length > 0) {
+            try {
+                const region = process.env.AWS_REGION || process.env.AWS_DEFAULT_REGION || BEDROCK_REGION;
+                const quotaResolver = new QuotaResolver(region);
+                const instanceTypes = unfilteredRecs.map(r => r.instanceType);
+                const [quotas, reservations, ftps] = await Promise.allSettled([
+                    quotaResolver.getQuotaHeadroom(instanceTypes),
+                    quotaResolver.getCapacityReservations(),
+                    quotaResolver.getTrainingPlans()
+                ]);
+                unfilteredRecs = applyAvailabilityRanking(unfilteredRecs, quotas.status === 'fulfilled' ? quotas.value : null, reservations.status === 'fulfilled' ? reservations.value : null, ftps.status === 'fulfilled' ? ftps.value : null);
+            } catch (err) {
+                log(`Quota resolution skipped (unfiltered path): ${err.message}`);
+            }
+        }
         return {
             content: [{
                 type: 'text',
                 text: JSON.stringify({
-                    values: { instanceType: allGpuInstances[0] || null },
-                    choices: { instanceType: allGpuInstances },
+                    values: { instanceType: unfilteredRecs[0]?.instanceType || null },
+                    choices: { instanceType: unfilteredRecs.map(r => r.instanceType) },
                     metadata: {
                         modelName,
-                        parameterCount: null,
-                        dtype: null,
-                        quantization: quantization || null,
-                        estimatedVramGb: null,
-                        vramBreakdown: null,
-                        recommendations: allGpuInstances.map(instanceType => ({
-                            instanceType,
-                            gpuCount: effectiveCatalog[instanceType]?.gpus || 0,
-                            totalVramGb: null,
-                            utilizationPercent: null,
-                            tensorParallelism: null,
-                            costTier: null
-                        })),
+                        recommendations: unfilteredRecs,
                         source: 'unfiltered',
                         cudaVersionFilter: cudaVersion || null,
                         warning: `Could not resolve model metadata for "${modelName}". Returning all GPU instances without filtering.`
                     }
                 })
             }]
-        }
+        };
     }
     // Step 2: Estimate VRAM
     // Use model's max_position_embeddings as the sequence length when no explicit value is provided.
     // This ensures KV cache is sized for the model's actual context window, not the 4096 default.
-    const resolvedMaxSeqLen = effectiveMaxSeqLen || modelMetadata.maxPositionEmbeddings || undefined
+    const resolvedMaxSeqLen = effectiveMaxSeqLen || modelMetadata.maxPositionEmbeddings || undefined;
     const vramEstimate = estimateVram({
         parameterCount: modelMetadata.parameterCount,
         dtype: modelMetadata.dtype,
         quantization: quantization || undefined,
         maxSequenceLength: resolvedMaxSeqLen,
         batchSize: effectiveBatchSize || undefined
-    })
+    });
     // Step 3: Filter and rank instances
     let recommendations = filterAndRankInstances(
         vramEstimate.vramGb,
         effectiveCatalog,
         { limit }
-    )
+    );
     // Step 3a: Quota & availability filtering (discover mode only)
-    let preQuotaFilterCount = 0
-    let allFilteredByQuota = false
-    let preQuotaRecommendations = []
+    let preQuotaFilterCount = 0;
+    let allFilteredByQuota = false;
+    let preQuotaRecommendations = [];
     if (DISCOVER_MODE && recommendations.length > 0) {
         try {
-            const region = process.env.AWS_REGION || process.env.AWS_DEFAULT_REGION || BEDROCK_REGION
-            const quotaResolver = new QuotaResolver(region)
+            const region = process.env.AWS_REGION || process.env.AWS_DEFAULT_REGION || BEDROCK_REGION;
+            const quotaResolver = new QuotaResolver(region);
-            const instanceTypes = recommendations.map(r => r.instanceType)
+            const instanceTypes = recommendations.map(r => r.instanceType);
             const [quotas, reservations, ftps] = await Promise.allSettled([
                 quotaResolver.getQuotaHeadroom(instanceTypes),
                 quotaResolver.getCapacityReservations(),
                 quotaResolver.getTrainingPlans()
-            ])
+            ]);
-            preQuotaFilterCount = recommendations.length
-            preQuotaRecommendations = [...recommendations]
+            preQuotaFilterCount = recommendations.length;
+            preQuotaRecommendations = [...recommendations];
             recommendations = applyAvailabilityRanking(
                 recommendations,
                 quotas.status === 'fulfilled' ? quotas.value : null,
                 reservations.status === 'fulfilled' ? reservations.value : null,
                 ftps.status === 'fulfilled' ? ftps.value : null
-            )
+            );
             if (recommendations.length === 0 && preQuotaFilterCount > 0) {
-                allFilteredByQuota = true
+                allFilteredByQuota = true;
                 // Restore pre-filter recommendations so user can see compatible instances
                 // and request quota increases for the ones they want
-                recommendations = preQuotaRecommendations
-                log(`All ${preQuotaFilterCount} instances filtered by zero-quota — restoring unfiltered list`)
+                recommendations = preQuotaRecommendations;
+                log(`All ${preQuotaFilterCount} instances filtered by zero-quota — restoring unfiltered list`);
             }
         } catch (err) {
             // Graceful degradation: if credentials are missing or any unexpected
             // error occurs, skip quota filtering and continue with unfiltered results
-            log(`Quota resolution skipped: ${err.message}`)
+            log(`Quota resolution skipped: ${err.message}`);
         }
     }
     // Step 3b: If instanceSearch is also provided, further filter by tags
     if (instanceSearch && recommendations.length > 0) {
-        const searchMatches = new Set(searchInstancesByTag(instanceSearch, effectiveCatalog, { limit: 100 }))
-        recommendations = recommendations.filter(r => searchMatches.has(r.instanceType))
+        const searchMatches = new Set(searchInstancesByTag(instanceSearch, effectiveCatalog, { limit: 100 }));
+        recommendations = recommendations.filter(r => searchMatches.has(r.instanceType));
     }
     // Step 4: Smart mode — query Bedrock for edge-case reasoning
-    let finalRecommendations = recommendations
-    let smartModeUsed = false
+    let finalRecommendations = recommendations;
+    let smartModeUsed = false;
     if (SMART_MODE && recommendations.length > 0) {
-        log('[smart] Smart mode enabled, querying Amazon Bedrock...')
+        log('[smart] Smart mode enabled, querying Amazon Bedrock...');
         const bedrockContext = {
             modelName,
@@ -446,38 +459,38 @@ async function handleGetInstanceRecommendation(params) {
                 tensorParallelism: r.tensorParallelism
             })),
             ...(context || {})
-        }
+        };
         const bedrockResult = await queryBedrock(
             SERVER_CONFIG,
             ['instanceType'],
             limit,
             bedrockContext
-        )
+        );
         if (bedrockResult?.values?.instanceType) {
-            const bedrockInstance = bedrockResult.values.instanceType
-            log(`[smart] Bedrock recommendation: ${bedrockInstance}`)
+            const bedrockInstance = bedrockResult.values.instanceType;
+            log(`[smart] Bedrock recommendation: ${bedrockInstance}`);
             // Check if Bedrock's suggestion is already in our list
             const existingIndex = finalRecommendations.findIndex(
                 r => r.instanceType === bedrockInstance
-            )
+            );
             if (existingIndex > 0) {
                 // Move Bedrock's pick to the top
-                const [picked] = finalRecommendations.splice(existingIndex, 1)
-                finalRecommendations = [picked, ...finalRecommendations]
-                smartModeUsed = true
+                const [picked] = finalRecommendations.splice(existingIndex, 1);
+                finalRecommendations = [picked, ...finalRecommendations];
+                smartModeUsed = true;
             } else if (existingIndex === 0) {
                 // Already at the top — Bedrock agrees with static
-                smartModeUsed = true
-                log('[smart] Bedrock agrees with static top recommendation')
+                smartModeUsed = true;
+                log('[smart] Bedrock agrees with static top recommendation');
             } else {
                 // Bedrock suggested an instance not in our filtered list;
                 // verify it exists in the catalog before prepending
                 if (INSTANCE_CATALOG[bedrockInstance]) {
-                    const catalogEntry = INSTANCE_CATALOG[bedrockInstance]
+                    const catalogEntry = INSTANCE_CATALOG[bedrockInstance];
                     const bedrockRec = {
                         instanceType: bedrockInstance,
                         gpuCount: catalogEntry.gpus || 0,
@@ -485,24 +498,24 @@ async function handleGetInstanceRecommendation(params) {
                         utilizationPercent: null,
                         tensorParallelism: catalogEntry.gpus || 1,
                         costTier: catalogEntry.costTier || null
-                    }
-                    finalRecommendations = [bedrockRec, ...finalRecommendations].slice(0, limit)
-                    smartModeUsed = true
+                    };
+                    finalRecommendations = [bedrockRec, ...finalRecommendations].slice(0, limit);
+                    smartModeUsed = true;
                 } else {
-                    log(`[smart] Bedrock suggested unknown instance "${bedrockInstance}", ignoring`)
+                    log(`[smart] Bedrock suggested unknown instance "${bedrockInstance}", ignoring`);
                 }
             }
         } else {
-            log('[smart] Bedrock did not return usable results, falling back to static recommendations')
+            log('[smart] Bedrock did not return usable results, falling back to static recommendations');
         }
     }
     // Build response
     const topRecommendation = finalRecommendations.length > 0
         ? finalRecommendations[0].instanceType
-        : null
+        : null;
-    const rankedList = finalRecommendations.map(r => r.instanceType)
+    const rankedList = finalRecommendations.map(r => r.instanceType);
     return {
         content: [{
@@ -524,7 +537,7 @@ async function handleGetInstanceRecommendation(params) {
                 }
             })
         }]
-    }
+    };
 }
 // ── MCP Server setup ─────────────────────────────────────────────────────────
@@ -532,7 +545,7 @@ async function handleGetInstanceRecommendation(params) {
 const server = new McpServer({
     name: 'instance-sizer',
     version: '1.0.0'
-})
+});
 // Register the get_instance_recommendation tool
 server.tool(
@@ -554,9 +567,9 @@ server.tool(
         }).optional().describe('Additional deployment context')
     },
     async (params) => {
-        return handleGetInstanceRecommendation(params)
+        return handleGetInstanceRecommendation(params);
     }
-)
+);
 // Register alias tool name for backward compatibility
 server.tool(
@@ -578,27 +591,27 @@ server.tool(
         }).optional().describe('Additional deployment context')
     },
     async (params) => {
-        return handleGetInstanceRecommendation(params)
+        return handleGetInstanceRecommendation(params);
     }
-)
+);
 // ── Exports for testing ──────────────────────────────────────────────────────
-export { handleGetInstanceRecommendation, INSTANCE_CATALOG, SERVER_CONFIG, server, searchInstancesByTag, filterByCudaVersion }
+export { handleGetInstanceRecommendation, INSTANCE_CATALOG, SERVER_CONFIG, server, searchInstancesByTag, filterByCudaVersion };
 // ── Transport connection (main module only) ──────────────────────────────────
-const isMain = process.argv[1] && resolve(process.argv[1]) === __filename
+const isMain = process.argv[1] && resolve(process.argv[1]) === __filename;
 if (isMain) {
     if (SMART_MODE) {
-        log(`Smart mode enabled (model: ${BEDROCK_MODEL}, region: ${BEDROCK_REGION})`)
+        log(`Smart mode enabled (model: ${BEDROCK_MODEL}, region: ${BEDROCK_REGION})`);
     } else if (!DISCOVER_MODE) {
-        log('Static mode (catalog-only, no network calls) — use --no-discover to force this')
+        log('Static mode (catalog-only, no network calls) — use --no-discover to force this');
     } else {
-        log('Discover mode (HuggingFace API + quota lookups active)')
+        log('Discover mode (HuggingFace API + quota lookups active)');
     }
-    const transport = new StdioServerTransport()
-    await server.connect(transport)
+    const transport = new StdioServerTransport();
+    await server.connect(transport);
 }