@aws/ml-container-creator 0.10.0 → 0.10.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/LICENSE-THIRD-PARTY +9304 -0
  2. package/bin/cli.js +2 -0
  3. package/config/bootstrap-e2e-stack.json +341 -0
  4. package/config/bootstrap-stack.json +40 -3
  5. package/config/parameter-schema-v2.json +5 -21
  6. package/config/tune-catalog.json +1781 -0
  7. package/infra/ci-harness/buildspec.yml +1 -0
  8. package/infra/ci-harness/lambda/path-prover/brain.ts +306 -0
  9. package/infra/ci-harness/lambda/path-prover/write-results.ts +152 -0
  10. package/infra/ci-harness/lib/ci-harness-stack.ts +837 -7
  11. package/infra/ci-harness/state-machines/path-prover.asl.json +496 -0
  12. package/package.json +51 -66
  13. package/servers/base-image-picker/index.js +121 -121
  14. package/servers/e2e-status/index.js +297 -0
  15. package/servers/e2e-status/manifest.json +14 -0
  16. package/servers/e2e-status/package.json +15 -0
  17. package/servers/endpoint-picker/LICENSE +202 -0
  18. package/servers/endpoint-picker/index.js +536 -0
  19. package/servers/endpoint-picker/manifest.json +14 -0
  20. package/servers/endpoint-picker/package.json +18 -0
  21. package/servers/hyperpod-cluster-picker/index.js +125 -125
  22. package/servers/instance-sizer/index.js +138 -138
  23. package/servers/instance-sizer/lib/instance-ranker.js +76 -76
  24. package/servers/instance-sizer/lib/model-resolver.js +61 -61
  25. package/servers/instance-sizer/lib/quota-resolver.js +113 -113
  26. package/servers/instance-sizer/lib/vram-estimator.js +31 -31
  27. package/servers/lib/bedrock-client.js +38 -38
  28. package/servers/lib/catalogs/model-servers.json +201 -3
  29. package/servers/lib/custom-validators.js +13 -13
  30. package/servers/lib/dynamic-resolver.js +4 -4
  31. package/servers/marketplace-picker/index.js +342 -0
  32. package/servers/marketplace-picker/manifest.json +14 -0
  33. package/servers/marketplace-picker/package.json +18 -0
  34. package/servers/model-picker/index.js +382 -382
  35. package/servers/region-picker/index.js +56 -56
  36. package/servers/workload-picker/LICENSE +202 -0
  37. package/servers/workload-picker/catalogs/workload-profiles.json +67 -0
  38. package/servers/workload-picker/index.js +171 -0
  39. package/servers/workload-picker/manifest.json +16 -0
  40. package/servers/workload-picker/package.json +16 -0
  41. package/src/app.js +4 -2
  42. package/src/lib/bootstrap-command-handler.js +579 -14
  43. package/src/lib/bootstrap-config.js +36 -0
  44. package/src/lib/bootstrap-profile-manager.js +48 -41
  45. package/src/lib/ci-register-helpers.js +74 -0
  46. package/src/lib/config-loader.js +3 -0
  47. package/src/lib/config-manager.js +7 -0
  48. package/src/lib/cuda-resolver.js +17 -8
  49. package/src/lib/generated/cli-options.js +315 -315
  50. package/src/lib/generated/parameter-matrix.js +661 -661
  51. package/src/lib/generated/validation-rules.js +71 -71
  52. package/src/lib/path-prover-brain.js +607 -0
  53. package/src/lib/prompts/project-prompts.js +12 -0
  54. package/src/lib/template-variable-resolver.js +25 -1
  55. package/src/lib/tune-catalog-validator.js +37 -4
  56. package/templates/Dockerfile +9 -0
  57. package/templates/code/adapter_sidecar.py +444 -0
  58. package/templates/code/serve +6 -0
  59. package/templates/code/serve.d/vllm.ejs +1 -1
  60. package/templates/do/.benchmark_writer.py +1476 -0
  61. package/templates/do/.tune_helper.py +982 -57
  62. package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
  63. package/templates/do/adapter +149 -0
  64. package/templates/do/benchmark +639 -85
  65. package/templates/do/config +108 -5
  66. package/templates/do/deploy.d/managed-inference.ejs +192 -11
  67. package/templates/do/optimize +106 -37
  68. package/templates/do/register +89 -0
  69. package/templates/do/test +13 -0
  70. package/templates/do/tune +378 -59
  71. package/templates/do/validate +44 -4
@@ -18,43 +18,43 @@
18
18
  * Returns: { values, choices, metadata }
19
19
  */
20
20
 
21
- import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'
22
- import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'
23
- import { z } from 'zod'
24
- import { readFileSync } from 'node:fs'
25
- import { fileURLToPath } from 'node:url'
26
- import { resolve, dirname } from 'node:path'
27
- import { resolveModelMetadata } from './lib/model-resolver.js'
28
- import { estimateVram } from './lib/vram-estimator.js'
29
- import { filterAndRankInstances, applyAvailabilityRanking } from './lib/instance-ranker.js'
30
- import { QuotaResolver } from './lib/quota-resolver.js'
31
- import { queryBedrock } from '../lib/bedrock-client.js'
21
+ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
22
+ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
23
+ import { z } from 'zod';
24
+ import { readFileSync } from 'node:fs';
25
+ import { fileURLToPath } from 'node:url';
26
+ import { resolve, dirname } from 'node:path';
27
+ import { resolveModelMetadata } from './lib/model-resolver.js';
28
+ import { estimateVram } from './lib/vram-estimator.js';
29
+ import { filterAndRankInstances, applyAvailabilityRanking } from './lib/instance-ranker.js';
30
+ import { QuotaResolver } from './lib/quota-resolver.js';
31
+ import { queryBedrock } from '../lib/bedrock-client.js';
32
32
 
33
33
  // ── Path setup ───────────────────────────────────────────────────────────────
34
34
 
35
- const __filename = fileURLToPath(import.meta.url)
36
- const __dirname = dirname(__filename)
35
+ const __filename = fileURLToPath(import.meta.url);
36
+ const __dirname = dirname(__filename);
37
37
 
38
38
  // ── Load instance catalog from shared lib ────────────────────────────────────
39
39
 
40
- let INSTANCE_CATALOG
40
+ let INSTANCE_CATALOG;
41
41
 
42
42
  try {
43
- const catalogPath = resolve(__dirname, '../lib/catalogs/instances.json')
44
- const raw = readFileSync(catalogPath, 'utf8')
45
- const data = JSON.parse(raw)
46
- INSTANCE_CATALOG = data.catalog
43
+ const catalogPath = resolve(__dirname, '../lib/catalogs/instances.json');
44
+ const raw = readFileSync(catalogPath, 'utf8');
45
+ const data = JSON.parse(raw);
46
+ INSTANCE_CATALOG = data.catalog;
47
47
  } catch (err) {
48
- process.stderr.write(`[instance-sizer] Fatal: Failed to load instance catalog: ${err.message}\n`)
49
- process.exit(1)
48
+ process.stderr.write(`[instance-sizer] Fatal: Failed to load instance catalog: ${err.message}\n`);
49
+ process.exit(1);
50
50
  }
51
51
 
52
52
  // ── Mode configuration ───────────────────────────────────────────────────────
53
53
 
54
- const DISCOVER_MODE = process.env.DISCOVER_MODE !== 'false' && !process.argv.includes('--no-discover')
55
- const SMART_MODE = process.env.BEDROCK_SMART === 'true'
56
- const BEDROCK_MODEL = process.env.BEDROCK_MODEL || 'global.anthropic.claude-sonnet-4-20250514-v1:0'
57
- const BEDROCK_REGION = process.env.BEDROCK_REGION || process.env.AWS_REGION || 'us-east-1'
54
+ const DISCOVER_MODE = process.env.DISCOVER_MODE !== 'false' && !process.argv.includes('--no-discover');
55
+ const SMART_MODE = process.env.BEDROCK_SMART === 'true';
56
+ const BEDROCK_MODEL = process.env.BEDROCK_MODEL || 'global.anthropic.claude-sonnet-4-20250514-v1:0';
57
+ const BEDROCK_REGION = process.env.BEDROCK_REGION || process.env.AWS_REGION || 'us-east-1';
58
58
 
59
59
  // ── Bedrock server config ─────────────────────────────────────────────────────
60
60
 
@@ -95,7 +95,7 @@ Rules:
95
95
  maxTokens: 1024,
96
96
  modelId: BEDROCK_MODEL,
97
97
  region: BEDROCK_REGION
98
- }
98
+ };
99
99
 
100
100
  // ── Logging ──────────────────────────────────────────────────────────────────
101
101
 
@@ -103,7 +103,7 @@ Rules:
103
103
  * Log to stderr so it doesn't interfere with MCP stdio protocol on stdout.
104
104
  */
105
105
  function log(message) {
106
- process.stderr.write(`[instance-sizer] ${message}\n`)
106
+ process.stderr.write(`[instance-sizer] ${message}\n`);
107
107
  }
108
108
 
109
109
  // ── Tag-based search filtering ───────────────────────────────────────────────
@@ -119,76 +119,76 @@ function log(message) {
119
119
  * @returns {string[]} Matching instance type names, sorted by relevance
120
120
  */
121
121
  function searchInstancesByTag(search, instanceCatalog, options = {}) {
122
- const { limit = 10 } = options
123
- const candidates = Object.entries(instanceCatalog)
122
+ const { limit = 10 } = options;
123
+ const candidates = Object.entries(instanceCatalog);
124
124
 
125
125
  // Tokenize search into lowercase keywords
126
- const tokens = search.toLowerCase().split(/[\s,\-_]+/).filter(Boolean)
126
+ const tokens = search.toLowerCase().split(/[\s,\-_]+/).filter(Boolean);
127
127
 
128
128
  // Detect compound terms
129
- const rawLower = search.toLowerCase()
130
- const wantsMultiGpu = rawLower.includes('multi gpu') || rawLower.includes('multi-gpu') || rawLower.includes('multigpu')
129
+ const rawLower = search.toLowerCase();
130
+ const wantsMultiGpu = rawLower.includes('multi gpu') || rawLower.includes('multi-gpu') || rawLower.includes('multigpu');
131
131
 
132
132
  // Detect CUDA version requests: "cuda 12", "cuda 11.8", "cuda-12.1"
133
- const cudaMatch = rawLower.match(/cuda[\s\-_]*(\d+(?:\.\d+)?)/)
134
- const wantsCudaVersion = cudaMatch ? cudaMatch[1] : null
133
+ const cudaMatch = rawLower.match(/cuda[\s\-_]*(\d+(?:\.\d+)?)/);
134
+ const wantsCudaVersion = cudaMatch ? cudaMatch[1] : null;
135
135
 
136
136
  // Score each instance
137
137
  const scored = candidates.map(([name, meta]) => {
138
- let score = 0
139
- const cudaStr = meta.cudaVersions ? meta.cudaVersions.join(' ') : ''
140
- const haystack = [...(meta.tags || []), (meta.accelerator || '').toLowerCase(), name, meta.category || '', cudaStr].join(' ')
138
+ let score = 0;
139
+ const cudaStr = meta.cudaVersions ? meta.cudaVersions.join(' ') : '';
140
+ const haystack = [...(meta.tags || []), (meta.accelerator || '').toLowerCase(), name, meta.category || '', cudaStr].join(' ');
141
141
 
142
142
  // Compound term: multi-gpu
143
143
  if (wantsMultiGpu) {
144
144
  if (meta.gpus > 1) {
145
- score += 5
145
+ score += 5;
146
146
  } else {
147
- return { name, meta, score: 0 }
147
+ return { name, meta, score: 0 };
148
148
  }
149
149
  }
150
150
 
151
151
  // Compound term: cuda version
152
152
  if (wantsCudaVersion) {
153
- if (!meta.cudaVersions) return { name, meta, score: 0 }
154
- const hasExact = meta.cudaVersions.includes(wantsCudaVersion)
155
- const hasMajor = meta.cudaVersions.some(v => v.startsWith(wantsCudaVersion))
153
+ if (!meta.cudaVersions) return { name, meta, score: 0 };
154
+ const hasExact = meta.cudaVersions.includes(wantsCudaVersion);
155
+ const hasMajor = meta.cudaVersions.some(v => v.startsWith(wantsCudaVersion));
156
156
  if (hasExact) {
157
- score += 4
157
+ score += 4;
158
158
  } else if (hasMajor) {
159
- score += 3
159
+ score += 3;
160
160
  } else {
161
- return { name, meta, score: 0 }
161
+ return { name, meta, score: 0 };
162
162
  }
163
163
  }
164
164
 
165
165
  for (const token of tokens) {
166
- if (wantsMultiGpu && (token === 'multi' || token === 'gpu')) continue
167
- if (wantsCudaVersion && (token === 'cuda' || token === wantsCudaVersion)) continue
166
+ if (wantsMultiGpu && (token === 'multi' || token === 'gpu')) continue;
167
+ if (wantsCudaVersion && (token === 'cuda' || token === wantsCudaVersion)) continue;
168
168
 
169
- if (haystack.includes(token)) score += 1
170
- if (meta.gpus > 1 && token === 'parallel') score += 2
171
- if (token === 'gpu' && meta.gpus > 0) score += 1
172
- if (token === 'cpu' && meta.gpus === 0) score += 1
169
+ if (haystack.includes(token)) score += 1;
170
+ if (meta.gpus > 1 && token === 'parallel') score += 2;
171
+ if (token === 'gpu' && meta.gpus > 0) score += 1;
172
+ if (token === 'cpu' && meta.gpus === 0) score += 1;
173
173
  if (token === 'cheap' || token === 'budget' || token === 'cost') {
174
- if ((meta.tags || []).includes('budget') || (meta.tags || []).includes('cost-effective')) score += 1
174
+ if ((meta.tags || []).includes('budget') || (meta.tags || []).includes('cost-effective')) score += 1;
175
175
  }
176
176
  if (token === 'memory' || token === 'high-memory') {
177
- if (meta.memGb >= 32) score += 1
177
+ if (meta.memGb >= 32) score += 1;
178
178
  }
179
- if (token === 'large' && meta.vcpus >= 16) score += 1
180
- if (meta.cudaVersions && meta.cudaVersions.includes(token)) score += 2
179
+ if (token === 'large' && meta.vcpus >= 16) score += 1;
180
+ if (meta.cudaVersions && meta.cudaVersions.includes(token)) score += 2;
181
181
  }
182
- return { name, meta, score }
183
- })
182
+ return { name, meta, score };
183
+ });
184
184
 
185
- const matched = scored.filter(s => s.score > 0).sort((a, b) => b.score - a.score)
185
+ const matched = scored.filter(s => s.score > 0).sort((a, b) => b.score - a.score);
186
186
 
187
187
  if (matched.length === 0) {
188
- return []
188
+ return [];
189
189
  }
190
190
 
191
- return matched.slice(0, limit).map(s => s.name)
191
+ return matched.slice(0, limit).map(s => s.name);
192
192
  }
193
193
 
194
194
  // ── CUDA version filtering ───────────────────────────────────────────────────
@@ -201,22 +201,22 @@ function searchInstancesByTag(search, instanceCatalog, options = {}) {
201
201
  * @returns {object} Filtered instance catalog
202
202
  */
203
203
  function filterByCudaVersion(instanceCatalog, requiredCuda) {
204
- const majorRequired = requiredCuda.split('.')[0]
205
- const filtered = {}
204
+ const majorRequired = requiredCuda.split('.')[0];
205
+ const filtered = {};
206
206
 
207
207
  for (const [name, meta] of Object.entries(instanceCatalog)) {
208
- if (!meta.cudaVersions || meta.cudaVersions.length === 0) continue
208
+ if (!meta.cudaVersions || meta.cudaVersions.length === 0) continue;
209
209
  const hasCompatible = meta.cudaVersions.some(v => {
210
- if (v === requiredCuda) return true
211
- if (v.startsWith(majorRequired + '.')) return true
212
- return false
213
- })
210
+ if (v === requiredCuda) return true;
211
+ if (v.startsWith(`${majorRequired }.`)) return true;
212
+ return false;
213
+ });
214
214
  if (hasCompatible) {
215
- filtered[name] = meta
215
+ filtered[name] = meta;
216
216
  }
217
217
  }
218
218
 
219
- return filtered
219
+ return filtered;
220
220
  }
221
221
 
222
222
  // ── Tool handler ─────────────────────────────────────────────────────────────
@@ -239,26 +239,26 @@ async function handleGetInstanceRecommendation(params) {
239
239
  cudaVersion,
240
240
  limit = 10,
241
241
  context
242
- } = params
242
+ } = params;
243
243
 
244
244
  // Apply profile ENV overrides to sequence length and batch size
245
- let effectiveMaxSeqLen = maxSequenceLength
246
- let effectiveBatchSize = batchSize
245
+ let effectiveMaxSeqLen = maxSequenceLength;
246
+ let effectiveBatchSize = batchSize;
247
247
  if (context?.profileEnvVars) {
248
248
  if (context.profileEnvVars.VLLM_MAX_MODEL_LEN) {
249
- effectiveMaxSeqLen = parseInt(context.profileEnvVars.VLLM_MAX_MODEL_LEN, 10) || effectiveMaxSeqLen
249
+ effectiveMaxSeqLen = parseInt(context.profileEnvVars.VLLM_MAX_MODEL_LEN, 10) || effectiveMaxSeqLen;
250
250
  }
251
251
  if (context.profileEnvVars.VLLM_MAX_NUM_SEQS) {
252
- effectiveBatchSize = parseInt(context.profileEnvVars.VLLM_MAX_NUM_SEQS, 10) || effectiveBatchSize
252
+ effectiveBatchSize = parseInt(context.profileEnvVars.VLLM_MAX_NUM_SEQS, 10) || effectiveBatchSize;
253
253
  }
254
254
  }
255
255
 
256
256
  // Apply CUDA version filtering to instance catalog
257
- let effectiveCatalog = INSTANCE_CATALOG
257
+ let effectiveCatalog = INSTANCE_CATALOG;
258
258
  if (cudaVersion) {
259
- effectiveCatalog = filterByCudaVersion(INSTANCE_CATALOG, cudaVersion)
259
+ effectiveCatalog = filterByCudaVersion(INSTANCE_CATALOG, cudaVersion);
260
260
  if (Object.keys(effectiveCatalog).length === 0) {
261
- log(`CUDA version ${cudaVersion} filter eliminated all instances`)
261
+ log(`CUDA version ${cudaVersion} filter eliminated all instances`);
262
262
  return {
263
263
  content: [{
264
264
  type: 'text',
@@ -272,13 +272,13 @@ async function handleGetInstanceRecommendation(params) {
272
272
  }
273
273
  })
274
274
  }]
275
- }
275
+ };
276
276
  }
277
277
  }
278
278
 
279
279
  // Mode: tag-based search only (no model name)
280
280
  if (!modelName && instanceSearch) {
281
- const searchResults = searchInstancesByTag(instanceSearch, effectiveCatalog, { limit })
281
+ const searchResults = searchInstancesByTag(instanceSearch, effectiveCatalog, { limit });
282
282
  return {
283
283
  content: [{
284
284
  type: 'text',
@@ -293,14 +293,14 @@ async function handleGetInstanceRecommendation(params) {
293
293
  }
294
294
  })
295
295
  }]
296
- }
296
+ };
297
297
  }
298
298
 
299
299
  // Mode: no model name and no search — return all GPU instances
300
300
  if (!modelName) {
301
301
  const allGpuInstances = Object.keys(effectiveCatalog)
302
302
  .filter(key => effectiveCatalog[key].category === 'gpu')
303
- .slice(0, limit)
303
+ .slice(0, limit);
304
304
 
305
305
  return {
306
306
  content: [{
@@ -316,20 +316,20 @@ async function handleGetInstanceRecommendation(params) {
316
316
  }
317
317
  })
318
318
  }]
319
- }
319
+ };
320
320
  }
321
321
 
322
322
  // Step 1: Resolve model metadata
323
323
  const modelMetadata = await resolveModelMetadata(modelName, {
324
324
  discover: DISCOVER_MODE
325
- })
325
+ });
326
326
 
327
327
  // If model metadata cannot be resolved, return all GPU instances unfiltered
328
328
  if (!modelMetadata) {
329
- log(`Model metadata not found for "${modelName}", returning unfiltered GPU instances`)
329
+ log(`Model metadata not found for "${modelName}", returning unfiltered GPU instances`);
330
330
  const allGpuInstances = Object.keys(effectiveCatalog)
331
331
  .filter(key => effectiveCatalog[key].category === 'gpu')
332
- .slice(0, limit)
332
+ .slice(0, limit);
333
333
 
334
334
  return {
335
335
  content: [{
@@ -358,78 +358,78 @@ async function handleGetInstanceRecommendation(params) {
358
358
  }
359
359
  })
360
360
  }]
361
- }
361
+ };
362
362
  }
363
363
 
364
364
  // Step 2: Estimate VRAM
365
365
  // Use model's max_position_embeddings as the sequence length when no explicit value is provided.
366
366
  // This ensures KV cache is sized for the model's actual context window, not the 4096 default.
367
- const resolvedMaxSeqLen = effectiveMaxSeqLen || modelMetadata.maxPositionEmbeddings || undefined
367
+ const resolvedMaxSeqLen = effectiveMaxSeqLen || modelMetadata.maxPositionEmbeddings || undefined;
368
368
  const vramEstimate = estimateVram({
369
369
  parameterCount: modelMetadata.parameterCount,
370
370
  dtype: modelMetadata.dtype,
371
371
  quantization: quantization || undefined,
372
372
  maxSequenceLength: resolvedMaxSeqLen,
373
373
  batchSize: effectiveBatchSize || undefined
374
- })
374
+ });
375
375
 
376
376
  // Step 3: Filter and rank instances
377
377
  let recommendations = filterAndRankInstances(
378
378
  vramEstimate.vramGb,
379
379
  effectiveCatalog,
380
380
  { limit }
381
- )
381
+ );
382
382
 
383
383
  // Step 3a: Quota & availability filtering (discover mode only)
384
- let preQuotaFilterCount = 0
385
- let allFilteredByQuota = false
386
- let preQuotaRecommendations = []
384
+ let preQuotaFilterCount = 0;
385
+ let allFilteredByQuota = false;
386
+ let preQuotaRecommendations = [];
387
387
  if (DISCOVER_MODE && recommendations.length > 0) {
388
388
  try {
389
- const region = process.env.AWS_REGION || process.env.AWS_DEFAULT_REGION || BEDROCK_REGION
390
- const quotaResolver = new QuotaResolver(region)
389
+ const region = process.env.AWS_REGION || process.env.AWS_DEFAULT_REGION || BEDROCK_REGION;
390
+ const quotaResolver = new QuotaResolver(region);
391
391
 
392
- const instanceTypes = recommendations.map(r => r.instanceType)
392
+ const instanceTypes = recommendations.map(r => r.instanceType);
393
393
  const [quotas, reservations, ftps] = await Promise.allSettled([
394
394
  quotaResolver.getQuotaHeadroom(instanceTypes),
395
395
  quotaResolver.getCapacityReservations(),
396
396
  quotaResolver.getTrainingPlans()
397
- ])
397
+ ]);
398
398
 
399
- preQuotaFilterCount = recommendations.length
400
- preQuotaRecommendations = [...recommendations]
399
+ preQuotaFilterCount = recommendations.length;
400
+ preQuotaRecommendations = [...recommendations];
401
401
  recommendations = applyAvailabilityRanking(
402
402
  recommendations,
403
403
  quotas.status === 'fulfilled' ? quotas.value : null,
404
404
  reservations.status === 'fulfilled' ? reservations.value : null,
405
405
  ftps.status === 'fulfilled' ? ftps.value : null
406
- )
406
+ );
407
407
  if (recommendations.length === 0 && preQuotaFilterCount > 0) {
408
- allFilteredByQuota = true
408
+ allFilteredByQuota = true;
409
409
  // Restore pre-filter recommendations so user can see compatible instances
410
410
  // and request quota increases for the ones they want
411
- recommendations = preQuotaRecommendations
412
- log(`All ${preQuotaFilterCount} instances filtered by zero-quota — restoring unfiltered list`)
411
+ recommendations = preQuotaRecommendations;
412
+ log(`All ${preQuotaFilterCount} instances filtered by zero-quota — restoring unfiltered list`);
413
413
  }
414
414
  } catch (err) {
415
415
  // Graceful degradation: if credentials are missing or any unexpected
416
416
  // error occurs, skip quota filtering and continue with unfiltered results
417
- log(`Quota resolution skipped: ${err.message}`)
417
+ log(`Quota resolution skipped: ${err.message}`);
418
418
  }
419
419
  }
420
420
 
421
421
  // Step 3b: If instanceSearch is also provided, further filter by tags
422
422
  if (instanceSearch && recommendations.length > 0) {
423
- const searchMatches = new Set(searchInstancesByTag(instanceSearch, effectiveCatalog, { limit: 100 }))
424
- recommendations = recommendations.filter(r => searchMatches.has(r.instanceType))
423
+ const searchMatches = new Set(searchInstancesByTag(instanceSearch, effectiveCatalog, { limit: 100 }));
424
+ recommendations = recommendations.filter(r => searchMatches.has(r.instanceType));
425
425
  }
426
426
 
427
427
  // Step 4: Smart mode — query Bedrock for edge-case reasoning
428
- let finalRecommendations = recommendations
429
- let smartModeUsed = false
428
+ let finalRecommendations = recommendations;
429
+ let smartModeUsed = false;
430
430
 
431
431
  if (SMART_MODE && recommendations.length > 0) {
432
- log('[smart] Smart mode enabled, querying Amazon Bedrock...')
432
+ log('[smart] Smart mode enabled, querying Amazon Bedrock...');
433
433
 
434
434
  const bedrockContext = {
435
435
  modelName,
@@ -446,38 +446,38 @@ async function handleGetInstanceRecommendation(params) {
446
446
  tensorParallelism: r.tensorParallelism
447
447
  })),
448
448
  ...(context || {})
449
- }
449
+ };
450
450
 
451
451
  const bedrockResult = await queryBedrock(
452
452
  SERVER_CONFIG,
453
453
  ['instanceType'],
454
454
  limit,
455
455
  bedrockContext
456
- )
456
+ );
457
457
 
458
458
  if (bedrockResult?.values?.instanceType) {
459
- const bedrockInstance = bedrockResult.values.instanceType
460
- log(`[smart] Bedrock recommendation: ${bedrockInstance}`)
459
+ const bedrockInstance = bedrockResult.values.instanceType;
460
+ log(`[smart] Bedrock recommendation: ${bedrockInstance}`);
461
461
 
462
462
  // Check if Bedrock's suggestion is already in our list
463
463
  const existingIndex = finalRecommendations.findIndex(
464
464
  r => r.instanceType === bedrockInstance
465
- )
465
+ );
466
466
 
467
467
  if (existingIndex > 0) {
468
468
  // Move Bedrock's pick to the top
469
- const [picked] = finalRecommendations.splice(existingIndex, 1)
470
- finalRecommendations = [picked, ...finalRecommendations]
471
- smartModeUsed = true
469
+ const [picked] = finalRecommendations.splice(existingIndex, 1);
470
+ finalRecommendations = [picked, ...finalRecommendations];
471
+ smartModeUsed = true;
472
472
  } else if (existingIndex === 0) {
473
473
  // Already at the top — Bedrock agrees with static
474
- smartModeUsed = true
475
- log('[smart] Bedrock agrees with static top recommendation')
474
+ smartModeUsed = true;
475
+ log('[smart] Bedrock agrees with static top recommendation');
476
476
  } else {
477
477
  // Bedrock suggested an instance not in our filtered list;
478
478
  // verify it exists in the catalog before prepending
479
479
  if (INSTANCE_CATALOG[bedrockInstance]) {
480
- const catalogEntry = INSTANCE_CATALOG[bedrockInstance]
480
+ const catalogEntry = INSTANCE_CATALOG[bedrockInstance];
481
481
  const bedrockRec = {
482
482
  instanceType: bedrockInstance,
483
483
  gpuCount: catalogEntry.gpus || 0,
@@ -485,24 +485,24 @@ async function handleGetInstanceRecommendation(params) {
485
485
  utilizationPercent: null,
486
486
  tensorParallelism: catalogEntry.gpus || 1,
487
487
  costTier: catalogEntry.costTier || null
488
- }
489
- finalRecommendations = [bedrockRec, ...finalRecommendations].slice(0, limit)
490
- smartModeUsed = true
488
+ };
489
+ finalRecommendations = [bedrockRec, ...finalRecommendations].slice(0, limit);
490
+ smartModeUsed = true;
491
491
  } else {
492
- log(`[smart] Bedrock suggested unknown instance "${bedrockInstance}", ignoring`)
492
+ log(`[smart] Bedrock suggested unknown instance "${bedrockInstance}", ignoring`);
493
493
  }
494
494
  }
495
495
  } else {
496
- log('[smart] Bedrock did not return usable results, falling back to static recommendations')
496
+ log('[smart] Bedrock did not return usable results, falling back to static recommendations');
497
497
  }
498
498
  }
499
499
 
500
500
  // Build response
501
501
  const topRecommendation = finalRecommendations.length > 0
502
502
  ? finalRecommendations[0].instanceType
503
- : null
503
+ : null;
504
504
 
505
- const rankedList = finalRecommendations.map(r => r.instanceType)
505
+ const rankedList = finalRecommendations.map(r => r.instanceType);
506
506
 
507
507
  return {
508
508
  content: [{
@@ -524,7 +524,7 @@ async function handleGetInstanceRecommendation(params) {
524
524
  }
525
525
  })
526
526
  }]
527
- }
527
+ };
528
528
  }
529
529
 
530
530
  // ── MCP Server setup ─────────────────────────────────────────────────────────
@@ -532,7 +532,7 @@ async function handleGetInstanceRecommendation(params) {
532
532
  const server = new McpServer({
533
533
  name: 'instance-sizer',
534
534
  version: '1.0.0'
535
- })
535
+ });
536
536
 
537
537
  // Register the get_instance_recommendation tool
538
538
  server.tool(
@@ -554,9 +554,9 @@ server.tool(
554
554
  }).optional().describe('Additional deployment context')
555
555
  },
556
556
  async (params) => {
557
- return handleGetInstanceRecommendation(params)
557
+ return handleGetInstanceRecommendation(params);
558
558
  }
559
- )
559
+ );
560
560
 
561
561
  // Register alias tool name for backward compatibility
562
562
  server.tool(
@@ -578,27 +578,27 @@ server.tool(
578
578
  }).optional().describe('Additional deployment context')
579
579
  },
580
580
  async (params) => {
581
- return handleGetInstanceRecommendation(params)
581
+ return handleGetInstanceRecommendation(params);
582
582
  }
583
- )
583
+ );
584
584
 
585
585
  // ── Exports for testing ──────────────────────────────────────────────────────
586
586
 
587
- export { handleGetInstanceRecommendation, INSTANCE_CATALOG, SERVER_CONFIG, server, searchInstancesByTag, filterByCudaVersion }
587
+ export { handleGetInstanceRecommendation, INSTANCE_CATALOG, SERVER_CONFIG, server, searchInstancesByTag, filterByCudaVersion };
588
588
 
589
589
  // ── Transport connection (main module only) ──────────────────────────────────
590
590
 
591
- const isMain = process.argv[1] && resolve(process.argv[1]) === __filename
591
+ const isMain = process.argv[1] && resolve(process.argv[1]) === __filename;
592
592
 
593
593
  if (isMain) {
594
594
  if (SMART_MODE) {
595
- log(`Smart mode enabled (model: ${BEDROCK_MODEL}, region: ${BEDROCK_REGION})`)
595
+ log(`Smart mode enabled (model: ${BEDROCK_MODEL}, region: ${BEDROCK_REGION})`);
596
596
  } else if (!DISCOVER_MODE) {
597
- log('Static mode (catalog-only, no network calls) — use --no-discover to force this')
597
+ log('Static mode (catalog-only, no network calls) — use --no-discover to force this');
598
598
  } else {
599
- log('Discover mode (HuggingFace API + quota lookups active)')
599
+ log('Discover mode (HuggingFace API + quota lookups active)');
600
600
  }
601
601
 
602
- const transport = new StdioServerTransport()
603
- await server.connect(transport)
602
+ const transport = new StdioServerTransport();
603
+ await server.connect(transport);
604
604
  }