@aws/ml-container-creator 0.9.1 → 0.10.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90):
  1. package/LICENSE-THIRD-PARTY +9304 -0
  2. package/bin/cli.js +2 -0
  3. package/config/bootstrap-e2e-stack.json +341 -0
  4. package/config/bootstrap-stack.json +40 -3
  5. package/config/parameter-schema-v2.json +2049 -0
  6. package/config/tune-catalog.json +1781 -0
  7. package/infra/ci-harness/buildspec.yml +1 -0
  8. package/infra/ci-harness/lambda/path-prover/brain.ts +306 -0
  9. package/infra/ci-harness/lambda/path-prover/write-results.ts +152 -0
  10. package/infra/ci-harness/lib/ci-harness-stack.ts +837 -7
  11. package/infra/ci-harness/state-machines/path-prover.asl.json +496 -0
  12. package/package.json +53 -68
  13. package/servers/base-image-picker/index.js +121 -121
  14. package/servers/e2e-status/index.js +297 -0
  15. package/servers/e2e-status/manifest.json +14 -0
  16. package/servers/e2e-status/package.json +15 -0
  17. package/servers/endpoint-picker/LICENSE +202 -0
  18. package/servers/endpoint-picker/index.js +536 -0
  19. package/servers/endpoint-picker/manifest.json +14 -0
  20. package/servers/endpoint-picker/package.json +18 -0
  21. package/servers/hyperpod-cluster-picker/index.js +125 -125
  22. package/servers/instance-sizer/index.js +138 -138
  23. package/servers/instance-sizer/lib/instance-ranker.js +76 -76
  24. package/servers/instance-sizer/lib/model-resolver.js +61 -61
  25. package/servers/instance-sizer/lib/quota-resolver.js +113 -113
  26. package/servers/instance-sizer/lib/vram-estimator.js +31 -31
  27. package/servers/lib/bedrock-client.js +38 -38
  28. package/servers/lib/catalogs/jumpstart-public.json +101 -16
  29. package/servers/lib/catalogs/model-servers.json +201 -3
  30. package/servers/lib/catalogs/models.json +182 -26
  31. package/servers/lib/custom-validators.js +13 -13
  32. package/servers/lib/dynamic-resolver.js +4 -4
  33. package/servers/marketplace-picker/index.js +342 -0
  34. package/servers/marketplace-picker/manifest.json +14 -0
  35. package/servers/marketplace-picker/package.json +18 -0
  36. package/servers/model-picker/index.js +382 -382
  37. package/servers/region-picker/index.js +56 -56
  38. package/servers/workload-picker/LICENSE +202 -0
  39. package/servers/workload-picker/catalogs/workload-profiles.json +67 -0
  40. package/servers/workload-picker/index.js +171 -0
  41. package/servers/workload-picker/manifest.json +16 -0
  42. package/servers/workload-picker/package.json +16 -0
  43. package/src/app.js +4 -390
  44. package/src/lib/bootstrap-command-handler.js +710 -1148
  45. package/src/lib/bootstrap-config.js +36 -0
  46. package/src/lib/bootstrap-profile-manager.js +641 -0
  47. package/src/lib/bootstrap-provisioners.js +421 -0
  48. package/src/lib/ci-register-helpers.js +74 -0
  49. package/src/lib/config-loader.js +408 -0
  50. package/src/lib/config-manager.js +66 -1685
  51. package/src/lib/config-mcp-client.js +118 -0
  52. package/src/lib/config-validator.js +634 -0
  53. package/src/lib/cuda-resolver.js +149 -0
  54. package/src/lib/e2e-catalog-validator.js +251 -3
  55. package/src/lib/e2e-ci-recorder.js +103 -0
  56. package/src/lib/generated/cli-options.js +315 -311
  57. package/src/lib/generated/parameter-matrix.js +671 -0
  58. package/src/lib/generated/validation-rules.js +71 -71
  59. package/src/lib/marketplace-flow.js +276 -0
  60. package/src/lib/mcp-query-runner.js +768 -0
  61. package/src/lib/parameter-schema-validator.js +62 -18
  62. package/src/lib/path-prover-brain.js +607 -0
  63. package/src/lib/prompt-runner.js +41 -1504
  64. package/src/lib/prompts/feature-prompts.js +172 -0
  65. package/src/lib/prompts/index.js +48 -0
  66. package/src/lib/prompts/infrastructure-prompts.js +690 -0
  67. package/src/lib/prompts/model-prompts.js +552 -0
  68. package/src/lib/prompts/project-prompts.js +82 -0
  69. package/src/lib/prompts.js +2 -1446
  70. package/src/lib/registry-command-handler.js +135 -3
  71. package/src/lib/secrets-prompt-runner.js +251 -0
  72. package/src/lib/template-variable-resolver.js +422 -0
  73. package/src/lib/tune-catalog-validator.js +37 -4
  74. package/templates/Dockerfile +9 -0
  75. package/templates/code/adapter_sidecar.py +444 -0
  76. package/templates/code/serve +6 -0
  77. package/templates/code/serve.d/vllm.ejs +1 -1
  78. package/templates/do/.benchmark_writer.py +1476 -0
  79. package/templates/do/.tune_helper.py +982 -57
  80. package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
  81. package/templates/do/adapter +149 -0
  82. package/templates/do/benchmark +639 -85
  83. package/templates/do/config +108 -5
  84. package/templates/do/deploy.d/managed-inference.ejs +192 -11
  85. package/templates/do/optimize +106 -37
  86. package/templates/do/register +89 -0
  87. package/templates/do/test +13 -0
  88. package/templates/do/tune +378 -59
  89. package/templates/do/validate +44 -4
  90. package/config/parameter-schema.json +0 -88
@@ -0,0 +1,768 @@
1
+ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ // SPDX-License-Identifier: Apache-2.0
3
+
4
+ /**
5
+ * MCP Query Runner - Handles all MCP server queries for the prompt runner.
6
+ * Uses delegation pattern: receives parent PromptRunner reference to access shared state.
7
+ */
8
+
9
+ import fs from 'fs';
10
+ import path from 'path';
11
+ import { fileURLToPath } from 'node:url';
12
+ import {
13
+ baseImageSearchPrompts,
14
+ formatImageChoices
15
+ } from './prompts/index.js';
16
+
17
+ const __mcp_filename = fileURLToPath(import.meta.url);
18
+ const __mcp_dirname = path.dirname(__mcp_filename);
19
+ const GENERATOR_ROOT = path.resolve(__mcp_dirname, '..', '..');
20
+
21
+ /**
22
+ * Resolve MCP server args — converts relative paths to absolute using GENERATOR_ROOT.
23
+ * @param {string[]} args - The args array from mcp.json serverConfig
24
+ * @returns {string[]} Args with relative paths resolved
25
+ */
26
+ function resolveMcpArgs(args) {
27
+ return (args || []).map(arg => {
28
+ if (arg && !path.isAbsolute(arg) && !arg.startsWith('-')) {
29
+ return path.resolve(GENERATOR_ROOT, arg);
30
+ }
31
+ return arg;
32
+ });
33
+ }
34
+
35
/** Number of choices echoed verbatim before a one-line preview is truncated. */
const MCP_PREVIEW_LIMIT = 5;

/** Identity this client announces to every MCP server it spawns. */
const MCP_CLIENT_INFO = { name: 'ml-container-creator', version: '1.0.0' };

/**
 * Build the short comma-separated preview shown after a successful picker query.
 * @param {Array<*>} choices - Values returned by the MCP server
 * @returns {string} All choices, or the first five followed by "(+N more)"
 */
function formatChoicePreview(choices) {
    if (choices.length <= MCP_PREVIEW_LIMIT) {
        return choices.join(', ');
    }
    const hidden = choices.length - MCP_PREVIEW_LIMIT;
    return `${choices.slice(0, MCP_PREVIEW_LIMIT).join(', ')} (+${hidden} more)`;
}

/**
 * Read one server entry from <GENERATOR_ROOT>/config/mcp.json.
 * @param {string} serverName - Key under `mcpServers`
 * @returns {object|null} The server config, or null when the file or entry is missing
 * @throws {SyntaxError} When mcp.json is not valid JSON (callers wrap this in try/catch)
 */
function readMcpServerConfig(serverName) {
    const mcpConfigPath = path.join(GENERATOR_ROOT, 'config', 'mcp.json');
    if (!fs.existsSync(mcpConfigPath)) {
        return null;
    }
    const mcpConfig = JSON.parse(fs.readFileSync(mcpConfigPath, 'utf8'));
    return mcpConfig.mcpServers?.[serverName] || null;
}

/**
 * Spawn an MCP server over stdio, call a single tool, and always shut the client down.
 * The close happens in `finally` so a failed connect/callTool cannot leave the
 * spawned server process behind.
 * @param {object} serverConfig - Entry from mcp.json (`command` is read here)
 * @param {object} call
 * @param {string[]} call.args - Already-resolved process args
 * @param {object} call.env - Environment for the server process
 * @param {string} call.toolName - Tool to invoke
 * @param {object} call.toolArgs - Tool arguments
 * @returns {Promise<object>} The raw tool result
 */
async function callMcpTool(serverConfig, { args, env, toolName, toolArgs }) {
    const { Client } = await import('@modelcontextprotocol/sdk/client/index.js');
    const { StdioClientTransport } = await import('@modelcontextprotocol/sdk/client/stdio.js');

    const transport = new StdioClientTransport({
        command: serverConfig.command,
        args,
        env,
        stderr: 'pipe'
    });
    const mcpClient = new Client(MCP_CLIENT_INFO, { capabilities: {} });

    try {
        await mcpClient.connect(transport);
        return await mcpClient.callTool({ name: toolName, arguments: toolArgs });
    } finally {
        try {
            await mcpClient.close();
        } catch {
            // Best-effort cleanup: a close failure must not mask the call's own outcome.
        }
    }
}

/**
 * Parse the JSON payload carried by the first text block of a tool result.
 * @param {object} result - Raw tool result
 * @returns {*} The parsed payload, or undefined when the result has no text block
 * @throws {SyntaxError} When the text block is not valid JSON
 */
function parseToolTextResult(result) {
    const textBlock = result?.content?.find(block => block.type === 'text');
    return textBlock ? JSON.parse(textBlock.text) : undefined;
}

/**
 * Convert a registry key containing `*` wildcards into an anchored RegExp.
 * Everything except `*` is matched literally, so characters such as `.` or `+`
 * in model IDs are not interpreted as regex syntax.
 * @param {string} pattern - Registry key, e.g. "org/model-*"
 * @returns {RegExp} Anchored matcher
 */
function wildcardToRegExp(pattern) {
    const literalParts = pattern
        .split('*')
        .map(part => part.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));
    return new RegExp(`^${literalParts.join('.*')}$`);
}

/**
 * Render one row of the instance recommendation table.
 * @param {object} rec - Recommendation entry from the sizer metadata
 * @param {object} topRec - First recommendation; its row is marked with an arrow
 * @param {string} [tag] - Optional trailing annotation (reservation or quota info)
 * @returns {string} The formatted row
 */
function formatRecommendationRow(rec, topRec, tag = '') {
    const tp = rec.tensorParallelism > 1 ? ` TP=${rec.tensorParallelism}` : '';
    const vram = rec.totalVramGb ? `${rec.totalVramGb}GB` : '?';
    const util = rec.utilizationPercent ? `${rec.utilizationPercent}%` : '?';
    const marker = rec === topRec ? '→' : ' ';
    // String() keeps a malformed entry (missing instanceType) from aborting the whole table.
    const instanceType = String(rec.instanceType ?? '?');
    return ` ${marker} ${instanceType.padEnd(20)} ${vram.padStart(5)} VRAM ${util.padStart(4)} util${tp}${tag}`;
}

/**
 * Print the recommendation table, grouped by capacity type when any entry
 * carries a `capacityType`, otherwise as a flat list.
 * @param {object[]} recommendations - Ranked recommendations from the sizer
 */
function printInstanceRecommendations(recommendations) {
    const topRec = recommendations[0];
    const hasAvailabilityData = recommendations.some(rec => rec.capacityType);

    if (!hasAvailabilityData) {
        // Fallback: compact table without availability annotations
        for (const rec of recommendations) {
            console.log(formatRecommendationRow(rec, topRec));
        }
        return;
    }

    const reserved = recommendations.filter(
        rec => rec.capacityType === 'reserved' || rec.capacityType === 'ftp'
    );
    const onDemand = recommendations.filter(rec => rec.capacityType === 'on-demand');

    if (reserved.length > 0) {
        console.log(' ── Reserved Capacity ──');
        for (const rec of reserved) {
            const tag = rec.capacityType === 'reserved'
                ? ` [CR] ${rec.reservationInfo?.planName || rec.reservationInfo?.reservationId || ''}`
                : ` [FTP] ${rec.ftpInfo?.planName || ''}`;
            console.log(formatRecommendationRow(rec, topRec, tag));
        }
    }

    if (onDemand.length > 0) {
        console.log(' ── On-Demand ──');
        for (const rec of onDemand) {
            const quota = rec.quotaLimit;
            const tag = quota !== null && quota !== undefined
                ? ` [Q:${rec.quotaDeployed ?? 0}/${quota}]`
                : '';
            console.log(formatRecommendationRow(rec, topRec, tag));
        }
    }
}

/**
 * MCP Query Runner - performs the MCP server queries on behalf of a prompt runner.
 *
 * The instance holds a reference to the parent runner and reads/writes its
 * state directly: `configManager`, `registryConfigManager`, `options`,
 * `_runPrompts`, `_getArchitectureHeuristicDefault`, and the `_mcp*` /
 * `_instanceSizerMetadata` / `_modelType` fields set by the methods below.
 */
export default class McpQueryRunner {
    /**
     * @param {object} runner - Parent prompt runner whose shared state is used
     */
    constructor(runner) {
        this.runner = runner;
    }

    /**
     * Ask for a free-text region search and query the region-picker server.
     * Skipped when a region was supplied via `options.region` or
     * `explicitConfig.awsRegion`, or when region-picker is not configured.
     * Only the outcome is logged here; the result object itself is not stored
     * by this method.
     * @param {object} frameworkAnswers - Answers forwarded to the server as context
     * @param {object} [explicitConfig] - Explicitly provided configuration values
     * @returns {Promise<void>}
     */
    async _queryMcpForRegion(frameworkAnswers, explicitConfig = {}) {
        const cm = this.runner.configManager;
        if (!cm) return;

        const mcpServers = cm.getMcpServerNames();
        if (!mcpServers.includes('region-picker')) return;

        // Region: skip MCP query if region was explicitly provided via CLI, config file, or bootstrap profile
        const explicitRegion = this.runner.options.region ?? explicitConfig?.awsRegion ?? null;
        if (explicitRegion !== null) return;

        const smart = this.runner.options.smart === true;

        const { regionSearch } = await this.runner._runPrompts([{
            type: 'input',
            name: 'regionSearch',
            message: '🔌 Search for a region (e.g. "europe", "us west", "tokyo"):',
            default: ''
        }]);

        const searchTerm = regionSearch ? regionSearch.trim() : '';
        if (!searchTerm) return;

        console.log(` 🔍 Querying region-picker${smart ? ' [smart]' : ''}...`);
        const result = await cm.queryMcpServer('region-picker', {
            ...frameworkAnswers,
            regionSearch: searchTerm
        });

        const regions = result?.choices?.awsRegion;
        if (regions?.length > 0) {
            console.log(` ✓ ${regions.length} region(s): [${formatChoicePreview(regions)}]`);
        } else {
            console.log(' ↳ No MCP results, using static list');
        }
    }

    /**
     * Query MCP instance-sizer server with tag-based search after deployment target is known.
     * Used when no model name is available for VRAM-based sizing.
     * Skipped when `explicitConfig.instanceType` is set or instance-sizer is not configured.
     * @param {object} frameworkAnswers - Answers forwarded as context; `framework` seeds the prompt default
     * @param {object} [explicitConfig] - Explicitly provided configuration values
     * @returns {Promise<void>}
     * @private
     */
    async _queryMcpForInstance(frameworkAnswers, explicitConfig = {}) {
        const cm = this.runner.configManager;
        if (!cm) return;

        const mcpServers = cm.getMcpServerNames();
        if (!mcpServers.includes('instance-sizer')) return;

        // Instance type: query only if not already provided via CLI/config
        if (explicitConfig?.instanceType) return;

        const smart = this.runner.options.smart === true;

        const { instanceSearch } = await this.runner._runPrompts([{
            type: 'input',
            name: 'instanceSearch',
            message: '🔌 Describe your instance needs (e.g. "multi-gpu", "cost-effective cpu"):',
            default: frameworkAnswers.framework || ''
        }]);

        const searchTerm = instanceSearch ? instanceSearch.trim() : '';
        if (!searchTerm) return;

        console.log(` 🔍 Querying instance-sizer [search]${smart ? ' [smart]' : ''}...`);
        const result = await cm.queryMcpServer('instance-sizer', {
            ...frameworkAnswers,
            instanceSearch: searchTerm
        });

        const instances = result?.choices?.instanceType;
        if (instances?.length > 0) {
            console.log(` ✓ ${instances.length} instance(s): [${formatChoicePreview(instances)}]`);
        } else {
            console.log(' ↳ No MCP results, using static list');
        }
    }

    /**
     * Query the instance-sizer MCP server after model is known.
     * Calls the `get_instance_recommendation` tool and stores the outcome on the
     * runner: `_mcpInstanceSizerChoices`, `_mcpInstanceSizerDisplayChoices` and
     * `_instanceSizerMetadata`. When the sizer returns nothing usable, or the
     * call fails, `_architectureHeuristicDefault` is set instead.
     * Requirements: 4.4, 4.5, 4.7, 3.6, 3.7
     * @param {object} frameworkAnswers - Supplies architecture/backend/deploymentTarget context
     * @param {object} modelFormatAnswers - Supplies the model name and optional quantization
     * @param {object} explicitConfig - Fallback source for the model name
     * @param {object} [sizerContext] - Optional `cudaVersion` and `profileEnvVars`
     * @returns {Promise<void>}
     * @private
     */
    async _queryMcpForInstanceSizing(frameworkAnswers, modelFormatAnswers, explicitConfig, sizerContext = {}) {
        const cm = this.runner.configManager;
        if (!cm) return;

        const mcpServers = cm.getMcpServerNames();
        if (!mcpServers.includes('instance-sizer')) return;

        // Resolve model name from answers or explicit config
        const modelName = modelFormatAnswers?.customModelName
            || modelFormatAnswers?.modelName
            || explicitConfig?.modelName;
        if (!modelName || modelName === 'Custom (enter manually)') return;

        const smart = this.runner.options.smart === true;
        const discover = this.runner.options.discover !== false;

        const modeLabel = [smart && '[smart]', !discover && '[no-discover]'].filter(Boolean).join(' ');
        console.log(` 🔍 Querying instance-sizer${modeLabel ? ` ${modeLabel}` : ''}...`);

        try {
            const serverConfig = readMcpServerConfig('instance-sizer');
            if (!serverConfig) return;

            const serverArgs = [...resolveMcpArgs(serverConfig.args)];
            if (!discover && !serverArgs.includes('--no-discover')) {
                serverArgs.push('--no-discover');
            }

            const toolArgs = {
                modelName,
                limit: 10,
                context: {
                    architecture: frameworkAnswers.architecture || undefined,
                    backend: frameworkAnswers.backend || undefined,
                    deploymentTarget: frameworkAnswers.deploymentTarget || undefined,
                    profileEnvVars: sizerContext.profileEnvVars || undefined
                }
            };

            // Add CUDA version from base image for filtering
            if (sizerContext.cudaVersion) {
                toolArgs.cudaVersion = sizerContext.cudaVersion;
            }

            // Add quantization if available from model format answers
            if (modelFormatAnswers?.quantization) {
                toolArgs.quantization = modelFormatAnswers.quantization;
            }

            const result = await callMcpTool(serverConfig, {
                args: serverArgs,
                env: {
                    ...process.env,
                    ...(serverConfig.env || {}),
                    ...(smart ? { BEDROCK_SMART: 'true' } : {})
                },
                toolName: 'get_instance_recommendation',
                toolArgs
            });

            const parsed = parseToolTextResult(result);
            if (parsed === undefined) return;

            this._applyInstanceSizerResult(parsed, frameworkAnswers);
        } catch (err) {
            // Sizer unavailable — apply architecture heuristic fallback
            const fallback = this._applyArchitectureHeuristic(frameworkAnswers);
            console.log(` ⚠️ instance-sizer: ${err.message}`);
            console.log(` ↳ Using heuristic default: ${fallback}`);
        }
    }

    /**
     * Store a parsed sizer response on the runner and print a summary.
     * @param {object} parsed - Parsed JSON payload of the sizer tool result
     * @param {object} frameworkAnswers - Used for the heuristic fallback
     * @private
     */
    _applyInstanceSizerResult(parsed, frameworkAnswers) {
        const metadata = parsed.metadata;
        const instanceTypes = parsed.choices?.instanceType;

        if (instanceTypes?.length > 0) {
            this.runner._instanceSizerMetadata = metadata || null;

            const recommendations = metadata?.recommendations || [];

            // Coerce so a numeric string from the server cannot blow up on toFixed()
            const vramGb = Number(metadata?.estimatedVramGb);
            const hasVramEstimate = Number.isFinite(vramGb) && vramGb > 0;
            const vramLabel = hasVramEstimate ? vramGb.toFixed(1) : '?';

            // Store choices with display labels for the instance prompt
            this.runner._mcpInstanceSizerChoices = instanceTypes;
            this.runner._mcpInstanceSizerDisplayChoices = recommendations.map(rec => ({
                name: rec.displayLabel || `${rec.instanceType} (${vramLabel}GB / ${rec.totalVramGb || '?'}GB — ${rec.utilizationPercent || '?'}% utilization)`,
                value: rec.instanceType,
                short: rec.instanceType
            }));

            const vramInfo = hasVramEstimate ? ` (model needs ~${vramLabel}GB VRAM)` : '';
            console.log(` ✓ ${instanceTypes.length} compatible instance(s) found${vramInfo}`);

            // Warn if all instances had zero quota but were restored for visibility
            if (metadata?.allFilteredByQuota) {
                console.log(' ⚠️ All instances have zero quota — request a quota increase for your preferred type');
            }

            printInstanceRecommendations(recommendations);
            return;
        }

        if (metadata?.allFilteredByQuota) {
            // All VRAM-compatible instances had zero quota
            console.log(' ⚠️ No quota available for compatible instances. Request a quota increase.');
            this.runner._instanceSizerMetadata = metadata || null;
            return;
        }

        if (metadata?.warning) {
            console.log(` ⚠️ ${metadata.warning}`);
            return;
        }

        // Apply architecture heuristic fallback when sizer returns empty
        const fallback = this._applyArchitectureHeuristic(frameworkAnswers);
        console.log(` ↳ No instance-sizer results, using heuristic default: ${fallback}`);
    }

    /**
     * Set `runner._architectureHeuristicDefault` from the architecture in the
     * answers (or the prefix of `deploymentConfig` before the first '-').
     * @param {object} frameworkAnswers
     * @returns {*} The value returned by the runner's heuristic
     * @private
     */
    _applyArchitectureHeuristic(frameworkAnswers) {
        const archForHeuristic = frameworkAnswers.architecture || frameworkAnswers.deploymentConfig?.split('-')[0];
        const fallback = this.runner._getArchitectureHeuristicDefault(archForHeuristic);
        this.runner._architectureHeuristicDefault = fallback;
        return fallback;
    }

    /**
     * Query the hyperpod-cluster-picker MCP server for available HyperPod EKS clusters.
     * Skipped when the server is not configured or `explicitConfig.hyperPodCluster` is set.
     * A failing query is reported and swallowed so the caller can fall back to manual entry.
     * Requirements: 12.1, 12.2, 12.3
     * @param {object} infraAnswers - Answers forwarded to the server as context
     * @param {object} [explicitConfig] - Explicitly provided configuration values
     * @returns {Promise<void>}
     * @private
     */
    async _queryMcpForHyperPod(infraAnswers, explicitConfig = {}) {
        const cm = this.runner.configManager;
        if (!cm) return;

        const mcpServers = cm.getMcpServerNames();
        if (!mcpServers.includes('hyperpod-cluster-picker')) return;

        // Skip if cluster already provided via CLI/config
        if (explicitConfig?.hyperPodCluster) return;

        const smart = this.runner.options.smart === true;
        console.log(` 🔍 Querying hyperpod-cluster-picker${smart ? ' [smart]' : ''}...`);

        let result;
        try {
            result = await cm.queryMcpServer('hyperpod-cluster-picker', { ...infraAnswers });
        } catch (err) {
            // Same graceful-degradation style as the endpoint picker
            console.log(` ⚠️ hyperpod-cluster-picker: ${err?.message || 'query failed'} — manual entry available`);
            return;
        }

        const clusters = result?.choices?.hyperPodCluster;
        if (clusters?.length > 0) {
            console.log(` ✓ ${clusters.length} cluster(s): [${formatChoicePreview(clusters)}]`);
        } else if (result?.message) {
            // Surface any error message from the MCP server
            console.log(` ⚠️ ${result.message}`);
        } else {
            console.log(' ↳ No HyperPod clusters found via MCP, manual entry available');
        }
    }

    /**
     * Query the endpoint-picker MCP server for available InService real-time endpoints.
     * Populates this.runner._mcpEndpointChoices for the existing endpoint selection prompt.
     * Graceful fallback: if the query throws, a warning is logged and nothing is stored.
     * Requirements: 3.3, 4.3, 4.4
     * @param {object} infraAnswers - `awsRegion` is forwarded to the server
     * @param {object} [explicitConfig] - Explicitly provided configuration values
     * @returns {Promise<void>}
     * @private
     */
    async _queryMcpForEndpoints(infraAnswers, explicitConfig = {}) {
        const cm = this.runner.configManager;
        if (!cm) return;

        const mcpServers = cm.getMcpServerNames();
        if (!mcpServers.includes('endpoint-picker')) return;

        // Skip if existing endpoint already provided via CLI/config
        if (explicitConfig?.existingEndpointName) return;

        console.log(' 🔍 Querying endpoint-picker...');

        try {
            const result = await cm.queryMcpServer('endpoint-picker', {
                awsRegion: infraAnswers.awsRegion,
                deploymentTarget: 'realtime-inference'
            });

            const endpointNames = result?.choices?.endpointName;
            if (endpointNames?.length > 0) {
                const metadata = result.metadata || {};

                // Build choices with metadata annotations
                this.runner._mcpEndpointChoices = endpointNames.map((name) => {
                    const meta = metadata[name];
                    if (!meta) {
                        return { name, value: name };
                    }
                    const gpuInfo = meta.availableGpus === '?' ? 'GPUs: ?' : `${meta.availableGpus} GPUs free`;
                    const plural = meta.icCount !== 1 ? 's' : '';
                    return {
                        name: `${name} (${meta.instanceType}, ${gpuInfo}, ${meta.icCount} IC${plural})`,
                        value: name
                    };
                });

                console.log(` ✓ ${endpointNames.length} endpoint(s) with available capacity`);
            } else if (result?.message) {
                console.log(` ↳ ${result.message}`);
            } else {
                console.log(' ↳ No endpoints with available capacity found');
            }
        } catch (err) {
            // Graceful fallback: if MCP server fails, skip and create new endpoint
            console.log(` ⚠️ endpoint-picker: ${err.message || 'query failed'} — will create new endpoint`);
        }
    }

    /**
     * Query MCP base-image-picker server after deployment config is selected.
     * Populates runner._mcpBaseImageChoices for the base image selection prompt.
     * Skipped when `options['base-image']` is set or the server is not configured.
     * Requirements: 5.1, 5.2, 5.3, 5.4, 9.1, 9.2, 9.3
     * @param {object} frameworkAnswers - Supplies framework, modelServer and architecture
     * @param {object} [_explicitConfig] - Unused; kept for a uniform call signature
     * @returns {Promise<void>}
     * @private
     */
    async _queryMcpForBaseImage(frameworkAnswers, _explicitConfig) {
        // Skip if base image provided via CLI --base-image flag
        if (this.runner.options['base-image']) return;

        const cm = this.runner.configManager;
        if (!cm) return;

        const mcpServers = cm.getMcpServerNames();
        if (!mcpServers.includes('base-image-picker')) return;

        const smart = this.runner.options.smart === true;
        const discover = this.runner.options.discover !== false;
        const { framework, modelServer } = frameworkAnswers;
        const architecture = frameworkAnswers.architecture || frameworkAnswers.deploymentConfig?.split('-')[0];
        const skipsSearchPrompt = framework === 'transformers'
            || architecture === 'triton'
            || architecture === 'diffusors';

        // For non-transformer, non-triton, non-diffusors frameworks, prompt for optional search criteria
        let searchCriteria;
        if (!skipsSearchPrompt) {
            const searchAnswer = await this.runner._runPrompts(baseImageSearchPrompts.map(prompt => ({
                ...prompt,
                when: () => true // Always show: the applicability check already happened above
            })));
            searchCriteria = searchAnswer.baseImageSearch;
        }

        const modeLabel = [smart && '[smart]', discover && '[discover]'].filter(Boolean).join(' ');
        console.log(` 🔍 Querying base-image-picker${modeLabel ? ` ${modeLabel}` : ''}...`);

        const context = { framework, modelServer, architecture };
        if (searchCriteria && searchCriteria.trim()) {
            context.searchCriteria = searchCriteria.trim();
        }

        const result = await cm.queryMcpServer('base-image-picker', context);

        const entries = result?.metadata?.baseImage;
        if (entries?.length > 0) {
            this.runner._mcpBaseImageChoices = formatImageChoices(entries, skipsSearchPrompt);
            console.log(` ✓ ${entries.length} base image(s) available`);
        } else {
            console.log(' ↳ No MCP results, using default image');
        }
    }

    /**
     * Read the model-picker catalog from disk to populate runner._mcpModelChoices.
     * The catalog file is located through the `catalogs` map of the manifest.json
     * that sits next to the server entry point (the last arg in mcp.json), using
     * key 'popular-diffusors' for the diffusors architecture and
     * 'popular-transformers' otherwise. Relative entry points are resolved the
     * same way as when the server is spawned, so the lookup does not depend on
     * the current working directory. Any failure leaves the runner untouched.
     * @param {string} [architecture] - Current architecture ('transformers', 'diffusors', etc.)
     * @private
     */
    _queryMcpForModels(architecture) {
        const cm = this.runner.configManager;
        if (!cm) return;

        const mcpServers = cm.getMcpServerNames();
        if (!mcpServers.includes('model-picker')) return;

        try {
            const serverConfig = readMcpServerConfig('model-picker');
            if (!serverConfig?.args?.length) return;

            // Resolve the server entry point directory from the (absolute) args
            const resolvedArgs = resolveMcpArgs(serverConfig.args);
            const serverEntryPoint = resolvedArgs[resolvedArgs.length - 1];
            const serverDir = path.dirname(serverEntryPoint);

            // Read manifest to find catalog path
            const manifestPath = path.join(serverDir, 'manifest.json');
            if (!fs.existsSync(manifestPath)) return;

            const manifest = JSON.parse(fs.readFileSync(manifestPath, 'utf8'));

            // Select catalog based on architecture
            const catalogKey = architecture === 'diffusors'
                ? 'popular-diffusors'
                : 'popular-transformers';
            const catalogRelPath = manifest.catalogs?.[catalogKey];
            if (!catalogRelPath) return;

            const catalogPath = path.resolve(serverDir, catalogRelPath);
            if (!fs.existsSync(catalogPath)) return;

            const catalog = JSON.parse(fs.readFileSync(catalogPath, 'utf8'));

            // Extract model IDs, filtering out glob patterns (entries with *)
            const modelIds = Object.keys(catalog).filter(id => !id.includes('*'));

            if (modelIds.length > 0) {
                this.runner._mcpModelChoices = modelIds;
            }
        } catch {
            // Deliberate best-effort: silently fall back to hardcoded defaults
        }
    }

    /**
     * Fetch and display model information.
     * First tries the model-picker MCP server (`get_models`, mode 'discover').
     * If that does not resolve the model, falls back to the registry config
     * manager: a direct HuggingFace lookup plus Model Registry overrides.
     * Requirements: 5.1, 5.2, 5.3, 5.4, 5.5, 5.6, 5.11, 11.1, 11.2, 11.3, 11.5, 11.6, 11.7
     * @param {string} modelId - Model identifier or URI
     * @returns {Promise<void>}
     * @private
     */
    async _fetchAndDisplayModelInfo(modelId) {
        console.log('\n 🔍 Querying model-picker [discover]...');

        const info = { sources: [], chatTemplate: null, modelFamily: null };

        const resolvedViaMcp = await this._discoverModelViaMcp(modelId, info);

        // Fallback to legacy path if MCP didn't resolve
        if (!resolvedViaMcp) {
            await this._lookupModelViaRegistry(modelId, info);
        }

        // Display information
        if (info.sources.length === 0) {
            console.log(' ℹ️ No additional model information available');
            console.log(' Proceeding with default configuration');
            return;
        }

        console.log('\n📋 Model Information:');
        console.log(` • Model ID: ${modelId}`);
        if (info.modelFamily) {
            console.log(` • Family: ${info.modelFamily}`);
        }
        if (info.chatTemplate) {
            console.log(' • Chat Template: ✅ Available');
            console.log(' (Will be injected into generated files)');
        } else {
            console.log(' • Chat Template: ❌ Not available');
            console.log(' (Chat endpoints may require manual configuration)');
        }
        console.log(` • Sources: ${info.sources.join(', ')}`);
    }

    /**
     * Try to resolve a model through the model-picker MCP server in discover mode.
     * On success fills `info` and sets runner._modelType, _mcpModelSource and
     * _mcpArtifactUri when the corresponding values are returned.
     * @param {string} modelId
     * @param {{sources: string[], chatTemplate: *, modelFamily: *}} info - Mutated in place
     * @returns {Promise<boolean>} true when the server returned a non-empty `values` object
     * @private
     */
    async _discoverModelViaMcp(modelId, info) {
        const cm = this.runner.configManager;
        if (!cm) return false;

        const mcpServers = cm.getMcpServerNames();
        if (!mcpServers.includes('model-picker')) return false;

        try {
            const serverConfig = readMcpServerConfig('model-picker');
            if (!serverConfig) return false;

            const result = await callMcpTool(serverConfig, {
                args: resolveMcpArgs(serverConfig.args),
                env: { ...process.env, ...(serverConfig.env || {}) },
                toolName: 'get_models',
                toolArgs: { model_id: modelId, mode: 'discover' }
            });

            const parsed = parseToolTextResult(result);
            if (parsed === undefined) return false;

            const vals = parsed.values;
            if (!vals || Object.keys(vals).length === 0) {
                if (parsed.message) {
                    console.log(` ↳ ${parsed.message}`);
                }
                return false;
            }

            if (vals.chat_template) {
                info.chatTemplate = vals.chat_template;
            }
            if (vals.family) {
                info.modelFamily = vals.family;
            }

            // Extract model_type for architecture validation
            // Requirements: 4.1
            if (vals.model_type) {
                this.runner._modelType = vals.model_type;
            }

            // Extract model source metadata for loading adapter
            // Requirements: 2.1, 2.2, 2.3, 2.4
            if (vals.provider) {
                this.runner._mcpModelSource = vals.provider;
            }
            if (vals.artifactUri) {
                this.runner._mcpArtifactUri = vals.artifactUri;
            }

            // Determine sources based on what was returned
            if (vals.tags || vals.pipeline_tag) {
                info.sources.push('HuggingFace_Hub_API');
            }
            if (vals.validation_level || vals.framework_compatibility) {
                info.sources.push('Model_Picker_Catalog');
            }
            if (info.sources.length === 0) {
                info.sources.push('model-picker');
            }
            console.log(` ✓ Resolved: ${modelId}`);
            return true;
        } catch {
            console.log(' ↳ model-picker unavailable, using fallback');
            return false;
        }
    }

    /**
     * Legacy lookup through runner.registryConfigManager: a direct HuggingFace
     * query (skipped for s3:// and registry:// URIs) followed by Model Registry
     * overrides, matched by exact key first and then by `*` wildcard keys.
     * @param {string} modelId
     * @param {{sources: string[], chatTemplate: *, modelFamily: *}} info - Mutated in place
     * @returns {Promise<void>}
     * @private
     */
    async _lookupModelViaRegistry(modelId, info) {
        const registryConfigManager = this.runner.registryConfigManager;
        if (!registryConfigManager) return;

        // Only try HuggingFace API for bare model IDs (not prefixed URIs)
        const isNonHfUri = modelId.startsWith('s3://') || modelId.startsWith('registry://');

        if (!isNonHfUri) {
            try {
                const hfData = await registryConfigManager._fetchHuggingFaceData(modelId);
                if (hfData) {
                    info.sources.push('HuggingFace_Hub_API');
                    if (hfData.chatTemplate) {
                        info.chatTemplate = hfData.chatTemplate;
                    }
                    // Extract model_type for architecture validation
                    // Requirements: 4.1
                    if (hfData.modelConfig?.model_type) {
                        this.runner._modelType = hfData.modelConfig.model_type;
                    }
                    console.log(' ✅ Found on HuggingFace Hub');
                } else {
                    console.log(' ℹ️ Not found on HuggingFace Hub (may be private or offline)');
                }
            } catch {
                console.log(' ⚠️ HuggingFace API unavailable');
            }
        }

        // Check Model Registry for overrides
        const registry = registryConfigManager.modelRegistry;
        if (!registry) return;

        let modelConfig = registry[modelId];
        if (modelConfig) {
            console.log(' ✅ Found in Model_Registry');
        } else {
            for (const [pattern, config] of Object.entries(registry)) {
                if (pattern.includes('*') && wildcardToRegExp(pattern).test(modelId)) {
                    modelConfig = config;
                    console.log(` ✅ Matched pattern in Model_Registry: ${pattern}`);
                    break;
                }
            }
        }

        if (!modelConfig) return;

        info.sources.push('Model_Registry');
        if (modelConfig.chatTemplate) {
            info.chatTemplate = modelConfig.chatTemplate;
        }
        if (modelConfig.family) {
            info.modelFamily = modelConfig.family;
        }
    }

    /**
     * Validate and display instance type compatibility.
     * Does nothing when there is no registry config manager or no framework
     * registry entry for the given framework/version.
     * Requirements: 4.1, 4.2, 4.3, 4.4, 4.5, 4.6
     * @param {string} instanceType - Instance type to validate
     * @param {string} framework - Framework key in the framework registry
     * @param {string} version - Framework version key
     * @returns {Promise<void>}
     * @throws {Error} When the type is incompatible and either prompts are
     *   disabled (`options.skipPrompts` or NODE_ENV === 'test') or the user
     *   declines to proceed
     * @private
     */
    async _validateAndDisplayInstanceType(instanceType, framework, version) {
        const registryConfigManager = this.runner.registryConfigManager;
        if (!registryConfigManager) return;

        // Get framework configuration; without one there is nothing to validate against
        const frameworkConfig = registryConfigManager.frameworkRegistry?.[framework]?.[version];
        if (!frameworkConfig) return;

        console.log(`\n🔍 Validating instance type: ${instanceType}`);

        const validationResult = registryConfigManager.validateInstanceType(instanceType, frameworkConfig);

        if (validationResult.compatible) {
            console.log(' ✅ Instance type is compatible');
            if (validationResult.info) {
                console.log(` ℹ️ ${validationResult.info}`);
            }
        } else {
            console.log(' ❌ Instance type compatibility issue detected');
            if (validationResult.error) {
                console.log(` Error: ${validationResult.error}`);
            }
            if (validationResult.recommendations?.length > 0) {
                console.log(` 💡 Recommended instances: ${validationResult.recommendations.join(', ')}`);
            }

            const failureMessage = 'Instance type validation failed. Please select a compatible instance type.';

            // In test mode or non-interactive mode, throw error instead of prompting
            if (this.runner.options.skipPrompts || process.env.NODE_ENV === 'test') {
                throw new Error(failureMessage);
            }

            // Ask user if they want to proceed
            const { proceedWithIncompatible } = await this.runner._runPrompts([{
                type: 'confirm',
                name: 'proceedWithIncompatible',
                message: 'Instance type may not be compatible. Proceed anyway?',
                default: false
            }]);

            if (!proceedWithIncompatible) {
                throw new Error(failureMessage);
            }
        }

        if (validationResult.warning) {
            console.log(` ⚠️ Warning: ${validationResult.warning}`);
        }
    }
}