@aws/ml-container-creator 0.9.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +31 -137
- package/config/parameter-schema-v2.json +2065 -0
- package/package.json +6 -3
- package/servers/lib/catalogs/jumpstart-public.json +101 -16
- package/servers/lib/catalogs/models.json +182 -26
- package/src/app.js +6 -389
- package/src/lib/bootstrap-command-handler.js +75 -1078
- package/src/lib/bootstrap-profile-manager.js +634 -0
- package/src/lib/bootstrap-provisioners.js +421 -0
- package/src/lib/config-loader.js +405 -0
- package/src/lib/config-manager.js +59 -1668
- package/src/lib/config-mcp-client.js +118 -0
- package/src/lib/config-validator.js +634 -0
- package/src/lib/cuda-resolver.js +140 -0
- package/src/lib/e2e-catalog-validator.js +251 -3
- package/src/lib/e2e-ci-recorder.js +103 -0
- package/src/lib/generated/cli-options.js +471 -0
- package/src/lib/generated/parameter-matrix.js +671 -0
- package/src/lib/generated/validation-rules.js +202 -0
- package/src/lib/marketplace-flow.js +276 -0
- package/src/lib/mcp-query-runner.js +768 -0
- package/src/lib/parameter-schema-validator.js +62 -18
- package/src/lib/prompt-runner.js +41 -1504
- package/src/lib/prompts/feature-prompts.js +172 -0
- package/src/lib/prompts/index.js +48 -0
- package/src/lib/prompts/infrastructure-prompts.js +690 -0
- package/src/lib/prompts/model-prompts.js +552 -0
- package/src/lib/prompts/project-prompts.js +70 -0
- package/src/lib/prompts.js +2 -1446
- package/src/lib/registry-command-handler.js +135 -3
- package/src/lib/secrets-prompt-runner.js +251 -0
- package/src/lib/template-variable-resolver.js +398 -0
- package/templates/code/serve +5 -134
- package/templates/code/serve.d/lmi.ejs +19 -0
- package/templates/code/serve.d/sglang.ejs +47 -0
- package/templates/code/serve.d/tensorrt-llm.ejs +53 -0
- package/templates/code/serve.d/vllm.ejs +48 -0
- package/templates/do/clean +1 -1387
- package/templates/do/clean.d/async-inference.ejs +508 -0
- package/templates/do/clean.d/batch-transform.ejs +512 -0
- package/templates/do/clean.d/hyperpod-eks.ejs +481 -0
- package/templates/do/clean.d/managed-inference.ejs +1043 -0
- package/templates/do/deploy +1 -1766
- package/templates/do/deploy.d/async-inference.ejs +501 -0
- package/templates/do/deploy.d/batch-transform.ejs +529 -0
- package/templates/do/deploy.d/hyperpod-eks.ejs +339 -0
- package/templates/do/deploy.d/managed-inference.ejs +726 -0
- package/config/parameter-schema.json +0 -88
|
@@ -0,0 +1,768 @@
|
|
|
1
|
+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* MCP Query Runner - Handles all MCP server queries for the prompt runner.
|
|
6
|
+
* Uses delegation pattern: receives parent PromptRunner reference to access shared state.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import fs from 'fs';
|
|
10
|
+
import path from 'path';
|
|
11
|
+
import { fileURLToPath } from 'node:url';
|
|
12
|
+
import {
|
|
13
|
+
baseImageSearchPrompts,
|
|
14
|
+
formatImageChoices
|
|
15
|
+
} from './prompts/index.js';
|
|
16
|
+
|
|
17
|
+
// Filesystem path of this module, converted from its ES-module URL.
const __mcp_filename = fileURLToPath(import.meta.url);
// Directory that holds this module.
const __mcp_dirname = path.dirname(__mcp_filename);
// Two directory levels above this module; config/mcp.json and relative MCP
// server args are located relative to this directory.
const GENERATOR_ROOT = path.resolve(__mcp_dirname, '../..');
|
|
20
|
+
|
|
21
|
+
/**
 * Resolve MCP server args - converts relative paths to absolute using GENERATOR_ROOT.
 *
 * An entry is rewritten only when it is a non-empty string that is neither an
 * absolute path nor a flag (leading "-"). Every other entry - flags, absolute
 * paths, empty strings and non-string values such as numbers read from JSON -
 * is returned unchanged instead of being handed to the path API, which throws
 * on non-string input.
 *
 * @param {string[]} [args] - The args array from mcp.json serverConfig
 * @returns {Array} New array with relative path entries resolved; empty when args is missing
 */
function resolveMcpArgs(args) {
    return (args || []).map((arg) => {
        const isRelativePath = typeof arg === 'string'
            && arg !== ''
            && !arg.startsWith('-')
            && !path.isAbsolute(arg);
        return isRelativePath ? path.resolve(GENERATOR_ROOT, arg) : arg;
    });
}
|
|
34
|
+
|
|
35
|
+
export default class McpQueryRunner {
|
|
36
|
+
constructor(runner) {
|
|
37
|
+
this.runner = runner;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
async _queryMcpForRegion(frameworkAnswers, explicitConfig) {
|
|
41
|
+
const cm = this.runner.configManager;
|
|
42
|
+
if (!cm) return;
|
|
43
|
+
|
|
44
|
+
const mcpServers = cm.getMcpServerNames();
|
|
45
|
+
if (mcpServers.length === 0) return;
|
|
46
|
+
|
|
47
|
+
const smart = this.runner.options.smart === true;
|
|
48
|
+
|
|
49
|
+
// Region: skip MCP query if region was explicitly provided via CLI, config file, or bootstrap profile
|
|
50
|
+
const cliRegion = this.runner.options.region;
|
|
51
|
+
const bootstrapRegion = explicitConfig.awsRegion;
|
|
52
|
+
const skipRegionQuery = (cliRegion !== undefined && cliRegion !== null) ||
|
|
53
|
+
(bootstrapRegion !== undefined && bootstrapRegion !== null);
|
|
54
|
+
|
|
55
|
+
if (!skipRegionQuery && mcpServers.includes('region-picker')) {
|
|
56
|
+
const { regionSearch } = await this.runner._runPrompts([{
|
|
57
|
+
type: 'input',
|
|
58
|
+
name: 'regionSearch',
|
|
59
|
+
message: 'š Search for a region (e.g. "europe", "us west", "tokyo"):',
|
|
60
|
+
default: ''
|
|
61
|
+
}]);
|
|
62
|
+
|
|
63
|
+
if (regionSearch && regionSearch.trim()) {
|
|
64
|
+
console.log(` š Querying region-picker${smart ? ' [smart]' : ''}...`);
|
|
65
|
+
const result = await cm.queryMcpServer('region-picker', {
|
|
66
|
+
...frameworkAnswers,
|
|
67
|
+
regionSearch: regionSearch.trim()
|
|
68
|
+
});
|
|
69
|
+
if (result && result.choices?.awsRegion?.length > 0) {
|
|
70
|
+
const choices = result.choices.awsRegion;
|
|
71
|
+
const preview = choices.length <= 5
|
|
72
|
+
? choices.join(', ')
|
|
73
|
+
: `${choices.slice(0, 5).join(', ') } (+${choices.length - 5} more)`;
|
|
74
|
+
console.log(` ā ${choices.length} region(s): [${preview}]`);
|
|
75
|
+
} else {
|
|
76
|
+
console.log(' ā³ No MCP results, using static list');
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
/**
|
|
83
|
+
* Query MCP instance-sizer server with tag-based search after deployment target is known.
|
|
84
|
+
* Used when no model name is available for VRAM-based sizing.
|
|
85
|
+
* Populates configManager.mcpChoices so _runPhase injects them into list prompts.
|
|
86
|
+
* @private
|
|
87
|
+
*/
|
|
88
|
+
async _queryMcpForInstance(frameworkAnswers, explicitConfig) {
|
|
89
|
+
const cm = this.runner.configManager;
|
|
90
|
+
if (!cm) return;
|
|
91
|
+
|
|
92
|
+
const mcpServers = cm.getMcpServerNames();
|
|
93
|
+
if (mcpServers.length === 0) return;
|
|
94
|
+
|
|
95
|
+
const smart = this.runner.options.smart === true;
|
|
96
|
+
|
|
97
|
+
// Instance type: query if not already provided via CLI/config
|
|
98
|
+
if (!explicitConfig.instanceType && mcpServers.includes('instance-sizer')) {
|
|
99
|
+
const { instanceSearch } = await this.runner._runPrompts([{
|
|
100
|
+
type: 'input',
|
|
101
|
+
name: 'instanceSearch',
|
|
102
|
+
message: 'š Describe your instance needs (e.g. "multi-gpu", "cost-effective cpu"):',
|
|
103
|
+
default: frameworkAnswers.framework || ''
|
|
104
|
+
}]);
|
|
105
|
+
|
|
106
|
+
if (instanceSearch && instanceSearch.trim()) {
|
|
107
|
+
console.log(` š Querying instance-sizer [search]${smart ? ' [smart]' : ''}...`);
|
|
108
|
+
const result = await cm.queryMcpServer('instance-sizer', {
|
|
109
|
+
...frameworkAnswers,
|
|
110
|
+
instanceSearch: instanceSearch.trim()
|
|
111
|
+
});
|
|
112
|
+
if (result && result.choices?.instanceType?.length > 0) {
|
|
113
|
+
const choices = result.choices.instanceType;
|
|
114
|
+
const preview = choices.length <= 5
|
|
115
|
+
? choices.join(', ')
|
|
116
|
+
: `${choices.slice(0, 5).join(', ') } (+${choices.length - 5} more)`;
|
|
117
|
+
console.log(` ā ${choices.length} instance(s): [${preview}]`);
|
|
118
|
+
} else {
|
|
119
|
+
console.log(' ā³ No MCP results, using static list');
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
/**
|
|
126
|
+
* Query the instance-sizer MCP server after model is known.
|
|
127
|
+
* Estimates VRAM requirements and returns filtered, ranked instance recommendations.
|
|
128
|
+
* Stores results in this.runner._mcpInstanceSizerChoices and this.runner._instanceSizerMetadata.
|
|
129
|
+
* Requirements: 4.4, 4.5, 4.7, 3.6, 3.7
|
|
130
|
+
* @private
|
|
131
|
+
*/
|
|
132
|
+
async _queryMcpForInstanceSizing(frameworkAnswers, modelFormatAnswers, explicitConfig, sizerContext = {}) {
|
|
133
|
+
const cm = this.runner.configManager;
|
|
134
|
+
if (!cm) return;
|
|
135
|
+
|
|
136
|
+
const mcpServers = cm.getMcpServerNames();
|
|
137
|
+
if (!mcpServers.includes('instance-sizer')) return;
|
|
138
|
+
|
|
139
|
+
// Resolve model name from answers or explicit config
|
|
140
|
+
const modelName = modelFormatAnswers.customModelName || modelFormatAnswers.modelName || explicitConfig.modelName;
|
|
141
|
+
if (!modelName || modelName === 'Custom (enter manually)') return;
|
|
142
|
+
|
|
143
|
+
const smart = this.runner.options.smart === true;
|
|
144
|
+
const discover = this.runner.options.discover !== false;
|
|
145
|
+
|
|
146
|
+
const modeLabel = [smart && '[smart]', !discover && '[no-discover]'].filter(Boolean).join(' ');
|
|
147
|
+
console.log(` š Querying instance-sizer${modeLabel ? ` ${modeLabel}` : ''}...`);
|
|
148
|
+
|
|
149
|
+
try {
|
|
150
|
+
const mcpConfigPath = path.join(GENERATOR_ROOT, 'config', 'mcp.json');
|
|
151
|
+
if (!fs.existsSync(mcpConfigPath)) return;
|
|
152
|
+
|
|
153
|
+
const mcpConfig = JSON.parse(fs.readFileSync(mcpConfigPath, 'utf8'));
|
|
154
|
+
const serverConfig = mcpConfig.mcpServers?.['instance-sizer'];
|
|
155
|
+
if (!serverConfig) return;
|
|
156
|
+
|
|
157
|
+
const { Client } = await import('@modelcontextprotocol/sdk/client/index.js');
|
|
158
|
+
const { StdioClientTransport } = await import('@modelcontextprotocol/sdk/client/stdio.js');
|
|
159
|
+
|
|
160
|
+
const serverArgs = [...resolveMcpArgs(serverConfig.args)];
|
|
161
|
+
if (!discover && !serverArgs.includes('--no-discover')) {
|
|
162
|
+
serverArgs.push('--no-discover');
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
const transport = new StdioClientTransport({
|
|
166
|
+
command: serverConfig.command,
|
|
167
|
+
args: serverArgs,
|
|
168
|
+
env: {
|
|
169
|
+
...process.env,
|
|
170
|
+
...(serverConfig.env || {}),
|
|
171
|
+
...(smart ? { BEDROCK_SMART: 'true' } : {})
|
|
172
|
+
},
|
|
173
|
+
stderr: 'pipe'
|
|
174
|
+
});
|
|
175
|
+
|
|
176
|
+
const mcpClient = new Client(
|
|
177
|
+
{ name: 'ml-container-creator', version: '1.0.0' },
|
|
178
|
+
{ capabilities: {} }
|
|
179
|
+
);
|
|
180
|
+
|
|
181
|
+
await mcpClient.connect(transport);
|
|
182
|
+
|
|
183
|
+
const toolArgs = {
|
|
184
|
+
modelName,
|
|
185
|
+
limit: 10,
|
|
186
|
+
context: {
|
|
187
|
+
architecture: frameworkAnswers.architecture || undefined,
|
|
188
|
+
backend: frameworkAnswers.backend || undefined,
|
|
189
|
+
deploymentTarget: frameworkAnswers.deploymentTarget || undefined,
|
|
190
|
+
profileEnvVars: sizerContext.profileEnvVars || undefined
|
|
191
|
+
}
|
|
192
|
+
};
|
|
193
|
+
|
|
194
|
+
// Add CUDA version from base image for filtering
|
|
195
|
+
if (sizerContext.cudaVersion) {
|
|
196
|
+
toolArgs.cudaVersion = sizerContext.cudaVersion;
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
// Add quantization if available from model format answers
|
|
200
|
+
if (modelFormatAnswers.quantization) {
|
|
201
|
+
toolArgs.quantization = modelFormatAnswers.quantization;
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
const result = await mcpClient.callTool({
|
|
205
|
+
name: 'get_instance_recommendation',
|
|
206
|
+
arguments: toolArgs
|
|
207
|
+
});
|
|
208
|
+
|
|
209
|
+
await mcpClient.close();
|
|
210
|
+
|
|
211
|
+
// Parse the response
|
|
212
|
+
const textBlock = result?.content?.find(b => b.type === 'text');
|
|
213
|
+
if (textBlock) {
|
|
214
|
+
const parsed = JSON.parse(textBlock.text);
|
|
215
|
+
|
|
216
|
+
if (parsed.choices?.instanceType?.length > 0) {
|
|
217
|
+
this.runner._instanceSizerMetadata = parsed.metadata || null;
|
|
218
|
+
|
|
219
|
+
// Build display labels with VRAM estimate and utilization percentage
|
|
220
|
+
const recommendations = parsed.metadata?.recommendations || [];
|
|
221
|
+
const estimatedVramGb = parsed.metadata?.estimatedVramGb;
|
|
222
|
+
|
|
223
|
+
// Store choices with display labels for the instance prompt
|
|
224
|
+
this.runner._mcpInstanceSizerChoices = parsed.choices.instanceType;
|
|
225
|
+
this.runner._mcpInstanceSizerDisplayChoices = recommendations.map(rec => ({
|
|
226
|
+
name: rec.displayLabel || `${rec.instanceType} (${estimatedVramGb ? estimatedVramGb.toFixed(1) : '?'}GB / ${rec.totalVramGb || '?'}GB ā ${rec.utilizationPercent || '?'}% utilization)`,
|
|
227
|
+
value: rec.instanceType,
|
|
228
|
+
short: rec.instanceType
|
|
229
|
+
}));
|
|
230
|
+
|
|
231
|
+
const choices = parsed.choices.instanceType;
|
|
232
|
+
const topRec = recommendations[0];
|
|
233
|
+
const vramInfo = estimatedVramGb
|
|
234
|
+
? ` (model needs ~${estimatedVramGb.toFixed(1)}GB VRAM)`
|
|
235
|
+
: '';
|
|
236
|
+
|
|
237
|
+
console.log(` ā ${choices.length} compatible instance(s) found${vramInfo}`);
|
|
238
|
+
|
|
239
|
+
// Warn if all instances had zero quota but were restored for visibility
|
|
240
|
+
if (parsed.metadata?.allFilteredByQuota) {
|
|
241
|
+
console.log(' ā ļø All instances have zero quota ā request a quota increase for your preferred type');
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
// Check if availability data is present (recommendations have capacityType)
|
|
245
|
+
const hasAvailabilityData = recommendations.some(r => r.capacityType);
|
|
246
|
+
|
|
247
|
+
if (hasAvailabilityData) {
|
|
248
|
+
// Group by capacityType for display
|
|
249
|
+
const reserved = recommendations.filter(r => r.capacityType === 'reserved' || r.capacityType === 'ftp');
|
|
250
|
+
const onDemand = recommendations.filter(r => r.capacityType === 'on-demand');
|
|
251
|
+
|
|
252
|
+
if (reserved.length > 0) {
|
|
253
|
+
console.log(' āā Reserved Capacity āā');
|
|
254
|
+
for (const rec of reserved) {
|
|
255
|
+
const tp = rec.tensorParallelism > 1 ? ` TP=${rec.tensorParallelism}` : '';
|
|
256
|
+
const vram = rec.totalVramGb ? `${rec.totalVramGb}GB` : '?';
|
|
257
|
+
const util = rec.utilizationPercent ? `${rec.utilizationPercent}%` : '?';
|
|
258
|
+
const tag = rec.capacityType === 'reserved'
|
|
259
|
+
? ` [CR] ${rec.reservationInfo?.planName || rec.reservationInfo?.reservationId || ''}`
|
|
260
|
+
: ` [FTP] ${rec.ftpInfo?.planName || ''}`;
|
|
261
|
+
console.log(` ${rec === topRec ? 'ā' : ' '} ${rec.instanceType.padEnd(20)} ${vram.padStart(5)} VRAM ${util.padStart(4)} util${tp}${tag}`);
|
|
262
|
+
}
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
if (onDemand.length > 0) {
|
|
266
|
+
console.log(' āā On-Demand āā');
|
|
267
|
+
for (const rec of onDemand) {
|
|
268
|
+
const tp = rec.tensorParallelism > 1 ? ` TP=${rec.tensorParallelism}` : '';
|
|
269
|
+
const vram = rec.totalVramGb ? `${rec.totalVramGb}GB` : '?';
|
|
270
|
+
const util = rec.utilizationPercent ? `${rec.utilizationPercent}%` : '?';
|
|
271
|
+
const deployed = rec.quotaDeployed;
|
|
272
|
+
const quota = rec.quotaLimit;
|
|
273
|
+
const tag = quota !== null && quota !== undefined ? ` [Q:${deployed ?? 0}/${quota}]` : '';
|
|
274
|
+
console.log(` ${rec === topRec ? 'ā' : ' '} ${rec.instanceType.padEnd(20)} ${vram.padStart(5)} VRAM ${util.padStart(4)} util${tp}${tag}`);
|
|
275
|
+
}
|
|
276
|
+
}
|
|
277
|
+
} else {
|
|
278
|
+
// Fallback: display compact recommendation table (no availability data)
|
|
279
|
+
for (const rec of recommendations) {
|
|
280
|
+
const tp = rec.tensorParallelism > 1 ? ` TP=${rec.tensorParallelism}` : '';
|
|
281
|
+
const vram = rec.totalVramGb ? `${rec.totalVramGb}GB` : '?';
|
|
282
|
+
const util = rec.utilizationPercent ? `${rec.utilizationPercent}%` : '?';
|
|
283
|
+
console.log(` ${rec === topRec ? 'ā' : ' '} ${rec.instanceType.padEnd(20)} ${vram.padStart(5)} VRAM ${util.padStart(4)} util${tp}`);
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
} else if (parsed.metadata?.allFilteredByQuota) {
|
|
287
|
+
// All VRAM-compatible instances had zero quota
|
|
288
|
+
console.log(' ā ļø No quota available for compatible instances. Request a quota increase.');
|
|
289
|
+
this.runner._instanceSizerMetadata = parsed.metadata || null;
|
|
290
|
+
} else if (parsed.metadata?.warning) {
|
|
291
|
+
console.log(` ā ļø ${parsed.metadata.warning}`);
|
|
292
|
+
} else {
|
|
293
|
+
// Apply architecture heuristic fallback when sizer returns empty
|
|
294
|
+
const archForHeuristic = frameworkAnswers.architecture || frameworkAnswers.deploymentConfig?.split('-')[0];
|
|
295
|
+
this.runner._architectureHeuristicDefault = this.runner._getArchitectureHeuristicDefault(archForHeuristic);
|
|
296
|
+
console.log(` ā³ No instance-sizer results, using heuristic default: ${this.runner._architectureHeuristicDefault}`);
|
|
297
|
+
}
|
|
298
|
+
}
|
|
299
|
+
} catch (err) {
|
|
300
|
+
// Sizer unavailable ā apply architecture heuristic fallback
|
|
301
|
+
const archForHeuristic = frameworkAnswers.architecture || frameworkAnswers.deploymentConfig?.split('-')[0];
|
|
302
|
+
this.runner._architectureHeuristicDefault = this.runner._getArchitectureHeuristicDefault(archForHeuristic);
|
|
303
|
+
console.log(` ā ļø instance-sizer: ${err.message}`);
|
|
304
|
+
console.log(` ā³ Using heuristic default: ${this.runner._architectureHeuristicDefault}`);
|
|
305
|
+
}
|
|
306
|
+
}
|
|
307
|
+
|
|
308
|
+
/**
|
|
309
|
+
* Query the hyperpod-cluster-picker MCP server for available HyperPod EKS clusters.
|
|
310
|
+
* Populates configManager.mcpChoices.hyperPodCluster so _runPhase injects them into the list prompt.
|
|
311
|
+
* Falls back to manual entry if the MCP server is not configured or fails.
|
|
312
|
+
* Requirements: 12.1, 12.2, 12.3
|
|
313
|
+
* @private
|
|
314
|
+
*/
|
|
315
|
+
async _queryMcpForHyperPod(infraAnswers, explicitConfig) {
|
|
316
|
+
const cm = this.runner.configManager;
|
|
317
|
+
if (!cm) return;
|
|
318
|
+
|
|
319
|
+
const mcpServers = cm.getMcpServerNames();
|
|
320
|
+
if (!mcpServers.includes('hyperpod-cluster-picker')) return;
|
|
321
|
+
|
|
322
|
+
// Skip if cluster already provided via CLI/config
|
|
323
|
+
if (explicitConfig.hyperPodCluster) return;
|
|
324
|
+
|
|
325
|
+
const smart = this.runner.options.smart === true;
|
|
326
|
+
console.log(` š Querying hyperpod-cluster-picker${smart ? ' [smart]' : ''}...`);
|
|
327
|
+
|
|
328
|
+
const result = await cm.queryMcpServer('hyperpod-cluster-picker', {
|
|
329
|
+
...infraAnswers
|
|
330
|
+
});
|
|
331
|
+
|
|
332
|
+
if (result && result.choices?.hyperPodCluster?.length > 0) {
|
|
333
|
+
const choices = result.choices.hyperPodCluster;
|
|
334
|
+
const preview = choices.length <= 5
|
|
335
|
+
? choices.join(', ')
|
|
336
|
+
: `${choices.slice(0, 5).join(', ')} (+${choices.length - 5} more)`;
|
|
337
|
+
console.log(` ā ${choices.length} cluster(s): [${preview}]`);
|
|
338
|
+
} else {
|
|
339
|
+
// Surface any error message from the MCP server
|
|
340
|
+
if (result?.message) {
|
|
341
|
+
console.log(` ā ļø ${result.message}`);
|
|
342
|
+
} else {
|
|
343
|
+
console.log(' ā³ No HyperPod clusters found via MCP, manual entry available');
|
|
344
|
+
}
|
|
345
|
+
}
|
|
346
|
+
}
|
|
347
|
+
|
|
348
|
+
/**
|
|
349
|
+
* Query the endpoint-picker MCP server for available InService real-time endpoints.
|
|
350
|
+
* Populates this.runner._mcpEndpointChoices for the existing endpoint selection prompt.
|
|
351
|
+
* Graceful fallback: if MCP server fails (no credentials, timeout), skip and create new endpoint.
|
|
352
|
+
* Requirements: 3.3, 4.3, 4.4
|
|
353
|
+
* @private
|
|
354
|
+
*/
|
|
355
|
+
async _queryMcpForEndpoints(infraAnswers, explicitConfig) {
|
|
356
|
+
const cm = this.runner.configManager;
|
|
357
|
+
if (!cm) return;
|
|
358
|
+
|
|
359
|
+
const mcpServers = cm.getMcpServerNames();
|
|
360
|
+
if (!mcpServers.includes('endpoint-picker')) return;
|
|
361
|
+
|
|
362
|
+
// Skip if existing endpoint already provided via CLI/config
|
|
363
|
+
if (explicitConfig.existingEndpointName) return;
|
|
364
|
+
|
|
365
|
+
console.log(' š Querying endpoint-picker...');
|
|
366
|
+
|
|
367
|
+
try {
|
|
368
|
+
const result = await cm.queryMcpServer('endpoint-picker', {
|
|
369
|
+
awsRegion: infraAnswers.awsRegion,
|
|
370
|
+
deploymentTarget: 'realtime-inference'
|
|
371
|
+
});
|
|
372
|
+
|
|
373
|
+
if (result && result.choices?.endpointName?.length > 0) {
|
|
374
|
+
const endpointNames = result.choices.endpointName;
|
|
375
|
+
const metadata = result.metadata || {};
|
|
376
|
+
|
|
377
|
+
// Build choices with metadata annotations
|
|
378
|
+
this.runner._mcpEndpointChoices = endpointNames.map(name => {
|
|
379
|
+
const meta = metadata[name];
|
|
380
|
+
if (meta) {
|
|
381
|
+
const gpuInfo = meta.availableGpus === '?' ? 'GPUs: ?' : `${meta.availableGpus} GPUs free`;
|
|
382
|
+
return {
|
|
383
|
+
name: `${name} (${meta.instanceType}, ${gpuInfo}, ${meta.icCount} IC${meta.icCount !== 1 ? 's' : ''})`,
|
|
384
|
+
value: name
|
|
385
|
+
};
|
|
386
|
+
}
|
|
387
|
+
return { name, value: name };
|
|
388
|
+
});
|
|
389
|
+
|
|
390
|
+
console.log(` ā ${endpointNames.length} endpoint(s) with available capacity`);
|
|
391
|
+
} else {
|
|
392
|
+
if (result?.message) {
|
|
393
|
+
console.log(` ā³ ${result.message}`);
|
|
394
|
+
} else {
|
|
395
|
+
console.log(' ā³ No endpoints with available capacity found');
|
|
396
|
+
}
|
|
397
|
+
}
|
|
398
|
+
} catch (err) {
|
|
399
|
+
// Graceful fallback: if MCP server fails, skip and create new endpoint
|
|
400
|
+
console.log(` ā ļø endpoint-picker: ${err.message || 'query failed'} ā will create new endpoint`);
|
|
401
|
+
}
|
|
402
|
+
}
|
|
403
|
+
|
|
404
|
+
/**
|
|
405
|
+
* Query MCP base-image-picker server after deployment config is selected.
|
|
406
|
+
* Populates _mcpBaseImageChoices for the base image selection prompt.
|
|
407
|
+
* Requirements: 5.1, 5.2, 5.3, 5.4, 9.1, 9.2, 9.3
|
|
408
|
+
* @private
|
|
409
|
+
*/
|
|
410
|
+
async _queryMcpForBaseImage(frameworkAnswers, _explicitConfig) {
|
|
411
|
+
// Skip if base image provided via CLI --base-image flag
|
|
412
|
+
if (this.runner.options['base-image']) return;
|
|
413
|
+
|
|
414
|
+
const cm = this.runner.configManager;
|
|
415
|
+
if (!cm) return;
|
|
416
|
+
|
|
417
|
+
const mcpServers = cm.getMcpServerNames();
|
|
418
|
+
if (!mcpServers.includes('base-image-picker')) return;
|
|
419
|
+
|
|
420
|
+
const smart = this.runner.options.smart === true;
|
|
421
|
+
const discover = this.runner.options.discover !== false;
|
|
422
|
+
const framework = frameworkAnswers.framework;
|
|
423
|
+
const modelServer = frameworkAnswers.modelServer;
|
|
424
|
+
const architecture = frameworkAnswers.architecture || frameworkAnswers.deploymentConfig?.split('-')[0];
|
|
425
|
+
const isTransformer = framework === 'transformers';
|
|
426
|
+
const isTriton = architecture === 'triton';
|
|
427
|
+
const isDiffusors = architecture === 'diffusors';
|
|
428
|
+
|
|
429
|
+
// For non-transformer, non-triton, non-diffusors frameworks, prompt for optional search criteria
|
|
430
|
+
let searchCriteria;
|
|
431
|
+
if (!isTransformer && !isTriton && !isDiffusors) {
|
|
432
|
+
const searchAnswer = await this.runner._runPrompts(baseImageSearchPrompts.map(p => ({
|
|
433
|
+
...p,
|
|
434
|
+
when: () => true // Always show for non-transformer since we already checked
|
|
435
|
+
})));
|
|
436
|
+
searchCriteria = searchAnswer.baseImageSearch;
|
|
437
|
+
}
|
|
438
|
+
|
|
439
|
+
const modeLabel = [smart && '[smart]', discover && '[discover]'].filter(Boolean).join(' ');
|
|
440
|
+
console.log(` š Querying base-image-picker${modeLabel ? ` ${modeLabel}` : ''}...`);
|
|
441
|
+
|
|
442
|
+
const context = { framework, modelServer, architecture };
|
|
443
|
+
if (searchCriteria && searchCriteria.trim()) {
|
|
444
|
+
context.searchCriteria = searchCriteria.trim();
|
|
445
|
+
}
|
|
446
|
+
|
|
447
|
+
const result = await cm.queryMcpServer('base-image-picker', context);
|
|
448
|
+
|
|
449
|
+
if (result && result.metadata?.baseImage?.length > 0) {
|
|
450
|
+
const entries = result.metadata.baseImage;
|
|
451
|
+
this.runner._mcpBaseImageChoices = formatImageChoices(entries, isTransformer || isTriton || isDiffusors);
|
|
452
|
+
const count = entries.length;
|
|
453
|
+
console.log(` ā ${count} base image(s) available`);
|
|
454
|
+
} else {
|
|
455
|
+
console.log(' ā³ No MCP results, using default image');
|
|
456
|
+
}
|
|
457
|
+
}
|
|
458
|
+
|
|
459
|
+
/**
|
|
460
|
+
* Query model-picker MCP server catalog for model choices.
|
|
461
|
+
* Reads the architecture-specific catalog (popular-transformers.json or
|
|
462
|
+
* popular-diffusors.json) to populate the model selection prompt.
|
|
463
|
+
* @param {string} [architecture] - Current architecture ('transformers', 'diffusors', etc.)
|
|
464
|
+
* @private
|
|
465
|
+
*/
|
|
466
|
+
_queryMcpForModels(architecture) {
|
|
467
|
+
const cm = this.runner.configManager;
|
|
468
|
+
if (!cm) return;
|
|
469
|
+
|
|
470
|
+
const mcpServers = cm.getMcpServerNames();
|
|
471
|
+
if (!mcpServers.includes('model-picker')) return;
|
|
472
|
+
|
|
473
|
+
try {
|
|
474
|
+
const mcpConfigPath = path.join(GENERATOR_ROOT, 'config', 'mcp.json');
|
|
475
|
+
if (!fs.existsSync(mcpConfigPath)) return;
|
|
476
|
+
|
|
477
|
+
const mcpConfig = JSON.parse(fs.readFileSync(mcpConfigPath, 'utf8'));
|
|
478
|
+
const serverConfig = mcpConfig.mcpServers?.['model-picker'];
|
|
479
|
+
if (!serverConfig?.args?.length) return;
|
|
480
|
+
|
|
481
|
+
// Resolve the server entry point directory from the args
|
|
482
|
+
const serverEntryPoint = serverConfig.args[serverConfig.args.length - 1];
|
|
483
|
+
const serverDir = path.dirname(serverEntryPoint);
|
|
484
|
+
|
|
485
|
+
// Read manifest to find catalog path
|
|
486
|
+
const manifestPath = path.join(serverDir, 'manifest.json');
|
|
487
|
+
if (!fs.existsSync(manifestPath)) return;
|
|
488
|
+
|
|
489
|
+
const manifest = JSON.parse(fs.readFileSync(manifestPath, 'utf8'));
|
|
490
|
+
|
|
491
|
+
// Select catalog based on architecture
|
|
492
|
+
const catalogKey = architecture === 'diffusors'
|
|
493
|
+
? 'popular-diffusors'
|
|
494
|
+
: 'popular-transformers';
|
|
495
|
+
const catalogRelPath = manifest.catalogs?.[catalogKey];
|
|
496
|
+
if (!catalogRelPath) return;
|
|
497
|
+
|
|
498
|
+
const catalogPath = path.resolve(serverDir, catalogRelPath);
|
|
499
|
+
if (!fs.existsSync(catalogPath)) return;
|
|
500
|
+
|
|
501
|
+
const catalog = JSON.parse(fs.readFileSync(catalogPath, 'utf8'));
|
|
502
|
+
|
|
503
|
+
// Extract model IDs, filtering out glob patterns (entries with *)
|
|
504
|
+
const modelIds = Object.keys(catalog).filter(id => !id.includes('*'));
|
|
505
|
+
|
|
506
|
+
if (modelIds.length > 0) {
|
|
507
|
+
this.runner._mcpModelChoices = modelIds;
|
|
508
|
+
}
|
|
509
|
+
} catch {
|
|
510
|
+
// Silently fall back to hardcoded defaults
|
|
511
|
+
}
|
|
512
|
+
}
|
|
513
|
+
|
|
514
|
+
/**
|
|
515
|
+
* Fetch and display model information from HuggingFace API and Model Registry
|
|
516
|
+
* Requirements: 5.1, 5.2, 5.3, 5.4, 5.5, 5.6, 5.11, 11.1, 11.2, 11.3, 11.5, 11.6, 11.7
|
|
517
|
+
* @private
|
|
518
|
+
*/
|
|
519
|
+
async _fetchAndDisplayModelInfo(modelId) {
|
|
520
|
+
console.log('\n š Querying model-picker [discover]...');
|
|
521
|
+
|
|
522
|
+
const sources = [];
|
|
523
|
+
let chatTemplate = null;
|
|
524
|
+
let modelFamily = null;
|
|
525
|
+
let mcpUsed = false;
|
|
526
|
+
|
|
527
|
+
// Try model-picker MCP server in discover mode (queries HuggingFace + merges with catalog)
|
|
528
|
+
const cm = this.runner.configManager;
|
|
529
|
+
if (cm) {
|
|
530
|
+
const mcpServers = cm.getMcpServerNames();
|
|
531
|
+
if (mcpServers.includes('model-picker')) {
|
|
532
|
+
try {
|
|
533
|
+
const mcpConfigPath = path.join(GENERATOR_ROOT, 'config', 'mcp.json');
|
|
534
|
+
if (fs.existsSync(mcpConfigPath)) {
|
|
535
|
+
const mcpConfig = JSON.parse(fs.readFileSync(mcpConfigPath, 'utf8'));
|
|
536
|
+
const serverConfig = mcpConfig.mcpServers?.['model-picker'];
|
|
537
|
+
if (serverConfig) {
|
|
538
|
+
const { McpClient } = await import('./mcp-client.js');
|
|
539
|
+
const client = new McpClient(serverConfig, { timeout: 15000 });
|
|
540
|
+
|
|
541
|
+
// Override _buildContext to pass model_id and mode directly
|
|
542
|
+
client._getUnboundedParameterNames = () => [];
|
|
543
|
+
client._buildContext = () => ({});
|
|
544
|
+
|
|
545
|
+
// Connect and call get_models directly
|
|
546
|
+
const { Client } = await import('@modelcontextprotocol/sdk/client/index.js');
|
|
547
|
+
const { StdioClientTransport } = await import('@modelcontextprotocol/sdk/client/stdio.js');
|
|
548
|
+
|
|
549
|
+
const transport = new StdioClientTransport({
|
|
550
|
+
command: serverConfig.command,
|
|
551
|
+
args: resolveMcpArgs(serverConfig.args),
|
|
552
|
+
env: { ...process.env, ...(serverConfig.env || {}) },
|
|
553
|
+
stderr: 'pipe'
|
|
554
|
+
});
|
|
555
|
+
|
|
556
|
+
const mcpClient = new Client(
|
|
557
|
+
{ name: 'ml-container-creator', version: '1.0.0' },
|
|
558
|
+
{ capabilities: {} }
|
|
559
|
+
);
|
|
560
|
+
|
|
561
|
+
await mcpClient.connect(transport);
|
|
562
|
+
|
|
563
|
+
const result = await mcpClient.callTool({
|
|
564
|
+
name: 'get_models',
|
|
565
|
+
arguments: { model_id: modelId, mode: 'discover' }
|
|
566
|
+
});
|
|
567
|
+
|
|
568
|
+
await mcpClient.close();
|
|
569
|
+
|
|
570
|
+
// Parse the response
|
|
571
|
+
const textBlock = result?.content?.find(b => b.type === 'text');
|
|
572
|
+
if (textBlock) {
|
|
573
|
+
const parsed = JSON.parse(textBlock.text);
|
|
574
|
+
if (parsed.values && Object.keys(parsed.values).length > 0) {
|
|
575
|
+
mcpUsed = true;
|
|
576
|
+
const vals = parsed.values;
|
|
577
|
+
|
|
578
|
+
if (vals.chat_template) {
|
|
579
|
+
chatTemplate = vals.chat_template;
|
|
580
|
+
}
|
|
581
|
+
if (vals.family) {
|
|
582
|
+
modelFamily = vals.family;
|
|
583
|
+
}
|
|
584
|
+
|
|
585
|
+
// Extract model_type for architecture validation
|
|
586
|
+
// Requirements: 4.1
|
|
587
|
+
if (vals.model_type) {
|
|
588
|
+
this.runner._modelType = vals.model_type;
|
|
589
|
+
}
|
|
590
|
+
|
|
591
|
+
// Extract model source metadata for loading adapter
|
|
592
|
+
// Requirements: 2.1, 2.2, 2.3, 2.4
|
|
593
|
+
if (vals.provider) {
|
|
594
|
+
this.runner._mcpModelSource = vals.provider;
|
|
595
|
+
}
|
|
596
|
+
if (vals.artifactUri) {
|
|
597
|
+
this.runner._mcpArtifactUri = vals.artifactUri;
|
|
598
|
+
}
|
|
599
|
+
|
|
600
|
+
// Determine sources based on what was returned
|
|
601
|
+
if (vals.tags || vals.pipeline_tag) {
|
|
602
|
+
sources.push('HuggingFace_Hub_API');
|
|
603
|
+
}
|
|
604
|
+
if (vals.validation_level || vals.framework_compatibility) {
|
|
605
|
+
sources.push('Model_Picker_Catalog');
|
|
606
|
+
}
|
|
607
|
+
if (sources.length === 0) {
|
|
608
|
+
sources.push('model-picker');
|
|
609
|
+
}
|
|
610
|
+
console.log(` ā Resolved: ${modelId}`);
|
|
611
|
+
} else if (parsed.message) {
|
|
612
|
+
console.log(` ā³ ${parsed.message}`);
|
|
613
|
+
}
|
|
614
|
+
}
|
|
615
|
+
}
|
|
616
|
+
}
|
|
617
|
+
} catch (err) {
|
|
618
|
+
console.log(' ā³ model-picker unavailable, using fallback');
|
|
619
|
+
}
|
|
620
|
+
}
|
|
621
|
+
}
|
|
622
|
+
|
|
623
|
+
// Fallback to legacy path if MCP didn't resolve
|
|
624
|
+
if (!mcpUsed) {
|
|
625
|
+
const registryConfigManager = this.runner.registryConfigManager;
|
|
626
|
+
if (registryConfigManager) {
|
|
627
|
+
// Only try HuggingFace API for bare model IDs (not prefixed URIs)
|
|
628
|
+
const isNonHfUri = modelId.startsWith('s3://') ||
|
|
629
|
+
modelId.startsWith('registry://');
|
|
630
|
+
|
|
631
|
+
if (!isNonHfUri) {
|
|
632
|
+
// Try HuggingFace API directly
|
|
633
|
+
try {
|
|
634
|
+
const hfData = await registryConfigManager._fetchHuggingFaceData(modelId);
|
|
635
|
+
if (hfData) {
|
|
636
|
+
sources.push('HuggingFace_Hub_API');
|
|
637
|
+
if (hfData.chatTemplate) {
|
|
638
|
+
chatTemplate = hfData.chatTemplate;
|
|
639
|
+
}
|
|
640
|
+
// Extract model_type for architecture validation
|
|
641
|
+
// Requirements: 4.1
|
|
642
|
+
if (hfData.modelConfig?.model_type) {
|
|
643
|
+
this.runner._modelType = hfData.modelConfig.model_type;
|
|
644
|
+
}
|
|
645
|
+
console.log(' ā
Found on HuggingFace Hub');
|
|
646
|
+
} else {
|
|
647
|
+
console.log(' ā¹ļø Not found on HuggingFace Hub (may be private or offline)');
|
|
648
|
+
}
|
|
649
|
+
} catch (error) {
|
|
650
|
+
console.log(' ā ļø HuggingFace API unavailable');
|
|
651
|
+
}
|
|
652
|
+
} else {
|
|
653
|
+
// Non-HF URI (s3://, registry://, etc.) ā skip HF lookup silently
|
|
654
|
+
}
|
|
655
|
+
|
|
656
|
+
// Check Model Registry for overrides
|
|
657
|
+
if (registryConfigManager.modelRegistry) {
|
|
658
|
+
let modelConfig = registryConfigManager.modelRegistry[modelId];
|
|
659
|
+
|
|
660
|
+
if (!modelConfig) {
|
|
661
|
+
for (const [pattern, config] of Object.entries(registryConfigManager.modelRegistry)) {
|
|
662
|
+
if (pattern.includes('*')) {
|
|
663
|
+
const regex = new RegExp(`^${ pattern.replace(/\*/g, '.*') }$`);
|
|
664
|
+
if (regex.test(modelId)) {
|
|
665
|
+
modelConfig = config;
|
|
666
|
+
console.log(` ā
Matched pattern in Model_Registry: ${pattern}`);
|
|
667
|
+
break;
|
|
668
|
+
}
|
|
669
|
+
}
|
|
670
|
+
}
|
|
671
|
+
} else {
|
|
672
|
+
console.log(' ā
Found in Model_Registry');
|
|
673
|
+
}
|
|
674
|
+
|
|
675
|
+
if (modelConfig) {
|
|
676
|
+
sources.push('Model_Registry');
|
|
677
|
+
if (modelConfig.chatTemplate) {
|
|
678
|
+
chatTemplate = modelConfig.chatTemplate;
|
|
679
|
+
}
|
|
680
|
+
if (modelConfig.family) {
|
|
681
|
+
modelFamily = modelConfig.family;
|
|
682
|
+
}
|
|
683
|
+
}
|
|
684
|
+
}
|
|
685
|
+
}
|
|
686
|
+
}
|
|
687
|
+
|
|
688
|
+
// Display information
|
|
689
|
+
if (sources.length > 0) {
|
|
690
|
+
console.log('\nš Model Information:');
|
|
691
|
+
console.log(` ⢠Model ID: ${modelId}`);
|
|
692
|
+
if (modelFamily) {
|
|
693
|
+
console.log(` ⢠Family: ${modelFamily}`);
|
|
694
|
+
}
|
|
695
|
+
if (chatTemplate) {
|
|
696
|
+
console.log(' ⢠Chat Template: ā
Available');
|
|
697
|
+
console.log(' (Will be injected into generated files)');
|
|
698
|
+
} else {
|
|
699
|
+
console.log(' ⢠Chat Template: ā Not available');
|
|
700
|
+
console.log(' (Chat endpoints may require manual configuration)');
|
|
701
|
+
}
|
|
702
|
+
console.log(` ⢠Sources: ${sources.join(', ')}`);
|
|
703
|
+
} else {
|
|
704
|
+
console.log(' ā¹ļø No additional model information available');
|
|
705
|
+
console.log(' Proceeding with default configuration');
|
|
706
|
+
}
|
|
707
|
+
}
|
|
708
|
+
|
|
709
|
+
/**
|
|
710
|
+
* Validate and display instance type compatibility
|
|
711
|
+
* Requirements: 4.1, 4.2, 4.3, 4.4, 4.5, 4.6
|
|
712
|
+
* @private
|
|
713
|
+
*/
|
|
714
|
+
async _validateAndDisplayInstanceType(instanceType, framework, version) {
|
|
715
|
+
const registryConfigManager = this.runner.registryConfigManager;
|
|
716
|
+
|
|
717
|
+
if (!registryConfigManager) {
|
|
718
|
+
return;
|
|
719
|
+
}
|
|
720
|
+
|
|
721
|
+
// Get framework configuration
|
|
722
|
+
const frameworkConfig = registryConfigManager.frameworkRegistry?.[framework]?.[version];
|
|
723
|
+
if (!frameworkConfig) {
|
|
724
|
+
return; // No framework config, skip validation
|
|
725
|
+
}
|
|
726
|
+
|
|
727
|
+
console.log(`\nš Validating instance type: ${instanceType}`);
|
|
728
|
+
|
|
729
|
+
// Validate instance type
|
|
730
|
+
const validationResult = registryConfigManager.validateInstanceType(instanceType, frameworkConfig);
|
|
731
|
+
|
|
732
|
+
if (validationResult.compatible) {
|
|
733
|
+
console.log(' ā
Instance type is compatible');
|
|
734
|
+
if (validationResult.info) {
|
|
735
|
+
console.log(` ā¹ļø ${validationResult.info}`);
|
|
736
|
+
}
|
|
737
|
+
} else {
|
|
738
|
+
console.log(' ā Instance type compatibility issue detected');
|
|
739
|
+
if (validationResult.error) {
|
|
740
|
+
console.log(` Error: ${validationResult.error}`);
|
|
741
|
+
}
|
|
742
|
+
if (validationResult.recommendations && validationResult.recommendations.length > 0) {
|
|
743
|
+
console.log(` š” Recommended instances: ${validationResult.recommendations.join(', ')}`);
|
|
744
|
+
}
|
|
745
|
+
|
|
746
|
+
// In test mode or non-interactive mode, throw error instead of prompting
|
|
747
|
+
if (this.runner.options.skipPrompts || process.env.NODE_ENV === 'test') {
|
|
748
|
+
throw new Error('Instance type validation failed. Please select a compatible instance type.');
|
|
749
|
+
}
|
|
750
|
+
|
|
751
|
+
// Ask user if they want to proceed
|
|
752
|
+
const proceed = await this.runner._runPrompts([{
|
|
753
|
+
type: 'confirm',
|
|
754
|
+
name: 'proceedWithIncompatible',
|
|
755
|
+
message: 'Instance type may not be compatible. Proceed anyway?',
|
|
756
|
+
default: false
|
|
757
|
+
}]);
|
|
758
|
+
|
|
759
|
+
if (!proceed.proceedWithIncompatible) {
|
|
760
|
+
throw new Error('Instance type validation failed. Please select a compatible instance type.');
|
|
761
|
+
}
|
|
762
|
+
}
|
|
763
|
+
|
|
764
|
+
if (validationResult.warning) {
|
|
765
|
+
console.log(` ā ļø Warning: ${validationResult.warning}`);
|
|
766
|
+
}
|
|
767
|
+
}
|
|
768
|
+
}
|