@aws/ml-container-creator 0.9.1 → 0.10.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE-THIRD-PARTY +9304 -0
- package/bin/cli.js +2 -0
- package/config/bootstrap-e2e-stack.json +341 -0
- package/config/bootstrap-stack.json +40 -3
- package/config/parameter-schema-v2.json +2049 -0
- package/config/tune-catalog.json +1781 -0
- package/infra/ci-harness/buildspec.yml +1 -0
- package/infra/ci-harness/lambda/path-prover/brain.ts +306 -0
- package/infra/ci-harness/lambda/path-prover/write-results.ts +152 -0
- package/infra/ci-harness/lib/ci-harness-stack.ts +837 -7
- package/infra/ci-harness/state-machines/path-prover.asl.json +496 -0
- package/package.json +53 -68
- package/servers/base-image-picker/index.js +121 -121
- package/servers/e2e-status/index.js +297 -0
- package/servers/e2e-status/manifest.json +14 -0
- package/servers/e2e-status/package.json +15 -0
- package/servers/endpoint-picker/LICENSE +202 -0
- package/servers/endpoint-picker/index.js +536 -0
- package/servers/endpoint-picker/manifest.json +14 -0
- package/servers/endpoint-picker/package.json +18 -0
- package/servers/hyperpod-cluster-picker/index.js +125 -125
- package/servers/instance-sizer/index.js +138 -138
- package/servers/instance-sizer/lib/instance-ranker.js +76 -76
- package/servers/instance-sizer/lib/model-resolver.js +61 -61
- package/servers/instance-sizer/lib/quota-resolver.js +113 -113
- package/servers/instance-sizer/lib/vram-estimator.js +31 -31
- package/servers/lib/bedrock-client.js +38 -38
- package/servers/lib/catalogs/jumpstart-public.json +101 -16
- package/servers/lib/catalogs/model-servers.json +201 -3
- package/servers/lib/catalogs/models.json +182 -26
- package/servers/lib/custom-validators.js +13 -13
- package/servers/lib/dynamic-resolver.js +4 -4
- package/servers/marketplace-picker/index.js +342 -0
- package/servers/marketplace-picker/manifest.json +14 -0
- package/servers/marketplace-picker/package.json +18 -0
- package/servers/model-picker/index.js +382 -382
- package/servers/region-picker/index.js +56 -56
- package/servers/workload-picker/LICENSE +202 -0
- package/servers/workload-picker/catalogs/workload-profiles.json +67 -0
- package/servers/workload-picker/index.js +171 -0
- package/servers/workload-picker/manifest.json +16 -0
- package/servers/workload-picker/package.json +16 -0
- package/src/app.js +4 -390
- package/src/lib/bootstrap-command-handler.js +710 -1148
- package/src/lib/bootstrap-config.js +36 -0
- package/src/lib/bootstrap-profile-manager.js +641 -0
- package/src/lib/bootstrap-provisioners.js +421 -0
- package/src/lib/ci-register-helpers.js +74 -0
- package/src/lib/config-loader.js +408 -0
- package/src/lib/config-manager.js +66 -1685
- package/src/lib/config-mcp-client.js +118 -0
- package/src/lib/config-validator.js +634 -0
- package/src/lib/cuda-resolver.js +149 -0
- package/src/lib/e2e-catalog-validator.js +251 -3
- package/src/lib/e2e-ci-recorder.js +103 -0
- package/src/lib/generated/cli-options.js +315 -311
- package/src/lib/generated/parameter-matrix.js +671 -0
- package/src/lib/generated/validation-rules.js +71 -71
- package/src/lib/marketplace-flow.js +276 -0
- package/src/lib/mcp-query-runner.js +768 -0
- package/src/lib/parameter-schema-validator.js +62 -18
- package/src/lib/path-prover-brain.js +607 -0
- package/src/lib/prompt-runner.js +41 -1504
- package/src/lib/prompts/feature-prompts.js +172 -0
- package/src/lib/prompts/index.js +48 -0
- package/src/lib/prompts/infrastructure-prompts.js +690 -0
- package/src/lib/prompts/model-prompts.js +552 -0
- package/src/lib/prompts/project-prompts.js +82 -0
- package/src/lib/prompts.js +2 -1446
- package/src/lib/registry-command-handler.js +135 -3
- package/src/lib/secrets-prompt-runner.js +251 -0
- package/src/lib/template-variable-resolver.js +422 -0
- package/src/lib/tune-catalog-validator.js +37 -4
- package/templates/Dockerfile +9 -0
- package/templates/code/adapter_sidecar.py +444 -0
- package/templates/code/serve +6 -0
- package/templates/code/serve.d/vllm.ejs +1 -1
- package/templates/do/.benchmark_writer.py +1476 -0
- package/templates/do/.tune_helper.py +982 -57
- package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
- package/templates/do/adapter +149 -0
- package/templates/do/benchmark +639 -85
- package/templates/do/config +108 -5
- package/templates/do/deploy.d/managed-inference.ejs +192 -11
- package/templates/do/optimize +106 -37
- package/templates/do/register +89 -0
- package/templates/do/test +13 -0
- package/templates/do/tune +378 -59
- package/templates/do/validate +44 -4
- package/config/parameter-schema.json +0 -88
package/src/lib/prompt-runner.js
CHANGED
|
@@ -29,40 +29,25 @@ import {
|
|
|
29
29
|
infraBuildPrompts,
|
|
30
30
|
projectPrompts,
|
|
31
31
|
destinationPrompts,
|
|
32
|
-
baseImageSearchPrompts,
|
|
33
32
|
baseImagePrompts,
|
|
34
|
-
formatImageChoices,
|
|
35
33
|
filterByCudaGeneration,
|
|
36
34
|
instanceCatalogRaw
|
|
37
|
-
} from './prompts.js';
|
|
35
|
+
} from './prompts/index.js';
|
|
38
36
|
|
|
39
37
|
import fs from 'fs';
|
|
40
38
|
import path from 'path';
|
|
41
|
-
import { execSync } from 'node:child_process';
|
|
42
39
|
import { fileURLToPath } from 'node:url';
|
|
43
40
|
import RegistryLoader from './registry-loader.js';
|
|
44
41
|
import { runPrompts } from '../prompt-adapter.js';
|
|
45
|
-
import
|
|
46
|
-
import
|
|
47
|
-
import
|
|
42
|
+
import McpQueryRunner from './mcp-query-runner.js';
|
|
43
|
+
import SecretsPromptRunner from './secrets-prompt-runner.js';
|
|
44
|
+
import CudaResolver from './cuda-resolver.js';
|
|
45
|
+
import MarketplaceFlow from './marketplace-flow.js';
|
|
48
46
|
|
|
49
47
|
const __pr_filename = fileURLToPath(import.meta.url);
|
|
50
48
|
const __pr_dirname = path.dirname(__pr_filename);
|
|
51
49
|
const GENERATOR_ROOT = path.resolve(__pr_dirname, '..', '..');
|
|
52
50
|
|
|
53
|
-
/**
|
|
54
|
-
* Resolve MCP server args — converts relative paths to absolute using GENERATOR_ROOT.
|
|
55
|
-
* @param {string[]} args - The args array from mcp.json serverConfig
|
|
56
|
-
* @returns {string[]} Args with relative paths resolved
|
|
57
|
-
*/
|
|
58
|
-
function resolveMcpArgs(args) {
|
|
59
|
-
return (args || []).map(arg => {
|
|
60
|
-
if (arg && !path.isAbsolute(arg) && !arg.startsWith('-')) {
|
|
61
|
-
return path.resolve(GENERATOR_ROOT, arg);
|
|
62
|
-
}
|
|
63
|
-
return arg;
|
|
64
|
-
});
|
|
65
|
-
}
|
|
66
51
|
|
|
67
52
|
export default class PromptRunner {
|
|
68
53
|
constructor({ configManager, options, registryConfigManager, baseConfig, promptFn }) {
|
|
@@ -71,8 +56,32 @@ export default class PromptRunner {
|
|
|
71
56
|
this.registryConfigManager = registryConfigManager || null;
|
|
72
57
|
this.baseConfig = baseConfig || {};
|
|
73
58
|
this._runPrompts = promptFn || runPrompts;
|
|
59
|
+
this.mcpQueryRunner = new McpQueryRunner(this);
|
|
60
|
+
this.secretsPromptRunner = new SecretsPromptRunner(this);
|
|
61
|
+
this.cudaResolver = new CudaResolver(this);
|
|
62
|
+
this.marketplaceFlow = new MarketplaceFlow(this);
|
|
74
63
|
}
|
|
75
64
|
|
|
65
|
+
// ── Sub-object delegations (backward compat for tests) ──────────
|
|
66
|
+
|
|
67
|
+
_queryMcpForBaseImage(...args) { return this.mcpQueryRunner._queryMcpForBaseImage(...args); }
|
|
68
|
+
_queryMcpForModels(...args) { return this.mcpQueryRunner._queryMcpForModels(...args); }
|
|
69
|
+
_queryMcpForRegion(...args) { return this.mcpQueryRunner._queryMcpForRegion(...args); }
|
|
70
|
+
_queryMcpForInstance(...args) { return this.mcpQueryRunner._queryMcpForInstance(...args); }
|
|
71
|
+
_queryMcpForInstanceSizing(...args) { return this.mcpQueryRunner._queryMcpForInstanceSizing(...args); }
|
|
72
|
+
_queryMcpForEndpoints(...args) { return this.mcpQueryRunner._queryMcpForEndpoints(...args); }
|
|
73
|
+
_queryMcpForHyperPod(...args) { return this.mcpQueryRunner._queryMcpForHyperPod(...args); }
|
|
74
|
+
_fetchAndDisplayModelInfo(...args) { return this.mcpQueryRunner._fetchAndDisplayModelInfo(...args); }
|
|
75
|
+
_validateAndDisplayInstanceType(...args) { return this.mcpQueryRunner._validateAndDisplayInstanceType(...args); }
|
|
76
|
+
_runSecretPrompts(...args) { return this.secretsPromptRunner._runSecretPrompts(...args); }
|
|
77
|
+
_secretStagesApply(...args) { return this.secretsPromptRunner._secretStagesApply(...args); }
|
|
78
|
+
_getArnConfigKey(...args) { return this.secretsPromptRunner._getArnConfigKey(...args); }
|
|
79
|
+
_getPlaintextConfigKey(...args) { return this.secretsPromptRunner._getPlaintextConfigKey(...args); }
|
|
80
|
+
_promptSecretSelection(...args) { return this.secretsPromptRunner._promptSecretSelection(...args); }
|
|
81
|
+
_promptPlaintextEntry(...args) { return this.secretsPromptRunner._promptPlaintextEntry(...args); }
|
|
82
|
+
_promptPlaintextFallback(...args) { return this.secretsPromptRunner._promptPlaintextFallback(...args); }
|
|
83
|
+
_promptCudaVersion(...args) { return this.cudaResolver._promptCudaVersion(...args); }
|
|
84
|
+
|
|
76
85
|
/**
|
|
77
86
|
* Runs all prompting phases and returns combined answers
|
|
78
87
|
*
|
|
@@ -131,7 +140,7 @@ export default class PromptRunner {
|
|
|
131
140
|
// Requirements: 2.3, 2.4, 2.5
|
|
132
141
|
// āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā
|
|
133
142
|
if (frameworkAnswers.architecture === 'marketplace') {
|
|
134
|
-
return this._runMarketplaceFlow(frameworkAnswers, explicitConfig, existingConfig, buildTimestamp);
|
|
143
|
+
return this.marketplaceFlow._runMarketplaceFlow(frameworkAnswers, explicitConfig, existingConfig, buildTimestamp);
|
|
135
144
|
}
|
|
136
145
|
|
|
137
146
|
// Engine prompt for http architecture
|
|
@@ -141,7 +150,7 @@ export default class PromptRunner {
|
|
|
141
150
|
const tritonAutoFormat = this._getTritonAutoModelFormat(architecture, backend);
|
|
142
151
|
|
|
143
152
|
// Query model-picker MCP server for model choices
|
|
144
|
-
this._queryMcpForModels(frameworkAnswers.architecture);
|
|
153
|
+
this.mcpQueryRunner._queryMcpForModels(frameworkAnswers.architecture);
|
|
145
154
|
if (this._mcpModelChoices) {
|
|
146
155
|
console.log(' š Querying model-picker...');
|
|
147
156
|
console.log(` ā ${this._mcpModelChoices.length} model(s) available from catalog`);
|
|
@@ -171,7 +180,7 @@ export default class PromptRunner {
|
|
|
171
180
|
|
|
172
181
|
// Fetch model information from HuggingFace and Model Registry
|
|
173
182
|
if (phase1ModelId && phase1ModelId !== 'Custom (enter manually)') {
|
|
174
|
-
await this._fetchAndDisplayModelInfo(phase1ModelId);
|
|
183
|
+
await this.mcpQueryRunner._fetchAndDisplayModelInfo(phase1ModelId);
|
|
175
184
|
}
|
|
176
185
|
|
|
177
186
|
// āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā
|
|
@@ -186,7 +195,7 @@ export default class PromptRunner {
|
|
|
186
195
|
const regionAndTargetAnswers = await this._runPhase(infraRegionAndTargetPrompts, { ...frameworkAnswers, ...regionPreviousAnswers }, explicitConfig, existingConfig);
|
|
187
196
|
|
|
188
197
|
// 2b. Query base-image-picker MCP server for base image choices
|
|
189
|
-
await this._queryMcpForBaseImage(frameworkAnswers, explicitConfig);
|
|
198
|
+
await this.mcpQueryRunner._queryMcpForBaseImage(frameworkAnswers, explicitConfig);
|
|
190
199
|
const baseImagePreviousAnswers = {
|
|
191
200
|
...frameworkAnswers,
|
|
192
201
|
...engineAnswers,
|
|
@@ -211,7 +220,7 @@ export default class PromptRunner {
|
|
|
211
220
|
// āāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāāā
|
|
212
221
|
|
|
213
222
|
// 3a. Region query
|
|
214
|
-
await this._queryMcpForRegion(frameworkAnswers, explicitConfig);
|
|
223
|
+
await this.mcpQueryRunner._queryMcpForRegion(frameworkAnswers, explicitConfig);
|
|
215
224
|
|
|
216
225
|
// 3a2. Existing endpoint prompt (only for realtime-inference)
|
|
217
226
|
// Requirements: 3.3, 4.3, 4.4 — endpoint-picker MCP query
|
|
@@ -219,7 +228,7 @@ export default class PromptRunner {
|
|
|
219
228
|
if (regionAndTargetAnswers.deploymentTarget === 'realtime-inference') {
|
|
220
229
|
// Query endpoint-picker MCP server for available endpoints
|
|
221
230
|
const resolvedRegion = regionAndTargetAnswers.customAwsRegion || regionAndTargetAnswers.awsRegion;
|
|
222
|
-
await this._queryMcpForEndpoints({ ...regionAndTargetAnswers, awsRegion: resolvedRegion }, explicitConfig);
|
|
231
|
+
await this.mcpQueryRunner._queryMcpForEndpoints({ ...regionAndTargetAnswers, awsRegion: resolvedRegion }, explicitConfig);
|
|
223
232
|
|
|
224
233
|
const endpointPreviousAnswers = {
|
|
225
234
|
...regionAndTargetAnswers,
|
|
@@ -261,13 +270,13 @@ export default class PromptRunner {
|
|
|
261
270
|
this._architectureHeuristicDefault = 'ml.m5.large';
|
|
262
271
|
} else if (phase1ModelId && phase1ModelId !== 'Custom (enter manually)') {
|
|
263
272
|
// Query instance-sizer with full context
|
|
264
|
-
await this._queryMcpForInstanceSizing(frameworkAnswers, modelFormatAnswers, explicitConfig, {
|
|
273
|
+
await this.mcpQueryRunner._queryMcpForInstanceSizing(frameworkAnswers, modelFormatAnswers, explicitConfig, {
|
|
265
274
|
cudaVersion: selectedBaseImageCuda,
|
|
266
275
|
profileEnvVars: this._selectedProfileEnvVars || {}
|
|
267
276
|
});
|
|
268
277
|
} else {
|
|
269
278
|
// No model known — use architecture heuristic
|
|
270
|
-
await this._queryMcpForInstance(frameworkAnswers, explicitConfig);
|
|
279
|
+
await this.mcpQueryRunner._queryMcpForInstance(frameworkAnswers, explicitConfig);
|
|
271
280
|
}
|
|
272
281
|
}
|
|
273
282
|
|
|
@@ -419,7 +428,7 @@ export default class PromptRunner {
|
|
|
419
428
|
|
|
420
429
|
// 3e. CUDA/AMI auto-resolution
|
|
421
430
|
const instanceType = instanceAnswers.customInstanceType || instanceAnswers.instanceType;
|
|
422
|
-
const cudaAnswer = await this._promptCudaVersion(
|
|
431
|
+
const cudaAnswer = await this.cudaResolver._promptCudaVersion(
|
|
423
432
|
instanceType,
|
|
424
433
|
frameworkAnswers.framework,
|
|
425
434
|
null, // frameworkVersion not yet known in Phase 3
|
|
@@ -430,7 +439,7 @@ export default class PromptRunner {
|
|
|
430
439
|
let hyperPodAnswers = {};
|
|
431
440
|
if (regionAndTargetAnswers.deploymentTarget === 'hyperpod-eks') {
|
|
432
441
|
const resolvedRegion = regionAndTargetAnswers.customAwsRegion || regionAndTargetAnswers.awsRegion;
|
|
433
|
-
await this._queryMcpForHyperPod({ ...regionAndTargetAnswers, awsRegion: resolvedRegion }, explicitConfig);
|
|
442
|
+
await this.mcpQueryRunner._queryMcpForHyperPod({ ...regionAndTargetAnswers, awsRegion: resolvedRegion }, explicitConfig);
|
|
434
443
|
hyperPodAnswers = await this._runPhase(infraHyperPodPrompts, { ...regionAndTargetAnswers }, explicitConfig, existingConfig);
|
|
435
444
|
}
|
|
436
445
|
|
|
@@ -507,7 +516,7 @@ export default class PromptRunner {
|
|
|
507
516
|
|
|
508
517
|
// Secret prompts — registry-driven secret selection (replaces hardcoded hfToken/ngcApiKey prompts)
|
|
509
518
|
const secretPreviousAnswers = { ...frameworkAnswers, ...engineAnswers, ...frameworkVersionAnswers, ...frameworkProfileAnswers, ...modelFormatAnswers, ...modelServerAnswers, ...modelProfileAnswers };
|
|
510
|
-
const secretAnswers = await this._runSecretPrompts(secretPreviousAnswers, explicitConfig, existingConfig);
|
|
519
|
+
const secretAnswers = await this.secretsPromptRunner._runSecretPrompts(secretPreviousAnswers, explicitConfig, existingConfig);
|
|
511
520
|
const hfTokenAnswers = { hfToken: secretAnswers.hfToken, hfTokenArn: secretAnswers.hfTokenArn };
|
|
512
521
|
const ngcApiKeyAnswers = { ngcApiKey: secretAnswers.ngcApiKey, ngcTokenArn: secretAnswers.ngcTokenArn };
|
|
513
522
|
|
|
@@ -548,7 +557,7 @@ export default class PromptRunner {
|
|
|
548
557
|
// Validate instance type against framework requirements (now that framework version is known)
|
|
549
558
|
const finalInstanceType = infraAnswers.customInstanceType || infraAnswers.instanceType;
|
|
550
559
|
if (finalInstanceType && frameworkVersionAnswers.frameworkVersion) {
|
|
551
|
-
await this._validateAndDisplayInstanceType(
|
|
560
|
+
await this.mcpQueryRunner._validateAndDisplayInstanceType(
|
|
552
561
|
finalInstanceType,
|
|
553
562
|
frameworkAnswers.framework,
|
|
554
563
|
frameworkVersionAnswers.frameworkVersion
|
|
@@ -756,264 +765,6 @@ export default class PromptRunner {
|
|
|
756
765
|
return combinedAnswers;
|
|
757
766
|
}
|
|
758
767
|
|
|
759
|
-
/**
|
|
760
|
-
* Marketplace-specific prompt flow.
|
|
761
|
-
* Skips all container-related prompts (framework, model server, base image, CUDA version)
|
|
762
|
-
* and prompts only for: model package ARN, instance type, deployment target, region.
|
|
763
|
-
*
|
|
764
|
-
* Requirements: 2.3, 2.4, 2.5
|
|
765
|
-
* @private
|
|
766
|
-
*/
|
|
767
|
-
async _runMarketplaceFlow(frameworkAnswers, explicitConfig, existingConfig, buildTimestamp) {
|
|
768
|
-
console.log('\nšŖ Marketplace Model Package Configuration');
|
|
769
|
-
|
|
770
|
-
// Query marketplace-picker MCP server for subscription discovery
|
|
771
|
-
// Requirements: 2.4, 6.1, 6.2
|
|
772
|
-
let mcpSubscriptions = [];
|
|
773
|
-
const cm = this.configManager;
|
|
774
|
-
if (cm && cm.getMcpServerNames && cm.getMcpServerNames().includes('marketplace-picker')) {
|
|
775
|
-
try {
|
|
776
|
-
console.log(' š Querying marketplace-picker for subscriptions...');
|
|
777
|
-
const result = await cm.queryMcpServer('marketplace-picker', {
|
|
778
|
-
region: explicitConfig.awsRegion || existingConfig.awsRegion || process.env.AWS_REGION || 'us-east-1'
|
|
779
|
-
});
|
|
780
|
-
if (result && result.metadata?.subscriptions?.length > 0) {
|
|
781
|
-
mcpSubscriptions = result.metadata.subscriptions;
|
|
782
|
-
console.log(` ✅ Found ${mcpSubscriptions.length} Marketplace subscription(s)`);
|
|
783
|
-
} else {
|
|
784
|
-
console.log(' ℹ️ No Marketplace subscriptions found — enter ARN manually');
|
|
785
|
-
}
|
|
786
|
-
} catch (err) {
|
|
787
|
-
console.log(` ⚠️ marketplace-picker unavailable: ${err.message}`);
|
|
788
|
-
console.log(' Falling back to manual ARN entry');
|
|
789
|
-
}
|
|
790
|
-
}
|
|
791
|
-
|
|
792
|
-
// Marketplace-specific prompts: model package ARN
|
|
793
|
-
const marketplacePrompts = [
|
|
794
|
-
{
|
|
795
|
-
type: mcpSubscriptions.length > 0 ? 'list' : 'input',
|
|
796
|
-
name: 'modelPackageArn',
|
|
797
|
-
message: mcpSubscriptions.length > 0
|
|
798
|
-
? 'Select a Marketplace model package:'
|
|
799
|
-
: 'Model package ARN (arn:aws:sagemaker:<region>:<account>:model-package/<name>/<version>):',
|
|
800
|
-
...(mcpSubscriptions.length > 0 ? {
|
|
801
|
-
choices: [
|
|
802
|
-
...mcpSubscriptions.map(sub => ({
|
|
803
|
-
name: `${sub.modelName} (${sub.vendor}) ā ${sub.arn}`,
|
|
804
|
-
value: sub.arn,
|
|
805
|
-
short: sub.modelName
|
|
806
|
-
})),
|
|
807
|
-
{ type: 'separator', separator: 'āāāāāāāāāāāāāā' },
|
|
808
|
-
{ name: 'Enter ARN manually...', value: '__manual__', short: 'manual' }
|
|
809
|
-
]
|
|
810
|
-
} : {
|
|
811
|
-
validate: (input) => {
|
|
812
|
-
if (!input || input.trim() === '') {
|
|
813
|
-
return 'Model package ARN is required';
|
|
814
|
-
}
|
|
815
|
-
const arnPattern = /^arn:aws:sagemaker:[a-z0-9-]+:\d{12}:model-package\/[\w-]+\/\d+$/;
|
|
816
|
-
if (!arnPattern.test(input.trim())) {
|
|
817
|
-
return 'Invalid ARN format. Expected: arn:aws:sagemaker:<region>:<account>:model-package/<name>/<version>';
|
|
818
|
-
}
|
|
819
|
-
return true;
|
|
820
|
-
}
|
|
821
|
-
})
|
|
822
|
-
},
|
|
823
|
-
{
|
|
824
|
-
type: 'input',
|
|
825
|
-
name: 'modelPackageArnManual',
|
|
826
|
-
message: 'Model package ARN (arn:aws:sagemaker:<region>:<account>:model-package/<name>/<version>):',
|
|
827
|
-
when: (answers) => answers.modelPackageArn === '__manual__',
|
|
828
|
-
validate: (input) => {
|
|
829
|
-
if (!input || input.trim() === '') {
|
|
830
|
-
return 'Model package ARN is required';
|
|
831
|
-
}
|
|
832
|
-
const arnPattern = /^arn:aws:sagemaker:[a-z0-9-]+:\d{12}:model-package\/[\w-]+\/\d+$/;
|
|
833
|
-
if (!arnPattern.test(input.trim())) {
|
|
834
|
-
return 'Invalid ARN format. Expected: arn:aws:sagemaker:<region>:<account>:model-package/<name>/<version>';
|
|
835
|
-
}
|
|
836
|
-
return true;
|
|
837
|
-
}
|
|
838
|
-
}
|
|
839
|
-
];
|
|
840
|
-
const marketplaceAnswers = await this._runPhase(marketplacePrompts, { ...frameworkAnswers }, explicitConfig, existingConfig);
|
|
841
|
-
|
|
842
|
-
// Handle manual ARN entry fallback
|
|
843
|
-
if (marketplaceAnswers.modelPackageArn === '__manual__' && marketplaceAnswers.modelPackageArnManual) {
|
|
844
|
-
marketplaceAnswers.modelPackageArn = marketplaceAnswers.modelPackageArnManual;
|
|
845
|
-
delete marketplaceAnswers.modelPackageArnManual;
|
|
846
|
-
}
|
|
847
|
-
|
|
848
|
-
// Infrastructure prompts: region, deployment target, instance type
|
|
849
|
-
console.log('\nšŖ Infrastructure & Deployment');
|
|
850
|
-
const bootstrapRegion = existingConfig.awsRegion || explicitConfig.awsRegion;
|
|
851
|
-
const regionPreviousAnswers = bootstrapRegion ? { _bootstrapRegion: bootstrapRegion } : {};
|
|
852
|
-
|
|
853
|
-
// Marketplace deployment targets (no HyperPod — vendor controls the container)
|
|
854
|
-
const marketplaceInfraPrompts = [
|
|
855
|
-
{
|
|
856
|
-
type: 'list',
|
|
857
|
-
name: 'awsRegion',
|
|
858
|
-
message: 'Target AWS region?',
|
|
859
|
-
choices: (answers) => {
|
|
860
|
-
const bootstrapReg = answers._bootstrapRegion;
|
|
861
|
-
const choices = ['us-east-1'];
|
|
862
|
-
if (bootstrapReg && bootstrapReg !== 'us-east-1') {
|
|
863
|
-
choices.unshift({ name: `${bootstrapReg} (from bootstrap profile)`, value: bootstrapReg });
|
|
864
|
-
}
|
|
865
|
-
choices.push({ name: 'Custom...', value: 'custom' });
|
|
866
|
-
return choices;
|
|
867
|
-
},
|
|
868
|
-
default: (answers) => answers._bootstrapRegion || 'us-east-1'
|
|
869
|
-
},
|
|
870
|
-
{
|
|
871
|
-
type: 'input',
|
|
872
|
-
name: 'customAwsRegion',
|
|
873
|
-
message: 'Enter AWS region (e.g., us-west-2, eu-west-1):',
|
|
874
|
-
when: answers => answers.awsRegion === 'custom'
|
|
875
|
-
},
|
|
876
|
-
{
|
|
877
|
-
type: 'list',
|
|
878
|
-
name: 'deploymentTarget',
|
|
879
|
-
message: 'Deployment target?',
|
|
880
|
-
choices: [
|
|
881
|
-
{ name: 'SageMaker Real-Time Inference', value: 'realtime-inference' },
|
|
882
|
-
{ name: 'SageMaker Async Inference', value: 'async-inference' },
|
|
883
|
-
{ name: 'SageMaker Batch Transform', value: 'batch-transform' }
|
|
884
|
-
],
|
|
885
|
-
default: 'realtime-inference'
|
|
886
|
-
},
|
|
887
|
-
{
|
|
888
|
-
type: 'list',
|
|
889
|
-
name: 'instanceType',
|
|
890
|
-
message: 'Instance type for deployment?',
|
|
891
|
-
choices: [
|
|
892
|
-
{ name: 'ml.g5.xlarge (1 GPU, 24GB)', value: 'ml.g5.xlarge' },
|
|
893
|
-
{ name: 'ml.g5.2xlarge (1 GPU, 24GB)', value: 'ml.g5.2xlarge' },
|
|
894
|
-
{ name: 'ml.g5.4xlarge (1 GPU, 24GB)', value: 'ml.g5.4xlarge' },
|
|
895
|
-
{ name: 'ml.g5.12xlarge (4 GPUs, 96GB)', value: 'ml.g5.12xlarge' },
|
|
896
|
-
{ name: 'ml.p3.2xlarge (1 GPU, 16GB V100)', value: 'ml.p3.2xlarge' },
|
|
897
|
-
{ name: 'ml.m5.xlarge (CPU, 16GB)', value: 'ml.m5.xlarge' },
|
|
898
|
-
{ name: 'Custom...', value: 'custom' }
|
|
899
|
-
],
|
|
900
|
-
default: 'ml.g5.xlarge'
|
|
901
|
-
},
|
|
902
|
-
{
|
|
903
|
-
type: 'input',
|
|
904
|
-
name: 'customInstanceType',
|
|
905
|
-
message: 'Enter instance type (e.g., ml.g5.xlarge):',
|
|
906
|
-
validate: (input) => {
|
|
907
|
-
if (!input || input.trim() === '') {
|
|
908
|
-
return 'Instance type is required';
|
|
909
|
-
}
|
|
910
|
-
if (!input.startsWith('ml.')) {
|
|
911
|
-
return 'Instance type must start with "ml." (e.g., ml.g5.xlarge)';
|
|
912
|
-
}
|
|
913
|
-
return true;
|
|
914
|
-
},
|
|
915
|
-
when: answers => answers.instanceType === 'custom'
|
|
916
|
-
}
|
|
917
|
-
];
|
|
918
|
-
const infraAnswers = await this._runPhase(marketplaceInfraPrompts, { ...frameworkAnswers, ...regionPreviousAnswers }, explicitConfig, existingConfig);
|
|
919
|
-
|
|
920
|
-
// Async-specific prompts (only when deploymentTarget === 'async-inference')
|
|
921
|
-
let asyncAnswers = {};
|
|
922
|
-
if (infraAnswers.deploymentTarget === 'async-inference') {
|
|
923
|
-
asyncAnswers = await this._runPhase(infraAsyncPrompts, { ...infraAnswers }, explicitConfig, existingConfig);
|
|
924
|
-
}
|
|
925
|
-
|
|
926
|
-
// Batch transform-specific prompts (only when deploymentTarget === 'batch-transform')
|
|
927
|
-
let batchTransformAnswers = {};
|
|
928
|
-
if (infraAnswers.deploymentTarget === 'batch-transform') {
|
|
929
|
-
batchTransformAnswers = await this._runPhase(
|
|
930
|
-
infraBatchTransformPrompts,
|
|
931
|
-
{ ...infraAnswers },
|
|
932
|
-
explicitConfig,
|
|
933
|
-
existingConfig
|
|
934
|
-
);
|
|
935
|
-
}
|
|
936
|
-
|
|
937
|
-
// Role ARN prompt (always needed for marketplace deploy)
|
|
938
|
-
const rolePrompts = [
|
|
939
|
-
{
|
|
940
|
-
type: 'input',
|
|
941
|
-
name: 'awsRoleArn',
|
|
942
|
-
message: 'AWS IAM Role ARN for SageMaker execution (optional)?',
|
|
943
|
-
validate: (input) => {
|
|
944
|
-
if (!input || input.trim() === '') {
|
|
945
|
-
return true;
|
|
946
|
-
}
|
|
947
|
-
const arnPattern = /^arn:aws:iam::\d{12}:role\/[\w+=,.@-]+$/;
|
|
948
|
-
if (!arnPattern.test(input)) {
|
|
949
|
-
return 'Invalid ARN format. Expected: arn:aws:iam::123456789012:role/RoleName';
|
|
950
|
-
}
|
|
951
|
-
return true;
|
|
952
|
-
}
|
|
953
|
-
}
|
|
954
|
-
];
|
|
955
|
-
const roleAnswers = await this._runPhase(rolePrompts, { ...infraAnswers }, explicitConfig, existingConfig);
|
|
956
|
-
|
|
957
|
-
// Project name + destination
|
|
958
|
-
console.log('\nš Project Configuration');
|
|
959
|
-
const allTechnicalAnswers = {
|
|
960
|
-
...frameworkAnswers,
|
|
961
|
-
...marketplaceAnswers,
|
|
962
|
-
...infraAnswers,
|
|
963
|
-
...asyncAnswers,
|
|
964
|
-
...batchTransformAnswers,
|
|
965
|
-
...roleAnswers
|
|
966
|
-
};
|
|
967
|
-
const projectAnswers = await this._runPhase(projectPrompts, allTechnicalAnswers, explicitConfig, existingConfig);
|
|
968
|
-
const destinationAnswers = await this._runPhase(destinationPrompts,
|
|
969
|
-
{ ...allTechnicalAnswers, ...projectAnswers }, explicitConfig, existingConfig);
|
|
970
|
-
|
|
971
|
-
// Combine all marketplace answers
|
|
972
|
-
const combinedAnswers = {
|
|
973
|
-
...frameworkAnswers,
|
|
974
|
-
...marketplaceAnswers,
|
|
975
|
-
...infraAnswers,
|
|
976
|
-
...asyncAnswers,
|
|
977
|
-
...batchTransformAnswers,
|
|
978
|
-
...roleAnswers,
|
|
979
|
-
...projectAnswers,
|
|
980
|
-
...destinationAnswers,
|
|
981
|
-
buildTimestamp
|
|
982
|
-
};
|
|
983
|
-
|
|
984
|
-
// Handle custom instance type
|
|
985
|
-
if (combinedAnswers.customInstanceType) {
|
|
986
|
-
combinedAnswers.instanceType = combinedAnswers.customInstanceType;
|
|
987
|
-
delete combinedAnswers.customInstanceType;
|
|
988
|
-
}
|
|
989
|
-
|
|
990
|
-
// Handle custom AWS region
|
|
991
|
-
if (combinedAnswers.customAwsRegion) {
|
|
992
|
-
combinedAnswers.awsRegion = combinedAnswers.customAwsRegion;
|
|
993
|
-
delete combinedAnswers.customAwsRegion;
|
|
994
|
-
}
|
|
995
|
-
|
|
996
|
-
// Map awsRoleArn to roleArn for templates
|
|
997
|
-
if (combinedAnswers.awsRoleArn) {
|
|
998
|
-
combinedAnswers.roleArn = combinedAnswers.awsRoleArn;
|
|
999
|
-
delete combinedAnswers.awsRoleArn;
|
|
1000
|
-
}
|
|
1001
|
-
|
|
1002
|
-
// Ensure CLI-provided values are in combinedAnswers
|
|
1003
|
-
if (explicitConfig.modelPackageArn && !combinedAnswers.modelPackageArn) {
|
|
1004
|
-
combinedAnswers.modelPackageArn = explicitConfig.modelPackageArn;
|
|
1005
|
-
}
|
|
1006
|
-
|
|
1007
|
-
// Handle marketplace:// prefix from --model-name CLI option
|
|
1008
|
-
const modelName = explicitConfig.modelName || combinedAnswers.modelName;
|
|
1009
|
-
if (modelName && modelName.startsWith('marketplace://')) {
|
|
1010
|
-
const arn = modelName.replace(/^marketplace:\/\//, '');
|
|
1011
|
-
combinedAnswers.modelPackageArn = arn;
|
|
1012
|
-
delete combinedAnswers.modelName;
|
|
1013
|
-
}
|
|
1014
|
-
|
|
1015
|
-
return combinedAnswers;
|
|
1016
|
-
}
|
|
1017
768
|
|
|
1018
769
|
/**
|
|
1019
770
|
* Checks if a parameter is promptable according to the parameter matrix
|
|
@@ -1267,491 +1018,6 @@ export default class PromptRunner {
|
|
|
1267
1018
|
return Object.hasOwn(HEURISTIC_DEFAULTS, architecture) ? HEURISTIC_DEFAULTS[architecture] : 'ml.g5.xlarge';
|
|
1268
1019
|
}
|
|
1269
1020
|
|
|
1270
|
-
/**
|
|
1271
|
-
* Query MCP region-picker server before infrastructure prompts.
|
|
1272
|
-
* Populates configManager.mcpChoices so _runPhase injects them into list prompts.
|
|
1273
|
-
* @private
|
|
1274
|
-
*/
|
|
1275
|
-
async _queryMcpForRegion(frameworkAnswers, explicitConfig) {
|
|
1276
|
-
const cm = this.configManager;
|
|
1277
|
-
if (!cm) return;
|
|
1278
|
-
|
|
1279
|
-
const mcpServers = cm.getMcpServerNames();
|
|
1280
|
-
if (mcpServers.length === 0) return;
|
|
1281
|
-
|
|
1282
|
-
const smart = this.options.smart === true;
|
|
1283
|
-
|
|
1284
|
-
// Region: skip MCP query if region was explicitly provided via CLI, config file, or bootstrap profile
|
|
1285
|
-
const cliRegion = this.options.region;
|
|
1286
|
-
const bootstrapRegion = explicitConfig.awsRegion;
|
|
1287
|
-
const skipRegionQuery = (cliRegion !== undefined && cliRegion !== null) ||
|
|
1288
|
-
(bootstrapRegion !== undefined && bootstrapRegion !== null);
|
|
1289
|
-
|
|
1290
|
-
if (!skipRegionQuery && mcpServers.includes('region-picker')) {
|
|
1291
|
-
const { regionSearch } = await this._runPrompts([{
|
|
1292
|
-
type: 'input',
|
|
1293
|
-
name: 'regionSearch',
|
|
1294
|
-
message: 'š Search for a region (e.g. "europe", "us west", "tokyo"):',
|
|
1295
|
-
default: ''
|
|
1296
|
-
}]);
|
|
1297
|
-
|
|
1298
|
-
if (regionSearch && regionSearch.trim()) {
|
|
1299
|
-
console.log(` š Querying region-picker${smart ? ' [smart]' : ''}...`);
|
|
1300
|
-
const result = await cm.queryMcpServer('region-picker', {
|
|
1301
|
-
...frameworkAnswers,
|
|
1302
|
-
regionSearch: regionSearch.trim()
|
|
1303
|
-
});
|
|
1304
|
-
if (result && result.choices?.awsRegion?.length > 0) {
|
|
1305
|
-
const choices = result.choices.awsRegion;
|
|
1306
|
-
const preview = choices.length <= 5
|
|
1307
|
-
? choices.join(', ')
|
|
1308
|
-
: `${choices.slice(0, 5).join(', ') } (+${choices.length - 5} more)`;
|
|
1309
|
-
console.log(` ā ${choices.length} region(s): [${preview}]`);
|
|
1310
|
-
} else {
|
|
1311
|
-
console.log(' ā³ No MCP results, using static list');
|
|
1312
|
-
}
|
|
1313
|
-
}
|
|
1314
|
-
}
|
|
1315
|
-
}
|
|
1316
|
-
|
|
1317
|
-
/**
|
|
1318
|
-
* Query MCP instance-sizer server with tag-based search after deployment target is known.
|
|
1319
|
-
* Used when no model name is available for VRAM-based sizing.
|
|
1320
|
-
* Populates configManager.mcpChoices so _runPhase injects them into list prompts.
|
|
1321
|
-
* @private
|
|
1322
|
-
*/
|
|
1323
|
-
async _queryMcpForInstance(frameworkAnswers, explicitConfig) {
|
|
1324
|
-
const cm = this.configManager;
|
|
1325
|
-
if (!cm) return;
|
|
1326
|
-
|
|
1327
|
-
const mcpServers = cm.getMcpServerNames();
|
|
1328
|
-
if (mcpServers.length === 0) return;
|
|
1329
|
-
|
|
1330
|
-
const smart = this.options.smart === true;
|
|
1331
|
-
|
|
1332
|
-
// Instance type: query if not already provided via CLI/config
|
|
1333
|
-
if (!explicitConfig.instanceType && mcpServers.includes('instance-sizer')) {
|
|
1334
|
-
const { instanceSearch } = await this._runPrompts([{
|
|
1335
|
-
type: 'input',
|
|
1336
|
-
name: 'instanceSearch',
|
|
1337
|
-
message: 'š Describe your instance needs (e.g. "multi-gpu", "cost-effective cpu"):',
|
|
1338
|
-
default: frameworkAnswers.framework || ''
|
|
1339
|
-
}]);
|
|
1340
|
-
|
|
1341
|
-
if (instanceSearch && instanceSearch.trim()) {
|
|
1342
|
-
console.log(` š Querying instance-sizer [search]${smart ? ' [smart]' : ''}...`);
|
|
1343
|
-
const result = await cm.queryMcpServer('instance-sizer', {
|
|
1344
|
-
...frameworkAnswers,
|
|
1345
|
-
instanceSearch: instanceSearch.trim()
|
|
1346
|
-
});
|
|
1347
|
-
if (result && result.choices?.instanceType?.length > 0) {
|
|
1348
|
-
const choices = result.choices.instanceType;
|
|
1349
|
-
const preview = choices.length <= 5
|
|
1350
|
-
? choices.join(', ')
|
|
1351
|
-
: `${choices.slice(0, 5).join(', ') } (+${choices.length - 5} more)`;
|
|
1352
|
-
console.log(` ā ${choices.length} instance(s): [${preview}]`);
|
|
1353
|
-
} else {
|
|
1354
|
-
console.log(' ā³ No MCP results, using static list');
|
|
1355
|
-
}
|
|
1356
|
-
}
|
|
1357
|
-
}
|
|
1358
|
-
}
|
|
1359
|
-
|
|
1360
|
-
/**
|
|
1361
|
-
* Query the instance-sizer MCP server after model is known.
|
|
1362
|
-
* Estimates VRAM requirements and returns filtered, ranked instance recommendations.
|
|
1363
|
-
* Stores results in this._mcpInstanceSizerChoices and this._instanceSizerMetadata.
|
|
1364
|
-
* Requirements: 4.4, 4.5, 4.7, 3.6, 3.7
|
|
1365
|
-
* @param {object} frameworkAnswers - Framework/architecture answers
|
|
1366
|
-
* @param {object} modelFormatAnswers - Model format answers (contains modelName)
|
|
1367
|
-
* @param {object} explicitConfig - Explicit CLI/config values
|
|
1368
|
-
* @param {object} [sizerContext={}] - Additional context for the sizer query
|
|
1369
|
-
* @param {string} [sizerContext.cudaVersion] - CUDA version from base image
|
|
1370
|
-
* @param {object} [sizerContext.profileEnvVars] - Profile ENV overrides
|
|
1371
|
-
* @private
|
|
1372
|
-
*/
|
|
1373
|
-
async _queryMcpForInstanceSizing(frameworkAnswers, modelFormatAnswers, explicitConfig, sizerContext = {}) {
|
|
1374
|
-
const cm = this.configManager;
|
|
1375
|
-
if (!cm) return;
|
|
1376
|
-
|
|
1377
|
-
const mcpServers = cm.getMcpServerNames();
|
|
1378
|
-
if (!mcpServers.includes('instance-sizer')) return;
|
|
1379
|
-
|
|
1380
|
-
// Resolve model name from answers or explicit config
|
|
1381
|
-
const modelName = modelFormatAnswers.customModelName || modelFormatAnswers.modelName || explicitConfig.modelName;
|
|
1382
|
-
if (!modelName || modelName === 'Custom (enter manually)') return;
|
|
1383
|
-
|
|
1384
|
-
const smart = this.options.smart === true;
|
|
1385
|
-
const discover = this.options.discover !== false;
|
|
1386
|
-
|
|
1387
|
-
const modeLabel = [smart && '[smart]', !discover && '[no-discover]'].filter(Boolean).join(' ');
|
|
1388
|
-
console.log(` š Querying instance-sizer${modeLabel ? ` ${modeLabel}` : ''}...`);
|
|
1389
|
-
|
|
1390
|
-
try {
|
|
1391
|
-
const mcpConfigPath = path.join(GENERATOR_ROOT, 'config', 'mcp.json');
|
|
1392
|
-
if (!fs.existsSync(mcpConfigPath)) return;
|
|
1393
|
-
|
|
1394
|
-
const mcpConfig = JSON.parse(fs.readFileSync(mcpConfigPath, 'utf8'));
|
|
1395
|
-
const serverConfig = mcpConfig.mcpServers?.['instance-sizer'];
|
|
1396
|
-
if (!serverConfig) return;
|
|
1397
|
-
|
|
1398
|
-
const { Client } = await import('@modelcontextprotocol/sdk/client/index.js');
|
|
1399
|
-
const { StdioClientTransport } = await import('@modelcontextprotocol/sdk/client/stdio.js');
|
|
1400
|
-
|
|
1401
|
-
const serverArgs = [...resolveMcpArgs(serverConfig.args)];
|
|
1402
|
-
if (!discover && !serverArgs.includes('--no-discover')) {
|
|
1403
|
-
serverArgs.push('--no-discover');
|
|
1404
|
-
}
|
|
1405
|
-
|
|
1406
|
-
const transport = new StdioClientTransport({
|
|
1407
|
-
command: serverConfig.command,
|
|
1408
|
-
args: serverArgs,
|
|
1409
|
-
env: {
|
|
1410
|
-
...process.env,
|
|
1411
|
-
...(serverConfig.env || {}),
|
|
1412
|
-
...(smart ? { BEDROCK_SMART: 'true' } : {})
|
|
1413
|
-
},
|
|
1414
|
-
stderr: 'pipe'
|
|
1415
|
-
});
|
|
1416
|
-
|
|
1417
|
-
const mcpClient = new Client(
|
|
1418
|
-
{ name: 'ml-container-creator', version: '1.0.0' },
|
|
1419
|
-
{ capabilities: {} }
|
|
1420
|
-
);
|
|
1421
|
-
|
|
1422
|
-
await mcpClient.connect(transport);
|
|
1423
|
-
|
|
1424
|
-
const toolArgs = {
|
|
1425
|
-
modelName,
|
|
1426
|
-
limit: 10,
|
|
1427
|
-
context: {
|
|
1428
|
-
architecture: frameworkAnswers.architecture || undefined,
|
|
1429
|
-
backend: frameworkAnswers.backend || undefined,
|
|
1430
|
-
deploymentTarget: frameworkAnswers.deploymentTarget || undefined,
|
|
1431
|
-
profileEnvVars: sizerContext.profileEnvVars || undefined
|
|
1432
|
-
}
|
|
1433
|
-
};
|
|
1434
|
-
|
|
1435
|
-
// Add CUDA version from base image for filtering
|
|
1436
|
-
if (sizerContext.cudaVersion) {
|
|
1437
|
-
toolArgs.cudaVersion = sizerContext.cudaVersion;
|
|
1438
|
-
}
|
|
1439
|
-
|
|
1440
|
-
// Add quantization if available from model format answers
|
|
1441
|
-
if (modelFormatAnswers.quantization) {
|
|
1442
|
-
toolArgs.quantization = modelFormatAnswers.quantization;
|
|
1443
|
-
}
|
|
1444
|
-
|
|
1445
|
-
const result = await mcpClient.callTool({
|
|
1446
|
-
name: 'get_instance_recommendation',
|
|
1447
|
-
arguments: toolArgs
|
|
1448
|
-
});
|
|
1449
|
-
|
|
1450
|
-
await mcpClient.close();
|
|
1451
|
-
|
|
1452
|
-
// Parse the response
|
|
1453
|
-
const textBlock = result?.content?.find(b => b.type === 'text');
|
|
1454
|
-
if (textBlock) {
|
|
1455
|
-
const parsed = JSON.parse(textBlock.text);
|
|
1456
|
-
|
|
1457
|
-
if (parsed.choices?.instanceType?.length > 0) {
|
|
1458
|
-
this._instanceSizerMetadata = parsed.metadata || null;
|
|
1459
|
-
|
|
1460
|
-
// Build display labels with VRAM estimate and utilization percentage
|
|
1461
|
-
const recommendations = parsed.metadata?.recommendations || [];
|
|
1462
|
-
const estimatedVramGb = parsed.metadata?.estimatedVramGb;
|
|
1463
|
-
|
|
1464
|
-
// Store choices with display labels for the instance prompt
|
|
1465
|
-
this._mcpInstanceSizerChoices = parsed.choices.instanceType;
|
|
1466
|
-
this._mcpInstanceSizerDisplayChoices = recommendations.map(rec => ({
|
|
1467
|
-
name: rec.displayLabel || `${rec.instanceType} (${estimatedVramGb ? estimatedVramGb.toFixed(1) : '?'}GB / ${rec.totalVramGb || '?'}GB ā ${rec.utilizationPercent || '?'}% utilization)`,
|
|
1468
|
-
value: rec.instanceType,
|
|
1469
|
-
short: rec.instanceType
|
|
1470
|
-
}));
|
|
1471
|
-
|
|
1472
|
-
const choices = parsed.choices.instanceType;
|
|
1473
|
-
const topRec = recommendations[0];
|
|
1474
|
-
const vramInfo = estimatedVramGb
|
|
1475
|
-
? ` (model needs ~${estimatedVramGb.toFixed(1)}GB VRAM)`
|
|
1476
|
-
: '';
|
|
1477
|
-
|
|
1478
|
-
console.log(` ā ${choices.length} compatible instance(s) found${vramInfo}`);
|
|
1479
|
-
|
|
1480
|
-
// Warn if all instances had zero quota but were restored for visibility
|
|
1481
|
-
if (parsed.metadata?.allFilteredByQuota) {
|
|
1482
|
-
console.log(' ā ļø All instances have zero quota ā request a quota increase for your preferred type');
|
|
1483
|
-
}
|
|
1484
|
-
|
|
1485
|
-
// Check if availability data is present (recommendations have capacityType)
|
|
1486
|
-
const hasAvailabilityData = recommendations.some(r => r.capacityType);
|
|
1487
|
-
|
|
1488
|
-
if (hasAvailabilityData) {
|
|
1489
|
-
// Group by capacityType for display
|
|
1490
|
-
const reserved = recommendations.filter(r => r.capacityType === 'reserved' || r.capacityType === 'ftp');
|
|
1491
|
-
const onDemand = recommendations.filter(r => r.capacityType === 'on-demand');
|
|
1492
|
-
|
|
1493
|
-
if (reserved.length > 0) {
|
|
1494
|
-
console.log(' āā Reserved Capacity āā');
|
|
1495
|
-
for (const rec of reserved) {
|
|
1496
|
-
const tp = rec.tensorParallelism > 1 ? ` TP=${rec.tensorParallelism}` : '';
|
|
1497
|
-
const vram = rec.totalVramGb ? `${rec.totalVramGb}GB` : '?';
|
|
1498
|
-
const util = rec.utilizationPercent ? `${rec.utilizationPercent}%` : '?';
|
|
1499
|
-
const tag = rec.capacityType === 'reserved'
|
|
1500
|
-
? ` [CR] ${rec.reservationInfo?.planName || rec.reservationInfo?.reservationId || ''}`
|
|
1501
|
-
: ` [FTP] ${rec.ftpInfo?.planName || ''}`;
|
|
1502
|
-
console.log(` ${rec === topRec ? 'ā' : ' '} ${rec.instanceType.padEnd(20)} ${vram.padStart(5)} VRAM ${util.padStart(4)} util${tp}${tag}`);
|
|
1503
|
-
}
|
|
1504
|
-
}
|
|
1505
|
-
|
|
1506
|
-
if (onDemand.length > 0) {
|
|
1507
|
-
console.log(' āā On-Demand āā');
|
|
1508
|
-
for (const rec of onDemand) {
|
|
1509
|
-
const tp = rec.tensorParallelism > 1 ? ` TP=${rec.tensorParallelism}` : '';
|
|
1510
|
-
const vram = rec.totalVramGb ? `${rec.totalVramGb}GB` : '?';
|
|
1511
|
-
const util = rec.utilizationPercent ? `${rec.utilizationPercent}%` : '?';
|
|
1512
|
-
const deployed = rec.quotaDeployed;
|
|
1513
|
-
const quota = rec.quotaLimit;
|
|
1514
|
-
const tag = quota !== null && quota !== undefined ? ` [Q:${deployed ?? 0}/${quota}]` : '';
|
|
1515
|
-
console.log(` ${rec === topRec ? 'ā' : ' '} ${rec.instanceType.padEnd(20)} ${vram.padStart(5)} VRAM ${util.padStart(4)} util${tp}${tag}`);
|
|
1516
|
-
}
|
|
1517
|
-
}
|
|
1518
|
-
} else {
|
|
1519
|
-
// Fallback: display compact recommendation table (no availability data)
|
|
1520
|
-
for (const rec of recommendations) {
|
|
1521
|
-
const tp = rec.tensorParallelism > 1 ? ` TP=${rec.tensorParallelism}` : '';
|
|
1522
|
-
const vram = rec.totalVramGb ? `${rec.totalVramGb}GB` : '?';
|
|
1523
|
-
const util = rec.utilizationPercent ? `${rec.utilizationPercent}%` : '?';
|
|
1524
|
-
console.log(` ${rec === topRec ? 'ā' : ' '} ${rec.instanceType.padEnd(20)} ${vram.padStart(5)} VRAM ${util.padStart(4)} util${tp}`);
|
|
1525
|
-
}
|
|
1526
|
-
}
|
|
1527
|
-
} else if (parsed.metadata?.allFilteredByQuota) {
|
|
1528
|
-
// All VRAM-compatible instances had zero quota
|
|
1529
|
-
console.log(' ā ļø No quota available for compatible instances. Request a quota increase.');
|
|
1530
|
-
this._instanceSizerMetadata = parsed.metadata || null;
|
|
1531
|
-
} else if (parsed.metadata?.warning) {
|
|
1532
|
-
console.log(` ā ļø ${parsed.metadata.warning}`);
|
|
1533
|
-
} else {
|
|
1534
|
-
// Apply architecture heuristic fallback when sizer returns empty
|
|
1535
|
-
const archForHeuristic = frameworkAnswers.architecture || frameworkAnswers.deploymentConfig?.split('-')[0];
|
|
1536
|
-
this._architectureHeuristicDefault = this._getArchitectureHeuristicDefault(archForHeuristic);
|
|
1537
|
-
console.log(` ā³ No instance-sizer results, using heuristic default: ${this._architectureHeuristicDefault}`);
|
|
1538
|
-
}
|
|
1539
|
-
}
|
|
1540
|
-
} catch (err) {
|
|
1541
|
-
// Sizer unavailable ā apply architecture heuristic fallback
|
|
1542
|
-
const archForHeuristic = frameworkAnswers.architecture || frameworkAnswers.deploymentConfig?.split('-')[0];
|
|
1543
|
-
this._architectureHeuristicDefault = this._getArchitectureHeuristicDefault(archForHeuristic);
|
|
1544
|
-
console.log(` ā ļø instance-sizer: ${err.message}`);
|
|
1545
|
-
console.log(` ā³ Using heuristic default: ${this._architectureHeuristicDefault}`);
|
|
1546
|
-
}
|
|
1547
|
-
}
|
|
1548
|
-
|
|
1549
|
-
/**
|
|
1550
|
-
* Query the hyperpod-cluster-picker MCP server for available HyperPod EKS clusters.
|
|
1551
|
-
* Populates configManager.mcpChoices.hyperPodCluster so _runPhase injects them into the list prompt.
|
|
1552
|
-
* Falls back to manual entry if the MCP server is not configured or fails.
|
|
1553
|
-
* Requirements: 12.1, 12.2, 12.3
|
|
1554
|
-
* @private
|
|
1555
|
-
*/
|
|
1556
|
-
async _queryMcpForHyperPod(infraAnswers, explicitConfig) {
|
|
1557
|
-
const cm = this.configManager;
|
|
1558
|
-
if (!cm) return;
|
|
1559
|
-
|
|
1560
|
-
const mcpServers = cm.getMcpServerNames();
|
|
1561
|
-
if (!mcpServers.includes('hyperpod-cluster-picker')) return;
|
|
1562
|
-
|
|
1563
|
-
// Skip if cluster already provided via CLI/config
|
|
1564
|
-
if (explicitConfig.hyperPodCluster) return;
|
|
1565
|
-
|
|
1566
|
-
const smart = this.options.smart === true;
|
|
1567
|
-
console.log(` š Querying hyperpod-cluster-picker${smart ? ' [smart]' : ''}...`);
|
|
1568
|
-
|
|
1569
|
-
const result = await cm.queryMcpServer('hyperpod-cluster-picker', {
|
|
1570
|
-
...infraAnswers
|
|
1571
|
-
});
|
|
1572
|
-
|
|
1573
|
-
if (result && result.choices?.hyperPodCluster?.length > 0) {
|
|
1574
|
-
const choices = result.choices.hyperPodCluster;
|
|
1575
|
-
const preview = choices.length <= 5
|
|
1576
|
-
? choices.join(', ')
|
|
1577
|
-
: `${choices.slice(0, 5).join(', ')} (+${choices.length - 5} more)`;
|
|
1578
|
-
console.log(` ā ${choices.length} cluster(s): [${preview}]`);
|
|
1579
|
-
} else {
|
|
1580
|
-
// Surface any error message from the MCP server
|
|
1581
|
-
if (result?.message) {
|
|
1582
|
-
console.log(` ā ļø ${result.message}`);
|
|
1583
|
-
} else {
|
|
1584
|
-
console.log(' ā³ No HyperPod clusters found via MCP, manual entry available');
|
|
1585
|
-
}
|
|
1586
|
-
}
|
|
1587
|
-
}
|
|
1588
|
-
|
|
1589
|
-
/**
|
|
1590
|
-
* Query the endpoint-picker MCP server for available InService real-time endpoints.
|
|
1591
|
-
* Populates this._mcpEndpointChoices for the existing endpoint selection prompt.
|
|
1592
|
-
* Graceful fallback: if MCP server fails (no credentials, timeout), skip and create new endpoint.
|
|
1593
|
-
* Requirements: 3.3, 4.3, 4.4
|
|
1594
|
-
* @private
|
|
1595
|
-
*/
|
|
1596
|
-
async _queryMcpForEndpoints(infraAnswers, explicitConfig) {
|
|
1597
|
-
const cm = this.configManager;
|
|
1598
|
-
if (!cm) return;
|
|
1599
|
-
|
|
1600
|
-
const mcpServers = cm.getMcpServerNames();
|
|
1601
|
-
if (!mcpServers.includes('endpoint-picker')) return;
|
|
1602
|
-
|
|
1603
|
-
// Skip if existing endpoint already provided via CLI/config
|
|
1604
|
-
if (explicitConfig.existingEndpointName) return;
|
|
1605
|
-
|
|
1606
|
-
console.log(' š Querying endpoint-picker...');
|
|
1607
|
-
|
|
1608
|
-
try {
|
|
1609
|
-
const result = await cm.queryMcpServer('endpoint-picker', {
|
|
1610
|
-
awsRegion: infraAnswers.awsRegion,
|
|
1611
|
-
deploymentTarget: 'realtime-inference'
|
|
1612
|
-
});
|
|
1613
|
-
|
|
1614
|
-
if (result && result.choices?.endpointName?.length > 0) {
|
|
1615
|
-
const endpointNames = result.choices.endpointName;
|
|
1616
|
-
const metadata = result.metadata || {};
|
|
1617
|
-
|
|
1618
|
-
// Build choices with metadata annotations
|
|
1619
|
-
this._mcpEndpointChoices = endpointNames.map(name => {
|
|
1620
|
-
const meta = metadata[name];
|
|
1621
|
-
if (meta) {
|
|
1622
|
-
const gpuInfo = meta.availableGpus === '?' ? 'GPUs: ?' : `${meta.availableGpus} GPUs free`;
|
|
1623
|
-
return {
|
|
1624
|
-
name: `${name} (${meta.instanceType}, ${gpuInfo}, ${meta.icCount} IC${meta.icCount !== 1 ? 's' : ''})`,
|
|
1625
|
-
value: name
|
|
1626
|
-
};
|
|
1627
|
-
}
|
|
1628
|
-
return { name, value: name };
|
|
1629
|
-
});
|
|
1630
|
-
|
|
1631
|
-
console.log(` ā ${endpointNames.length} endpoint(s) with available capacity`);
|
|
1632
|
-
} else {
|
|
1633
|
-
if (result?.message) {
|
|
1634
|
-
console.log(` ā³ ${result.message}`);
|
|
1635
|
-
} else {
|
|
1636
|
-
console.log(' ā³ No endpoints with available capacity found');
|
|
1637
|
-
}
|
|
1638
|
-
}
|
|
1639
|
-
} catch (err) {
|
|
1640
|
-
// Graceful fallback: if MCP server fails, skip and create new endpoint
|
|
1641
|
-
console.log(` ā ļø endpoint-picker: ${err.message || 'query failed'} ā will create new endpoint`);
|
|
1642
|
-
}
|
|
1643
|
-
}
|
|
1644
|
-
|
|
1645
|
-
/**
|
|
1646
|
-
* Query MCP base-image-picker server after deployment config is selected.
|
|
1647
|
-
* Populates _mcpBaseImageChoices for the base image selection prompt.
|
|
1648
|
-
* Requirements: 5.1, 5.2, 5.3, 5.4, 9.1, 9.2, 9.3
|
|
1649
|
-
* @private
|
|
1650
|
-
*/
|
|
1651
|
-
async _queryMcpForBaseImage(frameworkAnswers, _explicitConfig) {
|
|
1652
|
-
// Skip if base image provided via CLI --base-image flag
|
|
1653
|
-
if (this.options['base-image']) return;
|
|
1654
|
-
|
|
1655
|
-
const cm = this.configManager;
|
|
1656
|
-
if (!cm) return;
|
|
1657
|
-
|
|
1658
|
-
const mcpServers = cm.getMcpServerNames();
|
|
1659
|
-
if (!mcpServers.includes('base-image-picker')) return;
|
|
1660
|
-
|
|
1661
|
-
const smart = this.options.smart === true;
|
|
1662
|
-
const discover = this.options.discover !== false;
|
|
1663
|
-
const framework = frameworkAnswers.framework;
|
|
1664
|
-
const modelServer = frameworkAnswers.modelServer;
|
|
1665
|
-
const architecture = frameworkAnswers.architecture || frameworkAnswers.deploymentConfig?.split('-')[0];
|
|
1666
|
-
const isTransformer = framework === 'transformers';
|
|
1667
|
-
const isTriton = architecture === 'triton';
|
|
1668
|
-
const isDiffusors = architecture === 'diffusors';
|
|
1669
|
-
|
|
1670
|
-
// For non-transformer, non-triton, non-diffusors frameworks, prompt for optional search criteria
|
|
1671
|
-
let searchCriteria;
|
|
1672
|
-
if (!isTransformer && !isTriton && !isDiffusors) {
|
|
1673
|
-
const searchAnswer = await this._runPrompts(baseImageSearchPrompts.map(p => ({
|
|
1674
|
-
...p,
|
|
1675
|
-
when: () => true // Always show for non-transformer since we already checked
|
|
1676
|
-
})));
|
|
1677
|
-
searchCriteria = searchAnswer.baseImageSearch;
|
|
1678
|
-
}
|
|
1679
|
-
|
|
1680
|
-
const modeLabel = [smart && '[smart]', discover && '[discover]'].filter(Boolean).join(' ');
|
|
1681
|
-
console.log(` š Querying base-image-picker${modeLabel ? ` ${modeLabel}` : ''}...`);
|
|
1682
|
-
|
|
1683
|
-
const context = { framework, modelServer, architecture };
|
|
1684
|
-
if (searchCriteria && searchCriteria.trim()) {
|
|
1685
|
-
context.searchCriteria = searchCriteria.trim();
|
|
1686
|
-
}
|
|
1687
|
-
|
|
1688
|
-
const result = await cm.queryMcpServer('base-image-picker', context);
|
|
1689
|
-
|
|
1690
|
-
if (result && result.metadata?.baseImage?.length > 0) {
|
|
1691
|
-
const entries = result.metadata.baseImage;
|
|
1692
|
-
this._mcpBaseImageChoices = formatImageChoices(entries, isTransformer || isTriton || isDiffusors);
|
|
1693
|
-
const count = entries.length;
|
|
1694
|
-
console.log(` ā ${count} base image(s) available`);
|
|
1695
|
-
} else {
|
|
1696
|
-
console.log(' ā³ No MCP results, using default image');
|
|
1697
|
-
}
|
|
1698
|
-
}
|
|
1699
|
-
|
|
1700
|
-
/**
|
|
1701
|
-
* Query model-picker MCP server catalog for model choices.
|
|
1702
|
-
* Reads the architecture-specific catalog (popular-transformers.json or
|
|
1703
|
-
* popular-diffusors.json) to populate the model selection prompt.
|
|
1704
|
-
* @param {string} [architecture] - Current architecture ('transformers', 'diffusors', etc.)
|
|
1705
|
-
* @private
|
|
1706
|
-
*/
|
|
1707
|
-
_queryMcpForModels(architecture) {
|
|
1708
|
-
const cm = this.configManager;
|
|
1709
|
-
if (!cm) return;
|
|
1710
|
-
|
|
1711
|
-
const mcpServers = cm.getMcpServerNames();
|
|
1712
|
-
if (!mcpServers.includes('model-picker')) return;
|
|
1713
|
-
|
|
1714
|
-
try {
|
|
1715
|
-
const mcpConfigPath = path.join(GENERATOR_ROOT, 'config', 'mcp.json');
|
|
1716
|
-
if (!fs.existsSync(mcpConfigPath)) return;
|
|
1717
|
-
|
|
1718
|
-
const mcpConfig = JSON.parse(fs.readFileSync(mcpConfigPath, 'utf8'));
|
|
1719
|
-
const serverConfig = mcpConfig.mcpServers?.['model-picker'];
|
|
1720
|
-
if (!serverConfig?.args?.length) return;
|
|
1721
|
-
|
|
1722
|
-
// Resolve the server entry point directory from the args
|
|
1723
|
-
const serverEntryPoint = serverConfig.args[serverConfig.args.length - 1];
|
|
1724
|
-
const serverDir = path.dirname(serverEntryPoint);
|
|
1725
|
-
|
|
1726
|
-
// Read manifest to find catalog path
|
|
1727
|
-
const manifestPath = path.join(serverDir, 'manifest.json');
|
|
1728
|
-
if (!fs.existsSync(manifestPath)) return;
|
|
1729
|
-
|
|
1730
|
-
const manifest = JSON.parse(fs.readFileSync(manifestPath, 'utf8'));
|
|
1731
|
-
|
|
1732
|
-
// Select catalog based on architecture
|
|
1733
|
-
const catalogKey = architecture === 'diffusors'
|
|
1734
|
-
? 'popular-diffusors'
|
|
1735
|
-
: 'popular-transformers';
|
|
1736
|
-
const catalogRelPath = manifest.catalogs?.[catalogKey];
|
|
1737
|
-
if (!catalogRelPath) return;
|
|
1738
|
-
|
|
1739
|
-
const catalogPath = path.resolve(serverDir, catalogRelPath);
|
|
1740
|
-
if (!fs.existsSync(catalogPath)) return;
|
|
1741
|
-
|
|
1742
|
-
const catalog = JSON.parse(fs.readFileSync(catalogPath, 'utf8'));
|
|
1743
|
-
|
|
1744
|
-
// Extract model IDs, filtering out glob patterns (entries with *)
|
|
1745
|
-
const modelIds = Object.keys(catalog).filter(id => !id.includes('*'));
|
|
1746
|
-
|
|
1747
|
-
if (modelIds.length > 0) {
|
|
1748
|
-
this._mcpModelChoices = modelIds;
|
|
1749
|
-
}
|
|
1750
|
-
} catch {
|
|
1751
|
-
// Silently fall back to hardcoded defaults
|
|
1752
|
-
}
|
|
1753
|
-
}
|
|
1754
|
-
|
|
1755
1021
|
/**
|
|
1756
1022
|
* Get framework version choices from registry
|
|
1757
1023
|
* Requirements: 2.1, 2.6, 8.2, 8.3
|
|
@@ -1916,735 +1182,6 @@ export default class PromptRunner {
|
|
|
1916
1182
|
return choices;
|
|
1917
1183
|
}
|
|
1918
1184
|
|
|
1919
|
-
/**
|
|
1920
|
-
* Fetch and display model information from HuggingFace API and Model Registry
|
|
1921
|
-
* Requirements: 5.1, 5.2, 5.3, 5.4, 5.5, 5.6, 5.11, 11.1, 11.2, 11.3, 11.5, 11.6, 11.7
|
|
1922
|
-
* @private
|
|
1923
|
-
*/
|
|
1924
|
-
async _fetchAndDisplayModelInfo(modelId) {
|
|
1925
|
-
console.log('\n š Querying model-picker [discover]...');
|
|
1926
|
-
|
|
1927
|
-
const sources = [];
|
|
1928
|
-
let chatTemplate = null;
|
|
1929
|
-
let modelFamily = null;
|
|
1930
|
-
let mcpUsed = false;
|
|
1931
|
-
|
|
1932
|
-
// Try model-picker MCP server in discover mode (queries HuggingFace + merges with catalog)
|
|
1933
|
-
const cm = this.configManager;
|
|
1934
|
-
if (cm) {
|
|
1935
|
-
const mcpServers = cm.getMcpServerNames();
|
|
1936
|
-
if (mcpServers.includes('model-picker')) {
|
|
1937
|
-
try {
|
|
1938
|
-
const mcpConfigPath = path.join(GENERATOR_ROOT, 'config', 'mcp.json');
|
|
1939
|
-
if (fs.existsSync(mcpConfigPath)) {
|
|
1940
|
-
const mcpConfig = JSON.parse(fs.readFileSync(mcpConfigPath, 'utf8'));
|
|
1941
|
-
const serverConfig = mcpConfig.mcpServers?.['model-picker'];
|
|
1942
|
-
if (serverConfig) {
|
|
1943
|
-
const { McpClient } = await import('./mcp-client.js');
|
|
1944
|
-
const client = new McpClient(serverConfig, { timeout: 15000 });
|
|
1945
|
-
|
|
1946
|
-
// Override _buildContext to pass model_id and mode directly
|
|
1947
|
-
client._getUnboundedParameterNames = () => [];
|
|
1948
|
-
client._buildContext = () => ({});
|
|
1949
|
-
|
|
1950
|
-
// Connect and call get_models directly
|
|
1951
|
-
const { Client } = await import('@modelcontextprotocol/sdk/client/index.js');
|
|
1952
|
-
const { StdioClientTransport } = await import('@modelcontextprotocol/sdk/client/stdio.js');
|
|
1953
|
-
|
|
1954
|
-
const transport = new StdioClientTransport({
|
|
1955
|
-
command: serverConfig.command,
|
|
1956
|
-
args: resolveMcpArgs(serverConfig.args),
|
|
1957
|
-
env: { ...process.env, ...(serverConfig.env || {}) },
|
|
1958
|
-
stderr: 'pipe'
|
|
1959
|
-
});
|
|
1960
|
-
|
|
1961
|
-
const mcpClient = new Client(
|
|
1962
|
-
{ name: 'ml-container-creator', version: '1.0.0' },
|
|
1963
|
-
{ capabilities: {} }
|
|
1964
|
-
);
|
|
1965
|
-
|
|
1966
|
-
await mcpClient.connect(transport);
|
|
1967
|
-
|
|
1968
|
-
const result = await mcpClient.callTool({
|
|
1969
|
-
name: 'get_models',
|
|
1970
|
-
arguments: { model_id: modelId, mode: 'discover' }
|
|
1971
|
-
});
|
|
1972
|
-
|
|
1973
|
-
await mcpClient.close();
|
|
1974
|
-
|
|
1975
|
-
// Parse the response
|
|
1976
|
-
const textBlock = result?.content?.find(b => b.type === 'text');
|
|
1977
|
-
if (textBlock) {
|
|
1978
|
-
const parsed = JSON.parse(textBlock.text);
|
|
1979
|
-
if (parsed.values && Object.keys(parsed.values).length > 0) {
|
|
1980
|
-
mcpUsed = true;
|
|
1981
|
-
const vals = parsed.values;
|
|
1982
|
-
|
|
1983
|
-
if (vals.chat_template) {
|
|
1984
|
-
chatTemplate = vals.chat_template;
|
|
1985
|
-
}
|
|
1986
|
-
if (vals.family) {
|
|
1987
|
-
modelFamily = vals.family;
|
|
1988
|
-
}
|
|
1989
|
-
|
|
1990
|
-
// Extract model_type for architecture validation
|
|
1991
|
-
// Requirements: 4.1
|
|
1992
|
-
if (vals.model_type) {
|
|
1993
|
-
this._modelType = vals.model_type;
|
|
1994
|
-
}
|
|
1995
|
-
|
|
1996
|
-
// Extract model source metadata for loading adapter
|
|
1997
|
-
// Requirements: 2.1, 2.2, 2.3, 2.4
|
|
1998
|
-
if (vals.provider) {
|
|
1999
|
-
this._mcpModelSource = vals.provider;
|
|
2000
|
-
}
|
|
2001
|
-
if (vals.artifactUri) {
|
|
2002
|
-
this._mcpArtifactUri = vals.artifactUri;
|
|
2003
|
-
}
|
|
2004
|
-
|
|
2005
|
-
// Determine sources based on what was returned
|
|
2006
|
-
if (vals.tags || vals.pipeline_tag) {
|
|
2007
|
-
sources.push('HuggingFace_Hub_API');
|
|
2008
|
-
}
|
|
2009
|
-
if (vals.validation_level || vals.framework_compatibility) {
|
|
2010
|
-
sources.push('Model_Picker_Catalog');
|
|
2011
|
-
}
|
|
2012
|
-
if (sources.length === 0) {
|
|
2013
|
-
sources.push('model-picker');
|
|
2014
|
-
}
|
|
2015
|
-
console.log(` ā Resolved: ${modelId}`);
|
|
2016
|
-
} else if (parsed.message) {
|
|
2017
|
-
console.log(` ā³ ${parsed.message}`);
|
|
2018
|
-
}
|
|
2019
|
-
}
|
|
2020
|
-
}
|
|
2021
|
-
}
|
|
2022
|
-
} catch (err) {
|
|
2023
|
-
console.log(' ā³ model-picker unavailable, using fallback');
|
|
2024
|
-
}
|
|
2025
|
-
}
|
|
2026
|
-
}
|
|
2027
|
-
|
|
2028
|
-
// Fallback to legacy path if MCP didn't resolve
|
|
2029
|
-
if (!mcpUsed) {
|
|
2030
|
-
const registryConfigManager = this.registryConfigManager;
|
|
2031
|
-
if (registryConfigManager) {
|
|
2032
|
-
// Only try HuggingFace API for bare model IDs (not prefixed URIs)
|
|
2033
|
-
const isNonHfUri = modelId.startsWith('s3://') ||
|
|
2034
|
-
modelId.startsWith('registry://');
|
|
2035
|
-
|
|
2036
|
-
if (!isNonHfUri) {
|
|
2037
|
-
// Try HuggingFace API directly
|
|
2038
|
-
try {
|
|
2039
|
-
const hfData = await registryConfigManager._fetchHuggingFaceData(modelId);
|
|
2040
|
-
if (hfData) {
|
|
2041
|
-
sources.push('HuggingFace_Hub_API');
|
|
2042
|
-
if (hfData.chatTemplate) {
|
|
2043
|
-
chatTemplate = hfData.chatTemplate;
|
|
2044
|
-
}
|
|
2045
|
-
// Extract model_type for architecture validation
|
|
2046
|
-
// Requirements: 4.1
|
|
2047
|
-
if (hfData.modelConfig?.model_type) {
|
|
2048
|
-
this._modelType = hfData.modelConfig.model_type;
|
|
2049
|
-
}
|
|
2050
|
-
console.log(' ā
Found on HuggingFace Hub');
|
|
2051
|
-
} else {
|
|
2052
|
-
console.log(' ā¹ļø Not found on HuggingFace Hub (may be private or offline)');
|
|
2053
|
-
}
|
|
2054
|
-
} catch (error) {
|
|
2055
|
-
console.log(' ā ļø HuggingFace API unavailable');
|
|
2056
|
-
}
|
|
2057
|
-
} else {
|
|
2058
|
-
// Non-HF URI (s3://, registry://, etc.) ā skip HF lookup silently
|
|
2059
|
-
// The summary at the end of this function will report "No additional model information"
|
|
2060
|
-
}
|
|
2061
|
-
|
|
2062
|
-
// Check Model Registry for overrides
|
|
2063
|
-
if (registryConfigManager.modelRegistry) {
|
|
2064
|
-
let modelConfig = registryConfigManager.modelRegistry[modelId];
|
|
2065
|
-
|
|
2066
|
-
if (!modelConfig) {
|
|
2067
|
-
for (const [pattern, config] of Object.entries(registryConfigManager.modelRegistry)) {
|
|
2068
|
-
if (pattern.includes('*')) {
|
|
2069
|
-
const regex = new RegExp(`^${ pattern.replace(/\*/g, '.*') }$`);
|
|
2070
|
-
if (regex.test(modelId)) {
|
|
2071
|
-
modelConfig = config;
|
|
2072
|
-
console.log(` ā
Matched pattern in Model_Registry: ${pattern}`);
|
|
2073
|
-
break;
|
|
2074
|
-
}
|
|
2075
|
-
}
|
|
2076
|
-
}
|
|
2077
|
-
} else {
|
|
2078
|
-
console.log(' ā
Found in Model_Registry');
|
|
2079
|
-
}
|
|
2080
|
-
|
|
2081
|
-
if (modelConfig) {
|
|
2082
|
-
sources.push('Model_Registry');
|
|
2083
|
-
if (modelConfig.chatTemplate) {
|
|
2084
|
-
chatTemplate = modelConfig.chatTemplate;
|
|
2085
|
-
}
|
|
2086
|
-
if (modelConfig.family) {
|
|
2087
|
-
modelFamily = modelConfig.family;
|
|
2088
|
-
}
|
|
2089
|
-
}
|
|
2090
|
-
}
|
|
2091
|
-
}
|
|
2092
|
-
}
|
|
2093
|
-
|
|
2094
|
-
// Display information
|
|
2095
|
-
if (sources.length > 0) {
|
|
2096
|
-
console.log('\nš Model Information:');
|
|
2097
|
-
console.log(` ⢠Model ID: ${modelId}`);
|
|
2098
|
-
if (modelFamily) {
|
|
2099
|
-
console.log(` ⢠Family: ${modelFamily}`);
|
|
2100
|
-
}
|
|
2101
|
-
if (chatTemplate) {
|
|
2102
|
-
console.log(' ⢠Chat Template: ā
Available');
|
|
2103
|
-
console.log(' (Will be injected into generated files)');
|
|
2104
|
-
} else {
|
|
2105
|
-
console.log(' ⢠Chat Template: ā Not available');
|
|
2106
|
-
console.log(' (Chat endpoints may require manual configuration)');
|
|
2107
|
-
}
|
|
2108
|
-
console.log(` ⢠Sources: ${sources.join(', ')}`);
|
|
2109
|
-
} else {
|
|
2110
|
-
console.log(' ā¹ļø No additional model information available');
|
|
2111
|
-
console.log(' Proceeding with default configuration');
|
|
2112
|
-
}
|
|
2113
|
-
}
|
|
2114
|
-
|
|
2115
|
-
|
|
2116
|
-
|
|
2117
|
-
/**
|
|
2118
|
-
* Validate and display instance type compatibility
|
|
2119
|
-
* Requirements: 4.1, 4.2, 4.3, 4.4, 4.5, 4.6
|
|
2120
|
-
* @private
|
|
2121
|
-
*/
|
|
2122
|
-
async _validateAndDisplayInstanceType(instanceType, framework, version) {
|
|
2123
|
-
const registryConfigManager = this.registryConfigManager;
|
|
2124
|
-
|
|
2125
|
-
if (!registryConfigManager) {
|
|
2126
|
-
return;
|
|
2127
|
-
}
|
|
2128
|
-
|
|
2129
|
-
// Get framework configuration
|
|
2130
|
-
const frameworkConfig = registryConfigManager.frameworkRegistry?.[framework]?.[version];
|
|
2131
|
-
if (!frameworkConfig) {
|
|
2132
|
-
return; // No framework config, skip validation
|
|
2133
|
-
}
|
|
2134
|
-
|
|
2135
|
-
console.log(`\nš Validating instance type: ${instanceType}`);
|
|
2136
|
-
|
|
2137
|
-
// Validate instance type
|
|
2138
|
-
const validationResult = registryConfigManager.validateInstanceType(instanceType, frameworkConfig);
|
|
2139
|
-
|
|
2140
|
-
if (validationResult.compatible) {
|
|
2141
|
-
console.log(' ā
Instance type is compatible');
|
|
2142
|
-
if (validationResult.info) {
|
|
2143
|
-
console.log(` ā¹ļø ${validationResult.info}`);
|
|
2144
|
-
}
|
|
2145
|
-
} else {
|
|
2146
|
-
console.log(' ā Instance type compatibility issue detected');
|
|
2147
|
-
if (validationResult.error) {
|
|
2148
|
-
console.log(` Error: ${validationResult.error}`);
|
|
2149
|
-
}
|
|
2150
|
-
if (validationResult.recommendations && validationResult.recommendations.length > 0) {
|
|
2151
|
-
console.log(` š” Recommended instances: ${validationResult.recommendations.join(', ')}`);
|
|
2152
|
-
}
|
|
2153
|
-
|
|
2154
|
-
// In test mode or non-interactive mode, throw error instead of prompting
|
|
2155
|
-
if (this.options.skipPrompts || process.env.NODE_ENV === 'test') {
|
|
2156
|
-
throw new Error('Instance type validation failed. Please select a compatible instance type.');
|
|
2157
|
-
}
|
|
2158
|
-
|
|
2159
|
-
// Ask user if they want to proceed
|
|
2160
|
-
const proceed = await this._runPrompts([{
|
|
2161
|
-
type: 'confirm',
|
|
2162
|
-
name: 'proceedWithIncompatible',
|
|
2163
|
-
message: 'Instance type may not be compatible. Proceed anyway?',
|
|
2164
|
-
default: false
|
|
2165
|
-
}]);
|
|
2166
|
-
|
|
2167
|
-
if (!proceed.proceedWithIncompatible) {
|
|
2168
|
-
throw new Error('Instance type validation failed. Please select a compatible instance type.');
|
|
2169
|
-
}
|
|
2170
|
-
}
|
|
2171
|
-
|
|
2172
|
-
if (validationResult.warning) {
|
|
2173
|
-
console.log(` ā ļø Warning: ${validationResult.warning}`);
|
|
2174
|
-
}
|
|
2175
|
-
}
|
|
2176
|
-
|
|
2177
|
-
/**
|
|
2178
|
-
* Run secret prompts using the Secret_Classification registry.
|
|
2179
|
-
* For each secret type whose stages apply to the current context:
|
|
2180
|
-
* - Query for managed secrets of that type
|
|
2181
|
-
* - If managed secrets exist: show selection list (secrets + "Enter plaintext token" + "Skip")
|
|
2182
|
-
* - If no managed secrets exist: fall back to existing plaintext prompt
|
|
2183
|
-
*
|
|
2184
|
-
* Requirements: 8.1, 8.2, 8.3, 8.4, 8.5, 8.6, 8.7, 8.8, 8.9
|
|
2185
|
-
* @param {object} previousAnswers - Answers from previous prompt phases
|
|
2186
|
-
* @param {object} explicitConfig - Explicit CLI/config values
|
|
2187
|
-
* @param {object} existingConfig - Existing project configuration
|
|
2188
|
-
* @returns {Promise<object>} Object with token/ARN values keyed by config field names
|
|
2189
|
-
* @private
|
|
2190
|
-
*/
|
|
2191
|
-
async _runSecretPrompts(previousAnswers, explicitConfig, existingConfig) {
|
|
2192
|
-
const results = {};
|
|
2193
|
-
|
|
2194
|
-
for (const classification of SECRET_CLASSIFICATIONS) {
|
|
2195
|
-
// Check if this secret type's stages apply to the current context
|
|
2196
|
-
if (!this._secretStagesApply(classification, previousAnswers)) continue;
|
|
2197
|
-
|
|
2198
|
-
// Determine the config keys for this classification
|
|
2199
|
-
const arnConfigKey = this._getArnConfigKey(classification);
|
|
2200
|
-
const plaintextConfigKey = this._getPlaintextConfigKey(classification);
|
|
2201
|
-
|
|
2202
|
-
// Skip if ARN already provided via CLI flag
|
|
2203
|
-
if (explicitConfig[arnConfigKey]) {
|
|
2204
|
-
results[arnConfigKey] = explicitConfig[arnConfigKey];
|
|
2205
|
-
continue;
|
|
2206
|
-
}
|
|
2207
|
-
|
|
2208
|
-
// Skip if plaintext already provided via CLI flag
|
|
2209
|
-
if (explicitConfig[plaintextConfigKey]) {
|
|
2210
|
-
results[plaintextConfigKey] = explicitConfig[plaintextConfigKey];
|
|
2211
|
-
continue;
|
|
2212
|
-
}
|
|
2213
|
-
|
|
2214
|
-
// Query for existing managed secrets of this type
|
|
2215
|
-
const managedSecrets = await this._listManagedSecrets(classification.identifier);
|
|
2216
|
-
|
|
2217
|
-
if (managedSecrets.length > 0) {
|
|
2218
|
-
// Show selection list: managed secrets + plaintext entry + skip
|
|
2219
|
-
const answer = await this._promptSecretSelection(classification, managedSecrets, previousAnswers);
|
|
2220
|
-
Object.assign(results, answer);
|
|
2221
|
-
} else {
|
|
2222
|
-
// Fall back to existing plaintext prompt
|
|
2223
|
-
const answer = await this._promptPlaintextFallback(classification, previousAnswers, explicitConfig, existingConfig);
|
|
2224
|
-
Object.assign(results, answer);
|
|
2225
|
-
}
|
|
2226
|
-
}
|
|
2227
|
-
|
|
2228
|
-
return results;
|
|
2229
|
-
}
|
|
2230
|
-
|
|
2231
|
-
/**
|
|
2232
|
-
* Determine if a secret classification's stages apply to the current generation context.
|
|
2233
|
-
* Build-time secrets apply when the project involves a Docker build step.
|
|
2234
|
-
* Runtime secrets apply when the architecture uses HuggingFace Hub models.
|
|
2235
|
-
* Requirements: 8.9
|
|
2236
|
-
* @param {object} classification - Secret classification entry
|
|
2237
|
-
* @param {object} answers - Current answers from previous phases
|
|
2238
|
-
* @returns {boolean} True if the secret type is applicable
|
|
2239
|
-
* @private
|
|
2240
|
-
*/
|
|
2241
|
-
_secretStagesApply(classification, answers) {
|
|
2242
|
-
const architecture = answers.architecture || answers.deploymentConfig?.split('-')[0];
|
|
2243
|
-
const backend = answers.backend || answers.deploymentConfig?.split('-').slice(1).join('-');
|
|
2244
|
-
|
|
2245
|
-
if (classification.identifier === 'hf-token') {
|
|
2246
|
-
// HF token applies to transformers, diffusors, and Triton LLM backends
|
|
2247
|
-
const isTransformers = architecture === 'transformers';
|
|
2248
|
-
const isDiffusors = architecture === 'diffusors';
|
|
2249
|
-
const isTritonLlm = architecture === 'triton' && (backend === 'vllm' || backend === 'tensorrtllm');
|
|
2250
|
-
|
|
2251
|
-
if (!isTransformers && !isDiffusors && !isTritonLlm) return false;
|
|
2252
|
-
|
|
2253
|
-
// Skip for non-HuggingFace model sources
|
|
2254
|
-
const modelSource = answers.modelSource;
|
|
2255
|
-
if (modelSource && modelSource !== 'huggingface') return false;
|
|
2256
|
-
|
|
2257
|
-
return true;
|
|
2258
|
-
}
|
|
2259
|
-
|
|
2260
|
-
if (classification.identifier === 'ngc-token') {
|
|
2261
|
-
// NGC token only applies to transformers-tensorrt-llm (build-time only)
|
|
2262
|
-
if (architecture === 'triton') return false;
|
|
2263
|
-
if (architecture === 'diffusors') return false;
|
|
2264
|
-
return architecture === 'transformers' && backend === 'tensorrt-llm';
|
|
2265
|
-
}
|
|
2266
|
-
|
|
2267
|
-
// For future secret types, check if any stage applies
|
|
2268
|
-
// Build-time applies to all Docker-based deployments
|
|
2269
|
-
// Runtime applies to architectures that download at startup
|
|
2270
|
-
return classification.stages.length > 0;
|
|
2271
|
-
}
|
|
2272
|
-
|
|
2273
|
-
/**
|
|
2274
|
-
* Get the ARN config key for a classification.
|
|
2275
|
-
* Maps classification identifiers to config field names.
|
|
2276
|
-
* @param {object} classification - Secret classification entry
|
|
2277
|
-
* @returns {string} Config key for the ARN value
|
|
2278
|
-
* @private
|
|
2279
|
-
*/
|
|
2280
|
-
_getArnConfigKey(classification) {
|
|
2281
|
-
const keyMap = {
|
|
2282
|
-
'hf-token': 'hfTokenArn',
|
|
2283
|
-
'ngc-token': 'ngcTokenArn'
|
|
2284
|
-
};
|
|
2285
|
-
return keyMap[classification.identifier] || `${classification.identifier.replace(/-([a-z])/g, (_, c) => c.toUpperCase())}Arn`;
|
|
2286
|
-
}
|
|
2287
|
-
|
|
2288
|
-
/**
|
|
2289
|
-
* Get the plaintext config key for a classification.
|
|
2290
|
-
* Maps classification identifiers to config field names.
|
|
2291
|
-
* @param {object} classification - Secret classification entry
|
|
2292
|
-
* @returns {string} Config key for the plaintext value
|
|
2293
|
-
* @private
|
|
2294
|
-
*/
|
|
2295
|
-
_getPlaintextConfigKey(classification) {
|
|
2296
|
-
const keyMap = {
|
|
2297
|
-
'hf-token': 'hfToken',
|
|
2298
|
-
'ngc-token': 'ngcApiKey'
|
|
2299
|
-
};
|
|
2300
|
-
return keyMap[classification.identifier] || classification.identifier.replace(/-([a-z])/g, (_, c) => c.toUpperCase());
|
|
2301
|
-
}
|
|
2302
|
-
|
|
2303
|
-
/**
|
|
2304
|
-
* List managed secrets of a given type from AWS Secrets Manager.
|
|
2305
|
-
* Uses the active bootstrap profile to query for secrets tagged with
|
|
2306
|
-
* the mlcc:secret-type matching the given identifier.
|
|
2307
|
-
* @param {string} secretType - The secret type identifier (e.g., 'hf-token')
|
|
2308
|
-
* @returns {Promise<Array<{name: string, arn: string}>>} Array of managed secrets
|
|
2309
|
-
* @private
|
|
2310
|
-
*/
|
|
2311
|
-
async _listManagedSecrets(secretType) {
|
|
2312
|
-
try {
|
|
2313
|
-
const bootstrapConfig = new BootstrapConfig();
|
|
2314
|
-
const activeProfile = bootstrapConfig.getActiveProfile();
|
|
2315
|
-
if (!activeProfile) return [];
|
|
2316
|
-
|
|
2317
|
-
const profile = activeProfile.config.awsProfile;
|
|
2318
|
-
const region = activeProfile.config.awsRegion;
|
|
2319
|
-
if (!profile || !region) return [];
|
|
2320
|
-
|
|
2321
|
-
const command = `aws secretsmanager list-secrets --filters Key=tag-key,Values=mlcc:managed-by Key=tag-value,Values=ml-container-creator --region ${region} --profile ${profile} --output json`;
|
|
2322
|
-
const output = execSync(command, { encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'], timeout: 10000 });
|
|
2323
|
-
const trimmed = output.trim();
|
|
2324
|
-
if (!trimmed) return [];
|
|
2325
|
-
|
|
2326
|
-
const result = JSON.parse(trimmed);
|
|
2327
|
-
const secrets = result.SecretList || [];
|
|
2328
|
-
|
|
2329
|
-
// Filter by secret type tag
|
|
2330
|
-
return secrets
|
|
2331
|
-
.filter(secret => {
|
|
2332
|
-
const typeTag = (secret.Tags || []).find(t => t.Key === 'mlcc:secret-type');
|
|
2333
|
-
return typeTag && typeTag.Value === secretType;
|
|
2334
|
-
})
|
|
2335
|
-
.map(secret => ({
|
|
2336
|
-
name: secret.Name,
|
|
2337
|
-
arn: secret.ARN
|
|
2338
|
-
}));
|
|
2339
|
-
} catch {
|
|
2340
|
-
// If AWS CLI fails (not configured, no credentials, etc.), return empty
|
|
2341
|
-
return [];
|
|
2342
|
-
}
|
|
2343
|
-
}
|
|
2344
|
-
|
|
2345
|
-
/**
|
|
2346
|
-
* Display a selection list for managed secrets of a given type.
|
|
2347
|
-
* Shows available secrets plus options for plaintext entry and skip.
|
|
2348
|
-
* Requirements: 8.1, 8.2, 8.3, 8.4, 8.5, 8.6
|
|
2349
|
-
* @param {object} classification - Secret classification entry
|
|
2350
|
-
* @param {Array<{name: string, arn: string}>} managedSecrets - Available managed secrets
|
|
2351
|
-
* @param {object} previousAnswers - Answers from previous phases
|
|
2352
|
-
* @returns {Promise<object>} Object with the selected value keyed by config field name
|
|
2353
|
-
* @private
|
|
2354
|
-
*/
|
|
2355
|
-
async _promptSecretSelection(classification, managedSecrets, previousAnswers) {
|
|
2356
|
-
const arnConfigKey = this._getArnConfigKey(classification);
|
|
2357
|
-
|
|
2358
|
-
console.log(`\nš ${classification.displayName}`);
|
|
2359
|
-
console.log(` ${classification.purpose}`);
|
|
2360
|
-
|
|
2361
|
-
// Build choices: managed secrets + enter plaintext + skip
|
|
2362
|
-
const choices = [
|
|
2363
|
-
...managedSecrets.map(secret => ({
|
|
2364
|
-
name: `š ${secret.name} (${secret.arn})`,
|
|
2365
|
-
value: secret.arn,
|
|
2366
|
-
short: secret.name
|
|
2367
|
-
})),
|
|
2368
|
-
{ name: 'āļø Enter plaintext token', value: '__plaintext__', short: 'Plaintext' },
|
|
2369
|
-
{ name: 'āļø Skip (use environment variable)', value: '__skip__', short: 'Skip' }
|
|
2370
|
-
];
|
|
2371
|
-
|
|
2372
|
-
const { secretSelection } = await this._runPrompts([{
|
|
2373
|
-
type: 'list',
|
|
2374
|
-
name: 'secretSelection',
|
|
2375
|
-
message: `Select ${classification.promptLabel}:`,
|
|
2376
|
-
choices
|
|
2377
|
-
}]);
|
|
2378
|
-
|
|
2379
|
-
if (secretSelection === '__skip__') {
|
|
2380
|
-
return {};
|
|
2381
|
-
}
|
|
2382
|
-
|
|
2383
|
-
if (secretSelection === '__plaintext__') {
|
|
2384
|
-
// Use existing plaintext flow
|
|
2385
|
-
return this._promptPlaintextEntry(classification, previousAnswers);
|
|
2386
|
-
}
|
|
2387
1185
|
|
|
2388
|
-
// User selected a managed secret ARN
|
|
2389
|
-
return { [arnConfigKey]: secretSelection };
|
|
2390
|
-
}
|
|
2391
|
-
|
|
2392
|
-
/**
|
|
2393
|
-
* Prompt for plaintext token entry with ARN detection.
|
|
2394
|
-
* If the user enters an ARN, store it as an ARN reference.
|
|
2395
|
-
* Requirements: 8.4, 8.5, 8.6
|
|
2396
|
-
* @param {object} classification - Secret classification entry
|
|
2397
|
-
* @param {object} previousAnswers - Answers from previous phases
|
|
2398
|
-
* @returns {Promise<object>} Object with the value keyed by config field name
|
|
2399
|
-
* @private
|
|
2400
|
-
*/
|
|
2401
|
-
async _promptPlaintextEntry(classification, _previousAnswers) {
|
|
2402
|
-
const arnConfigKey = this._getArnConfigKey(classification);
|
|
2403
|
-
const plaintextConfigKey = this._getPlaintextConfigKey(classification);
|
|
2404
|
-
|
|
2405
|
-
const { tokenValue } = await this._runPrompts([{
|
|
2406
|
-
type: 'input',
|
|
2407
|
-
name: 'tokenValue',
|
|
2408
|
-
message: `${classification.promptLabel} (enter token, ARN, or leave empty):`,
|
|
2409
|
-
validate: (input) => {
|
|
2410
|
-
// Empty is valid
|
|
2411
|
-
if (!input || input.trim() === '') return true;
|
|
2412
|
-
// Environment variable reference is valid
|
|
2413
|
-
if (input.trim().startsWith('$')) return true;
|
|
2414
|
-
return true;
|
|
2415
|
-
}
|
|
2416
|
-
}]);
|
|
2417
|
-
|
|
2418
|
-
if (!tokenValue || tokenValue.trim() === '') {
|
|
2419
|
-
return {};
|
|
2420
|
-
}
|
|
2421
|
-
|
|
2422
|
-
const value = tokenValue.trim();
|
|
2423
|
-
|
|
2424
|
-
// ARN detection: if the value is a Secrets Manager ARN, store as ARN
|
|
2425
|
-
if (isSecretsManagerArn(value)) {
|
|
2426
|
-
return { [arnConfigKey]: value };
|
|
2427
|
-
}
|
|
2428
|
-
|
|
2429
|
-
// Otherwise store as plaintext
|
|
2430
|
-
return { [plaintextConfigKey]: value };
|
|
2431
|
-
}
|
|
2432
|
-
|
|
2433
|
-
/**
|
|
2434
|
-
* Fall back to existing plaintext prompt when no managed secrets exist.
|
|
2435
|
-
* Uses the same prompts as the original hfTokenPrompts/ngcApiKeyPrompts
|
|
2436
|
-
* but with ARN detection on the input.
|
|
2437
|
-
* Requirements: 8.7
|
|
2438
|
-
* @param {object} classification - Secret classification entry
|
|
2439
|
-
* @param {object} previousAnswers - Answers from previous phases
|
|
2440
|
-
* @param {object} explicitConfig - Explicit CLI/config values
|
|
2441
|
-
* @param {object} existingConfig - Existing project configuration
|
|
2442
|
-
* @returns {Promise<object>} Object with the value keyed by config field name
|
|
2443
|
-
* @private
|
|
2444
|
-
*/
|
|
2445
|
-
async _promptPlaintextFallback(classification, _previousAnswers, _explicitConfig, _existingConfig) {
|
|
2446
|
-
const arnConfigKey = this._getArnConfigKey(classification);
|
|
2447
|
-
const plaintextConfigKey = this._getPlaintextConfigKey(classification);
|
|
2448
|
-
|
|
2449
|
-
// If in auto-prompt mode, skip
|
|
2450
|
-
if (this.configManager?.isAutoPrompt()) {
|
|
2451
|
-
return {};
|
|
2452
|
-
}
|
|
2453
|
-
|
|
2454
|
-
// Display context-appropriate security message
|
|
2455
|
-
if (classification.identifier === 'hf-token') {
|
|
2456
|
-
console.log('\nš HuggingFace Authentication');
|
|
2457
|
-
console.log(' Many models (e.g. Llama, Mistral) are gated and require a token.');
|
|
2458
|
-
console.log(' š” Tip: Use `ml-container-creator secrets create --type hf-token` to store');
|
|
2459
|
-
console.log(' your token in AWS Secrets Manager for zero-knowledge operation.');
|
|
2460
|
-
console.log(' For CI/CD pipelines, use "$HF_TOKEN" to reference an environment variable.\n');
|
|
2461
|
-
} else if (classification.identifier === 'ngc-token') {
|
|
2462
|
-
console.log('\nš NVIDIA NGC Authentication');
|
|
2463
|
-
console.log(' TensorRT-LLM base images are hosted on NVIDIA NGC and require an API key.');
|
|
2464
|
-
console.log(' š” Tip: Use `ml-container-creator secrets create --type ngc-token` to store');
|
|
2465
|
-
console.log(' your key in AWS Secrets Manager for zero-knowledge operation.');
|
|
2466
|
-
console.log(' For CI/CD pipelines, use "$NGC_API_KEY" to reference an environment variable.\n');
|
|
2467
|
-
} else {
|
|
2468
|
-
console.log(`\nš ${classification.displayName}`);
|
|
2469
|
-
console.log(` ${classification.purpose}\n`);
|
|
2470
|
-
}
|
|
2471
|
-
|
|
2472
|
-
const { tokenValue } = await this._runPrompts([{
|
|
2473
|
-
type: 'input',
|
|
2474
|
-
name: 'tokenValue',
|
|
2475
|
-
message: `${classification.promptLabel} (enter token, ARN, "$${classification.envVar}" for env var, or leave empty):`,
|
|
2476
|
-
validate: (input) => {
|
|
2477
|
-
if (!input || input.trim() === '') return true;
|
|
2478
|
-
if (input.trim().startsWith('$')) return true;
|
|
2479
|
-
// Warn about HF token format
|
|
2480
|
-
if (classification.identifier === 'hf-token' && !input.startsWith('hf_') && !isSecretsManagerArn(input)) {
|
|
2481
|
-
console.warn('\nā ļø Warning: HuggingFace tokens typically start with "hf_"');
|
|
2482
|
-
console.warn(' If this is intentional, you can ignore this warning.');
|
|
2483
|
-
}
|
|
2484
|
-
return true;
|
|
2485
|
-
}
|
|
2486
|
-
}]);
|
|
2487
|
-
|
|
2488
|
-
if (!tokenValue || tokenValue.trim() === '') {
|
|
2489
|
-
return {};
|
|
2490
|
-
}
|
|
2491
|
-
|
|
2492
|
-
const value = tokenValue.trim();
|
|
2493
|
-
|
|
2494
|
-
// ARN detection: if the value is a Secrets Manager ARN, store as ARN
|
|
2495
|
-
if (isSecretsManagerArn(value)) {
|
|
2496
|
-
return { [arnConfigKey]: value };
|
|
2497
|
-
}
|
|
2498
|
-
|
|
2499
|
-
// Otherwise store as plaintext
|
|
2500
|
-
return { [plaintextConfigKey]: value };
|
|
2501
|
-
}
|
|
2502
|
-
|
|
2503
|
-
/**
|
|
2504
|
-
* CUDA-to-AMI mapping.
|
|
2505
|
-
* Maps CUDA major.minor versions to the SageMaker inference AMI that provides
|
|
2506
|
-
* the matching CUDA driver. Derived from the framework registry patterns.
|
|
2507
|
-
* @private
|
|
2508
|
-
*/
|
|
2509
|
-
static CUDA_AMI_MAP = {
|
|
2510
|
-
'11.0': 'al2-ami-sagemaker-inference-gpu-2',
|
|
2511
|
-
'11.4': 'al2-ami-sagemaker-inference-gpu-2-1',
|
|
2512
|
-
'11.8': 'al2-ami-sagemaker-inference-gpu-2-1',
|
|
2513
|
-
'12.1': 'al2-ami-sagemaker-inference-gpu-3-1',
|
|
2514
|
-
'12.2': 'al2-ami-sagemaker-inference-gpu-3-1',
|
|
2515
|
-
'12.4': 'al2-ami-sagemaker-inference-gpu-3-1',
|
|
2516
|
-
'12.6': 'al2-ami-sagemaker-inference-gpu-3-1',
|
|
2517
|
-
'13.0': 'al2023-ami-sagemaker-inference-gpu-4-1'
|
|
2518
|
-
};
|
|
2519
|
-
|
|
2520
|
-
/**
|
|
2521
|
-
* Prompt the user to select a CUDA version when the selected GPU instance
|
|
2522
|
-
* supports multiple versions. The choice transparently resolves to the
|
|
2523
|
-
* correct SageMaker inference AMI.
|
|
2524
|
-
*
|
|
2525
|
-
* When a base image CUDA version is provided, auto-resolves by intersecting
|
|
2526
|
-
* with the instance's supported versions. Removes the CUDA prompt from the
|
|
2527
|
-
* interactive flow when auto-resolution succeeds.
|
|
2528
|
-
*
|
|
2529
|
-
* Skipped for CPU instances, non-CUDA accelerators, or when only one
|
|
2530
|
-
* compatible CUDA version exists.
|
|
2531
|
-
*
|
|
2532
|
-
* @param {string} instanceType - Selected instance type (e.g. "ml.g5.2xlarge")
|
|
2533
|
-
* @param {string} framework - Selected framework name
|
|
2534
|
-
* @param {string} frameworkVersion - Selected framework version
|
|
2535
|
-
* @param {string} [baseImageCuda] - CUDA version from selected base image (for auto-resolution)
|
|
2536
|
-
* @returns {Promise<{cudaVersion: string, inferenceAmiVersion: string}|null>}
|
|
2537
|
-
* @private
|
|
2538
|
-
*/
|
|
2539
|
-
async _promptCudaVersion(instanceType, framework, frameworkVersion, baseImageCuda) {
|
|
2540
|
-
if (!instanceType) return null;
|
|
2541
|
-
|
|
2542
|
-
// Look up instance in accelerator mapping
|
|
2543
|
-
const instanceInfo = this._instanceAcceleratorMapping[instanceType];
|
|
2544
|
-
if (!instanceInfo || instanceInfo.accelerator.type !== 'cuda') return null;
|
|
2545
|
-
|
|
2546
|
-
const instanceCudaVersions = instanceInfo.accelerator.versions;
|
|
2547
|
-
if (!instanceCudaVersions || instanceCudaVersions.length === 0) return null;
|
|
2548
|
-
|
|
2549
|
-
// Auto-resolution: when base image specifies a CUDA version, intersect with instance support
|
|
2550
|
-
// Requirements: 3.11, 4.9, 4.10, 4.11
|
|
2551
|
-
if (baseImageCuda) {
|
|
2552
|
-
const majorRequired = baseImageCuda.split('.')[0];
|
|
2553
|
-
const intersection = instanceCudaVersions.filter(v => {
|
|
2554
|
-
if (v === baseImageCuda) return true;
|
|
2555
|
-
if (v.startsWith(`${majorRequired }.`)) return true;
|
|
2556
|
-
return false;
|
|
2557
|
-
});
|
|
2558
|
-
|
|
2559
|
-
if (intersection.length > 0) {
|
|
2560
|
-
// Auto-select: pick exact match or highest compatible
|
|
2561
|
-
const exactMatch = intersection.find(v => v === baseImageCuda);
|
|
2562
|
-
const selectedVersion = exactMatch || intersection.sort().pop();
|
|
2563
|
-
const inferenceAmiVersion = PromptRunner.CUDA_AMI_MAP[selectedVersion];
|
|
2564
|
-
if (inferenceAmiVersion) {
|
|
2565
|
-
console.log(`\nš§ CUDA ${selectedVersion} auto-resolved from base image (requires ${baseImageCuda})`);
|
|
2566
|
-
console.log(` AMI: ${inferenceAmiVersion}`);
|
|
2567
|
-
return { cudaVersion: selectedVersion, inferenceAmiVersion };
|
|
2568
|
-
}
|
|
2569
|
-
} else {
|
|
2570
|
-
// No intersection ā warn and fall through to manual prompt
|
|
2571
|
-
console.log(`\n ā ļø Base image requires CUDA ${baseImageCuda} but instance ${instanceType} supports: ${instanceCudaVersions.join(', ')}`);
|
|
2572
|
-
console.log(' No compatible CUDA version found. Falling back to manual selection.');
|
|
2573
|
-
}
|
|
2574
|
-
}
|
|
2575
|
-
|
|
2576
|
-
// Get framework CUDA requirements (if available)
|
|
2577
|
-
const registryConfigManager = this.registryConfigManager;
|
|
2578
|
-
const frameworkConfig = registryConfigManager?.frameworkRegistry?.[framework]?.[frameworkVersion];
|
|
2579
|
-
const frameworkAccel = frameworkConfig?.accelerator;
|
|
2580
|
-
|
|
2581
|
-
// Compute compatible CUDA versions: intersection of instance support and framework range
|
|
2582
|
-
let compatibleVersions;
|
|
2583
|
-
if (frameworkAccel?.versionRange) {
|
|
2584
|
-
const { min, max } = frameworkAccel.versionRange;
|
|
2585
|
-
compatibleVersions = instanceCudaVersions.filter(v => {
|
|
2586
|
-
return v >= min && v <= max;
|
|
2587
|
-
});
|
|
2588
|
-
} else {
|
|
2589
|
-
compatibleVersions = [...instanceCudaVersions];
|
|
2590
|
-
}
|
|
2591
|
-
|
|
2592
|
-
if (compatibleVersions.length === 0) {
|
|
2593
|
-
// No overlap ā fall back to all instance versions (validation already warned)
|
|
2594
|
-
compatibleVersions = [...instanceCudaVersions];
|
|
2595
|
-
}
|
|
2596
|
-
|
|
2597
|
-
// If only one option, auto-select it silently
|
|
2598
|
-
if (compatibleVersions.length === 1) {
|
|
2599
|
-
const cudaVersion = compatibleVersions[0];
|
|
2600
|
-
const inferenceAmiVersion = PromptRunner.CUDA_AMI_MAP[cudaVersion];
|
|
2601
|
-
if (inferenceAmiVersion) {
|
|
2602
|
-
console.log(`\nš§ CUDA ${cudaVersion} auto-selected (only compatible version for ${instanceType})`);
|
|
2603
|
-
console.log(` AMI: ${inferenceAmiVersion}`);
|
|
2604
|
-
}
|
|
2605
|
-
return inferenceAmiVersion ? { cudaVersion, inferenceAmiVersion } : null;
|
|
2606
|
-
}
|
|
2607
|
-
|
|
2608
|
-
// Multiple options ā let the user choose (or auto-select in auto-prompt mode)
|
|
2609
|
-
const defaultVersion = frameworkAccel?.version
|
|
2610
|
-
&& compatibleVersions.includes(frameworkAccel.version)
|
|
2611
|
-
? frameworkAccel.version
|
|
2612
|
-
: instanceInfo.accelerator.default || compatibleVersions[compatibleVersions.length - 1];
|
|
2613
|
-
|
|
2614
|
-
// In auto-prompt mode, auto-select the default without prompting
|
|
2615
|
-
if (this.configManager?.isAutoPrompt()) {
|
|
2616
|
-
const inferenceAmiVersion = PromptRunner.CUDA_AMI_MAP[defaultVersion];
|
|
2617
|
-
if (inferenceAmiVersion) {
|
|
2618
|
-
console.log(`\nš§ CUDA ${defaultVersion} auto-selected (auto-prompt mode)`);
|
|
2619
|
-
console.log(` AMI: ${inferenceAmiVersion}`);
|
|
2620
|
-
}
|
|
2621
|
-
return inferenceAmiVersion ? { cudaVersion: defaultVersion, inferenceAmiVersion } : null;
|
|
2622
|
-
}
|
|
2623
|
-
|
|
2624
|
-
const choices = compatibleVersions.map(v => {
|
|
2625
|
-
const ami = PromptRunner.CUDA_AMI_MAP[v] || 'unknown';
|
|
2626
|
-
const isDefault = v === defaultVersion ? ' (recommended)' : '';
|
|
2627
|
-
return {
|
|
2628
|
-
name: `CUDA ${v}${isDefault} ā AMI: ${ami}`,
|
|
2629
|
-
value: v,
|
|
2630
|
-
short: `CUDA ${v}`
|
|
2631
|
-
};
|
|
2632
|
-
});
|
|
2633
|
-
|
|
2634
|
-
const { cudaVersion } = await this._runPrompts([{
|
|
2635
|
-
type: 'list',
|
|
2636
|
-
name: 'cudaVersion',
|
|
2637
|
-
message: `Select CUDA version for ${instanceType} (${instanceInfo.accelerator.hardware}):`,
|
|
2638
|
-
choices,
|
|
2639
|
-
default: defaultVersion
|
|
2640
|
-
}]);
|
|
2641
|
-
|
|
2642
|
-
const inferenceAmiVersion = PromptRunner.CUDA_AMI_MAP[cudaVersion];
|
|
2643
|
-
if (inferenceAmiVersion) {
|
|
2644
|
-
console.log(` ā
CUDA ${cudaVersion} ā AMI: ${inferenceAmiVersion}`);
|
|
2645
|
-
}
|
|
2646
|
-
|
|
2647
|
-
return inferenceAmiVersion ? { cudaVersion, inferenceAmiVersion } : null;
|
|
2648
|
-
}
|
|
2649
1186
|
}
|
|
2650
1187
|
|