@aws/ml-container-creator 0.2.6 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +38 -2
- package/config/bootstrap-stack.json +94 -1
- package/config/defaults.json +1 -1
- package/infra/ci-harness/package-lock.json +22 -9
- package/package.json +3 -1
- package/servers/instance-sizer/index.js +45 -8
- package/servers/instance-sizer/lib/instance-ranker.js +140 -11
- package/servers/instance-sizer/lib/model-resolver.js +10 -6
- package/servers/instance-sizer/lib/quota-resolver.js +368 -0
- package/servers/instance-sizer/package.json +2 -0
- package/servers/lib/catalogs/instances.json +527 -12
- package/servers/lib/catalogs/model-servers.json +298 -20
- package/servers/lib/catalogs/model-sizes.json +27 -0
- package/servers/lib/catalogs/models.json +101 -0
- package/servers/lib/schemas/image-catalog.schema.json +15 -1
- package/servers/model-picker/index.js +2 -1
- package/src/app.js +96 -2
- package/src/lib/architecture-sync.js +171 -0
- package/src/lib/arn-detection.js +22 -0
- package/src/lib/bootstrap-command-handler.js +178 -3
- package/src/lib/cli-handler.js +2 -2
- package/src/lib/config-manager.js +121 -1
- package/src/lib/cross-cutting-checker.js +119 -0
- package/src/lib/deployment-entry-schema.js +1 -2
- package/src/lib/prompt-runner.js +514 -20
- package/src/lib/prompts.js +67 -5
- package/src/lib/registry-command-handler.js +236 -0
- package/src/lib/schema-sync.js +31 -0
- package/src/lib/secret-classification.js +56 -0
- package/src/lib/secrets-command-handler.js +550 -0
- package/src/lib/template-manager.js +49 -1
- package/src/lib/validate-runner.js +174 -2
- package/src/lib/validation-report.js +8 -1
- package/src/prompt-adapter.js +3 -2
- package/templates/Dockerfile +10 -2
- package/templates/code/cuda_compat.sh +22 -0
- package/templates/code/serve +3 -0
- package/templates/code/start_server.sh +3 -0
- package/templates/diffusors/Dockerfile +2 -1
- package/templates/diffusors/serve +3 -0
- package/templates/do/README.md +33 -0
- package/templates/do/benchmark +646 -0
- package/templates/do/build +22 -0
- package/templates/do/clean +86 -0
- package/templates/do/config +41 -6
- package/templates/do/deploy +66 -6
- package/templates/do/logs +18 -3
- package/templates/do/register +8 -1
- package/templates/do/run +10 -0
- package/templates/triton/Dockerfile +5 -0
package/src/lib/prompt-runner.js
CHANGED
|
@@ -17,9 +17,8 @@ import {
|
|
|
17
17
|
modelServerPrompts,
|
|
18
18
|
modelLoadStrategyPrompts,
|
|
19
19
|
modelProfilePrompts,
|
|
20
|
-
hfTokenPrompts,
|
|
21
|
-
ngcApiKeyPrompts,
|
|
22
20
|
modulePrompts,
|
|
21
|
+
benchmarkPrompts,
|
|
23
22
|
infraRegionAndTargetPrompts,
|
|
24
23
|
infraInstancePrompts,
|
|
25
24
|
infraAsyncPrompts,
|
|
@@ -35,9 +34,13 @@ import {
|
|
|
35
34
|
|
|
36
35
|
import fs from 'fs';
|
|
37
36
|
import path from 'path';
|
|
37
|
+
import { execSync } from 'node:child_process';
|
|
38
38
|
import { fileURLToPath } from 'node:url';
|
|
39
39
|
import RegistryLoader from './registry-loader.js';
|
|
40
40
|
import { runPrompts } from '../prompt-adapter.js';
|
|
41
|
+
import { SECRET_CLASSIFICATIONS } from './secret-classification.js';
|
|
42
|
+
import { isSecretsManagerArn } from './arn-detection.js';
|
|
43
|
+
import BootstrapConfig from './bootstrap-config.js';
|
|
41
44
|
|
|
42
45
|
const __pr_filename = fileURLToPath(import.meta.url);
|
|
43
46
|
const __pr_dirname = path.dirname(__pr_filename);
|
|
@@ -170,6 +173,9 @@ export default class PromptRunner {
|
|
|
170
173
|
existingConfig
|
|
171
174
|
);
|
|
172
175
|
|
|
176
|
+
// Requirements: 4.2-4.5 — Check model architecture compatibility after base image selection
|
|
177
|
+
this._checkModelArchitectureCompatibility(baseImageAnswers, frameworkAnswers);
|
|
178
|
+
|
|
173
179
|
// Extract CUDA version from selected base image for instance-sizer context
|
|
174
180
|
const selectedBaseImageCuda = this._extractCudaFromBaseImage(baseImageAnswers);
|
|
175
181
|
|
|
@@ -247,6 +253,29 @@ export default class PromptRunner {
|
|
|
247
253
|
this._autoGpuCount = tpRec.gpuCount;
|
|
248
254
|
console.log(` ✓ Auto-set tensor parallelism: TP=${tpRec.tensorParallelism} (${tpRec.gpuCount} GPUs)`);
|
|
249
255
|
}
|
|
256
|
+
|
|
257
|
+
// Display capacity type confirmation for selected instance
|
|
258
|
+
// Requirements: 5.4
|
|
259
|
+
if (matchingRec && matchingRec.capacityType) {
|
|
260
|
+
if (matchingRec.capacityType === 'reserved') {
|
|
261
|
+
const resType = matchingRec.reservationType === 'capacity-block' ? 'Capacity Block' : 'ODCR';
|
|
262
|
+
const endInfo = matchingRec.reservationType === 'capacity-block' && matchingRec.reservationInfo?.endDate
|
|
263
|
+
? `, ends ${new Date(matchingRec.reservationInfo.endDate).toLocaleDateString()}`
|
|
264
|
+
: '';
|
|
265
|
+
console.log(` ✓ Using reserved capacity — ${resType} (reservation ${matchingRec.reservationInfo?.reservationId || 'unknown'}${endInfo})`);
|
|
266
|
+
} else if (matchingRec.capacityType === 'ftp') {
|
|
267
|
+
console.log(` ✓ Using reserved capacity (plan ${matchingRec.ftpInfo?.planName || 'unknown'})`);
|
|
268
|
+
} else {
|
|
269
|
+
const headroom = matchingRec.quotaHeadroom;
|
|
270
|
+
console.log(` ✓ Using on-demand capacity (quota headroom: ${headroom ?? 'unknown'})`);
|
|
271
|
+
}
|
|
272
|
+
}
|
|
273
|
+
|
|
274
|
+
// Extract reservation ARN from selected instance for deployment config
|
|
275
|
+
// Requirements: 2.3
|
|
276
|
+
if (matchingRec && matchingRec.capacityType === 'reserved' && matchingRec.reservationInfo?.reservationArn) {
|
|
277
|
+
this._selectedCapacityReservationArn = matchingRec.reservationInfo.reservationArn;
|
|
278
|
+
}
|
|
250
279
|
}
|
|
251
280
|
|
|
252
281
|
// 3c. Async-specific prompts (only when deploymentTarget === 'async-inference')
|
|
@@ -353,13 +382,11 @@ export default class PromptRunner {
|
|
|
353
382
|
existingConfig
|
|
354
383
|
);
|
|
355
384
|
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
const ngcApiKeyAnswers =
|
|
361
|
-
{ ...frameworkAnswers, ...engineAnswers, ...frameworkVersionAnswers, ...frameworkProfileAnswers, ...modelFormatAnswers, ...modelServerAnswers, ...modelProfileAnswers },
|
|
362
|
-
explicitConfig, existingConfig);
|
|
385
|
+
// Secret prompts — registry-driven secret selection (replaces hardcoded hfToken/ngcApiKey prompts)
|
|
386
|
+
const secretPreviousAnswers = { ...frameworkAnswers, ...engineAnswers, ...frameworkVersionAnswers, ...frameworkProfileAnswers, ...modelFormatAnswers, ...modelServerAnswers, ...modelProfileAnswers };
|
|
387
|
+
const secretAnswers = await this._runSecretPrompts(secretPreviousAnswers, explicitConfig, existingConfig);
|
|
388
|
+
const hfTokenAnswers = { hfToken: secretAnswers.hfToken, hfTokenArn: secretAnswers.hfTokenArn };
|
|
389
|
+
const ngcApiKeyAnswers = { ngcApiKey: secretAnswers.ngcApiKey, ngcTokenArn: secretAnswers.ngcTokenArn };
|
|
363
390
|
|
|
364
391
|
// Module selection
|
|
365
392
|
const moduleAnswers = await this._runPhase(modulePrompts, { ...frameworkAnswers, ...engineAnswers }, explicitConfig, existingConfig);
|
|
@@ -372,6 +399,21 @@ export default class PromptRunner {
|
|
|
372
399
|
moduleAnswers.includeSampleModel = false;
|
|
373
400
|
}
|
|
374
401
|
|
|
402
|
+
// Benchmark prompts — derive includeBenchmark from testTypes selection or CLI flag
|
|
403
|
+
// Requirements: 1.1, 1.2
|
|
404
|
+
let benchmarkAnswers = {};
|
|
405
|
+
if (frameworkAnswers.architecture === 'transformers' || frameworkAnswers.architecture === 'diffusors') {
|
|
406
|
+
const testTypes = moduleAnswers.testTypes || [];
|
|
407
|
+
const includeBenchmark = testTypes.includes('sagemaker-ai-automated-benchmarking') ||
|
|
408
|
+
explicitConfig.includeBenchmark === true ||
|
|
409
|
+
explicitConfig.includeBenchmark === 'true';
|
|
410
|
+
benchmarkAnswers.includeBenchmark = includeBenchmark;
|
|
411
|
+
if (includeBenchmark) {
|
|
412
|
+
const subAnswers = await this._runPhase(benchmarkPrompts, { ...frameworkAnswers, ...moduleAnswers, includeBenchmark }, explicitConfig, existingConfig);
|
|
413
|
+
benchmarkAnswers = { ...benchmarkAnswers, ...subAnswers };
|
|
414
|
+
}
|
|
415
|
+
}
|
|
416
|
+
|
|
375
417
|
// Validate instance type against framework requirements (now that framework version is known)
|
|
376
418
|
const finalInstanceType = infraAnswers.customInstanceType || infraAnswers.instanceType;
|
|
377
419
|
if (finalInstanceType && frameworkVersionAnswers.frameworkVersion) {
|
|
@@ -413,6 +455,7 @@ export default class PromptRunner {
|
|
|
413
455
|
...hfTokenAnswers,
|
|
414
456
|
...ngcApiKeyAnswers,
|
|
415
457
|
...moduleAnswers,
|
|
458
|
+
...benchmarkAnswers,
|
|
416
459
|
...projectAnswers,
|
|
417
460
|
...destinationAnswers,
|
|
418
461
|
buildTimestamp
|
|
@@ -432,6 +475,12 @@ export default class PromptRunner {
|
|
|
432
475
|
combinedAnswers.artifactUri = this._mcpArtifactUri;
|
|
433
476
|
}
|
|
434
477
|
|
|
478
|
+
// Flow capacity reservation ARN from instance-sizer selection
|
|
479
|
+
// Requirements: 2.3
|
|
480
|
+
if (this._selectedCapacityReservationArn) {
|
|
481
|
+
combinedAnswers.capacityReservationArn = this._selectedCapacityReservationArn;
|
|
482
|
+
}
|
|
483
|
+
|
|
435
484
|
// Validate: non-HF model sources require an artifact URI
|
|
436
485
|
// Without it, the serve script can't download the model at runtime
|
|
437
486
|
// Infer modelSource from model name prefix if not set by MCP
|
|
@@ -741,6 +790,69 @@ export default class PromptRunner {
|
|
|
741
790
|
return null;
|
|
742
791
|
}
|
|
743
792
|
|
|
793
|
+
/**
|
|
794
|
+
* Check model architecture compatibility against the selected base image.
|
|
795
|
+
* Emits an advisory warning if the model's model_type is not in the server's
|
|
796
|
+
* supportedModelTypes. Skips silently if supportedModelTypes is empty (sync not run).
|
|
797
|
+
* Requirements: 4.2, 4.3, 4.4, 4.5
|
|
798
|
+
* @param {Object} baseImageAnswers - Answers from base image selection phase
|
|
799
|
+
* @param {Object} frameworkAnswers - Answers from framework/deployment config phase
|
|
800
|
+
* @private
|
|
801
|
+
*/
|
|
802
|
+
_checkModelArchitectureCompatibility(baseImageAnswers, frameworkAnswers) {
|
|
803
|
+
// Requirement 4.5: skip if no model_type was resolved
|
|
804
|
+
if (!this._modelType) return;
|
|
805
|
+
|
|
806
|
+
// Determine the selected image
|
|
807
|
+
const selectedImage = baseImageAnswers.baseImage || baseImageAnswers.customBaseImage;
|
|
808
|
+
if (!selectedImage || selectedImage === 'custom') return;
|
|
809
|
+
|
|
810
|
+
// Resolve the matching choice from MCP base image choices
|
|
811
|
+
if (!this._mcpBaseImageChoices) return;
|
|
812
|
+
const matchingChoice = this._mcpBaseImageChoices.find(c => c.value === selectedImage);
|
|
813
|
+
if (!matchingChoice) return;
|
|
814
|
+
|
|
815
|
+
// Determine the server name from framework answers
|
|
816
|
+
const server = frameworkAnswers.modelServer || frameworkAnswers.backend;
|
|
817
|
+
if (!server) return;
|
|
818
|
+
|
|
819
|
+
// Load the model-servers catalog to find the entry with supportedModelTypes
|
|
820
|
+
try {
|
|
821
|
+
const catalogPath = path.resolve(GENERATOR_ROOT, 'servers', 'lib', 'catalogs', 'model-servers.json');
|
|
822
|
+
const catalog = JSON.parse(fs.readFileSync(catalogPath, 'utf8'));
|
|
823
|
+
|
|
824
|
+
const serverEntries = catalog[server];
|
|
825
|
+
if (!Array.isArray(serverEntries)) return;
|
|
826
|
+
|
|
827
|
+
// Find the catalog entry matching the selected image
|
|
828
|
+
const entry = serverEntries.find(e => e.image === selectedImage);
|
|
829
|
+
if (!entry) return;
|
|
830
|
+
|
|
831
|
+
const supported = entry.supportedModelTypes;
|
|
832
|
+
// Requirement 4.5: skip silently when supportedModelTypes is empty (sync not run)
|
|
833
|
+
if (!supported || supported.length === 0) return;
|
|
834
|
+
|
|
835
|
+
// Requirement 4.2-4.3: cross-reference model_type (case-insensitive)
|
|
836
|
+
const modelTypeLower = this._modelType.toLowerCase();
|
|
837
|
+
if (!supported.includes(modelTypeLower)) {
|
|
838
|
+
const version = entry.labels?.framework_version || entry.tag || 'unknown';
|
|
839
|
+
const docsUrls = {
|
|
840
|
+
vllm: 'https://docs.vllm.ai/en/latest/models/supported_models.html',
|
|
841
|
+
sglang: 'https://sgl-project.github.io/references/supported_models.html',
|
|
842
|
+
'tensorrt-llm': 'https://nvidia.github.io/TensorRT-LLM/reference/support-matrix.html'
|
|
843
|
+
};
|
|
844
|
+
const docsUrl = docsUrls[server] || `https://github.com/search?q=${server}+supported+models`;
|
|
845
|
+
|
|
846
|
+
// Requirement 4.3-4.4: emit advisory warning (does not block generation)
|
|
847
|
+
console.log(`\n ⚠️ Model architecture "${this._modelType}" may not be supported by ${server} ${version}`);
|
|
848
|
+
console.log(' Consider upgrading to a newer base image, or verify compatibility at:');
|
|
849
|
+
console.log(` ${docsUrl}`);
|
|
850
|
+
}
|
|
851
|
+
} catch (err) {
|
|
852
|
+
// Graceful degradation: if catalog can't be read, skip silently
|
|
853
|
+
}
|
|
854
|
+
}
|
|
855
|
+
|
|
744
856
|
/**
|
|
745
857
|
* Get architecture-based heuristic default instance type.
|
|
746
858
|
* Used when the instance-sizer cannot produce a recommendation.
|
|
@@ -917,7 +1029,7 @@ export default class PromptRunner {
|
|
|
917
1029
|
|
|
918
1030
|
const toolArgs = {
|
|
919
1031
|
modelName,
|
|
920
|
-
limit:
|
|
1032
|
+
limit: 10,
|
|
921
1033
|
context: {
|
|
922
1034
|
architecture: frameworkAnswers.architecture || undefined,
|
|
923
1035
|
backend: frameworkAnswers.backend || undefined,
|
|
@@ -966,13 +1078,57 @@ export default class PromptRunner {
|
|
|
966
1078
|
const choices = parsed.choices.instanceType;
|
|
967
1079
|
const topRec = recommendations[0];
|
|
968
1080
|
const vramInfo = estimatedVramGb
|
|
969
|
-
? ` (
|
|
970
|
-
: '';
|
|
971
|
-
const tpInfo = topRec?.tensorParallelism > 1
|
|
972
|
-
? ` [TP=${topRec.tensorParallelism}]`
|
|
1081
|
+
? ` (model needs ~${estimatedVramGb.toFixed(1)}GB VRAM)`
|
|
973
1082
|
: '';
|
|
974
1083
|
|
|
975
|
-
console.log(` ✓ ${choices.length}
|
|
1084
|
+
console.log(` ✓ ${choices.length} compatible instance(s) found${vramInfo}`);
|
|
1085
|
+
|
|
1086
|
+
// Check if availability data is present (recommendations have capacityType)
|
|
1087
|
+
const hasAvailabilityData = recommendations.some(r => r.capacityType);
|
|
1088
|
+
|
|
1089
|
+
if (hasAvailabilityData) {
|
|
1090
|
+
// Group by capacityType for display
|
|
1091
|
+
const reserved = recommendations.filter(r => r.capacityType === 'reserved' || r.capacityType === 'ftp');
|
|
1092
|
+
const onDemand = recommendations.filter(r => r.capacityType === 'on-demand');
|
|
1093
|
+
|
|
1094
|
+
if (reserved.length > 0) {
|
|
1095
|
+
console.log(' ── Reserved Capacity ──');
|
|
1096
|
+
for (const rec of reserved) {
|
|
1097
|
+
const tp = rec.tensorParallelism > 1 ? ` TP=${rec.tensorParallelism}` : '';
|
|
1098
|
+
const vram = rec.totalVramGb ? `${rec.totalVramGb}GB` : '?';
|
|
1099
|
+
const util = rec.utilizationPercent ? `${rec.utilizationPercent}%` : '?';
|
|
1100
|
+
const tag = rec.capacityType === 'reserved'
|
|
1101
|
+
? ` [CR] ${rec.reservationInfo?.planName || rec.reservationInfo?.reservationId || ''}`
|
|
1102
|
+
: ` [FTP] ${rec.ftpInfo?.planName || ''}`;
|
|
1103
|
+
console.log(` ${rec === topRec ? '→' : ' '} ${rec.instanceType.padEnd(20)} ${vram.padStart(5)} VRAM ${util.padStart(4)} util${tp}${tag}`);
|
|
1104
|
+
}
|
|
1105
|
+
}
|
|
1106
|
+
|
|
1107
|
+
if (onDemand.length > 0) {
|
|
1108
|
+
console.log(' ── On-Demand ──');
|
|
1109
|
+
for (const rec of onDemand) {
|
|
1110
|
+
const tp = rec.tensorParallelism > 1 ? ` TP=${rec.tensorParallelism}` : '';
|
|
1111
|
+
const vram = rec.totalVramGb ? `${rec.totalVramGb}GB` : '?';
|
|
1112
|
+
const util = rec.utilizationPercent ? `${rec.utilizationPercent}%` : '?';
|
|
1113
|
+
const deployed = rec.quotaDeployed;
|
|
1114
|
+
const quota = rec.quotaLimit;
|
|
1115
|
+
const tag = quota !== null && quota !== undefined ? ` [Q:${deployed ?? 0}/${quota}]` : '';
|
|
1116
|
+
console.log(` ${rec === topRec ? '→' : ' '} ${rec.instanceType.padEnd(20)} ${vram.padStart(5)} VRAM ${util.padStart(4)} util${tp}${tag}`);
|
|
1117
|
+
}
|
|
1118
|
+
}
|
|
1119
|
+
} else {
|
|
1120
|
+
// Fallback: display compact recommendation table (no availability data)
|
|
1121
|
+
for (const rec of recommendations) {
|
|
1122
|
+
const tp = rec.tensorParallelism > 1 ? ` TP=${rec.tensorParallelism}` : '';
|
|
1123
|
+
const vram = rec.totalVramGb ? `${rec.totalVramGb}GB` : '?';
|
|
1124
|
+
const util = rec.utilizationPercent ? `${rec.utilizationPercent}%` : '?';
|
|
1125
|
+
console.log(` ${rec === topRec ? '→' : ' '} ${rec.instanceType.padEnd(20)} ${vram.padStart(5)} VRAM ${util.padStart(4)} util${tp}`);
|
|
1126
|
+
}
|
|
1127
|
+
}
|
|
1128
|
+
} else if (parsed.metadata?.allFilteredByQuota) {
|
|
1129
|
+
// All VRAM-compatible instances had zero quota
|
|
1130
|
+
console.log(' ⚠️ No quota available for compatible instances. Request a quota increase.');
|
|
1131
|
+
this._instanceSizerMetadata = parsed.metadata || null;
|
|
976
1132
|
} else if (parsed.metadata?.warning) {
|
|
977
1133
|
console.log(` ⚠️ ${parsed.metadata.warning}`);
|
|
978
1134
|
} else {
|
|
@@ -1376,6 +1532,12 @@ export default class PromptRunner {
|
|
|
1376
1532
|
modelFamily = vals.family;
|
|
1377
1533
|
}
|
|
1378
1534
|
|
|
1535
|
+
// Extract model_type for architecture validation
|
|
1536
|
+
// Requirements: 4.1
|
|
1537
|
+
if (vals.model_type) {
|
|
1538
|
+
this._modelType = vals.model_type;
|
|
1539
|
+
}
|
|
1540
|
+
|
|
1379
1541
|
// Extract model source metadata for loading adapter
|
|
1380
1542
|
// Requirements: 2.1, 2.2, 2.3, 2.4
|
|
1381
1543
|
if (vals.provider) {
|
|
@@ -1427,6 +1589,11 @@ export default class PromptRunner {
|
|
|
1427
1589
|
if (hfData.chatTemplate) {
|
|
1428
1590
|
chatTemplate = hfData.chatTemplate;
|
|
1429
1591
|
}
|
|
1592
|
+
// Extract model_type for architecture validation
|
|
1593
|
+
// Requirements: 4.1
|
|
1594
|
+
if (hfData.modelConfig?.model_type) {
|
|
1595
|
+
this._modelType = hfData.modelConfig.model_type;
|
|
1596
|
+
}
|
|
1430
1597
|
console.log(' ✅ Found on HuggingFace Hub');
|
|
1431
1598
|
} else {
|
|
1432
1599
|
console.log(' ℹ️ Not found on HuggingFace Hub (may be private or offline)');
|
|
@@ -1554,6 +1721,332 @@ export default class PromptRunner {
|
|
|
1554
1721
|
}
|
|
1555
1722
|
}
|
|
1556
1723
|
|
|
1724
|
+
/**
|
|
1725
|
+
* Run secret prompts using the Secret_Classification registry.
|
|
1726
|
+
* For each secret type whose stages apply to the current context:
|
|
1727
|
+
* - Query for managed secrets of that type
|
|
1728
|
+
* - If managed secrets exist: show selection list (secrets + "Enter plaintext token" + "Skip")
|
|
1729
|
+
* - If no managed secrets exist: fall back to existing plaintext prompt
|
|
1730
|
+
*
|
|
1731
|
+
* Requirements: 8.1, 8.2, 8.3, 8.4, 8.5, 8.6, 8.7, 8.8, 8.9
|
|
1732
|
+
* @param {object} previousAnswers - Answers from previous prompt phases
|
|
1733
|
+
* @param {object} explicitConfig - Explicit CLI/config values
|
|
1734
|
+
* @param {object} existingConfig - Existing project configuration
|
|
1735
|
+
* @returns {Promise<object>} Object with token/ARN values keyed by config field names
|
|
1736
|
+
* @private
|
|
1737
|
+
*/
|
|
1738
|
+
async _runSecretPrompts(previousAnswers, explicitConfig, existingConfig) {
|
|
1739
|
+
const results = {};
|
|
1740
|
+
|
|
1741
|
+
for (const classification of SECRET_CLASSIFICATIONS) {
|
|
1742
|
+
// Check if this secret type's stages apply to the current context
|
|
1743
|
+
if (!this._secretStagesApply(classification, previousAnswers)) continue;
|
|
1744
|
+
|
|
1745
|
+
// Determine the config keys for this classification
|
|
1746
|
+
const arnConfigKey = this._getArnConfigKey(classification);
|
|
1747
|
+
const plaintextConfigKey = this._getPlaintextConfigKey(classification);
|
|
1748
|
+
|
|
1749
|
+
// Skip if ARN already provided via CLI flag
|
|
1750
|
+
if (explicitConfig[arnConfigKey]) {
|
|
1751
|
+
results[arnConfigKey] = explicitConfig[arnConfigKey];
|
|
1752
|
+
continue;
|
|
1753
|
+
}
|
|
1754
|
+
|
|
1755
|
+
// Skip if plaintext already provided via CLI flag
|
|
1756
|
+
if (explicitConfig[plaintextConfigKey]) {
|
|
1757
|
+
results[plaintextConfigKey] = explicitConfig[plaintextConfigKey];
|
|
1758
|
+
continue;
|
|
1759
|
+
}
|
|
1760
|
+
|
|
1761
|
+
// Query for existing managed secrets of this type
|
|
1762
|
+
const managedSecrets = await this._listManagedSecrets(classification.identifier);
|
|
1763
|
+
|
|
1764
|
+
if (managedSecrets.length > 0) {
|
|
1765
|
+
// Show selection list: managed secrets + plaintext entry + skip
|
|
1766
|
+
const answer = await this._promptSecretSelection(classification, managedSecrets, previousAnswers);
|
|
1767
|
+
Object.assign(results, answer);
|
|
1768
|
+
} else {
|
|
1769
|
+
// Fall back to existing plaintext prompt
|
|
1770
|
+
const answer = await this._promptPlaintextFallback(classification, previousAnswers, explicitConfig, existingConfig);
|
|
1771
|
+
Object.assign(results, answer);
|
|
1772
|
+
}
|
|
1773
|
+
}
|
|
1774
|
+
|
|
1775
|
+
return results;
|
|
1776
|
+
}
|
|
1777
|
+
|
|
1778
|
+
/**
|
|
1779
|
+
* Determine if a secret classification's stages apply to the current generation context.
|
|
1780
|
+
* Build-time secrets apply when the project involves a Docker build step.
|
|
1781
|
+
* Runtime secrets apply when the architecture uses HuggingFace Hub models.
|
|
1782
|
+
* Requirements: 8.9
|
|
1783
|
+
* @param {object} classification - Secret classification entry
|
|
1784
|
+
* @param {object} answers - Current answers from previous phases
|
|
1785
|
+
* @returns {boolean} True if the secret type is applicable
|
|
1786
|
+
* @private
|
|
1787
|
+
*/
|
|
1788
|
+
_secretStagesApply(classification, answers) {
|
|
1789
|
+
const architecture = answers.architecture || answers.deploymentConfig?.split('-')[0];
|
|
1790
|
+
const backend = answers.backend || answers.deploymentConfig?.split('-').slice(1).join('-');
|
|
1791
|
+
|
|
1792
|
+
if (classification.identifier === 'hf-token') {
|
|
1793
|
+
// HF token applies to transformers, diffusors, and Triton LLM backends
|
|
1794
|
+
const isTransformers = architecture === 'transformers';
|
|
1795
|
+
const isDiffusors = architecture === 'diffusors';
|
|
1796
|
+
const isTritonLlm = architecture === 'triton' && (backend === 'vllm' || backend === 'tensorrtllm');
|
|
1797
|
+
|
|
1798
|
+
if (!isTransformers && !isDiffusors && !isTritonLlm) return false;
|
|
1799
|
+
|
|
1800
|
+
// Skip for non-HuggingFace model sources
|
|
1801
|
+
const modelSource = answers.modelSource;
|
|
1802
|
+
if (modelSource && modelSource !== 'huggingface') return false;
|
|
1803
|
+
|
|
1804
|
+
return true;
|
|
1805
|
+
}
|
|
1806
|
+
|
|
1807
|
+
if (classification.identifier === 'ngc-token') {
|
|
1808
|
+
// NGC token only applies to transformers-tensorrt-llm (build-time only)
|
|
1809
|
+
if (architecture === 'triton') return false;
|
|
1810
|
+
if (architecture === 'diffusors') return false;
|
|
1811
|
+
return architecture === 'transformers' && backend === 'tensorrt-llm';
|
|
1812
|
+
}
|
|
1813
|
+
|
|
1814
|
+
// For future secret types, check if any stage applies
|
|
1815
|
+
// Build-time applies to all Docker-based deployments
|
|
1816
|
+
// Runtime applies to architectures that download at startup
|
|
1817
|
+
return classification.stages.length > 0;
|
|
1818
|
+
}
|
|
1819
|
+
|
|
1820
|
+
/**
|
|
1821
|
+
* Get the ARN config key for a classification.
|
|
1822
|
+
* Maps classification identifiers to config field names.
|
|
1823
|
+
* @param {object} classification - Secret classification entry
|
|
1824
|
+
* @returns {string} Config key for the ARN value
|
|
1825
|
+
* @private
|
|
1826
|
+
*/
|
|
1827
|
+
_getArnConfigKey(classification) {
|
|
1828
|
+
const keyMap = {
|
|
1829
|
+
'hf-token': 'hfTokenArn',
|
|
1830
|
+
'ngc-token': 'ngcTokenArn'
|
|
1831
|
+
};
|
|
1832
|
+
return keyMap[classification.identifier] || `${classification.identifier.replace(/-([a-z])/g, (_, c) => c.toUpperCase())}Arn`;
|
|
1833
|
+
}
|
|
1834
|
+
|
|
1835
|
+
/**
|
|
1836
|
+
* Get the plaintext config key for a classification.
|
|
1837
|
+
* Maps classification identifiers to config field names.
|
|
1838
|
+
* @param {object} classification - Secret classification entry
|
|
1839
|
+
* @returns {string} Config key for the plaintext value
|
|
1840
|
+
* @private
|
|
1841
|
+
*/
|
|
1842
|
+
_getPlaintextConfigKey(classification) {
|
|
1843
|
+
const keyMap = {
|
|
1844
|
+
'hf-token': 'hfToken',
|
|
1845
|
+
'ngc-token': 'ngcApiKey'
|
|
1846
|
+
};
|
|
1847
|
+
return keyMap[classification.identifier] || classification.identifier.replace(/-([a-z])/g, (_, c) => c.toUpperCase());
|
|
1848
|
+
}
|
|
1849
|
+
|
|
1850
|
+
/**
|
|
1851
|
+
* List managed secrets of a given type from AWS Secrets Manager.
|
|
1852
|
+
* Uses the active bootstrap profile to query for secrets tagged with
|
|
1853
|
+
* the mlcc:secret-type matching the given identifier.
|
|
1854
|
+
* @param {string} secretType - The secret type identifier (e.g., 'hf-token')
|
|
1855
|
+
* @returns {Promise<Array<{name: string, arn: string}>>} Array of managed secrets
|
|
1856
|
+
* @private
|
|
1857
|
+
*/
|
|
1858
|
+
async _listManagedSecrets(secretType) {
|
|
1859
|
+
try {
|
|
1860
|
+
const bootstrapConfig = new BootstrapConfig();
|
|
1861
|
+
const activeProfile = bootstrapConfig.getActiveProfile();
|
|
1862
|
+
if (!activeProfile) return [];
|
|
1863
|
+
|
|
1864
|
+
const profile = activeProfile.config.awsProfile;
|
|
1865
|
+
const region = activeProfile.config.awsRegion;
|
|
1866
|
+
if (!profile || !region) return [];
|
|
1867
|
+
|
|
1868
|
+
const command = `aws secretsmanager list-secrets --filters Key=tag-key,Values=mlcc:managed-by Key=tag-value,Values=ml-container-creator --region ${region} --profile ${profile} --output json`;
|
|
1869
|
+
const output = execSync(command, { encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'], timeout: 10000 });
|
|
1870
|
+
const trimmed = output.trim();
|
|
1871
|
+
if (!trimmed) return [];
|
|
1872
|
+
|
|
1873
|
+
const result = JSON.parse(trimmed);
|
|
1874
|
+
const secrets = result.SecretList || [];
|
|
1875
|
+
|
|
1876
|
+
// Filter by secret type tag
|
|
1877
|
+
return secrets
|
|
1878
|
+
.filter(secret => {
|
|
1879
|
+
const typeTag = (secret.Tags || []).find(t => t.Key === 'mlcc:secret-type');
|
|
1880
|
+
return typeTag && typeTag.Value === secretType;
|
|
1881
|
+
})
|
|
1882
|
+
.map(secret => ({
|
|
1883
|
+
name: secret.Name,
|
|
1884
|
+
arn: secret.ARN
|
|
1885
|
+
}));
|
|
1886
|
+
} catch {
|
|
1887
|
+
// If AWS CLI fails (not configured, no credentials, etc.), return empty
|
|
1888
|
+
return [];
|
|
1889
|
+
}
|
|
1890
|
+
}
|
|
1891
|
+
|
|
1892
|
+
/**
|
|
1893
|
+
* Display a selection list for managed secrets of a given type.
|
|
1894
|
+
* Shows available secrets plus options for plaintext entry and skip.
|
|
1895
|
+
* Requirements: 8.1, 8.2, 8.3, 8.4, 8.5, 8.6
|
|
1896
|
+
* @param {object} classification - Secret classification entry
|
|
1897
|
+
* @param {Array<{name: string, arn: string}>} managedSecrets - Available managed secrets
|
|
1898
|
+
* @param {object} previousAnswers - Answers from previous phases
|
|
1899
|
+
* @returns {Promise<object>} Object with the selected value keyed by config field name
|
|
1900
|
+
* @private
|
|
1901
|
+
*/
|
|
1902
|
+
async _promptSecretSelection(classification, managedSecrets, previousAnswers) {
|
|
1903
|
+
const arnConfigKey = this._getArnConfigKey(classification);
|
|
1904
|
+
|
|
1905
|
+
console.log(`\n🔐 ${classification.displayName}`);
|
|
1906
|
+
console.log(` ${classification.purpose}`);
|
|
1907
|
+
|
|
1908
|
+
// Build choices: managed secrets + enter plaintext + skip
|
|
1909
|
+
const choices = [
|
|
1910
|
+
...managedSecrets.map(secret => ({
|
|
1911
|
+
name: `🔒 ${secret.name} (${secret.arn})`,
|
|
1912
|
+
value: secret.arn,
|
|
1913
|
+
short: secret.name
|
|
1914
|
+
})),
|
|
1915
|
+
{ name: '✏️ Enter plaintext token', value: '__plaintext__', short: 'Plaintext' },
|
|
1916
|
+
{ name: '⏭️ Skip (use environment variable)', value: '__skip__', short: 'Skip' }
|
|
1917
|
+
];
|
|
1918
|
+
|
|
1919
|
+
const { secretSelection } = await this._runPrompts([{
|
|
1920
|
+
type: 'list',
|
|
1921
|
+
name: 'secretSelection',
|
|
1922
|
+
message: `Select ${classification.promptLabel}:`,
|
|
1923
|
+
choices
|
|
1924
|
+
}]);
|
|
1925
|
+
|
|
1926
|
+
if (secretSelection === '__skip__') {
|
|
1927
|
+
return {};
|
|
1928
|
+
}
|
|
1929
|
+
|
|
1930
|
+
if (secretSelection === '__plaintext__') {
|
|
1931
|
+
// Use existing plaintext flow
|
|
1932
|
+
return this._promptPlaintextEntry(classification, previousAnswers);
|
|
1933
|
+
}
|
|
1934
|
+
|
|
1935
|
+
// User selected a managed secret ARN
|
|
1936
|
+
return { [arnConfigKey]: secretSelection };
|
|
1937
|
+
}
|
|
1938
|
+
|
|
1939
|
+
/**
|
|
1940
|
+
* Prompt for plaintext token entry with ARN detection.
|
|
1941
|
+
* If the user enters an ARN, store it as an ARN reference.
|
|
1942
|
+
* Requirements: 8.4, 8.5, 8.6
|
|
1943
|
+
* @param {object} classification - Secret classification entry
|
|
1944
|
+
* @param {object} previousAnswers - Answers from previous phases
|
|
1945
|
+
* @returns {Promise<object>} Object with the value keyed by config field name
|
|
1946
|
+
* @private
|
|
1947
|
+
*/
|
|
1948
|
+
async _promptPlaintextEntry(classification, _previousAnswers) {
|
|
1949
|
+
const arnConfigKey = this._getArnConfigKey(classification);
|
|
1950
|
+
const plaintextConfigKey = this._getPlaintextConfigKey(classification);
|
|
1951
|
+
|
|
1952
|
+
const { tokenValue } = await this._runPrompts([{
|
|
1953
|
+
type: 'input',
|
|
1954
|
+
name: 'tokenValue',
|
|
1955
|
+
message: `${classification.promptLabel} (enter token, ARN, or leave empty):`,
|
|
1956
|
+
validate: (input) => {
|
|
1957
|
+
// Empty is valid
|
|
1958
|
+
if (!input || input.trim() === '') return true;
|
|
1959
|
+
// Environment variable reference is valid
|
|
1960
|
+
if (input.trim().startsWith('$')) return true;
|
|
1961
|
+
return true;
|
|
1962
|
+
}
|
|
1963
|
+
}]);
|
|
1964
|
+
|
|
1965
|
+
if (!tokenValue || tokenValue.trim() === '') {
|
|
1966
|
+
return {};
|
|
1967
|
+
}
|
|
1968
|
+
|
|
1969
|
+
const value = tokenValue.trim();
|
|
1970
|
+
|
|
1971
|
+
// ARN detection: if the value is a Secrets Manager ARN, store as ARN
|
|
1972
|
+
if (isSecretsManagerArn(value)) {
|
|
1973
|
+
return { [arnConfigKey]: value };
|
|
1974
|
+
}
|
|
1975
|
+
|
|
1976
|
+
// Otherwise store as plaintext
|
|
1977
|
+
return { [plaintextConfigKey]: value };
|
|
1978
|
+
}
|
|
1979
|
+
|
|
1980
|
+
/**
|
|
1981
|
+
* Fall back to existing plaintext prompt when no managed secrets exist.
|
|
1982
|
+
* Uses the same prompts as the original hfTokenPrompts/ngcApiKeyPrompts
|
|
1983
|
+
* but with ARN detection on the input.
|
|
1984
|
+
* Requirements: 8.7
|
|
1985
|
+
* @param {object} classification - Secret classification entry
|
|
1986
|
+
* @param {object} previousAnswers - Answers from previous phases
|
|
1987
|
+
* @param {object} explicitConfig - Explicit CLI/config values
|
|
1988
|
+
* @param {object} existingConfig - Existing project configuration
|
|
1989
|
+
* @returns {Promise<object>} Object with the value keyed by config field name
|
|
1990
|
+
* @private
|
|
1991
|
+
*/
|
|
1992
|
+
async _promptPlaintextFallback(classification, _previousAnswers, _explicitConfig, _existingConfig) {
|
|
1993
|
+
const arnConfigKey = this._getArnConfigKey(classification);
|
|
1994
|
+
const plaintextConfigKey = this._getPlaintextConfigKey(classification);
|
|
1995
|
+
|
|
1996
|
+
// If in auto-prompt mode, skip
|
|
1997
|
+
if (this.configManager?.isAutoPrompt()) {
|
|
1998
|
+
return {};
|
|
1999
|
+
}
|
|
2000
|
+
|
|
2001
|
+
// Display context-appropriate security message
|
|
2002
|
+
if (classification.identifier === 'hf-token') {
|
|
2003
|
+
console.log('\n🔐 HuggingFace Authentication');
|
|
2004
|
+
console.log(' Many models (e.g. Llama, Mistral) are gated and require a token.');
|
|
2005
|
+
console.log(' 💡 Tip: Use `ml-container-creator secrets create --type hf-token` to store');
|
|
2006
|
+
console.log(' your token in AWS Secrets Manager for zero-knowledge operation.');
|
|
2007
|
+
console.log(' For CI/CD pipelines, use "$HF_TOKEN" to reference an environment variable.\n');
|
|
2008
|
+
} else if (classification.identifier === 'ngc-token') {
|
|
2009
|
+
console.log('\n🔐 NVIDIA NGC Authentication');
|
|
2010
|
+
console.log(' TensorRT-LLM base images are hosted on NVIDIA NGC and require an API key.');
|
|
2011
|
+
console.log(' 💡 Tip: Use `ml-container-creator secrets create --type ngc-token` to store');
|
|
2012
|
+
console.log(' your key in AWS Secrets Manager for zero-knowledge operation.');
|
|
2013
|
+
console.log(' For CI/CD pipelines, use "$NGC_API_KEY" to reference an environment variable.\n');
|
|
2014
|
+
} else {
|
|
2015
|
+
console.log(`\n🔐 ${classification.displayName}`);
|
|
2016
|
+
console.log(` ${classification.purpose}\n`);
|
|
2017
|
+
}
|
|
2018
|
+
|
|
2019
|
+
const { tokenValue } = await this._runPrompts([{
|
|
2020
|
+
type: 'input',
|
|
2021
|
+
name: 'tokenValue',
|
|
2022
|
+
message: `${classification.promptLabel} (enter token, ARN, "$${classification.envVar}" for env var, or leave empty):`,
|
|
2023
|
+
validate: (input) => {
|
|
2024
|
+
if (!input || input.trim() === '') return true;
|
|
2025
|
+
if (input.trim().startsWith('$')) return true;
|
|
2026
|
+
// Warn about HF token format
|
|
2027
|
+
if (classification.identifier === 'hf-token' && !input.startsWith('hf_') && !isSecretsManagerArn(input)) {
|
|
2028
|
+
console.warn('\n⚠️ Warning: HuggingFace tokens typically start with "hf_"');
|
|
2029
|
+
console.warn(' If this is intentional, you can ignore this warning.');
|
|
2030
|
+
}
|
|
2031
|
+
return true;
|
|
2032
|
+
}
|
|
2033
|
+
}]);
|
|
2034
|
+
|
|
2035
|
+
if (!tokenValue || tokenValue.trim() === '') {
|
|
2036
|
+
return {};
|
|
2037
|
+
}
|
|
2038
|
+
|
|
2039
|
+
const value = tokenValue.trim();
|
|
2040
|
+
|
|
2041
|
+
// ARN detection: if the value is a Secrets Manager ARN, store as ARN
|
|
2042
|
+
if (isSecretsManagerArn(value)) {
|
|
2043
|
+
return { [arnConfigKey]: value };
|
|
2044
|
+
}
|
|
2045
|
+
|
|
2046
|
+
// Otherwise store as plaintext
|
|
2047
|
+
return { [plaintextConfigKey]: value };
|
|
2048
|
+
}
|
|
2049
|
+
|
|
1557
2050
|
/**
|
|
1558
2051
|
* CUDA-to-AMI mapping.
|
|
1559
2052
|
* Maps CUDA major.minor versions to the SageMaker inference AMI that provides
|
|
@@ -1561,13 +2054,14 @@ export default class PromptRunner {
|
|
|
1561
2054
|
* @private
|
|
1562
2055
|
*/
|
|
1563
2056
|
static CUDA_AMI_MAP = {
|
|
1564
|
-
'11.0': 'al2-ami-sagemaker-inference-gpu-2
|
|
2057
|
+
'11.0': 'al2-ami-sagemaker-inference-gpu-2',
|
|
1565
2058
|
'11.4': 'al2-ami-sagemaker-inference-gpu-2-1',
|
|
1566
|
-
'11.8': 'al2-ami-sagemaker-inference-gpu-
|
|
2059
|
+
'11.8': 'al2-ami-sagemaker-inference-gpu-2-1',
|
|
1567
2060
|
'12.1': 'al2-ami-sagemaker-inference-gpu-3-1',
|
|
1568
|
-
'12.2': 'al2-ami-sagemaker-inference-gpu-3-
|
|
1569
|
-
'12.4': 'al2-ami-sagemaker-inference-gpu-3-
|
|
1570
|
-
'12.6': 'al2-ami-sagemaker-inference-gpu-3-
|
|
2061
|
+
'12.2': 'al2-ami-sagemaker-inference-gpu-3-1',
|
|
2062
|
+
'12.4': 'al2-ami-sagemaker-inference-gpu-3-1',
|
|
2063
|
+
'12.6': 'al2-ami-sagemaker-inference-gpu-3-1',
|
|
2064
|
+
'13.0': 'al2023-ami-sagemaker-inference-gpu-4-1'
|
|
1571
2065
|
};
|
|
1572
2066
|
|
|
1573
2067
|
/**
|