@aws/ml-container-creator 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. package/bin/cli.js +31 -137
  2. package/config/parameter-schema-v2.json +2065 -0
  3. package/package.json +6 -3
  4. package/servers/lib/catalogs/jumpstart-public.json +101 -16
  5. package/servers/lib/catalogs/models.json +182 -26
  6. package/src/app.js +6 -389
  7. package/src/lib/bootstrap-command-handler.js +75 -1078
  8. package/src/lib/bootstrap-profile-manager.js +634 -0
  9. package/src/lib/bootstrap-provisioners.js +421 -0
  10. package/src/lib/config-loader.js +405 -0
  11. package/src/lib/config-manager.js +59 -1668
  12. package/src/lib/config-mcp-client.js +118 -0
  13. package/src/lib/config-validator.js +634 -0
  14. package/src/lib/cuda-resolver.js +140 -0
  15. package/src/lib/e2e-catalog-validator.js +251 -3
  16. package/src/lib/e2e-ci-recorder.js +103 -0
  17. package/src/lib/generated/cli-options.js +471 -0
  18. package/src/lib/generated/parameter-matrix.js +671 -0
  19. package/src/lib/generated/validation-rules.js +202 -0
  20. package/src/lib/marketplace-flow.js +276 -0
  21. package/src/lib/mcp-query-runner.js +768 -0
  22. package/src/lib/parameter-schema-validator.js +62 -18
  23. package/src/lib/prompt-runner.js +41 -1504
  24. package/src/lib/prompts/feature-prompts.js +172 -0
  25. package/src/lib/prompts/index.js +48 -0
  26. package/src/lib/prompts/infrastructure-prompts.js +690 -0
  27. package/src/lib/prompts/model-prompts.js +552 -0
  28. package/src/lib/prompts/project-prompts.js +70 -0
  29. package/src/lib/prompts.js +2 -1446
  30. package/src/lib/registry-command-handler.js +135 -3
  31. package/src/lib/secrets-prompt-runner.js +251 -0
  32. package/src/lib/template-variable-resolver.js +398 -0
  33. package/templates/code/serve +5 -134
  34. package/templates/code/serve.d/lmi.ejs +19 -0
  35. package/templates/code/serve.d/sglang.ejs +47 -0
  36. package/templates/code/serve.d/tensorrt-llm.ejs +53 -0
  37. package/templates/code/serve.d/vllm.ejs +48 -0
  38. package/templates/do/clean +1 -1387
  39. package/templates/do/clean.d/async-inference.ejs +508 -0
  40. package/templates/do/clean.d/batch-transform.ejs +512 -0
  41. package/templates/do/clean.d/hyperpod-eks.ejs +481 -0
  42. package/templates/do/clean.d/managed-inference.ejs +1043 -0
  43. package/templates/do/deploy +1 -1766
  44. package/templates/do/deploy.d/async-inference.ejs +501 -0
  45. package/templates/do/deploy.d/batch-transform.ejs +529 -0
  46. package/templates/do/deploy.d/hyperpod-eks.ejs +339 -0
  47. package/templates/do/deploy.d/managed-inference.ejs +726 -0
  48. package/config/parameter-schema.json +0 -88
@@ -20,12 +20,14 @@
20
20
 
21
21
  import os from 'node:os';
22
22
  import path from 'node:path';
23
+ import fs from 'node:fs';
23
24
  import { readFileSync } from 'node:fs';
24
25
  import { execSync } from 'node:child_process';
25
26
  import { fileURLToPath } from 'node:url';
26
27
  import DeploymentRegistry, { reconstructReplayFlags } from './deployment-registry.js';
27
28
  import { syncArchitectures } from './architecture-sync.js';
28
29
  import HuggingFaceClient from './huggingface-client.js';
30
+ import { computeConfigId } from './ci-register-helpers.js';
29
31
 
30
32
  const PERSONAL_REGISTRY_PATH = path.join(os.homedir(), '.ml-container-creator', 'registry.json');
31
33
  const PROJECT_REGISTRY_PATH = path.join(process.cwd(), '.ml-container-creator', 'registry.json');
@@ -53,7 +55,7 @@ export default class RegistryCommandHandler {
53
55
  await this._handleLog(options);
54
56
  break;
55
57
  case 'list':
56
- this._handleList(options);
58
+ await this._handleList(options);
57
59
  break;
58
60
  case 'get':
59
61
  this._handleGet(args[1]);
@@ -177,10 +179,11 @@ export default class RegistryCommandHandler {
177
179
  *
178
180
  * Displays entries from both personal and project-level registries.
179
181
  * Supports filtering by backend, architecture, model, instance-type, and status.
182
+ * When the e2e-status MCP server is reachable, enriches output with E2E status.
180
183
  *
181
184
  * @param {object} options - Parsed CLI options
182
185
  */
183
- _handleList(options) {
186
+ async _handleList(options) {
184
187
  const filters = this._extractFilters(options);
185
188
 
186
189
  const personalRegistry = new DeploymentRegistry(PERSONAL_REGISTRY_PATH);
@@ -197,6 +200,9 @@ export default class RegistryCommandHandler {
197
200
  return;
198
201
  }
199
202
 
203
+ // Attempt to fetch E2E status from the MCP server (silently degrades if unavailable)
204
+ const e2eStatusMap = await this._fetchE2eStatus(allEntries);
205
+
200
206
  console.log('\nDeployment Registry Entries:\n');
201
207
  for (const entry of allEntries) {
202
208
  const id = entry.id || '(no id)';
@@ -206,7 +212,16 @@ export default class RegistryCommandHandler {
206
212
  const it = entry.infrastructure?.instanceType || '(none)';
207
213
  const st = entry.status || '(none)';
208
214
  const src = entry._source === 'project' ? ' [project]' : '';
209
- console.log(` ${id} ${ts} ${dc} ${mn} ${it} ${st}${src}`);
215
+
216
+ // Append E2E status column only when MCP server provided data
217
+ let e2eCol = '';
218
+ if (e2eStatusMap) {
219
+ const configId = this._deriveConfigIdFromEntry(entry);
220
+ const e2e = configId ? e2eStatusMap.get(configId) : null;
221
+ e2eCol = e2e ? ` [E2E: ${e2e.testStatus}]` : ' [E2E: untested]';
222
+ }
223
+
224
+ console.log(` ${id} ${ts} ${dc} ${mn} ${it} ${st}${src}${e2eCol}`);
210
225
  }
211
226
  console.log('');
212
227
  }
@@ -764,4 +779,121 @@ EXAMPLES:
764
779
  const projectRegistry = new DeploymentRegistry(PROJECT_REGISTRY_PATH);
765
780
  return projectRegistry.get(id);
766
781
  }
782
+
783
+ /**
784
+ * Attempt to fetch E2E status from the e2e-status MCP server.
785
+ * Silently returns null if the server is unreachable, disabled, or returns an error.
786
+ * No error is shown to the user in any failure case.
787
+ *
788
+ * @param {Array} entries - Registry entries to fetch status for
789
+ * @returns {Promise<Map<string, object>|null>} Map of configId → status, or null if unavailable
790
+ */
791
+ async _fetchE2eStatus(entries) {
792
+ try {
793
+ // Load MCP config to check if e2e-status server is configured
794
+ const __fn = fileURLToPath(import.meta.url);
795
+ const generatorRoot = path.resolve(path.dirname(__fn), '..', '..');
796
+ const mcpConfigPath = path.join(generatorRoot, 'config', 'mcp.json');
797
+
798
+ if (!fs.existsSync(mcpConfigPath)) return null;
799
+
800
+ const mcpConfig = JSON.parse(fs.readFileSync(mcpConfigPath, 'utf8'));
801
+ const serverConfig = mcpConfig.mcpServers?.['e2e-status'];
802
+
803
+ if (!serverConfig) return null;
804
+
805
+ // Respect disabled flag if present
806
+ if (serverConfig.disabled === true) return null;
807
+
808
+ // Derive configIds from entries
809
+ const configIds = [];
810
+ for (const entry of entries) {
811
+ const configId = this._deriveConfigIdFromEntry(entry);
812
+ if (configId) configIds.push(configId);
813
+ }
814
+
815
+ if (configIds.length === 0) return null;
816
+
817
+ // Spawn the MCP server and call get_e2e_status
818
+ const { Client } = await import('@modelcontextprotocol/sdk/client/index.js');
819
+ const { StdioClientTransport } = await import('@modelcontextprotocol/sdk/client/stdio.js');
820
+
821
+ const resolvedArgs = (serverConfig.args || []).map(arg => {
822
+ if (arg && !path.isAbsolute(arg) && !arg.startsWith('-')) {
823
+ return path.resolve(generatorRoot, arg);
824
+ }
825
+ return arg;
826
+ });
827
+
828
+ const transport = new StdioClientTransport({
829
+ command: serverConfig.command,
830
+ args: resolvedArgs,
831
+ env: { ...process.env, ...(serverConfig.env || {}) },
832
+ stderr: 'pipe'
833
+ });
834
+
835
+ const client = new Client(
836
+ { name: 'ml-container-creator', version: '1.0.0' },
837
+ { capabilities: {} }
838
+ );
839
+
840
+ // Use a short timeout to avoid blocking the CLI
841
+ const timeoutMs = 5000;
842
+ const result = await Promise.race([
843
+ (async () => {
844
+ await client.connect(transport);
845
+ const response = await client.callTool({
846
+ name: 'get_e2e_status',
847
+ arguments: { configIds }
848
+ });
849
+ await client.close();
850
+ return response;
851
+ })(),
852
+ new Promise(resolve => setTimeout(() => resolve(null), timeoutMs))
853
+ ]);
854
+
855
+ if (!result) return null;
856
+
857
+ // Parse the response
858
+ const textBlock = result.content?.find(b => b.type === 'text');
859
+ if (!textBlock) return null;
860
+
861
+ const parsed = JSON.parse(textBlock.text);
862
+ if (!parsed.results || !Array.isArray(parsed.results)) return null;
863
+
864
+ // Build a map of configId → status object
865
+ const statusMap = new Map();
866
+ for (const item of parsed.results) {
867
+ statusMap.set(item.configId, item);
868
+ }
869
+ return statusMap;
870
+ } catch {
871
+ // Silently degrade — no error shown to user
872
+ return null;
873
+ }
874
+ }
875
+
876
+ /**
877
+ * Derive a configId from a registry entry using the same hashing algorithm
878
+ * as do/register --ci. Returns null if the entry lacks sufficient data.
879
+ *
880
+ * @param {object} entry - A deployment registry entry
881
+ * @returns {string|null} 16-char hex configId, or null
882
+ */
883
+ _deriveConfigIdFromEntry(entry) {
884
+ try {
885
+ const deploymentConfig = entry.deployment?.deploymentConfig || '';
886
+ const modelName = entry.model?.modelName || 'none';
887
+ const instanceType = entry.infrastructure?.instanceType || '';
888
+ const region = entry.infrastructure?.region || 'us-west-2';
889
+ const deploymentTarget = entry.deployment?.deploymentTarget || 'realtime-inference';
890
+
891
+ // Need at least deploymentConfig and instanceType to produce a meaningful hash
892
+ if (!deploymentConfig && !instanceType) return null;
893
+
894
+ return computeConfigId(deploymentConfig, modelName, instanceType, region, deploymentTarget);
895
+ } catch {
896
+ return null;
897
+ }
898
+ }
767
899
  }
@@ -0,0 +1,251 @@
1
+ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ // SPDX-License-Identifier: Apache-2.0
3
+
4
+ /**
5
+ * Secrets Prompt Runner - Handles secret selection and plaintext entry prompts.
6
+ * Uses delegation pattern: receives parent PromptRunner reference to access shared state.
7
+ */
8
+
9
+ import { execSync } from 'node:child_process';
10
+ import { SECRET_CLASSIFICATIONS } from './secret-classification.js';
11
+ import { isSecretsManagerArn } from './arn-detection.js';
12
+ import BootstrapConfig from './bootstrap-config.js';
13
+
14
export default class SecretsPromptRunner {
    /**
     * @param {object} runner - Parent PromptRunner. This class calls `runner._runPrompts`,
     *   reads the optional `runner.configManager`, and honours an optional
     *   `runner._listManagedSecrets` override.
     */
    constructor(runner) {
        this.runner = runner;
    }

    /**
     * Run secret prompts using the Secret_Classification registry.
     *
     * For each applicable classification, an explicit ARN wins over an explicit
     * plaintext value; otherwise the user picks from the managed secrets, or is
     * asked for a plaintext value when no managed secret exists.
     *
     * @param {object} previousAnswers - Answers from previous prompt phases
     * @param {object} explicitConfig - Explicit CLI/config values
     * @param {object} existingConfig - Existing project configuration
     * @returns {Promise<object>} Object with token/ARN values keyed by config field names
     */
    async _runSecretPrompts(previousAnswers, explicitConfig, existingConfig) {
        // Tolerate callers that pass null/undefined instead of an empty object.
        const answers = previousAnswers ?? {};
        const explicit = explicitConfig ?? {};
        const results = {};

        for (const classification of SECRET_CLASSIFICATIONS) {
            if (!this._secretStagesApply(classification, answers)) continue;

            const arnConfigKey = this._getArnConfigKey(classification);
            const plaintextConfigKey = this._getPlaintextConfigKey(classification);

            // Explicit configuration short-circuits prompting; the ARN key is checked first.
            const explicitKey = [arnConfigKey, plaintextConfigKey].find(key => explicit[key]);
            if (explicitKey) {
                results[explicitKey] = explicit[explicitKey];
                continue;
            }

            const managedSecrets = await this._listManagedSecrets(classification.identifier);
            const answer = managedSecrets.length > 0
                ? await this._promptSecretSelection(classification, managedSecrets, answers)
                : await this._promptPlaintextFallback(classification, answers, explicit, existingConfig);
            Object.assign(results, answer);
        }

        return results;
    }

    /**
     * Decide whether a secret classification is relevant for the current answers.
     *
     * Architecture and backend come from `answers.architecture` / `answers.backend`,
     * falling back to the first and remaining dash-separated parts of
     * `answers.deploymentConfig`.
     *
     * @param {object} classification - Entry of SECRET_CLASSIFICATIONS
     * @param {object} answers - Answers collected so far
     * @returns {boolean} true when the secret should be prompted for
     */
    _secretStagesApply(classification, answers) {
        const architecture = answers.architecture || answers.deploymentConfig?.split('-')[0];
        const backend = answers.backend || answers.deploymentConfig?.split('-').slice(1).join('-');

        if (classification.identifier === 'hf-token') {
            const isTritonLlm = architecture === 'triton' && (backend === 'vllm' || backend === 'tensorrtllm');
            const usesHuggingFaceModels = architecture === 'transformers'
                || architecture === 'diffusors'
                || isTritonLlm;
            if (!usesHuggingFaceModels) return false;

            // An unset model source is treated as HuggingFace.
            const { modelSource } = answers;
            return !modelSource || modelSource === 'huggingface';
        }

        if (classification.identifier === 'ngc-token') {
            return architecture === 'transformers' && backend === 'tensorrt-llm';
        }

        return classification.stages.length > 0;
    }

    /**
     * Convert a dash-separated identifier to camelCase (e.g. `my-api-key` → `myApiKey`).
     *
     * @param {string} identifier - Classification identifier
     * @returns {string} camelCase form
     */
    _camelCaseIdentifier(identifier) {
        return identifier.replace(/-([a-z])/g, (_, c) => c.toUpperCase());
    }

    /**
     * Config key under which a Secrets Manager ARN for this classification is stored.
     *
     * @param {object} classification - Entry of SECRET_CLASSIFICATIONS
     * @returns {string} Config field name
     */
    _getArnConfigKey(classification) {
        const keyMap = {
            'hf-token': 'hfTokenArn',
            'ngc-token': 'ngcTokenArn'
        };
        const { identifier } = classification;
        // Own-property check so identifiers such as "constructor" cannot hit Object.prototype.
        if (Object.prototype.hasOwnProperty.call(keyMap, identifier)) {
            return keyMap[identifier];
        }
        return `${this._camelCaseIdentifier(identifier)}Arn`;
    }

    /**
     * Config key under which a plaintext value for this classification is stored.
     *
     * @param {object} classification - Entry of SECRET_CLASSIFICATIONS
     * @returns {string} Config field name
     */
    _getPlaintextConfigKey(classification) {
        const keyMap = {
            'hf-token': 'hfToken',
            'ngc-token': 'ngcApiKey'
        };
        const { identifier } = classification;
        if (Object.prototype.hasOwnProperty.call(keyMap, identifier)) {
            return keyMap[identifier];
        }
        return this._camelCaseIdentifier(identifier);
    }

    /**
     * List Secrets Manager secrets tagged as managed by ml-container-creator whose
     * `mlcc:secret-type` tag equals `secretType`, using the AWS CLI with the active
     * bootstrap profile. Any failure yields an empty list.
     *
     * @param {string} secretType - Classification identifier, e.g. `hf-token`
     * @returns {Promise<Array<{name: string, arn: string}>>} Matching secrets
     */
    async _listManagedSecrets(secretType) {
        // Allow test overrides on the parent runner
        const override = this.runner._listManagedSecrets;
        if (override && override !== this._listManagedSecrets) {
            return this.runner._listManagedSecrets(secretType);
        }

        try {
            const bootstrapConfig = new BootstrapConfig();
            const activeProfile = bootstrapConfig.getActiveProfile();
            if (!activeProfile) return [];

            const profile = activeProfile.config.awsProfile;
            const region = activeProfile.config.awsRegion;
            if (!profile || !region) return [];

            // Both values are interpolated into a shell command line, so refuse anything
            // outside a conservative character set rather than let the shell interpret it.
            const shellSafe = /^[A-Za-z0-9_.@:/+-]+$/;
            if (!shellSafe.test(String(profile)) || !shellSafe.test(String(region))) return [];

            const command = `aws secretsmanager list-secrets --filters Key=tag-key,Values=mlcc:managed-by Key=tag-value,Values=ml-container-creator --region ${region} --profile ${profile} --output json`;
            const output = execSync(command, { encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'], timeout: 10000 });
            const trimmed = output.trim();
            if (!trimmed) return [];

            const secrets = JSON.parse(trimmed).SecretList || [];

            return secrets
                .filter(secret => (secret.Tags || []).some(
                    tag => tag.Key === 'mlcc:secret-type' && tag.Value === secretType
                ))
                .map(secret => ({
                    name: secret.Name,
                    arn: secret.ARN
                }));
        } catch {
            // Missing CLI, bad credentials, timeout or unparsable output: behave as "no managed secrets".
            return [];
        }
    }

    /**
     * Ask the user to pick one of the managed secrets, enter a plaintext value, or skip.
     *
     * @param {object} classification - Entry of SECRET_CLASSIFICATIONS
     * @param {Array<{name: string, arn: string}>} managedSecrets - Secrets to offer
     * @param {object} previousAnswers - Answers from previous prompt phases
     * @returns {Promise<object>} `{}` on skip, otherwise a single ARN or plaintext entry
     */
    async _promptSecretSelection(classification, managedSecrets, previousAnswers) {
        const arnConfigKey = this._getArnConfigKey(classification);

        console.log(`\n🔐 ${classification.displayName}`);
        console.log(` ${classification.purpose}`);

        const choices = [
            ...managedSecrets.map(secret => ({
                name: `🔒 ${secret.name} (${secret.arn})`,
                value: secret.arn,
                short: secret.name
            })),
            { name: '✏️ Enter plaintext token', value: '__plaintext__', short: 'Plaintext' },
            { name: '⏭️ Skip (use environment variable)', value: '__skip__', short: 'Skip' }
        ];

        const { secretSelection } = await this.runner._runPrompts([{
            type: 'list',
            name: 'secretSelection',
            message: `Select ${classification.promptLabel}:`,
            choices
        }]);

        if (secretSelection === '__skip__') {
            return {};
        }

        if (secretSelection === '__plaintext__') {
            return this._promptPlaintextEntry(classification, previousAnswers);
        }

        return { [arnConfigKey]: secretSelection };
    }

    /**
     * Turn raw prompt input into a config fragment: nothing for blank input, the ARN
     * key for a Secrets Manager ARN, the plaintext key for everything else (including
     * `$ENV_VAR` references).
     *
     * @param {object} classification - Entry of SECRET_CLASSIFICATIONS
     * @param {string|undefined} rawValue - Value returned by the prompt
     * @returns {object} `{}` or a single-key object
     */
    _classifyTokenValue(classification, rawValue) {
        const value = (rawValue || '').trim();
        if (value === '') {
            return {};
        }

        const key = isSecretsManagerArn(value)
            ? this._getArnConfigKey(classification)
            : this._getPlaintextConfigKey(classification);
        return { [key]: value };
    }

    /**
     * Prompt for a free-form token/ARN after the user chose "Enter plaintext token".
     *
     * @param {object} classification - Entry of SECRET_CLASSIFICATIONS
     * @param {object} _previousAnswers - Unused; kept for signature parity
     * @returns {Promise<object>} `{}` or a single ARN/plaintext entry
     */
    async _promptPlaintextEntry(classification, _previousAnswers) {
        const { tokenValue } = await this.runner._runPrompts([{
            type: 'input',
            name: 'tokenValue',
            message: `${classification.promptLabel} (enter token, ARN, or leave empty):`,
            // Every input is accepted: blank, "$VAR" references, ARNs and raw tokens.
            validate: () => true
        }]);

        return this._classifyTokenValue(classification, tokenValue);
    }

    /**
     * Prompt for a token/ARN when no managed secret exists. Skipped entirely (returns
     * `{}`) when the runner's config manager reports auto-prompt mode.
     *
     * @param {object} classification - Entry of SECRET_CLASSIFICATIONS
     * @param {object} _previousAnswers - Unused; kept for signature parity
     * @param {object} _explicitConfig - Unused; kept for signature parity
     * @param {object} _existingConfig - Unused; kept for signature parity
     * @returns {Promise<object>} `{}` or a single ARN/plaintext entry
     */
    async _promptPlaintextFallback(classification, _previousAnswers, _explicitConfig, _existingConfig) {
        if (this.runner.configManager?.isAutoPrompt()) {
            return {};
        }

        if (classification.identifier === 'hf-token') {
            console.log('\n🔐 HuggingFace Authentication');
            console.log(' Many models (e.g. Llama, Mistral) are gated and require a token.');
            console.log(' 💡 Tip: Use `ml-container-creator secrets create --type hf-token` to store');
            console.log(' your token in AWS Secrets Manager for zero-knowledge operation.');
            console.log(' For CI/CD pipelines, use "$HF_TOKEN" to reference an environment variable.\n');
        } else if (classification.identifier === 'ngc-token') {
            console.log('\n🔐 NVIDIA NGC Authentication');
            console.log(' TensorRT-LLM base images are hosted on NVIDIA NGC and require an API key.');
            console.log(' 💡 Tip: Use `ml-container-creator secrets create --type ngc-token` to store');
            console.log(' your key in AWS Secrets Manager for zero-knowledge operation.');
            console.log(' For CI/CD pipelines, use "$NGC_API_KEY" to reference an environment variable.\n');
        } else {
            console.log(`\n🔐 ${classification.displayName}`);
            console.log(` ${classification.purpose}\n`);
        }

        const { tokenValue } = await this.runner._runPrompts([{
            type: 'input',
            name: 'tokenValue',
            message: `${classification.promptLabel} (enter token, ARN, "$${classification.envVar}" for env var, or leave empty):`,
            validate: (input) => {
                // Check the trimmed value: it is what ends up being stored, so padding
                // around a valid "hf_" token must not trigger the warning.
                const candidate = String(input ?? '').trim();
                if (candidate === '' || candidate.startsWith('$')) return true;

                if (classification.identifier === 'hf-token' && !candidate.startsWith('hf_') && !isSecretsManagerArn(candidate)) {
                    console.warn('\n⚠️ Warning: HuggingFace tokens typically start with "hf_"');
                    console.warn(' If this is intentional, you can ignore this warning.');
                }
                // The warning is advisory only; the value is always accepted.
                return true;
            }
        }]);

        return this._classifyTokenValue(classification, tokenValue);
    }
}