@aws/ml-container-creator 0.2.6 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/bin/cli.js +38 -2
  2. package/config/bootstrap-stack.json +94 -1
  3. package/config/defaults.json +1 -1
  4. package/infra/ci-harness/package-lock.json +22 -9
  5. package/package.json +3 -1
  6. package/servers/instance-sizer/index.js +45 -8
  7. package/servers/instance-sizer/lib/instance-ranker.js +140 -11
  8. package/servers/instance-sizer/lib/model-resolver.js +10 -6
  9. package/servers/instance-sizer/lib/quota-resolver.js +368 -0
  10. package/servers/instance-sizer/package.json +2 -0
  11. package/servers/lib/catalogs/instances.json +527 -12
  12. package/servers/lib/catalogs/model-servers.json +298 -20
  13. package/servers/lib/catalogs/model-sizes.json +27 -0
  14. package/servers/lib/catalogs/models.json +101 -0
  15. package/servers/lib/schemas/image-catalog.schema.json +15 -1
  16. package/servers/model-picker/index.js +2 -1
  17. package/src/app.js +96 -2
  18. package/src/lib/architecture-sync.js +171 -0
  19. package/src/lib/arn-detection.js +22 -0
  20. package/src/lib/bootstrap-command-handler.js +178 -3
  21. package/src/lib/cli-handler.js +2 -2
  22. package/src/lib/config-manager.js +121 -1
  23. package/src/lib/cross-cutting-checker.js +119 -0
  24. package/src/lib/deployment-entry-schema.js +1 -2
  25. package/src/lib/prompt-runner.js +514 -20
  26. package/src/lib/prompts.js +67 -5
  27. package/src/lib/registry-command-handler.js +236 -0
  28. package/src/lib/schema-sync.js +31 -0
  29. package/src/lib/secret-classification.js +56 -0
  30. package/src/lib/secrets-command-handler.js +550 -0
  31. package/src/lib/template-manager.js +49 -1
  32. package/src/lib/validate-runner.js +174 -2
  33. package/src/lib/validation-report.js +8 -1
  34. package/src/prompt-adapter.js +3 -2
  35. package/templates/Dockerfile +10 -2
  36. package/templates/code/cuda_compat.sh +22 -0
  37. package/templates/code/serve +3 -0
  38. package/templates/code/start_server.sh +3 -0
  39. package/templates/diffusors/Dockerfile +2 -1
  40. package/templates/diffusors/serve +3 -0
  41. package/templates/do/README.md +33 -0
  42. package/templates/do/benchmark +646 -0
  43. package/templates/do/build +22 -0
  44. package/templates/do/clean +86 -0
  45. package/templates/do/config +41 -6
  46. package/templates/do/deploy +66 -6
  47. package/templates/do/logs +18 -3
  48. package/templates/do/register +8 -1
  49. package/templates/do/run +10 -0
  50. package/templates/triton/Dockerfile +5 -0
package/src/app.js CHANGED
@@ -119,6 +119,23 @@ export async function run(projectName, options) {
119
119
  let answers;
120
120
  if (configManager.shouldSkipPrompts()) {
121
121
  console.log('\nšŸš€ Skipping prompts - using configuration from other sources');
122
+
123
+ // Fail-fast if required parameters are missing
124
+ const missing = configManager.getMissingRequiredParameters();
125
+ if (missing.length > 0) {
126
+ console.error('\nāŒ Cannot skip prompts — required parameters are missing:\n');
127
+ for (const param of missing) {
128
+ const matrix = configManager._getParameterMatrix()[param];
129
+ const cliFlag = matrix?.cliOption ? `--${matrix.cliOption}` : '';
130
+ const envVar = matrix?.envVar || '';
131
+ const hints = [cliFlag, envVar].filter(Boolean).join(' or ');
132
+ console.error(` • ${param}${hints ? ` (${hints})` : ''}`);
133
+ }
134
+ console.error('\n Provide these via CLI flags, environment variables, or a config file.');
135
+ console.error(' Run "ml-container-creator --help" for available options.\n');
136
+ process.exit(1);
137
+ }
138
+
122
139
  answers = configManager.getFinalConfiguration();
123
140
 
124
141
  // Infer modelSource from model name prefix if not set
@@ -305,6 +322,17 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
305
322
  ignorePatterns.push('**/sample_model/**');
306
323
  }
307
324
 
325
+ // Exclude do/benchmark when benchmarking is not selected
326
+ if (!answers.includeBenchmark) {
327
+ ignorePatterns.push('**/do/benchmark');
328
+ }
329
+
330
+ // Exclude do/test when hosted-model-endpoint is not selected
331
+ const testTypes = answers.testTypes || [];
332
+ if (!testTypes.includes('hosted-model-endpoint')) {
333
+ ignorePatterns.push('**/do/test');
334
+ }
335
+
308
336
  // Always exclude triton and diffusors source directories
309
337
  ignorePatterns.push('**/triton/**');
310
338
  ignorePatterns.push('**/diffusors/**');
@@ -400,6 +428,20 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
400
428
  _copyFile(path.join(LIB_DIR, 'manifest-cli.js'), path.join(doLibDir, 'manifest-cli.js'));
401
429
  _copyFile(path.join(LIB_DIR, 'asset-manager.js'), path.join(doLibDir, 'asset-manager.js'));
402
430
  _copyFile(path.join(LIB_DIR, 'bootstrap-config.js'), path.join(doLibDir, 'bootstrap-config.js'));
431
+
432
+ // Generate .gitignore with benchmarks/ when benchmarking is enabled
433
+ if (answers.includeBenchmark) {
434
+ const gitignorePath = path.join(destDir, '.gitignore');
435
+ const gitignoreContent = '# Benchmark results (generated by do/benchmark)\nbenchmarks/\n';
436
+ if (fs.existsSync(gitignorePath)) {
437
+ const existing = fs.readFileSync(gitignorePath, 'utf8');
438
+ if (!existing.includes('benchmarks/')) {
439
+ fs.appendFileSync(gitignorePath, `\n${gitignoreContent}`);
440
+ }
441
+ } else {
442
+ fs.writeFileSync(gitignorePath, gitignoreContent);
443
+ }
444
+ }
403
445
  }
404
446
 
405
447
  /**
@@ -493,7 +535,9 @@ async function _ensureTemplateVariables(answers, registryConfigManager = null) {
493
535
  chatTemplate: null,
494
536
  chatTemplateSource: null,
495
537
  hfToken: null,
538
+ hfTokenArn: null,
496
539
  ngcApiKey: null,
540
+ ngcTokenArn: null,
497
541
  envVars: {},
498
542
  inferenceAmiVersion: null,
499
543
  accelerator: null,
@@ -510,7 +554,7 @@ async function _ensureTemplateVariables(answers, registryConfigManager = null) {
510
554
  codebuildProjectName: null,
511
555
  modelName: null,
512
556
  modelFormat: null,
513
- includeSampleModel: false,
557
+ includeSampleModel: true,
514
558
  includeTesting: true,
515
559
  testTypes: [],
516
560
  buildTimestamp: new Date().toISOString(),
@@ -622,6 +666,55 @@ async function _ensureTemplateVariables(answers, registryConfigManager = null) {
622
666
  // Silently continue - defaults are already set
623
667
  }
624
668
  }
669
+
670
+ // Populate baseImage from the catalog when still falsy (covers --skip-prompts and
671
+ // cases where MCP/CLI/config did not provide a base image).
672
+ // Precedence: MCP > CLI > config > catalog default (this block).
673
+ if (!answers.baseImage && registryConfigManager?.frameworkRegistry) {
674
+ const backendKey = answers.backend || answers.modelServer;
675
+ if (backendKey) {
676
+ const frameworkVersions = registryConfigManager.frameworkRegistry[backendKey];
677
+ if (frameworkVersions) {
678
+ let resolvedConfig = null;
679
+ if (answers.frameworkVersion && frameworkVersions[answers.frameworkVersion]) {
680
+ resolvedConfig = frameworkVersions[answers.frameworkVersion];
681
+ } else {
682
+ // Fall back to latest version
683
+ const versions = Object.keys(frameworkVersions).sort((a, b) =>
684
+ b.localeCompare(a, undefined, { numeric: true })
685
+ );
686
+ if (versions.length > 0) {
687
+ resolvedConfig = frameworkVersions[versions[0]];
688
+ }
689
+ }
690
+ if (resolvedConfig?.baseImage) {
691
+ answers.baseImage = resolvedConfig.baseImage;
692
+ }
693
+ }
694
+ }
695
+ }
696
+
697
+ // Populate icGpuCount from instance catalog when not explicitly set.
698
+ // The deploy template uses IC_GPU_COUNT unconditionally for NumberOfAcceleratorDevicesRequired,
699
+ // so it must always have a value for GPU deployments.
700
+ if (answers.icGpuCount == null && answers.instanceType) {
701
+ // Use gpuCount from instance-sizer recommendation if available
702
+ if (answers.gpuCount) {
703
+ answers.icGpuCount = answers.gpuCount;
704
+ } else {
705
+ // Look up from instances catalog
706
+ try {
707
+ const catalogPath = path.resolve(__dirname, '..', 'servers', 'lib', 'catalogs', 'instances.json');
708
+ const catalogData = JSON.parse(fs.readFileSync(catalogPath, 'utf-8'));
709
+ const instanceInfo = catalogData?.catalog?.[answers.instanceType];
710
+ if (instanceInfo?.gpus && instanceInfo.gpus > 0) {
711
+ answers.icGpuCount = instanceInfo.gpus;
712
+ }
713
+ } catch {
714
+ // Silently continue — template fallback handles missing value
715
+ }
716
+ }
717
+ }
625
718
  }
626
719
 
627
720
  /**
@@ -958,7 +1051,8 @@ function _setExecutablePermissions(destDir) {
958
1051
  'do/submit',
959
1052
  'do/register',
960
1053
  'do/ci',
961
- 'do/manifest'
1054
+ 'do/manifest',
1055
+ 'do/benchmark'
962
1056
  ];
963
1057
 
964
1058
  shellScripts.forEach(script => {
@@ -0,0 +1,171 @@
1
+ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ // SPDX-License-Identifier: Apache-2.0
3
+
4
/**
 * Parse vLLM's model registry Python source to extract model_type keys.
 *
 * vLLM's registry maps architecture class names to (module, impl_class) tuples:
 *     "LlamaForCausalLM": ("llama", "LlamaForCausalLM"),
 *     "Qwen2ForCausalLM": ("qwen2", "Qwen2ForCausalLM"),
 *
 * The module name (first tuple element) is taken as the model_type. Tuples
 * that were wrapped across several lines are recognised too:
 *     "LlavaNextForConditionalGeneration": (
 *         "llava_next",
 *         "LlavaNextForConditionalGeneration",
 *     ),
 *
 * Older layouts where the model_type itself is the (lowercase) dict key are
 * also matched.
 *
 * @param {string} source - Python source code content
 * @returns {string[]} Sorted, de-duplicated array of model_type strings
 */
export const parseVllmRegistry = (source) => {
    const patterns = [
        // Tuple value format: ("module_name", "ClassName") — extract module_name.
        // Whitespace (including newlines) is allowed after "(" so wrapped tuples match.
        /\(\s*"([a-z][a-z0-9_]*)"\s*,\s*"[A-Z]/g,
        // Direct lowercase key format (older registries): "model_type": (
        /"([a-z][a-z0-9_]*)":\s*\(/g,
        // Direct lowercase key format: "model_type": ClassName
        /"([a-z][a-z0-9_]*)":\s*[A-Z]/g,
        // Direct lowercase key format: "model_type": [
        /"([a-z][a-z0-9_]*)":\s*\[/g
    ];

    // String() mirrors the coercion RegExp.prototype.exec applied to its argument.
    const text = String(source);
    const modelTypes = new Set();
    for (const pattern of patterns) {
        for (const [, modelType] of text.matchAll(pattern)) {
            modelTypes.add(modelType);
        }
    }
    return [...modelTypes].sort();
};
46
+
47
/**
 * Parse SGLang's model_registry.py to extract model_type keys.
 *
 * A key is collected when it is a lowercase identifier in single or double
 * quotes, followed by a colon and a value that starts with an uppercase
 * letter, "(" or "[":
 *     "llama": ModelClass,
 *     'qwen2': (ModulePath, ClassName),
 *     "mixtral": [ClassA, ClassB],
 *
 * @param {string} source - Python source code content
 * @returns {string[]} Sorted, de-duplicated array of model_type strings
 */
export const parseSglangRegistry = (source) => {
    // \1 forces the closing quote to match the opening one; the trailing class
    // covers the three accepted value openers in a single pass.
    const keyPattern = /(["'])([a-z][a-z0-9_]*)\1:\s*[A-Z(\[]/g;

    const modelTypes = new Set();
    for (const [, , modelType] of String(source).matchAll(keyPattern)) {
        modelTypes.add(modelType);
    }
    return [...modelTypes].sort();
};
72
+
73
/**
 * Parse TensorRT-LLM's models __init__.py to extract model_type keys.
 *
 * Matches entries of MODEL_MAP-style dicts whose key is a lowercase
 * identifier in single or double quotes and whose value starts with an
 * uppercase letter, "(" or "[":
 *     "llama": LlamaForCausalLM,
 *     'gpt2': GPT2LMHeadModel,
 *     "t5": (EncoderModel, DecoderModel),
 *     "multi": [ClassA, ClassB],
 *
 * The list-valued form is accepted so that this parser agrees with the vLLM
 * and SGLang parsers in this module.
 *
 * @param {string} source - Python source code content
 * @returns {string[]} Sorted, de-duplicated array of model_type strings
 */
export const parseTensorRTRegistry = (source) => {
    // One pattern replaces the quote-style × value-opener permutations; \1
    // keeps the opening and closing quote characters paired.
    const keyPattern = /(["'])([a-z][a-z0-9_]*)\1:\s*[A-Z(\[]/g;

    const modelTypes = new Set();
    for (const [, , modelType] of String(source).matchAll(keyPattern)) {
        modelTypes.add(modelType);
    }
    return [...modelTypes].sort();
};
99
+
100
/**
 * Where each supported model server keeps its architecture registry.
 *
 * Keyed by server name. Every value carries the GitHub `repo`, the registry
 * `file` path inside that repository, the `tagPrefix` placed in front of a
 * framework version to form the git tag, and the `parser` for that file.
 */
export const SERVER_REGISTRY_SOURCES = Object.fromEntries(
    [
        // [server, repo, registry file, parser]
        ['vllm', 'vllm-project/vllm', 'vllm/model_executor/models/registry.py', parseVllmRegistry],
        ['sglang', 'sgl-project/sglang', 'python/sglang/srt/models/model_registry.py', parseSglangRegistry],
        ['tensorrt-llm', 'NVIDIA/TensorRT-LLM', 'tensorrt_llm/models/__init__.py', parseTensorRTRegistry]
    ].map(([server, repo, file, parser]) => [server, { repo, file, tagPrefix: 'v', parser }])
);
124
+
125
/**
 * Sync supported model architectures from server GitHub repositories
 * into the model-servers catalog.
 *
 * For each server entry in the catalog that has a matching source config,
 * fetches the model registry file from GitHub at the version tag, parses it
 * and stores the result on the entry as `supportedModelTypes`. The catalog
 * file is rewritten in place once all entries have been processed.
 *
 * Fetch or parse problems never abort the run: they are logged and recorded
 * in `failures`, and the affected entry is left untouched.
 *
 * @param {string} catalogPath - Path to model-servers.json
 * @returns {Promise<{servers: Array<{server: string, version: *, count: number}>,
 *                    failures: Array<{server: string, version: *, reason: string}>}>}
 *          Summary with per-entry counts and failures
 */
export const syncArchitectures = async (catalogPath) => {
    const catalog = JSON.parse(readFileSync(catalogPath, 'utf8'));
    const summary = { servers: [], failures: [] };

    // Several catalog entries can share one framework version; remember every
    // successfully parsed registry by URL so each file is downloaded once.
    // Failures are deliberately not cached, so a later entry retries.
    const parsedByUrl = new Map();

    for (const [server, entries] of Object.entries(catalog)) {
        const source = SERVER_REGISTRY_SOURCES[server];
        // Non-array values cannot hold catalog entries; skip instead of throwing.
        if (!source || !Array.isArray(entries)) continue;

        for (const entry of entries) {
            const version = entry?.labels?.framework_version;
            if (!version) continue;

            // Avoid producing tags such as "vv0.6.0" when the catalog already
            // stores the version with its tag prefix.
            const versionText = String(version);
            const tag = versionText.startsWith(source.tagPrefix)
                ? versionText
                : `${source.tagPrefix}${versionText}`;
            const url = `https://raw.githubusercontent.com/${source.repo}/${tag}/${source.file}`;

            try {
                let modelTypes = parsedByUrl.get(url);
                if (modelTypes === undefined) {
                    const response = await fetch(url);
                    if (!response.ok) {
                        summary.failures.push({ server, version, reason: `HTTP ${response.status}` });
                        console.log(`  āš ļø  ${server} ${version}: fetch failed (HTTP ${response.status})`);
                        continue;
                    }
                    modelTypes = source.parser(await response.text());
                    parsedByUrl.set(url, modelTypes);
                }
                // Copy so entries never share one array instance.
                entry.supportedModelTypes = [...modelTypes];
                summary.servers.push({ server, version, count: entry.supportedModelTypes.length });
                console.log(`  āœ“ ${server} ${version}: ${entry.supportedModelTypes.length} architectures`);
            } catch (err) {
                summary.failures.push({ server, version, reason: err.message });
                console.log(`  āš ļø  ${server} ${version}: fetch failed (${err.message})`);
            }
        }
    }

    writeFileSync(catalogPath, JSON.stringify(catalog, null, 4));
    return summary;
};
@@ -0,0 +1,22 @@
1
+ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ // SPDX-License-Identifier: Apache-2.0
3
+
4
+ /**
5
+ * ARN Detection Utility
6
+ *
7
+ * Provides a pure function for distinguishing AWS Secrets Manager ARNs
8
+ * from plaintext values. Used by the prompt flow and CLI to determine
9
+ * whether user input should be treated as a secret reference or a
10
+ * literal token value.
11
+ */
12
+
13
// ARN head for Secrets Manager in any AWS partition: the commercial partition
// is "aws", while others carry a suffix (for example "aws-cn" or "aws-us-gov").
const SECRETS_MANAGER_ARN_PATTERN = /^arn:aws(?:-[a-z0-9]+)*:secretsmanager:/;

/**
 * Determines if a value is a Secrets Manager ARN.
 *
 * Only the leading `arn:<partition>:secretsmanager:` portion is inspected;
 * the region, account and resource parts are not validated. Non-string
 * values are never treated as ARNs.
 *
 * @param {*} value - The input value to check
 * @returns {boolean} True if the value is a Secrets Manager ARN
 */
export function isSecretsManagerArn(value) {
    return typeof value === 'string' && SECRETS_MANAGER_ARN_PATTERN.test(value);
}
@@ -24,6 +24,8 @@ import { fileURLToPath } from 'node:url';
24
24
  import BootstrapConfig from './bootstrap-config.js';
25
25
  import AwsProfileParser from './aws-profile-parser.js';
26
26
  import AssetManager from './asset-manager.js';
27
+ import McpCommandHandler from './mcp-command-handler.js';
28
+ import RegistryCommandHandler from './registry-command-handler.js';
27
29
  import { runPrompts } from '../prompt-adapter.js';
28
30
 
29
31
  const __filename = fileURLToPath(import.meta.url);
@@ -114,6 +116,11 @@ export default class BootstrapCommandHandler {
114
116
 
115
117
  console.log('\nšŸš€ Bootstrap — Shared AWS Infrastructure Setup\n');
116
118
 
119
+ // Verify AWS CLI v2 is installed
120
+ if (!this._verifyCliV2()) {
121
+ return;
122
+ }
123
+
117
124
  // Determine bootstrap profile name
118
125
  let profileName;
119
126
  if (nonInteractive) {
@@ -192,6 +199,9 @@ export default class BootstrapCommandHandler {
192
199
  if (stackOutputs.BatchS3BucketName) {
193
200
  profileData.batchS3Bucket = stackOutputs.BatchS3BucketName;
194
201
  }
202
+ if (stackOutputs.BenchmarkS3BucketName) {
203
+ profileData.benchmarkS3Bucket = stackOutputs.BenchmarkS3BucketName;
204
+ }
195
205
 
196
206
  console.log(' āœ… Bootstrap stack deployed successfully');
197
207
  } catch (error) {
@@ -311,6 +321,9 @@ export default class BootstrapCommandHandler {
311
321
 
312
322
  // Display summary
313
323
  this._displaySummary(profileName, profileData);
324
+
325
+ // Step 6: Post-setup chain (mcp init → sync-architectures → sync-schemas)
326
+ await this._runPostSetupChain(options);
314
327
  }
315
328
 
316
329
  /**
@@ -377,6 +390,9 @@ export default class BootstrapCommandHandler {
377
390
  if (outputs.BatchS3BucketName) {
378
391
  console.log(` āœ… S3 bucket (batch): ${outputs.BatchS3BucketName}`);
379
392
  }
393
+ if (outputs.BenchmarkS3BucketName) {
394
+ console.log(` āœ… S3 bucket (benchmark): ${outputs.BenchmarkS3BucketName}`);
395
+ }
380
396
  if (outputs.StackVersion) {
381
397
  console.log(` šŸ“‹ Stack version: ${outputs.StackVersion}`);
382
398
  }
@@ -447,6 +463,20 @@ export default class BootstrapCommandHandler {
447
463
  console.log(` āš ļø S3 bucket: ${profile.config.batchS3Bucket} — could not validate`);
448
464
  }
449
465
  }
466
+
467
+ if (profile.config.benchmarkS3Bucket) {
468
+ try {
469
+ const benchmarkExists = this._resourceExists(
470
+ `s3api head-bucket --bucket ${profile.config.benchmarkS3Bucket}`,
471
+ profile.config.awsProfile
472
+ );
473
+ console.log(benchmarkExists
474
+ ? ` āœ… S3 bucket (benchmark): ${profile.config.benchmarkS3Bucket}`
475
+ : ` āš ļø S3 bucket (benchmark): ${profile.config.benchmarkS3Bucket} — missing`);
476
+ } catch {
477
+ console.log(` āš ļø S3 bucket (benchmark): ${profile.config.benchmarkS3Bucket} — could not validate`);
478
+ }
479
+ }
450
480
  }
451
481
 
452
482
  // Display deployed resources from manifest
@@ -1000,6 +1030,7 @@ export default class BootstrapCommandHandler {
1000
1030
  if (stackOutputs.EcrRepositoryName) profileConfig.ecrRepositoryName = stackOutputs.EcrRepositoryName;
1001
1031
  if (stackOutputs.AsyncS3BucketName) profileConfig.asyncS3Bucket = stackOutputs.AsyncS3BucketName;
1002
1032
  if (stackOutputs.BatchS3BucketName) profileConfig.batchS3Bucket = stackOutputs.BatchS3BucketName;
1033
+ if (stackOutputs.BenchmarkS3BucketName) profileConfig.benchmarkS3Bucket = stackOutputs.BenchmarkS3BucketName;
1003
1034
  profileConfig.stackName = stackName;
1004
1035
 
1005
1036
  console.log(' āœ… Bootstrap stack updated');
@@ -1054,6 +1085,74 @@ export default class BootstrapCommandHandler {
1054
1085
  // Save updated profile
1055
1086
  this.config.setProfile(name, profileConfig);
1056
1087
  console.log(`\nāœ… Update complete for profile "${name}"`);
1088
+
1089
+ // Re-run post-setup chain after updating AWS resources
1090
+ await this._runPostSetupChain(options);
1091
+ }
1092
+
1093
+ /**
1094
+ * Run the post-setup chain: mcp init → registry sync-architectures → sync-schemas.
1095
+ * Each step is independent — failures are collected and reported at the end.
1096
+ *
1097
+ * @param {object} options - Parsed CLI options (checks skipPostSetup)
1098
+ */
1099
+ async _runPostSetupChain(options = {}) {
1100
+ if (options['skip-post-setup']) {
1101
+ console.log('\nā­ļø Skipping post-setup chain (--skip-post-setup)');
1102
+ return;
1103
+ }
1104
+
1105
+ console.log('\nšŸ”— Running post-setup configuration...\n');
1106
+
1107
+ const failures = [];
1108
+
1109
+ // 1. MCP init — register bundled MCP servers
1110
+ console.log('šŸ“” Registering MCP servers...');
1111
+ try {
1112
+ const generatorAdapter = {
1113
+ destinationPath(...segments) {
1114
+ return path.resolve(process.cwd(), ...segments);
1115
+ }
1116
+ };
1117
+ const mcpHandler = new McpCommandHandler(generatorAdapter);
1118
+ await mcpHandler.handle(['init'], {});
1119
+ } catch (error) {
1120
+ failures.push({ step: 'mcp init', error: error.message });
1121
+ console.log(` āš ļø mcp init failed: ${error.message}`);
1122
+ }
1123
+
1124
+ // 2. Registry sync-architectures — populate supportedModelTypes
1125
+ console.log('\nšŸ“‹ Syncing model architecture registry...');
1126
+ try {
1127
+ const registryHandler = new RegistryCommandHandler();
1128
+ await registryHandler.handle(['sync-architectures'], {});
1129
+ } catch (error) {
1130
+ failures.push({ step: 'registry sync-architectures', error: error.message });
1131
+ console.log(` āš ļø registry sync-architectures failed: ${error.message}`);
1132
+ }
1133
+
1134
+ // 3. Schema sync — download AWS service models
1135
+ console.log('\nšŸ“ Syncing service schemas...');
1136
+ try {
1137
+ await this._handleSyncSchemas();
1138
+ } catch (error) {
1139
+ failures.push({ step: 'sync-schemas', error: error.message });
1140
+ console.log(` āš ļø sync-schemas failed: ${error.message}`);
1141
+ }
1142
+
1143
+ // Report results
1144
+ if (failures.length === 0) {
1145
+ console.log('\nāœ… Bootstrap complete — all systems operational');
1146
+ } else {
1147
+ console.log(`\nāš ļø Bootstrap complete with ${failures.length} warning${failures.length === 1 ? '' : 's'}:`);
1148
+ for (const { step, error } of failures) {
1149
+ console.log(` • ${step}: ${error}`);
1150
+ }
1151
+ console.log('\n These steps can be re-run individually:');
1152
+ console.log(' ml-container-creator mcp init');
1153
+ console.log(' ml-container-creator registry sync-architectures');
1154
+ console.log(' ml-container-creator bootstrap sync-schemas');
1155
+ }
1057
1156
  }
1058
1157
 
1059
1158
  /**
@@ -1205,11 +1304,28 @@ export default class BootstrapCommandHandler {
1205
1304
  'sagemaker:DescribeEndpointConfig',
1206
1305
  'sagemaker:DescribeModel',
1207
1306
  'sagemaker:DescribeInferenceComponent',
1307
+ 'sagemaker:ListInferenceComponents',
1208
1308
  'sagemaker:InvokeEndpoint',
1209
1309
  'sagemaker:InvokeEndpointAsync'
1210
1310
  ],
1211
1311
  Resource: '*'
1212
1312
  },
1313
+ {
1314
+ Sid: 'SageMakerBenchmarking',
1315
+ Effect: 'Allow',
1316
+ Action: [
1317
+ 'sagemaker:CreateAIBenchmarkJob',
1318
+ 'sagemaker:DescribeAIBenchmarkJob',
1319
+ 'sagemaker:ListAIBenchmarkJobs',
1320
+ 'sagemaker:StopAIBenchmarkJob',
1321
+ 'sagemaker:DeleteAIBenchmarkJob',
1322
+ 'sagemaker:CreateAIWorkloadConfig',
1323
+ 'sagemaker:DescribeAIWorkloadConfig',
1324
+ 'sagemaker:ListAIWorkloadConfigs',
1325
+ 'sagemaker:DeleteAIWorkloadConfig'
1326
+ ],
1327
+ Resource: '*'
1328
+ },
1213
1329
  {
1214
1330
  Sid: 'ECRPull',
1215
1331
  Effect: 'Allow',
@@ -1242,12 +1358,43 @@ export default class BootstrapCommandHandler {
1242
1358
  Effect: 'Allow',
1243
1359
  Action: [
1244
1360
  's3:GetObject',
1361
+ 's3:PutObject',
1362
+ 's3:AbortMultipartUpload',
1245
1363
  's3:ListBucket'
1246
1364
  ],
1247
1365
  Resource: [
1248
1366
  'arn:aws:s3:::ml-container-creator-*',
1249
1367
  'arn:aws:s3:::ml-container-creator-*/*'
1250
1368
  ]
1369
+ },
1370
+ {
1371
+ Sid: 'SNSPublish',
1372
+ Effect: 'Allow',
1373
+ Action: 'sns:Publish',
1374
+ Resource: 'arn:aws:sns:*:*:ml-container-creator-*'
1375
+ },
1376
+ {
1377
+ Sid: 'SecretsManagerBenchmark',
1378
+ Effect: 'Allow',
1379
+ Action: [
1380
+ 'secretsmanager:CreateSecret',
1381
+ 'secretsmanager:PutSecretValue',
1382
+ 'secretsmanager:GetSecretValue',
1383
+ 'secretsmanager:DescribeSecret'
1384
+ ],
1385
+ Resource: 'arn:aws:secretsmanager:*:*:secret:ml-container-creator/*'
1386
+ },
1387
+ {
1388
+ Sid: 'QuotaAndAvailability',
1389
+ Effect: 'Allow',
1390
+ Action: [
1391
+ 'service-quotas:GetServiceQuota',
1392
+ 'service-quotas:ListServiceQuotas',
1393
+ 'sagemaker:ListTrainingPlans',
1394
+ 'sagemaker:DescribeTrainingPlan',
1395
+ 'sagemaker:ListEndpoints'
1396
+ ],
1397
+ Resource: '*'
1251
1398
  }
1252
1399
  ]
1253
1400
  };
@@ -1397,9 +1544,15 @@ export default class BootstrapCommandHandler {
1397
1544
 
1398
1545
  /**
1399
1546
  * Optionally create S3 buckets for async/batch deployments.
1547
+ * Always creates the benchmark S3 bucket (unconditional).
1400
1548
  * @returns {Promise<object>} Bucket names; only benchmarkS3Bucket is present when async/batch bucket creation is declined
1401
1549
  */
1402
1550
  async _setupS3Buckets() {
1551
+ // Always create benchmark bucket (unconditional — avoids re-bootstrap when benchmarking is enabled later)
1552
+ const benchmarkBucketName = `ml-container-creator-benchmark-${this._currentRegion}-${this._currentAccountId}`;
1553
+ const tags = this._buildResourceTags();
1554
+ const benchmarkS3Bucket = await this._createS3Bucket(benchmarkBucketName, tags);
1555
+
1403
1556
  const { useS3 } = await this._promptFn([{
1404
1557
  type: 'confirm',
1405
1558
  name: 'useS3',
@@ -1408,17 +1561,16 @@ export default class BootstrapCommandHandler {
1408
1561
  }]);
1409
1562
 
1410
1563
  if (!useS3) {
1411
- return null;
1564
+ return { benchmarkS3Bucket };
1412
1565
  }
1413
1566
 
1414
1567
  const asyncBucketName = `ml-container-creator-async-${this._currentRegion}-${this._currentAccountId}`;
1415
1568
  const batchBucketName = `ml-container-creator-batch-${this._currentRegion}-${this._currentAccountId}`;
1416
1569
 
1417
- const tags = this._buildResourceTags();
1418
1570
  const asyncS3Bucket = await this._createS3Bucket(asyncBucketName, tags);
1419
1571
  const batchS3Bucket = await this._createS3Bucket(batchBucketName, tags);
1420
1572
 
1421
- return { asyncS3Bucket, batchS3Bucket };
1573
+ return { asyncS3Bucket, batchS3Bucket, benchmarkS3Bucket };
1422
1574
  }
1423
1575
 
1424
1576
  /**
@@ -1475,6 +1627,28 @@ export default class BootstrapCommandHandler {
1475
1627
 
1476
1628
  // ── AWS CLI helpers ─────────────────────────────────────────────
1477
1629
 
1630
+ /**
1631
+ * Verify AWS CLI v2 is installed. Returns true if v2 is detected, false otherwise.
1632
+ * Extracted as a method so tests can override it.
1633
+ * @returns {boolean}
1634
+ */
1635
+ _verifyCliV2() {
1636
+ try {
1637
+ const versionOutput = execSync('aws --version', { encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] }).trim();
1638
+ if (!versionOutput.includes('aws-cli/2')) {
1639
+ console.log(` āŒ AWS CLI v2 is required. Detected: ${versionOutput.split(' ')[0]}`);
1640
+ console.log(' Install: https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html');
1641
+ console.log(' Some features (benchmarking, newer SageMaker APIs) require CLI v2.\n');
1642
+ return false;
1643
+ }
1644
+ return true;
1645
+ } catch {
1646
+ console.log(' āŒ AWS CLI not found.');
1647
+ console.log(' Install: https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html\n');
1648
+ return false;
1649
+ }
1650
+ }
1651
+
1478
1652
  /**
1479
1653
  * Execute an AWS CLI command and return parsed JSON output.
1480
1654
  * @param {string} command - AWS CLI command (without 'aws' prefix)
@@ -1649,6 +1823,7 @@ SETUP OPTIONS:
1649
1823
  --skip-s3 Skip S3 bucket creation
1650
1824
  --ci Provision CI testing infrastructure
1651
1825
  --skip-ci Skip CI infrastructure provisioning
1826
+ --skip-post-setup Skip post-setup chain (mcp init, sync-architectures, sync-schemas)
1652
1827
 
1653
1828
  STATUS OPTIONS:
1654
1829
  --verify Check each active resource against AWS APIs for drift detection
@@ -375,7 +375,7 @@ For more information, visit: https://github.com/awslabs/ml-container-creator
375
375
  type: 'confirm',
376
376
  name: 'includeSampleModel',
377
377
  message: 'Include sample model:',
378
- default: false,
378
+ default: true,
379
379
  when: answers => answers.framework !== 'transformers'
380
380
  },
381
381
  {
@@ -515,7 +515,7 @@ ml-container-creator \\
515
515
  'framework': 'sklearn',
516
516
  'modelServer': 'flask',
517
517
  'modelFormat': 'pkl',
518
- 'includeSampleModel': false,
518
+ 'includeSampleModel': true,
519
519
  'includeTesting': true,
520
520
  'testTypes': ['local-model-cli', 'local-model-server', 'hosted-model-endpoint'],
521
521
  'buildTarget': 'codebuild',