@aws/ml-container-creator 0.8.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/LICENSE-THIRD-PARTY +50760 -16218
  2. package/bin/cli.js +31 -137
  3. package/package.json +7 -2
  4. package/servers/lib/catalogs/instances.json +52 -1275
  5. package/servers/lib/catalogs/models.json +0 -132
  6. package/servers/lib/catalogs/popular-diffusors.json +1 -110
  7. package/src/app.js +29 -2
  8. package/src/lib/config-manager.js +17 -0
  9. package/src/lib/generated/cli-options.js +467 -0
  10. package/src/lib/generated/validation-rules.js +202 -0
  11. package/src/lib/mcp-client.js +16 -1
  12. package/src/lib/mcp-command-handler.js +10 -2
  13. package/src/lib/prompt-runner.js +16 -2
  14. package/src/lib/train-config-parser.js +136 -0
  15. package/src/lib/train-config-persistence.js +143 -0
  16. package/src/lib/train-config-validator.js +112 -0
  17. package/src/lib/train-feedback.js +46 -0
  18. package/src/lib/train-idempotency.js +97 -0
  19. package/src/lib/train-request-builder.js +120 -0
  20. package/templates/code/serve +5 -134
  21. package/templates/code/serve.d/lmi.ejs +19 -0
  22. package/templates/code/serve.d/sglang.ejs +47 -0
  23. package/templates/code/serve.d/tensorrt-llm.ejs +53 -0
  24. package/templates/code/serve.d/vllm.ejs +48 -0
  25. package/templates/do/.train_build_request.py +141 -0
  26. package/templates/do/.train_poll_parser.py +135 -0
  27. package/templates/do/.train_status_parser.py +187 -0
  28. package/templates/do/clean +1 -1387
  29. package/templates/do/clean.d/async-inference.ejs +508 -0
  30. package/templates/do/clean.d/batch-transform.ejs +512 -0
  31. package/templates/do/clean.d/hyperpod-eks.ejs +481 -0
  32. package/templates/do/clean.d/managed-inference.ejs +1043 -0
  33. package/templates/do/deploy +1 -1766
  34. package/templates/do/deploy.d/async-inference.ejs +501 -0
  35. package/templates/do/deploy.d/batch-transform.ejs +529 -0
  36. package/templates/do/deploy.d/hyperpod-eks.ejs +339 -0
  37. package/templates/do/deploy.d/managed-inference.ejs +726 -0
  38. package/templates/do/lib/feedback.sh +41 -0
  39. package/templates/do/train +786 -0
  40. package/templates/do/training/config.yaml +140 -0
  41. package/templates/do/training/train.py +463 -0
package/bin/cli.js CHANGED
@@ -6,6 +6,7 @@ import { createRequire } from 'module';
6
6
  import path from 'path';
7
7
  import { program, Option, Help } from 'commander';
8
8
  import { run } from '../src/app.js';
9
+ import { cliOptions, helpGroups } from '../src/lib/generated/cli-options.js';
9
10
 
10
11
  const require = createRequire(import.meta.url);
11
12
  const { version } = require('../package.json');
@@ -23,116 +24,32 @@ program
23
24
  .version(version)
24
25
  .enablePositionalOptions()
25
26
  .helpCommand('help [command]', 'Display help for command')
26
- .argument('[project-name...]', 'Name for the generated project')
27
-
28
- // --- General ---
29
- .addOption(new Option('--skip-prompts', 'Skip interactive prompts and use configuration from other sources'))
30
- .addOption(new Option('--auto-prompt', 'Fill defaults, prompt only for missing required values'))
31
- .addOption(new Option('--config <path>', 'Path to JSON configuration file'))
32
- .addOption(new Option('--project-name <name>', 'Project name'))
33
- .addOption(new Option('--project-dir <dir>', 'Output directory path'))
34
- .addOption(new Option('--force', 'Overwrite existing output directory without prompting'))
35
-
36
- // --- Model & Framework ---
37
- .addOption(new Option('--deployment-config <config>', 'Deployment configuration (e.g. http-flask, transformers-vllm, triton-fil)'))
38
- .addOption(new Option('--framework <framework>', 'ML framework — DEPRECATED: use --deployment-config').choices(['sklearn', 'xgboost', 'tensorflow', 'transformers']).hideHelp())
39
- .addOption(new Option('--model-format <format>', 'Model serialization format (pkl, joblib, json, model, ubj, keras, h5, SavedModel)'))
40
- .addOption(new Option('--model-name <name>', 'Model identifier (<hf-org/model>, s3://..., registry://..., marketplace://...)'))
41
- .addOption(new Option('--model-server <server>', 'Model server DEPRECATED: use --deployment-config').choices(['flask', 'fastapi', 'vllm', 'sglang']).hideHelp())
42
- .addOption(new Option('--base-image <image>', 'Base container image for Dockerfile'))
43
-
44
- // --- Build & Infrastructure ---
45
- .addOption(new Option('--deployment-target <target>', 'Deployment target (realtime-inference, async-inference, batch-transform, hyperpod-eks)'))
46
- .addOption(new Option('--instance-type <type>', 'SageMaker instance type (e.g. ml.g5.xlarge, ml.m5.large)'))
47
- .addOption(new Option('--region <region>', 'AWS region'))
48
- .addOption(new Option('--role-arn <arn>', 'IAM role ARN for SageMaker execution'))
49
- .addOption(new Option('--build-target <target>', 'Build target (codebuild)'))
50
- .addOption(new Option('--codebuild-compute-type <type>', 'CodeBuild compute type (SMALL, MEDIUM, LARGE)'))
51
-
52
- // --- Endpoint (Real-Time Inference) ---
53
- .addOption(new Option('--endpoint-initial-instance-count <n>', 'Number of instances for the endpoint (default: 1)'))
54
- .addOption(new Option('--endpoint-data-capture-percent <pct>', 'Data capture percentage for monitoring, 0-100 (default: 0)'))
55
- .addOption(new Option('--endpoint-variant-name <name>', 'Production variant name (default: AllTraffic)'))
56
- .addOption(new Option('--endpoint-volume-size <gb>', 'ML storage volume size in GB'))
57
-
58
- // --- Inference Component ---
59
- .addOption(new Option('--ic-cpu-count <n>', 'vCPUs allocated to the inference component'))
60
- .addOption(new Option('--ic-memory-size <mb>', 'Memory in MB for the inference component'))
61
- .addOption(new Option('--ic-gpu-count <n>', 'GPUs allocated to the inference component'))
62
- .addOption(new Option('--ic-copy-count <n>', 'Number of inference component copies (default: 1)'))
63
- .addOption(new Option('--ic-model-weight <weight>', 'Traffic routing weight, 0-1 (default: 1.0)'))
64
-
65
- // --- Async Inference ---
66
- .addOption(new Option('--async-s3-output-path <path>', 'S3 output path for async results'))
67
- .addOption(new Option('--async-sns-success-topic <arn>', 'SNS topic ARN for success notifications'))
68
- .addOption(new Option('--async-sns-error-topic <arn>', 'SNS topic ARN for error notifications'))
69
- .addOption(new Option('--async-max-concurrent <n>', 'Max concurrent invocations per instance (default: 1)'))
70
-
71
- // --- Batch Transform ---
72
- .addOption(new Option('--batch-input-path <path>', 'S3 input path for batch data'))
73
- .addOption(new Option('--batch-output-path <path>', 'S3 output path for batch results'))
74
- .addOption(new Option('--batch-instance-count <n>', 'Number of instances (default: 1)'))
75
- .addOption(new Option('--batch-split-type <type>', 'Input split type: Line, RecordIO, None (default: Line)'))
76
- .addOption(new Option('--batch-strategy <strategy>', 'Batch strategy: MultiRecord, SingleRecord (default: MultiRecord)'))
77
- .addOption(new Option('--batch-join-source <source>', 'Join source: Input, None (default: None)'))
78
- .addOption(new Option('--batch-max-concurrent <n>', 'Max concurrent transforms per instance (default: 1)'))
79
- .addOption(new Option('--batch-max-payload <mb>', 'Max payload size in MB, 0-100 (default: 6)'))
80
-
81
- // --- HyperPod (EKS) ---
82
- .addOption(new Option('--hyperpod-cluster <name>', 'HyperPod EKS cluster name'))
83
- .addOption(new Option('--hyperpod-namespace <ns>', 'Kubernetes namespace (default: default)'))
84
- .addOption(new Option('--hyperpod-replicas <count>', 'Number of replicas (default: 1)'))
85
- .addOption(new Option('--fsx-volume-handle <handle>', 'FSx for Lustre volume handle'))
86
-
87
- // --- Environment Variables ---
88
- .addOption(new Option('--model-env <KEY=VALUE>', 'Model env var, repeatable (e.g. VLLM_TENSOR_PARALLEL_SIZE=4)').argParser(collect).default([]))
89
- .addOption(new Option('--server-env <KEY=VALUE>', 'Server env var, repeatable (e.g. SGLANG_MEM_FRACTION=0.9)').argParser(collect).default([]))
90
-
91
- // --- Authentication ---
92
- .addOption(new Option('--hf-token <token>', 'HuggingFace token (or "$HF_TOKEN" for env var reference)'))
93
- .addOption(new Option('--hf-token-arn <arn>', 'HuggingFace token ARN from Secrets Manager'))
94
- .addOption(new Option('--ngc-token <token>', 'NVIDIA NGC token (or "$NGC_API_KEY" for env var reference)'))
95
- .addOption(new Option('--ngc-token-arn <arn>', 'NVIDIA NGC token ARN from Secrets Manager'))
96
-
97
- // --- Optional Features ---
98
- .addOption(new Option('--include-sample', 'Include sample model code'))
99
- .addOption(new Option('--include-testing', 'Include test suite'))
100
- .addOption(new Option('--test-types <types>', 'Comma-separated test types'))
101
- .addOption(new Option('--enable-lora', 'Enable LoRA adapter serving (transformers with vllm/sglang/djl-lmi only)'))
102
- .addOption(new Option('--max-loras <n>', 'Maximum concurrent LoRA adapters in GPU memory (default: 30)'))
103
- .addOption(new Option('--max-lora-rank <n>', 'Maximum LoRA rank (default: 64)'))
104
-
105
- // --- Benchmarking ---
106
- .addOption(new Option('--include-benchmark', 'Include SageMaker AI Benchmarking (transformers/diffusors only)'))
107
- .addOption(new Option('--benchmark-concurrency <n>', 'Benchmark concurrent requests (default: 10)'))
108
- .addOption(new Option('--benchmark-input-tokens <n>', 'Benchmark mean input tokens (default: 550)'))
109
- .addOption(new Option('--benchmark-output-tokens <n>', 'Benchmark mean output tokens (default: 150)'))
110
- .addOption(new Option('--benchmark-streaming', 'Enable streaming in benchmark (default: true)'))
111
- .addOption(new Option('--benchmark-request-count <n>', 'Total benchmark requests (optional)'))
112
- .addOption(new Option('--benchmark-s3-output-path <path>', 'S3 path for benchmark results'))
113
-
114
- // --- MCP & Discovery ---
115
- .addOption(new Option('--smart', 'Enable Bedrock-powered smart mode on MCP servers'))
116
- .addOption(new Option('--discover', 'Enable live registry lookups via MCP discovery'))
117
-
118
- // --- Validation ---
119
- .addOption(new Option('--no-validate', 'Skip schema-driven validation at generation time'))
120
- .addOption(new Option('--validate-env-vars', 'Enable environment variable validation (default: true)'))
121
- .addOption(new Option('--validate-with-docker', 'Enable Docker introspection validation (opt-in)'))
122
- .addOption(new Option('--offline', 'Disable HuggingFace API lookups'))
123
-
124
- .action((projectNameArgs, options) => {
125
- // Mutual exclusion validation: plaintext token and ARN flags cannot both be provided
126
- if (options.hfToken && options.hfTokenArn) {
127
- console.error('❌ Cannot specify both --hf-token and --hf-token-arn. Use one or the other.');
128
- process.exit(1);
129
- }
130
- if (options.ngcToken && options.ngcTokenArn) {
131
- console.error('❌ Cannot specify both --ngc-token and --ngc-token-arn. Use one or the other.');
132
- process.exit(1);
133
- }
134
- return run(projectNameArgs?.[0] || null, options);
135
- });
27
+ .argument('[project-name...]', 'Name for the generated project');
28
+
29
+ // Register all CLI options from generated schema
30
+ for (const opt of cliOptions) {
31
+ if (opt.hidden) continue;
32
+ const option = new Option(opt.flag, opt.description);
33
+ if (opt.choices) option.choices(opt.choices);
34
+ if (opt.repeatable) {
35
+ option.argParser(collect);
36
+ option.default([]);
37
+ }
38
+ program.addOption(option);
39
+ }
40
+
41
+ program.action((projectNameArgs, options) => {
42
+ // Mutual exclusion validation: plaintext token and ARN flags cannot both be provided
43
+ if (options.hfToken && options.hfTokenArn) {
44
+ console.error('❌ Cannot specify both --hf-token and --hf-token-arn. Use one or the other.');
45
+ process.exit(1);
46
+ }
47
+ if (options.ngcToken && options.ngcTokenArn) {
48
+ console.error('❌ Cannot specify both --ngc-token and --ngc-token-arn. Use one or the other.');
49
+ process.exit(1);
50
+ }
51
+ return run(projectNameArgs?.[0] || null, options);
52
+ });
136
53
 
137
54
  // Custom help formatting — group options into logical sections (root command only)
138
55
  program.configureHelp({
@@ -163,7 +80,7 @@ program.configureHelp({
163
80
  // Collect all visible options
164
81
  const allOptions = helper.visibleOptions(cmd);
165
82
 
166
- // Partition options into groups by flag prefix/purpose
83
+ // Partition options into groups using schema-derived helpGroups
167
84
  const groups = {
168
85
  general: [],
169
86
  model: [],
@@ -182,32 +99,9 @@ program.configureHelp({
182
99
 
183
100
  for (const opt of allOptions) {
184
101
  const long = opt.long || '';
185
- if (['--skip-prompts', '--auto-prompt', '--config', '--project-name', '--project-dir', '--force', '--version', '--help'].includes(long)) {
186
- groups.general.push(opt);
187
- } else if (['--deployment-config', '--framework', '--model-format', '--model-name', '--model-server', '--base-image'].includes(long)) {
188
- groups.model.push(opt);
189
- } else if (['--deployment-target', '--instance-type', '--region', '--role-arn', '--build-target', '--codebuild-compute-type'].includes(long)) {
190
- groups.infra.push(opt);
191
- } else if (long.startsWith('--endpoint-')) {
192
- groups.endpoint.push(opt);
193
- } else if (long.startsWith('--ic-')) {
194
- groups.ic.push(opt);
195
- } else if (long.startsWith('--async-')) {
196
- groups.async.push(opt);
197
- } else if (long.startsWith('--batch-')) {
198
- groups.batch.push(opt);
199
- } else if (long.startsWith('--hyperpod-') || long === '--fsx-volume-handle') {
200
- groups.hyperpod.push(opt);
201
- } else if (['--model-env', '--server-env'].includes(long)) {
202
- groups.env.push(opt);
203
- } else if (['--hf-token', '--hf-token-arn', '--ngc-token', '--ngc-token-arn'].includes(long)) {
204
- groups.auth.push(opt);
205
- } else if (['--include-sample', '--include-testing', '--test-types', '--enable-lora', '--max-loras', '--max-lora-rank'].includes(long)) {
206
- groups.features.push(opt);
207
- } else if (['--smart', '--discover'].includes(long)) {
208
- groups.mcp.push(opt);
209
- } else if (['--validate-env-vars', '--validate-with-docker', '--offline', '--no-validate'].includes(long)) {
210
- groups.validation.push(opt);
102
+ const section = helpGroups[long] || 'general';
103
+ if (groups[section]) {
104
+ groups[section].push(opt);
211
105
  } else {
212
106
  groups.general.push(opt);
213
107
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aws/ml-container-creator",
3
- "version": "0.8.0",
3
+ "version": "0.9.1",
4
4
  "description": "Generator for SageMaker AI BYOC paradigm for predictive inference use-cases.",
5
5
  "type": "module",
6
6
  "main": "src/app.js",
@@ -70,10 +70,11 @@
70
70
  ],
71
71
  "scripts": {
72
72
  "test": "VALIDATE_ENV_VARS=false mocha test/**/*.test.js --exit --reporter test/reporters/progress-reporter.cjs",
73
- "test:ci": "VALIDATE_ENV_VARS=false mocha test/**/*.test.js --exit --reporter spec",
73
+ "test:ci": "VALIDATE_ENV_VARS=false mocha test/**/*.test.js --exit --parallel --reporter spec",
74
74
  "test:verbose": "VALIDATE_ENV_VARS=false mocha test/**/*.test.js --exit --reporter spec",
75
75
  "test:minimal": "VALIDATE_ENV_VARS=false mocha test/**/*.test.js --reporter test/reporters/minimal-reporter.cjs",
76
76
  "test:watch": "VALIDATE_ENV_VARS=false mocha test/**/*.test.js --watch --reporter test/reporters/progress-reporter.cjs",
77
+ "test:fast": "VALIDATE_ENV_VARS=false PROPERTY_NUM_RUNS=10 mocha test/**/*.test.js --exit --parallel --timeout 60000 --reporter dot",
77
78
  "test:coverage": "VALIDATE_ENV_VARS=false nyc mocha test/**/*.test.js",
78
79
  "test:unit": "VALIDATE_ENV_VARS=false mocha test/unit/**/*.test.js test/helpers/**/*.test.js --reporter spec",
79
80
  "test:integration": "VALIDATE_ENV_VARS=false mocha test/input-parsing-and-generation/**/*.test.js --reporter spec",
@@ -94,6 +95,10 @@
94
95
  "docs:serve": "mkdocs serve",
95
96
  "docs:build": "mkdocs build",
96
97
  "docs:deploy": "mkdocs gh-deploy",
98
+ "docs:sync": "node scripts/sync-command-generator.js",
99
+ "docs:check": "node scripts/sync-command-generator.js --check",
100
+ "codegen": "node scripts/codegen-cli.js && node scripts/codegen-validator.js && node scripts/codegen-widget.js",
101
+ "codegen:check": "node scripts/codegen-cli.js && node scripts/codegen-validator.js && node scripts/codegen-widget.js && node scripts/codegen-parity.js",
97
102
  "_sbom": "npm sbom --sbom-format spdx > sbom.json && for dir in servers/*/; do [ -f \"$dir/package.json\" ] && (cd \"$dir\" && npm sbom --sbom-format spdx > sbom.json); done",
98
103
  "_licenses:review": "license-checker --production --exclude MIT,Apache-2.0,BSD-2-Clause,BSD-3-Clause,ISC,0BSD && for dir in servers/*/; do [ -f \"$dir/package.json\" ] && echo \"\\nChecking $dir\" && (cd \"$dir\" && npx license-checker --production --exclude MIT,Apache-2.0,BSD-2-Clause,BSD-3-Clause,ISC,0BSD); done",
99
104
  "_licenses:csv": "license-checker --csv --out ./licenses.csv && for dir in servers/*/; do [ -f \"$dir/package.json\" ] && (cd \"$dir\" && npx license-checker --csv --out licenses.csv); done && cat servers/*/licenses.csv >> licenses.csv",