@aws/ml-container-creator 0.8.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE-THIRD-PARTY +50760 -16218
- package/bin/cli.js +31 -137
- package/package.json +7 -2
- package/servers/lib/catalogs/instances.json +52 -1275
- package/servers/lib/catalogs/models.json +0 -132
- package/servers/lib/catalogs/popular-diffusors.json +1 -110
- package/src/app.js +29 -2
- package/src/lib/config-manager.js +17 -0
- package/src/lib/generated/cli-options.js +467 -0
- package/src/lib/generated/validation-rules.js +202 -0
- package/src/lib/mcp-client.js +16 -1
- package/src/lib/mcp-command-handler.js +10 -2
- package/src/lib/prompt-runner.js +16 -2
- package/src/lib/train-config-parser.js +136 -0
- package/src/lib/train-config-persistence.js +143 -0
- package/src/lib/train-config-validator.js +112 -0
- package/src/lib/train-feedback.js +46 -0
- package/src/lib/train-idempotency.js +97 -0
- package/src/lib/train-request-builder.js +120 -0
- package/templates/code/serve +5 -134
- package/templates/code/serve.d/lmi.ejs +19 -0
- package/templates/code/serve.d/sglang.ejs +47 -0
- package/templates/code/serve.d/tensorrt-llm.ejs +53 -0
- package/templates/code/serve.d/vllm.ejs +48 -0
- package/templates/do/.train_build_request.py +141 -0
- package/templates/do/.train_poll_parser.py +135 -0
- package/templates/do/.train_status_parser.py +187 -0
- package/templates/do/clean +1 -1387
- package/templates/do/clean.d/async-inference.ejs +508 -0
- package/templates/do/clean.d/batch-transform.ejs +512 -0
- package/templates/do/clean.d/hyperpod-eks.ejs +481 -0
- package/templates/do/clean.d/managed-inference.ejs +1043 -0
- package/templates/do/deploy +1 -1766
- package/templates/do/deploy.d/async-inference.ejs +501 -0
- package/templates/do/deploy.d/batch-transform.ejs +529 -0
- package/templates/do/deploy.d/hyperpod-eks.ejs +339 -0
- package/templates/do/deploy.d/managed-inference.ejs +726 -0
- package/templates/do/lib/feedback.sh +41 -0
- package/templates/do/train +786 -0
- package/templates/do/training/config.yaml +140 -0
- package/templates/do/training/train.py +463 -0
package/bin/cli.js
CHANGED
|
@@ -6,6 +6,7 @@ import { createRequire } from 'module';
|
|
|
6
6
|
import path from 'path';
|
|
7
7
|
import { program, Option, Help } from 'commander';
|
|
8
8
|
import { run } from '../src/app.js';
|
|
9
|
+
import { cliOptions, helpGroups } from '../src/lib/generated/cli-options.js';
|
|
9
10
|
|
|
10
11
|
const require = createRequire(import.meta.url);
|
|
11
12
|
const { version } = require('../package.json');
|
|
@@ -23,116 +24,32 @@ program
|
|
|
23
24
|
.version(version)
|
|
24
25
|
.enablePositionalOptions()
|
|
25
26
|
.helpCommand('help [command]', 'Display help for command')
|
|
26
|
-
.argument('[project-name...]', 'Name for the generated project')
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
.
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
.addOption(
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
// --- Endpoint (Real-Time Inference) ---
|
|
53
|
-
.addOption(new Option('--endpoint-initial-instance-count <n>', 'Number of instances for the endpoint (default: 1)'))
|
|
54
|
-
.addOption(new Option('--endpoint-data-capture-percent <pct>', 'Data capture percentage for monitoring, 0-100 (default: 0)'))
|
|
55
|
-
.addOption(new Option('--endpoint-variant-name <name>', 'Production variant name (default: AllTraffic)'))
|
|
56
|
-
.addOption(new Option('--endpoint-volume-size <gb>', 'ML storage volume size in GB'))
|
|
57
|
-
|
|
58
|
-
// --- Inference Component ---
|
|
59
|
-
.addOption(new Option('--ic-cpu-count <n>', 'vCPUs allocated to the inference component'))
|
|
60
|
-
.addOption(new Option('--ic-memory-size <mb>', 'Memory in MB for the inference component'))
|
|
61
|
-
.addOption(new Option('--ic-gpu-count <n>', 'GPUs allocated to the inference component'))
|
|
62
|
-
.addOption(new Option('--ic-copy-count <n>', 'Number of inference component copies (default: 1)'))
|
|
63
|
-
.addOption(new Option('--ic-model-weight <weight>', 'Traffic routing weight, 0-1 (default: 1.0)'))
|
|
64
|
-
|
|
65
|
-
// --- Async Inference ---
|
|
66
|
-
.addOption(new Option('--async-s3-output-path <path>', 'S3 output path for async results'))
|
|
67
|
-
.addOption(new Option('--async-sns-success-topic <arn>', 'SNS topic ARN for success notifications'))
|
|
68
|
-
.addOption(new Option('--async-sns-error-topic <arn>', 'SNS topic ARN for error notifications'))
|
|
69
|
-
.addOption(new Option('--async-max-concurrent <n>', 'Max concurrent invocations per instance (default: 1)'))
|
|
70
|
-
|
|
71
|
-
// --- Batch Transform ---
|
|
72
|
-
.addOption(new Option('--batch-input-path <path>', 'S3 input path for batch data'))
|
|
73
|
-
.addOption(new Option('--batch-output-path <path>', 'S3 output path for batch results'))
|
|
74
|
-
.addOption(new Option('--batch-instance-count <n>', 'Number of instances (default: 1)'))
|
|
75
|
-
.addOption(new Option('--batch-split-type <type>', 'Input split type: Line, RecordIO, None (default: Line)'))
|
|
76
|
-
.addOption(new Option('--batch-strategy <strategy>', 'Batch strategy: MultiRecord, SingleRecord (default: MultiRecord)'))
|
|
77
|
-
.addOption(new Option('--batch-join-source <source>', 'Join source: Input, None (default: None)'))
|
|
78
|
-
.addOption(new Option('--batch-max-concurrent <n>', 'Max concurrent transforms per instance (default: 1)'))
|
|
79
|
-
.addOption(new Option('--batch-max-payload <mb>', 'Max payload size in MB, 0-100 (default: 6)'))
|
|
80
|
-
|
|
81
|
-
// --- HyperPod (EKS) ---
|
|
82
|
-
.addOption(new Option('--hyperpod-cluster <name>', 'HyperPod EKS cluster name'))
|
|
83
|
-
.addOption(new Option('--hyperpod-namespace <ns>', 'Kubernetes namespace (default: default)'))
|
|
84
|
-
.addOption(new Option('--hyperpod-replicas <count>', 'Number of replicas (default: 1)'))
|
|
85
|
-
.addOption(new Option('--fsx-volume-handle <handle>', 'FSx for Lustre volume handle'))
|
|
86
|
-
|
|
87
|
-
// --- Environment Variables ---
|
|
88
|
-
.addOption(new Option('--model-env <KEY=VALUE>', 'Model env var, repeatable (e.g. VLLM_TENSOR_PARALLEL_SIZE=4)').argParser(collect).default([]))
|
|
89
|
-
.addOption(new Option('--server-env <KEY=VALUE>', 'Server env var, repeatable (e.g. SGLANG_MEM_FRACTION=0.9)').argParser(collect).default([]))
|
|
90
|
-
|
|
91
|
-
// --- Authentication ---
|
|
92
|
-
.addOption(new Option('--hf-token <token>', 'HuggingFace token (or "$HF_TOKEN" for env var reference)'))
|
|
93
|
-
.addOption(new Option('--hf-token-arn <arn>', 'HuggingFace token ARN from Secrets Manager'))
|
|
94
|
-
.addOption(new Option('--ngc-token <token>', 'NVIDIA NGC token (or "$NGC_API_KEY" for env var reference)'))
|
|
95
|
-
.addOption(new Option('--ngc-token-arn <arn>', 'NVIDIA NGC token ARN from Secrets Manager'))
|
|
96
|
-
|
|
97
|
-
// --- Optional Features ---
|
|
98
|
-
.addOption(new Option('--include-sample', 'Include sample model code'))
|
|
99
|
-
.addOption(new Option('--include-testing', 'Include test suite'))
|
|
100
|
-
.addOption(new Option('--test-types <types>', 'Comma-separated test types'))
|
|
101
|
-
.addOption(new Option('--enable-lora', 'Enable LoRA adapter serving (transformers with vllm/sglang/djl-lmi only)'))
|
|
102
|
-
.addOption(new Option('--max-loras <n>', 'Maximum concurrent LoRA adapters in GPU memory (default: 30)'))
|
|
103
|
-
.addOption(new Option('--max-lora-rank <n>', 'Maximum LoRA rank (default: 64)'))
|
|
104
|
-
|
|
105
|
-
// --- Benchmarking ---
|
|
106
|
-
.addOption(new Option('--include-benchmark', 'Include SageMaker AI Benchmarking (transformers/diffusors only)'))
|
|
107
|
-
.addOption(new Option('--benchmark-concurrency <n>', 'Benchmark concurrent requests (default: 10)'))
|
|
108
|
-
.addOption(new Option('--benchmark-input-tokens <n>', 'Benchmark mean input tokens (default: 550)'))
|
|
109
|
-
.addOption(new Option('--benchmark-output-tokens <n>', 'Benchmark mean output tokens (default: 150)'))
|
|
110
|
-
.addOption(new Option('--benchmark-streaming', 'Enable streaming in benchmark (default: true)'))
|
|
111
|
-
.addOption(new Option('--benchmark-request-count <n>', 'Total benchmark requests (optional)'))
|
|
112
|
-
.addOption(new Option('--benchmark-s3-output-path <path>', 'S3 path for benchmark results'))
|
|
113
|
-
|
|
114
|
-
// --- MCP & Discovery ---
|
|
115
|
-
.addOption(new Option('--smart', 'Enable Bedrock-powered smart mode on MCP servers'))
|
|
116
|
-
.addOption(new Option('--discover', 'Enable live registry lookups via MCP discovery'))
|
|
117
|
-
|
|
118
|
-
// --- Validation ---
|
|
119
|
-
.addOption(new Option('--no-validate', 'Skip schema-driven validation at generation time'))
|
|
120
|
-
.addOption(new Option('--validate-env-vars', 'Enable environment variable validation (default: true)'))
|
|
121
|
-
.addOption(new Option('--validate-with-docker', 'Enable Docker introspection validation (opt-in)'))
|
|
122
|
-
.addOption(new Option('--offline', 'Disable HuggingFace API lookups'))
|
|
123
|
-
|
|
124
|
-
.action((projectNameArgs, options) => {
|
|
125
|
-
// Mutual exclusion validation: plaintext token and ARN flags cannot both be provided
|
|
126
|
-
if (options.hfToken && options.hfTokenArn) {
|
|
127
|
-
console.error('❌ Cannot specify both --hf-token and --hf-token-arn. Use one or the other.');
|
|
128
|
-
process.exit(1);
|
|
129
|
-
}
|
|
130
|
-
if (options.ngcToken && options.ngcTokenArn) {
|
|
131
|
-
console.error('❌ Cannot specify both --ngc-token and --ngc-token-arn. Use one or the other.');
|
|
132
|
-
process.exit(1);
|
|
133
|
-
}
|
|
134
|
-
return run(projectNameArgs?.[0] || null, options);
|
|
135
|
-
});
|
|
27
|
+
.argument('[project-name...]', 'Name for the generated project');
|
|
28
|
+
|
|
29
|
+
// Register all CLI options from generated schema
|
|
30
|
+
for (const opt of cliOptions) {
|
|
31
|
+
if (opt.hidden) continue;
|
|
32
|
+
const option = new Option(opt.flag, opt.description);
|
|
33
|
+
if (opt.choices) option.choices(opt.choices);
|
|
34
|
+
if (opt.repeatable) {
|
|
35
|
+
option.argParser(collect);
|
|
36
|
+
option.default([]);
|
|
37
|
+
}
|
|
38
|
+
program.addOption(option);
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
program.action((projectNameArgs, options) => {
|
|
42
|
+
// Mutual exclusion validation: plaintext token and ARN flags cannot both be provided
|
|
43
|
+
if (options.hfToken && options.hfTokenArn) {
|
|
44
|
+
console.error('❌ Cannot specify both --hf-token and --hf-token-arn. Use one or the other.');
|
|
45
|
+
process.exit(1);
|
|
46
|
+
}
|
|
47
|
+
if (options.ngcToken && options.ngcTokenArn) {
|
|
48
|
+
console.error('❌ Cannot specify both --ngc-token and --ngc-token-arn. Use one or the other.');
|
|
49
|
+
process.exit(1);
|
|
50
|
+
}
|
|
51
|
+
return run(projectNameArgs?.[0] || null, options);
|
|
52
|
+
});
|
|
136
53
|
|
|
137
54
|
// Custom help formatting — group options into logical sections (root command only)
|
|
138
55
|
program.configureHelp({
|
|
@@ -163,7 +80,7 @@ program.configureHelp({
|
|
|
163
80
|
// Collect all visible options
|
|
164
81
|
const allOptions = helper.visibleOptions(cmd);
|
|
165
82
|
|
|
166
|
-
// Partition options into groups
|
|
83
|
+
// Partition options into groups using schema-derived helpGroups
|
|
167
84
|
const groups = {
|
|
168
85
|
general: [],
|
|
169
86
|
model: [],
|
|
@@ -182,32 +99,9 @@ program.configureHelp({
|
|
|
182
99
|
|
|
183
100
|
for (const opt of allOptions) {
|
|
184
101
|
const long = opt.long || '';
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
groups.model.push(opt);
|
|
189
|
-
} else if (['--deployment-target', '--instance-type', '--region', '--role-arn', '--build-target', '--codebuild-compute-type'].includes(long)) {
|
|
190
|
-
groups.infra.push(opt);
|
|
191
|
-
} else if (long.startsWith('--endpoint-')) {
|
|
192
|
-
groups.endpoint.push(opt);
|
|
193
|
-
} else if (long.startsWith('--ic-')) {
|
|
194
|
-
groups.ic.push(opt);
|
|
195
|
-
} else if (long.startsWith('--async-')) {
|
|
196
|
-
groups.async.push(opt);
|
|
197
|
-
} else if (long.startsWith('--batch-')) {
|
|
198
|
-
groups.batch.push(opt);
|
|
199
|
-
} else if (long.startsWith('--hyperpod-') || long === '--fsx-volume-handle') {
|
|
200
|
-
groups.hyperpod.push(opt);
|
|
201
|
-
} else if (['--model-env', '--server-env'].includes(long)) {
|
|
202
|
-
groups.env.push(opt);
|
|
203
|
-
} else if (['--hf-token', '--hf-token-arn', '--ngc-token', '--ngc-token-arn'].includes(long)) {
|
|
204
|
-
groups.auth.push(opt);
|
|
205
|
-
} else if (['--include-sample', '--include-testing', '--test-types', '--enable-lora', '--max-loras', '--max-lora-rank'].includes(long)) {
|
|
206
|
-
groups.features.push(opt);
|
|
207
|
-
} else if (['--smart', '--discover'].includes(long)) {
|
|
208
|
-
groups.mcp.push(opt);
|
|
209
|
-
} else if (['--validate-env-vars', '--validate-with-docker', '--offline', '--no-validate'].includes(long)) {
|
|
210
|
-
groups.validation.push(opt);
|
|
102
|
+
const section = helpGroups[long] || 'general';
|
|
103
|
+
if (groups[section]) {
|
|
104
|
+
groups[section].push(opt);
|
|
211
105
|
} else {
|
|
212
106
|
groups.general.push(opt);
|
|
213
107
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@aws/ml-container-creator",
|
|
3
|
-
"version": "0.8.0",
|
|
3
|
+
"version": "0.9.1",
|
|
4
4
|
"description": "Generator for SageMaker AI BYOC paradigm for predictive inference use-cases.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "src/app.js",
|
|
@@ -70,10 +70,11 @@
|
|
|
70
70
|
],
|
|
71
71
|
"scripts": {
|
|
72
72
|
"test": "VALIDATE_ENV_VARS=false mocha test/**/*.test.js --exit --reporter test/reporters/progress-reporter.cjs",
|
|
73
|
-
"test:ci": "VALIDATE_ENV_VARS=false mocha test/**/*.test.js --exit --reporter spec",
|
|
73
|
+
"test:ci": "VALIDATE_ENV_VARS=false mocha test/**/*.test.js --exit --parallel --reporter spec",
|
|
74
74
|
"test:verbose": "VALIDATE_ENV_VARS=false mocha test/**/*.test.js --exit --reporter spec",
|
|
75
75
|
"test:minimal": "VALIDATE_ENV_VARS=false mocha test/**/*.test.js --reporter test/reporters/minimal-reporter.cjs",
|
|
76
76
|
"test:watch": "VALIDATE_ENV_VARS=false mocha test/**/*.test.js --watch --reporter test/reporters/progress-reporter.cjs",
|
|
77
|
+
"test:fast": "VALIDATE_ENV_VARS=false PROPERTY_NUM_RUNS=10 mocha test/**/*.test.js --exit --parallel --timeout 60000 --reporter dot",
|
|
77
78
|
"test:coverage": "VALIDATE_ENV_VARS=false nyc mocha test/**/*.test.js",
|
|
78
79
|
"test:unit": "VALIDATE_ENV_VARS=false mocha test/unit/**/*.test.js test/helpers/**/*.test.js --reporter spec",
|
|
79
80
|
"test:integration": "VALIDATE_ENV_VARS=false mocha test/input-parsing-and-generation/**/*.test.js --reporter spec",
|
|
@@ -94,6 +95,10 @@
|
|
|
94
95
|
"docs:serve": "mkdocs serve",
|
|
95
96
|
"docs:build": "mkdocs build",
|
|
96
97
|
"docs:deploy": "mkdocs gh-deploy",
|
|
98
|
+
"docs:sync": "node scripts/sync-command-generator.js",
|
|
99
|
+
"docs:check": "node scripts/sync-command-generator.js --check",
|
|
100
|
+
"codegen": "node scripts/codegen-cli.js && node scripts/codegen-validator.js && node scripts/codegen-widget.js",
|
|
101
|
+
"codegen:check": "node scripts/codegen-cli.js && node scripts/codegen-validator.js && node scripts/codegen-widget.js && node scripts/codegen-parity.js",
|
|
97
102
|
"_sbom": "npm sbom --sbom-format spdx > sbom.json && for dir in servers/*/; do [ -f \"$dir/package.json\" ] && (cd \"$dir\" && npm sbom --sbom-format spdx > sbom.json); done",
|
|
98
103
|
"_licenses:review": "license-checker --production --exclude MIT,Apache-2.0,BSD-2-Clause,BSD-3-Clause,ISC,0BSD && for dir in servers/*/; do [ -f \"$dir/package.json\" ] && echo \"\\nChecking $dir\" && (cd \"$dir\" && npx license-checker --production --exclude MIT,Apache-2.0,BSD-2-Clause,BSD-3-Clause,ISC,0BSD); done",
|
|
99
104
|
"_licenses:csv": "license-checker --csv --out ./licenses.csv && for dir in servers/*/; do [ -f \"$dir/package.json\" ] && (cd \"$dir\" && npx license-checker --csv --out licenses.csv); done && cat servers/*/licenses.csv >> licenses.csv",
|