@aws/ml-container-creator 0.9.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +31 -137
- package/package.json +5 -2
- package/src/app.js +5 -0
- package/src/lib/config-manager.js +17 -0
- package/src/lib/generated/cli-options.js +467 -0
- package/src/lib/generated/validation-rules.js +202 -0
- package/templates/code/serve +5 -134
- package/templates/code/serve.d/lmi.ejs +19 -0
- package/templates/code/serve.d/sglang.ejs +47 -0
- package/templates/code/serve.d/tensorrt-llm.ejs +53 -0
- package/templates/code/serve.d/vllm.ejs +48 -0
- package/templates/do/clean +1 -1387
- package/templates/do/clean.d/async-inference.ejs +508 -0
- package/templates/do/clean.d/batch-transform.ejs +512 -0
- package/templates/do/clean.d/hyperpod-eks.ejs +481 -0
- package/templates/do/clean.d/managed-inference.ejs +1043 -0
- package/templates/do/deploy +1 -1766
- package/templates/do/deploy.d/async-inference.ejs +501 -0
- package/templates/do/deploy.d/batch-transform.ejs +529 -0
- package/templates/do/deploy.d/hyperpod-eks.ejs +339 -0
- package/templates/do/deploy.d/managed-inference.ejs +726 -0
package/bin/cli.js
CHANGED
|
@@ -6,6 +6,7 @@ import { createRequire } from 'module';
|
|
|
6
6
|
import path from 'path';
|
|
7
7
|
import { program, Option, Help } from 'commander';
|
|
8
8
|
import { run } from '../src/app.js';
|
|
9
|
+
import { cliOptions, helpGroups } from '../src/lib/generated/cli-options.js';
|
|
9
10
|
|
|
10
11
|
const require = createRequire(import.meta.url);
|
|
11
12
|
const { version } = require('../package.json');
|
|
@@ -23,116 +24,32 @@ program
|
|
|
23
24
|
.version(version)
|
|
24
25
|
.enablePositionalOptions()
|
|
25
26
|
.helpCommand('help [command]', 'Display help for command')
|
|
26
|
-
.argument('[project-name...]', 'Name for the generated project')
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
.
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
.addOption(
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
// --- Endpoint (Real-Time Inference) ---
|
|
53
|
-
.addOption(new Option('--endpoint-initial-instance-count <n>', 'Number of instances for the endpoint (default: 1)'))
|
|
54
|
-
.addOption(new Option('--endpoint-data-capture-percent <pct>', 'Data capture percentage for monitoring, 0-100 (default: 0)'))
|
|
55
|
-
.addOption(new Option('--endpoint-variant-name <name>', 'Production variant name (default: AllTraffic)'))
|
|
56
|
-
.addOption(new Option('--endpoint-volume-size <gb>', 'ML storage volume size in GB'))
|
|
57
|
-
|
|
58
|
-
// --- Inference Component ---
|
|
59
|
-
.addOption(new Option('--ic-cpu-count <n>', 'vCPUs allocated to the inference component'))
|
|
60
|
-
.addOption(new Option('--ic-memory-size <mb>', 'Memory in MB for the inference component'))
|
|
61
|
-
.addOption(new Option('--ic-gpu-count <n>', 'GPUs allocated to the inference component'))
|
|
62
|
-
.addOption(new Option('--ic-copy-count <n>', 'Number of inference component copies (default: 1)'))
|
|
63
|
-
.addOption(new Option('--ic-model-weight <weight>', 'Traffic routing weight, 0-1 (default: 1.0)'))
|
|
64
|
-
|
|
65
|
-
// --- Async Inference ---
|
|
66
|
-
.addOption(new Option('--async-s3-output-path <path>', 'S3 output path for async results'))
|
|
67
|
-
.addOption(new Option('--async-sns-success-topic <arn>', 'SNS topic ARN for success notifications'))
|
|
68
|
-
.addOption(new Option('--async-sns-error-topic <arn>', 'SNS topic ARN for error notifications'))
|
|
69
|
-
.addOption(new Option('--async-max-concurrent <n>', 'Max concurrent invocations per instance (default: 1)'))
|
|
70
|
-
|
|
71
|
-
// --- Batch Transform ---
|
|
72
|
-
.addOption(new Option('--batch-input-path <path>', 'S3 input path for batch data'))
|
|
73
|
-
.addOption(new Option('--batch-output-path <path>', 'S3 output path for batch results'))
|
|
74
|
-
.addOption(new Option('--batch-instance-count <n>', 'Number of instances (default: 1)'))
|
|
75
|
-
.addOption(new Option('--batch-split-type <type>', 'Input split type: Line, RecordIO, None (default: Line)'))
|
|
76
|
-
.addOption(new Option('--batch-strategy <strategy>', 'Batch strategy: MultiRecord, SingleRecord (default: MultiRecord)'))
|
|
77
|
-
.addOption(new Option('--batch-join-source <source>', 'Join source: Input, None (default: None)'))
|
|
78
|
-
.addOption(new Option('--batch-max-concurrent <n>', 'Max concurrent transforms per instance (default: 1)'))
|
|
79
|
-
.addOption(new Option('--batch-max-payload <mb>', 'Max payload size in MB, 0-100 (default: 6)'))
|
|
80
|
-
|
|
81
|
-
// --- HyperPod (EKS) ---
|
|
82
|
-
.addOption(new Option('--hyperpod-cluster <name>', 'HyperPod EKS cluster name'))
|
|
83
|
-
.addOption(new Option('--hyperpod-namespace <ns>', 'Kubernetes namespace (default: default)'))
|
|
84
|
-
.addOption(new Option('--hyperpod-replicas <count>', 'Number of replicas (default: 1)'))
|
|
85
|
-
.addOption(new Option('--fsx-volume-handle <handle>', 'FSx for Lustre volume handle'))
|
|
86
|
-
|
|
87
|
-
// --- Environment Variables ---
|
|
88
|
-
.addOption(new Option('--model-env <KEY=VALUE>', 'Model env var, repeatable (e.g. VLLM_TENSOR_PARALLEL_SIZE=4)').argParser(collect).default([]))
|
|
89
|
-
.addOption(new Option('--server-env <KEY=VALUE>', 'Server env var, repeatable (e.g. SGLANG_MEM_FRACTION=0.9)').argParser(collect).default([]))
|
|
90
|
-
|
|
91
|
-
// --- Authentication ---
|
|
92
|
-
.addOption(new Option('--hf-token <token>', 'HuggingFace token (or "$HF_TOKEN" for env var reference)'))
|
|
93
|
-
.addOption(new Option('--hf-token-arn <arn>', 'HuggingFace token ARN from Secrets Manager'))
|
|
94
|
-
.addOption(new Option('--ngc-token <token>', 'NVIDIA NGC token (or "$NGC_API_KEY" for env var reference)'))
|
|
95
|
-
.addOption(new Option('--ngc-token-arn <arn>', 'NVIDIA NGC token ARN from Secrets Manager'))
|
|
96
|
-
|
|
97
|
-
// --- Optional Features ---
|
|
98
|
-
.addOption(new Option('--include-sample', 'Include sample model code'))
|
|
99
|
-
.addOption(new Option('--include-testing', 'Include test suite'))
|
|
100
|
-
.addOption(new Option('--test-types <types>', 'Comma-separated test types'))
|
|
101
|
-
.addOption(new Option('--enable-lora', 'Enable LoRA adapter serving (transformers with vllm/sglang/djl-lmi only)'))
|
|
102
|
-
.addOption(new Option('--max-loras <n>', 'Maximum concurrent LoRA adapters in GPU memory (default: 30)'))
|
|
103
|
-
.addOption(new Option('--max-lora-rank <n>', 'Maximum LoRA rank (default: 64)'))
|
|
104
|
-
|
|
105
|
-
// --- Benchmarking ---
|
|
106
|
-
.addOption(new Option('--include-benchmark', 'Include SageMaker AI Benchmarking (transformers/diffusors only)'))
|
|
107
|
-
.addOption(new Option('--benchmark-concurrency <n>', 'Benchmark concurrent requests (default: 10)'))
|
|
108
|
-
.addOption(new Option('--benchmark-input-tokens <n>', 'Benchmark mean input tokens (default: 550)'))
|
|
109
|
-
.addOption(new Option('--benchmark-output-tokens <n>', 'Benchmark mean output tokens (default: 150)'))
|
|
110
|
-
.addOption(new Option('--benchmark-streaming', 'Enable streaming in benchmark (default: true)'))
|
|
111
|
-
.addOption(new Option('--benchmark-request-count <n>', 'Total benchmark requests (optional)'))
|
|
112
|
-
.addOption(new Option('--benchmark-s3-output-path <path>', 'S3 path for benchmark results'))
|
|
113
|
-
|
|
114
|
-
// --- MCP & Discovery ---
|
|
115
|
-
.addOption(new Option('--smart', 'Enable Bedrock-powered smart mode on MCP servers'))
|
|
116
|
-
.addOption(new Option('--discover', 'Enable live registry lookups via MCP discovery'))
|
|
117
|
-
|
|
118
|
-
// --- Validation ---
|
|
119
|
-
.addOption(new Option('--no-validate', 'Skip schema-driven validation at generation time'))
|
|
120
|
-
.addOption(new Option('--validate-env-vars', 'Enable environment variable validation (default: true)'))
|
|
121
|
-
.addOption(new Option('--validate-with-docker', 'Enable Docker introspection validation (opt-in)'))
|
|
122
|
-
.addOption(new Option('--offline', 'Disable HuggingFace API lookups'))
|
|
123
|
-
|
|
124
|
-
.action((projectNameArgs, options) => {
|
|
125
|
-
// Mutual exclusion validation: plaintext token and ARN flags cannot both be provided
|
|
126
|
-
if (options.hfToken && options.hfTokenArn) {
|
|
127
|
-
console.error('❌ Cannot specify both --hf-token and --hf-token-arn. Use one or the other.');
|
|
128
|
-
process.exit(1);
|
|
129
|
-
}
|
|
130
|
-
if (options.ngcToken && options.ngcTokenArn) {
|
|
131
|
-
console.error('❌ Cannot specify both --ngc-token and --ngc-token-arn. Use one or the other.');
|
|
132
|
-
process.exit(1);
|
|
133
|
-
}
|
|
134
|
-
return run(projectNameArgs?.[0] || null, options);
|
|
135
|
-
});
|
|
27
|
+
.argument('[project-name...]', 'Name for the generated project');
|
|
28
|
+
|
|
29
|
+
// Register all CLI options from generated schema
|
|
30
|
+
for (const opt of cliOptions) {
|
|
31
|
+
if (opt.hidden) continue;
|
|
32
|
+
const option = new Option(opt.flag, opt.description);
|
|
33
|
+
if (opt.choices) option.choices(opt.choices);
|
|
34
|
+
if (opt.repeatable) {
|
|
35
|
+
option.argParser(collect);
|
|
36
|
+
option.default([]);
|
|
37
|
+
}
|
|
38
|
+
program.addOption(option);
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
program.action((projectNameArgs, options) => {
|
|
42
|
+
// Mutual exclusion validation: plaintext token and ARN flags cannot both be provided
|
|
43
|
+
if (options.hfToken && options.hfTokenArn) {
|
|
44
|
+
console.error('❌ Cannot specify both --hf-token and --hf-token-arn. Use one or the other.');
|
|
45
|
+
process.exit(1);
|
|
46
|
+
}
|
|
47
|
+
if (options.ngcToken && options.ngcTokenArn) {
|
|
48
|
+
console.error('❌ Cannot specify both --ngc-token and --ngc-token-arn. Use one or the other.');
|
|
49
|
+
process.exit(1);
|
|
50
|
+
}
|
|
51
|
+
return run(projectNameArgs?.[0] || null, options);
|
|
52
|
+
});
|
|
136
53
|
|
|
137
54
|
// Custom help formatting — group options into logical sections (root command only)
|
|
138
55
|
program.configureHelp({
|
|
@@ -163,7 +80,7 @@ program.configureHelp({
|
|
|
163
80
|
// Collect all visible options
|
|
164
81
|
const allOptions = helper.visibleOptions(cmd);
|
|
165
82
|
|
|
166
|
-
// Partition options into groups
|
|
83
|
+
// Partition options into groups using schema-derived helpGroups
|
|
167
84
|
const groups = {
|
|
168
85
|
general: [],
|
|
169
86
|
model: [],
|
|
@@ -182,32 +99,9 @@ program.configureHelp({
|
|
|
182
99
|
|
|
183
100
|
for (const opt of allOptions) {
|
|
184
101
|
const long = opt.long || '';
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
groups.model.push(opt);
|
|
189
|
-
} else if (['--deployment-target', '--instance-type', '--region', '--role-arn', '--build-target', '--codebuild-compute-type'].includes(long)) {
|
|
190
|
-
groups.infra.push(opt);
|
|
191
|
-
} else if (long.startsWith('--endpoint-')) {
|
|
192
|
-
groups.endpoint.push(opt);
|
|
193
|
-
} else if (long.startsWith('--ic-')) {
|
|
194
|
-
groups.ic.push(opt);
|
|
195
|
-
} else if (long.startsWith('--async-')) {
|
|
196
|
-
groups.async.push(opt);
|
|
197
|
-
} else if (long.startsWith('--batch-')) {
|
|
198
|
-
groups.batch.push(opt);
|
|
199
|
-
} else if (long.startsWith('--hyperpod-') || long === '--fsx-volume-handle') {
|
|
200
|
-
groups.hyperpod.push(opt);
|
|
201
|
-
} else if (['--model-env', '--server-env'].includes(long)) {
|
|
202
|
-
groups.env.push(opt);
|
|
203
|
-
} else if (['--hf-token', '--hf-token-arn', '--ngc-token', '--ngc-token-arn'].includes(long)) {
|
|
204
|
-
groups.auth.push(opt);
|
|
205
|
-
} else if (['--include-sample', '--include-testing', '--test-types', '--enable-lora', '--max-loras', '--max-lora-rank'].includes(long)) {
|
|
206
|
-
groups.features.push(opt);
|
|
207
|
-
} else if (['--smart', '--discover'].includes(long)) {
|
|
208
|
-
groups.mcp.push(opt);
|
|
209
|
-
} else if (['--validate-env-vars', '--validate-with-docker', '--offline', '--no-validate'].includes(long)) {
|
|
210
|
-
groups.validation.push(opt);
|
|
102
|
+
const section = helpGroups[long] || 'general';
|
|
103
|
+
if (groups[section]) {
|
|
104
|
+
groups[section].push(opt);
|
|
211
105
|
} else {
|
|
212
106
|
groups.general.push(opt);
|
|
213
107
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@aws/ml-container-creator",
|
|
3
|
-
"version": "0.9.0",
|
|
3
|
+
"version": "0.9.1",
|
|
4
4
|
"description": "Generator for SageMaker AI BYOC paradigm for predictive inference use-cases.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "src/app.js",
|
|
@@ -70,10 +70,11 @@
|
|
|
70
70
|
],
|
|
71
71
|
"scripts": {
|
|
72
72
|
"test": "VALIDATE_ENV_VARS=false mocha test/**/*.test.js --exit --reporter test/reporters/progress-reporter.cjs",
|
|
73
|
-
"test:ci": "VALIDATE_ENV_VARS=false mocha test/**/*.test.js --exit --reporter spec",
|
|
73
|
+
"test:ci": "VALIDATE_ENV_VARS=false mocha test/**/*.test.js --exit --parallel --reporter spec",
|
|
74
74
|
"test:verbose": "VALIDATE_ENV_VARS=false mocha test/**/*.test.js --exit --reporter spec",
|
|
75
75
|
"test:minimal": "VALIDATE_ENV_VARS=false mocha test/**/*.test.js --reporter test/reporters/minimal-reporter.cjs",
|
|
76
76
|
"test:watch": "VALIDATE_ENV_VARS=false mocha test/**/*.test.js --watch --reporter test/reporters/progress-reporter.cjs",
|
|
77
|
+
"test:fast": "VALIDATE_ENV_VARS=false PROPERTY_NUM_RUNS=10 mocha test/**/*.test.js --exit --parallel --timeout 60000 --reporter dot",
|
|
77
78
|
"test:coverage": "VALIDATE_ENV_VARS=false nyc mocha test/**/*.test.js",
|
|
78
79
|
"test:unit": "VALIDATE_ENV_VARS=false mocha test/unit/**/*.test.js test/helpers/**/*.test.js --reporter spec",
|
|
79
80
|
"test:integration": "VALIDATE_ENV_VARS=false mocha test/input-parsing-and-generation/**/*.test.js --reporter spec",
|
|
@@ -96,6 +97,8 @@
|
|
|
96
97
|
"docs:deploy": "mkdocs gh-deploy",
|
|
97
98
|
"docs:sync": "node scripts/sync-command-generator.js",
|
|
98
99
|
"docs:check": "node scripts/sync-command-generator.js --check",
|
|
100
|
+
"codegen": "node scripts/codegen-cli.js && node scripts/codegen-validator.js && node scripts/codegen-widget.js",
|
|
101
|
+
"codegen:check": "node scripts/codegen-cli.js && node scripts/codegen-validator.js && node scripts/codegen-widget.js && node scripts/codegen-parity.js",
|
|
99
102
|
"_sbom": "npm sbom --sbom-format spdx > sbom.json && for dir in servers/*/; do [ -f \"$dir/package.json\" ] && (cd \"$dir\" && npm sbom --sbom-format spdx > sbom.json); done",
|
|
100
103
|
"_licenses:review": "license-checker --production --exclude MIT,Apache-2.0,BSD-2-Clause,BSD-3-Clause,ISC,0BSD && for dir in servers/*/; do [ -f \"$dir/package.json\" ] && echo \"\\nChecking $dir\" && (cd \"$dir\" && npx license-checker --production --exclude MIT,Apache-2.0,BSD-2-Clause,BSD-3-Clause,ISC,0BSD); done",
|
|
101
104
|
"_licenses:csv": "license-checker --csv --out ./licenses.csv && for dir in servers/*/; do [ -f \"$dir/package.json\" ] && (cd \"$dir\" && npx license-checker --csv --out licenses.csv); done && cat servers/*/licenses.csv >> licenses.csv",
|
package/src/app.js
CHANGED
|
@@ -296,6 +296,11 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
|
|
|
296
296
|
// Build ignore patterns
|
|
297
297
|
const ignorePatterns = [];
|
|
298
298
|
|
|
299
|
+
// EJS partials — included by templates at render time, not copied to output
|
|
300
|
+
ignorePatterns.push('**/serve.d/**');
|
|
301
|
+
ignorePatterns.push('**/deploy.d/**');
|
|
302
|
+
ignorePatterns.push('**/clean.d/**');
|
|
303
|
+
|
|
299
304
|
if (answers.deploymentTarget !== 'hyperpod-eks') {
|
|
300
305
|
ignorePatterns.push('**/hyperpod/**');
|
|
301
306
|
}
|
package/src/lib/config-manager.js
CHANGED
|
@@ -26,6 +26,7 @@ import DeploymentConfigResolver from './deployment-config-resolver.js';
|
|
|
26
26
|
import BootstrapConfig from './bootstrap-config.js';
|
|
27
27
|
import { parseKeyValue } from './key-value-parser.js';
|
|
28
28
|
import ParameterSchemaValidator from './parameter-schema-validator.js';
|
|
29
|
+
import { validationRules } from './generated/validation-rules.js';
|
|
29
30
|
|
|
30
31
|
const __configMgrFilename = fileURLToPath(import.meta.url);
|
|
31
32
|
const __configMgrDir = dirname(__configMgrFilename);
|
|
@@ -2261,6 +2262,22 @@ export default class ConfigManager {
|
|
|
2261
2262
|
* @private
|
|
2262
2263
|
*/
|
|
2263
2264
|
_validateParameterValue(parameter, value, context = {}) {
|
|
2265
|
+
// First pass: schema-derived validation rules (type, range, pattern, enum)
|
|
2266
|
+
// Skip deprecated params — they have relaxed validation handled by the switch below
|
|
2267
|
+
const schemaRule = validationRules[parameter];
|
|
2268
|
+
if (schemaRule && value !== null && value !== undefined) {
|
|
2269
|
+
// Don't apply strict enum validation to internally-derived values
|
|
2270
|
+
// The switch statement below handles context-dependent validation
|
|
2271
|
+
const skipSchemaValidation = ['framework', 'modelServer', 'deploymentConfig'].includes(parameter);
|
|
2272
|
+
if (!skipSchemaValidation) {
|
|
2273
|
+
const error = schemaRule(value);
|
|
2274
|
+
if (error) {
|
|
2275
|
+
throw new ValidationError(error, parameter, value);
|
|
2276
|
+
}
|
|
2277
|
+
}
|
|
2278
|
+
}
|
|
2279
|
+
|
|
2280
|
+
// Second pass: context-dependent validations that require runtime state
|
|
2264
2281
|
const supportedOptions = this._getSupportedOptions();
|
|
2265
2282
|
|
|
2266
2283
|
switch (parameter) {
|