@aws/ml-container-creator 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143)
  1. package/LICENSE +202 -0
  2. package/LICENSE-THIRD-PARTY +68620 -0
  3. package/NOTICE +2 -0
  4. package/README.md +106 -0
  5. package/bin/cli.js +365 -0
  6. package/config/defaults.json +32 -0
  7. package/config/presets/transformers-djl.json +26 -0
  8. package/config/presets/transformers-gpu.json +24 -0
  9. package/config/presets/transformers-lmi.json +27 -0
  10. package/package.json +129 -0
  11. package/servers/README.md +419 -0
  12. package/servers/base-image-picker/catalogs/model-servers.json +1191 -0
  13. package/servers/base-image-picker/catalogs/python-slim.json +38 -0
  14. package/servers/base-image-picker/catalogs/triton-backends.json +51 -0
  15. package/servers/base-image-picker/catalogs/triton.json +38 -0
  16. package/servers/base-image-picker/index.js +495 -0
  17. package/servers/base-image-picker/manifest.json +17 -0
  18. package/servers/base-image-picker/package.json +15 -0
  19. package/servers/hyperpod-cluster-picker/LICENSE +202 -0
  20. package/servers/hyperpod-cluster-picker/index.js +424 -0
  21. package/servers/hyperpod-cluster-picker/manifest.json +14 -0
  22. package/servers/hyperpod-cluster-picker/package.json +17 -0
  23. package/servers/instance-recommender/LICENSE +202 -0
  24. package/servers/instance-recommender/catalogs/instances.json +852 -0
  25. package/servers/instance-recommender/index.js +284 -0
  26. package/servers/instance-recommender/manifest.json +16 -0
  27. package/servers/instance-recommender/package.json +15 -0
  28. package/servers/lib/LICENSE +202 -0
  29. package/servers/lib/bedrock-client.js +160 -0
  30. package/servers/lib/custom-validators.js +46 -0
  31. package/servers/lib/dynamic-resolver.js +36 -0
  32. package/servers/lib/package.json +11 -0
  33. package/servers/lib/schemas/image-catalog.schema.json +185 -0
  34. package/servers/lib/schemas/instances.schema.json +124 -0
  35. package/servers/lib/schemas/manifest.schema.json +64 -0
  36. package/servers/lib/schemas/model-catalog.schema.json +91 -0
  37. package/servers/lib/schemas/regions.schema.json +26 -0
  38. package/servers/lib/schemas/triton-backends.schema.json +51 -0
  39. package/servers/model-picker/catalogs/jumpstart-public.json +66 -0
  40. package/servers/model-picker/catalogs/popular-diffusors.json +88 -0
  41. package/servers/model-picker/catalogs/popular-transformers.json +226 -0
  42. package/servers/model-picker/index.js +1693 -0
  43. package/servers/model-picker/manifest.json +18 -0
  44. package/servers/model-picker/package.json +20 -0
  45. package/servers/region-picker/LICENSE +202 -0
  46. package/servers/region-picker/catalogs/regions.json +263 -0
  47. package/servers/region-picker/index.js +230 -0
  48. package/servers/region-picker/manifest.json +16 -0
  49. package/servers/region-picker/package.json +15 -0
  50. package/src/app.js +1007 -0
  51. package/src/copy-tpl.js +77 -0
  52. package/src/lib/accelerator-validator.js +39 -0
  53. package/src/lib/asset-manager.js +385 -0
  54. package/src/lib/aws-profile-parser.js +181 -0
  55. package/src/lib/bootstrap-command-handler.js +1647 -0
  56. package/src/lib/bootstrap-config.js +238 -0
  57. package/src/lib/ci-register-helpers.js +124 -0
  58. package/src/lib/ci-report-helpers.js +158 -0
  59. package/src/lib/ci-stage-helpers.js +268 -0
  60. package/src/lib/cli-handler.js +529 -0
  61. package/src/lib/comment-generator.js +544 -0
  62. package/src/lib/community-reports-validator.js +91 -0
  63. package/src/lib/config-manager.js +2106 -0
  64. package/src/lib/configuration-exporter.js +204 -0
  65. package/src/lib/configuration-manager.js +695 -0
  66. package/src/lib/configuration-matcher.js +221 -0
  67. package/src/lib/cpu-validator.js +36 -0
  68. package/src/lib/cuda-validator.js +57 -0
  69. package/src/lib/deployment-config-resolver.js +103 -0
  70. package/src/lib/deployment-entry-schema.js +125 -0
  71. package/src/lib/deployment-registry.js +598 -0
  72. package/src/lib/docker-introspection-validator.js +51 -0
  73. package/src/lib/engine-prefix-resolver.js +60 -0
  74. package/src/lib/huggingface-client.js +172 -0
  75. package/src/lib/key-value-parser.js +37 -0
  76. package/src/lib/known-flags-validator.js +200 -0
  77. package/src/lib/manifest-cli.js +280 -0
  78. package/src/lib/mcp-client.js +303 -0
  79. package/src/lib/mcp-command-handler.js +532 -0
  80. package/src/lib/neuron-validator.js +80 -0
  81. package/src/lib/parameter-schema-validator.js +284 -0
  82. package/src/lib/prompt-runner.js +1349 -0
  83. package/src/lib/prompts.js +1138 -0
  84. package/src/lib/registry-command-handler.js +519 -0
  85. package/src/lib/registry-loader.js +198 -0
  86. package/src/lib/rocm-validator.js +80 -0
  87. package/src/lib/schema-validator.js +157 -0
  88. package/src/lib/sensitive-redactor.js +59 -0
  89. package/src/lib/template-engine.js +156 -0
  90. package/src/lib/template-manager.js +341 -0
  91. package/src/lib/validation-engine.js +314 -0
  92. package/src/prompt-adapter.js +63 -0
  93. package/templates/Dockerfile +300 -0
  94. package/templates/IAM_PERMISSIONS.md +84 -0
  95. package/templates/MIGRATION.md +488 -0
  96. package/templates/PROJECT_README.md +439 -0
  97. package/templates/TEMPLATE_SYSTEM.md +243 -0
  98. package/templates/buildspec.yml +64 -0
  99. package/templates/code/chat_template.jinja +1 -0
  100. package/templates/code/flask/gunicorn_config.py +35 -0
  101. package/templates/code/flask/wsgi.py +10 -0
  102. package/templates/code/model_handler.py +387 -0
  103. package/templates/code/serve +300 -0
  104. package/templates/code/serve.py +175 -0
  105. package/templates/code/serving.properties +105 -0
  106. package/templates/code/start_server.py +39 -0
  107. package/templates/code/start_server.sh +39 -0
  108. package/templates/diffusors/Dockerfile +72 -0
  109. package/templates/diffusors/patch_image_api.py +35 -0
  110. package/templates/diffusors/serve +115 -0
  111. package/templates/diffusors/start_server.sh +114 -0
  112. package/templates/do/.gitkeep +1 -0
  113. package/templates/do/README.md +541 -0
  114. package/templates/do/build +83 -0
  115. package/templates/do/ci +681 -0
  116. package/templates/do/clean +811 -0
  117. package/templates/do/config +260 -0
  118. package/templates/do/deploy +1560 -0
  119. package/templates/do/export +306 -0
  120. package/templates/do/logs +319 -0
  121. package/templates/do/manifest +12 -0
  122. package/templates/do/push +119 -0
  123. package/templates/do/register +580 -0
  124. package/templates/do/run +113 -0
  125. package/templates/do/submit +417 -0
  126. package/templates/do/test +1147 -0
  127. package/templates/hyperpod/configmap.yaml +24 -0
  128. package/templates/hyperpod/deployment.yaml +71 -0
  129. package/templates/hyperpod/pvc.yaml +42 -0
  130. package/templates/hyperpod/service.yaml +17 -0
  131. package/templates/nginx-diffusors.conf +74 -0
  132. package/templates/nginx-predictors.conf +47 -0
  133. package/templates/nginx-tensorrt.conf +74 -0
  134. package/templates/requirements.txt +61 -0
  135. package/templates/sample_model/test_inference.py +123 -0
  136. package/templates/sample_model/train_abalone.py +252 -0
  137. package/templates/test/test_endpoint.sh +79 -0
  138. package/templates/test/test_local_image.sh +80 -0
  139. package/templates/test/test_model_handler.py +180 -0
  140. package/templates/triton/Dockerfile +128 -0
  141. package/templates/triton/config.pbtxt +163 -0
  142. package/templates/triton/model.py +130 -0
  143. package/templates/triton/requirements.txt +11 -0
package/NOTICE ADDED
@@ -0,0 +1,2 @@
1
+ awslabs/ml-container-creator
2
+ Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
package/README.md ADDED
@@ -0,0 +1,106 @@
1
+ # ML Container Creator
2
+
3
+ A CLI tool that creates SageMaker-compatible Docker containers for deploying ML models using the Bring Your Own Container (BYOC) paradigm.
4
+
5
+ > **Note:** This is a pre-release (`0.x`). APIs may change between minor versions. Weekly releases are planned until v1.
6
+
7
+ ## Supported Configurations
8
+
9
+ | Architecture | Model Servers | Use Case |
10
+ |---|---|---|
11
+ | HTTP (traditional ML) | Flask, FastAPI | sklearn, XGBoost, TensorFlow |
12
+ | Transformers (LLMs) | vLLM, SGLang, TensorRT-LLM, DJL/LMI | HuggingFace models, JumpStart, S3 |
13
+ | Triton | FIL, ONNX, Python, TensorRT-LLM, vLLM | Multi-framework serving |
14
+ | Diffusors | vLLM | Image generation models |
15
+
16
+ | Deployment Target | Description |
17
+ |---|---|
18
+ | Managed Inference | SageMaker real-time endpoints |
19
+ | Async Inference | SageMaker async endpoints with S3 output |
20
+ | Batch Transform | SageMaker batch processing |
21
+ | HyperPod EKS | Kubernetes-based deployment |
22
+
23
+ ## Quick Start
24
+
25
+ ### Install from npm
26
+
27
+ ```bash
28
+ npm install -g @aws/ml-container-creator
29
+ ```
30
+
31
+ ### Or use without installing (npx)
32
+
33
+ ```bash
34
+ npx @aws/ml-container-creator --help
35
+ ```
36
+
37
+ ### Or install from source
38
+
39
+ ```bash
40
+ git clone https://github.com/awslabs/ml-container-creator.git
41
+ cd ml-container-creator
42
+ npm install && npm link
43
+ ```
44
+
45
+ ### Bootstrap AWS infrastructure (one-time)
46
+
47
+ ```bash
48
+ ml-container-creator bootstrap
49
+ ```
50
+
51
+ Sets up an IAM execution role, ECR repository, optional S3 buckets, and optional CI Integration Harness for automated testing. Configuration is saved to `~/.ml-container-creator/config.json`.
52
+
53
+ ### Generate a project
54
+
55
+ ```bash
56
+ # Interactive
57
+ ml-container-creator
58
+
59
+ # Non-interactive
60
+ ml-container-creator my-model \
61
+ --deployment-config=transformers-vllm \
62
+ --model-name=openai/gpt-oss-20b \
63
+ --instance-type=ml.g6.12xlarge \
64
+ --region=us-east-1 \
65
+ --skip-prompts
66
+ ```
67
+
68
+ ### Build, push, deploy
69
+
70
+ ```bash
71
+ ./do/build # Build Docker image
72
+ ./do/push # Push to Amazon ECR
73
+ ./do/deploy # Deploy to SageMaker
74
+ ./do/test # Test the endpoint
75
+ ```
76
+
77
+ ## Documentation
78
+
79
+ Full documentation is available at [awslabs.github.io/ml-container-creator](https://awslabs.github.io/ml-container-creator/).
80
+
81
+ - [Getting Started](https://awslabs.github.io/ml-container-creator/getting-started/) — Installation and walkthroughs
82
+ - [Configuration](https://awslabs.github.io/ml-container-creator/configuration/) — CLI flags, env vars, config files, MCP servers
83
+ - [Deployment Guide](https://awslabs.github.io/ml-container-creator/deployments/) — All deployment targets and lifecycle scripts
84
+ - [CI Integration](https://awslabs.github.io/ml-container-creator/ci-integration/) — Automated lifecycle testing for all deployment configurations
85
+ - [Examples](https://awslabs.github.io/ml-container-creator/EXAMPLES/) — Framework-specific walkthroughs
86
+ - [Troubleshooting](https://awslabs.github.io/ml-container-creator/TROUBLESHOOTING/) — Common issues and solutions
87
+
88
+ ## Prerequisites
89
+
90
+ | Tool | Version | Purpose |
91
+ |---|---|---|
92
+ | [Node.js](https://nodejs.org/) | 24+ | Runs the CLI |
93
+ | [Docker](https://docs.docker.com/get-docker/) | 20+ | Container builds |
94
+ | [AWS CLI](https://aws.amazon.com/cli/) | 2+ | AWS resource management |
95
+
96
+ ## Contributing
97
+
98
+ See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
99
+
100
+ ## Security
101
+
102
+ See [CONTRIBUTING.md](CONTRIBUTING.md#security-issue-notifications) for reporting security issues.
103
+
104
+ ## License
105
+
106
+ Apache-2.0. See [LICENSE](LICENSE).
package/bin/cli.js ADDED
@@ -0,0 +1,365 @@
1
+ #!/usr/bin/env node
2
+ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3
+ // SPDX-License-Identifier: Apache-2.0
4
+
5
+ import { createRequire } from 'module'
6
+ import path from 'path'
7
+ import { program, Option, Help } from 'commander'
8
+ import { run } from '../src/app.js'
9
+
10
+ const require = createRequire(import.meta.url)
11
+ const { version } = require('../package.json')
12
+
13
+ /**
14
+ * Collect repeatable options into an array.
15
+ * Used for --model-env and --server-env which can be specified multiple times.
16
+ */
17
+ function collect(value, previous) {
18
+ return previous.concat([value])
19
+ }
20
+
21
+ program
22
+ .name('ml-container-creator')
23
+ .version(version)
24
+ .enablePositionalOptions()
25
+ .passThroughOptions()
26
+ .helpCommand('help [command]', 'Display help for command')
27
+ .argument('[project-name]', 'Name for the generated project')
28
+
29
+ // --- General ---
30
+ .addOption(new Option('--skip-prompts', 'Skip interactive prompts and use configuration from other sources'))
31
+ .addOption(new Option('--config <path>', 'Path to configuration file'))
32
+ .addOption(new Option('--project-name <name>', 'Project name'))
33
+ .addOption(new Option('--project-dir <dir>', 'Output directory path'))
34
+
35
+ // --- Model & Framework ---
36
+ .addOption(new Option('--deployment-config <config>', 'Deployment configuration (e.g. http-flask, transformers-vllm, triton-fil)'))
37
+ .addOption(new Option('--framework <framework>', 'ML framework — DEPRECATED: use --deployment-config').choices(['sklearn', 'xgboost', 'tensorflow', 'transformers']).hideHelp())
38
+ .addOption(new Option('--model-format <format>', 'Model serialization format (pkl, joblib, json, model, ubj, keras, h5, SavedModel)'))
39
+ .addOption(new Option('--model-name <name>', 'Model identifier (HuggingFace ID, s3://, jumpstart://, registry://)'))
40
+ .addOption(new Option('--model-server <server>', 'Model server — DEPRECATED: use --deployment-config').choices(['flask', 'fastapi', 'vllm', 'sglang']).hideHelp())
41
+ .addOption(new Option('--base-image <image>', 'Base container image for Dockerfile'))
42
+
43
+ // --- Build & Infrastructure ---
44
+ .addOption(new Option('--deployment-target <target>', 'Deployment target (managed-inference, async-inference, batch-transform, hyperpod-eks)'))
45
+ .addOption(new Option('--instance-type <type>', 'SageMaker instance type (e.g. ml.g5.xlarge, ml.m5.large)'))
46
+ .addOption(new Option('--region <region>', 'AWS region'))
47
+ .addOption(new Option('--role-arn <arn>', 'IAM role ARN for SageMaker execution'))
48
+ .addOption(new Option('--build-target <target>', 'Build target (codebuild)'))
49
+ .addOption(new Option('--codebuild-compute-type <type>', 'CodeBuild compute type (SMALL, MEDIUM, LARGE)'))
50
+
51
+ // --- Endpoint (Real-Time Inference) ---
52
+ .addOption(new Option('--endpoint-initial-instance-count <n>', 'Number of instances for the endpoint (default: 1)'))
53
+ .addOption(new Option('--endpoint-data-capture-percent <pct>', 'Data capture percentage for monitoring, 0-100 (default: 0)'))
54
+ .addOption(new Option('--endpoint-variant-name <name>', 'Production variant name (default: AllTraffic)'))
55
+ .addOption(new Option('--endpoint-volume-size <gb>', 'ML storage volume size in GB'))
56
+
57
+ // --- Inference Component ---
58
+ .addOption(new Option('--ic-cpu-count <n>', 'vCPUs allocated to the inference component'))
59
+ .addOption(new Option('--ic-memory-size <mb>', 'Memory in MB for the inference component'))
60
+ .addOption(new Option('--ic-gpu-count <n>', 'GPUs allocated to the inference component'))
61
+ .addOption(new Option('--ic-copy-count <n>', 'Number of inference component copies (default: 1)'))
62
+ .addOption(new Option('--ic-model-weight <weight>', 'Traffic routing weight, 0-1 (default: 1.0)'))
63
+
64
+ // --- Async Inference ---
65
+ .addOption(new Option('--async-s3-output-path <path>', 'S3 output path for async results'))
66
+ .addOption(new Option('--async-sns-success-topic <arn>', 'SNS topic ARN for success notifications'))
67
+ .addOption(new Option('--async-sns-error-topic <arn>', 'SNS topic ARN for error notifications'))
68
+ .addOption(new Option('--async-max-concurrent <n>', 'Max concurrent invocations per instance (default: 1)'))
69
+
70
+ // --- Batch Transform ---
71
+ .addOption(new Option('--batch-input-path <path>', 'S3 input path for batch data'))
72
+ .addOption(new Option('--batch-output-path <path>', 'S3 output path for batch results'))
73
+ .addOption(new Option('--batch-instance-count <n>', 'Number of instances (default: 1)'))
74
+ .addOption(new Option('--batch-split-type <type>', 'Input split type: Line, RecordIO, None (default: Line)'))
75
+ .addOption(new Option('--batch-strategy <strategy>', 'Batch strategy: MultiRecord, SingleRecord (default: MultiRecord)'))
76
+ .addOption(new Option('--batch-join-source <source>', 'Join source: Input, None (default: None)'))
77
+ .addOption(new Option('--batch-max-concurrent <n>', 'Max concurrent transforms per instance (default: 1)'))
78
+ .addOption(new Option('--batch-max-payload <mb>', 'Max payload size in MB, 0-100 (default: 6)'))
79
+
80
+ // --- HyperPod (EKS) ---
81
+ .addOption(new Option('--hyperpod-cluster <name>', 'HyperPod EKS cluster name'))
82
+ .addOption(new Option('--hyperpod-namespace <ns>', 'Kubernetes namespace (default: default)'))
83
+ .addOption(new Option('--hyperpod-replicas <count>', 'Number of replicas (default: 1)'))
84
+ .addOption(new Option('--fsx-volume-handle <handle>', 'FSx for Lustre volume handle'))
85
+
86
+ // --- Environment Variables ---
87
+ .addOption(new Option('--model-env <KEY=VALUE>', 'Model env var, repeatable (e.g. VLLM_TENSOR_PARALLEL_SIZE=4)').argParser(collect).default([]))
88
+ .addOption(new Option('--server-env <KEY=VALUE>', 'Server env var, repeatable (e.g. SGLANG_MEM_FRACTION=0.9)').argParser(collect).default([]))
89
+
90
+ // --- Authentication ---
91
+ .addOption(new Option('--hf-token <token>', 'HuggingFace token (or "$HF_TOKEN" for env var reference)'))
92
+
93
+ // --- Optional Features ---
94
+ .addOption(new Option('--include-sample', 'Include sample model code'))
95
+ .addOption(new Option('--include-testing', 'Include test suite'))
96
+ .addOption(new Option('--test-types <types>', 'Comma-separated test types'))
97
+
98
+ // --- MCP & Discovery ---
99
+ .addOption(new Option('--smart', 'Enable Bedrock-powered smart mode on MCP servers'))
100
+ .addOption(new Option('--discover', 'Enable live registry lookups via MCP discovery'))
101
+
102
+ // --- Validation ---
103
+ .addOption(new Option('--validate-env-vars', 'Enable environment variable validation (default: true)'))
104
+ .addOption(new Option('--validate-with-docker', 'Enable Docker introspection validation (opt-in)'))
105
+ .addOption(new Option('--offline', 'Disable HuggingFace API lookups'))
106
+
107
+ .action(run)
108
+
109
+ // Custom help formatting — group options into logical sections (root command only)
110
+ program.configureHelp({
111
+ formatHelp(cmd, helper) {
112
+ // Only apply custom grouping to the root command
113
+ if (cmd !== program) {
114
+ // Fall back to default Commander formatting for subcommands
115
+ return Help.prototype.formatHelp.call(this, cmd, helper)
116
+ }
117
+
118
+ const termWidth = helper.padWidth(cmd, helper)
119
+
120
+ function callFormatItem(term, description) {
121
+ return helper.formatItem(term, termWidth, description, helper)
122
+ }
123
+
124
+ function formatSection(title, options) {
125
+ if (options.length === 0) return []
126
+ const lines = options.map(opt => {
127
+ return callFormatItem(
128
+ helper.styleOptionTerm(helper.optionTerm(opt)),
129
+ helper.styleOptionDescription(helper.optionDescription(opt))
130
+ )
131
+ })
132
+ return [helper.styleTitle(`${title}:`), ...lines, '']
133
+ }
134
+
135
+ // Collect all visible options
136
+ const allOptions = helper.visibleOptions(cmd)
137
+
138
+ // Partition options into groups by flag prefix/purpose
139
+ const groups = {
140
+ general: [],
141
+ model: [],
142
+ infra: [],
143
+ endpoint: [],
144
+ ic: [],
145
+ async: [],
146
+ batch: [],
147
+ hyperpod: [],
148
+ env: [],
149
+ auth: [],
150
+ features: [],
151
+ mcp: [],
152
+ validation: []
153
+ }
154
+
155
+ for (const opt of allOptions) {
156
+ const long = opt.long || ''
157
+ if (['--skip-prompts', '--config', '--project-name', '--project-dir', '--version', '--help'].includes(long)) {
158
+ groups.general.push(opt)
159
+ } else if (['--deployment-config', '--framework', '--model-format', '--model-name', '--model-server', '--base-image'].includes(long)) {
160
+ groups.model.push(opt)
161
+ } else if (['--deployment-target', '--instance-type', '--region', '--role-arn', '--build-target', '--codebuild-compute-type'].includes(long)) {
162
+ groups.infra.push(opt)
163
+ } else if (long.startsWith('--endpoint-')) {
164
+ groups.endpoint.push(opt)
165
+ } else if (long.startsWith('--ic-')) {
166
+ groups.ic.push(opt)
167
+ } else if (long.startsWith('--async-')) {
168
+ groups.async.push(opt)
169
+ } else if (long.startsWith('--batch-')) {
170
+ groups.batch.push(opt)
171
+ } else if (long.startsWith('--hyperpod-') || long === '--fsx-volume-handle') {
172
+ groups.hyperpod.push(opt)
173
+ } else if (['--model-env', '--server-env'].includes(long)) {
174
+ groups.env.push(opt)
175
+ } else if (['--hf-token'].includes(long)) {
176
+ groups.auth.push(opt)
177
+ } else if (['--include-sample', '--include-testing', '--test-types'].includes(long)) {
178
+ groups.features.push(opt)
179
+ } else if (['--smart', '--discover'].includes(long)) {
180
+ groups.mcp.push(opt)
181
+ } else if (['--validate-env-vars', '--validate-with-docker', '--offline'].includes(long)) {
182
+ groups.validation.push(opt)
183
+ } else {
184
+ groups.general.push(opt)
185
+ }
186
+ }
187
+
188
+ // Build output
189
+ let output = [
190
+ `${helper.styleTitle('Usage:')} ${helper.styleUsage(helper.commandUsage(cmd))}`,
191
+ ''
192
+ ]
193
+
194
+ // Arguments
195
+ const args = helper.visibleArguments(cmd)
196
+ if (args.length > 0) {
197
+ const argList = args.map(arg => {
198
+ return callFormatItem(
199
+ helper.styleArgumentTerm(helper.argumentTerm(arg)),
200
+ helper.styleArgumentDescription(helper.argumentDescription(arg))
201
+ )
202
+ })
203
+ output = output.concat([helper.styleTitle('Arguments:'), ...argList, ''])
204
+ }
205
+
206
+ // Option sections
207
+ output = output.concat(formatSection('General', groups.general))
208
+ output = output.concat(formatSection('Model & Framework', groups.model))
209
+ output = output.concat(formatSection('Build & Infrastructure', groups.infra))
210
+ output = output.concat(formatSection('Endpoint (Real-Time Inference)', groups.endpoint))
211
+ output = output.concat(formatSection('Inference Component', groups.ic))
212
+ output = output.concat(formatSection('Async Inference', groups.async))
213
+ output = output.concat(formatSection('Batch Transform', groups.batch))
214
+ output = output.concat(formatSection('HyperPod (EKS)', groups.hyperpod))
215
+ output = output.concat(formatSection('Environment Variables', groups.env))
216
+ output = output.concat(formatSection('Authentication', groups.auth))
217
+ output = output.concat(formatSection('Optional Features', groups.features))
218
+ output = output.concat(formatSection('MCP & Discovery', groups.mcp))
219
+ output = output.concat(formatSection('Validation', groups.validation))
220
+
221
+ // Commands
222
+ const cmds = helper.visibleCommands(cmd)
223
+ if (cmds.length > 0) {
224
+ const cmdList = cmds.map(sub => {
225
+ return callFormatItem(
226
+ helper.styleSubcommandTerm(helper.subcommandTerm(sub)),
227
+ helper.styleSubcommandDescription(helper.subcommandDescription(sub))
228
+ )
229
+ })
230
+ output = output.concat([helper.styleTitle('Commands:'), ...cmdList, ''])
231
+ }
232
+
233
+ return output.join('\n')
234
+ }
235
+ })
236
+
237
+ // Sub-commands — wired to actual handlers
238
+
239
+ program
240
+ .command('bootstrap')
241
+ .description('Set up AWS infrastructure (IAM role, ECR repo, S3 buckets)')
242
+ .argument('[action]', 'Bootstrap action (status, use, list, remove, scan, prune, update)')
243
+ .argument('[args...]', 'Additional arguments')
244
+ .option('--profile <profile>', 'AWS profile name')
245
+ .option('--region <region>', 'AWS region')
246
+ .option('--role-arn <arn>', 'Existing IAM role ARN to use')
247
+ .option('--non-interactive', 'Run without prompts (requires --profile and --region)')
248
+ .option('--force', 'Force removal without confirmation')
249
+ .option('--verify', 'Verify resources exist (for status)')
250
+ .option('--delete-stack', 'Delete CloudFormation stack on remove')
251
+ .action(async (action, args, options) => {
252
+ const { default: BootstrapCommandHandler } = await import('../src/lib/bootstrap-command-handler.js')
253
+ const handler = new BootstrapCommandHandler()
254
+ const allArgs = action ? [action, ...args] : []
255
+ await handler.handle(allArgs, options)
256
+ })
257
+
258
+ program
259
+ .command('mcp')
260
+ .description('Manage MCP servers (add, list, get, remove, init)')
261
+ .argument('<action>', 'MCP action (add, list, get, remove, init)')
262
+ .argument('[args...]', 'Additional arguments')
263
+ .option('-e <env>', 'Environment variable in KEY=VALUE format (for add)')
264
+ .option('--tool-name <name>', 'Tool name for MCP server (for add)')
265
+ .option('--limit <n>', 'Result limit for MCP server (for add)')
266
+ .option('--bundled', 'Use a bundled server from servers/ directory')
267
+ .action(async (action, args, options) => {
268
+ const { default: McpCommandHandler } = await import('../src/lib/mcp-command-handler.js')
269
+ const { runPrompts } = await import('../src/prompt-adapter.js')
270
+ // McpCommandHandler expects a generator-like object with destinationPath() and prompt()
271
+ const generatorAdapter = {
272
+ destinationPath(...segments) {
273
+ if (segments.length === 0) return process.cwd()
274
+ return path.join(process.cwd(), ...segments)
275
+ },
276
+ async prompt(prompts) {
277
+ return runPrompts(prompts)
278
+ }
279
+ }
280
+ const handler = new McpCommandHandler(generatorAdapter)
281
+ await handler.handle([action, ...args], options)
282
+ })
283
+
284
+ program
285
+ .command('registry')
286
+ .description('Registry operations (list, get, remove, replay, export, import, search) — experimental, may be reconciled with do/register')
287
+ .argument('<action>', 'Registry action (log, list, get, remove, replay, export, import, search)')
288
+ .argument('[args...]', 'Additional arguments')
289
+ .option('--backend <backend>', 'Filter by backend')
290
+ .option('--architecture <arch>', 'Filter by architecture')
291
+ .option('--model <model>', 'Filter by model name')
292
+ .option('--instance-type <type>', 'Filter by instance type')
293
+ .option('--status <status>', 'Filter by status')
294
+ .option('--merge', 'Merge on import')
295
+ .option('--replace', 'Replace on import')
296
+ // Options used by `registry log` (called from do/register)
297
+ .option('--deployment-config <config>', 'Deployment configuration')
298
+ .option('--region <region>', 'AWS region')
299
+ .option('--deployment-target <target>', 'Deployment target')
300
+ .option('--build-target <target>', 'Build target')
301
+ .option('--model-name <name>', 'Model name')
302
+ .option('--model-format <format>', 'Model format')
303
+ .option('--base-image <image>', 'Base container image')
304
+ .option('--notes <text>', 'Deployment notes')
305
+ .option('--project', 'Use project-level registry')
306
+ .option('--parameters <json>', 'Parameters JSON string')
307
+ .option('--generator-version <version>', 'Generator version')
308
+ .action(async (action, args, options) => {
309
+ const { default: RegistryCommandHandler } = await import('../src/lib/registry-command-handler.js')
310
+ const handler = new RegistryCommandHandler()
311
+ await handler.handle([action, ...args], options)
312
+ })
313
+
314
+ program
315
+ .command('configure')
316
+ .description('Interactive configuration setup (experimental)')
317
+ .action(async () => {
318
+ const { runPrompts } = await import('../src/prompt-adapter.js')
319
+ const { default: ConfigurationExporter } = await import('../src/lib/configuration-exporter.js')
320
+
321
+ console.log('\n🔧 ML Container Creator Configuration (experimental)')
322
+ console.log('\nThis will help you set up configuration files for your project.\n')
323
+
324
+ const answers = await runPrompts([
325
+ {
326
+ type: 'list',
327
+ name: 'configType',
328
+ message: 'What type of configuration would you like to create?',
329
+ choices: [
330
+ { name: 'Show CLI option examples', value: 'cli' },
331
+ { name: 'Show environment variable examples', value: 'env' }
332
+ ]
333
+ }
334
+ ])
335
+
336
+ if (answers.configType === 'cli') {
337
+ console.log(`
338
+ 💻 CLI Examples:
339
+
340
+ # Basic sklearn project
341
+ ml-container-creator --deployment-config=http-flask --model-format=pkl --skip-prompts
342
+
343
+ # Transformers with vLLM
344
+ ml-container-creator --deployment-config=transformers-vllm \\
345
+ --model-name=meta-llama/Llama-2-7b-chat-hf \\
346
+ --instance-type=ml.g5.xlarge --skip-prompts
347
+
348
+ # Using a config file
349
+ ml-container-creator --config=my-config.json --skip-prompts
350
+ `)
351
+ } else if (answers.configType === 'env') {
352
+ console.log(`
353
+ 🌍 Environment Variables:
354
+
355
+ export ML_INSTANCE_TYPE="ml.m5.large"
356
+ export AWS_REGION="us-east-1"
357
+ export AWS_ROLE="arn:aws:iam::123456789012:role/SageMakerRole"
358
+ export HF_TOKEN="hf_..."
359
+
360
+ Then run: ml-container-creator --deployment-config=http-flask --skip-prompts
361
+ `)
362
+ }
363
+ })
364
+
365
+ program.parse()
@@ -0,0 +1,32 @@
1
+ {
2
+ "generator": {
3
+ "name": "ml-container-creator",
4
+ "version": "1.0.0"
5
+ },
6
+ "defaults": {
7
+ "framework": "sklearn",
8
+ "modelFormat": "pkl",
9
+ "modelServer": "flask",
10
+ "deployTarget": "sagemaker",
11
+ "instanceType": "cpu-optimized",
12
+ "awsRegion": "us-east-1",
13
+ "includeTesting": true,
14
+ "testTypes": ["local-model-cli", "local-model-server", "hosted-model-endpoint"],
15
+ "includeSampleModel": false,
16
+ "skipPrompts": false
17
+ },
18
+ "validation": {
19
+ "supportedFrameworks": ["sklearn", "xgboost", "tensorflow", "transformers"],
20
+ "supportedModelServers": ["flask", "fastapi", "vllm", "sglang"],
21
+ "supportedInstanceTypes": ["cpu-optimized", "gpu-enabled"],
22
+ "supportedRegions": ["us-east-1", "us-west-2", "eu-west-1"]
23
+ },
24
+ "templates": {
25
+ "ignorePatterns": [
26
+ "**/node_modules/**",
27
+ "**/.git/**",
28
+ "**/dist/**",
29
+ "**/*.log"
30
+ ]
31
+ }
32
+ }
@@ -0,0 +1,26 @@
1
+ {
2
+ "name": "Transformers DJL Preset",
3
+ "description": "DJL Serving configuration with PyTorch backend",
4
+ "config": {
5
+ "framework": "transformers",
6
+ "modelServer": "djl",
7
+ "frameworkVersion": "0.32.0",
8
+ "instanceType": "gpu-enabled",
9
+ "awsRegion": "us-east-1",
10
+ "includeTesting": true,
11
+ "testTypes": ["hosted-model-endpoint"],
12
+ "includeSampleModel": false,
13
+ "skipPrompts": true
14
+ },
15
+ "environment": {
16
+ "ENGINE": "Python",
17
+ "OPTION_DEVICE_MAP": "auto",
18
+ "OPTION_TENSOR_PARALLEL_DEGREE": "1"
19
+ },
20
+ "dockerOptions": {
21
+ "baseImage": "deepjavalibrary/djl-serving:0.32.0-pytorch-cu126",
22
+ "gpuSupport": true,
23
+ "sharedMemorySize": "2g"
24
+ },
25
+ "notes": "DJL Serving provides flexible model serving with Java-based infrastructure and Python engine support"
26
+ }
@@ -0,0 +1,24 @@
1
+ {
2
+ "name": "Transformers GPU Preset",
3
+ "description": "Optimized configuration for transformer models with GPU acceleration",
4
+ "config": {
5
+ "framework": "transformers",
6
+ "modelServer": "vllm",
7
+ "instanceType": "gpu-enabled",
8
+ "awsRegion": "us-east-1",
9
+ "includeTesting": true,
10
+ "testTypes": ["hosted-model-endpoint"],
11
+ "includeSampleModel": false,
12
+ "skipPrompts": true
13
+ },
14
+ "environment": {
15
+ "CUDA_VISIBLE_DEVICES": "0",
16
+ "TRANSFORMERS_CACHE": "/opt/ml/model/cache",
17
+ "HF_HOME": "/opt/ml/model/hf_cache"
18
+ },
19
+ "dockerOptions": {
20
+ "baseImage": "nvidia/cuda:12.1-runtime-ubuntu20.04",
21
+ "gpuSupport": true,
22
+ "sharedMemorySize": "2g"
23
+ }
24
+ }
@@ -0,0 +1,27 @@
1
+ {
2
+ "name": "Transformers LMI Preset",
3
+ "description": "AWS Large Model Inference (LMI) configuration with automatic backend selection",
4
+ "config": {
5
+ "framework": "transformers",
6
+ "modelServer": "lmi",
7
+ "frameworkVersion": "14.0.0",
8
+ "instanceType": "gpu-enabled",
9
+ "awsRegion": "us-east-1",
10
+ "includeTesting": true,
11
+ "testTypes": ["hosted-model-endpoint"],
12
+ "includeSampleModel": false,
13
+ "skipPrompts": true
14
+ },
15
+ "environment": {
16
+ "OPTION_ROLLING_BATCH": "auto",
17
+ "OPTION_MAX_ROLLING_BATCH_SIZE": "32",
18
+ "OPTION_DTYPE": "fp16",
19
+ "OPTION_GPU_MEMORY_UTILIZATION": "0.9"
20
+ },
21
+ "dockerOptions": {
22
+ "baseImage": "763104351884.dkr.ecr.us-east-1.amazonaws.com/djl-inference:0.32.0-lmi14.0.0-cu126",
23
+ "gpuSupport": true,
24
+ "sharedMemorySize": "2g"
25
+ },
26
+ "notes": "LMI automatically selects the best backend (vLLM, TensorRT-LLM, LMI-Dist) based on your model architecture"
27
+ }