@aws/ml-container-creator 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +202 -0
- package/LICENSE-THIRD-PARTY +68620 -0
- package/NOTICE +2 -0
- package/README.md +106 -0
- package/bin/cli.js +365 -0
- package/config/defaults.json +32 -0
- package/config/presets/transformers-djl.json +26 -0
- package/config/presets/transformers-gpu.json +24 -0
- package/config/presets/transformers-lmi.json +27 -0
- package/package.json +129 -0
- package/servers/README.md +419 -0
- package/servers/base-image-picker/catalogs/model-servers.json +1191 -0
- package/servers/base-image-picker/catalogs/python-slim.json +38 -0
- package/servers/base-image-picker/catalogs/triton-backends.json +51 -0
- package/servers/base-image-picker/catalogs/triton.json +38 -0
- package/servers/base-image-picker/index.js +495 -0
- package/servers/base-image-picker/manifest.json +17 -0
- package/servers/base-image-picker/package.json +15 -0
- package/servers/hyperpod-cluster-picker/LICENSE +202 -0
- package/servers/hyperpod-cluster-picker/index.js +424 -0
- package/servers/hyperpod-cluster-picker/manifest.json +14 -0
- package/servers/hyperpod-cluster-picker/package.json +17 -0
- package/servers/instance-recommender/LICENSE +202 -0
- package/servers/instance-recommender/catalogs/instances.json +852 -0
- package/servers/instance-recommender/index.js +284 -0
- package/servers/instance-recommender/manifest.json +16 -0
- package/servers/instance-recommender/package.json +15 -0
- package/servers/lib/LICENSE +202 -0
- package/servers/lib/bedrock-client.js +160 -0
- package/servers/lib/custom-validators.js +46 -0
- package/servers/lib/dynamic-resolver.js +36 -0
- package/servers/lib/package.json +11 -0
- package/servers/lib/schemas/image-catalog.schema.json +185 -0
- package/servers/lib/schemas/instances.schema.json +124 -0
- package/servers/lib/schemas/manifest.schema.json +64 -0
- package/servers/lib/schemas/model-catalog.schema.json +91 -0
- package/servers/lib/schemas/regions.schema.json +26 -0
- package/servers/lib/schemas/triton-backends.schema.json +51 -0
- package/servers/model-picker/catalogs/jumpstart-public.json +66 -0
- package/servers/model-picker/catalogs/popular-diffusors.json +88 -0
- package/servers/model-picker/catalogs/popular-transformers.json +226 -0
- package/servers/model-picker/index.js +1693 -0
- package/servers/model-picker/manifest.json +18 -0
- package/servers/model-picker/package.json +20 -0
- package/servers/region-picker/LICENSE +202 -0
- package/servers/region-picker/catalogs/regions.json +263 -0
- package/servers/region-picker/index.js +230 -0
- package/servers/region-picker/manifest.json +16 -0
- package/servers/region-picker/package.json +15 -0
- package/src/app.js +1007 -0
- package/src/copy-tpl.js +77 -0
- package/src/lib/accelerator-validator.js +39 -0
- package/src/lib/asset-manager.js +385 -0
- package/src/lib/aws-profile-parser.js +181 -0
- package/src/lib/bootstrap-command-handler.js +1647 -0
- package/src/lib/bootstrap-config.js +238 -0
- package/src/lib/ci-register-helpers.js +124 -0
- package/src/lib/ci-report-helpers.js +158 -0
- package/src/lib/ci-stage-helpers.js +268 -0
- package/src/lib/cli-handler.js +529 -0
- package/src/lib/comment-generator.js +544 -0
- package/src/lib/community-reports-validator.js +91 -0
- package/src/lib/config-manager.js +2106 -0
- package/src/lib/configuration-exporter.js +204 -0
- package/src/lib/configuration-manager.js +695 -0
- package/src/lib/configuration-matcher.js +221 -0
- package/src/lib/cpu-validator.js +36 -0
- package/src/lib/cuda-validator.js +57 -0
- package/src/lib/deployment-config-resolver.js +103 -0
- package/src/lib/deployment-entry-schema.js +125 -0
- package/src/lib/deployment-registry.js +598 -0
- package/src/lib/docker-introspection-validator.js +51 -0
- package/src/lib/engine-prefix-resolver.js +60 -0
- package/src/lib/huggingface-client.js +172 -0
- package/src/lib/key-value-parser.js +37 -0
- package/src/lib/known-flags-validator.js +200 -0
- package/src/lib/manifest-cli.js +280 -0
- package/src/lib/mcp-client.js +303 -0
- package/src/lib/mcp-command-handler.js +532 -0
- package/src/lib/neuron-validator.js +80 -0
- package/src/lib/parameter-schema-validator.js +284 -0
- package/src/lib/prompt-runner.js +1349 -0
- package/src/lib/prompts.js +1138 -0
- package/src/lib/registry-command-handler.js +519 -0
- package/src/lib/registry-loader.js +198 -0
- package/src/lib/rocm-validator.js +80 -0
- package/src/lib/schema-validator.js +157 -0
- package/src/lib/sensitive-redactor.js +59 -0
- package/src/lib/template-engine.js +156 -0
- package/src/lib/template-manager.js +341 -0
- package/src/lib/validation-engine.js +314 -0
- package/src/prompt-adapter.js +63 -0
- package/templates/Dockerfile +300 -0
- package/templates/IAM_PERMISSIONS.md +84 -0
- package/templates/MIGRATION.md +488 -0
- package/templates/PROJECT_README.md +439 -0
- package/templates/TEMPLATE_SYSTEM.md +243 -0
- package/templates/buildspec.yml +64 -0
- package/templates/code/chat_template.jinja +1 -0
- package/templates/code/flask/gunicorn_config.py +35 -0
- package/templates/code/flask/wsgi.py +10 -0
- package/templates/code/model_handler.py +387 -0
- package/templates/code/serve +300 -0
- package/templates/code/serve.py +175 -0
- package/templates/code/serving.properties +105 -0
- package/templates/code/start_server.py +39 -0
- package/templates/code/start_server.sh +39 -0
- package/templates/diffusors/Dockerfile +72 -0
- package/templates/diffusors/patch_image_api.py +35 -0
- package/templates/diffusors/serve +115 -0
- package/templates/diffusors/start_server.sh +114 -0
- package/templates/do/.gitkeep +1 -0
- package/templates/do/README.md +541 -0
- package/templates/do/build +83 -0
- package/templates/do/ci +681 -0
- package/templates/do/clean +811 -0
- package/templates/do/config +260 -0
- package/templates/do/deploy +1560 -0
- package/templates/do/export +306 -0
- package/templates/do/logs +319 -0
- package/templates/do/manifest +12 -0
- package/templates/do/push +119 -0
- package/templates/do/register +580 -0
- package/templates/do/run +113 -0
- package/templates/do/submit +417 -0
- package/templates/do/test +1147 -0
- package/templates/hyperpod/configmap.yaml +24 -0
- package/templates/hyperpod/deployment.yaml +71 -0
- package/templates/hyperpod/pvc.yaml +42 -0
- package/templates/hyperpod/service.yaml +17 -0
- package/templates/nginx-diffusors.conf +74 -0
- package/templates/nginx-predictors.conf +47 -0
- package/templates/nginx-tensorrt.conf +74 -0
- package/templates/requirements.txt +61 -0
- package/templates/sample_model/test_inference.py +123 -0
- package/templates/sample_model/train_abalone.py +252 -0
- package/templates/test/test_endpoint.sh +79 -0
- package/templates/test/test_local_image.sh +80 -0
- package/templates/test/test_model_handler.py +180 -0
- package/templates/triton/Dockerfile +128 -0
- package/templates/triton/config.pbtxt +163 -0
- package/templates/triton/model.py +130 -0
- package/templates/triton/requirements.txt +11 -0
package/NOTICE
ADDED
package/README.md
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
# ML Container Creator
|
|
2
|
+
|
|
3
|
+
A CLI tool that creates SageMaker-compatible Docker containers for deploying ML models using the Bring Your Own Container (BYOC) paradigm.
|
|
4
|
+
|
|
5
|
+
> **Note:** This is a pre-release (`0.x`). APIs may change between minor versions. Weekly releases are planned until v1.
|
|
6
|
+
|
|
7
|
+
## Supported Configurations
|
|
8
|
+
|
|
9
|
+
| Architecture | Model Servers | Use Case |
|
|
10
|
+
|---|---|---|
|
|
11
|
+
| HTTP (traditional ML) | Flask, FastAPI | sklearn, XGBoost, TensorFlow |
|
|
12
|
+
| Transformers (LLMs) | vLLM, SGLang, TensorRT-LLM, DJL/LMI | HuggingFace models, JumpStart, S3 |
|
|
13
|
+
| Triton | FIL, ONNX, Python, TensorRT-LLM, vLLM | Multi-framework serving |
|
|
14
|
+
| Diffusors | vLLM | Image generation models |
|
|
15
|
+
|
|
16
|
+
| Deployment Target | Description |
|
|
17
|
+
|---|---|
|
|
18
|
+
| Managed Inference | SageMaker real-time endpoints |
|
|
19
|
+
| Async Inference | SageMaker async endpoints with S3 output |
|
|
20
|
+
| Batch Transform | SageMaker batch processing |
|
|
21
|
+
| HyperPod EKS | Kubernetes-based deployment |
|
|
22
|
+
|
|
23
|
+
## Quick Start
|
|
24
|
+
|
|
25
|
+
### Install from npm
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
npm install -g @aws/ml-container-creator
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
### Or use without installing (npx)
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
npx @aws/ml-container-creator --help
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
### Or install from source
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
git clone https://github.com/awslabs/ml-container-creator.git
|
|
41
|
+
cd ml-container-creator
|
|
42
|
+
npm install && npm link
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
### Bootstrap AWS infrastructure (one-time)
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
ml-container-creator bootstrap
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Sets up an IAM execution role, ECR repository, optional S3 buckets, and optional CI Integration Harness for automated testing. Configuration is saved to `~/.ml-container-creator/config.json`.
|
|
52
|
+
|
|
53
|
+
### Generate a project
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
# Interactive
|
|
57
|
+
ml-container-creator
|
|
58
|
+
|
|
59
|
+
# Non-interactive
|
|
60
|
+
ml-container-creator my-model \
|
|
61
|
+
--deployment-config=transformers-vllm \
|
|
62
|
+
--model-name=openai/gpt-oss-20b \
|
|
63
|
+
--instance-type=ml.g6.12xlarge \
|
|
64
|
+
--region=us-east-1 \
|
|
65
|
+
--skip-prompts
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
### Build, push, deploy
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
./do/build # Build Docker image
|
|
72
|
+
./do/push # Push to Amazon ECR
|
|
73
|
+
./do/deploy # Deploy to SageMaker
|
|
74
|
+
./do/test # Test the endpoint
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## Documentation
|
|
78
|
+
|
|
79
|
+
Full documentation is available at [awslabs.github.io/ml-container-creator](https://awslabs.github.io/ml-container-creator/).
|
|
80
|
+
|
|
81
|
+
- [Getting Started](https://awslabs.github.io/ml-container-creator/getting-started/) — Installation and walkthroughs
|
|
82
|
+
- [Configuration](https://awslabs.github.io/ml-container-creator/configuration/) — CLI flags, env vars, config files, MCP servers
|
|
83
|
+
- [Deployment Guide](https://awslabs.github.io/ml-container-creator/deployments/) — All deployment targets and lifecycle scripts
|
|
84
|
+
- [CI Integration](https://awslabs.github.io/ml-container-creator/ci-integration/) — Automated lifecycle testing for all deployment configurations
|
|
85
|
+
- [Examples](https://awslabs.github.io/ml-container-creator/EXAMPLES/) — Framework-specific walkthroughs
|
|
86
|
+
- [Troubleshooting](https://awslabs.github.io/ml-container-creator/TROUBLESHOOTING/) — Common issues and solutions
|
|
87
|
+
|
|
88
|
+
## Prerequisites
|
|
89
|
+
|
|
90
|
+
| Tool | Version | Purpose |
|
|
91
|
+
|---|---|---|
|
|
92
|
+
| [Node.js](https://nodejs.org/) | 24+ | Runs the CLI |
|
|
93
|
+
| [Docker](https://docs.docker.com/get-docker/) | 20+ | Container builds |
|
|
94
|
+
| [AWS CLI](https://aws.amazon.com/cli/) | 2+ | AWS resource management |
|
|
95
|
+
|
|
96
|
+
## Contributing
|
|
97
|
+
|
|
98
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
|
|
99
|
+
|
|
100
|
+
## Security
|
|
101
|
+
|
|
102
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md#security-issue-notifications) for reporting security issues.
|
|
103
|
+
|
|
104
|
+
## License
|
|
105
|
+
|
|
106
|
+
Apache-2.0. See [LICENSE](LICENSE).
|
package/bin/cli.js
ADDED
|
@@ -0,0 +1,365 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
3
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
import { createRequire } from 'module'
|
|
6
|
+
import path from 'path'
|
|
7
|
+
import { program, Option, Help } from 'commander'
|
|
8
|
+
import { run } from '../src/app.js'
|
|
9
|
+
|
|
10
|
+
const require = createRequire(import.meta.url)
|
|
11
|
+
const { version } = require('../package.json')
|
|
12
|
+
|
|
13
|
+
/**
 * Accumulator for repeatable CLI options.
 *
 * Registered below as the argument parser for --model-env and --server-env, each of
 * which starts from an empty-array default, so every occurrence of the flag adds one
 * more entry to the list.
 *
 * @param {string} value - Value supplied with the current occurrence of the option.
 * @param {string[]} previous - Values gathered so far.
 * @returns {string[]} A new array holding `previous` followed by `value`; `previous` is left untouched.
 */
function collect(value, previous) {
  const latest = [value]
  return previous.concat(latest)
}
|
|
20
|
+
|
|
21
|
+
// Shorthand so each option declaration below fits on one readable line.
const cliOption = (flags, description) => new Option(flags, description)

// Root command: generates a project. Options are declared in the same order the custom
// help formatter groups them, and the parsed result is handed to run() from src/app.js.
program
  .name('ml-container-creator')
  .version(version)
  // NOTE(review): Commander documents passThroughOptions() as ending option parsing at the
  // first command-argument. Confirm that flags written after [project-name] (as in the
  // README's non-interactive example) still reach run() as intended.
  .enablePositionalOptions()
  .passThroughOptions()
  .helpCommand('help [command]', 'Display help for command')
  .argument('[project-name]', 'Name for the generated project')

  // General
  .addOption(cliOption('--skip-prompts', 'Skip interactive prompts and use configuration from other sources'))
  .addOption(cliOption('--config <path>', 'Path to configuration file'))
  .addOption(cliOption('--project-name <name>', 'Project name'))
  .addOption(cliOption('--project-dir <dir>', 'Output directory path'))

  // Model & Framework (--framework and --model-server are deprecated and hidden from help)
  .addOption(cliOption('--deployment-config <config>', 'Deployment configuration (e.g. http-flask, transformers-vllm, triton-fil)'))
  .addOption(
    cliOption('--framework <framework>', 'ML framework — DEPRECATED: use --deployment-config')
      .choices(['sklearn', 'xgboost', 'tensorflow', 'transformers'])
      .hideHelp()
  )
  .addOption(cliOption('--model-format <format>', 'Model serialization format (pkl, joblib, json, model, ubj, keras, h5, SavedModel)'))
  .addOption(cliOption('--model-name <name>', 'Model identifier (HuggingFace ID, s3://, jumpstart://, registry://)'))
  .addOption(
    cliOption('--model-server <server>', 'Model server — DEPRECATED: use --deployment-config')
      .choices(['flask', 'fastapi', 'vllm', 'sglang'])
      .hideHelp()
  )
  .addOption(cliOption('--base-image <image>', 'Base container image for Dockerfile'))

  // Build & Infrastructure
  .addOption(cliOption('--deployment-target <target>', 'Deployment target (managed-inference, async-inference, batch-transform, hyperpod-eks)'))
  .addOption(cliOption('--instance-type <type>', 'SageMaker instance type (e.g. ml.g5.xlarge, ml.m5.large)'))
  .addOption(cliOption('--region <region>', 'AWS region'))
  .addOption(cliOption('--role-arn <arn>', 'IAM role ARN for SageMaker execution'))
  .addOption(cliOption('--build-target <target>', 'Build target (codebuild)'))
  .addOption(cliOption('--codebuild-compute-type <type>', 'CodeBuild compute type (SMALL, MEDIUM, LARGE)'))

  // Endpoint (Real-Time Inference)
  .addOption(cliOption('--endpoint-initial-instance-count <n>', 'Number of instances for the endpoint (default: 1)'))
  .addOption(cliOption('--endpoint-data-capture-percent <pct>', 'Data capture percentage for monitoring, 0-100 (default: 0)'))
  .addOption(cliOption('--endpoint-variant-name <name>', 'Production variant name (default: AllTraffic)'))
  .addOption(cliOption('--endpoint-volume-size <gb>', 'ML storage volume size in GB'))

  // Inference Component
  .addOption(cliOption('--ic-cpu-count <n>', 'vCPUs allocated to the inference component'))
  .addOption(cliOption('--ic-memory-size <mb>', 'Memory in MB for the inference component'))
  .addOption(cliOption('--ic-gpu-count <n>', 'GPUs allocated to the inference component'))
  .addOption(cliOption('--ic-copy-count <n>', 'Number of inference component copies (default: 1)'))
  .addOption(cliOption('--ic-model-weight <weight>', 'Traffic routing weight, 0-1 (default: 1.0)'))

  // Async Inference
  .addOption(cliOption('--async-s3-output-path <path>', 'S3 output path for async results'))
  .addOption(cliOption('--async-sns-success-topic <arn>', 'SNS topic ARN for success notifications'))
  .addOption(cliOption('--async-sns-error-topic <arn>', 'SNS topic ARN for error notifications'))
  .addOption(cliOption('--async-max-concurrent <n>', 'Max concurrent invocations per instance (default: 1)'))

  // Batch Transform
  .addOption(cliOption('--batch-input-path <path>', 'S3 input path for batch data'))
  .addOption(cliOption('--batch-output-path <path>', 'S3 output path for batch results'))
  .addOption(cliOption('--batch-instance-count <n>', 'Number of instances (default: 1)'))
  .addOption(cliOption('--batch-split-type <type>', 'Input split type: Line, RecordIO, None (default: Line)'))
  .addOption(cliOption('--batch-strategy <strategy>', 'Batch strategy: MultiRecord, SingleRecord (default: MultiRecord)'))
  .addOption(cliOption('--batch-join-source <source>', 'Join source: Input, None (default: None)'))
  .addOption(cliOption('--batch-max-concurrent <n>', 'Max concurrent transforms per instance (default: 1)'))
  .addOption(cliOption('--batch-max-payload <mb>', 'Max payload size in MB, 0-100 (default: 6)'))

  // HyperPod (EKS)
  .addOption(cliOption('--hyperpod-cluster <name>', 'HyperPod EKS cluster name'))
  .addOption(cliOption('--hyperpod-namespace <ns>', 'Kubernetes namespace (default: default)'))
  .addOption(cliOption('--hyperpod-replicas <count>', 'Number of replicas (default: 1)'))
  .addOption(cliOption('--fsx-volume-handle <handle>', 'FSx for Lustre volume handle'))

  // Environment Variables (repeatable; each occurrence is appended via collect)
  .addOption(
    cliOption('--model-env <KEY=VALUE>', 'Model env var, repeatable (e.g. VLLM_TENSOR_PARALLEL_SIZE=4)')
      .argParser(collect)
      .default([])
  )
  .addOption(
    cliOption('--server-env <KEY=VALUE>', 'Server env var, repeatable (e.g. SGLANG_MEM_FRACTION=0.9)')
      .argParser(collect)
      .default([])
  )

  // Authentication
  .addOption(cliOption('--hf-token <token>', 'HuggingFace token (or "$HF_TOKEN" for env var reference)'))

  // Optional Features
  .addOption(cliOption('--include-sample', 'Include sample model code'))
  .addOption(cliOption('--include-testing', 'Include test suite'))
  .addOption(cliOption('--test-types <types>', 'Comma-separated test types'))

  // MCP & Discovery
  .addOption(cliOption('--smart', 'Enable Bedrock-powered smart mode on MCP servers'))
  .addOption(cliOption('--discover', 'Enable live registry lookups via MCP discovery'))

  // Validation
  .addOption(cliOption('--validate-env-vars', 'Enable environment variable validation (default: true)'))
  .addOption(cliOption('--validate-with-docker', 'Enable Docker introspection validation (opt-in)'))
  .addOption(cliOption('--offline', 'Disable HuggingFace API lookups'))

  .action(run)
|
|
108
|
+
|
|
109
|
+
// Root-command help: options are printed under themed headings rather than one flat list.
program.configureHelp({
  /**
   * Build the help text for a command.
   *
   * For the root command the output is: usage line, arguments, one titled section per
   * option group (empty groups are omitted), then the subcommand list. Any other command
   * is delegated to Commander's stock formatter.
   *
   * @param {import('commander').Command} cmd - Command whose help is being rendered.
   * @param {import('commander').Help} helper - Help instance supplying lookup, styling and layout helpers.
   * @returns {string} The help text, lines joined with '\n'.
   */
  formatHelp(cmd, helper) {
    // Subcommands keep the default Commander layout.
    if (cmd !== program) {
      return Help.prototype.formatHelp.call(this, cmd, helper)
    }

    const termWidth = helper.padWidth(cmd, helper)
    const renderItem = (term, description) => helper.formatItem(term, termWidth, description, helper)

    // A section is its styled title, one row per option, and a trailing blank line.
    const renderSection = (title, options) => {
      if (options.length === 0) return []
      const rows = options.map(option => renderItem(
        helper.styleOptionTerm(helper.optionTerm(option)),
        helper.styleOptionDescription(helper.optionDescription(option))
      ))
      return [helper.styleTitle(`${title}:`), ...rows, '']
    }

    // Sections in display order. An option joins the first section whose exact flag list
    // or flag prefix matches its long name; anything unmatched falls back to General.
    const sections = [
      { title: 'General', flags: ['--skip-prompts', '--config', '--project-name', '--project-dir', '--version', '--help'], prefixes: [] },
      { title: 'Model & Framework', flags: ['--deployment-config', '--framework', '--model-format', '--model-name', '--model-server', '--base-image'], prefixes: [] },
      { title: 'Build & Infrastructure', flags: ['--deployment-target', '--instance-type', '--region', '--role-arn', '--build-target', '--codebuild-compute-type'], prefixes: [] },
      { title: 'Endpoint (Real-Time Inference)', flags: [], prefixes: ['--endpoint-'] },
      { title: 'Inference Component', flags: [], prefixes: ['--ic-'] },
      { title: 'Async Inference', flags: [], prefixes: ['--async-'] },
      { title: 'Batch Transform', flags: [], prefixes: ['--batch-'] },
      { title: 'HyperPod (EKS)', flags: ['--fsx-volume-handle'], prefixes: ['--hyperpod-'] },
      { title: 'Environment Variables', flags: ['--model-env', '--server-env'], prefixes: [] },
      { title: 'Authentication', flags: ['--hf-token'], prefixes: [] },
      { title: 'Optional Features', flags: ['--include-sample', '--include-testing', '--test-types'], prefixes: [] },
      { title: 'MCP & Discovery', flags: ['--smart', '--discover'], prefixes: [] },
      { title: 'Validation', flags: ['--validate-env-vars', '--validate-with-docker', '--offline'], prefixes: [] }
    ].map(section => ({ ...section, options: [] }))
    const generalSection = sections[0]

    for (const option of helper.visibleOptions(cmd)) {
      const long = option.long || ''
      const matched = sections.find(section =>
        section.flags.includes(long) || section.prefixes.some(prefix => long.startsWith(prefix))
      )
      const target = matched || generalSection
      target.options.push(option)
    }

    // Usage line
    const lines = [
      `${helper.styleTitle('Usage:')} ${helper.styleUsage(helper.commandUsage(cmd))}`,
      ''
    ]

    // Positional arguments
    const positionals = helper.visibleArguments(cmd)
    if (positionals.length > 0) {
      const argumentRows = positionals.map(arg => renderItem(
        helper.styleArgumentTerm(helper.argumentTerm(arg)),
        helper.styleArgumentDescription(helper.argumentDescription(arg))
      ))
      lines.push(helper.styleTitle('Arguments:'), ...argumentRows, '')
    }

    // Grouped options
    for (const section of sections) {
      lines.push(...renderSection(section.title, section.options))
    }

    // Subcommands
    const subcommands = helper.visibleCommands(cmd)
    if (subcommands.length > 0) {
      const commandRows = subcommands.map(sub => renderItem(
        helper.styleSubcommandTerm(helper.subcommandTerm(sub)),
        helper.styleSubcommandDescription(helper.subcommandDescription(sub))
      ))
      lines.push(helper.styleTitle('Commands:'), ...commandRows, '')
    }

    return lines.join('\n')
  }
})
|
|
236
|
+
|
|
237
|
+
// Subcommands. Each handler module is imported lazily inside its action, so it is only
// loaded when that subcommand actually runs.

// bootstrap: delegates to BootstrapCommandHandler.
program
  .command('bootstrap')
  .description('Set up AWS infrastructure (IAM role, ECR repo, S3 buckets)')
  .argument('[action]', 'Bootstrap action (status, use, list, remove, scan, prune, update)')
  .argument('[args...]', 'Additional arguments')
  .option('--profile <profile>', 'AWS profile name')
  .option('--region <region>', 'AWS region')
  .option('--role-arn <arn>', 'Existing IAM role ARN to use')
  .option('--non-interactive', 'Run without prompts (requires --profile and --region)')
  .option('--force', 'Force removal without confirmation')
  .option('--verify', 'Verify resources exist (for status)')
  .option('--delete-stack', 'Delete CloudFormation stack on remove')
  .action(async (action, args, options) => {
    const handlerModule = await import('../src/lib/bootstrap-command-handler.js')
    const BootstrapCommandHandler = handlerModule.default
    const handler = new BootstrapCommandHandler()

    // With no action given, the handler receives an empty argument list.
    const forwarded = []
    if (action) {
      forwarded.push(action, ...args)
    }
    await handler.handle(forwarded, options)
  })
|
|
257
|
+
|
|
258
|
+
// mcp: delegates to McpCommandHandler, which is given a small generator-like adapter.
program
  .command('mcp')
  .description('Manage MCP servers (add, list, get, remove, init)')
  .argument('<action>', 'MCP action (add, list, get, remove, init)')
  .argument('[args...]', 'Additional arguments')
  .option('-e <env>', 'Environment variable in KEY=VALUE format (for add)')
  .option('--tool-name <name>', 'Tool name for MCP server (for add)')
  .option('--limit <n>', 'Result limit for MCP server (for add)')
  .option('--bundled', 'Use a bundled server from servers/ directory')
  .action(async (action, args, options) => {
    const handlerModule = await import('../src/lib/mcp-command-handler.js')
    const promptModule = await import('../src/prompt-adapter.js')
    const McpCommandHandler = handlerModule.default
    const runPrompts = promptModule.runPrompts

    // The handler expects an object exposing destinationPath() and prompt().
    // Paths resolve against the current working directory.
    const generatorAdapter = {
      destinationPath: (...segments) => (
        segments.length === 0 ? process.cwd() : path.join(process.cwd(), ...segments)
      ),
      prompt: async prompts => runPrompts(prompts)
    }

    const handler = new McpCommandHandler(generatorAdapter)
    const forwarded = [action].concat(args)
    await handler.handle(forwarded, options)
  })
|
|
283
|
+
|
|
284
|
+
// registry: delegates to RegistryCommandHandler. The first group of options filters or
// controls import; the second group is supplied by `registry log` (called from do/register).
program
  .command('registry')
  .description('Registry operations (list, get, remove, replay, export, import, search) — experimental, may be reconciled with do/register')
  .argument('<action>', 'Registry action (log, list, get, remove, replay, export, import, search)')
  .argument('[args...]', 'Additional arguments')

  // Filters and import behaviour
  .option('--backend <backend>', 'Filter by backend')
  .option('--architecture <arch>', 'Filter by architecture')
  .option('--model <model>', 'Filter by model name')
  .option('--instance-type <type>', 'Filter by instance type')
  .option('--status <status>', 'Filter by status')
  .option('--merge', 'Merge on import')
  .option('--replace', 'Replace on import')

  // Fields recorded by `registry log`
  .option('--deployment-config <config>', 'Deployment configuration')
  .option('--region <region>', 'AWS region')
  .option('--deployment-target <target>', 'Deployment target')
  .option('--build-target <target>', 'Build target')
  .option('--model-name <name>', 'Model name')
  .option('--model-format <format>', 'Model format')
  .option('--base-image <image>', 'Base container image')
  .option('--notes <text>', 'Deployment notes')
  .option('--project', 'Use project-level registry')
  .option('--parameters <json>', 'Parameters JSON string')
  .option('--generator-version <version>', 'Generator version')
  .action(async (action, args, options) => {
    const handlerModule = await import('../src/lib/registry-command-handler.js')
    const RegistryCommandHandler = handlerModule.default
    const handler = new RegistryCommandHandler()
    const forwarded = [action].concat(args)
    await handler.handle(forwarded, options)
  })
|
|
313
|
+
|
|
314
|
+
// configure: asks which kind of example the user wants and prints it. Nothing is written
// to disk by this action; it only logs to the console.
program
  .command('configure')
  .description('Interactive configuration setup (experimental)')
  .action(async () => {
    // Only the prompt adapter is needed here. The previous version also imported
    // ConfigurationExporter but never used it, so that import has been dropped.
    const { runPrompts } = await import('../src/prompt-adapter.js')

    console.log('\n🔧 ML Container Creator Configuration (experimental)')
    console.log('\nThis will help you set up configuration files for your project.\n')

    const answers = await runPrompts([
      {
        type: 'list',
        name: 'configType',
        message: 'What type of configuration would you like to create?',
        choices: [
          { name: 'Show CLI option examples', value: 'cli' },
          { name: 'Show environment variable examples', value: 'env' }
        ]
      }
    ])

    // Any other answer prints nothing.
    if (answers.configType === 'cli') {
      console.log(`
💻 CLI Examples:

# Basic sklearn project
ml-container-creator --deployment-config=http-flask --model-format=pkl --skip-prompts

# Transformers with vLLM
ml-container-creator --deployment-config=transformers-vllm \\
--model-name=meta-llama/Llama-2-7b-chat-hf \\
--instance-type=ml.g5.xlarge --skip-prompts

# Using a config file
ml-container-creator --config=my-config.json --skip-prompts
`)
    } else if (answers.configType === 'env') {
      console.log(`
🌍 Environment Variables:

export ML_INSTANCE_TYPE="ml.m5.large"
export AWS_REGION="us-east-1"
export AWS_ROLE="arn:aws:iam::123456789012:role/SageMakerRole"
export HF_TOKEN="hf_..."

Then run: ml-container-creator --deployment-config=http-flask --skip-prompts
`)
    }
  })
|
|
364
|
+
|
|
365
|
+
// The subcommand action handlers are async, so parse with parseAsync and await it:
// Commander then waits for the handler to settle, and a rejected handler surfaces here
// rather than as a detached promise. Top-level await is available because this file is
// an ES module.
await program.parseAsync()
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
{
|
|
2
|
+
"generator": {
|
|
3
|
+
"name": "ml-container-creator",
|
|
4
|
+
"version": "1.0.0"
|
|
5
|
+
},
|
|
6
|
+
"defaults": {
|
|
7
|
+
"framework": "sklearn",
|
|
8
|
+
"modelFormat": "pkl",
|
|
9
|
+
"modelServer": "flask",
|
|
10
|
+
"deployTarget": "sagemaker",
|
|
11
|
+
"instanceType": "cpu-optimized",
|
|
12
|
+
"awsRegion": "us-east-1",
|
|
13
|
+
"includeTesting": true,
|
|
14
|
+
"testTypes": ["local-model-cli", "local-model-server", "hosted-model-endpoint"],
|
|
15
|
+
"includeSampleModel": false,
|
|
16
|
+
"skipPrompts": false
|
|
17
|
+
},
|
|
18
|
+
"validation": {
|
|
19
|
+
"supportedFrameworks": ["sklearn", "xgboost", "tensorflow", "transformers"],
|
|
20
|
+
"supportedModelServers": ["flask", "fastapi", "vllm", "sglang"],
|
|
21
|
+
"supportedInstanceTypes": ["cpu-optimized", "gpu-enabled"],
|
|
22
|
+
"supportedRegions": ["us-east-1", "us-west-2", "eu-west-1"]
|
|
23
|
+
},
|
|
24
|
+
"templates": {
|
|
25
|
+
"ignorePatterns": [
|
|
26
|
+
"**/node_modules/**",
|
|
27
|
+
"**/.git/**",
|
|
28
|
+
"**/dist/**",
|
|
29
|
+
"**/*.log"
|
|
30
|
+
]
|
|
31
|
+
}
|
|
32
|
+
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "Transformers DJL Preset",
|
|
3
|
+
"description": "DJL Serving configuration with PyTorch backend",
|
|
4
|
+
"config": {
|
|
5
|
+
"framework": "transformers",
|
|
6
|
+
"modelServer": "djl",
|
|
7
|
+
"frameworkVersion": "0.32.0",
|
|
8
|
+
"instanceType": "gpu-enabled",
|
|
9
|
+
"awsRegion": "us-east-1",
|
|
10
|
+
"includeTesting": true,
|
|
11
|
+
"testTypes": ["hosted-model-endpoint"],
|
|
12
|
+
"includeSampleModel": false,
|
|
13
|
+
"skipPrompts": true
|
|
14
|
+
},
|
|
15
|
+
"environment": {
|
|
16
|
+
"ENGINE": "Python",
|
|
17
|
+
"OPTION_DEVICE_MAP": "auto",
|
|
18
|
+
"OPTION_TENSOR_PARALLEL_DEGREE": "1"
|
|
19
|
+
},
|
|
20
|
+
"dockerOptions": {
|
|
21
|
+
"baseImage": "deepjavalibrary/djl-serving:0.32.0-pytorch-cu126",
|
|
22
|
+
"gpuSupport": true,
|
|
23
|
+
"sharedMemorySize": "2g"
|
|
24
|
+
},
|
|
25
|
+
"notes": "DJL Serving provides flexible model serving with Java-based infrastructure and Python engine support"
|
|
26
|
+
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "Transformers GPU Preset",
|
|
3
|
+
"description": "Optimized configuration for transformer models with GPU acceleration",
|
|
4
|
+
"config": {
|
|
5
|
+
"framework": "transformers",
|
|
6
|
+
"modelServer": "vllm",
|
|
7
|
+
"instanceType": "gpu-enabled",
|
|
8
|
+
"awsRegion": "us-east-1",
|
|
9
|
+
"includeTesting": true,
|
|
10
|
+
"testTypes": ["hosted-model-endpoint"],
|
|
11
|
+
"includeSampleModel": false,
|
|
12
|
+
"skipPrompts": true
|
|
13
|
+
},
|
|
14
|
+
"environment": {
|
|
15
|
+
"CUDA_VISIBLE_DEVICES": "0",
|
|
16
|
+
"TRANSFORMERS_CACHE": "/opt/ml/model/cache",
|
|
17
|
+
"HF_HOME": "/opt/ml/model/hf_cache"
|
|
18
|
+
},
|
|
19
|
+
"dockerOptions": {
|
|
20
|
+
"baseImage": "nvidia/cuda:12.1-runtime-ubuntu20.04",
|
|
21
|
+
"gpuSupport": true,
|
|
22
|
+
"sharedMemorySize": "2g"
|
|
23
|
+
}
|
|
24
|
+
}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "Transformers LMI Preset",
|
|
3
|
+
"description": "AWS Large Model Inference (LMI) configuration with automatic backend selection",
|
|
4
|
+
"config": {
|
|
5
|
+
"framework": "transformers",
|
|
6
|
+
"modelServer": "lmi",
|
|
7
|
+
"frameworkVersion": "14.0.0",
|
|
8
|
+
"instanceType": "gpu-enabled",
|
|
9
|
+
"awsRegion": "us-east-1",
|
|
10
|
+
"includeTesting": true,
|
|
11
|
+
"testTypes": ["hosted-model-endpoint"],
|
|
12
|
+
"includeSampleModel": false,
|
|
13
|
+
"skipPrompts": true
|
|
14
|
+
},
|
|
15
|
+
"environment": {
|
|
16
|
+
"OPTION_ROLLING_BATCH": "auto",
|
|
17
|
+
"OPTION_MAX_ROLLING_BATCH_SIZE": "32",
|
|
18
|
+
"OPTION_DTYPE": "fp16",
|
|
19
|
+
"OPTION_GPU_MEMORY_UTILIZATION": "0.9"
|
|
20
|
+
},
|
|
21
|
+
"dockerOptions": {
|
|
22
|
+
"baseImage": "763104351884.dkr.ecr.us-east-1.amazonaws.com/djl-inference:0.32.0-lmi14.0.0-cu126",
|
|
23
|
+
"gpuSupport": true,
|
|
24
|
+
"sharedMemorySize": "2g"
|
|
25
|
+
},
|
|
26
|
+
"notes": "LMI automatically selects the best backend (vLLM, TensorRT-LLM, LMI-Dist) based on your model architecture"
|
|
27
|
+
}
|