@aws/ml-container-creator 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +298 -62
- package/bin/cli.js +4 -3
- package/config/parameter-schema.json +1 -1
- package/package.json +1 -1
- package/src/app.js +17 -1
- package/src/lib/auto-prompt-builder.js +172 -0
- package/src/lib/ci-register-helpers.js +1 -1
- package/src/lib/cli-handler.js +1 -1
- package/src/lib/config-manager.js +177 -3
- package/src/lib/parameter-schema-validator.js +10 -10
- package/src/lib/prompt-runner.js +51 -7
- package/src/lib/prompts.js +7 -7
- package/src/lib/template-manager.js +2 -2
- package/templates/do/clean +6 -6
- package/templates/do/config +6 -6
- package/templates/do/deploy +5 -5
- package/templates/do/export +5 -5
- package/templates/do/logs +4 -4
- package/templates/do/register +3 -3
- package/templates/do/test +4 -4
package/README.md
CHANGED
|
@@ -1,106 +1,342 @@
|
|
|
1
|
-
#
|
|
1
|
+
# sharp-transformer-deployment
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
SageMaker-compatible ML container for deploying transformers models using vllm.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
Generated on 2026-05-08T09-52-06 using [ML Container Creator](https://github.com/awslabs/ml-container-creator).
|
|
6
6
|
|
|
7
|
-
##
|
|
7
|
+
## Quick Start
|
|
8
8
|
|
|
9
|
-
|
|
10
|
-
|---|---|---|
|
|
11
|
-
| HTTP (traditional ML) | Flask, FastAPI | sklearn, XGBoost, TensorFlow |
|
|
12
|
-
| Transformers (LLMs) | vLLM, SGLang, TensorRT-LLM, DJL/LMI | HuggingFace models, JumpStart, S3 |
|
|
13
|
-
| Triton | FIL, ONNX, Python, TensorRT-LLM, vLLM | Multi-framework serving |
|
|
14
|
-
| Diffusors | vLLM | Image generation models |
|
|
9
|
+
### 1. Build the Container
|
|
15
10
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
| Async Inference | SageMaker async endpoints with S3 output |
|
|
20
|
-
| Batch Transform | SageMaker batch processing |
|
|
21
|
-
| HyperPod EKS | Kubernetes-based deployment |
|
|
11
|
+
```bash
|
|
12
|
+
./do/build
|
|
13
|
+
```
|
|
22
14
|
|
|
23
|
-
|
|
15
|
+
Builds a Docker image tagged as `sharp-transformer-deployment:latest`.
|
|
16
|
+
|
|
17
|
+
### 2. Test Locally
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
# Start the container
|
|
21
|
+
./do/run
|
|
22
|
+
|
|
23
|
+
# In another terminal, test the endpoints
|
|
24
|
+
./do/test
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
### 3. Push to ECR
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
./do/push
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
Pushes the image to Amazon ECR in the `us-west-2` region.
|
|
34
|
+
|
|
35
|
+
### 4. Deploy to SageMaker
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
./do/deploy <your-sagemaker-execution-role-arn>
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
Creates a SageMaker endpoint named `sharp-transformer-deployment-endpoint`.
|
|
42
|
+
|
|
43
|
+
### 5. Test the Endpoint
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
./do/test sharp-transformer-deployment-endpoint
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Project Structure
|
|
50
|
+
|
|
51
|
+
```
|
|
52
|
+
sharp-transformer-deployment/
|
|
53
|
+
├── do/ # do-framework lifecycle scripts
|
|
54
|
+
│ ├── build # Build Docker image
|
|
55
|
+
│ ├── push # Push to Amazon ECR
|
|
56
|
+
│ ├── deploy # Deploy to SageMaker
|
|
57
|
+
│ ├── run # Run container locally
|
|
58
|
+
│ ├── test # Test container or endpoint
|
|
59
|
+
│ ├── clean # Clean up resources
|
|
60
|
+
│ ├── submit # Submit build to CodeBuild
|
|
61
|
+
│ ├── config # Configuration variables
|
|
62
|
+
│ └── README.md # Detailed do-framework documentation
|
|
63
|
+
├── code/ # Model serving code
|
|
64
|
+
│ └── serve # vllm entrypoint script
|
|
65
|
+
├── deploy/ # Legacy scripts (deprecated)
|
|
66
|
+
│ ├── build_and_push.sh # Use ./do/build && ./do/push instead
|
|
67
|
+
│ └── deploy.sh # Use ./do/deploy instead
|
|
68
|
+
|
|
69
|
+
├── test/ # Test suite
|
|
70
|
+
│ ├── test_endpoint.sh # Test SageMaker endpoint
|
|
71
|
+
│ └── test_local_image.sh # Test local container
|
|
72
|
+
|
|
73
|
+
├── Dockerfile # Container definition
|
|
74
|
+
├── requirements.txt # Python dependencies
|
|
75
|
+
└── README.md # This file
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
## Configuration
|
|
79
|
+
|
|
80
|
+
All deployment configuration is centralized in `do/config`:
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
# Project identification
|
|
84
|
+
PROJECT_NAME="sharp-transformer-deployment"
|
|
85
|
+
DEPLOYMENT_CONFIG="transformers-vllm"
|
|
86
|
+
|
|
87
|
+
# AWS configuration
|
|
88
|
+
AWS_REGION="us-west-2"
|
|
89
|
+
INSTANCE_TYPE="ml.g5.xlarge"
|
|
90
|
+
|
|
91
|
+
# Framework configuration
|
|
92
|
+
FRAMEWORK="transformers"
|
|
93
|
+
MODEL_SERVER="vllm"
|
|
94
|
+
|
|
95
|
+
# Model configuration
|
|
96
|
+
MODEL_NAME="openai/gpt-oss-20b"
|
|
97
|
+
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
You can override these values by setting environment variables before running do scripts.
|
|
101
|
+
|
|
102
|
+
## Deployment Workflows
|
|
103
|
+
|
|
104
|
+
### Local Development Workflow
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
# Build and test locally
|
|
108
|
+
./do/build
|
|
109
|
+
./do/run &
|
|
110
|
+
./do/test
|
|
111
|
+
|
|
112
|
+
# When satisfied, push to ECR
|
|
113
|
+
./do/push
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
### CodeBuild Workflow
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
# Submit build to CodeBuild (builds and pushes to ECR)
|
|
120
|
+
./do/submit
|
|
121
|
+
|
|
122
|
+
# Deploy to SageMaker
|
|
123
|
+
./do/deploy <role-arn>
|
|
124
|
+
|
|
125
|
+
# Test the endpoint
|
|
126
|
+
./do/test sharp-transformer-deployment-endpoint
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
### Cleanup
|
|
130
|
+
|
|
131
|
+
```bash
|
|
132
|
+
# Remove local images
|
|
133
|
+
./do/clean local
|
|
134
|
+
|
|
135
|
+
# Remove ECR images
|
|
136
|
+
./do/clean ecr
|
|
137
|
+
|
|
138
|
+
# Delete SageMaker endpoint
|
|
139
|
+
./do/clean endpoint
|
|
140
|
+
|
|
141
|
+
# Clean everything
|
|
142
|
+
./do/clean all
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
## do-framework Commands
|
|
146
|
+
|
|
147
|
+
This project uses the [do-framework](https://github.com/iankoulski/do-framework) for standardized container lifecycle management.
|
|
148
|
+
|
|
149
|
+
### Available Commands
|
|
150
|
+
|
|
151
|
+
| Command | Description |
|
|
152
|
+
|---------|-------------|
|
|
153
|
+
| `./do/build` | Build Docker image locally |
|
|
154
|
+
| `./do/push` | Push image to Amazon ECR |
|
|
155
|
+
| `./do/deploy <role-arn>` | Deploy to SageMaker endpoint |
|
|
156
|
+
| `./do/run` | Run container locally on port 8080 |
|
|
157
|
+
| `./do/test [endpoint]` | Test local container or SageMaker endpoint |
|
|
158
|
+
| `./do/clean <target>` | Clean up resources (local/ecr/endpoint/all) |
|
|
159
|
+
| `./do/submit` | Submit build to AWS CodeBuild |
|
|
160
|
+
|
|
161
|
+
For detailed documentation on each command, see `do/README.md`.
|
|
162
|
+
|
|
163
|
+
## Framework-Specific Information
|
|
164
|
+
|
|
165
|
+
### Transformers (vllm)
|
|
166
|
+
|
|
167
|
+
This container serves transformer models using vllm.
|
|
168
|
+
|
|
169
|
+
**Model**: openai/gpt-oss-20b
|
|
24
170
|
|
|
25
|
-
|
|
171
|
+
**Server**: vLLM - High-throughput LLM serving with PagedAttention
|
|
172
|
+
|
|
173
|
+
**Features**:
|
|
174
|
+
- Continuous batching
|
|
175
|
+
- Optimized CUDA kernels
|
|
176
|
+
- OpenAI-compatible API
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
**Inference**: Send requests to the `/invocations` endpoint with:
|
|
180
|
+
```json
|
|
181
|
+
{
|
|
182
|
+
"inputs": "Your prompt here",
|
|
183
|
+
"parameters": {
|
|
184
|
+
"max_new_tokens": 100,
|
|
185
|
+
"temperature": 0.7
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
## SageMaker Endpoints
|
|
193
|
+
|
|
194
|
+
### Health Check
|
|
195
|
+
|
|
196
|
+
SageMaker calls the `/ping` endpoint to verify container health:
|
|
26
197
|
|
|
27
198
|
```bash
|
|
28
|
-
|
|
199
|
+
curl http://localhost:8080/ping
|
|
29
200
|
```
|
|
30
201
|
|
|
31
|
-
|
|
202
|
+
Expected response: `200 OK`
|
|
203
|
+
|
|
204
|
+
### Inference
|
|
205
|
+
|
|
206
|
+
Send prediction requests to the `/invocations` endpoint:
|
|
32
207
|
|
|
33
208
|
```bash
|
|
34
|
-
|
|
209
|
+
curl -X POST http://localhost:8080/invocations \
|
|
210
|
+
-H "Content-Type: application/json" \
|
|
211
|
+
-d '{
|
|
212
|
+
"inputs": "What is machine learning?",
|
|
213
|
+
"parameters": {
|
|
214
|
+
"max_new_tokens": 100,
|
|
215
|
+
"temperature": 0.7
|
|
216
|
+
}
|
|
217
|
+
}'
|
|
35
218
|
```
|
|
36
219
|
|
|
37
|
-
### Or install from source
|
|
38
220
|
|
|
221
|
+
## AWS Requirements
|
|
222
|
+
|
|
223
|
+
### IAM Permissions
|
|
224
|
+
|
|
225
|
+
The SageMaker execution role needs these permissions:
|
|
226
|
+
|
|
227
|
+
- `ecr:GetAuthorizationToken`
|
|
228
|
+
- `ecr:BatchCheckLayerAvailability`
|
|
229
|
+
- `ecr:GetDownloadUrlForLayer`
|
|
230
|
+
- `ecr:BatchGetImage`
|
|
231
|
+
- `s3:GetObject` (if using S3 for model artifacts)
|
|
232
|
+
- `logs:CreateLogGroup`
|
|
233
|
+
- `logs:CreateLogStream`
|
|
234
|
+
- `logs:PutLogEvents`
|
|
235
|
+
|
|
236
|
+
See `IAM_PERMISSIONS.md` for detailed permission requirements.
|
|
237
|
+
|
|
238
|
+
### AWS CLI Configuration
|
|
239
|
+
|
|
240
|
+
Ensure AWS CLI is configured with appropriate credentials:
|
|
241
|
+
|
|
242
|
+
```bash
|
|
243
|
+
aws configure
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
Or use environment variables:
|
|
247
|
+
|
|
248
|
+
```bash
|
|
249
|
+
export AWS_ACCESS_KEY_ID=your-access-key
|
|
250
|
+
export AWS_SECRET_ACCESS_KEY=your-secret-key
|
|
251
|
+
export AWS_DEFAULT_REGION=us-west-2
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
## Troubleshooting
|
|
255
|
+
|
|
256
|
+
### Build Issues
|
|
257
|
+
|
|
258
|
+
**Docker Not Found**
|
|
259
|
+
|
|
260
|
+
Install Docker: https://docs.docker.com/get-docker/
|
|
261
|
+
|
|
262
|
+
**Permission Denied**
|
|
263
|
+
|
|
264
|
+
Add your user to the docker group:
|
|
39
265
|
```bash
|
|
40
|
-
|
|
41
|
-
cd ml-container-creator
|
|
42
|
-
npm install && npm link
|
|
266
|
+
sudo usermod -aG docker $USER
|
|
43
267
|
```
|
|
44
268
|
|
|
45
|
-
###
|
|
269
|
+
### Deployment Issues
|
|
46
270
|
|
|
271
|
+
**ECR Push Failed**
|
|
272
|
+
|
|
273
|
+
Check AWS credentials and IAM permissions:
|
|
47
274
|
```bash
|
|
48
|
-
|
|
275
|
+
aws sts get-caller-identity
|
|
49
276
|
```
|
|
50
277
|
|
|
51
|
-
|
|
278
|
+
**Endpoint Creation Failed**
|
|
279
|
+
|
|
280
|
+
- Verify the execution role ARN is correct
|
|
281
|
+
- Check IAM permissions
|
|
282
|
+
- Ensure the instance type is available in your region
|
|
52
283
|
|
|
53
|
-
|
|
284
|
+
**Endpoint Stuck in Creating**
|
|
54
285
|
|
|
286
|
+
Check CloudWatch logs:
|
|
55
287
|
```bash
|
|
56
|
-
|
|
57
|
-
|
|
288
|
+
aws logs tail /aws/sagemaker/Endpoints/sharp-transformer-deployment-endpoint --follow
|
|
289
|
+
```
|
|
58
290
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
291
|
+
### Runtime Issues
|
|
292
|
+
|
|
293
|
+
**Container Exits Immediately**
|
|
294
|
+
|
|
295
|
+
Check container logs:
|
|
296
|
+
```bash
|
|
297
|
+
docker logs $(docker ps -a | grep sharp-transformer-deployment | awk '{print $1}')
|
|
66
298
|
```
|
|
67
299
|
|
|
68
|
-
|
|
300
|
+
**Out of Memory**
|
|
69
301
|
|
|
302
|
+
Increase instance size or optimize model:
|
|
70
303
|
```bash
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
./do/deploy # Deploy to SageMaker
|
|
74
|
-
./do/test # Test the endpoint
|
|
304
|
+
# Edit do/config
|
|
305
|
+
INSTANCE_TYPE="ml.g5.12xlarge"  # Larger GPU instance (4× A10G, 96GB)
|
|
75
306
|
```
|
|
76
307
|
|
|
77
|
-
##
|
|
308
|
+
## Migration from Legacy Scripts
|
|
309
|
+
|
|
310
|
+
If you're familiar with the old `deploy/` scripts, see `MIGRATION.md` for a command mapping guide.
|
|
311
|
+
|
|
312
|
+
**Quick Reference**:
|
|
313
|
+
|
|
314
|
+
| Legacy Command | do-framework Command |
|
|
315
|
+
|----------------|---------------------|
|
|
316
|
+
| `./deploy/build_and_push.sh` | `./do/build && ./do/push` |
|
|
317
|
+
| `./deploy/deploy.sh <role>` | `./do/deploy <role>` |
|
|
318
|
+
| `./deploy/submit_build.sh` | `./do/submit` |
|
|
78
319
|
|
|
79
|
-
|
|
320
|
+
The legacy scripts are still available but deprecated. They will display warnings and forward to do-framework commands.
|
|
80
321
|
|
|
81
|
-
|
|
82
|
-
- [Configuration](https://awslabs.github.io/ml-container-creator/configuration/) — CLI flags, env vars, config files, MCP servers
|
|
83
|
-
- [Deployment Guide](https://awslabs.github.io/ml-container-creator/deployments/) — All deployment targets and lifecycle scripts
|
|
84
|
-
- [CI Integration](https://awslabs.github.io/ml-container-creator/ci-integration/) — Automated lifecycle testing for all deployment configurations
|
|
85
|
-
- [Examples](https://awslabs.github.io/ml-container-creator/EXAMPLES/) — Framework-specific walkthroughs
|
|
86
|
-
- [Troubleshooting](https://awslabs.github.io/ml-container-creator/TROUBLESHOOTING/) — Common issues and solutions
|
|
322
|
+
## Additional Resources
|
|
87
323
|
|
|
88
|
-
|
|
324
|
+
- [do-framework Documentation](https://github.com/iankoulski/do-framework)
|
|
325
|
+
- [AWS SageMaker Documentation](https://docs.aws.amazon.com/sagemaker/)
|
|
326
|
+
- [SageMaker BYOC Guide](https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms.html)
|
|
89
327
|
|
|
90
|
-
|
|
91
|
-
|---|---|---|
|
|
92
|
-
| [Node.js](https://nodejs.org/) | 24+ | Runs the CLI |
|
|
93
|
-
| [Docker](https://docs.docker.com/get-docker/) | 20+ | Container builds |
|
|
94
|
-
| [AWS CLI](https://aws.amazon.com/cli/) | 2+ | AWS resource management |
|
|
328
|
+
- [vLLM Documentation](https://docs.vllm.ai/)
|
|
95
329
|
|
|
96
|
-
## Contributing
|
|
97
330
|
|
|
98
|
-
|
|
331
|
+
## Support
|
|
99
332
|
|
|
100
|
-
|
|
333
|
+
For issues or questions:
|
|
101
334
|
|
|
102
|
-
|
|
335
|
+
1. Check `do/README.md` for detailed command documentation
|
|
336
|
+
2. Review CloudWatch logs for deployment issues
|
|
337
|
+
3. See `MIGRATION.md` if migrating from legacy scripts
|
|
338
|
+
4. Open an issue on the [ML Container Creator repository](https://github.com/awslabs/ml-container-creator)
|
|
103
339
|
|
|
104
340
|
## License
|
|
105
341
|
|
|
106
|
-
|
|
342
|
+
This generated project is provided as starter code. Modify as needed for your use case.
|
package/bin/cli.js
CHANGED
|
@@ -27,7 +27,8 @@ program
|
|
|
27
27
|
|
|
28
28
|
// --- General ---
|
|
29
29
|
.addOption(new Option('--skip-prompts', 'Skip interactive prompts and use configuration from other sources'))
|
|
30
|
-
.addOption(new Option('--
|
|
30
|
+
.addOption(new Option('--auto-prompt', 'Fill defaults, prompt only for missing required values'))
|
|
31
|
+
.addOption(new Option('--config <path>', 'Path to JSON configuration file'))
|
|
31
32
|
.addOption(new Option('--project-name <name>', 'Project name'))
|
|
32
33
|
.addOption(new Option('--project-dir <dir>', 'Output directory path'))
|
|
33
34
|
.addOption(new Option('--force', 'Overwrite existing output directory without prompting'))
|
|
@@ -41,7 +42,7 @@ program
|
|
|
41
42
|
.addOption(new Option('--base-image <image>', 'Base container image for Dockerfile'))
|
|
42
43
|
|
|
43
44
|
// --- Build & Infrastructure ---
|
|
44
|
-
.addOption(new Option('--deployment-target <target>', 'Deployment target (
|
|
45
|
+
.addOption(new Option('--deployment-target <target>', 'Deployment target (realtime-inference, async-inference, batch-transform, hyperpod-eks)'))
|
|
45
46
|
.addOption(new Option('--instance-type <type>', 'SageMaker instance type (e.g. ml.g5.xlarge, ml.m5.large)'))
|
|
46
47
|
.addOption(new Option('--region <region>', 'AWS region'))
|
|
47
48
|
.addOption(new Option('--role-arn <arn>', 'IAM role ARN for SageMaker execution'))
|
|
@@ -154,7 +155,7 @@ program.configureHelp({
|
|
|
154
155
|
|
|
155
156
|
for (const opt of allOptions) {
|
|
156
157
|
const long = opt.long || '';
|
|
157
|
-
if (['--skip-prompts', '--config', '--project-name', '--project-dir', '--force', '--version', '--help'].includes(long)) {
|
|
158
|
+
if (['--skip-prompts', '--auto-prompt', '--config', '--project-name', '--project-dir', '--force', '--version', '--help'].includes(long)) {
|
|
158
159
|
groups.general.push(opt);
|
|
159
160
|
} else if (['--deployment-config', '--framework', '--model-format', '--model-name', '--model-server', '--base-image'].includes(long)) {
|
|
160
161
|
groups.model.push(opt);
|
package/package.json
CHANGED
package/src/app.js
CHANGED
|
@@ -156,6 +156,22 @@ export async function run(projectName, options) {
|
|
|
156
156
|
console.log(' If your model package lacks an InferenceSpecification, use the S3 path');
|
|
157
157
|
console.log(' directly instead: --model-name="s3://bucket/path/model.tar.gz"\n');
|
|
158
158
|
}
|
|
159
|
+
} else if (configManager.isAutoPrompt()) {
|
|
160
|
+
// Auto-prompt mode: run the wizard with all resolved values pre-filled.
|
|
161
|
+
// The wizard skips prompts for values already in explicitConfig and
|
|
162
|
+
// uses phase-level gates to skip irrelevant sections entirely.
|
|
163
|
+
// This gives context-aware prompting (correct MCP queries, filtered choices)
|
|
164
|
+
// while only asking for what's truly missing.
|
|
165
|
+
console.log('\n🔄 Auto-prompt mode — prompting only for missing values with full context');
|
|
166
|
+
|
|
167
|
+
const promptRunner = new PromptRunner({
|
|
168
|
+
configManager,
|
|
169
|
+
options: kebabOptions,
|
|
170
|
+
registryConfigManager,
|
|
171
|
+
baseConfig
|
|
172
|
+
});
|
|
173
|
+
const promptAnswers = await promptRunner.run();
|
|
174
|
+
answers = configManager.getFinalConfiguration(promptAnswers);
|
|
159
175
|
} else {
|
|
160
176
|
const promptRunner = new PromptRunner({
|
|
161
177
|
configManager,
|
|
@@ -482,7 +498,7 @@ async function _ensureTemplateVariables(answers, registryConfigManager = null) {
|
|
|
482
498
|
testTypes: [],
|
|
483
499
|
buildTimestamp: new Date().toISOString(),
|
|
484
500
|
buildTarget: 'codebuild',
|
|
485
|
-
deploymentTarget: '
|
|
501
|
+
deploymentTarget: 'realtime-inference',
|
|
486
502
|
hyperPodCluster: null,
|
|
487
503
|
hyperPodNamespace: 'default',
|
|
488
504
|
hyperPodReplicas: 1,
|
|
package/src/lib/auto-prompt-builder.js
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
2
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Auto-Prompt Builder — generates targeted prompts for missing required parameters.
|
|
6
|
+
*
|
|
7
|
+
* Used by --auto-prompt mode to ask only for values that cannot be inferred
|
|
8
|
+
* or defaulted from the provided CLI flags.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
/**
|
|
12
|
+
* Builds a minimal set of prompts for the given missing parameters.
|
|
13
|
+
* Each prompt is self-contained and doesn't depend on multi-phase wizard state.
|
|
14
|
+
*
|
|
15
|
+
* @param {string[]} missingParams - Parameter names that need values
|
|
16
|
+
* @param {object} currentConfig - Current configuration (with defaults filled)
|
|
17
|
+
* @returns {Array} Array of prompt objects compatible with runPrompts()
|
|
18
|
+
*/
|
|
19
|
+
export function buildAutoPrompts(missingParams, currentConfig) {
|
|
20
|
+
const prompts = [];
|
|
21
|
+
|
|
22
|
+
for (const param of missingParams) {
|
|
23
|
+
const builder = PROMPT_BUILDERS[param];
|
|
24
|
+
if (builder) {
|
|
25
|
+
const prompt = builder(currentConfig);
|
|
26
|
+
if (prompt) {
|
|
27
|
+
prompts.push(prompt);
|
|
28
|
+
}
|
|
29
|
+
} else {
|
|
30
|
+
// Fallback: generic text input for unknown parameters
|
|
31
|
+
prompts.push({
|
|
32
|
+
type: 'input',
|
|
33
|
+
name: param,
|
|
34
|
+
message: `Enter value for ${param}:`
|
|
35
|
+
});
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
return prompts;
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
/**
|
|
43
|
+
* Map of parameter names to prompt builder functions.
|
|
44
|
+
* Each builder receives the current config and returns a prompt object.
|
|
45
|
+
*/
|
|
46
|
+
const PROMPT_BUILDERS = {
|
|
47
|
+
deploymentConfig: (_config) => ({
|
|
48
|
+
type: 'list',
|
|
49
|
+
name: 'deploymentConfig',
|
|
50
|
+
message: 'Select deployment configuration:',
|
|
51
|
+
choices: [
|
|
52
|
+
{ type: 'separator', separator: '── Large Language Models ──' },
|
|
53
|
+
{ name: 'Transformers with vLLM', value: 'transformers-vllm' },
|
|
54
|
+
{ name: 'Transformers with SGLang', value: 'transformers-sglang' },
|
|
55
|
+
{ name: 'Transformers with TensorRT-LLM', value: 'transformers-tensorrt-llm' },
|
|
56
|
+
{ name: 'Transformers with LMI', value: 'transformers-lmi' },
|
|
57
|
+
{ name: 'Transformers with DJL', value: 'transformers-djl' },
|
|
58
|
+
{ type: 'separator', separator: '── HTTP Serving ──' },
|
|
59
|
+
{ name: 'HTTP with Flask', value: 'http-flask' },
|
|
60
|
+
{ name: 'HTTP with FastAPI', value: 'http-fastapi' },
|
|
61
|
+
{ type: 'separator', separator: '── NVIDIA Triton ──' },
|
|
62
|
+
{ name: 'Triton FIL (XGBoost, LightGBM)', value: 'triton-fil' },
|
|
63
|
+
{ name: 'Triton ONNX Runtime', value: 'triton-onnxruntime' },
|
|
64
|
+
{ name: 'Triton TensorFlow', value: 'triton-tensorflow' },
|
|
65
|
+
{ name: 'Triton PyTorch', value: 'triton-pytorch' },
|
|
66
|
+
{ name: 'Triton vLLM', value: 'triton-vllm' },
|
|
67
|
+
{ name: 'Triton TensorRT-LLM', value: 'triton-tensorrtllm' },
|
|
68
|
+
{ name: 'Triton Python Backend', value: 'triton-python' },
|
|
69
|
+
{ type: 'separator', separator: '── Diffusion Models ──' },
|
|
70
|
+
{ name: 'Diffusors with vLLM Omni', value: 'diffusors-vllm-omni' }
|
|
71
|
+
]
|
|
72
|
+
}),
|
|
73
|
+
|
|
74
|
+
instanceType: (config) => {
|
|
75
|
+
const architecture = config.architecture || 'http';
|
|
76
|
+
const isGpu = architecture === 'transformers' || architecture === 'triton' || architecture === 'diffusors';
|
|
77
|
+
|
|
78
|
+
const gpuChoices = [
|
|
79
|
+
{ name: 'ml.g5.xlarge (1× A10G 24GB — small LLMs)', value: 'ml.g5.xlarge' },
|
|
80
|
+
{ name: 'ml.g5.2xlarge (1× A10G 24GB — medium LLMs)', value: 'ml.g5.2xlarge' },
|
|
81
|
+
{ name: 'ml.g5.4xlarge (1× A10G 24GB — larger models)', value: 'ml.g5.4xlarge' },
|
|
82
|
+
{ name: 'ml.g5.12xlarge (4× A10G 96GB — large LLMs)', value: 'ml.g5.12xlarge' },
|
|
83
|
+
{ name: 'ml.g5.48xlarge (8× A10G 192GB — very large)', value: 'ml.g5.48xlarge' },
|
|
84
|
+
{ name: 'ml.g6.xlarge (1× L4 24GB)', value: 'ml.g6.xlarge' },
|
|
85
|
+
{ name: 'ml.g6.2xlarge (1× L4 24GB)', value: 'ml.g6.2xlarge' },
|
|
86
|
+
{ name: 'ml.p4d.24xlarge (8× A100 320GB)', value: 'ml.p4d.24xlarge' },
|
|
87
|
+
{ name: 'ml.p5.48xlarge (8× H100 640GB)', value: 'ml.p5.48xlarge' },
|
|
88
|
+
{ name: 'Custom (enter manually)', value: '_custom' }
|
|
89
|
+
];
|
|
90
|
+
|
|
91
|
+
const cpuChoices = [
|
|
92
|
+
{ name: 'ml.m5.large (2 vCPU, 8GB — lightweight)', value: 'ml.m5.large' },
|
|
93
|
+
{ name: 'ml.m5.xlarge (4 vCPU, 16GB — small models)', value: 'ml.m5.xlarge' },
|
|
94
|
+
{ name: 'ml.m5.2xlarge (8 vCPU, 32GB — medium models)', value: 'ml.m5.2xlarge' },
|
|
95
|
+
{ name: 'ml.m5.4xlarge (16 vCPU, 64GB — large models)', value: 'ml.m5.4xlarge' },
|
|
96
|
+
{ name: 'ml.c5.xlarge (4 vCPU, 8GB — compute-heavy)', value: 'ml.c5.xlarge' },
|
|
97
|
+
{ name: 'ml.c5.2xlarge (8 vCPU, 16GB — compute-heavy)', value: 'ml.c5.2xlarge' },
|
|
98
|
+
{ name: 'Custom (enter manually)', value: '_custom' }
|
|
99
|
+
];
|
|
100
|
+
|
|
101
|
+
return {
|
|
102
|
+
type: 'list',
|
|
103
|
+
name: 'instanceType',
|
|
104
|
+
message: `Select instance type${isGpu ? ' (GPU recommended for this architecture)' : ''}:`,
|
|
105
|
+
choices: isGpu ? gpuChoices : cpuChoices
|
|
106
|
+
};
|
|
107
|
+
},
|
|
108
|
+
|
|
109
|
+
deploymentTarget: (_config) => ({
|
|
110
|
+
type: 'list',
|
|
111
|
+
name: 'deploymentTarget',
|
|
112
|
+
message: 'Select deployment target:',
|
|
113
|
+
choices: [
|
|
114
|
+
{ name: 'Real-Time Inference', value: 'realtime-inference' },
|
|
115
|
+
{ name: 'Async Inference', value: 'async-inference' },
|
|
116
|
+
{ name: 'Batch Transform', value: 'batch-transform' },
|
|
117
|
+
{ name: 'HyperPod EKS', value: 'hyperpod-eks' }
|
|
118
|
+
]
|
|
119
|
+
}),
|
|
120
|
+
|
|
121
|
+
modelFormat: (config) => {
|
|
122
|
+
const engine = config.engine || 'sklearn';
|
|
123
|
+
const formatMap = {
|
|
124
|
+
sklearn: [
|
|
125
|
+
{ name: 'pkl (pickle)', value: 'pkl' },
|
|
126
|
+
{ name: 'joblib', value: 'joblib' }
|
|
127
|
+
],
|
|
128
|
+
xgboost: [
|
|
129
|
+
{ name: 'json', value: 'json' },
|
|
130
|
+
{ name: 'model (binary)', value: 'model' },
|
|
131
|
+
{ name: 'ubj (universal binary JSON)', value: 'ubj' }
|
|
132
|
+
],
|
|
133
|
+
tensorflow: [
|
|
134
|
+
{ name: 'keras', value: 'keras' },
|
|
135
|
+
{ name: 'h5', value: 'h5' },
|
|
136
|
+
{ name: 'SavedModel', value: 'SavedModel' }
|
|
137
|
+
]
|
|
138
|
+
};
|
|
139
|
+
|
|
140
|
+
const choices = formatMap[engine] || formatMap.sklearn;
|
|
141
|
+
|
|
142
|
+
return {
|
|
143
|
+
type: 'list',
|
|
144
|
+
name: 'modelFormat',
|
|
145
|
+
message: `Select model format for ${engine}:`,
|
|
146
|
+
choices
|
|
147
|
+
};
|
|
148
|
+
},
|
|
149
|
+
|
|
150
|
+
awsRegion: (_config) => ({
|
|
151
|
+
type: 'list',
|
|
152
|
+
name: 'awsRegion',
|
|
153
|
+
message: 'Select AWS region:',
|
|
154
|
+
choices: [
|
|
155
|
+
{ name: 'us-east-1 (N. Virginia)', value: 'us-east-1' },
|
|
156
|
+
{ name: 'us-west-2 (Oregon)', value: 'us-west-2' },
|
|
157
|
+
{ name: 'eu-west-1 (Ireland)', value: 'eu-west-1' },
|
|
158
|
+
{ name: 'ap-northeast-1 (Tokyo)', value: 'ap-northeast-1' },
|
|
159
|
+
{ name: 'ap-southeast-1 (Singapore)', value: 'ap-southeast-1' },
|
|
160
|
+
{ name: 'Custom (enter manually)', value: '_custom' }
|
|
161
|
+
]
|
|
162
|
+
}),
|
|
163
|
+
|
|
164
|
+
buildTarget: (_config) => ({
|
|
165
|
+
type: 'list',
|
|
166
|
+
name: 'buildTarget',
|
|
167
|
+
message: 'Select build target:',
|
|
168
|
+
choices: [
|
|
169
|
+
{ name: 'CodeBuild (recommended)', value: 'codebuild' }
|
|
170
|
+
]
|
|
171
|
+
})
|
|
172
|
+
};
|
|
package/src/lib/ci-register-helpers.js
CHANGED
|
@@ -25,7 +25,7 @@ import { createHash } from 'node:crypto';
|
|
|
25
25
|
* @param {string} modelName - e.g. "meta-llama/Llama-2-7b-chat-hf", defaults to "none"
|
|
26
26
|
* @param {string} instanceType - e.g. "ml.g5.xlarge"
|
|
27
27
|
* @param {string} region - e.g. "us-east-1"
|
|
28
|
-
* @param {string} deploymentTarget - e.g. "
|
|
28
|
+
* @param {string} deploymentTarget - e.g. "realtime-inference"
|
|
29
29
|
* @returns {string} 16-character lowercase hex string
|
|
30
30
|
*/
|
|
31
31
|
export function computeConfigId(deploymentConfig, modelName, instanceType, region, deploymentTarget) {
|
package/src/lib/cli-handler.js
CHANGED
|
@@ -190,7 +190,7 @@ CLI OPTIONS:
|
|
|
190
190
|
--instance-type=<type> SageMaker instance type (e.g., ml.m5.large, ml.g5.xlarge)
|
|
191
191
|
--region=<region> AWS region
|
|
192
192
|
--role-arn=<arn> AWS IAM role ARN for SageMaker execution
|
|
193
|
-
--deployment-target=<target> Deployment target (
|
|
193
|
+
--deployment-target=<target> Deployment target (realtime-inference|async-inference|batch-transform|hyperpod-eks)
|
|
194
194
|
--hyperpod-cluster=<name> HyperPod EKS cluster name
|
|
195
195
|
--hyperpod-namespace=<ns> Kubernetes namespace for HyperPod (default: default)
|
|
196
196
|
--hyperpod-replicas=<n> Number of replicas for HyperPod (default: 1)
|