@aws/ml-container-creator 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48):
  1. package/bin/cli.js +31 -137
  2. package/config/parameter-schema-v2.json +2065 -0
  3. package/package.json +6 -3
  4. package/servers/lib/catalogs/jumpstart-public.json +101 -16
  5. package/servers/lib/catalogs/models.json +182 -26
  6. package/src/app.js +6 -389
  7. package/src/lib/bootstrap-command-handler.js +75 -1078
  8. package/src/lib/bootstrap-profile-manager.js +634 -0
  9. package/src/lib/bootstrap-provisioners.js +421 -0
  10. package/src/lib/config-loader.js +405 -0
  11. package/src/lib/config-manager.js +59 -1668
  12. package/src/lib/config-mcp-client.js +118 -0
  13. package/src/lib/config-validator.js +634 -0
  14. package/src/lib/cuda-resolver.js +140 -0
  15. package/src/lib/e2e-catalog-validator.js +251 -3
  16. package/src/lib/e2e-ci-recorder.js +103 -0
  17. package/src/lib/generated/cli-options.js +471 -0
  18. package/src/lib/generated/parameter-matrix.js +671 -0
  19. package/src/lib/generated/validation-rules.js +202 -0
  20. package/src/lib/marketplace-flow.js +276 -0
  21. package/src/lib/mcp-query-runner.js +768 -0
  22. package/src/lib/parameter-schema-validator.js +62 -18
  23. package/src/lib/prompt-runner.js +41 -1504
  24. package/src/lib/prompts/feature-prompts.js +172 -0
  25. package/src/lib/prompts/index.js +48 -0
  26. package/src/lib/prompts/infrastructure-prompts.js +690 -0
  27. package/src/lib/prompts/model-prompts.js +552 -0
  28. package/src/lib/prompts/project-prompts.js +70 -0
  29. package/src/lib/prompts.js +2 -1446
  30. package/src/lib/registry-command-handler.js +135 -3
  31. package/src/lib/secrets-prompt-runner.js +251 -0
  32. package/src/lib/template-variable-resolver.js +398 -0
  33. package/templates/code/serve +5 -134
  34. package/templates/code/serve.d/lmi.ejs +19 -0
  35. package/templates/code/serve.d/sglang.ejs +47 -0
  36. package/templates/code/serve.d/tensorrt-llm.ejs +53 -0
  37. package/templates/code/serve.d/vllm.ejs +48 -0
  38. package/templates/do/clean +1 -1387
  39. package/templates/do/clean.d/async-inference.ejs +508 -0
  40. package/templates/do/clean.d/batch-transform.ejs +512 -0
  41. package/templates/do/clean.d/hyperpod-eks.ejs +481 -0
  42. package/templates/do/clean.d/managed-inference.ejs +1043 -0
  43. package/templates/do/deploy +1 -1766
  44. package/templates/do/deploy.d/async-inference.ejs +501 -0
  45. package/templates/do/deploy.d/batch-transform.ejs +529 -0
  46. package/templates/do/deploy.d/hyperpod-eks.ejs +339 -0
  47. package/templates/do/deploy.d/managed-inference.ejs +726 -0
  48. package/config/parameter-schema.json +0 -88
@@ -0,0 +1,471 @@
1
+ // AUTO-GENERATED by scripts/codegen-cli.js — DO NOT EDIT
2
+ // Source: config/parameter-schema-v2.json
3
+ // Generated: 2026-05-23T12:02:19.426Z
4
+
5
+ /**
6
+ * CLI option definitions derived from parameter-schema-v2.json.
7
+ * Each entry can be registered with Commander via:
8
+ * new Option(entry.flag, entry.description)
9
+ */
10
+ export const cliOptions = [
11
+ {
12
+ "flag": "--project-name <name>",
13
+ "description": "Name for the generated project"
14
+ },
15
+ {
16
+ "flag": "--deployment-config <config>",
17
+ "description": "Deployment configuration (e.g. http-flask, transformers-vllm, triton-fil)",
18
+ "choices": [
19
+ "http-flask",
20
+ "http-fastapi",
21
+ "transformers-vllm",
22
+ "transformers-sglang",
23
+ "transformers-tensorrt-llm",
24
+ "transformers-lmi",
25
+ "transformers-djl",
26
+ "triton-fil",
27
+ "triton-onnxruntime",
28
+ "triton-tensorflow",
29
+ "triton-pytorch",
30
+ "triton-vllm",
31
+ "triton-tensorrtllm",
32
+ "triton-python",
33
+ "diffusors-vllm-omni",
34
+ "marketplace"
35
+ ]
36
+ },
37
+ {
38
+ "flag": "--model-name <name>",
39
+ "description": "Model identifier (hf-org/model, s3://..., registry://..., marketplace://...)"
40
+ },
41
+ {
42
+ "flag": "--deployment-target <target>",
43
+ "description": "Deployment target (managed-inference, async-inference, batch-transform, hyperpod-eks)",
44
+ "choices": [
45
+ "managed-inference",
46
+ "realtime-inference",
47
+ "async-inference",
48
+ "batch-transform",
49
+ "hyperpod-eks"
50
+ ],
51
+ "defaultValue": "realtime-inference"
52
+ },
53
+ {
54
+ "flag": "--instance-type <type>",
55
+ "description": "SageMaker instance type (e.g. ml.g5.xlarge, ml.m5.large)"
56
+ },
57
+ {
58
+ "flag": "--ic-gpu-count <n>",
59
+ "description": "GPUs allocated to the inference component"
60
+ },
61
+ {
62
+ "flag": "--ic-copy-count <n>",
63
+ "description": "Number of inference component copies",
64
+ "defaultValue": 1
65
+ },
66
+ {
67
+ "flag": "--ic-memory-size <mb>",
68
+ "description": "Memory in MB for the inference component",
69
+ "defaultValue": 1024
70
+ },
71
+ {
72
+ "flag": "--enable-lora",
73
+ "description": "Enable LoRA adapter serving",
74
+ "defaultValue": false
75
+ },
76
+ {
77
+ "flag": "--max-loras <n>",
78
+ "description": "Maximum concurrent LoRA adapters in GPU memory",
79
+ "defaultValue": 30
80
+ },
81
+ {
82
+ "flag": "--max-lora-rank <n>",
83
+ "description": "Maximum LoRA rank",
84
+ "defaultValue": 64
85
+ },
86
+ {
87
+ "flag": "--include-benchmark",
88
+ "description": "Include SageMaker AI Benchmarking",
89
+ "defaultValue": false
90
+ },
91
+ {
92
+ "flag": "--benchmark-concurrency <n>",
93
+ "description": "Benchmark concurrent requests",
94
+ "defaultValue": 10
95
+ },
96
+ {
97
+ "flag": "--benchmark-input-tokens <n>",
98
+ "description": "Benchmark mean input tokens",
99
+ "defaultValue": 550
100
+ },
101
+ {
102
+ "flag": "--benchmark-output-tokens <n>",
103
+ "description": "Benchmark mean output tokens",
104
+ "defaultValue": 150
105
+ },
106
+ {
107
+ "flag": "--benchmark-streaming",
108
+ "description": "Enable streaming in benchmark",
109
+ "defaultValue": true
110
+ },
111
+ {
112
+ "flag": "--benchmark-request-count <n>",
113
+ "description": "Total number of benchmark requests to send"
114
+ },
115
+ {
116
+ "flag": "--benchmark-s3-output-path <path>",
117
+ "description": "S3 URI for benchmark results output"
118
+ },
119
+ {
120
+ "flag": "--skip-prompts",
121
+ "description": "Skip interactive prompts and use configuration from other sources",
122
+ "defaultValue": false
123
+ },
124
+ {
125
+ "flag": "--auto-prompt",
126
+ "description": "Fill defaults, prompt only for missing required values",
127
+ "defaultValue": false
128
+ },
129
+ {
130
+ "flag": "--config <path>",
131
+ "description": "Path to JSON configuration file"
132
+ },
133
+ {
134
+ "flag": "--project-dir <dir>",
135
+ "description": "Output directory path"
136
+ },
137
+ {
138
+ "flag": "--force",
139
+ "description": "Overwrite existing output directory without prompting",
140
+ "defaultValue": false
141
+ },
142
+ {
143
+ "flag": "--framework <framework>",
144
+ "description": "ML framework",
145
+ "choices": [
146
+ "sklearn",
147
+ "xgboost",
148
+ "tensorflow",
149
+ "transformers"
150
+ ],
151
+ "hidden": true
152
+ },
153
+ {
154
+ "flag": "--model-format <format>",
155
+ "description": "Model serialization format (pkl, joblib, json, model, ubj, keras, h5, SavedModel)",
156
+ "choices": [
157
+ "pkl",
158
+ "joblib",
159
+ "json",
160
+ "model",
161
+ "ubj",
162
+ "keras",
163
+ "h5",
164
+ "SavedModel"
165
+ ]
166
+ },
167
+ {
168
+ "flag": "--model-server <server>",
169
+ "description": "Model server",
170
+ "choices": [
171
+ "flask",
172
+ "fastapi",
173
+ "vllm",
174
+ "sglang"
175
+ ],
176
+ "hidden": true
177
+ },
178
+ {
179
+ "flag": "--base-image <image>",
180
+ "description": "Base container image for Dockerfile"
181
+ },
182
+ {
183
+ "flag": "--region <region>",
184
+ "description": "AWS region",
185
+ "defaultValue": "us-east-1"
186
+ },
187
+ {
188
+ "flag": "--role-arn <arn>",
189
+ "description": "IAM role ARN for SageMaker execution"
190
+ },
191
+ {
192
+ "flag": "--build-target <target>",
193
+ "description": "Build target (codebuild)",
194
+ "choices": [
195
+ "codebuild"
196
+ ]
197
+ },
198
+ {
199
+ "flag": "--codebuild-compute-type <type>",
200
+ "description": "CodeBuild compute type (SMALL, MEDIUM, LARGE)",
201
+ "choices": [
202
+ "SMALL",
203
+ "MEDIUM",
204
+ "LARGE",
205
+ "BUILD_GENERAL1_SMALL",
206
+ "BUILD_GENERAL1_MEDIUM",
207
+ "BUILD_GENERAL1_LARGE",
208
+ "BUILD_GENERAL1_2XLARGE"
209
+ ],
210
+ "defaultValue": "BUILD_GENERAL1_LARGE"
211
+ },
212
+ {
213
+ "flag": "--hf-token <token>",
214
+ "description": "HuggingFace token (or $HF_TOKEN for env var reference)"
215
+ },
216
+ {
217
+ "flag": "--hf-token-arn <arn>",
218
+ "description": "HuggingFace token ARN from Secrets Manager"
219
+ },
220
+ {
221
+ "flag": "--ngc-token <token>",
222
+ "description": "NVIDIA NGC token (or $NGC_API_KEY for env var reference)"
223
+ },
224
+ {
225
+ "flag": "--ngc-token-arn <arn>",
226
+ "description": "NVIDIA NGC token ARN from Secrets Manager"
227
+ },
228
+ {
229
+ "flag": "--endpoint-initial-instance-count <n>",
230
+ "description": "Number of instances for the endpoint",
231
+ "defaultValue": 1
232
+ },
233
+ {
234
+ "flag": "--endpoint-data-capture-percent <pct>",
235
+ "description": "Data capture percentage for monitoring, 0-100",
236
+ "defaultValue": 0
237
+ },
238
+ {
239
+ "flag": "--endpoint-variant-name <name>",
240
+ "description": "Production variant name",
241
+ "defaultValue": "AllTraffic"
242
+ },
243
+ {
244
+ "flag": "--endpoint-volume-size <gb>",
245
+ "description": "ML storage volume size in GB"
246
+ },
247
+ {
248
+ "flag": "--ic-cpu-count <n>",
249
+ "description": "vCPUs allocated to the inference component"
250
+ },
251
+ {
252
+ "flag": "--ic-model-weight <weight>",
253
+ "description": "Traffic routing weight, 0-1",
254
+ "defaultValue": 1
255
+ },
256
+ {
257
+ "flag": "--async-s3-output-path <path>",
258
+ "description": "S3 output path for async results"
259
+ },
260
+ {
261
+ "flag": "--async-sns-success-topic <arn>",
262
+ "description": "SNS topic ARN for success notifications"
263
+ },
264
+ {
265
+ "flag": "--async-sns-error-topic <arn>",
266
+ "description": "SNS topic ARN for error notifications"
267
+ },
268
+ {
269
+ "flag": "--async-max-concurrent <n>",
270
+ "description": "Max concurrent invocations per instance",
271
+ "defaultValue": 1
272
+ },
273
+ {
274
+ "flag": "--batch-input-path <path>",
275
+ "description": "S3 input path for batch data"
276
+ },
277
+ {
278
+ "flag": "--batch-output-path <path>",
279
+ "description": "S3 output path for batch results"
280
+ },
281
+ {
282
+ "flag": "--batch-instance-count <n>",
283
+ "description": "Number of batch instances",
284
+ "defaultValue": 1
285
+ },
286
+ {
287
+ "flag": "--batch-split-type <type>",
288
+ "description": "Input split type: Line, RecordIO, None",
289
+ "choices": [
290
+ "Line",
291
+ "RecordIO",
292
+ "None"
293
+ ],
294
+ "defaultValue": "Line"
295
+ },
296
+ {
297
+ "flag": "--batch-strategy <strategy>",
298
+ "description": "Batch strategy: MultiRecord, SingleRecord",
299
+ "choices": [
300
+ "MultiRecord",
301
+ "SingleRecord"
302
+ ],
303
+ "defaultValue": "MultiRecord"
304
+ },
305
+ {
306
+ "flag": "--batch-join-source <source>",
307
+ "description": "Join source: Input, None",
308
+ "choices": [
309
+ "Input",
310
+ "None"
311
+ ],
312
+ "defaultValue": "None"
313
+ },
314
+ {
315
+ "flag": "--batch-max-concurrent <n>",
316
+ "description": "Max concurrent transforms per instance",
317
+ "defaultValue": 1
318
+ },
319
+ {
320
+ "flag": "--batch-max-payload <mb>",
321
+ "description": "Max payload size in MB, 0-100",
322
+ "defaultValue": 6
323
+ },
324
+ {
325
+ "flag": "--hyperpod-cluster <name>",
326
+ "description": "HyperPod EKS cluster name"
327
+ },
328
+ {
329
+ "flag": "--hyperpod-namespace <ns>",
330
+ "description": "Kubernetes namespace",
331
+ "defaultValue": "default"
332
+ },
333
+ {
334
+ "flag": "--hyperpod-replicas <count>",
335
+ "description": "Number of replicas",
336
+ "defaultValue": 1
337
+ },
338
+ {
339
+ "flag": "--fsx-volume-handle <handle>",
340
+ "description": "FSx for Lustre volume handle"
341
+ },
342
+ {
343
+ "flag": "--model-env <KEY=VALUE>",
344
+ "description": "Model env var, repeatable (e.g. VLLM_TENSOR_PARALLEL_SIZE=4)",
345
+ "repeatable": true
346
+ },
347
+ {
348
+ "flag": "--server-env <KEY=VALUE>",
349
+ "description": "Server env var, repeatable (e.g. SGLANG_MEM_FRACTION=0.9)",
350
+ "repeatable": true
351
+ },
352
+ {
353
+ "flag": "--include-sample",
354
+ "description": "Include sample model code",
355
+ "defaultValue": true
356
+ },
357
+ {
358
+ "flag": "--include-testing",
359
+ "description": "Include test suite",
360
+ "defaultValue": true
361
+ },
362
+ {
363
+ "flag": "--test-types <types>",
364
+ "description": "Comma-separated test types"
365
+ },
366
+ {
367
+ "flag": "--smart",
368
+ "description": "Enable smart mode (live AWS API calls for MCP servers)",
369
+ "defaultValue": false
370
+ },
371
+ {
372
+ "flag": "--discover",
373
+ "description": "Enable discovery mode for MCP servers",
374
+ "defaultValue": false
375
+ },
376
+ {
377
+ "flag": "--no-validate",
378
+ "description": "Skip parameter validation",
379
+ "defaultValue": false
380
+ },
381
+ {
382
+ "flag": "--validate-env-vars",
383
+ "description": "Validate environment variables against schema",
384
+ "defaultValue": false
385
+ },
386
+ {
387
+ "flag": "--validate-with-docker",
388
+ "description": "Validate Dockerfile builds successfully",
389
+ "defaultValue": false
390
+ },
391
+ {
392
+ "flag": "--offline",
393
+ "description": "Run in offline mode (no network calls)",
394
+ "defaultValue": false
395
+ }
396
+ ];
397
+
398
+ /**
399
+ * Maps CLI flags to help section groups.
400
+ * Used by the custom help formatter in bin/cli.js.
401
+ */
402
+ export const helpGroups = {
403
+ "--project-name": "general",
404
+ "--deployment-config": "model",
405
+ "--model-name": "model",
406
+ "--deployment-target": "infra",
407
+ "--instance-type": "infra",
408
+ "--ic-gpu-count": "ic",
409
+ "--ic-copy-count": "ic",
410
+ "--ic-memory-size": "ic",
411
+ "--enable-lora": "features",
412
+ "--max-loras": "features",
413
+ "--max-lora-rank": "features",
414
+ "--include-benchmark": "general",
415
+ "--benchmark-concurrency": "general",
416
+ "--benchmark-input-tokens": "general",
417
+ "--benchmark-output-tokens": "general",
418
+ "--benchmark-streaming": "general",
419
+ "--benchmark-request-count": "general",
420
+ "--benchmark-s3-output-path": "general",
421
+ "--skip-prompts": "general",
422
+ "--auto-prompt": "general",
423
+ "--config": "general",
424
+ "--project-dir": "general",
425
+ "--force": "general",
426
+ "--framework": "model",
427
+ "--model-format": "model",
428
+ "--model-server": "model",
429
+ "--base-image": "infra",
430
+ "--region": "infra",
431
+ "--role-arn": "infra",
432
+ "--build-target": "infra",
433
+ "--codebuild-compute-type": "infra",
434
+ "--hf-token": "auth",
435
+ "--hf-token-arn": "auth",
436
+ "--ngc-token": "auth",
437
+ "--ngc-token-arn": "auth",
438
+ "--endpoint-initial-instance-count": "endpoint",
439
+ "--endpoint-data-capture-percent": "endpoint",
440
+ "--endpoint-variant-name": "endpoint",
441
+ "--endpoint-volume-size": "endpoint",
442
+ "--ic-cpu-count": "ic",
443
+ "--ic-model-weight": "ic",
444
+ "--async-s3-output-path": "async",
445
+ "--async-sns-success-topic": "async",
446
+ "--async-sns-error-topic": "async",
447
+ "--async-max-concurrent": "async",
448
+ "--batch-input-path": "batch",
449
+ "--batch-output-path": "batch",
450
+ "--batch-instance-count": "batch",
451
+ "--batch-split-type": "batch",
452
+ "--batch-strategy": "batch",
453
+ "--batch-join-source": "batch",
454
+ "--batch-max-concurrent": "batch",
455
+ "--batch-max-payload": "batch",
456
+ "--hyperpod-cluster": "hyperpod",
457
+ "--hyperpod-namespace": "hyperpod",
458
+ "--hyperpod-replicas": "hyperpod",
459
+ "--fsx-volume-handle": "hyperpod",
460
+ "--model-env": "env",
461
+ "--server-env": "env",
462
+ "--include-sample": "features",
463
+ "--include-testing": "features",
464
+ "--test-types": "features",
465
+ "--smart": "mcp",
466
+ "--discover": "mcp",
467
+ "--no-validate": "validation",
468
+ "--validate-env-vars": "validation",
469
+ "--validate-with-docker": "validation",
470
+ "--offline": "validation"
471
+ };