@aws/ml-container-creator 0.10.0 → 0.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. package/LICENSE-THIRD-PARTY +9304 -0
  2. package/bin/cli.js +2 -0
  3. package/config/bootstrap-e2e-stack.json +341 -0
  4. package/config/bootstrap-stack.json +40 -3
  5. package/config/parameter-schema-v2.json +33 -22
  6. package/config/tune-catalog.json +1781 -0
  7. package/infra/ci-harness/buildspec.yml +1 -0
  8. package/infra/ci-harness/lambda/path-prover/brain.ts +306 -0
  9. package/infra/ci-harness/lambda/path-prover/write-results.ts +152 -0
  10. package/infra/ci-harness/lib/ci-harness-stack.ts +851 -7
  11. package/infra/ci-harness/state-machines/path-prover.asl.json +496 -0
  12. package/package.json +53 -67
  13. package/servers/base-image-picker/index.js +121 -121
  14. package/servers/e2e-status/index.js +297 -0
  15. package/servers/e2e-status/manifest.json +14 -0
  16. package/servers/e2e-status/package.json +15 -0
  17. package/servers/endpoint-picker/LICENSE +202 -0
  18. package/servers/endpoint-picker/index.js +536 -0
  19. package/servers/endpoint-picker/manifest.json +14 -0
  20. package/servers/endpoint-picker/package.json +18 -0
  21. package/servers/hyperpod-cluster-picker/index.js +125 -125
  22. package/servers/instance-sizer/index.js +166 -153
  23. package/servers/instance-sizer/lib/instance-ranker.js +120 -76
  24. package/servers/instance-sizer/lib/model-resolver.js +61 -61
  25. package/servers/instance-sizer/lib/quota-resolver.js +113 -113
  26. package/servers/instance-sizer/lib/vram-estimator.js +31 -31
  27. package/servers/lib/bedrock-client.js +38 -38
  28. package/servers/lib/catalogs/instances.json +27 -0
  29. package/servers/lib/catalogs/model-servers.json +201 -3
  30. package/servers/lib/custom-validators.js +13 -13
  31. package/servers/lib/dynamic-resolver.js +4 -4
  32. package/servers/marketplace-picker/index.js +342 -0
  33. package/servers/marketplace-picker/manifest.json +14 -0
  34. package/servers/marketplace-picker/package.json +18 -0
  35. package/servers/model-picker/index.js +382 -382
  36. package/servers/region-picker/index.js +56 -56
  37. package/servers/workload-picker/LICENSE +202 -0
  38. package/servers/workload-picker/catalogs/workload-profiles.json +67 -0
  39. package/servers/workload-picker/index.js +171 -0
  40. package/servers/workload-picker/manifest.json +16 -0
  41. package/servers/workload-picker/package.json +16 -0
  42. package/src/app.js +12 -3
  43. package/src/lib/bootstrap-command-handler.js +609 -15
  44. package/src/lib/bootstrap-config.js +36 -0
  45. package/src/lib/bootstrap-profile-manager.js +48 -41
  46. package/src/lib/ci-register-helpers.js +74 -0
  47. package/src/lib/config-loader.js +3 -0
  48. package/src/lib/config-manager.js +7 -0
  49. package/src/lib/config-validator.js +1 -1
  50. package/src/lib/cuda-resolver.js +17 -8
  51. package/src/lib/generated/cli-options.js +319 -314
  52. package/src/lib/generated/parameter-matrix.js +672 -661
  53. package/src/lib/generated/validation-rules.js +76 -72
  54. package/src/lib/path-prover-brain.js +664 -0
  55. package/src/lib/prompts/infrastructure-prompts.js +2 -2
  56. package/src/lib/prompts/model-prompts.js +6 -0
  57. package/src/lib/prompts/project-prompts.js +12 -0
  58. package/src/lib/secrets-prompt-runner.js +4 -0
  59. package/src/lib/template-manager.js +1 -1
  60. package/src/lib/template-variable-resolver.js +87 -1
  61. package/src/lib/tune-catalog-validator.js +37 -4
  62. package/templates/Dockerfile +9 -0
  63. package/templates/code/adapter_sidecar.py +444 -0
  64. package/templates/code/serve +6 -0
  65. package/templates/code/serve.d/vllm.ejs +1 -1
  66. package/templates/do/.benchmark_writer.py +1476 -0
  67. package/templates/do/.tune_helper.py +982 -57
  68. package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
  69. package/templates/do/adapter +154 -0
  70. package/templates/do/benchmark +639 -85
  71. package/templates/do/build +5 -0
  72. package/templates/do/clean.d/async-inference.ejs +5 -0
  73. package/templates/do/clean.d/batch-transform.ejs +5 -0
  74. package/templates/do/clean.d/hyperpod-eks.ejs +5 -0
  75. package/templates/do/clean.d/managed-inference.ejs +5 -0
  76. package/templates/do/config +115 -45
  77. package/templates/do/deploy.d/async-inference.ejs +30 -3
  78. package/templates/do/deploy.d/batch-transform.ejs +29 -3
  79. package/templates/do/deploy.d/hyperpod-eks.ejs +4 -0
  80. package/templates/do/deploy.d/managed-inference.ejs +216 -14
  81. package/templates/do/lib/endpoint-config.sh +1 -1
  82. package/templates/do/lib/profile.sh +44 -0
  83. package/templates/do/optimize +106 -37
  84. package/templates/do/push +5 -0
  85. package/templates/do/register +94 -0
  86. package/templates/do/stage +567 -0
  87. package/templates/do/submit +7 -0
  88. package/templates/do/test +14 -0
  89. package/templates/do/tune +382 -59
  90. package/templates/do/validate +44 -4
@@ -1,6 +1,6 @@
1
1
  // AUTO-GENERATED by scripts/codegen-cli.js — DO NOT EDIT
2
2
  // Source: config/parameter-schema-v2.json
3
- // Generated: 2026-05-23T12:02:19.426Z
3
+ // Generated: 2026-06-12T22:03:00.429Z
4
4
 
5
5
  /**
6
6
  * CLI option definitions derived from parameter-schema-v2.json.
@@ -9,389 +9,393 @@
9
9
  */
10
10
  export const cliOptions = [
11
11
  {
12
- "flag": "--project-name <name>",
13
- "description": "Name for the generated project"
14
- },
15
- {
16
- "flag": "--deployment-config <config>",
17
- "description": "Deployment configuration (e.g. http-flask, transformers-vllm, triton-fil)",
18
- "choices": [
19
- "http-flask",
20
- "http-fastapi",
21
- "transformers-vllm",
22
- "transformers-sglang",
23
- "transformers-tensorrt-llm",
24
- "transformers-lmi",
25
- "transformers-djl",
26
- "triton-fil",
27
- "triton-onnxruntime",
28
- "triton-tensorflow",
29
- "triton-pytorch",
30
- "triton-vllm",
31
- "triton-tensorrtllm",
32
- "triton-python",
33
- "diffusors-vllm-omni",
34
- "marketplace"
12
+ 'flag': '--project-name <name>',
13
+ 'description': 'Name for the generated project'
14
+ },
15
+ {
16
+ 'flag': '--deployment-config <config>',
17
+ 'description': 'Deployment configuration (e.g. http-flask, transformers-vllm, triton-fil)',
18
+ 'choices': [
19
+ 'http-flask',
20
+ 'http-fastapi',
21
+ 'transformers-vllm',
22
+ 'transformers-sglang',
23
+ 'transformers-tensorrt-llm',
24
+ 'transformers-lmi',
25
+ 'transformers-djl',
26
+ 'triton-fil',
27
+ 'triton-onnxruntime',
28
+ 'triton-tensorflow',
29
+ 'triton-pytorch',
30
+ 'triton-vllm',
31
+ 'triton-tensorrtllm',
32
+ 'triton-python',
33
+ 'diffusors-vllm-omni',
34
+ 'marketplace'
35
35
  ]
36
36
  },
37
37
  {
38
- "flag": "--model-name <name>",
39
- "description": "Model identifier (hf-org/model, s3://..., registry://..., marketplace://...)"
38
+ 'flag': '--model-name <name>',
39
+ 'description': 'Model identifier (hf-org/model, s3://..., registry://..., marketplace://...)'
40
40
  },
41
41
  {
42
- "flag": "--deployment-target <target>",
43
- "description": "Deployment target (managed-inference, async-inference, batch-transform, hyperpod-eks)",
44
- "choices": [
45
- "managed-inference",
46
- "realtime-inference",
47
- "async-inference",
48
- "batch-transform",
49
- "hyperpod-eks"
42
+ 'flag': '--deployment-target <target>',
43
+ 'description': 'Deployment target (managed-inference, async-inference, batch-transform, hyperpod-eks)',
44
+ 'choices': [
45
+ 'managed-inference',
46
+ 'realtime-inference',
47
+ 'async-inference',
48
+ 'batch-transform',
49
+ 'hyperpod-eks'
50
50
  ],
51
- "defaultValue": "realtime-inference"
51
+ 'defaultValue': 'realtime-inference'
52
52
  },
53
53
  {
54
- "flag": "--instance-type <type>",
55
- "description": "SageMaker instance type (e.g. ml.g5.xlarge, ml.m5.large)"
54
+ 'flag': '--instance-type <type>',
55
+ 'description': 'SageMaker instance type (e.g. ml.g5.xlarge, ml.m5.large)'
56
56
  },
57
57
  {
58
- "flag": "--ic-gpu-count <n>",
59
- "description": "GPUs allocated to the inference component"
58
+ 'flag': '--ic-gpu-count <n>',
59
+ 'description': 'GPUs allocated to the inference component'
60
60
  },
61
61
  {
62
- "flag": "--ic-copy-count <n>",
63
- "description": "Number of inference component copies",
64
- "defaultValue": 1
62
+ 'flag': '--ic-copy-count <n>',
63
+ 'description': 'Number of inference component copies',
64
+ 'defaultValue': 1
65
65
  },
66
66
  {
67
- "flag": "--ic-memory-size <mb>",
68
- "description": "Memory in MB for the inference component",
69
- "defaultValue": 1024
67
+ 'flag': '--ic-memory-size <mb>',
68
+ 'description': 'Memory in MB for the inference component'
70
69
  },
71
70
  {
72
- "flag": "--enable-lora",
73
- "description": "Enable LoRA adapter serving",
74
- "defaultValue": false
71
+ 'flag': '--enable-lora',
72
+ 'description': 'Enable LoRA adapter serving',
73
+ 'defaultValue': false
75
74
  },
76
75
  {
77
- "flag": "--max-loras <n>",
78
- "description": "Maximum concurrent LoRA adapters in GPU memory",
79
- "defaultValue": 30
76
+ 'flag': '--max-loras <n>',
77
+ 'description': 'Maximum concurrent LoRA adapters in GPU memory',
78
+ 'defaultValue': 30
80
79
  },
81
80
  {
82
- "flag": "--max-lora-rank <n>",
83
- "description": "Maximum LoRA rank",
84
- "defaultValue": 64
81
+ 'flag': '--max-lora-rank <n>',
82
+ 'description': 'Maximum LoRA rank',
83
+ 'defaultValue': 64
85
84
  },
86
85
  {
87
- "flag": "--include-benchmark",
88
- "description": "Include SageMaker AI Benchmarking",
89
- "defaultValue": false
86
+ 'flag': '--include-benchmark',
87
+ 'description': 'Include SageMaker AI Benchmarking scripts (do/benchmark, do/optimize). Workload configuration is specified at runtime via --workload flag.',
88
+ 'defaultValue': false
90
89
  },
91
90
  {
92
- "flag": "--benchmark-concurrency <n>",
93
- "description": "Benchmark concurrent requests",
94
- "defaultValue": 10
91
+ 'flag': '--benchmark-concurrency <n>',
92
+ 'description': 'Benchmark concurrent requests',
93
+ 'defaultValue': 10
95
94
  },
96
95
  {
97
- "flag": "--benchmark-input-tokens <n>",
98
- "description": "Benchmark mean input tokens",
99
- "defaultValue": 550
96
+ 'flag': '--benchmark-input-tokens <n>',
97
+ 'description': 'Benchmark mean input tokens',
98
+ 'defaultValue': 550
100
99
  },
101
100
  {
102
- "flag": "--benchmark-output-tokens <n>",
103
- "description": "Benchmark mean output tokens",
104
- "defaultValue": 150
101
+ 'flag': '--benchmark-output-tokens <n>',
102
+ 'description': 'Benchmark mean output tokens',
103
+ 'defaultValue': 150
105
104
  },
106
105
  {
107
- "flag": "--benchmark-streaming",
108
- "description": "Enable streaming in benchmark",
109
- "defaultValue": true
106
+ 'flag': '--benchmark-streaming',
107
+ 'description': 'Enable streaming in benchmark',
108
+ 'defaultValue': true
110
109
  },
111
110
  {
112
- "flag": "--benchmark-request-count <n>",
113
- "description": "Total number of benchmark requests to send"
111
+ 'flag': '--benchmark-request-count <n>',
112
+ 'description': 'Total number of benchmark requests to send'
114
113
  },
115
114
  {
116
- "flag": "--benchmark-s3-output-path <path>",
117
- "description": "S3 URI for benchmark results output"
115
+ 'flag': '--benchmark-s3-output-path <path>',
116
+ 'description': 'S3 URI for benchmark results output'
118
117
  },
119
118
  {
120
- "flag": "--skip-prompts",
121
- "description": "Skip interactive prompts and use configuration from other sources",
122
- "defaultValue": false
119
+ 'flag': '--skip-prompts',
120
+ 'description': 'Skip interactive prompts and use configuration from other sources',
121
+ 'defaultValue': false
123
122
  },
124
123
  {
125
- "flag": "--auto-prompt",
126
- "description": "Fill defaults, prompt only for missing required values",
127
- "defaultValue": false
124
+ 'flag': '--auto-prompt',
125
+ 'description': 'Fill defaults, prompt only for missing required values',
126
+ 'defaultValue': false
128
127
  },
129
128
  {
130
- "flag": "--config <path>",
131
- "description": "Path to JSON configuration file"
129
+ 'flag': '--config <path>',
130
+ 'description': 'Path to JSON configuration file'
132
131
  },
133
132
  {
134
- "flag": "--project-dir <dir>",
135
- "description": "Output directory path"
133
+ 'flag': '--project-dir <dir>',
134
+ 'description': 'Output directory path'
136
135
  },
137
136
  {
138
- "flag": "--force",
139
- "description": "Overwrite existing output directory without prompting",
140
- "defaultValue": false
137
+ 'flag': '--force',
138
+ 'description': 'Overwrite existing output directory without prompting',
139
+ 'defaultValue': false
141
140
  },
142
141
  {
143
- "flag": "--framework <framework>",
144
- "description": "ML framework",
145
- "choices": [
146
- "sklearn",
147
- "xgboost",
148
- "tensorflow",
149
- "transformers"
142
+ 'flag': '--framework <framework>',
143
+ 'description': 'ML framework',
144
+ 'choices': [
145
+ 'sklearn',
146
+ 'xgboost',
147
+ 'tensorflow',
148
+ 'transformers'
150
149
  ],
151
- "hidden": true
152
- },
153
- {
154
- "flag": "--model-format <format>",
155
- "description": "Model serialization format (pkl, joblib, json, model, ubj, keras, h5, SavedModel)",
156
- "choices": [
157
- "pkl",
158
- "joblib",
159
- "json",
160
- "model",
161
- "ubj",
162
- "keras",
163
- "h5",
164
- "SavedModel"
150
+ 'hidden': true
151
+ },
152
+ {
153
+ 'flag': '--model-format <format>',
154
+ 'description': 'Model serialization format (pkl, joblib, json, model, ubj, keras, h5, SavedModel)',
155
+ 'choices': [
156
+ 'pkl',
157
+ 'joblib',
158
+ 'json',
159
+ 'model',
160
+ 'ubj',
161
+ 'keras',
162
+ 'h5',
163
+ 'SavedModel'
165
164
  ]
166
165
  },
167
166
  {
168
- "flag": "--model-server <server>",
169
- "description": "Model server",
170
- "choices": [
171
- "flask",
172
- "fastapi",
173
- "vllm",
174
- "sglang"
167
+ 'flag': '--model-server <server>',
168
+ 'description': 'Model server',
169
+ 'choices': [
170
+ 'flask',
171
+ 'fastapi',
172
+ 'vllm',
173
+ 'sglang'
175
174
  ],
176
- "hidden": true
175
+ 'hidden': true
177
176
  },
178
177
  {
179
- "flag": "--base-image <image>",
180
- "description": "Base container image for Dockerfile"
178
+ 'flag': '--base-image <image>',
179
+ 'description': 'Base container image for Dockerfile'
181
180
  },
182
181
  {
183
- "flag": "--region <region>",
184
- "description": "AWS region",
185
- "defaultValue": "us-east-1"
182
+ 'flag': '--region <region>',
183
+ 'description': 'AWS region',
184
+ 'defaultValue': 'us-east-1'
186
185
  },
187
186
  {
188
- "flag": "--role-arn <arn>",
189
- "description": "IAM role ARN for SageMaker execution"
187
+ 'flag': '--role-arn <arn>',
188
+ 'description': 'IAM role ARN for SageMaker execution'
190
189
  },
191
190
  {
192
- "flag": "--build-target <target>",
193
- "description": "Build target (codebuild)",
194
- "choices": [
195
- "codebuild"
196
- ]
191
+ 'flag': '--build-target <target>',
192
+ 'description': 'Build target (codebuild)',
193
+ 'choices': [
194
+ 'codebuild'
195
+ ],
196
+ 'defaultValue': 'codebuild'
197
+ },
198
+ {
199
+ 'flag': '--codebuild-compute-type <type>',
200
+ 'description': 'CodeBuild compute type (SMALL, MEDIUM, LARGE)',
201
+ 'choices': [
202
+ 'SMALL',
203
+ 'MEDIUM',
204
+ 'LARGE',
205
+ 'BUILD_GENERAL1_SMALL',
206
+ 'BUILD_GENERAL1_MEDIUM',
207
+ 'BUILD_GENERAL1_LARGE',
208
+ 'BUILD_GENERAL1_2XLARGE'
209
+ ],
210
+ 'defaultValue': 'BUILD_GENERAL1_LARGE'
197
211
  },
198
212
  {
199
- "flag": "--codebuild-compute-type <type>",
200
- "description": "CodeBuild compute type (SMALL, MEDIUM, LARGE)",
201
- "choices": [
202
- "SMALL",
203
- "MEDIUM",
204
- "LARGE",
205
- "BUILD_GENERAL1_SMALL",
206
- "BUILD_GENERAL1_MEDIUM",
207
- "BUILD_GENERAL1_LARGE",
208
- "BUILD_GENERAL1_2XLARGE"
209
- ],
210
- "defaultValue": "BUILD_GENERAL1_LARGE"
213
+ 'flag': '--hf-token <token>',
214
+ 'description': 'HuggingFace token (or $HF_TOKEN for env var reference)'
211
215
  },
212
216
  {
213
- "flag": "--hf-token <token>",
214
- "description": "HuggingFace token (or $HF_TOKEN for env var reference)"
217
+ 'flag': '--hf-token-arn <arn>',
218
+ 'description': 'HuggingFace token ARN from Secrets Manager'
215
219
  },
216
220
  {
217
- "flag": "--hf-token-arn <arn>",
218
- "description": "HuggingFace token ARN from Secrets Manager"
221
+ 'flag': '--ngc-token <token>',
222
+ 'description': 'NVIDIA NGC token (or $NGC_API_KEY for env var reference)'
219
223
  },
220
224
  {
221
- "flag": "--ngc-token <token>",
222
- "description": "NVIDIA NGC token (or $NGC_API_KEY for env var reference)"
225
+ 'flag': '--ngc-token-arn <arn>',
226
+ 'description': 'NVIDIA NGC token ARN from Secrets Manager'
223
227
  },
224
228
  {
225
- "flag": "--ngc-token-arn <arn>",
226
- "description": "NVIDIA NGC token ARN from Secrets Manager"
229
+ 'flag': '--endpoint-initial-instance-count <n>',
230
+ 'description': 'Number of instances for the endpoint',
231
+ 'defaultValue': 1
227
232
  },
228
233
  {
229
- "flag": "--endpoint-initial-instance-count <n>",
230
- "description": "Number of instances for the endpoint",
231
- "defaultValue": 1
234
+ 'flag': '--endpoint-data-capture-percent <pct>',
235
+ 'description': 'Data capture percentage for monitoring, 0-100',
236
+ 'defaultValue': 0
232
237
  },
233
238
  {
234
- "flag": "--endpoint-data-capture-percent <pct>",
235
- "description": "Data capture percentage for monitoring, 0-100",
236
- "defaultValue": 0
239
+ 'flag': '--endpoint-variant-name <name>',
240
+ 'description': 'Production variant name',
241
+ 'defaultValue': 'AllTraffic'
237
242
  },
238
243
  {
239
- "flag": "--endpoint-variant-name <name>",
240
- "description": "Production variant name",
241
- "defaultValue": "AllTraffic"
244
+ 'flag': '--endpoint-volume-size <gb>',
245
+ 'description': 'ML storage volume size in GB'
242
246
  },
243
247
  {
244
- "flag": "--endpoint-volume-size <gb>",
245
- "description": "ML storage volume size in GB"
248
+ 'flag': '--capacity-reservation-arn <arn>',
249
+ 'description': 'Capacity reservation ARN (FTP or ODCR) for reserved instance deployment'
246
250
  },
247
251
  {
248
- "flag": "--ic-cpu-count <n>",
249
- "description": "vCPUs allocated to the inference component"
252
+ 'flag': '--ic-cpu-count <n>',
253
+ 'description': 'vCPUs allocated to the inference component'
250
254
  },
251
255
  {
252
- "flag": "--ic-model-weight <weight>",
253
- "description": "Traffic routing weight, 0-1",
254
- "defaultValue": 1
256
+ 'flag': '--ic-model-weight <weight>',
257
+ 'description': 'Traffic routing weight, 0-1',
258
+ 'defaultValue': 1
255
259
  },
256
260
  {
257
- "flag": "--async-s3-output-path <path>",
258
- "description": "S3 output path for async results"
261
+ 'flag': '--async-s3-output-path <path>',
262
+ 'description': 'S3 output path for async results'
259
263
  },
260
264
  {
261
- "flag": "--async-sns-success-topic <arn>",
262
- "description": "SNS topic ARN for success notifications"
265
+ 'flag': '--async-sns-success-topic <arn>',
266
+ 'description': 'SNS topic ARN for success notifications'
263
267
  },
264
268
  {
265
- "flag": "--async-sns-error-topic <arn>",
266
- "description": "SNS topic ARN for error notifications"
269
+ 'flag': '--async-sns-error-topic <arn>',
270
+ 'description': 'SNS topic ARN for error notifications'
267
271
  },
268
272
  {
269
- "flag": "--async-max-concurrent <n>",
270
- "description": "Max concurrent invocations per instance",
271
- "defaultValue": 1
273
+ 'flag': '--async-max-concurrent <n>',
274
+ 'description': 'Max concurrent invocations per instance',
275
+ 'defaultValue': 1
272
276
  },
273
277
  {
274
- "flag": "--batch-input-path <path>",
275
- "description": "S3 input path for batch data"
278
+ 'flag': '--batch-input-path <path>',
279
+ 'description': 'S3 input path for batch data'
276
280
  },
277
281
  {
278
- "flag": "--batch-output-path <path>",
279
- "description": "S3 output path for batch results"
282
+ 'flag': '--batch-output-path <path>',
283
+ 'description': 'S3 output path for batch results'
280
284
  },
281
285
  {
282
- "flag": "--batch-instance-count <n>",
283
- "description": "Number of batch instances",
284
- "defaultValue": 1
286
+ 'flag': '--batch-instance-count <n>',
287
+ 'description': 'Number of batch instances',
288
+ 'defaultValue': 1
285
289
  },
286
290
  {
287
- "flag": "--batch-split-type <type>",
288
- "description": "Input split type: Line, RecordIO, None",
289
- "choices": [
290
- "Line",
291
- "RecordIO",
292
- "None"
291
+ 'flag': '--batch-split-type <type>',
292
+ 'description': 'Input split type: Line, RecordIO, None',
293
+ 'choices': [
294
+ 'Line',
295
+ 'RecordIO',
296
+ 'None'
293
297
  ],
294
- "defaultValue": "Line"
298
+ 'defaultValue': 'Line'
295
299
  },
296
300
  {
297
- "flag": "--batch-strategy <strategy>",
298
- "description": "Batch strategy: MultiRecord, SingleRecord",
299
- "choices": [
300
- "MultiRecord",
301
- "SingleRecord"
301
+ 'flag': '--batch-strategy <strategy>',
302
+ 'description': 'Batch strategy: MultiRecord, SingleRecord',
303
+ 'choices': [
304
+ 'MultiRecord',
305
+ 'SingleRecord'
302
306
  ],
303
- "defaultValue": "MultiRecord"
307
+ 'defaultValue': 'MultiRecord'
304
308
  },
305
309
  {
306
- "flag": "--batch-join-source <source>",
307
- "description": "Join source: Input, None",
308
- "choices": [
309
- "Input",
310
- "None"
310
+ 'flag': '--batch-join-source <source>',
311
+ 'description': 'Join source: Input, None',
312
+ 'choices': [
313
+ 'Input',
314
+ 'None'
311
315
  ],
312
- "defaultValue": "None"
316
+ 'defaultValue': 'None'
313
317
  },
314
318
  {
315
- "flag": "--batch-max-concurrent <n>",
316
- "description": "Max concurrent transforms per instance",
317
- "defaultValue": 1
319
+ 'flag': '--batch-max-concurrent <n>',
320
+ 'description': 'Max concurrent transforms per instance',
321
+ 'defaultValue': 1
318
322
  },
319
323
  {
320
- "flag": "--batch-max-payload <mb>",
321
- "description": "Max payload size in MB, 0-100",
322
- "defaultValue": 6
324
+ 'flag': '--batch-max-payload <mb>',
325
+ 'description': 'Max payload size in MB, 0-100',
326
+ 'defaultValue': 6
323
327
  },
324
328
  {
325
- "flag": "--hyperpod-cluster <name>",
326
- "description": "HyperPod EKS cluster name"
329
+ 'flag': '--hyperpod-cluster <name>',
330
+ 'description': 'HyperPod EKS cluster name'
327
331
  },
328
332
  {
329
- "flag": "--hyperpod-namespace <ns>",
330
- "description": "Kubernetes namespace",
331
- "defaultValue": "default"
333
+ 'flag': '--hyperpod-namespace <ns>',
334
+ 'description': 'Kubernetes namespace',
335
+ 'defaultValue': 'default'
332
336
  },
333
337
  {
334
- "flag": "--hyperpod-replicas <count>",
335
- "description": "Number of replicas",
336
- "defaultValue": 1
338
+ 'flag': '--hyperpod-replicas <count>',
339
+ 'description': 'Number of replicas',
340
+ 'defaultValue': 1
337
341
  },
338
342
  {
339
- "flag": "--fsx-volume-handle <handle>",
340
- "description": "FSx for Lustre volume handle"
343
+ 'flag': '--fsx-volume-handle <handle>',
344
+ 'description': 'FSx for Lustre volume handle'
341
345
  },
342
346
  {
343
- "flag": "--model-env <KEY=VALUE>",
344
- "description": "Model env var, repeatable (e.g. VLLM_TENSOR_PARALLEL_SIZE=4)",
345
- "repeatable": true
347
+ 'flag': '--model-env <KEY=VALUE>',
348
+ 'description': 'Model env var, repeatable (e.g. VLLM_TENSOR_PARALLEL_SIZE=4)',
349
+ 'repeatable': true
346
350
  },
347
351
  {
348
- "flag": "--server-env <KEY=VALUE>",
349
- "description": "Server env var, repeatable (e.g. SGLANG_MEM_FRACTION=0.9)",
350
- "repeatable": true
352
+ 'flag': '--server-env <KEY=VALUE>',
353
+ 'description': 'Server env var, repeatable (e.g. SGLANG_MEM_FRACTION=0.9)',
354
+ 'repeatable': true
351
355
  },
352
356
  {
353
- "flag": "--include-sample",
354
- "description": "Include sample model code",
355
- "defaultValue": true
357
+ 'flag': '--include-sample',
358
+ 'description': 'Include sample model code',
359
+ 'defaultValue': true
356
360
  },
357
361
  {
358
- "flag": "--include-testing",
359
- "description": "Include test suite",
360
- "defaultValue": true
362
+ 'flag': '--include-testing',
363
+ 'description': 'Include test suite',
364
+ 'defaultValue': true
361
365
  },
362
366
  {
363
- "flag": "--test-types <types>",
364
- "description": "Comma-separated test types"
367
+ 'flag': '--test-types <types>',
368
+ 'description': 'Comma-separated test types'
365
369
  },
366
370
  {
367
- "flag": "--smart",
368
- "description": "Enable smart mode (live AWS API calls for MCP servers)",
369
- "defaultValue": false
371
+ 'flag': '--smart',
372
+ 'description': 'Enable smart mode (live AWS API calls for MCP servers)',
373
+ 'defaultValue': false
370
374
  },
371
375
  {
372
- "flag": "--discover",
373
- "description": "Enable discovery mode for MCP servers",
374
- "defaultValue": false
376
+ 'flag': '--discover',
377
+ 'description': 'Enable discovery mode for MCP servers',
378
+ 'defaultValue': false
375
379
  },
376
380
  {
377
- "flag": "--no-validate",
378
- "description": "Skip parameter validation",
379
- "defaultValue": false
381
+ 'flag': '--no-validate',
382
+ 'description': 'Skip parameter validation',
383
+ 'defaultValue': false
380
384
  },
381
385
  {
382
- "flag": "--validate-env-vars",
383
- "description": "Validate environment variables against schema",
384
- "defaultValue": false
386
+ 'flag': '--validate-env-vars',
387
+ 'description': 'Validate environment variables against schema',
388
+ 'defaultValue': false
385
389
  },
386
390
  {
387
- "flag": "--validate-with-docker",
388
- "description": "Validate Dockerfile builds successfully",
389
- "defaultValue": false
391
+ 'flag': '--validate-with-docker',
392
+ 'description': 'Validate Dockerfile builds successfully',
393
+ 'defaultValue': false
390
394
  },
391
395
  {
392
- "flag": "--offline",
393
- "description": "Run in offline mode (no network calls)",
394
- "defaultValue": false
396
+ 'flag': '--offline',
397
+ 'description': 'Run in offline mode (no network calls)',
398
+ 'defaultValue': false
395
399
  }
396
400
  ];
397
401
 
@@ -400,72 +404,73 @@ export const cliOptions = [
400
404
  * Used by the custom help formatter in bin/cli.js.
401
405
  */
402
406
  export const helpGroups = {
403
- "--project-name": "general",
404
- "--deployment-config": "model",
405
- "--model-name": "model",
406
- "--deployment-target": "infra",
407
- "--instance-type": "infra",
408
- "--ic-gpu-count": "ic",
409
- "--ic-copy-count": "ic",
410
- "--ic-memory-size": "ic",
411
- "--enable-lora": "features",
412
- "--max-loras": "features",
413
- "--max-lora-rank": "features",
414
- "--include-benchmark": "general",
415
- "--benchmark-concurrency": "general",
416
- "--benchmark-input-tokens": "general",
417
- "--benchmark-output-tokens": "general",
418
- "--benchmark-streaming": "general",
419
- "--benchmark-request-count": "general",
420
- "--benchmark-s3-output-path": "general",
421
- "--skip-prompts": "general",
422
- "--auto-prompt": "general",
423
- "--config": "general",
424
- "--project-dir": "general",
425
- "--force": "general",
426
- "--framework": "model",
427
- "--model-format": "model",
428
- "--model-server": "model",
429
- "--base-image": "infra",
430
- "--region": "infra",
431
- "--role-arn": "infra",
432
- "--build-target": "infra",
433
- "--codebuild-compute-type": "infra",
434
- "--hf-token": "auth",
435
- "--hf-token-arn": "auth",
436
- "--ngc-token": "auth",
437
- "--ngc-token-arn": "auth",
438
- "--endpoint-initial-instance-count": "endpoint",
439
- "--endpoint-data-capture-percent": "endpoint",
440
- "--endpoint-variant-name": "endpoint",
441
- "--endpoint-volume-size": "endpoint",
442
- "--ic-cpu-count": "ic",
443
- "--ic-model-weight": "ic",
444
- "--async-s3-output-path": "async",
445
- "--async-sns-success-topic": "async",
446
- "--async-sns-error-topic": "async",
447
- "--async-max-concurrent": "async",
448
- "--batch-input-path": "batch",
449
- "--batch-output-path": "batch",
450
- "--batch-instance-count": "batch",
451
- "--batch-split-type": "batch",
452
- "--batch-strategy": "batch",
453
- "--batch-join-source": "batch",
454
- "--batch-max-concurrent": "batch",
455
- "--batch-max-payload": "batch",
456
- "--hyperpod-cluster": "hyperpod",
457
- "--hyperpod-namespace": "hyperpod",
458
- "--hyperpod-replicas": "hyperpod",
459
- "--fsx-volume-handle": "hyperpod",
460
- "--model-env": "env",
461
- "--server-env": "env",
462
- "--include-sample": "features",
463
- "--include-testing": "features",
464
- "--test-types": "features",
465
- "--smart": "mcp",
466
- "--discover": "mcp",
467
- "--no-validate": "validation",
468
- "--validate-env-vars": "validation",
469
- "--validate-with-docker": "validation",
470
- "--offline": "validation"
407
+ '--project-name': 'general',
408
+ '--deployment-config': 'model',
409
+ '--model-name': 'model',
410
+ '--deployment-target': 'infra',
411
+ '--instance-type': 'infra',
412
+ '--ic-gpu-count': 'ic',
413
+ '--ic-copy-count': 'ic',
414
+ '--ic-memory-size': 'ic',
415
+ '--enable-lora': 'features',
416
+ '--max-loras': 'features',
417
+ '--max-lora-rank': 'features',
418
+ '--include-benchmark': 'general',
419
+ '--benchmark-concurrency': 'general',
420
+ '--benchmark-input-tokens': 'general',
421
+ '--benchmark-output-tokens': 'general',
422
+ '--benchmark-streaming': 'general',
423
+ '--benchmark-request-count': 'general',
424
+ '--benchmark-s3-output-path': 'general',
425
+ '--skip-prompts': 'general',
426
+ '--auto-prompt': 'general',
427
+ '--config': 'general',
428
+ '--project-dir': 'general',
429
+ '--force': 'general',
430
+ '--framework': 'model',
431
+ '--model-format': 'model',
432
+ '--model-server': 'model',
433
+ '--base-image': 'infra',
434
+ '--region': 'infra',
435
+ '--role-arn': 'infra',
436
+ '--build-target': 'infra',
437
+ '--codebuild-compute-type': 'infra',
438
+ '--hf-token': 'auth',
439
+ '--hf-token-arn': 'auth',
440
+ '--ngc-token': 'auth',
441
+ '--ngc-token-arn': 'auth',
442
+ '--endpoint-initial-instance-count': 'endpoint',
443
+ '--endpoint-data-capture-percent': 'endpoint',
444
+ '--endpoint-variant-name': 'endpoint',
445
+ '--endpoint-volume-size': 'endpoint',
446
+ '--capacity-reservation-arn': 'endpoint',
447
+ '--ic-cpu-count': 'ic',
448
+ '--ic-model-weight': 'ic',
449
+ '--async-s3-output-path': 'async',
450
+ '--async-sns-success-topic': 'async',
451
+ '--async-sns-error-topic': 'async',
452
+ '--async-max-concurrent': 'async',
453
+ '--batch-input-path': 'batch',
454
+ '--batch-output-path': 'batch',
455
+ '--batch-instance-count': 'batch',
456
+ '--batch-split-type': 'batch',
457
+ '--batch-strategy': 'batch',
458
+ '--batch-join-source': 'batch',
459
+ '--batch-max-concurrent': 'batch',
460
+ '--batch-max-payload': 'batch',
461
+ '--hyperpod-cluster': 'hyperpod',
462
+ '--hyperpod-namespace': 'hyperpod',
463
+ '--hyperpod-replicas': 'hyperpod',
464
+ '--fsx-volume-handle': 'hyperpod',
465
+ '--model-env': 'env',
466
+ '--server-env': 'env',
467
+ '--include-sample': 'features',
468
+ '--include-testing': 'features',
469
+ '--test-types': 'features',
470
+ '--smart': 'mcp',
471
+ '--discover': 'mcp',
472
+ '--no-validate': 'validation',
473
+ '--validate-env-vars': 'validation',
474
+ '--validate-with-docker': 'validation',
475
+ '--offline': 'validation'
471
476
  };