@aws/ml-container-creator 0.10.0 → 0.10.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71):
  1. package/LICENSE-THIRD-PARTY +9304 -0
  2. package/bin/cli.js +2 -0
  3. package/config/bootstrap-e2e-stack.json +341 -0
  4. package/config/bootstrap-stack.json +40 -3
  5. package/config/parameter-schema-v2.json +5 -21
  6. package/config/tune-catalog.json +1781 -0
  7. package/infra/ci-harness/buildspec.yml +1 -0
  8. package/infra/ci-harness/lambda/path-prover/brain.ts +306 -0
  9. package/infra/ci-harness/lambda/path-prover/write-results.ts +152 -0
  10. package/infra/ci-harness/lib/ci-harness-stack.ts +837 -7
  11. package/infra/ci-harness/state-machines/path-prover.asl.json +496 -0
  12. package/package.json +51 -66
  13. package/servers/base-image-picker/index.js +121 -121
  14. package/servers/e2e-status/index.js +297 -0
  15. package/servers/e2e-status/manifest.json +14 -0
  16. package/servers/e2e-status/package.json +15 -0
  17. package/servers/endpoint-picker/LICENSE +202 -0
  18. package/servers/endpoint-picker/index.js +536 -0
  19. package/servers/endpoint-picker/manifest.json +14 -0
  20. package/servers/endpoint-picker/package.json +18 -0
  21. package/servers/hyperpod-cluster-picker/index.js +125 -125
  22. package/servers/instance-sizer/index.js +138 -138
  23. package/servers/instance-sizer/lib/instance-ranker.js +76 -76
  24. package/servers/instance-sizer/lib/model-resolver.js +61 -61
  25. package/servers/instance-sizer/lib/quota-resolver.js +113 -113
  26. package/servers/instance-sizer/lib/vram-estimator.js +31 -31
  27. package/servers/lib/bedrock-client.js +38 -38
  28. package/servers/lib/catalogs/model-servers.json +201 -3
  29. package/servers/lib/custom-validators.js +13 -13
  30. package/servers/lib/dynamic-resolver.js +4 -4
  31. package/servers/marketplace-picker/index.js +342 -0
  32. package/servers/marketplace-picker/manifest.json +14 -0
  33. package/servers/marketplace-picker/package.json +18 -0
  34. package/servers/model-picker/index.js +382 -382
  35. package/servers/region-picker/index.js +56 -56
  36. package/servers/workload-picker/LICENSE +202 -0
  37. package/servers/workload-picker/catalogs/workload-profiles.json +67 -0
  38. package/servers/workload-picker/index.js +171 -0
  39. package/servers/workload-picker/manifest.json +16 -0
  40. package/servers/workload-picker/package.json +16 -0
  41. package/src/app.js +4 -2
  42. package/src/lib/bootstrap-command-handler.js +579 -14
  43. package/src/lib/bootstrap-config.js +36 -0
  44. package/src/lib/bootstrap-profile-manager.js +48 -41
  45. package/src/lib/ci-register-helpers.js +74 -0
  46. package/src/lib/config-loader.js +3 -0
  47. package/src/lib/config-manager.js +7 -0
  48. package/src/lib/cuda-resolver.js +17 -8
  49. package/src/lib/generated/cli-options.js +315 -315
  50. package/src/lib/generated/parameter-matrix.js +661 -661
  51. package/src/lib/generated/validation-rules.js +71 -71
  52. package/src/lib/path-prover-brain.js +607 -0
  53. package/src/lib/prompts/project-prompts.js +12 -0
  54. package/src/lib/template-variable-resolver.js +25 -1
  55. package/src/lib/tune-catalog-validator.js +37 -4
  56. package/templates/Dockerfile +9 -0
  57. package/templates/code/adapter_sidecar.py +444 -0
  58. package/templates/code/serve +6 -0
  59. package/templates/code/serve.d/vllm.ejs +1 -1
  60. package/templates/do/.benchmark_writer.py +1476 -0
  61. package/templates/do/.tune_helper.py +982 -57
  62. package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
  63. package/templates/do/adapter +149 -0
  64. package/templates/do/benchmark +639 -85
  65. package/templates/do/config +108 -5
  66. package/templates/do/deploy.d/managed-inference.ejs +192 -11
  67. package/templates/do/optimize +106 -37
  68. package/templates/do/register +89 -0
  69. package/templates/do/test +13 -0
  70. package/templates/do/tune +378 -59
  71. package/templates/do/validate +44 -4
@@ -1,6 +1,6 @@
1
1
  // AUTO-GENERATED by scripts/codegen-cli.js — DO NOT EDIT
2
2
  // Source: config/parameter-schema-v2.json
3
- // Generated: 2026-05-23T12:02:19.426Z
3
+ // Generated: 2026-06-10T13:42:40.974Z
4
4
 
5
5
  /**
6
6
  * CLI option definitions derived from parameter-schema-v2.json.
@@ -9,389 +9,389 @@
9
9
  */
10
10
  export const cliOptions = [
11
11
  {
12
- "flag": "--project-name <name>",
13
- "description": "Name for the generated project"
14
- },
15
- {
16
- "flag": "--deployment-config <config>",
17
- "description": "Deployment configuration (e.g. http-flask, transformers-vllm, triton-fil)",
18
- "choices": [
19
- "http-flask",
20
- "http-fastapi",
21
- "transformers-vllm",
22
- "transformers-sglang",
23
- "transformers-tensorrt-llm",
24
- "transformers-lmi",
25
- "transformers-djl",
26
- "triton-fil",
27
- "triton-onnxruntime",
28
- "triton-tensorflow",
29
- "triton-pytorch",
30
- "triton-vllm",
31
- "triton-tensorrtllm",
32
- "triton-python",
33
- "diffusors-vllm-omni",
34
- "marketplace"
12
+ 'flag': '--project-name <name>',
13
+ 'description': 'Name for the generated project'
14
+ },
15
+ {
16
+ 'flag': '--deployment-config <config>',
17
+ 'description': 'Deployment configuration (e.g. http-flask, transformers-vllm, triton-fil)',
18
+ 'choices': [
19
+ 'http-flask',
20
+ 'http-fastapi',
21
+ 'transformers-vllm',
22
+ 'transformers-sglang',
23
+ 'transformers-tensorrt-llm',
24
+ 'transformers-lmi',
25
+ 'transformers-djl',
26
+ 'triton-fil',
27
+ 'triton-onnxruntime',
28
+ 'triton-tensorflow',
29
+ 'triton-pytorch',
30
+ 'triton-vllm',
31
+ 'triton-tensorrtllm',
32
+ 'triton-python',
33
+ 'diffusors-vllm-omni',
34
+ 'marketplace'
35
35
  ]
36
36
  },
37
37
  {
38
- "flag": "--model-name <name>",
39
- "description": "Model identifier (hf-org/model, s3://..., registry://..., marketplace://...)"
38
+ 'flag': '--model-name <name>',
39
+ 'description': 'Model identifier (hf-org/model, s3://..., registry://..., marketplace://...)'
40
40
  },
41
41
  {
42
- "flag": "--deployment-target <target>",
43
- "description": "Deployment target (managed-inference, async-inference, batch-transform, hyperpod-eks)",
44
- "choices": [
45
- "managed-inference",
46
- "realtime-inference",
47
- "async-inference",
48
- "batch-transform",
49
- "hyperpod-eks"
42
+ 'flag': '--deployment-target <target>',
43
+ 'description': 'Deployment target (managed-inference, async-inference, batch-transform, hyperpod-eks)',
44
+ 'choices': [
45
+ 'managed-inference',
46
+ 'realtime-inference',
47
+ 'async-inference',
48
+ 'batch-transform',
49
+ 'hyperpod-eks'
50
50
  ],
51
- "defaultValue": "realtime-inference"
51
+ 'defaultValue': 'realtime-inference'
52
52
  },
53
53
  {
54
- "flag": "--instance-type <type>",
55
- "description": "SageMaker instance type (e.g. ml.g5.xlarge, ml.m5.large)"
54
+ 'flag': '--instance-type <type>',
55
+ 'description': 'SageMaker instance type (e.g. ml.g5.xlarge, ml.m5.large)'
56
56
  },
57
57
  {
58
- "flag": "--ic-gpu-count <n>",
59
- "description": "GPUs allocated to the inference component"
58
+ 'flag': '--ic-gpu-count <n>',
59
+ 'description': 'GPUs allocated to the inference component'
60
60
  },
61
61
  {
62
- "flag": "--ic-copy-count <n>",
63
- "description": "Number of inference component copies",
64
- "defaultValue": 1
62
+ 'flag': '--ic-copy-count <n>',
63
+ 'description': 'Number of inference component copies',
64
+ 'defaultValue': 1
65
65
  },
66
66
  {
67
- "flag": "--ic-memory-size <mb>",
68
- "description": "Memory in MB for the inference component",
69
- "defaultValue": 1024
67
+ 'flag': '--ic-memory-size <mb>',
68
+ 'description': 'Memory in MB for the inference component'
70
69
  },
71
70
  {
72
- "flag": "--enable-lora",
73
- "description": "Enable LoRA adapter serving",
74
- "defaultValue": false
71
+ 'flag': '--enable-lora',
72
+ 'description': 'Enable LoRA adapter serving',
73
+ 'defaultValue': false
75
74
  },
76
75
  {
77
- "flag": "--max-loras <n>",
78
- "description": "Maximum concurrent LoRA adapters in GPU memory",
79
- "defaultValue": 30
76
+ 'flag': '--max-loras <n>',
77
+ 'description': 'Maximum concurrent LoRA adapters in GPU memory',
78
+ 'defaultValue': 30
80
79
  },
81
80
  {
82
- "flag": "--max-lora-rank <n>",
83
- "description": "Maximum LoRA rank",
84
- "defaultValue": 64
81
+ 'flag': '--max-lora-rank <n>',
82
+ 'description': 'Maximum LoRA rank',
83
+ 'defaultValue': 64
85
84
  },
86
85
  {
87
- "flag": "--include-benchmark",
88
- "description": "Include SageMaker AI Benchmarking",
89
- "defaultValue": false
86
+ 'flag': '--include-benchmark',
87
+ 'description': 'Include SageMaker AI Benchmarking',
88
+ 'defaultValue': false
90
89
  },
91
90
  {
92
- "flag": "--benchmark-concurrency <n>",
93
- "description": "Benchmark concurrent requests",
94
- "defaultValue": 10
91
+ 'flag': '--benchmark-concurrency <n>',
92
+ 'description': 'Benchmark concurrent requests',
93
+ 'defaultValue': 10
95
94
  },
96
95
  {
97
- "flag": "--benchmark-input-tokens <n>",
98
- "description": "Benchmark mean input tokens",
99
- "defaultValue": 550
96
+ 'flag': '--benchmark-input-tokens <n>',
97
+ 'description': 'Benchmark mean input tokens',
98
+ 'defaultValue': 550
100
99
  },
101
100
  {
102
- "flag": "--benchmark-output-tokens <n>",
103
- "description": "Benchmark mean output tokens",
104
- "defaultValue": 150
101
+ 'flag': '--benchmark-output-tokens <n>',
102
+ 'description': 'Benchmark mean output tokens',
103
+ 'defaultValue': 150
105
104
  },
106
105
  {
107
- "flag": "--benchmark-streaming",
108
- "description": "Enable streaming in benchmark",
109
- "defaultValue": true
106
+ 'flag': '--benchmark-streaming',
107
+ 'description': 'Enable streaming in benchmark',
108
+ 'defaultValue': true
110
109
  },
111
110
  {
112
- "flag": "--benchmark-request-count <n>",
113
- "description": "Total number of benchmark requests to send"
111
+ 'flag': '--benchmark-request-count <n>',
112
+ 'description': 'Total number of benchmark requests to send'
114
113
  },
115
114
  {
116
- "flag": "--benchmark-s3-output-path <path>",
117
- "description": "S3 URI for benchmark results output"
115
+ 'flag': '--benchmark-s3-output-path <path>',
116
+ 'description': 'S3 URI for benchmark results output'
118
117
  },
119
118
  {
120
- "flag": "--skip-prompts",
121
- "description": "Skip interactive prompts and use configuration from other sources",
122
- "defaultValue": false
119
+ 'flag': '--skip-prompts',
120
+ 'description': 'Skip interactive prompts and use configuration from other sources',
121
+ 'defaultValue': false
123
122
  },
124
123
  {
125
- "flag": "--auto-prompt",
126
- "description": "Fill defaults, prompt only for missing required values",
127
- "defaultValue": false
124
+ 'flag': '--auto-prompt',
125
+ 'description': 'Fill defaults, prompt only for missing required values',
126
+ 'defaultValue': false
128
127
  },
129
128
  {
130
- "flag": "--config <path>",
131
- "description": "Path to JSON configuration file"
129
+ 'flag': '--config <path>',
130
+ 'description': 'Path to JSON configuration file'
132
131
  },
133
132
  {
134
- "flag": "--project-dir <dir>",
135
- "description": "Output directory path"
133
+ 'flag': '--project-dir <dir>',
134
+ 'description': 'Output directory path'
136
135
  },
137
136
  {
138
- "flag": "--force",
139
- "description": "Overwrite existing output directory without prompting",
140
- "defaultValue": false
137
+ 'flag': '--force',
138
+ 'description': 'Overwrite existing output directory without prompting',
139
+ 'defaultValue': false
141
140
  },
142
141
  {
143
- "flag": "--framework <framework>",
144
- "description": "ML framework",
145
- "choices": [
146
- "sklearn",
147
- "xgboost",
148
- "tensorflow",
149
- "transformers"
142
+ 'flag': '--framework <framework>',
143
+ 'description': 'ML framework',
144
+ 'choices': [
145
+ 'sklearn',
146
+ 'xgboost',
147
+ 'tensorflow',
148
+ 'transformers'
150
149
  ],
151
- "hidden": true
152
- },
153
- {
154
- "flag": "--model-format <format>",
155
- "description": "Model serialization format (pkl, joblib, json, model, ubj, keras, h5, SavedModel)",
156
- "choices": [
157
- "pkl",
158
- "joblib",
159
- "json",
160
- "model",
161
- "ubj",
162
- "keras",
163
- "h5",
164
- "SavedModel"
150
+ 'hidden': true
151
+ },
152
+ {
153
+ 'flag': '--model-format <format>',
154
+ 'description': 'Model serialization format (pkl, joblib, json, model, ubj, keras, h5, SavedModel)',
155
+ 'choices': [
156
+ 'pkl',
157
+ 'joblib',
158
+ 'json',
159
+ 'model',
160
+ 'ubj',
161
+ 'keras',
162
+ 'h5',
163
+ 'SavedModel'
165
164
  ]
166
165
  },
167
166
  {
168
- "flag": "--model-server <server>",
169
- "description": "Model server",
170
- "choices": [
171
- "flask",
172
- "fastapi",
173
- "vllm",
174
- "sglang"
167
+ 'flag': '--model-server <server>',
168
+ 'description': 'Model server',
169
+ 'choices': [
170
+ 'flask',
171
+ 'fastapi',
172
+ 'vllm',
173
+ 'sglang'
175
174
  ],
176
- "hidden": true
175
+ 'hidden': true
177
176
  },
178
177
  {
179
- "flag": "--base-image <image>",
180
- "description": "Base container image for Dockerfile"
178
+ 'flag': '--base-image <image>',
179
+ 'description': 'Base container image for Dockerfile'
181
180
  },
182
181
  {
183
- "flag": "--region <region>",
184
- "description": "AWS region",
185
- "defaultValue": "us-east-1"
182
+ 'flag': '--region <region>',
183
+ 'description': 'AWS region',
184
+ 'defaultValue': 'us-east-1'
186
185
  },
187
186
  {
188
- "flag": "--role-arn <arn>",
189
- "description": "IAM role ARN for SageMaker execution"
187
+ 'flag': '--role-arn <arn>',
188
+ 'description': 'IAM role ARN for SageMaker execution'
190
189
  },
191
190
  {
192
- "flag": "--build-target <target>",
193
- "description": "Build target (codebuild)",
194
- "choices": [
195
- "codebuild"
196
- ]
197
- },
198
- {
199
- "flag": "--codebuild-compute-type <type>",
200
- "description": "CodeBuild compute type (SMALL, MEDIUM, LARGE)",
201
- "choices": [
202
- "SMALL",
203
- "MEDIUM",
204
- "LARGE",
205
- "BUILD_GENERAL1_SMALL",
206
- "BUILD_GENERAL1_MEDIUM",
207
- "BUILD_GENERAL1_LARGE",
208
- "BUILD_GENERAL1_2XLARGE"
191
+ 'flag': '--build-target <target>',
192
+ 'description': 'Build target (codebuild)',
193
+ 'choices': [
194
+ 'codebuild'
195
+ ],
196
+ 'defaultValue': 'codebuild'
197
+ },
198
+ {
199
+ 'flag': '--codebuild-compute-type <type>',
200
+ 'description': 'CodeBuild compute type (SMALL, MEDIUM, LARGE)',
201
+ 'choices': [
202
+ 'SMALL',
203
+ 'MEDIUM',
204
+ 'LARGE',
205
+ 'BUILD_GENERAL1_SMALL',
206
+ 'BUILD_GENERAL1_MEDIUM',
207
+ 'BUILD_GENERAL1_LARGE',
208
+ 'BUILD_GENERAL1_2XLARGE'
209
209
  ],
210
- "defaultValue": "BUILD_GENERAL1_LARGE"
210
+ 'defaultValue': 'BUILD_GENERAL1_LARGE'
211
211
  },
212
212
  {
213
- "flag": "--hf-token <token>",
214
- "description": "HuggingFace token (or $HF_TOKEN for env var reference)"
213
+ 'flag': '--hf-token <token>',
214
+ 'description': 'HuggingFace token (or $HF_TOKEN for env var reference)'
215
215
  },
216
216
  {
217
- "flag": "--hf-token-arn <arn>",
218
- "description": "HuggingFace token ARN from Secrets Manager"
217
+ 'flag': '--hf-token-arn <arn>',
218
+ 'description': 'HuggingFace token ARN from Secrets Manager'
219
219
  },
220
220
  {
221
- "flag": "--ngc-token <token>",
222
- "description": "NVIDIA NGC token (or $NGC_API_KEY for env var reference)"
221
+ 'flag': '--ngc-token <token>',
222
+ 'description': 'NVIDIA NGC token (or $NGC_API_KEY for env var reference)'
223
223
  },
224
224
  {
225
- "flag": "--ngc-token-arn <arn>",
226
- "description": "NVIDIA NGC token ARN from Secrets Manager"
225
+ 'flag': '--ngc-token-arn <arn>',
226
+ 'description': 'NVIDIA NGC token ARN from Secrets Manager'
227
227
  },
228
228
  {
229
- "flag": "--endpoint-initial-instance-count <n>",
230
- "description": "Number of instances for the endpoint",
231
- "defaultValue": 1
229
+ 'flag': '--endpoint-initial-instance-count <n>',
230
+ 'description': 'Number of instances for the endpoint',
231
+ 'defaultValue': 1
232
232
  },
233
233
  {
234
- "flag": "--endpoint-data-capture-percent <pct>",
235
- "description": "Data capture percentage for monitoring, 0-100",
236
- "defaultValue": 0
234
+ 'flag': '--endpoint-data-capture-percent <pct>',
235
+ 'description': 'Data capture percentage for monitoring, 0-100',
236
+ 'defaultValue': 0
237
237
  },
238
238
  {
239
- "flag": "--endpoint-variant-name <name>",
240
- "description": "Production variant name",
241
- "defaultValue": "AllTraffic"
239
+ 'flag': '--endpoint-variant-name <name>',
240
+ 'description': 'Production variant name',
241
+ 'defaultValue': 'AllTraffic'
242
242
  },
243
243
  {
244
- "flag": "--endpoint-volume-size <gb>",
245
- "description": "ML storage volume size in GB"
244
+ 'flag': '--endpoint-volume-size <gb>',
245
+ 'description': 'ML storage volume size in GB'
246
246
  },
247
247
  {
248
- "flag": "--ic-cpu-count <n>",
249
- "description": "vCPUs allocated to the inference component"
248
+ 'flag': '--ic-cpu-count <n>',
249
+ 'description': 'vCPUs allocated to the inference component'
250
250
  },
251
251
  {
252
- "flag": "--ic-model-weight <weight>",
253
- "description": "Traffic routing weight, 0-1",
254
- "defaultValue": 1
252
+ 'flag': '--ic-model-weight <weight>',
253
+ 'description': 'Traffic routing weight, 0-1',
254
+ 'defaultValue': 1
255
255
  },
256
256
  {
257
- "flag": "--async-s3-output-path <path>",
258
- "description": "S3 output path for async results"
257
+ 'flag': '--async-s3-output-path <path>',
258
+ 'description': 'S3 output path for async results'
259
259
  },
260
260
  {
261
- "flag": "--async-sns-success-topic <arn>",
262
- "description": "SNS topic ARN for success notifications"
261
+ 'flag': '--async-sns-success-topic <arn>',
262
+ 'description': 'SNS topic ARN for success notifications'
263
263
  },
264
264
  {
265
- "flag": "--async-sns-error-topic <arn>",
266
- "description": "SNS topic ARN for error notifications"
265
+ 'flag': '--async-sns-error-topic <arn>',
266
+ 'description': 'SNS topic ARN for error notifications'
267
267
  },
268
268
  {
269
- "flag": "--async-max-concurrent <n>",
270
- "description": "Max concurrent invocations per instance",
271
- "defaultValue": 1
269
+ 'flag': '--async-max-concurrent <n>',
270
+ 'description': 'Max concurrent invocations per instance',
271
+ 'defaultValue': 1
272
272
  },
273
273
  {
274
- "flag": "--batch-input-path <path>",
275
- "description": "S3 input path for batch data"
274
+ 'flag': '--batch-input-path <path>',
275
+ 'description': 'S3 input path for batch data'
276
276
  },
277
277
  {
278
- "flag": "--batch-output-path <path>",
279
- "description": "S3 output path for batch results"
278
+ 'flag': '--batch-output-path <path>',
279
+ 'description': 'S3 output path for batch results'
280
280
  },
281
281
  {
282
- "flag": "--batch-instance-count <n>",
283
- "description": "Number of batch instances",
284
- "defaultValue": 1
282
+ 'flag': '--batch-instance-count <n>',
283
+ 'description': 'Number of batch instances',
284
+ 'defaultValue': 1
285
285
  },
286
286
  {
287
- "flag": "--batch-split-type <type>",
288
- "description": "Input split type: Line, RecordIO, None",
289
- "choices": [
290
- "Line",
291
- "RecordIO",
292
- "None"
287
+ 'flag': '--batch-split-type <type>',
288
+ 'description': 'Input split type: Line, RecordIO, None',
289
+ 'choices': [
290
+ 'Line',
291
+ 'RecordIO',
292
+ 'None'
293
293
  ],
294
- "defaultValue": "Line"
294
+ 'defaultValue': 'Line'
295
295
  },
296
296
  {
297
- "flag": "--batch-strategy <strategy>",
298
- "description": "Batch strategy: MultiRecord, SingleRecord",
299
- "choices": [
300
- "MultiRecord",
301
- "SingleRecord"
297
+ 'flag': '--batch-strategy <strategy>',
298
+ 'description': 'Batch strategy: MultiRecord, SingleRecord',
299
+ 'choices': [
300
+ 'MultiRecord',
301
+ 'SingleRecord'
302
302
  ],
303
- "defaultValue": "MultiRecord"
303
+ 'defaultValue': 'MultiRecord'
304
304
  },
305
305
  {
306
- "flag": "--batch-join-source <source>",
307
- "description": "Join source: Input, None",
308
- "choices": [
309
- "Input",
310
- "None"
306
+ 'flag': '--batch-join-source <source>',
307
+ 'description': 'Join source: Input, None',
308
+ 'choices': [
309
+ 'Input',
310
+ 'None'
311
311
  ],
312
- "defaultValue": "None"
312
+ 'defaultValue': 'None'
313
313
  },
314
314
  {
315
- "flag": "--batch-max-concurrent <n>",
316
- "description": "Max concurrent transforms per instance",
317
- "defaultValue": 1
315
+ 'flag': '--batch-max-concurrent <n>',
316
+ 'description': 'Max concurrent transforms per instance',
317
+ 'defaultValue': 1
318
318
  },
319
319
  {
320
- "flag": "--batch-max-payload <mb>",
321
- "description": "Max payload size in MB, 0-100",
322
- "defaultValue": 6
320
+ 'flag': '--batch-max-payload <mb>',
321
+ 'description': 'Max payload size in MB, 0-100',
322
+ 'defaultValue': 6
323
323
  },
324
324
  {
325
- "flag": "--hyperpod-cluster <name>",
326
- "description": "HyperPod EKS cluster name"
325
+ 'flag': '--hyperpod-cluster <name>',
326
+ 'description': 'HyperPod EKS cluster name'
327
327
  },
328
328
  {
329
- "flag": "--hyperpod-namespace <ns>",
330
- "description": "Kubernetes namespace",
331
- "defaultValue": "default"
329
+ 'flag': '--hyperpod-namespace <ns>',
330
+ 'description': 'Kubernetes namespace',
331
+ 'defaultValue': 'default'
332
332
  },
333
333
  {
334
- "flag": "--hyperpod-replicas <count>",
335
- "description": "Number of replicas",
336
- "defaultValue": 1
334
+ 'flag': '--hyperpod-replicas <count>',
335
+ 'description': 'Number of replicas',
336
+ 'defaultValue': 1
337
337
  },
338
338
  {
339
- "flag": "--fsx-volume-handle <handle>",
340
- "description": "FSx for Lustre volume handle"
339
+ 'flag': '--fsx-volume-handle <handle>',
340
+ 'description': 'FSx for Lustre volume handle'
341
341
  },
342
342
  {
343
- "flag": "--model-env <KEY=VALUE>",
344
- "description": "Model env var, repeatable (e.g. VLLM_TENSOR_PARALLEL_SIZE=4)",
345
- "repeatable": true
343
+ 'flag': '--model-env <KEY=VALUE>',
344
+ 'description': 'Model env var, repeatable (e.g. VLLM_TENSOR_PARALLEL_SIZE=4)',
345
+ 'repeatable': true
346
346
  },
347
347
  {
348
- "flag": "--server-env <KEY=VALUE>",
349
- "description": "Server env var, repeatable (e.g. SGLANG_MEM_FRACTION=0.9)",
350
- "repeatable": true
348
+ 'flag': '--server-env <KEY=VALUE>',
349
+ 'description': 'Server env var, repeatable (e.g. SGLANG_MEM_FRACTION=0.9)',
350
+ 'repeatable': true
351
351
  },
352
352
  {
353
- "flag": "--include-sample",
354
- "description": "Include sample model code",
355
- "defaultValue": true
353
+ 'flag': '--include-sample',
354
+ 'description': 'Include sample model code',
355
+ 'defaultValue': true
356
356
  },
357
357
  {
358
- "flag": "--include-testing",
359
- "description": "Include test suite",
360
- "defaultValue": true
358
+ 'flag': '--include-testing',
359
+ 'description': 'Include test suite',
360
+ 'defaultValue': true
361
361
  },
362
362
  {
363
- "flag": "--test-types <types>",
364
- "description": "Comma-separated test types"
363
+ 'flag': '--test-types <types>',
364
+ 'description': 'Comma-separated test types'
365
365
  },
366
366
  {
367
- "flag": "--smart",
368
- "description": "Enable smart mode (live AWS API calls for MCP servers)",
369
- "defaultValue": false
367
+ 'flag': '--smart',
368
+ 'description': 'Enable smart mode (live AWS API calls for MCP servers)',
369
+ 'defaultValue': false
370
370
  },
371
371
  {
372
- "flag": "--discover",
373
- "description": "Enable discovery mode for MCP servers",
374
- "defaultValue": false
372
+ 'flag': '--discover',
373
+ 'description': 'Enable discovery mode for MCP servers',
374
+ 'defaultValue': false
375
375
  },
376
376
  {
377
- "flag": "--no-validate",
378
- "description": "Skip parameter validation",
379
- "defaultValue": false
377
+ 'flag': '--no-validate',
378
+ 'description': 'Skip parameter validation',
379
+ 'defaultValue': false
380
380
  },
381
381
  {
382
- "flag": "--validate-env-vars",
383
- "description": "Validate environment variables against schema",
384
- "defaultValue": false
382
+ 'flag': '--validate-env-vars',
383
+ 'description': 'Validate environment variables against schema',
384
+ 'defaultValue': false
385
385
  },
386
386
  {
387
- "flag": "--validate-with-docker",
388
- "description": "Validate Dockerfile builds successfully",
389
- "defaultValue": false
387
+ 'flag': '--validate-with-docker',
388
+ 'description': 'Validate Dockerfile builds successfully',
389
+ 'defaultValue': false
390
390
  },
391
391
  {
392
- "flag": "--offline",
393
- "description": "Run in offline mode (no network calls)",
394
- "defaultValue": false
392
+ 'flag': '--offline',
393
+ 'description': 'Run in offline mode (no network calls)',
394
+ 'defaultValue': false
395
395
  }
396
396
  ];
397
397
 
@@ -400,72 +400,72 @@ export const cliOptions = [
400
400
  * Used by the custom help formatter in bin/cli.js.
401
401
  */
402
402
  export const helpGroups = {
403
- "--project-name": "general",
404
- "--deployment-config": "model",
405
- "--model-name": "model",
406
- "--deployment-target": "infra",
407
- "--instance-type": "infra",
408
- "--ic-gpu-count": "ic",
409
- "--ic-copy-count": "ic",
410
- "--ic-memory-size": "ic",
411
- "--enable-lora": "features",
412
- "--max-loras": "features",
413
- "--max-lora-rank": "features",
414
- "--include-benchmark": "general",
415
- "--benchmark-concurrency": "general",
416
- "--benchmark-input-tokens": "general",
417
- "--benchmark-output-tokens": "general",
418
- "--benchmark-streaming": "general",
419
- "--benchmark-request-count": "general",
420
- "--benchmark-s3-output-path": "general",
421
- "--skip-prompts": "general",
422
- "--auto-prompt": "general",
423
- "--config": "general",
424
- "--project-dir": "general",
425
- "--force": "general",
426
- "--framework": "model",
427
- "--model-format": "model",
428
- "--model-server": "model",
429
- "--base-image": "infra",
430
- "--region": "infra",
431
- "--role-arn": "infra",
432
- "--build-target": "infra",
433
- "--codebuild-compute-type": "infra",
434
- "--hf-token": "auth",
435
- "--hf-token-arn": "auth",
436
- "--ngc-token": "auth",
437
- "--ngc-token-arn": "auth",
438
- "--endpoint-initial-instance-count": "endpoint",
439
- "--endpoint-data-capture-percent": "endpoint",
440
- "--endpoint-variant-name": "endpoint",
441
- "--endpoint-volume-size": "endpoint",
442
- "--ic-cpu-count": "ic",
443
- "--ic-model-weight": "ic",
444
- "--async-s3-output-path": "async",
445
- "--async-sns-success-topic": "async",
446
- "--async-sns-error-topic": "async",
447
- "--async-max-concurrent": "async",
448
- "--batch-input-path": "batch",
449
- "--batch-output-path": "batch",
450
- "--batch-instance-count": "batch",
451
- "--batch-split-type": "batch",
452
- "--batch-strategy": "batch",
453
- "--batch-join-source": "batch",
454
- "--batch-max-concurrent": "batch",
455
- "--batch-max-payload": "batch",
456
- "--hyperpod-cluster": "hyperpod",
457
- "--hyperpod-namespace": "hyperpod",
458
- "--hyperpod-replicas": "hyperpod",
459
- "--fsx-volume-handle": "hyperpod",
460
- "--model-env": "env",
461
- "--server-env": "env",
462
- "--include-sample": "features",
463
- "--include-testing": "features",
464
- "--test-types": "features",
465
- "--smart": "mcp",
466
- "--discover": "mcp",
467
- "--no-validate": "validation",
468
- "--validate-env-vars": "validation",
469
- "--validate-with-docker": "validation",
470
- "--offline": "validation"
403
+ '--project-name': 'general',
404
+ '--deployment-config': 'model',
405
+ '--model-name': 'model',
406
+ '--deployment-target': 'infra',
407
+ '--instance-type': 'infra',
408
+ '--ic-gpu-count': 'ic',
409
+ '--ic-copy-count': 'ic',
410
+ '--ic-memory-size': 'ic',
411
+ '--enable-lora': 'features',
412
+ '--max-loras': 'features',
413
+ '--max-lora-rank': 'features',
414
+ '--include-benchmark': 'general',
415
+ '--benchmark-concurrency': 'general',
416
+ '--benchmark-input-tokens': 'general',
417
+ '--benchmark-output-tokens': 'general',
418
+ '--benchmark-streaming': 'general',
419
+ '--benchmark-request-count': 'general',
420
+ '--benchmark-s3-output-path': 'general',
421
+ '--skip-prompts': 'general',
422
+ '--auto-prompt': 'general',
423
+ '--config': 'general',
424
+ '--project-dir': 'general',
425
+ '--force': 'general',
426
+ '--framework': 'model',
427
+ '--model-format': 'model',
428
+ '--model-server': 'model',
429
+ '--base-image': 'infra',
430
+ '--region': 'infra',
431
+ '--role-arn': 'infra',
432
+ '--build-target': 'infra',
433
+ '--codebuild-compute-type': 'infra',
434
+ '--hf-token': 'auth',
435
+ '--hf-token-arn': 'auth',
436
+ '--ngc-token': 'auth',
437
+ '--ngc-token-arn': 'auth',
438
+ '--endpoint-initial-instance-count': 'endpoint',
439
+ '--endpoint-data-capture-percent': 'endpoint',
440
+ '--endpoint-variant-name': 'endpoint',
441
+ '--endpoint-volume-size': 'endpoint',
442
+ '--ic-cpu-count': 'ic',
443
+ '--ic-model-weight': 'ic',
444
+ '--async-s3-output-path': 'async',
445
+ '--async-sns-success-topic': 'async',
446
+ '--async-sns-error-topic': 'async',
447
+ '--async-max-concurrent': 'async',
448
+ '--batch-input-path': 'batch',
449
+ '--batch-output-path': 'batch',
450
+ '--batch-instance-count': 'batch',
451
+ '--batch-split-type': 'batch',
452
+ '--batch-strategy': 'batch',
453
+ '--batch-join-source': 'batch',
454
+ '--batch-max-concurrent': 'batch',
455
+ '--batch-max-payload': 'batch',
456
+ '--hyperpod-cluster': 'hyperpod',
457
+ '--hyperpod-namespace': 'hyperpod',
458
+ '--hyperpod-replicas': 'hyperpod',
459
+ '--fsx-volume-handle': 'hyperpod',
460
+ '--model-env': 'env',
461
+ '--server-env': 'env',
462
+ '--include-sample': 'features',
463
+ '--include-testing': 'features',
464
+ '--test-types': 'features',
465
+ '--smart': 'mcp',
466
+ '--discover': 'mcp',
467
+ '--no-validate': 'validation',
468
+ '--validate-env-vars': 'validation',
469
+ '--validate-with-docker': 'validation',
470
+ '--offline': 'validation'
471
471
  };