@aws/ml-container-creator 0.9.1 → 0.10.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE-THIRD-PARTY +9304 -0
- package/bin/cli.js +2 -0
- package/config/bootstrap-e2e-stack.json +341 -0
- package/config/bootstrap-stack.json +40 -3
- package/config/parameter-schema-v2.json +2049 -0
- package/config/tune-catalog.json +1781 -0
- package/infra/ci-harness/buildspec.yml +1 -0
- package/infra/ci-harness/lambda/path-prover/brain.ts +306 -0
- package/infra/ci-harness/lambda/path-prover/write-results.ts +152 -0
- package/infra/ci-harness/lib/ci-harness-stack.ts +837 -7
- package/infra/ci-harness/state-machines/path-prover.asl.json +496 -0
- package/package.json +53 -68
- package/servers/base-image-picker/index.js +121 -121
- package/servers/e2e-status/index.js +297 -0
- package/servers/e2e-status/manifest.json +14 -0
- package/servers/e2e-status/package.json +15 -0
- package/servers/endpoint-picker/LICENSE +202 -0
- package/servers/endpoint-picker/index.js +536 -0
- package/servers/endpoint-picker/manifest.json +14 -0
- package/servers/endpoint-picker/package.json +18 -0
- package/servers/hyperpod-cluster-picker/index.js +125 -125
- package/servers/instance-sizer/index.js +138 -138
- package/servers/instance-sizer/lib/instance-ranker.js +76 -76
- package/servers/instance-sizer/lib/model-resolver.js +61 -61
- package/servers/instance-sizer/lib/quota-resolver.js +113 -113
- package/servers/instance-sizer/lib/vram-estimator.js +31 -31
- package/servers/lib/bedrock-client.js +38 -38
- package/servers/lib/catalogs/jumpstart-public.json +101 -16
- package/servers/lib/catalogs/model-servers.json +201 -3
- package/servers/lib/catalogs/models.json +182 -26
- package/servers/lib/custom-validators.js +13 -13
- package/servers/lib/dynamic-resolver.js +4 -4
- package/servers/marketplace-picker/index.js +342 -0
- package/servers/marketplace-picker/manifest.json +14 -0
- package/servers/marketplace-picker/package.json +18 -0
- package/servers/model-picker/index.js +382 -382
- package/servers/region-picker/index.js +56 -56
- package/servers/workload-picker/LICENSE +202 -0
- package/servers/workload-picker/catalogs/workload-profiles.json +67 -0
- package/servers/workload-picker/index.js +171 -0
- package/servers/workload-picker/manifest.json +16 -0
- package/servers/workload-picker/package.json +16 -0
- package/src/app.js +4 -390
- package/src/lib/bootstrap-command-handler.js +710 -1148
- package/src/lib/bootstrap-config.js +36 -0
- package/src/lib/bootstrap-profile-manager.js +641 -0
- package/src/lib/bootstrap-provisioners.js +421 -0
- package/src/lib/ci-register-helpers.js +74 -0
- package/src/lib/config-loader.js +408 -0
- package/src/lib/config-manager.js +66 -1685
- package/src/lib/config-mcp-client.js +118 -0
- package/src/lib/config-validator.js +634 -0
- package/src/lib/cuda-resolver.js +149 -0
- package/src/lib/e2e-catalog-validator.js +251 -3
- package/src/lib/e2e-ci-recorder.js +103 -0
- package/src/lib/generated/cli-options.js +315 -311
- package/src/lib/generated/parameter-matrix.js +671 -0
- package/src/lib/generated/validation-rules.js +71 -71
- package/src/lib/marketplace-flow.js +276 -0
- package/src/lib/mcp-query-runner.js +768 -0
- package/src/lib/parameter-schema-validator.js +62 -18
- package/src/lib/path-prover-brain.js +607 -0
- package/src/lib/prompt-runner.js +41 -1504
- package/src/lib/prompts/feature-prompts.js +172 -0
- package/src/lib/prompts/index.js +48 -0
- package/src/lib/prompts/infrastructure-prompts.js +690 -0
- package/src/lib/prompts/model-prompts.js +552 -0
- package/src/lib/prompts/project-prompts.js +82 -0
- package/src/lib/prompts.js +2 -1446
- package/src/lib/registry-command-handler.js +135 -3
- package/src/lib/secrets-prompt-runner.js +251 -0
- package/src/lib/template-variable-resolver.js +422 -0
- package/src/lib/tune-catalog-validator.js +37 -4
- package/templates/Dockerfile +9 -0
- package/templates/code/adapter_sidecar.py +444 -0
- package/templates/code/serve +6 -0
- package/templates/code/serve.d/vllm.ejs +1 -1
- package/templates/do/.benchmark_writer.py +1476 -0
- package/templates/do/.tune_helper.py +982 -57
- package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
- package/templates/do/adapter +149 -0
- package/templates/do/benchmark +639 -85
- package/templates/do/config +108 -5
- package/templates/do/deploy.d/managed-inference.ejs +192 -11
- package/templates/do/optimize +106 -37
- package/templates/do/register +89 -0
- package/templates/do/test +13 -0
- package/templates/do/tune +378 -59
- package/templates/do/validate +44 -4
- package/config/parameter-schema.json +0 -88
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
// AUTO-GENERATED by scripts/codegen-cli.js — DO NOT EDIT
|
|
2
2
|
// Source: config/parameter-schema-v2.json
|
|
3
|
-
// Generated: 2026-
|
|
3
|
+
// Generated: 2026-06-10T13:42:40.974Z
|
|
4
4
|
|
|
5
5
|
/**
|
|
6
6
|
* CLI option definitions derived from parameter-schema-v2.json.
|
|
@@ -9,385 +9,389 @@
|
|
|
9
9
|
*/
|
|
10
10
|
export const cliOptions = [
|
|
11
11
|
{
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
},
|
|
15
|
-
{
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
12
|
+
'flag': '--project-name <name>',
|
|
13
|
+
'description': 'Name for the generated project'
|
|
14
|
+
},
|
|
15
|
+
{
|
|
16
|
+
'flag': '--deployment-config <config>',
|
|
17
|
+
'description': 'Deployment configuration (e.g. http-flask, transformers-vllm, triton-fil)',
|
|
18
|
+
'choices': [
|
|
19
|
+
'http-flask',
|
|
20
|
+
'http-fastapi',
|
|
21
|
+
'transformers-vllm',
|
|
22
|
+
'transformers-sglang',
|
|
23
|
+
'transformers-tensorrt-llm',
|
|
24
|
+
'transformers-lmi',
|
|
25
|
+
'transformers-djl',
|
|
26
|
+
'triton-fil',
|
|
27
|
+
'triton-onnxruntime',
|
|
28
|
+
'triton-tensorflow',
|
|
29
|
+
'triton-pytorch',
|
|
30
|
+
'triton-vllm',
|
|
31
|
+
'triton-tensorrtllm',
|
|
32
|
+
'triton-python',
|
|
33
|
+
'diffusors-vllm-omni',
|
|
34
|
+
'marketplace'
|
|
35
35
|
]
|
|
36
36
|
},
|
|
37
37
|
{
|
|
38
|
-
|
|
39
|
-
|
|
38
|
+
'flag': '--model-name <name>',
|
|
39
|
+
'description': 'Model identifier (hf-org/model, s3://..., registry://..., marketplace://...)'
|
|
40
40
|
},
|
|
41
41
|
{
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
42
|
+
'flag': '--deployment-target <target>',
|
|
43
|
+
'description': 'Deployment target (managed-inference, async-inference, batch-transform, hyperpod-eks)',
|
|
44
|
+
'choices': [
|
|
45
|
+
'managed-inference',
|
|
46
|
+
'realtime-inference',
|
|
47
|
+
'async-inference',
|
|
48
|
+
'batch-transform',
|
|
49
|
+
'hyperpod-eks'
|
|
50
50
|
],
|
|
51
|
-
|
|
51
|
+
'defaultValue': 'realtime-inference'
|
|
52
52
|
},
|
|
53
53
|
{
|
|
54
|
-
|
|
55
|
-
|
|
54
|
+
'flag': '--instance-type <type>',
|
|
55
|
+
'description': 'SageMaker instance type (e.g. ml.g5.xlarge, ml.m5.large)'
|
|
56
56
|
},
|
|
57
57
|
{
|
|
58
|
-
|
|
59
|
-
|
|
58
|
+
'flag': '--ic-gpu-count <n>',
|
|
59
|
+
'description': 'GPUs allocated to the inference component'
|
|
60
60
|
},
|
|
61
61
|
{
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
62
|
+
'flag': '--ic-copy-count <n>',
|
|
63
|
+
'description': 'Number of inference component copies',
|
|
64
|
+
'defaultValue': 1
|
|
65
65
|
},
|
|
66
66
|
{
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
"defaultValue": 1024
|
|
67
|
+
'flag': '--ic-memory-size <mb>',
|
|
68
|
+
'description': 'Memory in MB for the inference component'
|
|
70
69
|
},
|
|
71
70
|
{
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
71
|
+
'flag': '--enable-lora',
|
|
72
|
+
'description': 'Enable LoRA adapter serving',
|
|
73
|
+
'defaultValue': false
|
|
75
74
|
},
|
|
76
75
|
{
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
76
|
+
'flag': '--max-loras <n>',
|
|
77
|
+
'description': 'Maximum concurrent LoRA adapters in GPU memory',
|
|
78
|
+
'defaultValue': 30
|
|
80
79
|
},
|
|
81
80
|
{
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
81
|
+
'flag': '--max-lora-rank <n>',
|
|
82
|
+
'description': 'Maximum LoRA rank',
|
|
83
|
+
'defaultValue': 64
|
|
85
84
|
},
|
|
86
85
|
{
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
86
|
+
'flag': '--include-benchmark',
|
|
87
|
+
'description': 'Include SageMaker AI Benchmarking',
|
|
88
|
+
'defaultValue': false
|
|
90
89
|
},
|
|
91
90
|
{
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
91
|
+
'flag': '--benchmark-concurrency <n>',
|
|
92
|
+
'description': 'Benchmark concurrent requests',
|
|
93
|
+
'defaultValue': 10
|
|
95
94
|
},
|
|
96
95
|
{
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
96
|
+
'flag': '--benchmark-input-tokens <n>',
|
|
97
|
+
'description': 'Benchmark mean input tokens',
|
|
98
|
+
'defaultValue': 550
|
|
100
99
|
},
|
|
101
100
|
{
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
101
|
+
'flag': '--benchmark-output-tokens <n>',
|
|
102
|
+
'description': 'Benchmark mean output tokens',
|
|
103
|
+
'defaultValue': 150
|
|
105
104
|
},
|
|
106
105
|
{
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
106
|
+
'flag': '--benchmark-streaming',
|
|
107
|
+
'description': 'Enable streaming in benchmark',
|
|
108
|
+
'defaultValue': true
|
|
110
109
|
},
|
|
111
110
|
{
|
|
112
|
-
|
|
113
|
-
|
|
111
|
+
'flag': '--benchmark-request-count <n>',
|
|
112
|
+
'description': 'Total number of benchmark requests to send'
|
|
114
113
|
},
|
|
115
114
|
{
|
|
116
|
-
|
|
117
|
-
|
|
115
|
+
'flag': '--benchmark-s3-output-path <path>',
|
|
116
|
+
'description': 'S3 URI for benchmark results output'
|
|
118
117
|
},
|
|
119
118
|
{
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
119
|
+
'flag': '--skip-prompts',
|
|
120
|
+
'description': 'Skip interactive prompts and use configuration from other sources',
|
|
121
|
+
'defaultValue': false
|
|
123
122
|
},
|
|
124
123
|
{
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
124
|
+
'flag': '--auto-prompt',
|
|
125
|
+
'description': 'Fill defaults, prompt only for missing required values',
|
|
126
|
+
'defaultValue': false
|
|
128
127
|
},
|
|
129
128
|
{
|
|
130
|
-
|
|
131
|
-
|
|
129
|
+
'flag': '--config <path>',
|
|
130
|
+
'description': 'Path to JSON configuration file'
|
|
132
131
|
},
|
|
133
132
|
{
|
|
134
|
-
|
|
135
|
-
|
|
133
|
+
'flag': '--project-dir <dir>',
|
|
134
|
+
'description': 'Output directory path'
|
|
136
135
|
},
|
|
137
136
|
{
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
137
|
+
'flag': '--force',
|
|
138
|
+
'description': 'Overwrite existing output directory without prompting',
|
|
139
|
+
'defaultValue': false
|
|
141
140
|
},
|
|
142
141
|
{
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
142
|
+
'flag': '--framework <framework>',
|
|
143
|
+
'description': 'ML framework',
|
|
144
|
+
'choices': [
|
|
145
|
+
'sklearn',
|
|
146
|
+
'xgboost',
|
|
147
|
+
'tensorflow',
|
|
148
|
+
'transformers'
|
|
150
149
|
],
|
|
151
|
-
|
|
152
|
-
},
|
|
153
|
-
{
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
150
|
+
'hidden': true
|
|
151
|
+
},
|
|
152
|
+
{
|
|
153
|
+
'flag': '--model-format <format>',
|
|
154
|
+
'description': 'Model serialization format (pkl, joblib, json, model, ubj, keras, h5, SavedModel)',
|
|
155
|
+
'choices': [
|
|
156
|
+
'pkl',
|
|
157
|
+
'joblib',
|
|
158
|
+
'json',
|
|
159
|
+
'model',
|
|
160
|
+
'ubj',
|
|
161
|
+
'keras',
|
|
162
|
+
'h5',
|
|
163
|
+
'SavedModel'
|
|
165
164
|
]
|
|
166
165
|
},
|
|
167
166
|
{
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
167
|
+
'flag': '--model-server <server>',
|
|
168
|
+
'description': 'Model server',
|
|
169
|
+
'choices': [
|
|
170
|
+
'flask',
|
|
171
|
+
'fastapi',
|
|
172
|
+
'vllm',
|
|
173
|
+
'sglang'
|
|
175
174
|
],
|
|
176
|
-
|
|
175
|
+
'hidden': true
|
|
177
176
|
},
|
|
178
177
|
{
|
|
179
|
-
|
|
180
|
-
|
|
178
|
+
'flag': '--base-image <image>',
|
|
179
|
+
'description': 'Base container image for Dockerfile'
|
|
181
180
|
},
|
|
182
181
|
{
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
182
|
+
'flag': '--region <region>',
|
|
183
|
+
'description': 'AWS region',
|
|
184
|
+
'defaultValue': 'us-east-1'
|
|
186
185
|
},
|
|
187
186
|
{
|
|
188
|
-
|
|
189
|
-
|
|
187
|
+
'flag': '--role-arn <arn>',
|
|
188
|
+
'description': 'IAM role ARN for SageMaker execution'
|
|
190
189
|
},
|
|
191
190
|
{
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
]
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
191
|
+
'flag': '--build-target <target>',
|
|
192
|
+
'description': 'Build target (codebuild)',
|
|
193
|
+
'choices': [
|
|
194
|
+
'codebuild'
|
|
195
|
+
],
|
|
196
|
+
'defaultValue': 'codebuild'
|
|
197
|
+
},
|
|
198
|
+
{
|
|
199
|
+
'flag': '--codebuild-compute-type <type>',
|
|
200
|
+
'description': 'CodeBuild compute type (SMALL, MEDIUM, LARGE)',
|
|
201
|
+
'choices': [
|
|
202
|
+
'SMALL',
|
|
203
|
+
'MEDIUM',
|
|
204
|
+
'LARGE',
|
|
205
|
+
'BUILD_GENERAL1_SMALL',
|
|
206
|
+
'BUILD_GENERAL1_MEDIUM',
|
|
207
|
+
'BUILD_GENERAL1_LARGE',
|
|
208
|
+
'BUILD_GENERAL1_2XLARGE'
|
|
205
209
|
],
|
|
206
|
-
|
|
210
|
+
'defaultValue': 'BUILD_GENERAL1_LARGE'
|
|
207
211
|
},
|
|
208
212
|
{
|
|
209
|
-
|
|
210
|
-
|
|
213
|
+
'flag': '--hf-token <token>',
|
|
214
|
+
'description': 'HuggingFace token (or $HF_TOKEN for env var reference)'
|
|
211
215
|
},
|
|
212
216
|
{
|
|
213
|
-
|
|
214
|
-
|
|
217
|
+
'flag': '--hf-token-arn <arn>',
|
|
218
|
+
'description': 'HuggingFace token ARN from Secrets Manager'
|
|
215
219
|
},
|
|
216
220
|
{
|
|
217
|
-
|
|
218
|
-
|
|
221
|
+
'flag': '--ngc-token <token>',
|
|
222
|
+
'description': 'NVIDIA NGC token (or $NGC_API_KEY for env var reference)'
|
|
219
223
|
},
|
|
220
224
|
{
|
|
221
|
-
|
|
222
|
-
|
|
225
|
+
'flag': '--ngc-token-arn <arn>',
|
|
226
|
+
'description': 'NVIDIA NGC token ARN from Secrets Manager'
|
|
223
227
|
},
|
|
224
228
|
{
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
229
|
+
'flag': '--endpoint-initial-instance-count <n>',
|
|
230
|
+
'description': 'Number of instances for the endpoint',
|
|
231
|
+
'defaultValue': 1
|
|
228
232
|
},
|
|
229
233
|
{
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
234
|
+
'flag': '--endpoint-data-capture-percent <pct>',
|
|
235
|
+
'description': 'Data capture percentage for monitoring, 0-100',
|
|
236
|
+
'defaultValue': 0
|
|
233
237
|
},
|
|
234
238
|
{
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
239
|
+
'flag': '--endpoint-variant-name <name>',
|
|
240
|
+
'description': 'Production variant name',
|
|
241
|
+
'defaultValue': 'AllTraffic'
|
|
238
242
|
},
|
|
239
243
|
{
|
|
240
|
-
|
|
241
|
-
|
|
244
|
+
'flag': '--endpoint-volume-size <gb>',
|
|
245
|
+
'description': 'ML storage volume size in GB'
|
|
242
246
|
},
|
|
243
247
|
{
|
|
244
|
-
|
|
245
|
-
|
|
248
|
+
'flag': '--ic-cpu-count <n>',
|
|
249
|
+
'description': 'vCPUs allocated to the inference component'
|
|
246
250
|
},
|
|
247
251
|
{
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
252
|
+
'flag': '--ic-model-weight <weight>',
|
|
253
|
+
'description': 'Traffic routing weight, 0-1',
|
|
254
|
+
'defaultValue': 1
|
|
251
255
|
},
|
|
252
256
|
{
|
|
253
|
-
|
|
254
|
-
|
|
257
|
+
'flag': '--async-s3-output-path <path>',
|
|
258
|
+
'description': 'S3 output path for async results'
|
|
255
259
|
},
|
|
256
260
|
{
|
|
257
|
-
|
|
258
|
-
|
|
261
|
+
'flag': '--async-sns-success-topic <arn>',
|
|
262
|
+
'description': 'SNS topic ARN for success notifications'
|
|
259
263
|
},
|
|
260
264
|
{
|
|
261
|
-
|
|
262
|
-
|
|
265
|
+
'flag': '--async-sns-error-topic <arn>',
|
|
266
|
+
'description': 'SNS topic ARN for error notifications'
|
|
263
267
|
},
|
|
264
268
|
{
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
269
|
+
'flag': '--async-max-concurrent <n>',
|
|
270
|
+
'description': 'Max concurrent invocations per instance',
|
|
271
|
+
'defaultValue': 1
|
|
268
272
|
},
|
|
269
273
|
{
|
|
270
|
-
|
|
271
|
-
|
|
274
|
+
'flag': '--batch-input-path <path>',
|
|
275
|
+
'description': 'S3 input path for batch data'
|
|
272
276
|
},
|
|
273
277
|
{
|
|
274
|
-
|
|
275
|
-
|
|
278
|
+
'flag': '--batch-output-path <path>',
|
|
279
|
+
'description': 'S3 output path for batch results'
|
|
276
280
|
},
|
|
277
281
|
{
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
282
|
+
'flag': '--batch-instance-count <n>',
|
|
283
|
+
'description': 'Number of batch instances',
|
|
284
|
+
'defaultValue': 1
|
|
281
285
|
},
|
|
282
286
|
{
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
287
|
+
'flag': '--batch-split-type <type>',
|
|
288
|
+
'description': 'Input split type: Line, RecordIO, None',
|
|
289
|
+
'choices': [
|
|
290
|
+
'Line',
|
|
291
|
+
'RecordIO',
|
|
292
|
+
'None'
|
|
289
293
|
],
|
|
290
|
-
|
|
294
|
+
'defaultValue': 'Line'
|
|
291
295
|
},
|
|
292
296
|
{
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
297
|
+
'flag': '--batch-strategy <strategy>',
|
|
298
|
+
'description': 'Batch strategy: MultiRecord, SingleRecord',
|
|
299
|
+
'choices': [
|
|
300
|
+
'MultiRecord',
|
|
301
|
+
'SingleRecord'
|
|
298
302
|
],
|
|
299
|
-
|
|
303
|
+
'defaultValue': 'MultiRecord'
|
|
300
304
|
},
|
|
301
305
|
{
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
306
|
+
'flag': '--batch-join-source <source>',
|
|
307
|
+
'description': 'Join source: Input, None',
|
|
308
|
+
'choices': [
|
|
309
|
+
'Input',
|
|
310
|
+
'None'
|
|
307
311
|
],
|
|
308
|
-
|
|
312
|
+
'defaultValue': 'None'
|
|
309
313
|
},
|
|
310
314
|
{
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
315
|
+
'flag': '--batch-max-concurrent <n>',
|
|
316
|
+
'description': 'Max concurrent transforms per instance',
|
|
317
|
+
'defaultValue': 1
|
|
314
318
|
},
|
|
315
319
|
{
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
320
|
+
'flag': '--batch-max-payload <mb>',
|
|
321
|
+
'description': 'Max payload size in MB, 0-100',
|
|
322
|
+
'defaultValue': 6
|
|
319
323
|
},
|
|
320
324
|
{
|
|
321
|
-
|
|
322
|
-
|
|
325
|
+
'flag': '--hyperpod-cluster <name>',
|
|
326
|
+
'description': 'HyperPod EKS cluster name'
|
|
323
327
|
},
|
|
324
328
|
{
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
329
|
+
'flag': '--hyperpod-namespace <ns>',
|
|
330
|
+
'description': 'Kubernetes namespace',
|
|
331
|
+
'defaultValue': 'default'
|
|
328
332
|
},
|
|
329
333
|
{
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
334
|
+
'flag': '--hyperpod-replicas <count>',
|
|
335
|
+
'description': 'Number of replicas',
|
|
336
|
+
'defaultValue': 1
|
|
333
337
|
},
|
|
334
338
|
{
|
|
335
|
-
|
|
336
|
-
|
|
339
|
+
'flag': '--fsx-volume-handle <handle>',
|
|
340
|
+
'description': 'FSx for Lustre volume handle'
|
|
337
341
|
},
|
|
338
342
|
{
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
343
|
+
'flag': '--model-env <KEY=VALUE>',
|
|
344
|
+
'description': 'Model env var, repeatable (e.g. VLLM_TENSOR_PARALLEL_SIZE=4)',
|
|
345
|
+
'repeatable': true
|
|
342
346
|
},
|
|
343
347
|
{
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
348
|
+
'flag': '--server-env <KEY=VALUE>',
|
|
349
|
+
'description': 'Server env var, repeatable (e.g. SGLANG_MEM_FRACTION=0.9)',
|
|
350
|
+
'repeatable': true
|
|
347
351
|
},
|
|
348
352
|
{
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
353
|
+
'flag': '--include-sample',
|
|
354
|
+
'description': 'Include sample model code',
|
|
355
|
+
'defaultValue': true
|
|
352
356
|
},
|
|
353
357
|
{
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
358
|
+
'flag': '--include-testing',
|
|
359
|
+
'description': 'Include test suite',
|
|
360
|
+
'defaultValue': true
|
|
357
361
|
},
|
|
358
362
|
{
|
|
359
|
-
|
|
360
|
-
|
|
363
|
+
'flag': '--test-types <types>',
|
|
364
|
+
'description': 'Comma-separated test types'
|
|
361
365
|
},
|
|
362
366
|
{
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
367
|
+
'flag': '--smart',
|
|
368
|
+
'description': 'Enable smart mode (live AWS API calls for MCP servers)',
|
|
369
|
+
'defaultValue': false
|
|
366
370
|
},
|
|
367
371
|
{
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
372
|
+
'flag': '--discover',
|
|
373
|
+
'description': 'Enable discovery mode for MCP servers',
|
|
374
|
+
'defaultValue': false
|
|
371
375
|
},
|
|
372
376
|
{
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
377
|
+
'flag': '--no-validate',
|
|
378
|
+
'description': 'Skip parameter validation',
|
|
379
|
+
'defaultValue': false
|
|
376
380
|
},
|
|
377
381
|
{
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
382
|
+
'flag': '--validate-env-vars',
|
|
383
|
+
'description': 'Validate environment variables against schema',
|
|
384
|
+
'defaultValue': false
|
|
381
385
|
},
|
|
382
386
|
{
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
387
|
+
'flag': '--validate-with-docker',
|
|
388
|
+
'description': 'Validate Dockerfile builds successfully',
|
|
389
|
+
'defaultValue': false
|
|
386
390
|
},
|
|
387
391
|
{
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
392
|
+
'flag': '--offline',
|
|
393
|
+
'description': 'Run in offline mode (no network calls)',
|
|
394
|
+
'defaultValue': false
|
|
391
395
|
}
|
|
392
396
|
];
|
|
393
397
|
|
|
@@ -396,72 +400,72 @@ export const cliOptions = [
|
|
|
396
400
|
* Used by the custom help formatter in bin/cli.js.
|
|
397
401
|
*/
|
|
398
402
|
export const helpGroups = {
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
403
|
+
'--project-name': 'general',
|
|
404
|
+
'--deployment-config': 'model',
|
|
405
|
+
'--model-name': 'model',
|
|
406
|
+
'--deployment-target': 'infra',
|
|
407
|
+
'--instance-type': 'infra',
|
|
408
|
+
'--ic-gpu-count': 'ic',
|
|
409
|
+
'--ic-copy-count': 'ic',
|
|
410
|
+
'--ic-memory-size': 'ic',
|
|
411
|
+
'--enable-lora': 'features',
|
|
412
|
+
'--max-loras': 'features',
|
|
413
|
+
'--max-lora-rank': 'features',
|
|
414
|
+
'--include-benchmark': 'general',
|
|
415
|
+
'--benchmark-concurrency': 'general',
|
|
416
|
+
'--benchmark-input-tokens': 'general',
|
|
417
|
+
'--benchmark-output-tokens': 'general',
|
|
418
|
+
'--benchmark-streaming': 'general',
|
|
419
|
+
'--benchmark-request-count': 'general',
|
|
420
|
+
'--benchmark-s3-output-path': 'general',
|
|
421
|
+
'--skip-prompts': 'general',
|
|
422
|
+
'--auto-prompt': 'general',
|
|
423
|
+
'--config': 'general',
|
|
424
|
+
'--project-dir': 'general',
|
|
425
|
+
'--force': 'general',
|
|
426
|
+
'--framework': 'model',
|
|
427
|
+
'--model-format': 'model',
|
|
428
|
+
'--model-server': 'model',
|
|
429
|
+
'--base-image': 'infra',
|
|
430
|
+
'--region': 'infra',
|
|
431
|
+
'--role-arn': 'infra',
|
|
432
|
+
'--build-target': 'infra',
|
|
433
|
+
'--codebuild-compute-type': 'infra',
|
|
434
|
+
'--hf-token': 'auth',
|
|
435
|
+
'--hf-token-arn': 'auth',
|
|
436
|
+
'--ngc-token': 'auth',
|
|
437
|
+
'--ngc-token-arn': 'auth',
|
|
438
|
+
'--endpoint-initial-instance-count': 'endpoint',
|
|
439
|
+
'--endpoint-data-capture-percent': 'endpoint',
|
|
440
|
+
'--endpoint-variant-name': 'endpoint',
|
|
441
|
+
'--endpoint-volume-size': 'endpoint',
|
|
442
|
+
'--ic-cpu-count': 'ic',
|
|
443
|
+
'--ic-model-weight': 'ic',
|
|
444
|
+
'--async-s3-output-path': 'async',
|
|
445
|
+
'--async-sns-success-topic': 'async',
|
|
446
|
+
'--async-sns-error-topic': 'async',
|
|
447
|
+
'--async-max-concurrent': 'async',
|
|
448
|
+
'--batch-input-path': 'batch',
|
|
449
|
+
'--batch-output-path': 'batch',
|
|
450
|
+
'--batch-instance-count': 'batch',
|
|
451
|
+
'--batch-split-type': 'batch',
|
|
452
|
+
'--batch-strategy': 'batch',
|
|
453
|
+
'--batch-join-source': 'batch',
|
|
454
|
+
'--batch-max-concurrent': 'batch',
|
|
455
|
+
'--batch-max-payload': 'batch',
|
|
456
|
+
'--hyperpod-cluster': 'hyperpod',
|
|
457
|
+
'--hyperpod-namespace': 'hyperpod',
|
|
458
|
+
'--hyperpod-replicas': 'hyperpod',
|
|
459
|
+
'--fsx-volume-handle': 'hyperpod',
|
|
460
|
+
'--model-env': 'env',
|
|
461
|
+
'--server-env': 'env',
|
|
462
|
+
'--include-sample': 'features',
|
|
463
|
+
'--include-testing': 'features',
|
|
464
|
+
'--test-types': 'features',
|
|
465
|
+
'--smart': 'mcp',
|
|
466
|
+
'--discover': 'mcp',
|
|
467
|
+
'--no-validate': 'validation',
|
|
468
|
+
'--validate-env-vars': 'validation',
|
|
469
|
+
'--validate-with-docker': 'validation',
|
|
470
|
+
'--offline': 'validation'
|
|
467
471
|
};
|