@aws/ml-container-creator 0.10.0 → 0.12.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE-THIRD-PARTY +9304 -0
- package/bin/cli.js +2 -0
- package/config/bootstrap-e2e-stack.json +341 -0
- package/config/bootstrap-stack.json +40 -3
- package/config/parameter-schema-v2.json +33 -22
- package/config/tune-catalog.json +1781 -0
- package/infra/ci-harness/buildspec.yml +1 -0
- package/infra/ci-harness/lambda/path-prover/brain.ts +306 -0
- package/infra/ci-harness/lambda/path-prover/write-results.ts +152 -0
- package/infra/ci-harness/lib/ci-harness-stack.ts +851 -7
- package/infra/ci-harness/state-machines/path-prover.asl.json +496 -0
- package/package.json +53 -67
- package/servers/base-image-picker/index.js +121 -121
- package/servers/e2e-status/index.js +297 -0
- package/servers/e2e-status/manifest.json +14 -0
- package/servers/e2e-status/package.json +15 -0
- package/servers/endpoint-picker/LICENSE +202 -0
- package/servers/endpoint-picker/index.js +536 -0
- package/servers/endpoint-picker/manifest.json +14 -0
- package/servers/endpoint-picker/package.json +18 -0
- package/servers/hyperpod-cluster-picker/index.js +125 -125
- package/servers/instance-sizer/index.js +166 -153
- package/servers/instance-sizer/lib/instance-ranker.js +120 -76
- package/servers/instance-sizer/lib/model-resolver.js +61 -61
- package/servers/instance-sizer/lib/quota-resolver.js +113 -113
- package/servers/instance-sizer/lib/vram-estimator.js +31 -31
- package/servers/lib/bedrock-client.js +38 -38
- package/servers/lib/catalogs/instances.json +27 -0
- package/servers/lib/catalogs/model-servers.json +201 -3
- package/servers/lib/custom-validators.js +13 -13
- package/servers/lib/dynamic-resolver.js +4 -4
- package/servers/marketplace-picker/index.js +342 -0
- package/servers/marketplace-picker/manifest.json +14 -0
- package/servers/marketplace-picker/package.json +18 -0
- package/servers/model-picker/index.js +382 -382
- package/servers/region-picker/index.js +56 -56
- package/servers/workload-picker/LICENSE +202 -0
- package/servers/workload-picker/catalogs/workload-profiles.json +67 -0
- package/servers/workload-picker/index.js +171 -0
- package/servers/workload-picker/manifest.json +16 -0
- package/servers/workload-picker/package.json +16 -0
- package/src/app.js +12 -3
- package/src/lib/bootstrap-command-handler.js +609 -15
- package/src/lib/bootstrap-config.js +36 -0
- package/src/lib/bootstrap-profile-manager.js +48 -41
- package/src/lib/ci-register-helpers.js +74 -0
- package/src/lib/config-loader.js +3 -0
- package/src/lib/config-manager.js +7 -0
- package/src/lib/config-validator.js +1 -1
- package/src/lib/cuda-resolver.js +17 -8
- package/src/lib/generated/cli-options.js +319 -314
- package/src/lib/generated/parameter-matrix.js +672 -661
- package/src/lib/generated/validation-rules.js +76 -72
- package/src/lib/path-prover-brain.js +664 -0
- package/src/lib/prompts/infrastructure-prompts.js +2 -2
- package/src/lib/prompts/model-prompts.js +6 -0
- package/src/lib/prompts/project-prompts.js +12 -0
- package/src/lib/secrets-prompt-runner.js +4 -0
- package/src/lib/template-manager.js +1 -1
- package/src/lib/template-variable-resolver.js +87 -1
- package/src/lib/tune-catalog-validator.js +37 -4
- package/templates/Dockerfile +9 -0
- package/templates/code/adapter_sidecar.py +444 -0
- package/templates/code/serve +6 -0
- package/templates/code/serve.d/vllm.ejs +1 -1
- package/templates/do/.benchmark_writer.py +1476 -0
- package/templates/do/.tune_helper.py +982 -57
- package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
- package/templates/do/adapter +154 -0
- package/templates/do/benchmark +639 -85
- package/templates/do/build +5 -0
- package/templates/do/clean.d/async-inference.ejs +5 -0
- package/templates/do/clean.d/batch-transform.ejs +5 -0
- package/templates/do/clean.d/hyperpod-eks.ejs +5 -0
- package/templates/do/clean.d/managed-inference.ejs +5 -0
- package/templates/do/config +115 -45
- package/templates/do/deploy.d/async-inference.ejs +30 -3
- package/templates/do/deploy.d/batch-transform.ejs +29 -3
- package/templates/do/deploy.d/hyperpod-eks.ejs +4 -0
- package/templates/do/deploy.d/managed-inference.ejs +216 -14
- package/templates/do/lib/endpoint-config.sh +1 -1
- package/templates/do/lib/profile.sh +44 -0
- package/templates/do/optimize +106 -37
- package/templates/do/push +5 -0
- package/templates/do/register +94 -0
- package/templates/do/stage +567 -0
- package/templates/do/submit +7 -0
- package/templates/do/test +14 -0
- package/templates/do/tune +382 -59
- package/templates/do/validate +44 -4
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
// AUTO-GENERATED by scripts/codegen-cli.js — DO NOT EDIT
|
|
2
2
|
// Source: config/parameter-schema-v2.json
|
|
3
|
-
// Generated: 2026-
|
|
3
|
+
// Generated: 2026-06-12T22:03:00.429Z
|
|
4
4
|
|
|
5
5
|
/**
|
|
6
6
|
* CLI option definitions derived from parameter-schema-v2.json.
|
|
@@ -9,389 +9,393 @@
|
|
|
9
9
|
*/
|
|
10
10
|
export const cliOptions = [
|
|
11
11
|
{
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
},
|
|
15
|
-
{
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
12
|
+
'flag': '--project-name <name>',
|
|
13
|
+
'description': 'Name for the generated project'
|
|
14
|
+
},
|
|
15
|
+
{
|
|
16
|
+
'flag': '--deployment-config <config>',
|
|
17
|
+
'description': 'Deployment configuration (e.g. http-flask, transformers-vllm, triton-fil)',
|
|
18
|
+
'choices': [
|
|
19
|
+
'http-flask',
|
|
20
|
+
'http-fastapi',
|
|
21
|
+
'transformers-vllm',
|
|
22
|
+
'transformers-sglang',
|
|
23
|
+
'transformers-tensorrt-llm',
|
|
24
|
+
'transformers-lmi',
|
|
25
|
+
'transformers-djl',
|
|
26
|
+
'triton-fil',
|
|
27
|
+
'triton-onnxruntime',
|
|
28
|
+
'triton-tensorflow',
|
|
29
|
+
'triton-pytorch',
|
|
30
|
+
'triton-vllm',
|
|
31
|
+
'triton-tensorrtllm',
|
|
32
|
+
'triton-python',
|
|
33
|
+
'diffusors-vllm-omni',
|
|
34
|
+
'marketplace'
|
|
35
35
|
]
|
|
36
36
|
},
|
|
37
37
|
{
|
|
38
|
-
|
|
39
|
-
|
|
38
|
+
'flag': '--model-name <name>',
|
|
39
|
+
'description': 'Model identifier (hf-org/model, s3://..., registry://..., marketplace://...)'
|
|
40
40
|
},
|
|
41
41
|
{
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
42
|
+
'flag': '--deployment-target <target>',
|
|
43
|
+
'description': 'Deployment target (managed-inference, async-inference, batch-transform, hyperpod-eks)',
|
|
44
|
+
'choices': [
|
|
45
|
+
'managed-inference',
|
|
46
|
+
'realtime-inference',
|
|
47
|
+
'async-inference',
|
|
48
|
+
'batch-transform',
|
|
49
|
+
'hyperpod-eks'
|
|
50
50
|
],
|
|
51
|
-
|
|
51
|
+
'defaultValue': 'realtime-inference'
|
|
52
52
|
},
|
|
53
53
|
{
|
|
54
|
-
|
|
55
|
-
|
|
54
|
+
'flag': '--instance-type <type>',
|
|
55
|
+
'description': 'SageMaker instance type (e.g. ml.g5.xlarge, ml.m5.large)'
|
|
56
56
|
},
|
|
57
57
|
{
|
|
58
|
-
|
|
59
|
-
|
|
58
|
+
'flag': '--ic-gpu-count <n>',
|
|
59
|
+
'description': 'GPUs allocated to the inference component'
|
|
60
60
|
},
|
|
61
61
|
{
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
62
|
+
'flag': '--ic-copy-count <n>',
|
|
63
|
+
'description': 'Number of inference component copies',
|
|
64
|
+
'defaultValue': 1
|
|
65
65
|
},
|
|
66
66
|
{
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
"defaultValue": 1024
|
|
67
|
+
'flag': '--ic-memory-size <mb>',
|
|
68
|
+
'description': 'Memory in MB for the inference component'
|
|
70
69
|
},
|
|
71
70
|
{
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
71
|
+
'flag': '--enable-lora',
|
|
72
|
+
'description': 'Enable LoRA adapter serving',
|
|
73
|
+
'defaultValue': false
|
|
75
74
|
},
|
|
76
75
|
{
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
76
|
+
'flag': '--max-loras <n>',
|
|
77
|
+
'description': 'Maximum concurrent LoRA adapters in GPU memory',
|
|
78
|
+
'defaultValue': 30
|
|
80
79
|
},
|
|
81
80
|
{
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
81
|
+
'flag': '--max-lora-rank <n>',
|
|
82
|
+
'description': 'Maximum LoRA rank',
|
|
83
|
+
'defaultValue': 64
|
|
85
84
|
},
|
|
86
85
|
{
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
86
|
+
'flag': '--include-benchmark',
|
|
87
|
+
'description': 'Include SageMaker AI Benchmarking scripts (do/benchmark, do/optimize). Workload configuration is specified at runtime via --workload flag.',
|
|
88
|
+
'defaultValue': false
|
|
90
89
|
},
|
|
91
90
|
{
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
91
|
+
'flag': '--benchmark-concurrency <n>',
|
|
92
|
+
'description': 'Benchmark concurrent requests',
|
|
93
|
+
'defaultValue': 10
|
|
95
94
|
},
|
|
96
95
|
{
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
96
|
+
'flag': '--benchmark-input-tokens <n>',
|
|
97
|
+
'description': 'Benchmark mean input tokens',
|
|
98
|
+
'defaultValue': 550
|
|
100
99
|
},
|
|
101
100
|
{
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
101
|
+
'flag': '--benchmark-output-tokens <n>',
|
|
102
|
+
'description': 'Benchmark mean output tokens',
|
|
103
|
+
'defaultValue': 150
|
|
105
104
|
},
|
|
106
105
|
{
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
106
|
+
'flag': '--benchmark-streaming',
|
|
107
|
+
'description': 'Enable streaming in benchmark',
|
|
108
|
+
'defaultValue': true
|
|
110
109
|
},
|
|
111
110
|
{
|
|
112
|
-
|
|
113
|
-
|
|
111
|
+
'flag': '--benchmark-request-count <n>',
|
|
112
|
+
'description': 'Total number of benchmark requests to send'
|
|
114
113
|
},
|
|
115
114
|
{
|
|
116
|
-
|
|
117
|
-
|
|
115
|
+
'flag': '--benchmark-s3-output-path <path>',
|
|
116
|
+
'description': 'S3 URI for benchmark results output'
|
|
118
117
|
},
|
|
119
118
|
{
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
119
|
+
'flag': '--skip-prompts',
|
|
120
|
+
'description': 'Skip interactive prompts and use configuration from other sources',
|
|
121
|
+
'defaultValue': false
|
|
123
122
|
},
|
|
124
123
|
{
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
124
|
+
'flag': '--auto-prompt',
|
|
125
|
+
'description': 'Fill defaults, prompt only for missing required values',
|
|
126
|
+
'defaultValue': false
|
|
128
127
|
},
|
|
129
128
|
{
|
|
130
|
-
|
|
131
|
-
|
|
129
|
+
'flag': '--config <path>',
|
|
130
|
+
'description': 'Path to JSON configuration file'
|
|
132
131
|
},
|
|
133
132
|
{
|
|
134
|
-
|
|
135
|
-
|
|
133
|
+
'flag': '--project-dir <dir>',
|
|
134
|
+
'description': 'Output directory path'
|
|
136
135
|
},
|
|
137
136
|
{
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
137
|
+
'flag': '--force',
|
|
138
|
+
'description': 'Overwrite existing output directory without prompting',
|
|
139
|
+
'defaultValue': false
|
|
141
140
|
},
|
|
142
141
|
{
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
142
|
+
'flag': '--framework <framework>',
|
|
143
|
+
'description': 'ML framework',
|
|
144
|
+
'choices': [
|
|
145
|
+
'sklearn',
|
|
146
|
+
'xgboost',
|
|
147
|
+
'tensorflow',
|
|
148
|
+
'transformers'
|
|
150
149
|
],
|
|
151
|
-
|
|
152
|
-
},
|
|
153
|
-
{
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
150
|
+
'hidden': true
|
|
151
|
+
},
|
|
152
|
+
{
|
|
153
|
+
'flag': '--model-format <format>',
|
|
154
|
+
'description': 'Model serialization format (pkl, joblib, json, model, ubj, keras, h5, SavedModel)',
|
|
155
|
+
'choices': [
|
|
156
|
+
'pkl',
|
|
157
|
+
'joblib',
|
|
158
|
+
'json',
|
|
159
|
+
'model',
|
|
160
|
+
'ubj',
|
|
161
|
+
'keras',
|
|
162
|
+
'h5',
|
|
163
|
+
'SavedModel'
|
|
165
164
|
]
|
|
166
165
|
},
|
|
167
166
|
{
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
167
|
+
'flag': '--model-server <server>',
|
|
168
|
+
'description': 'Model server',
|
|
169
|
+
'choices': [
|
|
170
|
+
'flask',
|
|
171
|
+
'fastapi',
|
|
172
|
+
'vllm',
|
|
173
|
+
'sglang'
|
|
175
174
|
],
|
|
176
|
-
|
|
175
|
+
'hidden': true
|
|
177
176
|
},
|
|
178
177
|
{
|
|
179
|
-
|
|
180
|
-
|
|
178
|
+
'flag': '--base-image <image>',
|
|
179
|
+
'description': 'Base container image for Dockerfile'
|
|
181
180
|
},
|
|
182
181
|
{
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
182
|
+
'flag': '--region <region>',
|
|
183
|
+
'description': 'AWS region',
|
|
184
|
+
'defaultValue': 'us-east-1'
|
|
186
185
|
},
|
|
187
186
|
{
|
|
188
|
-
|
|
189
|
-
|
|
187
|
+
'flag': '--role-arn <arn>',
|
|
188
|
+
'description': 'IAM role ARN for SageMaker execution'
|
|
190
189
|
},
|
|
191
190
|
{
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
]
|
|
191
|
+
'flag': '--build-target <target>',
|
|
192
|
+
'description': 'Build target (codebuild)',
|
|
193
|
+
'choices': [
|
|
194
|
+
'codebuild'
|
|
195
|
+
],
|
|
196
|
+
'defaultValue': 'codebuild'
|
|
197
|
+
},
|
|
198
|
+
{
|
|
199
|
+
'flag': '--codebuild-compute-type <type>',
|
|
200
|
+
'description': 'CodeBuild compute type (SMALL, MEDIUM, LARGE)',
|
|
201
|
+
'choices': [
|
|
202
|
+
'SMALL',
|
|
203
|
+
'MEDIUM',
|
|
204
|
+
'LARGE',
|
|
205
|
+
'BUILD_GENERAL1_SMALL',
|
|
206
|
+
'BUILD_GENERAL1_MEDIUM',
|
|
207
|
+
'BUILD_GENERAL1_LARGE',
|
|
208
|
+
'BUILD_GENERAL1_2XLARGE'
|
|
209
|
+
],
|
|
210
|
+
'defaultValue': 'BUILD_GENERAL1_LARGE'
|
|
197
211
|
},
|
|
198
212
|
{
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
"choices": [
|
|
202
|
-
"SMALL",
|
|
203
|
-
"MEDIUM",
|
|
204
|
-
"LARGE",
|
|
205
|
-
"BUILD_GENERAL1_SMALL",
|
|
206
|
-
"BUILD_GENERAL1_MEDIUM",
|
|
207
|
-
"BUILD_GENERAL1_LARGE",
|
|
208
|
-
"BUILD_GENERAL1_2XLARGE"
|
|
209
|
-
],
|
|
210
|
-
"defaultValue": "BUILD_GENERAL1_LARGE"
|
|
213
|
+
'flag': '--hf-token <token>',
|
|
214
|
+
'description': 'HuggingFace token (or $HF_TOKEN for env var reference)'
|
|
211
215
|
},
|
|
212
216
|
{
|
|
213
|
-
|
|
214
|
-
|
|
217
|
+
'flag': '--hf-token-arn <arn>',
|
|
218
|
+
'description': 'HuggingFace token ARN from Secrets Manager'
|
|
215
219
|
},
|
|
216
220
|
{
|
|
217
|
-
|
|
218
|
-
|
|
221
|
+
'flag': '--ngc-token <token>',
|
|
222
|
+
'description': 'NVIDIA NGC token (or $NGC_API_KEY for env var reference)'
|
|
219
223
|
},
|
|
220
224
|
{
|
|
221
|
-
|
|
222
|
-
|
|
225
|
+
'flag': '--ngc-token-arn <arn>',
|
|
226
|
+
'description': 'NVIDIA NGC token ARN from Secrets Manager'
|
|
223
227
|
},
|
|
224
228
|
{
|
|
225
|
-
|
|
226
|
-
|
|
229
|
+
'flag': '--endpoint-initial-instance-count <n>',
|
|
230
|
+
'description': 'Number of instances for the endpoint',
|
|
231
|
+
'defaultValue': 1
|
|
227
232
|
},
|
|
228
233
|
{
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
234
|
+
'flag': '--endpoint-data-capture-percent <pct>',
|
|
235
|
+
'description': 'Data capture percentage for monitoring, 0-100',
|
|
236
|
+
'defaultValue': 0
|
|
232
237
|
},
|
|
233
238
|
{
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
239
|
+
'flag': '--endpoint-variant-name <name>',
|
|
240
|
+
'description': 'Production variant name',
|
|
241
|
+
'defaultValue': 'AllTraffic'
|
|
237
242
|
},
|
|
238
243
|
{
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
"defaultValue": "AllTraffic"
|
|
244
|
+
'flag': '--endpoint-volume-size <gb>',
|
|
245
|
+
'description': 'ML storage volume size in GB'
|
|
242
246
|
},
|
|
243
247
|
{
|
|
244
|
-
|
|
245
|
-
|
|
248
|
+
'flag': '--capacity-reservation-arn <arn>',
|
|
249
|
+
'description': 'Capacity reservation ARN (FTP or ODCR) for reserved instance deployment'
|
|
246
250
|
},
|
|
247
251
|
{
|
|
248
|
-
|
|
249
|
-
|
|
252
|
+
'flag': '--ic-cpu-count <n>',
|
|
253
|
+
'description': 'vCPUs allocated to the inference component'
|
|
250
254
|
},
|
|
251
255
|
{
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
256
|
+
'flag': '--ic-model-weight <weight>',
|
|
257
|
+
'description': 'Traffic routing weight, 0-1',
|
|
258
|
+
'defaultValue': 1
|
|
255
259
|
},
|
|
256
260
|
{
|
|
257
|
-
|
|
258
|
-
|
|
261
|
+
'flag': '--async-s3-output-path <path>',
|
|
262
|
+
'description': 'S3 output path for async results'
|
|
259
263
|
},
|
|
260
264
|
{
|
|
261
|
-
|
|
262
|
-
|
|
265
|
+
'flag': '--async-sns-success-topic <arn>',
|
|
266
|
+
'description': 'SNS topic ARN for success notifications'
|
|
263
267
|
},
|
|
264
268
|
{
|
|
265
|
-
|
|
266
|
-
|
|
269
|
+
'flag': '--async-sns-error-topic <arn>',
|
|
270
|
+
'description': 'SNS topic ARN for error notifications'
|
|
267
271
|
},
|
|
268
272
|
{
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
273
|
+
'flag': '--async-max-concurrent <n>',
|
|
274
|
+
'description': 'Max concurrent invocations per instance',
|
|
275
|
+
'defaultValue': 1
|
|
272
276
|
},
|
|
273
277
|
{
|
|
274
|
-
|
|
275
|
-
|
|
278
|
+
'flag': '--batch-input-path <path>',
|
|
279
|
+
'description': 'S3 input path for batch data'
|
|
276
280
|
},
|
|
277
281
|
{
|
|
278
|
-
|
|
279
|
-
|
|
282
|
+
'flag': '--batch-output-path <path>',
|
|
283
|
+
'description': 'S3 output path for batch results'
|
|
280
284
|
},
|
|
281
285
|
{
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
286
|
+
'flag': '--batch-instance-count <n>',
|
|
287
|
+
'description': 'Number of batch instances',
|
|
288
|
+
'defaultValue': 1
|
|
285
289
|
},
|
|
286
290
|
{
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
291
|
+
'flag': '--batch-split-type <type>',
|
|
292
|
+
'description': 'Input split type: Line, RecordIO, None',
|
|
293
|
+
'choices': [
|
|
294
|
+
'Line',
|
|
295
|
+
'RecordIO',
|
|
296
|
+
'None'
|
|
293
297
|
],
|
|
294
|
-
|
|
298
|
+
'defaultValue': 'Line'
|
|
295
299
|
},
|
|
296
300
|
{
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
301
|
+
'flag': '--batch-strategy <strategy>',
|
|
302
|
+
'description': 'Batch strategy: MultiRecord, SingleRecord',
|
|
303
|
+
'choices': [
|
|
304
|
+
'MultiRecord',
|
|
305
|
+
'SingleRecord'
|
|
302
306
|
],
|
|
303
|
-
|
|
307
|
+
'defaultValue': 'MultiRecord'
|
|
304
308
|
},
|
|
305
309
|
{
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
310
|
+
'flag': '--batch-join-source <source>',
|
|
311
|
+
'description': 'Join source: Input, None',
|
|
312
|
+
'choices': [
|
|
313
|
+
'Input',
|
|
314
|
+
'None'
|
|
311
315
|
],
|
|
312
|
-
|
|
316
|
+
'defaultValue': 'None'
|
|
313
317
|
},
|
|
314
318
|
{
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
319
|
+
'flag': '--batch-max-concurrent <n>',
|
|
320
|
+
'description': 'Max concurrent transforms per instance',
|
|
321
|
+
'defaultValue': 1
|
|
318
322
|
},
|
|
319
323
|
{
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
324
|
+
'flag': '--batch-max-payload <mb>',
|
|
325
|
+
'description': 'Max payload size in MB, 0-100',
|
|
326
|
+
'defaultValue': 6
|
|
323
327
|
},
|
|
324
328
|
{
|
|
325
|
-
|
|
326
|
-
|
|
329
|
+
'flag': '--hyperpod-cluster <name>',
|
|
330
|
+
'description': 'HyperPod EKS cluster name'
|
|
327
331
|
},
|
|
328
332
|
{
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
333
|
+
'flag': '--hyperpod-namespace <ns>',
|
|
334
|
+
'description': 'Kubernetes namespace',
|
|
335
|
+
'defaultValue': 'default'
|
|
332
336
|
},
|
|
333
337
|
{
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
338
|
+
'flag': '--hyperpod-replicas <count>',
|
|
339
|
+
'description': 'Number of replicas',
|
|
340
|
+
'defaultValue': 1
|
|
337
341
|
},
|
|
338
342
|
{
|
|
339
|
-
|
|
340
|
-
|
|
343
|
+
'flag': '--fsx-volume-handle <handle>',
|
|
344
|
+
'description': 'FSx for Lustre volume handle'
|
|
341
345
|
},
|
|
342
346
|
{
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
347
|
+
'flag': '--model-env <KEY=VALUE>',
|
|
348
|
+
'description': 'Model env var, repeatable (e.g. VLLM_TENSOR_PARALLEL_SIZE=4)',
|
|
349
|
+
'repeatable': true
|
|
346
350
|
},
|
|
347
351
|
{
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
352
|
+
'flag': '--server-env <KEY=VALUE>',
|
|
353
|
+
'description': 'Server env var, repeatable (e.g. SGLANG_MEM_FRACTION=0.9)',
|
|
354
|
+
'repeatable': true
|
|
351
355
|
},
|
|
352
356
|
{
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
357
|
+
'flag': '--include-sample',
|
|
358
|
+
'description': 'Include sample model code',
|
|
359
|
+
'defaultValue': true
|
|
356
360
|
},
|
|
357
361
|
{
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
362
|
+
'flag': '--include-testing',
|
|
363
|
+
'description': 'Include test suite',
|
|
364
|
+
'defaultValue': true
|
|
361
365
|
},
|
|
362
366
|
{
|
|
363
|
-
|
|
364
|
-
|
|
367
|
+
'flag': '--test-types <types>',
|
|
368
|
+
'description': 'Comma-separated test types'
|
|
365
369
|
},
|
|
366
370
|
{
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
371
|
+
'flag': '--smart',
|
|
372
|
+
'description': 'Enable smart mode (live AWS API calls for MCP servers)',
|
|
373
|
+
'defaultValue': false
|
|
370
374
|
},
|
|
371
375
|
{
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
376
|
+
'flag': '--discover',
|
|
377
|
+
'description': 'Enable discovery mode for MCP servers',
|
|
378
|
+
'defaultValue': false
|
|
375
379
|
},
|
|
376
380
|
{
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
381
|
+
'flag': '--no-validate',
|
|
382
|
+
'description': 'Skip parameter validation',
|
|
383
|
+
'defaultValue': false
|
|
380
384
|
},
|
|
381
385
|
{
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
386
|
+
'flag': '--validate-env-vars',
|
|
387
|
+
'description': 'Validate environment variables against schema',
|
|
388
|
+
'defaultValue': false
|
|
385
389
|
},
|
|
386
390
|
{
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
391
|
+
'flag': '--validate-with-docker',
|
|
392
|
+
'description': 'Validate Dockerfile builds successfully',
|
|
393
|
+
'defaultValue': false
|
|
390
394
|
},
|
|
391
395
|
{
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
396
|
+
'flag': '--offline',
|
|
397
|
+
'description': 'Run in offline mode (no network calls)',
|
|
398
|
+
'defaultValue': false
|
|
395
399
|
}
|
|
396
400
|
];
|
|
397
401
|
|
|
@@ -400,72 +404,73 @@ export const cliOptions = [
|
|
|
400
404
|
* Used by the custom help formatter in bin/cli.js.
|
|
401
405
|
*/
|
|
402
406
|
export const helpGroups = {
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
407
|
+
'--project-name': 'general',
|
|
408
|
+
'--deployment-config': 'model',
|
|
409
|
+
'--model-name': 'model',
|
|
410
|
+
'--deployment-target': 'infra',
|
|
411
|
+
'--instance-type': 'infra',
|
|
412
|
+
'--ic-gpu-count': 'ic',
|
|
413
|
+
'--ic-copy-count': 'ic',
|
|
414
|
+
'--ic-memory-size': 'ic',
|
|
415
|
+
'--enable-lora': 'features',
|
|
416
|
+
'--max-loras': 'features',
|
|
417
|
+
'--max-lora-rank': 'features',
|
|
418
|
+
'--include-benchmark': 'general',
|
|
419
|
+
'--benchmark-concurrency': 'general',
|
|
420
|
+
'--benchmark-input-tokens': 'general',
|
|
421
|
+
'--benchmark-output-tokens': 'general',
|
|
422
|
+
'--benchmark-streaming': 'general',
|
|
423
|
+
'--benchmark-request-count': 'general',
|
|
424
|
+
'--benchmark-s3-output-path': 'general',
|
|
425
|
+
'--skip-prompts': 'general',
|
|
426
|
+
'--auto-prompt': 'general',
|
|
427
|
+
'--config': 'general',
|
|
428
|
+
'--project-dir': 'general',
|
|
429
|
+
'--force': 'general',
|
|
430
|
+
'--framework': 'model',
|
|
431
|
+
'--model-format': 'model',
|
|
432
|
+
'--model-server': 'model',
|
|
433
|
+
'--base-image': 'infra',
|
|
434
|
+
'--region': 'infra',
|
|
435
|
+
'--role-arn': 'infra',
|
|
436
|
+
'--build-target': 'infra',
|
|
437
|
+
'--codebuild-compute-type': 'infra',
|
|
438
|
+
'--hf-token': 'auth',
|
|
439
|
+
'--hf-token-arn': 'auth',
|
|
440
|
+
'--ngc-token': 'auth',
|
|
441
|
+
'--ngc-token-arn': 'auth',
|
|
442
|
+
'--endpoint-initial-instance-count': 'endpoint',
|
|
443
|
+
'--endpoint-data-capture-percent': 'endpoint',
|
|
444
|
+
'--endpoint-variant-name': 'endpoint',
|
|
445
|
+
'--endpoint-volume-size': 'endpoint',
|
|
446
|
+
'--capacity-reservation-arn': 'endpoint',
|
|
447
|
+
'--ic-cpu-count': 'ic',
|
|
448
|
+
'--ic-model-weight': 'ic',
|
|
449
|
+
'--async-s3-output-path': 'async',
|
|
450
|
+
'--async-sns-success-topic': 'async',
|
|
451
|
+
'--async-sns-error-topic': 'async',
|
|
452
|
+
'--async-max-concurrent': 'async',
|
|
453
|
+
'--batch-input-path': 'batch',
|
|
454
|
+
'--batch-output-path': 'batch',
|
|
455
|
+
'--batch-instance-count': 'batch',
|
|
456
|
+
'--batch-split-type': 'batch',
|
|
457
|
+
'--batch-strategy': 'batch',
|
|
458
|
+
'--batch-join-source': 'batch',
|
|
459
|
+
'--batch-max-concurrent': 'batch',
|
|
460
|
+
'--batch-max-payload': 'batch',
|
|
461
|
+
'--hyperpod-cluster': 'hyperpod',
|
|
462
|
+
'--hyperpod-namespace': 'hyperpod',
|
|
463
|
+
'--hyperpod-replicas': 'hyperpod',
|
|
464
|
+
'--fsx-volume-handle': 'hyperpod',
|
|
465
|
+
'--model-env': 'env',
|
|
466
|
+
'--server-env': 'env',
|
|
467
|
+
'--include-sample': 'features',
|
|
468
|
+
'--include-testing': 'features',
|
|
469
|
+
'--test-types': 'features',
|
|
470
|
+
'--smart': 'mcp',
|
|
471
|
+
'--discover': 'mcp',
|
|
472
|
+
'--no-validate': 'validation',
|
|
473
|
+
'--validate-env-vars': 'validation',
|
|
474
|
+
'--validate-with-docker': 'validation',
|
|
475
|
+
'--offline': 'validation'
|
|
471
476
|
};
|