@aws/ml-container-creator 0.9.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +31 -137
- package/package.json +5 -2
- package/src/app.js +5 -0
- package/src/lib/config-manager.js +17 -0
- package/src/lib/generated/cli-options.js +467 -0
- package/src/lib/generated/validation-rules.js +202 -0
- package/templates/code/serve +5 -134
- package/templates/code/serve.d/lmi.ejs +19 -0
- package/templates/code/serve.d/sglang.ejs +47 -0
- package/templates/code/serve.d/tensorrt-llm.ejs +53 -0
- package/templates/code/serve.d/vllm.ejs +48 -0
- package/templates/do/clean +1 -1387
- package/templates/do/clean.d/async-inference.ejs +508 -0
- package/templates/do/clean.d/batch-transform.ejs +512 -0
- package/templates/do/clean.d/hyperpod-eks.ejs +481 -0
- package/templates/do/clean.d/managed-inference.ejs +1043 -0
- package/templates/do/deploy +1 -1766
- package/templates/do/deploy.d/async-inference.ejs +501 -0
- package/templates/do/deploy.d/batch-transform.ejs +529 -0
- package/templates/do/deploy.d/hyperpod-eks.ejs +339 -0
- package/templates/do/deploy.d/managed-inference.ejs +726 -0
|
@@ -0,0 +1,467 @@
|
|
|
1
|
+
// AUTO-GENERATED by scripts/codegen-cli.js — DO NOT EDIT
|
|
2
|
+
// Source: config/parameter-schema-v2.json
|
|
3
|
+
// Generated: 2026-05-21T22:50:37.817Z
|
|
4
|
+
|
|
5
|
+
/**
 * Declarative list of CLI options, derived from parameter-schema-v2.json.
 *
 * Every entry carries:
 *   - flag:         Commander flag string, including any `<placeholder>`
 *   - description:  help text for the option
 * and optionally:
 *   - choices:      the accepted values
 *   - defaultValue: the default recorded for the option
 *   - hidden:       true for entries marked hidden
 *   - repeatable:   true for entries marked repeatable
 *
 * An entry can be registered with Commander via:
 *   new Option(entry.flag, entry.description)
 *
 * NOTE(review): `--no-validate` is a Commander "negated" flag and carries
 * defaultValue false. If the consumer forwards defaultValue to
 * Option#default(), the underlying `validate` value would already be false
 * and the flag would have no effect — confirm how bin/cli.js applies defaults.
 */
export const cliOptions = [
  // Project, model and deployment selection
  { flag: '--project-name <name>', description: 'Name for the generated project' },
  {
    flag: '--deployment-config <config>',
    description: 'Deployment configuration (e.g. http-flask, transformers-vllm, triton-fil)',
    choices: [
      'http-flask',
      'http-fastapi',
      'transformers-vllm',
      'transformers-sglang',
      'transformers-tensorrt-llm',
      'transformers-lmi',
      'transformers-djl',
      'triton-fil',
      'triton-onnxruntime',
      'triton-tensorflow',
      'triton-pytorch',
      'triton-vllm',
      'triton-tensorrtllm',
      'triton-python',
      'diffusors-vllm-omni',
      'marketplace',
    ],
  },
  {
    flag: '--model-name <name>',
    description: 'Model identifier (hf-org/model, s3://..., registry://..., marketplace://...)',
  },
  {
    flag: '--deployment-target <target>',
    description: 'Deployment target (managed-inference, async-inference, batch-transform, hyperpod-eks)',
    choices: ['managed-inference', 'realtime-inference', 'async-inference', 'batch-transform', 'hyperpod-eks'],
    defaultValue: 'managed-inference',
  },
  { flag: '--instance-type <type>', description: 'SageMaker instance type (e.g. ml.g5.xlarge, ml.m5.large)' },

  // Inference component sizing
  { flag: '--ic-gpu-count <n>', description: 'GPUs allocated to the inference component' },
  { flag: '--ic-copy-count <n>', description: 'Number of inference component copies', defaultValue: 1 },
  { flag: '--ic-memory-size <mb>', description: 'Memory in MB for the inference component', defaultValue: 1024 },

  // LoRA
  { flag: '--enable-lora', description: 'Enable LoRA adapter serving', defaultValue: false },
  { flag: '--max-loras <n>', description: 'Maximum concurrent LoRA adapters in GPU memory', defaultValue: 30 },
  { flag: '--max-lora-rank <n>', description: 'Maximum LoRA rank', defaultValue: 64 },

  // Benchmarking
  { flag: '--include-benchmark', description: 'Include SageMaker AI Benchmarking', defaultValue: false },
  { flag: '--benchmark-concurrency <n>', description: 'Benchmark concurrent requests', defaultValue: 10 },
  { flag: '--benchmark-input-tokens <n>', description: 'Benchmark mean input tokens', defaultValue: 550 },
  { flag: '--benchmark-output-tokens <n>', description: 'Benchmark mean output tokens', defaultValue: 150 },
  { flag: '--benchmark-streaming', description: 'Enable streaming in benchmark', defaultValue: true },
  { flag: '--benchmark-request-count <n>', description: 'Total number of benchmark requests to send' },
  { flag: '--benchmark-s3-output-path <path>', description: 'S3 URI for benchmark results output' },

  // Prompting, configuration sources and output location
  {
    flag: '--skip-prompts',
    description: 'Skip interactive prompts and use configuration from other sources',
    defaultValue: false,
  },
  {
    flag: '--auto-prompt',
    description: 'Fill defaults, prompt only for missing required values',
    defaultValue: false,
  },
  { flag: '--config <path>', description: 'Path to JSON configuration file' },
  { flag: '--project-dir <dir>', description: 'Output directory path' },
  {
    flag: '--force',
    description: 'Overwrite existing output directory without prompting',
    defaultValue: false,
  },

  // Framework, model format and server
  {
    flag: '--framework <framework>',
    description: 'ML framework',
    choices: ['sklearn', 'xgboost', 'tensorflow', 'transformers'],
    hidden: true,
  },
  {
    flag: '--model-format <format>',
    description: 'Model serialization format (pkl, joblib, json, model, ubj, keras, h5, SavedModel)',
    choices: ['pkl', 'joblib', 'json', 'model', 'ubj', 'keras', 'h5', 'SavedModel'],
  },
  {
    flag: '--model-server <server>',
    description: 'Model server',
    choices: ['flask', 'fastapi', 'vllm', 'sglang'],
    hidden: true,
  },

  // Image, account and build settings
  { flag: '--base-image <image>', description: 'Base container image for Dockerfile' },
  { flag: '--region <region>', description: 'AWS region', defaultValue: 'us-east-1' },
  { flag: '--role-arn <arn>', description: 'IAM role ARN for SageMaker execution' },
  { flag: '--build-target <target>', description: 'Build target (codebuild)', choices: ['codebuild'] },
  {
    flag: '--codebuild-compute-type <type>',
    description: 'CodeBuild compute type (SMALL, MEDIUM, LARGE)',
    choices: ['SMALL', 'MEDIUM', 'LARGE'],
    defaultValue: 'LARGE',
  },

  // Registry credentials
  { flag: '--hf-token <token>', description: 'HuggingFace token (or $HF_TOKEN for env var reference)' },
  { flag: '--hf-token-arn <arn>', description: 'HuggingFace token ARN from Secrets Manager' },
  { flag: '--ngc-token <token>', description: 'NVIDIA NGC token (or $NGC_API_KEY for env var reference)' },
  { flag: '--ngc-token-arn <arn>', description: 'NVIDIA NGC token ARN from Secrets Manager' },

  // Endpoint settings
  {
    flag: '--endpoint-initial-instance-count <n>',
    description: 'Number of instances for the endpoint',
    defaultValue: 1,
  },
  {
    flag: '--endpoint-data-capture-percent <pct>',
    description: 'Data capture percentage for monitoring, 0-100',
    defaultValue: 0,
  },
  { flag: '--endpoint-variant-name <name>', description: 'Production variant name', defaultValue: 'AllTraffic' },
  { flag: '--endpoint-volume-size <gb>', description: 'ML storage volume size in GB' },

  // Inference component CPU and routing
  { flag: '--ic-cpu-count <n>', description: 'vCPUs allocated to the inference component' },
  { flag: '--ic-model-weight <weight>', description: 'Traffic routing weight, 0-1', defaultValue: 1 },

  // Async inference
  { flag: '--async-s3-output-path <path>', description: 'S3 output path for async results' },
  { flag: '--async-sns-success-topic <arn>', description: 'SNS topic ARN for success notifications' },
  { flag: '--async-sns-error-topic <arn>', description: 'SNS topic ARN for error notifications' },
  { flag: '--async-max-concurrent <n>', description: 'Max concurrent invocations per instance', defaultValue: 1 },

  // Batch transform
  { flag: '--batch-input-path <path>', description: 'S3 input path for batch data' },
  { flag: '--batch-output-path <path>', description: 'S3 output path for batch results' },
  { flag: '--batch-instance-count <n>', description: 'Number of batch instances', defaultValue: 1 },
  {
    flag: '--batch-split-type <type>',
    description: 'Input split type: Line, RecordIO, None',
    choices: ['Line', 'RecordIO', 'None'],
    defaultValue: 'Line',
  },
  {
    flag: '--batch-strategy <strategy>',
    description: 'Batch strategy: MultiRecord, SingleRecord',
    choices: ['MultiRecord', 'SingleRecord'],
    defaultValue: 'MultiRecord',
  },
  {
    flag: '--batch-join-source <source>',
    description: 'Join source: Input, None',
    choices: ['Input', 'None'],
    defaultValue: 'None',
  },
  { flag: '--batch-max-concurrent <n>', description: 'Max concurrent transforms per instance', defaultValue: 1 },
  { flag: '--batch-max-payload <mb>', description: 'Max payload size in MB, 0-100', defaultValue: 6 },

  // HyperPod
  { flag: '--hyperpod-cluster <name>', description: 'HyperPod EKS cluster name' },
  { flag: '--hyperpod-namespace <ns>', description: 'Kubernetes namespace', defaultValue: 'default' },
  { flag: '--hyperpod-replicas <count>', description: 'Number of replicas', defaultValue: 1 },
  { flag: '--fsx-volume-handle <handle>', description: 'FSx for Lustre volume handle' },

  // Environment variables
  {
    flag: '--model-env <KEY=VALUE>',
    description: 'Model env var, repeatable (e.g. VLLM_TENSOR_PARALLEL_SIZE=4)',
    repeatable: true,
  },
  {
    flag: '--server-env <KEY=VALUE>',
    description: 'Server env var, repeatable (e.g. SGLANG_MEM_FRACTION=0.9)',
    repeatable: true,
  },

  // Generated extras
  { flag: '--include-sample', description: 'Include sample model code', defaultValue: true },
  { flag: '--include-testing', description: 'Include test suite', defaultValue: true },
  { flag: '--test-types <types>', description: 'Comma-separated test types' },

  // MCP modes
  {
    flag: '--smart',
    description: 'Enable smart mode (live AWS API calls for MCP servers)',
    defaultValue: false,
  },
  { flag: '--discover', description: 'Enable discovery mode for MCP servers', defaultValue: false },

  // Validation switches
  { flag: '--no-validate', description: 'Skip parameter validation', defaultValue: false },
  {
    flag: '--validate-env-vars',
    description: 'Validate environment variables against schema',
    defaultValue: false,
  },
  {
    flag: '--validate-with-docker',
    description: 'Validate Dockerfile builds successfully',
    defaultValue: false,
  },
  { flag: '--offline', description: 'Run in offline mode (no network calls)', defaultValue: false },
];
|
|
393
|
+
|
|
394
|
+
/**
 * Ordered runs of (help group, flags). Consecutive flags that share a group
 * are listed together; the run order fixes the key order of `helpGroups`.
 */
const helpGroupRuns = [
  ['general', ['--project-name']],
  ['model', ['--deployment-config', '--model-name']],
  ['infra', ['--deployment-target', '--instance-type']],
  ['ic', ['--ic-gpu-count', '--ic-copy-count', '--ic-memory-size']],
  ['features', ['--enable-lora', '--max-loras', '--max-lora-rank']],
  [
    'general',
    [
      '--include-benchmark',
      '--benchmark-concurrency',
      '--benchmark-input-tokens',
      '--benchmark-output-tokens',
      '--benchmark-streaming',
      '--benchmark-request-count',
      '--benchmark-s3-output-path',
      '--skip-prompts',
      '--auto-prompt',
      '--config',
      '--project-dir',
      '--force',
    ],
  ],
  ['model', ['--framework', '--model-format', '--model-server']],
  ['infra', ['--base-image', '--region', '--role-arn', '--build-target', '--codebuild-compute-type']],
  ['auth', ['--hf-token', '--hf-token-arn', '--ngc-token', '--ngc-token-arn']],
  [
    'endpoint',
    [
      '--endpoint-initial-instance-count',
      '--endpoint-data-capture-percent',
      '--endpoint-variant-name',
      '--endpoint-volume-size',
    ],
  ],
  ['ic', ['--ic-cpu-count', '--ic-model-weight']],
  [
    'async',
    ['--async-s3-output-path', '--async-sns-success-topic', '--async-sns-error-topic', '--async-max-concurrent'],
  ],
  [
    'batch',
    [
      '--batch-input-path',
      '--batch-output-path',
      '--batch-instance-count',
      '--batch-split-type',
      '--batch-strategy',
      '--batch-join-source',
      '--batch-max-concurrent',
      '--batch-max-payload',
    ],
  ],
  ['hyperpod', ['--hyperpod-cluster', '--hyperpod-namespace', '--hyperpod-replicas', '--fsx-volume-handle']],
  ['env', ['--model-env', '--server-env']],
  ['features', ['--include-sample', '--include-testing', '--test-types']],
  ['mcp', ['--smart', '--discover']],
  ['validation', ['--no-validate', '--validate-env-vars', '--validate-with-docker', '--offline']],
];

/**
 * Lookup from a CLI long flag (without its `<placeholder>`) to the name of
 * the help section it is listed under.
 * Used by the custom help formatter in bin/cli.js.
 */
export const helpGroups = Object.fromEntries(
  helpGroupRuns.flatMap(([group, flags]) => flags.map((flag) => [flag, group])),
);
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
// AUTO-GENERATED by scripts/codegen-validator.js — DO NOT EDIT
|
|
2
|
+
// Source: config/parameter-schema-v2.json
|
|
3
|
+
// Generated: 2026-05-21T23:33:58.719Z
|
|
4
|
+
|
|
5
|
+
// NOTE: this module is marked AUTO-GENERATED (scripts/codegen-validator.js).
// Changes made here need to be mirrored in the generator, otherwise they are
// lost the next time the file is regenerated.

/**
 * Combines several rules into one: runs them in order and returns the first
 * error string produced, or null when every rule accepts the value.
 *
 * @param {...function(*): (string|null)} rules - rules to apply in order
 * @returns {function(*): (string|null)} combined rule
 */
const firstError = (...rules) => (value) => {
  for (const rule of rules) {
    const problem = rule(value);
    if (problem !== null) return problem;
  }
  return null;
};

/**
 * Builds a rule that checks the character length of a string.
 * Non-string input yields an error string rather than the TypeError that
 * reading `.length` on null/undefined would raise.
 *
 * @param {string} name - parameter name used in error messages
 * @param {{min?: number, max?: number}} limits - inclusive length limits
 * @returns {function(*): (string|null)} rule
 */
const lengthBetween = (name, { min, max } = {}) => (value) => {
  if (typeof value !== 'string') return `${name} must be a string`;
  if (min !== undefined && value.length < min) return `${name} must be at least ${min} characters`;
  if (max !== undefined && value.length > max) return `${name} must be at most ${max} characters`;
  return null;
};

/**
 * Builds a rule that requires a string matching `pattern`.
 * Non-strings are rejected up front: RegExp#test would stringify them, so
 * e.g. `undefined` would be tested as the text "undefined".
 *
 * @param {string} name - parameter name used in the error message
 * @param {RegExp} pattern - pattern without the g/y flags, so test() is stateless
 * @returns {function(*): (string|null)} rule
 */
const matching = (name, pattern) => (value) =>
  typeof value === 'string' && pattern.test(value) ? null : `${name} does not match required pattern`;

/**
 * Builds a rule that requires the value to be one of `choices`
 * (strict equality, via Array#includes).
 *
 * @param {string} name - parameter name used in the error message
 * @param {string[]} choices - accepted values, listed in the error message
 * @returns {function(*): (string|null)} rule
 */
const oneOf = (name, choices) => (value) =>
  choices.includes(value) ? null : `Invalid value "${value}" for ${name}. Valid: ${choices.join(', ')}`;

/**
 * Builds a rule that requires a number within inclusive bounds.
 * The value is converted with Number() — the same conversion `<` and `>`
 * apply to a non-number operand — so numeric strings such as "4" keep
 * working. Anything that converts to NaN is rejected explicitly, because
 * every comparison against NaN is false and would otherwise slip through
 * both bound checks.
 *
 * @param {string} name - parameter name used in error messages
 * @param {{min?: number, max?: number}} bounds - inclusive bounds; either may be omitted
 * @returns {function(*): (string|null)} rule
 */
const between = (name, { min, max } = {}) => (value) => {
  const amount = Number(value);
  if (Number.isNaN(amount)) return `${name} must be a number, got ${value}`;
  if (min !== undefined && amount < min) return `${name} must be >= ${min}, got ${value}`;
  if (max !== undefined && amount > max) return `${name} must be <= ${max}, got ${value}`;
  return null;
};

// Patterns shared by several rules; compiled once instead of on every call.
const S3_URI = /^s3:\/\//;
const SECRETS_MANAGER_ARN = /^arn:aws:secretsmanager:/;
const SNS_ARN = /^arn:aws:sns:/;

/**
 * Validation rules derived from parameter-schema-v2.json.
 * Each key maps to a function that returns null (valid) or an error string.
 */
export const validationRules = {
  projectName: firstError(
    lengthBetween('projectName', { min: 2, max: 63 }),
    matching('projectName', /^[a-z0-9][a-z0-9-]*[a-z0-9]$/),
  ),
  deploymentConfig: oneOf('deploymentConfig', [
    'http-flask',
    'http-fastapi',
    'transformers-vllm',
    'transformers-sglang',
    'transformers-tensorrt-llm',
    'transformers-lmi',
    'transformers-djl',
    'triton-fil',
    'triton-onnxruntime',
    'triton-tensorflow',
    'triton-pytorch',
    'triton-vllm',
    'triton-tensorrtllm',
    'triton-python',
    'diffusors-vllm-omni',
    'marketplace',
  ]),
  modelName: lengthBetween('modelName', { min: 1 }),
  deploymentTarget: oneOf('deploymentTarget', [
    'managed-inference',
    'realtime-inference',
    'async-inference',
    'batch-transform',
    'hyperpod-eks',
  ]),
  instanceType: matching('instanceType', /^ml\.[a-z0-9]+\.[a-z0-9]+$/),
  icGpuCount: between('icGpuCount', { min: 0, max: 8 }),
  icCopyCount: between('icCopyCount', { min: 0, max: 100 }),
  icMemorySize: between('icMemorySize', { min: 128, max: 3145728 }),
  maxLoras: between('maxLoras', { min: 1, max: 256 }),
  maxLoraRank: between('maxLoraRank', { min: 8, max: 512 }),
  benchmarkConcurrency: between('benchmarkConcurrency', { min: 1, max: 1000 }),
  benchmarkInputTokens: between('benchmarkInputTokens', { min: 1, max: 128000 }),
  benchmarkOutputTokens: between('benchmarkOutputTokens', { min: 1, max: 128000 }),
  benchmarkRequestCount: between('benchmarkRequestCount', { min: 1 }),
  benchmarkS3OutputPath: matching('benchmarkS3OutputPath', S3_URI),
  framework: oneOf('framework', ['sklearn', 'xgboost', 'tensorflow', 'transformers']),
  modelFormat: oneOf('modelFormat', ['pkl', 'joblib', 'json', 'model', 'ubj', 'keras', 'h5', 'SavedModel']),
  modelServer: oneOf('modelServer', ['flask', 'fastapi', 'vllm', 'sglang']),
  region: matching('region', /^[a-z]{2}-[a-z]+-\d+$/),
  roleArn: matching('roleArn', /^arn:aws:iam::/),
  buildTarget: oneOf('buildTarget', ['codebuild']),
  codebuildComputeType: oneOf('codebuildComputeType', [
    'SMALL',
    'MEDIUM',
    'LARGE',
    'BUILD_GENERAL1_SMALL',
    'BUILD_GENERAL1_MEDIUM',
    'BUILD_GENERAL1_LARGE',
    'BUILD_GENERAL1_2XLARGE',
  ]),
  hfTokenArn: matching('hfTokenArn', SECRETS_MANAGER_ARN),
  ngcTokenArn: matching('ngcTokenArn', SECRETS_MANAGER_ARN),
  endpointInitialInstanceCount: between('endpointInitialInstanceCount', { min: 1, max: 100 }),
  endpointDataCapturePercent: between('endpointDataCapturePercent', { min: 0, max: 100 }),
  // Only the first character is constrained by this pattern.
  endpointVariantName: matching('endpointVariantName', /^[a-zA-Z0-9]/),
  endpointVolumeSize: between('endpointVolumeSize', { min: 1, max: 16384 }),
  icCpuCount: between('icCpuCount', { min: 0.25, max: 768 }),
  icModelWeight: between('icModelWeight', { min: 0, max: 1 }),
  asyncS3OutputPath: matching('asyncS3OutputPath', S3_URI),
  asyncSnsSuccessTopic: matching('asyncSnsSuccessTopic', SNS_ARN),
  asyncSnsErrorTopic: matching('asyncSnsErrorTopic', SNS_ARN),
  asyncMaxConcurrent: between('asyncMaxConcurrent', { min: 1, max: 100 }),
  batchInputPath: matching('batchInputPath', S3_URI),
  batchOutputPath: matching('batchOutputPath', S3_URI),
  batchInstanceCount: between('batchInstanceCount', { min: 1, max: 100 }),
  batchSplitType: oneOf('batchSplitType', ['Line', 'RecordIO', 'None']),
  batchStrategy: oneOf('batchStrategy', ['MultiRecord', 'SingleRecord']),
  batchJoinSource: oneOf('batchJoinSource', ['Input', 'None']),
  batchMaxConcurrent: between('batchMaxConcurrent', { min: 1 }),
  batchMaxPayload: between('batchMaxPayload', { min: 0, max: 100 }),
  hyperpodReplicas: between('hyperpodReplicas', { min: 1 }),
};
|
|
201
|
+
|
|
202
|
+
// 43 parameters have validation rules
|