@aws/ml-container-creator 0.9.1 → 0.10.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. package/LICENSE-THIRD-PARTY +9304 -0
  2. package/bin/cli.js +2 -0
  3. package/config/bootstrap-e2e-stack.json +341 -0
  4. package/config/bootstrap-stack.json +40 -3
  5. package/config/parameter-schema-v2.json +2049 -0
  6. package/config/tune-catalog.json +1781 -0
  7. package/infra/ci-harness/buildspec.yml +1 -0
  8. package/infra/ci-harness/lambda/path-prover/brain.ts +306 -0
  9. package/infra/ci-harness/lambda/path-prover/write-results.ts +152 -0
  10. package/infra/ci-harness/lib/ci-harness-stack.ts +837 -7
  11. package/infra/ci-harness/state-machines/path-prover.asl.json +496 -0
  12. package/package.json +53 -68
  13. package/servers/base-image-picker/index.js +121 -121
  14. package/servers/e2e-status/index.js +297 -0
  15. package/servers/e2e-status/manifest.json +14 -0
  16. package/servers/e2e-status/package.json +15 -0
  17. package/servers/endpoint-picker/LICENSE +202 -0
  18. package/servers/endpoint-picker/index.js +536 -0
  19. package/servers/endpoint-picker/manifest.json +14 -0
  20. package/servers/endpoint-picker/package.json +18 -0
  21. package/servers/hyperpod-cluster-picker/index.js +125 -125
  22. package/servers/instance-sizer/index.js +138 -138
  23. package/servers/instance-sizer/lib/instance-ranker.js +76 -76
  24. package/servers/instance-sizer/lib/model-resolver.js +61 -61
  25. package/servers/instance-sizer/lib/quota-resolver.js +113 -113
  26. package/servers/instance-sizer/lib/vram-estimator.js +31 -31
  27. package/servers/lib/bedrock-client.js +38 -38
  28. package/servers/lib/catalogs/jumpstart-public.json +101 -16
  29. package/servers/lib/catalogs/model-servers.json +201 -3
  30. package/servers/lib/catalogs/models.json +182 -26
  31. package/servers/lib/custom-validators.js +13 -13
  32. package/servers/lib/dynamic-resolver.js +4 -4
  33. package/servers/marketplace-picker/index.js +342 -0
  34. package/servers/marketplace-picker/manifest.json +14 -0
  35. package/servers/marketplace-picker/package.json +18 -0
  36. package/servers/model-picker/index.js +382 -382
  37. package/servers/region-picker/index.js +56 -56
  38. package/servers/workload-picker/LICENSE +202 -0
  39. package/servers/workload-picker/catalogs/workload-profiles.json +67 -0
  40. package/servers/workload-picker/index.js +171 -0
  41. package/servers/workload-picker/manifest.json +16 -0
  42. package/servers/workload-picker/package.json +16 -0
  43. package/src/app.js +4 -390
  44. package/src/lib/bootstrap-command-handler.js +710 -1148
  45. package/src/lib/bootstrap-config.js +36 -0
  46. package/src/lib/bootstrap-profile-manager.js +641 -0
  47. package/src/lib/bootstrap-provisioners.js +421 -0
  48. package/src/lib/ci-register-helpers.js +74 -0
  49. package/src/lib/config-loader.js +408 -0
  50. package/src/lib/config-manager.js +66 -1685
  51. package/src/lib/config-mcp-client.js +118 -0
  52. package/src/lib/config-validator.js +634 -0
  53. package/src/lib/cuda-resolver.js +149 -0
  54. package/src/lib/e2e-catalog-validator.js +251 -3
  55. package/src/lib/e2e-ci-recorder.js +103 -0
  56. package/src/lib/generated/cli-options.js +315 -311
  57. package/src/lib/generated/parameter-matrix.js +671 -0
  58. package/src/lib/generated/validation-rules.js +71 -71
  59. package/src/lib/marketplace-flow.js +276 -0
  60. package/src/lib/mcp-query-runner.js +768 -0
  61. package/src/lib/parameter-schema-validator.js +62 -18
  62. package/src/lib/path-prover-brain.js +607 -0
  63. package/src/lib/prompt-runner.js +41 -1504
  64. package/src/lib/prompts/feature-prompts.js +172 -0
  65. package/src/lib/prompts/index.js +48 -0
  66. package/src/lib/prompts/infrastructure-prompts.js +690 -0
  67. package/src/lib/prompts/model-prompts.js +552 -0
  68. package/src/lib/prompts/project-prompts.js +82 -0
  69. package/src/lib/prompts.js +2 -1446
  70. package/src/lib/registry-command-handler.js +135 -3
  71. package/src/lib/secrets-prompt-runner.js +251 -0
  72. package/src/lib/template-variable-resolver.js +422 -0
  73. package/src/lib/tune-catalog-validator.js +37 -4
  74. package/templates/Dockerfile +9 -0
  75. package/templates/code/adapter_sidecar.py +444 -0
  76. package/templates/code/serve +6 -0
  77. package/templates/code/serve.d/vllm.ejs +1 -1
  78. package/templates/do/.benchmark_writer.py +1476 -0
  79. package/templates/do/.tune_helper.py +982 -57
  80. package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
  81. package/templates/do/adapter +149 -0
  82. package/templates/do/benchmark +639 -85
  83. package/templates/do/config +108 -5
  84. package/templates/do/deploy.d/managed-inference.ejs +192 -11
  85. package/templates/do/optimize +106 -37
  86. package/templates/do/register +89 -0
  87. package/templates/do/test +13 -0
  88. package/templates/do/tune +378 -59
  89. package/templates/do/validate +44 -4
  90. package/config/parameter-schema.json +0 -88
@@ -0,0 +1,2049 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "schemaVersion": "2.0.0",
4
+ "_comment": "Single source of truth for all MCC parameters. Drives CLI, validation, prompts, widget, and tests.",
5
+ "parameters": {
6
+ "projectName": {
7
+ "type": "string",
8
+ "description": "Name for the generated project",
9
+ "cliFlag": "--project-name",
10
+ "cliArgName": "name",
11
+ "envVar": "ML_PROJECT_NAME",
12
+ "templateVar": "projectName",
13
+ "configKey": "projectName",
14
+ "default": null,
15
+ "validation": {
16
+ "pattern": "^[a-z0-9][a-z0-9-]*[a-z0-9]$",
17
+ "minLength": 2,
18
+ "maxLength": 63
19
+ },
20
+ "phase": "project",
21
+ "group": "project",
22
+ "appliesTo": {
23
+ "deploymentTargets": [
24
+ "*"
25
+ ],
26
+ "architectures": [
27
+ "*"
28
+ ]
29
+ },
30
+ "widget": {
31
+ "section": "model-server",
32
+ "inputType": "text",
33
+ "placeholder": "my-project"
34
+ },
35
+ "prompt": {
36
+ "message": "Project name?",
37
+ "type": "input"
38
+ },
39
+ "deprecated": false,
40
+ "since": "0.1.0"
41
+ },
42
+ "deploymentConfig": {
43
+ "type": "enum",
44
+ "description": "Deployment configuration (e.g. http-flask, transformers-vllm, triton-fil)",
45
+ "cliFlag": "--deployment-config",
46
+ "cliArgName": "config",
47
+ "envVar": "ML_DEPLOYMENT_CONFIG",
48
+ "templateVar": "deploymentConfig",
49
+ "configKey": "deploymentConfig",
50
+ "default": null,
51
+ "validation": {
52
+ "enum": [
53
+ "http-flask",
54
+ "http-fastapi",
55
+ "transformers-vllm",
56
+ "transformers-sglang",
57
+ "transformers-tensorrt-llm",
58
+ "transformers-lmi",
59
+ "transformers-djl",
60
+ "triton-fil",
61
+ "triton-onnxruntime",
62
+ "triton-tensorflow",
63
+ "triton-pytorch",
64
+ "triton-vllm",
65
+ "triton-tensorrtllm",
66
+ "triton-python",
67
+ "diffusors-vllm-omni",
68
+ "marketplace"
69
+ ]
70
+ },
71
+ "phase": "model",
72
+ "group": "model",
73
+ "appliesTo": {
74
+ "deploymentTargets": [
75
+ "*"
76
+ ],
77
+ "architectures": [
78
+ "*"
79
+ ]
80
+ },
81
+ "widget": {
82
+ "section": "model-server",
83
+ "inputType": "select"
84
+ },
85
+ "prompt": {
86
+ "message": "Deployment configuration?",
87
+ "type": "list"
88
+ },
89
+ "deprecated": false,
90
+ "since": "0.5.0"
91
+ },
92
+ "modelName": {
93
+ "type": "string",
94
+ "description": "Model identifier (hf-org/model, s3://..., registry://..., marketplace://...)",
95
+ "cliFlag": "--model-name",
96
+ "cliArgName": "name",
97
+ "envVar": "ML_MODEL_NAME",
98
+ "templateVar": "modelName",
99
+ "configKey": "modelName",
100
+ "default": null,
101
+ "validation": {
102
+ "minLength": 1
103
+ },
104
+ "phase": "model",
105
+ "group": "model",
106
+ "appliesTo": {
107
+ "deploymentTargets": [
108
+ "*"
109
+ ],
110
+ "architectures": [
111
+ "transformers",
112
+ "diffusors"
113
+ ]
114
+ },
115
+ "widget": {
116
+ "section": "model-server",
117
+ "inputType": "text",
118
+ "datalist": "models",
119
+ "placeholder": "e.g. meta-llama/Llama-3.1-8B-Instruct"
120
+ },
121
+ "prompt": {
122
+ "message": "Model name or path?",
123
+ "type": "input"
124
+ },
125
+ "deprecated": false,
126
+ "since": "0.3.0"
127
+ },
128
+ "deploymentTarget": {
129
+ "type": "enum",
130
+ "description": "Deployment target (managed-inference, async-inference, batch-transform, hyperpod-eks)",
131
+ "cliFlag": "--deployment-target",
132
+ "cliArgName": "target",
133
+ "envVar": "ML_DEPLOYMENT_TARGET",
134
+ "templateVar": "deploymentTarget",
135
+ "configKey": "deploymentTarget",
136
+ "default": "realtime-inference",
137
+ "validation": {
138
+ "enum": [
139
+ "managed-inference",
140
+ "realtime-inference",
141
+ "async-inference",
142
+ "batch-transform",
143
+ "hyperpod-eks"
144
+ ]
145
+ },
146
+ "phase": "infrastructure",
147
+ "group": "infrastructure",
148
+ "appliesTo": {
149
+ "deploymentTargets": [
150
+ "*"
151
+ ],
152
+ "architectures": [
153
+ "*"
154
+ ]
155
+ },
156
+ "widget": {
157
+ "section": "infrastructure",
158
+ "inputType": "select"
159
+ },
160
+ "prompt": {
161
+ "message": "Deployment target?",
162
+ "type": "list"
163
+ },
164
+ "deprecated": false,
165
+ "since": "0.4.0"
166
+ },
167
+ "instanceType": {
168
+ "type": "string",
169
+ "description": "SageMaker instance type (e.g. ml.g5.xlarge, ml.m5.large)",
170
+ "cliFlag": "--instance-type",
171
+ "cliArgName": "type",
172
+ "envVar": "ML_INSTANCE_TYPE",
173
+ "templateVar": "instanceType",
174
+ "configKey": "instanceType",
175
+ "default": null,
176
+ "validation": {
177
+ "pattern": "^ml\\.[a-z0-9]+\\.[a-z0-9]+$"
178
+ },
179
+ "phase": "infrastructure",
180
+ "group": "infrastructure",
181
+ "appliesTo": {
182
+ "deploymentTargets": [
183
+ "managed-inference",
184
+ "async-inference",
185
+ "batch-transform"
186
+ ],
187
+ "architectures": [
188
+ "*"
189
+ ]
190
+ },
191
+ "widget": {
192
+ "section": "infrastructure",
193
+ "inputType": "text",
194
+ "datalist": "instances",
195
+ "placeholder": "e.g. ml.g5.2xlarge"
196
+ },
197
+ "prompt": {
198
+ "message": "Instance type?",
199
+ "type": "input",
200
+ "when": "deploymentTarget !== 'hyperpod-eks'"
201
+ },
202
+ "deprecated": false,
203
+ "since": "0.1.0"
204
+ },
205
+ "icGpuCount": {
206
+ "type": "integer",
207
+ "description": "GPUs allocated to the inference component",
208
+ "cliFlag": "--ic-gpu-count",
209
+ "cliArgName": "n",
210
+ "envVar": "ML_IC_GPU_COUNT",
211
+ "templateVar": "icGpuCount",
212
+ "configKey": "icGpuCount",
213
+ "default": null,
214
+ "validation": {
215
+ "min": 0,
216
+ "max": 8
217
+ },
218
+ "phase": "infrastructure",
219
+ "group": "inference-component",
220
+ "appliesTo": {
221
+ "deploymentTargets": [
222
+ "managed-inference"
223
+ ],
224
+ "architectures": [
225
+ "*"
226
+ ]
227
+ },
228
+ "widget": {
229
+ "section": "infrastructure",
230
+ "inputType": "number",
231
+ "placeholder": "auto"
232
+ },
233
+ "prompt": null,
234
+ "serverMapping": {
235
+ "icConfVar": "IC_GPU_COUNT"
236
+ },
237
+ "deprecated": false,
238
+ "since": "0.4.0"
239
+ },
240
+ "icCopyCount": {
241
+ "type": "integer",
242
+ "description": "Number of inference component copies",
243
+ "cliFlag": "--ic-copy-count",
244
+ "cliArgName": "n",
245
+ "envVar": "ML_IC_COPY_COUNT",
246
+ "templateVar": "icCopyCount",
247
+ "configKey": "icCopyCount",
248
+ "default": 1,
249
+ "validation": {
250
+ "min": 0,
251
+ "max": 100
252
+ },
253
+ "phase": "infrastructure",
254
+ "group": "inference-component",
255
+ "appliesTo": {
256
+ "deploymentTargets": [
257
+ "managed-inference"
258
+ ],
259
+ "architectures": [
260
+ "*"
261
+ ]
262
+ },
263
+ "widget": {
264
+ "section": "infrastructure",
265
+ "inputType": "number",
266
+ "placeholder": "1"
267
+ },
268
+ "prompt": null,
269
+ "serverMapping": {
270
+ "icConfVar": "IC_COPY_COUNT"
271
+ },
272
+ "deprecated": false,
273
+ "since": "0.4.0"
274
+ },
275
+ "icMemorySize": {
276
+ "type": "integer",
277
+ "description": "Memory in MB for the inference component",
278
+ "cliFlag": "--ic-memory-size",
279
+ "cliArgName": "mb",
280
+ "envVar": "ML_IC_MEMORY_SIZE",
281
+ "templateVar": "icMemorySize",
282
+ "configKey": "icMemorySize",
283
+ "default": null,
284
+ "validation": {
285
+ "min": 128,
286
+ "max": 3145728
287
+ },
288
+ "phase": "infrastructure",
289
+ "group": "inference-component",
290
+ "appliesTo": {
291
+ "deploymentTargets": [
292
+ "managed-inference"
293
+ ],
294
+ "architectures": [
295
+ "*"
296
+ ]
297
+ },
298
+ "widget": {
299
+ "section": "infrastructure",
300
+ "inputType": "number",
301
+ "placeholder": "1024"
302
+ },
303
+ "prompt": null,
304
+ "serverMapping": {
305
+ "icConfVar": "IC_MIN_MEMORY_MB"
306
+ },
307
+ "deprecated": false,
308
+ "since": "0.4.0"
309
+ },
310
+ "enableLora": {
311
+ "type": "boolean",
312
+ "description": "Enable LoRA adapter serving",
313
+ "cliFlag": "--enable-lora",
314
+ "cliArgName": null,
315
+ "envVar": "ML_ENABLE_LORA",
316
+ "templateVar": "enableLora",
317
+ "configKey": "enableLora",
318
+ "default": false,
319
+ "validation": {},
320
+ "phase": "features",
321
+ "group": "lora",
322
+ "appliesTo": {
323
+ "deploymentTargets": [
324
+ "managed-inference"
325
+ ],
326
+ "architectures": [
327
+ "transformers"
328
+ ]
329
+ },
330
+ "widget": {
331
+ "section": "features",
332
+ "inputType": "checkbox"
333
+ },
334
+ "prompt": {
335
+ "message": "Enable LoRA adapter serving?",
336
+ "type": "confirm",
337
+ "when": "architecture === 'transformers' && ['vllm','sglang','djl'].includes(modelServer)"
338
+ },
339
+ "serverMapping": {
340
+ "envVar": "VLLM_ENABLE_LORA",
341
+ "booleanFlag": true
342
+ },
343
+ "deprecated": false,
344
+ "since": "0.5.0"
345
+ },
346
+ "maxLoras": {
347
+ "type": "integer",
348
+ "description": "Maximum concurrent LoRA adapters in GPU memory",
349
+ "cliFlag": "--max-loras",
350
+ "cliArgName": "n",
351
+ "envVar": "ML_MAX_LORAS",
352
+ "templateVar": "maxLoras",
353
+ "configKey": "maxLoras",
354
+ "default": 30,
355
+ "validation": {
356
+ "min": 1,
357
+ "max": 256
358
+ },
359
+ "phase": "features",
360
+ "group": "lora",
361
+ "appliesTo": {
362
+ "deploymentTargets": [
363
+ "managed-inference"
364
+ ],
365
+ "architectures": [
366
+ "transformers"
367
+ ]
368
+ },
369
+ "widget": {
370
+ "section": "features",
371
+ "inputType": "number",
372
+ "placeholder": "30"
373
+ },
374
+ "prompt": {
375
+ "message": "Max concurrent LoRA adapters?",
376
+ "type": "number",
377
+ "when": "enableLora === true"
378
+ },
379
+ "serverMapping": {
380
+ "envVar": "VLLM_MAX_LORAS"
381
+ },
382
+ "deprecated": false,
383
+ "since": "0.5.0"
384
+ },
385
+ "maxLoraRank": {
386
+ "type": "integer",
387
+ "description": "Maximum LoRA rank",
388
+ "cliFlag": "--max-lora-rank",
389
+ "cliArgName": "n",
390
+ "envVar": "ML_MAX_LORA_RANK",
391
+ "templateVar": "maxLoraRank",
392
+ "configKey": "maxLoraRank",
393
+ "default": 64,
394
+ "validation": {
395
+ "min": 8,
396
+ "max": 512
397
+ },
398
+ "phase": "features",
399
+ "group": "lora",
400
+ "appliesTo": {
401
+ "deploymentTargets": [
402
+ "managed-inference"
403
+ ],
404
+ "architectures": [
405
+ "transformers"
406
+ ]
407
+ },
408
+ "widget": {
409
+ "section": "features",
410
+ "inputType": "number",
411
+ "placeholder": "64"
412
+ },
413
+ "prompt": {
414
+ "message": "Max LoRA rank?",
415
+ "type": "number",
416
+ "when": "enableLora === true"
417
+ },
418
+ "serverMapping": {
419
+ "envVar": "VLLM_MAX_LORA_RANK"
420
+ },
421
+ "deprecated": false,
422
+ "since": "0.5.0"
423
+ },
424
+ "includeBenchmark": {
425
+ "type": "boolean",
426
+ "description": "Include SageMaker AI Benchmarking scripts (do/benchmark, do/optimize). Workload configuration is specified at runtime via --workload flag.",
427
+ "cliFlag": "--include-benchmark",
428
+ "cliArgName": null,
429
+ "envVar": "ML_INCLUDE_BENCHMARK",
430
+ "templateVar": "includeBenchmark",
431
+ "configKey": "includeBenchmark",
432
+ "default": false,
433
+ "validation": {},
434
+ "phase": "features",
435
+ "group": "benchmark",
436
+ "appliesTo": {
437
+ "deploymentTargets": [
438
+ "managed-inference"
439
+ ],
440
+ "architectures": [
441
+ "transformers",
442
+ "diffusors"
443
+ ]
444
+ },
445
+ "widget": {
446
+ "section": "features",
447
+ "inputType": "checkbox"
448
+ },
449
+ "prompt": {
450
+ "message": "Include benchmarking?",
451
+ "type": "confirm",
452
+ "when": "['transformers','diffusors'].includes(architecture)"
453
+ },
454
+ "deprecated": false,
455
+ "since": "0.6.0"
456
+ },
457
+ "benchmarkConcurrency": {
458
+ "type": "integer",
459
+ "description": "Benchmark concurrent requests",
460
+ "cliFlag": "--benchmark-concurrency",
461
+ "cliArgName": "n",
462
+ "envVar": null,
463
+ "templateVar": "benchmarkConcurrency",
464
+ "configKey": "benchmarkConcurrency",
465
+ "default": 10,
466
+ "validation": {
467
+ "min": 1,
468
+ "max": 1000
469
+ },
470
+ "phase": "features",
471
+ "group": "benchmark",
472
+ "appliesTo": {
473
+ "deploymentTargets": [
474
+ "managed-inference"
475
+ ],
476
+ "architectures": [
477
+ "transformers",
478
+ "diffusors"
479
+ ]
480
+ },
481
+ "widget": {
482
+ "section": "features",
483
+ "inputType": "number",
484
+ "placeholder": "10"
485
+ },
486
+ "prompt": null,
487
+ "deprecated": false,
488
+ "since": "0.6.0"
489
+ },
490
+ "benchmarkInputTokens": {
491
+ "type": "integer",
492
+ "description": "Benchmark mean input tokens",
493
+ "cliFlag": "--benchmark-input-tokens",
494
+ "cliArgName": "n",
495
+ "envVar": null,
496
+ "templateVar": "benchmarkInputTokensMean",
497
+ "configKey": "benchmarkInputTokensMean",
498
+ "default": 550,
499
+ "validation": {
500
+ "min": 1,
501
+ "max": 128000
502
+ },
503
+ "phase": "features",
504
+ "group": "benchmark",
505
+ "appliesTo": {
506
+ "deploymentTargets": [
507
+ "managed-inference"
508
+ ],
509
+ "architectures": [
510
+ "transformers",
511
+ "diffusors"
512
+ ]
513
+ },
514
+ "widget": {
515
+ "section": "features",
516
+ "inputType": "number",
517
+ "placeholder": "550"
518
+ },
519
+ "prompt": null,
520
+ "deprecated": false,
521
+ "since": "0.6.0"
522
+ },
523
+ "benchmarkOutputTokens": {
524
+ "type": "integer",
525
+ "description": "Benchmark mean output tokens",
526
+ "cliFlag": "--benchmark-output-tokens",
527
+ "cliArgName": "n",
528
+ "envVar": null,
529
+ "templateVar": "benchmarkOutputTokensMean",
530
+ "configKey": "benchmarkOutputTokensMean",
531
+ "default": 150,
532
+ "validation": {
533
+ "min": 1,
534
+ "max": 128000
535
+ },
536
+ "phase": "features",
537
+ "group": "benchmark",
538
+ "appliesTo": {
539
+ "deploymentTargets": [
540
+ "managed-inference"
541
+ ],
542
+ "architectures": [
543
+ "transformers",
544
+ "diffusors"
545
+ ]
546
+ },
547
+ "widget": {
548
+ "section": "features",
549
+ "inputType": "number",
550
+ "placeholder": "150"
551
+ },
552
+ "prompt": null,
553
+ "deprecated": false,
554
+ "since": "0.6.0"
555
+ },
556
+ "benchmarkStreaming": {
557
+ "type": "boolean",
558
+ "description": "Enable streaming in benchmark",
559
+ "cliFlag": "--benchmark-streaming",
560
+ "cliArgName": null,
561
+ "envVar": null,
562
+ "templateVar": "benchmarkStreaming",
563
+ "configKey": "benchmarkStreaming",
564
+ "default": true,
565
+ "validation": {},
566
+ "phase": "features",
567
+ "group": "benchmark",
568
+ "appliesTo": {
569
+ "deploymentTargets": [
570
+ "managed-inference"
571
+ ],
572
+ "architectures": [
573
+ "transformers",
574
+ "diffusors"
575
+ ]
576
+ },
577
+ "widget": {
578
+ "section": "features",
579
+ "inputType": "checkbox"
580
+ },
581
+ "prompt": null,
582
+ "deprecated": false,
583
+ "since": "0.6.0"
584
+ },
585
+ "benchmarkRequestCount": {
586
+ "type": "integer",
587
+ "description": "Total number of benchmark requests to send",
588
+ "cliFlag": "--benchmark-request-count",
589
+ "cliArgName": "n",
590
+ "envVar": null,
591
+ "templateVar": "benchmarkRequestCount",
592
+ "configKey": "benchmarkRequestCount",
593
+ "default": null,
594
+ "validation": {
595
+ "min": 1
596
+ },
597
+ "phase": "features",
598
+ "group": "benchmark",
599
+ "appliesTo": {
600
+ "deploymentTargets": [
601
+ "managed-inference"
602
+ ],
603
+ "architectures": [
604
+ "transformers",
605
+ "diffusors"
606
+ ]
607
+ },
608
+ "widget": null,
609
+ "prompt": null,
610
+ "deprecated": false,
611
+ "since": "0.6.0"
612
+ },
613
+ "benchmarkS3OutputPath": {
614
+ "type": "string",
615
+ "description": "S3 URI for benchmark results output",
616
+ "cliFlag": "--benchmark-s3-output-path",
617
+ "cliArgName": "path",
618
+ "envVar": "ML_BENCHMARK_S3_OUTPUT_PATH",
619
+ "templateVar": "benchmarkS3OutputPath",
620
+ "configKey": "benchmarkS3OutputPath",
621
+ "default": null,
622
+ "validation": {
623
+ "pattern": "^s3://"
624
+ },
625
+ "phase": "features",
626
+ "group": "benchmark",
627
+ "appliesTo": {
628
+ "deploymentTargets": [
629
+ "managed-inference"
630
+ ],
631
+ "architectures": [
632
+ "transformers",
633
+ "diffusors"
634
+ ]
635
+ },
636
+ "widget": null,
637
+ "prompt": null,
638
+ "deprecated": false,
639
+ "since": "0.6.0"
640
+ },
641
+ "skipPrompts": {
642
+ "type": "boolean",
643
+ "description": "Skip interactive prompts and use configuration from other sources",
644
+ "cliFlag": "--skip-prompts",
645
+ "cliArgName": null,
646
+ "envVar": "MCC_SKIP_PROMPTS",
647
+ "templateVar": null,
648
+ "configKey": "skipPrompts",
649
+ "default": false,
650
+ "validation": {},
651
+ "phase": "project",
652
+ "group": "project",
653
+ "appliesTo": {
654
+ "deploymentTargets": [
655
+ "*"
656
+ ],
657
+ "architectures": [
658
+ "*"
659
+ ]
660
+ },
661
+ "widget": null,
662
+ "prompt": null,
663
+ "cliBehavior": true,
664
+ "deprecated": false,
665
+ "since": "0.5.0"
666
+ },
667
+ "autoPrompt": {
668
+ "type": "boolean",
669
+ "description": "Fill defaults, prompt only for missing required values",
670
+ "cliFlag": "--auto-prompt",
671
+ "cliArgName": null,
672
+ "envVar": null,
673
+ "templateVar": null,
674
+ "configKey": "autoPrompt",
675
+ "default": false,
676
+ "validation": {},
677
+ "phase": "project",
678
+ "group": "project",
679
+ "appliesTo": {
680
+ "deploymentTargets": [
681
+ "*"
682
+ ],
683
+ "architectures": [
684
+ "*"
685
+ ]
686
+ },
687
+ "widget": null,
688
+ "prompt": null,
689
+ "cliBehavior": true,
690
+ "deprecated": false,
691
+ "since": "0.5.0"
692
+ },
693
+ "config": {
694
+ "type": "string",
695
+ "description": "Path to JSON configuration file",
696
+ "cliFlag": "--config",
697
+ "cliArgName": "path",
698
+ "envVar": null,
699
+ "templateVar": null,
700
+ "configKey": null,
701
+ "default": null,
702
+ "validation": {},
703
+ "phase": "project",
704
+ "group": "project",
705
+ "appliesTo": {
706
+ "deploymentTargets": [
707
+ "*"
708
+ ],
709
+ "architectures": [
710
+ "*"
711
+ ]
712
+ },
713
+ "widget": null,
714
+ "prompt": null,
715
+ "cliBehavior": true,
716
+ "deprecated": false,
717
+ "since": "0.5.0"
718
+ },
719
+ "projectDir": {
720
+ "type": "string",
721
+ "description": "Output directory path",
722
+ "cliFlag": "--project-dir",
723
+ "cliArgName": "dir",
724
+ "envVar": "ML_PROJECT_DIR",
725
+ "templateVar": null,
726
+ "configKey": "projectDir",
727
+ "default": null,
728
+ "validation": {},
729
+ "phase": "project",
730
+ "group": "project",
731
+ "appliesTo": {
732
+ "deploymentTargets": [
733
+ "*"
734
+ ],
735
+ "architectures": [
736
+ "*"
737
+ ]
738
+ },
739
+ "widget": null,
740
+ "prompt": null,
741
+ "deprecated": false,
742
+ "since": "0.3.0"
743
+ },
744
+ "force": {
745
+ "type": "boolean",
746
+ "description": "Overwrite existing output directory without prompting",
747
+ "cliFlag": "--force",
748
+ "cliArgName": null,
749
+ "envVar": null,
750
+ "templateVar": null,
751
+ "configKey": "force",
752
+ "default": false,
753
+ "validation": {},
754
+ "phase": "project",
755
+ "group": "project",
756
+ "appliesTo": {
757
+ "deploymentTargets": [
758
+ "*"
759
+ ],
760
+ "architectures": [
761
+ "*"
762
+ ]
763
+ },
764
+ "widget": null,
765
+ "prompt": null,
766
+ "cliBehavior": true,
767
+ "deprecated": false,
768
+ "since": "0.3.0"
769
+ },
770
+ "framework": {
771
+ "type": "enum",
772
+ "description": "ML framework",
773
+ "cliFlag": "--framework",
774
+ "cliArgName": "framework",
775
+ "envVar": null,
776
+ "templateVar": "framework",
777
+ "configKey": "framework",
778
+ "default": null,
779
+ "validation": {
780
+ "enum": [
781
+ "sklearn",
782
+ "xgboost",
783
+ "tensorflow",
784
+ "transformers"
785
+ ]
786
+ },
787
+ "phase": "model",
788
+ "group": "model",
789
+ "appliesTo": {
790
+ "deploymentTargets": [
791
+ "*"
792
+ ],
793
+ "architectures": [
794
+ "*"
795
+ ]
796
+ },
797
+ "widget": null,
798
+ "prompt": null,
799
+ "deprecated": true,
800
+ "replacedBy": "deploymentConfig",
801
+ "since": "0.1.0"
802
+ },
803
+ "modelFormat": {
804
+ "type": "string",
805
+ "description": "Model serialization format (pkl, joblib, json, model, ubj, keras, h5, SavedModel)",
806
+ "cliFlag": "--model-format",
807
+ "cliArgName": "format",
808
+ "envVar": "ML_MODEL_FORMAT",
809
+ "templateVar": "modelFormat",
810
+ "configKey": "modelFormat",
811
+ "default": null,
812
+ "validation": {
813
+ "enum": [
814
+ "pkl",
815
+ "joblib",
816
+ "json",
817
+ "model",
818
+ "ubj",
819
+ "keras",
820
+ "h5",
821
+ "SavedModel"
822
+ ]
823
+ },
824
+ "phase": "model",
825
+ "group": "model",
826
+ "appliesTo": {
827
+ "deploymentTargets": [
828
+ "*"
829
+ ],
830
+ "architectures": [
831
+ "http"
832
+ ]
833
+ },
834
+ "widget": null,
835
+ "prompt": {
836
+ "message": "Model format?",
837
+ "type": "list",
838
+ "when": "architecture === 'http'"
839
+ },
840
+ "deprecated": false,
841
+ "since": "0.1.0"
842
+ },
843
+ "modelServer": {
844
+ "type": "enum",
845
+ "description": "Model server",
846
+ "cliFlag": "--model-server",
847
+ "cliArgName": "server",
848
+ "envVar": null,
849
+ "templateVar": "modelServer",
850
+ "configKey": "modelServer",
851
+ "default": null,
852
+ "validation": {
853
+ "enum": [
854
+ "flask",
855
+ "fastapi",
856
+ "vllm",
857
+ "sglang"
858
+ ]
859
+ },
860
+ "phase": "model",
861
+ "group": "model",
862
+ "appliesTo": {
863
+ "deploymentTargets": [
864
+ "*"
865
+ ],
866
+ "architectures": [
867
+ "*"
868
+ ]
869
+ },
870
+ "widget": null,
871
+ "prompt": null,
872
+ "deprecated": true,
873
+ "replacedBy": "deploymentConfig",
874
+ "since": "0.1.0"
875
+ },
876
+ "baseImage": {
877
+ "type": "string",
878
+ "description": "Base container image for Dockerfile",
879
+ "cliFlag": "--base-image",
880
+ "cliArgName": "image",
881
+ "envVar": "ML_BASE_IMAGE",
882
+ "templateVar": "baseImage",
883
+ "configKey": "baseImage",
884
+ "default": null,
885
+ "validation": {},
886
+ "phase": "build",
887
+ "group": "build",
888
+ "appliesTo": {
889
+ "deploymentTargets": [
890
+ "*"
891
+ ],
892
+ "architectures": [
893
+ "*"
894
+ ]
895
+ },
896
+ "widget": null,
897
+ "prompt": null,
898
+ "deprecated": false,
899
+ "since": "0.3.0"
900
+ },
901
+ "region": {
902
+ "type": "string",
903
+ "description": "AWS region",
904
+ "cliFlag": "--region",
905
+ "cliArgName": "region",
906
+ "envVar": "ML_REGION",
907
+ "templateVar": "awsRegion",
908
+ "configKey": "region",
909
+ "default": "us-east-1",
910
+ "validation": {
911
+ "pattern": "^[a-z]{2}-[a-z]+-\\d+$"
912
+ },
913
+ "phase": "infrastructure",
914
+ "group": "infrastructure",
915
+ "appliesTo": {
916
+ "deploymentTargets": [
917
+ "*"
918
+ ],
919
+ "architectures": [
920
+ "*"
921
+ ]
922
+ },
923
+ "widget": null,
924
+ "prompt": {
925
+ "message": "AWS region?",
926
+ "type": "input"
927
+ },
928
+ "deprecated": false,
929
+ "since": "0.1.0"
930
+ },
931
+ "roleArn": {
932
+ "type": "string",
933
+ "description": "IAM role ARN for SageMaker execution",
934
+ "cliFlag": "--role-arn",
935
+ "cliArgName": "arn",
936
+ "envVar": "ML_ROLE_ARN",
937
+ "templateVar": "roleArn",
938
+ "configKey": "roleArn",
939
+ "default": null,
940
+ "validation": {
941
+ "pattern": "^arn:aws:iam::"
942
+ },
943
+ "phase": "infrastructure",
944
+ "group": "infrastructure",
945
+ "appliesTo": {
946
+ "deploymentTargets": [
947
+ "*"
948
+ ],
949
+ "architectures": [
950
+ "*"
951
+ ]
952
+ },
953
+ "widget": null,
954
+ "prompt": null,
955
+ "deprecated": false,
956
+ "since": "0.3.0"
957
+ },
958
+ "buildTarget": {
959
+ "type": "string",
960
+ "description": "Build target (codebuild)",
961
+ "cliFlag": "--build-target",
962
+ "cliArgName": "target",
963
+ "envVar": "ML_BUILD_TARGET",
964
+ "templateVar": "buildTarget",
965
+ "configKey": "buildTarget",
966
+ "default": "codebuild",
967
+ "validation": {
968
+ "enum": [
969
+ "codebuild"
970
+ ]
971
+ },
972
+ "phase": "build",
973
+ "group": "build",
974
+ "appliesTo": {
975
+ "deploymentTargets": [
976
+ "*"
977
+ ],
978
+ "architectures": [
979
+ "*"
980
+ ]
981
+ },
982
+ "widget": null,
983
+ "prompt": null,
984
+ "deprecated": false,
985
+ "since": "0.4.0"
986
+ },
987
+ "codebuildComputeType": {
988
+ "type": "string",
989
+ "description": "CodeBuild compute type (SMALL, MEDIUM, LARGE)",
990
+ "cliFlag": "--codebuild-compute-type",
991
+ "cliArgName": "type",
992
+ "envVar": "ML_CODEBUILD_COMPUTE_TYPE",
993
+ "templateVar": "codebuildComputeType",
994
+ "configKey": "codebuildComputeType",
995
+ "default": "BUILD_GENERAL1_LARGE",
996
+ "validation": {
997
+ "enum": [
998
+ "SMALL",
999
+ "MEDIUM",
1000
+ "LARGE",
1001
+ "BUILD_GENERAL1_SMALL",
1002
+ "BUILD_GENERAL1_MEDIUM",
1003
+ "BUILD_GENERAL1_LARGE",
1004
+ "BUILD_GENERAL1_2XLARGE"
1005
+ ]
1006
+ },
1007
+ "phase": "build",
1008
+ "group": "build",
1009
+ "appliesTo": {
1010
+ "deploymentTargets": [
1011
+ "*"
1012
+ ],
1013
+ "architectures": [
1014
+ "*"
1015
+ ]
1016
+ },
1017
+ "widget": null,
1018
+ "prompt": null,
1019
+ "deprecated": false,
1020
+ "since": "0.4.0"
1021
+ },
1022
+ "hfToken": {
1023
+ "type": "string",
1024
+ "description": "HuggingFace token (or $HF_TOKEN for env var reference)",
1025
+ "cliFlag": "--hf-token",
1026
+ "cliArgName": "token",
1027
+ "envVar": null,
1028
+ "templateVar": "hfToken",
1029
+ "configKey": "hfToken",
1030
+ "default": null,
1031
+ "validation": {},
1032
+ "phase": "auth",
1033
+ "group": "auth",
1034
+ "appliesTo": {
1035
+ "deploymentTargets": [
1036
+ "*"
1037
+ ],
1038
+ "architectures": [
1039
+ "transformers",
1040
+ "diffusors"
1041
+ ]
1042
+ },
1043
+ "widget": null,
1044
+ "prompt": null,
1045
+ "sensitive": true,
1046
+ "deprecated": false,
1047
+ "since": "0.3.0"
1048
+ },
1049
+ "hfTokenArn": {
1050
+ "type": "string",
1051
+ "description": "HuggingFace token ARN from Secrets Manager",
1052
+ "cliFlag": "--hf-token-arn",
1053
+ "cliArgName": "arn",
1054
+ "envVar": "ML_HF_TOKEN_ARN",
1055
+ "templateVar": "hfTokenArn",
1056
+ "configKey": "hfTokenArn",
1057
+ "default": null,
1058
+ "validation": {
1059
+ "pattern": "^arn:aws:secretsmanager:"
1060
+ },
1061
+ "phase": "auth",
1062
+ "group": "auth",
1063
+ "appliesTo": {
1064
+ "deploymentTargets": [
1065
+ "*"
1066
+ ],
1067
+ "architectures": [
1068
+ "transformers",
1069
+ "diffusors"
1070
+ ]
1071
+ },
1072
+ "widget": null,
1073
+ "prompt": null,
1074
+ "deprecated": false,
1075
+ "since": "0.5.0"
1076
+ },
1077
+ "ngcToken": {
1078
+ "type": "string",
1079
+ "description": "NVIDIA NGC token (or $NGC_API_KEY for env var reference)",
1080
+ "cliFlag": "--ngc-token",
1081
+ "cliArgName": "token",
1082
+ "envVar": null,
1083
+ "templateVar": "ngcToken",
1084
+ "configKey": "ngcToken",
1085
+ "default": null,
1086
+ "validation": {},
1087
+ "phase": "auth",
1088
+ "group": "auth",
1089
+ "appliesTo": {
1090
+ "deploymentTargets": [
1091
+ "*"
1092
+ ],
1093
+ "architectures": [
1094
+ "transformers"
1095
+ ]
1096
+ },
1097
+ "widget": null,
1098
+ "prompt": null,
1099
+ "sensitive": true,
1100
+ "deprecated": false,
1101
+ "since": "0.4.0"
1102
+ },
1103
+ "ngcTokenArn": {
1104
+ "type": "string",
1105
+ "description": "NVIDIA NGC token ARN from Secrets Manager",
1106
+ "cliFlag": "--ngc-token-arn",
1107
+ "cliArgName": "arn",
1108
+ "envVar": "ML_NGC_TOKEN_ARN",
1109
+ "templateVar": "ngcTokenArn",
1110
+ "configKey": "ngcTokenArn",
1111
+ "default": null,
1112
+ "validation": {
1113
+ "pattern": "^arn:aws:secretsmanager:"
1114
+ },
1115
+ "phase": "auth",
1116
+ "group": "auth",
1117
+ "appliesTo": {
1118
+ "deploymentTargets": [
1119
+ "*"
1120
+ ],
1121
+ "architectures": [
1122
+ "transformers"
1123
+ ]
1124
+ },
1125
+ "widget": null,
1126
+ "prompt": null,
1127
+ "deprecated": false,
1128
+ "since": "0.5.0"
1129
+ },
1130
+ "endpointInitialInstanceCount": {
1131
+ "type": "integer",
1132
+ "description": "Number of instances for the endpoint",
1133
+ "cliFlag": "--endpoint-initial-instance-count",
1134
+ "cliArgName": "n",
1135
+ "envVar": "ML_ENDPOINT_INSTANCE_COUNT",
1136
+ "templateVar": "endpointInitialInstanceCount",
1137
+ "configKey": "endpointInitialInstanceCount",
1138
+ "default": 1,
1139
+ "validation": {
1140
+ "min": 1,
1141
+ "max": 100
1142
+ },
1143
+ "phase": "infrastructure",
1144
+ "group": "endpoint",
1145
+ "appliesTo": {
1146
+ "deploymentTargets": [
1147
+ "managed-inference"
1148
+ ],
1149
+ "architectures": [
1150
+ "*"
1151
+ ]
1152
+ },
1153
+ "widget": null,
1154
+ "prompt": null,
1155
+ "deprecated": false,
1156
+ "since": "0.4.0"
1157
+ },
1158
+ "endpointDataCapturePercent": {
1159
+ "type": "integer",
1160
+ "description": "Data capture percentage for monitoring, 0-100",
1161
+ "cliFlag": "--endpoint-data-capture-percent",
1162
+ "cliArgName": "pct",
1163
+ "envVar": null,
1164
+ "templateVar": "endpointDataCapturePercent",
1165
+ "configKey": "endpointDataCapturePercent",
1166
+ "default": 0,
1167
+ "validation": {
1168
+ "min": 0,
1169
+ "max": 100
1170
+ },
1171
+ "phase": "infrastructure",
1172
+ "group": "endpoint",
1173
+ "appliesTo": {
1174
+ "deploymentTargets": [
1175
+ "managed-inference"
1176
+ ],
1177
+ "architectures": [
1178
+ "*"
1179
+ ]
1180
+ },
1181
+ "widget": null,
1182
+ "prompt": null,
1183
+ "deprecated": false,
1184
+ "since": "0.4.0"
1185
+ },
1186
+ "endpointVariantName": {
1187
+ "type": "string",
1188
+ "description": "Production variant name",
1189
+ "cliFlag": "--endpoint-variant-name",
1190
+ "cliArgName": "name",
1191
+ "envVar": null,
1192
+ "templateVar": "endpointVariantName",
1193
+ "configKey": "endpointVariantName",
1194
+ "default": "AllTraffic",
1195
+ "validation": {
1196
+ "pattern": "^[a-zA-Z0-9]([\\w-]{0,62}[a-zA-Z0-9])?$"
1197
+ },
1198
+ "phase": "infrastructure",
1199
+ "group": "endpoint",
1200
+ "appliesTo": {
1201
+ "deploymentTargets": [
1202
+ "managed-inference"
1203
+ ],
1204
+ "architectures": [
1205
+ "*"
1206
+ ]
1207
+ },
1208
+ "widget": null,
1209
+ "prompt": null,
1210
+ "deprecated": false,
1211
+ "since": "0.4.0"
1212
+ },
1213
+ "endpointVolumeSize": {
1214
+ "type": "integer",
1215
+ "description": "ML storage volume size in GB",
1216
+ "cliFlag": "--endpoint-volume-size",
1217
+ "cliArgName": "gb",
1218
+ "envVar": null,
1219
+ "templateVar": "endpointVolumeSize",
1220
+ "configKey": "endpointVolumeSize",
1221
+ "default": null,
1222
+ "validation": {
1223
+ "min": 1,
1224
+ "max": 16384
1225
+ },
1226
+ "phase": "infrastructure",
1227
+ "group": "endpoint",
1228
+ "appliesTo": {
1229
+ "deploymentTargets": [
1230
+ "managed-inference"
1231
+ ],
1232
+ "architectures": [
1233
+ "*"
1234
+ ]
1235
+ },
1236
+ "widget": null,
1237
+ "prompt": null,
1238
+ "deprecated": false,
1239
+ "since": "0.4.0"
1240
+ },
1241
+ "icCpuCount": {
1242
+ "type": "number",
1243
+ "description": "vCPUs allocated to the inference component",
1244
+ "cliFlag": "--ic-cpu-count",
1245
+ "cliArgName": "n",
1246
+ "envVar": "ML_IC_CPU_COUNT",
1247
+ "templateVar": "icCpuCount",
1248
+ "configKey": "icCpuCount",
1249
+ "default": null,
1250
+ "validation": {
1251
+ "min": 0.25,
1252
+ "max": 768
1253
+ },
1254
+ "phase": "infrastructure",
1255
+ "group": "inference-component",
1256
+ "appliesTo": {
1257
+ "deploymentTargets": [
1258
+ "managed-inference"
1259
+ ],
1260
+ "architectures": [
1261
+ "*"
1262
+ ]
1263
+ },
1264
+ "widget": null,
1265
+ "prompt": null,
1266
+ "deprecated": false,
1267
+ "since": "0.4.0"
1268
+ },
1269
+ "icModelWeight": {
1270
+ "type": "number",
1271
+ "description": "Traffic routing weight, 0-1",
1272
+ "cliFlag": "--ic-model-weight",
1273
+ "cliArgName": "weight",
1274
+ "envVar": null,
1275
+ "templateVar": "icModelWeight",
1276
+ "configKey": "icModelWeight",
1277
+ "default": 1,
1278
+ "validation": {
1279
+ "min": 0,
1280
+ "max": 1
1281
+ },
1282
+ "phase": "infrastructure",
1283
+ "group": "inference-component",
1284
+ "appliesTo": {
1285
+ "deploymentTargets": [
1286
+ "managed-inference"
1287
+ ],
1288
+ "architectures": [
1289
+ "*"
1290
+ ]
1291
+ },
1292
+ "widget": null,
1293
+ "prompt": null,
1294
+ "deprecated": false,
1295
+ "since": "0.5.0"
1296
+ },
1297
+ "asyncS3OutputPath": {
1298
+ "type": "string",
1299
+ "description": "S3 output path for async results",
1300
+ "cliFlag": "--async-s3-output-path",
1301
+ "cliArgName": "path",
1302
+ "envVar": "ML_ASYNC_S3_OUTPUT_PATH",
1303
+ "templateVar": "asyncS3OutputPath",
1304
+ "configKey": "asyncS3OutputPath",
1305
+ "default": null,
1306
+ "validation": {
1307
+ "pattern": "^s3://"
1308
+ },
1309
+ "phase": "infrastructure",
1310
+ "group": "async",
1311
+ "appliesTo": {
1312
+ "deploymentTargets": [
1313
+ "async-inference"
1314
+ ],
1315
+ "architectures": [
1316
+ "*"
1317
+ ]
1318
+ },
1319
+ "widget": null,
1320
+ "prompt": {
1321
+ "message": "S3 output path for async results?",
1322
+ "type": "input",
1323
+ "when": "deploymentTarget === 'async-inference'"
1324
+ },
1325
+ "deprecated": false,
1326
+ "since": "0.4.0"
1327
+ },
1328
+ "asyncSnsSuccessTopic": {
1329
+ "type": "string",
1330
+ "description": "SNS topic ARN for success notifications",
1331
+ "cliFlag": "--async-sns-success-topic",
1332
+ "cliArgName": "arn",
1333
+ "envVar": null,
1334
+ "templateVar": "asyncSnsSuccessTopic",
1335
+ "configKey": "asyncSnsSuccessTopic",
1336
+ "default": null,
1337
+ "validation": {
1338
+ "pattern": "^arn:aws:sns:"
1339
+ },
1340
+ "phase": "infrastructure",
1341
+ "group": "async",
1342
+ "appliesTo": {
1343
+ "deploymentTargets": [
1344
+ "async-inference"
1345
+ ],
1346
+ "architectures": [
1347
+ "*"
1348
+ ]
1349
+ },
1350
+ "widget": null,
1351
+ "prompt": null,
1352
+ "deprecated": false,
1353
+ "since": "0.4.0"
1354
+ },
1355
+ "asyncSnsErrorTopic": {
1356
+ "type": "string",
1357
+ "description": "SNS topic ARN for error notifications",
1358
+ "cliFlag": "--async-sns-error-topic",
1359
+ "cliArgName": "arn",
1360
+ "envVar": null,
1361
+ "templateVar": "asyncSnsErrorTopic",
1362
+ "configKey": "asyncSnsErrorTopic",
1363
+ "default": null,
1364
+ "validation": {
1365
+ "pattern": "^arn:aws:sns:"
1366
+ },
1367
+ "phase": "infrastructure",
1368
+ "group": "async",
1369
+ "appliesTo": {
1370
+ "deploymentTargets": [
1371
+ "async-inference"
1372
+ ],
1373
+ "architectures": [
1374
+ "*"
1375
+ ]
1376
+ },
1377
+ "widget": null,
1378
+ "prompt": null,
1379
+ "deprecated": false,
1380
+ "since": "0.4.0"
1381
+ },
1382
+ "asyncMaxConcurrent": {
1383
+ "type": "integer",
1384
+ "description": "Max concurrent invocations per instance",
1385
+ "cliFlag": "--async-max-concurrent",
1386
+ "cliArgName": "n",
1387
+ "envVar": null,
1388
+ "templateVar": "asyncMaxConcurrent",
1389
+ "configKey": "asyncMaxConcurrent",
1390
+ "default": 1,
1391
+ "validation": {
1392
+ "min": 1,
1393
+ "max": 100
1394
+ },
1395
+ "phase": "infrastructure",
1396
+ "group": "async",
1397
+ "appliesTo": {
1398
+ "deploymentTargets": [
1399
+ "async-inference"
1400
+ ],
1401
+ "architectures": [
1402
+ "*"
1403
+ ]
1404
+ },
1405
+ "widget": null,
1406
+ "prompt": null,
1407
+ "deprecated": false,
1408
+ "since": "0.4.0"
1409
+ },
1410
+ "batchInputPath": {
1411
+ "type": "string",
1412
+ "description": "S3 input path for batch data",
1413
+ "cliFlag": "--batch-input-path",
1414
+ "cliArgName": "path",
1415
+ "envVar": "ML_BATCH_INPUT_PATH",
1416
+ "templateVar": "batchInputPath",
1417
+ "configKey": "batchInputPath",
1418
+ "default": null,
1419
+ "validation": {
1420
+ "pattern": "^s3://"
1421
+ },
1422
+ "phase": "infrastructure",
1423
+ "group": "batch",
1424
+ "appliesTo": {
1425
+ "deploymentTargets": [
1426
+ "batch-transform"
1427
+ ],
1428
+ "architectures": [
1429
+ "*"
1430
+ ]
1431
+ },
1432
+ "widget": null,
1433
+ "prompt": {
1434
+ "message": "S3 input path?",
1435
+ "type": "input",
1436
+ "when": "deploymentTarget === 'batch-transform'"
1437
+ },
1438
+ "deprecated": false,
1439
+ "since": "0.4.0"
1440
+ },
1441
+ "batchOutputPath": {
1442
+ "type": "string",
1443
+ "description": "S3 output path for batch results",
1444
+ "cliFlag": "--batch-output-path",
1445
+ "cliArgName": "path",
1446
+ "envVar": "ML_BATCH_OUTPUT_PATH",
1447
+ "templateVar": "batchOutputPath",
1448
+ "configKey": "batchOutputPath",
1449
+ "default": null,
1450
+ "validation": {
1451
+ "pattern": "^s3://"
1452
+ },
1453
+ "phase": "infrastructure",
1454
+ "group": "batch",
1455
+ "appliesTo": {
1456
+ "deploymentTargets": [
1457
+ "batch-transform"
1458
+ ],
1459
+ "architectures": [
1460
+ "*"
1461
+ ]
1462
+ },
1463
+ "widget": null,
1464
+ "prompt": {
1465
+ "message": "S3 output path?",
1466
+ "type": "input",
1467
+ "when": "deploymentTarget === 'batch-transform'"
1468
+ },
1469
+ "deprecated": false,
1470
+ "since": "0.4.0"
1471
+ },
1472
+ "batchInstanceCount": {
1473
+ "type": "integer",
1474
+ "description": "Number of batch instances",
1475
+ "cliFlag": "--batch-instance-count",
1476
+ "cliArgName": "n",
1477
+ "envVar": null,
1478
+ "templateVar": "batchInstanceCount",
1479
+ "configKey": "batchInstanceCount",
1480
+ "default": 1,
1481
+ "validation": {
1482
+ "min": 1,
1483
+ "max": 100
1484
+ },
1485
+ "phase": "infrastructure",
1486
+ "group": "batch",
1487
+ "appliesTo": {
1488
+ "deploymentTargets": [
1489
+ "batch-transform"
1490
+ ],
1491
+ "architectures": [
1492
+ "*"
1493
+ ]
1494
+ },
1495
+ "widget": null,
1496
+ "prompt": null,
1497
+ "deprecated": false,
1498
+ "since": "0.4.0"
1499
+ },
1500
+ "batchSplitType": {
1501
+ "type": "enum",
1502
+ "description": "Input split type: Line, RecordIO, None",
1503
+ "cliFlag": "--batch-split-type",
1504
+ "cliArgName": "type",
1505
+ "envVar": null,
1506
+ "templateVar": "batchSplitType",
1507
+ "configKey": "batchSplitType",
1508
+ "default": "Line",
1509
+ "validation": {
1510
+ "enum": [
1511
+ "Line",
1512
+ "RecordIO",
1513
+ "None"
1514
+ ]
1515
+ },
1516
+ "phase": "infrastructure",
1517
+ "group": "batch",
1518
+ "appliesTo": {
1519
+ "deploymentTargets": [
1520
+ "batch-transform"
1521
+ ],
1522
+ "architectures": [
1523
+ "*"
1524
+ ]
1525
+ },
1526
+ "widget": null,
1527
+ "prompt": null,
1528
+ "deprecated": false,
1529
+ "since": "0.4.0"
1530
+ },
1531
+ "batchStrategy": {
1532
+ "type": "enum",
1533
+ "description": "Batch strategy: MultiRecord, SingleRecord",
1534
+ "cliFlag": "--batch-strategy",
1535
+ "cliArgName": "strategy",
1536
+ "envVar": null,
1537
+ "templateVar": "batchStrategy",
1538
+ "configKey": "batchStrategy",
1539
+ "default": "MultiRecord",
1540
+ "validation": {
1541
+ "enum": [
1542
+ "MultiRecord",
1543
+ "SingleRecord"
1544
+ ]
1545
+ },
1546
+ "phase": "infrastructure",
1547
+ "group": "batch",
1548
+ "appliesTo": {
1549
+ "deploymentTargets": [
1550
+ "batch-transform"
1551
+ ],
1552
+ "architectures": [
1553
+ "*"
1554
+ ]
1555
+ },
1556
+ "widget": null,
1557
+ "prompt": null,
1558
+ "deprecated": false,
1559
+ "since": "0.4.0"
1560
+ },
1561
+ "batchJoinSource": {
1562
+ "type": "enum",
1563
+ "description": "Join source: Input, None",
1564
+ "cliFlag": "--batch-join-source",
1565
+ "cliArgName": "source",
1566
+ "envVar": null,
1567
+ "templateVar": "batchJoinSource",
1568
+ "configKey": "batchJoinSource",
1569
+ "default": "None",
1570
+ "validation": {
1571
+ "enum": [
1572
+ "Input",
1573
+ "None"
1574
+ ]
1575
+ },
1576
+ "phase": "infrastructure",
1577
+ "group": "batch",
1578
+ "appliesTo": {
1579
+ "deploymentTargets": [
1580
+ "batch-transform"
1581
+ ],
1582
+ "architectures": [
1583
+ "*"
1584
+ ]
1585
+ },
1586
+ "widget": null,
1587
+ "prompt": null,
1588
+ "deprecated": false,
1589
+ "since": "0.4.0"
1590
+ },
1591
+ "batchMaxConcurrent": {
1592
+ "type": "integer",
1593
+ "description": "Max concurrent transforms per instance",
1594
+ "cliFlag": "--batch-max-concurrent",
1595
+ "cliArgName": "n",
1596
+ "envVar": null,
1597
+ "templateVar": "batchMaxConcurrent",
1598
+ "configKey": "batchMaxConcurrent",
1599
+ "default": 1,
1600
+ "validation": {
1601
+ "min": 1
1602
+ },
1603
+ "phase": "infrastructure",
1604
+ "group": "batch",
1605
+ "appliesTo": {
1606
+ "deploymentTargets": [
1607
+ "batch-transform"
1608
+ ],
1609
+ "architectures": [
1610
+ "*"
1611
+ ]
1612
+ },
1613
+ "widget": null,
1614
+ "prompt": null,
1615
+ "deprecated": false,
1616
+ "since": "0.4.0"
1617
+ },
1618
+ "batchMaxPayload": {
1619
+ "type": "integer",
1620
+ "description": "Max payload size in MB, 0-100",
1621
+ "cliFlag": "--batch-max-payload",
1622
+ "cliArgName": "mb",
1623
+ "envVar": null,
1624
+ "templateVar": "batchMaxPayload",
1625
+ "configKey": "batchMaxPayload",
1626
+ "default": 6,
1627
+ "validation": {
1628
+ "min": 0,
1629
+ "max": 100
1630
+ },
1631
+ "phase": "infrastructure",
1632
+ "group": "batch",
1633
+ "appliesTo": {
1634
+ "deploymentTargets": [
1635
+ "batch-transform"
1636
+ ],
1637
+ "architectures": [
1638
+ "*"
1639
+ ]
1640
+ },
1641
+ "widget": null,
1642
+ "prompt": null,
1643
+ "deprecated": false,
1644
+ "since": "0.4.0"
1645
+ },
1646
+ "hyperpodCluster": {
1647
+ "type": "string",
1648
+ "description": "HyperPod EKS cluster name",
1649
+ "cliFlag": "--hyperpod-cluster",
1650
+ "cliArgName": "name",
1651
+ "envVar": "ML_HYPERPOD_CLUSTER",
1652
+ "templateVar": "hyperPodCluster",
1653
+ "configKey": "hyperpodCluster",
1654
+ "default": null,
1655
+ "validation": {},
1656
+ "phase": "infrastructure",
1657
+ "group": "hyperpod",
1658
+ "appliesTo": {
1659
+ "deploymentTargets": [
1660
+ "hyperpod-eks"
1661
+ ],
1662
+ "architectures": [
1663
+ "*"
1664
+ ]
1665
+ },
1666
+ "widget": null,
1667
+ "prompt": {
1668
+ "message": "HyperPod cluster name?",
1669
+ "type": "input",
1670
+ "when": "deploymentTarget === 'hyperpod-eks'"
1671
+ },
1672
+ "deprecated": false,
1673
+ "since": "0.4.0"
1674
+ },
1675
+ "hyperpodNamespace": {
1676
+ "type": "string",
1677
+ "description": "Kubernetes namespace",
1678
+ "cliFlag": "--hyperpod-namespace",
1679
+ "cliArgName": "ns",
1680
+ "envVar": "ML_HYPERPOD_NAMESPACE",
1681
+ "templateVar": "hyperPodNamespace",
1682
+ "configKey": "hyperpodNamespace",
1683
+ "default": "default",
1684
+ "validation": {},
1685
+ "phase": "infrastructure",
1686
+ "group": "hyperpod",
1687
+ "appliesTo": {
1688
+ "deploymentTargets": [
1689
+ "hyperpod-eks"
1690
+ ],
1691
+ "architectures": [
1692
+ "*"
1693
+ ]
1694
+ },
1695
+ "widget": null,
1696
+ "prompt": null,
1697
+ "deprecated": false,
1698
+ "since": "0.4.0"
1699
+ },
1700
+ "hyperpodReplicas": {
1701
+ "type": "integer",
1702
+ "description": "Number of replicas",
1703
+ "cliFlag": "--hyperpod-replicas",
1704
+ "cliArgName": "count",
1705
+ "envVar": null,
1706
+ "templateVar": "hyperPodReplicas",
1707
+ "configKey": "hyperpodReplicas",
1708
+ "default": 1,
1709
+ "validation": {
1710
+ "min": 1
1711
+ },
1712
+ "phase": "infrastructure",
1713
+ "group": "hyperpod",
1714
+ "appliesTo": {
1715
+ "deploymentTargets": [
1716
+ "hyperpod-eks"
1717
+ ],
1718
+ "architectures": [
1719
+ "*"
1720
+ ]
1721
+ },
1722
+ "widget": null,
1723
+ "prompt": null,
1724
+ "deprecated": false,
1725
+ "since": "0.4.0"
1726
+ },
1727
+ "fsxVolumeHandle": {
1728
+ "type": "string",
1729
+ "description": "FSx for Lustre volume handle",
1730
+ "cliFlag": "--fsx-volume-handle",
1731
+ "cliArgName": "handle",
1732
+ "envVar": "ML_FSX_VOLUME_HANDLE",
1733
+ "templateVar": "fsxVolumeHandle",
1734
+ "configKey": "fsxVolumeHandle",
1735
+ "default": null,
1736
+ "validation": {},
1737
+ "phase": "infrastructure",
1738
+ "group": "hyperpod",
1739
+ "appliesTo": {
1740
+ "deploymentTargets": [
1741
+ "hyperpod-eks"
1742
+ ],
1743
+ "architectures": [
1744
+ "*"
1745
+ ]
1746
+ },
1747
+ "widget": null,
1748
+ "prompt": null,
1749
+ "deprecated": false,
1750
+ "since": "0.4.0"
1751
+ },
1752
+ "modelEnv": {
1753
+ "type": "string",
1754
+ "description": "Model env var, repeatable (e.g. VLLM_TENSOR_PARALLEL_SIZE=4)",
1755
+ "cliFlag": "--model-env",
1756
+ "cliArgName": "KEY=VALUE",
1757
+ "envVar": null,
1758
+ "templateVar": null,
1759
+ "configKey": "modelEnv",
1760
+ "default": [],
1761
+ "validation": {},
1762
+ "phase": "features",
1763
+ "group": "model",
1764
+ "appliesTo": {
1765
+ "deploymentTargets": [
1766
+ "*"
1767
+ ],
1768
+ "architectures": [
1769
+ "*"
1770
+ ]
1771
+ },
1772
+ "widget": {
1773
+ "section": "env-vars",
1774
+ "inputType": "repeatable-kv"
1775
+ },
1776
+ "prompt": null,
1777
+ "repeatable": true,
1778
+ "deprecated": false,
1779
+ "since": "0.5.0"
1780
+ },
1781
+ "serverEnv": {
1782
+ "type": "string",
1783
+ "description": "Server env var, repeatable (e.g. SGLANG_MEM_FRACTION=0.9)",
1784
+ "cliFlag": "--server-env",
1785
+ "cliArgName": "KEY=VALUE",
1786
+ "envVar": null,
1787
+ "templateVar": null,
1788
+ "configKey": "serverEnv",
1789
+ "default": [],
1790
+ "validation": {},
1791
+ "phase": "features",
1792
+ "group": "model",
1793
+ "appliesTo": {
1794
+ "deploymentTargets": [
1795
+ "*"
1796
+ ],
1797
+ "architectures": [
1798
+ "*"
1799
+ ]
1800
+ },
1801
+ "widget": {
1802
+ "section": "env-vars",
1803
+ "inputType": "repeatable-kv"
1804
+ },
1805
+ "prompt": null,
1806
+ "repeatable": true,
1807
+ "deprecated": false,
1808
+ "since": "0.5.0"
1809
+ },
1810
+ "includeSample": {
1811
+ "type": "boolean",
1812
+ "description": "Include sample model code",
1813
+ "cliFlag": "--include-sample",
1814
+ "cliArgName": null,
1815
+ "envVar": "ML_INCLUDE_SAMPLE",
1816
+ "templateVar": "includeSampleModel",
1817
+ "configKey": "includeSample",
1818
+ "default": true,
1819
+ "validation": {},
1820
+ "phase": "features",
1821
+ "group": "testing",
1822
+ "appliesTo": {
1823
+ "deploymentTargets": [
1824
+ "*"
1825
+ ],
1826
+ "architectures": [
1827
+ "http"
1828
+ ]
1829
+ },
1830
+ "widget": null,
1831
+ "prompt": {
1832
+ "message": "Include sample model?",
1833
+ "type": "confirm",
1834
+ "when": "architecture === 'http'"
1835
+ },
1836
+ "deprecated": false,
1837
+ "since": "0.1.0"
1838
+ },
1839
+ "includeTesting": {
1840
+ "type": "boolean",
1841
+ "description": "Include test suite",
1842
+ "cliFlag": "--include-testing",
1843
+ "cliArgName": null,
1844
+ "envVar": "ML_INCLUDE_TESTING",
1845
+ "templateVar": "includeTesting",
1846
+ "configKey": "includeTesting",
1847
+ "default": true,
1848
+ "validation": {},
1849
+ "phase": "features",
1850
+ "group": "testing",
1851
+ "appliesTo": {
1852
+ "deploymentTargets": [
1853
+ "*"
1854
+ ],
1855
+ "architectures": [
1856
+ "*"
1857
+ ]
1858
+ },
1859
+ "widget": null,
1860
+ "prompt": {
1861
+ "message": "Include test suite?",
1862
+ "type": "confirm"
1863
+ },
1864
+ "deprecated": false,
1865
+ "since": "0.1.0"
1866
+ },
1867
+ "testTypes": {
1868
+ "type": "string",
1869
+ "description": "Comma-separated test types",
1870
+ "cliFlag": "--test-types",
1871
+ "cliArgName": "types",
1872
+ "envVar": "ML_TEST_TYPES",
1873
+ "templateVar": "testTypes",
1874
+ "configKey": "testTypes",
1875
+ "default": null,
1876
+ "validation": {},
1877
+ "phase": "features",
1878
+ "group": "testing",
1879
+ "appliesTo": {
1880
+ "deploymentTargets": [
1881
+ "*"
1882
+ ],
1883
+ "architectures": [
1884
+ "*"
1885
+ ]
1886
+ },
1887
+ "widget": null,
1888
+ "prompt": null,
1889
+ "deprecated": false,
1890
+ "since": "0.1.0"
1891
+ },
1892
+ "smart": {
1893
+ "type": "boolean",
1894
+ "description": "Enable smart mode (live AWS API calls for MCP servers)",
1895
+ "cliFlag": "--smart",
1896
+ "cliArgName": null,
1897
+ "envVar": null,
1898
+ "templateVar": null,
1899
+ "configKey": "smart",
1900
+ "default": false,
1901
+ "validation": {},
1902
+ "phase": "project",
1903
+ "group": "project",
1904
+ "appliesTo": {
1905
+ "deploymentTargets": [
1906
+ "*"
1907
+ ],
1908
+ "architectures": [
1909
+ "*"
1910
+ ]
1911
+ },
1912
+ "widget": null,
1913
+ "prompt": null,
1914
+ "cliBehavior": true,
1915
+ "deprecated": false,
1916
+ "since": "0.5.0"
1917
+ },
1918
+ "discover": {
1919
+ "type": "boolean",
1920
+ "description": "Enable discovery mode for MCP servers",
1921
+ "cliFlag": "--discover",
1922
+ "cliArgName": null,
1923
+ "envVar": null,
1924
+ "templateVar": null,
1925
+ "configKey": "discover",
1926
+ "default": false,
1927
+ "validation": {},
1928
+ "phase": "project",
1929
+ "group": "project",
1930
+ "appliesTo": {
1931
+ "deploymentTargets": [
1932
+ "*"
1933
+ ],
1934
+ "architectures": [
1935
+ "*"
1936
+ ]
1937
+ },
1938
+ "widget": null,
1939
+ "prompt": null,
1940
+ "cliBehavior": true,
1941
+ "deprecated": false,
1942
+ "since": "0.5.0"
1943
+ },
1944
+ "noValidate": {
1945
+ "type": "boolean",
1946
+ "description": "Skip parameter validation",
1947
+ "cliFlag": "--no-validate",
1948
+ "cliArgName": null,
1949
+ "envVar": null,
1950
+ "templateVar": null,
1951
+ "configKey": "noValidate",
1952
+ "default": false,
1953
+ "validation": {},
1954
+ "phase": "project",
1955
+ "group": "project",
1956
+ "appliesTo": {
1957
+ "deploymentTargets": [
1958
+ "*"
1959
+ ],
1960
+ "architectures": [
1961
+ "*"
1962
+ ]
1963
+ },
1964
+ "widget": null,
1965
+ "prompt": null,
1966
+ "cliBehavior": true,
1967
+ "deprecated": false,
1968
+ "since": "0.5.0"
1969
+ },
1970
+ "validateEnvVars": {
1971
+ "type": "boolean",
1972
+ "description": "Validate environment variables against schema",
1973
+ "cliFlag": "--validate-env-vars",
1974
+ "cliArgName": null,
1975
+ "envVar": null,
1976
+ "templateVar": null,
1977
+ "configKey": "validateEnvVars",
1978
+ "default": false,
1979
+ "validation": {},
1980
+ "phase": "project",
1981
+ "group": "project",
1982
+ "appliesTo": {
1983
+ "deploymentTargets": [
1984
+ "*"
1985
+ ],
1986
+ "architectures": [
1987
+ "*"
1988
+ ]
1989
+ },
1990
+ "widget": null,
1991
+ "prompt": null,
1992
+ "cliBehavior": true,
1993
+ "deprecated": false,
1994
+ "since": "0.6.0"
1995
+ },
1996
+ "validateWithDocker": {
1997
+ "type": "boolean",
1998
+ "description": "Validate Dockerfile builds successfully",
1999
+ "cliFlag": "--validate-with-docker",
2000
+ "cliArgName": null,
2001
+ "envVar": null,
2002
+ "templateVar": null,
2003
+ "configKey": "validateWithDocker",
2004
+ "default": false,
2005
+ "validation": {},
2006
+ "phase": "project",
2007
+ "group": "project",
2008
+ "appliesTo": {
2009
+ "deploymentTargets": [
2010
+ "*"
2011
+ ],
2012
+ "architectures": [
2013
+ "*"
2014
+ ]
2015
+ },
2016
+ "widget": null,
2017
+ "prompt": null,
2018
+ "cliBehavior": true,
2019
+ "deprecated": false,
2020
+ "since": "0.6.0"
2021
+ },
2022
+ "offline": {
2023
+ "type": "boolean",
2024
+ "description": "Run in offline mode (no network calls)",
2025
+ "cliFlag": "--offline",
2026
+ "cliArgName": null,
2027
+ "envVar": null,
2028
+ "templateVar": null,
2029
+ "configKey": "offline",
2030
+ "default": false,
2031
+ "validation": {},
2032
+ "phase": "project",
2033
+ "group": "project",
2034
+ "appliesTo": {
2035
+ "deploymentTargets": [
2036
+ "*"
2037
+ ],
2038
+ "architectures": [
2039
+ "*"
2040
+ ]
2041
+ },
2042
+ "widget": null,
2043
+ "prompt": null,
2044
+ "cliBehavior": true,
2045
+ "deprecated": false,
2046
+ "since": "0.6.0"
2047
+ }
2048
+ }
2049
+ }