npm - @aws/ml-container-creator - Versions diffs - 0.10.0 → 0.12.1 - Mend

@aws/ml-container-creator 0.10.0 → 0.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (90)

package/LICENSE-THIRD-PARTY +9304 -0
package/bin/cli.js +2 -0
package/config/bootstrap-e2e-stack.json +341 -0
package/config/bootstrap-stack.json +40 -3
package/config/parameter-schema-v2.json +33 -22
package/config/tune-catalog.json +1781 -0
package/infra/ci-harness/buildspec.yml +1 -0
package/infra/ci-harness/lambda/path-prover/brain.ts +306 -0
package/infra/ci-harness/lambda/path-prover/write-results.ts +152 -0
package/infra/ci-harness/lib/ci-harness-stack.ts +851 -7
package/infra/ci-harness/state-machines/path-prover.asl.json +496 -0
package/package.json +53 -67
package/servers/base-image-picker/index.js +121 -121
package/servers/e2e-status/index.js +297 -0
package/servers/e2e-status/manifest.json +14 -0
package/servers/e2e-status/package.json +15 -0
package/servers/endpoint-picker/LICENSE +202 -0
package/servers/endpoint-picker/index.js +536 -0
package/servers/endpoint-picker/manifest.json +14 -0
package/servers/endpoint-picker/package.json +18 -0
package/servers/hyperpod-cluster-picker/index.js +125 -125
package/servers/instance-sizer/index.js +166 -153
package/servers/instance-sizer/lib/instance-ranker.js +120 -76
package/servers/instance-sizer/lib/model-resolver.js +61 -61
package/servers/instance-sizer/lib/quota-resolver.js +113 -113
package/servers/instance-sizer/lib/vram-estimator.js +31 -31
package/servers/lib/bedrock-client.js +38 -38
package/servers/lib/catalogs/instances.json +27 -0
package/servers/lib/catalogs/model-servers.json +201 -3
package/servers/lib/custom-validators.js +13 -13
package/servers/lib/dynamic-resolver.js +4 -4
package/servers/marketplace-picker/index.js +342 -0
package/servers/marketplace-picker/manifest.json +14 -0
package/servers/marketplace-picker/package.json +18 -0
package/servers/model-picker/index.js +382 -382
package/servers/region-picker/index.js +56 -56
package/servers/workload-picker/LICENSE +202 -0
package/servers/workload-picker/catalogs/workload-profiles.json +67 -0
package/servers/workload-picker/index.js +171 -0
package/servers/workload-picker/manifest.json +16 -0
package/servers/workload-picker/package.json +16 -0
package/src/app.js +12 -3
package/src/lib/bootstrap-command-handler.js +609 -15
package/src/lib/bootstrap-config.js +36 -0
package/src/lib/bootstrap-profile-manager.js +48 -41
package/src/lib/ci-register-helpers.js +74 -0
package/src/lib/config-loader.js +3 -0
package/src/lib/config-manager.js +7 -0
package/src/lib/config-validator.js +1 -1
package/src/lib/cuda-resolver.js +17 -8
package/src/lib/generated/cli-options.js +319 -314
package/src/lib/generated/parameter-matrix.js +672 -661
package/src/lib/generated/validation-rules.js +76 -72
package/src/lib/path-prover-brain.js +664 -0
package/src/lib/prompts/infrastructure-prompts.js +2 -2
package/src/lib/prompts/model-prompts.js +6 -0
package/src/lib/prompts/project-prompts.js +12 -0
package/src/lib/secrets-prompt-runner.js +4 -0
package/src/lib/template-manager.js +1 -1
package/src/lib/template-variable-resolver.js +87 -1
package/src/lib/tune-catalog-validator.js +37 -4
package/templates/Dockerfile +9 -0
package/templates/code/adapter_sidecar.py +444 -0
package/templates/code/serve +6 -0
package/templates/code/serve.d/vllm.ejs +1 -1
package/templates/do/.benchmark_writer.py +1476 -0
package/templates/do/.tune_helper.py +982 -57
package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
package/templates/do/adapter +154 -0
package/templates/do/benchmark +639 -85
package/templates/do/build +5 -0
package/templates/do/clean.d/async-inference.ejs +5 -0
package/templates/do/clean.d/batch-transform.ejs +5 -0
package/templates/do/clean.d/hyperpod-eks.ejs +5 -0
package/templates/do/clean.d/managed-inference.ejs +5 -0
package/templates/do/config +115 -45
package/templates/do/deploy.d/async-inference.ejs +30 -3
package/templates/do/deploy.d/batch-transform.ejs +29 -3
package/templates/do/deploy.d/hyperpod-eks.ejs +4 -0
package/templates/do/deploy.d/managed-inference.ejs +216 -14
package/templates/do/lib/endpoint-config.sh +1 -1
package/templates/do/lib/profile.sh +44 -0
package/templates/do/optimize +106 -37
package/templates/do/push +5 -0
package/templates/do/register +94 -0
package/templates/do/stage +567 -0
package/templates/do/submit +7 -0
package/templates/do/test +14 -0
package/templates/do/tune +382 -59
package/templates/do/validate +44 -4

package/src/lib/generated/cli-options.js CHANGED

@@ -1,6 +1,6 @@
 // AUTO-GENERATED by scripts/codegen-cli.js — DO NOT EDIT
 // Source: config/parameter-schema-v2.json
-// Generated: 2026-05-23T12:02:19.426Z
+// Generated: 2026-06-12T22:03:00.429Z
 /**
  * CLI option definitions derived from parameter-schema-v2.json.
@@ -9,389 +9,393 @@
  */
 export const cliOptions = [
     {
-        "flag": "--project-name <name>",
-        "description": "Name for the generated project"
-    },
-    {
-        "flag": "--deployment-config <config>",
-        "description": "Deployment configuration (e.g. http-flask, transformers-vllm, triton-fil)",
-        "choices": [
-            "http-flask",
-            "http-fastapi",
-            "transformers-vllm",
-            "transformers-sglang",
-            "transformers-tensorrt-llm",
-            "transformers-lmi",
-            "transformers-djl",
-            "triton-fil",
-            "triton-onnxruntime",
-            "triton-tensorflow",
-            "triton-pytorch",
-            "triton-vllm",
-            "triton-tensorrtllm",
-            "triton-python",
-            "diffusors-vllm-omni",
-            "marketplace"
+        'flag': '--project-name <name>',
+        'description': 'Name for the generated project'
+    },
+    {
+        'flag': '--deployment-config <config>',
+        'description': 'Deployment configuration (e.g. http-flask, transformers-vllm, triton-fil)',
+        'choices': [
+            'http-flask',
+            'http-fastapi',
+            'transformers-vllm',
+            'transformers-sglang',
+            'transformers-tensorrt-llm',
+            'transformers-lmi',
+            'transformers-djl',
+            'triton-fil',
+            'triton-onnxruntime',
+            'triton-tensorflow',
+            'triton-pytorch',
+            'triton-vllm',
+            'triton-tensorrtllm',
+            'triton-python',
+            'diffusors-vllm-omni',
+            'marketplace'
         ]
     },
     {
-        "flag": "--model-name <name>",
-        "description": "Model identifier (hf-org/model, s3://..., registry://..., marketplace://...)"
+        'flag': '--model-name <name>',
+        'description': 'Model identifier (hf-org/model, s3://..., registry://..., marketplace://...)'
     },
     {
-        "flag": "--deployment-target <target>",
-        "description": "Deployment target (managed-inference, async-inference, batch-transform, hyperpod-eks)",
-        "choices": [
-            "managed-inference",
-            "realtime-inference",
-            "async-inference",
-            "batch-transform",
-            "hyperpod-eks"
+        'flag': '--deployment-target <target>',
+        'description': 'Deployment target (managed-inference, async-inference, batch-transform, hyperpod-eks)',
+        'choices': [
+            'managed-inference',
+            'realtime-inference',
+            'async-inference',
+            'batch-transform',
+            'hyperpod-eks'
         ],
-        "defaultValue": "realtime-inference"
+        'defaultValue': 'realtime-inference'
     },
     {
-        "flag": "--instance-type <type>",
-        "description": "SageMaker instance type (e.g. ml.g5.xlarge, ml.m5.large)"
+        'flag': '--instance-type <type>',
+        'description': 'SageMaker instance type (e.g. ml.g5.xlarge, ml.m5.large)'
     },
     {
-        "flag": "--ic-gpu-count <n>",
-        "description": "GPUs allocated to the inference component"
+        'flag': '--ic-gpu-count <n>',
+        'description': 'GPUs allocated to the inference component'
     },
     {
-        "flag": "--ic-copy-count <n>",
-        "description": "Number of inference component copies",
-        "defaultValue": 1
+        'flag': '--ic-copy-count <n>',
+        'description': 'Number of inference component copies',
+        'defaultValue': 1
     },
     {
-        "flag": "--ic-memory-size <mb>",
-        "description": "Memory in MB for the inference component",
-        "defaultValue": 1024
+        'flag': '--ic-memory-size <mb>',
+        'description': 'Memory in MB for the inference component'
     },
     {
-        "flag": "--enable-lora",
-        "description": "Enable LoRA adapter serving",
-        "defaultValue": false
+        'flag': '--enable-lora',
+        'description': 'Enable LoRA adapter serving',
+        'defaultValue': false
     },
     {
-        "flag": "--max-loras <n>",
-        "description": "Maximum concurrent LoRA adapters in GPU memory",
-        "defaultValue": 30
+        'flag': '--max-loras <n>',
+        'description': 'Maximum concurrent LoRA adapters in GPU memory',
+        'defaultValue': 30
     },
     {
-        "flag": "--max-lora-rank <n>",
-        "description": "Maximum LoRA rank",
-        "defaultValue": 64
+        'flag': '--max-lora-rank <n>',
+        'description': 'Maximum LoRA rank',
+        'defaultValue': 64
     },
     {
-        "flag": "--include-benchmark",
-        "description": "Include SageMaker AI Benchmarking",
-        "defaultValue": false
+        'flag': '--include-benchmark',
+        'description': 'Include SageMaker AI Benchmarking scripts (do/benchmark, do/optimize). Workload configuration is specified at runtime via --workload flag.',
+        'defaultValue': false
     },
     {
-        "flag": "--benchmark-concurrency <n>",
-        "description": "Benchmark concurrent requests",
-        "defaultValue": 10
+        'flag': '--benchmark-concurrency <n>',
+        'description': 'Benchmark concurrent requests',
+        'defaultValue': 10
     },
     {
-        "flag": "--benchmark-input-tokens <n>",
-        "description": "Benchmark mean input tokens",
-        "defaultValue": 550
+        'flag': '--benchmark-input-tokens <n>',
+        'description': 'Benchmark mean input tokens',
+        'defaultValue': 550
     },
     {
-        "flag": "--benchmark-output-tokens <n>",
-        "description": "Benchmark mean output tokens",
-        "defaultValue": 150
+        'flag': '--benchmark-output-tokens <n>',
+        'description': 'Benchmark mean output tokens',
+        'defaultValue': 150
     },
     {
-        "flag": "--benchmark-streaming",
-        "description": "Enable streaming in benchmark",
-        "defaultValue": true
+        'flag': '--benchmark-streaming',
+        'description': 'Enable streaming in benchmark',
+        'defaultValue': true
     },
     {
-        "flag": "--benchmark-request-count <n>",
-        "description": "Total number of benchmark requests to send"
+        'flag': '--benchmark-request-count <n>',
+        'description': 'Total number of benchmark requests to send'
     },
     {
-        "flag": "--benchmark-s3-output-path <path>",
-        "description": "S3 URI for benchmark results output"
+        'flag': '--benchmark-s3-output-path <path>',
+        'description': 'S3 URI for benchmark results output'
     },
     {
-        "flag": "--skip-prompts",
-        "description": "Skip interactive prompts and use configuration from other sources",
-        "defaultValue": false
+        'flag': '--skip-prompts',
+        'description': 'Skip interactive prompts and use configuration from other sources',
+        'defaultValue': false
     },
     {
-        "flag": "--auto-prompt",
-        "description": "Fill defaults, prompt only for missing required values",
-        "defaultValue": false
+        'flag': '--auto-prompt',
+        'description': 'Fill defaults, prompt only for missing required values',
+        'defaultValue': false
     },
     {
-        "flag": "--config <path>",
-        "description": "Path to JSON configuration file"
+        'flag': '--config <path>',
+        'description': 'Path to JSON configuration file'
     },
     {
-        "flag": "--project-dir <dir>",
-        "description": "Output directory path"
+        'flag': '--project-dir <dir>',
+        'description': 'Output directory path'
     },
     {
-        "flag": "--force",
-        "description": "Overwrite existing output directory without prompting",
-        "defaultValue": false
+        'flag': '--force',
+        'description': 'Overwrite existing output directory without prompting',
+        'defaultValue': false
     },
     {
-        "flag": "--framework <framework>",
-        "description": "ML framework",
-        "choices": [
-            "sklearn",
-            "xgboost",
-            "tensorflow",
-            "transformers"
+        'flag': '--framework <framework>',
+        'description': 'ML framework',
+        'choices': [
+            'sklearn',
+            'xgboost',
+            'tensorflow',
+            'transformers'
         ],
-        "hidden": true
-    },
-    {
-        "flag": "--model-format <format>",
-        "description": "Model serialization format (pkl, joblib, json, model, ubj, keras, h5, SavedModel)",
-        "choices": [
-            "pkl",
-            "joblib",
-            "json",
-            "model",
-            "ubj",
-            "keras",
-            "h5",
-            "SavedModel"
+        'hidden': true
+    },
+    {
+        'flag': '--model-format <format>',
+        'description': 'Model serialization format (pkl, joblib, json, model, ubj, keras, h5, SavedModel)',
+        'choices': [
+            'pkl',
+            'joblib',
+            'json',
+            'model',
+            'ubj',
+            'keras',
+            'h5',
+            'SavedModel'
         ]
     },
     {
-        "flag": "--model-server <server>",
-        "description": "Model server",
-        "choices": [
-            "flask",
-            "fastapi",
-            "vllm",
-            "sglang"
+        'flag': '--model-server <server>',
+        'description': 'Model server',
+        'choices': [
+            'flask',
+            'fastapi',
+            'vllm',
+            'sglang'
         ],
-        "hidden": true
+        'hidden': true
     },
     {
-        "flag": "--base-image <image>",
-        "description": "Base container image for Dockerfile"
+        'flag': '--base-image <image>',
+        'description': 'Base container image for Dockerfile'
     },
     {
-        "flag": "--region <region>",
-        "description": "AWS region",
-        "defaultValue": "us-east-1"
+        'flag': '--region <region>',
+        'description': 'AWS region',
+        'defaultValue': 'us-east-1'
     },
     {
-        "flag": "--role-arn <arn>",
-        "description": "IAM role ARN for SageMaker execution"
+        'flag': '--role-arn <arn>',
+        'description': 'IAM role ARN for SageMaker execution'
     },
     {
-        "flag": "--build-target <target>",
-        "description": "Build target (codebuild)",
-        "choices": [
-            "codebuild"
-        ]
+        'flag': '--build-target <target>',
+        'description': 'Build target (codebuild)',
+        'choices': [
+            'codebuild'
+        ],
+        'defaultValue': 'codebuild'
+    },
+    {
+        'flag': '--codebuild-compute-type <type>',
+        'description': 'CodeBuild compute type (SMALL, MEDIUM, LARGE)',
+        'choices': [
+            'SMALL',
+            'MEDIUM',
+            'LARGE',
+            'BUILD_GENERAL1_SMALL',
+            'BUILD_GENERAL1_MEDIUM',
+            'BUILD_GENERAL1_LARGE',
+            'BUILD_GENERAL1_2XLARGE'
+        ],
+        'defaultValue': 'BUILD_GENERAL1_LARGE'
     },
     {
-        "flag": "--codebuild-compute-type <type>",
-        "description": "CodeBuild compute type (SMALL, MEDIUM, LARGE)",
-        "choices": [
-            "SMALL",
-            "MEDIUM",
-            "LARGE",
-            "BUILD_GENERAL1_SMALL",
-            "BUILD_GENERAL1_MEDIUM",
-            "BUILD_GENERAL1_LARGE",
-            "BUILD_GENERAL1_2XLARGE"
-        ],
-        "defaultValue": "BUILD_GENERAL1_LARGE"
+        'flag': '--hf-token <token>',
+        'description': 'HuggingFace token (or $HF_TOKEN for env var reference)'
     },
     {
-        "flag": "--hf-token <token>",
-        "description": "HuggingFace token (or $HF_TOKEN for env var reference)"
+        'flag': '--hf-token-arn <arn>',
+        'description': 'HuggingFace token ARN from Secrets Manager'
     },
     {
-        "flag": "--hf-token-arn <arn>",
-        "description": "HuggingFace token ARN from Secrets Manager"
+        'flag': '--ngc-token <token>',
+        'description': 'NVIDIA NGC token (or $NGC_API_KEY for env var reference)'
     },
     {
-        "flag": "--ngc-token <token>",
-        "description": "NVIDIA NGC token (or $NGC_API_KEY for env var reference)"
+        'flag': '--ngc-token-arn <arn>',
+        'description': 'NVIDIA NGC token ARN from Secrets Manager'
     },
     {
-        "flag": "--ngc-token-arn <arn>",
-        "description": "NVIDIA NGC token ARN from Secrets Manager"
+        'flag': '--endpoint-initial-instance-count <n>',
+        'description': 'Number of instances for the endpoint',
+        'defaultValue': 1
     },
     {
-        "flag": "--endpoint-initial-instance-count <n>",
-        "description": "Number of instances for the endpoint",
-        "defaultValue": 1
+        'flag': '--endpoint-data-capture-percent <pct>',
+        'description': 'Data capture percentage for monitoring, 0-100',
+        'defaultValue': 0
     },
     {
-        "flag": "--endpoint-data-capture-percent <pct>",
-        "description": "Data capture percentage for monitoring, 0-100",
-        "defaultValue": 0
+        'flag': '--endpoint-variant-name <name>',
+        'description': 'Production variant name',
+        'defaultValue': 'AllTraffic'
     },
     {
-        "flag": "--endpoint-variant-name <name>",
-        "description": "Production variant name",
-        "defaultValue": "AllTraffic"
+        'flag': '--endpoint-volume-size <gb>',
+        'description': 'ML storage volume size in GB'
     },
     {
-        "flag": "--endpoint-volume-size <gb>",
-        "description": "ML storage volume size in GB"
+        'flag': '--capacity-reservation-arn <arn>',
+        'description': 'Capacity reservation ARN (FTP or ODCR) for reserved instance deployment'
     },
     {
-        "flag": "--ic-cpu-count <n>",
-        "description": "vCPUs allocated to the inference component"
+        'flag': '--ic-cpu-count <n>',
+        'description': 'vCPUs allocated to the inference component'
     },
     {
-        "flag": "--ic-model-weight <weight>",
-        "description": "Traffic routing weight, 0-1",
-        "defaultValue": 1
+        'flag': '--ic-model-weight <weight>',
+        'description': 'Traffic routing weight, 0-1',
+        'defaultValue': 1
     },
     {
-        "flag": "--async-s3-output-path <path>",
-        "description": "S3 output path for async results"
+        'flag': '--async-s3-output-path <path>',
+        'description': 'S3 output path for async results'
     },
     {
-        "flag": "--async-sns-success-topic <arn>",
-        "description": "SNS topic ARN for success notifications"
+        'flag': '--async-sns-success-topic <arn>',
+        'description': 'SNS topic ARN for success notifications'
     },
     {
-        "flag": "--async-sns-error-topic <arn>",
-        "description": "SNS topic ARN for error notifications"
+        'flag': '--async-sns-error-topic <arn>',
+        'description': 'SNS topic ARN for error notifications'
     },
     {
-        "flag": "--async-max-concurrent <n>",
-        "description": "Max concurrent invocations per instance",
-        "defaultValue": 1
+        'flag': '--async-max-concurrent <n>',
+        'description': 'Max concurrent invocations per instance',
+        'defaultValue': 1
     },
     {
-        "flag": "--batch-input-path <path>",
-        "description": "S3 input path for batch data"
+        'flag': '--batch-input-path <path>',
+        'description': 'S3 input path for batch data'
     },
     {
-        "flag": "--batch-output-path <path>",
-        "description": "S3 output path for batch results"
+        'flag': '--batch-output-path <path>',
+        'description': 'S3 output path for batch results'
     },
     {
-        "flag": "--batch-instance-count <n>",
-        "description": "Number of batch instances",
-        "defaultValue": 1
+        'flag': '--batch-instance-count <n>',
+        'description': 'Number of batch instances',
+        'defaultValue': 1
     },
     {
-        "flag": "--batch-split-type <type>",
-        "description": "Input split type: Line, RecordIO, None",
-        "choices": [
-            "Line",
-            "RecordIO",
-            "None"
+        'flag': '--batch-split-type <type>',
+        'description': 'Input split type: Line, RecordIO, None',
+        'choices': [
+            'Line',
+            'RecordIO',
+            'None'
         ],
-        "defaultValue": "Line"
+        'defaultValue': 'Line'
     },
     {
-        "flag": "--batch-strategy <strategy>",
-        "description": "Batch strategy: MultiRecord, SingleRecord",
-        "choices": [
-            "MultiRecord",
-            "SingleRecord"
+        'flag': '--batch-strategy <strategy>',
+        'description': 'Batch strategy: MultiRecord, SingleRecord',
+        'choices': [
+            'MultiRecord',
+            'SingleRecord'
         ],
-        "defaultValue": "MultiRecord"
+        'defaultValue': 'MultiRecord'
     },
     {
-        "flag": "--batch-join-source <source>",
-        "description": "Join source: Input, None",
-        "choices": [
-            "Input",
-            "None"
+        'flag': '--batch-join-source <source>',
+        'description': 'Join source: Input, None',
+        'choices': [
+            'Input',
+            'None'
         ],
-        "defaultValue": "None"
+        'defaultValue': 'None'
     },
     {
-        "flag": "--batch-max-concurrent <n>",
-        "description": "Max concurrent transforms per instance",
-        "defaultValue": 1
+        'flag': '--batch-max-concurrent <n>',
+        'description': 'Max concurrent transforms per instance',
+        'defaultValue': 1
     },
     {
-        "flag": "--batch-max-payload <mb>",
-        "description": "Max payload size in MB, 0-100",
-        "defaultValue": 6
+        'flag': '--batch-max-payload <mb>',
+        'description': 'Max payload size in MB, 0-100',
+        'defaultValue': 6
     },
     {
-        "flag": "--hyperpod-cluster <name>",
-        "description": "HyperPod EKS cluster name"
+        'flag': '--hyperpod-cluster <name>',
+        'description': 'HyperPod EKS cluster name'
     },
     {
-        "flag": "--hyperpod-namespace <ns>",
-        "description": "Kubernetes namespace",
-        "defaultValue": "default"
+        'flag': '--hyperpod-namespace <ns>',
+        'description': 'Kubernetes namespace',
+        'defaultValue': 'default'
     },
     {
-        "flag": "--hyperpod-replicas <count>",
-        "description": "Number of replicas",
-        "defaultValue": 1
+        'flag': '--hyperpod-replicas <count>',
+        'description': 'Number of replicas',
+        'defaultValue': 1
     },
     {
-        "flag": "--fsx-volume-handle <handle>",
-        "description": "FSx for Lustre volume handle"
+        'flag': '--fsx-volume-handle <handle>',
+        'description': 'FSx for Lustre volume handle'
     },
     {
-        "flag": "--model-env <KEY=VALUE>",
-        "description": "Model env var, repeatable (e.g. VLLM_TENSOR_PARALLEL_SIZE=4)",
-        "repeatable": true
+        'flag': '--model-env <KEY=VALUE>',
+        'description': 'Model env var, repeatable (e.g. VLLM_TENSOR_PARALLEL_SIZE=4)',
+        'repeatable': true
     },
     {
-        "flag": "--server-env <KEY=VALUE>",
-        "description": "Server env var, repeatable (e.g. SGLANG_MEM_FRACTION=0.9)",
-        "repeatable": true
+        'flag': '--server-env <KEY=VALUE>',
+        'description': 'Server env var, repeatable (e.g. SGLANG_MEM_FRACTION=0.9)',
+        'repeatable': true
     },
     {
-        "flag": "--include-sample",
-        "description": "Include sample model code",
-        "defaultValue": true
+        'flag': '--include-sample',
+        'description': 'Include sample model code',
+        'defaultValue': true
     },
     {
-        "flag": "--include-testing",
-        "description": "Include test suite",
-        "defaultValue": true
+        'flag': '--include-testing',
+        'description': 'Include test suite',
+        'defaultValue': true
     },
     {
-        "flag": "--test-types <types>",
-        "description": "Comma-separated test types"
+        'flag': '--test-types <types>',
+        'description': 'Comma-separated test types'
     },
     {
-        "flag": "--smart",
-        "description": "Enable smart mode (live AWS API calls for MCP servers)",
-        "defaultValue": false
+        'flag': '--smart',
+        'description': 'Enable smart mode (live AWS API calls for MCP servers)',
+        'defaultValue': false
     },
     {
-        "flag": "--discover",
-        "description": "Enable discovery mode for MCP servers",
-        "defaultValue": false
+        'flag': '--discover',
+        'description': 'Enable discovery mode for MCP servers',
+        'defaultValue': false
     },
     {
-        "flag": "--no-validate",
-        "description": "Skip parameter validation",
-        "defaultValue": false
+        'flag': '--no-validate',
+        'description': 'Skip parameter validation',
+        'defaultValue': false
     },
     {
-        "flag": "--validate-env-vars",
-        "description": "Validate environment variables against schema",
-        "defaultValue": false
+        'flag': '--validate-env-vars',
+        'description': 'Validate environment variables against schema',
+        'defaultValue': false
     },
     {
-        "flag": "--validate-with-docker",
-        "description": "Validate Dockerfile builds successfully",
-        "defaultValue": false
+        'flag': '--validate-with-docker',
+        'description': 'Validate Dockerfile builds successfully',
+        'defaultValue': false
     },
     {
-        "flag": "--offline",
-        "description": "Run in offline mode (no network calls)",
-        "defaultValue": false
+        'flag': '--offline',
+        'description': 'Run in offline mode (no network calls)',
+        'defaultValue': false
     }
 ];
@@ -400,72 +404,73 @@ export const cliOptions = [
  * Used by the custom help formatter in bin/cli.js.
  */
 export const helpGroups = {
-    "--project-name": "general",
-    "--deployment-config": "model",
-    "--model-name": "model",
-    "--deployment-target": "infra",
-    "--instance-type": "infra",
-    "--ic-gpu-count": "ic",
-    "--ic-copy-count": "ic",
-    "--ic-memory-size": "ic",
-    "--enable-lora": "features",
-    "--max-loras": "features",
-    "--max-lora-rank": "features",
-    "--include-benchmark": "general",
-    "--benchmark-concurrency": "general",
-    "--benchmark-input-tokens": "general",
-    "--benchmark-output-tokens": "general",
-    "--benchmark-streaming": "general",
-    "--benchmark-request-count": "general",
-    "--benchmark-s3-output-path": "general",
-    "--skip-prompts": "general",
-    "--auto-prompt": "general",
-    "--config": "general",
-    "--project-dir": "general",
-    "--force": "general",
-    "--framework": "model",
-    "--model-format": "model",
-    "--model-server": "model",
-    "--base-image": "infra",
-    "--region": "infra",
-    "--role-arn": "infra",
-    "--build-target": "infra",
-    "--codebuild-compute-type": "infra",
-    "--hf-token": "auth",
-    "--hf-token-arn": "auth",
-    "--ngc-token": "auth",
-    "--ngc-token-arn": "auth",
-    "--endpoint-initial-instance-count": "endpoint",
-    "--endpoint-data-capture-percent": "endpoint",
-    "--endpoint-variant-name": "endpoint",
-    "--endpoint-volume-size": "endpoint",
-    "--ic-cpu-count": "ic",
-    "--ic-model-weight": "ic",
-    "--async-s3-output-path": "async",
-    "--async-sns-success-topic": "async",
-    "--async-sns-error-topic": "async",
-    "--async-max-concurrent": "async",
-    "--batch-input-path": "batch",
-    "--batch-output-path": "batch",
-    "--batch-instance-count": "batch",
-    "--batch-split-type": "batch",
-    "--batch-strategy": "batch",
-    "--batch-join-source": "batch",
-    "--batch-max-concurrent": "batch",
-    "--batch-max-payload": "batch",
-    "--hyperpod-cluster": "hyperpod",
-    "--hyperpod-namespace": "hyperpod",
-    "--hyperpod-replicas": "hyperpod",
-    "--fsx-volume-handle": "hyperpod",
-    "--model-env": "env",
-    "--server-env": "env",
-    "--include-sample": "features",
-    "--include-testing": "features",
-    "--test-types": "features",
-    "--smart": "mcp",
-    "--discover": "mcp",
-    "--no-validate": "validation",
-    "--validate-env-vars": "validation",
-    "--validate-with-docker": "validation",
-    "--offline": "validation"
+    '--project-name': 'general',
+    '--deployment-config': 'model',
+    '--model-name': 'model',
+    '--deployment-target': 'infra',
+    '--instance-type': 'infra',
+    '--ic-gpu-count': 'ic',
+    '--ic-copy-count': 'ic',
+    '--ic-memory-size': 'ic',
+    '--enable-lora': 'features',
+    '--max-loras': 'features',
+    '--max-lora-rank': 'features',
+    '--include-benchmark': 'general',
+    '--benchmark-concurrency': 'general',
+    '--benchmark-input-tokens': 'general',
+    '--benchmark-output-tokens': 'general',
+    '--benchmark-streaming': 'general',
+    '--benchmark-request-count': 'general',
+    '--benchmark-s3-output-path': 'general',
+    '--skip-prompts': 'general',
+    '--auto-prompt': 'general',
+    '--config': 'general',
+    '--project-dir': 'general',
+    '--force': 'general',
+    '--framework': 'model',
+    '--model-format': 'model',
+    '--model-server': 'model',
+    '--base-image': 'infra',
+    '--region': 'infra',
+    '--role-arn': 'infra',
+    '--build-target': 'infra',
+    '--codebuild-compute-type': 'infra',
+    '--hf-token': 'auth',
+    '--hf-token-arn': 'auth',
+    '--ngc-token': 'auth',
+    '--ngc-token-arn': 'auth',
+    '--endpoint-initial-instance-count': 'endpoint',
+    '--endpoint-data-capture-percent': 'endpoint',
+    '--endpoint-variant-name': 'endpoint',
+    '--endpoint-volume-size': 'endpoint',
+    '--capacity-reservation-arn': 'endpoint',
+    '--ic-cpu-count': 'ic',
+    '--ic-model-weight': 'ic',
+    '--async-s3-output-path': 'async',
+    '--async-sns-success-topic': 'async',
+    '--async-sns-error-topic': 'async',
+    '--async-max-concurrent': 'async',
+    '--batch-input-path': 'batch',
+    '--batch-output-path': 'batch',
+    '--batch-instance-count': 'batch',
+    '--batch-split-type': 'batch',
+    '--batch-strategy': 'batch',
+    '--batch-join-source': 'batch',
+    '--batch-max-concurrent': 'batch',
+    '--batch-max-payload': 'batch',
+    '--hyperpod-cluster': 'hyperpod',
+    '--hyperpod-namespace': 'hyperpod',
+    '--hyperpod-replicas': 'hyperpod',
+    '--fsx-volume-handle': 'hyperpod',
+    '--model-env': 'env',
+    '--server-env': 'env',
+    '--include-sample': 'features',
+    '--include-testing': 'features',
+    '--test-types': 'features',
+    '--smart': 'mcp',
+    '--discover': 'mcp',
+    '--no-validate': 'validation',
+    '--validate-env-vars': 'validation',
+    '--validate-with-docker': 'validation',
+    '--offline': 'validation'
 };