npm - @aws/ml-container-creator - Versions diffs - 0.9.0 → 0.10.0 - Mend

@aws/ml-container-creator 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48) hide show

package/bin/cli.js +31 -137
package/config/parameter-schema-v2.json +2065 -0
package/package.json +6 -3
package/servers/lib/catalogs/jumpstart-public.json +101 -16
package/servers/lib/catalogs/models.json +182 -26
package/src/app.js +6 -389
package/src/lib/bootstrap-command-handler.js +75 -1078
package/src/lib/bootstrap-profile-manager.js +634 -0
package/src/lib/bootstrap-provisioners.js +421 -0
package/src/lib/config-loader.js +405 -0
package/src/lib/config-manager.js +59 -1668
package/src/lib/config-mcp-client.js +118 -0
package/src/lib/config-validator.js +634 -0
package/src/lib/cuda-resolver.js +140 -0
package/src/lib/e2e-catalog-validator.js +251 -3
package/src/lib/e2e-ci-recorder.js +103 -0
package/src/lib/generated/cli-options.js +471 -0
package/src/lib/generated/parameter-matrix.js +671 -0
package/src/lib/generated/validation-rules.js +202 -0
package/src/lib/marketplace-flow.js +276 -0
package/src/lib/mcp-query-runner.js +768 -0
package/src/lib/parameter-schema-validator.js +62 -18
package/src/lib/prompt-runner.js +41 -1504
package/src/lib/prompts/feature-prompts.js +172 -0
package/src/lib/prompts/index.js +48 -0
package/src/lib/prompts/infrastructure-prompts.js +690 -0
package/src/lib/prompts/model-prompts.js +552 -0
package/src/lib/prompts/project-prompts.js +70 -0
package/src/lib/prompts.js +2 -1446
package/src/lib/registry-command-handler.js +135 -3
package/src/lib/secrets-prompt-runner.js +251 -0
package/src/lib/template-variable-resolver.js +398 -0
package/templates/code/serve +5 -134
package/templates/code/serve.d/lmi.ejs +19 -0
package/templates/code/serve.d/sglang.ejs +47 -0
package/templates/code/serve.d/tensorrt-llm.ejs +53 -0
package/templates/code/serve.d/vllm.ejs +48 -0
package/templates/do/clean +1 -1387
package/templates/do/clean.d/async-inference.ejs +508 -0
package/templates/do/clean.d/batch-transform.ejs +512 -0
package/templates/do/clean.d/hyperpod-eks.ejs +481 -0
package/templates/do/clean.d/managed-inference.ejs +1043 -0
package/templates/do/deploy +1 -1766
package/templates/do/deploy.d/async-inference.ejs +501 -0
package/templates/do/deploy.d/batch-transform.ejs +529 -0
package/templates/do/deploy.d/hyperpod-eks.ejs +339 -0
package/templates/do/deploy.d/managed-inference.ejs +726 -0
package/config/parameter-schema.json +0 -88

package/src/lib/generated/cli-options.js ADDED Viewed

@@ -0,0 +1,471 @@
+// AUTO-GENERATED by scripts/codegen-cli.js — DO NOT EDIT
+// Source: config/parameter-schema-v2.json
+// Generated: 2026-05-23T12:02:19.426Z
+/**
+ * CLI option definitions derived from parameter-schema-v2.json.
+ * Each entry can be registered with Commander via:
+ *   new Option(entry.flag, entry.description)
+ *
+ * Entry shape, as it appears in this table:
+ *   - "flag":         long flag, optionally followed by a <placeholder> for its value.
+ *   - "description":  help text for the option.
+ *   - "choices":      (optional) array of accepted string values.
+ *   - "defaultValue": (optional) string, number, or boolean default.
+ *   - "hidden":       (optional) set to true on --framework and --model-server only.
+ *   - "repeatable":   (optional) set to true on --model-env and --server-env only.
+ * Every flag without a <placeholder> carries a boolean "defaultValue".
+ * How the optional fields are applied (choices, defaults, hiding, repetition)
+ * is decided by the consumer of this table, which is not part of this file.
+ *
+ * This table is generated; any correction belongs in the schema or the
+ * generator, not here.
+ *
+ * NOTE(review): --deployment-target lists "realtime-inference" in "choices"
+ * and uses it as "defaultValue", but the description does not mention it.
+ * Presumably one of managed-inference / realtime-inference is an alias of the
+ * other; confirm against the schema.
+ * NOTE(review): --codebuild-compute-type describes only SMALL, MEDIUM, LARGE,
+ * while "choices" also contains the BUILD_GENERAL1_* names and the default is
+ * "BUILD_GENERAL1_LARGE"; confirm the description is intentionally abbreviated.
+ * NOTE(review): --no-validate is a negated flag with "defaultValue": false.
+ * If the consumer forwards defaultValue to Commander's Option.default(), the
+ * derived `validate` value would default to false; confirm in bin/cli.js.
+ * NOTE(review): --benchmark-streaming, --include-sample and --include-testing
+ * default to true and have no --no-* counterpart in this table; confirm
+ * whether they can be switched off from the command line.
+ */
+export const cliOptions = [
+    {
+        "flag": "--project-name <name>",
+        "description": "Name for the generated project"
+    },
+    {
+        "flag": "--deployment-config <config>",
+        "description": "Deployment configuration (e.g. http-flask, transformers-vllm, triton-fil)",
+        "choices": [
+            "http-flask",
+            "http-fastapi",
+            "transformers-vllm",
+            "transformers-sglang",
+            "transformers-tensorrt-llm",
+            "transformers-lmi",
+            "transformers-djl",
+            "triton-fil",
+            "triton-onnxruntime",
+            "triton-tensorflow",
+            "triton-pytorch",
+            "triton-vllm",
+            "triton-tensorrtllm",
+            "triton-python",
+            "diffusors-vllm-omni",
+            "marketplace"
+        ]
+    },
+    {
+        "flag": "--model-name <name>",
+        "description": "Model identifier (hf-org/model, s3://..., registry://..., marketplace://...)"
+    },
+    {
+        "flag": "--deployment-target <target>",
+        "description": "Deployment target (managed-inference, async-inference, batch-transform, hyperpod-eks)",
+        "choices": [
+            "managed-inference",
+            "realtime-inference",
+            "async-inference",
+            "batch-transform",
+            "hyperpod-eks"
+        ],
+        "defaultValue": "realtime-inference"
+    },
+    {
+        "flag": "--instance-type <type>",
+        "description": "SageMaker instance type (e.g. ml.g5.xlarge, ml.m5.large)"
+    },
+    {
+        "flag": "--ic-gpu-count <n>",
+        "description": "GPUs allocated to the inference component"
+    },
+    {
+        "flag": "--ic-copy-count <n>",
+        "description": "Number of inference component copies",
+        "defaultValue": 1
+    },
+    {
+        "flag": "--ic-memory-size <mb>",
+        "description": "Memory in MB for the inference component",
+        "defaultValue": 1024
+    },
+    {
+        "flag": "--enable-lora",
+        "description": "Enable LoRA adapter serving",
+        "defaultValue": false
+    },
+    {
+        "flag": "--max-loras <n>",
+        "description": "Maximum concurrent LoRA adapters in GPU memory",
+        "defaultValue": 30
+    },
+    {
+        "flag": "--max-lora-rank <n>",
+        "description": "Maximum LoRA rank",
+        "defaultValue": 64
+    },
+    {
+        "flag": "--include-benchmark",
+        "description": "Include SageMaker AI Benchmarking",
+        "defaultValue": false
+    },
+    {
+        "flag": "--benchmark-concurrency <n>",
+        "description": "Benchmark concurrent requests",
+        "defaultValue": 10
+    },
+    {
+        "flag": "--benchmark-input-tokens <n>",
+        "description": "Benchmark mean input tokens",
+        "defaultValue": 550
+    },
+    {
+        "flag": "--benchmark-output-tokens <n>",
+        "description": "Benchmark mean output tokens",
+        "defaultValue": 150
+    },
+    {
+        "flag": "--benchmark-streaming",
+        "description": "Enable streaming in benchmark",
+        "defaultValue": true
+    },
+    {
+        "flag": "--benchmark-request-count <n>",
+        "description": "Total number of benchmark requests to send"
+    },
+    {
+        "flag": "--benchmark-s3-output-path <path>",
+        "description": "S3 URI for benchmark results output"
+    },
+    {
+        "flag": "--skip-prompts",
+        "description": "Skip interactive prompts and use configuration from other sources",
+        "defaultValue": false
+    },
+    {
+        "flag": "--auto-prompt",
+        "description": "Fill defaults, prompt only for missing required values",
+        "defaultValue": false
+    },
+    {
+        "flag": "--config <path>",
+        "description": "Path to JSON configuration file"
+    },
+    {
+        "flag": "--project-dir <dir>",
+        "description": "Output directory path"
+    },
+    {
+        "flag": "--force",
+        "description": "Overwrite existing output directory without prompting",
+        "defaultValue": false
+    },
+    {
+        "flag": "--framework <framework>",
+        "description": "ML framework",
+        "choices": [
+            "sklearn",
+            "xgboost",
+            "tensorflow",
+            "transformers"
+        ],
+        "hidden": true
+    },
+    {
+        "flag": "--model-format <format>",
+        "description": "Model serialization format (pkl, joblib, json, model, ubj, keras, h5, SavedModel)",
+        "choices": [
+            "pkl",
+            "joblib",
+            "json",
+            "model",
+            "ubj",
+            "keras",
+            "h5",
+            "SavedModel"
+        ]
+    },
+    {
+        "flag": "--model-server <server>",
+        "description": "Model server",
+        "choices": [
+            "flask",
+            "fastapi",
+            "vllm",
+            "sglang"
+        ],
+        "hidden": true
+    },
+    {
+        "flag": "--base-image <image>",
+        "description": "Base container image for Dockerfile"
+    },
+    {
+        "flag": "--region <region>",
+        "description": "AWS region",
+        "defaultValue": "us-east-1"
+    },
+    {
+        "flag": "--role-arn <arn>",
+        "description": "IAM role ARN for SageMaker execution"
+    },
+    {
+        "flag": "--build-target <target>",
+        "description": "Build target (codebuild)",
+        "choices": [
+            "codebuild"
+        ]
+    },
+    {
+        "flag": "--codebuild-compute-type <type>",
+        "description": "CodeBuild compute type (SMALL, MEDIUM, LARGE)",
+        "choices": [
+            "SMALL",
+            "MEDIUM",
+            "LARGE",
+            "BUILD_GENERAL1_SMALL",
+            "BUILD_GENERAL1_MEDIUM",
+            "BUILD_GENERAL1_LARGE",
+            "BUILD_GENERAL1_2XLARGE"
+        ],
+        "defaultValue": "BUILD_GENERAL1_LARGE"
+    },
+    {
+        "flag": "--hf-token <token>",
+        "description": "HuggingFace token (or $HF_TOKEN for env var reference)"
+    },
+    {
+        "flag": "--hf-token-arn <arn>",
+        "description": "HuggingFace token ARN from Secrets Manager"
+    },
+    {
+        "flag": "--ngc-token <token>",
+        "description": "NVIDIA NGC token (or $NGC_API_KEY for env var reference)"
+    },
+    {
+        "flag": "--ngc-token-arn <arn>",
+        "description": "NVIDIA NGC token ARN from Secrets Manager"
+    },
+    {
+        "flag": "--endpoint-initial-instance-count <n>",
+        "description": "Number of instances for the endpoint",
+        "defaultValue": 1
+    },
+    {
+        "flag": "--endpoint-data-capture-percent <pct>",
+        "description": "Data capture percentage for monitoring, 0-100",
+        "defaultValue": 0
+    },
+    {
+        "flag": "--endpoint-variant-name <name>",
+        "description": "Production variant name",
+        "defaultValue": "AllTraffic"
+    },
+    {
+        "flag": "--endpoint-volume-size <gb>",
+        "description": "ML storage volume size in GB"
+    },
+    {
+        "flag": "--ic-cpu-count <n>",
+        "description": "vCPUs allocated to the inference component"
+    },
+    {
+        "flag": "--ic-model-weight <weight>",
+        "description": "Traffic routing weight, 0-1",
+        "defaultValue": 1
+    },
+    {
+        "flag": "--async-s3-output-path <path>",
+        "description": "S3 output path for async results"
+    },
+    {
+        "flag": "--async-sns-success-topic <arn>",
+        "description": "SNS topic ARN for success notifications"
+    },
+    {
+        "flag": "--async-sns-error-topic <arn>",
+        "description": "SNS topic ARN for error notifications"
+    },
+    {
+        "flag": "--async-max-concurrent <n>",
+        "description": "Max concurrent invocations per instance",
+        "defaultValue": 1
+    },
+    {
+        "flag": "--batch-input-path <path>",
+        "description": "S3 input path for batch data"
+    },
+    {
+        "flag": "--batch-output-path <path>",
+        "description": "S3 output path for batch results"
+    },
+    {
+        "flag": "--batch-instance-count <n>",
+        "description": "Number of batch instances",
+        "defaultValue": 1
+    },
+    {
+        "flag": "--batch-split-type <type>",
+        "description": "Input split type: Line, RecordIO, None",
+        "choices": [
+            "Line",
+            "RecordIO",
+            "None"
+        ],
+        "defaultValue": "Line"
+    },
+    {
+        "flag": "--batch-strategy <strategy>",
+        "description": "Batch strategy: MultiRecord, SingleRecord",
+        "choices": [
+            "MultiRecord",
+            "SingleRecord"
+        ],
+        "defaultValue": "MultiRecord"
+    },
+    {
+        "flag": "--batch-join-source <source>",
+        "description": "Join source: Input, None",
+        "choices": [
+            "Input",
+            "None"
+        ],
+        "defaultValue": "None"
+    },
+    {
+        "flag": "--batch-max-concurrent <n>",
+        "description": "Max concurrent transforms per instance",
+        "defaultValue": 1
+    },
+    {
+        "flag": "--batch-max-payload <mb>",
+        "description": "Max payload size in MB, 0-100",
+        "defaultValue": 6
+    },
+    {
+        "flag": "--hyperpod-cluster <name>",
+        "description": "HyperPod EKS cluster name"
+    },
+    {
+        "flag": "--hyperpod-namespace <ns>",
+        "description": "Kubernetes namespace",
+        "defaultValue": "default"
+    },
+    {
+        "flag": "--hyperpod-replicas <count>",
+        "description": "Number of replicas",
+        "defaultValue": 1
+    },
+    {
+        "flag": "--fsx-volume-handle <handle>",
+        "description": "FSx for Lustre volume handle"
+    },
+    {
+        "flag": "--model-env <KEY=VALUE>",
+        "description": "Model env var, repeatable (e.g. VLLM_TENSOR_PARALLEL_SIZE=4)",
+        "repeatable": true
+    },
+    {
+        "flag": "--server-env <KEY=VALUE>",
+        "description": "Server env var, repeatable (e.g. SGLANG_MEM_FRACTION=0.9)",
+        "repeatable": true
+    },
+    {
+        "flag": "--include-sample",
+        "description": "Include sample model code",
+        "defaultValue": true
+    },
+    {
+        "flag": "--include-testing",
+        "description": "Include test suite",
+        "defaultValue": true
+    },
+    {
+        "flag": "--test-types <types>",
+        "description": "Comma-separated test types"
+    },
+    {
+        "flag": "--smart",
+        "description": "Enable smart mode (live AWS API calls for MCP servers)",
+        "defaultValue": false
+    },
+    {
+        "flag": "--discover",
+        "description": "Enable discovery mode for MCP servers",
+        "defaultValue": false
+    },
+    {
+        "flag": "--no-validate",
+        "description": "Skip parameter validation",
+        "defaultValue": false
+    },
+    {
+        "flag": "--validate-env-vars",
+        "description": "Validate environment variables against schema",
+        "defaultValue": false
+    },
+    {
+        "flag": "--validate-with-docker",
+        "description": "Validate Dockerfile builds successfully",
+        "defaultValue": false
+    },
+    {
+        "flag": "--offline",
+        "description": "Run in offline mode (no network calls)",
+        "defaultValue": false
+    }
+];
+/**
+ * Maps CLI flags to help section groups.
+ * Used by the custom help formatter in bin/cli.js.
+ *
+ * Keys are the long flags exactly as they appear in cliOptions, without the
+ * <placeholder> part (the negated flag is keyed as "--no-validate"). There is
+ * one key per cliOptions entry, listed in the same order.
+ * Values are group ids; the ids used in this table are: general, model,
+ * infra, ic, features, auth, endpoint, async, batch, hyperpod, env, mcp,
+ * validation. Section titles and ordering for these ids are not defined in
+ * this file.
+ *
+ * NOTE(review): --include-benchmark and all --benchmark-* flags are grouped
+ * under "general", whereas the other feature toggles (--enable-lora,
+ * --include-sample, --include-testing) are under "features"; confirm this is
+ * the intended grouping in the schema.
+ * NOTE(review): --offline is grouped under "validation" although its
+ * description is about network access; confirm against the schema.
+ */
+export const helpGroups = {
+    "--project-name": "general",
+    "--deployment-config": "model",
+    "--model-name": "model",
+    "--deployment-target": "infra",
+    "--instance-type": "infra",
+    "--ic-gpu-count": "ic",
+    "--ic-copy-count": "ic",
+    "--ic-memory-size": "ic",
+    "--enable-lora": "features",
+    "--max-loras": "features",
+    "--max-lora-rank": "features",
+    "--include-benchmark": "general",
+    "--benchmark-concurrency": "general",
+    "--benchmark-input-tokens": "general",
+    "--benchmark-output-tokens": "general",
+    "--benchmark-streaming": "general",
+    "--benchmark-request-count": "general",
+    "--benchmark-s3-output-path": "general",
+    "--skip-prompts": "general",
+    "--auto-prompt": "general",
+    "--config": "general",
+    "--project-dir": "general",
+    "--force": "general",
+    "--framework": "model",
+    "--model-format": "model",
+    "--model-server": "model",
+    "--base-image": "infra",
+    "--region": "infra",
+    "--role-arn": "infra",
+    "--build-target": "infra",
+    "--codebuild-compute-type": "infra",
+    "--hf-token": "auth",
+    "--hf-token-arn": "auth",
+    "--ngc-token": "auth",
+    "--ngc-token-arn": "auth",
+    "--endpoint-initial-instance-count": "endpoint",
+    "--endpoint-data-capture-percent": "endpoint",
+    "--endpoint-variant-name": "endpoint",
+    "--endpoint-volume-size": "endpoint",
+    "--ic-cpu-count": "ic",
+    "--ic-model-weight": "ic",
+    "--async-s3-output-path": "async",
+    "--async-sns-success-topic": "async",
+    "--async-sns-error-topic": "async",
+    "--async-max-concurrent": "async",
+    "--batch-input-path": "batch",
+    "--batch-output-path": "batch",
+    "--batch-instance-count": "batch",
+    "--batch-split-type": "batch",
+    "--batch-strategy": "batch",
+    "--batch-join-source": "batch",
+    "--batch-max-concurrent": "batch",
+    "--batch-max-payload": "batch",
+    "--hyperpod-cluster": "hyperpod",
+    "--hyperpod-namespace": "hyperpod",
+    "--hyperpod-replicas": "hyperpod",
+    "--fsx-volume-handle": "hyperpod",
+    "--model-env": "env",
+    "--server-env": "env",
+    "--include-sample": "features",
+    "--include-testing": "features",
+    "--test-types": "features",
+    "--smart": "mcp",
+    "--discover": "mcp",
+    "--no-validate": "validation",
+    "--validate-env-vars": "validation",
+    "--validate-with-docker": "validation",
+    "--offline": "validation"
+};