@aws/ml-container-creator 0.2.5 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. package/bin/cli.js +7 -2
  2. package/package.json +7 -8
  3. package/servers/base-image-picker/index.js +3 -3
  4. package/servers/base-image-picker/manifest.json +4 -2
  5. package/servers/instance-sizer/index.js +561 -0
  6. package/servers/instance-sizer/lib/instance-ranker.js +245 -0
  7. package/servers/instance-sizer/lib/model-resolver.js +265 -0
  8. package/servers/instance-sizer/lib/vram-estimator.js +177 -0
  9. package/servers/instance-sizer/manifest.json +17 -0
  10. package/servers/instance-sizer/package.json +15 -0
  11. package/servers/{instance-recommender → lib}/catalogs/instances.json +136 -34
  12. package/servers/{base-image-picker → lib}/catalogs/model-servers.json +19 -249
  13. package/servers/lib/catalogs/model-sizes.json +131 -0
  14. package/servers/lib/catalogs/models.json +602 -0
  15. package/servers/{model-picker → lib}/catalogs/popular-diffusors.json +32 -10
  16. package/servers/{model-picker → lib}/catalogs/popular-transformers.json +59 -26
  17. package/servers/{base-image-picker → lib}/catalogs/python-slim.json +12 -12
  18. package/servers/lib/schemas/image-catalog.schema.json +0 -12
  19. package/servers/lib/schemas/instances.schema.json +29 -0
  20. package/servers/lib/schemas/model-catalog.schema.json +12 -10
  21. package/servers/lib/schemas/unified-model-catalog.schema.json +129 -0
  22. package/servers/model-picker/index.js +2 -3
  23. package/servers/model-picker/manifest.json +2 -3
  24. package/servers/region-picker/index.js +1 -1
  25. package/servers/region-picker/manifest.json +1 -1
  26. package/src/app.js +17 -0
  27. package/src/lib/bootstrap-command-handler.js +38 -0
  28. package/src/lib/cli-handler.js +3 -3
  29. package/src/lib/config-manager.js +4 -1
  30. package/src/lib/configuration-manager.js +2 -2
  31. package/src/lib/cross-cutting-checker.js +341 -0
  32. package/src/lib/dry-run-validator.js +78 -0
  33. package/src/lib/generation-validator.js +102 -0
  34. package/src/lib/mcp-validator-config.js +89 -0
  35. package/src/lib/payload-builder.js +153 -0
  36. package/src/lib/prompt-runner.js +445 -135
  37. package/src/lib/prompts.js +1 -1
  38. package/src/lib/registry-loader.js +5 -5
  39. package/src/lib/schema-sync.js +203 -0
  40. package/src/lib/schema-validation-engine.js +195 -0
  41. package/src/lib/service-model-parser.js +102 -0
  42. package/src/lib/validate-runner.js +167 -0
  43. package/src/lib/validation-report.js +133 -0
  44. package/src/lib/validators/base-validator.js +36 -0
  45. package/src/lib/validators/catalog-validator.js +177 -0
  46. package/src/lib/validators/enum-validator.js +120 -0
  47. package/src/lib/validators/required-field-validator.js +150 -0
  48. package/src/lib/validators/type-validator.js +313 -0
  49. package/templates/Dockerfile +1 -1
  50. package/templates/do/build +15 -5
  51. package/templates/do/run +5 -1
  52. package/templates/do/validate +61 -0
  53. package/servers/instance-recommender/LICENSE +0 -202
  54. package/servers/instance-recommender/index.js +0 -284
  55. package/servers/instance-recommender/manifest.json +0 -16
  56. package/servers/instance-recommender/package.json +0 -15
  57. package/servers/{model-picker → lib}/catalogs/jumpstart-public.json +0 -0
  58. package/servers/{region-picker → lib}/catalogs/regions.json +0 -0
  59. package/servers/{base-image-picker → lib}/catalogs/triton-backends.json +0 -0
  60. package/servers/{base-image-picker → lib}/catalogs/triton.json +0 -0
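The headline change is structural: the per-server catalogs are consolidated into a shared servers/lib/catalogs directory (items 11–17 and 57–60), with JSON Schemas alongside in servers/lib/schemas (items 18–21); the instance-recommender server is removed (items 53–56) and replaced by the new instance-sizer (items 5–10); and a validation pipeline appears under src/lib (items 31–48). A minimal sketch of resolving a catalog from the shared location — the loadCatalog helper and path layout are illustrative assumptions, not the shipped registry-loader.js:

    // catalog-loader.js — hypothetical sketch; the actual loading logic lives in
    // package/src/lib/registry-loader.js, which this diff does not show.
    import { readFile } from 'node:fs/promises';
    import path from 'node:path';
    import { fileURLToPath } from 'node:url';

    // Catalogs now live in one shared directory instead of per-server copies.
    const CATALOG_DIR = path.join(
      path.dirname(fileURLToPath(import.meta.url)), 'servers', 'lib', 'catalogs');

    // e.g. loadCatalog('instances') -> parsed servers/lib/catalogs/instances.json
    export async function loadCatalog(name) {
      const raw = await readFile(path.join(CATALOG_DIR, `${name}.json`), 'utf8');
      return JSON.parse(raw);
    }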
package/servers/instance-sizer/package.json
@@ -0,0 +1,15 @@
+{
+  "name": "@amzn/ml-container-creator-instance-sizer",
+  "private": true,
+  "version": "1.0.0",
+  "description": "MCP server that estimates VRAM requirements from model metadata and returns filtered, ranked SageMaker instance recommendations.",
+  "type": "module",
+  "main": "index.js",
+  "license": "Apache-2.0",
+  "scripts": {
+    "test": "node test.js"
+  },
+  "dependencies": {
+    "@modelcontextprotocol/sdk": "^1.0.0"
+  }
+}
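The description summarizes the server's pipeline: estimate VRAM from model metadata, then filter and rank instances against it. The shipped estimator lives in servers/instance-sizer/lib/vram-estimator.js, whose contents this diff does not show; a rough sketch of the common heuristic it likely resembles — weights at bytes-per-parameter for the dtype, scaled for KV-cache and activation headroom. The function name and the 1.2× overhead factor are assumptions:

    // Hypothetical VRAM estimate; not the shipped lib/vram-estimator.js logic.
    const BYTES_PER_PARAM = { fp32: 4, fp16: 2, bf16: 2, int8: 1, int4: 0.5 };

    function estimateVramGb(paramsBillions, dtype = 'fp16', overhead = 1.2) {
      const bytes = BYTES_PER_PARAM[dtype];
      if (bytes === undefined) throw new Error(`unknown dtype: ${dtype}`);
      // 1e9 params x bytes/param = that many GB of weights; add ~20% headroom
      // for KV cache, activations, and runtime buffers.
      return paramsBillions * bytes * overhead;
    }

    // A 7B fp16 model: 7 * 2 * 1.2 ≈ 16.8 GB — fits a 24 GB A10G, not a 16 GB T4.
    console.log(estimateVramGb(7));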
package/servers/{instance-recommender → lib}/catalogs/instances.json
@@ -17,7 +17,10 @@
       "hardware": "None",
       "gpuArchitecture": "None",
       "defaultCudaVersion": null,
-      "notes": ""
+      "notes": "",
+      "gpuMemoryGb": null,
+      "gpuType": null,
+      "costTier": "low"
     },
     "ml.c5.xlarge": {
       "category": "cpu",
@@ -36,7 +39,10 @@
       "hardware": "None",
       "gpuArchitecture": "None",
       "defaultCudaVersion": null,
-      "notes": ""
+      "notes": "",
+      "gpuMemoryGb": null,
+      "gpuType": null,
+      "costTier": "low"
     },
     "ml.g4dn.12xlarge": {
       "category": "gpu",
@@ -59,7 +65,10 @@
       "hardware": "NVIDIA T4",
       "gpuArchitecture": "Turing",
       "defaultCudaVersion": "11.8",
-      "notes": "4x NVIDIA T4 GPUs (64GB total). Multi-GPU for tensor parallelism"
+      "notes": "4x NVIDIA T4 GPUs (64GB total). Multi-GPU for tensor parallelism",
+      "gpuMemoryGb": 16,
+      "gpuType": "NVIDIA T4",
+      "costTier": "low"
     },
     "ml.g4dn.16xlarge": {
       "category": "gpu",
@@ -82,7 +91,10 @@
       "hardware": "NVIDIA T4",
       "gpuArchitecture": "Turing",
       "defaultCudaVersion": "11.8",
-      "notes": "1x NVIDIA T4 GPU (16GB). Very high CPU/memory ratio"
+      "notes": "1x NVIDIA T4 GPU (16GB). Very high CPU/memory ratio",
+      "gpuMemoryGb": 16,
+      "gpuType": "NVIDIA T4",
+      "costTier": "low"
     },
     "ml.g4dn.2xlarge": {
       "category": "gpu",
@@ -108,7 +120,10 @@
       "hardware": "NVIDIA T4",
       "gpuArchitecture": "Turing",
       "defaultCudaVersion": "11.8",
-      "notes": "1x NVIDIA T4 GPU (16GB). Better CPU/memory for preprocessing"
+      "notes": "1x NVIDIA T4 GPU (16GB). Better CPU/memory for preprocessing",
+      "gpuMemoryGb": 16,
+      "gpuType": "NVIDIA T4",
+      "costTier": "low"
     },
     "ml.g4dn.4xlarge": {
       "category": "gpu",
@@ -131,7 +146,10 @@
       "hardware": "NVIDIA T4",
       "gpuArchitecture": "Turing",
       "defaultCudaVersion": "11.8",
-      "notes": "1x NVIDIA T4 GPU (16GB). High CPU/memory ratio"
+      "notes": "1x NVIDIA T4 GPU (16GB). High CPU/memory ratio",
+      "gpuMemoryGb": 16,
+      "gpuType": "NVIDIA T4",
+      "costTier": "low"
     },
     "ml.g4dn.8xlarge": {
       "category": "gpu",
@@ -154,7 +172,10 @@
       "hardware": "NVIDIA T4",
       "gpuArchitecture": "Turing",
       "defaultCudaVersion": "11.8",
-      "notes": "1x NVIDIA T4 GPU (16GB). Maximum CPU/memory for single GPU"
+      "notes": "1x NVIDIA T4 GPU (16GB). Maximum CPU/memory for single GPU",
+      "gpuMemoryGb": 16,
+      "gpuType": "NVIDIA T4",
+      "costTier": "low"
     },
     "ml.g4dn.xlarge": {
       "category": "gpu",
@@ -180,7 +201,10 @@
       "hardware": "NVIDIA T4",
       "gpuArchitecture": "Turing",
       "defaultCudaVersion": "11.8",
-      "notes": "1x NVIDIA T4 GPU (16GB). Cost-effective for smaller models"
+      "notes": "1x NVIDIA T4 GPU (16GB). Cost-effective for smaller models",
+      "gpuMemoryGb": 16,
+      "gpuType": "NVIDIA T4",
+      "costTier": "low"
     },
     "ml.g5.12xlarge": {
       "category": "gpu",
@@ -207,7 +231,10 @@
       "hardware": "NVIDIA A10G",
       "gpuArchitecture": "Ampere",
       "defaultCudaVersion": "12.1",
-      "notes": "4x NVIDIA A10G GPUs (96GB total). Good for tensor parallelism"
+      "notes": "4x NVIDIA A10G GPUs (96GB total). Good for tensor parallelism",
+      "gpuMemoryGb": 24,
+      "gpuType": "NVIDIA A10G",
+      "costTier": "medium"
     },
     "ml.g5.16xlarge": {
       "category": "gpu",
@@ -232,7 +259,10 @@
       "hardware": "NVIDIA A10G",
       "gpuArchitecture": "Ampere",
       "defaultCudaVersion": "12.1",
-      "notes": "1x NVIDIA A10G GPU (24GB). Very high CPU/memory ratio"
+      "notes": "1x NVIDIA A10G GPU (24GB). Very high CPU/memory ratio",
+      "gpuMemoryGb": 24,
+      "gpuType": "NVIDIA A10G",
+      "costTier": "medium"
     },
     "ml.g5.24xlarge": {
       "category": "gpu",
@@ -257,7 +287,10 @@
       "hardware": "NVIDIA A10G",
       "gpuArchitecture": "Ampere",
       "defaultCudaVersion": "12.1",
-      "notes": "4x NVIDIA A10G GPUs (96GB total). High CPU/memory with multi-GPU"
+      "notes": "4x NVIDIA A10G GPUs (96GB total). High CPU/memory with multi-GPU",
+      "gpuMemoryGb": 24,
+      "gpuType": "NVIDIA A10G",
+      "costTier": "medium"
     },
     "ml.g5.2xlarge": {
       "category": "gpu",
@@ -283,7 +316,10 @@
       "hardware": "NVIDIA A10G",
       "gpuArchitecture": "Ampere",
       "defaultCudaVersion": "12.1",
-      "notes": "1x NVIDIA A10G GPU (24GB). Better CPU/memory for preprocessing"
+      "notes": "1x NVIDIA A10G GPU (24GB). Better CPU/memory for preprocessing",
+      "gpuMemoryGb": 24,
+      "gpuType": "NVIDIA A10G",
+      "costTier": "medium"
     },
     "ml.g5.48xlarge": {
       "category": "gpu",
@@ -308,7 +344,10 @@
       "hardware": "NVIDIA A10G",
       "gpuArchitecture": "Ampere",
       "defaultCudaVersion": "12.1",
-      "notes": "8x NVIDIA A10G GPUs (192GB total). Maximum multi-GPU configuration"
+      "notes": "8x NVIDIA A10G GPUs (192GB total). Maximum multi-GPU configuration",
+      "gpuMemoryGb": 24,
+      "gpuType": "NVIDIA A10G",
+      "costTier": "medium"
     },
     "ml.g5.4xlarge": {
       "category": "gpu",
@@ -334,7 +373,10 @@
       "hardware": "NVIDIA A10G",
       "gpuArchitecture": "Ampere",
       "defaultCudaVersion": "12.1",
-      "notes": "1x NVIDIA A10G GPU (24GB). High CPU/memory for complex preprocessing"
+      "notes": "1x NVIDIA A10G GPU (24GB). High CPU/memory for complex preprocessing",
+      "gpuMemoryGb": 24,
+      "gpuType": "NVIDIA A10G",
+      "costTier": "medium"
     },
     "ml.g5.8xlarge": {
       "category": "gpu",
@@ -359,7 +401,10 @@
       "hardware": "NVIDIA A10G",
       "gpuArchitecture": "Ampere",
       "defaultCudaVersion": "12.1",
-      "notes": "1x NVIDIA A10G GPU (24GB). Maximum CPU/memory for single GPU"
+      "notes": "1x NVIDIA A10G GPU (24GB). Maximum CPU/memory for single GPU",
+      "gpuMemoryGb": 24,
+      "gpuType": "NVIDIA A10G",
+      "costTier": "medium"
     },
     "ml.g5.xlarge": {
       "category": "gpu",
@@ -385,7 +430,10 @@
       "hardware": "NVIDIA A10G",
       "gpuArchitecture": "Ampere",
       "defaultCudaVersion": "12.1",
-      "notes": "1x NVIDIA A10G GPU (24GB). Good for small to medium models"
+      "notes": "1x NVIDIA A10G GPU (24GB). Good for small to medium models",
+      "gpuMemoryGb": 24,
+      "gpuType": "NVIDIA A10G",
+      "costTier": "medium"
     },
     "ml.g6.12xlarge": {
       "category": "gpu",
@@ -412,7 +460,10 @@
       "hardware": "NVIDIA L4",
       "gpuArchitecture": "Ada Lovelace",
       "defaultCudaVersion": "12.2",
-      "notes": "Multi-GPU (newer)"
+      "notes": "Multi-GPU (newer)",
+      "gpuMemoryGb": 24,
+      "gpuType": "NVIDIA L4",
+      "costTier": "medium"
     },
     "ml.g6.2xlarge": {
       "category": "gpu",
@@ -438,7 +489,10 @@
       "hardware": "NVIDIA L4",
       "gpuArchitecture": "Ada Lovelace",
       "defaultCudaVersion": "12.2",
-      "notes": "Medium GPU (newer)"
+      "notes": "Medium GPU (newer)",
+      "gpuMemoryGb": 24,
+      "gpuType": "NVIDIA L4",
+      "costTier": "medium"
     },
     "ml.g6.xlarge": {
       "category": "gpu",
@@ -464,7 +518,10 @@
       "hardware": "NVIDIA L4",
       "gpuArchitecture": "Ada Lovelace",
       "defaultCudaVersion": "12.2",
-      "notes": "Small GPU (newer)"
+      "notes": "Small GPU (newer)",
+      "gpuMemoryGb": 24,
+      "gpuType": "NVIDIA L4",
+      "costTier": "medium"
     },
     "ml.inf2.24xlarge": {
       "category": "gpu",
@@ -488,7 +545,10 @@
       "hardware": "AWS Inferentia2",
       "gpuArchitecture": "Inferentia2",
       "defaultCudaVersion": "2.16.0",
-      "notes": "6x Inferentia2 chips. Multi-chip for large models"
+      "notes": "6x Inferentia2 chips. Multi-chip for large models",
+      "gpuMemoryGb": 32,
+      "gpuType": "AWS Inferentia2",
+      "costTier": "low"
     },
     "ml.inf2.48xlarge": {
       "category": "gpu",
@@ -512,7 +572,10 @@
       "hardware": "AWS Inferentia2",
       "gpuArchitecture": "Inferentia2",
       "defaultCudaVersion": "2.16.0",
-      "notes": "12x Inferentia2 chips. Maximum multi-chip configuration"
+      "notes": "12x Inferentia2 chips. Maximum multi-chip configuration",
+      "gpuMemoryGb": 32,
+      "gpuType": "AWS Inferentia2",
+      "costTier": "low"
     },
     "ml.inf2.8xlarge": {
       "category": "gpu",
@@ -536,7 +599,10 @@
       "hardware": "AWS Inferentia2",
       "gpuArchitecture": "Inferentia2",
       "defaultCudaVersion": "2.16.0",
-      "notes": "1x Inferentia2 chip. Higher CPU/memory for preprocessing"
+      "notes": "1x Inferentia2 chip. Higher CPU/memory for preprocessing",
+      "gpuMemoryGb": 32,
+      "gpuType": "AWS Inferentia2",
+      "costTier": "low"
     },
     "ml.inf2.xlarge": {
       "category": "gpu",
@@ -560,7 +626,10 @@
       "hardware": "AWS Inferentia2",
       "gpuArchitecture": "Inferentia2",
       "defaultCudaVersion": "2.16.0",
-      "notes": "1x Inferentia2 chip. Cost-effective for transformer inference"
+      "notes": "1x Inferentia2 chip. Cost-effective for transformer inference",
+      "gpuMemoryGb": 32,
+      "gpuType": "AWS Inferentia2",
+      "costTier": "low"
     },
     "ml.m5.2xlarge": {
       "category": "cpu",
@@ -580,7 +649,10 @@
       "hardware": "None",
       "gpuArchitecture": "None",
       "defaultCudaVersion": null,
-      "notes": "Large CPU workloads"
+      "notes": "Large CPU workloads",
+      "gpuMemoryGb": null,
+      "gpuType": null,
+      "costTier": "low"
     },
     "ml.m5.4xlarge": {
       "category": "cpu",
@@ -601,7 +673,10 @@
       "hardware": "None",
       "gpuArchitecture": "None",
       "defaultCudaVersion": null,
-      "notes": "XL CPU workloads"
+      "notes": "XL CPU workloads",
+      "gpuMemoryGb": null,
+      "gpuType": null,
+      "costTier": "low"
     },
     "ml.m5.large": {
       "category": "cpu",
@@ -623,7 +698,10 @@
       "hardware": "None",
       "gpuArchitecture": "None",
       "defaultCudaVersion": null,
-      "notes": "Small CPU workloads"
+      "notes": "Small CPU workloads",
+      "gpuMemoryGb": null,
+      "gpuType": null,
+      "costTier": "low"
     },
     "ml.m5.xlarge": {
       "category": "cpu",
@@ -643,7 +721,10 @@
       "hardware": "None",
       "gpuArchitecture": "None",
       "defaultCudaVersion": null,
-      "notes": "Medium CPU workloads"
+      "notes": "Medium CPU workloads",
+      "gpuMemoryGb": null,
+      "gpuType": null,
+      "costTier": "low"
     },
     "ml.p3.16xlarge": {
       "category": "gpu",
@@ -667,7 +748,10 @@
       "hardware": "NVIDIA V100",
       "gpuArchitecture": "Volta",
       "defaultCudaVersion": "11.8",
-      "notes": "8x NVIDIA V100 GPUs (128GB total). Maximum multi-GPU configuration"
+      "notes": "8x NVIDIA V100 GPUs (128GB total). Maximum multi-GPU configuration",
+      "gpuMemoryGb": 16,
+      "gpuType": "NVIDIA V100",
+      "costTier": "high"
     },
     "ml.p3.2xlarge": {
       "category": "gpu",
@@ -693,7 +777,10 @@
       "hardware": "NVIDIA V100",
       "gpuArchitecture": "Volta",
       "defaultCudaVersion": "11.8",
-      "notes": "1x NVIDIA V100 GPU (16GB). High-performance for training and inference"
+      "notes": "1x NVIDIA V100 GPU (16GB). High-performance for training and inference",
+      "gpuMemoryGb": 16,
+      "gpuType": "NVIDIA V100",
+      "costTier": "high"
     },
     "ml.p3.8xlarge": {
       "category": "gpu",
@@ -720,7 +807,10 @@
       "hardware": "NVIDIA V100",
       "gpuArchitecture": "Volta",
       "defaultCudaVersion": "11.8",
-      "notes": "4x NVIDIA V100 GPUs (64GB total). Multi-GPU for large models"
+      "notes": "4x NVIDIA V100 GPUs (64GB total). Multi-GPU for large models",
+      "gpuMemoryGb": 16,
+      "gpuType": "NVIDIA V100",
+      "costTier": "high"
     },
     "ml.r5.large": {
       "category": "cpu",
@@ -739,7 +829,10 @@
       "hardware": "None",
       "gpuArchitecture": "None",
       "defaultCudaVersion": null,
-      "notes": ""
+      "notes": "",
+      "gpuMemoryGb": null,
+      "gpuType": null,
+      "costTier": "low"
     },
     "ml.r5.xlarge": {
       "category": "cpu",
@@ -758,7 +851,10 @@
       "hardware": "None",
       "gpuArchitecture": "None",
       "defaultCudaVersion": null,
-      "notes": ""
+      "notes": "",
+      "gpuMemoryGb": null,
+      "gpuType": null,
+      "costTier": "low"
     },
     "ml.trn1.2xlarge": {
       "category": "gpu",
@@ -782,7 +878,10 @@
       "hardware": "AWS Trainium",
       "gpuArchitecture": "Trainium1",
       "defaultCudaVersion": "2.16.0",
-      "notes": "1x Trainium chip. Optimized for training, also supports inference"
+      "notes": "1x Trainium chip. Optimized for training, also supports inference",
+      "gpuMemoryGb": 32,
+      "gpuType": "AWS Trainium",
+      "costTier": "medium"
     },
     "ml.trn1.32xlarge": {
       "category": "gpu",
@@ -806,7 +905,10 @@
       "hardware": "AWS Trainium",
       "gpuArchitecture": "Trainium1",
       "defaultCudaVersion": "2.16.0",
-      "notes": "16x Trainium chips. Maximum multi-chip for large-scale training/inference"
+      "notes": "16x Trainium chips. Maximum multi-chip for large-scale training/inference",
+      "gpuMemoryGb": 32,
+      "gpuType": "AWS Trainium",
+      "costTier": "medium"
     }
   },
   "recommendations": {