npm - @aws/ml-container-creator - Versions diffs - 1.0.2 → 1.0.3 - Mend

@aws/ml-container-creator 1.0.2 → 1.0.3

This diff shows the differences in content between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (10)

package/bin/cli.js +1 -1
package/config/tune-catalog.json +303 -1
package/package.json +2 -1
package/servers/lib/catalogs/model-servers.json +334 -120
package/src/lib/bootstrap-command-handler.js +12 -2
package/src/lib/bootstrap-profile-manager.js +16 -0
package/src/lib/cross-cutting-checker.js +6 -1
package/src/lib/generated/cli-options.js +1 -1
package/src/lib/generated/parameter-matrix.js +1 -1
package/src/lib/generated/validation-rules.js +1 -1

package/bin/cli.js CHANGED Viewed

@@ -162,7 +162,7 @@ program
     .command('bootstrap')
     .description('Set up AWS infrastructure (IAM role, ECR repo, S3 buckets)')
     .passThroughOptions()
-    .argument('[action]', 'Bootstrap action (status, use, list, remove, scan, prune, update, sync-schemas)')
+    .argument('[action]', 'Bootstrap action (status, use, list, remove, scan, prune, update, migrate, sync-schemas, sync-model-families)')
     .argument('[args...]', 'Additional arguments')
     .option('--profile <profile>', 'AWS profile name')
     .option('--region <region>', 'AWS region')

package/config/tune-catalog.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "version": "2026-05-27",
-  "lastSynced": "2026-05-28T09:48:25.209Z",
+  "lastSynced": "2026-06-26T19:01:02.821Z",
   "source": "https://docs.aws.amazon.com/sagemaker/latest/dg/model-customize-open-weight.html",
   "models": {
     "huggingface-llm-qwen2-5-7b-instruct": {
@@ -1614,6 +1614,24 @@
               "prompt": "array"
             }
           }
+        },
+        "dpo": {
+          "trainingTypes": [
+            "lora"
+          ],
+          "datasetFormat": "default-dpo",
+          "datasetSchema": {
+            "required": [
+              "prompt",
+              "chosen",
+              "rejected"
+            ],
+            "types": {
+              "prompt": "string",
+              "chosen": "string",
+              "rejected": "string"
+            }
+          }
         }
       },
       "goldenPath": false
@@ -1667,6 +1685,24 @@
               "prompt": "array"
             }
           }
+        },
+        "dpo": {
+          "trainingTypes": [
+            "lora"
+          ],
+          "datasetFormat": "default-dpo",
+          "datasetSchema": {
+            "required": [
+              "prompt",
+              "chosen",
+              "rejected"
+            ],
+            "types": {
+              "prompt": "string",
+              "chosen": "string",
+              "rejected": "string"
+            }
+          }
         }
       },
       "goldenPath": false
@@ -1773,6 +1809,272 @@
               "prompt": "array"
             }
           }
+        },
+        "dpo": {
+          "trainingTypes": [
+            "lora"
+          ],
+          "datasetFormat": "default-dpo",
+          "datasetSchema": {
+            "required": [
+              "prompt",
+              "chosen",
+              "rejected"
+            ],
+            "types": {
+              "prompt": "string",
+              "chosen": "string",
+              "rejected": "string"
+            }
+          }
+        }
+      },
+      "goldenPath": false
+    },
+    "huggingface-llm-nvidia-nemotron-3-super-120b-a12b-bf16": {
+      "family": "huggingface-llm-nvidia-nemotron",
+      "provider": "unknown",
+      "displayName": "NVIDIA-Nemotron-3-Super-120B-A12B-BF16",
+      "huggingFaceId": "",
+      "techniques": {
+        "sft": {
+          "trainingTypes": [
+            "lora"
+          ],
+          "datasetFormat": "default-sft",
+          "datasetSchema": {
+            "required": [
+              "prompt",
+              "completion"
+            ],
+            "types": {
+              "prompt": "string",
+              "completion": "string"
+            }
+          }
+        },
+        "rlvr": {
+          "trainingTypes": [
+            "lora"
+          ],
+          "datasetFormat": "default-rlvr",
+          "datasetSchema": {
+            "required": [
+              "prompt"
+            ],
+            "types": {
+              "prompt": "array"
+            }
+          }
+        },
+        "rlaif": {
+          "trainingTypes": [
+            "lora"
+          ],
+          "datasetFormat": "default-rlaif",
+          "datasetSchema": {
+            "required": [
+              "prompt"
+            ],
+            "types": {
+              "prompt": "array"
+            }
+          }
+        }
+      },
+      "goldenPath": false
+    },
+    "huggingface-reasoning-nvidia-nemotron-3-nano-30b-a3b-bf16": {
+      "family": "huggingface-reasoning-nvidia-nemotron",
+      "provider": "unknown",
+      "displayName": "NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
+      "huggingFaceId": "",
+      "techniques": {
+        "sft": {
+          "trainingTypes": [
+            "lora"
+          ],
+          "datasetFormat": "default-sft",
+          "datasetSchema": {
+            "required": [
+              "prompt",
+              "completion"
+            ],
+            "types": {
+              "prompt": "string",
+              "completion": "string"
+            }
+          }
+        },
+        "rlaif": {
+          "trainingTypes": [
+            "lora"
+          ],
+          "datasetFormat": "default-rlaif",
+          "datasetSchema": {
+            "required": [
+              "prompt"
+            ],
+            "types": {
+              "prompt": "array"
+            }
+          }
+        },
+        "rlvr": {
+          "trainingTypes": [
+            "lora"
+          ],
+          "datasetFormat": "default-rlvr",
+          "datasetSchema": {
+            "required": [
+              "prompt"
+            ],
+            "types": {
+              "prompt": "array"
+            }
+          }
+        }
+      },
+      "goldenPath": false
+    },
+    "huggingface-vlm-gemma-4-e4b-it": {
+      "family": "huggingface-vlm",
+      "provider": "unknown",
+      "displayName": "gemma-4-e4b-it",
+      "huggingFaceId": "",
+      "techniques": {
+        "dpo": {
+          "trainingTypes": [
+            "lora"
+          ],
+          "datasetFormat": "default-dpo",
+          "datasetSchema": {
+            "required": [
+              "prompt",
+              "chosen",
+              "rejected"
+            ],
+            "types": {
+              "prompt": "string",
+              "chosen": "string",
+              "rejected": "string"
+            }
+          }
+        },
+        "sft": {
+          "trainingTypes": [
+            "lora"
+          ],
+          "datasetFormat": "default-sft",
+          "datasetSchema": {
+            "required": [
+              "prompt",
+              "completion"
+            ],
+            "types": {
+              "prompt": "string",
+              "completion": "string"
+            }
+          }
+        },
+        "rlvr": {
+          "trainingTypes": [
+            "lora"
+          ],
+          "datasetFormat": "default-rlvr",
+          "datasetSchema": {
+            "required": [
+              "prompt"
+            ],
+            "types": {
+              "prompt": "array"
+            }
+          }
+        },
+        "rlaif": {
+          "trainingTypes": [
+            "lora"
+          ],
+          "datasetFormat": "default-rlaif",
+          "datasetSchema": {
+            "required": [
+              "prompt"
+            ],
+            "types": {
+              "prompt": "array"
+            }
+          }
+        }
+      },
+      "goldenPath": false
+    },
+    "huggingface-vlm-gemma-4-31b-it": {
+      "family": "huggingface-vlm",
+      "provider": "unknown",
+      "displayName": "gemma-4-31b-it",
+      "huggingFaceId": "",
+      "techniques": {
+        "dpo": {
+          "trainingTypes": [
+            "lora"
+          ],
+          "datasetFormat": "default-dpo",
+          "datasetSchema": {
+            "required": [
+              "prompt",
+              "chosen",
+              "rejected"
+            ],
+            "types": {
+              "prompt": "string",
+              "chosen": "string",
+              "rejected": "string"
+            }
+          }
+        },
+        "sft": {
+          "trainingTypes": [
+            "lora"
+          ],
+          "datasetFormat": "default-sft",
+          "datasetSchema": {
+            "required": [
+              "prompt",
+              "completion"
+            ],
+            "types": {
+              "prompt": "string",
+              "completion": "string"
+            }
+          }
+        },
+        "rlaif": {
+          "trainingTypes": [
+            "lora"
+          ],
+          "datasetFormat": "default-rlaif",
+          "datasetSchema": {
+            "required": [
+              "prompt"
+            ],
+            "types": {
+              "prompt": "array"
+            }
+          }
+        },
+        "rlvr": {
+          "trainingTypes": [
+            "lora"
+          ],
+          "datasetFormat": "default-rlvr",
+          "datasetSchema": {
+            "required": [
+              "prompt"
+            ],
+            "types": {
+              "prompt": "array"
+            }
+          }
         }
       },
       "goldenPath": false

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@aws/ml-container-creator",
-  "version": "1.0.2",
+  "version": "1.0.3",
   "description": "Build and deploy custom ML containers on AWS SageMaker with minimal configuration.",
   "main": "src/index.js",
   "bin": {
@@ -107,6 +107,7 @@
     "prepare": "husky || true"
   },
   "dependencies": {
+    "@aws/ml-container-creator": "^1.0.2",
     "@inquirer/prompts": "^8.4.2",
     "@modelcontextprotocol/sdk": "^1.27.1",
     "ajv": "^8.12.0",

package/servers/lib/catalogs/model-servers.json CHANGED Viewed

@@ -1,14 +1,12 @@
 {
     "vllm": [
         {
-            "image": "vllm/vllm-openai:v0.20.2",
-            "tag": "v0.20.2",
+            "image": "vllm/vllm-openai:v0.23.0",
+            "tag": "v0.23.0",
             "architecture": "amd64",
-            "created": "2026-05-10T00:00:00Z",
+            "created": "2026-06-13T00:36:45.565402Z",
             "labels": {
-                "cuda_version": "12.9",
-                "python_version": "3.12",
-                "framework_version": "0.20.2"
+                "framework_version": "0.23.0"
             },
             "registry": "dockerhub",
             "repository": "vllm/vllm-openai",
@@ -22,15 +20,6 @@
                 },
                 "inferenceAmiVersion": "al2-ami-sagemaker-inference-gpu-3-1"
             },
-            "accelerator": {
-                "type": "cuda",
-                "version": "12.9",
-                "versionRange": {
-                    "min": "12.4",
-                    "max": "12.9"
-                }
-            },
-            "validationLevel": "community-validated",
             "profiles": {
                 "low-latency": {
                     "displayName": "Low Latency",
@@ -64,7 +53,16 @@
                     "notes": "Requires instance with 4+ GPUs. Set TENSOR_PARALLEL_SIZE to match GPU count"
                 }
             },
+            "accelerator": {
+                "type": "cuda",
+                "version": "12.9",
+                "versionRange": {
+                    "min": "12.4",
+                    "max": "12.9"
+                }
+            },
             "notes": "vLLM 0.20.2 adds Gemma 4 support, CUDA 12.9, improved multi-GPU. Requires CUDA compat on drivers < 570.",
+            "validationLevel": "community-validated",
             "supportedModelTypes": [
                 "afmoe",
                 "apertus",
@@ -84,12 +82,15 @@
                 "chatglm",
                 "cheers",
                 "clip",
+                "cohere2_moe",
+                "cohere_eagle",
                 "colbert",
                 "colmodernvbert",
                 "colpali",
                 "colqwen3",
                 "colqwen3_5",
                 "commandr",
+                "cosmos3",
                 "dbrx",
                 "deepseek_eagle",
                 "deepseek_eagle3",
@@ -97,8 +98,6 @@
                 "deepseek_ocr",
                 "deepseek_ocr2",
                 "deepseek_v2",
-                "deepseek_v4",
-                "deepseek_v4_mtp",
                 "deepseek_vl2",
                 "dots1",
                 "dots_ocr",
@@ -125,6 +124,7 @@
                 "gemma3n",
                 "gemma4",
                 "gemma4_mm",
+                "gemma4_mtp",
                 "glm",
                 "glm4",
                 "glm4_1v",
@@ -159,7 +159,6 @@
                 "internvl",
                 "iquest_loopcoder",
                 "isaac",
-                "jais",
                 "jais2",
                 "jamba",
                 "jina",
@@ -170,6 +169,7 @@
                 "kimi_k25",
                 "kimi_linear",
                 "kimi_vl",
+                "laguna",
                 "lfm2",
                 "lfm2_moe",
                 "lfm2_vl",
@@ -184,10 +184,13 @@
                 "mamba",
                 "mamba2",
                 "medusa",
+                "mellum",
                 "midashenglm",
                 "mimo",
                 "mimo_mtp",
-                "mimo_v2_flash",
+                "mimo_v2",
+                "mimo_v2_mtp",
+                "mimo_v2_omni",
                 "minicpm",
                 "minicpm3",
                 "minicpm_eagle",
@@ -196,6 +199,7 @@
                 "minimax_m2",
                 "minimax_text_01",
                 "mistral",
+                "mistral_eagle",
                 "mistral_large_3",
                 "mixtral",
                 "mllama4",
@@ -203,6 +207,7 @@
                 "modernbert",
                 "molmo",
                 "molmo2",
+                "moondream3",
                 "mpt",
                 "nano_nemotron_vl",
                 "nemotron",
@@ -218,6 +223,7 @@
                 "opencua",
                 "openpangu",
                 "openpangu_mtp",
+                "openvla",
                 "opt",
                 "orion",
                 "ouro",
@@ -265,6 +271,7 @@
                 "step3_vl",
                 "step3p5",
                 "step3p5_mtp",
+                "step3p7",
                 "step_vl",
                 "tarsier",
                 "telechat2",
@@ -279,14 +286,12 @@
             ]
         },
         {
-            "image": "vllm/vllm-openai:v0.10.1",
-            "tag": "v0.10.1",
+            "image": "vllm/vllm-openai:v0.22.1",
+            "tag": "v0.22.1",
             "architecture": "amd64",
-            "created": "2025-01-15T00:00:00Z",
+            "created": "2026-06-05T07:16:13.856004Z",
             "labels": {
-                "cuda_version": "12.4",
-                "python_version": "3.12",
-                "framework_version": "0.10.1"
+                "framework_version": "0.22.1"
             },
             "registry": "dockerhub",
             "repository": "vllm/vllm-openai",
@@ -300,15 +305,6 @@
                 },
                 "inferenceAmiVersion": "al2-ami-sagemaker-inference-gpu-3-1"
             },
-            "accelerator": {
-                "type": "cuda",
-                "version": "12.1",
-                "versionRange": {
-                    "min": "12.0",
-                    "max": "12.3"
-                }
-            },
-            "validationLevel": "tested",
             "profiles": {
                 "low-latency": {
                     "displayName": "Low Latency",
@@ -342,58 +338,94 @@
                     "notes": "Requires instance with 4+ GPUs. Set TENSOR_PARALLEL_SIZE to match GPU count"
                 }
             },
-            "notes": "vLLM 0.4.0 adds prefix caching and improved performance. Requires CUDA 12.0+",
+            "accelerator": {
+                "type": "cuda",
+                "version": "12.9",
+                "versionRange": {
+                    "min": "12.4",
+                    "max": "12.9"
+                }
+            },
+            "notes": "vLLM 0.20.2 adds Gemma 4 support, CUDA 12.9, improved multi-GPU. Requires CUDA compat on drivers < 570.",
+            "validationLevel": "community-validated",
             "supportedModelTypes": [
+                "afmoe",
+                "apertus",
                 "arcee",
                 "arctic",
                 "aria",
-                "aya_vision",
+                "bagel",
                 "baichuan",
                 "bailing_moe",
+                "bailing_moe_linear",
                 "bamba",
-                "bart",
+                "bee",
                 "bert",
                 "bert_with_rope",
                 "blip2",
                 "bloom",
-                "chameleon",
                 "chatglm",
-                "cohere2_vision",
+                "cheers",
+                "clip",
+                "cohere2_moe",
+                "cohere_eagle",
+                "colbert",
+                "colmodernvbert",
+                "colpali",
+                "colqwen3",
+                "colqwen3_5",
                 "commandr",
                 "dbrx",
-                "deepseek",
+                "deepseek_eagle",
+                "deepseek_eagle3",
                 "deepseek_mtp",
+                "deepseek_ocr",
+                "deepseek_ocr2",
                 "deepseek_v2",
                 "deepseek_vl2",
                 "dots1",
+                "dots_ocr",
+                "ernie",
                 "ernie45",
                 "ernie45_moe",
+                "ernie_mtp",
                 "exaone",
                 "exaone4",
+                "exaone4_5_mtp",
+                "exaone_moe",
+                "exaone_moe_mtp",
+                "extract_hidden_states",
                 "fairseq2_llama",
                 "falcon",
                 "falcon_h1",
-                "florence2",
+                "flex_olmo",
+                "funasr",
                 "fuyu",
                 "gemma",
                 "gemma2",
                 "gemma3",
                 "gemma3_mm",
                 "gemma3n",
-                "gemma3n_mm",
+                "gemma4",
+                "gemma4_mm",
+                "gemma4_mtp",
                 "glm",
                 "glm4",
                 "glm4_1v",
                 "glm4_moe",
+                "glm4_moe_lite",
+                "glm4_moe_lite_mtp",
                 "glm4_moe_mtp",
                 "glm4v",
+                "glm_ocr",
+                "glm_ocr_mtp",
+                "glmasr",
                 "gpt2",
                 "gpt_bigcode",
                 "gpt_j",
                 "gpt_neox",
                 "gpt_oss",
                 "granite",
-                "granite_speech",
                 "granitemoe",
                 "granitemoehybrid",
                 "granitemoeshared",
@@ -401,108 +433,149 @@
                 "grok1",
                 "h2ovl",
                 "hunyuan_v1",
+                "hy_v3",
+                "hy_v3_mtp",
+                "hyperclovax",
                 "hyperclovax_vision",
-                "idefics3",
+                "hyperclovax_vision_v2",
                 "internlm2",
                 "internlm2_ve",
-                "interns1",
                 "internvl",
+                "iquest_loopcoder",
+                "isaac",
                 "jais",
+                "jais2",
                 "jamba",
+                "jina",
                 "jina_vl",
+                "kanana_v",
                 "keye",
+                "kimi_audio",
+                "kimi_k25",
+                "kimi_linear",
                 "kimi_vl",
+                "laguna",
+                "lfm2",
+                "lfm2_moe",
+                "lfm2_vl",
                 "llama",
                 "llama4",
                 "llama4_eagle",
                 "llama_eagle",
                 "llama_eagle3",
                 "llava",
-                "llava_next",
-                "llava_next_video",
-                "llava_onevision",
+                "longcat_flash",
+                "longcat_flash_mtp",
                 "mamba",
                 "mamba2",
                 "medusa",
+                "mellum",
+                "midashenglm",
                 "mimo",
                 "mimo_mtp",
+                "mimo_v2",
+                "mimo_v2_mtp",
+                "mimo_v2_omni",
                 "minicpm",
                 "minicpm3",
                 "minicpm_eagle",
                 "minicpmo",
                 "minicpmv",
+                "minimax_m2",
                 "minimax_text_01",
-                "minimax_vl_01",
-                "mistral3",
+                "mistral",
+                "mistral_eagle",
+                "mistral_large_3",
                 "mixtral",
-                "mixtral_quant",
-                "mllama",
                 "mllama4",
                 "mlp_speculator",
                 "modernbert",
                 "molmo",
+                "molmo2",
+                "moondream3",
                 "mpt",
+                "nano_nemotron_vl",
                 "nemotron",
                 "nemotron_h",
+                "nemotron_h_mtp",
                 "nemotron_nas",
                 "nemotron_vl",
                 "nvlm_d",
                 "olmo",
                 "olmo2",
+                "olmo_hybrid",
                 "olmoe",
+                "opencua",
+                "openpangu",
+                "openpangu_mtp",
+                "openvla",
                 "opt",
                 "orion",
+                "ouro",
                 "ovis",
-                "paligemma",
+                "ovis2_5",
+                "param2moe",
                 "persimmon",
                 "phi",
                 "phi3",
                 "phi3v",
-                "phi4_multimodal",
-                "phi4flash",
                 "phi4mm",
+                "phi4siglip",
                 "phimoe",
                 "pixtral",
                 "plamo2",
-                "prithvi_geospatial_mae",
+                "plamo3",
                 "qwen",
                 "qwen2",
-                "qwen2_5_omni_thinker",
-                "qwen2_5_vl",
-                "qwen2_audio",
                 "qwen2_moe",
                 "qwen2_rm",
                 "qwen2_vl",
                 "qwen3",
+                "qwen3_5",
+                "qwen3_5_mtp",
+                "qwen3_asr_realtime",
+                "qwen3_dflash",
                 "qwen3_moe",
+                "qwen3_next",
+                "qwen3_next_mtp",
+                "qwen3_vl",
                 "qwen_vl",
+                "rnj1",
                 "roberta",
+                "rvl",
+                "sarvam",
+                "seed_oss",
+                "siglip",
                 "skyworkr1v",
                 "smolvlm",
                 "solar",
                 "stablelm",
                 "starcoder2",
+                "step1",
                 "step3_text",
                 "step3_vl",
+                "step3p5",
+                "step3p5_mtp",
+                "step_vl",
                 "tarsier",
                 "telechat2",
                 "teleflm",
+                "terratorch",
                 "transformers",
                 "ultravox",
                 "voxtral",
+                "voxtral_realtime",
                 "whisper",
                 "zamba2"
             ]
         },
         {
-            "image": "vllm/vllm-openai:v0.9.1",
-            "tag": "v0.9.1",
+            "image": "vllm/vllm-openai:v0.22.0",
+            "tag": "v0.22.0",
             "architecture": "amd64",
-            "created": "2024-12-10T00:00:00Z",
+            "created": "2026-05-29T09:06:43.475324Z",
             "labels": {
-                "cuda_version": "12.1",
-                "python_version": "3.12",
-                "framework_version": "0.9.1"
+                "framework_version": "0.22.0"
             },
             "registry": "dockerhub",
             "repository": "vllm/vllm-openai",
@@ -516,15 +589,6 @@
                 },
                 "inferenceAmiVersion": "al2-ami-sagemaker-inference-gpu-3-1"
             },
-            "accelerator": {
-                "type": "cuda",
-                "version": "12.1",
-                "versionRange": {
-                    "min": "12.0",
-                    "max": "12.3"
-                }
-            },
-            "validationLevel": "tested",
             "profiles": {
                 "low-latency": {
                     "displayName": "Low Latency",
@@ -558,130 +622,232 @@
                     "notes": "Requires instance with 4+ GPUs. Set TENSOR_PARALLEL_SIZE to match GPU count"
                 }
             },
-            "notes": "vLLM 0.4.0 adds prefix caching and improved performance. Requires CUDA 12.0+",
+            "accelerator": {
+                "type": "cuda",
+                "version": "12.9",
+                "versionRange": {
+                    "min": "12.4",
+                    "max": "12.9"
+                }
+            },
+            "notes": "vLLM 0.20.2 adds Gemma 4 support, CUDA 12.9, improved multi-GPU. Requires CUDA compat on drivers < 570.",
+            "validationLevel": "community-validated",
             "supportedModelTypes": [
+                "afmoe",
+                "apertus",
+                "arcee",
                 "arctic",
                 "aria",
-                "aya_vision",
+                "bagel",
                 "baichuan",
+                "bailing_moe",
+                "bailing_moe_linear",
                 "bamba",
-                "bart",
+                "bee",
                 "bert",
                 "bert_with_rope",
                 "blip2",
                 "bloom",
-                "chameleon",
                 "chatglm",
+                "cheers",
+                "clip",
+                "cohere2_moe",
+                "cohere_eagle",
+                "colbert",
+                "colmodernvbert",
+                "colpali",
+                "colqwen3",
+                "colqwen3_5",
                 "commandr",
                 "dbrx",
-                "deepseek",
+                "deepseek_eagle",
+                "deepseek_eagle3",
                 "deepseek_mtp",
+                "deepseek_ocr",
+                "deepseek_ocr2",
                 "deepseek_v2",
                 "deepseek_vl2",
-                "eagle",
+                "dots1",
+                "dots_ocr",
+                "ernie",
+                "ernie45",
+                "ernie45_moe",
+                "ernie_mtp",
                 "exaone",
+                "exaone4",
+                "exaone4_5_mtp",
+                "exaone_moe",
+                "exaone_moe_mtp",
+                "extract_hidden_states",
                 "fairseq2_llama",
                 "falcon",
                 "falcon_h1",
-                "florence2",
+                "flex_olmo",
+                "funasr",
                 "fuyu",
                 "gemma",
                 "gemma2",
                 "gemma3",
                 "gemma3_mm",
+                "gemma3n",
+                "gemma4",
+                "gemma4_mm",
+                "gemma4_mtp",
                 "glm",
                 "glm4",
+                "glm4_1v",
+                "glm4_moe",
+                "glm4_moe_lite",
+                "glm4_moe_lite_mtp",
+                "glm4_moe_mtp",
                 "glm4v",
+                "glm_ocr",
+                "glm_ocr_mtp",
+                "glmasr",
                 "gpt2",
                 "gpt_bigcode",
                 "gpt_j",
                 "gpt_neox",
+                "gpt_oss",
                 "granite",
-                "granite_speech",
                 "granitemoe",
                 "granitemoehybrid",
                 "granitemoeshared",
                 "gritlm",
                 "grok1",
                 "h2ovl",
-                "idefics3",
+                "hunyuan_v1",
+                "hy_v3",
+                "hy_v3_mtp",
+                "hyperclovax",
+                "hyperclovax_vision",
+                "hyperclovax_vision_v2",
                 "internlm2",
                 "internlm2_ve",
                 "internvl",
+                "iquest_loopcoder",
+                "isaac",
                 "jais",
+                "jais2",
                 "jamba",
+                "jina",
+                "jina_vl",
+                "kanana_v",
+                "keye",
+                "kimi_audio",
+                "kimi_k25",
+                "kimi_linear",
                 "kimi_vl",
+                "laguna",
+                "lfm2",
+                "lfm2_moe",
+                "lfm2_vl",
                 "llama",
+                "llama4",
+                "llama4_eagle",
                 "llama_eagle",
                 "llama_eagle3",
                 "llava",
-                "llava_next",
-                "llava_next_video",
-                "llava_onevision",
+                "longcat_flash",
+                "longcat_flash_mtp",
                 "mamba",
                 "mamba2",
                 "medusa",
+                "midashenglm",
                 "mimo",
                 "mimo_mtp",
+                "mimo_v2",
+                "mimo_v2_mtp",
+                "mimo_v2_omni",
                 "minicpm",
                 "minicpm3",
                 "minicpm_eagle",
                 "minicpmo",
                 "minicpmv",
+                "minimax_m2",
                 "minimax_text_01",
-                "minimax_vl_01",
-                "mistral3",
+                "mistral",
+                "mistral_eagle",
+                "mistral_large_3",
                 "mixtral",
-                "mixtral_quant",
-                "mllama",
                 "mllama4",
                 "mlp_speculator",
                 "modernbert",
                 "molmo",
+                "molmo2",
+                "moondream3",
                 "mpt",
+                "nano_nemotron_vl",
                 "nemotron",
                 "nemotron_h",
+                "nemotron_h_mtp",
                 "nemotron_nas",
+                "nemotron_vl",
                 "nvlm_d",
                 "olmo",
                 "olmo2",
+                "olmo_hybrid",
                 "olmoe",
+                "opencua",
+                "openpangu",
+                "openpangu_mtp",
+                "openvla",
                 "opt",
                 "orion",
+                "ouro",
                 "ovis",
-                "paligemma",
+                "ovis2_5",
+                "param2moe",
                 "persimmon",
                 "phi",
                 "phi3",
-                "phi3_small",
                 "phi3v",
                 "phi4mm",
+                "phi4siglip",
                 "phimoe",
                 "pixtral",
                 "plamo2",
-                "prithvi_geospatial_mae",
+                "plamo3",
                 "qwen",
                 "qwen2",
-                "qwen2_5_omni_thinker",
-                "qwen2_5_vl",
-                "qwen2_audio",
                 "qwen2_moe",
                 "qwen2_rm",
                 "qwen2_vl",
                 "qwen3",
+                "qwen3_5",
+                "qwen3_5_mtp",
+                "qwen3_asr_realtime",
+                "qwen3_dflash",
                 "qwen3_moe",
+                "qwen3_next",
+                "qwen3_next_mtp",
+                "qwen3_vl",
                 "qwen_vl",
+                "rnj1",
                 "roberta",
+                "rvl",
+                "sarvam",
+                "seed_oss",
+                "siglip",
                 "skyworkr1v",
                 "smolvlm",
                 "solar",
                 "stablelm",
                 "starcoder2",
+                "step1",
+                "step3_text",
+                "step3_vl",
+                "step3p5",
+                "step3p5_mtp",
+                "step_vl",
                 "tarsier",
                 "telechat2",
                 "teleflm",
+                "terratorch",
                 "transformers",
                 "ultravox",
+                "voxtral",
+                "voxtral_realtime",
                 "whisper",
                 "zamba2"
             ]
@@ -689,14 +855,12 @@
     ],
     "sglang": [
         {
-            "image": "lmsysorg/sglang:v0.5.4.post1-cu121",
-            "tag": "v0.5.4.post1-cu121",
+            "image": "lmsysorg/sglang:v0.5.14",
+            "tag": "v0.5.14",
             "architecture": "amd64",
-            "created": "2025-01-20T00:00:00Z",
+            "created": "2026-06-26T04:19:52.602207Z",
             "labels": {
-                "cuda_version": "12.1",
-                "python_version": "3.10",
-                "framework_version": "0.5.4"
+                "framework_version": "0.5.14"
             },
             "registry": "dockerhub",
             "repository": "lmsysorg/sglang",
@@ -709,15 +873,6 @@
                 },
                 "inferenceAmiVersion": "al2-ami-sagemaker-inference-gpu-3-1"
             },
-            "accelerator": {
-                "type": "cuda",
-                "version": "12.1",
-                "versionRange": {
-                    "min": "11.8",
-                    "max": "12.2"
-                }
-            },
-            "validationLevel": "experimental",
             "profiles": {
                 "default": {
                     "displayName": "Default Configuration",
@@ -740,17 +895,24 @@
                     "notes": "RadixAttention provides automatic KV cache reuse for improved throughput"
                 }
             },
-            "notes": "SGLang 0.2.0 features RadixAttention for automatic KV cache reuse. Experimental support"
+            "accelerator": {
+                "type": "cuda",
+                "version": "12.1",
+                "versionRange": {
+                    "min": "11.8",
+                    "max": "12.2"
+                }
+            },
+            "notes": "SGLang 0.5.14 features RadixAttention for automatic KV cache reuse. Experimental support",
+            "validationLevel": "experimental"
         },
         {
-            "image": "lmsysorg/sglang:v0.4.6-cu121",
-            "tag": "v0.4.6-cu121",
+            "image": "lmsysorg/sglang:v0.5.13",
+            "tag": "v0.5.13",
             "architecture": "amd64",
-            "created": "2024-11-15T00:00:00Z",
+            "created": "2026-06-11T10:15:46.142149Z",
             "labels": {
-                "cuda_version": "12.1",
-                "python_version": "3.10",
-                "framework_version": "0.4.6"
+                "framework_version": "0.5.13"
             },
             "registry": "dockerhub",
             "repository": "lmsysorg/sglang",
@@ -763,6 +925,28 @@
                 },
                 "inferenceAmiVersion": "al2-ami-sagemaker-inference-gpu-3-1"
             },
+            "profiles": {
+                "default": {
+                    "displayName": "Default Configuration",
+                    "description": "Balanced configuration for general use",
+                    "envVars": {
+                        "SGLANG_MAX_RUNNING_REQUESTS": "256",
+                        "SGLANG_MEM_FRACTION": "0.9"
+                    },
+                    "notes": "Good starting point for most workloads"
+                },
+                "high-throughput": {
+                    "displayName": "High Throughput",
+                    "description": "Optimized for maximum throughput with RadixAttention",
+                    "envVars": {
+                        "SGLANG_MAX_RUNNING_REQUESTS": "512",
+                        "SGLANG_MEM_FRACTION": "0.95",
+                        "SGLANG_CONTEXT_LENGTH": "2048",
+                        "SGLANG_ENABLE_RADIX_CACHE": "true"
+                    },
+                    "notes": "RadixAttention provides automatic KV cache reuse for improved throughput"
+                }
+            },
             "accelerator": {
                 "type": "cuda",
                 "version": "12.1",
@@ -771,7 +955,28 @@
                     "max": "12.2"
                 }
             },
-            "validationLevel": "experimental",
+            "notes": "SGLang 0.5.13 features RadixAttention for automatic KV cache reuse. Experimental support",
+            "validationLevel": "experimental"
+        },
+        {
+            "image": "lmsysorg/sglang:v0.5.12",
+            "tag": "v0.5.12",
+            "architecture": "amd64",
+            "created": "2026-05-16T18:18:22.925418Z",
+            "labels": {
+                "framework_version": "0.5.12"
+            },
+            "registry": "dockerhub",
+            "repository": "lmsysorg/sglang",
+            "defaults": {
+                "envVars": {
+                    "SGLANG_TENSOR_PARALLEL_SIZE": "1",
+                    "SGLANG_MEM_FRACTION": "0.9",
+                    "SGLANG_MAX_RUNNING_REQUESTS": "256",
+                    "SGLANG_CONTEXT_LENGTH": "4096"
+                },
+                "inferenceAmiVersion": "al2-ami-sagemaker-inference-gpu-3-1"
+            },
             "profiles": {
                 "default": {
                     "displayName": "Default Configuration",
@@ -794,7 +999,16 @@
                     "notes": "RadixAttention provides automatic KV cache reuse for improved throughput"
                 }
             },
-            "notes": "SGLang 0.2.0 features RadixAttention for automatic KV cache reuse. Experimental support"
+            "accelerator": {
+                "type": "cuda",
+                "version": "12.1",
+                "versionRange": {
+                    "min": "11.8",
+                    "max": "12.2"
+                }
+            },
+            "notes": "SGLang 0.5.12 features RadixAttention for automatic KV cache reuse. Experimental support",
+            "validationLevel": "experimental"
         }
     ],
     "tensorrt-llm": [

package/src/lib/bootstrap-command-handler.js CHANGED Viewed

@@ -64,6 +64,7 @@ export default class BootstrapCommandHandler {
     _handlePrune() { return this.profileManager._handlePrune(); }
     _handleSyncSchemas() { return this.profileManager._handleSyncSchemas(); }
     _handleSyncModelFamilies() { return this.profileManager._handleSyncModelFamilies(); }
+    _handleSyncServingVersions() { return this.profileManager._handleSyncServingVersions(); } // thin delegate: forwards to the profile manager and returns its result
     /**
      * Dispatch bootstrap subcommands.
@@ -132,6 +133,9 @@ export default class BootstrapCommandHandler {
         case 'sync-model-families':
             await this._handleSyncModelFamilies();
             break;
+        case 'sync-serving-versions':
+            await this._handleSyncServingVersions();
+            break;
         // Migration path: upgrades legacy profiles to current naming conventions.
         // Corrects stackName to mlcc-bootstrap-{profileName}, renames sharedStackFrom
         // to sharedInfraFrom. Idempotent — safe to run multiple times.
@@ -1467,7 +1471,9 @@ SUBCOMMANDS:
   prune                               Remove deleted and unknown records from the deployment manifest
   update                              Re-deploy bootstrap stacks using active profile (no prompts)
   migrate                             Upgrade legacy profiles to current naming conventions
+  sync-schemas                        Download AWS service model schemas (sagemaker, iam, ecr, s3)
   sync-model-families                 Discover tune-eligible models from JumpStart Hub and update catalog
+  sync-serving-versions               Discover latest vLLM/SGLang/TRT-LLM image versions and update catalog
 SETUP OPTIONS:
   --non-interactive                   Run without interactive prompts
@@ -1477,8 +1483,10 @@ SETUP OPTIONS:
   --role-arn <arn>                    Use existing IAM role ARN (skip role creation)
   --skip-s3                           Skip S3 bucket creation
   --ci                                Provision CI testing infrastructure
+  --benchmark-infra                   Provision Athena/Glue benchmark infrastructure (requires --ci)
   --skip-ci                           Skip CI infrastructure provisioning
   --skip-post-setup                   Skip post-setup chain (mcp init, sync-architectures, sync-schemas)
+  --ignore-staleness                  Suppress schema staleness warnings
 STATUS OPTIONS:
   --verify                            Check each active resource against AWS APIs for drift detection
@@ -1495,13 +1503,15 @@ EXAMPLES:
   ml-container-creator bootstrap list
   ml-container-creator bootstrap remove dev
   ml-container-creator bootstrap remove dev --force --delete-stack
+  ml-container-creator bootstrap update
+  ml-container-creator bootstrap update --ci --benchmark-infra
   ml-container-creator bootstrap scan
+  ml-container-creator bootstrap sync-schemas
   ml-container-creator bootstrap sync-model-families
+  ml-container-creator bootstrap sync-serving-versions
   ml-container-creator bootstrap migrate
   ml-container-creator bootstrap --non-interactive --profile my-aws-profile --region us-west-2
-  ml-container-creator bootstrap --non-interactive --profile my-aws-profile --role-arn arn:aws:iam::123456789012:role/MyRole --skip-s3
   ml-container-creator bootstrap --non-interactive --profile my-aws-profile --region us-west-2 --ci
-  ml-container-creator bootstrap --non-interactive --profile my-aws-profile --region us-west-2 --skip-ci
 `);
     }

package/src/lib/bootstrap-profile-manager.js CHANGED Viewed

@@ -655,4 +655,20 @@ export default class BootstrapProfileManager {
             process.exit(1);
         }
     }
+    /**
+     * Handle sync-serving-versions subcommand: discover latest container image
+     * versions for vLLM, SGLang, and TensorRT-LLM and update the model-servers catalog.
+     *
+     * Prints a banner, dynamically imports `syncServingVersions` from
+     * `../../scripts/sync-serving-versions.js`, awaits it with no arguments, and
+     * reports the `totalAdded` / `totalRemoved` counts read from its result.
+     * An error thrown by the import or by the sync is logged (message only) and
+     * the process is terminated with exit code 1 instead of being rethrown.
+     *
+     * NOTE(review): the import resolves to a `scripts/` directory outside `src/`;
+     * confirm that file is included in the published package.
+     *
+     * @returns {Promise<void>} Resolves after the summary line has been printed.
+     */
+    async _handleSyncServingVersions() {
+        console.log('\n🔄 Sync Serving Versions — Discovering latest container images...\n');
+        try {
+            const { syncServingVersions } = await import('../../scripts/sync-serving-versions.js'); // loaded on demand, only when this subcommand runs
+            const result = await syncServingVersions();
+            console.log(`\n✅ Sync complete: ${result.totalAdded} new, ${result.totalRemoved} pruned\n`);
+        } catch (err) {
+            console.log(`❌ Sync failed: ${err.message}`);
+            process.exit(1); // non-zero exit status reports the failure to the invoking shell
+        }
+    }
 }

package/src/lib/cross-cutting-checker.js CHANGED Viewed

@@ -290,7 +290,12 @@ export default class CrossCuttingChecker {
         if (!modelType || !server || !serverVersion) return findings;
         const entries = modelServersCatalog[server] || [];
-        const entry = entries.find(e => e.labels?.framework_version === serverVersion);
+        // Try exact version match first, then fall back to the first entry with supportedModelTypes populated
+        let entry = entries.find(e => e.labels?.framework_version === serverVersion);
+        if (!entry?.supportedModelTypes?.length) {
+            // Fall back to any entry that has supportedModelTypes populated
+            entry = entries.find(e => e.supportedModelTypes?.length > 0);
+        }
         if (!entry?.supportedModelTypes?.length) return findings;
         if (!entry.supportedModelTypes.includes(modelType.toLowerCase())) {

package/src/lib/generated/cli-options.js CHANGED Viewed

@@ -1,6 +1,6 @@
 // AUTO-GENERATED by scripts/codegen-cli.js — DO NOT EDIT
 // Source: config/parameter-schema-v2.json
-// Generated: 2026-06-23T20:55:23.381Z
+// Generated: 2026-06-29T13:37:06.271Z
 /**
  * CLI option definitions derived from parameter-schema-v2.json.

package/src/lib/generated/parameter-matrix.js CHANGED Viewed

@@ -1,6 +1,6 @@
 // AUTO-GENERATED by scripts/codegen-parameter-matrix.js — DO NOT EDIT
 // Source: config/parameter-schema-v2.json
-// Generated: 2026-06-23T20:55:23.482Z
+// Generated: 2026-06-29T13:37:06.375Z
 /**
  * Parameter matrix defining how each parameter is loaded from various sources.

package/src/lib/generated/validation-rules.js CHANGED Viewed

@@ -1,6 +1,6 @@
 // AUTO-GENERATED by scripts/codegen-validator.js — DO NOT EDIT
 // Source: config/parameter-schema-v2.json
-// Generated: 2026-06-23T20:55:23.412Z
+// Generated: 2026-06-29T13:37:06.303Z
 /**
  * Validation rules derived from parameter-schema-v2.json.