npm - @aws/ml-container-creator - Versions diffs - 0.7.1 → 0.9.0 - Mend

@aws/ml-container-creator 0.7.1 → 0.9.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (46) hide show

package/LICENSE-THIRD-PARTY +50760 -16218
package/bin/cli.js +1 -1
package/infra/ci-harness/buildspec.yml +4 -0
package/package.json +3 -1
package/servers/lib/catalogs/instances.json +52 -1275
package/servers/lib/catalogs/model-servers.json +80 -0
package/servers/lib/catalogs/models.json +0 -132
package/servers/lib/catalogs/popular-diffusors.json +1 -110
package/servers/model-picker/index.js +27 -16
package/src/app.js +113 -23
package/src/lib/cli-handler.js +1 -1
package/src/lib/config-manager.js +39 -2
package/src/lib/cross-cutting-checker.js +146 -33
package/src/lib/deployment-config-resolver.js +10 -4
package/src/lib/e2e-bootstrap.js +227 -0
package/src/lib/e2e-catalog-validator.js +103 -0
package/src/lib/e2e-quota-validator.js +135 -0
package/src/lib/mcp-client.js +16 -1
package/src/lib/mcp-command-handler.js +10 -2
package/src/lib/prompt-runner.js +306 -24
package/src/lib/prompts.js +9 -3
package/src/lib/template-manager.js +10 -4
package/src/lib/train-config-parser.js +136 -0
package/src/lib/train-config-persistence.js +143 -0
package/src/lib/train-config-validator.js +112 -0
package/src/lib/train-feedback.js +46 -0
package/src/lib/train-idempotency.js +97 -0
package/src/lib/train-request-builder.js +120 -0
package/src/lib/tune-catalog-validator.js +5 -5
package/templates/code/serve +2 -2
package/templates/code/serving.properties +2 -2
package/templates/diffusors/serve +3 -3
package/templates/do/.train_build_request.py +141 -0
package/templates/do/.train_poll_parser.py +135 -0
package/templates/do/.train_status_parser.py +187 -0
package/templates/do/.tune_helper.py +2 -2
package/templates/do/lib/feedback.sh +41 -0
package/templates/do/register +8 -2
package/templates/do/test +5 -5
package/templates/do/train +786 -0
package/templates/do/training/config.yaml +140 -0
package/templates/do/training/train.py +463 -0
package/templates/do/tune +2 -2
package/templates/marketplace/config +118 -0
package/templates/marketplace/deploy +890 -0
package/templates/marketplace/test +453 -0

package/servers/lib/catalogs/model-servers.json CHANGED Viewed

@@ -1,5 +1,85 @@
 {
     "vllm": [
+        {
+            "image": "vllm/vllm-openai:v0.20.2",
+            "tag": "v0.20.2",
+            "architecture": "amd64",
+            "created": "2026-05-10T00:00:00Z",
+            "labels": {
+                "cuda_version": "12.9",
+                "python_version": "3.12",
+                "framework_version": "0.20.2"
+            },
+            "registry": "dockerhub",
+            "repository": "vllm/vllm-openai",
+            "defaults": {
+                "envVars": {
+                    "VLLM_TENSOR_PARALLEL_SIZE": "1",
+                    "VLLM_GPU_MEMORY_UTILIZATION": "0.9",
+                    "VLLM_MAX_NUM_SEQS": "256",
+                    "VLLM_MAX_MODEL_LEN": "4096",
+                    "VLLM_ENABLE_PREFIX_CACHING": "true"
+                },
+                "inferenceAmiVersion": "al2-ami-sagemaker-inference-gpu-3-1"
+            },
+            "accelerator": {
+                "type": "cuda",
+                "version": "12.9",
+                "versionRange": {
+                    "min": "12.4",
+                    "max": "12.9"
+                }
+            },
+            "validationLevel": "community-validated",
+            "profiles": {
+                "low-latency": {
+                    "displayName": "Low Latency",
+                    "description": "Optimized for single-request latency with prefix caching",
+                    "envVars": {
+                        "VLLM_MAX_NUM_SEQS": "32",
+                        "VLLM_GPU_MEMORY_UTILIZATION": "0.85",
+                        "VLLM_ENABLE_PREFIX_CACHING": "true"
+                    },
+                    "notes": "Prefix caching improves latency for repeated prompts"
+                },
+                "high-throughput": {
+                    "displayName": "High Throughput",
+                    "description": "Optimized for batch processing with continuous batching",
+                    "envVars": {
+                        "VLLM_MAX_NUM_SEQS": "512",
+                        "VLLM_GPU_MEMORY_UTILIZATION": "0.95",
+                        "VLLM_MAX_MODEL_LEN": "2048",
+                        "VLLM_ENABLE_PREFIX_CACHING": "false"
+                    },
+                    "notes": "Continuous batching maximizes GPU utilization"
+                },
+                "multi-gpu": {
+                    "displayName": "Multi-GPU",
+                    "description": "Tensor parallel across multiple GPUs for large models",
+                    "envVars": {
+                        "VLLM_TENSOR_PARALLEL_SIZE": "4",
+                        "VLLM_GPU_MEMORY_UTILIZATION": "0.9",
+                        "VLLM_MAX_NUM_SEQS": "256"
+                    },
+                    "notes": "Requires instance with 4+ GPUs. Set TENSOR_PARALLEL_SIZE to match GPU count"
+                }
+            },
+            "notes": "vLLM 0.20.2 adds Gemma 4 support, CUDA 12.9, improved multi-GPU. Requires CUDA compat on drivers < 570.",
+            "supportedModelTypes": [
+                "gemma",
+                "gemma2",
+                "gemma3",
+                "llama",
+                "mistral",
+                "mixtral",
+                "qwen2",
+                "qwen3",
+                "qwen3_moe",
+                "deepseek_v3",
+                "phi3",
+                "command-r"
+            ]
+        },
         {
             "image": "vllm/vllm-openai:v0.10.1",
             "tag": "v0.10.1",

package/servers/lib/catalogs/models.json CHANGED Viewed

@@ -555,98 +555,6 @@
             "text-generation"
         ]
     },
-    "stabilityai/stable-diffusion-3.5-medium": {
-        "family": "stable-diffusion-3",
-        "gated": false,
-        "tags": [
-            "image-generation",
-            "diffusion",
-            "stable-diffusion"
-        ],
-        "architecture": "StableDiffusion3Pipeline",
-        "profiles": {
-            "default": {
-                "displayName": "SD3.5 Medium",
-                "envVars": {}
-            }
-        },
-        "notes": "Stable Diffusion 3.5 medium model. Supported natively by vLLM-Omni StableDiffusion3Pipeline.",
-        "chatTemplate": null,
-        "frameworkCompatibility": {
-            "vllm-omni": ">=0.14.0"
-        },
-        "validationLevel": "experimental",
-        "modelType": "diffusor",
-        "tasks": [
-            "text-to-image"
-        ]
-    },
-    "black-forest-labs/FLUX.1-dev": {
-        "family": "flux",
-        "gated": true,
-        "tags": [
-            "image-generation",
-            "diffusion",
-            "flux"
-        ],
-        "architecture": "FluxPipeline",
-        "profiles": {
-            "default": {
-                "displayName": "FLUX.1 Dev",
-                "envVars": {}
-            }
-        },
-        "notes": "FLUX.1-dev high-quality generation model. Uses dual text encoders (CLIP + T5) and FlowMatchEuler scheduler. Requires significant VRAM.",
-        "chatTemplate": null,
-        "frameworkCompatibility": {
-            "vllm-omni": ">=0.14.0"
-        },
-        "validationLevel": "experimental",
-        "modelType": "diffusor",
-        "tasks": [
-            "text-to-image"
-        ]
-    },
-    "black-forest-labs/FLUX.1-schnell": {
-        "family": "flux",
-        "gated": false,
-        "tags": [
-            "image-generation",
-            "diffusion",
-            "flux"
-        ],
-        "architecture": "FluxPipeline",
-        "notes": "FLUX.1-schnell fast generation model. Fewer denoising steps for faster inference at slightly lower quality",
-        "chatTemplate": null,
-        "frameworkCompatibility": {
-            "vllm-omni": ">=0.14.0"
-        },
-        "validationLevel": "experimental",
-        "modelType": "diffusor",
-        "tasks": [
-            "text-to-image"
-        ]
-    },
-    "Wan-AI/Wan2.1-T2V-14B-Diffusers": {
-        "family": "wan",
-        "gated": false,
-        "tags": [
-            "video-generation",
-            "diffusion",
-            "wan"
-        ],
-        "architecture": "WanPipeline",
-        "notes": "Wan2.1 text-to-video 14B model (diffusers format). Requires multi-GPU instance (ml.g5.12xlarge or larger). Must use the -Diffusers variant — the base Wan2.1-T2V-14B repo lacks model_index.json required by vLLM-Omni",
-        "chatTemplate": null,
-        "frameworkCompatibility": {
-            "vllm-omni": ">=0.16.0"
-        },
-        "validationLevel": "experimental",
-        "modelType": "diffusor",
-        "tasks": [
-            "text-to-video"
-        ]
-    },
     "meta-llama/Llama-3*": {
         "family": "llama-3",
         "gated": true,
@@ -731,45 +639,5 @@
         "tasks": [
             "text-generation"
         ]
-    },
-    "stabilityai/stable-diffusion-*": {
-        "family": "stable-diffusion",
-        "gated": false,
-        "tags": [
-            "image-generation",
-            "diffusion",
-            "stable-diffusion"
-        ],
-        "architecture": null,
-        "notes": "Fallback for Stable Diffusion variants not explicitly listed",
-        "chatTemplate": null,
-        "frameworkCompatibility": {
-            "vllm-omni": ">=0.14.0"
-        },
-        "validationLevel": "experimental",
-        "modelType": "diffusor",
-        "tasks": [
-            "text-to-image"
-        ]
-    },
-    "black-forest-labs/FLUX*": {
-        "family": "flux",
-        "gated": false,
-        "tags": [
-            "image-generation",
-            "diffusion",
-            "flux"
-        ],
-        "architecture": null,
-        "notes": "Fallback for FLUX model variants not explicitly listed",
-        "chatTemplate": null,
-        "frameworkCompatibility": {
-            "vllm-omni": ">=0.14.0"
-        },
-        "validationLevel": "experimental",
-        "modelType": "diffusor",
-        "tasks": [
-            "text-to-image"
-        ]
     }
 }

package/servers/lib/catalogs/popular-diffusors.json CHANGED Viewed

@@ -1,110 +1 @@
-{
-    "stabilityai/stable-diffusion-3.5-medium": {
-        "family": "stable-diffusion-3",
-        "chat_template": null,
-        "gated": false,
-        "tags": [
-            "image-generation",
-            "diffusion",
-            "stable-diffusion"
-        ],
-        "architecture": "StableDiffusion3Pipeline",
-        "framework_compatibility": {
-            "vllm-omni": ">=0.14.0"
-        },
-        "validation_level": "experimental",
-        "profiles": {
-            "default": {
-                "displayName": "SD3.5 Medium",
-                "envVars": {}
-            }
-        },
-        "notes": "Stable Diffusion 3.5 medium model. Supported natively by vLLM-Omni StableDiffusion3Pipeline."
-    },
-    "black-forest-labs/FLUX.1-dev": {
-        "family": "flux",
-        "chat_template": null,
-        "gated": true,
-        "tags": [
-            "image-generation",
-            "diffusion",
-            "flux"
-        ],
-        "architecture": "FluxPipeline",
-        "framework_compatibility": {
-            "vllm-omni": ">=0.14.0"
-        },
-        "validation_level": "experimental",
-        "profiles": {
-            "default": {
-                "displayName": "FLUX.1 Dev",
-                "envVars": {}
-            }
-        },
-        "notes": "FLUX.1-dev high-quality generation model. Uses dual text encoders (CLIP + T5) and FlowMatchEuler scheduler. Requires significant VRAM."
-    },
-    "black-forest-labs/FLUX.1-schnell": {
-        "family": "flux",
-        "chat_template": null,
-        "gated": false,
-        "tags": [
-            "image-generation",
-            "diffusion",
-            "flux"
-        ],
-        "architecture": "FluxPipeline",
-        "framework_compatibility": {
-            "vllm-omni": ">=0.14.0"
-        },
-        "validation_level": "experimental",
-        "notes": "FLUX.1-schnell fast generation model. Fewer denoising steps for faster inference at slightly lower quality"
-    },
-    "Wan-AI/Wan2.1-T2V-14B-Diffusers": {
-        "family": "wan",
-        "chat_template": null,
-        "gated": false,
-        "tags": [
-            "video-generation",
-            "diffusion",
-            "wan"
-        ],
-        "architecture": "WanPipeline",
-        "framework_compatibility": {
-            "vllm-omni": ">=0.16.0"
-        },
-        "validation_level": "experimental",
-        "notes": "Wan2.1 text-to-video 14B model (diffusers format). Requires multi-GPU instance (ml.g5.12xlarge or larger). Must use the -Diffusers variant — the base Wan2.1-T2V-14B repo lacks model_index.json required by vLLM-Omni"
-    },
-    "stabilityai/stable-diffusion-*": {
-        "family": "stable-diffusion",
-        "chat_template": null,
-        "gated": false,
-        "tags": [
-            "image-generation",
-            "diffusion",
-            "stable-diffusion"
-        ],
-        "architecture": null,
-        "framework_compatibility": {
-            "vllm-omni": ">=0.14.0"
-        },
-        "validation_level": "experimental",
-        "notes": "Fallback for Stable Diffusion variants not explicitly listed"
-    },
-    "black-forest-labs/FLUX*": {
-        "family": "flux",
-        "chat_template": null,
-        "gated": false,
-        "tags": [
-            "image-generation",
-            "diffusion",
-            "flux"
-        ],
-        "architecture": null,
-        "framework_compatibility": {
-            "vllm-omni": ">=0.14.0"
-        },
-        "validation_level": "experimental",
-        "notes": "Fallback for FLUX model variants not explicitly listed"
-    }
-}
+{}

package/servers/model-picker/index.js CHANGED Viewed

@@ -1531,18 +1531,25 @@ async function resolveModel({ model_id, fields, mode = 'discover', context }) {
     let values = {}
     let message = null
+    // Reject deprecated JumpStart prefixes
+    if (model_id.startsWith('jumpstart://') || model_id.startsWith('jumpstart-hub://')) {
+        const bareId = model_id.replace(/^jumpstart(-hub)?:\/\//, '')
+        message = `JumpStart is no longer supported. Use the HuggingFace model ID directly: ${bareId}`
+        return {
+            content: [{
+                type: 'text',
+                text: JSON.stringify({ values: {}, choices: {}, message })
+            }]
+        }
+    }
     if (mode === 'static') {
         // Static mode: use StaticCatalogResolver only
-        // For jumpstart:// prefixed IDs, resolve from JumpStart static catalog
         const metadata = await staticResolver.fetchModelMetadata(model_id, { fields })
         if (metadata) {
             values = { ...metadata }
         } else {
-            if (model_id.startsWith('jumpstart://')) {
-                message = `Model not found in JumpStart static catalog: ${model_id}`
-            } else {
-                message = `Model not found in static catalog: ${model_id}`
-            }
+            message = `Model not found in static catalog: ${model_id}`
         }
     } else {
         // Discover mode: use ResolverRegistry for live data, merge with static
@@ -1564,11 +1571,7 @@ async function resolveModel({ model_id, fields, mode = 'discover', context }) {
             values = { ...merged }
             // If the resolver failed but we got data from static catalog, note the fallback
             if (resolverFailed && !liveData && staticData) {
-                if (model_id.startsWith('jumpstart://')) {
-                    message = '[jumpstart] SageMaker API unreachable. Using static catalog fallback.'
-                } else if (model_id.startsWith('jumpstart-hub://')) {
-                    message = '[jumpstart-hub] SageMaker API unreachable. Using static catalog fallback.'
-                } else if (model_id.startsWith('registry://')) {
+                if (model_id.startsWith('registry://')) {
                     message = '[registry] SageMaker API unreachable. Using static catalog fallback.'
                 } else if (model_id.startsWith('s3://')) {
                     message = '[s3] S3 API unreachable. Using static catalog fallback.'
@@ -1577,11 +1580,7 @@ async function resolveModel({ model_id, fields, mode = 'discover', context }) {
         } else {
             // No data from either source
             if (resolverFailed) {
-                if (model_id.startsWith('jumpstart://')) {
-                    message = `[jumpstart] Resolver could not fetch data for: ${model_id}`
-                } else if (model_id.startsWith('jumpstart-hub://')) {
-                    message = `[jumpstart-hub] Resolver could not fetch data for: ${model_id}`
-                } else if (model_id.startsWith('registry://')) {
+                if (model_id.startsWith('registry://')) {
                     message = `[registry] Resolver could not fetch data for: ${model_id}`
                 } else if (model_id.startsWith('s3://')) {
                     message = `[s3] Resolver could not fetch data for: ${model_id}`
@@ -1613,6 +1612,18 @@ async function resolveModel({ model_id, fields, mode = 'discover', context }) {
         values = filtered
     }
+    // Exclude jumpstart:// prefixed results from output
+    const resolvedModelId = values.modelId || model_id
+    if (resolvedModelId.startsWith('jumpstart://') || resolvedModelId.startsWith('jumpstart-hub://')) {
+        const bareId = resolvedModelId.replace(/^jumpstart(-hub)?:\/\//, '')
+        return {
+            content: [{
+                type: 'text',
+                text: JSON.stringify({ values: {}, choices: {}, message: `JumpStart is no longer supported. Use the HuggingFace model ID directly: ${bareId}` })
+            }]
+        }
+    }
     // Build choices with provider prefix labels
     const choices = {}
     if (Object.keys(values).length > 0) {

package/src/app.js CHANGED Viewed

@@ -142,30 +142,27 @@ export async function run(projectName, options) {
         // Infer modelSource from model name prefix if not set
         const modelName = answers.modelName;
         if (!answers.modelSource && modelName) {
+            // Reject deprecated JumpStart prefixes with migration message
+            if (modelName.startsWith('jumpstart://') || modelName.startsWith('jumpstart-hub://')) {
+                const bareId = modelName.replace(/^jumpstart(-hub)?:\/\//, '');
+                console.error(`\n   ⚠️  JumpStart is no longer supported. Use the HuggingFace model ID directly: ${bareId}`);
+                console.error('   JumpStart model sources have been removed. Use one of:');
+                console.error('     • HuggingFace model ID (e.g., meta-llama/Llama-2-7b-hf)');
+                console.error('     • s3://bucket/path/model.tar.gz');
+                console.error('     • registry://model-package-name');
+                console.error('     • marketplace://arn:aws:sagemaker:...\n');
+                process.exit(1);
+            }
             if (modelName.startsWith('s3://')) {
                 answers.modelSource = 's3';
                 if (!answers.artifactUri) {
                     answers.artifactUri = modelName;
                 }
-            } else if (modelName.startsWith('jumpstart://')) {
-                answers.modelSource = 'jumpstart';
-            } else if (modelName.startsWith('jumpstart-hub://')) {
-                answers.modelSource = 'jumpstart-hub';
             } else if (modelName.startsWith('registry://')) {
                 answers.modelSource = 'registry';
             }
         }
-        // Warn about unsupported model sources
-        if (answers.modelSource === 'jumpstart-hub') {
-            console.log('\n   ⚠️  JumpStart Private Hub models are not yet fully supported.');
-            console.log('   The generated project will not be able to download model artifacts at runtime.');
-            console.log('   This feature is tracked for a future release.');
-            console.log('   Falling back to HuggingFace source.\n');
-            answers.modelSource = 'huggingface';
-            delete answers.artifactUri;
-        }
         // Note about registry model requirements
         if (answers.modelSource === 'registry') {
             console.log('\n   ℹ️  Registry model: the container will resolve the artifact URI at startup');
@@ -352,20 +349,82 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
     }
     // Exclude tune files when framework is NOT transformers OR deploymentTarget is batch-transform
-    if (architecture !== 'transformers' || answers.deploymentTarget === 'batch-transform') {
+    const tuneIncluded = architecture === 'transformers' && answers.deploymentTarget !== 'batch-transform';
+    if (!tuneIncluded) {
         ignorePatterns.push('**/do/tune');
         ignorePatterns.push('**/do/.tune_helper.py');
     }
+    // Exclude train files when deploymentTarget is batch-transform
+    const trainIncluded = answers.deploymentTarget !== 'batch-transform';
+    if (!trainIncluded) {
+        ignorePatterns.push('**/do/train');
+        ignorePatterns.push('**/do/.train_build_request.py');
+        ignorePatterns.push('**/do/.train_status_parser.py');
+        ignorePatterns.push('**/do/.train_poll_parser.py');
+        ignorePatterns.push('**/do/training/**');
+    }
+    // Exclude feedback.sh when neither tune nor train is included
+    if (!tuneIncluded && !trainIncluded) {
+        ignorePatterns.push('**/do/lib/feedback.sh');
+    }
     // Exclude do/test when hosted-model-endpoint is not selected
     const testTypes = answers.testTypes || [];
     if (!testTypes.includes('hosted-model-endpoint')) {
         ignorePatterns.push('**/do/test');
     }
-    // Always exclude triton and diffusors source directories
-    ignorePatterns.push('**/triton/**');
-    ignorePatterns.push('**/diffusors/**');
+    // Marketplace projects: exclude everything container-related
+    if (architecture === 'marketplace') {
+        ignorePatterns.push('**/Dockerfile');
+        ignorePatterns.push('**/code/**');
+        ignorePatterns.push('**/do/build');
+        ignorePatterns.push('**/do/push');
+        ignorePatterns.push('**/do/submit');
+        ignorePatterns.push('**/do/adapter');
+        ignorePatterns.push('**/do/adapters/**');
+        ignorePatterns.push('**/do/tune');
+        ignorePatterns.push('**/do/.tune_helper.py');
+        ignorePatterns.push('**/do/train');
+        ignorePatterns.push('**/do/.train_build_request.py');
+        ignorePatterns.push('**/do/.train_status_parser.py');
+        ignorePatterns.push('**/do/.train_poll_parser.py');
+        ignorePatterns.push('**/do/training/**');
+        ignorePatterns.push('**/do/add-ic');
+        ignorePatterns.push('**/do/run');
+        ignorePatterns.push('**/sample_model/**');
+        ignorePatterns.push('**/requirements.txt');
+        ignorePatterns.push('**/nginx-*.conf');
+        ignorePatterns.push('**/triton/**');
+        ignorePatterns.push('**/diffusors/**');
+        ignorePatterns.push('**/hyperpod/**');
+        ignorePatterns.push('**/MIGRATION.md');
+        ignorePatterns.push('**/TEMPLATE_SYSTEM.md');
+        ignorePatterns.push('**/IAM_PERMISSIONS.md');
+        ignorePatterns.push('**/PROJECT_README.md');
+        ignorePatterns.push('**/deploy_notebook_generator.py');
+        ignorePatterns.push('**/buildspec.yml');
+        ignorePatterns.push('**/test/**');
+        // Exclude templates that reference container-specific variables (framework, modelServer)
+        // Marketplace overlays its own config, deploy, and test templates
+        ignorePatterns.push('**/do/config');
+        ignorePatterns.push('**/do/deploy');
+        ignorePatterns.push('**/do/test');
+        ignorePatterns.push('**/do/README.md');
+        ignorePatterns.push('**/do/export');
+        ignorePatterns.push('**/do/validate');
+        ignorePatterns.push('**/do/ic/**');
+    }
+    // Always exclude architecture-specific source directories from main copy
+    // (they are overlaid separately for their respective architectures)
+    ignorePatterns.push('**/marketplace/**');
+    if (architecture !== 'marketplace') {
+        ignorePatterns.push('**/triton/**');
+        ignorePatterns.push('**/diffusors/**');
+    }
     // For triton and diffusors, exclude the default Dockerfile
     if (architecture === 'triton' || architecture === 'diffusors') {
@@ -431,6 +490,14 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
         _copyFile(path.join(templateDir, 'diffusors/patch_image_api.py'), path.join(destDir, 'code/patch_image_api.py'));
         break;
+    case 'marketplace':
+        // Marketplace projects: overlay marketplace-specific templates
+        // These replace the default do/config, do/deploy, and do/test with marketplace versions
+        _renderTemplate(path.join(templateDir, 'marketplace/config'), path.join(destDir, 'do/config'), templateVars);
+        _renderTemplate(path.join(templateDir, 'marketplace/deploy'), path.join(destDir, 'do/deploy'), templateVars);
+        _renderTemplate(path.join(templateDir, 'marketplace/test'), path.join(destDir, 'do/test'), templateVars);
+        break;
     default:
         // Fallback to HTTP behavior
         _unlinkIfExists(path.join(destDir, 'code/chat_template.jinja'));
@@ -450,7 +517,10 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
     }
     // Copy PROJECT_README.md as README.md (overwriting the template README)
-    _renderTemplate(path.join(templateDir, 'PROJECT_README.md'), path.join(destDir, 'README.md'), templateVars);
+    // Marketplace projects don't use the standard README (no container/framework info)
+    if (architecture !== 'marketplace') {
+        _renderTemplate(path.join(templateDir, 'PROJECT_README.md'), path.join(destDir, 'README.md'), templateVars);
+    }
     // Copy do/lib/ Node.js modules (plain copy, no EJS)
     const doLibDir = path.join(destDir, 'do', 'lib');
@@ -491,7 +561,7 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
  */
 export async function postGenerate(destDir, answers, tritonBackends = {}) {
     // Set executable permissions on shell scripts
-    _setExecutablePermissions(destDir);
+    _setExecutablePermissions(destDir, answers);
     // Run sample model training if requested
     const architecture = answers.architecture;
@@ -1092,8 +1162,25 @@ function _unlinkIfExists(filePath) {
  *
  * @param {string} destDir - Path to the generated project directory
  */
-function _setExecutablePermissions(destDir) {
-    const shellScripts = [
+function _setExecutablePermissions(destDir, answers = {}) {
+    const architecture = answers.architecture;
+    // Marketplace projects have a reduced set of scripts
+    const marketplaceScripts = [
+        'do/config',
+        'do/deploy',
+        'do/test',
+        'do/logs',
+        'do/clean',
+        'do/register',
+        'do/ci',
+        'do/manifest',
+        'do/benchmark',
+        'do/optimize',
+        'do/status'
+    ];
+    const defaultScripts = [
         'do/config',
         'do/build',
         'do/push',
@@ -1111,9 +1198,12 @@ function _setExecutablePermissions(destDir) {
         'do/status',
         'do/add-ic',
         'do/adapter',
-        'do/tune'
+        'do/tune',
+        'do/train'
     ];
+    const shellScripts = architecture === 'marketplace' ? marketplaceScripts : defaultScripts;
     shellScripts.forEach(script => {
         const scriptPath = path.join(destDir, script);
         try {

package/src/lib/cli-handler.js CHANGED Viewed

@@ -178,7 +178,7 @@ CLI OPTIONS:
   --project-name=<name>       Project name
   --project-dir=<dir>         Output directory path
   --framework=<framework>     ML framework (sklearn|xgboost|tensorflow|transformers)
-  --model-name=<name>         HuggingFace model name (for transformers framework)
+  --model-name=<name>         Model identifier (<hf-org/model>, s3://..., registry://..., marketplace://...)
   --model-server=<server>     Model server (flask|fastapi|vllm|sglang|tensorrt-llm|lmi|djl)
   --model-format=<format>     Model format (depends on framework)
   --include-sample            Include sample model code