@aws/ml-container-creator 0.7.1 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/LICENSE-THIRD-PARTY +50760 -16218
  2. package/bin/cli.js +1 -1
  3. package/infra/ci-harness/buildspec.yml +4 -0
  4. package/package.json +3 -1
  5. package/servers/lib/catalogs/instances.json +52 -1275
  6. package/servers/lib/catalogs/model-servers.json +80 -0
  7. package/servers/lib/catalogs/models.json +0 -132
  8. package/servers/lib/catalogs/popular-diffusors.json +1 -110
  9. package/servers/model-picker/index.js +27 -16
  10. package/src/app.js +113 -23
  11. package/src/lib/cli-handler.js +1 -1
  12. package/src/lib/config-manager.js +39 -2
  13. package/src/lib/cross-cutting-checker.js +146 -33
  14. package/src/lib/deployment-config-resolver.js +10 -4
  15. package/src/lib/e2e-bootstrap.js +227 -0
  16. package/src/lib/e2e-catalog-validator.js +103 -0
  17. package/src/lib/e2e-quota-validator.js +135 -0
  18. package/src/lib/mcp-client.js +16 -1
  19. package/src/lib/mcp-command-handler.js +10 -2
  20. package/src/lib/prompt-runner.js +306 -24
  21. package/src/lib/prompts.js +9 -3
  22. package/src/lib/template-manager.js +10 -4
  23. package/src/lib/train-config-parser.js +136 -0
  24. package/src/lib/train-config-persistence.js +143 -0
  25. package/src/lib/train-config-validator.js +112 -0
  26. package/src/lib/train-feedback.js +46 -0
  27. package/src/lib/train-idempotency.js +97 -0
  28. package/src/lib/train-request-builder.js +120 -0
  29. package/src/lib/tune-catalog-validator.js +5 -5
  30. package/templates/code/serve +2 -2
  31. package/templates/code/serving.properties +2 -2
  32. package/templates/diffusors/serve +3 -3
  33. package/templates/do/.train_build_request.py +141 -0
  34. package/templates/do/.train_poll_parser.py +135 -0
  35. package/templates/do/.train_status_parser.py +187 -0
  36. package/templates/do/.tune_helper.py +2 -2
  37. package/templates/do/lib/feedback.sh +41 -0
  38. package/templates/do/register +8 -2
  39. package/templates/do/test +5 -5
  40. package/templates/do/train +786 -0
  41. package/templates/do/training/config.yaml +140 -0
  42. package/templates/do/training/train.py +463 -0
  43. package/templates/do/tune +2 -2
  44. package/templates/marketplace/config +118 -0
  45. package/templates/marketplace/deploy +890 -0
  46. package/templates/marketplace/test +453 -0
@@ -1,5 +1,85 @@
1
1
  {
2
2
  "vllm": [
3
+ {
4
+ "image": "vllm/vllm-openai:v0.20.2",
5
+ "tag": "v0.20.2",
6
+ "architecture": "amd64",
7
+ "created": "2026-05-10T00:00:00Z",
8
+ "labels": {
9
+ "cuda_version": "12.9",
10
+ "python_version": "3.12",
11
+ "framework_version": "0.20.2"
12
+ },
13
+ "registry": "dockerhub",
14
+ "repository": "vllm/vllm-openai",
15
+ "defaults": {
16
+ "envVars": {
17
+ "VLLM_TENSOR_PARALLEL_SIZE": "1",
18
+ "VLLM_GPU_MEMORY_UTILIZATION": "0.9",
19
+ "VLLM_MAX_NUM_SEQS": "256",
20
+ "VLLM_MAX_MODEL_LEN": "4096",
21
+ "VLLM_ENABLE_PREFIX_CACHING": "true"
22
+ },
23
+ "inferenceAmiVersion": "al2-ami-sagemaker-inference-gpu-3-1"
24
+ },
25
+ "accelerator": {
26
+ "type": "cuda",
27
+ "version": "12.9",
28
+ "versionRange": {
29
+ "min": "12.4",
30
+ "max": "12.9"
31
+ }
32
+ },
33
+ "validationLevel": "community-validated",
34
+ "profiles": {
35
+ "low-latency": {
36
+ "displayName": "Low Latency",
37
+ "description": "Optimized for single-request latency with prefix caching",
38
+ "envVars": {
39
+ "VLLM_MAX_NUM_SEQS": "32",
40
+ "VLLM_GPU_MEMORY_UTILIZATION": "0.85",
41
+ "VLLM_ENABLE_PREFIX_CACHING": "true"
42
+ },
43
+ "notes": "Prefix caching improves latency for repeated prompts"
44
+ },
45
+ "high-throughput": {
46
+ "displayName": "High Throughput",
47
+ "description": "Optimized for batch processing with continuous batching",
48
+ "envVars": {
49
+ "VLLM_MAX_NUM_SEQS": "512",
50
+ "VLLM_GPU_MEMORY_UTILIZATION": "0.95",
51
+ "VLLM_MAX_MODEL_LEN": "2048",
52
+ "VLLM_ENABLE_PREFIX_CACHING": "false"
53
+ },
54
+ "notes": "Continuous batching maximizes GPU utilization"
55
+ },
56
+ "multi-gpu": {
57
+ "displayName": "Multi-GPU",
58
+ "description": "Tensor parallel across multiple GPUs for large models",
59
+ "envVars": {
60
+ "VLLM_TENSOR_PARALLEL_SIZE": "4",
61
+ "VLLM_GPU_MEMORY_UTILIZATION": "0.9",
62
+ "VLLM_MAX_NUM_SEQS": "256"
63
+ },
64
+ "notes": "Requires instance with 4+ GPUs. Set TENSOR_PARALLEL_SIZE to match GPU count"
65
+ }
66
+ },
67
+ "notes": "vLLM 0.20.2 adds Gemma 4 support, CUDA 12.9, improved multi-GPU. Requires CUDA compat on drivers < 570.",
68
+ "supportedModelTypes": [
69
+ "gemma",
70
+ "gemma2",
71
+ "gemma3",
72
+ "llama",
73
+ "mistral",
74
+ "mixtral",
75
+ "qwen2",
76
+ "qwen3",
77
+ "qwen3_moe",
78
+ "deepseek_v3",
79
+ "phi3",
80
+ "command-r"
81
+ ]
82
+ },
3
83
  {
4
84
  "image": "vllm/vllm-openai:v0.10.1",
5
85
  "tag": "v0.10.1",
@@ -555,98 +555,6 @@
555
555
  "text-generation"
556
556
  ]
557
557
  },
558
- "stabilityai/stable-diffusion-3.5-medium": {
559
- "family": "stable-diffusion-3",
560
- "gated": false,
561
- "tags": [
562
- "image-generation",
563
- "diffusion",
564
- "stable-diffusion"
565
- ],
566
- "architecture": "StableDiffusion3Pipeline",
567
- "profiles": {
568
- "default": {
569
- "displayName": "SD3.5 Medium",
570
- "envVars": {}
571
- }
572
- },
573
- "notes": "Stable Diffusion 3.5 medium model. Supported natively by vLLM-Omni StableDiffusion3Pipeline.",
574
- "chatTemplate": null,
575
- "frameworkCompatibility": {
576
- "vllm-omni": ">=0.14.0"
577
- },
578
- "validationLevel": "experimental",
579
- "modelType": "diffusor",
580
- "tasks": [
581
- "text-to-image"
582
- ]
583
- },
584
- "black-forest-labs/FLUX.1-dev": {
585
- "family": "flux",
586
- "gated": true,
587
- "tags": [
588
- "image-generation",
589
- "diffusion",
590
- "flux"
591
- ],
592
- "architecture": "FluxPipeline",
593
- "profiles": {
594
- "default": {
595
- "displayName": "FLUX.1 Dev",
596
- "envVars": {}
597
- }
598
- },
599
- "notes": "FLUX.1-dev high-quality generation model. Uses dual text encoders (CLIP + T5) and FlowMatchEuler scheduler. Requires significant VRAM.",
600
- "chatTemplate": null,
601
- "frameworkCompatibility": {
602
- "vllm-omni": ">=0.14.0"
603
- },
604
- "validationLevel": "experimental",
605
- "modelType": "diffusor",
606
- "tasks": [
607
- "text-to-image"
608
- ]
609
- },
610
- "black-forest-labs/FLUX.1-schnell": {
611
- "family": "flux",
612
- "gated": false,
613
- "tags": [
614
- "image-generation",
615
- "diffusion",
616
- "flux"
617
- ],
618
- "architecture": "FluxPipeline",
619
- "notes": "FLUX.1-schnell fast generation model. Fewer denoising steps for faster inference at slightly lower quality",
620
- "chatTemplate": null,
621
- "frameworkCompatibility": {
622
- "vllm-omni": ">=0.14.0"
623
- },
624
- "validationLevel": "experimental",
625
- "modelType": "diffusor",
626
- "tasks": [
627
- "text-to-image"
628
- ]
629
- },
630
- "Wan-AI/Wan2.1-T2V-14B-Diffusers": {
631
- "family": "wan",
632
- "gated": false,
633
- "tags": [
634
- "video-generation",
635
- "diffusion",
636
- "wan"
637
- ],
638
- "architecture": "WanPipeline",
639
- "notes": "Wan2.1 text-to-video 14B model (diffusers format). Requires multi-GPU instance (ml.g5.12xlarge or larger). Must use the -Diffusers variant — the base Wan2.1-T2V-14B repo lacks model_index.json required by vLLM-Omni",
640
- "chatTemplate": null,
641
- "frameworkCompatibility": {
642
- "vllm-omni": ">=0.16.0"
643
- },
644
- "validationLevel": "experimental",
645
- "modelType": "diffusor",
646
- "tasks": [
647
- "text-to-video"
648
- ]
649
- },
650
558
  "meta-llama/Llama-3*": {
651
559
  "family": "llama-3",
652
560
  "gated": true,
@@ -731,45 +639,5 @@
731
639
  "tasks": [
732
640
  "text-generation"
733
641
  ]
734
- },
735
- "stabilityai/stable-diffusion-*": {
736
- "family": "stable-diffusion",
737
- "gated": false,
738
- "tags": [
739
- "image-generation",
740
- "diffusion",
741
- "stable-diffusion"
742
- ],
743
- "architecture": null,
744
- "notes": "Fallback for Stable Diffusion variants not explicitly listed",
745
- "chatTemplate": null,
746
- "frameworkCompatibility": {
747
- "vllm-omni": ">=0.14.0"
748
- },
749
- "validationLevel": "experimental",
750
- "modelType": "diffusor",
751
- "tasks": [
752
- "text-to-image"
753
- ]
754
- },
755
- "black-forest-labs/FLUX*": {
756
- "family": "flux",
757
- "gated": false,
758
- "tags": [
759
- "image-generation",
760
- "diffusion",
761
- "flux"
762
- ],
763
- "architecture": null,
764
- "notes": "Fallback for FLUX model variants not explicitly listed",
765
- "chatTemplate": null,
766
- "frameworkCompatibility": {
767
- "vllm-omni": ">=0.14.0"
768
- },
769
- "validationLevel": "experimental",
770
- "modelType": "diffusor",
771
- "tasks": [
772
- "text-to-image"
773
- ]
774
642
  }
775
643
  }
@@ -1,110 +1 @@
1
- {
2
- "stabilityai/stable-diffusion-3.5-medium": {
3
- "family": "stable-diffusion-3",
4
- "chat_template": null,
5
- "gated": false,
6
- "tags": [
7
- "image-generation",
8
- "diffusion",
9
- "stable-diffusion"
10
- ],
11
- "architecture": "StableDiffusion3Pipeline",
12
- "framework_compatibility": {
13
- "vllm-omni": ">=0.14.0"
14
- },
15
- "validation_level": "experimental",
16
- "profiles": {
17
- "default": {
18
- "displayName": "SD3.5 Medium",
19
- "envVars": {}
20
- }
21
- },
22
- "notes": "Stable Diffusion 3.5 medium model. Supported natively by vLLM-Omni StableDiffusion3Pipeline."
23
- },
24
- "black-forest-labs/FLUX.1-dev": {
25
- "family": "flux",
26
- "chat_template": null,
27
- "gated": true,
28
- "tags": [
29
- "image-generation",
30
- "diffusion",
31
- "flux"
32
- ],
33
- "architecture": "FluxPipeline",
34
- "framework_compatibility": {
35
- "vllm-omni": ">=0.14.0"
36
- },
37
- "validation_level": "experimental",
38
- "profiles": {
39
- "default": {
40
- "displayName": "FLUX.1 Dev",
41
- "envVars": {}
42
- }
43
- },
44
- "notes": "FLUX.1-dev high-quality generation model. Uses dual text encoders (CLIP + T5) and FlowMatchEuler scheduler. Requires significant VRAM."
45
- },
46
- "black-forest-labs/FLUX.1-schnell": {
47
- "family": "flux",
48
- "chat_template": null,
49
- "gated": false,
50
- "tags": [
51
- "image-generation",
52
- "diffusion",
53
- "flux"
54
- ],
55
- "architecture": "FluxPipeline",
56
- "framework_compatibility": {
57
- "vllm-omni": ">=0.14.0"
58
- },
59
- "validation_level": "experimental",
60
- "notes": "FLUX.1-schnell fast generation model. Fewer denoising steps for faster inference at slightly lower quality"
61
- },
62
- "Wan-AI/Wan2.1-T2V-14B-Diffusers": {
63
- "family": "wan",
64
- "chat_template": null,
65
- "gated": false,
66
- "tags": [
67
- "video-generation",
68
- "diffusion",
69
- "wan"
70
- ],
71
- "architecture": "WanPipeline",
72
- "framework_compatibility": {
73
- "vllm-omni": ">=0.16.0"
74
- },
75
- "validation_level": "experimental",
76
- "notes": "Wan2.1 text-to-video 14B model (diffusers format). Requires multi-GPU instance (ml.g5.12xlarge or larger). Must use the -Diffusers variant — the base Wan2.1-T2V-14B repo lacks model_index.json required by vLLM-Omni"
77
- },
78
- "stabilityai/stable-diffusion-*": {
79
- "family": "stable-diffusion",
80
- "chat_template": null,
81
- "gated": false,
82
- "tags": [
83
- "image-generation",
84
- "diffusion",
85
- "stable-diffusion"
86
- ],
87
- "architecture": null,
88
- "framework_compatibility": {
89
- "vllm-omni": ">=0.14.0"
90
- },
91
- "validation_level": "experimental",
92
- "notes": "Fallback for Stable Diffusion variants not explicitly listed"
93
- },
94
- "black-forest-labs/FLUX*": {
95
- "family": "flux",
96
- "chat_template": null,
97
- "gated": false,
98
- "tags": [
99
- "image-generation",
100
- "diffusion",
101
- "flux"
102
- ],
103
- "architecture": null,
104
- "framework_compatibility": {
105
- "vllm-omni": ">=0.14.0"
106
- },
107
- "validation_level": "experimental",
108
- "notes": "Fallback for FLUX model variants not explicitly listed"
109
- }
110
- }
1
+ {}
@@ -1531,18 +1531,25 @@ async function resolveModel({ model_id, fields, mode = 'discover', context }) {
1531
1531
  let values = {}
1532
1532
  let message = null
1533
1533
 
1534
+ // Reject deprecated JumpStart prefixes
1535
+ if (model_id.startsWith('jumpstart://') || model_id.startsWith('jumpstart-hub://')) {
1536
+ const bareId = model_id.replace(/^jumpstart(-hub)?:\/\//, '')
1537
+ message = `JumpStart is no longer supported. Use the HuggingFace model ID directly: ${bareId}`
1538
+ return {
1539
+ content: [{
1540
+ type: 'text',
1541
+ text: JSON.stringify({ values: {}, choices: {}, message })
1542
+ }]
1543
+ }
1544
+ }
1545
+
1534
1546
  if (mode === 'static') {
1535
1547
  // Static mode: use StaticCatalogResolver only
1536
- // For jumpstart:// prefixed IDs, resolve from JumpStart static catalog
1537
1548
  const metadata = await staticResolver.fetchModelMetadata(model_id, { fields })
1538
1549
  if (metadata) {
1539
1550
  values = { ...metadata }
1540
1551
  } else {
1541
- if (model_id.startsWith('jumpstart://')) {
1542
- message = `Model not found in JumpStart static catalog: ${model_id}`
1543
- } else {
1544
- message = `Model not found in static catalog: ${model_id}`
1545
- }
1552
+ message = `Model not found in static catalog: ${model_id}`
1546
1553
  }
1547
1554
  } else {
1548
1555
  // Discover mode: use ResolverRegistry for live data, merge with static
@@ -1564,11 +1571,7 @@ async function resolveModel({ model_id, fields, mode = 'discover', context }) {
1564
1571
  values = { ...merged }
1565
1572
  // If the resolver failed but we got data from static catalog, note the fallback
1566
1573
  if (resolverFailed && !liveData && staticData) {
1567
- if (model_id.startsWith('jumpstart://')) {
1568
- message = '[jumpstart] SageMaker API unreachable. Using static catalog fallback.'
1569
- } else if (model_id.startsWith('jumpstart-hub://')) {
1570
- message = '[jumpstart-hub] SageMaker API unreachable. Using static catalog fallback.'
1571
- } else if (model_id.startsWith('registry://')) {
1574
+ if (model_id.startsWith('registry://')) {
1572
1575
  message = '[registry] SageMaker API unreachable. Using static catalog fallback.'
1573
1576
  } else if (model_id.startsWith('s3://')) {
1574
1577
  message = '[s3] S3 API unreachable. Using static catalog fallback.'
@@ -1577,11 +1580,7 @@ async function resolveModel({ model_id, fields, mode = 'discover', context }) {
1577
1580
  } else {
1578
1581
  // No data from either source
1579
1582
  if (resolverFailed) {
1580
- if (model_id.startsWith('jumpstart://')) {
1581
- message = `[jumpstart] Resolver could not fetch data for: ${model_id}`
1582
- } else if (model_id.startsWith('jumpstart-hub://')) {
1583
- message = `[jumpstart-hub] Resolver could not fetch data for: ${model_id}`
1584
- } else if (model_id.startsWith('registry://')) {
1583
+ if (model_id.startsWith('registry://')) {
1585
1584
  message = `[registry] Resolver could not fetch data for: ${model_id}`
1586
1585
  } else if (model_id.startsWith('s3://')) {
1587
1586
  message = `[s3] Resolver could not fetch data for: ${model_id}`
@@ -1613,6 +1612,18 @@ async function resolveModel({ model_id, fields, mode = 'discover', context }) {
1613
1612
  values = filtered
1614
1613
  }
1615
1614
 
1615
+ // Exclude jumpstart:// prefixed results from output
1616
+ const resolvedModelId = values.modelId || model_id
1617
+ if (resolvedModelId.startsWith('jumpstart://') || resolvedModelId.startsWith('jumpstart-hub://')) {
1618
+ const bareId = resolvedModelId.replace(/^jumpstart(-hub)?:\/\//, '')
1619
+ return {
1620
+ content: [{
1621
+ type: 'text',
1622
+ text: JSON.stringify({ values: {}, choices: {}, message: `JumpStart is no longer supported. Use the HuggingFace model ID directly: ${bareId}` })
1623
+ }]
1624
+ }
1625
+ }
1626
+
1616
1627
  // Build choices with provider prefix labels
1617
1628
  const choices = {}
1618
1629
  if (Object.keys(values).length > 0) {
package/src/app.js CHANGED
@@ -142,30 +142,27 @@ export async function run(projectName, options) {
142
142
  // Infer modelSource from model name prefix if not set
143
143
  const modelName = answers.modelName;
144
144
  if (!answers.modelSource && modelName) {
145
+ // Reject deprecated JumpStart prefixes with migration message
146
+ if (modelName.startsWith('jumpstart://') || modelName.startsWith('jumpstart-hub://')) {
147
+ const bareId = modelName.replace(/^jumpstart(-hub)?:\/\//, '');
148
+ console.error(`\n ⚠️ JumpStart is no longer supported. Use the HuggingFace model ID directly: ${bareId}`);
149
+ console.error(' JumpStart model sources have been removed. Use one of:');
150
+ console.error(' • HuggingFace model ID (e.g., meta-llama/Llama-2-7b-hf)');
151
+ console.error(' • s3://bucket/path/model.tar.gz');
152
+ console.error(' • registry://model-package-name');
153
+ console.error(' • marketplace://arn:aws:sagemaker:...\n');
154
+ process.exit(1);
155
+ }
145
156
  if (modelName.startsWith('s3://')) {
146
157
  answers.modelSource = 's3';
147
158
  if (!answers.artifactUri) {
148
159
  answers.artifactUri = modelName;
149
160
  }
150
- } else if (modelName.startsWith('jumpstart://')) {
151
- answers.modelSource = 'jumpstart';
152
- } else if (modelName.startsWith('jumpstart-hub://')) {
153
- answers.modelSource = 'jumpstart-hub';
154
161
  } else if (modelName.startsWith('registry://')) {
155
162
  answers.modelSource = 'registry';
156
163
  }
157
164
  }
158
165
 
159
- // Warn about unsupported model sources
160
- if (answers.modelSource === 'jumpstart-hub') {
161
- console.log('\n ⚠️ JumpStart Private Hub models are not yet fully supported.');
162
- console.log(' The generated project will not be able to download model artifacts at runtime.');
163
- console.log(' This feature is tracked for a future release.');
164
- console.log(' Falling back to HuggingFace source.\n');
165
- answers.modelSource = 'huggingface';
166
- delete answers.artifactUri;
167
- }
168
-
169
166
  // Note about registry model requirements
170
167
  if (answers.modelSource === 'registry') {
171
168
  console.log('\n ℹ️ Registry model: the container will resolve the artifact URI at startup');
@@ -352,20 +349,82 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
352
349
  }
353
350
 
354
351
  // Exclude tune files when framework is NOT transformers OR deploymentTarget is batch-transform
355
- if (architecture !== 'transformers' || answers.deploymentTarget === 'batch-transform') {
352
+ const tuneIncluded = architecture === 'transformers' && answers.deploymentTarget !== 'batch-transform';
353
+ if (!tuneIncluded) {
356
354
  ignorePatterns.push('**/do/tune');
357
355
  ignorePatterns.push('**/do/.tune_helper.py');
358
356
  }
359
357
 
358
+ // Exclude train files when deploymentTarget is batch-transform
359
+ const trainIncluded = answers.deploymentTarget !== 'batch-transform';
360
+ if (!trainIncluded) {
361
+ ignorePatterns.push('**/do/train');
362
+ ignorePatterns.push('**/do/.train_build_request.py');
363
+ ignorePatterns.push('**/do/.train_status_parser.py');
364
+ ignorePatterns.push('**/do/.train_poll_parser.py');
365
+ ignorePatterns.push('**/do/training/**');
366
+ }
367
+
368
+ // Exclude feedback.sh when neither tune nor train is included
369
+ if (!tuneIncluded && !trainIncluded) {
370
+ ignorePatterns.push('**/do/lib/feedback.sh');
371
+ }
372
+
360
373
  // Exclude do/test when hosted-model-endpoint is not selected
361
374
  const testTypes = answers.testTypes || [];
362
375
  if (!testTypes.includes('hosted-model-endpoint')) {
363
376
  ignorePatterns.push('**/do/test');
364
377
  }
365
378
 
366
- // Always exclude triton and diffusors source directories
367
- ignorePatterns.push('**/triton/**');
368
- ignorePatterns.push('**/diffusors/**');
379
+ // Marketplace projects: exclude everything container-related
380
+ if (architecture === 'marketplace') {
381
+ ignorePatterns.push('**/Dockerfile');
382
+ ignorePatterns.push('**/code/**');
383
+ ignorePatterns.push('**/do/build');
384
+ ignorePatterns.push('**/do/push');
385
+ ignorePatterns.push('**/do/submit');
386
+ ignorePatterns.push('**/do/adapter');
387
+ ignorePatterns.push('**/do/adapters/**');
388
+ ignorePatterns.push('**/do/tune');
389
+ ignorePatterns.push('**/do/.tune_helper.py');
390
+ ignorePatterns.push('**/do/train');
391
+ ignorePatterns.push('**/do/.train_build_request.py');
392
+ ignorePatterns.push('**/do/.train_status_parser.py');
393
+ ignorePatterns.push('**/do/.train_poll_parser.py');
394
+ ignorePatterns.push('**/do/training/**');
395
+ ignorePatterns.push('**/do/add-ic');
396
+ ignorePatterns.push('**/do/run');
397
+ ignorePatterns.push('**/sample_model/**');
398
+ ignorePatterns.push('**/requirements.txt');
399
+ ignorePatterns.push('**/nginx-*.conf');
400
+ ignorePatterns.push('**/triton/**');
401
+ ignorePatterns.push('**/diffusors/**');
402
+ ignorePatterns.push('**/hyperpod/**');
403
+ ignorePatterns.push('**/MIGRATION.md');
404
+ ignorePatterns.push('**/TEMPLATE_SYSTEM.md');
405
+ ignorePatterns.push('**/IAM_PERMISSIONS.md');
406
+ ignorePatterns.push('**/PROJECT_README.md');
407
+ ignorePatterns.push('**/deploy_notebook_generator.py');
408
+ ignorePatterns.push('**/buildspec.yml');
409
+ ignorePatterns.push('**/test/**');
410
+ // Exclude templates that reference container-specific variables (framework, modelServer)
411
+ // Marketplace overlays its own config, deploy, and test templates
412
+ ignorePatterns.push('**/do/config');
413
+ ignorePatterns.push('**/do/deploy');
414
+ ignorePatterns.push('**/do/test');
415
+ ignorePatterns.push('**/do/README.md');
416
+ ignorePatterns.push('**/do/export');
417
+ ignorePatterns.push('**/do/validate');
418
+ ignorePatterns.push('**/do/ic/**');
419
+ }
420
+
421
+ // Always exclude architecture-specific source directories from main copy
422
+ // (they are overlaid separately for their respective architectures)
423
+ ignorePatterns.push('**/marketplace/**');
424
+ if (architecture !== 'marketplace') {
425
+ ignorePatterns.push('**/triton/**');
426
+ ignorePatterns.push('**/diffusors/**');
427
+ }
369
428
 
370
429
  // For triton and diffusors, exclude the default Dockerfile
371
430
  if (architecture === 'triton' || architecture === 'diffusors') {
@@ -431,6 +490,14 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
431
490
  _copyFile(path.join(templateDir, 'diffusors/patch_image_api.py'), path.join(destDir, 'code/patch_image_api.py'));
432
491
  break;
433
492
 
493
+ case 'marketplace':
494
+ // Marketplace projects: overlay marketplace-specific templates
495
+ // These replace the default do/config, do/deploy, and do/test with marketplace versions
496
+ _renderTemplate(path.join(templateDir, 'marketplace/config'), path.join(destDir, 'do/config'), templateVars);
497
+ _renderTemplate(path.join(templateDir, 'marketplace/deploy'), path.join(destDir, 'do/deploy'), templateVars);
498
+ _renderTemplate(path.join(templateDir, 'marketplace/test'), path.join(destDir, 'do/test'), templateVars);
499
+ break;
500
+
434
501
  default:
435
502
  // Fallback to HTTP behavior
436
503
  _unlinkIfExists(path.join(destDir, 'code/chat_template.jinja'));
@@ -450,7 +517,10 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
450
517
  }
451
518
 
452
519
  // Copy PROJECT_README.md as README.md (overwriting the template README)
453
- _renderTemplate(path.join(templateDir, 'PROJECT_README.md'), path.join(destDir, 'README.md'), templateVars);
520
+ // Marketplace projects don't use the standard README (no container/framework info)
521
+ if (architecture !== 'marketplace') {
522
+ _renderTemplate(path.join(templateDir, 'PROJECT_README.md'), path.join(destDir, 'README.md'), templateVars);
523
+ }
454
524
 
455
525
  // Copy do/lib/ Node.js modules (plain copy, no EJS)
456
526
  const doLibDir = path.join(destDir, 'do', 'lib');
@@ -491,7 +561,7 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
491
561
  */
492
562
  export async function postGenerate(destDir, answers, tritonBackends = {}) {
493
563
  // Set executable permissions on shell scripts
494
- _setExecutablePermissions(destDir);
564
+ _setExecutablePermissions(destDir, answers);
495
565
 
496
566
  // Run sample model training if requested
497
567
  const architecture = answers.architecture;
@@ -1092,8 +1162,25 @@ function _unlinkIfExists(filePath) {
1092
1162
  *
1093
1163
  * @param {string} destDir - Path to the generated project directory
1094
1164
  */
1095
- function _setExecutablePermissions(destDir) {
1096
- const shellScripts = [
1165
+ function _setExecutablePermissions(destDir, answers = {}) {
1166
+ const architecture = answers.architecture;
1167
+
1168
+ // Marketplace projects have a reduced set of scripts
1169
+ const marketplaceScripts = [
1170
+ 'do/config',
1171
+ 'do/deploy',
1172
+ 'do/test',
1173
+ 'do/logs',
1174
+ 'do/clean',
1175
+ 'do/register',
1176
+ 'do/ci',
1177
+ 'do/manifest',
1178
+ 'do/benchmark',
1179
+ 'do/optimize',
1180
+ 'do/status'
1181
+ ];
1182
+
1183
+ const defaultScripts = [
1097
1184
  'do/config',
1098
1185
  'do/build',
1099
1186
  'do/push',
@@ -1111,9 +1198,12 @@ function _setExecutablePermissions(destDir) {
1111
1198
  'do/status',
1112
1199
  'do/add-ic',
1113
1200
  'do/adapter',
1114
- 'do/tune'
1201
+ 'do/tune',
1202
+ 'do/train'
1115
1203
  ];
1116
1204
 
1205
+ const shellScripts = architecture === 'marketplace' ? marketplaceScripts : defaultScripts;
1206
+
1117
1207
  shellScripts.forEach(script => {
1118
1208
  const scriptPath = path.join(destDir, script);
1119
1209
  try {
@@ -178,7 +178,7 @@ CLI OPTIONS:
178
178
  --project-name=<name> Project name
179
179
  --project-dir=<dir> Output directory path
180
180
  --framework=<framework> ML framework (sklearn|xgboost|tensorflow|transformers)
181
- --model-name=<name> HuggingFace model name (for transformers framework)
181
+ --model-name=<name> Model identifier (<hf-org/model>, s3://..., registry://..., marketplace://...)
182
182
  --model-server=<server> Model server (flask|fastapi|vllm|sglang|tensorrt-llm|lmi|djl)
183
183
  --model-format=<format> Model format (depends on framework)
184
184
  --include-sample Include sample model code