@aws/ml-container-creator 0.7.1 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE-THIRD-PARTY +50760 -16218
- package/bin/cli.js +1 -1
- package/infra/ci-harness/buildspec.yml +4 -0
- package/package.json +3 -1
- package/servers/lib/catalogs/instances.json +52 -1275
- package/servers/lib/catalogs/model-servers.json +80 -0
- package/servers/lib/catalogs/models.json +0 -132
- package/servers/lib/catalogs/popular-diffusors.json +1 -110
- package/servers/model-picker/index.js +27 -16
- package/src/app.js +113 -23
- package/src/lib/cli-handler.js +1 -1
- package/src/lib/config-manager.js +39 -2
- package/src/lib/cross-cutting-checker.js +146 -33
- package/src/lib/deployment-config-resolver.js +10 -4
- package/src/lib/e2e-bootstrap.js +227 -0
- package/src/lib/e2e-catalog-validator.js +103 -0
- package/src/lib/e2e-quota-validator.js +135 -0
- package/src/lib/mcp-client.js +16 -1
- package/src/lib/mcp-command-handler.js +10 -2
- package/src/lib/prompt-runner.js +306 -24
- package/src/lib/prompts.js +9 -3
- package/src/lib/template-manager.js +10 -4
- package/src/lib/train-config-parser.js +136 -0
- package/src/lib/train-config-persistence.js +143 -0
- package/src/lib/train-config-validator.js +112 -0
- package/src/lib/train-feedback.js +46 -0
- package/src/lib/train-idempotency.js +97 -0
- package/src/lib/train-request-builder.js +120 -0
- package/src/lib/tune-catalog-validator.js +5 -5
- package/templates/code/serve +2 -2
- package/templates/code/serving.properties +2 -2
- package/templates/diffusors/serve +3 -3
- package/templates/do/.train_build_request.py +141 -0
- package/templates/do/.train_poll_parser.py +135 -0
- package/templates/do/.train_status_parser.py +187 -0
- package/templates/do/.tune_helper.py +2 -2
- package/templates/do/lib/feedback.sh +41 -0
- package/templates/do/register +8 -2
- package/templates/do/test +5 -5
- package/templates/do/train +786 -0
- package/templates/do/training/config.yaml +140 -0
- package/templates/do/training/train.py +463 -0
- package/templates/do/tune +2 -2
- package/templates/marketplace/config +118 -0
- package/templates/marketplace/deploy +890 -0
- package/templates/marketplace/test +453 -0
|
@@ -1,5 +1,85 @@
|
|
|
1
1
|
{
|
|
2
2
|
"vllm": [
|
|
3
|
+
{
|
|
4
|
+
"image": "vllm/vllm-openai:v0.20.2",
|
|
5
|
+
"tag": "v0.20.2",
|
|
6
|
+
"architecture": "amd64",
|
|
7
|
+
"created": "2026-05-10T00:00:00Z",
|
|
8
|
+
"labels": {
|
|
9
|
+
"cuda_version": "12.9",
|
|
10
|
+
"python_version": "3.12",
|
|
11
|
+
"framework_version": "0.20.2"
|
|
12
|
+
},
|
|
13
|
+
"registry": "dockerhub",
|
|
14
|
+
"repository": "vllm/vllm-openai",
|
|
15
|
+
"defaults": {
|
|
16
|
+
"envVars": {
|
|
17
|
+
"VLLM_TENSOR_PARALLEL_SIZE": "1",
|
|
18
|
+
"VLLM_GPU_MEMORY_UTILIZATION": "0.9",
|
|
19
|
+
"VLLM_MAX_NUM_SEQS": "256",
|
|
20
|
+
"VLLM_MAX_MODEL_LEN": "4096",
|
|
21
|
+
"VLLM_ENABLE_PREFIX_CACHING": "true"
|
|
22
|
+
},
|
|
23
|
+
"inferenceAmiVersion": "al2-ami-sagemaker-inference-gpu-3-1"
|
|
24
|
+
},
|
|
25
|
+
"accelerator": {
|
|
26
|
+
"type": "cuda",
|
|
27
|
+
"version": "12.9",
|
|
28
|
+
"versionRange": {
|
|
29
|
+
"min": "12.4",
|
|
30
|
+
"max": "12.9"
|
|
31
|
+
}
|
|
32
|
+
},
|
|
33
|
+
"validationLevel": "community-validated",
|
|
34
|
+
"profiles": {
|
|
35
|
+
"low-latency": {
|
|
36
|
+
"displayName": "Low Latency",
|
|
37
|
+
"description": "Optimized for single-request latency with prefix caching",
|
|
38
|
+
"envVars": {
|
|
39
|
+
"VLLM_MAX_NUM_SEQS": "32",
|
|
40
|
+
"VLLM_GPU_MEMORY_UTILIZATION": "0.85",
|
|
41
|
+
"VLLM_ENABLE_PREFIX_CACHING": "true"
|
|
42
|
+
},
|
|
43
|
+
"notes": "Prefix caching improves latency for repeated prompts"
|
|
44
|
+
},
|
|
45
|
+
"high-throughput": {
|
|
46
|
+
"displayName": "High Throughput",
|
|
47
|
+
"description": "Optimized for batch processing with continuous batching",
|
|
48
|
+
"envVars": {
|
|
49
|
+
"VLLM_MAX_NUM_SEQS": "512",
|
|
50
|
+
"VLLM_GPU_MEMORY_UTILIZATION": "0.95",
|
|
51
|
+
"VLLM_MAX_MODEL_LEN": "2048",
|
|
52
|
+
"VLLM_ENABLE_PREFIX_CACHING": "false"
|
|
53
|
+
},
|
|
54
|
+
"notes": "Continuous batching maximizes GPU utilization"
|
|
55
|
+
},
|
|
56
|
+
"multi-gpu": {
|
|
57
|
+
"displayName": "Multi-GPU",
|
|
58
|
+
"description": "Tensor parallel across multiple GPUs for large models",
|
|
59
|
+
"envVars": {
|
|
60
|
+
"VLLM_TENSOR_PARALLEL_SIZE": "4",
|
|
61
|
+
"VLLM_GPU_MEMORY_UTILIZATION": "0.9",
|
|
62
|
+
"VLLM_MAX_NUM_SEQS": "256"
|
|
63
|
+
},
|
|
64
|
+
"notes": "Requires instance with 4+ GPUs. Set TENSOR_PARALLEL_SIZE to match GPU count"
|
|
65
|
+
}
|
|
66
|
+
},
|
|
67
|
+
"notes": "vLLM 0.20.2 adds Gemma 4 support, CUDA 12.9, improved multi-GPU. Requires CUDA compat on drivers < 570.",
|
|
68
|
+
"supportedModelTypes": [
|
|
69
|
+
"gemma",
|
|
70
|
+
"gemma2",
|
|
71
|
+
"gemma3",
|
|
72
|
+
"llama",
|
|
73
|
+
"mistral",
|
|
74
|
+
"mixtral",
|
|
75
|
+
"qwen2",
|
|
76
|
+
"qwen3",
|
|
77
|
+
"qwen3_moe",
|
|
78
|
+
"deepseek_v3",
|
|
79
|
+
"phi3",
|
|
80
|
+
"command-r"
|
|
81
|
+
]
|
|
82
|
+
},
|
|
3
83
|
{
|
|
4
84
|
"image": "vllm/vllm-openai:v0.10.1",
|
|
5
85
|
"tag": "v0.10.1",
|
|
@@ -555,98 +555,6 @@
|
|
|
555
555
|
"text-generation"
|
|
556
556
|
]
|
|
557
557
|
},
|
|
558
|
-
"stabilityai/stable-diffusion-3.5-medium": {
|
|
559
|
-
"family": "stable-diffusion-3",
|
|
560
|
-
"gated": false,
|
|
561
|
-
"tags": [
|
|
562
|
-
"image-generation",
|
|
563
|
-
"diffusion",
|
|
564
|
-
"stable-diffusion"
|
|
565
|
-
],
|
|
566
|
-
"architecture": "StableDiffusion3Pipeline",
|
|
567
|
-
"profiles": {
|
|
568
|
-
"default": {
|
|
569
|
-
"displayName": "SD3.5 Medium",
|
|
570
|
-
"envVars": {}
|
|
571
|
-
}
|
|
572
|
-
},
|
|
573
|
-
"notes": "Stable Diffusion 3.5 medium model. Supported natively by vLLM-Omni StableDiffusion3Pipeline.",
|
|
574
|
-
"chatTemplate": null,
|
|
575
|
-
"frameworkCompatibility": {
|
|
576
|
-
"vllm-omni": ">=0.14.0"
|
|
577
|
-
},
|
|
578
|
-
"validationLevel": "experimental",
|
|
579
|
-
"modelType": "diffusor",
|
|
580
|
-
"tasks": [
|
|
581
|
-
"text-to-image"
|
|
582
|
-
]
|
|
583
|
-
},
|
|
584
|
-
"black-forest-labs/FLUX.1-dev": {
|
|
585
|
-
"family": "flux",
|
|
586
|
-
"gated": true,
|
|
587
|
-
"tags": [
|
|
588
|
-
"image-generation",
|
|
589
|
-
"diffusion",
|
|
590
|
-
"flux"
|
|
591
|
-
],
|
|
592
|
-
"architecture": "FluxPipeline",
|
|
593
|
-
"profiles": {
|
|
594
|
-
"default": {
|
|
595
|
-
"displayName": "FLUX.1 Dev",
|
|
596
|
-
"envVars": {}
|
|
597
|
-
}
|
|
598
|
-
},
|
|
599
|
-
"notes": "FLUX.1-dev high-quality generation model. Uses dual text encoders (CLIP + T5) and FlowMatchEuler scheduler. Requires significant VRAM.",
|
|
600
|
-
"chatTemplate": null,
|
|
601
|
-
"frameworkCompatibility": {
|
|
602
|
-
"vllm-omni": ">=0.14.0"
|
|
603
|
-
},
|
|
604
|
-
"validationLevel": "experimental",
|
|
605
|
-
"modelType": "diffusor",
|
|
606
|
-
"tasks": [
|
|
607
|
-
"text-to-image"
|
|
608
|
-
]
|
|
609
|
-
},
|
|
610
|
-
"black-forest-labs/FLUX.1-schnell": {
|
|
611
|
-
"family": "flux",
|
|
612
|
-
"gated": false,
|
|
613
|
-
"tags": [
|
|
614
|
-
"image-generation",
|
|
615
|
-
"diffusion",
|
|
616
|
-
"flux"
|
|
617
|
-
],
|
|
618
|
-
"architecture": "FluxPipeline",
|
|
619
|
-
"notes": "FLUX.1-schnell fast generation model. Fewer denoising steps for faster inference at slightly lower quality",
|
|
620
|
-
"chatTemplate": null,
|
|
621
|
-
"frameworkCompatibility": {
|
|
622
|
-
"vllm-omni": ">=0.14.0"
|
|
623
|
-
},
|
|
624
|
-
"validationLevel": "experimental",
|
|
625
|
-
"modelType": "diffusor",
|
|
626
|
-
"tasks": [
|
|
627
|
-
"text-to-image"
|
|
628
|
-
]
|
|
629
|
-
},
|
|
630
|
-
"Wan-AI/Wan2.1-T2V-14B-Diffusers": {
|
|
631
|
-
"family": "wan",
|
|
632
|
-
"gated": false,
|
|
633
|
-
"tags": [
|
|
634
|
-
"video-generation",
|
|
635
|
-
"diffusion",
|
|
636
|
-
"wan"
|
|
637
|
-
],
|
|
638
|
-
"architecture": "WanPipeline",
|
|
639
|
-
"notes": "Wan2.1 text-to-video 14B model (diffusers format). Requires multi-GPU instance (ml.g5.12xlarge or larger). Must use the -Diffusers variant — the base Wan2.1-T2V-14B repo lacks model_index.json required by vLLM-Omni",
|
|
640
|
-
"chatTemplate": null,
|
|
641
|
-
"frameworkCompatibility": {
|
|
642
|
-
"vllm-omni": ">=0.16.0"
|
|
643
|
-
},
|
|
644
|
-
"validationLevel": "experimental",
|
|
645
|
-
"modelType": "diffusor",
|
|
646
|
-
"tasks": [
|
|
647
|
-
"text-to-video"
|
|
648
|
-
]
|
|
649
|
-
},
|
|
650
558
|
"meta-llama/Llama-3*": {
|
|
651
559
|
"family": "llama-3",
|
|
652
560
|
"gated": true,
|
|
@@ -731,45 +639,5 @@
|
|
|
731
639
|
"tasks": [
|
|
732
640
|
"text-generation"
|
|
733
641
|
]
|
|
734
|
-
},
|
|
735
|
-
"stabilityai/stable-diffusion-*": {
|
|
736
|
-
"family": "stable-diffusion",
|
|
737
|
-
"gated": false,
|
|
738
|
-
"tags": [
|
|
739
|
-
"image-generation",
|
|
740
|
-
"diffusion",
|
|
741
|
-
"stable-diffusion"
|
|
742
|
-
],
|
|
743
|
-
"architecture": null,
|
|
744
|
-
"notes": "Fallback for Stable Diffusion variants not explicitly listed",
|
|
745
|
-
"chatTemplate": null,
|
|
746
|
-
"frameworkCompatibility": {
|
|
747
|
-
"vllm-omni": ">=0.14.0"
|
|
748
|
-
},
|
|
749
|
-
"validationLevel": "experimental",
|
|
750
|
-
"modelType": "diffusor",
|
|
751
|
-
"tasks": [
|
|
752
|
-
"text-to-image"
|
|
753
|
-
]
|
|
754
|
-
},
|
|
755
|
-
"black-forest-labs/FLUX*": {
|
|
756
|
-
"family": "flux",
|
|
757
|
-
"gated": false,
|
|
758
|
-
"tags": [
|
|
759
|
-
"image-generation",
|
|
760
|
-
"diffusion",
|
|
761
|
-
"flux"
|
|
762
|
-
],
|
|
763
|
-
"architecture": null,
|
|
764
|
-
"notes": "Fallback for FLUX model variants not explicitly listed",
|
|
765
|
-
"chatTemplate": null,
|
|
766
|
-
"frameworkCompatibility": {
|
|
767
|
-
"vllm-omni": ">=0.14.0"
|
|
768
|
-
},
|
|
769
|
-
"validationLevel": "experimental",
|
|
770
|
-
"modelType": "diffusor",
|
|
771
|
-
"tasks": [
|
|
772
|
-
"text-to-image"
|
|
773
|
-
]
|
|
774
642
|
}
|
|
775
643
|
}
|
|
@@ -1,110 +1 @@
|
|
|
1
|
-
{
|
|
2
|
-
"stabilityai/stable-diffusion-3.5-medium": {
|
|
3
|
-
"family": "stable-diffusion-3",
|
|
4
|
-
"chat_template": null,
|
|
5
|
-
"gated": false,
|
|
6
|
-
"tags": [
|
|
7
|
-
"image-generation",
|
|
8
|
-
"diffusion",
|
|
9
|
-
"stable-diffusion"
|
|
10
|
-
],
|
|
11
|
-
"architecture": "StableDiffusion3Pipeline",
|
|
12
|
-
"framework_compatibility": {
|
|
13
|
-
"vllm-omni": ">=0.14.0"
|
|
14
|
-
},
|
|
15
|
-
"validation_level": "experimental",
|
|
16
|
-
"profiles": {
|
|
17
|
-
"default": {
|
|
18
|
-
"displayName": "SD3.5 Medium",
|
|
19
|
-
"envVars": {}
|
|
20
|
-
}
|
|
21
|
-
},
|
|
22
|
-
"notes": "Stable Diffusion 3.5 medium model. Supported natively by vLLM-Omni StableDiffusion3Pipeline."
|
|
23
|
-
},
|
|
24
|
-
"black-forest-labs/FLUX.1-dev": {
|
|
25
|
-
"family": "flux",
|
|
26
|
-
"chat_template": null,
|
|
27
|
-
"gated": true,
|
|
28
|
-
"tags": [
|
|
29
|
-
"image-generation",
|
|
30
|
-
"diffusion",
|
|
31
|
-
"flux"
|
|
32
|
-
],
|
|
33
|
-
"architecture": "FluxPipeline",
|
|
34
|
-
"framework_compatibility": {
|
|
35
|
-
"vllm-omni": ">=0.14.0"
|
|
36
|
-
},
|
|
37
|
-
"validation_level": "experimental",
|
|
38
|
-
"profiles": {
|
|
39
|
-
"default": {
|
|
40
|
-
"displayName": "FLUX.1 Dev",
|
|
41
|
-
"envVars": {}
|
|
42
|
-
}
|
|
43
|
-
},
|
|
44
|
-
"notes": "FLUX.1-dev high-quality generation model. Uses dual text encoders (CLIP + T5) and FlowMatchEuler scheduler. Requires significant VRAM."
|
|
45
|
-
},
|
|
46
|
-
"black-forest-labs/FLUX.1-schnell": {
|
|
47
|
-
"family": "flux",
|
|
48
|
-
"chat_template": null,
|
|
49
|
-
"gated": false,
|
|
50
|
-
"tags": [
|
|
51
|
-
"image-generation",
|
|
52
|
-
"diffusion",
|
|
53
|
-
"flux"
|
|
54
|
-
],
|
|
55
|
-
"architecture": "FluxPipeline",
|
|
56
|
-
"framework_compatibility": {
|
|
57
|
-
"vllm-omni": ">=0.14.0"
|
|
58
|
-
},
|
|
59
|
-
"validation_level": "experimental",
|
|
60
|
-
"notes": "FLUX.1-schnell fast generation model. Fewer denoising steps for faster inference at slightly lower quality"
|
|
61
|
-
},
|
|
62
|
-
"Wan-AI/Wan2.1-T2V-14B-Diffusers": {
|
|
63
|
-
"family": "wan",
|
|
64
|
-
"chat_template": null,
|
|
65
|
-
"gated": false,
|
|
66
|
-
"tags": [
|
|
67
|
-
"video-generation",
|
|
68
|
-
"diffusion",
|
|
69
|
-
"wan"
|
|
70
|
-
],
|
|
71
|
-
"architecture": "WanPipeline",
|
|
72
|
-
"framework_compatibility": {
|
|
73
|
-
"vllm-omni": ">=0.16.0"
|
|
74
|
-
},
|
|
75
|
-
"validation_level": "experimental",
|
|
76
|
-
"notes": "Wan2.1 text-to-video 14B model (diffusers format). Requires multi-GPU instance (ml.g5.12xlarge or larger). Must use the -Diffusers variant — the base Wan2.1-T2V-14B repo lacks model_index.json required by vLLM-Omni"
|
|
77
|
-
},
|
|
78
|
-
"stabilityai/stable-diffusion-*": {
|
|
79
|
-
"family": "stable-diffusion",
|
|
80
|
-
"chat_template": null,
|
|
81
|
-
"gated": false,
|
|
82
|
-
"tags": [
|
|
83
|
-
"image-generation",
|
|
84
|
-
"diffusion",
|
|
85
|
-
"stable-diffusion"
|
|
86
|
-
],
|
|
87
|
-
"architecture": null,
|
|
88
|
-
"framework_compatibility": {
|
|
89
|
-
"vllm-omni": ">=0.14.0"
|
|
90
|
-
},
|
|
91
|
-
"validation_level": "experimental",
|
|
92
|
-
"notes": "Fallback for Stable Diffusion variants not explicitly listed"
|
|
93
|
-
},
|
|
94
|
-
"black-forest-labs/FLUX*": {
|
|
95
|
-
"family": "flux",
|
|
96
|
-
"chat_template": null,
|
|
97
|
-
"gated": false,
|
|
98
|
-
"tags": [
|
|
99
|
-
"image-generation",
|
|
100
|
-
"diffusion",
|
|
101
|
-
"flux"
|
|
102
|
-
],
|
|
103
|
-
"architecture": null,
|
|
104
|
-
"framework_compatibility": {
|
|
105
|
-
"vllm-omni": ">=0.14.0"
|
|
106
|
-
},
|
|
107
|
-
"validation_level": "experimental",
|
|
108
|
-
"notes": "Fallback for FLUX model variants not explicitly listed"
|
|
109
|
-
}
|
|
110
|
-
}
|
|
1
|
+
{}
|
|
@@ -1531,18 +1531,25 @@ async function resolveModel({ model_id, fields, mode = 'discover', context }) {
|
|
|
1531
1531
|
let values = {}
|
|
1532
1532
|
let message = null
|
|
1533
1533
|
|
|
1534
|
+
// Reject deprecated JumpStart prefixes
|
|
1535
|
+
if (model_id.startsWith('jumpstart://') || model_id.startsWith('jumpstart-hub://')) {
|
|
1536
|
+
const bareId = model_id.replace(/^jumpstart(-hub)?:\/\//, '')
|
|
1537
|
+
message = `JumpStart is no longer supported. Use the HuggingFace model ID directly: ${bareId}`
|
|
1538
|
+
return {
|
|
1539
|
+
content: [{
|
|
1540
|
+
type: 'text',
|
|
1541
|
+
text: JSON.stringify({ values: {}, choices: {}, message })
|
|
1542
|
+
}]
|
|
1543
|
+
}
|
|
1544
|
+
}
|
|
1545
|
+
|
|
1534
1546
|
if (mode === 'static') {
|
|
1535
1547
|
// Static mode: use StaticCatalogResolver only
|
|
1536
|
-
// For jumpstart:// prefixed IDs, resolve from JumpStart static catalog
|
|
1537
1548
|
const metadata = await staticResolver.fetchModelMetadata(model_id, { fields })
|
|
1538
1549
|
if (metadata) {
|
|
1539
1550
|
values = { ...metadata }
|
|
1540
1551
|
} else {
|
|
1541
|
-
|
|
1542
|
-
message = `Model not found in JumpStart static catalog: ${model_id}`
|
|
1543
|
-
} else {
|
|
1544
|
-
message = `Model not found in static catalog: ${model_id}`
|
|
1545
|
-
}
|
|
1552
|
+
message = `Model not found in static catalog: ${model_id}`
|
|
1546
1553
|
}
|
|
1547
1554
|
} else {
|
|
1548
1555
|
// Discover mode: use ResolverRegistry for live data, merge with static
|
|
@@ -1564,11 +1571,7 @@ async function resolveModel({ model_id, fields, mode = 'discover', context }) {
|
|
|
1564
1571
|
values = { ...merged }
|
|
1565
1572
|
// If the resolver failed but we got data from static catalog, note the fallback
|
|
1566
1573
|
if (resolverFailed && !liveData && staticData) {
|
|
1567
|
-
if (model_id.startsWith('jumpstart://')) {
|
|
1568
|
-
message = '[jumpstart] SageMaker API unreachable. Using static catalog fallback.'
|
|
1569
|
-
} else if (model_id.startsWith('jumpstart-hub://')) {
|
|
1570
|
-
message = '[jumpstart-hub] SageMaker API unreachable. Using static catalog fallback.'
|
|
1571
|
-
} else if (model_id.startsWith('registry://')) {
|
|
1574
|
+
if (model_id.startsWith('registry://')) {
|
|
1572
1575
|
message = '[registry] SageMaker API unreachable. Using static catalog fallback.'
|
|
1573
1576
|
} else if (model_id.startsWith('s3://')) {
|
|
1574
1577
|
message = '[s3] S3 API unreachable. Using static catalog fallback.'
|
|
@@ -1577,11 +1580,7 @@ async function resolveModel({ model_id, fields, mode = 'discover', context }) {
|
|
|
1577
1580
|
} else {
|
|
1578
1581
|
// No data from either source
|
|
1579
1582
|
if (resolverFailed) {
|
|
1580
|
-
if (model_id.startsWith('jumpstart://')) {
|
|
1581
|
-
message = `[jumpstart] Resolver could not fetch data for: ${model_id}`
|
|
1582
|
-
} else if (model_id.startsWith('jumpstart-hub://')) {
|
|
1583
|
-
message = `[jumpstart-hub] Resolver could not fetch data for: ${model_id}`
|
|
1584
|
-
} else if (model_id.startsWith('registry://')) {
|
|
1583
|
+
if (model_id.startsWith('registry://')) {
|
|
1585
1584
|
message = `[registry] Resolver could not fetch data for: ${model_id}`
|
|
1586
1585
|
} else if (model_id.startsWith('s3://')) {
|
|
1587
1586
|
message = `[s3] Resolver could not fetch data for: ${model_id}`
|
|
@@ -1613,6 +1612,18 @@ async function resolveModel({ model_id, fields, mode = 'discover', context }) {
|
|
|
1613
1612
|
values = filtered
|
|
1614
1613
|
}
|
|
1615
1614
|
|
|
1615
|
+
// Exclude jumpstart:// prefixed results from output
|
|
1616
|
+
const resolvedModelId = values.modelId || model_id
|
|
1617
|
+
if (resolvedModelId.startsWith('jumpstart://') || resolvedModelId.startsWith('jumpstart-hub://')) {
|
|
1618
|
+
const bareId = resolvedModelId.replace(/^jumpstart(-hub)?:\/\//, '')
|
|
1619
|
+
return {
|
|
1620
|
+
content: [{
|
|
1621
|
+
type: 'text',
|
|
1622
|
+
text: JSON.stringify({ values: {}, choices: {}, message: `JumpStart is no longer supported. Use the HuggingFace model ID directly: ${bareId}` })
|
|
1623
|
+
}]
|
|
1624
|
+
}
|
|
1625
|
+
}
|
|
1626
|
+
|
|
1616
1627
|
// Build choices with provider prefix labels
|
|
1617
1628
|
const choices = {}
|
|
1618
1629
|
if (Object.keys(values).length > 0) {
|
package/src/app.js
CHANGED
|
@@ -142,30 +142,27 @@ export async function run(projectName, options) {
|
|
|
142
142
|
// Infer modelSource from model name prefix if not set
|
|
143
143
|
const modelName = answers.modelName;
|
|
144
144
|
if (!answers.modelSource && modelName) {
|
|
145
|
+
// Reject deprecated JumpStart prefixes with migration message
|
|
146
|
+
if (modelName.startsWith('jumpstart://') || modelName.startsWith('jumpstart-hub://')) {
|
|
147
|
+
const bareId = modelName.replace(/^jumpstart(-hub)?:\/\//, '');
|
|
148
|
+
console.error(`\n ⚠️ JumpStart is no longer supported. Use the HuggingFace model ID directly: ${bareId}`);
|
|
149
|
+
console.error(' JumpStart model sources have been removed. Use one of:');
|
|
150
|
+
console.error(' • HuggingFace model ID (e.g., meta-llama/Llama-2-7b-hf)');
|
|
151
|
+
console.error(' • s3://bucket/path/model.tar.gz');
|
|
152
|
+
console.error(' • registry://model-package-name');
|
|
153
|
+
console.error(' • marketplace://arn:aws:sagemaker:...\n');
|
|
154
|
+
process.exit(1);
|
|
155
|
+
}
|
|
145
156
|
if (modelName.startsWith('s3://')) {
|
|
146
157
|
answers.modelSource = 's3';
|
|
147
158
|
if (!answers.artifactUri) {
|
|
148
159
|
answers.artifactUri = modelName;
|
|
149
160
|
}
|
|
150
|
-
} else if (modelName.startsWith('jumpstart://')) {
|
|
151
|
-
answers.modelSource = 'jumpstart';
|
|
152
|
-
} else if (modelName.startsWith('jumpstart-hub://')) {
|
|
153
|
-
answers.modelSource = 'jumpstart-hub';
|
|
154
161
|
} else if (modelName.startsWith('registry://')) {
|
|
155
162
|
answers.modelSource = 'registry';
|
|
156
163
|
}
|
|
157
164
|
}
|
|
158
165
|
|
|
159
|
-
// Warn about unsupported model sources
|
|
160
|
-
if (answers.modelSource === 'jumpstart-hub') {
|
|
161
|
-
console.log('\n ⚠️ JumpStart Private Hub models are not yet fully supported.');
|
|
162
|
-
console.log(' The generated project will not be able to download model artifacts at runtime.');
|
|
163
|
-
console.log(' This feature is tracked for a future release.');
|
|
164
|
-
console.log(' Falling back to HuggingFace source.\n');
|
|
165
|
-
answers.modelSource = 'huggingface';
|
|
166
|
-
delete answers.artifactUri;
|
|
167
|
-
}
|
|
168
|
-
|
|
169
166
|
// Note about registry model requirements
|
|
170
167
|
if (answers.modelSource === 'registry') {
|
|
171
168
|
console.log('\n ℹ️ Registry model: the container will resolve the artifact URI at startup');
|
|
@@ -352,20 +349,82 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
|
|
|
352
349
|
}
|
|
353
350
|
|
|
354
351
|
// Exclude tune files when framework is NOT transformers OR deploymentTarget is batch-transform
|
|
355
|
-
|
|
352
|
+
const tuneIncluded = architecture === 'transformers' && answers.deploymentTarget !== 'batch-transform';
|
|
353
|
+
if (!tuneIncluded) {
|
|
356
354
|
ignorePatterns.push('**/do/tune');
|
|
357
355
|
ignorePatterns.push('**/do/.tune_helper.py');
|
|
358
356
|
}
|
|
359
357
|
|
|
358
|
+
// Exclude train files when deploymentTarget is batch-transform
|
|
359
|
+
const trainIncluded = answers.deploymentTarget !== 'batch-transform';
|
|
360
|
+
if (!trainIncluded) {
|
|
361
|
+
ignorePatterns.push('**/do/train');
|
|
362
|
+
ignorePatterns.push('**/do/.train_build_request.py');
|
|
363
|
+
ignorePatterns.push('**/do/.train_status_parser.py');
|
|
364
|
+
ignorePatterns.push('**/do/.train_poll_parser.py');
|
|
365
|
+
ignorePatterns.push('**/do/training/**');
|
|
366
|
+
}
|
|
367
|
+
|
|
368
|
+
// Exclude feedback.sh when neither tune nor train is included
|
|
369
|
+
if (!tuneIncluded && !trainIncluded) {
|
|
370
|
+
ignorePatterns.push('**/do/lib/feedback.sh');
|
|
371
|
+
}
|
|
372
|
+
|
|
360
373
|
// Exclude do/test when hosted-model-endpoint is not selected
|
|
361
374
|
const testTypes = answers.testTypes || [];
|
|
362
375
|
if (!testTypes.includes('hosted-model-endpoint')) {
|
|
363
376
|
ignorePatterns.push('**/do/test');
|
|
364
377
|
}
|
|
365
378
|
|
|
366
|
-
//
|
|
367
|
-
|
|
368
|
-
|
|
379
|
+
// Marketplace projects: exclude everything container-related
|
|
380
|
+
if (architecture === 'marketplace') {
|
|
381
|
+
ignorePatterns.push('**/Dockerfile');
|
|
382
|
+
ignorePatterns.push('**/code/**');
|
|
383
|
+
ignorePatterns.push('**/do/build');
|
|
384
|
+
ignorePatterns.push('**/do/push');
|
|
385
|
+
ignorePatterns.push('**/do/submit');
|
|
386
|
+
ignorePatterns.push('**/do/adapter');
|
|
387
|
+
ignorePatterns.push('**/do/adapters/**');
|
|
388
|
+
ignorePatterns.push('**/do/tune');
|
|
389
|
+
ignorePatterns.push('**/do/.tune_helper.py');
|
|
390
|
+
ignorePatterns.push('**/do/train');
|
|
391
|
+
ignorePatterns.push('**/do/.train_build_request.py');
|
|
392
|
+
ignorePatterns.push('**/do/.train_status_parser.py');
|
|
393
|
+
ignorePatterns.push('**/do/.train_poll_parser.py');
|
|
394
|
+
ignorePatterns.push('**/do/training/**');
|
|
395
|
+
ignorePatterns.push('**/do/add-ic');
|
|
396
|
+
ignorePatterns.push('**/do/run');
|
|
397
|
+
ignorePatterns.push('**/sample_model/**');
|
|
398
|
+
ignorePatterns.push('**/requirements.txt');
|
|
399
|
+
ignorePatterns.push('**/nginx-*.conf');
|
|
400
|
+
ignorePatterns.push('**/triton/**');
|
|
401
|
+
ignorePatterns.push('**/diffusors/**');
|
|
402
|
+
ignorePatterns.push('**/hyperpod/**');
|
|
403
|
+
ignorePatterns.push('**/MIGRATION.md');
|
|
404
|
+
ignorePatterns.push('**/TEMPLATE_SYSTEM.md');
|
|
405
|
+
ignorePatterns.push('**/IAM_PERMISSIONS.md');
|
|
406
|
+
ignorePatterns.push('**/PROJECT_README.md');
|
|
407
|
+
ignorePatterns.push('**/deploy_notebook_generator.py');
|
|
408
|
+
ignorePatterns.push('**/buildspec.yml');
|
|
409
|
+
ignorePatterns.push('**/test/**');
|
|
410
|
+
// Exclude templates that reference container-specific variables (framework, modelServer)
|
|
411
|
+
// Marketplace overlays its own config, deploy, and test templates
|
|
412
|
+
ignorePatterns.push('**/do/config');
|
|
413
|
+
ignorePatterns.push('**/do/deploy');
|
|
414
|
+
ignorePatterns.push('**/do/test');
|
|
415
|
+
ignorePatterns.push('**/do/README.md');
|
|
416
|
+
ignorePatterns.push('**/do/export');
|
|
417
|
+
ignorePatterns.push('**/do/validate');
|
|
418
|
+
ignorePatterns.push('**/do/ic/**');
|
|
419
|
+
}
|
|
420
|
+
|
|
421
|
+
// Always exclude architecture-specific source directories from main copy
|
|
422
|
+
// (they are overlaid separately for their respective architectures)
|
|
423
|
+
ignorePatterns.push('**/marketplace/**');
|
|
424
|
+
if (architecture !== 'marketplace') {
|
|
425
|
+
ignorePatterns.push('**/triton/**');
|
|
426
|
+
ignorePatterns.push('**/diffusors/**');
|
|
427
|
+
}
|
|
369
428
|
|
|
370
429
|
// For triton and diffusors, exclude the default Dockerfile
|
|
371
430
|
if (architecture === 'triton' || architecture === 'diffusors') {
|
|
@@ -431,6 +490,14 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
|
|
|
431
490
|
_copyFile(path.join(templateDir, 'diffusors/patch_image_api.py'), path.join(destDir, 'code/patch_image_api.py'));
|
|
432
491
|
break;
|
|
433
492
|
|
|
493
|
+
case 'marketplace':
|
|
494
|
+
// Marketplace projects: overlay marketplace-specific templates
|
|
495
|
+
// These replace the default do/config, do/deploy, and do/test with marketplace versions
|
|
496
|
+
_renderTemplate(path.join(templateDir, 'marketplace/config'), path.join(destDir, 'do/config'), templateVars);
|
|
497
|
+
_renderTemplate(path.join(templateDir, 'marketplace/deploy'), path.join(destDir, 'do/deploy'), templateVars);
|
|
498
|
+
_renderTemplate(path.join(templateDir, 'marketplace/test'), path.join(destDir, 'do/test'), templateVars);
|
|
499
|
+
break;
|
|
500
|
+
|
|
434
501
|
default:
|
|
435
502
|
// Fallback to HTTP behavior
|
|
436
503
|
_unlinkIfExists(path.join(destDir, 'code/chat_template.jinja'));
|
|
@@ -450,7 +517,10 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
|
|
|
450
517
|
}
|
|
451
518
|
|
|
452
519
|
// Copy PROJECT_README.md as README.md (overwriting the template README)
|
|
453
|
-
|
|
520
|
+
// Marketplace projects don't use the standard README (no container/framework info)
|
|
521
|
+
if (architecture !== 'marketplace') {
|
|
522
|
+
_renderTemplate(path.join(templateDir, 'PROJECT_README.md'), path.join(destDir, 'README.md'), templateVars);
|
|
523
|
+
}
|
|
454
524
|
|
|
455
525
|
// Copy do/lib/ Node.js modules (plain copy, no EJS)
|
|
456
526
|
const doLibDir = path.join(destDir, 'do', 'lib');
|
|
@@ -491,7 +561,7 @@ export async function writeProject(templateDir, destDir, answers, registryConfig
|
|
|
491
561
|
*/
|
|
492
562
|
export async function postGenerate(destDir, answers, tritonBackends = {}) {
|
|
493
563
|
// Set executable permissions on shell scripts
|
|
494
|
-
_setExecutablePermissions(destDir);
|
|
564
|
+
_setExecutablePermissions(destDir, answers);
|
|
495
565
|
|
|
496
566
|
// Run sample model training if requested
|
|
497
567
|
const architecture = answers.architecture;
|
|
@@ -1092,8 +1162,25 @@ function _unlinkIfExists(filePath) {
|
|
|
1092
1162
|
*
|
|
1093
1163
|
* @param {string} destDir - Path to the generated project directory
|
|
1094
1164
|
*/
|
|
1095
|
-
function _setExecutablePermissions(destDir) {
|
|
1096
|
-
const shellScripts = [
|
|
1165
|
+
function _setExecutablePermissions(destDir, answers = {}) {
|
|
1166
|
+
const architecture = answers.architecture;
|
|
1167
|
+
|
|
1168
|
+
// Marketplace projects have a reduced set of scripts
|
|
1169
|
+
const marketplaceScripts = [
|
|
1170
|
+
'do/config',
|
|
1171
|
+
'do/deploy',
|
|
1172
|
+
'do/test',
|
|
1173
|
+
'do/logs',
|
|
1174
|
+
'do/clean',
|
|
1175
|
+
'do/register',
|
|
1176
|
+
'do/ci',
|
|
1177
|
+
'do/manifest',
|
|
1178
|
+
'do/benchmark',
|
|
1179
|
+
'do/optimize',
|
|
1180
|
+
'do/status'
|
|
1181
|
+
];
|
|
1182
|
+
|
|
1183
|
+
const defaultScripts = [
|
|
1097
1184
|
'do/config',
|
|
1098
1185
|
'do/build',
|
|
1099
1186
|
'do/push',
|
|
@@ -1111,9 +1198,12 @@ function _setExecutablePermissions(destDir) {
|
|
|
1111
1198
|
'do/status',
|
|
1112
1199
|
'do/add-ic',
|
|
1113
1200
|
'do/adapter',
|
|
1114
|
-
'do/tune'
|
|
1201
|
+
'do/tune',
|
|
1202
|
+
'do/train'
|
|
1115
1203
|
];
|
|
1116
1204
|
|
|
1205
|
+
const shellScripts = architecture === 'marketplace' ? marketplaceScripts : defaultScripts;
|
|
1206
|
+
|
|
1117
1207
|
shellScripts.forEach(script => {
|
|
1118
1208
|
const scriptPath = path.join(destDir, script);
|
|
1119
1209
|
try {
|
package/src/lib/cli-handler.js
CHANGED
|
@@ -178,7 +178,7 @@ CLI OPTIONS:
|
|
|
178
178
|
--project-name=<name> Project name
|
|
179
179
|
--project-dir=<dir> Output directory path
|
|
180
180
|
--framework=<framework> ML framework (sklearn|xgboost|tensorflow|transformers)
|
|
181
|
-
--model-name=<name>
|
|
181
|
+
--model-name=<name> Model identifier (<hf-org/model>, s3://..., registry://..., marketplace://...)
|
|
182
182
|
--model-server=<server> Model server (flask|fastapi|vllm|sglang|tensorrt-llm|lmi|djl)
|
|
183
183
|
--model-format=<format> Model format (depends on framework)
|
|
184
184
|
--include-sample Include sample model code
|