@aws/ml-container-creator 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +202 -0
- package/LICENSE-THIRD-PARTY +68620 -0
- package/NOTICE +2 -0
- package/README.md +106 -0
- package/bin/cli.js +365 -0
- package/config/defaults.json +32 -0
- package/config/presets/transformers-djl.json +26 -0
- package/config/presets/transformers-gpu.json +24 -0
- package/config/presets/transformers-lmi.json +27 -0
- package/package.json +129 -0
- package/servers/README.md +419 -0
- package/servers/base-image-picker/catalogs/model-servers.json +1191 -0
- package/servers/base-image-picker/catalogs/python-slim.json +38 -0
- package/servers/base-image-picker/catalogs/triton-backends.json +51 -0
- package/servers/base-image-picker/catalogs/triton.json +38 -0
- package/servers/base-image-picker/index.js +495 -0
- package/servers/base-image-picker/manifest.json +17 -0
- package/servers/base-image-picker/package.json +15 -0
- package/servers/hyperpod-cluster-picker/LICENSE +202 -0
- package/servers/hyperpod-cluster-picker/index.js +424 -0
- package/servers/hyperpod-cluster-picker/manifest.json +14 -0
- package/servers/hyperpod-cluster-picker/package.json +17 -0
- package/servers/instance-recommender/LICENSE +202 -0
- package/servers/instance-recommender/catalogs/instances.json +852 -0
- package/servers/instance-recommender/index.js +284 -0
- package/servers/instance-recommender/manifest.json +16 -0
- package/servers/instance-recommender/package.json +15 -0
- package/servers/lib/LICENSE +202 -0
- package/servers/lib/bedrock-client.js +160 -0
- package/servers/lib/custom-validators.js +46 -0
- package/servers/lib/dynamic-resolver.js +36 -0
- package/servers/lib/package.json +11 -0
- package/servers/lib/schemas/image-catalog.schema.json +185 -0
- package/servers/lib/schemas/instances.schema.json +124 -0
- package/servers/lib/schemas/manifest.schema.json +64 -0
- package/servers/lib/schemas/model-catalog.schema.json +91 -0
- package/servers/lib/schemas/regions.schema.json +26 -0
- package/servers/lib/schemas/triton-backends.schema.json +51 -0
- package/servers/model-picker/catalogs/jumpstart-public.json +66 -0
- package/servers/model-picker/catalogs/popular-diffusors.json +88 -0
- package/servers/model-picker/catalogs/popular-transformers.json +226 -0
- package/servers/model-picker/index.js +1693 -0
- package/servers/model-picker/manifest.json +18 -0
- package/servers/model-picker/package.json +20 -0
- package/servers/region-picker/LICENSE +202 -0
- package/servers/region-picker/catalogs/regions.json +263 -0
- package/servers/region-picker/index.js +230 -0
- package/servers/region-picker/manifest.json +16 -0
- package/servers/region-picker/package.json +15 -0
- package/src/app.js +1007 -0
- package/src/copy-tpl.js +77 -0
- package/src/lib/accelerator-validator.js +39 -0
- package/src/lib/asset-manager.js +385 -0
- package/src/lib/aws-profile-parser.js +181 -0
- package/src/lib/bootstrap-command-handler.js +1647 -0
- package/src/lib/bootstrap-config.js +238 -0
- package/src/lib/ci-register-helpers.js +124 -0
- package/src/lib/ci-report-helpers.js +158 -0
- package/src/lib/ci-stage-helpers.js +268 -0
- package/src/lib/cli-handler.js +529 -0
- package/src/lib/comment-generator.js +544 -0
- package/src/lib/community-reports-validator.js +91 -0
- package/src/lib/config-manager.js +2106 -0
- package/src/lib/configuration-exporter.js +204 -0
- package/src/lib/configuration-manager.js +695 -0
- package/src/lib/configuration-matcher.js +221 -0
- package/src/lib/cpu-validator.js +36 -0
- package/src/lib/cuda-validator.js +57 -0
- package/src/lib/deployment-config-resolver.js +103 -0
- package/src/lib/deployment-entry-schema.js +125 -0
- package/src/lib/deployment-registry.js +598 -0
- package/src/lib/docker-introspection-validator.js +51 -0
- package/src/lib/engine-prefix-resolver.js +60 -0
- package/src/lib/huggingface-client.js +172 -0
- package/src/lib/key-value-parser.js +37 -0
- package/src/lib/known-flags-validator.js +200 -0
- package/src/lib/manifest-cli.js +280 -0
- package/src/lib/mcp-client.js +303 -0
- package/src/lib/mcp-command-handler.js +532 -0
- package/src/lib/neuron-validator.js +80 -0
- package/src/lib/parameter-schema-validator.js +284 -0
- package/src/lib/prompt-runner.js +1349 -0
- package/src/lib/prompts.js +1138 -0
- package/src/lib/registry-command-handler.js +519 -0
- package/src/lib/registry-loader.js +198 -0
- package/src/lib/rocm-validator.js +80 -0
- package/src/lib/schema-validator.js +157 -0
- package/src/lib/sensitive-redactor.js +59 -0
- package/src/lib/template-engine.js +156 -0
- package/src/lib/template-manager.js +341 -0
- package/src/lib/validation-engine.js +314 -0
- package/src/prompt-adapter.js +63 -0
- package/templates/Dockerfile +300 -0
- package/templates/IAM_PERMISSIONS.md +84 -0
- package/templates/MIGRATION.md +488 -0
- package/templates/PROJECT_README.md +439 -0
- package/templates/TEMPLATE_SYSTEM.md +243 -0
- package/templates/buildspec.yml +64 -0
- package/templates/code/chat_template.jinja +1 -0
- package/templates/code/flask/gunicorn_config.py +35 -0
- package/templates/code/flask/wsgi.py +10 -0
- package/templates/code/model_handler.py +387 -0
- package/templates/code/serve +300 -0
- package/templates/code/serve.py +175 -0
- package/templates/code/serving.properties +105 -0
- package/templates/code/start_server.py +39 -0
- package/templates/code/start_server.sh +39 -0
- package/templates/diffusors/Dockerfile +72 -0
- package/templates/diffusors/patch_image_api.py +35 -0
- package/templates/diffusors/serve +115 -0
- package/templates/diffusors/start_server.sh +114 -0
- package/templates/do/.gitkeep +1 -0
- package/templates/do/README.md +541 -0
- package/templates/do/build +83 -0
- package/templates/do/ci +681 -0
- package/templates/do/clean +811 -0
- package/templates/do/config +260 -0
- package/templates/do/deploy +1560 -0
- package/templates/do/export +306 -0
- package/templates/do/logs +319 -0
- package/templates/do/manifest +12 -0
- package/templates/do/push +119 -0
- package/templates/do/register +580 -0
- package/templates/do/run +113 -0
- package/templates/do/submit +417 -0
- package/templates/do/test +1147 -0
- package/templates/hyperpod/configmap.yaml +24 -0
- package/templates/hyperpod/deployment.yaml +71 -0
- package/templates/hyperpod/pvc.yaml +42 -0
- package/templates/hyperpod/service.yaml +17 -0
- package/templates/nginx-diffusors.conf +74 -0
- package/templates/nginx-predictors.conf +47 -0
- package/templates/nginx-tensorrt.conf +74 -0
- package/templates/requirements.txt +61 -0
- package/templates/sample_model/test_inference.py +123 -0
- package/templates/sample_model/train_abalone.py +252 -0
- package/templates/test/test_endpoint.sh +79 -0
- package/templates/test/test_local_image.sh +80 -0
- package/templates/test/test_model_handler.py +180 -0
- package/templates/triton/Dockerfile +128 -0
- package/templates/triton/config.pbtxt +163 -0
- package/templates/triton/model.py +130 -0
- package/templates/triton/requirements.txt +11 -0
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
{
|
|
2
|
+
"jumpstart://huggingface-llm-falcon-7b": {
|
|
3
|
+
"modelId": "jumpstart://huggingface-llm-falcon-7b",
|
|
4
|
+
"family": "falcon",
|
|
5
|
+
"framework": "huggingface",
|
|
6
|
+
"provider": "jumpstart",
|
|
7
|
+
"tags": ["text-generation", "llm"],
|
|
8
|
+
"description": "Falcon 7B via JumpStart"
|
|
9
|
+
},
|
|
10
|
+
"jumpstart://huggingface-llm-falcon-40b": {
|
|
11
|
+
"modelId": "jumpstart://huggingface-llm-falcon-40b",
|
|
12
|
+
"family": "falcon",
|
|
13
|
+
"framework": "huggingface",
|
|
14
|
+
"provider": "jumpstart",
|
|
15
|
+
"tags": ["text-generation", "llm"],
|
|
16
|
+
"description": "Falcon 40B via JumpStart"
|
|
17
|
+
},
|
|
18
|
+
"jumpstart://meta-textgeneration-llama-2-7b": {
|
|
19
|
+
"modelId": "jumpstart://meta-textgeneration-llama-2-7b",
|
|
20
|
+
"family": "llama-2",
|
|
21
|
+
"framework": "huggingface",
|
|
22
|
+
"provider": "jumpstart",
|
|
23
|
+
"tags": ["text-generation", "llm", "llama-2"],
|
|
24
|
+
"description": "Llama 2 7B via JumpStart"
|
|
25
|
+
},
|
|
26
|
+
"jumpstart://meta-textgeneration-llama-2-13b": {
|
|
27
|
+
"modelId": "jumpstart://meta-textgeneration-llama-2-13b",
|
|
28
|
+
"family": "llama-2",
|
|
29
|
+
"framework": "huggingface",
|
|
30
|
+
"provider": "jumpstart",
|
|
31
|
+
"tags": ["text-generation", "llm", "llama-2"],
|
|
32
|
+
"description": "Llama 2 13B via JumpStart"
|
|
33
|
+
},
|
|
34
|
+
"jumpstart://meta-textgeneration-llama-2-70b": {
|
|
35
|
+
"modelId": "jumpstart://meta-textgeneration-llama-2-70b",
|
|
36
|
+
"family": "llama-2",
|
|
37
|
+
"framework": "huggingface",
|
|
38
|
+
"provider": "jumpstart",
|
|
39
|
+
"tags": ["text-generation", "llm", "llama-2"],
|
|
40
|
+
"description": "Llama 2 70B via JumpStart"
|
|
41
|
+
},
|
|
42
|
+
"jumpstart://model-txt2img-stabilityai-stable-diffusion-v2-1-base": {
|
|
43
|
+
"modelId": "jumpstart://model-txt2img-stabilityai-stable-diffusion-v2-1-base",
|
|
44
|
+
"family": "stable-diffusion",
|
|
45
|
+
"framework": "huggingface",
|
|
46
|
+
"provider": "jumpstart",
|
|
47
|
+
"tags": ["image-generation", "diffusion", "stable-diffusion"],
|
|
48
|
+
"description": "Stable Diffusion v2.1 Base via JumpStart"
|
|
49
|
+
},
|
|
50
|
+
"jumpstart://huggingface-text2text-flan-t5-xl": {
|
|
51
|
+
"modelId": "jumpstart://huggingface-text2text-flan-t5-xl",
|
|
52
|
+
"family": "flan-t5",
|
|
53
|
+
"framework": "huggingface",
|
|
54
|
+
"provider": "jumpstart",
|
|
55
|
+
"tags": ["text-generation", "text2text", "flan-t5"],
|
|
56
|
+
"description": "Flan-T5 XL via JumpStart"
|
|
57
|
+
},
|
|
58
|
+
"jumpstart://huggingface-textembedding-gpt-j-6b": {
|
|
59
|
+
"modelId": "jumpstart://huggingface-textembedding-gpt-j-6b",
|
|
60
|
+
"family": "gpt-j",
|
|
61
|
+
"framework": "huggingface",
|
|
62
|
+
"provider": "jumpstart",
|
|
63
|
+
"tags": ["text-embedding", "gpt-j"],
|
|
64
|
+
"description": "GPT-J 6B Embedding via JumpStart"
|
|
65
|
+
}
|
|
66
|
+
}
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
{
|
|
2
|
+
"stabilityai/stable-diffusion-3.5-medium": {
|
|
3
|
+
"family": "stable-diffusion-3",
|
|
4
|
+
"chat_template": null,
|
|
5
|
+
"gated": false,
|
|
6
|
+
"tags": ["image-generation", "diffusion", "stable-diffusion"],
|
|
7
|
+
"architecture": "StableDiffusion3Pipeline",
|
|
8
|
+
"framework_compatibility": {
|
|
9
|
+
"vllm-omni": ">=0.14.0"
|
|
10
|
+
},
|
|
11
|
+
"validation_level": "experimental",
|
|
12
|
+
"profiles": {
|
|
13
|
+
"default": {
|
|
14
|
+
"displayName": "SD3.5 Medium",
|
|
15
|
+
"envVars": {},
|
|
16
|
+
"recommendedInstanceTypes": ["ml.g5.2xlarge", "ml.g5.4xlarge"]
|
|
17
|
+
}
|
|
18
|
+
},
|
|
19
|
+
"notes": "Stable Diffusion 3.5 medium model. Supported natively by vLLM-Omni StableDiffusion3Pipeline."
|
|
20
|
+
},
|
|
21
|
+
"black-forest-labs/FLUX.1-dev": {
|
|
22
|
+
"family": "flux",
|
|
23
|
+
"chat_template": null,
|
|
24
|
+
"gated": true,
|
|
25
|
+
"tags": ["image-generation", "diffusion", "flux"],
|
|
26
|
+
"architecture": "FluxPipeline",
|
|
27
|
+
"framework_compatibility": {
|
|
28
|
+
"vllm-omni": ">=0.14.0"
|
|
29
|
+
},
|
|
30
|
+
"validation_level": "experimental",
|
|
31
|
+
"profiles": {
|
|
32
|
+
"default": {
|
|
33
|
+
"displayName": "FLUX.1 Dev",
|
|
34
|
+
"envVars": {},
|
|
35
|
+
"recommendedInstanceTypes": ["ml.g5.4xlarge", "ml.g5.12xlarge"]
|
|
36
|
+
}
|
|
37
|
+
},
|
|
38
|
+
"notes": "FLUX.1-dev high-quality generation model. Uses dual text encoders (CLIP + T5) and FlowMatchEuler scheduler. Requires significant VRAM."
|
|
39
|
+
},
|
|
40
|
+
"black-forest-labs/FLUX.1-schnell": {
|
|
41
|
+
"family": "flux",
|
|
42
|
+
"chat_template": null,
|
|
43
|
+
"gated": false,
|
|
44
|
+
"tags": ["image-generation", "diffusion", "flux"],
|
|
45
|
+
"architecture": "FluxPipeline",
|
|
46
|
+
"framework_compatibility": {
|
|
47
|
+
"vllm-omni": ">=0.14.0"
|
|
48
|
+
},
|
|
49
|
+
"validation_level": "experimental",
|
|
50
|
+
"notes": "FLUX.1-schnell fast generation model. Fewer denoising steps for faster inference at slightly lower quality"
|
|
51
|
+
},
|
|
52
|
+
"Wan-AI/Wan2.1-T2V-14B-Diffusers": {
|
|
53
|
+
"family": "wan",
|
|
54
|
+
"chat_template": null,
|
|
55
|
+
"gated": false,
|
|
56
|
+
"tags": ["video-generation", "diffusion", "wan"],
|
|
57
|
+
"architecture": "WanPipeline",
|
|
58
|
+
"framework_compatibility": {
|
|
59
|
+
"vllm-omni": ">=0.16.0"
|
|
60
|
+
},
|
|
61
|
+
"validation_level": "experimental",
|
|
62
|
+
"notes": "Wan2.1 text-to-video 14B model (diffusers format). Requires multi-GPU instance (ml.g5.12xlarge or larger). Must use the -Diffusers variant — the base Wan2.1-T2V-14B repo lacks model_index.json required by vLLM-Omni"
|
|
63
|
+
},
|
|
64
|
+
"stabilityai/stable-diffusion-*": {
|
|
65
|
+
"family": "stable-diffusion",
|
|
66
|
+
"chat_template": null,
|
|
67
|
+
"gated": false,
|
|
68
|
+
"tags": ["image-generation", "diffusion", "stable-diffusion"],
|
|
69
|
+
"architecture": null,
|
|
70
|
+
"framework_compatibility": {
|
|
71
|
+
"vllm-omni": ">=0.14.0"
|
|
72
|
+
},
|
|
73
|
+
"validation_level": "experimental",
|
|
74
|
+
"notes": "Fallback for Stable Diffusion variants not explicitly listed"
|
|
75
|
+
},
|
|
76
|
+
"black-forest-labs/FLUX*": {
|
|
77
|
+
"family": "flux",
|
|
78
|
+
"chat_template": null,
|
|
79
|
+
"gated": false,
|
|
80
|
+
"tags": ["image-generation", "diffusion", "flux"],
|
|
81
|
+
"architecture": null,
|
|
82
|
+
"framework_compatibility": {
|
|
83
|
+
"vllm-omni": ">=0.14.0"
|
|
84
|
+
},
|
|
85
|
+
"validation_level": "experimental",
|
|
86
|
+
"notes": "Fallback for FLUX model variants not explicitly listed"
|
|
87
|
+
}
|
|
88
|
+
}
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
{
|
|
2
|
+
"openai/gpt-oss-20b": {
|
|
3
|
+
"family": "gpt-oss",
|
|
4
|
+
"chat_template": "",
|
|
5
|
+
"gated": false,
|
|
6
|
+
"tags": ["text-generation", "openai", "conversational"],
|
|
7
|
+
"architecture": "GPT2LMHeadModel",
|
|
8
|
+
"framework_compatibility": {
|
|
9
|
+
"vllm": ">=0.3.0",
|
|
10
|
+
"tensorrt-llm": ">=0.8.0",
|
|
11
|
+
"sglang": ">=0.2.0"
|
|
12
|
+
},
|
|
13
|
+
"validation_level": "community-validated",
|
|
14
|
+
"notes": "Open-source 20B parameter model. Requires significant GPU memory for inference"
|
|
15
|
+
},
|
|
16
|
+
"meta-llama/Llama-2-7b-chat-hf": {
|
|
17
|
+
"family": "llama-2",
|
|
18
|
+
"chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{{ '[INST] <<SYS>>\\n' + message['content'] + '\\n<</SYS>>\\n\\n' }}{% elif message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + ' ' }}{% endif %}{% endfor %}",
|
|
19
|
+
"gated": true,
|
|
20
|
+
"tags": ["text-generation", "llama-2", "conversational"],
|
|
21
|
+
"architecture": "LlamaForCausalLM",
|
|
22
|
+
"framework_compatibility": {
|
|
23
|
+
"vllm": ">=0.3.0",
|
|
24
|
+
"tensorrt-llm": ">=0.8.0",
|
|
25
|
+
"sglang": ">=0.2.0"
|
|
26
|
+
},
|
|
27
|
+
"validation_level": "tested",
|
|
28
|
+
"profiles": {
|
|
29
|
+
"7b": {
|
|
30
|
+
"displayName": "Llama-2 7B",
|
|
31
|
+
"envVars": {
|
|
32
|
+
"MAX_MODEL_LEN": "4096",
|
|
33
|
+
"GPU_MEMORY_UTILIZATION": "0.9"
|
|
34
|
+
},
|
|
35
|
+
"recommendedInstanceTypes": ["ml.g5.xlarge", "ml.g5.2xlarge"]
|
|
36
|
+
}
|
|
37
|
+
},
|
|
38
|
+
"notes": "Llama-2 7B chat model with official chat template. Requires HuggingFace authentication for download"
|
|
39
|
+
},
|
|
40
|
+
"meta-llama/Llama-2-13b-chat-hf": {
|
|
41
|
+
"family": "llama-2",
|
|
42
|
+
"chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{{ '[INST] <<SYS>>\\n' + message['content'] + '\\n<</SYS>>\\n\\n' }}{% elif message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + ' ' }}{% endif %}{% endfor %}",
|
|
43
|
+
"gated": true,
|
|
44
|
+
"tags": ["text-generation", "llama-2", "conversational"],
|
|
45
|
+
"architecture": "LlamaForCausalLM",
|
|
46
|
+
"framework_compatibility": {
|
|
47
|
+
"vllm": ">=0.3.0",
|
|
48
|
+
"tensorrt-llm": ">=0.8.0",
|
|
49
|
+
"sglang": ">=0.2.0"
|
|
50
|
+
},
|
|
51
|
+
"validation_level": "tested",
|
|
52
|
+
"profiles": {
|
|
53
|
+
"13b": {
|
|
54
|
+
"displayName": "Llama-2 13B",
|
|
55
|
+
"envVars": {
|
|
56
|
+
"MAX_MODEL_LEN": "4096",
|
|
57
|
+
"GPU_MEMORY_UTILIZATION": "0.9"
|
|
58
|
+
},
|
|
59
|
+
"recommendedInstanceTypes": ["ml.g5.2xlarge", "ml.g5.4xlarge"]
|
|
60
|
+
}
|
|
61
|
+
},
|
|
62
|
+
"notes": "Llama-2 13B chat model. Requires more GPU memory than 7B variant"
|
|
63
|
+
},
|
|
64
|
+
|
|
65
|
+
"meta-llama/Llama-2-70b-chat-hf": {
|
|
66
|
+
"family": "llama-2",
|
|
67
|
+
"chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{{ '[INST] <<SYS>>\\n' + message['content'] + '\\n<</SYS>>\\n\\n' }}{% elif message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + ' ' }}{% endif %}{% endfor %}",
|
|
68
|
+
"gated": true,
|
|
69
|
+
"tags": ["text-generation", "llama-2", "conversational"],
|
|
70
|
+
"architecture": "LlamaForCausalLM",
|
|
71
|
+
"framework_compatibility": {
|
|
72
|
+
"vllm": ">=0.3.0",
|
|
73
|
+
"tensorrt-llm": ">=0.8.0",
|
|
74
|
+
"sglang": ">=0.2.0"
|
|
75
|
+
},
|
|
76
|
+
"validation_level": "community-validated",
|
|
77
|
+
"profiles": {
|
|
78
|
+
"70b-tp2": {
|
|
79
|
+
"displayName": "Llama-2 70B (2-GPU)",
|
|
80
|
+
"envVars": {
|
|
81
|
+
"TENSOR_PARALLEL_SIZE": "2",
|
|
82
|
+
"MAX_MODEL_LEN": "4096",
|
|
83
|
+
"GPU_MEMORY_UTILIZATION": "0.95"
|
|
84
|
+
},
|
|
85
|
+
"recommendedInstanceTypes": ["ml.g5.12xlarge"]
|
|
86
|
+
},
|
|
87
|
+
"70b-tp4": {
|
|
88
|
+
"displayName": "Llama-2 70B (4-GPU)",
|
|
89
|
+
"envVars": {
|
|
90
|
+
"TENSOR_PARALLEL_SIZE": "4",
|
|
91
|
+
"MAX_MODEL_LEN": "4096",
|
|
92
|
+
"GPU_MEMORY_UTILIZATION": "0.9"
|
|
93
|
+
},
|
|
94
|
+
"recommendedInstanceTypes": ["ml.g5.12xlarge", "ml.g5.48xlarge"]
|
|
95
|
+
}
|
|
96
|
+
},
|
|
97
|
+
"notes": "Llama-2 70B requires tensor parallelism across multiple GPUs"
|
|
98
|
+
},
|
|
99
|
+
"mistralai/Mistral-7B-Instruct-v0.1": {
|
|
100
|
+
"family": "mistral",
|
|
101
|
+
"chat_template": "{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token }}{% endif %}{% endfor %}",
|
|
102
|
+
"gated": false,
|
|
103
|
+
"tags": ["text-generation", "mistral", "conversational"],
|
|
104
|
+
"architecture": "MistralForCausalLM",
|
|
105
|
+
"framework_compatibility": {
|
|
106
|
+
"vllm": ">=0.3.0",
|
|
107
|
+
"tensorrt-llm": ">=0.8.0",
|
|
108
|
+
"sglang": ">=0.2.0"
|
|
109
|
+
},
|
|
110
|
+
"validation_level": "tested",
|
|
111
|
+
"profiles": {
|
|
112
|
+
"7b": {
|
|
113
|
+
"displayName": "Mistral 7B Instruct",
|
|
114
|
+
"envVars": {
|
|
115
|
+
"MAX_MODEL_LEN": "8192",
|
|
116
|
+
"GPU_MEMORY_UTILIZATION": "0.9"
|
|
117
|
+
},
|
|
118
|
+
"recommendedInstanceTypes": ["ml.g5.xlarge", "ml.g5.2xlarge"]
|
|
119
|
+
}
|
|
120
|
+
},
|
|
121
|
+
"notes": "Mistral 7B v0.1 with 8K context window"
|
|
122
|
+
},
|
|
123
|
+
"mistralai/Mistral-7B-Instruct-v0.2": {
|
|
124
|
+
"family": "mistral",
|
|
125
|
+
"chat_template": "{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token }}{% endif %}{% endfor %}",
|
|
126
|
+
"gated": false,
|
|
127
|
+
"tags": ["text-generation", "mistral", "conversational"],
|
|
128
|
+
"architecture": "MistralForCausalLM",
|
|
129
|
+
"framework_compatibility": {
|
|
130
|
+
"vllm": ">=0.3.0",
|
|
131
|
+
"tensorrt-llm": ">=0.8.0",
|
|
132
|
+
"sglang": ">=0.2.0"
|
|
133
|
+
},
|
|
134
|
+
"validation_level": "tested",
|
|
135
|
+
"profiles": {
|
|
136
|
+
"7b": {
|
|
137
|
+
"displayName": "Mistral 7B Instruct v0.2",
|
|
138
|
+
"envVars": {
|
|
139
|
+
"MAX_MODEL_LEN": "32768",
|
|
140
|
+
"GPU_MEMORY_UTILIZATION": "0.9"
|
|
141
|
+
},
|
|
142
|
+
"recommendedInstanceTypes": ["ml.g5.2xlarge", "ml.g5.4xlarge"]
|
|
143
|
+
}
|
|
144
|
+
},
|
|
145
|
+
"notes": "Mistral 7B v0.2 with extended 32K context window. Requires more memory for long contexts"
|
|
146
|
+
},
|
|
147
|
+
"mistralai/Mixtral-8x7B-Instruct-v0.1": {
|
|
148
|
+
"family": "mistral",
|
|
149
|
+
"chat_template": "{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token }}{% endif %}{% endfor %}",
|
|
150
|
+
"gated": false,
|
|
151
|
+
"tags": ["text-generation", "mistral", "mixture-of-experts"],
|
|
152
|
+
"architecture": "MixtralForCausalLM",
|
|
153
|
+
"framework_compatibility": {
|
|
154
|
+
"vllm": ">=0.3.0",
|
|
155
|
+
"tensorrt-llm": ">=0.8.0",
|
|
156
|
+
"sglang": ">=0.2.0"
|
|
157
|
+
},
|
|
158
|
+
"validation_level": "community-validated",
|
|
159
|
+
"profiles": {
|
|
160
|
+
"8x7b-tp2": {
|
|
161
|
+
"displayName": "Mixtral 8x7B (2-GPU)",
|
|
162
|
+
"envVars": {
|
|
163
|
+
"TENSOR_PARALLEL_SIZE": "2",
|
|
164
|
+
"MAX_MODEL_LEN": "32768",
|
|
165
|
+
"GPU_MEMORY_UTILIZATION": "0.95"
|
|
166
|
+
},
|
|
167
|
+
"recommendedInstanceTypes": ["ml.g5.12xlarge"]
|
|
168
|
+
}
|
|
169
|
+
},
|
|
170
|
+
"notes": "Mixtral 8x7B MoE model. Requires tensor parallelism for efficient inference"
|
|
171
|
+
},
|
|
172
|
+
"meta-llama/Llama-2-*": {
|
|
173
|
+
"family": "llama-2",
|
|
174
|
+
"chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{{ '[INST] <<SYS>>\\n' + message['content'] + '\\n<</SYS>>\\n\\n' }}{% elif message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + ' ' }}{% endif %}{% endfor %}",
|
|
175
|
+
"gated": true,
|
|
176
|
+
"tags": ["text-generation", "llama-2"],
|
|
177
|
+
"architecture": null,
|
|
178
|
+
"framework_compatibility": {
|
|
179
|
+
"vllm": ">=0.3.0",
|
|
180
|
+
"tensorrt-llm": ">=0.8.0",
|
|
181
|
+
"sglang": ">=0.2.0"
|
|
182
|
+
},
|
|
183
|
+
"validation_level": "experimental",
|
|
184
|
+
"notes": "Fallback configuration for Llama-2 models not explicitly listed. Uses standard Llama-2 chat template"
|
|
185
|
+
},
|
|
186
|
+
"mistralai/Mistral-*": {
|
|
187
|
+
"family": "mistral",
|
|
188
|
+
"chat_template": "{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token }}{% endif %}{% endfor %}",
|
|
189
|
+
"gated": false,
|
|
190
|
+
"tags": ["text-generation", "mistral"],
|
|
191
|
+
"architecture": null,
|
|
192
|
+
"framework_compatibility": {
|
|
193
|
+
"vllm": ">=0.3.0",
|
|
194
|
+
"tensorrt-llm": ">=0.8.0",
|
|
195
|
+
"sglang": ">=0.2.0"
|
|
196
|
+
},
|
|
197
|
+
"validation_level": "experimental",
|
|
198
|
+
"notes": "Fallback configuration for Mistral models not explicitly listed. Uses standard Mistral chat template"
|
|
199
|
+
},
|
|
200
|
+
"codellama/*": {
|
|
201
|
+
"family": "codellama",
|
|
202
|
+
"chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{{ '[INST] <<SYS>>\\n' + message['content'] + '\\n<</SYS>>\\n\\n' }}{% elif message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + ' ' }}{% endif %}{% endfor %}",
|
|
203
|
+
"gated": false,
|
|
204
|
+
"tags": ["text-generation", "code", "codellama"],
|
|
205
|
+
"architecture": null,
|
|
206
|
+
"framework_compatibility": {
|
|
207
|
+
"vllm": ">=0.3.0",
|
|
208
|
+
"tensorrt-llm": ">=0.8.0"
|
|
209
|
+
},
|
|
210
|
+
"validation_level": "experimental",
|
|
211
|
+
"notes": "CodeLlama models use Llama-2 chat template. Optimized for code generation"
|
|
212
|
+
},
|
|
213
|
+
"tiiuae/falcon-*": {
|
|
214
|
+
"family": "falcon",
|
|
215
|
+
"chat_template": null,
|
|
216
|
+
"gated": false,
|
|
217
|
+
"tags": ["text-generation", "falcon"],
|
|
218
|
+
"architecture": null,
|
|
219
|
+
"framework_compatibility": {
|
|
220
|
+
"vllm": ">=0.3.0",
|
|
221
|
+
"tensorrt-llm": ">=0.8.0"
|
|
222
|
+
},
|
|
223
|
+
"validation_level": "experimental",
|
|
224
|
+
"notes": "Falcon models typically don't require chat templates for instruction following"
|
|
225
|
+
}
|
|
226
|
+
}
|