@aws/ml-container-creator 0.4.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +5 -2
- package/config/bootstrap-stack.json +40 -9
- package/infra/ci-harness/buildspec.yml +60 -0
- package/infra/ci-harness/package-lock.json +5 -1
- package/package.json +1 -1
- package/servers/README.md +41 -1
- package/servers/instance-sizer/index.js +10 -4
- package/servers/instance-sizer/lib/model-resolver.js +1 -1
- package/servers/lib/catalogs/model-sizes.json +135 -90
- package/servers/lib/catalogs/models.json +483 -411
- package/src/app.js +33 -2
- package/src/lib/bootstrap-command-handler.js +6 -0
- package/src/lib/cli-handler.js +1 -1
- package/src/lib/config-manager.js +41 -2
- package/src/lib/deployment-entry-schema.js +16 -0
- package/src/lib/mcp-client.js +3 -3
- package/src/lib/prompt-runner.js +179 -8
- package/src/lib/prompts.js +253 -7
- package/src/lib/registry-command-handler.js +12 -0
- package/templates/Dockerfile +12 -0
- package/templates/code/serving.properties +14 -0
- package/templates/do/adapter +1230 -0
- package/templates/do/adapters/.gitkeep +2 -0
- package/templates/do/add-ic +130 -0
- package/templates/do/benchmark +81 -9
- package/templates/do/clean +507 -17
- package/templates/do/config +28 -5
- package/templates/do/deploy +513 -367
- package/templates/do/ic/default.conf +32 -0
- package/templates/do/lib/endpoint-config.sh +216 -0
- package/templates/do/lib/inference-component.sh +167 -0
- package/templates/do/lib/secrets.sh +44 -0
- package/templates/do/lib/wait.sh +131 -0
- package/templates/do/logs +107 -27
- package/templates/do/optimize +528 -0
- package/templates/do/register +111 -1
- package/templates/do/status +337 -0
- package/templates/do/test +80 -28
|
@@ -1,50 +1,23 @@
|
|
|
1
1
|
{
|
|
2
2
|
"catalogVersion": "1.0.0",
|
|
3
3
|
"models": {
|
|
4
|
-
"meta-llama/Llama-2-
|
|
5
|
-
"parameterCount":
|
|
6
|
-
"defaultDtype": "float16",
|
|
7
|
-
"architecture": "LlamaForCausalLM",
|
|
8
|
-
"maxPositionEmbeddings": 4096,
|
|
9
|
-
"recommendedQuantizations": ["awq", "gptq"],
|
|
10
|
-
"minVramGb": 18,
|
|
11
|
-
"recommendedInstances": ["ml.g5.2xlarge", "ml.g5.4xlarge"]
|
|
12
|
-
},
|
|
13
|
-
"meta-llama/Llama-2-13b*": {
|
|
14
|
-
"parameterCount": 13015864320,
|
|
15
|
-
"defaultDtype": "float16",
|
|
16
|
-
"architecture": "LlamaForCausalLM",
|
|
17
|
-
"maxPositionEmbeddings": 4096,
|
|
18
|
-
"recommendedQuantizations": ["awq", "gptq"],
|
|
19
|
-
"minVramGb": 34,
|
|
20
|
-
"recommendedInstances": ["ml.g5.4xlarge", "ml.g5.12xlarge"]
|
|
21
|
-
},
|
|
22
|
-
"meta-llama/Llama-2-70b*": {
|
|
23
|
-
"parameterCount": 68976648192,
|
|
24
|
-
"defaultDtype": "float16",
|
|
25
|
-
"architecture": "LlamaForCausalLM",
|
|
26
|
-
"maxPositionEmbeddings": 4096,
|
|
27
|
-
"recommendedQuantizations": ["awq", "gptq"],
|
|
28
|
-
"minVramGb": 180,
|
|
29
|
-
"recommendedInstances": ["ml.g5.48xlarge", "ml.p4d.24xlarge"]
|
|
30
|
-
},
|
|
31
|
-
"meta-llama/Meta-Llama-3-8B*": {
|
|
32
|
-
"parameterCount": 8030261248,
|
|
4
|
+
"meta-llama/Llama-3.2-1B*": {
|
|
5
|
+
"parameterCount": 1235814400,
|
|
33
6
|
"defaultDtype": "bfloat16",
|
|
34
7
|
"architecture": "LlamaForCausalLM",
|
|
35
|
-
"maxPositionEmbeddings":
|
|
8
|
+
"maxPositionEmbeddings": 131072,
|
|
36
9
|
"recommendedQuantizations": ["awq", "gptq"],
|
|
37
|
-
"minVramGb":
|
|
38
|
-
"recommendedInstances": ["ml.g5.
|
|
10
|
+
"minVramGb": 5,
|
|
11
|
+
"recommendedInstances": ["ml.g5.xlarge", "ml.g6.xlarge"]
|
|
39
12
|
},
|
|
40
|
-
"meta-llama/
|
|
41
|
-
"parameterCount":
|
|
13
|
+
"meta-llama/Llama-3.2-3B*": {
|
|
14
|
+
"parameterCount": 3212749824,
|
|
42
15
|
"defaultDtype": "bfloat16",
|
|
43
16
|
"architecture": "LlamaForCausalLM",
|
|
44
|
-
"maxPositionEmbeddings":
|
|
17
|
+
"maxPositionEmbeddings": 131072,
|
|
45
18
|
"recommendedQuantizations": ["awq", "gptq"],
|
|
46
|
-
"minVramGb":
|
|
47
|
-
"recommendedInstances": ["ml.g5.
|
|
19
|
+
"minVramGb": 9,
|
|
20
|
+
"recommendedInstances": ["ml.g5.xlarge", "ml.g6.xlarge"]
|
|
48
21
|
},
|
|
49
22
|
"meta-llama/Llama-3.1-8B*": {
|
|
50
23
|
"parameterCount": 8030261248,
|
|
@@ -55,104 +28,176 @@
|
|
|
55
28
|
"minVramGb": 20,
|
|
56
29
|
"recommendedInstances": ["ml.g5.2xlarge", "ml.g6.2xlarge"]
|
|
57
30
|
},
|
|
58
|
-
"meta-llama/Llama-3.
|
|
59
|
-
"parameterCount":
|
|
31
|
+
"meta-llama/Llama-3.3-70B*": {
|
|
32
|
+
"parameterCount": 70553706496,
|
|
60
33
|
"defaultDtype": "bfloat16",
|
|
61
34
|
"architecture": "LlamaForCausalLM",
|
|
62
35
|
"maxPositionEmbeddings": 131072,
|
|
36
|
+
"recommendedQuantizations": ["awq", "gptq", "fp8"],
|
|
37
|
+
"minVramGb": 184,
|
|
38
|
+
"recommendedInstances": ["ml.g5.48xlarge", "ml.p4d.24xlarge", "ml.g6e.48xlarge"]
|
|
39
|
+
},
|
|
40
|
+
"Qwen/Qwen3-0.6B*": {
|
|
41
|
+
"parameterCount": 600000000,
|
|
42
|
+
"defaultDtype": "bfloat16",
|
|
43
|
+
"architecture": "Qwen3ForCausalLM",
|
|
44
|
+
"maxPositionEmbeddings": 32768,
|
|
63
45
|
"recommendedQuantizations": ["awq", "gptq"],
|
|
64
|
-
"minVramGb":
|
|
46
|
+
"minVramGb": 3,
|
|
65
47
|
"recommendedInstances": ["ml.g5.xlarge", "ml.g6.xlarge"]
|
|
66
48
|
},
|
|
67
|
-
"
|
|
68
|
-
"parameterCount":
|
|
49
|
+
"Qwen/Qwen3-1.7B*": {
|
|
50
|
+
"parameterCount": 1700000000,
|
|
69
51
|
"defaultDtype": "bfloat16",
|
|
70
|
-
"architecture": "
|
|
71
|
-
"maxPositionEmbeddings":
|
|
52
|
+
"architecture": "Qwen3ForCausalLM",
|
|
53
|
+
"maxPositionEmbeddings": 32768,
|
|
72
54
|
"recommendedQuantizations": ["awq", "gptq"],
|
|
73
|
-
"minVramGb":
|
|
55
|
+
"minVramGb": 6,
|
|
74
56
|
"recommendedInstances": ["ml.g5.xlarge", "ml.g6.xlarge"]
|
|
75
57
|
},
|
|
76
|
-
"
|
|
77
|
-
"parameterCount":
|
|
58
|
+
"Qwen/Qwen3-4B*": {
|
|
59
|
+
"parameterCount": 4000000000,
|
|
78
60
|
"defaultDtype": "bfloat16",
|
|
79
|
-
"architecture": "
|
|
61
|
+
"architecture": "Qwen3ForCausalLM",
|
|
80
62
|
"maxPositionEmbeddings": 32768,
|
|
81
63
|
"recommendedQuantizations": ["awq", "gptq"],
|
|
82
|
-
"minVramGb":
|
|
83
|
-
"recommendedInstances": ["ml.g5.
|
|
64
|
+
"minVramGb": 11,
|
|
65
|
+
"recommendedInstances": ["ml.g5.xlarge", "ml.g6.xlarge"]
|
|
84
66
|
},
|
|
85
|
-
"
|
|
86
|
-
"parameterCount":
|
|
67
|
+
"Qwen/Qwen3-8B*": {
|
|
68
|
+
"parameterCount": 8000000000,
|
|
87
69
|
"defaultDtype": "bfloat16",
|
|
88
|
-
"architecture": "
|
|
70
|
+
"architecture": "Qwen3ForCausalLM",
|
|
89
71
|
"maxPositionEmbeddings": 32768,
|
|
90
72
|
"recommendedQuantizations": ["awq", "gptq"],
|
|
91
|
-
"minVramGb":
|
|
92
|
-
"recommendedInstances": ["ml.g5.
|
|
73
|
+
"minVramGb": 20,
|
|
74
|
+
"recommendedInstances": ["ml.g5.2xlarge", "ml.g6.2xlarge"]
|
|
93
75
|
},
|
|
94
|
-
"Qwen/
|
|
95
|
-
"parameterCount":
|
|
76
|
+
"Qwen/Qwen3-14B*": {
|
|
77
|
+
"parameterCount": 14000000000,
|
|
96
78
|
"defaultDtype": "bfloat16",
|
|
97
|
-
"architecture": "
|
|
98
|
-
"maxPositionEmbeddings":
|
|
79
|
+
"architecture": "Qwen3ForCausalLM",
|
|
80
|
+
"maxPositionEmbeddings": 32768,
|
|
99
81
|
"recommendedQuantizations": ["awq", "gptq"],
|
|
100
|
-
"minVramGb":
|
|
101
|
-
"recommendedInstances": ["ml.g5.
|
|
82
|
+
"minVramGb": 37,
|
|
83
|
+
"recommendedInstances": ["ml.g5.4xlarge", "ml.g5.12xlarge"]
|
|
84
|
+
},
|
|
85
|
+
"Qwen/Qwen3-32B*": {
|
|
86
|
+
"parameterCount": 32000000000,
|
|
87
|
+
"defaultDtype": "bfloat16",
|
|
88
|
+
"architecture": "Qwen3ForCausalLM",
|
|
89
|
+
"maxPositionEmbeddings": 32768,
|
|
90
|
+
"recommendedQuantizations": ["awq", "gptq"],
|
|
91
|
+
"minVramGb": 84,
|
|
92
|
+
"recommendedInstances": ["ml.g5.12xlarge", "ml.g5.48xlarge"]
|
|
102
93
|
},
|
|
103
|
-
"Qwen/Qwen2-7B*": {
|
|
94
|
+
"Qwen/Qwen2.5-7B*": {
|
|
104
95
|
"parameterCount": 7721324544,
|
|
105
96
|
"defaultDtype": "bfloat16",
|
|
106
97
|
"architecture": "Qwen2ForCausalLM",
|
|
107
|
-
"maxPositionEmbeddings":
|
|
98
|
+
"maxPositionEmbeddings": 131072,
|
|
108
99
|
"recommendedQuantizations": ["awq", "gptq"],
|
|
109
100
|
"minVramGb": 20,
|
|
110
|
-
"recommendedInstances": ["ml.g5.2xlarge", "ml.
|
|
101
|
+
"recommendedInstances": ["ml.g5.2xlarge", "ml.g6.2xlarge"]
|
|
111
102
|
},
|
|
112
|
-
"Qwen/
|
|
103
|
+
"Qwen/Qwen2.5-14B*": {
|
|
113
104
|
"parameterCount": 14167134208,
|
|
114
105
|
"defaultDtype": "bfloat16",
|
|
115
|
-
"architecture": "
|
|
116
|
-
"maxPositionEmbeddings":
|
|
106
|
+
"architecture": "Qwen2ForCausalLM",
|
|
107
|
+
"maxPositionEmbeddings": 131072,
|
|
117
108
|
"recommendedQuantizations": ["awq", "gptq"],
|
|
118
109
|
"minVramGb": 37,
|
|
119
110
|
"recommendedInstances": ["ml.g5.4xlarge", "ml.g5.12xlarge"]
|
|
120
111
|
},
|
|
121
|
-
"Qwen/Qwen2-
|
|
122
|
-
"parameterCount":
|
|
112
|
+
"Qwen/Qwen2.5-32B*": {
|
|
113
|
+
"parameterCount": 32000000000,
|
|
123
114
|
"defaultDtype": "bfloat16",
|
|
124
115
|
"architecture": "Qwen2ForCausalLM",
|
|
125
|
-
"maxPositionEmbeddings":
|
|
116
|
+
"maxPositionEmbeddings": 131072,
|
|
126
117
|
"recommendedQuantizations": ["awq", "gptq"],
|
|
127
|
-
"minVramGb":
|
|
128
|
-
"recommendedInstances": ["ml.g5.
|
|
118
|
+
"minVramGb": 84,
|
|
119
|
+
"recommendedInstances": ["ml.g5.12xlarge", "ml.g5.48xlarge"]
|
|
129
120
|
},
|
|
130
|
-
"Qwen/
|
|
121
|
+
"Qwen/Qwen2.5-72B*": {
|
|
131
122
|
"parameterCount": 72710410240,
|
|
132
123
|
"defaultDtype": "bfloat16",
|
|
133
|
-
"architecture": "
|
|
134
|
-
"maxPositionEmbeddings":
|
|
135
|
-
"recommendedQuantizations": ["awq", "gptq"],
|
|
124
|
+
"architecture": "Qwen2ForCausalLM",
|
|
125
|
+
"maxPositionEmbeddings": 131072,
|
|
126
|
+
"recommendedQuantizations": ["awq", "gptq", "fp8"],
|
|
136
127
|
"minVramGb": 190,
|
|
137
|
-
"recommendedInstances": ["ml.g5.48xlarge", "ml.p4d.24xlarge"]
|
|
128
|
+
"recommendedInstances": ["ml.g5.48xlarge", "ml.p4d.24xlarge", "ml.g6e.48xlarge"]
|
|
138
129
|
},
|
|
139
|
-
"
|
|
140
|
-
"parameterCount":
|
|
130
|
+
"deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B*": {
|
|
131
|
+
"parameterCount": 1500000000,
|
|
141
132
|
"defaultDtype": "bfloat16",
|
|
142
133
|
"architecture": "Qwen2ForCausalLM",
|
|
143
|
-
"maxPositionEmbeddings":
|
|
134
|
+
"maxPositionEmbeddings": 131072,
|
|
144
135
|
"recommendedQuantizations": ["awq", "gptq"],
|
|
145
|
-
"minVramGb":
|
|
146
|
-
"recommendedInstances": ["ml.g5.
|
|
147
|
-
},
|
|
148
|
-
"
|
|
149
|
-
"parameterCount":
|
|
150
|
-
"defaultDtype": "
|
|
151
|
-
"architecture": "
|
|
152
|
-
"maxPositionEmbeddings":
|
|
153
|
-
"recommendedQuantizations": ["gptq"],
|
|
154
|
-
"minVramGb":
|
|
136
|
+
"minVramGb": 5,
|
|
137
|
+
"recommendedInstances": ["ml.g5.xlarge", "ml.g6.xlarge"]
|
|
138
|
+
},
|
|
139
|
+
"deepseek-ai/DeepSeek-R1-Distill-Qwen-7B*": {
|
|
140
|
+
"parameterCount": 7000000000,
|
|
141
|
+
"defaultDtype": "bfloat16",
|
|
142
|
+
"architecture": "Qwen2ForCausalLM",
|
|
143
|
+
"maxPositionEmbeddings": 131072,
|
|
144
|
+
"recommendedQuantizations": ["awq", "gptq"],
|
|
145
|
+
"minVramGb": 18,
|
|
146
|
+
"recommendedInstances": ["ml.g5.2xlarge", "ml.g6.2xlarge"]
|
|
147
|
+
},
|
|
148
|
+
"deepseek-ai/DeepSeek-R1-Distill-Qwen-14B*": {
|
|
149
|
+
"parameterCount": 14000000000,
|
|
150
|
+
"defaultDtype": "bfloat16",
|
|
151
|
+
"architecture": "Qwen2ForCausalLM",
|
|
152
|
+
"maxPositionEmbeddings": 131072,
|
|
153
|
+
"recommendedQuantizations": ["awq", "gptq"],
|
|
154
|
+
"minVramGb": 37,
|
|
155
|
+
"recommendedInstances": ["ml.g5.4xlarge", "ml.g5.12xlarge"]
|
|
156
|
+
},
|
|
157
|
+
"deepseek-ai/DeepSeek-R1-Distill-Qwen-32B*": {
|
|
158
|
+
"parameterCount": 32000000000,
|
|
159
|
+
"defaultDtype": "bfloat16",
|
|
160
|
+
"architecture": "Qwen2ForCausalLM",
|
|
161
|
+
"maxPositionEmbeddings": 131072,
|
|
162
|
+
"recommendedQuantizations": ["awq", "gptq"],
|
|
163
|
+
"minVramGb": 84,
|
|
164
|
+
"recommendedInstances": ["ml.g5.12xlarge", "ml.g5.48xlarge"]
|
|
165
|
+
},
|
|
166
|
+
"deepseek-ai/DeepSeek-R1-Distill-Llama-8B*": {
|
|
167
|
+
"parameterCount": 8000000000,
|
|
168
|
+
"defaultDtype": "bfloat16",
|
|
169
|
+
"architecture": "LlamaForCausalLM",
|
|
170
|
+
"maxPositionEmbeddings": 131072,
|
|
171
|
+
"recommendedQuantizations": ["awq", "gptq"],
|
|
172
|
+
"minVramGb": 20,
|
|
173
|
+
"recommendedInstances": ["ml.g5.2xlarge", "ml.g6.2xlarge"]
|
|
174
|
+
},
|
|
175
|
+
"deepseek-ai/DeepSeek-R1-Distill-Llama-70B*": {
|
|
176
|
+
"parameterCount": 70000000000,
|
|
177
|
+
"defaultDtype": "bfloat16",
|
|
178
|
+
"architecture": "LlamaForCausalLM",
|
|
179
|
+
"maxPositionEmbeddings": 131072,
|
|
180
|
+
"recommendedQuantizations": ["awq", "gptq", "fp8"],
|
|
181
|
+
"minVramGb": 184,
|
|
182
|
+
"recommendedInstances": ["ml.g5.48xlarge", "ml.p4d.24xlarge", "ml.g6e.48xlarge"]
|
|
183
|
+
},
|
|
184
|
+
"openai/gpt-oss-20b*": {
|
|
185
|
+
"parameterCount": 20000000000,
|
|
186
|
+
"defaultDtype": "bfloat16",
|
|
187
|
+
"architecture": "GPT2LMHeadModel",
|
|
188
|
+
"maxPositionEmbeddings": 8192,
|
|
189
|
+
"recommendedQuantizations": ["awq", "gptq"],
|
|
190
|
+
"minVramGb": 52,
|
|
155
191
|
"recommendedInstances": ["ml.g5.12xlarge", "ml.g5.48xlarge"]
|
|
192
|
+
},
|
|
193
|
+
"openai/gpt-oss-120b*": {
|
|
194
|
+
"parameterCount": 120000000000,
|
|
195
|
+
"defaultDtype": "bfloat16",
|
|
196
|
+
"architecture": "GPT2LMHeadModel",
|
|
197
|
+
"maxPositionEmbeddings": 8192,
|
|
198
|
+
"recommendedQuantizations": ["awq", "gptq", "fp8"],
|
|
199
|
+
"minVramGb": 312,
|
|
200
|
+
"recommendedInstances": ["ml.p4d.24xlarge", "ml.p5.48xlarge"]
|
|
156
201
|
}
|
|
157
202
|
}
|
|
158
203
|
}
|