@aws/ml-container-creator 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38):
  1. package/bin/cli.js +5 -2
  2. package/config/bootstrap-stack.json +40 -9
  3. package/infra/ci-harness/buildspec.yml +60 -0
  4. package/infra/ci-harness/package-lock.json +5 -1
  5. package/package.json +1 -1
  6. package/servers/README.md +41 -1
  7. package/servers/instance-sizer/index.js +10 -4
  8. package/servers/instance-sizer/lib/model-resolver.js +1 -1
  9. package/servers/lib/catalogs/model-sizes.json +135 -90
  10. package/servers/lib/catalogs/models.json +483 -411
  11. package/src/app.js +33 -2
  12. package/src/lib/bootstrap-command-handler.js +6 -0
  13. package/src/lib/cli-handler.js +1 -1
  14. package/src/lib/config-manager.js +41 -2
  15. package/src/lib/deployment-entry-schema.js +16 -0
  16. package/src/lib/mcp-client.js +3 -3
  17. package/src/lib/prompt-runner.js +179 -8
  18. package/src/lib/prompts.js +253 -7
  19. package/src/lib/registry-command-handler.js +12 -0
  20. package/templates/Dockerfile +12 -0
  21. package/templates/code/serving.properties +14 -0
  22. package/templates/do/adapter +1230 -0
  23. package/templates/do/adapters/.gitkeep +2 -0
  24. package/templates/do/add-ic +130 -0
  25. package/templates/do/benchmark +81 -9
  26. package/templates/do/clean +507 -17
  27. package/templates/do/config +28 -5
  28. package/templates/do/deploy +513 -367
  29. package/templates/do/ic/default.conf +32 -0
  30. package/templates/do/lib/endpoint-config.sh +216 -0
  31. package/templates/do/lib/inference-component.sh +167 -0
  32. package/templates/do/lib/secrets.sh +44 -0
  33. package/templates/do/lib/wait.sh +131 -0
  34. package/templates/do/logs +107 -27
  35. package/templates/do/optimize +528 -0
  36. package/templates/do/register +111 -1
  37. package/templates/do/status +337 -0
  38. package/templates/do/test +80 -28
@@ -1,50 +1,23 @@
1
1
  {
2
2
  "catalogVersion": "1.0.0",
3
3
  "models": {
4
- "meta-llama/Llama-2-7b*": {
5
- "parameterCount": 6738415616,
6
- "defaultDtype": "float16",
7
- "architecture": "LlamaForCausalLM",
8
- "maxPositionEmbeddings": 4096,
9
- "recommendedQuantizations": ["awq", "gptq"],
10
- "minVramGb": 18,
11
- "recommendedInstances": ["ml.g5.2xlarge", "ml.g5.4xlarge"]
12
- },
13
- "meta-llama/Llama-2-13b*": {
14
- "parameterCount": 13015864320,
15
- "defaultDtype": "float16",
16
- "architecture": "LlamaForCausalLM",
17
- "maxPositionEmbeddings": 4096,
18
- "recommendedQuantizations": ["awq", "gptq"],
19
- "minVramGb": 34,
20
- "recommendedInstances": ["ml.g5.4xlarge", "ml.g5.12xlarge"]
21
- },
22
- "meta-llama/Llama-2-70b*": {
23
- "parameterCount": 68976648192,
24
- "defaultDtype": "float16",
25
- "architecture": "LlamaForCausalLM",
26
- "maxPositionEmbeddings": 4096,
27
- "recommendedQuantizations": ["awq", "gptq"],
28
- "minVramGb": 180,
29
- "recommendedInstances": ["ml.g5.48xlarge", "ml.p4d.24xlarge"]
30
- },
31
- "meta-llama/Meta-Llama-3-8B*": {
32
- "parameterCount": 8030261248,
4
+ "meta-llama/Llama-3.2-1B*": {
5
+ "parameterCount": 1235814400,
33
6
  "defaultDtype": "bfloat16",
34
7
  "architecture": "LlamaForCausalLM",
35
- "maxPositionEmbeddings": 8192,
8
+ "maxPositionEmbeddings": 131072,
36
9
  "recommendedQuantizations": ["awq", "gptq"],
37
- "minVramGb": 21,
38
- "recommendedInstances": ["ml.g5.2xlarge", "ml.g5.4xlarge"]
10
+ "minVramGb": 5,
11
+ "recommendedInstances": ["ml.g5.xlarge", "ml.g6.xlarge"]
39
12
  },
40
- "meta-llama/Meta-Llama-3-70B*": {
41
- "parameterCount": 70553706496,
13
+ "meta-llama/Llama-3.2-3B*": {
14
+ "parameterCount": 3212749824,
42
15
  "defaultDtype": "bfloat16",
43
16
  "architecture": "LlamaForCausalLM",
44
- "maxPositionEmbeddings": 8192,
17
+ "maxPositionEmbeddings": 131072,
45
18
  "recommendedQuantizations": ["awq", "gptq"],
46
- "minVramGb": 184,
47
- "recommendedInstances": ["ml.g5.48xlarge", "ml.p4d.24xlarge"]
19
+ "minVramGb": 9,
20
+ "recommendedInstances": ["ml.g5.xlarge", "ml.g6.xlarge"]
48
21
  },
49
22
  "meta-llama/Llama-3.1-8B*": {
50
23
  "parameterCount": 8030261248,
@@ -55,104 +28,176 @@
55
28
  "minVramGb": 20,
56
29
  "recommendedInstances": ["ml.g5.2xlarge", "ml.g6.2xlarge"]
57
30
  },
58
- "meta-llama/Llama-3.2-1B*": {
59
- "parameterCount": 1235814400,
31
+ "meta-llama/Llama-3.3-70B*": {
32
+ "parameterCount": 70553706496,
60
33
  "defaultDtype": "bfloat16",
61
34
  "architecture": "LlamaForCausalLM",
62
35
  "maxPositionEmbeddings": 131072,
36
+ "recommendedQuantizations": ["awq", "gptq", "fp8"],
37
+ "minVramGb": 184,
38
+ "recommendedInstances": ["ml.g5.48xlarge", "ml.p4d.24xlarge", "ml.g6e.48xlarge"]
39
+ },
40
+ "Qwen/Qwen3-0.6B*": {
41
+ "parameterCount": 600000000,
42
+ "defaultDtype": "bfloat16",
43
+ "architecture": "Qwen3ForCausalLM",
44
+ "maxPositionEmbeddings": 32768,
63
45
  "recommendedQuantizations": ["awq", "gptq"],
64
- "minVramGb": 5,
46
+ "minVramGb": 3,
65
47
  "recommendedInstances": ["ml.g5.xlarge", "ml.g6.xlarge"]
66
48
  },
67
- "meta-llama/Llama-3.2-3B*": {
68
- "parameterCount": 3212749824,
49
+ "Qwen/Qwen3-1.7B*": {
50
+ "parameterCount": 1700000000,
69
51
  "defaultDtype": "bfloat16",
70
- "architecture": "LlamaForCausalLM",
71
- "maxPositionEmbeddings": 131072,
52
+ "architecture": "Qwen3ForCausalLM",
53
+ "maxPositionEmbeddings": 32768,
72
54
  "recommendedQuantizations": ["awq", "gptq"],
73
- "minVramGb": 9,
55
+ "minVramGb": 6,
74
56
  "recommendedInstances": ["ml.g5.xlarge", "ml.g6.xlarge"]
75
57
  },
76
- "mistralai/Mistral-7B*": {
77
- "parameterCount": 7241732096,
58
+ "Qwen/Qwen3-4B*": {
59
+ "parameterCount": 4000000000,
78
60
  "defaultDtype": "bfloat16",
79
- "architecture": "MistralForCausalLM",
61
+ "architecture": "Qwen3ForCausalLM",
80
62
  "maxPositionEmbeddings": 32768,
81
63
  "recommendedQuantizations": ["awq", "gptq"],
82
- "minVramGb": 19,
83
- "recommendedInstances": ["ml.g5.2xlarge", "ml.g5.4xlarge"]
64
+ "minVramGb": 11,
65
+ "recommendedInstances": ["ml.g5.xlarge", "ml.g6.xlarge"]
84
66
  },
85
- "mistralai/Mixtral-8x7B*": {
86
- "parameterCount": 46702792704,
67
+ "Qwen/Qwen3-8B*": {
68
+ "parameterCount": 8000000000,
87
69
  "defaultDtype": "bfloat16",
88
- "architecture": "MixtralForCausalLM",
70
+ "architecture": "Qwen3ForCausalLM",
89
71
  "maxPositionEmbeddings": 32768,
90
72
  "recommendedQuantizations": ["awq", "gptq"],
91
- "minVramGb": 122,
92
- "recommendedInstances": ["ml.g5.48xlarge", "ml.p4d.24xlarge"]
73
+ "minVramGb": 20,
74
+ "recommendedInstances": ["ml.g5.2xlarge", "ml.g6.2xlarge"]
93
75
  },
94
- "Qwen/Qwen-7B*": {
95
- "parameterCount": 7721324544,
76
+ "Qwen/Qwen3-14B*": {
77
+ "parameterCount": 14000000000,
96
78
  "defaultDtype": "bfloat16",
97
- "architecture": "QWenLMHeadModel",
98
- "maxPositionEmbeddings": 8192,
79
+ "architecture": "Qwen3ForCausalLM",
80
+ "maxPositionEmbeddings": 32768,
99
81
  "recommendedQuantizations": ["awq", "gptq"],
100
- "minVramGb": 20,
101
- "recommendedInstances": ["ml.g5.2xlarge", "ml.g5.4xlarge"]
82
+ "minVramGb": 37,
83
+ "recommendedInstances": ["ml.g5.4xlarge", "ml.g5.12xlarge"]
84
+ },
85
+ "Qwen/Qwen3-32B*": {
86
+ "parameterCount": 32000000000,
87
+ "defaultDtype": "bfloat16",
88
+ "architecture": "Qwen3ForCausalLM",
89
+ "maxPositionEmbeddings": 32768,
90
+ "recommendedQuantizations": ["awq", "gptq"],
91
+ "minVramGb": 84,
92
+ "recommendedInstances": ["ml.g5.12xlarge", "ml.g5.48xlarge"]
102
93
  },
103
- "Qwen/Qwen2-7B*": {
94
+ "Qwen/Qwen2.5-7B*": {
104
95
  "parameterCount": 7721324544,
105
96
  "defaultDtype": "bfloat16",
106
97
  "architecture": "Qwen2ForCausalLM",
107
- "maxPositionEmbeddings": 32768,
98
+ "maxPositionEmbeddings": 131072,
108
99
  "recommendedQuantizations": ["awq", "gptq"],
109
100
  "minVramGb": 20,
110
- "recommendedInstances": ["ml.g5.2xlarge", "ml.g5.4xlarge"]
101
+ "recommendedInstances": ["ml.g5.2xlarge", "ml.g6.2xlarge"]
111
102
  },
112
- "Qwen/Qwen-14B*": {
103
+ "Qwen/Qwen2.5-14B*": {
113
104
  "parameterCount": 14167134208,
114
105
  "defaultDtype": "bfloat16",
115
- "architecture": "QWenLMHeadModel",
116
- "maxPositionEmbeddings": 8192,
106
+ "architecture": "Qwen2ForCausalLM",
107
+ "maxPositionEmbeddings": 131072,
117
108
  "recommendedQuantizations": ["awq", "gptq"],
118
109
  "minVramGb": 37,
119
110
  "recommendedInstances": ["ml.g5.4xlarge", "ml.g5.12xlarge"]
120
111
  },
121
- "Qwen/Qwen2-14B*": {
122
- "parameterCount": 14167134208,
112
+ "Qwen/Qwen2.5-32B*": {
113
+ "parameterCount": 32000000000,
123
114
  "defaultDtype": "bfloat16",
124
115
  "architecture": "Qwen2ForCausalLM",
125
- "maxPositionEmbeddings": 32768,
116
+ "maxPositionEmbeddings": 131072,
126
117
  "recommendedQuantizations": ["awq", "gptq"],
127
- "minVramGb": 37,
128
- "recommendedInstances": ["ml.g5.4xlarge", "ml.g5.12xlarge"]
118
+ "minVramGb": 84,
119
+ "recommendedInstances": ["ml.g5.12xlarge", "ml.g5.48xlarge"]
129
120
  },
130
- "Qwen/Qwen-72B*": {
121
+ "Qwen/Qwen2.5-72B*": {
131
122
  "parameterCount": 72710410240,
132
123
  "defaultDtype": "bfloat16",
133
- "architecture": "QWenLMHeadModel",
134
- "maxPositionEmbeddings": 32768,
135
- "recommendedQuantizations": ["awq", "gptq"],
124
+ "architecture": "Qwen2ForCausalLM",
125
+ "maxPositionEmbeddings": 131072,
126
+ "recommendedQuantizations": ["awq", "gptq", "fp8"],
136
127
  "minVramGb": 190,
137
- "recommendedInstances": ["ml.g5.48xlarge", "ml.p4d.24xlarge"]
128
+ "recommendedInstances": ["ml.g5.48xlarge", "ml.p4d.24xlarge", "ml.g6e.48xlarge"]
138
129
  },
139
- "Qwen/Qwen2-72B*": {
140
- "parameterCount": 72710410240,
130
+ "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B*": {
131
+ "parameterCount": 1500000000,
141
132
  "defaultDtype": "bfloat16",
142
133
  "architecture": "Qwen2ForCausalLM",
143
- "maxPositionEmbeddings": 32768,
134
+ "maxPositionEmbeddings": 131072,
144
135
  "recommendedQuantizations": ["awq", "gptq"],
145
- "minVramGb": 190,
146
- "recommendedInstances": ["ml.g5.48xlarge", "ml.p4d.24xlarge"]
147
- },
148
- "EleutherAI/gpt-neox-20b*": {
149
- "parameterCount": 20554568704,
150
- "defaultDtype": "float16",
151
- "architecture": "GPTNeoXForCausalLM",
152
- "maxPositionEmbeddings": 2048,
153
- "recommendedQuantizations": ["gptq"],
154
- "minVramGb": 54,
136
+ "minVramGb": 5,
137
+ "recommendedInstances": ["ml.g5.xlarge", "ml.g6.xlarge"]
138
+ },
139
+ "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B*": {
140
+ "parameterCount": 7000000000,
141
+ "defaultDtype": "bfloat16",
142
+ "architecture": "Qwen2ForCausalLM",
143
+ "maxPositionEmbeddings": 131072,
144
+ "recommendedQuantizations": ["awq", "gptq"],
145
+ "minVramGb": 18,
146
+ "recommendedInstances": ["ml.g5.2xlarge", "ml.g6.2xlarge"]
147
+ },
148
+ "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B*": {
149
+ "parameterCount": 14000000000,
150
+ "defaultDtype": "bfloat16",
151
+ "architecture": "Qwen2ForCausalLM",
152
+ "maxPositionEmbeddings": 131072,
153
+ "recommendedQuantizations": ["awq", "gptq"],
154
+ "minVramGb": 37,
155
+ "recommendedInstances": ["ml.g5.4xlarge", "ml.g5.12xlarge"]
156
+ },
157
+ "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B*": {
158
+ "parameterCount": 32000000000,
159
+ "defaultDtype": "bfloat16",
160
+ "architecture": "Qwen2ForCausalLM",
161
+ "maxPositionEmbeddings": 131072,
162
+ "recommendedQuantizations": ["awq", "gptq"],
163
+ "minVramGb": 84,
164
+ "recommendedInstances": ["ml.g5.12xlarge", "ml.g5.48xlarge"]
165
+ },
166
+ "deepseek-ai/DeepSeek-R1-Distill-Llama-8B*": {
167
+ "parameterCount": 8000000000,
168
+ "defaultDtype": "bfloat16",
169
+ "architecture": "LlamaForCausalLM",
170
+ "maxPositionEmbeddings": 131072,
171
+ "recommendedQuantizations": ["awq", "gptq"],
172
+ "minVramGb": 20,
173
+ "recommendedInstances": ["ml.g5.2xlarge", "ml.g6.2xlarge"]
174
+ },
175
+ "deepseek-ai/DeepSeek-R1-Distill-Llama-70B*": {
176
+ "parameterCount": 70000000000,
177
+ "defaultDtype": "bfloat16",
178
+ "architecture": "LlamaForCausalLM",
179
+ "maxPositionEmbeddings": 131072,
180
+ "recommendedQuantizations": ["awq", "gptq", "fp8"],
181
+ "minVramGb": 184,
182
+ "recommendedInstances": ["ml.g5.48xlarge", "ml.p4d.24xlarge", "ml.g6e.48xlarge"]
183
+ },
184
+ "openai/gpt-oss-20b*": {
185
+ "parameterCount": 20000000000,
186
+ "defaultDtype": "bfloat16",
187
+ "architecture": "GPT2LMHeadModel",
188
+ "maxPositionEmbeddings": 8192,
189
+ "recommendedQuantizations": ["awq", "gptq"],
190
+ "minVramGb": 52,
155
191
  "recommendedInstances": ["ml.g5.12xlarge", "ml.g5.48xlarge"]
192
+ },
193
+ "openai/gpt-oss-120b*": {
194
+ "parameterCount": 120000000000,
195
+ "defaultDtype": "bfloat16",
196
+ "architecture": "GPT2LMHeadModel",
197
+ "maxPositionEmbeddings": 8192,
198
+ "recommendedQuantizations": ["awq", "gptq", "fp8"],
199
+ "minVramGb": 312,
200
+ "recommendedInstances": ["ml.p4d.24xlarge", "ml.p5.48xlarge"]
156
201
  }
157
202
  }
158
203
  }