@aws/ml-container-creator 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/bootstrap-stack.json +40 -9
- package/infra/ci-harness/package-lock.json +5 -1
- package/package.json +1 -1
- package/servers/instance-sizer/index.js +4 -4
- package/servers/instance-sizer/lib/model-resolver.js +1 -1
- package/servers/lib/catalogs/model-sizes.json +135 -90
- package/servers/lib/catalogs/models.json +483 -411
- package/src/lib/bootstrap-command-handler.js +6 -0
- package/src/lib/cli-handler.js +1 -1
- package/src/lib/config-manager.js +1 -1
- package/src/lib/mcp-client.js +3 -3
- package/src/lib/prompt-runner.js +5 -5
- package/src/lib/prompts.js +31 -5
- package/templates/do/adapter +21 -5
- package/templates/do/config +5 -4
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
"Type": "String",
|
|
8
8
|
"Default": "false",
|
|
9
9
|
"AllowedValues": ["true", "false"],
|
|
10
|
-
"Description": "Whether to create S3 buckets for async inference
|
|
10
|
+
"Description": "Whether to create S3 buckets for async inference, batch transform, adapters, and benchmarks"
|
|
11
11
|
},
|
|
12
12
|
"UseExistingRoleArn": {
|
|
13
13
|
"Type": "String",
|
|
@@ -134,8 +134,8 @@
|
|
|
134
134
|
"s3:ListBucket"
|
|
135
135
|
],
|
|
136
136
|
"Resource": [
|
|
137
|
-
"arn:aws:s3:::
|
|
138
|
-
"arn:aws:s3:::
|
|
137
|
+
"arn:aws:s3:::mlcc-*",
|
|
138
|
+
"arn:aws:s3:::mlcc-*/*"
|
|
139
139
|
]
|
|
140
140
|
},
|
|
141
141
|
{
|
|
@@ -209,7 +209,7 @@
|
|
|
209
209
|
"DeletionPolicy": "Retain",
|
|
210
210
|
"UpdateReplacePolicy": "Retain",
|
|
211
211
|
"Properties": {
|
|
212
|
-
"BucketName": { "Fn::Sub": "
|
|
212
|
+
"BucketName": { "Fn::Sub": "mlcc-async-${AWS::AccountId}-${AWS::Region}" },
|
|
213
213
|
"VersioningConfiguration": { "Status": "Enabled" },
|
|
214
214
|
"BucketEncryption": {
|
|
215
215
|
"ServerSideEncryptionConfiguration": [
|
|
@@ -218,7 +218,8 @@
|
|
|
218
218
|
},
|
|
219
219
|
"Tags": [
|
|
220
220
|
{ "Key": "mlcc:managed-by", "Value": "ml-container-creator" },
|
|
221
|
-
{ "Key": "mlcc:created-by", "Value": "bootstrap" }
|
|
221
|
+
{ "Key": "mlcc:created-by", "Value": "bootstrap" },
|
|
222
|
+
{ "Key": "mlcc:purpose", "Value": "async-inference-output" }
|
|
222
223
|
]
|
|
223
224
|
}
|
|
224
225
|
},
|
|
@@ -229,7 +230,7 @@
|
|
|
229
230
|
"DeletionPolicy": "Retain",
|
|
230
231
|
"UpdateReplacePolicy": "Retain",
|
|
231
232
|
"Properties": {
|
|
232
|
-
"BucketName": { "Fn::Sub": "
|
|
233
|
+
"BucketName": { "Fn::Sub": "mlcc-batch-${AWS::AccountId}-${AWS::Region}" },
|
|
233
234
|
"VersioningConfiguration": { "Status": "Enabled" },
|
|
234
235
|
"BucketEncryption": {
|
|
235
236
|
"ServerSideEncryptionConfiguration": [
|
|
@@ -238,17 +239,40 @@
|
|
|
238
239
|
},
|
|
239
240
|
"Tags": [
|
|
240
241
|
{ "Key": "mlcc:managed-by", "Value": "ml-container-creator" },
|
|
241
|
-
{ "Key": "mlcc:created-by", "Value": "bootstrap" }
|
|
242
|
+
{ "Key": "mlcc:created-by", "Value": "bootstrap" },
|
|
243
|
+
{ "Key": "mlcc:purpose", "Value": "batch-transform-io" }
|
|
244
|
+
]
|
|
245
|
+
}
|
|
246
|
+
},
|
|
247
|
+
|
|
248
|
+
"AdapterS3Bucket": {
|
|
249
|
+
"Type": "AWS::S3::Bucket",
|
|
250
|
+
"Condition": "ShouldCreateS3Buckets",
|
|
251
|
+
"DeletionPolicy": "Retain",
|
|
252
|
+
"UpdateReplacePolicy": "Retain",
|
|
253
|
+
"Properties": {
|
|
254
|
+
"BucketName": { "Fn::Sub": "mlcc-adapters-${AWS::AccountId}-${AWS::Region}" },
|
|
255
|
+
"VersioningConfiguration": { "Status": "Enabled" },
|
|
256
|
+
"BucketEncryption": {
|
|
257
|
+
"ServerSideEncryptionConfiguration": [
|
|
258
|
+
{ "ServerSideEncryptionByDefault": { "SSEAlgorithm": "AES256" } }
|
|
259
|
+
]
|
|
260
|
+
},
|
|
261
|
+
"Tags": [
|
|
262
|
+
{ "Key": "mlcc:managed-by", "Value": "ml-container-creator" },
|
|
263
|
+
{ "Key": "mlcc:created-by", "Value": "bootstrap" },
|
|
264
|
+
{ "Key": "mlcc:purpose", "Value": "lora-adapter-storage" }
|
|
242
265
|
]
|
|
243
266
|
}
|
|
244
267
|
},
|
|
245
268
|
|
|
246
269
|
"BenchmarkS3Bucket": {
|
|
247
270
|
"Type": "AWS::S3::Bucket",
|
|
271
|
+
"Condition": "ShouldCreateS3Buckets",
|
|
248
272
|
"DeletionPolicy": "Retain",
|
|
249
273
|
"UpdateReplacePolicy": "Retain",
|
|
250
274
|
"Properties": {
|
|
251
|
-
"BucketName": { "Fn::Sub": "
|
|
275
|
+
"BucketName": { "Fn::Sub": "mlcc-benchmark-${AWS::AccountId}-${AWS::Region}" },
|
|
252
276
|
"VersioningConfiguration": { "Status": "Enabled" },
|
|
253
277
|
"BucketEncryption": {
|
|
254
278
|
"ServerSideEncryptionConfiguration": [
|
|
@@ -257,7 +281,8 @@
|
|
|
257
281
|
},
|
|
258
282
|
"Tags": [
|
|
259
283
|
{ "Key": "mlcc:managed-by", "Value": "ml-container-creator" },
|
|
260
|
-
{ "Key": "mlcc:created-by", "Value": "bootstrap" }
|
|
284
|
+
{ "Key": "mlcc:created-by", "Value": "bootstrap" },
|
|
285
|
+
{ "Key": "mlcc:purpose", "Value": "benchmark-results" }
|
|
261
286
|
]
|
|
262
287
|
}
|
|
263
288
|
}
|
|
@@ -292,7 +317,13 @@
|
|
|
292
317
|
"Description": "S3 bucket for batch transform I/O",
|
|
293
318
|
"Value": { "Ref": "BatchS3Bucket" }
|
|
294
319
|
},
|
|
320
|
+
"AdapterS3BucketName": {
|
|
321
|
+
"Condition": "ShouldCreateS3Buckets",
|
|
322
|
+
"Description": "S3 bucket for LoRA adapter storage",
|
|
323
|
+
"Value": { "Ref": "AdapterS3Bucket" }
|
|
324
|
+
},
|
|
295
325
|
"BenchmarkS3BucketName": {
|
|
326
|
+
"Condition": "ShouldCreateS3Buckets",
|
|
296
327
|
"Description": "S3 bucket for benchmark results output",
|
|
297
328
|
"Value": { "Ref": "BenchmarkS3Bucket" }
|
|
298
329
|
},
|
|
@@ -48,6 +48,7 @@
|
|
|
48
48
|
"semver"
|
|
49
49
|
],
|
|
50
50
|
"license": "Apache-2.0",
|
|
51
|
+
"peer": true,
|
|
51
52
|
"dependencies": {
|
|
52
53
|
"jsonschema": "~1.4.1",
|
|
53
54
|
"semver": "^7.7.4"
|
|
@@ -2150,6 +2151,7 @@
|
|
|
2150
2151
|
"integrity": "sha512-wGdMcf+vPYM6jikpS/qhg6WiqSV/OhG+jeeHT/KlVqxYfD40iYJf9/AE1uQxVWFvU7MipKRkRv8NSHiCGgPr8Q==",
|
|
2151
2152
|
"dev": true,
|
|
2152
2153
|
"license": "MIT",
|
|
2154
|
+
"peer": true,
|
|
2153
2155
|
"dependencies": {
|
|
2154
2156
|
"undici-types": "~6.21.0"
|
|
2155
2157
|
}
|
|
@@ -2789,7 +2791,8 @@
|
|
|
2789
2791
|
"version": "10.6.0",
|
|
2790
2792
|
"resolved": "https://registry.npmjs.org/constructs/-/constructs-10.6.0.tgz",
|
|
2791
2793
|
"integrity": "sha512-TxHOnBO5zMo/G76ykzGF/wMpEHu257TbWiIxP9K0Yv/+t70UzgBQiTqjkAsWOPC6jW91DzJI0+ehQV6xDRNBuQ==",
|
|
2792
|
-
"license": "Apache-2.0"
|
|
2794
|
+
"license": "Apache-2.0",
|
|
2795
|
+
"peer": true
|
|
2793
2796
|
},
|
|
2794
2797
|
"node_modules/create-require": {
|
|
2795
2798
|
"version": "1.1.1",
|
|
@@ -3694,6 +3697,7 @@
|
|
|
3694
3697
|
"integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
|
|
3695
3698
|
"dev": true,
|
|
3696
3699
|
"license": "Apache-2.0",
|
|
3700
|
+
"peer": true,
|
|
3697
3701
|
"bin": {
|
|
3698
3702
|
"tsc": "bin/tsc",
|
|
3699
3703
|
"tsserver": "bin/tsserver"
|
package/package.json
CHANGED
|
@@ -51,7 +51,7 @@ try {
|
|
|
51
51
|
|
|
52
52
|
// ── Mode configuration ───────────────────────────────────────────────────────
|
|
53
53
|
|
|
54
|
-
const DISCOVER_MODE = process.
|
|
54
|
+
const DISCOVER_MODE = process.env.DISCOVER_MODE !== 'false' && !process.argv.includes('--no-discover')
|
|
55
55
|
const SMART_MODE = process.env.BEDROCK_SMART === 'true'
|
|
56
56
|
const BEDROCK_MODEL = process.env.BEDROCK_MODEL || 'global.anthropic.claude-sonnet-4-20250514-v1:0'
|
|
57
57
|
const BEDROCK_REGION = process.env.BEDROCK_REGION || process.env.AWS_REGION || 'us-east-1'
|
|
@@ -593,10 +593,10 @@ const isMain = process.argv[1] && resolve(process.argv[1]) === __filename
|
|
|
593
593
|
if (isMain) {
|
|
594
594
|
if (SMART_MODE) {
|
|
595
595
|
log(`Smart mode enabled (model: ${BEDROCK_MODEL}, region: ${BEDROCK_REGION})`)
|
|
596
|
-
} else if (DISCOVER_MODE) {
|
|
597
|
-
log('
|
|
596
|
+
} else if (!DISCOVER_MODE) {
|
|
597
|
+
log('Static mode (catalog-only, no network calls) — use --no-discover to force this')
|
|
598
598
|
} else {
|
|
599
|
-
log('
|
|
599
|
+
log('Discover mode (HuggingFace API + quota lookups active)')
|
|
600
600
|
}
|
|
601
601
|
|
|
602
602
|
const transport = new StdioServerTransport()
|
|
@@ -207,7 +207,7 @@ const isHuggingFacePattern = (modelName) => {
|
|
|
207
207
|
* @returns {Promise<{ parameterCount: number, dtype: string, architecture: string, maxPositionEmbeddings: number, source: string } | null>}
|
|
208
208
|
*/
|
|
209
209
|
const resolveModelMetadata = async (modelName, options = {}) => {
|
|
210
|
-
const { discover =
|
|
210
|
+
const { discover = true, catalogPath } = options
|
|
211
211
|
|
|
212
212
|
// Tier 1: Catalog lookup
|
|
213
213
|
const catalog = await loadCatalog(catalogPath)
|
|
@@ -1,50 +1,23 @@
|
|
|
1
1
|
{
|
|
2
2
|
"catalogVersion": "1.0.0",
|
|
3
3
|
"models": {
|
|
4
|
-
"meta-llama/Llama-2-
|
|
5
|
-
"parameterCount":
|
|
6
|
-
"defaultDtype": "float16",
|
|
7
|
-
"architecture": "LlamaForCausalLM",
|
|
8
|
-
"maxPositionEmbeddings": 4096,
|
|
9
|
-
"recommendedQuantizations": ["awq", "gptq"],
|
|
10
|
-
"minVramGb": 18,
|
|
11
|
-
"recommendedInstances": ["ml.g5.2xlarge", "ml.g5.4xlarge"]
|
|
12
|
-
},
|
|
13
|
-
"meta-llama/Llama-2-13b*": {
|
|
14
|
-
"parameterCount": 13015864320,
|
|
15
|
-
"defaultDtype": "float16",
|
|
16
|
-
"architecture": "LlamaForCausalLM",
|
|
17
|
-
"maxPositionEmbeddings": 4096,
|
|
18
|
-
"recommendedQuantizations": ["awq", "gptq"],
|
|
19
|
-
"minVramGb": 34,
|
|
20
|
-
"recommendedInstances": ["ml.g5.4xlarge", "ml.g5.12xlarge"]
|
|
21
|
-
},
|
|
22
|
-
"meta-llama/Llama-2-70b*": {
|
|
23
|
-
"parameterCount": 68976648192,
|
|
24
|
-
"defaultDtype": "float16",
|
|
25
|
-
"architecture": "LlamaForCausalLM",
|
|
26
|
-
"maxPositionEmbeddings": 4096,
|
|
27
|
-
"recommendedQuantizations": ["awq", "gptq"],
|
|
28
|
-
"minVramGb": 180,
|
|
29
|
-
"recommendedInstances": ["ml.g5.48xlarge", "ml.p4d.24xlarge"]
|
|
30
|
-
},
|
|
31
|
-
"meta-llama/Meta-Llama-3-8B*": {
|
|
32
|
-
"parameterCount": 8030261248,
|
|
4
|
+
"meta-llama/Llama-3.2-1B*": {
|
|
5
|
+
"parameterCount": 1235814400,
|
|
33
6
|
"defaultDtype": "bfloat16",
|
|
34
7
|
"architecture": "LlamaForCausalLM",
|
|
35
|
-
"maxPositionEmbeddings":
|
|
8
|
+
"maxPositionEmbeddings": 131072,
|
|
36
9
|
"recommendedQuantizations": ["awq", "gptq"],
|
|
37
|
-
"minVramGb":
|
|
38
|
-
"recommendedInstances": ["ml.g5.
|
|
10
|
+
"minVramGb": 5,
|
|
11
|
+
"recommendedInstances": ["ml.g5.xlarge", "ml.g6.xlarge"]
|
|
39
12
|
},
|
|
40
|
-
"meta-llama/
|
|
41
|
-
"parameterCount":
|
|
13
|
+
"meta-llama/Llama-3.2-3B*": {
|
|
14
|
+
"parameterCount": 3212749824,
|
|
42
15
|
"defaultDtype": "bfloat16",
|
|
43
16
|
"architecture": "LlamaForCausalLM",
|
|
44
|
-
"maxPositionEmbeddings":
|
|
17
|
+
"maxPositionEmbeddings": 131072,
|
|
45
18
|
"recommendedQuantizations": ["awq", "gptq"],
|
|
46
|
-
"minVramGb":
|
|
47
|
-
"recommendedInstances": ["ml.g5.
|
|
19
|
+
"minVramGb": 9,
|
|
20
|
+
"recommendedInstances": ["ml.g5.xlarge", "ml.g6.xlarge"]
|
|
48
21
|
},
|
|
49
22
|
"meta-llama/Llama-3.1-8B*": {
|
|
50
23
|
"parameterCount": 8030261248,
|
|
@@ -55,104 +28,176 @@
|
|
|
55
28
|
"minVramGb": 20,
|
|
56
29
|
"recommendedInstances": ["ml.g5.2xlarge", "ml.g6.2xlarge"]
|
|
57
30
|
},
|
|
58
|
-
"meta-llama/Llama-3.
|
|
59
|
-
"parameterCount":
|
|
31
|
+
"meta-llama/Llama-3.3-70B*": {
|
|
32
|
+
"parameterCount": 70553706496,
|
|
60
33
|
"defaultDtype": "bfloat16",
|
|
61
34
|
"architecture": "LlamaForCausalLM",
|
|
62
35
|
"maxPositionEmbeddings": 131072,
|
|
36
|
+
"recommendedQuantizations": ["awq", "gptq", "fp8"],
|
|
37
|
+
"minVramGb": 184,
|
|
38
|
+
"recommendedInstances": ["ml.g5.48xlarge", "ml.p4d.24xlarge", "ml.g6e.48xlarge"]
|
|
39
|
+
},
|
|
40
|
+
"Qwen/Qwen3-0.6B*": {
|
|
41
|
+
"parameterCount": 600000000,
|
|
42
|
+
"defaultDtype": "bfloat16",
|
|
43
|
+
"architecture": "Qwen3ForCausalLM",
|
|
44
|
+
"maxPositionEmbeddings": 32768,
|
|
63
45
|
"recommendedQuantizations": ["awq", "gptq"],
|
|
64
|
-
"minVramGb":
|
|
46
|
+
"minVramGb": 3,
|
|
65
47
|
"recommendedInstances": ["ml.g5.xlarge", "ml.g6.xlarge"]
|
|
66
48
|
},
|
|
67
|
-
"
|
|
68
|
-
"parameterCount":
|
|
49
|
+
"Qwen/Qwen3-1.7B*": {
|
|
50
|
+
"parameterCount": 1700000000,
|
|
69
51
|
"defaultDtype": "bfloat16",
|
|
70
|
-
"architecture": "
|
|
71
|
-
"maxPositionEmbeddings":
|
|
52
|
+
"architecture": "Qwen3ForCausalLM",
|
|
53
|
+
"maxPositionEmbeddings": 32768,
|
|
72
54
|
"recommendedQuantizations": ["awq", "gptq"],
|
|
73
|
-
"minVramGb":
|
|
55
|
+
"minVramGb": 6,
|
|
74
56
|
"recommendedInstances": ["ml.g5.xlarge", "ml.g6.xlarge"]
|
|
75
57
|
},
|
|
76
|
-
"
|
|
77
|
-
"parameterCount":
|
|
58
|
+
"Qwen/Qwen3-4B*": {
|
|
59
|
+
"parameterCount": 4000000000,
|
|
78
60
|
"defaultDtype": "bfloat16",
|
|
79
|
-
"architecture": "
|
|
61
|
+
"architecture": "Qwen3ForCausalLM",
|
|
80
62
|
"maxPositionEmbeddings": 32768,
|
|
81
63
|
"recommendedQuantizations": ["awq", "gptq"],
|
|
82
|
-
"minVramGb":
|
|
83
|
-
"recommendedInstances": ["ml.g5.
|
|
64
|
+
"minVramGb": 11,
|
|
65
|
+
"recommendedInstances": ["ml.g5.xlarge", "ml.g6.xlarge"]
|
|
84
66
|
},
|
|
85
|
-
"
|
|
86
|
-
"parameterCount":
|
|
67
|
+
"Qwen/Qwen3-8B*": {
|
|
68
|
+
"parameterCount": 8000000000,
|
|
87
69
|
"defaultDtype": "bfloat16",
|
|
88
|
-
"architecture": "
|
|
70
|
+
"architecture": "Qwen3ForCausalLM",
|
|
89
71
|
"maxPositionEmbeddings": 32768,
|
|
90
72
|
"recommendedQuantizations": ["awq", "gptq"],
|
|
91
|
-
"minVramGb":
|
|
92
|
-
"recommendedInstances": ["ml.g5.
|
|
73
|
+
"minVramGb": 20,
|
|
74
|
+
"recommendedInstances": ["ml.g5.2xlarge", "ml.g6.2xlarge"]
|
|
93
75
|
},
|
|
94
|
-
"Qwen/
|
|
95
|
-
"parameterCount":
|
|
76
|
+
"Qwen/Qwen3-14B*": {
|
|
77
|
+
"parameterCount": 14000000000,
|
|
96
78
|
"defaultDtype": "bfloat16",
|
|
97
|
-
"architecture": "
|
|
98
|
-
"maxPositionEmbeddings":
|
|
79
|
+
"architecture": "Qwen3ForCausalLM",
|
|
80
|
+
"maxPositionEmbeddings": 32768,
|
|
99
81
|
"recommendedQuantizations": ["awq", "gptq"],
|
|
100
|
-
"minVramGb":
|
|
101
|
-
"recommendedInstances": ["ml.g5.
|
|
82
|
+
"minVramGb": 37,
|
|
83
|
+
"recommendedInstances": ["ml.g5.4xlarge", "ml.g5.12xlarge"]
|
|
84
|
+
},
|
|
85
|
+
"Qwen/Qwen3-32B*": {
|
|
86
|
+
"parameterCount": 32000000000,
|
|
87
|
+
"defaultDtype": "bfloat16",
|
|
88
|
+
"architecture": "Qwen3ForCausalLM",
|
|
89
|
+
"maxPositionEmbeddings": 32768,
|
|
90
|
+
"recommendedQuantizations": ["awq", "gptq"],
|
|
91
|
+
"minVramGb": 84,
|
|
92
|
+
"recommendedInstances": ["ml.g5.12xlarge", "ml.g5.48xlarge"]
|
|
102
93
|
},
|
|
103
|
-
"Qwen/Qwen2-7B*": {
|
|
94
|
+
"Qwen/Qwen2.5-7B*": {
|
|
104
95
|
"parameterCount": 7721324544,
|
|
105
96
|
"defaultDtype": "bfloat16",
|
|
106
97
|
"architecture": "Qwen2ForCausalLM",
|
|
107
|
-
"maxPositionEmbeddings":
|
|
98
|
+
"maxPositionEmbeddings": 131072,
|
|
108
99
|
"recommendedQuantizations": ["awq", "gptq"],
|
|
109
100
|
"minVramGb": 20,
|
|
110
|
-
"recommendedInstances": ["ml.g5.2xlarge", "ml.
|
|
101
|
+
"recommendedInstances": ["ml.g5.2xlarge", "ml.g6.2xlarge"]
|
|
111
102
|
},
|
|
112
|
-
"Qwen/
|
|
103
|
+
"Qwen/Qwen2.5-14B*": {
|
|
113
104
|
"parameterCount": 14167134208,
|
|
114
105
|
"defaultDtype": "bfloat16",
|
|
115
|
-
"architecture": "
|
|
116
|
-
"maxPositionEmbeddings":
|
|
106
|
+
"architecture": "Qwen2ForCausalLM",
|
|
107
|
+
"maxPositionEmbeddings": 131072,
|
|
117
108
|
"recommendedQuantizations": ["awq", "gptq"],
|
|
118
109
|
"minVramGb": 37,
|
|
119
110
|
"recommendedInstances": ["ml.g5.4xlarge", "ml.g5.12xlarge"]
|
|
120
111
|
},
|
|
121
|
-
"Qwen/Qwen2-
|
|
122
|
-
"parameterCount":
|
|
112
|
+
"Qwen/Qwen2.5-32B*": {
|
|
113
|
+
"parameterCount": 32000000000,
|
|
123
114
|
"defaultDtype": "bfloat16",
|
|
124
115
|
"architecture": "Qwen2ForCausalLM",
|
|
125
|
-
"maxPositionEmbeddings":
|
|
116
|
+
"maxPositionEmbeddings": 131072,
|
|
126
117
|
"recommendedQuantizations": ["awq", "gptq"],
|
|
127
|
-
"minVramGb":
|
|
128
|
-
"recommendedInstances": ["ml.g5.
|
|
118
|
+
"minVramGb": 84,
|
|
119
|
+
"recommendedInstances": ["ml.g5.12xlarge", "ml.g5.48xlarge"]
|
|
129
120
|
},
|
|
130
|
-
"Qwen/
|
|
121
|
+
"Qwen/Qwen2.5-72B*": {
|
|
131
122
|
"parameterCount": 72710410240,
|
|
132
123
|
"defaultDtype": "bfloat16",
|
|
133
|
-
"architecture": "
|
|
134
|
-
"maxPositionEmbeddings":
|
|
135
|
-
"recommendedQuantizations": ["awq", "gptq"],
|
|
124
|
+
"architecture": "Qwen2ForCausalLM",
|
|
125
|
+
"maxPositionEmbeddings": 131072,
|
|
126
|
+
"recommendedQuantizations": ["awq", "gptq", "fp8"],
|
|
136
127
|
"minVramGb": 190,
|
|
137
|
-
"recommendedInstances": ["ml.g5.48xlarge", "ml.p4d.24xlarge"]
|
|
128
|
+
"recommendedInstances": ["ml.g5.48xlarge", "ml.p4d.24xlarge", "ml.g6e.48xlarge"]
|
|
138
129
|
},
|
|
139
|
-
"
|
|
140
|
-
"parameterCount":
|
|
130
|
+
"deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B*": {
|
|
131
|
+
"parameterCount": 1500000000,
|
|
141
132
|
"defaultDtype": "bfloat16",
|
|
142
133
|
"architecture": "Qwen2ForCausalLM",
|
|
143
|
-
"maxPositionEmbeddings":
|
|
134
|
+
"maxPositionEmbeddings": 131072,
|
|
144
135
|
"recommendedQuantizations": ["awq", "gptq"],
|
|
145
|
-
"minVramGb":
|
|
146
|
-
"recommendedInstances": ["ml.g5.
|
|
147
|
-
},
|
|
148
|
-
"
|
|
149
|
-
"parameterCount":
|
|
150
|
-
"defaultDtype": "
|
|
151
|
-
"architecture": "
|
|
152
|
-
"maxPositionEmbeddings":
|
|
153
|
-
"recommendedQuantizations": ["gptq"],
|
|
154
|
-
"minVramGb":
|
|
136
|
+
"minVramGb": 5,
|
|
137
|
+
"recommendedInstances": ["ml.g5.xlarge", "ml.g6.xlarge"]
|
|
138
|
+
},
|
|
139
|
+
"deepseek-ai/DeepSeek-R1-Distill-Qwen-7B*": {
|
|
140
|
+
"parameterCount": 7000000000,
|
|
141
|
+
"defaultDtype": "bfloat16",
|
|
142
|
+
"architecture": "Qwen2ForCausalLM",
|
|
143
|
+
"maxPositionEmbeddings": 131072,
|
|
144
|
+
"recommendedQuantizations": ["awq", "gptq"],
|
|
145
|
+
"minVramGb": 18,
|
|
146
|
+
"recommendedInstances": ["ml.g5.2xlarge", "ml.g6.2xlarge"]
|
|
147
|
+
},
|
|
148
|
+
"deepseek-ai/DeepSeek-R1-Distill-Qwen-14B*": {
|
|
149
|
+
"parameterCount": 14000000000,
|
|
150
|
+
"defaultDtype": "bfloat16",
|
|
151
|
+
"architecture": "Qwen2ForCausalLM",
|
|
152
|
+
"maxPositionEmbeddings": 131072,
|
|
153
|
+
"recommendedQuantizations": ["awq", "gptq"],
|
|
154
|
+
"minVramGb": 37,
|
|
155
|
+
"recommendedInstances": ["ml.g5.4xlarge", "ml.g5.12xlarge"]
|
|
156
|
+
},
|
|
157
|
+
"deepseek-ai/DeepSeek-R1-Distill-Qwen-32B*": {
|
|
158
|
+
"parameterCount": 32000000000,
|
|
159
|
+
"defaultDtype": "bfloat16",
|
|
160
|
+
"architecture": "Qwen2ForCausalLM",
|
|
161
|
+
"maxPositionEmbeddings": 131072,
|
|
162
|
+
"recommendedQuantizations": ["awq", "gptq"],
|
|
163
|
+
"minVramGb": 84,
|
|
164
|
+
"recommendedInstances": ["ml.g5.12xlarge", "ml.g5.48xlarge"]
|
|
165
|
+
},
|
|
166
|
+
"deepseek-ai/DeepSeek-R1-Distill-Llama-8B*": {
|
|
167
|
+
"parameterCount": 8000000000,
|
|
168
|
+
"defaultDtype": "bfloat16",
|
|
169
|
+
"architecture": "LlamaForCausalLM",
|
|
170
|
+
"maxPositionEmbeddings": 131072,
|
|
171
|
+
"recommendedQuantizations": ["awq", "gptq"],
|
|
172
|
+
"minVramGb": 20,
|
|
173
|
+
"recommendedInstances": ["ml.g5.2xlarge", "ml.g6.2xlarge"]
|
|
174
|
+
},
|
|
175
|
+
"deepseek-ai/DeepSeek-R1-Distill-Llama-70B*": {
|
|
176
|
+
"parameterCount": 70000000000,
|
|
177
|
+
"defaultDtype": "bfloat16",
|
|
178
|
+
"architecture": "LlamaForCausalLM",
|
|
179
|
+
"maxPositionEmbeddings": 131072,
|
|
180
|
+
"recommendedQuantizations": ["awq", "gptq", "fp8"],
|
|
181
|
+
"minVramGb": 184,
|
|
182
|
+
"recommendedInstances": ["ml.g5.48xlarge", "ml.p4d.24xlarge", "ml.g6e.48xlarge"]
|
|
183
|
+
},
|
|
184
|
+
"openai/gpt-oss-20b*": {
|
|
185
|
+
"parameterCount": 20000000000,
|
|
186
|
+
"defaultDtype": "bfloat16",
|
|
187
|
+
"architecture": "GPT2LMHeadModel",
|
|
188
|
+
"maxPositionEmbeddings": 8192,
|
|
189
|
+
"recommendedQuantizations": ["awq", "gptq"],
|
|
190
|
+
"minVramGb": 52,
|
|
155
191
|
"recommendedInstances": ["ml.g5.12xlarge", "ml.g5.48xlarge"]
|
|
192
|
+
},
|
|
193
|
+
"openai/gpt-oss-120b*": {
|
|
194
|
+
"parameterCount": 120000000000,
|
|
195
|
+
"defaultDtype": "bfloat16",
|
|
196
|
+
"architecture": "GPT2LMHeadModel",
|
|
197
|
+
"maxPositionEmbeddings": 8192,
|
|
198
|
+
"recommendedQuantizations": ["awq", "gptq", "fp8"],
|
|
199
|
+
"minVramGb": 312,
|
|
200
|
+
"recommendedInstances": ["ml.p4d.24xlarge", "ml.p5.48xlarge"]
|
|
156
201
|
}
|
|
157
202
|
}
|
|
158
203
|
}
|