@aws/ml-container-creator 0.5.0 → 0.6.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -7,7 +7,7 @@
7
7
  "Type": "String",
8
8
  "Default": "false",
9
9
  "AllowedValues": ["true", "false"],
10
- "Description": "Whether to create S3 buckets for async inference and batch transform"
10
+ "Description": "Whether to create S3 buckets for async inference, batch transform, adapters, and benchmarks"
11
11
  },
12
12
  "UseExistingRoleArn": {
13
13
  "Type": "String",
@@ -134,8 +134,8 @@
134
134
  "s3:ListBucket"
135
135
  ],
136
136
  "Resource": [
137
- "arn:aws:s3:::ml-container-creator-*",
138
- "arn:aws:s3:::ml-container-creator-*/*"
137
+ "arn:aws:s3:::mlcc-*",
138
+ "arn:aws:s3:::mlcc-*/*"
139
139
  ]
140
140
  },
141
141
  {
@@ -209,7 +209,7 @@
209
209
  "DeletionPolicy": "Retain",
210
210
  "UpdateReplacePolicy": "Retain",
211
211
  "Properties": {
212
- "BucketName": { "Fn::Sub": "${AWS::AccountId}-${AWS::Region}-ml-container-creator-async" },
212
+ "BucketName": { "Fn::Sub": "mlcc-async-${AWS::AccountId}-${AWS::Region}" },
213
213
  "VersioningConfiguration": { "Status": "Enabled" },
214
214
  "BucketEncryption": {
215
215
  "ServerSideEncryptionConfiguration": [
@@ -218,7 +218,8 @@
218
218
  },
219
219
  "Tags": [
220
220
  { "Key": "mlcc:managed-by", "Value": "ml-container-creator" },
221
- { "Key": "mlcc:created-by", "Value": "bootstrap" }
221
+ { "Key": "mlcc:created-by", "Value": "bootstrap" },
222
+ { "Key": "mlcc:purpose", "Value": "async-inference-output" }
222
223
  ]
223
224
  }
224
225
  },
@@ -229,7 +230,7 @@
229
230
  "DeletionPolicy": "Retain",
230
231
  "UpdateReplacePolicy": "Retain",
231
232
  "Properties": {
232
- "BucketName": { "Fn::Sub": "${AWS::AccountId}-${AWS::Region}-ml-container-creator-batch" },
233
+ "BucketName": { "Fn::Sub": "mlcc-batch-${AWS::AccountId}-${AWS::Region}" },
233
234
  "VersioningConfiguration": { "Status": "Enabled" },
234
235
  "BucketEncryption": {
235
236
  "ServerSideEncryptionConfiguration": [
@@ -238,17 +239,40 @@
238
239
  },
239
240
  "Tags": [
240
241
  { "Key": "mlcc:managed-by", "Value": "ml-container-creator" },
241
- { "Key": "mlcc:created-by", "Value": "bootstrap" }
242
+ { "Key": "mlcc:created-by", "Value": "bootstrap" },
243
+ { "Key": "mlcc:purpose", "Value": "batch-transform-io" }
244
+ ]
245
+ }
246
+ },
247
+
248
+ "AdapterS3Bucket": {
249
+ "Type": "AWS::S3::Bucket",
250
+ "Condition": "ShouldCreateS3Buckets",
251
+ "DeletionPolicy": "Retain",
252
+ "UpdateReplacePolicy": "Retain",
253
+ "Properties": {
254
+ "BucketName": { "Fn::Sub": "mlcc-adapters-${AWS::AccountId}-${AWS::Region}" },
255
+ "VersioningConfiguration": { "Status": "Enabled" },
256
+ "BucketEncryption": {
257
+ "ServerSideEncryptionConfiguration": [
258
+ { "ServerSideEncryptionByDefault": { "SSEAlgorithm": "AES256" } }
259
+ ]
260
+ },
261
+ "Tags": [
262
+ { "Key": "mlcc:managed-by", "Value": "ml-container-creator" },
263
+ { "Key": "mlcc:created-by", "Value": "bootstrap" },
264
+ { "Key": "mlcc:purpose", "Value": "lora-adapter-storage" }
242
265
  ]
243
266
  }
244
267
  },
245
268
 
246
269
  "BenchmarkS3Bucket": {
247
270
  "Type": "AWS::S3::Bucket",
271
+ "Condition": "ShouldCreateS3Buckets",
248
272
  "DeletionPolicy": "Retain",
249
273
  "UpdateReplacePolicy": "Retain",
250
274
  "Properties": {
251
- "BucketName": { "Fn::Sub": "ml-container-creator-benchmark-${AWS::Region}-${AWS::AccountId}" },
275
+ "BucketName": { "Fn::Sub": "mlcc-benchmark-${AWS::AccountId}-${AWS::Region}" },
252
276
  "VersioningConfiguration": { "Status": "Enabled" },
253
277
  "BucketEncryption": {
254
278
  "ServerSideEncryptionConfiguration": [
@@ -257,7 +281,8 @@
257
281
  },
258
282
  "Tags": [
259
283
  { "Key": "mlcc:managed-by", "Value": "ml-container-creator" },
260
- { "Key": "mlcc:created-by", "Value": "bootstrap" }
284
+ { "Key": "mlcc:created-by", "Value": "bootstrap" },
285
+ { "Key": "mlcc:purpose", "Value": "benchmark-results" }
261
286
  ]
262
287
  }
263
288
  }
@@ -292,7 +317,13 @@
292
317
  "Description": "S3 bucket for batch transform I/O",
293
318
  "Value": { "Ref": "BatchS3Bucket" }
294
319
  },
320
+ "AdapterS3BucketName": {
321
+ "Condition": "ShouldCreateS3Buckets",
322
+ "Description": "S3 bucket for LoRA adapter storage",
323
+ "Value": { "Ref": "AdapterS3Bucket" }
324
+ },
295
325
  "BenchmarkS3BucketName": {
326
+ "Condition": "ShouldCreateS3Buckets",
296
327
  "Description": "S3 bucket for benchmark results output",
297
328
  "Value": { "Ref": "BenchmarkS3Bucket" }
298
329
  },
@@ -48,6 +48,7 @@
48
48
  "semver"
49
49
  ],
50
50
  "license": "Apache-2.0",
51
+ "peer": true,
51
52
  "dependencies": {
52
53
  "jsonschema": "~1.4.1",
53
54
  "semver": "^7.7.4"
@@ -2150,6 +2151,7 @@
2150
2151
  "integrity": "sha512-wGdMcf+vPYM6jikpS/qhg6WiqSV/OhG+jeeHT/KlVqxYfD40iYJf9/AE1uQxVWFvU7MipKRkRv8NSHiCGgPr8Q==",
2151
2152
  "dev": true,
2152
2153
  "license": "MIT",
2154
+ "peer": true,
2153
2155
  "dependencies": {
2154
2156
  "undici-types": "~6.21.0"
2155
2157
  }
@@ -2789,7 +2791,8 @@
2789
2791
  "version": "10.6.0",
2790
2792
  "resolved": "https://registry.npmjs.org/constructs/-/constructs-10.6.0.tgz",
2791
2793
  "integrity": "sha512-TxHOnBO5zMo/G76ykzGF/wMpEHu257TbWiIxP9K0Yv/+t70UzgBQiTqjkAsWOPC6jW91DzJI0+ehQV6xDRNBuQ==",
2792
- "license": "Apache-2.0"
2794
+ "license": "Apache-2.0",
2795
+ "peer": true
2793
2796
  },
2794
2797
  "node_modules/create-require": {
2795
2798
  "version": "1.1.1",
@@ -3694,6 +3697,7 @@
3694
3697
  "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
3695
3698
  "dev": true,
3696
3699
  "license": "Apache-2.0",
3700
+ "peer": true,
3697
3701
  "bin": {
3698
3702
  "tsc": "bin/tsc",
3699
3703
  "tsserver": "bin/tsserver"
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aws/ml-container-creator",
3
- "version": "0.5.0",
3
+ "version": "0.6.0",
4
4
  "description": "Generator for SageMaker AI BYOC paradigm for predictive inference use-cases.",
5
5
  "type": "module",
6
6
  "main": "src/app.js",
@@ -51,7 +51,7 @@ try {
51
51
 
52
52
  // ── Mode configuration ───────────────────────────────────────────────────────
53
53
 
54
- const DISCOVER_MODE = process.argv.includes('--discover') || process.env.DISCOVER_MODE === 'true'
54
+ const DISCOVER_MODE = process.env.DISCOVER_MODE !== 'false' && !process.argv.includes('--no-discover')
55
55
  const SMART_MODE = process.env.BEDROCK_SMART === 'true'
56
56
  const BEDROCK_MODEL = process.env.BEDROCK_MODEL || 'global.anthropic.claude-sonnet-4-20250514-v1:0'
57
57
  const BEDROCK_REGION = process.env.BEDROCK_REGION || process.env.AWS_REGION || 'us-east-1'
@@ -593,10 +593,10 @@ const isMain = process.argv[1] && resolve(process.argv[1]) === __filename
593
593
  if (isMain) {
594
594
  if (SMART_MODE) {
595
595
  log(`Smart mode enabled (model: ${BEDROCK_MODEL}, region: ${BEDROCK_REGION})`)
596
- } else if (DISCOVER_MODE) {
597
- log('Discover mode enabled (HuggingFace API lookups active)')
596
+ } else if (!DISCOVER_MODE) {
597
+ log('Static mode (catalog-only, no network calls) — use --no-discover to force this')
598
598
  } else {
599
- log('Static mode (catalog-only, no network calls)')
599
+ log('Discover mode (HuggingFace API + quota lookups active)')
600
600
  }
601
601
 
602
602
  const transport = new StdioServerTransport()
@@ -207,7 +207,7 @@ const isHuggingFacePattern = (modelName) => {
207
207
  * @returns {Promise<{ parameterCount: number, dtype: string, architecture: string, maxPositionEmbeddings: number, source: string } | null>}
208
208
  */
209
209
  const resolveModelMetadata = async (modelName, options = {}) => {
210
- const { discover = false, catalogPath } = options
210
+ const { discover = true, catalogPath } = options
211
211
 
212
212
  // Tier 1: Catalog lookup
213
213
  const catalog = await loadCatalog(catalogPath)
@@ -1,50 +1,23 @@
1
1
  {
2
2
  "catalogVersion": "1.0.0",
3
3
  "models": {
4
- "meta-llama/Llama-2-7b*": {
5
- "parameterCount": 6738415616,
6
- "defaultDtype": "float16",
7
- "architecture": "LlamaForCausalLM",
8
- "maxPositionEmbeddings": 4096,
9
- "recommendedQuantizations": ["awq", "gptq"],
10
- "minVramGb": 18,
11
- "recommendedInstances": ["ml.g5.2xlarge", "ml.g5.4xlarge"]
12
- },
13
- "meta-llama/Llama-2-13b*": {
14
- "parameterCount": 13015864320,
15
- "defaultDtype": "float16",
16
- "architecture": "LlamaForCausalLM",
17
- "maxPositionEmbeddings": 4096,
18
- "recommendedQuantizations": ["awq", "gptq"],
19
- "minVramGb": 34,
20
- "recommendedInstances": ["ml.g5.4xlarge", "ml.g5.12xlarge"]
21
- },
22
- "meta-llama/Llama-2-70b*": {
23
- "parameterCount": 68976648192,
24
- "defaultDtype": "float16",
25
- "architecture": "LlamaForCausalLM",
26
- "maxPositionEmbeddings": 4096,
27
- "recommendedQuantizations": ["awq", "gptq"],
28
- "minVramGb": 180,
29
- "recommendedInstances": ["ml.g5.48xlarge", "ml.p4d.24xlarge"]
30
- },
31
- "meta-llama/Meta-Llama-3-8B*": {
32
- "parameterCount": 8030261248,
4
+ "meta-llama/Llama-3.2-1B*": {
5
+ "parameterCount": 1235814400,
33
6
  "defaultDtype": "bfloat16",
34
7
  "architecture": "LlamaForCausalLM",
35
- "maxPositionEmbeddings": 8192,
8
+ "maxPositionEmbeddings": 131072,
36
9
  "recommendedQuantizations": ["awq", "gptq"],
37
- "minVramGb": 21,
38
- "recommendedInstances": ["ml.g5.2xlarge", "ml.g5.4xlarge"]
10
+ "minVramGb": 5,
11
+ "recommendedInstances": ["ml.g5.xlarge", "ml.g6.xlarge"]
39
12
  },
40
- "meta-llama/Meta-Llama-3-70B*": {
41
- "parameterCount": 70553706496,
13
+ "meta-llama/Llama-3.2-3B*": {
14
+ "parameterCount": 3212749824,
42
15
  "defaultDtype": "bfloat16",
43
16
  "architecture": "LlamaForCausalLM",
44
- "maxPositionEmbeddings": 8192,
17
+ "maxPositionEmbeddings": 131072,
45
18
  "recommendedQuantizations": ["awq", "gptq"],
46
- "minVramGb": 184,
47
- "recommendedInstances": ["ml.g5.48xlarge", "ml.p4d.24xlarge"]
19
+ "minVramGb": 9,
20
+ "recommendedInstances": ["ml.g5.xlarge", "ml.g6.xlarge"]
48
21
  },
49
22
  "meta-llama/Llama-3.1-8B*": {
50
23
  "parameterCount": 8030261248,
@@ -55,104 +28,176 @@
55
28
  "minVramGb": 20,
56
29
  "recommendedInstances": ["ml.g5.2xlarge", "ml.g6.2xlarge"]
57
30
  },
58
- "meta-llama/Llama-3.2-1B*": {
59
- "parameterCount": 1235814400,
31
+ "meta-llama/Llama-3.3-70B*": {
32
+ "parameterCount": 70553706496,
60
33
  "defaultDtype": "bfloat16",
61
34
  "architecture": "LlamaForCausalLM",
62
35
  "maxPositionEmbeddings": 131072,
36
+ "recommendedQuantizations": ["awq", "gptq", "fp8"],
37
+ "minVramGb": 184,
38
+ "recommendedInstances": ["ml.g5.48xlarge", "ml.p4d.24xlarge", "ml.g6e.48xlarge"]
39
+ },
40
+ "Qwen/Qwen3-0.6B*": {
41
+ "parameterCount": 600000000,
42
+ "defaultDtype": "bfloat16",
43
+ "architecture": "Qwen3ForCausalLM",
44
+ "maxPositionEmbeddings": 32768,
63
45
  "recommendedQuantizations": ["awq", "gptq"],
64
- "minVramGb": 5,
46
+ "minVramGb": 3,
65
47
  "recommendedInstances": ["ml.g5.xlarge", "ml.g6.xlarge"]
66
48
  },
67
- "meta-llama/Llama-3.2-3B*": {
68
- "parameterCount": 3212749824,
49
+ "Qwen/Qwen3-1.7B*": {
50
+ "parameterCount": 1700000000,
69
51
  "defaultDtype": "bfloat16",
70
- "architecture": "LlamaForCausalLM",
71
- "maxPositionEmbeddings": 131072,
52
+ "architecture": "Qwen3ForCausalLM",
53
+ "maxPositionEmbeddings": 32768,
72
54
  "recommendedQuantizations": ["awq", "gptq"],
73
- "minVramGb": 9,
55
+ "minVramGb": 6,
74
56
  "recommendedInstances": ["ml.g5.xlarge", "ml.g6.xlarge"]
75
57
  },
76
- "mistralai/Mistral-7B*": {
77
- "parameterCount": 7241732096,
58
+ "Qwen/Qwen3-4B*": {
59
+ "parameterCount": 4000000000,
78
60
  "defaultDtype": "bfloat16",
79
- "architecture": "MistralForCausalLM",
61
+ "architecture": "Qwen3ForCausalLM",
80
62
  "maxPositionEmbeddings": 32768,
81
63
  "recommendedQuantizations": ["awq", "gptq"],
82
- "minVramGb": 19,
83
- "recommendedInstances": ["ml.g5.2xlarge", "ml.g5.4xlarge"]
64
+ "minVramGb": 11,
65
+ "recommendedInstances": ["ml.g5.xlarge", "ml.g6.xlarge"]
84
66
  },
85
- "mistralai/Mixtral-8x7B*": {
86
- "parameterCount": 46702792704,
67
+ "Qwen/Qwen3-8B*": {
68
+ "parameterCount": 8000000000,
87
69
  "defaultDtype": "bfloat16",
88
- "architecture": "MixtralForCausalLM",
70
+ "architecture": "Qwen3ForCausalLM",
89
71
  "maxPositionEmbeddings": 32768,
90
72
  "recommendedQuantizations": ["awq", "gptq"],
91
- "minVramGb": 122,
92
- "recommendedInstances": ["ml.g5.48xlarge", "ml.p4d.24xlarge"]
73
+ "minVramGb": 20,
74
+ "recommendedInstances": ["ml.g5.2xlarge", "ml.g6.2xlarge"]
93
75
  },
94
- "Qwen/Qwen-7B*": {
95
- "parameterCount": 7721324544,
76
+ "Qwen/Qwen3-14B*": {
77
+ "parameterCount": 14000000000,
96
78
  "defaultDtype": "bfloat16",
97
- "architecture": "QWenLMHeadModel",
98
- "maxPositionEmbeddings": 8192,
79
+ "architecture": "Qwen3ForCausalLM",
80
+ "maxPositionEmbeddings": 32768,
99
81
  "recommendedQuantizations": ["awq", "gptq"],
100
- "minVramGb": 20,
101
- "recommendedInstances": ["ml.g5.2xlarge", "ml.g5.4xlarge"]
82
+ "minVramGb": 37,
83
+ "recommendedInstances": ["ml.g5.4xlarge", "ml.g5.12xlarge"]
84
+ },
85
+ "Qwen/Qwen3-32B*": {
86
+ "parameterCount": 32000000000,
87
+ "defaultDtype": "bfloat16",
88
+ "architecture": "Qwen3ForCausalLM",
89
+ "maxPositionEmbeddings": 32768,
90
+ "recommendedQuantizations": ["awq", "gptq"],
91
+ "minVramGb": 84,
92
+ "recommendedInstances": ["ml.g5.12xlarge", "ml.g5.48xlarge"]
102
93
  },
103
- "Qwen/Qwen2-7B*": {
94
+ "Qwen/Qwen2.5-7B*": {
104
95
  "parameterCount": 7721324544,
105
96
  "defaultDtype": "bfloat16",
106
97
  "architecture": "Qwen2ForCausalLM",
107
- "maxPositionEmbeddings": 32768,
98
+ "maxPositionEmbeddings": 131072,
108
99
  "recommendedQuantizations": ["awq", "gptq"],
109
100
  "minVramGb": 20,
110
- "recommendedInstances": ["ml.g5.2xlarge", "ml.g5.4xlarge"]
101
+ "recommendedInstances": ["ml.g5.2xlarge", "ml.g6.2xlarge"]
111
102
  },
112
- "Qwen/Qwen-14B*": {
103
+ "Qwen/Qwen2.5-14B*": {
113
104
  "parameterCount": 14167134208,
114
105
  "defaultDtype": "bfloat16",
115
- "architecture": "QWenLMHeadModel",
116
- "maxPositionEmbeddings": 8192,
106
+ "architecture": "Qwen2ForCausalLM",
107
+ "maxPositionEmbeddings": 131072,
117
108
  "recommendedQuantizations": ["awq", "gptq"],
118
109
  "minVramGb": 37,
119
110
  "recommendedInstances": ["ml.g5.4xlarge", "ml.g5.12xlarge"]
120
111
  },
121
- "Qwen/Qwen2-14B*": {
122
- "parameterCount": 14167134208,
112
+ "Qwen/Qwen2.5-32B*": {
113
+ "parameterCount": 32000000000,
123
114
  "defaultDtype": "bfloat16",
124
115
  "architecture": "Qwen2ForCausalLM",
125
- "maxPositionEmbeddings": 32768,
116
+ "maxPositionEmbeddings": 131072,
126
117
  "recommendedQuantizations": ["awq", "gptq"],
127
- "minVramGb": 37,
128
- "recommendedInstances": ["ml.g5.4xlarge", "ml.g5.12xlarge"]
118
+ "minVramGb": 84,
119
+ "recommendedInstances": ["ml.g5.12xlarge", "ml.g5.48xlarge"]
129
120
  },
130
- "Qwen/Qwen-72B*": {
121
+ "Qwen/Qwen2.5-72B*": {
131
122
  "parameterCount": 72710410240,
132
123
  "defaultDtype": "bfloat16",
133
- "architecture": "QWenLMHeadModel",
134
- "maxPositionEmbeddings": 32768,
135
- "recommendedQuantizations": ["awq", "gptq"],
124
+ "architecture": "Qwen2ForCausalLM",
125
+ "maxPositionEmbeddings": 131072,
126
+ "recommendedQuantizations": ["awq", "gptq", "fp8"],
136
127
  "minVramGb": 190,
137
- "recommendedInstances": ["ml.g5.48xlarge", "ml.p4d.24xlarge"]
128
+ "recommendedInstances": ["ml.g5.48xlarge", "ml.p4d.24xlarge", "ml.g6e.48xlarge"]
138
129
  },
139
- "Qwen/Qwen2-72B*": {
140
- "parameterCount": 72710410240,
130
+ "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B*": {
131
+ "parameterCount": 1500000000,
141
132
  "defaultDtype": "bfloat16",
142
133
  "architecture": "Qwen2ForCausalLM",
143
- "maxPositionEmbeddings": 32768,
134
+ "maxPositionEmbeddings": 131072,
144
135
  "recommendedQuantizations": ["awq", "gptq"],
145
- "minVramGb": 190,
146
- "recommendedInstances": ["ml.g5.48xlarge", "ml.p4d.24xlarge"]
147
- },
148
- "EleutherAI/gpt-neox-20b*": {
149
- "parameterCount": 20554568704,
150
- "defaultDtype": "float16",
151
- "architecture": "GPTNeoXForCausalLM",
152
- "maxPositionEmbeddings": 2048,
153
- "recommendedQuantizations": ["gptq"],
154
- "minVramGb": 54,
136
+ "minVramGb": 5,
137
+ "recommendedInstances": ["ml.g5.xlarge", "ml.g6.xlarge"]
138
+ },
139
+ "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B*": {
140
+ "parameterCount": 7000000000,
141
+ "defaultDtype": "bfloat16",
142
+ "architecture": "Qwen2ForCausalLM",
143
+ "maxPositionEmbeddings": 131072,
144
+ "recommendedQuantizations": ["awq", "gptq"],
145
+ "minVramGb": 18,
146
+ "recommendedInstances": ["ml.g5.2xlarge", "ml.g6.2xlarge"]
147
+ },
148
+ "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B*": {
149
+ "parameterCount": 14000000000,
150
+ "defaultDtype": "bfloat16",
151
+ "architecture": "Qwen2ForCausalLM",
152
+ "maxPositionEmbeddings": 131072,
153
+ "recommendedQuantizations": ["awq", "gptq"],
154
+ "minVramGb": 37,
155
+ "recommendedInstances": ["ml.g5.4xlarge", "ml.g5.12xlarge"]
156
+ },
157
+ "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B*": {
158
+ "parameterCount": 32000000000,
159
+ "defaultDtype": "bfloat16",
160
+ "architecture": "Qwen2ForCausalLM",
161
+ "maxPositionEmbeddings": 131072,
162
+ "recommendedQuantizations": ["awq", "gptq"],
163
+ "minVramGb": 84,
164
+ "recommendedInstances": ["ml.g5.12xlarge", "ml.g5.48xlarge"]
165
+ },
166
+ "deepseek-ai/DeepSeek-R1-Distill-Llama-8B*": {
167
+ "parameterCount": 8000000000,
168
+ "defaultDtype": "bfloat16",
169
+ "architecture": "LlamaForCausalLM",
170
+ "maxPositionEmbeddings": 131072,
171
+ "recommendedQuantizations": ["awq", "gptq"],
172
+ "minVramGb": 20,
173
+ "recommendedInstances": ["ml.g5.2xlarge", "ml.g6.2xlarge"]
174
+ },
175
+ "deepseek-ai/DeepSeek-R1-Distill-Llama-70B*": {
176
+ "parameterCount": 70000000000,
177
+ "defaultDtype": "bfloat16",
178
+ "architecture": "LlamaForCausalLM",
179
+ "maxPositionEmbeddings": 131072,
180
+ "recommendedQuantizations": ["awq", "gptq", "fp8"],
181
+ "minVramGb": 184,
182
+ "recommendedInstances": ["ml.g5.48xlarge", "ml.p4d.24xlarge", "ml.g6e.48xlarge"]
183
+ },
184
+ "openai/gpt-oss-20b*": {
185
+ "parameterCount": 20000000000,
186
+ "defaultDtype": "bfloat16",
187
+ "architecture": "GPT2LMHeadModel",
188
+ "maxPositionEmbeddings": 8192,
189
+ "recommendedQuantizations": ["awq", "gptq"],
190
+ "minVramGb": 52,
155
191
  "recommendedInstances": ["ml.g5.12xlarge", "ml.g5.48xlarge"]
192
+ },
193
+ "openai/gpt-oss-120b*": {
194
+ "parameterCount": 120000000000,
195
+ "defaultDtype": "bfloat16",
196
+ "architecture": "GPT2LMHeadModel",
197
+ "maxPositionEmbeddings": 8192,
198
+ "recommendedQuantizations": ["awq", "gptq", "fp8"],
199
+ "minVramGb": 312,
200
+ "recommendedInstances": ["ml.p4d.24xlarge", "ml.p5.48xlarge"]
156
201
  }
157
202
  }
158
203
  }