@aws/ml-container-creator 0.2.5 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/bin/cli.js +7 -2
  2. package/package.json +7 -8
  3. package/servers/base-image-picker/index.js +3 -3
  4. package/servers/base-image-picker/manifest.json +4 -2
  5. package/servers/instance-sizer/index.js +561 -0
  6. package/servers/instance-sizer/lib/instance-ranker.js +245 -0
  7. package/servers/instance-sizer/lib/model-resolver.js +265 -0
  8. package/servers/instance-sizer/lib/vram-estimator.js +177 -0
  9. package/servers/instance-sizer/manifest.json +17 -0
  10. package/servers/instance-sizer/package.json +15 -0
  11. package/servers/{instance-recommender → lib}/catalogs/instances.json +136 -34
  12. package/servers/{base-image-picker → lib}/catalogs/model-servers.json +19 -249
  13. package/servers/lib/catalogs/model-sizes.json +131 -0
  14. package/servers/lib/catalogs/models.json +602 -0
  15. package/servers/{model-picker → lib}/catalogs/popular-diffusors.json +32 -10
  16. package/servers/{model-picker → lib}/catalogs/popular-transformers.json +59 -26
  17. package/servers/{base-image-picker → lib}/catalogs/python-slim.json +12 -12
  18. package/servers/lib/schemas/image-catalog.schema.json +0 -12
  19. package/servers/lib/schemas/instances.schema.json +29 -0
  20. package/servers/lib/schemas/model-catalog.schema.json +12 -10
  21. package/servers/lib/schemas/unified-model-catalog.schema.json +129 -0
  22. package/servers/model-picker/index.js +2 -3
  23. package/servers/model-picker/manifest.json +2 -3
  24. package/servers/region-picker/index.js +1 -1
  25. package/servers/region-picker/manifest.json +1 -1
  26. package/src/app.js +17 -0
  27. package/src/lib/bootstrap-command-handler.js +38 -0
  28. package/src/lib/cli-handler.js +3 -3
  29. package/src/lib/config-manager.js +4 -1
  30. package/src/lib/configuration-manager.js +2 -2
  31. package/src/lib/cross-cutting-checker.js +341 -0
  32. package/src/lib/dry-run-validator.js +78 -0
  33. package/src/lib/generation-validator.js +102 -0
  34. package/src/lib/mcp-validator-config.js +89 -0
  35. package/src/lib/payload-builder.js +153 -0
  36. package/src/lib/prompt-runner.js +445 -135
  37. package/src/lib/prompts.js +1 -1
  38. package/src/lib/registry-loader.js +5 -5
  39. package/src/lib/schema-sync.js +203 -0
  40. package/src/lib/schema-validation-engine.js +195 -0
  41. package/src/lib/service-model-parser.js +102 -0
  42. package/src/lib/validate-runner.js +167 -0
  43. package/src/lib/validation-report.js +133 -0
  44. package/src/lib/validators/base-validator.js +36 -0
  45. package/src/lib/validators/catalog-validator.js +177 -0
  46. package/src/lib/validators/enum-validator.js +120 -0
  47. package/src/lib/validators/required-field-validator.js +150 -0
  48. package/src/lib/validators/type-validator.js +313 -0
  49. package/templates/Dockerfile +1 -1
  50. package/templates/do/build +15 -5
  51. package/templates/do/run +5 -1
  52. package/templates/do/validate +61 -0
  53. package/servers/instance-recommender/LICENSE +0 -202
  54. package/servers/instance-recommender/index.js +0 -284
  55. package/servers/instance-recommender/manifest.json +0 -16
  56. package/servers/instance-recommender/package.json +0 -15
  57. /package/servers/{model-picker → lib}/catalogs/jumpstart-public.json +0 -0
  58. /package/servers/{region-picker → lib}/catalogs/regions.json +0 -0
  59. /package/servers/{base-image-picker → lib}/catalogs/triton-backends.json +0 -0
  60. /package/servers/{base-image-picker → lib}/catalogs/triton.json +0 -0
@@ -0,0 +1,602 @@
1
+ {
2
+ "openai/gpt-oss-20b": {
3
+ "family": "gpt-oss",
4
+ "gated": false,
5
+ "tags": [
6
+ "text-generation",
7
+ "openai",
8
+ "conversational"
9
+ ],
10
+ "architecture": "GptOssForCausalLM",
11
+ "notes": "Open-source 20B parameter model. Requires significant GPU memory for inference",
12
+ "chatTemplate": "",
13
+ "frameworkCompatibility": {
14
+ "vllm": ">=0.10.1",
15
+ "tensorrt-llm": ">=0.8.0",
16
+ "sglang": ">=0.2.0"
17
+ },
18
+ "validationLevel": "community-validated",
19
+ "modelType": "transformer",
20
+ "tasks": [
21
+ "text-generation"
22
+ ]
23
+ },
24
+ "meta-llama/Llama-2-7b-chat-hf": {
25
+ "family": "llama-2",
26
+ "gated": true,
27
+ "tags": [
28
+ "text-generation",
29
+ "llama-2",
30
+ "conversational"
31
+ ],
32
+ "architecture": "LlamaForCausalLM",
33
+ "profiles": {
34
+ "7b": {
35
+ "displayName": "Llama-2 7B",
36
+ "envVars": {
37
+ "MAX_MODEL_LEN": "4096",
38
+ "GPU_MEMORY_UTILIZATION": "0.9"
39
+ }
40
+ }
41
+ },
42
+ "notes": "Llama-2 7B chat model with official chat template. Requires HuggingFace authentication for download",
43
+ "chatTemplate": "{% for message in messages %}{% if message['role'] == 'system' %}{{ '[INST] <<SYS>>\\n' + message['content'] + '\\n<</SYS>>\\n\\n' }}{% elif message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + ' ' }}{% endif %}{% endfor %}",
44
+ "frameworkCompatibility": {
45
+ "vllm": ">=0.3.0",
46
+ "tensorrt-llm": ">=0.8.0",
47
+ "sglang": ">=0.2.0"
48
+ },
49
+ "validationLevel": "tested",
50
+ "modelType": "transformer",
51
+ "parameterCount": 6738415616,
52
+ "defaultDtype": "float16",
53
+ "maxPositionEmbeddings": 4096,
54
+ "recommendedQuantizations": [
55
+ "awq",
56
+ "gptq"
57
+ ],
58
+ "tasks": [
59
+ "text-generation"
60
+ ]
61
+ },
62
+ "meta-llama/Llama-2-13b-chat-hf": {
63
+ "family": "llama-2",
64
+ "gated": true,
65
+ "tags": [
66
+ "text-generation",
67
+ "llama-2",
68
+ "conversational"
69
+ ],
70
+ "architecture": "LlamaForCausalLM",
71
+ "profiles": {
72
+ "13b": {
73
+ "displayName": "Llama-2 13B",
74
+ "envVars": {
75
+ "MAX_MODEL_LEN": "4096",
76
+ "GPU_MEMORY_UTILIZATION": "0.9"
77
+ }
78
+ }
79
+ },
80
+ "notes": "Llama-2 13B chat model. Requires more GPU memory than 7B variant",
81
+ "chatTemplate": "{% for message in messages %}{% if message['role'] == 'system' %}{{ '[INST] <<SYS>>\\n' + message['content'] + '\\n<</SYS>>\\n\\n' }}{% elif message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + ' ' }}{% endif %}{% endfor %}",
82
+ "frameworkCompatibility": {
83
+ "vllm": ">=0.3.0",
84
+ "tensorrt-llm": ">=0.8.0",
85
+ "sglang": ">=0.2.0"
86
+ },
87
+ "validationLevel": "tested",
88
+ "modelType": "transformer",
89
+ "parameterCount": 13015864320,
90
+ "defaultDtype": "float16",
91
+ "maxPositionEmbeddings": 4096,
92
+ "recommendedQuantizations": [
93
+ "awq",
94
+ "gptq"
95
+ ],
96
+ "tasks": [
97
+ "text-generation"
98
+ ]
99
+ },
100
+ "meta-llama/Llama-2-70b-chat-hf": {
101
+ "family": "llama-2",
102
+ "gated": true,
103
+ "tags": [
104
+ "text-generation",
105
+ "llama-2",
106
+ "conversational"
107
+ ],
108
+ "architecture": "LlamaForCausalLM",
109
+ "profiles": {
110
+ "70b-tp2": {
111
+ "displayName": "Llama-2 70B (2-GPU)",
112
+ "envVars": {
113
+ "TENSOR_PARALLEL_SIZE": "2",
114
+ "MAX_MODEL_LEN": "4096",
115
+ "GPU_MEMORY_UTILIZATION": "0.95"
116
+ }
117
+ },
118
+ "70b-tp4": {
119
+ "displayName": "Llama-2 70B (4-GPU)",
120
+ "envVars": {
121
+ "TENSOR_PARALLEL_SIZE": "4",
122
+ "MAX_MODEL_LEN": "4096",
123
+ "GPU_MEMORY_UTILIZATION": "0.9"
124
+ }
125
+ }
126
+ },
127
+ "notes": "Llama-2 70B requires tensor parallelism across multiple GPUs",
128
+ "chatTemplate": "{% for message in messages %}{% if message['role'] == 'system' %}{{ '[INST] <<SYS>>\\n' + message['content'] + '\\n<</SYS>>\\n\\n' }}{% elif message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + ' ' }}{% endif %}{% endfor %}",
129
+ "frameworkCompatibility": {
130
+ "vllm": ">=0.3.0",
131
+ "tensorrt-llm": ">=0.8.0",
132
+ "sglang": ">=0.2.0"
133
+ },
134
+ "validationLevel": "community-validated",
135
+ "modelType": "transformer",
136
+ "parameterCount": 68976648192,
137
+ "defaultDtype": "float16",
138
+ "maxPositionEmbeddings": 4096,
139
+ "recommendedQuantizations": [
140
+ "awq",
141
+ "gptq"
142
+ ],
143
+ "tasks": [
144
+ "text-generation"
145
+ ]
146
+ },
147
+ "mistralai/Mistral-7B-Instruct-v0.1": {
148
+ "family": "mistral",
149
+ "gated": false,
150
+ "tags": [
151
+ "text-generation",
152
+ "mistral",
153
+ "conversational"
154
+ ],
155
+ "architecture": "MistralForCausalLM",
156
+ "profiles": {
157
+ "7b": {
158
+ "displayName": "Mistral 7B Instruct",
159
+ "envVars": {
160
+ "MAX_MODEL_LEN": "8192",
161
+ "GPU_MEMORY_UTILIZATION": "0.9"
162
+ }
163
+ }
164
+ },
165
+ "notes": "Mistral 7B v0.1 with 8K context window",
166
+ "chatTemplate": "{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token }}{% endif %}{% endfor %}",
167
+ "frameworkCompatibility": {
168
+ "vllm": ">=0.3.0",
169
+ "tensorrt-llm": ">=0.8.0",
170
+ "sglang": ">=0.2.0"
171
+ },
172
+ "validationLevel": "tested",
173
+ "modelType": "transformer",
174
+ "parameterCount": 7241732096,
175
+ "defaultDtype": "bfloat16",
176
+ "maxPositionEmbeddings": 32768,
177
+ "recommendedQuantizations": [
178
+ "awq",
179
+ "gptq"
180
+ ],
181
+ "tasks": [
182
+ "text-generation"
183
+ ]
184
+ },
185
+ "mistralai/Mistral-7B-Instruct-v0.2": {
186
+ "family": "mistral",
187
+ "gated": false,
188
+ "tags": [
189
+ "text-generation",
190
+ "mistral",
191
+ "conversational"
192
+ ],
193
+ "architecture": "MistralForCausalLM",
194
+ "profiles": {
195
+ "7b": {
196
+ "displayName": "Mistral 7B Instruct v0.2",
197
+ "envVars": {
198
+ "MAX_MODEL_LEN": "32768",
199
+ "GPU_MEMORY_UTILIZATION": "0.9"
200
+ }
201
+ }
202
+ },
203
+ "notes": "Mistral 7B v0.2 with extended 32K context window. Requires more memory for long contexts",
204
+ "chatTemplate": "{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token }}{% endif %}{% endfor %}",
205
+ "frameworkCompatibility": {
206
+ "vllm": ">=0.3.0",
207
+ "tensorrt-llm": ">=0.8.0",
208
+ "sglang": ">=0.2.0"
209
+ },
210
+ "validationLevel": "tested",
211
+ "modelType": "transformer",
212
+ "parameterCount": 7241732096,
213
+ "defaultDtype": "bfloat16",
214
+ "maxPositionEmbeddings": 32768,
215
+ "recommendedQuantizations": [
216
+ "awq",
217
+ "gptq"
218
+ ],
219
+ "tasks": [
220
+ "text-generation"
221
+ ]
222
+ },
223
+ "mistralai/Mixtral-8x7B-Instruct-v0.1": {
224
+ "family": "mistral",
225
+ "gated": false,
226
+ "tags": [
227
+ "text-generation",
228
+ "mistral",
229
+ "mixture-of-experts"
230
+ ],
231
+ "architecture": "MixtralForCausalLM",
232
+ "profiles": {
233
+ "8x7b-tp2": {
234
+ "displayName": "Mixtral 8x7B (2-GPU)",
235
+ "envVars": {
236
+ "TENSOR_PARALLEL_SIZE": "2",
237
+ "MAX_MODEL_LEN": "32768",
238
+ "GPU_MEMORY_UTILIZATION": "0.95"
239
+ }
240
+ }
241
+ },
242
+ "notes": "Mixtral 8x7B MoE model. Requires tensor parallelism for efficient inference",
243
+ "chatTemplate": "{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token }}{% endif %}{% endfor %}",
244
+ "frameworkCompatibility": {
245
+ "vllm": ">=0.3.0",
246
+ "tensorrt-llm": ">=0.8.0",
247
+ "sglang": ">=0.2.0"
248
+ },
249
+ "validationLevel": "community-validated",
250
+ "modelType": "transformer",
251
+ "parameterCount": 46702792704,
252
+ "defaultDtype": "bfloat16",
253
+ "maxPositionEmbeddings": 32768,
254
+ "recommendedQuantizations": [
255
+ "awq",
256
+ "gptq"
257
+ ],
258
+ "tasks": [
259
+ "text-generation"
260
+ ]
261
+ },
262
+ "meta-llama/Llama-2-*": {
263
+ "family": "llama-2",
264
+ "gated": true,
265
+ "tags": [
266
+ "text-generation",
267
+ "llama-2"
268
+ ],
269
+ "architecture": null,
270
+ "notes": "Fallback configuration for Llama-2 models not explicitly listed. Uses standard Llama-2 chat template",
271
+ "chatTemplate": "{% for message in messages %}{% if message['role'] == 'system' %}{{ '[INST] <<SYS>>\\n' + message['content'] + '\\n<</SYS>>\\n\\n' }}{% elif message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + ' ' }}{% endif %}{% endfor %}",
272
+ "frameworkCompatibility": {
273
+ "vllm": ">=0.3.0",
274
+ "tensorrt-llm": ">=0.8.0",
275
+ "sglang": ">=0.2.0"
276
+ },
277
+ "validationLevel": "experimental",
278
+ "modelType": "transformer",
279
+ "tasks": [
280
+ "text-generation"
281
+ ]
282
+ },
283
+ "mistralai/Mistral-*": {
284
+ "family": "mistral",
285
+ "gated": false,
286
+ "tags": [
287
+ "text-generation",
288
+ "mistral"
289
+ ],
290
+ "architecture": null,
291
+ "notes": "Fallback configuration for Mistral models not explicitly listed. Uses standard Mistral chat template",
292
+ "chatTemplate": "{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token }}{% endif %}{% endfor %}",
293
+ "frameworkCompatibility": {
294
+ "vllm": ">=0.3.0",
295
+ "tensorrt-llm": ">=0.8.0",
296
+ "sglang": ">=0.2.0"
297
+ },
298
+ "validationLevel": "experimental",
299
+ "modelType": "transformer",
300
+ "tasks": [
301
+ "text-generation"
302
+ ]
303
+ },
304
+ "codellama/*": {
305
+ "family": "codellama",
306
+ "gated": false,
307
+ "tags": [
308
+ "text-generation",
309
+ "code",
310
+ "codellama"
311
+ ],
312
+ "architecture": null,
313
+ "notes": "CodeLlama models use Llama-2 chat template. Optimized for code generation",
314
+ "chatTemplate": "{% for message in messages %}{% if message['role'] == 'system' %}{{ '[INST] <<SYS>>\\n' + message['content'] + '\\n<</SYS>>\\n\\n' }}{% elif message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + ' ' }}{% endif %}{% endfor %}",
315
+ "frameworkCompatibility": {
316
+ "vllm": ">=0.3.0",
317
+ "tensorrt-llm": ">=0.8.0"
318
+ },
319
+ "validationLevel": "experimental",
320
+ "modelType": "transformer",
321
+ "tasks": [
322
+ "text-generation"
323
+ ]
324
+ },
325
+ "tiiuae/falcon-*": {
326
+ "family": "falcon",
327
+ "gated": false,
328
+ "tags": [
329
+ "text-generation",
330
+ "falcon"
331
+ ],
332
+ "architecture": null,
333
+ "notes": "Falcon models typically don't require chat templates for instruction following",
334
+ "chatTemplate": null,
335
+ "frameworkCompatibility": {
336
+ "vllm": ">=0.3.0",
337
+ "tensorrt-llm": ">=0.8.0"
338
+ },
339
+ "validationLevel": "experimental",
340
+ "modelType": "transformer",
341
+ "tasks": [
342
+ "text-generation"
343
+ ]
344
+ },
345
+ "stabilityai/stable-diffusion-3.5-medium": {
346
+ "family": "stable-diffusion-3",
347
+ "gated": false,
348
+ "tags": [
349
+ "image-generation",
350
+ "diffusion",
351
+ "stable-diffusion"
352
+ ],
353
+ "architecture": "StableDiffusion3Pipeline",
354
+ "profiles": {
355
+ "default": {
356
+ "displayName": "SD3.5 Medium",
357
+ "envVars": {}
358
+ }
359
+ },
360
+ "notes": "Stable Diffusion 3.5 medium model. Supported natively by vLLM-Omni StableDiffusion3Pipeline.",
361
+ "chatTemplate": null,
362
+ "frameworkCompatibility": {
363
+ "vllm-omni": ">=0.14.0"
364
+ },
365
+ "validationLevel": "experimental",
366
+ "modelType": "diffusor",
367
+ "tasks": [
368
+ "text-to-image"
369
+ ]
370
+ },
371
+ "black-forest-labs/FLUX.1-dev": {
372
+ "family": "flux",
373
+ "gated": true,
374
+ "tags": [
375
+ "image-generation",
376
+ "diffusion",
377
+ "flux"
378
+ ],
379
+ "architecture": "FluxPipeline",
380
+ "profiles": {
381
+ "default": {
382
+ "displayName": "FLUX.1 Dev",
383
+ "envVars": {}
384
+ }
385
+ },
386
+ "notes": "FLUX.1-dev high-quality generation model. Uses dual text encoders (CLIP + T5) and FlowMatchEuler scheduler. Requires significant VRAM.",
387
+ "chatTemplate": null,
388
+ "frameworkCompatibility": {
389
+ "vllm-omni": ">=0.14.0"
390
+ },
391
+ "validationLevel": "experimental",
392
+ "modelType": "diffusor",
393
+ "tasks": [
394
+ "text-to-image"
395
+ ]
396
+ },
397
+ "black-forest-labs/FLUX.1-schnell": {
398
+ "family": "flux",
399
+ "gated": false,
400
+ "tags": [
401
+ "image-generation",
402
+ "diffusion",
403
+ "flux"
404
+ ],
405
+ "architecture": "FluxPipeline",
406
+ "notes": "FLUX.1-schnell fast generation model. Fewer denoising steps for faster inference at slightly lower quality",
407
+ "chatTemplate": null,
408
+ "frameworkCompatibility": {
409
+ "vllm-omni": ">=0.14.0"
410
+ },
411
+ "validationLevel": "experimental",
412
+ "modelType": "diffusor",
413
+ "tasks": [
414
+ "text-to-image"
415
+ ]
416
+ },
417
+ "Wan-AI/Wan2.1-T2V-14B-Diffusers": {
418
+ "family": "wan",
419
+ "gated": false,
420
+ "tags": [
421
+ "video-generation",
422
+ "diffusion",
423
+ "wan"
424
+ ],
425
+ "architecture": "WanPipeline",
426
+ "notes": "Wan2.1 text-to-video 14B model (diffusers format). Requires multi-GPU instance (ml.g5.12xlarge or larger). Must use the -Diffusers variant — the base Wan2.1-T2V-14B repo lacks model_index.json required by vLLM-Omni",
427
+ "chatTemplate": null,
428
+ "frameworkCompatibility": {
429
+ "vllm-omni": ">=0.16.0"
430
+ },
431
+ "validationLevel": "experimental",
432
+ "modelType": "diffusor",
433
+ "tasks": [
434
+ "text-to-video"
435
+ ]
436
+ },
437
+ "stabilityai/stable-diffusion-*": {
438
+ "family": "stable-diffusion",
439
+ "gated": false,
440
+ "tags": [
441
+ "image-generation",
442
+ "diffusion",
443
+ "stable-diffusion"
444
+ ],
445
+ "architecture": null,
446
+ "notes": "Fallback for Stable Diffusion variants not explicitly listed",
447
+ "chatTemplate": null,
448
+ "frameworkCompatibility": {
449
+ "vllm-omni": ">=0.14.0"
450
+ },
451
+ "validationLevel": "experimental",
452
+ "modelType": "diffusor",
453
+ "tasks": [
454
+ "text-to-image"
455
+ ]
456
+ },
457
+ "black-forest-labs/FLUX*": {
458
+ "family": "flux",
459
+ "gated": false,
460
+ "tags": [
461
+ "image-generation",
462
+ "diffusion",
463
+ "flux"
464
+ ],
465
+ "architecture": null,
466
+ "notes": "Fallback for FLUX model variants not explicitly listed",
467
+ "chatTemplate": null,
468
+ "frameworkCompatibility": {
469
+ "vllm-omni": ">=0.14.0"
470
+ },
471
+ "validationLevel": "experimental",
472
+ "modelType": "diffusor",
473
+ "tasks": [
474
+ "text-to-image"
475
+ ]
476
+ },
477
+ "meta-llama/Meta-Llama-3-8B*": {
478
+ "parameterCount": 8030261248,
479
+ "defaultDtype": "bfloat16",
480
+ "architecture": "LlamaForCausalLM",
481
+ "maxPositionEmbeddings": 8192,
482
+ "recommendedQuantizations": [
483
+ "awq",
484
+ "gptq"
485
+ ],
486
+ "modelType": "transformer",
487
+ "tasks": [
488
+ "text-generation"
489
+ ]
490
+ },
491
+ "meta-llama/Meta-Llama-3-70B*": {
492
+ "parameterCount": 70553706496,
493
+ "defaultDtype": "bfloat16",
494
+ "architecture": "LlamaForCausalLM",
495
+ "maxPositionEmbeddings": 8192,
496
+ "recommendedQuantizations": [
497
+ "awq",
498
+ "gptq"
499
+ ],
500
+ "modelType": "transformer",
501
+ "tasks": [
502
+ "text-generation"
503
+ ]
504
+ },
505
+ "Qwen/Qwen-7B*": {
506
+ "parameterCount": 7721324544,
507
+ "defaultDtype": "bfloat16",
508
+ "architecture": "QWenLMHeadModel",
509
+ "maxPositionEmbeddings": 8192,
510
+ "recommendedQuantizations": [
511
+ "awq",
512
+ "gptq"
513
+ ],
514
+ "modelType": "transformer",
515
+ "tasks": [
516
+ "text-generation"
517
+ ]
518
+ },
519
+ "Qwen/Qwen2-7B*": {
520
+ "parameterCount": 7721324544,
521
+ "defaultDtype": "bfloat16",
522
+ "architecture": "Qwen2ForCausalLM",
523
+ "maxPositionEmbeddings": 32768,
524
+ "recommendedQuantizations": [
525
+ "awq",
526
+ "gptq"
527
+ ],
528
+ "modelType": "transformer",
529
+ "tasks": [
530
+ "text-generation"
531
+ ]
532
+ },
533
+ "Qwen/Qwen-14B*": {
534
+ "parameterCount": 14167134208,
535
+ "defaultDtype": "bfloat16",
536
+ "architecture": "QWenLMHeadModel",
537
+ "maxPositionEmbeddings": 8192,
538
+ "recommendedQuantizations": [
539
+ "awq",
540
+ "gptq"
541
+ ],
542
+ "modelType": "transformer",
543
+ "tasks": [
544
+ "text-generation"
545
+ ]
546
+ },
547
+ "Qwen/Qwen2-14B*": {
548
+ "parameterCount": 14167134208,
549
+ "defaultDtype": "bfloat16",
550
+ "architecture": "Qwen2ForCausalLM",
551
+ "maxPositionEmbeddings": 32768,
552
+ "recommendedQuantizations": [
553
+ "awq",
554
+ "gptq"
555
+ ],
556
+ "modelType": "transformer",
557
+ "tasks": [
558
+ "text-generation"
559
+ ]
560
+ },
561
+ "Qwen/Qwen-72B*": {
562
+ "parameterCount": 72710410240,
563
+ "defaultDtype": "bfloat16",
564
+ "architecture": "QWenLMHeadModel",
565
+ "maxPositionEmbeddings": 32768,
566
+ "recommendedQuantizations": [
567
+ "awq",
568
+ "gptq"
569
+ ],
570
+ "modelType": "transformer",
571
+ "tasks": [
572
+ "text-generation"
573
+ ]
574
+ },
575
+ "Qwen/Qwen2-72B*": {
576
+ "parameterCount": 72710410240,
577
+ "defaultDtype": "bfloat16",
578
+ "architecture": "Qwen2ForCausalLM",
579
+ "maxPositionEmbeddings": 32768,
580
+ "recommendedQuantizations": [
581
+ "awq",
582
+ "gptq"
583
+ ],
584
+ "modelType": "transformer",
585
+ "tasks": [
586
+ "text-generation"
587
+ ]
588
+ },
589
+ "EleutherAI/gpt-neox-20b*": {
590
+ "parameterCount": 20554568704,
591
+ "defaultDtype": "float16",
592
+ "architecture": "GPTNeoXForCausalLM",
593
+ "maxPositionEmbeddings": 2048,
594
+ "recommendedQuantizations": [
595
+ "gptq"
596
+ ],
597
+ "modelType": "transformer",
598
+ "tasks": [
599
+ "text-generation"
600
+ ]
601
+ }
602
+ }