@aws/ml-container-creator 0.8.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/LICENSE-THIRD-PARTY +50760 -16218
  2. package/bin/cli.js +31 -137
  3. package/package.json +7 -2
  4. package/servers/lib/catalogs/instances.json +52 -1275
  5. package/servers/lib/catalogs/models.json +0 -132
  6. package/servers/lib/catalogs/popular-diffusors.json +1 -110
  7. package/src/app.js +29 -2
  8. package/src/lib/config-manager.js +17 -0
  9. package/src/lib/generated/cli-options.js +467 -0
  10. package/src/lib/generated/validation-rules.js +202 -0
  11. package/src/lib/mcp-client.js +16 -1
  12. package/src/lib/mcp-command-handler.js +10 -2
  13. package/src/lib/prompt-runner.js +16 -2
  14. package/src/lib/train-config-parser.js +136 -0
  15. package/src/lib/train-config-persistence.js +143 -0
  16. package/src/lib/train-config-validator.js +112 -0
  17. package/src/lib/train-feedback.js +46 -0
  18. package/src/lib/train-idempotency.js +97 -0
  19. package/src/lib/train-request-builder.js +120 -0
  20. package/templates/code/serve +5 -134
  21. package/templates/code/serve.d/lmi.ejs +19 -0
  22. package/templates/code/serve.d/sglang.ejs +47 -0
  23. package/templates/code/serve.d/tensorrt-llm.ejs +53 -0
  24. package/templates/code/serve.d/vllm.ejs +48 -0
  25. package/templates/do/.train_build_request.py +141 -0
  26. package/templates/do/.train_poll_parser.py +135 -0
  27. package/templates/do/.train_status_parser.py +187 -0
  28. package/templates/do/clean +1 -1387
  29. package/templates/do/clean.d/async-inference.ejs +508 -0
  30. package/templates/do/clean.d/batch-transform.ejs +512 -0
  31. package/templates/do/clean.d/hyperpod-eks.ejs +481 -0
  32. package/templates/do/clean.d/managed-inference.ejs +1043 -0
  33. package/templates/do/deploy +1 -1766
  34. package/templates/do/deploy.d/async-inference.ejs +501 -0
  35. package/templates/do/deploy.d/batch-transform.ejs +529 -0
  36. package/templates/do/deploy.d/hyperpod-eks.ejs +339 -0
  37. package/templates/do/deploy.d/managed-inference.ejs +726 -0
  38. package/templates/do/lib/feedback.sh +41 -0
  39. package/templates/do/train +786 -0
  40. package/templates/do/training/config.yaml +140 -0
  41. package/templates/do/training/train.py +463 -0
@@ -1,217 +1,11 @@
1
1
  {
2
2
  "catalog": {
3
- "ml.c5.2xlarge": {
4
- "category": "cpu",
5
- "gpus": 0,
6
- "vcpus": 8,
7
- "memGb": 16,
8
- "accelerator": "",
9
- "cudaVersions": null,
10
- "tags": [
11
- "compute",
12
- "cpu",
13
- "high-cpu"
14
- ],
15
- "family": "c5",
16
- "acceleratorType": "cpu",
17
- "hardware": "None",
18
- "gpuArchitecture": "None",
19
- "defaultCudaVersion": null,
20
- "notes": "",
21
- "gpuMemoryGb": null,
22
- "gpuType": null,
23
- "costTier": "low"
24
- },
25
- "ml.c5.xlarge": {
26
- "category": "cpu",
27
- "gpus": 0,
28
- "vcpus": 4,
29
- "memGb": 8,
30
- "accelerator": "",
31
- "cudaVersions": null,
32
- "tags": [
33
- "compute",
34
- "cpu",
35
- "cost-effective"
36
- ],
37
- "family": "c5",
38
- "acceleratorType": "cpu",
39
- "hardware": "None",
40
- "gpuArchitecture": "None",
41
- "defaultCudaVersion": null,
42
- "notes": "",
43
- "gpuMemoryGb": null,
44
- "gpuType": null,
45
- "costTier": "low"
46
- },
47
- "ml.g4dn.12xlarge": {
48
- "category": "gpu",
49
- "gpus": 4,
50
- "vcpus": 48,
51
- "memGb": 192,
52
- "accelerator": "4x T4 64GB",
53
- "cudaVersions": [
54
- "11.4",
55
- "11.8"
56
- ],
57
- "tags": [
58
- "gpu",
59
- "multi-gpu",
60
- "t4",
61
- "cuda-11"
62
- ],
63
- "family": "g4dn",
64
- "acceleratorType": "cuda",
65
- "hardware": "NVIDIA T4",
66
- "gpuArchitecture": "Turing",
67
- "defaultCudaVersion": "11.8",
68
- "notes": "4x NVIDIA T4 GPUs (64GB total). Multi-GPU for tensor parallelism",
69
- "gpuMemoryGb": 16,
70
- "gpuType": "NVIDIA T4",
71
- "costTier": "low"
72
- },
73
- "ml.g4dn.16xlarge": {
74
- "category": "gpu",
75
- "gpus": 1,
76
- "vcpus": 64,
77
- "memGb": 256,
78
- "accelerator": "T4 16GB",
79
- "cudaVersions": [
80
- "11.4",
81
- "11.8"
82
- ],
83
- "tags": [
84
- "gpu",
85
- "single-gpu",
86
- "t4",
87
- "cuda-11"
88
- ],
89
- "family": "g4dn",
90
- "acceleratorType": "cuda",
91
- "hardware": "NVIDIA T4",
92
- "gpuArchitecture": "Turing",
93
- "defaultCudaVersion": "11.8",
94
- "notes": "1x NVIDIA T4 GPU (16GB). Very high CPU/memory ratio",
95
- "gpuMemoryGb": 16,
96
- "gpuType": "NVIDIA T4",
97
- "costTier": "low"
98
- },
99
- "ml.g4dn.2xlarge": {
100
- "category": "gpu",
101
- "gpus": 1,
102
- "vcpus": 8,
103
- "memGb": 32,
104
- "accelerator": "T4 16GB",
105
- "cudaVersions": [
106
- "11.4",
107
- "11.8"
108
- ],
109
- "tags": [
110
- "gpu",
111
- "single-gpu",
112
- "budget",
113
- "cost-effective",
114
- "inference",
115
- "t4",
116
- "cuda-11"
117
- ],
118
- "family": "g4dn",
119
- "acceleratorType": "cuda",
120
- "hardware": "NVIDIA T4",
121
- "gpuArchitecture": "Turing",
122
- "defaultCudaVersion": "11.8",
123
- "notes": "1x NVIDIA T4 GPU (16GB). Better CPU/memory for preprocessing",
124
- "gpuMemoryGb": 16,
125
- "gpuType": "NVIDIA T4",
126
- "costTier": "low"
127
- },
128
- "ml.g4dn.4xlarge": {
129
- "category": "gpu",
130
- "gpus": 1,
131
- "vcpus": 16,
132
- "memGb": 64,
133
- "accelerator": "T4 16GB",
134
- "cudaVersions": [
135
- "11.4",
136
- "11.8"
137
- ],
138
- "tags": [
139
- "gpu",
140
- "single-gpu",
141
- "t4",
142
- "cuda-11"
143
- ],
144
- "family": "g4dn",
145
- "acceleratorType": "cuda",
146
- "hardware": "NVIDIA T4",
147
- "gpuArchitecture": "Turing",
148
- "defaultCudaVersion": "11.8",
149
- "notes": "1x NVIDIA T4 GPU (16GB). High CPU/memory ratio",
150
- "gpuMemoryGb": 16,
151
- "gpuType": "NVIDIA T4",
152
- "costTier": "low"
153
- },
154
- "ml.g4dn.8xlarge": {
155
- "category": "gpu",
156
- "gpus": 1,
157
- "vcpus": 32,
158
- "memGb": 128,
159
- "accelerator": "T4 16GB",
160
- "cudaVersions": [
161
- "11.4",
162
- "11.8"
163
- ],
164
- "tags": [
165
- "gpu",
166
- "single-gpu",
167
- "t4",
168
- "cuda-11"
169
- ],
170
- "family": "g4dn",
171
- "acceleratorType": "cuda",
172
- "hardware": "NVIDIA T4",
173
- "gpuArchitecture": "Turing",
174
- "defaultCudaVersion": "11.8",
175
- "notes": "1x NVIDIA T4 GPU (16GB). Maximum CPU/memory for single GPU",
176
- "gpuMemoryGb": 16,
177
- "gpuType": "NVIDIA T4",
178
- "costTier": "low"
179
- },
180
- "ml.g4dn.xlarge": {
3
+ "ml.g5.xlarge": {
181
4
  "category": "gpu",
182
5
  "gpus": 1,
183
6
  "vcpus": 4,
184
7
  "memGb": 16,
185
- "accelerator": "T4 16GB",
186
- "cudaVersions": [
187
- "11.4",
188
- "11.8"
189
- ],
190
- "tags": [
191
- "gpu",
192
- "single-gpu",
193
- "budget",
194
- "cost-effective",
195
- "inference",
196
- "t4",
197
- "cuda-11"
198
- ],
199
- "family": "g4dn",
200
- "acceleratorType": "cuda",
201
- "hardware": "NVIDIA T4",
202
- "gpuArchitecture": "Turing",
203
- "defaultCudaVersion": "11.8",
204
- "notes": "1x NVIDIA T4 GPU (16GB). Cost-effective for smaller models",
205
- "gpuMemoryGb": 16,
206
- "gpuType": "NVIDIA T4",
207
- "costTier": "low"
208
- },
209
- "ml.g5.12xlarge": {
210
- "category": "gpu",
211
- "gpus": 4,
212
- "vcpus": 48,
213
- "memGb": 192,
214
- "accelerator": "4x A10G 96GB",
8
+ "accelerator": "A10G 24GB",
215
9
  "cudaVersions": [
216
10
  "11.8",
217
11
  "12.1",
@@ -219,10 +13,9 @@
219
13
  ],
220
14
  "tags": [
221
15
  "gpu",
222
- "multi-gpu",
223
- "large",
16
+ "single-gpu",
17
+ "inference",
224
18
  "a10g",
225
- "parallel",
226
19
  "cuda-11",
227
20
  "cuda-12"
228
21
  ],
@@ -231,16 +24,16 @@
231
24
  "hardware": "NVIDIA A10G",
232
25
  "gpuArchitecture": "Ampere",
233
26
  "defaultCudaVersion": "12.1",
234
- "notes": "4x NVIDIA A10G GPUs (96GB total). Good for tensor parallelism",
27
+ "notes": "1x NVIDIA A10G GPU (24GB). Good for small to medium models",
235
28
  "gpuMemoryGb": 24,
236
29
  "gpuType": "NVIDIA A10G",
237
30
  "costTier": "medium"
238
31
  },
239
- "ml.g5.16xlarge": {
32
+ "ml.g5.2xlarge": {
240
33
  "category": "gpu",
241
34
  "gpus": 1,
242
- "vcpus": 64,
243
- "memGb": 256,
35
+ "vcpus": 8,
36
+ "memGb": 32,
244
37
  "accelerator": "A10G 24GB",
245
38
  "cudaVersions": [
246
39
  "11.8",
@@ -250,6 +43,7 @@
250
43
  "tags": [
251
44
  "gpu",
252
45
  "single-gpu",
46
+ "inference",
253
47
  "a10g",
254
48
  "cuda-11",
255
49
  "cuda-12"
@@ -259,17 +53,17 @@
259
53
  "hardware": "NVIDIA A10G",
260
54
  "gpuArchitecture": "Ampere",
261
55
  "defaultCudaVersion": "12.1",
262
- "notes": "1x NVIDIA A10G GPU (24GB). Very high CPU/memory ratio",
56
+ "notes": "1x NVIDIA A10G GPU (24GB). Better CPU/memory for preprocessing",
263
57
  "gpuMemoryGb": 24,
264
58
  "gpuType": "NVIDIA A10G",
265
59
  "costTier": "medium"
266
60
  },
267
- "ml.g5.24xlarge": {
61
+ "ml.g5.4xlarge": {
268
62
  "category": "gpu",
269
- "gpus": 4,
270
- "vcpus": 96,
271
- "memGb": 384,
272
- "accelerator": "4x A10G 96GB",
63
+ "gpus": 1,
64
+ "vcpus": 16,
65
+ "memGb": 64,
66
+ "accelerator": "A10G 24GB",
273
67
  "cudaVersions": [
274
68
  "11.8",
275
69
  "12.1",
@@ -277,7 +71,8 @@
277
71
  ],
278
72
  "tags": [
279
73
  "gpu",
280
- "multi-gpu",
74
+ "single-gpu",
75
+ "large",
281
76
  "a10g",
282
77
  "cuda-11",
283
78
  "cuda-12"
@@ -287,16 +82,16 @@
287
82
  "hardware": "NVIDIA A10G",
288
83
  "gpuArchitecture": "Ampere",
289
84
  "defaultCudaVersion": "12.1",
290
- "notes": "4x NVIDIA A10G GPUs (96GB total). High CPU/memory with multi-GPU",
85
+ "notes": "1x NVIDIA A10G GPU (24GB). High CPU/memory for complex preprocessing",
291
86
  "gpuMemoryGb": 24,
292
87
  "gpuType": "NVIDIA A10G",
293
88
  "costTier": "medium"
294
89
  },
295
- "ml.g5.2xlarge": {
90
+ "ml.g5.8xlarge": {
296
91
  "category": "gpu",
297
92
  "gpus": 1,
298
- "vcpus": 8,
299
- "memGb": 32,
93
+ "vcpus": 32,
94
+ "memGb": 128,
300
95
  "accelerator": "A10G 24GB",
301
96
  "cudaVersions": [
302
97
  "11.8",
@@ -306,7 +101,6 @@
306
101
  "tags": [
307
102
  "gpu",
308
103
  "single-gpu",
309
- "inference",
310
104
  "a10g",
311
105
  "cuda-11",
312
106
  "cuda-12"
@@ -316,17 +110,17 @@
316
110
  "hardware": "NVIDIA A10G",
317
111
  "gpuArchitecture": "Ampere",
318
112
  "defaultCudaVersion": "12.1",
319
- "notes": "1x NVIDIA A10G GPU (24GB). Better CPU/memory for preprocessing",
113
+ "notes": "1x NVIDIA A10G GPU (24GB). Maximum CPU/memory for single GPU",
320
114
  "gpuMemoryGb": 24,
321
115
  "gpuType": "NVIDIA A10G",
322
116
  "costTier": "medium"
323
117
  },
324
- "ml.g5.48xlarge": {
118
+ "ml.g5.12xlarge": {
325
119
  "category": "gpu",
326
- "gpus": 8,
327
- "vcpus": 192,
328
- "memGb": 768,
329
- "accelerator": "8x A10G 192GB",
120
+ "gpus": 4,
121
+ "vcpus": 48,
122
+ "memGb": 192,
123
+ "accelerator": "4x A10G 96GB",
330
124
  "cudaVersions": [
331
125
  "11.8",
332
126
  "12.1",
@@ -335,7 +129,9 @@
335
129
  "tags": [
336
130
  "gpu",
337
131
  "multi-gpu",
132
+ "large",
338
133
  "a10g",
134
+ "parallel",
339
135
  "cuda-11",
340
136
  "cuda-12"
341
137
  ],
@@ -344,16 +140,16 @@
344
140
  "hardware": "NVIDIA A10G",
345
141
  "gpuArchitecture": "Ampere",
346
142
  "defaultCudaVersion": "12.1",
347
- "notes": "8x NVIDIA A10G GPUs (192GB total). Maximum multi-GPU configuration",
143
+ "notes": "4x NVIDIA A10G GPUs (96GB total). Good for tensor parallelism",
348
144
  "gpuMemoryGb": 24,
349
145
  "gpuType": "NVIDIA A10G",
350
146
  "costTier": "medium"
351
147
  },
352
- "ml.g5.4xlarge": {
148
+ "ml.g5.16xlarge": {
353
149
  "category": "gpu",
354
150
  "gpus": 1,
355
- "vcpus": 16,
356
- "memGb": 64,
151
+ "vcpus": 64,
152
+ "memGb": 256,
357
153
  "accelerator": "A10G 24GB",
358
154
  "cudaVersions": [
359
155
  "11.8",
@@ -363,7 +159,6 @@
363
159
  "tags": [
364
160
  "gpu",
365
161
  "single-gpu",
366
- "large",
367
162
  "a10g",
368
163
  "cuda-11",
369
164
  "cuda-12"
@@ -373,17 +168,17 @@
373
168
  "hardware": "NVIDIA A10G",
374
169
  "gpuArchitecture": "Ampere",
375
170
  "defaultCudaVersion": "12.1",
376
- "notes": "1x NVIDIA A10G GPU (24GB). High CPU/memory for complex preprocessing",
171
+ "notes": "1x NVIDIA A10G GPU (24GB). Very high CPU/memory ratio",
377
172
  "gpuMemoryGb": 24,
378
173
  "gpuType": "NVIDIA A10G",
379
174
  "costTier": "medium"
380
175
  },
381
- "ml.g5.8xlarge": {
176
+ "ml.g5.24xlarge": {
382
177
  "category": "gpu",
383
- "gpus": 1,
384
- "vcpus": 32,
385
- "memGb": 128,
386
- "accelerator": "A10G 24GB",
178
+ "gpus": 4,
179
+ "vcpus": 96,
180
+ "memGb": 384,
181
+ "accelerator": "4x A10G 96GB",
387
182
  "cudaVersions": [
388
183
  "11.8",
389
184
  "12.1",
@@ -391,7 +186,7 @@
391
186
  ],
392
187
  "tags": [
393
188
  "gpu",
394
- "single-gpu",
189
+ "multi-gpu",
395
190
  "a10g",
396
191
  "cuda-11",
397
192
  "cuda-12"
@@ -401,17 +196,17 @@
401
196
  "hardware": "NVIDIA A10G",
402
197
  "gpuArchitecture": "Ampere",
403
198
  "defaultCudaVersion": "12.1",
404
- "notes": "1x NVIDIA A10G GPU (24GB). Maximum CPU/memory for single GPU",
199
+ "notes": "4x NVIDIA A10G GPUs (96GB total). High CPU/memory with multi-GPU",
405
200
  "gpuMemoryGb": 24,
406
201
  "gpuType": "NVIDIA A10G",
407
202
  "costTier": "medium"
408
203
  },
409
- "ml.g5.xlarge": {
204
+ "ml.g5.48xlarge": {
410
205
  "category": "gpu",
411
- "gpus": 1,
412
- "vcpus": 4,
413
- "memGb": 16,
414
- "accelerator": "A10G 24GB",
206
+ "gpus": 8,
207
+ "vcpus": 192,
208
+ "memGb": 768,
209
+ "accelerator": "8x A10G 192GB",
415
210
  "cudaVersions": [
416
211
  "11.8",
417
212
  "12.1",
@@ -419,8 +214,7 @@
419
214
  ],
420
215
  "tags": [
421
216
  "gpu",
422
- "single-gpu",
423
- "inference",
217
+ "multi-gpu",
424
218
  "a10g",
425
219
  "cuda-11",
426
220
  "cuda-12"
@@ -430,1040 +224,23 @@
430
224
  "hardware": "NVIDIA A10G",
431
225
  "gpuArchitecture": "Ampere",
432
226
  "defaultCudaVersion": "12.1",
433
- "notes": "1x NVIDIA A10G GPU (24GB). Good for small to medium models",
227
+ "notes": "8x NVIDIA A10G GPUs (192GB total). Maximum multi-GPU configuration",
434
228
  "gpuMemoryGb": 24,
435
229
  "gpuType": "NVIDIA A10G",
436
230
  "costTier": "medium"
437
- },
438
- "ml.g6.12xlarge": {
439
- "category": "gpu",
440
- "gpus": 4,
441
- "vcpus": 48,
442
- "memGb": 192,
443
- "accelerator": "4x L4 96GB",
444
- "cudaVersions": [
445
- "12.1",
446
- "12.2",
447
- "12.4"
448
- ],
449
- "tags": [
450
- "gpu",
451
- "multi-gpu",
452
- "large",
453
- "l4",
454
- "newer",
455
- "parallel",
456
- "cuda-12"
457
- ],
458
- "family": "g6",
459
- "acceleratorType": "cuda",
460
- "hardware": "NVIDIA L4",
461
- "gpuArchitecture": "Ada Lovelace",
462
- "defaultCudaVersion": "12.2",
463
- "notes": "Multi-GPU (newer)",
464
- "gpuMemoryGb": 24,
465
- "gpuType": "NVIDIA L4",
466
- "costTier": "medium"
467
- },
468
- "ml.g6.2xlarge": {
469
- "category": "gpu",
470
- "gpus": 1,
471
- "vcpus": 8,
472
- "memGb": 32,
473
- "accelerator": "L4 24GB",
474
- "cudaVersions": [
475
- "12.1",
476
- "12.2",
477
- "12.4"
478
- ],
479
- "tags": [
480
- "gpu",
481
- "single-gpu",
482
- "inference",
483
- "l4",
484
- "newer",
485
- "cuda-12"
486
- ],
487
- "family": "g6",
488
- "acceleratorType": "cuda",
489
- "hardware": "NVIDIA L4",
490
- "gpuArchitecture": "Ada Lovelace",
491
- "defaultCudaVersion": "12.2",
492
- "notes": "Medium GPU (newer)",
493
- "gpuMemoryGb": 24,
494
- "gpuType": "NVIDIA L4",
495
- "costTier": "medium"
496
- },
497
- "ml.g6.xlarge": {
498
- "category": "gpu",
499
- "gpus": 1,
500
- "vcpus": 4,
501
- "memGb": 16,
502
- "accelerator": "L4 24GB",
503
- "cudaVersions": [
504
- "12.1",
505
- "12.2",
506
- "12.4"
507
- ],
508
- "tags": [
509
- "gpu",
510
- "single-gpu",
511
- "inference",
512
- "l4",
513
- "newer",
514
- "cuda-12"
515
- ],
516
- "family": "g6",
517
- "acceleratorType": "cuda",
518
- "hardware": "NVIDIA L4",
519
- "gpuArchitecture": "Ada Lovelace",
520
- "defaultCudaVersion": "12.2",
521
- "notes": "Small GPU (newer)",
522
- "gpuMemoryGb": 24,
523
- "gpuType": "NVIDIA L4",
524
- "costTier": "medium"
525
- },
526
- "ml.inf2.24xlarge": {
527
- "category": "gpu",
528
- "gpus": 6,
529
- "vcpus": 96,
530
- "memGb": 384,
531
- "accelerator": "6x Inferentia2",
532
- "cudaVersions": [
533
- "2.15.0",
534
- "2.16.0",
535
- "2.17.0"
536
- ],
537
- "tags": [
538
- "gpu",
539
- "multi-gpu",
540
- "inferentia2",
541
- "neuron"
542
- ],
543
- "family": "inf2",
544
- "acceleratorType": "neuron",
545
- "hardware": "AWS Inferentia2",
546
- "gpuArchitecture": "Inferentia2",
547
- "defaultCudaVersion": "2.16.0",
548
- "notes": "6x Inferentia2 chips. Multi-chip for large models",
549
- "gpuMemoryGb": 32,
550
- "gpuType": "AWS Inferentia2",
551
- "costTier": "low"
552
- },
553
- "ml.inf2.48xlarge": {
554
- "category": "gpu",
555
- "gpus": 12,
556
- "vcpus": 192,
557
- "memGb": 768,
558
- "accelerator": "12x Inferentia2",
559
- "cudaVersions": [
560
- "2.15.0",
561
- "2.16.0",
562
- "2.17.0"
563
- ],
564
- "tags": [
565
- "gpu",
566
- "multi-gpu",
567
- "inferentia2",
568
- "neuron"
569
- ],
570
- "family": "inf2",
571
- "acceleratorType": "neuron",
572
- "hardware": "AWS Inferentia2",
573
- "gpuArchitecture": "Inferentia2",
574
- "defaultCudaVersion": "2.16.0",
575
- "notes": "12x Inferentia2 chips. Maximum multi-chip configuration",
576
- "gpuMemoryGb": 32,
577
- "gpuType": "AWS Inferentia2",
578
- "costTier": "low"
579
- },
580
- "ml.inf2.8xlarge": {
581
- "category": "gpu",
582
- "gpus": 1,
583
- "vcpus": 32,
584
- "memGb": 128,
585
- "accelerator": "Inferentia2",
586
- "cudaVersions": [
587
- "2.15.0",
588
- "2.16.0",
589
- "2.17.0"
590
- ],
591
- "tags": [
592
- "gpu",
593
- "single-gpu",
594
- "inferentia2",
595
- "neuron"
596
- ],
597
- "family": "inf2",
598
- "acceleratorType": "neuron",
599
- "hardware": "AWS Inferentia2",
600
- "gpuArchitecture": "Inferentia2",
601
- "defaultCudaVersion": "2.16.0",
602
- "notes": "1x Inferentia2 chip. Higher CPU/memory for preprocessing",
603
- "gpuMemoryGb": 32,
604
- "gpuType": "AWS Inferentia2",
605
- "costTier": "low"
606
- },
607
- "ml.inf2.xlarge": {
608
- "category": "gpu",
609
- "gpus": 1,
610
- "vcpus": 4,
611
- "memGb": 16,
612
- "accelerator": "Inferentia2",
613
- "cudaVersions": [
614
- "2.15.0",
615
- "2.16.0",
616
- "2.17.0"
617
- ],
618
- "tags": [
619
- "gpu",
620
- "single-gpu",
621
- "inferentia2",
622
- "neuron"
623
- ],
624
- "family": "inf2",
625
- "acceleratorType": "neuron",
626
- "hardware": "AWS Inferentia2",
627
- "gpuArchitecture": "Inferentia2",
628
- "defaultCudaVersion": "2.16.0",
629
- "notes": "1x Inferentia2 chip. Cost-effective for transformer inference",
630
- "gpuMemoryGb": 32,
631
- "gpuType": "AWS Inferentia2",
632
- "costTier": "low"
633
- },
634
- "ml.m5.2xlarge": {
635
- "category": "cpu",
636
- "gpus": 0,
637
- "vcpus": 8,
638
- "memGb": 32,
639
- "accelerator": "",
640
- "cudaVersions": null,
641
- "tags": [
642
- "large",
643
- "cpu",
644
- "general",
645
- "high-memory"
646
- ],
647
- "family": "m5",
648
- "acceleratorType": "cpu",
649
- "hardware": "None",
650
- "gpuArchitecture": "None",
651
- "defaultCudaVersion": null,
652
- "notes": "Large CPU workloads",
653
- "gpuMemoryGb": null,
654
- "gpuType": null,
655
- "costTier": "low"
656
- },
657
- "ml.m5.4xlarge": {
658
- "category": "cpu",
659
- "gpus": 0,
660
- "vcpus": 16,
661
- "memGb": 64,
662
- "accelerator": "",
663
- "cudaVersions": null,
664
- "tags": [
665
- "xlarge",
666
- "cpu",
667
- "general",
668
- "high-memory",
669
- "high-cpu"
670
- ],
671
- "family": "m5",
672
- "acceleratorType": "cpu",
673
- "hardware": "None",
674
- "gpuArchitecture": "None",
675
- "defaultCudaVersion": null,
676
- "notes": "XL CPU workloads",
677
- "gpuMemoryGb": null,
678
- "gpuType": null,
679
- "costTier": "low"
680
- },
681
- "ml.m5.large": {
682
- "category": "cpu",
683
- "gpus": 0,
684
- "vcpus": 2,
685
- "memGb": 8,
686
- "accelerator": "",
687
- "cudaVersions": null,
688
- "tags": [
689
- "small",
690
- "cpu",
691
- "general",
692
- "cheap",
693
- "cost-effective",
694
- "budget"
695
- ],
696
- "family": "m5",
697
- "acceleratorType": "cpu",
698
- "hardware": "None",
699
- "gpuArchitecture": "None",
700
- "defaultCudaVersion": null,
701
- "notes": "Small CPU workloads",
702
- "gpuMemoryGb": null,
703
- "gpuType": null,
704
- "costTier": "low"
705
- },
706
- "ml.m5.xlarge": {
707
- "category": "cpu",
708
- "gpus": 0,
709
- "vcpus": 4,
710
- "memGb": 16,
711
- "accelerator": "",
712
- "cudaVersions": null,
713
- "tags": [
714
- "medium",
715
- "cpu",
716
- "general",
717
- "cost-effective"
718
- ],
719
- "family": "m5",
720
- "acceleratorType": "cpu",
721
- "hardware": "None",
722
- "gpuArchitecture": "None",
723
- "defaultCudaVersion": null,
724
- "notes": "Medium CPU workloads",
725
- "gpuMemoryGb": null,
726
- "gpuType": null,
727
- "costTier": "low"
728
- },
729
- "ml.p3.16xlarge": {
730
- "category": "gpu",
731
- "gpus": 8,
732
- "vcpus": 64,
733
- "memGb": 488,
734
- "accelerator": "8x V100 128GB",
735
- "cudaVersions": [
736
- "11.0",
737
- "11.4",
738
- "11.8"
739
- ],
740
- "tags": [
741
- "gpu",
742
- "multi-gpu",
743
- "v100",
744
- "cuda-11"
745
- ],
746
- "family": "p3",
747
- "acceleratorType": "cuda",
748
- "hardware": "NVIDIA V100",
749
- "gpuArchitecture": "Volta",
750
- "defaultCudaVersion": "11.8",
751
- "notes": "8x NVIDIA V100 GPUs (128GB total). Maximum multi-GPU configuration",
752
- "gpuMemoryGb": 16,
753
- "gpuType": "NVIDIA V100",
754
- "costTier": "high"
755
- },
756
- "ml.p3.2xlarge": {
757
- "category": "gpu",
758
- "gpus": 1,
759
- "vcpus": 8,
760
- "memGb": 61,
761
- "accelerator": "V100 16GB",
762
- "cudaVersions": [
763
- "11.0",
764
- "11.4",
765
- "11.8"
766
- ],
767
- "tags": [
768
- "gpu",
769
- "single-gpu",
770
- "high-performance",
771
- "training",
772
- "v100",
773
- "cuda-11"
774
- ],
775
- "family": "p3",
776
- "acceleratorType": "cuda",
777
- "hardware": "NVIDIA V100",
778
- "gpuArchitecture": "Volta",
779
- "defaultCudaVersion": "11.8",
780
- "notes": "1x NVIDIA V100 GPU (16GB). High-performance for training and inference",
781
- "gpuMemoryGb": 16,
782
- "gpuType": "NVIDIA V100",
783
- "costTier": "high"
784
- },
785
- "ml.p3.8xlarge": {
786
- "category": "gpu",
787
- "gpus": 4,
788
- "vcpus": 32,
789
- "memGb": 244,
790
- "accelerator": "4x V100 64GB",
791
- "cudaVersions": [
792
- "11.0",
793
- "11.4",
794
- "11.8"
795
- ],
796
- "tags": [
797
- "gpu",
798
- "multi-gpu",
799
- "high-performance",
800
- "training",
801
- "v100",
802
- "parallel",
803
- "cuda-11"
804
- ],
805
- "family": "p3",
806
- "acceleratorType": "cuda",
807
- "hardware": "NVIDIA V100",
808
- "gpuArchitecture": "Volta",
809
- "defaultCudaVersion": "11.8",
810
- "notes": "4x NVIDIA V100 GPUs (64GB total). Multi-GPU for large models",
811
- "gpuMemoryGb": 16,
812
- "gpuType": "NVIDIA V100",
813
- "costTier": "high"
814
- },
815
- "ml.p4d.24xlarge": {
816
- "category": "gpu",
817
- "gpus": 8,
818
- "vcpus": 96,
819
- "memGb": 1152,
820
- "accelerator": "8x A100 320GB",
821
- "cudaVersions": [
822
- "11.8",
823
- "12.1",
824
- "12.2",
825
- "12.4"
826
- ],
827
- "tags": [
828
- "gpu",
829
- "multi-gpu",
830
- "high-performance",
831
- "training",
832
- "a100",
833
- "parallel",
834
- "cuda-12",
835
- "large-models"
836
- ],
837
- "family": "p4d",
838
- "acceleratorType": "cuda",
839
- "hardware": "NVIDIA A100",
840
- "gpuArchitecture": "Ampere",
841
- "defaultCudaVersion": "12.1",
842
- "notes": "8x NVIDIA A100 GPUs (40GB each, 320GB total). For very large models requiring multi-GPU TP",
843
- "gpuMemoryGb": 40,
844
- "gpuType": "NVIDIA A100",
845
- "costTier": "high"
846
- },
847
- "ml.p5.48xlarge": {
848
- "category": "gpu",
849
- "gpus": 8,
850
- "vcpus": 192,
851
- "memGb": 2048,
852
- "accelerator": "8x H100 640GB",
853
- "cudaVersions": [
854
- "12.1",
855
- "12.2",
856
- "12.4"
857
- ],
858
- "tags": [
859
- "gpu",
860
- "multi-gpu",
861
- "high-performance",
862
- "h100",
863
- "parallel",
864
- "cuda-12",
865
- "large-models"
866
- ],
867
- "family": "p5",
868
- "acceleratorType": "cuda",
869
- "hardware": "NVIDIA H100",
870
- "gpuArchitecture": "Hopper",
871
- "defaultCudaVersion": "12.2",
872
- "notes": "8x NVIDIA H100 GPUs (80GB each, 640GB total). High-performance for large LLMs",
873
- "gpuMemoryGb": 80,
874
- "gpuType": "NVIDIA H100",
875
- "costTier": "high"
876
- },
877
- "ml.p5e.48xlarge": {
878
- "category": "gpu",
879
- "gpus": 8,
880
- "vcpus": 192,
881
- "memGb": 2048,
882
- "accelerator": "8x H200 1128GB",
883
- "cudaVersions": [
884
- "12.4"
885
- ],
886
- "tags": [
887
- "gpu",
888
- "multi-gpu",
889
- "high-performance",
890
- "h200",
891
- "parallel",
892
- "cuda-12",
893
- "large-models"
894
- ],
895
- "family": "p5e",
896
- "acceleratorType": "cuda",
897
- "hardware": "NVIDIA H200",
898
- "gpuArchitecture": "Hopper",
899
- "defaultCudaVersion": "12.4",
900
- "notes": "8x NVIDIA H200 GPUs (141GB each, 1128GB total). Maximum Hopper-class memory",
901
- "gpuMemoryGb": 141,
902
- "gpuType": "NVIDIA H200",
903
- "costTier": "high"
904
- },
905
- "ml.p5en.48xlarge": {
906
- "category": "gpu",
907
- "gpus": 8,
908
- "vcpus": 192,
909
- "memGb": 2048,
910
- "accelerator": "8x H200 1128GB",
911
- "cudaVersions": [
912
- "12.4"
913
- ],
914
- "tags": [
915
- "gpu",
916
- "multi-gpu",
917
- "high-performance",
918
- "h200",
919
- "parallel",
920
- "cuda-12",
921
- "large-models",
922
- "enhanced-networking"
923
- ],
924
- "family": "p5en",
925
- "acceleratorType": "cuda",
926
- "hardware": "NVIDIA H200",
927
- "gpuArchitecture": "Hopper",
928
- "defaultCudaVersion": "12.4",
929
- "notes": "8x NVIDIA H200 GPUs (141GB each, 1128GB total). Enhanced networking variant of p5e",
930
- "gpuMemoryGb": 141,
931
- "gpuType": "NVIDIA H200",
932
- "costTier": "high"
933
- },
934
- "ml.g6e.xlarge": {
935
- "category": "gpu",
936
- "gpus": 1,
937
- "vcpus": 4,
938
- "memGb": 32,
939
- "accelerator": "L40S 48GB",
940
- "cudaVersions": [
941
- "12.2",
942
- "12.4"
943
- ],
944
- "tags": [
945
- "gpu",
946
- "single-gpu",
947
- "inference",
948
- "l40s",
949
- "newer",
950
- "cuda-12"
951
- ],
952
- "family": "g6e",
953
- "acceleratorType": "cuda",
954
- "hardware": "NVIDIA L40S",
955
- "gpuArchitecture": "Ada Lovelace",
956
- "defaultCudaVersion": "12.4",
957
- "notes": "1x NVIDIA L40S GPU (48GB). Cost-effective for medium models",
958
- "gpuMemoryGb": 48,
959
- "gpuType": "NVIDIA L40S",
960
- "costTier": "medium"
961
- },
962
- "ml.g6e.2xlarge": {
963
- "category": "gpu",
964
- "gpus": 1,
965
- "vcpus": 8,
966
- "memGb": 64,
967
- "accelerator": "L40S 48GB",
968
- "cudaVersions": [
969
- "12.2",
970
- "12.4"
971
- ],
972
- "tags": [
973
- "gpu",
974
- "single-gpu",
975
- "inference",
976
- "l40s",
977
- "newer",
978
- "cuda-12"
979
- ],
980
- "family": "g6e",
981
- "acceleratorType": "cuda",
982
- "hardware": "NVIDIA L40S",
983
- "gpuArchitecture": "Ada Lovelace",
984
- "defaultCudaVersion": "12.4",
985
- "notes": "1x NVIDIA L40S GPU (48GB). Better CPU/memory for preprocessing",
986
- "gpuMemoryGb": 48,
987
- "gpuType": "NVIDIA L40S",
988
- "costTier": "medium"
989
- },
990
- "ml.g6e.4xlarge": {
991
- "category": "gpu",
992
- "gpus": 1,
993
- "vcpus": 16,
994
- "memGb": 128,
995
- "accelerator": "L40S 48GB",
996
- "cudaVersions": [
997
- "12.2",
998
- "12.4"
999
- ],
1000
- "tags": [
1001
- "gpu",
1002
- "single-gpu",
1003
- "l40s",
1004
- "newer",
1005
- "cuda-12"
1006
- ],
1007
- "family": "g6e",
1008
- "acceleratorType": "cuda",
1009
- "hardware": "NVIDIA L40S",
1010
- "gpuArchitecture": "Ada Lovelace",
1011
- "defaultCudaVersion": "12.4",
1012
- "notes": "1x NVIDIA L40S GPU (48GB). High CPU/memory ratio",
1013
- "gpuMemoryGb": 48,
1014
- "gpuType": "NVIDIA L40S",
1015
- "costTier": "medium"
1016
- },
1017
- "ml.g6e.8xlarge": {
1018
- "category": "gpu",
1019
- "gpus": 1,
1020
- "vcpus": 32,
1021
- "memGb": 256,
1022
- "accelerator": "L40S 48GB",
1023
- "cudaVersions": [
1024
- "12.2",
1025
- "12.4"
1026
- ],
1027
- "tags": [
1028
- "gpu",
1029
- "single-gpu",
1030
- "l40s",
1031
- "newer",
1032
- "cuda-12"
1033
- ],
1034
- "family": "g6e",
1035
- "acceleratorType": "cuda",
1036
- "hardware": "NVIDIA L40S",
1037
- "gpuArchitecture": "Ada Lovelace",
1038
- "defaultCudaVersion": "12.4",
1039
- "notes": "1x NVIDIA L40S GPU (48GB). Maximum CPU/memory for single GPU",
1040
- "gpuMemoryGb": 48,
1041
- "gpuType": "NVIDIA L40S",
1042
- "costTier": "medium"
1043
- },
1044
- "ml.g6e.12xlarge": {
1045
- "category": "gpu",
1046
- "gpus": 4,
1047
- "vcpus": 48,
1048
- "memGb": 384,
1049
- "accelerator": "4x L40S 192GB",
1050
- "cudaVersions": [
1051
- "12.2",
1052
- "12.4"
1053
- ],
1054
- "tags": [
1055
- "gpu",
1056
- "multi-gpu",
1057
- "l40s",
1058
- "newer",
1059
- "parallel",
1060
- "cuda-12"
1061
- ],
1062
- "family": "g6e",
1063
- "acceleratorType": "cuda",
1064
- "hardware": "NVIDIA L40S",
1065
- "gpuArchitecture": "Ada Lovelace",
1066
- "defaultCudaVersion": "12.4",
1067
- "notes": "4x NVIDIA L40S GPUs (192GB total). Good for tensor parallelism",
1068
- "gpuMemoryGb": 48,
1069
- "gpuType": "NVIDIA L40S",
1070
- "costTier": "medium"
1071
- },
1072
- "ml.g6e.24xlarge": {
1073
- "category": "gpu",
1074
- "gpus": 4,
1075
- "vcpus": 96,
1076
- "memGb": 768,
1077
- "accelerator": "4x L40S 192GB",
1078
- "cudaVersions": [
1079
- "12.2",
1080
- "12.4"
1081
- ],
1082
- "tags": [
1083
- "gpu",
1084
- "multi-gpu",
1085
- "l40s",
1086
- "newer",
1087
- "cuda-12"
1088
- ],
1089
- "family": "g6e",
1090
- "acceleratorType": "cuda",
1091
- "hardware": "NVIDIA L40S",
1092
- "gpuArchitecture": "Ada Lovelace",
1093
- "defaultCudaVersion": "12.4",
1094
- "notes": "4x NVIDIA L40S GPUs (192GB total). High CPU/memory with multi-GPU",
1095
- "gpuMemoryGb": 48,
1096
- "gpuType": "NVIDIA L40S",
1097
- "costTier": "medium"
1098
- },
1099
- "ml.g6e.48xlarge": {
1100
- "category": "gpu",
1101
- "gpus": 8,
1102
- "vcpus": 192,
1103
- "memGb": 1536,
1104
- "accelerator": "8x L40S 384GB",
1105
- "cudaVersions": [
1106
- "12.2",
1107
- "12.4"
1108
- ],
1109
- "tags": [
1110
- "gpu",
1111
- "multi-gpu",
1112
- "l40s",
1113
- "newer",
1114
- "cuda-12",
1115
- "large-models"
1116
- ],
1117
- "family": "g6e",
1118
- "acceleratorType": "cuda",
1119
- "hardware": "NVIDIA L40S",
1120
- "gpuArchitecture": "Ada Lovelace",
1121
- "defaultCudaVersion": "12.4",
1122
- "notes": "8x NVIDIA L40S GPUs (384GB total). Maximum multi-GPU configuration",
1123
- "gpuMemoryGb": 48,
1124
- "gpuType": "NVIDIA L40S",
1125
- "costTier": "medium"
1126
- },
1127
- "ml.p6-b200.48xlarge": {
1128
- "category": "gpu",
1129
- "gpus": 8,
1130
- "vcpus": 192,
1131
- "memGb": 2048,
1132
- "accelerator": "8x B200 1432GB",
1133
- "cudaVersions": [
1134
- "13.0"
1135
- ],
1136
- "tags": [
1137
- "gpu",
1138
- "multi-gpu",
1139
- "high-performance",
1140
- "blackwell",
1141
- "b200",
1142
- "parallel",
1143
- "cuda-13",
1144
- "large-models"
1145
- ],
1146
- "family": "p6",
1147
- "acceleratorType": "cuda",
1148
- "hardware": "NVIDIA B200",
1149
- "gpuArchitecture": "Blackwell",
1150
- "defaultCudaVersion": "13.0",
1151
- "notes": "8x NVIDIA B200 GPUs (179GB each, 1432GB total). Blackwell architecture for frontier models",
1152
- "gpuMemoryGb": 179,
1153
- "gpuType": "NVIDIA B200",
1154
- "costTier": "high"
1155
- },
1156
- "ml.g7e.2xlarge": {
1157
- "category": "gpu",
1158
- "gpus": 1,
1159
- "vcpus": 8,
1160
- "memGb": 64,
1161
- "accelerator": "RTX PRO 6000 96GB",
1162
- "cudaVersions": [
1163
- "13.0"
1164
- ],
1165
- "tags": [
1166
- "gpu",
1167
- "single-gpu",
1168
- "inference",
1169
- "blackwell",
1170
- "rtx-pro",
1171
- "cuda-13"
1172
- ],
1173
- "family": "g7e",
1174
- "acceleratorType": "cuda",
1175
- "hardware": "NVIDIA RTX PRO 6000",
1176
- "gpuArchitecture": "Blackwell",
1177
- "defaultCudaVersion": "13.0",
1178
- "notes": "1x NVIDIA RTX PRO 6000 GPU (96GB). Blackwell for inference",
1179
- "gpuMemoryGb": 96,
1180
- "gpuType": "NVIDIA RTX PRO 6000",
1181
- "costTier": "medium"
1182
- },
1183
- "ml.g7e.4xlarge": {
1184
- "category": "gpu",
1185
- "gpus": 1,
1186
- "vcpus": 16,
1187
- "memGb": 128,
1188
- "accelerator": "RTX PRO 6000 96GB",
1189
- "cudaVersions": [
1190
- "13.0"
1191
- ],
1192
- "tags": [
1193
- "gpu",
1194
- "single-gpu",
1195
- "blackwell",
1196
- "rtx-pro",
1197
- "cuda-13"
1198
- ],
1199
- "family": "g7e",
1200
- "acceleratorType": "cuda",
1201
- "hardware": "NVIDIA RTX PRO 6000",
1202
- "gpuArchitecture": "Blackwell",
1203
- "defaultCudaVersion": "13.0",
1204
- "notes": "1x NVIDIA RTX PRO 6000 GPU (96GB). High CPU/memory ratio",
1205
- "gpuMemoryGb": 96,
1206
- "gpuType": "NVIDIA RTX PRO 6000",
1207
- "costTier": "medium"
1208
- },
1209
- "ml.g7e.8xlarge": {
1210
- "category": "gpu",
1211
- "gpus": 1,
1212
- "vcpus": 32,
1213
- "memGb": 256,
1214
- "accelerator": "RTX PRO 6000 96GB",
1215
- "cudaVersions": [
1216
- "13.0"
1217
- ],
1218
- "tags": [
1219
- "gpu",
1220
- "single-gpu",
1221
- "blackwell",
1222
- "rtx-pro",
1223
- "cuda-13"
1224
- ],
1225
- "family": "g7e",
1226
- "acceleratorType": "cuda",
1227
- "hardware": "NVIDIA RTX PRO 6000",
1228
- "gpuArchitecture": "Blackwell",
1229
- "defaultCudaVersion": "13.0",
1230
- "notes": "1x NVIDIA RTX PRO 6000 GPU (96GB). Maximum CPU/memory for single GPU",
1231
- "gpuMemoryGb": 96,
1232
- "gpuType": "NVIDIA RTX PRO 6000",
1233
- "costTier": "medium"
1234
- },
1235
- "ml.g7e.12xlarge": {
1236
- "category": "gpu",
1237
- "gpus": 2,
1238
- "vcpus": 48,
1239
- "memGb": 512,
1240
- "accelerator": "2x RTX PRO 6000 192GB",
1241
- "cudaVersions": [
1242
- "13.0"
1243
- ],
1244
- "tags": [
1245
- "gpu",
1246
- "multi-gpu",
1247
- "blackwell",
1248
- "rtx-pro",
1249
- "parallel",
1250
- "cuda-13"
1251
- ],
1252
- "family": "g7e",
1253
- "acceleratorType": "cuda",
1254
- "hardware": "NVIDIA RTX PRO 6000",
1255
- "gpuArchitecture": "Blackwell",
1256
- "defaultCudaVersion": "13.0",
1257
- "notes": "2x NVIDIA RTX PRO 6000 GPUs (192GB total). Multi-GPU for larger models",
1258
- "gpuMemoryGb": 96,
1259
- "gpuType": "NVIDIA RTX PRO 6000",
1260
- "costTier": "medium"
1261
- },
1262
- "ml.g7e.24xlarge": {
1263
- "category": "gpu",
1264
- "gpus": 4,
1265
- "vcpus": 96,
1266
- "memGb": 1024,
1267
- "accelerator": "4x RTX PRO 6000 384GB",
1268
- "cudaVersions": [
1269
- "13.0"
1270
- ],
1271
- "tags": [
1272
- "gpu",
1273
- "multi-gpu",
1274
- "blackwell",
1275
- "rtx-pro",
1276
- "parallel",
1277
- "cuda-13"
1278
- ],
1279
- "family": "g7e",
1280
- "acceleratorType": "cuda",
1281
- "hardware": "NVIDIA RTX PRO 6000",
1282
- "gpuArchitecture": "Blackwell",
1283
- "defaultCudaVersion": "13.0",
1284
- "notes": "4x NVIDIA RTX PRO 6000 GPUs (384GB total). High-capacity multi-GPU",
1285
- "gpuMemoryGb": 96,
1286
- "gpuType": "NVIDIA RTX PRO 6000",
1287
- "costTier": "medium"
1288
- },
1289
- "ml.g7e.48xlarge": {
1290
- "category": "gpu",
1291
- "gpus": 8,
1292
- "vcpus": 192,
1293
- "memGb": 2048,
1294
- "accelerator": "8x RTX PRO 6000 768GB",
1295
- "cudaVersions": [
1296
- "13.0"
1297
- ],
1298
- "tags": [
1299
- "gpu",
1300
- "multi-gpu",
1301
- "blackwell",
1302
- "rtx-pro",
1303
- "parallel",
1304
- "cuda-13",
1305
- "large-models"
1306
- ],
1307
- "family": "g7e",
1308
- "acceleratorType": "cuda",
1309
- "hardware": "NVIDIA RTX PRO 6000",
1310
- "gpuArchitecture": "Blackwell",
1311
- "defaultCudaVersion": "13.0",
1312
- "notes": "8x NVIDIA RTX PRO 6000 GPUs (768GB total). Maximum Blackwell multi-GPU",
1313
- "gpuMemoryGb": 96,
1314
- "gpuType": "NVIDIA RTX PRO 6000",
1315
- "costTier": "medium"
1316
- },
1317
- "ml.r5.large": {
1318
- "category": "cpu",
1319
- "gpus": 0,
1320
- "vcpus": 2,
1321
- "memGb": 16,
1322
- "accelerator": "",
1323
- "cudaVersions": null,
1324
- "tags": [
1325
- "memory",
1326
- "cpu",
1327
- "high-memory"
1328
- ],
1329
- "family": "r5",
1330
- "acceleratorType": "cpu",
1331
- "hardware": "None",
1332
- "gpuArchitecture": "None",
1333
- "defaultCudaVersion": null,
1334
- "notes": "",
1335
- "gpuMemoryGb": null,
1336
- "gpuType": null,
1337
- "costTier": "low"
1338
- },
1339
- "ml.r5.xlarge": {
1340
- "category": "cpu",
1341
- "gpus": 0,
1342
- "vcpus": 4,
1343
- "memGb": 32,
1344
- "accelerator": "",
1345
- "cudaVersions": null,
1346
- "tags": [
1347
- "memory",
1348
- "cpu",
1349
- "high-memory"
1350
- ],
1351
- "family": "r5",
1352
- "acceleratorType": "cpu",
1353
- "hardware": "None",
1354
- "gpuArchitecture": "None",
1355
- "defaultCudaVersion": null,
1356
- "notes": "",
1357
- "gpuMemoryGb": null,
1358
- "gpuType": null,
1359
- "costTier": "low"
1360
- },
1361
- "ml.trn1.2xlarge": {
1362
- "category": "gpu",
1363
- "gpus": 1,
1364
- "vcpus": 8,
1365
- "memGb": 32,
1366
- "accelerator": "Trainium",
1367
- "cudaVersions": [
1368
- "2.15.0",
1369
- "2.16.0",
1370
- "2.17.0"
1371
- ],
1372
- "tags": [
1373
- "gpu",
1374
- "single-gpu",
1375
- "trainium",
1376
- "neuron"
1377
- ],
1378
- "family": "trn1",
1379
- "acceleratorType": "neuron",
1380
- "hardware": "AWS Trainium",
1381
- "gpuArchitecture": "Trainium1",
1382
- "defaultCudaVersion": "2.16.0",
1383
- "notes": "1x Trainium chip. Optimized for training, also supports inference",
1384
- "gpuMemoryGb": 32,
1385
- "gpuType": "AWS Trainium",
1386
- "costTier": "medium"
1387
- },
1388
- "ml.trn1.32xlarge": {
1389
- "category": "gpu",
1390
- "gpus": 16,
1391
- "vcpus": 128,
1392
- "memGb": 512,
1393
- "accelerator": "16x Trainium",
1394
- "cudaVersions": [
1395
- "2.15.0",
1396
- "2.16.0",
1397
- "2.17.0"
1398
- ],
1399
- "tags": [
1400
- "gpu",
1401
- "multi-gpu",
1402
- "trainium",
1403
- "neuron"
1404
- ],
1405
- "family": "trn1",
1406
- "acceleratorType": "neuron",
1407
- "hardware": "AWS Trainium",
1408
- "gpuArchitecture": "Trainium1",
1409
- "defaultCudaVersion": "2.16.0",
1410
- "notes": "16x Trainium chips. Maximum multi-chip for large-scale training/inference",
1411
- "gpuMemoryGb": 32,
1412
- "gpuType": "AWS Trainium",
1413
- "costTier": "medium"
1414
231
  }
1415
232
  },
1416
233
  "recommendations": {
1417
- "cpu": [
1418
- "ml.m5.large",
1419
- "ml.m5.xlarge",
1420
- "ml.m5.2xlarge",
1421
- "ml.m5.4xlarge",
1422
- "ml.c5.xlarge",
1423
- "ml.c5.2xlarge",
1424
- "ml.r5.large",
1425
- "ml.r5.xlarge"
1426
- ],
234
+ "cpu": [],
1427
235
  "gpu": [
1428
- "ml.g7e.2xlarge",
1429
- "ml.g7e.4xlarge",
1430
- "ml.g7e.8xlarge",
1431
- "ml.g7e.12xlarge",
1432
- "ml.g7e.24xlarge",
1433
- "ml.g7e.48xlarge",
1434
- "ml.g6e.xlarge",
1435
- "ml.g6e.2xlarge",
1436
- "ml.g6e.4xlarge",
1437
- "ml.g6e.8xlarge",
1438
- "ml.g6e.12xlarge",
1439
- "ml.g6e.24xlarge",
1440
- "ml.g6e.48xlarge",
1441
- "ml.g6.xlarge",
1442
- "ml.g6.2xlarge",
1443
- "ml.g6.12xlarge",
1444
236
  "ml.g5.xlarge",
1445
237
  "ml.g5.2xlarge",
1446
238
  "ml.g5.4xlarge",
239
+ "ml.g5.8xlarge",
1447
240
  "ml.g5.12xlarge",
241
+ "ml.g5.16xlarge",
1448
242
  "ml.g5.24xlarge",
1449
- "ml.g5.48xlarge",
1450
- "ml.g4dn.xlarge",
1451
- "ml.g4dn.2xlarge",
1452
- "ml.g4dn.12xlarge",
1453
- "ml.p6-b200.48xlarge",
1454
- "ml.p5e.48xlarge",
1455
- "ml.p5en.48xlarge",
1456
- "ml.p5.48xlarge",
1457
- "ml.p4d.24xlarge",
1458
- "ml.p3.2xlarge",
1459
- "ml.p3.8xlarge",
1460
- "ml.p3.16xlarge",
1461
- "ml.inf2.xlarge",
1462
- "ml.inf2.8xlarge",
1463
- "ml.inf2.24xlarge",
1464
- "ml.inf2.48xlarge",
1465
- "ml.trn1.2xlarge",
1466
- "ml.trn1.32xlarge"
243
+ "ml.g5.48xlarge"
1467
244
  ]
1468
245
  }
1469
246
  }