@aws/ml-container-creator 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143)
  1. package/LICENSE +202 -0
  2. package/LICENSE-THIRD-PARTY +68620 -0
  3. package/NOTICE +2 -0
  4. package/README.md +106 -0
  5. package/bin/cli.js +365 -0
  6. package/config/defaults.json +32 -0
  7. package/config/presets/transformers-djl.json +26 -0
  8. package/config/presets/transformers-gpu.json +24 -0
  9. package/config/presets/transformers-lmi.json +27 -0
  10. package/package.json +129 -0
  11. package/servers/README.md +419 -0
  12. package/servers/base-image-picker/catalogs/model-servers.json +1191 -0
  13. package/servers/base-image-picker/catalogs/python-slim.json +38 -0
  14. package/servers/base-image-picker/catalogs/triton-backends.json +51 -0
  15. package/servers/base-image-picker/catalogs/triton.json +38 -0
  16. package/servers/base-image-picker/index.js +495 -0
  17. package/servers/base-image-picker/manifest.json +17 -0
  18. package/servers/base-image-picker/package.json +15 -0
  19. package/servers/hyperpod-cluster-picker/LICENSE +202 -0
  20. package/servers/hyperpod-cluster-picker/index.js +424 -0
  21. package/servers/hyperpod-cluster-picker/manifest.json +14 -0
  22. package/servers/hyperpod-cluster-picker/package.json +17 -0
  23. package/servers/instance-recommender/LICENSE +202 -0
  24. package/servers/instance-recommender/catalogs/instances.json +852 -0
  25. package/servers/instance-recommender/index.js +284 -0
  26. package/servers/instance-recommender/manifest.json +16 -0
  27. package/servers/instance-recommender/package.json +15 -0
  28. package/servers/lib/LICENSE +202 -0
  29. package/servers/lib/bedrock-client.js +160 -0
  30. package/servers/lib/custom-validators.js +46 -0
  31. package/servers/lib/dynamic-resolver.js +36 -0
  32. package/servers/lib/package.json +11 -0
  33. package/servers/lib/schemas/image-catalog.schema.json +185 -0
  34. package/servers/lib/schemas/instances.schema.json +124 -0
  35. package/servers/lib/schemas/manifest.schema.json +64 -0
  36. package/servers/lib/schemas/model-catalog.schema.json +91 -0
  37. package/servers/lib/schemas/regions.schema.json +26 -0
  38. package/servers/lib/schemas/triton-backends.schema.json +51 -0
  39. package/servers/model-picker/catalogs/jumpstart-public.json +66 -0
  40. package/servers/model-picker/catalogs/popular-diffusors.json +88 -0
  41. package/servers/model-picker/catalogs/popular-transformers.json +226 -0
  42. package/servers/model-picker/index.js +1693 -0
  43. package/servers/model-picker/manifest.json +18 -0
  44. package/servers/model-picker/package.json +20 -0
  45. package/servers/region-picker/LICENSE +202 -0
  46. package/servers/region-picker/catalogs/regions.json +263 -0
  47. package/servers/region-picker/index.js +230 -0
  48. package/servers/region-picker/manifest.json +16 -0
  49. package/servers/region-picker/package.json +15 -0
  50. package/src/app.js +1007 -0
  51. package/src/copy-tpl.js +77 -0
  52. package/src/lib/accelerator-validator.js +39 -0
  53. package/src/lib/asset-manager.js +385 -0
  54. package/src/lib/aws-profile-parser.js +181 -0
  55. package/src/lib/bootstrap-command-handler.js +1647 -0
  56. package/src/lib/bootstrap-config.js +238 -0
  57. package/src/lib/ci-register-helpers.js +124 -0
  58. package/src/lib/ci-report-helpers.js +158 -0
  59. package/src/lib/ci-stage-helpers.js +268 -0
  60. package/src/lib/cli-handler.js +529 -0
  61. package/src/lib/comment-generator.js +544 -0
  62. package/src/lib/community-reports-validator.js +91 -0
  63. package/src/lib/config-manager.js +2106 -0
  64. package/src/lib/configuration-exporter.js +204 -0
  65. package/src/lib/configuration-manager.js +695 -0
  66. package/src/lib/configuration-matcher.js +221 -0
  67. package/src/lib/cpu-validator.js +36 -0
  68. package/src/lib/cuda-validator.js +57 -0
  69. package/src/lib/deployment-config-resolver.js +103 -0
  70. package/src/lib/deployment-entry-schema.js +125 -0
  71. package/src/lib/deployment-registry.js +598 -0
  72. package/src/lib/docker-introspection-validator.js +51 -0
  73. package/src/lib/engine-prefix-resolver.js +60 -0
  74. package/src/lib/huggingface-client.js +172 -0
  75. package/src/lib/key-value-parser.js +37 -0
  76. package/src/lib/known-flags-validator.js +200 -0
  77. package/src/lib/manifest-cli.js +280 -0
  78. package/src/lib/mcp-client.js +303 -0
  79. package/src/lib/mcp-command-handler.js +532 -0
  80. package/src/lib/neuron-validator.js +80 -0
  81. package/src/lib/parameter-schema-validator.js +284 -0
  82. package/src/lib/prompt-runner.js +1349 -0
  83. package/src/lib/prompts.js +1138 -0
  84. package/src/lib/registry-command-handler.js +519 -0
  85. package/src/lib/registry-loader.js +198 -0
  86. package/src/lib/rocm-validator.js +80 -0
  87. package/src/lib/schema-validator.js +157 -0
  88. package/src/lib/sensitive-redactor.js +59 -0
  89. package/src/lib/template-engine.js +156 -0
  90. package/src/lib/template-manager.js +341 -0
  91. package/src/lib/validation-engine.js +314 -0
  92. package/src/prompt-adapter.js +63 -0
  93. package/templates/Dockerfile +300 -0
  94. package/templates/IAM_PERMISSIONS.md +84 -0
  95. package/templates/MIGRATION.md +488 -0
  96. package/templates/PROJECT_README.md +439 -0
  97. package/templates/TEMPLATE_SYSTEM.md +243 -0
  98. package/templates/buildspec.yml +64 -0
  99. package/templates/code/chat_template.jinja +1 -0
  100. package/templates/code/flask/gunicorn_config.py +35 -0
  101. package/templates/code/flask/wsgi.py +10 -0
  102. package/templates/code/model_handler.py +387 -0
  103. package/templates/code/serve +300 -0
  104. package/templates/code/serve.py +175 -0
  105. package/templates/code/serving.properties +105 -0
  106. package/templates/code/start_server.py +39 -0
  107. package/templates/code/start_server.sh +39 -0
  108. package/templates/diffusors/Dockerfile +72 -0
  109. package/templates/diffusors/patch_image_api.py +35 -0
  110. package/templates/diffusors/serve +115 -0
  111. package/templates/diffusors/start_server.sh +114 -0
  112. package/templates/do/.gitkeep +1 -0
  113. package/templates/do/README.md +541 -0
  114. package/templates/do/build +83 -0
  115. package/templates/do/ci +681 -0
  116. package/templates/do/clean +811 -0
  117. package/templates/do/config +260 -0
  118. package/templates/do/deploy +1560 -0
  119. package/templates/do/export +306 -0
  120. package/templates/do/logs +319 -0
  121. package/templates/do/manifest +12 -0
  122. package/templates/do/push +119 -0
  123. package/templates/do/register +580 -0
  124. package/templates/do/run +113 -0
  125. package/templates/do/submit +417 -0
  126. package/templates/do/test +1147 -0
  127. package/templates/hyperpod/configmap.yaml +24 -0
  128. package/templates/hyperpod/deployment.yaml +71 -0
  129. package/templates/hyperpod/pvc.yaml +42 -0
  130. package/templates/hyperpod/service.yaml +17 -0
  131. package/templates/nginx-diffusors.conf +74 -0
  132. package/templates/nginx-predictors.conf +47 -0
  133. package/templates/nginx-tensorrt.conf +74 -0
  134. package/templates/requirements.txt +61 -0
  135. package/templates/sample_model/test_inference.py +123 -0
  136. package/templates/sample_model/train_abalone.py +252 -0
  137. package/templates/test/test_endpoint.sh +79 -0
  138. package/templates/test/test_local_image.sh +80 -0
  139. package/templates/test/test_model_handler.py +180 -0
  140. package/templates/triton/Dockerfile +128 -0
  141. package/templates/triton/config.pbtxt +163 -0
  142. package/templates/triton/model.py +130 -0
  143. package/templates/triton/requirements.txt +11 -0
@@ -0,0 +1,852 @@
1
+ {
2
+ "catalog": {
3
+ "ml.c5.2xlarge": {
4
+ "category": "cpu",
5
+ "gpus": 0,
6
+ "vcpus": 8,
7
+ "memGb": 16,
8
+ "accelerator": "",
9
+ "cudaVersions": null,
10
+ "tags": [
11
+ "compute",
12
+ "cpu",
13
+ "high-cpu"
14
+ ],
15
+ "family": "c5",
16
+ "acceleratorType": "cpu",
17
+ "hardware": "None",
18
+ "gpuArchitecture": "None",
19
+ "defaultCudaVersion": null,
20
+ "notes": ""
21
+ },
22
+ "ml.c5.xlarge": {
23
+ "category": "cpu",
24
+ "gpus": 0,
25
+ "vcpus": 4,
26
+ "memGb": 8,
27
+ "accelerator": "",
28
+ "cudaVersions": null,
29
+ "tags": [
30
+ "compute",
31
+ "cpu",
32
+ "cost-effective"
33
+ ],
34
+ "family": "c5",
35
+ "acceleratorType": "cpu",
36
+ "hardware": "None",
37
+ "gpuArchitecture": "None",
38
+ "defaultCudaVersion": null,
39
+ "notes": ""
40
+ },
41
+ "ml.g4dn.12xlarge": {
42
+ "category": "gpu",
43
+ "gpus": 4,
44
+ "vcpus": 48,
45
+ "memGb": 192,
46
+ "accelerator": "4x T4 64GB",
47
+ "cudaVersions": [
48
+ "11.4",
49
+ "11.8"
50
+ ],
51
+ "tags": [
52
+ "gpu",
53
+ "multi-gpu",
54
+ "t4",
55
+ "cuda-11"
56
+ ],
57
+ "family": "g4dn",
58
+ "acceleratorType": "cuda",
59
+ "hardware": "NVIDIA T4",
60
+ "gpuArchitecture": "Turing",
61
+ "defaultCudaVersion": "11.8",
62
+ "notes": "4x NVIDIA T4 GPUs (64GB total). Multi-GPU for tensor parallelism"
63
+ },
64
+ "ml.g4dn.16xlarge": {
65
+ "category": "gpu",
66
+ "gpus": 1,
67
+ "vcpus": 64,
68
+ "memGb": 256,
69
+ "accelerator": "T4 16GB",
70
+ "cudaVersions": [
71
+ "11.4",
72
+ "11.8"
73
+ ],
74
+ "tags": [
75
+ "gpu",
76
+ "single-gpu",
77
+ "t4",
78
+ "cuda-11"
79
+ ],
80
+ "family": "g4dn",
81
+ "acceleratorType": "cuda",
82
+ "hardware": "NVIDIA T4",
83
+ "gpuArchitecture": "Turing",
84
+ "defaultCudaVersion": "11.8",
85
+ "notes": "1x NVIDIA T4 GPU (16GB). Very high CPU/memory ratio"
86
+ },
87
+ "ml.g4dn.2xlarge": {
88
+ "category": "gpu",
89
+ "gpus": 1,
90
+ "vcpus": 8,
91
+ "memGb": 32,
92
+ "accelerator": "T4 16GB",
93
+ "cudaVersions": [
94
+ "11.4",
95
+ "11.8"
96
+ ],
97
+ "tags": [
98
+ "gpu",
99
+ "single-gpu",
100
+ "budget",
101
+ "cost-effective",
102
+ "inference",
103
+ "t4",
104
+ "cuda-11"
105
+ ],
106
+ "family": "g4dn",
107
+ "acceleratorType": "cuda",
108
+ "hardware": "NVIDIA T4",
109
+ "gpuArchitecture": "Turing",
110
+ "defaultCudaVersion": "11.8",
111
+ "notes": "1x NVIDIA T4 GPU (16GB). Better CPU/memory for preprocessing"
112
+ },
113
+ "ml.g4dn.4xlarge": {
114
+ "category": "gpu",
115
+ "gpus": 1,
116
+ "vcpus": 16,
117
+ "memGb": 64,
118
+ "accelerator": "T4 16GB",
119
+ "cudaVersions": [
120
+ "11.4",
121
+ "11.8"
122
+ ],
123
+ "tags": [
124
+ "gpu",
125
+ "single-gpu",
126
+ "t4",
127
+ "cuda-11"
128
+ ],
129
+ "family": "g4dn",
130
+ "acceleratorType": "cuda",
131
+ "hardware": "NVIDIA T4",
132
+ "gpuArchitecture": "Turing",
133
+ "defaultCudaVersion": "11.8",
134
+ "notes": "1x NVIDIA T4 GPU (16GB). High CPU/memory ratio"
135
+ },
136
+ "ml.g4dn.8xlarge": {
137
+ "category": "gpu",
138
+ "gpus": 1,
139
+ "vcpus": 32,
140
+ "memGb": 128,
141
+ "accelerator": "T4 16GB",
142
+ "cudaVersions": [
143
+ "11.4",
144
+ "11.8"
145
+ ],
146
+ "tags": [
147
+ "gpu",
148
+ "single-gpu",
149
+ "t4",
150
+ "cuda-11"
151
+ ],
152
+ "family": "g4dn",
153
+ "acceleratorType": "cuda",
154
+ "hardware": "NVIDIA T4",
155
+ "gpuArchitecture": "Turing",
156
+ "defaultCudaVersion": "11.8",
157
+ "notes": "1x NVIDIA T4 GPU (16GB). Maximum CPU/memory for single GPU"
158
+ },
159
+ "ml.g4dn.xlarge": {
160
+ "category": "gpu",
161
+ "gpus": 1,
162
+ "vcpus": 4,
163
+ "memGb": 16,
164
+ "accelerator": "T4 16GB",
165
+ "cudaVersions": [
166
+ "11.4",
167
+ "11.8"
168
+ ],
169
+ "tags": [
170
+ "gpu",
171
+ "single-gpu",
172
+ "budget",
173
+ "cost-effective",
174
+ "inference",
175
+ "t4",
176
+ "cuda-11"
177
+ ],
178
+ "family": "g4dn",
179
+ "acceleratorType": "cuda",
180
+ "hardware": "NVIDIA T4",
181
+ "gpuArchitecture": "Turing",
182
+ "defaultCudaVersion": "11.8",
183
+ "notes": "1x NVIDIA T4 GPU (16GB). Cost-effective for smaller models"
184
+ },
185
+ "ml.g5.12xlarge": {
186
+ "category": "gpu",
187
+ "gpus": 4,
188
+ "vcpus": 48,
189
+ "memGb": 192,
190
+ "accelerator": "4x A10G 96GB",
191
+ "cudaVersions": [
192
+ "11.8",
193
+ "12.1",
194
+ "12.2"
195
+ ],
196
+ "tags": [
197
+ "gpu",
198
+ "multi-gpu",
199
+ "large",
200
+ "a10g",
201
+ "parallel",
202
+ "cuda-11",
203
+ "cuda-12"
204
+ ],
205
+ "family": "g5",
206
+ "acceleratorType": "cuda",
207
+ "hardware": "NVIDIA A10G",
208
+ "gpuArchitecture": "Ampere",
209
+ "defaultCudaVersion": "12.1",
210
+ "notes": "4x NVIDIA A10G GPUs (96GB total). Good for tensor parallelism"
211
+ },
212
+ "ml.g5.16xlarge": {
213
+ "category": "gpu",
214
+ "gpus": 1,
215
+ "vcpus": 64,
216
+ "memGb": 256,
217
+ "accelerator": "A10G 24GB",
218
+ "cudaVersions": [
219
+ "11.8",
220
+ "12.1",
221
+ "12.2"
222
+ ],
223
+ "tags": [
224
+ "gpu",
225
+ "single-gpu",
226
+ "a10g",
227
+ "cuda-11",
228
+ "cuda-12"
229
+ ],
230
+ "family": "g5",
231
+ "acceleratorType": "cuda",
232
+ "hardware": "NVIDIA A10G",
233
+ "gpuArchitecture": "Ampere",
234
+ "defaultCudaVersion": "12.1",
235
+ "notes": "1x NVIDIA A10G GPU (24GB). Very high CPU/memory ratio"
236
+ },
237
+ "ml.g5.24xlarge": {
238
+ "category": "gpu",
239
+ "gpus": 4,
240
+ "vcpus": 96,
241
+ "memGb": 384,
242
+ "accelerator": "4x A10G 96GB",
243
+ "cudaVersions": [
244
+ "11.8",
245
+ "12.1",
246
+ "12.2"
247
+ ],
248
+ "tags": [
249
+ "gpu",
250
+ "multi-gpu",
251
+ "a10g",
252
+ "cuda-11",
253
+ "cuda-12"
254
+ ],
255
+ "family": "g5",
256
+ "acceleratorType": "cuda",
257
+ "hardware": "NVIDIA A10G",
258
+ "gpuArchitecture": "Ampere",
259
+ "defaultCudaVersion": "12.1",
260
+ "notes": "4x NVIDIA A10G GPUs (96GB total). High CPU/memory with multi-GPU"
261
+ },
262
+ "ml.g5.2xlarge": {
263
+ "category": "gpu",
264
+ "gpus": 1,
265
+ "vcpus": 8,
266
+ "memGb": 32,
267
+ "accelerator": "A10G 24GB",
268
+ "cudaVersions": [
269
+ "11.8",
270
+ "12.1",
271
+ "12.2"
272
+ ],
273
+ "tags": [
274
+ "gpu",
275
+ "single-gpu",
276
+ "inference",
277
+ "a10g",
278
+ "cuda-11",
279
+ "cuda-12"
280
+ ],
281
+ "family": "g5",
282
+ "acceleratorType": "cuda",
283
+ "hardware": "NVIDIA A10G",
284
+ "gpuArchitecture": "Ampere",
285
+ "defaultCudaVersion": "12.1",
286
+ "notes": "1x NVIDIA A10G GPU (24GB). Better CPU/memory for preprocessing"
287
+ },
288
+ "ml.g5.48xlarge": {
289
+ "category": "gpu",
290
+ "gpus": 8,
291
+ "vcpus": 192,
292
+ "memGb": 768,
293
+ "accelerator": "8x A10G 192GB",
294
+ "cudaVersions": [
295
+ "11.8",
296
+ "12.1",
297
+ "12.2"
298
+ ],
299
+ "tags": [
300
+ "gpu",
301
+ "multi-gpu",
302
+ "a10g",
303
+ "cuda-11",
304
+ "cuda-12"
305
+ ],
306
+ "family": "g5",
307
+ "acceleratorType": "cuda",
308
+ "hardware": "NVIDIA A10G",
309
+ "gpuArchitecture": "Ampere",
310
+ "defaultCudaVersion": "12.1",
311
+ "notes": "8x NVIDIA A10G GPUs (192GB total). Maximum multi-GPU configuration"
312
+ },
313
+ "ml.g5.4xlarge": {
314
+ "category": "gpu",
315
+ "gpus": 1,
316
+ "vcpus": 16,
317
+ "memGb": 64,
318
+ "accelerator": "A10G 24GB",
319
+ "cudaVersions": [
320
+ "11.8",
321
+ "12.1",
322
+ "12.2"
323
+ ],
324
+ "tags": [
325
+ "gpu",
326
+ "single-gpu",
327
+ "large",
328
+ "a10g",
329
+ "cuda-11",
330
+ "cuda-12"
331
+ ],
332
+ "family": "g5",
333
+ "acceleratorType": "cuda",
334
+ "hardware": "NVIDIA A10G",
335
+ "gpuArchitecture": "Ampere",
336
+ "defaultCudaVersion": "12.1",
337
+ "notes": "1x NVIDIA A10G GPU (24GB). High CPU/memory for complex preprocessing"
338
+ },
339
+ "ml.g5.8xlarge": {
340
+ "category": "gpu",
341
+ "gpus": 1,
342
+ "vcpus": 32,
343
+ "memGb": 128,
344
+ "accelerator": "A10G 24GB",
345
+ "cudaVersions": [
346
+ "11.8",
347
+ "12.1",
348
+ "12.2"
349
+ ],
350
+ "tags": [
351
+ "gpu",
352
+ "single-gpu",
353
+ "a10g",
354
+ "cuda-11",
355
+ "cuda-12"
356
+ ],
357
+ "family": "g5",
358
+ "acceleratorType": "cuda",
359
+ "hardware": "NVIDIA A10G",
360
+ "gpuArchitecture": "Ampere",
361
+ "defaultCudaVersion": "12.1",
362
+ "notes": "1x NVIDIA A10G GPU (24GB). Maximum CPU/memory for single GPU"
363
+ },
364
+ "ml.g5.xlarge": {
365
+ "category": "gpu",
366
+ "gpus": 1,
367
+ "vcpus": 4,
368
+ "memGb": 16,
369
+ "accelerator": "A10G 24GB",
370
+ "cudaVersions": [
371
+ "11.8",
372
+ "12.1",
373
+ "12.2"
374
+ ],
375
+ "tags": [
376
+ "gpu",
377
+ "single-gpu",
378
+ "inference",
379
+ "a10g",
380
+ "cuda-11",
381
+ "cuda-12"
382
+ ],
383
+ "family": "g5",
384
+ "acceleratorType": "cuda",
385
+ "hardware": "NVIDIA A10G",
386
+ "gpuArchitecture": "Ampere",
387
+ "defaultCudaVersion": "12.1",
388
+ "notes": "1x NVIDIA A10G GPU (24GB). Good for small to medium models"
389
+ },
390
+ "ml.g6.12xlarge": {
391
+ "category": "gpu",
392
+ "gpus": 4,
393
+ "vcpus": 48,
394
+ "memGb": 192,
395
+ "accelerator": "4x L4 96GB",
396
+ "cudaVersions": [
397
+ "12.1",
398
+ "12.2",
399
+ "12.4"
400
+ ],
401
+ "tags": [
402
+ "gpu",
403
+ "multi-gpu",
404
+ "large",
405
+ "l4",
406
+ "newer",
407
+ "parallel",
408
+ "cuda-12"
409
+ ],
410
+ "family": "g6",
411
+ "acceleratorType": "cuda",
412
+ "hardware": "NVIDIA L4",
413
+ "gpuArchitecture": "Ada Lovelace",
414
+ "defaultCudaVersion": "12.2",
415
+ "notes": "Multi-GPU (newer)"
416
+ },
417
+ "ml.g6.2xlarge": {
418
+ "category": "gpu",
419
+ "gpus": 1,
420
+ "vcpus": 8,
421
+ "memGb": 32,
422
+ "accelerator": "L4 24GB",
423
+ "cudaVersions": [
424
+ "12.1",
425
+ "12.2",
426
+ "12.4"
427
+ ],
428
+ "tags": [
429
+ "gpu",
430
+ "single-gpu",
431
+ "inference",
432
+ "l4",
433
+ "newer",
434
+ "cuda-12"
435
+ ],
436
+ "family": "g6",
437
+ "acceleratorType": "cuda",
438
+ "hardware": "NVIDIA L4",
439
+ "gpuArchitecture": "Ada Lovelace",
440
+ "defaultCudaVersion": "12.2",
441
+ "notes": "Medium GPU (newer)"
442
+ },
443
+ "ml.g6.xlarge": {
444
+ "category": "gpu",
445
+ "gpus": 1,
446
+ "vcpus": 4,
447
+ "memGb": 16,
448
+ "accelerator": "L4 24GB",
449
+ "cudaVersions": [
450
+ "12.1",
451
+ "12.2",
452
+ "12.4"
453
+ ],
454
+ "tags": [
455
+ "gpu",
456
+ "single-gpu",
457
+ "inference",
458
+ "l4",
459
+ "newer",
460
+ "cuda-12"
461
+ ],
462
+ "family": "g6",
463
+ "acceleratorType": "cuda",
464
+ "hardware": "NVIDIA L4",
465
+ "gpuArchitecture": "Ada Lovelace",
466
+ "defaultCudaVersion": "12.2",
467
+ "notes": "Small GPU (newer)"
468
+ },
469
+ "ml.inf2.24xlarge": {
470
+ "category": "gpu",
471
+ "gpus": 6,
472
+ "vcpus": 96,
473
+ "memGb": 384,
474
+ "accelerator": "6x Inferentia2",
475
+ "cudaVersions": [
476
+ "2.15.0",
477
+ "2.16.0",
478
+ "2.17.0"
479
+ ],
480
+ "tags": [
481
+ "gpu",
482
+ "multi-gpu",
483
+ "inferentia2",
484
+ "neuron"
485
+ ],
486
+ "family": "inf2",
487
+ "acceleratorType": "neuron",
488
+ "hardware": "AWS Inferentia2",
489
+ "gpuArchitecture": "Inferentia2",
490
+ "defaultCudaVersion": "2.16.0",
491
+ "notes": "6x Inferentia2 chips. Multi-chip for large models"
492
+ },
493
+ "ml.inf2.48xlarge": {
494
+ "category": "gpu",
495
+ "gpus": 12,
496
+ "vcpus": 192,
497
+ "memGb": 768,
498
+ "accelerator": "12x Inferentia2",
499
+ "cudaVersions": [
500
+ "2.15.0",
501
+ "2.16.0",
502
+ "2.17.0"
503
+ ],
504
+ "tags": [
505
+ "gpu",
506
+ "multi-gpu",
507
+ "inferentia2",
508
+ "neuron"
509
+ ],
510
+ "family": "inf2",
511
+ "acceleratorType": "neuron",
512
+ "hardware": "AWS Inferentia2",
513
+ "gpuArchitecture": "Inferentia2",
514
+ "defaultCudaVersion": "2.16.0",
515
+ "notes": "12x Inferentia2 chips. Maximum multi-chip configuration"
516
+ },
517
+ "ml.inf2.8xlarge": {
518
+ "category": "gpu",
519
+ "gpus": 1,
520
+ "vcpus": 32,
521
+ "memGb": 128,
522
+ "accelerator": "Inferentia2",
523
+ "cudaVersions": [
524
+ "2.15.0",
525
+ "2.16.0",
526
+ "2.17.0"
527
+ ],
528
+ "tags": [
529
+ "gpu",
530
+ "single-gpu",
531
+ "inferentia2",
532
+ "neuron"
533
+ ],
534
+ "family": "inf2",
535
+ "acceleratorType": "neuron",
536
+ "hardware": "AWS Inferentia2",
537
+ "gpuArchitecture": "Inferentia2",
538
+ "defaultCudaVersion": "2.16.0",
539
+ "notes": "1x Inferentia2 chip. Higher CPU/memory for preprocessing"
540
+ },
541
+ "ml.inf2.xlarge": {
542
+ "category": "gpu",
543
+ "gpus": 1,
544
+ "vcpus": 4,
545
+ "memGb": 16,
546
+ "accelerator": "Inferentia2",
547
+ "cudaVersions": [
548
+ "2.15.0",
549
+ "2.16.0",
550
+ "2.17.0"
551
+ ],
552
+ "tags": [
553
+ "gpu",
554
+ "single-gpu",
555
+ "inferentia2",
556
+ "neuron"
557
+ ],
558
+ "family": "inf2",
559
+ "acceleratorType": "neuron",
560
+ "hardware": "AWS Inferentia2",
561
+ "gpuArchitecture": "Inferentia2",
562
+ "defaultCudaVersion": "2.16.0",
563
+ "notes": "1x Inferentia2 chip. Cost-effective for transformer inference"
564
+ },
565
+ "ml.m5.2xlarge": {
566
+ "category": "cpu",
567
+ "gpus": 0,
568
+ "vcpus": 8,
569
+ "memGb": 32,
570
+ "accelerator": "",
571
+ "cudaVersions": null,
572
+ "tags": [
573
+ "large",
574
+ "cpu",
575
+ "general",
576
+ "high-memory"
577
+ ],
578
+ "family": "m5",
579
+ "acceleratorType": "cpu",
580
+ "hardware": "None",
581
+ "gpuArchitecture": "None",
582
+ "defaultCudaVersion": null,
583
+ "notes": "Large CPU workloads"
584
+ },
585
+ "ml.m5.4xlarge": {
586
+ "category": "cpu",
587
+ "gpus": 0,
588
+ "vcpus": 16,
589
+ "memGb": 64,
590
+ "accelerator": "",
591
+ "cudaVersions": null,
592
+ "tags": [
593
+ "xlarge",
594
+ "cpu",
595
+ "general",
596
+ "high-memory",
597
+ "high-cpu"
598
+ ],
599
+ "family": "m5",
600
+ "acceleratorType": "cpu",
601
+ "hardware": "None",
602
+ "gpuArchitecture": "None",
603
+ "defaultCudaVersion": null,
604
+ "notes": "XL CPU workloads"
605
+ },
606
+ "ml.m5.large": {
607
+ "category": "cpu",
608
+ "gpus": 0,
609
+ "vcpus": 2,
610
+ "memGb": 8,
611
+ "accelerator": "",
612
+ "cudaVersions": null,
613
+ "tags": [
614
+ "small",
615
+ "cpu",
616
+ "general",
617
+ "cheap",
618
+ "cost-effective",
619
+ "budget"
620
+ ],
621
+ "family": "m5",
622
+ "acceleratorType": "cpu",
623
+ "hardware": "None",
624
+ "gpuArchitecture": "None",
625
+ "defaultCudaVersion": null,
626
+ "notes": "Small CPU workloads"
627
+ },
628
+ "ml.m5.xlarge": {
629
+ "category": "cpu",
630
+ "gpus": 0,
631
+ "vcpus": 4,
632
+ "memGb": 16,
633
+ "accelerator": "",
634
+ "cudaVersions": null,
635
+ "tags": [
636
+ "medium",
637
+ "cpu",
638
+ "general",
639
+ "cost-effective"
640
+ ],
641
+ "family": "m5",
642
+ "acceleratorType": "cpu",
643
+ "hardware": "None",
644
+ "gpuArchitecture": "None",
645
+ "defaultCudaVersion": null,
646
+ "notes": "Medium CPU workloads"
647
+ },
648
+ "ml.p3.16xlarge": {
649
+ "category": "gpu",
650
+ "gpus": 8,
651
+ "vcpus": 64,
652
+ "memGb": 488,
653
+ "accelerator": "8x V100 128GB",
654
+ "cudaVersions": [
655
+ "11.0",
656
+ "11.4",
657
+ "11.8"
658
+ ],
659
+ "tags": [
660
+ "gpu",
661
+ "multi-gpu",
662
+ "v100",
663
+ "cuda-11"
664
+ ],
665
+ "family": "p3",
666
+ "acceleratorType": "cuda",
667
+ "hardware": "NVIDIA V100",
668
+ "gpuArchitecture": "Volta",
669
+ "defaultCudaVersion": "11.8",
670
+ "notes": "8x NVIDIA V100 GPUs (128GB total). Maximum multi-GPU configuration"
671
+ },
672
+ "ml.p3.2xlarge": {
673
+ "category": "gpu",
674
+ "gpus": 1,
675
+ "vcpus": 8,
676
+ "memGb": 61,
677
+ "accelerator": "V100 16GB",
678
+ "cudaVersions": [
679
+ "11.0",
680
+ "11.4",
681
+ "11.8"
682
+ ],
683
+ "tags": [
684
+ "gpu",
685
+ "single-gpu",
686
+ "high-performance",
687
+ "training",
688
+ "v100",
689
+ "cuda-11"
690
+ ],
691
+ "family": "p3",
692
+ "acceleratorType": "cuda",
693
+ "hardware": "NVIDIA V100",
694
+ "gpuArchitecture": "Volta",
695
+ "defaultCudaVersion": "11.8",
696
+ "notes": "1x NVIDIA V100 GPU (16GB). High-performance for training and inference"
697
+ },
698
+ "ml.p3.8xlarge": {
699
+ "category": "gpu",
700
+ "gpus": 4,
701
+ "vcpus": 32,
702
+ "memGb": 244,
703
+ "accelerator": "4x V100 64GB",
704
+ "cudaVersions": [
705
+ "11.0",
706
+ "11.4",
707
+ "11.8"
708
+ ],
709
+ "tags": [
710
+ "gpu",
711
+ "multi-gpu",
712
+ "high-performance",
713
+ "training",
714
+ "v100",
715
+ "parallel",
716
+ "cuda-11"
717
+ ],
718
+ "family": "p3",
719
+ "acceleratorType": "cuda",
720
+ "hardware": "NVIDIA V100",
721
+ "gpuArchitecture": "Volta",
722
+ "defaultCudaVersion": "11.8",
723
+ "notes": "4x NVIDIA V100 GPUs (64GB total). Multi-GPU for large models"
724
+ },
725
+ "ml.r5.large": {
726
+ "category": "cpu",
727
+ "gpus": 0,
728
+ "vcpus": 2,
729
+ "memGb": 16,
730
+ "accelerator": "",
731
+ "cudaVersions": null,
732
+ "tags": [
733
+ "memory",
734
+ "cpu",
735
+ "high-memory"
736
+ ],
737
+ "family": "r5",
738
+ "acceleratorType": "cpu",
739
+ "hardware": "None",
740
+ "gpuArchitecture": "None",
741
+ "defaultCudaVersion": null,
742
+ "notes": ""
743
+ },
744
+ "ml.r5.xlarge": {
745
+ "category": "cpu",
746
+ "gpus": 0,
747
+ "vcpus": 4,
748
+ "memGb": 32,
749
+ "accelerator": "",
750
+ "cudaVersions": null,
751
+ "tags": [
752
+ "memory",
753
+ "cpu",
754
+ "high-memory"
755
+ ],
756
+ "family": "r5",
757
+ "acceleratorType": "cpu",
758
+ "hardware": "None",
759
+ "gpuArchitecture": "None",
760
+ "defaultCudaVersion": null,
761
+ "notes": ""
762
+ },
763
+ "ml.trn1.2xlarge": {
764
+ "category": "gpu",
765
+ "gpus": 1,
766
+ "vcpus": 8,
767
+ "memGb": 32,
768
+ "accelerator": "Trainium",
769
+ "cudaVersions": [
770
+ "2.15.0",
771
+ "2.16.0",
772
+ "2.17.0"
773
+ ],
774
+ "tags": [
775
+ "gpu",
776
+ "single-gpu",
777
+ "trainium",
778
+ "neuron"
779
+ ],
780
+ "family": "trn1",
781
+ "acceleratorType": "neuron",
782
+ "hardware": "AWS Trainium",
783
+ "gpuArchitecture": "Trainium1",
784
+ "defaultCudaVersion": "2.16.0",
785
+ "notes": "1x Trainium chip. Optimized for training, also supports inference"
786
+ },
787
+ "ml.trn1.32xlarge": {
788
+ "category": "gpu",
789
+ "gpus": 16,
790
+ "vcpus": 128,
791
+ "memGb": 512,
792
+ "accelerator": "16x Trainium",
793
+ "cudaVersions": [
794
+ "2.15.0",
795
+ "2.16.0",
796
+ "2.17.0"
797
+ ],
798
+ "tags": [
799
+ "gpu",
800
+ "multi-gpu",
801
+ "trainium",
802
+ "neuron"
803
+ ],
804
+ "family": "trn1",
805
+ "acceleratorType": "neuron",
806
+ "hardware": "AWS Trainium",
807
+ "gpuArchitecture": "Trainium1",
808
+ "defaultCudaVersion": "2.16.0",
809
+ "notes": "16x Trainium chips. Maximum multi-chip for large-scale training/inference"
810
+ }
811
+ },
812
+ "recommendations": {
813
+ "cpu": [
814
+ "ml.m5.large",
815
+ "ml.m5.xlarge",
816
+ "ml.m5.2xlarge",
817
+ "ml.m5.4xlarge",
818
+ "ml.c5.xlarge",
819
+ "ml.c5.2xlarge",
820
+ "ml.r5.large",
821
+ "ml.r5.xlarge"
822
+ ],
823
+ "gpu": [
824
+ "ml.g4dn.xlarge",
825
+ "ml.g4dn.2xlarge",
826
+ "ml.g5.xlarge",
827
+ "ml.g5.2xlarge",
828
+ "ml.g5.4xlarge",
829
+ "ml.p3.2xlarge",
830
+ "ml.g5.12xlarge",
831
+ "ml.g6.xlarge",
832
+ "ml.g6.2xlarge",
833
+ "ml.g6.12xlarge",
834
+ "ml.p3.8xlarge",
835
+ "ml.g5.8xlarge",
836
+ "ml.g5.16xlarge",
837
+ "ml.g5.24xlarge",
838
+ "ml.g5.48xlarge",
839
+ "ml.g4dn.4xlarge",
840
+ "ml.g4dn.8xlarge",
841
+ "ml.g4dn.12xlarge",
842
+ "ml.g4dn.16xlarge",
843
+ "ml.p3.16xlarge",
844
+ "ml.inf2.xlarge",
845
+ "ml.inf2.8xlarge",
846
+ "ml.inf2.24xlarge",
847
+ "ml.inf2.48xlarge",
848
+ "ml.trn1.2xlarge",
849
+ "ml.trn1.32xlarge"
850
+ ]
851
+ }
852
+ }