@aws/ml-container-creator 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -812,6 +812,508 @@
812
812
  "gpuType": "NVIDIA V100",
813
813
  "costTier": "high"
814
814
  },
815
+ "ml.p4d.24xlarge": {
816
+ "category": "gpu",
817
+ "gpus": 8,
818
+ "vcpus": 96,
819
+ "memGb": 1152,
820
+ "accelerator": "8x A100 320GB",
821
+ "cudaVersions": [
822
+ "11.8",
823
+ "12.1",
824
+ "12.2",
825
+ "12.4"
826
+ ],
827
+ "tags": [
828
+ "gpu",
829
+ "multi-gpu",
830
+ "high-performance",
831
+ "training",
832
+ "a100",
833
+ "parallel",
834
+ "cuda-12",
835
+ "large-models"
836
+ ],
837
+ "family": "p4d",
838
+ "acceleratorType": "cuda",
839
+ "hardware": "NVIDIA A100",
840
+ "gpuArchitecture": "Ampere",
841
+ "defaultCudaVersion": "12.1",
842
+ "notes": "8x NVIDIA A100 GPUs (40GB each, 320GB total). For very large models requiring multi-GPU tensor parallelism",
843
+ "gpuMemoryGb": 40,
844
+ "gpuType": "NVIDIA A100",
845
+ "costTier": "high"
846
+ },
847
+ "ml.p5.48xlarge": {
848
+ "category": "gpu",
849
+ "gpus": 8,
850
+ "vcpus": 192,
851
+ "memGb": 2048,
852
+ "accelerator": "8x H100 640GB",
853
+ "cudaVersions": [
854
+ "12.1",
855
+ "12.2",
856
+ "12.4"
857
+ ],
858
+ "tags": [
859
+ "gpu",
860
+ "multi-gpu",
861
+ "high-performance",
862
+ "h100",
863
+ "parallel",
864
+ "cuda-12",
865
+ "large-models"
866
+ ],
867
+ "family": "p5",
868
+ "acceleratorType": "cuda",
869
+ "hardware": "NVIDIA H100",
870
+ "gpuArchitecture": "Hopper",
871
+ "defaultCudaVersion": "12.2",
872
+ "notes": "8x NVIDIA H100 GPUs (80GB each, 640GB total). High-performance for large LLMs",
873
+ "gpuMemoryGb": 80,
874
+ "gpuType": "NVIDIA H100",
875
+ "costTier": "high"
876
+ },
877
+ "ml.p5e.48xlarge": {
878
+ "category": "gpu",
879
+ "gpus": 8,
880
+ "vcpus": 192,
881
+ "memGb": 2048,
882
+ "accelerator": "8x H200 1128GB",
883
+ "cudaVersions": [
884
+ "12.4"
885
+ ],
886
+ "tags": [
887
+ "gpu",
888
+ "multi-gpu",
889
+ "high-performance",
890
+ "h200",
891
+ "parallel",
892
+ "cuda-12",
893
+ "large-models"
894
+ ],
895
+ "family": "p5e",
896
+ "acceleratorType": "cuda",
897
+ "hardware": "NVIDIA H200",
898
+ "gpuArchitecture": "Hopper",
899
+ "defaultCudaVersion": "12.4",
900
+ "notes": "8x NVIDIA H200 GPUs (141GB each, 1128GB total). Maximum Hopper-class memory",
901
+ "gpuMemoryGb": 141,
902
+ "gpuType": "NVIDIA H200",
903
+ "costTier": "high"
904
+ },
905
+ "ml.p5en.48xlarge": {
906
+ "category": "gpu",
907
+ "gpus": 8,
908
+ "vcpus": 192,
909
+ "memGb": 2048,
910
+ "accelerator": "8x H200 1128GB",
911
+ "cudaVersions": [
912
+ "12.4"
913
+ ],
914
+ "tags": [
915
+ "gpu",
916
+ "multi-gpu",
917
+ "high-performance",
918
+ "h200",
919
+ "parallel",
920
+ "cuda-12",
921
+ "large-models",
922
+ "enhanced-networking"
923
+ ],
924
+ "family": "p5en",
925
+ "acceleratorType": "cuda",
926
+ "hardware": "NVIDIA H200",
927
+ "gpuArchitecture": "Hopper",
928
+ "defaultCudaVersion": "12.4",
929
+ "notes": "8x NVIDIA H200 GPUs (141GB each, 1128GB total). Enhanced networking variant of p5e",
930
+ "gpuMemoryGb": 141,
931
+ "gpuType": "NVIDIA H200",
932
+ "costTier": "high"
933
+ },
934
+ "ml.g6e.xlarge": {
935
+ "category": "gpu",
936
+ "gpus": 1,
937
+ "vcpus": 4,
938
+ "memGb": 32,
939
+ "accelerator": "L40S 48GB",
940
+ "cudaVersions": [
941
+ "12.2",
942
+ "12.4"
943
+ ],
944
+ "tags": [
945
+ "gpu",
946
+ "single-gpu",
947
+ "inference",
948
+ "l40s",
949
+ "newer",
950
+ "cuda-12"
951
+ ],
952
+ "family": "g6e",
953
+ "acceleratorType": "cuda",
954
+ "hardware": "NVIDIA L40S",
955
+ "gpuArchitecture": "Ada Lovelace",
956
+ "defaultCudaVersion": "12.4",
957
+ "notes": "1x NVIDIA L40S GPU (48GB). Cost-effective for medium models",
958
+ "gpuMemoryGb": 48,
959
+ "gpuType": "NVIDIA L40S",
960
+ "costTier": "medium"
961
+ },
962
+ "ml.g6e.2xlarge": {
963
+ "category": "gpu",
964
+ "gpus": 1,
965
+ "vcpus": 8,
966
+ "memGb": 64,
967
+ "accelerator": "L40S 48GB",
968
+ "cudaVersions": [
969
+ "12.2",
970
+ "12.4"
971
+ ],
972
+ "tags": [
973
+ "gpu",
974
+ "single-gpu",
975
+ "inference",
976
+ "l40s",
977
+ "newer",
978
+ "cuda-12"
979
+ ],
980
+ "family": "g6e",
981
+ "acceleratorType": "cuda",
982
+ "hardware": "NVIDIA L40S",
983
+ "gpuArchitecture": "Ada Lovelace",
984
+ "defaultCudaVersion": "12.4",
985
+ "notes": "1x NVIDIA L40S GPU (48GB). Better CPU/memory for preprocessing",
986
+ "gpuMemoryGb": 48,
987
+ "gpuType": "NVIDIA L40S",
988
+ "costTier": "medium"
989
+ },
990
+ "ml.g6e.4xlarge": {
991
+ "category": "gpu",
992
+ "gpus": 1,
993
+ "vcpus": 16,
994
+ "memGb": 128,
995
+ "accelerator": "L40S 48GB",
996
+ "cudaVersions": [
997
+ "12.2",
998
+ "12.4"
999
+ ],
1000
+ "tags": [
1001
+ "gpu",
1002
+ "single-gpu",
1003
+ "l40s",
1004
+ "newer",
1005
+ "cuda-12"
1006
+ ],
1007
+ "family": "g6e",
1008
+ "acceleratorType": "cuda",
1009
+ "hardware": "NVIDIA L40S",
1010
+ "gpuArchitecture": "Ada Lovelace",
1011
+ "defaultCudaVersion": "12.4",
1012
+ "notes": "1x NVIDIA L40S GPU (48GB). High CPU/memory ratio",
1013
+ "gpuMemoryGb": 48,
1014
+ "gpuType": "NVIDIA L40S",
1015
+ "costTier": "medium"
1016
+ },
1017
+ "ml.g6e.8xlarge": {
1018
+ "category": "gpu",
1019
+ "gpus": 1,
1020
+ "vcpus": 32,
1021
+ "memGb": 256,
1022
+ "accelerator": "L40S 48GB",
1023
+ "cudaVersions": [
1024
+ "12.2",
1025
+ "12.4"
1026
+ ],
1027
+ "tags": [
1028
+ "gpu",
1029
+ "single-gpu",
1030
+ "l40s",
1031
+ "newer",
1032
+ "cuda-12"
1033
+ ],
1034
+ "family": "g6e",
1035
+ "acceleratorType": "cuda",
1036
+ "hardware": "NVIDIA L40S",
1037
+ "gpuArchitecture": "Ada Lovelace",
1038
+ "defaultCudaVersion": "12.4",
1039
+ "notes": "1x NVIDIA L40S GPU (48GB). Maximum CPU/memory for single GPU",
1040
+ "gpuMemoryGb": 48,
1041
+ "gpuType": "NVIDIA L40S",
1042
+ "costTier": "medium"
1043
+ },
1044
+ "ml.g6e.12xlarge": {
1045
+ "category": "gpu",
1046
+ "gpus": 4,
1047
+ "vcpus": 48,
1048
+ "memGb": 384,
1049
+ "accelerator": "4x L40S 192GB",
1050
+ "cudaVersions": [
1051
+ "12.2",
1052
+ "12.4"
1053
+ ],
1054
+ "tags": [
1055
+ "gpu",
1056
+ "multi-gpu",
1057
+ "l40s",
1058
+ "newer",
1059
+ "parallel",
1060
+ "cuda-12"
1061
+ ],
1062
+ "family": "g6e",
1063
+ "acceleratorType": "cuda",
1064
+ "hardware": "NVIDIA L40S",
1065
+ "gpuArchitecture": "Ada Lovelace",
1066
+ "defaultCudaVersion": "12.4",
1067
+ "notes": "4x NVIDIA L40S GPUs (192GB total). Good for tensor parallelism",
1068
+ "gpuMemoryGb": 48,
1069
+ "gpuType": "NVIDIA L40S",
1070
+ "costTier": "medium"
1071
+ },
1072
+ "ml.g6e.24xlarge": {
1073
+ "category": "gpu",
1074
+ "gpus": 4,
1075
+ "vcpus": 96,
1076
+ "memGb": 768,
1077
+ "accelerator": "4x L40S 192GB",
1078
+ "cudaVersions": [
1079
+ "12.2",
1080
+ "12.4"
1081
+ ],
1082
+ "tags": [
1083
+ "gpu",
1084
+ "multi-gpu",
1085
+ "l40s",
1086
+ "newer",
1087
+ "cuda-12"
1088
+ ],
1089
+ "family": "g6e",
1090
+ "acceleratorType": "cuda",
1091
+ "hardware": "NVIDIA L40S",
1092
+ "gpuArchitecture": "Ada Lovelace",
1093
+ "defaultCudaVersion": "12.4",
1094
+ "notes": "4x NVIDIA L40S GPUs (192GB total). High CPU/memory with multi-GPU",
1095
+ "gpuMemoryGb": 48,
1096
+ "gpuType": "NVIDIA L40S",
1097
+ "costTier": "medium"
1098
+ },
1099
+ "ml.g6e.48xlarge": {
1100
+ "category": "gpu",
1101
+ "gpus": 8,
1102
+ "vcpus": 192,
1103
+ "memGb": 1536,
1104
+ "accelerator": "8x L40S 384GB",
1105
+ "cudaVersions": [
1106
+ "12.2",
1107
+ "12.4"
1108
+ ],
1109
+ "tags": [
1110
+ "gpu",
1111
+ "multi-gpu",
1112
+ "l40s",
1113
+ "newer",
1114
+ "cuda-12",
1115
+ "large-models"
1116
+ ],
1117
+ "family": "g6e",
1118
+ "acceleratorType": "cuda",
1119
+ "hardware": "NVIDIA L40S",
1120
+ "gpuArchitecture": "Ada Lovelace",
1121
+ "defaultCudaVersion": "12.4",
1122
+ "notes": "8x NVIDIA L40S GPUs (384GB total). Maximum multi-GPU configuration",
1123
+ "gpuMemoryGb": 48,
1124
+ "gpuType": "NVIDIA L40S",
1125
+ "costTier": "medium"
1126
+ },
1127
+ "ml.p6-b200.48xlarge": {
1128
+ "category": "gpu",
1129
+ "gpus": 8,
1130
+ "vcpus": 192,
1131
+ "memGb": 2048,
1132
+ "accelerator": "8x B200 1432GB",
1133
+ "cudaVersions": [
1134
+ "13.0"
1135
+ ],
1136
+ "tags": [
1137
+ "gpu",
1138
+ "multi-gpu",
1139
+ "high-performance",
1140
+ "blackwell",
1141
+ "b200",
1142
+ "parallel",
1143
+ "cuda-13",
1144
+ "large-models"
1145
+ ],
1146
+ "family": "p6",
1147
+ "acceleratorType": "cuda",
1148
+ "hardware": "NVIDIA B200",
1149
+ "gpuArchitecture": "Blackwell",
1150
+ "defaultCudaVersion": "13.0",
1151
+ "notes": "8x NVIDIA B200 GPUs (179GB each, 1432GB total). Blackwell architecture for frontier models",
1152
+ "gpuMemoryGb": 179,
1153
+ "gpuType": "NVIDIA B200",
1154
+ "costTier": "high"
1155
+ },
1156
+ "ml.g7e.2xlarge": {
1157
+ "category": "gpu",
1158
+ "gpus": 1,
1159
+ "vcpus": 8,
1160
+ "memGb": 64,
1161
+ "accelerator": "RTX PRO 6000 96GB",
1162
+ "cudaVersions": [
1163
+ "13.0"
1164
+ ],
1165
+ "tags": [
1166
+ "gpu",
1167
+ "single-gpu",
1168
+ "inference",
1169
+ "blackwell",
1170
+ "rtx-pro",
1171
+ "cuda-13"
1172
+ ],
1173
+ "family": "g7e",
1174
+ "acceleratorType": "cuda",
1175
+ "hardware": "NVIDIA RTX PRO 6000",
1176
+ "gpuArchitecture": "Blackwell",
1177
+ "defaultCudaVersion": "13.0",
1178
+ "notes": "1x NVIDIA RTX PRO 6000 GPU (96GB). Blackwell for inference",
1179
+ "gpuMemoryGb": 96,
1180
+ "gpuType": "NVIDIA RTX PRO 6000",
1181
+ "costTier": "medium"
1182
+ },
1183
+ "ml.g7e.4xlarge": {
1184
+ "category": "gpu",
1185
+ "gpus": 1,
1186
+ "vcpus": 16,
1187
+ "memGb": 128,
1188
+ "accelerator": "RTX PRO 6000 96GB",
1189
+ "cudaVersions": [
1190
+ "13.0"
1191
+ ],
1192
+ "tags": [
1193
+ "gpu",
1194
+ "single-gpu",
1195
+ "blackwell",
1196
+ "rtx-pro",
1197
+ "cuda-13"
1198
+ ],
1199
+ "family": "g7e",
1200
+ "acceleratorType": "cuda",
1201
+ "hardware": "NVIDIA RTX PRO 6000",
1202
+ "gpuArchitecture": "Blackwell",
1203
+ "defaultCudaVersion": "13.0",
1204
+ "notes": "1x NVIDIA RTX PRO 6000 GPU (96GB). High CPU/memory ratio",
1205
+ "gpuMemoryGb": 96,
1206
+ "gpuType": "NVIDIA RTX PRO 6000",
1207
+ "costTier": "medium"
1208
+ },
1209
+ "ml.g7e.8xlarge": {
1210
+ "category": "gpu",
1211
+ "gpus": 1,
1212
+ "vcpus": 32,
1213
+ "memGb": 256,
1214
+ "accelerator": "RTX PRO 6000 96GB",
1215
+ "cudaVersions": [
1216
+ "13.0"
1217
+ ],
1218
+ "tags": [
1219
+ "gpu",
1220
+ "single-gpu",
1221
+ "blackwell",
1222
+ "rtx-pro",
1223
+ "cuda-13"
1224
+ ],
1225
+ "family": "g7e",
1226
+ "acceleratorType": "cuda",
1227
+ "hardware": "NVIDIA RTX PRO 6000",
1228
+ "gpuArchitecture": "Blackwell",
1229
+ "defaultCudaVersion": "13.0",
1230
+ "notes": "1x NVIDIA RTX PRO 6000 GPU (96GB). Maximum CPU/memory for single GPU",
1231
+ "gpuMemoryGb": 96,
1232
+ "gpuType": "NVIDIA RTX PRO 6000",
1233
+ "costTier": "medium"
1234
+ },
1235
+ "ml.g7e.12xlarge": {
1236
+ "category": "gpu",
1237
+ "gpus": 2,
1238
+ "vcpus": 48,
1239
+ "memGb": 512,
1240
+ "accelerator": "2x RTX PRO 6000 192GB",
1241
+ "cudaVersions": [
1242
+ "13.0"
1243
+ ],
1244
+ "tags": [
1245
+ "gpu",
1246
+ "multi-gpu",
1247
+ "blackwell",
1248
+ "rtx-pro",
1249
+ "parallel",
1250
+ "cuda-13"
1251
+ ],
1252
+ "family": "g7e",
1253
+ "acceleratorType": "cuda",
1254
+ "hardware": "NVIDIA RTX PRO 6000",
1255
+ "gpuArchitecture": "Blackwell",
1256
+ "defaultCudaVersion": "13.0",
1257
+ "notes": "2x NVIDIA RTX PRO 6000 GPUs (192GB total). Multi-GPU for larger models",
1258
+ "gpuMemoryGb": 96,
1259
+ "gpuType": "NVIDIA RTX PRO 6000",
1260
+ "costTier": "medium"
1261
+ },
1262
+ "ml.g7e.24xlarge": {
1263
+ "category": "gpu",
1264
+ "gpus": 4,
1265
+ "vcpus": 96,
1266
+ "memGb": 1024,
1267
+ "accelerator": "4x RTX PRO 6000 384GB",
1268
+ "cudaVersions": [
1269
+ "13.0"
1270
+ ],
1271
+ "tags": [
1272
+ "gpu",
1273
+ "multi-gpu",
1274
+ "blackwell",
1275
+ "rtx-pro",
1276
+ "parallel",
1277
+ "cuda-13"
1278
+ ],
1279
+ "family": "g7e",
1280
+ "acceleratorType": "cuda",
1281
+ "hardware": "NVIDIA RTX PRO 6000",
1282
+ "gpuArchitecture": "Blackwell",
1283
+ "defaultCudaVersion": "13.0",
1284
+ "notes": "4x NVIDIA RTX PRO 6000 GPUs (384GB total). High-capacity multi-GPU",
1285
+ "gpuMemoryGb": 96,
1286
+ "gpuType": "NVIDIA RTX PRO 6000",
1287
+ "costTier": "medium"
1288
+ },
1289
+ "ml.g7e.48xlarge": {
1290
+ "category": "gpu",
1291
+ "gpus": 8,
1292
+ "vcpus": 192,
1293
+ "memGb": 2048,
1294
+ "accelerator": "8x RTX PRO 6000 768GB",
1295
+ "cudaVersions": [
1296
+ "13.0"
1297
+ ],
1298
+ "tags": [
1299
+ "gpu",
1300
+ "multi-gpu",
1301
+ "blackwell",
1302
+ "rtx-pro",
1303
+ "parallel",
1304
+ "cuda-13",
1305
+ "large-models"
1306
+ ],
1307
+ "family": "g7e",
1308
+ "acceleratorType": "cuda",
1309
+ "hardware": "NVIDIA RTX PRO 6000",
1310
+ "gpuArchitecture": "Blackwell",
1311
+ "defaultCudaVersion": "13.0",
1312
+ "notes": "8x NVIDIA RTX PRO 6000 GPUs (768GB total). Maximum Blackwell multi-GPU",
1313
+ "gpuMemoryGb": 96,
1314
+ "gpuType": "NVIDIA RTX PRO 6000",
1315
+ "costTier": "medium"
1316
+ },
815
1317
  "ml.r5.large": {
816
1318
  "category": "cpu",
817
1319
  "gpus": 0,
@@ -923,25 +1425,38 @@
923
1425
  "ml.r5.xlarge"
924
1426
  ],
925
1427
  "gpu": [
926
- "ml.g4dn.xlarge",
927
- "ml.g4dn.2xlarge",
1428
+ "ml.g7e.2xlarge",
1429
+ "ml.g7e.4xlarge",
1430
+ "ml.g7e.8xlarge",
1431
+ "ml.g7e.12xlarge",
1432
+ "ml.g7e.24xlarge",
1433
+ "ml.g7e.48xlarge",
1434
+ "ml.g6e.xlarge",
1435
+ "ml.g6e.2xlarge",
1436
+ "ml.g6e.4xlarge",
1437
+ "ml.g6e.8xlarge",
1438
+ "ml.g6e.12xlarge",
1439
+ "ml.g6e.24xlarge",
1440
+ "ml.g6e.48xlarge",
1441
+ "ml.g6.xlarge",
1442
+ "ml.g6.2xlarge",
1443
+ "ml.g6.12xlarge",
928
1444
  "ml.g5.xlarge",
929
1445
  "ml.g5.2xlarge",
930
1446
  "ml.g5.4xlarge",
931
- "ml.p3.2xlarge",
932
1447
  "ml.g5.12xlarge",
933
- "ml.g6.xlarge",
934
- "ml.g6.2xlarge",
935
- "ml.g6.12xlarge",
936
- "ml.p3.8xlarge",
937
- "ml.g5.8xlarge",
938
- "ml.g5.16xlarge",
939
1448
  "ml.g5.24xlarge",
940
1449
  "ml.g5.48xlarge",
941
- "ml.g4dn.4xlarge",
942
- "ml.g4dn.8xlarge",
1450
+ "ml.g4dn.xlarge",
1451
+ "ml.g4dn.2xlarge",
943
1452
  "ml.g4dn.12xlarge",
944
- "ml.g4dn.16xlarge",
1453
+ "ml.p6-b200.48xlarge",
1454
+ "ml.p5e.48xlarge",
1455
+ "ml.p5en.48xlarge",
1456
+ "ml.p5.48xlarge",
1457
+ "ml.p4d.24xlarge",
1458
+ "ml.p3.2xlarge",
1459
+ "ml.p3.8xlarge",
945
1460
  "ml.p3.16xlarge",
946
1461
  "ml.inf2.xlarge",
947
1462
  "ml.inf2.8xlarge",
@@ -542,7 +542,7 @@
542
542
  "TRTLLM_ENABLE_CHUNKED_CONTEXT": "true",
543
543
  "UCX_MEMTYPE_CACHE": "n"
544
544
  },
545
- "inferenceAmiVersion": "al2-ami-sagemaker-inference-gpu-3-2"
545
+ "inferenceAmiVersion": "al2-ami-sagemaker-inference-gpu-3-1"
546
546
  },
547
547
  "accelerator": {
548
548
  "type": "cuda",
@@ -612,7 +612,7 @@
612
612
  "TRTLLM_ENABLE_CHUNKED_CONTEXT": "true",
613
613
  "UCX_MEMTYPE_CACHE": "n"
614
614
  },
615
- "inferenceAmiVersion": "al2-ami-sagemaker-inference-gpu-3-2"
615
+ "inferenceAmiVersion": "al2-ami-sagemaker-inference-gpu-3-1"
616
616
  },
617
617
  "accelerator": {
618
618
  "type": "cuda",
@@ -681,7 +681,7 @@
681
681
  "OPTION_MAX_ROLLING_BATCH_SIZE": "32",
682
682
  "OPTION_DTYPE": "fp16"
683
683
  },
684
- "inferenceAmiVersion": "al2-ami-sagemaker-inference-gpu-3-2"
684
+ "inferenceAmiVersion": "al2-ami-sagemaker-inference-gpu-3-1"
685
685
  },
686
686
  "accelerator": {
687
687
  "type": "cuda",
@@ -754,7 +754,7 @@
754
754
  "OPTION_MAX_ROLLING_BATCH_SIZE": "32",
755
755
  "OPTION_DTYPE": "fp16"
756
756
  },
757
- "inferenceAmiVersion": "al2-ami-sagemaker-inference-gpu-3-2"
757
+ "inferenceAmiVersion": "al2-ami-sagemaker-inference-gpu-3-1"
758
758
  },
759
759
  "accelerator": {
760
760
  "type": "cuda",
@@ -828,7 +828,7 @@
828
828
  "OPTION_TENSOR_PARALLEL_DEGREE": "1",
829
829
  "OPTION_DEVICE_MAP": "auto"
830
830
  },
831
- "inferenceAmiVersion": "al2-ami-sagemaker-inference-gpu-3-2"
831
+ "inferenceAmiVersion": "al2-ami-sagemaker-inference-gpu-3-1"
832
832
  },
833
833
  "accelerator": {
834
834
  "type": "cuda",
@@ -881,7 +881,7 @@
881
881
  "OPTION_TENSOR_PARALLEL_DEGREE": "1",
882
882
  "OPTION_DEVICE_MAP": "auto"
883
883
  },
884
- "inferenceAmiVersion": "al2-ami-sagemaker-inference-gpu-3-2"
884
+ "inferenceAmiVersion": "al2-ami-sagemaker-inference-gpu-3-1"
885
885
  },
886
886
  "accelerator": {
887
887
  "type": "cuda",
@@ -935,7 +935,7 @@
935
935
  "HF_TOKEN": "${hfToken}",
936
936
  "VLLM_WORKER_MULTIPROC_METHOD": "spawn"
937
937
  },
938
- "inferenceAmiVersion": "al2-ami-sagemaker-inference-gpu-3-2"
938
+ "inferenceAmiVersion": "al2-ami-sagemaker-inference-gpu-3-1"
939
939
  },
940
940
  "accelerator": {
941
941
  "type": "cuda",
@@ -985,7 +985,7 @@
985
985
  "HF_TOKEN": "${hfToken}",
986
986
  "VLLM_WORKER_MULTIPROC_METHOD": "spawn"
987
987
  },
988
- "inferenceAmiVersion": "al2-ami-sagemaker-inference-gpu-3-2"
988
+ "inferenceAmiVersion": "al2-ami-sagemaker-inference-gpu-3-1"
989
989
  },
990
990
  "accelerator": {
991
991
  "type": "cuda",
@@ -1036,7 +1036,7 @@
1036
1036
  "envVars": {
1037
1037
  "TRITON_MODEL_REPOSITORY": "/opt/ml/model/model_repository"
1038
1038
  },
1039
- "inferenceAmiVersion": "al2-ami-sagemaker-inference-gpu-3-2"
1039
+ "inferenceAmiVersion": "al2-ami-sagemaker-inference-gpu-3-1"
1040
1040
  },
1041
1041
  "accelerator": {
1042
1042
  "type": "cuda",
@@ -1067,7 +1067,7 @@
1067
1067
  "envVars": {
1068
1068
  "TRITON_MODEL_REPOSITORY": "/opt/ml/model/model_repository"
1069
1069
  },
1070
- "inferenceAmiVersion": "al2-ami-sagemaker-inference-gpu-3-2"
1070
+ "inferenceAmiVersion": "al2-ami-sagemaker-inference-gpu-3-1"
1071
1071
  },
1072
1072
  "accelerator": {
1073
1073
  "type": "cuda",
@@ -1098,7 +1098,7 @@
1098
1098
  "envVars": {
1099
1099
  "TRITON_MODEL_REPOSITORY": "/opt/ml/model/model_repository"
1100
1100
  },
1101
- "inferenceAmiVersion": "al2-ami-sagemaker-inference-gpu-3-2"
1101
+ "inferenceAmiVersion": "al2-ami-sagemaker-inference-gpu-3-1"
1102
1102
  },
1103
1103
  "accelerator": {
1104
1104
  "type": "cuda",
@@ -1129,7 +1129,7 @@
1129
1129
  "envVars": {
1130
1130
  "TRITON_MODEL_REPOSITORY": "/opt/ml/model/model_repository"
1131
1131
  },
1132
- "inferenceAmiVersion": "al2-ami-sagemaker-inference-gpu-3-2"
1132
+ "inferenceAmiVersion": "al2-ami-sagemaker-inference-gpu-3-1"
1133
1133
  },
1134
1134
  "accelerator": {
1135
1135
  "type": "cuda",
@@ -1160,7 +1160,7 @@
1160
1160
  "envVars": {
1161
1161
  "TRITON_MODEL_REPOSITORY": "/opt/ml/model/model_repository"
1162
1162
  },
1163
- "inferenceAmiVersion": "al2-ami-sagemaker-inference-gpu-3-2"
1163
+ "inferenceAmiVersion": "al2-ami-sagemaker-inference-gpu-3-1"
1164
1164
  },
1165
1165
  "accelerator": {
1166
1166
  "type": "cuda",
@@ -1191,7 +1191,7 @@
1191
1191
  "envVars": {
1192
1192
  "TRITON_MODEL_REPOSITORY": "/opt/ml/model/model_repository"
1193
1193
  },
1194
- "inferenceAmiVersion": "al2-ami-sagemaker-inference-gpu-3-2"
1194
+ "inferenceAmiVersion": "al2-ami-sagemaker-inference-gpu-3-1"
1195
1195
  },
1196
1196
  "accelerator": {
1197
1197
  "type": "cuda",
@@ -1222,7 +1222,7 @@
1222
1222
  "envVars": {
1223
1223
  "TRITON_MODEL_REPOSITORY": "/opt/ml/model/model_repository"
1224
1224
  },
1225
- "inferenceAmiVersion": "al2-ami-sagemaker-inference-gpu-3-2"
1225
+ "inferenceAmiVersion": "al2-ami-sagemaker-inference-gpu-3-1"
1226
1226
  },
1227
1227
  "accelerator": {
1228
1228
  "type": "cuda",