@aws/ml-container-creator 0.2.5 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. package/bin/cli.js +7 -2
  2. package/package.json +7 -8
  3. package/servers/base-image-picker/index.js +3 -3
  4. package/servers/base-image-picker/manifest.json +4 -2
  5. package/servers/instance-sizer/index.js +561 -0
  6. package/servers/instance-sizer/lib/instance-ranker.js +245 -0
  7. package/servers/instance-sizer/lib/model-resolver.js +265 -0
  8. package/servers/instance-sizer/lib/vram-estimator.js +177 -0
  9. package/servers/instance-sizer/manifest.json +17 -0
  10. package/servers/instance-sizer/package.json +15 -0
  11. package/servers/{instance-recommender → lib}/catalogs/instances.json +136 -34
  12. package/servers/{base-image-picker → lib}/catalogs/model-servers.json +19 -249
  13. package/servers/lib/catalogs/model-sizes.json +131 -0
  14. package/servers/lib/catalogs/models.json +602 -0
  15. package/servers/{model-picker → lib}/catalogs/popular-diffusors.json +32 -10
  16. package/servers/{model-picker → lib}/catalogs/popular-transformers.json +59 -26
  17. package/servers/{base-image-picker → lib}/catalogs/python-slim.json +12 -12
  18. package/servers/lib/schemas/image-catalog.schema.json +0 -12
  19. package/servers/lib/schemas/instances.schema.json +29 -0
  20. package/servers/lib/schemas/model-catalog.schema.json +12 -10
  21. package/servers/lib/schemas/unified-model-catalog.schema.json +129 -0
  22. package/servers/model-picker/index.js +2 -3
  23. package/servers/model-picker/manifest.json +2 -3
  24. package/servers/region-picker/index.js +1 -1
  25. package/servers/region-picker/manifest.json +1 -1
  26. package/src/app.js +17 -0
  27. package/src/lib/bootstrap-command-handler.js +38 -0
  28. package/src/lib/cli-handler.js +3 -3
  29. package/src/lib/config-manager.js +4 -1
  30. package/src/lib/configuration-manager.js +2 -2
  31. package/src/lib/cross-cutting-checker.js +341 -0
  32. package/src/lib/dry-run-validator.js +78 -0
  33. package/src/lib/generation-validator.js +102 -0
  34. package/src/lib/mcp-validator-config.js +89 -0
  35. package/src/lib/payload-builder.js +153 -0
  36. package/src/lib/prompt-runner.js +445 -135
  37. package/src/lib/prompts.js +1 -1
  38. package/src/lib/registry-loader.js +5 -5
  39. package/src/lib/schema-sync.js +203 -0
  40. package/src/lib/schema-validation-engine.js +195 -0
  41. package/src/lib/service-model-parser.js +102 -0
  42. package/src/lib/validate-runner.js +167 -0
  43. package/src/lib/validation-report.js +133 -0
  44. package/src/lib/validators/base-validator.js +36 -0
  45. package/src/lib/validators/catalog-validator.js +177 -0
  46. package/src/lib/validators/enum-validator.js +120 -0
  47. package/src/lib/validators/required-field-validator.js +150 -0
  48. package/src/lib/validators/type-validator.js +313 -0
  49. package/templates/Dockerfile +1 -1
  50. package/templates/do/build +15 -5
  51. package/templates/do/run +5 -1
  52. package/templates/do/validate +61 -0
  53. package/servers/instance-recommender/LICENSE +0 -202
  54. package/servers/instance-recommender/index.js +0 -284
  55. package/servers/instance-recommender/manifest.json +0 -16
  56. package/servers/instance-recommender/package.json +0 -15
  57. package/servers/{model-picker → lib}/catalogs/jumpstart-public.json +0 -0
  58. package/servers/{region-picker → lib}/catalogs/regions.json +0 -0
  59. package/servers/{base-image-picker → lib}/catalogs/triton-backends.json +0 -0
  60. package/servers/{base-image-picker → lib}/catalogs/triton.json +0 -0
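The headline change is structural: the per-server catalogs are consolidated into a shared servers/lib/catalogs directory (items 11–17 and 57–60), with JSON Schemas alongside in servers/lib/schemas (items 18–21); the instance-recommender server is removed (items 53–56) and replaced by the new instance-sizer (items 5–10); and a validation pipeline appears under src/lib (items 31–48). A minimal sketch of resolving a catalog from the shared location — the loadCatalog helper and path layout are illustrative assumptions, not the shipped registry-loader.js:

    // catalog-loader.js — hypothetical sketch; the actual loading logic lives in
    // package/src/lib/registry-loader.js, which this diff does not show.
    import { readFile } from 'node:fs/promises';
    import path from 'node:path';
    import { fileURLToPath } from 'node:url';

    // Catalogs now live in one shared directory instead of per-server copies.
    const CATALOG_DIR = path.join(
      path.dirname(fileURLToPath(import.meta.url)), 'servers', 'lib', 'catalogs');

    // e.g. loadCatalog('instances') -> parsed servers/lib/catalogs/instances.json
    export async function loadCatalog(name) {
      const raw = await readFile(path.join(CATALOG_DIR, `${name}.json`), 'utf8');
      return JSON.parse(raw);
    }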
package/servers/instance-sizer/package.json
@@ -0,0 +1,15 @@
+{
+  "name": "@amzn/ml-container-creator-instance-sizer",
+  "private": true,
+  "version": "1.0.0",
+  "description": "MCP server that estimates VRAM requirements from model metadata and returns filtered, ranked SageMaker instance recommendations.",
+  "type": "module",
+  "main": "index.js",
+  "license": "Apache-2.0",
+  "scripts": {
+    "test": "node test.js"
+  },
+  "dependencies": {
+    "@modelcontextprotocol/sdk": "^1.0.0"
+  }
+}
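The description summarizes the server's pipeline: estimate VRAM from model metadata, then filter and rank instances against it. The shipped estimator lives in servers/instance-sizer/lib/vram-estimator.js, whose contents this diff does not show; a rough sketch of the common heuristic it likely resembles — weights at bytes-per-parameter for the dtype, scaled for KV-cache and activation headroom. The function name and the 1.2× overhead factor are assumptions:

    // Hypothetical VRAM estimate; not the shipped lib/vram-estimator.js logic.
    const BYTES_PER_PARAM = { fp32: 4, fp16: 2, bf16: 2, int8: 1, int4: 0.5 };

    function estimateVramGb(paramsBillions, dtype = 'fp16', overhead = 1.2) {
      const bytes = BYTES_PER_PARAM[dtype];
      if (bytes === undefined) throw new Error(`unknown dtype: ${dtype}`);
      // 1e9 params x bytes/param = that many GB of weights; add ~20% headroom
      // for KV cache, activations, and runtime buffers.
      return paramsBillions * bytes * overhead;
    }

    // A 7B fp16 model: 7 * 2 * 1.2 ≈ 16.8 GB — fits a 24 GB A10G, not a 16 GB T4.
    console.log(estimateVramGb(7));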
package/servers/{instance-recommender → lib}/catalogs/instances.json
@@ -17,7 +17,10 @@
       "hardware": "None",
       "gpuArchitecture": "None",
       "defaultCudaVersion": null,
-      "notes": ""
+      "notes": "",
+      "gpuMemoryGb": null,
+      "gpuType": null,
+      "costTier": "low"
     },
     "ml.c5.xlarge": {
       "category": "cpu",
@@ -36,7 +39,10 @@
       "hardware": "None",
       "gpuArchitecture": "None",
       "defaultCudaVersion": null,
-      "notes": ""
+      "notes": "",
+      "gpuMemoryGb": null,
+      "gpuType": null,
+      "costTier": "low"
     },
     "ml.g4dn.12xlarge": {
       "category": "gpu",
@@ -59,7 +65,10 @@
       "hardware": "NVIDIA T4",
       "gpuArchitecture": "Turing",
       "defaultCudaVersion": "11.8",
-      "notes": "4x NVIDIA T4 GPUs (64GB total). Multi-GPU for tensor parallelism"
+      "notes": "4x NVIDIA T4 GPUs (64GB total). Multi-GPU for tensor parallelism",
+      "gpuMemoryGb": 16,
+      "gpuType": "NVIDIA T4",
+      "costTier": "low"
     },
     "ml.g4dn.16xlarge": {
       "category": "gpu",
@@ -82,7 +91,10 @@
       "hardware": "NVIDIA T4",
       "gpuArchitecture": "Turing",
       "defaultCudaVersion": "11.8",
-      "notes": "1x NVIDIA T4 GPU (16GB). Very high CPU/memory ratio"
+      "notes": "1x NVIDIA T4 GPU (16GB). Very high CPU/memory ratio",
+      "gpuMemoryGb": 16,
+      "gpuType": "NVIDIA T4",
+      "costTier": "low"
     },
     "ml.g4dn.2xlarge": {
       "category": "gpu",
@@ -108,7 +120,10 @@
       "hardware": "NVIDIA T4",
       "gpuArchitecture": "Turing",
       "defaultCudaVersion": "11.8",
-      "notes": "1x NVIDIA T4 GPU (16GB). Better CPU/memory for preprocessing"
+      "notes": "1x NVIDIA T4 GPU (16GB). Better CPU/memory for preprocessing",
+      "gpuMemoryGb": 16,
+      "gpuType": "NVIDIA T4",
+      "costTier": "low"
     },
     "ml.g4dn.4xlarge": {
       "category": "gpu",
@@ -131,7 +146,10 @@
       "hardware": "NVIDIA T4",
       "gpuArchitecture": "Turing",
       "defaultCudaVersion": "11.8",
-      "notes": "1x NVIDIA T4 GPU (16GB). High CPU/memory ratio"
+      "notes": "1x NVIDIA T4 GPU (16GB). High CPU/memory ratio",
+      "gpuMemoryGb": 16,
+      "gpuType": "NVIDIA T4",
+      "costTier": "low"
     },
     "ml.g4dn.8xlarge": {
       "category": "gpu",
@@ -154,7 +172,10 @@
       "hardware": "NVIDIA T4",
       "gpuArchitecture": "Turing",
       "defaultCudaVersion": "11.8",
-      "notes": "1x NVIDIA T4 GPU (16GB). Maximum CPU/memory for single GPU"
+      "notes": "1x NVIDIA T4 GPU (16GB). Maximum CPU/memory for single GPU",
+      "gpuMemoryGb": 16,
+      "gpuType": "NVIDIA T4",
+      "costTier": "low"
     },
     "ml.g4dn.xlarge": {
       "category": "gpu",
@@ -180,7 +201,10 @@
       "hardware": "NVIDIA T4",
       "gpuArchitecture": "Turing",
       "defaultCudaVersion": "11.8",
-      "notes": "1x NVIDIA T4 GPU (16GB). Cost-effective for smaller models"
+      "notes": "1x NVIDIA T4 GPU (16GB). Cost-effective for smaller models",
+      "gpuMemoryGb": 16,
+      "gpuType": "NVIDIA T4",
+      "costTier": "low"
     },
     "ml.g5.12xlarge": {
       "category": "gpu",
@@ -207,7 +231,10 @@
       "hardware": "NVIDIA A10G",
       "gpuArchitecture": "Ampere",
       "defaultCudaVersion": "12.1",
-      "notes": "4x NVIDIA A10G GPUs (96GB total). Good for tensor parallelism"
+      "notes": "4x NVIDIA A10G GPUs (96GB total). Good for tensor parallelism",
+      "gpuMemoryGb": 24,
+      "gpuType": "NVIDIA A10G",
+      "costTier": "medium"
     },
     "ml.g5.16xlarge": {
       "category": "gpu",
@@ -232,7 +259,10 @@
       "hardware": "NVIDIA A10G",
       "gpuArchitecture": "Ampere",
       "defaultCudaVersion": "12.1",
-      "notes": "1x NVIDIA A10G GPU (24GB). Very high CPU/memory ratio"
+      "notes": "1x NVIDIA A10G GPU (24GB). Very high CPU/memory ratio",
+      "gpuMemoryGb": 24,
+      "gpuType": "NVIDIA A10G",
+      "costTier": "medium"
     },
     "ml.g5.24xlarge": {
       "category": "gpu",
@@ -257,7 +287,10 @@
       "hardware": "NVIDIA A10G",
       "gpuArchitecture": "Ampere",
       "defaultCudaVersion": "12.1",
-      "notes": "4x NVIDIA A10G GPUs (96GB total). High CPU/memory with multi-GPU"
+      "notes": "4x NVIDIA A10G GPUs (96GB total). High CPU/memory with multi-GPU",
+      "gpuMemoryGb": 24,
+      "gpuType": "NVIDIA A10G",
+      "costTier": "medium"
     },
     "ml.g5.2xlarge": {
       "category": "gpu",
@@ -283,7 +316,10 @@
       "hardware": "NVIDIA A10G",
       "gpuArchitecture": "Ampere",
       "defaultCudaVersion": "12.1",
-      "notes": "1x NVIDIA A10G GPU (24GB). Better CPU/memory for preprocessing"
+      "notes": "1x NVIDIA A10G GPU (24GB). Better CPU/memory for preprocessing",
+      "gpuMemoryGb": 24,
+      "gpuType": "NVIDIA A10G",
+      "costTier": "medium"
     },
     "ml.g5.48xlarge": {
       "category": "gpu",
@@ -308,7 +344,10 @@
       "hardware": "NVIDIA A10G",
       "gpuArchitecture": "Ampere",
       "defaultCudaVersion": "12.1",
-      "notes": "8x NVIDIA A10G GPUs (192GB total). Maximum multi-GPU configuration"
+      "notes": "8x NVIDIA A10G GPUs (192GB total). Maximum multi-GPU configuration",
+      "gpuMemoryGb": 24,
+      "gpuType": "NVIDIA A10G",
+      "costTier": "medium"
     },
     "ml.g5.4xlarge": {
       "category": "gpu",
@@ -334,7 +373,10 @@
       "hardware": "NVIDIA A10G",
       "gpuArchitecture": "Ampere",
       "defaultCudaVersion": "12.1",
-      "notes": "1x NVIDIA A10G GPU (24GB). High CPU/memory for complex preprocessing"
+      "notes": "1x NVIDIA A10G GPU (24GB). High CPU/memory for complex preprocessing",
+      "gpuMemoryGb": 24,
+      "gpuType": "NVIDIA A10G",
+      "costTier": "medium"
     },
     "ml.g5.8xlarge": {
       "category": "gpu",
@@ -359,7 +401,10 @@
       "hardware": "NVIDIA A10G",
       "gpuArchitecture": "Ampere",
       "defaultCudaVersion": "12.1",
-      "notes": "1x NVIDIA A10G GPU (24GB). Maximum CPU/memory for single GPU"
+      "notes": "1x NVIDIA A10G GPU (24GB). Maximum CPU/memory for single GPU",
+      "gpuMemoryGb": 24,
+      "gpuType": "NVIDIA A10G",
+      "costTier": "medium"
     },
     "ml.g5.xlarge": {
       "category": "gpu",
@@ -385,7 +430,10 @@
       "hardware": "NVIDIA A10G",
       "gpuArchitecture": "Ampere",
       "defaultCudaVersion": "12.1",
-      "notes": "1x NVIDIA A10G GPU (24GB). Good for small to medium models"
+      "notes": "1x NVIDIA A10G GPU (24GB). Good for small to medium models",
+      "gpuMemoryGb": 24,
+      "gpuType": "NVIDIA A10G",
+      "costTier": "medium"
     },
     "ml.g6.12xlarge": {
       "category": "gpu",
@@ -412,7 +460,10 @@
       "hardware": "NVIDIA L4",
       "gpuArchitecture": "Ada Lovelace",
       "defaultCudaVersion": "12.2",
-      "notes": "Multi-GPU (newer)"
+      "notes": "Multi-GPU (newer)",
+      "gpuMemoryGb": 24,
+      "gpuType": "NVIDIA L4",
+      "costTier": "medium"
     },
     "ml.g6.2xlarge": {
       "category": "gpu",
@@ -438,7 +489,10 @@
       "hardware": "NVIDIA L4",
       "gpuArchitecture": "Ada Lovelace",
       "defaultCudaVersion": "12.2",
-      "notes": "Medium GPU (newer)"
+      "notes": "Medium GPU (newer)",
+      "gpuMemoryGb": 24,
+      "gpuType": "NVIDIA L4",
+      "costTier": "medium"
     },
     "ml.g6.xlarge": {
       "category": "gpu",
@@ -464,7 +518,10 @@
       "hardware": "NVIDIA L4",
       "gpuArchitecture": "Ada Lovelace",
       "defaultCudaVersion": "12.2",
-      "notes": "Small GPU (newer)"
+      "notes": "Small GPU (newer)",
+      "gpuMemoryGb": 24,
+      "gpuType": "NVIDIA L4",
+      "costTier": "medium"
     },
     "ml.inf2.24xlarge": {
       "category": "gpu",
@@ -488,7 +545,10 @@
       "hardware": "AWS Inferentia2",
       "gpuArchitecture": "Inferentia2",
       "defaultCudaVersion": "2.16.0",
-      "notes": "6x Inferentia2 chips. Multi-chip for large models"
+      "notes": "6x Inferentia2 chips. Multi-chip for large models",
+      "gpuMemoryGb": 32,
+      "gpuType": "AWS Inferentia2",
+      "costTier": "low"
     },
     "ml.inf2.48xlarge": {
       "category": "gpu",
@@ -512,7 +572,10 @@
       "hardware": "AWS Inferentia2",
       "gpuArchitecture": "Inferentia2",
       "defaultCudaVersion": "2.16.0",
-      "notes": "12x Inferentia2 chips. Maximum multi-chip configuration"
+      "notes": "12x Inferentia2 chips. Maximum multi-chip configuration",
+      "gpuMemoryGb": 32,
+      "gpuType": "AWS Inferentia2",
+      "costTier": "low"
     },
     "ml.inf2.8xlarge": {
       "category": "gpu",
@@ -536,7 +599,10 @@
       "hardware": "AWS Inferentia2",
       "gpuArchitecture": "Inferentia2",
       "defaultCudaVersion": "2.16.0",
-      "notes": "1x Inferentia2 chip. Higher CPU/memory for preprocessing"
+      "notes": "1x Inferentia2 chip. Higher CPU/memory for preprocessing",
+      "gpuMemoryGb": 32,
+      "gpuType": "AWS Inferentia2",
+      "costTier": "low"
     },
     "ml.inf2.xlarge": {
       "category": "gpu",
@@ -560,7 +626,10 @@
       "hardware": "AWS Inferentia2",
       "gpuArchitecture": "Inferentia2",
       "defaultCudaVersion": "2.16.0",
-      "notes": "1x Inferentia2 chip. Cost-effective for transformer inference"
+      "notes": "1x Inferentia2 chip. Cost-effective for transformer inference",
+      "gpuMemoryGb": 32,
+      "gpuType": "AWS Inferentia2",
+      "costTier": "low"
     },
     "ml.m5.2xlarge": {
       "category": "cpu",
@@ -580,7 +649,10 @@
       "hardware": "None",
       "gpuArchitecture": "None",
       "defaultCudaVersion": null,
-      "notes": "Large CPU workloads"
+      "notes": "Large CPU workloads",
+      "gpuMemoryGb": null,
+      "gpuType": null,
+      "costTier": "low"
     },
     "ml.m5.4xlarge": {
       "category": "cpu",
@@ -601,7 +673,10 @@
       "hardware": "None",
       "gpuArchitecture": "None",
       "defaultCudaVersion": null,
-      "notes": "XL CPU workloads"
+      "notes": "XL CPU workloads",
+      "gpuMemoryGb": null,
+      "gpuType": null,
+      "costTier": "low"
     },
     "ml.m5.large": {
       "category": "cpu",
@@ -623,7 +698,10 @@
       "hardware": "None",
       "gpuArchitecture": "None",
       "defaultCudaVersion": null,
-      "notes": "Small CPU workloads"
+      "notes": "Small CPU workloads",
+      "gpuMemoryGb": null,
+      "gpuType": null,
+      "costTier": "low"
     },
     "ml.m5.xlarge": {
       "category": "cpu",
@@ -643,7 +721,10 @@
       "hardware": "None",
       "gpuArchitecture": "None",
       "defaultCudaVersion": null,
-      "notes": "Medium CPU workloads"
+      "notes": "Medium CPU workloads",
+      "gpuMemoryGb": null,
+      "gpuType": null,
+      "costTier": "low"
     },
     "ml.p3.16xlarge": {
       "category": "gpu",
@@ -667,7 +748,10 @@
       "hardware": "NVIDIA V100",
       "gpuArchitecture": "Volta",
       "defaultCudaVersion": "11.8",
-      "notes": "8x NVIDIA V100 GPUs (128GB total). Maximum multi-GPU configuration"
+      "notes": "8x NVIDIA V100 GPUs (128GB total). Maximum multi-GPU configuration",
+      "gpuMemoryGb": 16,
+      "gpuType": "NVIDIA V100",
+      "costTier": "high"
     },
     "ml.p3.2xlarge": {
       "category": "gpu",
@@ -693,7 +777,10 @@
       "hardware": "NVIDIA V100",
       "gpuArchitecture": "Volta",
       "defaultCudaVersion": "11.8",
-      "notes": "1x NVIDIA V100 GPU (16GB). High-performance for training and inference"
+      "notes": "1x NVIDIA V100 GPU (16GB). High-performance for training and inference",
+      "gpuMemoryGb": 16,
+      "gpuType": "NVIDIA V100",
+      "costTier": "high"
     },
     "ml.p3.8xlarge": {
       "category": "gpu",
@@ -720,7 +807,10 @@
       "hardware": "NVIDIA V100",
       "gpuArchitecture": "Volta",
       "defaultCudaVersion": "11.8",
-      "notes": "4x NVIDIA V100 GPUs (64GB total). Multi-GPU for large models"
+      "notes": "4x NVIDIA V100 GPUs (64GB total). Multi-GPU for large models",
+      "gpuMemoryGb": 16,
+      "gpuType": "NVIDIA V100",
+      "costTier": "high"
     },
     "ml.r5.large": {
       "category": "cpu",
@@ -739,7 +829,10 @@
       "hardware": "None",
       "gpuArchitecture": "None",
       "defaultCudaVersion": null,
-      "notes": ""
+      "notes": "",
+      "gpuMemoryGb": null,
+      "gpuType": null,
+      "costTier": "low"
     },
     "ml.r5.xlarge": {
       "category": "cpu",
@@ -758,7 +851,10 @@
       "hardware": "None",
       "gpuArchitecture": "None",
       "defaultCudaVersion": null,
-      "notes": ""
+      "notes": "",
+      "gpuMemoryGb": null,
+      "gpuType": null,
+      "costTier": "low"
     },
     "ml.trn1.2xlarge": {
       "category": "gpu",
@@ -782,7 +878,10 @@
       "hardware": "AWS Trainium",
       "gpuArchitecture": "Trainium1",
       "defaultCudaVersion": "2.16.0",
-      "notes": "1x Trainium chip. Optimized for training, also supports inference"
+      "notes": "1x Trainium chip. Optimized for training, also supports inference",
+      "gpuMemoryGb": 32,
+      "gpuType": "AWS Trainium",
+      "costTier": "medium"
     },
     "ml.trn1.32xlarge": {
       "category": "gpu",
@@ -806,7 +905,10 @@
       "hardware": "AWS Trainium",
       "gpuArchitecture": "Trainium1",
       "defaultCudaVersion": "2.16.0",
-      "notes": "16x Trainium chips. Maximum multi-chip for large-scale training/inference"
+      "notes": "16x Trainium chips. Maximum multi-chip for large-scale training/inference",
+      "gpuMemoryGb": 32,
+      "gpuType": "AWS Trainium",
+      "costTier": "medium"
     }
   },
   "recommendations": {