@aws/ml-container-creator 0.2.5 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/bin/cli.js +7 -2
  2. package/package.json +7 -8
  3. package/servers/base-image-picker/index.js +3 -3
  4. package/servers/base-image-picker/manifest.json +4 -2
  5. package/servers/instance-sizer/index.js +561 -0
  6. package/servers/instance-sizer/lib/instance-ranker.js +245 -0
  7. package/servers/instance-sizer/lib/model-resolver.js +265 -0
  8. package/servers/instance-sizer/lib/vram-estimator.js +177 -0
  9. package/servers/instance-sizer/manifest.json +17 -0
  10. package/servers/instance-sizer/package.json +15 -0
  11. package/servers/{instance-recommender → lib}/catalogs/instances.json +136 -34
  12. package/servers/{base-image-picker → lib}/catalogs/model-servers.json +19 -249
  13. package/servers/lib/catalogs/model-sizes.json +131 -0
  14. package/servers/lib/catalogs/models.json +602 -0
  15. package/servers/{model-picker → lib}/catalogs/popular-diffusors.json +32 -10
  16. package/servers/{model-picker → lib}/catalogs/popular-transformers.json +59 -26
  17. package/servers/{base-image-picker → lib}/catalogs/python-slim.json +12 -12
  18. package/servers/lib/schemas/image-catalog.schema.json +0 -12
  19. package/servers/lib/schemas/instances.schema.json +29 -0
  20. package/servers/lib/schemas/model-catalog.schema.json +12 -10
  21. package/servers/lib/schemas/unified-model-catalog.schema.json +129 -0
  22. package/servers/model-picker/index.js +2 -3
  23. package/servers/model-picker/manifest.json +2 -3
  24. package/servers/region-picker/index.js +1 -1
  25. package/servers/region-picker/manifest.json +1 -1
  26. package/src/app.js +17 -0
  27. package/src/lib/bootstrap-command-handler.js +38 -0
  28. package/src/lib/cli-handler.js +3 -3
  29. package/src/lib/config-manager.js +4 -1
  30. package/src/lib/configuration-manager.js +2 -2
  31. package/src/lib/cross-cutting-checker.js +341 -0
  32. package/src/lib/dry-run-validator.js +78 -0
  33. package/src/lib/generation-validator.js +102 -0
  34. package/src/lib/mcp-validator-config.js +89 -0
  35. package/src/lib/payload-builder.js +153 -0
  36. package/src/lib/prompt-runner.js +445 -135
  37. package/src/lib/prompts.js +1 -1
  38. package/src/lib/registry-loader.js +5 -5
  39. package/src/lib/schema-sync.js +203 -0
  40. package/src/lib/schema-validation-engine.js +195 -0
  41. package/src/lib/service-model-parser.js +102 -0
  42. package/src/lib/validate-runner.js +167 -0
  43. package/src/lib/validation-report.js +133 -0
  44. package/src/lib/validators/base-validator.js +36 -0
  45. package/src/lib/validators/catalog-validator.js +177 -0
  46. package/src/lib/validators/enum-validator.js +120 -0
  47. package/src/lib/validators/required-field-validator.js +150 -0
  48. package/src/lib/validators/type-validator.js +313 -0
  49. package/templates/Dockerfile +1 -1
  50. package/templates/do/build +15 -5
  51. package/templates/do/run +5 -1
  52. package/templates/do/validate +61 -0
  53. package/servers/instance-recommender/LICENSE +0 -202
  54. package/servers/instance-recommender/index.js +0 -284
  55. package/servers/instance-recommender/manifest.json +0 -16
  56. package/servers/instance-recommender/package.json +0 -15
  57. /package/servers/{model-picker → lib}/catalogs/jumpstart-public.json +0 -0
  58. /package/servers/{region-picker → lib}/catalogs/regions.json +0 -0
  59. /package/servers/{base-image-picker → lib}/catalogs/triton-backends.json +0 -0
  60. /package/servers/{base-image-picker → lib}/catalogs/triton.json +0 -0
@@ -0,0 +1,131 @@
1
+ {
2
+ "catalogVersion": "1.0.0",
3
+ "models": {
4
+ "meta-llama/Llama-2-7b*": {
5
+ "parameterCount": 6738415616,
6
+ "defaultDtype": "float16",
7
+ "architecture": "LlamaForCausalLM",
8
+ "maxPositionEmbeddings": 4096,
9
+ "recommendedQuantizations": ["awq", "gptq"],
10
+ "minVramGb": 18,
11
+ "recommendedInstances": ["ml.g5.2xlarge", "ml.g5.4xlarge"]
12
+ },
13
+ "meta-llama/Llama-2-13b*": {
14
+ "parameterCount": 13015864320,
15
+ "defaultDtype": "float16",
16
+ "architecture": "LlamaForCausalLM",
17
+ "maxPositionEmbeddings": 4096,
18
+ "recommendedQuantizations": ["awq", "gptq"],
19
+ "minVramGb": 34,
20
+ "recommendedInstances": ["ml.g5.4xlarge", "ml.g5.12xlarge"]
21
+ },
22
+ "meta-llama/Llama-2-70b*": {
23
+ "parameterCount": 68976648192,
24
+ "defaultDtype": "float16",
25
+ "architecture": "LlamaForCausalLM",
26
+ "maxPositionEmbeddings": 4096,
27
+ "recommendedQuantizations": ["awq", "gptq"],
28
+ "minVramGb": 180,
29
+ "recommendedInstances": ["ml.g5.48xlarge", "ml.p4d.24xlarge"]
30
+ },
31
+ "meta-llama/Meta-Llama-3-8B*": {
32
+ "parameterCount": 8030261248,
33
+ "defaultDtype": "bfloat16",
34
+ "architecture": "LlamaForCausalLM",
35
+ "maxPositionEmbeddings": 8192,
36
+ "recommendedQuantizations": ["awq", "gptq"],
37
+ "minVramGb": 21,
38
+ "recommendedInstances": ["ml.g5.2xlarge", "ml.g5.4xlarge"]
39
+ },
40
+ "meta-llama/Meta-Llama-3-70B*": {
41
+ "parameterCount": 70553706496,
42
+ "defaultDtype": "bfloat16",
43
+ "architecture": "LlamaForCausalLM",
44
+ "maxPositionEmbeddings": 8192,
45
+ "recommendedQuantizations": ["awq", "gptq"],
46
+ "minVramGb": 184,
47
+ "recommendedInstances": ["ml.g5.48xlarge", "ml.p4d.24xlarge"]
48
+ },
49
+ "mistralai/Mistral-7B*": {
50
+ "parameterCount": 7241732096,
51
+ "defaultDtype": "bfloat16",
52
+ "architecture": "MistralForCausalLM",
53
+ "maxPositionEmbeddings": 32768,
54
+ "recommendedQuantizations": ["awq", "gptq"],
55
+ "minVramGb": 19,
56
+ "recommendedInstances": ["ml.g5.2xlarge", "ml.g5.4xlarge"]
57
+ },
58
+ "mistralai/Mixtral-8x7B*": {
59
+ "parameterCount": 46702792704,
60
+ "defaultDtype": "bfloat16",
61
+ "architecture": "MixtralForCausalLM",
62
+ "maxPositionEmbeddings": 32768,
63
+ "recommendedQuantizations": ["awq", "gptq"],
64
+ "minVramGb": 122,
65
+ "recommendedInstances": ["ml.g5.48xlarge", "ml.p4d.24xlarge"]
66
+ },
67
+ "Qwen/Qwen-7B*": {
68
+ "parameterCount": 7721324544,
69
+ "defaultDtype": "bfloat16",
70
+ "architecture": "QWenLMHeadModel",
71
+ "maxPositionEmbeddings": 8192,
72
+ "recommendedQuantizations": ["awq", "gptq"],
73
+ "minVramGb": 20,
74
+ "recommendedInstances": ["ml.g5.2xlarge", "ml.g5.4xlarge"]
75
+ },
76
+ "Qwen/Qwen2-7B*": {
77
+ "parameterCount": 7721324544,
78
+ "defaultDtype": "bfloat16",
79
+ "architecture": "Qwen2ForCausalLM",
80
+ "maxPositionEmbeddings": 32768,
81
+ "recommendedQuantizations": ["awq", "gptq"],
82
+ "minVramGb": 20,
83
+ "recommendedInstances": ["ml.g5.2xlarge", "ml.g5.4xlarge"]
84
+ },
85
+ "Qwen/Qwen-14B*": {
86
+ "parameterCount": 14167134208,
87
+ "defaultDtype": "bfloat16",
88
+ "architecture": "QWenLMHeadModel",
89
+ "maxPositionEmbeddings": 8192,
90
+ "recommendedQuantizations": ["awq", "gptq"],
91
+ "minVramGb": 37,
92
+ "recommendedInstances": ["ml.g5.4xlarge", "ml.g5.12xlarge"]
93
+ },
94
+ "Qwen/Qwen2-14B*": {
95
+ "parameterCount": 14167134208,
96
+ "defaultDtype": "bfloat16",
97
+ "architecture": "Qwen2ForCausalLM",
98
+ "maxPositionEmbeddings": 32768,
99
+ "recommendedQuantizations": ["awq", "gptq"],
100
+ "minVramGb": 37,
101
+ "recommendedInstances": ["ml.g5.4xlarge", "ml.g5.12xlarge"]
102
+ },
103
+ "Qwen/Qwen-72B*": {
104
+ "parameterCount": 72710410240,
105
+ "defaultDtype": "bfloat16",
106
+ "architecture": "QWenLMHeadModel",
107
+ "maxPositionEmbeddings": 32768,
108
+ "recommendedQuantizations": ["awq", "gptq"],
109
+ "minVramGb": 190,
110
+ "recommendedInstances": ["ml.g5.48xlarge", "ml.p4d.24xlarge"]
111
+ },
112
+ "Qwen/Qwen2-72B*": {
113
+ "parameterCount": 72710410240,
114
+ "defaultDtype": "bfloat16",
115
+ "architecture": "Qwen2ForCausalLM",
116
+ "maxPositionEmbeddings": 32768,
117
+ "recommendedQuantizations": ["awq", "gptq"],
118
+ "minVramGb": 190,
119
+ "recommendedInstances": ["ml.g5.48xlarge", "ml.p4d.24xlarge"]
120
+ },
121
+ "EleutherAI/gpt-neox-20b*": {
122
+ "parameterCount": 20554568704,
123
+ "defaultDtype": "float16",
124
+ "architecture": "GPTNeoXForCausalLM",
125
+ "maxPositionEmbeddings": 2048,
126
+ "recommendedQuantizations": ["gptq"],
127
+ "minVramGb": 54,
128
+ "recommendedInstances": ["ml.g5.12xlarge", "ml.g5.48xlarge"]
129
+ }
130
+ }
131
+ }