@aws/ml-container-creator 0.2.5 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +45 -4
- package/config/bootstrap-stack.json +14 -0
- package/infra/ci-harness/package-lock.json +22 -9
- package/package.json +7 -8
- package/servers/base-image-picker/index.js +3 -3
- package/servers/base-image-picker/manifest.json +4 -2
- package/servers/instance-sizer/index.js +564 -0
- package/servers/instance-sizer/lib/instance-ranker.js +270 -0
- package/servers/instance-sizer/lib/model-resolver.js +269 -0
- package/servers/instance-sizer/lib/vram-estimator.js +177 -0
- package/servers/instance-sizer/manifest.json +17 -0
- package/servers/instance-sizer/package.json +15 -0
- package/servers/{instance-recommender → lib}/catalogs/instances.json +136 -34
- package/servers/{base-image-picker → lib}/catalogs/model-servers.json +302 -254
- package/servers/lib/catalogs/model-sizes.json +131 -0
- package/servers/lib/catalogs/models.json +632 -0
- package/servers/{model-picker → lib}/catalogs/popular-diffusors.json +32 -10
- package/servers/{model-picker → lib}/catalogs/popular-transformers.json +59 -26
- package/servers/{base-image-picker → lib}/catalogs/python-slim.json +12 -12
- package/servers/lib/schemas/image-catalog.schema.json +6 -12
- package/servers/lib/schemas/instances.schema.json +29 -0
- package/servers/lib/schemas/model-catalog.schema.json +12 -10
- package/servers/lib/schemas/unified-model-catalog.schema.json +129 -0
- package/servers/model-picker/index.js +4 -4
- package/servers/model-picker/manifest.json +2 -3
- package/servers/region-picker/index.js +1 -1
- package/servers/region-picker/manifest.json +1 -1
- package/src/app.js +36 -0
- package/src/lib/architecture-sync.js +171 -0
- package/src/lib/arn-detection.js +22 -0
- package/src/lib/bootstrap-command-handler.js +120 -0
- package/src/lib/cli-handler.js +3 -3
- package/src/lib/config-manager.js +47 -1
- package/src/lib/configuration-manager.js +2 -2
- package/src/lib/cross-cutting-checker.js +460 -0
- package/src/lib/deployment-entry-schema.js +1 -2
- package/src/lib/dry-run-validator.js +78 -0
- package/src/lib/generation-validator.js +102 -0
- package/src/lib/mcp-validator-config.js +89 -0
- package/src/lib/payload-builder.js +153 -0
- package/src/lib/prompt-runner.js +866 -149
- package/src/lib/prompts.js +2 -2
- package/src/lib/registry-command-handler.js +236 -0
- package/src/lib/registry-loader.js +5 -5
- package/src/lib/schema-sync.js +203 -0
- package/src/lib/schema-validation-engine.js +195 -0
- package/src/lib/secret-classification.js +56 -0
- package/src/lib/secrets-command-handler.js +550 -0
- package/src/lib/service-model-parser.js +102 -0
- package/src/lib/validate-runner.js +216 -0
- package/src/lib/validation-report.js +140 -0
- package/src/lib/validators/base-validator.js +36 -0
- package/src/lib/validators/catalog-validator.js +177 -0
- package/src/lib/validators/enum-validator.js +120 -0
- package/src/lib/validators/required-field-validator.js +150 -0
- package/src/lib/validators/type-validator.js +313 -0
- package/src/prompt-adapter.js +3 -2
- package/templates/Dockerfile +1 -1
- package/templates/do/build +37 -5
- package/templates/do/config +15 -3
- package/templates/do/deploy +60 -5
- package/templates/do/logs +18 -3
- package/templates/do/run +15 -1
- package/templates/do/validate +61 -0
- package/servers/instance-recommender/LICENSE +0 -202
- package/servers/instance-recommender/index.js +0 -284
- package/servers/instance-recommender/manifest.json +0 -16
- package/servers/instance-recommender/package.json +0 -15
- /package/servers/{model-picker → lib}/catalogs/jumpstart-public.json +0 -0
- /package/servers/{region-picker → lib}/catalogs/regions.json +0 -0
- /package/servers/{base-image-picker → lib}/catalogs/triton-backends.json +0 -0
- /package/servers/{base-image-picker → lib}/catalogs/triton.json +0 -0
|
@@ -3,7 +3,11 @@
|
|
|
3
3
|
"family": "stable-diffusion-3",
|
|
4
4
|
"chat_template": null,
|
|
5
5
|
"gated": false,
|
|
6
|
-
"tags": [
|
|
6
|
+
"tags": [
|
|
7
|
+
"image-generation",
|
|
8
|
+
"diffusion",
|
|
9
|
+
"stable-diffusion"
|
|
10
|
+
],
|
|
7
11
|
"architecture": "StableDiffusion3Pipeline",
|
|
8
12
|
"framework_compatibility": {
|
|
9
13
|
"vllm-omni": ">=0.14.0"
|
|
@@ -12,8 +16,7 @@
|
|
|
12
16
|
"profiles": {
|
|
13
17
|
"default": {
|
|
14
18
|
"displayName": "SD3.5 Medium",
|
|
15
|
-
"envVars": {}
|
|
16
|
-
"recommendedInstanceTypes": ["ml.g5.2xlarge", "ml.g5.4xlarge"]
|
|
19
|
+
"envVars": {}
|
|
17
20
|
}
|
|
18
21
|
},
|
|
19
22
|
"notes": "Stable Diffusion 3.5 medium model. Supported natively by vLLM-Omni StableDiffusion3Pipeline."
|
|
@@ -22,7 +25,11 @@
|
|
|
22
25
|
"family": "flux",
|
|
23
26
|
"chat_template": null,
|
|
24
27
|
"gated": true,
|
|
25
|
-
"tags": [
|
|
28
|
+
"tags": [
|
|
29
|
+
"image-generation",
|
|
30
|
+
"diffusion",
|
|
31
|
+
"flux"
|
|
32
|
+
],
|
|
26
33
|
"architecture": "FluxPipeline",
|
|
27
34
|
"framework_compatibility": {
|
|
28
35
|
"vllm-omni": ">=0.14.0"
|
|
@@ -31,8 +38,7 @@
|
|
|
31
38
|
"profiles": {
|
|
32
39
|
"default": {
|
|
33
40
|
"displayName": "FLUX.1 Dev",
|
|
34
|
-
"envVars": {}
|
|
35
|
-
"recommendedInstanceTypes": ["ml.g5.4xlarge", "ml.g5.12xlarge"]
|
|
41
|
+
"envVars": {}
|
|
36
42
|
}
|
|
37
43
|
},
|
|
38
44
|
"notes": "FLUX.1-dev high-quality generation model. Uses dual text encoders (CLIP + T5) and FlowMatchEuler scheduler. Requires significant VRAM."
|
|
@@ -41,7 +47,11 @@
|
|
|
41
47
|
"family": "flux",
|
|
42
48
|
"chat_template": null,
|
|
43
49
|
"gated": false,
|
|
44
|
-
"tags": [
|
|
50
|
+
"tags": [
|
|
51
|
+
"image-generation",
|
|
52
|
+
"diffusion",
|
|
53
|
+
"flux"
|
|
54
|
+
],
|
|
45
55
|
"architecture": "FluxPipeline",
|
|
46
56
|
"framework_compatibility": {
|
|
47
57
|
"vllm-omni": ">=0.14.0"
|
|
@@ -53,7 +63,11 @@
|
|
|
53
63
|
"family": "wan",
|
|
54
64
|
"chat_template": null,
|
|
55
65
|
"gated": false,
|
|
56
|
-
"tags": [
|
|
66
|
+
"tags": [
|
|
67
|
+
"video-generation",
|
|
68
|
+
"diffusion",
|
|
69
|
+
"wan"
|
|
70
|
+
],
|
|
57
71
|
"architecture": "WanPipeline",
|
|
58
72
|
"framework_compatibility": {
|
|
59
73
|
"vllm-omni": ">=0.16.0"
|
|
@@ -65,7 +79,11 @@
|
|
|
65
79
|
"family": "stable-diffusion",
|
|
66
80
|
"chat_template": null,
|
|
67
81
|
"gated": false,
|
|
68
|
-
"tags": [
|
|
82
|
+
"tags": [
|
|
83
|
+
"image-generation",
|
|
84
|
+
"diffusion",
|
|
85
|
+
"stable-diffusion"
|
|
86
|
+
],
|
|
69
87
|
"architecture": null,
|
|
70
88
|
"framework_compatibility": {
|
|
71
89
|
"vllm-omni": ">=0.14.0"
|
|
@@ -77,7 +95,11 @@
|
|
|
77
95
|
"family": "flux",
|
|
78
96
|
"chat_template": null,
|
|
79
97
|
"gated": false,
|
|
80
|
-
"tags": [
|
|
98
|
+
"tags": [
|
|
99
|
+
"image-generation",
|
|
100
|
+
"diffusion",
|
|
101
|
+
"flux"
|
|
102
|
+
],
|
|
81
103
|
"architecture": null,
|
|
82
104
|
"framework_compatibility": {
|
|
83
105
|
"vllm-omni": ">=0.14.0"
|
|
@@ -3,7 +3,11 @@
|
|
|
3
3
|
"family": "gpt-oss",
|
|
4
4
|
"chat_template": "",
|
|
5
5
|
"gated": false,
|
|
6
|
-
"tags": [
|
|
6
|
+
"tags": [
|
|
7
|
+
"text-generation",
|
|
8
|
+
"openai",
|
|
9
|
+
"conversational"
|
|
10
|
+
],
|
|
7
11
|
"architecture": "GPT2LMHeadModel",
|
|
8
12
|
"framework_compatibility": {
|
|
9
13
|
"vllm": ">=0.3.0",
|
|
@@ -17,7 +21,11 @@
|
|
|
17
21
|
"family": "llama-2",
|
|
18
22
|
"chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{{ '[INST] <<SYS>>\\n' + message['content'] + '\\n<</SYS>>\\n\\n' }}{% elif message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + ' ' }}{% endif %}{% endfor %}",
|
|
19
23
|
"gated": true,
|
|
20
|
-
"tags": [
|
|
24
|
+
"tags": [
|
|
25
|
+
"text-generation",
|
|
26
|
+
"llama-2",
|
|
27
|
+
"conversational"
|
|
28
|
+
],
|
|
21
29
|
"architecture": "LlamaForCausalLM",
|
|
22
30
|
"framework_compatibility": {
|
|
23
31
|
"vllm": ">=0.3.0",
|
|
@@ -31,8 +39,7 @@
|
|
|
31
39
|
"envVars": {
|
|
32
40
|
"MAX_MODEL_LEN": "4096",
|
|
33
41
|
"GPU_MEMORY_UTILIZATION": "0.9"
|
|
34
|
-
}
|
|
35
|
-
"recommendedInstanceTypes": ["ml.g5.xlarge", "ml.g5.2xlarge"]
|
|
42
|
+
}
|
|
36
43
|
}
|
|
37
44
|
},
|
|
38
45
|
"notes": "Llama-2 7B chat model with official chat template. Requires HuggingFace authentication for download"
|
|
@@ -41,7 +48,11 @@
|
|
|
41
48
|
"family": "llama-2",
|
|
42
49
|
"chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{{ '[INST] <<SYS>>\\n' + message['content'] + '\\n<</SYS>>\\n\\n' }}{% elif message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + ' ' }}{% endif %}{% endfor %}",
|
|
43
50
|
"gated": true,
|
|
44
|
-
"tags": [
|
|
51
|
+
"tags": [
|
|
52
|
+
"text-generation",
|
|
53
|
+
"llama-2",
|
|
54
|
+
"conversational"
|
|
55
|
+
],
|
|
45
56
|
"architecture": "LlamaForCausalLM",
|
|
46
57
|
"framework_compatibility": {
|
|
47
58
|
"vllm": ">=0.3.0",
|
|
@@ -55,18 +66,20 @@
|
|
|
55
66
|
"envVars": {
|
|
56
67
|
"MAX_MODEL_LEN": "4096",
|
|
57
68
|
"GPU_MEMORY_UTILIZATION": "0.9"
|
|
58
|
-
}
|
|
59
|
-
"recommendedInstanceTypes": ["ml.g5.2xlarge", "ml.g5.4xlarge"]
|
|
69
|
+
}
|
|
60
70
|
}
|
|
61
71
|
},
|
|
62
72
|
"notes": "Llama-2 13B chat model. Requires more GPU memory than 7B variant"
|
|
63
73
|
},
|
|
64
|
-
|
|
65
74
|
"meta-llama/Llama-2-70b-chat-hf": {
|
|
66
75
|
"family": "llama-2",
|
|
67
76
|
"chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{{ '[INST] <<SYS>>\\n' + message['content'] + '\\n<</SYS>>\\n\\n' }}{% elif message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + ' ' }}{% endif %}{% endfor %}",
|
|
68
77
|
"gated": true,
|
|
69
|
-
"tags": [
|
|
78
|
+
"tags": [
|
|
79
|
+
"text-generation",
|
|
80
|
+
"llama-2",
|
|
81
|
+
"conversational"
|
|
82
|
+
],
|
|
70
83
|
"architecture": "LlamaForCausalLM",
|
|
71
84
|
"framework_compatibility": {
|
|
72
85
|
"vllm": ">=0.3.0",
|
|
@@ -81,8 +94,7 @@
|
|
|
81
94
|
"TENSOR_PARALLEL_SIZE": "2",
|
|
82
95
|
"MAX_MODEL_LEN": "4096",
|
|
83
96
|
"GPU_MEMORY_UTILIZATION": "0.95"
|
|
84
|
-
}
|
|
85
|
-
"recommendedInstanceTypes": ["ml.g5.12xlarge"]
|
|
97
|
+
}
|
|
86
98
|
},
|
|
87
99
|
"70b-tp4": {
|
|
88
100
|
"displayName": "Llama-2 70B (4-GPU)",
|
|
@@ -90,8 +102,7 @@
|
|
|
90
102
|
"TENSOR_PARALLEL_SIZE": "4",
|
|
91
103
|
"MAX_MODEL_LEN": "4096",
|
|
92
104
|
"GPU_MEMORY_UTILIZATION": "0.9"
|
|
93
|
-
}
|
|
94
|
-
"recommendedInstanceTypes": ["ml.g5.12xlarge", "ml.g5.48xlarge"]
|
|
105
|
+
}
|
|
95
106
|
}
|
|
96
107
|
},
|
|
97
108
|
"notes": "Llama-2 70B requires tensor parallelism across multiple GPUs"
|
|
@@ -100,7 +111,11 @@
|
|
|
100
111
|
"family": "mistral",
|
|
101
112
|
"chat_template": "{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token }}{% endif %}{% endfor %}",
|
|
102
113
|
"gated": false,
|
|
103
|
-
"tags": [
|
|
114
|
+
"tags": [
|
|
115
|
+
"text-generation",
|
|
116
|
+
"mistral",
|
|
117
|
+
"conversational"
|
|
118
|
+
],
|
|
104
119
|
"architecture": "MistralForCausalLM",
|
|
105
120
|
"framework_compatibility": {
|
|
106
121
|
"vllm": ">=0.3.0",
|
|
@@ -114,8 +129,7 @@
|
|
|
114
129
|
"envVars": {
|
|
115
130
|
"MAX_MODEL_LEN": "8192",
|
|
116
131
|
"GPU_MEMORY_UTILIZATION": "0.9"
|
|
117
|
-
}
|
|
118
|
-
"recommendedInstanceTypes": ["ml.g5.xlarge", "ml.g5.2xlarge"]
|
|
132
|
+
}
|
|
119
133
|
}
|
|
120
134
|
},
|
|
121
135
|
"notes": "Mistral 7B v0.1 with 8K context window"
|
|
@@ -124,7 +138,11 @@
|
|
|
124
138
|
"family": "mistral",
|
|
125
139
|
"chat_template": "{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token }}{% endif %}{% endfor %}",
|
|
126
140
|
"gated": false,
|
|
127
|
-
"tags": [
|
|
141
|
+
"tags": [
|
|
142
|
+
"text-generation",
|
|
143
|
+
"mistral",
|
|
144
|
+
"conversational"
|
|
145
|
+
],
|
|
128
146
|
"architecture": "MistralForCausalLM",
|
|
129
147
|
"framework_compatibility": {
|
|
130
148
|
"vllm": ">=0.3.0",
|
|
@@ -138,8 +156,7 @@
|
|
|
138
156
|
"envVars": {
|
|
139
157
|
"MAX_MODEL_LEN": "32768",
|
|
140
158
|
"GPU_MEMORY_UTILIZATION": "0.9"
|
|
141
|
-
}
|
|
142
|
-
"recommendedInstanceTypes": ["ml.g5.2xlarge", "ml.g5.4xlarge"]
|
|
159
|
+
}
|
|
143
160
|
}
|
|
144
161
|
},
|
|
145
162
|
"notes": "Mistral 7B v0.2 with extended 32K context window. Requires more memory for long contexts"
|
|
@@ -148,7 +165,11 @@
|
|
|
148
165
|
"family": "mistral",
|
|
149
166
|
"chat_template": "{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token }}{% endif %}{% endfor %}",
|
|
150
167
|
"gated": false,
|
|
151
|
-
"tags": [
|
|
168
|
+
"tags": [
|
|
169
|
+
"text-generation",
|
|
170
|
+
"mistral",
|
|
171
|
+
"mixture-of-experts"
|
|
172
|
+
],
|
|
152
173
|
"architecture": "MixtralForCausalLM",
|
|
153
174
|
"framework_compatibility": {
|
|
154
175
|
"vllm": ">=0.3.0",
|
|
@@ -163,8 +184,7 @@
|
|
|
163
184
|
"TENSOR_PARALLEL_SIZE": "2",
|
|
164
185
|
"MAX_MODEL_LEN": "32768",
|
|
165
186
|
"GPU_MEMORY_UTILIZATION": "0.95"
|
|
166
|
-
}
|
|
167
|
-
"recommendedInstanceTypes": ["ml.g5.12xlarge"]
|
|
187
|
+
}
|
|
168
188
|
}
|
|
169
189
|
},
|
|
170
190
|
"notes": "Mixtral 8x7B MoE model. Requires tensor parallelism for efficient inference"
|
|
@@ -173,7 +193,10 @@
|
|
|
173
193
|
"family": "llama-2",
|
|
174
194
|
"chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{{ '[INST] <<SYS>>\\n' + message['content'] + '\\n<</SYS>>\\n\\n' }}{% elif message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + ' ' }}{% endif %}{% endfor %}",
|
|
175
195
|
"gated": true,
|
|
176
|
-
"tags": [
|
|
196
|
+
"tags": [
|
|
197
|
+
"text-generation",
|
|
198
|
+
"llama-2"
|
|
199
|
+
],
|
|
177
200
|
"architecture": null,
|
|
178
201
|
"framework_compatibility": {
|
|
179
202
|
"vllm": ">=0.3.0",
|
|
@@ -187,7 +210,10 @@
|
|
|
187
210
|
"family": "mistral",
|
|
188
211
|
"chat_template": "{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token }}{% endif %}{% endfor %}",
|
|
189
212
|
"gated": false,
|
|
190
|
-
"tags": [
|
|
213
|
+
"tags": [
|
|
214
|
+
"text-generation",
|
|
215
|
+
"mistral"
|
|
216
|
+
],
|
|
191
217
|
"architecture": null,
|
|
192
218
|
"framework_compatibility": {
|
|
193
219
|
"vllm": ">=0.3.0",
|
|
@@ -201,7 +227,11 @@
|
|
|
201
227
|
"family": "codellama",
|
|
202
228
|
"chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{{ '[INST] <<SYS>>\\n' + message['content'] + '\\n<</SYS>>\\n\\n' }}{% elif message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + ' ' }}{% endif %}{% endfor %}",
|
|
203
229
|
"gated": false,
|
|
204
|
-
"tags": [
|
|
230
|
+
"tags": [
|
|
231
|
+
"text-generation",
|
|
232
|
+
"code",
|
|
233
|
+
"codellama"
|
|
234
|
+
],
|
|
205
235
|
"architecture": null,
|
|
206
236
|
"framework_compatibility": {
|
|
207
237
|
"vllm": ">=0.3.0",
|
|
@@ -214,7 +244,10 @@
|
|
|
214
244
|
"family": "falcon",
|
|
215
245
|
"chat_template": null,
|
|
216
246
|
"gated": false,
|
|
217
|
-
"tags": [
|
|
247
|
+
"tags": [
|
|
248
|
+
"text-generation",
|
|
249
|
+
"falcon"
|
|
250
|
+
],
|
|
218
251
|
"architecture": null,
|
|
219
252
|
"framework_compatibility": {
|
|
220
253
|
"vllm": ">=0.3.0",
|
|
@@ -1,38 +1,38 @@
|
|
|
1
1
|
[
|
|
2
2
|
{
|
|
3
|
-
"image": "python:3.12-slim",
|
|
3
|
+
"image": "public.ecr.aws/docker/library/python:3.12-slim",
|
|
4
4
|
"tag": "3.12-slim",
|
|
5
5
|
"architecture": "amd64",
|
|
6
6
|
"created": "2024-10-01T00:00:00Z",
|
|
7
7
|
"labels": { "python_version": "3.12" },
|
|
8
|
-
"registry": "
|
|
9
|
-
"repository": "python"
|
|
8
|
+
"registry": "ecr-public",
|
|
9
|
+
"repository": "docker/library/python"
|
|
10
10
|
},
|
|
11
11
|
{
|
|
12
|
-
"image": "python:3.11-slim",
|
|
12
|
+
"image": "public.ecr.aws/docker/library/python:3.11-slim",
|
|
13
13
|
"tag": "3.11-slim",
|
|
14
14
|
"architecture": "amd64",
|
|
15
15
|
"created": "2023-10-01T00:00:00Z",
|
|
16
16
|
"labels": { "python_version": "3.11" },
|
|
17
|
-
"registry": "
|
|
18
|
-
"repository": "python"
|
|
17
|
+
"registry": "ecr-public",
|
|
18
|
+
"repository": "docker/library/python"
|
|
19
19
|
},
|
|
20
20
|
{
|
|
21
|
-
"image": "python:3.10-slim",
|
|
21
|
+
"image": "public.ecr.aws/docker/library/python:3.10-slim",
|
|
22
22
|
"tag": "3.10-slim",
|
|
23
23
|
"architecture": "amd64",
|
|
24
24
|
"created": "2022-10-01T00:00:00Z",
|
|
25
25
|
"labels": { "python_version": "3.10" },
|
|
26
|
-
"registry": "
|
|
27
|
-
"repository": "python"
|
|
26
|
+
"registry": "ecr-public",
|
|
27
|
+
"repository": "docker/library/python"
|
|
28
28
|
},
|
|
29
29
|
{
|
|
30
|
-
"image": "python:3.9-slim",
|
|
30
|
+
"image": "public.ecr.aws/docker/library/python:3.9-slim",
|
|
31
31
|
"tag": "3.9-slim",
|
|
32
32
|
"architecture": "amd64",
|
|
33
33
|
"created": "2021-10-01T00:00:00Z",
|
|
34
34
|
"labels": { "python_version": "3.9" },
|
|
35
|
-
"registry": "
|
|
36
|
-
"repository": "python"
|
|
35
|
+
"registry": "ecr-public",
|
|
36
|
+
"repository": "docker/library/python"
|
|
37
37
|
}
|
|
38
38
|
]
|
|
@@ -63,12 +63,6 @@
|
|
|
63
63
|
},
|
|
64
64
|
"inferenceAmiVersion": {
|
|
65
65
|
"type": "string"
|
|
66
|
-
},
|
|
67
|
-
"recommendedInstanceTypes": {
|
|
68
|
-
"type": "array",
|
|
69
|
-
"items": {
|
|
70
|
-
"type": "string"
|
|
71
|
-
}
|
|
72
66
|
}
|
|
73
67
|
},
|
|
74
68
|
"additionalProperties": false
|
|
@@ -142,12 +136,6 @@
|
|
|
142
136
|
"type": "string"
|
|
143
137
|
}
|
|
144
138
|
},
|
|
145
|
-
"recommendedInstanceTypes": {
|
|
146
|
-
"type": "array",
|
|
147
|
-
"items": {
|
|
148
|
-
"type": "string"
|
|
149
|
-
}
|
|
150
|
-
},
|
|
151
139
|
"notes": {
|
|
152
140
|
"type": "string"
|
|
153
141
|
}
|
|
@@ -157,6 +145,12 @@
|
|
|
157
145
|
},
|
|
158
146
|
"notes": {
|
|
159
147
|
"type": "string"
|
|
148
|
+
},
|
|
149
|
+
"supportedModelTypes": {
|
|
150
|
+
"type": "array",
|
|
151
|
+
"items": {
|
|
152
|
+
"type": "string"
|
|
153
|
+
}
|
|
160
154
|
}
|
|
161
155
|
},
|
|
162
156
|
"additionalProperties": false
|
|
@@ -92,6 +92,35 @@
|
|
|
92
92
|
},
|
|
93
93
|
"notes": {
|
|
94
94
|
"type": "string"
|
|
95
|
+
},
|
|
96
|
+
"gpuMemoryGb": {
|
|
97
|
+
"oneOf": [
|
|
98
|
+
{
|
|
99
|
+
"type": "number",
|
|
100
|
+
"minimum": 0
|
|
101
|
+
},
|
|
102
|
+
{
|
|
103
|
+
"type": "null"
|
|
104
|
+
}
|
|
105
|
+
]
|
|
106
|
+
},
|
|
107
|
+
"gpuType": {
|
|
108
|
+
"oneOf": [
|
|
109
|
+
{
|
|
110
|
+
"type": "string"
|
|
111
|
+
},
|
|
112
|
+
{
|
|
113
|
+
"type": "null"
|
|
114
|
+
}
|
|
115
|
+
]
|
|
116
|
+
},
|
|
117
|
+
"costTier": {
|
|
118
|
+
"type": "string",
|
|
119
|
+
"enum": [
|
|
120
|
+
"low",
|
|
121
|
+
"medium",
|
|
122
|
+
"high"
|
|
123
|
+
]
|
|
95
124
|
}
|
|
96
125
|
},
|
|
97
126
|
"additionalProperties": false
|
|
@@ -21,8 +21,12 @@
|
|
|
21
21
|
},
|
|
22
22
|
"chat_template": {
|
|
23
23
|
"oneOf": [
|
|
24
|
-
{
|
|
25
|
-
|
|
24
|
+
{
|
|
25
|
+
"type": "string"
|
|
26
|
+
},
|
|
27
|
+
{
|
|
28
|
+
"type": "null"
|
|
29
|
+
}
|
|
26
30
|
]
|
|
27
31
|
},
|
|
28
32
|
"gated": {
|
|
@@ -36,8 +40,12 @@
|
|
|
36
40
|
},
|
|
37
41
|
"architecture": {
|
|
38
42
|
"oneOf": [
|
|
39
|
-
{
|
|
40
|
-
|
|
43
|
+
{
|
|
44
|
+
"type": "string"
|
|
45
|
+
},
|
|
46
|
+
{
|
|
47
|
+
"type": "null"
|
|
48
|
+
}
|
|
41
49
|
]
|
|
42
50
|
},
|
|
43
51
|
"framework_compatibility": {
|
|
@@ -74,12 +82,6 @@
|
|
|
74
82
|
"additionalProperties": {
|
|
75
83
|
"type": "string"
|
|
76
84
|
}
|
|
77
|
-
},
|
|
78
|
-
"recommendedInstanceTypes": {
|
|
79
|
-
"type": "array",
|
|
80
|
-
"items": {
|
|
81
|
-
"type": "string"
|
|
82
|
-
}
|
|
83
85
|
}
|
|
84
86
|
},
|
|
85
87
|
"additionalProperties": false
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
3
|
+
"$id": "unified-model-catalog.schema.json",
|
|
4
|
+
"type": "object",
|
|
5
|
+
"minProperties": 1,
|
|
6
|
+
"additionalProperties": {
|
|
7
|
+
"type": "object",
|
|
8
|
+
"required": [
|
|
9
|
+
"architecture",
|
|
10
|
+
"tasks",
|
|
11
|
+
"modelType"
|
|
12
|
+
],
|
|
13
|
+
"properties": {
|
|
14
|
+
"architecture": {
|
|
15
|
+
"oneOf": [
|
|
16
|
+
{
|
|
17
|
+
"type": "string",
|
|
18
|
+
"minLength": 1
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
"type": "null"
|
|
22
|
+
}
|
|
23
|
+
]
|
|
24
|
+
},
|
|
25
|
+
"tasks": {
|
|
26
|
+
"type": "array",
|
|
27
|
+
"items": {
|
|
28
|
+
"type": "string"
|
|
29
|
+
},
|
|
30
|
+
"minItems": 1
|
|
31
|
+
},
|
|
32
|
+
"modelType": {
|
|
33
|
+
"type": "string",
|
|
34
|
+
"enum": [
|
|
35
|
+
"transformer",
|
|
36
|
+
"diffusor",
|
|
37
|
+
"predictor"
|
|
38
|
+
]
|
|
39
|
+
},
|
|
40
|
+
"family": {
|
|
41
|
+
"type": "string"
|
|
42
|
+
},
|
|
43
|
+
"parameterCount": {
|
|
44
|
+
"type": "integer"
|
|
45
|
+
},
|
|
46
|
+
"defaultDtype": {
|
|
47
|
+
"type": "string",
|
|
48
|
+
"enum": [
|
|
49
|
+
"float32",
|
|
50
|
+
"float16",
|
|
51
|
+
"bfloat16",
|
|
52
|
+
"int8",
|
|
53
|
+
"int4"
|
|
54
|
+
]
|
|
55
|
+
},
|
|
56
|
+
"maxPositionEmbeddings": {
|
|
57
|
+
"type": "integer"
|
|
58
|
+
},
|
|
59
|
+
"recommendedQuantizations": {
|
|
60
|
+
"type": "array",
|
|
61
|
+
"items": {
|
|
62
|
+
"type": "string"
|
|
63
|
+
}
|
|
64
|
+
},
|
|
65
|
+
"chatTemplate": {
|
|
66
|
+
"oneOf": [
|
|
67
|
+
{
|
|
68
|
+
"type": "string"
|
|
69
|
+
},
|
|
70
|
+
{
|
|
71
|
+
"type": "null"
|
|
72
|
+
}
|
|
73
|
+
]
|
|
74
|
+
},
|
|
75
|
+
"gated": {
|
|
76
|
+
"type": "boolean"
|
|
77
|
+
},
|
|
78
|
+
"tags": {
|
|
79
|
+
"type": "array",
|
|
80
|
+
"items": {
|
|
81
|
+
"type": "string"
|
|
82
|
+
}
|
|
83
|
+
},
|
|
84
|
+
"frameworkCompatibility": {
|
|
85
|
+
"type": "object",
|
|
86
|
+
"additionalProperties": {
|
|
87
|
+
"type": "string"
|
|
88
|
+
}
|
|
89
|
+
},
|
|
90
|
+
"validationLevel": {
|
|
91
|
+
"type": "string",
|
|
92
|
+
"enum": [
|
|
93
|
+
"tested",
|
|
94
|
+
"community-validated",
|
|
95
|
+
"experimental",
|
|
96
|
+
"untested"
|
|
97
|
+
]
|
|
98
|
+
},
|
|
99
|
+
"profiles": {
|
|
100
|
+
"type": "object",
|
|
101
|
+
"additionalProperties": {
|
|
102
|
+
"type": "object",
|
|
103
|
+
"required": [
|
|
104
|
+
"displayName"
|
|
105
|
+
],
|
|
106
|
+
"properties": {
|
|
107
|
+
"displayName": {
|
|
108
|
+
"type": "string"
|
|
109
|
+
},
|
|
110
|
+
"envVars": {
|
|
111
|
+
"type": "object",
|
|
112
|
+
"additionalProperties": {
|
|
113
|
+
"type": "string"
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
},
|
|
117
|
+
"additionalProperties": false
|
|
118
|
+
}
|
|
119
|
+
},
|
|
120
|
+
"notes": {
|
|
121
|
+
"type": "string"
|
|
122
|
+
},
|
|
123
|
+
"pipeline": {
|
|
124
|
+
"type": "string"
|
|
125
|
+
}
|
|
126
|
+
},
|
|
127
|
+
"additionalProperties": false
|
|
128
|
+
}
|
|
129
|
+
}
|
|
@@ -195,11 +195,12 @@ class HuggingFaceResolver extends ModelResolver {
|
|
|
195
195
|
}
|
|
196
196
|
|
|
197
197
|
// Fetch model config (conditional)
|
|
198
|
-
if (!fields || fields.includes('architecture')) {
|
|
198
|
+
if (!fields || fields.includes('architecture') || fields.includes('model_type')) {
|
|
199
199
|
const modelConfig = await this._fetchJson(
|
|
200
200
|
`${this.baseUrl}/${modelId}/resolve/main/config.json`
|
|
201
201
|
)
|
|
202
202
|
metadata.architecture = modelConfig?.architectures?.[0] || null
|
|
203
|
+
metadata.model_type = modelConfig?.model_type || null
|
|
203
204
|
}
|
|
204
205
|
|
|
205
206
|
return Object.keys(metadata).length > 0 ? metadata : null
|
|
@@ -1433,9 +1434,8 @@ let POPULAR_MODELS_CATALOG
|
|
|
1433
1434
|
|
|
1434
1435
|
try {
|
|
1435
1436
|
POPULAR_MODELS_CATALOG = {
|
|
1436
|
-
...loadCatalog('
|
|
1437
|
-
...loadCatalog('
|
|
1438
|
-
...loadCatalog('./catalogs/jumpstart-public.json')
|
|
1437
|
+
...loadCatalog('../lib/catalogs/models.json'),
|
|
1438
|
+
...loadCatalog('../lib/catalogs/jumpstart-public.json')
|
|
1439
1439
|
}
|
|
1440
1440
|
} catch (err) {
|
|
1441
1441
|
process.stderr.write(`[model-picker] Fatal: ${err.message}\n`)
|
|
@@ -8,9 +8,8 @@
|
|
|
8
8
|
"discover": true
|
|
9
9
|
},
|
|
10
10
|
"catalogs": {
|
|
11
|
-
"
|
|
12
|
-
"
|
|
13
|
-
"jumpstart-public": "./catalogs/jumpstart-public.json"
|
|
11
|
+
"models": "../lib/catalogs/models.json",
|
|
12
|
+
"jumpstart-public": "../lib/catalogs/jumpstart-public.json"
|
|
14
13
|
},
|
|
15
14
|
"tool": {
|
|
16
15
|
"name": "get_models"
|
|
@@ -64,7 +64,7 @@ let AWS_REGIONS
|
|
|
64
64
|
let VALID_REGION_CODES
|
|
65
65
|
|
|
66
66
|
try {
|
|
67
|
-
AWS_REGIONS = loadCatalog('
|
|
67
|
+
AWS_REGIONS = loadCatalog('../lib/catalogs/regions.json')
|
|
68
68
|
VALID_REGION_CODES = new Set(AWS_REGIONS.map(r => r.code))
|
|
69
69
|
} catch (err) {
|
|
70
70
|
process.stderr.write(`[region-picker] Fatal: ${err.message}\n`)
|