@aws/ml-container-creator 0.2.5 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. package/bin/cli.js +45 -4
  2. package/config/bootstrap-stack.json +14 -0
  3. package/infra/ci-harness/package-lock.json +22 -9
  4. package/package.json +7 -8
  5. package/servers/base-image-picker/index.js +3 -3
  6. package/servers/base-image-picker/manifest.json +4 -2
  7. package/servers/instance-sizer/index.js +564 -0
  8. package/servers/instance-sizer/lib/instance-ranker.js +270 -0
  9. package/servers/instance-sizer/lib/model-resolver.js +269 -0
  10. package/servers/instance-sizer/lib/vram-estimator.js +177 -0
  11. package/servers/instance-sizer/manifest.json +17 -0
  12. package/servers/instance-sizer/package.json +15 -0
  13. package/servers/{instance-recommender → lib}/catalogs/instances.json +136 -34
  14. package/servers/{base-image-picker → lib}/catalogs/model-servers.json +302 -254
  15. package/servers/lib/catalogs/model-sizes.json +131 -0
  16. package/servers/lib/catalogs/models.json +632 -0
  17. package/servers/{model-picker → lib}/catalogs/popular-diffusors.json +32 -10
  18. package/servers/{model-picker → lib}/catalogs/popular-transformers.json +59 -26
  19. package/servers/{base-image-picker → lib}/catalogs/python-slim.json +12 -12
  20. package/servers/lib/schemas/image-catalog.schema.json +6 -12
  21. package/servers/lib/schemas/instances.schema.json +29 -0
  22. package/servers/lib/schemas/model-catalog.schema.json +12 -10
  23. package/servers/lib/schemas/unified-model-catalog.schema.json +129 -0
  24. package/servers/model-picker/index.js +4 -4
  25. package/servers/model-picker/manifest.json +2 -3
  26. package/servers/region-picker/index.js +1 -1
  27. package/servers/region-picker/manifest.json +1 -1
  28. package/src/app.js +36 -0
  29. package/src/lib/architecture-sync.js +171 -0
  30. package/src/lib/arn-detection.js +22 -0
  31. package/src/lib/bootstrap-command-handler.js +120 -0
  32. package/src/lib/cli-handler.js +3 -3
  33. package/src/lib/config-manager.js +47 -1
  34. package/src/lib/configuration-manager.js +2 -2
  35. package/src/lib/cross-cutting-checker.js +460 -0
  36. package/src/lib/deployment-entry-schema.js +1 -2
  37. package/src/lib/dry-run-validator.js +78 -0
  38. package/src/lib/generation-validator.js +102 -0
  39. package/src/lib/mcp-validator-config.js +89 -0
  40. package/src/lib/payload-builder.js +153 -0
  41. package/src/lib/prompt-runner.js +866 -149
  42. package/src/lib/prompts.js +2 -2
  43. package/src/lib/registry-command-handler.js +236 -0
  44. package/src/lib/registry-loader.js +5 -5
  45. package/src/lib/schema-sync.js +203 -0
  46. package/src/lib/schema-validation-engine.js +195 -0
  47. package/src/lib/secret-classification.js +56 -0
  48. package/src/lib/secrets-command-handler.js +550 -0
  49. package/src/lib/service-model-parser.js +102 -0
  50. package/src/lib/validate-runner.js +216 -0
  51. package/src/lib/validation-report.js +140 -0
  52. package/src/lib/validators/base-validator.js +36 -0
  53. package/src/lib/validators/catalog-validator.js +177 -0
  54. package/src/lib/validators/enum-validator.js +120 -0
  55. package/src/lib/validators/required-field-validator.js +150 -0
  56. package/src/lib/validators/type-validator.js +313 -0
  57. package/src/prompt-adapter.js +3 -2
  58. package/templates/Dockerfile +1 -1
  59. package/templates/do/build +37 -5
  60. package/templates/do/config +15 -3
  61. package/templates/do/deploy +60 -5
  62. package/templates/do/logs +18 -3
  63. package/templates/do/run +15 -1
  64. package/templates/do/validate +61 -0
  65. package/servers/instance-recommender/LICENSE +0 -202
  66. package/servers/instance-recommender/index.js +0 -284
  67. package/servers/instance-recommender/manifest.json +0 -16
  68. package/servers/instance-recommender/package.json +0 -15
  69. /package/servers/{model-picker → lib}/catalogs/jumpstart-public.json +0 -0
  70. /package/servers/{region-picker → lib}/catalogs/regions.json +0 -0
  71. /package/servers/{base-image-picker → lib}/catalogs/triton-backends.json +0 -0
  72. /package/servers/{base-image-picker → lib}/catalogs/triton.json +0 -0
@@ -3,7 +3,11 @@
3
3
  "family": "stable-diffusion-3",
4
4
  "chat_template": null,
5
5
  "gated": false,
6
- "tags": ["image-generation", "diffusion", "stable-diffusion"],
6
+ "tags": [
7
+ "image-generation",
8
+ "diffusion",
9
+ "stable-diffusion"
10
+ ],
7
11
  "architecture": "StableDiffusion3Pipeline",
8
12
  "framework_compatibility": {
9
13
  "vllm-omni": ">=0.14.0"
@@ -12,8 +16,7 @@
12
16
  "profiles": {
13
17
  "default": {
14
18
  "displayName": "SD3.5 Medium",
15
- "envVars": {},
16
- "recommendedInstanceTypes": ["ml.g5.2xlarge", "ml.g5.4xlarge"]
19
+ "envVars": {}
17
20
  }
18
21
  },
19
22
  "notes": "Stable Diffusion 3.5 medium model. Supported natively by vLLM-Omni StableDiffusion3Pipeline."
@@ -22,7 +25,11 @@
22
25
  "family": "flux",
23
26
  "chat_template": null,
24
27
  "gated": true,
25
- "tags": ["image-generation", "diffusion", "flux"],
28
+ "tags": [
29
+ "image-generation",
30
+ "diffusion",
31
+ "flux"
32
+ ],
26
33
  "architecture": "FluxPipeline",
27
34
  "framework_compatibility": {
28
35
  "vllm-omni": ">=0.14.0"
@@ -31,8 +38,7 @@
31
38
  "profiles": {
32
39
  "default": {
33
40
  "displayName": "FLUX.1 Dev",
34
- "envVars": {},
35
- "recommendedInstanceTypes": ["ml.g5.4xlarge", "ml.g5.12xlarge"]
41
+ "envVars": {}
36
42
  }
37
43
  },
38
44
  "notes": "FLUX.1-dev high-quality generation model. Uses dual text encoders (CLIP + T5) and FlowMatchEuler scheduler. Requires significant VRAM."
@@ -41,7 +47,11 @@
41
47
  "family": "flux",
42
48
  "chat_template": null,
43
49
  "gated": false,
44
- "tags": ["image-generation", "diffusion", "flux"],
50
+ "tags": [
51
+ "image-generation",
52
+ "diffusion",
53
+ "flux"
54
+ ],
45
55
  "architecture": "FluxPipeline",
46
56
  "framework_compatibility": {
47
57
  "vllm-omni": ">=0.14.0"
@@ -53,7 +63,11 @@
53
63
  "family": "wan",
54
64
  "chat_template": null,
55
65
  "gated": false,
56
- "tags": ["video-generation", "diffusion", "wan"],
66
+ "tags": [
67
+ "video-generation",
68
+ "diffusion",
69
+ "wan"
70
+ ],
57
71
  "architecture": "WanPipeline",
58
72
  "framework_compatibility": {
59
73
  "vllm-omni": ">=0.16.0"
@@ -65,7 +79,11 @@
65
79
  "family": "stable-diffusion",
66
80
  "chat_template": null,
67
81
  "gated": false,
68
- "tags": ["image-generation", "diffusion", "stable-diffusion"],
82
+ "tags": [
83
+ "image-generation",
84
+ "diffusion",
85
+ "stable-diffusion"
86
+ ],
69
87
  "architecture": null,
70
88
  "framework_compatibility": {
71
89
  "vllm-omni": ">=0.14.0"
@@ -77,7 +95,11 @@
77
95
  "family": "flux",
78
96
  "chat_template": null,
79
97
  "gated": false,
80
- "tags": ["image-generation", "diffusion", "flux"],
98
+ "tags": [
99
+ "image-generation",
100
+ "diffusion",
101
+ "flux"
102
+ ],
81
103
  "architecture": null,
82
104
  "framework_compatibility": {
83
105
  "vllm-omni": ">=0.14.0"
@@ -3,7 +3,11 @@
3
3
  "family": "gpt-oss",
4
4
  "chat_template": "",
5
5
  "gated": false,
6
- "tags": ["text-generation", "openai", "conversational"],
6
+ "tags": [
7
+ "text-generation",
8
+ "openai",
9
+ "conversational"
10
+ ],
7
11
  "architecture": "GPT2LMHeadModel",
8
12
  "framework_compatibility": {
9
13
  "vllm": ">=0.3.0",
@@ -17,7 +21,11 @@
17
21
  "family": "llama-2",
18
22
  "chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{{ '[INST] <<SYS>>\\n' + message['content'] + '\\n<</SYS>>\\n\\n' }}{% elif message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + ' ' }}{% endif %}{% endfor %}",
19
23
  "gated": true,
20
- "tags": ["text-generation", "llama-2", "conversational"],
24
+ "tags": [
25
+ "text-generation",
26
+ "llama-2",
27
+ "conversational"
28
+ ],
21
29
  "architecture": "LlamaForCausalLM",
22
30
  "framework_compatibility": {
23
31
  "vllm": ">=0.3.0",
@@ -31,8 +39,7 @@
31
39
  "envVars": {
32
40
  "MAX_MODEL_LEN": "4096",
33
41
  "GPU_MEMORY_UTILIZATION": "0.9"
34
- },
35
- "recommendedInstanceTypes": ["ml.g5.xlarge", "ml.g5.2xlarge"]
42
+ }
36
43
  }
37
44
  },
38
45
  "notes": "Llama-2 7B chat model with official chat template. Requires HuggingFace authentication for download"
@@ -41,7 +48,11 @@
41
48
  "family": "llama-2",
42
49
  "chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{{ '[INST] <<SYS>>\\n' + message['content'] + '\\n<</SYS>>\\n\\n' }}{% elif message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + ' ' }}{% endif %}{% endfor %}",
43
50
  "gated": true,
44
- "tags": ["text-generation", "llama-2", "conversational"],
51
+ "tags": [
52
+ "text-generation",
53
+ "llama-2",
54
+ "conversational"
55
+ ],
45
56
  "architecture": "LlamaForCausalLM",
46
57
  "framework_compatibility": {
47
58
  "vllm": ">=0.3.0",
@@ -55,18 +66,20 @@
55
66
  "envVars": {
56
67
  "MAX_MODEL_LEN": "4096",
57
68
  "GPU_MEMORY_UTILIZATION": "0.9"
58
- },
59
- "recommendedInstanceTypes": ["ml.g5.2xlarge", "ml.g5.4xlarge"]
69
+ }
60
70
  }
61
71
  },
62
72
  "notes": "Llama-2 13B chat model. Requires more GPU memory than 7B variant"
63
73
  },
64
-
65
74
  "meta-llama/Llama-2-70b-chat-hf": {
66
75
  "family": "llama-2",
67
76
  "chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{{ '[INST] <<SYS>>\\n' + message['content'] + '\\n<</SYS>>\\n\\n' }}{% elif message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + ' ' }}{% endif %}{% endfor %}",
68
77
  "gated": true,
69
- "tags": ["text-generation", "llama-2", "conversational"],
78
+ "tags": [
79
+ "text-generation",
80
+ "llama-2",
81
+ "conversational"
82
+ ],
70
83
  "architecture": "LlamaForCausalLM",
71
84
  "framework_compatibility": {
72
85
  "vllm": ">=0.3.0",
@@ -81,8 +94,7 @@
81
94
  "TENSOR_PARALLEL_SIZE": "2",
82
95
  "MAX_MODEL_LEN": "4096",
83
96
  "GPU_MEMORY_UTILIZATION": "0.95"
84
- },
85
- "recommendedInstanceTypes": ["ml.g5.12xlarge"]
97
+ }
86
98
  },
87
99
  "70b-tp4": {
88
100
  "displayName": "Llama-2 70B (4-GPU)",
@@ -90,8 +102,7 @@
90
102
  "TENSOR_PARALLEL_SIZE": "4",
91
103
  "MAX_MODEL_LEN": "4096",
92
104
  "GPU_MEMORY_UTILIZATION": "0.9"
93
- },
94
- "recommendedInstanceTypes": ["ml.g5.12xlarge", "ml.g5.48xlarge"]
105
+ }
95
106
  }
96
107
  },
97
108
  "notes": "Llama-2 70B requires tensor parallelism across multiple GPUs"
@@ -100,7 +111,11 @@
100
111
  "family": "mistral",
101
112
  "chat_template": "{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token }}{% endif %}{% endfor %}",
102
113
  "gated": false,
103
- "tags": ["text-generation", "mistral", "conversational"],
114
+ "tags": [
115
+ "text-generation",
116
+ "mistral",
117
+ "conversational"
118
+ ],
104
119
  "architecture": "MistralForCausalLM",
105
120
  "framework_compatibility": {
106
121
  "vllm": ">=0.3.0",
@@ -114,8 +129,7 @@
114
129
  "envVars": {
115
130
  "MAX_MODEL_LEN": "8192",
116
131
  "GPU_MEMORY_UTILIZATION": "0.9"
117
- },
118
- "recommendedInstanceTypes": ["ml.g5.xlarge", "ml.g5.2xlarge"]
132
+ }
119
133
  }
120
134
  },
121
135
  "notes": "Mistral 7B v0.1 with 8K context window"
@@ -124,7 +138,11 @@
124
138
  "family": "mistral",
125
139
  "chat_template": "{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token }}{% endif %}{% endfor %}",
126
140
  "gated": false,
127
- "tags": ["text-generation", "mistral", "conversational"],
141
+ "tags": [
142
+ "text-generation",
143
+ "mistral",
144
+ "conversational"
145
+ ],
128
146
  "architecture": "MistralForCausalLM",
129
147
  "framework_compatibility": {
130
148
  "vllm": ">=0.3.0",
@@ -138,8 +156,7 @@
138
156
  "envVars": {
139
157
  "MAX_MODEL_LEN": "32768",
140
158
  "GPU_MEMORY_UTILIZATION": "0.9"
141
- },
142
- "recommendedInstanceTypes": ["ml.g5.2xlarge", "ml.g5.4xlarge"]
159
+ }
143
160
  }
144
161
  },
145
162
  "notes": "Mistral 7B v0.2 with extended 32K context window. Requires more memory for long contexts"
@@ -148,7 +165,11 @@
148
165
  "family": "mistral",
149
166
  "chat_template": "{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token }}{% endif %}{% endfor %}",
150
167
  "gated": false,
151
- "tags": ["text-generation", "mistral", "mixture-of-experts"],
168
+ "tags": [
169
+ "text-generation",
170
+ "mistral",
171
+ "mixture-of-experts"
172
+ ],
152
173
  "architecture": "MixtralForCausalLM",
153
174
  "framework_compatibility": {
154
175
  "vllm": ">=0.3.0",
@@ -163,8 +184,7 @@
163
184
  "TENSOR_PARALLEL_SIZE": "2",
164
185
  "MAX_MODEL_LEN": "32768",
165
186
  "GPU_MEMORY_UTILIZATION": "0.95"
166
- },
167
- "recommendedInstanceTypes": ["ml.g5.12xlarge"]
187
+ }
168
188
  }
169
189
  },
170
190
  "notes": "Mixtral 8x7B MoE model. Requires tensor parallelism for efficient inference"
@@ -173,7 +193,10 @@
173
193
  "family": "llama-2",
174
194
  "chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{{ '[INST] <<SYS>>\\n' + message['content'] + '\\n<</SYS>>\\n\\n' }}{% elif message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + ' ' }}{% endif %}{% endfor %}",
175
195
  "gated": true,
176
- "tags": ["text-generation", "llama-2"],
196
+ "tags": [
197
+ "text-generation",
198
+ "llama-2"
199
+ ],
177
200
  "architecture": null,
178
201
  "framework_compatibility": {
179
202
  "vllm": ">=0.3.0",
@@ -187,7 +210,10 @@
187
210
  "family": "mistral",
188
211
  "chat_template": "{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token }}{% endif %}{% endfor %}",
189
212
  "gated": false,
190
- "tags": ["text-generation", "mistral"],
213
+ "tags": [
214
+ "text-generation",
215
+ "mistral"
216
+ ],
191
217
  "architecture": null,
192
218
  "framework_compatibility": {
193
219
  "vllm": ">=0.3.0",
@@ -201,7 +227,11 @@
201
227
  "family": "codellama",
202
228
  "chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{{ '[INST] <<SYS>>\\n' + message['content'] + '\\n<</SYS>>\\n\\n' }}{% elif message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + ' ' }}{% endif %}{% endfor %}",
203
229
  "gated": false,
204
- "tags": ["text-generation", "code", "codellama"],
230
+ "tags": [
231
+ "text-generation",
232
+ "code",
233
+ "codellama"
234
+ ],
205
235
  "architecture": null,
206
236
  "framework_compatibility": {
207
237
  "vllm": ">=0.3.0",
@@ -214,7 +244,10 @@
214
244
  "family": "falcon",
215
245
  "chat_template": null,
216
246
  "gated": false,
217
- "tags": ["text-generation", "falcon"],
247
+ "tags": [
248
+ "text-generation",
249
+ "falcon"
250
+ ],
218
251
  "architecture": null,
219
252
  "framework_compatibility": {
220
253
  "vllm": ">=0.3.0",
@@ -1,38 +1,38 @@
1
1
  [
2
2
  {
3
- "image": "python:3.12-slim",
3
+ "image": "public.ecr.aws/docker/library/python:3.12-slim",
4
4
  "tag": "3.12-slim",
5
5
  "architecture": "amd64",
6
6
  "created": "2024-10-01T00:00:00Z",
7
7
  "labels": { "python_version": "3.12" },
8
- "registry": "dockerhub",
9
- "repository": "python"
8
+ "registry": "ecr-public",
9
+ "repository": "docker/library/python"
10
10
  },
11
11
  {
12
- "image": "python:3.11-slim",
12
+ "image": "public.ecr.aws/docker/library/python:3.11-slim",
13
13
  "tag": "3.11-slim",
14
14
  "architecture": "amd64",
15
15
  "created": "2023-10-01T00:00:00Z",
16
16
  "labels": { "python_version": "3.11" },
17
- "registry": "dockerhub",
18
- "repository": "python"
17
+ "registry": "ecr-public",
18
+ "repository": "docker/library/python"
19
19
  },
20
20
  {
21
- "image": "python:3.10-slim",
21
+ "image": "public.ecr.aws/docker/library/python:3.10-slim",
22
22
  "tag": "3.10-slim",
23
23
  "architecture": "amd64",
24
24
  "created": "2022-10-01T00:00:00Z",
25
25
  "labels": { "python_version": "3.10" },
26
- "registry": "dockerhub",
27
- "repository": "python"
26
+ "registry": "ecr-public",
27
+ "repository": "docker/library/python"
28
28
  },
29
29
  {
30
- "image": "python:3.9-slim",
30
+ "image": "public.ecr.aws/docker/library/python:3.9-slim",
31
31
  "tag": "3.9-slim",
32
32
  "architecture": "amd64",
33
33
  "created": "2021-10-01T00:00:00Z",
34
34
  "labels": { "python_version": "3.9" },
35
- "registry": "dockerhub",
36
- "repository": "python"
35
+ "registry": "ecr-public",
36
+ "repository": "docker/library/python"
37
37
  }
38
38
  ]
@@ -63,12 +63,6 @@
63
63
  },
64
64
  "inferenceAmiVersion": {
65
65
  "type": "string"
66
- },
67
- "recommendedInstanceTypes": {
68
- "type": "array",
69
- "items": {
70
- "type": "string"
71
- }
72
66
  }
73
67
  },
74
68
  "additionalProperties": false
@@ -142,12 +136,6 @@
142
136
  "type": "string"
143
137
  }
144
138
  },
145
- "recommendedInstanceTypes": {
146
- "type": "array",
147
- "items": {
148
- "type": "string"
149
- }
150
- },
151
139
  "notes": {
152
140
  "type": "string"
153
141
  }
@@ -157,6 +145,12 @@
157
145
  },
158
146
  "notes": {
159
147
  "type": "string"
148
+ },
149
+ "supportedModelTypes": {
150
+ "type": "array",
151
+ "items": {
152
+ "type": "string"
153
+ }
160
154
  }
161
155
  },
162
156
  "additionalProperties": false
@@ -92,6 +92,35 @@
92
92
  },
93
93
  "notes": {
94
94
  "type": "string"
95
+ },
96
+ "gpuMemoryGb": {
97
+ "oneOf": [
98
+ {
99
+ "type": "number",
100
+ "minimum": 0
101
+ },
102
+ {
103
+ "type": "null"
104
+ }
105
+ ]
106
+ },
107
+ "gpuType": {
108
+ "oneOf": [
109
+ {
110
+ "type": "string"
111
+ },
112
+ {
113
+ "type": "null"
114
+ }
115
+ ]
116
+ },
117
+ "costTier": {
118
+ "type": "string",
119
+ "enum": [
120
+ "low",
121
+ "medium",
122
+ "high"
123
+ ]
95
124
  }
96
125
  },
97
126
  "additionalProperties": false
@@ -21,8 +21,12 @@
21
21
  },
22
22
  "chat_template": {
23
23
  "oneOf": [
24
- { "type": "string" },
25
- { "type": "null" }
24
+ {
25
+ "type": "string"
26
+ },
27
+ {
28
+ "type": "null"
29
+ }
26
30
  ]
27
31
  },
28
32
  "gated": {
@@ -36,8 +40,12 @@
36
40
  },
37
41
  "architecture": {
38
42
  "oneOf": [
39
- { "type": "string" },
40
- { "type": "null" }
43
+ {
44
+ "type": "string"
45
+ },
46
+ {
47
+ "type": "null"
48
+ }
41
49
  ]
42
50
  },
43
51
  "framework_compatibility": {
@@ -74,12 +82,6 @@
74
82
  "additionalProperties": {
75
83
  "type": "string"
76
84
  }
77
- },
78
- "recommendedInstanceTypes": {
79
- "type": "array",
80
- "items": {
81
- "type": "string"
82
- }
83
85
  }
84
86
  },
85
87
  "additionalProperties": false
@@ -0,0 +1,129 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-07/schema#",
3
+ "$id": "unified-model-catalog.schema.json",
4
+ "type": "object",
5
+ "minProperties": 1,
6
+ "additionalProperties": {
7
+ "type": "object",
8
+ "required": [
9
+ "architecture",
10
+ "tasks",
11
+ "modelType"
12
+ ],
13
+ "properties": {
14
+ "architecture": {
15
+ "oneOf": [
16
+ {
17
+ "type": "string",
18
+ "minLength": 1
19
+ },
20
+ {
21
+ "type": "null"
22
+ }
23
+ ]
24
+ },
25
+ "tasks": {
26
+ "type": "array",
27
+ "items": {
28
+ "type": "string"
29
+ },
30
+ "minItems": 1
31
+ },
32
+ "modelType": {
33
+ "type": "string",
34
+ "enum": [
35
+ "transformer",
36
+ "diffusor",
37
+ "predictor"
38
+ ]
39
+ },
40
+ "family": {
41
+ "type": "string"
42
+ },
43
+ "parameterCount": {
44
+ "type": "integer"
45
+ },
46
+ "defaultDtype": {
47
+ "type": "string",
48
+ "enum": [
49
+ "float32",
50
+ "float16",
51
+ "bfloat16",
52
+ "int8",
53
+ "int4"
54
+ ]
55
+ },
56
+ "maxPositionEmbeddings": {
57
+ "type": "integer"
58
+ },
59
+ "recommendedQuantizations": {
60
+ "type": "array",
61
+ "items": {
62
+ "type": "string"
63
+ }
64
+ },
65
+ "chatTemplate": {
66
+ "oneOf": [
67
+ {
68
+ "type": "string"
69
+ },
70
+ {
71
+ "type": "null"
72
+ }
73
+ ]
74
+ },
75
+ "gated": {
76
+ "type": "boolean"
77
+ },
78
+ "tags": {
79
+ "type": "array",
80
+ "items": {
81
+ "type": "string"
82
+ }
83
+ },
84
+ "frameworkCompatibility": {
85
+ "type": "object",
86
+ "additionalProperties": {
87
+ "type": "string"
88
+ }
89
+ },
90
+ "validationLevel": {
91
+ "type": "string",
92
+ "enum": [
93
+ "tested",
94
+ "community-validated",
95
+ "experimental",
96
+ "untested"
97
+ ]
98
+ },
99
+ "profiles": {
100
+ "type": "object",
101
+ "additionalProperties": {
102
+ "type": "object",
103
+ "required": [
104
+ "displayName"
105
+ ],
106
+ "properties": {
107
+ "displayName": {
108
+ "type": "string"
109
+ },
110
+ "envVars": {
111
+ "type": "object",
112
+ "additionalProperties": {
113
+ "type": "string"
114
+ }
115
+ }
116
+ },
117
+ "additionalProperties": false
118
+ }
119
+ },
120
+ "notes": {
121
+ "type": "string"
122
+ },
123
+ "pipeline": {
124
+ "type": "string"
125
+ }
126
+ },
127
+ "additionalProperties": false
128
+ }
129
+ }
@@ -195,11 +195,12 @@ class HuggingFaceResolver extends ModelResolver {
195
195
  }
196
196
 
197
197
  // Fetch model config (conditional)
198
- if (!fields || fields.includes('architecture')) {
198
+ if (!fields || fields.includes('architecture') || fields.includes('model_type')) {
199
199
  const modelConfig = await this._fetchJson(
200
200
  `${this.baseUrl}/${modelId}/resolve/main/config.json`
201
201
  )
202
202
  metadata.architecture = modelConfig?.architectures?.[0] || null
203
+ metadata.model_type = modelConfig?.model_type || null
203
204
  }
204
205
 
205
206
  return Object.keys(metadata).length > 0 ? metadata : null
@@ -1433,9 +1434,8 @@ let POPULAR_MODELS_CATALOG
1433
1434
 
1434
1435
  try {
1435
1436
  POPULAR_MODELS_CATALOG = {
1436
- ...loadCatalog('./catalogs/popular-transformers.json'),
1437
- ...loadCatalog('./catalogs/popular-diffusors.json'),
1438
- ...loadCatalog('./catalogs/jumpstart-public.json')
1437
+ ...loadCatalog('../lib/catalogs/models.json'),
1438
+ ...loadCatalog('../lib/catalogs/jumpstart-public.json')
1439
1439
  }
1440
1440
  } catch (err) {
1441
1441
  process.stderr.write(`[model-picker] Fatal: ${err.message}\n`)
@@ -8,9 +8,8 @@
8
8
  "discover": true
9
9
  },
10
10
  "catalogs": {
11
- "popular-transformers": "./catalogs/popular-transformers.json",
12
- "popular-diffusors": "./catalogs/popular-diffusors.json",
13
- "jumpstart-public": "./catalogs/jumpstart-public.json"
11
+ "models": "../lib/catalogs/models.json",
12
+ "jumpstart-public": "../lib/catalogs/jumpstart-public.json"
14
13
  },
15
14
  "tool": {
16
15
  "name": "get_models"
@@ -64,7 +64,7 @@ let AWS_REGIONS
64
64
  let VALID_REGION_CODES
65
65
 
66
66
  try {
67
- AWS_REGIONS = loadCatalog('./catalogs/regions.json')
67
+ AWS_REGIONS = loadCatalog('../lib/catalogs/regions.json')
68
68
  VALID_REGION_CODES = new Set(AWS_REGIONS.map(r => r.code))
69
69
  } catch (err) {
70
70
  process.stderr.write(`[region-picker] Fatal: ${err.message}\n`)
@@ -8,7 +8,7 @@
8
8
  "discover": false
9
9
  },
10
10
  "catalogs": {
11
- "regions": "./catalogs/regions.json"
11
+ "regions": "../lib/catalogs/regions.json"
12
12
  },
13
13
  "tool": {
14
14
  "name": "get_regions"