@aws/ml-container-creator 0.2.5 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. package/bin/cli.js +45 -4
  2. package/config/bootstrap-stack.json +14 -0
  3. package/infra/ci-harness/package-lock.json +22 -9
  4. package/package.json +7 -8
  5. package/servers/base-image-picker/index.js +3 -3
  6. package/servers/base-image-picker/manifest.json +4 -2
  7. package/servers/instance-sizer/index.js +564 -0
  8. package/servers/instance-sizer/lib/instance-ranker.js +270 -0
  9. package/servers/instance-sizer/lib/model-resolver.js +269 -0
  10. package/servers/instance-sizer/lib/vram-estimator.js +177 -0
  11. package/servers/instance-sizer/manifest.json +17 -0
  12. package/servers/instance-sizer/package.json +15 -0
  13. package/servers/{instance-recommender → lib}/catalogs/instances.json +136 -34
  14. package/servers/{base-image-picker → lib}/catalogs/model-servers.json +302 -254
  15. package/servers/lib/catalogs/model-sizes.json +131 -0
  16. package/servers/lib/catalogs/models.json +632 -0
  17. package/servers/{model-picker → lib}/catalogs/popular-diffusors.json +32 -10
  18. package/servers/{model-picker → lib}/catalogs/popular-transformers.json +59 -26
  19. package/servers/{base-image-picker → lib}/catalogs/python-slim.json +12 -12
  20. package/servers/lib/schemas/image-catalog.schema.json +6 -12
  21. package/servers/lib/schemas/instances.schema.json +29 -0
  22. package/servers/lib/schemas/model-catalog.schema.json +12 -10
  23. package/servers/lib/schemas/unified-model-catalog.schema.json +129 -0
  24. package/servers/model-picker/index.js +4 -4
  25. package/servers/model-picker/manifest.json +2 -3
  26. package/servers/region-picker/index.js +1 -1
  27. package/servers/region-picker/manifest.json +1 -1
  28. package/src/app.js +36 -0
  29. package/src/lib/architecture-sync.js +171 -0
  30. package/src/lib/arn-detection.js +22 -0
  31. package/src/lib/bootstrap-command-handler.js +120 -0
  32. package/src/lib/cli-handler.js +3 -3
  33. package/src/lib/config-manager.js +47 -1
  34. package/src/lib/configuration-manager.js +2 -2
  35. package/src/lib/cross-cutting-checker.js +460 -0
  36. package/src/lib/deployment-entry-schema.js +1 -2
  37. package/src/lib/dry-run-validator.js +78 -0
  38. package/src/lib/generation-validator.js +102 -0
  39. package/src/lib/mcp-validator-config.js +89 -0
  40. package/src/lib/payload-builder.js +153 -0
  41. package/src/lib/prompt-runner.js +866 -149
  42. package/src/lib/prompts.js +2 -2
  43. package/src/lib/registry-command-handler.js +236 -0
  44. package/src/lib/registry-loader.js +5 -5
  45. package/src/lib/schema-sync.js +203 -0
  46. package/src/lib/schema-validation-engine.js +195 -0
  47. package/src/lib/secret-classification.js +56 -0
  48. package/src/lib/secrets-command-handler.js +550 -0
  49. package/src/lib/service-model-parser.js +102 -0
  50. package/src/lib/validate-runner.js +216 -0
  51. package/src/lib/validation-report.js +140 -0
  52. package/src/lib/validators/base-validator.js +36 -0
  53. package/src/lib/validators/catalog-validator.js +177 -0
  54. package/src/lib/validators/enum-validator.js +120 -0
  55. package/src/lib/validators/required-field-validator.js +150 -0
  56. package/src/lib/validators/type-validator.js +313 -0
  57. package/src/prompt-adapter.js +3 -2
  58. package/templates/Dockerfile +1 -1
  59. package/templates/do/build +37 -5
  60. package/templates/do/config +15 -3
  61. package/templates/do/deploy +60 -5
  62. package/templates/do/logs +18 -3
  63. package/templates/do/run +15 -1
  64. package/templates/do/validate +61 -0
  65. package/servers/instance-recommender/LICENSE +0 -202
  66. package/servers/instance-recommender/index.js +0 -284
  67. package/servers/instance-recommender/manifest.json +0 -16
  68. package/servers/instance-recommender/package.json +0 -15
  69. /package/servers/{model-picker → lib}/catalogs/jumpstart-public.json +0 -0
  70. /package/servers/{region-picker → lib}/catalogs/regions.json +0 -0
  71. /package/servers/{base-image-picker → lib}/catalogs/triton-backends.json +0 -0
  72. /package/servers/{base-image-picker → lib}/catalogs/triton.json +0 -0
@@ -0,0 +1,270 @@
1
+ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ // SPDX-License-Identifier: Apache-2.0
3
+
4
+ /**
5
+ * Instance Filter & Ranker
6
+ *
7
+ * Filters and ranks SageMaker instances by compatibility with a model's
8
+ * VRAM requirement. Considers tensor parallelism for multi-GPU instances
9
+ * and applies cost-efficiency ranking within each TP tier.
10
+ */
11
+
12
// ── Constants ────────────────────────────────────────────────────────────────

/**
 * GPU memory per chip (in GB) by hardware type.
 * Used when the catalog doesn't have a direct gpuMemoryGb field and the
 * accelerator string cannot be parsed (last-resort tier in getPerGpuMemoryGb).
 */
const GPU_MEMORY_MAP = {
  'NVIDIA T4': 16,
  'NVIDIA A10G': 24,
  'NVIDIA V100': 16,
  'NVIDIA L4': 24,
  'NVIDIA A100': 40, // NOTE(review): assumes the 40GB A100 variant — confirm catalog intent (80GB A100s also exist)
  'NVIDIA H100': 80,
  'AWS Inferentia2': 32,
  'AWS Trainium': 32
}

/**
 * Cost tier classification by instance family.
 * Families not listed here default to 'medium' (see getCostTier).
 */
const COST_TIER_MAP = {
  'g4dn': 'low',
  'inf2': 'low',
  'g5': 'medium',
  'g6': 'medium',
  'trn1': 'medium',
  'p3': 'high',
  'p4d': 'high',
  'p4de': 'high',
  'p5': 'high'
}

/**
 * Relative cost weight by tier for sorting within TP groups.
 * Lower is better (more cost-efficient); unknown tiers weigh 2 in the sort.
 */
const COST_TIER_WEIGHT = {
  'low': 1,
  'medium': 2,
  'high': 3
}

/**
 * Generation weight by instance family.
 * Lower is newer (sorted first). Newer generations offer better perf/$.
 * Families not listed default to 4 in the ranking comparator.
 */
const GENERATION_WEIGHT = {
  'g6': 1,
  'p5': 1,
  'trn1': 2,
  'inf2': 2,
  'g5': 3,
  'p4de': 4,
  'p4d': 4,
  'p3': 5,
  'g4dn': 6
}

/**
 * TP overhead penalty: 10% per additional GPU beyond the first.
 * Effective VRAM = totalVram × (1 - 0.10 × (gpuCount - 1))
 * Applied in effectiveVram() for multi-GPU tensor-parallel placements.
 */
const TP_OVERHEAD_PER_GPU = 0.10
75
+
76
// ── Helper Functions ─────────────────────────────────────────────────────────

/**
 * Extract per-GPU memory in GB from an instance catalog entry.
 *
 * Tries these approaches in order:
 * 1. Direct gpuMemoryGb field (if catalog has been extended)
 * 2. Parse from accelerator string (e.g., "4x A10G 96GB" → 24 per GPU)
 * 3. Lookup by hardware type from GPU_MEMORY_MAP
 *
 * The size parse accepts fractional values, an optional space, and any
 * case ("96 GB", "24.5gb"); the strict original pattern misread
 * "96.5GB" as 5GB and rejected "96 GB" outright. The multiplier prefix
 * also tolerates spacing ("4 x ...").
 *
 * @param {object} instance - Instance catalog entry
 * @returns {number|null} Per-GPU memory in GB, or null if not determinable
 */
const getPerGpuMemoryGb = (instance) => {
  // 1. Direct field wins outright.
  if (instance.gpuMemoryGb) {
    return instance.gpuMemoryGb
  }

  // 2. Parse from the accelerator string, e.g. "A10G 24GB", "4x A10G 96GB".
  if (instance.accelerator) {
    const sizeMatch = instance.accelerator.match(/(\d+(?:\.\d+)?)\s*GB/i)
    if (sizeMatch) {
      const totalGb = parseFloat(sizeMatch[1])
      const gpuCount = instance.gpus || 1
      // A leading multiplier ("4x ") means the GB figure is the total
      // across all GPUs; otherwise it is already per-GPU.
      const hasMultiplier = /^(\d+)\s*x\s/i.test(instance.accelerator)
      return hasMultiplier ? totalGb / gpuCount : totalGb
    }
  }

  // 3. Fall back to the per-hardware-type table.
  if (instance.hardware && GPU_MEMORY_MAP[instance.hardware]) {
    return GPU_MEMORY_MAP[instance.hardware]
  }

  return null
}
119
+
120
/**
 * Determine the cost tier for an instance.
 *
 * An explicit costTier on the catalog entry always wins; otherwise the
 * tier is derived from the instance family, defaulting to 'medium'.
 *
 * @param {object} instance - Instance catalog entry
 * @returns {string} 'low', 'medium', or 'high'
 */
const getCostTier = (instance) => {
  const explicit = instance.costTier
  if (explicit) {
    return explicit
  }
  return COST_TIER_MAP[instance.family || ''] || 'medium'
}
133
+
134
/**
 * Calculate the VRAM usable after the tensor-parallelism overhead penalty.
 *
 * The first GPU contributes its full capacity; every additional GPU
 * forfeits TP_OVERHEAD_PER_GPU (10%) of one GPU's capacity to
 * communication overhead:
 *
 *   effective = totalVram − perGpu × TP_OVERHEAD_PER_GPU × (gpuCount − 1)
 *
 * @param {number} totalVramGb - Total GPU VRAM in GB
 * @param {number} gpuCount - Number of GPUs (TP degree)
 * @returns {number} Effective usable VRAM in GB
 */
const effectiveVram = (totalVramGb, gpuCount) => {
  // No penalty without at least two GPUs to communicate between.
  if (gpuCount <= 1) {
    return totalVramGb
  }
  const perGpu = totalVramGb / gpuCount
  return totalVramGb - perGpu * TP_OVERHEAD_PER_GPU * (gpuCount - 1)
}
154
+
155
// ── Main Function ────────────────────────────────────────────────────────────

/**
 * Filter and rank instances by compatibility with a VRAM requirement.
 *
 * An instance qualifies when its usable VRAM covers vramRequired:
 * single-GPU instances offer their full per-GPU capacity; multi-GPU
 * instances (only when tensor parallelism is allowed) offer total VRAM
 * discounted by the TP communication overhead.
 *
 * Ranking, best first: lowest TP degree, then newest generation, then
 * cheapest cost tier, then least total VRAM (right-sized), then a
 * deterministic name tiebreaker.
 *
 * @param {number} vramRequired - Required VRAM in GB
 * @param {object} instanceCatalog - Object keyed by instance type, values are metadata
 * @param {object} [options={}]
 * @param {number} [options.limit=10] - Max results to return
 * @param {boolean} [options.allowTensorParallelism=true] - Consider multi-GPU splits
 * @returns {object[]} Ranked list of compatible instances
 */
const filterAndRankInstances = (vramRequired, instanceCatalog, options = {}) => {
  const { limit = 10, allowTensorParallelism = true } = options

  // Nothing sensible to return for a non-positive requirement or a
  // missing/non-object catalog.
  if (!vramRequired || vramRequired <= 0) return []
  if (!instanceCatalog || typeof instanceCatalog !== 'object') return []

  const candidates = []

  for (const [instanceType, meta] of Object.entries(instanceCatalog)) {
    // Eligible entries: GPU category with at least one accelerator.
    if (!meta.gpus || meta.gpus <= 0 || meta.category !== 'gpu') continue

    const perGpu = getPerGpuMemoryGb(meta)
    if (!perGpu) continue

    const gpuCount = meta.gpus
    const totalVramGb = perGpu * gpuCount

    // Usable capacity: full per-GPU VRAM on a single-GPU instance; on a
    // multi-GPU instance (TP permitted) the overhead-discounted total.
    let usable = null
    if (gpuCount === 1) {
      usable = perGpu
    } else if (allowTensorParallelism) {
      usable = effectiveVram(totalVramGb, gpuCount)
    }
    if (usable === null || usable < vramRequired) continue

    candidates.push({
      instanceType,
      gpuCount,
      totalVramGb,
      utilizationPercent: Math.round((vramRequired / usable) * 100),
      tensorParallelism: gpuCount,
      costTier: getCostTier(meta),
      family: meta.family || ''
    })
  }

  // Short-circuit comparator encoding the four ranking levels plus the
  // name tiebreaker; lower weight sorts first at every level.
  const byRank = (a, b) =>
    (a.tensorParallelism - b.tensorParallelism) ||
    ((GENERATION_WEIGHT[a.family] || 4) - (GENERATION_WEIGHT[b.family] || 4)) ||
    ((COST_TIER_WEIGHT[a.costTier] || 2) - (COST_TIER_WEIGHT[b.costTier] || 2)) ||
    (a.totalVramGb - b.totalVramGb) ||
    a.instanceType.localeCompare(b.instanceType)

  return candidates.sort(byRank).slice(0, limit)
}
259
+
260
// Named exports: the ranking entry point plus the internal helpers and
// constant tables (presumably exposed for direct unit testing).
export {
  filterAndRankInstances,
  getPerGpuMemoryGb,
  getCostTier,
  effectiveVram,
  GPU_MEMORY_MAP,
  COST_TIER_MAP,
  COST_TIER_WEIGHT,
  GENERATION_WEIGHT,
  TP_OVERHEAD_PER_GPU
}
@@ -0,0 +1,269 @@
1
+ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ // SPDX-License-Identifier: Apache-2.0
3
+
4
+ /**
5
+ * Model Metadata Resolver
6
+ *
7
+ * Three-tier resolution strategy for model metadata:
8
+ * 1. Check model-sizes catalog (exact match or glob pattern match)
9
+ * 2. If discover mode enabled, fetch HuggingFace config.json
10
+ * 3. If neither available, return null (caller handles fallback)
11
+ */
12
+
13
+ import { readFile } from 'node:fs/promises'
14
+ import { fileURLToPath } from 'node:url'
15
+ import { dirname, join } from 'node:path'
16
+
17
// ── Constants ────────────────────────────────────────────────────────────────

// ESM stand-ins for CommonJS __filename/__dirname, needed to build the
// catalog path relative to this module.
const __filename = fileURLToPath(import.meta.url)
const __dirname = dirname(__filename)

// Default location of the unified model catalog (servers/lib/catalogs/models.json).
const DEFAULT_CATALOG_PATH = join(__dirname, '..', '..', 'lib', 'catalogs', 'models.json')
// Root URL for HuggingFace Hub raw-file downloads (discover mode).
const HUGGINGFACE_BASE_URL = 'https://huggingface.co'
// Abort a HuggingFace config.json fetch after this many milliseconds.
const HUGGINGFACE_TIMEOUT_MS = 5000
25
+
26
// ── Glob Pattern Matching ────────────────────────────────────────────────────

/**
 * Simple glob pattern matcher supporting * wildcards.
 * Case-insensitive. Every other regex metacharacter in the pattern is
 * treated literally — including `?`, which the previous escape set
 * missed, letting it leak through as a regex optional-quantifier
 * (pattern 'a?c' wrongly matched 'ac' and failed to match 'a?c').
 *
 * @param {string} pattern - Glob pattern (e.g., 'meta-llama/Llama-2-7b*')
 * @param {string} text - Text to match against
 * @returns {boolean} Whether the text matches the pattern
 */
const globMatch = (pattern, text) => {
  // Escape all regex metacharacters except '*', which becomes '.*'.
  const regexStr = pattern
    .replace(/[.+?^${}()|[\]\\]/g, '\\$&')
    .replace(/\*/g, '.*')
  const regex = new RegExp(`^${regexStr}$`, 'i')
  return regex.test(text)
}
43
+
44
// ── Catalog Lookup ───────────────────────────────────────────────────────────

/**
 * Load the model-sizes catalog from disk.
 * A missing file and malformed JSON are handled identically: null.
 *
 * @param {string} [catalogPath] - Path to catalog JSON file
 * @returns {Promise<object|null>} Parsed catalog or null on failure
 */
const loadCatalog = async (catalogPath) => {
  const path = catalogPath || DEFAULT_CATALOG_PATH
  try {
    return JSON.parse(await readFile(path, 'utf-8'))
  } catch {
    // Read or parse failure — caller falls through to the next tier.
    return null
  }
}
60
+
61
/**
 * Look up a model in the catalog by exact match or glob pattern.
 *
 * Accepts both the flat catalog layout (models.json) and the wrapped
 * form ({ models: {...} }). An exact key match always wins; otherwise
 * the first glob pattern (in key order) that matches is used.
 *
 * @param {string} modelName - HuggingFace model ID or catalog key
 * @param {object} catalog - Parsed catalog object (flat or with .models wrapper)
 * @returns {object|null} Catalog entry or null if not found
 */
const catalogLookup = (modelName, catalog) => {
  if (!catalog) {
    return null
  }

  const models = catalog.models || catalog

  // Exact key match takes precedence over any pattern.
  const exact = models[modelName]
  if (exact) {
    return exact
  }

  // Otherwise fall back to the first matching glob pattern.
  const hit = Object.keys(models).find((pattern) => globMatch(pattern, modelName))
  return hit !== undefined ? models[hit] : null
}
90
+
91
// ── HuggingFace API ──────────────────────────────────────────────────────────

/**
 * Fetch model config.json from HuggingFace Hub.
 *
 * Aborts the request after HUGGINGFACE_TIMEOUT_MS. Any failure —
 * network error, timeout/abort, non-2xx status, or invalid JSON —
 * resolves to null rather than throwing, so callers can fall through
 * to the next resolution tier.
 *
 * @param {string} modelName - HuggingFace model ID (e.g., 'meta-llama/Llama-2-7b-chat-hf')
 * @returns {Promise<object|null>} Parsed config or null on failure
 */
const fetchHuggingFaceConfig = async (modelName) => {
  const url = `${HUGGINGFACE_BASE_URL}/${modelName}/resolve/main/config.json`

  const controller = new AbortController()
  const timeout = setTimeout(() => controller.abort(), HUGGINGFACE_TIMEOUT_MS)

  try {
    const response = await fetch(url, {
      signal: controller.signal,
      headers: { 'Accept': 'application/json' }
    })

    if (!response.ok) {
      return null
    }

    return await response.json()
  } catch {
    // Network error, abort, or JSON parse failure.
    return null
  } finally {
    // Fix: previously cleared only on the success path, so a rejected
    // fetch left the 5s timer (and a redundant late abort) alive.
    clearTimeout(timeout)
  }
}
122
+
123
/**
 * Estimate parameter count from architecture dimensions.
 *
 * Uses the standard transformer approximation:
 *   params ≈ 12 × num_hidden_layers × hidden_size²
 *
 * Each layer contributes roughly:
 * - Attention weights (Q, K, V, O projections = 4 × hidden_size²)
 * - FFN weights (typically 8 × hidden_size² for a 4× expansion)
 *
 * Fix: the previous code computed hidden_size × num_layers × 12,
 * dropping a factor of hidden_size — a Llama-2-7B-shaped config
 * (hidden 4096, 32 layers) came out as ~1.6M instead of ~6.4B,
 * contradicting this function's own documentation.
 *
 * @param {object} config - HuggingFace config.json contents
 * @returns {number|null} Estimated parameter count or null if dimensions unavailable
 */
const estimateParamsFromConfig = (config) => {
  const hiddenSize = config.hidden_size
  const numLayers = config.num_hidden_layers

  if (!hiddenSize || !numLayers) {
    return null
  }

  return 12 * numLayers * hiddenSize * hiddenSize
}
145
+
146
/**
 * Extract model metadata from a HuggingFace config.json.
 *
 * Prefers the explicit num_parameters field; otherwise estimates the
 * parameter count from the architecture dimensions. Missing fields get
 * defaults: float16 dtype, 'unknown' architecture, 4096-token context.
 *
 * @param {object} config - Parsed HuggingFace config.json
 * @returns {object} Extracted metadata, tagged source 'huggingface_api'
 */
const extractFromHuggingFaceConfig = (config) => ({
  parameterCount: config.num_parameters ?? estimateParamsFromConfig(config),
  dtype: config.torch_dtype || 'float16',
  architecture: config.architectures?.[0] || 'unknown',
  maxPositionEmbeddings: config.max_position_embeddings || 4096,
  source: 'huggingface_api'
})
168
+
169
// ── In-memory cache for discover mode ────────────────────────────────────────

// Session-scoped cache of successful HuggingFace lookups, keyed by model
// name. Lives for the process lifetime; also exported from this module.
const discoverCache = new Map()

// ── Protocol prefix detection ────────────────────────────────────────────────

// Model references carrying these prefixes are not HuggingFace IDs and
// must never trigger a HuggingFace API lookup.
const PROTOCOL_PREFIXES = ['jumpstart://', 'jumpstart-hub://', 's3://', 'registry://']
176
+
177
/**
 * Check if a model name matches the HuggingFace org/model-name pattern.
 * Must be a non-empty string with exactly one `/` and no protocol prefix.
 *
 * @param {string} modelName - Model identifier to check
 * @returns {boolean} True if it matches the HuggingFace pattern
 */
const isHuggingFacePattern = (modelName) => {
  if (typeof modelName !== 'string' || modelName.length === 0) {
    return false
  }
  // Prefixed references (jumpstart://, s3://, ...) are never HF IDs.
  if (PROTOCOL_PREFIXES.some((prefix) => modelName.startsWith(prefix))) {
    return false
  }
  // Exactly one separator means org/model-name.
  return modelName.split('/').length === 2
}
192
+
193
// ── Main Resolver ────────────────────────────────────────────────────────────

/**
 * Resolve model metadata from available sources.
 *
 * Three-tier resolution:
 * 1. Catalog lookup (exact key or glob pattern). The entry is only used
 *    when it carries a parameterCount usable for VRAM estimation;
 *    otherwise resolution deliberately falls through to tier 2.
 * 2. With discover mode enabled, and only for org/model-name
 *    identifiers, fetch the model's config.json from HuggingFace.
 *    Successful results are cached in memory for the session.
 * 3. Otherwise resolve to null and let the caller apply its fallback.
 *
 * @param {string} modelName - HuggingFace model ID or catalog key
 * @param {object} [options={}]
 * @param {boolean} [options.discover=false] - Enable HuggingFace API lookups
 * @param {string} [options.catalogPath] - Path to model-sizes catalog (for testing)
 * @returns {Promise<{ parameterCount: number, dtype: string, architecture: string, maxPositionEmbeddings: number, source: string } | null>}
 */
const resolveModelMetadata = async (modelName, options = {}) => {
  const { discover = false, catalogPath } = options

  // Tier 1: catalog (exact or glob match).
  const entry = catalogLookup(modelName, await loadCatalog(catalogPath))
  if (entry && entry.parameterCount) {
    return {
      parameterCount: entry.parameterCount,
      dtype: entry.defaultDtype,
      architecture: entry.architecture,
      maxPositionEmbeddings: entry.maxPositionEmbeddings,
      source: 'catalog'
    }
  }

  // Tier 2: HuggingFace config.json, discover mode only.
  if (discover && isHuggingFacePattern(modelName)) {
    const cached = discoverCache.get(modelName)
    if (cached !== undefined) {
      return cached
    }

    const config = await fetchHuggingFaceConfig(modelName)
    if (config) {
      const metadata = extractFromHuggingFaceConfig(config)
      // Only trust results that include a usable parameter count.
      if (metadata.parameterCount) {
        discoverCache.set(modelName, metadata)
        return metadata
      }
    }
  }

  // Tier 3: nothing usable.
  return null
}
254
+
255
// Named exports: the resolver entry point plus its internal helpers,
// cache, and constants (presumably exposed for direct unit testing).
export {
  resolveModelMetadata,
  globMatch,
  loadCatalog,
  catalogLookup,
  fetchHuggingFaceConfig,
  estimateParamsFromConfig,
  extractFromHuggingFaceConfig,
  isHuggingFacePattern,
  discoverCache,
  PROTOCOL_PREFIXES,
  DEFAULT_CATALOG_PATH,
  HUGGINGFACE_BASE_URL,
  HUGGINGFACE_TIMEOUT_MS
}