@aws/ml-container-creator 0.2.5 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. package/bin/cli.js +45 -4
  2. package/config/bootstrap-stack.json +14 -0
  3. package/infra/ci-harness/package-lock.json +22 -9
  4. package/package.json +7 -8
  5. package/servers/base-image-picker/index.js +3 -3
  6. package/servers/base-image-picker/manifest.json +4 -2
  7. package/servers/instance-sizer/index.js +564 -0
  8. package/servers/instance-sizer/lib/instance-ranker.js +270 -0
  9. package/servers/instance-sizer/lib/model-resolver.js +269 -0
  10. package/servers/instance-sizer/lib/vram-estimator.js +177 -0
  11. package/servers/instance-sizer/manifest.json +17 -0
  12. package/servers/instance-sizer/package.json +15 -0
  13. package/servers/{instance-recommender → lib}/catalogs/instances.json +136 -34
  14. package/servers/{base-image-picker → lib}/catalogs/model-servers.json +302 -254
  15. package/servers/lib/catalogs/model-sizes.json +131 -0
  16. package/servers/lib/catalogs/models.json +632 -0
  17. package/servers/{model-picker → lib}/catalogs/popular-diffusors.json +32 -10
  18. package/servers/{model-picker → lib}/catalogs/popular-transformers.json +59 -26
  19. package/servers/{base-image-picker → lib}/catalogs/python-slim.json +12 -12
  20. package/servers/lib/schemas/image-catalog.schema.json +6 -12
  21. package/servers/lib/schemas/instances.schema.json +29 -0
  22. package/servers/lib/schemas/model-catalog.schema.json +12 -10
  23. package/servers/lib/schemas/unified-model-catalog.schema.json +129 -0
  24. package/servers/model-picker/index.js +4 -4
  25. package/servers/model-picker/manifest.json +2 -3
  26. package/servers/region-picker/index.js +1 -1
  27. package/servers/region-picker/manifest.json +1 -1
  28. package/src/app.js +36 -0
  29. package/src/lib/architecture-sync.js +171 -0
  30. package/src/lib/arn-detection.js +22 -0
  31. package/src/lib/bootstrap-command-handler.js +120 -0
  32. package/src/lib/cli-handler.js +3 -3
  33. package/src/lib/config-manager.js +47 -1
  34. package/src/lib/configuration-manager.js +2 -2
  35. package/src/lib/cross-cutting-checker.js +460 -0
  36. package/src/lib/deployment-entry-schema.js +1 -2
  37. package/src/lib/dry-run-validator.js +78 -0
  38. package/src/lib/generation-validator.js +102 -0
  39. package/src/lib/mcp-validator-config.js +89 -0
  40. package/src/lib/payload-builder.js +153 -0
  41. package/src/lib/prompt-runner.js +866 -149
  42. package/src/lib/prompts.js +2 -2
  43. package/src/lib/registry-command-handler.js +236 -0
  44. package/src/lib/registry-loader.js +5 -5
  45. package/src/lib/schema-sync.js +203 -0
  46. package/src/lib/schema-validation-engine.js +195 -0
  47. package/src/lib/secret-classification.js +56 -0
  48. package/src/lib/secrets-command-handler.js +550 -0
  49. package/src/lib/service-model-parser.js +102 -0
  50. package/src/lib/validate-runner.js +216 -0
  51. package/src/lib/validation-report.js +140 -0
  52. package/src/lib/validators/base-validator.js +36 -0
  53. package/src/lib/validators/catalog-validator.js +177 -0
  54. package/src/lib/validators/enum-validator.js +120 -0
  55. package/src/lib/validators/required-field-validator.js +150 -0
  56. package/src/lib/validators/type-validator.js +313 -0
  57. package/src/prompt-adapter.js +3 -2
  58. package/templates/Dockerfile +1 -1
  59. package/templates/do/build +37 -5
  60. package/templates/do/config +15 -3
  61. package/templates/do/deploy +60 -5
  62. package/templates/do/logs +18 -3
  63. package/templates/do/run +15 -1
  64. package/templates/do/validate +61 -0
  65. package/servers/instance-recommender/LICENSE +0 -202
  66. package/servers/instance-recommender/index.js +0 -284
  67. package/servers/instance-recommender/manifest.json +0 -16
  68. package/servers/instance-recommender/package.json +0 -15
  69. /package/servers/{model-picker → lib}/catalogs/jumpstart-public.json +0 -0
  70. /package/servers/{region-picker → lib}/catalogs/regions.json +0 -0
  71. /package/servers/{base-image-picker → lib}/catalogs/triton-backends.json +0 -0
  72. /package/servers/{base-image-picker → lib}/catalogs/triton.json +0 -0
@@ -0,0 +1,270 @@
1
+ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ // SPDX-License-Identifier: Apache-2.0
3
+
4
+ /**
5
+ * Instance Filter & Ranker
6
+ *
7
+ * Filters and ranks SageMaker instances by compatibility with a model's
8
+ * VRAM requirement. Considers tensor parallelism for multi-GPU instances
9
+ * and applies cost-efficiency ranking within each TP tier.
10
+ */
11
+
12
// ── Constants ────────────────────────────────────────────────────────────────

/**
 * GPU memory per chip (in GB) by hardware type.
 * Used when the catalog doesn't have a direct gpuMemoryGb field and the
 * accelerator string cannot be parsed (last-resort tier in getPerGpuMemoryGb).
 */
const GPU_MEMORY_MAP = {
  'NVIDIA T4': 16,
  'NVIDIA A10G': 24,
  'NVIDIA V100': 16,
  'NVIDIA L4': 24,
  'NVIDIA A100': 40, // NOTE(review): assumes the 40GB A100 variant — confirm catalog intent (80GB A100s also exist)
  'NVIDIA H100': 80,
  'AWS Inferentia2': 32,
  'AWS Trainium': 32
}

/**
 * Cost tier classification by instance family.
 * Families not listed here default to 'medium' (see getCostTier).
 */
const COST_TIER_MAP = {
  'g4dn': 'low',
  'inf2': 'low',
  'g5': 'medium',
  'g6': 'medium',
  'trn1': 'medium',
  'p3': 'high',
  'p4d': 'high',
  'p4de': 'high',
  'p5': 'high'
}

/**
 * Relative cost weight by tier for sorting within TP groups.
 * Lower is better (more cost-efficient); unknown tiers weigh 2 in the sort.
 */
const COST_TIER_WEIGHT = {
  'low': 1,
  'medium': 2,
  'high': 3
}

/**
 * Generation weight by instance family.
 * Lower is newer (sorted first). Newer generations offer better perf/$.
 * Families not listed default to 4 in the ranking comparator.
 */
const GENERATION_WEIGHT = {
  'g6': 1,
  'p5': 1,
  'trn1': 2,
  'inf2': 2,
  'g5': 3,
  'p4de': 4,
  'p4d': 4,
  'p3': 5,
  'g4dn': 6
}

/**
 * TP overhead penalty: 10% per additional GPU beyond the first.
 * Effective VRAM = totalVram × (1 - 0.10 × (gpuCount - 1))
 * Applied in effectiveVram() for multi-GPU tensor-parallel placements.
 */
const TP_OVERHEAD_PER_GPU = 0.10
75
+
76
// ── Helper Functions ─────────────────────────────────────────────────────────

/**
 * Extract per-GPU memory in GB from an instance catalog entry.
 *
 * Tries these approaches in order:
 * 1. Direct gpuMemoryGb field (if catalog has been extended)
 * 2. Parse from accelerator string (e.g., "4x A10G 96GB" → 24 per GPU)
 * 3. Lookup by hardware type from GPU_MEMORY_MAP
 *
 * The size parse accepts fractional values, an optional space, and any
 * case ("96 GB", "24.5gb"); the strict original pattern misread
 * "96.5GB" as 5GB and rejected "96 GB" outright. The multiplier prefix
 * also tolerates spacing ("4 x ...").
 *
 * @param {object} instance - Instance catalog entry
 * @returns {number|null} Per-GPU memory in GB, or null if not determinable
 */
const getPerGpuMemoryGb = (instance) => {
  // 1. Direct field wins outright.
  if (instance.gpuMemoryGb) {
    return instance.gpuMemoryGb
  }

  // 2. Parse from the accelerator string, e.g. "A10G 24GB", "4x A10G 96GB".
  if (instance.accelerator) {
    const sizeMatch = instance.accelerator.match(/(\d+(?:\.\d+)?)\s*GB/i)
    if (sizeMatch) {
      const totalGb = parseFloat(sizeMatch[1])
      const gpuCount = instance.gpus || 1
      // A leading multiplier ("4x ") means the GB figure is the total
      // across all GPUs; otherwise it is already per-GPU.
      const hasMultiplier = /^(\d+)\s*x\s/i.test(instance.accelerator)
      return hasMultiplier ? totalGb / gpuCount : totalGb
    }
  }

  // 3. Fall back to the per-hardware-type table.
  if (instance.hardware && GPU_MEMORY_MAP[instance.hardware]) {
    return GPU_MEMORY_MAP[instance.hardware]
  }

  return null
}
119
+
120
/**
 * Determine the cost tier for an instance.
 *
 * An explicit costTier on the catalog entry always wins; otherwise the
 * tier is derived from the instance family, defaulting to 'medium'.
 *
 * @param {object} instance - Instance catalog entry
 * @returns {string} 'low', 'medium', or 'high'
 */
const getCostTier = (instance) => {
  const explicit = instance.costTier
  if (explicit) {
    return explicit
  }
  return COST_TIER_MAP[instance.family || ''] || 'medium'
}
133
+
134
/**
 * Calculate the VRAM usable after the tensor-parallelism overhead penalty.
 *
 * The first GPU contributes its full capacity; every additional GPU
 * forfeits TP_OVERHEAD_PER_GPU (10%) of one GPU's capacity to
 * communication overhead:
 *
 *   effective = totalVram − perGpu × TP_OVERHEAD_PER_GPU × (gpuCount − 1)
 *
 * @param {number} totalVramGb - Total GPU VRAM in GB
 * @param {number} gpuCount - Number of GPUs (TP degree)
 * @returns {number} Effective usable VRAM in GB
 */
const effectiveVram = (totalVramGb, gpuCount) => {
  // No penalty without at least two GPUs to communicate between.
  if (gpuCount <= 1) {
    return totalVramGb
  }
  const perGpu = totalVramGb / gpuCount
  return totalVramGb - perGpu * TP_OVERHEAD_PER_GPU * (gpuCount - 1)
}
154
+
155
// ── Main Function ────────────────────────────────────────────────────────────

/**
 * Filter and rank instances by compatibility with a VRAM requirement.
 *
 * An instance qualifies when its usable VRAM covers vramRequired:
 * single-GPU instances offer their full per-GPU capacity; multi-GPU
 * instances (only when tensor parallelism is allowed) offer total VRAM
 * discounted by the TP communication overhead.
 *
 * Ranking, best first: lowest TP degree, then newest generation, then
 * cheapest cost tier, then least total VRAM (right-sized), then a
 * deterministic name tiebreaker.
 *
 * @param {number} vramRequired - Required VRAM in GB
 * @param {object} instanceCatalog - Object keyed by instance type, values are metadata
 * @param {object} [options={}]
 * @param {number} [options.limit=10] - Max results to return
 * @param {boolean} [options.allowTensorParallelism=true] - Consider multi-GPU splits
 * @returns {object[]} Ranked list of compatible instances
 */
const filterAndRankInstances = (vramRequired, instanceCatalog, options = {}) => {
  const { limit = 10, allowTensorParallelism = true } = options

  // Nothing sensible to return for a non-positive requirement or a
  // missing/non-object catalog.
  if (!vramRequired || vramRequired <= 0) return []
  if (!instanceCatalog || typeof instanceCatalog !== 'object') return []

  const candidates = []

  for (const [instanceType, meta] of Object.entries(instanceCatalog)) {
    // Eligible entries: GPU category with at least one accelerator.
    if (!meta.gpus || meta.gpus <= 0 || meta.category !== 'gpu') continue

    const perGpu = getPerGpuMemoryGb(meta)
    if (!perGpu) continue

    const gpuCount = meta.gpus
    const totalVramGb = perGpu * gpuCount

    // Usable capacity: full per-GPU VRAM on a single-GPU instance; on a
    // multi-GPU instance (TP permitted) the overhead-discounted total.
    let usable = null
    if (gpuCount === 1) {
      usable = perGpu
    } else if (allowTensorParallelism) {
      usable = effectiveVram(totalVramGb, gpuCount)
    }
    if (usable === null || usable < vramRequired) continue

    candidates.push({
      instanceType,
      gpuCount,
      totalVramGb,
      utilizationPercent: Math.round((vramRequired / usable) * 100),
      tensorParallelism: gpuCount,
      costTier: getCostTier(meta),
      family: meta.family || ''
    })
  }

  // Short-circuit comparator encoding the four ranking levels plus the
  // name tiebreaker; lower weight sorts first at every level.
  const byRank = (a, b) =>
    (a.tensorParallelism - b.tensorParallelism) ||
    ((GENERATION_WEIGHT[a.family] || 4) - (GENERATION_WEIGHT[b.family] || 4)) ||
    ((COST_TIER_WEIGHT[a.costTier] || 2) - (COST_TIER_WEIGHT[b.costTier] || 2)) ||
    (a.totalVramGb - b.totalVramGb) ||
    a.instanceType.localeCompare(b.instanceType)

  return candidates.sort(byRank).slice(0, limit)
}
259
+
260
// Named exports: the ranking entry point plus the internal helpers and
// constant tables (presumably exposed for direct unit testing).
export {
  filterAndRankInstances,
  getPerGpuMemoryGb,
  getCostTier,
  effectiveVram,
  GPU_MEMORY_MAP,
  COST_TIER_MAP,
  COST_TIER_WEIGHT,
  GENERATION_WEIGHT,
  TP_OVERHEAD_PER_GPU
}
@@ -0,0 +1,269 @@
1
+ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ // SPDX-License-Identifier: Apache-2.0
3
+
4
+ /**
5
+ * Model Metadata Resolver
6
+ *
7
+ * Three-tier resolution strategy for model metadata:
8
+ * 1. Check model-sizes catalog (exact match or glob pattern match)
9
+ * 2. If discover mode enabled, fetch HuggingFace config.json
10
+ * 3. If neither available, return null (caller handles fallback)
11
+ */
12
+
13
+ import { readFile } from 'node:fs/promises'
14
+ import { fileURLToPath } from 'node:url'
15
+ import { dirname, join } from 'node:path'
16
+
17
// ── Constants ────────────────────────────────────────────────────────────────

// ESM stand-ins for CommonJS __filename/__dirname, needed to build the
// catalog path relative to this module.
const __filename = fileURLToPath(import.meta.url)
const __dirname = dirname(__filename)

// Default location of the unified model catalog (servers/lib/catalogs/models.json).
const DEFAULT_CATALOG_PATH = join(__dirname, '..', '..', 'lib', 'catalogs', 'models.json')
// Root URL for HuggingFace Hub raw-file downloads (discover mode).
const HUGGINGFACE_BASE_URL = 'https://huggingface.co'
// Abort a HuggingFace config.json fetch after this many milliseconds.
const HUGGINGFACE_TIMEOUT_MS = 5000
25
+
26
// ── Glob Pattern Matching ────────────────────────────────────────────────────

/**
 * Simple glob pattern matcher supporting * wildcards.
 * Case-insensitive. Every other regex metacharacter in the pattern is
 * treated literally — including `?`, which the previous escape set
 * missed, letting it leak through as a regex optional-quantifier
 * (pattern 'a?c' wrongly matched 'ac' and failed to match 'a?c').
 *
 * @param {string} pattern - Glob pattern (e.g., 'meta-llama/Llama-2-7b*')
 * @param {string} text - Text to match against
 * @returns {boolean} Whether the text matches the pattern
 */
const globMatch = (pattern, text) => {
  // Escape all regex metacharacters except '*', which becomes '.*'.
  const regexStr = pattern
    .replace(/[.+?^${}()|[\]\\]/g, '\\$&')
    .replace(/\*/g, '.*')
  const regex = new RegExp(`^${regexStr}$`, 'i')
  return regex.test(text)
}
43
+
44
// ── Catalog Lookup ───────────────────────────────────────────────────────────

/**
 * Load the model-sizes catalog from disk.
 * A missing file and malformed JSON are handled identically: null.
 *
 * @param {string} [catalogPath] - Path to catalog JSON file
 * @returns {Promise<object|null>} Parsed catalog or null on failure
 */
const loadCatalog = async (catalogPath) => {
  const path = catalogPath || DEFAULT_CATALOG_PATH
  try {
    return JSON.parse(await readFile(path, 'utf-8'))
  } catch {
    // Read or parse failure — caller falls through to the next tier.
    return null
  }
}
60
+
61
/**
 * Look up a model in the catalog by exact match or glob pattern.
 *
 * Accepts both the flat catalog layout (models.json) and the wrapped
 * form ({ models: {...} }). An exact key match always wins; otherwise
 * the first glob pattern (in key order) that matches is used.
 *
 * @param {string} modelName - HuggingFace model ID or catalog key
 * @param {object} catalog - Parsed catalog object (flat or with .models wrapper)
 * @returns {object|null} Catalog entry or null if not found
 */
const catalogLookup = (modelName, catalog) => {
  if (!catalog) {
    return null
  }

  const models = catalog.models || catalog

  // Exact key match takes precedence over any pattern.
  const exact = models[modelName]
  if (exact) {
    return exact
  }

  // Otherwise fall back to the first matching glob pattern.
  const hit = Object.keys(models).find((pattern) => globMatch(pattern, modelName))
  return hit !== undefined ? models[hit] : null
}
90
+
91
// ── HuggingFace API ──────────────────────────────────────────────────────────

/**
 * Fetch model config.json from HuggingFace Hub.
 *
 * Aborts the request after HUGGINGFACE_TIMEOUT_MS. Any failure —
 * network error, timeout/abort, non-2xx status, or invalid JSON —
 * resolves to null rather than throwing, so callers can fall through
 * to the next resolution tier.
 *
 * @param {string} modelName - HuggingFace model ID (e.g., 'meta-llama/Llama-2-7b-chat-hf')
 * @returns {Promise<object|null>} Parsed config or null on failure
 */
const fetchHuggingFaceConfig = async (modelName) => {
  const url = `${HUGGINGFACE_BASE_URL}/${modelName}/resolve/main/config.json`

  const controller = new AbortController()
  const timeout = setTimeout(() => controller.abort(), HUGGINGFACE_TIMEOUT_MS)

  try {
    const response = await fetch(url, {
      signal: controller.signal,
      headers: { 'Accept': 'application/json' }
    })

    if (!response.ok) {
      return null
    }

    return await response.json()
  } catch {
    // Network error, abort, or JSON parse failure.
    return null
  } finally {
    // Fix: previously cleared only on the success path, so a rejected
    // fetch left the 5s timer (and a redundant late abort) alive.
    clearTimeout(timeout)
  }
}
122
+
123
/**
 * Estimate parameter count from architecture dimensions.
 *
 * Uses the standard transformer approximation:
 *   params ≈ 12 × num_hidden_layers × hidden_size²
 *
 * Each layer contributes roughly:
 * - Attention weights (Q, K, V, O projections = 4 × hidden_size²)
 * - FFN weights (typically 8 × hidden_size² for a 4× expansion)
 *
 * Fix: the previous code computed hidden_size × num_layers × 12,
 * dropping a factor of hidden_size — a Llama-2-7B-shaped config
 * (hidden 4096, 32 layers) came out as ~1.6M instead of ~6.4B,
 * contradicting this function's own documentation.
 *
 * @param {object} config - HuggingFace config.json contents
 * @returns {number|null} Estimated parameter count or null if dimensions unavailable
 */
const estimateParamsFromConfig = (config) => {
  const hiddenSize = config.hidden_size
  const numLayers = config.num_hidden_layers

  if (!hiddenSize || !numLayers) {
    return null
  }

  return 12 * numLayers * hiddenSize * hiddenSize
}
145
+
146
/**
 * Extract model metadata from a HuggingFace config.json.
 *
 * Prefers the explicit num_parameters field; otherwise estimates the
 * parameter count from the architecture dimensions. Missing fields get
 * defaults: float16 dtype, 'unknown' architecture, 4096-token context.
 *
 * @param {object} config - Parsed HuggingFace config.json
 * @returns {object} Extracted metadata, tagged source 'huggingface_api'
 */
const extractFromHuggingFaceConfig = (config) => ({
  parameterCount: config.num_parameters ?? estimateParamsFromConfig(config),
  dtype: config.torch_dtype || 'float16',
  architecture: config.architectures?.[0] || 'unknown',
  maxPositionEmbeddings: config.max_position_embeddings || 4096,
  source: 'huggingface_api'
})
168
+
169
// ── In-memory cache for discover mode ────────────────────────────────────────

// Session-scoped cache of successful HuggingFace lookups, keyed by model
// name. Lives for the process lifetime; also exported from this module.
const discoverCache = new Map()

// ── Protocol prefix detection ────────────────────────────────────────────────

// Model references carrying these prefixes are not HuggingFace IDs and
// must never trigger a HuggingFace API lookup.
const PROTOCOL_PREFIXES = ['jumpstart://', 'jumpstart-hub://', 's3://', 'registry://']
176
+
177
/**
 * Check if a model name matches the HuggingFace org/model-name pattern.
 * Must be a non-empty string with exactly one `/` and no protocol prefix.
 *
 * @param {string} modelName - Model identifier to check
 * @returns {boolean} True if it matches the HuggingFace pattern
 */
const isHuggingFacePattern = (modelName) => {
  if (typeof modelName !== 'string' || modelName.length === 0) {
    return false
  }
  // Prefixed references (jumpstart://, s3://, ...) are never HF IDs.
  if (PROTOCOL_PREFIXES.some((prefix) => modelName.startsWith(prefix))) {
    return false
  }
  // Exactly one separator means org/model-name.
  return modelName.split('/').length === 2
}
192
+
193
// ── Main Resolver ────────────────────────────────────────────────────────────

/**
 * Resolve model metadata from available sources.
 *
 * Three-tier resolution:
 * 1. Catalog lookup (exact key or glob pattern). The entry is only used
 *    when it carries a parameterCount usable for VRAM estimation;
 *    otherwise resolution deliberately falls through to tier 2.
 * 2. With discover mode enabled, and only for org/model-name
 *    identifiers, fetch the model's config.json from HuggingFace.
 *    Successful results are cached in memory for the session.
 * 3. Otherwise resolve to null and let the caller apply its fallback.
 *
 * @param {string} modelName - HuggingFace model ID or catalog key
 * @param {object} [options={}]
 * @param {boolean} [options.discover=false] - Enable HuggingFace API lookups
 * @param {string} [options.catalogPath] - Path to model-sizes catalog (for testing)
 * @returns {Promise<{ parameterCount: number, dtype: string, architecture: string, maxPositionEmbeddings: number, source: string } | null>}
 */
const resolveModelMetadata = async (modelName, options = {}) => {
  const { discover = false, catalogPath } = options

  // Tier 1: catalog (exact or glob match).
  const entry = catalogLookup(modelName, await loadCatalog(catalogPath))
  if (entry && entry.parameterCount) {
    return {
      parameterCount: entry.parameterCount,
      dtype: entry.defaultDtype,
      architecture: entry.architecture,
      maxPositionEmbeddings: entry.maxPositionEmbeddings,
      source: 'catalog'
    }
  }

  // Tier 2: HuggingFace config.json, discover mode only.
  if (discover && isHuggingFacePattern(modelName)) {
    const cached = discoverCache.get(modelName)
    if (cached !== undefined) {
      return cached
    }

    const config = await fetchHuggingFaceConfig(modelName)
    if (config) {
      const metadata = extractFromHuggingFaceConfig(config)
      // Only trust results that include a usable parameter count.
      if (metadata.parameterCount) {
        discoverCache.set(modelName, metadata)
        return metadata
      }
    }
  }

  // Tier 3: nothing usable.
  return null
}
254
+
255
// Named exports: the resolver entry point plus its internal helpers,
// cache, and constants (presumably exposed for direct unit testing).
export {
  resolveModelMetadata,
  globMatch,
  loadCatalog,
  catalogLookup,
  fetchHuggingFaceConfig,
  estimateParamsFromConfig,
  extractFromHuggingFaceConfig,
  isHuggingFacePattern,
  discoverCache,
  PROTOCOL_PREFIXES,
  DEFAULT_CATALOG_PATH,
  HUGGINGFACE_BASE_URL,
  HUGGINGFACE_TIMEOUT_MS
}