npm - @aws/ml-container-creator - Versions diffs - 0.10.0 → 0.12.1 - Mend

@aws/ml-container-creator 0.10.0 → 0.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (90)

package/LICENSE-THIRD-PARTY +9304 -0
package/bin/cli.js +2 -0
package/config/bootstrap-e2e-stack.json +341 -0
package/config/bootstrap-stack.json +40 -3
package/config/parameter-schema-v2.json +33 -22
package/config/tune-catalog.json +1781 -0
package/infra/ci-harness/buildspec.yml +1 -0
package/infra/ci-harness/lambda/path-prover/brain.ts +306 -0
package/infra/ci-harness/lambda/path-prover/write-results.ts +152 -0
package/infra/ci-harness/lib/ci-harness-stack.ts +851 -7
package/infra/ci-harness/state-machines/path-prover.asl.json +496 -0
package/package.json +53 -67
package/servers/base-image-picker/index.js +121 -121
package/servers/e2e-status/index.js +297 -0
package/servers/e2e-status/manifest.json +14 -0
package/servers/e2e-status/package.json +15 -0
package/servers/endpoint-picker/LICENSE +202 -0
package/servers/endpoint-picker/index.js +536 -0
package/servers/endpoint-picker/manifest.json +14 -0
package/servers/endpoint-picker/package.json +18 -0
package/servers/hyperpod-cluster-picker/index.js +125 -125
package/servers/instance-sizer/index.js +166 -153
package/servers/instance-sizer/lib/instance-ranker.js +120 -76
package/servers/instance-sizer/lib/model-resolver.js +61 -61
package/servers/instance-sizer/lib/quota-resolver.js +113 -113
package/servers/instance-sizer/lib/vram-estimator.js +31 -31
package/servers/lib/bedrock-client.js +38 -38
package/servers/lib/catalogs/instances.json +27 -0
package/servers/lib/catalogs/model-servers.json +201 -3
package/servers/lib/custom-validators.js +13 -13
package/servers/lib/dynamic-resolver.js +4 -4
package/servers/marketplace-picker/index.js +342 -0
package/servers/marketplace-picker/manifest.json +14 -0
package/servers/marketplace-picker/package.json +18 -0
package/servers/model-picker/index.js +382 -382
package/servers/region-picker/index.js +56 -56
package/servers/workload-picker/LICENSE +202 -0
package/servers/workload-picker/catalogs/workload-profiles.json +67 -0
package/servers/workload-picker/index.js +171 -0
package/servers/workload-picker/manifest.json +16 -0
package/servers/workload-picker/package.json +16 -0
package/src/app.js +12 -3
package/src/lib/bootstrap-command-handler.js +609 -15
package/src/lib/bootstrap-config.js +36 -0
package/src/lib/bootstrap-profile-manager.js +48 -41
package/src/lib/ci-register-helpers.js +74 -0
package/src/lib/config-loader.js +3 -0
package/src/lib/config-manager.js +7 -0
package/src/lib/config-validator.js +1 -1
package/src/lib/cuda-resolver.js +17 -8
package/src/lib/generated/cli-options.js +319 -314
package/src/lib/generated/parameter-matrix.js +672 -661
package/src/lib/generated/validation-rules.js +76 -72
package/src/lib/path-prover-brain.js +664 -0
package/src/lib/prompts/infrastructure-prompts.js +2 -2
package/src/lib/prompts/model-prompts.js +6 -0
package/src/lib/prompts/project-prompts.js +12 -0
package/src/lib/secrets-prompt-runner.js +4 -0
package/src/lib/template-manager.js +1 -1
package/src/lib/template-variable-resolver.js +87 -1
package/src/lib/tune-catalog-validator.js +37 -4
package/templates/Dockerfile +9 -0
package/templates/code/adapter_sidecar.py +444 -0
package/templates/code/serve +6 -0
package/templates/code/serve.d/vllm.ejs +1 -1
package/templates/do/.benchmark_writer.py +1476 -0
package/templates/do/.tune_helper.py +982 -57
package/templates/do/__pycache__/.benchmark_writer.cpython-312.pyc +0 -0
package/templates/do/adapter +154 -0
package/templates/do/benchmark +639 -85
package/templates/do/build +5 -0
package/templates/do/clean.d/async-inference.ejs +5 -0
package/templates/do/clean.d/batch-transform.ejs +5 -0
package/templates/do/clean.d/hyperpod-eks.ejs +5 -0
package/templates/do/clean.d/managed-inference.ejs +5 -0
package/templates/do/config +115 -45
package/templates/do/deploy.d/async-inference.ejs +30 -3
package/templates/do/deploy.d/batch-transform.ejs +29 -3
package/templates/do/deploy.d/hyperpod-eks.ejs +4 -0
package/templates/do/deploy.d/managed-inference.ejs +216 -14
package/templates/do/lib/endpoint-config.sh +1 -1
package/templates/do/lib/profile.sh +44 -0
package/templates/do/optimize +106 -37
package/templates/do/push +5 -0
package/templates/do/register +94 -0
package/templates/do/stage +567 -0
package/templates/do/submit +7 -0
package/templates/do/test +14 -0
package/templates/do/tune +382 -59
package/templates/do/validate +44 -4

package/src/lib/path-prover-brain.js ADDED Viewed

@@ -0,0 +1,664 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+import { readFileSync } from 'node:fs';
+import { resolve, dirname } from 'node:path';
+import { fileURLToPath } from 'node:url';
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = dirname(__filename);
+/**
+ * Path Prover Brain
+ *
+ * Implements the intelligence layer for the Path Prover agent mode.
+ * This module identifies coverage gaps, finds nearest substitutions,
+ * classifies failures, gates tune/adapter stages, and builds
+ * Athena-compatible records with run_type='path_prove'.
+ *
+ * Feature: ci-benchmark-pipeline
+ * Requirements: 8.1–8.12
+ */
+// ── Configuration Dimensions ─────────────────────────────────────────────────
+/**
+ * The ordered vector of config dimensions used for Hamming distance calculation.
+ *
+ * The same list also drives the per-config signatures in identifyGaps, the
+ * dimension copy in buildPathProverRecord and the all-dimensions match in
+ * findUnfeasibleRecord, so adding, removing or renaming an entry changes the
+ * behaviour of every one of them.
+ */
+export const CONFIG_DIMENSIONS = [
+    'deployment_config',
+    'model_family',
+    'instance_family',
+    'quantization',
+    'tp_degree',
+    'deployment_target'
+];
+// ── Failure Classification ───────────────────────────────────────────────────
+/**
+ * Valid failure categories for Path Prover classification.
+ *
+ * Every category assigned by the ERROR_PATTERNS table is one of these values.
+ * In that table only 'capacity' and 'timeout' entries are marked retryable;
+ * entries for the remaining categories are marked non-retryable.
+ */
+export const FAILURE_CATEGORIES = [
+    'capacity',
+    'timeout',
+    'oom',
+    'code_bug',
+    'model_incompatibility',
+    'service_limitation'
+];
+/**
+ * Error pattern matchers for failure classification.
+ * Each entry maps a regex pattern to a category and retryable flag.
+ *
+ * classifyFailure tests the entries in array order and returns on the first
+ * match, so when a message matches several entries the earliest one decides
+ * the category. All patterns are case-insensitive.
+ */
+const ERROR_PATTERNS = [
+    { pattern: /InsufficientInstanceCapacity/i, category: 'capacity', retryable: true },
+    { pattern: /CapacityError/i, category: 'capacity', retryable: true },
+    { pattern: /no capacity/i, category: 'capacity', retryable: true },
+    { pattern: /timed?\s*out/i, category: 'timeout', retryable: true },
+    { pattern: /timeout/i, category: 'timeout', retryable: true },
+    { pattern: /deadline exceeded/i, category: 'timeout', retryable: true },
+    { pattern: /OutOfMemory/i, category: 'oom', retryable: false },
+    // 'OOM' must stand alone or be the 'OOMKilled' container status; a bare
+    // /OOM/i also fires inside ordinary words such as 'bloom', 'room' or 'zoom'.
+    { pattern: /(?<![a-z])OOM(?:killed|(?![a-z]))/i, category: 'oom', retryable: false },
+    { pattern: /CUDA out of memory/i, category: 'oom', retryable: false },
+    { pattern: /Cannot allocate memory/i, category: 'oom', retryable: false },
+    { pattern: /killed.*memory/i, category: 'oom', retryable: false },
+    { pattern: /template.*error/i, category: 'code_bug', retryable: false },
+    { pattern: /SyntaxError/i, category: 'code_bug', retryable: false },
+    { pattern: /ReferenceError/i, category: 'code_bug', retryable: false },
+    { pattern: /TypeError/i, category: 'code_bug', retryable: false },
+    { pattern: /script crash/i, category: 'code_bug', retryable: false },
+    { pattern: /rendering failed/i, category: 'code_bug', retryable: false },
+    { pattern: /not supported.*model/i, category: 'model_incompatibility', retryable: false },
+    { pattern: /model.*incompatible/i, category: 'model_incompatibility', retryable: false },
+    { pattern: /unsupported.*architecture/i, category: 'model_incompatibility', retryable: false },
+    { pattern: /LoRA.*not supported/i, category: 'model_incompatibility', retryable: false },
+    { pattern: /adapter.*not compatible/i, category: 'model_incompatibility', retryable: false },
+    { pattern: /not available.*region/i, category: 'service_limitation', retryable: false },
+    { pattern: /service.*not supported/i, category: 'service_limitation', retryable: false },
+    { pattern: /API.*not available/i, category: 'service_limitation', retryable: false },
+    { pattern: /feature.*not.*region/i, category: 'service_limitation', retryable: false },
+    { pattern: /ValidationException/i, category: 'service_limitation', retryable: false }
+];
+// ── Gap Identification (Task 5.1) ────────────────────────────────────────────
+/**
+ * Identify coverage gaps given a set of proven configurations.
+ *
+ * The dimension space is the cartesian product of every non-null value seen
+ * for each CONFIG_DIMENSIONS entry across ALL input records, whatever their
+ * status. Only records with status 'completed' count as proven; every other
+ * combination in the space is reported as a gap.
+ *
+ * Values are compared and returned as strings (each value goes through
+ * String()), so a numeric tp_degree of 4 comes back as '4'.
+ *
+ * @param {object[]} provenConfigs - Array of config objects from Athena.
+ *   Each object must have keys matching CONFIG_DIMENSIONS plus `status`
+ * @returns {object[]} Ordered list of gap configs to prove, sorted by
+ *   coverage priority (more proven neighbors at distance 1 = higher priority).
+ *   Empty when provenConfigs is missing or empty.
+ */
+export function identifyGaps(provenConfigs) {
+    if (!provenConfigs || provenConfigs.length === 0) {
+        return [];
+    }
+    // Dimension values as strings, in CONFIG_DIMENSIONS order
+    const toParts = (config) => CONFIG_DIMENSIONS.map(dim => String(config[dim] ?? ''));
+    // Unambiguous lookup key: a plain '|' join would merge ['a|b', 'c'] with ['a', 'b|c']
+    const toKey = (parts) => JSON.stringify(parts);
+    // Extract unique values for each dimension from the input records
+    const dimensionValues = {};
+    for (const dim of CONFIG_DIMENSIONS) {
+        const seen = new Set();
+        for (const config of provenConfigs) {
+            if (config[dim] !== undefined && config[dim] !== null) {
+                seen.add(String(config[dim]));
+            }
+        }
+        dimensionValues[dim] = [...seen];
+    }
+    // De-duplicated proven signatures, kept as part arrays so that the
+    // neighbor comparison below never has to re-split a joined string
+    const provenByKey = new Map();
+    for (const config of provenConfigs) {
+        if (config.status === 'completed') {
+            const parts = toParts(config);
+            provenByKey.set(toKey(parts), parts);
+        }
+    }
+    const provenParts = [...provenByKey.values()];
+    // Walk every combination of observed values and keep the unproven ones
+    const gaps = [];
+    for (const combo of cartesianProduct(dimensionValues)) {
+        const comboParts = toParts(combo);
+        if (provenByKey.has(toKey(comboParts))) {
+            continue;
+        }
+        // Count how many proven configs differ in exactly one dimension
+        let neighborCount = 0;
+        for (const parts of provenParts) {
+            let diff = 0;
+            for (let i = 0; i < parts.length; i++) {
+                if (parts[i] !== comboParts[i]) diff++;
+            }
+            if (diff === 1) neighborCount++;
+        }
+        gaps.push({ combo, neighborCount });
+    }
+    // Most surrounded gaps first; Array.prototype.sort is stable, so ties keep
+    // the cartesian-product order
+    gaps.sort((a, b) => b.neighborCount - a.neighborCount);
+    return gaps.map(({ combo }) => combo);
+}
+/**
+ * Expand per-dimension value lists into every possible combination.
+ *
+ * Dimensions are visited in CONFIG_DIMENSIONS order, with the first dimension
+ * varying slowest. A dimension with no values is left out of the resulting
+ * objects instead of collapsing the whole product to nothing.
+ *
+ * @param {object} dimensionValues - Map of dimension name to array of values
+ * @returns {object[]} Array of config objects representing all combinations
+ */
+function cartesianProduct(dimensionValues) {
+    // Start from a single empty combination and widen it one dimension at a time
+    let partials = [{}];
+    for (const dim of CONFIG_DIMENSIONS) {
+        const choices = dimensionValues[dim] || [];
+        if (choices.length === 0) {
+            continue;
+        }
+        const widened = [];
+        for (const partial of partials) {
+            for (const choice of choices) {
+                widened.push({ ...partial, [dim]: choice });
+            }
+        }
+        partials = widened;
+    }
+    return partials;
+}
+// ── Substitution Algorithm (Task 5.2) ────────────────────────────────────────
+/**
+ * Suggest up to three proven configs closest to a requested configuration.
+ *
+ * Closeness is the Hamming distance over CONFIG_DIMENSIONS. Only records with
+ * status='completed' and the same model_family as the request are eligible.
+ * Ties on distance are broken by run_timestamp, newest first.
+ *
+ * @param {object} requestedConfig - The requested config with dimension fields
+ * @param {object[]} provenConfigs - Array of proven configs from Athena
+ * @returns {object} Result object:
+ *   - If matches found: { substitutions: [{config, distance, explanation}...] } (top 3)
+ *   - If no matches: { noMatch: true, message: string }
+ */
+export function findNearestSubstitution(requestedConfig, provenConfigs) {
+    const noCoverage = (message) => ({ noMatch: true, message });
+    if (!requestedConfig || !provenConfigs || provenConfigs.length === 0) {
+        return noCoverage('no coverage — no proven configs available');
+    }
+    const completed = provenConfigs.filter(c => c.status === 'completed');
+    const sameFamily = completed.filter(c => c.model_family === requestedConfig.model_family);
+    if (sameFamily.length === 0) {
+        if (completed.length === 0) {
+            return noCoverage('no coverage — no proven configs available');
+        }
+        // Other families are never suggested; their distance only feeds the message
+        const distances = completed.map(c => hammingDistance(requestedConfig, c));
+        const minDistance = Math.min(...distances);
+        return noCoverage(`no coverage — nearest proven config is ${minDistance} dimensions away`);
+    }
+    const ranked = sameFamily.map(config => ({
+        config,
+        distance: hammingDistance(requestedConfig, config),
+        explanation: buildExplanation(requestedConfig, config)
+    }));
+    // Closest first; equal distances fall back to the most recent run_timestamp
+    ranked.sort((a, b) =>
+        (a.distance - b.distance) ||
+        (b.config.run_timestamp || '').localeCompare(a.config.run_timestamp || '')
+    );
+    return { substitutions: ranked.slice(0, 3) };
+}
+/**
+ * Count the CONFIG_DIMENSIONS entries on which two configs disagree.
+ *
+ * Values are compared after String() coercion, and a missing, null or
+ * undefined value is treated as the empty string.
+ *
+ * @param {object} configA - First config
+ * @param {object} configB - Second config
+ * @returns {number} Number of dimensions that differ
+ */
+export function hammingDistance(configA, configB) {
+    return CONFIG_DIMENSIONS.reduce((count, dim) => {
+        const left = String(configA[dim] ?? '');
+        const right = String(configB[dim] ?? '');
+        return left === right ? count : count + 1;
+    }, 0);
+}
+/**
+ * Describe, one entry per dimension, how a suggestion departs from the request.
+ *
+ * Dimensions are reported in CONFIG_DIMENSIONS order; matching dimensions are
+ * omitted. Values are shown after String() coercion, with missing values
+ * rendered as an empty string.
+ *
+ * @param {object} requested - The requested config
+ * @param {object} suggested - The suggested substitution
+ * @returns {string[]} Array of dimension difference explanations
+ */
+function buildExplanation(requested, suggested) {
+    return CONFIG_DIMENSIONS
+        .map(dim => ({
+            dim,
+            wanted: String(requested[dim] ?? ''),
+            offered: String(suggested[dim] ?? '')
+        }))
+        .filter(({ wanted, offered }) => wanted !== offered)
+        .map(({ dim, wanted, offered }) => `${dim}: '${wanted}' → '${offered}'`);
+}
+// ── Tune/Adapter Stage Gating (Task 5.3) ─────────────────────────────────────
+/**
+ * Decide whether the tune/adapter stages run for a prove request.
+ *
+ * The stages run when any one of these holds: include_tuning is exactly true,
+ * enable_lora is exactly true, or tune_technique is a truthy value other than
+ * 'none'. A missing request never runs them.
+ *
+ * @param {object} proveRequest - The prove request object
+ * @param {boolean} [proveRequest.include_tuning] - Explicitly request tuning
+ * @param {boolean} [proveRequest.enable_lora] - Whether LoRA is enabled
+ * @param {string} [proveRequest.tune_technique] - Tune technique (sft, dpo, etc.)
+ * @returns {boolean} True if tune stages should execute
+ */
+export function shouldExecuteTuneStages(proveRequest) {
+    if (!proveRequest) {
+        return false;
+    }
+    const explicitlyRequested = proveRequest.include_tuning === true;
+    const loraServing = proveRequest.enable_lora === true;
+    const technique = proveRequest.tune_technique;
+    const namesTechnique = Boolean(technique) && technique !== 'none';
+    return explicitlyRequested || loraServing || namesTechnique;
+}
+// ── Failure Classification (Task 5.4) ────────────────────────────────────────
+/**
+ * Turn raw error output into a { stage, category, retryable } classification.
+ *
+ * For a string, the stage is detected from the text. For an object, the
+ * message is taken from `error`, then `message`, then the JSON form of the
+ * whole object, and the stage from `stage` or else detected from the message.
+ * The first ERROR_PATTERNS entry that matches the message decides category
+ * and retryable; anything unmatched, and any falsy input, is reported as a
+ * non-retryable 'code_bug'.
+ *
+ * @param {string|object} errorOutput - Error output (string or structured object)
+ * @param {string} [errorOutput.error] - Error message (if object)
+ * @param {string} [errorOutput.stage] - Stage that failed (if object)
+ * @returns {object} Classification: { stage, category, retryable }
+ */
+export function classifyFailure(errorOutput) {
+    const asCodeBug = (stage) => ({ stage, category: 'code_bug', retryable: false });
+    if (!errorOutput) {
+        return asCodeBug('unknown');
+    }
+    let errorMsg = '';
+    let stage = 'unknown';
+    switch (typeof errorOutput) {
+        case 'string':
+            errorMsg = errorOutput;
+            stage = detectStage(errorOutput);
+            break;
+        case 'object':
+            errorMsg = errorOutput.error || errorOutput.message || JSON.stringify(errorOutput);
+            stage = errorOutput.stage || detectStage(errorMsg);
+            break;
+        default:
+            // Other types keep the empty message and the 'unknown' stage
+            break;
+    }
+    const hit = ERROR_PATTERNS.find(({ pattern }) => pattern.test(errorMsg));
+    if (!hit) {
+        return asCodeBug(stage);
+    }
+    return { stage, category: hit.category, retryable: hit.retryable };
+}
+/**
+ * Detect which lifecycle stage produced an error from the error message.
+ *
+ * Patterns are tried in the order listed and the first match wins, so a
+ * message mentioning several stages resolves to the earliest entry. All
+ * patterns are case-insensitive.
+ *
+ * @param {string} errorMsg - The error message
+ * @returns {string} The detected stage name, or 'unknown' when nothing matches
+ */
+function detectStage(errorMsg) {
+    const stagePatterns = [
+        { pattern: /\b(generate|generation)\b/i, stage: 'generate' },
+        { pattern: /\bdo\/stage\b|model.staging|pre-stage|s3.*stag/i, stage: 'stage' },
+        { pattern: /\b(build|docker)\b/i, stage: 'build' },
+        { pattern: /\b(push|ecr|registry)\b/i, stage: 'push' },
+        { pattern: /\b(deploy|endpoint|CreateEndpoint|InferenceComponent)\b/i, stage: 'deploy' },
+        { pattern: /\b(test|invoke|invocation|inference)\b/i, stage: 'test' },
+        // 'fine-?tun' is a stem: it needs \w* before the closing \b, otherwise
+        // 'fine-tuning', 'fine-tuned' and 'finetune' can never match
+        { pattern: /\b(tune|fine-?tun\w*|customization)\b/i, stage: 'tune' },
+        { pattern: /\b(adapter|lora)\b/i, stage: 'adapter' },
+        { pattern: /\b(benchmark|bench)\b/i, stage: 'benchmark' },
+        { pattern: /\b(register|dynamo)\b/i, stage: 'register' },
+        { pattern: /\b(clean|delete)\b/i, stage: 'clean' }
+    ];
+    for (const { pattern, stage } of stagePatterns) {
+        if (pattern.test(errorMsg)) {
+            return stage;
+        }
+    }
+    return 'unknown';
+}
+// ── Result Writing (Task 5.5) ────────────────────────────────────────────────
+/**
+ * Build a Path Prover Athena record from execution result and classification.
+ *
+ * All records have run_type='path_prove'. On success, status='completed'.
+ * On non-retryable failure, status='unfeasible' with failure_reason populated.
+ * On any other failure (retryable, or no classification supplied),
+ * status='failed' with failure_reason populated.
+ *
+ * Metrics are merged into the record on success, but they can never replace
+ * run_type or status: both are re-applied after the merge so a metrics object
+ * that carries its own run_type/status cannot break the guarantees above.
+ *
+ * @param {object} result - The execution result
+ * @param {boolean} result.success - Whether the prove run succeeded
+ * @param {object} [result.metrics] - Benchmark metrics (on success)
+ * @param {object} [result.config] - The config that was proven
+ * @param {string} [result.error] - Error message (on failure)
+ * @param {object|null} [classification] - Failure classification (from classifyFailure)
+ * @param {string} [classification.stage] - Stage that failed
+ * @param {string} [classification.category] - Error category
+ * @param {boolean} [classification.retryable] - Whether failure is retryable
+ * @returns {object} Athena-compatible record with run_type='path_prove'
+ */
+export function buildPathProverRecord(result, classification) {
+    const record = {
+        run_type: 'path_prove',
+        run_timestamp: new Date().toISOString()
+    };
+    // Merge config dimensions if provided
+    if (result.config) {
+        for (const dim of CONFIG_DIMENSIONS) {
+            if (result.config[dim] !== undefined) {
+                record[dim] = result.config[dim];
+            }
+        }
+        // Also copy non-dimension config fields
+        if (result.config.config_id) record.config_id = result.config.config_id;
+        if (result.config.model_name) record.model_name = result.config.model_name;
+        if (result.config.instance_type) record.instance_type = result.config.instance_type;
+    }
+    if (result.success) {
+        // Merge metrics if available
+        if (result.metrics) {
+            Object.assign(record, result.metrics);
+        }
+        // Applied after the merge so metrics cannot overwrite either field
+        record.run_type = 'path_prove';
+        record.status = 'completed';
+    } else {
+        // Only an explicit retryable === false marks the config as unfeasible
+        if (classification && classification.retryable === false) {
+            record.status = 'unfeasible';
+        } else {
+            record.status = 'failed';
+        }
+        // Populate failure details
+        record.failure_reason = result.error || 'Unknown failure';
+        if (classification) {
+            record.failure_stage = classification.stage;
+            record.failure_category = classification.category;
+            record.failure_retryable = classification.retryable;
+        }
+    }
+    return record;
+}
+/**
+ * Look up an earlier 'unfeasible' path_prove record for the same config, so
+ * that a known-unfeasible configuration is not attempted again.
+ *
+ * A record matches when its status is 'unfeasible', its run_type is
+ * 'path_prove', and every CONFIG_DIMENSIONS value equals the config's value
+ * after String() coercion (missing values compare as the empty string).
+ *
+ * @param {object} config - The config to check
+ * @param {object[]} existingRecords - Existing Athena records
+ * @returns {object|null} The first matching unfeasible record, null otherwise
+ */
+export function findUnfeasibleRecord(config, existingRecords) {
+    const nothingToSearch = !config || !existingRecords || existingRecords.length === 0;
+    if (nothingToSearch) {
+        return null;
+    }
+    const sameDimensions = (record) => CONFIG_DIMENSIONS.every(dim =>
+        String(record[dim] ?? '') === String(config[dim] ?? '')
+    );
+    const hit = existingRecords.find(record =>
+        record.status === 'unfeasible' &&
+        record.run_type === 'path_prove' &&
+        sameDimensions(record)
+    );
+    return hit ?? null;
+}
+// ── Priority Queue (v1 Validation Mode) ──────────────────────────────────────
+/**
+ * Pick the next config to prove from the priority queue.
+ *
+ * Returns the first target whose status is 'pending' and whose model_name is
+ * neither in the data's `proven` list nor among the successful
+ * `event.previousResults`. The returned config is the data's `defaults`
+ * overlaid with the target's own fields, minus `status`. Returns null when no
+ * such target exists or no usable priority data is available, which lets the
+ * caller fall through to gap-finding mode.
+ *
+ * @param {object} event - The Step Functions event object
+ * @param {string} [event.priorityConfigPath] - Path to priority targets JSON
+ * @param {object[]} [event.previousResults] - Previously proven configs in this run
+ * @param {object|null} priorityData - Pre-loaded priority data (for Lambda/testing).
+ *   If null, event._priorityData is used, then a load from event.priorityConfigPath.
+ * @returns {object|null} Next config to prove, or null if priority queue exhausted
+ */
+export function getNextPriorityConfig(event, priorityData = null) {
+    // Source precedence: explicit argument, then event._priorityData, then the file
+    const data = priorityData || event._priorityData || (
+        event.priorityConfigPath ? loadPriorityTargets(event.priorityConfigPath) : null
+    );
+    if (!data || !Array.isArray(data.targets)) {
+        return null;
+    }
+    const alreadyProven = new Set((data.proven || []).map(entry => entry.model_name));
+    // Successes from earlier iterations of this run count as proven too
+    for (const { success, config } of event.previousResults || []) {
+        if (success && config?.model_name) {
+            alreadyProven.add(config.model_name);
+        }
+    }
+    const next = data.targets.find(target =>
+        target.status === 'pending' && !alreadyProven.has(target.model_name)
+    );
+    if (!next) {
+        // Every priority target is proven or non-pending
+        return null;
+    }
+    // status is queue metadata, not a config field, so it is left out
+    const { status: omittedStatus, ...config } = { ...(data.defaults || {}), ...next };
+    return config;
+}
+/**
+ * Record the outcome of a prove attempt on a priority target.
+ *
+ * With newStatus 'proven' the target is removed from `targets` and an entry
+ * { model_name, proven_date (YYYY-MM-DD), ...details } is appended to
+ * `proven`. With any other status the target stays in place and gets the new
+ * status, the optional error_category / error_message from details, and a
+ * last_attempt timestamp. Unknown model names and data without `targets` are
+ * left untouched.
+ *
+ * @param {object} priorityData - The loaded priority targets data (mutated in place)
+ * @param {string} modelName - The model_name to update
+ * @param {string} newStatus - New status: 'proven', 'failed', or 'unfeasible'
+ * @param {object} [details] - Additional details (error_category, error_message)
+ * @returns {object} Updated priority data (same reference, mutated)
+ */
+export function updatePriorityStatus(priorityData, modelName, newStatus, details = {}) {
+    const targets = priorityData?.targets;
+    if (!targets) {
+        return priorityData;
+    }
+    const position = targets.findIndex(entry => entry.model_name === modelName);
+    if (position === -1) {
+        return priorityData;
+    }
+    if (newStatus !== 'proven') {
+        // failed / unfeasible: annotate the target where it sits
+        const entry = targets[position];
+        entry.status = newStatus;
+        if (details.error_category) entry.error_category = details.error_category;
+        if (details.error_message) entry.error_message = details.error_message;
+        entry.last_attempt = new Date().toISOString();
+        return priorityData;
+    }
+    // proven: move the target out of the queue and into the proven list
+    targets.splice(position, 1);
+    if (!priorityData.proven) {
+        priorityData.proven = [];
+    }
+    const [provenDate] = new Date().toISOString().split('T');
+    priorityData.proven.push({
+        model_name: modelName,
+        proven_date: provenDate,
+        ...details
+    });
+    return priorityData;
+}
+/**
+ * Summarise the priority queue as counts.
+ *
+ * `total` is the number of targets plus the number of proven entries, so it
+ * also covers targets whose status is none of pending/failed/unfeasible.
+ * Missing data, or missing `targets` / `proven` lists, count as empty.
+ *
+ * @param {object} priorityData - The loaded priority targets data
+ * @returns {object} Summary with counts: { total, pending, proven, failed, unfeasible }
+ */
+export function getPriorityQueueStatus(priorityData) {
+    const summary = { total: 0, pending: 0, proven: 0, failed: 0, unfeasible: 0 };
+    if (!priorityData) {
+        return summary;
+    }
+    const targets = priorityData.targets || [];
+    const proven = priorityData.proven || [];
+    summary.proven = proven.length;
+    summary.total = targets.length + proven.length;
+    // Single pass over the targets, tallying only the three tracked statuses
+    targets.forEach(target => {
+        const status = target.status;
+        if (status === 'pending' || status === 'failed' || status === 'unfeasible') {
+            summary[status] += 1;
+        }
+    });
+    return summary;
+}
+/**
+ * Read and parse a priority-targets JSON file synchronously.
+ *
+ * The path goes through path.resolve() before reading. Any failure along the
+ * way (bad path argument, unreadable file, invalid JSON) is deliberately
+ * swallowed and reported as null, so callers can treat a missing queue as
+ * "no priority data".
+ *
+ * @param {string} configPath - Absolute or relative path to the JSON file
+ * @returns {object|null} Parsed priority data, or null if not found/invalid
+ */
+export function loadPriorityTargets(configPath) {
+    let parsed;
+    try {
+        parsed = JSON.parse(readFileSync(resolve(configPath), 'utf8'));
+    } catch {
+        parsed = null;
+    }
+    return parsed;
+}
+// ── TP Degree Auto-Resolution at Prove-Time (Task 6.5) ──────────────────────
+/**
+ * Fill in a missing tp_degree from the instance catalog's GPU count.
+ *
+ * The config is returned untouched when it is missing, when tp_degree is
+ * already set (anything other than null/undefined), when it has no
+ * instance_type, when the catalog cannot be loaded, or when the catalog has
+ * no positive `gpus` value for that instance type. Otherwise tp_degree is set
+ * to the GPU count and two bookkeeping fields, _tpAutoResolved and
+ * _tpAutoResolvedFrom, are added.
+ *
+ * Per the original notes this mirrors the generation-time logic in
+ * template-variable-resolver.js (not visible here).
+ *
+ * Requirements: FTP-1 (extension) — task 6.5
+ *
+ * @param {object} config - The prove config (from getNextPriorityConfig or similar)
+ * @param {string} [config.instance_type] - Instance type (e.g. 'ml.g5.48xlarge')
+ * @param {number|string|undefined} [config.tp_degree] - TP degree if already set
+ * @param {object|null} [catalogOverride] - Optional catalog data for testing (skips file load)
+ * @returns {object} The config object (mutated in place) with tp_degree resolved
+ */
+export function resolveProveTpDegree(config, catalogOverride = null) {
+    if (!config) {
+        return config;
+    }
+    const hasExplicitTp = config.tp_degree !== undefined && config.tp_degree !== null;
+    if (hasExplicitTp || !config.instance_type) {
+        return config;
+    }
+    let catalog = catalogOverride;
+    if (!catalog) {
+        // No override supplied: read the catalog bundled next to the servers
+        try {
+            const catalogPath = resolve(__dirname, '..', '..', 'servers', 'lib', 'catalogs', 'instances.json');
+            catalog = JSON.parse(readFileSync(catalogPath, 'utf8'));
+        } catch {
+            return config;
+        }
+    }
+    const gpuCount = catalog?.catalog?.[config.instance_type]?.gpus;
+    if (gpuCount && gpuCount > 0) {
+        config.tp_degree = gpuCount;
+        config._tpAutoResolved = true;
+        config._tpAutoResolvedFrom = config.instance_type;
+    }
+    return config;
+}

package/src/lib/prompts/infrastructure-prompts.js CHANGED Viewed

@@ -252,7 +252,7 @@ const infraInstancePrompts = [
             if (!input || input.trim() === '') {
                 return 'At least one instance type is required';
             }
-            const instancePattern = /^ml\.[a-z0-9]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
+            const instancePattern = /^ml\.[a-z0-9-]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
             const instances = input.split(',').map(s => s.trim()).filter(s => s.length > 0);
             if (instances.length === 0) {
                 return 'At least one instance type is required';
@@ -384,7 +384,7 @@ const infraInstancePrompts = [
             if (!input || input.trim() === '') {
                 return 'Instance type is required';
             }
-            const instancePattern = /^ml\.[a-z0-9]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
+            const instancePattern = /^ml\.[a-z0-9-]+\.(nano|micro|small|medium|large|xlarge|[0-9]+xlarge)$/;
             if (!instancePattern.test(input.trim())) {
                 return 'Invalid instance type format. Expected format: ml.{family}.{size} (e.g., ml.m5.large, ml.g4dn.xlarge)';
             }

package/src/lib/prompts/model-prompts.js CHANGED Viewed

@@ -459,6 +459,12 @@ const hfTokenPrompts = [
                 return false;
             }
+            // Skip HF token when model name is an S3 URI (no HF download needed)
+            const modelName = answers.customModelName || answers.modelName;
+            if (modelName && modelName.startsWith('s3://')) {
+                return false;
+            }
             // Display security warning before prompting
             console.log('\n🔐 HuggingFace Authentication');
             console.log('   Many models (e.g. Llama, Mistral) are gated and require a token.');