npm - @elizaos/training - Versions diffs - 2.0.0-alpha.77 → 2.0.0-alpha.78 - Mend

@elizaos/training 2.0.0-alpha.77 → 2.0.0-alpha.78

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (89)

package/package.json +2 -2
package/.turbo/turbo-lint.log +0 -3
package/.turbo/turbo-typecheck.log +0 -1
package/dist/.tsbuildinfo +0 -1
package/dist/adapter.js +0 -59
package/dist/archetypes/ArchetypeConfigService.js +0 -510
package/dist/archetypes/derive-archetype.js +0 -196
package/dist/archetypes/index.js +0 -7
package/dist/benchmark/ArchetypeMatchupBenchmark.js +0 -547
package/dist/benchmark/BenchmarkChartGenerator.js +0 -632
package/dist/benchmark/BenchmarkDataGenerator.js +0 -825
package/dist/benchmark/BenchmarkDataViewer.js +0 -197
package/dist/benchmark/BenchmarkHistoryService.js +0 -135
package/dist/benchmark/BenchmarkRunner.js +0 -483
package/dist/benchmark/BenchmarkValidator.js +0 -158
package/dist/benchmark/FastEvalRunner.js +0 -133
package/dist/benchmark/MetricsValidator.js +0 -104
package/dist/benchmark/MetricsVisualizer.js +0 -775
package/dist/benchmark/ModelBenchmarkService.js +0 -433
package/dist/benchmark/ModelRegistry.js +0 -122
package/dist/benchmark/RulerBenchmarkIntegration.js +0 -168
package/dist/benchmark/SimulationA2AInterface.js +0 -683
package/dist/benchmark/SimulationEngine.js +0 -522
package/dist/benchmark/TaskRunner.js +0 -60
package/dist/benchmark/__tests__/BenchmarkRunner.test.js +0 -409
package/dist/benchmark/__tests__/HeadToHead.test.js +0 -105
package/dist/benchmark/index.js +0 -23
package/dist/benchmark/parseSimulationMetrics.js +0 -86
package/dist/benchmark/simulation-types.js +0 -1
package/dist/dependencies.js +0 -197
package/dist/generation/TrajectoryGenerator.js +0 -244
package/dist/generation/index.js +0 -6
package/dist/huggingface/HuggingFaceDatasetUploader.js +0 -463
package/dist/huggingface/HuggingFaceIntegrationService.js +0 -272
package/dist/huggingface/HuggingFaceModelUploader.js +0 -385
package/dist/huggingface/index.js +0 -9
package/dist/huggingface/shared/HuggingFaceUploadUtil.js +0 -144
package/dist/index.js +0 -41
package/dist/init-training.js +0 -43
package/dist/metrics/TrajectoryMetricsExtractor.js +0 -523
package/dist/metrics/__tests__/TrajectoryMetricsExtractor.test.js +0 -628
package/dist/metrics/index.js +0 -7
package/dist/metrics/types.js +0 -21
package/dist/rubrics/__tests__/index.test.js +0 -150
package/dist/rubrics/ass-kisser.js +0 -83
package/dist/rubrics/degen.js +0 -78
package/dist/rubrics/goody-twoshoes.js +0 -82
package/dist/rubrics/index.js +0 -184
package/dist/rubrics/information-trader.js +0 -82
package/dist/rubrics/infosec.js +0 -99
package/dist/rubrics/liar.js +0 -102
package/dist/rubrics/perps-trader.js +0 -85
package/dist/rubrics/researcher.js +0 -79
package/dist/rubrics/scammer.js +0 -80
package/dist/rubrics/social-butterfly.js +0 -71
package/dist/rubrics/super-predictor.js +0 -95
package/dist/rubrics/trader.js +0 -65
package/dist/scoring/ArchetypeScoringService.js +0 -301
package/dist/scoring/JudgePromptBuilder.js +0 -401
package/dist/scoring/LLMJudgeCache.js +0 -263
package/dist/scoring/index.js +0 -8
package/dist/training/AutomationPipeline.js +0 -714
package/dist/training/BenchmarkService.js +0 -370
package/dist/training/ConfigValidator.js +0 -153
package/dist/training/MarketOutcomesTracker.js +0 -142
package/dist/training/ModelDeployer.js +0 -128
package/dist/training/ModelFetcher.js +0 -48
package/dist/training/ModelSelectionService.js +0 -248
package/dist/training/ModelUsageVerifier.js +0 -106
package/dist/training/MultiModelOrchestrator.js +0 -349
package/dist/training/RLModelConfig.js +0 -295
package/dist/training/RewardBackpropagationService.js +0 -117
package/dist/training/RulerScoringService.js +0 -450
package/dist/training/TrainingMonitor.js +0 -108
package/dist/training/TrajectoryRecorder.js +0 -281
package/dist/training/__tests__/TrajectoryRecorder.test.js +0 -363
package/dist/training/index.js +0 -30
package/dist/training/logRLConfig.js +0 -29
package/dist/training/pipeline.js +0 -80
package/dist/training/storage/ModelStorageService.js +0 -190
package/dist/training/storage/TrainingDataArchiver.js +0 -136
package/dist/training/storage/index.js +0 -7
package/dist/training/types.js +0 -6
package/dist/training/window-utils.js +0 -100
package/dist/utils/index.js +0 -73
package/dist/utils/logger.js +0 -55
package/dist/utils/snowflake.js +0 -15
package/dist/utils/synthetic-detector.js +0 -67
package/vitest.config.ts +0 -8

package/dist/benchmark/FastEvalRunner.js (deleted)

@@ -1,133 +0,0 @@
-/**
- * Fast Evaluation Runner
- *
- * Provides efficient evaluation of agents on benchmarks with:
- * - Fast-forward mode (skip waiting)
- * - Batch processing
- * - Parallel execution
- * - Progress tracking
- */
-import { logger } from "../utils/logger";
-import { BenchmarkRunner } from "./BenchmarkRunner";
-// biome-ignore lint/complexity/noStaticOnlyClass: Runner namespace - run/runWithProgress are logically grouped
export class FastEvalRunner {
    /**
     * Run fast evaluation
     *
     * Executes `iterations` benchmark runs through `BenchmarkRunner.runSingle`,
     * in sequential batches of at most `parallelRuns` concurrent runs, then
     * aggregates the per-run metrics.
     *
     * @param config - Fast evaluation configuration. Fields read here:
     *   `benchmarkPath`, `agentRuntime`, `agentUserId`, `outputDir`,
     *   `saveTrajectory` (defaults to `false`), `iterations` and
     *   `parallelRuns` (both default to 1), and the optional `onProgress`
     *   callback.
     * @returns Object with `results` (one entry per run, in run order),
     *   `summary` (`avgPnl`, `avgAccuracy`, `avgOptimality`, `totalDuration`
     *   in milliseconds, `runsCompleted`), `bestRun` and `worstRun`
     *   (highest / lowest `metrics.totalPnl`; the earliest run wins ties).
     * @throws Rejects with the first failure if any single run rejects.
     *
     * @remarks
     * - `iterations` and `parallelRuns` are clamped to whole numbers >= 1.
     *   Missing, zero, negative, fractional or non-numeric values therefore
     *   can neither stall the batch loop nor leave the result list empty.
     * - Each run writes to `${outputDir}/run-<n>` where `<n>` starts at 1.
     * - `onProgress` is invoked once per completed run with
     *   `{ completed, total, currentRun }`.
     *
     * @example
     * ```typescript
     * const result = await FastEvalRunner.run({
     *   benchmarkPath: './benchmarks/test.json',
     *   agentRuntime: runtime,
     *   agentUserId: 'agent-123',
     *   parallelRuns: 3,
     *   iterations: 10,
     *   outputDir: './results'
     * });
     * console.log(`Average P&L: ${result.summary.avgPnl}`);
     * ```
     */
    static async run(config) {
        const startTime = Date.now();
        // A negative step would move batchStart backwards forever, and a
        // non-positive iteration count would leave `results` empty, so both
        // knobs are normalised to an integer of at least 1.
        const toPositiveInt = (value) => Math.max(1, Math.floor(Number(value) || 1));
        const iterations = toPositiveInt(config.iterations);
        const parallelRuns = toPositiveInt(config.parallelRuns);
        logger.info("Starting fast evaluation", {
            benchmarkPath: config.benchmarkPath,
            agentUserId: config.agentUserId,
            iterations,
            parallelRuns,
        });
        const results = [];
        let completed = 0;
        // Batches run one after another; the runs inside a batch run concurrently.
        for (let batchStart = 0; batchStart < iterations; batchStart += parallelRuns) {
            const batchEnd = Math.min(batchStart + parallelRuns, iterations);
            logger.info(`Running batch ${batchStart + 1}-${batchEnd} of ${iterations}`);
            const batchPromises = Array.from({ length: batchEnd - batchStart }, async (_, offset) => {
                const runLabel = `run-${batchStart + offset + 1}`;
                const result = await BenchmarkRunner.runSingle({
                    benchmarkPath: config.benchmarkPath,
                    agentRuntime: config.agentRuntime,
                    agentUserId: config.agentUserId,
                    saveTrajectory: config.saveTrajectory ?? false,
                    outputDir: `${config.outputDir}/${runLabel}`,
                });
                completed++;
                if (config.onProgress) {
                    config.onProgress({
                        completed,
                        total: iterations,
                        currentRun: runLabel,
                    });
                }
                return result;
            });
            // Promise.all keeps run order and rejects as soon as one run fails.
            const batchResults = await Promise.all(batchPromises);
            results.push(...batchResults);
        }
        const totalDuration = Date.now() - startTime;
        // `results` holds at least one entry here because iterations >= 1.
        const mean = (pick) => results.reduce((sum, run) => sum + pick(run), 0) / results.length;
        const summary = {
            avgPnl: mean((run) => run.metrics.totalPnl),
            avgAccuracy: mean((run) => run.metrics.predictionMetrics.accuracy),
            avgOptimality: mean((run) => run.metrics.optimalityScore),
            totalDuration,
            runsCompleted: results.length,
        };
        const bestRun = results.reduce((best, current) => (current.metrics.totalPnl > best.metrics.totalPnl ? current : best), results[0]);
        const worstRun = results.reduce((worst, current) => (current.metrics.totalPnl < worst.metrics.totalPnl ? current : worst), results[0]);
        logger.info("Fast evaluation completed", summary);
        return {
            results,
            summary,
            bestRun,
            worstRun,
        };
    }
    /**
     * Run evaluation with progress bar
     *
     * Delegates to `FastEvalRunner.run` and redraws a 40-character bar on
     * `process.stdout` whenever the rounded percentage changes. A caller
     * supplied `config.onProgress` is still invoked for every update.
     *
     * @param config - Same configuration object accepted by `run`
     * @returns The result object produced by `run`
     * @throws Rejects with whatever `run` rejects with; the bar line is
     *   terminated first so later output starts on a fresh line.
     */
    static async runWithProgress(config) {
        const barLength = 40;
        // -1 matches no real percentage, so the very first update is drawn
        // even when it rounds to 0%.
        let lastPercent = -1;
        let barDrawn = false;
        try {
            return await FastEvalRunner.run({
                ...config,
                onProgress: (progress) => {
                    const fraction = progress.completed / progress.total;
                    const percent = Math.round(fraction * 100);
                    if (percent !== lastPercent) {
                        const filled = Math.round(fraction * barLength);
                        const bar = "█".repeat(filled) + "░".repeat(barLength - filled);
                        // "\r" rewinds to the start of the line so the bar is redrawn in place.
                        process.stdout.write(`\r[${bar}] ${percent}% (${progress.completed}/${progress.total})`);
                        lastPercent = percent;
                        barDrawn = true;
                    }
                    if (config.onProgress) {
                        config.onProgress(progress);
                    }
                },
            });
        }
        finally {
            // Terminate the in-place bar line on success and on failure alike.
            if (barDrawn) {
                process.stdout.write("\n");
            }
        }
    }
}

package/dist/benchmark/MetricsValidator.js (deleted)

@@ -1,104 +0,0 @@
-/**
- * Metrics Validator
- *
- * Validates that benchmark metrics are calculated correctly against ground truth.
- */
-import { logger } from "../utils/logger";
/**
 * Cross-check prediction actions against the ground-truth market outcomes.
 *
 * Emits one warning for every `buy_prediction` action whose market id is not
 * an own key of `groundTruth.marketOutcomes`. No errors are produced here, so
 * `valid` is always `true`; the first parameter is accepted for signature
 * symmetry but is not read.
 *
 * @param _predictionMetrics - Unused.
 * @param actions - Recorded actions; only those with `type === "buy_prediction"` are inspected.
 * @param groundTruth - Object expected to carry a `marketOutcomes` map keyed by market id.
 * @returns `{ valid, errors, warnings }`
 */
function validatePredictionMetrics(_predictionMetrics, actions, groundTruth) {
    const errors = [];
    const warnings = [];
    // A missing outcome map is treated as "no ground truth for anything"
    // rather than crashing the whole validation pass.
    const marketOutcomes = groundTruth?.marketOutcomes ?? {};
    // Get all prediction actions
    const predictionActions = actions.filter((a) => a.type === "buy_prediction");
    // Validate each action against ground truth
    for (const action of predictionActions) {
        const marketId = action.data?.marketId;
        // Own-property lookup: the `in` operator would also match inherited
        // names such as "toString" or "constructor" and hide the warning.
        if (!Object.prototype.hasOwnProperty.call(marketOutcomes, marketId)) {
            warnings.push(`No ground truth for market ${marketId}`);
        }
        // Verify the outcome exists in ground truth
        // (actual verification of correctness happens in SimulationEngine)
    }
    return { valid: errors.length === 0, errors, warnings };
}
/**
 * Validate metrics against ground truth
 *
 * Runs a fixed series of consistency checks over a benchmark metrics object
 * and logs a one-line summary through `logger.info`.
 *
 * Checks, in order:
 * 1. prediction actions have ground truth (warnings only)
 * 2. `optimalityScore` is a number in [0, 100]
 * 3. `timing.avgResponseTime` is a number >= 0 and `timing.maxResponseTime`
 *    is a number not below the average
 * 4. number of `buy_prediction` actions equals `predictionMetrics.totalPositions` (warning)
 * 5. reported accuracy matches `correctPredictions / totalPositions` within 0.01
 * 6. `correctPredictions + incorrectPredictions === totalPositions`
 * 7. reported perp win rate matches `profitableTrades / totalTrades` within 0.01
 *    (only when `totalTrades > 0`)
 *
 * NaN or non-numeric values fail the check they take part in, so they are
 * reported as errors instead of passing unnoticed.
 *
 * @param metrics - Metrics object with `optimalityScore`, `timing`, `predictionMetrics` and `perpMetrics`
 * @param actions - Recorded actions; entries with `type === "buy_prediction"` are counted
 * @param groundTruth - Ground truth forwarded to the prediction check
 * @returns `{ valid, errors, warnings }` where `valid` is `errors.length === 0`
 */
export function validateMetrics(metrics, actions, groundTruth) {
    // NaN is typeof "number" but poisons every comparison, so exclude it explicitly.
    const isNumber = (value) => typeof value === "number" && !Number.isNaN(value);
    const errors = [];
    const warnings = [];
    // 1. Validate prediction accuracy calculation
    const predictionValidation = validatePredictionMetrics(metrics.predictionMetrics, actions, groundTruth);
    errors.push(...predictionValidation.errors);
    warnings.push(...predictionValidation.warnings);
    // 2. Validate optimality score is in valid range
    const { optimalityScore } = metrics;
    if (!isNumber(optimalityScore) || optimalityScore < 0 || optimalityScore > 100) {
        errors.push(`Optimality score out of range: ${optimalityScore}`);
    }
    // 3. Validate timing metrics are reasonable
    const { avgResponseTime, maxResponseTime } = metrics.timing;
    if (!isNumber(avgResponseTime) || avgResponseTime < 0) {
        errors.push(`Invalid average response time: ${avgResponseTime}`);
    }
    if (!isNumber(maxResponseTime) || maxResponseTime < avgResponseTime) {
        errors.push(`Max response time less than average: ${maxResponseTime} < ${avgResponseTime}`);
    }
    // 4. Validate action counts match
    const { correctPredictions, incorrectPredictions, totalPositions, accuracy } = metrics.predictionMetrics;
    const predictionActions = actions.filter((a) => a.type === "buy_prediction");
    if (predictionActions.length !== totalPositions) {
        warnings.push(`Prediction action count mismatch: ${predictionActions.length} actions vs ${totalPositions} positions`);
    }
    // 5. Validate accuracy calculation
    const calculatedAccuracy = totalPositions > 0 ? correctPredictions / totalPositions : 0;
    const accuracyDiff = Math.abs(calculatedAccuracy - accuracy);
    // Written as "not within tolerance" so a NaN difference counts as a mismatch.
    if (!(accuracyDiff <= 0.01)) {
        // Allow 1% tolerance for floating point
        errors.push(`Accuracy calculation mismatch: reported ${accuracy}, calculated ${calculatedAccuracy}`);
    }
    // 6. Validate correct + incorrect = total
    if (correctPredictions + incorrectPredictions !== totalPositions) {
        errors.push(`Prediction count mismatch: ${correctPredictions} + ${incorrectPredictions} != ${totalPositions}`);
    }
    // 7. Validate perp win rate calculation
    const { totalTrades, profitableTrades, winRate } = metrics.perpMetrics;
    if (totalTrades > 0) {
        const calculatedWinRate = profitableTrades / totalTrades;
        const winRateDiff = Math.abs(calculatedWinRate - winRate);
        if (!(winRateDiff <= 0.01)) {
            errors.push(`Win rate calculation mismatch: reported ${winRate}, calculated ${calculatedWinRate}`);
        }
    }
    const valid = errors.length === 0;
    logger.info("Metrics validation complete", {
        valid,
        errors: errors.length,
        warnings: warnings.length,
    });
    return {
        valid,
        errors,
        warnings,
    };
}
/**
 * Quick sanity check for metrics
 *
 * Returns `true` only when every inspected value is a number inside its
 * expected range:
 * - `optimalityScore` in [0, 100]
 * - `predictionMetrics.accuracy` in [0, 1]
 * - `perpMetrics.winRate` in [0, 1]
 * - `timing.avgResponseTime` >= 0
 * - `timing.maxResponseTime` >= 0
 *
 * The checks are phrased as "is inside the range" rather than "is outside
 * the range" so that NaN, `undefined` and other non-numeric values are
 * rejected; every comparison against NaN is false, which would otherwise let
 * such values pass.
 *
 * @param metrics - Metrics object to inspect
 * @returns `true` when all checks pass, otherwise `false`
 */
export function metricsSanityCheck(metrics) {
    const inRange = (value, min, max = Number.POSITIVE_INFINITY) => typeof value === "number" && value >= min && value <= max;
    // && short-circuits in the same order the individual guards ran before.
    return (inRange(metrics.optimalityScore, 0, 100) &&
        inRange(metrics.predictionMetrics.accuracy, 0, 1) &&
        inRange(metrics.perpMetrics.winRate, 0, 1) &&
        inRange(metrics.timing.avgResponseTime, 0) &&
        inRange(metrics.timing.maxResponseTime, 0));
}
/**
 * Object wrapper that exposes the standalone validators under method-style
 * names: `validate` is `validateMetrics` and `sanityCheck` is
 * `metricsSanityCheck` (the same function references, not copies).
 *
 * @deprecated Use validateMetrics and metricsSanityCheck instead
 */
export const MetricsValidator = {
    validate: validateMetrics,
    sanityCheck: metricsSanityCheck,
};