npm - @elizaos/training - Versions diffs - 2.0.0-alpha.77 → 2.0.0-alpha.78 - Mend

@elizaos/training 2.0.0-alpha.77 → 2.0.0-alpha.78

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (89) hide show

package/package.json +2 -2
package/.turbo/turbo-lint.log +0 -3
package/.turbo/turbo-typecheck.log +0 -1
package/dist/.tsbuildinfo +0 -1
package/dist/adapter.js +0 -59
package/dist/archetypes/ArchetypeConfigService.js +0 -510
package/dist/archetypes/derive-archetype.js +0 -196
package/dist/archetypes/index.js +0 -7
package/dist/benchmark/ArchetypeMatchupBenchmark.js +0 -547
package/dist/benchmark/BenchmarkChartGenerator.js +0 -632
package/dist/benchmark/BenchmarkDataGenerator.js +0 -825
package/dist/benchmark/BenchmarkDataViewer.js +0 -197
package/dist/benchmark/BenchmarkHistoryService.js +0 -135
package/dist/benchmark/BenchmarkRunner.js +0 -483
package/dist/benchmark/BenchmarkValidator.js +0 -158
package/dist/benchmark/FastEvalRunner.js +0 -133
package/dist/benchmark/MetricsValidator.js +0 -104
package/dist/benchmark/MetricsVisualizer.js +0 -775
package/dist/benchmark/ModelBenchmarkService.js +0 -433
package/dist/benchmark/ModelRegistry.js +0 -122
package/dist/benchmark/RulerBenchmarkIntegration.js +0 -168
package/dist/benchmark/SimulationA2AInterface.js +0 -683
package/dist/benchmark/SimulationEngine.js +0 -522
package/dist/benchmark/TaskRunner.js +0 -60
package/dist/benchmark/__tests__/BenchmarkRunner.test.js +0 -409
package/dist/benchmark/__tests__/HeadToHead.test.js +0 -105
package/dist/benchmark/index.js +0 -23
package/dist/benchmark/parseSimulationMetrics.js +0 -86
package/dist/benchmark/simulation-types.js +0 -1
package/dist/dependencies.js +0 -197
package/dist/generation/TrajectoryGenerator.js +0 -244
package/dist/generation/index.js +0 -6
package/dist/huggingface/HuggingFaceDatasetUploader.js +0 -463
package/dist/huggingface/HuggingFaceIntegrationService.js +0 -272
package/dist/huggingface/HuggingFaceModelUploader.js +0 -385
package/dist/huggingface/index.js +0 -9
package/dist/huggingface/shared/HuggingFaceUploadUtil.js +0 -144
package/dist/index.js +0 -41
package/dist/init-training.js +0 -43
package/dist/metrics/TrajectoryMetricsExtractor.js +0 -523
package/dist/metrics/__tests__/TrajectoryMetricsExtractor.test.js +0 -628
package/dist/metrics/index.js +0 -7
package/dist/metrics/types.js +0 -21
package/dist/rubrics/__tests__/index.test.js +0 -150
package/dist/rubrics/ass-kisser.js +0 -83
package/dist/rubrics/degen.js +0 -78
package/dist/rubrics/goody-twoshoes.js +0 -82
package/dist/rubrics/index.js +0 -184
package/dist/rubrics/information-trader.js +0 -82
package/dist/rubrics/infosec.js +0 -99
package/dist/rubrics/liar.js +0 -102
package/dist/rubrics/perps-trader.js +0 -85
package/dist/rubrics/researcher.js +0 -79
package/dist/rubrics/scammer.js +0 -80
package/dist/rubrics/social-butterfly.js +0 -71
package/dist/rubrics/super-predictor.js +0 -95
package/dist/rubrics/trader.js +0 -65
package/dist/scoring/ArchetypeScoringService.js +0 -301
package/dist/scoring/JudgePromptBuilder.js +0 -401
package/dist/scoring/LLMJudgeCache.js +0 -263
package/dist/scoring/index.js +0 -8
package/dist/training/AutomationPipeline.js +0 -714
package/dist/training/BenchmarkService.js +0 -370
package/dist/training/ConfigValidator.js +0 -153
package/dist/training/MarketOutcomesTracker.js +0 -142
package/dist/training/ModelDeployer.js +0 -128
package/dist/training/ModelFetcher.js +0 -48
package/dist/training/ModelSelectionService.js +0 -248
package/dist/training/ModelUsageVerifier.js +0 -106
package/dist/training/MultiModelOrchestrator.js +0 -349
package/dist/training/RLModelConfig.js +0 -295
package/dist/training/RewardBackpropagationService.js +0 -117
package/dist/training/RulerScoringService.js +0 -450
package/dist/training/TrainingMonitor.js +0 -108
package/dist/training/TrajectoryRecorder.js +0 -281
package/dist/training/__tests__/TrajectoryRecorder.test.js +0 -363
package/dist/training/index.js +0 -30
package/dist/training/logRLConfig.js +0 -29
package/dist/training/pipeline.js +0 -80
package/dist/training/storage/ModelStorageService.js +0 -190
package/dist/training/storage/TrainingDataArchiver.js +0 -136
package/dist/training/storage/index.js +0 -7
package/dist/training/types.js +0 -6
package/dist/training/window-utils.js +0 -100
package/dist/utils/index.js +0 -73
package/dist/utils/logger.js +0 -55
package/dist/utils/snowflake.js +0 -15
package/dist/utils/synthetic-detector.js +0 -67
package/vitest.config.ts +0 -8

package/dist/benchmark/BenchmarkDataViewer.js DELETED Viewed

@@ -1,197 +0,0 @@
-/**
- * Benchmark Data Viewer
- *
- * Provides utilities to view and inspect benchmark data.
- * Useful for validation and understanding benchmark structure.
- */
-import { promises as fs } from "node:fs";
-import * as BenchmarkValidator from "./BenchmarkValidator";
-function analyzeTicks(ticks) {
-    const eventTypes = {};
-    let withEvents = 0;
-    for (const tick of ticks) {
-        if (tick.events.length > 0) {
-            withEvents++;
-        }
-        for (const event of tick.events) {
-            eventTypes[event.type] = (eventTypes[event.type] || 0) + 1;
-        }
-    }
-    return {
-        total: ticks.length,
-        withEvents,
-        eventTypes,
-    };
-}
-function analyzeGroundTruth(groundTruth) {
-    return {
-        marketOutcomes: Object.keys(groundTruth.marketOutcomes).length,
-        priceHistory: Object.fromEntries(Object.entries(groundTruth.priceHistory).map(([ticker, history]) => [
-            ticker,
-            history.length,
-        ])),
-        optimalActions: groundTruth.optimalActions.length,
-        socialOpportunities: groundTruth.socialOpportunities.length,
-        hiddenFacts: groundTruth.hiddenFacts?.length || 0,
-        hiddenEvents: groundTruth.hiddenEvents?.length || 0,
-        trueFacts: Object.keys(groundTruth.trueFacts || {}),
-    };
-}
-/**
- * Load and view a benchmark file
- */
-export async function viewBenchmark(filePath, options = {}) {
-    const data = await fs.readFile(filePath, "utf-8");
-    const snapshot = JSON.parse(data);
-    // Validate
-    const validation = BenchmarkValidator.validate(snapshot);
-    // Build view
-    const view = {
-        id: snapshot.id,
-        version: snapshot.version,
-        createdAt: snapshot.createdAt,
-        duration: snapshot.duration,
-        tickInterval: snapshot.tickInterval,
-        initialState: {
-            predictionMarkets: snapshot.initialState.predictionMarkets.length,
-            perpetualMarkets: snapshot.initialState.perpetualMarkets.length,
-            agents: snapshot.initialState.agents.length,
-            posts: snapshot.initialState.posts?.length || 0,
-            groupChats: snapshot.initialState.groupChats?.length || 0,
-        },
-        ticks: analyzeTicks(snapshot.ticks),
-        validation,
-    };
-    if (options.showGroundTruth || options.verbose) {
-        view.groundTruth = analyzeGroundTruth(snapshot.groundTruth);
-    }
-    return view;
-}
-/**
- * Print view to console
- */
-export function printBenchmarkView(view, options = {}) {
-    console.log("\n📊 Benchmark Data View\n");
-    console.log(`ID: ${view.id}`);
-    console.log(`Version: ${view.version}`);
-    console.log(`Created: ${new Date(view.createdAt).toISOString()}`);
-    console.log(`Duration: ${(view.duration / 60).toFixed(1)} minutes`);
-    console.log(`Tick Interval: ${view.tickInterval}s`);
-    console.log("\n📈 Initial State:");
-    console.log(`  Prediction Markets: ${view.initialState.predictionMarkets}`);
-    console.log(`  Perpetual Markets: ${view.initialState.perpetualMarkets}`);
-    console.log(`  Agents: ${view.initialState.agents}`);
-    console.log(`  Posts: ${view.initialState.posts}`);
-    console.log(`  Group Chats: ${view.initialState.groupChats}`);
-    console.log("\n⏱️  Ticks:");
-    console.log(`  Total: ${view.ticks.total}`);
-    console.log(`  With Events: ${view.ticks.withEvents}`);
-    if (options.verbose) {
-        console.log(`  Event Types:`);
-        for (const [type, count] of Object.entries(view.ticks.eventTypes)) {
-            console.log(`    ${type}: ${count}`);
-        }
-    }
-    if (view.groundTruth) {
-        console.log("\n🎯 Ground Truth:");
-        console.log(`  Market Outcomes: ${view.groundTruth.marketOutcomes}`);
-        console.log(`  Price History:`);
-        for (const [ticker, count] of Object.entries(view.groundTruth.priceHistory)) {
-            console.log(`    ${ticker}: ${count} ticks`);
-        }
-        console.log(`  Optimal Actions: ${view.groundTruth.optimalActions}`);
-        console.log(`  Social Opportunities: ${view.groundTruth.socialOpportunities}`);
-        if (options.showHidden) {
-            console.log(`  Hidden Facts: ${view.groundTruth.hiddenFacts}`);
-            console.log(`  Hidden Events: ${view.groundTruth.hiddenEvents}`);
-            console.log(`  True Facts: ${view.groundTruth.trueFacts.join(", ")}`);
-        }
-    }
-    console.log("\n✅ Validation:");
-    console.log(`  Valid: ${view.validation.valid ? "✅" : "❌"}`);
-    if (view.validation.errors.length > 0) {
-        console.log(`  Errors: ${view.validation.errors.length}`);
-        if (options.verbose) {
-            for (const error of view.validation.errors) {
-                console.log(`    ❌ ${error}`);
-            }
-        }
-    }
-    if (view.validation.warnings.length > 0) {
-        console.log(`  Warnings: ${view.validation.warnings.length}`);
-        if (options.verbose) {
-            for (const warning of view.validation.warnings) {
-                console.log(`    ⚠️  ${warning}`);
-            }
-        }
-    }
-    console.log("");
-}
-/**
- * Get tick details
- */
-export function getTickDetails(snapshot, tickNumber) {
-    const tick = snapshot.ticks[tickNumber] || null;
-    if (!tick) {
-        return { tick: null, state: null, events: [] };
-    }
-    return {
-        tick,
-        state: tick.state,
-        events: tick.events.map((e) => ({
-            type: e.type,
-            data: e.data,
-        })),
-    };
-}
-/**
- * Get ground truth for a specific tick
- */
-export function getGroundTruthForTick(snapshot, tickNumber) {
-    const gt = snapshot.groundTruth;
-    return {
-        hiddenFacts: (gt.hiddenFacts || [])
-            .filter((f) => f.tick === tickNumber)
-            .map((f) => ({ fact: f.fact, category: f.category })),
-        hiddenEvents: (gt.hiddenEvents || [])
-            .filter((e) => e.tick === tickNumber)
-            .map((e) => ({ type: e.type, description: e.description })),
-        marketOutcomes: gt.marketOutcomes,
-    };
-}
-/**
- * Check if agent can access hidden facts (should always be false)
- */
-export function verifyAgentCannotAccessHiddenFacts(snapshot) {
-    // Agents can only access game state via SimulationA2AInterface
-    // Ground truth is stored separately and not exposed
-    // This is a verification check
-    const state = snapshot.initialState;
-    const hasGroundTruth = !!snapshot.groundTruth;
-    const hasHiddenFacts = !!snapshot.groundTruth?.hiddenFacts?.length;
-    // Check if ground truth is accidentally in state
-    const stateKeys = Object.keys(state);
-    const hasGroundTruthInState = stateKeys.includes("groundTruth") ||
-        stateKeys.includes("hiddenFacts") ||
-        stateKeys.includes("hiddenEvents");
-    if (hasGroundTruthInState) {
-        return {
-            canAccess: true,
-            reason: "Ground truth found in game state (security issue!)",
-        };
-    }
-    return {
-        canAccess: false,
-        reason: hasGroundTruth && hasHiddenFacts
-            ? "Ground truth exists but is properly isolated from game state"
-            : "No ground truth data found",
-    };
-}
-/** @deprecated Use viewBenchmark, printBenchmarkView, etc. instead */
-export const BenchmarkDataViewer = {
-    view: viewBenchmark,
-    print: printBenchmarkView,
-    getTickDetails,
-    getGroundTruthForTick,
-    verifyAgentCannotAccessHiddenFacts,
-};

package/dist/benchmark/BenchmarkHistoryService.js DELETED Viewed

@@ -1,135 +0,0 @@
-/**
- * Benchmark History Service
- *
- * Persists benchmark results to the database for historical tracking and analysis.
- */
-import { getTrainingDataAdapter, } from "../adapter";
-import { logger } from "../utils/logger";
-import { generateSnowflakeId } from "../utils/snowflake";
-/**
- * Service for managing benchmark result history
- */
-// biome-ignore lint/complexity/noStaticOnlyClass: Service namespace - methods are logically grouped
-export class BenchmarkHistoryService {
-    /**
-     * Save a benchmark result to the database
-     */
-    static async saveResult(input) {
-        const id = await generateSnowflakeId();
-        const now = new Date();
-        const insertData = {
-            id,
-            modelId: input.modelId,
-            benchmarkId: input.benchmarkId,
-            benchmarkPath: input.benchmarkPath,
-            runAt: now,
-            totalPnl: input.metrics.totalPnl,
-            predictionAccuracy: input.metrics.predictionMetrics.accuracy,
-            perpWinRate: input.metrics.perpMetrics.winRate,
-            optimalityScore: input.metrics.optimalityScore,
-            detailedMetrics: JSON.parse(JSON.stringify(input.metrics)),
-            baselinePnlDelta: input.baselineComparison?.pnlDelta ?? null,
-            baselineAccuracyDelta: input.baselineComparison?.accuracyDelta ?? null,
-            improved: input.baselineComparison?.improved ?? null,
-            duration: input.duration,
-        };
-        await getTrainingDataAdapter().insertBenchmarkResult(insertData);
-        logger.info("Saved benchmark result", {
-            id,
-            modelId: input.modelId,
-            benchmarkId: input.benchmarkId,
-            totalPnl: input.metrics.totalPnl,
-        });
-        return { ...insertData, createdAt: now };
-    }
-    /**
-     * Get benchmark results by query
-     */
-    static async getResults(query) {
-        return getTrainingDataAdapter().queryBenchmarkResults({
-            modelId: query.modelId,
-            benchmarkId: query.benchmarkId,
-            startDate: query.startDate,
-            endDate: query.endDate,
-            limit: query.limit ?? 100,
-        });
-    }
-    /**
-     * Get the latest result for a model
-     */
-    static async getLatestResult(modelId) {
-        const results = await getTrainingDataAdapter().queryBenchmarkResults({
-            modelId,
-            limit: 1,
-        });
-        return results[0] ?? null;
-    }
-    /**
-     * Get trend data for a model
-     */
-    static async getTrendData(modelId, limit = 20) {
-        const results = await getTrainingDataAdapter().queryBenchmarkResults({
-            modelId,
-            limit,
-        });
-        // queryBenchmarkResults returns desc by runAt, reverse for chronological
-        const chronological = results.reverse();
-        return {
-            modelId,
-            dates: chronological.map((r) => r.runAt),
-            pnlHistory: chronological.map((r) => r.totalPnl),
-            accuracyHistory: chronological.map((r) => r.predictionAccuracy),
-            optimalityHistory: chronological.map((r) => r.optimalityScore),
-        };
-    }
-    /**
-     * Get comparison data for multiple models
-     */
-    static async getModelComparison(modelIds, benchmarkId) {
-        const adapter = getTrainingDataAdapter();
-        const comparison = new Map();
-        for (const modelId of modelIds) {
-            const results = await adapter.queryBenchmarkResults({
-                modelId,
-                benchmarkId,
-                limit: 10,
-            });
-            comparison.set(modelId, results);
-        }
-        return comparison;
-    }
-    /**
-     * Get summary statistics for all models
-     */
-    static async getModelSummary() {
-        return getTrainingDataAdapter().getBenchmarkModelSummary();
-    }
-    /**
-     * Check if a model improved vs baseline
-     */
-    static async checkImprovement(modelId, baselineModelId, benchmarkId) {
-        const adapter = getTrainingDataAdapter();
-        const modelResults = await adapter.queryBenchmarkResults({
-            modelId,
-            benchmarkId,
-            limit: 1,
-        });
-        const baselineResults = await adapter.queryBenchmarkResults({
-            modelId: baselineModelId,
-            benchmarkId,
-            limit: 1,
-        });
-        const modelResult = modelResults[0];
-        const baselineResult = baselineResults[0];
-        if (!modelResult || !baselineResult) {
-            return null;
-        }
-        const delta = modelResult.totalPnl - baselineResult.totalPnl;
-        return {
-            improved: delta > 0,
-            modelPnl: modelResult.totalPnl,
-            baselinePnl: baselineResult.totalPnl,
-            delta,
-        };
-    }
-}