npm - @elizaos/training - Versions diffs - 2.0.0-alpha.13 → 2.0.0-alpha.15 - Mend

@elizaos/training 2.0.0-alpha.13 → 2.0.0-alpha.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (87)

package/package.json +2 -2
package/research-output/training-runs/training-run-1773726941205.json +38 -0
package/scripts/rank_trajectories.ts +0 -1
package/scripts/run_task_benchmark.ts +4 -11
package/src/adapter.ts +96 -49
package/src/archetypes/ArchetypeConfigService.ts +188 -185
package/src/archetypes/derive-archetype.ts +47 -47
package/src/archetypes/index.ts +2 -2
package/src/benchmark/ArchetypeMatchupBenchmark.ts +70 -70
package/src/benchmark/BenchmarkChartGenerator.ts +70 -69
package/src/benchmark/BenchmarkDataGenerator.ts +136 -136
package/src/benchmark/BenchmarkDataViewer.ts +32 -30
package/src/benchmark/BenchmarkHistoryService.ts +13 -12
package/src/benchmark/BenchmarkRunner.ts +87 -83
package/src/benchmark/BenchmarkValidator.ts +48 -46
package/src/benchmark/FastEvalRunner.ts +17 -16
package/src/benchmark/MetricsValidator.ts +20 -21
package/src/benchmark/MetricsVisualizer.ts +92 -85
package/src/benchmark/ModelBenchmarkService.ts +90 -82
package/src/benchmark/ModelRegistry.ts +44 -44
package/src/benchmark/RulerBenchmarkIntegration.ts +24 -24
package/src/benchmark/SimulationA2AInterface.ts +118 -118
package/src/benchmark/SimulationEngine.ts +51 -51
package/src/benchmark/TaskRunner.ts +87 -79
package/src/benchmark/__tests__/BenchmarkRunner.test.ts +80 -80
package/src/benchmark/__tests__/HeadToHead.test.ts +26 -26
package/src/benchmark/index.ts +27 -27
package/src/benchmark/parseSimulationMetrics.ts +32 -32
package/src/benchmark/simulation-types.ts +10 -10
package/src/dependencies.ts +34 -34
package/src/generation/TrajectoryGenerator.ts +39 -37
package/src/generation/index.ts +1 -1
package/src/huggingface/HuggingFaceDatasetUploader.ts +72 -72
package/src/huggingface/HuggingFaceIntegrationService.ts +59 -53
package/src/huggingface/HuggingFaceModelUploader.ts +60 -59
package/src/huggingface/index.ts +6 -6
package/src/huggingface/shared/HuggingFaceUploadUtil.ts +32 -32
package/src/index.ts +27 -27
package/src/init-training.ts +6 -6
package/src/metrics/TrajectoryMetricsExtractor.ts +70 -71
package/src/metrics/__tests__/TrajectoryMetricsExtractor.test.ts +182 -182
package/src/metrics/index.ts +2 -2
package/src/rubrics/__tests__/index.test.ts +73 -73
package/src/rubrics/ass-kisser.ts +6 -6
package/src/rubrics/degen.ts +6 -6
package/src/rubrics/goody-twoshoes.ts +6 -6
package/src/rubrics/index.ts +50 -50
package/src/rubrics/information-trader.ts +6 -6
package/src/rubrics/infosec.ts +6 -6
package/src/rubrics/liar.ts +6 -6
package/src/rubrics/perps-trader.ts +6 -6
package/src/rubrics/researcher.ts +6 -6
package/src/rubrics/scammer.ts +6 -6
package/src/rubrics/social-butterfly.ts +7 -7
package/src/rubrics/super-predictor.ts +6 -6
package/src/rubrics/trader.ts +5 -5
package/src/scoring/ArchetypeScoringService.ts +56 -54
package/src/scoring/JudgePromptBuilder.ts +96 -96
package/src/scoring/LLMJudgeCache.ts +26 -23
package/src/scoring/index.ts +3 -3
package/src/training/AutomationPipeline.ts +149 -140
package/src/training/BenchmarkService.ts +49 -45
package/src/training/ConfigValidator.ts +38 -32
package/src/training/MarketOutcomesTracker.ts +22 -12
package/src/training/ModelDeployer.ts +15 -15
package/src/training/ModelFetcher.ts +7 -7
package/src/training/ModelSelectionService.ts +32 -32
package/src/training/ModelUsageVerifier.ts +31 -24
package/src/training/MultiModelOrchestrator.ts +44 -44
package/src/training/RLModelConfig.ts +57 -57
package/src/training/RewardBackpropagationService.ts +18 -17
package/src/training/RulerScoringService.ts +73 -72
package/src/training/TrainingMonitor.ts +29 -29
package/src/training/TrajectoryRecorder.ts +25 -27
package/src/training/__tests__/TrajectoryRecorder.test.ts +105 -105
package/src/training/index.ts +36 -36
package/src/training/logRLConfig.ts +7 -7
package/src/training/pipeline.ts +13 -16
package/src/training/storage/ModelStorageService.ts +32 -32
package/src/training/storage/TrainingDataArchiver.ts +21 -21
package/src/training/storage/index.ts +2 -2
package/src/training/types.ts +6 -6
package/src/training/window-utils.ts +14 -14
package/src/utils/index.ts +7 -7
package/src/utils/logger.ts +5 -5
package/src/utils/snowflake.ts +1 -1
package/src/utils/synthetic-detector.ts +7 -7

package/src/benchmark/RulerBenchmarkIntegration.ts CHANGED

@@ -5,11 +5,11 @@
  * This allows RULER to evaluate agent trajectories against known benchmark outcomes.
  */
-import type { MarketOutcomes } from '../training/RulerScoringService';
+import type { MarketOutcomes } from "../training/RulerScoringService";
 import type {
   BenchmarkGameSnapshot,
   GroundTruth,
-} from './BenchmarkDataGenerator';
+} from "./BenchmarkDataGenerator";
 /**
  * Extract market outcomes from benchmark ground truth for RULER scoring
@@ -28,15 +28,15 @@ import type {
  * ```
  */
 export function extractMarketOutcomesFromBenchmark(
-  snapshot: BenchmarkGameSnapshot
+  snapshot: BenchmarkGameSnapshot,
 ): MarketOutcomes {
   const gt = snapshot.groundTruth;
   // Extract prediction market outcomes
-  const predictions: Array<{ marketId: string; outcome: 'YES' | 'NO' }> =
+  const predictions: Array<{ marketId: string; outcome: "YES" | "NO" }> =
     Object.entries(gt.marketOutcomes).map(([marketId, outcome]) => ({
       marketId,
-      outcome: outcome ? 'YES' : 'NO',
+      outcome: outcome ? "YES" : "NO",
     }));
   // Extract stock/perpetual outcomes from price history
@@ -77,10 +77,10 @@ export function extractMarketOutcomesFromBenchmark(
  */
 export function getHiddenFactsForTick(
   snapshot: BenchmarkGameSnapshot,
-  tickNumber: number
-): GroundTruth['hiddenFacts'] {
+  tickNumber: number,
+): GroundTruth["hiddenFacts"] {
   return (snapshot.groundTruth.hiddenFacts || []).filter(
-    (f) => f.tick === tickNumber
+    (f) => f.tick === tickNumber,
   );
 }
@@ -96,10 +96,10 @@ export function getHiddenFactsForTick(
  */
 export function getHiddenEventsForTick(
   snapshot: BenchmarkGameSnapshot,
-  tickNumber: number
-): GroundTruth['hiddenEvents'] {
+  tickNumber: number,
+): GroundTruth["hiddenEvents"] {
   return (snapshot.groundTruth.hiddenEvents || []).filter(
-    (e) => e.tick === tickNumber
+    (e) => e.tick === tickNumber,
   );
 }
@@ -119,7 +119,7 @@ export function wasDecisionOptimal(
   snapshot: BenchmarkGameSnapshot,
   tickNumber: number,
   actionType: string,
-  target: string
+  target: string,
 ): boolean {
   const optimalActions = snapshot.groundTruth.optimalActions;
@@ -129,7 +129,7 @@ export function wasDecisionOptimal(
     (a) =>
       Math.abs(a.tick - tickNumber) <= window &&
       a.type === actionType &&
-      a.target === target
+      a.target === target,
   );
   return relevantActions.length > 0;
@@ -145,8 +145,8 @@ export function wasDecisionOptimal(
  * @returns Object containing true facts about the world state
  */
 export function getTrueFacts(
-  snapshot: BenchmarkGameSnapshot
-): GroundTruth['trueFacts'] {
+  snapshot: BenchmarkGameSnapshot,
+): GroundTruth["trueFacts"] {
   return snapshot.groundTruth.trueFacts || {};
 }
@@ -166,10 +166,10 @@ export function getTrueFacts(
  */
 export function createRulerContext(snapshot: BenchmarkGameSnapshot): {
   marketOutcomes: MarketOutcomes;
-  trueFacts: GroundTruth['trueFacts'];
-  hiddenFacts: GroundTruth['hiddenFacts'];
-  hiddenEvents: GroundTruth['hiddenEvents'];
-  optimalActions: GroundTruth['optimalActions'];
+  trueFacts: GroundTruth["trueFacts"];
+  hiddenFacts: GroundTruth["hiddenFacts"];
+  hiddenEvents: GroundTruth["hiddenEvents"];
+  optimalActions: GroundTruth["optimalActions"];
 } {
   return {
     marketOutcomes: extractMarketOutcomesFromBenchmark(snapshot),
@@ -201,14 +201,14 @@ export function scoreActionAgainstGroundTruth(
   snapshot: BenchmarkGameSnapshot,
   tickNumber: number,
   actionType: string,
-  target: string
+  target: string,
 ): number {
   // Check if action was optimal
   const wasOptimal = wasDecisionOptimal(
     snapshot,
     tickNumber,
     actionType,
-    target
+    target,
   );
   if (wasOptimal) {
@@ -220,9 +220,9 @@ export function scoreActionAgainstGroundTruth(
   const relevantFacts = hiddenFacts.filter(
     (f) =>
       f.value &&
-      typeof f.value === 'object' &&
-      'marketId' in f.value &&
-      (f.value as { marketId: string }).marketId === target
+      typeof f.value === "object" &&
+      "marketId" in f.value &&
+      (f.value as { marketId: string }).marketId === target,
   );
   if (relevantFacts.length > 0) {