@elizaos/training 2.0.0-alpha.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (224)
  1. package/Dockerfile +75 -0
  2. package/LICENSE +21 -0
  3. package/Makefile +374 -0
  4. package/README.md +346 -0
  5. package/config/rubrics.json +137 -0
  6. package/docker-compose.test.yml +57 -0
  7. package/package.json +57 -0
  8. package/python/config/babylon_atropos.yaml +90 -0
  9. package/python/config/profiles/12gb.json +11 -0
  10. package/python/config/profiles/16gb.json +10 -0
  11. package/python/config/profiles/24gb.json +10 -0
  12. package/python/config/profiles/48gb.json +10 -0
  13. package/python/config/profiles/cpu.json +11 -0
  14. package/python/config/profiles/l40-2gpu-safe.json +20 -0
  15. package/python/config/profiles/l40-2gpu.json +22 -0
  16. package/python/config/profiles/l40-4gpu.json +21 -0
  17. package/python/config/profiles/l40.json +17 -0
  18. package/python/config/tinker_training.yaml +143 -0
  19. package/python/curriculum_state.json +165 -0
  20. package/python/env.template +86 -0
  21. package/python/env.training.template +46 -0
  22. package/python/pyproject.toml +41 -0
  23. package/python/requirements-ci.txt +31 -0
  24. package/python/requirements.txt +87 -0
  25. package/python/scripts/__init__.py +4 -0
  26. package/python/scripts/benchmark_should_respond.py +190 -0
  27. package/python/scripts/debug_inference.py +62 -0
  28. package/python/scripts/import_json_trajectories.py +412 -0
  29. package/python/scripts/local-finetune/README.md +63 -0
  30. package/python/scripts/local-finetune/ingest_and_score.py +139 -0
  31. package/python/scripts/local-finetune/merge_model.py +32 -0
  32. package/python/scripts/local-finetune/test_adapter.py +91 -0
  33. package/python/scripts/local-finetune/train_from_csv.py +132 -0
  34. package/python/scripts/merge_trajectories.py +318 -0
  35. package/python/scripts/optimize_prompt_grpo.py +269 -0
  36. package/python/scripts/run_ab_test.py +143 -0
  37. package/python/scripts/run_full_pipeline.py +544 -0
  38. package/python/scripts/run_tinker_training.py +192 -0
  39. package/python/scripts/run_training.py +914 -0
  40. package/python/scripts/test_generation.py +29 -0
  41. package/python/scripts/test_judge.py +155 -0
  42. package/python/scripts/test_pipeline.py +356 -0
  43. package/python/scripts/test_trained_model.py +380 -0
  44. package/python/scripts/train_grpo.py +360 -0
  45. package/python/scripts/train_jsonl.py +223 -0
  46. package/python/scripts/train_local.py +528 -0
  47. package/python/setup.py +20 -0
  48. package/python/src/__init__.py +190 -0
  49. package/python/src/data_bridge/__init__.py +24 -0
  50. package/python/src/data_bridge/converter.py +435 -0
  51. package/python/src/data_bridge/reader.py +393 -0
  52. package/python/src/models.py +283 -0
  53. package/python/src/training/__init__.py +605 -0
  54. package/python/src/training/ab_testing.py +404 -0
  55. package/python/src/training/action_executor.py +621 -0
  56. package/python/src/training/archetype_trainer.py +347 -0
  57. package/python/src/training/atropos_trainer.py +980 -0
  58. package/python/src/training/babylon_env.py +1254 -0
  59. package/python/src/training/error_recovery.py +647 -0
  60. package/python/src/training/evaluation.py +856 -0
  61. package/python/src/training/fast_simulator.py +880 -0
  62. package/python/src/training/format_validator.py +584 -0
  63. package/python/src/training/hybrid_env.py +522 -0
  64. package/python/src/training/kl_controller.py +628 -0
  65. package/python/src/training/multi_prompt_dataset.py +883 -0
  66. package/python/src/training/multi_turn.py +656 -0
  67. package/python/src/training/online_env.py +1084 -0
  68. package/python/src/training/quality_scorer.py +391 -0
  69. package/python/src/training/quality_utils.py +633 -0
  70. package/python/src/training/rewards.py +1344 -0
  71. package/python/src/training/rlaif_env.py +17 -0
  72. package/python/src/training/rollout_generator.py +502 -0
  73. package/python/src/training/rubric_loader.py +198 -0
  74. package/python/src/training/scenario_pool.py +1072 -0
  75. package/python/src/training/schemas.py +481 -0
  76. package/python/src/training/service_manager.py +552 -0
  77. package/python/src/training/simulation_bridge.py +535 -0
  78. package/python/src/training/tick_reward_attribution.py +399 -0
  79. package/python/src/training/tinker_client.py +575 -0
  80. package/python/src/training/tinker_trainer.py +646 -0
  81. package/python/src/training/tokenization_utils.py +402 -0
  82. package/python/tests/e2e/__init__.py +13 -0
  83. package/python/tests/e2e/conftest.py +258 -0
  84. package/python/tests/e2e/test_full_pipeline.py +643 -0
  85. package/python/tests/e2e/test_online_training_e2e.py +365 -0
  86. package/python/tests/integration/__init__.py +12 -0
  87. package/python/tests/integration/conftest.py +383 -0
  88. package/python/tests/integration/test_db_integration.py +649 -0
  89. package/python/tests/integration/test_json_mode_integration.py +554 -0
  90. package/python/tests/test_action_executor.py +594 -0
  91. package/python/tests/test_archetype_scoring.py +1027 -0
  92. package/python/tests/test_atropos_integration.py +360 -0
  93. package/python/tests/test_evaluation.py +727 -0
  94. package/python/tests/test_format_validator.py +486 -0
  95. package/python/tests/test_kl_controller.py +432 -0
  96. package/python/tests/test_lr_scheduler.py +579 -0
  97. package/python/tests/test_multi_turn.py +590 -0
  98. package/python/tests/test_online_env.py +519 -0
  99. package/python/tests/test_quality_scorer.py +474 -0
  100. package/python/tests/test_scenario_pool.py +735 -0
  101. package/python/tests/test_service_manager.py +585 -0
  102. package/python/tests/test_simulation_rollout.py +581 -0
  103. package/python/tests/test_tokenization_utils.py +501 -0
  104. package/python/tests/test_training_orchestrator.py +497 -0
  105. package/python/tests/test_training_output_structure.py +661 -0
  106. package/research-output/training-runs/training-run-1770772042899.json +26 -0
  107. package/research-output/training-runs/training-run-1770930079670.json +32 -0
  108. package/research-output/training-runs/training-run-1770930143700.json +44 -0
  109. package/research-output/training-runs/training-run-1770930183638.json +38 -0
  110. package/research-output/training-runs/training-run-1770930442049.json +38 -0
  111. package/research-output/training-runs/training-run-1770930793243.json +38 -0
  112. package/research-output/training-runs/training-run-1771276293257.json +38 -0
  113. package/research-output/training-runs/training-run-1771276389280.json +38 -0
  114. package/research-output/training-runs/training-run-1771276502776.json +38 -0
  115. package/research-output/training-runs/training-run-1771277340748.json +38 -0
  116. package/research-output/training-runs/training-run-1773013658993.json +38 -0
  117. package/research-output/training-runs/training-run-1773013861014.json +38 -0
  118. package/research-output/training-runs/training-run-1773014215983.json +38 -0
  119. package/scripts/assess-training-data.ts +422 -0
  120. package/scripts/e2e-training-test.ts +550 -0
  121. package/scripts/export-rubrics.ts +64 -0
  122. package/scripts/generate-research-report.ts +1523 -0
  123. package/scripts/generate_dataset.sh +173 -0
  124. package/scripts/generate_should_respond.ts +267 -0
  125. package/scripts/generate_should_respond_dataset.ts +162 -0
  126. package/scripts/json-mode-benchmark.ts +399 -0
  127. package/scripts/rank_trajectories.ts +207 -0
  128. package/scripts/real-archetype-benchmark.ts +210 -0
  129. package/scripts/run-baseline-comparison.ts +116 -0
  130. package/scripts/run-full-pipeline.ts +272 -0
  131. package/scripts/run_rlaif_loop.ts +78 -0
  132. package/scripts/run_task_benchmark.ts +247 -0
  133. package/scripts/runpod_setup.sh +137 -0
  134. package/scripts/runpod_validate.sh +147 -0
  135. package/scripts/test-model-in-game.ts +955 -0
  136. package/scripts/test-scoring.ts +73 -0
  137. package/scripts/test-trained-model.ts +209 -0
  138. package/scripts/train-and-test.ts +824 -0
  139. package/scripts/verify-final.ts +118 -0
  140. package/src/adapter.ts +516 -0
  141. package/src/archetypes/ArchetypeConfigService.ts +626 -0
  142. package/src/archetypes/derive-archetype.ts +249 -0
  143. package/src/archetypes/index.ts +22 -0
  144. package/src/benchmark/ArchetypeMatchupBenchmark.ts +825 -0
  145. package/src/benchmark/BenchmarkChartGenerator.ts +748 -0
  146. package/src/benchmark/BenchmarkDataGenerator.ts +1288 -0
  147. package/src/benchmark/BenchmarkDataViewer.ts +324 -0
  148. package/src/benchmark/BenchmarkHistoryService.ts +221 -0
  149. package/src/benchmark/BenchmarkRunner.ts +685 -0
  150. package/src/benchmark/BenchmarkValidator.ts +204 -0
  151. package/src/benchmark/FastEvalRunner.ts +225 -0
  152. package/src/benchmark/MetricsValidator.ts +165 -0
  153. package/src/benchmark/MetricsVisualizer.ts +909 -0
  154. package/src/benchmark/ModelBenchmarkService.ts +611 -0
  155. package/src/benchmark/ModelRegistry.ts +158 -0
  156. package/src/benchmark/RulerBenchmarkIntegration.ts +235 -0
  157. package/src/benchmark/SimulationA2AInterface.ts +1169 -0
  158. package/src/benchmark/SimulationEngine.ts +832 -0
  159. package/src/benchmark/TaskRunner.ts +94 -0
  160. package/src/benchmark/__tests__/BenchmarkRunner.test.ts +534 -0
  161. package/src/benchmark/__tests__/HeadToHead.test.ts +126 -0
  162. package/src/benchmark/index.ts +91 -0
  163. package/src/benchmark/parseSimulationMetrics.ts +124 -0
  164. package/src/benchmark/simulation-types.ts +78 -0
  165. package/src/dependencies.ts +475 -0
  166. package/src/generation/TrajectoryGenerator.ts +387 -0
  167. package/src/generation/index.ts +12 -0
  168. package/src/huggingface/HuggingFaceDatasetUploader.ts +636 -0
  169. package/src/huggingface/HuggingFaceIntegrationService.ts +426 -0
  170. package/src/huggingface/HuggingFaceModelUploader.ts +532 -0
  171. package/src/huggingface/index.ts +27 -0
  172. package/src/huggingface/shared/HuggingFaceUploadUtil.ts +206 -0
  173. package/src/index.ts +102 -0
  174. package/src/init-training.ts +53 -0
  175. package/src/metrics/TrajectoryMetricsExtractor.ts +653 -0
  176. package/src/metrics/__tests__/TrajectoryMetricsExtractor.test.ts +759 -0
  177. package/src/metrics/index.ts +8 -0
  178. package/src/metrics/types.ts +200 -0
  179. package/src/rubrics/__tests__/index.test.ts +184 -0
  180. package/src/rubrics/ass-kisser.ts +85 -0
  181. package/src/rubrics/degen.ts +80 -0
  182. package/src/rubrics/goody-twoshoes.ts +84 -0
  183. package/src/rubrics/index.ts +236 -0
  184. package/src/rubrics/information-trader.ts +84 -0
  185. package/src/rubrics/infosec.ts +101 -0
  186. package/src/rubrics/liar.ts +104 -0
  187. package/src/rubrics/perps-trader.ts +87 -0
  188. package/src/rubrics/researcher.ts +81 -0
  189. package/src/rubrics/scammer.ts +82 -0
  190. package/src/rubrics/social-butterfly.ts +73 -0
  191. package/src/rubrics/super-predictor.ts +97 -0
  192. package/src/rubrics/trader.ts +67 -0
  193. package/src/scoring/ArchetypeScoringService.ts +486 -0
  194. package/src/scoring/JudgePromptBuilder.ts +556 -0
  195. package/src/scoring/LLMJudgeCache.ts +401 -0
  196. package/src/scoring/index.ts +9 -0
  197. package/src/training/AutomationPipeline.ts +916 -0
  198. package/src/training/BenchmarkService.ts +518 -0
  199. package/src/training/ConfigValidator.ts +220 -0
  200. package/src/training/MarketOutcomesTracker.ts +187 -0
  201. package/src/training/ModelDeployer.ts +186 -0
  202. package/src/training/ModelFetcher.ts +76 -0
  203. package/src/training/ModelSelectionService.ts +341 -0
  204. package/src/training/ModelUsageVerifier.ts +160 -0
  205. package/src/training/MultiModelOrchestrator.ts +580 -0
  206. package/src/training/RLModelConfig.ts +407 -0
  207. package/src/training/RewardBackpropagationService.ts +149 -0
  208. package/src/training/RulerScoringService.ts +666 -0
  209. package/src/training/TrainingMonitor.ts +166 -0
  210. package/src/training/TrajectoryRecorder.ts +399 -0
  211. package/src/training/__tests__/TrajectoryRecorder.test.ts +472 -0
  212. package/src/training/index.ts +100 -0
  213. package/src/training/logRLConfig.ts +34 -0
  214. package/src/training/pipeline.ts +129 -0
  215. package/src/training/storage/ModelStorageService.ts +279 -0
  216. package/src/training/storage/TrainingDataArchiver.ts +197 -0
  217. package/src/training/storage/index.ts +17 -0
  218. package/src/training/types.ts +207 -0
  219. package/src/training/window-utils.ts +138 -0
  220. package/src/utils/index.ts +101 -0
  221. package/src/utils/logger.ts +59 -0
  222. package/src/utils/snowflake.ts +17 -0
  223. package/src/utils/synthetic-detector.ts +111 -0
  224. package/tsconfig.json +20 -0
@@ -0,0 +1,126 @@
1
+ import { describe, expect, it } from 'bun:test';
2
+ import type { BenchmarkGameSnapshot } from '../BenchmarkDataGenerator';
3
+ import { MetricsVisualizer } from '../MetricsVisualizer';
4
+ import { SimulationEngine, type SimulationResult } from '../SimulationEngine';
5
+
/**
 * Head-to-head benchmark checks: the PnL history returned by
 * SimulationEngine.run() and the baseline-vs-challenger helpers exposed as
 * static methods on MetricsVisualizer.
 */
describe('Head-to-Head Benchmark Infrastructure', () => {
  // Part 1: SimulationEngine reports its PnL history through run()
  describe('SimulationEngine PnL History', () => {
    it('should initialize with empty pnlHistory and return it after run()', async () => {
      // Snapshot with no ticks, markets, or agents. It is cast through
      // `unknown`, so the literal is not type-checked against
      // BenchmarkGameSnapshot.
      const emptySnapshot = {
        id: 'test',
        ticks: [],
        initialState: {
          predictionMarkets: [],
          perpetualMarkets: [],
          agents: [],
        },
        groundTruth: {
          marketOutcomes: {},
          priceHistory: {},
          optimalActions: [],
        },
      } as unknown as BenchmarkGameSnapshot;

      const simulation = new SimulationEngine({
        snapshot: emptySnapshot,
        agentId: 'test-agent',
        fastForward: true,
      });
      simulation.initialize();

      // Only the public API is used: run() resolves to a result carrying pnlHistory.
      const { pnlHistory } = await simulation.run();
      expect(pnlHistory).toEqual([]);
    });
  });

  // Part 2: MetricsVisualizer comparison helpers
  describe('MetricsVisualizer Comparison Logic', () => {
    /**
     * Build a SimulationResult fixture.
     *
     * @param id - Used for both `id` and `agentId`
     * @param pnl - Stored as `metrics.totalPnl`
     * @param history - One PnL value per tick; the array index becomes the tick
     */
    function createMockResult(
      id: string,
      pnl: number,
      history: number[]
    ): SimulationResult {
      const pnlHistory = history.map((val, idx) => ({ tick: idx, pnl: val }));

      return {
        id,
        agentId: id,
        benchmarkId: 'bench-1',
        startTime: 0,
        endTime: 1000,
        ticksProcessed: history.length,
        actions: [],
        metrics: {
          totalPnl: pnl,
          predictionMetrics: {
            accuracy: 0.5,
            totalPositions: 0,
            correctPredictions: 0,
            incorrectPredictions: 0,
            avgPnlPerPosition: 0,
          },
          perpMetrics: {
            winRate: 0.5,
            totalTrades: 0,
            profitableTrades: 0,
            avgPnlPerTrade: 0,
            maxDrawdown: 0,
          },
          socialMetrics: {
            postsCreated: 0,
            groupsJoined: 0,
            messagesReceived: 0,
            reputationGained: 0,
          },
          timing: { totalDuration: 0, avgResponseTime: 0, maxResponseTime: 0 },
          optimalityScore: 50,
        },
        trajectory: { states: [], actions: [], rewards: [], windowId: '' },
        pnlHistory,
      };
    }

    it('should correctly merge PnL histories of equal length', () => {
      const merged = MetricsVisualizer.mergePnlHistory(
        createMockResult('baseline', 100, [10, 50, 100]),
        createMockResult('challenger', 200, [20, 100, 200])
      );

      expect(merged).toHaveLength(3);
      expect(merged[2]).toEqual({ tick: 2, baseline: 100, challenger: 200 });
    });

    it('should handle unequal history lengths (fill with final value)', () => {
      // The baseline stops after two ticks (e.g., bankruptcy or crash) while
      // the challenger keeps going for four.
      const shortBaseline = createMockResult('baseline', -50, [10, -50]);
      const longChallenger = createMockResult(
        'challenger',
        100,
        [20, 60, 80, 100]
      );

      const merged = MetricsVisualizer.mergePnlHistory(
        shortBaseline,
        longChallenger
      );

      // From tick 2 onward the baseline is expected to carry over its last
      // value (-50).
      const expectedRows = [
        { tick: 0, baseline: 10, challenger: 20 },
        { tick: 1, baseline: -50, challenger: 60 },
        { tick: 2, baseline: -50, challenger: 80 },
        { tick: 3, baseline: -50, challenger: 100 },
      ];

      expect(merged).toHaveLength(4); // Should match longest
      expectedRows.forEach((row, tick) => {
        expect(merged[tick]).toEqual(row);
      });
    });

    it('should generate ASCII chart string', () => {
      const report = MetricsVisualizer.generateAsciiComparison(
        createMockResult('baseline', 100, [10, 100]),
        createMockResult('challenger', 200, [20, 200])
      );

      const expectedFragments = [
        'HEAD-TO-HEAD RESULTS',
        'WINNER: Challenger',
        'Alpha Generated: +$100.00',
      ];
      for (const fragment of expectedFragments) {
        expect(report).toContain(fragment);
      }
    });
  });
});
@@ -0,0 +1,91 @@
1
+ /**
2
+ * Benchmark Module
3
+ *
4
+ * Tools for evaluating agent performance through simulation.
5
+ */
6
+
7
+ // Multi-archetype matchup benchmarking
8
+ export type {
9
+ ArchetypeVsResult,
10
+ MatchupAgent,
11
+ MatchupAgentResult,
12
+ MatchupBenchmarkConfig,
13
+ MatchupBenchmarkResult,
14
+ } from './ArchetypeMatchupBenchmark';
15
+ export {
16
+ ArchetypeMatchupBenchmark,
17
+ runQuickMatchupBenchmark,
18
+ } from './ArchetypeMatchupBenchmark';
19
+ export type {
20
+ BenchmarkHistoryEntry,
21
+ ModelComparisonData,
22
+ } from './BenchmarkChartGenerator';
23
+ export { BenchmarkChartGenerator } from './BenchmarkChartGenerator';
24
+ export type {
25
+ BenchmarkConfig,
26
+ BenchmarkGameSnapshot,
27
+ CausalEventType,
28
+ GroundTruth,
29
+ HiddenNarrativeFact,
30
+ ScheduledCausalEvent,
31
+ VolatilityBucket,
32
+ } from './BenchmarkDataGenerator';
33
+ export { BenchmarkDataGenerator, SeededRandom } from './BenchmarkDataGenerator';
34
+ export { BenchmarkDataViewer } from './BenchmarkDataViewer';
35
+ export type {
36
+ BenchmarkHistoryQuery,
37
+ BenchmarkResultInput,
38
+ BenchmarkTrendData,
39
+ } from './BenchmarkHistoryService';
40
+ export { BenchmarkHistoryService } from './BenchmarkHistoryService';
41
+ export type {
42
+ BenchmarkComparisonResult,
43
+ BenchmarkRunConfig,
44
+ } from './BenchmarkRunner';
45
+ export { BenchmarkRunner } from './BenchmarkRunner';
46
+ export * as BenchmarkValidator from './BenchmarkValidator';
47
+ export type { FastEvalConfig, FastEvalResult } from './FastEvalRunner';
48
+ export { FastEvalRunner } from './FastEvalRunner';
49
+ export { MetricsValidator } from './MetricsValidator';
50
+ export { MetricsVisualizer } from './MetricsVisualizer';
51
+ export type {
52
+ AverageMetrics,
53
+ ModelBenchmarkOptions,
54
+ ModelBenchmarkResult,
55
+ ModelComparisonResult,
56
+ } from './ModelBenchmarkService';
57
+ export { ModelBenchmarkService } from './ModelBenchmarkService';
58
+ export type { ModelConfig } from './ModelRegistry';
59
+ export {
60
+ getBaselineModels,
61
+ getModelById,
62
+ getModelByModelId,
63
+ getModelDisplayName,
64
+ getModelsByProvider,
65
+ getModelsByTier,
66
+ MODEL_REGISTRY,
67
+ validateModelId,
68
+ } from './ModelRegistry';
69
+ // Shared utilities
70
+ export {
71
+ type JsonValue,
72
+ parseSimulationMetrics,
73
+ } from './parseSimulationMetrics';
74
+ export {
75
+ createRulerContext,
76
+ extractMarketOutcomesFromBenchmark,
77
+ getHiddenEventsForTick,
78
+ getHiddenFactsForTick,
79
+ getTrueFacts,
80
+ scoreActionAgainstGroundTruth,
81
+ wasDecisionOptimal,
82
+ } from './RulerBenchmarkIntegration';
83
+ export { SimulationA2AInterface } from './SimulationA2AInterface';
84
+ export type {
85
+ SimulationConfig,
86
+ SimulationMetrics,
87
+ SimulationResult,
88
+ } from './SimulationEngine';
89
+ export { SimulationEngine } from './SimulationEngine';
90
+ export type { TaskRunnerConfig, TaskRunResult } from './TaskRunner';
91
+ export { TaskRunner } from './TaskRunner';
@@ -0,0 +1,124 @@
1
+ /**
2
+ * Simulation Metrics Parser
3
+ *
4
+ * Shared utility for validating and parsing SimulationMetrics from JSON data.
5
+ * Used by ModelBenchmarkService and HuggingFaceModelUploader.
6
+ */
7
+
8
+ import type { SimulationMetrics } from './SimulationEngine';
9
+
/** Leaf values admitted by JsonValue. */
type JsonScalar = string | number | boolean | null | undefined;

/**
 * JSON value type for parsing untyped data.
 *
 * Covers scalars, arrays of values, and string-keyed objects of values.
 * Note that `undefined` is part of the union alongside the JSON primitives.
 */
export type JsonValue =
  | JsonScalar
  | JsonValue[]
  | { [key: string]: JsonValue };
21
+
/**
 * Type guard for plain JSON objects.
 *
 * `typeof` reports 'object' for arrays and for null as well, so both are
 * excluded explicitly; otherwise an array would pass validation as a metrics
 * container and silently yield all-zero values.
 */
function isJsonRecord(
  value: JsonValue
): value is { [key: string]: JsonValue } {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/**
 * Read a numeric field, falling back to 0 when it is missing or not a number.
 */
function readNumber(
  record: { [key: string]: JsonValue },
  key: string
): number {
  const value = record[key];
  return typeof value === 'number' ? value : 0;
}

/**
 * Parse and validate SimulationMetrics from JSON data
 *
 * Required top-level fields: `totalPnl` and `optimalityScore` must be numbers;
 * `predictionMetrics`, `perpMetrics`, and `timing` must be plain objects
 * (arrays are rejected). Individual fields inside those objects are not
 * required: any that is missing or non-numeric is read as 0.
 * `socialMetrics` is optional; when it is not a plain object every counter
 * is 0.
 *
 * @param data - Raw JSON data to parse
 * @returns Validated SimulationMetrics object
 * @throws Error if data is not a plain object or a required top-level field
 *   is missing or has the wrong type
 */
export function parseSimulationMetrics(data: JsonValue): SimulationMetrics {
  if (!isJsonRecord(data)) {
    throw new Error('Invalid SimulationMetrics: expected object');
  }

  const {
    totalPnl,
    predictionMetrics,
    perpMetrics,
    optimalityScore,
    timing,
    socialMetrics,
  } = data;

  // Validate required fields. The order of checks determines which error is
  // reported first when several fields are invalid.
  if (typeof totalPnl !== 'number') {
    throw new Error('Invalid SimulationMetrics: totalPnl must be a number');
  }

  if (!isJsonRecord(predictionMetrics)) {
    throw new Error(
      'Invalid SimulationMetrics: predictionMetrics must be an object'
    );
  }

  if (!isJsonRecord(perpMetrics)) {
    throw new Error('Invalid SimulationMetrics: perpMetrics must be an object');
  }

  if (typeof optimalityScore !== 'number') {
    throw new Error(
      'Invalid SimulationMetrics: optimalityScore must be a number'
    );
  }

  if (!isJsonRecord(timing)) {
    throw new Error('Invalid SimulationMetrics: timing must be an object');
  }

  // socialMetrics is optional: an empty record makes every counter read as 0.
  const social: { [key: string]: JsonValue } = isJsonRecord(socialMetrics)
    ? socialMetrics
    : {};

  return {
    totalPnl,
    predictionMetrics: {
      totalPositions: readNumber(predictionMetrics, 'totalPositions'),
      correctPredictions: readNumber(predictionMetrics, 'correctPredictions'),
      incorrectPredictions: readNumber(
        predictionMetrics,
        'incorrectPredictions'
      ),
      accuracy: readNumber(predictionMetrics, 'accuracy'),
      avgPnlPerPosition: readNumber(predictionMetrics, 'avgPnlPerPosition'),
    },
    perpMetrics: {
      totalTrades: readNumber(perpMetrics, 'totalTrades'),
      profitableTrades: readNumber(perpMetrics, 'profitableTrades'),
      winRate: readNumber(perpMetrics, 'winRate'),
      avgPnlPerTrade: readNumber(perpMetrics, 'avgPnlPerTrade'),
      maxDrawdown: readNumber(perpMetrics, 'maxDrawdown'),
    },
    socialMetrics: {
      postsCreated: readNumber(social, 'postsCreated'),
      groupsJoined: readNumber(social, 'groupsJoined'),
      messagesReceived: readNumber(social, 'messagesReceived'),
      reputationGained: readNumber(social, 'reputationGained'),
    },
    timing: {
      avgResponseTime: readNumber(timing, 'avgResponseTime'),
      maxResponseTime: readNumber(timing, 'maxResponseTime'),
      totalDuration: readNumber(timing, 'totalDuration'),
    },
    optimalityScore,
  };
}
@@ -0,0 +1,78 @@
1
+ import type { JsonValue } from '../adapter';
2
+
/** Action kinds that open or close a prediction or perpetual position. */
type TradingActionType =
  | 'buy_prediction'
  | 'sell_prediction'
  | 'open_perp'
  | 'close_perp';

/** Action kinds for posting, joining groups, and messaging. */
type SocialActionType = 'create_post' | 'join_group' | 'send_message';

/** Every action kind an agent can take. */
export type AgentActionType =
  | 'query_state'
  | TradingActionType
  | SocialActionType;
12
+
/**
 * Optional correctness annotations for an action. Every field is optional.
 */
interface AgentActionCorrectness {
  // Prediction market correctness tracking
  predictionCorrect?: boolean;
  actualOutcome?: boolean;
  predictedOutcome?: boolean;

  // Perpetual trade correctness tracking
  perpCorrect?: boolean;
  sentimentAtTrade?: number;
  priceChange?: number;
  expectedDirection?: 'up' | 'down';

  // Sentiment analysis accuracy tracking
  sentimentAccuracy?: number;
  sentimentAtTime?: number;
  actualSentiment?: number;
}

/**
 * A single action taken by an agent, with its tick, timestamp, kind, and
 * payload.
 */
export interface AgentAction {
  tick: number;
  timestamp: number;
  type: AgentActionType;
  data: Record<string, JsonValue>;
  /** How long agent took to respond in milliseconds */
  duration: number;
  /** Correctness annotations; may be omitted entirely */
  correctness?: AgentActionCorrectness;
}
38
+
/** Prediction market metrics */
interface PredictionMarketMetrics {
  totalPositions: number;
  correctPredictions: number;
  incorrectPredictions: number;
  accuracy: number;
  avgPnlPerPosition: number;
}

/** Perpetual trading metrics */
interface PerpTradingMetrics {
  totalTrades: number;
  profitableTrades: number;
  winRate: number;
  avgPnlPerTrade: number;
  maxDrawdown: number;
}

/** Social metrics */
interface SocialActivityMetrics {
  postsCreated: number;
  groupsJoined: number;
  messagesReceived: number;
  reputationGained: number;
}

/** Timing metrics */
interface ResponseTimingMetrics {
  avgResponseTime: number;
  maxResponseTime: number;
  totalDuration: number;
}

/**
 * Aggregate metrics for one simulation run: total P&L, four metric groups,
 * and an optimality score.
 */
export interface SimulationMetrics {
  /** Total P&L from all positions */
  totalPnl: number;
  predictionMetrics: PredictionMarketMetrics;
  perpMetrics: PerpTradingMetrics;
  socialMetrics: SocialActivityMetrics;
  timing: ResponseTimingMetrics;
  /** Compared to optimal actions: 0-100, how close to optimal */
  optimalityScore: number;
}