@elizaos/training 2.0.0-alpha.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (224)
  1. package/Dockerfile +75 -0
  2. package/LICENSE +21 -0
  3. package/Makefile +374 -0
  4. package/README.md +346 -0
  5. package/config/rubrics.json +137 -0
  6. package/docker-compose.test.yml +57 -0
  7. package/package.json +57 -0
  8. package/python/config/babylon_atropos.yaml +90 -0
  9. package/python/config/profiles/12gb.json +11 -0
  10. package/python/config/profiles/16gb.json +10 -0
  11. package/python/config/profiles/24gb.json +10 -0
  12. package/python/config/profiles/48gb.json +10 -0
  13. package/python/config/profiles/cpu.json +11 -0
  14. package/python/config/profiles/l40-2gpu-safe.json +20 -0
  15. package/python/config/profiles/l40-2gpu.json +22 -0
  16. package/python/config/profiles/l40-4gpu.json +21 -0
  17. package/python/config/profiles/l40.json +17 -0
  18. package/python/config/tinker_training.yaml +143 -0
  19. package/python/curriculum_state.json +165 -0
  20. package/python/env.template +86 -0
  21. package/python/env.training.template +46 -0
  22. package/python/pyproject.toml +41 -0
  23. package/python/requirements-ci.txt +31 -0
  24. package/python/requirements.txt +87 -0
  25. package/python/scripts/__init__.py +4 -0
  26. package/python/scripts/benchmark_should_respond.py +190 -0
  27. package/python/scripts/debug_inference.py +62 -0
  28. package/python/scripts/import_json_trajectories.py +412 -0
  29. package/python/scripts/local-finetune/README.md +63 -0
  30. package/python/scripts/local-finetune/ingest_and_score.py +139 -0
  31. package/python/scripts/local-finetune/merge_model.py +32 -0
  32. package/python/scripts/local-finetune/test_adapter.py +91 -0
  33. package/python/scripts/local-finetune/train_from_csv.py +132 -0
  34. package/python/scripts/merge_trajectories.py +318 -0
  35. package/python/scripts/optimize_prompt_grpo.py +269 -0
  36. package/python/scripts/run_ab_test.py +143 -0
  37. package/python/scripts/run_full_pipeline.py +544 -0
  38. package/python/scripts/run_tinker_training.py +192 -0
  39. package/python/scripts/run_training.py +914 -0
  40. package/python/scripts/test_generation.py +29 -0
  41. package/python/scripts/test_judge.py +155 -0
  42. package/python/scripts/test_pipeline.py +356 -0
  43. package/python/scripts/test_trained_model.py +380 -0
  44. package/python/scripts/train_grpo.py +360 -0
  45. package/python/scripts/train_jsonl.py +223 -0
  46. package/python/scripts/train_local.py +528 -0
  47. package/python/setup.py +20 -0
  48. package/python/src/__init__.py +190 -0
  49. package/python/src/data_bridge/__init__.py +24 -0
  50. package/python/src/data_bridge/converter.py +435 -0
  51. package/python/src/data_bridge/reader.py +393 -0
  52. package/python/src/models.py +283 -0
  53. package/python/src/training/__init__.py +605 -0
  54. package/python/src/training/ab_testing.py +404 -0
  55. package/python/src/training/action_executor.py +621 -0
  56. package/python/src/training/archetype_trainer.py +347 -0
  57. package/python/src/training/atropos_trainer.py +980 -0
  58. package/python/src/training/babylon_env.py +1254 -0
  59. package/python/src/training/error_recovery.py +647 -0
  60. package/python/src/training/evaluation.py +856 -0
  61. package/python/src/training/fast_simulator.py +880 -0
  62. package/python/src/training/format_validator.py +584 -0
  63. package/python/src/training/hybrid_env.py +522 -0
  64. package/python/src/training/kl_controller.py +628 -0
  65. package/python/src/training/multi_prompt_dataset.py +883 -0
  66. package/python/src/training/multi_turn.py +656 -0
  67. package/python/src/training/online_env.py +1084 -0
  68. package/python/src/training/quality_scorer.py +391 -0
  69. package/python/src/training/quality_utils.py +633 -0
  70. package/python/src/training/rewards.py +1344 -0
  71. package/python/src/training/rlaif_env.py +17 -0
  72. package/python/src/training/rollout_generator.py +502 -0
  73. package/python/src/training/rubric_loader.py +198 -0
  74. package/python/src/training/scenario_pool.py +1072 -0
  75. package/python/src/training/schemas.py +481 -0
  76. package/python/src/training/service_manager.py +552 -0
  77. package/python/src/training/simulation_bridge.py +535 -0
  78. package/python/src/training/tick_reward_attribution.py +399 -0
  79. package/python/src/training/tinker_client.py +575 -0
  80. package/python/src/training/tinker_trainer.py +646 -0
  81. package/python/src/training/tokenization_utils.py +402 -0
  82. package/python/tests/e2e/__init__.py +13 -0
  83. package/python/tests/e2e/conftest.py +258 -0
  84. package/python/tests/e2e/test_full_pipeline.py +643 -0
  85. package/python/tests/e2e/test_online_training_e2e.py +365 -0
  86. package/python/tests/integration/__init__.py +12 -0
  87. package/python/tests/integration/conftest.py +383 -0
  88. package/python/tests/integration/test_db_integration.py +649 -0
  89. package/python/tests/integration/test_json_mode_integration.py +554 -0
  90. package/python/tests/test_action_executor.py +594 -0
  91. package/python/tests/test_archetype_scoring.py +1027 -0
  92. package/python/tests/test_atropos_integration.py +360 -0
  93. package/python/tests/test_evaluation.py +727 -0
  94. package/python/tests/test_format_validator.py +486 -0
  95. package/python/tests/test_kl_controller.py +432 -0
  96. package/python/tests/test_lr_scheduler.py +579 -0
  97. package/python/tests/test_multi_turn.py +590 -0
  98. package/python/tests/test_online_env.py +519 -0
  99. package/python/tests/test_quality_scorer.py +474 -0
  100. package/python/tests/test_scenario_pool.py +735 -0
  101. package/python/tests/test_service_manager.py +585 -0
  102. package/python/tests/test_simulation_rollout.py +581 -0
  103. package/python/tests/test_tokenization_utils.py +501 -0
  104. package/python/tests/test_training_orchestrator.py +497 -0
  105. package/python/tests/test_training_output_structure.py +661 -0
  106. package/research-output/training-runs/training-run-1770772042899.json +26 -0
  107. package/research-output/training-runs/training-run-1770930079670.json +32 -0
  108. package/research-output/training-runs/training-run-1770930143700.json +44 -0
  109. package/research-output/training-runs/training-run-1770930183638.json +38 -0
  110. package/research-output/training-runs/training-run-1770930442049.json +38 -0
  111. package/research-output/training-runs/training-run-1770930793243.json +38 -0
  112. package/research-output/training-runs/training-run-1771276293257.json +38 -0
  113. package/research-output/training-runs/training-run-1771276389280.json +38 -0
  114. package/research-output/training-runs/training-run-1771276502776.json +38 -0
  115. package/research-output/training-runs/training-run-1771277340748.json +38 -0
  116. package/research-output/training-runs/training-run-1773013658993.json +38 -0
  117. package/research-output/training-runs/training-run-1773013861014.json +38 -0
  118. package/research-output/training-runs/training-run-1773014215983.json +38 -0
  119. package/scripts/assess-training-data.ts +422 -0
  120. package/scripts/e2e-training-test.ts +550 -0
  121. package/scripts/export-rubrics.ts +64 -0
  122. package/scripts/generate-research-report.ts +1523 -0
  123. package/scripts/generate_dataset.sh +173 -0
  124. package/scripts/generate_should_respond.ts +267 -0
  125. package/scripts/generate_should_respond_dataset.ts +162 -0
  126. package/scripts/json-mode-benchmark.ts +399 -0
  127. package/scripts/rank_trajectories.ts +207 -0
  128. package/scripts/real-archetype-benchmark.ts +210 -0
  129. package/scripts/run-baseline-comparison.ts +116 -0
  130. package/scripts/run-full-pipeline.ts +272 -0
  131. package/scripts/run_rlaif_loop.ts +78 -0
  132. package/scripts/run_task_benchmark.ts +247 -0
  133. package/scripts/runpod_setup.sh +137 -0
  134. package/scripts/runpod_validate.sh +147 -0
  135. package/scripts/test-model-in-game.ts +955 -0
  136. package/scripts/test-scoring.ts +73 -0
  137. package/scripts/test-trained-model.ts +209 -0
  138. package/scripts/train-and-test.ts +824 -0
  139. package/scripts/verify-final.ts +118 -0
  140. package/src/adapter.ts +516 -0
  141. package/src/archetypes/ArchetypeConfigService.ts +626 -0
  142. package/src/archetypes/derive-archetype.ts +249 -0
  143. package/src/archetypes/index.ts +22 -0
  144. package/src/benchmark/ArchetypeMatchupBenchmark.ts +825 -0
  145. package/src/benchmark/BenchmarkChartGenerator.ts +748 -0
  146. package/src/benchmark/BenchmarkDataGenerator.ts +1288 -0
  147. package/src/benchmark/BenchmarkDataViewer.ts +324 -0
  148. package/src/benchmark/BenchmarkHistoryService.ts +221 -0
  149. package/src/benchmark/BenchmarkRunner.ts +685 -0
  150. package/src/benchmark/BenchmarkValidator.ts +204 -0
  151. package/src/benchmark/FastEvalRunner.ts +225 -0
  152. package/src/benchmark/MetricsValidator.ts +165 -0
  153. package/src/benchmark/MetricsVisualizer.ts +909 -0
  154. package/src/benchmark/ModelBenchmarkService.ts +611 -0
  155. package/src/benchmark/ModelRegistry.ts +158 -0
  156. package/src/benchmark/RulerBenchmarkIntegration.ts +235 -0
  157. package/src/benchmark/SimulationA2AInterface.ts +1169 -0
  158. package/src/benchmark/SimulationEngine.ts +832 -0
  159. package/src/benchmark/TaskRunner.ts +94 -0
  160. package/src/benchmark/__tests__/BenchmarkRunner.test.ts +534 -0
  161. package/src/benchmark/__tests__/HeadToHead.test.ts +126 -0
  162. package/src/benchmark/index.ts +91 -0
  163. package/src/benchmark/parseSimulationMetrics.ts +124 -0
  164. package/src/benchmark/simulation-types.ts +78 -0
  165. package/src/dependencies.ts +475 -0
  166. package/src/generation/TrajectoryGenerator.ts +387 -0
  167. package/src/generation/index.ts +12 -0
  168. package/src/huggingface/HuggingFaceDatasetUploader.ts +636 -0
  169. package/src/huggingface/HuggingFaceIntegrationService.ts +426 -0
  170. package/src/huggingface/HuggingFaceModelUploader.ts +532 -0
  171. package/src/huggingface/index.ts +27 -0
  172. package/src/huggingface/shared/HuggingFaceUploadUtil.ts +206 -0
  173. package/src/index.ts +102 -0
  174. package/src/init-training.ts +53 -0
  175. package/src/metrics/TrajectoryMetricsExtractor.ts +653 -0
  176. package/src/metrics/__tests__/TrajectoryMetricsExtractor.test.ts +759 -0
  177. package/src/metrics/index.ts +8 -0
  178. package/src/metrics/types.ts +200 -0
  179. package/src/rubrics/__tests__/index.test.ts +184 -0
  180. package/src/rubrics/ass-kisser.ts +85 -0
  181. package/src/rubrics/degen.ts +80 -0
  182. package/src/rubrics/goody-twoshoes.ts +84 -0
  183. package/src/rubrics/index.ts +236 -0
  184. package/src/rubrics/information-trader.ts +84 -0
  185. package/src/rubrics/infosec.ts +101 -0
  186. package/src/rubrics/liar.ts +104 -0
  187. package/src/rubrics/perps-trader.ts +87 -0
  188. package/src/rubrics/researcher.ts +81 -0
  189. package/src/rubrics/scammer.ts +82 -0
  190. package/src/rubrics/social-butterfly.ts +73 -0
  191. package/src/rubrics/super-predictor.ts +97 -0
  192. package/src/rubrics/trader.ts +67 -0
  193. package/src/scoring/ArchetypeScoringService.ts +486 -0
  194. package/src/scoring/JudgePromptBuilder.ts +556 -0
  195. package/src/scoring/LLMJudgeCache.ts +401 -0
  196. package/src/scoring/index.ts +9 -0
  197. package/src/training/AutomationPipeline.ts +916 -0
  198. package/src/training/BenchmarkService.ts +518 -0
  199. package/src/training/ConfigValidator.ts +220 -0
  200. package/src/training/MarketOutcomesTracker.ts +187 -0
  201. package/src/training/ModelDeployer.ts +186 -0
  202. package/src/training/ModelFetcher.ts +76 -0
  203. package/src/training/ModelSelectionService.ts +341 -0
  204. package/src/training/ModelUsageVerifier.ts +160 -0
  205. package/src/training/MultiModelOrchestrator.ts +580 -0
  206. package/src/training/RLModelConfig.ts +407 -0
  207. package/src/training/RewardBackpropagationService.ts +149 -0
  208. package/src/training/RulerScoringService.ts +666 -0
  209. package/src/training/TrainingMonitor.ts +166 -0
  210. package/src/training/TrajectoryRecorder.ts +399 -0
  211. package/src/training/__tests__/TrajectoryRecorder.test.ts +472 -0
  212. package/src/training/index.ts +100 -0
  213. package/src/training/logRLConfig.ts +34 -0
  214. package/src/training/pipeline.ts +129 -0
  215. package/src/training/storage/ModelStorageService.ts +279 -0
  216. package/src/training/storage/TrainingDataArchiver.ts +197 -0
  217. package/src/training/storage/index.ts +17 -0
  218. package/src/training/types.ts +207 -0
  219. package/src/training/window-utils.ts +138 -0
  220. package/src/utils/index.ts +101 -0
  221. package/src/utils/logger.ts +59 -0
  222. package/src/utils/snowflake.ts +17 -0
  223. package/src/utils/synthetic-detector.ts +111 -0
  224. package/tsconfig.json +20 -0
@@ -0,0 +1,1288 @@
1
+ /**
2
+ * Benchmark Data Generator
3
+ *
4
+ * Generates deterministic benchmark scenarios for agent testing.
5
+ * Creates pre-recorded game states with known outcomes for reproducible testing.
6
+ *
7
+ * Supports two modes:
8
+ * 1. Random Walk Mode (default): Prices follow random walk with drift
9
+ * 2. Causal Simulation Mode: Hidden facts → Events → Price movements (learnable signal)
10
+ */
11
+
12
+ import type { JsonValue } from '../adapter';
13
+ import { logger } from '../utils/logger';
14
+
/**
 * Size class of the price move attached to a causal event.
 *
 * The magnitude ranges (absolute value, applied upward or downward) are the
 * ones listed in VOLATILITY_BUCKET_RANGES:
 * - low: 2% to 4%
 * - medium: 5% to 10%
 * - high: 15% to 25%
 */
export type VolatilityBucket =
  | 'low'
  | 'medium'
  | 'high';
22
+
/**
 * Kinds of event that a hidden narrative fact can surface as.
 */
export type CausalEventType =
  | 'leak'
  | 'rumor'
  | 'scandal'
  | 'development'
  | 'deal'
  | 'announcement';
33
+
/**
 * A single entry in a hidden fact's event schedule.
 *
 * Timing is expressed as a base day and hour plus a jitter offset in hours.
 */
export interface ScheduledCausalEvent {
  /** Nominal day on which the event occurs (1-30). */
  baseDay: number;
  /** Nominal hour of the day on which the event occurs (0-23). */
  baseHour: number;
  /** Offset, in hours, applied to the nominal timing (calculated from seed). */
  jitterHours: number;
  /** Kind of event. */
  eventType: CausalEventType;
  /** Bucket that determines how large the price impact is. */
  volatilityBucket: VolatilityBucket;
  /** `true` when the event is good news for the affected tickers, `false` when bad. */
  isPositive: boolean;
  /** Template text used to describe the event. */
  descriptionTemplate: string;
}
54
+
/**
 * A hidden storyline that drives causal events.
 *
 * Each fact carries the ordered events through which it unfolds over time.
 */
export interface HiddenNarrativeFact {
  /** Unique identifier of the fact. */
  id: string;
  /** Human-readable statement of the fact (e.g., "TeslAI has a secret battery flaw"). */
  fact: string;
  /** Tickers whose prices this fact influences. */
  affectsTickers: string[];
  /** Events scheduled to occur because of this fact. */
  eventSchedule: ScheduledCausalEvent[];
  /** Overall direction of the narrative; negative facts lead to price drops. */
  sentiment: 'positive' | 'negative';
}
71
+
/**
 * Settings that control the size and mode of a generated benchmark.
 */
export interface BenchmarkConfig {
  /** Total benchmark length, in minutes. */
  durationMinutes: number;

  /** Seconds between consecutive ticks. */
  tickInterval: number;

  /** How many prediction markets to create. */
  numPredictionMarkets: number;

  /** How many perpetual markets to create. */
  numPerpetualMarkets: number;

  /** How many other simulated agents to create. */
  numAgents: number;

  /** Random seed for reproducibility; optional. */
  seed?: number;

  /**
   * Turns on causal simulation mode.
   *
   * When true, prices are driven by events derived from hidden facts rather
   * than by a random walk. BenchmarkDataGenerator's constructor rejects this
   * mode unless tickInterval is 3600 (one tick per hour).
   * Default: false (backward compatible).
   */
  useCausalSimulation?: boolean;
}
98
+
/**
 * Snapshot of the simulated world at one tick.
 */
export interface GameState {
  /** Tick number this state belongs to. */
  tick: number;
  /** Timestamp of the state. */
  timestamp: number;
  /** Prediction markets present in this state. */
  predictionMarkets: PredictionMarket[];
  /** Perpetual markets present in this state. */
  perpetualMarkets: PerpetualMarket[];
  /** Simulated agents present in this state. */
  agents: SimulatedAgent[];
  /** Social posts, when tracked. */
  posts?: Post[];
  /** Group chats, when tracked. */
  groupChats?: GroupChat[];
}
108
+
/**
 * A yes/no prediction market.
 */
export interface PredictionMarket {
  /** Market identifier. */
  id: string;
  /** Question the market resolves on. */
  question: string;
  /** Shares on the YES side. */
  yesShares: number;
  /** Shares on the NO side. */
  noShares: number;
  /** Price of YES. */
  yesPrice: number;
  /** Price of NO. */
  noPrice: number;
  /** Total traded volume. */
  totalVolume: number;
  /** Market liquidity. */
  liquidity: number;
  /** Whether the market has been resolved. */
  resolved: boolean;
  /** Creation timestamp. */
  createdAt: number;
  /** Timestamp at which the market resolves. */
  resolveAt: number;
}
122
+
/**
 * A perpetual futures market for one ticker.
 */
export interface PerpetualMarket {
  /** Ticker symbol. */
  ticker: string;
  /** Current price. */
  price: number;
  /** Price change over the last 24 hours. */
  priceChange24h: number;
  /** Traded volume over the last 24 hours. */
  volume24h: number;
  /** Open interest. */
  openInterest: number;
  /** Current funding rate. */
  fundingRate: number;
  /** Timestamp of the next funding event. */
  nextFundingTime: number;
}
132
+
/**
 * One of the other, non-player agents in the simulation.
 */
export interface SimulatedAgent {
  /** Agent identifier. */
  id: string;
  /** Display name. */
  name: string;
  /** Reputation score. */
  reputation: number;
  /** Cumulative profit and loss. */
  totalPnl: number;
}
139
+
/**
 * A social post made by an agent.
 */
export interface Post {
  /** Post identifier. */
  id: string;
  /** Identifier of the author. */
  authorId: string;
  /** Display name of the author. */
  authorName: string;
  /** Text of the post. */
  content: string;
  /** Creation timestamp. */
  createdAt: number;
  /** Number of likes. */
  likes: number;
  /** Number of comments. */
  comments: number;
  /** Market the post refers to, if any. */
  marketId?: string;
}
150
+
/**
 * A group chat and, optionally, its messages.
 */
export interface GroupChat {
  /** Chat identifier. */
  id: string;
  /** Chat name. */
  name: string;
  /** Identifiers of the chat's members. */
  memberIds: string[];
  /** Number of messages in the chat. */
  messageCount: number;
  /** Timestamp of the most recent activity. */
  lastActivity: number;
  /** Invitation flag for the agent, when set. */
  invitedAgent?: boolean;
  /** Message log, when included. */
  messages?: {
    id: string;
    authorId: string;
    authorName: string;
    content: string;
    timestamp: number;
  }[];
}
166
+
/**
 * One step of the benchmark: the events that occurred and the resulting state.
 */
export interface Tick {
  /** Tick number. */
  number: number;
  /** Timestamp of the tick. */
  timestamp: number;
  /** Events recorded for this tick. */
  events: TickEvent[];
  /** Game state at this tick. */
  state: GameState;
}
173
+
/**
 * A single event recorded within a tick.
 */
export interface TickEvent {
  /** Event type label. */
  type: string;
  /** Timestamp of the event. */
  timestamp: number;
  /** JSON-compatible payload of the event. */
  data: Record<string, JsonValue>;
}
179
+
/**
 * Answer key attached to a benchmark snapshot.
 *
 * The first group of fields is real data used for training and evaluation.
 * The deprecated group holds placeholder values kept only for backward
 * compatibility.
 */
export interface GroundTruth {
  // -------------------------------------------------------------------------
  // REAL DATA - used for training and evaluation
  // -------------------------------------------------------------------------

  /** Known market outcomes (marketId -> boolean) - REAL. */
  marketOutcomes: Record<string, boolean>;

  /**
   * Historical price data - REAL.
   * - Causal mode: prices change only at event ticks.
   * - Random walk mode: prices follow a random walk each tick.
   */
  priceHistory: Record<
    string,
    { tick: number; timestamp: number; price: number }[]
  >;

  /**
   * Hidden narrative facts that drive causal events - REAL (causal mode only).
   * Each fact generates a sequence of events that affect specific tickers.
   */
  hiddenNarrativeFacts?: HiddenNarrativeFact[];

  /**
   * Causal events with pre-calculated timing and price changes - REAL
   * (causal mode only). These events causally drive price movements, which
   * creates a learnable signal.
   */
  causalEvents?: {
    tick: number;
    day: number;
    hour: number;
    eventType: CausalEventType;
    description: string;
    affectedTickers: string[];
    volatilityBucket: VolatilityBucket;
    isPositive: boolean;
    /** Pre-calculated fractional change per ticker (e.g., -0.07 for -7%). */
    priceChanges: Record<string, number>;
    sourceFactId: string;
  }[];

  // -------------------------------------------------------------------------
  // LEGACY/SYNTHETIC DATA - for backward compatibility only.
  // These fields contain placeholder values, NOT real ground truth.
  // -------------------------------------------------------------------------

  /**
   * @deprecated SYNTHETIC placeholder - simple heuristic, not real optimal actions
   */
  optimalActions: {
    tick: number;
    type: string;
    target: string;
    expectedValue: number;
    reason: string;
  }[];

  /**
   * @deprecated SYNTHETIC placeholder - not real social opportunities
   */
  socialOpportunities: {
    tick: number;
    type: string;
    value: number;
    description: string;
  }[];

  /**
   * @deprecated SYNTHETIC - empty array, never meaningfully implemented
   */
  hiddenFacts: {
    tick: number;
    fact: string;
    category: 'market' | 'social' | 'event' | 'insider';
    value: JsonValue;
  }[];

  /**
   * @deprecated SYNTHETIC - empty array, never meaningfully implemented
   */
  hiddenEvents: {
    tick: number;
    type: string;
    description: string;
    impact: Record<string, JsonValue>;
  }[];

  /** Facts computed from the initial state (not synthetic, but not all fields are meaningful). */
  trueFacts: Record<string, JsonValue>;
}
271
+
/**
 * A complete generated benchmark: metadata, starting state, every tick, and
 * the ground truth to score against.
 */
export interface BenchmarkGameSnapshot {
  /** Snapshot identifier. */
  id: string;
  /** Snapshot format version. */
  version: string;
  /** Creation timestamp. */
  createdAt: number;
  /** Benchmark duration. */
  duration: number;
  /** Interval between ticks. */
  tickInterval: number;
  /** State before the first tick. */
  initialState: GameState;
  /** Tick-by-tick progression. */
  ticks: Tick[];
  /** Known outcomes for the scenario. */
  groundTruth: GroundTruth;
}
282
+
/**
 * Catalogue of storylines available to causal simulation.
 *
 * Every entry pairs a hidden fact with the three events through which it
 * surfaces. `{ticker}` is a placeholder in all template strings. The first
 * three entries are negative narratives (price drops), the last three are
 * positive narratives (price increases).
 */
const NARRATIVE_FACT_TEMPLATES: {
  factTemplate: string;
  sentiment: 'positive' | 'negative';
  /** Event sequence with relative timing and volatility */
  eventSequence: {
    /** Days from start (e.g., 5, 10, 15) */
    relativeDay: number;
    eventType: CausalEventType;
    volatilityBucket: VolatilityBucket;
    descriptionTemplate: string;
  }[];
}[] = [
  // ---- Negative: product recall ----
  {
    factTemplate: '{ticker} has a secret product flaw that will require a recall',
    sentiment: 'negative',
    eventSequence: [
      {
        relativeDay: 5, eventType: 'leak', volatilityBucket: 'medium',
        descriptionTemplate: 'Internal documents leaked: {ticker} product flaw discovered by engineers',
      },
      {
        relativeDay: 10, eventType: 'rumor', volatilityBucket: 'medium',
        descriptionTemplate: 'Industry sources report potential {ticker} recall due to safety issues',
      },
      {
        relativeDay: 18, eventType: 'scandal', volatilityBucket: 'high',
        descriptionTemplate: '{ticker} board meeting: CEO denies cover-up allegations as evidence mounts',
      },
    ],
  },
  // ---- Negative: hidden insolvency ----
  {
    factTemplate: '{ticker} is secretly insolvent and hiding massive losses',
    sentiment: 'negative',
    eventSequence: [
      {
        relativeDay: 4, eventType: 'rumor', volatilityBucket: 'low',
        descriptionTemplate: 'Anonymous source claims {ticker} accounting irregularities',
      },
      {
        relativeDay: 12, eventType: 'leak', volatilityBucket: 'medium',
        descriptionTemplate: 'Leaked memo reveals {ticker} executives discussing "liquidity concerns"',
      },
      {
        relativeDay: 20, eventType: 'scandal', volatilityBucket: 'high',
        descriptionTemplate: 'Whistleblower exposes {ticker} hidden debt: stock halted pending investigation',
      },
    ],
  },
  // ---- Negative: CEO indictment ----
  {
    factTemplate: '{ticker} CEO is about to be indicted for fraud',
    sentiment: 'negative',
    eventSequence: [
      {
        relativeDay: 6, eventType: 'rumor', volatilityBucket: 'low',
        descriptionTemplate: 'Rumors swirl about {ticker} CEO facing regulatory scrutiny',
      },
      {
        relativeDay: 14, eventType: 'leak', volatilityBucket: 'medium',
        descriptionTemplate: 'Sources close to investigation: {ticker} CEO under federal probe',
      },
      {
        relativeDay: 22, eventType: 'announcement', volatilityBucket: 'high',
        descriptionTemplate: '{ticker} confirms CEO departure amid ongoing investigation',
      },
    ],
  },
  // ---- Positive: breakthrough product ----
  {
    factTemplate: '{ticker} is about to announce a breakthrough product that will dominate the market',
    sentiment: 'positive',
    eventSequence: [
      {
        relativeDay: 5, eventType: 'rumor', volatilityBucket: 'low',
        descriptionTemplate: 'Insider whispers: {ticker} working on game-changing technology',
      },
      {
        relativeDay: 12, eventType: 'leak', volatilityBucket: 'medium',
        descriptionTemplate: 'Leaked patent filings suggest {ticker} breakthrough imminent',
      },
      {
        relativeDay: 20, eventType: 'announcement', volatilityBucket: 'high',
        descriptionTemplate: '{ticker} announces revolutionary product: analysts upgrade to strong buy',
      },
    ],
  },
  // ---- Positive: acquisition target ----
  {
    factTemplate: '{ticker} is the secret acquisition target of a tech giant',
    sentiment: 'positive',
    eventSequence: [
      {
        relativeDay: 4, eventType: 'rumor', volatilityBucket: 'low',
        descriptionTemplate: 'M&A rumors surface: {ticker} reportedly in acquisition talks',
      },
      {
        relativeDay: 10, eventType: 'leak', volatilityBucket: 'medium',
        descriptionTemplate: 'Anonymous source: {ticker} board reviewing buyout offer at premium',
      },
      {
        relativeDay: 16, eventType: 'deal', volatilityBucket: 'high',
        descriptionTemplate: '{ticker} confirms acquisition discussions: shares surge on takeover premium',
      },
    ],
  },
  // ---- Positive: regulatory approval ----
  {
    factTemplate: '{ticker} has secretly achieved major regulatory approval',
    sentiment: 'positive',
    eventSequence: [
      {
        relativeDay: 6, eventType: 'rumor', volatilityBucket: 'low',
        descriptionTemplate: 'Industry insiders: {ticker} regulatory submission shows promise',
      },
      {
        relativeDay: 13, eventType: 'leak', volatilityBucket: 'medium',
        descriptionTemplate: 'Sources say {ticker} cleared key regulatory hurdle ahead of schedule',
      },
      {
        relativeDay: 21, eventType: 'announcement', volatilityBucket: 'high',
        descriptionTemplate: '{ticker} receives full regulatory approval: new market opportunity unlocked',
      },
    ],
  },
];
465
+
/**
 * Magnitude range of the price change for each volatility bucket.
 *
 * Values are fractions (0.02 = 2%) and express the absolute size of the move.
 */
const VOLATILITY_BUCKET_RANGES: Record<
  VolatilityBucket,
  { min: number; max: number }
> = {
  // 2% to 4%
  low: {
    min: 0.02,
    max: 0.04,
  },
  // 5% to 10%
  medium: {
    min: 0.05,
    max: 0.1,
  },
  // 15% to 25%
  high: {
    min: 0.15,
    max: 0.25,
  },
};
478
+
/**
 * Jitter range, in hours, for event timing.
 *
 * An event is scheduled at its base day/hour plus or minus this jitter.
 */
const EVENT_JITTER_HOURS = 8;
484
+
485
+ export class BenchmarkDataGenerator {
486
+ private config: BenchmarkConfig;
487
+ private rng: SeededRandom;
488
+
/**
 * Creates a generator for the given benchmark configuration.
 *
 * The random source is seeded from `config.seed` whenever that is a finite
 * number, including 0, so every explicitly seeded run is reproducible. Only
 * a missing or non-finite seed falls back to the current time.
 *
 * @param config Benchmark settings; stored on the instance as-is.
 * @throws Error when causal simulation is enabled with a tickInterval other
 *   than 3600 seconds.
 */
constructor(config: BenchmarkConfig) {
  // Validate tickInterval for causal simulation.
  // The tick calculation assumes 1 tick = 1 hour (tickInterval = 3600 seconds).
  if (config.useCausalSimulation && config.tickInterval !== 3600) {
    throw new Error(
      `Causal simulation requires tickInterval=3600 (1 hour). Got: ${config.tickInterval}. ` +
        `The day/hour event scheduling assumes 1 tick per hour.`
    );
  }

  this.config = config;

  // A truthiness check (`seed || Date.now()`) would throw away an explicit
  // seed of 0 and make that run non-reproducible, so test for a usable number.
  // NOTE(review): assumes SeededRandom accepts 0 as a valid seed - confirm.
  const { seed } = config;
  const hasUsableSeed = typeof seed === 'number' && Number.isFinite(seed);
  this.rng = new SeededRandom(hasUsableSeed ? seed : Date.now());
}
502
+
/**
 * Exposes the generator's SeededRandom instance for external use
 * (e.g., MarketMoverAgent).
 *
 * @returns The same instance the generator draws from, not a copy.
 */
getRng(): SeededRandom {
  const { rng } = this;
  return rng;
}
509
+
/**
 * Reports whether causal simulation mode is enabled.
 *
 * @returns `true` only when `useCausalSimulation` is exactly `true` in the
 *   configuration; an omitted flag counts as disabled.
 */
isCausalSimulationEnabled(): boolean {
  const { useCausalSimulation } = this.config;
  return useCausalSimulation === true;
}
516
+
/**
 * Produces a complete benchmark snapshot.
 *
 * Steps: build the initial state, derive the ground truth, then generate the
 * tick-by-tick progression. The tick count is the configured duration in
 * seconds divided by the tick interval, rounded down.
 *
 * @returns The snapshot, with `duration` expressed in seconds and the id and
 *   `createdAt` taken from the wall clock at call time.
 */
async generate(): Promise<BenchmarkGameSnapshot> {
  const cfg = this.config;

  const id = Date.now().toString();
  const createdAt = Date.now();
  const numTicks = Math.floor((cfg.durationMinutes * 60) / cfg.tickInterval);

  logger.info('Generating benchmark', {
    id,
    duration: cfg.durationMinutes,
    ticks: numTicks,
  });

  // Starting world, then known outcomes, then the per-tick progression.
  const initialState = this.generateInitialState(createdAt);
  const groundTruth = this.generateGroundTruth(initialState, numTicks);
  const ticks = this.generateTicks(initialState, groundTruth, numTicks, createdAt);

  logger.info('Benchmark generated', {
    id,
    ticks: ticks.length,
    markets: initialState.predictionMarkets.length,
    perps: initialState.perpetualMarkets.length,
  });

  const snapshot: BenchmarkGameSnapshot = {
    id,
    version: '1.0.0',
    createdAt,
    duration: cfg.durationMinutes * 60,
    tickInterval: cfg.tickInterval,
    initialState,
    ticks,
    groundTruth,
  };
  return snapshot;
}
565
+
/**
 * Builds the tick-0 game state: prediction markets, perpetual markets and
 * simulated agents, plus empty post and group-chat lists.
 *
 * Every random value comes from the seeded generator, so the order of the
 * draws below is part of the output for a given seed.
 *
 * @param timestamp Creation time used for `createdAt`, `resolveAt` and
 *   `nextFundingTime`; the offsets added to it are in milliseconds.
 * @returns The initial state with `tick` set to 0.
 */
private generateInitialState(timestamp: number): GameState {
  const { rng, config } = this;

  // ---- Prediction markets ----
  const marketQuestions = [
    'Will BitcAIn reach $150k by end of month?',
    'Will The FUD announce emergency rate cut?',
    'Will Trump Terminal tweet cause market crash?',
    'Will EtherAIum gas fees drop below $1?',
    'Will TeslAI stock hit $500 this quarter?',
    'Will OpenAGI release Cognition-9000 this year?',
    'Will SolanAI flip EtherAIum in TVL?',
    'Will AIlon Musk announce Mars colony launch?',
    'Will Mark Zuckerborg rebrand MetAI again?',
    'Will Sam AIltman declare AGI achieved?',
  ];
  // Each side starts with at least 5000 shares, so total liquidity is at
  // least 10,000 (acceptable price impact: <5% for $100 trades).
  const baseLiquidity = 5000;

  const predictionMarkets: PredictionMarket[] = [];
  for (let idx = 0; idx < config.numPredictionMarkets; idx += 1) {
    // Questions repeat once the list is exhausted.
    const question = marketQuestions[idx % marketQuestions.length];

    // First draw picks which side is the smaller one; YES is then drawn
    // before NO in both branches.
    const yesIsLowSide = rng.next() < 0.5;
    let yesShares: number;
    let noShares: number;
    if (yesIsLowSide) {
      yesShares = baseLiquidity + rng.next() * 1500; // 5000-6500
      noShares = baseLiquidity + 1500 + rng.next() * 3500; // 6500-10000
    } else {
      yesShares = baseLiquidity + 1500 + rng.next() * 3500; // 6500-10000
      noShares = baseLiquidity + rng.next() * 1500; // 5000-6500
    }
    const totalShares = yesShares + noShares; // 10,000 - 16,500 total

    if (!question) {
      continue;
    }
    predictionMarkets.push({
      id: `market-${idx}`,
      question,
      yesShares,
      noShares,
      yesPrice: yesShares / totalShares,
      noPrice: noShares / totalShares,
      totalVolume: 0,
      liquidity: totalShares,
      resolved: false,
      createdAt: timestamp,
      resolveAt: timestamp + config.durationMinutes * 60 * 1000,
    });
  }

  // ---- Perpetual markets ----
  // Tickers (and their base prices) repeat when more markets are requested
  // than there are entries here.
  const perpSeeds = [
    { ticker: 'BTCAI', basePrice: 120000 },
    { ticker: 'ETHAI', basePrice: 4000 },
    { ticker: 'SOLAI', basePrice: 200 },
    { ticker: 'TSLAI', basePrice: 450 },
    { ticker: 'METAI', basePrice: 520 },
  ];

  const perpetualMarkets: PerpetualMarket[] = [];
  for (let idx = 0; idx < config.numPerpetualMarkets; idx += 1) {
    const { ticker, basePrice } = perpSeeds[idx % perpSeeds.length]!;

    // Draw order: 24h change, volume, open interest, funding rate.
    const priceChange24h = (rng.next() - 0.5) * 10;
    const volume24h = 1000000 + rng.next() * 2000000;
    const openInterest = 500000 + rng.next() * 1000000;
    const fundingRate = (rng.next() - 0.5) * 0.002;

    perpetualMarkets.push({
      ticker,
      price: basePrice,
      priceChange24h,
      volume24h,
      openInterest,
      fundingRate,
      nextFundingTime: timestamp + 8 * 60 * 60 * 1000,
    });
  }

  // ---- Simulated agents ----
  const agents: SimulatedAgent[] = [];
  for (let idx = 0; idx < config.numAgents; idx += 1) {
    // Draw order: reputation, then PnL.
    const reputation = 50 + rng.next() * 50;
    const totalPnl = (rng.next() - 0.5) * 1000;
    agents.push({
      id: `agent-${idx}`,
      name: `Agent ${idx}`,
      reputation,
      totalPnl,
    });
  }

  // Posts and group chats start out empty.
  return {
    tick: 0,
    timestamp,
    predictionMarkets,
    perpetualMarkets,
    agents,
    posts: [],
    groupChats: [],
  };
}
662
+
663
/**
 * Generate the hidden narrative fact that drives a causal simulation.
 *
 * Selects ONE template from NARRATIVE_FACT_TEMPLATES and ONE ticker from the
 * initial perpetual markets (both chosen with the seeded RNG), substitutes the
 * ticker for every `{ticker}` placeholder, and expands the template's event
 * sequence into a schedule with per-event hour jitter.
 *
 * RNG draw order (kept stable so seeded runs stay comparable):
 * template index, ticker index, then (jitter, base hour) per event, then the
 * id suffix.
 *
 * @param initialState - State whose perpetual markets supply candidate tickers.
 * @returns The narrative fact together with its scheduled causal events.
 * @throws Error when no template or no perpetual market can be selected
 *   (e.g. the simulation was configured with zero perpetual markets).
 */
private generateHiddenNarrativeFact(
  initialState: GameState
): HiddenNarrativeFact {
  // Select a random narrative template
  const templateIndex = Math.floor(
    this.rng.next() * NARRATIVE_FACT_TEMPLATES.length
  );
  const template = NARRATIVE_FACT_TEMPLATES[templateIndex];
  if (template === undefined) {
    throw new Error(
      `Cannot generate hidden narrative fact: no narrative template at index ${templateIndex} (${NARRATIVE_FACT_TEMPLATES.length} available)`
    );
  }

  // Select a random ticker to be affected
  const tickerIndex = Math.floor(
    this.rng.next() * initialState.perpetualMarkets.length
  );
  const affectedMarket = initialState.perpetualMarkets[tickerIndex];
  if (affectedMarket === undefined) {
    throw new Error(
      `Cannot generate hidden narrative fact: no perpetual market at index ${tickerIndex} (${initialState.perpetualMarkets.length} available)`
    );
  }
  const affectedTicker = affectedMarket.ticker;

  // Every event in the schedule inherits the template's overall sentiment.
  const isPositive = template.sentiment === 'positive';

  // Generate the fact description by replacing the {ticker} placeholder
  const fact = template.factTemplate.replace(/{ticker}/g, affectedTicker);

  // Generate event schedule with jitter
  const eventSchedule: ScheduledCausalEvent[] = template.eventSequence.map(
    (event) => {
      // Whole-hour jitter in the range ±EVENT_JITTER_HOURS
      const jitterHours = Math.round(
        (this.rng.next() * 2 - 1) * EVENT_JITTER_HOURS
      );

      // Base hour is an integer in 8..19, i.e. events start between 8am and 8pm
      const baseHour = 8 + Math.floor(this.rng.next() * 12);

      return {
        baseDay: event.relativeDay,
        baseHour,
        jitterHours,
        eventType: event.eventType,
        volatilityBucket: event.volatilityBucket,
        isPositive,
        descriptionTemplate: event.descriptionTemplate.replace(
          /{ticker}/g,
          affectedTicker
        ),
      };
    }
  );

  return {
    // NOTE: the id embeds the wall clock (Date.now()), so it differs between
    // runs even when the RNG seed is identical.
    id: `narrative-fact-${Date.now()}-${Math.floor(this.rng.next() * 1000000)}`,
    fact,
    affectsTickers: [affectedTicker],
    eventSchedule,
    sentiment: template.sentiment,
  };
}
720
+
721
/**
 * Translate a scheduled causal event into a concrete position on the timeline.
 *
 * The event's offset from the start is `(baseDay - 1) * 24 + baseHour +
 * jitterHours` hours (day 1 begins at hour 0, so day 5 hour 12 is hour 108).
 * That offset is clamped to the window [1, 29 * 24 - 1] and then expressed as
 * a 1-based day, an hour of day, and a tick number.
 *
 * @param event - Scheduled event carrying base day, base hour and jitter.
 * @param ticksPerHour - Number of simulation ticks in one hour.
 * @returns The tick number plus the (day, hour) it corresponds to.
 */
private calculateEventTick(
  event: ScheduledCausalEvent,
  ticksPerHour: number
): { tick: number; day: number; hour: number } {
  const hoursPerDay = 24;
  const earliestHour = 1;
  const latestHour = 29 * hoursPerDay - 1;

  // Hours elapsed since the start of the simulation, before clamping
  const rawHours =
    (event.baseDay - 1) * hoursPerDay + event.baseHour + event.jitterHours;

  // Cap at the end of day 29 first, then lift to at least hour 1
  const cappedHours = Math.min(rawHours, latestHour);
  const elapsedHours = Math.max(earliestHour, cappedHours);

  return {
    tick: elapsedHours * ticksPerHour,
    day: Math.floor(elapsedHours / hoursPerDay) + 1,
    hour: elapsedHours % hoursPerDay,
  };
}
746
+
747
/**
 * Draw a signed fractional price move from a volatility bucket.
 *
 * The magnitude is picked with the seeded RNG between the bucket's `min` and
 * `max` in VOLATILITY_BUCKET_RANGES; the sign follows `isPositive`.
 * Example results: -0.07 for a -7% move, 0.05 for a +5% move.
 *
 * @param bucket - Volatility bucket whose range bounds the magnitude.
 * @param isPositive - Whether the move is upward (true) or downward (false).
 * @returns The signed fractional change.
 */
private selectPercentageFromBucket(
  bucket: VolatilityBucket,
  isPositive: boolean
): number {
  const { min, max } = VOLATILITY_BUCKET_RANGES[bucket];
  const magnitude = min + this.rng.next() * (max - min);

  if (isPositive) {
    return magnitude;
  }
  return -magnitude;
}
759
+
760
/**
 * Generate ground truth (known outcomes) for a benchmark run.
 *
 * Produces, in this order (the order matters because every step draws from
 * the shared seeded RNG):
 *  1. a random YES/NO outcome for every prediction market;
 *  2. when `config.useCausalSimulation` is set, one hidden narrative fact and
 *     its causal events with pre-computed tick and per-ticker price changes;
 *  3. a per-ticker price history of `numTicks` entries, either a random walk
 *     (non-causal mode) or a step function that only moves on causal events;
 *  4. legacy placeholder fields kept for backward compatibility.
 *
 * Causal events whose tick is >= `numTicks` never affect the price history,
 * because the history loop only visits ticks 0..numTicks-1.
 *
 * @param initialState - Initial markets/agents the ground truth is derived from.
 * @param numTicks - Number of ticks to generate price history for.
 * @returns The assembled ground truth.
 */
private generateGroundTruth(
  initialState: GameState,
  numTicks: number
): GroundTruth {
  // Randomly determine market outcomes
  const marketOutcomes: Record<string, boolean> = {};
  for (const market of initialState.predictionMarkets) {
    marketOutcomes[market.id] = this.rng.next() > 0.5;
  }

  // Calculate ticks per hour (for event scheduling).
  // tickInterval is treated as seconds per tick; the result is floored.
  const ticksPerHour = Math.floor(3600 / this.config.tickInterval);

  // Generate causal simulation data if enabled
  let hiddenNarrativeFacts: HiddenNarrativeFact[] | undefined;
  let causalEvents: GroundTruth['causalEvents'] | undefined;

  if (this.config.useCausalSimulation) {
    // Generate ONE dominant narrative fact
    const narrativeFact = this.generateHiddenNarrativeFact(initialState);
    hiddenNarrativeFacts = [narrativeFact];

    // Pre-calculate causal events with their timing and price changes
    causalEvents = narrativeFact.eventSchedule.map((scheduledEvent) => {
      const timing = this.calculateEventTick(scheduledEvent, ticksPerHour);

      // Calculate price changes for each affected ticker
      const priceChanges: Record<string, number> = {};
      for (const ticker of narrativeFact.affectsTickers) {
        priceChanges[ticker] = this.selectPercentageFromBucket(
          scheduledEvent.volatilityBucket,
          scheduledEvent.isPositive
        );
      }

      return {
        tick: timing.tick,
        day: timing.day,
        hour: timing.hour,
        eventType: scheduledEvent.eventType,
        description: scheduledEvent.descriptionTemplate,
        affectedTickers: narrativeFact.affectsTickers,
        volatilityBucket: scheduledEvent.volatilityBucket,
        isPositive: scheduledEvent.isPositive,
        priceChanges,
        sourceFactId: narrativeFact.id,
      };
    });

    // Sort events by tick
    causalEvents.sort((a, b) => a.tick - b.tick);

    logger.info('Generated causal simulation data', {
      narrativeFact: narrativeFact.fact,
      affectedTickers: narrativeFact.affectsTickers,
      numEvents: causalEvents.length,
      eventTicks: causalEvents.map((e) => ({
        tick: e.tick,
        day: e.day,
        hour: e.hour,
        type: e.eventType,
      })),
    });
  }

  // Generate price history for perpetuals.
  // Random walk mode: every tick moves the price by a small random step.
  // Causal mode: the price stays flat and only moves on ticks that carry a
  // causal event.
  const priceHistory: Record<
    string,
    Array<{ tick: number; timestamp: number; price: number }>
  > = {};

  if (!this.config.useCausalSimulation) {
    // Random walk mode (backward compatible)
    for (const perp of initialState.perpetualMarkets) {
      const history: Array<{
        tick: number;
        timestamp: number;
        price: number;
      }> = [];
      let currentPrice = perp.price;

      for (let tick = 0; tick < numTicks; tick++) {
        // Random walk with drift: step is in [-0.96%, +1.04%)
        const change = (this.rng.next() - 0.48) * 0.02; // Slight upward bias
        currentPrice = currentPrice * (1 + change);

        history.push({
          tick,
          timestamp: 0, // Will be filled in during tick generation
          price: currentPrice,
        });
      }

      priceHistory[perp.ticker] = history;
    }
  } else {
    // Causal simulation mode: generate price history based on events
    // Prices start at initial values and only change when events occur
    for (const perp of initialState.perpetualMarkets) {
      const history: Array<{
        tick: number;
        timestamp: number;
        price: number;
      }> = [];
      let currentPrice = perp.price;

      // Group this ticker's price changes by tick. Jitter can place several
      // events on the same tick, so each tick keeps a LIST of changes (in
      // sorted schedule order) instead of a single value that a later event
      // would overwrite.
      const priceChangesByTick = new Map<number, number[]>();
      if (causalEvents) {
        for (const event of causalEvents) {
          const change = event.priceChanges[perp.ticker];
          if (change === undefined) {
            continue;
          }
          const sameTickChanges = priceChangesByTick.get(event.tick);
          if (sameTickChanges) {
            sameTickChanges.push(change);
          } else {
            priceChangesByTick.set(event.tick, [change]);
          }
        }
      }

      // Price bounds: 10% to 400% of the initial price
      const minPrice = perp.price * 0.1;
      const maxPrice = perp.price * 4.0;

      for (let tick = 0; tick < numTicks; tick++) {
        // Apply every price change scheduled for this tick, clamping after each
        const changesAtTick = priceChangesByTick.get(tick);
        if (changesAtTick !== undefined) {
          for (const priceChange of changesAtTick) {
            currentPrice = currentPrice * (1 + priceChange);
            currentPrice = Math.max(minPrice, Math.min(maxPrice, currentPrice));
          }
        }

        history.push({
          tick,
          timestamp: 0, // Will be filled in during tick generation
          price: currentPrice,
        });
      }

      priceHistory[perp.ticker] = history;
    }
  }

  // =========================================================================
  // LEGACY PLACEHOLDER DATA (not used by causal simulation)
  // These fields exist for backward compatibility with older benchmarks.
  // They contain synthetic placeholder data, NOT real ground truth.
  // For causal simulation, use: hiddenNarrativeFacts, causalEvents, priceHistory
  // =========================================================================

  // SYNTHETIC: Simple heuristic - buying the correct outcome at tick 1
  // This is NOT a sophisticated optimal action calculation
  const optimalActions: GroundTruth['optimalActions'] = [];
  for (const [marketId, outcome] of Object.entries(marketOutcomes)) {
    optimalActions.push({
      tick: 1,
      type: 'buy_prediction',
      target: marketId,
      expectedValue: 100, // Placeholder value
      reason: `[SYNTHETIC] Market ${marketId} will resolve ${outcome ? 'YES' : 'NO'}`,
    });
  }

  // SYNTHETIC: Placeholder social opportunities at regular intervals
  // (roughly five across the run; the interval is never smaller than 1 tick)
  const socialOpportunities: GroundTruth['socialOpportunities'] = [];
  const socialInterval = Math.max(1, Math.floor(numTicks / 5));
  for (let i = 0; i < numTicks; i += socialInterval) {
    socialOpportunities.push({
      tick: i,
      type: 'synthetic_opportunity',
      value: 100, // Fixed placeholder value
      description: `[SYNTHETIC] Placeholder opportunity at tick ${i}`,
    });
  }

  // SYNTHETIC: Empty arrays - these were never meaningfully implemented
  const hiddenFacts: GroundTruth['hiddenFacts'] = [];
  const hiddenEvents: GroundTruth['hiddenEvents'] = [];

  // TRUE FACTS: Actual computed values from initial state
  const trueFacts: GroundTruth['trueFacts'] = {
    totalLiquidity: initialState.predictionMarkets.reduce(
      (sum, m) => sum + m.liquidity,
      0
    ),
    // Mean YES price across prediction markets; 0 when there are none
    averageMarketPrice:
      initialState.predictionMarkets.length > 0
        ? initialState.predictionMarkets.reduce(
            (sum, m) => sum + m.yesPrice,
            0
          ) / initialState.predictionMarkets.length
        : 0,
    numPerpetualMarkets: initialState.perpetualMarkets.length,
    numAgents: initialState.agents.length,
  };

  return {
    marketOutcomes,
    priceHistory,
    optimalActions,
    socialOpportunities,
    hiddenFacts,
    hiddenEvents,
    trueFacts,
    hiddenNarrativeFacts,
    causalEvents,
  };
}
971
+
972
/**
 * Generate tick-by-tick progression.
 *
 * For each of `numTicks` ticks this:
 *  - replays the pre-computed perpetual prices from `groundTruth.priceHistory`
 *    (falling back to the current price when no entry exists);
 *  - randomly emits a prediction-market trade, a post, a new group chat,
 *    group-chat messages and a group invite, all driven by the seeded RNG;
 *  - records the events together with a snapshot of the state after the tick.
 *
 * Tick `i` (0-based, stored as `number`) is stamped
 * `startTimestamp + (i + 1) * tickInterval * 1000`, and its snapshot carries
 * `state.tick === i + 1`.
 *
 * Perpetual market objects are cloned for the working state and again for
 * every snapshot, because their `price` is reassigned each tick. Without the
 * clones the caller's `initialState` would be mutated and every snapshot
 * would end up showing the final price.
 *
 * @param initialState - Starting state; not mutated by this method.
 * @param groundTruth - Supplies the per-ticker price history to replay.
 * @param numTicks - Number of ticks to generate.
 * @param startTimestamp - Timestamp (ms) the first tick is offset from.
 * @returns One Tick per simulated step, in order.
 */
private generateTicks(
  initialState: GameState,
  groundTruth: GroundTruth,
  numTicks: number,
  startTimestamp: number
): Tick[] {
  const ticks: Tick[] = [];

  // Fixed pool of insider-style group chat messages. These are static
  // templates (they are not derived from groundTruth); one is picked at
  // random for each generated message.
  const insiderMessages = [
    // Actionable alpha tied to prediction markets
    `🤫 Between us, I'm loading up on YES for Q1. My sources say it's happening.`,
    `Just went heavy SHORT on $PERP-0. Trust me on this one.`,
    `Get out of Q2 NOW. I know something the market doesn't.`,
    `Real talk: market is wrong about Q0. Should be trading at 80%+`,
    `Insider tip: $PERP-1 announcement coming. Load up before it drops.`,
    // Position reveals
    `My actual position: 500 shares YES on Q1. Public says otherwise 😉`,
    `Don't tell anyone but I'm shorting $PERP-2 hard right now.`,
    // Strategic coordination
    `We should coordinate on Q0 - push it to YES, then dump.`,
    `Anyone else seeing the weakness in $PERP-0? Time to short?`,
    // Contradicting public statements
    `Ignore what I posted publicly. Q2 is a buy.`,
  ];

  // Create a mutable working copy of the initial state. Perpetual markets are
  // cloned per object because their price is reassigned below; the other
  // collections only need a fresh array.
  const currentState: GameState = {
    ...initialState,
    predictionMarkets: [...initialState.predictionMarkets],
    perpetualMarkets: initialState.perpetualMarkets.map((perp) => ({
      ...perp,
    })),
    agents: [...initialState.agents],
    posts: initialState.posts ? [...initialState.posts] : [],
    groupChats: initialState.groupChats ? [...initialState.groupChats] : [],
  };

  // Track group chats across ticks
  const groupChatMap = new Map<string, GroupChat>();
  let nextGroupChatId = 0;

  for (let i = 0; i < numTicks; i++) {
    const tickTimestamp =
      startTimestamp + (i + 1) * this.config.tickInterval * 1000;
    const events: TickEvent[] = [];

    // Update perpetual prices from the pre-computed history
    for (const perp of currentState.perpetualMarkets) {
      const tickerHistory = groundTruth.priceHistory[perp.ticker];
      const priceAtTick = tickerHistory?.[i];
      const newPrice = priceAtTick?.price ?? perp.price;
      events.push({
        type: 'price:updated',
        timestamp: tickTimestamp,
        data: {
          ticker: perp.ticker,
          oldPrice: perp.price,
          newPrice,
        },
      });
      perp.price = newPrice;
    }

    // Simulate some agent actions (about half of the ticks get a trade)
    if (this.rng.next() > 0.5) {
      const agentId = `agent-${Math.floor(this.rng.next() * this.config.numAgents)}`;
      const marketId = `market-${Math.floor(this.rng.next() * this.config.numPredictionMarkets)}`;
      const outcome = this.rng.next() > 0.5 ? 'YES' : 'NO';

      events.push({
        type: 'market:trade',
        timestamp: tickTimestamp,
        data: {
          marketId,
          agentId,
          outcome,
          amount: 10 + this.rng.next() * 90,
        },
      });
    }

    // Simulate social activity - create posts and add to state
    if (this.rng.next() > 0.7) {
      const agentId = `agent-${Math.floor(this.rng.next() * this.config.numAgents)}`;
      const agent = currentState.agents.find(
        (a: { id: string }) => a.id === agentId
      );
      const marketId = `market-${Math.floor(this.rng.next() * this.config.numPredictionMarkets)}`;
      const market = currentState.predictionMarkets.find(
        (m: { id: string; question: string }) => m.id === marketId
      );

      const postId = `post-${i}-${Math.floor(this.rng.next() * 1000000)}`;
      const post: Post = {
        id: postId,
        authorId: agentId,
        authorName: agent?.name || `Agent ${agentId.split('-')[1]}`,
        content: `Market sentiment seems ${this.rng.next() > 0.5 ? 'bullish' : 'bearish'} on ${market?.question || 'markets'}`,
        createdAt: tickTimestamp,
        likes: Math.floor(this.rng.next() * 20),
        comments: Math.floor(this.rng.next() * 5),
        marketId,
      };

      // Add post to state
      if (!currentState.posts) {
        currentState.posts = [];
      }
      currentState.posts.push(post);

      // Keep only last 50 posts to avoid memory issues
      if (currentState.posts.length > 50) {
        currentState.posts = currentState.posts.slice(-50);
      }

      events.push({
        type: 'post:created',
        timestamp: tickTimestamp,
        data: {
          postId: post.id,
          authorId: post.authorId,
          authorName: post.authorName,
          content: post.content,
          marketId: post.marketId ?? null,
        },
      });
    }

    // Simulate group chat creation. The RNG is drawn on every tick (it is the
    // left operand), but no chat is created during the first six ticks.
    if (this.rng.next() > 0.95 && i > 5) {
      // Create a new group chat occasionally
      const groupChatId = `group-${nextGroupChatId++}`;
      const adminAgentId = `agent-${Math.floor(this.rng.next() * this.config.numAgents)}`;
      const adminAgent = currentState.agents.find(
        (a: { id: string }) => a.id === adminAgentId
      );

      const groupChat: GroupChat = {
        id: groupChatId,
        name: `${adminAgent?.name || 'Agent'}'s Trading Group`,
        memberIds: [adminAgentId],
        messageCount: 0,
        lastActivity: tickTimestamp,
        invitedAgent: false,
        messages: [],
      };

      groupChatMap.set(groupChatId, groupChat);

      if (!currentState.groupChats) {
        currentState.groupChats = [];
      }
      currentState.groupChats.push(groupChat);

      events.push({
        type: 'group:created',
        timestamp: tickTimestamp,
        data: {
          groupId: groupChatId,
          adminId: adminAgentId,
          name: groupChat.name,
        },
      });
    }

    // Add messages to existing group chats (including one created this tick)
    for (const [groupId, groupChat] of groupChatMap.entries()) {
      if (this.rng.next() > 0.8 && groupChat.memberIds.length > 0) {
        const senderId =
          groupChat.memberIds[
            Math.floor(this.rng.next() * groupChat.memberIds.length)
          ]!;
        const sender = currentState.agents.find(
          (a: { id: string }) => a.id === senderId
        );

        const messageId = `msg-${i}-${groupId}-${Math.floor(this.rng.next() * 1000000)}`;
        const randomInsiderMsg =
          insiderMessages[
            Math.floor(this.rng.next() * insiderMessages.length)
          ]!;
        const message = {
          id: messageId,
          authorId: senderId,
          authorName: sender?.name || `Agent ${senderId.split('-')[1]}`,
          content: randomInsiderMsg,
          timestamp: tickTimestamp,
        };

        if (!groupChat.messages) {
          groupChat.messages = [];
        }
        groupChat.messages.push(message);
        groupChat.messageCount++;
        groupChat.lastActivity = tickTimestamp;

        // Keep only last 20 messages per group
        if (groupChat.messages.length > 20) {
          groupChat.messages = groupChat.messages.slice(-20);
        }

        events.push({
          type: 'group:message',
          timestamp: tickTimestamp,
          data: {
            groupId,
            messageId: message.id,
            authorId: senderId,
            content: message.content,
          },
        });
      }
    }

    // Simulate group chat invites (for the agent being tested)
    if (
      this.rng.next() > 0.9 &&
      currentState.groupChats &&
      currentState.groupChats.length > 0
    ) {
      const groupChat =
        currentState.groupChats[
          Math.floor(this.rng.next() * currentState.groupChats.length)
        ];
      if (groupChat && groupChat.memberIds.length < 10) {
        groupChat.invitedAgent = true;
        events.push({
          type: 'group:invite',
          timestamp: tickTimestamp,
          data: {
            groupId: groupChat.id,
            groupName: groupChat.name,
            inviterId: groupChat.memberIds[0] ?? 'unknown',
          },
        });
      }
    }

    // Update current state
    currentState.tick = i + 1;
    currentState.timestamp = tickTimestamp;

    // Update group chats array from map
    currentState.groupChats = Array.from(groupChatMap.values());

    // Create snapshot of state. Perpetual markets and group chats are copied
    // per object because later ticks mutate them (price, messages, invite
    // flag); the remaining collections only need a fresh array.
    const stateSnapshot: GameState = {
      ...currentState,
      predictionMarkets: [...currentState.predictionMarkets],
      perpetualMarkets: currentState.perpetualMarkets.map((perp) => ({
        ...perp,
      })),
      agents: [...currentState.agents],
      posts: currentState.posts ? [...currentState.posts] : [],
      groupChats: currentState.groupChats
        ? currentState.groupChats.map((gc) => ({
            ...gc,
            memberIds: [...gc.memberIds],
            messages: gc.messages ? [...gc.messages] : undefined,
          }))
        : [],
    };

    ticks.push({
      number: i,
      timestamp: tickTimestamp,
      events,
      state: stateSnapshot,
    });
  }

  return ticks;
}
1245
+ }
1246
+
1247
/**
 * Seeded random number generator for reproducibility.
 * Exported for use by other components (e.g., MarketMoverAgent).
 *
 * Implemented as a linear congruential generator:
 * `state = (state * 1664525 + 1013904223) mod 2^32`.
 * The same seed always yields the same sequence. Seeds are expected to be
 * finite integers; a NaN seed produces NaN from every call.
 */
export class SeededRandom {
  /** LCG modulus (2^32). */
  private static readonly MODULUS = 4294967296;
  /** LCG multiplier. */
  private static readonly MULTIPLIER = 1664525;
  /** LCG increment. */
  private static readonly INCREMENT = 1013904223;

  private seed: number;

  /**
   * @param seed - Initial generator state.
   */
  constructor(seed: number) {
    this.seed = seed;
  }

  /**
   * Generate next random number (0-1).
   *
   * @returns A value in [0, 1) for integer seeds.
   */
  next(): number {
    const raw =
      (this.seed * SeededRandom.MULTIPLIER + SeededRandom.INCREMENT) %
      SeededRandom.MODULUS;
    // JavaScript's % keeps the sign of the dividend, so a negative seed can
    // leave a negative remainder. Shift it back into [0, 2^32) so callers
    // never receive a negative value (and never compute a negative index).
    this.seed = raw < 0 ? raw + SeededRandom.MODULUS : raw;
    return this.seed / SeededRandom.MODULUS;
  }

  /**
   * Generate a random integer in the range [min, max] (inclusive).
   *
   * @param min - Lower bound (inclusive).
   * @param max - Upper bound (inclusive).
   */
  nextInt(min: number, max: number): number {
    return Math.floor(this.next() * (max - min + 1)) + min;
  }

  /**
   * Generate a random float in the range [min, max).
   *
   * @param min - Lower bound.
   * @param max - Upper bound.
   */
  nextFloat(min: number, max: number): number {
    return min + this.next() * (max - min);
  }

  /**
   * Pick a random element from an array.
   *
   * One random number is consumed even when the array is empty; in that case
   * the lookup yields `undefined` at runtime, so callers should pass a
   * non-empty array.
   *
   * @param array - Candidate elements (not modified).
   */
  pick<T>(array: readonly T[]): T {
    const index = Math.floor(this.next() * array.length);
    return array[index]!;
  }
}