@elizaos/training 2.0.0-alpha.13 → 2.0.0-alpha.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. package/package.json +2 -2
  2. package/research-output/training-runs/training-run-1773726941205.json +38 -0
  3. package/scripts/rank_trajectories.ts +0 -1
  4. package/scripts/run_task_benchmark.ts +4 -11
  5. package/src/adapter.ts +96 -49
  6. package/src/archetypes/ArchetypeConfigService.ts +188 -185
  7. package/src/archetypes/derive-archetype.ts +47 -47
  8. package/src/archetypes/index.ts +2 -2
  9. package/src/benchmark/ArchetypeMatchupBenchmark.ts +70 -70
  10. package/src/benchmark/BenchmarkChartGenerator.ts +70 -69
  11. package/src/benchmark/BenchmarkDataGenerator.ts +136 -136
  12. package/src/benchmark/BenchmarkDataViewer.ts +32 -30
  13. package/src/benchmark/BenchmarkHistoryService.ts +13 -12
  14. package/src/benchmark/BenchmarkRunner.ts +87 -83
  15. package/src/benchmark/BenchmarkValidator.ts +48 -46
  16. package/src/benchmark/FastEvalRunner.ts +17 -16
  17. package/src/benchmark/MetricsValidator.ts +20 -21
  18. package/src/benchmark/MetricsVisualizer.ts +92 -85
  19. package/src/benchmark/ModelBenchmarkService.ts +90 -82
  20. package/src/benchmark/ModelRegistry.ts +44 -44
  21. package/src/benchmark/RulerBenchmarkIntegration.ts +24 -24
  22. package/src/benchmark/SimulationA2AInterface.ts +118 -118
  23. package/src/benchmark/SimulationEngine.ts +51 -51
  24. package/src/benchmark/TaskRunner.ts +87 -79
  25. package/src/benchmark/__tests__/BenchmarkRunner.test.ts +80 -80
  26. package/src/benchmark/__tests__/HeadToHead.test.ts +26 -26
  27. package/src/benchmark/index.ts +27 -27
  28. package/src/benchmark/parseSimulationMetrics.ts +32 -32
  29. package/src/benchmark/simulation-types.ts +10 -10
  30. package/src/dependencies.ts +34 -34
  31. package/src/generation/TrajectoryGenerator.ts +39 -37
  32. package/src/generation/index.ts +1 -1
  33. package/src/huggingface/HuggingFaceDatasetUploader.ts +72 -72
  34. package/src/huggingface/HuggingFaceIntegrationService.ts +59 -53
  35. package/src/huggingface/HuggingFaceModelUploader.ts +60 -59
  36. package/src/huggingface/index.ts +6 -6
  37. package/src/huggingface/shared/HuggingFaceUploadUtil.ts +32 -32
  38. package/src/index.ts +27 -27
  39. package/src/init-training.ts +6 -6
  40. package/src/metrics/TrajectoryMetricsExtractor.ts +70 -71
  41. package/src/metrics/__tests__/TrajectoryMetricsExtractor.test.ts +182 -182
  42. package/src/metrics/index.ts +2 -2
  43. package/src/rubrics/__tests__/index.test.ts +73 -73
  44. package/src/rubrics/ass-kisser.ts +6 -6
  45. package/src/rubrics/degen.ts +6 -6
  46. package/src/rubrics/goody-twoshoes.ts +6 -6
  47. package/src/rubrics/index.ts +50 -50
  48. package/src/rubrics/information-trader.ts +6 -6
  49. package/src/rubrics/infosec.ts +6 -6
  50. package/src/rubrics/liar.ts +6 -6
  51. package/src/rubrics/perps-trader.ts +6 -6
  52. package/src/rubrics/researcher.ts +6 -6
  53. package/src/rubrics/scammer.ts +6 -6
  54. package/src/rubrics/social-butterfly.ts +7 -7
  55. package/src/rubrics/super-predictor.ts +6 -6
  56. package/src/rubrics/trader.ts +5 -5
  57. package/src/scoring/ArchetypeScoringService.ts +56 -54
  58. package/src/scoring/JudgePromptBuilder.ts +96 -96
  59. package/src/scoring/LLMJudgeCache.ts +26 -23
  60. package/src/scoring/index.ts +3 -3
  61. package/src/training/AutomationPipeline.ts +149 -140
  62. package/src/training/BenchmarkService.ts +49 -45
  63. package/src/training/ConfigValidator.ts +38 -32
  64. package/src/training/MarketOutcomesTracker.ts +22 -12
  65. package/src/training/ModelDeployer.ts +15 -15
  66. package/src/training/ModelFetcher.ts +7 -7
  67. package/src/training/ModelSelectionService.ts +32 -32
  68. package/src/training/ModelUsageVerifier.ts +31 -24
  69. package/src/training/MultiModelOrchestrator.ts +44 -44
  70. package/src/training/RLModelConfig.ts +57 -57
  71. package/src/training/RewardBackpropagationService.ts +18 -17
  72. package/src/training/RulerScoringService.ts +73 -72
  73. package/src/training/TrainingMonitor.ts +29 -29
  74. package/src/training/TrajectoryRecorder.ts +25 -27
  75. package/src/training/__tests__/TrajectoryRecorder.test.ts +105 -105
  76. package/src/training/index.ts +36 -36
  77. package/src/training/logRLConfig.ts +7 -7
  78. package/src/training/pipeline.ts +13 -16
  79. package/src/training/storage/ModelStorageService.ts +32 -32
  80. package/src/training/storage/TrainingDataArchiver.ts +21 -21
  81. package/src/training/storage/index.ts +2 -2
  82. package/src/training/types.ts +6 -6
  83. package/src/training/window-utils.ts +14 -14
  84. package/src/utils/index.ts +7 -7
  85. package/src/utils/logger.ts +5 -5
  86. package/src/utils/snowflake.ts +1 -1
  87. package/src/utils/synthetic-detector.ts +7 -7
@@ -7,29 +7,29 @@
7
7
  * @packageDocumentation
8
8
  */
9
9
 
10
- import type { JsonValue, TrajectoryRecord } from '../adapter';
11
- import { getTrainingDataAdapter, getLlmLogAdapter } from '../adapter';
12
- import { logger } from '../utils/logger';
13
- import { generateSnowflakeId } from '../utils/snowflake';
10
+ import type { JsonValue, TrajectoryRecord } from "../adapter";
11
+ import { getLlmLogAdapter, getTrainingDataAdapter } from "../adapter";
12
+ import { logger } from "../utils/logger";
13
+ import { generateSnowflakeId } from "../utils/snowflake";
14
14
  import type {
15
15
  Action,
16
16
  EnvironmentState,
17
17
  LLMCall,
18
18
  ProviderAccess,
19
19
  TrajectoryStep,
20
- } from './types';
21
- import { getCurrentWindowId } from './window-utils';
20
+ } from "./types";
21
+ import { getCurrentWindowId } from "./window-utils";
22
22
 
23
23
  export type {
24
- TrajectoryStep,
24
+ Action,
25
25
  EnvironmentState,
26
- ProviderAccess,
27
26
  LLMCall,
28
- Action,
27
+ ProviderAccess,
28
+ TrajectoryStep,
29
29
  };
30
30
 
31
- import * as fs from 'fs';
32
- import * as path from 'path';
31
+ import * as fs from "node:fs";
32
+ import * as path from "node:path";
33
33
 
34
34
  // ─── Simulation mode flag ────────────────────────────────────────────
35
35
  // Replaces the `isSimulationMode` import from `@elizaos/db`.
@@ -118,7 +118,7 @@ export class TrajectoryRecorder {
118
118
  steps: [],
119
119
  });
120
120
 
121
- logger.info('Started trajectory recording', {
121
+ logger.info("Started trajectory recording", {
122
122
  trajectoryId,
123
123
  agentId: options.agentId,
124
124
  archetype: options.archetype,
@@ -163,7 +163,7 @@ export class TrajectoryRecorder {
163
163
  providerName: string;
164
164
  data: Record<string, JsonValue>;
165
165
  purpose: string;
166
- }
166
+ },
167
167
  ): void {
168
168
  const traj = this.activeTrajectories.get(trajectoryId);
169
169
  if (!traj?.currentStep) {
@@ -225,7 +225,7 @@ export class TrajectoryRecorder {
225
225
  */
226
226
  async endTrajectory(
227
227
  trajectoryId: string,
228
- options: EndTrajectoryOptions = {}
228
+ options: EndTrajectoryOptions = {},
229
229
  ): Promise<void> {
230
230
  const traj = this.activeTrajectories.get(trajectoryId);
231
231
  if (!traj) {
@@ -240,19 +240,19 @@ export class TrajectoryRecorder {
240
240
  // Calculate metrics
241
241
  const tradesExecuted = traj.steps.filter(
242
242
  (s) =>
243
- s.action.actionType.includes('BUY') ||
244
- s.action.actionType.includes('SELL')
243
+ s.action.actionType.includes("BUY") ||
244
+ s.action.actionType.includes("SELL"),
245
245
  ).length;
246
246
 
247
247
  const postsCreated = traj.steps.filter((s) =>
248
- s.action.actionType.includes('POST')
248
+ s.action.actionType.includes("POST"),
249
249
  ).length;
250
250
 
251
251
  const errorCount = traj.steps.filter((s) => !s.action.success).length;
252
- const finalStatus = errorCount > 0 ? 'completed_with_errors' : 'completed';
252
+ const finalStatus = errorCount > 0 ? "completed_with_errors" : "completed";
253
253
 
254
254
  // 1. Prepare the standard data object (Used for both JSON and DB)
255
- const trajectoryData: Omit<TrajectoryRecord, 'createdAt' | 'updatedAt'> = {
255
+ const trajectoryData: Omit<TrajectoryRecord, "createdAt" | "updatedAt"> = {
256
256
  id: await generateSnowflakeId(),
257
257
  trajectoryId,
258
258
  agentId: traj.agentId,
@@ -261,9 +261,7 @@ export class TrajectoryRecorder {
261
261
  endTime: new Date(endTime),
262
262
  durationMs,
263
263
  scenarioId: traj.scenarioId || windowId,
264
- episodeId: traj.scenarioId
265
- ? `${traj.scenarioId}-${Date.now()}`
266
- : null,
264
+ episodeId: traj.scenarioId ? `${traj.scenarioId}-${Date.now()}` : null,
267
265
  windowId,
268
266
  windowHours: 1,
269
267
  batchId: null,
@@ -300,7 +298,7 @@ export class TrajectoryRecorder {
300
298
 
301
299
  // Simulation Mode Bypass
302
300
  if (isSimulationMode()) {
303
- const outputDir = './training-data-output/trajectories';
301
+ const outputDir = "./training-data-output/trajectories";
304
302
  if (!fs.existsSync(outputDir)) {
305
303
  fs.mkdirSync(outputDir, { recursive: true });
306
304
  }
@@ -312,7 +310,7 @@ export class TrajectoryRecorder {
312
310
  stepNumber: step.stepNumber,
313
311
  callIndex: idx,
314
312
  ...call,
315
- }))
313
+ })),
316
314
  ),
317
315
  };
318
316
 
@@ -320,9 +318,9 @@ export class TrajectoryRecorder {
320
318
  fs.writeFileSync(filePath, JSON.stringify(fullData, null, 2));
321
319
 
322
320
  logger.info(
323
- 'Saved trajectory to JSON (Simulation Mode)',
321
+ "Saved trajectory to JSON (Simulation Mode)",
324
322
  { trajectoryId, path: filePath },
325
- 'TrajectoryRecorder'
323
+ "TrajectoryRecorder",
326
324
  );
327
325
 
328
326
  this.activeTrajectories.delete(trajectoryId);
@@ -357,7 +355,7 @@ export class TrajectoryRecorder {
357
355
  }
358
356
  }
359
357
 
360
- logger.info('Trajectory saved to database', {
358
+ logger.info("Trajectory saved to database", {
361
359
  trajectoryId,
362
360
  archetype: traj.archetype,
363
361
  steps: traj.steps.length,
@@ -5,20 +5,20 @@
5
5
  * Uses simulation mode to avoid database dependency.
6
6
  */
7
7
 
8
- import { afterEach, beforeEach, describe, expect, test } from 'bun:test';
9
- import * as fs from 'fs';
10
- import * as path from 'path';
8
+ import { afterEach, beforeEach, describe, expect, test } from "bun:test";
9
+ import * as fs from "node:fs";
10
+ import * as path from "node:path";
11
11
 
12
- import { setSimulationMode, TrajectoryRecorder } from '../TrajectoryRecorder';
13
- import type { Action, EnvironmentState, LLMCall } from '../types';
12
+ import { setSimulationMode, TrajectoryRecorder } from "../TrajectoryRecorder";
13
+ import type { Action, EnvironmentState, LLMCall } from "../types";
14
14
 
15
15
  // =============================================================================
16
16
  // Test Setup
17
17
  // =============================================================================
18
18
 
19
- const TEST_OUTPUT_DIR = './training-data-output/trajectories';
19
+ const TEST_OUTPUT_DIR = "./training-data-output/trajectories";
20
20
 
21
- describe('TrajectoryRecorder - Real Class Tests', () => {
21
+ describe("TrajectoryRecorder - Real Class Tests", () => {
22
22
  let recorder: TrajectoryRecorder;
23
23
 
24
24
  beforeEach(() => {
@@ -28,7 +28,7 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
28
28
  if (fs.existsSync(TEST_OUTPUT_DIR)) {
29
29
  const files = fs.readdirSync(TEST_OUTPUT_DIR);
30
30
  for (const file of files) {
31
- if (file.startsWith('test-')) {
31
+ if (file.startsWith("test-")) {
32
32
  fs.unlinkSync(path.join(TEST_OUTPUT_DIR, file));
33
33
  }
34
34
  }
@@ -41,7 +41,7 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
41
41
  if (fs.existsSync(TEST_OUTPUT_DIR)) {
42
42
  const files = fs.readdirSync(TEST_OUTPUT_DIR);
43
43
  for (const file of files) {
44
- if (file.includes('test-agent')) {
44
+ if (file.includes("test-agent")) {
45
45
  try {
46
46
  fs.unlinkSync(path.join(TEST_OUTPUT_DIR, file));
47
47
  } catch {
@@ -56,23 +56,23 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
56
56
  // Lifecycle Tests
57
57
  // ===========================================================================
58
58
 
59
- test('startTrajectory creates a new active trajectory', async () => {
59
+ test("startTrajectory creates a new active trajectory", async () => {
60
60
  const trajectoryId = await recorder.startTrajectory({
61
- agentId: 'test-agent-1',
62
- archetype: 'trader',
61
+ agentId: "test-agent-1",
62
+ archetype: "trader",
63
63
  });
64
64
 
65
65
  expect(trajectoryId).toBeDefined();
66
- expect(typeof trajectoryId).toBe('string');
66
+ expect(typeof trajectoryId).toBe("string");
67
67
  expect(trajectoryId.length).toBeGreaterThan(10);
68
68
  expect(recorder.isActive(trajectoryId)).toBe(true);
69
69
  expect(recorder.getActiveCount()).toBe(1);
70
70
  });
71
71
 
72
- test('multiple trajectories can be active simultaneously', async () => {
73
- const id1 = await recorder.startTrajectory({ agentId: 'test-agent-1' });
74
- const id2 = await recorder.startTrajectory({ agentId: 'test-agent-2' });
75
- const id3 = await recorder.startTrajectory({ agentId: 'test-agent-3' });
72
+ test("multiple trajectories can be active simultaneously", async () => {
73
+ const id1 = await recorder.startTrajectory({ agentId: "test-agent-1" });
74
+ const id2 = await recorder.startTrajectory({ agentId: "test-agent-2" });
75
+ const id3 = await recorder.startTrajectory({ agentId: "test-agent-3" });
76
76
 
77
77
  expect(recorder.getActiveCount()).toBe(3);
78
78
  expect(recorder.isActive(id1)).toBe(true);
@@ -82,24 +82,24 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
82
82
  expect(id2).not.toBe(id3);
83
83
  });
84
84
 
85
- test('getActiveTrajectory returns correct trajectory', async () => {
85
+ test("getActiveTrajectory returns correct trajectory", async () => {
86
86
  const trajectoryId = await recorder.startTrajectory({
87
- agentId: 'test-agent-x',
88
- archetype: 'degen',
89
- scenarioId: 'test-scenario',
87
+ agentId: "test-agent-x",
88
+ archetype: "degen",
89
+ scenarioId: "test-scenario",
90
90
  });
91
91
 
92
92
  const active = recorder.getActiveTrajectory(trajectoryId);
93
93
 
94
94
  expect(active).toBeDefined();
95
- expect(active?.agentId).toBe('test-agent-x');
96
- expect(active?.archetype).toBe('degen');
97
- expect(active?.scenarioId).toBe('test-scenario');
95
+ expect(active?.agentId).toBe("test-agent-x");
96
+ expect(active?.archetype).toBe("degen");
97
+ expect(active?.scenarioId).toBe("test-scenario");
98
98
  expect(active?.steps).toHaveLength(0);
99
99
  });
100
100
 
101
- test('getActiveTrajectory returns undefined for non-existent id', () => {
102
- const result = recorder.getActiveTrajectory('non-existent-id');
101
+ test("getActiveTrajectory returns undefined for non-existent id", () => {
102
+ const result = recorder.getActiveTrajectory("non-existent-id");
103
103
  expect(result).toBeUndefined();
104
104
  });
105
105
 
@@ -107,9 +107,9 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
107
107
  // Step Recording Tests
108
108
  // ===========================================================================
109
109
 
110
- test('startStep initializes current step with environment state', async () => {
110
+ test("startStep initializes current step with environment state", async () => {
111
111
  const trajectoryId = await recorder.startTrajectory({
112
- agentId: 'test-agent',
112
+ agentId: "test-agent",
113
113
  });
114
114
 
115
115
  const envState: EnvironmentState = {
@@ -126,19 +126,19 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
126
126
  expect(active?.currentStep?.stepNumber).toBe(0);
127
127
  });
128
128
 
129
- test('startStep throws for non-existent trajectory', () => {
129
+ test("startStep throws for non-existent trajectory", () => {
130
130
  expect(() => {
131
- recorder.startStep('fake-id', {
131
+ recorder.startStep("fake-id", {
132
132
  agentBalance: 0,
133
133
  agentPnL: 0,
134
134
  openPositions: 0,
135
135
  });
136
- }).toThrow('Trajectory not found: fake-id');
136
+ }).toThrow("Trajectory not found: fake-id");
137
137
  });
138
138
 
139
- test('logProviderAccess adds provider data to current step', async () => {
139
+ test("logProviderAccess adds provider data to current step", async () => {
140
140
  const trajectoryId = await recorder.startTrajectory({
141
- agentId: 'test-agent',
141
+ agentId: "test-agent",
142
142
  });
143
143
  recorder.startStep(trajectoryId, {
144
144
  agentBalance: 1000,
@@ -147,36 +147,36 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
147
147
  });
148
148
 
149
149
  recorder.logProviderAccess(trajectoryId, {
150
- providerName: 'market-data',
151
- data: { ticker: 'BTCAI', price: 50000 },
152
- purpose: 'price lookup',
150
+ providerName: "market-data",
151
+ data: { ticker: "BTCAI", price: 50000 },
152
+ purpose: "price lookup",
153
153
  });
154
154
 
155
155
  const active = recorder.getActiveTrajectory(trajectoryId);
156
156
  expect(active?.currentStep?.providerAccesses).toHaveLength(1);
157
157
  expect(active?.currentStep?.providerAccesses?.[0]?.providerName).toBe(
158
- 'market-data'
158
+ "market-data",
159
159
  );
160
160
  });
161
161
 
162
- test('logProviderAccess throws when no current step', async () => {
162
+ test("logProviderAccess throws when no current step", async () => {
163
163
  const trajectoryId = await recorder.startTrajectory({
164
- agentId: 'test-agent',
164
+ agentId: "test-agent",
165
165
  });
166
166
  // Don't call startStep
167
167
 
168
168
  expect(() => {
169
169
  recorder.logProviderAccess(trajectoryId, {
170
- providerName: 'test',
170
+ providerName: "test",
171
171
  data: {},
172
- purpose: 'test',
172
+ purpose: "test",
173
173
  });
174
- }).toThrow('No current step');
174
+ }).toThrow("No current step");
175
175
  });
176
176
 
177
- test('logLLMCall adds LLM call to current step', async () => {
177
+ test("logLLMCall adds LLM call to current step", async () => {
178
178
  const trajectoryId = await recorder.startTrajectory({
179
- agentId: 'test-agent',
179
+ agentId: "test-agent",
180
180
  });
181
181
  recorder.startStep(trajectoryId, {
182
182
  agentBalance: 1000,
@@ -185,14 +185,14 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
185
185
  });
186
186
 
187
187
  const llmCall: LLMCall = {
188
- model: 'qwen-32b',
189
- systemPrompt: 'You are a trading agent',
190
- userPrompt: 'What should I do?',
191
- response: 'Buy BTCAI',
192
- reasoning: 'Bullish momentum',
188
+ model: "qwen-32b",
189
+ systemPrompt: "You are a trading agent",
190
+ userPrompt: "What should I do?",
191
+ response: "Buy BTCAI",
192
+ reasoning: "Bullish momentum",
193
193
  temperature: 0.7,
194
194
  maxTokens: 2000,
195
- purpose: 'action',
195
+ purpose: "action",
196
196
  latencyMs: 250,
197
197
  };
198
198
 
@@ -200,13 +200,13 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
200
200
 
201
201
  const active = recorder.getActiveTrajectory(trajectoryId);
202
202
  expect(active?.currentStep?.llmCalls).toHaveLength(1);
203
- expect(active?.currentStep?.llmCalls?.[0]?.model).toBe('qwen-32b');
203
+ expect(active?.currentStep?.llmCalls?.[0]?.model).toBe("qwen-32b");
204
204
  expect(active?.currentStep?.llmCalls?.[0]?.latencyMs).toBe(250);
205
205
  });
206
206
 
207
- test('completeStep finalizes step and adds to trajectory', async () => {
207
+ test("completeStep finalizes step and adds to trajectory", async () => {
208
208
  const trajectoryId = await recorder.startTrajectory({
209
- agentId: 'test-agent',
209
+ agentId: "test-agent",
210
210
  });
211
211
  recorder.startStep(trajectoryId, {
212
212
  agentBalance: 1000,
@@ -215,8 +215,8 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
215
215
  });
216
216
 
217
217
  const action: Action = {
218
- actionType: 'buy',
219
- parameters: { ticker: 'BTCAI', amount: 100 },
218
+ actionType: "buy",
219
+ parameters: { ticker: "BTCAI", amount: 100 },
220
220
  success: true,
221
221
  };
222
222
 
@@ -224,14 +224,14 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
224
224
 
225
225
  const active = recorder.getActiveTrajectory(trajectoryId);
226
226
  expect(active?.steps).toHaveLength(1);
227
- expect(active?.steps[0]?.action.actionType).toBe('buy');
227
+ expect(active?.steps[0]?.action.actionType).toBe("buy");
228
228
  expect(active?.steps[0]?.reward).toBe(0.5);
229
229
  expect(active?.currentStep).toBeUndefined();
230
230
  });
231
231
 
232
- test('multiple steps increment step number correctly', async () => {
232
+ test("multiple steps increment step number correctly", async () => {
233
233
  const trajectoryId = await recorder.startTrajectory({
234
- agentId: 'test-agent',
234
+ agentId: "test-agent",
235
235
  });
236
236
 
237
237
  for (let i = 0; i < 5; i++) {
@@ -242,8 +242,8 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
242
242
  });
243
243
  recorder.completeStep(
244
244
  trajectoryId,
245
- { actionType: 'hold', parameters: {}, success: true },
246
- 0.1
245
+ { actionType: "hold", parameters: {}, success: true },
246
+ 0.1,
247
247
  );
248
248
  }
249
249
 
@@ -257,10 +257,10 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
257
257
  // End Trajectory Tests (Simulation Mode - File Output)
258
258
  // ===========================================================================
259
259
 
260
- test('endTrajectory saves JSON file in simulation mode', async () => {
260
+ test("endTrajectory saves JSON file in simulation mode", async () => {
261
261
  const trajectoryId = await recorder.startTrajectory({
262
- agentId: 'test-agent-file',
263
- archetype: 'trader',
262
+ agentId: "test-agent-file",
263
+ archetype: "trader",
264
264
  });
265
265
 
266
266
  // Add a step
@@ -270,18 +270,18 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
270
270
  openPositions: 0,
271
271
  });
272
272
  recorder.logLLMCall(trajectoryId, {
273
- model: 'test-model',
274
- systemPrompt: 'system',
275
- userPrompt: 'user',
276
- response: 'response',
273
+ model: "test-model",
274
+ systemPrompt: "system",
275
+ userPrompt: "user",
276
+ response: "response",
277
277
  temperature: 0.5,
278
278
  maxTokens: 100,
279
- purpose: 'action',
279
+ purpose: "action",
280
280
  });
281
281
  recorder.completeStep(
282
282
  trajectoryId,
283
- { actionType: 'buy', parameters: { ticker: 'BTCAI' }, success: true },
284
- 1.0
283
+ { actionType: "buy", parameters: { ticker: "BTCAI" }, success: true },
284
+ 1.0,
285
285
  );
286
286
 
287
287
  await recorder.endTrajectory(trajectoryId, {
@@ -294,9 +294,9 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
294
294
  expect(fs.existsSync(filePath)).toBe(true);
295
295
 
296
296
  // Verify file contents
297
- const content = JSON.parse(fs.readFileSync(filePath, 'utf-8'));
298
- expect(content.trajectory.agentId).toBe('test-agent-file');
299
- expect(content.trajectory.archetype).toBe('trader');
297
+ const content = JSON.parse(fs.readFileSync(filePath, "utf-8"));
298
+ expect(content.trajectory.agentId).toBe("test-agent-file");
299
+ expect(content.trajectory.archetype).toBe("trader");
300
300
  expect(content.trajectory.episodeLength).toBe(1);
301
301
  expect(content.trajectory.finalBalance).toBe(10500);
302
302
  expect(content.trajectory.finalPnL).toBe(500);
@@ -306,9 +306,9 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
306
306
  fs.unlinkSync(filePath);
307
307
  });
308
308
 
309
- test('endTrajectory removes trajectory from active map', async () => {
309
+ test("endTrajectory removes trajectory from active map", async () => {
310
310
  const trajectoryId = await recorder.startTrajectory({
311
- agentId: 'test-agent',
311
+ agentId: "test-agent",
312
312
  });
313
313
  expect(recorder.isActive(trajectoryId)).toBe(true);
314
314
 
@@ -318,15 +318,15 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
318
318
  expect(recorder.getActiveCount()).toBe(0);
319
319
  });
320
320
 
321
- test('endTrajectory throws for non-existent trajectory', async () => {
322
- await expect(recorder.endTrajectory('fake-id')).rejects.toThrow(
323
- 'Trajectory not found: fake-id'
321
+ test("endTrajectory throws for non-existent trajectory", async () => {
322
+ await expect(recorder.endTrajectory("fake-id")).rejects.toThrow(
323
+ "Trajectory not found: fake-id",
324
324
  );
325
325
  });
326
326
 
327
- test('endTrajectory calculates metrics correctly', async () => {
327
+ test("endTrajectory calculates metrics correctly", async () => {
328
328
  const trajectoryId = await recorder.startTrajectory({
329
- agentId: 'test-agent',
329
+ agentId: "test-agent",
330
330
  });
331
331
 
332
332
  // Add buy action
@@ -337,8 +337,8 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
337
337
  });
338
338
  recorder.completeStep(
339
339
  trajectoryId,
340
- { actionType: 'BUY_YES', parameters: {}, success: true },
341
- 1.0
340
+ { actionType: "BUY_YES", parameters: {}, success: true },
341
+ 1.0,
342
342
  );
343
343
 
344
344
  // Add sell action
@@ -349,8 +349,8 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
349
349
  });
350
350
  recorder.completeStep(
351
351
  trajectoryId,
352
- { actionType: 'SELL', parameters: {}, success: true },
353
- 0.5
352
+ { actionType: "SELL", parameters: {}, success: true },
353
+ 0.5,
354
354
  );
355
355
 
356
356
  // Add failed action
@@ -362,24 +362,24 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
362
362
  recorder.completeStep(
363
363
  trajectoryId,
364
364
  {
365
- actionType: 'BUY_NO',
365
+ actionType: "BUY_NO",
366
366
  parameters: {},
367
367
  success: false,
368
- error: 'Insufficient funds',
368
+ error: "Insufficient funds",
369
369
  },
370
- -0.5
370
+ -0.5,
371
371
  );
372
372
 
373
373
  await recorder.endTrajectory(trajectoryId);
374
374
 
375
375
  // Check that file was written with correct metrics
376
376
  const filePath = path.join(TEST_OUTPUT_DIR, `${trajectoryId}.json`);
377
- const content = JSON.parse(fs.readFileSync(filePath, 'utf-8'));
377
+ const content = JSON.parse(fs.readFileSync(filePath, "utf-8"));
378
378
 
379
379
  expect(content.trajectory.episodeLength).toBe(3);
380
380
  expect(content.trajectory.tradesExecuted).toBe(3); // BUY_YES, SELL, BUY_NO
381
381
  expect(content.trajectory.totalReward).toBe(1.0); // 1.0 + 0.5 + (-0.5)
382
- expect(content.trajectory.finalStatus).toBe('completed_with_errors');
382
+ expect(content.trajectory.finalStatus).toBe("completed_with_errors");
383
383
 
384
384
  fs.unlinkSync(filePath);
385
385
  });
@@ -388,15 +388,15 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
388
388
  // Edge Cases
389
389
  // ===========================================================================
390
390
 
391
- test('handles trajectory with zero steps', async () => {
391
+ test("handles trajectory with zero steps", async () => {
392
392
  const trajectoryId = await recorder.startTrajectory({
393
- agentId: 'test-agent',
393
+ agentId: "test-agent",
394
394
  });
395
395
 
396
396
  await recorder.endTrajectory(trajectoryId);
397
397
 
398
398
  const filePath = path.join(TEST_OUTPUT_DIR, `${trajectoryId}.json`);
399
- const content = JSON.parse(fs.readFileSync(filePath, 'utf-8'));
399
+ const content = JSON.parse(fs.readFileSync(filePath, "utf-8"));
400
400
 
401
401
  expect(content.trajectory.episodeLength).toBe(0);
402
402
  expect(content.trajectory.totalReward).toBe(0);
@@ -405,9 +405,9 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
405
405
  fs.unlinkSync(filePath);
406
406
  });
407
407
 
408
- test('handles very long prompts in LLM calls', async () => {
408
+ test("handles very long prompts in LLM calls", async () => {
409
409
  const trajectoryId = await recorder.startTrajectory({
410
- agentId: 'test-agent',
410
+ agentId: "test-agent",
411
411
  });
412
412
  recorder.startStep(trajectoryId, {
413
413
  agentBalance: 1000,
@@ -415,22 +415,22 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
415
415
  openPositions: 0,
416
416
  });
417
417
 
418
- const longPrompt = 'A'.repeat(50000); // 50k characters
418
+ const longPrompt = "A".repeat(50000); // 50k characters
419
419
 
420
420
  recorder.logLLMCall(trajectoryId, {
421
- model: 'test',
421
+ model: "test",
422
422
  systemPrompt: longPrompt,
423
423
  userPrompt: longPrompt,
424
424
  response: longPrompt,
425
425
  temperature: 0.5,
426
426
  maxTokens: 100,
427
- purpose: 'action',
427
+ purpose: "action",
428
428
  });
429
429
 
430
430
  recorder.completeStep(
431
431
  trajectoryId,
432
- { actionType: 'hold', parameters: {}, success: true },
433
- 0
432
+ { actionType: "hold", parameters: {}, success: true },
433
+ 0,
434
434
  );
435
435
 
436
436
  await recorder.endTrajectory(trajectoryId);
@@ -438,15 +438,15 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
438
438
  const filePath = path.join(TEST_OUTPUT_DIR, `${trajectoryId}.json`);
439
439
  expect(fs.existsSync(filePath)).toBe(true);
440
440
 
441
- const content = JSON.parse(fs.readFileSync(filePath, 'utf-8'));
441
+ const content = JSON.parse(fs.readFileSync(filePath, "utf-8"));
442
442
  expect(content.llmCalls[0].systemPrompt.length).toBe(50000);
443
443
 
444
444
  fs.unlinkSync(filePath);
445
445
  });
446
446
 
447
- test('handles negative rewards correctly', async () => {
447
+ test("handles negative rewards correctly", async () => {
448
448
  const trajectoryId = await recorder.startTrajectory({
449
- agentId: 'test-agent',
449
+ agentId: "test-agent",
450
450
  });
451
451
 
452
452
  recorder.startStep(trajectoryId, {
@@ -456,14 +456,14 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
456
456
  });
457
457
  recorder.completeStep(
458
458
  trajectoryId,
459
- { actionType: 'buy', parameters: {}, success: false, error: 'Bad trade' },
460
- -5.0
459
+ { actionType: "buy", parameters: {}, success: false, error: "Bad trade" },
460
+ -5.0,
461
461
  );
462
462
 
463
463
  await recorder.endTrajectory(trajectoryId);
464
464
 
465
465
  const filePath = path.join(TEST_OUTPUT_DIR, `${trajectoryId}.json`);
466
- const content = JSON.parse(fs.readFileSync(filePath, 'utf-8'));
466
+ const content = JSON.parse(fs.readFileSync(filePath, "utf-8"));
467
467
 
468
468
  expect(content.trajectory.totalReward).toBe(-5.0);
469
469