@elizaos/training 2.0.0-alpha.13 → 2.0.0-alpha.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/research-output/training-runs/training-run-1773726941205.json +38 -0
- package/scripts/rank_trajectories.ts +0 -1
- package/scripts/run_task_benchmark.ts +4 -11
- package/src/adapter.ts +96 -49
- package/src/archetypes/ArchetypeConfigService.ts +188 -185
- package/src/archetypes/derive-archetype.ts +47 -47
- package/src/archetypes/index.ts +2 -2
- package/src/benchmark/ArchetypeMatchupBenchmark.ts +70 -70
- package/src/benchmark/BenchmarkChartGenerator.ts +70 -69
- package/src/benchmark/BenchmarkDataGenerator.ts +136 -136
- package/src/benchmark/BenchmarkDataViewer.ts +32 -30
- package/src/benchmark/BenchmarkHistoryService.ts +13 -12
- package/src/benchmark/BenchmarkRunner.ts +87 -83
- package/src/benchmark/BenchmarkValidator.ts +48 -46
- package/src/benchmark/FastEvalRunner.ts +17 -16
- package/src/benchmark/MetricsValidator.ts +20 -21
- package/src/benchmark/MetricsVisualizer.ts +92 -85
- package/src/benchmark/ModelBenchmarkService.ts +90 -82
- package/src/benchmark/ModelRegistry.ts +44 -44
- package/src/benchmark/RulerBenchmarkIntegration.ts +24 -24
- package/src/benchmark/SimulationA2AInterface.ts +118 -118
- package/src/benchmark/SimulationEngine.ts +51 -51
- package/src/benchmark/TaskRunner.ts +87 -79
- package/src/benchmark/__tests__/BenchmarkRunner.test.ts +80 -80
- package/src/benchmark/__tests__/HeadToHead.test.ts +26 -26
- package/src/benchmark/index.ts +27 -27
- package/src/benchmark/parseSimulationMetrics.ts +32 -32
- package/src/benchmark/simulation-types.ts +10 -10
- package/src/dependencies.ts +34 -34
- package/src/generation/TrajectoryGenerator.ts +39 -37
- package/src/generation/index.ts +1 -1
- package/src/huggingface/HuggingFaceDatasetUploader.ts +72 -72
- package/src/huggingface/HuggingFaceIntegrationService.ts +59 -53
- package/src/huggingface/HuggingFaceModelUploader.ts +60 -59
- package/src/huggingface/index.ts +6 -6
- package/src/huggingface/shared/HuggingFaceUploadUtil.ts +32 -32
- package/src/index.ts +27 -27
- package/src/init-training.ts +6 -6
- package/src/metrics/TrajectoryMetricsExtractor.ts +70 -71
- package/src/metrics/__tests__/TrajectoryMetricsExtractor.test.ts +182 -182
- package/src/metrics/index.ts +2 -2
- package/src/rubrics/__tests__/index.test.ts +73 -73
- package/src/rubrics/ass-kisser.ts +6 -6
- package/src/rubrics/degen.ts +6 -6
- package/src/rubrics/goody-twoshoes.ts +6 -6
- package/src/rubrics/index.ts +50 -50
- package/src/rubrics/information-trader.ts +6 -6
- package/src/rubrics/infosec.ts +6 -6
- package/src/rubrics/liar.ts +6 -6
- package/src/rubrics/perps-trader.ts +6 -6
- package/src/rubrics/researcher.ts +6 -6
- package/src/rubrics/scammer.ts +6 -6
- package/src/rubrics/social-butterfly.ts +7 -7
- package/src/rubrics/super-predictor.ts +6 -6
- package/src/rubrics/trader.ts +5 -5
- package/src/scoring/ArchetypeScoringService.ts +56 -54
- package/src/scoring/JudgePromptBuilder.ts +96 -96
- package/src/scoring/LLMJudgeCache.ts +26 -23
- package/src/scoring/index.ts +3 -3
- package/src/training/AutomationPipeline.ts +149 -140
- package/src/training/BenchmarkService.ts +49 -45
- package/src/training/ConfigValidator.ts +38 -32
- package/src/training/MarketOutcomesTracker.ts +22 -12
- package/src/training/ModelDeployer.ts +15 -15
- package/src/training/ModelFetcher.ts +7 -7
- package/src/training/ModelSelectionService.ts +32 -32
- package/src/training/ModelUsageVerifier.ts +31 -24
- package/src/training/MultiModelOrchestrator.ts +44 -44
- package/src/training/RLModelConfig.ts +57 -57
- package/src/training/RewardBackpropagationService.ts +18 -17
- package/src/training/RulerScoringService.ts +73 -72
- package/src/training/TrainingMonitor.ts +29 -29
- package/src/training/TrajectoryRecorder.ts +25 -27
- package/src/training/__tests__/TrajectoryRecorder.test.ts +105 -105
- package/src/training/index.ts +36 -36
- package/src/training/logRLConfig.ts +7 -7
- package/src/training/pipeline.ts +13 -16
- package/src/training/storage/ModelStorageService.ts +32 -32
- package/src/training/storage/TrainingDataArchiver.ts +21 -21
- package/src/training/storage/index.ts +2 -2
- package/src/training/types.ts +6 -6
- package/src/training/window-utils.ts +14 -14
- package/src/utils/index.ts +7 -7
- package/src/utils/logger.ts +5 -5
- package/src/utils/snowflake.ts +1 -1
- package/src/utils/synthetic-detector.ts +7 -7
|
@@ -7,29 +7,29 @@
|
|
|
7
7
|
* @packageDocumentation
|
|
8
8
|
*/
|
|
9
9
|
|
|
10
|
-
import type { JsonValue, TrajectoryRecord } from '../adapter';
|
|
11
|
-
import {
|
|
12
|
-
import { logger } from '../utils/logger';
|
|
13
|
-
import { generateSnowflakeId } from '../utils/snowflake';
|
|
10
|
+
import type { JsonValue, TrajectoryRecord } from "../adapter";
|
|
11
|
+
import { getLlmLogAdapter, getTrainingDataAdapter } from "../adapter";
|
|
12
|
+
import { logger } from "../utils/logger";
|
|
13
|
+
import { generateSnowflakeId } from "../utils/snowflake";
|
|
14
14
|
import type {
|
|
15
15
|
Action,
|
|
16
16
|
EnvironmentState,
|
|
17
17
|
LLMCall,
|
|
18
18
|
ProviderAccess,
|
|
19
19
|
TrajectoryStep,
|
|
20
|
-
} from
|
|
21
|
-
import { getCurrentWindowId } from './window-utils';
|
|
20
|
+
} from "./types";
|
|
21
|
+
import { getCurrentWindowId } from "./window-utils";
|
|
22
22
|
|
|
23
23
|
export type {
|
|
24
|
-
|
|
24
|
+
Action,
|
|
25
25
|
EnvironmentState,
|
|
26
|
-
ProviderAccess,
|
|
27
26
|
LLMCall,
|
|
28
|
-
|
|
27
|
+
ProviderAccess,
|
|
28
|
+
TrajectoryStep,
|
|
29
29
|
};
|
|
30
30
|
|
|
31
|
-
import * as fs from
|
|
32
|
-
import * as path from
|
|
31
|
+
import * as fs from "node:fs";
|
|
32
|
+
import * as path from "node:path";
|
|
33
33
|
|
|
34
34
|
// ─── Simulation mode flag ────────────────────────────────────────────
|
|
35
35
|
// Replaces the `isSimulationMode` import from `@elizaos/db`.
|
|
@@ -118,7 +118,7 @@ export class TrajectoryRecorder {
|
|
|
118
118
|
steps: [],
|
|
119
119
|
});
|
|
120
120
|
|
|
121
|
-
logger.info(
|
|
121
|
+
logger.info("Started trajectory recording", {
|
|
122
122
|
trajectoryId,
|
|
123
123
|
agentId: options.agentId,
|
|
124
124
|
archetype: options.archetype,
|
|
@@ -163,7 +163,7 @@ export class TrajectoryRecorder {
|
|
|
163
163
|
providerName: string;
|
|
164
164
|
data: Record<string, JsonValue>;
|
|
165
165
|
purpose: string;
|
|
166
|
-
}
|
|
166
|
+
},
|
|
167
167
|
): void {
|
|
168
168
|
const traj = this.activeTrajectories.get(trajectoryId);
|
|
169
169
|
if (!traj?.currentStep) {
|
|
@@ -225,7 +225,7 @@ export class TrajectoryRecorder {
|
|
|
225
225
|
*/
|
|
226
226
|
async endTrajectory(
|
|
227
227
|
trajectoryId: string,
|
|
228
|
-
options: EndTrajectoryOptions = {}
|
|
228
|
+
options: EndTrajectoryOptions = {},
|
|
229
229
|
): Promise<void> {
|
|
230
230
|
const traj = this.activeTrajectories.get(trajectoryId);
|
|
231
231
|
if (!traj) {
|
|
@@ -240,19 +240,19 @@ export class TrajectoryRecorder {
|
|
|
240
240
|
// Calculate metrics
|
|
241
241
|
const tradesExecuted = traj.steps.filter(
|
|
242
242
|
(s) =>
|
|
243
|
-
s.action.actionType.includes(
|
|
244
|
-
s.action.actionType.includes(
|
|
243
|
+
s.action.actionType.includes("BUY") ||
|
|
244
|
+
s.action.actionType.includes("SELL"),
|
|
245
245
|
).length;
|
|
246
246
|
|
|
247
247
|
const postsCreated = traj.steps.filter((s) =>
|
|
248
|
-
s.action.actionType.includes(
|
|
248
|
+
s.action.actionType.includes("POST"),
|
|
249
249
|
).length;
|
|
250
250
|
|
|
251
251
|
const errorCount = traj.steps.filter((s) => !s.action.success).length;
|
|
252
|
-
const finalStatus = errorCount > 0 ? 'completed_with_errors' : 'completed';
|
|
252
|
+
const finalStatus = errorCount > 0 ? "completed_with_errors" : "completed";
|
|
253
253
|
|
|
254
254
|
// 1. Prepare the standard data object (Used for both JSON and DB)
|
|
255
|
-
const trajectoryData: Omit<TrajectoryRecord,
|
|
255
|
+
const trajectoryData: Omit<TrajectoryRecord, "createdAt" | "updatedAt"> = {
|
|
256
256
|
id: await generateSnowflakeId(),
|
|
257
257
|
trajectoryId,
|
|
258
258
|
agentId: traj.agentId,
|
|
@@ -261,9 +261,7 @@ export class TrajectoryRecorder {
|
|
|
261
261
|
endTime: new Date(endTime),
|
|
262
262
|
durationMs,
|
|
263
263
|
scenarioId: traj.scenarioId || windowId,
|
|
264
|
-
episodeId: traj.scenarioId
|
|
265
|
-
? `${traj.scenarioId}-${Date.now()}`
|
|
266
|
-
: null,
|
|
264
|
+
episodeId: traj.scenarioId ? `${traj.scenarioId}-${Date.now()}` : null,
|
|
267
265
|
windowId,
|
|
268
266
|
windowHours: 1,
|
|
269
267
|
batchId: null,
|
|
@@ -300,7 +298,7 @@ export class TrajectoryRecorder {
|
|
|
300
298
|
|
|
301
299
|
// Simulation Mode Bypass
|
|
302
300
|
if (isSimulationMode()) {
|
|
303
|
-
const outputDir =
|
|
301
|
+
const outputDir = "./training-data-output/trajectories";
|
|
304
302
|
if (!fs.existsSync(outputDir)) {
|
|
305
303
|
fs.mkdirSync(outputDir, { recursive: true });
|
|
306
304
|
}
|
|
@@ -312,7 +310,7 @@ export class TrajectoryRecorder {
|
|
|
312
310
|
stepNumber: step.stepNumber,
|
|
313
311
|
callIndex: idx,
|
|
314
312
|
...call,
|
|
315
|
-
}))
|
|
313
|
+
})),
|
|
316
314
|
),
|
|
317
315
|
};
|
|
318
316
|
|
|
@@ -320,9 +318,9 @@ export class TrajectoryRecorder {
|
|
|
320
318
|
fs.writeFileSync(filePath, JSON.stringify(fullData, null, 2));
|
|
321
319
|
|
|
322
320
|
logger.info(
|
|
323
|
-
|
|
321
|
+
"Saved trajectory to JSON (Simulation Mode)",
|
|
324
322
|
{ trajectoryId, path: filePath },
|
|
325
|
-
|
|
323
|
+
"TrajectoryRecorder",
|
|
326
324
|
);
|
|
327
325
|
|
|
328
326
|
this.activeTrajectories.delete(trajectoryId);
|
|
@@ -357,7 +355,7 @@ export class TrajectoryRecorder {
|
|
|
357
355
|
}
|
|
358
356
|
}
|
|
359
357
|
|
|
360
|
-
logger.info(
|
|
358
|
+
logger.info("Trajectory saved to database", {
|
|
361
359
|
trajectoryId,
|
|
362
360
|
archetype: traj.archetype,
|
|
363
361
|
steps: traj.steps.length,
|
|
@@ -5,20 +5,20 @@
|
|
|
5
5
|
* Uses simulation mode to avoid database dependency.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
|
-
import { afterEach, beforeEach, describe, expect, test } from 'bun:test';
|
|
9
|
-
import * as fs from
|
|
10
|
-
import * as path from
|
|
8
|
+
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
|
|
9
|
+
import * as fs from "node:fs";
|
|
10
|
+
import * as path from "node:path";
|
|
11
11
|
|
|
12
|
-
import { setSimulationMode, TrajectoryRecorder } from '../TrajectoryRecorder';
|
|
13
|
-
import type { Action, EnvironmentState, LLMCall } from
|
|
12
|
+
import { setSimulationMode, TrajectoryRecorder } from "../TrajectoryRecorder";
|
|
13
|
+
import type { Action, EnvironmentState, LLMCall } from "../types";
|
|
14
14
|
|
|
15
15
|
// =============================================================================
|
|
16
16
|
// Test Setup
|
|
17
17
|
// =============================================================================
|
|
18
18
|
|
|
19
|
-
const TEST_OUTPUT_DIR = './training-data-output/trajectories';
|
|
19
|
+
const TEST_OUTPUT_DIR = "./training-data-output/trajectories";
|
|
20
20
|
|
|
21
|
-
describe('TrajectoryRecorder - Real Class Tests', () => {
|
|
21
|
+
describe("TrajectoryRecorder - Real Class Tests", () => {
|
|
22
22
|
let recorder: TrajectoryRecorder;
|
|
23
23
|
|
|
24
24
|
beforeEach(() => {
|
|
@@ -28,7 +28,7 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
|
|
|
28
28
|
if (fs.existsSync(TEST_OUTPUT_DIR)) {
|
|
29
29
|
const files = fs.readdirSync(TEST_OUTPUT_DIR);
|
|
30
30
|
for (const file of files) {
|
|
31
|
-
if (file.startsWith(
|
|
31
|
+
if (file.startsWith("test-")) {
|
|
32
32
|
fs.unlinkSync(path.join(TEST_OUTPUT_DIR, file));
|
|
33
33
|
}
|
|
34
34
|
}
|
|
@@ -41,7 +41,7 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
|
|
|
41
41
|
if (fs.existsSync(TEST_OUTPUT_DIR)) {
|
|
42
42
|
const files = fs.readdirSync(TEST_OUTPUT_DIR);
|
|
43
43
|
for (const file of files) {
|
|
44
|
-
if (file.includes(
|
|
44
|
+
if (file.includes("test-agent")) {
|
|
45
45
|
try {
|
|
46
46
|
fs.unlinkSync(path.join(TEST_OUTPUT_DIR, file));
|
|
47
47
|
} catch {
|
|
@@ -56,23 +56,23 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
|
|
|
56
56
|
// Lifecycle Tests
|
|
57
57
|
// ===========================================================================
|
|
58
58
|
|
|
59
|
-
test(
|
|
59
|
+
test("startTrajectory creates a new active trajectory", async () => {
|
|
60
60
|
const trajectoryId = await recorder.startTrajectory({
|
|
61
|
-
agentId:
|
|
62
|
-
archetype:
|
|
61
|
+
agentId: "test-agent-1",
|
|
62
|
+
archetype: "trader",
|
|
63
63
|
});
|
|
64
64
|
|
|
65
65
|
expect(trajectoryId).toBeDefined();
|
|
66
|
-
expect(typeof trajectoryId).toBe(
|
|
66
|
+
expect(typeof trajectoryId).toBe("string");
|
|
67
67
|
expect(trajectoryId.length).toBeGreaterThan(10);
|
|
68
68
|
expect(recorder.isActive(trajectoryId)).toBe(true);
|
|
69
69
|
expect(recorder.getActiveCount()).toBe(1);
|
|
70
70
|
});
|
|
71
71
|
|
|
72
|
-
test(
|
|
73
|
-
const id1 = await recorder.startTrajectory({ agentId:
|
|
74
|
-
const id2 = await recorder.startTrajectory({ agentId:
|
|
75
|
-
const id3 = await recorder.startTrajectory({ agentId:
|
|
72
|
+
test("multiple trajectories can be active simultaneously", async () => {
|
|
73
|
+
const id1 = await recorder.startTrajectory({ agentId: "test-agent-1" });
|
|
74
|
+
const id2 = await recorder.startTrajectory({ agentId: "test-agent-2" });
|
|
75
|
+
const id3 = await recorder.startTrajectory({ agentId: "test-agent-3" });
|
|
76
76
|
|
|
77
77
|
expect(recorder.getActiveCount()).toBe(3);
|
|
78
78
|
expect(recorder.isActive(id1)).toBe(true);
|
|
@@ -82,24 +82,24 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
|
|
|
82
82
|
expect(id2).not.toBe(id3);
|
|
83
83
|
});
|
|
84
84
|
|
|
85
|
-
test(
|
|
85
|
+
test("getActiveTrajectory returns correct trajectory", async () => {
|
|
86
86
|
const trajectoryId = await recorder.startTrajectory({
|
|
87
|
-
agentId:
|
|
88
|
-
archetype:
|
|
89
|
-
scenarioId:
|
|
87
|
+
agentId: "test-agent-x",
|
|
88
|
+
archetype: "degen",
|
|
89
|
+
scenarioId: "test-scenario",
|
|
90
90
|
});
|
|
91
91
|
|
|
92
92
|
const active = recorder.getActiveTrajectory(trajectoryId);
|
|
93
93
|
|
|
94
94
|
expect(active).toBeDefined();
|
|
95
|
-
expect(active?.agentId).toBe(
|
|
96
|
-
expect(active?.archetype).toBe(
|
|
97
|
-
expect(active?.scenarioId).toBe(
|
|
95
|
+
expect(active?.agentId).toBe("test-agent-x");
|
|
96
|
+
expect(active?.archetype).toBe("degen");
|
|
97
|
+
expect(active?.scenarioId).toBe("test-scenario");
|
|
98
98
|
expect(active?.steps).toHaveLength(0);
|
|
99
99
|
});
|
|
100
100
|
|
|
101
|
-
test(
|
|
102
|
-
const result = recorder.getActiveTrajectory(
|
|
101
|
+
test("getActiveTrajectory returns undefined for non-existent id", () => {
|
|
102
|
+
const result = recorder.getActiveTrajectory("non-existent-id");
|
|
103
103
|
expect(result).toBeUndefined();
|
|
104
104
|
});
|
|
105
105
|
|
|
@@ -107,9 +107,9 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
|
|
|
107
107
|
// Step Recording Tests
|
|
108
108
|
// ===========================================================================
|
|
109
109
|
|
|
110
|
-
test(
|
|
110
|
+
test("startStep initializes current step with environment state", async () => {
|
|
111
111
|
const trajectoryId = await recorder.startTrajectory({
|
|
112
|
-
agentId:
|
|
112
|
+
agentId: "test-agent",
|
|
113
113
|
});
|
|
114
114
|
|
|
115
115
|
const envState: EnvironmentState = {
|
|
@@ -126,19 +126,19 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
|
|
|
126
126
|
expect(active?.currentStep?.stepNumber).toBe(0);
|
|
127
127
|
});
|
|
128
128
|
|
|
129
|
-
test(
|
|
129
|
+
test("startStep throws for non-existent trajectory", () => {
|
|
130
130
|
expect(() => {
|
|
131
|
-
recorder.startStep(
|
|
131
|
+
recorder.startStep("fake-id", {
|
|
132
132
|
agentBalance: 0,
|
|
133
133
|
agentPnL: 0,
|
|
134
134
|
openPositions: 0,
|
|
135
135
|
});
|
|
136
|
-
}).toThrow(
|
|
136
|
+
}).toThrow("Trajectory not found: fake-id");
|
|
137
137
|
});
|
|
138
138
|
|
|
139
|
-
test(
|
|
139
|
+
test("logProviderAccess adds provider data to current step", async () => {
|
|
140
140
|
const trajectoryId = await recorder.startTrajectory({
|
|
141
|
-
agentId:
|
|
141
|
+
agentId: "test-agent",
|
|
142
142
|
});
|
|
143
143
|
recorder.startStep(trajectoryId, {
|
|
144
144
|
agentBalance: 1000,
|
|
@@ -147,36 +147,36 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
|
|
|
147
147
|
});
|
|
148
148
|
|
|
149
149
|
recorder.logProviderAccess(trajectoryId, {
|
|
150
|
-
providerName:
|
|
151
|
-
data: { ticker:
|
|
152
|
-
purpose:
|
|
150
|
+
providerName: "market-data",
|
|
151
|
+
data: { ticker: "BTCAI", price: 50000 },
|
|
152
|
+
purpose: "price lookup",
|
|
153
153
|
});
|
|
154
154
|
|
|
155
155
|
const active = recorder.getActiveTrajectory(trajectoryId);
|
|
156
156
|
expect(active?.currentStep?.providerAccesses).toHaveLength(1);
|
|
157
157
|
expect(active?.currentStep?.providerAccesses?.[0]?.providerName).toBe(
|
|
158
|
-
|
|
158
|
+
"market-data",
|
|
159
159
|
);
|
|
160
160
|
});
|
|
161
161
|
|
|
162
|
-
test(
|
|
162
|
+
test("logProviderAccess throws when no current step", async () => {
|
|
163
163
|
const trajectoryId = await recorder.startTrajectory({
|
|
164
|
-
agentId:
|
|
164
|
+
agentId: "test-agent",
|
|
165
165
|
});
|
|
166
166
|
// Don't call startStep
|
|
167
167
|
|
|
168
168
|
expect(() => {
|
|
169
169
|
recorder.logProviderAccess(trajectoryId, {
|
|
170
|
-
providerName:
|
|
170
|
+
providerName: "test",
|
|
171
171
|
data: {},
|
|
172
|
-
purpose:
|
|
172
|
+
purpose: "test",
|
|
173
173
|
});
|
|
174
|
-
}).toThrow(
|
|
174
|
+
}).toThrow("No current step");
|
|
175
175
|
});
|
|
176
176
|
|
|
177
|
-
test(
|
|
177
|
+
test("logLLMCall adds LLM call to current step", async () => {
|
|
178
178
|
const trajectoryId = await recorder.startTrajectory({
|
|
179
|
-
agentId:
|
|
179
|
+
agentId: "test-agent",
|
|
180
180
|
});
|
|
181
181
|
recorder.startStep(trajectoryId, {
|
|
182
182
|
agentBalance: 1000,
|
|
@@ -185,14 +185,14 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
|
|
|
185
185
|
});
|
|
186
186
|
|
|
187
187
|
const llmCall: LLMCall = {
|
|
188
|
-
model:
|
|
189
|
-
systemPrompt:
|
|
190
|
-
userPrompt:
|
|
191
|
-
response:
|
|
192
|
-
reasoning:
|
|
188
|
+
model: "qwen-32b",
|
|
189
|
+
systemPrompt: "You are a trading agent",
|
|
190
|
+
userPrompt: "What should I do?",
|
|
191
|
+
response: "Buy BTCAI",
|
|
192
|
+
reasoning: "Bullish momentum",
|
|
193
193
|
temperature: 0.7,
|
|
194
194
|
maxTokens: 2000,
|
|
195
|
-
purpose:
|
|
195
|
+
purpose: "action",
|
|
196
196
|
latencyMs: 250,
|
|
197
197
|
};
|
|
198
198
|
|
|
@@ -200,13 +200,13 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
|
|
|
200
200
|
|
|
201
201
|
const active = recorder.getActiveTrajectory(trajectoryId);
|
|
202
202
|
expect(active?.currentStep?.llmCalls).toHaveLength(1);
|
|
203
|
-
expect(active?.currentStep?.llmCalls?.[0]?.model).toBe(
|
|
203
|
+
expect(active?.currentStep?.llmCalls?.[0]?.model).toBe("qwen-32b");
|
|
204
204
|
expect(active?.currentStep?.llmCalls?.[0]?.latencyMs).toBe(250);
|
|
205
205
|
});
|
|
206
206
|
|
|
207
|
-
test(
|
|
207
|
+
test("completeStep finalizes step and adds to trajectory", async () => {
|
|
208
208
|
const trajectoryId = await recorder.startTrajectory({
|
|
209
|
-
agentId:
|
|
209
|
+
agentId: "test-agent",
|
|
210
210
|
});
|
|
211
211
|
recorder.startStep(trajectoryId, {
|
|
212
212
|
agentBalance: 1000,
|
|
@@ -215,8 +215,8 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
|
|
|
215
215
|
});
|
|
216
216
|
|
|
217
217
|
const action: Action = {
|
|
218
|
-
actionType:
|
|
219
|
-
parameters: { ticker:
|
|
218
|
+
actionType: "buy",
|
|
219
|
+
parameters: { ticker: "BTCAI", amount: 100 },
|
|
220
220
|
success: true,
|
|
221
221
|
};
|
|
222
222
|
|
|
@@ -224,14 +224,14 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
|
|
|
224
224
|
|
|
225
225
|
const active = recorder.getActiveTrajectory(trajectoryId);
|
|
226
226
|
expect(active?.steps).toHaveLength(1);
|
|
227
|
-
expect(active?.steps[0]?.action.actionType).toBe(
|
|
227
|
+
expect(active?.steps[0]?.action.actionType).toBe("buy");
|
|
228
228
|
expect(active?.steps[0]?.reward).toBe(0.5);
|
|
229
229
|
expect(active?.currentStep).toBeUndefined();
|
|
230
230
|
});
|
|
231
231
|
|
|
232
|
-
test(
|
|
232
|
+
test("multiple steps increment step number correctly", async () => {
|
|
233
233
|
const trajectoryId = await recorder.startTrajectory({
|
|
234
|
-
agentId:
|
|
234
|
+
agentId: "test-agent",
|
|
235
235
|
});
|
|
236
236
|
|
|
237
237
|
for (let i = 0; i < 5; i++) {
|
|
@@ -242,8 +242,8 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
|
|
|
242
242
|
});
|
|
243
243
|
recorder.completeStep(
|
|
244
244
|
trajectoryId,
|
|
245
|
-
{ actionType:
|
|
246
|
-
0.1
|
|
245
|
+
{ actionType: "hold", parameters: {}, success: true },
|
|
246
|
+
0.1,
|
|
247
247
|
);
|
|
248
248
|
}
|
|
249
249
|
|
|
@@ -257,10 +257,10 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
|
|
|
257
257
|
// End Trajectory Tests (Simulation Mode - File Output)
|
|
258
258
|
// ===========================================================================
|
|
259
259
|
|
|
260
|
-
test(
|
|
260
|
+
test("endTrajectory saves JSON file in simulation mode", async () => {
|
|
261
261
|
const trajectoryId = await recorder.startTrajectory({
|
|
262
|
-
agentId:
|
|
263
|
-
archetype:
|
|
262
|
+
agentId: "test-agent-file",
|
|
263
|
+
archetype: "trader",
|
|
264
264
|
});
|
|
265
265
|
|
|
266
266
|
// Add a step
|
|
@@ -270,18 +270,18 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
|
|
|
270
270
|
openPositions: 0,
|
|
271
271
|
});
|
|
272
272
|
recorder.logLLMCall(trajectoryId, {
|
|
273
|
-
model:
|
|
274
|
-
systemPrompt:
|
|
275
|
-
userPrompt:
|
|
276
|
-
response:
|
|
273
|
+
model: "test-model",
|
|
274
|
+
systemPrompt: "system",
|
|
275
|
+
userPrompt: "user",
|
|
276
|
+
response: "response",
|
|
277
277
|
temperature: 0.5,
|
|
278
278
|
maxTokens: 100,
|
|
279
|
-
purpose:
|
|
279
|
+
purpose: "action",
|
|
280
280
|
});
|
|
281
281
|
recorder.completeStep(
|
|
282
282
|
trajectoryId,
|
|
283
|
-
{ actionType:
|
|
284
|
-
1.0
|
|
283
|
+
{ actionType: "buy", parameters: { ticker: "BTCAI" }, success: true },
|
|
284
|
+
1.0,
|
|
285
285
|
);
|
|
286
286
|
|
|
287
287
|
await recorder.endTrajectory(trajectoryId, {
|
|
@@ -294,9 +294,9 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
|
|
|
294
294
|
expect(fs.existsSync(filePath)).toBe(true);
|
|
295
295
|
|
|
296
296
|
// Verify file contents
|
|
297
|
-
const content = JSON.parse(fs.readFileSync(filePath,
|
|
298
|
-
expect(content.trajectory.agentId).toBe(
|
|
299
|
-
expect(content.trajectory.archetype).toBe(
|
|
297
|
+
const content = JSON.parse(fs.readFileSync(filePath, "utf-8"));
|
|
298
|
+
expect(content.trajectory.agentId).toBe("test-agent-file");
|
|
299
|
+
expect(content.trajectory.archetype).toBe("trader");
|
|
300
300
|
expect(content.trajectory.episodeLength).toBe(1);
|
|
301
301
|
expect(content.trajectory.finalBalance).toBe(10500);
|
|
302
302
|
expect(content.trajectory.finalPnL).toBe(500);
|
|
@@ -306,9 +306,9 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
|
|
|
306
306
|
fs.unlinkSync(filePath);
|
|
307
307
|
});
|
|
308
308
|
|
|
309
|
-
test(
|
|
309
|
+
test("endTrajectory removes trajectory from active map", async () => {
|
|
310
310
|
const trajectoryId = await recorder.startTrajectory({
|
|
311
|
-
agentId:
|
|
311
|
+
agentId: "test-agent",
|
|
312
312
|
});
|
|
313
313
|
expect(recorder.isActive(trajectoryId)).toBe(true);
|
|
314
314
|
|
|
@@ -318,15 +318,15 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
|
|
|
318
318
|
expect(recorder.getActiveCount()).toBe(0);
|
|
319
319
|
});
|
|
320
320
|
|
|
321
|
-
test(
|
|
322
|
-
await expect(recorder.endTrajectory(
|
|
323
|
-
|
|
321
|
+
test("endTrajectory throws for non-existent trajectory", async () => {
|
|
322
|
+
await expect(recorder.endTrajectory("fake-id")).rejects.toThrow(
|
|
323
|
+
"Trajectory not found: fake-id",
|
|
324
324
|
);
|
|
325
325
|
});
|
|
326
326
|
|
|
327
|
-
test(
|
|
327
|
+
test("endTrajectory calculates metrics correctly", async () => {
|
|
328
328
|
const trajectoryId = await recorder.startTrajectory({
|
|
329
|
-
agentId:
|
|
329
|
+
agentId: "test-agent",
|
|
330
330
|
});
|
|
331
331
|
|
|
332
332
|
// Add buy action
|
|
@@ -337,8 +337,8 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
|
|
|
337
337
|
});
|
|
338
338
|
recorder.completeStep(
|
|
339
339
|
trajectoryId,
|
|
340
|
-
{ actionType:
|
|
341
|
-
1.0
|
|
340
|
+
{ actionType: "BUY_YES", parameters: {}, success: true },
|
|
341
|
+
1.0,
|
|
342
342
|
);
|
|
343
343
|
|
|
344
344
|
// Add sell action
|
|
@@ -349,8 +349,8 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
|
|
|
349
349
|
});
|
|
350
350
|
recorder.completeStep(
|
|
351
351
|
trajectoryId,
|
|
352
|
-
{ actionType:
|
|
353
|
-
0.5
|
|
352
|
+
{ actionType: "SELL", parameters: {}, success: true },
|
|
353
|
+
0.5,
|
|
354
354
|
);
|
|
355
355
|
|
|
356
356
|
// Add failed action
|
|
@@ -362,24 +362,24 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
|
|
|
362
362
|
recorder.completeStep(
|
|
363
363
|
trajectoryId,
|
|
364
364
|
{
|
|
365
|
-
actionType:
|
|
365
|
+
actionType: "BUY_NO",
|
|
366
366
|
parameters: {},
|
|
367
367
|
success: false,
|
|
368
|
-
error:
|
|
368
|
+
error: "Insufficient funds",
|
|
369
369
|
},
|
|
370
|
-
-0.5
|
|
370
|
+
-0.5,
|
|
371
371
|
);
|
|
372
372
|
|
|
373
373
|
await recorder.endTrajectory(trajectoryId);
|
|
374
374
|
|
|
375
375
|
// Check that file was written with correct metrics
|
|
376
376
|
const filePath = path.join(TEST_OUTPUT_DIR, `${trajectoryId}.json`);
|
|
377
|
-
const content = JSON.parse(fs.readFileSync(filePath,
|
|
377
|
+
const content = JSON.parse(fs.readFileSync(filePath, "utf-8"));
|
|
378
378
|
|
|
379
379
|
expect(content.trajectory.episodeLength).toBe(3);
|
|
380
380
|
expect(content.trajectory.tradesExecuted).toBe(3); // BUY_YES, SELL, BUY_NO
|
|
381
381
|
expect(content.trajectory.totalReward).toBe(1.0); // 1.0 + 0.5 + (-0.5)
|
|
382
|
-
expect(content.trajectory.finalStatus).toBe(
|
|
382
|
+
expect(content.trajectory.finalStatus).toBe("completed_with_errors");
|
|
383
383
|
|
|
384
384
|
fs.unlinkSync(filePath);
|
|
385
385
|
});
|
|
@@ -388,15 +388,15 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
|
|
|
388
388
|
// Edge Cases
|
|
389
389
|
// ===========================================================================
|
|
390
390
|
|
|
391
|
-
test(
|
|
391
|
+
test("handles trajectory with zero steps", async () => {
|
|
392
392
|
const trajectoryId = await recorder.startTrajectory({
|
|
393
|
-
agentId:
|
|
393
|
+
agentId: "test-agent",
|
|
394
394
|
});
|
|
395
395
|
|
|
396
396
|
await recorder.endTrajectory(trajectoryId);
|
|
397
397
|
|
|
398
398
|
const filePath = path.join(TEST_OUTPUT_DIR, `${trajectoryId}.json`);
|
|
399
|
-
const content = JSON.parse(fs.readFileSync(filePath,
|
|
399
|
+
const content = JSON.parse(fs.readFileSync(filePath, "utf-8"));
|
|
400
400
|
|
|
401
401
|
expect(content.trajectory.episodeLength).toBe(0);
|
|
402
402
|
expect(content.trajectory.totalReward).toBe(0);
|
|
@@ -405,9 +405,9 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
|
|
|
405
405
|
fs.unlinkSync(filePath);
|
|
406
406
|
});
|
|
407
407
|
|
|
408
|
-
test(
|
|
408
|
+
test("handles very long prompts in LLM calls", async () => {
|
|
409
409
|
const trajectoryId = await recorder.startTrajectory({
|
|
410
|
-
agentId:
|
|
410
|
+
agentId: "test-agent",
|
|
411
411
|
});
|
|
412
412
|
recorder.startStep(trajectoryId, {
|
|
413
413
|
agentBalance: 1000,
|
|
@@ -415,22 +415,22 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
|
|
|
415
415
|
openPositions: 0,
|
|
416
416
|
});
|
|
417
417
|
|
|
418
|
-
const longPrompt =
|
|
418
|
+
const longPrompt = "A".repeat(50000); // 50k characters
|
|
419
419
|
|
|
420
420
|
recorder.logLLMCall(trajectoryId, {
|
|
421
|
-
model:
|
|
421
|
+
model: "test",
|
|
422
422
|
systemPrompt: longPrompt,
|
|
423
423
|
userPrompt: longPrompt,
|
|
424
424
|
response: longPrompt,
|
|
425
425
|
temperature: 0.5,
|
|
426
426
|
maxTokens: 100,
|
|
427
|
-
purpose:
|
|
427
|
+
purpose: "action",
|
|
428
428
|
});
|
|
429
429
|
|
|
430
430
|
recorder.completeStep(
|
|
431
431
|
trajectoryId,
|
|
432
|
-
{ actionType:
|
|
433
|
-
0
|
|
432
|
+
{ actionType: "hold", parameters: {}, success: true },
|
|
433
|
+
0,
|
|
434
434
|
);
|
|
435
435
|
|
|
436
436
|
await recorder.endTrajectory(trajectoryId);
|
|
@@ -438,15 +438,15 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
|
|
|
438
438
|
const filePath = path.join(TEST_OUTPUT_DIR, `${trajectoryId}.json`);
|
|
439
439
|
expect(fs.existsSync(filePath)).toBe(true);
|
|
440
440
|
|
|
441
|
-
const content = JSON.parse(fs.readFileSync(filePath,
|
|
441
|
+
const content = JSON.parse(fs.readFileSync(filePath, "utf-8"));
|
|
442
442
|
expect(content.llmCalls[0].systemPrompt.length).toBe(50000);
|
|
443
443
|
|
|
444
444
|
fs.unlinkSync(filePath);
|
|
445
445
|
});
|
|
446
446
|
|
|
447
|
-
test(
|
|
447
|
+
test("handles negative rewards correctly", async () => {
|
|
448
448
|
const trajectoryId = await recorder.startTrajectory({
|
|
449
|
-
agentId:
|
|
449
|
+
agentId: "test-agent",
|
|
450
450
|
});
|
|
451
451
|
|
|
452
452
|
recorder.startStep(trajectoryId, {
|
|
@@ -456,14 +456,14 @@ describe('TrajectoryRecorder - Real Class Tests', () => {
|
|
|
456
456
|
});
|
|
457
457
|
recorder.completeStep(
|
|
458
458
|
trajectoryId,
|
|
459
|
-
{ actionType:
|
|
460
|
-
-5.0
|
|
459
|
+
{ actionType: "buy", parameters: {}, success: false, error: "Bad trade" },
|
|
460
|
+
-5.0,
|
|
461
461
|
);
|
|
462
462
|
|
|
463
463
|
await recorder.endTrajectory(trajectoryId);
|
|
464
464
|
|
|
465
465
|
const filePath = path.join(TEST_OUTPUT_DIR, `${trajectoryId}.json`);
|
|
466
|
-
const content = JSON.parse(fs.readFileSync(filePath,
|
|
466
|
+
const content = JSON.parse(fs.readFileSync(filePath, "utf-8"));
|
|
467
467
|
|
|
468
468
|
expect(content.trajectory.totalReward).toBe(-5.0);
|
|
469
469
|
|