@elizaos/training 2.0.0-alpha.21 → 2.0.0-alpha.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-lint.log +2 -0
- package/.turbo/turbo-typecheck.log +1 -0
- package/dist/.tsbuildinfo +1 -0
- package/dist/adapter.js +59 -0
- package/dist/archetypes/ArchetypeConfigService.js +510 -0
- package/dist/archetypes/derive-archetype.js +196 -0
- package/dist/archetypes/index.js +7 -0
- package/dist/benchmark/ArchetypeMatchupBenchmark.js +547 -0
- package/dist/benchmark/BenchmarkChartGenerator.js +632 -0
- package/dist/benchmark/BenchmarkDataGenerator.js +825 -0
- package/dist/benchmark/BenchmarkDataViewer.js +197 -0
- package/dist/benchmark/BenchmarkHistoryService.js +135 -0
- package/dist/benchmark/BenchmarkRunner.js +483 -0
- package/dist/benchmark/BenchmarkValidator.js +158 -0
- package/dist/benchmark/FastEvalRunner.js +133 -0
- package/dist/benchmark/MetricsValidator.js +104 -0
- package/dist/benchmark/MetricsVisualizer.js +775 -0
- package/dist/benchmark/ModelBenchmarkService.js +433 -0
- package/dist/benchmark/ModelRegistry.js +122 -0
- package/dist/benchmark/RulerBenchmarkIntegration.js +168 -0
- package/dist/benchmark/SimulationA2AInterface.js +683 -0
- package/dist/benchmark/SimulationEngine.js +522 -0
- package/dist/benchmark/TaskRunner.js +60 -0
- package/dist/benchmark/__tests__/BenchmarkRunner.test.js +409 -0
- package/dist/benchmark/__tests__/HeadToHead.test.js +105 -0
- package/dist/benchmark/index.js +23 -0
- package/dist/benchmark/parseSimulationMetrics.js +86 -0
- package/dist/benchmark/simulation-types.js +1 -0
- package/dist/dependencies.js +197 -0
- package/dist/generation/TrajectoryGenerator.js +244 -0
- package/dist/generation/index.js +6 -0
- package/dist/huggingface/HuggingFaceDatasetUploader.js +463 -0
- package/dist/huggingface/HuggingFaceIntegrationService.js +272 -0
- package/dist/huggingface/HuggingFaceModelUploader.js +385 -0
- package/dist/huggingface/index.js +9 -0
- package/dist/huggingface/shared/HuggingFaceUploadUtil.js +144 -0
- package/dist/index.js +41 -0
- package/dist/init-training.js +43 -0
- package/dist/metrics/TrajectoryMetricsExtractor.js +523 -0
- package/dist/metrics/__tests__/TrajectoryMetricsExtractor.test.js +628 -0
- package/dist/metrics/index.js +7 -0
- package/dist/metrics/types.js +21 -0
- package/dist/rubrics/__tests__/index.test.js +150 -0
- package/dist/rubrics/ass-kisser.js +83 -0
- package/dist/rubrics/degen.js +78 -0
- package/dist/rubrics/goody-twoshoes.js +82 -0
- package/dist/rubrics/index.js +184 -0
- package/dist/rubrics/information-trader.js +82 -0
- package/dist/rubrics/infosec.js +99 -0
- package/dist/rubrics/liar.js +102 -0
- package/dist/rubrics/perps-trader.js +85 -0
- package/dist/rubrics/researcher.js +79 -0
- package/dist/rubrics/scammer.js +80 -0
- package/dist/rubrics/social-butterfly.js +71 -0
- package/dist/rubrics/super-predictor.js +95 -0
- package/dist/rubrics/trader.js +65 -0
- package/dist/scoring/ArchetypeScoringService.js +301 -0
- package/dist/scoring/JudgePromptBuilder.js +401 -0
- package/dist/scoring/LLMJudgeCache.js +263 -0
- package/dist/scoring/index.js +8 -0
- package/dist/training/AutomationPipeline.js +714 -0
- package/dist/training/BenchmarkService.js +370 -0
- package/dist/training/ConfigValidator.js +153 -0
- package/dist/training/MarketOutcomesTracker.js +142 -0
- package/dist/training/ModelDeployer.js +128 -0
- package/dist/training/ModelFetcher.js +48 -0
- package/dist/training/ModelSelectionService.js +248 -0
- package/dist/training/ModelUsageVerifier.js +106 -0
- package/dist/training/MultiModelOrchestrator.js +349 -0
- package/dist/training/RLModelConfig.js +295 -0
- package/dist/training/RewardBackpropagationService.js +117 -0
- package/dist/training/RulerScoringService.js +450 -0
- package/dist/training/TrainingMonitor.js +108 -0
- package/dist/training/TrajectoryRecorder.js +281 -0
- package/dist/training/__tests__/TrajectoryRecorder.test.js +363 -0
- package/dist/training/index.js +30 -0
- package/dist/training/logRLConfig.js +29 -0
- package/dist/training/pipeline.js +80 -0
- package/dist/training/storage/ModelStorageService.js +190 -0
- package/dist/training/storage/TrainingDataArchiver.js +136 -0
- package/dist/training/storage/index.js +7 -0
- package/dist/training/types.js +6 -0
- package/dist/training/window-utils.js +100 -0
- package/dist/utils/index.js +73 -0
- package/dist/utils/logger.js +55 -0
- package/dist/utils/snowflake.js +15 -0
- package/dist/utils/synthetic-detector.js +67 -0
- package/package.json +2 -2
- package/research-output/training-runs/training-run-1773742857616.json +38 -0
- package/research-output/training-runs/training-run-1773742946977.json +38 -0
- package/research-output/training-runs/training-run-1773743278891.json +38 -0
- package/research-output/training-runs/training-run-1773743409754.json +38 -0
- package/research-output/training-runs/training-run-1773743651086.json +38 -0
- package/research-output/training-runs/training-run-1773743782883.json +38 -0
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TrajectoryRecorder
|
|
3
|
+
*
|
|
4
|
+
* Records agent decisions with full context for GRPO training.
|
|
5
|
+
* Captures environment state, LLM calls, actions, and rewards.
|
|
6
|
+
*
|
|
7
|
+
* @packageDocumentation
|
|
8
|
+
*/
|
|
9
|
+
import { getLlmLogAdapter, getTrainingDataAdapter } from "../adapter";
|
|
10
|
+
import { logger } from "../utils/logger";
|
|
11
|
+
import { generateSnowflakeId } from "../utils/snowflake";
|
|
12
|
+
import { getCurrentWindowId } from "./window-utils";
|
|
13
|
+
import * as fs from "node:fs";
|
|
14
|
+
import * as path from "node:path";
|
|
15
|
+
// ─── Simulation mode flag ────────────────────────────────────────────
|
|
16
|
+
// Replaces the `isSimulationMode` import from `@elizaos/db`.
|
|
17
|
+
// Set via `setSimulationMode(true)` before recording trajectories in
|
|
18
|
+
// simulation/benchmark contexts.
|
|
19
|
+
let _simulationMode = false;
|
|
20
|
+
/**
 * Turn simulation mode on or off for trajectory recording.
 *
 * The value is stored in the module-level `_simulationMode` flag and is what
 * `isSimulationMode()` subsequently reports.
 *
 * @param enabled - New value for the simulation-mode flag
 */
export function setSimulationMode(enabled) {
    _simulationMode = enabled;
}
|
|
24
|
+
/**
 * Report the current simulation-mode setting.
 *
 * @returns The value of the module-level `_simulationMode` flag
 */
export function isSimulationMode() {
    return _simulationMode;
}
|
|
28
|
+
/**
 * Records agent trajectories for RL training.
 *
 * A trajectory is accumulated in memory through
 * `startTrajectory` -> (`startStep` -> `logProviderAccess` / `logLLMCall` ->
 * `completeStep`)* -> `endTrajectory`. On `endTrajectory` the record is either
 * written to a JSON file (simulation mode) or passed to the registered
 * training-data adapter, and is then removed from the in-memory map.
 */
export class TrajectoryRecorder {
    /** In-flight trajectories, keyed by trajectory ID. */
    activeTrajectories = new Map();

    /**
     * Start recording a new trajectory.
     *
     * @param options - Trajectory configuration. Reads `agentId`, `archetype`,
     *   `scenarioId` and `windowId`. A missing `windowId` falls back to
     *   `getCurrentWindowId()`; a missing `scenarioId` falls back to the window ID.
     * @returns The unique trajectory ID
     */
    async startTrajectory(options) {
        const trajectoryId = await generateSnowflakeId();
        const windowId = options.windowId || getCurrentWindowId();
        this.activeTrajectories.set(trajectoryId, {
            trajectoryId,
            agentId: options.agentId,
            archetype: options.archetype,
            scenarioId: options.scenarioId || windowId,
            startTime: Date.now(),
            steps: [],
        });
        logger.info("Started trajectory recording", {
            trajectoryId,
            agentId: options.agentId,
            archetype: options.archetype,
            scenarioId: options.scenarioId,
            windowId,
        });
        return trajectoryId;
    }

    /**
     * Start a new step in the trajectory.
     *
     * Replaces any step that is currently in progress; the step number is the
     * count of steps completed so far.
     *
     * @param trajectoryId - The trajectory ID
     * @param environmentState - Current environment state
     * @throws Error if trajectory not found
     */
    startStep(trajectoryId, environmentState) {
        const traj = this.activeTrajectories.get(trajectoryId);
        if (!traj) {
            throw new Error(`Trajectory not found: ${trajectoryId}`);
        }
        traj.currentStep = {
            stepNumber: traj.steps.length,
            timestamp: Date.now(),
            environmentState,
            providerAccesses: [],
            llmCalls: [],
            reward: 0,
        };
    }

    /**
     * Log a provider access in the current step.
     *
     * @param trajectoryId - The trajectory ID
     * @param access - Provider access details
     * @throws Error if the trajectory is unknown or has no current step
     */
    logProviderAccess(trajectoryId, access) {
        const traj = this.activeTrajectories.get(trajectoryId);
        if (!traj?.currentStep) {
            throw new Error(`No current step for trajectory: ${trajectoryId}`);
        }
        traj.currentStep.providerAccesses = traj.currentStep.providerAccesses || [];
        traj.currentStep.providerAccesses.push(access);
    }

    /**
     * Log an LLM call in the current step.
     *
     * @param trajectoryId - The trajectory ID
     * @param llmCall - LLM call details
     * @throws Error if the trajectory is unknown or has no current step
     */
    logLLMCall(trajectoryId, llmCall) {
        const traj = this.activeTrajectories.get(trajectoryId);
        if (!traj?.currentStep) {
            throw new Error(`No current step for trajectory: ${trajectoryId}`);
        }
        traj.currentStep.llmCalls = traj.currentStep.llmCalls || [];
        traj.currentStep.llmCalls.push(llmCall);
    }

    /**
     * Complete the current step with an action.
     *
     * Appends the finished step to the trajectory and clears the current step.
     *
     * @param trajectoryId - The trajectory ID
     * @param action - The action taken
     * @param reward - Immediate reward for the step (defaults to 0)
     * @throws Error if the trajectory is unknown, has no current step, or the
     *   current step is missing its step number, timestamp or environment state
     */
    completeStep(trajectoryId, action, reward = 0) {
        const traj = this.activeTrajectories.get(trajectoryId);
        if (!traj?.currentStep) {
            throw new Error(`No current step for trajectory: ${trajectoryId}`);
        }
        const { stepNumber, timestamp, environmentState } = traj.currentStep;
        if (stepNumber === undefined ||
            timestamp === undefined ||
            environmentState === undefined) {
            throw new Error(`Current step incomplete for trajectory: ${trajectoryId}`);
        }
        const finishedStep = {
            stepNumber,
            timestamp,
            environmentState,
            providerAccesses: traj.currentStep.providerAccesses || [],
            llmCalls: traj.currentStep.llmCalls || [],
            action,
            reward,
        };
        traj.steps.push(finishedStep);
        traj.currentStep = undefined;
    }

    /**
     * End the trajectory and persist it.
     *
     * In simulation mode the record is written to
     * `./training-data-output/trajectories/<trajectoryId>.json`; otherwise it is
     * inserted through the training-data adapter and, when an LLM log adapter
     * is registered, each LLM call is inserted through that adapter as well.
     * Only completed steps are persisted; a step that was started but never
     * completed is not part of the saved record.
     *
     * @param trajectoryId - The trajectory ID
     * @param options - End options. Reads `windowId`, `finalBalance`,
     *   `finalPnL` and `gameKnowledge`.
     * @throws Error if trajectory not found
     */
    async endTrajectory(trajectoryId, options = {}) {
        const traj = this.activeTrajectories.get(trajectoryId);
        if (!traj) {
            throw new Error(`Trajectory not found: ${trajectoryId}`);
        }
        const endTime = Date.now();
        const durationMs = endTime - traj.startTime;
        const totalReward = traj.steps.reduce((sum, step) => sum + step.reward, 0);
        const windowId = options.windowId || getCurrentWindowId();

        // Metrics. Action types are matched as case-sensitive substrings.
        const tradesExecuted = traj.steps.filter((s) => s.action.actionType.includes("BUY") ||
            s.action.actionType.includes("SELL")).length;
        const postsCreated = traj.steps.filter((s) => s.action.actionType.includes("POST")).length;
        const errorCount = traj.steps.filter((s) => !s.action.success).length;
        const finalStatus = errorCount > 0 ? "completed_with_errors" : "completed";

        // Standard data object, shared by the JSON and database paths.
        const trajectoryData = {
            id: await generateSnowflakeId(),
            trajectoryId,
            agentId: traj.agentId,
            archetype: traj.archetype ?? null,
            startTime: new Date(traj.startTime),
            endTime: new Date(endTime),
            durationMs,
            scenarioId: traj.scenarioId || windowId,
            episodeId: traj.scenarioId ? `${traj.scenarioId}-${Date.now()}` : null,
            windowId,
            windowHours: 1,
            batchId: null,
            stepsJson: JSON.stringify(traj.steps),
            rewardComponentsJson: JSON.stringify({ environmentReward: totalReward }),
            metricsJson: JSON.stringify({
                episodeLength: traj.steps.length,
                finalStatus,
                finalBalance: options.finalBalance,
                finalPnL: options.finalPnL,
                tradesExecuted,
                postsCreated,
                errorCount,
            }),
            metadataJson: JSON.stringify({
                isTrainingData: true,
                gameKnowledge: options.gameKnowledge || {},
            }),
            totalReward,
            episodeLength: traj.steps.length,
            finalStatus,
            finalBalance: options.finalBalance ?? null,
            finalPnL: options.finalPnL ?? null,
            tradesExecuted,
            postsCreated,
            aiJudgeReward: null,
            aiJudgeReasoning: null,
            judgedAt: null,
            isTrainingData: true,
            isEvaluation: false,
            usedInTraining: false,
            trainedInBatch: null,
        };

        // Simulation mode bypass: write a JSON file instead of using the adapters.
        if (isSimulationMode()) {
            const outputDir = "./training-data-output/trajectories";
            // A recursive mkdir is a no-op when the directory already exists.
            fs.mkdirSync(outputDir, { recursive: true });
            const fullData = {
                trajectory: trajectoryData,
                llmCalls: traj.steps.flatMap((step) => step.llmCalls.map((call, idx) => ({
                    stepNumber: step.stepNumber,
                    callIndex: idx,
                    ...call,
                }))),
            };
            const filePath = path.join(outputDir, `${trajectoryId}.json`);
            fs.writeFileSync(filePath, JSON.stringify(fullData, null, 2));
            logger.info("Saved trajectory to JSON (Simulation Mode)", { trajectoryId, path: filePath }, "TrajectoryRecorder");
            this.activeTrajectories.delete(trajectoryId);
            return;
        }

        const adapter = getTrainingDataAdapter();
        await adapter.insertTrajectory(trajectoryData);

        // Save LLM calls via adapter (if an LLM log adapter is registered).
        const llmLogAdapter = getLlmLogAdapter();
        if (llmLogAdapter) {
            for (const step of traj.steps) {
                // Use the positional index so every call gets a distinct callId,
                // even when the same call object was logged more than once.
                for (const [callIndex, llmCall] of step.llmCalls.entries()) {
                    await llmLogAdapter.insertLLMCallLog({
                        id: await generateSnowflakeId(),
                        trajectoryId,
                        stepId: `${trajectoryId}-step-${step.stepNumber}`,
                        callId: `${trajectoryId}-call-${step.stepNumber}-${callIndex}`,
                        timestamp: new Date(step.timestamp),
                        latencyMs: llmCall.latencyMs ?? null,
                        model: llmCall.model,
                        purpose: llmCall.purpose,
                        actionType: llmCall.actionType ?? null,
                        systemPrompt: llmCall.systemPrompt,
                        userPrompt: llmCall.userPrompt,
                        response: llmCall.response,
                    });
                }
            }
        }
        logger.info("Trajectory saved to database", {
            trajectoryId,
            archetype: traj.archetype,
            steps: traj.steps.length,
            reward: totalReward,
            duration: durationMs,
        });
        this.activeTrajectories.delete(trajectoryId);
    }

    /**
     * Get an active trajectory by ID.
     *
     * @param trajectoryId - The trajectory ID
     * @returns The active trajectory or undefined
     */
    getActiveTrajectory(trajectoryId) {
        return this.activeTrajectories.get(trajectoryId);
    }

    /**
     * Check if a trajectory is active.
     *
     * @param trajectoryId - The trajectory ID
     * @returns True if trajectory is active
     */
    isActive(trajectoryId) {
        return this.activeTrajectories.has(trajectoryId);
    }

    /**
     * Get count of active trajectories.
     *
     * @returns Number of active trajectories
     */
    getActiveCount() {
        return this.activeTrajectories.size;
    }
}
|
|
280
|
+
/** Shared module-level `TrajectoryRecorder` instance. */
export const trajectoryRecorder = new TrajectoryRecorder();
|
|
@@ -0,0 +1,363 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TrajectoryRecorder Tests
|
|
3
|
+
*
|
|
4
|
+
* REAL tests that exercise the actual TrajectoryRecorder class.
|
|
5
|
+
* Uses simulation mode to avoid database dependency.
|
|
6
|
+
*/
|
|
7
|
+
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
|
|
8
|
+
import * as fs from "node:fs";
|
|
9
|
+
import * as path from "node:path";
|
|
10
|
+
import { setSimulationMode, TrajectoryRecorder } from "../TrajectoryRecorder";
|
|
11
|
+
// =============================================================================
|
|
12
|
+
// Test Setup
|
|
13
|
+
// =============================================================================
|
|
14
|
+
const TEST_OUTPUT_DIR = "./training-data-output/trajectories";
|
|
15
|
+
describe("TrajectoryRecorder - Real Class Tests", () => {
|
|
16
|
+
let recorder;
|
|
17
|
+
beforeEach(() => {
|
|
18
|
+
setSimulationMode(true);
|
|
19
|
+
recorder = new TrajectoryRecorder();
|
|
20
|
+
// Clean up test output
|
|
21
|
+
if (fs.existsSync(TEST_OUTPUT_DIR)) {
|
|
22
|
+
const files = fs.readdirSync(TEST_OUTPUT_DIR);
|
|
23
|
+
for (const file of files) {
|
|
24
|
+
if (file.startsWith("test-")) {
|
|
25
|
+
fs.unlinkSync(path.join(TEST_OUTPUT_DIR, file));
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
});
|
|
30
|
+
afterEach(() => {
|
|
31
|
+
setSimulationMode(false);
|
|
32
|
+
// Cleanup after each test
|
|
33
|
+
if (fs.existsSync(TEST_OUTPUT_DIR)) {
|
|
34
|
+
const files = fs.readdirSync(TEST_OUTPUT_DIR);
|
|
35
|
+
for (const file of files) {
|
|
36
|
+
if (file.includes("test-agent")) {
|
|
37
|
+
try {
|
|
38
|
+
fs.unlinkSync(path.join(TEST_OUTPUT_DIR, file));
|
|
39
|
+
}
|
|
40
|
+
catch {
|
|
41
|
+
// Ignore cleanup errors
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
});
|
|
47
|
+
// ===========================================================================
|
|
48
|
+
// Lifecycle Tests
|
|
49
|
+
// ===========================================================================
|
|
50
|
+
test("startTrajectory creates a new active trajectory", async () => {
|
|
51
|
+
const trajectoryId = await recorder.startTrajectory({
|
|
52
|
+
agentId: "test-agent-1",
|
|
53
|
+
archetype: "trader",
|
|
54
|
+
});
|
|
55
|
+
expect(trajectoryId).toBeDefined();
|
|
56
|
+
expect(typeof trajectoryId).toBe("string");
|
|
57
|
+
expect(trajectoryId.length).toBeGreaterThan(10);
|
|
58
|
+
expect(recorder.isActive(trajectoryId)).toBe(true);
|
|
59
|
+
expect(recorder.getActiveCount()).toBe(1);
|
|
60
|
+
});
|
|
61
|
+
test("multiple trajectories can be active simultaneously", async () => {
|
|
62
|
+
const id1 = await recorder.startTrajectory({ agentId: "test-agent-1" });
|
|
63
|
+
const id2 = await recorder.startTrajectory({ agentId: "test-agent-2" });
|
|
64
|
+
const id3 = await recorder.startTrajectory({ agentId: "test-agent-3" });
|
|
65
|
+
expect(recorder.getActiveCount()).toBe(3);
|
|
66
|
+
expect(recorder.isActive(id1)).toBe(true);
|
|
67
|
+
expect(recorder.isActive(id2)).toBe(true);
|
|
68
|
+
expect(recorder.isActive(id3)).toBe(true);
|
|
69
|
+
expect(id1).not.toBe(id2);
|
|
70
|
+
expect(id2).not.toBe(id3);
|
|
71
|
+
});
|
|
72
|
+
test("getActiveTrajectory returns correct trajectory", async () => {
|
|
73
|
+
const trajectoryId = await recorder.startTrajectory({
|
|
74
|
+
agentId: "test-agent-x",
|
|
75
|
+
archetype: "degen",
|
|
76
|
+
scenarioId: "test-scenario",
|
|
77
|
+
});
|
|
78
|
+
const active = recorder.getActiveTrajectory(trajectoryId);
|
|
79
|
+
expect(active).toBeDefined();
|
|
80
|
+
expect(active?.agentId).toBe("test-agent-x");
|
|
81
|
+
expect(active?.archetype).toBe("degen");
|
|
82
|
+
expect(active?.scenarioId).toBe("test-scenario");
|
|
83
|
+
expect(active?.steps).toHaveLength(0);
|
|
84
|
+
});
|
|
85
|
+
test("getActiveTrajectory returns undefined for non-existent id", () => {
|
|
86
|
+
const result = recorder.getActiveTrajectory("non-existent-id");
|
|
87
|
+
expect(result).toBeUndefined();
|
|
88
|
+
});
|
|
89
|
+
// ===========================================================================
|
|
90
|
+
// Step Recording Tests
|
|
91
|
+
// ===========================================================================
|
|
92
|
+
test("startStep initializes current step with environment state", async () => {
|
|
93
|
+
const trajectoryId = await recorder.startTrajectory({
|
|
94
|
+
agentId: "test-agent",
|
|
95
|
+
});
|
|
96
|
+
const envState = {
|
|
97
|
+
agentBalance: 10000,
|
|
98
|
+
agentPnL: 0,
|
|
99
|
+
openPositions: 0,
|
|
100
|
+
};
|
|
101
|
+
recorder.startStep(trajectoryId, envState);
|
|
102
|
+
const active = recorder.getActiveTrajectory(trajectoryId);
|
|
103
|
+
expect(active?.currentStep).toBeDefined();
|
|
104
|
+
expect(active?.currentStep?.environmentState).toEqual(envState);
|
|
105
|
+
expect(active?.currentStep?.stepNumber).toBe(0);
|
|
106
|
+
});
|
|
107
|
+
test("startStep throws for non-existent trajectory", () => {
|
|
108
|
+
expect(() => {
|
|
109
|
+
recorder.startStep("fake-id", {
|
|
110
|
+
agentBalance: 0,
|
|
111
|
+
agentPnL: 0,
|
|
112
|
+
openPositions: 0,
|
|
113
|
+
});
|
|
114
|
+
}).toThrow("Trajectory not found: fake-id");
|
|
115
|
+
});
|
|
116
|
+
test("logProviderAccess adds provider data to current step", async () => {
|
|
117
|
+
const trajectoryId = await recorder.startTrajectory({
|
|
118
|
+
agentId: "test-agent",
|
|
119
|
+
});
|
|
120
|
+
recorder.startStep(trajectoryId, {
|
|
121
|
+
agentBalance: 1000,
|
|
122
|
+
agentPnL: 0,
|
|
123
|
+
openPositions: 0,
|
|
124
|
+
});
|
|
125
|
+
recorder.logProviderAccess(trajectoryId, {
|
|
126
|
+
providerName: "market-data",
|
|
127
|
+
data: { ticker: "BTCAI", price: 50000 },
|
|
128
|
+
purpose: "price lookup",
|
|
129
|
+
});
|
|
130
|
+
const active = recorder.getActiveTrajectory(trajectoryId);
|
|
131
|
+
expect(active?.currentStep?.providerAccesses).toHaveLength(1);
|
|
132
|
+
expect(active?.currentStep?.providerAccesses?.[0]?.providerName).toBe("market-data");
|
|
133
|
+
});
|
|
134
|
+
test("logProviderAccess throws when no current step", async () => {
|
|
135
|
+
const trajectoryId = await recorder.startTrajectory({
|
|
136
|
+
agentId: "test-agent",
|
|
137
|
+
});
|
|
138
|
+
// Don't call startStep
|
|
139
|
+
expect(() => {
|
|
140
|
+
recorder.logProviderAccess(trajectoryId, {
|
|
141
|
+
providerName: "test",
|
|
142
|
+
data: {},
|
|
143
|
+
purpose: "test",
|
|
144
|
+
});
|
|
145
|
+
}).toThrow("No current step");
|
|
146
|
+
});
|
|
147
|
+
test("logLLMCall adds LLM call to current step", async () => {
|
|
148
|
+
const trajectoryId = await recorder.startTrajectory({
|
|
149
|
+
agentId: "test-agent",
|
|
150
|
+
});
|
|
151
|
+
recorder.startStep(trajectoryId, {
|
|
152
|
+
agentBalance: 1000,
|
|
153
|
+
agentPnL: 0,
|
|
154
|
+
openPositions: 0,
|
|
155
|
+
});
|
|
156
|
+
const llmCall = {
|
|
157
|
+
model: "qwen-32b",
|
|
158
|
+
systemPrompt: "You are a trading agent",
|
|
159
|
+
userPrompt: "What should I do?",
|
|
160
|
+
response: "Buy BTCAI",
|
|
161
|
+
reasoning: "Bullish momentum",
|
|
162
|
+
temperature: 0.7,
|
|
163
|
+
maxTokens: 2000,
|
|
164
|
+
purpose: "action",
|
|
165
|
+
latencyMs: 250,
|
|
166
|
+
};
|
|
167
|
+
recorder.logLLMCall(trajectoryId, llmCall);
|
|
168
|
+
const active = recorder.getActiveTrajectory(trajectoryId);
|
|
169
|
+
expect(active?.currentStep?.llmCalls).toHaveLength(1);
|
|
170
|
+
expect(active?.currentStep?.llmCalls?.[0]?.model).toBe("qwen-32b");
|
|
171
|
+
expect(active?.currentStep?.llmCalls?.[0]?.latencyMs).toBe(250);
|
|
172
|
+
});
|
|
173
|
+
test("completeStep finalizes step and adds to trajectory", async () => {
|
|
174
|
+
const trajectoryId = await recorder.startTrajectory({
|
|
175
|
+
agentId: "test-agent",
|
|
176
|
+
});
|
|
177
|
+
recorder.startStep(trajectoryId, {
|
|
178
|
+
agentBalance: 1000,
|
|
179
|
+
agentPnL: 0,
|
|
180
|
+
openPositions: 0,
|
|
181
|
+
});
|
|
182
|
+
const action = {
|
|
183
|
+
actionType: "buy",
|
|
184
|
+
parameters: { ticker: "BTCAI", amount: 100 },
|
|
185
|
+
success: true,
|
|
186
|
+
};
|
|
187
|
+
recorder.completeStep(trajectoryId, action, 0.5);
|
|
188
|
+
const active = recorder.getActiveTrajectory(trajectoryId);
|
|
189
|
+
expect(active?.steps).toHaveLength(1);
|
|
190
|
+
expect(active?.steps[0]?.action.actionType).toBe("buy");
|
|
191
|
+
expect(active?.steps[0]?.reward).toBe(0.5);
|
|
192
|
+
expect(active?.currentStep).toBeUndefined();
|
|
193
|
+
});
|
|
194
|
+
test("multiple steps increment step number correctly", async () => {
|
|
195
|
+
const trajectoryId = await recorder.startTrajectory({
|
|
196
|
+
agentId: "test-agent",
|
|
197
|
+
});
|
|
198
|
+
for (let i = 0; i < 5; i++) {
|
|
199
|
+
recorder.startStep(trajectoryId, {
|
|
200
|
+
agentBalance: 1000 - i * 100,
|
|
201
|
+
agentPnL: i * 10,
|
|
202
|
+
openPositions: i,
|
|
203
|
+
});
|
|
204
|
+
recorder.completeStep(trajectoryId, { actionType: "hold", parameters: {}, success: true }, 0.1);
|
|
205
|
+
}
|
|
206
|
+
const active = recorder.getActiveTrajectory(trajectoryId);
|
|
207
|
+
expect(active?.steps).toHaveLength(5);
|
|
208
|
+
expect(active?.steps[0]?.stepNumber).toBe(0);
|
|
209
|
+
expect(active?.steps[4]?.stepNumber).toBe(4);
|
|
210
|
+
});
|
|
211
|
+
// ===========================================================================
|
|
212
|
+
// End Trajectory Tests (Simulation Mode - File Output)
|
|
213
|
+
// ===========================================================================
|
|
214
|
+
test("endTrajectory saves JSON file in simulation mode", async () => {
|
|
215
|
+
const trajectoryId = await recorder.startTrajectory({
|
|
216
|
+
agentId: "test-agent-file",
|
|
217
|
+
archetype: "trader",
|
|
218
|
+
});
|
|
219
|
+
// Add a step
|
|
220
|
+
recorder.startStep(trajectoryId, {
|
|
221
|
+
agentBalance: 10000,
|
|
222
|
+
agentPnL: 0,
|
|
223
|
+
openPositions: 0,
|
|
224
|
+
});
|
|
225
|
+
recorder.logLLMCall(trajectoryId, {
|
|
226
|
+
model: "test-model",
|
|
227
|
+
systemPrompt: "system",
|
|
228
|
+
userPrompt: "user",
|
|
229
|
+
response: "response",
|
|
230
|
+
temperature: 0.5,
|
|
231
|
+
maxTokens: 100,
|
|
232
|
+
purpose: "action",
|
|
233
|
+
});
|
|
234
|
+
recorder.completeStep(trajectoryId, { actionType: "buy", parameters: { ticker: "BTCAI" }, success: true }, 1.0);
|
|
235
|
+
await recorder.endTrajectory(trajectoryId, {
|
|
236
|
+
finalBalance: 10500,
|
|
237
|
+
finalPnL: 500,
|
|
238
|
+
});
|
|
239
|
+
// Verify file was created
|
|
240
|
+
const filePath = path.join(TEST_OUTPUT_DIR, `${trajectoryId}.json`);
|
|
241
|
+
expect(fs.existsSync(filePath)).toBe(true);
|
|
242
|
+
// Verify file contents
|
|
243
|
+
const content = JSON.parse(fs.readFileSync(filePath, "utf-8"));
|
|
244
|
+
expect(content.trajectory.agentId).toBe("test-agent-file");
|
|
245
|
+
expect(content.trajectory.archetype).toBe("trader");
|
|
246
|
+
expect(content.trajectory.episodeLength).toBe(1);
|
|
247
|
+
expect(content.trajectory.finalBalance).toBe(10500);
|
|
248
|
+
expect(content.trajectory.finalPnL).toBe(500);
|
|
249
|
+
expect(content.llmCalls).toHaveLength(1);
|
|
250
|
+
// Cleanup
|
|
251
|
+
fs.unlinkSync(filePath);
|
|
252
|
+
});
|
|
253
|
+
test("endTrajectory removes trajectory from active map", async () => {
|
|
254
|
+
const trajectoryId = await recorder.startTrajectory({
|
|
255
|
+
agentId: "test-agent",
|
|
256
|
+
});
|
|
257
|
+
expect(recorder.isActive(trajectoryId)).toBe(true);
|
|
258
|
+
await recorder.endTrajectory(trajectoryId);
|
|
259
|
+
expect(recorder.isActive(trajectoryId)).toBe(false);
|
|
260
|
+
expect(recorder.getActiveCount()).toBe(0);
|
|
261
|
+
});
|
|
262
|
+
test("endTrajectory throws for non-existent trajectory", async () => {
|
|
263
|
+
await expect(recorder.endTrajectory("fake-id")).rejects.toThrow("Trajectory not found: fake-id");
|
|
264
|
+
});
|
|
265
|
+
test("endTrajectory calculates metrics correctly", async () => {
|
|
266
|
+
const trajectoryId = await recorder.startTrajectory({
|
|
267
|
+
agentId: "test-agent",
|
|
268
|
+
});
|
|
269
|
+
// Add buy action
|
|
270
|
+
recorder.startStep(trajectoryId, {
|
|
271
|
+
agentBalance: 10000,
|
|
272
|
+
agentPnL: 0,
|
|
273
|
+
openPositions: 0,
|
|
274
|
+
});
|
|
275
|
+
recorder.completeStep(trajectoryId, { actionType: "BUY_YES", parameters: {}, success: true }, 1.0);
|
|
276
|
+
// Add sell action
|
|
277
|
+
recorder.startStep(trajectoryId, {
|
|
278
|
+
agentBalance: 9000,
|
|
279
|
+
agentPnL: 100,
|
|
280
|
+
openPositions: 1,
|
|
281
|
+
});
|
|
282
|
+
recorder.completeStep(trajectoryId, { actionType: "SELL", parameters: {}, success: true }, 0.5);
|
|
283
|
+
// Add failed action
|
|
284
|
+
recorder.startStep(trajectoryId, {
|
|
285
|
+
agentBalance: 9500,
|
|
286
|
+
agentPnL: 150,
|
|
287
|
+
openPositions: 0,
|
|
288
|
+
});
|
|
289
|
+
recorder.completeStep(trajectoryId, {
|
|
290
|
+
actionType: "BUY_NO",
|
|
291
|
+
parameters: {},
|
|
292
|
+
success: false,
|
|
293
|
+
error: "Insufficient funds",
|
|
294
|
+
}, -0.5);
|
|
295
|
+
await recorder.endTrajectory(trajectoryId);
|
|
296
|
+
// Check that file was written with correct metrics
|
|
297
|
+
const filePath = path.join(TEST_OUTPUT_DIR, `${trajectoryId}.json`);
|
|
298
|
+
const content = JSON.parse(fs.readFileSync(filePath, "utf-8"));
|
|
299
|
+
expect(content.trajectory.episodeLength).toBe(3);
|
|
300
|
+
expect(content.trajectory.tradesExecuted).toBe(3); // BUY_YES, SELL, BUY_NO
|
|
301
|
+
expect(content.trajectory.totalReward).toBe(1.0); // 1.0 + 0.5 + (-0.5)
|
|
302
|
+
expect(content.trajectory.finalStatus).toBe("completed_with_errors");
|
|
303
|
+
fs.unlinkSync(filePath);
|
|
304
|
+
});
|
|
305
|
+
// ===========================================================================
|
|
306
|
+
// Edge Cases
|
|
307
|
+
// ===========================================================================
|
|
308
|
+
test("handles trajectory with zero steps", async () => {
|
|
309
|
+
const trajectoryId = await recorder.startTrajectory({
|
|
310
|
+
agentId: "test-agent",
|
|
311
|
+
});
|
|
312
|
+
await recorder.endTrajectory(trajectoryId);
|
|
313
|
+
const filePath = path.join(TEST_OUTPUT_DIR, `${trajectoryId}.json`);
|
|
314
|
+
const content = JSON.parse(fs.readFileSync(filePath, "utf-8"));
|
|
315
|
+
expect(content.trajectory.episodeLength).toBe(0);
|
|
316
|
+
expect(content.trajectory.totalReward).toBe(0);
|
|
317
|
+
expect(content.llmCalls).toHaveLength(0);
|
|
318
|
+
fs.unlinkSync(filePath);
|
|
319
|
+
});
|
|
320
|
+
test("handles very long prompts in LLM calls", async () => {
    const trajectoryId = await recorder.startTrajectory({
        agentId: "test-agent",
    });
    recorder.startStep(trajectoryId, {
        agentBalance: 1000,
        agentPnL: 0,
        openPositions: 0,
    });
    // The same 50k-character string is used for both prompts and the response.
    const longPrompt = "A".repeat(50000); // 50k characters
    recorder.logLLMCall(trajectoryId, {
        model: "test",
        systemPrompt: longPrompt,
        userPrompt: longPrompt,
        response: longPrompt,
        temperature: 0.5,
        maxTokens: 100,
        purpose: "action",
    });
    recorder.completeStep(trajectoryId, { actionType: "hold", parameters: {}, success: true }, 0);
    await recorder.endTrajectory(trajectoryId);
    const filePath = path.join(TEST_OUTPUT_DIR, `${trajectoryId}.json`);
    try {
        expect(fs.existsSync(filePath)).toBe(true);
        const content = JSON.parse(fs.readFileSync(filePath, "utf-8"));
        // The stored system prompt must keep its full length (no truncation).
        expect(content.llmCalls[0].systemPrompt.length).toBe(50000);
    }
    finally {
        // Clean up even when an assertion above fails. The guard matters here:
        // if the existence check failed, an unconditional unlink would throw
        // ENOENT and hide the real assertion error.
        if (fs.existsSync(filePath)) {
            fs.unlinkSync(filePath);
        }
    }
});
|
|
347
|
+
test("handles negative rewards correctly", async () => {
    const trajectoryId = await recorder.startTrajectory({
        agentId: "test-agent",
    });
    recorder.startStep(trajectoryId, {
        agentBalance: 1000,
        agentPnL: 0,
        openPositions: 0,
    });
    // Single failed action carrying a negative reward.
    recorder.completeStep(trajectoryId, { actionType: "buy", parameters: {}, success: false, error: "Bad trade" }, -5.0);
    await recorder.endTrajectory(trajectoryId);
    const filePath = path.join(TEST_OUTPUT_DIR, `${trajectoryId}.json`);
    try {
        const content = JSON.parse(fs.readFileSync(filePath, "utf-8"));
        // The only step's reward is -5.0, so the expected total is -5.0.
        expect(content.trajectory.totalReward).toBe(-5.0);
    }
    finally {
        // Clean up even when an assertion above fails, so a red test does not
        // leave a stale file behind. The existence guard keeps a missing file
        // from masking the original failure with an ENOENT error.
        if (fs.existsSync(filePath)) {
            fs.unlinkSync(filePath);
        }
    }
});
|
|
363
|
+
});
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
 * Training Module
 *
 * Barrel file for the core training pipeline services used in RL model
 * development. It contains no logic of its own; every binding is re-exported
 * from a sibling module.
 */
export { AutomationPipeline, automationPipeline } from "./AutomationPipeline";
export { BenchmarkService, benchmarkService } from "./BenchmarkService";
export { ConfigValidator } from "./ConfigValidator";
export { logRLConfigOnStartup } from "./logRLConfig";
export { MarketOutcomesTracker } from "./MarketOutcomesTracker";
export { ModelDeployer, modelDeployer } from "./ModelDeployer";

// Model fetching
export { getLatestRLModel } from "./ModelFetcher";
export {
    ModelSelectionService,
    modelSelectionService,
} from "./ModelSelectionService";
export { ModelUsageVerifier } from "./ModelUsageVerifier";
export {
    createMultiModelOrchestrator,
    MultiModelOrchestrator,
} from "./MultiModelOrchestrator";
export {
    benchmarkAndMaybeDeployModel,
    checkTrainingReadiness,
    deployModelVersion,
    getAutomationPipelineStatus,
    getNextTrainingModelSelection,
    monitorTrainingJob,
    rollbackModelVersion,
    triggerTraining,
} from "./pipeline";
export {
    RewardBackpropagationService,
    rewardBackpropagationService,
} from "./RewardBackpropagationService";

// RL model configuration, including archetype model management
export {
    clearArchetypeModels,
    getAllArchetypeModels,
    getAvailableModelTiers,
    getModelForArchetype,
    getModelForTier,
    getModelTierForVram,
    getMultiModelConfig,
    getQuantizedModelName,
    getRLModelConfig,
    getVramRequirement,
    hasArchetypeModel,
    isRLModelAvailable,
    isTierAvailable,
    logRLModelConfig,
    MODEL_TIERS,
    registerArchetypeModel,
} from "./RLModelConfig";
export {
    RulerScoringService,
    rulerScoringService,
} from "./RulerScoringService";

// Storage services
export * from "./storage";
export { TrainingMonitor, trainingMonitor } from "./TrainingMonitor";
export { TrajectoryRecorder, trajectoryRecorder } from "./TrajectoryRecorder";

// Types
export * from "./types";

// Window utilities
export {
    generateWindowIds,
    getCurrentWindowId,
    getPreviousWindowId,
    getWindowIdForTimestamp,
    getWindowRange,
    isTimestampInWindow,
    isWindowComplete,
    parseWindowId,
} from "./window-utils";
|