@elizaos/training 2.0.0-alpha.13 → 2.0.0-alpha.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/research-output/training-runs/training-run-1773726941205.json +38 -0
- package/scripts/rank_trajectories.ts +0 -1
- package/scripts/run_task_benchmark.ts +4 -11
- package/src/adapter.ts +96 -49
- package/src/archetypes/ArchetypeConfigService.ts +188 -185
- package/src/archetypes/derive-archetype.ts +47 -47
- package/src/archetypes/index.ts +2 -2
- package/src/benchmark/ArchetypeMatchupBenchmark.ts +70 -70
- package/src/benchmark/BenchmarkChartGenerator.ts +70 -69
- package/src/benchmark/BenchmarkDataGenerator.ts +136 -136
- package/src/benchmark/BenchmarkDataViewer.ts +32 -30
- package/src/benchmark/BenchmarkHistoryService.ts +13 -12
- package/src/benchmark/BenchmarkRunner.ts +87 -83
- package/src/benchmark/BenchmarkValidator.ts +48 -46
- package/src/benchmark/FastEvalRunner.ts +17 -16
- package/src/benchmark/MetricsValidator.ts +20 -21
- package/src/benchmark/MetricsVisualizer.ts +92 -85
- package/src/benchmark/ModelBenchmarkService.ts +90 -82
- package/src/benchmark/ModelRegistry.ts +44 -44
- package/src/benchmark/RulerBenchmarkIntegration.ts +24 -24
- package/src/benchmark/SimulationA2AInterface.ts +118 -118
- package/src/benchmark/SimulationEngine.ts +51 -51
- package/src/benchmark/TaskRunner.ts +87 -79
- package/src/benchmark/__tests__/BenchmarkRunner.test.ts +80 -80
- package/src/benchmark/__tests__/HeadToHead.test.ts +26 -26
- package/src/benchmark/index.ts +27 -27
- package/src/benchmark/parseSimulationMetrics.ts +32 -32
- package/src/benchmark/simulation-types.ts +10 -10
- package/src/dependencies.ts +34 -34
- package/src/generation/TrajectoryGenerator.ts +39 -37
- package/src/generation/index.ts +1 -1
- package/src/huggingface/HuggingFaceDatasetUploader.ts +72 -72
- package/src/huggingface/HuggingFaceIntegrationService.ts +59 -53
- package/src/huggingface/HuggingFaceModelUploader.ts +60 -59
- package/src/huggingface/index.ts +6 -6
- package/src/huggingface/shared/HuggingFaceUploadUtil.ts +32 -32
- package/src/index.ts +27 -27
- package/src/init-training.ts +6 -6
- package/src/metrics/TrajectoryMetricsExtractor.ts +70 -71
- package/src/metrics/__tests__/TrajectoryMetricsExtractor.test.ts +182 -182
- package/src/metrics/index.ts +2 -2
- package/src/rubrics/__tests__/index.test.ts +73 -73
- package/src/rubrics/ass-kisser.ts +6 -6
- package/src/rubrics/degen.ts +6 -6
- package/src/rubrics/goody-twoshoes.ts +6 -6
- package/src/rubrics/index.ts +50 -50
- package/src/rubrics/information-trader.ts +6 -6
- package/src/rubrics/infosec.ts +6 -6
- package/src/rubrics/liar.ts +6 -6
- package/src/rubrics/perps-trader.ts +6 -6
- package/src/rubrics/researcher.ts +6 -6
- package/src/rubrics/scammer.ts +6 -6
- package/src/rubrics/social-butterfly.ts +7 -7
- package/src/rubrics/super-predictor.ts +6 -6
- package/src/rubrics/trader.ts +5 -5
- package/src/scoring/ArchetypeScoringService.ts +56 -54
- package/src/scoring/JudgePromptBuilder.ts +96 -96
- package/src/scoring/LLMJudgeCache.ts +26 -23
- package/src/scoring/index.ts +3 -3
- package/src/training/AutomationPipeline.ts +149 -140
- package/src/training/BenchmarkService.ts +49 -45
- package/src/training/ConfigValidator.ts +38 -32
- package/src/training/MarketOutcomesTracker.ts +22 -12
- package/src/training/ModelDeployer.ts +15 -15
- package/src/training/ModelFetcher.ts +7 -7
- package/src/training/ModelSelectionService.ts +32 -32
- package/src/training/ModelUsageVerifier.ts +31 -24
- package/src/training/MultiModelOrchestrator.ts +44 -44
- package/src/training/RLModelConfig.ts +57 -57
- package/src/training/RewardBackpropagationService.ts +18 -17
- package/src/training/RulerScoringService.ts +73 -72
- package/src/training/TrainingMonitor.ts +29 -29
- package/src/training/TrajectoryRecorder.ts +25 -27
- package/src/training/__tests__/TrajectoryRecorder.test.ts +105 -105
- package/src/training/index.ts +36 -36
- package/src/training/logRLConfig.ts +7 -7
- package/src/training/pipeline.ts +13 -16
- package/src/training/storage/ModelStorageService.ts +32 -32
- package/src/training/storage/TrainingDataArchiver.ts +21 -21
- package/src/training/storage/index.ts +2 -2
- package/src/training/types.ts +6 -6
- package/src/training/window-utils.ts +14 -14
- package/src/utils/index.ts +7 -7
- package/src/utils/logger.ts +5 -5
- package/src/utils/snowflake.ts +1 -1
- package/src/utils/synthetic-detector.ts +7 -7
|
@@ -4,16 +4,16 @@
|
|
|
4
4
|
* Validates that all metrics are properly extracted and never null/undefined/NaN.
|
|
5
5
|
*/
|
|
6
6
|
|
|
7
|
-
import { beforeEach, describe, expect, it } from
|
|
8
|
-
import type { JsonValue } from
|
|
9
|
-
import type { TrajectoryStep } from
|
|
7
|
+
import { beforeEach, describe, expect, it } from "bun:test";
|
|
8
|
+
import type { JsonValue } from "../../adapter";
|
|
9
|
+
import type { TrajectoryStep } from "../../training/types";
|
|
10
10
|
import {
|
|
11
11
|
TrajectoryMetricsExtractor,
|
|
12
12
|
trajectoryMetricsExtractor,
|
|
13
|
-
} from
|
|
14
|
-
import type { BehavioralMetrics } from
|
|
13
|
+
} from "../TrajectoryMetricsExtractor";
|
|
14
|
+
import type { BehavioralMetrics } from "../types";
|
|
15
15
|
|
|
16
|
-
describe(
|
|
16
|
+
describe("TrajectoryMetricsExtractor", () => {
|
|
17
17
|
let extractor: TrajectoryMetricsExtractor;
|
|
18
18
|
|
|
19
19
|
beforeEach(() => {
|
|
@@ -31,212 +31,212 @@ describe('TrajectoryMetricsExtractor', () => {
|
|
|
31
31
|
|
|
32
32
|
// Check social metrics - all should be finite numbers
|
|
33
33
|
const social = metrics.social;
|
|
34
|
-
expect(typeof social.groupChatsJoined).toBe(
|
|
34
|
+
expect(typeof social.groupChatsJoined).toBe("number");
|
|
35
35
|
expect(Number.isFinite(social.groupChatsJoined)).toBe(true);
|
|
36
36
|
expect(social.groupChatsJoined).toBeGreaterThanOrEqual(0);
|
|
37
37
|
|
|
38
|
-
expect(typeof social.groupChatsCreated).toBe(
|
|
38
|
+
expect(typeof social.groupChatsCreated).toBe("number");
|
|
39
39
|
expect(Number.isFinite(social.groupChatsCreated)).toBe(true);
|
|
40
40
|
expect(social.groupChatsCreated).toBeGreaterThanOrEqual(0);
|
|
41
41
|
|
|
42
|
-
expect(typeof social.groupMessagesSent).toBe(
|
|
42
|
+
expect(typeof social.groupMessagesSent).toBe("number");
|
|
43
43
|
expect(Number.isFinite(social.groupMessagesSent)).toBe(true);
|
|
44
44
|
expect(social.groupMessagesSent).toBeGreaterThanOrEqual(0);
|
|
45
45
|
|
|
46
|
-
expect(typeof social.dmsInitiated).toBe(
|
|
46
|
+
expect(typeof social.dmsInitiated).toBe("number");
|
|
47
47
|
expect(Number.isFinite(social.dmsInitiated)).toBe(true);
|
|
48
48
|
expect(social.dmsInitiated).toBeGreaterThanOrEqual(0);
|
|
49
49
|
|
|
50
|
-
expect(typeof social.dmsReceived).toBe(
|
|
50
|
+
expect(typeof social.dmsReceived).toBe("number");
|
|
51
51
|
expect(Number.isFinite(social.dmsReceived)).toBe(true);
|
|
52
52
|
expect(social.dmsReceived).toBeGreaterThanOrEqual(0);
|
|
53
53
|
|
|
54
|
-
expect(typeof social.dmResponseRate).toBe(
|
|
54
|
+
expect(typeof social.dmResponseRate).toBe("number");
|
|
55
55
|
expect(Number.isFinite(social.dmResponseRate)).toBe(true);
|
|
56
56
|
expect(social.dmResponseRate).toBeGreaterThanOrEqual(0);
|
|
57
57
|
expect(social.dmResponseRate).toBeLessThanOrEqual(1);
|
|
58
58
|
|
|
59
|
-
expect(typeof social.uniqueUsersInteracted).toBe(
|
|
59
|
+
expect(typeof social.uniqueUsersInteracted).toBe("number");
|
|
60
60
|
expect(Number.isFinite(social.uniqueUsersInteracted)).toBe(true);
|
|
61
61
|
expect(social.uniqueUsersInteracted).toBeGreaterThanOrEqual(0);
|
|
62
62
|
|
|
63
|
-
expect(typeof social.postsCreated).toBe(
|
|
63
|
+
expect(typeof social.postsCreated).toBe("number");
|
|
64
64
|
expect(Number.isFinite(social.postsCreated)).toBe(true);
|
|
65
65
|
expect(social.postsCreated).toBeGreaterThanOrEqual(0);
|
|
66
66
|
|
|
67
|
-
expect(typeof social.commentsMade).toBe(
|
|
67
|
+
expect(typeof social.commentsMade).toBe("number");
|
|
68
68
|
expect(Number.isFinite(social.commentsMade)).toBe(true);
|
|
69
69
|
expect(social.commentsMade).toBeGreaterThanOrEqual(0);
|
|
70
70
|
|
|
71
|
-
expect(typeof social.mentionsGiven).toBe(
|
|
71
|
+
expect(typeof social.mentionsGiven).toBe("number");
|
|
72
72
|
expect(Number.isFinite(social.mentionsGiven)).toBe(true);
|
|
73
73
|
expect(social.mentionsGiven).toBeGreaterThanOrEqual(0);
|
|
74
74
|
|
|
75
|
-
expect(typeof social.mentionsReceived).toBe(
|
|
75
|
+
expect(typeof social.mentionsReceived).toBe("number");
|
|
76
76
|
expect(Number.isFinite(social.mentionsReceived)).toBe(true);
|
|
77
77
|
expect(social.mentionsReceived).toBeGreaterThanOrEqual(0);
|
|
78
78
|
|
|
79
|
-
expect(typeof social.invitationsSent).toBe(
|
|
79
|
+
expect(typeof social.invitationsSent).toBe("number");
|
|
80
80
|
expect(Number.isFinite(social.invitationsSent)).toBe(true);
|
|
81
81
|
expect(social.invitationsSent).toBeGreaterThanOrEqual(0);
|
|
82
82
|
|
|
83
83
|
// Check trading metrics
|
|
84
84
|
const trading = metrics.trading;
|
|
85
|
-
expect(typeof trading.tradesExecuted).toBe(
|
|
85
|
+
expect(typeof trading.tradesExecuted).toBe("number");
|
|
86
86
|
expect(Number.isFinite(trading.tradesExecuted)).toBe(true);
|
|
87
87
|
expect(trading.tradesExecuted).toBeGreaterThanOrEqual(0);
|
|
88
88
|
|
|
89
|
-
expect(typeof trading.profitableTrades).toBe(
|
|
89
|
+
expect(typeof trading.profitableTrades).toBe("number");
|
|
90
90
|
expect(Number.isFinite(trading.profitableTrades)).toBe(true);
|
|
91
91
|
expect(trading.profitableTrades).toBeGreaterThanOrEqual(0);
|
|
92
92
|
|
|
93
|
-
expect(typeof trading.winRate).toBe(
|
|
93
|
+
expect(typeof trading.winRate).toBe("number");
|
|
94
94
|
expect(Number.isFinite(trading.winRate)).toBe(true);
|
|
95
95
|
expect(trading.winRate).toBeGreaterThanOrEqual(0);
|
|
96
96
|
expect(trading.winRate).toBeLessThanOrEqual(1);
|
|
97
97
|
|
|
98
|
-
expect(typeof trading.totalPnL).toBe(
|
|
98
|
+
expect(typeof trading.totalPnL).toBe("number");
|
|
99
99
|
expect(Number.isFinite(trading.totalPnL)).toBe(true);
|
|
100
100
|
|
|
101
|
-
expect(typeof trading.maxDrawdown).toBe(
|
|
101
|
+
expect(typeof trading.maxDrawdown).toBe("number");
|
|
102
102
|
expect(Number.isFinite(trading.maxDrawdown)).toBe(true);
|
|
103
103
|
expect(trading.maxDrawdown).toBeGreaterThanOrEqual(0);
|
|
104
104
|
|
|
105
|
-
expect(typeof trading.sharpeRatio).toBe(
|
|
105
|
+
expect(typeof trading.sharpeRatio).toBe("number");
|
|
106
106
|
expect(Number.isFinite(trading.sharpeRatio)).toBe(true);
|
|
107
107
|
|
|
108
|
-
expect(typeof trading.avgPositionSize).toBe(
|
|
108
|
+
expect(typeof trading.avgPositionSize).toBe("number");
|
|
109
109
|
expect(Number.isFinite(trading.avgPositionSize)).toBe(true);
|
|
110
110
|
expect(trading.avgPositionSize).toBeGreaterThanOrEqual(0);
|
|
111
111
|
|
|
112
|
-
expect(typeof trading.avgHoldingPeriod).toBe(
|
|
112
|
+
expect(typeof trading.avgHoldingPeriod).toBe("number");
|
|
113
113
|
expect(Number.isFinite(trading.avgHoldingPeriod)).toBe(true);
|
|
114
114
|
expect(trading.avgHoldingPeriod).toBeGreaterThanOrEqual(0);
|
|
115
115
|
|
|
116
|
-
expect(typeof trading.marketsTraded).toBe(
|
|
116
|
+
expect(typeof trading.marketsTraded).toBe("number");
|
|
117
117
|
expect(Number.isFinite(trading.marketsTraded)).toBe(true);
|
|
118
118
|
expect(trading.marketsTraded).toBeGreaterThanOrEqual(0);
|
|
119
119
|
|
|
120
|
-
expect(typeof trading.buyTrades).toBe(
|
|
120
|
+
expect(typeof trading.buyTrades).toBe("number");
|
|
121
121
|
expect(Number.isFinite(trading.buyTrades)).toBe(true);
|
|
122
122
|
expect(trading.buyTrades).toBeGreaterThanOrEqual(0);
|
|
123
123
|
|
|
124
|
-
expect(typeof trading.sellTrades).toBe(
|
|
124
|
+
expect(typeof trading.sellTrades).toBe("number");
|
|
125
125
|
expect(Number.isFinite(trading.sellTrades)).toBe(true);
|
|
126
126
|
expect(trading.sellTrades).toBeGreaterThanOrEqual(0);
|
|
127
127
|
|
|
128
|
-
expect(typeof trading.largestWin).toBe(
|
|
128
|
+
expect(typeof trading.largestWin).toBe("number");
|
|
129
129
|
expect(Number.isFinite(trading.largestWin)).toBe(true);
|
|
130
130
|
|
|
131
|
-
expect(typeof trading.largestLoss).toBe(
|
|
131
|
+
expect(typeof trading.largestLoss).toBe("number");
|
|
132
132
|
expect(Number.isFinite(trading.largestLoss)).toBe(true);
|
|
133
133
|
|
|
134
134
|
// Check influence metrics
|
|
135
135
|
const influence = metrics.influence;
|
|
136
|
-
expect(typeof influence.followersGained).toBe(
|
|
136
|
+
expect(typeof influence.followersGained).toBe("number");
|
|
137
137
|
expect(Number.isFinite(influence.followersGained)).toBe(true);
|
|
138
138
|
|
|
139
|
-
expect(typeof influence.reputationDelta).toBe(
|
|
139
|
+
expect(typeof influence.reputationDelta).toBe("number");
|
|
140
140
|
expect(Number.isFinite(influence.reputationDelta)).toBe(true);
|
|
141
141
|
|
|
142
|
-
expect(typeof influence.trustLevelDelta).toBe(
|
|
142
|
+
expect(typeof influence.trustLevelDelta).toBe("number");
|
|
143
143
|
expect(Number.isFinite(influence.trustLevelDelta)).toBe(true);
|
|
144
144
|
|
|
145
|
-
expect(typeof influence.influenceScore).toBe(
|
|
145
|
+
expect(typeof influence.influenceScore).toBe("number");
|
|
146
146
|
expect(Number.isFinite(influence.influenceScore)).toBe(true);
|
|
147
147
|
|
|
148
|
-
expect(typeof influence.informationSpread).toBe(
|
|
148
|
+
expect(typeof influence.informationSpread).toBe("number");
|
|
149
149
|
expect(Number.isFinite(influence.informationSpread)).toBe(true);
|
|
150
150
|
expect(influence.informationSpread).toBeGreaterThanOrEqual(0);
|
|
151
151
|
|
|
152
|
-
expect(typeof influence.positiveReactions).toBe(
|
|
152
|
+
expect(typeof influence.positiveReactions).toBe("number");
|
|
153
153
|
expect(Number.isFinite(influence.positiveReactions)).toBe(true);
|
|
154
154
|
expect(influence.positiveReactions).toBeGreaterThanOrEqual(0);
|
|
155
155
|
|
|
156
|
-
expect(typeof influence.negativeReactions).toBe(
|
|
156
|
+
expect(typeof influence.negativeReactions).toBe("number");
|
|
157
157
|
expect(Number.isFinite(influence.negativeReactions)).toBe(true);
|
|
158
158
|
expect(influence.negativeReactions).toBeGreaterThanOrEqual(0);
|
|
159
159
|
|
|
160
160
|
// Check behavior metrics
|
|
161
161
|
const behavior = metrics.behavior;
|
|
162
|
-
expect(typeof behavior.actionsPerTick).toBe(
|
|
162
|
+
expect(typeof behavior.actionsPerTick).toBe("number");
|
|
163
163
|
expect(Number.isFinite(behavior.actionsPerTick)).toBe(true);
|
|
164
164
|
expect(behavior.actionsPerTick).toBeGreaterThanOrEqual(0);
|
|
165
165
|
|
|
166
|
-
expect(typeof behavior.socialToTradeRatio).toBe(
|
|
166
|
+
expect(typeof behavior.socialToTradeRatio).toBe("number");
|
|
167
167
|
expect(Number.isFinite(behavior.socialToTradeRatio)).toBe(true);
|
|
168
168
|
expect(behavior.socialToTradeRatio).toBeGreaterThanOrEqual(0);
|
|
169
169
|
|
|
170
|
-
expect(typeof behavior.avgResponseTime).toBe(
|
|
170
|
+
expect(typeof behavior.avgResponseTime).toBe("number");
|
|
171
171
|
expect(Number.isFinite(behavior.avgResponseTime)).toBe(true);
|
|
172
172
|
expect(behavior.avgResponseTime).toBeGreaterThanOrEqual(0);
|
|
173
173
|
|
|
174
|
-
expect(typeof behavior.consistencyScore).toBe(
|
|
174
|
+
expect(typeof behavior.consistencyScore).toBe("number");
|
|
175
175
|
expect(Number.isFinite(behavior.consistencyScore)).toBe(true);
|
|
176
176
|
expect(behavior.consistencyScore).toBeGreaterThanOrEqual(0);
|
|
177
177
|
expect(behavior.consistencyScore).toBeLessThanOrEqual(1);
|
|
178
178
|
|
|
179
|
-
expect(typeof behavior.totalActions).toBe(
|
|
179
|
+
expect(typeof behavior.totalActions).toBe("number");
|
|
180
180
|
expect(Number.isFinite(behavior.totalActions)).toBe(true);
|
|
181
181
|
expect(behavior.totalActions).toBeGreaterThanOrEqual(0);
|
|
182
182
|
|
|
183
|
-
expect(typeof behavior.failedActions).toBe(
|
|
183
|
+
expect(typeof behavior.failedActions).toBe("number");
|
|
184
184
|
expect(Number.isFinite(behavior.failedActions)).toBe(true);
|
|
185
185
|
expect(behavior.failedActions).toBeGreaterThanOrEqual(0);
|
|
186
186
|
|
|
187
|
-
expect(typeof behavior.actionSuccessRate).toBe(
|
|
187
|
+
expect(typeof behavior.actionSuccessRate).toBe("number");
|
|
188
188
|
expect(Number.isFinite(behavior.actionSuccessRate)).toBe(true);
|
|
189
189
|
expect(behavior.actionSuccessRate).toBeGreaterThanOrEqual(0);
|
|
190
190
|
expect(behavior.actionSuccessRate).toBeLessThanOrEqual(1);
|
|
191
191
|
|
|
192
|
-
expect(typeof behavior.episodeLength).toBe(
|
|
192
|
+
expect(typeof behavior.episodeLength).toBe("number");
|
|
193
193
|
expect(Number.isFinite(behavior.episodeLength)).toBe(true);
|
|
194
194
|
expect(behavior.episodeLength).toBeGreaterThanOrEqual(0);
|
|
195
195
|
|
|
196
196
|
expect(Array.isArray(behavior.actionTypesUsed)).toBe(true);
|
|
197
|
-
expect(typeof behavior.dominantActionType).toBe(
|
|
197
|
+
expect(typeof behavior.dominantActionType).toBe("string");
|
|
198
198
|
|
|
199
199
|
// Check information metrics
|
|
200
200
|
const information = metrics.information;
|
|
201
|
-
expect(typeof information.researchActions).toBe(
|
|
201
|
+
expect(typeof information.researchActions).toBe("number");
|
|
202
202
|
expect(Number.isFinite(information.researchActions)).toBe(true);
|
|
203
203
|
expect(information.researchActions).toBeGreaterThanOrEqual(0);
|
|
204
204
|
|
|
205
|
-
expect(typeof information.newsConsumed).toBe(
|
|
205
|
+
expect(typeof information.newsConsumed).toBe("number");
|
|
206
206
|
expect(Number.isFinite(information.newsConsumed)).toBe(true);
|
|
207
207
|
expect(information.newsConsumed).toBeGreaterThanOrEqual(0);
|
|
208
208
|
|
|
209
|
-
expect(typeof information.marketDataQueries).toBe(
|
|
209
|
+
expect(typeof information.marketDataQueries).toBe("number");
|
|
210
210
|
expect(Number.isFinite(information.marketDataQueries)).toBe(true);
|
|
211
211
|
expect(information.marketDataQueries).toBeGreaterThanOrEqual(0);
|
|
212
212
|
|
|
213
|
-
expect(typeof information.infoRequestsSent).toBe(
|
|
213
|
+
expect(typeof information.infoRequestsSent).toBe("number");
|
|
214
214
|
expect(Number.isFinite(information.infoRequestsSent)).toBe(true);
|
|
215
215
|
expect(information.infoRequestsSent).toBeGreaterThanOrEqual(0);
|
|
216
216
|
|
|
217
|
-
expect(typeof information.infoShared).toBe(
|
|
217
|
+
expect(typeof information.infoShared).toBe("number");
|
|
218
218
|
expect(Number.isFinite(information.infoShared)).toBe(true);
|
|
219
219
|
expect(information.infoShared).toBeGreaterThanOrEqual(0);
|
|
220
220
|
|
|
221
|
-
expect(typeof information.predictionsMade).toBe(
|
|
221
|
+
expect(typeof information.predictionsMade).toBe("number");
|
|
222
222
|
expect(Number.isFinite(information.predictionsMade)).toBe(true);
|
|
223
223
|
expect(information.predictionsMade).toBeGreaterThanOrEqual(0);
|
|
224
224
|
|
|
225
|
-
expect(typeof information.correctPredictions).toBe(
|
|
225
|
+
expect(typeof information.correctPredictions).toBe("number");
|
|
226
226
|
expect(Number.isFinite(information.correctPredictions)).toBe(true);
|
|
227
227
|
expect(information.correctPredictions).toBeGreaterThanOrEqual(0);
|
|
228
228
|
|
|
229
|
-
expect(typeof information.predictionAccuracy).toBe(
|
|
229
|
+
expect(typeof information.predictionAccuracy).toBe("number");
|
|
230
230
|
expect(Number.isFinite(information.predictionAccuracy)).toBe(true);
|
|
231
231
|
expect(information.predictionAccuracy).toBeGreaterThanOrEqual(0);
|
|
232
232
|
expect(information.predictionAccuracy).toBeLessThanOrEqual(1);
|
|
233
233
|
}
|
|
234
234
|
|
|
235
|
-
describe(
|
|
236
|
-
it(
|
|
235
|
+
describe("extract()", () => {
|
|
236
|
+
it("should return valid metrics for empty steps array", () => {
|
|
237
237
|
const metrics = extractor.extract({
|
|
238
|
-
trajectoryId:
|
|
239
|
-
agentId:
|
|
238
|
+
trajectoryId: "test-traj-1",
|
|
239
|
+
agentId: "test-agent-1",
|
|
240
240
|
steps: [],
|
|
241
241
|
});
|
|
242
242
|
|
|
@@ -245,7 +245,7 @@ describe('TrajectoryMetricsExtractor', () => {
|
|
|
245
245
|
expect(metrics.behavior.totalActions).toBe(0);
|
|
246
246
|
});
|
|
247
247
|
|
|
248
|
-
it(
|
|
248
|
+
it("should return valid metrics for minimal step with no action", () => {
|
|
249
249
|
const steps: TrajectoryStep[] = [
|
|
250
250
|
{
|
|
251
251
|
stepNumber: 0,
|
|
@@ -258,7 +258,7 @@ describe('TrajectoryMetricsExtractor', () => {
|
|
|
258
258
|
providerAccesses: [],
|
|
259
259
|
llmCalls: [],
|
|
260
260
|
action: {
|
|
261
|
-
actionType:
|
|
261
|
+
actionType: "idle",
|
|
262
262
|
parameters: {},
|
|
263
263
|
success: true,
|
|
264
264
|
},
|
|
@@ -267,8 +267,8 @@ describe('TrajectoryMetricsExtractor', () => {
|
|
|
267
267
|
];
|
|
268
268
|
|
|
269
269
|
const metrics = extractor.extract({
|
|
270
|
-
trajectoryId:
|
|
271
|
-
agentId:
|
|
270
|
+
trajectoryId: "test-traj-2",
|
|
271
|
+
agentId: "test-agent-2",
|
|
272
272
|
steps,
|
|
273
273
|
});
|
|
274
274
|
|
|
@@ -276,31 +276,31 @@ describe('TrajectoryMetricsExtractor', () => {
|
|
|
276
276
|
expect(metrics.behavior.episodeLength).toBe(1);
|
|
277
277
|
});
|
|
278
278
|
|
|
279
|
-
it(
|
|
279
|
+
it("should correctly count trading actions", () => {
|
|
280
280
|
const steps: TrajectoryStep[] = [
|
|
281
281
|
createStep({
|
|
282
|
-
actionType:
|
|
283
|
-
parameters: { marketId:
|
|
282
|
+
actionType: "buy",
|
|
283
|
+
parameters: { marketId: "BTC", amount: 100, side: "buy" },
|
|
284
284
|
result: { pnl: 10 },
|
|
285
285
|
success: true,
|
|
286
286
|
}),
|
|
287
287
|
createStep({
|
|
288
|
-
actionType:
|
|
289
|
-
parameters: { marketId:
|
|
288
|
+
actionType: "sell",
|
|
289
|
+
parameters: { marketId: "ETH", amount: 50, side: "sell" },
|
|
290
290
|
result: { pnl: -5 },
|
|
291
291
|
success: true,
|
|
292
292
|
}),
|
|
293
293
|
createStep({
|
|
294
|
-
actionType:
|
|
295
|
-
parameters: { marketId:
|
|
294
|
+
actionType: "trade",
|
|
295
|
+
parameters: { marketId: "BTC", amount: 200 },
|
|
296
296
|
result: { pnl: 20 },
|
|
297
297
|
success: true,
|
|
298
298
|
}),
|
|
299
299
|
];
|
|
300
300
|
|
|
301
301
|
const metrics = extractor.extract({
|
|
302
|
-
trajectoryId:
|
|
303
|
-
agentId:
|
|
302
|
+
trajectoryId: "test-traj-3",
|
|
303
|
+
agentId: "test-agent-3",
|
|
304
304
|
steps,
|
|
305
305
|
});
|
|
306
306
|
|
|
@@ -316,38 +316,38 @@ describe('TrajectoryMetricsExtractor', () => {
|
|
|
316
316
|
expect(metrics.trading.largestLoss).toBe(-5);
|
|
317
317
|
});
|
|
318
318
|
|
|
319
|
-
it(
|
|
319
|
+
it("should correctly count social actions", () => {
|
|
320
320
|
const steps: TrajectoryStep[] = [
|
|
321
321
|
createStep({
|
|
322
|
-
actionType:
|
|
323
|
-
parameters: { groupId:
|
|
322
|
+
actionType: "join_group_chat",
|
|
323
|
+
parameters: { groupId: "group-1" },
|
|
324
324
|
success: true,
|
|
325
325
|
}),
|
|
326
326
|
createStep({
|
|
327
|
-
actionType:
|
|
328
|
-
parameters: { groupId:
|
|
327
|
+
actionType: "post_group_message",
|
|
328
|
+
parameters: { groupId: "group-1", message: "Hello" },
|
|
329
329
|
success: true,
|
|
330
330
|
}),
|
|
331
331
|
createStep({
|
|
332
|
-
actionType:
|
|
333
|
-
parameters: { toUserId:
|
|
332
|
+
actionType: "send_dm",
|
|
333
|
+
parameters: { toUserId: "user-2", initiator: "test-agent-4" },
|
|
334
334
|
success: true,
|
|
335
335
|
}),
|
|
336
336
|
createStep({
|
|
337
|
-
actionType:
|
|
337
|
+
actionType: "create_post",
|
|
338
338
|
parameters: {},
|
|
339
339
|
success: true,
|
|
340
340
|
}),
|
|
341
341
|
createStep({
|
|
342
|
-
actionType:
|
|
343
|
-
parameters: { authorId:
|
|
342
|
+
actionType: "comment",
|
|
343
|
+
parameters: { authorId: "user-3" },
|
|
344
344
|
success: true,
|
|
345
345
|
}),
|
|
346
346
|
];
|
|
347
347
|
|
|
348
348
|
const metrics = extractor.extract({
|
|
349
|
-
trajectoryId:
|
|
350
|
-
agentId:
|
|
349
|
+
trajectoryId: "test-traj-4",
|
|
350
|
+
agentId: "test-agent-4",
|
|
351
351
|
steps,
|
|
352
352
|
});
|
|
353
353
|
|
|
@@ -360,17 +360,17 @@ describe('TrajectoryMetricsExtractor', () => {
|
|
|
360
360
|
expect(metrics.social.uniqueUsersInteracted).toBeGreaterThanOrEqual(2);
|
|
361
361
|
});
|
|
362
362
|
|
|
363
|
-
it(
|
|
363
|
+
it("should handle failed actions correctly", () => {
|
|
364
364
|
const steps: TrajectoryStep[] = [
|
|
365
|
-
createStep({ actionType:
|
|
366
|
-
createStep({ actionType:
|
|
367
|
-
createStep({ actionType:
|
|
368
|
-
createStep({ actionType:
|
|
365
|
+
createStep({ actionType: "trade", success: true }),
|
|
366
|
+
createStep({ actionType: "trade", success: false }),
|
|
367
|
+
createStep({ actionType: "trade", success: false }),
|
|
368
|
+
createStep({ actionType: "trade", success: true }),
|
|
369
369
|
];
|
|
370
370
|
|
|
371
371
|
const metrics = extractor.extract({
|
|
372
|
-
trajectoryId:
|
|
373
|
-
agentId:
|
|
372
|
+
trajectoryId: "test-traj-5",
|
|
373
|
+
agentId: "test-agent-5",
|
|
374
374
|
steps,
|
|
375
375
|
});
|
|
376
376
|
|
|
@@ -380,17 +380,17 @@ describe('TrajectoryMetricsExtractor', () => {
|
|
|
380
380
|
expect(metrics.behavior.actionSuccessRate).toBe(0.5);
|
|
381
381
|
});
|
|
382
382
|
|
|
383
|
-
it(
|
|
383
|
+
it("should calculate socialToTradeRatio correctly", () => {
|
|
384
384
|
const steps: TrajectoryStep[] = [
|
|
385
|
-
createStep({ actionType:
|
|
386
|
-
createStep({ actionType:
|
|
387
|
-
createStep({ actionType:
|
|
388
|
-
createStep({ actionType:
|
|
385
|
+
createStep({ actionType: "send_dm", success: true }),
|
|
386
|
+
createStep({ actionType: "send_dm", success: true }),
|
|
387
|
+
createStep({ actionType: "send_dm", success: true }),
|
|
388
|
+
createStep({ actionType: "trade", success: true }),
|
|
389
389
|
];
|
|
390
390
|
|
|
391
391
|
const metrics = extractor.extract({
|
|
392
|
-
trajectoryId:
|
|
393
|
-
agentId:
|
|
392
|
+
trajectoryId: "test-traj-6",
|
|
393
|
+
agentId: "test-agent-6",
|
|
394
394
|
steps,
|
|
395
395
|
});
|
|
396
396
|
|
|
@@ -398,16 +398,16 @@ describe('TrajectoryMetricsExtractor', () => {
|
|
|
398
398
|
expect(metrics.behavior.socialToTradeRatio).toBe(3);
|
|
399
399
|
});
|
|
400
400
|
|
|
401
|
-
it(
|
|
401
|
+
it("should handle social-only trajectories without weird ratios", () => {
|
|
402
402
|
const steps: TrajectoryStep[] = [
|
|
403
|
-
createStep({ actionType:
|
|
404
|
-
createStep({ actionType:
|
|
405
|
-
createStep({ actionType:
|
|
403
|
+
createStep({ actionType: "send_dm", success: true }),
|
|
404
|
+
createStep({ actionType: "create_post", success: true }),
|
|
405
|
+
createStep({ actionType: "comment", success: true }),
|
|
406
406
|
];
|
|
407
407
|
|
|
408
408
|
const metrics = extractor.extract({
|
|
409
|
-
trajectoryId:
|
|
410
|
-
agentId:
|
|
409
|
+
trajectoryId: "test-traj-7",
|
|
410
|
+
agentId: "test-agent-7",
|
|
411
411
|
steps,
|
|
412
412
|
});
|
|
413
413
|
|
|
@@ -417,7 +417,7 @@ describe('TrajectoryMetricsExtractor', () => {
|
|
|
417
417
|
expect(Number.isFinite(metrics.behavior.socialToTradeRatio)).toBe(true);
|
|
418
418
|
});
|
|
419
419
|
|
|
420
|
-
it(
|
|
420
|
+
it("should track reputation changes", () => {
|
|
421
421
|
const steps: TrajectoryStep[] = [
|
|
422
422
|
createStepWithEnvState({ reputation: 100, trustLevel: 50 }),
|
|
423
423
|
createStepWithEnvState({ reputation: 110, trustLevel: 55 }),
|
|
@@ -425,8 +425,8 @@ describe('TrajectoryMetricsExtractor', () => {
|
|
|
425
425
|
];
|
|
426
426
|
|
|
427
427
|
const metrics = extractor.extract({
|
|
428
|
-
trajectoryId:
|
|
429
|
-
agentId:
|
|
428
|
+
trajectoryId: "test-traj-8",
|
|
429
|
+
agentId: "test-agent-8",
|
|
430
430
|
steps,
|
|
431
431
|
});
|
|
432
432
|
|
|
@@ -435,17 +435,17 @@ describe('TrajectoryMetricsExtractor', () => {
|
|
|
435
435
|
expect(metrics.influence.trustLevelDelta).toBe(10); // 60 - 50
|
|
436
436
|
});
|
|
437
437
|
|
|
438
|
-
it(
|
|
438
|
+
it("should calculate consistency score correctly", () => {
|
|
439
439
|
// Perfectly consistent (all same action)
|
|
440
440
|
const consistentSteps: TrajectoryStep[] = [
|
|
441
|
-
createStep({ actionType:
|
|
442
|
-
createStep({ actionType:
|
|
443
|
-
createStep({ actionType:
|
|
441
|
+
createStep({ actionType: "trade", success: true }),
|
|
442
|
+
createStep({ actionType: "trade", success: true }),
|
|
443
|
+
createStep({ actionType: "trade", success: true }),
|
|
444
444
|
];
|
|
445
445
|
|
|
446
446
|
const consistentMetrics = extractor.extract({
|
|
447
|
-
trajectoryId:
|
|
448
|
-
agentId:
|
|
447
|
+
trajectoryId: "test-traj-9",
|
|
448
|
+
agentId: "test-agent-9",
|
|
449
449
|
steps: consistentSteps,
|
|
450
450
|
});
|
|
451
451
|
|
|
@@ -454,16 +454,16 @@ describe('TrajectoryMetricsExtractor', () => {
|
|
|
454
454
|
|
|
455
455
|
// Less consistent (varied actions)
|
|
456
456
|
const variedSteps: TrajectoryStep[] = [
|
|
457
|
-
createStep({ actionType:
|
|
458
|
-
createStep({ actionType:
|
|
459
|
-
createStep({ actionType:
|
|
460
|
-
createStep({ actionType:
|
|
461
|
-
createStep({ actionType:
|
|
457
|
+
createStep({ actionType: "trade", success: true }),
|
|
458
|
+
createStep({ actionType: "trade", success: true }),
|
|
459
|
+
createStep({ actionType: "trade", success: true }),
|
|
460
|
+
createStep({ actionType: "send_dm", success: true }),
|
|
461
|
+
createStep({ actionType: "create_post", success: true }),
|
|
462
462
|
];
|
|
463
463
|
|
|
464
464
|
const variedMetrics = extractor.extract({
|
|
465
|
-
trajectoryId:
|
|
466
|
-
agentId:
|
|
465
|
+
trajectoryId: "test-traj-10",
|
|
466
|
+
agentId: "test-agent-10",
|
|
467
467
|
steps: variedSteps,
|
|
468
468
|
});
|
|
469
469
|
|
|
@@ -472,47 +472,47 @@ describe('TrajectoryMetricsExtractor', () => {
|
|
|
472
472
|
expect(variedMetrics.behavior.consistencyScore).toBeGreaterThan(0);
|
|
473
473
|
});
|
|
474
474
|
|
|
475
|
-
it(
|
|
475
|
+
it("should correctly identify dominant action type", () => {
|
|
476
476
|
const steps: TrajectoryStep[] = [
|
|
477
|
-
createStep({ actionType:
|
|
478
|
-
createStep({ actionType:
|
|
479
|
-
createStep({ actionType:
|
|
480
|
-
createStep({ actionType:
|
|
481
|
-
createStep({ actionType:
|
|
477
|
+
createStep({ actionType: "trade", success: true }),
|
|
478
|
+
createStep({ actionType: "trade", success: true }),
|
|
479
|
+
createStep({ actionType: "trade", success: true }),
|
|
480
|
+
createStep({ actionType: "send_dm", success: true }),
|
|
481
|
+
createStep({ actionType: "send_dm", success: true }),
|
|
482
482
|
];
|
|
483
483
|
|
|
484
484
|
const metrics = extractor.extract({
|
|
485
|
-
trajectoryId:
|
|
486
|
-
agentId:
|
|
485
|
+
trajectoryId: "test-traj-11",
|
|
486
|
+
agentId: "test-agent-11",
|
|
487
487
|
steps,
|
|
488
488
|
});
|
|
489
489
|
|
|
490
490
|
assertValidMetrics(metrics);
|
|
491
|
-
expect(metrics.behavior.dominantActionType).toBe(
|
|
491
|
+
expect(metrics.behavior.dominantActionType).toBe("trade");
|
|
492
492
|
});
|
|
493
493
|
|
|
494
|
-
it(
|
|
494
|
+
it("should handle prediction correctness tracking", () => {
|
|
495
495
|
const steps: TrajectoryStep[] = [
|
|
496
496
|
createStep({
|
|
497
|
-
actionType:
|
|
497
|
+
actionType: "predict",
|
|
498
498
|
success: true,
|
|
499
499
|
correctness: { predictionCorrect: true },
|
|
500
500
|
}),
|
|
501
501
|
createStep({
|
|
502
|
-
actionType:
|
|
502
|
+
actionType: "predict",
|
|
503
503
|
success: true,
|
|
504
504
|
correctness: { predictionCorrect: false },
|
|
505
505
|
}),
|
|
506
506
|
createStep({
|
|
507
|
-
actionType:
|
|
507
|
+
actionType: "predict",
|
|
508
508
|
success: true,
|
|
509
509
|
correctness: { predictionCorrect: true },
|
|
510
510
|
}),
|
|
511
511
|
];
|
|
512
512
|
|
|
513
513
|
const metrics = extractor.extract({
|
|
514
|
-
trajectoryId:
|
|
515
|
-
agentId:
|
|
514
|
+
trajectoryId: "test-traj-12",
|
|
515
|
+
agentId: "test-agent-12",
|
|
516
516
|
steps,
|
|
517
517
|
});
|
|
518
518
|
|
|
@@ -523,46 +523,46 @@ describe('TrajectoryMetricsExtractor', () => {
|
|
|
523
523
|
});
|
|
524
524
|
});
|
|
525
525
|
|
|
526
|
-
describe(
|
|
527
|
-
it(
|
|
526
|
+
describe("extractFromRaw()", () => {
|
|
527
|
+
it("should return null for invalid JSON", () => {
|
|
528
528
|
const result = extractor.extractFromRaw({
|
|
529
|
-
trajectoryId:
|
|
530
|
-
agentId:
|
|
531
|
-
stepsJson:
|
|
529
|
+
trajectoryId: "test-traj-13",
|
|
530
|
+
agentId: "test-agent-13",
|
|
531
|
+
stepsJson: "not valid json",
|
|
532
532
|
});
|
|
533
533
|
|
|
534
534
|
expect(result).toBeNull();
|
|
535
535
|
});
|
|
536
536
|
|
|
537
|
-
it(
|
|
537
|
+
it("should return null for empty array JSON", () => {
|
|
538
538
|
const result = extractor.extractFromRaw({
|
|
539
|
-
trajectoryId:
|
|
540
|
-
agentId:
|
|
541
|
-
stepsJson:
|
|
539
|
+
trajectoryId: "test-traj-14",
|
|
540
|
+
agentId: "test-agent-14",
|
|
541
|
+
stepsJson: "[]",
|
|
542
542
|
});
|
|
543
543
|
|
|
544
544
|
expect(result).toBeNull();
|
|
545
545
|
});
|
|
546
546
|
|
|
547
|
-
it(
|
|
547
|
+
it("should return null for null JSON", () => {
|
|
548
548
|
const result = extractor.extractFromRaw({
|
|
549
|
-
trajectoryId:
|
|
550
|
-
agentId:
|
|
551
|
-
stepsJson:
|
|
549
|
+
trajectoryId: "test-traj-15",
|
|
550
|
+
agentId: "test-agent-15",
|
|
551
|
+
stepsJson: "null",
|
|
552
552
|
});
|
|
553
553
|
|
|
554
554
|
expect(result).toBeNull();
|
|
555
555
|
});
|
|
556
556
|
|
|
557
|
-
it(
|
|
557
|
+
it("should correctly parse valid JSON steps", () => {
|
|
558
558
|
const steps: TrajectoryStep[] = [
|
|
559
|
-
createStep({ actionType:
|
|
560
|
-
createStep({ actionType:
|
|
559
|
+
createStep({ actionType: "trade", success: true }),
|
|
560
|
+
createStep({ actionType: "send_dm", success: true }),
|
|
561
561
|
];
|
|
562
562
|
|
|
563
563
|
const result = extractor.extractFromRaw({
|
|
564
|
-
trajectoryId:
|
|
565
|
-
agentId:
|
|
564
|
+
trajectoryId: "test-traj-16",
|
|
565
|
+
agentId: "test-agent-16",
|
|
566
566
|
stepsJson: JSON.stringify(steps),
|
|
567
567
|
});
|
|
568
568
|
|
|
@@ -573,7 +573,7 @@ describe('TrajectoryMetricsExtractor', () => {
|
|
|
573
573
|
}
|
|
574
574
|
});
|
|
575
575
|
|
|
576
|
-
it(
|
|
576
|
+
it("should use finalPnL when provided", () => {
|
|
577
577
|
const steps: TrajectoryStep[] = [
|
|
578
578
|
{
|
|
579
579
|
stepNumber: 0,
|
|
@@ -585,14 +585,14 @@ describe('TrajectoryMetricsExtractor', () => {
|
|
|
585
585
|
},
|
|
586
586
|
providerAccesses: [],
|
|
587
587
|
llmCalls: [],
|
|
588
|
-
action: { actionType:
|
|
588
|
+
action: { actionType: "idle", parameters: {}, success: true },
|
|
589
589
|
reward: 0,
|
|
590
590
|
},
|
|
591
591
|
];
|
|
592
592
|
|
|
593
593
|
const result = extractor.extractFromRaw({
|
|
594
|
-
trajectoryId:
|
|
595
|
-
agentId:
|
|
594
|
+
trajectoryId: "test-traj-17",
|
|
595
|
+
agentId: "test-agent-17",
|
|
596
596
|
stepsJson: JSON.stringify(steps),
|
|
597
597
|
finalPnL: 150.5,
|
|
598
598
|
});
|
|
@@ -604,16 +604,16 @@ describe('TrajectoryMetricsExtractor', () => {
|
|
|
604
604
|
});
|
|
605
605
|
});
|
|
606
606
|
|
|
607
|
-
describe(
|
|
608
|
-
it(
|
|
607
|
+
describe("singleton instance", () => {
|
|
608
|
+
it("should export a singleton instance", () => {
|
|
609
609
|
expect(trajectoryMetricsExtractor).toBeInstanceOf(
|
|
610
|
-
TrajectoryMetricsExtractor
|
|
610
|
+
TrajectoryMetricsExtractor,
|
|
611
611
|
);
|
|
612
612
|
});
|
|
613
613
|
});
|
|
614
614
|
|
|
615
|
-
describe(
|
|
616
|
-
it(
|
|
615
|
+
describe("edge cases", () => {
|
|
616
|
+
it("should handle undefined parameters gracefully", () => {
|
|
617
617
|
const steps: TrajectoryStep[] = [
|
|
618
618
|
{
|
|
619
619
|
stepNumber: 0,
|
|
@@ -626,7 +626,7 @@ describe('TrajectoryMetricsExtractor', () => {
|
|
|
626
626
|
providerAccesses: [],
|
|
627
627
|
llmCalls: [],
|
|
628
628
|
action: {
|
|
629
|
-
actionType:
|
|
629
|
+
actionType: "trade",
|
|
630
630
|
parameters: {},
|
|
631
631
|
success: true,
|
|
632
632
|
},
|
|
@@ -635,18 +635,18 @@ describe('TrajectoryMetricsExtractor', () => {
|
|
|
635
635
|
];
|
|
636
636
|
|
|
637
637
|
const metrics = extractor.extract({
|
|
638
|
-
trajectoryId:
|
|
639
|
-
agentId:
|
|
638
|
+
trajectoryId: "test-edge-1",
|
|
639
|
+
agentId: "test-agent-edge-1",
|
|
640
640
|
steps,
|
|
641
641
|
});
|
|
642
642
|
|
|
643
643
|
assertValidMetrics(metrics);
|
|
644
644
|
});
|
|
645
645
|
|
|
646
|
-
it(
|
|
646
|
+
it("should handle very large numbers without overflow", () => {
|
|
647
647
|
const steps: TrajectoryStep[] = [
|
|
648
648
|
createStep({
|
|
649
|
-
actionType:
|
|
649
|
+
actionType: "trade",
|
|
650
650
|
parameters: { amount: 1e15 },
|
|
651
651
|
result: { pnl: 1e12 },
|
|
652
652
|
success: true,
|
|
@@ -654,8 +654,8 @@ describe('TrajectoryMetricsExtractor', () => {
|
|
|
654
654
|
];
|
|
655
655
|
|
|
656
656
|
const metrics = extractor.extract({
|
|
657
|
-
trajectoryId:
|
|
658
|
-
agentId:
|
|
657
|
+
trajectoryId: "test-edge-2",
|
|
658
|
+
agentId: "test-agent-edge-2",
|
|
659
659
|
steps,
|
|
660
660
|
});
|
|
661
661
|
|
|
@@ -664,23 +664,23 @@ describe('TrajectoryMetricsExtractor', () => {
|
|
|
664
664
|
expect(Number.isFinite(metrics.trading.avgPositionSize)).toBe(true);
|
|
665
665
|
});
|
|
666
666
|
|
|
667
|
-
it(
|
|
667
|
+
it("should handle negative PnL correctly", () => {
|
|
668
668
|
const steps: TrajectoryStep[] = [
|
|
669
669
|
createStep({
|
|
670
|
-
actionType:
|
|
670
|
+
actionType: "trade",
|
|
671
671
|
result: { pnl: -100 },
|
|
672
672
|
success: true,
|
|
673
673
|
}),
|
|
674
674
|
createStep({
|
|
675
|
-
actionType:
|
|
675
|
+
actionType: "trade",
|
|
676
676
|
result: { pnl: -50 },
|
|
677
677
|
success: true,
|
|
678
678
|
}),
|
|
679
679
|
];
|
|
680
680
|
|
|
681
681
|
const metrics = extractor.extract({
|
|
682
|
-
trajectoryId:
|
|
683
|
-
agentId:
|
|
682
|
+
trajectoryId: "test-edge-3",
|
|
683
|
+
agentId: "test-agent-edge-3",
|
|
684
684
|
steps,
|
|
685
685
|
});
|
|
686
686
|
|
|
@@ -691,16 +691,16 @@ describe('TrajectoryMetricsExtractor', () => {
|
|
|
691
691
|
expect(metrics.trading.largestLoss).toBe(-100);
|
|
692
692
|
});
|
|
693
693
|
|
|
694
|
-
it(
|
|
694
|
+
it("should handle mixed case action types", () => {
|
|
695
695
|
const steps: TrajectoryStep[] = [
|
|
696
|
-
createStep({ actionType:
|
|
697
|
-
createStep({ actionType:
|
|
698
|
-
createStep({ actionType:
|
|
696
|
+
createStep({ actionType: "TRADE", success: true }),
|
|
697
|
+
createStep({ actionType: "Trade", success: true }),
|
|
698
|
+
createStep({ actionType: "trade", success: true }),
|
|
699
699
|
];
|
|
700
700
|
|
|
701
701
|
const metrics = extractor.extract({
|
|
702
|
-
trajectoryId:
|
|
703
|
-
agentId:
|
|
702
|
+
trajectoryId: "test-edge-4",
|
|
703
|
+
agentId: "test-agent-edge-4",
|
|
704
704
|
steps,
|
|
705
705
|
});
|
|
706
706
|
|
|
@@ -736,7 +736,7 @@ function createStep(options: {
|
|
|
736
736
|
}
|
|
737
737
|
|
|
738
738
|
function createStepWithEnvState(
|
|
739
|
-
envState: Record<string, number
|
|
739
|
+
envState: Record<string, number>,
|
|
740
740
|
): TrajectoryStep {
|
|
741
741
|
return {
|
|
742
742
|
stepNumber: 0,
|
|
@@ -750,7 +750,7 @@ function createStepWithEnvState(
|
|
|
750
750
|
providerAccesses: [],
|
|
751
751
|
llmCalls: [],
|
|
752
752
|
action: {
|
|
753
|
-
actionType:
|
|
753
|
+
actionType: "idle",
|
|
754
754
|
parameters: {},
|
|
755
755
|
success: true,
|
|
756
756
|
},
|