@elizaos/training 2.0.0-alpha.76 → 2.0.0-alpha.78

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. package/package.json +2 -2
  2. package/.turbo/turbo-lint.log +0 -3
  3. package/.turbo/turbo-typecheck.log +0 -1
  4. package/dist/.tsbuildinfo +0 -1
  5. package/dist/adapter.js +0 -59
  6. package/dist/archetypes/ArchetypeConfigService.js +0 -510
  7. package/dist/archetypes/derive-archetype.js +0 -196
  8. package/dist/archetypes/index.js +0 -7
  9. package/dist/benchmark/ArchetypeMatchupBenchmark.js +0 -547
  10. package/dist/benchmark/BenchmarkChartGenerator.js +0 -632
  11. package/dist/benchmark/BenchmarkDataGenerator.js +0 -825
  12. package/dist/benchmark/BenchmarkDataViewer.js +0 -197
  13. package/dist/benchmark/BenchmarkHistoryService.js +0 -135
  14. package/dist/benchmark/BenchmarkRunner.js +0 -483
  15. package/dist/benchmark/BenchmarkValidator.js +0 -158
  16. package/dist/benchmark/FastEvalRunner.js +0 -133
  17. package/dist/benchmark/MetricsValidator.js +0 -104
  18. package/dist/benchmark/MetricsVisualizer.js +0 -775
  19. package/dist/benchmark/ModelBenchmarkService.js +0 -433
  20. package/dist/benchmark/ModelRegistry.js +0 -122
  21. package/dist/benchmark/RulerBenchmarkIntegration.js +0 -168
  22. package/dist/benchmark/SimulationA2AInterface.js +0 -683
  23. package/dist/benchmark/SimulationEngine.js +0 -522
  24. package/dist/benchmark/TaskRunner.js +0 -60
  25. package/dist/benchmark/__tests__/BenchmarkRunner.test.js +0 -409
  26. package/dist/benchmark/__tests__/HeadToHead.test.js +0 -105
  27. package/dist/benchmark/index.js +0 -23
  28. package/dist/benchmark/parseSimulationMetrics.js +0 -86
  29. package/dist/benchmark/simulation-types.js +0 -1
  30. package/dist/dependencies.js +0 -197
  31. package/dist/generation/TrajectoryGenerator.js +0 -244
  32. package/dist/generation/index.js +0 -6
  33. package/dist/huggingface/HuggingFaceDatasetUploader.js +0 -463
  34. package/dist/huggingface/HuggingFaceIntegrationService.js +0 -272
  35. package/dist/huggingface/HuggingFaceModelUploader.js +0 -385
  36. package/dist/huggingface/index.js +0 -9
  37. package/dist/huggingface/shared/HuggingFaceUploadUtil.js +0 -144
  38. package/dist/index.js +0 -41
  39. package/dist/init-training.js +0 -43
  40. package/dist/metrics/TrajectoryMetricsExtractor.js +0 -523
  41. package/dist/metrics/__tests__/TrajectoryMetricsExtractor.test.js +0 -628
  42. package/dist/metrics/index.js +0 -7
  43. package/dist/metrics/types.js +0 -21
  44. package/dist/rubrics/__tests__/index.test.js +0 -150
  45. package/dist/rubrics/ass-kisser.js +0 -83
  46. package/dist/rubrics/degen.js +0 -78
  47. package/dist/rubrics/goody-twoshoes.js +0 -82
  48. package/dist/rubrics/index.js +0 -184
  49. package/dist/rubrics/information-trader.js +0 -82
  50. package/dist/rubrics/infosec.js +0 -99
  51. package/dist/rubrics/liar.js +0 -102
  52. package/dist/rubrics/perps-trader.js +0 -85
  53. package/dist/rubrics/researcher.js +0 -79
  54. package/dist/rubrics/scammer.js +0 -80
  55. package/dist/rubrics/social-butterfly.js +0 -71
  56. package/dist/rubrics/super-predictor.js +0 -95
  57. package/dist/rubrics/trader.js +0 -65
  58. package/dist/scoring/ArchetypeScoringService.js +0 -301
  59. package/dist/scoring/JudgePromptBuilder.js +0 -401
  60. package/dist/scoring/LLMJudgeCache.js +0 -263
  61. package/dist/scoring/index.js +0 -8
  62. package/dist/training/AutomationPipeline.js +0 -714
  63. package/dist/training/BenchmarkService.js +0 -370
  64. package/dist/training/ConfigValidator.js +0 -153
  65. package/dist/training/MarketOutcomesTracker.js +0 -142
  66. package/dist/training/ModelDeployer.js +0 -128
  67. package/dist/training/ModelFetcher.js +0 -48
  68. package/dist/training/ModelSelectionService.js +0 -248
  69. package/dist/training/ModelUsageVerifier.js +0 -106
  70. package/dist/training/MultiModelOrchestrator.js +0 -349
  71. package/dist/training/RLModelConfig.js +0 -295
  72. package/dist/training/RewardBackpropagationService.js +0 -117
  73. package/dist/training/RulerScoringService.js +0 -450
  74. package/dist/training/TrainingMonitor.js +0 -108
  75. package/dist/training/TrajectoryRecorder.js +0 -281
  76. package/dist/training/__tests__/TrajectoryRecorder.test.js +0 -363
  77. package/dist/training/index.js +0 -30
  78. package/dist/training/logRLConfig.js +0 -29
  79. package/dist/training/pipeline.js +0 -80
  80. package/dist/training/storage/ModelStorageService.js +0 -190
  81. package/dist/training/storage/TrainingDataArchiver.js +0 -136
  82. package/dist/training/storage/index.js +0 -7
  83. package/dist/training/types.js +0 -6
  84. package/dist/training/window-utils.js +0 -100
  85. package/dist/utils/index.js +0 -73
  86. package/dist/utils/logger.js +0 -55
  87. package/dist/utils/snowflake.js +0 -15
  88. package/dist/utils/synthetic-detector.js +0 -67
  89. package/vitest.config.ts +0 -8
@@ -1,628 +0,0 @@
1
- /**
2
- * Trajectory Metrics Extractor Tests
3
- *
4
- * Validates that all metrics are properly extracted and never null/undefined/NaN.
5
- */
6
- import { beforeEach, describe, expect, it } from "vitest";
7
- import { TrajectoryMetricsExtractor, trajectoryMetricsExtractor, } from "../TrajectoryMetricsExtractor";
8
- describe("TrajectoryMetricsExtractor", () => {
9
- let extractor;
10
- beforeEach(() => {
11
- extractor = new TrajectoryMetricsExtractor();
12
- });
13
- /**
14
- * Helper to check all metrics are valid numbers (not null, undefined, NaN, Infinity)
15
- */
16
- function assertValidMetrics(metrics) {
17
- // Check root level
18
- expect(metrics.trajectoryId).toBeDefined();
19
- expect(metrics.agentId).toBeDefined();
20
- expect(metrics.extractedAt).toBeInstanceOf(Date);
21
- // Check social metrics - all should be finite numbers
22
- const social = metrics.social;
23
- expect(typeof social.groupChatsJoined).toBe("number");
24
- expect(Number.isFinite(social.groupChatsJoined)).toBe(true);
25
- expect(social.groupChatsJoined).toBeGreaterThanOrEqual(0);
26
- expect(typeof social.groupChatsCreated).toBe("number");
27
- expect(Number.isFinite(social.groupChatsCreated)).toBe(true);
28
- expect(social.groupChatsCreated).toBeGreaterThanOrEqual(0);
29
- expect(typeof social.groupMessagesSent).toBe("number");
30
- expect(Number.isFinite(social.groupMessagesSent)).toBe(true);
31
- expect(social.groupMessagesSent).toBeGreaterThanOrEqual(0);
32
- expect(typeof social.dmsInitiated).toBe("number");
33
- expect(Number.isFinite(social.dmsInitiated)).toBe(true);
34
- expect(social.dmsInitiated).toBeGreaterThanOrEqual(0);
35
- expect(typeof social.dmsReceived).toBe("number");
36
- expect(Number.isFinite(social.dmsReceived)).toBe(true);
37
- expect(social.dmsReceived).toBeGreaterThanOrEqual(0);
38
- expect(typeof social.dmResponseRate).toBe("number");
39
- expect(Number.isFinite(social.dmResponseRate)).toBe(true);
40
- expect(social.dmResponseRate).toBeGreaterThanOrEqual(0);
41
- expect(social.dmResponseRate).toBeLessThanOrEqual(1);
42
- expect(typeof social.uniqueUsersInteracted).toBe("number");
43
- expect(Number.isFinite(social.uniqueUsersInteracted)).toBe(true);
44
- expect(social.uniqueUsersInteracted).toBeGreaterThanOrEqual(0);
45
- expect(typeof social.postsCreated).toBe("number");
46
- expect(Number.isFinite(social.postsCreated)).toBe(true);
47
- expect(social.postsCreated).toBeGreaterThanOrEqual(0);
48
- expect(typeof social.commentsMade).toBe("number");
49
- expect(Number.isFinite(social.commentsMade)).toBe(true);
50
- expect(social.commentsMade).toBeGreaterThanOrEqual(0);
51
- expect(typeof social.mentionsGiven).toBe("number");
52
- expect(Number.isFinite(social.mentionsGiven)).toBe(true);
53
- expect(social.mentionsGiven).toBeGreaterThanOrEqual(0);
54
- expect(typeof social.mentionsReceived).toBe("number");
55
- expect(Number.isFinite(social.mentionsReceived)).toBe(true);
56
- expect(social.mentionsReceived).toBeGreaterThanOrEqual(0);
57
- expect(typeof social.invitationsSent).toBe("number");
58
- expect(Number.isFinite(social.invitationsSent)).toBe(true);
59
- expect(social.invitationsSent).toBeGreaterThanOrEqual(0);
60
- // Check trading metrics
61
- const trading = metrics.trading;
62
- expect(typeof trading.tradesExecuted).toBe("number");
63
- expect(Number.isFinite(trading.tradesExecuted)).toBe(true);
64
- expect(trading.tradesExecuted).toBeGreaterThanOrEqual(0);
65
- expect(typeof trading.profitableTrades).toBe("number");
66
- expect(Number.isFinite(trading.profitableTrades)).toBe(true);
67
- expect(trading.profitableTrades).toBeGreaterThanOrEqual(0);
68
- expect(typeof trading.winRate).toBe("number");
69
- expect(Number.isFinite(trading.winRate)).toBe(true);
70
- expect(trading.winRate).toBeGreaterThanOrEqual(0);
71
- expect(trading.winRate).toBeLessThanOrEqual(1);
72
- expect(typeof trading.totalPnL).toBe("number");
73
- expect(Number.isFinite(trading.totalPnL)).toBe(true);
74
- expect(typeof trading.maxDrawdown).toBe("number");
75
- expect(Number.isFinite(trading.maxDrawdown)).toBe(true);
76
- expect(trading.maxDrawdown).toBeGreaterThanOrEqual(0);
77
- expect(typeof trading.sharpeRatio).toBe("number");
78
- expect(Number.isFinite(trading.sharpeRatio)).toBe(true);
79
- expect(typeof trading.avgPositionSize).toBe("number");
80
- expect(Number.isFinite(trading.avgPositionSize)).toBe(true);
81
- expect(trading.avgPositionSize).toBeGreaterThanOrEqual(0);
82
- expect(typeof trading.avgHoldingPeriod).toBe("number");
83
- expect(Number.isFinite(trading.avgHoldingPeriod)).toBe(true);
84
- expect(trading.avgHoldingPeriod).toBeGreaterThanOrEqual(0);
85
- expect(typeof trading.marketsTraded).toBe("number");
86
- expect(Number.isFinite(trading.marketsTraded)).toBe(true);
87
- expect(trading.marketsTraded).toBeGreaterThanOrEqual(0);
88
- expect(typeof trading.buyTrades).toBe("number");
89
- expect(Number.isFinite(trading.buyTrades)).toBe(true);
90
- expect(trading.buyTrades).toBeGreaterThanOrEqual(0);
91
- expect(typeof trading.sellTrades).toBe("number");
92
- expect(Number.isFinite(trading.sellTrades)).toBe(true);
93
- expect(trading.sellTrades).toBeGreaterThanOrEqual(0);
94
- expect(typeof trading.largestWin).toBe("number");
95
- expect(Number.isFinite(trading.largestWin)).toBe(true);
96
- expect(typeof trading.largestLoss).toBe("number");
97
- expect(Number.isFinite(trading.largestLoss)).toBe(true);
98
- // Check influence metrics
99
- const influence = metrics.influence;
100
- expect(typeof influence.followersGained).toBe("number");
101
- expect(Number.isFinite(influence.followersGained)).toBe(true);
102
- expect(typeof influence.reputationDelta).toBe("number");
103
- expect(Number.isFinite(influence.reputationDelta)).toBe(true);
104
- expect(typeof influence.trustLevelDelta).toBe("number");
105
- expect(Number.isFinite(influence.trustLevelDelta)).toBe(true);
106
- expect(typeof influence.influenceScore).toBe("number");
107
- expect(Number.isFinite(influence.influenceScore)).toBe(true);
108
- expect(typeof influence.informationSpread).toBe("number");
109
- expect(Number.isFinite(influence.informationSpread)).toBe(true);
110
- expect(influence.informationSpread).toBeGreaterThanOrEqual(0);
111
- expect(typeof influence.positiveReactions).toBe("number");
112
- expect(Number.isFinite(influence.positiveReactions)).toBe(true);
113
- expect(influence.positiveReactions).toBeGreaterThanOrEqual(0);
114
- expect(typeof influence.negativeReactions).toBe("number");
115
- expect(Number.isFinite(influence.negativeReactions)).toBe(true);
116
- expect(influence.negativeReactions).toBeGreaterThanOrEqual(0);
117
- // Check behavior metrics
118
- const behavior = metrics.behavior;
119
- expect(typeof behavior.actionsPerTick).toBe("number");
120
- expect(Number.isFinite(behavior.actionsPerTick)).toBe(true);
121
- expect(behavior.actionsPerTick).toBeGreaterThanOrEqual(0);
122
- expect(typeof behavior.socialToTradeRatio).toBe("number");
123
- expect(Number.isFinite(behavior.socialToTradeRatio)).toBe(true);
124
- expect(behavior.socialToTradeRatio).toBeGreaterThanOrEqual(0);
125
- expect(typeof behavior.avgResponseTime).toBe("number");
126
- expect(Number.isFinite(behavior.avgResponseTime)).toBe(true);
127
- expect(behavior.avgResponseTime).toBeGreaterThanOrEqual(0);
128
- expect(typeof behavior.consistencyScore).toBe("number");
129
- expect(Number.isFinite(behavior.consistencyScore)).toBe(true);
130
- expect(behavior.consistencyScore).toBeGreaterThanOrEqual(0);
131
- expect(behavior.consistencyScore).toBeLessThanOrEqual(1);
132
- expect(typeof behavior.totalActions).toBe("number");
133
- expect(Number.isFinite(behavior.totalActions)).toBe(true);
134
- expect(behavior.totalActions).toBeGreaterThanOrEqual(0);
135
- expect(typeof behavior.failedActions).toBe("number");
136
- expect(Number.isFinite(behavior.failedActions)).toBe(true);
137
- expect(behavior.failedActions).toBeGreaterThanOrEqual(0);
138
- expect(typeof behavior.actionSuccessRate).toBe("number");
139
- expect(Number.isFinite(behavior.actionSuccessRate)).toBe(true);
140
- expect(behavior.actionSuccessRate).toBeGreaterThanOrEqual(0);
141
- expect(behavior.actionSuccessRate).toBeLessThanOrEqual(1);
142
- expect(typeof behavior.episodeLength).toBe("number");
143
- expect(Number.isFinite(behavior.episodeLength)).toBe(true);
144
- expect(behavior.episodeLength).toBeGreaterThanOrEqual(0);
145
- expect(Array.isArray(behavior.actionTypesUsed)).toBe(true);
146
- expect(typeof behavior.dominantActionType).toBe("string");
147
- // Check information metrics
148
- const information = metrics.information;
149
- expect(typeof information.researchActions).toBe("number");
150
- expect(Number.isFinite(information.researchActions)).toBe(true);
151
- expect(information.researchActions).toBeGreaterThanOrEqual(0);
152
- expect(typeof information.newsConsumed).toBe("number");
153
- expect(Number.isFinite(information.newsConsumed)).toBe(true);
154
- expect(information.newsConsumed).toBeGreaterThanOrEqual(0);
155
- expect(typeof information.marketDataQueries).toBe("number");
156
- expect(Number.isFinite(information.marketDataQueries)).toBe(true);
157
- expect(information.marketDataQueries).toBeGreaterThanOrEqual(0);
158
- expect(typeof information.infoRequestsSent).toBe("number");
159
- expect(Number.isFinite(information.infoRequestsSent)).toBe(true);
160
- expect(information.infoRequestsSent).toBeGreaterThanOrEqual(0);
161
- expect(typeof information.infoShared).toBe("number");
162
- expect(Number.isFinite(information.infoShared)).toBe(true);
163
- expect(information.infoShared).toBeGreaterThanOrEqual(0);
164
- expect(typeof information.predictionsMade).toBe("number");
165
- expect(Number.isFinite(information.predictionsMade)).toBe(true);
166
- expect(information.predictionsMade).toBeGreaterThanOrEqual(0);
167
- expect(typeof information.correctPredictions).toBe("number");
168
- expect(Number.isFinite(information.correctPredictions)).toBe(true);
169
- expect(information.correctPredictions).toBeGreaterThanOrEqual(0);
170
- expect(typeof information.predictionAccuracy).toBe("number");
171
- expect(Number.isFinite(information.predictionAccuracy)).toBe(true);
172
- expect(information.predictionAccuracy).toBeGreaterThanOrEqual(0);
173
- expect(information.predictionAccuracy).toBeLessThanOrEqual(1);
174
- }
175
- describe("extract()", () => {
176
- it("should return valid metrics for empty steps array", () => {
177
- const metrics = extractor.extract({
178
- trajectoryId: "test-traj-1",
179
- agentId: "test-agent-1",
180
- steps: [],
181
- });
182
- assertValidMetrics(metrics);
183
- expect(metrics.behavior.episodeLength).toBe(0);
184
- expect(metrics.behavior.totalActions).toBe(0);
185
- });
186
- it("should return valid metrics for minimal step with no action", () => {
187
- const steps = [
188
- {
189
- stepNumber: 0,
190
- timestamp: Date.now(),
191
- environmentState: {
192
- agentBalance: 1000,
193
- agentPnL: 0,
194
- openPositions: 0,
195
- },
196
- providerAccesses: [],
197
- llmCalls: [],
198
- action: {
199
- actionType: "idle",
200
- parameters: {},
201
- success: true,
202
- },
203
- reward: 0,
204
- },
205
- ];
206
- const metrics = extractor.extract({
207
- trajectoryId: "test-traj-2",
208
- agentId: "test-agent-2",
209
- steps,
210
- });
211
- assertValidMetrics(metrics);
212
- expect(metrics.behavior.episodeLength).toBe(1);
213
- });
214
- it("should correctly count trading actions", () => {
215
- const steps = [
216
- createStep({
217
- actionType: "buy",
218
- parameters: { marketId: "BTC", amount: 100, side: "buy" },
219
- result: { pnl: 10 },
220
- success: true,
221
- }),
222
- createStep({
223
- actionType: "sell",
224
- parameters: { marketId: "ETH", amount: 50, side: "sell" },
225
- result: { pnl: -5 },
226
- success: true,
227
- }),
228
- createStep({
229
- actionType: "trade",
230
- parameters: { marketId: "BTC", amount: 200 },
231
- result: { pnl: 20 },
232
- success: true,
233
- }),
234
- ];
235
- const metrics = extractor.extract({
236
- trajectoryId: "test-traj-3",
237
- agentId: "test-agent-3",
238
- steps,
239
- });
240
- assertValidMetrics(metrics);
241
- expect(metrics.trading.tradesExecuted).toBe(3);
242
- expect(metrics.trading.buyTrades).toBe(1);
243
- expect(metrics.trading.sellTrades).toBe(1);
244
- expect(metrics.trading.marketsTraded).toBe(2);
245
- expect(metrics.trading.profitableTrades).toBe(2);
246
- expect(metrics.trading.totalPnL).toBe(25);
247
- expect(metrics.trading.winRate).toBeCloseTo(2 / 3, 2);
248
- expect(metrics.trading.largestWin).toBe(20);
249
- expect(metrics.trading.largestLoss).toBe(-5);
250
- });
251
- it("should correctly count social actions", () => {
252
- const steps = [
253
- createStep({
254
- actionType: "join_group_chat",
255
- parameters: { groupId: "group-1" },
256
- success: true,
257
- }),
258
- createStep({
259
- actionType: "post_group_message",
260
- parameters: { groupId: "group-1", message: "Hello" },
261
- success: true,
262
- }),
263
- createStep({
264
- actionType: "send_dm",
265
- parameters: { toUserId: "user-2", initiator: "test-agent-4" },
266
- success: true,
267
- }),
268
- createStep({
269
- actionType: "create_post",
270
- parameters: {},
271
- success: true,
272
- }),
273
- createStep({
274
- actionType: "comment",
275
- parameters: { authorId: "user-3" },
276
- success: true,
277
- }),
278
- ];
279
- const metrics = extractor.extract({
280
- trajectoryId: "test-traj-4",
281
- agentId: "test-agent-4",
282
- steps,
283
- });
284
- assertValidMetrics(metrics);
285
- expect(metrics.social.groupChatsJoined).toBe(1);
286
- expect(metrics.social.groupMessagesSent).toBe(1);
287
- expect(metrics.social.dmsInitiated).toBe(1);
288
- expect(metrics.social.postsCreated).toBe(1);
289
- expect(metrics.social.commentsMade).toBe(1);
290
- expect(metrics.social.uniqueUsersInteracted).toBeGreaterThanOrEqual(2);
291
- });
292
- it("should handle failed actions correctly", () => {
293
- const steps = [
294
- createStep({ actionType: "trade", success: true }),
295
- createStep({ actionType: "trade", success: false }),
296
- createStep({ actionType: "trade", success: false }),
297
- createStep({ actionType: "trade", success: true }),
298
- ];
299
- const metrics = extractor.extract({
300
- trajectoryId: "test-traj-5",
301
- agentId: "test-agent-5",
302
- steps,
303
- });
304
- assertValidMetrics(metrics);
305
- expect(metrics.behavior.totalActions).toBe(4);
306
- expect(metrics.behavior.failedActions).toBe(2);
307
- expect(metrics.behavior.actionSuccessRate).toBe(0.5);
308
- });
309
- it("should calculate socialToTradeRatio correctly", () => {
310
- const steps = [
311
- createStep({ actionType: "send_dm", success: true }),
312
- createStep({ actionType: "send_dm", success: true }),
313
- createStep({ actionType: "send_dm", success: true }),
314
- createStep({ actionType: "trade", success: true }),
315
- ];
316
- const metrics = extractor.extract({
317
- trajectoryId: "test-traj-6",
318
- agentId: "test-agent-6",
319
- steps,
320
- });
321
- assertValidMetrics(metrics);
322
- expect(metrics.behavior.socialToTradeRatio).toBe(3);
323
- });
324
- it("should handle social-only trajectories without weird ratios", () => {
325
- const steps = [
326
- createStep({ actionType: "send_dm", success: true }),
327
- createStep({ actionType: "create_post", success: true }),
328
- createStep({ actionType: "comment", success: true }),
329
- ];
330
- const metrics = extractor.extract({
331
- trajectoryId: "test-traj-7",
332
- agentId: "test-agent-7",
333
- steps,
334
- });
335
- assertValidMetrics(metrics);
336
- // When no trades, socialToTradeRatio equals the social action count
337
- expect(metrics.behavior.socialToTradeRatio).toBe(3);
338
- expect(Number.isFinite(metrics.behavior.socialToTradeRatio)).toBe(true);
339
- });
340
- it("should track reputation changes", () => {
341
- const steps = [
342
- createStepWithEnvState({ reputation: 100, trustLevel: 50 }),
343
- createStepWithEnvState({ reputation: 110, trustLevel: 55 }),
344
- createStepWithEnvState({ reputation: 105, trustLevel: 60 }),
345
- ];
346
- const metrics = extractor.extract({
347
- trajectoryId: "test-traj-8",
348
- agentId: "test-agent-8",
349
- steps,
350
- });
351
- assertValidMetrics(metrics);
352
- expect(metrics.influence.reputationDelta).toBe(5); // 105 - 100
353
- expect(metrics.influence.trustLevelDelta).toBe(10); // 60 - 50
354
- });
355
- it("should calculate consistency score correctly", () => {
356
- // Perfectly consistent (all same action)
357
- const consistentSteps = [
358
- createStep({ actionType: "trade", success: true }),
359
- createStep({ actionType: "trade", success: true }),
360
- createStep({ actionType: "trade", success: true }),
361
- ];
362
- const consistentMetrics = extractor.extract({
363
- trajectoryId: "test-traj-9",
364
- agentId: "test-agent-9",
365
- steps: consistentSteps,
366
- });
367
- assertValidMetrics(consistentMetrics);
368
- expect(consistentMetrics.behavior.consistencyScore).toBe(1);
369
- // Less consistent (varied actions)
370
- const variedSteps = [
371
- createStep({ actionType: "trade", success: true }),
372
- createStep({ actionType: "trade", success: true }),
373
- createStep({ actionType: "trade", success: true }),
374
- createStep({ actionType: "send_dm", success: true }),
375
- createStep({ actionType: "create_post", success: true }),
376
- ];
377
- const variedMetrics = extractor.extract({
378
- trajectoryId: "test-traj-10",
379
- agentId: "test-agent-10",
380
- steps: variedSteps,
381
- });
382
- assertValidMetrics(variedMetrics);
383
- expect(variedMetrics.behavior.consistencyScore).toBeLessThan(1);
384
- expect(variedMetrics.behavior.consistencyScore).toBeGreaterThan(0);
385
- });
386
- it("should correctly identify dominant action type", () => {
387
- const steps = [
388
- createStep({ actionType: "trade", success: true }),
389
- createStep({ actionType: "trade", success: true }),
390
- createStep({ actionType: "trade", success: true }),
391
- createStep({ actionType: "send_dm", success: true }),
392
- createStep({ actionType: "send_dm", success: true }),
393
- ];
394
- const metrics = extractor.extract({
395
- trajectoryId: "test-traj-11",
396
- agentId: "test-agent-11",
397
- steps,
398
- });
399
- assertValidMetrics(metrics);
400
- expect(metrics.behavior.dominantActionType).toBe("trade");
401
- });
402
- it("should handle prediction correctness tracking", () => {
403
- const steps = [
404
- createStep({
405
- actionType: "predict",
406
- success: true,
407
- correctness: { predictionCorrect: true },
408
- }),
409
- createStep({
410
- actionType: "predict",
411
- success: true,
412
- correctness: { predictionCorrect: false },
413
- }),
414
- createStep({
415
- actionType: "predict",
416
- success: true,
417
- correctness: { predictionCorrect: true },
418
- }),
419
- ];
420
- const metrics = extractor.extract({
421
- trajectoryId: "test-traj-12",
422
- agentId: "test-agent-12",
423
- steps,
424
- });
425
- assertValidMetrics(metrics);
426
- expect(metrics.information.predictionsMade).toBe(3);
427
- expect(metrics.information.correctPredictions).toBe(2);
428
- expect(metrics.information.predictionAccuracy).toBeCloseTo(2 / 3, 2);
429
- });
430
- });
431
- describe("extractFromRaw()", () => {
432
- it("should return null for invalid JSON", () => {
433
- const result = extractor.extractFromRaw({
434
- trajectoryId: "test-traj-13",
435
- agentId: "test-agent-13",
436
- stepsJson: "not valid json",
437
- });
438
- expect(result).toBeNull();
439
- });
440
- it("should return null for empty array JSON", () => {
441
- const result = extractor.extractFromRaw({
442
- trajectoryId: "test-traj-14",
443
- agentId: "test-agent-14",
444
- stepsJson: "[]",
445
- });
446
- expect(result).toBeNull();
447
- });
448
- it("should return null for null JSON", () => {
449
- const result = extractor.extractFromRaw({
450
- trajectoryId: "test-traj-15",
451
- agentId: "test-agent-15",
452
- stepsJson: "null",
453
- });
454
- expect(result).toBeNull();
455
- });
456
- it("should correctly parse valid JSON steps", () => {
457
- const steps = [
458
- createStep({ actionType: "trade", success: true }),
459
- createStep({ actionType: "send_dm", success: true }),
460
- ];
461
- const result = extractor.extractFromRaw({
462
- trajectoryId: "test-traj-16",
463
- agentId: "test-agent-16",
464
- stepsJson: JSON.stringify(steps),
465
- });
466
- expect(result).not.toBeNull();
467
- if (result) {
468
- assertValidMetrics(result);
469
- expect(result.behavior.totalActions).toBe(2);
470
- }
471
- });
472
- it("should use finalPnL when provided", () => {
473
- const steps = [
474
- {
475
- stepNumber: 0,
476
- timestamp: Date.now(),
477
- environmentState: {
478
- agentBalance: 1000,
479
- agentPnL: 0,
480
- openPositions: 0,
481
- },
482
- providerAccesses: [],
483
- llmCalls: [],
484
- action: { actionType: "idle", parameters: {}, success: true },
485
- reward: 0,
486
- },
487
- ];
488
- const result = extractor.extractFromRaw({
489
- trajectoryId: "test-traj-17",
490
- agentId: "test-agent-17",
491
- stepsJson: JSON.stringify(steps),
492
- finalPnL: 150.5,
493
- });
494
- expect(result).not.toBeNull();
495
- if (result) {
496
- assertValidMetrics(result);
497
- }
498
- });
499
- });
500
- describe("singleton instance", () => {
501
- it("should export a singleton instance", () => {
502
- expect(trajectoryMetricsExtractor).toBeInstanceOf(TrajectoryMetricsExtractor);
503
- });
504
- });
505
- describe("edge cases", () => {
506
- it("should handle undefined parameters gracefully", () => {
507
- const steps = [
508
- {
509
- stepNumber: 0,
510
- timestamp: Date.now(),
511
- environmentState: {
512
- agentBalance: 1000,
513
- agentPnL: 0,
514
- openPositions: 0,
515
- },
516
- providerAccesses: [],
517
- llmCalls: [],
518
- action: {
519
- actionType: "trade",
520
- parameters: {},
521
- success: true,
522
- },
523
- reward: 0,
524
- },
525
- ];
526
- const metrics = extractor.extract({
527
- trajectoryId: "test-edge-1",
528
- agentId: "test-agent-edge-1",
529
- steps,
530
- });
531
- assertValidMetrics(metrics);
532
- });
533
- it("should handle very large numbers without overflow", () => {
534
- const steps = [
535
- createStep({
536
- actionType: "trade",
537
- parameters: { amount: 1e15 },
538
- result: { pnl: 1e12 },
539
- success: true,
540
- }),
541
- ];
542
- const metrics = extractor.extract({
543
- trajectoryId: "test-edge-2",
544
- agentId: "test-agent-edge-2",
545
- steps,
546
- });
547
- assertValidMetrics(metrics);
548
- expect(Number.isFinite(metrics.trading.totalPnL)).toBe(true);
549
- expect(Number.isFinite(metrics.trading.avgPositionSize)).toBe(true);
550
- });
551
- it("should handle negative PnL correctly", () => {
552
- const steps = [
553
- createStep({
554
- actionType: "trade",
555
- result: { pnl: -100 },
556
- success: true,
557
- }),
558
- createStep({
559
- actionType: "trade",
560
- result: { pnl: -50 },
561
- success: true,
562
- }),
563
- ];
564
- const metrics = extractor.extract({
565
- trajectoryId: "test-edge-3",
566
- agentId: "test-agent-edge-3",
567
- steps,
568
- });
569
- assertValidMetrics(metrics);
570
- expect(metrics.trading.totalPnL).toBe(-150);
571
- expect(metrics.trading.profitableTrades).toBe(0);
572
- expect(metrics.trading.winRate).toBe(0);
573
- expect(metrics.trading.largestLoss).toBe(-100);
574
- });
575
- it("should handle mixed case action types", () => {
576
- const steps = [
577
- createStep({ actionType: "TRADE", success: true }),
578
- createStep({ actionType: "Trade", success: true }),
579
- createStep({ actionType: "trade", success: true }),
580
- ];
581
- const metrics = extractor.extract({
582
- trajectoryId: "test-edge-4",
583
- agentId: "test-agent-edge-4",
584
- steps,
585
- });
586
- assertValidMetrics(metrics);
587
- expect(metrics.trading.tradesExecuted).toBe(3);
588
- });
589
- });
590
- });
591
- // Helper functions to create test steps
592
- function createStep(options) {
593
- return {
594
- stepNumber: 0,
595
- timestamp: Date.now(),
596
- environmentState: { agentBalance: 1000, agentPnL: 0, openPositions: 0 },
597
- providerAccesses: [],
598
- llmCalls: [],
599
- action: {
600
- actionType: options.actionType,
601
- parameters: options.parameters || {},
602
- result: options.result,
603
- success: options.success ?? true,
604
- correctness: options.correctness,
605
- },
606
- reward: 0,
607
- };
608
- }
609
- function createStepWithEnvState(envState) {
610
- return {
611
- stepNumber: 0,
612
- timestamp: Date.now(),
613
- environmentState: {
614
- agentBalance: 1000,
615
- agentPnL: 0,
616
- openPositions: 0,
617
- ...envState,
618
- },
619
- providerAccesses: [],
620
- llmCalls: [],
621
- action: {
622
- actionType: "idle",
623
- parameters: {},
624
- success: true,
625
- },
626
- reward: 0,
627
- };
628
- }
@@ -1,7 +0,0 @@
1
- /**
2
- * Trajectory Metrics Module
3
- *
4
- * Exports for behavioral metrics extraction and types.
5
- */
6
- export * from "./TrajectoryMetricsExtractor";
7
- export * from "./types";
@@ -1,21 +0,0 @@
1
- /**
2
- * Trajectory Metrics Types
3
- *
4
- * Comprehensive behavioral metrics extracted from agent trajectories
5
- * for use in multi-criteria evaluation with LLM-as-judge.
6
- */
7
- /**
8
- * Extract summary from full metrics
9
- */
10
- export function getMetricsSummary(metrics) {
11
- return {
12
- totalPnL: metrics.trading.totalPnL,
13
- winRate: metrics.trading.winRate,
14
- tradesExecuted: metrics.trading.tradesExecuted,
15
- uniqueUsersInteracted: metrics.social.uniqueUsersInteracted,
16
- socialToTradeRatio: metrics.behavior.socialToTradeRatio,
17
- actionSuccessRate: metrics.behavior.actionSuccessRate,
18
- reputationDelta: metrics.influence.reputationDelta,
19
- episodeLength: metrics.behavior.episodeLength,
20
- };
21
- }