@elizaos/training 2.0.0-alpha.21 → 2.0.0-alpha.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. package/.turbo/turbo-lint.log +2 -0
  2. package/.turbo/turbo-typecheck.log +1 -0
  3. package/dist/.tsbuildinfo +1 -0
  4. package/dist/adapter.js +59 -0
  5. package/dist/archetypes/ArchetypeConfigService.js +510 -0
  6. package/dist/archetypes/derive-archetype.js +196 -0
  7. package/dist/archetypes/index.js +7 -0
  8. package/dist/benchmark/ArchetypeMatchupBenchmark.js +547 -0
  9. package/dist/benchmark/BenchmarkChartGenerator.js +632 -0
  10. package/dist/benchmark/BenchmarkDataGenerator.js +825 -0
  11. package/dist/benchmark/BenchmarkDataViewer.js +197 -0
  12. package/dist/benchmark/BenchmarkHistoryService.js +135 -0
  13. package/dist/benchmark/BenchmarkRunner.js +483 -0
  14. package/dist/benchmark/BenchmarkValidator.js +158 -0
  15. package/dist/benchmark/FastEvalRunner.js +133 -0
  16. package/dist/benchmark/MetricsValidator.js +104 -0
  17. package/dist/benchmark/MetricsVisualizer.js +775 -0
  18. package/dist/benchmark/ModelBenchmarkService.js +433 -0
  19. package/dist/benchmark/ModelRegistry.js +122 -0
  20. package/dist/benchmark/RulerBenchmarkIntegration.js +168 -0
  21. package/dist/benchmark/SimulationA2AInterface.js +683 -0
  22. package/dist/benchmark/SimulationEngine.js +522 -0
  23. package/dist/benchmark/TaskRunner.js +60 -0
  24. package/dist/benchmark/__tests__/BenchmarkRunner.test.js +409 -0
  25. package/dist/benchmark/__tests__/HeadToHead.test.js +105 -0
  26. package/dist/benchmark/index.js +23 -0
  27. package/dist/benchmark/parseSimulationMetrics.js +86 -0
  28. package/dist/benchmark/simulation-types.js +1 -0
  29. package/dist/dependencies.js +197 -0
  30. package/dist/generation/TrajectoryGenerator.js +244 -0
  31. package/dist/generation/index.js +6 -0
  32. package/dist/huggingface/HuggingFaceDatasetUploader.js +463 -0
  33. package/dist/huggingface/HuggingFaceIntegrationService.js +272 -0
  34. package/dist/huggingface/HuggingFaceModelUploader.js +385 -0
  35. package/dist/huggingface/index.js +9 -0
  36. package/dist/huggingface/shared/HuggingFaceUploadUtil.js +144 -0
  37. package/dist/index.js +41 -0
  38. package/dist/init-training.js +43 -0
  39. package/dist/metrics/TrajectoryMetricsExtractor.js +523 -0
  40. package/dist/metrics/__tests__/TrajectoryMetricsExtractor.test.js +628 -0
  41. package/dist/metrics/index.js +7 -0
  42. package/dist/metrics/types.js +21 -0
  43. package/dist/rubrics/__tests__/index.test.js +150 -0
  44. package/dist/rubrics/ass-kisser.js +83 -0
  45. package/dist/rubrics/degen.js +78 -0
  46. package/dist/rubrics/goody-twoshoes.js +82 -0
  47. package/dist/rubrics/index.js +184 -0
  48. package/dist/rubrics/information-trader.js +82 -0
  49. package/dist/rubrics/infosec.js +99 -0
  50. package/dist/rubrics/liar.js +102 -0
  51. package/dist/rubrics/perps-trader.js +85 -0
  52. package/dist/rubrics/researcher.js +79 -0
  53. package/dist/rubrics/scammer.js +80 -0
  54. package/dist/rubrics/social-butterfly.js +71 -0
  55. package/dist/rubrics/super-predictor.js +95 -0
  56. package/dist/rubrics/trader.js +65 -0
  57. package/dist/scoring/ArchetypeScoringService.js +301 -0
  58. package/dist/scoring/JudgePromptBuilder.js +401 -0
  59. package/dist/scoring/LLMJudgeCache.js +263 -0
  60. package/dist/scoring/index.js +8 -0
  61. package/dist/training/AutomationPipeline.js +714 -0
  62. package/dist/training/BenchmarkService.js +370 -0
  63. package/dist/training/ConfigValidator.js +153 -0
  64. package/dist/training/MarketOutcomesTracker.js +142 -0
  65. package/dist/training/ModelDeployer.js +128 -0
  66. package/dist/training/ModelFetcher.js +48 -0
  67. package/dist/training/ModelSelectionService.js +248 -0
  68. package/dist/training/ModelUsageVerifier.js +106 -0
  69. package/dist/training/MultiModelOrchestrator.js +349 -0
  70. package/dist/training/RLModelConfig.js +295 -0
  71. package/dist/training/RewardBackpropagationService.js +117 -0
  72. package/dist/training/RulerScoringService.js +450 -0
  73. package/dist/training/TrainingMonitor.js +108 -0
  74. package/dist/training/TrajectoryRecorder.js +281 -0
  75. package/dist/training/__tests__/TrajectoryRecorder.test.js +363 -0
  76. package/dist/training/index.js +30 -0
  77. package/dist/training/logRLConfig.js +29 -0
  78. package/dist/training/pipeline.js +80 -0
  79. package/dist/training/storage/ModelStorageService.js +190 -0
  80. package/dist/training/storage/TrainingDataArchiver.js +136 -0
  81. package/dist/training/storage/index.js +7 -0
  82. package/dist/training/types.js +6 -0
  83. package/dist/training/window-utils.js +100 -0
  84. package/dist/utils/index.js +73 -0
  85. package/dist/utils/logger.js +55 -0
  86. package/dist/utils/snowflake.js +15 -0
  87. package/dist/utils/synthetic-detector.js +67 -0
  88. package/package.json +2 -2
  89. package/research-output/training-runs/training-run-1773742857616.json +38 -0
  90. package/research-output/training-runs/training-run-1773742946977.json +38 -0
  91. package/research-output/training-runs/training-run-1773743278891.json +38 -0
  92. package/research-output/training-runs/training-run-1773743409754.json +38 -0
  93. package/research-output/training-runs/training-run-1773743651086.json +38 -0
  94. package/research-output/training-runs/training-run-1773743782883.json +38 -0
@@ -0,0 +1,523 @@
1
+ /**
2
+ * TrajectoryMetricsExtractor
3
+ *
4
+ * Extracts comprehensive behavioral metrics from agent trajectories
5
+ * for use in multi-criteria LLM-as-judge evaluation.
6
+ *
7
+ * Extracts 5 categories of metrics:
8
+ * - Social: group chats, DMs, posts, mentions
9
+ * - Trading: P&L, win rate, Sharpe ratio, drawdown
10
+ * - Influence: followers, reputation, reactions
11
+ * - Behavior: action patterns, consistency
12
+ * - Information: research, predictions
13
+ *
14
+ * @packageDocumentation
15
+ */
16
+ import { logger } from "../utils/logger";
17
/**
 * Action types that count as social interactions.
 *
 * Includes the short aliases ("group_message", "dm", "post", "reply") that
 * the social metrics extraction also accepts, so that the behavior metrics
 * (social-to-trade ratio) classify the same actions as social.
 */
const SOCIAL_ACTION_TYPES = new Set([
  "join_group_chat",
  "create_group_chat",
  "leave_group_chat",
  "post_group_message",
  "send_dm",
  "reply_dm",
  "create_post",
  "comment",
  "like",
  "follow",
  "unfollow",
  "mention",
  "invite",
  "react",
  "share",
  // Aliases recognized by the social metrics extraction
  "group_message",
  "dm",
  "post",
  "reply",
]);
37
/**
 * Lower-cased action types that are treated as trading activity.
 * Membership in this set is what makes a step count as a trade.
 */
const TRADING_ACTION_TYPES = new Set([
  // Direct executions
  "trade", "buy", "sell",
  // Order and position management
  "place_order", "cancel_order", "close_position", "open_position",
  // Predictions / bets and swaps
  "predict", "bet", "swap",
]);
52
/**
 * Lower-cased action type of an action record, or "" when the type is missing
 * or not a string, so a single malformed step cannot abort extraction.
 */
function normalizeActionType(action) {
  const { actionType } = action;
  return typeof actionType === "string" ? actionType.toLowerCase() : "";
}

/**
 * Convert a value to a finite number, or `undefined` when it is absent
 * (null / undefined / empty string) or not numeric.
 */
function toFiniteNumber(value) {
  if (value === null || value === undefined || value === "") {
    return undefined;
  }
  const numeric = Number(value);
  return Number.isFinite(numeric) ? numeric : undefined;
}

/**
 * First candidate that converts to a finite number, or `undefined` if none does.
 * Unlike an `||` chain, a genuine reading of 0 is kept.
 */
function firstFiniteNumber(...candidates) {
  for (const candidate of candidates) {
    const numeric = toFiniteNumber(candidate);
    if (numeric !== undefined) {
      return numeric;
    }
  }
  return undefined;
}

/**
 * Convert a value to a finite number, falling back to 0 for anything that is
 * absent or non-numeric (prevents NaN from leaking into running totals).
 */
function finiteOrZero(value) {
  return toFiniteNumber(value) ?? 0;
}

/**
 * Derives social, trading, influence, behavior and information metrics from
 * the steps of an agent trajectory.
 */
export class TrajectoryMetricsExtractor {
  /**
   * Extract all metric categories from a trajectory.
   *
   * @param {object} params
   * @param {string} params.trajectoryId - Copied into the result.
   * @param {string} params.agentId - Agent the trajectory belongs to.
   * @param {Array<object>} params.steps - Trajectory steps; each may carry
   *   `action` and `environmentState`.
   * @param {string} [params.scenarioId] - Copied into the result.
   * @param {number} [params.startBalance] - Used for P&L only when no
   *   per-trade P&L is available.
   * @param {number} [params.endBalance] - See `startBalance`.
   * @returns {object} The five metric groups plus `extractedAt`,
   *   `trajectoryId`, `agentId` and `scenarioId`.
   */
  extract(params) {
    const {
      trajectoryId,
      agentId,
      steps,
      scenarioId,
      startBalance,
      endBalance,
    } = params;
    const social = this.extractSocialMetrics(steps, agentId);
    const trading = this.extractTradingMetrics(steps, startBalance, endBalance);
    const influence = this.extractInfluenceMetrics(steps);
    const behavior = this.extractBehaviorMetrics(steps);
    const information = this.extractInformationMetrics(steps);
    return {
      social,
      trading,
      influence,
      behavior,
      information,
      extractedAt: new Date(),
      trajectoryId,
      agentId,
      scenarioId,
    };
  }

  /**
   * Extract social interaction metrics.
   *
   * `dmsReceived` counts DM steps addressed to the agent that the agent did
   * not send itself. Because every `reply_dm` answers a DM the agent must have
   * received, the count is never lower than the number of replies; this keeps
   * `dmResponseRate` within [0, 1] even when inbound messages were not
   * recorded as steps. `mentionsReceived` is not derived from steps and stays 0.
   *
   * @param {Array<object>} steps - Trajectory steps.
   * @param {string} agentId - Id of the agent; excluded from the user tally
   *   for DM recipients and the generic `userId` / `targetUserId` fields.
   * @returns {object} Social metrics counters.
   */
  extractSocialMetrics(steps, agentId) {
    const metrics = {
      groupChatsJoined: 0,
      groupChatsCreated: 0,
      groupMessagesSent: 0,
      dmsInitiated: 0,
      dmsReceived: 0,
      dmResponseRate: 0,
      uniqueUsersInteracted: 0,
      postsCreated: 0,
      commentsMade: 0,
      mentionsGiven: 0,
      mentionsReceived: 0,
      invitationsSent: 0,
    };
    const usersInteracted = new Set();
    let dmsReplied = 0;
    let inboundDms = 0;
    for (const step of steps) {
      const action = step?.action;
      if (!action) {
        continue;
      }
      const actionType = normalizeActionType(action);
      const params = action.parameters || {};
      // Group chat actions
      if (actionType === "join_group_chat") {
        metrics.groupChatsJoined++;
      } else if (actionType === "create_group_chat") {
        metrics.groupChatsCreated++;
      } else if (
        actionType === "post_group_message" ||
        actionType === "group_message"
      ) {
        metrics.groupMessagesSent++;
        // The group id is tallied alongside user ids (existing behavior).
        if (params.groupId) {
          usersInteracted.add(String(params.groupId));
        }
      }
      // DM actions
      else if (actionType === "send_dm" || actionType === "dm") {
        const isInitiator =
          params.initiator === agentId || params.fromAgent === agentId;
        const recipient = params.toUserId || params.recipientId;
        if (isInitiator) {
          metrics.dmsInitiated++;
        } else if (recipient && recipient === agentId) {
          // A DM sent by someone else to this agent.
          inboundDms++;
          const sender = params.initiator || params.fromAgent;
          if (sender) {
            usersInteracted.add(String(sender));
          }
        }
        // The agent itself is not "another user".
        if (recipient && recipient !== agentId) {
          usersInteracted.add(String(recipient));
        }
      } else if (actionType === "reply_dm") {
        dmsReplied++;
      }
      // Post/comment actions
      else if (actionType === "create_post" || actionType === "post") {
        metrics.postsCreated++;
      } else if (actionType === "comment" || actionType === "reply") {
        metrics.commentsMade++;
        if (params.authorId) {
          usersInteracted.add(String(params.authorId));
        }
      }
      // Mention/invite actions
      else if (actionType === "mention") {
        metrics.mentionsGiven++;
        if (params.mentionedUserId) {
          usersInteracted.add(String(params.mentionedUserId));
        }
      } else if (actionType === "invite") {
        metrics.invitationsSent++;
        if (params.invitedUserId) {
          usersInteracted.add(String(params.invitedUserId));
        }
      }
      // Track users from any interaction, regardless of action type
      if (params.userId && params.userId !== agentId) {
        usersInteracted.add(String(params.userId));
      }
      if (params.targetUserId && params.targetUserId !== agentId) {
        usersInteracted.add(String(params.targetUserId));
      }
    }
    // A reply implies a received DM, so replies are a lower bound.
    metrics.dmsReceived = Math.max(inboundDms, dmsReplied);
    if (metrics.dmsReceived > 0) {
      metrics.dmResponseRate = dmsReplied / metrics.dmsReceived;
    }
    metrics.uniqueUsersInteracted = usersInteracted.size;
    return metrics;
  }

  /**
   * Extract trading performance metrics.
   *
   * Only steps whose action type is in `TRADING_ACTION_TYPES` are considered.
   * Per-trade P&L is read from `result.pnl`, `result.profit` or
   * `result.return`; non-numeric values are ignored rather than turned into
   * NaN. `winRate` is profitable trades divided by all trading actions, and
   * `sharpeRatio` is mean / population standard deviation of per-trade P&L.
   * `avgHoldingPeriod` is not derived here and stays 0.
   *
   * @param {Array<object>} steps - Trajectory steps.
   * @param {number} [startBalance] - With `endBalance`, used for `totalPnL`
   *   only when no trade reported a non-zero P&L.
   * @param {number} [endBalance] - See `startBalance`.
   * @returns {object} Trading metrics.
   */
  extractTradingMetrics(steps, startBalance, endBalance) {
    const metrics = {
      tradesExecuted: 0,
      profitableTrades: 0,
      winRate: 0,
      totalPnL: 0,
      maxDrawdown: 0,
      sharpeRatio: 0,
      avgPositionSize: 0,
      avgHoldingPeriod: 0,
      marketsTraded: 0,
      buyTrades: 0,
      sellTrades: 0,
      largestWin: 0,
      largestLoss: 0,
    };
    const tradePnLs = [];
    const positionSizes = [];
    const marketsSet = new Set();
    let runningPnL = 0;
    let peakPnL = 0;
    let maxDrawdown = 0;
    for (const step of steps) {
      const action = step?.action;
      if (!action) {
        continue;
      }
      const actionType = normalizeActionType(action);
      if (!TRADING_ACTION_TYPES.has(actionType)) {
        continue;
      }
      const params = action.parameters || {};
      const result = action.result || {};
      metrics.tradesExecuted++;
      // Track buy/sell
      if (
        actionType === "buy" ||
        params.side === "buy" ||
        params.direction === "long"
      ) {
        metrics.buyTrades++;
      } else if (
        actionType === "sell" ||
        params.side === "sell" ||
        params.direction === "short"
      ) {
        metrics.sellTrades++;
      }
      // Track market
      const marketId = params.marketId || params.market || params.ticker;
      if (marketId) {
        marketsSet.add(String(marketId));
      }
      // Track position size (non-numeric sizes are ignored)
      const size = finiteOrZero(
        params.amount || params.size || params.quantity || 0,
      );
      if (size > 0) {
        positionSizes.push(size);
      }
      // Track P&L from result (non-numeric P&L is treated as "no P&L")
      const tradePnL = finiteOrZero(
        result.pnl || result.profit || result.return || 0,
      );
      if (tradePnL === 0) {
        continue;
      }
      tradePnLs.push(tradePnL);
      runningPnL += tradePnL;
      if (tradePnL > 0) {
        metrics.profitableTrades++;
        if (tradePnL > metrics.largestWin) {
          metrics.largestWin = tradePnL;
        }
      } else if (tradePnL < metrics.largestLoss) {
        metrics.largestLoss = tradePnL;
      }
      // Track drawdown as the drop from the running P&L peak
      if (runningPnL > peakPnL) {
        peakPnL = runningPnL;
      }
      const drawdown = peakPnL - runningPnL;
      if (drawdown > maxDrawdown) {
        maxDrawdown = drawdown;
      }
    }
    // Calculate derived metrics
    if (metrics.tradesExecuted > 0) {
      metrics.winRate = metrics.profitableTrades / metrics.tradesExecuted;
    }
    // Total P&L from trades, or from the balance difference as a fallback
    if (tradePnLs.length > 0) {
      metrics.totalPnL = runningPnL;
    } else {
      const start = toFiniteNumber(startBalance);
      const end = toFiniteNumber(endBalance);
      if (start !== undefined && end !== undefined) {
        metrics.totalPnL = end - start;
      }
    }
    metrics.maxDrawdown = maxDrawdown;
    metrics.marketsTraded = marketsSet.size;
    // Average position size
    if (positionSizes.length > 0) {
      metrics.avgPositionSize =
        positionSizes.reduce((sum, s) => sum + s, 0) / positionSizes.length;
    }
    // Calculate Sharpe ratio (simplified: no risk-free rate, no annualization)
    if (tradePnLs.length > 1) {
      const mean = runningPnL / tradePnLs.length;
      const variance =
        tradePnLs.reduce((sum, pnl) => sum + (pnl - mean) ** 2, 0) /
        tradePnLs.length;
      const stdDev = Math.sqrt(variance);
      if (stdDev > 0) {
        metrics.sharpeRatio = mean / stdDev;
      }
    }
    return metrics;
  }

  /**
   * Extract influence and reputation metrics.
   *
   * Deltas are last reading minus first reading of the respective
   * `environmentState` field. A reading of 0 is a valid value (for example
   * followers growing from 0 to 5 yields `followersGained` 5); only absent or
   * non-numeric fields are skipped.
   *
   * @param {Array<object>} steps - Trajectory steps.
   * @returns {object} Influence metrics, including the composite
   *   `influenceScore` = followersGained * 2 + positiveReactions
   *   - negativeReactions + informationSpread * 3.
   */
  extractInfluenceMetrics(steps) {
    const metrics = {
      followersGained: 0,
      reputationDelta: 0,
      trustLevelDelta: 0,
      influenceScore: 0,
      informationSpread: 0,
      positiveReactions: 0,
      negativeReactions: 0,
    };
    // [first, last] readings per tracked field; undefined until first seen.
    const reputationRange = [undefined, undefined];
    const trustRange = [undefined, undefined];
    const followerRange = [undefined, undefined];
    const record = (range, reading) => {
      if (reading === undefined) {
        return;
      }
      if (range[0] === undefined) {
        range[0] = reading;
      }
      range[1] = reading;
    };
    for (const step of steps) {
      const envState = step?.environmentState || {};
      record(
        reputationRange,
        firstFiniteNumber(envState.reputation, envState.agentReputation),
      );
      record(trustRange, firstFiniteNumber(envState.trustLevel, envState.trust));
      record(
        followerRange,
        firstFiniteNumber(envState.followers, envState.followerCount),
      );
      // Track reactions from action results
      const result = step?.action?.result;
      if (result) {
        metrics.positiveReactions += finiteOrZero(
          result.likes || result.upvotes || 0,
        );
        metrics.negativeReactions += finiteOrZero(
          result.dislikes || result.downvotes || 0,
        );
        metrics.informationSpread += finiteOrZero(
          result.shares || result.reshares || 0,
        );
      }
    }
    // Calculate deltas (only when at least one reading was seen)
    if (reputationRange[0] !== undefined) {
      metrics.reputationDelta = reputationRange[1] - reputationRange[0];
    }
    if (trustRange[0] !== undefined) {
      metrics.trustLevelDelta = trustRange[1] - trustRange[0];
    }
    if (followerRange[0] !== undefined) {
      metrics.followersGained = followerRange[1] - followerRange[0];
    }
    // Calculate influence score (simple composite)
    metrics.influenceScore =
      metrics.followersGained * 2 +
      metrics.positiveReactions -
      metrics.negativeReactions +
      metrics.informationSpread * 3;
    return metrics;
  }

  /**
   * Extract behavioral pattern metrics.
   *
   * An action whose `success` is falsy (including missing) counts as failed.
   * Actions without a usable type still count toward `totalActions` but are
   * not listed in `actionTypesUsed`. `avgResponseTime` is not derived here
   * and stays 0.
   *
   * @param {Array<object>} steps - Trajectory steps.
   * @returns {object} Behavior metrics.
   */
  extractBehaviorMetrics(steps) {
    const metrics = {
      actionsPerTick: 0,
      socialToTradeRatio: 0,
      avgResponseTime: 0,
      consistencyScore: 0,
      totalActions: 0,
      failedActions: 0,
      actionSuccessRate: 0,
      episodeLength: steps.length,
      actionTypesUsed: [],
      dominantActionType: "",
    };
    const actionTypeCounts = new Map();
    let socialActions = 0;
    let tradeActions = 0;
    for (const step of steps) {
      const action = step?.action;
      if (!action) {
        continue;
      }
      metrics.totalActions++;
      if (!action.success) {
        metrics.failedActions++;
      }
      const actionType = normalizeActionType(action);
      if (actionType === "") {
        // No usable type: nothing to classify or tally.
        continue;
      }
      // Count action types
      actionTypeCounts.set(
        actionType,
        (actionTypeCounts.get(actionType) || 0) + 1,
      );
      // Categorize actions
      if (SOCIAL_ACTION_TYPES.has(actionType)) {
        socialActions++;
      }
      if (TRADING_ACTION_TYPES.has(actionType)) {
        tradeActions++;
      }
    }
    // Calculate derived metrics
    if (steps.length > 0) {
      metrics.actionsPerTick = metrics.totalActions / steps.length;
    }
    if (metrics.totalActions > 0) {
      metrics.actionSuccessRate =
        (metrics.totalActions - metrics.failedActions) / metrics.totalActions;
    }
    if (tradeActions > 0) {
      metrics.socialToTradeRatio = socialActions / tradeActions;
    } else if (socialActions > 0) {
      // No trades but has social actions - use the social count as the ratio
      // so a social-only agent is distinguishable without a magic number.
      metrics.socialToTradeRatio = socialActions;
    }
    // If both are 0, ratio stays at 0 (no activity)
    // Find action types used and dominant type (first seen wins ties)
    metrics.actionTypesUsed = Array.from(actionTypeCounts.keys());
    let maxCount = 0;
    for (const [actionType, count] of actionTypeCounts) {
      if (count > maxCount) {
        maxCount = count;
        metrics.dominantActionType = actionType;
      }
    }
    // Consistency score: 1 / (1 + coefficient of variation of per-type counts)
    if (metrics.actionTypesUsed.length > 1) {
      const counts = Array.from(actionTypeCounts.values());
      const mean = counts.reduce((sum, c) => sum + c, 0) / counts.length;
      const variance =
        counts.reduce((sum, c) => sum + (c - mean) ** 2, 0) / counts.length;
      // Result lies in (0, 1]; higher = more evenly distributed
      metrics.consistencyScore = 1 / (1 + Math.sqrt(variance) / mean);
    } else {
      metrics.consistencyScore = 1; // Single action type = perfectly consistent
    }
    return metrics;
  }

  /**
   * Extract information gathering metrics.
   *
   * A prediction counts as correct only when
   * `action.correctness.predictionCorrect === true`.
   *
   * @param {Array<object>} steps - Trajectory steps.
   * @returns {object} Information metrics.
   */
  extractInformationMetrics(steps) {
    const metrics = {
      researchActions: 0,
      newsConsumed: 0,
      marketDataQueries: 0,
      infoRequestsSent: 0,
      infoShared: 0,
      predictionsMade: 0,
      correctPredictions: 0,
      predictionAccuracy: 0,
    };
    for (const step of steps) {
      const action = step?.action;
      if (!action) {
        continue;
      }
      const actionType = normalizeActionType(action);
      if (actionType === "research" || actionType === "analyze") {
        metrics.researchActions++;
      } else if (actionType === "read_news" || actionType === "consume_news") {
        metrics.newsConsumed++;
      } else if (
        actionType === "query_market" ||
        actionType === "check_price" ||
        actionType === "get_quote"
      ) {
        metrics.marketDataQueries++;
      } else if (actionType === "request_info" || actionType === "ask") {
        metrics.infoRequestsSent++;
      } else if (actionType === "share_info" || actionType === "share") {
        metrics.infoShared++;
      } else if (actionType === "predict" || actionType === "bet") {
        metrics.predictionsMade++;
        // Check if prediction was correct
        if (action.correctness?.predictionCorrect === true) {
          metrics.correctPredictions++;
        }
      }
    }
    // Calculate prediction accuracy
    if (metrics.predictionsMade > 0) {
      metrics.predictionAccuracy =
        metrics.correctPredictions / metrics.predictionsMade;
    }
    return metrics;
  }

  /**
   * Parse a JSON-encoded steps array and extract metrics from it.
   *
   * Start and end balances are read from `environmentState.agentBalance` of
   * the first and last step. When the last step has no usable balance and
   * `params.finalPnL` is numeric, the end balance is derived as start balance
   * (or 0) plus `finalPnL`. Absent or non-numeric balances are passed on as
   * `undefined` rather than coerced to 0 or NaN.
   *
   * @param {object} params
   * @param {string} params.stepsJson - JSON text of the steps array.
   * @param {string} params.trajectoryId
   * @param {string} params.agentId
   * @param {string} [params.scenarioId]
   * @param {number} [params.finalPnL]
   * @returns {object|null} Extracted metrics, or `null` when the JSON is
   *   invalid, is not a non-empty array, or extraction throws (logged).
   */
  extractFromRaw(params) {
    try {
      const steps = JSON.parse(params.stepsJson);
      if (!Array.isArray(steps) || steps.length === 0) {
        logger.warn("Invalid or empty steps array", { trajectoryId: params.trajectoryId }, "MetricsExtractor");
        return null;
      }
      // Get start/end balance from environment state
      const startBalance = toFiniteNumber(
        steps[0]?.environmentState?.agentBalance,
      );
      let endBalance = toFiniteNumber(
        steps[steps.length - 1]?.environmentState?.agentBalance,
      );
      if (endBalance === undefined) {
        const finalPnL = toFiniteNumber(params.finalPnL);
        if (finalPnL !== undefined) {
          endBalance = (startBalance ?? 0) + finalPnL;
        }
      }
      return this.extract({
        trajectoryId: params.trajectoryId,
        agentId: params.agentId,
        steps,
        scenarioId: params.scenarioId,
        startBalance,
        endBalance,
      });
    } catch (error) {
      logger.error("Failed to extract metrics", {
        trajectoryId: params.trajectoryId,
        error: error instanceof Error ? error.message : String(error),
      }, "MetricsExtractor");
      return null;
    }
  }
}
520
/**
 * Shared module-level extractor instance, created once when the module loads.
 */
const trajectoryMetricsExtractor = new TrajectoryMetricsExtractor();
export { trajectoryMetricsExtractor };