@elizaos/training 2.0.0-alpha.21 → 2.0.0-alpha.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-lint.log +2 -0
- package/.turbo/turbo-typecheck.log +1 -0
- package/dist/.tsbuildinfo +1 -0
- package/dist/adapter.js +59 -0
- package/dist/archetypes/ArchetypeConfigService.js +510 -0
- package/dist/archetypes/derive-archetype.js +196 -0
- package/dist/archetypes/index.js +7 -0
- package/dist/benchmark/ArchetypeMatchupBenchmark.js +547 -0
- package/dist/benchmark/BenchmarkChartGenerator.js +632 -0
- package/dist/benchmark/BenchmarkDataGenerator.js +825 -0
- package/dist/benchmark/BenchmarkDataViewer.js +197 -0
- package/dist/benchmark/BenchmarkHistoryService.js +135 -0
- package/dist/benchmark/BenchmarkRunner.js +483 -0
- package/dist/benchmark/BenchmarkValidator.js +158 -0
- package/dist/benchmark/FastEvalRunner.js +133 -0
- package/dist/benchmark/MetricsValidator.js +104 -0
- package/dist/benchmark/MetricsVisualizer.js +775 -0
- package/dist/benchmark/ModelBenchmarkService.js +433 -0
- package/dist/benchmark/ModelRegistry.js +122 -0
- package/dist/benchmark/RulerBenchmarkIntegration.js +168 -0
- package/dist/benchmark/SimulationA2AInterface.js +683 -0
- package/dist/benchmark/SimulationEngine.js +522 -0
- package/dist/benchmark/TaskRunner.js +60 -0
- package/dist/benchmark/__tests__/BenchmarkRunner.test.js +409 -0
- package/dist/benchmark/__tests__/HeadToHead.test.js +105 -0
- package/dist/benchmark/index.js +23 -0
- package/dist/benchmark/parseSimulationMetrics.js +86 -0
- package/dist/benchmark/simulation-types.js +1 -0
- package/dist/dependencies.js +197 -0
- package/dist/generation/TrajectoryGenerator.js +244 -0
- package/dist/generation/index.js +6 -0
- package/dist/huggingface/HuggingFaceDatasetUploader.js +463 -0
- package/dist/huggingface/HuggingFaceIntegrationService.js +272 -0
- package/dist/huggingface/HuggingFaceModelUploader.js +385 -0
- package/dist/huggingface/index.js +9 -0
- package/dist/huggingface/shared/HuggingFaceUploadUtil.js +144 -0
- package/dist/index.js +41 -0
- package/dist/init-training.js +43 -0
- package/dist/metrics/TrajectoryMetricsExtractor.js +523 -0
- package/dist/metrics/__tests__/TrajectoryMetricsExtractor.test.js +628 -0
- package/dist/metrics/index.js +7 -0
- package/dist/metrics/types.js +21 -0
- package/dist/rubrics/__tests__/index.test.js +150 -0
- package/dist/rubrics/ass-kisser.js +83 -0
- package/dist/rubrics/degen.js +78 -0
- package/dist/rubrics/goody-twoshoes.js +82 -0
- package/dist/rubrics/index.js +184 -0
- package/dist/rubrics/information-trader.js +82 -0
- package/dist/rubrics/infosec.js +99 -0
- package/dist/rubrics/liar.js +102 -0
- package/dist/rubrics/perps-trader.js +85 -0
- package/dist/rubrics/researcher.js +79 -0
- package/dist/rubrics/scammer.js +80 -0
- package/dist/rubrics/social-butterfly.js +71 -0
- package/dist/rubrics/super-predictor.js +95 -0
- package/dist/rubrics/trader.js +65 -0
- package/dist/scoring/ArchetypeScoringService.js +301 -0
- package/dist/scoring/JudgePromptBuilder.js +401 -0
- package/dist/scoring/LLMJudgeCache.js +263 -0
- package/dist/scoring/index.js +8 -0
- package/dist/training/AutomationPipeline.js +714 -0
- package/dist/training/BenchmarkService.js +370 -0
- package/dist/training/ConfigValidator.js +153 -0
- package/dist/training/MarketOutcomesTracker.js +142 -0
- package/dist/training/ModelDeployer.js +128 -0
- package/dist/training/ModelFetcher.js +48 -0
- package/dist/training/ModelSelectionService.js +248 -0
- package/dist/training/ModelUsageVerifier.js +106 -0
- package/dist/training/MultiModelOrchestrator.js +349 -0
- package/dist/training/RLModelConfig.js +295 -0
- package/dist/training/RewardBackpropagationService.js +117 -0
- package/dist/training/RulerScoringService.js +450 -0
- package/dist/training/TrainingMonitor.js +108 -0
- package/dist/training/TrajectoryRecorder.js +281 -0
- package/dist/training/__tests__/TrajectoryRecorder.test.js +363 -0
- package/dist/training/index.js +30 -0
- package/dist/training/logRLConfig.js +29 -0
- package/dist/training/pipeline.js +80 -0
- package/dist/training/storage/ModelStorageService.js +190 -0
- package/dist/training/storage/TrainingDataArchiver.js +136 -0
- package/dist/training/storage/index.js +7 -0
- package/dist/training/types.js +6 -0
- package/dist/training/window-utils.js +100 -0
- package/dist/utils/index.js +73 -0
- package/dist/utils/logger.js +55 -0
- package/dist/utils/snowflake.js +15 -0
- package/dist/utils/synthetic-detector.js +67 -0
- package/package.json +2 -2
- package/research-output/training-runs/training-run-1773742857616.json +38 -0
- package/research-output/training-runs/training-run-1773742946977.json +38 -0
- package/research-output/training-runs/training-run-1773743278891.json +38 -0
- package/research-output/training-runs/training-run-1773743409754.json +38 -0
- package/research-output/training-runs/training-run-1773743651086.json +38 -0
- package/research-output/training-runs/training-run-1773743782883.json +38 -0
|
@@ -0,0 +1,523 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TrajectoryMetricsExtractor
|
|
3
|
+
*
|
|
4
|
+
* Extracts comprehensive behavioral metrics from agent trajectories
|
|
5
|
+
* for use in multi-criteria LLM-as-judge evaluation.
|
|
6
|
+
*
|
|
7
|
+
* Extracts 5 categories of metrics:
|
|
8
|
+
* - Social: group chats, DMs, posts, mentions
|
|
9
|
+
* - Trading: P&L, win rate, Sharpe ratio, drawdown
|
|
10
|
+
* - Influence: followers, reputation, reactions
|
|
11
|
+
* - Behavior: action patterns, consistency
|
|
12
|
+
* - Information: research, predictions
|
|
13
|
+
*
|
|
14
|
+
* @packageDocumentation
|
|
15
|
+
*/
|
|
16
|
+
import { logger } from "../utils/logger";
|
|
17
|
+
/**
|
|
18
|
+
* Action types that count as social interactions
|
|
19
|
+
*/
|
|
20
|
+
// Membership set used to categorise an action as "social".
// Includes the alias spellings ("group_message", "dm", "post", "reply") that
// extractSocialMetrics already counts as social, so the social/trade ratio in
// the behavior metrics agrees with the per-category social counters.
const SOCIAL_ACTION_TYPES = new Set([
    // Group chats
    "join_group_chat",
    "create_group_chat",
    "leave_group_chat",
    "post_group_message",
    "group_message",
    // Direct messages
    "send_dm",
    "dm",
    "reply_dm",
    // Posts and comments
    "create_post",
    "post",
    "comment",
    "reply",
    // Reactions and graph actions
    "like",
    "follow",
    "unfollow",
    "mention",
    "invite",
    "react",
    "share",
]);
|
|
37
|
+
/**
|
|
38
|
+
* Action types that count as trading actions
|
|
39
|
+
*/
|
|
40
|
+
// Membership set used to categorise an action as a trading action.
// Compared against the lower-cased action type of each step.
const TRADING_ACTION_TYPES = new Set([
    // Direct trades
    "trade", "buy", "sell",
    // Order and position management
    "place_order", "cancel_order", "close_position", "open_position",
    // Prediction-style and swap actions
    "predict", "bet", "swap",
]);
|
|
52
|
+
/**
 * Derives behavioral metrics from an agent trajectory (an array of steps).
 *
 * Each step may carry an `action` ({ actionType, parameters, result, success,
 * correctness }) and an `environmentState` object. Five metric groups are
 * produced: social, trading, influence, behavior and information.
 *
 * The class keeps no instance state; every method works on its arguments only.
 */
export class TrajectoryMetricsExtractor {
    /**
     * Extract every metric group from a trajectory.
     *
     * @param {object} params
     * @param {string} params.trajectoryId - Echoed back on the result.
     * @param {string} params.agentId - Agent whose perspective is used for social metrics.
     * @param {Array<object>} params.steps - Trajectory steps.
     * @param {string} [params.scenarioId] - Echoed back on the result.
     * @param {number} [params.startBalance] - Balance at the first step, if known.
     * @param {number} [params.endBalance] - Balance at the last step, if known.
     * @returns {object} The five metric groups plus `extractedAt` and the identifiers.
     */
    extract(params) {
        const { trajectoryId, agentId, steps, scenarioId, startBalance, endBalance, } = params;
        const social = this.extractSocialMetrics(steps, agentId);
        const trading = this.extractTradingMetrics(steps, startBalance, endBalance);
        const influence = this.extractInfluenceMetrics(steps);
        const behavior = this.extractBehaviorMetrics(steps);
        const information = this.extractInformationMetrics(steps);
        return {
            social,
            trading,
            influence,
            behavior,
            information,
            extractedAt: new Date(),
            trajectoryId,
            agentId,
            scenarioId,
        };
    }
    /**
     * Extract social interaction metrics.
     *
     * NOTE(review): a DM is counted as received when a `send_dm`/`dm` step is
     * addressed to `agentId` (via `toUserId`/`recipientId`) and was not
     * initiated by it. This assumes incoming DMs are recorded as steps in that
     * shape - confirm against the trajectory recorder.
     *
     * @param {Array<object>} steps - Trajectory steps.
     * @param {string} agentId - The agent the trajectory belongs to.
     * @returns {object} Social counters, `dmResponseRate` (0..1) and `uniqueUsersInteracted`.
     */
    extractSocialMetrics(steps, agentId) {
        const metrics = {
            groupChatsJoined: 0,
            groupChatsCreated: 0,
            groupMessagesSent: 0,
            dmsInitiated: 0,
            dmsReceived: 0,
            dmResponseRate: 0,
            uniqueUsersInteracted: 0,
            postsCreated: 0,
            commentsMade: 0,
            mentionsGiven: 0,
            mentionsReceived: 0,
            invitationsSent: 0,
        };
        const usersInteracted = new Set();
        let dmsReplied = 0;
        for (const step of steps) {
            const action = step.action;
            if (!action)
                continue;
            const actionType = normalizeActionType(action);
            const params = action.parameters || {};
            // Group chat actions
            if (actionType === "join_group_chat") {
                metrics.groupChatsJoined++;
            }
            else if (actionType === "create_group_chat") {
                metrics.groupChatsCreated++;
            }
            else if (actionType === "post_group_message" ||
                actionType === "group_message") {
                metrics.groupMessagesSent++;
                if (params.groupId) {
                    usersInteracted.add(String(params.groupId));
                }
            }
            // DM actions
            else if (actionType === "send_dm" || actionType === "dm") {
                const recipient = params.toUserId || params.recipientId;
                const isInitiator = params.initiator === agentId || params.fromAgent === agentId;
                // Require a concrete recipient so two missing ids never compare equal.
                const isReceived = !isInitiator && Boolean(recipient) && recipient === agentId;
                if (isInitiator) {
                    metrics.dmsInitiated++;
                }
                else if (isReceived) {
                    metrics.dmsReceived++;
                }
                if (isReceived) {
                    // The counterpart of an incoming DM is its sender, not the agent itself.
                    const sender = params.initiator || params.fromAgent;
                    if (sender) {
                        usersInteracted.add(String(sender));
                    }
                }
                else if (recipient) {
                    usersInteracted.add(String(recipient));
                }
            }
            else if (actionType === "reply_dm") {
                dmsReplied++;
            }
            // Post/comment actions
            else if (actionType === "create_post" || actionType === "post") {
                metrics.postsCreated++;
            }
            else if (actionType === "comment" || actionType === "reply") {
                metrics.commentsMade++;
                if (params.authorId) {
                    usersInteracted.add(String(params.authorId));
                }
            }
            // Mention/invite actions
            else if (actionType === "mention") {
                metrics.mentionsGiven++;
                if (params.mentionedUserId) {
                    usersInteracted.add(String(params.mentionedUserId));
                }
            }
            else if (actionType === "invite") {
                metrics.invitationsSent++;
                if (params.invitedUserId) {
                    usersInteracted.add(String(params.invitedUserId));
                }
            }
            // Track users from any interaction, regardless of action type
            if (params.userId && params.userId !== agentId) {
                usersInteracted.add(String(params.userId));
            }
            if (params.targetUserId && params.targetUserId !== agentId) {
                usersInteracted.add(String(params.targetUserId));
            }
        }
        // Response rate is a fraction; cap at 1 in case more replies than
        // received DMs were recorded.
        if (metrics.dmsReceived > 0) {
            metrics.dmResponseRate = Math.min(1, dmsReplied / metrics.dmsReceived);
        }
        metrics.uniqueUsersInteracted = usersInteracted.size;
        return metrics;
    }
    /**
     * Extract trading performance metrics.
     *
     * Per-trade P&L is read from `action.result` (`pnl`, `profit` or `return`);
     * values that are not finite numbers are ignored so they cannot turn the
     * aggregates into NaN. When no per-trade P&L exists, `totalPnL` falls back
     * to `endBalance - startBalance` if both are provided.
     *
     * @param {Array<object>} steps - Trajectory steps.
     * @param {number} [startBalance] - Balance at the start of the trajectory.
     * @param {number} [endBalance] - Balance at the end of the trajectory.
     * @returns {object} Trading counters and derived statistics.
     */
    extractTradingMetrics(steps, startBalance, endBalance) {
        const metrics = {
            tradesExecuted: 0,
            profitableTrades: 0,
            winRate: 0,
            totalPnL: 0,
            maxDrawdown: 0,
            sharpeRatio: 0,
            avgPositionSize: 0,
            avgHoldingPeriod: 0,
            marketsTraded: 0,
            buyTrades: 0,
            sellTrades: 0,
            largestWin: 0,
            largestLoss: 0,
        };
        const tradePnLs = [];
        const positionSizes = [];
        const marketsSet = new Set();
        let runningPnL = 0;
        let peakPnL = 0;
        let maxDrawdown = 0;
        for (const step of steps) {
            const action = step.action;
            if (!action)
                continue;
            const actionType = normalizeActionType(action);
            if (!TRADING_ACTION_TYPES.has(actionType))
                continue;
            const params = action.parameters || {};
            const result = action.result || {};
            metrics.tradesExecuted++;
            // Track buy/sell
            if (actionType === "buy" ||
                params.side === "buy" ||
                params.direction === "long") {
                metrics.buyTrades++;
            }
            else if (actionType === "sell" ||
                params.side === "sell" ||
                params.direction === "short") {
                metrics.sellTrades++;
            }
            // Track market
            const marketId = params.marketId || params.market || params.ticker;
            if (marketId) {
                marketsSet.add(String(marketId));
            }
            // Track position size (non-finite values are treated as absent)
            const size = finiteOrZero(params.amount || params.size || params.quantity);
            if (size > 0) {
                positionSizes.push(size);
            }
            // Track P&L from result (non-finite values are treated as absent)
            const tradePnL = finiteOrZero(result.pnl || result.profit || result.return);
            if (tradePnL === 0)
                continue;
            tradePnLs.push(tradePnL);
            runningPnL += tradePnL;
            if (tradePnL > 0) {
                metrics.profitableTrades++;
                if (tradePnL > metrics.largestWin) {
                    metrics.largestWin = tradePnL;
                }
            }
            else if (tradePnL < metrics.largestLoss) {
                metrics.largestLoss = tradePnL;
            }
            // Drawdown is the distance from the running P&L peak
            if (runningPnL > peakPnL) {
                peakPnL = runningPnL;
            }
            const drawdown = peakPnL - runningPnL;
            if (drawdown > maxDrawdown) {
                maxDrawdown = drawdown;
            }
        }
        // Win rate is relative to all trading actions, not only those with P&L
        if (metrics.tradesExecuted > 0) {
            metrics.winRate = metrics.profitableTrades / metrics.tradesExecuted;
        }
        // Total P&L from trades, or from the balance difference as a fallback
        if (tradePnLs.length > 0) {
            metrics.totalPnL = tradePnLs.reduce((sum, pnl) => sum + pnl, 0);
        }
        else if (startBalance !== undefined && endBalance !== undefined) {
            metrics.totalPnL = endBalance - startBalance;
        }
        metrics.maxDrawdown = maxDrawdown;
        metrics.marketsTraded = marketsSet.size;
        // Average position size
        if (positionSizes.length > 0) {
            metrics.avgPositionSize =
                positionSizes.reduce((sum, s) => sum + s, 0) / positionSizes.length;
        }
        // Simplified Sharpe ratio: mean / population std-dev of per-trade P&L
        if (tradePnLs.length > 1) {
            const mean = tradePnLs.reduce((sum, pnl) => sum + pnl, 0) / tradePnLs.length;
            const variance = tradePnLs.reduce((sum, pnl) => sum + (pnl - mean) ** 2, 0) /
                tradePnLs.length;
            const stdDev = Math.sqrt(variance);
            if (stdDev > 0) {
                metrics.sharpeRatio = mean / stdDev;
            }
        }
        return metrics;
    }
    /**
     * Extract influence and reputation metrics.
     *
     * Deltas are last observed value minus first observed value of
     * reputation, trust and followers in `step.environmentState`. A value of
     * 0 is a valid observation; only missing or non-numeric values are skipped.
     *
     * @param {Array<object>} steps - Trajectory steps.
     * @returns {object} Deltas, reaction totals and the composite `influenceScore`.
     */
    extractInfluenceMetrics(steps) {
        const metrics = {
            followersGained: 0,
            reputationDelta: 0,
            trustLevelDelta: 0,
            influenceScore: 0,
            informationSpread: 0,
            positiveReactions: 0,
            negativeReactions: 0,
        };
        const reputation = { start: null, end: null };
        const trust = { start: null, end: null };
        const followers = { start: null, end: null };
        // Records the first and the most recent observation of a series.
        const observe = (series, value) => {
            if (value === null)
                return;
            if (series.start === null) {
                series.start = value;
            }
            series.end = value;
        };
        for (const step of steps) {
            const envState = step.environmentState || {};
            observe(reputation, readObservedNumber(envState, "reputation", "agentReputation"));
            observe(trust, readObservedNumber(envState, "trustLevel", "trust"));
            observe(followers, readObservedNumber(envState, "followers", "followerCount"));
            // Track reactions from action results
            const result = step.action?.result;
            if (result) {
                metrics.positiveReactions += finiteOrZero(result.likes || result.upvotes);
                metrics.negativeReactions += finiteOrZero(result.dislikes || result.downvotes);
                metrics.informationSpread += finiteOrZero(result.shares || result.reshares);
            }
        }
        // Calculate deltas (start and end are set together by observe)
        if (reputation.start !== null) {
            metrics.reputationDelta = reputation.end - reputation.start;
        }
        if (trust.start !== null) {
            metrics.trustLevelDelta = trust.end - trust.start;
        }
        if (followers.start !== null) {
            metrics.followersGained = followers.end - followers.start;
        }
        // Calculate influence score (simple composite)
        metrics.influenceScore =
            metrics.followersGained * 2 +
                metrics.positiveReactions -
                metrics.negativeReactions +
                metrics.informationSpread * 3;
        return metrics;
    }
    /**
     * Extract behavioral pattern metrics.
     *
     * An action counts as failed when `action.success` is falsy. Actions
     * without a usable `actionType` still count toward `totalActions` but are
     * left out of the per-type distribution.
     *
     * @param {Array<object>} steps - Trajectory steps.
     * @returns {object} Action counts, ratios, the action types used and a consistency score.
     */
    extractBehaviorMetrics(steps) {
        const metrics = {
            actionsPerTick: 0,
            socialToTradeRatio: 0,
            avgResponseTime: 0,
            consistencyScore: 0,
            totalActions: 0,
            failedActions: 0,
            actionSuccessRate: 0,
            episodeLength: steps.length,
            actionTypesUsed: [],
            dominantActionType: "",
        };
        const actionTypeCounts = new Map();
        let socialActions = 0;
        let tradeActions = 0;
        for (const step of steps) {
            const action = step.action;
            if (!action)
                continue;
            metrics.totalActions++;
            if (!action.success) {
                metrics.failedActions++;
            }
            const actionType = normalizeActionType(action);
            // Count action types (skip actions with no type so "" is never reported)
            if (actionType) {
                actionTypeCounts.set(actionType, (actionTypeCounts.get(actionType) || 0) + 1);
            }
            // Categorize actions
            if (SOCIAL_ACTION_TYPES.has(actionType)) {
                socialActions++;
            }
            if (TRADING_ACTION_TYPES.has(actionType)) {
                tradeActions++;
            }
        }
        // Calculate derived metrics
        if (steps.length > 0) {
            metrics.actionsPerTick = metrics.totalActions / steps.length;
        }
        if (metrics.totalActions > 0) {
            metrics.actionSuccessRate =
                (metrics.totalActions - metrics.failedActions) / metrics.totalActions;
        }
        if (tradeActions > 0) {
            metrics.socialToTradeRatio = socialActions / tradeActions;
        }
        else if (socialActions > 0) {
            // No trades but has social actions - use social count as ratio
            // This makes it clear they're social-focused without a magic number
            metrics.socialToTradeRatio = socialActions;
        }
        // If both are 0, ratio stays at 0 (no activity)
        // Find action types used and dominant type (first type wins ties)
        metrics.actionTypesUsed = Array.from(actionTypeCounts.keys());
        let maxCount = 0;
        for (const [actionType, count] of actionTypeCounts) {
            if (count > maxCount) {
                maxCount = count;
                metrics.dominantActionType = actionType;
            }
        }
        // Consistency score: 1 / (1 + coefficient of variation of per-type counts)
        if (metrics.actionTypesUsed.length > 1) {
            const counts = Array.from(actionTypeCounts.values());
            const mean = counts.reduce((sum, c) => sum + c, 0) / counts.length;
            const variance = counts.reduce((sum, c) => sum + (c - mean) ** 2, 0) / counts.length;
            // Result lies in (0, 1]; 1 means every type was used equally often
            metrics.consistencyScore = 1 / (1 + Math.sqrt(variance) / mean);
        }
        else {
            metrics.consistencyScore = 1; // Zero or one action type = nothing to vary
        }
        return metrics;
    }
    /**
     * Extract information gathering metrics.
     *
     * A prediction counts as correct only when
     * `action.correctness.predictionCorrect === true`.
     *
     * @param {Array<object>} steps - Trajectory steps.
     * @returns {object} Research/news/query/share counters and prediction accuracy (0..1).
     */
    extractInformationMetrics(steps) {
        const metrics = {
            researchActions: 0,
            newsConsumed: 0,
            marketDataQueries: 0,
            infoRequestsSent: 0,
            infoShared: 0,
            predictionsMade: 0,
            correctPredictions: 0,
            predictionAccuracy: 0,
        };
        for (const step of steps) {
            const action = step.action;
            if (!action)
                continue;
            const actionType = normalizeActionType(action);
            if (actionType === "research" || actionType === "analyze") {
                metrics.researchActions++;
            }
            else if (actionType === "read_news" || actionType === "consume_news") {
                metrics.newsConsumed++;
            }
            else if (actionType === "query_market" ||
                actionType === "check_price" ||
                actionType === "get_quote") {
                metrics.marketDataQueries++;
            }
            else if (actionType === "request_info" || actionType === "ask") {
                metrics.infoRequestsSent++;
            }
            else if (actionType === "share_info" || actionType === "share") {
                metrics.infoShared++;
            }
            else if (actionType === "predict" || actionType === "bet") {
                metrics.predictionsMade++;
                // Check if prediction was correct
                if (action.correctness?.predictionCorrect === true) {
                    metrics.correctPredictions++;
                }
            }
        }
        // Calculate prediction accuracy
        if (metrics.predictionsMade > 0) {
            metrics.predictionAccuracy =
                metrics.correctPredictions / metrics.predictionsMade;
        }
        return metrics;
    }
    /**
     * Parse a serialized trajectory and extract metrics from it.
     *
     * Start/end balances are read from `environmentState.agentBalance` of the
     * first and last step. When the end balance is missing but `finalPnL` is
     * given, the end balance is derived as start + finalPnL, with a missing
     * start balance taken as 0 so the P&L is still reported.
     *
     * @param {object} params
     * @param {string} params.trajectoryId
     * @param {string} params.agentId
     * @param {string} params.stepsJson - JSON text of the steps array.
     * @param {string} [params.scenarioId]
     * @param {number} [params.finalPnL] - Final P&L used when no end balance is recorded.
     * @returns {object|null} The extracted metrics, or null when the JSON is
     *   invalid, the steps array is empty, or extraction throws (the failure is logged).
     */
    extractFromRaw(params) {
        try {
            const steps = JSON.parse(params.stepsJson);
            if (!Array.isArray(steps) || steps.length === 0) {
                logger.warn("Invalid or empty steps array", { trajectoryId: params.trajectoryId }, "MetricsExtractor");
                return null;
            }
            // Get start/end balance from environment state (JSON null counts as missing)
            let startBalance = toOptionalNumber(steps[0]?.environmentState?.agentBalance);
            let endBalance = toOptionalNumber(steps[steps.length - 1]?.environmentState?.agentBalance);
            if (endBalance === undefined &&
                params.finalPnL !== undefined &&
                params.finalPnL !== null) {
                // Both balances must be defined for the P&L fallback in
                // extractTradingMetrics to apply.
                startBalance = startBalance ?? 0;
                endBalance = startBalance + Number(params.finalPnL);
            }
            return this.extract({
                trajectoryId: params.trajectoryId,
                agentId: params.agentId,
                steps,
                scenarioId: params.scenarioId,
                startBalance,
                endBalance,
            });
        }
        catch (error) {
            logger.error("Failed to extract metrics", {
                trajectoryId: params.trajectoryId,
                error: error instanceof Error ? error.message : String(error),
            }, "MetricsExtractor");
            return null;
        }
    }
}
/**
 * Lower-cased action type of an action, or "" when `actionType` is missing.
 */
function normalizeActionType(action) {
    const rawType = action.actionType;
    return rawType === undefined || rawType === null
        ? ""
        : String(rawType).toLowerCase();
}
/**
 * Convert a value to a number, mapping anything non-finite (NaN, Infinity,
 * undefined, non-numeric strings) to 0.
 */
function finiteOrZero(value) {
    const parsed = Number(value);
    return Number.isFinite(parsed) ? parsed : 0;
}
/**
 * Read a numeric observation from `state[primaryKey]`, falling back to
 * `state[fallbackKey]`. Returns null when neither holds a finite number;
 * 0 is returned as a real observation.
 */
function readObservedNumber(state, primaryKey, fallbackKey) {
    const raw = state[primaryKey] ?? state[fallbackKey];
    if (raw === undefined || raw === null || raw === "") {
        return null;
    }
    const parsed = Number(raw);
    return Number.isFinite(parsed) ? parsed : null;
}
/**
 * Number(value), except that null/undefined stay undefined.
 */
function toOptionalNumber(value) {
    return value === undefined || value === null ? undefined : Number(value);
}
|
|
520
|
+
/**
|
|
521
|
+
* Singleton instance
|
|
522
|
+
*/
|
|
523
|
+
// Created once when this module is first evaluated. The class declares no
// constructor or instance fields, so importers can share this one instance.
export const trajectoryMetricsExtractor = new TrajectoryMetricsExtractor();
|