@elizaos/training 2.0.0-alpha.13 → 2.0.0-alpha.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/research-output/training-runs/training-run-1773726941205.json +38 -0
- package/scripts/rank_trajectories.ts +0 -1
- package/scripts/run_task_benchmark.ts +4 -11
- package/src/adapter.ts +96 -49
- package/src/archetypes/ArchetypeConfigService.ts +188 -185
- package/src/archetypes/derive-archetype.ts +47 -47
- package/src/archetypes/index.ts +2 -2
- package/src/benchmark/ArchetypeMatchupBenchmark.ts +70 -70
- package/src/benchmark/BenchmarkChartGenerator.ts +70 -69
- package/src/benchmark/BenchmarkDataGenerator.ts +136 -136
- package/src/benchmark/BenchmarkDataViewer.ts +32 -30
- package/src/benchmark/BenchmarkHistoryService.ts +13 -12
- package/src/benchmark/BenchmarkRunner.ts +87 -83
- package/src/benchmark/BenchmarkValidator.ts +48 -46
- package/src/benchmark/FastEvalRunner.ts +17 -16
- package/src/benchmark/MetricsValidator.ts +20 -21
- package/src/benchmark/MetricsVisualizer.ts +92 -85
- package/src/benchmark/ModelBenchmarkService.ts +90 -82
- package/src/benchmark/ModelRegistry.ts +44 -44
- package/src/benchmark/RulerBenchmarkIntegration.ts +24 -24
- package/src/benchmark/SimulationA2AInterface.ts +118 -118
- package/src/benchmark/SimulationEngine.ts +51 -51
- package/src/benchmark/TaskRunner.ts +87 -79
- package/src/benchmark/__tests__/BenchmarkRunner.test.ts +80 -80
- package/src/benchmark/__tests__/HeadToHead.test.ts +26 -26
- package/src/benchmark/index.ts +27 -27
- package/src/benchmark/parseSimulationMetrics.ts +32 -32
- package/src/benchmark/simulation-types.ts +10 -10
- package/src/dependencies.ts +34 -34
- package/src/generation/TrajectoryGenerator.ts +39 -37
- package/src/generation/index.ts +1 -1
- package/src/huggingface/HuggingFaceDatasetUploader.ts +72 -72
- package/src/huggingface/HuggingFaceIntegrationService.ts +59 -53
- package/src/huggingface/HuggingFaceModelUploader.ts +60 -59
- package/src/huggingface/index.ts +6 -6
- package/src/huggingface/shared/HuggingFaceUploadUtil.ts +32 -32
- package/src/index.ts +27 -27
- package/src/init-training.ts +6 -6
- package/src/metrics/TrajectoryMetricsExtractor.ts +70 -71
- package/src/metrics/__tests__/TrajectoryMetricsExtractor.test.ts +182 -182
- package/src/metrics/index.ts +2 -2
- package/src/rubrics/__tests__/index.test.ts +73 -73
- package/src/rubrics/ass-kisser.ts +6 -6
- package/src/rubrics/degen.ts +6 -6
- package/src/rubrics/goody-twoshoes.ts +6 -6
- package/src/rubrics/index.ts +50 -50
- package/src/rubrics/information-trader.ts +6 -6
- package/src/rubrics/infosec.ts +6 -6
- package/src/rubrics/liar.ts +6 -6
- package/src/rubrics/perps-trader.ts +6 -6
- package/src/rubrics/researcher.ts +6 -6
- package/src/rubrics/scammer.ts +6 -6
- package/src/rubrics/social-butterfly.ts +7 -7
- package/src/rubrics/super-predictor.ts +6 -6
- package/src/rubrics/trader.ts +5 -5
- package/src/scoring/ArchetypeScoringService.ts +56 -54
- package/src/scoring/JudgePromptBuilder.ts +96 -96
- package/src/scoring/LLMJudgeCache.ts +26 -23
- package/src/scoring/index.ts +3 -3
- package/src/training/AutomationPipeline.ts +149 -140
- package/src/training/BenchmarkService.ts +49 -45
- package/src/training/ConfigValidator.ts +38 -32
- package/src/training/MarketOutcomesTracker.ts +22 -12
- package/src/training/ModelDeployer.ts +15 -15
- package/src/training/ModelFetcher.ts +7 -7
- package/src/training/ModelSelectionService.ts +32 -32
- package/src/training/ModelUsageVerifier.ts +31 -24
- package/src/training/MultiModelOrchestrator.ts +44 -44
- package/src/training/RLModelConfig.ts +57 -57
- package/src/training/RewardBackpropagationService.ts +18 -17
- package/src/training/RulerScoringService.ts +73 -72
- package/src/training/TrainingMonitor.ts +29 -29
- package/src/training/TrajectoryRecorder.ts +25 -27
- package/src/training/__tests__/TrajectoryRecorder.test.ts +105 -105
- package/src/training/index.ts +36 -36
- package/src/training/logRLConfig.ts +7 -7
- package/src/training/pipeline.ts +13 -16
- package/src/training/storage/ModelStorageService.ts +32 -32
- package/src/training/storage/TrainingDataArchiver.ts +21 -21
- package/src/training/storage/index.ts +2 -2
- package/src/training/types.ts +6 -6
- package/src/training/window-utils.ts +14 -14
- package/src/utils/index.ts +7 -7
- package/src/utils/logger.ts +5 -5
- package/src/utils/snowflake.ts +1 -1
- package/src/utils/synthetic-detector.ts +7 -7
|
package/src/archetypes/derive-archetype.ts
CHANGED
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
* @packageDocumentation
|
|
8
8
|
*/
|
|
9
9
|
|
|
10
|
-
import { getAvailableArchetypes, normalizeArchetype } from
|
|
10
|
+
import { getAvailableArchetypes, normalizeArchetype } from "../rubrics";
|
|
11
11
|
|
|
12
12
|
/**
|
|
13
13
|
* NPC characteristics used for archetype derivation
|
|
@@ -29,28 +29,28 @@ export interface NPCCharacteristics {
|
|
|
29
29
|
*/
|
|
30
30
|
const ROLE_TO_ARCHETYPE: Record<string, string> = {
|
|
31
31
|
// High-reliability roles → ethical archetypes
|
|
32
|
-
insider:
|
|
33
|
-
expert:
|
|
34
|
-
whistleblower:
|
|
35
|
-
analyst:
|
|
32
|
+
insider: "information-trader",
|
|
33
|
+
expert: "researcher",
|
|
34
|
+
whistleblower: "goody-twoshoes",
|
|
35
|
+
analyst: "researcher",
|
|
36
36
|
|
|
37
37
|
// Media/content roles
|
|
38
|
-
journalist:
|
|
39
|
-
reporter:
|
|
40
|
-
influencer:
|
|
38
|
+
journalist: "social-butterfly",
|
|
39
|
+
reporter: "social-butterfly",
|
|
40
|
+
influencer: "social-butterfly",
|
|
41
41
|
|
|
42
42
|
// Low-reliability roles → deceptive archetypes
|
|
43
|
-
deceiver:
|
|
44
|
-
politician:
|
|
45
|
-
conspiracy:
|
|
43
|
+
deceiver: "scammer",
|
|
44
|
+
politician: "liar",
|
|
45
|
+
conspiracy: "liar",
|
|
46
46
|
|
|
47
47
|
// Trading-focused roles
|
|
48
|
-
trader:
|
|
49
|
-
investor:
|
|
50
|
-
speculator:
|
|
48
|
+
trader: "trader",
|
|
49
|
+
investor: "trader",
|
|
50
|
+
speculator: "degen",
|
|
51
51
|
|
|
52
52
|
// Default fallback
|
|
53
|
-
unknown:
|
|
53
|
+
unknown: "trader",
|
|
54
54
|
};
|
|
55
55
|
|
|
56
56
|
/**
|
|
@@ -64,57 +64,57 @@ const PERSONALITY_KEYWORDS: Array<{
|
|
|
64
64
|
}> = [
|
|
65
65
|
// High priority - distinctive personalities
|
|
66
66
|
{
|
|
67
|
-
keywords: [
|
|
68
|
-
archetype:
|
|
67
|
+
keywords: ["manipulative", "deceptive", "cunning", "unethical"],
|
|
68
|
+
archetype: "scammer",
|
|
69
69
|
priority: 10,
|
|
70
70
|
},
|
|
71
71
|
{
|
|
72
|
-
keywords: [
|
|
73
|
-
archetype:
|
|
72
|
+
keywords: ["reckless", "impulsive", "yolo", "fomo", "aggressive"],
|
|
73
|
+
archetype: "degen",
|
|
74
74
|
priority: 10,
|
|
75
75
|
},
|
|
76
76
|
{
|
|
77
|
-
keywords: [
|
|
78
|
-
archetype:
|
|
77
|
+
keywords: ["honest", "ethical", "helpful", "transparent", "altruistic"],
|
|
78
|
+
archetype: "goody-twoshoes",
|
|
79
79
|
priority: 10,
|
|
80
80
|
},
|
|
81
81
|
{
|
|
82
|
-
keywords: [
|
|
83
|
-
archetype:
|
|
82
|
+
keywords: ["thorough", "meticulous", "analytical", "data-driven"],
|
|
83
|
+
archetype: "researcher",
|
|
84
84
|
priority: 8,
|
|
85
85
|
},
|
|
86
86
|
|
|
87
87
|
// Medium priority - trading styles
|
|
88
88
|
{
|
|
89
|
-
keywords: [
|
|
90
|
-
archetype:
|
|
89
|
+
keywords: ["disciplined", "methodical", "patient", "risk-averse"],
|
|
90
|
+
archetype: "trader",
|
|
91
91
|
priority: 5,
|
|
92
92
|
},
|
|
93
93
|
{
|
|
94
|
-
keywords: [
|
|
95
|
-
archetype:
|
|
94
|
+
keywords: ["social", "networker", "outgoing", "community"],
|
|
95
|
+
archetype: "social-butterfly",
|
|
96
96
|
priority: 5,
|
|
97
97
|
},
|
|
98
98
|
{
|
|
99
|
-
keywords: [
|
|
100
|
-
archetype:
|
|
99
|
+
keywords: ["flattering", "agreeable", "sycophantic", "pleasing"],
|
|
100
|
+
archetype: "ass-kisser",
|
|
101
101
|
priority: 5,
|
|
102
102
|
},
|
|
103
103
|
|
|
104
104
|
// Low priority - general
|
|
105
105
|
{
|
|
106
|
-
keywords: [
|
|
107
|
-
archetype:
|
|
106
|
+
keywords: ["suspicious", "secretive", "paranoid", "security"],
|
|
107
|
+
archetype: "infosec",
|
|
108
108
|
priority: 3,
|
|
109
109
|
},
|
|
110
110
|
{
|
|
111
|
-
keywords: [
|
|
112
|
-
archetype:
|
|
111
|
+
keywords: ["leverage", "perpetual", "futures", "derivatives"],
|
|
112
|
+
archetype: "perps-trader",
|
|
113
113
|
priority: 3,
|
|
114
114
|
},
|
|
115
115
|
{
|
|
116
|
-
keywords: [
|
|
117
|
-
archetype:
|
|
116
|
+
keywords: ["prediction", "forecast", "oracle", "prophet"],
|
|
117
|
+
archetype: "super-predictor",
|
|
118
118
|
priority: 3,
|
|
119
119
|
},
|
|
120
120
|
];
|
|
@@ -155,7 +155,7 @@ export function deriveArchetype(npc: NPCCharacteristics): string {
|
|
|
155
155
|
npc.willingToLie === true
|
|
156
156
|
) {
|
|
157
157
|
// Confirmed deceptive: low reliability + actively willing to lie
|
|
158
|
-
return
|
|
158
|
+
return "scammer";
|
|
159
159
|
}
|
|
160
160
|
|
|
161
161
|
// Note: High reliability is factored into personality analysis below, not used as an override.
|
|
@@ -168,7 +168,7 @@ export function deriveArchetype(npc: NPCCharacteristics): string {
|
|
|
168
168
|
|
|
169
169
|
for (const mapping of PERSONALITY_KEYWORDS) {
|
|
170
170
|
const matchCount = mapping.keywords.filter((keyword) =>
|
|
171
|
-
personalityLower.includes(keyword)
|
|
171
|
+
personalityLower.includes(keyword),
|
|
172
172
|
).length;
|
|
173
173
|
|
|
174
174
|
if (matchCount > 0) {
|
|
@@ -190,19 +190,19 @@ export function deriveArchetype(npc: NPCCharacteristics): string {
|
|
|
190
190
|
// 4. Check domain for trading specialization
|
|
191
191
|
if (npc.domain && npc.domain.length > 0) {
|
|
192
192
|
const domains = npc.domain.map((d) => d.toLowerCase());
|
|
193
|
-
if (domains.includes(
|
|
194
|
-
return
|
|
193
|
+
if (domains.includes("trading") || domains.includes("finance")) {
|
|
194
|
+
return "trader";
|
|
195
195
|
}
|
|
196
|
-
if (domains.includes(
|
|
197
|
-
return
|
|
196
|
+
if (domains.includes("technology") || domains.includes("tech")) {
|
|
197
|
+
return "researcher";
|
|
198
198
|
}
|
|
199
|
-
if (domains.includes(
|
|
200
|
-
return
|
|
199
|
+
if (domains.includes("media") || domains.includes("social")) {
|
|
200
|
+
return "social-butterfly";
|
|
201
201
|
}
|
|
202
202
|
}
|
|
203
203
|
|
|
204
204
|
// 5. Default fallback
|
|
205
|
-
return
|
|
205
|
+
return "trader";
|
|
206
206
|
}
|
|
207
207
|
|
|
208
208
|
/**
|
|
@@ -218,7 +218,7 @@ export type ArchetypeResolver = (npcId: string) => string;
|
|
|
218
218
|
* @returns Function that resolves archetype from NPC ID
|
|
219
219
|
*/
|
|
220
220
|
export function createArchetypeResolver(
|
|
221
|
-
npcs: NPCCharacteristics[]
|
|
221
|
+
npcs: NPCCharacteristics[],
|
|
222
222
|
): ArchetypeResolver {
|
|
223
223
|
const archetypeMap = new Map<string, string>();
|
|
224
224
|
|
|
@@ -227,7 +227,7 @@ export function createArchetypeResolver(
|
|
|
227
227
|
}
|
|
228
228
|
|
|
229
229
|
return (npcId: string): string => {
|
|
230
|
-
return archetypeMap.get(npcId) ??
|
|
230
|
+
return archetypeMap.get(npcId) ?? "trader";
|
|
231
231
|
};
|
|
232
232
|
}
|
|
233
233
|
|
|
@@ -237,7 +237,7 @@ export function createArchetypeResolver(
|
|
|
237
237
|
*/
|
|
238
238
|
export function getRoleArchetype(role: string): string {
|
|
239
239
|
const normalized = role.toLowerCase().trim();
|
|
240
|
-
return ROLE_TO_ARCHETYPE[normalized] ??
|
|
240
|
+
return ROLE_TO_ARCHETYPE[normalized] ?? "trader";
|
|
241
241
|
}
|
|
242
242
|
|
|
243
243
|
/**
|
package/src/archetypes/index.ts
CHANGED
|
@@ -10,7 +10,7 @@ export {
|
|
|
10
10
|
ArchetypeConfigService,
|
|
11
11
|
type ArchetypeTraits,
|
|
12
12
|
archetypeConfigService,
|
|
13
|
-
} from
|
|
13
|
+
} from "./ArchetypeConfigService";
|
|
14
14
|
|
|
15
15
|
export {
|
|
16
16
|
type ArchetypeResolver,
|
|
@@ -19,4 +19,4 @@ export {
|
|
|
19
19
|
getRoleArchetype,
|
|
20
20
|
getValidArchetypes,
|
|
21
21
|
type NPCCharacteristics,
|
|
22
|
-
} from
|
|
22
|
+
} from "./derive-archetype";
|
|
package/src/benchmark/ArchetypeMatchupBenchmark.ts
CHANGED
|
@@ -12,18 +12,18 @@
|
|
|
12
12
|
import {
|
|
13
13
|
type ArchetypeConfig,
|
|
14
14
|
ArchetypeConfigService,
|
|
15
|
-
} from
|
|
15
|
+
} from "../archetypes/ArchetypeConfigService";
|
|
16
16
|
import {
|
|
17
17
|
createMultiModelOrchestrator,
|
|
18
18
|
type MultiModelOrchestrator,
|
|
19
|
-
} from
|
|
20
|
-
import { logger } from
|
|
19
|
+
} from "../training/MultiModelOrchestrator";
|
|
20
|
+
import { logger } from "../utils/logger";
|
|
21
21
|
import {
|
|
22
22
|
type BenchmarkConfig,
|
|
23
23
|
BenchmarkDataGenerator,
|
|
24
24
|
type BenchmarkGameSnapshot,
|
|
25
25
|
type Tick,
|
|
26
|
-
} from
|
|
26
|
+
} from "./BenchmarkDataGenerator";
|
|
27
27
|
|
|
28
28
|
/**
|
|
29
29
|
* Individual agent in the matchup simulation
|
|
@@ -95,7 +95,7 @@ export interface MatchupBenchmarkResult {
|
|
|
95
95
|
headToHead: ArchetypeVsResult[];
|
|
96
96
|
|
|
97
97
|
/** Market condition during benchmark */
|
|
98
|
-
marketCondition:
|
|
98
|
+
marketCondition: "bull" | "bear" | "volatile" | "stable";
|
|
99
99
|
|
|
100
100
|
/** Insights derived from the matchup */
|
|
101
101
|
insights: string[];
|
|
@@ -106,7 +106,7 @@ export interface MatchupBenchmarkResult {
|
|
|
106
106
|
*/
|
|
107
107
|
export interface MatchupBenchmarkConfig {
|
|
108
108
|
/** Archetypes to include in matchup (or 'all' for all archetypes) */
|
|
109
|
-
archetypes: string[] |
|
|
109
|
+
archetypes: string[] | "all";
|
|
110
110
|
|
|
111
111
|
/** Number of agents per archetype */
|
|
112
112
|
agentsPerArchetype: number;
|
|
@@ -118,7 +118,7 @@ export interface MatchupBenchmarkConfig {
|
|
|
118
118
|
ticksPerRound: number;
|
|
119
119
|
|
|
120
120
|
/** Market conditions to test */
|
|
121
|
-
marketConditions: Array<
|
|
121
|
+
marketConditions: Array<"bull" | "bear" | "volatile" | "stable">;
|
|
122
122
|
|
|
123
123
|
/** Available VRAM for model loading */
|
|
124
124
|
availableVramGb: number;
|
|
@@ -140,7 +140,7 @@ export class ArchetypeMatchupBenchmark {
|
|
|
140
140
|
* Get all archetypes to benchmark
|
|
141
141
|
*/
|
|
142
142
|
private getArchetypes(): string[] {
|
|
143
|
-
if (this.config.archetypes ===
|
|
143
|
+
if (this.config.archetypes === "all") {
|
|
144
144
|
return ArchetypeConfigService.getAvailableArchetypes();
|
|
145
145
|
}
|
|
146
146
|
return this.config.archetypes;
|
|
@@ -173,7 +173,7 @@ export class ArchetypeMatchupBenchmark {
|
|
|
173
173
|
* Market condition affects seed to create different scenarios
|
|
174
174
|
*/
|
|
175
175
|
private async generateBenchmarkData(
|
|
176
|
-
condition:
|
|
176
|
+
condition: "bull" | "bear" | "volatile" | "stable",
|
|
177
177
|
): Promise<BenchmarkGameSnapshot> {
|
|
178
178
|
// Convert ticks to duration minutes (assuming 1 tick per second)
|
|
179
179
|
const durationMinutes = Math.ceil(this.config.ticksPerRound / 60);
|
|
@@ -190,8 +190,8 @@ export class ArchetypeMatchupBenchmark {
|
|
|
190
190
|
const benchmarkConfig: BenchmarkConfig = {
|
|
191
191
|
durationMinutes,
|
|
192
192
|
tickInterval: 1,
|
|
193
|
-
numPredictionMarkets: condition ===
|
|
194
|
-
numPerpetualMarkets: condition ===
|
|
193
|
+
numPredictionMarkets: condition === "volatile" ? 8 : 5,
|
|
194
|
+
numPerpetualMarkets: condition === "volatile" ? 5 : 3,
|
|
195
195
|
numAgents: 10,
|
|
196
196
|
seed: baseSeed + (Date.now() % 1000), // Semi-reproducible
|
|
197
197
|
};
|
|
@@ -206,18 +206,18 @@ export class ArchetypeMatchupBenchmark {
|
|
|
206
206
|
private async simulateRound(
|
|
207
207
|
agents: MatchupAgent[],
|
|
208
208
|
snapshot: BenchmarkGameSnapshot,
|
|
209
|
-
roundNumber: number
|
|
209
|
+
roundNumber: number,
|
|
210
210
|
): Promise<MatchupAgentResult[]> {
|
|
211
211
|
const results: MatchupAgentResult[] = [];
|
|
212
212
|
|
|
213
213
|
logger.info(
|
|
214
214
|
`Simulating round ${roundNumber} with ${agents.length} agents`,
|
|
215
215
|
{ archetypes: [...new Set(agents.map((a) => a.archetype))] },
|
|
216
|
-
|
|
216
|
+
"ArchetypeMatchupBenchmark",
|
|
217
217
|
);
|
|
218
218
|
|
|
219
219
|
// Check if we should use real inference or simulation
|
|
220
|
-
const useRealInference = process.env.USE_REAL_INFERENCE ===
|
|
220
|
+
const useRealInference = process.env.USE_REAL_INFERENCE === "true";
|
|
221
221
|
|
|
222
222
|
if (useRealInference) {
|
|
223
223
|
// Use real model inference via the orchestrator
|
|
@@ -247,7 +247,7 @@ export class ArchetypeMatchupBenchmark {
|
|
|
247
247
|
*/
|
|
248
248
|
private async runAgentWithRealModel(
|
|
249
249
|
agent: MatchupAgent,
|
|
250
|
-
snapshot: BenchmarkGameSnapshot
|
|
250
|
+
snapshot: BenchmarkGameSnapshot,
|
|
251
251
|
): Promise<MatchupAgentResult> {
|
|
252
252
|
let totalPnl = 0;
|
|
253
253
|
let totalTrades = 0;
|
|
@@ -275,20 +275,20 @@ export class ArchetypeMatchupBenchmark {
|
|
|
275
275
|
// Parse the decision and simulate outcome
|
|
276
276
|
const decision = this.parseAgentDecision(response.response);
|
|
277
277
|
|
|
278
|
-
if (decision.action ===
|
|
278
|
+
if (decision.action === "trade") {
|
|
279
279
|
totalTrades++;
|
|
280
280
|
// Simulate trade outcome based on market conditions
|
|
281
281
|
const marketTrend = this.getMarketTrend(tick);
|
|
282
282
|
const isCorrectDirection =
|
|
283
|
-
(decision.direction ===
|
|
284
|
-
(decision.direction ===
|
|
283
|
+
(decision.direction === "long" && marketTrend > 0) ||
|
|
284
|
+
(decision.direction === "short" && marketTrend < 0);
|
|
285
285
|
if (isCorrectDirection) {
|
|
286
286
|
wins++;
|
|
287
287
|
totalPnl += Math.abs(marketTrend) * 100 * (decision.confidence || 1);
|
|
288
288
|
} else {
|
|
289
289
|
totalPnl -= Math.abs(marketTrend) * 50 * (decision.confidence || 1);
|
|
290
290
|
}
|
|
291
|
-
} else if (decision.action ===
|
|
291
|
+
} else if (decision.action === "post") {
|
|
292
292
|
postsCreated++;
|
|
293
293
|
}
|
|
294
294
|
}
|
|
@@ -326,7 +326,7 @@ export class ArchetypeMatchupBenchmark {
|
|
|
326
326
|
|
|
327
327
|
// Extract market prices from perpetual markets
|
|
328
328
|
const marketPrices = Object.fromEntries(
|
|
329
|
-
state.perpetualMarkets.map((m) => [m.ticker, m.price])
|
|
329
|
+
state.perpetualMarkets.map((m) => [m.ticker, m.price]),
|
|
330
330
|
);
|
|
331
331
|
|
|
332
332
|
// Recent posts can serve as "news"
|
|
@@ -352,8 +352,8 @@ Respond with a JSON object containing:
|
|
|
352
352
|
* Parse agent decision from model response
|
|
353
353
|
*/
|
|
354
354
|
private parseAgentDecision(response: string): {
|
|
355
|
-
action:
|
|
356
|
-
direction?:
|
|
355
|
+
action: "trade" | "post" | "observe";
|
|
356
|
+
direction?: "long" | "short";
|
|
357
357
|
confidence?: number;
|
|
358
358
|
} {
|
|
359
359
|
try {
|
|
@@ -362,7 +362,7 @@ Respond with a JSON object containing:
|
|
|
362
362
|
if (jsonMatch) {
|
|
363
363
|
const parsed = JSON.parse(jsonMatch[0]);
|
|
364
364
|
return {
|
|
365
|
-
action: parsed.action ||
|
|
365
|
+
action: parsed.action || "observe",
|
|
366
366
|
direction: parsed.direction,
|
|
367
367
|
confidence: parsed.confidence || 0.5,
|
|
368
368
|
};
|
|
@@ -373,25 +373,25 @@ Respond with a JSON object containing:
|
|
|
373
373
|
|
|
374
374
|
// Default behavior based on response content
|
|
375
375
|
if (
|
|
376
|
-
response.toLowerCase().includes(
|
|
377
|
-
response.toLowerCase().includes(
|
|
378
|
-
response.toLowerCase().includes(
|
|
376
|
+
response.toLowerCase().includes("trade") ||
|
|
377
|
+
response.toLowerCase().includes("buy") ||
|
|
378
|
+
response.toLowerCase().includes("sell")
|
|
379
379
|
) {
|
|
380
380
|
return {
|
|
381
|
-
action:
|
|
382
|
-
direction: response.toLowerCase().includes(
|
|
381
|
+
action: "trade",
|
|
382
|
+
direction: response.toLowerCase().includes("short") ? "short" : "long",
|
|
383
383
|
confidence: 0.5,
|
|
384
384
|
};
|
|
385
385
|
}
|
|
386
386
|
|
|
387
387
|
if (
|
|
388
|
-
response.toLowerCase().includes(
|
|
389
|
-
response.toLowerCase().includes(
|
|
388
|
+
response.toLowerCase().includes("post") ||
|
|
389
|
+
response.toLowerCase().includes("share")
|
|
390
390
|
) {
|
|
391
|
-
return { action:
|
|
391
|
+
return { action: "post" };
|
|
392
392
|
}
|
|
393
393
|
|
|
394
|
-
return { action:
|
|
394
|
+
return { action: "observe" };
|
|
395
395
|
}
|
|
396
396
|
|
|
397
397
|
/**
|
|
@@ -403,7 +403,7 @@ Respond with a JSON object containing:
|
|
|
403
403
|
if (state.perpetualMarkets.length === 0) return 0;
|
|
404
404
|
|
|
405
405
|
const prices = Object.fromEntries(
|
|
406
|
-
state.perpetualMarkets.map((m) => [m.ticker, m.price])
|
|
406
|
+
state.perpetualMarkets.map((m) => [m.ticker, m.price]),
|
|
407
407
|
);
|
|
408
408
|
|
|
409
409
|
// Calculate average price change
|
|
@@ -422,7 +422,7 @@ Respond with a JSON object containing:
|
|
|
422
422
|
*/
|
|
423
423
|
private simulateAgentPerformance(
|
|
424
424
|
agent: MatchupAgent,
|
|
425
|
-
snapshot: BenchmarkGameSnapshot
|
|
425
|
+
snapshot: BenchmarkGameSnapshot,
|
|
426
426
|
): MatchupAgentResult {
|
|
427
427
|
const config = agent.config;
|
|
428
428
|
const tickCount = snapshot.ticks.length;
|
|
@@ -465,7 +465,7 @@ Respond with a JSON object containing:
|
|
|
465
465
|
* Calculate head-to-head results between archetypes
|
|
466
466
|
*/
|
|
467
467
|
private calculateHeadToHead(
|
|
468
|
-
allResults: MatchupAgentResult[][]
|
|
468
|
+
allResults: MatchupAgentResult[][],
|
|
469
469
|
): ArchetypeVsResult[] {
|
|
470
470
|
const archetypes = this.getArchetypes();
|
|
471
471
|
const headToHead: ArchetypeVsResult[] = [];
|
|
@@ -484,10 +484,10 @@ Respond with a JSON object containing:
|
|
|
484
484
|
// Compare performance in each round
|
|
485
485
|
for (const roundResults of allResults) {
|
|
486
486
|
const arch1Results = roundResults.filter(
|
|
487
|
-
(r) => r.archetype === arch1
|
|
487
|
+
(r) => r.archetype === arch1,
|
|
488
488
|
);
|
|
489
489
|
const arch2Results = roundResults.filter(
|
|
490
|
-
(r) => r.archetype === arch2
|
|
490
|
+
(r) => r.archetype === arch2,
|
|
491
491
|
);
|
|
492
492
|
|
|
493
493
|
if (arch1Results.length === 0 || arch2Results.length === 0) continue;
|
|
@@ -532,8 +532,8 @@ Respond with a JSON object containing:
|
|
|
532
532
|
* Calculate overall archetype rankings
|
|
533
533
|
*/
|
|
534
534
|
private calculateRankings(
|
|
535
|
-
allResults: MatchupAgentResult[][]
|
|
536
|
-
): MatchupBenchmarkResult[
|
|
535
|
+
allResults: MatchupAgentResult[][],
|
|
536
|
+
): MatchupBenchmarkResult["archetypeRankings"] {
|
|
537
537
|
const archetypes = this.getArchetypes();
|
|
538
538
|
const rankings: Map<
|
|
539
539
|
string,
|
|
@@ -593,9 +593,9 @@ Respond with a JSON object containing:
|
|
|
593
593
|
* Generate insights from the matchup results
|
|
594
594
|
*/
|
|
595
595
|
private generateInsights(
|
|
596
|
-
rankings: MatchupBenchmarkResult[
|
|
596
|
+
rankings: MatchupBenchmarkResult["archetypeRankings"],
|
|
597
597
|
headToHead: ArchetypeVsResult[],
|
|
598
|
-
marketCondition: string
|
|
598
|
+
marketCondition: string,
|
|
599
599
|
): string[] {
|
|
600
600
|
const insights: string[] = [];
|
|
601
601
|
|
|
@@ -603,7 +603,7 @@ Respond with a JSON object containing:
|
|
|
603
603
|
const topRanking = rankings[0];
|
|
604
604
|
if (topRanking) {
|
|
605
605
|
insights.push(
|
|
606
|
-
`${topRanking.archetype} performed best in ${marketCondition} conditions with avg rank ${topRanking.avgRank.toFixed(2)}
|
|
606
|
+
`${topRanking.archetype} performed best in ${marketCondition} conditions with avg rank ${topRanking.avgRank.toFixed(2)}`,
|
|
607
607
|
);
|
|
608
608
|
}
|
|
609
609
|
|
|
@@ -611,11 +611,11 @@ Respond with a JSON object containing:
|
|
|
611
611
|
for (const h2h of headToHead) {
|
|
612
612
|
if (h2h.winRate1 >= 0.7) {
|
|
613
613
|
insights.push(
|
|
614
|
-
`${h2h.archetype1} dominates ${h2h.archetype2} (${(h2h.winRate1 * 100).toFixed(0)}% win rate)
|
|
614
|
+
`${h2h.archetype1} dominates ${h2h.archetype2} (${(h2h.winRate1 * 100).toFixed(0)}% win rate)`,
|
|
615
615
|
);
|
|
616
616
|
} else if (h2h.winRate2 >= 0.7) {
|
|
617
617
|
insights.push(
|
|
618
|
-
`${h2h.archetype2} dominates ${h2h.archetype1} (${(h2h.winRate2 * 100).toFixed(0)}% win rate)
|
|
618
|
+
`${h2h.archetype2} dominates ${h2h.archetype1} (${(h2h.winRate2 * 100).toFixed(0)}% win rate)`,
|
|
619
619
|
);
|
|
620
620
|
}
|
|
621
621
|
}
|
|
@@ -657,9 +657,9 @@ Respond with a JSON object containing:
|
|
|
657
657
|
if (bWins) {
|
|
658
658
|
for (const c of bWins) {
|
|
659
659
|
const cWins = wins.get(c);
|
|
660
|
-
if (cWins
|
|
660
|
+
if (cWins?.has(a)) {
|
|
661
661
|
insights.push(
|
|
662
|
-
`Counter triangle found: ${a} → ${b} → ${c} → ${a}
|
|
662
|
+
`Counter triangle found: ${a} → ${b} → ${c} → ${a}`,
|
|
663
663
|
);
|
|
664
664
|
}
|
|
665
665
|
}
|
|
@@ -678,14 +678,14 @@ Respond with a JSON object containing:
|
|
|
678
678
|
const results: MatchupBenchmarkResult[] = [];
|
|
679
679
|
|
|
680
680
|
logger.info(
|
|
681
|
-
|
|
681
|
+
"Starting Archetype Matchup Benchmark",
|
|
682
682
|
{
|
|
683
683
|
archetypes: this.getArchetypes(),
|
|
684
684
|
agentsPerArchetype: this.config.agentsPerArchetype,
|
|
685
685
|
rounds: this.config.rounds,
|
|
686
686
|
conditions: this.config.marketConditions,
|
|
687
687
|
},
|
|
688
|
-
|
|
688
|
+
"ArchetypeMatchupBenchmark",
|
|
689
689
|
);
|
|
690
690
|
|
|
691
691
|
const agents = this.createAgents();
|
|
@@ -694,7 +694,7 @@ Respond with a JSON object containing:
|
|
|
694
694
|
logger.info(
|
|
695
695
|
`Testing in ${condition} market conditions`,
|
|
696
696
|
{},
|
|
697
|
-
|
|
697
|
+
"ArchetypeMatchupBenchmark",
|
|
698
698
|
);
|
|
699
699
|
|
|
700
700
|
const allRoundResults: MatchupAgentResult[][] = [];
|
|
@@ -704,7 +704,7 @@ Respond with a JSON object containing:
|
|
|
704
704
|
const roundResults = await this.simulateRound(
|
|
705
705
|
agents,
|
|
706
706
|
snapshot,
|
|
707
|
-
round + 1
|
|
707
|
+
round + 1,
|
|
708
708
|
);
|
|
709
709
|
allRoundResults.push(roundResults);
|
|
710
710
|
}
|
|
@@ -734,7 +734,7 @@ Respond with a JSON object containing:
|
|
|
734
734
|
topArchetype: rankings[0]?.archetype,
|
|
735
735
|
avgPnl: rankings[0]?.avgPnl.toFixed(2),
|
|
736
736
|
},
|
|
737
|
-
|
|
737
|
+
"ArchetypeMatchupBenchmark",
|
|
738
738
|
);
|
|
739
739
|
}
|
|
740
740
|
|
|
@@ -743,13 +743,13 @@ Respond with a JSON object containing:
|
|
|
743
743
|
|
|
744
744
|
const totalDuration = Date.now() - startTime;
|
|
745
745
|
logger.info(
|
|
746
|
-
|
|
746
|
+
"Archetype Matchup Benchmark complete",
|
|
747
747
|
{
|
|
748
748
|
totalDurationMs: totalDuration,
|
|
749
749
|
conditionsTested: this.config.marketConditions.length,
|
|
750
750
|
totalRounds: this.config.rounds * this.config.marketConditions.length,
|
|
751
751
|
},
|
|
752
|
-
|
|
752
|
+
"ArchetypeMatchupBenchmark",
|
|
753
753
|
);
|
|
754
754
|
|
|
755
755
|
return results;
|
|
@@ -760,49 +760,49 @@ Respond with a JSON object containing:
|
|
|
760
760
|
*/
|
|
761
761
|
static generateReport(results: MatchupBenchmarkResult[]): string {
|
|
762
762
|
const lines: string[] = [];
|
|
763
|
-
lines.push(
|
|
763
|
+
lines.push("# Archetype Matchup Benchmark Report\n");
|
|
764
764
|
|
|
765
765
|
for (const result of results) {
|
|
766
766
|
lines.push(
|
|
767
|
-
`## ${result.marketCondition.toUpperCase()} Market Conditions\n
|
|
767
|
+
`## ${result.marketCondition.toUpperCase()} Market Conditions\n`,
|
|
768
768
|
);
|
|
769
769
|
|
|
770
770
|
// Rankings table
|
|
771
|
-
lines.push(
|
|
772
|
-
lines.push(
|
|
773
|
-
lines.push(
|
|
771
|
+
lines.push("### Overall Rankings\n");
|
|
772
|
+
lines.push("| Rank | Archetype | Avg PnL | Win Rate |");
|
|
773
|
+
lines.push("|------|-----------|---------|----------|");
|
|
774
774
|
for (const ranking of result.archetypeRankings) {
|
|
775
775
|
lines.push(
|
|
776
|
-
`| ${ranking.avgRank.toFixed(1)} | ${ranking.archetype} | ${ranking.avgPnl.toFixed(2)} | ${(ranking.winRate * 100).toFixed(1)}%
|
|
776
|
+
`| ${ranking.avgRank.toFixed(1)} | ${ranking.archetype} | ${ranking.avgPnl.toFixed(2)} | ${(ranking.winRate * 100).toFixed(1)}% |`,
|
|
777
777
|
);
|
|
778
778
|
}
|
|
779
|
-
lines.push(
|
|
779
|
+
lines.push("");
|
|
780
780
|
|
|
781
781
|
// Head-to-head table
|
|
782
|
-
lines.push(
|
|
783
|
-
lines.push(
|
|
784
|
-
lines.push(
|
|
782
|
+
lines.push("### Head-to-Head Results\n");
|
|
783
|
+
lines.push("| Matchup | Winner | Win Rate |");
|
|
784
|
+
lines.push("|---------|--------|----------|");
|
|
785
785
|
for (const h2h of result.headToHead) {
|
|
786
786
|
const winner =
|
|
787
787
|
h2h.winRate1 > h2h.winRate2 ? h2h.archetype1 : h2h.archetype2;
|
|
788
788
|
const winRate = Math.max(h2h.winRate1, h2h.winRate2);
|
|
789
789
|
lines.push(
|
|
790
|
-
`| ${h2h.archetype1} vs ${h2h.archetype2} | ${winner} | ${(winRate * 100).toFixed(1)}%
|
|
790
|
+
`| ${h2h.archetype1} vs ${h2h.archetype2} | ${winner} | ${(winRate * 100).toFixed(1)}% |`,
|
|
791
791
|
);
|
|
792
792
|
}
|
|
793
|
-
lines.push(
|
|
793
|
+
lines.push("");
|
|
794
794
|
|
|
795
795
|
// Insights
|
|
796
796
|
if (result.insights.length > 0) {
|
|
797
|
-
lines.push(
|
|
797
|
+
lines.push("### Key Insights\n");
|
|
798
798
|
for (const insight of result.insights) {
|
|
799
799
|
lines.push(`- ${insight}`);
|
|
800
800
|
}
|
|
801
|
-
lines.push(
|
|
801
|
+
lines.push("");
|
|
802
802
|
}
|
|
803
803
|
}
|
|
804
804
|
|
|
805
|
-
return lines.join(
|
|
805
|
+
return lines.join("\n");
|
|
806
806
|
}
|
|
807
807
|
}
|
|
808
808
|
|
|
@@ -813,11 +813,11 @@ export async function runQuickMatchupBenchmark(): Promise<
|
|
|
813
813
|
MatchupBenchmarkResult[]
|
|
814
814
|
> {
|
|
815
815
|
const benchmark = new ArchetypeMatchupBenchmark({
|
|
816
|
-
archetypes:
|
|
816
|
+
archetypes: "all",
|
|
817
817
|
agentsPerArchetype: 2,
|
|
818
818
|
rounds: 5,
|
|
819
819
|
ticksPerRound: 100,
|
|
820
|
-
marketConditions: [
|
|
820
|
+
marketConditions: ["bull", "bear", "volatile", "stable"],
|
|
821
821
|
availableVramGb: 16,
|
|
822
822
|
});
|
|
823
823
|
|