@elizaos/training 2.0.0-alpha.13 → 2.0.0-alpha.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/research-output/training-runs/training-run-1773726941205.json +38 -0
- package/scripts/rank_trajectories.ts +0 -1
- package/scripts/run_task_benchmark.ts +4 -11
- package/src/adapter.ts +96 -49
- package/src/archetypes/ArchetypeConfigService.ts +188 -185
- package/src/archetypes/derive-archetype.ts +47 -47
- package/src/archetypes/index.ts +2 -2
- package/src/benchmark/ArchetypeMatchupBenchmark.ts +70 -70
- package/src/benchmark/BenchmarkChartGenerator.ts +70 -69
- package/src/benchmark/BenchmarkDataGenerator.ts +136 -136
- package/src/benchmark/BenchmarkDataViewer.ts +32 -30
- package/src/benchmark/BenchmarkHistoryService.ts +13 -12
- package/src/benchmark/BenchmarkRunner.ts +87 -83
- package/src/benchmark/BenchmarkValidator.ts +48 -46
- package/src/benchmark/FastEvalRunner.ts +17 -16
- package/src/benchmark/MetricsValidator.ts +20 -21
- package/src/benchmark/MetricsVisualizer.ts +92 -85
- package/src/benchmark/ModelBenchmarkService.ts +90 -82
- package/src/benchmark/ModelRegistry.ts +44 -44
- package/src/benchmark/RulerBenchmarkIntegration.ts +24 -24
- package/src/benchmark/SimulationA2AInterface.ts +118 -118
- package/src/benchmark/SimulationEngine.ts +51 -51
- package/src/benchmark/TaskRunner.ts +87 -79
- package/src/benchmark/__tests__/BenchmarkRunner.test.ts +80 -80
- package/src/benchmark/__tests__/HeadToHead.test.ts +26 -26
- package/src/benchmark/index.ts +27 -27
- package/src/benchmark/parseSimulationMetrics.ts +32 -32
- package/src/benchmark/simulation-types.ts +10 -10
- package/src/dependencies.ts +34 -34
- package/src/generation/TrajectoryGenerator.ts +39 -37
- package/src/generation/index.ts +1 -1
- package/src/huggingface/HuggingFaceDatasetUploader.ts +72 -72
- package/src/huggingface/HuggingFaceIntegrationService.ts +59 -53
- package/src/huggingface/HuggingFaceModelUploader.ts +60 -59
- package/src/huggingface/index.ts +6 -6
- package/src/huggingface/shared/HuggingFaceUploadUtil.ts +32 -32
- package/src/index.ts +27 -27
- package/src/init-training.ts +6 -6
- package/src/metrics/TrajectoryMetricsExtractor.ts +70 -71
- package/src/metrics/__tests__/TrajectoryMetricsExtractor.test.ts +182 -182
- package/src/metrics/index.ts +2 -2
- package/src/rubrics/__tests__/index.test.ts +73 -73
- package/src/rubrics/ass-kisser.ts +6 -6
- package/src/rubrics/degen.ts +6 -6
- package/src/rubrics/goody-twoshoes.ts +6 -6
- package/src/rubrics/index.ts +50 -50
- package/src/rubrics/information-trader.ts +6 -6
- package/src/rubrics/infosec.ts +6 -6
- package/src/rubrics/liar.ts +6 -6
- package/src/rubrics/perps-trader.ts +6 -6
- package/src/rubrics/researcher.ts +6 -6
- package/src/rubrics/scammer.ts +6 -6
- package/src/rubrics/social-butterfly.ts +7 -7
- package/src/rubrics/super-predictor.ts +6 -6
- package/src/rubrics/trader.ts +5 -5
- package/src/scoring/ArchetypeScoringService.ts +56 -54
- package/src/scoring/JudgePromptBuilder.ts +96 -96
- package/src/scoring/LLMJudgeCache.ts +26 -23
- package/src/scoring/index.ts +3 -3
- package/src/training/AutomationPipeline.ts +149 -140
- package/src/training/BenchmarkService.ts +49 -45
- package/src/training/ConfigValidator.ts +38 -32
- package/src/training/MarketOutcomesTracker.ts +22 -12
- package/src/training/ModelDeployer.ts +15 -15
- package/src/training/ModelFetcher.ts +7 -7
- package/src/training/ModelSelectionService.ts +32 -32
- package/src/training/ModelUsageVerifier.ts +31 -24
- package/src/training/MultiModelOrchestrator.ts +44 -44
- package/src/training/RLModelConfig.ts +57 -57
- package/src/training/RewardBackpropagationService.ts +18 -17
- package/src/training/RulerScoringService.ts +73 -72
- package/src/training/TrainingMonitor.ts +29 -29
- package/src/training/TrajectoryRecorder.ts +25 -27
- package/src/training/__tests__/TrajectoryRecorder.test.ts +105 -105
- package/src/training/index.ts +36 -36
- package/src/training/logRLConfig.ts +7 -7
- package/src/training/pipeline.ts +13 -16
- package/src/training/storage/ModelStorageService.ts +32 -32
- package/src/training/storage/TrainingDataArchiver.ts +21 -21
- package/src/training/storage/index.ts +2 -2
- package/src/training/types.ts +6 -6
- package/src/training/window-utils.ts +14 -14
- package/src/utils/index.ts +7 -7
- package/src/utils/logger.ts +5 -5
- package/src/utils/snowflake.ts +1 -1
- package/src/utils/synthetic-detector.ts +7 -7
package/src/rubrics/__tests__/index.test.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Tests for rubric utilities
|
|
3
3
|
*/
|
|
4
|
-
import { describe, expect, it } from
|
|
4
|
+
import { describe, expect, it } from "bun:test";
|
|
5
5
|
import {
|
|
6
6
|
DEFAULT_RUBRIC,
|
|
7
7
|
getAllRubricsHash,
|
|
@@ -12,55 +12,55 @@ import {
|
|
|
12
12
|
hasCustomRubric,
|
|
13
13
|
normalizeArchetype,
|
|
14
14
|
RUBRICS_VERSION,
|
|
15
|
-
} from
|
|
15
|
+
} from "../index";
|
|
16
16
|
|
|
17
|
-
describe(
|
|
18
|
-
it(
|
|
19
|
-
expect(normalizeArchetype(
|
|
20
|
-
expect(normalizeArchetype(
|
|
21
|
-
expect(normalizeArchetype(
|
|
17
|
+
describe("normalizeArchetype", () => {
|
|
18
|
+
it("should convert to lowercase", () => {
|
|
19
|
+
expect(normalizeArchetype("DEGEN")).toBe("degen");
|
|
20
|
+
expect(normalizeArchetype("Trader")).toBe("trader");
|
|
21
|
+
expect(normalizeArchetype("SOCIAL-BUTTERFLY")).toBe("social-butterfly");
|
|
22
22
|
});
|
|
23
23
|
|
|
24
|
-
it(
|
|
25
|
-
expect(normalizeArchetype(
|
|
26
|
-
expect(normalizeArchetype(
|
|
27
|
-
expect(normalizeArchetype(
|
|
24
|
+
it("should replace underscores with hyphens", () => {
|
|
25
|
+
expect(normalizeArchetype("social_butterfly")).toBe("social-butterfly");
|
|
26
|
+
expect(normalizeArchetype("goody_twoshoes")).toBe("goody-twoshoes");
|
|
27
|
+
expect(normalizeArchetype("perps_trader")).toBe("perps-trader");
|
|
28
28
|
});
|
|
29
29
|
|
|
30
|
-
it(
|
|
31
|
-
expect(normalizeArchetype(
|
|
32
|
-
expect(normalizeArchetype(
|
|
30
|
+
it("should trim whitespace", () => {
|
|
31
|
+
expect(normalizeArchetype(" degen ")).toBe("degen");
|
|
32
|
+
expect(normalizeArchetype("\ttrader\n")).toBe("trader");
|
|
33
33
|
});
|
|
34
34
|
|
|
35
|
-
it(
|
|
36
|
-
expect(normalizeArchetype(
|
|
37
|
-
expect(normalizeArchetype(
|
|
35
|
+
it("should handle mixed case with underscores", () => {
|
|
36
|
+
expect(normalizeArchetype("Social_Butterfly")).toBe("social-butterfly");
|
|
37
|
+
expect(normalizeArchetype("PERPS_TRADER")).toBe("perps-trader");
|
|
38
38
|
});
|
|
39
39
|
|
|
40
40
|
it('should return "default" for empty/null/undefined', () => {
|
|
41
|
-
expect(normalizeArchetype(
|
|
42
|
-
expect(normalizeArchetype(
|
|
43
|
-
expect(normalizeArchetype(null)).toBe(
|
|
44
|
-
expect(normalizeArchetype(undefined)).toBe(
|
|
41
|
+
expect(normalizeArchetype("")).toBe("default");
|
|
42
|
+
expect(normalizeArchetype(" ")).toBe("default");
|
|
43
|
+
expect(normalizeArchetype(null)).toBe("default");
|
|
44
|
+
expect(normalizeArchetype(undefined)).toBe("default");
|
|
45
45
|
});
|
|
46
46
|
|
|
47
|
-
it(
|
|
48
|
-
expect(normalizeArchetype(
|
|
49
|
-
expect(normalizeArchetype(
|
|
47
|
+
it("should handle already normalized archetypes", () => {
|
|
48
|
+
expect(normalizeArchetype("degen")).toBe("degen");
|
|
49
|
+
expect(normalizeArchetype("social-butterfly")).toBe("social-butterfly");
|
|
50
50
|
});
|
|
51
51
|
});
|
|
52
52
|
|
|
53
|
-
describe(
|
|
54
|
-
it(
|
|
53
|
+
describe("getRubric", () => {
|
|
54
|
+
it("should return rubric for known archetypes", () => {
|
|
55
55
|
const archetypes = getAvailableArchetypes();
|
|
56
56
|
for (const archetype of archetypes) {
|
|
57
57
|
const rubric = getRubric(archetype);
|
|
58
|
-
expect(typeof rubric).toBe(
|
|
58
|
+
expect(typeof rubric).toBe("string");
|
|
59
59
|
expect(rubric.length).toBeGreaterThan(0);
|
|
60
60
|
}
|
|
61
61
|
});
|
|
62
62
|
|
|
63
|
-
it(
|
|
63
|
+
it("should return custom rubrics (not default) for all available archetypes", () => {
|
|
64
64
|
const archetypes = getAvailableArchetypes();
|
|
65
65
|
for (const archetype of archetypes) {
|
|
66
66
|
expect(hasCustomRubric(archetype)).toBe(true);
|
|
@@ -70,28 +70,28 @@ describe('getRubric', () => {
|
|
|
70
70
|
}
|
|
71
71
|
});
|
|
72
72
|
|
|
73
|
-
it(
|
|
74
|
-
const rubric = getRubric(
|
|
73
|
+
it("should return default rubric for unknown archetypes", () => {
|
|
74
|
+
const rubric = getRubric("unknown-archetype-xyz");
|
|
75
75
|
expect(rubric).toBe(DEFAULT_RUBRIC);
|
|
76
76
|
});
|
|
77
77
|
|
|
78
|
-
it(
|
|
79
|
-
const lower = getRubric(
|
|
80
|
-
const upper = getRubric(
|
|
81
|
-
const mixed = getRubric(
|
|
78
|
+
it("should handle case normalization", () => {
|
|
79
|
+
const lower = getRubric("degen");
|
|
80
|
+
const upper = getRubric("DEGEN");
|
|
81
|
+
const mixed = getRubric("Degen");
|
|
82
82
|
expect(lower).toBe(upper);
|
|
83
83
|
expect(lower).toBe(mixed);
|
|
84
84
|
});
|
|
85
85
|
|
|
86
|
-
it(
|
|
87
|
-
const hyphen = getRubric(
|
|
88
|
-
const underscore = getRubric(
|
|
86
|
+
it("should handle underscore/hyphen normalization", () => {
|
|
87
|
+
const hyphen = getRubric("social-butterfly");
|
|
88
|
+
const underscore = getRubric("social_butterfly");
|
|
89
89
|
expect(hyphen).toBe(underscore);
|
|
90
90
|
});
|
|
91
91
|
});
|
|
92
92
|
|
|
93
|
-
describe(
|
|
94
|
-
it(
|
|
93
|
+
describe("getPriorityMetrics", () => {
|
|
94
|
+
it("should return array of metrics for known archetypes", () => {
|
|
95
95
|
const archetypes = getAvailableArchetypes();
|
|
96
96
|
for (const archetype of archetypes) {
|
|
97
97
|
const metrics = getPriorityMetrics(archetype);
|
|
@@ -100,85 +100,85 @@ describe('getPriorityMetrics', () => {
|
|
|
100
100
|
}
|
|
101
101
|
});
|
|
102
102
|
|
|
103
|
-
it(
|
|
104
|
-
const metrics = getPriorityMetrics(
|
|
103
|
+
it("should return default metrics for unknown archetypes", () => {
|
|
104
|
+
const metrics = getPriorityMetrics("unknown-archetype");
|
|
105
105
|
expect(Array.isArray(metrics)).toBe(true);
|
|
106
106
|
expect(metrics.length).toBeGreaterThan(0);
|
|
107
107
|
});
|
|
108
108
|
});
|
|
109
109
|
|
|
110
|
-
describe(
|
|
111
|
-
it(
|
|
112
|
-
expect(hasCustomRubric(
|
|
113
|
-
expect(hasCustomRubric(
|
|
114
|
-
expect(hasCustomRubric(
|
|
110
|
+
describe("hasCustomRubric", () => {
|
|
111
|
+
it("should return true for known archetypes", () => {
|
|
112
|
+
expect(hasCustomRubric("degen")).toBe(true);
|
|
113
|
+
expect(hasCustomRubric("trader")).toBe(true);
|
|
114
|
+
expect(hasCustomRubric("social-butterfly")).toBe(true);
|
|
115
115
|
});
|
|
116
116
|
|
|
117
|
-
it(
|
|
118
|
-
expect(hasCustomRubric(
|
|
119
|
-
expect(hasCustomRubric(
|
|
117
|
+
it("should return false for unknown archetypes", () => {
|
|
118
|
+
expect(hasCustomRubric("unknown")).toBe(false);
|
|
119
|
+
expect(hasCustomRubric("random-name")).toBe(false);
|
|
120
120
|
});
|
|
121
121
|
|
|
122
|
-
it(
|
|
123
|
-
expect(hasCustomRubric(
|
|
124
|
-
expect(hasCustomRubric(
|
|
122
|
+
it("should handle case normalization", () => {
|
|
123
|
+
expect(hasCustomRubric("DEGEN")).toBe(true);
|
|
124
|
+
expect(hasCustomRubric("Trader")).toBe(true);
|
|
125
125
|
});
|
|
126
126
|
});
|
|
127
127
|
|
|
128
|
-
describe(
|
|
129
|
-
it(
|
|
128
|
+
describe("getAvailableArchetypes", () => {
|
|
129
|
+
it("should return array of canonical archetype names", () => {
|
|
130
130
|
const archetypes = getAvailableArchetypes();
|
|
131
131
|
expect(Array.isArray(archetypes)).toBe(true);
|
|
132
132
|
expect(archetypes.length).toBeGreaterThanOrEqual(12);
|
|
133
133
|
});
|
|
134
134
|
|
|
135
|
-
it(
|
|
135
|
+
it("should only contain hyphenated names (not aliases)", () => {
|
|
136
136
|
const archetypes = getAvailableArchetypes();
|
|
137
137
|
// Should not contain aliases like 'socialbutterfly'
|
|
138
|
-
expect(archetypes).not.toContain(
|
|
139
|
-
expect(archetypes).not.toContain(
|
|
138
|
+
expect(archetypes).not.toContain("socialbutterfly");
|
|
139
|
+
expect(archetypes).not.toContain("goodytwoshoes");
|
|
140
140
|
// Should contain canonical names
|
|
141
|
-
expect(archetypes).toContain(
|
|
142
|
-
expect(archetypes).toContain(
|
|
141
|
+
expect(archetypes).toContain("social-butterfly");
|
|
142
|
+
expect(archetypes).toContain("goody-twoshoes");
|
|
143
143
|
});
|
|
144
144
|
});
|
|
145
145
|
|
|
146
|
-
describe(
|
|
147
|
-
it(
|
|
148
|
-
const hash1 = getRubricHash(
|
|
149
|
-
const hash2 = getRubricHash(
|
|
146
|
+
describe("getRubricHash", () => {
|
|
147
|
+
it("should return consistent hash for same archetype", () => {
|
|
148
|
+
const hash1 = getRubricHash("degen");
|
|
149
|
+
const hash2 = getRubricHash("degen");
|
|
150
150
|
expect(hash1).toBe(hash2);
|
|
151
151
|
});
|
|
152
152
|
|
|
153
|
-
it(
|
|
154
|
-
const degenHash = getRubricHash(
|
|
155
|
-
const traderHash = getRubricHash(
|
|
153
|
+
it("should return different hashes for different archetypes", () => {
|
|
154
|
+
const degenHash = getRubricHash("degen");
|
|
155
|
+
const traderHash = getRubricHash("trader");
|
|
156
156
|
expect(degenHash).not.toBe(traderHash);
|
|
157
157
|
});
|
|
158
158
|
|
|
159
|
-
it(
|
|
160
|
-
const hash = getRubricHash(
|
|
159
|
+
it("should return 16-character hex string", () => {
|
|
160
|
+
const hash = getRubricHash("degen");
|
|
161
161
|
expect(hash.length).toBe(16);
|
|
162
162
|
expect(/^[0-9a-f]+$/.test(hash)).toBe(true);
|
|
163
163
|
});
|
|
164
164
|
});
|
|
165
165
|
|
|
166
|
-
describe(
|
|
167
|
-
it(
|
|
166
|
+
describe("getAllRubricsHash", () => {
|
|
167
|
+
it("should return consistent hash", () => {
|
|
168
168
|
const hash1 = getAllRubricsHash();
|
|
169
169
|
const hash2 = getAllRubricsHash();
|
|
170
170
|
expect(hash1).toBe(hash2);
|
|
171
171
|
});
|
|
172
172
|
|
|
173
|
-
it(
|
|
173
|
+
it("should return 16-character hex string", () => {
|
|
174
174
|
const hash = getAllRubricsHash();
|
|
175
175
|
expect(hash.length).toBe(16);
|
|
176
176
|
expect(/^[0-9a-f]+$/.test(hash)).toBe(true);
|
|
177
177
|
});
|
|
178
178
|
});
|
|
179
179
|
|
|
180
|
-
describe(
|
|
181
|
-
it(
|
|
180
|
+
describe("RUBRICS_VERSION", () => {
|
|
181
|
+
it("should be a valid semver string", () => {
|
|
182
182
|
expect(RUBRICS_VERSION).toMatch(/^\d+\.\d+\.\d+$/);
|
|
183
183
|
});
|
|
184
184
|
});
|
|
package/src/rubrics/ass-kisser.ts
CHANGED
|
@@ -76,10 +76,10 @@ Score higher for evidence of strategic, targeted flattery.
|
|
|
76
76
|
`;
|
|
77
77
|
|
|
78
78
|
export const ASS_KISSER_PRIORITY_METRICS = [
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
79
|
+
"influence.reputationDelta",
|
|
80
|
+
"influence.followersGained",
|
|
81
|
+
"influence.positiveReactions",
|
|
82
|
+
"social.dmsInitiated",
|
|
83
|
+
"social.commentsMade",
|
|
84
|
+
"social.mentionsGiven",
|
|
85
85
|
];
|
package/src/rubrics/degen.ts
CHANGED
|
@@ -71,10 +71,10 @@ The question isn't "did they make money?" but "did they TRADE like a degen?"
|
|
|
71
71
|
`;
|
|
72
72
|
|
|
73
73
|
export const DEGEN_PRIORITY_METRICS = [
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
74
|
+
"trading.tradesExecuted",
|
|
75
|
+
"trading.avgPositionSize",
|
|
76
|
+
"trading.largestWin",
|
|
77
|
+
"trading.largestLoss",
|
|
78
|
+
"trading.marketsTraded",
|
|
79
|
+
"behavior.actionsPerTick",
|
|
80
80
|
];
|
|
package/src/rubrics/goody-twoshoes.ts
CHANGED
|
@@ -75,10 +75,10 @@ For this archetype, reputation delta is the most important metric:
|
|
|
75
75
|
`;
|
|
76
76
|
|
|
77
77
|
export const GOODY_TWOSHOES_PRIORITY_METRICS = [
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
78
|
+
"influence.reputationDelta",
|
|
79
|
+
"information.infoShared",
|
|
80
|
+
"influence.positiveReactions",
|
|
81
|
+
"influence.followersGained",
|
|
82
|
+
"social.uniqueUsersInteracted",
|
|
83
|
+
"social.commentsMade",
|
|
84
84
|
];
|
package/src/rubrics/index.ts
CHANGED
|
@@ -7,34 +7,34 @@
|
|
|
7
7
|
* @packageDocumentation
|
|
8
8
|
*/
|
|
9
9
|
|
|
10
|
-
import { createHash } from
|
|
11
|
-
import { ASS_KISSER_PRIORITY_METRICS, ASS_KISSER_RUBRIC } from
|
|
12
|
-
import { DEGEN_PRIORITY_METRICS, DEGEN_RUBRIC } from
|
|
10
|
+
import { createHash } from "node:crypto";
|
|
11
|
+
import { ASS_KISSER_PRIORITY_METRICS, ASS_KISSER_RUBRIC } from "./ass-kisser";
|
|
12
|
+
import { DEGEN_PRIORITY_METRICS, DEGEN_RUBRIC } from "./degen";
|
|
13
13
|
import {
|
|
14
14
|
GOODY_TWOSHOES_PRIORITY_METRICS,
|
|
15
15
|
GOODY_TWOSHOES_RUBRIC,
|
|
16
|
-
} from
|
|
16
|
+
} from "./goody-twoshoes";
|
|
17
17
|
import {
|
|
18
18
|
INFORMATION_TRADER_PRIORITY_METRICS,
|
|
19
19
|
INFORMATION_TRADER_RUBRIC,
|
|
20
|
-
} from
|
|
21
|
-
import { INFOSEC_PRIORITY_METRICS, INFOSEC_RUBRIC } from
|
|
22
|
-
import { LIAR_PRIORITY_METRICS, LIAR_RUBRIC } from
|
|
20
|
+
} from "./information-trader";
|
|
21
|
+
import { INFOSEC_PRIORITY_METRICS, INFOSEC_RUBRIC } from "./infosec";
|
|
22
|
+
import { LIAR_PRIORITY_METRICS, LIAR_RUBRIC } from "./liar";
|
|
23
23
|
import {
|
|
24
24
|
PERPS_TRADER_PRIORITY_METRICS,
|
|
25
25
|
PERPS_TRADER_RUBRIC,
|
|
26
|
-
} from
|
|
27
|
-
import { RESEARCHER_PRIORITY_METRICS, RESEARCHER_RUBRIC } from
|
|
28
|
-
import { SCAMMER_PRIORITY_METRICS, SCAMMER_RUBRIC } from
|
|
26
|
+
} from "./perps-trader";
|
|
27
|
+
import { RESEARCHER_PRIORITY_METRICS, RESEARCHER_RUBRIC } from "./researcher";
|
|
28
|
+
import { SCAMMER_PRIORITY_METRICS, SCAMMER_RUBRIC } from "./scammer";
|
|
29
29
|
import {
|
|
30
30
|
SOCIAL_BUTTERFLY_PRIORITY_METRICS,
|
|
31
31
|
SOCIAL_BUTTERFLY_RUBRIC,
|
|
32
|
-
} from
|
|
32
|
+
} from "./social-butterfly";
|
|
33
33
|
import {
|
|
34
34
|
SUPER_PREDICTOR_PRIORITY_METRICS,
|
|
35
35
|
SUPER_PREDICTOR_RUBRIC,
|
|
36
|
-
} from
|
|
37
|
-
import { TRADER_PRIORITY_METRICS, TRADER_RUBRIC } from
|
|
36
|
+
} from "./super-predictor";
|
|
37
|
+
import { TRADER_PRIORITY_METRICS, TRADER_RUBRIC } from "./trader";
|
|
38
38
|
|
|
39
39
|
/**
|
|
40
40
|
* Default rubric for unknown archetypes
|
|
@@ -62,10 +62,10 @@ If one trajectory is significantly better, reflect that in score differences.
|
|
|
62
62
|
`;
|
|
63
63
|
|
|
64
64
|
export const DEFAULT_PRIORITY_METRICS = [
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
65
|
+
"trading.totalPnL",
|
|
66
|
+
"trading.winRate",
|
|
67
|
+
"behavior.actionSuccessRate",
|
|
68
|
+
"behavior.episodeLength",
|
|
69
69
|
];
|
|
70
70
|
|
|
71
71
|
/**
|
|
@@ -73,15 +73,15 @@ export const DEFAULT_PRIORITY_METRICS = [
|
|
|
73
73
|
*/
|
|
74
74
|
export const RUBRICS: Record<string, string> = {
|
|
75
75
|
trader: TRADER_RUBRIC,
|
|
76
|
-
|
|
76
|
+
"social-butterfly": SOCIAL_BUTTERFLY_RUBRIC,
|
|
77
77
|
scammer: SCAMMER_RUBRIC,
|
|
78
78
|
degen: DEGEN_RUBRIC,
|
|
79
79
|
researcher: RESEARCHER_RUBRIC,
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
80
|
+
"information-trader": INFORMATION_TRADER_RUBRIC,
|
|
81
|
+
"goody-twoshoes": GOODY_TWOSHOES_RUBRIC,
|
|
82
|
+
"ass-kisser": ASS_KISSER_RUBRIC,
|
|
83
|
+
"perps-trader": PERPS_TRADER_RUBRIC,
|
|
84
|
+
"super-predictor": SUPER_PREDICTOR_RUBRIC,
|
|
85
85
|
infosec: INFOSEC_RUBRIC,
|
|
86
86
|
liar: LIAR_RUBRIC,
|
|
87
87
|
// Aliases
|
|
@@ -98,15 +98,15 @@ export const RUBRICS: Record<string, string> = {
|
|
|
98
98
|
*/
|
|
99
99
|
export const PRIORITY_METRICS: Record<string, string[]> = {
|
|
100
100
|
trader: TRADER_PRIORITY_METRICS,
|
|
101
|
-
|
|
101
|
+
"social-butterfly": SOCIAL_BUTTERFLY_PRIORITY_METRICS,
|
|
102
102
|
scammer: SCAMMER_PRIORITY_METRICS,
|
|
103
103
|
degen: DEGEN_PRIORITY_METRICS,
|
|
104
104
|
researcher: RESEARCHER_PRIORITY_METRICS,
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
105
|
+
"information-trader": INFORMATION_TRADER_PRIORITY_METRICS,
|
|
106
|
+
"goody-twoshoes": GOODY_TWOSHOES_PRIORITY_METRICS,
|
|
107
|
+
"ass-kisser": ASS_KISSER_PRIORITY_METRICS,
|
|
108
|
+
"perps-trader": PERPS_TRADER_PRIORITY_METRICS,
|
|
109
|
+
"super-predictor": SUPER_PREDICTOR_PRIORITY_METRICS,
|
|
110
110
|
infosec: INFOSEC_PRIORITY_METRICS,
|
|
111
111
|
liar: LIAR_PRIORITY_METRICS,
|
|
112
112
|
};
|
|
@@ -123,12 +123,12 @@ export const VALID_ARCHETYPES = new Set(Object.keys(RUBRICS));
|
|
|
123
123
|
* Note: Does NOT validate against whitelist - use sanitizeArchetype() for that
|
|
124
124
|
*/
|
|
125
125
|
export function normalizeArchetype(
|
|
126
|
-
archetype: string | null | undefined
|
|
126
|
+
archetype: string | null | undefined,
|
|
127
127
|
): string {
|
|
128
|
-
if (!archetype || archetype.trim() ===
|
|
129
|
-
return
|
|
128
|
+
if (!archetype || archetype.trim() === "") {
|
|
129
|
+
return "default";
|
|
130
130
|
}
|
|
131
|
-
return archetype.toLowerCase().trim().replace(/_/g,
|
|
131
|
+
return archetype.toLowerCase().trim().replace(/_/g, "-");
|
|
132
132
|
}
|
|
133
133
|
|
|
134
134
|
/**
|
|
@@ -137,7 +137,7 @@ export function normalizeArchetype(
|
|
|
137
137
|
*/
|
|
138
138
|
export function isValidArchetype(archetype: string): boolean {
|
|
139
139
|
const normalized = normalizeArchetype(archetype);
|
|
140
|
-
return normalized ===
|
|
140
|
+
return normalized === "default" || VALID_ARCHETYPES.has(normalized);
|
|
141
141
|
}
|
|
142
142
|
|
|
143
143
|
/**
|
|
@@ -145,13 +145,13 @@ export function isValidArchetype(archetype: string): boolean {
|
|
|
145
145
|
* Returns normalized archetype if valid, 'default' otherwise
|
|
146
146
|
*/
|
|
147
147
|
export function sanitizeArchetype(
|
|
148
|
-
archetype: string | null | undefined
|
|
148
|
+
archetype: string | null | undefined,
|
|
149
149
|
): string {
|
|
150
150
|
const normalized = normalizeArchetype(archetype);
|
|
151
|
-
if (normalized ===
|
|
151
|
+
if (normalized === "default" || VALID_ARCHETYPES.has(normalized)) {
|
|
152
152
|
return normalized;
|
|
153
153
|
}
|
|
154
|
-
return
|
|
154
|
+
return "default";
|
|
155
155
|
}
|
|
156
156
|
|
|
157
157
|
/**
|
|
@@ -183,7 +183,7 @@ export function hasCustomRubric(archetype: string): boolean {
|
|
|
183
183
|
* Single source of truth - derived from PRIORITY_METRICS keys which only contains canonical names
|
|
184
184
|
*/
|
|
185
185
|
export const CANONICAL_ARCHETYPES = Object.keys(
|
|
186
|
-
PRIORITY_METRICS
|
|
186
|
+
PRIORITY_METRICS,
|
|
187
187
|
) as readonly string[];
|
|
188
188
|
|
|
189
189
|
/**
|
|
@@ -196,25 +196,25 @@ export function getAvailableArchetypes(): string[] {
|
|
|
196
196
|
|
|
197
197
|
// Re-export individual rubrics
|
|
198
198
|
export {
|
|
199
|
-
|
|
200
|
-
SOCIAL_BUTTERFLY_RUBRIC,
|
|
201
|
-
SCAMMER_RUBRIC,
|
|
199
|
+
ASS_KISSER_RUBRIC,
|
|
202
200
|
DEGEN_RUBRIC,
|
|
203
|
-
RESEARCHER_RUBRIC,
|
|
204
|
-
INFORMATION_TRADER_RUBRIC,
|
|
205
201
|
GOODY_TWOSHOES_RUBRIC,
|
|
206
|
-
|
|
207
|
-
PERPS_TRADER_RUBRIC,
|
|
208
|
-
SUPER_PREDICTOR_RUBRIC,
|
|
202
|
+
INFORMATION_TRADER_RUBRIC,
|
|
209
203
|
INFOSEC_RUBRIC,
|
|
210
204
|
LIAR_RUBRIC,
|
|
205
|
+
PERPS_TRADER_RUBRIC,
|
|
206
|
+
RESEARCHER_RUBRIC,
|
|
207
|
+
SCAMMER_RUBRIC,
|
|
208
|
+
SOCIAL_BUTTERFLY_RUBRIC,
|
|
209
|
+
SUPER_PREDICTOR_RUBRIC,
|
|
210
|
+
TRADER_RUBRIC,
|
|
211
211
|
};
|
|
212
212
|
|
|
213
213
|
/**
|
|
214
214
|
* Rubrics version - increment when rubrics change significantly
|
|
215
215
|
* Used for cache invalidation
|
|
216
216
|
*/
|
|
217
|
-
export const RUBRICS_VERSION =
|
|
217
|
+
export const RUBRICS_VERSION = "1.0.0";
|
|
218
218
|
|
|
219
219
|
/**
|
|
220
220
|
* Get a hash of the rubric for an archetype
|
|
@@ -222,7 +222,7 @@ export const RUBRICS_VERSION = '1.0.0';
|
|
|
222
222
|
*/
|
|
223
223
|
export function getRubricHash(archetype: string): string {
|
|
224
224
|
const rubric = getRubric(archetype);
|
|
225
|
-
return createHash(
|
|
225
|
+
return createHash("sha256").update(rubric).digest("hex").substring(0, 16);
|
|
226
226
|
}
|
|
227
227
|
|
|
228
228
|
/**
|
|
@@ -231,6 +231,6 @@ export function getRubricHash(archetype: string): string {
|
|
|
231
231
|
* Note: Sorted to match Python implementation for cross-language consistency
|
|
232
232
|
*/
|
|
233
233
|
export function getAllRubricsHash(): string {
|
|
234
|
-
const allRubrics = Object.values(RUBRICS).sort().join(
|
|
235
|
-
return createHash(
|
|
234
|
+
const allRubrics = Object.values(RUBRICS).sort().join("::") + DEFAULT_RUBRIC;
|
|
235
|
+
return createHash("sha256").update(allRubrics).digest("hex").substring(0, 16);
|
|
236
236
|
}
|
|
package/src/rubrics/information-trader.ts
CHANGED
|
@@ -75,10 +75,10 @@ The information trader must balance both sides:
|
|
|
75
75
|
`;
|
|
76
76
|
|
|
77
77
|
export const INFORMATION_TRADER_PRIORITY_METRICS = [
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
78
|
+
"trading.totalPnL",
|
|
79
|
+
"social.groupChatsJoined",
|
|
80
|
+
"social.dmsInitiated",
|
|
81
|
+
"behavior.socialToTradeRatio",
|
|
82
|
+
"information.infoRequestsSent",
|
|
83
|
+
"trading.winRate",
|
|
84
84
|
];
|
package/src/rubrics/infosec.ts
CHANGED
|
@@ -92,10 +92,10 @@ For infosec, steady beats spectacular:
|
|
|
92
92
|
`;
|
|
93
93
|
|
|
94
94
|
export const INFOSEC_PRIORITY_METRICS = [
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
95
|
+
"trading.maxDrawdown",
|
|
96
|
+
"trading.winRate",
|
|
97
|
+
"information.infoShared",
|
|
98
|
+
"social.dmResponseRate",
|
|
99
|
+
"behavior.consistencyScore",
|
|
100
|
+
"trading.totalPnL",
|
|
101
101
|
];
|
package/src/rubrics/liar.ts
CHANGED
|
@@ -95,10 +95,10 @@ If these dominate, score low regardless of other metrics.
|
|
|
95
95
|
`;
|
|
96
96
|
|
|
97
97
|
export const LIAR_PRIORITY_METRICS = [
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
98
|
+
"influence.informationSpread",
|
|
99
|
+
"social.uniqueUsersInteracted",
|
|
100
|
+
"influence.reputationDelta",
|
|
101
|
+
"social.postsCreated",
|
|
102
|
+
"social.groupMessagesSent",
|
|
103
|
+
"social.dmsInitiated",
|
|
104
104
|
];
|
|
package/src/rubrics/perps-trader.ts
CHANGED
|
@@ -78,10 +78,10 @@ Perps traders should be trading-focused:
|
|
|
78
78
|
`;
|
|
79
79
|
|
|
80
80
|
export const PERPS_TRADER_PRIORITY_METRICS = [
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
81
|
+
"trading.totalPnL",
|
|
82
|
+
"trading.maxDrawdown",
|
|
83
|
+
"trading.winRate",
|
|
84
|
+
"trading.sharpeRatio",
|
|
85
|
+
"trading.tradesExecuted",
|
|
86
|
+
"behavior.socialToTradeRatio",
|
|
87
87
|
];
|
|
package/src/rubrics/researcher.ts
CHANGED
|
@@ -72,10 +72,10 @@ If they only check prices without reading news or doing analysis, score lower.
|
|
|
72
72
|
`;
|
|
73
73
|
|
|
74
74
|
export const RESEARCHER_PRIORITY_METRICS = [
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
75
|
+
"information.researchActions",
|
|
76
|
+
"information.predictionAccuracy",
|
|
77
|
+
"information.marketDataQueries",
|
|
78
|
+
"information.newsConsumed",
|
|
79
|
+
"trading.winRate",
|
|
80
|
+
"trading.totalPnL",
|
|
81
81
|
];
|
package/src/rubrics/scammer.ts
CHANGED
|
@@ -73,10 +73,10 @@ If these behaviors dominate, score low regardless of P&L.
|
|
|
73
73
|
`;
|
|
74
74
|
|
|
75
75
|
export const SCAMMER_PRIORITY_METRICS = [
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
76
|
+
"trading.totalPnL",
|
|
77
|
+
"social.uniqueUsersInteracted",
|
|
78
|
+
"influence.reputationDelta",
|
|
79
|
+
"social.dmsInitiated",
|
|
80
|
+
"influence.informationSpread",
|
|
81
|
+
"social.groupMessagesSent",
|
|
82
82
|
];
|