@elizaos/training 2.0.0-alpha.13 → 2.0.0-alpha.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/research-output/training-runs/training-run-1773726941205.json +38 -0
- package/scripts/rank_trajectories.ts +0 -1
- package/scripts/run_task_benchmark.ts +4 -11
- package/src/adapter.ts +96 -49
- package/src/archetypes/ArchetypeConfigService.ts +188 -185
- package/src/archetypes/derive-archetype.ts +47 -47
- package/src/archetypes/index.ts +2 -2
- package/src/benchmark/ArchetypeMatchupBenchmark.ts +70 -70
- package/src/benchmark/BenchmarkChartGenerator.ts +70 -69
- package/src/benchmark/BenchmarkDataGenerator.ts +136 -136
- package/src/benchmark/BenchmarkDataViewer.ts +32 -30
- package/src/benchmark/BenchmarkHistoryService.ts +13 -12
- package/src/benchmark/BenchmarkRunner.ts +87 -83
- package/src/benchmark/BenchmarkValidator.ts +48 -46
- package/src/benchmark/FastEvalRunner.ts +17 -16
- package/src/benchmark/MetricsValidator.ts +20 -21
- package/src/benchmark/MetricsVisualizer.ts +92 -85
- package/src/benchmark/ModelBenchmarkService.ts +90 -82
- package/src/benchmark/ModelRegistry.ts +44 -44
- package/src/benchmark/RulerBenchmarkIntegration.ts +24 -24
- package/src/benchmark/SimulationA2AInterface.ts +118 -118
- package/src/benchmark/SimulationEngine.ts +51 -51
- package/src/benchmark/TaskRunner.ts +87 -79
- package/src/benchmark/__tests__/BenchmarkRunner.test.ts +80 -80
- package/src/benchmark/__tests__/HeadToHead.test.ts +26 -26
- package/src/benchmark/index.ts +27 -27
- package/src/benchmark/parseSimulationMetrics.ts +32 -32
- package/src/benchmark/simulation-types.ts +10 -10
- package/src/dependencies.ts +34 -34
- package/src/generation/TrajectoryGenerator.ts +39 -37
- package/src/generation/index.ts +1 -1
- package/src/huggingface/HuggingFaceDatasetUploader.ts +72 -72
- package/src/huggingface/HuggingFaceIntegrationService.ts +59 -53
- package/src/huggingface/HuggingFaceModelUploader.ts +60 -59
- package/src/huggingface/index.ts +6 -6
- package/src/huggingface/shared/HuggingFaceUploadUtil.ts +32 -32
- package/src/index.ts +27 -27
- package/src/init-training.ts +6 -6
- package/src/metrics/TrajectoryMetricsExtractor.ts +70 -71
- package/src/metrics/__tests__/TrajectoryMetricsExtractor.test.ts +182 -182
- package/src/metrics/index.ts +2 -2
- package/src/rubrics/__tests__/index.test.ts +73 -73
- package/src/rubrics/ass-kisser.ts +6 -6
- package/src/rubrics/degen.ts +6 -6
- package/src/rubrics/goody-twoshoes.ts +6 -6
- package/src/rubrics/index.ts +50 -50
- package/src/rubrics/information-trader.ts +6 -6
- package/src/rubrics/infosec.ts +6 -6
- package/src/rubrics/liar.ts +6 -6
- package/src/rubrics/perps-trader.ts +6 -6
- package/src/rubrics/researcher.ts +6 -6
- package/src/rubrics/scammer.ts +6 -6
- package/src/rubrics/social-butterfly.ts +7 -7
- package/src/rubrics/super-predictor.ts +6 -6
- package/src/rubrics/trader.ts +5 -5
- package/src/scoring/ArchetypeScoringService.ts +56 -54
- package/src/scoring/JudgePromptBuilder.ts +96 -96
- package/src/scoring/LLMJudgeCache.ts +26 -23
- package/src/scoring/index.ts +3 -3
- package/src/training/AutomationPipeline.ts +149 -140
- package/src/training/BenchmarkService.ts +49 -45
- package/src/training/ConfigValidator.ts +38 -32
- package/src/training/MarketOutcomesTracker.ts +22 -12
- package/src/training/ModelDeployer.ts +15 -15
- package/src/training/ModelFetcher.ts +7 -7
- package/src/training/ModelSelectionService.ts +32 -32
- package/src/training/ModelUsageVerifier.ts +31 -24
- package/src/training/MultiModelOrchestrator.ts +44 -44
- package/src/training/RLModelConfig.ts +57 -57
- package/src/training/RewardBackpropagationService.ts +18 -17
- package/src/training/RulerScoringService.ts +73 -72
- package/src/training/TrainingMonitor.ts +29 -29
- package/src/training/TrajectoryRecorder.ts +25 -27
- package/src/training/__tests__/TrajectoryRecorder.test.ts +105 -105
- package/src/training/index.ts +36 -36
- package/src/training/logRLConfig.ts +7 -7
- package/src/training/pipeline.ts +13 -16
- package/src/training/storage/ModelStorageService.ts +32 -32
- package/src/training/storage/TrainingDataArchiver.ts +21 -21
- package/src/training/storage/index.ts +2 -2
- package/src/training/types.ts +6 -6
- package/src/training/window-utils.ts +14 -14
- package/src/utils/index.ts +7 -7
- package/src/utils/logger.ts +5 -5
- package/src/utils/snowflake.ts +1 -1
- package/src/utils/synthetic-detector.ts +7 -7
package/src/training/window-utils.ts
CHANGED
|
@@ -13,10 +13,10 @@ export function getCurrentWindowId(): string {
|
|
|
13
13
|
const now = new Date();
|
|
14
14
|
// Round down to the start of the current hour
|
|
15
15
|
const windowStart = new Date(
|
|
16
|
-
Math.floor(now.getTime() / (60 * 60 * 1000)) * (60 * 60 * 1000)
|
|
16
|
+
Math.floor(now.getTime() / (60 * 60 * 1000)) * (60 * 60 * 1000),
|
|
17
17
|
);
|
|
18
18
|
// Format as ISO string, take first 13 chars + :00
|
|
19
|
-
return windowStart.toISOString().slice(0, 13)
|
|
19
|
+
return `${windowStart.toISOString().slice(0, 13)}:00`;
|
|
20
20
|
}
|
|
21
21
|
|
|
22
22
|
/**
|
|
@@ -27,11 +27,11 @@ export function getCurrentWindowId(): string {
|
|
|
27
27
|
export function getPreviousWindowId(offset: number = 1): string {
|
|
28
28
|
const now = new Date();
|
|
29
29
|
const windowStart = new Date(
|
|
30
|
-
Math.floor(now.getTime() / (60 * 60 * 1000)) * (60 * 60 * 1000)
|
|
30
|
+
Math.floor(now.getTime() / (60 * 60 * 1000)) * (60 * 60 * 1000),
|
|
31
31
|
);
|
|
32
32
|
// Go back N hours
|
|
33
33
|
windowStart.setHours(windowStart.getHours() - offset);
|
|
34
|
-
return windowStart.toISOString().slice(0, 13)
|
|
34
|
+
return `${windowStart.toISOString().slice(0, 13)}:00`;
|
|
35
35
|
}
|
|
36
36
|
|
|
37
37
|
/**
|
|
@@ -51,11 +51,11 @@ export function parseWindowId(windowId: string): Date {
|
|
|
51
51
|
*/
|
|
52
52
|
export function isWindowComplete(
|
|
53
53
|
windowId: string,
|
|
54
|
-
windowDurationHours: number = 1
|
|
54
|
+
windowDurationHours: number = 1,
|
|
55
55
|
): boolean {
|
|
56
56
|
const windowStart = parseWindowId(windowId);
|
|
57
57
|
const windowEnd = new Date(
|
|
58
|
-
windowStart.getTime() + windowDurationHours * 60 * 60 * 1000
|
|
58
|
+
windowStart.getTime() + windowDurationHours * 60 * 60 * 1000,
|
|
59
59
|
);
|
|
60
60
|
return Date.now() > windowEnd.getTime();
|
|
61
61
|
}
|
|
@@ -68,7 +68,7 @@ export function isWindowComplete(
|
|
|
68
68
|
*/
|
|
69
69
|
export function getWindowRange(
|
|
70
70
|
windowId: string,
|
|
71
|
-
windowDurationHours: number = 1
|
|
71
|
+
windowDurationHours: number = 1,
|
|
72
72
|
) {
|
|
73
73
|
const start = parseWindowId(windowId);
|
|
74
74
|
const end = new Date(start.getTime() + windowDurationHours * 60 * 60 * 1000);
|
|
@@ -85,18 +85,18 @@ export function getWindowRange(
|
|
|
85
85
|
export function generateWindowIds(
|
|
86
86
|
startTime: Date,
|
|
87
87
|
endTime: Date,
|
|
88
|
-
windowDurationHours: number = 1
|
|
88
|
+
windowDurationHours: number = 1,
|
|
89
89
|
): string[] {
|
|
90
90
|
const windows: string[] = [];
|
|
91
91
|
const windowMs = windowDurationHours * 60 * 60 * 1000;
|
|
92
92
|
|
|
93
93
|
// Round start time down to window boundary
|
|
94
94
|
const currentWindowStart = new Date(
|
|
95
|
-
Math.floor(startTime.getTime() / windowMs) * windowMs
|
|
95
|
+
Math.floor(startTime.getTime() / windowMs) * windowMs,
|
|
96
96
|
);
|
|
97
97
|
|
|
98
98
|
while (currentWindowStart.getTime() <= endTime.getTime()) {
|
|
99
|
-
windows.push(currentWindowStart.toISOString().slice(0, 13)
|
|
99
|
+
windows.push(`${currentWindowStart.toISOString().slice(0, 13)}:00`);
|
|
100
100
|
currentWindowStart.setTime(currentWindowStart.getTime() + windowMs);
|
|
101
101
|
}
|
|
102
102
|
|
|
@@ -111,13 +111,13 @@ export function generateWindowIds(
|
|
|
111
111
|
*/
|
|
112
112
|
export function getWindowIdForTimestamp(
|
|
113
113
|
timestamp: Date,
|
|
114
|
-
windowDurationHours: number = 1
|
|
114
|
+
windowDurationHours: number = 1,
|
|
115
115
|
): string {
|
|
116
116
|
const windowMs = windowDurationHours * 60 * 60 * 1000;
|
|
117
117
|
const windowStart = new Date(
|
|
118
|
-
Math.floor(timestamp.getTime() / windowMs) * windowMs
|
|
118
|
+
Math.floor(timestamp.getTime() / windowMs) * windowMs,
|
|
119
119
|
);
|
|
120
|
-
return windowStart.toISOString().slice(0, 13)
|
|
120
|
+
return `${windowStart.toISOString().slice(0, 13)}:00`;
|
|
121
121
|
}
|
|
122
122
|
|
|
123
123
|
/**
|
|
@@ -130,7 +130,7 @@ export function getWindowIdForTimestamp(
|
|
|
130
130
|
export function isTimestampInWindow(
|
|
131
131
|
timestamp: Date,
|
|
132
132
|
windowId: string,
|
|
133
|
-
windowDurationHours: number = 1
|
|
133
|
+
windowDurationHours: number = 1,
|
|
134
134
|
): boolean {
|
|
135
135
|
const { start, end } = getWindowRange(windowId, windowDurationHours);
|
|
136
136
|
const time = timestamp.getTime();
|
package/src/utils/index.ts
CHANGED
|
@@ -2,9 +2,9 @@
|
|
|
2
2
|
* Training Package Utilities
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
-
export { logger } from
|
|
6
|
-
export { generateSnowflakeId } from
|
|
7
|
-
export { assertHasLLMCalls, validateLLMCalls } from
|
|
5
|
+
export { logger } from "./logger";
|
|
6
|
+
export { generateSnowflakeId } from "./snowflake";
|
|
7
|
+
export { assertHasLLMCalls, validateLLMCalls } from "./synthetic-detector";
|
|
8
8
|
|
|
9
9
|
/**
|
|
10
10
|
* Split an array into batches of a specified size
|
|
@@ -77,9 +77,9 @@ export function formatPercent(value: number, decimals = 1): string {
|
|
|
77
77
|
export function formatCurrency(
|
|
78
78
|
value: number,
|
|
79
79
|
decimals = 2,
|
|
80
|
-
prefix =
|
|
80
|
+
prefix = "$",
|
|
81
81
|
): string {
|
|
82
|
-
const sign = value >= 0 ?
|
|
82
|
+
const sign = value >= 0 ? "" : "-";
|
|
83
83
|
return `${sign}${prefix}${Math.abs(value).toFixed(decimals)}`;
|
|
84
84
|
}
|
|
85
85
|
|
|
@@ -94,8 +94,8 @@ export function formatCurrency(
|
|
|
94
94
|
export function formatCurrencyWithSign(
|
|
95
95
|
value: number,
|
|
96
96
|
decimals = 2,
|
|
97
|
-
prefix =
|
|
97
|
+
prefix = "$",
|
|
98
98
|
): string {
|
|
99
|
-
const sign = value >= 0 ?
|
|
99
|
+
const sign = value >= 0 ? "+" : "-";
|
|
100
100
|
return `${sign}${prefix}${Math.abs(value).toFixed(decimals)}`;
|
|
101
101
|
}
|
package/src/utils/logger.ts
CHANGED
|
@@ -12,7 +12,7 @@ function formatData(data: LogData): string {
|
|
|
12
12
|
if (data instanceof Error) {
|
|
13
13
|
return data.message;
|
|
14
14
|
}
|
|
15
|
-
if (typeof data ===
|
|
15
|
+
if (typeof data === "object" && data !== null) {
|
|
16
16
|
return JSON.stringify(data, null, 2);
|
|
17
17
|
}
|
|
18
18
|
return String(data);
|
|
@@ -20,7 +20,7 @@ function formatData(data: LogData): string {
|
|
|
20
20
|
|
|
21
21
|
export const logger = {
|
|
22
22
|
info: (message: string, data?: LogData, context?: string) => {
|
|
23
|
-
const prefix = context ? `[${context}] ` :
|
|
23
|
+
const prefix = context ? `[${context}] ` : "";
|
|
24
24
|
if (data !== undefined) {
|
|
25
25
|
console.log(`${prefix}[INFO] ${message}`, formatData(data));
|
|
26
26
|
} else {
|
|
@@ -29,7 +29,7 @@ export const logger = {
|
|
|
29
29
|
},
|
|
30
30
|
|
|
31
31
|
error: (message: string, data?: LogData, context?: string) => {
|
|
32
|
-
const prefix = context ? `[${context}] ` :
|
|
32
|
+
const prefix = context ? `[${context}] ` : "";
|
|
33
33
|
if (data !== undefined) {
|
|
34
34
|
console.error(`${prefix}[ERROR] ${message}`, formatData(data));
|
|
35
35
|
} else {
|
|
@@ -38,7 +38,7 @@ export const logger = {
|
|
|
38
38
|
},
|
|
39
39
|
|
|
40
40
|
warn: (message: string, data?: LogData, context?: string) => {
|
|
41
|
-
const prefix = context ? `[${context}] ` :
|
|
41
|
+
const prefix = context ? `[${context}] ` : "";
|
|
42
42
|
if (data !== undefined) {
|
|
43
43
|
console.warn(`${prefix}[WARN] ${message}`, formatData(data));
|
|
44
44
|
} else {
|
|
@@ -48,7 +48,7 @@ export const logger = {
|
|
|
48
48
|
|
|
49
49
|
debug: (message: string, data?: LogData, context?: string) => {
|
|
50
50
|
if (process.env.DEBUG) {
|
|
51
|
-
const prefix = context ? `[${context}] ` :
|
|
51
|
+
const prefix = context ? `[${context}] ` : "";
|
|
52
52
|
if (data !== undefined) {
|
|
53
53
|
console.log(`${prefix}[DEBUG] ${message}`, formatData(data));
|
|
54
54
|
} else {
|
package/src/utils/snowflake.ts
CHANGED
|
@@ -13,5 +13,5 @@ export async function generateSnowflakeId(): Promise<string> {
|
|
|
13
13
|
if (counter > 999) counter = 0;
|
|
14
14
|
|
|
15
15
|
// Format: timestamp (13 digits) + counter (3 digits)
|
|
16
|
-
return `${timestamp}${currentCounter.toString().padStart(3,
|
|
16
|
+
return `${timestamp}${currentCounter.toString().padStart(3, "0")}`;
|
|
17
17
|
}
|
package/src/utils/synthetic-detector.ts
CHANGED
|
@@ -58,9 +58,9 @@ export function validateLLMCalls(steps: TrajectoryStep[]): {
|
|
|
58
58
|
totalLLMCalls++;
|
|
59
59
|
|
|
60
60
|
// Validate LLM call has actual content
|
|
61
|
-
const systemPrompt = call.systemPrompt ?? call.system_prompt ??
|
|
62
|
-
const userPrompt = call.userPrompt ?? call.user_prompt ??
|
|
63
|
-
const response = call.response ??
|
|
61
|
+
const systemPrompt = call.systemPrompt ?? call.system_prompt ?? "";
|
|
62
|
+
const userPrompt = call.userPrompt ?? call.user_prompt ?? "";
|
|
63
|
+
const response = call.response ?? "";
|
|
64
64
|
|
|
65
65
|
if (systemPrompt.length < 10) {
|
|
66
66
|
issues.push(`Step ${i}, call ${j}: Missing or empty system prompt`);
|
|
@@ -79,7 +79,7 @@ export function validateLLMCalls(steps: TrajectoryStep[]): {
|
|
|
79
79
|
// At least 3 steps should have LLM calls for valid training data
|
|
80
80
|
if (stepsWithLLM < 3) {
|
|
81
81
|
issues.push(
|
|
82
|
-
`Only ${stepsWithLLM}/${steps.length} steps have LLM calls (minimum: 3)
|
|
82
|
+
`Only ${stepsWithLLM}/${steps.length} steps have LLM calls (minimum: 3)`,
|
|
83
83
|
);
|
|
84
84
|
}
|
|
85
85
|
|
|
@@ -98,14 +98,14 @@ export function validateLLMCalls(steps: TrajectoryStep[]): {
|
|
|
98
98
|
*/
|
|
99
99
|
export function assertHasLLMCalls(
|
|
100
100
|
steps: TrajectoryStep[],
|
|
101
|
-
trajectoryId: string
|
|
101
|
+
trajectoryId: string,
|
|
102
102
|
): void {
|
|
103
103
|
const validation = validateLLMCalls(steps);
|
|
104
104
|
|
|
105
105
|
if (!validation.valid) {
|
|
106
106
|
throw new Error(
|
|
107
|
-
`Trajectory ${trajectoryId} failed LLM validation: ${validation.issues.join(
|
|
108
|
-
|
|
107
|
+
`Trajectory ${trajectoryId} failed LLM validation: ${validation.issues.join("; ")}. ` +
|
|
108
|
+
"Training data must contain real LLM calls.",
|
|
109
109
|
);
|
|
110
110
|
}
|
|
111
111
|
}
|