@elizaos/training 2.0.0-alpha.21 → 2.0.0-alpha.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-lint.log +2 -0
- package/.turbo/turbo-typecheck.log +1 -0
- package/dist/.tsbuildinfo +1 -0
- package/dist/adapter.js +59 -0
- package/dist/archetypes/ArchetypeConfigService.js +510 -0
- package/dist/archetypes/derive-archetype.js +196 -0
- package/dist/archetypes/index.js +7 -0
- package/dist/benchmark/ArchetypeMatchupBenchmark.js +547 -0
- package/dist/benchmark/BenchmarkChartGenerator.js +632 -0
- package/dist/benchmark/BenchmarkDataGenerator.js +825 -0
- package/dist/benchmark/BenchmarkDataViewer.js +197 -0
- package/dist/benchmark/BenchmarkHistoryService.js +135 -0
- package/dist/benchmark/BenchmarkRunner.js +483 -0
- package/dist/benchmark/BenchmarkValidator.js +158 -0
- package/dist/benchmark/FastEvalRunner.js +133 -0
- package/dist/benchmark/MetricsValidator.js +104 -0
- package/dist/benchmark/MetricsVisualizer.js +775 -0
- package/dist/benchmark/ModelBenchmarkService.js +433 -0
- package/dist/benchmark/ModelRegistry.js +122 -0
- package/dist/benchmark/RulerBenchmarkIntegration.js +168 -0
- package/dist/benchmark/SimulationA2AInterface.js +683 -0
- package/dist/benchmark/SimulationEngine.js +522 -0
- package/dist/benchmark/TaskRunner.js +60 -0
- package/dist/benchmark/__tests__/BenchmarkRunner.test.js +409 -0
- package/dist/benchmark/__tests__/HeadToHead.test.js +105 -0
- package/dist/benchmark/index.js +23 -0
- package/dist/benchmark/parseSimulationMetrics.js +86 -0
- package/dist/benchmark/simulation-types.js +1 -0
- package/dist/dependencies.js +197 -0
- package/dist/generation/TrajectoryGenerator.js +244 -0
- package/dist/generation/index.js +6 -0
- package/dist/huggingface/HuggingFaceDatasetUploader.js +463 -0
- package/dist/huggingface/HuggingFaceIntegrationService.js +272 -0
- package/dist/huggingface/HuggingFaceModelUploader.js +385 -0
- package/dist/huggingface/index.js +9 -0
- package/dist/huggingface/shared/HuggingFaceUploadUtil.js +144 -0
- package/dist/index.js +41 -0
- package/dist/init-training.js +43 -0
- package/dist/metrics/TrajectoryMetricsExtractor.js +523 -0
- package/dist/metrics/__tests__/TrajectoryMetricsExtractor.test.js +628 -0
- package/dist/metrics/index.js +7 -0
- package/dist/metrics/types.js +21 -0
- package/dist/rubrics/__tests__/index.test.js +150 -0
- package/dist/rubrics/ass-kisser.js +83 -0
- package/dist/rubrics/degen.js +78 -0
- package/dist/rubrics/goody-twoshoes.js +82 -0
- package/dist/rubrics/index.js +184 -0
- package/dist/rubrics/information-trader.js +82 -0
- package/dist/rubrics/infosec.js +99 -0
- package/dist/rubrics/liar.js +102 -0
- package/dist/rubrics/perps-trader.js +85 -0
- package/dist/rubrics/researcher.js +79 -0
- package/dist/rubrics/scammer.js +80 -0
- package/dist/rubrics/social-butterfly.js +71 -0
- package/dist/rubrics/super-predictor.js +95 -0
- package/dist/rubrics/trader.js +65 -0
- package/dist/scoring/ArchetypeScoringService.js +301 -0
- package/dist/scoring/JudgePromptBuilder.js +401 -0
- package/dist/scoring/LLMJudgeCache.js +263 -0
- package/dist/scoring/index.js +8 -0
- package/dist/training/AutomationPipeline.js +714 -0
- package/dist/training/BenchmarkService.js +370 -0
- package/dist/training/ConfigValidator.js +153 -0
- package/dist/training/MarketOutcomesTracker.js +142 -0
- package/dist/training/ModelDeployer.js +128 -0
- package/dist/training/ModelFetcher.js +48 -0
- package/dist/training/ModelSelectionService.js +248 -0
- package/dist/training/ModelUsageVerifier.js +106 -0
- package/dist/training/MultiModelOrchestrator.js +349 -0
- package/dist/training/RLModelConfig.js +295 -0
- package/dist/training/RewardBackpropagationService.js +117 -0
- package/dist/training/RulerScoringService.js +450 -0
- package/dist/training/TrainingMonitor.js +108 -0
- package/dist/training/TrajectoryRecorder.js +281 -0
- package/dist/training/__tests__/TrajectoryRecorder.test.js +363 -0
- package/dist/training/index.js +30 -0
- package/dist/training/logRLConfig.js +29 -0
- package/dist/training/pipeline.js +80 -0
- package/dist/training/storage/ModelStorageService.js +190 -0
- package/dist/training/storage/TrainingDataArchiver.js +136 -0
- package/dist/training/storage/index.js +7 -0
- package/dist/training/types.js +6 -0
- package/dist/training/window-utils.js +100 -0
- package/dist/utils/index.js +73 -0
- package/dist/utils/logger.js +55 -0
- package/dist/utils/snowflake.js +15 -0
- package/dist/utils/synthetic-detector.js +67 -0
- package/package.json +2 -2
- package/research-output/training-runs/training-run-1773742857616.json +38 -0
- package/research-output/training-runs/training-run-1773742946977.json +38 -0
- package/research-output/training-runs/training-run-1773743278891.json +38 -0
- package/research-output/training-runs/training-run-1773743409754.json +38 -0
- package/research-output/training-runs/training-run-1773743651086.json +38 -0
- package/research-output/training-runs/training-run-1773743782883.json +38 -0
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Snowflake ID Generator
|
|
3
|
+
*
|
|
4
|
+
* Generates unique IDs for training package entities.
|
|
5
|
+
* Uses a simple timestamp-based approach.
|
|
6
|
+
*/
|
|
7
|
+
// Sequence number within the current millisecond; always 0-999 so it fits in three digits.
let counter = 0;
// Timestamp component of the most recently issued ID. It never decreases, which keeps
// IDs unique even if the system clock steps backwards.
let lastTimestamp = 0;

/**
 * Generate a process-unique, time-ordered ID string.
 *
 * Format: millisecond timestamp followed by a 3-digit zero-padded sequence number.
 *
 * The sequence restarts at 0 whenever the timestamp advances. If more than 1000 IDs
 * are requested within the same millisecond, the generator borrows the next
 * millisecond instead of wrapping the sequence, so two calls in this process never
 * return the same ID.
 *
 * @returns {Promise<string>} The generated ID (digits only).
 */
export async function generateSnowflakeId() {
  const SEQUENCE_LIMIT = 1000;

  // Never go below the last issued timestamp (guards against clock rollback and
  // against a previously borrowed millisecond).
  let timestamp = Math.max(Date.now(), lastTimestamp);

  if (timestamp > lastTimestamp) {
    // New millisecond: start a fresh sequence.
    counter = 0;
  } else if (counter >= SEQUENCE_LIMIT) {
    // Sequence exhausted for this millisecond: move to the next logical millisecond
    // rather than reusing sequence numbers.
    timestamp += 1;
    counter = 0;
  }
  lastTimestamp = timestamp;

  const currentCounter = counter++;
  // Format: timestamp (13 digits) + counter (3 digits)
  return `${timestamp}${String(currentCounter).padStart(3, "0")}`;
}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Training Data Validator
|
|
3
|
+
*
|
|
4
|
+
* Validates that training data contains real LLM calls.
|
|
5
|
+
* No synthetic pattern detection needed - we simply don't generate synthetic data.
|
|
6
|
+
*/
|
|
7
|
+
/**
 * Validate that trajectory steps contain real LLM calls.
 *
 * Training data MUST have actual LLM calls with real prompts and responses.
 *
 * Calls are read from `step.llmCalls` (falling back to `step.llm_calls`), and each
 * call's prompts are read from the camelCase key first, then the snake_case key.
 * A system or user prompt shorter than 10 characters, or a response shorter than
 * 5 characters, is reported as missing. Values that are not strings are treated
 * as empty so they are reported instead of silently passing.
 *
 * A step only counts toward `stepsWithLLM` when it has at least one non-null call.
 *
 * @param {Array<object>|null|undefined} steps - Trajectory steps. A missing or
 *   non-array value is treated as an empty trajectory.
 * @returns {{valid: boolean, totalSteps: number, stepsWithLLM: number, totalLLMCalls: number, issues: string[]}}
 *   Object with validation result and details
 */
export function validateLLMCalls(steps) {
  const MIN_PROMPT_LENGTH = 10;
  const MIN_RESPONSE_LENGTH = 5;
  const MIN_STEPS_WITH_LLM = 3;

  const stepList = Array.isArray(steps) ? steps : [];
  const issues = [];
  let stepsWithLLM = 0;
  let totalLLMCalls = 0;

  // Only strings have a meaningful content length; anything else counts as empty.
  const textLength = (value) => (typeof value === "string" ? value.length : 0);

  stepList.forEach((step, i) => {
    const rawCalls = step?.llmCalls ?? step?.llm_calls;
    // A non-array value cannot hold calls, so treat it like "no calls".
    const llmCalls = Array.isArray(rawCalls) ? rawCalls : [];
    let callsInStep = 0;

    llmCalls.forEach((call, j) => {
      if (!call) {
        return;
      }
      callsInStep++;

      // Validate LLM call has actual content
      const systemPrompt = call.systemPrompt ?? call.system_prompt;
      const userPrompt = call.userPrompt ?? call.user_prompt;
      const response = call.response;

      if (textLength(systemPrompt) < MIN_PROMPT_LENGTH) {
        issues.push(`Step ${i}, call ${j}: Missing or empty system prompt`);
      }
      if (textLength(userPrompt) < MIN_PROMPT_LENGTH) {
        issues.push(`Step ${i}, call ${j}: Missing or empty user prompt`);
      }
      if (textLength(response) < MIN_RESPONSE_LENGTH) {
        issues.push(`Step ${i}, call ${j}: Missing or empty response`);
      }
    });

    // Steps whose call list holds only null entries have no real LLM calls.
    if (callsInStep > 0) {
      stepsWithLLM++;
    }
    totalLLMCalls += callsInStep;
  });

  // At least 3 steps should have LLM calls for valid training data
  if (stepsWithLLM < MIN_STEPS_WITH_LLM) {
    issues.push(`Only ${stepsWithLLM}/${stepList.length} steps have LLM calls (minimum: 3)`);
  }

  return {
    valid: issues.length === 0,
    totalSteps: stepList.length,
    stepsWithLLM,
    totalLLMCalls,
    issues,
  };
}
|
|
57
|
+
/**
 * Guard that a trajectory's steps contain real LLM calls.
 *
 * Runs validateLLMCalls over the steps and throws when the result is not valid;
 * returns nothing otherwise.
 *
 * @param steps - Trajectory steps, passed straight to validateLLMCalls.
 * @param trajectoryId - Identifier interpolated into the error message.
 * @throws {Error} When validation fails; the message lists every issue joined by "; ".
 */
export function assertHasLLMCalls(steps, trajectoryId) {
  const { valid, issues } = validateLLMCalls(steps);
  if (valid) {
    return;
  }
  const issueSummary = issues.join("; ");
  throw new Error(
    `Trajectory ${trajectoryId} failed LLM validation: ${issueSummary}. Training data must contain real LLM calls.`,
  );
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@elizaos/training",
|
|
3
|
-
"version": "2.0.0-alpha.21",
|
|
3
|
+
"version": "2.0.0-alpha.22",
|
|
4
4
|
"description": "ElizaOS RL training pipeline with benchmarking and model publishing support",
|
|
5
5
|
"main": "./src/index.ts",
|
|
6
6
|
"types": "./src/index.ts",
|
|
@@ -53,5 +53,5 @@
|
|
|
53
53
|
"bun-types": "^1.3.2",
|
|
54
54
|
"typescript": "^5.9.3"
|
|
55
55
|
},
|
|
56
|
-
"gitHead": "
|
|
56
|
+
"gitHead": "56caa0e2d9f193f75091154d639df4a48065d80f"
|
|
57
57
|
}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
{
|
|
2
|
+
"timestamp": "2026-03-17T10:20:57.616Z",
|
|
3
|
+
"config": {
|
|
4
|
+
"skipTraining": true,
|
|
5
|
+
"skipBenchmark": true,
|
|
6
|
+
"ticks": 100,
|
|
7
|
+
"archetype": "trader",
|
|
8
|
+
"verbose": false
|
|
9
|
+
},
|
|
10
|
+
"results": [
|
|
11
|
+
{
|
|
12
|
+
"name": "Check Prerequisites",
|
|
13
|
+
"success": true,
|
|
14
|
+
"message": "Prerequisites satisfied (Ollama: yes)",
|
|
15
|
+
"details": {
|
|
16
|
+
"python": true,
|
|
17
|
+
"trainingDir": true,
|
|
18
|
+
"mlx": false,
|
|
19
|
+
"ollama": true
|
|
20
|
+
},
|
|
21
|
+
"duration": 132
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
"name": "Install Dependencies",
|
|
25
|
+
"success": true,
|
|
26
|
+
"message": "Python dependencies installed",
|
|
27
|
+
"duration": 1528
|
|
28
|
+
},
|
|
29
|
+
{
|
|
30
|
+
"name": "Train Model",
|
|
31
|
+
"success": true,
|
|
32
|
+
"message": "Training skipped (--skip-training)",
|
|
33
|
+
"duration": 0
|
|
34
|
+
}
|
|
35
|
+
],
|
|
36
|
+
"totalDuration": 1660,
|
|
37
|
+
"success": true
|
|
38
|
+
}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
{
|
|
2
|
+
"timestamp": "2026-03-17T10:22:26.977Z",
|
|
3
|
+
"config": {
|
|
4
|
+
"skipTraining": true,
|
|
5
|
+
"skipBenchmark": true,
|
|
6
|
+
"ticks": 100,
|
|
7
|
+
"archetype": "trader",
|
|
8
|
+
"verbose": false
|
|
9
|
+
},
|
|
10
|
+
"results": [
|
|
11
|
+
{
|
|
12
|
+
"name": "Check Prerequisites",
|
|
13
|
+
"success": true,
|
|
14
|
+
"message": "Prerequisites satisfied (Ollama: yes)",
|
|
15
|
+
"details": {
|
|
16
|
+
"python": true,
|
|
17
|
+
"trainingDir": true,
|
|
18
|
+
"mlx": false,
|
|
19
|
+
"ollama": true
|
|
20
|
+
},
|
|
21
|
+
"duration": 55
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
"name": "Install Dependencies",
|
|
25
|
+
"success": true,
|
|
26
|
+
"message": "Python dependencies installed",
|
|
27
|
+
"duration": 1215
|
|
28
|
+
},
|
|
29
|
+
{
|
|
30
|
+
"name": "Train Model",
|
|
31
|
+
"success": true,
|
|
32
|
+
"message": "Training skipped (--skip-training)",
|
|
33
|
+
"duration": 0
|
|
34
|
+
}
|
|
35
|
+
],
|
|
36
|
+
"totalDuration": 1270,
|
|
37
|
+
"success": true
|
|
38
|
+
}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
{
|
|
2
|
+
"timestamp": "2026-03-17T10:27:58.891Z",
|
|
3
|
+
"config": {
|
|
4
|
+
"skipTraining": true,
|
|
5
|
+
"skipBenchmark": true,
|
|
6
|
+
"ticks": 100,
|
|
7
|
+
"archetype": "trader",
|
|
8
|
+
"verbose": false
|
|
9
|
+
},
|
|
10
|
+
"results": [
|
|
11
|
+
{
|
|
12
|
+
"name": "Check Prerequisites",
|
|
13
|
+
"success": true,
|
|
14
|
+
"message": "Prerequisites satisfied (Ollama: yes)",
|
|
15
|
+
"details": {
|
|
16
|
+
"python": true,
|
|
17
|
+
"trainingDir": true,
|
|
18
|
+
"mlx": false,
|
|
19
|
+
"ollama": true
|
|
20
|
+
},
|
|
21
|
+
"duration": 315
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
"name": "Install Dependencies",
|
|
25
|
+
"success": true,
|
|
26
|
+
"message": "Python dependencies installed",
|
|
27
|
+
"duration": 3870
|
|
28
|
+
},
|
|
29
|
+
{
|
|
30
|
+
"name": "Train Model",
|
|
31
|
+
"success": true,
|
|
32
|
+
"message": "Training skipped (--skip-training)",
|
|
33
|
+
"duration": 0
|
|
34
|
+
}
|
|
35
|
+
],
|
|
36
|
+
"totalDuration": 4185,
|
|
37
|
+
"success": true
|
|
38
|
+
}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
{
|
|
2
|
+
"timestamp": "2026-03-17T10:30:09.754Z",
|
|
3
|
+
"config": {
|
|
4
|
+
"skipTraining": true,
|
|
5
|
+
"skipBenchmark": true,
|
|
6
|
+
"ticks": 100,
|
|
7
|
+
"archetype": "trader",
|
|
8
|
+
"verbose": false
|
|
9
|
+
},
|
|
10
|
+
"results": [
|
|
11
|
+
{
|
|
12
|
+
"name": "Check Prerequisites",
|
|
13
|
+
"success": true,
|
|
14
|
+
"message": "Prerequisites satisfied (Ollama: yes)",
|
|
15
|
+
"details": {
|
|
16
|
+
"python": true,
|
|
17
|
+
"trainingDir": true,
|
|
18
|
+
"mlx": false,
|
|
19
|
+
"ollama": true
|
|
20
|
+
},
|
|
21
|
+
"duration": 145
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
"name": "Install Dependencies",
|
|
25
|
+
"success": true,
|
|
26
|
+
"message": "Python dependencies installed",
|
|
27
|
+
"duration": 2265
|
|
28
|
+
},
|
|
29
|
+
{
|
|
30
|
+
"name": "Train Model",
|
|
31
|
+
"success": true,
|
|
32
|
+
"message": "Training skipped (--skip-training)",
|
|
33
|
+
"duration": 1
|
|
34
|
+
}
|
|
35
|
+
],
|
|
36
|
+
"totalDuration": 2412,
|
|
37
|
+
"success": true
|
|
38
|
+
}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
{
|
|
2
|
+
"timestamp": "2026-03-17T10:34:11.086Z",
|
|
3
|
+
"config": {
|
|
4
|
+
"skipTraining": true,
|
|
5
|
+
"skipBenchmark": true,
|
|
6
|
+
"ticks": 100,
|
|
7
|
+
"archetype": "trader",
|
|
8
|
+
"verbose": false
|
|
9
|
+
},
|
|
10
|
+
"results": [
|
|
11
|
+
{
|
|
12
|
+
"name": "Check Prerequisites",
|
|
13
|
+
"success": true,
|
|
14
|
+
"message": "Prerequisites satisfied (Ollama: yes)",
|
|
15
|
+
"details": {
|
|
16
|
+
"python": true,
|
|
17
|
+
"trainingDir": true,
|
|
18
|
+
"mlx": false,
|
|
19
|
+
"ollama": true
|
|
20
|
+
},
|
|
21
|
+
"duration": 138
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
"name": "Install Dependencies",
|
|
25
|
+
"success": true,
|
|
26
|
+
"message": "Python dependencies installed",
|
|
27
|
+
"duration": 1809
|
|
28
|
+
},
|
|
29
|
+
{
|
|
30
|
+
"name": "Train Model",
|
|
31
|
+
"success": true,
|
|
32
|
+
"message": "Training skipped (--skip-training)",
|
|
33
|
+
"duration": 1
|
|
34
|
+
}
|
|
35
|
+
],
|
|
36
|
+
"totalDuration": 1949,
|
|
37
|
+
"success": true
|
|
38
|
+
}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
{
|
|
2
|
+
"timestamp": "2026-03-17T10:36:22.883Z",
|
|
3
|
+
"config": {
|
|
4
|
+
"skipTraining": true,
|
|
5
|
+
"skipBenchmark": true,
|
|
6
|
+
"ticks": 100,
|
|
7
|
+
"archetype": "trader",
|
|
8
|
+
"verbose": false
|
|
9
|
+
},
|
|
10
|
+
"results": [
|
|
11
|
+
{
|
|
12
|
+
"name": "Check Prerequisites",
|
|
13
|
+
"success": true,
|
|
14
|
+
"message": "Prerequisites satisfied (Ollama: yes)",
|
|
15
|
+
"details": {
|
|
16
|
+
"python": true,
|
|
17
|
+
"trainingDir": true,
|
|
18
|
+
"mlx": false,
|
|
19
|
+
"ollama": true
|
|
20
|
+
},
|
|
21
|
+
"duration": 40
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
"name": "Install Dependencies",
|
|
25
|
+
"success": true,
|
|
26
|
+
"message": "Python dependencies installed",
|
|
27
|
+
"duration": 1138
|
|
28
|
+
},
|
|
29
|
+
{
|
|
30
|
+
"name": "Train Model",
|
|
31
|
+
"success": true,
|
|
32
|
+
"message": "Training skipped (--skip-training)",
|
|
33
|
+
"duration": 0
|
|
34
|
+
}
|
|
35
|
+
],
|
|
36
|
+
"totalDuration": 1178,
|
|
37
|
+
"success": true
|
|
38
|
+
}
|