@elizaos/training 2.0.0-alpha.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile +75 -0
- package/LICENSE +21 -0
- package/Makefile +374 -0
- package/README.md +346 -0
- package/config/rubrics.json +137 -0
- package/docker-compose.test.yml +57 -0
- package/package.json +57 -0
- package/python/config/babylon_atropos.yaml +90 -0
- package/python/config/profiles/12gb.json +11 -0
- package/python/config/profiles/16gb.json +10 -0
- package/python/config/profiles/24gb.json +10 -0
- package/python/config/profiles/48gb.json +10 -0
- package/python/config/profiles/cpu.json +11 -0
- package/python/config/profiles/l40-2gpu-safe.json +20 -0
- package/python/config/profiles/l40-2gpu.json +22 -0
- package/python/config/profiles/l40-4gpu.json +21 -0
- package/python/config/profiles/l40.json +17 -0
- package/python/config/tinker_training.yaml +143 -0
- package/python/curriculum_state.json +165 -0
- package/python/env.template +86 -0
- package/python/env.training.template +46 -0
- package/python/pyproject.toml +41 -0
- package/python/requirements-ci.txt +31 -0
- package/python/requirements.txt +87 -0
- package/python/scripts/__init__.py +4 -0
- package/python/scripts/benchmark_should_respond.py +190 -0
- package/python/scripts/debug_inference.py +62 -0
- package/python/scripts/import_json_trajectories.py +412 -0
- package/python/scripts/local-finetune/README.md +63 -0
- package/python/scripts/local-finetune/ingest_and_score.py +139 -0
- package/python/scripts/local-finetune/merge_model.py +32 -0
- package/python/scripts/local-finetune/test_adapter.py +91 -0
- package/python/scripts/local-finetune/train_from_csv.py +132 -0
- package/python/scripts/merge_trajectories.py +318 -0
- package/python/scripts/optimize_prompt_grpo.py +269 -0
- package/python/scripts/run_ab_test.py +143 -0
- package/python/scripts/run_full_pipeline.py +544 -0
- package/python/scripts/run_tinker_training.py +192 -0
- package/python/scripts/run_training.py +914 -0
- package/python/scripts/test_generation.py +29 -0
- package/python/scripts/test_judge.py +155 -0
- package/python/scripts/test_pipeline.py +356 -0
- package/python/scripts/test_trained_model.py +380 -0
- package/python/scripts/train_grpo.py +360 -0
- package/python/scripts/train_jsonl.py +223 -0
- package/python/scripts/train_local.py +528 -0
- package/python/setup.py +20 -0
- package/python/src/__init__.py +190 -0
- package/python/src/data_bridge/__init__.py +24 -0
- package/python/src/data_bridge/converter.py +435 -0
- package/python/src/data_bridge/reader.py +393 -0
- package/python/src/models.py +283 -0
- package/python/src/training/__init__.py +605 -0
- package/python/src/training/ab_testing.py +404 -0
- package/python/src/training/action_executor.py +621 -0
- package/python/src/training/archetype_trainer.py +347 -0
- package/python/src/training/atropos_trainer.py +980 -0
- package/python/src/training/babylon_env.py +1254 -0
- package/python/src/training/error_recovery.py +647 -0
- package/python/src/training/evaluation.py +856 -0
- package/python/src/training/fast_simulator.py +880 -0
- package/python/src/training/format_validator.py +584 -0
- package/python/src/training/hybrid_env.py +522 -0
- package/python/src/training/kl_controller.py +628 -0
- package/python/src/training/multi_prompt_dataset.py +883 -0
- package/python/src/training/multi_turn.py +656 -0
- package/python/src/training/online_env.py +1084 -0
- package/python/src/training/quality_scorer.py +391 -0
- package/python/src/training/quality_utils.py +633 -0
- package/python/src/training/rewards.py +1344 -0
- package/python/src/training/rlaif_env.py +17 -0
- package/python/src/training/rollout_generator.py +502 -0
- package/python/src/training/rubric_loader.py +198 -0
- package/python/src/training/scenario_pool.py +1072 -0
- package/python/src/training/schemas.py +481 -0
- package/python/src/training/service_manager.py +552 -0
- package/python/src/training/simulation_bridge.py +535 -0
- package/python/src/training/tick_reward_attribution.py +399 -0
- package/python/src/training/tinker_client.py +575 -0
- package/python/src/training/tinker_trainer.py +646 -0
- package/python/src/training/tokenization_utils.py +402 -0
- package/python/tests/e2e/__init__.py +13 -0
- package/python/tests/e2e/conftest.py +258 -0
- package/python/tests/e2e/test_full_pipeline.py +643 -0
- package/python/tests/e2e/test_online_training_e2e.py +365 -0
- package/python/tests/integration/__init__.py +12 -0
- package/python/tests/integration/conftest.py +383 -0
- package/python/tests/integration/test_db_integration.py +649 -0
- package/python/tests/integration/test_json_mode_integration.py +554 -0
- package/python/tests/test_action_executor.py +594 -0
- package/python/tests/test_archetype_scoring.py +1027 -0
- package/python/tests/test_atropos_integration.py +360 -0
- package/python/tests/test_evaluation.py +727 -0
- package/python/tests/test_format_validator.py +486 -0
- package/python/tests/test_kl_controller.py +432 -0
- package/python/tests/test_lr_scheduler.py +579 -0
- package/python/tests/test_multi_turn.py +590 -0
- package/python/tests/test_online_env.py +519 -0
- package/python/tests/test_quality_scorer.py +474 -0
- package/python/tests/test_scenario_pool.py +735 -0
- package/python/tests/test_service_manager.py +585 -0
- package/python/tests/test_simulation_rollout.py +581 -0
- package/python/tests/test_tokenization_utils.py +501 -0
- package/python/tests/test_training_orchestrator.py +497 -0
- package/python/tests/test_training_output_structure.py +661 -0
- package/research-output/training-runs/training-run-1770772042899.json +26 -0
- package/research-output/training-runs/training-run-1770930079670.json +32 -0
- package/research-output/training-runs/training-run-1770930143700.json +44 -0
- package/research-output/training-runs/training-run-1770930183638.json +38 -0
- package/research-output/training-runs/training-run-1770930442049.json +38 -0
- package/research-output/training-runs/training-run-1770930793243.json +38 -0
- package/research-output/training-runs/training-run-1771276293257.json +38 -0
- package/research-output/training-runs/training-run-1771276389280.json +38 -0
- package/research-output/training-runs/training-run-1771276502776.json +38 -0
- package/research-output/training-runs/training-run-1771277340748.json +38 -0
- package/research-output/training-runs/training-run-1773013658993.json +38 -0
- package/research-output/training-runs/training-run-1773013861014.json +38 -0
- package/research-output/training-runs/training-run-1773014215983.json +38 -0
- package/scripts/assess-training-data.ts +422 -0
- package/scripts/e2e-training-test.ts +550 -0
- package/scripts/export-rubrics.ts +64 -0
- package/scripts/generate-research-report.ts +1523 -0
- package/scripts/generate_dataset.sh +173 -0
- package/scripts/generate_should_respond.ts +267 -0
- package/scripts/generate_should_respond_dataset.ts +162 -0
- package/scripts/json-mode-benchmark.ts +399 -0
- package/scripts/rank_trajectories.ts +207 -0
- package/scripts/real-archetype-benchmark.ts +210 -0
- package/scripts/run-baseline-comparison.ts +116 -0
- package/scripts/run-full-pipeline.ts +272 -0
- package/scripts/run_rlaif_loop.ts +78 -0
- package/scripts/run_task_benchmark.ts +247 -0
- package/scripts/runpod_setup.sh +137 -0
- package/scripts/runpod_validate.sh +147 -0
- package/scripts/test-model-in-game.ts +955 -0
- package/scripts/test-scoring.ts +73 -0
- package/scripts/test-trained-model.ts +209 -0
- package/scripts/train-and-test.ts +824 -0
- package/scripts/verify-final.ts +118 -0
- package/src/adapter.ts +516 -0
- package/src/archetypes/ArchetypeConfigService.ts +626 -0
- package/src/archetypes/derive-archetype.ts +249 -0
- package/src/archetypes/index.ts +22 -0
- package/src/benchmark/ArchetypeMatchupBenchmark.ts +825 -0
- package/src/benchmark/BenchmarkChartGenerator.ts +748 -0
- package/src/benchmark/BenchmarkDataGenerator.ts +1288 -0
- package/src/benchmark/BenchmarkDataViewer.ts +324 -0
- package/src/benchmark/BenchmarkHistoryService.ts +221 -0
- package/src/benchmark/BenchmarkRunner.ts +685 -0
- package/src/benchmark/BenchmarkValidator.ts +204 -0
- package/src/benchmark/FastEvalRunner.ts +225 -0
- package/src/benchmark/MetricsValidator.ts +165 -0
- package/src/benchmark/MetricsVisualizer.ts +909 -0
- package/src/benchmark/ModelBenchmarkService.ts +611 -0
- package/src/benchmark/ModelRegistry.ts +158 -0
- package/src/benchmark/RulerBenchmarkIntegration.ts +235 -0
- package/src/benchmark/SimulationA2AInterface.ts +1169 -0
- package/src/benchmark/SimulationEngine.ts +832 -0
- package/src/benchmark/TaskRunner.ts +94 -0
- package/src/benchmark/__tests__/BenchmarkRunner.test.ts +534 -0
- package/src/benchmark/__tests__/HeadToHead.test.ts +126 -0
- package/src/benchmark/index.ts +91 -0
- package/src/benchmark/parseSimulationMetrics.ts +124 -0
- package/src/benchmark/simulation-types.ts +78 -0
- package/src/dependencies.ts +475 -0
- package/src/generation/TrajectoryGenerator.ts +387 -0
- package/src/generation/index.ts +12 -0
- package/src/huggingface/HuggingFaceDatasetUploader.ts +636 -0
- package/src/huggingface/HuggingFaceIntegrationService.ts +426 -0
- package/src/huggingface/HuggingFaceModelUploader.ts +532 -0
- package/src/huggingface/index.ts +27 -0
- package/src/huggingface/shared/HuggingFaceUploadUtil.ts +206 -0
- package/src/index.ts +102 -0
- package/src/init-training.ts +53 -0
- package/src/metrics/TrajectoryMetricsExtractor.ts +653 -0
- package/src/metrics/__tests__/TrajectoryMetricsExtractor.test.ts +759 -0
- package/src/metrics/index.ts +8 -0
- package/src/metrics/types.ts +200 -0
- package/src/rubrics/__tests__/index.test.ts +184 -0
- package/src/rubrics/ass-kisser.ts +85 -0
- package/src/rubrics/degen.ts +80 -0
- package/src/rubrics/goody-twoshoes.ts +84 -0
- package/src/rubrics/index.ts +236 -0
- package/src/rubrics/information-trader.ts +84 -0
- package/src/rubrics/infosec.ts +101 -0
- package/src/rubrics/liar.ts +104 -0
- package/src/rubrics/perps-trader.ts +87 -0
- package/src/rubrics/researcher.ts +81 -0
- package/src/rubrics/scammer.ts +82 -0
- package/src/rubrics/social-butterfly.ts +73 -0
- package/src/rubrics/super-predictor.ts +97 -0
- package/src/rubrics/trader.ts +67 -0
- package/src/scoring/ArchetypeScoringService.ts +486 -0
- package/src/scoring/JudgePromptBuilder.ts +556 -0
- package/src/scoring/LLMJudgeCache.ts +401 -0
- package/src/scoring/index.ts +9 -0
- package/src/training/AutomationPipeline.ts +916 -0
- package/src/training/BenchmarkService.ts +518 -0
- package/src/training/ConfigValidator.ts +220 -0
- package/src/training/MarketOutcomesTracker.ts +187 -0
- package/src/training/ModelDeployer.ts +186 -0
- package/src/training/ModelFetcher.ts +76 -0
- package/src/training/ModelSelectionService.ts +341 -0
- package/src/training/ModelUsageVerifier.ts +160 -0
- package/src/training/MultiModelOrchestrator.ts +580 -0
- package/src/training/RLModelConfig.ts +407 -0
- package/src/training/RewardBackpropagationService.ts +149 -0
- package/src/training/RulerScoringService.ts +666 -0
- package/src/training/TrainingMonitor.ts +166 -0
- package/src/training/TrajectoryRecorder.ts +399 -0
- package/src/training/__tests__/TrajectoryRecorder.test.ts +472 -0
- package/src/training/index.ts +100 -0
- package/src/training/logRLConfig.ts +34 -0
- package/src/training/pipeline.ts +129 -0
- package/src/training/storage/ModelStorageService.ts +279 -0
- package/src/training/storage/TrainingDataArchiver.ts +197 -0
- package/src/training/storage/index.ts +17 -0
- package/src/training/types.ts +207 -0
- package/src/training/window-utils.ts +138 -0
- package/src/utils/index.ts +101 -0
- package/src/utils/logger.ts +59 -0
- package/src/utils/snowflake.ts +17 -0
- package/src/utils/synthetic-detector.ts +111 -0
- package/tsconfig.json +20 -0
|
@@ -0,0 +1,426 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HuggingFace Integration Service
|
|
3
|
+
*
|
|
4
|
+
* Orchestrates the complete HuggingFace integration pipeline.
|
|
5
|
+
* Main entry point for all HuggingFace operations.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { getTrainingDataAdapter } from '../adapter';
|
|
9
|
+
import { ModelBenchmarkService } from '../benchmark/ModelBenchmarkService';
|
|
10
|
+
import { getExportToHuggingFace } from '../dependencies';
|
|
11
|
+
import { logger } from '../utils';
|
|
12
|
+
import { HuggingFaceDatasetUploader } from './HuggingFaceDatasetUploader';
|
|
13
|
+
import { HuggingFaceModelUploader } from './HuggingFaceModelUploader';
|
|
14
|
+
import { getHuggingFaceToken } from './shared/HuggingFaceUploadUtil';
|
|
15
|
+
|
|
16
|
+
/** Outcome of uploading a single dataset during the weekly pipeline. */
type DatasetUploadOutcome = {
  /** Whether the upload succeeded (also set to true when skipped by a dry run). */
  success: boolean;
  /** URL reported by the uploader, when one was returned. */
  url?: string;
  /** Error text reported by the uploader, when the upload failed. */
  error?: string;
};

/**
 * Summary returned by `HuggingFaceIntegrationService.executeWeeklyUpload`.
 */
export interface WeeklyUploadResult {
  /** True only when the run finished with an empty `errors` list. */
  success: boolean;
  /** Per-dataset upload outcomes. */
  datasets: {
    benchmarks: DatasetUploadOutcome;
    trajectories: DatasetUploadOutcome;
  };
  /** Model counters accumulated while processing unbenchmarked models. */
  models: {
    /** Number of unbenchmarked models found for this run. */
    processed: number;
    /** Number of models whose benchmark completed without throwing. */
    benchmarked: number;
    /** Number of models successfully uploaded. */
    uploaded: number;
  };
  /** Human-readable error messages collected across all steps. */
  errors: string[];
  /** Wall-clock duration of the run in milliseconds. */
  duration: number;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Optional overrides for the weekly upload pipeline. Every field may be
 * omitted; the service then falls back to environment variables and finally
 * to built-in defaults.
 */
export interface DatasetUploadOptions {
  /**
   * When true, dataset and model uploads are skipped and the dataset steps
   * are reported as successful. Models are still benchmarked.
   */
  dryRun?: boolean;

  /**
   * Benchmark dataset repository name.
   * Fallbacks: `HF_DATASET_NAME`, then `'elizaos/agent-benchmarks'`.
   */
  datasetName?: string;

  /**
   * Trajectory dataset repository name.
   * Fallbacks: `HF_TRAJECTORY_DATASET_NAME`, then `'elizaos/agent-trajectories'`.
   */
  trajectoryDatasetName?: string;

  /**
   * Prefix for uploaded model names; the model version is appended after a
   * hyphen. Fallbacks: `HF_MODEL_NAME`, then `'elizaos/agent'`.
   */
  modelNamePrefix?: string;

  /**
   * Prefix for uploaded model descriptions; ` - v<version>` is appended.
   * Fallbacks: `HF_MODEL_DESCRIPTION_PREFIX`, then `'Autonomous ElizaOS agent'`.
   */
  modelDescriptionPrefix?: string;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Orchestrates the HuggingFace integration pipeline: dataset uploads, model
 * benchmarking with conditional model uploads, plus readiness and statistics
 * queries.
 */
export class HuggingFaceIntegrationService {
  /** Context tag passed as the third argument of every logger call. */
  private static readonly LOG_CONTEXT = 'HuggingFaceIntegration';

  private readonly datasetUploader: HuggingFaceDatasetUploader;
  private readonly modelUploader: HuggingFaceModelUploader;

  constructor() {
    this.datasetUploader = new HuggingFaceDatasetUploader();
    this.modelUploader = new HuggingFaceModelUploader();
  }

  /**
   * Execute the complete weekly upload pipeline.
   *
   * Runs three steps in order: benchmark dataset upload, trajectory dataset
   * upload, then benchmarking (and conditional upload) of every
   * unbenchmarked model. Step-level failures are collected in
   * `result.errors` instead of aborting the run.
   *
   * @param options Optional overrides; see {@link DatasetUploadOptions}.
   * @returns A summary of the run. This method does not reject: an
   *   unexpected exception is recorded in `errors` and `success` stays false.
   */
  async executeWeeklyUpload(
    options: DatasetUploadOptions = {}
  ): Promise<WeeklyUploadResult> {
    const ctx = HuggingFaceIntegrationService.LOG_CONTEXT;
    const startTime = Date.now();
    logger.info('Starting weekly upload pipeline', options, ctx);

    const result: WeeklyUploadResult = {
      success: false,
      datasets: {
        benchmarks: { success: false },
        trajectories: { success: false },
      },
      models: {
        processed: 0,
        benchmarked: 0,
        uploaded: 0,
      },
      errors: [],
      duration: 0,
    };

    try {
      await this.uploadBenchmarkDataset(options, result);
      await this.uploadTrajectoryDataset(options, result);
      await this.processUnbenchmarkedModels(options, result);

      result.success = result.errors.length === 0;
      result.duration = Date.now() - startTime;

      logger.info(
        'Weekly upload pipeline complete',
        {
          success: result.success,
          benchmarkDataset: result.datasets.benchmarks.success,
          trajectoryDataset: result.datasets.trajectories.success,
          modelsProcessed: result.models.processed,
          modelsBenchmarked: result.models.benchmarked,
          modelsUploaded: result.models.uploaded,
          errors: result.errors.length,
          duration: result.duration,
        },
        ctx
      );

      return result;
    } catch (error) {
      // Anything that escaped the per-step handling ends the run here;
      // `result.success` keeps its initial `false`.
      result.duration = Date.now() - startTime;
      result.errors.push(
        error instanceof Error ? error.message : String(error)
      );
      logger.error('Weekly upload pipeline failed', { error }, ctx);
      return result;
    }
  }

  /** Step 1: upload the benchmark dataset, or mark it successful on a dry run. */
  private async uploadBenchmarkDataset(
    options: DatasetUploadOptions,
    result: WeeklyUploadResult
  ): Promise<void> {
    const ctx = HuggingFaceIntegrationService.LOG_CONTEXT;

    if (options.dryRun) {
      logger.info('DRY RUN: Skipping benchmark dataset upload', undefined, ctx);
      result.datasets.benchmarks.success = true;
      return;
    }

    logger.info('Step 1: Uploading benchmark dataset', undefined, ctx);
    const benchmarkResult = await this.datasetUploader.uploadDataset({
      datasetName:
        options.datasetName ||
        process.env.HF_DATASET_NAME ||
        'elizaos/agent-benchmarks',
      description: 'Weekly benchmark results for autonomous ElizaOS agents',
    });

    result.datasets.benchmarks = {
      success: benchmarkResult.success,
      url: benchmarkResult.datasetUrl,
      error: benchmarkResult.error,
    };

    if (!benchmarkResult.success) {
      result.errors.push(`Benchmark dataset upload: ${benchmarkResult.error}`);
    }
  }

  /** Step 2: export the trajectory dataset, or mark it successful on a dry run. */
  private async uploadTrajectoryDataset(
    options: DatasetUploadOptions,
    result: WeeklyUploadResult
  ): Promise<void> {
    const ctx = HuggingFaceIntegrationService.LOG_CONTEXT;

    if (options.dryRun) {
      logger.info('DRY RUN: Skipping trajectory dataset upload', undefined, ctx);
      result.datasets.trajectories.success = true;
      return;
    }

    logger.info('Step 2: Uploading trajectory dataset', undefined, ctx);
    const exportToHuggingFace = getExportToHuggingFace();
    const trajectoryResult = await exportToHuggingFace({
      datasetName:
        options.trajectoryDatasetName ||
        process.env.HF_TRAJECTORY_DATASET_NAME ||
        'elizaos/agent-trajectories',
      format: 'jsonl',
    });

    result.datasets.trajectories = {
      success: trajectoryResult.success,
      url: trajectoryResult.url,
      error: trajectoryResult.error,
    };

    if (!trajectoryResult.success) {
      result.errors.push(`Trajectory dataset upload: ${trajectoryResult.error}`);
    }
  }

  /**
   * Step 3: benchmark every unbenchmarked model and upload those whose
   * baseline comparison recommends `'deploy'`.
   *
   * A failure on one model is recorded and does not stop the others.
   * NOTE(review): benchmarking runs with `saveResults: true` even when
   * `options.dryRun` is set; only the upload is skipped. Confirm that
   * persisting results during a dry run is intended.
   */
  private async processUnbenchmarkedModels(
    options: DatasetUploadOptions,
    result: WeeklyUploadResult
  ): Promise<void> {
    const ctx = HuggingFaceIntegrationService.LOG_CONTEXT;

    const unbenchmarkedModels =
      await ModelBenchmarkService.getUnbenchmarkedModels();
    result.models.processed = unbenchmarkedModels.length;

    logger.info(
      `Step 3: Found ${unbenchmarkedModels.length} unbenchmarked models`,
      undefined,
      ctx
    );

    if (unbenchmarkedModels.length === 0) {
      return;
    }

    const standardBenchmarks =
      await ModelBenchmarkService.getStandardBenchmarkPaths();

    if (standardBenchmarks.length === 0) {
      const error = 'No standard benchmarks available for model evaluation';
      logger.error(error, undefined, ctx);
      result.errors.push(error);
      return;
    }

    for (const modelId of unbenchmarkedModels) {
      try {
        logger.info(`Benchmarking model: ${modelId}`, undefined, ctx);
        await ModelBenchmarkService.benchmarkModel({
          modelId,
          benchmarkPaths: standardBenchmarks,
          saveResults: true,
        });
        result.models.benchmarked++;

        const comparison =
          await ModelBenchmarkService.compareToBaseline(modelId);

        if (comparison.recommendation !== 'deploy') {
          logger.info(
            `Model ${modelId} not ready for deployment: ${comparison.recommendation}`,
            undefined,
            ctx
          );
          continue;
        }

        if (options.dryRun) {
          // Logged separately so a deployable model is not reported as
          // "not ready for deployment: deploy".
          logger.info(
            `DRY RUN: Skipping upload of model ${modelId}`,
            undefined,
            ctx
          );
          continue;
        }

        logger.info(`Model ${modelId} improved, uploading`, undefined, ctx);

        const model = await getTrainingDataAdapter().getModelById(modelId);

        if (!model) {
          // A deployable model without a stored record cannot be uploaded;
          // surface it instead of skipping silently.
          const message = `Model ${modelId}: record not found, upload skipped`;
          logger.error(message, undefined, ctx);
          result.errors.push(message);
          continue;
        }

        const namePrefix =
          options.modelNamePrefix || process.env.HF_MODEL_NAME;
        const modelName = namePrefix
          ? `${namePrefix}-${model.version}`
          : `elizaos/agent-${model.version}`;

        const modelDescription =
          options.modelDescriptionPrefix ||
          process.env.HF_MODEL_DESCRIPTION_PREFIX ||
          'Autonomous ElizaOS agent';

        const uploadResult = await this.modelUploader.uploadModel({
          modelId,
          modelName,
          description: `${modelDescription} - v${model.version}`,
          includeWeights: true,
        });

        if (!uploadResult.success) {
          result.errors.push(`Model upload ${modelId}: ${uploadResult.error}`);
          continue;
        }

        result.models.uploaded++;

        // Record which HuggingFace repo the model was published to.
        await getTrainingDataAdapter().updateModelHuggingFaceRepo(
          modelId,
          modelName
        );
      } catch (error) {
        const errorMsg =
          error instanceof Error ? error.message : String(error);
        logger.error(`Failed to process model ${modelId}`, { error }, ctx);
        result.errors.push(`Model ${modelId}: ${errorMsg}`);
      }
    }
  }

  /**
   * Check if new data is available for upload.
   *
   * Uses the adapter's last deployed-model date as the cut-off, falling back
   * to the Unix epoch when none is recorded.
   *
   * @returns Flags for new benchmarks, new trajectories and unbenchmarked
   *   models, together with the cut-off date and the underlying counts.
   */
  async hasNewDataToUpload(): Promise<{
    hasNewBenchmarks: boolean;
    hasNewTrajectories: boolean;
    hasUnbenchmarkedModels: boolean;
    details: {
      newBenchmarksSince?: Date;
      newTrajectoriesCount: number;
      unbenchmarkedModels: number;
    };
  }> {
    const adapter = getTrainingDataAdapter();

    const lastUploadTime =
      (await adapter.getLastDeployedModelDate()) || new Date(0);

    // The three lookups are independent of each other, so run them together.
    const [newBenchmarksCount, newTrajectoriesCount, unbenchmarkedModels] =
      await Promise.all([
        adapter.countBenchmarksSince(lastUploadTime),
        adapter.countTrajectoriesSince(lastUploadTime),
        ModelBenchmarkService.getUnbenchmarkedModels(),
      ]);

    return {
      hasNewBenchmarks: newBenchmarksCount > 0,
      hasNewTrajectories: newTrajectoriesCount > 0,
      hasUnbenchmarkedModels: unbenchmarkedModels.length > 0,
      details: {
        newBenchmarksSince: lastUploadTime,
        newTrajectoriesCount,
        unbenchmarkedModels: unbenchmarkedModels.length,
      },
    };
  }

  /**
   * Validate system is ready for HuggingFace operations.
   *
   * Blocking problems go to `issues` (missing token, unreachable database,
   * failed lookups); missing data goes to `warnings`.
   *
   * @returns `ready` is true only when `issues` is empty.
   */
  async validateSystemReadiness(): Promise<{
    ready: boolean;
    issues: string[];
    warnings: string[];
  }> {
    const ctx = HuggingFaceIntegrationService.LOG_CONTEXT;
    const issues: string[] = [];
    const warnings: string[] = [];

    if (!getHuggingFaceToken()) {
      issues.push(
        'HUGGING_FACE_TOKEN or HF_TOKEN environment variable not set'
      );
    }

    const adapter = getTrainingDataAdapter();

    // Database connectivity: an unhealthy result and a thrown error are
    // reported the same way.
    try {
      const healthy = await adapter.healthCheck();
      if (!healthy) {
        issues.push('Cannot connect to database');
      }
    } catch (error) {
      logger.error('Database health check failed', { error }, ctx);
      issues.push('Cannot connect to database');
    }

    // Standard benchmarks: guarded like the other lookups so a failure is
    // reported as an issue rather than rejecting the whole readiness check.
    try {
      const standardBenchmarks =
        await ModelBenchmarkService.getStandardBenchmarkPaths();
      if (standardBenchmarks.length === 0) {
        warnings.push(
          'No standard benchmarks found. Generate benchmark fixtures before upload.'
        );
      }
    } catch (error) {
      logger.error('Failed to look up standard benchmarks', { error }, ctx);
      issues.push('Could not retrieve standard benchmark paths');
    }

    // Data availability, taken from the adapter's training statistics.
    try {
      const stats = await adapter.getTrainingStatistics();

      if (stats.benchmarkCount === 0) {
        warnings.push(
          'No benchmark results in database. Run some benchmarks first.'
        );
      }

      if (stats.trajectoryTraining === 0) {
        warnings.push(
          'No training trajectories in database. Generate with agents or test data.'
        );
      }

      if (stats.modelTotal === 0) {
        warnings.push('No trained models in database.');
      }
    } catch (error) {
      logger.error('Failed to retrieve training statistics', { error }, ctx);
      issues.push('Could not retrieve training statistics');
    }

    return {
      ready: issues.length === 0,
      issues,
      warnings,
    };
  }

  /**
   * Get integration statistics.
   *
   * All figures come from the adapter's training statistics.
   * `huggingface.datasetsPublished` is derived from local counts (one per
   * dataset kind that has any rows); it is not read back from HuggingFace.
   */
  async getStatistics(): Promise<{
    benchmarks: { total: number; lastUpload?: Date };
    trajectories: { total: number; training: number };
    models: { total: number; benchmarked: number; deployed: number };
    huggingface: { datasetsPublished: number; modelsPublished: number };
  }> {
    const stats = await getTrainingDataAdapter().getTrainingStatistics();

    const benchmarkDatasets = stats.benchmarkCount > 0 ? 1 : 0;
    const trajectoryDatasets = stats.trajectoryTraining > 0 ? 1 : 0;

    return {
      benchmarks: {
        total: stats.benchmarkCount,
        lastUpload: stats.lastBenchmarkDate ?? undefined,
      },
      trajectories: {
        total: stats.trajectoryTotal,
        training: stats.trajectoryTraining,
      },
      models: {
        total: stats.modelTotal,
        benchmarked: stats.modelBenchmarked,
        deployed: stats.modelDeployed,
      },
      huggingface: {
        datasetsPublished: benchmarkDatasets + trajectoryDatasets,
        modelsPublished: stats.publishedRepoCount,
      },
    };
  }
}
|
|
425
|
+
|
|
426
|
+
/** Shared module-level instance of the HuggingFace integration service. */
export const huggingFaceIntegration: HuggingFaceIntegrationService =
  new HuggingFaceIntegrationService();
|