@elizaos/training 2.0.0-alpha.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (224):
  1. package/Dockerfile +75 -0
  2. package/LICENSE +21 -0
  3. package/Makefile +374 -0
  4. package/README.md +346 -0
  5. package/config/rubrics.json +137 -0
  6. package/docker-compose.test.yml +57 -0
  7. package/package.json +57 -0
  8. package/python/config/babylon_atropos.yaml +90 -0
  9. package/python/config/profiles/12gb.json +11 -0
  10. package/python/config/profiles/16gb.json +10 -0
  11. package/python/config/profiles/24gb.json +10 -0
  12. package/python/config/profiles/48gb.json +10 -0
  13. package/python/config/profiles/cpu.json +11 -0
  14. package/python/config/profiles/l40-2gpu-safe.json +20 -0
  15. package/python/config/profiles/l40-2gpu.json +22 -0
  16. package/python/config/profiles/l40-4gpu.json +21 -0
  17. package/python/config/profiles/l40.json +17 -0
  18. package/python/config/tinker_training.yaml +143 -0
  19. package/python/curriculum_state.json +165 -0
  20. package/python/env.template +86 -0
  21. package/python/env.training.template +46 -0
  22. package/python/pyproject.toml +41 -0
  23. package/python/requirements-ci.txt +31 -0
  24. package/python/requirements.txt +87 -0
  25. package/python/scripts/__init__.py +4 -0
  26. package/python/scripts/benchmark_should_respond.py +190 -0
  27. package/python/scripts/debug_inference.py +62 -0
  28. package/python/scripts/import_json_trajectories.py +412 -0
  29. package/python/scripts/local-finetune/README.md +63 -0
  30. package/python/scripts/local-finetune/ingest_and_score.py +139 -0
  31. package/python/scripts/local-finetune/merge_model.py +32 -0
  32. package/python/scripts/local-finetune/test_adapter.py +91 -0
  33. package/python/scripts/local-finetune/train_from_csv.py +132 -0
  34. package/python/scripts/merge_trajectories.py +318 -0
  35. package/python/scripts/optimize_prompt_grpo.py +269 -0
  36. package/python/scripts/run_ab_test.py +143 -0
  37. package/python/scripts/run_full_pipeline.py +544 -0
  38. package/python/scripts/run_tinker_training.py +192 -0
  39. package/python/scripts/run_training.py +914 -0
  40. package/python/scripts/test_generation.py +29 -0
  41. package/python/scripts/test_judge.py +155 -0
  42. package/python/scripts/test_pipeline.py +356 -0
  43. package/python/scripts/test_trained_model.py +380 -0
  44. package/python/scripts/train_grpo.py +360 -0
  45. package/python/scripts/train_jsonl.py +223 -0
  46. package/python/scripts/train_local.py +528 -0
  47. package/python/setup.py +20 -0
  48. package/python/src/__init__.py +190 -0
  49. package/python/src/data_bridge/__init__.py +24 -0
  50. package/python/src/data_bridge/converter.py +435 -0
  51. package/python/src/data_bridge/reader.py +393 -0
  52. package/python/src/models.py +283 -0
  53. package/python/src/training/__init__.py +605 -0
  54. package/python/src/training/ab_testing.py +404 -0
  55. package/python/src/training/action_executor.py +621 -0
  56. package/python/src/training/archetype_trainer.py +347 -0
  57. package/python/src/training/atropos_trainer.py +980 -0
  58. package/python/src/training/babylon_env.py +1254 -0
  59. package/python/src/training/error_recovery.py +647 -0
  60. package/python/src/training/evaluation.py +856 -0
  61. package/python/src/training/fast_simulator.py +880 -0
  62. package/python/src/training/format_validator.py +584 -0
  63. package/python/src/training/hybrid_env.py +522 -0
  64. package/python/src/training/kl_controller.py +628 -0
  65. package/python/src/training/multi_prompt_dataset.py +883 -0
  66. package/python/src/training/multi_turn.py +656 -0
  67. package/python/src/training/online_env.py +1084 -0
  68. package/python/src/training/quality_scorer.py +391 -0
  69. package/python/src/training/quality_utils.py +633 -0
  70. package/python/src/training/rewards.py +1344 -0
  71. package/python/src/training/rlaif_env.py +17 -0
  72. package/python/src/training/rollout_generator.py +502 -0
  73. package/python/src/training/rubric_loader.py +198 -0
  74. package/python/src/training/scenario_pool.py +1072 -0
  75. package/python/src/training/schemas.py +481 -0
  76. package/python/src/training/service_manager.py +552 -0
  77. package/python/src/training/simulation_bridge.py +535 -0
  78. package/python/src/training/tick_reward_attribution.py +399 -0
  79. package/python/src/training/tinker_client.py +575 -0
  80. package/python/src/training/tinker_trainer.py +646 -0
  81. package/python/src/training/tokenization_utils.py +402 -0
  82. package/python/tests/e2e/__init__.py +13 -0
  83. package/python/tests/e2e/conftest.py +258 -0
  84. package/python/tests/e2e/test_full_pipeline.py +643 -0
  85. package/python/tests/e2e/test_online_training_e2e.py +365 -0
  86. package/python/tests/integration/__init__.py +12 -0
  87. package/python/tests/integration/conftest.py +383 -0
  88. package/python/tests/integration/test_db_integration.py +649 -0
  89. package/python/tests/integration/test_json_mode_integration.py +554 -0
  90. package/python/tests/test_action_executor.py +594 -0
  91. package/python/tests/test_archetype_scoring.py +1027 -0
  92. package/python/tests/test_atropos_integration.py +360 -0
  93. package/python/tests/test_evaluation.py +727 -0
  94. package/python/tests/test_format_validator.py +486 -0
  95. package/python/tests/test_kl_controller.py +432 -0
  96. package/python/tests/test_lr_scheduler.py +579 -0
  97. package/python/tests/test_multi_turn.py +590 -0
  98. package/python/tests/test_online_env.py +519 -0
  99. package/python/tests/test_quality_scorer.py +474 -0
  100. package/python/tests/test_scenario_pool.py +735 -0
  101. package/python/tests/test_service_manager.py +585 -0
  102. package/python/tests/test_simulation_rollout.py +581 -0
  103. package/python/tests/test_tokenization_utils.py +501 -0
  104. package/python/tests/test_training_orchestrator.py +497 -0
  105. package/python/tests/test_training_output_structure.py +661 -0
  106. package/research-output/training-runs/training-run-1770772042899.json +26 -0
  107. package/research-output/training-runs/training-run-1770930079670.json +32 -0
  108. package/research-output/training-runs/training-run-1770930143700.json +44 -0
  109. package/research-output/training-runs/training-run-1770930183638.json +38 -0
  110. package/research-output/training-runs/training-run-1770930442049.json +38 -0
  111. package/research-output/training-runs/training-run-1770930793243.json +38 -0
  112. package/research-output/training-runs/training-run-1771276293257.json +38 -0
  113. package/research-output/training-runs/training-run-1771276389280.json +38 -0
  114. package/research-output/training-runs/training-run-1771276502776.json +38 -0
  115. package/research-output/training-runs/training-run-1771277340748.json +38 -0
  116. package/research-output/training-runs/training-run-1773013658993.json +38 -0
  117. package/research-output/training-runs/training-run-1773013861014.json +38 -0
  118. package/research-output/training-runs/training-run-1773014215983.json +38 -0
  119. package/scripts/assess-training-data.ts +422 -0
  120. package/scripts/e2e-training-test.ts +550 -0
  121. package/scripts/export-rubrics.ts +64 -0
  122. package/scripts/generate-research-report.ts +1523 -0
  123. package/scripts/generate_dataset.sh +173 -0
  124. package/scripts/generate_should_respond.ts +267 -0
  125. package/scripts/generate_should_respond_dataset.ts +162 -0
  126. package/scripts/json-mode-benchmark.ts +399 -0
  127. package/scripts/rank_trajectories.ts +207 -0
  128. package/scripts/real-archetype-benchmark.ts +210 -0
  129. package/scripts/run-baseline-comparison.ts +116 -0
  130. package/scripts/run-full-pipeline.ts +272 -0
  131. package/scripts/run_rlaif_loop.ts +78 -0
  132. package/scripts/run_task_benchmark.ts +247 -0
  133. package/scripts/runpod_setup.sh +137 -0
  134. package/scripts/runpod_validate.sh +147 -0
  135. package/scripts/test-model-in-game.ts +955 -0
  136. package/scripts/test-scoring.ts +73 -0
  137. package/scripts/test-trained-model.ts +209 -0
  138. package/scripts/train-and-test.ts +824 -0
  139. package/scripts/verify-final.ts +118 -0
  140. package/src/adapter.ts +516 -0
  141. package/src/archetypes/ArchetypeConfigService.ts +626 -0
  142. package/src/archetypes/derive-archetype.ts +249 -0
  143. package/src/archetypes/index.ts +22 -0
  144. package/src/benchmark/ArchetypeMatchupBenchmark.ts +825 -0
  145. package/src/benchmark/BenchmarkChartGenerator.ts +748 -0
  146. package/src/benchmark/BenchmarkDataGenerator.ts +1288 -0
  147. package/src/benchmark/BenchmarkDataViewer.ts +324 -0
  148. package/src/benchmark/BenchmarkHistoryService.ts +221 -0
  149. package/src/benchmark/BenchmarkRunner.ts +685 -0
  150. package/src/benchmark/BenchmarkValidator.ts +204 -0
  151. package/src/benchmark/FastEvalRunner.ts +225 -0
  152. package/src/benchmark/MetricsValidator.ts +165 -0
  153. package/src/benchmark/MetricsVisualizer.ts +909 -0
  154. package/src/benchmark/ModelBenchmarkService.ts +611 -0
  155. package/src/benchmark/ModelRegistry.ts +158 -0
  156. package/src/benchmark/RulerBenchmarkIntegration.ts +235 -0
  157. package/src/benchmark/SimulationA2AInterface.ts +1169 -0
  158. package/src/benchmark/SimulationEngine.ts +832 -0
  159. package/src/benchmark/TaskRunner.ts +94 -0
  160. package/src/benchmark/__tests__/BenchmarkRunner.test.ts +534 -0
  161. package/src/benchmark/__tests__/HeadToHead.test.ts +126 -0
  162. package/src/benchmark/index.ts +91 -0
  163. package/src/benchmark/parseSimulationMetrics.ts +124 -0
  164. package/src/benchmark/simulation-types.ts +78 -0
  165. package/src/dependencies.ts +475 -0
  166. package/src/generation/TrajectoryGenerator.ts +387 -0
  167. package/src/generation/index.ts +12 -0
  168. package/src/huggingface/HuggingFaceDatasetUploader.ts +636 -0
  169. package/src/huggingface/HuggingFaceIntegrationService.ts +426 -0
  170. package/src/huggingface/HuggingFaceModelUploader.ts +532 -0
  171. package/src/huggingface/index.ts +27 -0
  172. package/src/huggingface/shared/HuggingFaceUploadUtil.ts +206 -0
  173. package/src/index.ts +102 -0
  174. package/src/init-training.ts +53 -0
  175. package/src/metrics/TrajectoryMetricsExtractor.ts +653 -0
  176. package/src/metrics/__tests__/TrajectoryMetricsExtractor.test.ts +759 -0
  177. package/src/metrics/index.ts +8 -0
  178. package/src/metrics/types.ts +200 -0
  179. package/src/rubrics/__tests__/index.test.ts +184 -0
  180. package/src/rubrics/ass-kisser.ts +85 -0
  181. package/src/rubrics/degen.ts +80 -0
  182. package/src/rubrics/goody-twoshoes.ts +84 -0
  183. package/src/rubrics/index.ts +236 -0
  184. package/src/rubrics/information-trader.ts +84 -0
  185. package/src/rubrics/infosec.ts +101 -0
  186. package/src/rubrics/liar.ts +104 -0
  187. package/src/rubrics/perps-trader.ts +87 -0
  188. package/src/rubrics/researcher.ts +81 -0
  189. package/src/rubrics/scammer.ts +82 -0
  190. package/src/rubrics/social-butterfly.ts +73 -0
  191. package/src/rubrics/super-predictor.ts +97 -0
  192. package/src/rubrics/trader.ts +67 -0
  193. package/src/scoring/ArchetypeScoringService.ts +486 -0
  194. package/src/scoring/JudgePromptBuilder.ts +556 -0
  195. package/src/scoring/LLMJudgeCache.ts +401 -0
  196. package/src/scoring/index.ts +9 -0
  197. package/src/training/AutomationPipeline.ts +916 -0
  198. package/src/training/BenchmarkService.ts +518 -0
  199. package/src/training/ConfigValidator.ts +220 -0
  200. package/src/training/MarketOutcomesTracker.ts +187 -0
  201. package/src/training/ModelDeployer.ts +186 -0
  202. package/src/training/ModelFetcher.ts +76 -0
  203. package/src/training/ModelSelectionService.ts +341 -0
  204. package/src/training/ModelUsageVerifier.ts +160 -0
  205. package/src/training/MultiModelOrchestrator.ts +580 -0
  206. package/src/training/RLModelConfig.ts +407 -0
  207. package/src/training/RewardBackpropagationService.ts +149 -0
  208. package/src/training/RulerScoringService.ts +666 -0
  209. package/src/training/TrainingMonitor.ts +166 -0
  210. package/src/training/TrajectoryRecorder.ts +399 -0
  211. package/src/training/__tests__/TrajectoryRecorder.test.ts +472 -0
  212. package/src/training/index.ts +100 -0
  213. package/src/training/logRLConfig.ts +34 -0
  214. package/src/training/pipeline.ts +129 -0
  215. package/src/training/storage/ModelStorageService.ts +279 -0
  216. package/src/training/storage/TrainingDataArchiver.ts +197 -0
  217. package/src/training/storage/index.ts +17 -0
  218. package/src/training/types.ts +207 -0
  219. package/src/training/window-utils.ts +138 -0
  220. package/src/utils/index.ts +101 -0
  221. package/src/utils/logger.ts +59 -0
  222. package/src/utils/snowflake.ts +17 -0
  223. package/src/utils/synthetic-detector.ts +111 -0
  224. package/tsconfig.json +20 -0
@@ -0,0 +1,426 @@
1
+ /**
2
+ * HuggingFace Integration Service
3
+ *
4
+ * Orchestrates the complete HuggingFace integration pipeline.
5
+ * Main entry point for all HuggingFace operations.
6
+ */
7
+
8
+ import { getTrainingDataAdapter } from '../adapter';
9
+ import { ModelBenchmarkService } from '../benchmark/ModelBenchmarkService';
10
+ import { getExportToHuggingFace } from '../dependencies';
11
+ import { logger } from '../utils';
12
+ import { HuggingFaceDatasetUploader } from './HuggingFaceDatasetUploader';
13
+ import { HuggingFaceModelUploader } from './HuggingFaceModelUploader';
14
+ import { getHuggingFaceToken } from './shared/HuggingFaceUploadUtil';
15
+
16
/**
 * Outcome of publishing a single dataset.
 * `url` and `error` are copied from the uploader's result and may be absent.
 */
type DatasetUploadOutcome = { success: boolean; url?: string; error?: string };

/**
 * Summary returned by `HuggingFaceIntegrationService.executeWeeklyUpload`.
 */
export interface WeeklyUploadResult {
  /** True only when the pipeline ran to completion and `errors` is empty. */
  success: boolean;

  /** Per-dataset upload outcomes. In dry-run mode both are marked successful without uploading. */
  datasets: {
    /** Outcome of the benchmark dataset upload. */
    benchmarks: DatasetUploadOutcome;
    /** Outcome of the trajectory dataset export. */
    trajectories: DatasetUploadOutcome;
  };

  /** Counters for the model-processing step. */
  models: {
    /** Number of unbenchmarked models found when the step started. */
    processed: number;
    /** Number of models whose benchmark run completed without throwing. */
    benchmarked: number;
    /** Number of models whose upload reported success. */
    uploaded: number;
  };

  /** Human-readable failure messages collected across all steps. */
  errors: string[];

  /** Wall-clock time spent in the pipeline, in milliseconds. */
  duration: number;
}
30
+
31
/**
 * Optional overrides for `HuggingFaceIntegrationService.executeWeeklyUpload`.
 * Every field may be omitted; the pipeline then falls back to environment
 * variables and finally to built-in defaults.
 */
export interface DatasetUploadOptions {
  /**
   * Benchmark dataset repository name.
   * Fallbacks: `HF_DATASET_NAME`, then `'elizaos/agent-benchmarks'`.
   */
  datasetName?: string;

  /**
   * Trajectory dataset repository name.
   * Fallbacks: `HF_TRAJECTORY_DATASET_NAME`, then `'elizaos/agent-trajectories'`.
   */
  trajectoryDatasetName?: string;

  /**
   * Prefix for uploaded model repository names; the model version is appended
   * after a hyphen. Fallbacks: `HF_MODEL_NAME`, then `'elizaos/agent'`.
   */
  modelNamePrefix?: string;

  /**
   * Leading text of the uploaded model's description.
   * Fallbacks: `HF_MODEL_DESCRIPTION_PREFIX`, then `'Autonomous ElizaOS agent'`.
   */
  modelDescriptionPrefix?: string;

  /** When true, dataset and model uploads are skipped. */
  dryRun?: boolean;
}
38
+
39
/**
 * Orchestrates the HuggingFace publishing pipeline: benchmark dataset upload,
 * trajectory dataset export, and benchmarking/uploading of trained models.
 * Also exposes readiness checks and aggregate statistics.
 */
export class HuggingFaceIntegrationService {
  /** Context tag passed as the third argument of every logger call in this class. */
  private static readonly LOG_CONTEXT = 'HuggingFaceIntegration';

  private datasetUploader: HuggingFaceDatasetUploader;
  private modelUploader: HuggingFaceModelUploader;

  constructor() {
    this.datasetUploader = new HuggingFaceDatasetUploader();
    this.modelUploader = new HuggingFaceModelUploader();
  }

  /**
   * Execute the complete weekly upload pipeline.
   *
   * Runs three steps in order: (1) benchmark dataset upload, (2) trajectory
   * dataset export, (3) benchmarking of unbenchmarked models, uploading those
   * recommended for deployment. Step failures are collected in
   * `result.errors` rather than thrown; an exception escaping a step aborts
   * the remaining steps and is recorded as an error.
   *
   * @param options - Optional name overrides and the dry-run switch.
   * @returns A summary of the run. This method does not reject; `success` is
   *          true only when the pipeline finished with no recorded errors.
   */
  async executeWeeklyUpload(
    options: DatasetUploadOptions = {}
  ): Promise<WeeklyUploadResult> {
    const context = HuggingFaceIntegrationService.LOG_CONTEXT;
    const startTime = Date.now();
    logger.info('Starting weekly upload pipeline', options, context);

    const result: WeeklyUploadResult = {
      success: false,
      datasets: {
        benchmarks: { success: false },
        trajectories: { success: false },
      },
      models: {
        processed: 0,
        benchmarked: 0,
        uploaded: 0,
      },
      errors: [],
      duration: 0,
    };

    try {
      await this.uploadBenchmarkDataset(options, result);
      await this.uploadTrajectoryDataset(options, result);
      await this.processUnbenchmarkedModels(options, result);

      result.success = result.errors.length === 0;
      result.duration = Date.now() - startTime;

      logger.info(
        'Weekly upload pipeline complete',
        {
          success: result.success,
          benchmarkDataset: result.datasets.benchmarks.success,
          trajectoryDataset: result.datasets.trajectories.success,
          modelsProcessed: result.models.processed,
          modelsBenchmarked: result.models.benchmarked,
          modelsUploaded: result.models.uploaded,
          errors: result.errors.length,
          duration: result.duration,
        },
        context
      );

      return result;
    } catch (error) {
      // An exception escaped one of the steps: report what was gathered so far.
      result.duration = Date.now() - startTime;
      result.errors.push(
        error instanceof Error ? error.message : String(error)
      );
      logger.error('Weekly upload pipeline failed', { error }, context);
      return result;
    }
  }

  /** Step 1: upload the benchmark dataset, or mark it successful in dry-run mode. */
  private async uploadBenchmarkDataset(
    options: DatasetUploadOptions,
    result: WeeklyUploadResult
  ): Promise<void> {
    const context = HuggingFaceIntegrationService.LOG_CONTEXT;

    if (options.dryRun) {
      logger.info(
        'DRY RUN: Skipping benchmark dataset upload',
        undefined,
        context
      );
      result.datasets.benchmarks.success = true;
      return;
    }

    logger.info('Step 1: Uploading benchmark dataset', undefined, context);
    const benchmarkResult = await this.datasetUploader.uploadDataset({
      // `||` is intentional: an empty name must fall through to the next source.
      datasetName:
        options.datasetName ||
        process.env.HF_DATASET_NAME ||
        'elizaos/agent-benchmarks',
      description: 'Weekly benchmark results for autonomous ElizaOS agents',
    });

    result.datasets.benchmarks = {
      success: benchmarkResult.success,
      url: benchmarkResult.datasetUrl,
      error: benchmarkResult.error,
    };

    if (!benchmarkResult.success) {
      result.errors.push(
        `Benchmark dataset upload: ${benchmarkResult.error ?? 'unknown error'}`
      );
    }
  }

  /** Step 2: export the trajectory dataset, or mark it successful in dry-run mode. */
  private async uploadTrajectoryDataset(
    options: DatasetUploadOptions,
    result: WeeklyUploadResult
  ): Promise<void> {
    const context = HuggingFaceIntegrationService.LOG_CONTEXT;

    if (options.dryRun) {
      logger.info(
        'DRY RUN: Skipping trajectory dataset upload',
        undefined,
        context
      );
      result.datasets.trajectories.success = true;
      return;
    }

    logger.info('Step 2: Uploading trajectory dataset', undefined, context);
    const exportToHuggingFace = getExportToHuggingFace();
    const trajectoryResult = await exportToHuggingFace({
      datasetName:
        options.trajectoryDatasetName ||
        process.env.HF_TRAJECTORY_DATASET_NAME ||
        'elizaos/agent-trajectories',
      format: 'jsonl',
    });

    result.datasets.trajectories = {
      success: trajectoryResult.success,
      url: trajectoryResult.url,
      error: trajectoryResult.error,
    };

    if (!trajectoryResult.success) {
      result.errors.push(
        `Trajectory dataset upload: ${trajectoryResult.error ?? 'unknown error'}`
      );
    }
  }

  /**
   * Step 3: benchmark every unbenchmarked model and upload those whose
   * baseline comparison recommends deployment.
   *
   * A failure while handling one model is recorded in `result.errors` and
   * does not stop the remaining models from being processed.
   */
  private async processUnbenchmarkedModels(
    options: DatasetUploadOptions,
    result: WeeklyUploadResult
  ): Promise<void> {
    const context = HuggingFaceIntegrationService.LOG_CONTEXT;

    const unbenchmarkedModels =
      await ModelBenchmarkService.getUnbenchmarkedModels();
    result.models.processed = unbenchmarkedModels.length;

    logger.info(
      `Step 3: Found ${unbenchmarkedModels.length} unbenchmarked models`,
      undefined,
      context
    );

    if (unbenchmarkedModels.length === 0) {
      return;
    }

    const standardBenchmarks =
      await ModelBenchmarkService.getStandardBenchmarkPaths();

    if (standardBenchmarks.length === 0) {
      const error = 'No standard benchmarks available for model evaluation';
      logger.error(error, undefined, context);
      result.errors.push(error);
      return;
    }

    for (const modelId of unbenchmarkedModels) {
      try {
        // NOTE(review): benchmarks run and are saved even in dry-run mode;
        // only uploads are skipped. Confirm this is the intended meaning of dryRun.
        logger.info(`Benchmarking model: ${modelId}`, undefined, context);
        await ModelBenchmarkService.benchmarkModel({
          modelId,
          benchmarkPaths: standardBenchmarks,
          saveResults: true,
        });
        result.models.benchmarked++;

        const comparison =
          await ModelBenchmarkService.compareToBaseline(modelId);

        if (comparison.recommendation !== 'deploy') {
          logger.info(
            `Model ${modelId} not ready for deployment: ${comparison.recommendation}`,
            undefined,
            context
          );
          continue;
        }

        if (options.dryRun) {
          // Previously logged as "not ready for deployment: deploy", which was misleading.
          logger.info(
            `DRY RUN: Skipping upload of model ${modelId}`,
            undefined,
            context
          );
          continue;
        }

        logger.info(
          `Model ${modelId} improved, uploading`,
          undefined,
          context
        );

        const model = await getTrainingDataAdapter().getModelById(modelId);

        if (!model) {
          // Surface the inconsistency instead of silently skipping a model
          // that was recommended for deployment.
          const error = `Model ${modelId}: recommended for deployment but not found in training data`;
          logger.error(error, undefined, context);
          result.errors.push(error);
          continue;
        }

        const namePrefix =
          options.modelNamePrefix || process.env.HF_MODEL_NAME;
        const modelName = namePrefix
          ? `${namePrefix}-${model.version}`
          : `elizaos/agent-${model.version}`;

        const modelDescription =
          options.modelDescriptionPrefix ||
          process.env.HF_MODEL_DESCRIPTION_PREFIX ||
          'Autonomous ElizaOS agent';

        const uploadResult = await this.modelUploader.uploadModel({
          modelId,
          modelName,
          description: `${modelDescription} - v${model.version}`,
          includeWeights: true,
        });

        if (!uploadResult.success) {
          result.errors.push(
            `Model upload ${modelId}: ${uploadResult.error ?? 'unknown error'}`
          );
          continue;
        }

        result.models.uploaded++;

        // Record which HuggingFace repo now hosts this model.
        await getTrainingDataAdapter().updateModelHuggingFaceRepo(
          modelId,
          modelName
        );
      } catch (error) {
        const errorMsg =
          error instanceof Error ? error.message : String(error);
        logger.error(
          `Failed to process model ${modelId}`,
          { error },
          context
        );
        result.errors.push(`Model ${modelId}: ${errorMsg}`);
      }
    }
  }

  /**
   * Check if new data is available for upload.
   *
   * The reference point is the adapter's last deployed model date; when there
   * is none, the Unix epoch is used so that all existing data counts as new.
   *
   * @returns Flags for each data category plus the underlying counts.
   */
  async hasNewDataToUpload(): Promise<{
    hasNewBenchmarks: boolean;
    hasNewTrajectories: boolean;
    hasUnbenchmarkedModels: boolean;
    details: {
      newBenchmarksSince?: Date;
      newTrajectoriesCount: number;
      unbenchmarkedModels: number;
    };
  }> {
    const adapter = getTrainingDataAdapter();

    // NOTE(review): "last upload" is approximated by the last deployed model date.
    const lastUploadTime =
      (await adapter.getLastDeployedModelDate()) || new Date(0);

    const newBenchmarksCount =
      await adapter.countBenchmarksSince(lastUploadTime);
    const newTrajectoriesCount =
      await adapter.countTrajectoriesSince(lastUploadTime);
    const unbenchmarkedModels =
      await ModelBenchmarkService.getUnbenchmarkedModels();

    return {
      hasNewBenchmarks: newBenchmarksCount > 0,
      hasNewTrajectories: newTrajectoriesCount > 0,
      hasUnbenchmarkedModels: unbenchmarkedModels.length > 0,
      details: {
        newBenchmarksSince: lastUploadTime,
        newTrajectoriesCount,
        unbenchmarkedModels: unbenchmarkedModels.length,
      },
    };
  }

  /**
   * Validate system is ready for HuggingFace operations.
   *
   * Blocking problems go into `issues`: missing token, failed database health
   * check, unreadable training statistics. Missing data goes into `warnings`.
   *
   * @returns `ready` is true when no blocking issue was found.
   */
  async validateSystemReadiness(): Promise<{
    ready: boolean;
    issues: string[];
    warnings: string[];
  }> {
    const context = HuggingFaceIntegrationService.LOG_CONTEXT;
    const issues: string[] = [];
    const warnings: string[] = [];

    // Check HuggingFace token
    if (!getHuggingFaceToken()) {
      issues.push(
        'HUGGING_FACE_TOKEN or HF_TOKEN environment variable not set'
      );
    }

    const adapter = getTrainingDataAdapter();

    // Check database connection
    try {
      const healthy = await adapter.healthCheck();
      if (!healthy) {
        issues.push('Cannot connect to database');
      }
    } catch (error) {
      // Keep the reported issue unchanged, but log the cause for diagnosis.
      logger.error('Database health check failed', { error }, context);
      issues.push('Cannot connect to database');
    }

    // Check for standard benchmarks
    const standardBenchmarks =
      await ModelBenchmarkService.getStandardBenchmarkPaths();
    if (standardBenchmarks.length === 0) {
      warnings.push(
        'No standard benchmarks found. Generate benchmark fixtures before upload.'
      );
    }

    // Check for data using training statistics
    try {
      const stats = await adapter.getTrainingStatistics();

      if (stats.benchmarkCount === 0) {
        warnings.push(
          'No benchmark results in database. Run some benchmarks first.'
        );
      }

      if (stats.trajectoryTraining === 0) {
        warnings.push(
          'No training trajectories in database. Generate with agents or test data.'
        );
      }

      if (stats.modelTotal === 0) {
        warnings.push('No trained models in database.');
      }
    } catch (error) {
      logger.error(
        'Failed to retrieve training statistics',
        { error },
        context
      );
      issues.push('Could not retrieve training statistics');
    }

    return {
      ready: issues.length === 0,
      issues,
      warnings,
    };
  }

  /**
   * Get integration statistics derived from the adapter's training statistics.
   *
   * `huggingface.datasetsPublished` is an estimate: it counts one dataset for
   * each of benchmarks and training trajectories that has at least one row,
   * not the number of datasets actually published.
   */
  async getStatistics(): Promise<{
    benchmarks: { total: number; lastUpload?: Date };
    trajectories: { total: number; training: number };
    models: { total: number; benchmarked: number; deployed: number };
    huggingface: { datasetsPublished: number; modelsPublished: number };
  }> {
    const stats = await getTrainingDataAdapter().getTrainingStatistics();

    const hasBenchmarkData = stats.benchmarkCount > 0;
    const hasTrainingTrajectories = stats.trajectoryTraining > 0;

    return {
      benchmarks: {
        total: stats.benchmarkCount,
        lastUpload: stats.lastBenchmarkDate ?? undefined,
      },
      trajectories: {
        total: stats.trajectoryTotal,
        training: stats.trajectoryTraining,
      },
      models: {
        total: stats.modelTotal,
        benchmarked: stats.modelBenchmarked,
        deployed: stats.modelDeployed,
      },
      huggingface: {
        datasetsPublished:
          (hasBenchmarkData ? 1 : 0) + (hasTrainingTrajectories ? 1 : 0),
        modelsPublished: stats.publishedRepoCount,
      },
    };
  }
}
425
+
426
/**
 * Shared module-level instance of the integration service, created when this
 * module is first loaded.
 */
export const huggingFaceIntegration: HuggingFaceIntegrationService =
  new HuggingFaceIntegrationService();