@timmeck/brain-core 2.36.39 → 2.36.41
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent-training/agent-trainer.d.ts +85 -0
- package/dist/agent-training/agent-trainer.js +149 -0
- package/dist/agent-training/agent-trainer.js.map +1 -0
- package/dist/agent-training/benchmark-suite.d.ts +112 -0
- package/dist/agent-training/benchmark-suite.js +248 -0
- package/dist/agent-training/benchmark-suite.js.map +1 -0
- package/dist/agent-training/index.d.ts +4 -0
- package/dist/agent-training/index.js +3 -0
- package/dist/agent-training/index.js.map +1 -0
- package/dist/index.d.ts +6 -0
- package/dist/index.js +5 -0
- package/dist/index.js.map +1 -1
- package/dist/messaging/discord-bot.d.ts +51 -0
- package/dist/messaging/discord-bot.js +152 -0
- package/dist/messaging/discord-bot.js.map +1 -0
- package/dist/messaging/index.d.ts +6 -0
- package/dist/messaging/index.js +4 -0
- package/dist/messaging/index.js.map +1 -0
- package/dist/messaging/message-router.d.ts +67 -0
- package/dist/messaging/message-router.js +249 -0
- package/dist/messaging/message-router.js.map +1 -0
- package/dist/messaging/telegram-bot.d.ts +48 -0
- package/dist/messaging/telegram-bot.js +133 -0
- package/dist/messaging/telegram-bot.js.map +1 -0
- package/package.json +1 -1
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Agent Trainer — Training Loop mit Curriculum Learning
|
|
3
|
+
*
|
|
4
|
+
* Inspiriert von CrewAI's Agent Training und RLHF.
|
|
5
|
+
* Orchestriert Training-Zyklen mit steigender Schwierigkeit,
|
|
6
|
+
* sammelt Rewards und trackt Fortschritt über Zeit.
|
|
7
|
+
*
|
|
8
|
+
* Usage:
|
|
9
|
+
* ```typescript
|
|
10
|
+
* const trainer = new AgentTrainer(db);
|
|
11
|
+
* trainer.setBenchmarkSuite(suite);
|
|
12
|
+
* const report = await trainer.train(evalFn, { epochs: 3 });
|
|
13
|
+
* console.log(report.improvement);
|
|
14
|
+
* ```
|
|
15
|
+
*/
|
|
16
|
+
import type Database from 'better-sqlite3';
|
|
17
|
+
import type { BenchmarkSuite, EvalFunction, BenchmarkReport } from './benchmark-suite.js';
|
|
18
|
+
/** Options accepted by `AgentTrainer.train()`. Every field is optional. */
export interface TrainingConfig {
    /** How many epochs to run. Default: 3 */
    epochs?: number;
    /**
     * When enabled, epochs walk the curriculum (easy, medium, hard, then all
     * cases); when disabled, every epoch runs all cases. Default: true
     */
    curriculumLearning?: boolean;
    /** Accuracy an epoch has to reach to count as passed. Default: 0.5 */
    passThreshold?: number;
    /** Abort the session when accuracy drops sharply between epochs. Default: true */
    earlyStop?: boolean;
    /** Label for this training session; a timestamp-based name is generated when omitted. */
    name?: string;
}
|
|
30
|
+
/** Outcome of a single training epoch. */
export interface EpochResult {
    /** 1-based position of the epoch within its session. */
    epoch: number;
    /** Difficulty the benchmark was filtered to; `'all'` means no filter was applied. */
    difficulty: 'easy' | 'medium' | 'hard' | 'all';
    /** Benchmark report produced by this epoch's run. */
    report: BenchmarkReport;
    /** Whether the epoch's accuracy reached the configured pass threshold. */
    passed: boolean;
}
|
|
36
|
+
/** Summary of one complete training session, as returned by `AgentTrainer.train()`. */
export interface TrainingReport {
    /** Generated session identifier. */
    id: string;
    /** Session label taken from the training config. */
    name: string;
    /** Per-epoch results in the order the epochs were executed. */
    epochs: EpochResult[];
    /** Number of epochs actually executed (may be fewer than requested after an early stop). */
    totalEpochs: number;
    /** Accuracy of the last executed epoch. */
    finalAccuracy: number;
    /** Highest accuracy reached by any epoch. */
    bestAccuracy: number;
    /** Final accuracy minus the first epoch's accuracy. */
    improvement: number;
    /** Wall-clock duration of the session in milliseconds. */
    durationMs: number;
    /** Whether every executed epoch passed. */
    passed: boolean;
    /** Creation time as a `Date.now()` millisecond timestamp. */
    createdAt: number;
}
|
|
48
|
+
/** Aggregate figures over all persisted training sessions. */
export interface AgentTrainerStatus {
    /** Number of stored sessions. */
    totalTrainingSessions: number;
    /** Highest `bestAccuracy` across all sessions; 0 when none are stored. */
    bestAccuracy: number;
    /** Final accuracy of the most recent session, or `null` when none are stored. */
    lastAccuracy: number | null;
    /** Sum of the epochs run across all sessions. */
    totalEpochsRun: number;
    /** Mean improvement across all sessions, rounded to three decimals. */
    avgImprovement: number;
}
|
|
55
|
+
/** Creates the `training_sessions` table and its `created_at` index if they do not exist yet. */
export declare function runTrainerMigration(db: Database.Database): void;
|
|
56
|
+
/**
 * Runs benchmark-driven training sessions and stores one row per session
 * in the `training_sessions` table.
 */
export declare class AgentTrainer {
    private db;
    private readonly log;
    private suite;
    private stmtInsertSession;
    /** Runs the trainer migration on `db` and prepares the session insert statement. */
    constructor(db: Database.Database);
    /** Registers the benchmark suite every epoch is evaluated against. */
    setBenchmarkSuite(suite: BenchmarkSuite): void;
    /**
     * Executes one training session.
     *
     * With curriculum learning the epochs run easy → medium → hard and then
     * all cases; without it every epoch runs all cases. The finished report
     * is persisted on a best-effort basis and returned.
     *
     * @param evalFn Function under evaluation, forwarded to the benchmark suite.
     * @param config Optional session settings.
     * @throws Error when no benchmark suite has been set.
     */
    train(evalFn: EvalFunction, config?: TrainingConfig): Promise<TrainingReport>;
    /**
     * Lists stored sessions, newest first.
     *
     * @param limit Maximum number of rows to return. Default: 20
     */
    getHistory(limit?: number): Array<{
        id: string;
        name: string;
        finalAccuracy: number;
        bestAccuracy: number;
        improvement: number;
        passed: boolean;
        createdAt: string;
    }>;
    /** Loads the full stored report of one session; `null` when it is missing or unreadable. */
    getSession(id: string): TrainingReport | null;
    /** Aggregates statistics over all stored sessions. */
    getStatus(): AgentTrainerStatus;
}
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Agent Trainer — Training Loop mit Curriculum Learning
|
|
3
|
+
*
|
|
4
|
+
* Inspiriert von CrewAI's Agent Training und RLHF.
|
|
5
|
+
* Orchestriert Training-Zyklen mit steigender Schwierigkeit,
|
|
6
|
+
* sammelt Rewards und trackt Fortschritt über Zeit.
|
|
7
|
+
*
|
|
8
|
+
* Usage:
|
|
9
|
+
* ```typescript
|
|
10
|
+
* const trainer = new AgentTrainer(db);
|
|
11
|
+
* trainer.setBenchmarkSuite(suite);
|
|
12
|
+
* const report = await trainer.train(evalFn, { epochs: 3 });
|
|
13
|
+
* console.log(report.improvement);
|
|
14
|
+
* ```
|
|
15
|
+
*/
|
|
16
|
+
import { getLogger } from '../utils/logger.js';
|
|
17
|
+
// ── Migration ───────────────────────────────────────────
|
|
18
|
+
export function runTrainerMigration(db) {
|
|
19
|
+
db.exec(`
|
|
20
|
+
CREATE TABLE IF NOT EXISTS training_sessions (
|
|
21
|
+
id TEXT PRIMARY KEY,
|
|
22
|
+
name TEXT NOT NULL,
|
|
23
|
+
total_epochs INTEGER NOT NULL DEFAULT 0,
|
|
24
|
+
final_accuracy REAL NOT NULL DEFAULT 0,
|
|
25
|
+
best_accuracy REAL NOT NULL DEFAULT 0,
|
|
26
|
+
improvement REAL NOT NULL DEFAULT 0,
|
|
27
|
+
duration_ms INTEGER NOT NULL DEFAULT 0,
|
|
28
|
+
passed INTEGER NOT NULL DEFAULT 0,
|
|
29
|
+
report TEXT DEFAULT '{}',
|
|
30
|
+
created_at TEXT DEFAULT (datetime('now'))
|
|
31
|
+
);
|
|
32
|
+
CREATE INDEX IF NOT EXISTS idx_training_created ON training_sessions(created_at);
|
|
33
|
+
`);
|
|
34
|
+
}
|
|
35
|
+
// ── Trainer ─────────────────────────────────────────────
|
|
36
|
+
export class AgentTrainer {
|
|
37
|
+
db;
|
|
38
|
+
log = getLogger();
|
|
39
|
+
suite = null;
|
|
40
|
+
stmtInsertSession;
|
|
41
|
+
constructor(db) {
|
|
42
|
+
this.db = db;
|
|
43
|
+
runTrainerMigration(db);
|
|
44
|
+
this.stmtInsertSession = db.prepare('INSERT INTO training_sessions (id, name, total_epochs, final_accuracy, best_accuracy, improvement, duration_ms, passed, report) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)');
|
|
45
|
+
}
|
|
46
|
+
/** Set the benchmark suite used for evaluation. */
|
|
47
|
+
setBenchmarkSuite(suite) {
|
|
48
|
+
this.suite = suite;
|
|
49
|
+
}
|
|
50
|
+
/**
|
|
51
|
+
* Run a training session with curriculum learning.
|
|
52
|
+
*
|
|
53
|
+
* Curriculum order: easy → medium → hard → all
|
|
54
|
+
* Each epoch runs the benchmark at a difficulty level.
|
|
55
|
+
* If curriculum disabled, runs all cases each epoch.
|
|
56
|
+
*/
|
|
57
|
+
async train(evalFn, config = {}) {
|
|
58
|
+
if (!this.suite)
|
|
59
|
+
throw new Error('BenchmarkSuite not set — call setBenchmarkSuite() first');
|
|
60
|
+
const { epochs = 3, curriculumLearning = true, passThreshold = 0.5, earlyStop = true, name = `training-${Date.now()}`, } = config;
|
|
61
|
+
const startTime = Date.now();
|
|
62
|
+
const epochResults = [];
|
|
63
|
+
let bestAccuracy = 0;
|
|
64
|
+
let prevAccuracy = 0;
|
|
65
|
+
const difficulties = curriculumLearning
|
|
66
|
+
? ['easy', 'medium', 'hard', ...Array(Math.max(0, epochs - 3)).fill('all')]
|
|
67
|
+
: Array(epochs).fill('all');
|
|
68
|
+
for (let i = 0; i < epochs; i++) {
|
|
69
|
+
const difficulty = difficulties[i] ?? 'all';
|
|
70
|
+
const difficultyFilter = difficulty === 'all' ? undefined : difficulty;
|
|
71
|
+
this.log.debug(`[AgentTrainer] Epoch ${i + 1}/${epochs} (difficulty: ${difficulty})`);
|
|
72
|
+
const report = await this.suite.run(evalFn, {
|
|
73
|
+
name: `${name}-epoch-${i + 1}`,
|
|
74
|
+
difficulty: difficultyFilter,
|
|
75
|
+
});
|
|
76
|
+
const passed = report.accuracy >= passThreshold;
|
|
77
|
+
epochResults.push({ epoch: i + 1, difficulty, report, passed });
|
|
78
|
+
if (report.accuracy > bestAccuracy)
|
|
79
|
+
bestAccuracy = report.accuracy;
|
|
80
|
+
// Early stop: accuracy dropped significantly from previous epoch
|
|
81
|
+
if (earlyStop && i > 0 && report.accuracy < prevAccuracy - 0.2) {
|
|
82
|
+
this.log.debug(`[AgentTrainer] Early stop: accuracy dropped ${(prevAccuracy - report.accuracy).toFixed(2)}`);
|
|
83
|
+
break;
|
|
84
|
+
}
|
|
85
|
+
prevAccuracy = report.accuracy;
|
|
86
|
+
}
|
|
87
|
+
const finalAccuracy = epochResults.length > 0 ? epochResults[epochResults.length - 1].report.accuracy : 0;
|
|
88
|
+
const firstAccuracy = epochResults.length > 0 ? epochResults[0].report.accuracy : 0;
|
|
89
|
+
const improvement = finalAccuracy - firstAccuracy;
|
|
90
|
+
const trainingReport = {
|
|
91
|
+
id: `train-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
|
|
92
|
+
name,
|
|
93
|
+
epochs: epochResults,
|
|
94
|
+
totalEpochs: epochResults.length,
|
|
95
|
+
finalAccuracy,
|
|
96
|
+
bestAccuracy,
|
|
97
|
+
improvement,
|
|
98
|
+
durationMs: Date.now() - startTime,
|
|
99
|
+
passed: epochResults.every(e => e.passed),
|
|
100
|
+
createdAt: Date.now(),
|
|
101
|
+
};
|
|
102
|
+
// Persist
|
|
103
|
+
try {
|
|
104
|
+
this.stmtInsertSession.run(trainingReport.id, trainingReport.name, trainingReport.totalEpochs, trainingReport.finalAccuracy, trainingReport.bestAccuracy, trainingReport.improvement, trainingReport.durationMs, trainingReport.passed ? 1 : 0, JSON.stringify(trainingReport));
|
|
105
|
+
}
|
|
106
|
+
catch (e) {
|
|
107
|
+
this.log.warn(`[AgentTrainer] Failed to persist training: ${e.message}`);
|
|
108
|
+
}
|
|
109
|
+
return trainingReport;
|
|
110
|
+
}
|
|
111
|
+
// ── History ─────────────────────────────────────────
|
|
112
|
+
/** Get past training sessions. */
|
|
113
|
+
getHistory(limit = 20) {
|
|
114
|
+
return this.db.prepare('SELECT id, name, final_accuracy as finalAccuracy, best_accuracy as bestAccuracy, improvement, passed, created_at as createdAt FROM training_sessions ORDER BY created_at DESC LIMIT ?').all(limit).map(r => ({ ...r, passed: !!r.passed }));
|
|
115
|
+
}
|
|
116
|
+
/** Get a specific training session's full report. */
|
|
117
|
+
getSession(id) {
|
|
118
|
+
const row = this.db.prepare('SELECT report FROM training_sessions WHERE id = ?').get(id);
|
|
119
|
+
if (!row)
|
|
120
|
+
return null;
|
|
121
|
+
try {
|
|
122
|
+
return JSON.parse(row.report);
|
|
123
|
+
}
|
|
124
|
+
catch {
|
|
125
|
+
return null;
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
// ── Status ──────────────────────────────────────────
|
|
129
|
+
getStatus() {
|
|
130
|
+
try {
|
|
131
|
+
const total = this.db.prepare('SELECT COUNT(*) as c FROM training_sessions').get().c;
|
|
132
|
+
const best = this.db.prepare('SELECT MAX(best_accuracy) as v FROM training_sessions').get().v ?? 0;
|
|
133
|
+
const last = this.db.prepare('SELECT final_accuracy FROM training_sessions ORDER BY created_at DESC LIMIT 1').get();
|
|
134
|
+
const totalEpochs = this.db.prepare('SELECT COALESCE(SUM(total_epochs), 0) as v FROM training_sessions').get().v;
|
|
135
|
+
const avgImprovement = this.db.prepare('SELECT COALESCE(AVG(improvement), 0) as v FROM training_sessions').get().v;
|
|
136
|
+
return {
|
|
137
|
+
totalTrainingSessions: total,
|
|
138
|
+
bestAccuracy: best,
|
|
139
|
+
lastAccuracy: last?.final_accuracy ?? null,
|
|
140
|
+
totalEpochsRun: totalEpochs,
|
|
141
|
+
avgImprovement: Math.round(avgImprovement * 1000) / 1000,
|
|
142
|
+
};
|
|
143
|
+
}
|
|
144
|
+
catch {
|
|
145
|
+
return { totalTrainingSessions: 0, bestAccuracy: 0, lastAccuracy: null, totalEpochsRun: 0, avgImprovement: 0 };
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
//# sourceMappingURL=agent-trainer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"agent-trainer.js","sourceRoot":"","sources":["../../src/agent-training/agent-trainer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAGH,OAAO,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AA8C/C,2DAA2D;AAE3D,MAAM,UAAU,mBAAmB,CAAC,EAAqB;IACvD,EAAE,CAAC,IAAI,CAAC;;;;;;;;;;;;;;GAcP,CAAC,CAAC;AACL,CAAC;AAED,2DAA2D;AAE3D,MAAM,OAAO,YAAY;IAKH;IAJH,GAAG,GAAG,SAAS,EAAE,CAAC;IAC3B,KAAK,GAA0B,IAAI,CAAC;IACpC,iBAAiB,CAAqB;IAE9C,YAAoB,EAAqB;QAArB,OAAE,GAAF,EAAE,CAAmB;QACvC,mBAAmB,CAAC,EAAE,CAAC,CAAC;QAExB,IAAI,CAAC,iBAAiB,GAAG,EAAE,CAAC,OAAO,CACjC,oKAAoK,CACrK,CAAC;IACJ,CAAC;IAED,mDAAmD;IACnD,iBAAiB,CAAC,KAAqB;QACrC,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;IACrB,CAAC;IAED;;;;;;OAMG;IACH,KAAK,CAAC,KAAK,CACT,MAAoB,EACpB,SAAyB,EAAE;QAE3B,IAAI,CAAC,IAAI,CAAC,KAAK;YAAE,MAAM,IAAI,KAAK,CAAC,yDAAyD,CAAC,CAAC;QAE5F,MAAM,EACJ,MAAM,GAAG,CAAC,EACV,kBAAkB,GAAG,IAAI,EACzB,aAAa,GAAG,GAAG,EACnB,SAAS,GAAG,IAAI,EAChB,IAAI,GAAG,YAAY,IAAI,CAAC,GAAG,EAAE,EAAE,GAChC,GAAG,MAAM,CAAC;QAEX,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,MAAM,YAAY,GAAkB,EAAE,CAAC;QACvC,IAAI,YAAY,GAAG,CAAC,CAAC;QACrB,IAAI,YAAY,GAAG,CAAC,CAAC;QAErB,MAAM,YAAY,GAA8C,kBAAkB;YAChF,CAAC,CAAC,CAAC,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAY,CAAC;YACtF,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,CAAY,CAAC;QAEzC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAChC,MAAM,UAAU,GAAG,YAAY,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC;YAC5C,MAAM,gBAAgB,GAAG,UAAU,KAAK,KAAK,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,UAAU,CAAC;YAEvE,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,wBAAwB,CAAC,GAAG,CAAC,IAAI,MAAM,iBAAiB,UAAU,GAAG,CAAC,CAAC;YAEtF,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,MAAM,EAAE;gBAC1C,IAAI,EAAE,GAAG,IAAI,UAAU,CAAC,GAAG,CAAC,EAAE;gBAC9B,UAAU,EAAE,gBAAgB;aAC7B,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,MAAM,CAAC,QAAQ,IAAI,aAAa,CAAC;YAChD,YAAY,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,CAAC,GAAG,CAAC,EAAE,UAAU,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC;YAEhE,IAAI,MAAM,CAAC,QAAQ,GAAG,YAAY
;gBAAE,YAAY,GAAG,MAAM,CAAC,QAAQ,CAAC;YAEnE,iEAAiE;YACjE,IAAI,SAAS,IAAI,CAAC,GAAG,CAAC,IAAI,MAAM,CAAC,QAAQ,GAAG,YAAY,GAAG,GAAG,EAAE,CAAC;gBAC/D,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,+CAA+C,CAAC,YAAY,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;gBAC7G,MAAM;YACR,CAAC;YAED,YAAY,GAAG,MAAM,CAAC,QAAQ,CAAC;QACjC,CAAC;QAED,MAAM,aAAa,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;QAC1G,MAAM,aAAa,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;QACpF,MAAM,WAAW,GAAG,aAAa,GAAG,aAAa,CAAC;QAElD,MAAM,cAAc,GAAmB;YACrC,EAAE,EAAE,SAAS,IAAI,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE;YACnE,IAAI;YACJ,MAAM,EAAE,YAAY;YACpB,WAAW,EAAE,YAAY,CAAC,MAAM;YAChC,aAAa;YACb,YAAY;YACZ,WAAW;YACX,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;YAClC,MAAM,EAAE,YAAY,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC;YACzC,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;SACtB,CAAC;QAEF,UAAU;QACV,IAAI,CAAC;YACH,IAAI,CAAC,iBAAiB,CAAC,GAAG,CACxB,cAAc,CAAC,EAAE,EAAE,cAAc,CAAC,IAAI,EAAE,cAAc,CAAC,WAAW,EAClE,cAAc,CAAC,aAAa,EAAE,cAAc,CAAC,YAAY,EACzD,cAAc,CAAC,WAAW,EAAE,cAAc,CAAC,UAAU,EACrD,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,cAAc,CAAC,CAC9D,CAAC;QACJ,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,8CAA+C,CAAW,CAAC,OAAO,EAAE,CAAC,CAAC;QACtF,CAAC;QAED,OAAO,cAAc,CAAC;IACxB,CAAC;IAED,uDAAuD;IAEvD,kCAAkC;IAClC,UAAU,CAAC,KAAK,GAAG,EAAE;QACnB,OAAO,IAAI,CAAC,EAAE,CAAC,OAAO,CACpB,uLAAuL,CACxL,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,GAAI,CAA6B,EAAE,MAAM,EAAE,CAAC,CAAE,CAA6B,CAAC,MAAM,EAAE,CAAC,CAA8I,CAAC;IAC/P,CAAC;IAED,qDAAqD;IACrD,UAAU,CAAC,EAAU;QACnB,MAAM,GAAG,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,mDAAmD,CAAC,CAAC,GAAG,CAAC,EAAE,CAAmC,CAAC;QAC3H,IAAI,CAAC,GAAG;YAAE,OAAO,IAAI,CAAC;QACtB,IAAI,CAAC;YAAC,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,MAAM,C
AAC,CAAC;QAAC,CAAC;QAAC,MAAM,CAAC;YAAC,OAAO,IAAI,CAAC;QAAC,CAAC;IAC/D,CAAC;IAED,uDAAuD;IAEvD,SAAS;QACP,IAAI,CAAC;YACH,MAAM,KAAK,GAAI,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,6CAA6C,CAAC,CAAC,GAAG,EAAoB,CAAC,CAAC,CAAC;YACxG,MAAM,IAAI,GAAI,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,uDAAuD,CAAC,CAAC,GAAG,EAA2B,CAAC,CAAC,IAAI,CAAC,CAAC;YAC7H,MAAM,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,+EAA+E,CAAC,CAAC,GAAG,EAA4C,CAAC;YAC9J,MAAM,WAAW,GAAI,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,mEAAmE,CAAC,CAAC,GAAG,EAAoB,CAAC,CAAC,CAAC;YACpI,MAAM,cAAc,GAAI,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,kEAAkE,CAAC,CAAC,GAAG,EAAoB,CAAC,CAAC,CAAC;YAEtI,OAAO;gBACL,qBAAqB,EAAE,KAAK;gBAC5B,YAAY,EAAE,IAAI;gBAClB,YAAY,EAAE,IAAI,EAAE,cAAc,IAAI,IAAI;gBAC1C,cAAc,EAAE,WAAW;gBAC3B,cAAc,EAAE,IAAI,CAAC,KAAK,CAAC,cAAc,GAAG,IAAI,CAAC,GAAG,IAAI;aACzD,CAAC;QACJ,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,EAAE,qBAAqB,EAAE,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,YAAY,EAAE,IAAI,EAAE,cAAc,EAAE,CAAC,EAAE,cAAc,EAAE,CAAC,EAAE,CAAC;QACjH,CAAC;IACH,CAAC;CACF"}
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Benchmark Suite — Evaluation Harness für das Brain Ecosystem
|
|
3
|
+
*
|
|
4
|
+
* Inspiriert von CrewAI's Agent Testing und LangSmith Evaluation.
|
|
5
|
+
* Definiert Evaluations-Tasks, führt Benchmarks durch und sammelt Metriken.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* ```typescript
|
|
9
|
+
* suite.addCase({ input: 'What is 2+2?', expected: '4', category: 'math', difficulty: 'easy' });
|
|
10
|
+
* const result = await suite.run(async (input) => agent.process(input));
|
|
11
|
+
* console.log(result.accuracy, result.avgLatencyMs);
|
|
12
|
+
* ```
|
|
13
|
+
*/
|
|
14
|
+
import type Database from 'better-sqlite3';
|
|
15
|
+
/** One evaluation case stored in the benchmark dataset. */
export interface EvalCase {
    /** Database row id; absent on cases that have not been stored yet. */
    id?: number;
    /** Text handed to the function under evaluation. */
    input: string;
    /** Answer the output is scored against. */
    expected: string;
    /** Free-form grouping label used for filtering and per-category statistics. */
    category: string;
    /** Difficulty bucket used for filtering and per-difficulty statistics. */
    difficulty: 'easy' | 'medium' | 'hard';
    /** Optional extra data, stored as JSON alongside the case. */
    metadata?: Record<string, unknown>;
}
|
|
23
|
+
/** Outcome of running one evaluation case. */
export interface EvalResult {
    /** Id of the evaluated case (0 when the case carried no id). */
    caseId: number;
    /** Input that was given to the function under evaluation. */
    input: string;
    /** Expected answer of the case. */
    expected: string;
    /** Output produced by the function; empty when it threw. */
    actual: string;
    /** Whether the scoring function accepted `actual`. */
    correct: boolean;
    /** Time spent on this case in milliseconds. */
    latencyMs: number;
    /** Category copied from the case. */
    category: string;
    /** Difficulty copied from the case. */
    difficulty: string;
    /** Message of the error raised while evaluating, if any. */
    error?: string;
}
|
|
34
|
+
/** Aggregated result of one benchmark run. */
export interface BenchmarkReport {
    /** Generated run identifier. */
    id: string;
    /** Run label. */
    name: string;
    /** Number of cases that were evaluated. */
    totalCases: number;
    /** Cases whose output was scored as correct. */
    passed: number;
    /** Cases that produced an output which was scored as incorrect. */
    failed: number;
    /** Cases during which an error was raised. */
    errored: number;
    /** `passed / totalCases`; 0 for an empty run. */
    accuracy: number;
    /** Mean per-case latency in milliseconds, rounded to an integer. */
    avgLatencyMs: number;
    /** Median per-case latency in milliseconds. */
    p50LatencyMs: number;
    /** 99th-percentile per-case latency in milliseconds. */
    p99LatencyMs: number;
    /** Totals and accuracy keyed by case category. */
    byCategory: Record<string, {
        total: number;
        passed: number;
        accuracy: number;
    }>;
    /** Totals and accuracy keyed by case difficulty. */
    byDifficulty: Record<string, {
        total: number;
        passed: number;
        accuracy: number;
    }>;
    /** Individual case results in execution order. */
    results: EvalResult[];
    /** Wall-clock duration of the whole run in milliseconds. */
    durationMs: number;
    /** Creation time as a `Date.now()` millisecond timestamp. */
    createdAt: number;
}
|
|
59
|
+
/** Snapshot of the benchmark dataset and its run history. */
export interface BenchmarkSuiteStatus {
    /** Number of stored evaluation cases. */
    totalCases: number;
    /** Number of stored benchmark runs. */
    totalRuns: number;
    /** Distinct case categories, sorted alphabetically. */
    categories: string[];
    /** Accuracy of the most recent run, or `null` when no run is stored. */
    lastRunAccuracy: number | null;
    /** Highest accuracy of any stored run; 0 when no run is stored. */
    bestAccuracy: number;
}
|
|
66
|
+
/** Function under evaluation: receives a case's input and resolves to the produced answer. */
export type EvalFunction = (input: string) => Promise<string>;
|
|
67
|
+
/** Decides whether an actual output counts as correct for the expected answer. */
export type ScoreFunction = (expected: string, actual: string) => boolean;
|
|
68
|
+
/** Creates the `eval_cases` and `benchmark_runs` tables and their indexes if they do not exist yet. */
export declare function runBenchmarkMigration(db: Database.Database): void;
|
|
69
|
+
/**
 * Evaluation harness: manages a dataset of cases in `eval_cases`, runs a
 * function against them and stores one report per run in `benchmark_runs`.
 */
export declare class BenchmarkSuite {
    private db;
    private readonly log;
    private stmtInsertCase;
    private stmtInsertRun;
    /** Custom scoring function. Default: exact match (trimmed, case-insensitive). */
    scoreFunction: ScoreFunction;
    /** Runs the benchmark migration on `db` and prepares the insert statements. */
    constructor(db: Database.Database);
    /** Stores one case and returns its new row id. */
    addCase(c: EvalCase): number;
    /** Stores several cases in a single transaction and returns how many were added. */
    addCases(cases: EvalCase[]): number;
    /** Returns the stored cases ordered by id, optionally narrowed by category and/or difficulty. */
    getCases(filter?: {
        category?: string;
        difficulty?: string;
    }): EvalCase[];
    /** Returns the distinct categories in alphabetical order. */
    getCategories(): string[];
    /** Deletes the case with the given id; `true` when a row was removed. */
    deleteCase(id: number): boolean;
    /** Deletes every case and returns the number of removed rows. */
    clearCases(): number;
    /**
     * Evaluates `evalFn` against all cases, or the subset selected by
     * `options.category` / `options.difficulty`, one case at a time.
     * An empty selection yields an empty report.
     */
    run(evalFn: EvalFunction, options?: {
        name?: string;
        category?: string;
        difficulty?: string;
    }): Promise<BenchmarkReport>;
    /**
     * Lists stored runs, newest first.
     *
     * @param limit Maximum number of rows to return. Default: 20
     */
    getHistory(limit?: number): Array<{
        id: string;
        name: string;
        accuracy: number;
        totalCases: number;
        durationMs: number;
        createdAt: string;
    }>;
    /** Loads the full stored report of one run; `null` when it is missing or unreadable. */
    getRun(id: string): BenchmarkReport | null;
    /** Summarises dataset size and run history. */
    getStatus(): BenchmarkSuiteStatus;
    private buildReport;
    private emptyReport;
}
|
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Benchmark Suite — Evaluation Harness für das Brain Ecosystem
|
|
3
|
+
*
|
|
4
|
+
* Inspiriert von CrewAI's Agent Testing und LangSmith Evaluation.
|
|
5
|
+
* Definiert Evaluations-Tasks, führt Benchmarks durch und sammelt Metriken.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* ```typescript
|
|
9
|
+
* suite.addCase({ input: 'What is 2+2?', expected: '4', category: 'math', difficulty: 'easy' });
|
|
10
|
+
* const result = await suite.run(async (input) => agent.process(input));
|
|
11
|
+
* console.log(result.accuracy, result.avgLatencyMs);
|
|
12
|
+
* ```
|
|
13
|
+
*/
|
|
14
|
+
import { getLogger } from '../utils/logger.js';
|
|
15
|
+
// ── Migration ───────────────────────────────────────────
|
|
16
|
+
export function runBenchmarkMigration(db) {
|
|
17
|
+
db.exec(`
|
|
18
|
+
CREATE TABLE IF NOT EXISTS eval_cases (
|
|
19
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
20
|
+
input TEXT NOT NULL,
|
|
21
|
+
expected TEXT NOT NULL,
|
|
22
|
+
category TEXT NOT NULL DEFAULT 'general',
|
|
23
|
+
difficulty TEXT NOT NULL DEFAULT 'medium',
|
|
24
|
+
metadata TEXT DEFAULT '{}',
|
|
25
|
+
created_at TEXT DEFAULT (datetime('now'))
|
|
26
|
+
);
|
|
27
|
+
CREATE INDEX IF NOT EXISTS idx_eval_category ON eval_cases(category);
|
|
28
|
+
|
|
29
|
+
CREATE TABLE IF NOT EXISTS benchmark_runs (
|
|
30
|
+
id TEXT PRIMARY KEY,
|
|
31
|
+
name TEXT NOT NULL,
|
|
32
|
+
total_cases INTEGER NOT NULL DEFAULT 0,
|
|
33
|
+
passed INTEGER NOT NULL DEFAULT 0,
|
|
34
|
+
failed INTEGER NOT NULL DEFAULT 0,
|
|
35
|
+
errored INTEGER NOT NULL DEFAULT 0,
|
|
36
|
+
accuracy REAL NOT NULL DEFAULT 0,
|
|
37
|
+
avg_latency_ms REAL NOT NULL DEFAULT 0,
|
|
38
|
+
duration_ms INTEGER NOT NULL DEFAULT 0,
|
|
39
|
+
report TEXT DEFAULT '{}',
|
|
40
|
+
created_at TEXT DEFAULT (datetime('now'))
|
|
41
|
+
);
|
|
42
|
+
CREATE INDEX IF NOT EXISTS idx_benchmark_created ON benchmark_runs(created_at);
|
|
43
|
+
`);
|
|
44
|
+
}
|
|
45
|
+
// ── Suite ───────────────────────────────────────────────
|
|
46
|
+
export class BenchmarkSuite {
|
|
47
|
+
db;
|
|
48
|
+
log = getLogger();
|
|
49
|
+
stmtInsertCase;
|
|
50
|
+
stmtInsertRun;
|
|
51
|
+
/** Custom scoring function. Default: exact match (trimmed, case-insensitive). */
|
|
52
|
+
scoreFunction = (expected, actual) => expected.trim().toLowerCase() === actual.trim().toLowerCase();
|
|
53
|
+
constructor(db) {
|
|
54
|
+
this.db = db;
|
|
55
|
+
runBenchmarkMigration(db);
|
|
56
|
+
this.stmtInsertCase = db.prepare('INSERT INTO eval_cases (input, expected, category, difficulty, metadata) VALUES (?, ?, ?, ?, ?)');
|
|
57
|
+
this.stmtInsertRun = db.prepare('INSERT INTO benchmark_runs (id, name, total_cases, passed, failed, errored, accuracy, avg_latency_ms, duration_ms, report) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)');
|
|
58
|
+
}
|
|
59
|
+
// ── Case Management ─────────────────────────────────
|
|
60
|
+
/** Add a test case to the dataset. */
|
|
61
|
+
addCase(c) {
|
|
62
|
+
const result = this.stmtInsertCase.run(c.input, c.expected, c.category, c.difficulty, JSON.stringify(c.metadata ?? {}));
|
|
63
|
+
return Number(result.lastInsertRowid);
|
|
64
|
+
}
|
|
65
|
+
/** Add multiple test cases at once. */
|
|
66
|
+
addCases(cases) {
|
|
67
|
+
let count = 0;
|
|
68
|
+
const tx = this.db.transaction(() => {
|
|
69
|
+
for (const c of cases) {
|
|
70
|
+
this.addCase(c);
|
|
71
|
+
count++;
|
|
72
|
+
}
|
|
73
|
+
});
|
|
74
|
+
tx();
|
|
75
|
+
return count;
|
|
76
|
+
}
|
|
77
|
+
/** Get all test cases, optionally filtered. */
|
|
78
|
+
getCases(filter) {
|
|
79
|
+
const conditions = [];
|
|
80
|
+
const params = [];
|
|
81
|
+
if (filter?.category) {
|
|
82
|
+
conditions.push('category = ?');
|
|
83
|
+
params.push(filter.category);
|
|
84
|
+
}
|
|
85
|
+
if (filter?.difficulty) {
|
|
86
|
+
conditions.push('difficulty = ?');
|
|
87
|
+
params.push(filter.difficulty);
|
|
88
|
+
}
|
|
89
|
+
const where = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : '';
|
|
90
|
+
return this.db.prepare(`SELECT * FROM eval_cases ${where} ORDER BY id`).all(...params);
|
|
91
|
+
}
|
|
92
|
+
/** Get distinct categories. */
|
|
93
|
+
getCategories() {
|
|
94
|
+
const rows = this.db.prepare('SELECT DISTINCT category FROM eval_cases ORDER BY category').all();
|
|
95
|
+
return rows.map(r => r.category);
|
|
96
|
+
}
|
|
97
|
+
/** Delete a case by ID. */
|
|
98
|
+
deleteCase(id) {
|
|
99
|
+
return this.db.prepare('DELETE FROM eval_cases WHERE id = ?').run(id).changes > 0;
|
|
100
|
+
}
|
|
101
|
+
/** Clear all cases. */
|
|
102
|
+
clearCases() {
|
|
103
|
+
return this.db.prepare('DELETE FROM eval_cases').run().changes;
|
|
104
|
+
}
|
|
105
|
+
// ── Benchmark Execution ─────────────────────────────
|
|
106
|
+
/** Run benchmark against all cases (or filtered subset). */
|
|
107
|
+
async run(evalFn, options) {
|
|
108
|
+
const name = options?.name ?? `benchmark-${Date.now()}`;
|
|
109
|
+
const cases = this.getCases({ category: options?.category, difficulty: options?.difficulty });
|
|
110
|
+
if (cases.length === 0) {
|
|
111
|
+
return this.emptyReport(name);
|
|
112
|
+
}
|
|
113
|
+
const startTime = Date.now();
|
|
114
|
+
const results = [];
|
|
115
|
+
for (const c of cases) {
|
|
116
|
+
const caseStart = Date.now();
|
|
117
|
+
let actual = '';
|
|
118
|
+
let error;
|
|
119
|
+
let correct = false;
|
|
120
|
+
try {
|
|
121
|
+
actual = await evalFn(c.input);
|
|
122
|
+
correct = this.scoreFunction(c.expected, actual);
|
|
123
|
+
}
|
|
124
|
+
catch (e) {
|
|
125
|
+
error = e.message;
|
|
126
|
+
}
|
|
127
|
+
results.push({
|
|
128
|
+
caseId: c.id ?? 0,
|
|
129
|
+
input: c.input,
|
|
130
|
+
expected: c.expected,
|
|
131
|
+
actual,
|
|
132
|
+
correct,
|
|
133
|
+
latencyMs: Date.now() - caseStart,
|
|
134
|
+
category: c.category,
|
|
135
|
+
difficulty: c.difficulty,
|
|
136
|
+
error,
|
|
137
|
+
});
|
|
138
|
+
}
|
|
139
|
+
const durationMs = Date.now() - startTime;
|
|
140
|
+
const report = this.buildReport(name, results, durationMs);
|
|
141
|
+
// Persist run
|
|
142
|
+
try {
|
|
143
|
+
this.stmtInsertRun.run(report.id, report.name, report.totalCases, report.passed, report.failed, report.errored, report.accuracy, report.avgLatencyMs, report.durationMs, JSON.stringify(report));
|
|
144
|
+
}
|
|
145
|
+
catch (e) {
|
|
146
|
+
this.log.warn(`[BenchmarkSuite] Failed to persist run: ${e.message}`);
|
|
147
|
+
}
|
|
148
|
+
return report;
|
|
149
|
+
}
|
|
150
|
+
// ── History ─────────────────────────────────────────
|
|
151
|
+
/** Get past benchmark runs. */
|
|
152
|
+
getHistory(limit = 20) {
|
|
153
|
+
return this.db.prepare('SELECT id, name, accuracy, total_cases as totalCases, duration_ms as durationMs, created_at as createdAt FROM benchmark_runs ORDER BY created_at DESC LIMIT ?').all(limit);
|
|
154
|
+
}
|
|
155
|
+
/** Get a specific run's full report. */
|
|
156
|
+
getRun(id) {
|
|
157
|
+
const row = this.db.prepare('SELECT report FROM benchmark_runs WHERE id = ?').get(id);
|
|
158
|
+
if (!row)
|
|
159
|
+
return null;
|
|
160
|
+
try {
|
|
161
|
+
return JSON.parse(row.report);
|
|
162
|
+
}
|
|
163
|
+
catch {
|
|
164
|
+
return null;
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
// ── Status ──────────────────────────────────────────
|
|
168
|
+
getStatus() {
|
|
169
|
+
try {
|
|
170
|
+
const totalCases = this.db.prepare('SELECT COUNT(*) as c FROM eval_cases').get().c;
|
|
171
|
+
const totalRuns = this.db.prepare('SELECT COUNT(*) as c FROM benchmark_runs').get().c;
|
|
172
|
+
const categories = this.getCategories();
|
|
173
|
+
const lastRun = this.db.prepare('SELECT accuracy FROM benchmark_runs ORDER BY created_at DESC LIMIT 1').get();
|
|
174
|
+
const bestRun = this.db.prepare('SELECT MAX(accuracy) as best FROM benchmark_runs').get();
|
|
175
|
+
return {
|
|
176
|
+
totalCases,
|
|
177
|
+
totalRuns,
|
|
178
|
+
categories,
|
|
179
|
+
lastRunAccuracy: lastRun?.accuracy ?? null,
|
|
180
|
+
bestAccuracy: bestRun?.best ?? 0,
|
|
181
|
+
};
|
|
182
|
+
}
|
|
183
|
+
catch {
|
|
184
|
+
return { totalCases: 0, totalRuns: 0, categories: [], lastRunAccuracy: null, bestAccuracy: 0 };
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
// ── Private ─────────────────────────────────────────
|
|
188
|
+
buildReport(name, results, durationMs) {
|
|
189
|
+
const passed = results.filter(r => r.correct).length;
|
|
190
|
+
const errored = results.filter(r => !!r.error).length;
|
|
191
|
+
const failed = results.length - passed - errored;
|
|
192
|
+
const accuracy = results.length > 0 ? passed / results.length : 0;
|
|
193
|
+
const latencies = results.map(r => r.latencyMs).sort((a, b) => a - b);
|
|
194
|
+
const avgLatency = latencies.length > 0 ? latencies.reduce((s, l) => s + l, 0) / latencies.length : 0;
|
|
195
|
+
const p50 = latencies.length > 0 ? latencies[Math.floor(latencies.length * 0.5)] : 0;
|
|
196
|
+
const p99 = latencies.length > 0 ? latencies[Math.floor(latencies.length * 0.99)] : 0;
|
|
197
|
+
// Breakdown by category
|
|
198
|
+
const byCategory = {};
|
|
199
|
+
for (const r of results) {
|
|
200
|
+
if (!byCategory[r.category])
|
|
201
|
+
byCategory[r.category] = { total: 0, passed: 0, accuracy: 0 };
|
|
202
|
+
byCategory[r.category].total++;
|
|
203
|
+
if (r.correct)
|
|
204
|
+
byCategory[r.category].passed++;
|
|
205
|
+
}
|
|
206
|
+
for (const cat of Object.values(byCategory)) {
|
|
207
|
+
cat.accuracy = cat.total > 0 ? cat.passed / cat.total : 0;
|
|
208
|
+
}
|
|
209
|
+
// Breakdown by difficulty
|
|
210
|
+
const byDifficulty = {};
|
|
211
|
+
for (const r of results) {
|
|
212
|
+
if (!byDifficulty[r.difficulty])
|
|
213
|
+
byDifficulty[r.difficulty] = { total: 0, passed: 0, accuracy: 0 };
|
|
214
|
+
byDifficulty[r.difficulty].total++;
|
|
215
|
+
if (r.correct)
|
|
216
|
+
byDifficulty[r.difficulty].passed++;
|
|
217
|
+
}
|
|
218
|
+
for (const diff of Object.values(byDifficulty)) {
|
|
219
|
+
diff.accuracy = diff.total > 0 ? diff.passed / diff.total : 0;
|
|
220
|
+
}
|
|
221
|
+
return {
|
|
222
|
+
id: `bench-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
|
|
223
|
+
name,
|
|
224
|
+
totalCases: results.length,
|
|
225
|
+
passed,
|
|
226
|
+
failed,
|
|
227
|
+
errored,
|
|
228
|
+
accuracy,
|
|
229
|
+
avgLatencyMs: Math.round(avgLatency),
|
|
230
|
+
p50LatencyMs: p50,
|
|
231
|
+
p99LatencyMs: p99,
|
|
232
|
+
byCategory,
|
|
233
|
+
byDifficulty,
|
|
234
|
+
results,
|
|
235
|
+
durationMs,
|
|
236
|
+
createdAt: Date.now(),
|
|
237
|
+
};
|
|
238
|
+
}
|
|
239
|
+
emptyReport(name) {
|
|
240
|
+
return {
|
|
241
|
+
id: `bench-${Date.now()}-empty`,
|
|
242
|
+
name, totalCases: 0, passed: 0, failed: 0, errored: 0,
|
|
243
|
+
accuracy: 0, avgLatencyMs: 0, p50LatencyMs: 0, p99LatencyMs: 0,
|
|
244
|
+
byCategory: {}, byDifficulty: {}, results: [], durationMs: 0, createdAt: Date.now(),
|
|
245
|
+
};
|
|
246
|
+
}
|
|
247
|
+
}
|
|
248
|
+
//# sourceMappingURL=benchmark-suite.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"benchmark-suite.js","sourceRoot":"","sources":["../../src/agent-training/benchmark-suite.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAGH,OAAO,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAsD/C,2DAA2D;AAE3D,MAAM,UAAU,qBAAqB,CAAC,EAAqB;IACzD,EAAE,CAAC,IAAI,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BP,CAAC,CAAC;AACL,CAAC;AAED,2DAA2D;AAE3D,MAAM,OAAO,cAAc;IASL;IARH,GAAG,GAAG,SAAS,EAAE,CAAC;IAC3B,cAAc,CAAqB;IACnC,aAAa,CAAqB;IAE1C,iFAAiF;IACjF,aAAa,GAAkB,CAAC,QAAQ,EAAE,MAAM,EAAE,EAAE,CAClD,QAAQ,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,KAAK,MAAM,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAEhE,YAAoB,EAAqB;QAArB,OAAE,GAAF,EAAE,CAAmB;QACvC,qBAAqB,CAAC,EAAE,CAAC,CAAC;QAE1B,IAAI,CAAC,cAAc,GAAG,EAAE,CAAC,OAAO,CAC9B,iGAAiG,CAClG,CAAC;QACF,IAAI,CAAC,aAAa,GAAG,EAAE,CAAC,OAAO,CAC7B,kKAAkK,CACnK,CAAC;IACJ,CAAC;IAED,uDAAuD;IAEvD,sCAAsC;IACtC,OAAO,CAAC,CAAW;QACjB,MAAM,MAAM,GAAG,IAAI,CAAC,cAAc,CAAC,GAAG,CACpC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC,UAAU,EAC7C,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,QAAQ,IAAI,EAAE,CAAC,CACjC,CAAC;QACF,OAAO,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC;IACxC,CAAC;IAED,uCAAuC;IACvC,QAAQ,CAAC,KAAiB;QACxB,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,MAAM,EAAE,GAAG,IAAI,CAAC,EAAE,CAAC,WAAW,CAAC,GAAG,EAAE;YAClC,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;gBACtB,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;gBAChB,KAAK,EAAE,CAAC;YACV,CAAC;QACH,CAAC,CAAC,CAAC;QACH,EAAE,EAAE,CAAC;QACL,OAAO,KAAK,CAAC;IACf,CAAC;IAED,+CAA+C;IAC/C,QAAQ,CAAC,MAAmD;QAC1D,MAAM,UAAU,GAAa,EAAE,CAAC;QAChC,MAAM,MAAM,GAAc,EAAE,CAAC;QAC7B,IAAI,MAAM,EAAE,QAAQ,EAAE,CAAC;YAAC,UAAU,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;YAAC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAAC,CAAC;QACxF,IAAI,MAAM,EAAE,UAAU,EAAE,CAAC;YAAC,UAAU,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;YAAC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;QAAC,CAAC;QAC9F,MAAM,KAAK,GAAG,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QAC/E,OAAO,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,4BAA4B,KAAK,cAAc,CAAC,CAAC,GAAG,CAAC,GAAG,MAAM,CAAe,CAAC;IACvG,CAA
C;IAED,+BAA+B;IAC/B,aAAa;QACX,MAAM,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,4DAA4D,CAAC,CAAC,GAAG,EAAiC,CAAC;QAChI,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;IACnC,CAAC;IAED,2BAA2B;IAC3B,UAAU,CAAC,EAAU;QACnB,OAAO,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,qCAAqC,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,OAAO,GAAG,CAAC,CAAC;IACpF,CAAC;IAED,uBAAuB;IACvB,UAAU;QACR,OAAO,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,wBAAwB,CAAC,CAAC,GAAG,EAAE,CAAC,OAAO,CAAC;IACjE,CAAC;IAED,uDAAuD;IAEvD,4DAA4D;IAC5D,KAAK,CAAC,GAAG,CACP,MAAoB,EACpB,OAAmE;QAEnE,MAAM,IAAI,GAAG,OAAO,EAAE,IAAI,IAAI,aAAa,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC;QACxD,MAAM,KAAK,GAAG,IAAI,CAAC,QAAQ,CAAC,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,OAAO,EAAE,UAAU,EAAE,CAAC,CAAC;QAE9F,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;QAChC,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,MAAM,OAAO,GAAiB,EAAE,CAAC;QAEjC,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;YACtB,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAC7B,IAAI,MAAM,GAAG,EAAE,CAAC;YAChB,IAAI,KAAyB,CAAC;YAC9B,IAAI,OAAO,GAAG,KAAK,CAAC;YAEpB,IAAI,CAAC;gBACH,MAAM,GAAG,MAAM,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;gBAC/B,OAAO,GAAG,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;YACnD,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACX,KAAK,GAAI,CAAW,CAAC,OAAO,CAAC;YAC/B,CAAC;YAED,OAAO,CAAC,IAAI,CAAC;gBACX,MAAM,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC;gBACjB,KAAK,EAAE,CAAC,CAAC,KAAK;gBACd,QAAQ,EAAE,CAAC,CAAC,QAAQ;gBACpB,MAAM;gBACN,OAAO;gBACP,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;gBACjC,QAAQ,EAAE,CAAC,CAAC,QAAQ;gBACpB,UAAU,EAAE,CAAC,CAAC,UAAU;gBACxB,KAAK;aACN,CAAC,CAAC;QACL,CAAC;QAED,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;QAC1C,MAAM,MAAM,GAAG,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,OAAO,EAAE,UAAU,CAAC,CAAC;QAE3D,cAAc;QACd,IAAI,CAAC;YACH,IAAI,CAAC,aAAa,CAAC,GAAG,CACpB,MAAM,CAAC,EAAE,EAAE,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,UAAU,EACzC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,OAAO,EAC5C,MAAM,CAAC,QAAQ,EAAE,MAAM,CAAC,YAAY,EAAE,MAAM,CAAC,UAAU,EACvD,IAAI,CAAC,SAAS,CAAC,MAAM,CAA
C,CACvB,CAAC;QACJ,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,2CAA4C,CAAW,CAAC,OAAO,EAAE,CAAC,CAAC;QACnF,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,uDAAuD;IAEvD,+BAA+B;IAC/B,UAAU,CAAC,KAAK,GAAG,EAAE;QACnB,OAAO,IAAI,CAAC,EAAE,CAAC,OAAO,CACpB,+JAA+J,CAChK,CAAC,GAAG,CAAC,KAAK,CAAqH,CAAC;IACnI,CAAC;IAED,wCAAwC;IACxC,MAAM,CAAC,EAAU;QACf,MAAM,GAAG,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,gDAAgD,CAAC,CAAC,GAAG,CAAC,EAAE,CAAmC,CAAC;QACxH,IAAI,CAAC,GAAG;YAAE,OAAO,IAAI,CAAC;QACtB,IAAI,CAAC;YAAC,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QAAC,CAAC;QAAC,MAAM,CAAC;YAAC,OAAO,IAAI,CAAC;QAAC,CAAC;IAC/D,CAAC;IAED,uDAAuD;IAEvD,SAAS;QACP,IAAI,CAAC;YACH,MAAM,UAAU,GAAI,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,sCAAsC,CAAC,CAAC,GAAG,EAAoB,CAAC,CAAC,CAAC;YACtG,MAAM,SAAS,GAAI,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,0CAA0C,CAAC,CAAC,GAAG,EAAoB,CAAC,CAAC,CAAC;YACzG,MAAM,UAAU,GAAG,IAAI,CAAC,aAAa,EAAE,CAAC;YACxC,MAAM,OAAO,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,sEAAsE,CAAC,CAAC,GAAG,EAAsC,CAAC;YAClJ,MAAM,OAAO,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,kDAAkD,CAAC,CAAC,GAAG,EAAkC,CAAC;YAE1H,OAAO;gBACL,UAAU;gBACV,SAAS;gBACT,UAAU;gBACV,eAAe,EAAE,OAAO,EAAE,QAAQ,IAAI,IAAI;gBAC1C,YAAY,EAAE,OAAO,EAAE,IAAI,IAAI,CAAC;aACjC,CAAC;QACJ,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,EAAE,UAAU,EAAE,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,UAAU,EAAE,EAAE,EAAE,eAAe,EAAE,IAAI,EAAE,YAAY,EAAE,CAAC,EAAE,CAAC;QACjG,CAAC;IACH,CAAC;IAED,uDAAuD;IAE/C,WAAW,CAAC,IAAY,EAAE,OAAqB,EAAE,UAAkB;QACzE,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;QACrD,MAAM,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC;QACtD,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,MAAM,GAAG,OAAO,CAAC;QACjD,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;QAElE,MAAM,SAAS,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QACtE,MAAM,UAAU,GAAG,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CA
AC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;QACtG,MAAM,GAAG,GAAG,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACrF,MAAM,GAAG,GAAG,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAEtF,wBAAwB;QACxB,MAAM,UAAU,GAAwE,EAAE,CAAC;QAC3F,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;YACxB,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,QAAQ,CAAC;gBAAE,UAAU,CAAC,CAAC,CAAC,QAAQ,CAAC,GAAG,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,CAAC;YAC3F,UAAU,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,KAAK,EAAE,CAAC;YAC/B,IAAI,CAAC,CAAC,OAAO;gBAAE,UAAU,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,MAAM,EAAE,CAAC;QACjD,CAAC;QACD,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,EAAE,CAAC;YAC5C,GAAG,CAAC,QAAQ,GAAG,GAAG,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QAC5D,CAAC;QAED,0BAA0B;QAC1B,MAAM,YAAY,GAAwE,EAAE,CAAC;QAC7F,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;YACxB,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,UAAU,CAAC;gBAAE,YAAY,CAAC,CAAC,CAAC,UAAU,CAAC,GAAG,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,CAAC;YACnG,YAAY,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,KAAK,EAAE,CAAC;YACnC,IAAI,CAAC,CAAC,OAAO;gBAAE,YAAY,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,MAAM,EAAE,CAAC;QACrD,CAAC;QACD,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,MAAM,CAAC,YAAY,CAAC,EAAE,CAAC;YAC/C,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;QAChE,CAAC;QAED,OAAO;YACL,EAAE,EAAE,SAAS,IAAI,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE;YACnE,IAAI;YACJ,UAAU,EAAE,OAAO,CAAC,MAAM;YAC1B,MAAM;YACN,MAAM;YACN,OAAO;YACP,QAAQ;YACR,YAAY,EAAE,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC;YACpC,YAAY,EAAE,GAAG;YACjB,YAAY,EAAE,GAAG;YACjB,UAAU;YACV,YAAY;YACZ,OAAO;YACP,UAAU;YACV,SAAS,EAAE,IAAI,CAA
C,GAAG,EAAE;SACtB,CAAC;IACJ,CAAC;IAEO,WAAW,CAAC,IAAY;QAC9B,OAAO;YACL,EAAE,EAAE,SAAS,IAAI,CAAC,GAAG,EAAE,QAAQ;YAC/B,IAAI,EAAE,UAAU,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,OAAO,EAAE,CAAC;YACrD,QAAQ,EAAE,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,YAAY,EAAE,CAAC;YAC9D,UAAU,EAAE,EAAE,EAAE,YAAY,EAAE,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,UAAU,EAAE,CAAC,EAAE,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;SACpF,CAAC;IACJ,CAAC;CACF"}
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
export { BenchmarkSuite, runBenchmarkMigration } from './benchmark-suite.js';
|
|
2
|
+
export type { EvalCase, EvalResult, BenchmarkReport, BenchmarkSuiteStatus, EvalFunction, ScoreFunction, } from './benchmark-suite.js';
|
|
3
|
+
export { AgentTrainer, runTrainerMigration } from './agent-trainer.js';
|
|
4
|
+
export type { TrainingConfig, EpochResult, TrainingReport, AgentTrainerStatus, } from './agent-trainer.js';
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/agent-training/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,qBAAqB,EAAE,MAAM,sBAAsB,CAAC;AAM7E,OAAO,EAAE,YAAY,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC"}
|
package/dist/index.d.ts
CHANGED
|
@@ -241,3 +241,9 @@ export { CheckpointManager, runCheckpointMigration } from './checkpoint/index.js
|
|
|
241
241
|
export type { Checkpoint, CheckpointSummary, CheckpointManagerStatus } from './checkpoint/index.js';
|
|
242
242
|
export { TraceCollector, runTraceMigration } from './observability/index.js';
|
|
243
243
|
export type { Trace, Span, TraceTree, TraceStats, TraceListOptions, TraceCollectorStatus } from './observability/index.js';
|
|
244
|
+
export { MessageRouter, TelegramBot, DiscordBot } from './messaging/index.js';
|
|
245
|
+
export type { IncomingMessage, OutgoingResponse, MessageRouterConfig, MessageRouterStatus, TelegramBotConfig, TelegramBotStatus, DiscordBotConfig, DiscordBotStatus, } from './messaging/index.js';
|
|
246
|
+
export { BenchmarkSuite, runBenchmarkMigration } from './agent-training/index.js';
|
|
247
|
+
export type { EvalCase, EvalResult, BenchmarkReport, BenchmarkSuiteStatus, EvalFunction, ScoreFunction, } from './agent-training/index.js';
|
|
248
|
+
export { AgentTrainer, runTrainerMigration } from './agent-training/index.js';
|
|
249
|
+
export type { TrainingConfig, EpochResult, TrainingReport, AgentTrainerStatus, } from './agent-training/index.js';
|
package/dist/index.js
CHANGED
|
@@ -209,4 +209,9 @@ export { ActiveLearner, runActiveLearningMigration } from './active-learning/ind
|
|
|
209
209
|
export { CheckpointManager, runCheckpointMigration } from './checkpoint/index.js';
|
|
210
210
|
// ── Observability / Tracing ─────────────────────────────────
|
|
211
211
|
export { TraceCollector, runTraceMigration } from './observability/index.js';
|
|
212
|
+
// ── Messaging (Bidirectional Bots) ──────────────────────────
|
|
213
|
+
export { MessageRouter, TelegramBot, DiscordBot } from './messaging/index.js';
|
|
214
|
+
// ── Agent Training ──────────────────────────────────────────
|
|
215
|
+
export { BenchmarkSuite, runBenchmarkMigration } from './agent-training/index.js';
|
|
216
|
+
export { AgentTrainer, runTrainerMigration } from './agent-training/index.js';
|
|
212
217
|
//# sourceMappingURL=index.js.map
|