@cogitator-ai/core 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +920 -15
- package/dist/__tests__/agent.test.js +2 -2
- package/dist/__tests__/agent.test.js.map +1 -1
- package/dist/__tests__/base64.test.js +1 -1
- package/dist/__tests__/base64.test.js.map +1 -1
- package/dist/__tests__/calculator.test.js +1 -1
- package/dist/__tests__/calculator.test.js.map +1 -1
- package/dist/__tests__/cogitator-memory.test.js +2 -2
- package/dist/__tests__/cogitator-memory.test.js.map +1 -1
- package/dist/__tests__/datetime.test.js +1 -1
- package/dist/__tests__/datetime.test.js.map +1 -1
- package/dist/__tests__/exec.test.js +1 -1
- package/dist/__tests__/exec.test.js.map +1 -1
- package/dist/__tests__/filesystem.test.js +1 -1
- package/dist/__tests__/filesystem.test.js.map +1 -1
- package/dist/__tests__/google-backend.test.js +1 -1
- package/dist/__tests__/google-backend.test.js.map +1 -1
- package/dist/__tests__/hash.test.js +1 -1
- package/dist/__tests__/hash.test.js.map +1 -1
- package/dist/__tests__/http.test.js +1 -1
- package/dist/__tests__/http.test.js.map +1 -1
- package/dist/__tests__/json.test.js +1 -1
- package/dist/__tests__/json.test.js.map +1 -1
- package/dist/__tests__/logger.test.js +1 -1
- package/dist/__tests__/logger.test.js.map +1 -1
- package/dist/__tests__/random.test.js +1 -1
- package/dist/__tests__/random.test.js.map +1 -1
- package/dist/__tests__/regex.test.js +1 -1
- package/dist/__tests__/regex.test.js.map +1 -1
- package/dist/__tests__/registry.test.js +2 -2
- package/dist/__tests__/registry.test.js.map +1 -1
- package/dist/__tests__/sleep.test.js +1 -1
- package/dist/__tests__/sleep.test.js.map +1 -1
- package/dist/__tests__/tool.test.js +1 -1
- package/dist/__tests__/tool.test.js.map +1 -1
- package/dist/__tests__/uuid.test.js +1 -1
- package/dist/__tests__/uuid.test.js.map +1 -1
- package/dist/cogitator.d.ts +46 -1
- package/dist/cogitator.d.ts.map +1 -1
- package/dist/cogitator.js +274 -17
- package/dist/cogitator.js.map +1 -1
- package/dist/constitutional/constitution.d.ts +9 -0
- package/dist/constitutional/constitution.d.ts.map +1 -0
- package/dist/constitutional/constitution.js +215 -0
- package/dist/constitutional/constitution.js.map +1 -0
- package/dist/constitutional/constitutional-ai.d.ts +36 -0
- package/dist/constitutional/constitutional-ai.d.ts.map +1 -0
- package/dist/constitutional/constitutional-ai.js +163 -0
- package/dist/constitutional/constitutional-ai.js.map +1 -0
- package/dist/constitutional/critique-reviser.d.ts +20 -0
- package/dist/constitutional/critique-reviser.d.ts.map +1 -0
- package/dist/constitutional/critique-reviser.js +98 -0
- package/dist/constitutional/critique-reviser.js.map +1 -0
- package/dist/constitutional/index.d.ts +13 -0
- package/dist/constitutional/index.d.ts.map +1 -0
- package/dist/constitutional/index.js +8 -0
- package/dist/constitutional/index.js.map +1 -0
- package/dist/constitutional/input-filter.d.ts +19 -0
- package/dist/constitutional/input-filter.d.ts.map +1 -0
- package/dist/constitutional/input-filter.js +88 -0
- package/dist/constitutional/input-filter.js.map +1 -0
- package/dist/constitutional/output-filter.d.ts +19 -0
- package/dist/constitutional/output-filter.d.ts.map +1 -0
- package/dist/constitutional/output-filter.js +86 -0
- package/dist/constitutional/output-filter.js.map +1 -0
- package/dist/constitutional/prompts.d.ts +11 -0
- package/dist/constitutional/prompts.d.ts.map +1 -0
- package/dist/constitutional/prompts.js +202 -0
- package/dist/constitutional/prompts.js.map +1 -0
- package/dist/constitutional/tool-guard.d.ts +18 -0
- package/dist/constitutional/tool-guard.d.ts.map +1 -0
- package/dist/constitutional/tool-guard.js +125 -0
- package/dist/constitutional/tool-guard.js.map +1 -0
- package/dist/cost-routing/budget-enforcer.d.ts +26 -0
- package/dist/cost-routing/budget-enforcer.d.ts.map +1 -0
- package/dist/cost-routing/budget-enforcer.js +86 -0
- package/dist/cost-routing/budget-enforcer.js.map +1 -0
- package/dist/cost-routing/cost-router.d.ts +34 -0
- package/dist/cost-routing/cost-router.d.ts.map +1 -0
- package/dist/cost-routing/cost-router.js +80 -0
- package/dist/cost-routing/cost-router.js.map +1 -0
- package/dist/cost-routing/cost-tracker.d.ts +20 -0
- package/dist/cost-routing/cost-tracker.d.ts.map +1 -0
- package/dist/cost-routing/cost-tracker.js +85 -0
- package/dist/cost-routing/cost-tracker.js.map +1 -0
- package/dist/cost-routing/index.d.ts +6 -0
- package/dist/cost-routing/index.d.ts.map +1 -0
- package/dist/cost-routing/index.js +6 -0
- package/dist/cost-routing/index.js.map +1 -0
- package/dist/cost-routing/model-selector.d.ts +15 -0
- package/dist/cost-routing/model-selector.d.ts.map +1 -0
- package/dist/cost-routing/model-selector.js +216 -0
- package/dist/cost-routing/model-selector.js.map +1 -0
- package/dist/cost-routing/task-analyzer.d.ts +13 -0
- package/dist/cost-routing/task-analyzer.d.ts.map +1 -0
- package/dist/cost-routing/task-analyzer.js +185 -0
- package/dist/cost-routing/task-analyzer.js.map +1 -0
- package/dist/index.d.ts +19 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +9 -0
- package/dist/index.js.map +1 -1
- package/dist/learning/ab-testing.d.ts +45 -0
- package/dist/learning/ab-testing.d.ts.map +1 -0
- package/dist/learning/ab-testing.js +267 -0
- package/dist/learning/ab-testing.js.map +1 -0
- package/dist/learning/agent-optimizer.d.ts +42 -0
- package/dist/learning/agent-optimizer.d.ts.map +1 -0
- package/dist/learning/agent-optimizer.js +273 -0
- package/dist/learning/agent-optimizer.js.map +1 -0
- package/dist/learning/auto-optimizer.d.ts +38 -0
- package/dist/learning/auto-optimizer.d.ts.map +1 -0
- package/dist/learning/auto-optimizer.js +229 -0
- package/dist/learning/auto-optimizer.js.map +1 -0
- package/dist/learning/demo-selector.d.ts +29 -0
- package/dist/learning/demo-selector.d.ts.map +1 -0
- package/dist/learning/demo-selector.js +235 -0
- package/dist/learning/demo-selector.js.map +1 -0
- package/dist/learning/index.d.ts +24 -0
- package/dist/learning/index.d.ts.map +1 -0
- package/dist/learning/index.js +13 -0
- package/dist/learning/index.js.map +1 -0
- package/dist/learning/instruction-optimizer.d.ts +29 -0
- package/dist/learning/instruction-optimizer.d.ts.map +1 -0
- package/dist/learning/instruction-optimizer.js +175 -0
- package/dist/learning/instruction-optimizer.js.map +1 -0
- package/dist/learning/metrics.d.ts +37 -0
- package/dist/learning/metrics.d.ts.map +1 -0
- package/dist/learning/metrics.js +310 -0
- package/dist/learning/metrics.js.map +1 -0
- package/dist/learning/postgres-trace-store.d.ts +53 -0
- package/dist/learning/postgres-trace-store.d.ts.map +1 -0
- package/dist/learning/postgres-trace-store.js +692 -0
- package/dist/learning/postgres-trace-store.js.map +1 -0
- package/dist/learning/prompt-logger.d.ts +29 -0
- package/dist/learning/prompt-logger.d.ts.map +1 -0
- package/dist/learning/prompt-logger.js +157 -0
- package/dist/learning/prompt-logger.js.map +1 -0
- package/dist/learning/prompt-monitor.d.ts +29 -0
- package/dist/learning/prompt-monitor.d.ts.map +1 -0
- package/dist/learning/prompt-monitor.js +243 -0
- package/dist/learning/prompt-monitor.js.map +1 -0
- package/dist/learning/prompts.d.ts +28 -0
- package/dist/learning/prompts.d.ts.map +1 -0
- package/dist/learning/prompts.js +195 -0
- package/dist/learning/prompts.js.map +1 -0
- package/dist/learning/rollback-manager.d.ts +36 -0
- package/dist/learning/rollback-manager.d.ts.map +1 -0
- package/dist/learning/rollback-manager.js +177 -0
- package/dist/learning/rollback-manager.js.map +1 -0
- package/dist/learning/trace-store.d.ts +26 -0
- package/dist/learning/trace-store.d.ts.map +1 -0
- package/dist/learning/trace-store.js +218 -0
- package/dist/learning/trace-store.js.map +1 -0
- package/dist/llm/google.d.ts.map +1 -1
- package/dist/llm/google.js +1 -2
- package/dist/llm/google.js.map +1 -1
- package/dist/reasoning/branch-evaluator.d.ts +28 -0
- package/dist/reasoning/branch-evaluator.d.ts.map +1 -0
- package/dist/reasoning/branch-evaluator.js +143 -0
- package/dist/reasoning/branch-evaluator.js.map +1 -0
- package/dist/reasoning/branch-generator.d.ts +9 -0
- package/dist/reasoning/branch-generator.d.ts.map +1 -0
- package/dist/reasoning/branch-generator.js +60 -0
- package/dist/reasoning/branch-generator.js.map +1 -0
- package/dist/reasoning/index.d.ts +5 -0
- package/dist/reasoning/index.d.ts.map +1 -0
- package/dist/reasoning/index.js +5 -0
- package/dist/reasoning/index.js.map +1 -0
- package/dist/reasoning/prompts.d.ts +19 -0
- package/dist/reasoning/prompts.d.ts.map +1 -0
- package/dist/reasoning/prompts.js +161 -0
- package/dist/reasoning/prompts.js.map +1 -0
- package/dist/reasoning/thought-tree.d.ts +32 -0
- package/dist/reasoning/thought-tree.d.ts.map +1 -0
- package/dist/reasoning/thought-tree.js +352 -0
- package/dist/reasoning/thought-tree.js.map +1 -0
- package/dist/reflection/index.d.ts +4 -0
- package/dist/reflection/index.d.ts.map +1 -0
- package/dist/reflection/index.js +4 -0
- package/dist/reflection/index.js.map +1 -0
- package/dist/reflection/insight-store.d.ts +19 -0
- package/dist/reflection/insight-store.d.ts.map +1 -0
- package/dist/reflection/insight-store.js +129 -0
- package/dist/reflection/insight-store.js.map +1 -0
- package/dist/reflection/prompts.d.ts +18 -0
- package/dist/reflection/prompts.d.ts.map +1 -0
- package/dist/reflection/prompts.js +157 -0
- package/dist/reflection/prompts.js.map +1 -0
- package/dist/reflection/reflection-engine.d.ts +25 -0
- package/dist/reflection/reflection-engine.d.ts.map +1 -0
- package/dist/reflection/reflection-engine.js +202 -0
- package/dist/reflection/reflection-engine.js.map +1 -0
- package/dist/registry.d.ts +1 -0
- package/dist/registry.d.ts.map +1 -1
- package/dist/registry.js +3 -0
- package/dist/registry.js.map +1 -1
- package/dist/time-travel/checkpoint-store.d.ts +34 -0
- package/dist/time-travel/checkpoint-store.d.ts.map +1 -0
- package/dist/time-travel/checkpoint-store.js +240 -0
- package/dist/time-travel/checkpoint-store.js.map +1 -0
- package/dist/time-travel/comparator.d.ts +26 -0
- package/dist/time-travel/comparator.d.ts.map +1 -0
- package/dist/time-travel/comparator.js +253 -0
- package/dist/time-travel/comparator.js.map +1 -0
- package/dist/time-travel/forker.d.ts +22 -0
- package/dist/time-travel/forker.d.ts.map +1 -0
- package/dist/time-travel/forker.js +118 -0
- package/dist/time-travel/forker.js.map +1 -0
- package/dist/time-travel/index.d.ts +6 -0
- package/dist/time-travel/index.d.ts.map +1 -0
- package/dist/time-travel/index.js +6 -0
- package/dist/time-travel/index.js.map +1 -0
- package/dist/time-travel/replayer.d.ts +20 -0
- package/dist/time-travel/replayer.d.ts.map +1 -0
- package/dist/time-travel/replayer.js +147 -0
- package/dist/time-travel/replayer.js.map +1 -0
- package/dist/time-travel/time-travel.d.ts +41 -0
- package/dist/time-travel/time-travel.d.ts.map +1 -0
- package/dist/time-travel/time-travel.js +127 -0
- package/dist/time-travel/time-travel.js.map +1 -0
- package/dist/tool.d.ts.map +1 -1
- package/dist/tool.js +2 -0
- package/dist/tool.js.map +1 -1
- package/dist/tools/base64.d.ts.map +1 -1
- package/dist/tools/base64.js +2 -8
- package/dist/tools/base64.js.map +1 -1
- package/dist/tools/datetime.d.ts.map +1 -1
- package/dist/tools/datetime.js.map +1 -1
- package/dist/tools/exec.d.ts.map +1 -1
- package/dist/tools/exec.js +1 -4
- package/dist/tools/exec.js.map +1 -1
- package/dist/tools/filesystem.d.ts.map +1 -1
- package/dist/tools/filesystem.js +4 -1
- package/dist/tools/filesystem.js.map +1 -1
- package/dist/tools/hash.d.ts.map +1 -1
- package/dist/tools/hash.js +1 -4
- package/dist/tools/hash.js.map +1 -1
- package/dist/tools/http.d.ts.map +1 -1
- package/dist/tools/http.js +1 -4
- package/dist/tools/http.js.map +1 -1
- package/dist/tools/regex.d.ts.map +1 -1
- package/dist/tools/regex.js +4 -1
- package/dist/tools/regex.js.map +1 -1
- package/dist/utils/circuit-breaker.d.ts.map +1 -1
- package/dist/utils/circuit-breaker.js.map +1 -1
- package/dist/utils/fallback.d.ts.map +1 -1
- package/dist/utils/fallback.js +1 -4
- package/dist/utils/fallback.js.map +1 -1
- package/dist/utils/retry.d.ts.map +1 -1
- package/dist/utils/retry.js +8 -13
- package/dist/utils/retry.js.map +1 -1
- package/package.json +17 -8
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
const DEFAULT_CONFIG = {
|
|
2
|
+
defaultConfidenceLevel: 0.95,
|
|
3
|
+
defaultMinSampleSize: 50,
|
|
4
|
+
defaultMaxDuration: 7 * 24 * 60 * 60 * 1000,
|
|
5
|
+
autoDeployWinner: false,
|
|
6
|
+
};
|
|
7
|
+
export class ABTestingFramework {
|
|
8
|
+
store;
|
|
9
|
+
config;
|
|
10
|
+
activeTests = new Map();
|
|
11
|
+
constructor(config) {
|
|
12
|
+
this.store = config.store;
|
|
13
|
+
this.config = {
|
|
14
|
+
defaultConfidenceLevel: config.defaultConfidenceLevel ?? DEFAULT_CONFIG.defaultConfidenceLevel,
|
|
15
|
+
defaultMinSampleSize: config.defaultMinSampleSize ?? DEFAULT_CONFIG.defaultMinSampleSize,
|
|
16
|
+
defaultMaxDuration: config.defaultMaxDuration ?? DEFAULT_CONFIG.defaultMaxDuration,
|
|
17
|
+
autoDeployWinner: config.autoDeployWinner ?? DEFAULT_CONFIG.autoDeployWinner,
|
|
18
|
+
};
|
|
19
|
+
}
|
|
20
|
+
async createTest(params) {
|
|
21
|
+
const test = await this.store.create({
|
|
22
|
+
agentId: params.agentId,
|
|
23
|
+
name: params.name,
|
|
24
|
+
description: params.description,
|
|
25
|
+
status: 'draft',
|
|
26
|
+
controlInstructions: params.controlInstructions,
|
|
27
|
+
treatmentInstructions: params.treatmentInstructions,
|
|
28
|
+
treatmentAllocation: params.treatmentAllocation ?? 0.5,
|
|
29
|
+
minSampleSize: params.minSampleSize ?? this.config.defaultMinSampleSize,
|
|
30
|
+
maxDuration: params.maxDuration ?? this.config.defaultMaxDuration,
|
|
31
|
+
confidenceLevel: params.confidenceLevel ?? this.config.defaultConfidenceLevel,
|
|
32
|
+
metricToOptimize: params.metricToOptimize ?? 'score',
|
|
33
|
+
controlResults: this.emptyResults(),
|
|
34
|
+
treatmentResults: this.emptyResults(),
|
|
35
|
+
});
|
|
36
|
+
return test;
|
|
37
|
+
}
|
|
38
|
+
async startTest(testId) {
|
|
39
|
+
const test = await this.store.update(testId, {
|
|
40
|
+
status: 'running',
|
|
41
|
+
startedAt: new Date(),
|
|
42
|
+
});
|
|
43
|
+
this.activeTests.set(test.agentId, test);
|
|
44
|
+
return test;
|
|
45
|
+
}
|
|
46
|
+
async pauseTest(testId) {
|
|
47
|
+
const test = await this.store.update(testId, {
|
|
48
|
+
status: 'paused',
|
|
49
|
+
});
|
|
50
|
+
this.activeTests.delete(test.agentId);
|
|
51
|
+
return test;
|
|
52
|
+
}
|
|
53
|
+
async resumeTest(testId) {
|
|
54
|
+
const test = await this.store.update(testId, {
|
|
55
|
+
status: 'running',
|
|
56
|
+
});
|
|
57
|
+
this.activeTests.set(test.agentId, test);
|
|
58
|
+
return test;
|
|
59
|
+
}
|
|
60
|
+
async completeTest(testId) {
|
|
61
|
+
const test = await this.store.update(testId, {
|
|
62
|
+
status: 'completed',
|
|
63
|
+
completedAt: new Date(),
|
|
64
|
+
});
|
|
65
|
+
this.activeTests.delete(test.agentId);
|
|
66
|
+
const outcome = this.analyzeResults(test);
|
|
67
|
+
return { test, outcome };
|
|
68
|
+
}
|
|
69
|
+
async cancelTest(testId) {
|
|
70
|
+
const test = await this.store.update(testId, {
|
|
71
|
+
status: 'cancelled',
|
|
72
|
+
completedAt: new Date(),
|
|
73
|
+
});
|
|
74
|
+
this.activeTests.delete(test.agentId);
|
|
75
|
+
return test;
|
|
76
|
+
}
|
|
77
|
+
async getActiveTest(agentId) {
|
|
78
|
+
const cached = this.activeTests.get(agentId);
|
|
79
|
+
if (cached) {
|
|
80
|
+
return cached;
|
|
81
|
+
}
|
|
82
|
+
const test = await this.store.getActive(agentId);
|
|
83
|
+
if (test) {
|
|
84
|
+
this.activeTests.set(agentId, test);
|
|
85
|
+
}
|
|
86
|
+
return test;
|
|
87
|
+
}
|
|
88
|
+
selectVariant(test) {
|
|
89
|
+
return Math.random() < test.treatmentAllocation ? 'treatment' : 'control';
|
|
90
|
+
}
|
|
91
|
+
getInstructionsForVariant(test, variant) {
|
|
92
|
+
return variant === 'treatment' ? test.treatmentInstructions : test.controlInstructions;
|
|
93
|
+
}
|
|
94
|
+
async recordResult(testId, variant, score, latency, cost) {
|
|
95
|
+
await this.store.recordResult(testId, variant, score, latency, cost);
|
|
96
|
+
const test = await this.store.get(testId);
|
|
97
|
+
if (test?.agentId) {
|
|
98
|
+
this.activeTests.set(test.agentId, test);
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
async checkAndCompleteIfReady(testId) {
|
|
102
|
+
const test = await this.store.get(testId);
|
|
103
|
+
if (test?.status !== 'running') {
|
|
104
|
+
return null;
|
|
105
|
+
}
|
|
106
|
+
const totalSamples = test.controlResults.sampleSize + test.treatmentResults.sampleSize;
|
|
107
|
+
const minSamplesReached = totalSamples >= test.minSampleSize * 2;
|
|
108
|
+
const elapsed = test.startedAt ? Date.now() - test.startedAt.getTime() : 0;
|
|
109
|
+
const maxDurationReached = elapsed >= test.maxDuration;
|
|
110
|
+
if (minSamplesReached || maxDurationReached) {
|
|
111
|
+
const outcome = this.analyzeResults(test);
|
|
112
|
+
if (outcome.isSignificant || maxDurationReached) {
|
|
113
|
+
await this.store.update(testId, {
|
|
114
|
+
status: 'completed',
|
|
115
|
+
completedAt: new Date(),
|
|
116
|
+
});
|
|
117
|
+
this.activeTests.delete(test.agentId);
|
|
118
|
+
return outcome;
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
return null;
|
|
122
|
+
}
|
|
123
|
+
analyzeResults(test) {
|
|
124
|
+
const control = test.controlResults;
|
|
125
|
+
const treatment = test.treatmentResults;
|
|
126
|
+
if (control.sampleSize < 2 || treatment.sampleSize < 2) {
|
|
127
|
+
return {
|
|
128
|
+
winner: null,
|
|
129
|
+
pValue: 1,
|
|
130
|
+
confidenceInterval: [0, 0],
|
|
131
|
+
effectSize: 0,
|
|
132
|
+
isSignificant: false,
|
|
133
|
+
recommendation: 'Insufficient sample size for statistical analysis',
|
|
134
|
+
};
|
|
135
|
+
}
|
|
136
|
+
const controlMean = control.avgScore;
|
|
137
|
+
const treatmentMean = treatment.avgScore;
|
|
138
|
+
const controlStd = this.calculateStd(control.scores, controlMean);
|
|
139
|
+
const treatmentStd = this.calculateStd(treatment.scores, treatmentMean);
|
|
140
|
+
const { tStatistic, degreesOfFreedom } = this.welchTTest(controlMean, treatmentMean, controlStd, treatmentStd, control.sampleSize, treatment.sampleSize);
|
|
141
|
+
const pValue = this.tDistributionPValue(Math.abs(tStatistic), degreesOfFreedom);
|
|
142
|
+
const pooledStd = Math.sqrt(((control.sampleSize - 1) * controlStd ** 2 + (treatment.sampleSize - 1) * treatmentStd ** 2) /
|
|
143
|
+
(control.sampleSize + treatment.sampleSize - 2));
|
|
144
|
+
const effectSize = pooledStd > 0 ? (treatmentMean - controlMean) / pooledStd : 0;
|
|
145
|
+
const seDiff = Math.sqrt((controlStd ** 2) / control.sampleSize + (treatmentStd ** 2) / treatment.sampleSize);
|
|
146
|
+
const tCritical = this.tCriticalValue(test.confidenceLevel, degreesOfFreedom);
|
|
147
|
+
const meanDiff = treatmentMean - controlMean;
|
|
148
|
+
const confidenceInterval = [
|
|
149
|
+
meanDiff - tCritical * seDiff,
|
|
150
|
+
meanDiff + tCritical * seDiff,
|
|
151
|
+
];
|
|
152
|
+
const alpha = 1 - test.confidenceLevel;
|
|
153
|
+
const isSignificant = pValue < alpha;
|
|
154
|
+
let winner = null;
|
|
155
|
+
let recommendation;
|
|
156
|
+
if (!isSignificant) {
|
|
157
|
+
recommendation = `No statistically significant difference detected (p=${pValue.toFixed(4)}). Consider collecting more data or accepting null hypothesis.`;
|
|
158
|
+
}
|
|
159
|
+
else if (treatmentMean > controlMean) {
|
|
160
|
+
winner = 'treatment';
|
|
161
|
+
recommendation = `Treatment performs significantly better (p=${pValue.toFixed(4)}, effect size=${effectSize.toFixed(3)}). Recommend deploying treatment variant.`;
|
|
162
|
+
}
|
|
163
|
+
else {
|
|
164
|
+
winner = 'control';
|
|
165
|
+
recommendation = `Control performs significantly better (p=${pValue.toFixed(4)}, effect size=${Math.abs(effectSize).toFixed(3)}). Recommend keeping control variant.`;
|
|
166
|
+
}
|
|
167
|
+
return {
|
|
168
|
+
winner,
|
|
169
|
+
pValue,
|
|
170
|
+
confidenceInterval,
|
|
171
|
+
effectSize,
|
|
172
|
+
isSignificant,
|
|
173
|
+
recommendation,
|
|
174
|
+
};
|
|
175
|
+
}
|
|
176
|
+
emptyResults() {
|
|
177
|
+
return {
|
|
178
|
+
sampleSize: 0,
|
|
179
|
+
successRate: 0,
|
|
180
|
+
avgScore: 0,
|
|
181
|
+
avgLatency: 0,
|
|
182
|
+
totalCost: 0,
|
|
183
|
+
scores: [],
|
|
184
|
+
};
|
|
185
|
+
}
|
|
186
|
+
calculateStd(values, mean) {
|
|
187
|
+
if (values.length < 2)
|
|
188
|
+
return 0;
|
|
189
|
+
const squaredDiffs = values.map((v) => (v - mean) ** 2);
|
|
190
|
+
const variance = squaredDiffs.reduce((a, b) => a + b, 0) / (values.length - 1);
|
|
191
|
+
return Math.sqrt(variance);
|
|
192
|
+
}
|
|
193
|
+
welchTTest(mean1, mean2, std1, std2, n1, n2) {
|
|
194
|
+
const se1 = (std1 ** 2) / n1;
|
|
195
|
+
const se2 = (std2 ** 2) / n2;
|
|
196
|
+
const seDiff = Math.sqrt(se1 + se2);
|
|
197
|
+
const tStatistic = seDiff > 0 ? (mean1 - mean2) / seDiff : 0;
|
|
198
|
+
const numerator = (se1 + se2) ** 2;
|
|
199
|
+
const denominator = (se1 ** 2) / (n1 - 1) + (se2 ** 2) / (n2 - 1);
|
|
200
|
+
const degreesOfFreedom = denominator > 0 ? numerator / denominator : 1;
|
|
201
|
+
return { tStatistic, degreesOfFreedom };
|
|
202
|
+
}
|
|
203
|
+
tDistributionPValue(tStatistic, df) {
|
|
204
|
+
const x = df / (df + tStatistic ** 2);
|
|
205
|
+
const beta = this.incompleteBeta(x, df / 2, 0.5);
|
|
206
|
+
return beta;
|
|
207
|
+
}
|
|
208
|
+
incompleteBeta(x, a, b) {
|
|
209
|
+
if (x === 0)
|
|
210
|
+
return 0;
|
|
211
|
+
if (x === 1)
|
|
212
|
+
return 1;
|
|
213
|
+
const maxIterations = 200;
|
|
214
|
+
const epsilon = 1e-10;
|
|
215
|
+
let sum = 0;
|
|
216
|
+
let term = 1;
|
|
217
|
+
for (let n = 0; n < maxIterations; n++) {
|
|
218
|
+
const coeff = this.gamma(a + b + n) / (this.gamma(a + 1 + n) * this.gamma(b));
|
|
219
|
+
term = coeff * Math.pow(x, a + n) * Math.pow(1 - x, b - 1);
|
|
220
|
+
if (n > 0 && Math.abs(term) < epsilon)
|
|
221
|
+
break;
|
|
222
|
+
sum += term;
|
|
223
|
+
}
|
|
224
|
+
return (sum * this.gamma(a + b)) / (this.gamma(a) * this.gamma(b));
|
|
225
|
+
}
|
|
226
|
+
gamma(z) {
|
|
227
|
+
if (z < 0.5) {
|
|
228
|
+
return Math.PI / (Math.sin(Math.PI * z) * this.gamma(1 - z));
|
|
229
|
+
}
|
|
230
|
+
z -= 1;
|
|
231
|
+
const g = 7;
|
|
232
|
+
const c = [
|
|
233
|
+
0.99999999999980993, 676.5203681218851, -1259.1392167224028, 771.32342877765313,
|
|
234
|
+
-176.61502916214059, 12.507343278686905, -0.13857109526572012, 9.9843695780195716e-6,
|
|
235
|
+
1.5056327351493116e-7,
|
|
236
|
+
];
|
|
237
|
+
let x = c[0];
|
|
238
|
+
for (let i = 1; i < g + 2; i++) {
|
|
239
|
+
x += c[i] / (z + i);
|
|
240
|
+
}
|
|
241
|
+
const t = z + g + 0.5;
|
|
242
|
+
return Math.sqrt(2 * Math.PI) * Math.pow(t, z + 0.5) * Math.exp(-t) * x;
|
|
243
|
+
}
|
|
244
|
+
tCriticalValue(confidenceLevel, df) {
|
|
245
|
+
const alpha = 1 - confidenceLevel;
|
|
246
|
+
const p = 1 - alpha / 2;
|
|
247
|
+
let low = 0;
|
|
248
|
+
let high = 10;
|
|
249
|
+
while (high - low > 0.0001) {
|
|
250
|
+
const mid = (low + high) / 2;
|
|
251
|
+
const cdf = this.tCDF(mid, df);
|
|
252
|
+
if (cdf < p) {
|
|
253
|
+
low = mid;
|
|
254
|
+
}
|
|
255
|
+
else {
|
|
256
|
+
high = mid;
|
|
257
|
+
}
|
|
258
|
+
}
|
|
259
|
+
return (low + high) / 2;
|
|
260
|
+
}
|
|
261
|
+
tCDF(t, df) {
|
|
262
|
+
const x = df / (df + t ** 2);
|
|
263
|
+
const beta = this.incompleteBeta(x, df / 2, 0.5);
|
|
264
|
+
return t >= 0 ? 1 - beta / 2 : beta / 2;
|
|
265
|
+
}
|
|
266
|
+
}
|
|
267
|
+
//# sourceMappingURL=ab-testing.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ab-testing.js","sourceRoot":"","sources":["../../src/learning/ab-testing.ts"],"names":[],"mappings":"AAoBA,MAAM,cAAc,GAA4B;IAC9C,sBAAsB,EAAE,IAAI;IAC5B,oBAAoB,EAAE,EAAE;IACxB,kBAAkB,EAAE,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,IAAI;IAC3C,gBAAgB,EAAE,KAAK;CACxB,CAAC;AAEF,MAAM,OAAO,kBAAkB;IACrB,KAAK,CAAc;IACnB,MAAM,CAA0B;IAChC,WAAW,GAAG,IAAI,GAAG,EAAkB,CAAC;IAEhD,YAAY,MAAgC;QAC1C,IAAI,CAAC,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC;QAC1B,IAAI,CAAC,MAAM,GAAG;YACZ,sBAAsB,EAAE,MAAM,CAAC,sBAAsB,IAAI,cAAc,CAAC,sBAAsB;YAC9F,oBAAoB,EAAE,MAAM,CAAC,oBAAoB,IAAI,cAAc,CAAC,oBAAoB;YACxF,kBAAkB,EAAE,MAAM,CAAC,kBAAkB,IAAI,cAAc,CAAC,kBAAkB;YAClF,gBAAgB,EAAE,MAAM,CAAC,gBAAgB,IAAI,cAAc,CAAC,gBAAgB;SAC7E,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,MAWhB;QACC,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC;YACnC,OAAO,EAAE,MAAM,CAAC,OAAO;YACvB,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,WAAW,EAAE,MAAM,CAAC,WAAW;YAC/B,MAAM,EAAE,OAAO;YACf,mBAAmB,EAAE,MAAM,CAAC,mBAAmB;YAC/C,qBAAqB,EAAE,MAAM,CAAC,qBAAqB;YACnD,mBAAmB,EAAE,MAAM,CAAC,mBAAmB,IAAI,GAAG;YACtD,aAAa,EAAE,MAAM,CAAC,aAAa,IAAI,IAAI,CAAC,MAAM,CAAC,oBAAoB;YACvE,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,IAAI,CAAC,MAAM,CAAC,kBAAkB;YACjE,eAAe,EAAE,MAAM,CAAC,eAAe,IAAI,IAAI,CAAC,MAAM,CAAC,sBAAsB;YAC7E,gBAAgB,EAAE,MAAM,CAAC,gBAAgB,IAAI,OAAO;YACpD,cAAc,EAAE,IAAI,CAAC,YAAY,EAAE;YACnC,gBAAgB,EAAE,IAAI,CAAC,YAAY,EAAE;SACtC,CAAC,CAAC;QAEH,OAAO,IAAI,CAAC;IACd,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,MAAc;QAC5B,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE;YAC3C,MAAM,EAAE,SAAS;YACjB,SAAS,EAAE,IAAI,IAAI,EAAE;SACtB,CAAC,CAAC;QAEH,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;QACzC,OAAO,IAAI,CAAC;IACd,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,MAAc;QAC5B,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE;YAC3C,MAAM,EAAE,QAAQ;SACjB,CAAC,CAAC;QAEH,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACtC,OAAO,IAAI,CAAC;IACd,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,MAAc;QAC7B,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE;YAC3C,MAAM,EAAE,SAAS;SAClB,CAAC
,CAAC;QAEH,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;QACzC,OAAO,IAAI,CAAC;IACd,CAAC;IAED,KAAK,CAAC,YAAY,CAAC,MAAc;QAC/B,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE;YAC3C,MAAM,EAAE,WAAW;YACnB,WAAW,EAAE,IAAI,IAAI,EAAE;SACxB,CAAC,CAAC;QAEH,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAEtC,MAAM,OAAO,GAAG,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC;QAE1C,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC;IAC3B,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,MAAc;QAC7B,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE;YAC3C,MAAM,EAAE,WAAW;YACnB,WAAW,EAAE,IAAI,IAAI,EAAE;SACxB,CAAC,CAAC;QAEH,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACtC,OAAO,IAAI,CAAC;IACd,CAAC;IAED,KAAK,CAAC,aAAa,CAAC,OAAe;QACjC,MAAM,MAAM,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;QAC7C,IAAI,MAAM,EAAE,CAAC;YACX,OAAO,MAAM,CAAC;QAChB,CAAC;QAED,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;QACjD,IAAI,IAAI,EAAE,CAAC;YACT,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;QACtC,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,aAAa,CAAC,IAAY;QACxB,OAAO,IAAI,CAAC,MAAM,EAAE,GAAG,IAAI,CAAC,mBAAmB,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,SAAS,CAAC;IAC5E,CAAC;IAED,yBAAyB,CAAC,IAAY,EAAE,OAAsB;QAC5D,OAAO,OAAO,KAAK,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC,CAAC,IAAI,CAAC,mBAAmB,CAAC;IACzF,CAAC;IAED,KAAK,CAAC,YAAY,CAChB,MAAc,EACd,OAAsB,EACtB,KAAa,EACb,OAAe,EACf,IAAY;QAEZ,MAAM,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,IAAI,CAAC,CAAC;QAErE,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QAC1C,IAAI,IAAI,EAAE,OAAO,EAAE,CAAC;YAClB,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;QAC3C,CAAC;IACH,CAAC;IAED,KAAK,CAAC,uBAAuB,CAAC,MAAc;QAC1C,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QAC1C,IAAI,IAAI,EAAE,MAAM,KAAK,SAAS,EAAE,CAAC;YAC/B,OAAO,IAAI,CAAC;QACd,CAAC;QAED,MAAM,YAAY,GAAG,IAAI,CAAC,cAAc,CAAC,UAAU,GAAG,IAAI,CAAC,gBAAgB,CAAC,UAAU,CAAC;QACvF,MAAM,iBAAiB,GAAG,YAAY,IAAI,IAAI,CAAC,aAAa,GAAG,CAAC,C
AAC;QAEjE,MAAM,OAAO,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QAC3E,MAAM,kBAAkB,GAAG,OAAO,IAAI,IAAI,CAAC,WAAW,CAAC;QAEvD,IAAI,iBAAiB,IAAI,kBAAkB,EAAE,CAAC;YAC5C,MAAM,OAAO,GAAG,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC;YAE1C,IAAI,OAAO,CAAC,aAAa,IAAI,kBAAkB,EAAE,CAAC;gBAChD,MAAM,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE;oBAC9B,MAAM,EAAE,WAAW;oBACnB,WAAW,EAAE,IAAI,IAAI,EAAE;iBACxB,CAAC,CAAC;gBACH,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;gBACtC,OAAO,OAAO,CAAC;YACjB,CAAC;QACH,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC;IAED,cAAc,CAAC,IAAY;QACzB,MAAM,OAAO,GAAG,IAAI,CAAC,cAAc,CAAC;QACpC,MAAM,SAAS,GAAG,IAAI,CAAC,gBAAgB,CAAC;QAExC,IAAI,OAAO,CAAC,UAAU,GAAG,CAAC,IAAI,SAAS,CAAC,UAAU,GAAG,CAAC,EAAE,CAAC;YACvD,OAAO;gBACL,MAAM,EAAE,IAAI;gBACZ,MAAM,EAAE,CAAC;gBACT,kBAAkB,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC;gBAC1B,UAAU,EAAE,CAAC;gBACb,aAAa,EAAE,KAAK;gBACpB,cAAc,EAAE,mDAAmD;aACpE,CAAC;QACJ,CAAC;QAED,MAAM,WAAW,GAAG,OAAO,CAAC,QAAQ,CAAC;QACrC,MAAM,aAAa,GAAG,SAAS,CAAC,QAAQ,CAAC;QACzC,MAAM,UAAU,GAAG,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC,MAAM,EAAE,WAAW,CAAC,CAAC;QAClE,MAAM,YAAY,GAAG,IAAI,CAAC,YAAY,CAAC,SAAS,CAAC,MAAM,EAAE,aAAa,CAAC,CAAC;QAExE,MAAM,EAAE,UAAU,EAAE,gBAAgB,EAAE,GAAG,IAAI,CAAC,UAAU,CACtD,WAAW,EACX,aAAa,EACb,UAAU,EACV,YAAY,EACZ,OAAO,CAAC,UAAU,EAClB,SAAS,CAAC,UAAU,CACrB,CAAC;QAEF,MAAM,MAAM,GAAG,IAAI,CAAC,mBAAmB,CAAC,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,gBAAgB,CAAC,CAAC;QAEhF,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CACzB,CAAC,CAAC,OAAO,CAAC,UAAU,GAAG,CAAC,CAAC,GAAG,UAAU,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,UAAU,GAAG,CAAC,CAAC,GAAG,YAAY,IAAI,CAAC,CAAC;YAC3F,CAAC,OAAO,CAAC,UAAU,GAAG,SAAS,CAAC,UAAU,GAAG,CAAC,CAAC,CAClD,CAAC;QACF,MAAM,UAAU,GAAG,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,aAAa,GAAG,WAAW,CAAC,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;QAEjF,MAAM,MAAM,GAAG,IAAI,CAAC,IAAI,CACtB,CAAC,UAAU,IAAI,CAAC,CAAC,GAAG,OAAO,CAAC,UAAU,GAAG,CAAC,YAAY,IAAI,CAAC,CAAC,GAAG,SAAS,CAAC,UAAU,CACpF,CAAC;QACF,MAAM,SAAS,GAAG,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,eAAe,EAAE,gBAAgB,CAAC,
CAAC;QAC9E,MAAM,QAAQ,GAAG,aAAa,GAAG,WAAW,CAAC;QAC7C,MAAM,kBAAkB,GAAqB;YAC3C,QAAQ,GAAG,SAAS,GAAG,MAAM;YAC7B,QAAQ,GAAG,SAAS,GAAG,MAAM;SAC9B,CAAC;QAEF,MAAM,KAAK,GAAG,CAAC,GAAG,IAAI,CAAC,eAAe,CAAC;QACvC,MAAM,aAAa,GAAG,MAAM,GAAG,KAAK,CAAC;QAErC,IAAI,MAAM,GAAyB,IAAI,CAAC;QACxC,IAAI,cAAsB,CAAC;QAE3B,IAAI,CAAC,aAAa,EAAE,CAAC;YACnB,cAAc,GAAG,uDAAuD,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,gEAAgE,CAAC;QAC5J,CAAC;aAAM,IAAI,aAAa,GAAG,WAAW,EAAE,CAAC;YACvC,MAAM,GAAG,WAAW,CAAC;YACrB,cAAc,GAAG,8CAA8C,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,iBAAiB,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,2CAA2C,CAAC;QACpK,CAAC;aAAM,CAAC;YACN,MAAM,GAAG,SAAS,CAAC;YACnB,cAAc,GAAG,4CAA4C,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,iBAAiB,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,uCAAuC,CAAC;QACxK,CAAC;QAED,OAAO;YACL,MAAM;YACN,MAAM;YACN,kBAAkB;YAClB,UAAU;YACV,aAAa;YACb,cAAc;SACf,CAAC;IACJ,CAAC;IAEO,YAAY;QAClB,OAAO;YACL,UAAU,EAAE,CAAC;YACb,WAAW,EAAE,CAAC;YACd,QAAQ,EAAE,CAAC;YACX,UAAU,EAAE,CAAC;YACb,SAAS,EAAE,CAAC;YACZ,MAAM,EAAE,EAAE;SACX,CAAC;IACJ,CAAC;IAEO,YAAY,CAAC,MAAgB,EAAE,IAAY;QACjD,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,CAAC,CAAC;QAChC,MAAM,YAAY,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;QACxD,MAAM,QAAQ,GAAG,YAAY,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAC/E,OAAO,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IAC7B,CAAC;IAEO,UAAU,CAChB,KAAa,EACb,KAAa,EACb,IAAY,EACZ,IAAY,EACZ,EAAU,EACV,EAAU;QAEV,MAAM,GAAG,GAAG,CAAC,IAAI,IAAI,CAAC,CAAC,GAAG,EAAE,CAAC;QAC7B,MAAM,GAAG,GAAG,CAAC,IAAI,IAAI,CAAC,CAAC,GAAG,EAAE,CAAC;QAC7B,MAAM,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,GAAG,GAAG,CAAC,CAAC;QAEpC,MAAM,UAAU,GAAG,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,KAAK,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;QAE7D,MAAM,SAAS,GAAG,CAAC,GAAG,GAAG,GAAG,CAAC,IAAI,CAAC,CAAC;QACnC,MAAM,WAAW,GAAG,CAAC,GAAG,IAAI,CAAC,CAAC,GAAG,CAAC,EAAE,GAAG,CAAC,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,CAAC,GAAG,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC;QAClE,MAAM,gBAAgB,GAAG,WAAW
,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC;QAEvE,OAAO,EAAE,UAAU,EAAE,gBAAgB,EAAE,CAAC;IAC1C,CAAC;IAEO,mBAAmB,CAAC,UAAkB,EAAE,EAAU;QACxD,MAAM,CAAC,GAAG,EAAE,GAAG,CAAC,EAAE,GAAG,UAAU,IAAI,CAAC,CAAC,CAAC;QACtC,MAAM,IAAI,GAAG,IAAI,CAAC,cAAc,CAAC,CAAC,EAAE,EAAE,GAAG,CAAC,EAAE,GAAG,CAAC,CAAC;QACjD,OAAO,IAAI,CAAC;IACd,CAAC;IAEO,cAAc,CAAC,CAAS,EAAE,CAAS,EAAE,CAAS;QACpD,IAAI,CAAC,KAAK,CAAC;YAAE,OAAO,CAAC,CAAC;QACtB,IAAI,CAAC,KAAK,CAAC;YAAE,OAAO,CAAC,CAAC;QAEtB,MAAM,aAAa,GAAG,GAAG,CAAC;QAC1B,MAAM,OAAO,GAAG,KAAK,CAAC;QAEtB,IAAI,GAAG,GAAG,CAAC,CAAC;QACZ,IAAI,IAAI,GAAG,CAAC,CAAC;QAEb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,aAAa,EAAE,CAAC,EAAE,EAAE,CAAC;YACvC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;YAC9E,IAAI,GAAG,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;YAE3D,IAAI,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,OAAO;gBAAE,MAAM;YAC7C,GAAG,IAAI,IAAI,CAAC;QACd,CAAC;QAED,OAAO,CAAC,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACrE,CAAC;IAEO,KAAK,CAAC,CAAS;QACrB,IAAI,CAAC,GAAG,GAAG,EAAE,CAAC;YACZ,OAAO,IAAI,CAAC,EAAE,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QAC/D,CAAC;QAED,CAAC,IAAI,CAAC,CAAC;QACP,MAAM,CAAC,GAAG,CAAC,CAAC;QACZ,MAAM,CAAC,GAAG;YACR,mBAAmB,EAAE,iBAAiB,EAAE,CAAC,kBAAkB,EAAE,kBAAkB;YAC/E,CAAC,kBAAkB,EAAE,kBAAkB,EAAE,CAAC,mBAAmB,EAAE,qBAAqB;YACpF,qBAAqB;SACtB,CAAC;QAEF,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAC/B,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QACtB,CAAC;QAED,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC;QACtB,OAAO,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,IA
AI,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;IAC1E,CAAC;IAEO,cAAc,CAAC,eAAuB,EAAE,EAAU;QACxD,MAAM,KAAK,GAAG,CAAC,GAAG,eAAe,CAAC;QAClC,MAAM,CAAC,GAAG,CAAC,GAAG,KAAK,GAAG,CAAC,CAAC;QAExB,IAAI,GAAG,GAAG,CAAC,CAAC;QACZ,IAAI,IAAI,GAAG,EAAE,CAAC;QAEd,OAAO,IAAI,GAAG,GAAG,GAAG,MAAM,EAAE,CAAC;YAC3B,MAAM,GAAG,GAAG,CAAC,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC;YAC7B,MAAM,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;YAC/B,IAAI,GAAG,GAAG,CAAC,EAAE,CAAC;gBACZ,GAAG,GAAG,GAAG,CAAC;YACZ,CAAC;iBAAM,CAAC;gBACN,IAAI,GAAG,GAAG,CAAC;YACb,CAAC;QACH,CAAC;QAED,OAAO,CAAC,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC;IAC1B,CAAC;IAEO,IAAI,CAAC,CAAS,EAAE,EAAU;QAChC,MAAM,CAAC,GAAG,EAAE,GAAG,CAAC,EAAE,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC;QAC7B,MAAM,IAAI,GAAG,IAAI,CAAC,cAAc,CAAC,CAAC,EAAE,EAAE,GAAG,CAAC,EAAE,GAAG,CAAC,CAAC;QACjD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC;IAC1C,CAAC;CACF"}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import type { ExecutionTrace, TraceStore, Demo, OptimizationResult, CompileOptions, LearningStats, LearningConfig, InsightStore, RunResult, Agent, LLMBackend } from '@cogitator-ai/types';
|
|
2
|
+
import { MetricEvaluator } from './metrics';
|
|
3
|
+
import { DemoSelector } from './demo-selector';
|
|
4
|
+
import { InstructionOptimizer } from './instruction-optimizer';
|
|
5
|
+
/**
 * Constructor options for {@link AgentOptimizer}.
 *
 * - `llm` / `model`: required LLM backend and model name; the optimizer
 *   forwards both to its MetricEvaluator and InstructionOptimizer.
 * - `traceStore`: optional trace persistence; when omitted the optimizer
 *   creates an InMemoryTraceStore.
 * - `insightStore`: optional; passed through to the InstructionOptimizer.
 * - `config`: optional partial LearningConfig, spread over the optimizer's
 *   built-in defaults (maxDemosPerAgent 5, minScoreForDemo 0.8, ...).
 */
export interface AgentOptimizerOptions {
|
|
6
|
+
llm: LLMBackend;
|
|
7
|
+
model: string;
|
|
8
|
+
traceStore?: TraceStore;
|
|
9
|
+
insightStore?: InsightStore;
|
|
10
|
+
config?: Partial<LearningConfig>;
|
|
11
|
+
}
|
|
12
|
+
/**
 * Type declaration for the agent learning/optimization facade.
 *
 * Public surface:
 * - `captureTrace`: turns a RunResult plus its input into a scored
 *   ExecutionTrace and stores it in the trace store.
 * - `compile`: promotes high-scoring stored traces to demos and optionally
 *   runs instruction optimization; resolves to an OptimizationResult.
 *   The `_trainset` parameter is accepted but not read by the implementation.
 * - `bootstrapDemos`: fills free demo slots from the best non-demo traces.
 * - `getDemosForPrompt` / `formatDemosForPrompt`: delegate to the DemoSelector
 *   (`count` defaults to 3 in the implementation).
 * - `getStats`: combines trace, demo and optimization-run statistics.
 * - `getTraceStore` / `getMetricEvaluator` / `getDemoSelector` /
 *   `getInstructionOptimizer`: expose the internal collaborators.
 *
 * Private members are listed without types, as emitted by the compiler.
 */
export declare class AgentOptimizer {
|
|
13
|
+
private llm;
|
|
14
|
+
private model;
|
|
15
|
+
private traceStore;
|
|
16
|
+
private insightStore?;
|
|
17
|
+
private metricEvaluator;
|
|
18
|
+
private demoSelector;
|
|
19
|
+
private instructionOptimizer;
|
|
20
|
+
private config;
|
|
21
|
+
private optimizationRuns;
|
|
22
|
+
constructor(options: AgentOptimizerOptions);
|
|
23
|
+
captureTrace(runResult: RunResult, input: string, options?: {
|
|
24
|
+
expected?: unknown;
|
|
25
|
+
labels?: string[];
|
|
26
|
+
}): Promise<ExecutionTrace>;
|
|
27
|
+
compile(agent: Agent, _trainset: Array<{
|
|
28
|
+
input: string;
|
|
29
|
+
expected?: unknown;
|
|
30
|
+
}>, options?: CompileOptions): Promise<OptimizationResult>;
|
|
31
|
+
bootstrapDemos(agentId: string): Promise<Demo[]>;
|
|
32
|
+
getDemosForPrompt(agentId: string, input: string, count?: number): Promise<Demo[]>;
|
|
33
|
+
formatDemosForPrompt(demos: Demo[]): string;
|
|
34
|
+
getStats(agentId: string): Promise<LearningStats>;
|
|
35
|
+
private extractSteps;
|
|
36
|
+
private computeQuickMetrics;
|
|
37
|
+
getTraceStore(): TraceStore;
|
|
38
|
+
getMetricEvaluator(): MetricEvaluator;
|
|
39
|
+
getDemoSelector(): DemoSelector;
|
|
40
|
+
getInstructionOptimizer(): InstructionOptimizer;
|
|
41
|
+
}
|
|
42
|
+
//# sourceMappingURL=agent-optimizer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"agent-optimizer.d.ts","sourceRoot":"","sources":["../../src/learning/agent-optimizer.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EACV,cAAc,EAEd,UAAU,EAEV,IAAI,EACJ,kBAAkB,EAClB,cAAc,EACd,aAAa,EACb,cAAc,EACd,YAAY,EACZ,SAAS,EACT,KAAK,EACL,UAAU,EACX,MAAM,qBAAqB,CAAC;AAE7B,OAAO,EAAE,eAAe,EAAE,MAAM,WAAW,CAAC;AAC5C,OAAO,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;AAC/C,OAAO,EAAE,oBAAoB,EAAE,MAAM,yBAAyB,CAAC;AAE/D,MAAM,WAAW,qBAAqB;IACpC,GAAG,EAAE,UAAU,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,CAAC,EAAE,UAAU,CAAC;IACxB,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B,MAAM,CAAC,EAAE,OAAO,CAAC,cAAc,CAAC,CAAC;CAClC;AAED,qBAAa,cAAc;IACzB,OAAO,CAAC,GAAG,CAAa;IACxB,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,UAAU,CAAa;IAC/B,OAAO,CAAC,YAAY,CAAC,CAAe;IACpC,OAAO,CAAC,eAAe,CAAkB;IACzC,OAAO,CAAC,YAAY,CAAe;IACnC,OAAO,CAAC,oBAAoB,CAAuB;IACnD,OAAO,CAAC,MAAM,CAAiB;IAE/B,OAAO,CAAC,gBAAgB,CAGpB;gBAEQ,OAAO,EAAE,qBAAqB;IAmCpC,YAAY,CAChB,SAAS,EAAE,SAAS,EACpB,KAAK,EAAE,MAAM,EACb,OAAO,CAAC,EAAE;QAAE,QAAQ,CAAC,EAAE,OAAO,CAAC;QAAC,MAAM,CAAC,EAAE,MAAM,EAAE,CAAA;KAAE,GAClD,OAAO,CAAC,cAAc,CAAC;IAwCpB,OAAO,CACX,KAAK,EAAE,KAAK,EACZ,SAAS,EAAE,KAAK,CAAC;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,QAAQ,CAAC,EAAE,OAAO,CAAA;KAAE,CAAC,EACvD,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,kBAAkB,CAAC;IAqFxB,cAAc,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC;IA4BhD,iBAAiB,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC;IAIxF,oBAAoB,CAAC,KAAK,EAAE,IAAI,EAAE,GAAG,MAAM;IAIrC,QAAQ,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,aAAa,CAAC;IAkBvD,OAAO,CAAC,YAAY;IAwDpB,OAAO,CAAC,mBAAmB;IAoB3B,aAAa,IAAI,UAAU;IAI3B,kBAAkB,IAAI,eAAe;IAIrC,eAAe,IAAI,YAAY;IAI/B,uBAAuB,IAAI,oBAAoB;CAGhD"}
|
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
import { nanoid } from 'nanoid';
|
|
2
|
+
import { InMemoryTraceStore } from './trace-store';
|
|
3
|
+
import { MetricEvaluator } from './metrics';
|
|
4
|
+
import { DemoSelector } from './demo-selector';
|
|
5
|
+
import { InstructionOptimizer } from './instruction-optimizer';
|
|
6
|
+
export class AgentOptimizer {
|
|
7
|
+
llm;
|
|
8
|
+
model;
|
|
9
|
+
traceStore;
|
|
10
|
+
insightStore;
|
|
11
|
+
metricEvaluator;
|
|
12
|
+
demoSelector;
|
|
13
|
+
instructionOptimizer;
|
|
14
|
+
config;
|
|
15
|
+
optimizationRuns = new Map();
|
|
16
|
+
constructor(options) {
|
|
17
|
+
this.llm = options.llm;
|
|
18
|
+
this.model = options.model;
|
|
19
|
+
this.traceStore = options.traceStore ?? new InMemoryTraceStore();
|
|
20
|
+
this.insightStore = options.insightStore;
|
|
21
|
+
const defaultConfig = {
|
|
22
|
+
enabled: true,
|
|
23
|
+
captureTraces: true,
|
|
24
|
+
autoOptimize: false,
|
|
25
|
+
maxDemosPerAgent: 5,
|
|
26
|
+
minScoreForDemo: 0.8,
|
|
27
|
+
defaultMetrics: ['success', 'tool_accuracy', 'efficiency'],
|
|
28
|
+
};
|
|
29
|
+
this.config = { ...defaultConfig, ...options.config };
|
|
30
|
+
this.metricEvaluator = new MetricEvaluator({
|
|
31
|
+
llm: this.llm,
|
|
32
|
+
model: this.model,
|
|
33
|
+
});
|
|
34
|
+
this.demoSelector = new DemoSelector({
|
|
35
|
+
traceStore: this.traceStore,
|
|
36
|
+
maxDemos: this.config.maxDemosPerAgent,
|
|
37
|
+
minScore: this.config.minScoreForDemo,
|
|
38
|
+
});
|
|
39
|
+
this.instructionOptimizer = new InstructionOptimizer({
|
|
40
|
+
llm: this.llm,
|
|
41
|
+
model: this.model,
|
|
42
|
+
traceStore: this.traceStore,
|
|
43
|
+
insightStore: this.insightStore,
|
|
44
|
+
});
|
|
45
|
+
}
|
|
46
|
+
async captureTrace(runResult, input, options) {
|
|
47
|
+
const steps = this.extractSteps(runResult);
|
|
48
|
+
const metrics = this.computeQuickMetrics(runResult, steps);
|
|
49
|
+
const trace = {
|
|
50
|
+
id: `trace_${nanoid(12)}`,
|
|
51
|
+
runId: runResult.runId,
|
|
52
|
+
agentId: runResult.agentId,
|
|
53
|
+
threadId: runResult.threadId,
|
|
54
|
+
input,
|
|
55
|
+
output: runResult.output,
|
|
56
|
+
steps,
|
|
57
|
+
toolCalls: [...runResult.toolCalls],
|
|
58
|
+
reflections: runResult.reflections ? [...runResult.reflections] : [],
|
|
59
|
+
metrics,
|
|
60
|
+
score: 0,
|
|
61
|
+
model: '',
|
|
62
|
+
createdAt: new Date(),
|
|
63
|
+
duration: runResult.usage.duration,
|
|
64
|
+
usage: {
|
|
65
|
+
inputTokens: runResult.usage.inputTokens,
|
|
66
|
+
outputTokens: runResult.usage.outputTokens,
|
|
67
|
+
cost: runResult.usage.cost,
|
|
68
|
+
},
|
|
69
|
+
labels: options?.labels,
|
|
70
|
+
isDemo: false,
|
|
71
|
+
expected: options?.expected,
|
|
72
|
+
};
|
|
73
|
+
const evaluation = await this.metricEvaluator.evaluate(trace, options?.expected);
|
|
74
|
+
trace.score = evaluation.score;
|
|
75
|
+
trace.metrics.completeness =
|
|
76
|
+
evaluation.results.find((r) => r.name === 'completeness')?.value ?? metrics.completeness;
|
|
77
|
+
await this.traceStore.store(trace);
|
|
78
|
+
return trace;
|
|
79
|
+
}
|
|
80
|
+
async compile(agent, _trainset, options) {
|
|
81
|
+
const startTime = Date.now();
|
|
82
|
+
const maxRounds = options?.maxRounds ?? 3;
|
|
83
|
+
const maxBootstrappedDemos = options?.maxBootstrappedDemos ?? 5;
|
|
84
|
+
const demosAdded = [];
|
|
85
|
+
const demosRemoved = [];
|
|
86
|
+
const errors = [];
|
|
87
|
+
let tokensUsed = 0;
|
|
88
|
+
const existingTraces = await this.traceStore.getAll(agent.id);
|
|
89
|
+
const scoreBefore = existingTraces.length > 0
|
|
90
|
+
? existingTraces.reduce((sum, t) => sum + t.score, 0) / existingTraces.length
|
|
91
|
+
: 0;
|
|
92
|
+
const instructionsBefore = agent.instructions;
|
|
93
|
+
let currentInstructions = instructionsBefore;
|
|
94
|
+
for (let round = 0; round < maxRounds; round++) {
|
|
95
|
+
const highScoringTraces = existingTraces
|
|
96
|
+
.filter((t) => t.score >= (this.config.minScoreForDemo ?? 0.8))
|
|
97
|
+
.sort((a, b) => b.score - a.score)
|
|
98
|
+
.slice(0, maxBootstrappedDemos);
|
|
99
|
+
for (const trace of highScoringTraces) {
|
|
100
|
+
if (!trace.isDemo) {
|
|
101
|
+
try {
|
|
102
|
+
const demo = await this.demoSelector.addDemo(trace);
|
|
103
|
+
demosAdded.push(demo);
|
|
104
|
+
}
|
|
105
|
+
catch (e) {
|
|
106
|
+
errors.push(`Failed to add demo: ${e instanceof Error ? e.message : String(e)}`);
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
if (options?.optimizeInstructions !== false) {
|
|
111
|
+
try {
|
|
112
|
+
const optimizationResult = await this.instructionOptimizer.optimize(agent.id, currentInstructions);
|
|
113
|
+
if (optimizationResult.improvement > 0) {
|
|
114
|
+
currentInstructions = optimizationResult.optimizedInstructions;
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
catch (e) {
|
|
118
|
+
errors.push(`Instruction optimization failed: ${e instanceof Error ? e.message : String(e)}`);
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
const allTraces = await this.traceStore.getAll(agent.id);
|
|
123
|
+
const scoreAfter = allTraces.length > 0 ? allTraces.reduce((sum, t) => sum + t.score, 0) / allTraces.length : 0;
|
|
124
|
+
const stats = this.optimizationRuns.get(agent.id) ?? {
|
|
125
|
+
lastRun: new Date(),
|
|
126
|
+
count: 0,
|
|
127
|
+
totalImprovement: 0,
|
|
128
|
+
};
|
|
129
|
+
stats.lastRun = new Date();
|
|
130
|
+
stats.count++;
|
|
131
|
+
stats.totalImprovement += scoreAfter - scoreBefore;
|
|
132
|
+
this.optimizationRuns.set(agent.id, stats);
|
|
133
|
+
return {
|
|
134
|
+
success: errors.length === 0,
|
|
135
|
+
instructionsBefore,
|
|
136
|
+
instructionsAfter: currentInstructions,
|
|
137
|
+
demosAdded,
|
|
138
|
+
demosRemoved,
|
|
139
|
+
scoreBefore,
|
|
140
|
+
scoreAfter,
|
|
141
|
+
improvement: scoreAfter - scoreBefore,
|
|
142
|
+
tracesEvaluated: existingTraces.length,
|
|
143
|
+
bootstrapRounds: maxRounds,
|
|
144
|
+
duration: Date.now() - startTime,
|
|
145
|
+
tokensUsed,
|
|
146
|
+
errors,
|
|
147
|
+
};
|
|
148
|
+
}
|
|
149
|
+
async bootstrapDemos(agentId) {
|
|
150
|
+
const traces = await this.traceStore.getDemos(agentId);
|
|
151
|
+
const existingDemoCount = traces.length;
|
|
152
|
+
if (existingDemoCount >= (this.config.maxDemosPerAgent ?? 5)) {
|
|
153
|
+
return this.demoSelector.getAllDemos(agentId);
|
|
154
|
+
}
|
|
155
|
+
const allTraces = await this.traceStore.getAll(agentId);
|
|
156
|
+
const candidates = allTraces
|
|
157
|
+
.filter((t) => !t.isDemo && t.score >= (this.config.minScoreForDemo ?? 0.8))
|
|
158
|
+
.sort((a, b) => b.score - a.score);
|
|
159
|
+
const newDemos = [];
|
|
160
|
+
const slotsAvailable = (this.config.maxDemosPerAgent ?? 5) - existingDemoCount;
|
|
161
|
+
for (const trace of candidates.slice(0, slotsAvailable)) {
|
|
162
|
+
try {
|
|
163
|
+
const demo = await this.demoSelector.addDemo(trace);
|
|
164
|
+
newDemos.push(demo);
|
|
165
|
+
}
|
|
166
|
+
catch {
|
|
167
|
+
continue;
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
return [...this.demoSelector.getAllDemos(agentId)];
|
|
171
|
+
}
|
|
172
|
+
async getDemosForPrompt(agentId, input, count) {
|
|
173
|
+
return this.demoSelector.selectDemos(agentId, input, count ?? 3);
|
|
174
|
+
}
|
|
175
|
+
formatDemosForPrompt(demos) {
|
|
176
|
+
return this.demoSelector.formatDemosForPrompt(demos);
|
|
177
|
+
}
|
|
178
|
+
async getStats(agentId) {
|
|
179
|
+
const traceStats = await this.traceStore.getStats(agentId);
|
|
180
|
+
const demoStats = await this.demoSelector.getDemoStats(agentId);
|
|
181
|
+
const optimizationStats = this.optimizationRuns.get(agentId);
|
|
182
|
+
return {
|
|
183
|
+
traces: traceStats,
|
|
184
|
+
demos: demoStats,
|
|
185
|
+
optimization: {
|
|
186
|
+
lastRun: optimizationStats?.lastRun,
|
|
187
|
+
runsOptimized: optimizationStats?.count ?? 0,
|
|
188
|
+
averageImprovement: optimizationStats
|
|
189
|
+
? optimizationStats.totalImprovement / optimizationStats.count
|
|
190
|
+
: 0,
|
|
191
|
+
},
|
|
192
|
+
};
|
|
193
|
+
}
|
|
194
|
+
extractSteps(runResult) {
|
|
195
|
+
const steps = [];
|
|
196
|
+
let index = 0;
|
|
197
|
+
for (const span of runResult.trace.spans) {
|
|
198
|
+
if (span.name.includes('tool_call') || span.attributes?.toolName) {
|
|
199
|
+
const toolCall = runResult.toolCalls.find((tc) => tc.name === span.attributes?.toolName);
|
|
200
|
+
steps.push({
|
|
201
|
+
index: index++,
|
|
202
|
+
type: 'tool_call',
|
|
203
|
+
timestamp: span.startTime,
|
|
204
|
+
duration: span.duration,
|
|
205
|
+
toolCall,
|
|
206
|
+
toolResult: toolCall
|
|
207
|
+
? {
|
|
208
|
+
callId: toolCall.id,
|
|
209
|
+
name: toolCall.name,
|
|
210
|
+
result: span.attributes?.result,
|
|
211
|
+
error: span.status === 'error'
|
|
212
|
+
? String(span.attributes?.error ?? 'Unknown error')
|
|
213
|
+
: undefined,
|
|
214
|
+
}
|
|
215
|
+
: undefined,
|
|
216
|
+
});
|
|
217
|
+
}
|
|
218
|
+
else if (span.name.includes('llm') || span.name.includes('chat')) {
|
|
219
|
+
steps.push({
|
|
220
|
+
index: index++,
|
|
221
|
+
type: 'llm_call',
|
|
222
|
+
timestamp: span.startTime,
|
|
223
|
+
duration: span.duration,
|
|
224
|
+
tokensUsed: {
|
|
225
|
+
input: Number(span.attributes?.inputTokens ?? 0),
|
|
226
|
+
output: Number(span.attributes?.outputTokens ?? 0),
|
|
227
|
+
},
|
|
228
|
+
});
|
|
229
|
+
}
|
|
230
|
+
}
|
|
231
|
+
if (runResult.reflections) {
|
|
232
|
+
for (const reflection of runResult.reflections) {
|
|
233
|
+
steps.push({
|
|
234
|
+
index: index++,
|
|
235
|
+
type: 'reflection',
|
|
236
|
+
timestamp: reflection.timestamp.getTime(),
|
|
237
|
+
duration: 0,
|
|
238
|
+
reflection,
|
|
239
|
+
});
|
|
240
|
+
}
|
|
241
|
+
}
|
|
242
|
+
steps.sort((a, b) => a.timestamp - b.timestamp);
|
|
243
|
+
return steps;
|
|
244
|
+
}
|
|
245
|
+
computeQuickMetrics(runResult, steps) {
|
|
246
|
+
const toolSteps = steps.filter((s) => s.type === 'tool_call');
|
|
247
|
+
const successfulTools = toolSteps.filter((s) => !s.toolResult?.error);
|
|
248
|
+
const hasErrors = steps.some((s) => s.toolResult?.error);
|
|
249
|
+
const toolAccuracy = toolSteps.length > 0 ? successfulTools.length / toolSteps.length : 1;
|
|
250
|
+
const totalTokens = runResult.usage.inputTokens + runResult.usage.outputTokens;
|
|
251
|
+
const efficiency = Math.min(1, 10000 / Math.max(totalTokens, 1));
|
|
252
|
+
const completeness = runResult.output.length > 50 ? 0.8 : 0.5;
|
|
253
|
+
return {
|
|
254
|
+
success: !hasErrors,
|
|
255
|
+
toolAccuracy,
|
|
256
|
+
efficiency,
|
|
257
|
+
completeness,
|
|
258
|
+
};
|
|
259
|
+
}
|
|
260
|
+
getTraceStore() {
|
|
261
|
+
return this.traceStore;
|
|
262
|
+
}
|
|
263
|
+
getMetricEvaluator() {
|
|
264
|
+
return this.metricEvaluator;
|
|
265
|
+
}
|
|
266
|
+
getDemoSelector() {
|
|
267
|
+
return this.demoSelector;
|
|
268
|
+
}
|
|
269
|
+
getInstructionOptimizer() {
|
|
270
|
+
return this.instructionOptimizer;
|
|
271
|
+
}
|
|
272
|
+
}
|
|
273
|
+
//# sourceMappingURL=agent-optimizer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"agent-optimizer.js","sourceRoot":"","sources":["../../src/learning/agent-optimizer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAgBhC,OAAO,EAAE,kBAAkB,EAAE,MAAM,eAAe,CAAC;AACnD,OAAO,EAAE,eAAe,EAAE,MAAM,WAAW,CAAC;AAC5C,OAAO,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAC;AAC/C,OAAO,EAAE,oBAAoB,EAAE,MAAM,yBAAyB,CAAC;AAU/D,MAAM,OAAO,cAAc;IACjB,GAAG,CAAa;IAChB,KAAK,CAAS;IACd,UAAU,CAAa;IACvB,YAAY,CAAgB;IAC5B,eAAe,CAAkB;IACjC,YAAY,CAAe;IAC3B,oBAAoB,CAAuB;IAC3C,MAAM,CAAiB;IAEvB,gBAAgB,GAAG,IAAI,GAAG,EAG/B,CAAC;IAEJ,YAAY,OAA8B;QACxC,IAAI,CAAC,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC;QACvB,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC;QAC3B,IAAI,CAAC,UAAU,GAAG,OAAO,CAAC,UAAU,IAAI,IAAI,kBAAkB,EAAE,CAAC;QACjE,IAAI,CAAC,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC;QAEzC,MAAM,aAAa,GAAmB;YACpC,OAAO,EAAE,IAAI;YACb,aAAa,EAAE,IAAI;YACnB,YAAY,EAAE,KAAK;YACnB,gBAAgB,EAAE,CAAC;YACnB,eAAe,EAAE,GAAG;YACpB,cAAc,EAAE,CAAC,SAAS,EAAE,eAAe,EAAE,YAAY,CAAC;SAC3D,CAAC;QACF,IAAI,CAAC,MAAM,GAAG,EAAE,GAAG,aAAa,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;QAEtD,IAAI,CAAC,eAAe,GAAG,IAAI,eAAe,CAAC;YACzC,GAAG,EAAE,IAAI,CAAC,GAAG;YACb,KAAK,EAAE,IAAI,CAAC,KAAK;SAClB,CAAC,CAAC;QAEH,IAAI,CAAC,YAAY,GAAG,IAAI,YAAY,CAAC;YACnC,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC,gBAAgB;YACtC,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC,eAAe;SACtC,CAAC,CAAC;QAEH,IAAI,CAAC,oBAAoB,GAAG,IAAI,oBAAoB,CAAC;YACnD,GAAG,EAAE,IAAI,CAAC,GAAG;YACb,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,YAAY,EAAE,IAAI,CAAC,YAAY;SAChC,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,YAAY,CAChB,SAAoB,EACpB,KAAa,EACb,OAAmD;QAEnD,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC;QAE3C,MAAM,OAAO,GAAG,IAAI,CAAC,mBAAmB,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;QAE3D,MAAM,KAAK,GAAmB;YAC5B,EAAE,EAAE,SAAS,MAAM,CAAC,EAAE,CAAC,EAAE;YACzB,KAAK,EAAE,SAAS,CAAC,KAAK;YACtB,OAAO,EAAE,SAAS,CAAC,OAAO;YAC1B,QAAQ,EAAE,SAAS,CAAC,QAAQ;YAC5B,KAAK;YACL,MAAM,EAAE,SAAS,CAAC,MAAM;YACxB,KAAK;YACL,SAAS,EAAE,CAAC,GAAG,SAAS,CAAC,SAAS,CAAC;YACnC,WAAW,EAAE,SAAS,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,GAAG,S
AAS,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,EAAE;YACpE,OAAO;YACP,KAAK,EAAE,CAAC;YACR,KAAK,EAAE,EAAE;YACT,SAAS,EAAE,IAAI,IAAI,EAAE;YACrB,QAAQ,EAAE,SAAS,CAAC,KAAK,CAAC,QAAQ;YAClC,KAAK,EAAE;gBACL,WAAW,EAAE,SAAS,CAAC,KAAK,CAAC,WAAW;gBACxC,YAAY,EAAE,SAAS,CAAC,KAAK,CAAC,YAAY;gBAC1C,IAAI,EAAE,SAAS,CAAC,KAAK,CAAC,IAAI;aAC3B;YACD,MAAM,EAAE,OAAO,EAAE,MAAM;YACvB,MAAM,EAAE,KAAK;YACb,QAAQ,EAAE,OAAO,EAAE,QAAQ;SAC5B,CAAC;QAEF,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,eAAe,CAAC,QAAQ,CAAC,KAAK,EAAE,OAAO,EAAE,QAAQ,CAAC,CAAC;QACjF,KAAK,CAAC,KAAK,GAAG,UAAU,CAAC,KAAK,CAAC;QAC/B,KAAK,CAAC,OAAO,CAAC,YAAY;YACxB,UAAU,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,cAAc,CAAC,EAAE,KAAK,IAAI,OAAO,CAAC,YAAY,CAAC;QAE3F,MAAM,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QAEnC,OAAO,KAAK,CAAC;IACf,CAAC;IAED,KAAK,CAAC,OAAO,CACX,KAAY,EACZ,SAAuD,EACvD,OAAwB;QAExB,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,MAAM,SAAS,GAAG,OAAO,EAAE,SAAS,IAAI,CAAC,CAAC;QAC1C,MAAM,oBAAoB,GAAG,OAAO,EAAE,oBAAoB,IAAI,CAAC,CAAC;QAEhE,MAAM,UAAU,GAAW,EAAE,CAAC;QAC9B,MAAM,YAAY,GAAW,EAAE,CAAC;QAChC,MAAM,MAAM,GAAa,EAAE,CAAC;QAC5B,IAAI,UAAU,GAAG,CAAC,CAAC;QAEnB,MAAM,cAAc,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QAC9D,MAAM,WAAW,GACf,cAAc,CAAC,MAAM,GAAG,CAAC;YACvB,CAAC,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,GAAG,cAAc,CAAC,MAAM;YAC7E,CAAC,CAAC,CAAC,CAAC;QAER,MAAM,kBAAkB,GAAG,KAAK,CAAC,YAAY,CAAC;QAC9C,IAAI,mBAAmB,GAAG,kBAAkB,CAAC;QAE7C,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,SAAS,EAAE,KAAK,EAAE,EAAE,CAAC;YAC/C,MAAM,iBAAiB,GAAG,cAAc;iBACrC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,eAAe,IAAI,GAAG,CAAC,CAAC;iBAC9D,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC;iBACjC,KAAK,CAAC,CAAC,EAAE,oBAAoB,CAAC,CAAC;YAElC,KAAK,MAAM,KAAK,IAAI,iBAAiB,EAAE,CAAC;gBACtC,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC;oBAClB,IAAI,CAAC;wBACH,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;wBACp
D,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;oBACxB,CAAC;oBAAC,OAAO,CAAC,EAAE,CAAC;wBACX,MAAM,CAAC,IAAI,CAAC,uBAAuB,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;oBACnF,CAAC;gBACH,CAAC;YACH,CAAC;YAED,IAAI,OAAO,EAAE,oBAAoB,KAAK,KAAK,EAAE,CAAC;gBAC5C,IAAI,CAAC;oBACH,MAAM,kBAAkB,GAAG,MAAM,IAAI,CAAC,oBAAoB,CAAC,QAAQ,CACjE,KAAK,CAAC,EAAE,EACR,mBAAmB,CACpB,CAAC;oBAEF,IAAI,kBAAkB,CAAC,WAAW,GAAG,CAAC,EAAE,CAAC;wBACvC,mBAAmB,GAAG,kBAAkB,CAAC,qBAAqB,CAAC;oBACjE,CAAC;gBACH,CAAC;gBAAC,OAAO,CAAC,EAAE,CAAC;oBACX,MAAM,CAAC,IAAI,CACT,oCAAoC,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CACjF,CAAC;gBACJ,CAAC;YACH,CAAC;QACH,CAAC;QAED,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QACzD,MAAM,UAAU,GACd,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;QAE/F,MAAM,KAAK,GAAG,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,IAAI;YACnD,OAAO,EAAE,IAAI,IAAI,EAAE;YACnB,KAAK,EAAE,CAAC;YACR,gBAAgB,EAAE,CAAC;SACpB,CAAC;QACF,KAAK,CAAC,OAAO,GAAG,IAAI,IAAI,EAAE,CAAC;QAC3B,KAAK,CAAC,KAAK,EAAE,CAAC;QACd,KAAK,CAAC,gBAAgB,IAAI,UAAU,GAAG,WAAW,CAAC;QACnD,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,EAAE,KAAK,CAAC,CAAC;QAE3C,OAAO;YACL,OAAO,EAAE,MAAM,CAAC,MAAM,KAAK,CAAC;YAC5B,kBAAkB;YAClB,iBAAiB,EAAE,mBAAmB;YACtC,UAAU;YACV,YAAY;YACZ,WAAW;YACX,UAAU;YACV,WAAW,EAAE,UAAU,GAAG,WAAW;YACrC,eAAe,EAAE,cAAc,CAAC,MAAM;YACtC,eAAe,EAAE,SAAS;YAC1B,QAAQ,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;YAChC,UAAU;YACV,MAAM;SACP,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,cAAc,CAAC,OAAe;QAClC,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;QACvD,MAAM,iBAAiB,GAAG,MAAM,CAAC,MAAM,CAAC;QAExC,IAAI,iBAAiB,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,gBAAgB,IAAI,CAAC,CAAC,EAAE,CAAC;YAC7D,OAAO,IAAI,CAAC,YAAY,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;QAChD,CAAC;QAED,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;Q
ACxD,MAAM,UAAU,GAAG,SAAS;aACzB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,KAAK,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,eAAe,IAAI,GAAG,CAAC,CAAC;aAC3E,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;QAErC,MAAM,QAAQ,GAAW,EAAE,CAAC;QAC5B,MAAM,cAAc,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,gBAAgB,IAAI,CAAC,CAAC,GAAG,iBAAiB,CAAC;QAE/E,KAAK,MAAM,KAAK,IAAI,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,cAAc,CAAC,EAAE,CAAC;YACxD,IAAI,CAAC;gBACH,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;gBACpD,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACtB,CAAC;YAAC,MAAM,CAAC;gBACP,SAAS;YACX,CAAC;QACH,CAAC;QAED,OAAO,CAAC,GAAG,IAAI,CAAC,YAAY,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC;IACrD,CAAC;IAED,KAAK,CAAC,iBAAiB,CAAC,OAAe,EAAE,KAAa,EAAE,KAAc;QACpE,OAAO,IAAI,CAAC,YAAY,CAAC,WAAW,CAAC,OAAO,EAAE,KAAK,EAAE,KAAK,IAAI,CAAC,CAAC,CAAC;IACnE,CAAC;IAED,oBAAoB,CAAC,KAAa;QAChC,OAAO,IAAI,CAAC,YAAY,CAAC,oBAAoB,CAAC,KAAK,CAAC,CAAC;IACvD,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,OAAe;QAC5B,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;QAC3D,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC;QAChE,MAAM,iBAAiB,GAAG,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;QAE7D,OAAO;YACL,MAAM,EAAE,UAAU;YAClB,KAAK,EAAE,SAAS;YAChB,YAAY,EAAE;gBACZ,OAAO,EAAE,iBAAiB,EAAE,OAAO;gBACnC,aAAa,EAAE,iBAAiB,EAAE,KAAK,IAAI,CAAC;gBAC5C,kBAAkB,EAAE,iBAAiB;oBACnC,CAAC,CAAC,iBAAiB,CAAC,gBAAgB,GAAG,iBAAiB,CAAC,KAAK;oBAC9D,CAAC,CAAC,CAAC;aACN;SACF,CAAC;IACJ,CAAC;IAEO,YAAY,CAAC,SAAoB;QACvC,MAAM,KAAK,GAAoB,EAAE,CAAC;QAClC,IAAI,KAAK,GAAG,CAAC,CAAC;QAEd,KAAK,MAAM,IAAI,IAAI,SAAS,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;YACzC,IAAI,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,IAAI,IAAI,CAAC,UAAU,EAAE,QAAQ,EAAE,CAAC;gBACjE,MAAM,QAAQ,GAAG,SAAS,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,IAAI,KAAK,IAAI,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC;gBAEzF,KAAK,CAAC,IAAI,CAAC;oBACT,KAAK,EAAE,KAAK,EAAE;oBACd,IAAI,EAAE,WAAW;oBACjB,SAAS,EAAE,IAAI,CAAC,SAAS;oBACzB,QAAQ,EAAE,IAAI,CAAC,QAAQ;oBACvB,QAAQ;oBACR,UAA
U,EAAE,QAAQ;wBAClB,CAAC,CAAC;4BACE,MAAM,EAAE,QAAQ,CAAC,EAAE;4BACnB,IAAI,EAAE,QAAQ,CAAC,IAAI;4BACnB,MAAM,EAAE,IAAI,CAAC,UAAU,EAAE,MAAM;4BAC/B,KAAK,EACH,IAAI,CAAC,MAAM,KAAK,OAAO;gCACrB,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,UAAU,EAAE,KAAK,IAAI,eAAe,CAAC;gCACnD,CAAC,CAAC,SAAS;yBAChB;wBACH,CAAC,CAAC,SAAS;iBACd,CAAC,CAAC;YACL,CAAC;iBAAM,IAAI,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;gBACnE,KAAK,CAAC,IAAI,CAAC;oBACT,KAAK,EAAE,KAAK,EAAE;oBACd,IAAI,EAAE,UAAU;oBAChB,SAAS,EAAE,IAAI,CAAC,SAAS;oBACzB,QAAQ,EAAE,IAAI,CAAC,QAAQ;oBACvB,UAAU,EAAE;wBACV,KAAK,EAAE,MAAM,CAAC,IAAI,CAAC,UAAU,EAAE,WAAW,IAAI,CAAC,CAAC;wBAChD,MAAM,EAAE,MAAM,CAAC,IAAI,CAAC,UAAU,EAAE,YAAY,IAAI,CAAC,CAAC;qBACnD;iBACF,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,IAAI,SAAS,CAAC,WAAW,EAAE,CAAC;YAC1B,KAAK,MAAM,UAAU,IAAI,SAAS,CAAC,WAAW,EAAE,CAAC;gBAC/C,KAAK,CAAC,IAAI,CAAC;oBACT,KAAK,EAAE,KAAK,EAAE;oBACd,IAAI,EAAE,YAAY;oBAClB,SAAS,EAAE,UAAU,CAAC,SAAS,CAAC,OAAO,EAAE;oBACzC,QAAQ,EAAE,CAAC;oBACX,UAAU;iBACX,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,GAAG,CAAC,CAAC,SAAS,CAAC,CAAC;QAChD,OAAO,KAAK,CAAC;IACf,CAAC;IAEO,mBAAmB,CAAC,SAAoB,EAAE,KAAsB;QACtE,MAAM,SAAS,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,WAAW,CAAC,CAAC;QAC9D,MAAM,eAAe,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC;QAEtE,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC;QACzD,MAAM,YAAY,GAAG,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,eAAe,CAAC,MAAM,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;QAE1F,MAAM,WAAW,GAAG,SAAS,CAAC,KAAK,CAAC,WAAW,GAAG,SAAS,CAAC,KAAK,CAAC,YAAY,CAAC;QAC/E,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,CAAC;QAEjE,MAAM,YAAY,GAAG,SAAS,CAAC,MAAM,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;QAE9D,OAAO;YACL,OAAO,EAAE,CAAC,SAAS;YACnB,YAAY;YACZ,UAAU;YACV,YAAY;SACb,CAAC
;IACJ,CAAC;IAED,aAAa;QACX,OAAO,IAAI,CAAC,UAAU,CAAC;IACzB,CAAC;IAED,kBAAkB;QAChB,OAAO,IAAI,CAAC,eAAe,CAAC;IAC9B,CAAC;IAED,eAAe;QACb,OAAO,IAAI,CAAC,YAAY,CAAC;IAC3B,CAAC;IAED,uBAAuB;QACrB,OAAO,IAAI,CAAC,oBAAoB,CAAC;IACnC,CAAC;CACF"}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import type { ExecutionTrace, AutoOptimizationConfig, OptimizationRun } from '@cogitator-ai/types';
|
|
2
|
+
import type { AgentOptimizer } from './agent-optimizer';
|
|
3
|
+
import type { ABTestingFramework } from './ab-testing';
|
|
4
|
+
import type { PromptMonitor } from './prompt-monitor';
|
|
5
|
+
import type { RollbackManager } from './rollback-manager';
|
|
6
|
+
/**
 * Constructor configuration for {@link AutoOptimizer}.
 *
 * Extends AutoOptimizationConfig (declared in '@cogitator-ai/types') with the
 * four required collaborators (agentOptimizer, abTesting, monitor,
 * rollbackManager) and three optional callbacks.
 *
 * NOTE(review): the callback names suggest they fire when an optimization run
 * starts, when it completes, and when a rollback happens; the implementation
 * is not part of this declaration, so confirm the exact timing there.
 */
export interface AutoOptimizerConfig extends AutoOptimizationConfig {
|
|
7
|
+
agentOptimizer: AgentOptimizer;
|
|
8
|
+
abTesting: ABTestingFramework;
|
|
9
|
+
monitor: PromptMonitor;
|
|
10
|
+
rollbackManager: RollbackManager;
|
|
11
|
+
onOptimizationStart?: (run: OptimizationRun) => void;
|
|
12
|
+
onOptimizationComplete?: (run: OptimizationRun) => void;
|
|
13
|
+
onRollback?: (agentId: string, reason: string) => void;
|
|
14
|
+
}
|
|
15
|
+
/**
 * Type declaration for the automatic optimization coordinator.
 *
 * Public surface, as declared:
 * - `recordExecution(trace)`: accepts an ExecutionTrace; resolves to void.
 * - `triggerOptimization(agentId)`: resolves to an OptimizationRun.
 * - `forceRollback(agentId, reason)`: resolves to a boolean.
 * - `getActiveRun(agentId)`: returns an OptimizationRun or null.
 * - `getRunCount(agentId)` / `getDailyOptimizationCount(agentId)`: return numbers.
 * - `setEnabled(enabled)`: takes a boolean; returns nothing.
 *
 * NOTE(review): only signatures are visible here. What each method does
 * (e.g. when recordExecution triggers an optimization, or what the boolean
 * from forceRollback means) must be confirmed against the implementation.
 */
export declare class AutoOptimizer {
|
|
16
|
+
private config;
|
|
17
|
+
private agentOptimizer;
|
|
18
|
+
private abTesting;
|
|
19
|
+
private monitor;
|
|
20
|
+
private rollbackManager;
|
|
21
|
+
private runCounts;
|
|
22
|
+
private dailyOptimizations;
|
|
23
|
+
private activeRuns;
|
|
24
|
+
constructor(config: AutoOptimizerConfig);
|
|
25
|
+
recordExecution(trace: ExecutionTrace): Promise<void>;
|
|
26
|
+
triggerOptimization(agentId: string): Promise<OptimizationRun>;
|
|
27
|
+
forceRollback(agentId: string, reason: string): Promise<boolean>;
|
|
28
|
+
getActiveRun(agentId: string): OptimizationRun | null;
|
|
29
|
+
getRunCount(agentId: string): number;
|
|
30
|
+
getDailyOptimizationCount(agentId: string): number;
|
|
31
|
+
setEnabled(enabled: boolean): void;
|
|
32
|
+
private shouldTriggerOptimization;
|
|
33
|
+
private handleAlerts;
|
|
34
|
+
private handleABTestCompletion;
|
|
35
|
+
private createOptimizationRun;
|
|
36
|
+
private incrementDailyCount;
|
|
37
|
+
}
|
|
38
|
+
//# sourceMappingURL=auto-optimizer.d.ts.map
|