@cogitator-ai/core 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +920 -15
- package/dist/__tests__/agent.test.js +2 -2
- package/dist/__tests__/agent.test.js.map +1 -1
- package/dist/__tests__/base64.test.js +1 -1
- package/dist/__tests__/base64.test.js.map +1 -1
- package/dist/__tests__/calculator.test.js +1 -1
- package/dist/__tests__/calculator.test.js.map +1 -1
- package/dist/__tests__/cogitator-memory.test.js +2 -2
- package/dist/__tests__/cogitator-memory.test.js.map +1 -1
- package/dist/__tests__/datetime.test.js +1 -1
- package/dist/__tests__/datetime.test.js.map +1 -1
- package/dist/__tests__/exec.test.js +1 -1
- package/dist/__tests__/exec.test.js.map +1 -1
- package/dist/__tests__/filesystem.test.js +1 -1
- package/dist/__tests__/filesystem.test.js.map +1 -1
- package/dist/__tests__/google-backend.test.js +1 -1
- package/dist/__tests__/google-backend.test.js.map +1 -1
- package/dist/__tests__/hash.test.js +1 -1
- package/dist/__tests__/hash.test.js.map +1 -1
- package/dist/__tests__/http.test.js +1 -1
- package/dist/__tests__/http.test.js.map +1 -1
- package/dist/__tests__/json.test.js +1 -1
- package/dist/__tests__/json.test.js.map +1 -1
- package/dist/__tests__/logger.test.js +1 -1
- package/dist/__tests__/logger.test.js.map +1 -1
- package/dist/__tests__/random.test.js +1 -1
- package/dist/__tests__/random.test.js.map +1 -1
- package/dist/__tests__/regex.test.js +1 -1
- package/dist/__tests__/regex.test.js.map +1 -1
- package/dist/__tests__/registry.test.js +2 -2
- package/dist/__tests__/registry.test.js.map +1 -1
- package/dist/__tests__/sleep.test.js +1 -1
- package/dist/__tests__/sleep.test.js.map +1 -1
- package/dist/__tests__/tool.test.js +1 -1
- package/dist/__tests__/tool.test.js.map +1 -1
- package/dist/__tests__/uuid.test.js +1 -1
- package/dist/__tests__/uuid.test.js.map +1 -1
- package/dist/cogitator.d.ts +46 -1
- package/dist/cogitator.d.ts.map +1 -1
- package/dist/cogitator.js +274 -17
- package/dist/cogitator.js.map +1 -1
- package/dist/constitutional/constitution.d.ts +9 -0
- package/dist/constitutional/constitution.d.ts.map +1 -0
- package/dist/constitutional/constitution.js +215 -0
- package/dist/constitutional/constitution.js.map +1 -0
- package/dist/constitutional/constitutional-ai.d.ts +36 -0
- package/dist/constitutional/constitutional-ai.d.ts.map +1 -0
- package/dist/constitutional/constitutional-ai.js +163 -0
- package/dist/constitutional/constitutional-ai.js.map +1 -0
- package/dist/constitutional/critique-reviser.d.ts +20 -0
- package/dist/constitutional/critique-reviser.d.ts.map +1 -0
- package/dist/constitutional/critique-reviser.js +98 -0
- package/dist/constitutional/critique-reviser.js.map +1 -0
- package/dist/constitutional/index.d.ts +13 -0
- package/dist/constitutional/index.d.ts.map +1 -0
- package/dist/constitutional/index.js +8 -0
- package/dist/constitutional/index.js.map +1 -0
- package/dist/constitutional/input-filter.d.ts +19 -0
- package/dist/constitutional/input-filter.d.ts.map +1 -0
- package/dist/constitutional/input-filter.js +88 -0
- package/dist/constitutional/input-filter.js.map +1 -0
- package/dist/constitutional/output-filter.d.ts +19 -0
- package/dist/constitutional/output-filter.d.ts.map +1 -0
- package/dist/constitutional/output-filter.js +86 -0
- package/dist/constitutional/output-filter.js.map +1 -0
- package/dist/constitutional/prompts.d.ts +11 -0
- package/dist/constitutional/prompts.d.ts.map +1 -0
- package/dist/constitutional/prompts.js +202 -0
- package/dist/constitutional/prompts.js.map +1 -0
- package/dist/constitutional/tool-guard.d.ts +18 -0
- package/dist/constitutional/tool-guard.d.ts.map +1 -0
- package/dist/constitutional/tool-guard.js +125 -0
- package/dist/constitutional/tool-guard.js.map +1 -0
- package/dist/cost-routing/budget-enforcer.d.ts +26 -0
- package/dist/cost-routing/budget-enforcer.d.ts.map +1 -0
- package/dist/cost-routing/budget-enforcer.js +86 -0
- package/dist/cost-routing/budget-enforcer.js.map +1 -0
- package/dist/cost-routing/cost-router.d.ts +34 -0
- package/dist/cost-routing/cost-router.d.ts.map +1 -0
- package/dist/cost-routing/cost-router.js +80 -0
- package/dist/cost-routing/cost-router.js.map +1 -0
- package/dist/cost-routing/cost-tracker.d.ts +20 -0
- package/dist/cost-routing/cost-tracker.d.ts.map +1 -0
- package/dist/cost-routing/cost-tracker.js +85 -0
- package/dist/cost-routing/cost-tracker.js.map +1 -0
- package/dist/cost-routing/index.d.ts +6 -0
- package/dist/cost-routing/index.d.ts.map +1 -0
- package/dist/cost-routing/index.js +6 -0
- package/dist/cost-routing/index.js.map +1 -0
- package/dist/cost-routing/model-selector.d.ts +15 -0
- package/dist/cost-routing/model-selector.d.ts.map +1 -0
- package/dist/cost-routing/model-selector.js +216 -0
- package/dist/cost-routing/model-selector.js.map +1 -0
- package/dist/cost-routing/task-analyzer.d.ts +13 -0
- package/dist/cost-routing/task-analyzer.d.ts.map +1 -0
- package/dist/cost-routing/task-analyzer.js +185 -0
- package/dist/cost-routing/task-analyzer.js.map +1 -0
- package/dist/index.d.ts +19 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +9 -0
- package/dist/index.js.map +1 -1
- package/dist/learning/ab-testing.d.ts +45 -0
- package/dist/learning/ab-testing.d.ts.map +1 -0
- package/dist/learning/ab-testing.js +267 -0
- package/dist/learning/ab-testing.js.map +1 -0
- package/dist/learning/agent-optimizer.d.ts +42 -0
- package/dist/learning/agent-optimizer.d.ts.map +1 -0
- package/dist/learning/agent-optimizer.js +273 -0
- package/dist/learning/agent-optimizer.js.map +1 -0
- package/dist/learning/auto-optimizer.d.ts +38 -0
- package/dist/learning/auto-optimizer.d.ts.map +1 -0
- package/dist/learning/auto-optimizer.js +229 -0
- package/dist/learning/auto-optimizer.js.map +1 -0
- package/dist/learning/demo-selector.d.ts +29 -0
- package/dist/learning/demo-selector.d.ts.map +1 -0
- package/dist/learning/demo-selector.js +235 -0
- package/dist/learning/demo-selector.js.map +1 -0
- package/dist/learning/index.d.ts +24 -0
- package/dist/learning/index.d.ts.map +1 -0
- package/dist/learning/index.js +13 -0
- package/dist/learning/index.js.map +1 -0
- package/dist/learning/instruction-optimizer.d.ts +29 -0
- package/dist/learning/instruction-optimizer.d.ts.map +1 -0
- package/dist/learning/instruction-optimizer.js +175 -0
- package/dist/learning/instruction-optimizer.js.map +1 -0
- package/dist/learning/metrics.d.ts +37 -0
- package/dist/learning/metrics.d.ts.map +1 -0
- package/dist/learning/metrics.js +310 -0
- package/dist/learning/metrics.js.map +1 -0
- package/dist/learning/postgres-trace-store.d.ts +53 -0
- package/dist/learning/postgres-trace-store.d.ts.map +1 -0
- package/dist/learning/postgres-trace-store.js +692 -0
- package/dist/learning/postgres-trace-store.js.map +1 -0
- package/dist/learning/prompt-logger.d.ts +29 -0
- package/dist/learning/prompt-logger.d.ts.map +1 -0
- package/dist/learning/prompt-logger.js +157 -0
- package/dist/learning/prompt-logger.js.map +1 -0
- package/dist/learning/prompt-monitor.d.ts +29 -0
- package/dist/learning/prompt-monitor.d.ts.map +1 -0
- package/dist/learning/prompt-monitor.js +243 -0
- package/dist/learning/prompt-monitor.js.map +1 -0
- package/dist/learning/prompts.d.ts +28 -0
- package/dist/learning/prompts.d.ts.map +1 -0
- package/dist/learning/prompts.js +195 -0
- package/dist/learning/prompts.js.map +1 -0
- package/dist/learning/rollback-manager.d.ts +36 -0
- package/dist/learning/rollback-manager.d.ts.map +1 -0
- package/dist/learning/rollback-manager.js +177 -0
- package/dist/learning/rollback-manager.js.map +1 -0
- package/dist/learning/trace-store.d.ts +26 -0
- package/dist/learning/trace-store.d.ts.map +1 -0
- package/dist/learning/trace-store.js +218 -0
- package/dist/learning/trace-store.js.map +1 -0
- package/dist/llm/google.d.ts.map +1 -1
- package/dist/llm/google.js +1 -2
- package/dist/llm/google.js.map +1 -1
- package/dist/reasoning/branch-evaluator.d.ts +28 -0
- package/dist/reasoning/branch-evaluator.d.ts.map +1 -0
- package/dist/reasoning/branch-evaluator.js +143 -0
- package/dist/reasoning/branch-evaluator.js.map +1 -0
- package/dist/reasoning/branch-generator.d.ts +9 -0
- package/dist/reasoning/branch-generator.d.ts.map +1 -0
- package/dist/reasoning/branch-generator.js +60 -0
- package/dist/reasoning/branch-generator.js.map +1 -0
- package/dist/reasoning/index.d.ts +5 -0
- package/dist/reasoning/index.d.ts.map +1 -0
- package/dist/reasoning/index.js +5 -0
- package/dist/reasoning/index.js.map +1 -0
- package/dist/reasoning/prompts.d.ts +19 -0
- package/dist/reasoning/prompts.d.ts.map +1 -0
- package/dist/reasoning/prompts.js +161 -0
- package/dist/reasoning/prompts.js.map +1 -0
- package/dist/reasoning/thought-tree.d.ts +32 -0
- package/dist/reasoning/thought-tree.d.ts.map +1 -0
- package/dist/reasoning/thought-tree.js +352 -0
- package/dist/reasoning/thought-tree.js.map +1 -0
- package/dist/reflection/index.d.ts +4 -0
- package/dist/reflection/index.d.ts.map +1 -0
- package/dist/reflection/index.js +4 -0
- package/dist/reflection/index.js.map +1 -0
- package/dist/reflection/insight-store.d.ts +19 -0
- package/dist/reflection/insight-store.d.ts.map +1 -0
- package/dist/reflection/insight-store.js +129 -0
- package/dist/reflection/insight-store.js.map +1 -0
- package/dist/reflection/prompts.d.ts +18 -0
- package/dist/reflection/prompts.d.ts.map +1 -0
- package/dist/reflection/prompts.js +157 -0
- package/dist/reflection/prompts.js.map +1 -0
- package/dist/reflection/reflection-engine.d.ts +25 -0
- package/dist/reflection/reflection-engine.d.ts.map +1 -0
- package/dist/reflection/reflection-engine.js +202 -0
- package/dist/reflection/reflection-engine.js.map +1 -0
- package/dist/registry.d.ts +1 -0
- package/dist/registry.d.ts.map +1 -1
- package/dist/registry.js +3 -0
- package/dist/registry.js.map +1 -1
- package/dist/time-travel/checkpoint-store.d.ts +34 -0
- package/dist/time-travel/checkpoint-store.d.ts.map +1 -0
- package/dist/time-travel/checkpoint-store.js +240 -0
- package/dist/time-travel/checkpoint-store.js.map +1 -0
- package/dist/time-travel/comparator.d.ts +26 -0
- package/dist/time-travel/comparator.d.ts.map +1 -0
- package/dist/time-travel/comparator.js +253 -0
- package/dist/time-travel/comparator.js.map +1 -0
- package/dist/time-travel/forker.d.ts +22 -0
- package/dist/time-travel/forker.d.ts.map +1 -0
- package/dist/time-travel/forker.js +118 -0
- package/dist/time-travel/forker.js.map +1 -0
- package/dist/time-travel/index.d.ts +6 -0
- package/dist/time-travel/index.d.ts.map +1 -0
- package/dist/time-travel/index.js +6 -0
- package/dist/time-travel/index.js.map +1 -0
- package/dist/time-travel/replayer.d.ts +20 -0
- package/dist/time-travel/replayer.d.ts.map +1 -0
- package/dist/time-travel/replayer.js +147 -0
- package/dist/time-travel/replayer.js.map +1 -0
- package/dist/time-travel/time-travel.d.ts +41 -0
- package/dist/time-travel/time-travel.d.ts.map +1 -0
- package/dist/time-travel/time-travel.js +127 -0
- package/dist/time-travel/time-travel.js.map +1 -0
- package/dist/tool.d.ts.map +1 -1
- package/dist/tool.js +2 -0
- package/dist/tool.js.map +1 -1
- package/dist/tools/base64.d.ts.map +1 -1
- package/dist/tools/base64.js +2 -8
- package/dist/tools/base64.js.map +1 -1
- package/dist/tools/datetime.d.ts.map +1 -1
- package/dist/tools/datetime.js.map +1 -1
- package/dist/tools/exec.d.ts.map +1 -1
- package/dist/tools/exec.js +1 -4
- package/dist/tools/exec.js.map +1 -1
- package/dist/tools/filesystem.d.ts.map +1 -1
- package/dist/tools/filesystem.js +4 -1
- package/dist/tools/filesystem.js.map +1 -1
- package/dist/tools/hash.d.ts.map +1 -1
- package/dist/tools/hash.js +1 -4
- package/dist/tools/hash.js.map +1 -1
- package/dist/tools/http.d.ts.map +1 -1
- package/dist/tools/http.js +1 -4
- package/dist/tools/http.js.map +1 -1
- package/dist/tools/regex.d.ts.map +1 -1
- package/dist/tools/regex.js +4 -1
- package/dist/tools/regex.js.map +1 -1
- package/dist/utils/circuit-breaker.d.ts.map +1 -1
- package/dist/utils/circuit-breaker.js.map +1 -1
- package/dist/utils/fallback.d.ts.map +1 -1
- package/dist/utils/fallback.js +1 -4
- package/dist/utils/fallback.js.map +1 -1
- package/dist/utils/retry.d.ts.map +1 -1
- package/dist/utils/retry.js +8 -13
- package/dist/utils/retry.js.map +1 -1
- package/package.json +17 -8
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
import { buildFailureAnalysisPrompt, buildInstructionCandidatePrompt, buildInstructionEvaluationPrompt, buildInstructionRefinementPrompt, parseFailureAnalysisResponse, parseInstructionCandidatesResponse, parseInstructionEvaluationResponse, parseInstructionRefinementResponse, } from './prompts';
|
|
2
|
+
/**
 * Proposes improved agent instructions by asking an LLM to analyse recorded
 * execution traces, generate candidate rewrites, score them and optionally
 * refine the best one.
 *
 * Every LLM round-trip is best-effort: a failed call or an unparseable
 * response degrades to an empty/neutral result instead of throwing.
 */
export class InstructionOptimizer {
    llm;
    model;
    traceStore;
    insightStore;
    candidateCount;
    refinementRounds;
    /**
     * @param {object} options
     * @param {object} options.llm - Backend exposing `chat({ model, messages, temperature, maxTokens })`.
     * @param {string} options.model - Model identifier forwarded on every `chat` call.
     * @param {object} options.traceStore - Store exposing `getAll(agentId)`.
     * @param {object} [options.insightStore] - Optional store exposing `findRelevant(agentId, text, limit)`.
     * @param {number} [options.candidateCount=3] - Maximum number of candidates kept per run.
     * @param {number} [options.refinementRounds=1] - Maximum number of refinement passes.
     */
    constructor(options) {
        this.llm = options.llm;
        this.model = options.model;
        this.traceStore = options.traceStore;
        this.insightStore = options.insightStore;
        this.candidateCount = options.candidateCount ?? 3;
        this.refinementRounds = options.refinementRounds ?? 1;
    }
    /**
     * Runs the full pipeline: failure analysis, candidate generation,
     * candidate evaluation, refinement of the winner, and score estimation.
     *
     * @param {string} agentId - Agent whose traces (and insights) are looked up.
     * @param {string} currentInstructions - Instructions to improve.
     * @param {object} [options]
     * @param {Array} [options.traces] - Traces to use instead of querying the trace store.
     * @param {number} [options.maxTraces=20] - Only the first `maxTraces` traces are considered.
     * @returns {Promise<object>} Result with `originalInstructions`, `optimizedInstructions`,
     *   `improvement`, `gapsAddressed`, `candidatesEvaluated` and `reasoning`. When a stage
     *   yields nothing, the original instructions are returned with `improvement: 0`.
     */
    async optimize(agentId, currentInstructions, options) {
        const traces = options?.traces ?? (await this.traceStore.getAll(agentId));
        const maxTraces = options?.maxTraces ?? 20;
        const relevantTraces = traces.slice(0, maxTraces);
        // Shared shape for every "nothing to change" outcome.
        const unchanged = (gapsAddressed, reasoning) => ({
            originalInstructions: currentInstructions,
            optimizedInstructions: currentInstructions,
            improvement: 0,
            gapsAddressed,
            candidatesEvaluated: 0,
            reasoning,
        });
        if (relevantTraces.length === 0) {
            return unchanged([], 'No traces available for optimization');
        }
        const gaps = await this.analyzeFailures(relevantTraces, currentInstructions);
        if (gaps.length === 0) {
            return unchanged([], 'No instruction gaps identified');
        }
        const insights = this.insightStore
            ? await this.insightStore.findRelevant(agentId, currentInstructions, 10)
            : [];
        const candidates = await this.generateCandidates(currentInstructions, gaps, insights);
        if (candidates.length === 0) {
            return unchanged(gaps, 'Failed to generate instruction candidates');
        }
        const evaluations = await this.evaluateCandidates(candidates, relevantTraces);
        // The first candidate is the default winner; it is only replaced by a
        // candidate whose score is strictly greater than the running best.
        let bestCandidate = candidates[0];
        let bestScore = 0;
        let bestEvaluation = evaluations.get(candidates[0]) ?? { score: 0, weaknesses: [] };
        for (const [candidate, evaluation] of evaluations) {
            if (evaluation.score > bestScore) {
                bestScore = evaluation.score;
                bestCandidate = candidate;
                bestEvaluation = evaluation;
            }
        }
        // Each round refines against the winner's original weaknesses; the
        // refined text is not re-evaluated between rounds.
        let finalInstructions = bestCandidate;
        for (let round = 0; round < this.refinementRounds; round++) {
            if (bestEvaluation.weaknesses.length === 0) {
                break;
            }
            const refined = await this.refineInstructions(finalInstructions, bestEvaluation.weaknesses);
            if (refined) {
                finalInstructions = refined;
            }
        }
        const originalScore = await this.estimateInstructionScore(currentInstructions, relevantTraces);
        const newScore = await this.estimateInstructionScore(finalInstructions, relevantTraces);
        return {
            originalInstructions: currentInstructions,
            optimizedInstructions: finalInstructions,
            improvement: newScore - originalScore,
            gapsAddressed: gaps,
            candidatesEvaluated: candidates.length,
            reasoning: `Identified ${gaps.length} gaps, evaluated ${candidates.length} candidates, best score: ${bestScore.toFixed(2)}`,
        };
    }
    /**
     * Asks the LLM which gaps in the instructions explain the traces.
     *
     * @returns {Promise<Array>} The parsed `gaps`, or `[]` when the call fails
     *   or the response cannot be parsed.
     */
    async analyzeFailures(traces, currentInstructions) {
        const prompt = buildFailureAnalysisPrompt(traces, currentInstructions);
        try {
            const response = await this.llm.chat({
                model: this.model,
                messages: [{ role: 'user', content: prompt }],
                temperature: 0.5,
                maxTokens: 1000,
            });
            const parsed = parseFailureAnalysisResponse(response.content);
            return parsed?.gaps ?? [];
        }
        catch {
            // Best-effort: a failed analysis is reported as "no gaps".
            return [];
        }
    }
    /**
     * Asks the LLM for rewritten instruction candidates.
     *
     * Entries without a non-empty string `instructions` field are skipped
     * individually, so one malformed entry no longer discards the valid ones.
     *
     * @returns {Promise<string[]>} At most `candidateCount` candidates; `[]`
     *   when the call fails or the response is not a list.
     */
    async generateCandidates(currentInstructions, gaps, insights) {
        const prompt = buildInstructionCandidatePrompt(currentInstructions, gaps, insights);
        try {
            const response = await this.llm.chat({
                model: this.model,
                messages: [{ role: 'user', content: prompt }],
                temperature: 0.7,
                maxTokens: 2000,
            });
            const parsed = parseInstructionCandidatesResponse(response.content);
            if (!Array.isArray(parsed)) {
                return [];
            }
            return parsed
                .map((c) => c?.instructions)
                .filter((i) => typeof i === 'string' && i.length > 0)
                .slice(0, this.candidateCount);
        }
        catch {
            // Best-effort: a failed generation is reported as "no candidates".
            return [];
        }
    }
    /**
     * Scores each candidate with one LLM call per candidate, sequentially.
     *
     * @returns {Promise<Map<string, {score: number, weaknesses: Array}>>}
     *   One entry per distinct candidate. A failed call, an unparseable
     *   response or a non-finite score yields the neutral score 0.5; a
     *   missing/non-array `weaknesses` is normalised to `[]` so callers can
     *   always read `.weaknesses.length`.
     */
    async evaluateCandidates(candidates, traces) {
        const evaluations = new Map();
        for (const candidate of candidates) {
            const prompt = buildInstructionEvaluationPrompt(candidate, traces);
            let evaluation = { score: 0.5, weaknesses: [] };
            try {
                const response = await this.llm.chat({
                    model: this.model,
                    messages: [{ role: 'user', content: prompt }],
                    temperature: 0.3,
                    maxTokens: 500,
                });
                const parsed = parseInstructionEvaluationResponse(response.content);
                if (parsed) {
                    evaluation = {
                        score: Number.isFinite(parsed.score) ? parsed.score : 0.5,
                        weaknesses: Array.isArray(parsed.weaknesses) ? parsed.weaknesses : [],
                    };
                }
            }
            catch {
                // Best-effort: keep the neutral fallback evaluation.
            }
            evaluations.set(candidate, evaluation);
        }
        return evaluations;
    }
    /**
     * Asks the LLM to rewrite `candidate` so that it addresses `weaknesses`.
     *
     * @returns {Promise<string|null>} The refined instructions, or `null` when
     *   the call fails or the response carries no `instructions`.
     */
    async refineInstructions(candidate, weaknesses) {
        const prompt = buildInstructionRefinementPrompt(candidate, weaknesses);
        try {
            const response = await this.llm.chat({
                model: this.model,
                messages: [{ role: 'user', content: prompt }],
                temperature: 0.5,
                maxTokens: 1500,
            });
            const parsed = parseInstructionRefinementResponse(response.content);
            return parsed?.instructions ?? null;
        }
        catch {
            return null;
        }
    }
    /**
     * Heuristic score in [0, 1] built from the trace success rate, the mean
     * trace score and a length bonus (+0.1 under 500 characters, -0.1 over
     * 1500). No LLM call is made.
     *
     * optimize() passes the same traces for both the original and the new
     * instructions, so before clamping the two scores differ only by the
     * length bonus.
     * NOTE(review): the fixed +0.5 offset pushes many inputs onto the upper
     * clamp of 1 - confirm the offset is intended.
     */
    async estimateInstructionScore(instructions, traces) {
        const successfulTraces = traces.filter((t) => t.metrics.success);
        const baseScore = traces.length > 0 ? successfulTraces.length / traces.length : 0.5;
        const avgTraceScore = traces.length > 0 ? traces.reduce((sum, t) => sum + t.score, 0) / traces.length : 0.5;
        const instructionLength = instructions.length;
        const conciseBonus = instructionLength < 500 ? 0.1 : instructionLength > 1500 ? -0.1 : 0;
        return Math.max(0, Math.min(1, baseScore * 0.4 + avgTraceScore * 0.5 + 0.5 + conciseBonus));
    }
}
|
|
175
|
+
//# sourceMappingURL=instruction-optimizer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"instruction-optimizer.js","sourceRoot":"","sources":["../../src/learning/instruction-optimizer.ts"],"names":[],"mappings":"AASA,OAAO,EACL,0BAA0B,EAC1B,+BAA+B,EAC/B,gCAAgC,EAChC,gCAAgC,EAChC,4BAA4B,EAC5B,kCAAkC,EAClC,kCAAkC,EAClC,kCAAkC,GACnC,MAAM,WAAW,CAAC;AAWnB,MAAM,OAAO,oBAAoB;IACvB,GAAG,CAAa;IAChB,KAAK,CAAS;IACd,UAAU,CAAa;IACvB,YAAY,CAAgB;IAC5B,cAAc,CAAS;IACvB,gBAAgB,CAAS;IAEjC,YAAY,OAAoC;QAC9C,IAAI,CAAC,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC;QACvB,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC;QAC3B,IAAI,CAAC,UAAU,GAAG,OAAO,CAAC,UAAU,CAAC;QACrC,IAAI,CAAC,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC;QACzC,IAAI,CAAC,cAAc,GAAG,OAAO,CAAC,cAAc,IAAI,CAAC,CAAC;QAClD,IAAI,CAAC,gBAAgB,GAAG,OAAO,CAAC,gBAAgB,IAAI,CAAC,CAAC;IACxD,CAAC;IAED,KAAK,CAAC,QAAQ,CACZ,OAAe,EACf,mBAA2B,EAC3B,OAIC;QAED,MAAM,MAAM,GAAG,OAAO,EAAE,MAAM,IAAI,CAAC,MAAM,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC;QAC1E,MAAM,SAAS,GAAG,OAAO,EAAE,SAAS,IAAI,EAAE,CAAC;QAC3C,MAAM,cAAc,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;QAElD,IAAI,cAAc,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAChC,OAAO;gBACL,oBAAoB,EAAE,mBAAmB;gBACzC,qBAAqB,EAAE,mBAAmB;gBAC1C,WAAW,EAAE,CAAC;gBACd,aAAa,EAAE,EAAE;gBACjB,mBAAmB,EAAE,CAAC;gBACtB,SAAS,EAAE,sCAAsC;aAClD,CAAC;QACJ,CAAC;QAED,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,eAAe,CAAC,cAAc,EAAE,mBAAmB,CAAC,CAAC;QAE7E,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACtB,OAAO;gBACL,oBAAoB,EAAE,mBAAmB;gBACzC,qBAAqB,EAAE,mBAAmB;gBAC1C,WAAW,EAAE,CAAC;gBACd,aAAa,EAAE,EAAE;gBACjB,mBAAmB,EAAE,CAAC;gBACtB,SAAS,EAAE,gCAAgC;aAC5C,CAAC;QACJ,CAAC;QAED,MAAM,QAAQ,GAAG,IAAI,CAAC,YAAY;YAChC,CAAC,CAAC,MAAM,IAAI,CAAC,YAAY,CAAC,YAAY,CAAC,OAAO,EAAE,mBAAmB,EAAE,EAAE,CAAC;YACxE,CAAC,CAAC,EAAE,CAAC;QAEP,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,kBAAkB,CAAC,mBAAmB,EAAE,IAAI,EAAE,QAAQ,CAAC,CAAC;QAEtF,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC5B,OAAO;gBACL,oBAAoB,EAAE,mBAAmB;gBACzC,qBAAqB,EAAE,mBAAmB;gBAC1C,WAAW,EAAE,CAAC;gBACd,aAAa,EAAE,IAAI;gBACnB,mBAAmB,EAAE,CAAC;gBACtB,SAAS,EAAE,2CAA2C;aACvD,CAAC;QACJ,CAAC;QAED,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,kBAAkB,C
AAC,UAAU,EAAE,cAAc,CAAC,CAAC;QAE9E,IAAI,aAAa,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;QAClC,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,IAAI,cAAc,GAAG,WAAW,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC,EAAE,UAAU,EAAE,EAAE,EAAE,CAAC;QAEpF,KAAK,MAAM,CAAC,SAAS,EAAE,UAAU,CAAC,IAAI,WAAW,EAAE,CAAC;YAClD,IAAI,UAAU,CAAC,KAAK,GAAG,SAAS,EAAE,CAAC;gBACjC,SAAS,GAAG,UAAU,CAAC,KAAK,CAAC;gBAC7B,aAAa,GAAG,SAAS,CAAC;gBAC1B,cAAc,GAAG,UAAU,CAAC;YAC9B,CAAC;QACH,CAAC;QAED,IAAI,iBAAiB,GAAG,aAAa,CAAC;QACtC,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,IAAI,CAAC,gBAAgB,EAAE,KAAK,EAAE,EAAE,CAAC;YAC3D,IAAI,cAAc,CAAC,UAAU,CAAC,MAAM,KAAK,CAAC;gBAAE,MAAM;YAElD,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,kBAAkB,CAAC,iBAAiB,EAAE,cAAc,CAAC,UAAU,CAAC,CAAC;YAE5F,IAAI,OAAO,EAAE,CAAC;gBACZ,iBAAiB,GAAG,OAAO,CAAC;YAC9B,CAAC;QACH,CAAC;QAED,MAAM,aAAa,GAAG,MAAM,IAAI,CAAC,wBAAwB,CAAC,mBAAmB,EAAE,cAAc,CAAC,CAAC;QAC/F,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,wBAAwB,CAAC,iBAAiB,EAAE,cAAc,CAAC,CAAC;QAExF,OAAO;YACL,oBAAoB,EAAE,mBAAmB;YACzC,qBAAqB,EAAE,iBAAiB;YACxC,WAAW,EAAE,QAAQ,GAAG,aAAa;YACrC,aAAa,EAAE,IAAI;YACnB,mBAAmB,EAAE,UAAU,CAAC,MAAM;YACtC,SAAS,EAAE,cAAc,IAAI,CAAC,MAAM,oBAAoB,UAAU,CAAC,MAAM,4BAA4B,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE;SAC5H,CAAC;IACJ,CAAC;IAEO,KAAK,CAAC,eAAe,CAC3B,MAAwB,EACxB,mBAA2B;QAE3B,MAAM,MAAM,GAAG,0BAA0B,CAAC,MAAM,EAAE,mBAAmB,CAAC,CAAC;QAEvE,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC;gBACnC,KAAK,EAAE,IAAI,CAAC,KAAK;gBACjB,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC;gBAC7C,WAAW,EAAE,GAAG;gBAChB,SAAS,EAAE,IAAI;aAChB,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,4BAA4B,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;YAC9D,OAAO,MAAM,EAAE,IAAI,IAAI,EAAE,CAAC;QAC5B,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,kBAAkB,CAC9B,mBAA2B,EAC3B,IAAsB,EACtB,QAAmB;QAEnB,MAAM,MAAM,GAAG,+BAA+B,CAAC,mBAAmB,EAAE,IAAI,EAAE,QAAQ,CAAC,CAAC;QAEpF,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC;gBACnC,KAAK,EAAE,IAAI,CAAC,KAAK;gBACjB,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE
,CAAC;gBAC7C,WAAW,EAAE,GAAG;gBAChB,SAAS,EAAE,IAAI;aAChB,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,kCAAkC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;YACpE,OAAO,MAAM;iBACV,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,YAAY,CAAC;iBAC1B,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;iBAC3B,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;QACnC,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,kBAAkB,CAC9B,UAAoB,EACpB,MAAwB;QAExB,MAAM,WAAW,GAAG,IAAI,GAAG,EAAmD,CAAC;QAE/E,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;YACnC,MAAM,MAAM,GAAG,gCAAgC,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;YAEnE,IAAI,CAAC;gBACH,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC;oBACnC,KAAK,EAAE,IAAI,CAAC,KAAK;oBACjB,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC;oBAC7C,WAAW,EAAE,GAAG;oBAChB,SAAS,EAAE,GAAG;iBACf,CAAC,CAAC;gBAEH,MAAM,MAAM,GAAG,kCAAkC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;gBACpE,IAAI,MAAM,EAAE,CAAC;oBACX,WAAW,CAAC,GAAG,CAAC,SAAS,EAAE;wBACzB,KAAK,EAAE,MAAM,CAAC,KAAK;wBACnB,UAAU,EAAE,MAAM,CAAC,UAAU;qBAC9B,CAAC,CAAC;gBACL,CAAC;qBAAM,CAAC;oBACN,WAAW,CAAC,GAAG,CAAC,SAAS,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,UAAU,EAAE,EAAE,EAAE,CAAC,CAAC;gBAC7D,CAAC;YACH,CAAC;YAAC,MAAM,CAAC;gBACP,WAAW,CAAC,GAAG,CAAC,SAAS,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,UAAU,EAAE,EAAE,EAAE,CAAC,CAAC;YAC7D,CAAC;QACH,CAAC;QAED,OAAO,WAAW,CAAC;IACrB,CAAC;IAEO,KAAK,CAAC,kBAAkB,CAC9B,SAAiB,EACjB,UAAoB;QAEpB,MAAM,MAAM,GAAG,gCAAgC,CAAC,SAAS,EAAE,UAAU,CAAC,CAAC;QAEvE,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC;gBACnC,KAAK,EAAE,IAAI,CAAC,KAAK;gBACjB,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC;gBAC7C,WAAW,EAAE,GAAG;gBAChB,SAAS,EAAE,IAAI;aAChB,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,kCAAkC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;YACpE,OAAO,MAAM,EAAE,YAAY,IAAI,IAAI,CAAC;QACtC,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,wBAAwB,CACpC,YAAoB,EACpB,MAAwB;QAExB,MAAM,gBAAgB,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QAEjE,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,GAAG
,CAAC,CAAC,CAAC,CAAC,gBAAgB,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC;QAEpF,MAAM,aAAa,GACjB,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC;QAExF,MAAM,iBAAiB,GAAG,YAAY,CAAC,MAAM,CAAC;QAC9C,MAAM,YAAY,GAAG,iBAAiB,GAAG,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,iBAAiB,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QAEzF,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,SAAS,GAAG,GAAG,GAAG,aAAa,GAAG,GAAG,GAAG,GAAG,GAAG,YAAY,CAAC,CAAC,CAAC;IAC9F,CAAC;CACF"}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import type { ExecutionTrace, MetricResult, MetricFn, MetricEvaluatorConfig, LLMBackend } from '@cogitator-ai/types';
|
|
2
|
+
/**
 * Constructor options for MetricEvaluator. Every field is optional; the
 * constructor defaults the whole object to `{}`.
 */
export interface MetricEvaluatorOptions {
|
|
3
|
+
/** LLM backend stored on the evaluator as-is. NOTE(review): presumably used by the LLM-judged metrics - confirm in the implementation. */
llm?: LLMBackend;
|
|
4
|
+
/** Model identifier stored on the evaluator as-is. */
model?: string;
|
|
5
|
+
/** Overrides shallow-merged over the built-in default config (top-level keys replace the defaults wholesale). */
config?: Partial<MetricEvaluatorConfig>;
|
|
6
|
+
}
|
|
7
|
+
/**
 * Scores an execution trace against the metrics listed in its config and
 * aggregates the per-metric results into a single score and pass/fail flag.
 */
export declare class MetricEvaluator {
|
|
8
|
+
private llm?;
|
|
9
|
+
private model?;
|
|
10
|
+
private config;
|
|
11
|
+
private customMetrics;
|
|
12
|
+
/** Stores `llm` and `model` and shallow-merges `options.config` over the default config. */
constructor(options?: MetricEvaluatorOptions);
|
|
13
|
+
/**
 * Registers `fn` under `name`. A registered function takes precedence over
 * a built-in metric of the same name. Only metrics named in
 * `config.metrics` are run by evaluate(), so registering alone does not
 * schedule the metric.
 */
registerMetric(name: string, fn: MetricFn): void;
|
|
14
|
+
/**
 * Evaluates every metric in `config.metrics`, one after another in config
 * order, passing `trace` and `expected` to each.
 * `passed` is `score >= config.passThreshold`.
 */
evaluate(trace: ExecutionTrace, expected?: unknown): Promise<{
|
|
15
|
+
results: MetricResult[];
|
|
16
|
+
score: number;
|
|
17
|
+
passed: boolean;
|
|
18
|
+
}>;
|
|
19
|
+
/**
 * Evaluates the traces sequentially, pairing `traces[i]` with
 * `expectedList?.[i]`. The returned map is keyed by `trace.id`, so traces
 * sharing an id overwrite each other.
 */
evaluateBatch(traces: ExecutionTrace[], expectedList?: unknown[]): Promise<Map<string, {
|
|
20
|
+
results: MetricResult[];
|
|
21
|
+
score: number;
|
|
22
|
+
passed: boolean;
|
|
23
|
+
}>>;
|
|
24
|
+
private evaluateMetric;
|
|
25
|
+
/**
 * Built-in 'success' metric: value 0 when any step has a truthy
 * `toolResult.error` or is a 'reflection' step whose analysis reports
 * `wasSuccessful === false`; otherwise value 1. `passed` is `value === 1`.
 */
successMetric(trace: ExecutionTrace): MetricResult;
|
|
26
|
+
/**
 * Built-in 'tool_accuracy' metric over steps of type 'tool_call'.
 * Returns value 1 (passed) when the trace contains no tool calls.
 */
toolAccuracyMetric(trace: ExecutionTrace, expected?: unknown): MetricResult;
|
|
27
|
+
efficiencyMetric(trace: ExecutionTrace): MetricResult;
|
|
28
|
+
completenessMetric(trace: ExecutionTrace, expected?: unknown): Promise<MetricResult>;
|
|
29
|
+
coherenceMetric(trace: ExecutionTrace): Promise<MetricResult>;
|
|
30
|
+
private parseMetricResponse;
|
|
31
|
+
private aggregateScores;
|
|
32
|
+
getConfig(): MetricEvaluatorConfig;
|
|
33
|
+
}
|
|
34
|
+
/** Factory returning a MetricFn. NOTE(review): the implementation is not visible from this declaration; document the exact success criterion once confirmed. */
export declare function createSuccessMetric(): MetricFn;
|
|
35
|
+
/** Factory returning a MetricFn. `fieldPath` is optional. NOTE(review): presumably selects the field to compare against the expected value - confirm in the implementation. */
export declare function createExactMatchMetric(fieldPath?: string): MetricFn;
|
|
36
|
+
/** Factory returning a MetricFn built from `keywords`. NOTE(review): presumably checks the trace output for the keywords - confirm matching rules (case, partial matches) in the implementation. */
export declare function createContainsMetric(keywords: string[]): MetricFn;
|
|
37
|
+
//# sourceMappingURL=metrics.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"metrics.d.ts","sourceRoot":"","sources":["../../src/learning/metrics.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,cAAc,EACd,YAAY,EACZ,QAAQ,EACR,qBAAqB,EAErB,UAAU,EACX,MAAM,qBAAqB,CAAC;AAE7B,MAAM,WAAW,sBAAsB;IACrC,GAAG,CAAC,EAAE,UAAU,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,OAAO,CAAC,qBAAqB,CAAC,CAAC;CACzC;AAsBD,qBAAa,eAAe;IAC1B,OAAO,CAAC,GAAG,CAAC,CAAa;IACzB,OAAO,CAAC,KAAK,CAAC,CAAS;IACvB,OAAO,CAAC,MAAM,CAAwB;IACtC,OAAO,CAAC,aAAa,CAA+B;gBAExC,OAAO,GAAE,sBAA2B;IAMhD,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,EAAE,QAAQ,GAAG,IAAI;IAI1C,QAAQ,CACZ,KAAK,EAAE,cAAc,EACrB,QAAQ,CAAC,EAAE,OAAO,GACjB,OAAO,CAAC;QAAE,OAAO,EAAE,YAAY,EAAE,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,OAAO,CAAA;KAAE,CAAC;IAcjE,aAAa,CACjB,MAAM,EAAE,cAAc,EAAE,EACxB,YAAY,CAAC,EAAE,OAAO,EAAE,GACvB,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE;QAAE,OAAO,EAAE,YAAY,EAAE,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,OAAO,CAAA;KAAE,CAAC,CAAC;YAatE,cAAc;IA0B5B,aAAa,CAAC,KAAK,EAAE,cAAc,GAAG,YAAY;IAmBlD,kBAAkB,CAAC,KAAK,EAAE,cAAc,EAAE,QAAQ,CAAC,EAAE,OAAO,GAAG,YAAY;IAwC3E,gBAAgB,CAAC,KAAK,EAAE,cAAc,GAAG,YAAY;IAiB/C,kBAAkB,CAAC,KAAK,EAAE,cAAc,EAAE,QAAQ,CAAC,EAAE,OAAO,GAAG,OAAO,CAAC,YAAY,CAAC;IAiDpF,eAAe,CAAC,KAAK,EAAE,cAAc,GAAG,OAAO,CAAC,YAAY,CAAC;IAgDnE,OAAO,CAAC,mBAAmB;IAkB3B,OAAO,CAAC,eAAe;IA6BvB,SAAS,IAAI,qBAAqB;CAGnC;AAED,wBAAgB,mBAAmB,IAAI,QAAQ,CAS9C;AAED,wBAAgB,sBAAsB,CAAC,SAAS,CAAC,EAAE,MAAM,GAAG,QAAQ,CAkBnE;AAED,wBAAgB,oBAAoB,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,QAAQ,CAoBjE"}
|
|
@@ -0,0 +1,310 @@
|
|
|
1
|
+
// Built-in evaluator configuration. Each row below is
// [name, type, description, weight] and is expanded into a metric definition.
const DEFAULT_CONFIG = {
    metrics: [
        ['success', 'boolean', 'Did the run complete without errors?', 0.4],
        ['tool_accuracy', 'numeric', 'Did tools produce expected results?', 0.3],
        ['efficiency', 'numeric', 'Token/time efficiency', 0.3],
    ].map(([name, type, description, weight]) => ({ name, type, description, weight })),
    aggregation: 'weighted-average',
    // A run passes when its aggregated score reaches this value.
    passThreshold: 0.7,
};
|
|
20
|
+
/**
 * Scores an execution trace against a configurable list of metrics and folds
 * the per-metric values into one aggregate score with a pass/fail verdict.
 *
 * Built-in metric names are `success`, `tool_accuracy`, `efficiency`,
 * `completeness` and `coherence`. A function registered through
 * `registerMetric` takes precedence over a built-in of the same name.
 */
export class MetricEvaluator {
  llm;
  model;
  config;
  customMetrics = new Map();

  /**
   * @param {object} [options]
   * @param {object} [options.llm] - Object exposing `chat()`; without it (or
   *   without `model`) the `completeness` and `coherence` metrics fall back to
   *   fixed heuristics.
   * @param {string} [options.model] - Model identifier forwarded to `llm.chat`.
   * @param {object} [options.config] - Partial overrides merged over DEFAULT_CONFIG.
   */
  constructor(options = {}) {
    this.llm = options.llm;
    this.model = options.model;

    // An override that is explicitly `undefined` must not erase a default
    // (e.g. `{ metrics: undefined }` would otherwise crash `evaluate`).
    const overrides = Object.fromEntries(
      Object.entries(options.config ?? {}).filter(([, value]) => value !== undefined),
    );
    const merged = { ...DEFAULT_CONFIG, ...overrides };

    // Copy the metric list so this instance never aliases the array owned by
    // DEFAULT_CONFIG (shared by every evaluator) or by the caller.
    this.config = {
      ...merged,
      metrics: Array.isArray(merged.metrics) ? [...merged.metrics] : merged.metrics,
    };
  }

  /**
   * Registers (or replaces) a custom metric function under `name`.
   *
   * @param {string} name - Metric name as referenced from `config.metrics`.
   * @param {Function} fn - Called as `fn(trace, expected)`; its return value is
   *   used as the metric result.
   */
  registerMetric(name, fn) {
    this.customMetrics.set(name, fn);
  }

  /**
   * Evaluates every configured metric, one at a time and in configuration
   * order, then aggregates the values.
   *
   * @param {object} trace - Execution trace to score.
   * @param {*} [expected] - Optional expected outcome passed to each metric.
   * @returns {Promise<{results: object[], score: number, passed: boolean}>}
   */
  async evaluate(trace, expected) {
    const results = [];
    const weights = [];
    for (const metricDef of this.config.metrics) {
      results.push(await this.evaluateMetric(metricDef.name, trace, expected));
      // Remember the weight of the definition that produced this result: a
      // custom metric may return a `name` different from its config entry.
      weights.push(metricDef.weight ?? 1);
    }
    const score = this.aggregateScores(results, weights);
    const passed = score >= this.config.passThreshold;
    return { results, score, passed };
  }

  /**
   * Evaluates several traces sequentially.
   *
   * @param {object[]} traces - Traces to score.
   * @param {Array<*>} [expectedList] - Expected values, matched to `traces` by index.
   * @returns {Promise<Map<*, object>>} Evaluations keyed by `trace.id`.
   */
  async evaluateBatch(traces, expectedList) {
    const results = new Map();
    for (let i = 0; i < traces.length; i++) {
      const trace = traces[i];
      const evaluation = await this.evaluate(trace, expectedList?.[i]);
      results.set(trace.id, evaluation);
    }
    return results;
  }

  /**
   * Dispatches one metric by name: custom metrics first, then the built-ins.
   * An unrecognised name yields a neutral, passing result (value 0.5).
   *
   * @param {string} name
   * @param {object} trace
   * @param {*} [expected]
   * @returns {Promise<object>} Metric result.
   */
  async evaluateMetric(name, trace, expected) {
    if (this.customMetrics.has(name)) {
      const fn = this.customMetrics.get(name);
      return fn(trace, expected);
    }
    switch (name) {
      case 'success':
        return this.successMetric(trace);
      case 'tool_accuracy':
        return this.toolAccuracyMetric(trace, expected);
      case 'efficiency':
        return this.efficiencyMetric(trace);
      case 'completeness':
        return this.completenessMetric(trace, expected);
      case 'coherence':
        return this.coherenceMetric(trace);
      default:
        return { name, value: 0.5, passed: true, reasoning: 'Unknown metric' };
    }
  }

  /**
   * 1 when no step reports a tool error and no reflection step is marked
   * unsuccessful; 0 otherwise.
   *
   * @param {object} trace
   * @returns {object} Metric result.
   */
  successMetric(trace) {
    const hasErrors = trace.steps.some(
      (step) =>
        step.toolResult?.error ||
        (step.type === 'reflection' && step.reflection?.analysis?.wasSuccessful === false),
    );
    const value = hasErrors ? 0 : 1;
    return {
      name: 'success',
      value,
      passed: value === 1,
      reasoning: hasErrors
        ? 'Run had errors or failed reflections'
        : 'Run completed without errors',
    };
  }

  /**
   * Fraction of `tool_call` steps that have a result without an error. When
   * `expected` is a string, the value is shifted by +0.2 if the output contains
   * it (case-insensitive) and by -0.2 if not, clamped to [0, 1].
   *
   * @param {object} trace
   * @param {*} [expected]
   * @returns {object} Metric result; passes at a value of 0.7 or more.
   */
  toolAccuracyMetric(trace, expected) {
    const toolSteps = trace.steps.filter((s) => s.type === 'tool_call');
    if (toolSteps.length === 0) {
      return {
        name: 'tool_accuracy',
        value: 1,
        passed: true,
        reasoning: 'No tool calls to evaluate',
      };
    }

    const successfulCalls = toolSteps.filter(
      (step) => step.toolResult && !step.toolResult.error,
    ).length;
    const value = successfulCalls / toolSteps.length;

    if (typeof expected === 'string') {
      // Coerce so a missing or non-string output counts as "no match" instead
      // of throwing on `.toLowerCase()`.
      const outputText = String(trace.output ?? '').toLowerCase();
      const outputMatches = outputText.includes(expected.toLowerCase());
      const adjustedValue = outputMatches ? Math.min(value + 0.2, 1) : Math.max(value - 0.2, 0);
      return {
        name: 'tool_accuracy',
        value: adjustedValue,
        passed: adjustedValue >= 0.7,
        reasoning: `${successfulCalls}/${toolSteps.length} successful tool calls, output ${outputMatches ? 'matches' : 'does not match'} expected`,
      };
    }

    return {
      name: 'tool_accuracy',
      value,
      passed: value >= 0.7,
      reasoning: `${successfulCalls}/${toolSteps.length} successful tool calls`,
    };
  }

  /**
   * Combines a token score (full marks up to 10000 total tokens) and a time
   * score (full marks up to a duration of 30000) as 60% / 40%.
   *
   * @param {object} trace - Reads `usage.inputTokens`, `usage.outputTokens`
   *   and `duration`; absent values are treated as 0.
   * @returns {object} Metric result; passes at a value of 0.5 or more.
   */
  efficiencyMetric(trace) {
    // Missing usage/duration must not throw or poison the aggregate with NaN.
    const totalTokens = (trace.usage?.inputTokens ?? 0) + (trace.usage?.outputTokens ?? 0);
    const duration = trace.duration ?? 0;
    const tokenEfficiency = Math.min(1, 10000 / Math.max(totalTokens, 1));
    const timeEfficiency = Math.min(1, 30000 / Math.max(duration, 1));
    const value = tokenEfficiency * 0.6 + timeEfficiency * 0.4;
    return {
      name: 'efficiency',
      value,
      passed: value >= 0.5,
      reasoning: `${totalTokens} tokens in ${duration}ms`,
    };
  }

  /**
   * Asks the LLM how completely the output addresses the input. Without an
   * LLM/model it falls back to an output-length check; if the LLM call fails
   * it returns a neutral, passing default (deliberate best effort).
   *
   * @param {object} trace
   * @param {*} [expected]
   * @returns {Promise<object>} Metric result.
   */
  async completenessMetric(trace, expected) {
    if (!this.llm || !this.model) {
      const hasOutput = !!trace.output && trace.output.length > 10;
      return {
        name: 'completeness',
        value: hasOutput ? 0.7 : 0.3,
        passed: hasOutput,
        reasoning: 'Basic output length check (no LLM available)',
      };
    }
    const prompt = `Evaluate how completely this output addresses the input.

Input: ${trace.input}
Output: ${trace.output}
${expected ? `Expected: ${JSON.stringify(expected)}` : ''}

Rate completeness from 0.0 to 1.0 where:
- 0.0 = completely misses the point
- 0.5 = partially addresses input
- 1.0 = fully and thoroughly addresses input

Respond with JSON: { "score": 0.X, "reasoning": "..." }`;
    try {
      const response = await this.llm.chat({
        model: this.model,
        messages: [{ role: 'user', content: prompt }],
        temperature: 0.3,
        maxTokens: 200,
      });
      const parsed = this.parseMetricResponse(response.content);
      return {
        name: 'completeness',
        value: parsed.score,
        passed: parsed.score >= 0.7,
        reasoning: parsed.reasoning,
      };
    }
    catch {
      return {
        name: 'completeness',
        value: 0.5,
        passed: true,
        reasoning: 'Evaluation failed, using default',
      };
    }
  }

  /**
   * Asks the LLM to rate the logical coherence of the step sequence. Without
   * an LLM/model, or when the LLM call fails, a fixed passing default is
   * returned (deliberate best effort).
   *
   * @param {object} trace
   * @returns {Promise<object>} Metric result.
   */
  async coherenceMetric(trace) {
    if (!this.llm || !this.model) {
      return {
        name: 'coherence',
        value: 0.7,
        passed: true,
        reasoning: 'No LLM available for coherence check',
      };
    }
    const prompt = `Evaluate the logical coherence of this agent execution.

Input: ${trace.input}
Steps taken: ${trace.steps.map((s) => (s.type === 'tool_call' ? `Tool: ${s.toolCall?.name}` : s.type)).join(' → ')}
Output: ${trace.output}

Rate coherence from 0.0 to 1.0 where:
- 0.0 = completely incoherent, steps don't make sense
- 0.5 = somewhat logical but with issues
- 1.0 = perfectly logical and well-structured

Respond with JSON: { "score": 0.X, "reasoning": "..." }`;
    try {
      const response = await this.llm.chat({
        model: this.model,
        messages: [{ role: 'user', content: prompt }],
        temperature: 0.3,
        maxTokens: 200,
      });
      const parsed = this.parseMetricResponse(response.content);
      return {
        name: 'coherence',
        value: parsed.score,
        passed: parsed.score >= 0.6,
        reasoning: parsed.reasoning,
      };
    }
    catch {
      return {
        name: 'coherence',
        value: 0.6,
        passed: true,
        reasoning: 'Evaluation failed, using default',
      };
    }
  }

  /**
   * Extracts `{ score, reasoning }` from an LLM reply, which may be raw JSON
   * or JSON wrapped in a Markdown code fence.
   *
   * @param {string} content - Raw reply text.
   * @returns {{score: number, reasoning: string}} Score clamped to [0, 1]. A
   *   missing or non-numeric score becomes 0.5; unparseable content yields the
   *   neutral default result.
   */
  parseMetricResponse(content) {
    try {
      const codeBlockMatch = /```(?:json)?\s*([\s\S]*?)\s*```/.exec(content);
      const jsonStr = codeBlockMatch ? codeBlockMatch[1] : content;
      const parsed = JSON.parse(jsonStr);

      // Accept numbers and non-empty numeric strings. A plain `|| 0.5`
      // fallback would turn a legitimate score of 0 into 0.5.
      const raw = parsed.score;
      const candidate = typeof raw === 'string' && raw.trim() !== '' ? Number(raw) : raw;
      const score =
        typeof candidate === 'number' && !Number.isNaN(candidate)
          ? Math.max(0, Math.min(1, candidate))
          : 0.5;
      const reasoning = String(parsed.reasoning || 'No reasoning provided');
      return { score, reasoning };
    }
    catch {
      return { score: 0.5, reasoning: 'Failed to parse metric response' };
    }
  }

  /**
   * Collapses metric results into one score using `config.aggregation`:
   * 'weighted-average', 'min', 'product', or (any other value) the plain mean.
   *
   * @param {object[]} results - Metric results; only `name` and `value` are read.
   * @param {number[]} [weights] - Optional weights aligned with `results` by
   *   index. Where absent, the weight is looked up in `config.metrics` by
   *   `result.name`, defaulting to 1.
   * @returns {number} Aggregate score; 0 for an empty result list.
   */
  aggregateScores(results, weights) {
    if (results.length === 0) {
      return 0;
    }
    switch (this.config.aggregation) {
      case 'weighted-average': {
        let totalWeight = 0;
        let weightedSum = 0;
        results.forEach((result, index) => {
          const weight =
            weights?.[index] ??
            this.config.metrics.find((m) => m.name === result.name)?.weight ??
            1;
          weightedSum += result.value * weight;
          totalWeight += weight;
        });
        return totalWeight > 0 ? weightedSum / totalWeight : 0;
      }
      case 'min':
        return Math.min(...results.map((r) => r.value));
      case 'product':
        return results.reduce((acc, r) => acc * r.value, 1);
      default:
        return results.reduce((sum, r) => sum + r.value, 0) / results.length;
    }
  }

  /**
   * @returns {object} Shallow copy of the effective configuration.
   */
  getConfig() {
    return { ...this.config };
  }
}
|
|
267
|
+
/**
 * Builds a metric function that scores a trace 1 when no step carries a tool
 * error and 0 otherwise.
 *
 * Unlike `MetricEvaluator#successMetric`, only `toolResult.error` is
 * inspected; reflection steps are not considered.
 *
 * @returns {(trace: object) => {name: string, value: number, passed: boolean, reasoning: string}}
 */
export function createSuccessMetric() {
  return (trace) => {
    const hasErrors = trace.steps.some((step) => Boolean(step.toolResult?.error));
    return {
      name: 'success',
      value: hasErrors ? 0 : 1,
      passed: !hasErrors,
      // Every sibling metric explains its verdict; do the same here.
      reasoning: hasErrors ? 'Run had tool errors' : 'Run completed without errors',
    };
  };
}
|
|
277
|
+
/**
 * Builds a metric function that checks whether the trace output equals the
 * expected value, ignoring case and surrounding whitespace.
 *
 * @param {string} [fieldPath] - Optional dot-separated path (e.g.
 *   `'answer.city'`) selecting a field of the output. The output may be an
 *   object or a JSON string. If the path cannot be resolved, the whole output
 *   is compared instead, which is what happened before `fieldPath` was honoured.
 * @returns {(trace: object, expected: *) => {name: string, value: number, passed: boolean, reasoning: string}}
 *   With `expected === undefined` the metric passes with value 1.
 */
export function createExactMatchMetric(fieldPath) {
  // Walks `fieldPath` through the output; returns the whole output when the
  // path is absent or cannot be followed.
  const selectValue = (output) => {
    if (!fieldPath) {
      return output;
    }
    let current = output;
    if (typeof current === 'string') {
      try {
        current = JSON.parse(current);
      } catch {
        return output; // Plain-text output: nothing to traverse.
      }
    }
    for (const key of String(fieldPath).split('.')) {
      if (current === null || typeof current !== 'object' || !Object.hasOwn(current, key)) {
        return output;
      }
      current = current[key];
    }
    return current;
  };

  // Objects are serialised: String(obj) is '[object Object]' for every plain
  // object, which would make any two objects "match".
  const normalize = (value) => {
    const text =
      typeof value === 'object' && value !== null
        ? JSON.stringify(value) ?? String(value)
        : String(value);
    return text.toLowerCase().trim();
  };

  return (trace, expected) => {
    if (expected === undefined) {
      return { name: 'exact_match', value: 1, passed: true, reasoning: 'No expected value' };
    }
    const matches = normalize(selectValue(trace.output)) === normalize(expected);
    return {
      name: 'exact_match',
      value: matches ? 1 : 0,
      passed: matches,
      reasoning: matches ? 'Output matches expected' : 'Output does not match expected',
    };
  };
}
|
|
292
|
+
/**
 * Builds a metric function that measures which fraction of `keywords` occurs
 * in the trace output (case-insensitive substring match).
 *
 * @param {string[]} keywords - Keywords to look for; an empty list scores 1.
 * @returns {(trace: object) => {name: string, value: number, passed: boolean, reasoning: string}}
 *   The metric passes when at least half of the keywords are found.
 */
export function createContainsMetric(keywords) {
  return (trace) => {
    // Coerce like createExactMatchMetric does, so a missing or non-string
    // output counts as "nothing found" instead of throwing.
    const outputLower = String(trace.output ?? '').toLowerCase();
    const found = keywords.filter((keyword) => outputLower.includes(keyword.toLowerCase())).length;
    const value = keywords.length > 0 ? found / keywords.length : 1;
    return {
      name: 'contains',
      value,
      passed: value >= 0.5,
      reasoning: `Found ${found}/${keywords.length} keywords`,
    };
  };
}
|
|
310
|
+
//# sourceMappingURL=metrics.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"metrics.js","sourceRoot":"","sources":["../../src/learning/metrics.ts"],"names":[],"mappings":"AAeA,MAAM,cAAc,GAA0B;IAC5C,OAAO,EAAE;QACP;YACE,IAAI,EAAE,SAAS;YACf,IAAI,EAAE,SAAS;YACf,WAAW,EAAE,sCAAsC;YACnD,MAAM,EAAE,GAAG;SACZ;QACD;YACE,IAAI,EAAE,eAAe;YACrB,IAAI,EAAE,SAAS;YACf,WAAW,EAAE,qCAAqC;YAClD,MAAM,EAAE,GAAG;SACZ;QACD,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,SAAS,EAAE,WAAW,EAAE,uBAAuB,EAAE,MAAM,EAAE,GAAG,EAAE;KAC3F;IACD,WAAW,EAAE,kBAAkB;IAC/B,aAAa,EAAE,GAAG;CACnB,CAAC;AAEF,MAAM,OAAO,eAAe;IAClB,GAAG,CAAc;IACjB,KAAK,CAAU;IACf,MAAM,CAAwB;IAC9B,aAAa,GAAG,IAAI,GAAG,EAAoB,CAAC;IAEpD,YAAY,UAAkC,EAAE;QAC9C,IAAI,CAAC,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC;QACvB,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC;QAC3B,IAAI,CAAC,MAAM,GAAG,EAAE,GAAG,cAAc,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IACzD,CAAC;IAED,cAAc,CAAC,IAAY,EAAE,EAAY;QACvC,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;IACnC,CAAC;IAED,KAAK,CAAC,QAAQ,CACZ,KAAqB,EACrB,QAAkB;QAElB,MAAM,OAAO,GAAmB,EAAE,CAAC;QAEnC,KAAK,MAAM,SAAS,IAAI,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;YAC5C,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC,SAAS,CAAC,IAAI,EAAE,KAAK,EAAE,QAAQ,CAAC,CAAC;YAC1E,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvB,CAAC;QAED,MAAM,KAAK,GAAG,IAAI,CAAC,eAAe,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,MAAM,GAAG,KAAK,IAAI,IAAI,CAAC,MAAM,CAAC,aAAa,CAAC;QAElD,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC;IACpC,CAAC;IAED,KAAK,CAAC,aAAa,CACjB,MAAwB,EACxB,YAAwB;QAExB,MAAM,OAAO,GAAG,IAAI,GAAG,EAAuE,CAAC;QAE/F,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACvC,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;YACxB,MAAM,QAAQ,GAAG,YAAY,EAAE,CAAC,CAAC,CAAC,CAAC;YACnC,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;YACxD,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,EAAE,UAAU,CAAC,CAAC;QACpC,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAEO,KAAK,CAAC,cAAc,CAC1B,IAAY,EACZ,KAAqB,EACrB,QAAkB;QAElB,IAAI,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;YACjC,MAAM,EAAE,GAAG,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,IAAI,CAAE,CAAC;YACzC,OAAO,EAAE,CAA
C,KAAK,EAAE,QAAQ,CAAC,CAAC;QAC7B,CAAC;QAED,QAAQ,IAAqB,EAAE,CAAC;YAC9B,KAAK,SAAS;gBACZ,OAAO,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;YACnC,KAAK,eAAe;gBAClB,OAAO,IAAI,CAAC,kBAAkB,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;YAClD,KAAK,YAAY;gBACf,OAAO,IAAI,CAAC,gBAAgB,CAAC,KAAK,CAAC,CAAC;YACtC,KAAK,cAAc;gBACjB,OAAO,IAAI,CAAC,kBAAkB,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;YAClD,KAAK,WAAW;gBACd,OAAO,IAAI,CAAC,eAAe,CAAC,KAAK,CAAC,CAAC;YACrC;gBACE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS,EAAE,gBAAgB,EAAE,CAAC;QAC3E,CAAC;IACH,CAAC;IAED,aAAa,CAAC,KAAqB;QACjC,MAAM,SAAS,GAAG,KAAK,CAAC,KAAK,CAAC,IAAI,CAChC,CAAC,IAAI,EAAE,EAAE,CACP,IAAI,CAAC,UAAU,EAAE,KAAK;YACtB,CAAC,IAAI,CAAC,IAAI,KAAK,YAAY,IAAI,IAAI,CAAC,UAAU,EAAE,QAAQ,EAAE,aAAa,KAAK,KAAK,CAAC,CACrF,CAAC;QAEF,MAAM,KAAK,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAEhC,OAAO;YACL,IAAI,EAAE,SAAS;YACf,KAAK;YACL,MAAM,EAAE,KAAK,KAAK,CAAC;YACnB,SAAS,EAAE,SAAS;gBAClB,CAAC,CAAC,sCAAsC;gBACxC,CAAC,CAAC,8BAA8B;SACnC,CAAC;IACJ,CAAC;IAED,kBAAkB,CAAC,KAAqB,EAAE,QAAkB;QAC1D,MAAM,SAAS,GAAG,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,WAAW,CAAC,CAAC;QAEpE,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC3B,OAAO;gBACL,IAAI,EAAE,eAAe;gBACrB,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,IAAI;gBACZ,SAAS,EAAE,2BAA2B;aACvC,CAAC;QACJ,CAAC;QAED,IAAI,eAAe,GAAG,CAAC,CAAC;QACxB,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;YAC7B,IAAI,IAAI,CAAC,UAAU,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC;gBAC9C,eAAe,EAAE,CAAC;YACpB,CAAC;QACH,CAAC;QAED,MAAM,KAAK,GAAG,eAAe,GAAG,SAAS,CAAC,MAAM,CAAC;QAEjD,IAAI,QAAQ,KAAK,SAAS,IAAI,OAAO,QAAQ,KAAK,QAAQ,EAAE,CAAC;YAC3D,MAAM,aAAa,GAAG,KAAK,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,QAAQ,EAAE,CAAC,WAAW,EAAE,CAAC,CAAC;YAC7F,MAAM,aAAa,GAAG,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,GAAG,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,GAAG,GAAG,EAAE,CAAC,CAAC,CAAC;YAC1F,OAAO;gBACL,IAAI,EAAE,eAAe;gBACrB,KAAK,EAAE,aAAa;gBACpB,MAAM,EAAE,aAAa,IAAI,GAAG;gBAC5B,SAAS,EAAE,GAAG,eAAe,IAAI,SAAS,CAAC,MAAM,kCAAkC,aAAa
,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,gBAAgB,WAAW;aAC3I,CAAC;QACJ,CAAC;QAED,OAAO;YACL,IAAI,EAAE,eAAe;YACrB,KAAK;YACL,MAAM,EAAE,KAAK,IAAI,GAAG;YACpB,SAAS,EAAE,GAAG,eAAe,IAAI,SAAS,CAAC,MAAM,wBAAwB;SAC1E,CAAC;IACJ,CAAC;IAED,gBAAgB,CAAC,KAAqB;QACpC,MAAM,WAAW,GAAG,KAAK,CAAC,KAAK,CAAC,WAAW,GAAG,KAAK,CAAC,KAAK,CAAC,YAAY,CAAC;QACvE,MAAM,QAAQ,GAAG,KAAK,CAAC,QAAQ,CAAC;QAEhC,MAAM,eAAe,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,CAAC;QACtE,MAAM,cAAc,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,CAAC;QAElE,MAAM,KAAK,GAAG,eAAe,GAAG,GAAG,GAAG,cAAc,GAAG,GAAG,CAAC;QAE3D,OAAO;YACL,IAAI,EAAE,YAAY;YAClB,KAAK;YACL,MAAM,EAAE,KAAK,IAAI,GAAG;YACpB,SAAS,EAAE,GAAG,WAAW,cAAc,QAAQ,IAAI;SACpD,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,kBAAkB,CAAC,KAAqB,EAAE,QAAkB;QAChE,IAAI,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;YAC7B,MAAM,SAAS,GAAG,CAAC,CAAC,KAAK,CAAC,MAAM,IAAI,KAAK,CAAC,MAAM,CAAC,MAAM,GAAG,EAAE,CAAC;YAC7D,OAAO;gBACL,IAAI,EAAE,cAAc;gBACpB,KAAK,EAAE,SAAS,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG;gBAC5B,MAAM,EAAE,SAAS;gBACjB,SAAS,EAAE,8CAA8C;aAC1D,CAAC;QACJ,CAAC;QAED,MAAM,MAAM,GAAG;;SAEV,KAAK,CAAC,KAAK;UACV,KAAK,CAAC,MAAM;EACpB,QAAQ,CAAC,CAAC,CAAC,aAAa,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE;;;;;;;wDAOD,CAAC;QAErD,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC;gBACnC,KAAK,EAAE,IAAI,CAAC,KAAK;gBACjB,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC;gBAC7C,WAAW,EAAE,GAAG;gBAChB,SAAS,EAAE,GAAG;aACf,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,IAAI,CAAC,mBAAmB,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;YAC1D,OAAO;gBACL,IAAI,EAAE,cAAc;gBACpB,KAAK,EAAE,MAAM,CAAC,KAAK;gBACnB,MAAM,EAAE,MAAM,CAAC,KAAK,IAAI,GAAG;gBAC3B,SAAS,EAAE,MAAM,CAAC,SAAS;aAC5B,CAAC;QACJ,CAAC;QAAC,MAAM,CAAC;YACP,OAAO;gBACL,IAAI,EAAE,cAAc;gBACpB,KAAK,EAAE,GAAG;gBACV,MAAM,EAAE,IAAI;gBACZ,SAAS,EAAE,kCAAkC;aAC9C,CAAC;QACJ,CAAC;IACH,CAAC;IAED,KAAK,CAAC,eAAe,CAAC,KAAqB;QACzC,IAAI,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAA
C;YAC7B,OAAO;gBACL,IAAI,EAAE,WAAW;gBACjB,KAAK,EAAE,GAAG;gBACV,MAAM,EAAE,IAAI;gBACZ,SAAS,EAAE,sCAAsC;aAClD,CAAC;QACJ,CAAC;QAED,MAAM,MAAM,GAAG;;SAEV,KAAK,CAAC,KAAK;eACL,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,KAAK,WAAW,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC;UACxG,KAAK,CAAC,MAAM;;;;;;;wDAOkC,CAAC;QAErD,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC;gBACnC,KAAK,EAAE,IAAI,CAAC,KAAK;gBACjB,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC;gBAC7C,WAAW,EAAE,GAAG;gBAChB,SAAS,EAAE,GAAG;aACf,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,IAAI,CAAC,mBAAmB,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;YAC1D,OAAO;gBACL,IAAI,EAAE,WAAW;gBACjB,KAAK,EAAE,MAAM,CAAC,KAAK;gBACnB,MAAM,EAAE,MAAM,CAAC,KAAK,IAAI,GAAG;gBAC3B,SAAS,EAAE,MAAM,CAAC,SAAS;aAC5B,CAAC;QACJ,CAAC;QAAC,MAAM,CAAC;YACP,OAAO;gBACL,IAAI,EAAE,WAAW;gBACjB,KAAK,EAAE,GAAG;gBACV,MAAM,EAAE,IAAI;gBACZ,SAAS,EAAE,kCAAkC;aAC9C,CAAC;QACJ,CAAC;IACH,CAAC;IAEO,mBAAmB,CAAC,OAAe;QACzC,IAAI,CAAC;YACH,IAAI,OAAO,GAAG,OAAO,CAAC;YACtB,MAAM,cAAc,GAAG,iCAAiC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACvE,IAAI,cAAc,EAAE,CAAC;gBACnB,OAAO,GAAG,cAAc,CAAC,CAAC,CAAC,CAAC;YAC9B,CAAC;YAED,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YACnC,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC,CAAC,CAAC;YACpE,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC,SAAS,IAAI,uBAAuB,CAAC,CAAC;YAEtE,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC;QAC9B,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,EAAE,KAAK,EAAE,GAAG,EAAE,SAAS,EAAE,iCAAiC,EAAE,CAAC;QACtE,CAAC;IACH,CAAC;IAEO,eAAe,CAAC,OAAuB;QAC7C,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,CAAC,CAAC;QAEnC,QAAQ,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC;YAChC,KAAK,kBAAkB,CAAC,CAAC,CAAC;gBACxB,IAAI,WAAW,GAAG,CAAC,CAAC;gBACpB,IAAI,WAAW,GAAG,CAAC,CAAC;gBAEpB,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;oBAC7B,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAA
C,IAAI,CAAC,CAAC;oBAC1E,MAAM,MAAM,GAAG,SAAS,EAAE,MAAM,IAAI,CAAC,CAAC;oBACtC,WAAW,IAAI,MAAM,CAAC,KAAK,GAAG,MAAM,CAAC;oBACrC,WAAW,IAAI,MAAM,CAAC;gBACxB,CAAC;gBAED,OAAO,WAAW,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC;YACzD,CAAC;YAED,KAAK,KAAK;gBACR,OAAO,IAAI,CAAC,GAAG,CAAC,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC;YAElD,KAAK,SAAS;gBACZ,OAAO,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;YAEtD;gBACE,OAAO,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC;QACzE,CAAC;IACH,CAAC;IAED,SAAS;QACP,OAAO,EAAE,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;IAC5B,CAAC;CACF;AAED,MAAM,UAAU,mBAAmB;IACjC,OAAO,CAAC,KAAqB,EAAE,EAAE;QAC/B,MAAM,SAAS,GAAG,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC;QACrE,OAAO;YACL,IAAI,EAAE,SAAS;YACf,KAAK,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACxB,MAAM,EAAE,CAAC,SAAS;SACnB,CAAC;IACJ,CAAC,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,sBAAsB,CAAC,SAAkB;IACvD,OAAO,CAAC,KAAqB,EAAE,QAAkB,EAAE,EAAE;QACnD,IAAI,QAAQ,KAAK,SAAS,EAAE,CAAC;YAC3B,OAAO,EAAE,IAAI,EAAE,aAAa,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS,EAAE,mBAAmB,EAAE,CAAC;QACzF,CAAC;QAED,MAAM,WAAW,GAAG,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC;QAE5D,MAAM,OAAO,GACX,MAAM,CAAC,WAAW,CAAC,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,KAAK,MAAM,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;QAErF,OAAO;YACL,IAAI,EAAE,aAAa;YACnB,KAAK,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACtB,MAAM,EAAE,OAAO;YACf,SAAS,EAAE,OAAO,CAAC,CAAC,CAAC,yBAAyB,CAAC,CAAC,CAAC,gCAAgC;SAClF,CAAC;IACJ,CAAC,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,oBAAoB,CAAC,QAAkB;IACrD,OAAO,CAAC,KAAqB,EAAE,EAAE;QAC/B,MAAM,WAAW,GAAG,KAAK,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC;QAC/C,IAAI,KAAK,GAAG,CAAC,CAAC;QAEd,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAC/B,IAAI,WAAW,CAAC,QAAQ,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,EAAE,CAAC;gBAChD,KAAK,EAAE,CAAC;YACV,CAAC;QAC
H,CAAC;QAED,MAAM,KAAK,GAAG,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;QAEhE,OAAO;YACL,IAAI,EAAE,UAAU;YAChB,KAAK;YACL,MAAM,EAAE,KAAK,IAAI,GAAG;YACpB,SAAS,EAAE,SAAS,KAAK,IAAI,QAAQ,CAAC,MAAM,WAAW;SACxD,CAAC;IACJ,CAAC,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import type { ExecutionTrace, TraceQuery, TraceStoreStats, CapturedPrompt, PromptQuery, ABTest, ABTestStatus, ABTestVariant, InstructionVersion, InstructionVersionMetrics, CombinedPersistentStore } from '@cogitator-ai/types';
|
|
2
|
+
/**
 * Options accepted by the `PostgresTraceStore` constructor.
 * Only `connectionString` is required.
 */
export interface PostgresTraceStoreConfig {
    /** NOTE(review): presumably a PostgreSQL connection URI — confirm against the implementation. */
    connectionString: string;
    /** Optional schema name. NOTE(review): default value is not visible in this declaration. */
    schema?: string;
    /** Optional pool size. NOTE(review): default value is not visible in this declaration. */
    poolSize?: number;
}
|
|
7
|
+
/**
 * Declaration of the store class that implements `CombinedPersistentStore`.
 *
 * Its public methods fall into four groups by the data type they handle:
 * execution traces, captured prompts, A/B tests and instruction versions.
 * Every public method is asynchronous (returns a Promise).
 */
export declare class PostgresTraceStore implements CombinedPersistentStore {
    private pool;
    private config;
    private schema;

    constructor(config: PostgresTraceStoreConfig);

    // --- Lifecycle ---------------------------------------------------------
    connect(): Promise<void>;
    disconnect(): Promise<void>;
    private initSchema;
    private generateId;

    // --- Execution traces (ExecutionTrace) -----------------------------------
    storeTrace(trace: ExecutionTrace): Promise<void>;
    storeTraceMany(traces: ExecutionTrace[]): Promise<void>;
    getTrace(id: string): Promise<ExecutionTrace | null>;
    getTraceByRunId(runId: string): Promise<ExecutionTrace | null>;
    queryTraces(query: TraceQuery): Promise<ExecutionTrace[]>;
    getAllTraces(agentId: string): Promise<ExecutionTrace[]>;
    getDemos(agentId: string, limit?: number): Promise<ExecutionTrace[]>;
    markAsDemo(id: string): Promise<void>;
    unmarkAsDemo(id: string): Promise<void>;
    deleteTrace(id: string): Promise<boolean>;
    pruneTraces(agentId: string, maxTraces: number): Promise<number>;
    clearTraces(agentId: string): Promise<void>;
    getTraceStats(agentId: string): Promise<TraceStoreStats>;
    private rowToTrace;

    // --- Captured prompts (CapturedPrompt) -----------------------------------
    capture(prompt: CapturedPrompt): Promise<void>;
    getPrompt(id: string): Promise<CapturedPrompt | null>;
    getByRun(runId: string): Promise<CapturedPrompt[]>;
    query(query: PromptQuery): Promise<CapturedPrompt[]>;
    deletePrompt(id: string): Promise<boolean>;
    prune(beforeDate: Date): Promise<number>;
    private rowToPrompt;

    // --- A/B tests (ABTest) --------------------------------------------------
    create(test: Omit<ABTest, 'id' | 'createdAt'>): Promise<ABTest>;
    getABTest(id: string): Promise<ABTest | null>;
    getActive(agentId: string): Promise<ABTest | null>;
    update(id: string, updates: Partial<ABTest>): Promise<ABTest>;
    recordResult(
        testId: string,
        variant: ABTestVariant,
        score: number,
        latency: number,
        cost: number
    ): Promise<void>;
    list(agentId?: string, status?: ABTestStatus): Promise<ABTest[]>;
    deleteABTest(id: string): Promise<boolean>;
    private rowToABTest;

    // --- Instruction versions (InstructionVersion) ---------------------------
    save(version: Omit<InstructionVersion, 'id'>): Promise<InstructionVersion>;
    getVersion(id: string): Promise<InstructionVersion | null>;
    getCurrent(agentId: string): Promise<InstructionVersion | null>;
    getHistory(agentId: string, limit?: number): Promise<InstructionVersion[]>;
    retire(id: string): Promise<void>;
    updateMetrics(id: string, metrics: Partial<InstructionVersionMetrics>): Promise<void>;
    private rowToVersion;
}
|
|
53
|
+
//# sourceMappingURL=postgres-trace-store.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"postgres-trace-store.d.ts","sourceRoot":"","sources":["../../src/learning/postgres-trace-store.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,cAAc,EACd,UAAU,EACV,eAAe,EACf,cAAc,EACd,WAAW,EACX,MAAM,EACN,YAAY,EACZ,aAAa,EAEb,kBAAkB,EAClB,yBAAyB,EACzB,uBAAuB,EACxB,MAAM,qBAAqB,CAAC;AAS7B,MAAM,WAAW,wBAAwB;IACvC,gBAAgB,EAAE,MAAM,CAAC;IACzB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,qBAAa,kBAAmB,YAAW,uBAAuB;IAChE,OAAO,CAAC,IAAI,CAAqB;IACjC,OAAO,CAAC,MAAM,CAA2B;IACzC,OAAO,CAAC,MAAM,CAAS;gBAEX,MAAM,EAAE,wBAAwB;IAKtC,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAexB,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;YAOnB,UAAU;IA+HxB,OAAO,CAAC,UAAU;IAIZ,UAAU,CAAC,KAAK,EAAE,cAAc,GAAG,OAAO,CAAC,IAAI,CAAC;IA+BhD,cAAc,CAAC,MAAM,EAAE,cAAc,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAMvD,QAAQ,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,cAAc,GAAG,IAAI,CAAC;IAapD,eAAe,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,cAAc,GAAG,IAAI,CAAC;IAa9D,WAAW,CAAC,KAAK,EAAE,UAAU,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC;IA4CzD,YAAY,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC;IAWxD,QAAQ,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,SAAK,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC;IAWhE,UAAU,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IASrC,YAAY,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IASvC,WAAW,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;IAWzC,WAAW,CAAC,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAiBhE,WAAW,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAS3C,aAAa,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC;IAoB9D,OAAO,CAAC,UAAU;IAwBZ,OAAO,CAAC,MAAM,EAAE,cAAc,GAAG,OAAO,CAAC,IAAI,CAAC;IAkC9C,SAAS,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,cAAc,GAAG,IAAI,CAAC;IAarD,QAAQ,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC;IAWlD,KAAK,CAAC,KAAK,EAAE,WAAW,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC;IAgDpD,YAAY,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;IAW1C,KAAK,CAAC,UAAU,EAAE,IAAI,GAAG,OAAO,CAAC,MAAM,CAAC;IAW9C,OAAO,CAAC,WAAW;IAiCb,MAAM,CAAC,IAAI,EAAE,IAAI,CAAC,MAAM,EAAE,IAAI,GAAG,WAAW,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC;IAkC/D,SAAS,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC
,MAAM,GAAG,IAAI,CAAC;IAa7C,SAAS,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC;IAalD,MAAM,CAAC,EAAE,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,CAAC,MAAM,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC;IAsC7D,YAAY,CAChB,MAAM,EAAE,MAAM,EACd,OAAO,EAAE,aAAa,EACtB,KAAK,EAAE,MAAM,EACb,OAAO,EAAE,MAAM,EACf,IAAI,EAAE,MAAM,GACX,OAAO,CAAC,IAAI,CAAC;IAuBV,IAAI,CAAC,OAAO,CAAC,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAuBhE,YAAY,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;IAWhD,OAAO,CAAC,WAAW;IAsBb,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,kBAAkB,EAAE,IAAI,CAAC,GAAG,OAAO,CAAC,kBAAkB,CAAC;IA8B1E,UAAU,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,kBAAkB,GAAG,IAAI,CAAC;IAa1D,UAAU,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,kBAAkB,GAAG,IAAI,CAAC;IAe/D,UAAU,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,SAAK,GAAG,OAAO,CAAC,kBAAkB,EAAE,CAAC;IAatE,MAAM,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IASjC,aAAa,CAAC,EAAE,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,CAAC,yBAAyB,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAsC3F,OAAO,CAAC,YAAY;CAoBrB"}
|