@cogitator-ai/core 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (252) hide show
  1. package/README.md +920 -15
  2. package/dist/__tests__/agent.test.js +2 -2
  3. package/dist/__tests__/agent.test.js.map +1 -1
  4. package/dist/__tests__/base64.test.js +1 -1
  5. package/dist/__tests__/base64.test.js.map +1 -1
  6. package/dist/__tests__/calculator.test.js +1 -1
  7. package/dist/__tests__/calculator.test.js.map +1 -1
  8. package/dist/__tests__/cogitator-memory.test.js +2 -2
  9. package/dist/__tests__/cogitator-memory.test.js.map +1 -1
  10. package/dist/__tests__/datetime.test.js +1 -1
  11. package/dist/__tests__/datetime.test.js.map +1 -1
  12. package/dist/__tests__/exec.test.js +1 -1
  13. package/dist/__tests__/exec.test.js.map +1 -1
  14. package/dist/__tests__/filesystem.test.js +1 -1
  15. package/dist/__tests__/filesystem.test.js.map +1 -1
  16. package/dist/__tests__/google-backend.test.js +1 -1
  17. package/dist/__tests__/google-backend.test.js.map +1 -1
  18. package/dist/__tests__/hash.test.js +1 -1
  19. package/dist/__tests__/hash.test.js.map +1 -1
  20. package/dist/__tests__/http.test.js +1 -1
  21. package/dist/__tests__/http.test.js.map +1 -1
  22. package/dist/__tests__/json.test.js +1 -1
  23. package/dist/__tests__/json.test.js.map +1 -1
  24. package/dist/__tests__/logger.test.js +1 -1
  25. package/dist/__tests__/logger.test.js.map +1 -1
  26. package/dist/__tests__/random.test.js +1 -1
  27. package/dist/__tests__/random.test.js.map +1 -1
  28. package/dist/__tests__/regex.test.js +1 -1
  29. package/dist/__tests__/regex.test.js.map +1 -1
  30. package/dist/__tests__/registry.test.js +2 -2
  31. package/dist/__tests__/registry.test.js.map +1 -1
  32. package/dist/__tests__/sleep.test.js +1 -1
  33. package/dist/__tests__/sleep.test.js.map +1 -1
  34. package/dist/__tests__/tool.test.js +1 -1
  35. package/dist/__tests__/tool.test.js.map +1 -1
  36. package/dist/__tests__/uuid.test.js +1 -1
  37. package/dist/__tests__/uuid.test.js.map +1 -1
  38. package/dist/cogitator.d.ts +46 -1
  39. package/dist/cogitator.d.ts.map +1 -1
  40. package/dist/cogitator.js +274 -17
  41. package/dist/cogitator.js.map +1 -1
  42. package/dist/constitutional/constitution.d.ts +9 -0
  43. package/dist/constitutional/constitution.d.ts.map +1 -0
  44. package/dist/constitutional/constitution.js +215 -0
  45. package/dist/constitutional/constitution.js.map +1 -0
  46. package/dist/constitutional/constitutional-ai.d.ts +36 -0
  47. package/dist/constitutional/constitutional-ai.d.ts.map +1 -0
  48. package/dist/constitutional/constitutional-ai.js +163 -0
  49. package/dist/constitutional/constitutional-ai.js.map +1 -0
  50. package/dist/constitutional/critique-reviser.d.ts +20 -0
  51. package/dist/constitutional/critique-reviser.d.ts.map +1 -0
  52. package/dist/constitutional/critique-reviser.js +98 -0
  53. package/dist/constitutional/critique-reviser.js.map +1 -0
  54. package/dist/constitutional/index.d.ts +13 -0
  55. package/dist/constitutional/index.d.ts.map +1 -0
  56. package/dist/constitutional/index.js +8 -0
  57. package/dist/constitutional/index.js.map +1 -0
  58. package/dist/constitutional/input-filter.d.ts +19 -0
  59. package/dist/constitutional/input-filter.d.ts.map +1 -0
  60. package/dist/constitutional/input-filter.js +88 -0
  61. package/dist/constitutional/input-filter.js.map +1 -0
  62. package/dist/constitutional/output-filter.d.ts +19 -0
  63. package/dist/constitutional/output-filter.d.ts.map +1 -0
  64. package/dist/constitutional/output-filter.js +86 -0
  65. package/dist/constitutional/output-filter.js.map +1 -0
  66. package/dist/constitutional/prompts.d.ts +11 -0
  67. package/dist/constitutional/prompts.d.ts.map +1 -0
  68. package/dist/constitutional/prompts.js +202 -0
  69. package/dist/constitutional/prompts.js.map +1 -0
  70. package/dist/constitutional/tool-guard.d.ts +18 -0
  71. package/dist/constitutional/tool-guard.d.ts.map +1 -0
  72. package/dist/constitutional/tool-guard.js +125 -0
  73. package/dist/constitutional/tool-guard.js.map +1 -0
  74. package/dist/cost-routing/budget-enforcer.d.ts +26 -0
  75. package/dist/cost-routing/budget-enforcer.d.ts.map +1 -0
  76. package/dist/cost-routing/budget-enforcer.js +86 -0
  77. package/dist/cost-routing/budget-enforcer.js.map +1 -0
  78. package/dist/cost-routing/cost-router.d.ts +34 -0
  79. package/dist/cost-routing/cost-router.d.ts.map +1 -0
  80. package/dist/cost-routing/cost-router.js +80 -0
  81. package/dist/cost-routing/cost-router.js.map +1 -0
  82. package/dist/cost-routing/cost-tracker.d.ts +20 -0
  83. package/dist/cost-routing/cost-tracker.d.ts.map +1 -0
  84. package/dist/cost-routing/cost-tracker.js +85 -0
  85. package/dist/cost-routing/cost-tracker.js.map +1 -0
  86. package/dist/cost-routing/index.d.ts +6 -0
  87. package/dist/cost-routing/index.d.ts.map +1 -0
  88. package/dist/cost-routing/index.js +6 -0
  89. package/dist/cost-routing/index.js.map +1 -0
  90. package/dist/cost-routing/model-selector.d.ts +15 -0
  91. package/dist/cost-routing/model-selector.d.ts.map +1 -0
  92. package/dist/cost-routing/model-selector.js +216 -0
  93. package/dist/cost-routing/model-selector.js.map +1 -0
  94. package/dist/cost-routing/task-analyzer.d.ts +13 -0
  95. package/dist/cost-routing/task-analyzer.d.ts.map +1 -0
  96. package/dist/cost-routing/task-analyzer.js +185 -0
  97. package/dist/cost-routing/task-analyzer.js.map +1 -0
  98. package/dist/index.d.ts +19 -1
  99. package/dist/index.d.ts.map +1 -1
  100. package/dist/index.js +9 -0
  101. package/dist/index.js.map +1 -1
  102. package/dist/learning/ab-testing.d.ts +45 -0
  103. package/dist/learning/ab-testing.d.ts.map +1 -0
  104. package/dist/learning/ab-testing.js +267 -0
  105. package/dist/learning/ab-testing.js.map +1 -0
  106. package/dist/learning/agent-optimizer.d.ts +42 -0
  107. package/dist/learning/agent-optimizer.d.ts.map +1 -0
  108. package/dist/learning/agent-optimizer.js +273 -0
  109. package/dist/learning/agent-optimizer.js.map +1 -0
  110. package/dist/learning/auto-optimizer.d.ts +38 -0
  111. package/dist/learning/auto-optimizer.d.ts.map +1 -0
  112. package/dist/learning/auto-optimizer.js +229 -0
  113. package/dist/learning/auto-optimizer.js.map +1 -0
  114. package/dist/learning/demo-selector.d.ts +29 -0
  115. package/dist/learning/demo-selector.d.ts.map +1 -0
  116. package/dist/learning/demo-selector.js +235 -0
  117. package/dist/learning/demo-selector.js.map +1 -0
  118. package/dist/learning/index.d.ts +24 -0
  119. package/dist/learning/index.d.ts.map +1 -0
  120. package/dist/learning/index.js +13 -0
  121. package/dist/learning/index.js.map +1 -0
  122. package/dist/learning/instruction-optimizer.d.ts +29 -0
  123. package/dist/learning/instruction-optimizer.d.ts.map +1 -0
  124. package/dist/learning/instruction-optimizer.js +175 -0
  125. package/dist/learning/instruction-optimizer.js.map +1 -0
  126. package/dist/learning/metrics.d.ts +37 -0
  127. package/dist/learning/metrics.d.ts.map +1 -0
  128. package/dist/learning/metrics.js +310 -0
  129. package/dist/learning/metrics.js.map +1 -0
  130. package/dist/learning/postgres-trace-store.d.ts +53 -0
  131. package/dist/learning/postgres-trace-store.d.ts.map +1 -0
  132. package/dist/learning/postgres-trace-store.js +692 -0
  133. package/dist/learning/postgres-trace-store.js.map +1 -0
  134. package/dist/learning/prompt-logger.d.ts +29 -0
  135. package/dist/learning/prompt-logger.d.ts.map +1 -0
  136. package/dist/learning/prompt-logger.js +157 -0
  137. package/dist/learning/prompt-logger.js.map +1 -0
  138. package/dist/learning/prompt-monitor.d.ts +29 -0
  139. package/dist/learning/prompt-monitor.d.ts.map +1 -0
  140. package/dist/learning/prompt-monitor.js +243 -0
  141. package/dist/learning/prompt-monitor.js.map +1 -0
  142. package/dist/learning/prompts.d.ts +28 -0
  143. package/dist/learning/prompts.d.ts.map +1 -0
  144. package/dist/learning/prompts.js +195 -0
  145. package/dist/learning/prompts.js.map +1 -0
  146. package/dist/learning/rollback-manager.d.ts +36 -0
  147. package/dist/learning/rollback-manager.d.ts.map +1 -0
  148. package/dist/learning/rollback-manager.js +177 -0
  149. package/dist/learning/rollback-manager.js.map +1 -0
  150. package/dist/learning/trace-store.d.ts +26 -0
  151. package/dist/learning/trace-store.d.ts.map +1 -0
  152. package/dist/learning/trace-store.js +218 -0
  153. package/dist/learning/trace-store.js.map +1 -0
  154. package/dist/llm/google.d.ts.map +1 -1
  155. package/dist/llm/google.js +1 -2
  156. package/dist/llm/google.js.map +1 -1
  157. package/dist/reasoning/branch-evaluator.d.ts +28 -0
  158. package/dist/reasoning/branch-evaluator.d.ts.map +1 -0
  159. package/dist/reasoning/branch-evaluator.js +143 -0
  160. package/dist/reasoning/branch-evaluator.js.map +1 -0
  161. package/dist/reasoning/branch-generator.d.ts +9 -0
  162. package/dist/reasoning/branch-generator.d.ts.map +1 -0
  163. package/dist/reasoning/branch-generator.js +60 -0
  164. package/dist/reasoning/branch-generator.js.map +1 -0
  165. package/dist/reasoning/index.d.ts +5 -0
  166. package/dist/reasoning/index.d.ts.map +1 -0
  167. package/dist/reasoning/index.js +5 -0
  168. package/dist/reasoning/index.js.map +1 -0
  169. package/dist/reasoning/prompts.d.ts +19 -0
  170. package/dist/reasoning/prompts.d.ts.map +1 -0
  171. package/dist/reasoning/prompts.js +161 -0
  172. package/dist/reasoning/prompts.js.map +1 -0
  173. package/dist/reasoning/thought-tree.d.ts +32 -0
  174. package/dist/reasoning/thought-tree.d.ts.map +1 -0
  175. package/dist/reasoning/thought-tree.js +352 -0
  176. package/dist/reasoning/thought-tree.js.map +1 -0
  177. package/dist/reflection/index.d.ts +4 -0
  178. package/dist/reflection/index.d.ts.map +1 -0
  179. package/dist/reflection/index.js +4 -0
  180. package/dist/reflection/index.js.map +1 -0
  181. package/dist/reflection/insight-store.d.ts +19 -0
  182. package/dist/reflection/insight-store.d.ts.map +1 -0
  183. package/dist/reflection/insight-store.js +129 -0
  184. package/dist/reflection/insight-store.js.map +1 -0
  185. package/dist/reflection/prompts.d.ts +18 -0
  186. package/dist/reflection/prompts.d.ts.map +1 -0
  187. package/dist/reflection/prompts.js +157 -0
  188. package/dist/reflection/prompts.js.map +1 -0
  189. package/dist/reflection/reflection-engine.d.ts +25 -0
  190. package/dist/reflection/reflection-engine.d.ts.map +1 -0
  191. package/dist/reflection/reflection-engine.js +202 -0
  192. package/dist/reflection/reflection-engine.js.map +1 -0
  193. package/dist/registry.d.ts +1 -0
  194. package/dist/registry.d.ts.map +1 -1
  195. package/dist/registry.js +3 -0
  196. package/dist/registry.js.map +1 -1
  197. package/dist/time-travel/checkpoint-store.d.ts +34 -0
  198. package/dist/time-travel/checkpoint-store.d.ts.map +1 -0
  199. package/dist/time-travel/checkpoint-store.js +240 -0
  200. package/dist/time-travel/checkpoint-store.js.map +1 -0
  201. package/dist/time-travel/comparator.d.ts +26 -0
  202. package/dist/time-travel/comparator.d.ts.map +1 -0
  203. package/dist/time-travel/comparator.js +253 -0
  204. package/dist/time-travel/comparator.js.map +1 -0
  205. package/dist/time-travel/forker.d.ts +22 -0
  206. package/dist/time-travel/forker.d.ts.map +1 -0
  207. package/dist/time-travel/forker.js +118 -0
  208. package/dist/time-travel/forker.js.map +1 -0
  209. package/dist/time-travel/index.d.ts +6 -0
  210. package/dist/time-travel/index.d.ts.map +1 -0
  211. package/dist/time-travel/index.js +6 -0
  212. package/dist/time-travel/index.js.map +1 -0
  213. package/dist/time-travel/replayer.d.ts +20 -0
  214. package/dist/time-travel/replayer.d.ts.map +1 -0
  215. package/dist/time-travel/replayer.js +147 -0
  216. package/dist/time-travel/replayer.js.map +1 -0
  217. package/dist/time-travel/time-travel.d.ts +41 -0
  218. package/dist/time-travel/time-travel.d.ts.map +1 -0
  219. package/dist/time-travel/time-travel.js +127 -0
  220. package/dist/time-travel/time-travel.js.map +1 -0
  221. package/dist/tool.d.ts.map +1 -1
  222. package/dist/tool.js +2 -0
  223. package/dist/tool.js.map +1 -1
  224. package/dist/tools/base64.d.ts.map +1 -1
  225. package/dist/tools/base64.js +2 -8
  226. package/dist/tools/base64.js.map +1 -1
  227. package/dist/tools/datetime.d.ts.map +1 -1
  228. package/dist/tools/datetime.js.map +1 -1
  229. package/dist/tools/exec.d.ts.map +1 -1
  230. package/dist/tools/exec.js +1 -4
  231. package/dist/tools/exec.js.map +1 -1
  232. package/dist/tools/filesystem.d.ts.map +1 -1
  233. package/dist/tools/filesystem.js +4 -1
  234. package/dist/tools/filesystem.js.map +1 -1
  235. package/dist/tools/hash.d.ts.map +1 -1
  236. package/dist/tools/hash.js +1 -4
  237. package/dist/tools/hash.js.map +1 -1
  238. package/dist/tools/http.d.ts.map +1 -1
  239. package/dist/tools/http.js +1 -4
  240. package/dist/tools/http.js.map +1 -1
  241. package/dist/tools/regex.d.ts.map +1 -1
  242. package/dist/tools/regex.js +4 -1
  243. package/dist/tools/regex.js.map +1 -1
  244. package/dist/utils/circuit-breaker.d.ts.map +1 -1
  245. package/dist/utils/circuit-breaker.js.map +1 -1
  246. package/dist/utils/fallback.d.ts.map +1 -1
  247. package/dist/utils/fallback.js +1 -4
  248. package/dist/utils/fallback.js.map +1 -1
  249. package/dist/utils/retry.d.ts.map +1 -1
  250. package/dist/utils/retry.js +8 -13
  251. package/dist/utils/retry.js.map +1 -1
  252. package/package.json +17 -8
@@ -0,0 +1,175 @@
1
+ import { buildFailureAnalysisPrompt, buildInstructionCandidatePrompt, buildInstructionEvaluationPrompt, buildInstructionRefinementPrompt, parseFailureAnalysisResponse, parseInstructionCandidatesResponse, parseInstructionEvaluationResponse, parseInstructionRefinementResponse, } from './prompts';
2
/**
 * Proposes improved agent instructions based on recorded execution traces.
 *
 * The pipeline run by `optimize()` is:
 *   1. ask the LLM which instruction gaps explain the observed traces,
 *   2. ask it for candidate rewrites that address those gaps,
 *   3. have it score every candidate against the traces,
 *   4. refine the best candidate using the weaknesses reported for it.
 *
 * A failing LLM call or an unparseable response inside a step is deliberately
 * treated as "no result" for that step (best effort) rather than aborting the
 * whole optimisation run.
 */
export class InstructionOptimizer {
  llm;
  model;
  traceStore;
  insightStore;
  candidateCount;
  refinementRounds;

  /**
   * @param {object} options
   * @param {object} options.llm - Backend exposing `chat({ model, messages, temperature, maxTokens })`.
   * @param {string} options.model - Model identifier forwarded to every `chat` call.
   * @param {object} options.traceStore - Store exposing `getAll(agentId)`.
   * @param {object} [options.insightStore] - Optional store exposing `findRelevant(agentId, text, limit)`.
   * @param {number} [options.candidateCount=3] - Maximum number of candidates kept per run.
   * @param {number} [options.refinementRounds=1] - Maximum number of refinement passes.
   */
  constructor(options) {
    this.llm = options.llm;
    this.model = options.model;
    this.traceStore = options.traceStore;
    this.insightStore = options.insightStore;
    this.candidateCount = options.candidateCount ?? 3;
    this.refinementRounds = options.refinementRounds ?? 1;
  }

  /**
   * Runs the full optimisation pipeline for one agent.
   *
   * @param {string} agentId - Agent whose traces (and insights) are consulted.
   * @param {string} currentInstructions - Instructions to improve.
   * @param {object} [options]
   * @param {Array} [options.traces] - Traces to use instead of loading them from the trace store.
   * @param {number} [options.maxTraces=20] - Only the first `maxTraces` traces are considered.
   * @returns {Promise<object>} `{ originalInstructions, optimizedInstructions, improvement,
   *   gapsAddressed, candidatesEvaluated, reasoning }`. When any stage yields nothing, the
   *   original instructions are returned unchanged with `improvement: 0`.
   */
  async optimize(agentId, currentInstructions, options) {
    const traces = options?.traces ?? (await this.traceStore.getAll(agentId));
    const maxTraces = options?.maxTraces ?? 20;
    const relevantTraces = traces.slice(0, maxTraces);
    if (relevantTraces.length === 0) {
      return this.#unchangedResult(currentInstructions, [], 'No traces available for optimization');
    }

    const gaps = await this.analyzeFailures(relevantTraces, currentInstructions);
    if (gaps.length === 0) {
      return this.#unchangedResult(currentInstructions, [], 'No instruction gaps identified');
    }

    const insights = this.insightStore
      ? await this.insightStore.findRelevant(agentId, currentInstructions, 10)
      : [];

    const candidates = await this.generateCandidates(currentInstructions, gaps, insights);
    if (candidates.length === 0) {
      return this.#unchangedResult(
        currentInstructions,
        gaps,
        'Failed to generate instruction candidates',
      );
    }

    const evaluations = await this.evaluateCandidates(candidates, relevantTraces);

    // Start from the first candidate so there is always a winner, even when
    // every evaluation scored 0.
    let bestCandidate = candidates[0];
    let bestScore = 0;
    let bestEvaluation = evaluations.get(candidates[0]) ?? { score: 0, weaknesses: [] };
    for (const [candidate, evaluation] of evaluations) {
      if (evaluation.score > bestScore) {
        bestScore = evaluation.score;
        bestCandidate = candidate;
        bestEvaluation = evaluation;
      }
    }

    // Each round feeds the winner's original weakness list back to the model;
    // the refined text is not re-evaluated between rounds.
    let finalInstructions = bestCandidate;
    for (let round = 0; round < this.refinementRounds; round++) {
      if (bestEvaluation.weaknesses.length === 0) {
        break;
      }
      const refined = await this.refineInstructions(finalInstructions, bestEvaluation.weaknesses);
      if (refined) {
        finalInstructions = refined;
      }
    }

    const originalScore = await this.estimateInstructionScore(currentInstructions, relevantTraces);
    const newScore = await this.estimateInstructionScore(finalInstructions, relevantTraces);

    return {
      originalInstructions: currentInstructions,
      optimizedInstructions: finalInstructions,
      improvement: newScore - originalScore,
      gapsAddressed: gaps,
      candidatesEvaluated: candidates.length,
      reasoning: `Identified ${gaps.length} gaps, evaluated ${candidates.length} candidates, best score: ${bestScore.toFixed(2)}`,
    };
  }

  /**
   * Asks the LLM which gaps in `currentInstructions` explain the traces.
   *
   * @returns {Promise<Array>} The parsed `gaps`, or `[]` when the call fails
   *   or the response cannot be parsed.
   */
  async analyzeFailures(traces, currentInstructions) {
    const prompt = buildFailureAnalysisPrompt(traces, currentInstructions);
    try {
      const content = await this.#requestCompletion(prompt, 0.5, 1000);
      const parsed = parseFailureAnalysisResponse(content);
      return parsed?.gaps ?? [];
    } catch {
      return [];
    }
  }

  /**
   * Asks the LLM for rewritten instructions that address `gaps`.
   *
   * @returns {Promise<string[]>} Up to `candidateCount` non-empty candidates,
   *   or `[]` when the call or the parsing fails.
   */
  async generateCandidates(currentInstructions, gaps, insights) {
    const prompt = buildInstructionCandidatePrompt(currentInstructions, gaps, insights);
    try {
      const content = await this.#requestCompletion(prompt, 0.7, 2000);
      const parsed = parseInstructionCandidatesResponse(content);
      return parsed
        .map((c) => c.instructions)
        .filter((i) => i.length > 0)
        .slice(0, this.candidateCount);
    } catch {
      return [];
    }
  }

  /**
   * Scores each candidate against the traces, one LLM call per candidate.
   *
   * @returns {Promise<Map<string, {score: number, weaknesses: Array}>>}
   *   Keyed by candidate text. A candidate whose evaluation fails or cannot
   *   be parsed receives the neutral `{ score: 0.5, weaknesses: [] }`.
   */
  async evaluateCandidates(candidates, traces) {
    const evaluations = new Map();
    for (const candidate of candidates) {
      const prompt = buildInstructionEvaluationPrompt(candidate, traces);
      let evaluation = { score: 0.5, weaknesses: [] };
      try {
        const content = await this.#requestCompletion(prompt, 0.3, 500);
        const parsed = parseInstructionEvaluationResponse(content);
        if (parsed) {
          evaluation = { score: parsed.score, weaknesses: parsed.weaknesses };
        }
      } catch {
        // Best effort: keep the neutral evaluation for this candidate.
      }
      evaluations.set(candidate, evaluation);
    }
    return evaluations;
  }

  /**
   * Asks the LLM to rewrite `candidate` so that it addresses `weaknesses`.
   *
   * @returns {Promise<string|null>} The refined instructions, or `null` when
   *   the call fails or the response has no `instructions`.
   */
  async refineInstructions(candidate, weaknesses) {
    const prompt = buildInstructionRefinementPrompt(candidate, weaknesses);
    try {
      const content = await this.#requestCompletion(prompt, 0.5, 1500);
      const parsed = parseInstructionRefinementResponse(content);
      return parsed?.instructions ?? null;
    } catch {
      return null;
    }
  }

  /**
   * Heuristic quality estimate in [0, 1] for a set of instructions.
   *
   * score = 0.4 * successRate + 0.5 * meanTraceScore + 0.1 + lengthBonus
   *
   * `lengthBonus` is +0.1 below 500 characters, -0.1 above 1500, else 0.
   * With no traces, both trace-derived terms fall back to 0.5.
   *
   * @param {string} instructions
   * @param {Array<{metrics: {success: boolean}, score: number}>} traces
   * @returns {Promise<number>}
   */
  async estimateInstructionScore(instructions, traces) {
    const successfulTraces = traces.filter((t) => t.metrics.success);
    const baseScore = traces.length > 0 ? successfulTraces.length / traces.length : 0.5;
    const avgTraceScore =
      traces.length > 0 ? traces.reduce((sum, t) => sum + t.score, 0) / traces.length : 0.5;

    const instructionLength = instructions.length;
    let conciseBonus = 0;
    if (instructionLength < 500) {
      conciseBonus = 0.1;
    } else if (instructionLength > 1500) {
      conciseBonus = -0.1;
    }

    // The constant baseline is 0.1 so the weights total 1.0. A larger baseline
    // pushes ordinary trace sets past the upper clamp, which makes the
    // before/after scores identical and hides any improvement.
    const raw = baseScore * 0.4 + avgTraceScore * 0.5 + 0.1 + conciseBonus;
    return Math.max(0, Math.min(1, raw));
  }

  /** Builds the result returned when optimisation leaves the instructions untouched. */
  #unchangedResult(currentInstructions, gapsAddressed, reasoning) {
    return {
      originalInstructions: currentInstructions,
      optimizedInstructions: currentInstructions,
      improvement: 0,
      gapsAddressed,
      candidatesEvaluated: 0,
      reasoning,
    };
  }

  /** Sends a single user-message prompt to the LLM and returns the response content. */
  async #requestCompletion(prompt, temperature, maxTokens) {
    const response = await this.llm.chat({
      model: this.model,
      messages: [{ role: 'user', content: prompt }],
      temperature,
      maxTokens,
    });
    return response.content;
  }
}
175
+ //# sourceMappingURL=instruction-optimizer.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"instruction-optimizer.js","sourceRoot":"","sources":["../../src/learning/instruction-optimizer.ts"],"names":[],"mappings":"AASA,OAAO,EACL,0BAA0B,EAC1B,+BAA+B,EAC/B,gCAAgC,EAChC,gCAAgC,EAChC,4BAA4B,EAC5B,kCAAkC,EAClC,kCAAkC,EAClC,kCAAkC,GACnC,MAAM,WAAW,CAAC;AAWnB,MAAM,OAAO,oBAAoB;IACvB,GAAG,CAAa;IAChB,KAAK,CAAS;IACd,UAAU,CAAa;IACvB,YAAY,CAAgB;IAC5B,cAAc,CAAS;IACvB,gBAAgB,CAAS;IAEjC,YAAY,OAAoC;QAC9C,IAAI,CAAC,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC;QACvB,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC;QAC3B,IAAI,CAAC,UAAU,GAAG,OAAO,CAAC,UAAU,CAAC;QACrC,IAAI,CAAC,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC;QACzC,IAAI,CAAC,cAAc,GAAG,OAAO,CAAC,cAAc,IAAI,CAAC,CAAC;QAClD,IAAI,CAAC,gBAAgB,GAAG,OAAO,CAAC,gBAAgB,IAAI,CAAC,CAAC;IACxD,CAAC;IAED,KAAK,CAAC,QAAQ,CACZ,OAAe,EACf,mBAA2B,EAC3B,OAIC;QAED,MAAM,MAAM,GAAG,OAAO,EAAE,MAAM,IAAI,CAAC,MAAM,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC;QAC1E,MAAM,SAAS,GAAG,OAAO,EAAE,SAAS,IAAI,EAAE,CAAC;QAC3C,MAAM,cAAc,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC;QAElD,IAAI,cAAc,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAChC,OAAO;gBACL,oBAAoB,EAAE,mBAAmB;gBACzC,qBAAqB,EAAE,mBAAmB;gBAC1C,WAAW,EAAE,CAAC;gBACd,aAAa,EAAE,EAAE;gBACjB,mBAAmB,EAAE,CAAC;gBACtB,SAAS,EAAE,sCAAsC;aAClD,CAAC;QACJ,CAAC;QAED,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,eAAe,CAAC,cAAc,EAAE,mBAAmB,CAAC,CAAC;QAE7E,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACtB,OAAO;gBACL,oBAAoB,EAAE,mBAAmB;gBACzC,qBAAqB,EAAE,mBAAmB;gBAC1C,WAAW,EAAE,CAAC;gBACd,aAAa,EAAE,EAAE;gBACjB,mBAAmB,EAAE,CAAC;gBACtB,SAAS,EAAE,gCAAgC;aAC5C,CAAC;QACJ,CAAC;QAED,MAAM,QAAQ,GAAG,IAAI,CAAC,YAAY;YAChC,CAAC,CAAC,MAAM,IAAI,CAAC,YAAY,CAAC,YAAY,CAAC,OAAO,EAAE,mBAAmB,EAAE,EAAE,CAAC;YACxE,CAAC,CAAC,EAAE,CAAC;QAEP,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,kBAAkB,CAAC,mBAAmB,EAAE,IAAI,EAAE,QAAQ,CAAC,CAAC;QAEtF,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC5B,OAAO;gBACL,oBAAoB,EAAE,mBAAmB;gBACzC,qBAAqB,EAAE,mBAAmB;gBAC1C,WAAW,EAAE,CAAC;gBACd,aAAa,EAAE,IAAI;gBACnB,mBAAmB,EAAE,CAAC;gBACtB,SAAS,EAAE,2CAA2C;aACvD,CAAC;QACJ,CAAC;QAED,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,kBAAkB
,CAAC,UAAU,EAAE,cAAc,CAAC,CAAC;QAE9E,IAAI,aAAa,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;QAClC,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,IAAI,cAAc,GAAG,WAAW,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,CAAC,EAAE,UAAU,EAAE,EAAE,EAAE,CAAC;QAEpF,KAAK,MAAM,CAAC,SAAS,EAAE,UAAU,CAAC,IAAI,WAAW,EAAE,CAAC;YAClD,IAAI,UAAU,CAAC,KAAK,GAAG,SAAS,EAAE,CAAC;gBACjC,SAAS,GAAG,UAAU,CAAC,KAAK,CAAC;gBAC7B,aAAa,GAAG,SAAS,CAAC;gBAC1B,cAAc,GAAG,UAAU,CAAC;YAC9B,CAAC;QACH,CAAC;QAED,IAAI,iBAAiB,GAAG,aAAa,CAAC;QACtC,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,IAAI,CAAC,gBAAgB,EAAE,KAAK,EAAE,EAAE,CAAC;YAC3D,IAAI,cAAc,CAAC,UAAU,CAAC,MAAM,KAAK,CAAC;gBAAE,MAAM;YAElD,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,kBAAkB,CAAC,iBAAiB,EAAE,cAAc,CAAC,UAAU,CAAC,CAAC;YAE5F,IAAI,OAAO,EAAE,CAAC;gBACZ,iBAAiB,GAAG,OAAO,CAAC;YAC9B,CAAC;QACH,CAAC;QAED,MAAM,aAAa,GAAG,MAAM,IAAI,CAAC,wBAAwB,CAAC,mBAAmB,EAAE,cAAc,CAAC,CAAC;QAC/F,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,wBAAwB,CAAC,iBAAiB,EAAE,cAAc,CAAC,CAAC;QAExF,OAAO;YACL,oBAAoB,EAAE,mBAAmB;YACzC,qBAAqB,EAAE,iBAAiB;YACxC,WAAW,EAAE,QAAQ,GAAG,aAAa;YACrC,aAAa,EAAE,IAAI;YACnB,mBAAmB,EAAE,UAAU,CAAC,MAAM;YACtC,SAAS,EAAE,cAAc,IAAI,CAAC,MAAM,oBAAoB,UAAU,CAAC,MAAM,4BAA4B,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE;SAC5H,CAAC;IACJ,CAAC;IAEO,KAAK,CAAC,eAAe,CAC3B,MAAwB,EACxB,mBAA2B;QAE3B,MAAM,MAAM,GAAG,0BAA0B,CAAC,MAAM,EAAE,mBAAmB,CAAC,CAAC;QAEvE,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC;gBACnC,KAAK,EAAE,IAAI,CAAC,KAAK;gBACjB,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC;gBAC7C,WAAW,EAAE,GAAG;gBAChB,SAAS,EAAE,IAAI;aAChB,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,4BAA4B,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;YAC9D,OAAO,MAAM,EAAE,IAAI,IAAI,EAAE,CAAC;QAC5B,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,kBAAkB,CAC9B,mBAA2B,EAC3B,IAAsB,EACtB,QAAmB;QAEnB,MAAM,MAAM,GAAG,+BAA+B,CAAC,mBAAmB,EAAE,IAAI,EAAE,QAAQ,CAAC,CAAC;QAEpF,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC;gBACnC,KAAK,EAAE,IAAI,CAAC,KAAK;gBACjB,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EA
AE,CAAC;gBAC7C,WAAW,EAAE,GAAG;gBAChB,SAAS,EAAE,IAAI;aAChB,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,kCAAkC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;YACpE,OAAO,MAAM;iBACV,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,YAAY,CAAC;iBAC1B,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;iBAC3B,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;QACnC,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,kBAAkB,CAC9B,UAAoB,EACpB,MAAwB;QAExB,MAAM,WAAW,GAAG,IAAI,GAAG,EAAmD,CAAC;QAE/E,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;YACnC,MAAM,MAAM,GAAG,gCAAgC,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;YAEnE,IAAI,CAAC;gBACH,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC;oBACnC,KAAK,EAAE,IAAI,CAAC,KAAK;oBACjB,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC;oBAC7C,WAAW,EAAE,GAAG;oBAChB,SAAS,EAAE,GAAG;iBACf,CAAC,CAAC;gBAEH,MAAM,MAAM,GAAG,kCAAkC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;gBACpE,IAAI,MAAM,EAAE,CAAC;oBACX,WAAW,CAAC,GAAG,CAAC,SAAS,EAAE;wBACzB,KAAK,EAAE,MAAM,CAAC,KAAK;wBACnB,UAAU,EAAE,MAAM,CAAC,UAAU;qBAC9B,CAAC,CAAC;gBACL,CAAC;qBAAM,CAAC;oBACN,WAAW,CAAC,GAAG,CAAC,SAAS,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,UAAU,EAAE,EAAE,EAAE,CAAC,CAAC;gBAC7D,CAAC;YACH,CAAC;YAAC,MAAM,CAAC;gBACP,WAAW,CAAC,GAAG,CAAC,SAAS,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,UAAU,EAAE,EAAE,EAAE,CAAC,CAAC;YAC7D,CAAC;QACH,CAAC;QAED,OAAO,WAAW,CAAC;IACrB,CAAC;IAEO,KAAK,CAAC,kBAAkB,CAC9B,SAAiB,EACjB,UAAoB;QAEpB,MAAM,MAAM,GAAG,gCAAgC,CAAC,SAAS,EAAE,UAAU,CAAC,CAAC;QAEvE,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC;gBACnC,KAAK,EAAE,IAAI,CAAC,KAAK;gBACjB,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC;gBAC7C,WAAW,EAAE,GAAG;gBAChB,SAAS,EAAE,IAAI;aAChB,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,kCAAkC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;YACpE,OAAO,MAAM,EAAE,YAAY,IAAI,IAAI,CAAC;QACtC,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAEO,KAAK,CAAC,wBAAwB,CACpC,YAAoB,EACpB,MAAwB;QAExB,MAAM,gBAAgB,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QAEjE,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,GA
AG,CAAC,CAAC,CAAC,CAAC,gBAAgB,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC;QAEpF,MAAM,aAAa,GACjB,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC;QAExF,MAAM,iBAAiB,GAAG,YAAY,CAAC,MAAM,CAAC;QAC9C,MAAM,YAAY,GAAG,iBAAiB,GAAG,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,iBAAiB,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QAEzF,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,SAAS,GAAG,GAAG,GAAG,aAAa,GAAG,GAAG,GAAG,GAAG,GAAG,YAAY,CAAC,CAAC,CAAC;IAC9F,CAAC;CACF"}
@@ -0,0 +1,37 @@
1
+ import type { ExecutionTrace, MetricResult, MetricFn, MetricEvaluatorConfig, LLMBackend } from '@cogitator-ai/types';
2
/** Constructor options for MetricEvaluator; every field is optional. */
+ export interface MetricEvaluatorOptions {
3
/** LLM backend; presumably used by the LLM-judged metrics (completeness, coherence) - TODO confirm against metrics.ts. */
+ llm?: LLMBackend;
4
/** Model identifier stored alongside `llm`. */
+ model?: string;
5
/** Partial overrides that are spread over the built-in default configuration. */
+ config?: Partial<MetricEvaluatorConfig>;
6
+ }
7
/**
 * Scores execution traces against a configurable list of metrics and
 * aggregates the per-metric results into one score plus a pass/fail flag.
 */
+ export declare class MetricEvaluator {
8
+ private llm?;
9
+ private model?;
10
+ private config;
11
/** Metric functions added through registerMetric, keyed by metric name. */
+ private customMetrics;
12
/** Merges `options.config` over the default configuration (shallow spread). */
+ constructor(options?: MetricEvaluatorOptions);
13
/** Registers `fn` under `name`; a registered function is consulted before any built-in metric of the same name. */
+ registerMetric(name: string, fn: MetricFn): void;
14
/**
 * Evaluates every configured metric for one trace, in configuration order.
 * `passed` is true when the aggregated `score` is >= the configured pass threshold.
 */
+ evaluate(trace: ExecutionTrace, expected?: unknown): Promise<{
15
+ results: MetricResult[];
16
+ score: number;
17
+ passed: boolean;
18
+ }>;
19
/**
 * Evaluates traces one after another; `expectedList[i]` is paired with `traces[i]`.
 * The returned map is keyed by `trace.id`.
 */
+ evaluateBatch(traces: ExecutionTrace[], expectedList?: unknown[]): Promise<Map<string, {
20
+ results: MetricResult[];
21
+ score: number;
22
+ passed: boolean;
23
+ }>>;
24
/** Dispatches a metric name to a registered custom function or to a built-in metric. */
+ private evaluateMetric;
25
/** Built-in metric selected by the name 'success'. */
+ successMetric(trace: ExecutionTrace): MetricResult;
26
/** Built-in metric selected by the name 'tool_accuracy'. */
+ toolAccuracyMetric(trace: ExecutionTrace, expected?: unknown): MetricResult;
27
/** Built-in metric selected by the name 'efficiency'. */
+ efficiencyMetric(trace: ExecutionTrace): MetricResult;
28
/** Asynchronous metric; presumably LLM-judged - TODO confirm against metrics.ts. */
+ completenessMetric(trace: ExecutionTrace, expected?: unknown): Promise<MetricResult>;
29
/** Asynchronous metric; presumably LLM-judged - TODO confirm against metrics.ts. */
+ coherenceMetric(trace: ExecutionTrace): Promise<MetricResult>;
30
+ private parseMetricResponse;
31
/** Combines per-metric results into the single score returned by evaluate. */
+ private aggregateScores;
32
+ getConfig(): MetricEvaluatorConfig;
33
+ }
34
/** Factory returning a MetricFn; takes no arguments. NOTE(review): presumably mirrors the built-in 'success' metric - confirm in metrics.ts. */
+ export declare function createSuccessMetric(): MetricFn;
35
/** Factory returning a MetricFn. NOTE(review): `fieldPath` presumably selects which field is compared against the expected value - confirm in metrics.ts. */
+ export declare function createExactMatchMetric(fieldPath?: string): MetricFn;
36
/** Factory returning a MetricFn built from `keywords`. NOTE(review): presumably checks the output for those keywords - confirm in metrics.ts. */
+ export declare function createContainsMetric(keywords: string[]): MetricFn;
37
+ //# sourceMappingURL=metrics.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"metrics.d.ts","sourceRoot":"","sources":["../../src/learning/metrics.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,cAAc,EACd,YAAY,EACZ,QAAQ,EACR,qBAAqB,EAErB,UAAU,EACX,MAAM,qBAAqB,CAAC;AAE7B,MAAM,WAAW,sBAAsB;IACrC,GAAG,CAAC,EAAE,UAAU,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,MAAM,CAAC,EAAE,OAAO,CAAC,qBAAqB,CAAC,CAAC;CACzC;AAsBD,qBAAa,eAAe;IAC1B,OAAO,CAAC,GAAG,CAAC,CAAa;IACzB,OAAO,CAAC,KAAK,CAAC,CAAS;IACvB,OAAO,CAAC,MAAM,CAAwB;IACtC,OAAO,CAAC,aAAa,CAA+B;gBAExC,OAAO,GAAE,sBAA2B;IAMhD,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,EAAE,QAAQ,GAAG,IAAI;IAI1C,QAAQ,CACZ,KAAK,EAAE,cAAc,EACrB,QAAQ,CAAC,EAAE,OAAO,GACjB,OAAO,CAAC;QAAE,OAAO,EAAE,YAAY,EAAE,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,OAAO,CAAA;KAAE,CAAC;IAcjE,aAAa,CACjB,MAAM,EAAE,cAAc,EAAE,EACxB,YAAY,CAAC,EAAE,OAAO,EAAE,GACvB,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE;QAAE,OAAO,EAAE,YAAY,EAAE,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,OAAO,CAAA;KAAE,CAAC,CAAC;YAatE,cAAc;IA0B5B,aAAa,CAAC,KAAK,EAAE,cAAc,GAAG,YAAY;IAmBlD,kBAAkB,CAAC,KAAK,EAAE,cAAc,EAAE,QAAQ,CAAC,EAAE,OAAO,GAAG,YAAY;IAwC3E,gBAAgB,CAAC,KAAK,EAAE,cAAc,GAAG,YAAY;IAiB/C,kBAAkB,CAAC,KAAK,EAAE,cAAc,EAAE,QAAQ,CAAC,EAAE,OAAO,GAAG,OAAO,CAAC,YAAY,CAAC;IAiDpF,eAAe,CAAC,KAAK,EAAE,cAAc,GAAG,OAAO,CAAC,YAAY,CAAC;IAgDnE,OAAO,CAAC,mBAAmB;IAkB3B,OAAO,CAAC,eAAe;IA6BvB,SAAS,IAAI,qBAAqB;CAGnC;AAED,wBAAgB,mBAAmB,IAAI,QAAQ,CAS9C;AAED,wBAAgB,sBAAsB,CAAC,SAAS,CAAC,EAAE,MAAM,GAAG,QAAQ,CAkBnE;AAED,wBAAgB,oBAAoB,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,QAAQ,CAoBjE"}
@@ -0,0 +1,310 @@
1
/**
 * Evaluation settings used when the caller supplies no overrides: three
 * weighted metrics, weighted-average aggregation and a 0.7 pass threshold.
 */
const DEFAULT_CONFIG = {
    metrics: [
        {
            name: 'success',
            type: 'boolean',
            description: 'Did the run complete without errors?',
            weight: 0.4,
        },
        {
            name: 'tool_accuracy',
            type: 'numeric',
            description: 'Did tools produce expected results?',
            weight: 0.3,
        },
        { name: 'efficiency', type: 'numeric', description: 'Token/time efficiency', weight: 0.3 },
    ],
    aggregation: 'weighted-average',
    passThreshold: 0.7,
};

/**
 * Scores execution traces against a configurable list of metrics and
 * aggregates the per-metric values into a single pass/fail verdict.
 *
 * Built-in metric names are `success`, `tool_accuracy`, `efficiency`,
 * `completeness` and `coherence`; any other name resolves to a neutral 0.5
 * unless a custom metric was registered under it.
 */
export class MetricEvaluator {
    llm;
    model;
    config;
    customMetrics = new Map();

    /**
     * @param {object} [options]
     * @param {object} [options.llm] Backend exposing `chat()`; used only by the
     *   `completeness` and `coherence` metrics.
     * @param {string} [options.model] Model name forwarded to `llm.chat()`.
     * @param {object} [options.config] Partial overrides merged over the defaults.
     */
    constructor(options = {}) {
        this.llm = options.llm;
        this.model = options.model;
        const overrides = options.config ?? {};
        this.config = {
            ...DEFAULT_CONFIG,
            ...overrides,
            // Copy the default metric list so that instances never share the
            // module-level array: a mutation through getConfig().metrics on one
            // evaluator must not leak into every other evaluator.
            metrics: overrides.metrics ?? DEFAULT_CONFIG.metrics.map((metric) => ({ ...metric })),
        };
    }

    /**
     * Registers a custom metric. A custom metric takes precedence over a
     * built-in metric of the same name.
     *
     * @param {string} name Metric name as listed in `config.metrics`.
     * @param {Function} fn Called as `fn(trace, expected)`; may return a promise.
     */
    registerMetric(name, fn) {
        this.customMetrics.set(name, fn);
    }

    /**
     * Runs every configured metric, one after another, against a single trace.
     *
     * @param {object} trace Execution trace to score.
     * @param {*} [expected] Optional expected output passed to each metric.
     * @returns {Promise<{results: object[], score: number, passed: boolean}>}
     */
    async evaluate(trace, expected) {
        const results = [];
        for (const metricDef of this.config.metrics) {
            results.push(await this.evaluateMetric(metricDef.name, trace, expected));
        }
        // Results are index-aligned with config.metrics, which lets the
        // aggregation pick the right weight even when a custom metric reports
        // a different name than the one it was registered under.
        const score = this.aggregateScores(results, this.config.metrics);
        const passed = score >= this.config.passThreshold;
        return { results, score, passed };
    }

    /**
     * Evaluates several traces sequentially.
     *
     * @param {object[]} traces Traces to score.
     * @param {Array} [expectedList] Expected values, matched to traces by index.
     * @returns {Promise<Map<string, object>>} Evaluations keyed by `trace.id`
     *   (a later trace with the same id replaces the earlier entry).
     */
    async evaluateBatch(traces, expectedList) {
        const results = new Map();
        for (let i = 0; i < traces.length; i++) {
            const trace = traces[i];
            const expected = expectedList?.[i];
            const evaluation = await this.evaluate(trace, expected);
            results.set(trace.id, evaluation);
        }
        return results;
    }

    /**
     * Dispatches one metric by name: registered custom metrics first, then the
     * built-ins, then a neutral "Unknown metric" result.
     */
    async evaluateMetric(name, trace, expected) {
        if (this.customMetrics.has(name)) {
            const fn = this.customMetrics.get(name);
            return fn(trace, expected);
        }
        switch (name) {
            case 'success':
                return this.successMetric(trace);
            case 'tool_accuracy':
                return this.toolAccuracyMetric(trace, expected);
            case 'efficiency':
                return this.efficiencyMetric(trace);
            case 'completeness':
                return this.completenessMetric(trace, expected);
            case 'coherence':
                return this.coherenceMetric(trace);
            default:
                return { name, value: 0.5, passed: true, reasoning: 'Unknown metric' };
        }
    }

    /**
     * Scores 1 when no step has a tool error and no reflection step reports
     * `wasSuccessful === false`; otherwise 0.
     */
    successMetric(trace) {
        const hasErrors = trace.steps.some((step) => step.toolResult?.error ||
            (step.type === 'reflection' && step.reflection?.analysis?.wasSuccessful === false));
        const value = hasErrors ? 0 : 1;
        return {
            name: 'success',
            value,
            passed: value === 1,
            reasoning: hasErrors
                ? 'Run had errors or failed reflections'
                : 'Run completed without errors',
        };
    }

    /**
     * Ratio of `tool_call` steps that produced a result without an error.
     * When `expected` is a string, the ratio is nudged by +/-0.2 (clamped to
     * [0, 1]) depending on whether the output contains it, case-insensitively.
     * A trace without tool calls scores 1.
     */
    toolAccuracyMetric(trace, expected) {
        const toolSteps = trace.steps.filter((s) => s.type === 'tool_call');
        if (toolSteps.length === 0) {
            return {
                name: 'tool_accuracy',
                value: 1,
                passed: true,
                reasoning: 'No tool calls to evaluate',
            };
        }
        const successfulCalls = toolSteps.filter((step) => step.toolResult && !step.toolResult.error).length;
        const value = successfulCalls / toolSteps.length;
        if (typeof expected === 'string') {
            const outputMatches = trace.output.toLowerCase().includes(expected.toLowerCase());
            const adjustedValue = outputMatches ? Math.min(value + 0.2, 1) : Math.max(value - 0.2, 0);
            return {
                name: 'tool_accuracy',
                value: adjustedValue,
                passed: adjustedValue >= 0.7,
                reasoning: `${successfulCalls}/${toolSteps.length} successful tool calls, output ${outputMatches ? 'matches' : 'does not match'} expected`,
            };
        }
        return {
            name: 'tool_accuracy',
            value,
            passed: value >= 0.7,
            reasoning: `${successfulCalls}/${toolSteps.length} successful tool calls`,
        };
    }

    /**
     * Blends token efficiency (60%) and time efficiency (40%). Each component
     * is 1 up to a budget (10000 tokens / 30000 duration units, reported as
     * ms) and decays as budget/actual beyond it.
     */
    efficiencyMetric(trace) {
        const totalTokens = trace.usage.inputTokens + trace.usage.outputTokens;
        const duration = trace.duration;
        const tokenEfficiency = Math.min(1, 10000 / Math.max(totalTokens, 1));
        const timeEfficiency = Math.min(1, 30000 / Math.max(duration, 1));
        const value = tokenEfficiency * 0.6 + timeEfficiency * 0.4;
        return {
            name: 'efficiency',
            value,
            passed: value >= 0.5,
            reasoning: `${totalTokens} tokens in ${duration}ms`,
        };
    }

    /**
     * Asks the LLM how completely the output addresses the input. Without an
     * LLM/model it falls back to an output-length heuristic; if the LLM call
     * throws, a neutral passing default is returned (deliberate best effort).
     */
    async completenessMetric(trace, expected) {
        if (!this.llm || !this.model) {
            const hasOutput = !!trace.output && trace.output.length > 10;
            return {
                name: 'completeness',
                value: hasOutput ? 0.7 : 0.3,
                passed: hasOutput,
                reasoning: 'Basic output length check (no LLM available)',
            };
        }
        const prompt = `Evaluate how completely this output addresses the input.

Input: ${trace.input}
Output: ${trace.output}
${expected ? `Expected: ${JSON.stringify(expected)}` : ''}

Rate completeness from 0.0 to 1.0 where:
- 0.0 = completely misses the point
- 0.5 = partially addresses input
- 1.0 = fully and thoroughly addresses input

Respond with JSON: { "score": 0.X, "reasoning": "..." }`;
        try {
            const response = await this.llm.chat({
                model: this.model,
                messages: [{ role: 'user', content: prompt }],
                temperature: 0.3,
                maxTokens: 200,
            });
            const parsed = this.parseMetricResponse(response.content);
            return {
                name: 'completeness',
                value: parsed.score,
                passed: parsed.score >= 0.7,
                reasoning: parsed.reasoning,
            };
        }
        catch {
            return {
                name: 'completeness',
                value: 0.5,
                passed: true,
                reasoning: 'Evaluation failed, using default',
            };
        }
    }

    /**
     * Asks the LLM how logically coherent the sequence of steps is. Without an
     * LLM/model, or when the LLM call throws, a passing default is returned.
     */
    async coherenceMetric(trace) {
        if (!this.llm || !this.model) {
            return {
                name: 'coherence',
                value: 0.7,
                passed: true,
                reasoning: 'No LLM available for coherence check',
            };
        }
        const prompt = `Evaluate the logical coherence of this agent execution.

Input: ${trace.input}
Steps taken: ${trace.steps.map((s) => (s.type === 'tool_call' ? `Tool: ${s.toolCall?.name}` : s.type)).join(' → ')}
Output: ${trace.output}

Rate coherence from 0.0 to 1.0 where:
- 0.0 = completely incoherent, steps don't make sense
- 0.5 = somewhat logical but with issues
- 1.0 = perfectly logical and well-structured

Respond with JSON: { "score": 0.X, "reasoning": "..." }`;
        try {
            const response = await this.llm.chat({
                model: this.model,
                messages: [{ role: 'user', content: prompt }],
                temperature: 0.3,
                maxTokens: 200,
            });
            const parsed = this.parseMetricResponse(response.content);
            return {
                name: 'coherence',
                value: parsed.score,
                passed: parsed.score >= 0.6,
                reasoning: parsed.reasoning,
            };
        }
        catch {
            return {
                name: 'coherence',
                value: 0.6,
                passed: true,
                reasoning: 'Evaluation failed, using default',
            };
        }
    }

    /**
     * Extracts `{ score, reasoning }` from an LLM reply. Accepts bare JSON,
     * JSON inside a Markdown code fence, or JSON embedded in surrounding prose.
     * The score is clamped to [0, 1]; a missing or non-numeric score becomes
     * 0.5. Never throws: unparseable replies yield the 0.5 fallback.
     *
     * @param {string} content Raw LLM reply.
     * @returns {{score: number, reasoning: string}}
     */
    parseMetricResponse(content) {
        try {
            const text = String(content);
            const fenced = /```(?:json)?\s*([\s\S]*?)\s*```/.exec(text);
            const candidate = fenced ? fenced[1] : text;
            let parsed;
            try {
                parsed = JSON.parse(candidate);
            }
            catch {
                // Models often wrap the JSON in a sentence; retry on the
                // outermost {...} span before giving up.
                const start = candidate.indexOf('{');
                const end = candidate.lastIndexOf('}');
                if (start === -1 || end <= start) {
                    return { score: 0.5, reasoning: 'Failed to parse metric response' };
                }
                parsed = JSON.parse(candidate.slice(start, end + 1));
            }
            const raw = parsed.score;
            let numeric = Number.NaN;
            if (typeof raw === 'number') {
                numeric = raw;
            }
            else if (typeof raw === 'string' && raw.trim() !== '') {
                numeric = Number(raw);
            }
            // A genuine score of 0 must stay 0; only a missing or non-numeric
            // score falls back to the neutral 0.5.
            const score = Number.isNaN(numeric) ? 0.5 : Math.max(0, Math.min(1, numeric));
            const reasoning = String(parsed.reasoning || 'No reasoning provided');
            return { score, reasoning };
        }
        catch {
            return { score: 0.5, reasoning: 'Failed to parse metric response' };
        }
    }

    /**
     * Collapses per-metric values into one score according to
     * `config.aggregation` (`weighted-average`, `min`, `product`, otherwise a
     * plain mean). An empty result list scores 0.
     *
     * @param {object[]} results Metric results to combine.
     * @param {object[]} [definitions] Metric definitions index-aligned with
     *   `results`. When given, weights come from the matching index; otherwise
     *   they are looked up by result name. Unmatched results weigh 1.
     * @returns {number}
     */
    aggregateScores(results, definitions) {
        if (results.length === 0)
            return 0;
        switch (this.config.aggregation) {
            case 'weighted-average': {
                let totalWeight = 0;
                let weightedSum = 0;
                results.forEach((result, index) => {
                    const metricDef = definitions?.[index] ??
                        this.config.metrics.find((m) => m.name === result.name);
                    const weight = metricDef?.weight ?? 1;
                    weightedSum += result.value * weight;
                    totalWeight += weight;
                });
                return totalWeight > 0 ? weightedSum / totalWeight : 0;
            }
            case 'min':
                return Math.min(...results.map((r) => r.value));
            case 'product':
                return results.reduce((acc, r) => acc * r.value, 1);
            default:
                return results.reduce((sum, r) => sum + r.value, 0) / results.length;
        }
    }

    /**
     * @returns {object} A shallow copy of the active configuration (the
     *   `metrics` array is the evaluator's own array, not a copy).
     */
    getConfig() {
        return { ...this.config };
    }
}
267
/**
 * Creates a standalone "success" metric suitable for
 * `MetricEvaluator#registerMetric`.
 *
 * The metric scores 1 when no step of the trace carries a `toolResult.error`
 * and 0 otherwise. Unlike the evaluator's built-in `success` metric it does
 * not inspect reflection steps.
 *
 * @returns {Function} `(trace) => { name, value, passed, reasoning }`
 */
export function createSuccessMetric() {
    return (trace) => {
        const hasErrors = trace.steps.some((step) => Boolean(step.toolResult?.error));
        return {
            name: 'success',
            value: hasErrors ? 0 : 1,
            passed: !hasErrors,
            // Every sibling metric explains its verdict; do the same here.
            reasoning: hasErrors
                ? 'One or more tool calls returned an error'
                : 'Run completed without errors',
        };
    };
}
277
/**
 * Creates an "exact_match" metric that compares the trace output with the
 * expected value as strings, ignoring case and surrounding whitespace.
 *
 * @param {string} [fieldPath] Optional dotted path (e.g. `"result.answer"`)
 *   selecting a field inside the output. The output is used directly when it
 *   is an object, or parsed as JSON when it is a string. If the output is not
 *   JSON or the path does not resolve, the whole output is compared instead,
 *   which is what happens when no path is given.
 * @returns {Function} `(trace, expected) => { name, value, passed, reasoning }`;
 *   passes with value 1 when `expected` is `undefined`.
 */
export function createExactMatchMetric(fieldPath) {
    const normalize = (value) => String(value).toLowerCase().trim();

    // Resolve fieldPath inside the output; fall back to the raw output so
    // callers that never relied on fieldPath see unchanged results.
    const selectValue = (output) => {
        if (!fieldPath) {
            return output;
        }
        let current = output;
        if (typeof output === 'string') {
            try {
                current = JSON.parse(output);
            }
            catch {
                return output;
            }
        }
        for (const key of fieldPath.split('.')) {
            const isContainer = current !== null && typeof current === 'object';
            if (!isContainer || !Object.prototype.hasOwnProperty.call(current, key)) {
                return output;
            }
            current = current[key];
        }
        return current;
    };

    return (trace, expected) => {
        if (expected === undefined) {
            return { name: 'exact_match', value: 1, passed: true, reasoning: 'No expected value' };
        }
        const matches = normalize(selectValue(trace.output)) === normalize(expected);
        return {
            name: 'exact_match',
            value: matches ? 1 : 0,
            passed: matches,
            reasoning: matches ? 'Output matches expected' : 'Output does not match expected',
        };
    };
}
292
/**
 * Creates a "contains" metric whose value is the fraction of `keywords`
 * that occur (case-insensitively) in the trace output.
 *
 * @param {string[]} keywords Substrings to look for. An empty list yields
 *   value 1.
 * @returns {Function} `(trace) => { name, value, passed, reasoning }`;
 *   `passed` is true when at least half of the keywords are found.
 */
export function createContainsMetric(keywords) {
    return (trace) => {
        // Coerce like createExactMatchMetric does, so a missing or non-string
        // output scores as "nothing found" instead of throwing.
        const outputLower = String(trace.output ?? '').toLowerCase();
        const found = keywords.filter((keyword) => outputLower.includes(keyword.toLowerCase())).length;
        const value = keywords.length > 0 ? found / keywords.length : 1;
        return {
            name: 'contains',
            value,
            passed: value >= 0.5,
            reasoning: `Found ${found}/${keywords.length} keywords`,
        };
    };
}
310
+ //# sourceMappingURL=metrics.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"metrics.js","sourceRoot":"","sources":["../../src/learning/metrics.ts"],"names":[],"mappings":"AAeA,MAAM,cAAc,GAA0B;IAC5C,OAAO,EAAE;QACP;YACE,IAAI,EAAE,SAAS;YACf,IAAI,EAAE,SAAS;YACf,WAAW,EAAE,sCAAsC;YACnD,MAAM,EAAE,GAAG;SACZ;QACD;YACE,IAAI,EAAE,eAAe;YACrB,IAAI,EAAE,SAAS;YACf,WAAW,EAAE,qCAAqC;YAClD,MAAM,EAAE,GAAG;SACZ;QACD,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,SAAS,EAAE,WAAW,EAAE,uBAAuB,EAAE,MAAM,EAAE,GAAG,EAAE;KAC3F;IACD,WAAW,EAAE,kBAAkB;IAC/B,aAAa,EAAE,GAAG;CACnB,CAAC;AAEF,MAAM,OAAO,eAAe;IAClB,GAAG,CAAc;IACjB,KAAK,CAAU;IACf,MAAM,CAAwB;IAC9B,aAAa,GAAG,IAAI,GAAG,EAAoB,CAAC;IAEpD,YAAY,UAAkC,EAAE;QAC9C,IAAI,CAAC,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC;QACvB,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC;QAC3B,IAAI,CAAC,MAAM,GAAG,EAAE,GAAG,cAAc,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IACzD,CAAC;IAED,cAAc,CAAC,IAAY,EAAE,EAAY;QACvC,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;IACnC,CAAC;IAED,KAAK,CAAC,QAAQ,CACZ,KAAqB,EACrB,QAAkB;QAElB,MAAM,OAAO,GAAmB,EAAE,CAAC;QAEnC,KAAK,MAAM,SAAS,IAAI,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;YAC5C,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC,SAAS,CAAC,IAAI,EAAE,KAAK,EAAE,QAAQ,CAAC,CAAC;YAC1E,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvB,CAAC;QAED,MAAM,KAAK,GAAG,IAAI,CAAC,eAAe,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,MAAM,GAAG,KAAK,IAAI,IAAI,CAAC,MAAM,CAAC,aAAa,CAAC;QAElD,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC;IACpC,CAAC;IAED,KAAK,CAAC,aAAa,CACjB,MAAwB,EACxB,YAAwB;QAExB,MAAM,OAAO,GAAG,IAAI,GAAG,EAAuE,CAAC;QAE/F,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACvC,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;YACxB,MAAM,QAAQ,GAAG,YAAY,EAAE,CAAC,CAAC,CAAC,CAAC;YACnC,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;YACxD,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,EAAE,UAAU,CAAC,CAAC;QACpC,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAEO,KAAK,CAAC,cAAc,CAC1B,IAAY,EACZ,KAAqB,EACrB,QAAkB;QAElB,IAAI,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;YACjC,MAAM,EAAE,GAAG,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,IAAI,CAAE,CAAC;YACzC,OAAO,EAAE,C
AAC,KAAK,EAAE,QAAQ,CAAC,CAAC;QAC7B,CAAC;QAED,QAAQ,IAAqB,EAAE,CAAC;YAC9B,KAAK,SAAS;gBACZ,OAAO,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;YACnC,KAAK,eAAe;gBAClB,OAAO,IAAI,CAAC,kBAAkB,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;YAClD,KAAK,YAAY;gBACf,OAAO,IAAI,CAAC,gBAAgB,CAAC,KAAK,CAAC,CAAC;YACtC,KAAK,cAAc;gBACjB,OAAO,IAAI,CAAC,kBAAkB,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;YAClD,KAAK,WAAW;gBACd,OAAO,IAAI,CAAC,eAAe,CAAC,KAAK,CAAC,CAAC;YACrC;gBACE,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS,EAAE,gBAAgB,EAAE,CAAC;QAC3E,CAAC;IACH,CAAC;IAED,aAAa,CAAC,KAAqB;QACjC,MAAM,SAAS,GAAG,KAAK,CAAC,KAAK,CAAC,IAAI,CAChC,CAAC,IAAI,EAAE,EAAE,CACP,IAAI,CAAC,UAAU,EAAE,KAAK;YACtB,CAAC,IAAI,CAAC,IAAI,KAAK,YAAY,IAAI,IAAI,CAAC,UAAU,EAAE,QAAQ,EAAE,aAAa,KAAK,KAAK,CAAC,CACrF,CAAC;QAEF,MAAM,KAAK,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAEhC,OAAO;YACL,IAAI,EAAE,SAAS;YACf,KAAK;YACL,MAAM,EAAE,KAAK,KAAK,CAAC;YACnB,SAAS,EAAE,SAAS;gBAClB,CAAC,CAAC,sCAAsC;gBACxC,CAAC,CAAC,8BAA8B;SACnC,CAAC;IACJ,CAAC;IAED,kBAAkB,CAAC,KAAqB,EAAE,QAAkB;QAC1D,MAAM,SAAS,GAAG,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,WAAW,CAAC,CAAC;QAEpE,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC3B,OAAO;gBACL,IAAI,EAAE,eAAe;gBACrB,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,IAAI;gBACZ,SAAS,EAAE,2BAA2B;aACvC,CAAC;QACJ,CAAC;QAED,IAAI,eAAe,GAAG,CAAC,CAAC;QACxB,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;YAC7B,IAAI,IAAI,CAAC,UAAU,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC;gBAC9C,eAAe,EAAE,CAAC;YACpB,CAAC;QACH,CAAC;QAED,MAAM,KAAK,GAAG,eAAe,GAAG,SAAS,CAAC,MAAM,CAAC;QAEjD,IAAI,QAAQ,KAAK,SAAS,IAAI,OAAO,QAAQ,KAAK,QAAQ,EAAE,CAAC;YAC3D,MAAM,aAAa,GAAG,KAAK,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,QAAQ,EAAE,CAAC,WAAW,EAAE,CAAC,CAAC;YAC7F,MAAM,aAAa,GAAG,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,GAAG,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,GAAG,GAAG,EAAE,CAAC,CAAC,CAAC;YAC1F,OAAO;gBACL,IAAI,EAAE,eAAe;gBACrB,KAAK,EAAE,aAAa;gBACpB,MAAM,EAAE,aAAa,IAAI,GAAG;gBAC5B,SAAS,EAAE,GAAG,eAAe,IAAI,SAAS,CAAC,MAAM,kCAAkC,aA
Aa,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,gBAAgB,WAAW;aAC3I,CAAC;QACJ,CAAC;QAED,OAAO;YACL,IAAI,EAAE,eAAe;YACrB,KAAK;YACL,MAAM,EAAE,KAAK,IAAI,GAAG;YACpB,SAAS,EAAE,GAAG,eAAe,IAAI,SAAS,CAAC,MAAM,wBAAwB;SAC1E,CAAC;IACJ,CAAC;IAED,gBAAgB,CAAC,KAAqB;QACpC,MAAM,WAAW,GAAG,KAAK,CAAC,KAAK,CAAC,WAAW,GAAG,KAAK,CAAC,KAAK,CAAC,YAAY,CAAC;QACvE,MAAM,QAAQ,GAAG,KAAK,CAAC,QAAQ,CAAC;QAEhC,MAAM,eAAe,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,CAAC;QACtE,MAAM,cAAc,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,CAAC;QAElE,MAAM,KAAK,GAAG,eAAe,GAAG,GAAG,GAAG,cAAc,GAAG,GAAG,CAAC;QAE3D,OAAO;YACL,IAAI,EAAE,YAAY;YAClB,KAAK;YACL,MAAM,EAAE,KAAK,IAAI,GAAG;YACpB,SAAS,EAAE,GAAG,WAAW,cAAc,QAAQ,IAAI;SACpD,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,kBAAkB,CAAC,KAAqB,EAAE,QAAkB;QAChE,IAAI,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC;YAC7B,MAAM,SAAS,GAAG,CAAC,CAAC,KAAK,CAAC,MAAM,IAAI,KAAK,CAAC,MAAM,CAAC,MAAM,GAAG,EAAE,CAAC;YAC7D,OAAO;gBACL,IAAI,EAAE,cAAc;gBACpB,KAAK,EAAE,SAAS,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG;gBAC5B,MAAM,EAAE,SAAS;gBACjB,SAAS,EAAE,8CAA8C;aAC1D,CAAC;QACJ,CAAC;QAED,MAAM,MAAM,GAAG;;SAEV,KAAK,CAAC,KAAK;UACV,KAAK,CAAC,MAAM;EACpB,QAAQ,CAAC,CAAC,CAAC,aAAa,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE;;;;;;;wDAOD,CAAC;QAErD,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC;gBACnC,KAAK,EAAE,IAAI,CAAC,KAAK;gBACjB,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC;gBAC7C,WAAW,EAAE,GAAG;gBAChB,SAAS,EAAE,GAAG;aACf,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,IAAI,CAAC,mBAAmB,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;YAC1D,OAAO;gBACL,IAAI,EAAE,cAAc;gBACpB,KAAK,EAAE,MAAM,CAAC,KAAK;gBACnB,MAAM,EAAE,MAAM,CAAC,KAAK,IAAI,GAAG;gBAC3B,SAAS,EAAE,MAAM,CAAC,SAAS;aAC5B,CAAC;QACJ,CAAC;QAAC,MAAM,CAAC;YACP,OAAO;gBACL,IAAI,EAAE,cAAc;gBACpB,KAAK,EAAE,GAAG;gBACV,MAAM,EAAE,IAAI;gBACZ,SAAS,EAAE,kCAAkC;aAC9C,CAAC;QACJ,CAAC;IACH,CAAC;IAED,KAAK,CAAC,eAAe,CAAC,KAAqB;QACzC,IAAI,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,C
AAC;YAC7B,OAAO;gBACL,IAAI,EAAE,WAAW;gBACjB,KAAK,EAAE,GAAG;gBACV,MAAM,EAAE,IAAI;gBACZ,SAAS,EAAE,sCAAsC;aAClD,CAAC;QACJ,CAAC;QAED,MAAM,MAAM,GAAG;;SAEV,KAAK,CAAC,KAAK;eACL,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,KAAK,WAAW,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC;UACxG,KAAK,CAAC,MAAM;;;;;;;wDAOkC,CAAC;QAErD,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC;gBACnC,KAAK,EAAE,IAAI,CAAC,KAAK;gBACjB,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC;gBAC7C,WAAW,EAAE,GAAG;gBAChB,SAAS,EAAE,GAAG;aACf,CAAC,CAAC;YAEH,MAAM,MAAM,GAAG,IAAI,CAAC,mBAAmB,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;YAC1D,OAAO;gBACL,IAAI,EAAE,WAAW;gBACjB,KAAK,EAAE,MAAM,CAAC,KAAK;gBACnB,MAAM,EAAE,MAAM,CAAC,KAAK,IAAI,GAAG;gBAC3B,SAAS,EAAE,MAAM,CAAC,SAAS;aAC5B,CAAC;QACJ,CAAC;QAAC,MAAM,CAAC;YACP,OAAO;gBACL,IAAI,EAAE,WAAW;gBACjB,KAAK,EAAE,GAAG;gBACV,MAAM,EAAE,IAAI;gBACZ,SAAS,EAAE,kCAAkC;aAC9C,CAAC;QACJ,CAAC;IACH,CAAC;IAEO,mBAAmB,CAAC,OAAe;QACzC,IAAI,CAAC;YACH,IAAI,OAAO,GAAG,OAAO,CAAC;YACtB,MAAM,cAAc,GAAG,iCAAiC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACvE,IAAI,cAAc,EAAE,CAAC;gBACnB,OAAO,GAAG,cAAc,CAAC,CAAC,CAAC,CAAC;YAC9B,CAAC;YAED,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YACnC,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC,CAAC,CAAC;YACpE,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC,SAAS,IAAI,uBAAuB,CAAC,CAAC;YAEtE,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC;QAC9B,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,EAAE,KAAK,EAAE,GAAG,EAAE,SAAS,EAAE,iCAAiC,EAAE,CAAC;QACtE,CAAC;IACH,CAAC;IAEO,eAAe,CAAC,OAAuB;QAC7C,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,CAAC,CAAC;QAEnC,QAAQ,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC;YAChC,KAAK,kBAAkB,CAAC,CAAC,CAAC;gBACxB,IAAI,WAAW,GAAG,CAAC,CAAC;gBACpB,IAAI,WAAW,GAAG,CAAC,CAAC;gBAEpB,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;oBAC7B,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,C
AAC,IAAI,CAAC,CAAC;oBAC1E,MAAM,MAAM,GAAG,SAAS,EAAE,MAAM,IAAI,CAAC,CAAC;oBACtC,WAAW,IAAI,MAAM,CAAC,KAAK,GAAG,MAAM,CAAC;oBACrC,WAAW,IAAI,MAAM,CAAC;gBACxB,CAAC;gBAED,OAAO,WAAW,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC;YACzD,CAAC;YAED,KAAK,KAAK;gBACR,OAAO,IAAI,CAAC,GAAG,CAAC,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC;YAElD,KAAK,SAAS;gBACZ,OAAO,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;YAEtD;gBACE,OAAO,OAAO,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC;QACzE,CAAC;IACH,CAAC;IAED,SAAS;QACP,OAAO,EAAE,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;IAC5B,CAAC;CACF;AAED,MAAM,UAAU,mBAAmB;IACjC,OAAO,CAAC,KAAqB,EAAE,EAAE;QAC/B,MAAM,SAAS,GAAG,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,UAAU,EAAE,KAAK,CAAC,CAAC;QACrE,OAAO;YACL,IAAI,EAAE,SAAS;YACf,KAAK,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACxB,MAAM,EAAE,CAAC,SAAS;SACnB,CAAC;IACJ,CAAC,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,sBAAsB,CAAC,SAAkB;IACvD,OAAO,CAAC,KAAqB,EAAE,QAAkB,EAAE,EAAE;QACnD,IAAI,QAAQ,KAAK,SAAS,EAAE,CAAC;YAC3B,OAAO,EAAE,IAAI,EAAE,aAAa,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS,EAAE,mBAAmB,EAAE,CAAC;QACzF,CAAC;QAED,MAAM,WAAW,GAAG,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC;QAE5D,MAAM,OAAO,GACX,MAAM,CAAC,WAAW,CAAC,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,KAAK,MAAM,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;QAErF,OAAO;YACL,IAAI,EAAE,aAAa;YACnB,KAAK,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACtB,MAAM,EAAE,OAAO;YACf,SAAS,EAAE,OAAO,CAAC,CAAC,CAAC,yBAAyB,CAAC,CAAC,CAAC,gCAAgC;SAClF,CAAC;IACJ,CAAC,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,oBAAoB,CAAC,QAAkB;IACrD,OAAO,CAAC,KAAqB,EAAE,EAAE;QAC/B,MAAM,WAAW,GAAG,KAAK,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC;QAC/C,IAAI,KAAK,GAAG,CAAC,CAAC;QAEd,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAC/B,IAAI,WAAW,CAAC,QAAQ,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,EAAE,CAAC;gBAChD,KAAK,EAAE,CAAC;YACV,CAAC;Q
ACH,CAAC;QAED,MAAM,KAAK,GAAG,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;QAEhE,OAAO;YACL,IAAI,EAAE,UAAU;YAChB,KAAK;YACL,MAAM,EAAE,KAAK,IAAI,GAAG;YACpB,SAAS,EAAE,SAAS,KAAK,IAAI,QAAQ,CAAC,MAAM,WAAW;SACxD,CAAC;IACJ,CAAC,CAAC;AACJ,CAAC"}
@@ -0,0 +1,53 @@
1
+ import type { ExecutionTrace, TraceQuery, TraceStoreStats, CapturedPrompt, PromptQuery, ABTest, ABTestStatus, ABTestVariant, InstructionVersion, InstructionVersionMetrics, CombinedPersistentStore } from '@cogitator-ai/types';
2
/**
 * Options object accepted by the `PostgresTraceStore` constructor.
 * Only `connectionString` is required.
 */
+ export interface PostgresTraceStoreConfig {
3
// presumably a PostgreSQL connection URI — confirm against the implementation
+ connectionString: string;
4
// NOTE(review): likely the database schema that holds the store's tables; default not visible here
+ schema?: string;
5
// NOTE(review): likely the connection-pool size; default not visible here
+ poolSize?: number;
6
+ }
7
/**
 * Type declaration of a store class implementing `CombinedPersistentStore`.
 * Every public method returns a Promise. The signatures cover four record
 * kinds: execution traces, captured prompts, A/B tests and instruction
 * versions. The implementation is not part of this declaration file, so
 * persistence details are not documented here.
 */
+ export declare class PostgresTraceStore implements CombinedPersistentStore {
8
+ private pool;
9
+ private config;
10
+ private schema;
11
+ constructor(config: PostgresTraceStoreConfig);
12
// Lifecycle. NOTE(review): presumably connect() must be awaited before any other call — confirm.
+ connect(): Promise<void>;
13
+ disconnect(): Promise<void>;
14
+ private initSchema;
15
+ private generateId;
16
// --- Execution traces (ExecutionTrace) ---
+ storeTrace(trace: ExecutionTrace): Promise<void>;
17
+ storeTraceMany(traces: ExecutionTrace[]): Promise<void>;
18
// Lookups resolve to null when nothing matches (per the return types).
+ getTrace(id: string): Promise<ExecutionTrace | null>;
19
+ getTraceByRunId(runId: string): Promise<ExecutionTrace | null>;
20
+ queryTraces(query: TraceQuery): Promise<ExecutionTrace[]>;
21
+ getAllTraces(agentId: string): Promise<ExecutionTrace[]>;
22
+ getDemos(agentId: string, limit?: number): Promise<ExecutionTrace[]>;
23
+ markAsDemo(id: string): Promise<void>;
24
+ unmarkAsDemo(id: string): Promise<void>;
25
// NOTE(review): boolean presumably reports whether a row was removed — confirm.
+ deleteTrace(id: string): Promise<boolean>;
26
// NOTE(review): number presumably is the count of removed traces — confirm.
+ pruneTraces(agentId: string, maxTraces: number): Promise<number>;
27
+ clearTraces(agentId: string): Promise<void>;
28
+ getTraceStats(agentId: string): Promise<TraceStoreStats>;
29
+ private rowToTrace;
30
// --- Captured prompts (CapturedPrompt) ---
+ capture(prompt: CapturedPrompt): Promise<void>;
31
+ getPrompt(id: string): Promise<CapturedPrompt | null>;
32
+ getByRun(runId: string): Promise<CapturedPrompt[]>;
33
+ query(query: PromptQuery): Promise<CapturedPrompt[]>;
34
+ deletePrompt(id: string): Promise<boolean>;
35
+ prune(beforeDate: Date): Promise<number>;
36
+ private rowToPrompt;
37
// --- A/B tests (ABTest) ---
// create() takes a test without `id` and `createdAt` and resolves to the full ABTest.
+ create(test: Omit<ABTest, 'id' | 'createdAt'>): Promise<ABTest>;
38
+ getABTest(id: string): Promise<ABTest | null>;
39
+ getActive(agentId: string): Promise<ABTest | null>;
40
+ update(id: string, updates: Partial<ABTest>): Promise<ABTest>;
41
+ recordResult(testId: string, variant: ABTestVariant, score: number, latency: number, cost: number): Promise<void>;
42
+ list(agentId?: string, status?: ABTestStatus): Promise<ABTest[]>;
43
+ deleteABTest(id: string): Promise<boolean>;
44
+ private rowToABTest;
45
// --- Instruction versions (InstructionVersion) ---
// save() takes a version without `id` and resolves to the full InstructionVersion.
+ save(version: Omit<InstructionVersion, 'id'>): Promise<InstructionVersion>;
46
+ getVersion(id: string): Promise<InstructionVersion | null>;
47
+ getCurrent(agentId: string): Promise<InstructionVersion | null>;
48
+ getHistory(agentId: string, limit?: number): Promise<InstructionVersion[]>;
49
+ retire(id: string): Promise<void>;
50
+ updateMetrics(id: string, metrics: Partial<InstructionVersionMetrics>): Promise<void>;
51
+ private rowToVersion;
52
+ }
53
+ //# sourceMappingURL=postgres-trace-store.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"postgres-trace-store.d.ts","sourceRoot":"","sources":["../../src/learning/postgres-trace-store.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,cAAc,EACd,UAAU,EACV,eAAe,EACf,cAAc,EACd,WAAW,EACX,MAAM,EACN,YAAY,EACZ,aAAa,EAEb,kBAAkB,EAClB,yBAAyB,EACzB,uBAAuB,EACxB,MAAM,qBAAqB,CAAC;AAS7B,MAAM,WAAW,wBAAwB;IACvC,gBAAgB,EAAE,MAAM,CAAC;IACzB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAED,qBAAa,kBAAmB,YAAW,uBAAuB;IAChE,OAAO,CAAC,IAAI,CAAqB;IACjC,OAAO,CAAC,MAAM,CAA2B;IACzC,OAAO,CAAC,MAAM,CAAS;gBAEX,MAAM,EAAE,wBAAwB;IAKtC,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAexB,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;YAOnB,UAAU;IA+HxB,OAAO,CAAC,UAAU;IAIZ,UAAU,CAAC,KAAK,EAAE,cAAc,GAAG,OAAO,CAAC,IAAI,CAAC;IA+BhD,cAAc,CAAC,MAAM,EAAE,cAAc,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAMvD,QAAQ,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,cAAc,GAAG,IAAI,CAAC;IAapD,eAAe,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,cAAc,GAAG,IAAI,CAAC;IAa9D,WAAW,CAAC,KAAK,EAAE,UAAU,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC;IA4CzD,YAAY,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC;IAWxD,QAAQ,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,SAAK,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC;IAWhE,UAAU,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IASrC,YAAY,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IASvC,WAAW,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;IAWzC,WAAW,CAAC,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAiBhE,WAAW,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAS3C,aAAa,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC;IAoB9D,OAAO,CAAC,UAAU;IAwBZ,OAAO,CAAC,MAAM,EAAE,cAAc,GAAG,OAAO,CAAC,IAAI,CAAC;IAkC9C,SAAS,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,cAAc,GAAG,IAAI,CAAC;IAarD,QAAQ,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC;IAWlD,KAAK,CAAC,KAAK,EAAE,WAAW,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC;IAgDpD,YAAY,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;IAW1C,KAAK,CAAC,UAAU,EAAE,IAAI,GAAG,OAAO,CAAC,MAAM,CAAC;IAW9C,OAAO,CAAC,WAAW;IAiCb,MAAM,CAAC,IAAI,EAAE,IAAI,CAAC,MAAM,EAAE,IAAI,GAAG,WAAW,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC;IAkC/D,SAAS,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CA
AC,MAAM,GAAG,IAAI,CAAC;IAa7C,SAAS,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC;IAalD,MAAM,CAAC,EAAE,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,CAAC,MAAM,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC;IAsC7D,YAAY,CAChB,MAAM,EAAE,MAAM,EACd,OAAO,EAAE,aAAa,EACtB,KAAK,EAAE,MAAM,EACb,OAAO,EAAE,MAAM,EACf,IAAI,EAAE,MAAM,GACX,OAAO,CAAC,IAAI,CAAC;IAuBV,IAAI,CAAC,OAAO,CAAC,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAuBhE,YAAY,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;IAWhD,OAAO,CAAC,WAAW;IAsBb,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,kBAAkB,EAAE,IAAI,CAAC,GAAG,OAAO,CAAC,kBAAkB,CAAC;IA8B1E,UAAU,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,kBAAkB,GAAG,IAAI,CAAC;IAa1D,UAAU,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAAC,kBAAkB,GAAG,IAAI,CAAC;IAe/D,UAAU,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,SAAK,GAAG,OAAO,CAAC,kBAAkB,EAAE,CAAC;IAatE,MAAM,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IASjC,aAAa,CAAC,EAAE,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,CAAC,yBAAyB,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAsC3F,OAAO,CAAC,YAAY;CAoBrB"}