cognitive-core 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (329) hide show
  1. package/README.md +302 -116
  2. package/SKILL.md +193 -0
  3. package/dist/agents/index.d.ts +3 -0
  4. package/dist/agents/index.d.ts.map +1 -0
  5. package/dist/agents/index.js +5 -0
  6. package/dist/agents/index.js.map +1 -0
  7. package/dist/agents/mock-provider.d.ts +23 -0
  8. package/dist/agents/mock-provider.d.ts.map +1 -0
  9. package/dist/agents/mock-provider.js +71 -0
  10. package/dist/agents/mock-provider.js.map +1 -0
  11. package/dist/agents/types.d.ts +98 -0
  12. package/dist/agents/types.d.ts.map +1 -0
  13. package/dist/agents/types.js +44 -0
  14. package/dist/agents/types.js.map +1 -0
  15. package/dist/atlas.d.ts +196 -0
  16. package/dist/atlas.d.ts.map +1 -0
  17. package/dist/atlas.js +373 -0
  18. package/dist/atlas.js.map +1 -0
  19. package/dist/bin/cognitive-core.d.ts +18 -0
  20. package/dist/bin/cognitive-core.d.ts.map +1 -0
  21. package/dist/bin/cognitive-core.js +419 -0
  22. package/dist/bin/cognitive-core.js.map +1 -0
  23. package/dist/embeddings/bm25.d.ts +104 -0
  24. package/dist/embeddings/bm25.d.ts.map +1 -0
  25. package/dist/embeddings/bm25.js +264 -0
  26. package/dist/embeddings/bm25.js.map +1 -0
  27. package/dist/embeddings/index.d.ts +12 -0
  28. package/dist/embeddings/index.d.ts.map +1 -0
  29. package/dist/embeddings/index.js +16 -0
  30. package/dist/embeddings/index.js.map +1 -0
  31. package/dist/embeddings/manager.d.ts +112 -0
  32. package/dist/embeddings/manager.d.ts.map +1 -0
  33. package/dist/embeddings/manager.js +215 -0
  34. package/dist/embeddings/manager.js.map +1 -0
  35. package/dist/embeddings/provider.d.ts +101 -0
  36. package/dist/embeddings/provider.d.ts.map +1 -0
  37. package/dist/embeddings/provider.js +232 -0
  38. package/dist/embeddings/provider.js.map +1 -0
  39. package/dist/embeddings/vector-store.d.ts +101 -0
  40. package/dist/embeddings/vector-store.d.ts.map +1 -0
  41. package/dist/embeddings/vector-store.js +256 -0
  42. package/dist/embeddings/vector-store.js.map +1 -0
  43. package/dist/factory.d.ts +193 -0
  44. package/dist/factory.d.ts.map +1 -0
  45. package/dist/factory.js +109 -0
  46. package/dist/factory.js.map +1 -0
  47. package/dist/index.d.ts +30 -453
  48. package/dist/index.d.ts.map +1 -0
  49. package/dist/index.js +84 -509
  50. package/dist/index.js.map +1 -0
  51. package/dist/learning/analyzer.d.ts +110 -0
  52. package/dist/learning/analyzer.d.ts.map +1 -0
  53. package/dist/learning/analyzer.js +213 -0
  54. package/dist/learning/analyzer.js.map +1 -0
  55. package/dist/learning/effectiveness.d.ts +158 -0
  56. package/dist/learning/effectiveness.d.ts.map +1 -0
  57. package/dist/learning/effectiveness.js +251 -0
  58. package/dist/learning/effectiveness.js.map +1 -0
  59. package/dist/learning/index.d.ts +8 -0
  60. package/dist/learning/index.d.ts.map +1 -0
  61. package/dist/learning/index.js +11 -0
  62. package/dist/learning/index.js.map +1 -0
  63. package/dist/learning/llm-extractor.d.ts +88 -0
  64. package/dist/learning/llm-extractor.d.ts.map +1 -0
  65. package/dist/learning/llm-extractor.js +372 -0
  66. package/dist/learning/llm-extractor.js.map +1 -0
  67. package/dist/learning/meta-learner.d.ts +80 -0
  68. package/dist/learning/meta-learner.d.ts.map +1 -0
  69. package/dist/learning/meta-learner.js +355 -0
  70. package/dist/learning/meta-learner.js.map +1 -0
  71. package/dist/learning/pipeline.d.ts +65 -0
  72. package/dist/learning/pipeline.d.ts.map +1 -0
  73. package/dist/learning/pipeline.js +170 -0
  74. package/dist/learning/pipeline.js.map +1 -0
  75. package/dist/learning/playbook-extractor.d.ts +113 -0
  76. package/dist/learning/playbook-extractor.d.ts.map +1 -0
  77. package/dist/learning/playbook-extractor.js +523 -0
  78. package/dist/learning/playbook-extractor.js.map +1 -0
  79. package/dist/learning/usage-inference.d.ts +82 -0
  80. package/dist/learning/usage-inference.d.ts.map +1 -0
  81. package/dist/learning/usage-inference.js +261 -0
  82. package/dist/learning/usage-inference.js.map +1 -0
  83. package/dist/mcp/index.d.ts +6 -0
  84. package/dist/mcp/index.d.ts.map +1 -0
  85. package/dist/mcp/index.js +6 -0
  86. package/dist/mcp/index.js.map +1 -0
  87. package/dist/mcp/playbook-server.d.ts +120 -0
  88. package/dist/mcp/playbook-server.d.ts.map +1 -0
  89. package/dist/mcp/playbook-server.js +427 -0
  90. package/dist/mcp/playbook-server.js.map +1 -0
  91. package/dist/memory/curated-loader.d.ts +62 -0
  92. package/dist/memory/curated-loader.d.ts.map +1 -0
  93. package/dist/memory/curated-loader.js +106 -0
  94. package/dist/memory/curated-loader.js.map +1 -0
  95. package/dist/memory/experience.d.ts +122 -0
  96. package/dist/memory/experience.d.ts.map +1 -0
  97. package/dist/memory/experience.js +392 -0
  98. package/dist/memory/experience.js.map +1 -0
  99. package/dist/memory/index.d.ts +6 -0
  100. package/dist/memory/index.d.ts.map +1 -0
  101. package/dist/memory/index.js +9 -0
  102. package/dist/memory/index.js.map +1 -0
  103. package/dist/memory/meta.d.ts +90 -0
  104. package/dist/memory/meta.d.ts.map +1 -0
  105. package/dist/memory/meta.js +362 -0
  106. package/dist/memory/meta.js.map +1 -0
  107. package/dist/memory/playbook.d.ts +133 -0
  108. package/dist/memory/playbook.d.ts.map +1 -0
  109. package/dist/memory/playbook.js +357 -0
  110. package/dist/memory/playbook.js.map +1 -0
  111. package/dist/memory/system.d.ts +167 -0
  112. package/dist/memory/system.d.ts.map +1 -0
  113. package/dist/memory/system.js +383 -0
  114. package/dist/memory/system.js.map +1 -0
  115. package/dist/runtime/backends/acp.d.ts +67 -0
  116. package/dist/runtime/backends/acp.d.ts.map +1 -0
  117. package/dist/runtime/backends/acp.js +290 -0
  118. package/dist/runtime/backends/acp.js.map +1 -0
  119. package/dist/runtime/backends/index.d.ts +5 -0
  120. package/dist/runtime/backends/index.d.ts.map +1 -0
  121. package/dist/runtime/backends/index.js +6 -0
  122. package/dist/runtime/backends/index.js.map +1 -0
  123. package/dist/runtime/backends/mock.d.ts +67 -0
  124. package/dist/runtime/backends/mock.d.ts.map +1 -0
  125. package/dist/runtime/backends/mock.js +153 -0
  126. package/dist/runtime/backends/mock.js.map +1 -0
  127. package/dist/runtime/backends/subprocess.d.ts +56 -0
  128. package/dist/runtime/backends/subprocess.d.ts.map +1 -0
  129. package/dist/runtime/backends/subprocess.js +260 -0
  130. package/dist/runtime/backends/subprocess.js.map +1 -0
  131. package/dist/runtime/flows/learning.d.ts +73 -0
  132. package/dist/runtime/flows/learning.d.ts.map +1 -0
  133. package/dist/runtime/flows/learning.js +116 -0
  134. package/dist/runtime/flows/learning.js.map +1 -0
  135. package/dist/runtime/flows/validation.d.ts +122 -0
  136. package/dist/runtime/flows/validation.d.ts.map +1 -0
  137. package/dist/runtime/flows/validation.js +223 -0
  138. package/dist/runtime/flows/validation.js.map +1 -0
  139. package/dist/runtime/index.d.ts +6 -0
  140. package/dist/runtime/index.d.ts.map +1 -0
  141. package/dist/runtime/index.js +8 -0
  142. package/dist/runtime/index.js.map +1 -0
  143. package/dist/runtime/manager.d.ts +116 -0
  144. package/dist/runtime/manager.d.ts.map +1 -0
  145. package/dist/runtime/manager.js +416 -0
  146. package/dist/runtime/manager.js.map +1 -0
  147. package/dist/runtime/types.d.ts +138 -0
  148. package/dist/runtime/types.d.ts.map +1 -0
  149. package/dist/runtime/types.js +2 -0
  150. package/dist/runtime/types.js.map +1 -0
  151. package/dist/search/evaluator.d.ts +102 -0
  152. package/dist/search/evaluator.d.ts.map +1 -0
  153. package/dist/search/evaluator.js +352 -0
  154. package/dist/search/evaluator.js.map +1 -0
  155. package/dist/search/index.d.ts +7 -0
  156. package/dist/search/index.d.ts.map +1 -0
  157. package/dist/search/index.js +11 -0
  158. package/dist/search/index.js.map +1 -0
  159. package/dist/search/refinement-loop.d.ts +73 -0
  160. package/dist/search/refinement-loop.d.ts.map +1 -0
  161. package/dist/search/refinement-loop.js +245 -0
  162. package/dist/search/refinement-loop.js.map +1 -0
  163. package/dist/search/refinement-types.d.ts +154 -0
  164. package/dist/search/refinement-types.d.ts.map +1 -0
  165. package/dist/search/refinement-types.js +99 -0
  166. package/dist/search/refinement-types.js.map +1 -0
  167. package/dist/search/router.d.ts +61 -0
  168. package/dist/search/router.d.ts.map +1 -0
  169. package/dist/search/router.js +197 -0
  170. package/dist/search/router.js.map +1 -0
  171. package/dist/search/solver.d.ts +75 -0
  172. package/dist/search/solver.d.ts.map +1 -0
  173. package/dist/search/solver.js +216 -0
  174. package/dist/search/solver.js.map +1 -0
  175. package/dist/search/verification-runner.d.ts +125 -0
  176. package/dist/search/verification-runner.d.ts.map +1 -0
  177. package/dist/search/verification-runner.js +440 -0
  178. package/dist/search/verification-runner.js.map +1 -0
  179. package/dist/surfacing/index.d.ts +2 -0
  180. package/dist/surfacing/index.d.ts.map +1 -0
  181. package/dist/surfacing/index.js +2 -0
  182. package/dist/surfacing/index.js.map +1 -0
  183. package/dist/surfacing/skill-library.d.ts +158 -0
  184. package/dist/surfacing/skill-library.d.ts.map +1 -0
  185. package/dist/surfacing/skill-library.js +429 -0
  186. package/dist/surfacing/skill-library.js.map +1 -0
  187. package/dist/types/config.d.ts +1113 -0
  188. package/dist/types/config.d.ts.map +1 -0
  189. package/dist/types/config.js +274 -0
  190. package/dist/types/config.js.map +1 -0
  191. package/dist/types/index.d.ts +9 -0
  192. package/dist/types/index.d.ts.map +1 -0
  193. package/dist/types/index.js +14 -0
  194. package/dist/types/index.js.map +1 -0
  195. package/dist/types/memory.d.ts +339 -0
  196. package/dist/types/memory.d.ts.map +1 -0
  197. package/dist/types/memory.js +207 -0
  198. package/dist/types/memory.js.map +1 -0
  199. package/dist/types/meta.d.ts +146 -0
  200. package/dist/types/meta.d.ts.map +1 -0
  201. package/dist/types/meta.js +51 -0
  202. package/dist/types/meta.js.map +1 -0
  203. package/dist/types/outcome.d.ts +42 -0
  204. package/dist/types/outcome.d.ts.map +1 -0
  205. package/dist/types/outcome.js +50 -0
  206. package/dist/types/outcome.js.map +1 -0
  207. package/dist/types/playbook.d.ts +119 -0
  208. package/dist/types/playbook.d.ts.map +1 -0
  209. package/dist/types/playbook.js +71 -0
  210. package/dist/types/playbook.js.map +1 -0
  211. package/dist/types/step.d.ts +44 -0
  212. package/dist/types/step.d.ts.map +1 -0
  213. package/dist/types/step.js +32 -0
  214. package/dist/types/step.js.map +1 -0
  215. package/dist/types/task.d.ts +91 -0
  216. package/dist/types/task.d.ts.map +1 -0
  217. package/dist/types/task.js +39 -0
  218. package/dist/types/task.js.map +1 -0
  219. package/dist/types/trajectory.d.ts +221 -0
  220. package/dist/types/trajectory.d.ts.map +1 -0
  221. package/dist/types/trajectory.js +60 -0
  222. package/dist/types/trajectory.js.map +1 -0
  223. package/dist/utils/index.d.ts +4 -0
  224. package/dist/utils/index.d.ts.map +1 -0
  225. package/dist/utils/index.js +4 -0
  226. package/dist/utils/index.js.map +1 -0
  227. package/dist/utils/similarity.d.ts +31 -0
  228. package/dist/utils/similarity.d.ts.map +1 -0
  229. package/dist/utils/similarity.js +107 -0
  230. package/dist/utils/similarity.js.map +1 -0
  231. package/dist/utils/storage.d.ts +106 -0
  232. package/dist/utils/storage.d.ts.map +1 -0
  233. package/dist/utils/storage.js +203 -0
  234. package/dist/utils/storage.js.map +1 -0
  235. package/dist/utils/validation.d.ts +129 -0
  236. package/dist/utils/validation.d.ts.map +1 -0
  237. package/dist/utils/validation.js +171 -0
  238. package/dist/utils/validation.js.map +1 -0
  239. package/package.json +50 -34
  240. package/scripts/migrate-to-playbooks.ts +307 -0
  241. package/src/agents/index.ts +14 -0
  242. package/src/agents/mock-provider.ts +93 -0
  243. package/src/agents/types.ts +137 -0
  244. package/src/atlas.ts +560 -0
  245. package/src/bin/cognitive-core.ts +470 -0
  246. package/src/embeddings/bm25.ts +337 -0
  247. package/src/embeddings/index.ts +39 -0
  248. package/src/embeddings/manager.ts +288 -0
  249. package/src/embeddings/provider.ts +311 -0
  250. package/src/embeddings/vector-store.ts +353 -0
  251. package/src/factory.ts +263 -0
  252. package/src/index.ts +246 -0
  253. package/src/learning/analyzer.ts +335 -0
  254. package/src/learning/effectiveness.ts +428 -0
  255. package/src/learning/index.ts +58 -0
  256. package/src/learning/llm-extractor.ts +542 -0
  257. package/src/learning/meta-learner.ts +516 -0
  258. package/src/learning/pipeline.ts +244 -0
  259. package/src/learning/playbook-extractor.ts +702 -0
  260. package/src/learning/usage-inference.ts +372 -0
  261. package/src/mcp/index.ts +12 -0
  262. package/src/mcp/playbook-server.ts +565 -0
  263. package/src/memory/curated-loader.ts +160 -0
  264. package/src/memory/experience.ts +515 -0
  265. package/src/memory/index.ts +27 -0
  266. package/src/memory/meta.ts +506 -0
  267. package/src/memory/playbook.ts +493 -0
  268. package/src/memory/system.ts +551 -0
  269. package/src/runtime/backends/acp.ts +378 -0
  270. package/src/runtime/backends/index.ts +24 -0
  271. package/src/runtime/backends/mock.ts +218 -0
  272. package/src/runtime/backends/subprocess.ts +356 -0
  273. package/src/runtime/flows/learning.ts +183 -0
  274. package/src/runtime/flows/validation.ts +381 -0
  275. package/src/runtime/index.ts +53 -0
  276. package/src/runtime/manager.ts +541 -0
  277. package/src/runtime/types.ts +157 -0
  278. package/src/search/evaluator.ts +474 -0
  279. package/src/search/index.ts +59 -0
  280. package/src/search/refinement-loop.ts +363 -0
  281. package/src/search/refinement-types.ts +159 -0
  282. package/src/search/router.ts +261 -0
  283. package/src/search/solver.ts +303 -0
  284. package/src/search/verification-runner.ts +570 -0
  285. package/src/surfacing/index.ts +6 -0
  286. package/src/surfacing/skill-library.ts +594 -0
  287. package/src/types/config.ts +333 -0
  288. package/src/types/index.ts +130 -0
  289. package/src/types/memory.ts +270 -0
  290. package/src/types/meta.ts +218 -0
  291. package/src/types/outcome.ts +66 -0
  292. package/src/types/playbook.ts +196 -0
  293. package/src/types/step.ts +40 -0
  294. package/src/types/task.ts +52 -0
  295. package/src/types/trajectory.ts +80 -0
  296. package/src/utils/index.ts +38 -0
  297. package/src/utils/similarity.ts +139 -0
  298. package/src/utils/storage.ts +249 -0
  299. package/src/utils/validation.ts +286 -0
  300. package/tests/embeddings/bm25.test.ts +130 -0
  301. package/tests/embeddings/manager.test.ts +205 -0
  302. package/tests/integration/atlas.test.ts +266 -0
  303. package/tests/integration/e2e.test.ts +929 -0
  304. package/tests/learning/analyzer.test.ts +426 -0
  305. package/tests/learning/effectiveness.test.ts +542 -0
  306. package/tests/learning/pipeline.test.ts +176 -0
  307. package/tests/learning/playbook-extractor-provenance.test.ts +114 -0
  308. package/tests/learning/usage-inference.test.ts +254 -0
  309. package/tests/mcp/playbook-server.test.ts +252 -0
  310. package/tests/memory/experience.test.ts +198 -0
  311. package/tests/memory/playbook.test.ts +338 -0
  312. package/tests/memory/provenance.test.ts +639 -0
  313. package/tests/memory/system.test.ts +325 -0
  314. package/tests/runtime/agent-manager.test.ts +512 -0
  315. package/tests/runtime/mock-backend.test.ts +248 -0
  316. package/tests/search/refinement-loop.test.ts +468 -0
  317. package/tests/search/refinement.test.ts +267 -0
  318. package/tests/search/router.test.ts +427 -0
  319. package/tests/surfacing/skill-library.test.ts +292 -0
  320. package/tests/types/outcome.test.ts +147 -0
  321. package/tests/types/step.test.ts +133 -0
  322. package/tests/types/task.test.ts +158 -0
  323. package/tests/types/trajectory.test.ts +253 -0
  324. package/tests/utils/similarity.test.ts +188 -0
  325. package/tests/utils/validation.test.ts +252 -0
  326. package/tsconfig.json +25 -0
  327. package/vitest.config.ts +22 -0
  328. package/dist/index.d.mts +0 -466
  329. package/dist/index.mjs +0 -478
@@ -0,0 +1,102 @@
1
+ /**
2
+ * Solution Evaluator
3
+ *
4
+ * Evaluates solution quality using a hybrid approach:
5
+ * 1. First tries task verification (if available)
6
+ * 2. Falls back to ACP agent evaluation
7
+ * 3. Falls back to heuristic evaluation as last resort
8
+ */
9
+ import type { Trajectory } from '../types/trajectory.js';
10
+ import type { Task } from '../types/task.js';
11
+ import type { AgentManager } from '../runtime/manager.js';
12
+ import { type EvaluationResult } from './refinement-types.js';
/**
 * Verification function signature for task-specific verification.
 *
 * A verifier is registered per task domain via
 * `SolutionEvaluator.registerVerifier` and is invoked with the trajectory
 * under evaluation and the task it was produced for. If the returned promise
 * rejects, the evaluator logs a warning and behaves as if no verifier were
 * registered for that domain.
 */
export interface VerificationFunction {
    (trajectory: Trajectory, task: Task): Promise<VerificationResult>;
}
/**
 * Result from a verification function.
 *
 * The evaluator turns this into an evaluation score: a passing result scores
 * `max(0.7, confidence)`, a failing result scores `min(0.5, 1 - confidence)`.
 */
export interface VerificationResult {
    /** Whether the solution passed verification */
    passed: boolean;
    /**
     * Confidence in the verification result (0-1). Compared against
     * `EvaluatorConfig.verificationConfidenceThreshold` to decide whether the
     * verification outcome is returned as the final evaluation.
     */
    confidence: number;
    /** Specific issues found; an issue without a severity is reported as 'major' */
    issues?: Array<{
        type: 'incomplete' | 'incorrect' | 'error';
        description: string;
        severity?: 'critical' | 'major' | 'minor';
    }>;
    /** Additional details; forwarded as the evaluation's raw response */
    details?: string;
}
/**
 * Configuration for the SolutionEvaluator.
 *
 * Every field is optional; omitted fields take the defaults noted below.
 */
export interface EvaluatorConfig {
    /** Agent type to use for evaluation (default: 'evaluator') */
    evaluatorAgentType?: string;
    /** Minimum confidence for verification to be accepted (default: 0.8) */
    verificationConfidenceThreshold?: number;
    /** Whether to always run agent evaluation even if verification passes (default: false) */
    alwaysUseAgent?: boolean;
    /** Timeout for agent evaluation in ms (default: 60000) */
    agentTimeout?: number;
}
/**
 * SolutionEvaluator - Evaluates trajectory quality using hybrid approach.
 *
 * Evaluation order: a domain verifier (when one is registered for the task's
 * domain), then an evaluation agent (when an AgentManager was supplied), then
 * a heuristic based on the trajectory's outcome and steps.
 */
export declare class SolutionEvaluator {
    /** Agent manager used to spawn the evaluation agent; null disables the agent step */
    private agentManager;
    /** Caller-supplied configuration merged over the defaults */
    private config;
    /** Registered verification functions, keyed by task domain */
    private verifiers;
    /**
     * @param agentManager Manager used for agent-based evaluation, or null to skip that step
     * @param config Optional overrides for the default evaluator configuration
     */
    constructor(agentManager: AgentManager | null, config?: EvaluatorConfig);
    /**
     * Register a verification function for a task domain.
     * Registering again for the same domain replaces the earlier verifier.
     */
    registerVerifier(domain: string, verifier: VerificationFunction): void;
    /**
     * Evaluate a trajectory's quality.
     *
     * Returns the verification result when it meets the confidence threshold
     * and `alwaysUseAgent` is off; otherwise the agent's evaluation; otherwise
     * (no agent manager, or the agent call failed) the heuristic evaluation.
     */
    evaluate(trajectory: Trajectory, task: Task): Promise<EvaluationResult>;
    /**
     * Try verification if a verifier is available for the task domain.
     * Resolves to null when there is no verifier or the verifier throws.
     */
    private tryVerification;
    /**
     * Evaluate using an ACP agent.
     * Throws when no agent manager is available.
     */
    private evaluateWithAgent;
    /**
     * Build evaluation prompt for the agent from the task description,
     * the trajectory's steps and its outcome.
     */
    private buildEvaluationPrompt;
    /**
     * Parse agent evaluation response into EvaluationResult.
     * Falls back to a 'needs_work' result with score 0.5 when the response
     * cannot be parsed.
     */
    private parseAgentEvaluation;
    /**
     * Heuristic evaluation based on trajectory properties
     * (outcome success, step count, error observations, key steps).
     */
    private evaluateHeuristic;
    /**
     * Parse quality string to QualityLevel; unrecognised values map to 'needs_work'.
     */
    private parseQuality;
    /**
     * Parse issue type string; unrecognised values map to 'error'.
     */
    private parseIssueType;
    /**
     * Parse severity string; unrecognised values map to 'major'.
     */
    private parseSeverity;
}
/**
 * Create a solution evaluator.
 *
 * Convenience factory equivalent to `new SolutionEvaluator(agentManager, config)`.
 *
 * @param agentManager Manager used for agent-based evaluation, or null to skip that step
 * @param config Optional overrides for the default evaluator configuration
 */
export declare function createSolutionEvaluator(agentManager: AgentManager | null, config?: EvaluatorConfig): SolutionEvaluator;
102
+ //# sourceMappingURL=evaluator.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"evaluator.d.ts","sourceRoot":"","sources":["../../src/search/evaluator.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AACzD,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,kBAAkB,CAAC;AAC7C,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AAC1D,OAAO,EACL,KAAK,gBAAgB,EAGtB,MAAM,uBAAuB,CAAC;AAE/B;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACnC,CAAC,UAAU,EAAE,UAAU,EAAE,IAAI,EAAE,IAAI,GAAG,OAAO,CAAC,kBAAkB,CAAC,CAAC;CACnE;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,+CAA+C;IAC/C,MAAM,EAAE,OAAO,CAAC;IAChB,kDAAkD;IAClD,UAAU,EAAE,MAAM,CAAC;IACnB,4BAA4B;IAC5B,MAAM,CAAC,EAAE,KAAK,CAAC;QACb,IAAI,EAAE,YAAY,GAAG,WAAW,GAAG,OAAO,CAAC;QAC3C,WAAW,EAAE,MAAM,CAAC;QACpB,QAAQ,CAAC,EAAE,UAAU,GAAG,OAAO,GAAG,OAAO,CAAC;KAC3C,CAAC,CAAC;IACH,yBAAyB;IACzB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,8DAA8D;IAC9D,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,yDAAyD;IACzD,+BAA+B,CAAC,EAAE,MAAM,CAAC;IACzC,yEAAyE;IACzE,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,yCAAyC;IACzC,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAYD;;GAEG;AACH,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,YAAY,CAAsB;IAC1C,OAAO,CAAC,MAAM,CAA4B;IAC1C,OAAO,CAAC,SAAS,CAAgD;gBAG/D,YAAY,EAAE,YAAY,GAAG,IAAI,EACjC,MAAM,GAAE,eAAoB;IAM9B;;OAEG;IACH,gBAAgB,CAAC,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,oBAAoB,GAAG,IAAI;IAItE;;OAEG;IACG,QAAQ,CACZ,UAAU,EAAE,UAAU,EACtB,IAAI,EAAE,IAAI,GACT,OAAO,CAAC,gBAAgB,CAAC;IAgC5B;;OAEG;YACW,eAAe;IAyC7B;;OAEG;YACW,iBAAiB;IAoC/B;;OAEG;IACH,OAAO,CAAC,qBAAqB;IA8C7B;;OAEG;IACH,OAAO,CAAC,oBAAoB;IA0D5B;;OAEG;IACH,OAAO,CAAC,iBAAiB;IA6EzB;;OAEG;IACH,OAAO,CAAC,YAAY;IAmBpB;;OAEG;IACH,OAAO,CAAC,cAAc;IAqBtB;;OAEG;IACH,OAAO,CAAC,aAAa;CAatB;AAED;;GAEG;AACH,wBAAgB,uBAAuB,CACrC,YAAY,EAAE,YAAY,GAAG,IAAI,EACjC,MAAM,CAAC,EAAE,eAAe,GACvB,iBAAiB,CAEnB"}
@@ -0,0 +1,352 @@
1
+ /**
2
+ * Solution Evaluator
3
+ *
4
+ * Evaluates solution quality using a hybrid approach:
5
+ * 1. First tries task verification (if available)
6
+ * 2. Falls back to ACP agent evaluation
7
+ * 3. Falls back to heuristic evaluation as last resort
8
+ */
9
+ import { createEvaluationResult, scoreToQuality, } from './refinement-types.js';
/**
 * Baseline evaluator settings. The SolutionEvaluator constructor spreads
 * caller-supplied options over these, so any field the caller omits keeps
 * the value listed here.
 */
const DEFAULT_CONFIG = {
    // Agent type requested when spawning the evaluation agent.
    evaluatorAgentType: 'evaluator',
    // Verifier confidence required before its verdict is returned directly.
    verificationConfidenceThreshold: 0.8,
    // When true, the agent is consulted even if verification was confident.
    alwaysUseAgent: false,
    // Agent evaluation timeout: 60 seconds, expressed in milliseconds.
    agentTimeout: 60 * 1000,
};
/**
 * SolutionEvaluator - Evaluates trajectory quality using a hybrid approach.
 *
 * Evaluation proceeds through up to three tiers:
 *  1. a verifier registered for the task's domain,
 *  2. an evaluation agent spawned through the AgentManager,
 *  3. a heuristic over the trajectory's outcome and steps.
 */
export class SolutionEvaluator {
    /** AgentManager used to spawn the evaluation agent; null/undefined disables tier 2. */
    agentManager;
    /** Effective configuration: DEFAULT_CONFIG overlaid with caller options. */
    config;
    /** Verification functions keyed by task domain. */
    verifiers = new Map();
    /**
     * @param agentManager Manager used for agent-based evaluation, or null to skip it.
     * @param config Optional overrides; omitted fields keep their defaults.
     */
    constructor(agentManager, config = {}) {
        this.agentManager = agentManager;
        this.config = { ...DEFAULT_CONFIG, ...config };
    }
    /**
     * Register a verification function for a task domain.
     * A later registration for the same domain replaces the earlier one.
     *
     * @param domain Task domain the verifier applies to.
     * @param verifier Async function returning a verification result.
     */
    registerVerifier(domain, verifier) {
        this.verifiers.set(domain, verifier);
    }
    /**
     * Evaluate a trajectory's quality.
     *
     * @param trajectory Trajectory to evaluate.
     * @param task Task the trajectory attempted to solve.
     * @returns The verification-based result when it is confident enough and
     *   `alwaysUseAgent` is off; otherwise the agent's result; otherwise the
     *   heuristic result.
     */
    async evaluate(trajectory, task) {
        // 1. Try verification first (if available and applicable)
        const verificationResult = await this.tryVerification(trajectory, task);
        if (verificationResult) {
            const { result, evalResult } = verificationResult;
            // Only a confident verification short-circuits, and only when the
            // caller has not asked for the agent to always run. A verification
            // result that does not short-circuit is not merged into later tiers.
            if (result.confidence >= this.config.verificationConfidenceThreshold &&
                !this.config.alwaysUseAgent) {
                return evalResult;
            }
        }
        // 2. Try ACP agent evaluation
        if (this.agentManager) {
            try {
                const agentResult = await this.evaluateWithAgent(trajectory, task);
                return agentResult;
            }
            catch (error) {
                // Agent failure is non-fatal: warn and fall through to the heuristic
                const errorMsg = error instanceof Error ? error.message : String(error);
                console.warn(`Agent evaluation failed, falling back to heuristic: ${errorMsg}`);
            }
        }
        // 3. Fall back to heuristic evaluation
        return this.evaluateHeuristic(trajectory, task);
    }
    /**
     * Try verification if a verifier is available for the task domain.
     *
     * @returns `{ result, evalResult }` (the raw verifier output and its
     *   EvaluationResult form), or null when no verifier is registered for
     *   `task.domain` or the verifier throws.
     */
    async tryVerification(trajectory, task) {
        // Check for domain-specific verifier
        const verifier = task.domain ? this.verifiers.get(task.domain) : undefined;
        if (!verifier) {
            return null;
        }
        try {
            const result = await verifier(trajectory, task);
            // Passing results score at least 0.7; failing results at most 0.5,
            // and lower the more confident the verifier is in the failure.
            const rawScore = result.passed
                ? Math.max(0.7, result.confidence)
                : Math.min(0.5, 1 - result.confidence);
            // A verifier may report a confidence outside [0, 1]; keep the score
            // in range, as the agent and heuristic paths already do.
            const score = Math.min(1, Math.max(0, rawScore));
            const evalResult = createEvaluationResult({
                quality: scoreToQuality(score),
                score,
                acceptable: result.passed && result.confidence >= this.config.verificationConfidenceThreshold,
                issues: result.issues?.map((issue) => ({
                    type: issue.type,
                    description: issue.description,
                    severity: issue.severity ?? 'major',
                })) ?? [],
                method: 'verification',
                rawResponse: result.details,
            });
            return { result, evalResult };
        }
        catch (error) {
            // Verification failed, return null to try other methods
            console.warn('Verification failed:', error instanceof Error ? error.message : String(error));
            return null;
        }
    }
    /**
     * Evaluate using an ACP agent.
     *
     * Wraps the evaluation prompt in a synthetic task (domain 'evaluation'),
     * spawns the configured evaluator agent and parses its reported result.
     *
     * @throws Error when no AgentManager is available; errors from
     *   `agentManager.spawn` propagate to the caller.
     */
    async evaluateWithAgent(trajectory, task) {
        if (!this.agentManager) {
            throw new Error('AgentManager not available for agent evaluation');
        }
        // Create evaluation task for the agent
        const evaluationTask = {
            id: `eval-${trajectory.id}`,
            description: this.buildEvaluationPrompt(trajectory, task),
            domain: 'evaluation',
            context: {},
            createdAt: new Date(),
            metadata: {
                originalTaskId: task.id,
                trajectoryId: trajectory.id,
            },
        };
        // Spawn evaluation agent
        const result = await this.agentManager.spawn({
            agentType: this.config.evaluatorAgentType,
            task: evaluationTask,
            timeout: this.config.agentTimeout,
            backendOptions: {
                // Evaluation agents should be concise
                maxTokens: 2000,
            },
        });
        // Parse agent response into EvaluationResult
        return this.parseAgentEvaluation(result.session.result, result.trajectory);
    }
    /**
     * Build evaluation prompt for the agent.
     *
     * The prompt lists the original task description, each step (thought and
     * observation only when present), the outcome, and the JSON response
     * format the agent is asked to follow.
     */
    buildEvaluationPrompt(trajectory, task) {
        const steps = trajectory.steps
            .map((step, i) => {
            let stepStr = `Step ${i + 1}:`;
            if (step.thought)
                stepStr += `\n Thought: ${step.thought}`;
            stepStr += `\n Action: ${step.action}`;
            if (step.observation)
                stepStr += `\n Observation: ${step.observation}`;
            return stepStr;
        })
            .join('\n\n');
        const outcomeStr = trajectory.outcome.success
            ? `SUCCESS: ${trajectory.outcome.solution ?? 'Task completed'}`
            : `FAILURE: ${trajectory.outcome.errorInfo ?? 'Unknown error'}`;
        return `Evaluate the quality of this solution attempt.

## Original Task
${task.description}

## Solution Steps
${steps}

## Outcome
${outcomeStr}

## Instructions
Analyze the solution and provide:
1. Overall quality assessment (excellent/good/needs_work/poor)
2. A numeric score from 0.0 to 1.0
3. Whether this solution is acceptable
4. List any specific issues found
5. Suggestions for improvement

Respond in JSON format:
{
"quality": "excellent|good|needs_work|poor",
"score": 0.85,
"acceptable": true,
"issues": [
{"type": "incomplete|incorrect|inefficient|off_topic|error", "description": "...", "severity": "critical|major|minor"}
],
"suggestions": ["..."]
}`;
    }
    /**
     * Parse agent evaluation response into EvaluationResult.
     *
     * Individual malformed fields degrade to their defaults instead of
     * discarding the whole response: a non-numeric score becomes 0.5, a
     * non-array `issues` becomes an empty list, and non-string
     * quality/type/severity values map to the parsers' defaults.
     *
     * @param result Raw result reported by the agent session (expected to be a string).
     * @param _evalTrajectory Trajectory of the evaluation run itself; currently unused.
     * @returns The parsed evaluation, or a conservative 'needs_work' result
     *   (score 0.5, not acceptable) when no JSON object can be extracted.
     */
    parseAgentEvaluation(result, _evalTrajectory) {
        // Try to parse as JSON
        if (typeof result === 'string') {
            try {
                // Extract JSON from response (agent may include extra text);
                // the match spans from the first '{' to the last '}'.
                const jsonMatch = result.match(/\{[\s\S]*\}/);
                if (jsonMatch) {
                    const parsed = JSON.parse(jsonMatch[0]);
                    // Math.max/min propagate NaN, so a score such as "high"
                    // must be replaced by the neutral default before clamping.
                    const numericScore = Number(parsed.score ?? 0.5);
                    const score = Number.isFinite(numericScore)
                        ? Math.min(1, Math.max(0, numericScore))
                        : 0.5;
                    const rawIssues = Array.isArray(parsed.issues) ? parsed.issues : [];
                    return createEvaluationResult({
                        quality: this.parseQuality(parsed.quality),
                        score,
                        acceptable: parsed.acceptable ?? false,
                        issues: rawIssues.map((issue) => ({
                            type: this.parseIssueType(issue?.type),
                            description: issue?.description ?? 'Unknown issue',
                            severity: this.parseSeverity(issue?.severity),
                            suggestion: undefined,
                        })),
                        method: 'agent',
                        rawResponse: result,
                    });
                }
            }
            catch {
                // Invalid JSON: fall through to the conservative result below
            }
        }
        // If we can't parse, return a conservative result
        return createEvaluationResult({
            quality: 'needs_work',
            score: 0.5,
            acceptable: false,
            issues: [
                {
                    type: 'error',
                    description: 'Could not parse agent evaluation response',
                    severity: 'major',
                },
            ],
            method: 'agent',
            rawResponse: typeof result === 'string' ? result : JSON.stringify(result),
        });
    }
    /**
     * Heuristic evaluation based on trajectory properties.
     *
     * Starts from a neutral 0.5 and adjusts: +0.3 for a successful outcome
     * (-0.2 otherwise), -0.2 for an empty trajectory, -0.1 for more than 20
     * steps, -0.1 per step whose observation mentions "error" (capped at
     * three steps), and +0.1 when a successful trajectory has at least one
     * step with attributionScore >= 0.15. The result is clamped to [0, 1].
     *
     * @param trajectory Trajectory to score.
     * @param _task Unused; accepted for signature parity with the other tiers.
     */
    evaluateHeuristic(trajectory, _task) {
        const issues = [];
        let score = 0.5; // Start neutral
        // Check outcome
        if (trajectory.outcome.success) {
            score += 0.3;
        }
        else {
            score -= 0.2;
            issues.push({
                type: 'error',
                description: trajectory.outcome.errorInfo ?? 'Task did not complete successfully',
                severity: 'critical',
            });
        }
        // Check for steps taken
        if (trajectory.steps.length === 0) {
            score -= 0.2;
            issues.push({
                type: 'incomplete',
                description: 'No steps were taken to solve the task',
                severity: 'critical',
            });
        }
        else {
            // Penalize very long trajectories (may indicate inefficiency)
            if (trajectory.steps.length > 20) {
                score -= 0.1;
                issues.push({
                    type: 'inefficient',
                    description: `Solution took ${trajectory.steps.length} steps, which may indicate inefficiency`,
                    severity: 'minor',
                });
            }
        }
        // Check for errors in steps (substring match on the observation text)
        const errorSteps = trajectory.steps.filter((step) => step.observation?.toLowerCase().includes('error'));
        if (errorSteps.length > 0) {
            score -= 0.1 * Math.min(3, errorSteps.length);
            issues.push({
                type: 'error',
                description: `${errorSteps.length} step(s) encountered errors`,
                severity: errorSteps.length > 2 ? 'major' : 'minor',
            });
        }
        // Check key steps (high attribution)
        const keySteps = trajectory.steps.filter((step) => (step.attributionScore ?? 0) >= 0.15);
        if (keySteps.length > 0 && trajectory.outcome.success) {
            score += 0.1;
        }
        // Normalize score to [0, 1]
        score = Math.max(0, Math.min(1, score));
        return createEvaluationResult({
            quality: scoreToQuality(score),
            score,
            acceptable: trajectory.outcome.success && score >= 0.6,
            issues,
            method: 'heuristic',
        });
    }
    /**
     * Parse quality string to QualityLevel.
     * Matching is case-insensitive; "needs work" is accepted as 'needs_work'.
     * Unrecognised or non-string values map to 'needs_work'.
     */
    parseQuality(quality) {
        // Agent output is untrusted JSON: only strings can be lower-cased.
        const normalized = typeof quality === 'string' ? quality.toLowerCase() : undefined;
        switch (normalized) {
            case 'excellent':
                return 'excellent';
            case 'good':
                return 'good';
            case 'needs_work':
            case 'needs work':
                return 'needs_work';
            case 'poor':
                return 'poor';
            default:
                return 'needs_work';
        }
    }
    /**
     * Parse issue type string.
     * Matching is case-insensitive; "off-topic" is accepted as 'off_topic'.
     * Unrecognised or non-string values map to 'error'.
     */
    parseIssueType(type) {
        const normalized = typeof type === 'string' ? type.toLowerCase() : undefined;
        switch (normalized) {
            case 'incomplete':
                return 'incomplete';
            case 'incorrect':
                return 'incorrect';
            case 'inefficient':
                return 'inefficient';
            case 'off_topic':
            case 'off-topic':
                return 'off_topic';
            case 'error':
                return 'error';
            default:
                return 'error';
        }
    }
    /**
     * Parse severity string.
     * Matching is case-insensitive. Unrecognised or non-string values map to 'major'.
     */
    parseSeverity(severity) {
        const normalized = typeof severity === 'string' ? severity.toLowerCase() : undefined;
        switch (normalized) {
            case 'critical':
                return 'critical';
            case 'major':
                return 'major';
            case 'minor':
                return 'minor';
            default:
                return 'major';
        }
    }
}
/**
 * Factory for SolutionEvaluator.
 *
 * @param agentManager Manager used for agent-based evaluation, or null to skip it.
 * @param config Optional evaluator configuration overrides.
 * @returns A new SolutionEvaluator built from the given arguments.
 */
export function createSolutionEvaluator(agentManager, config) {
    const evaluator = new SolutionEvaluator(agentManager, config);
    return evaluator;
}
352
+ //# sourceMappingURL=evaluator.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"evaluator.js","sourceRoot":"","sources":["../../src/search/evaluator.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAKH,OAAO,EAEL,sBAAsB,EACtB,cAAc,GACf,MAAM,uBAAuB,CAAC;AAyC/B;;GAEG;AACH,MAAM,cAAc,GAA8B;IAChD,kBAAkB,EAAE,WAAW;IAC/B,+BAA+B,EAAE,GAAG;IACpC,cAAc,EAAE,KAAK;IACrB,YAAY,EAAE,KAAK;CACpB,CAAC;AAEF;;GAEG;AACH,MAAM,OAAO,iBAAiB;IACpB,YAAY,CAAsB;IAClC,MAAM,CAA4B;IAClC,SAAS,GAAsC,IAAI,GAAG,EAAE,CAAC;IAEjE,YACE,YAAiC,EACjC,SAA0B,EAAE;QAE5B,IAAI,CAAC,YAAY,GAAG,YAAY,CAAC;QACjC,IAAI,CAAC,MAAM,GAAG,EAAE,GAAG,cAAc,EAAE,GAAG,MAAM,EAAE,CAAC;IACjD,CAAC;IAED;;OAEG;IACH,gBAAgB,CAAC,MAAc,EAAE,QAA8B;QAC7D,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;IACvC,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,QAAQ,CACZ,UAAsB,EACtB,IAAU;QAEV,0DAA0D;QAC1D,MAAM,kBAAkB,GAAG,MAAM,IAAI,CAAC,eAAe,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC;QACxE,IAAI,kBAAkB,EAAE,CAAC;YACvB,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,GAAG,kBAAkB,CAAC;YAElD,uEAAuE;YACvE,IACE,MAAM,CAAC,UAAU,IAAI,IAAI,CAAC,MAAM,CAAC,+BAA+B;gBAChE,CAAC,IAAI,CAAC,MAAM,CAAC,cAAc,EAC3B,CAAC;gBACD,OAAO,UAAU,CAAC;YACpB,CAAC;QACH,CAAC;QAED,8BAA8B;QAC9B,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;YACtB,IAAI,CAAC;gBACH,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,iBAAiB,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC;gBACnE,OAAO,WAAW,CAAC;YACrB,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,2CAA2C;gBAC3C,MAAM,QAAQ,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;gBACxE,wCAAwC;gBACxC,OAAO,CAAC,IAAI,CAAC,uDAAuD,QAAQ,EAAE,CAAC,CAAC;YAClF,CAAC;QACH,CAAC;QAED,uCAAuC;QACvC,OAAO,IAAI,CAAC,iBAAiB,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC;IAClD,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,eAAe,CAC3B,UAAsB,EACtB,IAAU;QAEV,qCAAqC;QACrC,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;QAC3E,IAAI,CAAC,QAAQ,EAAE,CAAC;YACd,OAAO,IAAI,CAAC;QACd,CAAC;QAED,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC;YAEhD,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM;gBACzB,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,MAAM,CAAC,UAAU,CAAC;gBAClC,CAAC,C
AAC,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,MAAM,CAAC,UAAU,CAAC,CAAC;YAEzC,MAAM,UAAU,GAAG,sBAAsB,CAAC;gBACxC,OAAO,EAAE,cAAc,CAAC,KAAK,CAAC;gBAC9B,KAAK;gBACL,UAAU,EAAE,MAAM,CAAC,MAAM,IAAI,MAAM,CAAC,UAAU,IAAI,IAAI,CAAC,MAAM,CAAC,+BAA+B;gBAC7F,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;oBACrC,IAAI,EAAE,KAAK,CAAC,IAAI;oBAChB,WAAW,EAAE,KAAK,CAAC,WAAW;oBAC9B,QAAQ,EAAE,KAAK,CAAC,QAAQ,IAAI,OAAO;iBACpC,CAAC,CAAC,IAAI,EAAE;gBACT,MAAM,EAAE,cAAc;gBACtB,WAAW,EAAE,MAAM,CAAC,OAAO;aAC5B,CAAC,CAAC;YAEH,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,CAAC;QAChC,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,wDAAwD;YACxD,OAAO,CAAC,IAAI,CACV,sBAAsB,EACtB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CACvD,CAAC;YACF,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,iBAAiB,CAC7B,UAAsB,EACtB,IAAU;QAEV,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,CAAC;YACvB,MAAM,IAAI,KAAK,CAAC,iDAAiD,CAAC,CAAC;QACrE,CAAC;QAED,uCAAuC;QACvC,MAAM,cAAc,GAAS;YAC3B,EAAE,EAAE,QAAQ,UAAU,CAAC,EAAE,EAAE;YAC3B,WAAW,EAAE,IAAI,CAAC,qBAAqB,CAAC,UAAU,EAAE,IAAI,CAAC;YACzD,MAAM,EAAE,YAAY;YACpB,OAAO,EAAE,EAAE;YACX,SAAS,EAAE,IAAI,IAAI,EAAE;YACrB,QAAQ,EAAE;gBACR,cAAc,EAAE,IAAI,CAAC,EAAE;gBACvB,YAAY,EAAE,UAAU,CAAC,EAAE;aAC5B;SACF,CAAC;QAEF,yBAAyB;QACzB,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC;YAC3C,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC,kBAAkB;YACzC,IAAI,EAAE,cAAc;YACpB,OAAO,EAAE,IAAI,CAAC,MAAM,CAAC,YAAY;YACjC,cAAc,EAAE;gBACd,sCAAsC;gBACtC,SAAS,EAAE,IAAI;aAChB;SACF,CAAC,CAAC;QAEH,6CAA6C;QAC7C,OAAO,IAAI,CAAC,oBAAoB,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,EAAE,MAAM,CAAC,UAAU,CAAC,CAAC;IAC7E,CAAC;IAED;;OAEG;IACK,qBAAqB,CAAC,UAAsB,EAAE,IAAU;QAC9D,MAAM,KAAK,GAAG,UAAU,CAAC,KAAK;aAC3B,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE;YACf,IAAI,OAAO,GAAG,QAAQ,CAAC,GAAG,CAAC,GAAG,CAAC;YAC/B,IAAI,IAAI,CAAC,OAAO;gBAAE,OAAO,IAAI,gBAAgB,IAAI,CAAC,OAAO,EAAE,CAAC;YAC5D,OAAO,IAAI,eAAe,IAAI,CAAC,MAAM,EAAE,CAAC;YACxC,IAAI,IAAI,CAAC,WAAW;gBAAE,OAAO,IAAI,oBAAoB,IAAI,CAAC,WAAW,EAAE,CAAC;YACxE,OAAO,OAAO,CAAC;QACjB,CAAC,CAAC;aACD,IAAI,CAAC,MA
AM,CAAC,CAAC;QAEhB,MAAM,UAAU,GAAG,UAAU,CAAC,OAAO,CAAC,OAAO;YAC3C,CAAC,CAAC,YAAY,UAAU,CAAC,OAAO,CAAC,QAAQ,IAAI,gBAAgB,EAAE;YAC/D,CAAC,CAAC,YAAY,UAAU,CAAC,OAAO,CAAC,SAAS,IAAI,eAAe,EAAE,CAAC;QAElE,OAAO;;;EAGT,IAAI,CAAC,WAAW;;;EAGhB,KAAK;;;EAGL,UAAU;;;;;;;;;;;;;;;;;;;EAmBV,CAAC;IACD,CAAC;IAED;;OAEG;IACK,oBAAoB,CAC1B,MAAe,EACf,eAA2B;QAE3B,uBAAuB;QACvB,IAAI,OAAO,MAAM,KAAK,QAAQ,EAAE,CAAC;YAC/B,IAAI,CAAC;gBACH,4DAA4D;gBAC5D,MAAM,SAAS,GAAG,MAAM,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;gBAC9C,IAAI,SAAS,EAAE,CAAC;oBACd,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,CAUrC,CAAC;oBAEF,OAAO,sBAAsB,CAAC;wBAC5B,OAAO,EAAE,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,OAAO,CAAC;wBAC1C,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,CAAC,KAAK,IAAI,GAAG,CAAC,CAAC;wBACpD,UAAU,EAAE,MAAM,CAAC,UAAU,IAAI,KAAK;wBACtC,MAAM,EAAE,CAAC,MAAM,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;4BAC5C,IAAI,EAAE,IAAI,CAAC,cAAc,CAAC,KAAK,CAAC,IAAI,CAAC;4BACrC,WAAW,EAAE,KAAK,CAAC,WAAW,IAAI,eAAe;4BACjD,QAAQ,EAAE,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,QAAQ,CAAC;4BAC5C,UAAU,EAAE,SAAS;yBACtB,CAAC,CAAC;wBACH,MAAM,EAAE,OAAO;wBACf,WAAW,EAAE,MAAM;qBACpB,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;YAAC,MAAM,CAAC;gBACP,oCAAoC;YACtC,CAAC;QACH,CAAC;QAED,kDAAkD;QAClD,OAAO,sBAAsB,CAAC;YAC5B,OAAO,EAAE,YAAY;YACrB,KAAK,EAAE,GAAG;YACV,UAAU,EAAE,KAAK;YACjB,MAAM,EAAE;gBACN;oBACE,IAAI,EAAE,OAAO;oBACb,WAAW,EAAE,2CAA2C;oBACxD,QAAQ,EAAE,OAAO;iBAClB;aACF;YACD,MAAM,EAAE,OAAO;YACf,WAAW,EAAE,OAAO,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC;SAC1E,CAAC,CAAC;IACL,CAAC;IAED;;OAEG;IACK,iBAAiB,CACvB,UAAsB,EACtB,KAAW;QAEX,MAAM,MAAM,GAIP,EAAE,CAAC;QAER,IAAI,KAAK,GAAG,GAAG,CAAC,CAAC,gBAAgB;QAEjC,gBAAgB;QAChB,IAAI,UAAU,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;YAC/B,KAAK,IAAI,GAAG,CAAC;QACf,CAAC;aAAM,CAAC;YACN,KAAK,IAAI,GAAG,CAAC;YACb,MAAM,CAAC,IAAI,CAAC;gBACV,IAAI,EAAE,OAAO;gBACb,WAAW,EAAE,UAAU,CAAC,OAAO,CAAC,SAAS,IAAI,oCAAoC;gBACjF,QAAQ,EAAE,UAAU;aACrB,CAAC,CAAC;QACL,CAAC;QAED,wBAAwB;QACxB,IAAI,UAAU,CAAC,KAAK,
CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAClC,KAAK,IAAI,GAAG,CAAC;YACb,MAAM,CAAC,IAAI,CAAC;gBACV,IAAI,EAAE,YAAY;gBAClB,WAAW,EAAE,uCAAuC;gBACpD,QAAQ,EAAE,UAAU;aACrB,CAAC,CAAC;QACL,CAAC;aAAM,CAAC;YACN,8DAA8D;YAC9D,IAAI,UAAU,CAAC,KAAK,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;gBACjC,KAAK,IAAI,GAAG,CAAC;gBACb,MAAM,CAAC,IAAI,CAAC;oBACV,IAAI,EAAE,aAAa;oBACnB,WAAW,EAAE,iBAAiB,UAAU,CAAC,KAAK,CAAC,MAAM,yCAAyC;oBAC9F,QAAQ,EAAE,OAAO;iBAClB,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,4BAA4B;QAC5B,MAAM,UAAU,GAAG,UAAU,CAAC,KAAK,CAAC,MAAM,CACxC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,WAAW,EAAE,WAAW,EAAE,CAAC,QAAQ,CAAC,OAAO,CAAC,CAC5D,CAAC;QACF,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC1B,KAAK,IAAI,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,UAAU,CAAC,MAAM,CAAC,CAAC;YAC9C,MAAM,CAAC,IAAI,CAAC;gBACV,IAAI,EAAE,OAAO;gBACb,WAAW,EAAE,GAAG,UAAU,CAAC,MAAM,6BAA6B;gBAC9D,QAAQ,EAAE,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO;aACpD,CAAC,CAAC;QACL,CAAC;QAED,qCAAqC;QACrC,MAAM,QAAQ,GAAG,UAAU,CAAC,KAAK,CAAC,MAAM,CACtC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,gBAAgB,IAAI,CAAC,CAAC,IAAI,IAAI,CAC/C,CAAC;QACF,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,IAAI,UAAU,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;YACtD,KAAK,IAAI,GAAG,CAAC;QACf,CAAC;QAED,4BAA4B;QAC5B,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,CAAC;QAExC,OAAO,sBAAsB,CAAC;YAC5B,OAAO,EAAE,cAAc,CAAC,KAAK,CAAC;YAC9B,KAAK;YACL,UAAU,EAAE,UAAU,CAAC,OAAO,CAAC,OAAO,IAAI,KAAK,IAAI,GAAG;YACtD,MAAM;YACN,MAAM,EAAE,WAAW;SACpB,CAAC,CAAC;IACL,CAAC;IAED;;OAEG;IACK,YAAY,CAClB,OAAgB;QAEhB,MAAM,UAAU,GAAG,OAAO,EAAE,WAAW,EAAE,CAAC;QAC1C,QAAQ,UAAU,EAAE,CAAC;YACnB,KAAK,WAAW;gBACd,OAAO,WAAW,CAAC;YACrB,KAAK,MAAM;gBACT,OAAO,MAAM,CAAC;YAChB,KAAK,YAAY,CAAC;YAClB,KAAK,YAAY;gBACf,OAAO,YAAY,CAAC;YACtB,KAAK,MAAM;gBACT,OAAO,MAAM,CAAC;YAChB;gBACE,OAAO,YAAY,CAAC;QACxB,CAAC;IACH,CAAC;IAED;;OAEG;IACK,cAAc,CACpB,IAAa;QAEb,MAAM,UAAU,GAAG,IAAI,EAAE,WAAW,EAAE,CAAC;QACvC,QAAQ,UAAU,EAAE,CAAC;YACnB,KAAK,YAAY;gBACf,OAAO,YAAY,CAAC;YACtB,KAAK,WAAW;gBACd,OAAO,WAAW,CAAC;YACrB,KAAK,aAAa;gBAChB,OAAO,
aAAa,CAAC;YACvB,KAAK,WAAW,CAAC;YACjB,KAAK,WAAW;gBACd,OAAO,WAAW,CAAC;YACrB,KAAK,OAAO;gBACV,OAAO,OAAO,CAAC;YACjB;gBACE,OAAO,OAAO,CAAC;QACnB,CAAC;IACH,CAAC;IAED;;OAEG;IACK,aAAa,CAAC,QAAiB;QACrC,MAAM,UAAU,GAAG,QAAQ,EAAE,WAAW,EAAE,CAAC;QAC3C,QAAQ,UAAU,EAAE,CAAC;YACnB,KAAK,UAAU;gBACb,OAAO,UAAU,CAAC;YACpB,KAAK,OAAO;gBACV,OAAO,OAAO,CAAC;YACjB,KAAK,OAAO;gBACV,OAAO,OAAO,CAAC;YACjB;gBACE,OAAO,OAAO,CAAC;QACnB,CAAC;IACH,CAAC;CACF;AAED;;GAEG;AACH,MAAM,UAAU,uBAAuB,CACrC,YAAiC,EACjC,MAAwB;IAExB,OAAO,IAAI,iBAAiB,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC;AACrD,CAAC"}
@@ -0,0 +1,7 @@
1
+ export { TaskRouter, createRouter, type RoutingDecision, } from './router.js';
2
+ export { DirectSolver, createSolver, type SolverConfig, type SolverResult, } from './solver.js';
3
+ export { type EvaluationResult, type EvaluationIssue, type EvaluationMethod, type QualityLevel, type FailureContext, type RefinementResult, type IssueType, type IssueSeverity, createEvaluationResult, scoreToQuality, isQualityAcceptable, EvaluationResultSchema, EvaluationIssueSchema, QualityLevelSchema, IssueSeveritySchema, IssueTypeSchema, EvaluationMethodSchema, } from './refinement-types.js';
4
+ export { SolutionEvaluator, createSolutionEvaluator, type EvaluatorConfig, type VerificationFunction, type VerificationResult, } from './evaluator.js';
5
+ export { RefinementLoop, createRefinementLoop, type RefinementLoopConfig, } from './refinement-loop.js';
6
+ export { VerificationRunner, createVerificationRunner, TestRunners, type CommandVerificationConfig, type CommandResult, type VerificationIssue, } from './verification-runner.js';
7
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/search/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,UAAU,EACV,YAAY,EACZ,KAAK,eAAe,GACrB,MAAM,aAAa,CAAC;AAErB,OAAO,EACL,YAAY,EACZ,YAAY,EACZ,KAAK,YAAY,EACjB,KAAK,YAAY,GAClB,MAAM,aAAa,CAAC;AAGrB,OAAO,EACL,KAAK,gBAAgB,EACrB,KAAK,eAAe,EACpB,KAAK,gBAAgB,EACrB,KAAK,YAAY,EACjB,KAAK,cAAc,EACnB,KAAK,gBAAgB,EACrB,KAAK,SAAS,EACd,KAAK,aAAa,EAClB,sBAAsB,EACtB,cAAc,EACd,mBAAmB,EACnB,sBAAsB,EACtB,qBAAqB,EACrB,kBAAkB,EAClB,mBAAmB,EACnB,eAAe,EACf,sBAAsB,GACvB,MAAM,uBAAuB,CAAC;AAG/B,OAAO,EACL,iBAAiB,EACjB,uBAAuB,EACvB,KAAK,eAAe,EACpB,KAAK,oBAAoB,EACzB,KAAK,kBAAkB,GACxB,MAAM,gBAAgB,CAAC;AAGxB,OAAO,EACL,cAAc,EACd,oBAAoB,EACpB,KAAK,oBAAoB,GAC1B,MAAM,sBAAsB,CAAC;AAG9B,OAAO,EACL,kBAAkB,EAClB,wBAAwB,EACxB,WAAW,EACX,KAAK,yBAAyB,EAC9B,KAAK,aAAa,EAClB,KAAK,iBAAiB,GACvB,MAAM,0BAA0B,CAAC"}
@@ -0,0 +1,11 @@
1
+ export { TaskRouter, createRouter, } from './router.js';
2
+ export { DirectSolver, createSolver, } from './solver.js';
3
+ // Refinement types
4
+ export { createEvaluationResult, scoreToQuality, isQualityAcceptable, EvaluationResultSchema, EvaluationIssueSchema, QualityLevelSchema, IssueSeveritySchema, IssueTypeSchema, EvaluationMethodSchema, } from './refinement-types.js';
5
+ // Solution evaluator
6
+ export { SolutionEvaluator, createSolutionEvaluator, } from './evaluator.js';
7
+ // Refinement loop
8
+ export { RefinementLoop, createRefinementLoop, } from './refinement-loop.js';
9
+ // Verification runner
10
+ export { VerificationRunner, createVerificationRunner, TestRunners, } from './verification-runner.js';
11
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/search/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,UAAU,EACV,YAAY,GAEb,MAAM,aAAa,CAAC;AAErB,OAAO,EACL,YAAY,EACZ,YAAY,GAGb,MAAM,aAAa,CAAC;AAErB,mBAAmB;AACnB,OAAO,EASL,sBAAsB,EACtB,cAAc,EACd,mBAAmB,EACnB,sBAAsB,EACtB,qBAAqB,EACrB,kBAAkB,EAClB,mBAAmB,EACnB,eAAe,EACf,sBAAsB,GACvB,MAAM,uBAAuB,CAAC;AAE/B,qBAAqB;AACrB,OAAO,EACL,iBAAiB,EACjB,uBAAuB,GAIxB,MAAM,gBAAgB,CAAC;AAExB,kBAAkB;AAClB,OAAO,EACL,cAAc,EACd,oBAAoB,GAErB,MAAM,sBAAsB,CAAC;AAE9B,sBAAsB;AACtB,OAAO,EACL,kBAAkB,EAClB,wBAAwB,EACxB,WAAW,GAIZ,MAAM,0BAA0B,CAAC"}
@@ -0,0 +1,73 @@
1
+ /**
2
+ * Refinement Loop
3
+ *
4
+ * Implements ReMem-style iterative refinement that improves solutions
5
+ * using memory-augmented feedback. The loop:
6
+ * 1. Evaluates solution quality
7
+ * 2. If not acceptable, injects failure context and retries
8
+ * 3. Continues until acceptable or max iterations reached
9
+ */
10
+ import type { Trajectory } from '../types/trajectory.js';
11
+ import type { AgentManager } from '../runtime/manager.js';
12
+ import type { MemorySystem } from '../memory/system.js';
13
+ import type { AgentSpawnConfig } from '../runtime/types.js';
14
+ import { type RefinementResult } from './refinement-types.js';
15
+ import { SolutionEvaluator, type EvaluatorConfig } from './evaluator.js';
16
+ /**
17
+ * Configuration for the refinement loop. Every field is optional; the defaults quoted below are as stated by this declaration file and are applied elsewhere (not visible here).
18
+ */
19
+ export interface RefinementLoopConfig {
20
+ /** Maximum number of refinement iterations (default: 3) */
21
+ maxIterations?: number;
22
+ /** Minimum acceptable quality score (default: 0.7) */
23
+ minAcceptableScore?: number;
24
+ /** Whether to inject previous failure context (default: true) */
25
+ injectFailureContext?: boolean;
26
+ /** Whether to query memory for similar failures (default: true) */
27
+ queryFailureMemory?: boolean;
28
+ /** Evaluator configuration */
29
+ evaluatorConfig?: EvaluatorConfig;
30
+ /** Task strategies that should trigger refinement */
31
+ triggerOnStrategies?: Array<'direct' | 'adapt' | 'explore' | 'fallback'>;
32
+ /** Minimum confidence to skip refinement (NOTE(review): presumably compared against the confidence passed to shouldTriggerRefinement - confirm in the implementation) */
33
+ confidenceThreshold?: number;
34
+ }
35
+ /**
36
+ * RefinementLoop - Iteratively improves solutions using feedback. Public surface: the constructor, getEvaluator, shouldTriggerRefinement, and refine; all other members are declared private.
37
+ */
38
+ export declare class RefinementLoop {
39
+ private agentManager;
40
+ private memory;
41
+ private evaluator;
42
+ private config;
43
+ constructor(agentManager: AgentManager, memory: MemorySystem, config?: RefinementLoopConfig);
44
+ /**
45
+ * Get the evaluator for registering domain verifiers
46
+ */
47
+ getEvaluator(): SolutionEvaluator;
48
+ /**
49
+ * Check if refinement should be triggered based on strategy and confidence (confidence is optional)
50
+ */
51
+ shouldTriggerRefinement(strategy: 'direct' | 'adapt' | 'explore' | 'fallback', confidence?: number): boolean;
52
+ /**
53
+ * Run the refinement loop on an initial trajectory; asynchronous, resolves to a RefinementResult
54
+ */
55
+ refine(initialTrajectory: Trajectory, spawnConfig: AgentSpawnConfig): Promise<RefinementResult>;
56
+ /**
57
+ * Query memory for similar failure patterns
58
+ */
59
+ private queryFailurePatterns;
60
+ /**
61
+ * Check if an experience had a similar initial failure pattern
62
+ */
63
+ private hasSimilarInitialFailure;
64
+ /**
65
+ * Build refinement prompt with failure context and memory
66
+ */
67
+ private buildRefinementPrompt;
68
+ }
69
+ /**
70
+ * Create a refinement loop. Factory taking an AgentManager, a MemorySystem, and an optional RefinementLoopConfig, and returning a RefinementLoop.
71
+ */
72
+ export declare function createRefinementLoop(agentManager: AgentManager, memory: MemorySystem, config?: RefinementLoopConfig): RefinementLoop;
73
+ //# sourceMappingURL=refinement-loop.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"refinement-loop.d.ts","sourceRoot":"","sources":["../../src/search/refinement-loop.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAGH,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,wBAAwB,CAAC;AACzD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AAC1D,OAAO,KAAK,EAAE,YAAY,EAAuB,MAAM,qBAAqB,CAAC;AAC7E,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,qBAAqB,CAAC;AAC5D,OAAO,EACL,KAAK,gBAAgB,EAGtB,MAAM,uBAAuB,CAAC;AAC/B,OAAO,EAAE,iBAAiB,EAAE,KAAK,eAAe,EAAE,MAAM,gBAAgB,CAAC;AAEzE;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACnC,2DAA2D;IAC3D,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,sDAAsD;IACtD,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,iEAAiE;IACjE,oBAAoB,CAAC,EAAE,OAAO,CAAC;IAC/B,mEAAmE;IACnE,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,8BAA8B;IAC9B,eAAe,CAAC,EAAE,eAAe,CAAC;IAClC,qDAAqD;IACrD,mBAAmB,CAAC,EAAE,KAAK,CAAC,QAAQ,GAAG,OAAO,GAAG,SAAS,GAAG,UAAU,CAAC,CAAC;IACzE,4CAA4C;IAC5C,mBAAmB,CAAC,EAAE,MAAM,CAAC;CAC9B;AAeD;;GAEG;AACH,qBAAa,cAAc;IACzB,OAAO,CAAC,YAAY,CAAe;IACnC,OAAO,CAAC,MAAM,CAAe;IAC7B,OAAO,CAAC,SAAS,CAAoB;IACrC,OAAO,CAAC,MAAM,CAAiC;gBAG7C,YAAY,EAAE,YAAY,EAC1B,MAAM,EAAE,YAAY,EACpB,MAAM,GAAE,oBAAyB;IAWnC;;OAEG;IACH,YAAY,IAAI,iBAAiB;IAIjC;;OAEG;IACH,uBAAuB,CACrB,QAAQ,EAAE,QAAQ,GAAG,OAAO,GAAG,SAAS,GAAG,UAAU,EACrD,UAAU,CAAC,EAAE,MAAM,GAClB,OAAO;IAiBV;;OAEG;IACG,MAAM,CACV,iBAAiB,EAAE,UAAU,EAC7B,WAAW,EAAE,gBAAgB,GAC5B,OAAO,CAAC,gBAAgB,CAAC;IAwG5B;;OAEG;YACW,oBAAoB;IAgClC;;OAEG;IACH,OAAO,CAAC,wBAAwB;IAyBhC;;OAEG;IACH,OAAO,CAAC,qBAAqB;CAmE9B;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAClC,YAAY,EAAE,YAAY,EAC1B,MAAM,EAAE,YAAY,EACpB,MAAM,CAAC,EAAE,oBAAoB,GAC5B,cAAc,CAEhB"}