cognitive-core 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (329)
  1. package/README.md +302 -116
  2. package/SKILL.md +193 -0
  3. package/dist/agents/index.d.ts +3 -0
  4. package/dist/agents/index.d.ts.map +1 -0
  5. package/dist/agents/index.js +5 -0
  6. package/dist/agents/index.js.map +1 -0
  7. package/dist/agents/mock-provider.d.ts +23 -0
  8. package/dist/agents/mock-provider.d.ts.map +1 -0
  9. package/dist/agents/mock-provider.js +71 -0
  10. package/dist/agents/mock-provider.js.map +1 -0
  11. package/dist/agents/types.d.ts +98 -0
  12. package/dist/agents/types.d.ts.map +1 -0
  13. package/dist/agents/types.js +44 -0
  14. package/dist/agents/types.js.map +1 -0
  15. package/dist/atlas.d.ts +196 -0
  16. package/dist/atlas.d.ts.map +1 -0
  17. package/dist/atlas.js +373 -0
  18. package/dist/atlas.js.map +1 -0
  19. package/dist/bin/cognitive-core.d.ts +18 -0
  20. package/dist/bin/cognitive-core.d.ts.map +1 -0
  21. package/dist/bin/cognitive-core.js +419 -0
  22. package/dist/bin/cognitive-core.js.map +1 -0
  23. package/dist/embeddings/bm25.d.ts +104 -0
  24. package/dist/embeddings/bm25.d.ts.map +1 -0
  25. package/dist/embeddings/bm25.js +264 -0
  26. package/dist/embeddings/bm25.js.map +1 -0
  27. package/dist/embeddings/index.d.ts +12 -0
  28. package/dist/embeddings/index.d.ts.map +1 -0
  29. package/dist/embeddings/index.js +16 -0
  30. package/dist/embeddings/index.js.map +1 -0
  31. package/dist/embeddings/manager.d.ts +112 -0
  32. package/dist/embeddings/manager.d.ts.map +1 -0
  33. package/dist/embeddings/manager.js +215 -0
  34. package/dist/embeddings/manager.js.map +1 -0
  35. package/dist/embeddings/provider.d.ts +101 -0
  36. package/dist/embeddings/provider.d.ts.map +1 -0
  37. package/dist/embeddings/provider.js +232 -0
  38. package/dist/embeddings/provider.js.map +1 -0
  39. package/dist/embeddings/vector-store.d.ts +101 -0
  40. package/dist/embeddings/vector-store.d.ts.map +1 -0
  41. package/dist/embeddings/vector-store.js +256 -0
  42. package/dist/embeddings/vector-store.js.map +1 -0
  43. package/dist/factory.d.ts +193 -0
  44. package/dist/factory.d.ts.map +1 -0
  45. package/dist/factory.js +109 -0
  46. package/dist/factory.js.map +1 -0
  47. package/dist/index.d.ts +30 -453
  48. package/dist/index.d.ts.map +1 -0
  49. package/dist/index.js +84 -509
  50. package/dist/index.js.map +1 -0
  51. package/dist/learning/analyzer.d.ts +110 -0
  52. package/dist/learning/analyzer.d.ts.map +1 -0
  53. package/dist/learning/analyzer.js +213 -0
  54. package/dist/learning/analyzer.js.map +1 -0
  55. package/dist/learning/effectiveness.d.ts +158 -0
  56. package/dist/learning/effectiveness.d.ts.map +1 -0
  57. package/dist/learning/effectiveness.js +251 -0
  58. package/dist/learning/effectiveness.js.map +1 -0
  59. package/dist/learning/index.d.ts +8 -0
  60. package/dist/learning/index.d.ts.map +1 -0
  61. package/dist/learning/index.js +11 -0
  62. package/dist/learning/index.js.map +1 -0
  63. package/dist/learning/llm-extractor.d.ts +88 -0
  64. package/dist/learning/llm-extractor.d.ts.map +1 -0
  65. package/dist/learning/llm-extractor.js +372 -0
  66. package/dist/learning/llm-extractor.js.map +1 -0
  67. package/dist/learning/meta-learner.d.ts +80 -0
  68. package/dist/learning/meta-learner.d.ts.map +1 -0
  69. package/dist/learning/meta-learner.js +355 -0
  70. package/dist/learning/meta-learner.js.map +1 -0
  71. package/dist/learning/pipeline.d.ts +65 -0
  72. package/dist/learning/pipeline.d.ts.map +1 -0
  73. package/dist/learning/pipeline.js +170 -0
  74. package/dist/learning/pipeline.js.map +1 -0
  75. package/dist/learning/playbook-extractor.d.ts +113 -0
  76. package/dist/learning/playbook-extractor.d.ts.map +1 -0
  77. package/dist/learning/playbook-extractor.js +523 -0
  78. package/dist/learning/playbook-extractor.js.map +1 -0
  79. package/dist/learning/usage-inference.d.ts +82 -0
  80. package/dist/learning/usage-inference.d.ts.map +1 -0
  81. package/dist/learning/usage-inference.js +261 -0
  82. package/dist/learning/usage-inference.js.map +1 -0
  83. package/dist/mcp/index.d.ts +6 -0
  84. package/dist/mcp/index.d.ts.map +1 -0
  85. package/dist/mcp/index.js +6 -0
  86. package/dist/mcp/index.js.map +1 -0
  87. package/dist/mcp/playbook-server.d.ts +120 -0
  88. package/dist/mcp/playbook-server.d.ts.map +1 -0
  89. package/dist/mcp/playbook-server.js +427 -0
  90. package/dist/mcp/playbook-server.js.map +1 -0
  91. package/dist/memory/curated-loader.d.ts +62 -0
  92. package/dist/memory/curated-loader.d.ts.map +1 -0
  93. package/dist/memory/curated-loader.js +106 -0
  94. package/dist/memory/curated-loader.js.map +1 -0
  95. package/dist/memory/experience.d.ts +122 -0
  96. package/dist/memory/experience.d.ts.map +1 -0
  97. package/dist/memory/experience.js +392 -0
  98. package/dist/memory/experience.js.map +1 -0
  99. package/dist/memory/index.d.ts +6 -0
  100. package/dist/memory/index.d.ts.map +1 -0
  101. package/dist/memory/index.js +9 -0
  102. package/dist/memory/index.js.map +1 -0
  103. package/dist/memory/meta.d.ts +90 -0
  104. package/dist/memory/meta.d.ts.map +1 -0
  105. package/dist/memory/meta.js +362 -0
  106. package/dist/memory/meta.js.map +1 -0
  107. package/dist/memory/playbook.d.ts +133 -0
  108. package/dist/memory/playbook.d.ts.map +1 -0
  109. package/dist/memory/playbook.js +357 -0
  110. package/dist/memory/playbook.js.map +1 -0
  111. package/dist/memory/system.d.ts +167 -0
  112. package/dist/memory/system.d.ts.map +1 -0
  113. package/dist/memory/system.js +383 -0
  114. package/dist/memory/system.js.map +1 -0
  115. package/dist/runtime/backends/acp.d.ts +67 -0
  116. package/dist/runtime/backends/acp.d.ts.map +1 -0
  117. package/dist/runtime/backends/acp.js +290 -0
  118. package/dist/runtime/backends/acp.js.map +1 -0
  119. package/dist/runtime/backends/index.d.ts +5 -0
  120. package/dist/runtime/backends/index.d.ts.map +1 -0
  121. package/dist/runtime/backends/index.js +6 -0
  122. package/dist/runtime/backends/index.js.map +1 -0
  123. package/dist/runtime/backends/mock.d.ts +67 -0
  124. package/dist/runtime/backends/mock.d.ts.map +1 -0
  125. package/dist/runtime/backends/mock.js +153 -0
  126. package/dist/runtime/backends/mock.js.map +1 -0
  127. package/dist/runtime/backends/subprocess.d.ts +56 -0
  128. package/dist/runtime/backends/subprocess.d.ts.map +1 -0
  129. package/dist/runtime/backends/subprocess.js +260 -0
  130. package/dist/runtime/backends/subprocess.js.map +1 -0
  131. package/dist/runtime/flows/learning.d.ts +73 -0
  132. package/dist/runtime/flows/learning.d.ts.map +1 -0
  133. package/dist/runtime/flows/learning.js +116 -0
  134. package/dist/runtime/flows/learning.js.map +1 -0
  135. package/dist/runtime/flows/validation.d.ts +122 -0
  136. package/dist/runtime/flows/validation.d.ts.map +1 -0
  137. package/dist/runtime/flows/validation.js +223 -0
  138. package/dist/runtime/flows/validation.js.map +1 -0
  139. package/dist/runtime/index.d.ts +6 -0
  140. package/dist/runtime/index.d.ts.map +1 -0
  141. package/dist/runtime/index.js +8 -0
  142. package/dist/runtime/index.js.map +1 -0
  143. package/dist/runtime/manager.d.ts +116 -0
  144. package/dist/runtime/manager.d.ts.map +1 -0
  145. package/dist/runtime/manager.js +416 -0
  146. package/dist/runtime/manager.js.map +1 -0
  147. package/dist/runtime/types.d.ts +138 -0
  148. package/dist/runtime/types.d.ts.map +1 -0
  149. package/dist/runtime/types.js +2 -0
  150. package/dist/runtime/types.js.map +1 -0
  151. package/dist/search/evaluator.d.ts +102 -0
  152. package/dist/search/evaluator.d.ts.map +1 -0
  153. package/dist/search/evaluator.js +352 -0
  154. package/dist/search/evaluator.js.map +1 -0
  155. package/dist/search/index.d.ts +7 -0
  156. package/dist/search/index.d.ts.map +1 -0
  157. package/dist/search/index.js +11 -0
  158. package/dist/search/index.js.map +1 -0
  159. package/dist/search/refinement-loop.d.ts +73 -0
  160. package/dist/search/refinement-loop.d.ts.map +1 -0
  161. package/dist/search/refinement-loop.js +245 -0
  162. package/dist/search/refinement-loop.js.map +1 -0
  163. package/dist/search/refinement-types.d.ts +154 -0
  164. package/dist/search/refinement-types.d.ts.map +1 -0
  165. package/dist/search/refinement-types.js +99 -0
  166. package/dist/search/refinement-types.js.map +1 -0
  167. package/dist/search/router.d.ts +61 -0
  168. package/dist/search/router.d.ts.map +1 -0
  169. package/dist/search/router.js +197 -0
  170. package/dist/search/router.js.map +1 -0
  171. package/dist/search/solver.d.ts +75 -0
  172. package/dist/search/solver.d.ts.map +1 -0
  173. package/dist/search/solver.js +216 -0
  174. package/dist/search/solver.js.map +1 -0
  175. package/dist/search/verification-runner.d.ts +125 -0
  176. package/dist/search/verification-runner.d.ts.map +1 -0
  177. package/dist/search/verification-runner.js +440 -0
  178. package/dist/search/verification-runner.js.map +1 -0
  179. package/dist/surfacing/index.d.ts +2 -0
  180. package/dist/surfacing/index.d.ts.map +1 -0
  181. package/dist/surfacing/index.js +2 -0
  182. package/dist/surfacing/index.js.map +1 -0
  183. package/dist/surfacing/skill-library.d.ts +158 -0
  184. package/dist/surfacing/skill-library.d.ts.map +1 -0
  185. package/dist/surfacing/skill-library.js +429 -0
  186. package/dist/surfacing/skill-library.js.map +1 -0
  187. package/dist/types/config.d.ts +1113 -0
  188. package/dist/types/config.d.ts.map +1 -0
  189. package/dist/types/config.js +274 -0
  190. package/dist/types/config.js.map +1 -0
  191. package/dist/types/index.d.ts +9 -0
  192. package/dist/types/index.d.ts.map +1 -0
  193. package/dist/types/index.js +14 -0
  194. package/dist/types/index.js.map +1 -0
  195. package/dist/types/memory.d.ts +339 -0
  196. package/dist/types/memory.d.ts.map +1 -0
  197. package/dist/types/memory.js +207 -0
  198. package/dist/types/memory.js.map +1 -0
  199. package/dist/types/meta.d.ts +146 -0
  200. package/dist/types/meta.d.ts.map +1 -0
  201. package/dist/types/meta.js +51 -0
  202. package/dist/types/meta.js.map +1 -0
  203. package/dist/types/outcome.d.ts +42 -0
  204. package/dist/types/outcome.d.ts.map +1 -0
  205. package/dist/types/outcome.js +50 -0
  206. package/dist/types/outcome.js.map +1 -0
  207. package/dist/types/playbook.d.ts +119 -0
  208. package/dist/types/playbook.d.ts.map +1 -0
  209. package/dist/types/playbook.js +71 -0
  210. package/dist/types/playbook.js.map +1 -0
  211. package/dist/types/step.d.ts +44 -0
  212. package/dist/types/step.d.ts.map +1 -0
  213. package/dist/types/step.js +32 -0
  214. package/dist/types/step.js.map +1 -0
  215. package/dist/types/task.d.ts +91 -0
  216. package/dist/types/task.d.ts.map +1 -0
  217. package/dist/types/task.js +39 -0
  218. package/dist/types/task.js.map +1 -0
  219. package/dist/types/trajectory.d.ts +221 -0
  220. package/dist/types/trajectory.d.ts.map +1 -0
  221. package/dist/types/trajectory.js +60 -0
  222. package/dist/types/trajectory.js.map +1 -0
  223. package/dist/utils/index.d.ts +4 -0
  224. package/dist/utils/index.d.ts.map +1 -0
  225. package/dist/utils/index.js +4 -0
  226. package/dist/utils/index.js.map +1 -0
  227. package/dist/utils/similarity.d.ts +31 -0
  228. package/dist/utils/similarity.d.ts.map +1 -0
  229. package/dist/utils/similarity.js +107 -0
  230. package/dist/utils/similarity.js.map +1 -0
  231. package/dist/utils/storage.d.ts +106 -0
  232. package/dist/utils/storage.d.ts.map +1 -0
  233. package/dist/utils/storage.js +203 -0
  234. package/dist/utils/storage.js.map +1 -0
  235. package/dist/utils/validation.d.ts +129 -0
  236. package/dist/utils/validation.d.ts.map +1 -0
  237. package/dist/utils/validation.js +171 -0
  238. package/dist/utils/validation.js.map +1 -0
  239. package/package.json +50 -34
  240. package/scripts/migrate-to-playbooks.ts +307 -0
  241. package/src/agents/index.ts +14 -0
  242. package/src/agents/mock-provider.ts +93 -0
  243. package/src/agents/types.ts +137 -0
  244. package/src/atlas.ts +560 -0
  245. package/src/bin/cognitive-core.ts +470 -0
  246. package/src/embeddings/bm25.ts +337 -0
  247. package/src/embeddings/index.ts +39 -0
  248. package/src/embeddings/manager.ts +288 -0
  249. package/src/embeddings/provider.ts +311 -0
  250. package/src/embeddings/vector-store.ts +353 -0
  251. package/src/factory.ts +263 -0
  252. package/src/index.ts +246 -0
  253. package/src/learning/analyzer.ts +335 -0
  254. package/src/learning/effectiveness.ts +428 -0
  255. package/src/learning/index.ts +58 -0
  256. package/src/learning/llm-extractor.ts +542 -0
  257. package/src/learning/meta-learner.ts +516 -0
  258. package/src/learning/pipeline.ts +244 -0
  259. package/src/learning/playbook-extractor.ts +702 -0
  260. package/src/learning/usage-inference.ts +372 -0
  261. package/src/mcp/index.ts +12 -0
  262. package/src/mcp/playbook-server.ts +565 -0
  263. package/src/memory/curated-loader.ts +160 -0
  264. package/src/memory/experience.ts +515 -0
  265. package/src/memory/index.ts +27 -0
  266. package/src/memory/meta.ts +506 -0
  267. package/src/memory/playbook.ts +493 -0
  268. package/src/memory/system.ts +551 -0
  269. package/src/runtime/backends/acp.ts +378 -0
  270. package/src/runtime/backends/index.ts +24 -0
  271. package/src/runtime/backends/mock.ts +218 -0
  272. package/src/runtime/backends/subprocess.ts +356 -0
  273. package/src/runtime/flows/learning.ts +183 -0
  274. package/src/runtime/flows/validation.ts +381 -0
  275. package/src/runtime/index.ts +53 -0
  276. package/src/runtime/manager.ts +541 -0
  277. package/src/runtime/types.ts +157 -0
  278. package/src/search/evaluator.ts +474 -0
  279. package/src/search/index.ts +59 -0
  280. package/src/search/refinement-loop.ts +363 -0
  281. package/src/search/refinement-types.ts +159 -0
  282. package/src/search/router.ts +261 -0
  283. package/src/search/solver.ts +303 -0
  284. package/src/search/verification-runner.ts +570 -0
  285. package/src/surfacing/index.ts +6 -0
  286. package/src/surfacing/skill-library.ts +594 -0
  287. package/src/types/config.ts +333 -0
  288. package/src/types/index.ts +130 -0
  289. package/src/types/memory.ts +270 -0
  290. package/src/types/meta.ts +218 -0
  291. package/src/types/outcome.ts +66 -0
  292. package/src/types/playbook.ts +196 -0
  293. package/src/types/step.ts +40 -0
  294. package/src/types/task.ts +52 -0
  295. package/src/types/trajectory.ts +80 -0
  296. package/src/utils/index.ts +38 -0
  297. package/src/utils/similarity.ts +139 -0
  298. package/src/utils/storage.ts +249 -0
  299. package/src/utils/validation.ts +286 -0
  300. package/tests/embeddings/bm25.test.ts +130 -0
  301. package/tests/embeddings/manager.test.ts +205 -0
  302. package/tests/integration/atlas.test.ts +266 -0
  303. package/tests/integration/e2e.test.ts +929 -0
  304. package/tests/learning/analyzer.test.ts +426 -0
  305. package/tests/learning/effectiveness.test.ts +542 -0
  306. package/tests/learning/pipeline.test.ts +176 -0
  307. package/tests/learning/playbook-extractor-provenance.test.ts +114 -0
  308. package/tests/learning/usage-inference.test.ts +254 -0
  309. package/tests/mcp/playbook-server.test.ts +252 -0
  310. package/tests/memory/experience.test.ts +198 -0
  311. package/tests/memory/playbook.test.ts +338 -0
  312. package/tests/memory/provenance.test.ts +639 -0
  313. package/tests/memory/system.test.ts +325 -0
  314. package/tests/runtime/agent-manager.test.ts +512 -0
  315. package/tests/runtime/mock-backend.test.ts +248 -0
  316. package/tests/search/refinement-loop.test.ts +468 -0
  317. package/tests/search/refinement.test.ts +267 -0
  318. package/tests/search/router.test.ts +427 -0
  319. package/tests/surfacing/skill-library.test.ts +292 -0
  320. package/tests/types/outcome.test.ts +147 -0
  321. package/tests/types/step.test.ts +133 -0
  322. package/tests/types/task.test.ts +158 -0
  323. package/tests/types/trajectory.test.ts +253 -0
  324. package/tests/utils/similarity.test.ts +188 -0
  325. package/tests/utils/validation.test.ts +252 -0
  326. package/tsconfig.json +25 -0
  327. package/vitest.config.ts +22 -0
  328. package/dist/index.d.mts +0 -466
  329. package/dist/index.mjs +0 -478
@@ -0,0 +1,474 @@
1
+ /**
2
+ * Solution Evaluator
3
+ *
4
+ * Evaluates solution quality using a hybrid approach:
5
+ * 1. First tries task verification (if available)
6
+ * 2. Falls back to ACP agent evaluation
7
+ * 3. Falls back to heuristic evaluation as last resort
8
+ */
9
+
10
+ import type { Trajectory } from '../types/trajectory.js';
11
+ import type { Task } from '../types/task.js';
12
+ import type { AgentManager } from '../runtime/manager.js';
13
+ import {
14
+ type EvaluationResult,
15
+ createEvaluationResult,
16
+ scoreToQuality,
17
+ } from './refinement-types.js';
18
+
19
/**
 * Callable contract for a task-specific verifier.
 *
 * A verifier receives the trajectory under review together with the task it
 * was produced for, and asynchronously resolves to a {@link VerificationResult}.
 */
export interface VerificationFunction {
  (trajectory: Trajectory, task: Task): Promise<VerificationResult>;
}
25
+
26
/**
 * Outcome reported by a {@link VerificationFunction}.
 */
export interface VerificationResult {
  /** `true` when the solution passed verification. */
  passed: boolean;
  /** How sure the verifier is about its verdict, on a 0-1 scale. */
  confidence: number;
  /** Concrete problems the verifier found, if any. */
  issues?: {
    type: 'incomplete' | 'incorrect' | 'error';
    description: string;
    severity?: 'critical' | 'major' | 'minor';
  }[];
  /** Free-form supplementary information. */
  details?: string;
}
43
+
44
/**
 * Tunable options accepted by the SolutionEvaluator. Every field is optional.
 */
export interface EvaluatorConfig {
  /** Name of the agent type spawned for evaluation (default: 'evaluator'). */
  evaluatorAgentType?: string;
  /** Lowest verifier confidence at which a verification verdict is accepted. */
  verificationConfidenceThreshold?: number;
  /** When set, agent evaluation runs even if verification already passed. */
  alwaysUseAgent?: boolean;
  /** Time limit for the agent evaluation, in milliseconds. */
  agentTimeout?: number;
}
57
+
58
/**
 * Baseline values applied to any evaluator option the caller leaves out.
 */
const DEFAULT_CONFIG: Required<EvaluatorConfig> = {
  evaluatorAgentType: 'evaluator',
  verificationConfidenceThreshold: 0.8,
  alwaysUseAgent: false,
  agentTimeout: 60_000, // one minute, in milliseconds
};
67
+
68
/**
 * SolutionEvaluator - Evaluates trajectory quality using a hybrid approach.
 *
 * Evaluation order used by {@link SolutionEvaluator.evaluate}:
 * 1. A verifier registered for the task's domain (see `registerVerifier`).
 * 2. An evaluator agent spawned through the AgentManager, when one was supplied.
 * 3. A confident verification verdict that was only held back by `alwaysUseAgent`.
 * 4. Built-in heuristics over the trajectory's outcome and steps.
 */
export class SolutionEvaluator {
  private agentManager: AgentManager | null;
  private config: Required<EvaluatorConfig>;
  private verifiers: Map<string, VerificationFunction> = new Map();

  /**
   * @param agentManager - Manager used to spawn the evaluator agent; `null`
   *   disables agent evaluation.
   * @param config - Optional overrides. Fields that are omitted or explicitly
   *   `undefined` keep their default value.
   */
  constructor(
    agentManager: AgentManager | null,
    config: EvaluatorConfig = {}
  ) {
    this.agentManager = agentManager;
    // Resolve field by field rather than spreading `config` over the defaults:
    // a spread lets an explicit `undefined` erase a default, which would make
    // e.g. every `confidence >= threshold` comparison silently false.
    this.config = {
      evaluatorAgentType:
        config.evaluatorAgentType ?? DEFAULT_CONFIG.evaluatorAgentType,
      verificationConfidenceThreshold:
        config.verificationConfidenceThreshold ??
        DEFAULT_CONFIG.verificationConfidenceThreshold,
      alwaysUseAgent: config.alwaysUseAgent ?? DEFAULT_CONFIG.alwaysUseAgent,
      agentTimeout: config.agentTimeout ?? DEFAULT_CONFIG.agentTimeout,
    };
  }

  /**
   * Register a verification function for a task domain.
   * A later registration for the same domain replaces the earlier one.
   */
  registerVerifier(domain: string, verifier: VerificationFunction): void {
    this.verifiers.set(domain, verifier);
  }

  /**
   * Evaluate a trajectory's quality.
   *
   * @param trajectory - The solution attempt to score.
   * @param task - The task the trajectory was produced for.
   * @returns The result of the first evaluation stage that yields a verdict.
   *   Agent failures are logged with `console.warn` and never rejected to the
   *   caller.
   */
  async evaluate(
    trajectory: Trajectory,
    task: Task
  ): Promise<EvaluationResult> {
    // 1. Try verification first (if a verifier exists for the task's domain).
    const verification = await this.tryVerification(trajectory, task);
    const confidentVerification =
      verification !== null &&
      verification.result.confidence >=
        this.config.verificationConfidenceThreshold
        ? verification.evalResult
        : null;

    if (confidentVerification && !this.config.alwaysUseAgent) {
      return confidentVerification;
    }

    // 2. Try agent evaluation.
    if (this.agentManager) {
      try {
        return await this.evaluateWithAgent(trajectory, task);
      } catch (error) {
        const errorMsg = error instanceof Error ? error.message : String(error);
        console.warn(
          `Agent evaluation failed, using fallback evaluation: ${errorMsg}`
        );
      }
    }

    // 3. A confident verification verdict that was only deferred because of
    //    `alwaysUseAgent` is still better evidence than the heuristic.
    if (confidentVerification) {
      return confidentVerification;
    }

    // 4. Last resort: heuristic evaluation.
    return this.evaluateHeuristic(trajectory, task);
  }

  /**
   * Run the verifier registered for `task.domain`, if there is one.
   *
   * @returns The raw verifier result plus its EvaluationResult translation, or
   *   `null` when no verifier is registered or the verifier threw (the error
   *   is logged with `console.warn`).
   */
  private async tryVerification(
    trajectory: Trajectory,
    task: Task
  ): Promise<{ result: VerificationResult; evalResult: EvaluationResult } | null> {
    const verifier = task.domain ? this.verifiers.get(task.domain) : undefined;
    if (!verifier) {
      return null;
    }

    try {
      const result = await verifier(trajectory, task);

      // Guard against verifiers reporting NaN or values outside 0-1 so the
      // derived score always stays within [0, 1].
      const confidence = Number.isFinite(result.confidence)
        ? Math.min(1, Math.max(0, result.confidence))
        : 0;

      // A pass scores at least 0.7; a failure scores at most 0.5.
      const score = result.passed
        ? Math.max(0.7, confidence)
        : Math.min(0.5, 1 - confidence);

      const evalResult = createEvaluationResult({
        quality: scoreToQuality(score),
        score,
        acceptable:
          result.passed &&
          confidence >= this.config.verificationConfidenceThreshold,
        issues:
          result.issues?.map((issue) => ({
            type: issue.type,
            description: issue.description,
            severity: issue.severity ?? 'major',
          })) ?? [],
        method: 'verification',
        rawResponse: result.details,
      });

      return { result, evalResult };
    } catch (error) {
      // A throwing verifier is treated as "no verdict" so other stages can run.
      console.warn(
        'Verification failed:',
        error instanceof Error ? error.message : String(error)
      );
      return null;
    }
  }

  /**
   * Evaluate by spawning an evaluator agent and parsing its reply.
   *
   * @throws Error when no AgentManager was supplied; errors from
   *   `agentManager.spawn` propagate to the caller.
   */
  private async evaluateWithAgent(
    trajectory: Trajectory,
    task: Task
  ): Promise<EvaluationResult> {
    if (!this.agentManager) {
      throw new Error('AgentManager not available for agent evaluation');
    }

    // The evaluation request is itself a Task whose description is the prompt.
    const evaluationTask: Task = {
      id: `eval-${trajectory.id}`,
      description: this.buildEvaluationPrompt(trajectory, task),
      domain: 'evaluation',
      context: {},
      createdAt: new Date(),
      metadata: {
        originalTaskId: task.id,
        trajectoryId: trajectory.id,
      },
    };

    const result = await this.agentManager.spawn({
      agentType: this.config.evaluatorAgentType,
      task: evaluationTask,
      timeout: this.config.agentTimeout,
      backendOptions: {
        // Evaluation agents should be concise
        maxTokens: 2000,
      },
    });

    return this.parseAgentEvaluation(result.session.result, result.trajectory);
  }

  /**
   * Build the prompt sent to the evaluator agent: the original task, a
   * numbered rendering of every step, the outcome, and the JSON reply format.
   */
  private buildEvaluationPrompt(trajectory: Trajectory, task: Task): string {
    const steps = trajectory.steps
      .map((step, i) => {
        let stepStr = `Step ${i + 1}:`;
        if (step.thought) stepStr += `\n Thought: ${step.thought}`;
        stepStr += `\n Action: ${step.action}`;
        if (step.observation) stepStr += `\n Observation: ${step.observation}`;
        return stepStr;
      })
      .join('\n\n');

    const outcomeStr = trajectory.outcome.success
      ? `SUCCESS: ${trajectory.outcome.solution ?? 'Task completed'}`
      : `FAILURE: ${trajectory.outcome.errorInfo ?? 'Unknown error'}`;

    return `Evaluate the quality of this solution attempt.

## Original Task
${task.description}

## Solution Steps
${steps}

## Outcome
${outcomeStr}

## Instructions
Analyze the solution and provide:
1. Overall quality assessment (excellent/good/needs_work/poor)
2. A numeric score from 0.0 to 1.0
3. Whether this solution is acceptable
4. List any specific issues found
5. Suggestions for improvement

Respond in JSON format:
{
"quality": "excellent|good|needs_work|poor",
"score": 0.85,
"acceptable": true,
"issues": [
{"type": "incomplete|incorrect|inefficient|off_topic|error", "description": "...", "severity": "critical|major|minor"}
],
"suggestions": ["..."]
}`;
  }

  /**
   * Parse the agent's reply into an EvaluationResult.
   *
   * Only string replies are parsed. The first JSON object found in the text is
   * used and each field is validated individually, so a malformed field falls
   * back to a conservative default instead of leaking `NaN` or a non-boolean.
   * When no JSON object can be extracted, a conservative "needs_work" result
   * carrying a parse-failure issue is returned.
   *
   * @param result - Raw value reported by the agent session.
   * @param _evalTrajectory - Trajectory of the evaluation run (currently unused).
   */
  private parseAgentEvaluation(
    result: unknown,
    _evalTrajectory: Trajectory
  ): EvaluationResult {
    const asString = (value: unknown): string | undefined =>
      typeof value === 'string' ? value : undefined;

    const parsed =
      typeof result === 'string' ? this.extractJsonObject(result) : undefined;

    if (parsed) {
      // Accept numbers and numeric strings; anything else uses the neutral 0.5.
      const rawScore = parsed.score;
      const numericScore =
        typeof rawScore === 'number'
          ? rawScore
          : typeof rawScore === 'string' && rawScore.trim() !== ''
            ? Number(rawScore)
            : Number.NaN;
      const score = Number.isFinite(numericScore)
        ? Math.min(1, Math.max(0, numericScore))
        : 0.5;

      const rawIssues: unknown[] = Array.isArray(parsed.issues)
        ? parsed.issues
        : [];

      return createEvaluationResult({
        quality: this.parseQuality(asString(parsed.quality)),
        score,
        // Only a literal JSON `true` counts as acceptable.
        acceptable: parsed.acceptable === true,
        issues: rawIssues
          .filter(
            (issue): issue is Record<string, unknown> =>
              issue !== null && typeof issue === 'object'
          )
          .map((issue) => ({
            type: this.parseIssueType(asString(issue.type)),
            description: asString(issue.description) ?? 'Unknown issue',
            severity: this.parseSeverity(asString(issue.severity)),
            suggestion: undefined,
          })),
        method: 'agent',
        rawResponse: result as string,
      });
    }

    // If we can't parse, return a conservative result
    return createEvaluationResult({
      quality: 'needs_work',
      score: 0.5,
      acceptable: false,
      issues: [
        {
          type: 'error',
          description: 'Could not parse agent evaluation response',
          severity: 'major',
        },
      ],
      method: 'agent',
      rawResponse: typeof result === 'string' ? result : JSON.stringify(result),
    });
  }

  /**
   * Find the first substring of `text` that is a brace-balanced JSON object.
   *
   * Each `{` is tried in order as a starting point. Its matching `}` is found
   * by counting nesting depth while skipping over double-quoted strings, so
   * prose containing braces before or after the JSON does not break parsing.
   *
   * @returns The parsed object, or `undefined` when none is found.
   */
  private extractJsonObject(text: string): Record<string, unknown> | undefined {
    for (
      let start = text.indexOf('{');
      start !== -1;
      start = text.indexOf('{', start + 1)
    ) {
      let depth = 0;
      let inString = false;
      let escaped = false;
      let end = -1;

      for (let i = start; i < text.length; i++) {
        const ch = text[i];
        if (inString) {
          if (escaped) escaped = false;
          else if (ch === '\\') escaped = true;
          else if (ch === '"') inString = false;
          continue;
        }
        if (ch === '"') {
          inString = true;
        } else if (ch === '{') {
          depth++;
        } else if (ch === '}') {
          depth--;
          if (depth === 0) {
            end = i;
            break;
          }
        }
      }

      if (end === -1) continue;

      try {
        const candidate: unknown = JSON.parse(text.slice(start, end + 1));
        if (
          candidate !== null &&
          typeof candidate === 'object' &&
          !Array.isArray(candidate)
        ) {
          return candidate as Record<string, unknown>;
        }
      } catch {
        // Not valid JSON from this brace; try the next one.
      }
    }
    return undefined;
  }

  /**
   * Heuristic evaluation based on trajectory properties.
   *
   * Starts from a neutral 0.5 and adjusts:
   * +0.3 on success / -0.2 on failure; -0.2 when there are no steps;
   * -0.1 when there are more than 20 steps; -0.1 per step whose observation
   * contains "error" (capped at three steps); +0.1 when the run succeeded and
   * at least one step has `attributionScore >= 0.15`. The score is clamped to
   * [0, 1]; the result is acceptable only for a successful run scoring >= 0.6.
   */
  private evaluateHeuristic(
    trajectory: Trajectory,
    _task: Task
  ): EvaluationResult {
    const issues: Array<{
      type: 'incomplete' | 'incorrect' | 'inefficient' | 'off_topic' | 'error';
      description: string;
      severity: 'critical' | 'major' | 'minor';
    }> = [];
    const stepCount = trajectory.steps.length;
    const succeeded = trajectory.outcome.success;

    let score = 0.5; // Start neutral

    // Outcome
    if (succeeded) {
      score += 0.3;
    } else {
      score -= 0.2;
      issues.push({
        type: 'error',
        description:
          trajectory.outcome.errorInfo ?? 'Task did not complete successfully',
        severity: 'critical',
      });
    }

    // Step count: none at all, or suspiciously many
    if (stepCount === 0) {
      score -= 0.2;
      issues.push({
        type: 'incomplete',
        description: 'No steps were taken to solve the task',
        severity: 'critical',
      });
    } else if (stepCount > 20) {
      score -= 0.1;
      issues.push({
        type: 'inefficient',
        description: `Solution took ${stepCount} steps, which may indicate inefficiency`,
        severity: 'minor',
      });
    }

    // Steps whose observation mentions an error
    const errorSteps = trajectory.steps.filter(
      (step) => step.observation?.toLowerCase().includes('error')
    );
    if (errorSteps.length > 0) {
      score -= 0.1 * Math.min(3, errorSteps.length);
      issues.push({
        type: 'error',
        description: `${errorSteps.length} step(s) encountered errors`,
        severity: errorSteps.length > 2 ? 'major' : 'minor',
      });
    }

    // Key steps (high attribution) only count for successful runs
    const hasKeyStep = trajectory.steps.some(
      (step) => (step.attributionScore ?? 0) >= 0.15
    );
    if (hasKeyStep && succeeded) {
      score += 0.1;
    }

    // Normalize score to [0, 1]
    score = Math.max(0, Math.min(1, score));

    return createEvaluationResult({
      quality: scoreToQuality(score),
      score,
      acceptable: succeeded && score >= 0.6,
      issues,
      method: 'heuristic',
    });
  }

  /**
   * Map a free-form quality label to a quality level.
   * Matching ignores case and surrounding whitespace; "needs work" is accepted
   * as an alias. Unknown or missing labels map to 'needs_work'.
   */
  private parseQuality(
    quality?: string
  ): 'excellent' | 'good' | 'needs_work' | 'poor' {
    switch (quality?.trim().toLowerCase()) {
      case 'excellent':
        return 'excellent';
      case 'good':
        return 'good';
      case 'poor':
        return 'poor';
      case 'needs_work':
      case 'needs work':
      default:
        return 'needs_work';
    }
  }

  /**
   * Map a free-form issue type to a known issue type.
   * Matching ignores case and surrounding whitespace; "off-topic" is accepted
   * as an alias. Unknown or missing types map to 'error'.
   */
  private parseIssueType(
    type?: string
  ): 'incomplete' | 'incorrect' | 'inefficient' | 'off_topic' | 'error' {
    switch (type?.trim().toLowerCase()) {
      case 'incomplete':
        return 'incomplete';
      case 'incorrect':
        return 'incorrect';
      case 'inefficient':
        return 'inefficient';
      case 'off_topic':
      case 'off-topic':
        return 'off_topic';
      case 'error':
      default:
        return 'error';
    }
  }

  /**
   * Map a free-form severity to a known severity.
   * Matching ignores case and surrounding whitespace. Unknown or missing
   * severities map to 'major'.
   */
  private parseSeverity(severity?: string): 'critical' | 'major' | 'minor' {
    switch (severity?.trim().toLowerCase()) {
      case 'critical':
        return 'critical';
      case 'minor':
        return 'minor';
      case 'major':
      default:
        return 'major';
    }
  }
}
465
+
466
/**
 * Factory for {@link SolutionEvaluator}.
 *
 * @param agentManager - Manager handed to the evaluator; may be `null`.
 * @param config - Optional evaluator settings, forwarded unchanged.
 * @returns A newly constructed evaluator.
 */
export function createSolutionEvaluator(
  agentManager: AgentManager | null,
  config?: EvaluatorConfig
): SolutionEvaluator {
  const evaluator = new SolutionEvaluator(agentManager, config);
  return evaluator;
}
@@ -0,0 +1,59 @@
1
+ export {
2
+ TaskRouter,
3
+ createRouter,
4
+ type RoutingDecision,
5
+ } from './router.js';
6
+
7
+ export {
8
+ DirectSolver,
9
+ createSolver,
10
+ type SolverConfig,
11
+ type SolverResult,
12
+ } from './solver.js';
13
+
14
+ // Refinement types
15
+ export {
16
+ type EvaluationResult,
17
+ type EvaluationIssue,
18
+ type EvaluationMethod,
19
+ type QualityLevel,
20
+ type FailureContext,
21
+ type RefinementResult,
22
+ type IssueType,
23
+ type IssueSeverity,
24
+ createEvaluationResult,
25
+ scoreToQuality,
26
+ isQualityAcceptable,
27
+ EvaluationResultSchema,
28
+ EvaluationIssueSchema,
29
+ QualityLevelSchema,
30
+ IssueSeveritySchema,
31
+ IssueTypeSchema,
32
+ EvaluationMethodSchema,
33
+ } from './refinement-types.js';
34
+
35
+ // Solution evaluator
36
+ export {
37
+ SolutionEvaluator,
38
+ createSolutionEvaluator,
39
+ type EvaluatorConfig,
40
+ type VerificationFunction,
41
+ type VerificationResult,
42
+ } from './evaluator.js';
43
+
44
+ // Refinement loop
45
+ export {
46
+ RefinementLoop,
47
+ createRefinementLoop,
48
+ type RefinementLoopConfig,
49
+ } from './refinement-loop.js';
50
+
51
+ // Verification runner
52
+ export {
53
+ VerificationRunner,
54
+ createVerificationRunner,
55
+ TestRunners,
56
+ type CommandVerificationConfig,
57
+ type CommandResult,
58
+ type VerificationIssue,
59
+ } from './verification-runner.js';