cognitive-core 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (329) hide show
  1. package/README.md +302 -116
  2. package/SKILL.md +193 -0
  3. package/dist/agents/index.d.ts +3 -0
  4. package/dist/agents/index.d.ts.map +1 -0
  5. package/dist/agents/index.js +5 -0
  6. package/dist/agents/index.js.map +1 -0
  7. package/dist/agents/mock-provider.d.ts +23 -0
  8. package/dist/agents/mock-provider.d.ts.map +1 -0
  9. package/dist/agents/mock-provider.js +71 -0
  10. package/dist/agents/mock-provider.js.map +1 -0
  11. package/dist/agents/types.d.ts +98 -0
  12. package/dist/agents/types.d.ts.map +1 -0
  13. package/dist/agents/types.js +44 -0
  14. package/dist/agents/types.js.map +1 -0
  15. package/dist/atlas.d.ts +196 -0
  16. package/dist/atlas.d.ts.map +1 -0
  17. package/dist/atlas.js +373 -0
  18. package/dist/atlas.js.map +1 -0
  19. package/dist/bin/cognitive-core.d.ts +18 -0
  20. package/dist/bin/cognitive-core.d.ts.map +1 -0
  21. package/dist/bin/cognitive-core.js +419 -0
  22. package/dist/bin/cognitive-core.js.map +1 -0
  23. package/dist/embeddings/bm25.d.ts +104 -0
  24. package/dist/embeddings/bm25.d.ts.map +1 -0
  25. package/dist/embeddings/bm25.js +264 -0
  26. package/dist/embeddings/bm25.js.map +1 -0
  27. package/dist/embeddings/index.d.ts +12 -0
  28. package/dist/embeddings/index.d.ts.map +1 -0
  29. package/dist/embeddings/index.js +16 -0
  30. package/dist/embeddings/index.js.map +1 -0
  31. package/dist/embeddings/manager.d.ts +112 -0
  32. package/dist/embeddings/manager.d.ts.map +1 -0
  33. package/dist/embeddings/manager.js +215 -0
  34. package/dist/embeddings/manager.js.map +1 -0
  35. package/dist/embeddings/provider.d.ts +101 -0
  36. package/dist/embeddings/provider.d.ts.map +1 -0
  37. package/dist/embeddings/provider.js +232 -0
  38. package/dist/embeddings/provider.js.map +1 -0
  39. package/dist/embeddings/vector-store.d.ts +101 -0
  40. package/dist/embeddings/vector-store.d.ts.map +1 -0
  41. package/dist/embeddings/vector-store.js +256 -0
  42. package/dist/embeddings/vector-store.js.map +1 -0
  43. package/dist/factory.d.ts +193 -0
  44. package/dist/factory.d.ts.map +1 -0
  45. package/dist/factory.js +109 -0
  46. package/dist/factory.js.map +1 -0
  47. package/dist/index.d.ts +30 -453
  48. package/dist/index.d.ts.map +1 -0
  49. package/dist/index.js +84 -509
  50. package/dist/index.js.map +1 -0
  51. package/dist/learning/analyzer.d.ts +110 -0
  52. package/dist/learning/analyzer.d.ts.map +1 -0
  53. package/dist/learning/analyzer.js +213 -0
  54. package/dist/learning/analyzer.js.map +1 -0
  55. package/dist/learning/effectiveness.d.ts +158 -0
  56. package/dist/learning/effectiveness.d.ts.map +1 -0
  57. package/dist/learning/effectiveness.js +251 -0
  58. package/dist/learning/effectiveness.js.map +1 -0
  59. package/dist/learning/index.d.ts +8 -0
  60. package/dist/learning/index.d.ts.map +1 -0
  61. package/dist/learning/index.js +11 -0
  62. package/dist/learning/index.js.map +1 -0
  63. package/dist/learning/llm-extractor.d.ts +88 -0
  64. package/dist/learning/llm-extractor.d.ts.map +1 -0
  65. package/dist/learning/llm-extractor.js +372 -0
  66. package/dist/learning/llm-extractor.js.map +1 -0
  67. package/dist/learning/meta-learner.d.ts +80 -0
  68. package/dist/learning/meta-learner.d.ts.map +1 -0
  69. package/dist/learning/meta-learner.js +355 -0
  70. package/dist/learning/meta-learner.js.map +1 -0
  71. package/dist/learning/pipeline.d.ts +65 -0
  72. package/dist/learning/pipeline.d.ts.map +1 -0
  73. package/dist/learning/pipeline.js +170 -0
  74. package/dist/learning/pipeline.js.map +1 -0
  75. package/dist/learning/playbook-extractor.d.ts +113 -0
  76. package/dist/learning/playbook-extractor.d.ts.map +1 -0
  77. package/dist/learning/playbook-extractor.js +523 -0
  78. package/dist/learning/playbook-extractor.js.map +1 -0
  79. package/dist/learning/usage-inference.d.ts +82 -0
  80. package/dist/learning/usage-inference.d.ts.map +1 -0
  81. package/dist/learning/usage-inference.js +261 -0
  82. package/dist/learning/usage-inference.js.map +1 -0
  83. package/dist/mcp/index.d.ts +6 -0
  84. package/dist/mcp/index.d.ts.map +1 -0
  85. package/dist/mcp/index.js +6 -0
  86. package/dist/mcp/index.js.map +1 -0
  87. package/dist/mcp/playbook-server.d.ts +120 -0
  88. package/dist/mcp/playbook-server.d.ts.map +1 -0
  89. package/dist/mcp/playbook-server.js +427 -0
  90. package/dist/mcp/playbook-server.js.map +1 -0
  91. package/dist/memory/curated-loader.d.ts +62 -0
  92. package/dist/memory/curated-loader.d.ts.map +1 -0
  93. package/dist/memory/curated-loader.js +106 -0
  94. package/dist/memory/curated-loader.js.map +1 -0
  95. package/dist/memory/experience.d.ts +122 -0
  96. package/dist/memory/experience.d.ts.map +1 -0
  97. package/dist/memory/experience.js +392 -0
  98. package/dist/memory/experience.js.map +1 -0
  99. package/dist/memory/index.d.ts +6 -0
  100. package/dist/memory/index.d.ts.map +1 -0
  101. package/dist/memory/index.js +9 -0
  102. package/dist/memory/index.js.map +1 -0
  103. package/dist/memory/meta.d.ts +90 -0
  104. package/dist/memory/meta.d.ts.map +1 -0
  105. package/dist/memory/meta.js +362 -0
  106. package/dist/memory/meta.js.map +1 -0
  107. package/dist/memory/playbook.d.ts +133 -0
  108. package/dist/memory/playbook.d.ts.map +1 -0
  109. package/dist/memory/playbook.js +357 -0
  110. package/dist/memory/playbook.js.map +1 -0
  111. package/dist/memory/system.d.ts +167 -0
  112. package/dist/memory/system.d.ts.map +1 -0
  113. package/dist/memory/system.js +383 -0
  114. package/dist/memory/system.js.map +1 -0
  115. package/dist/runtime/backends/acp.d.ts +67 -0
  116. package/dist/runtime/backends/acp.d.ts.map +1 -0
  117. package/dist/runtime/backends/acp.js +290 -0
  118. package/dist/runtime/backends/acp.js.map +1 -0
  119. package/dist/runtime/backends/index.d.ts +5 -0
  120. package/dist/runtime/backends/index.d.ts.map +1 -0
  121. package/dist/runtime/backends/index.js +6 -0
  122. package/dist/runtime/backends/index.js.map +1 -0
  123. package/dist/runtime/backends/mock.d.ts +67 -0
  124. package/dist/runtime/backends/mock.d.ts.map +1 -0
  125. package/dist/runtime/backends/mock.js +153 -0
  126. package/dist/runtime/backends/mock.js.map +1 -0
  127. package/dist/runtime/backends/subprocess.d.ts +56 -0
  128. package/dist/runtime/backends/subprocess.d.ts.map +1 -0
  129. package/dist/runtime/backends/subprocess.js +260 -0
  130. package/dist/runtime/backends/subprocess.js.map +1 -0
  131. package/dist/runtime/flows/learning.d.ts +73 -0
  132. package/dist/runtime/flows/learning.d.ts.map +1 -0
  133. package/dist/runtime/flows/learning.js +116 -0
  134. package/dist/runtime/flows/learning.js.map +1 -0
  135. package/dist/runtime/flows/validation.d.ts +122 -0
  136. package/dist/runtime/flows/validation.d.ts.map +1 -0
  137. package/dist/runtime/flows/validation.js +223 -0
  138. package/dist/runtime/flows/validation.js.map +1 -0
  139. package/dist/runtime/index.d.ts +6 -0
  140. package/dist/runtime/index.d.ts.map +1 -0
  141. package/dist/runtime/index.js +8 -0
  142. package/dist/runtime/index.js.map +1 -0
  143. package/dist/runtime/manager.d.ts +116 -0
  144. package/dist/runtime/manager.d.ts.map +1 -0
  145. package/dist/runtime/manager.js +416 -0
  146. package/dist/runtime/manager.js.map +1 -0
  147. package/dist/runtime/types.d.ts +138 -0
  148. package/dist/runtime/types.d.ts.map +1 -0
  149. package/dist/runtime/types.js +2 -0
  150. package/dist/runtime/types.js.map +1 -0
  151. package/dist/search/evaluator.d.ts +102 -0
  152. package/dist/search/evaluator.d.ts.map +1 -0
  153. package/dist/search/evaluator.js +352 -0
  154. package/dist/search/evaluator.js.map +1 -0
  155. package/dist/search/index.d.ts +7 -0
  156. package/dist/search/index.d.ts.map +1 -0
  157. package/dist/search/index.js +11 -0
  158. package/dist/search/index.js.map +1 -0
  159. package/dist/search/refinement-loop.d.ts +73 -0
  160. package/dist/search/refinement-loop.d.ts.map +1 -0
  161. package/dist/search/refinement-loop.js +245 -0
  162. package/dist/search/refinement-loop.js.map +1 -0
  163. package/dist/search/refinement-types.d.ts +154 -0
  164. package/dist/search/refinement-types.d.ts.map +1 -0
  165. package/dist/search/refinement-types.js +99 -0
  166. package/dist/search/refinement-types.js.map +1 -0
  167. package/dist/search/router.d.ts +61 -0
  168. package/dist/search/router.d.ts.map +1 -0
  169. package/dist/search/router.js +197 -0
  170. package/dist/search/router.js.map +1 -0
  171. package/dist/search/solver.d.ts +75 -0
  172. package/dist/search/solver.d.ts.map +1 -0
  173. package/dist/search/solver.js +216 -0
  174. package/dist/search/solver.js.map +1 -0
  175. package/dist/search/verification-runner.d.ts +125 -0
  176. package/dist/search/verification-runner.d.ts.map +1 -0
  177. package/dist/search/verification-runner.js +440 -0
  178. package/dist/search/verification-runner.js.map +1 -0
  179. package/dist/surfacing/index.d.ts +2 -0
  180. package/dist/surfacing/index.d.ts.map +1 -0
  181. package/dist/surfacing/index.js +2 -0
  182. package/dist/surfacing/index.js.map +1 -0
  183. package/dist/surfacing/skill-library.d.ts +158 -0
  184. package/dist/surfacing/skill-library.d.ts.map +1 -0
  185. package/dist/surfacing/skill-library.js +429 -0
  186. package/dist/surfacing/skill-library.js.map +1 -0
  187. package/dist/types/config.d.ts +1113 -0
  188. package/dist/types/config.d.ts.map +1 -0
  189. package/dist/types/config.js +274 -0
  190. package/dist/types/config.js.map +1 -0
  191. package/dist/types/index.d.ts +9 -0
  192. package/dist/types/index.d.ts.map +1 -0
  193. package/dist/types/index.js +14 -0
  194. package/dist/types/index.js.map +1 -0
  195. package/dist/types/memory.d.ts +339 -0
  196. package/dist/types/memory.d.ts.map +1 -0
  197. package/dist/types/memory.js +207 -0
  198. package/dist/types/memory.js.map +1 -0
  199. package/dist/types/meta.d.ts +146 -0
  200. package/dist/types/meta.d.ts.map +1 -0
  201. package/dist/types/meta.js +51 -0
  202. package/dist/types/meta.js.map +1 -0
  203. package/dist/types/outcome.d.ts +42 -0
  204. package/dist/types/outcome.d.ts.map +1 -0
  205. package/dist/types/outcome.js +50 -0
  206. package/dist/types/outcome.js.map +1 -0
  207. package/dist/types/playbook.d.ts +119 -0
  208. package/dist/types/playbook.d.ts.map +1 -0
  209. package/dist/types/playbook.js +71 -0
  210. package/dist/types/playbook.js.map +1 -0
  211. package/dist/types/step.d.ts +44 -0
  212. package/dist/types/step.d.ts.map +1 -0
  213. package/dist/types/step.js +32 -0
  214. package/dist/types/step.js.map +1 -0
  215. package/dist/types/task.d.ts +91 -0
  216. package/dist/types/task.d.ts.map +1 -0
  217. package/dist/types/task.js +39 -0
  218. package/dist/types/task.js.map +1 -0
  219. package/dist/types/trajectory.d.ts +221 -0
  220. package/dist/types/trajectory.d.ts.map +1 -0
  221. package/dist/types/trajectory.js +60 -0
  222. package/dist/types/trajectory.js.map +1 -0
  223. package/dist/utils/index.d.ts +4 -0
  224. package/dist/utils/index.d.ts.map +1 -0
  225. package/dist/utils/index.js +4 -0
  226. package/dist/utils/index.js.map +1 -0
  227. package/dist/utils/similarity.d.ts +31 -0
  228. package/dist/utils/similarity.d.ts.map +1 -0
  229. package/dist/utils/similarity.js +107 -0
  230. package/dist/utils/similarity.js.map +1 -0
  231. package/dist/utils/storage.d.ts +106 -0
  232. package/dist/utils/storage.d.ts.map +1 -0
  233. package/dist/utils/storage.js +203 -0
  234. package/dist/utils/storage.js.map +1 -0
  235. package/dist/utils/validation.d.ts +129 -0
  236. package/dist/utils/validation.d.ts.map +1 -0
  237. package/dist/utils/validation.js +171 -0
  238. package/dist/utils/validation.js.map +1 -0
  239. package/package.json +50 -34
  240. package/scripts/migrate-to-playbooks.ts +307 -0
  241. package/src/agents/index.ts +14 -0
  242. package/src/agents/mock-provider.ts +93 -0
  243. package/src/agents/types.ts +137 -0
  244. package/src/atlas.ts +560 -0
  245. package/src/bin/cognitive-core.ts +470 -0
  246. package/src/embeddings/bm25.ts +337 -0
  247. package/src/embeddings/index.ts +39 -0
  248. package/src/embeddings/manager.ts +288 -0
  249. package/src/embeddings/provider.ts +311 -0
  250. package/src/embeddings/vector-store.ts +353 -0
  251. package/src/factory.ts +263 -0
  252. package/src/index.ts +246 -0
  253. package/src/learning/analyzer.ts +335 -0
  254. package/src/learning/effectiveness.ts +428 -0
  255. package/src/learning/index.ts +58 -0
  256. package/src/learning/llm-extractor.ts +542 -0
  257. package/src/learning/meta-learner.ts +516 -0
  258. package/src/learning/pipeline.ts +244 -0
  259. package/src/learning/playbook-extractor.ts +702 -0
  260. package/src/learning/usage-inference.ts +372 -0
  261. package/src/mcp/index.ts +12 -0
  262. package/src/mcp/playbook-server.ts +565 -0
  263. package/src/memory/curated-loader.ts +160 -0
  264. package/src/memory/experience.ts +515 -0
  265. package/src/memory/index.ts +27 -0
  266. package/src/memory/meta.ts +506 -0
  267. package/src/memory/playbook.ts +493 -0
  268. package/src/memory/system.ts +551 -0
  269. package/src/runtime/backends/acp.ts +378 -0
  270. package/src/runtime/backends/index.ts +24 -0
  271. package/src/runtime/backends/mock.ts +218 -0
  272. package/src/runtime/backends/subprocess.ts +356 -0
  273. package/src/runtime/flows/learning.ts +183 -0
  274. package/src/runtime/flows/validation.ts +381 -0
  275. package/src/runtime/index.ts +53 -0
  276. package/src/runtime/manager.ts +541 -0
  277. package/src/runtime/types.ts +157 -0
  278. package/src/search/evaluator.ts +474 -0
  279. package/src/search/index.ts +59 -0
  280. package/src/search/refinement-loop.ts +363 -0
  281. package/src/search/refinement-types.ts +159 -0
  282. package/src/search/router.ts +261 -0
  283. package/src/search/solver.ts +303 -0
  284. package/src/search/verification-runner.ts +570 -0
  285. package/src/surfacing/index.ts +6 -0
  286. package/src/surfacing/skill-library.ts +594 -0
  287. package/src/types/config.ts +333 -0
  288. package/src/types/index.ts +130 -0
  289. package/src/types/memory.ts +270 -0
  290. package/src/types/meta.ts +218 -0
  291. package/src/types/outcome.ts +66 -0
  292. package/src/types/playbook.ts +196 -0
  293. package/src/types/step.ts +40 -0
  294. package/src/types/task.ts +52 -0
  295. package/src/types/trajectory.ts +80 -0
  296. package/src/utils/index.ts +38 -0
  297. package/src/utils/similarity.ts +139 -0
  298. package/src/utils/storage.ts +249 -0
  299. package/src/utils/validation.ts +286 -0
  300. package/tests/embeddings/bm25.test.ts +130 -0
  301. package/tests/embeddings/manager.test.ts +205 -0
  302. package/tests/integration/atlas.test.ts +266 -0
  303. package/tests/integration/e2e.test.ts +929 -0
  304. package/tests/learning/analyzer.test.ts +426 -0
  305. package/tests/learning/effectiveness.test.ts +542 -0
  306. package/tests/learning/pipeline.test.ts +176 -0
  307. package/tests/learning/playbook-extractor-provenance.test.ts +114 -0
  308. package/tests/learning/usage-inference.test.ts +254 -0
  309. package/tests/mcp/playbook-server.test.ts +252 -0
  310. package/tests/memory/experience.test.ts +198 -0
  311. package/tests/memory/playbook.test.ts +338 -0
  312. package/tests/memory/provenance.test.ts +639 -0
  313. package/tests/memory/system.test.ts +325 -0
  314. package/tests/runtime/agent-manager.test.ts +512 -0
  315. package/tests/runtime/mock-backend.test.ts +248 -0
  316. package/tests/search/refinement-loop.test.ts +468 -0
  317. package/tests/search/refinement.test.ts +267 -0
  318. package/tests/search/router.test.ts +427 -0
  319. package/tests/surfacing/skill-library.test.ts +292 -0
  320. package/tests/types/outcome.test.ts +147 -0
  321. package/tests/types/step.test.ts +133 -0
  322. package/tests/types/task.test.ts +158 -0
  323. package/tests/types/trajectory.test.ts +253 -0
  324. package/tests/utils/similarity.test.ts +188 -0
  325. package/tests/utils/validation.test.ts +252 -0
  326. package/tsconfig.json +25 -0
  327. package/vitest.config.ts +22 -0
  328. package/dist/index.d.mts +0 -466
  329. package/dist/index.mjs +0 -478
@@ -0,0 +1,363 @@
1
+ /**
2
+ * Refinement Loop
3
+ *
4
+ * Implements ReMem-style iterative refinement that improves solutions
5
+ * using memory-augmented feedback. The loop:
6
+ * 1. Evaluates solution quality
7
+ * 2. If not acceptable, injects failure context and retries
8
+ * 3. Continues until acceptable or max iterations reached
9
+ */
10
+
11
+ import type { Task } from '../types/task.js';
12
+ import type { Trajectory } from '../types/trajectory.js';
13
+ import type { AgentManager } from '../runtime/manager.js';
14
+ import type { MemorySystem, MemoryQueryResultV2 } from '../memory/system.js';
15
+ import type { AgentSpawnConfig } from '../runtime/types.js';
16
+ import {
17
+ type RefinementResult,
18
+ type EvaluationResult,
19
+ type FailureContext,
20
+ } from './refinement-types.js';
21
+ import { SolutionEvaluator, type EvaluatorConfig } from './evaluator.js';
22
+
23
/**
 * Options accepted by the refinement loop.
 *
 * Every field is optional; the loop merges the supplied values over its
 * module-level defaults.
 */
export interface RefinementLoopConfig {
  /** Upper bound on the number of refinement iterations (default: 3) */
  maxIterations?: number;

  /** Lowest quality score that counts as acceptable (default: 0.7) */
  minAcceptableScore?: number;

  /** Include the previous attempt's issues in the refinement prompt (default: true) */
  injectFailureContext?: boolean;

  /** Look up similar failures in memory before retrying (default: true) */
  queryFailureMemory?: boolean;

  /** Settings forwarded to the solution evaluator */
  evaluatorConfig?: EvaluatorConfig;

  /** Strategies for which refinement may be triggered */
  triggerOnStrategies?: ('direct' | 'adapt' | 'explore' | 'fallback')[];

  /** Confidence at or above which refinement is skipped */
  confidenceThreshold?: number;
}
42
+
43
/**
 * Baseline settings used for every option the caller does not supply.
 */
const DEFAULT_CONFIG: Required<RefinementLoopConfig> = {
  // Iteration budget and the score a solution must reach.
  maxIterations: 3,
  minAcceptableScore: 0.7,

  // Prompt enrichment switches.
  injectFailureContext: true,
  queryFailureMemory: true,

  // Evaluator runs with its own defaults unless overridden.
  evaluatorConfig: {},

  // Only these strategies trigger refinement, and only below this confidence.
  triggerOnStrategies: ['explore', 'fallback'],
  confidenceThreshold: 0.85,
};
55
+
56
/**
 * RefinementLoop - iteratively improves a solution using evaluator feedback.
 *
 * Each pass evaluates the latest trajectory. While the evaluation is not
 * acceptable, a refinement prompt (the previous issues plus, optionally,
 * playbooks and experiences returned by memory) is appended to the spawn
 * config's system prompt additions and a new agent is spawned.
 */
export class RefinementLoop {
  private agentManager: AgentManager;
  private memory: MemorySystem;
  private evaluator: SolutionEvaluator;
  private config: Required<RefinementLoopConfig>;

  /**
   * @param agentManager - Spawns refinement attempts; also passed to the evaluator.
   * @param memory - Queried for playbooks and experiences related to a failure.
   * @param config - Partial overrides merged over the default configuration.
   */
  constructor(
    agentManager: AgentManager,
    memory: MemorySystem,
    config: RefinementLoopConfig = {}
  ) {
    this.agentManager = agentManager;
    this.memory = memory;

    // An explicit `undefined` (e.g. `{ maxIterations: options.max }`) must not
    // clobber a default: a plain spread would copy it and break the
    // `Required<>` guarantee the rest of the class relies on.
    const overrides = Object.fromEntries(
      Object.entries(config).filter(([, value]) => value !== undefined)
    ) as RefinementLoopConfig;
    this.config = { ...DEFAULT_CONFIG, ...overrides };

    this.evaluator = new SolutionEvaluator(
      agentManager,
      this.config.evaluatorConfig
    );
  }

  /**
   * Get the evaluator for registering domain verifiers
   */
  getEvaluator(): SolutionEvaluator {
    return this.evaluator;
  }

  /**
   * Check if refinement should be triggered based on strategy and confidence.
   *
   * @param strategy - Strategy that produced the solution.
   * @param confidence - Optional confidence; when omitted only the strategy is checked.
   * @returns `true` when the strategy is in `triggerOnStrategies` and the
   *   confidence (if given) is below `confidenceThreshold`.
   */
  shouldTriggerRefinement(
    strategy: 'direct' | 'adapt' | 'explore' | 'fallback',
    confidence?: number
  ): boolean {
    if (!this.config.triggerOnStrategies.includes(strategy)) {
      return false;
    }

    // A sufficiently confident result is left alone.
    if (
      confidence !== undefined &&
      confidence >= this.config.confidenceThreshold
    ) {
      return false;
    }

    return true;
  }

  /**
   * Run the refinement loop on an initial trajectory.
   *
   * @param initialTrajectory - Trajectory produced by the first attempt.
   * @param spawnConfig - Config used to spawn each retry; its
   *   `systemPromptAdditions` are extended with the refinement prompt.
   * @returns The first acceptable trajectory, or, when the iteration budget is
   *   exhausted, the highest-scoring trajectory with `success: false`.
   */
  async refine(
    initialTrajectory: Trajectory,
    spawnConfig: AgentSpawnConfig
  ): Promise<RefinementResult> {
    const evaluations: EvaluationResult[] = [];
    let currentTrajectory = initialTrajectory;
    let bestTrajectory = initialTrajectory;
    let improved = false;

    // Evaluate the initial trajectory.
    let latestEvaluation = await this.evaluator.evaluate(
      currentTrajectory,
      spawnConfig.task
    );
    evaluations.push(latestEvaluation);
    let bestScore = latestEvaluation.score;

    // Nothing to refine if the first attempt already passes.
    if (this.isAcceptable(latestEvaluation)) {
      return {
        trajectory: currentTrajectory,
        success: true,
        iterations: 0,
        evaluations,
        improved: false,
        bestScore,
      };
    }

    let iterationsRun = 0;
    while (iterationsRun < this.config.maxIterations) {
      iterationsRun++;

      // Describe what went wrong with the most recent attempt.
      const failureContext: FailureContext = {
        attemptedSolution: currentTrajectory.outcome.solution,
        evaluation: latestEvaluation,
        stepsAttempted: currentTrajectory.steps.length,
        previousAttempts: iterationsRun,
      };

      // Query memory for similar failures if enabled.
      let failureMemory: MemoryQueryResultV2 | undefined;
      if (this.config.queryFailureMemory) {
        failureMemory = await this.queryFailurePatterns(
          spawnConfig.task,
          failureContext
        );
      }

      const refinementAdditions = this.buildRefinementPrompt(
        failureContext,
        failureMemory
      );

      // Keep the caller's own additions (if any) ahead of the refinement prompt.
      const refinedConfig: AgentSpawnConfig = {
        ...spawnConfig,
        systemPromptAdditions: [
          spawnConfig.systemPromptAdditions,
          refinementAdditions,
        ]
          .filter(Boolean)
          .join('\n\n'),
      };

      const result = await this.agentManager.spawn(refinedConfig);
      currentTrajectory = result.trajectory;

      latestEvaluation = await this.evaluator.evaluate(
        currentTrajectory,
        spawnConfig.task
      );
      evaluations.push(latestEvaluation);

      // Track the best-scoring attempt so far.
      if (latestEvaluation.score > bestScore) {
        bestScore = latestEvaluation.score;
        bestTrajectory = currentTrajectory;
        improved = true;
      }

      if (this.isAcceptable(latestEvaluation)) {
        return {
          trajectory: currentTrajectory,
          success: true,
          iterations: iterationsRun,
          evaluations,
          improved,
          bestScore,
        };
      }
    }

    // Reaching this point means no evaluation satisfied `isAcceptable`, so the
    // result cannot be a success even if the best score clears the threshold
    // (the evaluator flagged that attempt as not acceptable).
    return {
      trajectory: bestTrajectory,
      success: false,
      iterations: iterationsRun,
      evaluations,
      improved,
      bestScore,
    };
  }

  /**
   * Single acceptance rule shared by every exit path of `refine`: the
   * evaluator must mark the solution acceptable AND the score must reach
   * `minAcceptableScore`.
   */
  private isAcceptable(evaluation: EvaluationResult): boolean {
    return (
      evaluation.acceptable &&
      evaluation.score >= this.config.minAcceptableScore
    );
  }

  /**
   * Query memory for similar failure patterns.
   *
   * The returned experiences are chosen by preference:
   * 1. successful experiences whose text resembles the current failure,
   * 2. otherwise any successful experiences,
   * 3. otherwise the unfiltered query results.
   *
   * @param task - Task being solved; its description and domain shape the query.
   * @param failureContext - Evaluation of the failed attempt.
   */
  private async queryFailurePatterns(
    task: Task,
    failureContext: FailureContext
  ): Promise<MemoryQueryResultV2> {
    const failureDescription = failureContext.evaluation.issues
      .map((issue) => issue.description)
      .join('; ');

    const query = `${task.description} - failed with: ${failureDescription}`;

    const results = await this.memory.queryV2(query, {
      domains: task.domain ? [task.domain] : undefined,
      includeExperiences: true,
      includePlaybooks: true,
    });

    const successful = results.experiences.filter(
      (exp) => exp.experience.success
    );
    // Successes that mention the same failure are the most useful hints.
    const recoveries = successful.filter((exp) =>
      this.hasSimilarInitialFailure(exp.experience, failureContext)
    );

    let experiences = results.experiences;
    if (recoveries.length > 0) {
      experiences = recoveries;
    } else if (successful.length > 0) {
      // These are later presented as "successfully solved", so prefer real
      // successes over the raw result list.
      experiences = successful;
    }

    return { ...results, experiences };
  }

  /**
   * Check if an experience had a similar initial failure pattern.
   *
   * Keywords are the distinct words longer than four characters taken from the
   * issue descriptions (lower-cased, surrounding punctuation removed). The
   * experience matches when at least `min(2, keywords / 2)` of them occur in
   * its step observations or solution output.
   *
   * @returns `false` when the issues yield no keywords at all.
   */
  private hasSimilarInitialFailure(
    experience: { steps?: Array<{ observation?: string }>; solutionOutput: string },
    failureContext: FailureContext
  ): boolean {
    const keywords = new Set(
      failureContext.evaluation.issues
        .flatMap((issue) => issue.description.toLowerCase().split(/\s+/))
        // "timeout." should match "timeout": trim non-alphanumeric edges.
        .map((word) => word.replace(/^[^\p{L}\p{N}]+|[^\p{L}\p{N}]+$/gu, ''))
        .filter((word) => word.length > 4)
    );

    // Without keywords there is nothing to compare; the threshold below would
    // otherwise be 0 and every experience would count as similar.
    if (keywords.size === 0) {
      return false;
    }

    const experienceText = (
      (experience.steps ?? [])
        .map((s) => s.observation ?? '')
        .join(' ') +
      ' ' +
      experience.solutionOutput
    ).toLowerCase();

    let matchCount = 0;
    for (const keyword of keywords) {
      if (experienceText.includes(keyword)) {
        matchCount++;
      }
    }

    return matchCount >= Math.min(2, keywords.size / 2);
  }

  /**
   * Shorten `text` to `maxLength` characters, appending an ellipsis only when
   * something was actually cut off.
   */
  private truncate(text: string, maxLength: number): string {
    return text.length > maxLength ? `${text.slice(0, maxLength)}...` : text;
  }

  /**
   * Build refinement prompt with failure context and memory.
   *
   * @param failureContext - Issues, score and quality of the previous attempt
   *   (emitted only when `injectFailureContext` is enabled).
   * @param failureMemory - Optional memory results; up to three playbooks and
   *   two experiences are listed when `queryFailureMemory` is enabled.
   * @returns Markdown text to append to the system prompt.
   */
  private buildRefinementPrompt(
    failureContext: FailureContext,
    failureMemory?: MemoryQueryResultV2
  ): string {
    const parts: string[] = [];

    if (this.config.injectFailureContext) {
      parts.push('## Previous Attempt Feedback');
      parts.push('');
      parts.push(
        `Your previous attempt (iteration ${failureContext.previousAttempts}) had issues:`
      );
      parts.push('');

      for (const issue of failureContext.evaluation.issues) {
        const severity = issue.severity.toUpperCase();
        parts.push(`- **[${severity}]** ${issue.type}: ${issue.description}`);
        if (issue.suggestion) {
          parts.push(` *Suggestion:* ${issue.suggestion}`);
        }
      }

      parts.push('');
      parts.push(`Quality score: ${(failureContext.evaluation.score * 100).toFixed(0)}%`);
      parts.push(`Assessment: ${failureContext.evaluation.quality}`);
      parts.push('');
    }

    if (failureMemory && this.config.queryFailureMemory) {
      // Recovery playbooks from memory (top three).
      if (failureMemory.playbooks.length > 0) {
        parts.push('## Recovery Playbooks from Memory');
        parts.push('');
        for (const { playbook, score } of failureMemory.playbooks.slice(0, 3)) {
          parts.push(
            `- **${playbook.name}** (${(score * 100).toFixed(0)}% relevant)`
          );
          parts.push(` Strategy: ${playbook.guidance.strategy}`);
          if (playbook.guidance.steps && playbook.guidance.steps.length > 0) {
            parts.push(` Steps: ${playbook.guidance.steps.slice(0, 3).join(' → ')}`);
          }
        }
        parts.push('');
      }

      // Related experiences (top two), truncated to keep the prompt short.
      if (failureMemory.experiences.length > 0) {
        parts.push('## Similar Problems Successfully Solved');
        parts.push('');
        for (const { experience } of failureMemory.experiences.slice(0, 2)) {
          parts.push(`- Task: ${this.truncate(experience.taskInput, 150)}`);
          parts.push(
            ` Solution approach: ${this.truncate(experience.solutionOutput, 200)}`
          );
        }
        parts.push('');
      }
    }

    parts.push('## Instructions for This Attempt');
    parts.push('');
    parts.push('Please address the issues identified above and try a different approach.');
    parts.push('Focus on:');
    parts.push('1. Avoiding the specific errors from the previous attempt');
    parts.push('2. Using the recovery playbooks if applicable');
    parts.push('3. Taking a more thorough or alternative approach');

    return parts.join('\n');
  }
}
353
+
354
/**
 * Factory wrapper around the `RefinementLoop` constructor.
 *
 * @param agentManager - Agent manager handed to the loop.
 * @param memory - Memory system handed to the loop.
 * @param config - Optional configuration overrides.
 * @returns A newly constructed `RefinementLoop`.
 */
export function createRefinementLoop(
  agentManager: AgentManager,
  memory: MemorySystem,
  config?: RefinementLoopConfig
): RefinementLoop {
  const loop = new RefinementLoop(agentManager, memory, config);
  return loop;
}
@@ -0,0 +1,159 @@
1
+ /**
2
+ * Refinement Types
3
+ *
4
+ * Types for the ReMem-style refinement loop that iteratively
5
+ * improves solutions using memory-augmented feedback.
6
+ */
7
+
8
+ import { z } from 'zod';
9
+
10
/**
 * Categories an evaluation issue can be filed under.
 */
export const IssueTypeSchema = z.enum([
  'incomplete',
  'incorrect',
  'inefficient',
  'off_topic',
  'error',
]);

/** Union of the literal values accepted by `IssueTypeSchema`. */
export type IssueType = z.infer<typeof IssueTypeSchema>;
22
+
23
/**
 * Severity levels an evaluation issue can carry.
 */
export const IssueSeveritySchema = z.enum([
  'critical',
  'major',
  'minor',
]);

/** Union of the literal values accepted by `IssueSeveritySchema`. */
export type IssueSeverity = z.infer<typeof IssueSeveritySchema>;
29
+
30
/**
 * A single problem reported while evaluating a solution.
 */
export const EvaluationIssueSchema = z.object({
  /** Category of the issue */
  type: IssueTypeSchema,
  /** Free-text explanation of the issue */
  description: z.string(),
  /** How serious the issue is */
  severity: IssueSeveritySchema,
  /** Optional hint on how to fix the issue */
  suggestion: z.string().optional(),
});

/** Object shape validated by `EvaluationIssueSchema`. */
export type EvaluationIssue = z.infer<typeof EvaluationIssueSchema>;
41
+
42
/**
 * Coarse quality labels assigned to a solution.
 */
export const QualityLevelSchema = z.enum([
  'excellent',
  'good',
  'needs_work',
  'poor',
]);

/** Union of the literal values accepted by `QualityLevelSchema`. */
export type QualityLevel = z.infer<typeof QualityLevelSchema>;
53
+
54
/**
 * Identifies which mechanism produced an evaluation.
 */
export const EvaluationMethodSchema = z.enum([
  'verification',
  'agent',
  'heuristic',
]);

/** Union of the literal values accepted by `EvaluationMethodSchema`. */
export type EvaluationMethod = z.infer<typeof EvaluationMethodSchema>;
64
+
65
/**
 * Outcome of evaluating how good a solution is.
 */
export const EvaluationResultSchema = z.object({
  /** Qualitative label for the solution */
  quality: QualityLevelSchema,

  /** Numeric score, constrained to the range 0 to 1 inclusive */
  score: z.number().min(0).max(1),

  /** True when the solution is considered acceptable */
  acceptable: z.boolean(),

  /** Individual issues found during evaluation */
  issues: z.array(EvaluationIssueSchema),

  /** Mechanism that produced this evaluation */
  method: EvaluationMethodSchema,

  /** Unprocessed evaluator response, kept for debugging */
  rawResponse: z.string().optional(),
});

/** Object shape validated by `EvaluationResultSchema`. */
export type EvaluationResult = z.infer<typeof EvaluationResultSchema>;
84
+
85
/**
 * Describes a failed attempt so the next attempt can react to it.
 */
export interface FailureContext {
  /** Solution produced by the failed attempt */
  attemptedSolution: unknown;

  /** Evaluation of that attempt */
  evaluation: EvaluationResult;

  /** How many steps the attempt took */
  stepsAttempted: number;

  /** How many refinement attempts came before */
  previousAttempts: number;
}
98
+
99
/**
 * What the refinement loop hands back when it finishes.
 */
export interface RefinementResult {
  /** Trajectory selected as the final result */
  trajectory: import('../types/trajectory.js').Trajectory;

  /** True when the final solution is acceptable */
  success: boolean;

  /** Count of refinement iterations that were performed */
  iterations: number;

  /** One evaluation per evaluated trajectory, in order */
  evaluations: Array<EvaluationResult>;

  /** True when refinement produced a better solution */
  improved: boolean;

  /** Highest score reached during refinement */
  bestScore: number;
}
116
+
117
/**
 * Build an `EvaluationResult` and validate it against `EvaluationResultSchema`.
 *
 * @param params - Field values for the result; `issues` falls back to an
 *   empty array when it is null or undefined.
 * @returns The value returned by `EvaluationResultSchema.parse`.
 */
export function createEvaluationResult(params: {
  quality: QualityLevel;
  score: number;
  acceptable: boolean;
  issues?: EvaluationIssue[];
  method: EvaluationMethod;
  rawResponse?: string;
}): EvaluationResult {
  const { quality, score, acceptable, method, rawResponse } = params;
  const issues = params.issues ?? [];

  const candidate = { quality, score, acceptable, issues, method, rawResponse };
  return EvaluationResultSchema.parse(candidate);
}
137
+
138
/**
 * Map a numeric score onto a quality level.
 *
 * @param score - Score to classify.
 * @returns `'excellent'` from 0.85, `'good'` from 0.7, `'needs_work'` from
 *   0.4, and `'poor'` for anything else.
 */
export function scoreToQuality(score: number): QualityLevel {
  // Lower bounds, highest first; the first band the score reaches wins.
  const bands: Array<[number, QualityLevel]> = [
    [0.85, 'excellent'],
    [0.7, 'good'],
    [0.4, 'needs_work'],
  ];

  for (const [lowerBound, level] of bands) {
    if (score >= lowerBound) {
      return level;
    }
  }

  return 'poor';
}
147
+
148
/**
 * Tell whether a quality level meets a minimum level.
 *
 * @param quality - Level being checked.
 * @param minAcceptable - Lowest level that passes (default: `'good'`).
 * @returns `true` when `quality` ranks at or above `minAcceptable`.
 */
export function isQualityAcceptable(
  quality: QualityLevel,
  minAcceptable: QualityLevel = 'good'
): boolean {
  // Ordered from worst to best; a level's rank is its position in this list.
  const worstToBest: QualityLevel[] = ['poor', 'needs_work', 'good', 'excellent'];
  const rankOf = (level: QualityLevel): number => worstToBest.indexOf(level);

  return rankOf(minAcceptable) <= rankOf(quality);
}