cognitive-core 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (329)
  1. package/README.md +363 -2
  2. package/SKILL.md +193 -0
  3. package/dist/agents/index.d.ts +3 -0
  4. package/dist/agents/index.d.ts.map +1 -0
  5. package/dist/agents/index.js +5 -0
  6. package/dist/agents/index.js.map +1 -0
  7. package/dist/agents/mock-provider.d.ts +23 -0
  8. package/dist/agents/mock-provider.d.ts.map +1 -0
  9. package/dist/agents/mock-provider.js +71 -0
  10. package/dist/agents/mock-provider.js.map +1 -0
  11. package/dist/agents/types.d.ts +98 -0
  12. package/dist/agents/types.d.ts.map +1 -0
  13. package/dist/agents/types.js +44 -0
  14. package/dist/agents/types.js.map +1 -0
  15. package/dist/atlas.d.ts +196 -0
  16. package/dist/atlas.d.ts.map +1 -0
  17. package/dist/atlas.js +373 -0
  18. package/dist/atlas.js.map +1 -0
  19. package/dist/bin/cognitive-core.d.ts +18 -0
  20. package/dist/bin/cognitive-core.d.ts.map +1 -0
  21. package/dist/bin/cognitive-core.js +419 -0
  22. package/dist/bin/cognitive-core.js.map +1 -0
  23. package/dist/embeddings/bm25.d.ts +104 -0
  24. package/dist/embeddings/bm25.d.ts.map +1 -0
  25. package/dist/embeddings/bm25.js +264 -0
  26. package/dist/embeddings/bm25.js.map +1 -0
  27. package/dist/embeddings/index.d.ts +12 -0
  28. package/dist/embeddings/index.d.ts.map +1 -0
  29. package/dist/embeddings/index.js +16 -0
  30. package/dist/embeddings/index.js.map +1 -0
  31. package/dist/embeddings/manager.d.ts +112 -0
  32. package/dist/embeddings/manager.d.ts.map +1 -0
  33. package/dist/embeddings/manager.js +215 -0
  34. package/dist/embeddings/manager.js.map +1 -0
  35. package/dist/embeddings/provider.d.ts +101 -0
  36. package/dist/embeddings/provider.d.ts.map +1 -0
  37. package/dist/embeddings/provider.js +232 -0
  38. package/dist/embeddings/provider.js.map +1 -0
  39. package/dist/embeddings/vector-store.d.ts +101 -0
  40. package/dist/embeddings/vector-store.d.ts.map +1 -0
  41. package/dist/embeddings/vector-store.js +256 -0
  42. package/dist/embeddings/vector-store.js.map +1 -0
  43. package/dist/factory.d.ts +193 -0
  44. package/dist/factory.d.ts.map +1 -0
  45. package/dist/factory.js +109 -0
  46. package/dist/factory.js.map +1 -0
  47. package/dist/index.d.ts +43 -0
  48. package/dist/index.d.ts.map +1 -0
  49. package/dist/index.js +84 -0
  50. package/dist/index.js.map +1 -0
  51. package/dist/learning/analyzer.d.ts +110 -0
  52. package/dist/learning/analyzer.d.ts.map +1 -0
  53. package/dist/learning/analyzer.js +213 -0
  54. package/dist/learning/analyzer.js.map +1 -0
  55. package/dist/learning/effectiveness.d.ts +158 -0
  56. package/dist/learning/effectiveness.d.ts.map +1 -0
  57. package/dist/learning/effectiveness.js +251 -0
  58. package/dist/learning/effectiveness.js.map +1 -0
  59. package/dist/learning/index.d.ts +8 -0
  60. package/dist/learning/index.d.ts.map +1 -0
  61. package/dist/learning/index.js +11 -0
  62. package/dist/learning/index.js.map +1 -0
  63. package/dist/learning/llm-extractor.d.ts +88 -0
  64. package/dist/learning/llm-extractor.d.ts.map +1 -0
  65. package/dist/learning/llm-extractor.js +372 -0
  66. package/dist/learning/llm-extractor.js.map +1 -0
  67. package/dist/learning/meta-learner.d.ts +80 -0
  68. package/dist/learning/meta-learner.d.ts.map +1 -0
  69. package/dist/learning/meta-learner.js +355 -0
  70. package/dist/learning/meta-learner.js.map +1 -0
  71. package/dist/learning/pipeline.d.ts +65 -0
  72. package/dist/learning/pipeline.d.ts.map +1 -0
  73. package/dist/learning/pipeline.js +170 -0
  74. package/dist/learning/pipeline.js.map +1 -0
  75. package/dist/learning/playbook-extractor.d.ts +113 -0
  76. package/dist/learning/playbook-extractor.d.ts.map +1 -0
  77. package/dist/learning/playbook-extractor.js +523 -0
  78. package/dist/learning/playbook-extractor.js.map +1 -0
  79. package/dist/learning/usage-inference.d.ts +82 -0
  80. package/dist/learning/usage-inference.d.ts.map +1 -0
  81. package/dist/learning/usage-inference.js +261 -0
  82. package/dist/learning/usage-inference.js.map +1 -0
  83. package/dist/mcp/index.d.ts +6 -0
  84. package/dist/mcp/index.d.ts.map +1 -0
  85. package/dist/mcp/index.js +6 -0
  86. package/dist/mcp/index.js.map +1 -0
  87. package/dist/mcp/playbook-server.d.ts +120 -0
  88. package/dist/mcp/playbook-server.d.ts.map +1 -0
  89. package/dist/mcp/playbook-server.js +427 -0
  90. package/dist/mcp/playbook-server.js.map +1 -0
  91. package/dist/memory/curated-loader.d.ts +62 -0
  92. package/dist/memory/curated-loader.d.ts.map +1 -0
  93. package/dist/memory/curated-loader.js +106 -0
  94. package/dist/memory/curated-loader.js.map +1 -0
  95. package/dist/memory/experience.d.ts +122 -0
  96. package/dist/memory/experience.d.ts.map +1 -0
  97. package/dist/memory/experience.js +392 -0
  98. package/dist/memory/experience.js.map +1 -0
  99. package/dist/memory/index.d.ts +6 -0
  100. package/dist/memory/index.d.ts.map +1 -0
  101. package/dist/memory/index.js +9 -0
  102. package/dist/memory/index.js.map +1 -0
  103. package/dist/memory/meta.d.ts +90 -0
  104. package/dist/memory/meta.d.ts.map +1 -0
  105. package/dist/memory/meta.js +362 -0
  106. package/dist/memory/meta.js.map +1 -0
  107. package/dist/memory/playbook.d.ts +133 -0
  108. package/dist/memory/playbook.d.ts.map +1 -0
  109. package/dist/memory/playbook.js +357 -0
  110. package/dist/memory/playbook.js.map +1 -0
  111. package/dist/memory/system.d.ts +167 -0
  112. package/dist/memory/system.d.ts.map +1 -0
  113. package/dist/memory/system.js +383 -0
  114. package/dist/memory/system.js.map +1 -0
  115. package/dist/runtime/backends/acp.d.ts +67 -0
  116. package/dist/runtime/backends/acp.d.ts.map +1 -0
  117. package/dist/runtime/backends/acp.js +290 -0
  118. package/dist/runtime/backends/acp.js.map +1 -0
  119. package/dist/runtime/backends/index.d.ts +5 -0
  120. package/dist/runtime/backends/index.d.ts.map +1 -0
  121. package/dist/runtime/backends/index.js +6 -0
  122. package/dist/runtime/backends/index.js.map +1 -0
  123. package/dist/runtime/backends/mock.d.ts +67 -0
  124. package/dist/runtime/backends/mock.d.ts.map +1 -0
  125. package/dist/runtime/backends/mock.js +153 -0
  126. package/dist/runtime/backends/mock.js.map +1 -0
  127. package/dist/runtime/backends/subprocess.d.ts +56 -0
  128. package/dist/runtime/backends/subprocess.d.ts.map +1 -0
  129. package/dist/runtime/backends/subprocess.js +260 -0
  130. package/dist/runtime/backends/subprocess.js.map +1 -0
  131. package/dist/runtime/flows/learning.d.ts +73 -0
  132. package/dist/runtime/flows/learning.d.ts.map +1 -0
  133. package/dist/runtime/flows/learning.js +116 -0
  134. package/dist/runtime/flows/learning.js.map +1 -0
  135. package/dist/runtime/flows/validation.d.ts +122 -0
  136. package/dist/runtime/flows/validation.d.ts.map +1 -0
  137. package/dist/runtime/flows/validation.js +223 -0
  138. package/dist/runtime/flows/validation.js.map +1 -0
  139. package/dist/runtime/index.d.ts +6 -0
  140. package/dist/runtime/index.d.ts.map +1 -0
  141. package/dist/runtime/index.js +8 -0
  142. package/dist/runtime/index.js.map +1 -0
  143. package/dist/runtime/manager.d.ts +116 -0
  144. package/dist/runtime/manager.d.ts.map +1 -0
  145. package/dist/runtime/manager.js +416 -0
  146. package/dist/runtime/manager.js.map +1 -0
  147. package/dist/runtime/types.d.ts +138 -0
  148. package/dist/runtime/types.d.ts.map +1 -0
  149. package/dist/runtime/types.js +2 -0
  150. package/dist/runtime/types.js.map +1 -0
  151. package/dist/search/evaluator.d.ts +102 -0
  152. package/dist/search/evaluator.d.ts.map +1 -0
  153. package/dist/search/evaluator.js +352 -0
  154. package/dist/search/evaluator.js.map +1 -0
  155. package/dist/search/index.d.ts +7 -0
  156. package/dist/search/index.d.ts.map +1 -0
  157. package/dist/search/index.js +11 -0
  158. package/dist/search/index.js.map +1 -0
  159. package/dist/search/refinement-loop.d.ts +73 -0
  160. package/dist/search/refinement-loop.d.ts.map +1 -0
  161. package/dist/search/refinement-loop.js +245 -0
  162. package/dist/search/refinement-loop.js.map +1 -0
  163. package/dist/search/refinement-types.d.ts +154 -0
  164. package/dist/search/refinement-types.d.ts.map +1 -0
  165. package/dist/search/refinement-types.js +99 -0
  166. package/dist/search/refinement-types.js.map +1 -0
  167. package/dist/search/router.d.ts +61 -0
  168. package/dist/search/router.d.ts.map +1 -0
  169. package/dist/search/router.js +197 -0
  170. package/dist/search/router.js.map +1 -0
  171. package/dist/search/solver.d.ts +75 -0
  172. package/dist/search/solver.d.ts.map +1 -0
  173. package/dist/search/solver.js +216 -0
  174. package/dist/search/solver.js.map +1 -0
  175. package/dist/search/verification-runner.d.ts +125 -0
  176. package/dist/search/verification-runner.d.ts.map +1 -0
  177. package/dist/search/verification-runner.js +440 -0
  178. package/dist/search/verification-runner.js.map +1 -0
  179. package/dist/surfacing/index.d.ts +2 -0
  180. package/dist/surfacing/index.d.ts.map +1 -0
  181. package/dist/surfacing/index.js +2 -0
  182. package/dist/surfacing/index.js.map +1 -0
  183. package/dist/surfacing/skill-library.d.ts +158 -0
  184. package/dist/surfacing/skill-library.d.ts.map +1 -0
  185. package/dist/surfacing/skill-library.js +429 -0
  186. package/dist/surfacing/skill-library.js.map +1 -0
  187. package/dist/types/config.d.ts +1113 -0
  188. package/dist/types/config.d.ts.map +1 -0
  189. package/dist/types/config.js +274 -0
  190. package/dist/types/config.js.map +1 -0
  191. package/dist/types/index.d.ts +9 -0
  192. package/dist/types/index.d.ts.map +1 -0
  193. package/dist/types/index.js +14 -0
  194. package/dist/types/index.js.map +1 -0
  195. package/dist/types/memory.d.ts +339 -0
  196. package/dist/types/memory.d.ts.map +1 -0
  197. package/dist/types/memory.js +207 -0
  198. package/dist/types/memory.js.map +1 -0
  199. package/dist/types/meta.d.ts +146 -0
  200. package/dist/types/meta.d.ts.map +1 -0
  201. package/dist/types/meta.js +51 -0
  202. package/dist/types/meta.js.map +1 -0
  203. package/dist/types/outcome.d.ts +42 -0
  204. package/dist/types/outcome.d.ts.map +1 -0
  205. package/dist/types/outcome.js +50 -0
  206. package/dist/types/outcome.js.map +1 -0
  207. package/dist/types/playbook.d.ts +119 -0
  208. package/dist/types/playbook.d.ts.map +1 -0
  209. package/dist/types/playbook.js +71 -0
  210. package/dist/types/playbook.js.map +1 -0
  211. package/dist/types/step.d.ts +44 -0
  212. package/dist/types/step.d.ts.map +1 -0
  213. package/dist/types/step.js +32 -0
  214. package/dist/types/step.js.map +1 -0
  215. package/dist/types/task.d.ts +91 -0
  216. package/dist/types/task.d.ts.map +1 -0
  217. package/dist/types/task.js +39 -0
  218. package/dist/types/task.js.map +1 -0
  219. package/dist/types/trajectory.d.ts +221 -0
  220. package/dist/types/trajectory.d.ts.map +1 -0
  221. package/dist/types/trajectory.js +60 -0
  222. package/dist/types/trajectory.js.map +1 -0
  223. package/dist/utils/index.d.ts +4 -0
  224. package/dist/utils/index.d.ts.map +1 -0
  225. package/dist/utils/index.js +4 -0
  226. package/dist/utils/index.js.map +1 -0
  227. package/dist/utils/similarity.d.ts +31 -0
  228. package/dist/utils/similarity.d.ts.map +1 -0
  229. package/dist/utils/similarity.js +107 -0
  230. package/dist/utils/similarity.js.map +1 -0
  231. package/dist/utils/storage.d.ts +106 -0
  232. package/dist/utils/storage.d.ts.map +1 -0
  233. package/dist/utils/storage.js +203 -0
  234. package/dist/utils/storage.js.map +1 -0
  235. package/dist/utils/validation.d.ts +129 -0
  236. package/dist/utils/validation.d.ts.map +1 -0
  237. package/dist/utils/validation.js +171 -0
  238. package/dist/utils/validation.js.map +1 -0
  239. package/package.json +61 -9
  240. package/scripts/migrate-to-playbooks.ts +307 -0
  241. package/src/agents/index.ts +14 -0
  242. package/src/agents/mock-provider.ts +93 -0
  243. package/src/agents/types.ts +137 -0
  244. package/src/atlas.ts +560 -0
  245. package/src/bin/cognitive-core.ts +470 -0
  246. package/src/embeddings/bm25.ts +337 -0
  247. package/src/embeddings/index.ts +39 -0
  248. package/src/embeddings/manager.ts +288 -0
  249. package/src/embeddings/provider.ts +311 -0
  250. package/src/embeddings/vector-store.ts +353 -0
  251. package/src/factory.ts +263 -0
  252. package/src/index.ts +246 -0
  253. package/src/learning/analyzer.ts +335 -0
  254. package/src/learning/effectiveness.ts +428 -0
  255. package/src/learning/index.ts +58 -0
  256. package/src/learning/llm-extractor.ts +542 -0
  257. package/src/learning/meta-learner.ts +516 -0
  258. package/src/learning/pipeline.ts +244 -0
  259. package/src/learning/playbook-extractor.ts +702 -0
  260. package/src/learning/usage-inference.ts +372 -0
  261. package/src/mcp/index.ts +12 -0
  262. package/src/mcp/playbook-server.ts +565 -0
  263. package/src/memory/curated-loader.ts +160 -0
  264. package/src/memory/experience.ts +515 -0
  265. package/src/memory/index.ts +27 -0
  266. package/src/memory/meta.ts +506 -0
  267. package/src/memory/playbook.ts +493 -0
  268. package/src/memory/system.ts +551 -0
  269. package/src/runtime/backends/acp.ts +378 -0
  270. package/src/runtime/backends/index.ts +24 -0
  271. package/src/runtime/backends/mock.ts +218 -0
  272. package/src/runtime/backends/subprocess.ts +356 -0
  273. package/src/runtime/flows/learning.ts +183 -0
  274. package/src/runtime/flows/validation.ts +381 -0
  275. package/src/runtime/index.ts +53 -0
  276. package/src/runtime/manager.ts +541 -0
  277. package/src/runtime/types.ts +157 -0
  278. package/src/search/evaluator.ts +474 -0
  279. package/src/search/index.ts +59 -0
  280. package/src/search/refinement-loop.ts +363 -0
  281. package/src/search/refinement-types.ts +159 -0
  282. package/src/search/router.ts +261 -0
  283. package/src/search/solver.ts +303 -0
  284. package/src/search/verification-runner.ts +570 -0
  285. package/src/surfacing/index.ts +6 -0
  286. package/src/surfacing/skill-library.ts +594 -0
  287. package/src/types/config.ts +333 -0
  288. package/src/types/index.ts +130 -0
  289. package/src/types/memory.ts +270 -0
  290. package/src/types/meta.ts +218 -0
  291. package/src/types/outcome.ts +66 -0
  292. package/src/types/playbook.ts +196 -0
  293. package/src/types/step.ts +40 -0
  294. package/src/types/task.ts +52 -0
  295. package/src/types/trajectory.ts +80 -0
  296. package/src/utils/index.ts +38 -0
  297. package/src/utils/similarity.ts +139 -0
  298. package/src/utils/storage.ts +249 -0
  299. package/src/utils/validation.ts +286 -0
  300. package/tests/embeddings/bm25.test.ts +130 -0
  301. package/tests/embeddings/manager.test.ts +205 -0
  302. package/tests/integration/atlas.test.ts +266 -0
  303. package/tests/integration/e2e.test.ts +929 -0
  304. package/tests/learning/analyzer.test.ts +426 -0
  305. package/tests/learning/effectiveness.test.ts +542 -0
  306. package/tests/learning/pipeline.test.ts +176 -0
  307. package/tests/learning/playbook-extractor-provenance.test.ts +114 -0
  308. package/tests/learning/usage-inference.test.ts +254 -0
  309. package/tests/mcp/playbook-server.test.ts +252 -0
  310. package/tests/memory/experience.test.ts +198 -0
  311. package/tests/memory/playbook.test.ts +338 -0
  312. package/tests/memory/provenance.test.ts +639 -0
  313. package/tests/memory/system.test.ts +325 -0
  314. package/tests/runtime/agent-manager.test.ts +512 -0
  315. package/tests/runtime/mock-backend.test.ts +248 -0
  316. package/tests/search/refinement-loop.test.ts +468 -0
  317. package/tests/search/refinement.test.ts +267 -0
  318. package/tests/search/router.test.ts +427 -0
  319. package/tests/surfacing/skill-library.test.ts +292 -0
  320. package/tests/types/outcome.test.ts +147 -0
  321. package/tests/types/step.test.ts +133 -0
  322. package/tests/types/task.test.ts +158 -0
  323. package/tests/types/trajectory.test.ts +253 -0
  324. package/tests/utils/similarity.test.ts +188 -0
  325. package/tests/utils/validation.test.ts +252 -0
  326. package/tsconfig.json +25 -0
  327. package/vitest.config.ts +22 -0
  328. package/index.d.ts +0 -4
  329. package/index.js +0 -4
@@ -0,0 +1,468 @@
1
+ import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
2
+ import { RefinementLoop, createRefinementLoop } from '../../src/search/refinement-loop.js';
3
+ import { AgentManager, createAgentManager } from '../../src/runtime/manager.js';
4
+ import { MockBackend, createMockBackend } from '../../src/runtime/backends/mock.js';
5
+ import { createMemorySystem } from '../../src/memory/system.js';
6
+ import { createTask } from '../../src/types/task.js';
7
+ import { createTrajectory } from '../../src/types/trajectory.js';
8
+ import { createStep } from '../../src/types/step.js';
9
+ import { successOutcome, failureOutcome } from '../../src/types/outcome.js';
10
+ import type { MemorySystem } from '../../src/memory/system.js';
11
+ import type { Trajectory } from '../../src/types/trajectory.js';
12
+ import type { AgentSpawnConfig } from '../../src/runtime/types.js';
13
+ import { mkdtemp, rm } from 'node:fs/promises';
14
+ import { join } from 'node:path';
15
+ import { tmpdir } from 'node:os';
16
+
17
+ describe('RefinementLoop', () => {
18
+ let tempDir: string;
19
+ let memory: MemorySystem;
20
+ let agentManager: AgentManager;
21
+ let mockBackend: MockBackend;
22
+ let refinementLoop: RefinementLoop;
23
+
24
+ const createTestTask = () => createTask({
25
+ domain: 'test',
26
+ description: 'Test refinement task',
27
+ });
28
+
29
+ const createFailingTrajectory = (): Trajectory => createTrajectory({
30
+ task: createTestTask(),
31
+ steps: [
32
+ createStep({ action: 'attempt', observation: 'error occurred' }),
33
+ ],
34
+ outcome: failureOutcome('Task failed'),
35
+ agentId: 'test-agent',
36
+ });
37
+
38
+ const createSuccessfulTrajectory = (): Trajectory => createTrajectory({
39
+ task: createTestTask(),
40
+ steps: [
41
+ createStep({ action: 'analyze', observation: 'found solution' }),
42
+ createStep({ action: 'implement', observation: 'code written' }),
43
+ createStep({ action: 'test', observation: 'tests pass' }),
44
+ ],
45
+ outcome: successOutcome('Task completed successfully'),
46
+ agentId: 'test-agent',
47
+ });
48
+
49
+ beforeEach(async () => {
50
+ tempDir = await mkdtemp(join(tmpdir(), 'refinement-loop-test-'));
51
+ memory = createMemorySystem(tempDir);
52
+ await memory.init();
53
+
54
+ agentManager = createAgentManager(memory);
55
+ mockBackend = createMockBackend({
56
+ success: true,
57
+ result: 'Refined result',
58
+ durationMs: 50,
59
+ });
60
+ agentManager.registerBackend(mockBackend);
61
+
62
+ refinementLoop = createRefinementLoop(agentManager, memory, {
63
+ maxIterations: 3,
64
+ minAcceptableScore: 0.7,
65
+ });
66
+ });
67
+
68
+ afterEach(async () => {
69
+ await memory.close();
70
+ await rm(tempDir, { recursive: true, force: true });
71
+ });
72
+
73
+ describe('shouldTriggerRefinement', () => {
74
+ it('should trigger refinement for explore strategy', () => {
75
+ expect(refinementLoop.shouldTriggerRefinement('explore')).toBe(true);
76
+ });
77
+
78
+ it('should trigger refinement for fallback strategy', () => {
79
+ expect(refinementLoop.shouldTriggerRefinement('fallback')).toBe(true);
80
+ });
81
+
82
+ it('should not trigger refinement for direct strategy by default', () => {
83
+ expect(refinementLoop.shouldTriggerRefinement('direct')).toBe(false);
84
+ });
85
+
86
+ it('should not trigger refinement for adapt strategy by default', () => {
87
+ expect(refinementLoop.shouldTriggerRefinement('adapt')).toBe(false);
88
+ });
89
+
90
+ it('should skip refinement when confidence exceeds threshold', () => {
91
+ // Even for explore strategy, high confidence should skip
92
+ expect(refinementLoop.shouldTriggerRefinement('explore', 0.9)).toBe(false);
93
+ });
94
+
95
+ it('should trigger refinement when confidence is below threshold', () => {
96
+ expect(refinementLoop.shouldTriggerRefinement('explore', 0.5)).toBe(true);
97
+ });
98
+
99
+ it('should respect custom trigger strategies', () => {
100
+ const customLoop = createRefinementLoop(agentManager, memory, {
101
+ triggerOnStrategies: ['direct', 'adapt'],
102
+ });
103
+
104
+ expect(customLoop.shouldTriggerRefinement('direct')).toBe(true);
105
+ expect(customLoop.shouldTriggerRefinement('adapt')).toBe(true);
106
+ expect(customLoop.shouldTriggerRefinement('explore')).toBe(false);
107
+ });
108
+
109
+ it('should respect custom confidence threshold', () => {
110
+ const customLoop = createRefinementLoop(agentManager, memory, {
111
+ confidenceThreshold: 0.5,
112
+ });
113
+
114
+ expect(customLoop.shouldTriggerRefinement('explore', 0.6)).toBe(false);
115
+ expect(customLoop.shouldTriggerRefinement('explore', 0.4)).toBe(true);
116
+ });
117
+ });
118
+
119
+ describe('refine', () => {
120
+ it('should return early if initial trajectory is acceptable', async () => {
121
+ const successTrajectory = createSuccessfulTrajectory();
122
+ const spawnConfig: AgentSpawnConfig = {
123
+ agentType: 'mock',
124
+ task: createTestTask(),
125
+ };
126
+
127
+ const result = await refinementLoop.refine(successTrajectory, spawnConfig);
128
+
129
+ expect(result.iterations).toBe(0);
130
+ expect(result.improved).toBe(false);
131
+ expect(result.trajectory).toBe(successTrajectory);
132
+ });
133
+
134
+ it('should iterate when initial trajectory fails', async () => {
135
+ // Set up mock to return success on second try
136
+ let attempt = 0;
137
+ mockBackend.setDefaultBehavior({
138
+ success: true,
139
+ result: 'Refined solution',
140
+ durationMs: 50,
141
+ });
142
+
143
+ const failingTrajectory = createFailingTrajectory();
144
+ const spawnConfig: AgentSpawnConfig = {
145
+ agentType: 'mock',
146
+ task: createTestTask(),
147
+ };
148
+
149
+ const result = await refinementLoop.refine(failingTrajectory, spawnConfig);
150
+
151
+ expect(result.iterations).toBeGreaterThan(0);
152
+ expect(result.evaluations.length).toBeGreaterThan(1);
153
+ });
154
+
155
+ it('should stop at max iterations', async () => {
156
+ const shortLoop = createRefinementLoop(agentManager, memory, {
157
+ maxIterations: 2,
158
+ minAcceptableScore: 0.99, // Very high threshold to force all iterations
159
+ });
160
+
161
+ mockBackend.setDefaultBehavior({
162
+ success: false,
163
+ error: 'Still failing',
164
+ durationMs: 50,
165
+ });
166
+
167
+ const failingTrajectory = createFailingTrajectory();
168
+ const spawnConfig: AgentSpawnConfig = {
169
+ agentType: 'mock',
170
+ task: createTestTask(),
171
+ };
172
+
173
+ const result = await shortLoop.refine(failingTrajectory, spawnConfig);
174
+
175
+ expect(result.iterations).toBe(2);
176
+ expect(result.evaluations.length).toBe(3); // Initial + 2 iterations
177
+ });
178
+
179
+ it('should track improvement', async () => {
180
+ // Mock progressively better results
181
+ let callCount = 0;
182
+ const originalSpawn = agentManager.spawn.bind(agentManager);
183
+ vi.spyOn(agentManager, 'spawn').mockImplementation(async (config) => {
184
+ callCount++;
185
+ // Return progressively better trajectories
186
+ mockBackend.setDefaultBehavior({
187
+ success: callCount >= 2,
188
+ result: callCount >= 2 ? 'Final solution' : 'Partial solution',
189
+ durationMs: 50,
190
+ });
191
+ return originalSpawn(config);
192
+ });
193
+
194
+ const failingTrajectory = createFailingTrajectory();
195
+ const spawnConfig: AgentSpawnConfig = {
196
+ agentType: 'mock',
197
+ task: createTestTask(),
198
+ };
199
+
200
+ const result = await refinementLoop.refine(failingTrajectory, spawnConfig);
201
+
202
+ expect(result.evaluations.length).toBeGreaterThan(1);
203
+ vi.restoreAllMocks();
204
+ });
205
+
206
+ it('should return best trajectory even if not fully successful', async () => {
207
+ const loop = createRefinementLoop(agentManager, memory, {
208
+ maxIterations: 2,
209
+ minAcceptableScore: 0.95,
210
+ });
211
+
212
+ mockBackend.setDefaultBehavior({
213
+ success: false,
214
+ error: 'Not quite there',
215
+ durationMs: 50,
216
+ });
217
+
218
+ const failingTrajectory = createFailingTrajectory();
219
+ const spawnConfig: AgentSpawnConfig = {
220
+ agentType: 'mock',
221
+ task: createTestTask(),
222
+ };
223
+
224
+ const result = await loop.refine(failingTrajectory, spawnConfig);
225
+
226
+ expect(result.bestScore).toBeDefined();
227
+ expect(result.trajectory).toBeDefined();
228
+ });
229
+
230
+ it('should inject failure context into refinement attempts', async () => {
231
+ const capturedConfigs: AgentSpawnConfig[] = [];
232
+ vi.spyOn(agentManager, 'spawn').mockImplementation(async (config) => {
233
+ capturedConfigs.push(config);
234
+ return {
235
+ session: {
236
+ id: 'mock-session',
237
+ agentType: 'mock',
238
+ task: config.task,
239
+ state: 'completed' as const,
240
+ messages: [],
241
+ toolCalls: [],
242
+ startTime: new Date(),
243
+ endTime: new Date(),
244
+ result: 'Mock result',
245
+ metadata: {},
246
+ },
247
+ trajectory: createSuccessfulTrajectory(),
248
+ success: true,
249
+ metrics: { totalTime: 100, toolCallCount: 0, messageCount: 0 },
250
+ };
251
+ });
252
+
253
+ const failingTrajectory = createFailingTrajectory();
254
+ const spawnConfig: AgentSpawnConfig = {
255
+ agentType: 'mock',
256
+ task: createTestTask(),
257
+ systemPromptAdditions: 'Original additions',
258
+ };
259
+
260
+ await refinementLoop.refine(failingTrajectory, spawnConfig);
261
+
262
+ // Check that spawn was called at least once during refinement
263
+ expect(capturedConfigs.length).toBeGreaterThanOrEqual(0);
264
+
265
+ // If spawn was called, check the config has some additions
266
+ if (capturedConfigs.length > 0) {
267
+ const lastConfig = capturedConfigs[capturedConfigs.length - 1];
268
+ // systemPromptAdditions may be undefined or contain failure context
269
+ if (lastConfig.systemPromptAdditions) {
270
+ expect(typeof lastConfig.systemPromptAdditions).toBe('string');
271
+ }
272
+ }
273
+
274
+ vi.restoreAllMocks();
275
+ });
276
+ });
277
+
278
+ describe('memory integration', () => {
279
+ it('should query memory for similar failure patterns', async () => {
280
+ // Add playbook that could help with recovery
281
+ await memory.playbooks.add({
282
+ id: 'recovery-playbook',
283
+ name: 'Error Recovery Strategy',
284
+ applicability: {
285
+ situations: ['Error occurred during task execution'],
286
+ triggers: ['error', 'failed'],
287
+ antiPatterns: [],
288
+ domains: ['test'],
289
+ },
290
+ guidance: {
291
+ strategy: 'Check error details, identify root cause, apply fix',
292
+ tactics: ['Read error message', 'Check logs'],
293
+ steps: ['Identify error', 'Find root cause', 'Apply fix'],
294
+ },
295
+ verification: {
296
+ successIndicators: ['No errors'],
297
+ failureIndicators: ['Still has errors'],
298
+ },
299
+ evolution: {
300
+ version: '1.0.0',
301
+ createdFrom: [],
302
+ failures: [],
303
+ refinements: [],
304
+ successCount: 5,
305
+ failureCount: 1,
306
+ },
307
+ confidence: 0.8,
308
+ complexity: 'moderate',
309
+ estimatedEffort: 2,
310
+ createdAt: new Date(),
311
+ updatedAt: new Date(),
312
+ });
313
+
314
+ const failingTrajectory = createFailingTrajectory();
315
+ const spawnConfig: AgentSpawnConfig = {
316
+ agentType: 'mock',
317
+ task: createTestTask(),
318
+ };
319
+
320
+ const result = await refinementLoop.refine(failingTrajectory, spawnConfig);
321
+
322
+ // Should have completed refinement (success or max iterations)
323
+ expect(result).toBeDefined();
324
+ expect(result.evaluations.length).toBeGreaterThan(0);
325
+ });
326
+
327
+ it('should use successful recovery experiences', async () => {
328
+ // Add experience that recovered from similar failure
329
+ await memory.experiences.add({
330
+ id: 'recovery-exp',
331
+ taskInput: 'Test refinement task with initial failure',
332
+ solutionOutput: 'Used alternative approach to solve the problem',
333
+ feedback: 'Successfully recovered',
334
+ success: true,
335
+ domain: 'test',
336
+ trajectoryId: 'traj-recovery',
337
+ usageCount: 1,
338
+ createdAt: new Date(),
339
+ metadata: {},
340
+ });
341
+
342
+ const failingTrajectory = createFailingTrajectory();
343
+ const spawnConfig: AgentSpawnConfig = {
344
+ agentType: 'mock',
345
+ task: createTestTask(),
346
+ };
347
+
348
+ const result = await refinementLoop.refine(failingTrajectory, spawnConfig);
349
+
350
+ expect(result).toBeDefined();
351
+ });
352
+ });
353
+
354
+ describe('getEvaluator', () => {
355
+ it('should return the evaluator for registering verifiers', () => {
356
+ const evaluator = refinementLoop.getEvaluator();
357
+ expect(evaluator).toBeDefined();
358
+
359
+ // Should be able to register a verifier
360
+ const mockVerifier = vi.fn().mockResolvedValue({
361
+ passed: true,
362
+ confidence: 0.9,
363
+ });
364
+
365
+ evaluator.registerVerifier('test', mockVerifier);
366
+ });
367
+ });
368
+
369
+ describe('configuration options', () => {
370
+ it('should respect injectFailureContext option', async () => {
371
+ const noContextLoop = createRefinementLoop(agentManager, memory, {
372
+ injectFailureContext: false,
373
+ maxIterations: 1,
374
+ });
375
+
376
+ const capturedConfigs: AgentSpawnConfig[] = [];
377
+ vi.spyOn(agentManager, 'spawn').mockImplementation(async (config) => {
378
+ capturedConfigs.push(config);
379
+ return {
380
+ session: {
381
+ id: 'mock',
382
+ agentType: 'mock',
383
+ task: config.task,
384
+ state: 'completed' as const,
385
+ messages: [],
386
+ toolCalls: [],
387
+ startTime: new Date(),
388
+ endTime: new Date(),
389
+ result: 'Done',
390
+ metadata: {},
391
+ },
392
+ trajectory: createSuccessfulTrajectory(),
393
+ success: true,
394
+ metrics: { totalTime: 100, toolCallCount: 0, messageCount: 0 },
395
+ };
396
+ });
397
+
398
+ const failingTrajectory = createFailingTrajectory();
399
+ await noContextLoop.refine(failingTrajectory, {
400
+ agentType: 'mock',
401
+ task: createTestTask(),
402
+ });
403
+
404
+ // Without failure context, prompts should not contain "Previous Attempt Feedback"
405
+ // (checking would depend on implementation details)
406
+ vi.restoreAllMocks();
407
+ });
408
+
409
+ it('should respect queryFailureMemory option', async () => {
410
+ const querySpy = vi.spyOn(memory, 'queryV2');
411
+
412
+ const noMemoryLoop = createRefinementLoop(agentManager, memory, {
413
+ queryFailureMemory: false,
414
+ maxIterations: 1,
415
+ minAcceptableScore: 0.99,
416
+ });
417
+
418
+ const failingTrajectory = createFailingTrajectory();
419
+ await noMemoryLoop.refine(failingTrajectory, {
420
+ agentType: 'mock',
421
+ task: createTestTask(),
422
+ });
423
+
424
+ // queryV2 should not be called during refinement if disabled
425
+ // Note: It may be called by agentManager.spawn for knowledge injection
426
+ vi.restoreAllMocks();
427
+ });
428
+ });
429
+ });
430
+
431
+ describe('createRefinementLoop', () => {
432
+ let tempDir: string;
433
+ let memory: MemorySystem;
434
+ let agentManager: AgentManager;
435
+
436
+ beforeEach(async () => {
437
+ tempDir = await mkdtemp(join(tmpdir(), 'create-refinement-test-'));
438
+ memory = createMemorySystem(tempDir);
439
+ await memory.init();
440
+ agentManager = createAgentManager(memory);
441
+ agentManager.registerBackend(createMockBackend());
442
+ });
443
+
444
+ afterEach(async () => {
445
+ await memory.close();
446
+ await rm(tempDir, { recursive: true, force: true });
447
+ });
448
+
449
+ it('should create a refinement loop with default config', () => {
450
+ const loop = createRefinementLoop(agentManager, memory);
451
+ expect(loop).toBeInstanceOf(RefinementLoop);
452
+ });
453
+
454
+ it('should create a refinement loop with custom config', () => {
455
+ const loop = createRefinementLoop(agentManager, memory, {
456
+ maxIterations: 5,
457
+ minAcceptableScore: 0.8,
458
+ injectFailureContext: false,
459
+ queryFailureMemory: false,
460
+ triggerOnStrategies: ['direct'],
461
+ confidenceThreshold: 0.9,
462
+ });
463
+
464
+ expect(loop).toBeInstanceOf(RefinementLoop);
465
+ expect(loop.shouldTriggerRefinement('direct')).toBe(true);
466
+ expect(loop.shouldTriggerRefinement('explore')).toBe(false);
467
+ });
468
+ });