cognitive-core 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (329)
  1. package/README.md +302 -116
  2. package/SKILL.md +193 -0
  3. package/dist/agents/index.d.ts +3 -0
  4. package/dist/agents/index.d.ts.map +1 -0
  5. package/dist/agents/index.js +5 -0
  6. package/dist/agents/index.js.map +1 -0
  7. package/dist/agents/mock-provider.d.ts +23 -0
  8. package/dist/agents/mock-provider.d.ts.map +1 -0
  9. package/dist/agents/mock-provider.js +71 -0
  10. package/dist/agents/mock-provider.js.map +1 -0
  11. package/dist/agents/types.d.ts +98 -0
  12. package/dist/agents/types.d.ts.map +1 -0
  13. package/dist/agents/types.js +44 -0
  14. package/dist/agents/types.js.map +1 -0
  15. package/dist/atlas.d.ts +196 -0
  16. package/dist/atlas.d.ts.map +1 -0
  17. package/dist/atlas.js +373 -0
  18. package/dist/atlas.js.map +1 -0
  19. package/dist/bin/cognitive-core.d.ts +18 -0
  20. package/dist/bin/cognitive-core.d.ts.map +1 -0
  21. package/dist/bin/cognitive-core.js +419 -0
  22. package/dist/bin/cognitive-core.js.map +1 -0
  23. package/dist/embeddings/bm25.d.ts +104 -0
  24. package/dist/embeddings/bm25.d.ts.map +1 -0
  25. package/dist/embeddings/bm25.js +264 -0
  26. package/dist/embeddings/bm25.js.map +1 -0
  27. package/dist/embeddings/index.d.ts +12 -0
  28. package/dist/embeddings/index.d.ts.map +1 -0
  29. package/dist/embeddings/index.js +16 -0
  30. package/dist/embeddings/index.js.map +1 -0
  31. package/dist/embeddings/manager.d.ts +112 -0
  32. package/dist/embeddings/manager.d.ts.map +1 -0
  33. package/dist/embeddings/manager.js +215 -0
  34. package/dist/embeddings/manager.js.map +1 -0
  35. package/dist/embeddings/provider.d.ts +101 -0
  36. package/dist/embeddings/provider.d.ts.map +1 -0
  37. package/dist/embeddings/provider.js +232 -0
  38. package/dist/embeddings/provider.js.map +1 -0
  39. package/dist/embeddings/vector-store.d.ts +101 -0
  40. package/dist/embeddings/vector-store.d.ts.map +1 -0
  41. package/dist/embeddings/vector-store.js +256 -0
  42. package/dist/embeddings/vector-store.js.map +1 -0
  43. package/dist/factory.d.ts +193 -0
  44. package/dist/factory.d.ts.map +1 -0
  45. package/dist/factory.js +109 -0
  46. package/dist/factory.js.map +1 -0
  47. package/dist/index.d.ts +30 -453
  48. package/dist/index.d.ts.map +1 -0
  49. package/dist/index.js +84 -509
  50. package/dist/index.js.map +1 -0
  51. package/dist/learning/analyzer.d.ts +110 -0
  52. package/dist/learning/analyzer.d.ts.map +1 -0
  53. package/dist/learning/analyzer.js +213 -0
  54. package/dist/learning/analyzer.js.map +1 -0
  55. package/dist/learning/effectiveness.d.ts +158 -0
  56. package/dist/learning/effectiveness.d.ts.map +1 -0
  57. package/dist/learning/effectiveness.js +251 -0
  58. package/dist/learning/effectiveness.js.map +1 -0
  59. package/dist/learning/index.d.ts +8 -0
  60. package/dist/learning/index.d.ts.map +1 -0
  61. package/dist/learning/index.js +11 -0
  62. package/dist/learning/index.js.map +1 -0
  63. package/dist/learning/llm-extractor.d.ts +88 -0
  64. package/dist/learning/llm-extractor.d.ts.map +1 -0
  65. package/dist/learning/llm-extractor.js +372 -0
  66. package/dist/learning/llm-extractor.js.map +1 -0
  67. package/dist/learning/meta-learner.d.ts +80 -0
  68. package/dist/learning/meta-learner.d.ts.map +1 -0
  69. package/dist/learning/meta-learner.js +355 -0
  70. package/dist/learning/meta-learner.js.map +1 -0
  71. package/dist/learning/pipeline.d.ts +65 -0
  72. package/dist/learning/pipeline.d.ts.map +1 -0
  73. package/dist/learning/pipeline.js +170 -0
  74. package/dist/learning/pipeline.js.map +1 -0
  75. package/dist/learning/playbook-extractor.d.ts +113 -0
  76. package/dist/learning/playbook-extractor.d.ts.map +1 -0
  77. package/dist/learning/playbook-extractor.js +523 -0
  78. package/dist/learning/playbook-extractor.js.map +1 -0
  79. package/dist/learning/usage-inference.d.ts +82 -0
  80. package/dist/learning/usage-inference.d.ts.map +1 -0
  81. package/dist/learning/usage-inference.js +261 -0
  82. package/dist/learning/usage-inference.js.map +1 -0
  83. package/dist/mcp/index.d.ts +6 -0
  84. package/dist/mcp/index.d.ts.map +1 -0
  85. package/dist/mcp/index.js +6 -0
  86. package/dist/mcp/index.js.map +1 -0
  87. package/dist/mcp/playbook-server.d.ts +120 -0
  88. package/dist/mcp/playbook-server.d.ts.map +1 -0
  89. package/dist/mcp/playbook-server.js +427 -0
  90. package/dist/mcp/playbook-server.js.map +1 -0
  91. package/dist/memory/curated-loader.d.ts +62 -0
  92. package/dist/memory/curated-loader.d.ts.map +1 -0
  93. package/dist/memory/curated-loader.js +106 -0
  94. package/dist/memory/curated-loader.js.map +1 -0
  95. package/dist/memory/experience.d.ts +122 -0
  96. package/dist/memory/experience.d.ts.map +1 -0
  97. package/dist/memory/experience.js +392 -0
  98. package/dist/memory/experience.js.map +1 -0
  99. package/dist/memory/index.d.ts +6 -0
  100. package/dist/memory/index.d.ts.map +1 -0
  101. package/dist/memory/index.js +9 -0
  102. package/dist/memory/index.js.map +1 -0
  103. package/dist/memory/meta.d.ts +90 -0
  104. package/dist/memory/meta.d.ts.map +1 -0
  105. package/dist/memory/meta.js +362 -0
  106. package/dist/memory/meta.js.map +1 -0
  107. package/dist/memory/playbook.d.ts +133 -0
  108. package/dist/memory/playbook.d.ts.map +1 -0
  109. package/dist/memory/playbook.js +357 -0
  110. package/dist/memory/playbook.js.map +1 -0
  111. package/dist/memory/system.d.ts +167 -0
  112. package/dist/memory/system.d.ts.map +1 -0
  113. package/dist/memory/system.js +383 -0
  114. package/dist/memory/system.js.map +1 -0
  115. package/dist/runtime/backends/acp.d.ts +67 -0
  116. package/dist/runtime/backends/acp.d.ts.map +1 -0
  117. package/dist/runtime/backends/acp.js +290 -0
  118. package/dist/runtime/backends/acp.js.map +1 -0
  119. package/dist/runtime/backends/index.d.ts +5 -0
  120. package/dist/runtime/backends/index.d.ts.map +1 -0
  121. package/dist/runtime/backends/index.js +6 -0
  122. package/dist/runtime/backends/index.js.map +1 -0
  123. package/dist/runtime/backends/mock.d.ts +67 -0
  124. package/dist/runtime/backends/mock.d.ts.map +1 -0
  125. package/dist/runtime/backends/mock.js +153 -0
  126. package/dist/runtime/backends/mock.js.map +1 -0
  127. package/dist/runtime/backends/subprocess.d.ts +56 -0
  128. package/dist/runtime/backends/subprocess.d.ts.map +1 -0
  129. package/dist/runtime/backends/subprocess.js +260 -0
  130. package/dist/runtime/backends/subprocess.js.map +1 -0
  131. package/dist/runtime/flows/learning.d.ts +73 -0
  132. package/dist/runtime/flows/learning.d.ts.map +1 -0
  133. package/dist/runtime/flows/learning.js +116 -0
  134. package/dist/runtime/flows/learning.js.map +1 -0
  135. package/dist/runtime/flows/validation.d.ts +122 -0
  136. package/dist/runtime/flows/validation.d.ts.map +1 -0
  137. package/dist/runtime/flows/validation.js +223 -0
  138. package/dist/runtime/flows/validation.js.map +1 -0
  139. package/dist/runtime/index.d.ts +6 -0
  140. package/dist/runtime/index.d.ts.map +1 -0
  141. package/dist/runtime/index.js +8 -0
  142. package/dist/runtime/index.js.map +1 -0
  143. package/dist/runtime/manager.d.ts +116 -0
  144. package/dist/runtime/manager.d.ts.map +1 -0
  145. package/dist/runtime/manager.js +416 -0
  146. package/dist/runtime/manager.js.map +1 -0
  147. package/dist/runtime/types.d.ts +138 -0
  148. package/dist/runtime/types.d.ts.map +1 -0
  149. package/dist/runtime/types.js +2 -0
  150. package/dist/runtime/types.js.map +1 -0
  151. package/dist/search/evaluator.d.ts +102 -0
  152. package/dist/search/evaluator.d.ts.map +1 -0
  153. package/dist/search/evaluator.js +352 -0
  154. package/dist/search/evaluator.js.map +1 -0
  155. package/dist/search/index.d.ts +7 -0
  156. package/dist/search/index.d.ts.map +1 -0
  157. package/dist/search/index.js +11 -0
  158. package/dist/search/index.js.map +1 -0
  159. package/dist/search/refinement-loop.d.ts +73 -0
  160. package/dist/search/refinement-loop.d.ts.map +1 -0
  161. package/dist/search/refinement-loop.js +245 -0
  162. package/dist/search/refinement-loop.js.map +1 -0
  163. package/dist/search/refinement-types.d.ts +154 -0
  164. package/dist/search/refinement-types.d.ts.map +1 -0
  165. package/dist/search/refinement-types.js +99 -0
  166. package/dist/search/refinement-types.js.map +1 -0
  167. package/dist/search/router.d.ts +61 -0
  168. package/dist/search/router.d.ts.map +1 -0
  169. package/dist/search/router.js +197 -0
  170. package/dist/search/router.js.map +1 -0
  171. package/dist/search/solver.d.ts +75 -0
  172. package/dist/search/solver.d.ts.map +1 -0
  173. package/dist/search/solver.js +216 -0
  174. package/dist/search/solver.js.map +1 -0
  175. package/dist/search/verification-runner.d.ts +125 -0
  176. package/dist/search/verification-runner.d.ts.map +1 -0
  177. package/dist/search/verification-runner.js +440 -0
  178. package/dist/search/verification-runner.js.map +1 -0
  179. package/dist/surfacing/index.d.ts +2 -0
  180. package/dist/surfacing/index.d.ts.map +1 -0
  181. package/dist/surfacing/index.js +2 -0
  182. package/dist/surfacing/index.js.map +1 -0
  183. package/dist/surfacing/skill-library.d.ts +158 -0
  184. package/dist/surfacing/skill-library.d.ts.map +1 -0
  185. package/dist/surfacing/skill-library.js +429 -0
  186. package/dist/surfacing/skill-library.js.map +1 -0
  187. package/dist/types/config.d.ts +1113 -0
  188. package/dist/types/config.d.ts.map +1 -0
  189. package/dist/types/config.js +274 -0
  190. package/dist/types/config.js.map +1 -0
  191. package/dist/types/index.d.ts +9 -0
  192. package/dist/types/index.d.ts.map +1 -0
  193. package/dist/types/index.js +14 -0
  194. package/dist/types/index.js.map +1 -0
  195. package/dist/types/memory.d.ts +339 -0
  196. package/dist/types/memory.d.ts.map +1 -0
  197. package/dist/types/memory.js +207 -0
  198. package/dist/types/memory.js.map +1 -0
  199. package/dist/types/meta.d.ts +146 -0
  200. package/dist/types/meta.d.ts.map +1 -0
  201. package/dist/types/meta.js +51 -0
  202. package/dist/types/meta.js.map +1 -0
  203. package/dist/types/outcome.d.ts +42 -0
  204. package/dist/types/outcome.d.ts.map +1 -0
  205. package/dist/types/outcome.js +50 -0
  206. package/dist/types/outcome.js.map +1 -0
  207. package/dist/types/playbook.d.ts +119 -0
  208. package/dist/types/playbook.d.ts.map +1 -0
  209. package/dist/types/playbook.js +71 -0
  210. package/dist/types/playbook.js.map +1 -0
  211. package/dist/types/step.d.ts +44 -0
  212. package/dist/types/step.d.ts.map +1 -0
  213. package/dist/types/step.js +32 -0
  214. package/dist/types/step.js.map +1 -0
  215. package/dist/types/task.d.ts +91 -0
  216. package/dist/types/task.d.ts.map +1 -0
  217. package/dist/types/task.js +39 -0
  218. package/dist/types/task.js.map +1 -0
  219. package/dist/types/trajectory.d.ts +221 -0
  220. package/dist/types/trajectory.d.ts.map +1 -0
  221. package/dist/types/trajectory.js +60 -0
  222. package/dist/types/trajectory.js.map +1 -0
  223. package/dist/utils/index.d.ts +4 -0
  224. package/dist/utils/index.d.ts.map +1 -0
  225. package/dist/utils/index.js +4 -0
  226. package/dist/utils/index.js.map +1 -0
  227. package/dist/utils/similarity.d.ts +31 -0
  228. package/dist/utils/similarity.d.ts.map +1 -0
  229. package/dist/utils/similarity.js +107 -0
  230. package/dist/utils/similarity.js.map +1 -0
  231. package/dist/utils/storage.d.ts +106 -0
  232. package/dist/utils/storage.d.ts.map +1 -0
  233. package/dist/utils/storage.js +203 -0
  234. package/dist/utils/storage.js.map +1 -0
  235. package/dist/utils/validation.d.ts +129 -0
  236. package/dist/utils/validation.d.ts.map +1 -0
  237. package/dist/utils/validation.js +171 -0
  238. package/dist/utils/validation.js.map +1 -0
  239. package/package.json +50 -34
  240. package/scripts/migrate-to-playbooks.ts +307 -0
  241. package/src/agents/index.ts +14 -0
  242. package/src/agents/mock-provider.ts +93 -0
  243. package/src/agents/types.ts +137 -0
  244. package/src/atlas.ts +560 -0
  245. package/src/bin/cognitive-core.ts +470 -0
  246. package/src/embeddings/bm25.ts +337 -0
  247. package/src/embeddings/index.ts +39 -0
  248. package/src/embeddings/manager.ts +288 -0
  249. package/src/embeddings/provider.ts +311 -0
  250. package/src/embeddings/vector-store.ts +353 -0
  251. package/src/factory.ts +263 -0
  252. package/src/index.ts +246 -0
  253. package/src/learning/analyzer.ts +335 -0
  254. package/src/learning/effectiveness.ts +428 -0
  255. package/src/learning/index.ts +58 -0
  256. package/src/learning/llm-extractor.ts +542 -0
  257. package/src/learning/meta-learner.ts +516 -0
  258. package/src/learning/pipeline.ts +244 -0
  259. package/src/learning/playbook-extractor.ts +702 -0
  260. package/src/learning/usage-inference.ts +372 -0
  261. package/src/mcp/index.ts +12 -0
  262. package/src/mcp/playbook-server.ts +565 -0
  263. package/src/memory/curated-loader.ts +160 -0
  264. package/src/memory/experience.ts +515 -0
  265. package/src/memory/index.ts +27 -0
  266. package/src/memory/meta.ts +506 -0
  267. package/src/memory/playbook.ts +493 -0
  268. package/src/memory/system.ts +551 -0
  269. package/src/runtime/backends/acp.ts +378 -0
  270. package/src/runtime/backends/index.ts +24 -0
  271. package/src/runtime/backends/mock.ts +218 -0
  272. package/src/runtime/backends/subprocess.ts +356 -0
  273. package/src/runtime/flows/learning.ts +183 -0
  274. package/src/runtime/flows/validation.ts +381 -0
  275. package/src/runtime/index.ts +53 -0
  276. package/src/runtime/manager.ts +541 -0
  277. package/src/runtime/types.ts +157 -0
  278. package/src/search/evaluator.ts +474 -0
  279. package/src/search/index.ts +59 -0
  280. package/src/search/refinement-loop.ts +363 -0
  281. package/src/search/refinement-types.ts +159 -0
  282. package/src/search/router.ts +261 -0
  283. package/src/search/solver.ts +303 -0
  284. package/src/search/verification-runner.ts +570 -0
  285. package/src/surfacing/index.ts +6 -0
  286. package/src/surfacing/skill-library.ts +594 -0
  287. package/src/types/config.ts +333 -0
  288. package/src/types/index.ts +130 -0
  289. package/src/types/memory.ts +270 -0
  290. package/src/types/meta.ts +218 -0
  291. package/src/types/outcome.ts +66 -0
  292. package/src/types/playbook.ts +196 -0
  293. package/src/types/step.ts +40 -0
  294. package/src/types/task.ts +52 -0
  295. package/src/types/trajectory.ts +80 -0
  296. package/src/utils/index.ts +38 -0
  297. package/src/utils/similarity.ts +139 -0
  298. package/src/utils/storage.ts +249 -0
  299. package/src/utils/validation.ts +286 -0
  300. package/tests/embeddings/bm25.test.ts +130 -0
  301. package/tests/embeddings/manager.test.ts +205 -0
  302. package/tests/integration/atlas.test.ts +266 -0
  303. package/tests/integration/e2e.test.ts +929 -0
  304. package/tests/learning/analyzer.test.ts +426 -0
  305. package/tests/learning/effectiveness.test.ts +542 -0
  306. package/tests/learning/pipeline.test.ts +176 -0
  307. package/tests/learning/playbook-extractor-provenance.test.ts +114 -0
  308. package/tests/learning/usage-inference.test.ts +254 -0
  309. package/tests/mcp/playbook-server.test.ts +252 -0
  310. package/tests/memory/experience.test.ts +198 -0
  311. package/tests/memory/playbook.test.ts +338 -0
  312. package/tests/memory/provenance.test.ts +639 -0
  313. package/tests/memory/system.test.ts +325 -0
  314. package/tests/runtime/agent-manager.test.ts +512 -0
  315. package/tests/runtime/mock-backend.test.ts +248 -0
  316. package/tests/search/refinement-loop.test.ts +468 -0
  317. package/tests/search/refinement.test.ts +267 -0
  318. package/tests/search/router.test.ts +427 -0
  319. package/tests/surfacing/skill-library.test.ts +292 -0
  320. package/tests/types/outcome.test.ts +147 -0
  321. package/tests/types/step.test.ts +133 -0
  322. package/tests/types/task.test.ts +158 -0
  323. package/tests/types/trajectory.test.ts +253 -0
  324. package/tests/utils/similarity.test.ts +188 -0
  325. package/tests/utils/validation.test.ts +252 -0
  326. package/tsconfig.json +25 -0
  327. package/vitest.config.ts +22 -0
  328. package/dist/index.d.mts +0 -466
  329. package/dist/index.mjs +0 -478
@@ -0,0 +1,426 @@
1
+ import { describe, it, expect } from 'vitest';
2
+ import {
3
+ TrajectoryAnalyzer,
4
+ simpleCreditAssignment,
5
+ outcomeCreditAssignment,
6
+ getCreditAssignmentFn,
7
+ createAnalyzer,
8
+ type AnalyzerConfig,
9
+ } from '../../src/learning/analyzer.js';
10
+ import { createTrajectory } from '../../src/types/trajectory.js';
11
+ import { createTask } from '../../src/types/task.js';
12
+ import { createStep } from '../../src/types/step.js';
13
+ import { successOutcome, failureOutcome } from '../../src/types/outcome.js';
14
+
15
+ // Baseline AnalyzerConfig passed explicitly to the credit-assignment functions in these tests
16
+ const DEFAULT_CONFIG: AnalyzerConfig = {
17
+ creditDecayFactor: 0.5, // yields the 1.0 / 0.5 / 0.25 scores asserted for simpleCreditAssignment
18
+ actionRepetitionThreshold: 3,
19
+ attributionThreshold: 0.15,
20
+ };
21
+
22
+ describe('simpleCreditAssignment', () => { // these tests expect score = creditDecayFactor^(distance from the last step)
23
+ it('should assign exponentially decaying credit from end', async () => {
24
+ const task = createTask({ domain: 'test', description: 'test' });
25
+ const steps = [
26
+ createStep({ action: 'a1', observation: 'o1' }),
27
+ createStep({ action: 'a2', observation: 'o2' }),
28
+ createStep({ action: 'a3', observation: 'o3' }),
29
+ ];
30
+ const trajectory = createTrajectory({
31
+ task,
32
+ steps,
33
+ outcome: successOutcome('done'),
34
+ agentId: 'agent',
35
+ });
36
+
37
+ const scores = await simpleCreditAssignment(trajectory, DEFAULT_CONFIG);
38
+
39
+ expect(scores).toHaveLength(3);
40
+ expect(scores[2]).toBe(1.0); // Last step gets full credit (0.5^0)
41
+ expect(scores[1]).toBe(0.5); // Second to last (0.5^1)
42
+ expect(scores[0]).toBe(0.25); // Third to last (0.5^2)
43
+ });
44
+
45
+ it('should handle single step trajectory', async () => {
46
+ const trajectory = createTrajectory({
47
+ task: createTask({ domain: 'test', description: 'test' }),
48
+ steps: [createStep({ action: 'a', observation: 'o' })],
49
+ outcome: successOutcome('x'),
50
+ agentId: 'agent',
51
+ });
52
+
53
+ const scores = await simpleCreditAssignment(trajectory, DEFAULT_CONFIG);
54
+ expect(scores).toEqual([1.0]); // the only step is also the last step, so it gets full credit
55
+ });
56
+
57
+ it('should handle empty steps', async () => {
58
+ const trajectory = createTrajectory({
59
+ task: createTask({ domain: 'test', description: 'test' }),
60
+ steps: [],
61
+ outcome: successOutcome('x'),
62
+ agentId: 'agent',
63
+ });
64
+
65
+ const scores = await simpleCreditAssignment(trajectory, DEFAULT_CONFIG);
66
+ expect(scores).toEqual([]); // no steps, so an empty score list is expected
67
+ });
68
+
69
+ it('should use configurable decay factor', async () => {
70
+ const trajectory = createTrajectory({
71
+ task: createTask({ domain: 'test', description: 'test' }),
72
+ steps: [
73
+ createStep({ action: 'a1', observation: 'o1' }),
74
+ createStep({ action: 'a2', observation: 'o2' }),
75
+ createStep({ action: 'a3', observation: 'o3' }),
76
+ ],
77
+ outcome: successOutcome('done'),
78
+ agentId: 'agent',
79
+ });
80
+
81
+ const customConfig = { ...DEFAULT_CONFIG, creditDecayFactor: 0.8 }; // override only the decay factor
82
+ const scores = await simpleCreditAssignment(trajectory, customConfig);
83
+
84
+ expect(scores[0]).toBeCloseTo(0.64); // 0.8^2 (toBeCloseTo tolerates floating-point rounding)
85
+ expect(scores[1]).toBeCloseTo(0.8); // 0.8^1
86
+ expect(scores[2]).toBe(1.0); // 0.8^0
87
+ });
88
+ });
89
+
90
+ describe('outcomeCreditAssignment', () => {
91
+ it('should assign higher credit for successful trajectories', async () => {
92
+ const successTrajectory = createTrajectory({
93
+ task: createTask({ domain: 'test', description: 'test' }),
94
+ steps: [
95
+ createStep({ action: 'a1', observation: 'o1' }),
96
+ createStep({ action: 'a2', observation: 'o2' }),
97
+ ],
98
+ outcome: successOutcome('done'),
99
+ agentId: 'agent',
100
+ });
101
+
102
+ const failTrajectory = createTrajectory({ // same task and steps as above; only the outcome differs
103
+ task: createTask({ domain: 'test', description: 'test' }),
104
+ steps: [
105
+ createStep({ action: 'a1', observation: 'o1' }),
106
+ createStep({ action: 'a2', observation: 'o2' }),
107
+ ],
108
+ outcome: failureOutcome('error'),
109
+ agentId: 'agent',
110
+ });
111
+
112
+ const successScores = await outcomeCreditAssignment(successTrajectory, DEFAULT_CONFIG);
113
+ const failScores = await outcomeCreditAssignment(failTrajectory, DEFAULT_CONFIG);
114
+
115
+ expect(successScores[0]).toBe(0.7); // non-final step of the successful trajectory
116
+ expect(successScores[1]).toBe(1.0); // Last step of the successful trajectory is boosted
117
+ expect(failScores[0]).toBe(0.3); // failed trajectory: lower score than either successful step
118
+ expect(failScores[1]).toBe(0.3); // failed trajectory: the last step gets no boost
119
+ });
120
+ });
121
+
122
+ describe('getCreditAssignmentFn', () => {
123
+ it('should return correct function for strategy', () => {
124
+ expect(getCreditAssignmentFn('simple')).toBe(simpleCreditAssignment);
125
+ expect(getCreditAssignmentFn('llm')).toBe(simpleCreditAssignment); // 'llm' falls back to the simple strategy
126
+ expect(getCreditAssignmentFn('counterfactual')).toBe(outcomeCreditAssignment); // 'counterfactual' resolves to the outcome-based function
127
+ });
128
+ });
129
+
130
+ describe('TrajectoryAnalyzer', () => {
131
+ const task = createTask({ // task fixture shared by the tests in this describe block
132
+ domain: 'code',
133
+ description: 'Write a test function',
134
+ });
135
+
136
+ describe('analyze', () => {
137
+ it('should analyze a successful trajectory', async () => {
138
+ const steps = [
139
+ createStep({ thought: 'First, read the code', action: 'read', observation: 'Code content' }),
140
+ createStep({ thought: 'Now write the test', action: 'write', observation: 'Test written' }),
141
+ createStep({ action: 'run_test', observation: 'Tests passed' }),
142
+ ];
143
+ const trajectory = createTrajectory({
144
+ task,
145
+ steps,
146
+ outcome: successOutcome('Test implemented'),
147
+ agentId: 'agent',
148
+ });
149
+
150
+ const analyzer = new TrajectoryAnalyzer('simple'); // constructed directly; the other tests go through createAnalyzer()
151
+ const result = await analyzer.analyze(trajectory);
152
+
153
+ expect(result.success).toBe(true);
154
+ expect(result.stepAttribution).toHaveLength(3); // one attribution entry per step
155
+ expect(result.keySteps.length).toBeGreaterThan(0);
156
+ expect(result.abstractable).toBe(true);
157
+ });
158
+
159
+ it('should detect error patterns in failed trajectories', async () => {
160
+ const trajectory = createTrajectory({
161
+ task,
162
+ steps: [createStep({ action: 'try', observation: 'error' })],
163
+ outcome: failureOutcome('Type error: null is not an object'),
164
+ agentId: 'agent',
165
+ });
166
+
167
+ const analyzer = createAnalyzer();
168
+ const result = await analyzer.analyze(trajectory);
169
+
170
+ expect(result.success).toBe(false);
171
+ expect(result.errorPatterns.length).toBeGreaterThan(0);
172
+ // Accepts a pattern classified as either 'type' or 'general'
173
+ expect(result.errorPatterns.some(p => p.type === 'type' || p.type === 'general')).toBe(true);
174
+ });
175
+
176
+ it('should detect timeout errors', async () => {
177
+ const trajectory = createTrajectory({
178
+ task,
179
+ steps: [], // no steps at all; the failure message below mentions 'timeout'
180
+ outcome: failureOutcome('Operation timeout exceeded'),
181
+ agentId: 'agent',
182
+ });
183
+
184
+ const analyzer = createAnalyzer();
185
+ const result = await analyzer.analyze(trajectory);
186
+
187
+ expect(result.errorPatterns.some(p => p.type === 'timeout')).toBe(true);
188
+ });
189
+
190
+ it('should detect repeated actions', async () => {
191
+ const steps = [ // four consecutive steps with the identical action
192
+ createStep({ action: 'retry_connection', observation: 'failed' }),
193
+ createStep({ action: 'retry_connection', observation: 'failed' }),
194
+ createStep({ action: 'retry_connection', observation: 'failed' }),
195
+ createStep({ action: 'retry_connection', observation: 'failed' }),
196
+ ];
197
+ const trajectory = createTrajectory({
198
+ task,
199
+ steps,
200
+ outcome: failureOutcome('Max retries exceeded'),
201
+ agentId: 'agent',
202
+ });
203
+
204
+ const analyzer = createAnalyzer();
205
+ const result = await analyzer.analyze(trajectory);
206
+
207
+ expect(result.errorPatterns.some(p => p.type === 'repetition')).toBe(true);
208
+ });
209
+
210
+ it('should extract training examples', async () => {
211
+ const steps = [
212
+ createStep({
213
+ thought: 'I should check the input type',
214
+ action: 'validate_input(data)',
215
+ observation: 'Input is valid',
216
+ }),
217
+ createStep({
218
+ thought: 'Now process the data',
219
+ action: 'process(data)',
220
+ observation: 'Processing complete',
221
+ }),
222
+ ];
223
+ const trajectory = createTrajectory({
224
+ task,
225
+ steps,
226
+ outcome: successOutcome({ result: 'done' }),
227
+ agentId: 'agent',
228
+ });
229
+
230
+ const analyzer = createAnalyzer();
231
+ const result = await analyzer.analyze(trajectory);
232
+
233
+ expect(result.trainingExamples.length).toBeGreaterThan(0);
234
+ // At least one example must use the task description as its input (task -> solution)
235
+ expect(result.trainingExamples.some(e =>
236
+ e.input === task.description
237
+ )).toBe(true);
238
+ });
239
+
240
+ it('should apply attribution scores to steps', async () => {
241
+ const steps = [
242
+ createStep({ action: 'a1', observation: 'o1' }),
243
+ createStep({ action: 'a2', observation: 'o2' }),
244
+ ];
245
+ const trajectory = createTrajectory({
246
+ task,
247
+ steps,
248
+ outcome: successOutcome('x'),
249
+ agentId: 'agent',
250
+ });
251
+
252
+ const analyzer = createAnalyzer();
253
+ await analyzer.analyze(trajectory); // result is ignored; this test only inspects the trajectory afterwards
254
+
255
+ // After analyze(), the trajectory's own steps should carry attribution scores
256
+ expect(trajectory.steps[0].attributionScore).toBeDefined();
257
+ expect(trajectory.steps[1].attributionScore).toBeDefined();
258
+ });
259
+ });
260
+
261
+ describe('setStrategy and setThreshold', () => {
262
+ it('should change credit assignment strategy', async () => {
263
+ const analyzer = createAnalyzer('simple');
264
+
265
+ const trajectory = createTrajectory({
266
+ task,
267
+ steps: [
268
+ createStep({ action: 'a', observation: 'o' }),
269
+ createStep({ action: 'b', observation: 'o' }),
270
+ ],
271
+ outcome: successOutcome('x'),
272
+ agentId: 'agent',
273
+ });
274
+
275
+ const result1 = await analyzer.analyze(trajectory);
276
+
277
+ analyzer.setStrategy('counterfactual'); // same analyzer and trajectory, different strategy
278
+ const result2 = await analyzer.analyze(trajectory);
279
+
280
+ // 'simple' and 'counterfactual' are expected to yield different attributions for this trajectory
281
+ expect(result1.stepAttribution).not.toEqual(result2.stepAttribution);
282
+ });
283
+
284
+ it('should change attribution threshold', async () => {
285
+ const analyzer = createAnalyzer();
286
+
287
+ const steps = [ // NOTE(review): analyze() appears to assign attributionScore itself (see 'should apply attribution scores to steps'), so these presets may be overwritten - confirm
288
+ { ...createStep({ action: 'a', observation: 'o' }), attributionScore: 0.2 }, // preset above the 0.15 threshold
289
+ { ...createStep({ action: 'b', observation: 'o' }), attributionScore: 0.1 }, // preset between 0.05 and 0.15
290
+ ];
291
+ const trajectory = createTrajectory({
292
+ task,
293
+ steps,
294
+ outcome: successOutcome('x'),
295
+ agentId: 'agent',
296
+ });
297
+
298
+ analyzer.setThreshold(0.15);
299
+ const result1 = await analyzer.analyze(trajectory);
300
+
301
+ analyzer.setThreshold(0.05);
302
+ const result2 = await analyzer.analyze(trajectory);
303
+
304
+ // Lower threshold should include at least as many key steps (the assertion is >=, so equal counts also pass)
305
+ expect(result2.keySteps.length).toBeGreaterThanOrEqual(result1.keySteps.length);
306
+ });
307
+ });
308
+
309
+ describe('configuration', () => {
310
+ it('should get current configuration', () => {
311
+ const analyzer = createAnalyzer('simple', {
312
+ creditDecayFactor: 0.7,
313
+ actionRepetitionThreshold: 5,
314
+ });
315
+
316
+ const config = analyzer.getConfig();
317
+ expect(config.creditDecayFactor).toBe(0.7);
318
+ expect(config.actionRepetitionThreshold).toBe(5);
319
+ expect(config.attributionThreshold).toBe(0.15); // Not overridden above, so the default is expected
320
+ });
321
+
322
+ it('should update configuration', () => {
323
+ const analyzer = createAnalyzer();
324
+ analyzer.updateConfig({ creditDecayFactor: 0.9 });
325
+
326
+ const config = analyzer.getConfig();
327
+ expect(config.creditDecayFactor).toBe(0.9);
328
+ });
329
+
330
+ it('should preserve unchanged values when updating', () => {
331
+ const analyzer = createAnalyzer('simple', {
332
+ creditDecayFactor: 0.6,
333
+ actionRepetitionThreshold: 4,
334
+ });
335
+
336
+ analyzer.updateConfig({ creditDecayFactor: 0.8 }); // partial update: only the decay factor is passed
337
+
338
+ const config = analyzer.getConfig();
339
+ expect(config.creditDecayFactor).toBe(0.8);
340
+ expect(config.actionRepetitionThreshold).toBe(4); // Unchanged by the partial update
341
+ });
342
+
343
+ it('should use configurable repetition threshold', async () => {
344
+ const analyzer = createAnalyzer('simple', {
345
+ actionRepetitionThreshold: 5, // Higher than the 3 repetitions used below
346
+ });
347
+
348
+ // Only 3 repetitions of the same action - shouldn't trigger with threshold of 5
349
+ const steps = [
350
+ createStep({ action: 'same_action', observation: 'obs1' }),
351
+ createStep({ action: 'same_action', observation: 'obs2' }),
352
+ createStep({ action: 'same_action', observation: 'obs3' }),
353
+ ];
354
+
355
+ const trajectory = createTrajectory({
356
+ task,
357
+ steps,
358
+ outcome: failureOutcome('Failed'),
359
+ agentId: 'agent',
360
+ });
361
+
362
+ const result = await analyzer.analyze(trajectory);
363
+ // Should NOT detect repetition with threshold of 5
364
+ expect(result.errorPatterns.some(p => p.type === 'repetition')).toBe(false);
365
+ });
366
+ });
367
+
368
+ describe('abstractability assessment', () => {
369
+ it('should mark successful trajectories with key steps as abstractable', async () => {
370
+ const steps = [
371
+ createStep({ action: 'a1', observation: 'o1' }),
372
+ createStep({ action: 'a2', observation: 'o2' }),
373
+ createStep({ action: 'a3', observation: 'o3' }),
374
+ ];
375
+ const trajectory = createTrajectory({
376
+ task,
377
+ steps,
378
+ outcome: successOutcome('done'),
379
+ agentId: 'agent',
380
+ });
381
+
382
+ const analyzer = createAnalyzer();
383
+ const result = await analyzer.analyze(trajectory);
384
+
385
+ expect(result.abstractable).toBe(true);
386
+ });
387
+
388
+ it('should not mark simple failed trajectories as abstractable', async () => {
389
+ const trajectory = createTrajectory({
390
+ task,
391
+ steps: [createStep({ action: 'fail', observation: 'error' })],
392
+ outcome: failureOutcome('Unknown error'),
393
+ agentId: 'agent',
394
+ });
395
+
396
+ const analyzer = createAnalyzer();
397
+ const result = await analyzer.analyze(trajectory);
398
+
399
+ // Single failed step with a generic error message: expected to be non-abstractable
400
+ expect(result.abstractable).toBe(false);
401
+ });
402
+
403
+ it('should mark failed trajectories with partial progress as potentially abstractable', async () => {
404
+ const trajectory = createTrajectory({
405
+ task,
406
+ steps: [
407
+ createStep({ action: 'setup', observation: 'done' }),
408
+ createStep({ action: 'execute', observation: 'partial' }),
409
+ ],
410
+ outcome: { // hand-built outcome literal (not failureOutcome) carrying a partialScore
411
+ success: false,
412
+ partialScore: 0.5,
413
+ errorInfo: 'Timeout after partial completion',
414
+ verificationDetails: {},
415
+ },
416
+ agentId: 'agent',
417
+ });
418
+
419
+ const analyzer = createAnalyzer();
420
+ const result = await analyzer.analyze(trajectory);
421
+
422
+ // NOTE(review): only the presence of an error pattern is asserted; result.abstractable is never checked despite the test title
423
+ expect(result.errorPatterns.length).toBeGreaterThan(0);
424
+ });
425
+ });
426
+ });