cognitive-core 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (329)
  1. package/README.md +302 -116
  2. package/SKILL.md +193 -0
  3. package/dist/agents/index.d.ts +3 -0
  4. package/dist/agents/index.d.ts.map +1 -0
  5. package/dist/agents/index.js +5 -0
  6. package/dist/agents/index.js.map +1 -0
  7. package/dist/agents/mock-provider.d.ts +23 -0
  8. package/dist/agents/mock-provider.d.ts.map +1 -0
  9. package/dist/agents/mock-provider.js +71 -0
  10. package/dist/agents/mock-provider.js.map +1 -0
  11. package/dist/agents/types.d.ts +98 -0
  12. package/dist/agents/types.d.ts.map +1 -0
  13. package/dist/agents/types.js +44 -0
  14. package/dist/agents/types.js.map +1 -0
  15. package/dist/atlas.d.ts +196 -0
  16. package/dist/atlas.d.ts.map +1 -0
  17. package/dist/atlas.js +373 -0
  18. package/dist/atlas.js.map +1 -0
  19. package/dist/bin/cognitive-core.d.ts +18 -0
  20. package/dist/bin/cognitive-core.d.ts.map +1 -0
  21. package/dist/bin/cognitive-core.js +419 -0
  22. package/dist/bin/cognitive-core.js.map +1 -0
  23. package/dist/embeddings/bm25.d.ts +104 -0
  24. package/dist/embeddings/bm25.d.ts.map +1 -0
  25. package/dist/embeddings/bm25.js +264 -0
  26. package/dist/embeddings/bm25.js.map +1 -0
  27. package/dist/embeddings/index.d.ts +12 -0
  28. package/dist/embeddings/index.d.ts.map +1 -0
  29. package/dist/embeddings/index.js +16 -0
  30. package/dist/embeddings/index.js.map +1 -0
  31. package/dist/embeddings/manager.d.ts +112 -0
  32. package/dist/embeddings/manager.d.ts.map +1 -0
  33. package/dist/embeddings/manager.js +215 -0
  34. package/dist/embeddings/manager.js.map +1 -0
  35. package/dist/embeddings/provider.d.ts +101 -0
  36. package/dist/embeddings/provider.d.ts.map +1 -0
  37. package/dist/embeddings/provider.js +232 -0
  38. package/dist/embeddings/provider.js.map +1 -0
  39. package/dist/embeddings/vector-store.d.ts +101 -0
  40. package/dist/embeddings/vector-store.d.ts.map +1 -0
  41. package/dist/embeddings/vector-store.js +256 -0
  42. package/dist/embeddings/vector-store.js.map +1 -0
  43. package/dist/factory.d.ts +193 -0
  44. package/dist/factory.d.ts.map +1 -0
  45. package/dist/factory.js +109 -0
  46. package/dist/factory.js.map +1 -0
  47. package/dist/index.d.ts +30 -453
  48. package/dist/index.d.ts.map +1 -0
  49. package/dist/index.js +84 -509
  50. package/dist/index.js.map +1 -0
  51. package/dist/learning/analyzer.d.ts +110 -0
  52. package/dist/learning/analyzer.d.ts.map +1 -0
  53. package/dist/learning/analyzer.js +213 -0
  54. package/dist/learning/analyzer.js.map +1 -0
  55. package/dist/learning/effectiveness.d.ts +158 -0
  56. package/dist/learning/effectiveness.d.ts.map +1 -0
  57. package/dist/learning/effectiveness.js +251 -0
  58. package/dist/learning/effectiveness.js.map +1 -0
  59. package/dist/learning/index.d.ts +8 -0
  60. package/dist/learning/index.d.ts.map +1 -0
  61. package/dist/learning/index.js +11 -0
  62. package/dist/learning/index.js.map +1 -0
  63. package/dist/learning/llm-extractor.d.ts +88 -0
  64. package/dist/learning/llm-extractor.d.ts.map +1 -0
  65. package/dist/learning/llm-extractor.js +372 -0
  66. package/dist/learning/llm-extractor.js.map +1 -0
  67. package/dist/learning/meta-learner.d.ts +80 -0
  68. package/dist/learning/meta-learner.d.ts.map +1 -0
  69. package/dist/learning/meta-learner.js +355 -0
  70. package/dist/learning/meta-learner.js.map +1 -0
  71. package/dist/learning/pipeline.d.ts +65 -0
  72. package/dist/learning/pipeline.d.ts.map +1 -0
  73. package/dist/learning/pipeline.js +170 -0
  74. package/dist/learning/pipeline.js.map +1 -0
  75. package/dist/learning/playbook-extractor.d.ts +113 -0
  76. package/dist/learning/playbook-extractor.d.ts.map +1 -0
  77. package/dist/learning/playbook-extractor.js +523 -0
  78. package/dist/learning/playbook-extractor.js.map +1 -0
  79. package/dist/learning/usage-inference.d.ts +82 -0
  80. package/dist/learning/usage-inference.d.ts.map +1 -0
  81. package/dist/learning/usage-inference.js +261 -0
  82. package/dist/learning/usage-inference.js.map +1 -0
  83. package/dist/mcp/index.d.ts +6 -0
  84. package/dist/mcp/index.d.ts.map +1 -0
  85. package/dist/mcp/index.js +6 -0
  86. package/dist/mcp/index.js.map +1 -0
  87. package/dist/mcp/playbook-server.d.ts +120 -0
  88. package/dist/mcp/playbook-server.d.ts.map +1 -0
  89. package/dist/mcp/playbook-server.js +427 -0
  90. package/dist/mcp/playbook-server.js.map +1 -0
  91. package/dist/memory/curated-loader.d.ts +62 -0
  92. package/dist/memory/curated-loader.d.ts.map +1 -0
  93. package/dist/memory/curated-loader.js +106 -0
  94. package/dist/memory/curated-loader.js.map +1 -0
  95. package/dist/memory/experience.d.ts +122 -0
  96. package/dist/memory/experience.d.ts.map +1 -0
  97. package/dist/memory/experience.js +392 -0
  98. package/dist/memory/experience.js.map +1 -0
  99. package/dist/memory/index.d.ts +6 -0
  100. package/dist/memory/index.d.ts.map +1 -0
  101. package/dist/memory/index.js +9 -0
  102. package/dist/memory/index.js.map +1 -0
  103. package/dist/memory/meta.d.ts +90 -0
  104. package/dist/memory/meta.d.ts.map +1 -0
  105. package/dist/memory/meta.js +362 -0
  106. package/dist/memory/meta.js.map +1 -0
  107. package/dist/memory/playbook.d.ts +133 -0
  108. package/dist/memory/playbook.d.ts.map +1 -0
  109. package/dist/memory/playbook.js +357 -0
  110. package/dist/memory/playbook.js.map +1 -0
  111. package/dist/memory/system.d.ts +167 -0
  112. package/dist/memory/system.d.ts.map +1 -0
  113. package/dist/memory/system.js +383 -0
  114. package/dist/memory/system.js.map +1 -0
  115. package/dist/runtime/backends/acp.d.ts +67 -0
  116. package/dist/runtime/backends/acp.d.ts.map +1 -0
  117. package/dist/runtime/backends/acp.js +290 -0
  118. package/dist/runtime/backends/acp.js.map +1 -0
  119. package/dist/runtime/backends/index.d.ts +5 -0
  120. package/dist/runtime/backends/index.d.ts.map +1 -0
  121. package/dist/runtime/backends/index.js +6 -0
  122. package/dist/runtime/backends/index.js.map +1 -0
  123. package/dist/runtime/backends/mock.d.ts +67 -0
  124. package/dist/runtime/backends/mock.d.ts.map +1 -0
  125. package/dist/runtime/backends/mock.js +153 -0
  126. package/dist/runtime/backends/mock.js.map +1 -0
  127. package/dist/runtime/backends/subprocess.d.ts +56 -0
  128. package/dist/runtime/backends/subprocess.d.ts.map +1 -0
  129. package/dist/runtime/backends/subprocess.js +260 -0
  130. package/dist/runtime/backends/subprocess.js.map +1 -0
  131. package/dist/runtime/flows/learning.d.ts +73 -0
  132. package/dist/runtime/flows/learning.d.ts.map +1 -0
  133. package/dist/runtime/flows/learning.js +116 -0
  134. package/dist/runtime/flows/learning.js.map +1 -0
  135. package/dist/runtime/flows/validation.d.ts +122 -0
  136. package/dist/runtime/flows/validation.d.ts.map +1 -0
  137. package/dist/runtime/flows/validation.js +223 -0
  138. package/dist/runtime/flows/validation.js.map +1 -0
  139. package/dist/runtime/index.d.ts +6 -0
  140. package/dist/runtime/index.d.ts.map +1 -0
  141. package/dist/runtime/index.js +8 -0
  142. package/dist/runtime/index.js.map +1 -0
  143. package/dist/runtime/manager.d.ts +116 -0
  144. package/dist/runtime/manager.d.ts.map +1 -0
  145. package/dist/runtime/manager.js +416 -0
  146. package/dist/runtime/manager.js.map +1 -0
  147. package/dist/runtime/types.d.ts +138 -0
  148. package/dist/runtime/types.d.ts.map +1 -0
  149. package/dist/runtime/types.js +2 -0
  150. package/dist/runtime/types.js.map +1 -0
  151. package/dist/search/evaluator.d.ts +102 -0
  152. package/dist/search/evaluator.d.ts.map +1 -0
  153. package/dist/search/evaluator.js +352 -0
  154. package/dist/search/evaluator.js.map +1 -0
  155. package/dist/search/index.d.ts +7 -0
  156. package/dist/search/index.d.ts.map +1 -0
  157. package/dist/search/index.js +11 -0
  158. package/dist/search/index.js.map +1 -0
  159. package/dist/search/refinement-loop.d.ts +73 -0
  160. package/dist/search/refinement-loop.d.ts.map +1 -0
  161. package/dist/search/refinement-loop.js +245 -0
  162. package/dist/search/refinement-loop.js.map +1 -0
  163. package/dist/search/refinement-types.d.ts +154 -0
  164. package/dist/search/refinement-types.d.ts.map +1 -0
  165. package/dist/search/refinement-types.js +99 -0
  166. package/dist/search/refinement-types.js.map +1 -0
  167. package/dist/search/router.d.ts +61 -0
  168. package/dist/search/router.d.ts.map +1 -0
  169. package/dist/search/router.js +197 -0
  170. package/dist/search/router.js.map +1 -0
  171. package/dist/search/solver.d.ts +75 -0
  172. package/dist/search/solver.d.ts.map +1 -0
  173. package/dist/search/solver.js +216 -0
  174. package/dist/search/solver.js.map +1 -0
  175. package/dist/search/verification-runner.d.ts +125 -0
  176. package/dist/search/verification-runner.d.ts.map +1 -0
  177. package/dist/search/verification-runner.js +440 -0
  178. package/dist/search/verification-runner.js.map +1 -0
  179. package/dist/surfacing/index.d.ts +2 -0
  180. package/dist/surfacing/index.d.ts.map +1 -0
  181. package/dist/surfacing/index.js +2 -0
  182. package/dist/surfacing/index.js.map +1 -0
  183. package/dist/surfacing/skill-library.d.ts +158 -0
  184. package/dist/surfacing/skill-library.d.ts.map +1 -0
  185. package/dist/surfacing/skill-library.js +429 -0
  186. package/dist/surfacing/skill-library.js.map +1 -0
  187. package/dist/types/config.d.ts +1113 -0
  188. package/dist/types/config.d.ts.map +1 -0
  189. package/dist/types/config.js +274 -0
  190. package/dist/types/config.js.map +1 -0
  191. package/dist/types/index.d.ts +9 -0
  192. package/dist/types/index.d.ts.map +1 -0
  193. package/dist/types/index.js +14 -0
  194. package/dist/types/index.js.map +1 -0
  195. package/dist/types/memory.d.ts +339 -0
  196. package/dist/types/memory.d.ts.map +1 -0
  197. package/dist/types/memory.js +207 -0
  198. package/dist/types/memory.js.map +1 -0
  199. package/dist/types/meta.d.ts +146 -0
  200. package/dist/types/meta.d.ts.map +1 -0
  201. package/dist/types/meta.js +51 -0
  202. package/dist/types/meta.js.map +1 -0
  203. package/dist/types/outcome.d.ts +42 -0
  204. package/dist/types/outcome.d.ts.map +1 -0
  205. package/dist/types/outcome.js +50 -0
  206. package/dist/types/outcome.js.map +1 -0
  207. package/dist/types/playbook.d.ts +119 -0
  208. package/dist/types/playbook.d.ts.map +1 -0
  209. package/dist/types/playbook.js +71 -0
  210. package/dist/types/playbook.js.map +1 -0
  211. package/dist/types/step.d.ts +44 -0
  212. package/dist/types/step.d.ts.map +1 -0
  213. package/dist/types/step.js +32 -0
  214. package/dist/types/step.js.map +1 -0
  215. package/dist/types/task.d.ts +91 -0
  216. package/dist/types/task.d.ts.map +1 -0
  217. package/dist/types/task.js +39 -0
  218. package/dist/types/task.js.map +1 -0
  219. package/dist/types/trajectory.d.ts +221 -0
  220. package/dist/types/trajectory.d.ts.map +1 -0
  221. package/dist/types/trajectory.js +60 -0
  222. package/dist/types/trajectory.js.map +1 -0
  223. package/dist/utils/index.d.ts +4 -0
  224. package/dist/utils/index.d.ts.map +1 -0
  225. package/dist/utils/index.js +4 -0
  226. package/dist/utils/index.js.map +1 -0
  227. package/dist/utils/similarity.d.ts +31 -0
  228. package/dist/utils/similarity.d.ts.map +1 -0
  229. package/dist/utils/similarity.js +107 -0
  230. package/dist/utils/similarity.js.map +1 -0
  231. package/dist/utils/storage.d.ts +106 -0
  232. package/dist/utils/storage.d.ts.map +1 -0
  233. package/dist/utils/storage.js +203 -0
  234. package/dist/utils/storage.js.map +1 -0
  235. package/dist/utils/validation.d.ts +129 -0
  236. package/dist/utils/validation.d.ts.map +1 -0
  237. package/dist/utils/validation.js +171 -0
  238. package/dist/utils/validation.js.map +1 -0
  239. package/package.json +50 -34
  240. package/scripts/migrate-to-playbooks.ts +307 -0
  241. package/src/agents/index.ts +14 -0
  242. package/src/agents/mock-provider.ts +93 -0
  243. package/src/agents/types.ts +137 -0
  244. package/src/atlas.ts +560 -0
  245. package/src/bin/cognitive-core.ts +470 -0
  246. package/src/embeddings/bm25.ts +337 -0
  247. package/src/embeddings/index.ts +39 -0
  248. package/src/embeddings/manager.ts +288 -0
  249. package/src/embeddings/provider.ts +311 -0
  250. package/src/embeddings/vector-store.ts +353 -0
  251. package/src/factory.ts +263 -0
  252. package/src/index.ts +246 -0
  253. package/src/learning/analyzer.ts +335 -0
  254. package/src/learning/effectiveness.ts +428 -0
  255. package/src/learning/index.ts +58 -0
  256. package/src/learning/llm-extractor.ts +542 -0
  257. package/src/learning/meta-learner.ts +516 -0
  258. package/src/learning/pipeline.ts +244 -0
  259. package/src/learning/playbook-extractor.ts +702 -0
  260. package/src/learning/usage-inference.ts +372 -0
  261. package/src/mcp/index.ts +12 -0
  262. package/src/mcp/playbook-server.ts +565 -0
  263. package/src/memory/curated-loader.ts +160 -0
  264. package/src/memory/experience.ts +515 -0
  265. package/src/memory/index.ts +27 -0
  266. package/src/memory/meta.ts +506 -0
  267. package/src/memory/playbook.ts +493 -0
  268. package/src/memory/system.ts +551 -0
  269. package/src/runtime/backends/acp.ts +378 -0
  270. package/src/runtime/backends/index.ts +24 -0
  271. package/src/runtime/backends/mock.ts +218 -0
  272. package/src/runtime/backends/subprocess.ts +356 -0
  273. package/src/runtime/flows/learning.ts +183 -0
  274. package/src/runtime/flows/validation.ts +381 -0
  275. package/src/runtime/index.ts +53 -0
  276. package/src/runtime/manager.ts +541 -0
  277. package/src/runtime/types.ts +157 -0
  278. package/src/search/evaluator.ts +474 -0
  279. package/src/search/index.ts +59 -0
  280. package/src/search/refinement-loop.ts +363 -0
  281. package/src/search/refinement-types.ts +159 -0
  282. package/src/search/router.ts +261 -0
  283. package/src/search/solver.ts +303 -0
  284. package/src/search/verification-runner.ts +570 -0
  285. package/src/surfacing/index.ts +6 -0
  286. package/src/surfacing/skill-library.ts +594 -0
  287. package/src/types/config.ts +333 -0
  288. package/src/types/index.ts +130 -0
  289. package/src/types/memory.ts +270 -0
  290. package/src/types/meta.ts +218 -0
  291. package/src/types/outcome.ts +66 -0
  292. package/src/types/playbook.ts +196 -0
  293. package/src/types/step.ts +40 -0
  294. package/src/types/task.ts +52 -0
  295. package/src/types/trajectory.ts +80 -0
  296. package/src/utils/index.ts +38 -0
  297. package/src/utils/similarity.ts +139 -0
  298. package/src/utils/storage.ts +249 -0
  299. package/src/utils/validation.ts +286 -0
  300. package/tests/embeddings/bm25.test.ts +130 -0
  301. package/tests/embeddings/manager.test.ts +205 -0
  302. package/tests/integration/atlas.test.ts +266 -0
  303. package/tests/integration/e2e.test.ts +929 -0
  304. package/tests/learning/analyzer.test.ts +426 -0
  305. package/tests/learning/effectiveness.test.ts +542 -0
  306. package/tests/learning/pipeline.test.ts +176 -0
  307. package/tests/learning/playbook-extractor-provenance.test.ts +114 -0
  308. package/tests/learning/usage-inference.test.ts +254 -0
  309. package/tests/mcp/playbook-server.test.ts +252 -0
  310. package/tests/memory/experience.test.ts +198 -0
  311. package/tests/memory/playbook.test.ts +338 -0
  312. package/tests/memory/provenance.test.ts +639 -0
  313. package/tests/memory/system.test.ts +325 -0
  314. package/tests/runtime/agent-manager.test.ts +512 -0
  315. package/tests/runtime/mock-backend.test.ts +248 -0
  316. package/tests/search/refinement-loop.test.ts +468 -0
  317. package/tests/search/refinement.test.ts +267 -0
  318. package/tests/search/router.test.ts +427 -0
  319. package/tests/surfacing/skill-library.test.ts +292 -0
  320. package/tests/types/outcome.test.ts +147 -0
  321. package/tests/types/step.test.ts +133 -0
  322. package/tests/types/task.test.ts +158 -0
  323. package/tests/types/trajectory.test.ts +253 -0
  324. package/tests/utils/similarity.test.ts +188 -0
  325. package/tests/utils/validation.test.ts +252 -0
  326. package/tsconfig.json +25 -0
  327. package/vitest.config.ts +22 -0
  328. package/dist/index.d.mts +0 -466
  329. package/dist/index.mjs +0 -478
@@ -0,0 +1,542 @@
1
+ import { describe, it, expect, beforeEach, afterEach } from 'vitest';
2
+ import {
3
+ LearningEffectivenessTracker,
4
+ createEffectivenessTracker,
5
+ } from '../../src/learning/effectiveness.js';
6
+ import { createTrajectory, createStep, createTask, successOutcome, failureOutcome } from '../../src/types/index.js';
7
+ import type { PlaybookMatch } from '../../src/memory/playbook.js';
8
+ import type { Playbook } from '../../src/types/playbook.js';
9
+ import { createPlaybook } from '../../src/types/playbook.js';
10
+ import { mkdtemp, rm } from 'node:fs/promises';
11
+ import { join } from 'node:path';
12
+ import { tmpdir } from 'node:os';
13
+
14
+ describe('LearningEffectivenessTracker', () => {
15
+ let tempDir: string;
16
+ let tracker: LearningEffectivenessTracker;
17
+
18
+ beforeEach(async () => { // Every test gets its own temporary directory and a tracker created on it and initialised.
19
+ tempDir = await mkdtemp(join(tmpdir(), 'atlas-eff-test-'));
20
+ tracker = createEffectivenessTracker(tempDir);
21
+ await tracker.init();
22
+ });
23
+
24
+ afterEach(async () => { // Close the tracker first, then remove the per-test temporary directory.
25
+ await tracker.close();
26
+ await rm(tempDir, { recursive: true, force: true }); // force: do not fail if the directory is already gone
27
+ });
28
+
29
+ function makeTrajectory(success: boolean, stepCount: number, hasErrors = false) { // Test helper: builds a trajectory with stepCount generated steps, a fixed test domain and a fixed agent id.
30
+ const steps = [];
31
+ for (let i = 0; i < stepCount; i++) {
32
+ steps.push(createStep({
33
+ thought: `Step ${i + 1}`,
34
+ action: `action-${i + 1}`,
35
+ observation: hasErrors && i === 1 ? 'error: something failed' : 'ok', // with hasErrors set, only the second step (i === 1) reports an error
36
+ }));
37
+ }
38
+ return createTrajectory({
39
+ task: createTask({ domain: 'test', description: `Test task ${Date.now()}` }),
40
+ steps,
41
+ outcome: success ? successOutcome({ result: 'done' }) : failureOutcome('failed'), // the success flag selects a success or a failure outcome
42
+ agentId: 'test-agent',
43
+ });
44
+ }
45
+
46
+ function makePlaybook(name: string): Playbook { // Test helper: playbook with the given name and fixed placeholder applicability and guidance.
47
+ return createPlaybook({
48
+ name,
49
+ applicability: {
50
+ situations: ['Test'],
51
+ triggers: [],
52
+ antiPatterns: [],
53
+ domains: ['test'], // same domain string that makeTrajectory uses for its task
54
+ },
55
+ guidance: { strategy: 'Test', tactics: [] },
56
+ });
57
+ }
58
+
59
+ function makePlaybookMatch(playbook: Playbook): PlaybookMatch { // Test helper: wraps a playbook in a match with a fixed score (0.8) and a fixed match type (situation).
60
+ return { playbook, score: 0.8, matchType: 'situation' };
61
+ }
62
+
63
+ describe('annotate', () => { // Checks what annotate() records: surfaced knowledge, applied knowledge, outcome figures, and persistence.
64
+ it('should create an annotation for a trajectory', async () => {
65
+ const trajectory = makeTrajectory(true, 3);
66
+ const pb = makePlaybook('test-playbook');
67
+ const matches = [makePlaybookMatch(pb)];
68
+
69
+ const annotation = await tracker.annotate(
70
+ trajectory,
71
+ matches,
72
+ ['exp-1'], // surfaced experience ids (asserted below through knowledgeSurfaced.experienceIds)
73
+ [pb.id], // playbook ids reported as applied (asserted below through knowledgeApplied.playbookIdsUsed)
74
+ );
75
+
76
+ expect(annotation.trajectoryId).toBe(trajectory.id);
77
+ expect(annotation.knowledgeSurfaced.playbookIds).toEqual([pb.id]);
78
+ expect(annotation.knowledgeSurfaced.playbookNames).toEqual(['test-playbook']);
79
+ expect(annotation.knowledgeSurfaced.experienceIds).toEqual(['exp-1']);
80
+ expect(annotation.knowledgeSurfaced.totalItems).toBe(2); // one surfaced playbook plus one surfaced experience
81
+ expect(annotation.knowledgeApplied.playbookIdsUsed).toEqual([pb.id]);
82
+ expect(annotation.knowledgeApplied.anyKnowledgeUsed).toBe(true);
83
+ expect(annotation.outcome.success).toBe(true);
84
+ expect(annotation.outcome.stepCount).toBe(3);
85
+ });
86
+
87
+ it('should count error recoveries', async () => {
88
+ const trajectory = makeTrajectory(true, 4, true);
89
+ const annotation = await tracker.annotate(trajectory, [], [], []);
90
+
91
+ // Step 1 (ok) -> Step 2 (error) -> Step 3 (ok) = 1 recovery
92
+ expect(annotation.outcome.errorRecoveries).toBe(1);
93
+ });
94
+
95
+ it('should count multiple error recoveries', async () => {
96
+ const steps = [
97
+ createStep({ thought: 'S1', action: 'a1', observation: 'ok' }),
98
+ createStep({ thought: 'S2', action: 'a2', observation: 'error: first fail' }),
99
+ createStep({ thought: 'S3', action: 'a3', observation: 'recovered' }),
100
+ createStep({ thought: 'S4', action: 'a4', observation: 'error: second fail' }),
101
+ createStep({ thought: 'S5', action: 'a5', observation: 'recovered again' }),
102
+ ];
103
+ const trajectory = createTrajectory({
104
+ task: createTask({ domain: 'test', description: 'multi-error' }),
105
+ steps,
106
+ outcome: successOutcome({ result: 'done' }),
107
+ agentId: 'test-agent',
108
+ });
109
+
110
+ const annotation = await tracker.annotate(trajectory, [], [], []);
111
+ expect(annotation.outcome.errorRecoveries).toBe(2); // steps 2 and 4 report errors and each is followed by a non-error step
112
+ });
113
+
114
+ it('should report zero error recoveries for clean trajectories', async () => {
115
+ const trajectory = makeTrajectory(true, 5, false);
116
+ const annotation = await tracker.annotate(trajectory, [], [], []);
117
+ expect(annotation.outcome.errorRecoveries).toBe(0);
118
+ });
119
+
120
+ it('should handle multiple playbooks surfaced in one task', async () => {
121
+ const pb1 = makePlaybook('pb-alpha');
122
+ const pb2 = makePlaybook('pb-beta');
123
+ const pb3 = makePlaybook('pb-gamma');
124
+ const matches = [makePlaybookMatch(pb1), makePlaybookMatch(pb2), makePlaybookMatch(pb3)];
125
+
126
+ const annotation = await tracker.annotate(
127
+ makeTrajectory(true, 3),
128
+ matches,
129
+ [],
130
+ [pb1.id, pb3.id], // only two of the three surfaced playbooks are reported as applied
131
+ );
132
+
133
+ expect(annotation.knowledgeSurfaced.playbookIds).toHaveLength(3);
134
+ expect(annotation.knowledgeSurfaced.playbookNames).toEqual(['pb-alpha', 'pb-beta', 'pb-gamma']);
135
+ expect(annotation.knowledgeSurfaced.totalItems).toBe(3);
136
+ expect(annotation.knowledgeApplied.playbookIdsUsed).toHaveLength(2);
137
+ expect(annotation.knowledgeApplied.anyKnowledgeUsed).toBe(true);
138
+ });
139
+
140
+ it('should handle experience-only guidance (no playbooks)', async () => {
141
+ const annotation = await tracker.annotate(
142
+ makeTrajectory(true, 3),
143
+ [],
144
+ ['exp-1', 'exp-2', 'exp-3'],
145
+ [],
146
+ );
147
+
148
+ expect(annotation.knowledgeSurfaced.playbookIds).toHaveLength(0);
149
+ expect(annotation.knowledgeSurfaced.experienceIds).toHaveLength(3);
150
+ expect(annotation.knowledgeSurfaced.totalItems).toBe(3);
151
+ expect(annotation.knowledgeApplied.anyKnowledgeUsed).toBe(false); // experiences were surfaced but no playbook id was passed as applied
152
+ expect(annotation.outcome.domainHadPlaybooks).toBe(false);
153
+ });
154
+
155
+ it('should persist annotations', async () => {
156
+ const trajectory = makeTrajectory(true, 2);
157
+ await tracker.annotate(trajectory, [], [], []);
158
+
159
+ const count = await tracker.count();
160
+ expect(count).toBe(1);
161
+
162
+ const retrieved = await tracker.getByTrajectoryId(trajectory.id);
163
+ expect(retrieved).toBeDefined();
164
+ expect(retrieved!.trajectoryId).toBe(trajectory.id);
165
+ });
166
+
167
+ it('should set annotation id based on trajectory id', async () => {
168
+ const trajectory = makeTrajectory(true, 2);
169
+ const annotation = await tracker.annotate(trajectory, [], [], []);
170
+ expect(annotation.id).toBe(`ann-${trajectory.id}`);
171
+ });
172
+ });
173
+
174
+ describe('getAll and getByTrajectoryId', () => { // Read-side accessors: listing every annotation and looking one up by trajectory id.
175
+ it('should return all annotations', async () => {
176
+ await tracker.annotate(makeTrajectory(true, 2), [], [], []);
177
+ await tracker.annotate(makeTrajectory(false, 3), [], [], []);
178
+ await tracker.annotate(makeTrajectory(true, 4), [], [], []);
179
+
180
+ const all = await tracker.getAll();
181
+ expect(all).toHaveLength(3); // one annotation per annotate() call above
182
+ });
183
+
184
+ it('should return undefined for non-existent trajectory id', async () => {
185
+ const result = await tracker.getByTrajectoryId('nonexistent-id');
186
+ expect(result).toBeUndefined();
187
+ });
188
+ });
189
+
190
+ describe('addReflection', () => { // addReflection() is addressed by annotation id; results are read back by trajectory id.
191
+ it('should add a reflection to an existing annotation', async () => {
192
+ const trajectory = makeTrajectory(true, 2);
193
+ const annotation = await tracker.annotate(trajectory, [], [], []);
194
+
195
+ await tracker.addReflection(annotation.id, {
196
+ knowledgeRelevance: 'helpful',
197
+ notes: 'The playbook guidance was spot on',
198
+ });
199
+
200
+ const updated = await tracker.getByTrajectoryId(trajectory.id);
201
+ expect(updated!.reflection).toBeDefined();
202
+ expect(updated!.reflection!.knowledgeRelevance).toBe('helpful');
203
+ expect(updated!.reflection!.notes).toBe('The playbook guidance was spot on');
204
+ });
205
+
206
+ it('should no-op when annotation id does not exist', async () => {
207
+ await tracker.addReflection('nonexistent-ann', {
208
+ knowledgeRelevance: 'helpful',
209
+ });
210
+ expect(await tracker.count()).toBe(0); // an unknown id must not create an annotation
211
+ });
212
+
213
+ it('should overwrite previous reflection', async () => {
214
+ const trajectory = makeTrajectory(true, 2);
215
+ const annotation = await tracker.annotate(trajectory, [], [], []);
216
+
217
+ await tracker.addReflection(annotation.id, {
218
+ knowledgeRelevance: 'helpful',
219
+ });
220
+ await tracker.addReflection(annotation.id, {
221
+ knowledgeRelevance: 'misleading',
222
+ notes: 'Actually it was wrong',
223
+ });
224
+
225
+ const updated = await tracker.getByTrajectoryId(trajectory.id);
226
+ expect(updated!.reflection!.knowledgeRelevance).toBe('misleading'); // the second reflection replaces the first
227
+ expect(updated!.reflection!.notes).toBe('Actually it was wrong');
228
+ });
229
+ });
230
+
231
+ describe('computeMetrics', () => {
232
+ it('should compute guided vs unguided success rates', async () => {
233
+ const pb = makePlaybook('guide-playbook');
234
+ const match = makePlaybookMatch(pb);
235
+
236
+ // 2 guided successes
237
+ await tracker.annotate(makeTrajectory(true, 3), [match], [], [pb.id]);
238
+ await tracker.annotate(makeTrajectory(true, 4), [match], [], [pb.id]);
239
+ // 1 guided failure
240
+ await tracker.annotate(makeTrajectory(false, 5), [match], [], []);
241
+
242
+ // 1 unguided success
243
+ await tracker.annotate(makeTrajectory(true, 6), [], [], []);
244
+ // 2 unguided failures
245
+ await tracker.annotate(makeTrajectory(false, 8), [], [], []);
246
+ await tracker.annotate(makeTrajectory(false, 7), [], [], []);
247
+
248
+ const metrics = await tracker.computeMetrics();
249
+
250
+ expect(metrics.totalTasks).toBe(6);
251
+ expect(metrics.guidedTasks).toBe(3);
252
+ expect(metrics.unguidedTasks).toBe(3);
253
+ expect(metrics.guidedSuccessRate).toBeCloseTo(2 / 3, 2);
254
+ expect(metrics.unguidedSuccessRate).toBeCloseTo(1 / 3, 2);
255
+ expect(metrics.successRateDelta).toBeCloseTo(1 / 3, 2);
256
+ });
257
+
258
+ it('should compute average step counts', async () => {
259
+ const pb = makePlaybook('step-playbook');
260
+ const match = makePlaybookMatch(pb);
261
+
262
+ // Guided: 3, 4 steps -> avg 3.5
263
+ await tracker.annotate(makeTrajectory(true, 3), [match], [], []);
264
+ await tracker.annotate(makeTrajectory(true, 4), [match], [], []);
265
+
266
+ // Unguided: 6, 8 steps -> avg 7
267
+ await tracker.annotate(makeTrajectory(true, 6), [], [], []);
268
+ await tracker.annotate(makeTrajectory(true, 8), [], [], []);
269
+
270
+ const metrics = await tracker.computeMetrics();
271
+
272
+ expect(metrics.guidedAvgSteps).toBeCloseTo(3.5, 1);
273
+ expect(metrics.unguidedAvgSteps).toBeCloseTo(7, 1);
274
+ expect(metrics.stepCountDelta).toBeCloseTo(-3.5, 1); // Negative = guidance reduces steps
275
+ });
276
+
277
+ it('should compute knowledge application rate', async () => {
278
+ const pb = makePlaybook('applied-playbook');
279
+ const match = makePlaybookMatch(pb);
280
+
281
+ // 2 guided, knowledge applied
282
+ await tracker.annotate(makeTrajectory(true, 3), [match], [], [pb.id]);
283
+ await tracker.annotate(makeTrajectory(true, 3), [match], [], [pb.id]);
284
+ // 1 guided, knowledge NOT applied
285
+ await tracker.annotate(makeTrajectory(false, 5), [match], [], []);
286
+
287
+ const metrics = await tracker.computeMetrics();
288
+
289
+ expect(metrics.knowledgeApplicationRate).toBeCloseTo(2 / 3, 2);
290
+ expect(metrics.appliedKnowledgeSuccessRate).toBe(1); // Both applied succeeded
291
+ });
292
+
293
+ it('should compute per-playbook effectiveness', async () => {
294
+ const pb1 = makePlaybook('good-playbook');
295
+ const pb2 = makePlaybook('bad-playbook');
296
+ const match1 = makePlaybookMatch(pb1);
297
+ const match2 = makePlaybookMatch(pb2);
298
+
299
+ // Good playbook: surfaced 3x, applied 3x, succeeded 2x
300
+ await tracker.annotate(makeTrajectory(true, 3), [match1], [], [pb1.id]);
301
+ await tracker.annotate(makeTrajectory(true, 4), [match1], [], [pb1.id]);
302
+ await tracker.annotate(makeTrajectory(false, 5), [match1], [], [pb1.id]);
303
+
304
+ // Bad playbook: surfaced 2x, applied 2x, succeeded 0x
305
+ await tracker.annotate(makeTrajectory(false, 6), [match2], [], [pb2.id]);
306
+ await tracker.annotate(makeTrajectory(false, 7), [match2], [], [pb2.id]);
307
+
308
+ const metrics = await tracker.computeMetrics();
309
+
310
+ expect(metrics.playbookEffectiveness).toHaveLength(2);
311
+
312
+ const goodPb = metrics.playbookEffectiveness.find(
313
+ (e) => e.playbookName === 'good-playbook'
314
+ );
315
+ expect(goodPb).toBeDefined();
316
+ expect(goodPb!.surfacedCount).toBe(3);
317
+ expect(goodPb!.appliedCount).toBe(3);
318
+ expect(goodPb!.appliedSuccessRate).toBeCloseTo(2 / 3, 2);
319
+
320
+ const badPb = metrics.playbookEffectiveness.find(
321
+ (e) => e.playbookName === 'bad-playbook'
322
+ );
323
+ expect(badPb).toBeDefined();
324
+ expect(badPb!.surfacedCount).toBe(2);
325
+ expect(badPb!.appliedSuccessRate).toBe(0);
326
+ });
327
+
328
+ it('should compute error recovery delta between guided and unguided', async () => {
329
+ const pb = makePlaybook('error-helper');
330
+ const match = makePlaybookMatch(pb);
331
+
332
+ // Guided: 1 error recovery each (avg 1)
333
+ await tracker.annotate(makeTrajectory(true, 4, true), [match], [], [pb.id]);
334
+ await tracker.annotate(makeTrajectory(true, 4, true), [match], [], [pb.id]);
335
+
336
+ // Unguided: no errors (avg 0)
337
+ await tracker.annotate(makeTrajectory(true, 3, false), [], [], []);
338
+ await tracker.annotate(makeTrajectory(true, 3, false), [], [], []);
339
+
340
+ const metrics = await tracker.computeMetrics();
341
+
342
+ expect(metrics.guidedAvgErrorRecoveries).toBe(1);
343
+ expect(metrics.unguidedAvgErrorRecoveries).toBe(0);
344
+ });
345
+
346
+ it('should compute per-playbook average step count', async () => {
347
+ const pb = makePlaybook('step-counter');
348
+ const match = makePlaybookMatch(pb);
349
+
350
+ // Applied with step counts 3, 5, 7 -> avg 5
351
+ await tracker.annotate(makeTrajectory(true, 3), [match], [], [pb.id]);
352
+ await tracker.annotate(makeTrajectory(true, 5), [match], [], [pb.id]);
353
+ await tracker.annotate(makeTrajectory(true, 7), [match], [], [pb.id]);
354
+
355
+ const metrics = await tracker.computeMetrics();
356
+ const pbMetrics = metrics.playbookEffectiveness.find(
357
+ (e) => e.playbookName === 'step-counter'
358
+ );
359
+
360
+ expect(pbMetrics).toBeDefined();
361
+ expect(pbMetrics!.appliedAvgSteps).toBe(5);
362
+ });
363
+
364
// A playbook that is surfaced on every run but never listed as applied must
// still get an effectiveness entry, with all of its "applied" figures at 0.
it('should track playbooks surfaced but not applied', async () => {
  const playbook = makePlaybook('ignored-playbook');
  const playbookMatch = makePlaybookMatch(playbook);

  // Surfaced 3x, applied 0x (the last argument to annotate stays empty).
  // NOTE(review): the first makeTrajectory argument looks like a success
  // flag, judging by the success-rate tests in this file; confirm.
  const runs = [
    { succeeded: true, steps: 3 },
    { succeeded: false, steps: 4 },
    { succeeded: true, steps: 5 },
  ];
  for (const { succeeded, steps } of runs) {
    await tracker.annotate(makeTrajectory(succeeded, steps), [playbookMatch], [], []);
  }

  const { playbookEffectiveness } = await tracker.computeMetrics();
  const entry = playbookEffectiveness.find(
    ({ playbookName }) => playbookName === 'ignored-playbook'
  );

  expect(entry).toBeDefined();
  expect(entry?.surfacedCount).toBe(3);
  expect(entry?.appliedCount).toBe(0);
  expect(entry?.appliedSuccessRate).toBe(0);
  expect(entry?.appliedAvgSteps).toBe(0);
});
384
+
385
// The effectiveness list is expected to be ordered by how often each playbook
// was surfaced, most-surfaced first.
it('should sort playbook effectiveness by surfaced count descending', async () => {
  const popularMatch = makePlaybookMatch(makePlaybook('popular'));
  const rareMatch = makePlaybookMatch(makePlaybook('rare'));

  // Popular: surfaced 5x
  for (let run = 1; run <= 5; run += 1) {
    await tracker.annotate(makeTrajectory(true, 3), [popularMatch], [], []);
  }
  // Rare: surfaced 1x
  await tracker.annotate(makeTrajectory(true, 3), [rareMatch], [], []);

  const { playbookEffectiveness } = await tracker.computeMetrics();
  const [first, second] = playbookEffectiveness;

  expect(first.playbookName).toBe('popular');
  expect(second.playbookName).toBe('rare');
});
402
+
403
// A `since` bound that lies after the annotation was recorded must leave the
// computed metrics empty.
it('should filter by time window with since', async () => {
  const startedAt = Date.now();

  await tracker.annotate(makeTrajectory(true, 3), [], [], []);

  // One minute after the test started, i.e. in the future, so the annotation
  // recorded above should be excluded.
  const oneMinuteAhead = new Date(startedAt + 60000);
  const metrics = await tracker.computeMetrics({ since: oneMinuteAhead });

  expect(metrics.totalTasks).toBe(0);
});
414
+
415
// An `until` bound that lies before the annotation was recorded must leave the
// computed metrics empty.
it('should filter by time window with until', async () => {
  // Captured before annotating: one minute in the past.
  const oneMinuteAgo = new Date(Date.now() - 60000);

  await tracker.annotate(makeTrajectory(true, 3), [], [], []);

  // The window closes before the annotation above was created, so it should
  // be excluded.
  const metrics = await tracker.computeMetrics({ until: oneMinuteAgo });

  expect(metrics.totalTasks).toBe(0);
});
426
+
427
// The requested `since` / `until` bounds must be reported back as
// `windowStart` / `windowEnd` on the computed metrics.
it('should set window start and end in metrics', async () => {
  const range = {
    since: new Date('2025-01-01'),
    until: new Date('2025-12-31'),
  };

  const { windowStart, windowEnd } = await tracker.computeMetrics(range);

  expect(windowStart).toEqual(range.since);
  expect(windowEnd).toEqual(range.until);
});
436
+
437
// When every recorded run applied a playbook, the unguided side has no data
// and its success rate is expected to be reported as 0.
it('should handle all-guided scenario', async () => {
  const playbook = makePlaybook('always-on');
  const playbookMatch = makePlaybookMatch(playbook);

  // Two runs, both with the playbook surfaced and applied.
  for (const steps of [3, 4]) {
    await tracker.annotate(
      makeTrajectory(true, steps),
      [playbookMatch],
      [],
      [playbook.id]
    );
  }

  const {
    guidedTasks,
    unguidedTasks,
    guidedSuccessRate,
    unguidedSuccessRate,
  } = await tracker.computeMetrics();

  expect(guidedTasks).toBe(2);
  expect(unguidedTasks).toBe(0);
  expect(guidedSuccessRate).toBe(1);
  expect(unguidedSuccessRate).toBe(0); // No data → 0
});
451
+
452
// When no run surfaced or applied any playbook, the guided side has no data
// (rate 0) and the unguided rate reflects the two recorded runs.
it('should handle all-unguided scenario', async () => {
  // Two runs with nothing surfaced or applied; the expected 0.5 rate below
  // implies one of them counts as a success and the other does not.
  const firstRun = makeTrajectory(true, 3);
  await tracker.annotate(firstRun, [], [], []);
  const secondRun = makeTrajectory(false, 4);
  await tracker.annotate(secondRun, [], [], []);

  const {
    guidedTasks,
    unguidedTasks,
    guidedSuccessRate,
    unguidedSuccessRate,
  } = await tracker.computeMetrics();

  expect(guidedTasks).toBe(0);
  expect(unguidedTasks).toBe(2);
  expect(guidedSuccessRate).toBe(0);
  expect(unguidedSuccessRate).toBe(0.5);
});
463
+
464
// With nothing annotated, every aggregate is expected to be 0 and the
// per-playbook list is expected to be empty.
it('should handle empty tracker gracefully', async () => {
  const {
    totalTasks,
    guidedSuccessRate,
    unguidedSuccessRate,
    successRateDelta,
    guidedAvgSteps,
    unguidedAvgSteps,
    knowledgeApplicationRate,
    appliedKnowledgeSuccessRate,
    playbookEffectiveness,
  } = await tracker.computeMetrics();

  expect(totalTasks).toBe(0);
  expect(guidedSuccessRate).toBe(0);
  expect(unguidedSuccessRate).toBe(0);
  expect(successRateDelta).toBe(0);
  expect(guidedAvgSteps).toBe(0);
  expect(unguidedAvgSteps).toBe(0);
  expect(knowledgeApplicationRate).toBe(0);
  expect(appliedKnowledgeSuccessRate).toBe(0);
  expect(playbookEffectiveness).toHaveLength(0);
});
477
+ });
478
+
479
// Tests for the per-playbook `relevanceRate`, which is driven by the
// `knowledgeRelevance` value attached to annotations via addReflection.
describe('reflection-based relevance tracking', () => {
  // Three applied runs, reflected as helpful / irrelevant / helpful.
  it('should compute relevance rate from reflections', async () => {
    const playbook = makePlaybook('reflected-playbook');
    const playbookMatch = makePlaybookMatch(playbook);

    const first = await tracker.annotate(
      makeTrajectory(true, 3),
      [playbookMatch],
      [],
      [playbook.id]
    );
    await tracker.addReflection(first.id, { knowledgeRelevance: 'helpful' });

    const second = await tracker.annotate(
      makeTrajectory(true, 4),
      [playbookMatch],
      [],
      [playbook.id]
    );
    await tracker.addReflection(second.id, { knowledgeRelevance: 'irrelevant' });

    const third = await tracker.annotate(
      makeTrajectory(true, 5),
      [playbookMatch],
      [],
      [playbook.id]
    );
    await tracker.addReflection(third.id, { knowledgeRelevance: 'helpful' });

    const { playbookEffectiveness } = await tracker.computeMetrics();
    const entry = playbookEffectiveness.find(
      ({ playbookName }) => playbookName === 'reflected-playbook'
    );

    expect(entry).toBeDefined();
    // 2 out of 3 reflections rated as helpful
    expect(entry?.relevanceRate).toBeCloseTo(2 / 3, 2);
  });

  // Three applied runs, reflected as misleading / not_assessed / helpful.
  it('should not count misleading or not_assessed as relevant', async () => {
    const playbook = makePlaybook('mixed-feedback');
    const playbookMatch = makePlaybookMatch(playbook);

    const first = await tracker.annotate(
      makeTrajectory(true, 3),
      [playbookMatch],
      [],
      [playbook.id]
    );
    await tracker.addReflection(first.id, { knowledgeRelevance: 'misleading' });

    const second = await tracker.annotate(
      makeTrajectory(true, 3),
      [playbookMatch],
      [],
      [playbook.id]
    );
    await tracker.addReflection(second.id, { knowledgeRelevance: 'not_assessed' });

    const third = await tracker.annotate(
      makeTrajectory(true, 3),
      [playbookMatch],
      [],
      [playbook.id]
    );
    await tracker.addReflection(third.id, { knowledgeRelevance: 'helpful' });

    const { playbookEffectiveness } = await tracker.computeMetrics();
    const entry = playbookEffectiveness.find(
      ({ playbookName }) => playbookName === 'mixed-feedback'
    );

    expect(entry).toBeDefined();
    // Only 1 out of 3 is 'helpful'
    expect(entry?.relevanceRate).toBeCloseTo(1 / 3, 2);
  });

  // Two applied runs with no reflection attached to either annotation.
  it('should report 0 relevance when no reflections exist', async () => {
    const playbook = makePlaybook('no-reflections');
    const playbookMatch = makePlaybookMatch(playbook);

    for (const steps of [3, 4]) {
      await tracker.annotate(
        makeTrajectory(true, steps),
        [playbookMatch],
        [],
        [playbook.id]
      );
    }

    const { playbookEffectiveness } = await tracker.computeMetrics();
    const entry = playbookEffectiveness.find(
      ({ playbookName }) => playbookName === 'no-reflections'
    );

    expect(entry).toBeDefined();
    expect(entry?.relevanceRate).toBe(0); // No assessments = 0 rate
  });
});
542
+ });