cognitive-core 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (329) hide show
  1. package/README.md +363 -2
  2. package/SKILL.md +193 -0
  3. package/dist/agents/index.d.ts +3 -0
  4. package/dist/agents/index.d.ts.map +1 -0
  5. package/dist/agents/index.js +5 -0
  6. package/dist/agents/index.js.map +1 -0
  7. package/dist/agents/mock-provider.d.ts +23 -0
  8. package/dist/agents/mock-provider.d.ts.map +1 -0
  9. package/dist/agents/mock-provider.js +71 -0
  10. package/dist/agents/mock-provider.js.map +1 -0
  11. package/dist/agents/types.d.ts +98 -0
  12. package/dist/agents/types.d.ts.map +1 -0
  13. package/dist/agents/types.js +44 -0
  14. package/dist/agents/types.js.map +1 -0
  15. package/dist/atlas.d.ts +196 -0
  16. package/dist/atlas.d.ts.map +1 -0
  17. package/dist/atlas.js +373 -0
  18. package/dist/atlas.js.map +1 -0
  19. package/dist/bin/cognitive-core.d.ts +18 -0
  20. package/dist/bin/cognitive-core.d.ts.map +1 -0
  21. package/dist/bin/cognitive-core.js +419 -0
  22. package/dist/bin/cognitive-core.js.map +1 -0
  23. package/dist/embeddings/bm25.d.ts +104 -0
  24. package/dist/embeddings/bm25.d.ts.map +1 -0
  25. package/dist/embeddings/bm25.js +264 -0
  26. package/dist/embeddings/bm25.js.map +1 -0
  27. package/dist/embeddings/index.d.ts +12 -0
  28. package/dist/embeddings/index.d.ts.map +1 -0
  29. package/dist/embeddings/index.js +16 -0
  30. package/dist/embeddings/index.js.map +1 -0
  31. package/dist/embeddings/manager.d.ts +112 -0
  32. package/dist/embeddings/manager.d.ts.map +1 -0
  33. package/dist/embeddings/manager.js +215 -0
  34. package/dist/embeddings/manager.js.map +1 -0
  35. package/dist/embeddings/provider.d.ts +101 -0
  36. package/dist/embeddings/provider.d.ts.map +1 -0
  37. package/dist/embeddings/provider.js +232 -0
  38. package/dist/embeddings/provider.js.map +1 -0
  39. package/dist/embeddings/vector-store.d.ts +101 -0
  40. package/dist/embeddings/vector-store.d.ts.map +1 -0
  41. package/dist/embeddings/vector-store.js +256 -0
  42. package/dist/embeddings/vector-store.js.map +1 -0
  43. package/dist/factory.d.ts +193 -0
  44. package/dist/factory.d.ts.map +1 -0
  45. package/dist/factory.js +109 -0
  46. package/dist/factory.js.map +1 -0
  47. package/dist/index.d.ts +43 -0
  48. package/dist/index.d.ts.map +1 -0
  49. package/dist/index.js +84 -0
  50. package/dist/index.js.map +1 -0
  51. package/dist/learning/analyzer.d.ts +110 -0
  52. package/dist/learning/analyzer.d.ts.map +1 -0
  53. package/dist/learning/analyzer.js +213 -0
  54. package/dist/learning/analyzer.js.map +1 -0
  55. package/dist/learning/effectiveness.d.ts +158 -0
  56. package/dist/learning/effectiveness.d.ts.map +1 -0
  57. package/dist/learning/effectiveness.js +251 -0
  58. package/dist/learning/effectiveness.js.map +1 -0
  59. package/dist/learning/index.d.ts +8 -0
  60. package/dist/learning/index.d.ts.map +1 -0
  61. package/dist/learning/index.js +11 -0
  62. package/dist/learning/index.js.map +1 -0
  63. package/dist/learning/llm-extractor.d.ts +88 -0
  64. package/dist/learning/llm-extractor.d.ts.map +1 -0
  65. package/dist/learning/llm-extractor.js +372 -0
  66. package/dist/learning/llm-extractor.js.map +1 -0
  67. package/dist/learning/meta-learner.d.ts +80 -0
  68. package/dist/learning/meta-learner.d.ts.map +1 -0
  69. package/dist/learning/meta-learner.js +355 -0
  70. package/dist/learning/meta-learner.js.map +1 -0
  71. package/dist/learning/pipeline.d.ts +65 -0
  72. package/dist/learning/pipeline.d.ts.map +1 -0
  73. package/dist/learning/pipeline.js +170 -0
  74. package/dist/learning/pipeline.js.map +1 -0
  75. package/dist/learning/playbook-extractor.d.ts +113 -0
  76. package/dist/learning/playbook-extractor.d.ts.map +1 -0
  77. package/dist/learning/playbook-extractor.js +523 -0
  78. package/dist/learning/playbook-extractor.js.map +1 -0
  79. package/dist/learning/usage-inference.d.ts +82 -0
  80. package/dist/learning/usage-inference.d.ts.map +1 -0
  81. package/dist/learning/usage-inference.js +261 -0
  82. package/dist/learning/usage-inference.js.map +1 -0
  83. package/dist/mcp/index.d.ts +6 -0
  84. package/dist/mcp/index.d.ts.map +1 -0
  85. package/dist/mcp/index.js +6 -0
  86. package/dist/mcp/index.js.map +1 -0
  87. package/dist/mcp/playbook-server.d.ts +120 -0
  88. package/dist/mcp/playbook-server.d.ts.map +1 -0
  89. package/dist/mcp/playbook-server.js +427 -0
  90. package/dist/mcp/playbook-server.js.map +1 -0
  91. package/dist/memory/curated-loader.d.ts +62 -0
  92. package/dist/memory/curated-loader.d.ts.map +1 -0
  93. package/dist/memory/curated-loader.js +106 -0
  94. package/dist/memory/curated-loader.js.map +1 -0
  95. package/dist/memory/experience.d.ts +122 -0
  96. package/dist/memory/experience.d.ts.map +1 -0
  97. package/dist/memory/experience.js +392 -0
  98. package/dist/memory/experience.js.map +1 -0
  99. package/dist/memory/index.d.ts +6 -0
  100. package/dist/memory/index.d.ts.map +1 -0
  101. package/dist/memory/index.js +9 -0
  102. package/dist/memory/index.js.map +1 -0
  103. package/dist/memory/meta.d.ts +90 -0
  104. package/dist/memory/meta.d.ts.map +1 -0
  105. package/dist/memory/meta.js +362 -0
  106. package/dist/memory/meta.js.map +1 -0
  107. package/dist/memory/playbook.d.ts +133 -0
  108. package/dist/memory/playbook.d.ts.map +1 -0
  109. package/dist/memory/playbook.js +357 -0
  110. package/dist/memory/playbook.js.map +1 -0
  111. package/dist/memory/system.d.ts +167 -0
  112. package/dist/memory/system.d.ts.map +1 -0
  113. package/dist/memory/system.js +383 -0
  114. package/dist/memory/system.js.map +1 -0
  115. package/dist/runtime/backends/acp.d.ts +67 -0
  116. package/dist/runtime/backends/acp.d.ts.map +1 -0
  117. package/dist/runtime/backends/acp.js +290 -0
  118. package/dist/runtime/backends/acp.js.map +1 -0
  119. package/dist/runtime/backends/index.d.ts +5 -0
  120. package/dist/runtime/backends/index.d.ts.map +1 -0
  121. package/dist/runtime/backends/index.js +6 -0
  122. package/dist/runtime/backends/index.js.map +1 -0
  123. package/dist/runtime/backends/mock.d.ts +67 -0
  124. package/dist/runtime/backends/mock.d.ts.map +1 -0
  125. package/dist/runtime/backends/mock.js +153 -0
  126. package/dist/runtime/backends/mock.js.map +1 -0
  127. package/dist/runtime/backends/subprocess.d.ts +56 -0
  128. package/dist/runtime/backends/subprocess.d.ts.map +1 -0
  129. package/dist/runtime/backends/subprocess.js +260 -0
  130. package/dist/runtime/backends/subprocess.js.map +1 -0
  131. package/dist/runtime/flows/learning.d.ts +73 -0
  132. package/dist/runtime/flows/learning.d.ts.map +1 -0
  133. package/dist/runtime/flows/learning.js +116 -0
  134. package/dist/runtime/flows/learning.js.map +1 -0
  135. package/dist/runtime/flows/validation.d.ts +122 -0
  136. package/dist/runtime/flows/validation.d.ts.map +1 -0
  137. package/dist/runtime/flows/validation.js +223 -0
  138. package/dist/runtime/flows/validation.js.map +1 -0
  139. package/dist/runtime/index.d.ts +6 -0
  140. package/dist/runtime/index.d.ts.map +1 -0
  141. package/dist/runtime/index.js +8 -0
  142. package/dist/runtime/index.js.map +1 -0
  143. package/dist/runtime/manager.d.ts +116 -0
  144. package/dist/runtime/manager.d.ts.map +1 -0
  145. package/dist/runtime/manager.js +416 -0
  146. package/dist/runtime/manager.js.map +1 -0
  147. package/dist/runtime/types.d.ts +138 -0
  148. package/dist/runtime/types.d.ts.map +1 -0
  149. package/dist/runtime/types.js +2 -0
  150. package/dist/runtime/types.js.map +1 -0
  151. package/dist/search/evaluator.d.ts +102 -0
  152. package/dist/search/evaluator.d.ts.map +1 -0
  153. package/dist/search/evaluator.js +352 -0
  154. package/dist/search/evaluator.js.map +1 -0
  155. package/dist/search/index.d.ts +7 -0
  156. package/dist/search/index.d.ts.map +1 -0
  157. package/dist/search/index.js +11 -0
  158. package/dist/search/index.js.map +1 -0
  159. package/dist/search/refinement-loop.d.ts +73 -0
  160. package/dist/search/refinement-loop.d.ts.map +1 -0
  161. package/dist/search/refinement-loop.js +245 -0
  162. package/dist/search/refinement-loop.js.map +1 -0
  163. package/dist/search/refinement-types.d.ts +154 -0
  164. package/dist/search/refinement-types.d.ts.map +1 -0
  165. package/dist/search/refinement-types.js +99 -0
  166. package/dist/search/refinement-types.js.map +1 -0
  167. package/dist/search/router.d.ts +61 -0
  168. package/dist/search/router.d.ts.map +1 -0
  169. package/dist/search/router.js +197 -0
  170. package/dist/search/router.js.map +1 -0
  171. package/dist/search/solver.d.ts +75 -0
  172. package/dist/search/solver.d.ts.map +1 -0
  173. package/dist/search/solver.js +216 -0
  174. package/dist/search/solver.js.map +1 -0
  175. package/dist/search/verification-runner.d.ts +125 -0
  176. package/dist/search/verification-runner.d.ts.map +1 -0
  177. package/dist/search/verification-runner.js +440 -0
  178. package/dist/search/verification-runner.js.map +1 -0
  179. package/dist/surfacing/index.d.ts +2 -0
  180. package/dist/surfacing/index.d.ts.map +1 -0
  181. package/dist/surfacing/index.js +2 -0
  182. package/dist/surfacing/index.js.map +1 -0
  183. package/dist/surfacing/skill-library.d.ts +158 -0
  184. package/dist/surfacing/skill-library.d.ts.map +1 -0
  185. package/dist/surfacing/skill-library.js +429 -0
  186. package/dist/surfacing/skill-library.js.map +1 -0
  187. package/dist/types/config.d.ts +1113 -0
  188. package/dist/types/config.d.ts.map +1 -0
  189. package/dist/types/config.js +274 -0
  190. package/dist/types/config.js.map +1 -0
  191. package/dist/types/index.d.ts +9 -0
  192. package/dist/types/index.d.ts.map +1 -0
  193. package/dist/types/index.js +14 -0
  194. package/dist/types/index.js.map +1 -0
  195. package/dist/types/memory.d.ts +339 -0
  196. package/dist/types/memory.d.ts.map +1 -0
  197. package/dist/types/memory.js +207 -0
  198. package/dist/types/memory.js.map +1 -0
  199. package/dist/types/meta.d.ts +146 -0
  200. package/dist/types/meta.d.ts.map +1 -0
  201. package/dist/types/meta.js +51 -0
  202. package/dist/types/meta.js.map +1 -0
  203. package/dist/types/outcome.d.ts +42 -0
  204. package/dist/types/outcome.d.ts.map +1 -0
  205. package/dist/types/outcome.js +50 -0
  206. package/dist/types/outcome.js.map +1 -0
  207. package/dist/types/playbook.d.ts +119 -0
  208. package/dist/types/playbook.d.ts.map +1 -0
  209. package/dist/types/playbook.js +71 -0
  210. package/dist/types/playbook.js.map +1 -0
  211. package/dist/types/step.d.ts +44 -0
  212. package/dist/types/step.d.ts.map +1 -0
  213. package/dist/types/step.js +32 -0
  214. package/dist/types/step.js.map +1 -0
  215. package/dist/types/task.d.ts +91 -0
  216. package/dist/types/task.d.ts.map +1 -0
  217. package/dist/types/task.js +39 -0
  218. package/dist/types/task.js.map +1 -0
  219. package/dist/types/trajectory.d.ts +221 -0
  220. package/dist/types/trajectory.d.ts.map +1 -0
  221. package/dist/types/trajectory.js +60 -0
  222. package/dist/types/trajectory.js.map +1 -0
  223. package/dist/utils/index.d.ts +4 -0
  224. package/dist/utils/index.d.ts.map +1 -0
  225. package/dist/utils/index.js +4 -0
  226. package/dist/utils/index.js.map +1 -0
  227. package/dist/utils/similarity.d.ts +31 -0
  228. package/dist/utils/similarity.d.ts.map +1 -0
  229. package/dist/utils/similarity.js +107 -0
  230. package/dist/utils/similarity.js.map +1 -0
  231. package/dist/utils/storage.d.ts +106 -0
  232. package/dist/utils/storage.d.ts.map +1 -0
  233. package/dist/utils/storage.js +203 -0
  234. package/dist/utils/storage.js.map +1 -0
  235. package/dist/utils/validation.d.ts +129 -0
  236. package/dist/utils/validation.d.ts.map +1 -0
  237. package/dist/utils/validation.js +171 -0
  238. package/dist/utils/validation.js.map +1 -0
  239. package/package.json +61 -9
  240. package/scripts/migrate-to-playbooks.ts +307 -0
  241. package/src/agents/index.ts +14 -0
  242. package/src/agents/mock-provider.ts +93 -0
  243. package/src/agents/types.ts +137 -0
  244. package/src/atlas.ts +560 -0
  245. package/src/bin/cognitive-core.ts +470 -0
  246. package/src/embeddings/bm25.ts +337 -0
  247. package/src/embeddings/index.ts +39 -0
  248. package/src/embeddings/manager.ts +288 -0
  249. package/src/embeddings/provider.ts +311 -0
  250. package/src/embeddings/vector-store.ts +353 -0
  251. package/src/factory.ts +263 -0
  252. package/src/index.ts +246 -0
  253. package/src/learning/analyzer.ts +335 -0
  254. package/src/learning/effectiveness.ts +428 -0
  255. package/src/learning/index.ts +58 -0
  256. package/src/learning/llm-extractor.ts +542 -0
  257. package/src/learning/meta-learner.ts +516 -0
  258. package/src/learning/pipeline.ts +244 -0
  259. package/src/learning/playbook-extractor.ts +702 -0
  260. package/src/learning/usage-inference.ts +372 -0
  261. package/src/mcp/index.ts +12 -0
  262. package/src/mcp/playbook-server.ts +565 -0
  263. package/src/memory/curated-loader.ts +160 -0
  264. package/src/memory/experience.ts +515 -0
  265. package/src/memory/index.ts +27 -0
  266. package/src/memory/meta.ts +506 -0
  267. package/src/memory/playbook.ts +493 -0
  268. package/src/memory/system.ts +551 -0
  269. package/src/runtime/backends/acp.ts +378 -0
  270. package/src/runtime/backends/index.ts +24 -0
  271. package/src/runtime/backends/mock.ts +218 -0
  272. package/src/runtime/backends/subprocess.ts +356 -0
  273. package/src/runtime/flows/learning.ts +183 -0
  274. package/src/runtime/flows/validation.ts +381 -0
  275. package/src/runtime/index.ts +53 -0
  276. package/src/runtime/manager.ts +541 -0
  277. package/src/runtime/types.ts +157 -0
  278. package/src/search/evaluator.ts +474 -0
  279. package/src/search/index.ts +59 -0
  280. package/src/search/refinement-loop.ts +363 -0
  281. package/src/search/refinement-types.ts +159 -0
  282. package/src/search/router.ts +261 -0
  283. package/src/search/solver.ts +303 -0
  284. package/src/search/verification-runner.ts +570 -0
  285. package/src/surfacing/index.ts +6 -0
  286. package/src/surfacing/skill-library.ts +594 -0
  287. package/src/types/config.ts +333 -0
  288. package/src/types/index.ts +130 -0
  289. package/src/types/memory.ts +270 -0
  290. package/src/types/meta.ts +218 -0
  291. package/src/types/outcome.ts +66 -0
  292. package/src/types/playbook.ts +196 -0
  293. package/src/types/step.ts +40 -0
  294. package/src/types/task.ts +52 -0
  295. package/src/types/trajectory.ts +80 -0
  296. package/src/utils/index.ts +38 -0
  297. package/src/utils/similarity.ts +139 -0
  298. package/src/utils/storage.ts +249 -0
  299. package/src/utils/validation.ts +286 -0
  300. package/tests/embeddings/bm25.test.ts +130 -0
  301. package/tests/embeddings/manager.test.ts +205 -0
  302. package/tests/integration/atlas.test.ts +266 -0
  303. package/tests/integration/e2e.test.ts +929 -0
  304. package/tests/learning/analyzer.test.ts +426 -0
  305. package/tests/learning/effectiveness.test.ts +542 -0
  306. package/tests/learning/pipeline.test.ts +176 -0
  307. package/tests/learning/playbook-extractor-provenance.test.ts +114 -0
  308. package/tests/learning/usage-inference.test.ts +254 -0
  309. package/tests/mcp/playbook-server.test.ts +252 -0
  310. package/tests/memory/experience.test.ts +198 -0
  311. package/tests/memory/playbook.test.ts +338 -0
  312. package/tests/memory/provenance.test.ts +639 -0
  313. package/tests/memory/system.test.ts +325 -0
  314. package/tests/runtime/agent-manager.test.ts +512 -0
  315. package/tests/runtime/mock-backend.test.ts +248 -0
  316. package/tests/search/refinement-loop.test.ts +468 -0
  317. package/tests/search/refinement.test.ts +267 -0
  318. package/tests/search/router.test.ts +427 -0
  319. package/tests/surfacing/skill-library.test.ts +292 -0
  320. package/tests/types/outcome.test.ts +147 -0
  321. package/tests/types/step.test.ts +133 -0
  322. package/tests/types/task.test.ts +158 -0
  323. package/tests/types/trajectory.test.ts +253 -0
  324. package/tests/utils/similarity.test.ts +188 -0
  325. package/tests/utils/validation.test.ts +252 -0
  326. package/tsconfig.json +25 -0
  327. package/vitest.config.ts +22 -0
  328. package/index.d.ts +0 -4
  329. package/index.js +0 -4
@@ -0,0 +1,428 @@
1
+ /**
2
+ * Learning Effectiveness Tracker
3
+ *
4
+ * Measures whether the learning system is actually improving agent outcomes.
5
+ * Uses post-task reflection annotations rather than A/B replay evaluation.
6
+ *
7
+ * After each trajectory completes, the tracker annotates it with:
8
+ * - What knowledge was surfaced (playbooks, experiences)
9
+ * - Whether the surfaced knowledge was applied (inferred from trajectory)
10
+ * - Whether the outcome improved relative to similar unaided tasks
11
+ *
12
+ * These annotations accumulate into aggregate metrics that answer:
13
+ * "Is learning making agents better over time?"
14
+ *
15
+ * Inspired by Dash's eval framework but adapted for domain-agnostic
16
+ * trajectory learning rather than text-to-SQL.
17
+ */
18
+
19
+ import type { Trajectory } from '../types/index.js';
20
+ import type { PlaybookMatch } from '../memory/playbook.js';
21
+ import { JsonStore } from '../utils/storage.js';
22
+
23
+ // === TYPES ===
24
+
25
/**
 * Post-completion record attached to a single trajectory.
 *
 * Captures which knowledge items were available to the agent, which of them
 * were (by inference) actually used, how the task turned out, and an optional
 * after-the-fact reflection.
 */
export interface TaskAnnotation {
  /** Identifier of the annotation itself. */
  id: string;
  /** Identifier of the trajectory this annotation describes. */
  trajectoryId: string;
  /** When the annotation was created. */
  timestamp: Date;

  /** Knowledge surfaced before or during execution. */
  knowledgeSurfaced: {
    /** IDs of the surfaced playbooks. */
    playbookIds: Array<string>;
    /** Names of the surfaced playbooks, index-aligned with `playbookIds`. */
    playbookNames: Array<string>;
    /** IDs of the surfaced experiences. */
    experienceIds: Array<string>;
    /** Total number of knowledge items retrieved. */
    totalItems: number;
  };

  /** Knowledge that was actually applied (inferred from trajectory analysis). */
  knowledgeApplied: {
    /** Playbook IDs whose tactics appeared in the trajectory steps. */
    playbookIdsUsed: Array<string>;
    /** True when at least one surfaced knowledge item was used. */
    anyKnowledgeUsed: boolean;
  };

  /** Outcome metrics for this task. */
  outcome: {
    /** Whether the task succeeded. */
    success: boolean;
    /** Number of steps in the trajectory. */
    stepCount: number;
    /** Number of error-recovery cycles in the trajectory. */
    errorRecoveries: number;
    /** Whether the task domain had prior playbooks. */
    domainHadPlaybooks: boolean;
  };

  /** Optional human or agent reflection recorded after task completion. */
  reflection?: {
    /** Verdict on whether the surfaced knowledge was relevant. */
    knowledgeRelevance: 'helpful' | 'irrelevant' | 'misleading' | 'not_assessed';
    /** Free-form notes on what worked or did not. */
    notes?: string;
  };
}
69
+
70
/**
 * Aggregate effectiveness figures derived from a set of task annotations.
 *
 * "Guided" tasks are those for which at least one knowledge item was
 * surfaced; "unguided" tasks had none.
 */
export interface EffectivenessMetrics {
  /** Total number of tasks tracked. */
  totalTasks: number;

  /** Number of tasks where knowledge was surfaced. */
  guidedTasks: number;
  /** Number of tasks where no knowledge was surfaced. */
  unguidedTasks: number;

  /** Success rate across guided tasks. */
  guidedSuccessRate: number;
  /** Success rate across unguided tasks. */
  unguidedSuccessRate: number;
  /** Guided minus unguided success rate; positive means guidance helps. */
  successRateDelta: number;

  /** Average step count across guided tasks. */
  guidedAvgSteps: number;
  /** Average step count across unguided tasks. */
  unguidedAvgSteps: number;
  /** Guided minus unguided average steps; negative means guidance reduces steps (good). */
  stepCountDelta: number;

  /** Knowledge application rate. */
  knowledgeApplicationRate: number;
  /** Of the tasks where knowledge was applied, how often the task succeeded. */
  appliedKnowledgeSuccessRate: number;

  /** Average error recoveries across guided tasks. */
  guidedAvgErrorRecoveries: number;
  /** Average error recoveries across unguided tasks. */
  unguidedAvgErrorRecoveries: number;

  /** Per-playbook effectiveness breakdown. */
  playbookEffectiveness: Array<PlaybookEffectivenessEntry>;

  /** Start of the time window these metrics cover. */
  windowStart: Date;
  /** End of the time window these metrics cover. */
  windowEnd: Date;
}
109
+
110
/**
 * Effectiveness figures for one playbook, aggregated over the annotations in
 * which that playbook was surfaced.
 */
export interface PlaybookEffectivenessEntry {
  /** ID of the playbook. */
  playbookId: string;
  /** Display name of the playbook. */
  playbookName: string;
  /** Number of times this playbook was surfaced. */
  surfacedCount: number;
  /** Number of times this playbook was actually applied. */
  appliedCount: number;
  /** Success rate of the tasks in which it was applied. */
  appliedSuccessRate: number;
  /** Average step count of the tasks in which it was applied. */
  appliedAvgSteps: number;
  /** How often agents found this knowledge relevant. */
  relevanceRate: number;
}
127
+
128
+ // === TRACKER ===
129
+
130
/**
 * Tracks learning effectiveness through post-task annotations.
 *
 * Annotations are kept in a `JsonStore` named `effectiveness` under the
 * supplied base directory. Every method that reads or writes annotations
 * awaits `init()` first, so callers do not have to initialize explicitly.
 */
export class LearningEffectivenessTracker {
  private readonly store: JsonStore<TaskAnnotation>;
  private initialized = false;

  /**
   * @param baseDir Directory handed to the underlying `JsonStore`.
   */
  constructor(baseDir: string) {
    this.store = new JsonStore<TaskAnnotation>(baseDir, 'effectiveness', {
      autoSaveInterval: 30000,
      pretty: true,
    });
  }

  /**
   * Initialize the underlying store. Subsequent calls are no-ops.
   */
  async init(): Promise<void> {
    if (this.initialized) return;
    await this.store.init();
    this.initialized = true;
  }

  /**
   * Annotate a completed trajectory with knowledge usage data.
   * Call this after a task finishes and usage inference has run.
   *
   * The annotation id is `ann-<trajectory.id>`, so annotating the same
   * trajectory again writes to the same key.
   *
   * @param trajectory Completed trajectory being annotated.
   * @param surfacedPlaybooks Playbook matches that were surfaced for the task.
   * @param surfacedExperienceIds IDs of the experiences that were surfaced.
   * @param appliedPlaybookIds IDs of playbooks inferred to have been applied.
   * @returns The annotation that was stored.
   */
  async annotate(
    trajectory: Trajectory,
    surfacedPlaybooks: PlaybookMatch[],
    surfacedExperienceIds: string[],
    appliedPlaybookIds: string[],
  ): Promise<TaskAnnotation> {
    await this.init();

    const annotation: TaskAnnotation = {
      id: `ann-${trajectory.id}`,
      trajectoryId: trajectory.id,
      timestamp: new Date(),
      knowledgeSurfaced: {
        playbookIds: surfacedPlaybooks.map((match) => match.playbook.id),
        playbookNames: surfacedPlaybooks.map((match) => match.playbook.name),
        // Copy caller-owned arrays so later mutation by the caller cannot
        // silently alter the stored annotation.
        experienceIds: [...surfacedExperienceIds],
        totalItems: surfacedPlaybooks.length + surfacedExperienceIds.length,
      },
      knowledgeApplied: {
        playbookIdsUsed: [...appliedPlaybookIds],
        anyKnowledgeUsed: appliedPlaybookIds.length > 0,
      },
      outcome: {
        success: trajectory.outcome.success,
        stepCount: trajectory.steps.length,
        errorRecoveries: countErrorRecoveries(trajectory),
        domainHadPlaybooks: surfacedPlaybooks.length > 0,
      },
    };

    this.store.set(annotation.id, annotation);
    await this.store.save(annotation.id);

    return annotation;
  }

  /**
   * Add a reflection to an existing annotation.
   * Called after human review or agent self-reflection.
   *
   * If no annotation with the given id exists, the call does nothing.
   *
   * @param annotationId ID of the annotation to update.
   * @param reflection Reflection to attach (replaces any previous one).
   */
  async addReflection(
    annotationId: string,
    reflection: TaskAnnotation['reflection'],
  ): Promise<void> {
    await this.init();

    const annotation = this.store.get(annotationId);
    if (!annotation) return;

    annotation.reflection = reflection;
    this.store.set(annotationId, annotation);
    await this.store.save(annotationId);
  }

  /**
   * Compute aggregate effectiveness metrics over a time window.
   * Defaults to all time if no window is specified.
   *
   * NOTE(review): `options.domain` is accepted for interface compatibility but
   * is not applied, because annotations do not record a task domain.
   *
   * @param options Optional inclusive `since` / `until` bounds on the
   *   annotation timestamp.
   * @returns Metrics for the matching annotations; every rate and average is
   *   0 when its denominator is empty.
   */
  async computeMetrics(options?: {
    since?: Date;
    until?: Date;
    domain?: string;
  }): Promise<EffectivenessMetrics> {
    await this.init();

    const sinceMs = options?.since?.getTime();
    const untilMs = options?.until?.getTime();

    // Timestamps are re-wrapped in `new Date(...)` so values that are not
    // Date instances (e.g. date strings) are still compared numerically.
    const annotations = this.store.values().filter((a) => {
      const ts = new Date(a.timestamp).getTime();
      if (sinceMs !== undefined && !(ts >= sinceMs)) return false;
      if (untilMs !== undefined && !(ts <= untilMs)) return false;
      return true;
    });

    const guided = annotations.filter((a) => a.knowledgeSurfaced.totalItems > 0);
    const unguided = annotations.filter((a) => a.knowledgeSurfaced.totalItems === 0);

    const succeeded = (a: TaskAnnotation): boolean => a.outcome.success;
    const usedKnowledge = (a: TaskAnnotation): boolean => a.knowledgeApplied.anyKnowledgeUsed;
    const { ratio, mean } = LearningEffectivenessTracker;

    const guidedSuccessRate = ratio(guided.filter(succeeded).length, guided.length);
    const unguidedSuccessRate = ratio(unguided.filter(succeeded).length, unguided.length);

    const guidedAvgSteps = mean(guided, (a) => a.outcome.stepCount);
    const unguidedAvgSteps = mean(unguided, (a) => a.outcome.stepCount);

    // The application rate is a share of guided tasks, so the numerator is
    // restricted to guided tasks as well; otherwise an unguided annotation
    // reporting applied playbooks would inflate the rate (possibly past 1).
    const knowledgeApplicationRate = ratio(guided.filter(usedKnowledge).length, guided.length);

    const applied = annotations.filter(usedKnowledge);
    const appliedKnowledgeSuccessRate = ratio(applied.filter(succeeded).length, applied.length);

    return {
      totalTasks: annotations.length,
      guidedTasks: guided.length,
      unguidedTasks: unguided.length,
      guidedSuccessRate,
      unguidedSuccessRate,
      successRateDelta: guidedSuccessRate - unguidedSuccessRate,
      guidedAvgSteps,
      unguidedAvgSteps,
      stepCountDelta: guidedAvgSteps - unguidedAvgSteps,
      knowledgeApplicationRate,
      appliedKnowledgeSuccessRate,
      guidedAvgErrorRecoveries: mean(guided, (a) => a.outcome.errorRecoveries),
      unguidedAvgErrorRecoveries: mean(unguided, (a) => a.outcome.errorRecoveries),
      playbookEffectiveness: this.computePlaybookEffectiveness(annotations),
      windowStart: options?.since ?? new Date(0),
      windowEnd: options?.until ?? new Date(),
    };
  }

  /**
   * Get all annotations (for export/inspection).
   */
  async getAll(): Promise<TaskAnnotation[]> {
    await this.init();
    return this.store.values();
  }

  /**
   * Get the annotation for a trajectory ID, or `undefined` if there is none.
   */
  async getByTrajectoryId(trajectoryId: string): Promise<TaskAnnotation | undefined> {
    await this.init();

    // `annotate` keys annotations as `ann-<trajectoryId>`, so try the direct
    // lookup first and only scan when that key is absent or mismatched.
    const direct = this.store.get(`ann-${trajectoryId}`);
    if (direct && direct.trajectoryId === trajectoryId) return direct;

    return this.store.values().find((a) => a.trajectoryId === trajectoryId);
  }

  /**
   * Get the number of stored annotations.
   */
  async count(): Promise<number> {
    await this.init();
    return this.store.size();
  }

  /**
   * Close the underlying store.
   */
  async close(): Promise<void> {
    await this.store.close();
  }

  // === PRIVATE ===

  /** `part / whole`, or 0 when `whole` is not positive. */
  private static ratio(part: number, whole: number): number {
    return whole > 0 ? part / whole : 0;
  }

  /** Arithmetic mean of `pick(item)` over `items`, or 0 for an empty list. */
  private static mean(
    items: readonly TaskAnnotation[],
    pick: (a: TaskAnnotation) => number,
  ): number {
    const total = items.reduce((sum, a) => sum + pick(a), 0);
    return LearningEffectivenessTracker.ratio(total, items.length);
  }

  /**
   * Aggregate per-playbook statistics over the playbooks that were surfaced
   * in the given annotations, sorted by surfaced count (descending).
   *
   * A reflection only counts towards `relevanceRate` when its verdict is
   * something other than `'not_assessed'`.
   */
  private computePlaybookEffectiveness(
    annotations: TaskAnnotation[]
  ): PlaybookEffectivenessEntry[] {
    interface Tally {
      name: string;
      surfaced: number;
      applied: number;
      appliedSuccesses: number;
      appliedStepSum: number;
      relevantCount: number;
      assessedCount: number;
    }

    const tallies = new Map<string, Tally>();

    for (const ann of annotations) {
      const verdict = ann.reflection?.knowledgeRelevance;
      // 'not_assessed' means no relevance judgement was made, so it must not
      // enter the denominator of the relevance rate.
      const wasAssessed = verdict !== undefined && verdict !== 'not_assessed';

      for (const [index, id] of ann.knowledgeSurfaced.playbookIds.entries()) {
        let tally = tallies.get(id);
        if (tally === undefined) {
          tally = {
            // Fall back to the id when no name is recorded at this index.
            name: ann.knowledgeSurfaced.playbookNames[index] ?? id,
            surfaced: 0,
            applied: 0,
            appliedSuccesses: 0,
            appliedStepSum: 0,
            relevantCount: 0,
            assessedCount: 0,
          };
          tallies.set(id, tally);
        }

        tally.surfaced++;

        if (ann.knowledgeApplied.playbookIdsUsed.includes(id)) {
          tally.applied++;
          if (ann.outcome.success) tally.appliedSuccesses++;
          tally.appliedStepSum += ann.outcome.stepCount;
        }

        if (wasAssessed) {
          tally.assessedCount++;
          if (verdict === 'helpful') tally.relevantCount++;
        }
      }
    }

    const { ratio } = LearningEffectivenessTracker;

    return [...tallies]
      .map(([id, tally]) => ({
        playbookId: id,
        playbookName: tally.name,
        surfacedCount: tally.surfaced,
        appliedCount: tally.applied,
        appliedSuccessRate: ratio(tally.appliedSuccesses, tally.applied),
        appliedAvgSteps: ratio(tally.appliedStepSum, tally.applied),
        relevanceRate: ratio(tally.relevantCount, tally.assessedCount),
      }))
      .sort((a, b) => b.surfacedCount - a.surfacedCount);
  }
}
399
+
400
+ // === HELPERS ===
401
+
402
/**
 * Count error-recovery cycles in a trajectory.
 *
 * A recovery is a pair of adjacent steps where the earlier step's observation
 * contains the text "error" (case-insensitive) and the later step's
 * observation does not. A missing observation counts as "no error".
 */
function countErrorRecoveries(trajectory: Trajectory): number {
  const mentionsError = (text?: string): boolean =>
    text?.toLowerCase().includes('error') === true;

  const { steps } = trajectory;
  let tally = 0;

  // Walk from the second step onwards, looking back at the previous one.
  for (let idx = 1; idx < steps.length; idx += 1) {
    const previousFailed = mentionsError(steps[idx - 1].observation);
    const currentFailed = mentionsError(steps[idx].observation);
    if (previousFailed && !currentFailed) {
      tally += 1;
    }
  }

  return tally;
}
420
+
421
/**
 * Factory for a learning effectiveness tracker.
 *
 * @param baseDir Directory passed straight to the tracker's constructor.
 * @returns A new, not yet initialized `LearningEffectivenessTracker`.
 */
export function createEffectivenessTracker(
  baseDir: string
): LearningEffectivenessTracker {
  const tracker = new LearningEffectivenessTracker(baseDir);
  return tracker;
}
@@ -0,0 +1,58 @@
1
+ export {
2
+ TrajectoryAnalyzer,
3
+ createAnalyzer,
4
+ simpleCreditAssignment,
5
+ outcomeCreditAssignment,
6
+ getCreditAssignmentFn,
7
+ type AnalysisResult,
8
+ type ErrorPattern,
9
+ type TrainingExample,
10
+ type CreditAssignmentFn,
11
+ type AnalyzerConfig,
12
+ } from './analyzer.js';
13
+
14
+ export {
15
+ LearningPipeline,
16
+ createLearningPipeline,
17
+ type ProcessResult,
18
+ type BatchResult,
19
+ } from './pipeline.js';
20
+
21
+ // Playbook-based learning
22
+ export {
23
+ PlaybookExtractor,
24
+ createPlaybookExtractor,
25
+ type ExtractedPlaybooks,
26
+ type PlaybookUpdate,
27
+ type PlaybookExtractorConfig,
28
+ } from './playbook-extractor.js';
29
+
30
+ export {
31
+ MetaLearner,
32
+ createMetaLearner,
33
+ type MetaLearnerConfig,
34
+ } from './meta-learner.js';
35
+
36
+ export {
37
+ PlaybookUsageInference,
38
+ createUsageInference,
39
+ type PlaybookUsageResult,
40
+ type UsageInferenceConfig,
41
+ } from './usage-inference.js';
42
+
43
+ // LLM-based extraction
44
+ export {
45
+ LLMPlaybookExtractor,
46
+ createLLMExtractor,
47
+ type LLMExtractorConfig,
48
+ type LLMExtractionResult,
49
+ } from './llm-extractor.js';
50
+
51
+ // Effectiveness tracking
52
+ export {
53
+ LearningEffectivenessTracker,
54
+ createEffectivenessTracker,
55
+ type TaskAnnotation,
56
+ type EffectivenessMetrics,
57
+ type PlaybookEffectivenessEntry,
58
+ } from './effectiveness.js';