cognitive-core 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (329) hide show
  1. package/README.md +302 -116
  2. package/SKILL.md +193 -0
  3. package/dist/agents/index.d.ts +3 -0
  4. package/dist/agents/index.d.ts.map +1 -0
  5. package/dist/agents/index.js +5 -0
  6. package/dist/agents/index.js.map +1 -0
  7. package/dist/agents/mock-provider.d.ts +23 -0
  8. package/dist/agents/mock-provider.d.ts.map +1 -0
  9. package/dist/agents/mock-provider.js +71 -0
  10. package/dist/agents/mock-provider.js.map +1 -0
  11. package/dist/agents/types.d.ts +98 -0
  12. package/dist/agents/types.d.ts.map +1 -0
  13. package/dist/agents/types.js +44 -0
  14. package/dist/agents/types.js.map +1 -0
  15. package/dist/atlas.d.ts +196 -0
  16. package/dist/atlas.d.ts.map +1 -0
  17. package/dist/atlas.js +373 -0
  18. package/dist/atlas.js.map +1 -0
  19. package/dist/bin/cognitive-core.d.ts +18 -0
  20. package/dist/bin/cognitive-core.d.ts.map +1 -0
  21. package/dist/bin/cognitive-core.js +419 -0
  22. package/dist/bin/cognitive-core.js.map +1 -0
  23. package/dist/embeddings/bm25.d.ts +104 -0
  24. package/dist/embeddings/bm25.d.ts.map +1 -0
  25. package/dist/embeddings/bm25.js +264 -0
  26. package/dist/embeddings/bm25.js.map +1 -0
  27. package/dist/embeddings/index.d.ts +12 -0
  28. package/dist/embeddings/index.d.ts.map +1 -0
  29. package/dist/embeddings/index.js +16 -0
  30. package/dist/embeddings/index.js.map +1 -0
  31. package/dist/embeddings/manager.d.ts +112 -0
  32. package/dist/embeddings/manager.d.ts.map +1 -0
  33. package/dist/embeddings/manager.js +215 -0
  34. package/dist/embeddings/manager.js.map +1 -0
  35. package/dist/embeddings/provider.d.ts +101 -0
  36. package/dist/embeddings/provider.d.ts.map +1 -0
  37. package/dist/embeddings/provider.js +232 -0
  38. package/dist/embeddings/provider.js.map +1 -0
  39. package/dist/embeddings/vector-store.d.ts +101 -0
  40. package/dist/embeddings/vector-store.d.ts.map +1 -0
  41. package/dist/embeddings/vector-store.js +256 -0
  42. package/dist/embeddings/vector-store.js.map +1 -0
  43. package/dist/factory.d.ts +193 -0
  44. package/dist/factory.d.ts.map +1 -0
  45. package/dist/factory.js +109 -0
  46. package/dist/factory.js.map +1 -0
  47. package/dist/index.d.ts +30 -453
  48. package/dist/index.d.ts.map +1 -0
  49. package/dist/index.js +84 -509
  50. package/dist/index.js.map +1 -0
  51. package/dist/learning/analyzer.d.ts +110 -0
  52. package/dist/learning/analyzer.d.ts.map +1 -0
  53. package/dist/learning/analyzer.js +213 -0
  54. package/dist/learning/analyzer.js.map +1 -0
  55. package/dist/learning/effectiveness.d.ts +158 -0
  56. package/dist/learning/effectiveness.d.ts.map +1 -0
  57. package/dist/learning/effectiveness.js +251 -0
  58. package/dist/learning/effectiveness.js.map +1 -0
  59. package/dist/learning/index.d.ts +8 -0
  60. package/dist/learning/index.d.ts.map +1 -0
  61. package/dist/learning/index.js +11 -0
  62. package/dist/learning/index.js.map +1 -0
  63. package/dist/learning/llm-extractor.d.ts +88 -0
  64. package/dist/learning/llm-extractor.d.ts.map +1 -0
  65. package/dist/learning/llm-extractor.js +372 -0
  66. package/dist/learning/llm-extractor.js.map +1 -0
  67. package/dist/learning/meta-learner.d.ts +80 -0
  68. package/dist/learning/meta-learner.d.ts.map +1 -0
  69. package/dist/learning/meta-learner.js +355 -0
  70. package/dist/learning/meta-learner.js.map +1 -0
  71. package/dist/learning/pipeline.d.ts +65 -0
  72. package/dist/learning/pipeline.d.ts.map +1 -0
  73. package/dist/learning/pipeline.js +170 -0
  74. package/dist/learning/pipeline.js.map +1 -0
  75. package/dist/learning/playbook-extractor.d.ts +113 -0
  76. package/dist/learning/playbook-extractor.d.ts.map +1 -0
  77. package/dist/learning/playbook-extractor.js +523 -0
  78. package/dist/learning/playbook-extractor.js.map +1 -0
  79. package/dist/learning/usage-inference.d.ts +82 -0
  80. package/dist/learning/usage-inference.d.ts.map +1 -0
  81. package/dist/learning/usage-inference.js +261 -0
  82. package/dist/learning/usage-inference.js.map +1 -0
  83. package/dist/mcp/index.d.ts +6 -0
  84. package/dist/mcp/index.d.ts.map +1 -0
  85. package/dist/mcp/index.js +6 -0
  86. package/dist/mcp/index.js.map +1 -0
  87. package/dist/mcp/playbook-server.d.ts +120 -0
  88. package/dist/mcp/playbook-server.d.ts.map +1 -0
  89. package/dist/mcp/playbook-server.js +427 -0
  90. package/dist/mcp/playbook-server.js.map +1 -0
  91. package/dist/memory/curated-loader.d.ts +62 -0
  92. package/dist/memory/curated-loader.d.ts.map +1 -0
  93. package/dist/memory/curated-loader.js +106 -0
  94. package/dist/memory/curated-loader.js.map +1 -0
  95. package/dist/memory/experience.d.ts +122 -0
  96. package/dist/memory/experience.d.ts.map +1 -0
  97. package/dist/memory/experience.js +392 -0
  98. package/dist/memory/experience.js.map +1 -0
  99. package/dist/memory/index.d.ts +6 -0
  100. package/dist/memory/index.d.ts.map +1 -0
  101. package/dist/memory/index.js +9 -0
  102. package/dist/memory/index.js.map +1 -0
  103. package/dist/memory/meta.d.ts +90 -0
  104. package/dist/memory/meta.d.ts.map +1 -0
  105. package/dist/memory/meta.js +362 -0
  106. package/dist/memory/meta.js.map +1 -0
  107. package/dist/memory/playbook.d.ts +133 -0
  108. package/dist/memory/playbook.d.ts.map +1 -0
  109. package/dist/memory/playbook.js +357 -0
  110. package/dist/memory/playbook.js.map +1 -0
  111. package/dist/memory/system.d.ts +167 -0
  112. package/dist/memory/system.d.ts.map +1 -0
  113. package/dist/memory/system.js +383 -0
  114. package/dist/memory/system.js.map +1 -0
  115. package/dist/runtime/backends/acp.d.ts +67 -0
  116. package/dist/runtime/backends/acp.d.ts.map +1 -0
  117. package/dist/runtime/backends/acp.js +290 -0
  118. package/dist/runtime/backends/acp.js.map +1 -0
  119. package/dist/runtime/backends/index.d.ts +5 -0
  120. package/dist/runtime/backends/index.d.ts.map +1 -0
  121. package/dist/runtime/backends/index.js +6 -0
  122. package/dist/runtime/backends/index.js.map +1 -0
  123. package/dist/runtime/backends/mock.d.ts +67 -0
  124. package/dist/runtime/backends/mock.d.ts.map +1 -0
  125. package/dist/runtime/backends/mock.js +153 -0
  126. package/dist/runtime/backends/mock.js.map +1 -0
  127. package/dist/runtime/backends/subprocess.d.ts +56 -0
  128. package/dist/runtime/backends/subprocess.d.ts.map +1 -0
  129. package/dist/runtime/backends/subprocess.js +260 -0
  130. package/dist/runtime/backends/subprocess.js.map +1 -0
  131. package/dist/runtime/flows/learning.d.ts +73 -0
  132. package/dist/runtime/flows/learning.d.ts.map +1 -0
  133. package/dist/runtime/flows/learning.js +116 -0
  134. package/dist/runtime/flows/learning.js.map +1 -0
  135. package/dist/runtime/flows/validation.d.ts +122 -0
  136. package/dist/runtime/flows/validation.d.ts.map +1 -0
  137. package/dist/runtime/flows/validation.js +223 -0
  138. package/dist/runtime/flows/validation.js.map +1 -0
  139. package/dist/runtime/index.d.ts +6 -0
  140. package/dist/runtime/index.d.ts.map +1 -0
  141. package/dist/runtime/index.js +8 -0
  142. package/dist/runtime/index.js.map +1 -0
  143. package/dist/runtime/manager.d.ts +116 -0
  144. package/dist/runtime/manager.d.ts.map +1 -0
  145. package/dist/runtime/manager.js +416 -0
  146. package/dist/runtime/manager.js.map +1 -0
  147. package/dist/runtime/types.d.ts +138 -0
  148. package/dist/runtime/types.d.ts.map +1 -0
  149. package/dist/runtime/types.js +2 -0
  150. package/dist/runtime/types.js.map +1 -0
  151. package/dist/search/evaluator.d.ts +102 -0
  152. package/dist/search/evaluator.d.ts.map +1 -0
  153. package/dist/search/evaluator.js +352 -0
  154. package/dist/search/evaluator.js.map +1 -0
  155. package/dist/search/index.d.ts +7 -0
  156. package/dist/search/index.d.ts.map +1 -0
  157. package/dist/search/index.js +11 -0
  158. package/dist/search/index.js.map +1 -0
  159. package/dist/search/refinement-loop.d.ts +73 -0
  160. package/dist/search/refinement-loop.d.ts.map +1 -0
  161. package/dist/search/refinement-loop.js +245 -0
  162. package/dist/search/refinement-loop.js.map +1 -0
  163. package/dist/search/refinement-types.d.ts +154 -0
  164. package/dist/search/refinement-types.d.ts.map +1 -0
  165. package/dist/search/refinement-types.js +99 -0
  166. package/dist/search/refinement-types.js.map +1 -0
  167. package/dist/search/router.d.ts +61 -0
  168. package/dist/search/router.d.ts.map +1 -0
  169. package/dist/search/router.js +197 -0
  170. package/dist/search/router.js.map +1 -0
  171. package/dist/search/solver.d.ts +75 -0
  172. package/dist/search/solver.d.ts.map +1 -0
  173. package/dist/search/solver.js +216 -0
  174. package/dist/search/solver.js.map +1 -0
  175. package/dist/search/verification-runner.d.ts +125 -0
  176. package/dist/search/verification-runner.d.ts.map +1 -0
  177. package/dist/search/verification-runner.js +440 -0
  178. package/dist/search/verification-runner.js.map +1 -0
  179. package/dist/surfacing/index.d.ts +2 -0
  180. package/dist/surfacing/index.d.ts.map +1 -0
  181. package/dist/surfacing/index.js +2 -0
  182. package/dist/surfacing/index.js.map +1 -0
  183. package/dist/surfacing/skill-library.d.ts +158 -0
  184. package/dist/surfacing/skill-library.d.ts.map +1 -0
  185. package/dist/surfacing/skill-library.js +429 -0
  186. package/dist/surfacing/skill-library.js.map +1 -0
  187. package/dist/types/config.d.ts +1113 -0
  188. package/dist/types/config.d.ts.map +1 -0
  189. package/dist/types/config.js +274 -0
  190. package/dist/types/config.js.map +1 -0
  191. package/dist/types/index.d.ts +9 -0
  192. package/dist/types/index.d.ts.map +1 -0
  193. package/dist/types/index.js +14 -0
  194. package/dist/types/index.js.map +1 -0
  195. package/dist/types/memory.d.ts +339 -0
  196. package/dist/types/memory.d.ts.map +1 -0
  197. package/dist/types/memory.js +207 -0
  198. package/dist/types/memory.js.map +1 -0
  199. package/dist/types/meta.d.ts +146 -0
  200. package/dist/types/meta.d.ts.map +1 -0
  201. package/dist/types/meta.js +51 -0
  202. package/dist/types/meta.js.map +1 -0
  203. package/dist/types/outcome.d.ts +42 -0
  204. package/dist/types/outcome.d.ts.map +1 -0
  205. package/dist/types/outcome.js +50 -0
  206. package/dist/types/outcome.js.map +1 -0
  207. package/dist/types/playbook.d.ts +119 -0
  208. package/dist/types/playbook.d.ts.map +1 -0
  209. package/dist/types/playbook.js +71 -0
  210. package/dist/types/playbook.js.map +1 -0
  211. package/dist/types/step.d.ts +44 -0
  212. package/dist/types/step.d.ts.map +1 -0
  213. package/dist/types/step.js +32 -0
  214. package/dist/types/step.js.map +1 -0
  215. package/dist/types/task.d.ts +91 -0
  216. package/dist/types/task.d.ts.map +1 -0
  217. package/dist/types/task.js +39 -0
  218. package/dist/types/task.js.map +1 -0
  219. package/dist/types/trajectory.d.ts +221 -0
  220. package/dist/types/trajectory.d.ts.map +1 -0
  221. package/dist/types/trajectory.js +60 -0
  222. package/dist/types/trajectory.js.map +1 -0
  223. package/dist/utils/index.d.ts +4 -0
  224. package/dist/utils/index.d.ts.map +1 -0
  225. package/dist/utils/index.js +4 -0
  226. package/dist/utils/index.js.map +1 -0
  227. package/dist/utils/similarity.d.ts +31 -0
  228. package/dist/utils/similarity.d.ts.map +1 -0
  229. package/dist/utils/similarity.js +107 -0
  230. package/dist/utils/similarity.js.map +1 -0
  231. package/dist/utils/storage.d.ts +106 -0
  232. package/dist/utils/storage.d.ts.map +1 -0
  233. package/dist/utils/storage.js +203 -0
  234. package/dist/utils/storage.js.map +1 -0
  235. package/dist/utils/validation.d.ts +129 -0
  236. package/dist/utils/validation.d.ts.map +1 -0
  237. package/dist/utils/validation.js +171 -0
  238. package/dist/utils/validation.js.map +1 -0
  239. package/package.json +50 -34
  240. package/scripts/migrate-to-playbooks.ts +307 -0
  241. package/src/agents/index.ts +14 -0
  242. package/src/agents/mock-provider.ts +93 -0
  243. package/src/agents/types.ts +137 -0
  244. package/src/atlas.ts +560 -0
  245. package/src/bin/cognitive-core.ts +470 -0
  246. package/src/embeddings/bm25.ts +337 -0
  247. package/src/embeddings/index.ts +39 -0
  248. package/src/embeddings/manager.ts +288 -0
  249. package/src/embeddings/provider.ts +311 -0
  250. package/src/embeddings/vector-store.ts +353 -0
  251. package/src/factory.ts +263 -0
  252. package/src/index.ts +246 -0
  253. package/src/learning/analyzer.ts +335 -0
  254. package/src/learning/effectiveness.ts +428 -0
  255. package/src/learning/index.ts +58 -0
  256. package/src/learning/llm-extractor.ts +542 -0
  257. package/src/learning/meta-learner.ts +516 -0
  258. package/src/learning/pipeline.ts +244 -0
  259. package/src/learning/playbook-extractor.ts +702 -0
  260. package/src/learning/usage-inference.ts +372 -0
  261. package/src/mcp/index.ts +12 -0
  262. package/src/mcp/playbook-server.ts +565 -0
  263. package/src/memory/curated-loader.ts +160 -0
  264. package/src/memory/experience.ts +515 -0
  265. package/src/memory/index.ts +27 -0
  266. package/src/memory/meta.ts +506 -0
  267. package/src/memory/playbook.ts +493 -0
  268. package/src/memory/system.ts +551 -0
  269. package/src/runtime/backends/acp.ts +378 -0
  270. package/src/runtime/backends/index.ts +24 -0
  271. package/src/runtime/backends/mock.ts +218 -0
  272. package/src/runtime/backends/subprocess.ts +356 -0
  273. package/src/runtime/flows/learning.ts +183 -0
  274. package/src/runtime/flows/validation.ts +381 -0
  275. package/src/runtime/index.ts +53 -0
  276. package/src/runtime/manager.ts +541 -0
  277. package/src/runtime/types.ts +157 -0
  278. package/src/search/evaluator.ts +474 -0
  279. package/src/search/index.ts +59 -0
  280. package/src/search/refinement-loop.ts +363 -0
  281. package/src/search/refinement-types.ts +159 -0
  282. package/src/search/router.ts +261 -0
  283. package/src/search/solver.ts +303 -0
  284. package/src/search/verification-runner.ts +570 -0
  285. package/src/surfacing/index.ts +6 -0
  286. package/src/surfacing/skill-library.ts +594 -0
  287. package/src/types/config.ts +333 -0
  288. package/src/types/index.ts +130 -0
  289. package/src/types/memory.ts +270 -0
  290. package/src/types/meta.ts +218 -0
  291. package/src/types/outcome.ts +66 -0
  292. package/src/types/playbook.ts +196 -0
  293. package/src/types/step.ts +40 -0
  294. package/src/types/task.ts +52 -0
  295. package/src/types/trajectory.ts +80 -0
  296. package/src/utils/index.ts +38 -0
  297. package/src/utils/similarity.ts +139 -0
  298. package/src/utils/storage.ts +249 -0
  299. package/src/utils/validation.ts +286 -0
  300. package/tests/embeddings/bm25.test.ts +130 -0
  301. package/tests/embeddings/manager.test.ts +205 -0
  302. package/tests/integration/atlas.test.ts +266 -0
  303. package/tests/integration/e2e.test.ts +929 -0
  304. package/tests/learning/analyzer.test.ts +426 -0
  305. package/tests/learning/effectiveness.test.ts +542 -0
  306. package/tests/learning/pipeline.test.ts +176 -0
  307. package/tests/learning/playbook-extractor-provenance.test.ts +114 -0
  308. package/tests/learning/usage-inference.test.ts +254 -0
  309. package/tests/mcp/playbook-server.test.ts +252 -0
  310. package/tests/memory/experience.test.ts +198 -0
  311. package/tests/memory/playbook.test.ts +338 -0
  312. package/tests/memory/provenance.test.ts +639 -0
  313. package/tests/memory/system.test.ts +325 -0
  314. package/tests/runtime/agent-manager.test.ts +512 -0
  315. package/tests/runtime/mock-backend.test.ts +248 -0
  316. package/tests/search/refinement-loop.test.ts +468 -0
  317. package/tests/search/refinement.test.ts +267 -0
  318. package/tests/search/router.test.ts +427 -0
  319. package/tests/surfacing/skill-library.test.ts +292 -0
  320. package/tests/types/outcome.test.ts +147 -0
  321. package/tests/types/step.test.ts +133 -0
  322. package/tests/types/task.test.ts +158 -0
  323. package/tests/types/trajectory.test.ts +253 -0
  324. package/tests/utils/similarity.test.ts +188 -0
  325. package/tests/utils/validation.test.ts +252 -0
  326. package/tsconfig.json +25 -0
  327. package/vitest.config.ts +22 -0
  328. package/dist/index.d.mts +0 -466
  329. package/dist/index.mjs +0 -478
@@ -0,0 +1,337 @@
1
+ /**
2
+ * BM25 Index
3
+ *
4
+ * A text-based similarity search index using BM25 (Best Match 25) algorithm.
5
+ * Used as a fallback when no embedding provider is configured.
6
+ *
7
+ * BM25 is a ranking function used in information retrieval that considers
8
+ * term frequency, document length, and inverse document frequency.
9
+ */
10
+
11
/**
 * Category tag attached to every indexed item.
 * Searches can be restricted to a single category via their `type` option.
 */
export type MemoryItemType =
  | 'experience'
  | 'strategy'
  | 'concept'
  | 'skill';
12
+
13
/**
 * Internal record kept by the BM25 index for each indexed item.
 */
interface Document {
  /** Caller-supplied identifier; also used as the key in the index's document map. */
  id: string;
  /** Original, untokenized text; returned verbatim in search results. */
  content: string;
  /** Category used by the optional `type` filter during search. */
  type: MemoryItemType;
  /** Normalized terms derived from `content` at insertion time. */
  tokens: string[];
}
19
+
20
/**
 * BM25 text similarity search index.
 *
 * Documents are tokenized once on insertion. Corpus-wide statistics (IDF per
 * term and average document length) are recomputed lazily: any mutation marks
 * the index dirty and the next `search` call rebuilds them.
 */
export class BM25Index {
  private readonly documents: Map<string, Document>;
  private readonly idf: Map<string, number>;
  private avgDocLength: number;
  private readonly k1: number;
  private readonly b: number;
  /** True when `idf` / `avgDocLength` no longer reflect `documents`. */
  private dirty: boolean;

  /**
   * @param options.k1 Term-frequency saturation (typical: 1.2-2.0). Defaults to 1.5.
   * @param options.b  Document-length normalization (typical: 0.75). Defaults to 0.75.
   */
  constructor(options?: { k1?: number; b?: number }) {
    this.documents = new Map();
    this.idf = new Map();
    this.avgDocLength = 0;
    this.k1 = options?.k1 ?? 1.5;
    this.b = options?.b ?? 0.75;
    this.dirty = false;
  }

  /**
   * Add a document to the index. An existing document with the same `id`
   * is replaced.
   */
  add(id: string, content: string, type: MemoryItemType): void {
    const tokens = this.tokenize(content);
    this.documents.set(id, { id, tokens, type, content });
    this.dirty = true;
  }

  /**
   * Remove a document from the index.
   *
   * @returns `true` if a document with this id existed and was removed.
   */
  remove(id: string): boolean {
    const removed = this.documents.delete(id);
    if (removed) {
      this.dirty = true;
    }
    return removed;
  }

  /** Check whether a document with this id is indexed. */
  has(id: string): boolean {
    return this.documents.has(id);
  }

  /** Number of indexed documents. */
  get size(): number {
    return this.documents.size;
  }

  /**
   * Search for documents similar to `query`.
   *
   * @param query Free text; tokenized the same way as indexed content.
   * @param options.k Maximum number of results (default 10). Values below
   *   zero are treated as zero.
   * @param options.type Only consider documents of this type.
   * @param options.threshold Minimum normalized score (default 0). Note that
   *   with the default, documents sharing no term with the query are still
   *   returned with a score of 0; pass a positive threshold to exclude them.
   * @returns Matches sorted by descending normalized score. Empty when the
   *   query contains no indexable terms.
   */
  search(
    query: string,
    options: {
      k?: number;
      type?: MemoryItemType;
      threshold?: number;
    } = {}
  ): Array<{ id: string; score: number; content: string }> {
    // Corpus statistics are rebuilt lazily, only when something changed.
    if (this.dirty) {
      this.rebuildIdf();
    }

    const queryTokens = this.tokenize(query);
    if (queryTokens.length === 0) {
      return [];
    }

    const threshold = options.threshold ?? 0;
    // A negative limit passed straight to slice() would drop results from
    // the END of the list instead of returning none, so clamp it.
    const limit = Math.max(0, options.k ?? 10);

    const results: Array<{ id: string; score: number; content: string }> = [];

    for (const doc of this.documents.values()) {
      if (options.type && doc.type !== options.type) continue;

      const rawScore = this.computeScore(queryTokens, doc.tokens);
      const normalizedScore = this.normalizeScore(rawScore, queryTokens.length);

      if (normalizedScore >= threshold) {
        results.push({
          id: doc.id,
          score: normalizedScore,
          content: doc.content,
        });
      }
    }

    return results.sort((a, b) => b.score - a.score).slice(0, limit);
  }

  /**
   * Tokenize text for indexing and search.
   *
   * Lower-cases the text, replaces everything that is not a letter, combining
   * mark, digit, underscore or whitespace with a space, splits on whitespace,
   * and drops single-character tokens and stop words.
   *
   * The character class is Unicode-aware so that non-ASCII words (accented
   * Latin, Cyrillic, ...) survive as whole tokens; for pure ASCII input the
   * result is the same as with `\w`.
   */
  private tokenize(text: string): string[] {
    return text
      .toLowerCase()
      .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, ' ')
      .split(/\s+/)
      .filter((t) => t.length > 1)
      .filter((t) => !STOP_WORDS.has(t));
  }

  /**
   * Recompute the average document length and the IDF of every term,
   * then clear the dirty flag.
   */
  private rebuildIdf(): void {
    this.idf.clear();
    const N = this.documents.size;

    if (N === 0) {
      this.avgDocLength = 0;
      this.dirty = false;
      return;
    }

    // Number of documents each term occurs in (document frequency).
    const termDocCounts = new Map<string, number>();
    let totalLength = 0;

    for (const doc of this.documents.values()) {
      totalLength += doc.tokens.length;
      // Each term counts once per document, however often it repeats.
      for (const term of new Set(doc.tokens)) {
        termDocCounts.set(term, (termDocCounts.get(term) ?? 0) + 1);
      }
    }

    this.avgDocLength = totalLength / N;

    for (const [term, df] of termDocCounts) {
      // IDF formula: log((N - df + 0.5) / (df + 0.5) + 1).
      // The "+ 1" inside the log keeps the value positive for every df <= N.
      this.idf.set(term, Math.log((N - df + 0.5) / (df + 0.5) + 1));
    }

    this.dirty = false;
  }

  /**
   * Compute the raw (unnormalized) BM25 score of one document for a query.
   * A query term that occurs several times in the query contributes once
   * per occurrence.
   */
  private computeScore(queryTokens: string[], docTokens: string[]): number {
    const docLength = docTokens.length;

    // Term frequencies within this document.
    const termFreq = new Map<string, number>();
    for (const token of docTokens) {
      termFreq.set(token, (termFreq.get(token) ?? 0) + 1);
    }

    let score = 0;

    for (const term of queryTokens) {
      const tf = termFreq.get(term) ?? 0;
      const idf = this.idf.get(term) ?? 0;

      // Terms absent from the document or unknown to the corpus add nothing.
      if (tf === 0 || idf === 0) continue;

      const numerator = tf * (this.k1 + 1);
      const denominator =
        tf +
        this.k1 * (1 - this.b + this.b * (docLength / this.avgDocLength));

      score += idf * (numerator / denominator);
    }

    return score;
  }

  /**
   * Map a raw BM25 score into the 0-1 range.
   *
   * This is an approximation since BM25 scores are unbounded: the score is
   * compared against half of an assumed maximum of 2.5 per query term using
   * `score / (score + pivot)`.
   */
  private normalizeScore(score: number, queryLength: number): number {
    if (score <= 0 || queryLength === 0) return 0;

    const maxScorePerTerm = 2.5; // Assumed typical upper bound per term
    const maxPossibleScore = queryLength * maxScorePerTerm;

    const normalized = score / (score + maxPossibleScore * 0.5);

    return Math.min(1, Math.max(0, normalized));
  }

  /**
   * Serialize the index to a plain JSON-compatible object (for persistence).
   *
   * The returned documents are copies, so mutating the snapshot does not
   * affect the live index. IDF values are not serialized.
   */
  toJSON(): {
    documents: Array<{
      id: string;
      tokens: string[];
      type: MemoryItemType;
      content: string;
    }>;
    k1: number;
    b: number;
  } {
    return {
      documents: Array.from(this.documents.values(), (doc) => ({
        ...doc,
        tokens: [...doc.tokens],
      })),
      k1: this.k1,
      b: this.b,
    };
  }

  /**
   * Restore an index from the output of `toJSON`.
   *
   * Documents are copied so later changes to `data` do not leak into the
   * index. Stored tokens are reused as-is; if an entry has no token array,
   * its tokens are re-derived from `content`. Corpus statistics are rebuilt
   * on the first search.
   */
  static fromJSON(data: {
    documents: Array<{
      id: string;
      tokens: string[];
      type: MemoryItemType;
      content: string;
    }>;
    k1: number;
    b: number;
  }): BM25Index {
    const index = new BM25Index({ k1: data.k1, b: data.b });
    for (const doc of data.documents) {
      const tokens = Array.isArray(doc.tokens)
        ? [...doc.tokens]
        : index.tokenize(doc.content);
      index.documents.set(doc.id, { ...doc, tokens });
    }
    index.dirty = true;
    return index;
  }
}
260
+
261
/**
 * Common English stop words removed during tokenization.
 * Membership is tested against lower-cased tokens.
 */
const STOP_WORDS = new Set<string>([
  // articles, conjunctions, prepositions, auxiliaries
  'a', 'an', 'and', 'are', 'as', 'at', 'be', 'by', 'for', 'from',
  'has', 'he', 'in', 'is', 'it', 'its', 'of', 'on', 'or', 'that',
  'the', 'to', 'was', 'were', 'will', 'with', 'this', 'but', 'they', 'have',
  // question words
  'had', 'what', 'when', 'where', 'who', 'which', 'why', 'how',
  // quantifiers
  'all', 'each', 'every', 'both', 'few', 'more', 'most', 'other', 'some', 'such',
  // negations and modifiers
  'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very',
  'can', 'just', 'should', 'now',
]);
328
+
329
/**
 * Factory for {@link BM25Index}.
 *
 * @param options Optional BM25 tuning parameters (`k1`, `b`), forwarded
 *   unchanged to the constructor.
 * @returns A new, empty index.
 */
export function createBM25Index(options?: {
  k1?: number;
  b?: number;
}): BM25Index {
  const index = new BM25Index(options);
  return index;
}
@@ -0,0 +1,39 @@
1
+ /**
2
+ * Embeddings Module
3
+ *
4
+ * Provides embedding generation, storage, and similarity search capabilities.
5
+ * Supports multiple embedding providers (OpenAI, Voyage, local) with automatic
6
+ * fallback to BM25 text search when no provider is configured.
7
+ */
8
+
9
+ // Provider interface and implementations
10
+ export {
11
+ type EmbeddingProvider,
12
+ type EmbeddingProviderConfig,
13
+ OpenAIEmbeddingProvider,
14
+ VoyageEmbeddingProvider,
15
+ LocalEmbeddingProvider,
16
+ createEmbeddingProvider,
17
+ } from './provider.js';
18
+
19
+ // BM25 text search (fallback)
20
+ export {
21
+ BM25Index,
22
+ createBM25Index,
23
+ type MemoryItemType,
24
+ } from './bm25.js';
25
+
26
+ // Vector store using sqlite-vec
27
+ export {
28
+ SqliteVectorStore,
29
+ createVectorStore,
30
+ type VectorSearchResult,
31
+ } from './vector-store.js';
32
+
33
+ // Embedding manager (main interface)
34
+ export {
35
+ EmbeddingManager,
36
+ createEmbeddingManager,
37
+ type EmbeddingManagerOptions,
38
+ type SearchResult,
39
+ } from './manager.js';
@@ -0,0 +1,288 @@
1
+ /**
2
+ * Embedding Manager
3
+ *
4
+ * Orchestrates embedding generation, storage, and search.
5
+ * Provides automatic fallback to BM25 when no embedding provider is configured.
6
+ */
7
+
8
+ import type { EmbeddingProvider } from './provider.js';
9
+ import { SqliteVectorStore } from './vector-store.js';
10
+ import { BM25Index, type MemoryItemType } from './bm25.js';
11
+
12
+ export { type MemoryItemType } from './bm25.js';
13
+
14
/**
 * One hit returned by the embedding manager's search methods.
 */
export interface SearchResult {
  /** Identifier the item was stored under. */
  id: string;
  /** Text content of the matched item. */
  content: string;
  /** Relevance score as reported by the backend that served the search. */
  score: number;
}
19
+
20
/**
 * Construction options for the embedding manager.
 */
export interface EmbeddingManagerOptions {
  /**
   * Embedding provider. Omit or pass `null` for BM25-only mode.
   */
  provider?: EmbeddingProvider | null;
  /**
   * Path to the sqlite-vec database file. Vector search is only enabled
   * when both this and `provider` are set.
   */
  vectorStorePath?: string;
  /**
   * Embedding dimension for the vector store. When omitted, the
   * provider's own `dimension` is used.
   */
  dimension?: number;
  /**
   * BM25 tuning parameters, passed through to the BM25 index.
   */
  bm25?: {
    k1?: number;
    b?: number;
  };
}
33
+
34
/**
 * Manages embeddings with automatic fallback to BM25.
 *
 * Every stored item is always added to an in-memory BM25 index. When an
 * embedding provider AND a vector store path are both configured, items are
 * additionally embedded and written to a vector store, and searches prefer
 * the vector store. Provider or vector-store failures during store/search
 * are logged with `console.error` and degrade to BM25 instead of throwing.
 */
export class EmbeddingManager {
  private provider: EmbeddingProvider | null;
  private vectorStore: SqliteVectorStore | null;
  private bm25Index: BM25Index;
  private useVectorSearch: boolean;

  /**
   * @param options See {@link EmbeddingManagerOptions}. The vector store is
   *   created only when both `provider` and `vectorStorePath` are given; its
   *   dimension is `options.dimension`, falling back to `provider.dimension`.
   */
  constructor(options: EmbeddingManagerOptions) {
    const provider = options.provider ?? null;
    this.provider = provider;
    this.useVectorSearch = provider !== null && !!options.vectorStorePath;

    if (provider !== null && options.vectorStorePath) {
      this.vectorStore = new SqliteVectorStore({
        dbPath: options.vectorStorePath,
        dimension: options.dimension ?? provider.dimension,
      });
    } else {
      this.vectorStore = null;
    }

    // BM25 is always maintained so there is something to fall back to.
    this.bm25Index = new BM25Index(options.bm25);
  }

  /** Whether vector search is available. */
  get hasVectorSearch(): boolean {
    return this.useVectorSearch && this.vectorStore !== null;
  }

  /** The provider's embedding dimension, or `null` without a provider. */
  get dimension(): number | null {
    return this.provider?.dimension ?? null;
  }

  /**
   * Store an item in the BM25 index and, when vector search is enabled,
   * embed it and insert it into the vector store.
   *
   * @returns The embedding if one was generated and stored; `undefined` in
   *   BM25-only mode or when embedding/insertion failed (the item is still
   *   in the BM25 index in that case).
   */
  async store(
    id: string,
    content: string,
    type: MemoryItemType
  ): Promise<number[] | undefined> {
    this.bm25Index.add(id, content, type);

    if (this.useVectorSearch && this.provider && this.vectorStore) {
      try {
        const embedding = await this.provider.embed(content);
        this.vectorStore.insert(id, embedding, type, content);
        return embedding;
      } catch (error) {
        console.error(`Failed to generate embedding for ${id}:`, error);
        // Best effort: BM25 still has the item.
      }
    }

    return undefined;
  }

  /**
   * Store multiple items, embedding them with a single provider call.
   *
   * An empty `items` array is a no-op and never reaches the provider. If
   * the provider returns a different number of embeddings than items were
   * sent, the batch is treated as failed: nothing is written to the vector
   * store and every item maps to `undefined`.
   *
   * @returns A map from item id to its embedding, or to `undefined` when no
   *   embedding was stored for it. All items are in the BM25 index either way.
   */
  async storeBatch(
    items: Array<{ id: string; content: string; type: MemoryItemType }>
  ): Promise<Map<string, number[] | undefined>> {
    const results = new Map<string, number[] | undefined>();

    // Nothing to index; avoid a pointless (and possibly rejected) provider call.
    if (items.length === 0) {
      return results;
    }

    for (const item of items) {
      this.bm25Index.add(item.id, item.content, item.type);
      // Start from "no embedding"; overwritten below on success.
      results.set(item.id, undefined);
    }

    if (this.useVectorSearch && this.provider && this.vectorStore) {
      try {
        const embeddings = await this.provider.embedBatch(
          items.map((item) => item.content)
        );

        if (embeddings.length !== items.length) {
          throw new Error(
            `Embedding provider returned ${embeddings.length} embeddings for ${items.length} items`
          );
        }

        this.vectorStore.insertBatch(
          items.map((item, i) => ({
            id: item.id,
            embedding: embeddings[i],
            type: item.type,
            content: item.content,
          }))
        );

        items.forEach((item, i) => {
          results.set(item.id, embeddings[i]);
        });
      } catch (error) {
        console.error('Failed to generate batch embeddings:', error);
        // Best effort: items remain searchable through BM25.
      }
    }

    return results;
  }

  /**
   * Search for similar items.
   * Uses vector search if available; falls back to BM25 when it is not
   * configured or when embedding the query / querying the store throws.
   */
  async search(
    query: string,
    options: {
      k?: number;
      type?: MemoryItemType;
      threshold?: number;
    } = {}
  ): Promise<SearchResult[]> {
    if (this.useVectorSearch && this.provider && this.vectorStore) {
      try {
        const queryEmbedding = await this.provider.embed(query);
        return EmbeddingManager.toSearchResults(
          this.vectorStore.search(queryEmbedding, options)
        );
      } catch (error) {
        console.error('Vector search failed, falling back to BM25:', error);
      }
    }

    return this.bm25Index.search(query, options);
  }

  /**
   * Search using a pre-computed embedding.
   *
   * @returns Matches from the vector store, or `null` when no vector store
   *   is configured. Unlike `search`, errors from the store are not caught.
   */
  searchByEmbedding(
    embedding: number[],
    options: {
      k?: number;
      type?: MemoryItemType;
      threshold?: number;
    } = {}
  ): SearchResult[] | null {
    if (!this.vectorStore) {
      return null;
    }

    return EmbeddingManager.toSearchResults(
      this.vectorStore.search(embedding, options)
    );
  }

  /**
   * Generate an embedding without storing it.
   *
   * @returns The embedding, or `null` without a provider or on failure
   *   (failures are logged).
   */
  async embed(text: string): Promise<number[] | null> {
    if (!this.provider) {
      return null;
    }

    try {
      return await this.provider.embed(text);
    } catch (error) {
      console.error('Failed to generate embedding:', error);
      return null;
    }
  }

  /**
   * Generate embeddings for multiple texts without storing them.
   *
   * @returns The embeddings, or `null` without a provider or on failure
   *   (failures are logged).
   */
  async embedBatch(texts: string[]): Promise<number[][] | null> {
    if (!this.provider) {
      return null;
    }

    try {
      return await this.provider.embedBatch(texts);
    } catch (error) {
      console.error('Failed to generate batch embeddings:', error);
      return null;
    }
  }

  /** Remove an item from the BM25 index and, if present, the vector store. */
  remove(id: string): void {
    this.bm25Index.remove(id);
    this.vectorStore?.delete(id);
  }

  /**
   * Check if an item exists. Only the BM25 index is consulted, since every
   * stored item is added there.
   */
  has(id: string): boolean {
    return this.bm25Index.has(id);
  }

  /**
   * Get statistics about the indexes. `vectorCount`, `dimension` and
   * `providerName` are `null` when the corresponding component is absent.
   */
  getStats(): {
    bm25Count: number;
    vectorCount: number | null;
    hasVectorSearch: boolean;
    dimension: number | null;
    providerName: string | null;
  } {
    return {
      bm25Count: this.bm25Index.size,
      vectorCount: this.vectorStore?.count ?? null,
      hasVectorSearch: this.hasVectorSearch,
      dimension: this.dimension,
      providerName: this.provider?.name ?? null,
    };
  }

  /** Close the vector store, if one was opened. */
  close(): void {
    this.vectorStore?.close();
  }

  /** Project vector-store rows onto the public `SearchResult` shape. */
  private static toSearchResults(
    rows: ReadonlyArray<{ id: string; score: number; content: string }>
  ): SearchResult[] {
    return rows.map((row) => ({
      id: row.id,
      score: row.score,
      content: row.content,
    }));
  }
}
280
+
281
/**
 * Factory for {@link EmbeddingManager}.
 *
 * @param options Manager configuration, forwarded unchanged to the constructor.
 * @returns A newly constructed manager.
 */
export function createEmbeddingManager(
  options: EmbeddingManagerOptions
): EmbeddingManager {
  const manager = new EmbeddingManager(options);
  return manager;
}