cognitive-core 0.0.2 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +302 -116
- package/SKILL.md +193 -0
- package/dist/agents/index.d.ts +3 -0
- package/dist/agents/index.d.ts.map +1 -0
- package/dist/agents/index.js +5 -0
- package/dist/agents/index.js.map +1 -0
- package/dist/agents/mock-provider.d.ts +23 -0
- package/dist/agents/mock-provider.d.ts.map +1 -0
- package/dist/agents/mock-provider.js +71 -0
- package/dist/agents/mock-provider.js.map +1 -0
- package/dist/agents/types.d.ts +98 -0
- package/dist/agents/types.d.ts.map +1 -0
- package/dist/agents/types.js +44 -0
- package/dist/agents/types.js.map +1 -0
- package/dist/atlas.d.ts +196 -0
- package/dist/atlas.d.ts.map +1 -0
- package/dist/atlas.js +373 -0
- package/dist/atlas.js.map +1 -0
- package/dist/bin/cognitive-core.d.ts +18 -0
- package/dist/bin/cognitive-core.d.ts.map +1 -0
- package/dist/bin/cognitive-core.js +419 -0
- package/dist/bin/cognitive-core.js.map +1 -0
- package/dist/embeddings/bm25.d.ts +104 -0
- package/dist/embeddings/bm25.d.ts.map +1 -0
- package/dist/embeddings/bm25.js +264 -0
- package/dist/embeddings/bm25.js.map +1 -0
- package/dist/embeddings/index.d.ts +12 -0
- package/dist/embeddings/index.d.ts.map +1 -0
- package/dist/embeddings/index.js +16 -0
- package/dist/embeddings/index.js.map +1 -0
- package/dist/embeddings/manager.d.ts +112 -0
- package/dist/embeddings/manager.d.ts.map +1 -0
- package/dist/embeddings/manager.js +215 -0
- package/dist/embeddings/manager.js.map +1 -0
- package/dist/embeddings/provider.d.ts +101 -0
- package/dist/embeddings/provider.d.ts.map +1 -0
- package/dist/embeddings/provider.js +232 -0
- package/dist/embeddings/provider.js.map +1 -0
- package/dist/embeddings/vector-store.d.ts +101 -0
- package/dist/embeddings/vector-store.d.ts.map +1 -0
- package/dist/embeddings/vector-store.js +256 -0
- package/dist/embeddings/vector-store.js.map +1 -0
- package/dist/factory.d.ts +193 -0
- package/dist/factory.d.ts.map +1 -0
- package/dist/factory.js +109 -0
- package/dist/factory.js.map +1 -0
- package/dist/index.d.ts +30 -453
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +84 -509
- package/dist/index.js.map +1 -0
- package/dist/learning/analyzer.d.ts +110 -0
- package/dist/learning/analyzer.d.ts.map +1 -0
- package/dist/learning/analyzer.js +213 -0
- package/dist/learning/analyzer.js.map +1 -0
- package/dist/learning/effectiveness.d.ts +158 -0
- package/dist/learning/effectiveness.d.ts.map +1 -0
- package/dist/learning/effectiveness.js +251 -0
- package/dist/learning/effectiveness.js.map +1 -0
- package/dist/learning/index.d.ts +8 -0
- package/dist/learning/index.d.ts.map +1 -0
- package/dist/learning/index.js +11 -0
- package/dist/learning/index.js.map +1 -0
- package/dist/learning/llm-extractor.d.ts +88 -0
- package/dist/learning/llm-extractor.d.ts.map +1 -0
- package/dist/learning/llm-extractor.js +372 -0
- package/dist/learning/llm-extractor.js.map +1 -0
- package/dist/learning/meta-learner.d.ts +80 -0
- package/dist/learning/meta-learner.d.ts.map +1 -0
- package/dist/learning/meta-learner.js +355 -0
- package/dist/learning/meta-learner.js.map +1 -0
- package/dist/learning/pipeline.d.ts +65 -0
- package/dist/learning/pipeline.d.ts.map +1 -0
- package/dist/learning/pipeline.js +170 -0
- package/dist/learning/pipeline.js.map +1 -0
- package/dist/learning/playbook-extractor.d.ts +113 -0
- package/dist/learning/playbook-extractor.d.ts.map +1 -0
- package/dist/learning/playbook-extractor.js +523 -0
- package/dist/learning/playbook-extractor.js.map +1 -0
- package/dist/learning/usage-inference.d.ts +82 -0
- package/dist/learning/usage-inference.d.ts.map +1 -0
- package/dist/learning/usage-inference.js +261 -0
- package/dist/learning/usage-inference.js.map +1 -0
- package/dist/mcp/index.d.ts +6 -0
- package/dist/mcp/index.d.ts.map +1 -0
- package/dist/mcp/index.js +6 -0
- package/dist/mcp/index.js.map +1 -0
- package/dist/mcp/playbook-server.d.ts +120 -0
- package/dist/mcp/playbook-server.d.ts.map +1 -0
- package/dist/mcp/playbook-server.js +427 -0
- package/dist/mcp/playbook-server.js.map +1 -0
- package/dist/memory/curated-loader.d.ts +62 -0
- package/dist/memory/curated-loader.d.ts.map +1 -0
- package/dist/memory/curated-loader.js +106 -0
- package/dist/memory/curated-loader.js.map +1 -0
- package/dist/memory/experience.d.ts +122 -0
- package/dist/memory/experience.d.ts.map +1 -0
- package/dist/memory/experience.js +392 -0
- package/dist/memory/experience.js.map +1 -0
- package/dist/memory/index.d.ts +6 -0
- package/dist/memory/index.d.ts.map +1 -0
- package/dist/memory/index.js +9 -0
- package/dist/memory/index.js.map +1 -0
- package/dist/memory/meta.d.ts +90 -0
- package/dist/memory/meta.d.ts.map +1 -0
- package/dist/memory/meta.js +362 -0
- package/dist/memory/meta.js.map +1 -0
- package/dist/memory/playbook.d.ts +133 -0
- package/dist/memory/playbook.d.ts.map +1 -0
- package/dist/memory/playbook.js +357 -0
- package/dist/memory/playbook.js.map +1 -0
- package/dist/memory/system.d.ts +167 -0
- package/dist/memory/system.d.ts.map +1 -0
- package/dist/memory/system.js +383 -0
- package/dist/memory/system.js.map +1 -0
- package/dist/runtime/backends/acp.d.ts +67 -0
- package/dist/runtime/backends/acp.d.ts.map +1 -0
- package/dist/runtime/backends/acp.js +290 -0
- package/dist/runtime/backends/acp.js.map +1 -0
- package/dist/runtime/backends/index.d.ts +5 -0
- package/dist/runtime/backends/index.d.ts.map +1 -0
- package/dist/runtime/backends/index.js +6 -0
- package/dist/runtime/backends/index.js.map +1 -0
- package/dist/runtime/backends/mock.d.ts +67 -0
- package/dist/runtime/backends/mock.d.ts.map +1 -0
- package/dist/runtime/backends/mock.js +153 -0
- package/dist/runtime/backends/mock.js.map +1 -0
- package/dist/runtime/backends/subprocess.d.ts +56 -0
- package/dist/runtime/backends/subprocess.d.ts.map +1 -0
- package/dist/runtime/backends/subprocess.js +260 -0
- package/dist/runtime/backends/subprocess.js.map +1 -0
- package/dist/runtime/flows/learning.d.ts +73 -0
- package/dist/runtime/flows/learning.d.ts.map +1 -0
- package/dist/runtime/flows/learning.js +116 -0
- package/dist/runtime/flows/learning.js.map +1 -0
- package/dist/runtime/flows/validation.d.ts +122 -0
- package/dist/runtime/flows/validation.d.ts.map +1 -0
- package/dist/runtime/flows/validation.js +223 -0
- package/dist/runtime/flows/validation.js.map +1 -0
- package/dist/runtime/index.d.ts +6 -0
- package/dist/runtime/index.d.ts.map +1 -0
- package/dist/runtime/index.js +8 -0
- package/dist/runtime/index.js.map +1 -0
- package/dist/runtime/manager.d.ts +116 -0
- package/dist/runtime/manager.d.ts.map +1 -0
- package/dist/runtime/manager.js +416 -0
- package/dist/runtime/manager.js.map +1 -0
- package/dist/runtime/types.d.ts +138 -0
- package/dist/runtime/types.d.ts.map +1 -0
- package/dist/runtime/types.js +2 -0
- package/dist/runtime/types.js.map +1 -0
- package/dist/search/evaluator.d.ts +102 -0
- package/dist/search/evaluator.d.ts.map +1 -0
- package/dist/search/evaluator.js +352 -0
- package/dist/search/evaluator.js.map +1 -0
- package/dist/search/index.d.ts +7 -0
- package/dist/search/index.d.ts.map +1 -0
- package/dist/search/index.js +11 -0
- package/dist/search/index.js.map +1 -0
- package/dist/search/refinement-loop.d.ts +73 -0
- package/dist/search/refinement-loop.d.ts.map +1 -0
- package/dist/search/refinement-loop.js +245 -0
- package/dist/search/refinement-loop.js.map +1 -0
- package/dist/search/refinement-types.d.ts +154 -0
- package/dist/search/refinement-types.d.ts.map +1 -0
- package/dist/search/refinement-types.js +99 -0
- package/dist/search/refinement-types.js.map +1 -0
- package/dist/search/router.d.ts +61 -0
- package/dist/search/router.d.ts.map +1 -0
- package/dist/search/router.js +197 -0
- package/dist/search/router.js.map +1 -0
- package/dist/search/solver.d.ts +75 -0
- package/dist/search/solver.d.ts.map +1 -0
- package/dist/search/solver.js +216 -0
- package/dist/search/solver.js.map +1 -0
- package/dist/search/verification-runner.d.ts +125 -0
- package/dist/search/verification-runner.d.ts.map +1 -0
- package/dist/search/verification-runner.js +440 -0
- package/dist/search/verification-runner.js.map +1 -0
- package/dist/surfacing/index.d.ts +2 -0
- package/dist/surfacing/index.d.ts.map +1 -0
- package/dist/surfacing/index.js +2 -0
- package/dist/surfacing/index.js.map +1 -0
- package/dist/surfacing/skill-library.d.ts +158 -0
- package/dist/surfacing/skill-library.d.ts.map +1 -0
- package/dist/surfacing/skill-library.js +429 -0
- package/dist/surfacing/skill-library.js.map +1 -0
- package/dist/types/config.d.ts +1113 -0
- package/dist/types/config.d.ts.map +1 -0
- package/dist/types/config.js +274 -0
- package/dist/types/config.js.map +1 -0
- package/dist/types/index.d.ts +9 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +14 -0
- package/dist/types/index.js.map +1 -0
- package/dist/types/memory.d.ts +339 -0
- package/dist/types/memory.d.ts.map +1 -0
- package/dist/types/memory.js +207 -0
- package/dist/types/memory.js.map +1 -0
- package/dist/types/meta.d.ts +146 -0
- package/dist/types/meta.d.ts.map +1 -0
- package/dist/types/meta.js +51 -0
- package/dist/types/meta.js.map +1 -0
- package/dist/types/outcome.d.ts +42 -0
- package/dist/types/outcome.d.ts.map +1 -0
- package/dist/types/outcome.js +50 -0
- package/dist/types/outcome.js.map +1 -0
- package/dist/types/playbook.d.ts +119 -0
- package/dist/types/playbook.d.ts.map +1 -0
- package/dist/types/playbook.js +71 -0
- package/dist/types/playbook.js.map +1 -0
- package/dist/types/step.d.ts +44 -0
- package/dist/types/step.d.ts.map +1 -0
- package/dist/types/step.js +32 -0
- package/dist/types/step.js.map +1 -0
- package/dist/types/task.d.ts +91 -0
- package/dist/types/task.d.ts.map +1 -0
- package/dist/types/task.js +39 -0
- package/dist/types/task.js.map +1 -0
- package/dist/types/trajectory.d.ts +221 -0
- package/dist/types/trajectory.d.ts.map +1 -0
- package/dist/types/trajectory.js +60 -0
- package/dist/types/trajectory.js.map +1 -0
- package/dist/utils/index.d.ts +4 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +4 -0
- package/dist/utils/index.js.map +1 -0
- package/dist/utils/similarity.d.ts +31 -0
- package/dist/utils/similarity.d.ts.map +1 -0
- package/dist/utils/similarity.js +107 -0
- package/dist/utils/similarity.js.map +1 -0
- package/dist/utils/storage.d.ts +106 -0
- package/dist/utils/storage.d.ts.map +1 -0
- package/dist/utils/storage.js +203 -0
- package/dist/utils/storage.js.map +1 -0
- package/dist/utils/validation.d.ts +129 -0
- package/dist/utils/validation.d.ts.map +1 -0
- package/dist/utils/validation.js +171 -0
- package/dist/utils/validation.js.map +1 -0
- package/package.json +50 -34
- package/scripts/migrate-to-playbooks.ts +307 -0
- package/src/agents/index.ts +14 -0
- package/src/agents/mock-provider.ts +93 -0
- package/src/agents/types.ts +137 -0
- package/src/atlas.ts +560 -0
- package/src/bin/cognitive-core.ts +470 -0
- package/src/embeddings/bm25.ts +337 -0
- package/src/embeddings/index.ts +39 -0
- package/src/embeddings/manager.ts +288 -0
- package/src/embeddings/provider.ts +311 -0
- package/src/embeddings/vector-store.ts +353 -0
- package/src/factory.ts +263 -0
- package/src/index.ts +246 -0
- package/src/learning/analyzer.ts +335 -0
- package/src/learning/effectiveness.ts +428 -0
- package/src/learning/index.ts +58 -0
- package/src/learning/llm-extractor.ts +542 -0
- package/src/learning/meta-learner.ts +516 -0
- package/src/learning/pipeline.ts +244 -0
- package/src/learning/playbook-extractor.ts +702 -0
- package/src/learning/usage-inference.ts +372 -0
- package/src/mcp/index.ts +12 -0
- package/src/mcp/playbook-server.ts +565 -0
- package/src/memory/curated-loader.ts +160 -0
- package/src/memory/experience.ts +515 -0
- package/src/memory/index.ts +27 -0
- package/src/memory/meta.ts +506 -0
- package/src/memory/playbook.ts +493 -0
- package/src/memory/system.ts +551 -0
- package/src/runtime/backends/acp.ts +378 -0
- package/src/runtime/backends/index.ts +24 -0
- package/src/runtime/backends/mock.ts +218 -0
- package/src/runtime/backends/subprocess.ts +356 -0
- package/src/runtime/flows/learning.ts +183 -0
- package/src/runtime/flows/validation.ts +381 -0
- package/src/runtime/index.ts +53 -0
- package/src/runtime/manager.ts +541 -0
- package/src/runtime/types.ts +157 -0
- package/src/search/evaluator.ts +474 -0
- package/src/search/index.ts +59 -0
- package/src/search/refinement-loop.ts +363 -0
- package/src/search/refinement-types.ts +159 -0
- package/src/search/router.ts +261 -0
- package/src/search/solver.ts +303 -0
- package/src/search/verification-runner.ts +570 -0
- package/src/surfacing/index.ts +6 -0
- package/src/surfacing/skill-library.ts +594 -0
- package/src/types/config.ts +333 -0
- package/src/types/index.ts +130 -0
- package/src/types/memory.ts +270 -0
- package/src/types/meta.ts +218 -0
- package/src/types/outcome.ts +66 -0
- package/src/types/playbook.ts +196 -0
- package/src/types/step.ts +40 -0
- package/src/types/task.ts +52 -0
- package/src/types/trajectory.ts +80 -0
- package/src/utils/index.ts +38 -0
- package/src/utils/similarity.ts +139 -0
- package/src/utils/storage.ts +249 -0
- package/src/utils/validation.ts +286 -0
- package/tests/embeddings/bm25.test.ts +130 -0
- package/tests/embeddings/manager.test.ts +205 -0
- package/tests/integration/atlas.test.ts +266 -0
- package/tests/integration/e2e.test.ts +929 -0
- package/tests/learning/analyzer.test.ts +426 -0
- package/tests/learning/effectiveness.test.ts +542 -0
- package/tests/learning/pipeline.test.ts +176 -0
- package/tests/learning/playbook-extractor-provenance.test.ts +114 -0
- package/tests/learning/usage-inference.test.ts +254 -0
- package/tests/mcp/playbook-server.test.ts +252 -0
- package/tests/memory/experience.test.ts +198 -0
- package/tests/memory/playbook.test.ts +338 -0
- package/tests/memory/provenance.test.ts +639 -0
- package/tests/memory/system.test.ts +325 -0
- package/tests/runtime/agent-manager.test.ts +512 -0
- package/tests/runtime/mock-backend.test.ts +248 -0
- package/tests/search/refinement-loop.test.ts +468 -0
- package/tests/search/refinement.test.ts +267 -0
- package/tests/search/router.test.ts +427 -0
- package/tests/surfacing/skill-library.test.ts +292 -0
- package/tests/types/outcome.test.ts +147 -0
- package/tests/types/step.test.ts +133 -0
- package/tests/types/task.test.ts +158 -0
- package/tests/types/trajectory.test.ts +253 -0
- package/tests/utils/similarity.test.ts +188 -0
- package/tests/utils/validation.test.ts +252 -0
- package/tsconfig.json +25 -0
- package/vitest.config.ts +22 -0
- package/dist/index.d.mts +0 -466
- package/dist/index.mjs +0 -478
|
@@ -0,0 +1,337 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BM25 Index
|
|
3
|
+
*
|
|
4
|
+
* A text-based similarity search index using the BM25 (Best Match 25) algorithm.
|
|
5
|
+
* Used as a fallback when no embedding provider is configured.
|
|
6
|
+
*
|
|
7
|
+
* BM25 is a ranking function used in information retrieval that considers
|
|
8
|
+
* term frequency, document length, and inverse document frequency.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
/** Category tag attached to every indexed item; usable as a search filter. */
export type MemoryItemType = 'experience' | 'strategy' | 'concept' | 'skill';

/**
 * Internal record kept for each indexed item.
 */
interface Document {
  /** Caller-supplied identifier; also the key in the document map. */
  id: string;
  /** Normalized terms produced by the tokenizer (duplicates preserved). */
  tokens: string[];
  /** Category checked by the optional `type` filter of `search`. */
  type: MemoryItemType;
  /** Original, untokenized text; returned verbatim with search hits. */
  content: string;
}

/**
 * BM25 text similarity search index.
 *
 * Documents are tokenized once when added. IDF values and the average
 * document length are recomputed lazily, on the first search after the
 * document set has changed.
 */
export class BM25Index {
  private readonly documents: Map<string, Document>;
  private readonly idf: Map<string, number>;
  private avgDocLength: number;
  private readonly k1: number;
  private readonly b: number;
  /** True when documents changed since the last IDF rebuild. */
  private dirty: boolean;

  /**
   * @param options.k1 Term frequency saturation (typical: 1.2-2.0). Defaults to 1.5.
   * @param options.b Document length normalization (typical: 0.75). Defaults to 0.75.
   */
  constructor(options?: { k1?: number; b?: number }) {
    this.documents = new Map();
    this.idf = new Map();
    this.avgDocLength = 0;
    this.k1 = options?.k1 ?? 1.5;
    this.b = options?.b ?? 0.75;
    this.dirty = false;
  }

  /**
   * Add a document to the index. An existing document with the same id is
   * replaced.
   */
  add(id: string, content: string, type: MemoryItemType): void {
    const tokens = this.tokenize(content);
    this.documents.set(id, { id, tokens, type, content });
    this.dirty = true;
  }

  /**
   * Remove a document from the index.
   *
   * @returns `true` if a document with that id existed and was removed.
   */
  remove(id: string): boolean {
    const removed = this.documents.delete(id);
    if (removed) {
      this.dirty = true;
    }
    return removed;
  }

  /**
   * Check if a document exists.
   */
  has(id: string): boolean {
    return this.documents.has(id);
  }

  /**
   * Number of documents currently indexed.
   */
  get size(): number {
    return this.documents.size;
  }

  /**
   * Search for documents similar to `query`.
   *
   * @param query Free text; tokenized the same way as indexed content.
   * @param options.k Maximum number of results (default 10). Values below
   *   zero are treated as zero.
   * @param options.type Only consider documents of this type.
   * @param options.threshold Minimum normalized score (default 0). With the
   *   default, documents that share no term with the query are still
   *   returned, with a score of 0.
   * @returns Hits sorted by descending normalized score (0-1). Empty when
   *   the query contains no usable tokens.
   */
  search(
    query: string,
    options: {
      k?: number;
      type?: MemoryItemType;
      threshold?: number;
    } = {}
  ): Array<{ id: string; score: number; content: string }> {
    // Rebuild IDF if documents were added or removed since the last search
    if (this.dirty) {
      this.rebuildIdf();
    }

    const queryTokens = this.tokenize(query);
    if (queryTokens.length === 0) {
      return [];
    }

    const threshold = options.threshold ?? 0;
    // slice(0, k) with a negative k counts from the end of the array and
    // would return "all but the last |k|" hits, so clamp to zero.
    const limit = Math.max(0, options.k ?? 10);

    const results: Array<{ id: string; score: number; content: string }> = [];

    for (const doc of this.documents.values()) {
      if (options.type && doc.type !== options.type) continue;

      const rawScore = this.computeScore(queryTokens, doc.tokens);
      const score = this.normalizeScore(rawScore, queryTokens.length);

      if (score >= threshold) {
        results.push({ id: doc.id, score, content: doc.content });
      }
    }

    return results.sort((a, b) => b.score - a.score).slice(0, limit);
  }

  /**
   * Tokenize text for indexing and search.
   *
   * Lowercases the text, replaces every character that is not a letter,
   * combining mark, digit, underscore or whitespace with a space, splits on
   * whitespace, and drops one-character tokens and stop words. Letters and
   * digits are matched with Unicode properties so non-ASCII text is kept
   * instead of being stripped.
   */
  private tokenize(text: string): string[] {
    return text
      .toLowerCase()
      .replace(NON_TOKEN_CHARS, ' ')
      .split(WHITESPACE_RUN)
      .filter((t) => t.length > 1 && !STOP_WORDS.has(t));
  }

  /**
   * Recompute the average document length and the IDF of every term, then
   * clear the dirty flag.
   */
  private rebuildIdf(): void {
    this.idf.clear();
    const N = this.documents.size;

    if (N === 0) {
      this.avgDocLength = 0;
      this.dirty = false;
      return;
    }

    // Number of documents each term appears in (document frequency)
    const termDocCounts = new Map<string, number>();
    let totalLength = 0;

    for (const doc of this.documents.values()) {
      totalLength += doc.tokens.length;
      for (const term of new Set(doc.tokens)) {
        termDocCounts.set(term, (termDocCounts.get(term) ?? 0) + 1);
      }
    }

    this.avgDocLength = totalLength / N;

    for (const [term, df] of termDocCounts) {
      // IDF formula: log((N - df + 0.5) / (df + 0.5) + 1)
      // The "+ 1" inside the log keeps the value positive for every df <= N.
      this.idf.set(term, Math.log((N - df + 0.5) / (df + 0.5) + 1));
    }

    this.dirty = false;
  }

  /**
   * Compute the raw (unnormalized) BM25 score of one document for a query.
   * Query terms that repeat contribute once per occurrence.
   */
  private computeScore(queryTokens: string[], docTokens: string[]): number {
    const docLength = docTokens.length;

    // Term frequency map for the document
    const termFreq = new Map<string, number>();
    for (const token of docTokens) {
      termFreq.set(token, (termFreq.get(token) ?? 0) + 1);
    }

    let score = 0;

    for (const term of queryTokens) {
      const tf = termFreq.get(term) ?? 0;
      const idf = this.idf.get(term) ?? 0;

      // tf > 0 implies the document has tokens, hence avgDocLength > 0 below
      if (tf === 0 || idf === 0) continue;

      // BM25 formula
      const numerator = tf * (this.k1 + 1);
      const denominator =
        tf +
        this.k1 * (1 - this.b + this.b * (docLength / this.avgDocLength));

      score += idf * (numerator / denominator);
    }

    return score;
  }

  /**
   * Normalize a BM25 score to approximately the 0-1 range.
   * This is an approximation since BM25 scores are unbounded.
   */
  private normalizeScore(score: number, queryLength: number): number {
    if (score <= 0 || queryLength === 0) return 0;

    // Approximate maximum possible score per term,
    // assuming perfect TF and high IDF
    const maxScorePerTerm = 2.5; // Typical upper bound
    const maxPossibleScore = queryLength * maxScorePerTerm;

    // Sigmoid-like normalization: equals 0.5 when score is half the maximum
    const normalized = score / (score + maxPossibleScore * 0.5);

    return Math.min(1, Math.max(0, normalized));
  }

  /**
   * Serialize the index to a plain object (for persistence).
   *
   * The returned documents and token arrays are copies, so mutating the
   * result does not affect the index.
   */
  toJSON(): {
    documents: Array<{
      id: string;
      tokens: string[];
      type: MemoryItemType;
      content: string;
    }>;
    k1: number;
    b: number;
  } {
    return {
      documents: Array.from(this.documents.values(), (doc) => ({
        id: doc.id,
        tokens: [...doc.tokens],
        type: doc.type,
        content: doc.content,
      })),
      k1: this.k1,
      b: this.b,
    };
  }

  /**
   * Restore an index from the output of `toJSON`.
   *
   * Stored tokens are used as-is (content is not re-tokenized). Documents
   * are copied, so later changes to `data` do not affect the index.
   */
  static fromJSON(data: {
    documents: Array<{
      id: string;
      tokens: string[];
      type: MemoryItemType;
      content: string;
    }>;
    k1: number;
    b: number;
  }): BM25Index {
    const index = new BM25Index({ k1: data.k1, b: data.b });
    for (const doc of data.documents) {
      index.documents.set(doc.id, {
        id: doc.id,
        tokens: [...doc.tokens],
        type: doc.type,
        content: doc.content,
      });
    }
    // IDF values are not persisted; force a rebuild on the first search
    index.dirty = true;
    return index;
  }
}

/**
 * Matches every character that cannot be part of a token: anything other
 * than a Unicode letter, combining mark, number, underscore or whitespace.
 */
const NON_TOKEN_CHARS = /[^\p{L}\p{M}\p{N}_\s]/gu;

/** Matches one or more consecutive whitespace characters. */
const WHITESPACE_RUN = /\s+/;

/**
 * Common English stop words to filter out
 */
const STOP_WORDS = new Set<string>([
  'a', 'an', 'and', 'are', 'as', 'at', 'be', 'by', 'for', 'from',
  'has', 'he', 'in', 'is', 'it', 'its', 'of', 'on', 'or', 'that',
  'the', 'to', 'was', 'were', 'will', 'with', 'this', 'but', 'they', 'have',
  'had', 'what', 'when', 'where', 'who', 'which', 'why', 'how', 'all', 'each',
  'every', 'both', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor',
  'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 'can', 'just',
  'should', 'now',
]);

/**
 * Create a new BM25 index.
 *
 * @param options Optional `k1` / `b` parameters, forwarded to the
 *   `BM25Index` constructor.
 */
export function createBM25Index(options?: {
  k1?: number;
  b?: number;
}): BM25Index {
  return new BM25Index(options);
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embeddings Module
|
|
3
|
+
*
|
|
4
|
+
* Provides embedding generation, storage, and similarity search capabilities.
|
|
5
|
+
* Supports multiple embedding providers (OpenAI, Voyage, local) with automatic
|
|
6
|
+
* fallback to BM25 text search when no provider is configured.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
// Provider interface and implementations
|
|
10
|
+
export {
|
|
11
|
+
type EmbeddingProvider,
|
|
12
|
+
type EmbeddingProviderConfig,
|
|
13
|
+
OpenAIEmbeddingProvider,
|
|
14
|
+
VoyageEmbeddingProvider,
|
|
15
|
+
LocalEmbeddingProvider,
|
|
16
|
+
createEmbeddingProvider,
|
|
17
|
+
} from './provider.js';
|
|
18
|
+
|
|
19
|
+
// BM25 text search (fallback)
|
|
20
|
+
export {
|
|
21
|
+
BM25Index,
|
|
22
|
+
createBM25Index,
|
|
23
|
+
type MemoryItemType,
|
|
24
|
+
} from './bm25.js';
|
|
25
|
+
|
|
26
|
+
// Vector store using sqlite-vec
|
|
27
|
+
export {
|
|
28
|
+
SqliteVectorStore,
|
|
29
|
+
createVectorStore,
|
|
30
|
+
type VectorSearchResult,
|
|
31
|
+
} from './vector-store.js';
|
|
32
|
+
|
|
33
|
+
// Embedding manager (main interface)
|
|
34
|
+
export {
|
|
35
|
+
EmbeddingManager,
|
|
36
|
+
createEmbeddingManager,
|
|
37
|
+
type EmbeddingManagerOptions,
|
|
38
|
+
type SearchResult,
|
|
39
|
+
} from './manager.js';
|
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embedding Manager
|
|
3
|
+
*
|
|
4
|
+
* Orchestrates embedding generation, storage, and search.
|
|
5
|
+
* Provides automatic fallback to BM25 when no embedding provider is configured.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { EmbeddingProvider } from './provider.js';
|
|
9
|
+
import { SqliteVectorStore } from './vector-store.js';
|
|
10
|
+
import { BM25Index, type MemoryItemType } from './bm25.js';
|
|
11
|
+
|
|
12
|
+
export { type MemoryItemType } from './bm25.js';
|
|
13
|
+
|
|
14
|
+
/**
 * One ranked hit produced by a search through the embedding manager.
 */
export interface SearchResult {
  /** Identifier the item was stored under. */
  id: string;
  /** Relevance score as reported by the backend that produced the hit. */
  score: number;
  /** Text content of the matched item. */
  content: string;
}
|
|
19
|
+
|
|
20
|
+
/**
 * Construction options for the embedding manager.
 */
export interface EmbeddingManagerOptions {
  /** Embedding provider (null or omitted for BM25-only mode) */
  provider?: EmbeddingProvider | null;
  /**
   * Path to sqlite-vec database file. Vector search is only enabled when
   * both this path and a provider are supplied.
   */
  vectorStorePath?: string;
  /**
   * Embedding dimension for the vector store. Optional: when omitted, the
   * provider's own `dimension` is used.
   */
  dimension?: number;
  /** BM25 parameters, forwarded to the BM25 index */
  bm25?: {
    k1?: number;
    b?: number;
  };
}
|
|
33
|
+
|
|
34
|
+
/**
|
|
35
|
+
* Manages embeddings with automatic fallback to BM25
|
|
36
|
+
*/
|
|
37
|
+
export class EmbeddingManager {
|
|
38
|
+
private provider: EmbeddingProvider | null;
|
|
39
|
+
private vectorStore: SqliteVectorStore | null;
|
|
40
|
+
private bm25Index: BM25Index;
|
|
41
|
+
private useVectorSearch: boolean;
|
|
42
|
+
|
|
43
|
+
constructor(options: EmbeddingManagerOptions) {
  const provider = options.provider ?? null;
  const storePath = options.vectorStorePath;

  this.provider = provider;

  // Vector search needs both a provider and a database path. Narrowing the
  // two locals once removes the need for a non-null assertion on the provider.
  if (provider !== null && storePath) {
    this.useVectorSearch = true;
    this.vectorStore = new SqliteVectorStore({
      dbPath: storePath,
      // An explicit dimension wins; otherwise use the provider's
      dimension: options.dimension ?? provider.dimension,
    });
  } else {
    this.useVectorSearch = false;
    this.vectorStore = null;
  }

  // Always maintain BM25 as fallback
  this.bm25Index = new BM25Index(options.bm25);
}
|
|
61
|
+
|
|
62
|
+
/**
|
|
63
|
+
* Check if vector search is available
|
|
64
|
+
*/
|
|
65
|
+
get hasVectorSearch(): boolean {
  // True only when a vector store exists and vector search is enabled
  return this.vectorStore !== null && this.useVectorSearch;
}
|
|
68
|
+
|
|
69
|
+
/**
|
|
70
|
+
* Get the embedding dimension (if available)
|
|
71
|
+
*/
|
|
72
|
+
get dimension(): number | null {
  // No provider means no embeddings, hence no dimension to report
  if (this.provider == null) {
    return null;
  }
  return this.provider.dimension ?? null;
}
|
|
75
|
+
|
|
76
|
+
/**
|
|
77
|
+
* Store an item with its embedding
|
|
78
|
+
* Returns the embedding if generated, undefined otherwise
|
|
79
|
+
*/
|
|
80
|
+
async store(
  id: string,
  content: string,
  type: MemoryItemType
): Promise<number[] | undefined> {
  // The BM25 index is updated first and unconditionally, so the item stays
  // searchable even if no embedding can be produced.
  this.bm25Index.add(id, content, type);

  // Without an active vector backend there is nothing more to do
  if (!(this.useVectorSearch && this.provider && this.vectorStore)) {
    return undefined;
  }

  try {
    const vector = await this.provider.embed(content);
    this.vectorStore.insert(id, vector, type, content);
    return vector;
  } catch (error) {
    // Best effort: log and report "no embedding"; BM25 still has the item
    console.error(`Failed to generate embedding for ${id}:`, error);
    return undefined;
  }
}
|
|
102
|
+
|
|
103
|
+
/**
|
|
104
|
+
* Store multiple items with embeddings (batch)
|
|
105
|
+
* More efficient than calling store() multiple times
|
|
106
|
+
*/
|
|
107
|
+
async storeBatch(
  items: Array<{ id: string; content: string; type: MemoryItemType }>
): Promise<Map<string, number[] | undefined>> {
  const results = new Map<string, number[] | undefined>();

  // Empty batch: skip the provider and vector-store calls entirely
  if (items.length === 0) {
    return results;
  }

  // Every item goes into BM25, and starts out as "no embedding" in the
  // result map; entries are overwritten below only after a successful insert.
  for (const item of items) {
    this.bm25Index.add(item.id, item.content, item.type);
    results.set(item.id, undefined);
  }

  if (!(this.useVectorSearch && this.provider && this.vectorStore)) {
    return results;
  }

  try {
    const embeddings = await this.provider.embedBatch(
      items.map((item) => item.content)
    );

    // embeddings[i] is paired with items[i]
    // NOTE(review): assumes embedBatch returns one embedding per input, in order — confirm
    this.vectorStore.insertBatch(
      items.map((item, i) => ({
        id: item.id,
        embedding: embeddings[i],
        type: item.type,
        content: item.content,
      }))
    );

    items.forEach((item, i) => {
      results.set(item.id, embeddings[i]);
    });
  } catch (error) {
    // Best effort: items remain in the BM25 index and stay undefined here
    console.error('Failed to generate batch embeddings:', error);
  }

  return results;
}
|
|
150
|
+
|
|
151
|
+
/**
 * Find items relevant to a text query.
 *
 * When vector search is active the query is embedded by the provider and looked
 * up in the vector store. If vector search is not active, or that path throws,
 * the query is answered by the BM25 index instead.
 *
 * @param query - Free-text query.
 * @param options - Optional `k`, `type` and `threshold`; passed through unchanged
 *   to whichever backend answers the query.
 * @returns Matching results from the vector store (projected to id, score and
 *   content) or whatever the BM25 index returns.
 */
async search(
  query: string,
  options: {
    k?: number;
    type?: MemoryItemType;
    threshold?: number;
  } = {}
): Promise<SearchResult[]> {
  // No usable vector backend: answer directly from BM25.
  if (!this.useVectorSearch || !this.provider || !this.vectorStore) {
    return this.bm25Index.search(query, options);
  }

  try {
    const queryVector = await this.provider.embed(query);
    const hits = this.vectorStore.search(queryVector, options);
    return hits.map(({ id, score, content }) => ({ id, score, content }));
  } catch (error) {
    console.error('Vector search failed, falling back to BM25:', error);
    return this.bm25Index.search(query, options);
  }
}
|
|
182
|
+
|
|
183
|
+
/**
 * Look up items using an embedding the caller has already computed.
 *
 * @param embedding - Query vector handed straight to the vector store.
 * @param options - Optional `k`, `type` and `threshold`, forwarded unchanged.
 * @returns The store's hits reduced to id, score and content, or `null` when
 *   this manager has no vector store.
 */
searchByEmbedding(
  embedding: number[],
  options: {
    k?: number;
    type?: MemoryItemType;
    threshold?: number;
  } = {}
): SearchResult[] | null {
  const store = this.vectorStore;
  if (!store) {
    return null;
  }

  const projected: SearchResult[] = [];
  for (const hit of store.search(embedding, options)) {
    projected.push({ id: hit.id, score: hit.score, content: hit.content });
  }
  return projected;
}
|
|
206
|
+
|
|
207
|
+
/**
 * Compute an embedding for a piece of text without indexing it.
 *
 * @param text - Text to embed.
 * @returns The provider's embedding, or `null` when there is no provider or
 *   the provider call throws (the error is logged).
 */
async embed(text: string): Promise<number[] | null> {
  const provider = this.provider;

  if (provider) {
    try {
      return await provider.embed(text);
    } catch (error) {
      console.error('Failed to generate embedding:', error);
    }
  }

  // Either no provider is configured or the call above failed.
  return null;
}
|
|
222
|
+
|
|
223
|
+
/**
 * Compute embeddings for several texts without indexing any of them.
 *
 * @param texts - Texts to embed, forwarded to the provider in one call.
 * @returns The provider's embeddings, or `null` when there is no provider or
 *   the provider call throws (the error is logged).
 */
async embedBatch(texts: string[]): Promise<number[][] | null> {
  const provider = this.provider;
  if (!provider) {
    return null;
  }

  let vectors: number[][] | null = null;
  try {
    vectors = await provider.embedBatch(texts);
  } catch (error) {
    console.error('Failed to generate batch embeddings:', error);
    vectors = null;
  }
  return vectors;
}
|
|
238
|
+
|
|
239
|
+
/**
 * Delete an item from the BM25 index and, if a vector store exists, from it too.
 *
 * @param id - Identifier of the item to remove.
 */
remove(id: string): void {
  this.bm25Index.remove(id);

  const store = this.vectorStore;
  if (store != null) {
    store.delete(id);
  }
}
|
|
246
|
+
|
|
247
|
+
/**
 * Report whether an item id is known.
 *
 * Membership is answered by the BM25 index alone; the vector store is not
 * consulted.
 *
 * @param id - Identifier to look up.
 * @returns Whatever the BM25 index reports for `id`.
 */
has(id: string): boolean {
  const indexed = this.bm25Index.has(id);
  return indexed;
}
|
|
253
|
+
|
|
254
|
+
/**
 * Snapshot of the manager's index state.
 *
 * @returns An object with:
 *   - `bm25Count`: size of the BM25 index;
 *   - `vectorCount`: the vector store's count, or `null` without a store;
 *   - `hasVectorSearch` and `dimension`: read from this manager's own properties;
 *   - `providerName`: the provider's name, or `null` without a provider.
 */
getStats(): {
  bm25Count: number;
  vectorCount: number | null;
  hasVectorSearch: boolean;
  dimension: number | null;
  providerName: string | null;
} {
  // Values are read in the same order as they appear in the result object.
  const bm25Count = this.bm25Index.size;
  const vectorCount = this.vectorStore?.count ?? null;
  const hasVectorSearch = this.hasVectorSearch;
  const dimension = this.dimension;
  const providerName = this.provider?.name ?? null;

  return { bm25Count, vectorCount, hasVectorSearch, dimension, providerName };
}
|
|
272
|
+
|
|
273
|
+
/**
 * Release resources held by the manager.
 *
 * Closes the vector store when one exists; nothing else is touched here.
 */
close(): void {
  const store = this.vectorStore;
  if (store != null) {
    store.close();
  }
}
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
/**
 * Factory wrapper around the `EmbeddingManager` constructor.
 *
 * @param options - Options handed to the constructor unchanged.
 * @returns The newly constructed manager.
 */
export function createEmbeddingManager(
  options: EmbeddingManagerOptions
): EmbeddingManager {
  const manager = new EmbeddingManager(options);
  return manager;
}
|