brainbank 0.1.0-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +155 -0
- package/assets/architecture.png +0 -0
- package/bin/brainbank +18 -0
- package/bin/brainbank-mcp +19 -0
- package/dist/chunk-3YBCD6DI.js +117 -0
- package/dist/chunk-3YBCD6DI.js.map +1 -0
- package/dist/chunk-63GBCDS5.js +3249 -0
- package/dist/chunk-63GBCDS5.js.map +1 -0
- package/dist/chunk-DMFMTOHF.js +123 -0
- package/dist/chunk-DMFMTOHF.js.map +1 -0
- package/dist/chunk-FQYKWB2Q.js +136 -0
- package/dist/chunk-FQYKWB2Q.js.map +1 -0
- package/dist/chunk-IMJJ2VEM.js +74 -0
- package/dist/chunk-IMJJ2VEM.js.map +1 -0
- package/dist/chunk-M744PCJQ.js +43 -0
- package/dist/chunk-M744PCJQ.js.map +1 -0
- package/dist/chunk-O3J6ZIXK.js +82 -0
- package/dist/chunk-O3J6ZIXK.js.map +1 -0
- package/dist/chunk-OPH7GZ7U.js +124 -0
- package/dist/chunk-OPH7GZ7U.js.map +1 -0
- package/dist/chunk-PXEWQMN7.js +89 -0
- package/dist/chunk-PXEWQMN7.js.map +1 -0
- package/dist/chunk-RDQYDLYZ.js +69 -0
- package/dist/chunk-RDQYDLYZ.js.map +1 -0
- package/dist/chunk-VIIHPCC4.js +254 -0
- package/dist/chunk-VIIHPCC4.js.map +1 -0
- package/dist/chunk-WCQVDF3K.js +14 -0
- package/dist/chunk-WCQVDF3K.js.map +1 -0
- package/dist/cli.d.ts +1 -0
- package/dist/cli.js +3076 -0
- package/dist/cli.js.map +1 -0
- package/dist/haiku-expander-YRSIPGKP.js +8 -0
- package/dist/haiku-expander-YRSIPGKP.js.map +1 -0
- package/dist/haiku-pruner-SHAXUPY6.js +8 -0
- package/dist/haiku-pruner-SHAXUPY6.js.map +1 -0
- package/dist/http-server-QUXHLWUM.js +9 -0
- package/dist/http-server-QUXHLWUM.js.map +1 -0
- package/dist/index.d.ts +2161 -0
- package/dist/index.js +357 -0
- package/dist/index.js.map +1 -0
- package/dist/local-embedding-NZQTILGV.js +8 -0
- package/dist/local-embedding-NZQTILGV.js.map +1 -0
- package/dist/mcp.d.ts +2 -0
- package/dist/mcp.js +334 -0
- package/dist/mcp.js.map +1 -0
- package/dist/openai-embedding-ZP5TSUJG.js +8 -0
- package/dist/openai-embedding-ZP5TSUJG.js.map +1 -0
- package/dist/perplexity-context-embedding-GI5PHE6X.js +9 -0
- package/dist/perplexity-context-embedding-GI5PHE6X.js.map +1 -0
- package/dist/perplexity-embedding-KZRYGJRC.js +10 -0
- package/dist/perplexity-embedding-KZRYGJRC.js.map +1 -0
- package/dist/plugin-IKQ6IRSJ.js +32 -0
- package/dist/plugin-IKQ6IRSJ.js.map +1 -0
- package/dist/resolve-ASGLBNUC.js +10 -0
- package/dist/resolve-ASGLBNUC.js.map +1 -0
- package/dist/stats-tui-ZY2NQSEA.js +1904 -0
- package/dist/stats-tui-ZY2NQSEA.js.map +1 -0
- package/package.json +96 -0
- package/src/brainbank.ts +617 -0
- package/src/cli/commands/collection.ts +77 -0
- package/src/cli/commands/context.ts +179 -0
- package/src/cli/commands/daemon.ts +100 -0
- package/src/cli/commands/docs.ts +71 -0
- package/src/cli/commands/files.ts +69 -0
- package/src/cli/commands/help.ts +77 -0
- package/src/cli/commands/index.ts +482 -0
- package/src/cli/commands/kv.ts +140 -0
- package/src/cli/commands/mcp-export.ts +273 -0
- package/src/cli/commands/mcp.ts +6 -0
- package/src/cli/commands/reembed.ts +30 -0
- package/src/cli/commands/scan.ts +336 -0
- package/src/cli/commands/search.ts +203 -0
- package/src/cli/commands/stats.ts +68 -0
- package/src/cli/commands/status.ts +47 -0
- package/src/cli/commands/watch.ts +47 -0
- package/src/cli/factory/brain-context.ts +43 -0
- package/src/cli/factory/builtin-registration.ts +87 -0
- package/src/cli/factory/config-loader.ts +77 -0
- package/src/cli/factory/index.ts +69 -0
- package/src/cli/factory/plugin-loader.ts +325 -0
- package/src/cli/index.ts +71 -0
- package/src/cli/server-client.ts +178 -0
- package/src/cli/tui/index-tui.tsx +667 -0
- package/src/cli/tui/stats-data.ts +523 -0
- package/src/cli/tui/stats-search.ts +262 -0
- package/src/cli/tui/stats-tui.tsx +1465 -0
- package/src/cli/tui/tree-scanner.ts +650 -0
- package/src/cli/utils.ts +137 -0
- package/src/config.ts +49 -0
- package/src/constants.ts +21 -0
- package/src/db/adapter.ts +112 -0
- package/src/db/metadata.ts +130 -0
- package/src/db/migrations.ts +66 -0
- package/src/db/sqlite-adapter.ts +218 -0
- package/src/db/tracker.ts +91 -0
- package/src/engine/index-api.ts +81 -0
- package/src/engine/reembed.ts +206 -0
- package/src/engine/search-api.ts +218 -0
- package/src/index.ts +154 -0
- package/src/lib/fts.ts +57 -0
- package/src/lib/languages.ts +180 -0
- package/src/lib/logger.ts +126 -0
- package/src/lib/math.ts +87 -0
- package/src/lib/provider-key.ts +20 -0
- package/src/lib/prune.ts +71 -0
- package/src/lib/rrf.ts +133 -0
- package/src/lib/write-lock.ts +108 -0
- package/src/mcp/mcp-server.ts +195 -0
- package/src/mcp/workspace-factory.ts +68 -0
- package/src/mcp/workspace-pool.ts +224 -0
- package/src/plugin.ts +381 -0
- package/src/providers/embeddings/embedding-worker-thread.ts +95 -0
- package/src/providers/embeddings/embedding-worker.ts +141 -0
- package/src/providers/embeddings/local-embedding.ts +115 -0
- package/src/providers/embeddings/openai-embedding.ts +167 -0
- package/src/providers/embeddings/perplexity-context-embedding.ts +195 -0
- package/src/providers/embeddings/perplexity-embedding.ts +165 -0
- package/src/providers/embeddings/resolve.ts +34 -0
- package/src/providers/pruners/haiku-expander.ts +166 -0
- package/src/providers/pruners/haiku-pruner.ts +112 -0
- package/src/providers/vector/hnsw-index.ts +174 -0
- package/src/providers/vector/hnsw-loader.ts +129 -0
- package/src/search/bm25-boost.ts +69 -0
- package/src/search/context-builder.ts +251 -0
- package/src/search/keyword/composite-bm25-search.ts +47 -0
- package/src/search/types.ts +37 -0
- package/src/search/vector/composite-vector-search.ts +61 -0
- package/src/search/vector/mmr.ts +64 -0
- package/src/services/collection.ts +384 -0
- package/src/services/daemon.ts +87 -0
- package/src/services/http-server.ts +336 -0
- package/src/services/kv-service.ts +64 -0
- package/src/services/plugin-registry.ts +77 -0
- package/src/services/watch.ts +340 -0
- package/src/services/webhook-server.ts +100 -0
- package/src/types.ts +493 -0
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BrainBank — Query Debug Logger
|
|
3
|
+
*
|
|
4
|
+
* Appends structured, human-readable entries to /tmp/brainbank.log.
|
|
5
|
+
* Covers all search operations: getContext, search, hybridSearch, searchBM25.
|
|
6
|
+
* Truncates at 10 MB (keeps the newest half).
|
|
7
|
+
*
|
|
8
|
+
* Layer 0 — pure functions, no state.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import * as fs from 'node:fs';
|
|
12
|
+
|
|
13
|
+
/** Absolute path of the file that query log entries are appended to. */
const LOG_PATH = '/tmp/brainbank.log';
|
|
14
|
+
/** File size in bytes at or above which the log is cut down before the next append. */
const MAX_BYTES = 10 * 1024 * 1024; // 10 MB
|
|
15
|
+
|
|
16
|
+
// ── Public Types ─────────────────────────────────────
|
|
17
|
+
|
|
18
|
+
/** Origin label of a logged query; printed upper-cased in the entry header line. */
export type QuerySource = 'cli' | 'mcp' | 'daemon' | 'api';
|
|
19
|
+
|
|
20
|
+
/** One search operation as it is handed to logQuery() and rendered into the log file. */
export interface QueryLogEntry {
|
|
21
|
+
/** Where the query came from; printed upper-cased in the header line. */
source: QuerySource;
|
|
22
|
+
/** Search operation that was executed; printed next to the source. */
method: 'getContext' | 'search' | 'hybridSearch' | 'searchBM25';
|
|
23
|
+
/** Query text; printed inside double quotes. */
query: string;
|
|
24
|
+
/** Embedding label printed after "Embedding:". */
embedding: string;
|
|
25
|
+
/** Pruner label; null is printed as "none". */
pruner: string | null;
|
|
26
|
+
/** Expander label; null or a missing value is printed as "off". */
expander?: string | null;
|
|
27
|
+
/** When truthy, printed as " (+N)" after the expander label. */
expandedCount?: number;
|
|
28
|
+
/** Extra options; undefined values are skipped and object values are JSON-stringified. */
options: Record<string, unknown>;
|
|
29
|
+
/** Results listed one per line with rank and percentage score. */
results: QueryLogResult[];
|
|
30
|
+
/** Results listed under "Pruned"; their count is added to the total shown in the results header. */
pruned?: QueryLogResult[];
|
|
31
|
+
/** Printed as "Duration: <n>ms". */
durationMs: number;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
/** A single result line inside a query log entry. */
export interface QueryLogResult {
|
|
35
|
+
/** Path printed for the result, right-padded to 45 characters. */
filePath: string;
|
|
36
|
+
/** Score; multiplied by 100 and rounded to produce the printed percentage. */
score: number;
|
|
37
|
+
/** Result kind. NOTE(review): not printed by the formatter in this file — confirm who reads it. */
type: string;
|
|
38
|
+
/** Optional name, printed in square brackets after the path when present. */
name?: string;
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
// ── Public API ───────────────────────────────────────
|
|
42
|
+
|
|
43
|
+
/**
 * Write one query record to the end of /tmp/brainbank.log.
 *
 * The log is first cut down if it has grown too large, then the formatted
 * entry is appended. Any error raised along the way is swallowed so that
 * logging can never break the caller.
 *
 * @param entry - The query to record.
 */
export function logQuery(entry: QueryLogEntry): void {
    try {
        _truncateIfNeeded();
        const record = _formatEntry(entry);
        fs.appendFileSync(LOG_PATH, record);
    } catch {
        // Best-effort logging: failures are deliberately ignored.
    }
}
|
|
52
|
+
|
|
53
|
+
// ── Formatting ───────────────────────────────────────
|
|
54
|
+
|
|
55
|
+
/**
 * Render a query log entry as a multi-line, human-readable text block.
 *
 * Layout: blank line, divider, timestamped header, query, provider line,
 * optional options line, duration, blank line, results list, optional pruned
 * list, divider, trailing blank line.
 *
 * @param e - Entry to render.
 * @returns The text block, lines joined with "\n".
 */
function _formatEntry(e: QueryLogEntry): string {
    const rule = '═'.repeat(70);
    const stamp = new Date().toISOString();
    const expandedSuffix = e.expandedCount ? ` (+${e.expandedCount})` : '';

    const out: string[] = [];
    out.push('', rule);
    out.push(`[${stamp}] ${e.source.toUpperCase()} · ${e.method}`);
    out.push(`Query: "${e.query}"`);
    out.push(`Embedding: ${e.embedding} | Pruner: ${e.pruner ?? 'none'} | Expander: ${e.expander ?? 'off'}${expandedSuffix}`);

    // Options (sources, path, etc.) — entries whose value is undefined are left out.
    const optionParts: string[] = [];
    for (const [key, value] of Object.entries(e.options)) {
        if (value === undefined) continue;
        optionParts.push(`${key}: ${typeof value === 'object' ? JSON.stringify(value) : String(value)}`);
    }
    if (optionParts.length > 0) {
        out.push(optionParts.join(' | '));
    }

    out.push(`Duration: ${e.durationMs}ms`, '');

    // Results header: when items were pruned, show the count before and after.
    const kept = e.results.length;
    const dropped = e.pruned?.length ?? 0;
    if (dropped > 0) {
        out.push(`Results (${kept + dropped} → ${kept} after pruning):`);
    } else {
        out.push(`Results (${kept}):`);
    }

    for (const [idx, hit] of e.results.entries()) {
        const pct = Math.round(hit.score * 100);
        const label = hit.name ? `[${hit.name}]` : '';
        out.push(` #${String(idx + 1).padStart(2)} ${String(pct).padStart(3)}% ${hit.filePath.padEnd(45)} ${label}`);
    }

    // Pruned items
    if (e.pruned && e.pruned.length > 0) {
        out.push('', `Pruned (${e.pruned.length} removed):`);
        for (const gone of e.pruned) {
            const label = gone.name ? `[${gone.name}]` : '';
            out.push(` ✗ ${gone.filePath.padEnd(45)} ${label}`);
        }
    }

    out.push(rule, '');
    return out.join('\n');
}
|
|
107
|
+
|
|
108
|
+
// ── Truncation ───────────────────────────────────────
|
|
109
|
+
|
|
110
|
+
/**
 * Cut the log file down to roughly its newest half once it reaches MAX_BYTES.
 *
 * The cut is placed at the first divider line after the midpoint so the kept
 * part starts on a divider. If no divider follows the midpoint (for example a
 * single oversized entry), the cut falls back to the next line break, and
 * finally to the midpoint itself, so the file can never grow without bound.
 *
 * Never throws: a missing file or any I/O error is ignored.
 */
function _truncateIfNeeded(): void {
    try {
        const stat = fs.statSync(LOG_PATH);
        if (stat.size < MAX_BYTES) return;

        // Keep the newest half
        const content = fs.readFileSync(LOG_PATH, 'utf-8');
        const half = Math.floor(content.length / 2);

        // Preferred cut: the next divider line after the midpoint.
        let cut = content.indexOf('\n═', half);
        if (cut < 0) {
            // No divider ahead — fall back to the next line break.
            cut = content.indexOf('\n', half);
        }
        // +1 skips the newline itself; with no newline at all, cut at the midpoint.
        const start = cut >= 0 ? cut + 1 : half;

        fs.writeFileSync(LOG_PATH, '[truncated]\n' + content.slice(start));
    } catch {
        // File doesn't exist yet (or is unreadable) — logging stays best-effort
    }
}
|
package/src/lib/math.ts
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BrainBank — Math Utilities
|
|
3
|
+
*
|
|
4
|
+
* Pure vector math functions for similarity calculations.
|
|
5
|
+
* No dependencies — works on Float32Array directly.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/**
 * Dot product of two equally sized vectors.
 *
 * For unit-length inputs this equals their cosine similarity (range -1.0 to
 * 1.0); no normalization is performed here.
 *
 * @param a - First vector.
 * @param b - Second vector, same length as `a`.
 * @returns The dot product, or 0 for empty vectors.
 * @throws Error when the vectors differ in length.
 */
export function cosineSimilarity(a: Float32Array, b: Float32Array): number {
    const dim = a.length;
    if (dim !== b.length) {
        throw new Error(`Vector dimension mismatch: ${a.length} vs ${b.length}`);
    }
    if (dim === 0) return 0;

    // Accumulate left to right so the floating-point result is order-stable.
    return a.reduce((acc, value, idx) => acc + value * b[idx], 0);
}
|
|
25
|
+
|
|
26
|
+
/**
 * Cosine similarity for vectors that may not be unit length.
 *
 * Divides the dot product by the product of both magnitudes.
 *
 * @param a - First vector.
 * @param b - Second vector, same length as `a`.
 * @returns The similarity, or 0 when the vectors are empty or either has zero magnitude.
 * @throws Error when the vectors differ in length.
 */
export function cosineSimilarityFull(a: Float32Array, b: Float32Array): number {
    const dim = a.length;
    if (dim !== b.length) {
        throw new Error(`Vector dimension mismatch: ${a.length} vs ${b.length}`);
    }
    if (dim === 0) return 0;

    let dot = 0;
    let squaresA = 0;
    let squaresB = 0;
    a.forEach((x, idx) => {
        const y = b[idx];
        dot += x * y;
        squaresA += x * x;
        squaresB += y * y;
    });

    const denom = Math.sqrt(squaresA) * Math.sqrt(squaresB);
    if (denom === 0) return 0;
    return dot / denom;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Scale a vector to unit L2 length.
 *
 * @param vec - Input vector; it is not modified.
 * @returns A new Float32Array. A zero-magnitude input yields an all-zero vector of the same length.
 */
export function normalize(vec: Float32Array): Float32Array {
    let sumSquares = 0;
    for (const component of vec) {
        sumSquares += component * component;
    }

    const magnitude = Math.sqrt(sumSquares);
    if (magnitude === 0) return new Float32Array(vec.length);

    return Float32Array.from(vec, component => component / magnitude);
}
|
|
64
|
+
|
|
65
|
+
/**
 * Straight-line (L2) distance between two vectors.
 *
 * @param a - First vector.
 * @param b - Second vector, same length as `a`.
 * @returns Square root of the summed squared component differences.
 * @throws Error when the vectors differ in length.
 */
export function euclideanDistance(a: Float32Array, b: Float32Array): number {
    if (a.length !== b.length) {
        throw new Error(`Vector dimension mismatch: ${a.length} vs ${b.length}`);
    }

    const squared = a.reduce((total, value, idx) => {
        const diff = value - b[idx];
        return total + diff * diff;
    }, 0);

    return Math.sqrt(squared);
}
|
|
79
|
+
|
|
80
|
+
/**
 * Wrap a Float32Array's bytes in a Buffer for SQLite storage.
 *
 * The vector's own byteOffset and byteLength are passed along, so a vector
 * that is a view into a larger buffer (e.g. from batched embedding output)
 * yields only its own bytes. Calling Buffer.from(vec.buffer) without them
 * would span the whole parent buffer instead.
 *
 * @param vec - Vector to expose as bytes.
 * @returns A Buffer over exactly the vector's bytes.
 */
export function vecToBuffer(vec: Float32Array): Buffer {
    const { buffer, byteOffset, byteLength } = vec;
    return Buffer.from(buffer, byteOffset, byteLength);
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BrainBank — Provider Key
|
|
3
|
+
*
|
|
4
|
+
* Infers a stable key from an existing EmbeddingProvider instance.
|
|
5
|
+
* Lives in lib/ (Layer 0) to avoid db/ → providers/ dependency.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { EmbeddingProvider } from '@/types.ts';
|
|
9
|
+
|
|
10
|
+
/** Known embedding provider keys. */
|
|
11
|
+
export type EmbeddingKey = 'local' | 'openai' | 'perplexity' | 'perplexity-context';
|
|
12
|
+
|
|
13
|
+
/**
 * Map an embedding provider instance to its key by looking at the name of
 * its constructor. Any name that is not recognized (including a missing
 * constructor) maps to 'local'.
 *
 * @param p - Provider instance to classify.
 * @returns The matching key.
 */
export function providerKey(p: EmbeddingProvider): EmbeddingKey {
    const ctorName = p.constructor?.name ?? '';
    switch (ctorName) {
        case 'OpenAIEmbedding':
            return 'openai';
        case 'PerplexityEmbedding':
            return 'perplexity';
        case 'PerplexityContextEmbedding':
            return 'perplexity-context';
        default:
            return 'local';
    }
}
|
package/src/lib/prune.ts
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BrainBank — Prune Utility
|
|
3
|
+
*
|
|
4
|
+
* Bridges SearchResult[] → Pruner.prune() → filtered SearchResult[].
|
|
5
|
+
* Converts results to lightweight PrunerItems with full content previews,
|
|
6
|
+
* calls the pruner, and filters out dropped results.
|
|
7
|
+
*
|
|
8
|
+
* Content strategy: send the FULL chunk content to the pruner so it can
|
|
9
|
+
* make informed decisions. A per-item character cap prevents cost blowups
|
|
10
|
+
* on monster files — when truncated, the top and bottom of the content are
|
|
11
|
+
* kept (imports + trailing exports) and the middle is omitted.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import type { Pruner, PrunerItem, SearchResult } from '@/types.ts';
|
|
15
|
+
|
|
16
|
+
/**
|
|
17
|
+
* Max characters per item sent to the pruner.
|
|
18
|
+
* ~8K chars ≈ 200-250 lines — enough for the model to understand
|
|
19
|
+
* the file's purpose without blowing up token budgets.
|
|
20
|
+
*/
|
|
21
|
+
const MAX_PREVIEW_CHARS = 8_000;
|
|
22
|
+
|
|
23
|
+
/**
 * Run the pruner on search results and return the survivors in the order the
 * pruner chose.
 *
 * Each result is handed to the pruner as an item whose `id` is the result's
 * index in `results`. Ids returned by the pruner that are not valid indexes
 * are ignored, and an id returned more than once is kept only at its first
 * position so no result appears twice.
 *
 * @param query - The user's query, passed through to the pruner.
 * @param results - Candidate results; returned unchanged when there are 0 or 1.
 * @param pruner - Pruner whose `prune()` resolves to the ids to keep.
 * @returns The kept results.
 */
export async function pruneResults(
    query: string,
    results: SearchResult[],
    pruner: Pruner,
): Promise<SearchResult[]> {
    // Nothing to choose between — skip the pruner call entirely.
    if (results.length <= 1) return results;

    // Map results to items with full content (capped per item)
    const items: PrunerItem[] = results.map((r, i) => ({
        id: i,
        filePath: r.filePath ?? 'unknown',
        preview: _buildPreview(r.content),
        metadata: r.metadata as Record<string, unknown>,
    }));

    const keepIds = await pruner.prune(query, items);

    // Respect the pruner's ordering, dropping out-of-range and repeated ids.
    const seen = new Set<number>();
    const kept: SearchResult[] = [];
    for (const id of keepIds) {
        if (!Number.isInteger(id) || id < 0 || id >= results.length) continue;
        if (seen.has(id)) continue;
        seen.add(id);
        kept.push(results[id]);
    }
    return kept;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Build a size-capped preview from full content.
 *
 * Content that fits within `maxChars` is returned as-is. Oversized content
 * is reduced to whole lines from the top (up to ~60% of the cap) and whole
 * lines from the bottom (up to ~25% of the cap), joined by a
 * "// [... N lines omitted ...]" marker. This keeps imports/types at the top
 * AND whatever sits at the end of the content.
 *
 * A first or last line that is longer than its share of the cap is hard-cut
 * to that share rather than dropped, so even single-line content produces a
 * non-empty preview.
 *
 * @param content - Full text to preview.
 * @param maxChars - Character cap; defaults to MAX_PREVIEW_CHARS.
 * @returns The content itself, or a head + marker + tail preview.
 */
function _buildPreview(content: string, maxChars: number = MAX_PREVIEW_CHARS): string {
    if (content.length <= maxChars) return content;

    const lines = content.split('\n');
    const totalLines = lines.length;
    const headBudget = Math.floor(maxChars * 0.6);
    const tailBudget = Math.floor(maxChars * 0.25);

    // Take whole lines from the top while they fit (+1 accounts for the newline).
    let headCount = 0;
    let headChars = 0;
    while (headCount < totalLines) {
        const cost = lines[headCount].length + 1;
        if (headChars + cost > headBudget) break;
        headChars += cost;
        headCount++;
    }

    // Take whole lines from the bottom, never reaching into the head section.
    let tailCount = 0;
    let tailChars = 0;
    while (tailCount < totalLines - headCount) {
        const cost = lines[totalLines - 1 - tailCount].length + 1;
        if (tailChars + cost > tailBudget) break;
        tailChars += cost;
        tailCount++;
    }

    // An overlong first/last line would otherwise vanish: keep a hard-cut slice of it.
    const topPart = headCount > 0
        ? lines.slice(0, headCount).join('\n')
        : lines[0].slice(0, headBudget);
    let bottomPart = '';
    if (tailCount > 0) {
        bottomPart = lines.slice(totalLines - tailCount).join('\n');
    } else if (tailBudget > 0) {
        bottomPart = lines[totalLines - 1].slice(-tailBudget);
    }

    // Lines not shown in full (a hard-cut line counts as omitted).
    const omitted = totalLines - headCount - tailCount;

    return `${topPart}\n\n// [... ${omitted} lines omitted ...]\n\n${bottomPart}`;
}
|
package/src/lib/rrf.ts
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BrainBank — Reciprocal Rank Fusion (RRF)
|
|
3
|
+
*
|
|
4
|
+
* Combines results from multiple search systems (vector + BM25)
|
|
5
|
+
* using the RRF algorithm: score = Σ 1/(k + rank_i)
|
|
6
|
+
*
|
|
7
|
+
* This is the same algorithm used by Elasticsearch, QMD, and most
|
|
8
|
+
* production hybrid search systems. Simple but very effective.
|
|
9
|
+
*
|
|
10
|
+
* Reference: Cormack et al., "Reciprocal Rank Fusion outperforms
|
|
11
|
+
* Condorcet and individual Rank Learning Methods" (2009)
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import type { SearchResult } from '@/types.ts';
|
|
15
|
+
|
|
16
|
+
/**
 * Merge several ranked result lists into one ranking using Reciprocal Rank
 * Fusion: every appearance of a result adds 1 / (k + rank) to its fused
 * score, with rank counted from 1.
 *
 * When the same result (same key) shows up in more than one list, the copy
 * with the highest original score is the one returned. Returned scores are
 * the fused scores divided by the top fused score, so the best result always
 * has score 1; the raw fused score is stored in `metadata.rrfScore`.
 *
 * @param resultSets - Arrays of SearchResult from different systems (e.g. vector, BM25)
 * @param k - Smoothing constant. Default: 60. Higher = less emphasis on top ranks.
 * @param maxResults - Maximum results to return.
 * @returns New result objects, best first.
 */
export function reciprocalRankFusion(
    resultSets: SearchResult[][],
    k: number = 60,
    maxResults: number = 15,
): SearchResult[] {
    const byKey = new Map<string, { result: SearchResult; rrfScore: number }>();

    for (const list of resultSets) {
        for (const [position, hit] of list.entries()) {
            const id = resultKey(hit);
            const weight = 1.0 / (k + position + 1);
            const slot = byKey.get(id);

            if (!slot) {
                byKey.set(id, { result: { ...hit }, rrfScore: weight });
                continue;
            }

            slot.rrfScore += weight;
            // Keep the copy with the higher original score
            if (hit.score > slot.result.score) {
                slot.result = { ...hit };
            }
        }
    }

    const ranked = [...byKey.values()]
        .sort((left, right) => right.rrfScore - left.rrfScore)
        .slice(0, maxResults);

    // Scores are relative to this result set: the top entry normalizes to 1.0,
    // so a lone result always scores 1.0 regardless of absolute relevance.
    const top = ranked[0]?.rrfScore ?? 1;

    return ranked.map(({ result, rrfScore }) => ({
        ...result,
        score: rrfScore / top,
        metadata: {
            ...result.metadata,
            rrfScore,
        },
    }) as SearchResult);
}
|
|
72
|
+
|
|
73
|
+
/**
 * Build the string used to recognize the same search result across result
 * lists. The key format depends on the result type:
 * code → file path + line range, commit → hash (or short hash),
 * document → file path + collection + seq + first 80 content characters,
 * collection → metadata id (or first 80 content characters).
 */
function resultKey(r: SearchResult): string {
    switch (r.type) {
        case 'code': {
            const { startLine, endLine } = r.metadata;
            return `code:${r.filePath}:${startLine}-${endLine}`;
        }
        case 'commit': {
            const commitId = r.metadata.hash || r.metadata.shortHash;
            return `commit:${commitId}`;
        }
        case 'document': {
            const path = r.filePath ?? '';
            const collection = r.metadata.collection ?? '';
            const seq = r.metadata.seq ?? '';
            const head = r.content?.slice(0, 80);
            return `document:${path}:${collection}:${seq}:${head}`;
        }
        case 'collection': {
            const ident = r.metadata.id ?? r.content?.slice(0, 80);
            return `collection:${ident}`;
        }
    }
}
|
|
88
|
+
|
|
89
|
+
/**
 * Reciprocal Rank Fusion over arbitrary items — no SearchResult required.
 *
 * Every appearance of an item adds 1 / (k + rank) to its fused score, with
 * rank counted from 1. For an item found in several lists, the occurrence
 * with the highest original score is the one returned. Output scores are
 * fused scores divided by the top fused score.
 *
 * @param lists - Ranked lists from different search systems.
 * @param keyFn - Returns a stable unique string per item.
 * @param scoreFn - Extracts the original score from an item.
 * @param k - Smoothing constant. Default: 60.
 * @param maxResults - Maximum results to return.
 * @returns Items with normalized scores, best first.
 */
export function fuseRankedLists<T>(
    lists: T[][],
    keyFn: (item: T) => string,
    scoreFn: (item: T) => number,
    k: number = 60,
    maxResults: number = 15,
): { item: T; score: number }[] {
    const byKey = new Map<string, { item: T; rrfScore: number; bestScore: number }>();

    for (const ranking of lists) {
        for (const [position, candidate] of ranking.entries()) {
            const id = keyFn(candidate);
            const weight = 1.0 / (k + position + 1);
            const original = scoreFn(candidate);
            const slot = byKey.get(id);

            if (!slot) {
                byKey.set(id, { item: candidate, rrfScore: weight, bestScore: original });
                continue;
            }

            slot.rrfScore += weight;
            // Remember the occurrence with the best original score.
            if (original > slot.bestScore) {
                slot.item = candidate;
                slot.bestScore = original;
            }
        }
    }

    const ranked = Array.from(byKey.values())
        .sort((left, right) => right.rrfScore - left.rrfScore)
        .slice(0, maxResults);

    // Normalize against the best fused score so the top item scores 1.
    const top = ranked[0]?.rrfScore ?? 1;
    return ranked.map(({ item, rrfScore }) => ({ item, score: rrfScore / top }));
}
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BrainBank — Write Lock
|
|
3
|
+
*
|
|
4
|
+
* Advisory file lock for cross-process HNSW write exclusion.
|
|
5
|
+
* Uses `O_CREAT | O_EXCL` for atomic lock creation — works on all OS.
|
|
6
|
+
* Stale locks (dead PID) are detected and stolen automatically.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { openSync, closeSync, unlinkSync, readFileSync, writeFileSync, existsSync, mkdirSync, constants } from 'node:fs';
|
|
10
|
+
import { join } from 'node:path';
|
|
11
|
+
|
|
12
|
+
/** Max wait time before giving up on acquiring a lock (ms). */
|
|
13
|
+
const MAX_WAIT_MS = 30_000;
|
|
14
|
+
|
|
15
|
+
/** Initial retry delay (ms), doubled on each retry. */
|
|
16
|
+
const INITIAL_DELAY_MS = 50;
|
|
17
|
+
|
|
18
|
+
/**
 * Check whether a process exists by sending it signal 0.
 *
 * Only positive integer pids are probed: NaN, fractions, 0 and negative
 * values are reported as not alive, because 0 and negative pids address
 * process groups rather than a single process.
 *
 * A permission error (EPERM) means the process exists but belongs to someone
 * else, so it is reported as alive — otherwise a live lock holder owned by
 * another user would be mistaken for a dead one.
 *
 * @param pid - Process id to probe.
 * @returns true when the process exists, false otherwise.
 */
function isProcessAlive(pid: number): boolean {
    if (!Number.isInteger(pid) || pid <= 0) return false;
    try {
        process.kill(pid, 0);
        return true;
    } catch (err: unknown) {
        // EPERM: the process is there, we just may not signal it.
        return typeof err === 'object'
            && err !== null
            && (err as { code?: unknown }).code === 'EPERM';
    }
}
|
|
28
|
+
|
|
29
|
+
/**
 * Build the path of a named lock file: `<lockDir>/<name>.lock`.
 *
 * @param lockDir - Directory that holds lock files.
 * @param name - Lock name without extension.
 */
function lockPath(lockDir: string, name: string): string {
    const fileName = `${name}.lock`;
    return join(lockDir, fileName);
}
|
|
33
|
+
|
|
34
|
+
/**
 * Try to create the lock file atomically and record this process's pid in it.
 *
 * The file is opened with O_CREAT | O_EXCL, so the open fails when the file
 * already exists. The descriptor is always closed, and if the pid cannot be
 * written the freshly created file is removed again so no half-written lock
 * is left behind.
 *
 * @param filePath - Path of the lock file.
 * @returns true when the lock file was created and written, false otherwise. Never throws.
 */
function tryCreateLock(filePath: string): boolean {
    let fd: number;
    try {
        fd = openSync(filePath, constants.O_CREAT | constants.O_EXCL | constants.O_WRONLY);
    } catch {
        // Already exists (someone else holds it) or the path is unusable.
        return false;
    }

    let written = false;
    try {
        writeFileSync(fd, String(process.pid));
        written = true;
    } catch {
        // Handled below: the lock is not usable without a pid in it.
    } finally {
        try { closeSync(fd); } catch { /* nothing more we can do with this fd */ }
    }

    if (!written) {
        // We created this file ourselves (O_EXCL succeeded), so it is ours to remove.
        try { unlinkSync(filePath); } catch { /* best effort */ }
        return false;
    }
    return true;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Acquire the named advisory lock, waiting for it if necessary.
 *
 * Creates `lockDir` when it is missing. While the lock file exists, the pid
 * stored in it is checked: a lock whose content is not a number, or whose
 * process is no longer alive, is removed and re-created for this process.
 * Otherwise the call sleeps and retries, doubling the delay each round up to
 * 2000 ms.
 *
 * @param lockDir - Directory that holds lock files.
 * @param name - Lock name.
 * @throws Error once the accumulated sleep time reaches MAX_WAIT_MS without success.
 */
export async function acquireLock(lockDir: string, name: string): Promise<void> {
    if (!existsSync(lockDir)) {
        mkdirSync(lockDir, { recursive: true });
    }

    const lockFile = lockPath(lockDir, name);
    let waitedMs = 0;

    for (let backoffMs = INITIAL_DELAY_MS; ; backoffMs = Math.min(backoffMs * 2, 2000)) {
        if (tryCreateLock(lockFile)) return;

        // Lock exists — find out whether its holder is still around
        try {
            const holderPid = parseInt(readFileSync(lockFile, 'utf-8').trim(), 10);
            const holderGone = isNaN(holderPid) || !isProcessAlive(holderPid);
            if (holderGone) {
                // Stale lock (dead or invalid PID) — steal it
                try { unlinkSync(lockFile); } catch { /* race: another process stole it first */ }
                if (tryCreateLock(lockFile)) return;
            }
        } catch {
            // File vanished between the create attempt and the read — try again right away
            if (tryCreateLock(lockFile)) return;
        }

        if (waitedMs >= MAX_WAIT_MS) {
            throw new Error(`BrainBank: Could not acquire write lock '${name}' after ${MAX_WAIT_MS}ms. Another process may be indexing.`);
        }

        await new Promise<void>(resolve => setTimeout(resolve, backoffMs));
        waitedMs += backoffMs;
    }
}
|
|
87
|
+
|
|
88
|
+
/**
 * Release the named advisory lock if this process holds it.
 *
 * The lock file is removed only when the pid stored in it is this process's
 * pid, so calling this without holding the lock cannot delete a lock that
 * another process owns. A missing or unreadable lock file is ignored.
 *
 * @param lockDir - Directory that holds lock files.
 * @param name - Lock name.
 */
export function releaseLock(lockDir: string, name: string): void {
    const fp = lockPath(lockDir, name);
    try {
        const owner = parseInt(readFileSync(fp, 'utf-8').trim(), 10);
        // Not ours (or unreadable content): leave it for its owner / stale-lock handling.
        if (owner !== process.pid) return;
        unlinkSync(fp);
    } catch {
        // Already released or never acquired — safe to ignore
    }
}
|
|
96
|
+
|
|
97
|
+
/**
 * Run `fn` while holding the named advisory lock.
 *
 * The lock is acquired first (a failure to acquire propagates and `fn` is not
 * called), and it is released afterwards whether `fn` returns or throws.
 *
 * @param lockDir - Directory that holds lock files.
 * @param name - Lock name.
 * @param fn - Work to perform under the lock; may be sync or async.
 * @returns Whatever `fn` returns.
 */
export async function withLock<T>(lockDir: string, name: string, fn: () => T | Promise<T>): Promise<T> {
    await acquireLock(lockDir, name);

    let outcome: T;
    try {
        outcome = await fn();
    } finally {
        releaseLock(lockDir, name);
    }
    return outcome;
}
|