brainbank 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +84 -1107
- package/assets/architecture.png +0 -0
- package/bin/brainbank +8 -1
- package/bin/brainbank-mcp +19 -0
- package/dist/chunk-3UIWA32X.js +3341 -0
- package/dist/chunk-3UIWA32X.js.map +1 -0
- package/dist/chunk-3YBCD6DI.js +117 -0
- package/dist/chunk-3YBCD6DI.js.map +1 -0
- package/dist/chunk-DAGVUEXL.js +258 -0
- package/dist/chunk-DAGVUEXL.js.map +1 -0
- package/dist/chunk-DMFMTOHF.js +123 -0
- package/dist/chunk-DMFMTOHF.js.map +1 -0
- package/dist/chunk-FQYKWB2Q.js +136 -0
- package/dist/chunk-FQYKWB2Q.js.map +1 -0
- package/dist/chunk-IMJJ2VEM.js +74 -0
- package/dist/chunk-IMJJ2VEM.js.map +1 -0
- package/dist/chunk-M744PCJQ.js +43 -0
- package/dist/chunk-M744PCJQ.js.map +1 -0
- package/dist/chunk-NNDY7P2R.js +211 -0
- package/dist/chunk-NNDY7P2R.js.map +1 -0
- package/dist/chunk-O3J6ZIXK.js +82 -0
- package/dist/chunk-O3J6ZIXK.js.map +1 -0
- package/dist/chunk-RDQYDLYZ.js +69 -0
- package/dist/chunk-RDQYDLYZ.js.map +1 -0
- package/dist/chunk-WCQVDF3K.js +14 -0
- package/dist/cli.js +2713 -325
- package/dist/cli.js.map +1 -1
- package/dist/haiku-pruner-5KVT5AI2.js +8 -0
- package/dist/http-server-2ZQ6I43B.js +9 -0
- package/dist/index.d.ts +1886 -626
- package/dist/index.js +319 -46
- package/dist/index.js.map +1 -1
- package/dist/local-embedding-NZQTILGV.js +8 -0
- package/dist/mcp.d.ts +2 -0
- package/dist/mcp.js +386 -0
- package/dist/mcp.js.map +1 -0
- package/dist/openai-embedding-ZP5TSUJG.js +8 -0
- package/dist/perplexity-context-embedding-GI5PHE6X.js +9 -0
- package/dist/perplexity-context-embedding-GI5PHE6X.js.map +1 -0
- package/dist/perplexity-embedding-KZRYGJRC.js +10 -0
- package/dist/perplexity-embedding-KZRYGJRC.js.map +1 -0
- package/dist/plugin-IKQ6IRSJ.js +32 -0
- package/dist/plugin-IKQ6IRSJ.js.map +1 -0
- package/dist/resolve-ASGLBNUC.js +10 -0
- package/dist/resolve-ASGLBNUC.js.map +1 -0
- package/dist/stats-tui-AD3AMYGV.js +1904 -0
- package/dist/stats-tui-AD3AMYGV.js.map +1 -0
- package/package.json +38 -53
- package/src/brainbank.ts +617 -0
- package/src/cli/commands/collection.ts +77 -0
- package/src/cli/commands/context.ts +59 -0
- package/src/cli/commands/daemon.ts +100 -0
- package/src/cli/commands/docs.ts +71 -0
- package/src/cli/commands/files.ts +69 -0
- package/src/cli/commands/help.ts +82 -0
- package/src/cli/commands/index.ts +478 -0
- package/src/cli/commands/kv.ts +140 -0
- package/src/cli/commands/mcp-export.ts +273 -0
- package/src/cli/commands/mcp.ts +6 -0
- package/src/cli/commands/query.ts +167 -0
- package/src/cli/commands/reembed.ts +30 -0
- package/src/cli/commands/reindex.ts +40 -0
- package/src/cli/commands/scan.ts +336 -0
- package/src/cli/commands/search.ts +203 -0
- package/src/cli/commands/stats.ts +68 -0
- package/src/cli/commands/status.ts +47 -0
- package/src/cli/commands/watch.ts +47 -0
- package/src/cli/factory/brain-context.ts +43 -0
- package/src/cli/factory/builtin-registration.ts +87 -0
- package/src/cli/factory/config-loader.ts +77 -0
- package/src/cli/factory/index.ts +69 -0
- package/src/cli/factory/plugin-loader.ts +324 -0
- package/src/cli/index.ts +76 -0
- package/src/cli/server-client.ts +186 -0
- package/src/cli/tui/index-tui.tsx +667 -0
- package/src/cli/tui/stats-data.ts +523 -0
- package/src/cli/tui/stats-search.ts +262 -0
- package/src/cli/tui/stats-tui.tsx +1465 -0
- package/src/cli/tui/tree-scanner.ts +650 -0
- package/src/cli/utils.ts +137 -0
- package/src/config.ts +48 -0
- package/src/constants.ts +21 -0
- package/src/db/adapter.ts +112 -0
- package/src/db/metadata.ts +130 -0
- package/src/db/migrations.ts +66 -0
- package/src/db/sqlite-adapter.ts +218 -0
- package/src/db/tracker.ts +91 -0
- package/src/engine/index-api.ts +81 -0
- package/src/engine/reembed.ts +206 -0
- package/src/engine/search-api.ts +218 -0
- package/src/index.ts +150 -0
- package/src/lib/fts.ts +57 -0
- package/src/lib/languages.ts +179 -0
- package/src/lib/logger.ts +126 -0
- package/src/lib/math.ts +87 -0
- package/src/lib/provider-key.ts +20 -0
- package/src/lib/prune.ts +72 -0
- package/src/lib/rrf.ts +133 -0
- package/src/lib/write-lock.ts +108 -0
- package/src/mcp/mcp-server.ts +268 -0
- package/src/mcp/workspace-factory.ts +68 -0
- package/src/mcp/workspace-pool.ts +224 -0
- package/src/plugin.ts +381 -0
- package/src/providers/embeddings/embedding-worker-thread.ts +95 -0
- package/src/providers/embeddings/embedding-worker.ts +141 -0
- package/src/providers/embeddings/local-embedding.ts +115 -0
- package/src/providers/embeddings/openai-embedding.ts +167 -0
- package/src/providers/embeddings/perplexity-context-embedding.ts +195 -0
- package/src/providers/embeddings/perplexity-embedding.ts +165 -0
- package/src/providers/embeddings/resolve.ts +34 -0
- package/src/providers/pruners/haiku-expander.ts +178 -0
- package/src/providers/pruners/haiku-pruner.ts +263 -0
- package/src/providers/vector/hnsw-index.ts +174 -0
- package/src/providers/vector/hnsw-loader.ts +129 -0
- package/src/search/bm25-boost.ts +76 -0
- package/src/search/context-builder.ts +209 -0
- package/src/search/keyword/composite-bm25-search.ts +47 -0
- package/src/search/query-decomposer.ts +124 -0
- package/src/search/types.ts +37 -0
- package/src/search/vector/composite-vector-search.ts +105 -0
- package/src/search/vector/mmr.ts +64 -0
- package/src/services/collection.ts +384 -0
- package/src/services/daemon.ts +87 -0
- package/src/services/http-server.ts +344 -0
- package/src/services/kv-service.ts +64 -0
- package/src/services/plugin-registry.ts +77 -0
- package/src/services/watch.ts +340 -0
- package/src/services/webhook-server.ts +100 -0
- package/src/types.ts +509 -0
- package/dist/chunk-2P3EGY6S.js +0 -37
- package/dist/chunk-2P3EGY6S.js.map +0 -1
- package/dist/chunk-3GAIDXRW.js +0 -105
- package/dist/chunk-3GAIDXRW.js.map +0 -1
- package/dist/chunk-4ZKBQ33J.js +0 -56
- package/dist/chunk-4ZKBQ33J.js.map +0 -1
- package/dist/chunk-7QVYU63E.js +0 -7
- package/dist/chunk-GOUBW7UA.js +0 -373
- package/dist/chunk-GOUBW7UA.js.map +0 -1
- package/dist/chunk-MJ3Y24H6.js +0 -185
- package/dist/chunk-MJ3Y24H6.js.map +0 -1
- package/dist/chunk-N6ZMBFDE.js +0 -224
- package/dist/chunk-N6ZMBFDE.js.map +0 -1
- package/dist/chunk-RAEBYV75.js +0 -709
- package/dist/chunk-RAEBYV75.js.map +0 -1
- package/dist/chunk-TW5NTYYZ.js +0 -2066
- package/dist/chunk-TW5NTYYZ.js.map +0 -1
- package/dist/chunk-Z5SU54HP.js +0 -171
- package/dist/chunk-Z5SU54HP.js.map +0 -1
- package/dist/code.d.ts +0 -31
- package/dist/code.js +0 -8
- package/dist/docs.d.ts +0 -19
- package/dist/docs.js +0 -8
- package/dist/git.d.ts +0 -31
- package/dist/git.js +0 -8
- package/dist/memory.d.ts +0 -19
- package/dist/memory.js +0 -146
- package/dist/memory.js.map +0 -1
- package/dist/notes.d.ts +0 -19
- package/dist/notes.js +0 -57
- package/dist/notes.js.map +0 -1
- package/dist/openai-PCTYLOWI.js +0 -8
- package/dist/types-Da_zLLOl.d.ts +0 -474
- /package/dist/{chunk-7QVYU63E.js.map → chunk-WCQVDF3K.js.map} +0 -0
- /package/dist/{code.js.map → haiku-pruner-5KVT5AI2.js.map} +0 -0
- /package/dist/{docs.js.map → http-server-2ZQ6I43B.js.map} +0 -0
- /package/dist/{git.js.map → local-embedding-NZQTILGV.js.map} +0 -0
- /package/dist/{openai-PCTYLOWI.js.map → openai-embedding-ZP5TSUJG.js.map} +0 -0
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BrainBank — HNSW Loader
|
|
3
|
+
*
|
|
4
|
+
* Utilities for persisting and loading HNSW indexes to/from disk.
|
|
5
|
+
* Used by BrainBank._runInitialize() and PluginContext.loadVectors().
|
|
6
|
+
*
|
|
7
|
+
* Includes cross-process write locking and hot-reload support
|
|
8
|
+
* for multi-process coordination.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import type { DatabaseAdapter, CountRow } from '@/db/adapter.ts';
|
|
12
|
+
import type { HNSWIndex } from './hnsw-index.ts';
|
|
13
|
+
|
|
14
|
+
import { dirname, join } from 'node:path';
|
|
15
|
+
import { withLock } from '@/lib/write-lock.ts';
|
|
16
|
+
|
|
17
|
+
/**
 * Build the on-disk location of a named HNSW index.
 *
 * The file lives in the same directory as the database file and is
 * named `hnsw-<name>.index`.
 *
 * @param dbPath - Path to the database file.
 * @param name - Index name embedded in the file name.
 * @returns Path of the index file.
 */
export function hnswPath(dbPath: string, name: string): string {
    const directory = dirname(dbPath);
    const fileName = `hnsw-${name}.index`;
    return join(directory, fileName);
}
|
|
21
|
+
|
|
22
|
+
/**
 * Resolve the directory used for lock files.
 *
 * @param dbPath - Path to the database file.
 * @returns The directory that contains `dbPath`.
 */
export function lockDir(dbPath: string): string {
    const parentDirectory = dirname(dbPath);
    return parentDirectory;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Return the number of rows in `table` using a `COUNT(*)` query.
 *
 * `table` is interpolated directly into the SQL text, so callers must
 * pass a trusted identifier.
 *
 * @param db - Database adapter used to prepare and run the query.
 * @param table - Name of the table to count.
 * @returns The row count, or 0 when the query yields no row or no `c` value.
 */
export function countRows(db: DatabaseAdapter, table: string): number {
    const statement = db.prepare(`SELECT COUNT(*) as c FROM ${table}`);
    const result = statement.get() as CountRow;
    if (result == null) return 0;
    return result.c ?? 0;
}
|
|
32
|
+
|
|
33
|
+
/**
 * Persist the KV index plus every shared and private HNSW index to disk.
 *
 * All writes happen inside a single `withLock(lockDir(dbPath), 'hnsw', …)`
 * call so that concurrent writers do not corrupt the `.index` files.
 * The KV index is written under the name `kv`; the other indexes use
 * their map key as the name.
 *
 * @param dbPath - Path to the database file; index files are placed beside it.
 * @param kvHnsw - Index saved as `kv`.
 * @param sharedHnsw - Named shared indexes (only the `hnsw` member is saved).
 * @param privateHnsw - Named private indexes.
 * @returns `true` when everything was written, `false` if locking or any save threw.
 */
export async function saveAllHnsw(
    dbPath: string,
    kvHnsw: HNSWIndex,
    sharedHnsw: Map<string, { hnsw: HNSWIndex; vecCache: Map<number, Float32Array> }>,
    privateHnsw: Map<string, HNSWIndex>,
): Promise<boolean> {
    const writeEverything = (): void => {
        kvHnsw.save(hnswPath(dbPath, 'kv'));
        for (const [indexName, entry] of sharedHnsw) {
            entry.hnsw.save(hnswPath(dbPath, indexName));
        }
        for (const [indexName, index] of privateHnsw) {
            index.save(hnswPath(dbPath, indexName));
        }
    };

    try {
        await withLock(lockDir(dbPath), 'hnsw', writeEverything);
    } catch {
        // Non-fatal: next startup rebuilds from SQLite (slower).
        return false;
    }
    return true;
}
|
|
59
|
+
|
|
60
|
+
/**
 * Stream every row of `table` and register its embedding in both the
 * HNSW index and the in-memory vector cache.
 *
 * Each `embedding` blob is copied into a fresh buffer before being viewed
 * as a `Float32Array`, so the resulting vector does not alias the row's
 * underlying buffer. `table` and `idCol` are interpolated into the SQL
 * text and must be trusted identifiers.
 *
 * @param db - Database adapter to read from.
 * @param table - Table holding the vectors.
 * @param idCol - Column whose value is used as the vector id.
 * @param hnsw - Index that receives each vector via `add`.
 * @param cache - Map filled with id → vector.
 */
export function loadVectors(
    db: DatabaseAdapter,
    table: string,
    idCol: string,
    hnsw: HNSWIndex,
    cache: Map<number, Float32Array>,
): void {
    type VectorRow = { embedding: Uint8Array; [key: string]: unknown };

    const statement = db.prepare(`SELECT ${idCol}, embedding FROM ${table}`);
    const rows = statement.iterate() as IterableIterator<VectorRow>;

    for (const row of rows) {
        const { buffer, byteOffset, byteLength } = row.embedding;
        const vector = new Float32Array(buffer.slice(byteOffset, byteOffset + byteLength));
        const id = row[idCol] as number;
        hnsw.add(vector, id);
        cache.set(id, vector);
    }
}
|
|
80
|
+
|
|
81
|
+
/**
 * Fill the vector cache from `table` without touching any HNSW index.
 *
 * Each `embedding` blob is copied into a fresh buffer and viewed as a
 * `Float32Array`. `table` and `idCol` are interpolated into the SQL text
 * and must be trusted identifiers.
 *
 * @param db - Database adapter to read from.
 * @param table - Table holding the vectors.
 * @param idCol - Column whose value is used as the cache key.
 * @param cache - Map filled with id → vector.
 */
export function loadVecCache(
    db: DatabaseAdapter,
    table: string,
    idCol: string,
    cache: Map<number, Float32Array>,
): void {
    type VectorRow = { embedding: Uint8Array; [key: string]: unknown };

    const statement = db.prepare(`SELECT ${idCol}, embedding FROM ${table}`);
    const rows = statement.iterate() as IterableIterator<VectorRow>;

    for (const row of rows) {
        const { buffer, byteOffset, byteLength } = row.embedding;
        const vector = new Float32Array(buffer.slice(byteOffset, byteOffset + byteLength));
        cache.set(row[idCol] as number, vector);
    }
}
|
|
99
|
+
|
|
100
|
+
/** Everything `reloadHnsw` needs to rebuild one in-memory HNSW index. */
interface ReloadDeps {
    /** Path to the database file; the index file is resolved next to it. */
    dbPath: string;
    /** Adapter used to count and read rows of `vectorTable`. */
    db: DatabaseAdapter;
    /** Index name, used to derive the index file name. */
    name: string;
    /** In-memory index that is reinitialized and repopulated. */
    hnsw: HNSWIndex;
    /** id → vector cache that is cleared and refilled. */
    vecCache: Map<number, Float32Array>;
    /** Table that stores the vectors. */
    vectorTable: string;
    /** Column of `vectorTable` holding the vector id. */
    idCol: string;
}
|
|
110
|
+
|
|
111
|
+
/**
 * Rebuild one in-memory HNSW index and its vector cache.
 *
 * Steps, in order:
 *  1. resolve the index file path and count the rows in the vector table;
 *  2. reinitialize the index and clear the cache;
 *  3. try to load the index file — on success only the cache is refilled
 *     from SQLite, otherwise both index and cache are rebuilt from SQLite.
 *
 * The row count is handed to `tryLoad`; presumably it is used to validate
 * the file against the table — confirm in `HNSWIndex.tryLoad`.
 *
 * @param deps - Handles and names describing the index to reload.
 */
export function reloadHnsw(deps: ReloadDeps): void {
    const { dbPath, db, name, vectorTable, idCol } = deps;
    const index = deps.hnsw;
    const cache = deps.vecCache;

    const indexFile = hnswPath(dbPath, name);
    const expectedRows = countRows(db, vectorTable);

    index.reinit();
    cache.clear();

    const loadedFromDisk = index.tryLoad(indexFile, expectedRows);
    if (!loadedFromDisk) {
        // No usable index file: rebuild index and cache from the table.
        loadVectors(db, vectorTable, idCol, index, cache);
        return;
    }
    // Index restored from file: only the cache still needs the vectors.
    loadVecCache(db, vectorTable, idCol, cache);
}
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BrainBank — BM25 Intersection Boost
|
|
3
|
+
*
|
|
4
|
+
* Pure functions for post-processing vector search results with BM25 keyword overlap.
|
|
5
|
+
* Extracted from ContextBuilder for single-responsibility and testability.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { SearchResult } from '@/types.ts';
|
|
9
|
+
import type { SearchStrategy } from './types.ts';
|
|
10
|
+
|
|
11
|
+
/** Score bonus added to a vector hit that also appears among the BM25 keyword hits. */
export const BM25_BOOST = 0.15;

/**
 * Re-rank vector hits by rewarding those that BM25 also found.
 *
 * No BM25-only result is ever added: the output contains exactly the
 * vector hits, with `BM25_BOOST` added to the score of each hit whose
 * `resultKey` matches a keyword hit, sorted by descending score.
 *
 * When there are no vector hits or no keyword hits, the input array is
 * returned as-is (same reference, original order).
 *
 * @param vectorResults - Hits from vector search.
 * @param bm25 - Keyword search strategy to consult.
 * @param query - Query text passed to the keyword search.
 * @param sources - Per-source limits forwarded to the keyword search.
 * @returns The re-scored, re-sorted hits.
 */
export async function boostWithBM25(
    vectorResults: SearchResult[],
    bm25: SearchStrategy,
    query: string,
    sources: Record<string, number>,
): Promise<SearchResult[]> {
    if (vectorResults.length === 0) return vectorResults;

    const keywordHits = await bm25.search(query, { sources });
    if (keywordHits.length === 0) return vectorResults;

    // Keys of all keyword hits, for constant-time membership checks.
    const keywordKeys = new Set<string>(keywordHits.map(hit => resultKey(hit)));

    const rescored: SearchResult[] = [];
    for (const hit of vectorResults) {
        const alsoKeywordHit = keywordKeys.has(resultKey(hit));
        rescored.push(alsoKeywordHit ? { ...hit, score: hit.score + BM25_BOOST } : hit);
    }

    rescored.sort((left, right) => right.score - left.score);
    return rescored;
}
|
|
49
|
+
|
|
50
|
+
/**
 * Keep only results whose `filePath` starts with one of the given prefixes.
 *
 * Prefixes are matched on directory boundaries: a prefix that neither ends
 * with `/` nor contains a `.` gets a trailing `/` appended, so `sdk` matches
 * `sdk/…` but not `sdk-server/…`. Prefixes containing a `.` are treated as
 * file matches and used unchanged.
 *
 * With no prefix (or an empty list) the input array is returned unchanged.
 * Otherwise results without a `filePath` are dropped.
 *
 * @param results - Results to filter.
 * @param prefix - One prefix, a list of prefixes, or `undefined`.
 * @returns The matching results.
 */
export function filterByPath(results: SearchResult[], prefix: string | string[] | undefined): SearchResult[] {
    if (!prefix) return results;

    const requested = Array.isArray(prefix) ? prefix : [prefix];
    if (requested.length === 0) return results;

    const scopes = requested.map(entry => {
        const usableAsIs = entry.endsWith('/') || entry.includes('.');
        return usableAsIs ? entry : entry + '/';
    });

    return results.filter(result => {
        const path = result.filePath;
        if (path == null) return false;
        return scopes.some(scope => path.startsWith(scope));
    });
}
|
|
64
|
+
|
|
65
|
+
/**
 * Drop results whose `filePath` starts with any of the ignored prefixes.
 *
 * Prefixes are compared verbatim (plain `startsWith`, no directory-boundary
 * normalization). Results without a `filePath` are always kept. With no
 * prefixes the input array is returned unchanged.
 *
 * @param results - Results to filter.
 * @param ignorePaths - Path prefixes to exclude, or `undefined`.
 * @returns The results that are not ignored.
 */
export function filterByIgnore(results: SearchResult[], ignorePaths: string[] | undefined): SearchResult[] {
    if (!ignorePaths?.length) return results;

    const ignored = ignorePaths;
    return results.filter(result => {
        const path = result.filePath;
        if (!path) return true;
        return ignored.every(prefix => !path.startsWith(prefix));
    });
}
|
|
70
|
+
|
|
71
|
+
/**
 * Build the identity key of a search result: `<filePath>:<startLine>:<endLine>`.
 *
 * A missing `filePath` contributes an empty segment, as does a `startLine`
 * or `endLine` property that is absent from `metadata`.
 *
 * @param r - Result to key.
 * @returns The dedup key.
 */
export function resultKey(r: SearchResult): string {
    const firstLine = 'startLine' in r.metadata ? r.metadata.startLine : '';
    const lastLine = 'endLine' in r.metadata ? r.metadata.endLine : '';
    const path = r.filePath ?? '';
    return `${path}:${firstLine}:${lastLine}`;
}
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BrainBank — Context Builder
|
|
3
|
+
*
|
|
4
|
+
* Orchestrates the context-building pipeline:
|
|
5
|
+
* 1. Vector search (primary)
|
|
6
|
+
* 2. Path scoping (filter)
|
|
7
|
+
* 3. LLM noise pruning (optional)
|
|
8
|
+
* 4. Session dedup (filter)
|
|
9
|
+
* 5. Plugin formatters (output)
|
|
10
|
+
*
|
|
11
|
+
* All search post-processing lives in `bm25-boost.ts`.
|
|
12
|
+
* Plugin-agnostic — discovers formatters from ContextFormatterPlugin.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import type { ContextOptions, EmbeddingProvider, Pruner, SearchResult } from '@/types.ts';
|
|
16
|
+
import type { SearchStrategy } from './types.ts';
|
|
17
|
+
import type { PluginRegistry } from '@/services/plugin-registry.ts';
|
|
18
|
+
|
|
19
|
+
import { isContextFormatterPlugin, isContextFieldPlugin, isSearchable } from '@/plugin.ts';
|
|
20
|
+
import { filterByPath, filterByIgnore } from './bm25-boost.ts';
|
|
21
|
+
import { pruneResults } from '@/lib/prune.ts';
|
|
22
|
+
import { logQuery } from '@/lib/logger.ts';
|
|
23
|
+
import type { QueryLogResult } from '@/lib/logger.ts';
|
|
24
|
+
import { providerKey } from '@/lib/provider-key.ts';
|
|
25
|
+
|
|
26
|
+
/** True when the `BRAINBANK_DEBUG` environment variable is set to a non-empty value. */
const _debug = Boolean(process.env.BRAINBANK_DEBUG);

/** Write a debug line to stderr; a no-op unless debugging is enabled. */
function dbg(msg: string): void {
    if (!_debug) return;
    console.error(msg);
}
|
|
28
|
+
|
|
29
|
+
/**
 * Assembles the markdown context block for a task.
 *
 * Pipeline run by `build`: search → path/ignore filtering → optional
 * pruning → session dedup → plugin formatting → query log.
 */
export class ContextBuilder {
    /**
     * @param _search - Primary search strategy; when undefined the search step yields no results.
     * @param _registry - Registry used to discover formatter, field and searchable plugins.
     * @param _pruner - Default pruner, used when a request does not supply one.
     * @param _embedding - Embedding provider, only used to label the query log.
     * @param _configFields - Config-level context field values.
     */
    constructor(
        private _search: SearchStrategy | undefined,
        private _registry: PluginRegistry,
        private _pruner?: Pruner,
        private _embedding?: EmbeddingProvider,
        private _configFields: Record<string, unknown> = {},
    ) {}

    /** Replace the config-level context field values (config.json "context" section). */
    set configFields(fields: Record<string, unknown>) {
        this._configFields = fields;
    }

    /**
     * Build the context block for `task`.
     *
     * @param task - Task description, used as the search query and in the heading.
     * @param options - Per-request overrides (sources, thresholds, filters, pruner, fields).
     * @returns Markdown text: all collected parts joined with newlines.
     */
    async build(task: string, options: ContextOptions = {}): Promise<string> {
        const startedAt = Date.now();
        const { minScore = 0.25, useMMR = true, mmrLambda = 0.7 } = options;
        const src = options.sources ?? {};

        // 1. Primary search (skipped entirely when no strategy is configured).
        let hits: SearchResult[] = [];
        if (this._search) {
            hits = await this._search.search(task, { sources: src, minScore, useMMR, mmrLambda });
        }

        // 2. Restrict to the requested paths, then drop ignored paths.
        hits = filterByIgnore(filterByPath(hits, options.pathPrefix), options.ignorePaths);

        // 3. Optional pruning; a per-request pruner wins over the constructor one.
        const pruner = options.pruner ?? this._pruner;
        const candidates = hits;

        // 3a. With a pruner and more than 40 hits, drop those scoring below 35% of
        //     the best score — but only if that removes something and leaves >= 3.
        if (pruner && hits.length > 40) {
            const topScore = hits.reduce((best, hit) => Math.max(best, hit.score), -Infinity);
            const threshold = topScore * 0.35;
            const strongEnough = hits.filter(hit => hit.score >= threshold);
            const removedCount = hits.length - strongEnough.length;
            if (removedCount > 0 && strongEnough.length >= 3) {
                dbg(`[pre-filter] Dropped ${hits.length - strongEnough.length} low-score results (threshold: ${threshold.toFixed(3)}, top: ${topScore.toFixed(3)})`);
                hits = strongEnough;
            }
        }

        if (!pruner) {
            dbg(`[pruner] No pruner configured — skipping`);
        } else if (hits.length <= 1) {
            dbg(`[pruner] Only ${hits.length} result(s) — skipping pruner (need >1)`);
        } else {
            dbg(`[pruner] Running ${_prunerName(pruner)} on ${hits.length} results...`);
            const pruneStartedAt = Date.now();
            // The pruner gets `context` and `prunerContext` merged into one description.
            const description = [options.context, options.prunerContext].filter(Boolean).join('\n\n') || undefined;
            hits = await pruneResults(task, hits, pruner, description);
            const pruneMs = Date.now() - pruneStartedAt;
            const dropped = candidates.filter(candidate => !hits.includes(candidate));
            dbg(`[pruner] ${candidates.length} → ${hits.length} in ${pruneMs}ms (${dropped.length} dropped)`);
            if (hits.length > 0) {
                dbg(`[pruner] Kept: ${hits.map(r => r.filePath ?? '?').join(', ')}`);
            }
            if (dropped.length > 0) {
                dbg(`[pruner] Dropped: ${dropped.map(r => r.filePath ?? '?').join(', ')}`);
            }
        }

        // 4. Session dedup: remove files the caller has already received.
        const alreadySent = options.excludeFiles;
        if (alreadySent && alreadySent.size > 0) {
            hits = hits.filter(hit => !(hit.filePath && alreadySent.has(hit.filePath)));
        }

        // 5. Render.
        const resolvedFields = this._resolveFields(options);
        const parts: string[] = [`# Context for: "${task}"\n`];
        this._appendFormatterResults(hits, parts, options, resolvedFields);
        await this._appendSearchableResults(task, src, minScore, parts);

        // Query log.
        // NOTE(review): this list is computed after step 4, so whenever a pruner is
        // set it also contains entries removed by the score pre-filter and by
        // `excludeFiles`, not only those the pruner rejected — confirm intended.
        const prunedResults = pruner
            ? candidates.filter(candidate => !hits.includes(candidate))
            : [];
        logQuery({
            source: options.source ?? 'api',
            method: 'getContext',
            query: task,
            embedding: this._embedding ? providerKey(this._embedding) : 'unknown',
            pruner: pruner ? _prunerName(pruner) : null,
            expander: null,
            options: {
                sources: src,
                pathPrefix: options.pathPrefix,
                ignorePaths: options.ignorePaths,
                minScore,
                affectedFiles: options.affectedFiles,
            },
            results: hits.map(_toLogResult),
            pruned: prunedResults.length > 0 ? prunedResults.map(_toLogResult) : undefined,
            durationMs: Date.now() - startedAt,
        });

        return parts.join('\n');
    }

    /**
     * Let every context-formatter plugin append its section to `parts`.
     * A formatter name is invoked at most once, even if registered repeatedly.
     */
    private _appendFormatterResults(
        results: SearchResult[],
        parts: string[],
        options: ContextOptions,
        resolvedFields?: Record<string, unknown>,
    ): void {
        const fields = resolvedFields ?? this._resolveFields(options);
        const invoked = new Set<string>();

        for (const plugin of this._registry.all) {
            if (!isContextFormatterPlugin(plugin) || invoked.has(plugin.name)) continue;
            invoked.add(plugin.name);
            plugin.formatContext(results, parts, fields);
        }
    }

    /**
     * Compute the effective context fields.
     * Precedence, lowest to highest: plugin defaults, config fields, `options.fields`.
     */
    private _resolveFields(options: ContextOptions): Record<string, unknown> {
        const pluginDefaults: Record<string, unknown> = {};
        for (const plugin of this._registry.all) {
            if (!isContextFieldPlugin(plugin)) continue;
            for (const field of plugin.contextFields()) {
                pluginDefaults[field.name] = field.default;
            }
        }

        const perQuery = options.fields ?? {};
        return { ...pluginDefaults, ...this._configFields, ...perQuery };
    }

    /**
     * Query each searchable plugin that is not a context formatter and append
     * a simple bullet list of its hits (score as a percentage, first 200
     * characters of content). Plugins are queried one after another, in
     * registry order; `sources[plugin.name]` sets the hit limit (default 6).
     */
    private async _appendSearchableResults(
        task: string,
        sources: Record<string, number>,
        minScore: number,
        parts: string[],
    ): Promise<void> {
        for (const plugin of this._registry.all) {
            if (isContextFormatterPlugin(plugin) || !isSearchable(plugin)) continue;

            const found = await plugin.search(task, { k: sources[plugin.name] ?? 6, minScore });
            if (found.length === 0) continue;

            parts.push(
                `## ${plugin.name}\n`,
                ...found.map(r => `- [${Math.round(r.score * 100)}%] ${r.content.slice(0, 200)}`),
                '',
            );
        }
    }
}
|
|
194
|
+
|
|
195
|
+
// ── Helpers ──────────────────────────────────────────
|
|
196
|
+
|
|
197
|
+
/**
 * Reduce a search result to the fields recorded in the query log.
 * A missing `filePath` is logged as `'unknown'`; `name` is taken from
 * `metadata.name` and is `undefined` when absent or null.
 */
function _toLogResult(r: SearchResult): QueryLogResult {
    const { filePath, score, type } = r;
    const metadata = r.metadata as Record<string, unknown> | undefined;
    const name = metadata?.name as string | undefined;
    return {
        filePath: filePath ?? 'unknown',
        score,
        type,
        name: name ?? undefined,
    };
}
|
|
206
|
+
|
|
207
|
+
/**
 * Label a pruner for logs: its constructor's name, or `'custom'` when the
 * object has no constructor or the constructor has no name value.
 */
function _prunerName(pruner: Pruner): string {
    const constructorName = pruner.constructor?.name;
    return constructorName ?? 'custom';
}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BrainBank — Composite BM25 Search Strategy
|
|
3
|
+
*
|
|
4
|
+
* Generic BM25 coordinator that discovers BM25SearchPlugin instances
|
|
5
|
+
* from the registry and delegates per-source keyword search.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { SearchResult } from '@/types.ts';
|
|
9
|
+
import type { SearchStrategy, SearchOptions } from '@/search/types.ts';
|
|
10
|
+
import type { PluginRegistry } from '@/services/plugin-registry.ts';
|
|
11
|
+
|
|
12
|
+
import { isBM25SearchPlugin } from '@/plugin.ts';
|
|
13
|
+
|
|
14
|
+
/** Hit limit used for a plugin that has no entry in `options.sources`. */
const DEFAULT_K = 8;

/**
 * Keyword search strategy that fans a query out to every registered
 * plugin recognised by `isBM25SearchPlugin` and merges the hits.
 */
export class CompositeBM25Search implements SearchStrategy {
    /** @param _registry - Registry whose plugins are scanned on every call. */
    constructor(private _registry: PluginRegistry) {}

    /**
     * Run the keyword query against each BM25 plugin and merge the hits.
     *
     * The per-plugin limit is `options.sources[plugin.name]`, falling back to
     * `DEFAULT_K`; a limit of zero or less skips that plugin.
     *
     * @param query - Keyword query text.
     * @param options - Only `sources` is read here.
     * @returns All hits, sorted by descending score.
     */
    async search(query: string, options: SearchOptions = {}): Promise<SearchResult[]> {
        const limits = options.sources ?? {};
        const merged: SearchResult[] = [];

        for (const candidate of this._registry.all) {
            if (isBM25SearchPlugin(candidate)) {
                const limit = limits[candidate.name] ?? DEFAULT_K;
                if (!(limit <= 0)) {
                    merged.push(...candidate.searchBM25(query, limit));
                }
            }
        }

        merged.sort((left, right) => right.score - left.score);
        return merged;
    }

    /** Ask every BM25 plugin that offers `rebuildFTS` to rebuild its index. */
    rebuild(): void {
        for (const candidate of this._registry.all) {
            if (isBM25SearchPlugin(candidate)) {
                candidate.rebuildFTS?.();
            }
        }
    }
}
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BrainBank — Query Decomposer
|
|
3
|
+
*
|
|
4
|
+
* Uses Haiku to break complex/ambiguous natural language queries into
|
|
5
|
+
* 2-3 focused sub-queries that each target a specific aspect of the
|
|
6
|
+
* original intent. This improves embedding coverage for multi-concept
|
|
7
|
+
* queries where a single embedding vector can't capture all facets.
|
|
8
|
+
*
|
|
9
|
+
* Example:
|
|
10
|
+
* Input: "user availability shiftStatus signIn signOut available unavailable"
|
|
11
|
+
* Output: [
|
|
12
|
+
* "user signIn signOut authentication session methods",
|
|
13
|
+
* "shiftStatus isAvailable availability entity fields",
|
|
14
|
+
* "responder availability state management workflow"
|
|
15
|
+
* ]
|
|
16
|
+
*
|
|
17
|
+
* Latency: ~300-500ms (single Haiku call).
|
|
18
|
+
* Cost: ~0.0001$ per decomposition.
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
/** Model used when the caller does not specify one. */
const DEFAULT_MODEL = 'claude-haiku-4-5-20251001';
const _debug = !!process.env.BRAINBANK_DEBUG;
/** Write a prefixed debug line to stderr when `BRAINBANK_DEBUG` is set. */
function dbg(msg: string): void { if (_debug) process.stderr.write(`[query-decomposer] ${msg}\n`); }

/** Minimum word count to trigger decomposition. Short queries pass through. */
const MIN_WORDS_FOR_DECOMPOSITION = 6;

/** Maximum number of generated sub-queries kept (the original query is extra). */
const MAX_SUB_QUERIES = 3;

export interface QueryDecomposerOptions {
    /** Anthropic API key. Falls back to ANTHROPIC_API_KEY env var. */
    apiKey?: string;
    /** Model to use. Default: claude-haiku-4-5-20251001 */
    model?: string;
}

/**
 * Splits a long search query into a few focused sub-queries by asking an
 * Anthropic model. Every failure mode degrades to "original query only".
 */
export class QueryDecomposer {
    private readonly _apiKey: string;
    private readonly _model: string;

    /**
     * @param options - API key and model overrides. Without a key (option or
     *   `ANTHROPIC_API_KEY`), the decomposer is unavailable and `decompose`
     *   returns the query unchanged.
     */
    constructor(options: QueryDecomposerOptions = {}) {
        this._apiKey = options.apiKey ?? process.env.ANTHROPIC_API_KEY ?? '';
        this._model = options.model ?? DEFAULT_MODEL;
    }

    /**
     * Decompose a complex query into up to three focused sub-queries.
     *
     * The result always starts with the original query. Only the original is
     * returned when no API key is configured, the query has fewer than
     * `MIN_WORDS_FOR_DECOMPOSITION` words, the request fails, or the reply
     * contains no usable sub-queries. This method does not throw: errors are
     * caught and reported through the debug log.
     *
     * @param query - The user's search query.
     * @returns `[query, ...subQueries]` with at most three sub-queries.
     */
    async decompose(query: string): Promise<string[]> {
        if (!this._apiKey) {
            dbg(`Skip decomposition: no API key configured`);
            return [query];
        }

        const words = query.trim().split(/\s+/);
        if (words.length < MIN_WORDS_FOR_DECOMPOSITION) {
            dbg(`Skip decomposition: ${words.length} words (min: ${MIN_WORDS_FOR_DECOMPOSITION})`);
            return [query];
        }

        try {
            const prompt =
                `You are a code search query optimizer. Given a complex search query, decompose it into 2-3 focused sub-queries that each target a DIFFERENT aspect of the original intent.\n\n` +
                `Original query: "${query}"\n\n` +
                `Rules:\n` +
                `- Each sub-query should be 4-8 words, mixing natural language with code identifiers\n` +
                `- Sub-queries should be COMPLEMENTARY, not overlapping\n` +
                `- Preserve code identifiers (camelCase, PascalCase) exactly as written\n` +
                `- Focus on: (1) the core action/method, (2) the data/entity, (3) the workflow/context\n` +
                `- Return ONLY a JSON array of strings. No explanation.\n\n` +
                `Example:\n` +
                `Query: "offer lifecycle arrived started admin behalf responder availability"\n` +
                `["offer state machine arrived started transition", "admin acting behalf responder proxy", "responder availability isAvailable update"]\n\n` +
                `Respond with ONLY the JSON array:`;

            const response = await fetch('https://api.anthropic.com/v1/messages', {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                    'x-api-key': this._apiKey,
                    'anthropic-version': '2023-06-01',
                },
                body: JSON.stringify({
                    model: this._model,
                    max_tokens: 256,
                    messages: [{ role: 'user', content: prompt }],
                }),
            });

            if (!response.ok) {
                dbg(`API error: ${response.status} — using original query only`);
                return [query];
            }

            const data = await response.json() as {
                content?: { type: string; text: string }[];
            };

            const text = data.content?.[0]?.text ?? '';
            dbg(`Raw response: ${text}`);

            // Extract JSON array from response (may be wrapped in ```json ... ```)
            const match = text.match(/\[[\s\S]*?\]/);
            if (!match) {
                dbg(`No JSON array found — using original query only`);
                return [query];
            }

            // The model's output is untrusted: keep only non-blank strings so that
            // numbers, nulls or nested values never reach the search layer as queries.
            const parsed: unknown = JSON.parse(match[0]);
            const subQueries = Array.isArray(parsed)
                ? parsed.filter((item): item is string => typeof item === 'string' && item.trim().length > 0)
                : [];
            if (subQueries.length === 0) {
                dbg(`No usable sub-queries in response — using original query only`);
                return [query];
            }

            // Cap the sub-queries and always include the original first.
            const result = [query, ...subQueries.slice(0, MAX_SUB_QUERIES)];
            dbg(`Decomposed into ${result.length} queries: ${JSON.stringify(result)}`);
            return result;
        } catch (err) {
            dbg(`Error: ${err instanceof Error ? err.message : String(err)} — using original query only`);
            return [query];
        }
    }

    /** Check if the decomposer is available (has API key). */
    get available(): boolean {
        return !!this._apiKey;
    }
}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BrainBank — Search Types
|
|
3
|
+
*
|
|
4
|
+
* Shared interface for all search strategies.
|
|
5
|
+
* Implement SearchStrategy to add a new search backend.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { SearchResult } from '@/types.ts';
|
|
9
|
+
|
|
10
|
+
/** Contract shared by every search backend. */
export interface SearchStrategy {
    /**
     * Run a query.
     *
     * @param query - Query text.
     * @param options - Optional limits, thresholds and filters.
     * @returns A promise of the matching results.
     */
    search(query: string, options?: SearchOptions): Promise<SearchResult[]>;

    /** Optional hook to rebuild internal indices (e.g. FTS5). */
    rebuild?(): void;
}
|
|
16
|
+
|
|
17
|
+
/** Vector search over a single domain (code, git, etc.) using an already-embedded query. */
export interface DomainVectorSearch {
    /**
     * Search with a pre-computed query vector.
     *
     * @param queryVec - Embedded query.
     * @param k - Result limit.
     * @param minScore - Minimum score threshold.
     * @param useMMR - Optional MMR switch.
     * @param mmrLambda - Optional MMR lambda.
     * @param queryText - Optional raw query text; supplying it enables BM25 fusion.
     * @returns The matching results (synchronously).
     */
    search(
        queryVec: Float32Array,
        k: number,
        minScore: number,
        useMMR?: boolean,
        mmrLambda?: number,
        queryText?: string,
    ): SearchResult[];
}
|
|
22
|
+
|
|
23
|
+
/** Options accepted by `SearchStrategy.search`. All members are optional. */
export interface SearchOptions {
    /**
     * Result limit per source name.
     * Built-in names: 'code', 'git', 'memory'; any other key refers to a
     * custom plugin or KV collection.
     */
    sources?: Record<string, number>;

    /** Lowest similarity score to keep. Default: 0.25 */
    minScore?: number;

    /** Whether to apply MMR for diversity. Default: true */
    useMMR?: boolean;

    /** Lambda parameter for MMR. Default: 0.7 */
    mmrLambda?: number;

    /** Where the call came from; used for debug logging. */
    source?: 'cli' | 'mcp' | 'daemon' | 'api';

    /** Keep only results for files under these path prefixes. */
    pathPrefix?: string | string[];
}
|
|
37
|
+
|