claude-local-docs 1.0.13 → 1.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.mcp.json +2 -1
- package/README.md +124 -58
- package/commands/fetch-docs.md +54 -28
- package/commands/index-codebase.md +53 -0
- package/dist/code-indexer.d.ts +14 -0
- package/dist/code-indexer.js +519 -0
- package/dist/code-indexer.js.map +1 -0
- package/dist/code-search.d.ts +14 -0
- package/dist/code-search.js +155 -0
- package/dist/code-search.js.map +1 -0
- package/dist/code-store.d.ts +39 -0
- package/dist/code-store.js +206 -0
- package/dist/code-store.js.map +1 -0
- package/dist/code.test.d.ts +7 -0
- package/dist/code.test.js +197 -0
- package/dist/code.test.js.map +1 -0
- package/dist/discovery.js +56 -4
- package/dist/discovery.js.map +1 -1
- package/dist/docs.test.d.ts +7 -0
- package/dist/docs.test.js +105 -0
- package/dist/docs.test.js.map +1 -0
- package/dist/file-walker.d.ts +34 -0
- package/dist/file-walker.js +199 -0
- package/dist/file-walker.js.map +1 -0
- package/dist/index.js +321 -22
- package/dist/index.js.map +1 -1
- package/dist/indexer.js +4 -23
- package/dist/indexer.js.map +1 -1
- package/dist/integration.test.d.ts +3 -2
- package/dist/integration.test.js +461 -11
- package/dist/integration.test.js.map +1 -1
- package/dist/reranker.d.ts +2 -2
- package/dist/reranker.js +10 -12
- package/dist/reranker.js.map +1 -1
- package/dist/rrf.d.ts +17 -0
- package/dist/rrf.js +25 -0
- package/dist/rrf.js.map +1 -0
- package/dist/search.d.ts +2 -0
- package/dist/search.js +30 -52
- package/dist/search.js.map +1 -1
- package/dist/sfc-extractor.d.ts +14 -0
- package/dist/sfc-extractor.js +70 -0
- package/dist/sfc-extractor.js.map +1 -0
- package/dist/store.d.ts +2 -0
- package/dist/store.js +39 -24
- package/dist/store.js.map +1 -1
- package/dist/tei-client.d.ts +70 -0
- package/dist/tei-client.js +153 -0
- package/dist/tei-client.js.map +1 -0
- package/dist/types.d.ts +49 -0
- package/dist/types.js +4 -1
- package/dist/types.js.map +1 -1
- package/dist/unit.test.d.ts +8 -0
- package/dist/unit.test.js +1241 -0
- package/dist/unit.test.js.map +1 -0
- package/docker-compose.nvidia.yml +7 -0
- package/docker-compose.yml +9 -0
- package/package.json +8 -2
- package/scripts/ensure-tei.sh +93 -19
- package/start-tei.sh +17 -3
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Code search pipeline — mirrors the doc search pipeline architecture.
|
|
3
|
+
* Vector search + BM25 → RRF fusion → cross-encoder rerank.
|
|
4
|
+
*
|
|
5
|
+
* TEI containers must be running — no fallback mode.
|
|
6
|
+
*/
|
|
7
|
+
import { sqlEscapeString } from "./types.js";
|
|
8
|
+
import { embedCodeTexts } from "./code-indexer.js";
|
|
9
|
+
import { rerank } from "./reranker.js";
|
|
10
|
+
import { reciprocalRankFusion } from "./rrf.js";
|
|
11
|
+
/**
 * Build the WHERE predicate that narrows a code search.
 *
 * Each of `filePath`, `language` and `entityType` contributes one equality
 * clause when it is truthy; values go through `sqlEscapeString` before being
 * embedded in the quoted literal.
 *
 * @param {{filePath?: string, language?: string, entityType?: string}} [options]
 * @returns {string|undefined} The clauses joined with " AND ", or `undefined`
 *   when `options` is missing or none of the three fields is set.
 */
function buildFilter(options) {
    if (!options) {
        return undefined;
    }
    const { filePath, language, entityType } = options;
    const predicates = [
        filePath ? `"filePath" = '${sqlEscapeString(filePath)}'` : null,
        language ? `language = '${sqlEscapeString(language)}'` : null,
        entityType ? `"entityType" = '${sqlEscapeString(entityType)}'` : null,
    ].filter((predicate) => predicate !== null);
    return predicates.length === 0 ? undefined : predicates.join(" AND ");
}
|
|
26
|
+
/**
 * Extract file-name-like tokens from a query for file-path boosting.
 *
 * A token is a run of word characters, dots and dashes that ends in one of the
 * recognised source extensions (ts, tsx, js, jsx, vue, svelte, astro), matched
 * case-insensitively. Directory separators are not part of a token, so
 * "src/foo.ts" yields "foo.ts".
 *
 * @param {string} query Free-text search query.
 * @returns {string[]} Matched tokens in order of appearance; empty when none.
 */
function extractFilePathTokens(query) {
    // Longer extensions are listed before their prefixes and a trailing word
    // boundary is required. Without both, "Button.tsx" is reported as
    // "Button.ts" and "package.json" as "package.js".
    const matches = query.match(/[\w.-]+\.(?:tsx|ts|jsx|js|vue|svelte|astro)\b/gi);
    return matches ?? [];
}
|
|
31
|
+
/**
 * Project a raw store row onto the fields carried through ranking.
 *
 * Only the ten listed fields are copied; anything else on the row is dropped.
 * Fields missing from the row are still present on the result as `undefined`.
 *
 * @param {object} row Row returned by a store search.
 * @returns {object} A new object holding the selected fields.
 */
function toRankedDoc(row) {
    const {
        id,
        text,
        filePath,
        language,
        entityType,
        entityName,
        signature,
        scopeChain,
        lineStart,
        lineEnd,
    } = row;
    return {
        id,
        text,
        filePath,
        language,
        entityType,
        entityName,
        signature,
        scopeChain,
        lineStart,
        lineEnd,
    };
}
|
|
45
|
+
/**
 * Hybrid code search: vector search + full-text search, fused with reciprocal
 * rank fusion, reranked, then expanded with neighbouring chunks.
 *
 * @param {string} query Free-text query.
 * @param {object} store Object providing `vectorSearch` and `ftsSearch`; it is
 *   also handed to `expandCodeWithNeighbors`.
 * @param {{topK?: number, filePath?: string, language?: string, entityType?: string}} [options]
 *   `topK` defaults to 10; the whole object is passed to `buildFilter`.
 * @returns {Promise<object[]>} Up to `topK` results. Because only the first 50
 *   fused candidates are reranked, no more than 50 results are ever returned.
 */
export async function searchCode(query, store, options) {
    const topK = options?.topK ?? 10;
    const candidateCount = Math.max(50, topK * 3);
    const filter = buildFilter(options);

    // Step 1: embed the query and run the vector search.
    const [queryVector] = await embedCodeTexts([query], "query");
    const vectorRows = await store.vectorSearch(queryVector, candidateCount, filter);
    const vectorRanked = vectorRows.map(toRankedDoc);

    // Step 2: full-text search over the same candidate budget.
    const ftsRows = await store.ftsSearch(query, candidateCount, filter);
    const bm25Ranked = ftsRows.map(toRankedDoc);

    // Step 3: collect the ranked lists for fusion; empty lists are left out.
    const rrfInputs = [];
    if (vectorRanked.length > 0) {
        rrfInputs.push({ docs: vectorRanked, weight: 0.7 });
    }
    if (bm25Ranked.length > 0) {
        rrfInputs.push({ docs: bm25Ranked, weight: 1.0 });
    }

    // Optional third signal: candidates whose path contains a file name
    // mentioned in the query.
    const fileTokens = extractFilePathTokens(query);
    if (fileTokens.length > 0) {
        // First occurrence wins, so vector hits take precedence over FTS hits.
        const uniqueById = new Map();
        for (const doc of vectorRanked.concat(bm25Ranked)) {
            if (!uniqueById.has(doc.id)) {
                uniqueById.set(doc.id, doc);
            }
        }
        const filePathRanked = [...uniqueById.values()].filter((doc) => {
            const path = doc.filePath ?? "";
            // A suffix match is also a substring match, so `includes` alone suffices.
            return fileTokens.some((token) => path.includes(token));
        });
        if (filePathRanked.length > 0) {
            rrfInputs.push({ docs: filePathRanked, weight: 0.5 });
        }
    }

    if (rrfInputs.length === 0) {
        return [];
    }
    const fused = reciprocalRankFusion(rrfInputs);

    // Step 4: rerank shallow copies of the top 50 fused candidates.
    const rerankCandidates = fused.slice(0, 50).map((candidate) => ({ ...candidate }));
    const reranked = (await rerank(query, rerankCandidates)).slice(0, topK);

    // Step 5: map reranked entries back to result objects, pulling the extra
    // fields from the fused entry with the same id.
    const fusedById = new Map(fused.map((entry) => [entry.id, entry]));
    const parseScopeChain = (raw) => {
        try {
            return JSON.parse(raw ?? "[]");
        }
        catch {
            // A stored value that is not valid JSON yields an empty chain.
            return [];
        }
    };
    const results = reranked.map((hit) => {
        const source = fusedById.get(hit.id);
        return {
            // Scores are rounded to three decimal places.
            score: Math.round(hit.rerankerScore * 1000) / 1000,
            filePath: source?.filePath ?? "",
            language: source?.language ?? "unknown",
            entityType: source?.entityType ?? "other",
            entityName: source?.entityName ?? "",
            signature: source?.signature ?? "",
            scopeChain: parseScopeChain(source?.scopeChain),
            lineStart: source?.lineStart ?? 0,
            lineEnd: source?.lineEnd ?? 0,
            content: source?.text ?? "",
            chunkId: hit.id,
        };
    });

    // Step 6: neighbor expansion — merge adjacent chunks from the same file.
    return expandCodeWithNeighbors(results, store);
}
|
|
122
|
+
/**
 * Expand code search results with adjacent chunks (id-1 and id+1) from the same file.
 * Recovers context split across chunk boundaries (similar to doc search neighbor expansion).
 *
 * A neighbour is merged only when it is not itself one of the results and its
 * `filePath` equals the result's. Merged content is ordered previous chunk,
 * the result, next chunk, joined by a blank line.
 *
 * Lookups are independent of one another, so they are issued concurrently, and
 * a chunk that neighbours two results is fetched only once.
 *
 * @param {object[]} results Search results carrying `chunkId`, `filePath` and `content`.
 * @param {{getChunkById: (id: number) => Promise<object|undefined>}} store Chunk lookup.
 * @returns {Promise<object[]>} Copies of `results`, in the same order, with `content` expanded.
 */
async function expandCodeWithNeighbors(results, store) {
    const resultIds = new Set(results.map((r) => r.chunkId));
    // One in-flight lookup per chunk id, shared by every result that needs it.
    const lookups = new Map();
    const loadChunk = (chunkId) => {
        if (!lookups.has(chunkId)) {
            lookups.set(chunkId, store.getChunkById(chunkId));
        }
        return lookups.get(chunkId);
    };
    // Resolves to the neighbouring row when it should be merged, else undefined.
    const loadNeighbor = async (neighborId, filePath) => {
        if (resultIds.has(neighborId)) {
            // Already returned as a result in its own right; do not duplicate it.
            return undefined;
        }
        const neighbor = await loadChunk(neighborId);
        return neighbor && neighbor.filePath === filePath ? neighbor : undefined;
    };
    return Promise.all(results.map(async (result) => {
        const [prev, next] = await Promise.all([
            loadNeighbor(result.chunkId - 1, result.filePath),
            loadNeighbor(result.chunkId + 1, result.filePath),
        ]);
        const parts = [];
        if (prev) {
            parts.push(prev.text);
        }
        parts.push(result.content);
        if (next) {
            parts.push(next.text);
        }
        return {
            ...result,
            content: parts.length > 1 ? parts.join("\n\n") : result.content,
        };
    }));
}
|
|
155
|
+
//# sourceMappingURL=code-search.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"code-search.js","sourceRoot":"","sources":["../src/code-search.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAG7C,OAAO,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AACnD,OAAO,EAAE,MAAM,EAAwB,MAAM,eAAe,CAAC;AAC7D,OAAO,EAAE,oBAAoB,EAAkB,MAAM,UAAU,CAAC;AAEhE,SAAS,WAAW,CAAC,OAIpB;IACC,IAAI,CAAC,OAAO;QAAE,OAAO,SAAS,CAAC;IAC/B,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;QACrB,OAAO,CAAC,IAAI,CAAC,iBAAiB,eAAe,CAAC,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;IACtE,CAAC;IACD,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;QACrB,OAAO,CAAC,IAAI,CAAC,eAAe,eAAe,CAAC,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;IACpE,CAAC;IACD,IAAI,OAAO,CAAC,UAAU,EAAE,CAAC;QACvB,OAAO,CAAC,IAAI,CAAC,mBAAmB,eAAe,CAAC,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;IAC1E,CAAC;IACD,OAAO,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;AAChE,CAAC;AAED,yEAAyE;AACzE,SAAS,qBAAqB,CAAC,KAAa;IAC1C,MAAM,OAAO,GAAG,KAAK,CAAC,KAAK,CAAC,6CAA6C,CAAC,CAAC;IAC3E,OAAO,OAAO,IAAI,EAAE,CAAC;AACvB,CAAC;AAED,SAAS,WAAW,CAAC,GAAY;IAC/B,OAAO;QACL,EAAE,EAAE,GAAG,CAAC,EAAE;QACV,IAAI,EAAE,GAAG,CAAC,IAAI;QACd,QAAQ,EAAE,GAAG,CAAC,QAAQ;QACtB,QAAQ,EAAE,GAAG,CAAC,QAAQ;QACtB,UAAU,EAAE,GAAG,CAAC,UAAU;QAC1B,UAAU,EAAE,GAAG,CAAC,UAAU;QAC1B,SAAS,EAAE,GAAG,CAAC,SAAS;QACxB,UAAU,EAAE,GAAG,CAAC,UAAU;QAC1B,SAAS,EAAE,GAAG,CAAC,SAAS;QACxB,OAAO,EAAE,GAAG,CAAC,OAAO;KACrB,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,UAAU,CAC9B,KAAa,EACb,KAAgB,EAChB,OAKC;IAED,MAAM,IAAI,GAAG,OAAO,EAAE,IAAI,IAAI,EAAE,CAAC;IACjC,MAAM,cAAc,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,IAAI,GAAG,CAAC,CAAC,CAAC;IAC9C,MAAM,MAAM,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC;IAEpC,uCAAuC;IACvC,MAAM,CAAC,WAAW,CAAC,GAAG,MAAM,cAAc,CAAC,CAAC,KAAK,CAAC,EAAE,OAAO,CAAC,CAAC;IAC7D,MAAM,UAAU,GAAG,MAAM,KAAK,CAAC,YAAY,CAAC,WAAW,EAAE,cAAc,EAAE,MAAM,CAAC,CAAC;IACjF,MAAM,YAAY,GAAgB,UAAU,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;IAE9D,6CAA6C;IAC7C,MAAM,OAAO,GAAG,MAAM,KAAK,CAAC,SAAS,CAAC,KAAK,EAAE,cAAc,EAAE,MAAM,CAAC,CAAC;IACrE,MAAM,UAAU,GAAgB,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC,CAA
C;IAEzD,mEAAmE;IACnE,MAAM,SAAS,GAA4C,EAAE,CAAC;IAC9D,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5B,SAAS,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,YAAY,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,CAAC;IACtD,CAAC;IACD,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC1B,SAAS,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,CAAC;IACpD,CAAC;IAED,4EAA4E;IAC5E,MAAM,UAAU,GAAG,qBAAqB,CAAC,KAAK,CAAC,CAAC;IAChD,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC1B,mDAAmD;QACnD,MAAM,aAAa,GAAG,IAAI,GAAG,EAAqB,CAAC;QACnD,KAAK,MAAM,GAAG,IAAI,CAAC,GAAG,YAAY,EAAE,GAAG,UAAU,CAAC,EAAE,CAAC;YACnD,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;gBAAE,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC;QACjE,CAAC;QACD,MAAM,cAAc,GAAgB,EAAE,CAAC;QACvC,KAAK,MAAM,GAAG,IAAI,aAAa,CAAC,MAAM,EAAE,EAAE,CAAC;YACzC,MAAM,EAAE,GAAI,GAAG,CAAC,QAAmB,IAAI,EAAE,CAAC;YAC1C,IAAI,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC;gBACvE,cAAc,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YAC3B,CAAC;QACH,CAAC;QACD,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC9B,SAAS,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,cAAc,EAAE,MAAM,EAAE,GAAG,EAAE,CAAC,CAAC;QACxD,CAAC;IACH,CAAC;IAED,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEtC,MAAM,KAAK,GAAG,oBAAoB,CAAC,SAAS,CAAC,CAAC;IAE9C,iDAAiD;IACjD,MAAM,gBAAgB,GAAsB,KAAK;SAC9C,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC;SACZ,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;IAE1B,MAAM,aAAa,GAAG,MAAM,MAAM,CAAC,KAAK,EAAE,gBAAgB,CAAC,CAAC;IAC5D,MAAM,QAAQ,GAAG,aAAa,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;IAE9C,uCAAuC;IACvC,mDAAmD;IACnD,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;IAEpD,MAAM,OAAO,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;QACjC,MAAM,IAAI,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QAChC,OAAO;YACL,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,aAAa,GAAG,IAAI,CAAC,GAAG,IAAI;YAChD,QAAQ,EAAE,CAAC,IAAI,EAAE,QAAQ,IAAI,EAAE,CAAW;YAC1C,QAAQ,EAAE,CAAC,IAAI,EAAE,Q
AAQ,IAAI,SAAS,CAAW;YACjD,UAAU,EAAE,CAAC,IAAI,EAAE,UAAU,IAAI,OAAO,CAAmB;YAC3D,UAAU,EAAE,CAAC,IAAI,EAAE,UAAU,IAAI,EAAE,CAAW;YAC9C,SAAS,EAAE,CAAC,IAAI,EAAE,SAAS,IAAI,EAAE,CAAW;YAC5C,UAAU,EAAE,CAAC,GAAG,EAAE;gBAChB,IAAI,CAAC;oBAAC,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,UAAU,IAAI,IAAI,CAAW,CAAa,CAAC;gBAAC,CAAC;gBAC5E,MAAM,CAAC;oBAAC,OAAO,EAAE,CAAC;gBAAC,CAAC;YACtB,CAAC,CAAC,EAAE;YACJ,SAAS,EAAE,CAAC,IAAI,EAAE,SAAS,IAAI,CAAC,CAAW;YAC3C,OAAO,EAAE,CAAC,IAAI,EAAE,OAAO,IAAI,CAAC,CAAW;YACvC,OAAO,EAAE,IAAI,EAAE,IAAI,IAAI,EAAE;YACzB,OAAO,EAAE,CAAC,CAAC,EAAE;SACd,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,oEAAoE;IACpE,OAAO,uBAAuB,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;AACjD,CAAC;AAED;;;GAGG;AACH,KAAK,UAAU,uBAAuB,CACpC,OAA2B,EAC3B,KAAgB;IAEhB,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;IACvD,MAAM,QAAQ,GAAuB,EAAE,CAAC;IAExC,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,KAAK,GAAa,EAAE,CAAC;QAE3B,qBAAqB;QACrB,MAAM,MAAM,GAAG,MAAM,CAAC,OAAO,GAAG,CAAC,CAAC;QAClC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC;YAC3B,MAAM,IAAI,GAAG,MAAM,KAAK,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;YAC9C,IAAI,IAAI,IAAI,IAAI,CAAC,QAAQ,KAAK,MAAM,CAAC,QAAQ,EAAE,CAAC;gBAC9C,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACxB,CAAC;QACH,CAAC;QAED,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAE3B,iBAAiB;QACjB,MAAM,MAAM,GAAG,MAAM,CAAC,OAAO,GAAG,CAAC,CAAC;QAClC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC;YAC3B,MAAM,IAAI,GAAG,MAAM,KAAK,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;YAC9C,IAAI,IAAI,IAAI,IAAI,CAAC,QAAQ,KAAK,MAAM,CAAC,QAAQ,EAAE,CAAC;gBAC9C,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACxB,CAAC;QACH,CAAC;QAED,QAAQ,CAAC,IAAI,CAAC;YACZ,GAAG,MAAM;YACT,OAAO,EAAE,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO;SAChE,CAAC,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LanceDB "code" table management for codebase indexing.
|
|
3
|
+
* Follows the same patterns as src/store.ts (DocStore) but operates
|
|
4
|
+
* on per-file code chunks instead of per-library doc chunks.
|
|
5
|
+
*/
|
|
6
|
+
import type { CodeRow, CodeMetadata } from "./types.js";
|
|
7
|
+
/**
 * Store for per-file code chunks kept in a LanceDB table named "code",
 * together with a JSON metadata file that records which files were indexed.
 * Both live under `<projectRoot>/.claude/docs`.
 */
export declare class CodeStore {
|
|
8
|
+
/** Directory `<projectRoot>/.claude/docs` holding the database and the metadata file. */
private docsDir;
|
|
9
|
+
/** LanceDB location: `<docsDir>/lancedb`. */
private dbPath;
|
|
10
|
+
/** Metadata file location: `<docsDir>/.code-metadata.json`. */
private metadataPath;
|
|
11
|
+
/** In-memory copy of the metadata; `null` until it is first loaded or saved. */
private metadata;
|
|
12
|
+
/** Id given to the next inserted chunk. Starts at 1 and is moved past the highest stored id when the table is opened. */
private nextId;
|
|
13
|
+
/** Project root passed to the constructor. */
private projectRoot;
|
|
14
|
+
/** LanceDB connection, created on first use. */
private dbInstance;
|
|
15
|
+
/** Cached handle to the "code" table; `null` while the table does not exist. */
private tableInstance;
|
|
16
|
+
/** Derives the storage paths from `projectRoot`; performs no I/O. */
constructor(projectRoot: string);
|
|
17
|
+
/** Creates `docsDir` (recursively) if it is missing. */
private ensureDir;
|
|
18
|
+
/** Schema version written into new metadata. Metadata that lists files under an older version causes the "code" table to be dropped and the file list cleared. */
private static readonly CURRENT_SCHEMA_VERSION;
|
|
19
|
+
/** Opens and caches the "code" table. Resolves to `null` when the table cannot be opened or was just dropped for a schema upgrade. */
private getTable;
|
|
20
|
+
/**
 * Replace the indexed chunks of one file.
 *
 * Existing rows for `filePath` are deleted, the new chunks are inserted with
 * freshly assigned ids, and the file's metadata entry is added or replaced.
 * The table is created by the first non-empty insert.
 *
 * @param filePath Path that identifies the file in the table and in the metadata.
 * @param language Language label recorded in the metadata entry.
 * @param sha256 Content hash recorded in the metadata entry.
 * @param chunks Rows to insert; ids are assigned by the store.
 * @param options `skipMetadataSave`: when true the metadata is updated in
 *   memory but not written to disk.
 * @returns `chunkCount` is the number of rows inserted for this file;
 *   `indexSize` is the table's row count afterwards (0 when no table exists).
 */
addFile(filePath: string, language: string, sha256: string, chunks: Omit<CodeRow, "id">[], options?: {
|
|
21
|
+
skipMetadataSave?: boolean;
|
|
22
|
+
}): Promise<{
|
|
23
|
+
chunkCount: number;
|
|
24
|
+
indexSize: number;
|
|
25
|
+
}>;
|
|
26
|
+
/** Deletes every row for `filePath`, removes its metadata entry and saves the metadata. */
removeFile(filePath: string): Promise<void>;
|
|
27
|
+
/**
 * Removes every indexed file whose path is not in `currentFiles`, from both
 * the table and the metadata.
 * @returns The paths that were removed.
 */
removeStaleFiles(currentFiles: Set<string>): Promise<string[]>;
|
|
28
|
+
/**
 * Nearest-neighbour search over the table.
 * @param queryVector Query embedding.
 * @param limit Maximum number of rows to return.
 * @param filter Optional WHERE predicate applied to the search.
 * @returns Matching rows; an empty array when the table does not exist.
 */
vectorSearch(queryVector: number[], limit: number, filter?: string): Promise<(CodeRow & {
|
|
29
|
+
_distance?: number;
|
|
30
|
+
})[]>;
|
|
31
|
+
/**
 * Full-text search over the `text` column.
 * @returns Matching rows; an empty array when the table does not exist or
 *   when the search throws.
 */
ftsSearch(query: string, limit: number, filter?: string): Promise<CodeRow[]>;
|
|
32
|
+
/** Creates (or replaces) the full-text index on the `text` column with stemming, lower-casing and stop-word removal enabled. Does nothing when the table does not exist. */
createFtsIndex(): Promise<void>;
|
|
33
|
+
/** Returns the cached metadata, reading it from disk on first use. When the file cannot be read or parsed, fresh empty metadata is used instead. */
loadMetadata(): Promise<CodeMetadata>;
|
|
34
|
+
/** Replaces the cached metadata and writes it to disk as indented JSON. */
saveMetadata(metadata: CodeMetadata): Promise<void>;
|
|
35
|
+
/** Returns the hash recorded for `filePath`, or `undefined` when the file is not in the metadata. */
getFileHash(filePath: string): Promise<string | undefined>;
|
|
36
|
+
/** Returns the row whose id equals `chunkId`, or `undefined` when there is no such row or no table. */
getChunkById(chunkId: number): Promise<CodeRow | undefined>;
|
|
37
|
+
/** Resolves to `true` when the table cannot be obtained. The row count is not inspected. */
isEmpty(): Promise<boolean>;
|
|
38
|
+
/** Returns `docsDir`. */
getDocsDir(): string;
|
|
39
|
+
}
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LanceDB "code" table management for codebase indexing.
|
|
3
|
+
* Follows the same patterns as src/store.ts (DocStore) but operates
|
|
4
|
+
* on per-file code chunks instead of per-library doc chunks.
|
|
5
|
+
*/
|
|
6
|
+
import { readFile, writeFile, mkdir } from "node:fs/promises";
|
|
7
|
+
import { join } from "node:path";
|
|
8
|
+
import { sqlEscapeString } from "./types.js";
|
|
9
|
+
/**
 * Store for per-file code chunks kept in a LanceDB table named "code",
 * together with a JSON metadata file recording which files were indexed.
 * Both live under `<projectRoot>/.claude/docs`.
 */
export class CodeStore {
    docsDir;
    dbPath;
    metadataPath;
    // Cached metadata; null until first loaded or saved.
    metadata = null;
    // Id given to the next inserted chunk.
    nextId = 1;
    projectRoot;
    dbInstance = null;
    tableInstance = null;

    /**
     * @param {string} projectRoot Root directory of the indexed project.
     */
    constructor(projectRoot) {
        this.projectRoot = projectRoot;
        this.docsDir = join(projectRoot, ".claude", "docs");
        this.dbPath = join(this.docsDir, "lancedb");
        this.metadataPath = join(this.docsDir, ".code-metadata.json");
    }

    /** Create the storage directory if it is missing. */
    async ensureDir() {
        await mkdir(this.docsDir, { recursive: true });
    }

    static CURRENT_SCHEMA_VERSION = 2;

    /**
     * Open and cache the "code" table.
     *
     * @returns {Promise<object|null>} The table, or `null` when it cannot be
     *   opened or was just dropped because the metadata carries an older
     *   schema version.
     */
    async getTable() {
        if (this.tableInstance) {
            return this.tableInstance;
        }
        if (!this.dbInstance) {
            // Load the driver only when a connection actually has to be opened.
            const lancedb = await import("@lancedb/lancedb");
            this.dbInstance = await lancedb.connect(this.dbPath);
        }
        // Check schema version — drop old table if outdated
        const metadata = await this.loadMetadata();
        if (metadata.files.length > 0 && (metadata.schemaVersion ?? 1) < CodeStore.CURRENT_SCHEMA_VERSION) {
            try {
                await this.dbInstance.dropTable("code");
            }
            catch { /* table may not exist */ }
            this.tableInstance = null;
            // Clear files list to force full reindex
            metadata.files = [];
            metadata.schemaVersion = CodeStore.CURRENT_SCHEMA_VERSION;
            await this.saveMetadata(metadata);
            return null;
        }
        try {
            const table = await this.dbInstance.openTable("code");
            // Continue numbering after the highest stored id. A reduce is used
            // instead of Math.max(...ids): spreading a very large array throws
            // RangeError, which the catch below would misreport as "no table".
            const rows = await table.query().select(["id"]).toArray();
            if (rows.length > 0) {
                const maxId = rows.reduce((max, row) => Math.max(max, row.id), -Infinity);
                this.nextId = maxId + 1;
            }
            // Publish the handle only once nextId is in sync with the table.
            this.tableInstance = table;
        }
        catch {
            // Table doesn't exist yet — will be created on first insert
            this.tableInstance = null;
        }
        return this.tableInstance;
    }

    /**
     * Replace the indexed chunks of one file and update its metadata entry.
     *
     * @param {string} filePath Path identifying the file.
     * @param {string} language Language label stored in the metadata.
     * @param {string} sha256 Content hash stored in the metadata.
     * @param {object[]} chunks Rows to insert; ids are assigned here.
     * @param {{skipMetadataSave?: boolean}} [options] When `skipMetadataSave`
     *   is true the metadata is updated in memory but not written to disk.
     * @returns {Promise<{chunkCount: number, indexSize: number}>} Rows inserted
     *   for this file and the table's row count afterwards (0 without a table).
     */
    async addFile(filePath, language, sha256, chunks, options) {
        await this.ensureDir();
        if (!this.dbInstance) {
            const lancedb = await import("@lancedb/lancedb");
            this.dbInstance = await lancedb.connect(this.dbPath);
        }
        // Get or create table (before assigning IDs so nextId is up-to-date)
        let table = await this.getTable();
        // Assign IDs
        const rows = chunks.map((chunk) => ({
            ...chunk,
            id: this.nextId++,
        }));
        if (table) {
            // Delete existing rows for this file, then add new ones
            await table.delete(`"filePath" = '${sqlEscapeString(filePath)}'`);
            if (rows.length > 0) {
                await table.add(rows);
            }
        }
        else if (rows.length > 0) {
            // Create table with first batch of rows
            table = await this.dbInstance.createTable("code", rows);
            this.tableInstance = table;
        }
        // Update file metadata
        const metadata = await this.loadMetadata();
        const existing = metadata.files.findIndex((f) => f.filePath === filePath);
        const fileMeta = {
            filePath,
            sha256,
            language,
            chunkCount: rows.length,
            indexedAt: new Date().toISOString(),
        };
        if (existing >= 0) {
            metadata.files[existing] = fileMeta;
        }
        else {
            metadata.files.push(fileMeta);
        }
        if (!options?.skipMetadataSave) {
            await this.saveMetadata(metadata);
        }
        const totalRows = table ? (await table.countRows()) : 0;
        return { chunkCount: rows.length, indexSize: totalRows };
    }

    /**
     * Delete every row for `filePath` and drop its metadata entry.
     * @param {string} filePath
     */
    async removeFile(filePath) {
        const table = await this.getTable();
        if (table) {
            await table.delete(`"filePath" = '${sqlEscapeString(filePath)}'`);
        }
        const metadata = await this.loadMetadata();
        metadata.files = metadata.files.filter((f) => f.filePath !== filePath);
        await this.saveMetadata(metadata);
    }

    /**
     * Remove every indexed file whose path is not in `currentFiles`.
     * @param {Set<string>} currentFiles Paths that still exist.
     * @returns {Promise<string[]>} The paths that were removed.
     */
    async removeStaleFiles(currentFiles) {
        const metadata = await this.loadMetadata();
        const stale = metadata.files.filter((f) => !currentFiles.has(f.filePath));
        const table = await this.getTable();
        if (table && stale.length > 0) {
            const escaped = stale.map((f) => `'${sqlEscapeString(f.filePath)}'`).join(", ");
            await table.delete(`"filePath" IN (${escaped})`);
        }
        if (stale.length > 0) {
            metadata.files = metadata.files.filter((f) => currentFiles.has(f.filePath));
            await this.saveMetadata(metadata);
        }
        return stale.map((f) => f.filePath);
    }

    /**
     * Nearest-neighbour search.
     * @param {number[]} queryVector Query embedding.
     * @param {number} limit Maximum rows to return.
     * @param {string} [filter] Optional WHERE predicate.
     * @returns {Promise<object[]>} Matching rows; empty when no table exists.
     */
    async vectorSearch(queryVector, limit, filter) {
        const table = await this.getTable();
        if (!table) {
            return [];
        }
        let query = table.vectorSearch(queryVector).limit(limit);
        if (filter) {
            query = query.where(filter);
        }
        return await query.toArray();
    }

    /**
     * Full-text search over the `text` column.
     * @param {string} query Search text.
     * @param {number} limit Maximum rows to return.
     * @param {string} [filter] Optional WHERE predicate.
     * @returns {Promise<object[]>} Matching rows; empty when no table exists or
     *   the search fails.
     */
    async ftsSearch(query, limit, filter) {
        const table = await this.getTable();
        if (!table) {
            return [];
        }
        try {
            let q = table.query().fullTextSearch(query, { columns: ["text"] }).limit(limit);
            if (filter) {
                q = q.where(filter);
            }
            return await q.toArray();
        }
        catch {
            // Deliberate best effort: a failed full-text search contributes no
            // hits instead of failing the caller.
            // NOTE(review): presumably this covers a missing FTS index — confirm.
            return [];
        }
    }

    /** Create or replace the full-text index on `text`; no-op without a table. */
    async createFtsIndex() {
        const table = await this.getTable();
        if (!table) {
            return;
        }
        const lancedb = await import("@lancedb/lancedb");
        await table.createIndex("text", {
            config: lancedb.Index.fts({ stem: true, lowercase: true, removeStopWords: true }),
            replace: true,
        });
    }

    /**
     * Return the cached metadata, reading it from disk on first use.
     *
     * Falls back to fresh empty metadata when the file is missing, unreadable,
     * not valid JSON, or does not contain a `files` array. The last case would
     * otherwise surface later as a TypeError on `metadata.files`.
     *
     * @returns {Promise<object>} The metadata object (shared, not a copy).
     */
    async loadMetadata() {
        if (this.metadata) {
            return this.metadata;
        }
        let loaded = null;
        try {
            const data = await readFile(this.metadataPath, "utf-8");
            loaded = JSON.parse(data);
        }
        catch {
            // Handled below by falling back to empty metadata.
        }
        const usable = loaded !== null && typeof loaded === "object" && Array.isArray(loaded.files);
        this.metadata = usable
            ? loaded
            : { projectRoot: this.projectRoot, files: [], schemaVersion: CodeStore.CURRENT_SCHEMA_VERSION };
        return this.metadata;
    }

    /**
     * Replace the cached metadata and write it to disk as indented JSON.
     * @param {object} metadata
     */
    async saveMetadata(metadata) {
        await this.ensureDir();
        this.metadata = metadata;
        await writeFile(this.metadataPath, JSON.stringify(metadata, null, 2));
    }

    /**
     * @param {string} filePath
     * @returns {Promise<string|undefined>} Hash recorded for the file, if indexed.
     */
    async getFileHash(filePath) {
        const metadata = await this.loadMetadata();
        return metadata.files.find((f) => f.filePath === filePath)?.sha256;
    }

    /**
     * Fetch a single row by id.
     * @param {number} chunkId
     * @returns {Promise<object|undefined>} The row, or `undefined` when there
     *   is no table, no such row, or `chunkId` is not an integer.
     */
    async getChunkById(chunkId) {
        const table = await this.getTable();
        if (!table) {
            return undefined;
        }
        // The id is embedded in the predicate text, so only a plain integer may
        // reach it; NaN or arbitrary strings cannot match a row anyway.
        const id = Number(chunkId);
        if (!Number.isInteger(id)) {
            return undefined;
        }
        const rows = await table.query().where(`id = ${id}`).toArray();
        return rows[0];
    }

    /**
     * @returns {Promise<boolean>} `true` when the table cannot be obtained.
     *   The row count is not inspected.
     */
    async isEmpty() {
        const table = await this.getTable();
        return !table;
    }

    /** @returns {string} The storage directory. */
    getDocsDir() {
        return this.docsDir;
    }
}
|
|
206
|
+
//# sourceMappingURL=code-store.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"code-store.js","sourceRoot":"","sources":["../src/code-store.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AAC9D,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAEjC,OAAO,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAG7C,MAAM,OAAO,SAAS;IACZ,OAAO,CAAS;IAChB,MAAM,CAAS;IACf,YAAY,CAAS;IACrB,QAAQ,GAAwB,IAAI,CAAC;IACrC,MAAM,GAAW,CAAC,CAAC;IACnB,WAAW,CAAS;IACpB,UAAU,GAAQ,IAAI,CAAC;IACvB,aAAa,GAAQ,IAAI,CAAC;IAElC,YAAY,WAAmB;QAC7B,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC;QAC/B,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,WAAW,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC;QACpD,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;QAC5C,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,qBAAqB,CAAC,CAAC;IAChE,CAAC;IAEO,KAAK,CAAC,SAAS;QACrB,MAAM,KAAK,CAAC,IAAI,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IACjD,CAAC;IAEO,MAAM,CAAU,sBAAsB,GAAG,CAAC,CAAC;IAE3C,KAAK,CAAC,QAAQ;QACpB,IAAI,IAAI,CAAC,aAAa;YAAE,OAAO,IAAI,CAAC,aAAa,CAAC;QAClD,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,kBAAkB,CAAC,CAAC;QAEjD,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,CAAC;YACrB,IAAI,CAAC,UAAU,GAAG,MAAM,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvD,CAAC;QAED,oDAAoD;QACpD,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,YAAY,EAAE,CAAC;QAC3C,IAAI,QAAQ,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC,aAAa,IAAI,CAAC,CAAC,GAAG,SAAS,CAAC,sBAAsB,EAAE,CAAC;YAClG,IAAI,CAAC;gBACH,MAAM,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;YAC1C,CAAC;YAAC,MAAM,CAAC,CAAC,yBAAyB,CAAC,CAAC;YACrC,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC;YAC1B,yCAAyC;YACzC,QAAQ,CAAC,KAAK,GAAG,EAAE,CAAC;YACpB,QAAQ,CAAC,aAAa,GAAG,SAAS,CAAC,sBAAsB,CAAC;YAC1D,MAAM,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,CAAC;YAClC,OAAO,IAAI,CAAC;QACd,CAAC;QAED,IAAI,CAAC;YACH,IAAI,CAAC,aAAa,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;YAC7D,uBAAuB;YACvB,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,KAAK,EAAE,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;YACvE,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACpB,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAM,EAAE,EAAE,C
AAC,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC;YAC5D,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,4DAA4D;YAC5D,IAAI,CAAC,aAAa,GAAG,IAAI,CAAC;QAC5B,CAAC;QACD,OAAO,IAAI,CAAC,aAAa,CAAC;IAC5B,CAAC;IAED,KAAK,CAAC,OAAO,CACX,QAAgB,EAChB,QAAgB,EAChB,MAAc,EACd,MAA6B,EAC7B,OAAwC;QAExC,MAAM,IAAI,CAAC,SAAS,EAAE,CAAC;QACvB,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,kBAAkB,CAAC,CAAC;QAEjD,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,CAAC;YACrB,IAAI,CAAC,UAAU,GAAG,MAAM,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvD,CAAC;QAED,qEAAqE;QACrE,IAAI,KAAK,GAAG,MAAM,IAAI,CAAC,QAAQ,EAAE,CAAC;QAElC,aAAa;QACb,MAAM,IAAI,GAAc,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;YAC7C,GAAG,KAAK;YACR,EAAE,EAAE,IAAI,CAAC,MAAM,EAAE;SAClB,CAAC,CAAC,CAAC;QAEJ,IAAI,KAAK,EAAE,CAAC;YACV,wDAAwD;YACxD,MAAM,KAAK,CAAC,MAAM,CAAC,iBAAiB,eAAe,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;YAClE,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACpB,MAAM,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;YACxB,CAAC;QACH,CAAC;aAAM,CAAC;YACN,wCAAwC;YACxC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACpB,KAAK,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,WAAW,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;gBACxD,IAAI,CAAC,aAAa,GAAG,KAAK,CAAC;YAC7B,CAAC;QACH,CAAC;QAED,uBAAuB;QACvB,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,YAAY,EAAE,CAAC;QAC3C,MAAM,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,QAAQ,CAAC,CAAC;QACxE,MAAM,QAAQ,GAAwB;YACpC,QAAQ;YACR,MAAM;YACN,QAAQ;YACR,UAAU,EAAE,IAAI,CAAC,MAAM;YACvB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;SACpC,CAAC;QACF,IAAI,QAAQ,IAAI,CAAC,EAAE,CAAC;YAClB,QAAQ,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,QAAQ,CAAC;QACtC,CAAC;aAAM,CAAC;YACN,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAChC,CAAC;QACD,IAAI,CAAC,OAAO,EAAE,gBAAgB,EAAE,CAAC;YAC/B,MAAM,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,CAAC;QACpC,CAAC;QAED,MAAM,SAAS,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,KAAK,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACxD,OAAO,EAAE,UAAU,EAAE,IAAI,CAAC,MAAM,EAAE,SAAS,EAAE,SAAS,EAAE,CAAC;IAC3D,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,QAAgB;QAC/B,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,QAAQ,EAAE,CAAC;QACpC,IAAI,KAAK,EAAE,CAAC;YACV,MAAM,KAAK,CA
AC,MAAM,CAAC,iBAAiB,eAAe,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;QACpE,CAAC;QAED,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,YAAY,EAAE,CAAC;QAC3C,QAAQ,CAAC,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,QAAQ,CAAC,CAAC;QACrE,MAAM,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,CAAC;IACpC,CAAC;IAED,KAAK,CAAC,gBAAgB,CAAC,YAAyB;QAC9C,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,YAAY,EAAE,CAAC;QAC3C,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC;QAExE,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,QAAQ,EAAE,CAAC;QACpC,IAAI,KAAK,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC9B,MAAM,OAAO,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,eAAe,CAAC,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC9E,MAAM,KAAK,CAAC,MAAM,CAAC,kBAAkB,OAAO,GAAG,CAAC,CAAC;QACnD,CAAC;QAED,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACrB,QAAQ,CAAC,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC;YAC1E,MAAM,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,CAAC;QACpC,CAAC;QAED,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;IACpC,CAAC;IAED,KAAK,CAAC,YAAY,CAChB,WAAqB,EACrB,KAAa,EACb,MAAe;QAEf,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,QAAQ,EAAE,CAAC;QACpC,IAAI,CAAC,KAAK;YAAE,OAAO,EAAE,CAAC;QAEtB,IAAI,KAAK,GAAG,KAAK,CAAC,YAAY,CAAC,WAAW,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QACzD,IAAI,MAAM,EAAE,CAAC;YACX,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QAC9B,CAAC;QACD,OAAO,MAAM,KAAK,CAAC,OAAO,EAAE,CAAC;IAC/B,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,KAAa,EAAE,KAAa,EAAE,MAAe;QAC3D,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,QAAQ,EAAE,CAAC;QACpC,IAAI,CAAC,KAAK;YAAE,OAAO,EAAE,CAAC;QACtB,IAAI,CAAC;YACH,IAAI,CAAC,GAAG,KAAK,CAAC,KAAK,EAAE,CAAC,cAAc,CAAC,KAAK,EAAE,EAAE,OAAO,EAAE,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;YAChF,IAAI,MAAM,EAAE,CAAC;gBACX,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;YACtB,CAAC;YACD,OAAO,MAAM,CAAC,CAAC,OAAO,EAAE,CAAC;QAC3B,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;IAED,KAAK,CAAC,cAAc;QAClB,MAAM
,KAAK,GAAG,MAAM,IAAI,CAAC,QAAQ,EAAE,CAAC;QACpC,IAAI,CAAC,KAAK;YAAE,OAAO;QACnB,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,kBAAkB,CAAC,CAAC;QACjD,MAAM,KAAK,CAAC,WAAW,CAAC,MAAM,EAAE;YAC9B,MAAM,EAAE,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,eAAe,EAAE,IAAI,EAAE,CAAC;YACjF,OAAO,EAAE,IAAI;SACd,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,YAAY;QAChB,IAAI,IAAI,CAAC,QAAQ;YAAE,OAAO,IAAI,CAAC,QAAQ,CAAC;QACxC,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,CAAC,YAAY,EAAE,OAAO,CAAC,CAAC;YACxD,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAiB,CAAC;QACnD,CAAC;QAAC,MAAM,CAAC;YACP,IAAI,CAAC,QAAQ,GAAG,EAAE,WAAW,EAAE,IAAI,CAAC,WAAW,EAAE,KAAK,EAAE,EAAE,EAAE,aAAa,EAAE,SAAS,CAAC,sBAAsB,EAAE,CAAC;QAChH,CAAC;QACD,OAAO,IAAI,CAAC,QAAQ,CAAC;IACvB,CAAC;IAED,KAAK,CAAC,YAAY,CAAC,QAAsB;QACvC,MAAM,IAAI,CAAC,SAAS,EAAE,CAAC;QACvB,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;QACzB,MAAM,SAAS,CAAC,IAAI,CAAC,YAAY,EAAE,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IACxE,CAAC;IAED,KAAK,CAAC,WAAW,CAAC,QAAgB;QAChC,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,YAAY,EAAE,CAAC;QAC3C,OAAO,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,QAAQ,CAAC,EAAE,MAAM,CAAC;IACnE,CAAC;IAED,KAAK,CAAC,YAAY,CAAC,OAAe;QAChC,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,QAAQ,EAAE,CAAC;QACpC,IAAI,CAAC,KAAK;YAAE,OAAO,SAAS,CAAC;QAC7B,MAAM,IAAI,GAAG,MAAM,KAAK,CAAC,KAAK,EAAE,CAAC,KAAK,CAAC,QAAQ,OAAO,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC;QACpE,OAAO,IAAI,CAAC,CAAC,CAAC,CAAC;IACjB,CAAC;IAED,KAAK,CAAC,OAAO;QACX,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,QAAQ,EAAE,CAAC;QACpC,OAAO,CAAC,KAAK,CAAC;IAChB,CAAC;IAED,UAAU;QACR,OAAO,IAAI,CAAC,OAAO,CAAC;IACtB,CAAC"}
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Code search integration tests — requires TEI containers running (:39281, :39282, :39283).
|
|
3
|
+
* Indexes this project's own source code and tests semantic search against it.
|
|
4
|
+
*
|
|
5
|
+
* Run: npm run test:code
|
|
6
|
+
*/
|
|
7
|
+
import { describe, it, before, after } from "node:test";
|
|
8
|
+
import assert from "node:assert/strict";
|
|
9
|
+
import { mkdtemp, readFile } from "node:fs/promises";
|
|
10
|
+
import { tmpdir } from "node:os";
|
|
11
|
+
import { join } from "node:path";
|
|
12
|
+
import { CodeStore } from "./code-store.js";
|
|
13
|
+
import { indexCodeFile } from "./code-indexer.js";
|
|
14
|
+
import { searchCode } from "./code-search.js";
|
|
15
|
+
import { walkProjectFiles, computeFileHash } from "./file-walker.js";
|
|
16
|
+
// Parent of the directory containing this module; passed to walkProjectFiles as the project root.
const PROJECT_ROOT = join(import.meta.dirname, "..");
|
|
17
|
+
describe("Code search pipeline (self-codebase)", { timeout: 600_000 }, () => {
|
|
18
|
+
// Suite-wide fixtures: assigned once in the before() hook, read by the tests in this suite.
let tempDir; // temp directory handed to the CodeStore constructor
let codeStore; // CodeStore instance every test queries
let indexedFileCount; // number of files returned by walkProjectFiles
let totalChunks; // sum of chunks.length over all indexed files

/**
 * One-time setup: create a temp-backed CodeStore, walk this project's src/ tree
 * (test files excluded), index every file into the store, then build the FTS index.
 */
before(async () => {
    const storeDir = await mkdtemp(join(tmpdir(), "code-search-test-"));
    tempDir = storeDir;
    codeStore = new CodeStore(storeDir);

    // Walk this project's own src/ files (exclude test files to avoid self-reference noise)
    const walkOptions = {
        projectRoot: PROJECT_ROOT,
        includePaths: ["src/**"],
        excludePaths: ["src/*.test.ts"],
    };
    const sourceFiles = await walkProjectFiles(walkOptions);
    indexedFileCount = sourceFiles.length;
    console.log(` Found ${sourceFiles.length} source files to index`);
    assert.ok(sourceFiles.length >= 8, `Expected >=8 src files, got ${sourceFiles.length}`);

    // Index files one at a time, keeping a running chunk total for the summary log.
    totalChunks = 0;
    for (const { absolutePath, relativePath, language } of sourceFiles) {
        const contents = await readFile(absolutePath, "utf-8");
        const digest = await computeFileHash(absolutePath);
        const fileChunks = await indexCodeFile(contents, relativePath, language);
        await codeStore.addFile(relativePath, language, digest, fileChunks);
        totalChunks += fileChunks.length;
        console.log(` ${relativePath}: ${fileChunks.length} chunks`);
    }

    // Build FTS index
    await codeStore.createFtsIndex();
    console.log(` FTS index built — ${totalChunks} total chunks across ${indexedFileCount} files`);
});
|
|
47
|
+
/**
 * Suite teardown: remove the temporary index directory (best effort), then
 * schedule a forced process exit.
 */
after(async () => {
    // The mkdtemp directory would otherwise be left behind in the OS temp dir on every run.
    // tempDir is still undefined if the before() hook failed before creating it.
    if (tempDir !== undefined) {
        try {
            // Local import: the file's top-level imports do not include rm.
            const { rm } = await import("node:fs/promises");
            await rm(tempDir, { recursive: true, force: true });
        }
        catch (err) {
            // Cleanup is best-effort: open native handles may prevent removal on some platforms.
            console.warn(` Could not remove temp dir ${tempDir}: ${err?.message ?? err}`);
        }
    }
    // Force exit — LanceDB native bindings hold file locks and keep event loop alive
    setTimeout(() => process.exit(0), 200);
});
|
|
51
|
+
// --- Concept-based search tests ---
|
|
52
|
+
it("finds RRF fusion logic by concept", async () => {
    // Concept query: at least one of the top 5 hits must point at RRF code.
    const hits = await searchCode("reciprocal rank fusion", codeStore, { topK: 5 });
    assert.ok(hits.length > 0, "Expected at least 1 result");

    // A hit qualifies via its path or via an RRF identifier in its content.
    const contentMarkers = ["reciprocalRankFusion", "rrfScore"];
    const isRrfHit = ({ filePath, content }) =>
        filePath.includes("rrf") || contentMarkers.some((marker) => content.includes(marker));
    assert.ok(hits.some(isRrfHit), "Should find RRF-related code");

    const [top] = hits;
    console.log(` 'reciprocal rank fusion' → top: ${top.filePath}:${top.lineStart} (${top.entityName})`);
});
|
|
59
|
+
it("finds LanceDB table management by concept", async () => {
    // Concept query: at least one of the top 5 hits must point at the store code.
    const hits = await searchCode("LanceDB table management", codeStore, { topK: 5 });
    assert.ok(hits.length > 0, "Expected at least 1 result");

    // A hit qualifies via a "store" path or a LanceDB-related token in its content.
    const contentMarkers = ["LanceDB", "openTable", "createTable"];
    const isStoreHit = ({ filePath, content }) =>
        filePath.includes("store") || contentMarkers.some((marker) => content.includes(marker));
    assert.ok(hits.some(isStoreHit), "Should find LanceDB store code");

    const [top] = hits;
    console.log(` 'LanceDB table management' → top: ${top.filePath}:${top.lineStart} (${top.entityName})`);
});
|
|
66
|
+
it("finds heading-based markdown chunking", async () => {
    // Concept query: at least one of the top 5 hits must contain a chunker identifier.
    const hits = await searchCode("heading based markdown chunking", codeStore, { topK: 5 });
    assert.ok(hits.length > 0, "Expected at least 1 result");

    // Only content is inspected here; the file path is not part of the check.
    const contentMarkers = ["chunkMarkdown", "headingStack", "splitWithOverlap"];
    const isChunkerHit = ({ content }) => contentMarkers.some((marker) => content.includes(marker));
    assert.ok(hits.some(isChunkerHit), "Should find markdown chunking code");

    const [top] = hits;
    console.log(` 'heading based markdown chunking' → top: ${top.filePath}:${top.lineStart} (${top.entityName})`);
});
|
|
73
|
+
it("finds cross-encoder reranking", async () => {
    // Concept query: at least one of the top 5 hits must point at reranker code.
    const hits = await searchCode("cross encoder reranking", codeStore, { topK: 5 });
    assert.ok(hits.length > 0, "Expected at least 1 result");

    // A hit qualifies via a "reranker" path or a rerank token in its content.
    const contentMarkers = ["rerank", "rerankerScore"];
    const isRerankerHit = ({ filePath, content }) =>
        filePath.includes("reranker") || contentMarkers.some((marker) => content.includes(marker));
    assert.ok(hits.some(isRerankerHit), "Should find reranker code");

    const [top] = hits;
    console.log(` 'cross encoder reranking' → top: ${top.filePath}:${top.lineStart} (${top.entityName})`);
});
|
|
80
|
+
it("finds file walker / gitignore logic", async () => {
    // Concept query: at least one of the top 5 hits must point at the file walker.
    const hits = await searchCode("walk project files gitignore", codeStore, { topK: 5 });
    assert.ok(hits.length > 0, "Expected at least 1 result");

    // A hit qualifies via a "file-walker" path or a walker token in its content.
    const contentMarkers = ["walkProjectFiles", "gitignore"];
    const isWalkerHit = ({ filePath, content }) =>
        filePath.includes("file-walker") || contentMarkers.some((marker) => content.includes(marker));
    assert.ok(hits.some(isWalkerHit), "Should find file walker code");

    const [top] = hits;
    console.log(` 'walk project files gitignore' → top: ${top.filePath}:${top.lineStart} (${top.entityName})`);
});
|
|
87
|
+
it("finds DocStore class by name", async () => {
    // Name query: at least one of the top 5 hits must be the DocStore class itself.
    const hits = await searchCode("DocStore class", codeStore, { topK: 5 });
    assert.ok(hits.length > 0, "Expected at least 1 result");

    // Match on the entity name, or fall back to the class declaration text.
    const isDocStoreHit = ({ entityName, content }) => {
        if (entityName === "DocStore") {
            return true;
        }
        return content.includes("class DocStore");
    };
    assert.ok(hits.some(isDocStoreHit), "Should find DocStore class");

    const [top] = hits;
    console.log(` 'DocStore class' → top: ${top.filePath}:${top.lineStart} (${top.entityName})`);
});
|
|
94
|
+
it("finds AST parsing / tree-sitter code", async () => {
    // Concept query: at least one of the top 5 hits must contain a tree-sitter token.
    const hits = await searchCode("tree sitter AST parsing", codeStore, { topK: 5 });
    assert.ok(hits.length > 0, "Expected at least 1 result");

    // Only content is inspected here; the file path is not part of the check.
    const contentMarkers = ["tree-sitter", "Parser", "rootNode"];
    const isTreeSitterHit = ({ content }) => contentMarkers.some((marker) => content.includes(marker));
    assert.ok(hits.some(isTreeSitterHit), "Should find tree-sitter AST code");

    const [top] = hits;
    console.log(` 'tree sitter AST parsing' → top: ${top.filePath}:${top.lineStart} (${top.entityName})`);
});
|
|
101
|
+
it("finds npm doc discovery logic", async () => {
    // Concept query: at least one of the top 5 hits must point at the discovery code.
    const hits = await searchCode("npm package documentation discovery", codeStore, { topK: 5 });
    assert.ok(hits.length > 0, "Expected at least 1 result");

    // A hit qualifies via a "discovery" path or a discovery token in its content.
    const contentMarkers = ["npmRegistry", "llms.txt", "probeUrl"];
    const isDiscoveryHit = ({ filePath, content }) =>
        filePath.includes("discovery") || contentMarkers.some((marker) => content.includes(marker));
    assert.ok(hits.some(isDiscoveryHit), "Should find doc discovery code");

    const [top] = hits;
    console.log(` 'npm package documentation discovery' → top: ${top.filePath}:${top.lineStart} (${top.entityName})`);
});
|
|
108
|
+
// --- Neighbor expansion tests ---
|
|
109
|
+
it("returns expanded content with neighbor chunks", async () => {
    const hits = await searchCode("reciprocal rank fusion", codeStore, { topK: 3 });
    assert.ok(hits.length > 0, "Expected results");

    // Neighbor expansion is only logged, not asserted: in a small codebase a result
    // may have no adjacent chunks, so its content length cannot be pinned.
    const [{ content }] = hits;
    console.log(` Neighbor expansion: top result content length = ${content.length} chars`);
});
|
|
116
|
+
// --- Structure validation tests ---
|
|
117
|
+
it("returns results with valid CodeSearchResult structure", async () => {
    const results = await searchCode("embed texts", codeStore, { topK: 3 });
    // Without this guard the loop below passes vacuously when the search returns nothing.
    assert.ok(results.length > 0, "Expected results");

    // Expected `typeof` for each primitive field of a result.
    const expectedTypes = {
        score: "number",
        filePath: "string",
        language: "string",
        entityType: "string",
        entityName: "string",
        lineStart: "number",
        lineEnd: "number",
        content: "string",
        chunkId: "number",
    };

    for (const r of results) {
        for (const [field, expected] of Object.entries(expectedTypes)) {
            assert.equal(typeof r[field], expected, `${field} should be a ${expected}`);
        }
        assert.ok(Array.isArray(r.scopeChain), "scopeChain should be an array");
        assert.ok(r.content.length > 0, "content should not be empty");
        assert.ok(r.lineStart >= 1, "lineStart should be 1-based");
        assert.ok(r.lineEnd >= r.lineStart, "lineEnd >= lineStart");
    }
});
|
|
135
|
+
it("returns scores in descending order", async () => {
    const ranked = await searchCode("search pipeline", codeStore, { topK: 10 });
    assert.ok(ranked.length > 1, "Expected multiple results");

    // Compare each result (from the second onward) with its predecessor.
    ranked.slice(1).forEach((current, offset) => {
        const i = offset + 1;
        assert.ok(ranked[i - 1].score >= current.score, `Not sorted at [${i}]`);
    });
});
|
|
142
|
+
// --- Filter tests ---
|
|
143
|
+
it("filters by language", async () => {
    const searchOptions = { language: "typescript", topK: 10 };
    const hits = await searchCode("function", codeStore, searchOptions);
    assert.ok(hits.length > 0, "Expected results");

    // Every returned result must carry the requested language.
    hits.forEach(({ language }) => {
        assert.equal(language, "typescript");
    });
});
|
|
150
|
+
it("filters by filePath", async () => {
    const results = await searchCode("search", codeStore, { filePath: "src/search.ts", topK: 10 });
    // Require at least one hit so the loop cannot pass vacuously
    // (mirrors the "filters by language" test).
    assert.ok(results.length > 0, "Expected results");

    // Every returned result must come from the requested file.
    for (const r of results) {
        assert.equal(r.filePath, "src/search.ts");
    }
});
|
|
156
|
+
it("filters by entityType", async () => {
    const searchOptions = { entityType: "class", topK: 10 };
    const hits = await searchCode("store management", codeStore, searchOptions);

    // An empty result is acceptable here (no class may match the query well);
    // only the type of whatever is returned is verified.
    hits.forEach(({ entityType }) => {
        assert.equal(entityType, "class");
    });
});
|
|
164
|
+
// --- Metadata tests ---
|
|
165
|
+
it("tracks file metadata correctly", async () => {
    const { files } = await codeStore.loadMetadata();
    assert.ok(files.length > 0, "Should have indexed file metadata");
    // One metadata entry per file indexed in the before() hook.
    assert.equal(files.length, indexedFileCount);

    const allowedLanguages = ["typescript", "javascript"];
    for (const { filePath, sha256, chunkCount, language, indexedAt } of files) {
        assert.ok(filePath.startsWith("src/"), `filePath should start with src/: ${filePath}`);
        assert.equal(sha256.length, 64, "SHA-256 hex should be 64 chars");
        assert.ok(chunkCount > 0, `chunkCount should be > 0 for ${filePath}`);
        assert.ok(allowedLanguages.includes(language));
        assert.ok(indexedAt, "indexedAt should be set");
    }
});
|
|
177
|
+
it("getFileHash returns correct hash for indexed files", async () => {
    // Indexed file: a 64-character hash string is expected.
    const knownHash = await codeStore.getFileHash("src/rrf.ts");
    assert.ok(knownHash, "Should have hash for src/rrf.ts");
    assert.equal(knownHash.length, 64);

    // File that was never indexed: the lookup yields undefined.
    const unknownHash = await codeStore.getFileHash("src/nonexistent.ts");
    assert.equal(unknownHash, undefined, "Should return undefined for non-indexed file");
});
|
|
184
|
+
it("getChunkById returns a valid chunk", async () => {
    // Use a known chunk ID (first chunk should be id=1)
    const knownId = 1;
    const chunk = await codeStore.getChunkById(knownId);
    assert.ok(chunk, "Should find chunk with id 1");

    const { id, text, filePath } = chunk;
    assert.equal(id, 1);
    assert.ok(text.length > 0);
    assert.ok(filePath.startsWith("src/"));
});
|
|
192
|
+
it("isEmpty returns false after indexing", async () => {
    // The before() hook has already added files, so the store must report non-empty.
    assert.equal(await codeStore.isEmpty(), false, "Store should not be empty after indexing");
});
|
|
196
|
+
});
|
|
197
|
+
//# sourceMappingURL=code.test.js.map
|