codesift-mcp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +241 -0
- package/dist/cli/args.d.ts +13 -0
- package/dist/cli/args.d.ts.map +1 -0
- package/dist/cli/args.js +79 -0
- package/dist/cli/args.js.map +1 -0
- package/dist/cli/commands.d.ts +4 -0
- package/dist/cli/commands.d.ts.map +1 -0
- package/dist/cli/commands.js +336 -0
- package/dist/cli/commands.js.map +1 -0
- package/dist/cli/help.d.ts +3 -0
- package/dist/cli/help.d.ts.map +1 -0
- package/dist/cli/help.js +271 -0
- package/dist/cli/help.js.map +1 -0
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +80 -0
- package/dist/cli.js.map +1 -0
- package/dist/config.d.ts +23 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +49 -0
- package/dist/config.js.map +1 -0
- package/dist/parser/extractors/go.d.ts +4 -0
- package/dist/parser/extractors/go.d.ts.map +1 -0
- package/dist/parser/extractors/go.js +185 -0
- package/dist/parser/extractors/go.js.map +1 -0
- package/dist/parser/extractors/javascript.d.ts +9 -0
- package/dist/parser/extractors/javascript.d.ts.map +1 -0
- package/dist/parser/extractors/javascript.js +10 -0
- package/dist/parser/extractors/javascript.js.map +1 -0
- package/dist/parser/extractors/markdown.d.ts +15 -0
- package/dist/parser/extractors/markdown.d.ts.map +1 -0
- package/dist/parser/extractors/markdown.js +217 -0
- package/dist/parser/extractors/markdown.js.map +1 -0
- package/dist/parser/extractors/prisma.d.ts +17 -0
- package/dist/parser/extractors/prisma.d.ts.map +1 -0
- package/dist/parser/extractors/prisma.js +121 -0
- package/dist/parser/extractors/prisma.js.map +1 -0
- package/dist/parser/extractors/python.d.ts +4 -0
- package/dist/parser/extractors/python.d.ts.map +1 -0
- package/dist/parser/extractors/python.js +203 -0
- package/dist/parser/extractors/python.js.map +1 -0
- package/dist/parser/extractors/rust.d.ts +4 -0
- package/dist/parser/extractors/rust.d.ts.map +1 -0
- package/dist/parser/extractors/rust.js +178 -0
- package/dist/parser/extractors/rust.js.map +1 -0
- package/dist/parser/extractors/typescript.d.ts +4 -0
- package/dist/parser/extractors/typescript.d.ts.map +1 -0
- package/dist/parser/extractors/typescript.js +296 -0
- package/dist/parser/extractors/typescript.js.map +1 -0
- package/dist/parser/languages/tree-sitter-css.wasm +0 -0
- package/dist/parser/languages/tree-sitter-go.wasm +0 -0
- package/dist/parser/languages/tree-sitter-java.wasm +0 -0
- package/dist/parser/languages/tree-sitter-javascript.wasm +0 -0
- package/dist/parser/languages/tree-sitter-json.wasm +0 -0
- package/dist/parser/languages/tree-sitter-php.wasm +0 -0
- package/dist/parser/languages/tree-sitter-python.wasm +0 -0
- package/dist/parser/languages/tree-sitter-ruby.wasm +0 -0
- package/dist/parser/languages/tree-sitter-rust.wasm +0 -0
- package/dist/parser/languages/tree-sitter-tsx.wasm +0 -0
- package/dist/parser/languages/tree-sitter-typescript.wasm +0 -0
- package/dist/parser/parser-manager.d.ts +6 -0
- package/dist/parser/parser-manager.d.ts.map +1 -0
- package/dist/parser/parser-manager.js +60 -0
- package/dist/parser/parser-manager.js.map +1 -0
- package/dist/parser/symbol-extractor.d.ts +22 -0
- package/dist/parser/symbol-extractor.d.ts.map +1 -0
- package/dist/parser/symbol-extractor.js +115 -0
- package/dist/parser/symbol-extractor.js.map +1 -0
- package/dist/retrieval/codebase-retrieval.d.ts +27 -0
- package/dist/retrieval/codebase-retrieval.d.ts.map +1 -0
- package/dist/retrieval/codebase-retrieval.js +472 -0
- package/dist/retrieval/codebase-retrieval.js.map +1 -0
- package/dist/search/bm25.d.ts +22 -0
- package/dist/search/bm25.d.ts.map +1 -0
- package/dist/search/bm25.js +179 -0
- package/dist/search/bm25.js.map +1 -0
- package/dist/search/chunker.d.ts +9 -0
- package/dist/search/chunker.d.ts.map +1 -0
- package/dist/search/chunker.js +91 -0
- package/dist/search/chunker.js.map +1 -0
- package/dist/search/hybrid.d.ts +16 -0
- package/dist/search/hybrid.d.ts.map +1 -0
- package/dist/search/hybrid.js +51 -0
- package/dist/search/hybrid.js.map +1 -0
- package/dist/search/semantic.d.ts +44 -0
- package/dist/search/semantic.d.ts.map +1 -0
- package/dist/search/semantic.js +194 -0
- package/dist/search/semantic.js.map +1 -0
- package/dist/server.d.ts +2 -0
- package/dist/server.d.ts.map +1 -0
- package/dist/server.js +285 -0
- package/dist/server.js.map +1 -0
- package/dist/storage/chunk-store.d.ts +32 -0
- package/dist/storage/chunk-store.d.ts.map +1 -0
- package/dist/storage/chunk-store.js +144 -0
- package/dist/storage/chunk-store.js.map +1 -0
- package/dist/storage/embedding-store.d.ts +41 -0
- package/dist/storage/embedding-store.d.ts.map +1 -0
- package/dist/storage/embedding-store.js +149 -0
- package/dist/storage/embedding-store.js.map +1 -0
- package/dist/storage/index-store.d.ts +23 -0
- package/dist/storage/index-store.d.ts.map +1 -0
- package/dist/storage/index-store.js +95 -0
- package/dist/storage/index-store.js.map +1 -0
- package/dist/storage/registry.d.ts +35 -0
- package/dist/storage/registry.d.ts.map +1 -0
- package/dist/storage/registry.js +99 -0
- package/dist/storage/registry.js.map +1 -0
- package/dist/storage/usage-stats.d.ts +32 -0
- package/dist/storage/usage-stats.d.ts.map +1 -0
- package/dist/storage/usage-stats.js +180 -0
- package/dist/storage/usage-stats.js.map +1 -0
- package/dist/storage/usage-tracker.d.ts +35 -0
- package/dist/storage/usage-tracker.d.ts.map +1 -0
- package/dist/storage/usage-tracker.js +245 -0
- package/dist/storage/usage-tracker.js.map +1 -0
- package/dist/storage/watcher.d.ts +12 -0
- package/dist/storage/watcher.d.ts.map +1 -0
- package/dist/storage/watcher.js +66 -0
- package/dist/storage/watcher.js.map +1 -0
- package/dist/tools/context-tools.d.ts +31 -0
- package/dist/tools/context-tools.d.ts.map +1 -0
- package/dist/tools/context-tools.js +219 -0
- package/dist/tools/context-tools.js.map +1 -0
- package/dist/tools/diff-tools.d.ts +22 -0
- package/dist/tools/diff-tools.d.ts.map +1 -0
- package/dist/tools/diff-tools.js +165 -0
- package/dist/tools/diff-tools.js.map +1 -0
- package/dist/tools/generate-tools.d.ts +11 -0
- package/dist/tools/generate-tools.d.ts.map +1 -0
- package/dist/tools/generate-tools.js +135 -0
- package/dist/tools/generate-tools.js.map +1 -0
- package/dist/tools/graph-tools.d.ts +60 -0
- package/dist/tools/graph-tools.d.ts.map +1 -0
- package/dist/tools/graph-tools.js +313 -0
- package/dist/tools/graph-tools.js.map +1 -0
- package/dist/tools/index-tools.d.ts +39 -0
- package/dist/tools/index-tools.d.ts.map +1 -0
- package/dist/tools/index-tools.js +451 -0
- package/dist/tools/index-tools.js.map +1 -0
- package/dist/tools/outline-tools.d.ts +59 -0
- package/dist/tools/outline-tools.d.ts.map +1 -0
- package/dist/tools/outline-tools.js +342 -0
- package/dist/tools/outline-tools.js.map +1 -0
- package/dist/tools/search-tools.d.ts +29 -0
- package/dist/tools/search-tools.d.ts.map +1 -0
- package/dist/tools/search-tools.js +309 -0
- package/dist/tools/search-tools.js.map +1 -0
- package/dist/tools/symbol-tools.d.ts +24 -0
- package/dist/tools/symbol-tools.d.ts.map +1 -0
- package/dist/tools/symbol-tools.js +172 -0
- package/dist/tools/symbol-tools.js.map +1 -0
- package/dist/types.d.ts +91 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +3 -0
- package/dist/types.js.map +1 -0
- package/dist/utils/git-validation.d.ts +11 -0
- package/dist/utils/git-validation.d.ts.map +1 -0
- package/dist/utils/git-validation.js +19 -0
- package/dist/utils/git-validation.js.map +1 -0
- package/dist/utils/test-file.d.ts +11 -0
- package/dist/utils/test-file.d.ts.map +1 -0
- package/dist/utils/test-file.js +27 -0
- package/dist/utils/test-file.js.map +1 -0
- package/package.json +62 -0
- package/src/parser/languages/tree-sitter-css.wasm +0 -0
- package/src/parser/languages/tree-sitter-go.wasm +0 -0
- package/src/parser/languages/tree-sitter-java.wasm +0 -0
- package/src/parser/languages/tree-sitter-javascript.wasm +0 -0
- package/src/parser/languages/tree-sitter-json.wasm +0 -0
- package/src/parser/languages/tree-sitter-php.wasm +0 -0
- package/src/parser/languages/tree-sitter-python.wasm +0 -0
- package/src/parser/languages/tree-sitter-ruby.wasm +0 -0
- package/src/parser/languages/tree-sitter-rust.wasm +0 -0
- package/src/parser/languages/tree-sitter-tsx.wasm +0 -0
- package/src/parser/languages/tree-sitter-typescript.wasm +0 -0
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
import { tokenizeIdentifier } from "../parser/symbol-extractor.js";
|
|
2
|
+
import { isTestFile } from "../utils/test-file.js";
|
|
3
|
+
// ---- BM25 tuning constants ----
// K1 and B both appear in the per-term score: K1 scales term-frequency
// saturation, B sets how strongly the field-length ratio affects the score.
const K1 = 1.2;
const B = 0.75;
// Only this many leading characters of a symbol's source are tokenized for
// the "body" field.
const BODY_CHAR_LIMIT = 500;
/**
 * Factor applied to the final score of every symbol that lives in a test file.
 * With 0.3, a test symbol keeps 30% of the score an otherwise identical
 * production symbol would get, so production code is listed first.
 */
const TEST_FILE_SCORE_MULTIPLIER = 0.3;
|
|
13
|
+
/**
 * Tokenizer shared by the signature, docstring, and body fields.
 *
 * The text is cut at every run of non-alphanumeric (ASCII) characters. Each
 * piece is then broken at camelCase / PascalCase / acronym boundaries,
 * lowercased, and kept only when it is at least two characters long.
 *
 * @param text - Text to tokenize
 * @returns Lowercased tokens in order of appearance (duplicates preserved)
 */
export function tokenizeText(text) {
    return text
        .split(/[^a-zA-Z0-9]+/)
        .filter(Boolean)
        .flatMap((word) =>
            word
                // lowercase letter or digit followed by a capital: "getUser" -> "get" + "User"
                .replace(/([a-z0-9])([A-Z])/g, "$1\0$2")
                // run of capitals followed by a capitalised word: "HTTPServer" -> "HTTP" + "Server"
                .replace(/([A-Z]+)([A-Z][a-z])/g, "$1\0$2")
                .split("\0"))
        .map((piece) => piece.toLowerCase())
        .filter((piece) => piece.length >= 2);
}
|
|
37
|
+
/**
 * Produce the token lists for the four indexed fields of one symbol.
 *
 * The name goes through tokenizeIdentifier; signature, docstring and source
 * go through tokenizeText. A missing (falsy) signature, docstring or source
 * yields an empty list, and only the first BODY_CHAR_LIMIT characters of the
 * source are tokenized.
 *
 * @param symbol - Object with `name` and optional `signature`, `docstring`, `source`
 * @returns `{ name, signature, docstring, body }`, each an array of tokens
 */
function getFieldTokens(symbol) {
    const { name, signature, docstring, source } = symbol;
    const tokensOrNone = (value) => (value ? tokenizeText(value) : []);
    return {
        name: tokenizeIdentifier(name),
        signature: tokensOrNone(signature),
        docstring: tokensOrNone(docstring),
        body: source ? tokenizeText(source.slice(0, BODY_CHAR_LIMIT)) : [],
    };
}
|
|
47
|
+
/**
 * Tally how many times each token occurs.
 *
 * @param tokens - Iterable of tokens
 * @returns Map from token to its occurrence count, keyed in first-seen order
 */
function countTermFrequencies(tokens) {
    const counts = new Map();
    for (const term of tokens) {
        const seenSoFar = counts.has(term) ? counts.get(term) : 0;
        counts.set(term, seenSoFar + 1);
    }
    return counts;
}
|
|
54
|
+
/**
 * Build the per-field inverted index used by searchBM25.
 *
 * For each of the four fields (name, signature, docstring, body) the index
 * maps token -> (symbol id -> term frequency). Average field lengths are the
 * total token count of a field divided by the number of symbols (0 when there
 * are no symbols).
 *
 * @param symbols - Symbols to index; each needs an `id` plus the properties
 *                  read by getFieldTokens
 * @returns `{ fields, avgFieldLengths, docCount, symbols }` where `symbols`
 *          is a Map from symbol id to the symbol object
 */
export function buildBM25Index(symbols) {
    const fieldNames = ["name", "signature", "docstring", "body"];
    // Helper: a fresh { name, signature, docstring, body } record.
    const perField = (initial) => Object.fromEntries(fieldNames.map((field) => [field, initial()]));
    const fields = perField(() => new Map());
    const lengthTotals = perField(() => 0);
    const symbolMap = new Map();
    for (const symbol of symbols) {
        symbolMap.set(symbol.id, symbol);
        const tokensByField = getFieldTokens(symbol);
        for (const field of fieldNames) {
            const fieldTokens = tokensByField[field];
            lengthTotals[field] += fieldTokens.length;
            const inverted = fields[field];
            for (const [term, count] of countTermFrequencies(fieldTokens)) {
                if (!inverted.has(term)) {
                    inverted.set(term, new Map());
                }
                inverted.get(term).set(symbol.id, count);
            }
        }
    }
    const docCount = symbols.length;
    const avgFieldLengths = perField(() => 0);
    if (docCount > 0) {
        for (const field of fieldNames) {
            avgFieldLengths[field] = lengthTotals[field] / docCount;
        }
    }
    return { fields, avgFieldLengths, docCount, symbols: symbolMap };
}
|
|
95
|
+
/**
 * Rank indexed symbols against a free-text query with per-field BM25 scoring.
 *
 * Every query token is looked up in each field's postings. A hit contributes
 * `idf * tfScore * fieldWeights[field]` to the symbol's score, where idf is
 * `ln((N - df + 0.5) / (df + 0.5) + 1)` and tfScore uses K1, B and the
 * symbol's field length relative to the field average. Symbols whose file
 * satisfies isTestFile are then scaled by TEST_FILE_SCORE_MULTIPLIER.
 *
 * @param index - Index produced by buildBM25Index
 * @param query - Free-text query, tokenized with tokenizeText
 * @param topK - Maximum number of results to return
 * @param fieldWeights - Weight per field: `{ name, signature, docstring, body }`
 * @returns Up to topK `{ symbol, score, matches }` entries sorted by
 *          descending score; `matches` lists the distinct query tokens that
 *          hit the symbol. Empty when the index is empty or the query yields
 *          no tokens.
 */
export function searchBM25(index, query, topK, fieldWeights) {
    const nothingToSearch = index.docCount === 0 || !query.trim();
    if (nothingToSearch) {
        return [];
    }
    const queryTokens = tokenizeText(query);
    if (queryTokens.length === 0) {
        return [];
    }
    const fieldNames = ["name", "signature", "docstring", "body"];

    // Field length of every document, rebuilt from the postings: a document's
    // length in a field is the sum of its term frequencies in that field.
    const fieldLengths = new Map();
    for (const symbolId of index.symbols.keys()) {
        fieldLengths.set(symbolId, { name: 0, signature: 0, docstring: 0, body: 0 });
    }
    for (const field of fieldNames) {
        for (const postings of index.fields[field].values()) {
            for (const [symbolId, freq] of postings) {
                const entry = fieldLengths.get(symbolId);
                if (entry) {
                    entry[field] += freq;
                }
            }
        }
    }

    // Running score per document, and the query tokens that hit each document.
    const scores = new Map();
    const matchedTokens = new Map();
    for (const qToken of queryTokens) {
        for (const field of fieldNames) {
            const postings = index.fields[field].get(qToken);
            if (postings) {
                const df = postings.size;
                const idf = Math.log((index.docCount - df + 0.5) / (df + 0.5) + 1);
                const avgFl = index.avgFieldLengths[field];
                const weight = fieldWeights[field];
                for (const [symbolId, tf] of postings) {
                    const fl = fieldLengths.get(symbolId)?.[field] ?? 0;
                    // With an average length of 0 the length ratio is treated as neutral (1).
                    const norm = avgFl > 0 ? fl / avgFl : 1;
                    const numerator = tf * (K1 + 1);
                    const denominator = tf + K1 * (1 - B + B * norm);
                    const contribution = idf * (numerator / denominator) * weight;
                    scores.set(symbolId, (scores.get(symbolId) ?? 0) + contribution);
                    if (!matchedTokens.has(symbolId)) {
                        matchedTokens.set(symbolId, new Set());
                    }
                    matchedTokens.get(symbolId).add(qToken);
                }
            }
        }
    }

    // Push test-file symbols below comparable production symbols.
    for (const [symbolId, score] of scores) {
        const candidate = index.symbols.get(symbolId);
        if (candidate && isTestFile(candidate.file)) {
            scores.set(symbolId, score * TEST_FILE_SCORE_MULTIPLIER);
        }
    }

    // Highest score first, truncated to topK.
    const ranked = Array.from(scores)
        .sort(([, left], [, right]) => right - left)
        .slice(0, topK);
    const results = [];
    for (const [symbolId, score] of ranked) {
        const symbol = index.symbols.get(symbolId);
        if (symbol) {
            results.push({
                symbol,
                score,
                matches: Array.from(matchedTokens.get(symbolId) ?? []),
            });
        }
    }
    return results;
}
|
|
179
|
+
//# sourceMappingURL=bm25.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bm25.js","sourceRoot":"","sources":["../../src/search/bm25.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,MAAM,+BAA+B,CAAC;AACnE,OAAO,EAAE,UAAU,EAAE,MAAM,uBAAuB,CAAC;AAGnD,kBAAkB;AAClB,MAAM,EAAE,GAAG,GAAG,CAAC;AACf,MAAM,CAAC,GAAG,IAAI,CAAC;AAEf,MAAM,eAAe,GAAG,GAAG,CAAC;AAE5B;;;;GAIG;AACH,MAAM,0BAA0B,GAAG,GAAG,CAAC;AAevC;;;;GAIG;AACH,MAAM,UAAU,YAAY,CAAC,IAAY;IACvC,uCAAuC;IACvC,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IAE7D,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;QAC5B,kEAAkE;QAClE,MAAM,QAAQ,GAAG,IAAI;aAClB,OAAO,CAAC,oBAAoB,EAAE,QAAQ,CAAC;aACvC,OAAO,CAAC,uBAAuB,EAAE,QAAQ,CAAC;aAC1C,KAAK,CAAC,IAAI,CAAC,CAAC;QAEf,KAAK,MAAM,GAAG,IAAI,QAAQ,EAAE,CAAC;YAC3B,MAAM,KAAK,GAAG,GAAG,CAAC,WAAW,EAAE,CAAC;YAChC,IAAI,KAAK,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;gBACtB,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACrB,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,cAAc,CAAC,MAAkB;IACxC,OAAO;QACL,IAAI,EAAE,kBAAkB,CAAC,MAAM,CAAC,IAAI,CAAC;QACrC,SAAS,EAAE,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,YAAY,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,EAAE;QACjE,SAAS,EAAE,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,YAAY,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,EAAE;QACjE,IAAI,EAAE,MAAM,CAAC,MAAM;YACjB,CAAC,CAAC,YAAY,CAAC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,eAAe,CAAC,CAAC;YACvD,CAAC,CAAC,EAAE;KACP,CAAC;AACJ,CAAC;AAED,SAAS,oBAAoB,CAAC,MAAgB;IAC5C,MAAM,EAAE,GAAG,IAAI,GAAG,EAAkB,CAAC;IACrC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,EAAE,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,EAAE,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC1C,CAAC;IACD,OAAO,EAAE,CAAC;AACZ,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,OAAqB;IAClD,MAAM,UAAU,GAAgB,CAAC,MAAM,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,CAAC,CAAC;IAE3E,MAAM,MAAM,GAAwD;QAClE,IAAI,EAAE,IAAI,GAAG,EAAE;QACf,SAAS,EAAE,IAAI,GAAG,EAAE;QACpB,SAAS,EAAE,IAAI,GAAG,EAAE;QACpB,IAAI,EAAE,IAAI,GAAG,EAAE;KAChB,CAAC;IAEF,MAAM,iBAAiB,GAA8B;QACnD,IAAI,EAAE,CAAC;QACP,SAAS,EAAE,CAAC;QACZ,SAAS,EAAE,CAAC;QACZ,IAAI,EAAE,CAAC;KACR,CAAC;IAE
F,MAAM,SAAS,GAAG,IAAI,GAAG,EAAsB,CAAC;IAEhD,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,SAAS,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,EAAE,MAAM,CAAC,CAAC;QACjC,MAAM,WAAW,GAAG,cAAc,CAAC,MAAM,CAAC,CAAC;QAE3C,KAAK,MAAM,KAAK,IAAI,UAAU,EAAE,CAAC;YAC/B,MAAM,MAAM,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC;YAClC,iBAAiB,CAAC,KAAK,CAAC,IAAI,MAAM,CAAC,MAAM,CAAC;YAE1C,MAAM,EAAE,GAAG,oBAAoB,CAAC,MAAM,CAAC,CAAC;YACxC,KAAK,MAAM,CAAC,KAAK,EAAE,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC;gBAC/B,IAAI,QAAQ,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;gBACxC,IAAI,CAAC,QAAQ,EAAE,CAAC;oBACd,QAAQ,GAAG,IAAI,GAAG,EAAE,CAAC;oBACrB,MAAM,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC;gBACrC,CAAC;gBACD,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,EAAE,IAAI,CAAC,CAAC;YAChC,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC;IAChC,MAAM,eAAe,GAA8B;QACjD,IAAI,EAAE,QAAQ,GAAG,CAAC,CAAC,CAAC,CAAC,iBAAiB,CAAC,IAAI,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;QAC1D,SAAS,EAAE,QAAQ,GAAG,CAAC,CAAC,CAAC,CAAC,iBAAiB,CAAC,SAAS,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;QACpE,SAAS,EAAE,QAAQ,GAAG,CAAC,CAAC,CAAC,CAAC,iBAAiB,CAAC,SAAS,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;QACpE,IAAI,EAAE,QAAQ,GAAG,CAAC,CAAC,CAAC,CAAC,iBAAiB,CAAC,IAAI,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;KAC3D,CAAC;IAEF,OAAO,EAAE,MAAM,EAAE,eAAe,EAAE,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE,CAAC;AACnE,CAAC;AAED,MAAM,UAAU,UAAU,CACxB,KAAgB,EAChB,KAAa,EACb,IAAY,EACZ,YAAuC;IAEvC,IAAI,KAAK,CAAC,QAAQ,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,EAAE,CAAC;QAC1C,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,WAAW,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC;IACxC,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC7B,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,UAAU,GAAgB,CAAC,MAAM,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,CAAC,CAAC;IAE3E,iCAAiC;IACjC,MAAM,MAAM,GAAG,IAAI,GAAG,EAAkB,CAAC;IACzC,gDAAgD;IAChD,MAAM,aAAa,GAAG,IAAI,GAAG,EAAuB,CAAC;IAErD,mDAAmD;IACnD,mFAAmF;IACnF,MAAM,YAAY,GAAG,IAAI,GAAG,EAAqC,CAAC;IAElE,KAAK,MAAM,CAAC,QAAQ,CAAC,IAAI,KAAK,CAAC,OAAO,EAAE,CAAC;QACvC,MAAM,OAAO,GAA8B;YACzC,IAAI,EAAE,CAAC;YACP,SAAS,EAAE,CAAC;YACZ,SAAS,EAAE,CAAC;YACZ,IAAI,EAAE,CAAC;SACR,CAAC;QACF,YAAY,CA
AC,GAAG,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IACtC,CAAC;IAED,0EAA0E;IAC1E,KAAK,MAAM,KAAK,IAAI,UAAU,EAAE,CAAC;QAC/B,KAAK,MAAM,CAAC,EAAE,QAAQ,CAAC,IAAI,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC;YAC/C,KAAK,MAAM,CAAC,QAAQ,EAAE,IAAI,CAAC,IAAI,QAAQ,EAAE,CAAC;gBACxC,MAAM,OAAO,GAAG,YAAY,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;gBAC3C,IAAI,OAAO,EAAE,CAAC;oBACZ,OAAO,CAAC,KAAK,CAAC,IAAI,IAAI,CAAC;gBACzB,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,KAAK,MAAM,MAAM,IAAI,WAAW,EAAE,CAAC;QACjC,KAAK,MAAM,KAAK,IAAI,UAAU,EAAE,CAAC;YAC/B,MAAM,QAAQ,GAAG,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;YACjD,IAAI,CAAC,QAAQ;gBAAE,SAAS;YAExB,MAAM,EAAE,GAAG,QAAQ,CAAC,IAAI,CAAC;YACzB,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,QAAQ,GAAG,EAAE,GAAG,GAAG,CAAC,GAAG,CAAC,EAAE,GAAG,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;YACnE,MAAM,KAAK,GAAG,KAAK,CAAC,eAAe,CAAC,KAAK,CAAC,CAAC;YAC3C,MAAM,MAAM,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC;YAEnC,KAAK,MAAM,CAAC,QAAQ,EAAE,EAAE,CAAC,IAAI,QAAQ,EAAE,CAAC;gBACtC,MAAM,EAAE,GAAG,YAAY,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBACpD,MAAM,IAAI,GAAG,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;gBACxC,MAAM,OAAO,GAAG,CAAC,EAAE,GAAG,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC;gBACjE,MAAM,UAAU,GAAG,GAAG,GAAG,OAAO,GAAG,MAAM,CAAC;gBAE1C,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,GAAG,UAAU,CAAC,CAAC;gBAE/D,IAAI,QAAQ,GAAG,aAAa,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;gBAC3C,IAAI,CAAC,QAAQ,EAAE,CAAC;oBACd,QAAQ,GAAG,IAAI,GAAG,EAAE,CAAC;oBACrB,aAAa,CAAC,GAAG,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC;gBACxC,CAAC;gBACD,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;YACvB,CAAC;QACH,CAAC;IACH,CAAC;IAED,uEAAuE;IACvE,KAAK,MAAM,CAAC,QAAQ,EAAE,KAAK,CAAC,IAAI,MAAM,EAAE,CAAC;QACvC,MAAM,MAAM,GAAG,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAC3C,IAAI,MAAM,IAAI,UAAU,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC;YACtC,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,KAAK,GAAG,0BAA0B,CAAC,CAAC;QAC3D,CAAC;IACH,CAAC;IAED,uCAAuC;IACvC,MA
AM,MAAM,GAAG,CAAC,GAAG,MAAM,CAAC,OAAO,EAAE,CAAC;SACjC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;SAC3B,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;IAElB,MAAM,OAAO,GAAmB,EAAE,CAAC;IACnC,KAAK,MAAM,CAAC,QAAQ,EAAE,KAAK,CAAC,IAAI,MAAM,EAAE,CAAC;QACvC,MAAM,MAAM,GAAG,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAC3C,IAAI,CAAC,MAAM;YAAE,SAAS;QAEtB,OAAO,CAAC,IAAI,CAAC;YACX,MAAM;YACN,KAAK;YACL,OAAO,EAAE,CAAC,GAAG,CAAC,aAAa,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC;SAClD,CAAC,CAAC;IACL,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { CodeChunk } from "../types.js";
|
|
2
|
+
/**
|
|
3
|
+
* Split a file's content into overlapping text chunks suitable for embedding.
|
|
4
|
+
*
|
|
5
|
+
* Returns an empty array when the file should be skipped (binary, too large,
|
|
6
|
+
* non-code extension).
|
|
7
|
+
*/
|
|
8
|
+
export declare function chunkFile(file: string, content: string, repo: string): CodeChunk[];
|
|
9
|
+
//# sourceMappingURL=chunker.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chunker.d.ts","sourceRoot":"","sources":["../../src/search/chunker.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAyB7C;;;;;GAKG;AACH,wBAAgB,SAAS,CACvB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,MAAM,EACf,IAAI,EAAE,MAAM,GACX,SAAS,EAAE,CAoDb"}
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
// ===========================================================================
// Chunk sizing and file-skip configuration
// ===========================================================================
const CHUNK_TOKENS = 400; // desired size of one chunk, in tokens
const OVERLAP_TOKENS = 80; // tokens shared by two neighbouring chunks
const CHARS_PER_TOKEN = 4; // crude characters-per-token estimate
const CHUNK_CHARS = CHUNK_TOKENS * CHARS_PER_TOKEN; // = 1600 characters
const OVERLAP_CHARS = OVERLAP_TOKENS * CHARS_PER_TOKEN; // = 320 characters
const MAX_FILE_BYTES = 50_000; // content longer than this is not chunked
// File extensions that are never chunked for embedding
const SKIP_EXTENSIONS = new Set([
    ".json",
    ".lock",
    ".md",
    ".yaml",
    ".yml",
    ".env",
    ".txt",
    ".svg",
    ".png",
    ".wasm",
]);
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
// Public API
|
|
17
|
+
// ---------------------------------------------------------------------------
|
|
18
|
+
/**
 * Split a file's content into overlapping text chunks suitable for embedding.
 *
 * Windows are CHUNK_CHARS characters wide and start every
 * (CHUNK_CHARS - OVERLAP_CHARS) characters, so neighbouring chunks share
 * OVERLAP_CHARS characters. Chunking stops as soon as a window reaches the
 * end of the content, so no trailing chunk is produced that lies entirely
 * inside the previous one.
 *
 * @param file - File path; used for the extension check, the chunk id and
 *               each chunk's `file` field
 * @param content - Full text of the file
 * @param repo - Repository identifier, used as the chunk id prefix
 * @returns Chunks shaped `{ id, file, startLine, endLine, text, tokenCount }`
 *          with 1-based, inclusive line numbers. Returns an empty array when
 *          the file is skipped (skip-listed extension, content too long,
 *          content contains a NUL character) or the content is empty.
 */
export function chunkFile(file, content, repo) {
    // Skip non-code file types
    const dotIdx = file.lastIndexOf(".");
    const ext = dotIdx !== -1 ? file.slice(dotIdx) : "";
    if (SKIP_EXTENSIONS.has(ext)) {
        return [];
    }
    // Skip files that are too large.
    // NOTE: `length` counts UTF-16 code units, which only approximates bytes.
    if (content.length > MAX_FILE_BYTES) {
        return [];
    }
    // Skip binary files (a NUL character is treated as the signal)
    if (content.includes("\0")) {
        return [];
    }
    const totalChars = content.length;
    if (totalChars === 0) {
        return [];
    }

    // lineStartOffset[i] = character offset at which 0-based line i begins
    const lineStartOffset = [];
    let offset = 0;
    for (const line of content.split("\n")) {
        lineStartOffset.push(offset);
        offset += line.length + 1; // +1 for the '\n'
    }

    const advance = CHUNK_CHARS - OVERLAP_CHARS;
    const chunks = [];
    for (let chunkStart = 0; chunkStart < totalChars; chunkStart += advance) {
        const chunkEnd = Math.min(chunkStart + CHUNK_CHARS, totalChars);
        const text = content.slice(chunkStart, chunkEnd);
        // Map char offsets to 1-based line numbers (chunkEnd is exclusive)
        const startLine = charOffsetToLine(chunkStart, lineStartOffset) + 1;
        const endLine = charOffsetToLine(chunkEnd - 1, lineStartOffset) + 1;
        const tokenCount = Math.ceil(text.length / CHARS_PER_TOKEN);
        // NOTE(review): ids are keyed by start line, so two windows that begin
        // on the same (very long) line share an id — confirm whether consumers
        // rely on ids being unique.
        const id = `${repo}:${file}:${startLine}`;
        chunks.push({ id, file, startLine, endLine, text, tokenCount });
        // This window already covers the tail of the file; another window
        // would only repeat text that is fully contained in this chunk.
        if (chunkEnd >= totalChars) {
            break;
        }
    }
    return chunks;
}
|
|
69
|
+
// ---------------------------------------------------------------------------
|
|
70
|
+
// Internal helpers
|
|
71
|
+
// ---------------------------------------------------------------------------
|
|
72
|
+
/**
 * Locate the line containing a character offset.
 *
 * Binary-searches `lineStartOffset` for the last entry that is less than or
 * equal to `charOffset`.
 *
 * @param charOffset - Character offset into the content
 * @param lineStartOffset - Start offset of every line, in ascending order
 * @returns 0-based index of the matching line; 0 when no entry qualifies
 *          (offset before the first line, or an empty array)
 */
function charOffsetToLine(charOffset, lineStartOffset) {
    let low = 0;
    let high = lineStartOffset.length - 1;
    while (high > low) {
        // Round the midpoint up so that `low = probe` always makes progress.
        const probe = (low + high + 1) >> 1;
        const start = lineStartOffset[probe];
        if (start === undefined || start > charOffset) {
            high = probe - 1;
            continue;
        }
        low = probe;
    }
    return low;
}
|
|
91
|
+
//# sourceMappingURL=chunker.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chunker.js","sourceRoot":"","sources":["../../src/search/chunker.ts"],"names":[],"mappings":"AAEA,8EAA8E;AAC9E,qBAAqB;AACrB,8EAA8E;AAE9E,MAAM,YAAY,GAAG,GAAG,CAAC,CAAW,0BAA0B;AAC9D,MAAM,cAAc,GAAG,EAAE,CAAC,CAAU,qCAAqC;AACzE,MAAM,eAAe,GAAG,CAAC,CAAC,CAAU,sBAAsB;AAE1D,MAAM,WAAW,GAAG,YAAY,GAAG,eAAe,CAAC,CAAG,OAAO;AAC7D,MAAM,aAAa,GAAG,cAAc,GAAG,eAAe,CAAC,CAAC,MAAM;AAE9D,MAAM,cAAc,GAAG,MAAM,CAAC,CAAM,oBAAoB;AAExD,6DAA6D;AAC7D,MAAM,eAAe,GAAG,IAAI,GAAG,CAAC;IAC9B,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,MAAM;IACxC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO;CACxC,CAAC,CAAC;AAEH,8EAA8E;AAC9E,aAAa;AACb,8EAA8E;AAE9E;;;;;GAKG;AACH,MAAM,UAAU,SAAS,CACvB,IAAY,EACZ,OAAe,EACf,IAAY;IAEZ,2BAA2B;IAC3B,MAAM,MAAM,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IACrC,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IACpD,IAAI,eAAe,CAAC,GAAG,CAAC,GAAG,CAAC;QAAE,OAAO,EAAE,CAAC;IAExC,gCAAgC;IAChC,IAAI,OAAO,CAAC,MAAM,GAAG,cAAc;QAAE,OAAO,EAAE,CAAC;IAE/C,kEAAkE;IAClE,IAAI,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC;QAAE,OAAO,EAAE,CAAC;IAEtC,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAClC,MAAM,UAAU,GAAG,KAAK,CAAC,MAAM,CAAC;IAEhC,IAAI,UAAU,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEhC,2EAA2E;IAC3E,wEAAwE;IACxE,MAAM,eAAe,GAAa,IAAI,KAAK,CAAC,UAAU,CAAC,CAAC;IACxD,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;QACpC,eAAe,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC;QAC5B,MAAM,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,MAAM,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,kBAAkB;IAC3D,CAAC;IACD,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC;IAElC,MAAM,MAAM,GAAgB,EAAE,CAAC;IAC/B,IAAI,UAAU,GAAG,CAAC,CAAC,CAAC,sCAAsC;IAE1D,OAAO,UAAU,GAAG,UAAU,EAAE,CAAC;QAC/B,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,GAAG,WAAW,EAAE,UAAU,CAAC,CAAC;QAChE,MAAM,IAAI,GAAG,OAAO,CAAC,KAAK,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC;QAEjD,2CAA2C;QAC3C,MAAM,SAAS,GAAG,gBAAgB,CAAC,UAAU,EAAE,eAAe,CAAC,GAAG,CAAC,CAAC;QACpE,MAAM,OAAO,GAAG,gBAAgB,CAAC,QAAQ,GAAG,CAAC,EAAE,eAAe,CAAC,GAAG,CAAC,CAAC;
QAEpE,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,eAAe,CAAC,CAAC;QAE5D,MAAM,EAAE,GAAG,GAAG,IAAI,IAAI,IAAI,IAAI,SAAS,EAAE,CAAC;QAC1C,MAAM,CAAC,IAAI,CAAC,EAAE,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,CAAC,CAAC;QAEhE,oEAAoE;QACpE,MAAM,OAAO,GAAG,WAAW,GAAG,aAAa,CAAC;QAC5C,UAAU,IAAI,OAAO,CAAC;QAEtB,uEAAuE;QACvE,IAAI,UAAU,IAAI,UAAU;YAAE,MAAM;IACtC,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,8EAA8E;AAC9E,mBAAmB;AACnB,8EAA8E;AAE9E;;;GAGG;AACH,SAAS,gBAAgB,CAAC,UAAkB,EAAE,eAAyB;IACrE,IAAI,EAAE,GAAG,CAAC,CAAC;IACX,IAAI,EAAE,GAAG,eAAe,CAAC,MAAM,GAAG,CAAC,CAAC;IAEpC,OAAO,EAAE,GAAG,EAAE,EAAE,CAAC;QACf,MAAM,GAAG,GAAG,CAAC,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC;QAC/B,MAAM,SAAS,GAAG,eAAe,CAAC,GAAG,CAAC,CAAC;QACvC,IAAI,SAAS,KAAK,SAAS,IAAI,SAAS,IAAI,UAAU,EAAE,CAAC;YACvD,EAAE,GAAG,GAAG,CAAC;QACX,CAAC;aAAM,CAAC;YACN,EAAE,GAAG,GAAG,GAAG,CAAC,CAAC;QACf,CAAC;IACH,CAAC;IAED,OAAO,EAAE,CAAC;AACZ,CAAC"}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import type { SearchResult } from "../types.js";
|
|
2
|
+
/**
|
|
3
|
+
* Reciprocal Rank Fusion (RRF) combining BM25 + semantic search results.
|
|
4
|
+
*
|
|
5
|
+
* Formula: score(d) = Σ 1/(k + rank_i(d))
|
|
6
|
+
* where k=60 is the standard RRF constant and rank_i is the rank
|
|
7
|
+
* of document d in result list i (1-based).
|
|
8
|
+
*
|
|
9
|
+
* @param bm25Results - Results from BM25 search, ordered by score
|
|
10
|
+
* @param semanticResults - Results from semantic search, ordered by similarity
|
|
11
|
+
* @param topK - Number of results to return
|
|
12
|
+
* @param k - RRF constant (default 60)
|
|
13
|
+
* @returns Merged results sorted by RRF score
|
|
14
|
+
*/
|
|
15
|
+
export declare function hybridRank(bm25Results: SearchResult[], semanticResults: SearchResult[], topK: number, k?: number): SearchResult[];
|
|
16
|
+
//# sourceMappingURL=hybrid.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"hybrid.d.ts","sourceRoot":"","sources":["../../src/search/hybrid.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAEhD;;;;;;;;;;;;GAYG;AACH,wBAAgB,UAAU,CACxB,WAAW,EAAE,YAAY,EAAE,EAC3B,eAAe,EAAE,YAAY,EAAE,EAC/B,IAAI,EAAE,MAAM,EACZ,CAAC,SAAK,GACL,YAAY,EAAE,CAwChB"}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
/**
 * Reciprocal Rank Fusion (RRF) combining BM25 + semantic search results.
 *
 * Formula: score(d) = Σ 1/(k + rank_i(d))
 * where k=60 is the standard RRF constant and rank_i is the rank
 * of document d in result list i (1-based).
 *
 * Each list contributes at most one term per symbol id: if an id occurs more
 * than once in the same list, only its first (best-ranked) occurrence counts.
 * The returned entry reuses `symbol` (and `matches`, when present) from the
 * first result seen for that id, with BM25 results visited before semantic ones.
 *
 * @param bm25Results - Results from BM25 search, ordered by score
 * @param semanticResults - Results from semantic search, ordered by similarity
 * @param topK - Number of results to return; a negative value yields no results
 * @param k - RRF constant (default 60)
 * @returns Merged results sorted by RRF score, highest first
 */
export function hybridRank(bm25Results, semanticResults, topK, k = 60) {
    const scores = new Map();
    const firstSeen = new Map();

    // Fold one ranked list into the shared score table.
    const accumulate = (ranked) => {
        const counted = new Set();
        for (let i = 0; i < ranked.length; i++) {
            const result = ranked[i];
            const id = result.symbol.id;
            // A repeated id inside one list must not add a second RRF term.
            if (counted.has(id)) {
                continue;
            }
            counted.add(id);
            scores.set(id, (scores.get(id) ?? 0) + 1 / (k + i + 1));
            if (!firstSeen.has(id)) {
                firstSeen.set(id, result);
            }
        }
    };

    accumulate(bm25Results);
    accumulate(semanticResults);

    // slice(0, negative) would mean "drop the last n", not "return nothing".
    const limit = topK < 0 ? 0 : topK;

    return [...scores.entries()]
        .sort((a, b) => b[1] - a[1])
        .slice(0, limit)
        .map(([id, score]) => {
            const original = firstSeen.get(id);
            const merged = { symbol: original.symbol, score };
            if (original.matches) {
                merged.matches = original.matches;
            }
            return merged;
        });
}
|
|
51
|
+
//# sourceMappingURL=hybrid.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"hybrid.js","sourceRoot":"","sources":["../../src/search/hybrid.ts"],"names":[],"mappings":"AAEA;;;;;;;;;;;;GAYG;AACH,MAAM,UAAU,UAAU,CACxB,WAA2B,EAC3B,eAA+B,EAC/B,IAAY,EACZ,CAAC,GAAG,EAAE;IAEN,MAAM,MAAM,GAAG,IAAI,GAAG,EAAkB,CAAC;IACzC,MAAM,YAAY,GAAG,IAAI,GAAG,EAAwB,CAAC;IAErD,uBAAuB;IACvB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,WAAW,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5C,MAAM,MAAM,GAAG,WAAW,CAAC,CAAC,CAAE,CAAC;QAC/B,MAAM,EAAE,GAAG,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC;QAC5B,MAAM,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QACxD,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC;YAC1B,YAAY,CAAC,GAAG,CAAC,EAAE,EAAE,MAAM,CAAC,CAAC;QAC/B,CAAC;IACH,CAAC;IAED,2BAA2B;IAC3B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,eAAe,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAChD,MAAM,MAAM,GAAG,eAAe,CAAC,CAAC,CAAE,CAAC;QACnC,MAAM,EAAE,GAAG,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC;QAC5B,MAAM,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QACxD,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC;YAC1B,YAAY,CAAC,GAAG,CAAC,EAAE,EAAE,MAAM,CAAC,CAAC;QAC/B,CAAC;IACH,CAAC;IAED,6BAA6B;IAC7B,MAAM,MAAM,GAAG,CAAC,GAAG,MAAM,CAAC,OAAO,EAAE,CAAC;SACjC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;SAC3B,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;IAElB,MAAM,OAAO,GAAmB,EAAE,CAAC;IACnC,KAAK,MAAM,CAAC,EAAE,EAAE,KAAK,CAAC,IAAI,MAAM,EAAE,CAAC;QACjC,MAAM,QAAQ,GAAG,YAAY,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QACtC,IAAI,QAAQ,EAAE,CAAC;YACb,MAAM,MAAM,GAAiB,EAAE,MAAM,EAAE,QAAQ,CAAC,MAAM,EAAE,KAAK,EAAE,CAAC;YAChE,IAAI,QAAQ,CAAC,OAAO;gBAAE,MAAM,CAAC,OAAO,GAAG,QAAQ,CAAC,OAAO,CAAC;YACxD,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvB,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC"}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import type { CodeSymbol, SearchResult } from "../types.js";
|
|
2
|
+
export interface EmbeddingProvider {
|
|
3
|
+
embed(texts: string[]): Promise<number[][]>;
|
|
4
|
+
readonly dimensions: number;
|
|
5
|
+
readonly model: string;
|
|
6
|
+
}
|
|
7
|
+
/**
|
|
8
|
+
* Build a searchable text string from a symbol for embedding.
|
|
9
|
+
* Format: "{kind} {name}\n{signature}\n{docstring first line}\n{body first 200 chars}"
|
|
10
|
+
*/
|
|
11
|
+
export declare function buildSymbolText(symbol: CodeSymbol): string;
|
|
12
|
+
export declare function cosineSimilarity(a: Float32Array, b: Float32Array): number;
|
|
13
|
+
/**
|
|
14
|
+
* Search embeddings by cosine similarity (linear scan).
|
|
15
|
+
* Returns top-k results sorted by similarity descending.
|
|
16
|
+
*/
|
|
17
|
+
export declare function searchSemantic(queryEmbedding: Float32Array, embeddings: Map<string, Float32Array>, symbols: Map<string, CodeSymbol>, topK: number): SearchResult[];
|
|
18
|
+
export declare class VoyageProvider implements EmbeddingProvider {
|
|
19
|
+
readonly model = "voyage-code-3";
|
|
20
|
+
readonly dimensions = 1024;
|
|
21
|
+
private apiKey;
|
|
22
|
+
constructor(apiKey: string);
|
|
23
|
+
embed(texts: string[]): Promise<number[][]>;
|
|
24
|
+
}
|
|
25
|
+
export declare class OpenAIProvider implements EmbeddingProvider {
|
|
26
|
+
readonly model = "text-embedding-3-small";
|
|
27
|
+
readonly dimensions = 1536;
|
|
28
|
+
private apiKey;
|
|
29
|
+
constructor(apiKey: string);
|
|
30
|
+
embed(texts: string[]): Promise<number[][]>;
|
|
31
|
+
}
|
|
32
|
+
export declare class OllamaProvider implements EmbeddingProvider {
|
|
33
|
+
readonly model = "nomic-embed-text";
|
|
34
|
+
readonly dimensions = 768;
|
|
35
|
+
private baseUrl;
|
|
36
|
+
constructor(baseUrl: string);
|
|
37
|
+
embed(texts: string[]): Promise<number[][]>;
|
|
38
|
+
}
|
|
39
|
+
export declare function createEmbeddingProvider(provider: "voyage" | "openai" | "ollama", config: {
|
|
40
|
+
voyageApiKey?: string | null;
|
|
41
|
+
openaiApiKey?: string | null;
|
|
42
|
+
ollamaUrl?: string | null;
|
|
43
|
+
}): EmbeddingProvider;
|
|
44
|
+
//# sourceMappingURL=semantic.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"semantic.d.ts","sourceRoot":"","sources":["../../src/search/semantic.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAM5D,MAAM,WAAW,iBAAiB;IAChC,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;IAC5C,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;CACxB;AAED;;;GAGG;AACH,wBAAgB,eAAe,CAAC,MAAM,EAAE,UAAU,GAAG,MAAM,CAiB1D;AAMD,wBAAgB,gBAAgB,CAAC,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,YAAY,GAAG,MAAM,CAezE;AAED;;;GAGG;AACH,wBAAgB,cAAc,CAC5B,cAAc,EAAE,YAAY,EAC5B,UAAU,EAAE,GAAG,CAAC,MAAM,EAAE,YAAY,CAAC,EACrC,OAAO,EAAE,GAAG,CAAC,MAAM,EAAE,UAAU,CAAC,EAChC,IAAI,EAAE,MAAM,GACX,YAAY,EAAE,CAoBhB;AAoBD,qBAAa,cAAe,YAAW,iBAAiB;IACtD,QAAQ,CAAC,KAAK,mBAAmB;IACjC,QAAQ,CAAC,UAAU,QAAQ;IAC3B,OAAO,CAAC,MAAM,CAAS;gBAEX,MAAM,EAAE,MAAM;IAIpB,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;CAyBlD;AAMD,qBAAa,cAAe,YAAW,iBAAiB;IACtD,QAAQ,CAAC,KAAK,4BAA4B;IAC1C,QAAQ,CAAC,UAAU,QAAQ;IAC3B,OAAO,CAAC,MAAM,CAAS;gBAEX,MAAM,EAAE,MAAM;IAIpB,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;CAwBlD;AAMD,qBAAa,cAAe,YAAW,iBAAiB;IACtD,QAAQ,CAAC,KAAK,sBAAsB;IACpC,QAAQ,CAAC,UAAU,OAAO;IAC1B,OAAO,CAAC,OAAO,CAAS;gBAEZ,OAAO,EAAE,MAAM;IAIrB,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;CA4BlD;AAMD,wBAAgB,uBAAuB,CACrC,QAAQ,EAAE,QAAQ,GAAG,QAAQ,GAAG,QAAQ,EACxC,MAAM,EAAE;IAAE,YAAY,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAAC,YAAY,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAAC,SAAS,CAAC,EAAE,MAAM,GAAG,IAAI,CAAA;CAAE,GAChG,iBAAiB,CAenB"}
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
/**
 * Assemble the text that represents a symbol when it is embedded.
 *
 * The pieces are joined with newlines, in this order, and any piece that is
 * missing or empty is left out:
 *   1. "{kind} {name}" (always present)
 *   2. the signature
 *   3. the trimmed first line of the docstring
 *   4. the first 200 characters of the source
 *
 * @param symbol - Symbol to describe; reads `kind`, `name`, `signature`,
 *   `docstring` and `source`
 * @returns The newline-joined text
 */
export function buildSymbolText(symbol) {
    const { kind, name, signature, docstring, source } = symbol;
    // Only the very first docstring line is considered, even if it is blank.
    const docHeadline = docstring ? docstring.split("\n")[0]?.trim() : "";
    const sourcePreview = source ? source.slice(0, 200) : "";
    return [`${kind} ${name}`, signature, docHeadline, sourcePreview]
        .filter(Boolean)
        .join("\n");
}
|
|
20
|
+
// ---------------------------------------------------------------------------
|
|
21
|
+
// Cosine similarity
|
|
22
|
+
// ---------------------------------------------------------------------------
|
|
23
|
+
/**
 * Cosine similarity of two equally sized numeric vectors.
 *
 * @param a - First vector
 * @param b - Second vector
 * @returns dot(a, b) / (|a| * |b|); 0 when the vectors differ in length or
 *   when either one has zero magnitude
 */
export function cosineSimilarity(a, b) {
    // Vectors of different dimensionality cannot be compared: reading past the
    // end of the shorter one would yield NaN, and ignoring the tail of the
    // longer one would yield a misleading score.
    if (a.length !== b.length) {
        return 0;
    }
    let dot = 0;
    let normA = 0;
    let normB = 0;
    for (let i = 0; i < a.length; i++) {
        const ai = a[i];
        const bi = b[i];
        dot += ai * bi;
        normA += ai * ai;
        normB += bi * bi;
    }
    const denom = Math.sqrt(normA) * Math.sqrt(normB);
    // A zero-magnitude vector has no direction; report "no similarity".
    return denom > 0 ? dot / denom : 0;
}
|
|
37
|
+
/**
 * Search embeddings by cosine similarity (linear scan).
 * Returns top-k results sorted by similarity descending.
 *
 * Embeddings whose length differs from the query's, and embeddings whose id
 * has no entry in `symbols`, are skipped before ranking so they cannot take
 * up slots in the top-k.
 *
 * @param queryEmbedding - Embedding of the query
 * @param embeddings - Map from symbol id to that symbol's embedding
 * @param symbols - Map from symbol id to the symbol itself
 * @param topK - Maximum number of results to return
 * @returns Up to `topK` `{ symbol, score }` entries, best match first
 */
export function searchSemantic(queryEmbedding, embeddings, symbols, topK) {
    const scored = [];
    for (const [id, vec] of embeddings) {
        if (vec.length !== queryEmbedding.length) {
            continue;
        }
        // Resolve the symbol first: an id without a symbol can never be
        // returned, so scoring it would only waste work and a top-k slot.
        const symbol = symbols.get(id);
        if (!symbol) {
            continue;
        }
        scored.push({ symbol, score: cosineSimilarity(queryEmbedding, vec) });
    }
    scored.sort((a, b) => b.score - a.score);
    return scored.slice(0, topK);
}
|
|
59
|
+
// ---------------------------------------------------------------------------
|
|
60
|
+
// Response shape guard for OpenAI / Voyage embedding APIs
|
|
61
|
+
// ---------------------------------------------------------------------------
|
|
62
|
+
/**
 * Shape guard for an embeddings API payload.
 *
 * @param data - Parsed response body of unknown shape
 * @returns true when `data` is a non-null object whose `data` property is an
 *   array and every element of that array is a non-null object carrying an
 *   array-valued `embedding`; false otherwise
 */
function isEmbeddingResponse(data) {
    if (typeof data !== "object" || !data) {
        return false;
    }
    const entries = data["data"];
    if (!Array.isArray(entries)) {
        return false;
    }
    const isMalformed = (entry) =>
        entry === null || typeof entry !== "object" || !Array.isArray(entry["embedding"]);
    return !entries.some(isMalformed);
}
|
|
70
|
+
// ---------------------------------------------------------------------------
|
|
71
|
+
// Voyage AI provider
|
|
72
|
+
// ---------------------------------------------------------------------------
|
|
73
|
+
/**
 * Embedding provider backed by the Voyage AI HTTP API.
 */
export class VoyageProvider {
    model = "voyage-code-3";
    dimensions = 1024;
    apiKey;

    /**
     * @param apiKey - Key sent as a Bearer token on every request
     */
    constructor(apiKey) {
        this.apiKey = apiKey;
    }

    /**
     * Embed a batch of texts with a single request.
     *
     * @param texts - Texts to embed
     * @returns One embedding per entry of the response's `data` array, in
     *   response order; an empty array when `texts` is empty
     * @throws Error when the HTTP status is not ok or the response body does
     *   not have the expected shape
     */
    async embed(texts) {
        // Nothing to embed: skip the network round trip entirely.
        if (texts.length === 0) {
            return [];
        }
        const response = await fetch("https://api.voyageai.com/v1/embeddings", {
            method: "POST",
            headers: {
                "Content-Type": "application/json",
                Authorization: `Bearer ${this.apiKey}`,
            },
            body: JSON.stringify({
                input: texts,
                model: this.model,
                input_type: "document",
            }),
        });
        if (!response.ok) {
            const body = await response.text();
            throw new Error(`Voyage API error ${response.status}: ${body}`);
        }
        const data = await response.json();
        if (!isEmbeddingResponse(data)) {
            // Only the first 200 characters are quoted to keep the message short.
            throw new Error(`Unexpected Voyage API response shape: ${JSON.stringify(data).slice(0, 200)}`);
        }
        return data.data.map((d) => d.embedding);
    }
}
|
|
104
|
+
// ---------------------------------------------------------------------------
|
|
105
|
+
// OpenAI provider
|
|
106
|
+
// ---------------------------------------------------------------------------
|
|
107
|
+
/**
 * Embedding provider backed by the OpenAI HTTP API.
 */
export class OpenAIProvider {
    model = "text-embedding-3-small";
    dimensions = 1536;
    apiKey;

    /**
     * @param apiKey - Key sent as a Bearer token on every request
     */
    constructor(apiKey) {
        this.apiKey = apiKey;
    }

    /**
     * Embed a batch of texts with a single request.
     *
     * @param texts - Texts to embed
     * @returns One embedding per entry of the response's `data` array, in
     *   response order; an empty array when `texts` is empty
     * @throws Error when the HTTP status is not ok or the response body does
     *   not have the expected shape
     */
    async embed(texts) {
        // Nothing to embed: skip the network round trip entirely.
        if (texts.length === 0) {
            return [];
        }
        const response = await fetch("https://api.openai.com/v1/embeddings", {
            method: "POST",
            headers: {
                "Content-Type": "application/json",
                Authorization: `Bearer ${this.apiKey}`,
            },
            body: JSON.stringify({
                input: texts,
                model: this.model,
            }),
        });
        if (!response.ok) {
            const body = await response.text();
            throw new Error(`OpenAI API error ${response.status}: ${body}`);
        }
        const data = await response.json();
        if (!isEmbeddingResponse(data)) {
            // Only the first 200 characters are quoted to keep the message short.
            throw new Error(`Unexpected OpenAI API response shape: ${JSON.stringify(data).slice(0, 200)}`);
        }
        return data.data.map((d) => d.embedding);
    }
}
|
|
137
|
+
// ---------------------------------------------------------------------------
|
|
138
|
+
// Ollama provider (local)
|
|
139
|
+
// ---------------------------------------------------------------------------
|
|
140
|
+
/**
 * Embedding provider that talks to an Ollama server over HTTP.
 */
export class OllamaProvider {
    model = "nomic-embed-text";
    dimensions = 768;
    baseUrl;

    /**
     * @param baseUrl - Server base URL; one trailing slash, if present, is removed
     */
    constructor(baseUrl) {
        this.baseUrl = baseUrl.replace(/\/$/, "");
    }

    /**
     * Embed texts one request at a time, in input order.
     *
     * @param texts - Texts to embed
     * @returns One embedding per text, in the same order as `texts`
     * @throws Error when a request returns a non-ok status or a body without
     *   an array-valued `embedding`
     */
    async embed(texts) {
        const vectors = [];
        // One POST per text, each awaited before the next is sent.
        for (const prompt of texts) {
            const payload = JSON.stringify({
                model: this.model,
                prompt,
            });
            const reply = await fetch(`${this.baseUrl}/api/embeddings`, {
                method: "POST",
                headers: { "Content-Type": "application/json" },
                body: payload,
            });
            if (!reply.ok) {
                const body = await reply.text();
                throw new Error(`Ollama API error ${reply.status}: ${body}`);
            }
            const parsed = await reply.json();
            const hasEmbedding = Boolean(parsed) &&
                typeof parsed === "object" &&
                "embedding" in parsed &&
                Array.isArray(parsed["embedding"]);
            if (!hasEmbedding) {
                throw new Error(`Unexpected Ollama API response shape: ${JSON.stringify(parsed).slice(0, 200)}`);
            }
            vectors.push(parsed.embedding);
        }
        return vectors;
    }
}
|
|
172
|
+
// ---------------------------------------------------------------------------
|
|
173
|
+
// Factory
|
|
174
|
+
// ---------------------------------------------------------------------------
|
|
175
|
+
/**
 * Create the embedding provider selected by name.
 *
 * @param provider - "voyage", "openai" or "ollama"
 * @param config - Settings read per provider: `voyageApiKey`, `openaiApiKey`
 *   or `ollamaUrl`
 * @returns The matching provider instance
 * @throws Error when the setting required by the chosen provider is missing
 *   or empty, or when `provider` is not one of the supported names
 */
export function createEmbeddingProvider(provider, config) {
    switch (provider) {
        case "voyage": {
            if (!config.voyageApiKey)
                throw new Error("CODESIFT_VOYAGE_API_KEY not set");
            return new VoyageProvider(config.voyageApiKey);
        }
        case "openai": {
            if (!config.openaiApiKey)
                throw new Error("CODESIFT_OPENAI_API_KEY not set");
            return new OpenAIProvider(config.openaiApiKey);
        }
        case "ollama": {
            if (!config.ollamaUrl)
                throw new Error("CODESIFT_OLLAMA_URL not set");
            return new OllamaProvider(config.ollamaUrl);
        }
        default:
            // Fail here rather than hand the caller `undefined`, which would
            // only surface later as an unrelated TypeError.
            throw new Error(`Unknown embedding provider: ${String(provider)}`);
    }
}
|
|
194
|
+
//# sourceMappingURL=semantic.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"semantic.js","sourceRoot":"","sources":["../../src/search/semantic.ts"],"names":[],"mappings":"AAYA;;;GAGG;AACH,MAAM,UAAU,eAAe,CAAC,MAAkB;IAChD,MAAM,KAAK,GAAa,CAAC,GAAG,MAAM,CAAC,IAAI,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;IAE1D,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC;QACrB,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;IAC/B,CAAC;IAED,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC;QACrB,MAAM,SAAS,GAAG,MAAM,CAAC,SAAS,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC;QAC1D,IAAI,SAAS;YAAE,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACvC,CAAC;IAED,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;QAClB,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC;IAC1C,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,8EAA8E;AAC9E,oBAAoB;AACpB,8EAA8E;AAE9E,MAAM,UAAU,gBAAgB,CAAC,CAAe,EAAE,CAAe;IAC/D,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,KAAK,GAAG,CAAC,CAAC;IAEd,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAClC,MAAM,EAAE,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC;QACjB,MAAM,EAAE,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC;QACjB,GAAG,IAAI,EAAE,GAAG,EAAE,CAAC;QACf,KAAK,IAAI,EAAE,GAAG,EAAE,CAAC;QACjB,KAAK,IAAI,EAAE,GAAG,EAAE,CAAC;IACnB,CAAC;IAED,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAClD,OAAO,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;AACrC,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAC5B,cAA4B,EAC5B,UAAqC,EACrC,OAAgC,EAChC,IAAY;IAEZ,MAAM,MAAM,GAAyC,EAAE,CAAC;IAExD,KAAK,MAAM,CAAC,EAAE,EAAE,GAAG,CAAC,IAAI,UAAU,EAAE,CAAC;QACnC,IAAI,GAAG,CAAC,MAAM,KAAK,cAAc,CAAC,MAAM;YAAE,SAAS;QACnD,MAAM,KAAK,GAAG,gBAAgB,CAAC,cAAc,EAAE,GAAG,CAAC,CAAC;QACpD,MAAM,CAAC,IAAI,CAAC,EAAE,EAAE,EAAE,KAAK,EAAE,CAAC,CAAC;IAC7B,CAAC;IAED,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IAEzC,MAAM,OAAO,GAAmB,EAAE,CAAC;IACnC,KAAK,MAAM,EAAE,EAAE,EAAE,KAAK,EAAE,IAAI,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,EAAE,CAAC;QAClD,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,C
AAC,EAAE,CAAC,CAAC;QAC/B,IAAI,MAAM,EAAE,CAAC;YACX,OAAO,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,CAAC;QAClC,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,8EAA8E;AAC9E,0DAA0D;AAC1D,8EAA8E;AAE9E,SAAS,mBAAmB,CAAC,IAAa;IACxC,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ;QAAE,OAAO,KAAK,CAAC;IACpD,MAAM,GAAG,GAAG,IAA+B,CAAC;IAC5C,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QAAE,OAAO,KAAK,CAAC;IAC9C,OAAO,GAAG,CAAC,MAAM,CAAC,CAAC,KAAK,CACtB,CAAC,IAAa,EAAE,EAAE,CAChB,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,KAAK,IAAI,IAAI,KAAK,CAAC,OAAO,CAAE,IAAgC,CAAC,WAAW,CAAC,CAAC,CAC7G,CAAC;AACJ,CAAC;AAED,8EAA8E;AAC9E,qBAAqB;AACrB,8EAA8E;AAE9E,MAAM,OAAO,cAAc;IAChB,KAAK,GAAG,eAAe,CAAC;IACxB,UAAU,GAAG,IAAI,CAAC;IACnB,MAAM,CAAS;IAEvB,YAAY,MAAc;QACxB,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACvB,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,KAAe;QACzB,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,wCAAwC,EAAE;YACrE,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,aAAa,EAAE,UAAU,IAAI,CAAC,MAAM,EAAE;aACvC;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;gBACnB,KAAK,EAAE,KAAK;gBACZ,KAAK,EAAE,IAAI,CAAC,KAAK;gBACjB,UAAU,EAAE,UAAU;aACvB,CAAC;SACH,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACnC,MAAM,IAAI,KAAK,CAAC,oBAAoB,QAAQ,CAAC,MAAM,KAAK,IAAI,EAAE,CAAC,CAAC;QAClE,CAAC;QAED,MAAM,IAAI,GAAY,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QAC5C,IAAI,CAAC,mBAAmB,CAAC,IAAI,CAAC,EAAE,CAAC;YAC/B,MAAM,IAAI,KAAK,CAAC,yCAAyC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC;QACjG,CAAC;QACD,OAAO,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;IAC3C,CAAC;CACF;AAED,8EAA8E;AAC9E,kBAAkB;AAClB,8EAA8E;AAE9E,MAAM,OAAO,cAAc;IAChB,KAAK,GAAG,wBAAwB,CAAC;IACjC,UAAU,GAAG,IAAI,CAAC;IACnB,MAAM,CAAS;IAEvB,YAAY,MAAc;QACxB,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACvB,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,KAAe;QACzB,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,sCAAsC,EAAE;YACnE,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,aAAa,EAAE,UAAU,IAAI,CAAC,MAAM,EAAE;aACvC;YACD,IAAI,EAAE,IAAI,CA
AC,SAAS,CAAC;gBACnB,KAAK,EAAE,KAAK;gBACZ,KAAK,EAAE,IAAI,CAAC,KAAK;aAClB,CAAC;SACH,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACnC,MAAM,IAAI,KAAK,CAAC,oBAAoB,QAAQ,CAAC,MAAM,KAAK,IAAI,EAAE,CAAC,CAAC;QAClE,CAAC;QAED,MAAM,IAAI,GAAY,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QAC5C,IAAI,CAAC,mBAAmB,CAAC,IAAI,CAAC,EAAE,CAAC;YAC/B,MAAM,IAAI,KAAK,CAAC,yCAAyC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC;QACjG,CAAC;QACD,OAAO,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;IAC3C,CAAC;CACF;AAED,8EAA8E;AAC9E,0BAA0B;AAC1B,8EAA8E;AAE9E,MAAM,OAAO,cAAc;IAChB,KAAK,GAAG,kBAAkB,CAAC;IAC3B,UAAU,GAAG,GAAG,CAAC;IAClB,OAAO,CAAS;IAExB,YAAY,OAAe;QACzB,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;IAC5C,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,KAAe;QACzB,mDAAmD;QACnD,MAAM,OAAO,GAAe,EAAE,CAAC;QAE/B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,IAAI,CAAC,OAAO,iBAAiB,EAAE;gBAC7D,MAAM,EAAE,MAAM;gBACd,OAAO,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE;gBAC/C,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;oBACnB,KAAK,EAAE,IAAI,CAAC,KAAK;oBACjB,MAAM,EAAE,IAAI;iBACb,CAAC;aACH,CAAC,CAAC;YAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;gBACjB,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;gBACnC,MAAM,IAAI,KAAK,CAAC,oBAAoB,QAAQ,CAAC,MAAM,KAAK,IAAI,EAAE,CAAC,CAAC;YAClE,CAAC;YAED,MAAM,IAAI,GAAY,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YAC5C,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,CAAC,CAAC,WAAW,IAAI,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAE,IAAgC,CAAC,WAAW,CAAC,CAAC,EAAE,CAAC;gBAClI,MAAM,IAAI,KAAK,CAAC,yCAAyC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC;YACjG,CAAC;YACD,OAAO,CAAC,IAAI,CAAE,IAAgC,CAAC,SAAS,CAAC,CAAC;QAC5D,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;CACF;AAED,8EAA8E;AAC9E,UAAU;AACV,8EAA8E;AAE9E,MAAM,UAAU,uBAAuB,CACrC,QAAwC,EACxC,MAAiG;IAEjG,QAAQ,QAAQ,EAAE,CAAC;QACjB,KAAK,QAAQ,CAAC,CAAC,CAAC;YACd,IAAI,CAAC,MAAM,CAAC,YAAY;gBAAE,MAAM,IAAI,KAAK,CAAC,iCAAiC,CAAC,CAAC;YAC7
E,OAAO,IAAI,cAAc,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC;QACjD,CAAC;QACD,KAAK,QAAQ,CAAC,CAAC,CAAC;YACd,IAAI,CAAC,MAAM,CAAC,YAAY;gBAAE,MAAM,IAAI,KAAK,CAAC,iCAAiC,CAAC,CAAC;YAC7E,OAAO,IAAI,cAAc,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC;QACjD,CAAC;QACD,KAAK,QAAQ,CAAC,CAAC,CAAC;YACd,IAAI,CAAC,MAAM,CAAC,SAAS;gBAAE,MAAM,IAAI,KAAK,CAAC,6BAA6B,CAAC,CAAC;YACtE,OAAO,IAAI,cAAc,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QAC9C,CAAC;IACH,CAAC;AACH,CAAC"}
|
package/dist/server.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"server.d.ts","sourceRoot":"","sources":["../src/server.ts"],"names":[],"mappings":""}
|