recker 1.0.68 → 1.0.70-next.9b4eebc
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai/index.d.ts +0 -1
- package/dist/ai/index.js +0 -1
- package/dist/browser/ai/index.d.ts +0 -1
- package/dist/browser/ai/index.js +0 -1
- package/dist/browser/index.iife.min.js +51 -51
- package/dist/browser/index.min.js +51 -51
- package/dist/browser/index.umd.min.js +51 -51
- package/dist/cli/index.js +0 -2
- package/dist/cli/tui/components/command-palette.d.ts +0 -3
- package/dist/cli/tui/components/command-palette.js +0 -16
- package/dist/cli/tui/shell-search.d.ts +10 -4
- package/dist/cli/tui/shell-search.js +55 -92
- package/dist/mcp/index.d.ts +0 -1
- package/dist/mcp/index.js +0 -1
- package/dist/mcp/server.d.ts +0 -6
- package/dist/mcp/server.js +27 -138
- package/dist/mcp/tools/ai.js +0 -82
- package/dist/mini.d.ts +0 -6
- package/dist/mini.js +0 -3
- package/dist/version.js +1 -1
- package/package.json +12 -16
- package/dist/ai/vector/index.d.ts +0 -2
- package/dist/ai/vector/index.js +0 -2
- package/dist/ai/vector/similarity.d.ts +0 -2
- package/dist/ai/vector/similarity.js +0 -27
- package/dist/ai/vector/store.d.ts +0 -27
- package/dist/ai/vector/store.js +0 -82
- package/dist/browser/ai/vector/index.d.ts +0 -2
- package/dist/browser/ai/vector/index.js +0 -2
- package/dist/browser/ai/vector/similarity.d.ts +0 -2
- package/dist/browser/ai/vector/similarity.js +0 -27
- package/dist/browser/ai/vector/store.d.ts +0 -27
- package/dist/browser/ai/vector/store.js +0 -82
- package/dist/cli/commands/vector.d.ts +0 -8
- package/dist/cli/commands/vector.js +0 -214
- package/dist/mcp/embeddings-loader.d.ts +0 -17
- package/dist/mcp/embeddings-loader.js +0 -162
- package/dist/mcp/search/embedder.d.ts +0 -9
- package/dist/mcp/search/embedder.js +0 -83
- package/dist/mcp/search/hybrid-search.d.ts +0 -30
- package/dist/mcp/search/hybrid-search.js +0 -402
- package/dist/mcp/search/index.d.ts +0 -4
- package/dist/mcp/search/index.js +0 -3
- package/dist/mcp/search/math.d.ts +0 -5
- package/dist/mcp/search/math.js +0 -63
- package/dist/mcp/search/types.d.ts +0 -51
- package/dist/mcp/search/types.js +0 -1
|
@@ -1,402 +0,0 @@
|
|
|
1
|
-
import Fuse from 'fuse.js';
|
|
2
|
-
import { cosineSimilarity, levenshtein } from './math.js';
|
|
3
|
-
import { loadEmbeddings } from '../embeddings-loader.js';
|
|
4
|
-
import { StateError } from '../../core/errors.js';
|
|
5
|
-
// Module-level cache of the embeddings payload returned by loadEmbeddings().
// It is shared by every HybridSearch instance in this module, so the payload
// is only requested once; it stays null until a load returns data.
let cachedEmbeddings = null;
|
|
6
|
-
/**
 * Documentation search that merges Fuse.js fuzzy matching with vector
 * ("semantic") similarity over pre-computed embeddings.
 *
 * Call `initialize(docs)` once before `search()`; `search()` throws a
 * `StateError` otherwise.
 */
export class HybridSearch {
    /** Fuse.js index built by `initialize()`; null until then. */
    fuse = null;
    /** Documents passed to `initialize()`. */
    docs = [];
    /** Embedding vectors keyed by document id. */
    vectors = new Map();
    /** Raw embeddings payload (model, dimensions, documents), or null when none was loaded. */
    embeddingsData = null;
    /** Set to true at the end of `initialize()`. */
    initialized = false;
    /** Resolved configuration with defaults applied. */
    config;

    /**
     * @param config Optional settings. Defaults: fuzzyThreshold 0.3, fuzzyWeight 0.5,
     *   semanticWeight 0.5, debug false, offline false, no embedder.
     *   NOTE(review): fuzzyWeight and semanticWeight are stored but never read
     *   inside this class.
     */
    constructor(config = {}) {
        this.config = {
            fuzzyThreshold: config.fuzzyThreshold ?? 0.3,
            fuzzyWeight: config.fuzzyWeight ?? 0.5,
            semanticWeight: config.semanticWeight ?? 0.5,
            debug: config.debug ?? false,
            offline: config.offline ?? false,
            embedder: config.embedder,
        };
    }

    /**
     * Installs (or replaces) the function used to embed queries.
     * @param embedder Called as `embedder(query, model)`; its awaited result is
     *   compared against the stored vectors.
     */
    setEmbedder(embedder) {
        this.config.embedder = embedder;
    }

    /**
     * Builds the fuzzy index over `docs` and loads pre-computed embeddings.
     * @param docs Documents to index (fields used: keywords, title, section, path, content).
     */
    async initialize(docs) {
        this.docs = docs;
        this.fuse = new Fuse(docs, {
            keys: [
                { name: 'keywords', weight: 10 },
                { name: 'title', weight: 6 },
                { name: 'section', weight: 4 },
                { name: 'path', weight: 2 },
                { name: 'content', weight: 0.5 },
            ],
            includeScore: true,
            threshold: this.config.fuzzyThreshold,
            ignoreLocation: true,
            useExtendedSearch: true,
            findAllMatches: true,
            minMatchCharLength: 2,
        });
        await this.loadPrecomputedEmbeddings();
        this.initialized = true;
        this.log(`Initialized with ${docs.length} docs, ${this.vectors.size} embeddings`);
    }

    /**
     * Fills `embeddingsData` and `vectors` from the module-level cache or from
     * `loadEmbeddings()`. Best effort: any failure is logged (debug only) and
     * leaves the instance without embeddings instead of throwing.
     */
    async loadPrecomputedEmbeddings() {
        try {
            if (cachedEmbeddings) {
                this.embeddingsData = cachedEmbeddings;
            }
            else {
                const data = await loadEmbeddings({
                    debug: this.config.debug,
                    offline: this.config.offline,
                });
                if (data) {
                    this.embeddingsData = data;
                    cachedEmbeddings = data;
                }
            }
            if (this.embeddingsData) {
                for (const entry of this.embeddingsData.documents) {
                    if (entry.vector) {
                        // Vectors that are not real arrays are flattened via Object.values.
                        const vec = Array.isArray(entry.vector)
                            ? entry.vector
                            : Object.values(entry.vector);
                        if (vec.length > 0) {
                            this.vectors.set(entry.id, vec);
                        }
                    }
                }
                this.log(`Loaded ${this.vectors.size} pre-computed embeddings (model: ${this.embeddingsData.model})`);
            }
        }
        catch (error) {
            this.log(`No pre-computed embeddings found: ${error}`);
        }
    }

    /**
     * Runs a fuzzy and/or semantic search and merges the results.
     *
     * @param query Free-text query; stop words are stripped first, and the raw
     *   query is used when nothing is left after stripping.
     * @param options `limit` (default 10), `category` substring filter,
     *   `mode` ('hybrid' | 'fuzzy' | 'semantic', default 'hybrid'), `minScore` (default 0).
     * @returns Up to `limit` results with score >= `minScore`, best first.
     * @throws StateError when `initialize()` has not completed.
     */
    async search(query, options = {}) {
        const { limit = 10, category, mode = 'hybrid', minScore = 0 } = options;
        if (!this.initialized) {
            throw new StateError('HybridSearch not initialized. Call initialize() first.', {
                expectedState: 'initialized',
                actualState: 'not-initialized',
            });
        }
        const cleanedQuery = this.cleanQuery(query);
        this.log(`Original query: "${query}" → Cleaned: "${cleanedQuery}"`);
        const searchQuery = cleanedQuery.length > 0 ? cleanedQuery : query;
        const results = new Map();
        if (mode === 'hybrid' || mode === 'fuzzy') {
            const fuzzyResults = this.fuzzySearch(searchQuery, limit * 2, category);
            for (const result of fuzzyResults) {
                results.set(result.id, result);
            }
            this.log(`Fuzzy search found ${fuzzyResults.length} results`);
        }
        // The semantic pass is skipped entirely when no vectors were loaded.
        if ((mode === 'hybrid' || mode === 'semantic') && this.vectors.size > 0) {
            const semanticResults = await this.semanticSearch(searchQuery, limit * 2, category);
            for (const result of semanticResults) {
                const existing = results.get(result.id);
                if (existing) {
                    // Found by both passes: keep the better score and add 30% of
                    // the weaker one as an agreement bonus, capped at 1.0.
                    const maxScore = Math.max(existing.score, result.score);
                    const bonus = Math.min(existing.score, result.score) * 0.3;
                    existing.score = Math.min(1.0, maxScore + bonus);
                    existing.source = 'hybrid';
                }
                else {
                    results.set(result.id, result);
                }
            }
            this.log(`Semantic search found ${semanticResults.length} results`);
        }
        return Array.from(results.values())
            .filter((r) => r.score >= minScore)
            .sort((a, b) => b.score - a.score)
            .slice(0, limit);
    }

    /**
     * Fuse.js search with keyword/title boosting.
     *
     * @param query Query string handed to Fuse.
     * @param limit Maximum number of results returned.
     * @param category Optional case-insensitive substring filter on `doc.category`.
     * @returns Results tagged `source: 'fuzzy'` with scores clamped to [0, 1].
     */
    fuzzySearch(query, limit, category) {
        if (!this.fuse)
            return [];
        // Over-fetch so the category filter still leaves enough candidates.
        let results = this.fuse.search(query, { limit: limit * 2 });
        if (category) {
            results = results.filter((r) => r.item.category.toLowerCase().includes(category.toLowerCase()));
        }
        const queryTerms = this.tokenize(query);
        const scored = results.slice(0, limit).map((r) => {
            // Fuse scores are distances (0 = perfect); invert them into a relevance.
            const fuseScore = r.score ?? 0;
            const baseScore = 1 - fuseScore;
            let boost = 0;
            if (fuseScore < 0.3) {
                const keywords = r.item.keywords || [];
                const titleLower = r.item.title.toLowerCase();
                for (const term of queryTerms) {
                    if (keywords.some(k => k.toLowerCase() === term)) {
                        boost += 0.15;
                    }
                }
                for (const term of queryTerms) {
                    if (titleLower.includes(term)) {
                        boost += 0.10;
                    }
                }
            }
            else {
                // Weak fuzzy matches are penalised instead of boosted.
                boost = -0.3;
            }
            return {
                item: r.item,
                baseScore,
                boost,
                finalScore: baseScore + boost,
            };
        });
        scored.sort((a, b) => b.finalScore - a.finalScore);
        return scored.map((r) => ({
            id: r.item.id,
            path: r.item.path,
            title: r.item.title,
            content: r.item.content,
            snippet: this.extractSnippet(r.item.content, query),
            score: Math.max(0, Math.min(1, r.finalScore)),
            source: 'fuzzy',
        }));
    }

    /**
     * Vector-based search.
     *
     * With an embedder configured, the query is embedded and compared to every
     * stored vector by cosine similarity (matches above 0.05 are kept). If the
     * embedder throws, or none is configured, a fallback scores documents by
     * term overlap on title + keywords, then adds documents whose vectors are
     * close (> 0.7) to the average vector of the top three term matches.
     *
     * @param query Query string.
     * @param limit Maximum number of results returned.
     * @param category Optional case-insensitive substring filter on the entry category.
     * @returns Results tagged `source: 'semantic'`, best first.
     */
    async semanticSearch(query, limit, category) {
        if (!this.embeddingsData || this.vectors.size === 0) {
            return [];
        }
        if (this.config.embedder) {
            try {
                const model = this.embeddingsData.model;
                const queryVector = await this.config.embedder(query, model);
                this.log(`Generated query vector using provided embedder (model: ${model})`);
                // Index entries once rather than scanning the document list per vector.
                const entriesById = category
                    ? HybridSearch.indexById(this.embeddingsData.documents)
                    : null;
                const scores = [];
                for (const [id, vector] of this.vectors) {
                    if (category) {
                        const entry = entriesById.get(id);
                        if (!entry || !entry.category.toLowerCase().includes(category.toLowerCase())) {
                            continue;
                        }
                    }
                    // cosineSimilarity throws on a length mismatch, so skip such vectors.
                    if (vector.length !== queryVector.length)
                        continue;
                    const score = cosineSimilarity(queryVector, vector);
                    if (score > 0.05) {
                        scores.push({ id, score });
                    }
                }
                return this.toSemanticResults(scores, limit, query);
            }
            catch (error) {
                this.log(`Embedder failed: ${error}. Falling back to synthetic vectors.`);
            }
        }
        const queryTerms = this.tokenize(query);
        if (queryTerms.length === 0) {
            // Without terms nothing can score; this also avoids a 0 / 0 division below.
            return [];
        }
        const scores = [];
        const scoredIds = new Set();
        for (const entry of this.embeddingsData.documents) {
            if (category && !entry.category.toLowerCase().includes(category.toLowerCase())) {
                continue;
            }
            let termScore = 0;
            const docText = `${entry.title} ${entry.keywords?.join(' ') || ''}`.toLowerCase();
            const docTerms = this.tokenize(docText);
            for (const queryTerm of queryTerms) {
                if (docTerms.includes(queryTerm)) {
                    // An exact term hit is worth 2 points.
                    termScore += 2;
                    continue;
                }
                for (const docTerm of docTerms) {
                    const distance = levenshtein(queryTerm, docTerm);
                    const maxLen = Math.max(queryTerm.length, docTerm.length);
                    // A near miss (at most 2 edits and at most 30% of the longer term)
                    // earns partial credit; only the first such term counts.
                    if (distance <= Math.min(2, maxLen * 0.3)) {
                        termScore += 1 - distance / maxLen;
                        break;
                    }
                }
            }
            const normalizedScore = Math.min(1, termScore / (queryTerms.length * 2));
            if (normalizedScore > 0.1) {
                scores.push({ id: entry.id, score: normalizedScore });
                scoredIds.add(entry.id);
            }
        }
        const topDocs = scores.sort((a, b) => b.score - a.score).slice(0, 3);
        if (topDocs.length > 0) {
            const topVectors = topDocs
                .map((d) => this.vectors.get(d.id))
                .filter((v) => Array.isArray(v) && v.length > 0);
            if (topVectors.length > 0 && topVectors.every((v) => v.length === topVectors[0].length)) {
                const avgVector = this.averageVectors(topVectors);
                const expectedDimensions = avgVector.length;
                for (const entry of this.embeddingsData.documents) {
                    if (scoredIds.has(entry.id))
                        continue;
                    if (category && !entry.category.toLowerCase().includes(category.toLowerCase()))
                        continue;
                    const vector = this.vectors.get(entry.id);
                    if (!vector || vector.length === 0 || vector.length !== expectedDimensions)
                        continue;
                    const similarity = cosineSimilarity(avgVector, vector);
                    if (similarity > 0.7) {
                        // Neighbours found only through vectors are worth half their similarity.
                        scores.push({ id: entry.id, score: similarity * 0.5 });
                        scoredIds.add(entry.id);
                    }
                }
            }
        }
        return this.toSemanticResults(scores, limit, query);
    }

    /**
     * Internal helper: turns `{ id, score }` pairs into result objects for the
     * `limit` best scores, dropping ids that have no matching indexed document.
     * Sorts `scores` in place.
     */
    toSemanticResults(scores, limit, query) {
        const docsById = HybridSearch.indexById(this.docs);
        const results = [];
        for (const s of scores.sort((a, b) => b.score - a.score).slice(0, limit)) {
            const doc = docsById.get(s.id);
            if (!doc) {
                continue;
            }
            results.push({
                id: s.id,
                path: doc.path,
                title: doc.title,
                content: doc.content,
                snippet: this.extractSnippet(doc.content, query),
                score: s.score,
                source: 'semantic',
            });
        }
        return results;
    }

    /**
     * Internal helper: maps `item.id` to the first item carrying that id, which
     * is the same item an `Array.prototype.find` scan would return.
     */
    static indexById(items) {
        const byId = new Map();
        for (const item of items) {
            if (!byId.has(item.id)) {
                byId.set(item.id, item);
            }
        }
        return byId;
    }

    /**
     * Extracts a short excerpt of `content` around the first query-term hit.
     *
     * Order of attempts: exact substring match of a term; otherwise the word
     * (among the first 500) with the smallest edit distance (at most 2) to a
     * term; otherwise the leading 200 characters.
     *
     * @param content Full document text.
     * @param query Query whose tokens are looked up.
     * @returns Whitespace-collapsed excerpt, with '...' marking truncation.
     */
    extractSnippet(content, query) {
        if (!content)
            return '';
        const lowerContent = content.toLowerCase();
        const queryTerms = this.tokenize(query);
        let bestIndex = -1;
        for (const term of queryTerms) {
            const idx = lowerContent.indexOf(term);
            if (idx !== -1) {
                bestIndex = idx;
                break;
            }
        }
        if (bestIndex === -1) {
            // Start from Infinity so the first acceptable word wins. The previous
            // starting value of 0 could never be beaten by a non-negative distance,
            // which left this fallback unreachable.
            let bestDistance = Infinity;
            const words = lowerContent.split(/\s+/).slice(0, 500);
            for (const word of words) {
                for (const term of queryTerms) {
                    // Both sides are capped at 20 characters to bound the cost.
                    const dist = levenshtein(word.slice(0, 20), term.slice(0, 20));
                    if (dist <= 2 && dist < bestDistance) {
                        bestDistance = dist;
                        bestIndex = lowerContent.indexOf(word);
                    }
                }
            }
        }
        if (bestIndex === -1) {
            return content.slice(0, 200).trim() + (content.length > 200 ? '...' : '');
        }
        const start = Math.max(0, bestIndex - 50);
        const end = Math.min(content.length, bestIndex + 150);
        let snippet = content.slice(start, end).trim();
        if (start > 0)
            snippet = '...' + snippet;
        if (end < content.length)
            snippet = snippet + '...';
        return snippet.replace(/\n+/g, ' ').replace(/\s+/g, ' ');
    }

    /**
     * Words removed from queries and tokens: English and Portuguese function
     * words, greetings, and generic action verbs.
     * NOTE(review): 'por favor' contains a space, so it can never equal a single
     * whitespace-split token; 'por' on its own is already listed.
     */
    static STOP_WORDS = new Set([
        'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
        'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should',
        'may', 'might', 'must', 'shall', 'can', 'need', 'dare', 'ought', 'used',
        'and', 'but', 'or', 'nor', 'for', 'yet', 'so', 'both', 'either', 'neither',
        'not', 'only', 'own', 'same', 'than', 'too', 'very', 'just', 'also',
        'how', 'what', 'when', 'where', 'who', 'which', 'why', 'whom', 'whose',
        'this', 'that', 'these', 'those', 'here', 'there', 'all', 'each', 'every',
        'any', 'some', 'no', 'none', 'one', 'two', 'other', 'another', 'such',
        'to', 'of', 'in', 'on', 'at', 'by', 'with', 'from', 'as', 'into', 'through',
        'about', 'above', 'below', 'between', 'under', 'over', 'out', 'up', 'down',
        'if', 'then', 'else', 'because', 'while', 'although', 'though', 'unless',
        'my', 'your', 'his', 'her', 'its', 'our', 'their', 'me', 'you', 'him', 'us', 'them',
        'o', 'a', 'os', 'as', 'um', 'uma', 'uns', 'umas', 'de', 'do', 'da', 'dos', 'das',
        'em', 'no', 'na', 'nos', 'nas', 'por', 'para', 'com', 'sem', 'sob', 'sobre',
        'e', 'ou', 'mas', 'porem', 'todavia', 'contudo', 'entretanto',
        'que', 'qual', 'quais', 'quanto', 'quem', 'como', 'onde', 'quando', 'porque',
        'eu', 'tu', 'ele', 'ela', 'nos', 'vos', 'eles', 'elas', 'voce', 'voces',
        'meu', 'minha', 'meus', 'minhas', 'seu', 'sua', 'seus', 'suas',
        'este', 'esta', 'estes', 'estas', 'esse', 'essa', 'esses', 'essas',
        'isso', 'isto', 'aquilo', 'aquele', 'aquela', 'aqueles', 'aquelas',
        'ser', 'estar', 'ter', 'haver', 'fazer', 'ir', 'vir', 'poder', 'dever',
        'sim', 'nao', 'ja', 'ainda', 'sempre', 'nunca', 'tambem', 'so', 'apenas',
        'muito', 'pouco', 'mais', 'menos', 'bem', 'mal', 'assim', 'entao', 'logo',
        'yo', 'hey', 'oi', 'ola', 'bom', 'boa', 'obrigado', 'por favor',
        'configure', 'configuro', 'configurar', 'configurando', 'configura',
        'use', 'usar', 'using', 'uso', 'usa',
        'create', 'criar', 'creating', 'criando', 'cria', 'crio',
        'setup', 'setar', 'setting', 'setando', 'seta', 'seto',
        'add', 'adicionar', 'adding', 'adicionando', 'adiciona', 'adiciono',
        'get', 'getting', 'pegar', 'pegando', 'pega', 'pego',
        'set', 'setting', 'definir', 'definindo', 'define', 'defino',
        'make', 'making', 'fazer', 'fazendo', 'faz', 'faço', 'faco',
    ]);

    /**
     * Lower-cases the query, splits on whitespace/punctuation, and drops stop
     * words and single-character fragments.
     * @returns Remaining words joined by single spaces ('' when none remain).
     */
    cleanQuery(query) {
        const words = query
            .toLowerCase()
            .split(/[\s\-_.,;:!?()[\]{}'"]+/)
            .filter((w) => w.length > 1 && !HybridSearch.STOP_WORDS.has(w));
        return words.join(' ');
    }

    /**
     * Splits text into lower-case tokens, keeping only tokens longer than two
     * characters that are not stop words.
     */
    tokenize(text) {
        return text
            .toLowerCase()
            .split(/[\s\-_.,;:!?()[\]{}'"]+/)
            .filter((t) => t.length > 2 && !HybridSearch.STOP_WORDS.has(t));
    }

    /**
     * Component-wise mean of equally sized vectors.
     * @returns `[]` for no input; the single input vector itself (not a copy)
     *   when only one vector is given.
     */
    averageVectors(vectors) {
        if (vectors.length === 0)
            return [];
        if (vectors.length === 1)
            return vectors[0];
        const result = new Array(vectors[0].length).fill(0);
        for (const vec of vectors) {
            for (let i = 0; i < vec.length; i++) {
                result[i] += vec[i];
            }
        }
        for (let i = 0; i < result.length; i++) {
            result[i] /= vectors.length;
        }
        return result;
    }

    /** @returns true when at least one embedding vector is loaded. */
    hasEmbeddings() {
        return this.vectors.size > 0;
    }

    /** @returns Counts of indexed documents and vectors, plus the embeddings model and dimensions when loaded. */
    getStats() {
        return {
            documents: this.docs.length,
            embeddings: this.vectors.size,
            model: this.embeddingsData?.model,
            dimensions: this.embeddingsData?.dimensions,
        };
    }

    /** Writes a prefixed message to the console when `config.debug` is enabled. */
    log(message) {
        if (this.config.debug) {
            console.log(`[HybridSearch] ${message}`);
        }
    }
}
|
|
400
|
-
/**
 * Factory shorthand for `new HybridSearch(config)`.
 * @param config Optional settings forwarded unchanged to the constructor.
 * @returns A new, not yet initialized HybridSearch instance.
 */
export function createHybridSearch(config) {
    const instance = new HybridSearch(config);
    return instance;
}
|
|
@@ -1,4 +0,0 @@
|
|
|
1
|
-
export { HybridSearch, createHybridSearch } from './hybrid-search.js';
|
|
2
|
-
export { cosineSimilarity, levenshtein, stringSimilarity, reciprocalRankFusion, combineScores, } from './math.js';
|
|
3
|
-
export { embed, embedBatch, createEmbedder, isFastembedAvailable, getModelInfo, unloadEmbedder, } from './embedder.js';
|
|
4
|
-
export type { IndexedDoc, SearchResult, SearchOptions, HybridSearchConfig, EmbeddingsData, EmbeddingEntry, } from './types.js';
|
package/dist/mcp/search/index.js
DELETED
|
@@ -1,3 +0,0 @@
|
|
|
1
|
-
export { HybridSearch, createHybridSearch } from './hybrid-search.js';
|
|
2
|
-
export { cosineSimilarity, levenshtein, stringSimilarity, reciprocalRankFusion, combineScores, } from './math.js';
|
|
3
|
-
export { embed, embedBatch, createEmbedder, isFastembedAvailable, getModelInfo, unloadEmbedder, } from './embedder.js';
|
|
@@ -1,5 +0,0 @@
|
|
|
1
|
-
/**
 * Cosine similarity of two equally sized vectors.
 * Returns 0 for empty vectors or when either vector has zero magnitude;
 * the implementation throws a ValidationError when the lengths differ.
 */
export declare function cosineSimilarity(a: number[], b: number[]): number;
|
|
2
|
-
/** Levenshtein edit distance (insertions, deletions, substitutions) between two strings. */
export declare function levenshtein(a: string, b: string): number;
|
|
3
|
-
/**
 * Similarity computed as `1 - levenshtein(a, b) / max(a.length, b.length)`.
 * Two empty strings yield 1.
 */
export declare function stringSimilarity(a: string, b: string): number;
|
|
4
|
-
/**
 * Sums `1 / (k + rank)` over all given ranks.
 * @param k Smoothing constant; the implementation defaults it to 60.
 */
export declare function reciprocalRankFusion(ranks: number[], k?: number): number;
|
|
5
|
-
/**
 * Converts each score to a pseudo-rank (`1 + (1 - score) * 100`) and fuses the
 * two ranks with reciprocalRankFusion.
 * @param k Smoothing constant; the implementation defaults it to 60.
 */
export declare function combineScores(score1: number, score2: number, k?: number): number;
|
package/dist/mcp/search/math.js
DELETED
|
@@ -1,63 +0,0 @@
|
|
|
1
|
-
import { ValidationError } from '../../core/errors.js';
|
|
2
|
-
/**
 * Cosine similarity between two numeric vectors.
 *
 * @param a First vector.
 * @param b Second vector; must have the same length as `a`.
 * @returns dot(a, b) / (|a| * |b|); 0 for empty vectors or when either
 *   magnitude is zero.
 * @throws ValidationError when the vectors differ in length.
 */
export function cosineSimilarity(a, b) {
    const size = a.length;
    if (size !== b.length) {
        throw new ValidationError(`Vector length mismatch: ${a.length} vs ${b.length}`, {
            field: 'vectorLength',
            value: { a: a.length, b: b.length },
        });
    }
    if (size === 0) {
        return 0;
    }
    let dotProduct = 0;
    let squaredA = 0;
    let squaredB = 0;
    for (let idx = 0; idx < size; idx++) {
        const x = a[idx];
        const y = b[idx];
        dotProduct += x * y;
        squaredA += x * x;
        squaredB += y * y;
    }
    const magnitude = Math.sqrt(squaredA) * Math.sqrt(squaredB);
    if (magnitude === 0) {
        // A zero vector has no direction; report "no similarity".
        return 0;
    }
    return dotProduct / magnitude;
}
|
|
23
|
-
/**
 * Levenshtein edit distance between two strings, computed with two rolling
 * rows sized by the shorter input.
 *
 * @param a First string.
 * @param b Second string.
 * @returns Minimum number of single-character insertions, deletions and
 *   substitutions turning one string into the other.
 */
export function levenshtein(a, b) {
    if (a === b) {
        return 0;
    }
    if (a.length === 0) {
        return b.length;
    }
    if (b.length === 0) {
        return a.length;
    }
    // Keep the rows as short as possible: columns follow the shorter string.
    const [shorter, longer] = a.length > b.length ? [b, a] : [a, b];
    const width = shorter.length;
    const height = longer.length;
    let previous = new Array(width + 1);
    let current = new Array(width + 1);
    for (let col = 0; col <= width; col++) {
        previous[col] = col;
    }
    for (let row = 1; row <= height; row++) {
        current[0] = row;
        for (let col = 1; col <= width; col++) {
            const substitution = previous[col - 1] + (shorter[col - 1] === longer[row - 1] ? 0 : 1);
            const fromAbove = previous[col] + 1;
            const fromLeft = current[col - 1] + 1;
            current[col] = Math.min(fromAbove, fromLeft, substitution);
        }
        // The freshly filled row becomes the reference row for the next pass.
        const recycled = previous;
        previous = current;
        current = recycled;
    }
    return previous[width];
}
|
|
50
|
-
/**
 * Normalised string similarity derived from the Levenshtein distance.
 *
 * @param a First string.
 * @param b Second string.
 * @returns `1 - distance / longerLength`; 1 when both strings are empty.
 */
export function stringSimilarity(a, b) {
    const longerLength = Math.max(a.length, b.length);
    return longerLength === 0 ? 1 : 1 - levenshtein(a, b) / longerLength;
}
|
|
56
|
-
/**
 * Reciprocal-rank-fusion score: the sum of `1 / (k + rank)` over `ranks`.
 *
 * @param ranks Rank values to fuse.
 * @param k Smoothing constant added to every rank (default 60).
 * @returns The fused score; 0 for an empty list.
 */
export function reciprocalRankFusion(ranks, k = 60) {
    let fused = 0;
    for (const rank of ranks) {
        fused += 1 / (k + rank);
    }
    return fused;
}
|
|
59
|
-
/**
 * Fuses two scores by mapping each to a pseudo-rank and applying reciprocal
 * rank fusion.
 *
 * @param score1 First score; a score of 1 maps to rank 1, a score of 0 to rank 101.
 * @param score2 Second score, mapped the same way.
 * @param k Smoothing constant forwarded to reciprocalRankFusion (default 60).
 * @returns The fused score.
 */
export function combineScores(score1, score2, k = 60) {
    const toRank = (score) => 1 + (1 - score) * 100;
    return reciprocalRankFusion([toRank(score1), toRank(score2)], k);
}
|
|
@@ -1,51 +0,0 @@
|
|
|
1
|
-
/** A document as handed to the search index. */
export interface IndexedDoc {
    /** Identifier used to match the document with its embedding vector. */
    id: string;
    /** Path of the document. */
    path: string;
    /** Document title. */
    title: string;
    /** Full text; snippets are cut from it. */
    content: string;
    /** Category label; search filters match it as a case-insensitive substring. */
    category: string;
    /** Keywords associated with the document. */
    keywords: Array<string>;
    /** Optional section name. */
    section?: string;
    /** Optional path of a parent document. */
    parentPath?: string;
}
|
|
11
|
-
/** A single search hit. */
export interface SearchResult {
    /** Identifier of the matched document. */
    id: string;
    /** Path of the matched document. */
    path: string;
    /** Title of the matched document. */
    title: string;
    /** Full text of the matched document. */
    content: string;
    /** Short excerpt of `content`. */
    snippet: string;
    /** Relevance score; higher is better. */
    score: number;
    /** Which search pass produced the hit. */
    source: 'fuzzy' | 'semantic' | 'hybrid';
}
|
|
20
|
-
/** Pre-computed embeddings payload. */
export interface EmbeddingsData {
    /** Version string of the payload. */
    version: string;
    /** Name of the embedding model. */
    model: string;
    /** Number of dimensions per vector. */
    dimensions: number;
    /** Generation timestamp, as a string. */
    generatedAt: string;
    /** One entry per embedded document. */
    documents: Array<EmbeddingEntry>;
}
|
|
27
|
-
/** Metadata and embedding vector for one document. */
export interface EmbeddingEntry {
    /** Document identifier. */
    id: string;
    /** Path of the document. */
    path: string;
    /** Document title. */
    title: string;
    /** Category label. */
    category: string;
    /** Keywords associated with the document. */
    keywords: Array<string>;
    /** Optional section name. */
    section?: string;
    /** Optional path of a parent document. */
    parentPath?: string;
    /** Optional document text. */
    content?: string;
    /** Embedding vector of the document. */
    vector: Array<number>;
}
|
|
38
|
-
/** Per-call options for a search. */
export interface SearchOptions {
    /** Maximum number of results. */
    limit?: number;
    /** Restrict results to categories containing this text. */
    category?: string;
    /** Which search passes to run. */
    mode?: 'hybrid' | 'fuzzy' | 'semantic';
    /** Minimum score a result must reach to be returned. */
    minScore?: number;
}
|
|
44
|
-
/** Construction-time settings for the hybrid search. */
export interface HybridSearchConfig {
    /** Match threshold passed to the fuzzy index. */
    fuzzyThreshold?: number;
    /** Weight of the fuzzy pass. */
    fuzzyWeight?: number;
    /** Weight of the semantic pass. */
    semanticWeight?: number;
    /** Enables debug logging. */
    debug?: boolean;
    /** Offline flag forwarded to the embeddings loader. */
    offline?: boolean;
    /** Produces an embedding vector for `text`, optionally for a named model. */
    embedder?: (text: string, model?: string) => Promise<Array<number>>;
}
|
package/dist/mcp/search/types.js
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
export {};
|