claude-eidetic 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/config.d.ts +87 -0
- package/dist/config.js +65 -0
- package/dist/core/indexer.d.ts +18 -0
- package/dist/core/indexer.js +169 -0
- package/dist/core/preview.d.ts +14 -0
- package/dist/core/preview.js +61 -0
- package/dist/core/searcher.d.ts +24 -0
- package/dist/core/searcher.js +101 -0
- package/dist/core/snapshot-io.d.ts +6 -0
- package/dist/core/snapshot-io.js +39 -0
- package/dist/core/sync.d.ts +35 -0
- package/dist/core/sync.js +188 -0
- package/dist/embedding/factory.d.ts +17 -0
- package/dist/embedding/factory.js +41 -0
- package/dist/embedding/openai.d.ts +45 -0
- package/dist/embedding/openai.js +243 -0
- package/dist/embedding/truncate.d.ts +6 -0
- package/dist/embedding/truncate.js +14 -0
- package/dist/embedding/types.d.ts +18 -0
- package/dist/embedding/types.js +2 -0
- package/dist/errors.d.ts +17 -0
- package/dist/errors.js +21 -0
- package/dist/format.d.ts +12 -0
- package/dist/format.js +97 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.js +109 -0
- package/dist/infra/qdrant-bootstrap.d.ts +2 -0
- package/dist/infra/qdrant-bootstrap.js +94 -0
- package/dist/paths.d.ts +11 -0
- package/dist/paths.js +41 -0
- package/dist/splitter/ast.d.ts +13 -0
- package/dist/splitter/ast.js +169 -0
- package/dist/splitter/line.d.ts +14 -0
- package/dist/splitter/line.js +109 -0
- package/dist/splitter/types.d.ts +11 -0
- package/dist/splitter/types.js +2 -0
- package/dist/state/registry.d.ts +8 -0
- package/dist/state/registry.js +33 -0
- package/dist/state/snapshot.d.ts +26 -0
- package/dist/state/snapshot.js +101 -0
- package/dist/tool-schemas.d.ts +135 -0
- package/dist/tool-schemas.js +162 -0
- package/dist/tools.d.ts +40 -0
- package/dist/tools.js +169 -0
- package/dist/vectordb/milvus.d.ts +33 -0
- package/dist/vectordb/milvus.js +328 -0
- package/dist/vectordb/qdrant.d.ts +51 -0
- package/dist/vectordb/qdrant.js +241 -0
- package/dist/vectordb/types.d.ts +35 -0
- package/dist/vectordb/types.js +2 -0
- package/package.json +62 -0
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
import { QdrantClient } from '@qdrant/js-client-rest';
|
|
2
|
+
import { randomUUID } from 'node:crypto';
|
|
3
|
+
import { VectorDBError } from '../errors.js';
|
|
4
|
+
import { getConfig } from '../config.js';
|
|
5
|
+
// Reciprocal Rank Fusion constant. A low k widens the score gap between
// top-ranked and lower-ranked results, which suits code search.
export const RRF_K = 5;
// Blend weight for fused scores: 70% rank-based (fusion stability),
// 30% raw similarity (query-specific signal).
export const RRF_ALPHA = 0.7;
|
|
7
|
+
/**
 * Qdrant-backed vector store for code chunks.
 *
 * Points are stored with a named dense vector ("dense", cosine distance)
 * and a payload holding the chunk text plus location metadata. Search is
 * hybrid: a dense similarity query and a full-text payload filter run
 * concurrently and are merged with client-side reciprocal rank fusion.
 */
export class QdrantVectorDB {
    /** Underlying Qdrant REST client. */
    client;
    /**
     * @param {string} [url] - Qdrant endpoint; falls back to config.qdrantUrl.
     * @param {string} [apiKey] - API key; falls back to config.qdrantApiKey.
     *   Only attached when truthy, so an empty string means "no key".
     */
    constructor(url, apiKey) {
        const config = getConfig();
        this.client = new QdrantClient({
            url: url ?? config.qdrantUrl,
            ...(apiKey ?? config.qdrantApiKey ? { apiKey: apiKey ?? config.qdrantApiKey } : {}),
        });
    }
    /**
     * Create a collection with a single named dense vector space and the
     * payload indexes required by search: a full-text index on "content"
     * and keyword indexes on "relativePath" / "fileExtension".
     * @param {string} name - Collection name.
     * @param {number} dimension - Embedding dimension.
     * @throws {VectorDBError} When collection creation or indexing fails.
     */
    async createCollection(name, dimension) {
        try {
            await this.client.createCollection(name, {
                vectors: {
                    dense: { size: dimension, distance: 'Cosine' },
                },
            });
            // Payload indexes for filtering and full-text search.
            await Promise.all([
                this.client.createPayloadIndex(name, {
                    field_name: 'content',
                    field_schema: 'text',
                    wait: true,
                }),
                this.client.createPayloadIndex(name, {
                    field_name: 'relativePath',
                    field_schema: 'keyword',
                    wait: true,
                }),
                this.client.createPayloadIndex(name, {
                    field_name: 'fileExtension',
                    field_schema: 'keyword',
                    wait: true,
                }),
            ]);
        }
        catch (err) {
            throw new VectorDBError(`Failed to create collection "${name}"`, err);
        }
    }
    /**
     * @param {string} name
     * @returns {Promise<boolean>} True when the collection exists; false on
     *   any error (failure is treated as "not available", not fatal).
     */
    async hasCollection(name) {
        try {
            const response = await this.client.collectionExists(name);
            return response.exists;
        }
        catch {
            return false;
        }
    }
    /**
     * Delete the collection if it exists; no-op otherwise.
     * @param {string} name
     * @throws {VectorDBError} When deletion fails.
     */
    async dropCollection(name) {
        try {
            if (await this.hasCollection(name)) {
                await this.client.deleteCollection(name);
            }
        }
        catch (err) {
            throw new VectorDBError(`Failed to drop collection "${name}"`, err);
        }
    }
    /**
     * Upsert documents in batches of 100, waiting for each batch to be
     * persisted before sending the next. Documents without an id receive a
     * random UUID.
     * @param {string} name
     * @param {CodeDocument[]} documents
     * @throws {VectorDBError} When any batch upsert fails.
     */
    async insert(name, documents) {
        if (documents.length === 0)
            return;
        try {
            const batchSize = 100;
            for (let i = 0; i < documents.length; i += batchSize) {
                const batch = documents.slice(i, i + batchSize);
                await this.client.upsert(name, {
                    wait: true,
                    points: batch.map(doc => ({
                        id: doc.id ?? randomUUID(),
                        vector: { dense: doc.vector },
                        payload: {
                            content: doc.content,
                            relativePath: doc.relativePath,
                            startLine: doc.startLine,
                            endLine: doc.endLine,
                            fileExtension: doc.fileExtension,
                            language: doc.language,
                        },
                    })),
                });
            }
        }
        catch (err) {
            throw new VectorDBError(`Failed to insert ${documents.length} documents into "${name}"`, err);
        }
    }
    /**
     * Hybrid search. Issues the dense similarity query and the full-text
     * payload scroll concurrently (they are independent), ranks the text
     * matches by term frequency (scroll returns storage order, not
     * relevance), then fuses both lists with reciprocal rank fusion.
     * @param {string} name
     * @param {HybridSearchParams} params
     * @returns {Promise<SearchResult[]>} Top params.limit fused results.
     * @throws {VectorDBError} When either underlying query fails.
     */
    async search(name, params) {
        try {
            // Over-fetch from both sources so fusion has candidates to merge.
            const fetchLimit = params.limit * 2;
            const extensionFilter = params.extensionFilter?.length
                ? {
                    should: params.extensionFilter.map(ext => ({
                        key: 'fileExtension',
                        match: { value: ext },
                    })),
                }
                : undefined;
            const textFilter = [
                { key: 'content', match: { text: params.queryText } },
            ];
            if (extensionFilter) {
                textFilter.push(extensionFilter);
            }
            // The dense search and the text scroll are independent network
            // calls — run them in parallel instead of awaiting serially.
            const [denseResults, textResponse] = await Promise.all([
                this.client.search(name, {
                    vector: { name: 'dense', vector: params.queryVector },
                    limit: fetchLimit,
                    with_payload: true,
                    ...(extensionFilter ? { filter: { must: [extensionFilter] } } : {}),
                }),
                this.client.scroll(name, {
                    filter: { must: textFilter },
                    limit: fetchLimit,
                    with_payload: true,
                }),
            ]);
            // Rank text results by query term frequency so RRF receives a meaningful ordering.
            const rankedTextResults = rankByTermFrequency(textResponse.points, params.queryText);
            // Client-side RRF fusion blending rank position with raw similarity scores.
            return reciprocalRankFusion(denseResults, rankedTextResults, params.limit);
        }
        catch (err) {
            throw new VectorDBError(`Search failed in collection "${name}"`, err);
        }
    }
    /**
     * Remove every chunk indexed under the given relative path.
     * @param {string} name
     * @param {string} relativePath
     * @throws {VectorDBError} When deletion fails.
     */
    async deleteByPath(name, relativePath) {
        try {
            await this.client.delete(name, {
                filter: {
                    must: [{ key: 'relativePath', match: { value: relativePath } }],
                },
                wait: true,
            });
        }
        catch (err) {
            throw new VectorDBError(`Failed to delete documents for path "${relativePath}" from "${name}"`, err);
        }
    }
}
|
|
144
|
+
/**
 * Order text-filter matches by normalized term frequency.
 *
 * Qdrant's scroll API yields points in storage order rather than relevance
 * order, so before fusion each point is scored by how often the query terms
 * occur in its content, divided by the chunk's word count (length
 * normalization, so long chunks are not favored). The best match is scaled
 * to rawScore 1 so downstream fusion can blend it with cosine similarity
 * on the same 0-1 range. Returns points sorted best-first with rawScore
 * attached.
 */
export function rankByTermFrequency(points, queryText) {
    if (points.length === 0) {
        return [];
    }
    const escapeRegExp = (s) => s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // Lowercase, whitespace-split, drop empties, dedupe.
    const uniqueTerms = [...new Set(queryText.toLowerCase().split(/\s+/).filter((term) => term.length > 0))];
    if (uniqueTerms.length === 0) {
        // No usable query terms: keep incoming order, score everything 0.
        return points.map((point) => ({ ...point, rawScore: 0 }));
    }
    const patterns = uniqueTerms.map((term) => new RegExp(escapeRegExp(term), 'gi'));
    const withTf = points.map((point) => {
        const text = point.payload?.content ?? '';
        const words = Math.max(1, text.split(/\s+/).length);
        // Total occurrences across all query terms.
        let occurrences = 0;
        for (const pattern of patterns) {
            occurrences += (text.match(pattern) ?? []).length;
        }
        // TF = occurrences / total words.
        return { point, tf: occurrences / words };
    });
    // Best matches first (Array.sort is stable, ties keep incoming order).
    withTf.sort((left, right) => right.tf - left.tf);
    // Scale so the top TF maps to rawScore 1 for blending with cosine scores.
    const topTf = withTf[0].tf;
    return withTf.map(({ point, tf }) => ({
        ...point,
        rawScore: topTf > 0 ? tf / topTf : 0,
    }));
}
|
|
184
|
+
/**
 * Normalize a Qdrant point's payload into the flat record used by search
 * results. Missing or null payload fields fall back to '' / 0; values are
 * coerced to the expected primitive types.
 */
export function extractPayload(point) {
    const payload = point.payload ?? {};
    const str = (value) => String(value ?? '');
    const num = (value) => Number(value ?? 0);
    return {
        id: point.id,
        content: str(payload.content),
        relativePath: str(payload.relativePath),
        startLine: num(payload.startLine),
        endLine: num(payload.endLine),
        fileExtension: str(payload.fileExtension),
        language: str(payload.language),
    };
}
|
|
196
|
+
/**
 * Fuse the dense-similarity and text-match result lists into one ranked list.
 *
 * Each occurrence of a point contributes a blended score:
 *   RRF_ALPHA * (1 / (RRF_K + rank + 1)) + (1 - RRF_ALPHA) * rawSimilarity
 * alpha = 0.7 preserves rank-fusion stability while the 0.3 raw-similarity
 * share injects query-specific signal so identical ranks still get distinct
 * scores. Points appearing in both lists accumulate both contributions.
 * Returns the top `limit` results, best first.
 */
export function reciprocalRankFusion(denseResults, textResults, limit) {
    const fused = new Map();
    const blend = (rank, rawSimilarity) => RRF_ALPHA * (1 / (RRF_K + rank + 1)) + (1 - RRF_ALPHA) * rawSimilarity;
    // Fold one result list into the accumulator, merging on point id.
    const accumulate = (point, rank, rawSimilarity) => {
        const contribution = blend(rank, rawSimilarity);
        const entry = fused.get(point.id);
        if (entry) {
            entry.score += contribution;
        }
        else {
            fused.set(point.id, { score: contribution, payload: extractPayload(point) });
        }
    };
    // Dense hits carry Qdrant's cosine similarity in `score`.
    denseResults.forEach((point, rank) => accumulate(point, rank, point.score ?? 0));
    // Text hits carry the normalized TF from rankByTermFrequency.
    textResults.forEach((point, rank) => accumulate(point, rank, point.rawScore));
    const top = [...fused.values()]
        .sort((a, b) => b.score - a.score)
        .slice(0, limit);
    // Strip the internal id; expose payload fields plus the fused score.
    return top.map(({ score, payload: { id, ...fields } }) => ({ ...fields, score }));
}
|
|
241
|
+
//# sourceMappingURL=qdrant.js.map
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import type { EmbeddingVector } from '../embedding/types.js';
|
|
2
|
+
/** A chunk of source code plus its embedding, as handed to the vector DB. */
export interface CodeDocument {
    /** Point id; the Qdrant backend substitutes a random UUID when absent. */
    id: string;
    /** Raw chunk text. */
    content: string;
    /** Dense embedding for the chunk. */
    vector: EmbeddingVector;
    /** Source file path, relative to the indexed root. */
    relativePath: string;
    /** First line of the chunk in the source file. */
    startLine: number;
    /** Last line of the chunk in the source file. */
    endLine: number;
    /** File extension; indexed as a keyword for extension filtering. */
    fileExtension: string;
    /** Language associated with the chunk. */
    language: string;
}
|
|
12
|
+
/** Parameters for a hybrid (dense + full-text) search. */
export interface HybridSearchParams {
    /** Embedding of the query, matched against stored dense vectors. */
    queryVector: EmbeddingVector;
    /** Raw query text, used for full-text matching. */
    queryText: string;
    /** Maximum number of results to return. */
    limit: number;
    /** Optional list of file extensions to restrict results to. */
    extensionFilter?: string[];
}
|
|
18
|
+
/** A single fused search hit returned to callers. */
export interface SearchResult {
    /** Chunk text. */
    content: string;
    /** Source file path, relative to the indexed root. */
    relativePath: string;
    /** First line of the chunk in the source file. */
    startLine: number;
    /** Last line of the chunk in the source file. */
    endLine: number;
    /** File extension of the source file. */
    fileExtension: string;
    /** Language associated with the chunk. */
    language: string;
    /** Fused relevance score (higher is better). */
    score: number;
}
|
|
27
|
+
/** Storage backend contract for indexing and searching code chunks. */
export interface VectorDB {
    /** Create a collection sized for embeddings of the given dimension. */
    createCollection(name: string, dimension: number): Promise<void>;
    /** Whether the named collection exists. */
    hasCollection(name: string): Promise<boolean>;
    /** Delete the named collection (implementations treat a missing collection as a no-op). */
    dropCollection(name: string): Promise<void>;
    /** Insert or update the given documents in the collection. */
    insert(name: string, documents: CodeDocument[]): Promise<void>;
    /** Hybrid dense + full-text search over the collection. */
    search(name: string, params: HybridSearchParams): Promise<SearchResult[]>;
    /** Remove all documents indexed under the given relative path. */
    deleteByPath(name: string, relativePath: string): Promise<void>;
}
|
|
35
|
+
//# sourceMappingURL=types.d.ts.map
|
package/package.json
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "claude-eidetic",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Semantic code search MCP server — lean, correct, fast",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "dist/index.js",
|
|
7
|
+
"bin": {
|
|
8
|
+
"claude-eidetic": "dist/index.js"
|
|
9
|
+
},
|
|
10
|
+
"files": [
|
|
11
|
+
"dist/**/*.js",
|
|
12
|
+
"dist/**/*.d.ts",
|
|
13
|
+
"!dist/**/*.test.*",
|
|
14
|
+
"!dist/__test__/**",
|
|
15
|
+
"!dist/e2e/**"
|
|
16
|
+
],
|
|
17
|
+
"scripts": {
|
|
18
|
+
"build": "tsc",
|
|
19
|
+
"dev": "tsx --watch src/index.ts",
|
|
20
|
+
"start": "node dist/index.js",
|
|
21
|
+
"clean": "rimraf dist",
|
|
22
|
+
"typecheck": "tsc --noEmit",
|
|
23
|
+
"test": "vitest run",
|
|
24
|
+
"test:watch": "vitest",
|
|
25
|
+
"test:coverage": "vitest run --coverage",
|
|
26
|
+
"test:integration": "vitest run --config vitest.integration.config.ts",
|
|
27
|
+
"test:all": "vitest run && vitest run --config vitest.integration.config.ts",
|
|
28
|
+
"test:audit": "tsx scripts/audit/lonely-source.ts; tsx scripts/audit/mock-hygiene.ts; tsx scripts/audit/bare-hands.ts; tsx scripts/audit/timer-drift.ts",
|
|
29
|
+
"prepublishOnly": "npm run clean && npm run build"
|
|
30
|
+
},
|
|
31
|
+
"dependencies": {
|
|
32
|
+
"@modelcontextprotocol/sdk": "^1.12.1",
|
|
33
|
+
"@qdrant/js-client-rest": "^1.13.0",
|
|
34
|
+
"glob": "^10.0.0",
|
|
35
|
+
"openai": "^5.1.1",
|
|
36
|
+
"tree-sitter": "^0.21.1",
|
|
37
|
+
"tree-sitter-c-sharp": "^0.21.0",
|
|
38
|
+
"tree-sitter-cpp": "^0.22.0",
|
|
39
|
+
"tree-sitter-go": "^0.21.0",
|
|
40
|
+
"tree-sitter-java": "^0.21.0",
|
|
41
|
+
"tree-sitter-javascript": "^0.21.0",
|
|
42
|
+
"tree-sitter-python": "^0.21.0",
|
|
43
|
+
"tree-sitter-rust": "^0.21.0",
|
|
44
|
+
"tree-sitter-typescript": "^0.21.0",
|
|
45
|
+
"zod": "^3.25.0"
|
|
46
|
+
},
|
|
47
|
+
"optionalDependencies": {
|
|
48
|
+
"@zilliz/milvus2-sdk-node": "^2.5.10"
|
|
49
|
+
},
|
|
50
|
+
"devDependencies": {
|
|
51
|
+
"@types/node": "^20.0.0",
|
|
52
|
+
"@vitest/coverage-v8": "^4.0.18",
|
|
53
|
+
"rimraf": "^6.0.1",
|
|
54
|
+
"tsx": "^4.19.4",
|
|
55
|
+
"typescript": "^5.8.3",
|
|
56
|
+
"vitest": "^4.0.18"
|
|
57
|
+
},
|
|
58
|
+
"engines": {
|
|
59
|
+
"node": ">=20.0.0"
|
|
60
|
+
},
|
|
61
|
+
"license": "MIT"
|
|
62
|
+
}
|