uilint-duplicates 0.2.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-DURWZZLU.js +1941 -0
- package/dist/chunk-DURWZZLU.js.map +1 -0
- package/dist/index.d.ts +783 -0
- package/dist/index.js +55 -0
- package/dist/index.js.map +1 -0
- package/dist/node.d.ts +1 -0
- package/dist/node.js +55 -0
- package/dist/node.js.map +1 -0
- package/package.json +79 -0
|
@@ -0,0 +1,1941 @@
|
|
|
1
|
+
// src/embeddings/chunker.ts
|
|
2
|
+
import { parse, AST_NODE_TYPES } from "@typescript-eslint/typescript-estree";
|
|
3
|
+
var DEFAULT_MAX_LINES = 100;
|
|
4
|
+
var MIN_SECTION_LINES = 3;
|
|
5
|
+
/**
 * Derive a short, deterministic identifier for a chunk.
 *
 * The file path, start line and chunk text are joined with ":" and folded,
 * one UTF-16 code unit at a time, with a multiply-by-33 / XOR hash seeded
 * with 5381.
 *
 * @param {string} content - Source text of the chunk.
 * @param {string} filePath - Path of the file the chunk came from.
 * @param {number} startLine - Line on which the chunk starts.
 * @returns {string} The unsigned 32-bit hash as eight lowercase hex digits.
 */
function hashChunk(content, filePath, startLine) {
  const key = `${filePath}:${startLine}:${content}`;
  let acc = 5381;
  let pos = 0;
  while (pos < key.length) {
    // Math.imul keeps the product in signed 32-bit space, which is exactly
    // what the XOR operator would truncate it to anyway.
    acc = Math.imul(acc, 33) ^ key.charCodeAt(pos);
    pos += 1;
  }
  return (acc >>> 0).toString(16).padStart(8, "0");
}
|
|
13
|
+
/**
 * Parse a source file and collect one chunk per named function-like definition.
 *
 * Function declarations with a name, and variables initialised with an arrow
 * function or function expression, are turned into chunks via
 * `processFunction`. Chunks longer than `maxLines` are handed to
 * `splitLargeChunk` unless `splitStrategy` is "none". Nested definitions are
 * visited as well because the whole tree is walked.
 *
 * @param {string} filePath - Path recorded on every chunk and used in the parse warning.
 * @param {string} content - Full source text of the file.
 * @param {object} [options]
 * @param {number} [options.minLines=3] - Forwarded to `shouldIncludeChunk`.
 * @param {number} [options.maxLines] - Line count above which a chunk is split.
 * @param {boolean} [options.includeAnonymous=false] - Forwarded to `shouldIncludeChunk`.
 * @param {string[]} [options.kinds] - Forwarded to `shouldIncludeChunk`.
 * @param {string} [options.splitStrategy="jsx-children"] - Forwarded to `splitLargeChunk`; "none" disables splitting.
 * @returns {object[]} Chunks in traversal order; an empty array when parsing fails.
 */
function chunkFile(filePath, content, options = {}) {
  const {
    minLines = 3,
    maxLines = DEFAULT_MAX_LINES,
    includeAnonymous = false,
    kinds,
    splitStrategy = "jsx-children"
  } = options;

  let program;
  try {
    program = parse(content, {
      jsx: true,
      loc: true,
      range: true,
      tokens: false,
      comment: false
    });
  } catch (error) {
    // A file that cannot be parsed is reported and contributes no chunks.
    console.warn(`Failed to parse ${filePath}:`, error);
    return [];
  }

  const collected = [];
  const sourceLines = content.split("\n");

  // Pass 1: learn which top-level names are exported so chunks can be flagged.
  const exportedNames = /* @__PURE__ */ new Set();
  let defaultExportName = null;
  for (const statement of program.body) {
    switch (statement.type) {
      case AST_NODE_TYPES.ExportNamedDeclaration: {
        if (statement.declaration) {
          for (const declared of getDeclarationNames(statement.declaration)) {
            exportedNames.add(declared);
          }
        }
        if (statement.specifiers) {
          for (const spec of statement.specifiers) {
            if (spec.type !== AST_NODE_TYPES.ExportSpecifier) continue;
            const exported = spec.exported;
            exportedNames.add(
              exported.type === AST_NODE_TYPES.Identifier ? exported.name : exported.value
            );
          }
        }
        break;
      }
      case AST_NODE_TYPES.ExportDefaultDeclaration: {
        const target = statement.declaration;
        if (target.type === AST_NODE_TYPES.Identifier) {
          defaultExportName = target.name;
        } else if (target.type === AST_NODE_TYPES.FunctionDeclaration && target.id) {
          defaultExportName = target.id.name;
        }
        break;
      }
      default:
        break;
    }
  }

  const passesFilters = (candidate) => shouldIncludeChunk(candidate, minLines, includeAnonymous, kinds);

  // Build the chunk for one function node and store it, split when oversized.
  // `locationNode` (optional) supplies the source range instead of `fnNode`.
  const record = (fnNode, name, locationNode) => {
    const chunk = processFunction(
      fnNode,
      name,
      filePath,
      content,
      sourceLines,
      exportedNames,
      defaultExportName,
      locationNode
    );
    if (!chunk || !passesFilters(chunk)) return;
    const span = chunk.endLine - chunk.startLine + 1;
    if (span > maxLines && splitStrategy !== "none") {
      const pieces = splitLargeChunk(fnNode, chunk, content, sourceLines, maxLines, splitStrategy);
      collected.push(...pieces.filter(passesFilters));
    } else {
      collected.push(chunk);
    }
  };

  // Pass 2: depth-first walk over every AST node.
  const walk = (node) => {
    if (node.type === AST_NODE_TYPES.FunctionDeclaration && node.id) {
      record(node, node.id.name);
    }
    if (node.type === AST_NODE_TYPES.VariableDeclaration) {
      for (const declarator of node.declarations) {
        const init = declarator.init;
        const isFunctionValue = declarator.id.type === AST_NODE_TYPES.Identifier && init && (init.type === AST_NODE_TYPES.ArrowFunctionExpression || init.type === AST_NODE_TYPES.FunctionExpression);
        if (isFunctionValue) {
          // The whole variable declaration provides the chunk's location.
          record(init, declarator.id.name, node);
        }
      }
    }
    for (const key of Object.keys(node)) {
      // Skip back-references and position records; they are not child nodes.
      if (key === "parent" || key === "loc" || key === "range") continue;
      const value = node[key];
      if (!value || typeof value !== "object") continue;
      const candidates = Array.isArray(value) ? value : [value];
      for (const candidate of candidates) {
        if (candidate && typeof candidate === "object" && "type" in candidate) {
          walk(candidate);
        }
      }
    }
  };

  walk(program);
  return collected;
}
|
|
127
|
+
/**
 * Build the chunk record for a single function-like AST node.
 *
 * @param {object} node - Function node; used for classification and metadata.
 * @param {string} name - Name given to the chunk.
 * @param {string} filePath - Path stored on the chunk and mixed into its id.
 * @param {string} content - Full file text (not read here; kept for the call signature).
 * @param {string[]} lines - File text split into lines.
 * @param {Set<string>} exportedNames - Names exported from the file.
 * @param {string|null} defaultExportName - Name of the default export, if any.
 * @param {object} [locationNode] - Node whose `loc` overrides `node.loc` for the chunk range.
 * @returns {object|null} The chunk, or null when no location is available.
 */
function processFunction(node, name, filePath, content, lines, exportedNames, defaultExportName, locationNode) {
  const { loc } = locationNode || node;
  if (!loc) return null;

  const { start, end } = loc;
  // `loc` lines are 1-based and inclusive; slice() wants a 0-based half-open range.
  const text = lines.slice(start.line - 1, end.line).join("\n");

  return {
    id: hashChunk(text, filePath, start.line),
    filePath,
    startLine: start.line,
    endLine: end.line,
    startColumn: start.column,
    endColumn: end.column,
    kind: classifyFunction(name, node),
    name,
    content: text,
    metadata: extractMetadata(node, name, exportedNames, defaultExportName)
  };
}
|
|
150
|
+
/**
 * Decide which kind of chunk a function represents.
 *
 * - name starts with `use` followed by a capital letter -> "hook"
 * - contains JSX and the name starts with a capital letter -> "component"
 * - contains JSX otherwise                                 -> "jsx-fragment"
 * - everything else                                        -> "function"
 *
 * @param {string} name - Name of the function.
 * @param {object} node - Function AST node searched for JSX.
 * @returns {"hook"|"component"|"jsx-fragment"|"function"}
 */
function classifyFunction(name, node) {
  if (/^use[A-Z]/.test(name)) return "hook";
  if (!containsJSX(node)) return "function";
  return /^[A-Z]/.test(name) ? "component" : "jsx-fragment";
}
|
|
162
|
+
/**
 * Report whether a JSX element or fragment appears anywhere beneath a node.
 *
 * The search stops at the first match. `parent`, `loc` and `range`
 * properties are never followed.
 *
 * @param {object} node - Root AST node of the search.
 * @returns {boolean} True when a JSXElement or JSXFragment is found.
 */
function containsJSX(node) {
  const hasJSX = (current) => {
    if (current.type === AST_NODE_TYPES.JSXElement || current.type === AST_NODE_TYPES.JSXFragment) {
      return true;
    }
    return Object.keys(current).some((key) => {
      if (key === "parent" || key === "loc" || key === "range") return false;
      const value = current[key];
      if (!value || typeof value !== "object") return false;
      if (Array.isArray(value)) {
        return value.some((item) => Boolean(item) && typeof item === "object" && "type" in item && hasJSX(item));
      }
      return "type" in value && hasJSX(value);
    });
  };
  return hasJSX(node);
}
|
|
189
|
+
/**
 * Collect descriptive metadata for a function node.
 *
 * Records export status, the props derived from the first parameter, the
 * hooks called and the JSX element names used anywhere inside the node.
 * Hook detection only matches direct identifier calls whose name starts with
 * `use` followed by a capital letter (e.g. `useState(...)`).
 *
 * @param {object} node - Function AST node; must expose a `params` array.
 * @param {string} name - Name of the function.
 * @param {Set<string>} exportedNames - Names exported from the file.
 * @param {string|null} defaultExportName - Name of the default export, if any.
 * @returns {object} `{ isExported, isDefaultExport }` plus `props`, `hooks`
 *   and `jsxElements` arrays when they are non-empty.
 */
function extractMetadata(node, name, exportedNames, defaultExportName) {
  const isDefaultExport = defaultExportName === name;
  const metadata = {
    isExported: exportedNames.has(name) || isDefaultExport,
    isDefaultExport
  };

  if (node.params.length > 0) {
    const props = extractPropsFromParam(node.params[0]);
    if (props.length > 0) {
      metadata.props = props;
    }
  }

  // Sets keep first-seen order, so spreading them later gives de-duplicated lists.
  const hookNames = /* @__PURE__ */ new Set();
  const elementNames = /* @__PURE__ */ new Set();

  // Resolve `<div>` to "div" and `<A.B.C>` to "A.B.C"; other name shapes give null.
  const qualifiedName = (nameNode) => {
    if (nameNode.type === AST_NODE_TYPES.JSXIdentifier) return nameNode.name;
    if (nameNode.type !== AST_NODE_TYPES.JSXMemberExpression) return null;
    const segments = [];
    let cursor = nameNode;
    while (cursor.type === AST_NODE_TYPES.JSXMemberExpression) {
      if (cursor.property.type === AST_NODE_TYPES.JSXIdentifier) {
        segments.unshift(cursor.property.name);
      }
      cursor = cursor.object;
    }
    if (cursor.type === AST_NODE_TYPES.JSXIdentifier) {
      segments.unshift(cursor.name);
    }
    return segments.join(".");
  };

  const scan = (current) => {
    const isHookCall = current.type === AST_NODE_TYPES.CallExpression && current.callee.type === AST_NODE_TYPES.Identifier && /^use[A-Z]/.test(current.callee.name);
    if (isHookCall) {
      hookNames.add(current.callee.name);
    }
    if (current.type === AST_NODE_TYPES.JSXOpeningElement) {
      const tag = qualifiedName(current.name);
      if (tag !== null) {
        elementNames.add(tag);
      }
    }
    for (const key of Object.keys(current)) {
      if (key === "parent" || key === "loc" || key === "range") continue;
      const value = current[key];
      if (!value || typeof value !== "object") continue;
      const candidates = Array.isArray(value) ? value : [value];
      for (const candidate of candidates) {
        if (candidate && typeof candidate === "object" && "type" in candidate) {
          scan(candidate);
        }
      }
    }
  };
  scan(node);

  if (hookNames.size > 0) {
    metadata.hooks = [...hookNames];
  }
  if (elementNames.size > 0) {
    metadata.jsxElements = [...elementNames];
  }
  return metadata;
}
|
|
251
|
+
/**
 * List the prop names that a function's first parameter exposes.
 *
 * - `({ a, b, ...rest })` -> ["a", "b", "...rest"]
 * - `(props)`             -> ["props"]
 * - a parameter with a default value (`({ a } = {})`, `props = {}`) is an
 *   AssignmentPattern in the AST; its left-hand side is inspected, so defaults
 *   no longer hide the props.
 * - anything else         -> []
 *
 * @param {object} param - Parameter AST node.
 * @returns {string[]} Prop names in source order; rest elements are prefixed with "...".
 */
function extractPropsFromParam(param) {
  // Unwrap `pattern = defaultValue` so the binding pattern itself is examined.
  const target = param.type === AST_NODE_TYPES.AssignmentPattern ? param.left : param;

  if (target.type === AST_NODE_TYPES.Identifier) {
    return [target.name];
  }
  if (target.type !== AST_NODE_TYPES.ObjectPattern) {
    return [];
  }

  const props = [];
  for (const entry of target.properties) {
    if (entry.type === AST_NODE_TYPES.Property && entry.key.type === AST_NODE_TYPES.Identifier) {
      props.push(entry.key.name);
    } else if (entry.type === AST_NODE_TYPES.RestElement && entry.argument.type === AST_NODE_TYPES.Identifier) {
      props.push(`...${entry.argument.name}`);
    }
  }
  return props;
}
|
|
266
|
+
/**
 * Names introduced by a declaration node.
 *
 * Handles function and class declarations (when they carry an id) and
 * variable declarations (only declarators bound to a plain identifier).
 *
 * @param {object} decl - Declaration AST node.
 * @returns {string[]} Declared names; empty for any other node type.
 */
function getDeclarationNames(decl) {
  switch (decl.type) {
    case AST_NODE_TYPES.FunctionDeclaration:
    case AST_NODE_TYPES.ClassDeclaration:
      return decl.id ? [decl.id.name] : [];
    case AST_NODE_TYPES.VariableDeclaration:
      return decl.declarations
        .filter((declarator) => declarator.id.type === AST_NODE_TYPES.Identifier)
        .map((declarator) => declarator.id.name);
    default:
      return [];
  }
}
|
|
281
|
+
/**
 * Decide whether a chunk passes the configured filters.
 *
 * A chunk is kept when it spans at least `minLines` lines, has a name (unless
 * anonymous chunks are allowed) and, when a `kinds` list is given, its kind is
 * on that list.
 *
 * @param {object} chunk - Chunk with `startLine`, `endLine`, `name` and `kind`.
 * @param {number} minLines - Minimum inclusive line span.
 * @param {boolean} includeAnonymous - Keep chunks whose name is null.
 * @param {string[]} [kinds] - Allowed kinds; omitted means every kind is allowed.
 * @returns {boolean}
 */
function shouldIncludeChunk(chunk, minLines, includeAnonymous, kinds) {
  const span = chunk.endLine - chunk.startLine + 1;
  return (
    !(span < minLines) &&
    (Boolean(includeAnonymous) || chunk.name !== null) &&
    (!kinds || kinds.includes(chunk.kind))
  );
}
|
|
294
|
+
var DEFAULT_MAX_EMBEDDING_CHARS = 6e3;
|
|
295
|
+
/**
 * Render a chunk as the text that is sent to the embedding model.
 *
 * The text is a kind-specific header, the chunk content, and optional
 * "JSX elements" / "Hooks used" footers, joined by blank lines. Text longer
 * than `maxChars` is cut and marked as truncated.
 *
 * Truncation keeps the first `maxChars - 50` characters and appends the
 * marker. When `maxChars` is below 50 that subtraction used to go negative,
 * which made `slice` count from the end and produce output longer than the
 * limit; the budget is now clamped, and a limit too small to hold even the
 * marker results in a plain cut at `maxChars`.
 *
 * @param {object} chunk - Chunk with `kind`, `name`, `content`, `metadata` and,
 *   for sections, `sectionLabel` / `sectionIndex`.
 * @param {object} [options]
 * @param {number} [options.maxChars] - Upper bound on the returned text length.
 * @returns {string} Text to embed.
 */
function prepareEmbeddingInput(chunk, options = {}) {
  const { maxChars = DEFAULT_MAX_EMBEDDING_CHARS } = options;
  const displayName = chunk.name || "anonymous";
  const parts = [];

  const pushProps = () => {
    if (chunk.metadata.props?.length) {
      parts.push(`Props: ${chunk.metadata.props.join(", ")}`);
    }
  };
  const sectionLabel = () => chunk.sectionLabel || `section-${chunk.sectionIndex}`;

  switch (chunk.kind) {
    case "component":
      parts.push(`React component: ${displayName}`);
      pushProps();
      break;
    case "component-summary":
      parts.push(`React component summary: ${displayName}`);
      pushProps();
      parts.push("(Large component - see sections for JSX details)");
      break;
    case "jsx-section":
      parts.push(`JSX section from ${displayName}: ${sectionLabel()}`);
      break;
    case "hook":
      parts.push(`React hook: ${displayName}`);
      break;
    case "function":
      parts.push(`Function: ${displayName}`);
      break;
    case "function-summary":
      parts.push(`Function summary: ${displayName}`);
      parts.push("(Large function - split into sections)");
      break;
    case "function-section":
      parts.push(`Function section from ${displayName}: ${sectionLabel()}`);
      break;
    case "jsx-fragment":
      parts.push(`JSX fragment: ${displayName}`);
      break;
    default:
      // Unknown kinds get no header; only content and footers are emitted.
      break;
  }

  parts.push(chunk.content);
  if (chunk.metadata.jsxElements?.length) {
    parts.push(`JSX elements: ${chunk.metadata.jsxElements.join(", ")}`);
  }
  if (chunk.metadata.hooks?.length) {
    parts.push(`Hooks used: ${chunk.metadata.hooks.join(", ")}`);
  }

  const joined = parts.join("\n\n");
  if (joined.length > maxChars) {
    const marker = "\n\n[... content truncated for embedding ...]";
    if (maxChars < marker.length) {
      // No room for the marker: a hard cut is the only way to honour the limit.
      return joined.slice(0, Math.max(0, maxChars));
    }
    // 50 characters are reserved for the marker, never a negative budget.
    return joined.slice(0, Math.max(0, maxChars - 50)) + marker;
  }
  return joined;
}
|
|
340
|
+
/**
 * Break an oversized chunk into smaller ones.
 *
 * Components are first split along their JSX children when the strategy is
 * "jsx-children"; if that yields nothing, or for any other chunk or strategy,
 * the chunk is split by line windows instead.
 *
 * @param {object} node - Function AST node behind the chunk.
 * @param {object} originalChunk - The chunk to split.
 * @param {string} content - Full file text, forwarded to the JSX splitter.
 * @param {string[]} lines - File text split into lines.
 * @param {number} maxLines - Window size for the line-based fallback.
 * @param {string} strategy - Split strategy name.
 * @returns {object[]} Replacement chunks.
 */
function splitLargeChunk(node, originalChunk, content, lines, maxLines, strategy) {
  const wantsJSXSplit = strategy === "jsx-children" && originalChunk.kind === "component";
  const jsxPieces = wantsJSXSplit ? splitByJSXChildren(node, originalChunk, content, lines) : [];
  return jsxPieces.length > 0 ? jsxPieces : splitByLines(originalChunk, lines, maxLines);
}
|
|
349
|
+
/**
 * Split a component into a summary chunk plus one chunk per significant
 * top-level JSX child of its returned element.
 *
 * Returns an empty array, signalling "not applicable", when no JSX return is
 * found, the returned expression has no JSX root, or the root has fewer than
 * two significant children.
 *
 * @param {object} node - Component function AST node.
 * @param {object} originalChunk - Chunk describing the whole component.
 * @param {string} content - Full file text (not read here).
 * @param {string[]} lines - File text split into lines.
 * @returns {object[]} Summary chunk followed by the section chunks that were created.
 */
function splitByJSXChildren(node, originalChunk, content, lines) {
  const returnStatement = findJSXReturnStatement(node);
  const jsxRoot = returnStatement && returnStatement.argument ? getJSXRoot(returnStatement.argument) : null;
  if (!jsxRoot) {
    return [];
  }

  const children = getSignificantJSXChildren(jsxRoot);
  if (children.length < 2) {
    return [];
  }

  const summary = createSummaryChunk(node, originalChunk, returnStatement, lines);
  // Children that do not produce a section (null) are dropped; indexes keep
  // their position among the significant children.
  const sections = children
    .map((child, position) => createJSXSectionChunk(originalChunk, child, lines, position, summary.id))
    .filter(Boolean);
  return [summary, ...sections];
}
|
|
380
|
+
/**
 * Locate the statement through which a function returns JSX.
 *
 * For an expression-bodied arrow function whose body is JSX, a synthetic
 * ReturnStatement-shaped object wrapping that body is produced. Otherwise only
 * the direct statements of the function's block body are examined, and the
 * first `return` whose argument is JSX is returned.
 *
 * @param {object} node - Function AST node.
 * @returns {object|null} The (possibly synthetic) return statement, or null.
 */
function findJSXReturnStatement(node) {
  const isExpressionArrow = node.type === AST_NODE_TYPES.ArrowFunctionExpression && node.expression;
  if (isExpressionArrow) {
    const expressionBody = node.body;
    if (!isJSXNode(expressionBody)) {
      return null;
    }
    // No real `return` exists, so mimic one around the expression body.
    return {
      type: AST_NODE_TYPES.ReturnStatement,
      argument: expressionBody,
      loc: expressionBody.loc,
      range: expressionBody.range,
      parent: node
    };
  }

  const block = node.body;
  if (block.type !== AST_NODE_TYPES.BlockStatement) {
    return null;
  }
  const match = block.body.find(
    (stmt) => stmt.type === AST_NODE_TYPES.ReturnStatement && stmt.argument && isJSXNode(stmt.argument)
  );
  return match ?? null;
}
|
|
404
|
+
/**
 * Check whether a node is a JSX element or a JSX fragment.
 *
 * @param {object} node - AST node to test.
 * @returns {boolean}
 */
function isJSXNode(node) {
  const jsxTypes = [AST_NODE_TYPES.JSXElement, AST_NODE_TYPES.JSXFragment];
  return jsxTypes.includes(node.type);
}
|
|
407
|
+
/**
 * Return the node itself when it is a JSX element or fragment.
 *
 * @param {object} node - Expression returned by a component.
 * @returns {object|null} The same node, or null for any other node type.
 */
function getJSXRoot(node) {
  switch (node.type) {
    case AST_NODE_TYPES.JSXElement:
    case AST_NODE_TYPES.JSXFragment:
      return node;
    default:
      return null;
  }
}
|
|
416
|
+
/**
 * Select the children of a JSX root that are worth their own section.
 *
 * Only JSX elements, JSX fragments and expression containers are kept, in
 * source order; text nodes (whitespace or not) are never included.
 *
 * @param {object} jsxRoot - JSX element or fragment with a `children` array.
 * @returns {object[]} The significant child nodes.
 */
function getSignificantJSXChildren(jsxRoot) {
  const significantTypes = [
    AST_NODE_TYPES.JSXElement,
    AST_NODE_TYPES.JSXFragment,
    AST_NODE_TYPES.JSXExpressionContainer
  ];
  return jsxRoot.children.filter((child) => {
    // Whitespace-only text between tags is skipped up front.
    if (child.type === AST_NODE_TYPES.JSXText && child.value.trim() === "") {
      return false;
    }
    return significantTypes.includes(child.type);
  });
}
|
|
430
|
+
/**
 * Build the "component-summary" chunk for a component split by JSX children.
 *
 * The summary spans from the component's first line up to the line on which
 * the JSX return starts (capped at the component's last line) and ends with a
 * placeholder in place of the JSX body.
 *
 * @param {object} node - Component function AST node (not read here).
 * @param {object} originalChunk - Chunk describing the whole component.
 * @param {object} returnStatement - Return statement (real or synthetic) holding the JSX.
 * @param {string[]} lines - File text split into lines.
 * @returns {object} The summary chunk.
 */
function createSummaryChunk(node, originalChunk, returnStatement, lines) {
  const { startLine, endLine, filePath, startColumn, endColumn, name, metadata } = originalChunk;

  // Fall back to the component's end when the return has no usable location.
  const returnLine = returnStatement.loc?.start.line || endLine;
  const summaryEndLine = Math.min(returnLine, endLine);
  const head = lines.slice(startLine - 1, summaryEndLine).join("\n");
  const summaryContent = `${head}\n // ... JSX content (see sections)\n );`;

  return {
    id: hashChunk(summaryContent, filePath, startLine),
    filePath,
    startLine,
    endLine: summaryEndLine,
    startColumn,
    endColumn,
    kind: "component-summary",
    name,
    content: summaryContent,
    metadata: {
      ...metadata,
      // Hooks stay on the summary; JSX element names belong to the sections.
      jsxElements: undefined
    }
  };
}
|
|
453
|
+
/**
 * Build a "jsx-section" chunk for one JSX child of a split component.
 *
 * @param {object} originalChunk - Chunk describing the whole component.
 * @param {object} jsxChild - JSX child node the section covers.
 * @param {string[]} lines - File text split into lines.
 * @param {number} index - Position of the child among the significant children.
 * @param {string} parentId - Id of the component's summary chunk.
 * @returns {object|null} The section chunk, or null when the child has no
 *   location or spans fewer than MIN_SECTION_LINES lines.
 */
function createJSXSectionChunk(originalChunk, jsxChild, lines, index, parentId) {
  const { loc } = jsxChild;
  if (!loc) {
    return null;
  }

  const { start, end } = loc;
  if (end.line - start.line + 1 < MIN_SECTION_LINES) {
    return null;
  }

  const sectionContent = lines.slice(start.line - 1, end.line).join("\n");
  const sectionLabel = inferSectionLabel(jsxChild, index);
  const elementNames = extractJSXElementsFromNode(jsxChild);
  const { filePath, name, metadata } = originalChunk;

  return {
    id: hashChunk(sectionContent, filePath, start.line),
    filePath,
    startLine: start.line,
    endLine: end.line,
    startColumn: start.column,
    endColumn: end.column,
    kind: "jsx-section",
    name,
    content: sectionContent,
    metadata: {
      jsxElements: elementNames.length > 0 ? elementNames : undefined,
      isExported: metadata.isExported,
      isDefaultExport: metadata.isDefaultExport
    },
    parentId,
    sectionIndex: index,
    sectionLabel
  };
}
|
|
487
|
+
/**
 * Derive a short human-readable label for a JSX section.
 *
 * For JSX elements the attributes are scanned in source order:
 * - a string-literal `aria-label` / `aria-labelledby` is lower-cased,
 *   whitespace runs become "-", and the result is cut to 30 characters;
 * - a string-literal `className` / `class` contributes its first class that
 *   does not look like a utility class (bg-, text-, p-, flex, ...), cut to 30
 *   characters.
 * If no attribute yields a label, `<tag>-<index>` is used for simple tag
 * names; everything else becomes `section-<index>`.
 *
 * Empty class tokens (from leading whitespace or an empty attribute value) are
 * skipped. Previously such a token was returned as an empty label instead of
 * moving on to the next class, attribute or the tag-name fallback.
 *
 * @param {object} jsxChild - JSX child node.
 * @param {number} index - Position of the child among the significant children.
 * @returns {string} The label.
 */
function inferSectionLabel(jsxChild, index) {
  if (jsxChild.type !== AST_NODE_TYPES.JSXElement) {
    return `section-${index}`;
  }

  const utilityClass = /^(bg-|text-|p-|m-|w-|h-|flex|grid|border|rounded|shadow|hover:|focus:)/;
  const opening = jsxChild.openingElement;

  for (const attr of opening.attributes) {
    if (attr.type !== AST_NODE_TYPES.JSXAttribute || attr.name.type !== AST_NODE_TYPES.JSXIdentifier) {
      continue;
    }
    // Only plain string literals can be read statically.
    const isStringLiteral = attr.value?.type === AST_NODE_TYPES.Literal && typeof attr.value.value === "string";
    if (!isStringLiteral) {
      continue;
    }
    const text = attr.value.value;
    const attrName = attr.name.name;

    if (attrName === "aria-label" || attrName === "aria-labelledby") {
      return text.toLowerCase().replace(/\s+/g, "-").slice(0, 30);
    }
    if (attrName === "className" || attrName === "class") {
      const semanticClass = text.split(/\s+/).find((cls) => cls !== "" && !utilityClass.test(cls));
      if (semanticClass) {
        return semanticClass.slice(0, 30);
      }
    }
  }

  if (opening.name.type === AST_NODE_TYPES.JSXIdentifier) {
    return `${opening.name.name}-${index}`;
  }
  return `section-${index}`;
}
|
|
517
|
+
/**
 * Collect the distinct JSX element names used at or beneath a node.
 *
 * Simple names (`<div>`) are recorded as-is and member names (`<A.B.C>`) as a
 * dotted path. `parent`, `loc` and `range` properties are never followed.
 *
 * @param {object} node - Root AST node of the search.
 * @returns {string[]} Unique element names in first-seen order.
 */
function extractJSXElementsFromNode(node) {
  // A Set keeps insertion order, so spreading it yields the de-duplicated list.
  const seen = /* @__PURE__ */ new Set();

  const dottedName = (memberNode) => {
    const segments = [];
    let cursor = memberNode;
    while (cursor.type === AST_NODE_TYPES.JSXMemberExpression) {
      if (cursor.property.type === AST_NODE_TYPES.JSXIdentifier) {
        segments.unshift(cursor.property.name);
      }
      cursor = cursor.object;
    }
    if (cursor.type === AST_NODE_TYPES.JSXIdentifier) {
      segments.unshift(cursor.name);
    }
    return segments.join(".");
  };

  const scan = (current) => {
    if (current.type === AST_NODE_TYPES.JSXOpeningElement) {
      const tagNode = current.name;
      if (tagNode.type === AST_NODE_TYPES.JSXIdentifier) {
        seen.add(tagNode.name);
      } else if (tagNode.type === AST_NODE_TYPES.JSXMemberExpression) {
        seen.add(dottedName(tagNode));
      }
    }
    for (const key of Object.keys(current)) {
      if (key === "parent" || key === "loc" || key === "range") continue;
      const value = current[key];
      if (!value || typeof value !== "object") continue;
      const candidates = Array.isArray(value) ? value : [value];
      for (const candidate of candidates) {
        if (candidate && typeof candidate === "object" && "type" in candidate) {
          scan(candidate);
        }
      }
    }
  };

  scan(node);
  return [...seen];
}
|
|
557
|
+
/**
 * Split a chunk into overlapping windows of at most `maxLines` lines.
 *
 * Consecutive windows overlap by `min(10, floor(maxLines / 5))` lines. The
 * first window is emitted as a "*-summary" chunk carrying the original
 * metadata; later windows are "*-section" chunks that point at it through
 * `parentId`.
 *
 * Splitting stops as soon as a window reaches the chunk's last line.
 * Previously the loop could emit one more window lying entirely inside the
 * overlap tail (for example 181-185 after 91-185), duplicating content that
 * the preceding section already held. A `maxLines` below 1 is treated as 1 so
 * the loop always advances.
 *
 * @param {object} originalChunk - Chunk to split.
 * @param {string[]} lines - File text split into lines.
 * @param {number} maxLines - Maximum number of lines per window.
 * @returns {object[]} `[originalChunk]` when it already fits, otherwise the windows in order.
 */
function splitByLines(originalChunk, lines, maxLines) {
  const { startLine, endLine, filePath, name, metadata } = originalChunk;
  const totalLines = endLine - startLine + 1;
  if (totalLines <= maxLines) {
    return [originalChunk];
  }

  const isComponent = originalChunk.kind === "component" || originalChunk.kind === "jsx-fragment";
  const summaryKind = isComponent ? "component-summary" : "function-summary";
  const sectionKind = isComponent ? "jsx-section" : "function-section";

  // Guard against a non-positive or NaN window, which could never make progress.
  const windowSize = maxLines >= 1 ? maxLines : 1;
  const overlap = Math.min(10, Math.floor(windowSize / 5));
  // Distance between the starts of consecutive windows.
  const stride = Math.max(1, windowSize - overlap);

  const sections = [];
  for (let sectionIndex = 0, from = startLine; from <= endLine; sectionIndex += 1, from += stride) {
    const to = Math.min(from + windowSize - 1, endLine);
    const sectionContent = lines.slice(from - 1, to).join("\n");
    const isFirstSection = sectionIndex === 0;

    sections.push({
      id: hashChunk(sectionContent, filePath, from),
      filePath,
      startLine: from,
      endLine: to,
      startColumn: isFirstSection ? originalChunk.startColumn : 0,
      endColumn: to === endLine ? originalChunk.endColumn : lines[to - 1]?.length || 0,
      kind: isFirstSection ? summaryKind : sectionKind,
      name,
      content: sectionContent,
      metadata: isFirstSection ? metadata : {
        isExported: metadata.isExported,
        isDefaultExport: metadata.isDefaultExport
      },
      parentId: isFirstSection ? undefined : sections[0]?.id,
      sectionIndex: isFirstSection ? undefined : sectionIndex,
      sectionLabel: isFirstSection ? undefined : `lines-${from}-${to}`
    });

    // Everything up to the last line is covered; a further window would only
    // repeat the overlap tail.
    if (to >= endLine) {
      break;
    }
  }
  return sections;
}
|
|
599
|
+
|
|
600
|
+
// src/embeddings/ollama-embeddings.ts
|
|
601
|
+
var DEFAULT_BASE_URL = "http://localhost:11434";
|
|
602
|
+
var DEFAULT_MODEL = "nomic-embed-text";
|
|
603
|
+
var DEFAULT_TIMEOUT = 6e4;
|
|
604
|
+
var DEFAULT_BATCH_SIZE = 10;
|
|
605
|
+
/**
 * Minimal client for the embedding endpoints of an Ollama server.
 *
 * Changes compared with the previous version:
 * - `embedBatchDirect` rejects when the server returns a different number of
 *   embeddings than inputs, instead of handing back a misaligned list.
 * - `isModelAvailable` ignores model entries without a string `name` rather
 *   than reporting "unavailable" for the whole list.
 * - The POST to `/api/embed` lives in one private helper shared by `embed`
 *   and `embedBatchDirect`.
 */
var OllamaEmbeddingClient = class {
  baseUrl;
  model;
  timeout;
  batchSize;
  /**
   * @param {object} [options]
   * @param {string} [options.baseUrl] - Server origin; a falsy value selects the default.
   * @param {string} [options.model] - Embedding model name; a falsy value selects the default.
   * @param {number} [options.timeout] - Per-request timeout in ms; a falsy value (including 0) selects the default.
   * @param {number} [options.batchSize] - Maximum inputs per request; a falsy value (including 0) selects the default.
   */
  constructor(options = {}) {
    this.baseUrl = options.baseUrl || DEFAULT_BASE_URL;
    this.model = options.model || DEFAULT_MODEL;
    this.timeout = options.timeout || DEFAULT_TIMEOUT;
    this.batchSize = options.batchSize || DEFAULT_BATCH_SIZE;
  }
  /**
   * POST `input` to `/api/embed` and return the parsed JSON body.
   * The request is aborted after `this.timeout` milliseconds.
   *
   * @param {string|string[]} input - Text or texts to embed.
   * @returns {Promise<object>} Parsed response body.
   * @throws {Error} When the server answers with a non-2xx status.
   */
  async #requestEmbeddings(input) {
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), this.timeout);
    try {
      const response = await fetch(`${this.baseUrl}/api/embed`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          model: this.model,
          input
        }),
        signal: controller.signal
      });
      if (!response.ok) {
        const errorText = await response.text();
        throw new Error(`Ollama API error (${response.status}): ${errorText}`);
      }
      // Awaited inside the try so the timeout also covers reading the body.
      return await response.json();
    } finally {
      clearTimeout(timeoutId);
    }
  }
  /**
   * Generate embedding for a single text
   *
   * @param {string} text - Text to embed.
   * @returns {Promise<{embedding: number[], model: string, promptTokens: (number|undefined)}>}
   * @throws {Error} On a non-2xx response or when no embedding array is present.
   */
  async embed(text) {
    const data = await this.#requestEmbeddings(text);
    // Accept both response shapes: `embeddings: [[...]]` and `embedding: [...]`.
    const embedding = data.embeddings?.[0] || data.embedding;
    if (!embedding || !Array.isArray(embedding)) {
      throw new Error("Invalid embedding response from Ollama");
    }
    return {
      embedding,
      model: this.model,
      promptTokens: data.prompt_eval_count
    };
  }
  /**
   * Generate embeddings for multiple texts
   * Automatically batches large inputs
   *
   * @param {string[]} texts - Texts to embed.
   * @returns {Promise<Array<{embedding: number[], model: string}>>} Results in input order.
   */
  async embedBatch(texts) {
    if (texts.length === 0) {
      return [];
    }
    if (texts.length <= this.batchSize) {
      return this.embedBatchDirect(texts);
    }
    const results = [];
    // Batches are sent one after another so results stay in input order.
    for (let i = 0; i < texts.length; i += this.batchSize) {
      const batch = texts.slice(i, i + this.batchSize);
      const batchResults = await this.embedBatchDirect(batch);
      results.push(...batchResults);
    }
    return results;
  }
  /**
   * Embed a batch directly (no chunking)
   *
   * @param {string[]} texts - Texts to embed in a single request.
   * @returns {Promise<Array<{embedding: number[], model: string}>>} One result per input, in order.
   * @throws {Error} On a non-2xx response, a missing `embeddings` array, or a
   *   count that does not match the number of inputs.
   */
  async embedBatchDirect(texts) {
    const data = await this.#requestEmbeddings(texts);
    const embeddings = data.embeddings;
    if (!embeddings || !Array.isArray(embeddings)) {
      throw new Error("Invalid batch embedding response from Ollama");
    }
    if (embeddings.length !== texts.length) {
      // Callers pair results with inputs by index; a short or long list would
      // silently attach embeddings to the wrong texts.
      throw new Error(
        `Invalid batch embedding response from Ollama: expected ${texts.length} embeddings, received ${embeddings.length}`
      );
    }
    return embeddings.map((embedding) => ({
      embedding,
      model: this.model
    }));
  }
  /**
   * Check if Ollama is available
   *
   * @returns {Promise<boolean>} True when `/api/tags` answers with a 2xx status within 5 seconds.
   */
  async isAvailable() {
    try {
      const response = await fetch(`${this.baseUrl}/api/tags`, {
        method: "GET",
        signal: AbortSignal.timeout(5e3)
      });
      return response.ok;
    } catch {
      // Any failure (network error, timeout) simply means "not available".
      return false;
    }
  }
  /**
   * Check if the embedding model is available
   *
   * @returns {Promise<boolean>} True when a listed model is named exactly like
   *   the configured model or starts with `<model>:`.
   */
  async isModelAvailable() {
    try {
      const response = await fetch(`${this.baseUrl}/api/tags`, {
        method: "GET",
        signal: AbortSignal.timeout(5e3)
      });
      if (!response.ok) return false;
      const data = await response.json();
      const models = data.models || [];
      return models.some(
        (m) => typeof m?.name === "string" && (m.name === this.model || m.name.startsWith(`${this.model}:`))
      );
    } catch {
      return false;
    }
  }
  /**
   * Pull the embedding model if not available
   *
   * @returns {Promise<void>}
   * @throws {Error} When the pull request answers with a non-2xx status.
   */
  async ensureModel() {
    const available = await this.isModelAvailable();
    if (available) return;
    console.log(`Pulling embedding model ${this.model}...`);
    const response = await fetch(`${this.baseUrl}/api/pull`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        model: this.model,
        stream: false
      })
    });
    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(`Failed to pull model ${this.model}: ${errorText}`);
    }
  }
  /**
   * Get the embedding dimension for the current model
   * (Requires generating a test embedding)
   *
   * @returns {Promise<number>} Length of the embedding returned for the text "test".
   */
  async getEmbeddingDimension() {
    const result = await this.embed("test");
    return result.embedding.length;
  }
  /**
   * Get the current model name
   *
   * @returns {string}
   */
  getModel() {
    return this.model;
  }
  /**
   * Set the model name
   *
   * @param {string} model - Model used by subsequent requests.
   */
  setModel(model) {
    this.model = model;
  }
};
|
|
776
|
+
/** Shared client instance handed out by getOllamaEmbeddingClient(). */
var defaultClient = null;
/**
 * Return the shared embedding client.
 *
 * A new client is constructed when none exists yet, and also whenever a
 * truthy `options` value is passed; in that case the new client replaces
 * the shared one for all later callers.
 *
 * @param {object} [options] - Forwarded to the OllamaEmbeddingClient constructor.
 * @returns {OllamaEmbeddingClient} The shared client.
 */
function getOllamaEmbeddingClient(options) {
  const mustCreate = !defaultClient || options;
  if (mustCreate) {
    defaultClient = new OllamaEmbeddingClient(options);
  }
  return defaultClient;
}
|
|
783
|
+
|
|
784
|
+
// src/query/api.ts
|
|
785
|
+
import { resolve } from "path";
|
|
786
|
+
|
|
787
|
+
// src/cache/incremental-indexer.ts
|
|
788
|
+
import { existsSync as existsSync4, mkdirSync as mkdirSync4, readFileSync as readFileSync4, writeFileSync as writeFileSync4 } from "fs";
|
|
789
|
+
import { join as join4, relative } from "path";
|
|
790
|
+
import { glob } from "glob";
|
|
791
|
+
|
|
792
|
+
// src/index/vector-store.ts
|
|
793
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
|
|
794
|
+
import { join } from "path";
|
|
795
|
+
/**
 * Cosine similarity of two equal-length numeric vectors.
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector.
 * @returns {number} dot(a, b) / (|a| * |b|), or 0 when either vector has zero magnitude.
 * @throws {Error} When the vectors differ in length.
 */
function cosineSimilarity(a, b) {
  if (a.length !== b.length) {
    throw new Error(`Vector dimension mismatch: ${a.length} vs ${b.length}`);
  }
  let dot = 0;
  let squaredA = 0;
  let squaredB = 0;
  let index = 0;
  while (index < a.length) {
    const x = a[index];
    const y = b[index];
    dot += x * y;
    squaredA += x * x;
    squaredB += y * y;
    index += 1;
  }
  const magnitudeA = Math.sqrt(squaredA);
  const magnitudeB = Math.sqrt(squaredB);
  // A zero vector has no direction; report "no similarity" instead of dividing by zero.
  const hasZeroVector = magnitudeA === 0 || magnitudeB === 0;
  return hasZeroVector ? 0 : dot / (magnitudeA * magnitudeB);
}
|
|
814
|
+
/**
 * In-memory store of embedding vectors keyed by ID, with brute-force cosine
 * search and a simple on-disk format (ids.json + embeddings.bin).
 */
var VectorStore = class {
  /** Vectors keyed by ID. */
  vectors = /* @__PURE__ */ new Map();
  /** Length every stored vector must have; null until configured or the first vector is added. */
  dimension = null;
  /** IDs in insertion order; vectors are written to embeddings.bin in this order. */
  idIndex = [];
  /**
   * @param {{dimension?: number}} [options] - Optional fixed vector dimension.
   */
  constructor(options = {}) {
    if (options.dimension) {
      this.dimension = options.dimension;
    }
  }
  /**
   * Add a vector, or replace the vector already stored under `id`.
   *
   * @param {string} id
   * @param {number[]} vector
   * @throws {Error} When the vector length differs from the store's dimension.
   */
  add(id, vector) {
    if (this.dimension === null) {
      // The first vector added fixes the dimension for the whole store.
      this.dimension = vector.length;
    } else if (vector.length !== this.dimension) {
      throw new Error(
        `Vector dimension mismatch: expected ${this.dimension}, got ${vector.length}`
      );
    }
    if (!this.vectors.has(id)) {
      this.idIndex.push(id);
    }
    this.vectors.set(id, vector);
  }
  /**
   * Add multiple vectors at once.
   *
   * @param {Iterable<{id: string, vector: number[]}>} items
   */
  addBatch(items) {
    for (const { id, vector } of items) {
      this.add(id, vector);
    }
  }
  /**
   * Remove a vector from the store.
   *
   * @param {string} id
   * @returns {boolean} true when a vector was removed, false when the ID was unknown.
   */
  remove(id) {
    if (!this.vectors.has(id)) {
      return false;
    }
    this.vectors.delete(id);
    this.idIndex = this.idIndex.filter((existing) => existing !== id);
    return true;
  }
  /**
   * Get a vector by ID.
   *
   * @param {string} id
   * @returns {number[] | null}
   */
  get(id) {
    return this.vectors.get(id) ?? null;
  }
  /**
   * Check if a vector exists.
   *
   * @param {string} id
   * @returns {boolean}
   */
  has(id) {
    return this.vectors.has(id);
  }
  /**
   * Find the stored vectors most similar to a query vector.
   *
   * Every stored vector is scored with cosineSimilarity; results scoring at
   * least `threshold` are sorted by descending score and cut to `k` entries.
   *
   * @param {number[]} query
   * @param {number} [k=10] - Maximum number of results.
   * @param {number} [threshold=0] - Minimum score for a result to be kept.
   * @returns {{id: string, score: number, distance: number}[]} `distance` is `1 - score`.
   * @throws {Error} When the query length differs from the store's dimension.
   */
  findSimilar(query, k = 10, threshold = 0) {
    if (this.dimension !== null && query.length !== this.dimension) {
      throw new Error(
        `Query vector dimension mismatch: expected ${this.dimension}, got ${query.length}`
      );
    }
    const results = [];
    for (const [id, vector] of this.vectors) {
      const score = cosineSimilarity(query, vector);
      if (score >= threshold) {
        results.push({
          id,
          score,
          distance: 1 - score
        });
      }
    }
    results.sort((a, b) => b.score - a.score);
    return results.slice(0, k);
  }
  /**
   * Get the number of vectors in the store.
   *
   * @returns {number}
   */
  size() {
    return this.vectors.size;
  }
  /**
   * Get the dimension of vectors.
   *
   * @returns {number | null} null while no dimension is known.
   */
  getDimension() {
    return this.dimension;
  }
  /**
   * Get all IDs in insertion order.
   *
   * @returns {string[]} A copy; mutating it does not affect the store.
   */
  getIds() {
    return [...this.idIndex];
  }
  /**
   * Clear all vectors and forget the dimension.
   */
  clear() {
    this.vectors.clear();
    this.idIndex = [];
    this.dimension = null;
  }
  /**
   * Save the vector store to disk.
   *
   * Format:
   * - ids.json: ordered array of IDs matching vector positions
   * - embeddings.bin: two little-endian uint32 values (dimension, count)
   *   followed by the vectors as little-endian float32 values, in ids.json order
   *
   * @param {string} dirPath - Directory to write into; created if missing.
   */
  async save(dirPath) {
    if (!existsSync(dirPath)) {
      mkdirSync(dirPath, { recursive: true });
    }
    const headerSize = 8;
    writeFileSync(join(dirPath, "ids.json"), JSON.stringify(this.idIndex), "utf-8");
    const embeddingsPath = join(dirPath, "embeddings.bin");
    if (this.vectors.size === 0) {
      // An empty store is stored as a zeroed header (dimension 0, count 0).
      writeFileSync(embeddingsPath, Buffer.alloc(headerSize));
      return;
    }
    const dimension = this.dimension;
    const count = this.vectors.size;
    const buffer = Buffer.alloc(headerSize + count * dimension * 4);
    buffer.writeUInt32LE(dimension, 0);
    buffer.writeUInt32LE(count, 4);
    let offset = headerSize;
    for (const id of this.idIndex) {
      for (const value of this.vectors.get(id)) {
        buffer.writeFloatLE(value, offset);
        offset += 4;
      }
    }
    writeFileSync(embeddingsPath, buffer);
  }
  /**
   * Load the vector store from disk, replacing the current contents.
   *
   * The two files are validated against each other before anything is
   * stored, so a failed load always leaves the store empty and consistent.
   *
   * @param {string} dirPath - Directory previously written by save().
   * @throws {Error} When a file is missing, or when ids.json and
   *   embeddings.bin do not describe the same set of vectors.
   */
  async load(dirPath) {
    const idsPath = join(dirPath, "ids.json");
    const embeddingsPath = join(dirPath, "embeddings.bin");
    if (!existsSync(idsPath) || !existsSync(embeddingsPath)) {
      throw new Error(`Vector store files not found in ${dirPath}`);
    }
    this.clear();
    const headerSize = 8;
    const ids = JSON.parse(readFileSync(idsPath, "utf-8"));
    const buffer = readFileSync(embeddingsPath);
    if (!Array.isArray(ids) || buffer.length < headerSize) {
      throw new Error(`Vector store index is corrupted in ${dirPath}: unreadable ids.json or embeddings.bin header`);
    }
    const dimension = buffer.readUInt32LE(0);
    const count = buffer.readUInt32LE(4);
    // Every ID must own exactly one vector; otherwise entries()/save() would
    // later hit IDs that have no vector behind them.
    if (ids.length !== count || new Set(ids).size !== ids.length) {
      throw new Error(
        `Vector store index is corrupted in ${dirPath}: ids.json lists ${ids.length} IDs but embeddings.bin holds ${count} vectors`
      );
    }
    if (buffer.length < headerSize + count * dimension * 4) {
      throw new Error(`Vector store index is corrupted in ${dirPath}: embeddings.bin is truncated`);
    }
    if (count === 0) {
      return;
    }
    this.dimension = dimension;
    this.idIndex = ids;
    let offset = headerSize;
    for (const id of ids) {
      const vector = [];
      for (let j = 0; j < dimension; j++) {
        vector.push(buffer.readFloatLE(offset));
        offset += 4;
      }
      this.vectors.set(id, vector);
    }
  }
  /**
   * Iterate over all [id, vector] pairs in insertion order.
   */
  *entries() {
    for (const id of this.idIndex) {
      yield [id, this.vectors.get(id)];
    }
  }
  /**
   * Get stats about the store.
   *
   * @returns {{size: number, dimension: number | null, memoryBytes: number}}
   *   `memoryBytes` is a rough estimate: 4 bytes per stored value plus a flat
   *   50 bytes per ID.
   */
  getStats() {
    const memoryBytes = this.dimension ? this.vectors.size * this.dimension * 4 + this.idIndex.length * 50 : 0;
    return {
      size: this.vectors.size,
      dimension: this.dimension,
      memoryBytes
    };
  }
};
|
|
1005
|
+
|
|
1006
|
+
// src/index/metadata-store.ts
|
|
1007
|
+
import { existsSync as existsSync2, mkdirSync as mkdirSync2, readFileSync as readFileSync2, writeFileSync as writeFileSync2 } from "fs";
|
|
1008
|
+
import { join as join2 } from "path";
|
|
1009
|
+
/**
 * In-memory map from chunk ID to chunk metadata, with lookup helpers and
 * JSON persistence (metadata.json).
 */
var MetadataStore = class {
  /** Metadata objects keyed by chunk ID, in insertion order. */
  chunks = /* @__PURE__ */ new Map();
  /**
   * Store (or overwrite) the metadata for one chunk.
   */
  set(id, metadata) {
    this.chunks.set(id, metadata);
  }
  /**
   * Store several `{ id, metadata }` items in order.
   */
  setBatch(items) {
    for (const item of items) {
      const { id, metadata } = item;
      this.set(id, metadata);
    }
  }
  /**
   * Look up metadata by chunk ID; null when nothing is stored.
   */
  get(id) {
    const found = this.chunks.get(id);
    return found || null;
  }
  /**
   * Whether metadata exists for the given chunk ID.
   */
  has(id) {
    return this.chunks.has(id);
  }
  /**
   * Delete one chunk's metadata; returns whether something was deleted.
   */
  remove(id) {
    return this.chunks.delete(id);
  }
  /**
   * Delete every chunk whose `filePath` equals the argument and return the
   * IDs that were deleted.
   */
  removeByFilePath(filePath) {
    const removedIds = [];
    this.chunks.forEach((metadata, id) => {
      if (metadata.filePath !== filePath) {
        return;
      }
      this.chunks.delete(id);
      removedIds.push(id);
    });
    return removedIds;
  }
  /**
   * All `{ id, metadata }` pairs whose `filePath` equals the argument.
   */
  getByFilePath(filePath) {
    const matches = [];
    this.chunks.forEach((metadata, id) => {
      if (metadata.filePath === filePath) {
        matches.push({ id, metadata });
      }
    });
    return matches;
  }
  /**
   * First `{ id, metadata }` pair with the given `contentHash`, or null.
   */
  getByContentHash(contentHash) {
    for (const entry of this.chunks.entries()) {
      const [id, metadata] = entry;
      if (metadata.contentHash === contentHash) {
        return { id, metadata };
      }
    }
    return null;
  }
  /**
   * First chunk in `filePath` whose startLine..endLine range (inclusive)
   * contains `line`, or null.
   */
  getAtLocation(filePath, line) {
    for (const entry of this.chunks.entries()) {
      const [id, metadata] = entry;
      const coversLine = metadata.filePath === filePath && metadata.startLine <= line && metadata.endLine >= line;
      if (coversLine) {
        return { id, metadata };
      }
    }
    return null;
  }
  /**
   * Distinct `filePath` values, in order of first appearance.
   */
  getFilePaths() {
    const seen = /* @__PURE__ */ new Set();
    this.chunks.forEach((metadata) => {
      seen.add(metadata.filePath);
    });
    return Array.from(seen);
  }
  /**
   * Number of stored chunks.
   */
  size() {
    return this.chunks.size;
  }
  /**
   * Drop all metadata.
   */
  clear() {
    this.chunks.clear();
  }
  /**
   * Iterate over `[id, metadata]` pairs in insertion order.
   */
  *entries() {
    for (const entry of this.chunks.entries()) {
      yield entry;
    }
  }
  /**
   * All chunk IDs in insertion order.
   */
  getIds() {
    return Array.from(this.chunks.keys());
  }
  /**
   * Write all metadata to `<dirPath>/metadata.json`, creating the directory
   * when it does not exist.
   */
  async save(dirPath) {
    if (!existsSync2(dirPath)) {
      mkdirSync2(dirPath, { recursive: true });
    }
    const metadataPath = join2(dirPath, "metadata.json");
    const serialized = JSON.stringify(Object.fromEntries(this.chunks), null, 2);
    writeFileSync2(metadataPath, serialized, "utf-8");
  }
  /**
   * Replace the current contents with `<dirPath>/metadata.json`.
   * Throws when that file does not exist.
   */
  async load(dirPath) {
    const metadataPath = join2(dirPath, "metadata.json");
    if (!existsSync2(metadataPath)) {
      throw new Error(`Metadata file not found: ${metadataPath}`);
    }
    this.clear();
    const parsed = JSON.parse(readFileSync2(metadataPath, "utf-8"));
    for (const id of Object.keys(parsed)) {
      this.chunks.set(id, parsed[id]);
    }
  }
  /**
   * All `{ id, metadata }` pairs whose `kind` equals the argument.
   */
  filterByKind(kind) {
    const matches = [];
    this.chunks.forEach((metadata, id) => {
      if (metadata.kind === kind) {
        matches.push({ id, metadata });
      }
    });
    return matches;
  }
  /**
   * Case-insensitive substring search over `name`; chunks without a name
   * never match.
   */
  searchByName(query) {
    const needle = query.toLowerCase();
    const matches = [];
    this.chunks.forEach((metadata, id) => {
      if (metadata.name && metadata.name.toLowerCase().includes(needle)) {
        matches.push({ id, metadata });
      }
    });
    return matches;
  }
};
|
|
1176
|
+
|
|
1177
|
+
// src/cache/file-tracker.ts
|
|
1178
|
+
import {
|
|
1179
|
+
existsSync as existsSync3,
|
|
1180
|
+
mkdirSync as mkdirSync3,
|
|
1181
|
+
readFileSync as readFileSync3,
|
|
1182
|
+
writeFileSync as writeFileSync3,
|
|
1183
|
+
statSync
|
|
1184
|
+
} from "fs";
|
|
1185
|
+
import { join as join3 } from "path";
|
|
1186
|
+
/** Cached result of initializing "xxhash-wasm"; null until first use. */
var xxhashInstance = null;
/**
 * Return the xxhash instance, importing and initializing "xxhash-wasm" on
 * the first call and reusing the cached instance afterwards.
 */
async function getXxhash() {
  if (xxhashInstance) {
    return xxhashInstance;
  }
  const xxhashModule = await import("xxhash-wasm");
  xxhashInstance = await xxhashModule.default();
  return xxhashInstance;
}
|
|
1194
|
+
/**
 * Hash a string with the XOR variant of djb2 (seed 5381, multiplier 33)
 * over its UTF-16 code units.
 *
 * @param {string} str
 * @returns {string} The hash as an unsigned 32-bit value in lowercase hex, without zero padding.
 */
function djb2Hash(str) {
  let hash = 5381;
  let index = 0;
  while (index < str.length) {
    // The XOR truncates the running value back to 32 bits on every step.
    hash = (hash * 33) ^ str.charCodeAt(index);
    index += 1;
  }
  const unsigned = hash >>> 0;
  return unsigned.toString(16);
}
|
|
1201
|
+
/**
 * Hash file content, preferring xxhash (h64ToString) and falling back to
 * djb2Hash when xxhash cannot be obtained or fails.
 *
 * @param {string} content
 * @returns {Promise<string>}
 */
async function hashContent(content) {
  try {
    const hasher = await getXxhash();
    const digest = hasher.h64ToString(content);
    return digest;
  } catch {
    // Best effort: any xxhash failure falls back to the pure-JS hash.
    return djb2Hash(content);
  }
}
|
|
1209
|
+
/**
 * Synchronous content hash; always uses djb2Hash.
 *
 * @param {string} content
 * @returns {string}
 */
function hashContentSync(content) {
  const digest = djb2Hash(content);
  return digest;
}
|
|
1212
|
+
/** Version written into hashes.json; stores with another version are discarded on load. */
var HASH_STORE_VERSION = 1;
/**
 * Tracks a content hash, mtime and chunk IDs per file so that later runs can
 * tell which files were added, modified or deleted. Persisted as hashes.json.
 */
var FileTracker = class {
  /** `{ version, files }`, where `files` maps file path -> `{ contentHash, mtimeMs, chunkIds }`. */
  store = {
    version: HASH_STORE_VERSION,
    files: {}
  };
  /**
   * Get the hash entry for a file.
   *
   * @param {string} filePath
   * @returns {object | null} The stored entry, or null when the file is not tracked.
   */
  getEntry(filePath) {
    return this.store.files[filePath] || null;
  }
  /**
   * Set the hash entry for a file.
   *
   * @param {string} filePath
   * @param {object} entry
   */
  setEntry(filePath, entry) {
    this.store.files[filePath] = entry;
  }
  /**
   * Remove the hash entry for a file.
   *
   * @param {string} filePath
   * @returns {boolean} true when an entry was removed.
   */
  removeEntry(filePath) {
    if (this.store.files[filePath]) {
      delete this.store.files[filePath];
      return true;
    }
    return false;
  }
  /**
   * Get all tracked file paths.
   *
   * @returns {string[]}
   */
  getTrackedFiles() {
    return Object.keys(this.store.files);
  }
  /**
   * Clear all entries.
   */
  clear() {
    this.store = {
      version: HASH_STORE_VERSION,
      files: {}
    };
  }
  /**
   * Detect changes between the given files and the stored hashes.
   *
   * Tracked paths missing from `files` are reported as "deleted". Each path
   * in `files` is read and hashed: untracked paths are "added", paths whose
   * hash differs are "modified". A tracked path that can no longer be read
   * is reported as "deleted"; an untracked unreadable path is ignored.
   *
   * @param {string[]} files - Paths of the files that currently exist.
   * @returns {Promise<object[]>} Change records with `path`, `type` and the
   *   applicable `oldHash` / `newHash`.
   */
  async detectChanges(files) {
    const changes = [];
    const currentFiles = new Set(files);
    for (const [storedPath, entry] of Object.entries(this.store.files)) {
      if (!currentFiles.has(storedPath)) {
        changes.push({
          path: storedPath,
          type: "deleted",
          oldHash: entry.contentHash
        });
      }
    }
    for (const filePath of files) {
      const entry = this.store.files[filePath];
      try {
        const content = readFileSync3(filePath, "utf-8");
        const newHash = await hashContent(content);
        if (!entry) {
          changes.push({
            path: filePath,
            type: "added",
            newHash
          });
        } else if (entry.contentHash !== newHash) {
          changes.push({
            path: filePath,
            type: "modified",
            oldHash: entry.contentHash,
            newHash
          });
        }
      } catch {
        // Unreadable now but tracked before: treat it as gone.
        if (entry) {
          changes.push({
            path: filePath,
            type: "deleted",
            oldHash: entry.contentHash
          });
        }
      }
    }
    return changes;
  }
  /**
   * Update the stored hash for a file.
   *
   * @param {string} filePath - Must exist on disk; its mtime is recorded.
   * @param {string} content - Content to hash (not re-read from disk).
   * @param {string[]} chunkIds - IDs of the chunks produced from this file.
   */
  async updateFile(filePath, content, chunkIds) {
    const hash = await hashContent(content);
    const stat = statSync(filePath);
    this.store.files[filePath] = {
      contentHash: hash,
      mtimeMs: stat.mtimeMs,
      chunkIds
    };
  }
  /**
   * Save to `<dirPath>/hashes.json`, creating the directory if needed.
   *
   * @param {string} dirPath
   */
  async save(dirPath) {
    if (!existsSync3(dirPath)) {
      mkdirSync3(dirPath, { recursive: true });
    }
    const hashesPath = join3(dirPath, "hashes.json");
    writeFileSync3(hashesPath, JSON.stringify(this.store, null, 2), "utf-8");
  }
  /**
   * Load from `<dirPath>/hashes.json`.
   *
   * A missing file or a store written with a different version resets the
   * tracker to empty. A file that parses but does not have the expected
   * shape is rejected up front, rather than being accepted and failing later
   * inside getTrackedFiles()/detectChanges().
   *
   * @param {string} dirPath
   * @throws {Error} When hashes.json is not valid JSON or lacks a `files` object.
   */
  async load(dirPath) {
    const hashesPath = join3(dirPath, "hashes.json");
    if (!existsSync3(hashesPath)) {
      this.clear();
      return;
    }
    const data = JSON.parse(readFileSync3(hashesPath, "utf-8"));
    const isObject = (value) => value !== null && typeof value === "object" && !Array.isArray(value);
    if (!isObject(data)) {
      throw new Error(`Invalid hash store file: ${hashesPath}`);
    }
    if (data.version !== HASH_STORE_VERSION) {
      this.clear();
      return;
    }
    if (!isObject(data.files)) {
      throw new Error(`Invalid hash store file: ${hashesPath}`);
    }
    this.store = data;
  }
  /**
   * Get stats.
   *
   * @returns {{trackedFiles: number}}
   */
  getStats() {
    return {
      trackedFiles: Object.keys(this.store.files).length
    };
  }
};
|
|
1350
|
+
|
|
1351
|
+
// src/cache/incremental-indexer.ts
|
|
1352
|
+
/** Index directory, relative to the project root. */
var INDEX_DIR = ".uilint/.duplicates-index";
/** File name of the index manifest inside the index directory. */
var MANIFEST_FILE = "manifest.json";
/** Version number written into the manifest. */
var MANIFEST_VERSION = 1;
/**
 * Builds and maintains the duplicates index for a project: finds source
 * files, chunks them, embeds the chunks and keeps the vector store, metadata
 * store and file tracker in sync on disk.
 */
var IncrementalIndexer = class {
  vectorStore;
  metadataStore;
  fileTracker;
  embeddingClient;
  projectRoot;
  indexDir;
  options;
  /** Manifest last read from or written to disk; null when none is known. */
  manifest = null;
  /**
   * @param {string} projectRoot - Directory that is scanned and that holds the index directory.
   * @param {object} [options] - Reads `model`, `baseUrl`, `include`,
   *   `exclude`, `chunking` and `onProgress`.
   */
  constructor(projectRoot, options = {}) {
    this.projectRoot = projectRoot;
    this.indexDir = join4(projectRoot, INDEX_DIR);
    this.options = options;
    this.vectorStore = new VectorStore();
    this.metadataStore = new MetadataStore();
    this.fileTracker = new FileTracker();
    this.embeddingClient = new OllamaEmbeddingClient({
      model: options.model,
      baseUrl: options.baseUrl
    });
  }
  /**
   * Get the include patterns.
   *
   * @returns {string[]} `options.include`, or the default TS/JS source glob.
   */
  getIncludePatterns() {
    return this.options.include || ["**/*.{ts,tsx,js,jsx}"];
  }
  /**
   * Get the exclude patterns.
   *
   * @returns {string[]} Built-in excludes followed by `options.exclude`.
   */
  getExcludePatterns() {
    return [
      "**/node_modules/**",
      "**/dist/**",
      "**/build/**",
      "**/.git/**",
      "**/*.test.{ts,tsx,js,jsx}",
      "**/*.spec.{ts,tsx,js,jsx}",
      "**/__tests__/**",
      ...this.options.exclude || []
    ];
  }
  /**
   * Find all files to index.
   *
   * @returns {Promise<string[]>} Absolute paths, de-duplicated across include patterns.
   */
  async findFiles() {
    const files = [];
    for (const pattern of this.getIncludePatterns()) {
      const matches = await glob(pattern, {
        cwd: this.projectRoot,
        ignore: this.getExcludePatterns(),
        absolute: true,
        nodir: true
      });
      files.push(...matches);
    }
    return [...new Set(files)];
  }
  /**
   * Load the existing index from disk.
   *
   * Does nothing when the index directory is missing. If any part of the
   * index fails to load, all in-memory state is reset so the next run
   * rebuilds from scratch instead of working with a half-loaded index.
   */
  async load() {
    if (!existsSync4(this.indexDir)) {
      return;
    }
    try {
      const manifestPath = join4(this.indexDir, MANIFEST_FILE);
      if (existsSync4(manifestPath)) {
        const content = readFileSync4(manifestPath, "utf-8");
        this.manifest = JSON.parse(content);
      }
      await this.vectorStore.load(this.indexDir);
      await this.metadataStore.load(this.indexDir);
      await this.fileTracker.load(this.indexDir);
    } catch {
      this.vectorStore = new VectorStore();
      this.metadataStore = new MetadataStore();
      this.fileTracker = new FileTracker();
      this.manifest = null;
    }
  }
  /**
   * Save the index to disk: the manifest first, then the three stores.
   */
  async save() {
    if (!existsSync4(this.indexDir)) {
      mkdirSync4(this.indexDir, { recursive: true });
    }
    const now = (/* @__PURE__ */ new Date()).toISOString();
    const dimension = this.vectorStore.getDimension();
    this.manifest = {
      version: MANIFEST_VERSION,
      // Keep the original creation time when a manifest already exists.
      createdAt: this.manifest?.createdAt || now,
      updatedAt: now,
      embeddingModel: this.embeddingClient.getModel(),
      dimension: dimension || 0,
      fileCount: this.fileTracker.getTrackedFiles().length,
      chunkCount: this.metadataStore.size()
    };
    const manifestPath = join4(this.indexDir, MANIFEST_FILE);
    writeFileSync4(
      manifestPath,
      JSON.stringify(this.manifest, null, 2),
      "utf-8"
    );
    await this.vectorStore.save(this.indexDir);
    await this.metadataStore.save(this.indexDir);
    await this.fileTracker.save(this.indexDir);
  }
  /**
   * Index all files.
   *
   * @param {boolean} [force=false] - When true, in-memory state is cleared
   *   and every file is re-indexed; otherwise the existing index is loaded
   *   and only detected changes are processed.
   * @returns {Promise<object>} `{ added, modified, deleted, totalChunks, duration }`.
   * @throws {Error} When the embedding client reports it is not available.
   */
  async indexAll(force = false) {
    const startTime = Date.now();
    const log = this.options.onProgress || (() => {
    });
    const available = await this.embeddingClient.isAvailable();
    if (!available) {
      throw new Error(
        "Ollama is not available. Make sure it's running at " + (this.options.baseUrl || "http://localhost:11434")
      );
    }
    if (force) {
      this.vectorStore.clear();
      this.metadataStore.clear();
      this.fileTracker.clear();
    } else {
      await this.load();
    }
    log("Finding files...");
    const files = await this.findFiles();
    log(`Found ${files.length} files`);
    const changes = force ? files.map((path) => ({ path, type: "added", newHash: "" })) : await this.fileTracker.detectChanges(files);
    const result = await this.processChanges(changes, log);
    log("Saving index...");
    await this.save();
    return {
      ...result,
      duration: Date.now() - startTime
    };
  }
  /**
   * Update the index incrementally from the changes detected on disk.
   *
   * @returns {Promise<object>} `{ added, modified, deleted, totalChunks, duration }`.
   * @throws {Error} When the embedding client reports it is not available.
   */
  async update() {
    const startTime = Date.now();
    const log = this.options.onProgress || (() => {
    });
    await this.load();
    const available = await this.embeddingClient.isAvailable();
    if (!available) {
      throw new Error("Ollama is not available");
    }
    log("Detecting changes...");
    const files = await this.findFiles();
    const changes = await this.fileTracker.detectChanges(files);
    if (changes.length === 0) {
      log("No changes detected");
      return {
        added: 0,
        modified: 0,
        deleted: 0,
        totalChunks: this.metadataStore.size(),
        duration: Date.now() - startTime
      };
    }
    log(`Found ${changes.length} changed files`);
    const result = await this.processChanges(changes, log);
    log("Saving index...");
    await this.save();
    return {
      ...result,
      duration: Date.now() - startTime
    };
  }
  /**
   * Process file changes.
   *
   * Deleted files are removed from all stores first; added and modified
   * files are then re-indexed one by one. A file that fails is logged with
   * console.warn and skipped, and is only counted in `added`/`modified`
   * once it has been indexed successfully.
   *
   * @param {object[]} changes - Change records with `path` and `type`.
   * @param {Function} log - Progress callback `(message, current?, total?)`.
   * @returns {Promise<object>} `{ added, modified, deleted, totalChunks }`.
   */
  async processChanges(changes, log) {
    let added = 0;
    let modified = 0;
    let deleted = 0;
    for (const change of changes.filter((c) => c.type === "deleted")) {
      this.removeFileChunks(change.path);
      this.fileTracker.removeEntry(change.path);
      deleted++;
    }
    const filesToProcess = changes.filter((c) => c.type !== "deleted");
    for (let i = 0; i < filesToProcess.length; i++) {
      const change = filesToProcess[i];
      log(
        `Processing ${relative(this.projectRoot, change.path)}`,
        i + 1,
        filesToProcess.length
      );
      try {
        await this.indexChangedFile(change);
        if (change.type === "modified") {
          modified++;
        } else {
          added++;
        }
      } catch (error) {
        console.warn(`Error processing ${change.path}:`, error);
      }
    }
    return {
      added,
      modified,
      deleted,
      totalChunks: this.metadataStore.size()
    };
  }
  /**
   * Remove every chunk of one file from the metadata and vector stores.
   *
   * @private
   * @param {string} filePath
   */
  removeFileChunks(filePath) {
    for (const id of this.metadataStore.removeByFilePath(filePath)) {
      this.vectorStore.remove(id);
    }
  }
  /**
   * Read, chunk, embed and store a single added or modified file, then
   * record it in the file tracker. Any failure is thrown to the caller.
   *
   * @private
   * @param {object} change - Change record with `path` and `type`.
   */
  async indexChangedFile(change) {
    // Read first: if the file is unreadable, the old chunks stay in place.
    const content = readFileSync4(change.path, "utf-8");
    if (change.type === "modified") {
      this.removeFileChunks(change.path);
    }
    const chunks = chunkFile(change.path, content, this.options.chunking);
    if (chunks.length === 0) {
      // Track the file anyway so it is not reported as "added" on every run.
      await this.fileTracker.updateFile(change.path, content, []);
      return;
    }
    const embeddingInputs = chunks.map((c) => prepareEmbeddingInput(c));
    const embeddings = await this.embeddingClient.embedBatch(embeddingInputs);
    const chunkIds = [];
    for (let j = 0; j < chunks.length; j++) {
      const chunk = chunks[j];
      this.vectorStore.add(chunk.id, embeddings[j].embedding);
      this.metadataStore.set(chunk.id, {
        filePath: chunk.filePath,
        startLine: chunk.startLine,
        endLine: chunk.endLine,
        startColumn: chunk.startColumn,
        endColumn: chunk.endColumn,
        kind: chunk.kind,
        name: chunk.name,
        contentHash: hashContentSync(chunk.content),
        metadata: {
          props: chunk.metadata.props,
          hooks: chunk.metadata.hooks,
          jsxElements: chunk.metadata.jsxElements,
          isExported: chunk.metadata.isExported,
          isDefaultExport: chunk.metadata.isDefaultExport
        }
      });
      chunkIds.push(chunk.id);
    }
    await this.fileTracker.updateFile(change.path, content, chunkIds);
  }
  /**
   * Get index statistics.
   *
   * @returns {object} `{ totalFiles, totalChunks, indexSizeBytes, manifest }`.
   */
  getStats() {
    return {
      totalFiles: this.fileTracker.getTrackedFiles().length,
      totalChunks: this.metadataStore.size(),
      indexSizeBytes: this.vectorStore.getStats().memoryBytes,
      manifest: this.manifest
    };
  }
  /**
   * Get the vector store (for queries).
   */
  getVectorStore() {
    return this.vectorStore;
  }
  /**
   * Get the metadata store (for queries).
   */
  getMetadataStore() {
    return this.metadataStore;
  }
  /**
   * Check if an index exists, i.e. a manifest file is present in the index directory.
   *
   * @returns {boolean}
   */
  hasIndex() {
    return existsSync4(join4(this.indexDir, MANIFEST_FILE));
  }
};
|
|
1639
|
+
/**
 * Convenience factory for IncrementalIndexer.
 *
 * @param {string} projectRoot - Passed through to the constructor.
 * @param {object} [options] - Passed through to the constructor.
 * @returns {IncrementalIndexer}
 */
function createIndexer(projectRoot, options) {
  const indexer = new IncrementalIndexer(projectRoot, options);
  return indexer;
}
|
|
1642
|
+
|
|
1643
|
+
// src/detection/scorer.ts
|
|
1644
|
+
/**
 * Ratio between the line counts of two chunks (smaller / larger).
 *
 * @param {{startLine: number, endLine: number}} chunk1
 * @param {{startLine: number, endLine: number}} chunk2
 * @returns {number} smaller / larger line count, or 1 when the larger count is not positive.
 */
function calculateSizeRatio(chunk1, chunk2) {
  // Line ranges are inclusive, hence the + 1.
  const lineCounts = [chunk1, chunk2].map((chunk) => chunk.endLine - chunk.startLine + 1);
  const smaller = Math.min(...lineCounts);
  const larger = Math.max(...lineCounts);
  if (larger > 0) {
    return smaller / larger;
  }
  return 1;
}
|
|
1651
|
+
/**
 * Combine embedding similarity with the chunks' size ratio into one score.
 *
 * @param {number} similarity - Similarity between the two chunks.
 * @param {object} chunk1 - Passed to calculateSizeRatio.
 * @param {object} chunk2 - Passed to calculateSizeRatio.
 * @returns {{similarity: number, sizeRatio: number, combinedScore: number}}
 *   `combinedScore` weights similarity at 0.85 and size ratio at 0.15.
 */
function calculateDuplicateScore(similarity, chunk1, chunk2) {
  const sizeRatio = calculateSizeRatio(chunk1, chunk2);
  return {
    similarity,
    sizeRatio,
    combinedScore: similarity * 0.85 + sizeRatio * 0.15
  };
}
|
|
1660
|
+
/**
 * Arithmetic mean of a list of similarity scores.
 *
 * @param {number[]} similarities
 * @returns {number} The mean, or 0 for an empty list.
 */
function calculateGroupAverageSimilarity(similarities) {
  const count = similarities.length;
  if (count === 0) {
    return 0;
  }
  let total = 0;
  similarities.forEach((value) => {
    total += value;
  });
  return total / count;
}
|
|
1664
|
+
/**
 * Return a sorted copy of duplicate groups: largest groups first, ties
 * broken by higher average similarity. The input is not modified.
 *
 * @param {Iterable<{members: Array, avgSimilarity: number}>} groups
 * @returns {Array} New sorted array.
 */
function sortDuplicateGroups(groups) {
  const bySizeThenSimilarity = (left, right) => {
    const sizeDiff = right.members.length - left.members.length;
    return sizeDiff !== 0 ? sizeDiff : right.avgSimilarity - left.avgSimilarity;
  };
  const copy = [...groups];
  copy.sort(bySizeThenSimilarity);
  return copy;
}
|
|
1671
|
+
|
|
1672
|
+
// src/detection/duplicate-finder.ts
|
|
1673
|
+
/**
 * Group chunks whose embeddings are similar to each other.
 *
 * Chunks are visited in metadata-store order. Each unprocessed chunk with a
 * stored vector becomes a reference; its up-to-50 nearest neighbours at or
 * above `threshold` are the candidates. When no `kind` filter is given and
 * some candidates share the reference's kind, only those are kept. A group
 * is emitted when at least `minGroupSize - 1` candidates remain, and all of
 * its members are then excluded from later groups.
 *
 * @param {object} vectorStore - Provides `get(id)` and `findSimilar(vector, k, threshold)`.
 * @param {object} metadataStore - Provides `entries()` and `get(id)`.
 * @param {object} [options]
 * @param {number} [options.threshold=0.85] - Minimum similarity for a candidate.
 * @param {number} [options.minGroupSize=2] - Minimum members per group, reference included.
 * @param {string} [options.kind] - Only consider chunks of this kind.
 * @param {string[]} [options.excludePaths=[]] - Skip chunks whose file path contains any of these substrings.
 * @returns {Array} Groups `{ members, avgSimilarity, kind }`, ordered by sortDuplicateGroups.
 */
function findDuplicateGroups(vectorStore, metadataStore, options = {}) {
  const {
    threshold = 0.85,
    minGroupSize = 2,
    kind,
    excludePaths = []
  } = options;
  const isExcluded = (meta) => excludePaths.some((p) => meta.filePath.includes(p));
  const references = [...metadataStore.entries()].filter(([, meta]) => {
    if (kind && meta.kind !== kind) {
      return false;
    }
    if (excludePaths.length > 0 && isExcluded(meta)) {
      return false;
    }
    return true;
  });
  const groups = [];
  const processed = /* @__PURE__ */ new Set();
  for (const [id, metadata] of references) {
    if (processed.has(id)) continue;
    const vector = vectorStore.get(id);
    if (!vector) continue;
    // Collect eligible neighbours, already shaped like group members.
    let candidates = [];
    for (const hit of vectorStore.findSimilar(vector, 50, threshold)) {
      if (hit.id === id || processed.has(hit.id)) continue;
      const hitMeta = metadataStore.get(hit.id);
      if (!hitMeta) continue;
      if (kind && hitMeta.kind !== kind) continue;
      if (isExcluded(hitMeta)) continue;
      candidates.push({ id: hit.id, metadata: hitMeta, score: hit.score });
    }
    if (!kind && candidates.length > 0) {
      // Prefer neighbours of the reference's own kind, if there are any.
      const sameKind = candidates.filter((c) => c.metadata.kind === metadata.kind);
      if (sameKind.length > 0) {
        candidates = sameKind;
      }
    }
    if (candidates.length >= minGroupSize - 1) {
      // The reference chunk is always the first member, with score 1.
      const members = [{ id, metadata, score: 1 }, ...candidates];
      const similarities = candidates.map((c) => c.score);
      for (const candidate of candidates) {
        processed.add(candidate.id);
      }
      processed.add(id);
      groups.push({
        members,
        avgSimilarity: calculateGroupAverageSimilarity(similarities),
        kind: metadata.kind
      });
    }
  }
  return sortDuplicateGroups(groups);
}
|
|
1743
|
+
/**
 * Find the stored chunks most similar to the chunk at a file location.
 *
 * @param {object} vectorStore - Must provide `get(id)` and
 *   `findSimilar(vector, limit, threshold)`.
 * @param {object} metadataStore - Must provide `getAtLocation(filePath, line)`.
 * @param {string} filePath - File to look up.
 * @param {number} line - Line to look up within `filePath`.
 * @param {object} [options]
 * @param {number} [options.top=10] - Maximum number of results.
 * @param {number} [options.threshold=0.5] - Passed to `findSimilar`.
 * @returns {Array} Results from `findSimilar` without the looked-up chunk
 *   itself; empty when no chunk or no vector is found for the location.
 */
function findSimilarToLocation(vectorStore, metadataStore, filePath, line, options = {}) {
  const { top = 10, threshold = 0.5 } = options;
  const chunk = metadataStore.getAtLocation(filePath, line);
  const vector = chunk ? vectorStore.get(chunk.id) : null;
  if (!vector) {
    return [];
  }
  // Ask for one extra hit because the chunk itself is removed below.
  const neighbours = vectorStore.findSimilar(vector, top + 1, threshold);
  const others = neighbours.filter((hit) => hit.id !== chunk.id);
  return others.slice(0, top);
}
|
|
1756
|
+
/**
 * Look up the stored chunks closest to an already-computed query embedding.
 *
 * @param {object} vectorStore - Must provide `findSimilar(vector, limit, threshold)`.
 * @param {*} queryEmbedding - Embedding forwarded unchanged to `findSimilar`.
 * @param {object} [options]
 * @param {number} [options.top=10] - Maximum number of results requested.
 * @param {number} [options.threshold=0.5] - Passed to `findSimilar`.
 * @returns {Array} Whatever `vectorStore.findSimilar` returns.
 */
function findSimilarToQuery(vectorStore, queryEmbedding, options = {}) {
  const limit = options.top === undefined ? 10 : options.top;
  const minimumScore = options.threshold === undefined ? 0.5 : options.threshold;
  return vectorStore.findSimilar(queryEmbedding, limit, minimumScore);
}
|
|
1760
|
+
|
|
1761
|
+
// src/query/api.ts
|
|
1762
|
+
// Indexers already created in this process, keyed by resolved project root.
var indexerCache = /* @__PURE__ */ new Map();

/**
 * Return the cached indexer for a project, creating and caching one on a miss.
 *
 * The cache key is the resolved path only, so `options` are used solely when
 * a new indexer has to be created.
 *
 * @param {string} path - Project directory; resolved before use.
 * @param {object} [options] - Forwarded to `createIndexer` on a cache miss.
 * @returns {object} The indexer for the resolved project root.
 */
function getIndexer(path, options) {
  const cacheKey = resolve(path);
  const cached = indexerCache.get(cacheKey);
  if (cached) {
    return cached;
  }
  const created = createIndexer(cacheKey, options);
  indexerCache.set(cacheKey, created);
  return created;
}
|
|
1773
|
+
/**
 * Drop cached indexers.
 *
 * @param {string} [path] - When given (and truthy), only the entry for this
 *   resolved path is removed; otherwise the whole cache is emptied.
 * @returns {void}
 */
function clearIndexerCache(path) {
  if (!path) {
    indexerCache.clear();
    return;
  }
  indexerCache.delete(resolve(path));
}
|
|
1781
|
+
/**
 * Build or refresh the index for a project directory.
 *
 * Any cached indexer for the directory is discarded and replaced by a fresh
 * one built from the given options. Then:
 *   - `options.force` truthy   -> `indexer.indexAll(true)`
 *   - an index already exists  -> `indexer.update()`
 *   - otherwise                -> `indexer.indexAll(false)`
 *
 * @param {string} path - Project directory; resolved before use.
 * @param {object} [options]
 * @param {*} [options.model] - Forwarded to `createIndexer`.
 * @param {*} [options.baseUrl] - Forwarded to `createIndexer`.
 * @param {*} [options.exclude] - Forwarded to `createIndexer`.
 * @param {*} [options.onProgress] - Forwarded to `createIndexer`.
 * @param {boolean} [options.force] - Selects the `indexAll(true)` branch.
 * @returns {Promise<*>} The result of the indexer call that was made.
 */
async function indexDirectory(path, options = {}) {
  const projectRoot = resolve(path);
  clearIndexerCache(projectRoot);

  const { model, baseUrl, exclude, onProgress } = options;
  const indexer = createIndexer(projectRoot, { model, baseUrl, exclude, onProgress });
  indexerCache.set(projectRoot, indexer);

  if (options.force) {
    const rebuilt = await indexer.indexAll(true);
    return rebuilt;
  }
  const outcome = indexer.hasIndex()
    ? await indexer.update()
    : await indexer.indexAll(false);
  return outcome;
}
|
|
1799
|
+
/**
 * Load a project's index and report its duplicate groups in a flat shape.
 *
 * @param {object} [options]
 * @param {string} [options.path] - Project directory; falls back to
 *   `process.cwd()` when falsy.
 * @param {number} [options.threshold] - Forwarded to `findDuplicateGroups`.
 * @param {number} [options.minGroupSize] - Forwarded to `findDuplicateGroups`.
 * @param {string} [options.kind] - Forwarded to `findDuplicateGroups`.
 * @returns {Promise<Array<{members: Array, avgSimilarity: number, kind: *}>>}
 *   One entry per group; each member carries `filePath`, `startLine`,
 *   `endLine`, `name`, `kind` and `score`.
 * @throws {Error} When the indexer reports no index after loading.
 */
async function findDuplicates(options = {}) {
  const projectRoot = resolve(options.path || process.cwd());
  const indexer = getIndexer(projectRoot);
  await indexer.load();
  if (!indexer.hasIndex()) {
    throw new Error(
      `No index found at ${projectRoot}. Run 'uilint duplicates index' first.`
    );
  }

  const vectorStore = indexer.getVectorStore();
  const metadataStore = indexer.getMetadataStore();
  const { threshold, minGroupSize, kind } = options;
  const groups = findDuplicateGroups(vectorStore, metadataStore, {
    threshold,
    minGroupSize,
    kind
  });

  // Flatten each member's metadata record and score into one plain object.
  const describeMember = ({ metadata, score }) => ({
    filePath: metadata.filePath,
    startLine: metadata.startLine,
    endLine: metadata.endLine,
    name: metadata.name,
    kind: metadata.kind,
    score
  });

  return groups.map((group) => {
    const members = group.members.map((member) => describeMember(member));
    return {
      members,
      avgSimilarity: group.avgSimilarity,
      kind: group.kind
    };
  });
}
|
|
1828
|
+
/**
 * Embed a text query and return the indexed chunks closest to it.
 *
 * @param {string} query - Text passed to the embedding client's `embed`.
 * @param {object} [options]
 * @param {string} [options.path] - Project directory; falls back to
 *   `process.cwd()` when falsy.
 * @param {*} [options.model] - Forwarded to `OllamaEmbeddingClient`.
 * @param {*} [options.baseUrl] - Forwarded to `OllamaEmbeddingClient`.
 * @param {number} [options.top] - Forwarded to `findSimilarToQuery`.
 * @param {number} [options.threshold] - Forwarded to `findSimilarToQuery`.
 * @returns {Promise<Array<object>>} One record per hit that has metadata, with
 *   `filePath`, `startLine`, `endLine`, `name`, `kind` and `score`.
 * @throws {Error} When the indexer reports no index after loading.
 */
async function searchSimilar(query, options = {}) {
  const projectRoot = resolve(options.path || process.cwd());
  const indexer = getIndexer(projectRoot);
  await indexer.load();
  if (!indexer.hasIndex()) {
    throw new Error(
      `No index found at ${projectRoot}. Run 'uilint duplicates index' first.`
    );
  }

  const { model, baseUrl } = options;
  const embeddingClient = new OllamaEmbeddingClient({ model, baseUrl });
  const queryResult = await embeddingClient.embed(query);

  const vectorStore = indexer.getVectorStore();
  const metadataStore = indexer.getMetadataStore();
  const hits = findSimilarToQuery(vectorStore, queryResult.embedding, {
    top: options.top,
    threshold: options.threshold
  });

  // Hits whose id has no metadata record are silently dropped.
  return hits.flatMap((hit) => {
    const metadata = metadataStore.get(hit.id);
    if (!metadata) {
      return [];
    }
    const { filePath, startLine, endLine, name, kind } = metadata;
    return [{ filePath, startLine, endLine, name, kind, score: hit.score }];
  });
}
|
|
1861
|
+
/**
 * Return the indexed chunks most similar to the chunk at a given file/line.
 *
 * @param {object} options
 * @param {string} [options.path] - Project directory; falls back to
 *   `process.cwd()` when falsy.
 * @param {string} options.filePath - Forwarded to `findSimilarToLocation`.
 * @param {number} options.line - Forwarded to `findSimilarToLocation`.
 * @param {number} [options.top] - Forwarded to `findSimilarToLocation`.
 * @param {number} [options.threshold] - Forwarded to `findSimilarToLocation`.
 * @returns {Promise<Array<object>>} One record per hit that has metadata, with
 *   `filePath`, `startLine`, `endLine`, `name`, `kind` and `score`.
 * @throws {Error} When the indexer reports no index after loading.
 */
async function findSimilarAtLocation(options) {
  const projectRoot = resolve(options.path || process.cwd());
  const indexer = getIndexer(projectRoot);
  await indexer.load();
  if (!indexer.hasIndex()) {
    throw new Error(
      `No index found at ${projectRoot}. Run 'uilint duplicates index' first.`
    );
  }

  const vectorStore = indexer.getVectorStore();
  const metadataStore = indexer.getMetadataStore();
  const searchOptions = { top: options.top, threshold: options.threshold };
  const hits = findSimilarToLocation(
    vectorStore,
    metadataStore,
    options.filePath,
    options.line,
    searchOptions
  );

  // Hits whose id has no metadata record are silently dropped.
  const described = [];
  hits.forEach((hit) => {
    const metadata = metadataStore.get(hit.id);
    if (!metadata) {
      return;
    }
    described.push({
      filePath: metadata.filePath,
      startLine: metadata.startLine,
      endLine: metadata.endLine,
      name: metadata.name,
      kind: metadata.kind,
      score: hit.score
    });
  });
  return described;
}
|
|
1895
|
+
/**
 * Report whether the indexer for a project says it has an index.
 *
 * @param {string} [path=process.cwd()] - Project directory; resolved before use.
 * @returns {*} The result of the indexer's own `hasIndex()` call.
 */
function hasIndex(path = process.cwd()) {
  return getIndexer(resolve(path)).hasIndex();
}
|
|
1900
|
+
/**
 * Load a project's index and summarise its statistics.
 *
 * @param {string} [path=process.cwd()] - Project directory; resolved before use.
 * @returns {Promise<{totalFiles: *, totalChunks: *, indexSizeBytes: *,
 *   embeddingModel: *, lastUpdated: *}>} Counts and size copied from
 *   `indexer.getStats()`; `embeddingModel` and `lastUpdated` come from the
 *   stats manifest and are `null` when the manifest or the field is missing
 *   or falsy.
 */
async function getIndexStats(path = process.cwd()) {
  const indexer = getIndexer(resolve(path));
  await indexer.load();
  const { totalFiles, totalChunks, indexSizeBytes, manifest } = indexer.getStats();
  return {
    totalFiles,
    totalChunks,
    indexSizeBytes,
    embeddingModel: manifest?.embeddingModel || null,
    lastUpdated: manifest?.updatedAt || null
  };
}
|
|
1913
|
+
|
|
1914
|
+
export {
|
|
1915
|
+
chunkFile,
|
|
1916
|
+
prepareEmbeddingInput,
|
|
1917
|
+
OllamaEmbeddingClient,
|
|
1918
|
+
getOllamaEmbeddingClient,
|
|
1919
|
+
VectorStore,
|
|
1920
|
+
MetadataStore,
|
|
1921
|
+
hashContent,
|
|
1922
|
+
hashContentSync,
|
|
1923
|
+
FileTracker,
|
|
1924
|
+
IncrementalIndexer,
|
|
1925
|
+
createIndexer,
|
|
1926
|
+
calculateSizeRatio,
|
|
1927
|
+
calculateDuplicateScore,
|
|
1928
|
+
calculateGroupAverageSimilarity,
|
|
1929
|
+
sortDuplicateGroups,
|
|
1930
|
+
findDuplicateGroups,
|
|
1931
|
+
findSimilarToLocation,
|
|
1932
|
+
findSimilarToQuery,
|
|
1933
|
+
clearIndexerCache,
|
|
1934
|
+
indexDirectory,
|
|
1935
|
+
findDuplicates,
|
|
1936
|
+
searchSimilar,
|
|
1937
|
+
findSimilarAtLocation,
|
|
1938
|
+
hasIndex,
|
|
1939
|
+
getIndexStats
|
|
1940
|
+
};
|
|
1941
|
+
//# sourceMappingURL=chunk-DURWZZLU.js.map
|