uilint-duplicates 0.2.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-DURWZZLU.js +1941 -0
- package/dist/chunk-DURWZZLU.js.map +1 -0
- package/dist/index.d.ts +783 -0
- package/dist/index.js +55 -0
- package/dist/index.js.map +1 -0
- package/dist/node.d.ts +1 -0
- package/dist/node.js +55 -0
- package/dist/node.js.map +1 -0
- package/package.json +79 -0
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,783 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Core types for code chunking and embeddings
|
|
3
|
+
*/
|
|
4
|
+
type ChunkKind = "component" | "hook" | "function" | "jsx-fragment" | "jsx-section" | "component-summary" | "function-section" | "function-summary";
|
|
5
|
+
interface ChunkMetadata {
|
|
6
|
+
/** Props/parameters for components and hooks */
|
|
7
|
+
props?: string[];
|
|
8
|
+
/** React hooks used in the chunk */
|
|
9
|
+
hooks?: string[];
|
|
10
|
+
/** JSX element tags used */
|
|
11
|
+
jsxElements?: string[];
|
|
12
|
+
/** Import dependencies */
|
|
13
|
+
imports?: string[];
|
|
14
|
+
/** Whether the chunk is exported */
|
|
15
|
+
isExported?: boolean;
|
|
16
|
+
/** Whether it's a default export */
|
|
17
|
+
isDefaultExport?: boolean;
|
|
18
|
+
}
|
|
19
|
+
interface CodeChunk {
|
|
20
|
+
/** Unique identifier (hash of content + location) */
|
|
21
|
+
id: string;
|
|
22
|
+
/** Absolute file path */
|
|
23
|
+
filePath: string;
|
|
24
|
+
/** Start line number (1-indexed) */
|
|
25
|
+
startLine: number;
|
|
26
|
+
/** End line number (1-indexed) */
|
|
27
|
+
endLine: number;
|
|
28
|
+
/** Start column */
|
|
29
|
+
startColumn: number;
|
|
30
|
+
/** End column */
|
|
31
|
+
endColumn: number;
|
|
32
|
+
/** Type of code chunk */
|
|
33
|
+
kind: ChunkKind;
|
|
34
|
+
/** Name of the function/component/hook (null if anonymous) */
|
|
35
|
+
name: string | null;
|
|
36
|
+
/** Raw source code content */
|
|
37
|
+
content: string;
|
|
38
|
+
/** Extracted metadata */
|
|
39
|
+
metadata: ChunkMetadata;
|
|
40
|
+
/** Parent chunk ID (for sub-chunks like jsx-section) */
|
|
41
|
+
parentId?: string;
|
|
42
|
+
/** Section index within parent (for ordering) */
|
|
43
|
+
sectionIndex?: number;
|
|
44
|
+
/** Human-readable section label (e.g., "header", "form-fields") */
|
|
45
|
+
sectionLabel?: string;
|
|
46
|
+
}
|
|
47
|
+
interface ChunkingOptions {
|
|
48
|
+
/** Minimum number of lines for a chunk (default: 3) */
|
|
49
|
+
minLines?: number;
|
|
50
|
+
/** Maximum lines before splitting a component (default: 100) */
|
|
51
|
+
maxLines?: number;
|
|
52
|
+
/** Whether to include anonymous functions (default: false) */
|
|
53
|
+
includeAnonymous?: boolean;
|
|
54
|
+
/** Chunk kinds to extract (default: all) */
|
|
55
|
+
kinds?: ChunkKind[];
|
|
56
|
+
/** Strategy for splitting large chunks (default: "jsx-children") */
|
|
57
|
+
splitStrategy?: "jsx-children" | "line-based" | "none";
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
/**
|
|
61
|
+
* Code chunker - extracts meaningful code units from TypeScript/TSX files
|
|
62
|
+
*
|
|
63
|
+
* Uses @typescript-eslint/typescript-estree for parsing.
|
|
64
|
+
* Supports splitting large components into smaller, embeddable chunks.
|
|
65
|
+
*/
|
|
66
|
+
|
|
67
|
+
/**
|
|
68
|
+
* Parse a file and extract code chunks
|
|
69
|
+
*/
|
|
70
|
+
declare function chunkFile(filePath: string, content: string, options?: ChunkingOptions): CodeChunk[];
|
|
71
|
+
interface EmbeddingInputOptions {
|
|
72
|
+
/** Maximum characters for the embedding input (default: 6000) */
|
|
73
|
+
maxChars?: number;
|
|
74
|
+
}
|
|
75
|
+
/**
|
|
76
|
+
* Prepare chunk content for embedding by enriching with context
|
|
77
|
+
*/
|
|
78
|
+
declare function prepareEmbeddingInput(chunk: CodeChunk, options?: EmbeddingInputOptions): string;
|
|
79
|
+
|
|
80
|
+
/**
|
|
81
|
+
* Ollama Embedding Client
|
|
82
|
+
*
|
|
83
|
+
* Uses Ollama's /api/embed endpoint to generate text embeddings.
|
|
84
|
+
* Follows the patterns from uilint-core's OllamaClient.
|
|
85
|
+
*/
|
|
86
|
+
interface EmbeddingOptions {
|
|
87
|
+
/** Ollama embedding model (default: nomic-embed-text) */
|
|
88
|
+
model?: string;
|
|
89
|
+
/** Ollama server URL (default: http://localhost:11434) */
|
|
90
|
+
baseUrl?: string;
|
|
91
|
+
/** Request timeout in ms (default: 60000) */
|
|
92
|
+
timeout?: number;
|
|
93
|
+
/** Batch size for embedding multiple texts (default: 10) */
|
|
94
|
+
batchSize?: number;
|
|
95
|
+
}
|
|
96
|
+
interface EmbeddingResult {
|
|
97
|
+
/** The embedding vector */
|
|
98
|
+
embedding: number[];
|
|
99
|
+
/** The model used */
|
|
100
|
+
model: string;
|
|
101
|
+
/** Number of tokens in the input (if available) */
|
|
102
|
+
promptTokens?: number;
|
|
103
|
+
}
|
|
104
|
+
declare class OllamaEmbeddingClient {
|
|
105
|
+
private baseUrl;
|
|
106
|
+
private model;
|
|
107
|
+
private timeout;
|
|
108
|
+
private batchSize;
|
|
109
|
+
constructor(options?: EmbeddingOptions);
|
|
110
|
+
/**
|
|
111
|
+
* Generate embedding for a single text
|
|
112
|
+
*/
|
|
113
|
+
embed(text: string): Promise<EmbeddingResult>;
|
|
114
|
+
/**
|
|
115
|
+
* Generate embeddings for multiple texts
|
|
116
|
+
* Automatically batches large inputs
|
|
117
|
+
*/
|
|
118
|
+
embedBatch(texts: string[]): Promise<EmbeddingResult[]>;
|
|
119
|
+
/**
|
|
120
|
+
* Embed a batch directly (no chunking)
|
|
121
|
+
*/
|
|
122
|
+
private embedBatchDirect;
|
|
123
|
+
/**
|
|
124
|
+
* Check if Ollama is available
|
|
125
|
+
*/
|
|
126
|
+
isAvailable(): Promise<boolean>;
|
|
127
|
+
/**
|
|
128
|
+
* Check if the embedding model is available
|
|
129
|
+
*/
|
|
130
|
+
isModelAvailable(): Promise<boolean>;
|
|
131
|
+
/**
|
|
132
|
+
* Pull the embedding model if not available
|
|
133
|
+
*/
|
|
134
|
+
ensureModel(): Promise<void>;
|
|
135
|
+
/**
|
|
136
|
+
* Get the embedding dimension for the current model
|
|
137
|
+
* (Requires generating a test embedding)
|
|
138
|
+
*/
|
|
139
|
+
getEmbeddingDimension(): Promise<number>;
|
|
140
|
+
/**
|
|
141
|
+
* Get the current model name
|
|
142
|
+
*/
|
|
143
|
+
getModel(): string;
|
|
144
|
+
/**
|
|
145
|
+
* Set the model name
|
|
146
|
+
*/
|
|
147
|
+
setModel(model: string): void;
|
|
148
|
+
}
|
|
149
|
+
declare function getOllamaEmbeddingClient(options?: EmbeddingOptions): OllamaEmbeddingClient;
|
|
150
|
+
|
|
151
|
+
/**
|
|
152
|
+
* Vector Store
|
|
153
|
+
*
|
|
154
|
+
* File-based vector storage with cosine similarity search.
|
|
155
|
+
* Uses binary Float32 format for efficient storage.
|
|
156
|
+
*/
|
|
157
|
+
interface SimilarityResult {
|
|
158
|
+
/** Chunk ID */
|
|
159
|
+
id: string;
|
|
160
|
+
/** Cosine similarity score (0-1) */
|
|
161
|
+
score: number;
|
|
162
|
+
/** Distance (1 - score) */
|
|
163
|
+
distance: number;
|
|
164
|
+
}
|
|
165
|
+
interface VectorStoreOptions {
|
|
166
|
+
/** Expected dimension of vectors (validated on add) */
|
|
167
|
+
dimension?: number;
|
|
168
|
+
}
|
|
169
|
+
declare class VectorStore {
|
|
170
|
+
private vectors;
|
|
171
|
+
private dimension;
|
|
172
|
+
private idIndex;
|
|
173
|
+
constructor(options?: VectorStoreOptions);
|
|
174
|
+
/**
|
|
175
|
+
* Add a vector to the store
|
|
176
|
+
*/
|
|
177
|
+
add(id: string, vector: number[]): void;
|
|
178
|
+
/**
|
|
179
|
+
* Add multiple vectors at once
|
|
180
|
+
*/
|
|
181
|
+
addBatch(items: Array<{
|
|
182
|
+
id: string;
|
|
183
|
+
vector: number[];
|
|
184
|
+
}>): void;
|
|
185
|
+
/**
|
|
186
|
+
* Remove a vector from the store
|
|
187
|
+
*/
|
|
188
|
+
remove(id: string): boolean;
|
|
189
|
+
/**
|
|
190
|
+
* Get a vector by ID
|
|
191
|
+
*/
|
|
192
|
+
get(id: string): number[] | null;
|
|
193
|
+
/**
|
|
194
|
+
* Check if a vector exists
|
|
195
|
+
*/
|
|
196
|
+
has(id: string): boolean;
|
|
197
|
+
/**
|
|
198
|
+
* Find the most similar vectors to a query vector
|
|
199
|
+
*/
|
|
200
|
+
findSimilar(query: number[], k?: number, threshold?: number): SimilarityResult[];
|
|
201
|
+
/**
|
|
202
|
+
* Get the number of vectors in the store
|
|
203
|
+
*/
|
|
204
|
+
size(): number;
|
|
205
|
+
/**
|
|
206
|
+
* Get the dimension of vectors
|
|
207
|
+
*/
|
|
208
|
+
getDimension(): number | null;
|
|
209
|
+
/**
|
|
210
|
+
* Get all IDs
|
|
211
|
+
*/
|
|
212
|
+
getIds(): string[];
|
|
213
|
+
/**
|
|
214
|
+
* Clear all vectors
|
|
215
|
+
*/
|
|
216
|
+
clear(): void;
|
|
217
|
+
/**
|
|
218
|
+
* Save the vector store to disk
|
|
219
|
+
*
|
|
220
|
+
* Format:
|
|
221
|
+
* - embeddings.bin: Binary Float32 vectors
|
|
222
|
+
* - ids.json: Ordered array of IDs matching vector positions
|
|
223
|
+
*/
|
|
224
|
+
save(dirPath: string): Promise<void>;
|
|
225
|
+
/**
|
|
226
|
+
* Load the vector store from disk
|
|
227
|
+
*/
|
|
228
|
+
load(dirPath: string): Promise<void>;
|
|
229
|
+
/**
|
|
230
|
+
* Iterate over all vectors
|
|
231
|
+
*/
|
|
232
|
+
entries(): IterableIterator<[string, number[]]>;
|
|
233
|
+
/**
|
|
234
|
+
* Get stats about the store
|
|
235
|
+
*/
|
|
236
|
+
getStats(): {
|
|
237
|
+
size: number;
|
|
238
|
+
dimension: number | null;
|
|
239
|
+
memoryBytes: number;
|
|
240
|
+
};
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
/**
|
|
244
|
+
* Types for the index module
|
|
245
|
+
*/
|
|
246
|
+
|
|
247
|
+
/**
|
|
248
|
+
* Metadata stored for each chunk in the metadata store
|
|
249
|
+
*/
|
|
250
|
+
interface StoredChunkMetadata {
|
|
251
|
+
/** Absolute file path */
|
|
252
|
+
filePath: string;
|
|
253
|
+
/** Start line number (1-indexed) */
|
|
254
|
+
startLine: number;
|
|
255
|
+
/** End line number (1-indexed) */
|
|
256
|
+
endLine: number;
|
|
257
|
+
/** Start column */
|
|
258
|
+
startColumn: number;
|
|
259
|
+
/** End column */
|
|
260
|
+
endColumn: number;
|
|
261
|
+
/** Type of code chunk */
|
|
262
|
+
kind: ChunkKind;
|
|
263
|
+
/** Name of the function/component/hook (null if anonymous) */
|
|
264
|
+
name: string | null;
|
|
265
|
+
/** Hash of the content for change detection */
|
|
266
|
+
contentHash: string;
|
|
267
|
+
/** Extracted metadata */
|
|
268
|
+
metadata: ChunkMetadata;
|
|
269
|
+
}
|
|
270
|
+
/**
|
|
271
|
+
* Manifest for the index
|
|
272
|
+
*/
|
|
273
|
+
interface IndexManifest {
|
|
274
|
+
/** Version of the index format */
|
|
275
|
+
version: number;
|
|
276
|
+
/** Timestamp when the index was created */
|
|
277
|
+
createdAt: string;
|
|
278
|
+
/** Timestamp when the index was last updated */
|
|
279
|
+
updatedAt: string;
|
|
280
|
+
/** Number of chunks in the index */
|
|
281
|
+
chunkCount: number;
|
|
282
|
+
/** Number of files indexed */
|
|
283
|
+
fileCount: number;
|
|
284
|
+
/** Embedding model used */
|
|
285
|
+
embeddingModel: string;
|
|
286
|
+
/** Vector dimension */
|
|
287
|
+
dimension: number;
|
|
288
|
+
}
|
|
289
|
+
|
|
290
|
+
/**
|
|
291
|
+
* Metadata Store
|
|
292
|
+
*
|
|
293
|
+
* JSON-based storage for chunk metadata (file paths, line numbers, etc.)
|
|
294
|
+
*/
|
|
295
|
+
|
|
296
|
+
declare class MetadataStore {
|
|
297
|
+
private chunks;
|
|
298
|
+
/**
|
|
299
|
+
* Add or update chunk metadata
|
|
300
|
+
*/
|
|
301
|
+
set(id: string, metadata: StoredChunkMetadata): void;
|
|
302
|
+
/**
|
|
303
|
+
* Add multiple chunks at once
|
|
304
|
+
*/
|
|
305
|
+
setBatch(items: Array<{
|
|
306
|
+
id: string;
|
|
307
|
+
metadata: StoredChunkMetadata;
|
|
308
|
+
}>): void;
|
|
309
|
+
/**
|
|
310
|
+
* Get chunk metadata by ID
|
|
311
|
+
*/
|
|
312
|
+
get(id: string): StoredChunkMetadata | null;
|
|
313
|
+
/**
|
|
314
|
+
* Check if a chunk exists
|
|
315
|
+
*/
|
|
316
|
+
has(id: string): boolean;
|
|
317
|
+
/**
|
|
318
|
+
* Remove chunk metadata
|
|
319
|
+
*/
|
|
320
|
+
remove(id: string): boolean;
|
|
321
|
+
/**
|
|
322
|
+
* Remove all chunks for a given file path
|
|
323
|
+
*/
|
|
324
|
+
removeByFilePath(filePath: string): string[];
|
|
325
|
+
/**
|
|
326
|
+
* Get all chunks for a given file path
|
|
327
|
+
*/
|
|
328
|
+
getByFilePath(filePath: string): Array<{
|
|
329
|
+
id: string;
|
|
330
|
+
metadata: StoredChunkMetadata;
|
|
331
|
+
}>;
|
|
332
|
+
/**
|
|
333
|
+
* Get chunk by content hash
|
|
334
|
+
*/
|
|
335
|
+
getByContentHash(contentHash: string): {
|
|
336
|
+
id: string;
|
|
337
|
+
metadata: StoredChunkMetadata;
|
|
338
|
+
} | null;
|
|
339
|
+
/**
|
|
340
|
+
* Get chunk at a specific location
|
|
341
|
+
*/
|
|
342
|
+
getAtLocation(filePath: string, line: number): {
|
|
343
|
+
id: string;
|
|
344
|
+
metadata: StoredChunkMetadata;
|
|
345
|
+
} | null;
|
|
346
|
+
/**
|
|
347
|
+
* Get all unique file paths
|
|
348
|
+
*/
|
|
349
|
+
getFilePaths(): string[];
|
|
350
|
+
/**
|
|
351
|
+
* Get number of chunks
|
|
352
|
+
*/
|
|
353
|
+
size(): number;
|
|
354
|
+
/**
|
|
355
|
+
* Clear all metadata
|
|
356
|
+
*/
|
|
357
|
+
clear(): void;
|
|
358
|
+
/**
|
|
359
|
+
* Iterate over all chunks
|
|
360
|
+
*/
|
|
361
|
+
entries(): IterableIterator<[string, StoredChunkMetadata]>;
|
|
362
|
+
/**
|
|
363
|
+
* Get all IDs
|
|
364
|
+
*/
|
|
365
|
+
getIds(): string[];
|
|
366
|
+
/**
|
|
367
|
+
* Save to disk
|
|
368
|
+
*/
|
|
369
|
+
save(dirPath: string): Promise<void>;
|
|
370
|
+
/**
|
|
371
|
+
* Load from disk
|
|
372
|
+
*/
|
|
373
|
+
load(dirPath: string): Promise<void>;
|
|
374
|
+
/**
|
|
375
|
+
* Filter chunks by kind
|
|
376
|
+
*/
|
|
377
|
+
filterByKind(kind: string): Array<{
|
|
378
|
+
id: string;
|
|
379
|
+
metadata: StoredChunkMetadata;
|
|
380
|
+
}>;
|
|
381
|
+
/**
|
|
382
|
+
* Search by name (case-insensitive partial match)
|
|
383
|
+
*/
|
|
384
|
+
searchByName(query: string): Array<{
|
|
385
|
+
id: string;
|
|
386
|
+
metadata: StoredChunkMetadata;
|
|
387
|
+
}>;
|
|
388
|
+
}
|
|
389
|
+
|
|
390
|
+
/**
|
|
391
|
+
* Incremental Indexer
|
|
392
|
+
*
|
|
393
|
+
* Combines chunker, embedding client, and storage to build and update
|
|
394
|
+
* the semantic index incrementally.
|
|
395
|
+
*/
|
|
396
|
+
|
|
397
|
+
interface IndexerOptions {
|
|
398
|
+
/** Embedding model to use */
|
|
399
|
+
model?: string;
|
|
400
|
+
/** Ollama server URL */
|
|
401
|
+
baseUrl?: string;
|
|
402
|
+
/** Glob patterns to include (default: **\/*.{ts,tsx,js,jsx}) */
|
|
403
|
+
include?: string[];
|
|
404
|
+
/** Glob patterns to exclude */
|
|
405
|
+
exclude?: string[];
|
|
406
|
+
/** Chunking options */
|
|
407
|
+
chunking?: ChunkingOptions;
|
|
408
|
+
/** Progress callback */
|
|
409
|
+
onProgress?: (message: string, current?: number, total?: number) => void;
|
|
410
|
+
}
|
|
411
|
+
interface IndexUpdateResult {
|
|
412
|
+
/** Number of files added */
|
|
413
|
+
added: number;
|
|
414
|
+
/** Number of files modified */
|
|
415
|
+
modified: number;
|
|
416
|
+
/** Number of files deleted */
|
|
417
|
+
deleted: number;
|
|
418
|
+
/** Total chunks in index */
|
|
419
|
+
totalChunks: number;
|
|
420
|
+
/** Duration in milliseconds */
|
|
421
|
+
duration: number;
|
|
422
|
+
}
|
|
423
|
+
declare class IncrementalIndexer {
|
|
424
|
+
private vectorStore;
|
|
425
|
+
private metadataStore;
|
|
426
|
+
private fileTracker;
|
|
427
|
+
private embeddingClient;
|
|
428
|
+
private projectRoot;
|
|
429
|
+
private indexDir;
|
|
430
|
+
private options;
|
|
431
|
+
private manifest;
|
|
432
|
+
constructor(projectRoot: string, options?: IndexerOptions);
|
|
433
|
+
/**
|
|
434
|
+
* Get the include patterns
|
|
435
|
+
*/
|
|
436
|
+
private getIncludePatterns;
|
|
437
|
+
/**
|
|
438
|
+
* Get the exclude patterns
|
|
439
|
+
*/
|
|
440
|
+
private getExcludePatterns;
|
|
441
|
+
/**
|
|
442
|
+
* Find all files to index
|
|
443
|
+
*/
|
|
444
|
+
private findFiles;
|
|
445
|
+
/**
|
|
446
|
+
* Load existing index from disk
|
|
447
|
+
*/
|
|
448
|
+
load(): Promise<void>;
|
|
449
|
+
/**
|
|
450
|
+
* Save index to disk
|
|
451
|
+
*/
|
|
452
|
+
save(): Promise<void>;
|
|
453
|
+
/**
|
|
454
|
+
* Index all files from scratch
|
|
455
|
+
*/
|
|
456
|
+
indexAll(force?: boolean): Promise<IndexUpdateResult>;
|
|
457
|
+
/**
|
|
458
|
+
* Update index incrementally
|
|
459
|
+
*/
|
|
460
|
+
update(): Promise<IndexUpdateResult>;
|
|
461
|
+
/**
|
|
462
|
+
* Process file changes
|
|
463
|
+
*/
|
|
464
|
+
private processChanges;
|
|
465
|
+
/**
|
|
466
|
+
* Get index statistics
|
|
467
|
+
*/
|
|
468
|
+
getStats(): {
|
|
469
|
+
totalFiles: number;
|
|
470
|
+
totalChunks: number;
|
|
471
|
+
indexSizeBytes: number;
|
|
472
|
+
manifest: IndexManifest | null;
|
|
473
|
+
};
|
|
474
|
+
/**
|
|
475
|
+
* Get the vector store (for queries)
|
|
476
|
+
*/
|
|
477
|
+
getVectorStore(): VectorStore;
|
|
478
|
+
/**
|
|
479
|
+
* Get the metadata store (for queries)
|
|
480
|
+
*/
|
|
481
|
+
getMetadataStore(): MetadataStore;
|
|
482
|
+
/**
|
|
483
|
+
* Check if index exists
|
|
484
|
+
*/
|
|
485
|
+
hasIndex(): boolean;
|
|
486
|
+
}
|
|
487
|
+
/**
|
|
488
|
+
* Create an indexer for a project
|
|
489
|
+
*/
|
|
490
|
+
declare function createIndexer(projectRoot: string, options?: IndexerOptions): IncrementalIndexer;
|
|
491
|
+
|
|
492
|
+
/**
|
|
493
|
+
* Query API for semantic duplicate detection
|
|
494
|
+
*
|
|
495
|
+
* High-level API for indexing, finding duplicates, and semantic search.
|
|
496
|
+
*/
|
|
497
|
+
|
|
498
|
+
interface IndexOptions {
|
|
499
|
+
/** Embedding model to use */
|
|
500
|
+
model?: string;
|
|
501
|
+
/** Ollama server URL */
|
|
502
|
+
baseUrl?: string;
|
|
503
|
+
/** Glob patterns to exclude */
|
|
504
|
+
exclude?: string[];
|
|
505
|
+
/** Force reindex from scratch */
|
|
506
|
+
force?: boolean;
|
|
507
|
+
/** Progress callback */
|
|
508
|
+
onProgress?: (message: string, current?: number, total?: number) => void;
|
|
509
|
+
}
|
|
510
|
+
interface FindDuplicatesOptions$1 {
|
|
511
|
+
/** Path to search (defaults to current directory) */
|
|
512
|
+
path?: string;
|
|
513
|
+
/** Minimum similarity threshold (0-1). Default: 0.85 */
|
|
514
|
+
threshold?: number;
|
|
515
|
+
/** Minimum group size. Default: 2 */
|
|
516
|
+
minGroupSize?: number;
|
|
517
|
+
/** Filter by kind: component, hook, function */
|
|
518
|
+
kind?: ChunkKind;
|
|
519
|
+
}
|
|
520
|
+
interface SearchOptions {
|
|
521
|
+
/** Path to search (defaults to current directory) */
|
|
522
|
+
path?: string;
|
|
523
|
+
/** Number of results to return. Default: 10 */
|
|
524
|
+
top?: number;
|
|
525
|
+
/** Minimum similarity threshold. Default: 0.5 */
|
|
526
|
+
threshold?: number;
|
|
527
|
+
/** Embedding model to use */
|
|
528
|
+
model?: string;
|
|
529
|
+
/** Ollama server URL */
|
|
530
|
+
baseUrl?: string;
|
|
531
|
+
}
|
|
532
|
+
interface SimilarLocationOptions extends SearchOptions {
|
|
533
|
+
/** File path containing the code */
|
|
534
|
+
filePath: string;
|
|
535
|
+
/** Line number in the file */
|
|
536
|
+
line: number;
|
|
537
|
+
}
|
|
538
|
+
interface DuplicateGroupMember {
|
|
539
|
+
/** File path */
|
|
540
|
+
filePath: string;
|
|
541
|
+
/** Start line */
|
|
542
|
+
startLine: number;
|
|
543
|
+
/** End line */
|
|
544
|
+
endLine: number;
|
|
545
|
+
/** Chunk name (component/function/hook name) */
|
|
546
|
+
name: string | null;
|
|
547
|
+
/** Kind of code */
|
|
548
|
+
kind: ChunkKind;
|
|
549
|
+
/** Similarity score (1.0 for the reference member) */
|
|
550
|
+
score: number;
|
|
551
|
+
}
|
|
552
|
+
interface DuplicateGroup$1 {
|
|
553
|
+
/** Members of the duplicate group */
|
|
554
|
+
members: DuplicateGroupMember[];
|
|
555
|
+
/** Average similarity between all group members */
|
|
556
|
+
avgSimilarity: number;
|
|
557
|
+
/** The kind of code in this group */
|
|
558
|
+
kind: ChunkKind;
|
|
559
|
+
}
|
|
560
|
+
interface SearchResult {
|
|
561
|
+
/** File path */
|
|
562
|
+
filePath: string;
|
|
563
|
+
/** Start line */
|
|
564
|
+
startLine: number;
|
|
565
|
+
/** End line */
|
|
566
|
+
endLine: number;
|
|
567
|
+
/** Chunk name */
|
|
568
|
+
name: string | null;
|
|
569
|
+
/** Kind of code */
|
|
570
|
+
kind: ChunkKind;
|
|
571
|
+
/** Similarity score */
|
|
572
|
+
score: number;
|
|
573
|
+
}
|
|
574
|
+
/**
|
|
575
|
+
* Clear the indexer cache for a path.
|
|
576
|
+
*/
|
|
577
|
+
declare function clearIndexerCache(path?: string): void;
|
|
578
|
+
/**
|
|
579
|
+
* Index a directory for semantic duplicate detection.
|
|
580
|
+
* Creates or updates the index at .uilint/.duplicates-index/
|
|
581
|
+
*/
|
|
582
|
+
declare function indexDirectory(path: string, options?: IndexOptions): Promise<IndexUpdateResult>;
|
|
583
|
+
/**
|
|
584
|
+
* Find semantic duplicate groups in the indexed codebase.
|
|
585
|
+
*/
|
|
586
|
+
declare function findDuplicates(options?: FindDuplicatesOptions$1): Promise<DuplicateGroup$1[]>;
|
|
587
|
+
/**
|
|
588
|
+
* Search for code semantically similar to a text query.
|
|
589
|
+
*/
|
|
590
|
+
declare function searchSimilar(query: string, options?: SearchOptions): Promise<SearchResult[]>;
|
|
591
|
+
/**
|
|
592
|
+
* Find code similar to a specific location (file:line).
|
|
593
|
+
*/
|
|
594
|
+
declare function findSimilarAtLocation(options: SimilarLocationOptions): Promise<SearchResult[]>;
|
|
595
|
+
/**
|
|
596
|
+
* Check if an index exists for the given path.
|
|
597
|
+
*/
|
|
598
|
+
declare function hasIndex(path?: string): boolean;
|
|
599
|
+
/**
|
|
600
|
+
* Get index statistics.
|
|
601
|
+
*/
|
|
602
|
+
declare function getIndexStats(path?: string): Promise<{
|
|
603
|
+
totalFiles: number;
|
|
604
|
+
totalChunks: number;
|
|
605
|
+
indexSizeBytes: number;
|
|
606
|
+
embeddingModel: string | null;
|
|
607
|
+
lastUpdated: string | null;
|
|
608
|
+
}>;
|
|
609
|
+
|
|
610
|
+
/**
|
|
611
|
+
* Duplicate Finder
|
|
612
|
+
*
|
|
613
|
+
* Finds groups of semantically similar code chunks using the vector index.
|
|
614
|
+
*/
|
|
615
|
+
|
|
616
|
+
interface DuplicateMember {
|
|
617
|
+
/** Chunk ID */
|
|
618
|
+
id: string;
|
|
619
|
+
/** Chunk metadata */
|
|
620
|
+
metadata: StoredChunkMetadata;
|
|
621
|
+
/** Similarity score to the group centroid/first member */
|
|
622
|
+
score: number;
|
|
623
|
+
}
|
|
624
|
+
interface DuplicateGroup {
|
|
625
|
+
/** Members of the duplicate group */
|
|
626
|
+
members: DuplicateMember[];
|
|
627
|
+
/** Average similarity between all group members */
|
|
628
|
+
avgSimilarity: number;
|
|
629
|
+
/** The kind of code in this group (component, hook, function) */
|
|
630
|
+
kind: ChunkKind;
|
|
631
|
+
}
|
|
632
|
+
interface FindDuplicatesOptions {
|
|
633
|
+
/** Minimum cosine similarity threshold (0-1). Default: 0.85 */
|
|
634
|
+
threshold?: number;
|
|
635
|
+
/** Minimum group size. Default: 2 */
|
|
636
|
+
minGroupSize?: number;
|
|
637
|
+
/** Filter by chunk kind */
|
|
638
|
+
kind?: ChunkKind;
|
|
639
|
+
/** Exclude specific file paths */
|
|
640
|
+
excludePaths?: string[];
|
|
641
|
+
}
|
|
642
|
+
/**
|
|
643
|
+
* Find groups of semantically similar code.
|
|
644
|
+
*
|
|
645
|
+
* Algorithm:
|
|
646
|
+
* 1. Iterate through all chunks
|
|
647
|
+
* 2. For each unprocessed chunk, find similar chunks above threshold
|
|
648
|
+
* 3. Group similar chunks together
|
|
649
|
+
* 4. Mark all grouped chunks as processed
|
|
650
|
+
* 5. Sort groups by size and similarity
|
|
651
|
+
*/
|
|
652
|
+
declare function findDuplicateGroups(vectorStore: VectorStore, metadataStore: MetadataStore, options?: FindDuplicatesOptions): DuplicateGroup[];
|
|
653
|
+
/**
|
|
654
|
+
* Find similar code to a given location (file:line).
|
|
655
|
+
*/
|
|
656
|
+
declare function findSimilarToLocation(vectorStore: VectorStore, metadataStore: MetadataStore, filePath: string, line: number, options?: {
|
|
657
|
+
top?: number;
|
|
658
|
+
threshold?: number;
|
|
659
|
+
}): SimilarityResult[];
|
|
660
|
+
/**
|
|
661
|
+
* Search for code similar to a text query.
|
|
662
|
+
* Requires embedding the query first.
|
|
663
|
+
*/
|
|
664
|
+
declare function findSimilarToQuery(vectorStore: VectorStore, queryEmbedding: number[], options?: {
|
|
665
|
+
top?: number;
|
|
666
|
+
threshold?: number;
|
|
667
|
+
}): SimilarityResult[];
|
|
668
|
+
|
|
669
|
+
/**
|
|
670
|
+
* Duplicate Scorer
|
|
671
|
+
*
|
|
672
|
+
* Provides scoring functions for ranking duplicate code groups.
|
|
673
|
+
*/
|
|
674
|
+
|
|
675
|
+
interface DuplicateScore {
|
|
676
|
+
/** Embedding cosine similarity (0-1) */
|
|
677
|
+
similarity: number;
|
|
678
|
+
/** Ratio of code size similarity (0-1) */
|
|
679
|
+
sizeRatio: number;
|
|
680
|
+
/** Weighted combined score */
|
|
681
|
+
combinedScore: number;
|
|
682
|
+
}
|
|
683
|
+
/**
|
|
684
|
+
* Calculate the size ratio between two code chunks.
|
|
685
|
+
* Returns a value between 0 and 1 where 1 means identical size.
|
|
686
|
+
*/
|
|
687
|
+
declare function calculateSizeRatio(chunk1: StoredChunkMetadata, chunk2: StoredChunkMetadata): number;
|
|
688
|
+
/**
|
|
689
|
+
* Calculate a combined duplicate score.
|
|
690
|
+
*/
|
|
691
|
+
declare function calculateDuplicateScore(similarity: number, chunk1: StoredChunkMetadata, chunk2: StoredChunkMetadata): DuplicateScore;
|
|
692
|
+
/**
|
|
693
|
+
* Calculate the average similarity of a duplicate group.
|
|
694
|
+
*/
|
|
695
|
+
declare function calculateGroupAverageSimilarity(similarities: number[]): number;
|
|
696
|
+
/**
|
|
697
|
+
* Sort duplicate groups by relevance.
|
|
698
|
+
* Groups are sorted by: member count (desc), then average similarity (desc).
|
|
699
|
+
*/
|
|
700
|
+
declare function sortDuplicateGroups<T extends {
|
|
701
|
+
avgSimilarity: number;
|
|
702
|
+
members: unknown[];
|
|
703
|
+
}>(groups: T[]): T[];
|
|
704
|
+
|
|
705
|
+
/**
|
|
706
|
+
* File Tracker
|
|
707
|
+
*
|
|
708
|
+
* Tracks file content hashes for incremental updates.
|
|
709
|
+
* Uses xxhash for fast hashing (following uilint-eslint patterns).
|
|
710
|
+
*/
|
|
711
|
+
/**
|
|
712
|
+
* Hash content using xxhash (async) or djb2 (sync fallback)
|
|
713
|
+
*/
|
|
714
|
+
declare function hashContent(content: string): Promise<string>;
|
|
715
|
+
/**
|
|
716
|
+
* Synchronous hash for when async is not possible
|
|
717
|
+
*/
|
|
718
|
+
declare function hashContentSync(content: string): string;
|
|
719
|
+
interface FileHashEntry {
|
|
720
|
+
/** xxhash of file content */
|
|
721
|
+
contentHash: string;
|
|
722
|
+
/** Last modification time in ms */
|
|
723
|
+
mtimeMs: number;
|
|
724
|
+
/** IDs of chunks from this file */
|
|
725
|
+
chunkIds: string[];
|
|
726
|
+
}
|
|
727
|
+
interface HashStore {
|
|
728
|
+
version: number;
|
|
729
|
+
files: Record<string, FileHashEntry>;
|
|
730
|
+
}
|
|
731
|
+
interface FileChange {
|
|
732
|
+
path: string;
|
|
733
|
+
type: "added" | "modified" | "deleted";
|
|
734
|
+
oldHash?: string;
|
|
735
|
+
newHash?: string;
|
|
736
|
+
}
|
|
737
|
+
declare class FileTracker {
|
|
738
|
+
private store;
|
|
739
|
+
/**
|
|
740
|
+
* Get the hash entry for a file
|
|
741
|
+
*/
|
|
742
|
+
getEntry(filePath: string): FileHashEntry | null;
|
|
743
|
+
/**
|
|
744
|
+
* Set the hash entry for a file
|
|
745
|
+
*/
|
|
746
|
+
setEntry(filePath: string, entry: FileHashEntry): void;
|
|
747
|
+
/**
|
|
748
|
+
* Remove the hash entry for a file
|
|
749
|
+
*/
|
|
750
|
+
removeEntry(filePath: string): boolean;
|
|
751
|
+
/**
|
|
752
|
+
* Get all tracked file paths
|
|
753
|
+
*/
|
|
754
|
+
getTrackedFiles(): string[];
|
|
755
|
+
/**
|
|
756
|
+
* Clear all entries
|
|
757
|
+
*/
|
|
758
|
+
clear(): void;
|
|
759
|
+
/**
|
|
760
|
+
* Detect changes between current files and stored hashes
|
|
761
|
+
*/
|
|
762
|
+
detectChanges(files: string[]): Promise<FileChange[]>;
|
|
763
|
+
/**
|
|
764
|
+
* Update stored hash for a file
|
|
765
|
+
*/
|
|
766
|
+
updateFile(filePath: string, content: string, chunkIds: string[]): Promise<void>;
|
|
767
|
+
/**
|
|
768
|
+
* Save to disk
|
|
769
|
+
*/
|
|
770
|
+
save(dirPath: string): Promise<void>;
|
|
771
|
+
/**
|
|
772
|
+
* Load from disk
|
|
773
|
+
*/
|
|
774
|
+
load(dirPath: string): Promise<void>;
|
|
775
|
+
/**
|
|
776
|
+
* Get stats
|
|
777
|
+
*/
|
|
778
|
+
getStats(): {
|
|
779
|
+
trackedFiles: number;
|
|
780
|
+
};
|
|
781
|
+
}
|
|
782
|
+
|
|
783
|
+
// Public API surface: same specifiers and aliases as before, one per line for readability.
export {
    type ChunkKind,
    type ChunkMetadata,
    type ChunkingOptions,
    type CodeChunk,
    type DuplicateGroup$1 as DuplicateGroup,
    type DuplicateGroupMember,
    type DuplicateMember,
    type DuplicateScore,
    type EmbeddingOptions,
    type EmbeddingResult,
    type FileChange,
    type FileHashEntry,
    FileTracker,
    type FindDuplicatesOptions$1 as FindDuplicatesOptions,
    type HashStore,
    IncrementalIndexer,
    type IndexManifest,
    type IndexOptions,
    type IndexUpdateResult,
    type IndexerOptions,
    type DuplicateGroup as InternalDuplicateGroup,
    type FindDuplicatesOptions as InternalFindDuplicatesOptions,
    MetadataStore,
    OllamaEmbeddingClient,
    type SearchOptions,
    type SearchResult,
    type SimilarLocationOptions,
    type SimilarityResult,
    type StoredChunkMetadata,
    VectorStore,
    type VectorStoreOptions,
    calculateDuplicateScore,
    calculateGroupAverageSimilarity,
    calculateSizeRatio,
    chunkFile,
    clearIndexerCache,
    createIndexer,
    findDuplicateGroups,
    findDuplicates,
    findSimilarAtLocation,
    findSimilarToLocation,
    findSimilarToQuery,
    getIndexStats,
    getOllamaEmbeddingClient,
    hasIndex,
    hashContent,
    hashContentSync,
    indexDirectory,
    prepareEmbeddingInput,
    searchSimilar,
    sortDuplicateGroups,
};
|