claude-eidetic 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +333 -0
- package/dist/config.d.ts +25 -0
- package/dist/config.js +29 -14
- package/dist/core/cleanup.d.ts +8 -0
- package/dist/core/cleanup.js +41 -0
- package/dist/core/doc-indexer.d.ts +13 -0
- package/dist/core/doc-indexer.js +76 -0
- package/dist/core/doc-searcher.d.ts +13 -0
- package/dist/core/doc-searcher.js +65 -0
- package/dist/core/file-category.d.ts +7 -0
- package/dist/core/file-category.js +75 -0
- package/dist/core/indexer.js +12 -4
- package/dist/core/preview.d.ts +1 -2
- package/dist/core/preview.js +2 -5
- package/dist/core/repo-map.d.ts +33 -0
- package/dist/core/repo-map.js +144 -0
- package/dist/core/searcher.d.ts +1 -13
- package/dist/core/searcher.js +20 -24
- package/dist/core/snapshot-io.js +2 -2
- package/dist/core/sync.d.ts +5 -25
- package/dist/core/sync.js +90 -65
- package/dist/core/targeted-indexer.d.ts +19 -0
- package/dist/core/targeted-indexer.js +127 -0
- package/dist/embedding/factory.d.ts +0 -13
- package/dist/embedding/factory.js +0 -17
- package/dist/embedding/openai.d.ts +2 -14
- package/dist/embedding/openai.js +7 -20
- package/dist/errors.d.ts +2 -0
- package/dist/errors.js +2 -0
- package/dist/format.d.ts +12 -0
- package/dist/format.js +160 -31
- package/dist/hooks/post-tool-use.d.ts +13 -0
- package/dist/hooks/post-tool-use.js +113 -0
- package/dist/hooks/stop-hook.d.ts +11 -0
- package/dist/hooks/stop-hook.js +121 -0
- package/dist/hooks/targeted-runner.d.ts +11 -0
- package/dist/hooks/targeted-runner.js +66 -0
- package/dist/index.js +102 -24
- package/dist/infra/qdrant-bootstrap.js +14 -12
- package/dist/memory/history.d.ts +19 -0
- package/dist/memory/history.js +40 -0
- package/dist/memory/llm.d.ts +2 -0
- package/dist/memory/llm.js +56 -0
- package/dist/memory/prompts.d.ts +5 -0
- package/dist/memory/prompts.js +36 -0
- package/dist/memory/reconciler.d.ts +12 -0
- package/dist/memory/reconciler.js +36 -0
- package/dist/memory/store.d.ts +20 -0
- package/dist/memory/store.js +206 -0
- package/dist/memory/types.d.ts +28 -0
- package/dist/memory/types.js +2 -0
- package/dist/paths.d.ts +3 -4
- package/dist/paths.js +14 -4
- package/dist/precompact/hook.d.ts +9 -0
- package/dist/precompact/hook.js +170 -0
- package/dist/precompact/index-runner.d.ts +9 -0
- package/dist/precompact/index-runner.js +52 -0
- package/dist/precompact/note-writer.d.ts +15 -0
- package/dist/precompact/note-writer.js +109 -0
- package/dist/precompact/session-indexer.d.ts +13 -0
- package/dist/precompact/session-indexer.js +31 -0
- package/dist/precompact/tier0-inject.d.ts +16 -0
- package/dist/precompact/tier0-inject.js +88 -0
- package/dist/precompact/tier0-writer.d.ts +16 -0
- package/dist/precompact/tier0-writer.js +74 -0
- package/dist/precompact/transcript-parser.d.ts +10 -0
- package/dist/precompact/transcript-parser.js +148 -0
- package/dist/precompact/types.d.ts +93 -0
- package/dist/precompact/types.js +5 -0
- package/dist/precompact/utils.d.ts +29 -0
- package/dist/precompact/utils.js +95 -0
- package/dist/setup-message.d.ts +3 -0
- package/dist/setup-message.js +42 -0
- package/dist/splitter/ast.js +84 -22
- package/dist/splitter/line.d.ts +0 -4
- package/dist/splitter/line.js +1 -7
- package/dist/splitter/symbol-extract.d.ts +16 -0
- package/dist/splitter/symbol-extract.js +61 -0
- package/dist/splitter/types.d.ts +5 -0
- package/dist/splitter/types.js +1 -1
- package/dist/state/doc-metadata.d.ts +18 -0
- package/dist/state/doc-metadata.js +59 -0
- package/dist/state/registry.d.ts +1 -3
- package/dist/state/snapshot.d.ts +0 -1
- package/dist/state/snapshot.js +3 -19
- package/dist/tool-schemas.d.ts +251 -1
- package/dist/tool-schemas.js +307 -0
- package/dist/tools.d.ts +69 -0
- package/dist/tools.js +286 -17
- package/dist/vectordb/milvus.d.ts +7 -5
- package/dist/vectordb/milvus.js +116 -19
- package/dist/vectordb/qdrant.d.ts +8 -10
- package/dist/vectordb/qdrant.js +105 -33
- package/dist/vectordb/types.d.ts +20 -0
- package/messages.yaml +50 -0
- package/package.json +31 -6
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import { docCollectionName } from '../paths.js';
import { loadDocMetadata, isStale } from '../state/doc-metadata.js';
import { SearchError } from '../errors.js';
import { deduplicateResults } from './searcher.js';

const DEFAULT_LIMIT = 5;
const MAX_LIMIT = 20;

/**
 * Search cached documentation for a query.
 *
 * When `options.library` is given, only that library's collection is searched;
 * otherwise every collection referenced by the doc-metadata store is searched
 * and results are merged by score before deduplication.
 *
 * @param query - Natural-language search query; must be non-empty.
 * @param embedding - Embedding provider exposing `embed(text)`.
 * @param vectordb - Vector DB exposing `hasCollection` and `search`.
 * @param options - Optional `{ library, limit }`; `limit` is clamped to [1, 20].
 * @returns Deduplicated results annotated with library, topic, source, and staleness.
 * @throws {SearchError} If the query is empty or no cached documentation exists.
 */
export async function searchDocuments(query, embedding, vectordb, options = {}) {
    if (!query || query.trim().length === 0) {
        throw new SearchError('Search query is required.');
    }
    const limit = Math.min(Math.max(1, options.limit ?? DEFAULT_LIMIT), MAX_LIMIT);
    const metadata = loadDocMetadata();
    let collectionsToSearch;
    if (options.library) {
        const collection = docCollectionName(options.library);
        const entries = Object.values(metadata).filter((e) => e.collectionName === collection);
        if (entries.length === 0) {
            throw new SearchError(`No cached documentation found for library "${options.library}". ` +
                `Use index_document to cache documentation first.`);
        }
        collectionsToSearch = [{ collection, entries }];
    }
    else {
        // Group every known metadata entry by its backing collection.
        const collectionMap = new Map();
        for (const entry of Object.values(metadata)) {
            const existing = collectionMap.get(entry.collectionName) ?? [];
            existing.push(entry);
            collectionMap.set(entry.collectionName, existing);
        }
        if (collectionMap.size === 0) {
            throw new SearchError('No cached documentation found. Use index_document to cache documentation first.');
        }
        collectionsToSearch = [...collectionMap.entries()].map(([collection, entries]) => ({
            collection,
            entries,
        }));
    }
    const queryVector = await embedding.embed(query);
    // Over-fetch so cross-collection merging + dedup can still fill `limit`.
    const overFetchLimit = Math.min(limit * 3, MAX_LIMIT);
    const allResults = [];
    for (const { collection, entries } of collectionsToSearch) {
        const exists = await vectordb.hasCollection(collection);
        if (!exists)
            continue;
        const results = await vectordb.search(collection, {
            queryVector,
            queryText: query,
            limit: overFetchLimit,
        });
        // Fix: O(1) metadata lookup per result instead of a linear `find` scan.
        // First entry per source wins, matching the previous `find` semantics.
        const entryBySource = new Map();
        for (const entry of entries) {
            if (!entryBySource.has(entry.source))
                entryBySource.set(entry.source, entry);
        }
        for (const r of results) {
            const matchingEntry = entryBySource.get(r.relativePath);
            allResults.push({
                ...r,
                library: matchingEntry?.library ?? 'unknown',
                topic: matchingEntry?.topic ?? 'unknown',
                source: r.relativePath,
                stale: matchingEntry ? isStale(matchingEntry) : false,
            });
        }
    }
    allResults.sort((a, b) => b.score - a.score);
    // Fix: return the deduplicated array directly; the previous identity
    // `.map((r) => r)` was a no-op copy.
    return deduplicateResults(allResults, limit);
}
//# sourceMappingURL=doc-searcher.js.map
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
/** Broad role of a file within a repository (used for search-result ranking). */
export type FileCategory = 'source' | 'test' | 'doc' | 'config' | 'generated';
/**
 * Classify a file by category based on its relative path.
 * First match wins.
 */
export declare function classifyFileCategory(relativePath: string): FileCategory;
//# sourceMappingURL=file-category.d.ts.map
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Classify a file by category based on its relative path.
|
|
3
|
+
* First match wins.
|
|
4
|
+
*/
|
|
5
|
+
export function classifyFileCategory(relativePath) {
|
|
6
|
+
const normalized = relativePath.replace(/\\/g, '/');
|
|
7
|
+
const segments = normalized.split('/');
|
|
8
|
+
const filename = segments[segments.length - 1];
|
|
9
|
+
const lower = normalized.toLowerCase();
|
|
10
|
+
const filenameLower = filename.toLowerCase();
|
|
11
|
+
// test
|
|
12
|
+
if (lower.includes('/__tests__/') ||
|
|
13
|
+
lower.includes('.test.') ||
|
|
14
|
+
lower.includes('.spec.') ||
|
|
15
|
+
lower.includes('_test.') ||
|
|
16
|
+
lower.includes('_spec.') ||
|
|
17
|
+
filenameLower.startsWith('test_') ||
|
|
18
|
+
filenameLower.startsWith('test-')) {
|
|
19
|
+
return 'test';
|
|
20
|
+
}
|
|
21
|
+
// doc
|
|
22
|
+
const ext = filename.includes('.') ? filename.slice(filename.lastIndexOf('.')).toLowerCase() : '';
|
|
23
|
+
if (['.md', '.mdx', '.rst', '.txt'].includes(ext) ||
|
|
24
|
+
segments.some((s) => s.toLowerCase() === 'docs' || s.toLowerCase() === 'doc') ||
|
|
25
|
+
/^readme/i.test(filename) ||
|
|
26
|
+
/^changelog/i.test(filename) ||
|
|
27
|
+
/^license/i.test(filename)) {
|
|
28
|
+
return 'doc';
|
|
29
|
+
}
|
|
30
|
+
// generated
|
|
31
|
+
if (lower.includes('/dist/') ||
|
|
32
|
+
lower.startsWith('dist/') ||
|
|
33
|
+
lower.includes('/build/') ||
|
|
34
|
+
lower.startsWith('build/') ||
|
|
35
|
+
lower.includes('/generated/') ||
|
|
36
|
+
lower.startsWith('generated/') ||
|
|
37
|
+
lower.includes('.generated.') ||
|
|
38
|
+
/\.[gG]\./.test(filename)) {
|
|
39
|
+
return 'generated';
|
|
40
|
+
}
|
|
41
|
+
// config
|
|
42
|
+
if (isConfigFile(normalized, filename, ext, segments)) {
|
|
43
|
+
return 'config';
|
|
44
|
+
}
|
|
45
|
+
return 'source';
|
|
46
|
+
}
|
|
47
|
+
function isConfigFile(normalized, filename, ext, segments) {
|
|
48
|
+
const filenameLower = filename.toLowerCase();
|
|
49
|
+
// Explicit filename matches
|
|
50
|
+
if (filenameLower === 'package.json')
|
|
51
|
+
return true;
|
|
52
|
+
if (filenameLower === 'makefile')
|
|
53
|
+
return true;
|
|
54
|
+
if (filenameLower === 'dockerfile')
|
|
55
|
+
return true;
|
|
56
|
+
if (/^tsconfig.*\.json$/.test(filenameLower))
|
|
57
|
+
return true;
|
|
58
|
+
if (filenameLower.startsWith('docker-compose'))
|
|
59
|
+
return true;
|
|
60
|
+
if (filenameLower.startsWith('.eslintrc'))
|
|
61
|
+
return true;
|
|
62
|
+
if (filenameLower.startsWith('.prettierrc'))
|
|
63
|
+
return true;
|
|
64
|
+
// *.config.* pattern
|
|
65
|
+
if (filename.includes('.config.'))
|
|
66
|
+
return true;
|
|
67
|
+
// .yaml/.yml/.toml not under src/
|
|
68
|
+
if (['.yaml', '.yml', '.toml'].includes(ext)) {
|
|
69
|
+
const underSrc = segments.some((s) => s.toLowerCase() === 'src');
|
|
70
|
+
if (!underSrc)
|
|
71
|
+
return true;
|
|
72
|
+
}
|
|
73
|
+
return false;
|
|
74
|
+
}
|
|
75
|
+
//# sourceMappingURL=file-category.js.map
|
package/dist/core/indexer.js
CHANGED
|
@@ -7,6 +7,7 @@ import { scanFiles, buildSnapshot, diffSnapshots, extensionToLanguage } from './
|
|
|
7
7
|
import { getConfig } from '../config.js';
|
|
8
8
|
import { normalizePath, pathToCollectionName } from '../paths.js';
|
|
9
9
|
import { IndexingError } from '../errors.js';
|
|
10
|
+
import { classifyFileCategory } from './file-category.js';
|
|
10
11
|
import { loadSnapshot, saveSnapshot } from './snapshot-io.js';
|
|
11
12
|
export { previewCodebase } from './preview.js';
|
|
12
13
|
export { saveSnapshot, deleteSnapshot, snapshotExists } from './snapshot-io.js';
|
|
@@ -75,7 +76,9 @@ export async function indexCodebase(rootPath, embedding, vectordb, force = false
|
|
|
75
76
|
const concurrency = config.indexingConcurrency;
|
|
76
77
|
for (let i = 0; i < filesToProcess.length; i += concurrency) {
|
|
77
78
|
const batch = filesToProcess.slice(i, i + concurrency);
|
|
78
|
-
const batchResults = await Promise.all(
|
|
79
|
+
const batchResults = await Promise.all(
|
|
80
|
+
// eslint-disable-next-line @typescript-eslint/require-await
|
|
81
|
+
batch.map(async (relPath) => {
|
|
79
82
|
const fullPath = path.join(normalizedPath, relPath);
|
|
80
83
|
try {
|
|
81
84
|
const code = fs.readFileSync(fullPath, 'utf-8');
|
|
@@ -92,7 +95,7 @@ export async function indexCodebase(rootPath, embedding, vectordb, force = false
|
|
|
92
95
|
return { chunks, failed: false };
|
|
93
96
|
}
|
|
94
97
|
catch (err) {
|
|
95
|
-
console.warn(`Failed to process "${relPath}"
|
|
98
|
+
console.warn(`Failed to process "${relPath}":`, err);
|
|
96
99
|
return { chunks: [], failed: true };
|
|
97
100
|
}
|
|
98
101
|
}));
|
|
@@ -122,7 +125,7 @@ export async function indexCodebase(rootPath, embedding, vectordb, force = false
|
|
|
122
125
|
parseFailures,
|
|
123
126
|
};
|
|
124
127
|
}
|
|
125
|
-
const chunkTexts = allChunks.map(c => c.content);
|
|
128
|
+
const chunkTexts = allChunks.map((c) => c.content);
|
|
126
129
|
const estimation = embedding.estimateTokens(chunkTexts);
|
|
127
130
|
console.log(`Indexing ${filesToProcess.length} files -> ${allChunks.length} chunks -> ` +
|
|
128
131
|
`~${(estimation.estimatedTokens / 1000).toFixed(0)}K tokens (~$${estimation.estimatedCostUsd.toFixed(4)})`);
|
|
@@ -130,7 +133,7 @@ export async function indexCodebase(rootPath, embedding, vectordb, force = false
|
|
|
130
133
|
let processedChunks = 0;
|
|
131
134
|
for (let i = 0; i < allChunks.length; i += batchSize) {
|
|
132
135
|
const batch = allChunks.slice(i, i + batchSize);
|
|
133
|
-
const texts = batch.map(c => c.content);
|
|
136
|
+
const texts = batch.map((c) => c.content);
|
|
134
137
|
const pct = 10 + Math.round((i / allChunks.length) * 85);
|
|
135
138
|
onProgress?.(pct, `Embedding batch ${Math.floor(i / batchSize) + 1}/${Math.ceil(allChunks.length / batchSize)}...`);
|
|
136
139
|
const vectors = await embedding.embedBatch(texts);
|
|
@@ -146,6 +149,11 @@ export async function indexCodebase(rootPath, embedding, vectordb, force = false
|
|
|
146
149
|
endLine: chunk.endLine,
|
|
147
150
|
fileExtension: path.extname(chunk.filePath),
|
|
148
151
|
language: chunk.language,
|
|
152
|
+
fileCategory: classifyFileCategory(chunk.filePath),
|
|
153
|
+
symbolName: chunk.symbolName,
|
|
154
|
+
symbolKind: chunk.symbolKind,
|
|
155
|
+
symbolSignature: chunk.symbolSignature,
|
|
156
|
+
parentSymbol: chunk.parentSymbol,
|
|
149
157
|
}));
|
|
150
158
|
await vectordb.insert(collectionName, documents);
|
|
151
159
|
processedChunks += batch.length;
|
package/dist/core/preview.d.ts
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import type { Embedding } from '../embedding/types.js';
|
|
2
1
|
export interface PreviewResult {
|
|
3
2
|
totalFiles: number;
|
|
4
3
|
byExtension: Record<string, number>;
|
|
@@ -10,5 +9,5 @@ export interface PreviewResult {
|
|
|
10
9
|
estimatedCostUsd: number;
|
|
11
10
|
warnings: string[];
|
|
12
11
|
}
|
|
13
|
-
export declare function previewCodebase(rootPath: string,
|
|
12
|
+
export declare function previewCodebase(rootPath: string, customExtensions?: string[], customIgnorePatterns?: string[]): Promise<PreviewResult>;
|
|
14
13
|
//# sourceMappingURL=preview.d.ts.map
|
package/dist/core/preview.js
CHANGED
|
@@ -2,7 +2,7 @@ import fs from 'node:fs';
|
|
|
2
2
|
import path from 'node:path';
|
|
3
3
|
import { scanFiles } from './sync.js';
|
|
4
4
|
import { normalizePath } from '../paths.js';
|
|
5
|
-
export async function previewCodebase(rootPath,
|
|
5
|
+
export async function previewCodebase(rootPath, customExtensions, customIgnorePatterns) {
|
|
6
6
|
const normalizedPath = normalizePath(rootPath);
|
|
7
7
|
const filePaths = await scanFiles(normalizedPath, customExtensions, customIgnorePatterns);
|
|
8
8
|
const byExtension = {};
|
|
@@ -20,7 +20,6 @@ export async function previewCodebase(rootPath, embedding, customExtensions, cus
|
|
|
20
20
|
.map(([dir, count]) => ({ dir, count }))
|
|
21
21
|
.sort((a, b) => b.count - a.count)
|
|
22
22
|
.slice(0, 10);
|
|
23
|
-
// Estimate tokens from file sizes (rough: sum sizes / 4 chars-per-token)
|
|
24
23
|
let totalBytes = 0;
|
|
25
24
|
for (const f of filePaths) {
|
|
26
25
|
try {
|
|
@@ -28,11 +27,9 @@ export async function previewCodebase(rootPath, embedding, customExtensions, cus
|
|
|
28
27
|
totalBytes += stat.size;
|
|
29
28
|
}
|
|
30
29
|
catch {
|
|
31
|
-
//
|
|
30
|
+
// file may have disappeared between scan and stat
|
|
32
31
|
}
|
|
33
32
|
}
|
|
34
|
-
// Conservative estimate: ~3-4 chars per token for code.
|
|
35
|
-
// May underestimate for dense code; will be refined during actual indexing.
|
|
36
33
|
const estimatedTokens = Math.ceil(totalBytes / 3);
|
|
37
34
|
const estimatedCostUsd = (estimatedTokens / 1_000_000) * 0.02;
|
|
38
35
|
const warnings = [];
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import type { VectorDB, SymbolEntry } from '../vectordb/types.js';
/** Options controlling repo-map generation. */
export interface RepoMapOptions {
    /** Glob-like path filter; supports * (non-separator) and **. */
    pathFilter?: string;
    /** Symbol kind to include; compared case-insensitively. */
    kindFilter?: string;
    /** Approximate output budget in tokens (~4 chars per token). */
    maxTokens?: number;
}
/** Options controlling the symbol listing. */
export interface ListSymbolsOptions {
    /** Glob-like path filter; supports * (non-separator) and **. */
    pathFilter?: string;
    /** Symbol kind to include; compared case-insensitively. */
    kindFilter?: string;
    /** Case-insensitive substring match on the symbol name. */
    nameFilter?: string;
}
/** Abstraction over where symbol entries are fetched from. */
export interface SymbolSource {
    getSymbols(collectionName: string, options?: RepoMapOptions | ListSymbolsOptions): Promise<SymbolEntry[]>;
}
/** SymbolSource backed by a VectorDB's symbol listing, with in-memory filtering. */
export declare class VectorDBSymbolSource implements SymbolSource {
    private vectordb;
    constructor(vectordb: VectorDB);
    getSymbols(collectionName: string, options?: RepoMapOptions | ListSymbolsOptions): Promise<SymbolEntry[]>;
}
/**
 * Convert a glob-like pattern to a regex for path filtering.
 * Supports * (non-separator) and ** (any path segment).
 */
export declare function matchesPathFilter(relativePath: string, pattern: string): boolean;
/**
 * Generate a structured repo map grouped by file, with methods nested under classes.
 */
export declare function generateRepoMap(rootPath: string, source: SymbolSource, options?: RepoMapOptions): Promise<string>;
/**
 * List symbols as a compact Name|Kind|Location table.
 */
export declare function listSymbolsTable(rootPath: string, source: SymbolSource, options?: ListSymbolsOptions): Promise<string>;
//# sourceMappingURL=repo-map.d.ts.map
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
import { pathToCollectionName } from '../paths.js';
|
|
2
|
+
export class VectorDBSymbolSource {
|
|
3
|
+
vectordb;
|
|
4
|
+
constructor(vectordb) {
|
|
5
|
+
this.vectordb = vectordb;
|
|
6
|
+
}
|
|
7
|
+
async getSymbols(collectionName, options) {
|
|
8
|
+
const all = await this.vectordb.listSymbols(collectionName);
|
|
9
|
+
let result = all;
|
|
10
|
+
const pathFilter = options?.pathFilter;
|
|
11
|
+
if (pathFilter) {
|
|
12
|
+
result = result.filter((s) => matchesPathFilter(s.relativePath, pathFilter));
|
|
13
|
+
}
|
|
14
|
+
const kindFilter = options?.kindFilter;
|
|
15
|
+
if (kindFilter) {
|
|
16
|
+
const kind = kindFilter.toLowerCase();
|
|
17
|
+
result = result.filter((s) => s.kind.toLowerCase() === kind);
|
|
18
|
+
}
|
|
19
|
+
const nameFilter = options?.nameFilter;
|
|
20
|
+
if (nameFilter) {
|
|
21
|
+
const lower = nameFilter.toLowerCase();
|
|
22
|
+
result = result.filter((s) => s.name.toLowerCase().includes(lower));
|
|
23
|
+
}
|
|
24
|
+
return result;
|
|
25
|
+
}
|
|
26
|
+
}
|
|
27
|
+
/**
|
|
28
|
+
* Convert a glob-like pattern to a regex for path filtering.
|
|
29
|
+
* Supports * (non-separator) and ** (any path segment).
|
|
30
|
+
*/
|
|
31
|
+
export function matchesPathFilter(relativePath, pattern) {
|
|
32
|
+
// Escape regex special chars except * which we handle
|
|
33
|
+
const escaped = pattern
|
|
34
|
+
.replace(/[.+^${}()|[\]\\]/g, '\\$&')
|
|
35
|
+
.replace(/\*\*/g, '\uFFFD') // placeholder for **
|
|
36
|
+
.replace(/\*/g, '[^/]*')
|
|
37
|
+
.replace(/\uFFFD/g, '.*');
|
|
38
|
+
const regex = new RegExp(`^${escaped}$`);
|
|
39
|
+
return regex.test(relativePath);
|
|
40
|
+
}
|
|
41
|
+
/**
|
|
42
|
+
* Deduplicate symbol entries: prefer those with a signature, then by first occurrence.
|
|
43
|
+
*/
|
|
44
|
+
function deduplicateSymbols(symbols) {
|
|
45
|
+
const seen = new Map();
|
|
46
|
+
for (const sym of symbols) {
|
|
47
|
+
const key = `${sym.relativePath}:${sym.name}:${sym.kind}`;
|
|
48
|
+
const existing = seen.get(key);
|
|
49
|
+
if (!existing || (!existing.signature && sym.signature)) {
|
|
50
|
+
seen.set(key, sym);
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
return [...seen.values()];
|
|
54
|
+
}
|
|
55
|
+
/**
 * Generate a structured repo map grouped by file, with methods nested under classes.
 *
 * Output size is capped at roughly `options.maxTokens` (default 4000) using a
 * ~4-characters-per-token estimate. When the budget would be exceeded before a
 * file header, remaining files are silently dropped; when it would be exceeded
 * mid-file, a ' ...(truncated)' marker is appended and output stops.
 */
export async function generateRepoMap(rootPath, source, options) {
    const collectionName = pathToCollectionName(rootPath);
    const maxTokens = options?.maxTokens ?? 4000;
    // Character budget: ~4 chars per token.
    const maxChars = maxTokens * 4;
    const symbols = await source.getSymbols(collectionName, options);
    if (symbols.length === 0) {
        return '(no symbols found — codebase may not be indexed yet)';
    }
    const deduped = deduplicateSymbols(symbols);
    // Group by file
    const byFile = new Map();
    for (const sym of deduped) {
        const list = byFile.get(sym.relativePath) ?? [];
        list.push(sym);
        byFile.set(sym.relativePath, list);
    }
    // Sort files
    const files = [...byFile.keys()].sort();
    const lines = [];
    let totalChars = 0;
    for (const file of files) {
        const fileSymbols = byFile.get(file) ?? [];
        // Separate top-level from methods (those with a parentName)
        const topLevel = fileSymbols.filter((s) => !s.parentName);
        const methods = fileSymbols.filter((s) => s.parentName !== undefined);
        // Build method lookup by parent
        const methodsByParent = new Map();
        for (const m of methods) {
            const list = methodsByParent.get(m.parentName) ?? [];
            list.push(m);
            methodsByParent.set(m.parentName, list);
        }
        const fileHeader = `${file}:`;
        // Budget exceeded before this file: drop it (and all later files) with no marker.
        if (totalChars + fileHeader.length > maxChars)
            break;
        lines.push(fileHeader);
        totalChars += fileHeader.length + 1; // +1 accounts for the joining newline
        for (const sym of topLevel) {
            // Prefer the trimmed signature; fall back to the bare name.
            const sig = sym.signature ? ` ${sym.signature.trim()}` : ` ${sym.name}`;
            const line = ` [${sym.kind}]${sig}`;
            if (totalChars + line.length > maxChars) {
                lines.push(' ...(truncated)');
                return lines.join('\n');
            }
            lines.push(line);
            totalChars += line.length + 1;
            // Nest methods under this symbol if it's a container
            const children = methodsByParent.get(sym.name) ?? [];
            for (const child of children) {
                const childSig = child.signature ? ` ${child.signature.trim()}` : ` ${child.name}`;
                const childLine = `  [${child.kind}]${childSig}`;
                if (totalChars + childLine.length > maxChars) {
                    lines.push(' ...(truncated)');
                    return lines.join('\n');
                }
                lines.push(childLine);
                totalChars += childLine.length + 1;
            }
        }
    }
    return lines.join('\n');
}
|
|
120
|
+
/**
 * List symbols as a compact Name|Kind|Location table.
 */
export async function listSymbolsTable(rootPath, source, options) {
    const collectionName = pathToCollectionName(rootPath);
    const found = await source.getSymbols(collectionName, options);
    if (found.length === 0) {
        return '(no symbols found)';
    }
    const unique = deduplicateSymbols(found);
    // Stable ordering: by file path first, then by starting line within a file.
    unique.sort((a, b) => a.relativePath.localeCompare(b.relativePath) || a.startLine - b.startLine);
    const rows = unique.map((s) => `${s.name} | ${s.kind} | ${s.relativePath}:${s.startLine}`);
    return ['Name | Kind | Location', '-----|------|--------', ...rows].join('\n');
}
|
|
144
|
+
//# sourceMappingURL=repo-map.js.map
|
package/dist/core/searcher.d.ts
CHANGED
|
@@ -5,20 +5,8 @@ export interface SearchOptions {
|
|
|
5
5
|
extensionFilter?: string[];
|
|
6
6
|
}
|
|
7
7
|
export declare function searchCode(rootPath: string, query: string, embedding: Embedding, vectordb: VectorDB, options?: SearchOptions): Promise<SearchResult[]>;
|
|
8
|
-
|
|
9
|
-
* Deduplicate overlapping chunks from the same file.
|
|
10
|
-
* Results are already sorted by score (best first). For each file, keep only
|
|
11
|
-
* chunks whose line ranges do not overlap with an already-accepted chunk.
|
|
12
|
-
*/
|
|
8
|
+
export declare function applyCategoryBoost(results: SearchResult[]): SearchResult[];
|
|
13
9
|
export declare function deduplicateResults(results: SearchResult[], limit: number): SearchResult[];
|
|
14
|
-
/**
|
|
15
|
-
* Format search results as a compact markdown table for token-efficient output.
|
|
16
|
-
* Returns file paths, line ranges, scores, and estimated token costs.
|
|
17
|
-
* Consumers use the Read tool to fetch full code for interesting results.
|
|
18
|
-
*/
|
|
19
10
|
export declare function formatCompactResults(results: SearchResult[], query: string, rootPath: string): string;
|
|
20
|
-
/**
|
|
21
|
-
* Format search results as markdown for MCP tool output.
|
|
22
|
-
*/
|
|
23
11
|
export declare function formatSearchResults(results: SearchResult[], query: string, rootPath: string): string;
|
|
24
12
|
//# sourceMappingURL=searcher.d.ts.map
|
package/dist/core/searcher.js
CHANGED
|
@@ -5,32 +5,38 @@ const MAX_LIMIT = 50;
|
|
|
5
5
|
export async function searchCode(rootPath, query, embedding, vectordb, options = {}) {
|
|
6
6
|
const normalizedPath = normalizePath(rootPath);
|
|
7
7
|
const collectionName = pathToCollectionName(normalizedPath);
|
|
8
|
-
// Check collection exists
|
|
9
8
|
const exists = await vectordb.hasCollection(collectionName);
|
|
10
9
|
if (!exists) {
|
|
11
10
|
throw new SearchError(`Codebase at "${normalizedPath}" is not indexed. ` +
|
|
12
11
|
`Use the index_codebase tool to index it first.`);
|
|
13
12
|
}
|
|
14
13
|
const limit = Math.min(Math.max(1, options.limit ?? DEFAULT_LIMIT), MAX_LIMIT);
|
|
15
|
-
// Embed the query
|
|
16
14
|
const queryVector = await embedding.embed(query);
|
|
17
|
-
|
|
18
|
-
const overFetchLimit = Math.min(limit * 3, MAX_LIMIT);
|
|
19
|
-
// Hybrid search (dense + full-text + RRF with exponential decay)
|
|
15
|
+
const overFetchLimit = Math.min(limit * 5, MAX_LIMIT);
|
|
20
16
|
const results = await vectordb.search(collectionName, {
|
|
21
17
|
queryVector,
|
|
22
18
|
queryText: query,
|
|
23
19
|
limit: overFetchLimit,
|
|
24
20
|
extensionFilter: options.extensionFilter,
|
|
25
21
|
});
|
|
26
|
-
|
|
27
|
-
|
|
22
|
+
return deduplicateResults(applyCategoryBoost(results), limit);
|
|
23
|
+
}
|
|
24
|
+
const CATEGORY_BOOST = {
|
|
25
|
+
source: 1.0,
|
|
26
|
+
test: 0.75,
|
|
27
|
+
doc: 0.65,
|
|
28
|
+
config: 0.7,
|
|
29
|
+
generated: 0.6,
|
|
30
|
+
};
|
|
31
|
+
const DEFAULT_BOOST = 1.0; // legacy points without fileCategory get no penalty
|
|
32
|
+
export function applyCategoryBoost(results) {
|
|
33
|
+
return results
|
|
34
|
+
.map((r) => ({
|
|
35
|
+
...r,
|
|
36
|
+
score: r.score * (CATEGORY_BOOST[r.fileCategory ?? ''] ?? DEFAULT_BOOST),
|
|
37
|
+
}))
|
|
38
|
+
.sort((a, b) => b.score - a.score);
|
|
28
39
|
}
|
|
29
|
-
/**
|
|
30
|
-
* Deduplicate overlapping chunks from the same file.
|
|
31
|
-
* Results are already sorted by score (best first). For each file, keep only
|
|
32
|
-
* chunks whose line ranges do not overlap with an already-accepted chunk.
|
|
33
|
-
*/
|
|
34
40
|
export function deduplicateResults(results, limit) {
|
|
35
41
|
const accepted = [];
|
|
36
42
|
// Track accepted line ranges per file: relativePath -> [startLine, endLine][]
|
|
@@ -39,7 +45,7 @@ export function deduplicateResults(results, limit) {
|
|
|
39
45
|
if (accepted.length >= limit)
|
|
40
46
|
break;
|
|
41
47
|
const ranges = fileRanges.get(r.relativePath);
|
|
42
|
-
if (ranges
|
|
48
|
+
if (ranges?.some(([s, e]) => r.startLine <= e && r.endLine >= s)) {
|
|
43
49
|
continue; // overlaps with an already-accepted chunk from same file
|
|
44
50
|
}
|
|
45
51
|
accepted.push(r);
|
|
@@ -52,11 +58,6 @@ export function deduplicateResults(results, limit) {
|
|
|
52
58
|
}
|
|
53
59
|
return accepted;
|
|
54
60
|
}
|
|
55
|
-
/**
|
|
56
|
-
* Format search results as a compact markdown table for token-efficient output.
|
|
57
|
-
* Returns file paths, line ranges, scores, and estimated token costs.
|
|
58
|
-
* Consumers use the Read tool to fetch full code for interesting results.
|
|
59
|
-
*/
|
|
60
61
|
export function formatCompactResults(results, query, rootPath) {
|
|
61
62
|
if (results.length === 0) {
|
|
62
63
|
return `No results found for "${query}" in ${rootPath}.`;
|
|
@@ -75,16 +76,11 @@ export function formatCompactResults(results, query, rootPath) {
|
|
|
75
76
|
lines.push('Use the Read tool to view full code for specific results.');
|
|
76
77
|
return lines.join('\n');
|
|
77
78
|
}
|
|
78
|
-
/**
|
|
79
|
-
* Format search results as markdown for MCP tool output.
|
|
80
|
-
*/
|
|
81
79
|
export function formatSearchResults(results, query, rootPath) {
|
|
82
80
|
if (results.length === 0) {
|
|
83
81
|
return `No results found for "${query}" in ${rootPath}.`;
|
|
84
82
|
}
|
|
85
|
-
const lines = [
|
|
86
|
-
`Found ${results.length} result(s) for "${query}" in ${rootPath}:\n`,
|
|
87
|
-
];
|
|
83
|
+
const lines = [`Found ${results.length} result(s) for "${query}" in ${rootPath}:\n`];
|
|
88
84
|
for (let i = 0; i < results.length; i++) {
|
|
89
85
|
const r = results[i];
|
|
90
86
|
lines.push(`### Result ${i + 1} of ${results.length}`);
|
package/dist/core/snapshot-io.js
CHANGED
|
@@ -14,7 +14,7 @@ export function loadSnapshot(rootPath) {
|
|
|
14
14
|
catch (err) {
|
|
15
15
|
if (err.code === 'ENOENT')
|
|
16
16
|
return null;
|
|
17
|
-
console.warn(`Corrupted snapshot at ${filePath}, ignoring
|
|
17
|
+
console.warn(`Corrupted snapshot at ${filePath}, ignoring:`, err);
|
|
18
18
|
return null;
|
|
19
19
|
}
|
|
20
20
|
}
|
|
@@ -30,7 +30,7 @@ export function deleteSnapshot(rootPath) {
|
|
|
30
30
|
fs.unlinkSync(filePath);
|
|
31
31
|
}
|
|
32
32
|
catch {
|
|
33
|
-
//
|
|
33
|
+
// ignore — file may already be gone
|
|
34
34
|
}
|
|
35
35
|
}
|
|
36
36
|
export function snapshotExists(rootPath) {
|
package/dist/core/sync.d.ts
CHANGED
|
@@ -1,35 +1,15 @@
|
|
|
1
|
-
export
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
}
|
|
6
|
-
export interface SyncResult {
|
|
1
|
+
export type FileSnapshot = Record<string, {
|
|
2
|
+
contentHash: string;
|
|
3
|
+
}>;
|
|
4
|
+
interface SyncResult {
|
|
7
5
|
added: string[];
|
|
8
6
|
modified: string[];
|
|
9
7
|
removed: string[];
|
|
10
8
|
}
|
|
11
|
-
/**
|
|
12
|
-
* Scan a directory and return relative paths of indexable files.
|
|
13
|
-
* Respects .gitignore if present.
|
|
14
|
-
*/
|
|
15
9
|
export declare function scanFiles(rootPath: string, customExtensions?: string[], customIgnore?: string[]): Promise<string[]>;
|
|
16
|
-
/**
|
|
17
|
-
* Build a size+contentHash snapshot for a list of files.
|
|
18
|
-
*/
|
|
19
10
|
export declare function buildSnapshot(rootPath: string, relativePaths: string[]): FileSnapshot;
|
|
20
|
-
/**
|
|
21
|
-
* Compare current snapshot to a previous one. Returns added, modified, and removed files.
|
|
22
|
-
* Uses content hash as the authoritative change signal — immune to git ops, IDE formatters,
|
|
23
|
-
* NFS clock skew, and other mtime-only pitfalls.
|
|
24
|
-
*/
|
|
25
11
|
export declare function diffSnapshots(previous: FileSnapshot, current: FileSnapshot): SyncResult;
|
|
26
|
-
/**
|
|
27
|
-
* Parse .gitignore content into glob patterns.
|
|
28
|
-
* Pure function — no filesystem access.
|
|
29
|
-
*/
|
|
30
12
|
export declare function parseGitignorePatterns(content: string): string[];
|
|
31
|
-
/**
|
|
32
|
-
* Map file extension to language name for the splitter.
|
|
33
|
-
*/
|
|
34
13
|
export declare function extensionToLanguage(ext: string): string;
|
|
14
|
+
export {};
|
|
35
15
|
//# sourceMappingURL=sync.d.ts.map
|