@tai-io/codesearch 2026.313.1614
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/build-info.d.ts +3 -0
- package/dist/build-info.js +4 -0
- package/dist/config.d.ts +62 -0
- package/dist/config.js +52 -0
- package/dist/core/cleanup.d.ts +8 -0
- package/dist/core/cleanup.js +41 -0
- package/dist/core/doc-indexer.d.ts +13 -0
- package/dist/core/doc-indexer.js +76 -0
- package/dist/core/doc-searcher.d.ts +13 -0
- package/dist/core/doc-searcher.js +65 -0
- package/dist/core/file-category.d.ts +7 -0
- package/dist/core/file-category.js +75 -0
- package/dist/core/indexer.d.ts +18 -0
- package/dist/core/indexer.js +177 -0
- package/dist/core/preview.d.ts +13 -0
- package/dist/core/preview.js +58 -0
- package/dist/core/repo-map.d.ts +33 -0
- package/dist/core/repo-map.js +144 -0
- package/dist/core/searcher.d.ts +12 -0
- package/dist/core/searcher.js +97 -0
- package/dist/core/sync.d.ts +15 -0
- package/dist/core/sync.js +212 -0
- package/dist/core/targeted-indexer.d.ts +19 -0
- package/dist/core/targeted-indexer.js +127 -0
- package/dist/embedding/factory.d.ts +4 -0
- package/dist/embedding/factory.js +24 -0
- package/dist/embedding/openai.d.ts +33 -0
- package/dist/embedding/openai.js +234 -0
- package/dist/embedding/truncate.d.ts +6 -0
- package/dist/embedding/truncate.js +14 -0
- package/dist/embedding/types.d.ts +18 -0
- package/dist/embedding/types.js +2 -0
- package/dist/errors.d.ts +17 -0
- package/dist/errors.js +21 -0
- package/dist/format.d.ts +18 -0
- package/dist/format.js +151 -0
- package/dist/hooks/cli-router.d.ts +7 -0
- package/dist/hooks/cli-router.js +47 -0
- package/dist/hooks/hook-output.d.ts +56 -0
- package/dist/hooks/hook-output.js +21 -0
- package/dist/hooks/post-tool-use.d.ts +13 -0
- package/dist/hooks/post-tool-use.js +123 -0
- package/dist/hooks/stop-hook.d.ts +11 -0
- package/dist/hooks/stop-hook.js +137 -0
- package/dist/hooks/targeted-runner.d.ts +11 -0
- package/dist/hooks/targeted-runner.js +58 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.js +138 -0
- package/dist/paths.d.ts +11 -0
- package/dist/paths.js +54 -0
- package/dist/setup-message.d.ts +4 -0
- package/dist/setup-message.js +48 -0
- package/dist/splitter/ast.d.ts +13 -0
- package/dist/splitter/ast.js +231 -0
- package/dist/splitter/line.d.ts +10 -0
- package/dist/splitter/line.js +103 -0
- package/dist/splitter/symbol-extract.d.ts +16 -0
- package/dist/splitter/symbol-extract.js +61 -0
- package/dist/splitter/types.d.ts +16 -0
- package/dist/splitter/types.js +2 -0
- package/dist/state/doc-metadata.d.ts +18 -0
- package/dist/state/doc-metadata.js +59 -0
- package/dist/state/registry.d.ts +7 -0
- package/dist/state/registry.js +46 -0
- package/dist/state/snapshot.d.ts +26 -0
- package/dist/state/snapshot.js +100 -0
- package/dist/tool-schemas.d.ts +215 -0
- package/dist/tool-schemas.js +269 -0
- package/dist/tools.d.ts +58 -0
- package/dist/tools.js +245 -0
- package/dist/vectordb/rrf.d.ts +32 -0
- package/dist/vectordb/rrf.js +88 -0
- package/dist/vectordb/sqlite.d.ts +34 -0
- package/dist/vectordb/sqlite.js +624 -0
- package/dist/vectordb/types.d.ts +63 -0
- package/dist/vectordb/types.js +2 -0
- package/messages.yaml +69 -0
- package/package.json +79 -0
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import fs from 'node:fs';
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
import { scanFiles } from './sync.js';
|
|
4
|
+
import { normalizePath } from '../paths.js';
|
|
5
|
+
/**
 * Summarise what indexing `rootPath` would involve, without indexing anything.
 *
 * Scans for indexable files via `scanFiles`, then reports the number of files
 * per extension, the ten first-level directories holding the most files, a
 * rough token/cost estimate derived from on-disk sizes, and human-readable
 * warnings about suspicious results.
 *
 * @param rootPath Project root; passed through `normalizePath` before use.
 * @param customExtensions Extra extensions, forwarded unchanged to `scanFiles`.
 * @param customIgnorePatterns Extra ignore globs, forwarded unchanged to `scanFiles`.
 * @returns `{ totalFiles, byExtension, topDirectories, estimatedTokens, estimatedCostUsd, warnings }`.
 */
export async function previewCodebase(rootPath, customExtensions, customIgnorePatterns) {
    const normalizedPath = normalizePath(rootPath);
    const filePaths = await scanFiles(normalizedPath, customExtensions, customIgnorePatterns);
    // Counters live in Maps, not `{}`: directory names come from the file
    // system, and a directory called `constructor` or `__proto__` would
    // otherwise read an inherited Object.prototype member instead of a count.
    const extensionCounts = new Map();
    const dirCounts = new Map();
    let totalBytes = 0;
    for (const f of filePaths) {
        const ext = path.extname(f).toLowerCase() || '(no ext)';
        extensionCounts.set(ext, (extensionCounts.get(ext) ?? 0) + 1);
        // A path without any separator is a file directly under the root.
        const segments = f.split(/[/\\]/);
        const dir = segments.length > 1 ? segments[0] : '(root)';
        dirCounts.set(dir, (dirCounts.get(dir) ?? 0) + 1);
        try {
            totalBytes += fs.statSync(path.join(normalizedPath, f)).size;
        }
        catch {
            // file may have disappeared between scan and stat
        }
    }
    const byExtension = Object.fromEntries(extensionCounts);
    const topDirectories = [...dirCounts]
        .map(([dir, count]) => ({ dir, count }))
        .sort((a, b) => b.count - a.count)
        .slice(0, 10);
    // Heuristic: one token per 3 bytes, priced at a hard-coded $0.02 per million tokens.
    const estimatedTokens = Math.ceil(totalBytes / 3);
    const estimatedCostUsd = (estimatedTokens / 1_000_000) * 0.02;
    const warnings = [];
    if (filePaths.length === 0) {
        warnings.push('No indexable files found. Check file extension filters and ignore patterns.');
    }
    if (filePaths.length > 5000) {
        warnings.push(`Found ${filePaths.length.toLocaleString()} files. Most codebases have 100-5,000 source files. Consider adding ignore patterns.`);
    }
    if (topDirectories.length > 0 && filePaths.length > 0) {
        const topDir = topDirectories[0];
        const pct = Math.round((topDir.count / filePaths.length) * 100);
        // A single sub-directory holding most files often means build output or vendored code.
        if (pct > 50 && topDir.dir !== '(root)') {
            warnings.push(`Directory '${topDir.dir}/' contains ${pct}% of files -- consider ignoring if it contains build artifacts or dependencies.`);
        }
    }
    return {
        totalFiles: filePaths.length,
        byExtension,
        topDirectories,
        estimatedTokens,
        estimatedCostUsd,
        warnings,
    };
}
|
|
58
|
+
//# sourceMappingURL=preview.js.map
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import type { VectorDB, SymbolEntry } from '../vectordb/types.js';
|
|
2
|
+
/** Options accepted by `generateRepoMap`; also passed on to `SymbolSource.getSymbols`. */
export interface RepoMapOptions {
|
|
3
|
+
/** Glob-like filter on a symbol's relative path (`VectorDBSymbolSource` applies it with `matchesPathFilter`). */
pathFilter?: string;
|
|
4
|
+
/** Keep only symbols of this kind; `VectorDBSymbolSource` compares case-insensitively. */
kindFilter?: string;
|
|
5
|
+
/** Output budget in tokens; `generateRepoMap` defaults to 4000 and allows 4 characters per token. */
maxTokens?: number;
|
|
6
|
+
}
|
|
7
|
+
/** Options accepted by `listSymbolsTable`; also passed on to `SymbolSource.getSymbols`. */
export interface ListSymbolsOptions {
|
|
8
|
+
/** Glob-like filter on a symbol's relative path (`VectorDBSymbolSource` applies it with `matchesPathFilter`). */
pathFilter?: string;
|
|
9
|
+
/** Keep only symbols of this kind; `VectorDBSymbolSource` compares case-insensitively. */
kindFilter?: string;
|
|
10
|
+
/** Keep only symbols whose name contains this text; `VectorDBSymbolSource` compares case-insensitively. */
nameFilter?: string;
|
|
11
|
+
}
|
|
12
|
+
/** Provider of symbol entries for a collection; consumed by `generateRepoMap` and `listSymbolsTable`. */
export interface SymbolSource {
|
|
13
|
+
/** Resolve to the symbols of `collectionName`, narrowed by whichever filters `options` carries. */
getSymbols(collectionName: string, options?: RepoMapOptions | ListSymbolsOptions): Promise<SymbolEntry[]>;
|
|
14
|
+
}
|
|
15
|
+
/** `SymbolSource` backed by a `VectorDB`: lists every symbol of the collection, then filters in memory. */
export declare class VectorDBSymbolSource implements SymbolSource {
|
|
16
|
+
private vectordb;
|
|
17
|
+
/** @param vectordb Database whose `listSymbols` method supplies the unfiltered symbols. */
constructor(vectordb: VectorDB);
|
|
18
|
+
/** Apply the path, kind and name filters (each only when set) to the collection's symbols. */
getSymbols(collectionName: string, options?: RepoMapOptions | ListSymbolsOptions): Promise<SymbolEntry[]>;
|
|
19
|
+
}
|
|
20
|
+
/**
|
|
21
|
+
 * Test whether a relative path matches a glob-like pattern (the whole path must match).
|
|
22
|
+
 * Supports `*` (any run of characters except `/`) and `**` (any run of characters, `/` included).
|
|
23
|
+
 */
|
|
24
|
+
export declare function matchesPathFilter(relativePath: string, pattern: string): boolean;
|
|
25
|
+
/**
|
|
26
|
+
 * Generate a plain-text repo map grouped by file (files sorted by path), with methods nested under
 * the symbol named by their `parentName`. Output is cut off once `options.maxTokens` (default 4000,
 * counted as 4 characters per token) is used up. Resolves to a placeholder message when no symbols exist.
|
|
27
|
+
 */
|
|
28
|
+
export declare function generateRepoMap(rootPath: string, source: SymbolSource, options?: RepoMapOptions): Promise<string>;
|
|
29
|
+
/**
|
|
30
|
+
 * List symbols as a compact `Name | Kind | Location` table, sorted by relative path and then start
 * line, where Location is `relativePath:startLine`. Resolves to `(no symbols found)` when empty.
|
|
31
|
+
 */
|
|
32
|
+
export declare function listSymbolsTable(rootPath: string, source: SymbolSource, options?: ListSymbolsOptions): Promise<string>;
|
|
33
|
+
//# sourceMappingURL=repo-map.d.ts.map
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
import { pathToCollectionName } from '../paths.js';
|
|
2
|
+
/**
 * Symbol source backed by a vector database: fetches every symbol of a
 * collection through `vectordb.listSymbols` and filters the list in memory.
 */
export class VectorDBSymbolSource {
    vectordb;
    constructor(vectordb) {
        this.vectordb = vectordb;
    }
    /**
     * Return the symbols of `collectionName` that pass every supplied filter.
     *
     * - `pathFilter`: glob-like match on `relativePath` (see `matchesPathFilter`)
     * - `kindFilter`: case-insensitive equality on `kind`
     * - `nameFilter`: case-insensitive substring match on `name`
     *
     * Unset or empty filters are skipped.
     */
    async getSymbols(collectionName, options) {
        const symbols = await this.vectordb.listSymbols(collectionName);
        const pathFilter = options?.pathFilter;
        const kindFilter = options?.kindFilter;
        const nameFilter = options?.nameFilter;
        // Collect one predicate per active filter, in path -> kind -> name order.
        const checks = [];
        if (pathFilter) {
            checks.push((entry) => matchesPathFilter(entry.relativePath, pathFilter));
        }
        if (kindFilter) {
            const wantedKind = kindFilter.toLowerCase();
            checks.push((entry) => entry.kind.toLowerCase() === wantedKind);
        }
        if (nameFilter) {
            const needle = nameFilter.toLowerCase();
            checks.push((entry) => entry.name.toLowerCase().includes(needle));
        }
        // Apply the filters one after another so each sees only the survivors of the previous one.
        return checks.reduce((remaining, check) => remaining.filter(check), symbols);
    }
}
|
|
27
|
+
/**
 * Test whether a relative path matches a glob-like filter.
 *
 * Supported wildcards:
 * - `*`  matches any run of characters except `/`
 * - `**` matches any run of characters, `/` included
 *
 * Every other character, including regex metacharacters such as `?`, `.`
 * and `(`, is matched literally. The pattern must match the whole path.
 *
 * @param relativePath Path to test, using `/` separators.
 * @param pattern Glob-like filter.
 * @returns `true` when the entire path matches the pattern.
 */
export function matchesPathFilter(relativePath, pattern) {
    // Translate in one pass so no placeholder character is needed (a pattern
    // that itself contained the placeholder used to be misread as `**`).
    // `**` is listed before `*` so the longer wildcard wins, and `?` is in the
    // escape set so it can neither act as a quantifier nor make the regex invalid.
    const source = pattern.replace(/\*\*|\*|[.+?^${}()|[\]\\]/g, (token) => {
        if (token === '**') {
            return '.*';
        }
        if (token === '*') {
            return '[^/]*';
        }
        return `\\${token}`;
    });
    return new RegExp(`^${source}$`).test(relativePath);
}
|
|
41
|
+
/**
|
|
42
|
+
* Deduplicate symbol entries: prefer those with a signature, then by first occurrence.
|
|
43
|
+
*/
|
|
44
|
+
function deduplicateSymbols(symbols) {
|
|
45
|
+
const seen = new Map();
|
|
46
|
+
for (const sym of symbols) {
|
|
47
|
+
const key = `${sym.relativePath}:${sym.name}:${sym.kind}`;
|
|
48
|
+
const existing = seen.get(key);
|
|
49
|
+
if (!existing || (!existing.signature && sym.signature)) {
|
|
50
|
+
seen.set(key, sym);
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
return [...seen.values()];
|
|
54
|
+
}
|
|
55
|
+
/**
 * Generate a structured repo map grouped by file, with methods nested under
 * the symbol named by their `parentName`.
 *
 * Layout, files sorted by path:
 *
 *     path/to/file.ts:
 *       [class] class Foo
 *         [method] bar(): void
 *
 * Nested symbols whose parent is not among the file's top-level symbols (for
 * example because a `kindFilter` removed the parent) are listed at top-level
 * indentation instead of being dropped.
 *
 * Output is limited to `options.maxTokens` tokens (default 4000), counted as
 * 4 characters per token. When the budget runs out a `...(truncated)` marker
 * is appended and generation stops.
 *
 * @param rootPath Project root; converted to a collection name with `pathToCollectionName`.
 * @param source Provider of symbol entries.
 * @param options Filters handed to `source.getSymbols`, plus `maxTokens`.
 * @returns The map as text, or a placeholder message when there are no symbols.
 */
export async function generateRepoMap(rootPath, source, options) {
    const collectionName = pathToCollectionName(rootPath);
    const maxTokens = options?.maxTokens ?? 4000;
    const maxChars = maxTokens * 4;
    const symbols = await source.getSymbols(collectionName, options);
    if (symbols.length === 0) {
        return '(no symbols found — codebase may not be indexed yet)';
    }
    // Group by file
    const byFile = new Map();
    for (const sym of deduplicateSymbols(symbols)) {
        const list = byFile.get(sym.relativePath) ?? [];
        list.push(sym);
        byFile.set(sym.relativePath, list);
    }
    const lines = [];
    let totalChars = 0;
    // Append `line` if it still fits the budget (+1 accounts for the joining newline).
    const emit = (line) => {
        if (totalChars + line.length > maxChars) {
            return false;
        }
        lines.push(line);
        totalChars += line.length + 1;
        return true;
    };
    // Prefer the signature, falling back to the bare name.
    const describe = (sym, indent) => `${indent}[${sym.kind}] ${sym.signature ? sym.signature.trim() : sym.name}`;
    for (const file of [...byFile.keys()].sort()) {
        const fileSymbols = byFile.get(file) ?? [];
        // A symbol is nested exactly when it names a parent; empty or null parents count as top-level.
        const topLevel = fileSymbols.filter((s) => !s.parentName);
        const nested = fileSymbols.filter((s) => s.parentName);
        const childrenByParent = new Map();
        for (const child of nested) {
            const list = childrenByParent.get(child.parentName) ?? [];
            list.push(child);
            childrenByParent.set(child.parentName, list);
        }
        if (!emit(`${file}:`)) {
            // Signal the cut even when it falls on a file boundary.
            lines.push('...(truncated)');
            return lines.join('\n');
        }
        for (const sym of topLevel) {
            if (!emit(describe(sym, '  '))) {
                lines.push('  ...(truncated)');
                return lines.join('\n');
            }
            // Nest methods under this symbol if it's a container
            for (const child of childrenByParent.get(sym.name) ?? []) {
                if (!emit(describe(child, '    '))) {
                    lines.push('    ...(truncated)');
                    return lines.join('\n');
                }
            }
        }
        // Nested symbols with no visible parent would otherwise vanish from the map.
        const topLevelNames = new Set(topLevel.map((s) => s.name));
        for (const orphan of nested) {
            if (topLevelNames.has(orphan.parentName)) {
                continue;
            }
            if (!emit(describe(orphan, '  '))) {
                lines.push('  ...(truncated)');
                return lines.join('\n');
            }
        }
    }
    return lines.join('\n');
}
|
|
120
|
+
/**
 * Render the (filtered, deduplicated) symbols of a project as a compact
 * `Name | Kind | Location` table, ordered by relative path and then by start
 * line. Location is `relativePath:startLine`.
 *
 * @returns The table text, or `(no symbols found)` when nothing matches.
 */
export async function listSymbolsTable(rootPath, source, options) {
    const collectionName = pathToCollectionName(rootPath);
    const found = await source.getSymbols(collectionName, options);
    if (found.length === 0) {
        return '(no symbols found)';
    }
    const ordered = deduplicateSymbols(found);
    // Path first; start line only breaks ties within one file.
    ordered.sort((left, right) => left.relativePath.localeCompare(right.relativePath) || left.startLine - right.startLine);
    const table = ['Name | Kind | Location', '-----|------|--------'];
    for (const { name, kind, relativePath, startLine } of ordered) {
        table.push(`${name} | ${kind} | ${relativePath}:${startLine}`);
    }
    return table.join('\n');
}
|
|
144
|
+
//# sourceMappingURL=repo-map.js.map
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import type { Embedding } from '../embedding/types.js';
|
|
2
|
+
import type { VectorDB, SearchResult } from '../vectordb/types.js';
|
|
3
|
+
/** Optional settings for `searchCode`. */
export interface SearchOptions {
|
|
4
|
+
/** Maximum number of results; `searchCode` defaults to 10 and clamps the value to the range 1-50. */
limit?: number;
|
|
5
|
+
/** Passed through unchanged to `vectordb.search` as its `extensionFilter`. */
extensionFilter?: string[];
|
|
6
|
+
}
|
|
7
|
+
/**
 * Search an indexed codebase: embeds `query`, queries the collection derived from `rootPath`
 * with both the vector and the raw text, then applies category boosts and overlap deduplication.
 *
 * @throws SearchError when no collection exists for `rootPath`.
 */
export declare function searchCode(rootPath: string, query: string, embedding: Embedding, vectordb: VectorDB, options?: SearchOptions): Promise<SearchResult[]>;
|
|
8
|
+
/**
 * Return copies of `results` with each score multiplied by a per-`fileCategory` weight,
 * sorted by the adjusted score in descending order. Results without a category are not penalised.
 */
export declare function applyCategoryBoost(results: SearchResult[]): SearchResult[];
|
|
9
|
+
/**
 * Walk `results` in order and keep at most `limit` of them, skipping any result whose line
 * range overlaps an already-kept result from the same file.
 */
export declare function deduplicateResults(results: SearchResult[], limit: number): SearchResult[];
|
|
10
|
+
/**
 * Format results as a markdown table (rank, file, line range, score, approximate token count)
 * without the code itself; returns a "No results found" sentence for an empty list.
 */
export declare function formatCompactResults(results: SearchResult[], query: string, rootPath: string): string;
|
|
11
|
+
/**
 * Format results as markdown sections, each with file, line range, language, score and the
 * chunk content in a fenced code block; returns a "No results found" sentence for an empty list.
 */
export declare function formatSearchResults(results: SearchResult[], query: string, rootPath: string): string;
|
|
12
|
+
//# sourceMappingURL=searcher.d.ts.map
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
import { normalizePath, pathToCollectionName } from '../paths.js';
|
|
2
|
+
import { SearchError } from '../errors.js';
|
|
3
|
+
/** Number of results returned when the caller does not specify a limit. */
const DEFAULT_LIMIT = 10;
/** Upper bound for both the caller-visible limit and the database fetch size. */
const MAX_LIMIT = 50;
/**
 * Search an indexed codebase.
 *
 * Embeds `query`, asks the vector database for candidates using both the
 * vector and the raw query text, re-weights them by file category and removes
 * overlapping chunks.
 *
 * @param rootPath Project root; normalised and mapped to a collection name.
 * @param query Search text.
 * @param embedding Provider whose `embed(query)` yields the query vector.
 * @param vectordb Database exposing `hasCollection` and `search`.
 * @param options `limit` (default 10, clamped to 1-50, truncated to an integer)
 *                and `extensionFilter` (forwarded to the database).
 * @returns At most `limit` results, best score first.
 * @throws SearchError when the codebase has no collection yet.
 */
export async function searchCode(rootPath, query, embedding, vectordb, options = {}) {
    const normalizedPath = normalizePath(rootPath);
    const collectionName = pathToCollectionName(normalizedPath);
    const exists = await vectordb.hasCollection(collectionName);
    if (!exists) {
        throw new SearchError(`Codebase at "${normalizedPath}" is not indexed. ` +
            `Use the index tool to index it first.`);
    }
    // A NaN limit would survive Math.min/Math.max and disable the cut-off in
    // deduplicateResults; a fractional one would reach the database as a
    // non-integer fetch size. Fall back to the default and truncate instead.
    const requested = Number(options.limit ?? DEFAULT_LIMIT);
    const limit = Math.trunc(Math.min(Math.max(1, Number.isNaN(requested) ? DEFAULT_LIMIT : requested), MAX_LIMIT));
    const queryVector = await embedding.embed(query);
    // Fetch extra candidates so deduplication can drop overlaps and still fill
    // `limit`; the fetch size itself is capped at MAX_LIMIT.
    const overFetchLimit = Math.min(limit * 5, MAX_LIMIT);
    const results = await vectordb.search(collectionName, {
        queryVector,
        queryText: query,
        limit: overFetchLimit,
        extensionFilter: options.extensionFilter,
    });
    return deduplicateResults(applyCategoryBoost(results), limit);
}
|
|
24
|
+
const CATEGORY_BOOST = {
|
|
25
|
+
source: 1.0,
|
|
26
|
+
test: 0.75,
|
|
27
|
+
doc: 0.65,
|
|
28
|
+
config: 0.7,
|
|
29
|
+
generated: 0.6,
|
|
30
|
+
};
|
|
31
|
+
const DEFAULT_BOOST = 1.0; // legacy points without fileCategory get no penalty
|
|
32
|
+
export function applyCategoryBoost(results) {
|
|
33
|
+
return results
|
|
34
|
+
.map((r) => ({
|
|
35
|
+
...r,
|
|
36
|
+
score: r.score * (CATEGORY_BOOST[r.fileCategory ?? ''] ?? DEFAULT_BOOST),
|
|
37
|
+
}))
|
|
38
|
+
.sort((a, b) => b.score - a.score);
|
|
39
|
+
}
|
|
40
|
+
export function deduplicateResults(results, limit) {
|
|
41
|
+
const accepted = [];
|
|
42
|
+
// Track accepted line ranges per file: relativePath -> [startLine, endLine][]
|
|
43
|
+
const fileRanges = new Map();
|
|
44
|
+
for (const r of results) {
|
|
45
|
+
if (accepted.length >= limit)
|
|
46
|
+
break;
|
|
47
|
+
const ranges = fileRanges.get(r.relativePath);
|
|
48
|
+
if (ranges?.some(([s, e]) => r.startLine <= e && r.endLine >= s)) {
|
|
49
|
+
continue; // overlaps with an already-accepted chunk from same file
|
|
50
|
+
}
|
|
51
|
+
accepted.push(r);
|
|
52
|
+
if (!ranges) {
|
|
53
|
+
fileRanges.set(r.relativePath, [[r.startLine, r.endLine]]);
|
|
54
|
+
}
|
|
55
|
+
else {
|
|
56
|
+
ranges.push([r.startLine, r.endLine]);
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
return accepted;
|
|
60
|
+
}
|
|
61
|
+
/**
 * Format search results as a compact markdown table without code content.
 *
 * Columns: 1-based rank, file path, line range, score (two decimals) and an
 * approximate token count (content length / 4, rounded up). A closing line
 * points the reader to the Read tool for the full code.
 *
 * @returns The table, or a "No results found" sentence when `results` is empty.
 */
export function formatCompactResults(results, query, rootPath) {
    if (results.length === 0) {
        return `No results found for "${query}" in ${rootPath}.`;
    }
    const rows = results.map((hit, index) => {
        const approxTokens = Math.ceil(hit.content.length / 4);
        return `| ${index + 1} | \`${hit.relativePath}\` | ${hit.startLine}-${hit.endLine} | ${hit.score.toFixed(2)} | ~${approxTokens} |`;
    });
    return [
        `Found ${results.length} result(s) for "${query}" in ${rootPath}:\n`,
        '| # | File | Lines | Score | ~Tokens |',
        '|---|------|-------|-------|---------|',
        ...rows,
        '',
        'Use the Read tool to view full code for specific results.',
    ].join('\n');
}
|
|
79
|
+
/**
 * Format search results as markdown, one section per result, with the chunk
 * content in a fenced code block.
 *
 * Each section shows the 1-based position, file path, line range, language
 * and score (four decimals). The language tag on the fence is reduced to
 * `[a-zA-Z0-9_+-]` so it cannot inject markup.
 *
 * The fence is made one backtick longer than the longest backtick run inside
 * the content (minimum three), so content that itself contains code fences,
 * such as an indexed markdown file, cannot close the block early.
 *
 * @returns The markdown text, or a "No results found" sentence when `results` is empty.
 */
export function formatSearchResults(results, query, rootPath) {
    if (results.length === 0) {
        return `No results found for "${query}" in ${rootPath}.`;
    }
    const lines = [`Found ${results.length} result(s) for "${query}" in ${rootPath}:\n`];
    for (let i = 0; i < results.length; i++) {
        const r = results[i];
        lines.push(`### Result ${i + 1} of ${results.length}`);
        lines.push(`**File:** \`${r.relativePath}\` (lines ${r.startLine}-${r.endLine})`);
        lines.push(`**Language:** ${r.language} | **Score:** ${r.score.toFixed(4)}`);
        const safeLang = r.language.replace(/[^a-zA-Z0-9_+-]/g, '');
        const backtickRuns = r.content.match(/`+/g) ?? [];
        const longestRun = backtickRuns.reduce((longest, run) => Math.max(longest, run.length), 0);
        const fence = '`'.repeat(Math.max(3, longestRun + 1));
        lines.push(fence + safeLang);
        lines.push(r.content);
        lines.push(fence);
        lines.push('');
    }
    return lines.join('\n');
}
|
|
97
|
+
//# sourceMappingURL=searcher.js.map
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/** Snapshot of a file set: relative path -> content fingerprint (as produced by `buildSnapshot`). */
export type FileSnapshot = Record<string, {
|
|
2
|
+
/** First 16 hex characters of the file content's SHA-256 digest. */
contentHash: string;
|
|
3
|
+
}>;
|
|
4
|
+
/** Outcome of `diffSnapshots`: relative paths grouped by how they changed between two snapshots. */
interface SyncResult {
|
|
5
|
+
/** Paths present in the current snapshot but not in the previous one. */
added: string[];
|
|
6
|
+
/** Paths present in both snapshots whose `contentHash` differs. */
modified: string[];
|
|
7
|
+
/** Paths present in the previous snapshot but not in the current one. */
removed: string[];
|
|
8
|
+
}
|
|
9
|
+
/**
 * List the indexable files under `rootPath` as sorted, root-relative paths.
 * A file qualifies when its extension is in the built-in set or `customExtensions`, and it is not
 * excluded by the built-in ignore globs, the root `.gitignore`, or `customIgnore`. Dot-files are skipped.
 */
export declare function scanFiles(rootPath: string, customExtensions?: string[], customIgnore?: string[]): Promise<string[]>;
|
|
10
|
+
/**
 * Hash each listed file (paths relative to `rootPath`) into a `FileSnapshot`.
 * Files that cannot be read are reported on stderr and left out of the snapshot.
 */
export declare function buildSnapshot(rootPath: string, relativePaths: string[]): FileSnapshot;
|
|
11
|
+
/** Compare two snapshots and report which paths were added, modified (hash changed) or removed. */
export declare function diffSnapshots(previous: FileSnapshot, current: FileSnapshot): SyncResult;
|
|
12
|
+
/**
 * Convert `.gitignore` text into glob ignore patterns. Blank lines, `#` comments and `!` negations
 * are skipped; a trailing `/` is removed; a leading `/` is removed (pattern stays root-relative);
 * patterns without any `/` are prefixed with `**` + `/` so they apply at every depth.
 */
export declare function parseGitignorePatterns(content: string): string[];
|
|
13
|
+
/**
 * Map a file extension (with leading dot, any letter case) to a language identifier such as
 * `typescript` or `python`; returns `unknown` for extensions without a mapping.
 */
export declare function extensionToLanguage(ext: string): string;
|
|
14
|
+
export {};
|
|
15
|
+
//# sourceMappingURL=sync.d.ts.map
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
import fs from 'node:fs';
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
import { createHash } from 'node:crypto';
|
|
4
|
+
import { glob } from 'glob';
|
|
5
|
+
const DEFAULT_EXTENSIONS = new Set([
|
|
6
|
+
'.ts',
|
|
7
|
+
'.tsx',
|
|
8
|
+
'.js',
|
|
9
|
+
'.jsx',
|
|
10
|
+
'.mjs',
|
|
11
|
+
'.cjs',
|
|
12
|
+
'.py',
|
|
13
|
+
'.pyi',
|
|
14
|
+
'.go',
|
|
15
|
+
'.java',
|
|
16
|
+
'.rs',
|
|
17
|
+
'.cpp',
|
|
18
|
+
'.cc',
|
|
19
|
+
'.cxx',
|
|
20
|
+
'.c',
|
|
21
|
+
'.h',
|
|
22
|
+
'.hpp',
|
|
23
|
+
'.cs',
|
|
24
|
+
'.scala',
|
|
25
|
+
'.rb',
|
|
26
|
+
'.php',
|
|
27
|
+
'.swift',
|
|
28
|
+
'.kt',
|
|
29
|
+
'.kts',
|
|
30
|
+
'.lua',
|
|
31
|
+
'.sh',
|
|
32
|
+
'.bash',
|
|
33
|
+
'.zsh',
|
|
34
|
+
'.sql',
|
|
35
|
+
'.r',
|
|
36
|
+
'.R',
|
|
37
|
+
'.m',
|
|
38
|
+
'.mm', // Objective-C
|
|
39
|
+
'.dart',
|
|
40
|
+
'.ex',
|
|
41
|
+
'.exs', // Elixir
|
|
42
|
+
'.erl',
|
|
43
|
+
'.hrl', // Erlang
|
|
44
|
+
'.hs', // Haskell
|
|
45
|
+
'.ml',
|
|
46
|
+
'.mli', // OCaml
|
|
47
|
+
'.vue',
|
|
48
|
+
'.svelte',
|
|
49
|
+
'.astro',
|
|
50
|
+
'.yaml',
|
|
51
|
+
'.yml',
|
|
52
|
+
'.toml',
|
|
53
|
+
'.json',
|
|
54
|
+
'.md',
|
|
55
|
+
'.mdx',
|
|
56
|
+
'.html',
|
|
57
|
+
'.css',
|
|
58
|
+
'.scss',
|
|
59
|
+
'.less',
|
|
60
|
+
]);
|
|
61
|
+
/**
 * Glob patterns that `scanFiles` always passes to glob's `ignore` option.
 * Directory entries end in `/**` so the whole subtree is excluded.
 */
const DEFAULT_IGNORE = [
    // Dependencies and version control
    '**/node_modules/**', '**/.git/**',
    // Build output
    '**/dist/**', '**/build/**', '**/.next/**', '**/target/**',
    // Python caches and virtual environments
    '**/__pycache__/**', '**/.venv/**', '**/venv/**',
    // Vendored code, caches, coverage reports
    '**/vendor/**', '**/.cache/**', '**/coverage/**',
    // Minified bundles
    '**/*.min.js', '**/*.min.css',
    // Lock files
    '**/package-lock.json', '**/pnpm-lock.yaml', '**/yarn.lock',
];
|
|
80
|
+
/**
 * List the indexable files under `rootPath`.
 *
 * A file is returned when its lower-cased extension is in the built-in set or
 * in `customExtensions`, and it is not excluded by the built-in ignore globs,
 * the patterns read from the root `.gitignore`, or `customIgnore`.
 * Dot-files and directories are never returned.
 *
 * @param rootPath Directory to scan.
 * @param customExtensions Extra extensions; letter case and a missing leading
 *                         dot are tolerated (`'PROTO'` is treated as `'.proto'`).
 * @param customIgnore Extra glob ignore patterns.
 * @returns Root-relative paths, sorted.
 */
export async function scanFiles(rootPath, customExtensions = [], customIgnore = []) {
    // File extensions are lower-cased before lookup (see the filter below), so
    // custom entries must be brought into the same form or they can never match.
    const normalizedCustom = customExtensions
        .map((ext) => ext.trim().toLowerCase())
        .filter((ext) => ext.length > 0)
        .map((ext) => (ext.startsWith('.') ? ext : `.${ext}`));
    const extensions = new Set([...DEFAULT_EXTENSIONS, ...normalizedCustom]);
    const gitignorePatterns = readGitignore(rootPath);
    const allIgnore = [...DEFAULT_IGNORE, ...gitignorePatterns, ...customIgnore];
    const files = await glob('**/*', {
        cwd: rootPath,
        nodir: true,
        dot: false,
        ignore: allIgnore,
        absolute: false,
    });
    return files.filter((f) => extensions.has(path.extname(f).toLowerCase())).sort();
}
|
|
93
|
+
/**
 * Fingerprint a file: the first 16 hex characters of the SHA-256 digest of
 * its raw bytes. Read errors propagate to the caller.
 */
function hashFileContent(fullPath) {
    const hasher = createHash('sha256');
    hasher.update(fs.readFileSync(fullPath));
    const hexDigest = hasher.digest('hex');
    return hexDigest.slice(0, 16);
}
|
|
97
|
+
/**
 * Build a snapshot mapping each relative path to its content hash.
 *
 * Files that cannot be hashed (for example because they vanished or are
 * unreadable) are reported on stderr and omitted; the function itself does
 * not fail because of them.
 *
 * @param rootPath Directory the relative paths are resolved against.
 * @param relativePaths Paths to fingerprint.
 * @returns Object keyed by relative path with `{ contentHash }` values.
 */
export function buildSnapshot(rootPath, relativePaths) {
    const snapshot = {};
    relativePaths.forEach((rel) => {
        const absolute = path.join(rootPath, rel);
        let contentHash;
        try {
            contentHash = hashFileContent(absolute);
        }
        catch (err) {
            process.stderr.write(`Skipping "${rel}": ${String(err)}\n`);
            return;
        }
        snapshot[rel] = { contentHash };
    });
    return snapshot;
}
|
|
111
|
+
export function diffSnapshots(previous, current) {
|
|
112
|
+
const added = [];
|
|
113
|
+
const modified = [];
|
|
114
|
+
const removed = [];
|
|
115
|
+
for (const [rel, cur] of Object.entries(current)) {
|
|
116
|
+
if (!Object.hasOwn(previous, rel)) {
|
|
117
|
+
added.push(rel);
|
|
118
|
+
}
|
|
119
|
+
else if (previous[rel].contentHash !== cur.contentHash) {
|
|
120
|
+
modified.push(rel);
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
for (const rel of Object.keys(previous)) {
|
|
124
|
+
if (!(rel in current)) {
|
|
125
|
+
removed.push(rel);
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
return { added, modified, removed };
|
|
129
|
+
}
|
|
130
|
+
/**
 * Convert `.gitignore` text into patterns for glob's `ignore` option.
 *
 * Per line:
 * - blank lines, `#` comments and `!` negations are skipped (negation is not supported);
 * - a trailing `/` is removed;
 * - a leading `/` is removed, leaving a root-relative pattern;
 * - a pattern without any `/` is prefixed with `**` + `/` so it applies at every depth.
 *
 * glob only excludes the exact entry a pattern matches; the children of a
 * directory are excluded only by a pattern ending in `/**`. Every pattern is
 * therefore returned together with a `/**` companion (unless it already ends
 * that way), so that a line such as `dist/` really hides the files inside it.
 *
 * @param content Raw `.gitignore` text.
 * @returns Glob patterns; each pattern is immediately followed by its companion.
 */
export function parseGitignorePatterns(content) {
    const patterns = [];
    for (const rawLine of content.split('\n')) {
        // trim() also drops the `\r` of CRLF line endings.
        const line = rawLine.trim();
        if (!line || line.startsWith('#') || line.startsWith('!')) {
            continue;
        }
        let pattern = line.endsWith('/') ? line.slice(0, -1) : line;
        if (pattern.startsWith('/')) {
            pattern = pattern.slice(1);
        }
        else if (pattern && !pattern.includes('/')) {
            pattern = `**/${pattern}`;
        }
        // A lone `/` leaves nothing to match; do not turn it into a catch-all.
        if (!pattern) {
            continue;
        }
        patterns.push(pattern);
        if (!pattern.endsWith('/**')) {
            patterns.push(`${pattern}/**`);
        }
    }
    return patterns;
}
|
|
148
|
+
/**
 * Read `<rootPath>/.gitignore` and convert it to glob ignore patterns.
 * Best effort: if the file is missing or cannot be processed, no patterns
 * are returned.
 */
function readGitignore(rootPath) {
    const location = path.join(rootPath, '.gitignore');
    let patterns = [];
    try {
        patterns = parseGitignorePatterns(fs.readFileSync(location, 'utf-8'));
    }
    catch {
        // no readable .gitignore: fall through with the empty list
    }
    return patterns;
}
|
|
158
|
+
/**
 * Lower-case extension -> language identifier. Built once at module load and
 * kept in a Map so that lookups can only hit the entries listed here.
 *
 * NOTE(review): `.m`, `.mm`, `.erl`, `.hrl` and `.mli` are indexed by default
 * but have no entry, so they resolve to 'unknown' — confirm whether that is intended.
 */
const LANGUAGE_BY_EXTENSION = new Map([
    ['.ts', 'typescript'],
    ['.tsx', 'tsx'],
    ['.js', 'javascript'],
    ['.jsx', 'javascript'],
    ['.mjs', 'javascript'],
    ['.cjs', 'javascript'],
    ['.py', 'python'],
    ['.pyi', 'python'],
    ['.go', 'go'],
    ['.java', 'java'],
    ['.rs', 'rust'],
    ['.cpp', 'cpp'],
    ['.cc', 'cpp'],
    ['.cxx', 'cpp'],
    ['.c', 'c'],
    ['.h', 'cpp'],
    ['.hpp', 'cpp'],
    ['.cs', 'csharp'],
    ['.scala', 'scala'],
    ['.rb', 'ruby'],
    ['.php', 'php'],
    ['.swift', 'swift'],
    ['.kt', 'kotlin'],
    ['.kts', 'kotlin'],
    ['.lua', 'lua'],
    ['.sh', 'bash'],
    ['.bash', 'bash'],
    ['.zsh', 'bash'],
    ['.sql', 'sql'],
    // The input is lower-cased before lookup, so this entry also serves `.R`.
    ['.r', 'r'],
    ['.dart', 'dart'],
    ['.ex', 'elixir'],
    ['.exs', 'elixir'],
    ['.hs', 'haskell'],
    ['.ml', 'ocaml'],
    ['.vue', 'vue'],
    ['.svelte', 'svelte'],
    ['.astro', 'astro'],
    ['.yaml', 'yaml'],
    ['.yml', 'yaml'],
    ['.toml', 'toml'],
    ['.json', 'json'],
    ['.md', 'markdown'],
    ['.mdx', 'markdown'],
    ['.html', 'html'],
    ['.css', 'css'],
    ['.scss', 'scss'],
    ['.less', 'less'],
]);
/**
 * Map a file extension to a language identifier.
 *
 * @param ext Extension including the leading dot; letter case is ignored.
 * @returns The language identifier, or `'unknown'` when the extension has no mapping.
 */
export function extensionToLanguage(ext) {
    return LANGUAGE_BY_EXTENSION.get(ext.toLowerCase()) ?? 'unknown';
}
|
|
212
|
+
//# sourceMappingURL=sync.js.map
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import type { Embedding } from '../embedding/types.js';
|
|
2
|
+
import type { VectorDB } from '../vectordb/types.js';
|
|
3
|
+
/**
 * Summary resolved by `indexFiles`.
 * NOTE(review): field meanings below are inferred from their names; the implementation
 * (targeted-indexer.js) is not visible here — confirm before relying on them.
 */
export interface TargetedIndexResult {
|
|
4
|
+
/** Presumably the number of files that were re-indexed — TODO confirm. */
processedFiles: number;
|
|
5
|
+
/** Presumably the number of chunks produced across the processed files — TODO confirm. */
totalChunks: number;
|
|
6
|
+
/** Presumably the number of requested files that were not re-indexed — TODO confirm. */
skippedFiles: number;
|
|
7
|
+
/** Presumably the elapsed time of the run in milliseconds — TODO confirm. */
durationMs: number;
|
|
8
|
+
}
|
|
9
|
+
/**
|
|
10
|
+
 * Re-index a specific set of files within a project.
|
|
11
|
+
 * For each file: delete stale vectors, re-split, re-embed, re-insert, update snapshot.
|
|
12
|
+
 *
|
|
13
|
+
 * @param rootPath Absolute path to the project root
|
|
14
|
+
 * @param relativePaths Relative paths (from rootPath) of files to re-index
|
|
15
|
+
 * @param embedding Embedding provider
|
|
16
|
+
 * @param vectordb Vector DB provider
 * @returns Promise resolving to a `TargetedIndexResult` summary of the run
|
|
17
|
+
 */
|
|
18
|
+
export declare function indexFiles(rootPath: string, relativePaths: string[], embedding: Embedding, vectordb: VectorDB): Promise<TargetedIndexResult>;
|
|
19
|
+
//# sourceMappingURL=targeted-indexer.d.ts.map
|