scythe-context-mcp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +21 -0
- package/CHANGELOG.md +27 -0
- package/LICENSE +201 -0
- package/README.en.md +197 -0
- package/README.md +197 -0
- package/README.zh-CN.md +197 -0
- package/dist/config.js +61 -0
- package/dist/index.js +16 -0
- package/dist/indexing/binary.js +15 -0
- package/dist/indexing/chunker.js +64 -0
- package/dist/indexing/contextPack.js +54 -0
- package/dist/indexing/defaults.js +6 -0
- package/dist/indexing/dryRun.js +48 -0
- package/dist/indexing/embeddingWriter.js +102 -0
- package/dist/indexing/hash.js +4 -0
- package/dist/indexing/hybridSearch.js +67 -0
- package/dist/indexing/indexStatus.js +224 -0
- package/dist/indexing/indexWriter.js +106 -0
- package/dist/indexing/keywordSearch.js +86 -0
- package/dist/indexing/relatedFiles.js +137 -0
- package/dist/indexing/relatedSnippets.js +105 -0
- package/dist/indexing/resultFormat.js +69 -0
- package/dist/indexing/scanner.js +123 -0
- package/dist/indexing/semanticSearch.js +48 -0
- package/dist/indexing/symbolGraph.js +121 -0
- package/dist/indexing/types.js +1 -0
- package/dist/providers/gemini.js +149 -0
- package/dist/providers/types.js +1 -0
- package/dist/storage/schema.js +187 -0
- package/dist/storage/sqliteVec.js +17 -0
- package/dist/tools/registerTools.js +364 -0
- package/docs/architecture.md +280 -0
- package/docs/codex-integration.md +114 -0
- package/docs/development-plan.md +218 -0
- package/docs/gemini-compatibility.md +214 -0
- package/docs/tech-stack.md +122 -0
- package/package.json +58 -0
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import Database from "better-sqlite3";
|
|
4
|
+
import { scanProject } from "./scanner.js";
|
|
5
|
+
import { sha256Hex } from "./hash.js";
|
|
6
|
+
/**
 * Builds the list of human-readable follow-up actions for an index status report.
 *
 * @param {object} status - Counts as produced by readDetailedIndexStatus
 *   (exists, files, chunks, ftsRows, symbols, dependencies, embeddingSets).
 * @param {object} options - Reads `freshness?.status` and `desiredDimensions`.
 * @returns {string[]} One message per detected gap, or a single "ready" message.
 */
export function recommendedNextActions(status, options) {
    if (!status.exists) {
        return [
            "Run repo_reindex with dry_run=false to create the metadata index.",
            "Then run repo_reindex with dry_run=false and index_embeddings=true when semantic search or context packs need vectors.",
        ];
    }
    // Each entry pairs "is this gap present?" with the advice to emit for it.
    const metadataChecks = [
        [
            options.freshness?.status === "stale",
            "Run repo_reindex with dry_run=false to refresh stale, new, missing, or metadata-changed files.",
        ],
        [
            status.files === 0 || status.chunks === 0,
            "Run repo_reindex with dry_run=false to index files and chunks.",
        ],
        [
            status.ftsRows < status.chunks,
            "Run repo_reindex with dry_run=false to repair missing keyword-search rows.",
        ],
        [
            status.symbols === 0 && status.files > 0,
            "Run repo_reindex with dry_run=false to populate symbol metadata.",
        ],
        [
            status.dependencies === 0 && status.files > 1,
            "Run repo_reindex with dry_run=false to populate dependency metadata.",
        ],
    ];
    const actions = metadataChecks.filter(([needed]) => needed).map(([, advice]) => advice);
    // Embeddings: the first set with the requested dimensionality is the one that counts.
    const embeddingSet = status.embeddingSets.find((candidate) => candidate.dimensions === options.desiredDimensions);
    const embedded = embeddingSet ? embeddingSet.embeddings : 0;
    if (embedded === 0) {
        actions.push(`Run repo_reindex with dry_run=false and index_embeddings=true to create ${options.desiredDimensions}-dimension embeddings for semantic search.`);
    }
    else if (embedded < status.chunks) {
        actions.push("Run repo_reindex with dry_run=false and index_embeddings=true to fill missing embeddings for newly indexed chunks.");
    }
    return actions.length > 0
        ? actions
        : ["Index is ready. Prefer repo_context_pack for task-oriented lookup."];
}
|
|
41
|
+
/**
 * Reports whether sqlite_master lists a table with the given name.
 *
 * @param {object} db - Open database handle exposing `prepare(sql).get(...)`.
 * @param {string} name - Table name to look up.
 * @returns {boolean} True when the lookup returned a row.
 */
function tableExists(db, name) {
    const lookup = db.prepare("select 1 as existsFlag from sqlite_master where type in ('table', 'virtual table') and name = ?");
    const match = lookup.get(name);
    return match ? true : false;
}
|
|
47
|
+
/**
 * Counts the rows of a table, treating a table that does not exist as empty.
 *
 * @param {object} db - Open database handle.
 * @param {string} name - Table name; interpolated into the SQL text, so it must be a trusted identifier.
 * @returns {number} Row count, or 0 when the table is absent.
 */
function countTableRows(db, name) {
    if (tableExists(db, name)) {
        const { count } = db.prepare(`select count(*) as count from ${name}`).get();
        return count;
    }
    return 0;
}
|
|
52
|
+
/**
 * Reads row counts and per-embedding-set totals from the index database.
 *
 * @param {string} dbPath - Path to the SQLite index file.
 * @returns {object} `{ exists, dbPath, files, chunks, ftsRows, symbols, dependencies, embeddingSets }`;
 *   all counts are zero and `exists` is false when the file is not on disk.
 */
export function readDetailedIndexStatus(dbPath) {
    const indexFileMissing = !fs.existsSync(dbPath);
    if (indexFileMissing) {
        return { exists: false, dbPath, files: 0, chunks: 0, ftsRows: 0, symbols: 0, dependencies: 0, embeddingSets: [] };
    }
    const db = new Database(dbPath, { readonly: true });
    try {
        const counts = {
            files: countTableRows(db, "files"),
            chunks: countTableRows(db, "chunks"),
            ftsRows: countTableRows(db, "chunk_fts"),
            symbols: countTableRows(db, "file_symbols"),
            dependencies: countTableRows(db, "file_dependencies"),
        };
        let embeddingSets = [];
        if (tableExists(db, "embedding_sets")) {
            // Left join so embedding sets without any stored vectors still appear with a zero count.
            embeddingSets = db
                .prepare(`
        select embedding_sets.id,
               embedding_sets.provider,
               embedding_sets.model,
               embedding_sets.dimensions,
               count(embeddings.id) as embeddings
        from embedding_sets
        left join embeddings on embeddings.embedding_set_id = embedding_sets.id
        group by embedding_sets.id
        order by embedding_sets.id
      `)
                .all();
        }
        return { exists: true, dbPath, ...counts, embeddingSets };
    }
    finally {
        db.close();
    }
}
|
|
93
|
+
/**
 * Lists the distinct project paths stored in the `files` table, in sorted order.
 *
 * @param {object} db - Open database handle.
 * @returns {string[]} Project paths, or an empty array when the table is absent.
 */
function readIndexedProjectPaths(db) {
    if (tableExists(db, "files")) {
        const rows = db.prepare("select distinct project_path as projectPath from files order by project_path").all();
        return rows.map(({ projectPath }) => projectPath);
    }
    return [];
}
|
|
98
|
+
/**
 * Resolves a path to its canonical form without ever throwing.
 *
 * @param {string} value - Path to resolve.
 * @returns {string|undefined} The real path, or undefined when resolution fails for any reason.
 */
function safeRealpath(value) {
    let resolved;
    try {
        resolved = fs.realpathSync(value);
    }
    catch {
        // Best effort: an unresolvable path is reported as undefined.
    }
    return resolved;
}
|
|
106
|
+
/**
 * Collects every stored project path that should be treated as the given project.
 *
 * A stored path is an alias when it resolves to the same real path as `projectPath`.
 * When nothing matched and the index holds exactly one project path, that path is
 * accepted as an alias too.
 *
 * @param {object} db - Open database handle.
 * @param {string} projectPath - Project path to look up.
 * @returns {string[]} `projectPath` first, followed by any aliases.
 */
function projectPathAliases(db, projectPath) {
    const canonical = safeRealpath(projectPath);
    const knownPaths = readIndexedProjectPaths(db);
    // Without a canonical path there is nothing to compare against, so skip the resolution work.
    const sameLocation = canonical
        ? knownPaths.filter((candidate) => safeRealpath(candidate) === canonical)
        : [];
    const aliases = new Set([projectPath, ...sameLocation]);
    if (aliases.size === 1 && knownPaths.length === 1) {
        aliases.add(knownPaths[0]);
    }
    return [...aliases];
}
|
|
120
|
+
/**
 * Loads the stored file rows for a set of project paths, ordered by relative path.
 *
 * @param {object} db - Open database handle.
 * @param {string[]} projectPaths - Project paths to include.
 * @returns {object[]} Rows of `{ projectPath, path, size, mtimeMs, hash }`; empty when the
 *   `files` table is absent or no project paths were given.
 */
function readIndexedFileRows(db, projectPaths) {
    const nothingToRead = !tableExists(db, "files") || projectPaths.length === 0;
    if (nothingToRead) {
        return [];
    }
    // One bound parameter per project path.
    const placeholders = projectPaths.map(() => "?").join(", ");
    const statement = db.prepare(`
    select project_path as projectPath, path, size, mtime_ms as mtimeMs, hash
    from files
    where project_path in (${placeholders})
    order by path
  `);
    return statement.all(...projectPaths);
}
|
|
135
|
+
/**
 * Appends a sample to the list unless the list already holds `maxSamples` entries.
 *
 * @param {object[]} samples - List mutated in place.
 * @param {object} sample - Entry to append.
 * @param {number} maxSamples - Upper bound on the list length.
 */
function pushSample(samples, sample, maxSamples) {
    const hasRoom = samples.length < maxSamples;
    if (!hasRoom) {
        return;
    }
    samples.push(sample);
}
|
|
140
|
+
/**
 * Compares the files currently on disk with the rows stored in the index and
 * reports whether the index is fresh or stale.
 *
 * A stored file is re-hashed only when its size or mtime differs from the scan.
 * The hash is taken over the UTF-8 decoded text, which is the same representation
 * the metadata writer hashes, so an unchanged file is classified as
 * "metadata_changed" rather than "modified".
 *
 * @param {object} options
 * @param {string} options.dbPath - Path to the SQLite index file.
 * @param {string} options.projectPath - Project root; resolved to an absolute path.
 * @param {number} [options.maxSamples=20] - Maximum number of example entries returned.
 * @param {object} [options.limits] - Passed through to scanProject.
 * @returns {Promise<object>} Counts per staleness reason, overall `status`
 *   ("index_missing", "fresh" or "stale") and up to `maxSamples` `{ path, reason }` samples.
 */
export async function readIndexFreshness(options) {
    const checkedAt = new Date().toISOString();
    const projectPath = path.resolve(options.projectPath);
    const maxSamples = options.maxSamples ?? 20;
    if (!fs.existsSync(options.dbPath)) {
        return {
            checked: false,
            status: "index_missing",
            checkedAt,
            indexedFiles: 0,
            indexedProjectPaths: [],
            scannedFiles: 0,
            staleFiles: 0,
            newFiles: 0,
            modifiedFiles: 0,
            metadataChangedFiles: 0,
            missingFiles: 0,
            skippedFiles: 0,
            samples: [],
        };
    }
    const db = new Database(options.dbPath, { readonly: true });
    let indexedRows;
    let indexedProjectPaths;
    try {
        indexedProjectPaths = projectPathAliases(db, projectPath);
        indexedRows = readIndexedFileRows(db, indexedProjectPaths);
    }
    finally {
        // The database is only needed for the two reads above; release it before scanning.
        db.close();
    }
    const scan = await scanProject(projectPath, options.limits);
    const indexedByPath = new Map(indexedRows.map((row) => [row.path, row]));
    const scannedByPath = new Map(scan.files.map((file) => [file.relativePath, file]));
    const samples = [];
    let newFiles = 0;
    let modifiedFiles = 0;
    let metadataChangedFiles = 0;
    let missingFiles = 0;
    for (const file of scan.files) {
        if (!indexedByPath.has(file.relativePath)) {
            newFiles += 1;
            pushSample(samples, { path: file.relativePath, reason: "new" }, maxSamples);
        }
    }
    for (const indexed of indexedRows) {
        const file = scannedByPath.get(indexed.path);
        if (!file) {
            missingFiles += 1;
            pushSample(samples, { path: indexed.path, reason: "missing" }, maxSamples);
            continue;
        }
        const sizeChanged = indexed.size !== file.size;
        // Tolerate sub-millisecond rounding differences in the stored mtime.
        const mtimeChanged = Math.abs(indexed.mtimeMs - file.mtimeMs) > 1;
        if (!sizeChanged && !mtimeChanged) {
            continue;
        }
        let content;
        try {
            // Decode as UTF-8 so the hash input matches what the writer hashed.
            content = await fs.promises.readFile(file.absolutePath, "utf8");
        }
        catch (error) {
            if (error?.code !== "ENOENT") {
                throw error;
            }
            // The file disappeared between the scan and this read.
            missingFiles += 1;
            pushSample(samples, { path: indexed.path, reason: "missing" }, maxSamples);
            continue;
        }
        if (sha256Hex(content) !== indexed.hash) {
            modifiedFiles += 1;
            pushSample(samples, { path: indexed.path, reason: "modified" }, maxSamples);
        }
        else {
            metadataChangedFiles += 1;
            pushSample(samples, { path: indexed.path, reason: "metadata_changed" }, maxSamples);
        }
    }
    const staleFiles = newFiles + modifiedFiles + metadataChangedFiles + missingFiles;
    return {
        checked: true,
        status: staleFiles === 0 ? "fresh" : "stale",
        checkedAt,
        indexedFiles: indexedRows.length,
        indexedProjectPaths,
        scannedFiles: scan.files.length,
        staleFiles,
        newFiles,
        modifiedFiles,
        metadataChangedFiles,
        missingFiles,
        skippedFiles: scan.skipped.length,
        samples,
    };
}
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
import fs from "node:fs/promises";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import Database from "better-sqlite3";
|
|
4
|
+
import { chunkText } from "./chunker.js";
|
|
5
|
+
import { resolveIndexingLimits } from "./dryRun.js";
|
|
6
|
+
import { sha256Hex } from "./hash.js";
|
|
7
|
+
import { scanProject } from "./scanner.js";
|
|
8
|
+
import { extractFileGraph, resolveDependencyPath } from "./symbolGraph.js";
|
|
9
|
+
import { deleteChunksForFile, initializeStorageSchema, insertChunk, rebuildChunkFtsForFile, replaceSymbolGraphForFile, upsertFile, } from "../storage/schema.js";
|
|
10
|
+
/**
 * Scans a project and writes file, chunk, symbol and dependency metadata into
 * `<projectPath>/<indexDirName>/index.sqlite`, then removes rows for files that
 * are no longer part of the scan.
 *
 * @param {object} options
 * @param {string} options.projectPath - Project root; resolved to an absolute path.
 * @param {string} options.indexDirName - Directory (relative to the project) that holds the index.
 * @param {number} [options.vectorDimensions] - Forwarded to initializeStorageSchema.
 *   The whole options object is also passed to resolveIndexingLimits.
 * @returns {Promise<object>} Summary with `projectPath`, `dbPath`, `limits`, `stats` and `skipped`.
 */
export async function persistentReindexMetadata(options) {
    const projectPath = path.resolve(options.projectPath);
    const limits = resolveIndexingLimits(options);
    const indexPath = path.join(projectPath, options.indexDirName);
    const dbPath = path.join(indexPath, "index.sqlite");
    await fs.mkdir(indexPath, { recursive: true });
    const scan = await scanProject(projectPath, limits);
    const activePathSet = new Set(scan.files.map((file) => file.relativePath));
    const db = new Database(dbPath);
    let chunkCount = 0;
    let byteCount = 0;
    let symbolCount = 0;
    let dependencyCount = 0;
    try {
        initializeStorageSchema(db, { vectorDimensions: options.vectorDimensions });
        // Prepared once (after the schema exists) instead of once per file.
        const selectExisting = db.prepare("select id, hash from files where project_path = ? and path = ?");
        const writeFileMetadata = db.transaction((input) => {
            const existing = selectExisting.get(projectPath, input.relativePath);
            const fileId = upsertFile(db, {
                projectPath,
                path: input.relativePath,
                mtimeMs: input.mtimeMs,
                size: input.size,
                hash: input.hash,
            });
            const graph = extractFileGraph(input.relativePath, input.content);
            replaceSymbolGraphForFile(db, fileId, graph.symbols.map((symbol) => ({ fileId, ...symbol })), graph.dependencies.map((dependency) => ({
                fileId,
                specifier: dependency.specifier,
                line: dependency.line,
                resolvedPath: resolveDependencyPath(input.relativePath, dependency.specifier, activePathSet),
            })));
            if (existing?.hash === input.hash) {
                // Content unchanged: keep the stored chunks and only refresh their keyword rows.
                rebuildChunkFtsForFile(db, fileId, input.relativePath);
                return graph;
            }
            deleteChunksForFile(db, fileId);
            for (const chunk of input.chunks) {
                insertChunk(db, {
                    fileId,
                    path: input.relativePath,
                    startLine: chunk.startLine,
                    endLine: chunk.endLine,
                    title: input.relativePath,
                    text: chunk.text,
                    hash: chunk.hash,
                });
            }
            return graph;
        });
        for (const file of scan.files) {
            const content = await fs.readFile(file.absolutePath, "utf8");
            const chunks = chunkText(file.relativePath, content, limits);
            const graph = writeFileMetadata({
                relativePath: file.relativePath,
                size: file.size,
                mtimeMs: file.mtimeMs,
                hash: sha256Hex(content),
                chunks,
                content,
            });
            chunkCount += chunks.length;
            byteCount += file.size;
            symbolCount += graph.symbols.length;
            dependencyCount += graph.dependencies.length;
        }
        // Remove rows for files that vanished from the scan. The comparison runs in
        // JavaScript rather than via "path not in (?, ?, ...)" so the number of bound
        // parameters does not grow with the number of files in the project.
        const removeStaleFiles = db.transaction(() => {
            const indexedPaths = db.prepare("select path from files where project_path = ?").all(projectPath);
            const deleteFile = db.prepare("delete from files where project_path = ? and path = ?");
            for (const { path: indexedPath } of indexedPaths) {
                if (!activePathSet.has(indexedPath)) {
                    deleteFile.run(projectPath, indexedPath);
                }
            }
        });
        removeStaleFiles();
    }
    finally {
        db.close();
    }
    return {
        projectPath,
        dryRun: false,
        status: "metadata_indexed",
        dbPath,
        limits,
        stats: {
            scannedFiles: scan.files.length + scan.skipped.length,
            indexedFiles: scan.files.length,
            skippedFiles: scan.skipped.length,
            chunks: chunkCount,
            bytes: byteCount,
            symbols: symbolCount,
            dependencies: dependencyCount,
        },
        skipped: scan.skipped,
    };
}
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import Database from "better-sqlite3";
|
|
2
|
+
/**
 * Strips trailing whitespace and shortens the text to at most `maxChars` characters.
 *
 * Shortened text ends in "...". When `maxChars` is too small to hold any text plus
 * the ellipsis, the ellipsis itself is cut so the result never exceeds `maxChars`.
 *
 * @param {string} text - Snippet text.
 * @param {number} maxChars - Maximum length of the returned string.
 * @returns {string} The trimmed, possibly shortened snippet.
 */
function compactSnippet(text, maxChars) {
    const ellipsis = "...";
    const normalized = text.trimEnd();
    if (normalized.length <= maxChars) {
        return normalized;
    }
    if (maxChars <= ellipsis.length) {
        return ellipsis.slice(0, Math.max(0, maxChars));
    }
    return `${normalized.slice(0, maxChars - ellipsis.length)}${ellipsis}`;
}
|
|
8
|
+
/**
 * Extracts the distinct search terms from a query, in first-seen order.
 *
 * A term is a run of two or more letters, digits, or any of `_ . $ / -`.
 *
 * @param {string} query - Raw query text.
 * @returns {string[]} Unique terms; empty when nothing qualifies.
 */
export function keywordTerms(query) {
    const matches = query.match(/[\p{L}\p{N}_.$/-]{2,}/gu);
    return matches === null ? [] : [...new Set(matches)];
}
|
|
11
|
+
/**
 * Turns a free-text query into a full-text MATCH expression.
 *
 * Terms shorter than three characters are dropped; the rest are double-quoted
 * (embedded quotes doubled) and joined with OR.
 *
 * @param {string} query - Raw query text.
 * @returns {string|undefined} The MATCH expression, or undefined when no term qualifies.
 */
function ftsQuery(query) {
    const quotedTerms = [];
    for (const term of keywordTerms(query)) {
        if (term.length < 3) {
            continue;
        }
        quotedTerms.push(`"${term.replace(/"/g, '""')}"`);
    }
    return quotedTerms.length > 0 ? quotedTerms.join(" OR ") : undefined;
}
|
|
17
|
+
/**
 * Searches indexed chunks by keyword.
 *
 * Runs a ranked full-text query first. When that yields nothing, falls back to a
 * substring (LIKE) search on the first query term, ranking path matches ahead of
 * text matches. The term is matched literally: `_`, `%` and `\` are escaped so they
 * do not act as LIKE wildcards.
 *
 * @param {object} options
 * @param {string} options.dbPath - Path to the SQLite index file.
 * @param {string} options.query - Free-text query.
 * @param {number} options.maxResults - Positive integer cap on returned rows.
 * @param {number} options.maxSnippetChars - Positive integer cap on snippet length.
 * @returns {object[]} Rows of `{ path, startLine, endLine, score, snippet, matchType }`.
 * @throws {Error} When `maxResults` or `maxSnippetChars` is not a positive integer.
 */
export function searchByKeyword(options) {
    if (!Number.isInteger(options.maxResults) || options.maxResults <= 0) {
        throw new Error("maxResults must be a positive integer");
    }
    if (!Number.isInteger(options.maxSnippetChars) || options.maxSnippetChars <= 0) {
        throw new Error("maxSnippetChars must be a positive integer");
    }
    // Shared shape for both the full-text and the fallback result rows.
    const toResult = (row) => ({
        path: row.path,
        startLine: row.startLine,
        endLine: row.endLine,
        score: row.score,
        snippet: compactSnippet(row.text, options.maxSnippetChars),
        matchType: "keyword",
    });
    const db = new Database(options.dbPath, { readonly: true });
    try {
        const query = ftsQuery(options.query);
        const rows = query
            ? db
                .prepare(`
          select files.path,
                 chunks.start_line as startLine,
                 chunks.end_line as endLine,
                 chunks.text,
                 bm25(chunk_fts) as score
          from chunk_fts
          join chunks on chunks.id = chunk_fts.rowid
          join files on files.id = chunks.file_id
          where chunk_fts match ?
          order by score
          limit ?
        `)
                .all(query, options.maxResults)
            : [];
        if (rows.length > 0) {
            return rows.map(toResult);
        }
        const fallbackTerm = keywordTerms(options.query)[0];
        if (!fallbackTerm) {
            return [];
        }
        // Terms may contain "_", which LIKE would otherwise treat as "any single character".
        const literalTerm = fallbackTerm.replace(/[\\%_]/g, "\\$&");
        const fallbackRows = db
            .prepare(`
        select files.path,
               chunks.start_line as startLine,
               chunks.end_line as endLine,
               chunks.text,
               case
                 when files.path like @pattern escape '\\' then -10.0
                 else -1.0
               end as score
        from chunks
        join files on files.id = chunks.file_id
        where files.path like @pattern escape '\\' or chunks.text like @pattern escape '\\'
        order by score, files.path, chunks.start_line
        limit @maxResults
      `)
            .all({ pattern: `%${literalTerm}%`, maxResults: options.maxResults });
        return fallbackRows.map(toResult);
    }
    finally {
        db.close();
    }
}
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import Database from "better-sqlite3";
|
|
3
|
+
/**
 * Reports whether sqlite_master lists a table with the given name.
 *
 * @param {object} db - Open database handle exposing `prepare(sql).get(...)`.
 * @param {string} name - Table name to look up.
 * @returns {boolean} True when the lookup returned a row.
 */
function tableExists(db, name) {
    const lookup = db.prepare("select 1 as existsFlag from sqlite_master where type in ('table', 'virtual table') and name = ?");
    const match = lookup.get(name);
    return match ? true : false;
}
|
|
9
|
+
/**
 * Classifies a file path by the role its name and directories suggest.
 *
 * The path is lower-cased and checked against the rules in order; the first
 * matching rule wins.
 *
 * @param {string} path - File path using forward slashes.
 * @returns {"test"|"mock"|"fixture"|"generated"|"docs"|"source"} The category; "source" when no rule matches.
 */
export function classifyRelatedPath(path) {
    const lowered = path.toLowerCase();
    const rules = [
        ["test", /(^|\/)(test|tests|__tests__|spec|specs)(\/|$)|\.(test|spec)\.[^.]+$/],
        ["mock", /(^|\/)(__mocks__|mocks?)(\/|$)|\bmock\b/],
        ["fixture", /(^|\/)(fixtures?|samples?)(\/|$)/],
        ["generated", /(\.generated\.|\.gen\.|\/generated\/|\/dist\/|\/build\/)/],
        ["docs", /\.(md|mdx|rst|txt)$|(^|\/)docs?\//],
    ];
    const matched = rules.find(([, pattern]) => pattern.test(lowered));
    return matched ? matched[0] : "source";
}
|
|
23
|
+
/**
 * Ranks a path by category so that lower numbers sort first:
 * source, docs, test, mock, fixture, generated.
 *
 * @param {string} path - File path to rank.
 * @returns {number|undefined} Rank from 0 to 5, or undefined for an unknown category.
 */
function relatedPathScore(path) {
    const categoriesByPriority = ["source", "docs", "test", "mock", "fixture", "generated"];
    const rank = categoriesByPriority.indexOf(classifyRelatedPath(path));
    return rank === -1 ? undefined : rank;
}
|
|
39
|
+
/**
 * Returns the paths ordered by category rank, keeping the input order within a category.
 *
 * @param {string[]} paths - Paths to order; not mutated.
 * @returns {string[]} New array with the ordered paths.
 */
function sortRelatedPaths(paths) {
    const compareEntries = (left, right) => {
        const byCategory = relatedPathScore(left.path) - relatedPathScore(right.path);
        if (byCategory) {
            return byCategory;
        }
        const byPosition = left.index - right.index;
        if (byPosition) {
            return byPosition;
        }
        return left.path.localeCompare(right.path);
    };
    // Remember each path's original position so it can serve as a tie-breaker.
    const entries = paths.map((path, index) => ({ path, index }));
    entries.sort(compareEntries);
    return entries.map(({ path }) => path);
}
|
|
45
|
+
/**
 * Sorts the traversal queue in place: shallowest depth first, then category rank,
 * then insertion order, then path.
 *
 * @param {object[]} queue - Entries of `{ path, depth, order, ... }`; mutated.
 */
function sortQueue(queue) {
    queue.sort((left, right) => {
        const byDepth = left.depth - right.depth;
        if (byDepth) {
            return byDepth;
        }
        const byCategory = relatedPathScore(left.path) - relatedPathScore(right.path);
        if (byCategory) {
            return byCategory;
        }
        const byInsertion = left.order - right.order;
        if (byInsertion) {
            return byInsertion;
        }
        return left.path.localeCompare(right.path);
    });
}
|
|
51
|
+
/**
 * Reads the symbols, imports and importers recorded for one indexed file.
 *
 * @param {object} options
 * @param {string} options.dbPath - Path to the SQLite index file.
 * @param {string} options.filePath - Stored path of the file to inspect.
 * @param {number} options.maxResults - Row limit applied to each of the three lists.
 * @returns {object} `{ path, symbols, imports, importedBy }`; the lists are empty when the
 *   required tables or the file row are missing.
 * @throws {Error} When the index database file does not exist.
 */
export function readRelatedFiles(options) {
    const { dbPath, filePath, maxResults } = options;
    if (!fs.existsSync(dbPath)) {
        throw new Error(`Index database not found: ${dbPath}`);
    }
    const nothingKnown = () => ({ path: filePath, symbols: [], imports: [], importedBy: [] });
    const db = new Database(dbPath, { readonly: true });
    try {
        const requiredTables = ["files", "file_symbols", "file_dependencies"];
        if (!requiredTables.every((table) => tableExists(db, table))) {
            return nothingKnown();
        }
        const file = db.prepare("select id, path from files where path = ?").get(filePath);
        if (!file) {
            return nothingKnown();
        }
        const symbolRows = db
            .prepare(`
        select name, kind, line, signature, exported
        from file_symbols
        where file_id = ?
        order by line, name
        limit ?
      `)
            .all(file.id, maxResults);
        const imports = db
            .prepare(`
        select specifier, resolved_path as resolvedPath, line
        from file_dependencies
        where file_id = ?
        order by line, specifier
        limit ?
      `)
            .all(file.id, maxResults);
        // Importers are found through the resolved path recorded on other files' dependencies.
        const importedBy = db
            .prepare(`
        select files.path, file_dependencies.specifier, file_dependencies.line
        from file_dependencies
        join files on files.id = file_dependencies.file_id
        where file_dependencies.resolved_path = ?
        order by files.path, file_dependencies.line
        limit ?
      `)
            .all(file.path, maxResults);
        // The stored `exported` flag is converted to a real boolean.
        const symbols = symbolRows.map((symbol) => Object.assign({}, symbol, { exported: Boolean(symbol.exported) }));
        return { path: file.path, symbols, imports, importedBy };
    }
    finally {
        db.close();
    }
}
|
|
103
|
+
/**
 * Walks the import graph outward from the seed files and returns the visited files.
 *
 * Files are visited shallowest first (see sortQueue) until `maxFiles` nodes are
 * collected; neighbours are followed only from nodes shallower than `maxDepth`.
 *
 * @param {object} options
 * @param {string} options.dbPath - Path to the SQLite index file.
 * @param {string[]} options.seedPaths - Starting file paths (depth 0).
 * @param {number} options.maxDepth - Deepest level whose neighbours are still expanded.
 * @param {number} options.maxFiles - Maximum number of nodes returned.
 * @param {number} options.maxResultsPerFile - Row limit passed to readRelatedFiles.
 * @returns {object[]} readRelatedFiles results, each extended with `depth` and `via`.
 */
export function readRelatedFileGraph(options) {
    const depthLimit = Math.max(0, options.maxDepth);
    const fileLimit = Math.max(0, options.maxFiles);
    if (fileLimit === 0) {
        return [];
    }
    let sequence = 0;
    const pending = options.seedPaths.map((path) => ({ path, depth: 0, via: null, order: sequence++ }));
    const seen = new Set();
    const collected = [];
    while (pending.length > 0 && collected.length < fileLimit) {
        sortQueue(pending);
        const current = pending.shift();
        if (!current || seen.has(current.path)) {
            continue;
        }
        seen.add(current.path);
        const related = readRelatedFiles({
            dbPath: options.dbPath,
            filePath: current.path,
            maxResults: options.maxResultsPerFile,
        });
        collected.push({ ...related, depth: current.depth, via: current.via });
        if (current.depth >= depthLimit) {
            continue;
        }
        // Unresolved imports have no path to follow, so they are dropped here.
        const importedPaths = related.imports.map((item) => item.resolvedPath).filter((path) => Boolean(path));
        const importerPaths = related.importedBy.map((item) => item.path);
        for (const neighbor of sortRelatedPaths([...importedPaths, ...importerPaths])) {
            const alreadyQueued = pending.some((queued) => queued.path === neighbor);
            if (seen.has(neighbor) || alreadyQueued) {
                continue;
            }
            pending.push({ path: neighbor, depth: current.depth + 1, via: current.path, order: sequence++ });
        }
    }
    return collected;
}
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import Database from "better-sqlite3";
|
|
3
|
+
// Text that marks a snippet as cut short.
const truncationMarker = "... [truncated]";
|
|
4
|
+
// The truncation marker preceded by a line break.
const truncationMarkerWithBreak = `\n${truncationMarker}`;
|
|
5
|
+
/**
 * Builds the result returned when no related snippets are collected.
 *
 * @param {number} maxRelatedContextChars - Requested character budget; negative values are reported as 0.
 * @returns {object} `{ snippets: [], summary }` with all usage counters at zero.
 */
function emptyRelatedSnippetPack(maxRelatedContextChars) {
    const summary = {
        maxRelatedContextChars: Math.max(0, maxRelatedContextChars),
        usedRelatedContextChars: 0,
        relatedSnippetCount: 0,
        truncatedRelatedSnippets: 0,
    };
    return { snippets: [], summary };
}
|
|
16
|
+
/**
 * Reports whether sqlite_master lists a table with the given name.
 *
 * @param {object} db - Open database handle exposing `prepare(sql).get(...)`.
 * @param {string} name - Table name to look up.
 * @returns {boolean} True when the lookup returned a row.
 */
function tableExists(db, name) {
    const lookup = db.prepare("select 1 as existsFlag from sqlite_master where type in ('table', 'virtual table') and name = ?");
    const match = lookup.get(name);
    return match ? true : false;
}
|
|
22
|
+
/**
 * Strips trailing whitespace and shortens the text to fit `maxChars`.
 *
 * Shortened text ends with the truncation marker on its own line. When `maxChars`
 * cannot hold more than the marker, a prefix of the marker is returned instead.
 *
 * @param {string} text - Chunk text.
 * @param {number} maxChars - Maximum snippet length.
 * @returns {{snippet: string, truncated: boolean}} The snippet and whether it was shortened.
 */
function compactSnippet(text, maxChars) {
    const trimmed = text.replace(/\s+$/g, "");
    if (trimmed.length <= maxChars) {
        return { snippet: trimmed, truncated: false };
    }
    let snippet;
    if (maxChars <= truncationMarker.length) {
        snippet = truncationMarker.slice(0, maxChars);
    }
    else {
        // Reserve room for the marker, then drop whitespace left dangling at the cut.
        const keptLength = Math.max(0, maxChars - truncationMarkerWithBreak.length);
        snippet = `${trimmed.slice(0, keptLength).trimEnd()}${truncationMarkerWithBreak}`;
    }
    return { snippet, truncated: true };
}
|
|
33
|
+
/**
 * Shortens a snippet so it fits into the remaining character budget.
 *
 * @param {string} snippet - Snippet text.
 * @param {number} remaining - Characters still available.
 * @returns {{snippet: string, truncated: boolean}} The snippet (empty when no budget is left)
 *   and whether it was shortened.
 */
function fitBudget(snippet, remaining) {
    if (snippet.length <= remaining) {
        return { snippet, truncated: false };
    }
    let shortened;
    if (remaining <= 0) {
        shortened = "";
    }
    else if (remaining <= truncationMarker.length) {
        // Not enough room for text plus marker: emit as much of the marker as fits.
        shortened = truncationMarker.slice(0, remaining);
    }
    else {
        const kept = snippet.slice(0, remaining - truncationMarkerWithBreak.length).trimEnd();
        shortened = `${kept}${truncationMarkerWithBreak}`;
    }
    return { snippet: shortened, truncated: true };
}
|
|
45
|
+
/**
 * Collects leading chunks of the given files as snippets within a shared character budget.
 *
 * @param {object} options
 * @param {string} options.dbPath - Path to the SQLite index file.
 * @param {string[]} options.paths - Stored file paths, processed in order.
 * @param {number} options.maxRelatedContextChars - Total character budget across all snippets.
 * @param {number} options.maxSnippetsPerFile - Maximum chunks read per file.
 * @param {number} options.maxSnippetChars - Maximum length of a single snippet.
 * @returns {object} `{ snippets, summary }`; empty when the database file or required tables
 *   are missing, or when either limit is not positive.
 */
export function readRelatedSnippets(options) {
    const disabled = !fs.existsSync(options.dbPath) || options.maxRelatedContextChars <= 0 || options.maxSnippetsPerFile <= 0;
    if (disabled) {
        return emptyRelatedSnippetPack(options.maxRelatedContextChars);
    }
    const db = new Database(options.dbPath, { readonly: true });
    const snippets = [];
    let usedRelatedContextChars = 0;
    let truncatedRelatedSnippets = 0;
    const budgetSpent = () => usedRelatedContextChars >= options.maxRelatedContextChars;
    try {
        const hasTables = tableExists(db, "files") && tableExists(db, "chunks");
        if (!hasTables) {
            return emptyRelatedSnippetPack(options.maxRelatedContextChars);
        }
        const selectChunks = db.prepare(`
      select files.path,
             chunks.start_line as startLine,
             chunks.end_line as endLine,
             chunks.text
      from chunks
      join files on files.id = chunks.file_id
      where files.path = ?
      order by chunks.start_line
      limit ?
    `);
        for (const path of options.paths) {
            if (budgetSpent()) {
                break;
            }
            for (const row of selectChunks.all(path, options.maxSnippetsPerFile)) {
                if (budgetSpent()) {
                    break;
                }
                // First cap the snippet on its own, then squeeze it into what is left of the budget.
                const compacted = compactSnippet(row.text, options.maxSnippetChars);
                const fitted = fitBudget(compacted.snippet, options.maxRelatedContextChars - usedRelatedContextChars);
                if (fitted.snippet.length === 0) {
                    // Nothing to emit for this chunk: stop with this file and move to the next path.
                    break;
                }
                const snippetTruncated = compacted.truncated || fitted.truncated;
                if (snippetTruncated) {
                    truncatedRelatedSnippets += 1;
                }
                usedRelatedContextChars += fitted.snippet.length;
                const entry = {
                    path: row.path,
                    startLine: row.startLine,
                    endLine: row.endLine,
                    snippet: fitted.snippet,
                };
                if (snippetTruncated) {
                    // The flag is only present on snippets that were shortened.
                    entry.snippetTruncated = snippetTruncated;
                }
                snippets.push(entry);
            }
        }
        return {
            snippets,
            summary: {
                maxRelatedContextChars: options.maxRelatedContextChars,
                usedRelatedContextChars,
                relatedSnippetCount: snippets.length,
                truncatedRelatedSnippets,
            },
        };
    }
    finally {
        db.close();
    }
}
|