codesift-mcp 0.1.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +66 -21
- package/README.md +402 -56
- package/dist/cli/args.d.ts +2 -0
- package/dist/cli/args.d.ts.map +1 -1
- package/dist/cli/args.js +11 -0
- package/dist/cli/args.js.map +1 -1
- package/dist/cli/commands.d.ts.map +1 -1
- package/dist/cli/commands.js +177 -67
- package/dist/cli/commands.js.map +1 -1
- package/dist/cli/help.d.ts +1 -1
- package/dist/cli/help.d.ts.map +1 -1
- package/dist/cli/help.js +157 -0
- package/dist/cli/help.js.map +1 -1
- package/dist/cli/hooks.d.ts +3 -0
- package/dist/cli/hooks.d.ts.map +1 -0
- package/dist/cli/hooks.js +163 -0
- package/dist/cli/hooks.js.map +1 -0
- package/dist/cli/setup.d.ts +25 -0
- package/dist/cli/setup.d.ts.map +1 -0
- package/dist/cli/setup.js +400 -0
- package/dist/cli/setup.js.map +1 -0
- package/dist/config.d.ts +2 -0
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +2 -0
- package/dist/config.js.map +1 -1
- package/dist/formatters-shortening.d.ts +7 -0
- package/dist/formatters-shortening.d.ts.map +1 -0
- package/dist/formatters-shortening.js +68 -0
- package/dist/formatters-shortening.js.map +1 -0
- package/dist/formatters.d.ts +314 -0
- package/dist/formatters.d.ts.map +1 -0
- package/dist/formatters.js +396 -0
- package/dist/formatters.js.map +1 -0
- package/dist/instructions.d.ts +6 -0
- package/dist/instructions.d.ts.map +1 -0
- package/dist/instructions.js +72 -0
- package/dist/instructions.js.map +1 -0
- package/dist/lsp/lsp-client.d.ts +21 -0
- package/dist/lsp/lsp-client.d.ts.map +1 -0
- package/dist/lsp/lsp-client.js +122 -0
- package/dist/lsp/lsp-client.js.map +1 -0
- package/dist/lsp/lsp-manager.d.ts +12 -0
- package/dist/lsp/lsp-manager.d.ts.map +1 -0
- package/dist/lsp/lsp-manager.js +82 -0
- package/dist/lsp/lsp-manager.js.map +1 -0
- package/dist/lsp/lsp-servers.d.ts +13 -0
- package/dist/lsp/lsp-servers.d.ts.map +1 -0
- package/dist/lsp/lsp-servers.js +57 -0
- package/dist/lsp/lsp-servers.js.map +1 -0
- package/dist/lsp/lsp-tools.d.ts +67 -0
- package/dist/lsp/lsp-tools.d.ts.map +1 -0
- package/dist/lsp/lsp-tools.js +359 -0
- package/dist/lsp/lsp-tools.js.map +1 -0
- package/dist/parser/extractors/_shared.d.ts +11 -0
- package/dist/parser/extractors/_shared.d.ts.map +1 -0
- package/dist/parser/extractors/_shared.js +38 -0
- package/dist/parser/extractors/_shared.js.map +1 -0
- package/dist/parser/extractors/astro.d.ts +15 -0
- package/dist/parser/extractors/astro.d.ts.map +1 -0
- package/dist/parser/extractors/astro.js +104 -0
- package/dist/parser/extractors/astro.js.map +1 -0
- package/dist/parser/extractors/conversation.d.ts +16 -0
- package/dist/parser/extractors/conversation.d.ts.map +1 -0
- package/dist/parser/extractors/conversation.js +196 -0
- package/dist/parser/extractors/conversation.js.map +1 -0
- package/dist/parser/extractors/go.d.ts.map +1 -1
- package/dist/parser/extractors/go.js +22 -45
- package/dist/parser/extractors/go.js.map +1 -1
- package/dist/parser/extractors/python.d.ts +1 -1
- package/dist/parser/extractors/python.d.ts.map +1 -1
- package/dist/parser/extractors/python.js +19 -50
- package/dist/parser/extractors/python.js.map +1 -1
- package/dist/parser/extractors/rust.d.ts +1 -1
- package/dist/parser/extractors/rust.d.ts.map +1 -1
- package/dist/parser/extractors/rust.js +7 -34
- package/dist/parser/extractors/rust.js.map +1 -1
- package/dist/parser/extractors/typescript.d.ts +1 -1
- package/dist/parser/extractors/typescript.d.ts.map +1 -1
- package/dist/parser/extractors/typescript.js +99 -68
- package/dist/parser/extractors/typescript.js.map +1 -1
- package/dist/parser/parser-manager.d.ts.map +1 -1
- package/dist/parser/parser-manager.js +12 -2
- package/dist/parser/parser-manager.js.map +1 -1
- package/dist/parser/symbol-extractor.d.ts +2 -0
- package/dist/parser/symbol-extractor.d.ts.map +1 -1
- package/dist/parser/symbol-extractor.js +2 -0
- package/dist/parser/symbol-extractor.js.map +1 -1
- package/dist/register-tools.d.ts +127 -0
- package/dist/register-tools.d.ts.map +1 -0
- package/dist/register-tools.js +1453 -0
- package/dist/register-tools.js.map +1 -0
- package/dist/retrieval/codebase-retrieval.d.ts +4 -26
- package/dist/retrieval/codebase-retrieval.d.ts.map +1 -1
- package/dist/retrieval/codebase-retrieval.js +105 -403
- package/dist/retrieval/codebase-retrieval.js.map +1 -1
- package/dist/retrieval/retrieval-constants.d.ts +27 -0
- package/dist/retrieval/retrieval-constants.d.ts.map +1 -0
- package/dist/retrieval/retrieval-constants.js +27 -0
- package/dist/retrieval/retrieval-constants.js.map +1 -0
- package/dist/retrieval/retrieval-schemas.d.ts +107 -0
- package/dist/retrieval/retrieval-schemas.d.ts.map +1 -0
- package/dist/retrieval/retrieval-schemas.js +102 -0
- package/dist/retrieval/retrieval-schemas.js.map +1 -0
- package/dist/retrieval/retrieval-utils.d.ts +40 -0
- package/dist/retrieval/retrieval-utils.d.ts.map +1 -0
- package/dist/retrieval/retrieval-utils.js +139 -0
- package/dist/retrieval/retrieval-utils.js.map +1 -0
- package/dist/retrieval/semantic-handlers.d.ts +8 -0
- package/dist/retrieval/semantic-handlers.d.ts.map +1 -0
- package/dist/retrieval/semantic-handlers.js +152 -0
- package/dist/retrieval/semantic-handlers.js.map +1 -0
- package/dist/search/bm25.d.ts +6 -1
- package/dist/search/bm25.d.ts.map +1 -1
- package/dist/search/bm25.js +95 -32
- package/dist/search/bm25.js.map +1 -1
- package/dist/search/chunker.d.ts +10 -0
- package/dist/search/chunker.d.ts.map +1 -1
- package/dist/search/chunker.js +63 -11
- package/dist/search/chunker.js.map +1 -1
- package/dist/search/reranker.d.ts +15 -0
- package/dist/search/reranker.d.ts.map +1 -0
- package/dist/search/reranker.js +126 -0
- package/dist/search/reranker.js.map +1 -0
- package/dist/search/semantic.d.ts +1 -1
- package/dist/search/semantic.d.ts.map +1 -1
- package/dist/search/semantic.js +40 -45
- package/dist/search/semantic.js.map +1 -1
- package/dist/server-helpers.d.ts +29 -0
- package/dist/server-helpers.d.ts.map +1 -0
- package/dist/server-helpers.js +312 -0
- package/dist/server-helpers.js.map +1 -0
- package/dist/server.d.ts +1 -1
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +11 -271
- package/dist/server.js.map +1 -1
- package/dist/storage/_shared.d.ts +9 -0
- package/dist/storage/_shared.d.ts.map +1 -0
- package/dist/storage/_shared.js +26 -0
- package/dist/storage/_shared.js.map +1 -0
- package/dist/storage/chunk-store.d.ts.map +1 -1
- package/dist/storage/chunk-store.js +23 -63
- package/dist/storage/chunk-store.js.map +1 -1
- package/dist/storage/embedding-store.d.ts +6 -3
- package/dist/storage/embedding-store.d.ts.map +1 -1
- package/dist/storage/embedding-store.js +54 -30
- package/dist/storage/embedding-store.js.map +1 -1
- package/dist/storage/graph-store.d.ts +48 -0
- package/dist/storage/graph-store.d.ts.map +1 -0
- package/dist/storage/graph-store.js +52 -0
- package/dist/storage/graph-store.js.map +1 -0
- package/dist/storage/index-store.d.ts +5 -0
- package/dist/storage/index-store.d.ts.map +1 -1
- package/dist/storage/index-store.js +28 -16
- package/dist/storage/index-store.js.map +1 -1
- package/dist/storage/registry.d.ts +4 -0
- package/dist/storage/registry.d.ts.map +1 -1
- package/dist/storage/registry.js +16 -16
- package/dist/storage/registry.js.map +1 -1
- package/dist/storage/usage-stats.d.ts +6 -0
- package/dist/storage/usage-stats.d.ts.map +1 -1
- package/dist/storage/usage-stats.js +59 -11
- package/dist/storage/usage-stats.js.map +1 -1
- package/dist/storage/usage-tracker.d.ts +3 -0
- package/dist/storage/usage-tracker.d.ts.map +1 -1
- package/dist/storage/usage-tracker.js +50 -132
- package/dist/storage/usage-tracker.js.map +1 -1
- package/dist/storage/watcher.d.ts +2 -1
- package/dist/storage/watcher.d.ts.map +1 -1
- package/dist/storage/watcher.js +16 -16
- package/dist/storage/watcher.js.map +1 -1
- package/dist/tools/ast-query-tools.d.ts +29 -0
- package/dist/tools/ast-query-tools.d.ts.map +1 -0
- package/dist/tools/ast-query-tools.js +110 -0
- package/dist/tools/ast-query-tools.js.map +1 -0
- package/dist/tools/boundary-tools.d.ts +31 -0
- package/dist/tools/boundary-tools.d.ts.map +1 -0
- package/dist/tools/boundary-tools.js +62 -0
- package/dist/tools/boundary-tools.js.map +1 -0
- package/dist/tools/clone-tools.d.ts +35 -0
- package/dist/tools/clone-tools.d.ts.map +1 -0
- package/dist/tools/clone-tools.js +181 -0
- package/dist/tools/clone-tools.js.map +1 -0
- package/dist/tools/community-tools.d.ts +23 -0
- package/dist/tools/community-tools.d.ts.map +1 -0
- package/dist/tools/community-tools.js +297 -0
- package/dist/tools/community-tools.js.map +1 -0
- package/dist/tools/complexity-tools.d.ts +34 -0
- package/dist/tools/complexity-tools.d.ts.map +1 -0
- package/dist/tools/complexity-tools.js +135 -0
- package/dist/tools/complexity-tools.js.map +1 -0
- package/dist/tools/context-tools.d.ts +44 -3
- package/dist/tools/context-tools.d.ts.map +1 -1
- package/dist/tools/context-tools.js +329 -99
- package/dist/tools/context-tools.js.map +1 -1
- package/dist/tools/conversation-tools.d.ts +107 -0
- package/dist/tools/conversation-tools.d.ts.map +1 -0
- package/dist/tools/conversation-tools.js +419 -0
- package/dist/tools/conversation-tools.js.map +1 -0
- package/dist/tools/coordinator-tools.d.ts +73 -0
- package/dist/tools/coordinator-tools.d.ts.map +1 -0
- package/dist/tools/coordinator-tools.js +153 -0
- package/dist/tools/coordinator-tools.js.map +1 -0
- package/dist/tools/cross-repo-tools.d.ts +43 -0
- package/dist/tools/cross-repo-tools.d.ts.map +1 -0
- package/dist/tools/cross-repo-tools.js +55 -0
- package/dist/tools/cross-repo-tools.js.map +1 -0
- package/dist/tools/diff-tools.d.ts +4 -1
- package/dist/tools/diff-tools.d.ts.map +1 -1
- package/dist/tools/diff-tools.js +23 -5
- package/dist/tools/diff-tools.js.map +1 -1
- package/dist/tools/frequency-tools.d.ts +46 -0
- package/dist/tools/frequency-tools.d.ts.map +1 -0
- package/dist/tools/frequency-tools.js +184 -0
- package/dist/tools/frequency-tools.js.map +1 -0
- package/dist/tools/generate-tools.d.ts.map +1 -1
- package/dist/tools/generate-tools.js +13 -2
- package/dist/tools/generate-tools.js.map +1 -1
- package/dist/tools/graph-tools.d.ts +44 -11
- package/dist/tools/graph-tools.d.ts.map +1 -1
- package/dist/tools/graph-tools.js +147 -104
- package/dist/tools/graph-tools.js.map +1 -1
- package/dist/tools/hotspot-tools.d.ts +24 -0
- package/dist/tools/hotspot-tools.d.ts.map +1 -0
- package/dist/tools/hotspot-tools.js +122 -0
- package/dist/tools/hotspot-tools.js.map +1 -0
- package/dist/tools/impact-tools.d.ts +13 -0
- package/dist/tools/impact-tools.d.ts.map +1 -0
- package/dist/tools/impact-tools.js +238 -0
- package/dist/tools/impact-tools.js.map +1 -0
- package/dist/tools/index-tools.d.ts +44 -3
- package/dist/tools/index-tools.d.ts.map +1 -1
- package/dist/tools/index-tools.js +530 -222
- package/dist/tools/index-tools.js.map +1 -1
- package/dist/tools/memory-tools.d.ts +35 -0
- package/dist/tools/memory-tools.d.ts.map +1 -0
- package/dist/tools/memory-tools.js +229 -0
- package/dist/tools/memory-tools.js.map +1 -0
- package/dist/tools/outline-tools.d.ts +24 -13
- package/dist/tools/outline-tools.d.ts.map +1 -1
- package/dist/tools/outline-tools.js +113 -87
- package/dist/tools/outline-tools.js.map +1 -1
- package/dist/tools/pattern-tools.d.ts +32 -0
- package/dist/tools/pattern-tools.d.ts.map +1 -0
- package/dist/tools/pattern-tools.js +116 -0
- package/dist/tools/pattern-tools.js.map +1 -0
- package/dist/tools/report-tools.d.ts +5 -0
- package/dist/tools/report-tools.d.ts.map +1 -0
- package/dist/tools/report-tools.js +167 -0
- package/dist/tools/report-tools.js.map +1 -0
- package/dist/tools/review-diff-tools.d.ts +148 -0
- package/dist/tools/review-diff-tools.d.ts.map +1 -0
- package/dist/tools/review-diff-tools.js +852 -0
- package/dist/tools/review-diff-tools.js.map +1 -0
- package/dist/tools/route-tools.d.ts +32 -0
- package/dist/tools/route-tools.d.ts.map +1 -0
- package/dist/tools/route-tools.js +276 -0
- package/dist/tools/route-tools.js.map +1 -0
- package/dist/tools/search-ranker.d.ts +5 -0
- package/dist/tools/search-ranker.d.ts.map +1 -0
- package/dist/tools/search-ranker.js +142 -0
- package/dist/tools/search-ranker.js.map +1 -0
- package/dist/tools/search-tools.d.ts +24 -1
- package/dist/tools/search-tools.d.ts.map +1 -1
- package/dist/tools/search-tools.js +459 -225
- package/dist/tools/search-tools.js.map +1 -1
- package/dist/tools/secret-tools.d.ts +104 -0
- package/dist/tools/secret-tools.d.ts.map +1 -0
- package/dist/tools/secret-tools.js +410 -0
- package/dist/tools/secret-tools.js.map +1 -0
- package/dist/tools/symbol-tools.d.ts +90 -2
- package/dist/tools/symbol-tools.d.ts.map +1 -1
- package/dist/tools/symbol-tools.js +576 -42
- package/dist/tools/symbol-tools.js.map +1 -1
- package/dist/types.d.ts +34 -1
- package/dist/types.d.ts.map +1 -1
- package/dist/utils/framework-detect.d.ts +5 -0
- package/dist/utils/framework-detect.d.ts.map +1 -0
- package/dist/utils/framework-detect.js +36 -0
- package/dist/utils/framework-detect.js.map +1 -0
- package/dist/utils/glob.d.ts +19 -0
- package/dist/utils/glob.d.ts.map +1 -0
- package/dist/utils/glob.js +74 -0
- package/dist/utils/glob.js.map +1 -0
- package/dist/utils/import-graph.d.ts +29 -0
- package/dist/utils/import-graph.d.ts.map +1 -0
- package/dist/utils/import-graph.js +125 -0
- package/dist/utils/import-graph.js.map +1 -0
- package/dist/utils/test-file.d.ts.map +1 -1
- package/dist/utils/test-file.js +1 -0
- package/dist/utils/test-file.js.map +1 -1
- package/dist/utils/walk.d.ts +45 -0
- package/dist/utils/walk.d.ts.map +1 -0
- package/dist/utils/walk.js +87 -0
- package/dist/utils/walk.js.map +1 -0
- package/package.json +12 -5
- package/rules/codesift.md +187 -0
- package/rules/codesift.mdc +192 -0
- package/rules/codex.md +187 -0
- package/rules/gemini.md +187 -0
|
@@ -1,81 +1,82 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { readFile, stat, unlink, rm, mkdir as mkdirAsync } from "node:fs/promises";
|
|
2
2
|
import { join, relative, extname, resolve, basename } from "node:path";
|
|
3
3
|
import { execFileSync } from "node:child_process";
|
|
4
|
+
import { createHash } from "node:crypto";
|
|
4
5
|
import { parseFile } from "../parser/parser-manager.js";
|
|
5
|
-
import { extractSymbols, extractMarkdownSymbols, extractPrismaSymbols } from "../parser/symbol-extractor.js";
|
|
6
|
+
import { extractSymbols, extractMarkdownSymbols, extractPrismaSymbols, extractAstroSymbols, extractConversationSymbols } from "../parser/symbol-extractor.js";
|
|
6
7
|
import { getLanguageForExtension } from "../parser/parser-manager.js";
|
|
7
|
-
import { saveIndex, loadIndex, getIndexPath, saveIncremental } from "../storage/index-store.js";
|
|
8
|
-
import { registerRepo, listRepos as listRegistryRepos, getRepo, removeRepo, getRepoName } from "../storage/registry.js";
|
|
8
|
+
import { saveIndex, loadIndex, getIndexPath, saveIncremental, removeFileFromIndex } from "../storage/index-store.js";
|
|
9
|
+
import { registerRepo, listRepos as listRegistryRepos, getRepo, removeRepo, getRepoName, updateRepoMeta } from "../storage/registry.js";
|
|
9
10
|
import { startWatcher, stopWatcher } from "../storage/watcher.js";
|
|
10
11
|
import { buildBM25Index } from "../search/bm25.js";
|
|
11
12
|
import { buildSymbolText, createEmbeddingProvider } from "../search/semantic.js";
|
|
12
13
|
import { loadEmbeddings, saveEmbeddings, saveEmbeddingMeta, getEmbeddingPath, getEmbeddingMetaPath, batchEmbed } from "../storage/embedding-store.js";
|
|
13
14
|
import { saveChunks, saveChunkEmbeddings, loadChunkEmbeddings, getChunkPath, getChunkEmbeddingPath } from "../storage/chunk-store.js";
|
|
14
|
-
import { chunkFile } from "../search/chunker.js";
|
|
15
|
+
import { chunkFile, chunkBySymbols } from "../search/chunker.js";
|
|
15
16
|
import { loadConfig } from "../config.js";
|
|
16
17
|
import { validateGitUrl, validateGitRef } from "../utils/git-validation.js";
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
".codesift", ".next", "__pycache__", ".pytest_cache",
|
|
21
|
-
".venv", "venv", ".tox", ".mypy_cache", ".turbo",
|
|
22
|
-
"generated", "audit-results", ".backup", "jscpd-report",
|
|
23
|
-
]);
|
|
24
|
-
const MAX_FILE_SIZE = 1_000_000; // 1MB — skip giant files
|
|
18
|
+
import { walkDirectory } from "../utils/walk.js";
|
|
19
|
+
import { onFileChanged as scanOnChanged, onFileDeleted as scanOnDeleted, scanFileForSecrets } from "./secret-tools.js";
|
|
20
|
+
import { getGraphPath } from "../storage/graph-store.js";
|
|
25
21
|
const PARSE_CONCURRENCY = 8;
|
|
22
|
+
const CHUNK_EMBEDDING_BATCH_SIZE = 96;
|
|
23
|
+
const GIT_CLONE_TIMEOUT_MS = 120_000;
|
|
24
|
+
const GIT_CHECKOUT_TIMEOUT_MS = 30_000;
|
|
25
|
+
const GIT_PULL_TIMEOUT_MS = 60_000;
|
|
26
26
|
// Active watchers and in-memory indexes keyed by repo name
|
|
27
27
|
const activeWatchers = new Map();
|
|
28
28
|
const bm25Indexes = new Map();
|
|
29
|
+
const codeIndexes = new Map();
|
|
29
30
|
const embeddingCaches = new Map();
|
|
30
31
|
/**
|
|
31
|
-
*
|
|
32
|
-
*
|
|
32
|
+
* Parse a single file and extract its symbols + metadata.
|
|
33
|
+
* Returns null if the file cannot be parsed.
|
|
33
34
|
*/
|
|
34
|
-
async function
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
35
|
+
async function parseOneFile(filePath, repoRoot, repoName) {
|
|
36
|
+
try {
|
|
37
|
+
const stat = await import("node:fs/promises").then((fs) => fs.stat(filePath));
|
|
38
|
+
const source = await readFile(filePath, "utf-8");
|
|
39
|
+
const relPath = relative(repoRoot, filePath);
|
|
40
|
+
const ext = extname(filePath);
|
|
41
|
+
const baseName = filePath.split("/").pop() ?? "";
|
|
42
|
+
const language = getLanguageForExtension(ext)
|
|
43
|
+
?? (baseName.startsWith(".env") ? "config" : "unknown");
|
|
44
|
+
let symbols;
|
|
45
|
+
if (language === "markdown") {
|
|
46
|
+
symbols = extractMarkdownSymbols(source, relPath, repoName);
|
|
40
47
|
}
|
|
41
|
-
|
|
42
|
-
|
|
48
|
+
else if (language === "prisma") {
|
|
49
|
+
symbols = extractPrismaSymbols(source, relPath, repoName);
|
|
43
50
|
}
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
else if (entry.isFile()) {
|
|
53
|
-
const ext = extname(entry.name);
|
|
54
|
-
const language = getLanguageForExtension(ext);
|
|
55
|
-
if (!language)
|
|
56
|
-
continue;
|
|
57
|
-
// Filter by include paths if specified
|
|
58
|
-
if (includePaths && includePaths.length > 0) {
|
|
59
|
-
const relPath = relative(rootPath, fullPath);
|
|
60
|
-
const matches = includePaths.some((p) => relPath.startsWith(p));
|
|
61
|
-
if (!matches)
|
|
62
|
-
continue;
|
|
63
|
-
}
|
|
64
|
-
// Skip files that are too large
|
|
65
|
-
try {
|
|
66
|
-
const fileStat = await stat(fullPath);
|
|
67
|
-
if (fileStat.size > MAX_FILE_SIZE)
|
|
68
|
-
continue;
|
|
69
|
-
}
|
|
70
|
-
catch {
|
|
71
|
-
continue;
|
|
72
|
-
}
|
|
73
|
-
files.push(fullPath);
|
|
74
|
-
}
|
|
51
|
+
else if (language === "astro") {
|
|
52
|
+
symbols = extractAstroSymbols(source, relPath, repoName);
|
|
53
|
+
}
|
|
54
|
+
else if (language === "conversation") {
|
|
55
|
+
symbols = extractConversationSymbols(source, relPath, repoName);
|
|
56
|
+
}
|
|
57
|
+
else if (language === "config") {
|
|
58
|
+
symbols = [];
|
|
75
59
|
}
|
|
60
|
+
else {
|
|
61
|
+
const tree = await parseFile(filePath, source);
|
|
62
|
+
if (!tree)
|
|
63
|
+
return null;
|
|
64
|
+
symbols = extractSymbols(tree, relPath, source, repoName, language);
|
|
65
|
+
}
|
|
66
|
+
const entry = {
|
|
67
|
+
path: relPath,
|
|
68
|
+
language,
|
|
69
|
+
symbol_count: symbols.length,
|
|
70
|
+
last_modified: Date.now(),
|
|
71
|
+
mtime_ms: Math.round(stat.mtimeMs),
|
|
72
|
+
};
|
|
73
|
+
return { symbols, entry };
|
|
74
|
+
}
|
|
75
|
+
catch (err) {
|
|
76
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
77
|
+
console.warn(`[codesift] Failed to parse ${relative(repoRoot, filePath)}: ${message}`);
|
|
78
|
+
return null;
|
|
76
79
|
}
|
|
77
|
-
await walk(rootPath);
|
|
78
|
-
return files;
|
|
79
80
|
}
|
|
80
81
|
/**
|
|
81
82
|
* Parse files in parallel batches.
|
|
@@ -83,41 +84,9 @@ async function walkDirectory(rootPath, includePaths) {
|
|
|
83
84
|
async function parseFiles(files, repoRoot, repoName) {
|
|
84
85
|
const allSymbols = [];
|
|
85
86
|
const fileEntries = [];
|
|
86
|
-
// Process in batches for controlled concurrency
|
|
87
87
|
for (let i = 0; i < files.length; i += PARSE_CONCURRENCY) {
|
|
88
88
|
const batch = files.slice(i, i + PARSE_CONCURRENCY);
|
|
89
|
-
const results = await Promise.all(batch.map(
|
|
90
|
-
try {
|
|
91
|
-
const source = await readFile(filePath, "utf-8");
|
|
92
|
-
const relPath = relative(repoRoot, filePath);
|
|
93
|
-
const ext = extname(filePath);
|
|
94
|
-
const language = getLanguageForExtension(ext) ?? "unknown";
|
|
95
|
-
let symbols;
|
|
96
|
-
// Markdown and Prisma use custom parsers (no tree-sitter grammar)
|
|
97
|
-
if (language === "markdown") {
|
|
98
|
-
symbols = extractMarkdownSymbols(source, relPath, repoName);
|
|
99
|
-
}
|
|
100
|
-
else if (language === "prisma") {
|
|
101
|
-
symbols = extractPrismaSymbols(source, relPath, repoName);
|
|
102
|
-
}
|
|
103
|
-
else {
|
|
104
|
-
const tree = await parseFile(filePath, source);
|
|
105
|
-
if (!tree)
|
|
106
|
-
return null;
|
|
107
|
-
symbols = extractSymbols(tree, relPath, source, repoName, language);
|
|
108
|
-
}
|
|
109
|
-
const entry = {
|
|
110
|
-
path: relPath,
|
|
111
|
-
language,
|
|
112
|
-
symbol_count: symbols.length,
|
|
113
|
-
last_modified: Date.now(),
|
|
114
|
-
};
|
|
115
|
-
return { symbols, entry };
|
|
116
|
-
}
|
|
117
|
-
catch {
|
|
118
|
-
return null;
|
|
119
|
-
}
|
|
120
|
-
}));
|
|
89
|
+
const results = await Promise.all(batch.map((filePath) => parseOneFile(filePath, repoRoot, repoName)));
|
|
121
90
|
for (const result of results) {
|
|
122
91
|
if (result) {
|
|
123
92
|
allSymbols.push(...result.symbols);
|
|
@@ -127,35 +96,241 @@ async function parseFiles(files, repoRoot, repoName) {
|
|
|
127
96
|
}
|
|
128
97
|
return { symbols: allSymbols, fileEntries };
|
|
129
98
|
}
|
|
99
|
+
// ---------------------------------------------------------------------------
|
|
100
|
+
// Dirty propagation — mark caller files stale when a callee signature changes
|
|
101
|
+
// ---------------------------------------------------------------------------
|
|
102
|
+
/**
|
|
103
|
+
* Compute a hash of a symbol's public interface (name + kind + signature).
|
|
104
|
+
* Body changes don't trigger propagation — only signature changes.
|
|
105
|
+
*/
|
|
106
|
+
function computeSignatureHash(sym) {
|
|
107
|
+
const key = `${sym.name}|${sym.kind}|${sym.signature ?? ""}`;
|
|
108
|
+
return createHash("sha256").update(key).digest("hex").slice(0, 16);
|
|
109
|
+
}
|
|
110
|
+
/**
|
|
111
|
+
* Detect signature changes and mark caller files as stale.
|
|
112
|
+
* Returns the set of files marked stale.
|
|
113
|
+
*/
|
|
114
|
+
function propagateDirtySignatures(oldSymbols, newSymbols, fileEntries) {
|
|
115
|
+
// Build old signature hashes
|
|
116
|
+
const oldHashes = new Map();
|
|
117
|
+
for (const sym of oldSymbols) {
|
|
118
|
+
oldHashes.set(sym.id, computeSignatureHash(sym));
|
|
119
|
+
}
|
|
120
|
+
// Find symbols with changed signatures
|
|
121
|
+
const changedSymbolFiles = new Set();
|
|
122
|
+
for (const sym of newSymbols) {
|
|
123
|
+
const oldHash = oldHashes.get(sym.id);
|
|
124
|
+
if (oldHash && oldHash !== computeSignatureHash(sym)) {
|
|
125
|
+
changedSymbolFiles.add(sym.file);
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
if (changedSymbolFiles.size === 0)
|
|
129
|
+
return new Set();
|
|
130
|
+
// Find files that import from changed files (1 level of callers)
|
|
131
|
+
// Use a simple heuristic: check if any symbol source mentions a changed file's name
|
|
132
|
+
const changedBasenames = new Set();
|
|
133
|
+
for (const f of changedSymbolFiles) {
|
|
134
|
+
const base = f.split("/").pop()?.replace(/\.\w+$/, "");
|
|
135
|
+
if (base)
|
|
136
|
+
changedBasenames.add(base);
|
|
137
|
+
}
|
|
138
|
+
const staleFiles = new Set();
|
|
139
|
+
for (const sym of newSymbols) {
|
|
140
|
+
if (changedSymbolFiles.has(sym.file))
|
|
141
|
+
continue; // Don't mark the changed file itself
|
|
142
|
+
if (!sym.source)
|
|
143
|
+
continue;
|
|
144
|
+
for (const base of changedBasenames) {
|
|
145
|
+
if (sym.source.includes(base)) {
|
|
146
|
+
staleFiles.add(sym.file);
|
|
147
|
+
break;
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
// Mark stale in file entries (clear mtime so next index re-parses them)
|
|
152
|
+
for (const entry of fileEntries) {
|
|
153
|
+
if (staleFiles.has(entry.path)) {
|
|
154
|
+
entry.stale = true;
|
|
155
|
+
delete entry.mtime_ms; // Force re-parse on next indexFolder
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
return staleFiles;
|
|
159
|
+
}
|
|
160
|
+
/**
|
|
161
|
+
* Embed symbols using the configured embedding provider.
|
|
162
|
+
* Non-fatal — BM25 search still works if embedding fails.
|
|
163
|
+
*/
|
|
164
|
+
export async function embedSymbols(symbols, indexPath, repoName, config) {
|
|
165
|
+
if (!config.embeddingProvider)
|
|
166
|
+
return;
|
|
167
|
+
const embeddingPath = getEmbeddingPath(indexPath);
|
|
168
|
+
const metaPath = getEmbeddingMetaPath(indexPath);
|
|
169
|
+
try {
|
|
170
|
+
const provider = createEmbeddingProvider(config.embeddingProvider, config);
|
|
171
|
+
const symbolTexts = new Map(symbols.map((s) => [s.id, buildSymbolText(s)]));
|
|
172
|
+
const existing = await loadEmbeddings(embeddingPath);
|
|
173
|
+
const embeddings = await batchEmbed(symbolTexts, existing, provider.embed.bind(provider), config.embeddingBatchSize, repoName);
|
|
174
|
+
await saveEmbeddings(embeddingPath, embeddings);
|
|
175
|
+
await saveEmbeddingMeta(metaPath, {
|
|
176
|
+
model: provider.model,
|
|
177
|
+
provider: config.embeddingProvider,
|
|
178
|
+
dimensions: provider.dimensions,
|
|
179
|
+
symbol_count: embeddings.size,
|
|
180
|
+
updated_at: Date.now(),
|
|
181
|
+
});
|
|
182
|
+
embeddingCaches.set(repoName, embeddings);
|
|
183
|
+
}
|
|
184
|
+
catch (err) {
|
|
185
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
186
|
+
console.error(`[codesift] Embedding failed for ${repoName}: ${message}`);
|
|
187
|
+
}
|
|
188
|
+
}
|
|
189
|
+
/**
|
|
190
|
+
* Read files in parallel batches and split each into chunks.
|
|
191
|
+
*/
|
|
192
|
+
async function readAndChunkFiles(fileEntries, rootPath, repoName, symbols) {
|
|
193
|
+
const allChunks = [];
|
|
194
|
+
for (let i = 0; i < fileEntries.length; i += PARSE_CONCURRENCY) {
|
|
195
|
+
const batch = fileEntries.slice(i, i + PARSE_CONCURRENCY);
|
|
196
|
+
const batchResults = await Promise.all(batch.map(async (entry) => {
|
|
197
|
+
const fullPath = join(rootPath, entry.path);
|
|
198
|
+
try {
|
|
199
|
+
const content = await readFile(fullPath, "utf-8");
|
|
200
|
+
if (symbols) {
|
|
201
|
+
const fileSymbols = symbols
|
|
202
|
+
.filter((s) => s.file === entry.path)
|
|
203
|
+
.map((s) => ({ name: s.name, start_line: s.start_line, end_line: s.end_line }));
|
|
204
|
+
return chunkBySymbols(entry.path, content, repoName, fileSymbols);
|
|
205
|
+
}
|
|
206
|
+
return chunkFile(entry.path, content, repoName);
|
|
207
|
+
}
|
|
208
|
+
catch (err) {
|
|
209
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
210
|
+
console.warn(`[codesift] Failed to read ${entry.path} for chunking: ${message}`);
|
|
211
|
+
return [];
|
|
212
|
+
}
|
|
213
|
+
}));
|
|
214
|
+
for (const chunks of batchResults) {
|
|
215
|
+
allChunks.push(...chunks);
|
|
216
|
+
}
|
|
217
|
+
}
|
|
218
|
+
return allChunks;
|
|
219
|
+
}
|
|
220
|
+
/**
|
|
221
|
+
* Embed file chunks using the configured embedding provider.
|
|
222
|
+
* Non-fatal — symbol-level and BM25 search still work if this fails.
|
|
223
|
+
*/
|
|
224
|
+
async function embedChunks(fileEntries, rootPath, repoName, indexPath, config, symbols) {
|
|
225
|
+
if (!config.embeddingProvider)
|
|
226
|
+
return;
|
|
227
|
+
const chunkPath = getChunkPath(indexPath);
|
|
228
|
+
const chunkEmbeddingPath = getChunkEmbeddingPath(indexPath);
|
|
229
|
+
try {
|
|
230
|
+
const provider = createEmbeddingProvider(config.embeddingProvider, config);
|
|
231
|
+
const existingChunkEmbeddings = await loadChunkEmbeddings(chunkEmbeddingPath) ?? new Map();
|
|
232
|
+
const allChunks = await readAndChunkFiles(fileEntries, rootPath, repoName, symbols);
|
|
233
|
+
if (allChunks.length > 0) {
|
|
234
|
+
const chunkTexts = new Map(allChunks.map((c) => [c.id, c.text]));
|
|
235
|
+
const chunkEmbeddings = await batchEmbed(chunkTexts, existingChunkEmbeddings, provider.embed.bind(provider), CHUNK_EMBEDDING_BATCH_SIZE, `${repoName}:chunks`);
|
|
236
|
+
await saveChunks(chunkPath, allChunks);
|
|
237
|
+
await saveChunkEmbeddings(chunkEmbeddingPath, chunkEmbeddings);
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
catch (err) {
|
|
241
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
242
|
+
console.error(`[codesift] Chunk embedding failed for ${repoName}: ${message}`);
|
|
243
|
+
}
|
|
244
|
+
}
|
|
130
245
|
export async function indexFolder(folderPath, options) {
|
|
246
|
+
if (!folderPath || typeof folderPath !== "string") {
|
|
247
|
+
throw new Error("folderPath is required and must be a non-empty string");
|
|
248
|
+
}
|
|
131
249
|
const config = loadConfig();
|
|
132
250
|
const startTime = Date.now();
|
|
133
251
|
const rootPath = resolve(folderPath);
|
|
134
252
|
const repoName = getRepoName(rootPath);
|
|
135
253
|
const indexPath = getIndexPath(config.dataDir, rootPath);
|
|
136
|
-
//
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
};
|
|
254
|
+
// Walk directory and collect parseable files
|
|
255
|
+
const files = await walkDirectory(rootPath, {
|
|
256
|
+
includePaths: options?.include_paths,
|
|
257
|
+
fileFilter: (ext, name) => !!getLanguageForExtension(ext) || (name?.startsWith(".env") ?? false),
|
|
258
|
+
});
|
|
259
|
+
// mtime-based incremental: skip files unchanged since last index
|
|
260
|
+
const existing = await loadIndex(indexPath);
|
|
261
|
+
const mtimeMap = new Map();
|
|
262
|
+
if (existing) {
|
|
263
|
+
for (const f of existing.files) {
|
|
264
|
+
if (f.mtime_ms)
|
|
265
|
+
mtimeMap.set(f.path, f.mtime_ms);
|
|
149
266
|
}
|
|
150
267
|
}
|
|
151
|
-
|
|
152
|
-
const
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
268
|
+
const filesToParse = [];
|
|
269
|
+
const keptSymbols = [];
|
|
270
|
+
const keptEntries = [];
|
|
271
|
+
if (mtimeMap.size > 0) {
|
|
272
|
+
const { stat } = await import("node:fs/promises");
|
|
273
|
+
for (const filePath of files) {
|
|
274
|
+
const relPath = relative(rootPath, filePath);
|
|
275
|
+
const prevMtime = mtimeMap.get(relPath);
|
|
276
|
+
if (prevMtime !== undefined) {
|
|
277
|
+
const fileEntry = existing.files.find((f) => f.path === relPath);
|
|
278
|
+
// Force re-parse if file is marked stale (callee signature changed)
|
|
279
|
+
if (fileEntry?.stale) {
|
|
280
|
+
filesToParse.push(filePath);
|
|
281
|
+
continue;
|
|
282
|
+
}
|
|
283
|
+
try {
|
|
284
|
+
const st = await stat(filePath);
|
|
285
|
+
if (Math.round(st.mtimeMs) === prevMtime) {
|
|
286
|
+
// File unchanged — keep existing symbols
|
|
287
|
+
const fileSymbols = existing.symbols.filter((s) => s.file === relPath);
|
|
288
|
+
if (fileEntry) {
|
|
289
|
+
keptSymbols.push(...fileSymbols);
|
|
290
|
+
keptEntries.push(fileEntry);
|
|
291
|
+
continue;
|
|
292
|
+
}
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
catch { /* file may have been deleted — reparse */ }
|
|
296
|
+
}
|
|
297
|
+
filesToParse.push(filePath);
|
|
298
|
+
}
|
|
299
|
+
}
|
|
300
|
+
else {
|
|
301
|
+
filesToParse.push(...files);
|
|
302
|
+
}
|
|
303
|
+
// Parse only changed/new files
|
|
304
|
+
const { symbols: parsedSymbols, fileEntries: parsedEntries } = await parseFiles(filesToParse, rootPath, repoName);
|
|
305
|
+
const symbols = [...keptSymbols, ...parsedSymbols];
|
|
306
|
+
const fileEntries = [...keptEntries, ...parsedEntries];
|
|
307
|
+
// Dirty propagation: detect signature changes and mark caller files stale
|
|
308
|
+
if (existing && filesToParse.length > 0 && filesToParse.length < files.length) {
|
|
309
|
+
const staleFiles = propagateDirtySignatures(existing.symbols, symbols, fileEntries);
|
|
310
|
+
if (staleFiles.size > 0) {
|
|
311
|
+
console.error(`[codesift] Dirty propagation: ${staleFiles.size} caller files marked stale`);
|
|
312
|
+
}
|
|
313
|
+
}
|
|
314
|
+
// Build and cache BM25 index; invalidate code index cache
|
|
156
315
|
const bm25 = buildBM25Index(symbols);
|
|
157
316
|
bm25Indexes.set(repoName, bm25);
|
|
158
|
-
|
|
317
|
+
codeIndexes.delete(repoName);
|
|
318
|
+
// Sanity check: don't overwrite a complete index with a partial one
|
|
319
|
+
// (WASM crash or walk failure can produce truncated results)
|
|
320
|
+
const DROP_THRESHOLD = 0.5; // Reject if new index has <50% of old file count
|
|
321
|
+
if (existing && fileEntries.length < existing.file_count * DROP_THRESHOLD && existing.file_count > 50) {
|
|
322
|
+
console.error(`[codesift] SANITY CHECK FAILED for ${repoName}: ` +
|
|
323
|
+
`new index has ${fileEntries.length} files vs ${existing.file_count} previously. ` +
|
|
324
|
+
`Keeping old index. Use invalidate_cache + index_folder to force reindex.`);
|
|
325
|
+
return {
|
|
326
|
+
repo: repoName,
|
|
327
|
+
root: rootPath,
|
|
328
|
+
file_count: existing.file_count,
|
|
329
|
+
symbol_count: existing.symbol_count,
|
|
330
|
+
duration_ms: Date.now() - startTime,
|
|
331
|
+
};
|
|
332
|
+
}
|
|
333
|
+
// Build and save code index
|
|
159
334
|
const codeIndex = {
|
|
160
335
|
repo: repoName,
|
|
161
336
|
root: rootPath,
|
|
@@ -166,67 +341,15 @@ export async function indexFolder(folderPath, options) {
|
|
|
166
341
|
symbol_count: symbols.length,
|
|
167
342
|
file_count: fileEntries.length,
|
|
168
343
|
};
|
|
169
|
-
// Save index to disk
|
|
170
344
|
await saveIndex(indexPath, codeIndex);
|
|
171
|
-
// Embed symbols
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
const embeddings = await batchEmbed(symbolTexts, existing, provider.embed.bind(provider), config.embeddingBatchSize);
|
|
180
|
-
await saveEmbeddings(embeddingPath, embeddings);
|
|
181
|
-
await saveEmbeddingMeta(metaPath, {
|
|
182
|
-
model: provider.model,
|
|
183
|
-
provider: config.embeddingProvider,
|
|
184
|
-
dimensions: provider.dimensions,
|
|
185
|
-
symbol_count: embeddings.size,
|
|
186
|
-
updated_at: Date.now(),
|
|
187
|
-
});
|
|
188
|
-
embeddingCaches.set(repoName, embeddings);
|
|
189
|
-
}
|
|
190
|
-
catch (err) {
|
|
191
|
-
const message = err instanceof Error ? err.message : String(err);
|
|
192
|
-
console.error(`[codesift] Embedding failed for ${repoName}: ${message}`);
|
|
193
|
-
// Non-fatal — BM25 search still works
|
|
194
|
-
}
|
|
195
|
-
}
|
|
196
|
-
// Embed file chunks if an embedding provider is configured (non-fatal if it fails)
|
|
197
|
-
if (config.embeddingProvider) {
|
|
198
|
-
const chunkPath = getChunkPath(indexPath);
|
|
199
|
-
const chunkEmbeddingPath = getChunkEmbeddingPath(indexPath);
|
|
200
|
-
try {
|
|
201
|
-
const provider = createEmbeddingProvider(config.embeddingProvider, config);
|
|
202
|
-
// Build chunks for all indexed files
|
|
203
|
-
const allChunks = [];
|
|
204
|
-
for (const entry of fileEntries) {
|
|
205
|
-
const fullPath = join(rootPath, entry.path);
|
|
206
|
-
try {
|
|
207
|
-
const content = await readFile(fullPath, "utf-8");
|
|
208
|
-
const fileChunks = chunkFile(entry.path, content, repoName);
|
|
209
|
-
allChunks.push(...fileChunks);
|
|
210
|
-
}
|
|
211
|
-
catch {
|
|
212
|
-
// Skip unreadable files
|
|
213
|
-
}
|
|
214
|
-
}
|
|
215
|
-
if (allChunks.length > 0) {
|
|
216
|
-
// Load existing chunk embeddings to avoid re-embedding unchanged chunks
|
|
217
|
-
const existingChunkEmbeddings = await loadChunkEmbeddings(chunkEmbeddingPath) ?? new Map();
|
|
218
|
-
const chunkTexts = new Map(allChunks.map((c) => [c.id, c.text]));
|
|
219
|
-
const chunkEmbeddings = await batchEmbed(chunkTexts, existingChunkEmbeddings, provider.embed.bind(provider), 96);
|
|
220
|
-
await saveChunks(chunkPath, allChunks);
|
|
221
|
-
await saveChunkEmbeddings(chunkEmbeddingPath, chunkEmbeddings);
|
|
222
|
-
}
|
|
223
|
-
}
|
|
224
|
-
catch (err) {
|
|
225
|
-
const message = err instanceof Error ? err.message : String(err);
|
|
226
|
-
console.error(`[codesift] Chunk embedding failed for ${repoName}: ${message}`);
|
|
227
|
-
// Non-fatal — symbol-level and BM25 search still work
|
|
228
|
-
}
|
|
229
|
-
}
|
|
345
|
+
// Embed symbols and chunks in background (non-fatal, don't block MCP response)
|
|
346
|
+
// Large repos (71K symbols) can take minutes — fire-and-forget to prevent timeout
|
|
347
|
+
embedSymbols(symbols, indexPath, repoName, config)
|
|
348
|
+
.then(() => embedChunks(fileEntries, rootPath, repoName, indexPath, config, symbols))
|
|
349
|
+
.catch((err) => {
|
|
350
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
351
|
+
console.error(`[codesift] Background embedding failed for ${repoName}: ${msg}`);
|
|
352
|
+
});
|
|
230
353
|
// Register in the global registry
|
|
231
354
|
const meta = {
|
|
232
355
|
name: repoName,
|
|
@@ -237,20 +360,19 @@ export async function indexFolder(folderPath, options) {
|
|
|
237
360
|
updated_at: Date.now(),
|
|
238
361
|
};
|
|
239
362
|
await registerRepo(config.registryPath, meta);
|
|
363
|
+
// Capture git HEAD for auto-refresh tracking
|
|
364
|
+
try {
|
|
365
|
+
const head = execFileSync("git", ["rev-parse", "HEAD"], {
|
|
366
|
+
cwd: rootPath, encoding: "utf-8", timeout: 5000,
|
|
367
|
+
}).trim();
|
|
368
|
+
await updateRepoMeta(config.registryPath, repoName, { last_git_commit: head });
|
|
369
|
+
}
|
|
370
|
+
catch {
|
|
371
|
+
// Not a git repo — skip
|
|
372
|
+
}
|
|
240
373
|
// Start file watcher for incremental updates (unless disabled)
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
const existingWatcher = activeWatchers.get(repoName);
|
|
244
|
-
if (existingWatcher) {
|
|
245
|
-
await stopWatcher(existingWatcher);
|
|
246
|
-
}
|
|
247
|
-
const watcher = startWatcher(rootPath, (changedFile) => {
|
|
248
|
-
handleFileChange(rootPath, repoName, indexPath, changedFile).catch((err) => {
|
|
249
|
-
const message = err instanceof Error ? err.message : String(err);
|
|
250
|
-
console.error(`[codesift] Watcher error for ${changedFile}: ${message}`);
|
|
251
|
-
});
|
|
252
|
-
});
|
|
253
|
-
activeWatchers.set(repoName, watcher);
|
|
374
|
+
if (options?.watch !== false) {
|
|
375
|
+
await setupWatcher(rootPath, repoName, indexPath);
|
|
254
376
|
}
|
|
255
377
|
return {
|
|
256
378
|
repo: repoName,
|
|
@@ -294,7 +416,7 @@ export async function indexRepo(url, options) {
|
|
|
294
416
|
if (options?.branch)
|
|
295
417
|
args.push("--branch", options.branch);
|
|
296
418
|
args.push("--", url, cloneTarget);
|
|
297
|
-
execFileSync("git", args, { stdio: "pipe", timeout:
|
|
419
|
+
execFileSync("git", args, { stdio: "pipe", timeout: GIT_CLONE_TIMEOUT_MS });
|
|
298
420
|
}
|
|
299
421
|
else {
|
|
300
422
|
// Pull latest changes
|
|
@@ -302,22 +424,24 @@ export async function indexRepo(url, options) {
|
|
|
302
424
|
if (options?.branch) {
|
|
303
425
|
execFileSync("git", ["-C", cloneTarget, "checkout", options.branch], {
|
|
304
426
|
stdio: "pipe",
|
|
305
|
-
timeout:
|
|
427
|
+
timeout: GIT_CHECKOUT_TIMEOUT_MS,
|
|
306
428
|
});
|
|
307
429
|
}
|
|
308
430
|
execFileSync("git", ["-C", cloneTarget, "pull", "--ff-only"], {
|
|
309
431
|
stdio: "pipe",
|
|
310
|
-
timeout:
|
|
432
|
+
timeout: GIT_PULL_TIMEOUT_MS,
|
|
311
433
|
});
|
|
312
434
|
}
|
|
313
|
-
catch {
|
|
435
|
+
catch (err) {
|
|
314
436
|
// Pull may fail if detached HEAD — force fresh clone
|
|
437
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
438
|
+
console.warn(`[codesift] Git pull failed for ${urlBasename}, re-cloning: ${message}`);
|
|
315
439
|
await rm(cloneTarget, { recursive: true, force: true });
|
|
316
440
|
const args = ["clone", "--depth", "1"];
|
|
317
441
|
if (options?.branch)
|
|
318
442
|
args.push("--branch", options.branch);
|
|
319
443
|
args.push("--", url, cloneTarget);
|
|
320
|
-
execFileSync("git", args, { stdio: "pipe", timeout:
|
|
444
|
+
execFileSync("git", args, { stdio: "pipe", timeout: GIT_CLONE_TIMEOUT_MS });
|
|
321
445
|
}
|
|
322
446
|
}
|
|
323
447
|
// Index the cloned repo (no watcher for remote repos)
|
|
@@ -326,50 +450,72 @@ export async function indexRepo(url, options) {
|
|
|
326
450
|
watch: false,
|
|
327
451
|
});
|
|
328
452
|
}
|
|
453
|
+
/**
 * Install the file watcher that feeds incremental index updates for a repo.
 *
 * A watcher already registered under `repoName` in `activeWatchers` is
 * stopped first. A new watcher is then started on `rootPath` and stored under
 * the same key. Change events are forwarded to `handleFileChange`, delete
 * events to `handleFileDelete`; a rejection from either handler is logged
 * with `console.error` and not re-thrown.
 *
 * @param {string} rootPath - Directory passed to `startWatcher`.
 * @param {string} repoName - Key in `activeWatchers`; also passed to the handlers.
 * @param {string} indexPath - Index location passed through to the handlers.
 * @returns {Promise<void>}
 */
async function setupWatcher(rootPath, repoName, indexPath) {
    const previous = activeWatchers.get(repoName);
    if (previous) {
        await stopWatcher(previous);
    }
    const describe = (err) => (err instanceof Error ? err.message : String(err));
    const onChanged = (changedFile) => {
        handleFileChange(rootPath, repoName, indexPath, changedFile).catch((err) => {
            console.error(`[codesift] Watcher error for ${changedFile}: ${describe(err)}`);
        });
    };
    const onDeleted = (deletedFile) => {
        handleFileDelete(repoName, indexPath, deletedFile).catch((err) => {
            console.error(`[codesift] Watcher delete error for ${deletedFile}: ${describe(err)}`);
        });
    };
    activeWatchers.set(repoName, startWatcher(rootPath, onChanged, onDeleted));
}
|
|
329
474
|
/**
 * React to a "file changed" event from the watcher.
 *
 * Steps, in order:
 *  1. notify the scanner via `scanOnChanged` so cached findings for the file
 *     are dropped before anything else happens;
 *  2. re-parse the file with `parseOneFile`; when that yields nothing the
 *     function returns without touching the index or the caches;
 *  3. persist the new symbols and file entry with `saveIncremental`;
 *  4. when `secretScanEnabled` is set in the config, run `scanFileForSecrets`
 *     (a failure there is only logged as a warning);
 *  5. drop the in-memory BM25, code-index and embedding caches for the repo.
 *
 * @param {string} repoRoot - Repo root the relative path is joined onto.
 * @param {string} repoName - Repo identifier used for caches and scanners.
 * @param {string} indexPath - Index location passed to `saveIncremental`.
 * @param {string} relativeFile - Path of the changed file relative to `repoRoot`.
 * @returns {Promise<void>}
 */
async function handleFileChange(repoRoot, repoName, indexPath, relativeFile) {
    const absoluteFile = join(repoRoot, relativeFile);
    // Invalidate cached findings so the next scan sees the updated file contents.
    scanOnChanged(repoName, relativeFile);
    const parsed = await parseOneFile(absoluteFile, repoRoot, repoName);
    if (!parsed) {
        return;
    }
    await saveIncremental(indexPath, relativeFile, parsed.symbols, parsed.entry);
    const { secretScanEnabled } = loadConfig();
    if (secretScanEnabled) {
        try {
            await scanFileForSecrets(absoluteFile, relativeFile, repoName, parsed.symbols);
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            console.warn(`[codesift] Secret scan failed for ${relativeFile}: ${reason}`);
        }
    }
    // Caches are only dropped here; they are rebuilt lazily on the next query.
    for (const cache of [bm25Indexes, codeIndexes, embeddingCaches]) {
        cache.delete(repoName);
    }
}
|
|
370
|
-
|
|
500
|
+
/**
 * React to a "file deleted" event from the watcher.
 *
 * The file is removed from the on-disk index via `removeFileFromIndex`, the
 * repo's in-memory BM25, code-index and embedding caches are dropped, and
 * finally the scanner is notified through `scanOnDeleted`.
 *
 * @param {string} repoName - Repo identifier used for caches and the scanner.
 * @param {string} indexPath - Index location passed to `removeFileFromIndex`.
 * @param {string} relativeFile - Path of the deleted file as reported by the watcher.
 * @returns {Promise<void>}
 */
async function handleFileDelete(repoName, indexPath, relativeFile) {
    await removeFileFromIndex(indexPath, relativeFile);
    // Caches are only dropped here; they are rebuilt lazily on the next query.
    for (const cache of [bm25Indexes, codeIndexes, embeddingCaches]) {
        cache.delete(repoName);
    }
    scanOnDeleted(repoName, relativeFile);
}
|
|
512
|
+
/**
 * List the repos recorded in the registry.
 *
 * @param {{ compact?: boolean }} [options] - Pass `compact: false` to get the
 *   registry entries unchanged; any other value (or no options) yields only
 *   the repo names.
 * @returns {Promise<Array>} Registry entries, or their `name` values.
 */
export async function listAllRepos(options) {
    const { registryPath } = loadConfig();
    const registered = await listRegistryRepos(registryPath);
    const wantsFullEntries = options?.compact === false;
    // Default is the ultra-compact form: callers usually only need the identifier.
    return wantsFullEntries ? registered : registered.map((entry) => entry.name);
}
|
|
374
520
|
export async function invalidateCache(repoName) {
|
|
375
521
|
const config = loadConfig();
|
|
@@ -384,13 +530,15 @@ export async function invalidateCache(repoName) {
|
|
|
384
530
|
}
|
|
385
531
|
// Remove in-memory caches
|
|
386
532
|
bm25Indexes.delete(repoName);
|
|
533
|
+
codeIndexes.delete(repoName);
|
|
387
534
|
embeddingCaches.delete(repoName);
|
|
388
535
|
// Delete index file + embedding files + chunk files
|
|
389
536
|
const embeddingPath = getEmbeddingPath(meta.index_path);
|
|
390
537
|
const embeddingMetaPath = getEmbeddingMetaPath(meta.index_path);
|
|
391
538
|
const chunkPath = getChunkPath(meta.index_path);
|
|
392
539
|
const chunkEmbeddingPath = getChunkEmbeddingPath(meta.index_path);
|
|
393
|
-
|
|
540
|
+
const graphStorePath = getGraphPath(meta.index_path);
|
|
541
|
+
for (const fp of [meta.index_path, embeddingPath, embeddingMetaPath, chunkPath, chunkEmbeddingPath, graphStorePath]) {
|
|
394
542
|
try {
|
|
395
543
|
await unlink(fp);
|
|
396
544
|
}
|
|
@@ -400,11 +548,159 @@ export async function invalidateCache(repoName) {
|
|
|
400
548
|
await removeRepo(config.registryPath, repoName);
|
|
401
549
|
return true;
|
|
402
550
|
}
|
|
551
|
+
/**
 * Re-index a single file without walking the whole repo.
 *
 * The owning repo is found by matching the resolved path against the roots of
 * all registered repos (the longest matching root wins). If the index already
 * holds an entry for the file whose recorded mtime equals the file's current
 * mtime AND the entry is not marked `stale`, nothing is parsed and a
 * `skipped: true` result is returned. Otherwise the file is re-parsed, saved
 * with `saveIncremental`, optionally scanned for secrets, and the repo's
 * in-memory BM25, code-index and embedding caches are dropped so they are
 * rebuilt on the next query.
 *
 * @param {string} filePath - Path of the file to re-index (resolved to absolute).
 * @returns {Promise<{repo: string, file: string, symbol_count: number,
 *   duration_ms: number, skipped?: boolean, secrets_warning?: string}>}
 * @throws {Error} When no registered repo contains the file, or when
 *   `parseOneFile` yields no result. Errors from `stat` also propagate.
 */
export async function indexFile(filePath) {
    const absPath = resolve(filePath);
    const config = loadConfig();
    const repos = await listRegistryRepos(config.registryPath);
    // Find the most specific repo root that contains this file.
    // NOTE(review): the prefix match assumes "/" separators in `root` — confirm
    // how roots are stored if Windows paths must be supported.
    const matchingRepo = repos
        .filter((r) => absPath.startsWith(r.root + "/") || absPath === r.root)
        .sort((a, b) => b.root.length - a.root.length)[0];
    if (!matchingRepo) {
        throw new Error(`No indexed repo contains "${absPath}". Run index_folder first.`);
    }
    const startTime = Date.now();
    const relPath = relative(matchingRepo.root, absPath);
    // mtime check — skip if unchanged
    const existing = await loadIndex(matchingRepo.index_path);
    if (existing) {
        const prevEntry = existing.files.find((f) => f.path === relPath);
        // A stale entry must be re-parsed even when its mtime is unchanged,
        // mirroring the forced re-parse that indexFolder applies to stale files.
        if (prevEntry?.mtime_ms && !prevEntry.stale) {
            const st = await stat(absPath);
            if (Math.round(st.mtimeMs) === prevEntry.mtime_ms) {
                return {
                    repo: matchingRepo.name,
                    file: relPath,
                    symbol_count: prevEntry.symbol_count,
                    duration_ms: Date.now() - startTime,
                    skipped: true,
                };
            }
        }
    }
    const result = await parseOneFile(absPath, matchingRepo.root, matchingRepo.name);
    if (!result) {
        throw new Error(`Failed to parse "${relPath}"`);
    }
    await saveIncremental(matchingRepo.index_path, relPath, result.symbols, result.entry);
    let secretFindingsCount = 0;
    if (config.secretScanEnabled) {
        try {
            secretFindingsCount = (await scanFileForSecrets(absPath, relPath, matchingRepo.name, result.symbols)).length;
        }
        catch (err) {
            // Secret scanning is best-effort; indexing has already succeeded.
            const message = err instanceof Error ? err.message : String(err);
            console.warn(`[codesift] Secret scan failed for ${relPath}: ${message}`);
        }
    }
    // Invalidate caches — lazy rebuild on next query via getBM25Index()
    bm25Indexes.delete(matchingRepo.name);
    codeIndexes.delete(matchingRepo.name);
    embeddingCaches.delete(matchingRepo.name);
    let secretsWarning;
    if (secretFindingsCount > 0) {
        secretsWarning = `\u26A0 ${secretFindingsCount} potential secret(s) detected`;
    }
    return {
        repo: matchingRepo.name,
        file: relPath,
        symbol_count: result.symbols.length,
        duration_ms: Date.now() - startTime,
        ...(secretsWarning ? { secrets_warning: secretsWarning } : {}),
    };
}
|
|
617
|
+
// ---------------------------------------------------------------------------
// Git-based auto-refresh — transparent freshness check before index access
// ---------------------------------------------------------------------------
/** Timestamp (ms) of the last freshness check, keyed by repo name. */
const freshnessChecked = new Map();
/** Minimum time between two freshness checks of the same repo. */
const FRESHNESS_INTERVAL_MS = 60_000;
/** Above this many changed files an incremental folder reindex replaces per-file reindexing. */
const MAX_DIFF_FILES = 50;
/**
 * Ensure the index for a repo is fresh relative to git HEAD.
 *
 * Throttled to one check per `FRESHNESS_INTERVAL_MS` per repo. When HEAD
 * differs from the commit recorded in the registry, the files reported by
 * `git diff --name-only --diff-filter=ACMR <old>..<new>` are re-indexed one
 * by one. An incremental `indexFolder` run is used instead when:
 *  - no commit was recorded for the repo,
 *  - the diff lists more than `MAX_DIFF_FILES` files, or
 *  - the diff itself fails (e.g. the recorded commit no longer exists after
 *    a rebase/squash), so the changed files cannot be determined.
 * Afterwards the recorded commit is updated and the repo's in-memory caches
 * are dropped.
 *
 * @param {string} repoName - Name of the repo as stored in the registry.
 * @returns {Promise<{status: "fresh"} | {status: "skipped"} |
 *   {status: "refreshed", files_updated: number}>} "skipped" is returned for
 *   unknown repos and when `git rev-parse HEAD` fails (non-git folder).
 *   `files_updated` is the number of files listed by the diff.
 */
export async function ensureIndexFresh(repoName) {
    const lastCheck = freshnessChecked.get(repoName);
    if (lastCheck && Date.now() - lastCheck < FRESHNESS_INTERVAL_MS) {
        return { status: "fresh" };
    }
    const config = loadConfig();
    const meta = await getRepo(config.registryPath, repoName);
    if (!meta)
        return { status: "skipped" };
    let currentCommit;
    try {
        currentCommit = execFileSync("git", ["rev-parse", "HEAD"], {
            cwd: meta.root, encoding: "utf-8", timeout: 5000,
        }).trim();
    }
    catch {
        // Not a git repo (or git unavailable) — remember the check and move on.
        freshnessChecked.set(repoName, Date.now());
        return { status: "skipped" };
    }
    if (meta.last_git_commit === currentCommit) {
        freshnessChecked.set(repoName, Date.now());
        return { status: "fresh" };
    }
    // HEAD moved — find changed files
    let changedFiles = [];
    // Without a recorded commit there is nothing to diff against.
    let needsFolderReindex = !meta.last_git_commit;
    if (meta.last_git_commit) {
        try {
            const diff = execFileSync("git", [
                "diff", "--name-only", "--diff-filter=ACMR",
                `${meta.last_git_commit}..${currentCommit}`,
            ], {
                cwd: meta.root, encoding: "utf-8", timeout: 10_000,
            });
            changedFiles = diff.trim().split("\n").filter(Boolean);
            if (changedFiles.length > MAX_DIFF_FILES) {
                needsFolderReindex = true;
            }
        }
        catch {
            // Stored commit gone (rebase/squash) — the change set is unknown,
            // so fall back to an incremental reindex of the whole folder.
            changedFiles = [];
            needsFolderReindex = true;
        }
    }
    if (needsFolderReindex) {
        await indexFolder(meta.root, { incremental: true, watch: false });
    }
    else {
        for (const file of changedFiles) {
            try {
                await indexFile(join(meta.root, file));
            }
            catch {
                // File deleted or unparseable — skip
            }
        }
    }
    await updateRepoMeta(config.registryPath, repoName, {
        last_git_commit: currentCommit,
        updated_at: Date.now(),
    });
    bm25Indexes.delete(repoName);
    codeIndexes.delete(repoName);
    embeddingCaches.delete(repoName);
    freshnessChecked.set(repoName, Date.now());
    return { status: "refreshed", files_updated: changedFiles.length };
}
|
|
691
|
+
/**
 * Forget every recorded freshness check so the next `ensureIndexFresh` call
 * for any repo is not throttled. Exported for testing.
 *
 * @returns {void}
 */
export function resetFreshnessCache() {
    freshnessChecked.clear();
}
|
|
695
|
+
// ---------------------------------------------------------------------------
|
|
696
|
+
// Index access — with auto-refresh
|
|
697
|
+
// ---------------------------------------------------------------------------
|
|
403
698
|
/**
|
|
404
699
|
* Get the in-memory BM25 index for a repo.
|
|
405
|
-
* Loads from disk if not cached.
|
|
700
|
+
* Loads from disk if not cached. Auto-refreshes if git HEAD moved.
|
|
406
701
|
*/
|
|
407
702
|
export async function getBM25Index(repoName) {
|
|
703
|
+
await ensureIndexFresh(repoName);
|
|
408
704
|
const cached = bm25Indexes.get(repoName);
|
|
409
705
|
if (cached)
|
|
410
706
|
return cached;
|
|
@@ -421,13 +717,25 @@ export async function getBM25Index(repoName) {
|
|
|
421
717
|
}
|
|
422
718
|
/**
 * Get the code index for a repo.
 *
 * `ensureIndexFresh` is awaited first, so the index is refreshed when git
 * HEAD has moved. The in-memory copy in `codeIndexes` is returned when
 * present; otherwise the index is loaded from the path recorded in the
 * registry and cached for later calls.
 *
 * @param {string} repoName - Name of the repo as stored in the registry.
 * @returns {Promise<object|null>} The code index, or `null` when the repo is
 *   not registered or its index cannot be loaded.
 */
export async function getCodeIndex(repoName) {
    await ensureIndexFresh(repoName);
    const cachedIndex = codeIndexes.get(repoName);
    if (cachedIndex) {
        return cachedIndex;
    }
    const { registryPath } = loadConfig();
    const repoMeta = await getRepo(registryPath, repoName);
    const loadedIndex = repoMeta ? await loadIndex(repoMeta.index_path) : null;
    if (!loadedIndex) {
        return null;
    }
    codeIndexes.set(repoName, loadedIndex);
    return loadedIndex;
}
|
|
432
740
|
/**
|
|
433
741
|
* Get the in-memory embedding cache for a repo.
|