codesift-mcp 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299) hide show
  1. package/LICENSE +66 -21
  2. package/README.md +346 -56
  3. package/dist/cli/args.d.ts +2 -0
  4. package/dist/cli/args.d.ts.map +1 -1
  5. package/dist/cli/args.js +11 -0
  6. package/dist/cli/args.js.map +1 -1
  7. package/dist/cli/commands.d.ts.map +1 -1
  8. package/dist/cli/commands.js +177 -67
  9. package/dist/cli/commands.js.map +1 -1
  10. package/dist/cli/help.d.ts +1 -1
  11. package/dist/cli/help.d.ts.map +1 -1
  12. package/dist/cli/help.js +157 -0
  13. package/dist/cli/help.js.map +1 -1
  14. package/dist/cli/hooks.d.ts +3 -0
  15. package/dist/cli/hooks.d.ts.map +1 -0
  16. package/dist/cli/hooks.js +163 -0
  17. package/dist/cli/hooks.js.map +1 -0
  18. package/dist/cli/setup.d.ts +25 -0
  19. package/dist/cli/setup.d.ts.map +1 -0
  20. package/dist/cli/setup.js +400 -0
  21. package/dist/cli/setup.js.map +1 -0
  22. package/dist/config.d.ts +2 -0
  23. package/dist/config.d.ts.map +1 -1
  24. package/dist/config.js +2 -0
  25. package/dist/config.js.map +1 -1
  26. package/dist/formatters-shortening.d.ts +7 -0
  27. package/dist/formatters-shortening.d.ts.map +1 -0
  28. package/dist/formatters-shortening.js +68 -0
  29. package/dist/formatters-shortening.js.map +1 -0
  30. package/dist/formatters.d.ts +314 -0
  31. package/dist/formatters.d.ts.map +1 -0
  32. package/dist/formatters.js +396 -0
  33. package/dist/formatters.js.map +1 -0
  34. package/dist/instructions.d.ts +6 -0
  35. package/dist/instructions.d.ts.map +1 -0
  36. package/dist/instructions.js +72 -0
  37. package/dist/instructions.js.map +1 -0
  38. package/dist/lsp/lsp-client.d.ts +21 -0
  39. package/dist/lsp/lsp-client.d.ts.map +1 -0
  40. package/dist/lsp/lsp-client.js +122 -0
  41. package/dist/lsp/lsp-client.js.map +1 -0
  42. package/dist/lsp/lsp-manager.d.ts +12 -0
  43. package/dist/lsp/lsp-manager.d.ts.map +1 -0
  44. package/dist/lsp/lsp-manager.js +82 -0
  45. package/dist/lsp/lsp-manager.js.map +1 -0
  46. package/dist/lsp/lsp-servers.d.ts +13 -0
  47. package/dist/lsp/lsp-servers.d.ts.map +1 -0
  48. package/dist/lsp/lsp-servers.js +57 -0
  49. package/dist/lsp/lsp-servers.js.map +1 -0
  50. package/dist/lsp/lsp-tools.d.ts +67 -0
  51. package/dist/lsp/lsp-tools.d.ts.map +1 -0
  52. package/dist/lsp/lsp-tools.js +359 -0
  53. package/dist/lsp/lsp-tools.js.map +1 -0
  54. package/dist/parser/extractors/_shared.d.ts +11 -0
  55. package/dist/parser/extractors/_shared.d.ts.map +1 -0
  56. package/dist/parser/extractors/_shared.js +38 -0
  57. package/dist/parser/extractors/_shared.js.map +1 -0
  58. package/dist/parser/extractors/astro.d.ts +15 -0
  59. package/dist/parser/extractors/astro.d.ts.map +1 -0
  60. package/dist/parser/extractors/astro.js +104 -0
  61. package/dist/parser/extractors/astro.js.map +1 -0
  62. package/dist/parser/extractors/conversation.d.ts +16 -0
  63. package/dist/parser/extractors/conversation.d.ts.map +1 -0
  64. package/dist/parser/extractors/conversation.js +196 -0
  65. package/dist/parser/extractors/conversation.js.map +1 -0
  66. package/dist/parser/extractors/go.d.ts.map +1 -1
  67. package/dist/parser/extractors/go.js +22 -45
  68. package/dist/parser/extractors/go.js.map +1 -1
  69. package/dist/parser/extractors/python.d.ts +1 -1
  70. package/dist/parser/extractors/python.d.ts.map +1 -1
  71. package/dist/parser/extractors/python.js +19 -50
  72. package/dist/parser/extractors/python.js.map +1 -1
  73. package/dist/parser/extractors/rust.d.ts +1 -1
  74. package/dist/parser/extractors/rust.d.ts.map +1 -1
  75. package/dist/parser/extractors/rust.js +7 -34
  76. package/dist/parser/extractors/rust.js.map +1 -1
  77. package/dist/parser/extractors/typescript.d.ts +1 -1
  78. package/dist/parser/extractors/typescript.d.ts.map +1 -1
  79. package/dist/parser/extractors/typescript.js +99 -68
  80. package/dist/parser/extractors/typescript.js.map +1 -1
  81. package/dist/parser/parser-manager.d.ts.map +1 -1
  82. package/dist/parser/parser-manager.js +12 -2
  83. package/dist/parser/parser-manager.js.map +1 -1
  84. package/dist/parser/symbol-extractor.d.ts +2 -0
  85. package/dist/parser/symbol-extractor.d.ts.map +1 -1
  86. package/dist/parser/symbol-extractor.js +2 -0
  87. package/dist/parser/symbol-extractor.js.map +1 -1
  88. package/dist/register-tools.d.ts +127 -0
  89. package/dist/register-tools.d.ts.map +1 -0
  90. package/dist/register-tools.js +1453 -0
  91. package/dist/register-tools.js.map +1 -0
  92. package/dist/retrieval/codebase-retrieval.d.ts +4 -26
  93. package/dist/retrieval/codebase-retrieval.d.ts.map +1 -1
  94. package/dist/retrieval/codebase-retrieval.js +105 -403
  95. package/dist/retrieval/codebase-retrieval.js.map +1 -1
  96. package/dist/retrieval/retrieval-constants.d.ts +27 -0
  97. package/dist/retrieval/retrieval-constants.d.ts.map +1 -0
  98. package/dist/retrieval/retrieval-constants.js +27 -0
  99. package/dist/retrieval/retrieval-constants.js.map +1 -0
  100. package/dist/retrieval/retrieval-schemas.d.ts +107 -0
  101. package/dist/retrieval/retrieval-schemas.d.ts.map +1 -0
  102. package/dist/retrieval/retrieval-schemas.js +102 -0
  103. package/dist/retrieval/retrieval-schemas.js.map +1 -0
  104. package/dist/retrieval/retrieval-utils.d.ts +40 -0
  105. package/dist/retrieval/retrieval-utils.d.ts.map +1 -0
  106. package/dist/retrieval/retrieval-utils.js +139 -0
  107. package/dist/retrieval/retrieval-utils.js.map +1 -0
  108. package/dist/retrieval/semantic-handlers.d.ts +8 -0
  109. package/dist/retrieval/semantic-handlers.d.ts.map +1 -0
  110. package/dist/retrieval/semantic-handlers.js +152 -0
  111. package/dist/retrieval/semantic-handlers.js.map +1 -0
  112. package/dist/search/bm25.d.ts +6 -1
  113. package/dist/search/bm25.d.ts.map +1 -1
  114. package/dist/search/bm25.js +95 -32
  115. package/dist/search/bm25.js.map +1 -1
  116. package/dist/search/chunker.d.ts +10 -0
  117. package/dist/search/chunker.d.ts.map +1 -1
  118. package/dist/search/chunker.js +63 -11
  119. package/dist/search/chunker.js.map +1 -1
  120. package/dist/search/reranker.d.ts +15 -0
  121. package/dist/search/reranker.d.ts.map +1 -0
  122. package/dist/search/reranker.js +126 -0
  123. package/dist/search/reranker.js.map +1 -0
  124. package/dist/search/semantic.d.ts +1 -1
  125. package/dist/search/semantic.d.ts.map +1 -1
  126. package/dist/search/semantic.js +40 -45
  127. package/dist/search/semantic.js.map +1 -1
  128. package/dist/server-helpers.d.ts +29 -0
  129. package/dist/server-helpers.d.ts.map +1 -0
  130. package/dist/server-helpers.js +312 -0
  131. package/dist/server-helpers.js.map +1 -0
  132. package/dist/server.d.ts +1 -1
  133. package/dist/server.d.ts.map +1 -1
  134. package/dist/server.js +11 -271
  135. package/dist/server.js.map +1 -1
  136. package/dist/storage/_shared.d.ts +9 -0
  137. package/dist/storage/_shared.d.ts.map +1 -0
  138. package/dist/storage/_shared.js +26 -0
  139. package/dist/storage/_shared.js.map +1 -0
  140. package/dist/storage/chunk-store.d.ts.map +1 -1
  141. package/dist/storage/chunk-store.js +23 -63
  142. package/dist/storage/chunk-store.js.map +1 -1
  143. package/dist/storage/embedding-store.d.ts +6 -3
  144. package/dist/storage/embedding-store.d.ts.map +1 -1
  145. package/dist/storage/embedding-store.js +54 -30
  146. package/dist/storage/embedding-store.js.map +1 -1
  147. package/dist/storage/graph-store.d.ts +48 -0
  148. package/dist/storage/graph-store.d.ts.map +1 -0
  149. package/dist/storage/graph-store.js +52 -0
  150. package/dist/storage/graph-store.js.map +1 -0
  151. package/dist/storage/index-store.d.ts +5 -0
  152. package/dist/storage/index-store.d.ts.map +1 -1
  153. package/dist/storage/index-store.js +28 -16
  154. package/dist/storage/index-store.js.map +1 -1
  155. package/dist/storage/registry.d.ts +4 -0
  156. package/dist/storage/registry.d.ts.map +1 -1
  157. package/dist/storage/registry.js +16 -16
  158. package/dist/storage/registry.js.map +1 -1
  159. package/dist/storage/usage-stats.d.ts +6 -0
  160. package/dist/storage/usage-stats.d.ts.map +1 -1
  161. package/dist/storage/usage-stats.js +59 -11
  162. package/dist/storage/usage-stats.js.map +1 -1
  163. package/dist/storage/usage-tracker.d.ts +3 -0
  164. package/dist/storage/usage-tracker.d.ts.map +1 -1
  165. package/dist/storage/usage-tracker.js +50 -132
  166. package/dist/storage/usage-tracker.js.map +1 -1
  167. package/dist/storage/watcher.d.ts +2 -1
  168. package/dist/storage/watcher.d.ts.map +1 -1
  169. package/dist/storage/watcher.js +16 -16
  170. package/dist/storage/watcher.js.map +1 -1
  171. package/dist/tools/ast-query-tools.d.ts +29 -0
  172. package/dist/tools/ast-query-tools.d.ts.map +1 -0
  173. package/dist/tools/ast-query-tools.js +110 -0
  174. package/dist/tools/ast-query-tools.js.map +1 -0
  175. package/dist/tools/boundary-tools.d.ts +31 -0
  176. package/dist/tools/boundary-tools.d.ts.map +1 -0
  177. package/dist/tools/boundary-tools.js +62 -0
  178. package/dist/tools/boundary-tools.js.map +1 -0
  179. package/dist/tools/clone-tools.d.ts +35 -0
  180. package/dist/tools/clone-tools.d.ts.map +1 -0
  181. package/dist/tools/clone-tools.js +181 -0
  182. package/dist/tools/clone-tools.js.map +1 -0
  183. package/dist/tools/community-tools.d.ts +23 -0
  184. package/dist/tools/community-tools.d.ts.map +1 -0
  185. package/dist/tools/community-tools.js +297 -0
  186. package/dist/tools/community-tools.js.map +1 -0
  187. package/dist/tools/complexity-tools.d.ts +34 -0
  188. package/dist/tools/complexity-tools.d.ts.map +1 -0
  189. package/dist/tools/complexity-tools.js +135 -0
  190. package/dist/tools/complexity-tools.js.map +1 -0
  191. package/dist/tools/context-tools.d.ts +44 -3
  192. package/dist/tools/context-tools.d.ts.map +1 -1
  193. package/dist/tools/context-tools.js +329 -99
  194. package/dist/tools/context-tools.js.map +1 -1
  195. package/dist/tools/conversation-tools.d.ts +107 -0
  196. package/dist/tools/conversation-tools.d.ts.map +1 -0
  197. package/dist/tools/conversation-tools.js +419 -0
  198. package/dist/tools/conversation-tools.js.map +1 -0
  199. package/dist/tools/coordinator-tools.d.ts +73 -0
  200. package/dist/tools/coordinator-tools.d.ts.map +1 -0
  201. package/dist/tools/coordinator-tools.js +153 -0
  202. package/dist/tools/coordinator-tools.js.map +1 -0
  203. package/dist/tools/cross-repo-tools.d.ts +43 -0
  204. package/dist/tools/cross-repo-tools.d.ts.map +1 -0
  205. package/dist/tools/cross-repo-tools.js +55 -0
  206. package/dist/tools/cross-repo-tools.js.map +1 -0
  207. package/dist/tools/diff-tools.d.ts +4 -1
  208. package/dist/tools/diff-tools.d.ts.map +1 -1
  209. package/dist/tools/diff-tools.js +23 -5
  210. package/dist/tools/diff-tools.js.map +1 -1
  211. package/dist/tools/frequency-tools.d.ts +46 -0
  212. package/dist/tools/frequency-tools.d.ts.map +1 -0
  213. package/dist/tools/frequency-tools.js +184 -0
  214. package/dist/tools/frequency-tools.js.map +1 -0
  215. package/dist/tools/generate-tools.d.ts.map +1 -1
  216. package/dist/tools/generate-tools.js +13 -2
  217. package/dist/tools/generate-tools.js.map +1 -1
  218. package/dist/tools/graph-tools.d.ts +44 -11
  219. package/dist/tools/graph-tools.d.ts.map +1 -1
  220. package/dist/tools/graph-tools.js +147 -104
  221. package/dist/tools/graph-tools.js.map +1 -1
  222. package/dist/tools/hotspot-tools.d.ts +24 -0
  223. package/dist/tools/hotspot-tools.d.ts.map +1 -0
  224. package/dist/tools/hotspot-tools.js +122 -0
  225. package/dist/tools/hotspot-tools.js.map +1 -0
  226. package/dist/tools/impact-tools.d.ts +13 -0
  227. package/dist/tools/impact-tools.d.ts.map +1 -0
  228. package/dist/tools/impact-tools.js +238 -0
  229. package/dist/tools/impact-tools.js.map +1 -0
  230. package/dist/tools/index-tools.d.ts +44 -3
  231. package/dist/tools/index-tools.d.ts.map +1 -1
  232. package/dist/tools/index-tools.js +530 -222
  233. package/dist/tools/index-tools.js.map +1 -1
  234. package/dist/tools/memory-tools.d.ts +35 -0
  235. package/dist/tools/memory-tools.d.ts.map +1 -0
  236. package/dist/tools/memory-tools.js +229 -0
  237. package/dist/tools/memory-tools.js.map +1 -0
  238. package/dist/tools/outline-tools.d.ts +24 -13
  239. package/dist/tools/outline-tools.d.ts.map +1 -1
  240. package/dist/tools/outline-tools.js +113 -87
  241. package/dist/tools/outline-tools.js.map +1 -1
  242. package/dist/tools/pattern-tools.d.ts +32 -0
  243. package/dist/tools/pattern-tools.d.ts.map +1 -0
  244. package/dist/tools/pattern-tools.js +116 -0
  245. package/dist/tools/pattern-tools.js.map +1 -0
  246. package/dist/tools/report-tools.d.ts +5 -0
  247. package/dist/tools/report-tools.d.ts.map +1 -0
  248. package/dist/tools/report-tools.js +167 -0
  249. package/dist/tools/report-tools.js.map +1 -0
  250. package/dist/tools/review-diff-tools.d.ts +148 -0
  251. package/dist/tools/review-diff-tools.d.ts.map +1 -0
  252. package/dist/tools/review-diff-tools.js +852 -0
  253. package/dist/tools/review-diff-tools.js.map +1 -0
  254. package/dist/tools/route-tools.d.ts +32 -0
  255. package/dist/tools/route-tools.d.ts.map +1 -0
  256. package/dist/tools/route-tools.js +276 -0
  257. package/dist/tools/route-tools.js.map +1 -0
  258. package/dist/tools/search-ranker.d.ts +5 -0
  259. package/dist/tools/search-ranker.d.ts.map +1 -0
  260. package/dist/tools/search-ranker.js +142 -0
  261. package/dist/tools/search-ranker.js.map +1 -0
  262. package/dist/tools/search-tools.d.ts +24 -1
  263. package/dist/tools/search-tools.d.ts.map +1 -1
  264. package/dist/tools/search-tools.js +459 -225
  265. package/dist/tools/search-tools.js.map +1 -1
  266. package/dist/tools/secret-tools.d.ts +104 -0
  267. package/dist/tools/secret-tools.d.ts.map +1 -0
  268. package/dist/tools/secret-tools.js +410 -0
  269. package/dist/tools/secret-tools.js.map +1 -0
  270. package/dist/tools/symbol-tools.d.ts +90 -2
  271. package/dist/tools/symbol-tools.d.ts.map +1 -1
  272. package/dist/tools/symbol-tools.js +576 -42
  273. package/dist/tools/symbol-tools.js.map +1 -1
  274. package/dist/types.d.ts +34 -1
  275. package/dist/types.d.ts.map +1 -1
  276. package/dist/utils/framework-detect.d.ts +5 -0
  277. package/dist/utils/framework-detect.d.ts.map +1 -0
  278. package/dist/utils/framework-detect.js +36 -0
  279. package/dist/utils/framework-detect.js.map +1 -0
  280. package/dist/utils/glob.d.ts +19 -0
  281. package/dist/utils/glob.d.ts.map +1 -0
  282. package/dist/utils/glob.js +74 -0
  283. package/dist/utils/glob.js.map +1 -0
  284. package/dist/utils/import-graph.d.ts +29 -0
  285. package/dist/utils/import-graph.d.ts.map +1 -0
  286. package/dist/utils/import-graph.js +125 -0
  287. package/dist/utils/import-graph.js.map +1 -0
  288. package/dist/utils/test-file.d.ts.map +1 -1
  289. package/dist/utils/test-file.js +1 -0
  290. package/dist/utils/test-file.js.map +1 -1
  291. package/dist/utils/walk.d.ts +45 -0
  292. package/dist/utils/walk.d.ts.map +1 -0
  293. package/dist/utils/walk.js +87 -0
  294. package/dist/utils/walk.js.map +1 -0
  295. package/package.json +10 -4
  296. package/rules/codesift.md +187 -0
  297. package/rules/codesift.mdc +192 -0
  298. package/rules/codex.md +187 -0
  299. package/rules/gemini.md +187 -0
@@ -1,81 +1,82 @@
1
- import { readdir, readFile, stat, unlink, rm, mkdir as mkdirAsync } from "node:fs/promises";
1
+ import { readFile, stat, unlink, rm, mkdir as mkdirAsync } from "node:fs/promises";
2
2
  import { join, relative, extname, resolve, basename } from "node:path";
3
3
  import { execFileSync } from "node:child_process";
4
+ import { createHash } from "node:crypto";
4
5
  import { parseFile } from "../parser/parser-manager.js";
5
- import { extractSymbols, extractMarkdownSymbols, extractPrismaSymbols } from "../parser/symbol-extractor.js";
6
+ import { extractSymbols, extractMarkdownSymbols, extractPrismaSymbols, extractAstroSymbols, extractConversationSymbols } from "../parser/symbol-extractor.js";
6
7
  import { getLanguageForExtension } from "../parser/parser-manager.js";
7
- import { saveIndex, loadIndex, getIndexPath, saveIncremental } from "../storage/index-store.js";
8
- import { registerRepo, listRepos as listRegistryRepos, getRepo, removeRepo, getRepoName } from "../storage/registry.js";
8
+ import { saveIndex, loadIndex, getIndexPath, saveIncremental, removeFileFromIndex } from "../storage/index-store.js";
9
+ import { registerRepo, listRepos as listRegistryRepos, getRepo, removeRepo, getRepoName, updateRepoMeta } from "../storage/registry.js";
9
10
  import { startWatcher, stopWatcher } from "../storage/watcher.js";
10
11
  import { buildBM25Index } from "../search/bm25.js";
11
12
  import { buildSymbolText, createEmbeddingProvider } from "../search/semantic.js";
12
13
  import { loadEmbeddings, saveEmbeddings, saveEmbeddingMeta, getEmbeddingPath, getEmbeddingMetaPath, batchEmbed } from "../storage/embedding-store.js";
13
14
  import { saveChunks, saveChunkEmbeddings, loadChunkEmbeddings, getChunkPath, getChunkEmbeddingPath } from "../storage/chunk-store.js";
14
- import { chunkFile } from "../search/chunker.js";
15
+ import { chunkFile, chunkBySymbols } from "../search/chunker.js";
15
16
  import { loadConfig } from "../config.js";
16
17
  import { validateGitUrl, validateGitRef } from "../utils/git-validation.js";
17
- // Ignore patterns for directory walking (same as watcher)
18
- const IGNORE_DIRS = new Set([
19
- "node_modules", ".git", "dist", "build", "coverage",
20
- ".codesift", ".next", "__pycache__", ".pytest_cache",
21
- ".venv", "venv", ".tox", ".mypy_cache", ".turbo",
22
- "generated", "audit-results", ".backup", "jscpd-report",
23
- ]);
24
- const MAX_FILE_SIZE = 1_000_000; // 1MB — skip giant files
18
+ import { walkDirectory } from "../utils/walk.js";
19
+ import { onFileChanged as scanOnChanged, onFileDeleted as scanOnDeleted, scanFileForSecrets } from "./secret-tools.js";
20
+ import { getGraphPath } from "../storage/graph-store.js";
25
21
  const PARSE_CONCURRENCY = 8;
22
+ const CHUNK_EMBEDDING_BATCH_SIZE = 96;
23
+ const GIT_CLONE_TIMEOUT_MS = 120_000;
24
+ const GIT_CHECKOUT_TIMEOUT_MS = 30_000;
25
+ const GIT_PULL_TIMEOUT_MS = 60_000;
26
26
  // Active watchers and in-memory indexes keyed by repo name
27
27
  const activeWatchers = new Map();
28
28
  const bm25Indexes = new Map();
29
+ const codeIndexes = new Map();
29
30
  const embeddingCaches = new Map();
30
31
  /**
31
- * Walk a directory tree, collecting files that can be parsed.
32
- * Respects .gitignore patterns and skips known non-source directories.
32
+ * Parse a single file and extract its symbols + metadata.
33
+ * Returns null if the file cannot be parsed.
33
34
  */
34
- async function walkDirectory(rootPath, includePaths) {
35
- const files = [];
36
- async function walk(dirPath) {
37
- let entries;
38
- try {
39
- entries = await readdir(dirPath, { withFileTypes: true });
35
+ async function parseOneFile(filePath, repoRoot, repoName) {
36
+ try {
37
+ const stat = await import("node:fs/promises").then((fs) => fs.stat(filePath));
38
+ const source = await readFile(filePath, "utf-8");
39
+ const relPath = relative(repoRoot, filePath);
40
+ const ext = extname(filePath);
41
+ const baseName = filePath.split("/").pop() ?? "";
42
+ const language = getLanguageForExtension(ext)
43
+ ?? (baseName.startsWith(".env") ? "config" : "unknown");
44
+ let symbols;
45
+ if (language === "markdown") {
46
+ symbols = extractMarkdownSymbols(source, relPath, repoName);
40
47
  }
41
- catch {
42
- return; // permission denied, etc.
48
+ else if (language === "prisma") {
49
+ symbols = extractPrismaSymbols(source, relPath, repoName);
43
50
  }
44
- for (const entry of entries) {
45
- const fullPath = join(dirPath, entry.name);
46
- if (entry.isDirectory()) {
47
- if (IGNORE_DIRS.has(entry.name) || entry.name.startsWith(".")) {
48
- continue;
49
- }
50
- await walk(fullPath);
51
- }
52
- else if (entry.isFile()) {
53
- const ext = extname(entry.name);
54
- const language = getLanguageForExtension(ext);
55
- if (!language)
56
- continue;
57
- // Filter by include paths if specified
58
- if (includePaths && includePaths.length > 0) {
59
- const relPath = relative(rootPath, fullPath);
60
- const matches = includePaths.some((p) => relPath.startsWith(p));
61
- if (!matches)
62
- continue;
63
- }
64
- // Skip files that are too large
65
- try {
66
- const fileStat = await stat(fullPath);
67
- if (fileStat.size > MAX_FILE_SIZE)
68
- continue;
69
- }
70
- catch {
71
- continue;
72
- }
73
- files.push(fullPath);
74
- }
51
+ else if (language === "astro") {
52
+ symbols = extractAstroSymbols(source, relPath, repoName);
53
+ }
54
+ else if (language === "conversation") {
55
+ symbols = extractConversationSymbols(source, relPath, repoName);
56
+ }
57
+ else if (language === "config") {
58
+ symbols = [];
75
59
  }
60
+ else {
61
+ const tree = await parseFile(filePath, source);
62
+ if (!tree)
63
+ return null;
64
+ symbols = extractSymbols(tree, relPath, source, repoName, language);
65
+ }
66
+ const entry = {
67
+ path: relPath,
68
+ language,
69
+ symbol_count: symbols.length,
70
+ last_modified: Date.now(),
71
+ mtime_ms: Math.round(stat.mtimeMs),
72
+ };
73
+ return { symbols, entry };
74
+ }
75
+ catch (err) {
76
+ const message = err instanceof Error ? err.message : String(err);
77
+ console.warn(`[codesift] Failed to parse ${relative(repoRoot, filePath)}: ${message}`);
78
+ return null;
76
79
  }
77
- await walk(rootPath);
78
- return files;
79
80
  }
80
81
  /**
81
82
  * Parse files in parallel batches.
@@ -83,41 +84,9 @@ async function walkDirectory(rootPath, includePaths) {
83
84
  async function parseFiles(files, repoRoot, repoName) {
84
85
  const allSymbols = [];
85
86
  const fileEntries = [];
86
- // Process in batches for controlled concurrency
87
87
  for (let i = 0; i < files.length; i += PARSE_CONCURRENCY) {
88
88
  const batch = files.slice(i, i + PARSE_CONCURRENCY);
89
- const results = await Promise.all(batch.map(async (filePath) => {
90
- try {
91
- const source = await readFile(filePath, "utf-8");
92
- const relPath = relative(repoRoot, filePath);
93
- const ext = extname(filePath);
94
- const language = getLanguageForExtension(ext) ?? "unknown";
95
- let symbols;
96
- // Markdown and Prisma use custom parsers (no tree-sitter grammar)
97
- if (language === "markdown") {
98
- symbols = extractMarkdownSymbols(source, relPath, repoName);
99
- }
100
- else if (language === "prisma") {
101
- symbols = extractPrismaSymbols(source, relPath, repoName);
102
- }
103
- else {
104
- const tree = await parseFile(filePath, source);
105
- if (!tree)
106
- return null;
107
- symbols = extractSymbols(tree, relPath, source, repoName, language);
108
- }
109
- const entry = {
110
- path: relPath,
111
- language,
112
- symbol_count: symbols.length,
113
- last_modified: Date.now(),
114
- };
115
- return { symbols, entry };
116
- }
117
- catch {
118
- return null;
119
- }
120
- }));
89
+ const results = await Promise.all(batch.map((filePath) => parseOneFile(filePath, repoRoot, repoName)));
121
90
  for (const result of results) {
122
91
  if (result) {
123
92
  allSymbols.push(...result.symbols);
@@ -127,35 +96,241 @@ async function parseFiles(files, repoRoot, repoName) {
127
96
  }
128
97
  return { symbols: allSymbols, fileEntries };
129
98
  }
99
+ // ---------------------------------------------------------------------------
100
+ // Dirty propagation — mark caller files stale when a callee signature changes
101
+ // ---------------------------------------------------------------------------
102
+ /**
103
+ * Compute a hash of a symbol's public interface (name + kind + signature).
104
+ * Body changes don't trigger propagation — only signature changes.
105
+ */
106
+ function computeSignatureHash(sym) {
107
+ const key = `${sym.name}|${sym.kind}|${sym.signature ?? ""}`;
108
+ return createHash("sha256").update(key).digest("hex").slice(0, 16);
109
+ }
110
+ /**
111
+ * Detect signature changes and mark caller files as stale.
112
+ * Returns the set of files marked stale.
113
+ */
114
+ function propagateDirtySignatures(oldSymbols, newSymbols, fileEntries) {
115
+ // Build old signature hashes
116
+ const oldHashes = new Map();
117
+ for (const sym of oldSymbols) {
118
+ oldHashes.set(sym.id, computeSignatureHash(sym));
119
+ }
120
+ // Find symbols with changed signatures
121
+ const changedSymbolFiles = new Set();
122
+ for (const sym of newSymbols) {
123
+ const oldHash = oldHashes.get(sym.id);
124
+ if (oldHash && oldHash !== computeSignatureHash(sym)) {
125
+ changedSymbolFiles.add(sym.file);
126
+ }
127
+ }
128
+ if (changedSymbolFiles.size === 0)
129
+ return new Set();
130
+ // Find files that import from changed files (1 level of callers)
131
+ // Use a simple heuristic: check if any symbol source mentions a changed file's name
132
+ const changedBasenames = new Set();
133
+ for (const f of changedSymbolFiles) {
134
+ const base = f.split("/").pop()?.replace(/\.\w+$/, "");
135
+ if (base)
136
+ changedBasenames.add(base);
137
+ }
138
+ const staleFiles = new Set();
139
+ for (const sym of newSymbols) {
140
+ if (changedSymbolFiles.has(sym.file))
141
+ continue; // Don't mark the changed file itself
142
+ if (!sym.source)
143
+ continue;
144
+ for (const base of changedBasenames) {
145
+ if (sym.source.includes(base)) {
146
+ staleFiles.add(sym.file);
147
+ break;
148
+ }
149
+ }
150
+ }
151
+ // Mark stale in file entries (clear mtime so next index re-parses them)
152
+ for (const entry of fileEntries) {
153
+ if (staleFiles.has(entry.path)) {
154
+ entry.stale = true;
155
+ delete entry.mtime_ms; // Force re-parse on next indexFolder
156
+ }
157
+ }
158
+ return staleFiles;
159
+ }
160
+ /**
161
+ * Embed symbols using the configured embedding provider.
162
+ * Non-fatal — BM25 search still works if embedding fails.
163
+ */
164
+ export async function embedSymbols(symbols, indexPath, repoName, config) {
165
+ if (!config.embeddingProvider)
166
+ return;
167
+ const embeddingPath = getEmbeddingPath(indexPath);
168
+ const metaPath = getEmbeddingMetaPath(indexPath);
169
+ try {
170
+ const provider = createEmbeddingProvider(config.embeddingProvider, config);
171
+ const symbolTexts = new Map(symbols.map((s) => [s.id, buildSymbolText(s)]));
172
+ const existing = await loadEmbeddings(embeddingPath);
173
+ const embeddings = await batchEmbed(symbolTexts, existing, provider.embed.bind(provider), config.embeddingBatchSize, repoName);
174
+ await saveEmbeddings(embeddingPath, embeddings);
175
+ await saveEmbeddingMeta(metaPath, {
176
+ model: provider.model,
177
+ provider: config.embeddingProvider,
178
+ dimensions: provider.dimensions,
179
+ symbol_count: embeddings.size,
180
+ updated_at: Date.now(),
181
+ });
182
+ embeddingCaches.set(repoName, embeddings);
183
+ }
184
+ catch (err) {
185
+ const message = err instanceof Error ? err.message : String(err);
186
+ console.error(`[codesift] Embedding failed for ${repoName}: ${message}`);
187
+ }
188
+ }
189
+ /**
190
+ * Read files in parallel batches and split each into chunks.
191
+ */
192
+ async function readAndChunkFiles(fileEntries, rootPath, repoName, symbols) {
193
+ const allChunks = [];
194
+ for (let i = 0; i < fileEntries.length; i += PARSE_CONCURRENCY) {
195
+ const batch = fileEntries.slice(i, i + PARSE_CONCURRENCY);
196
+ const batchResults = await Promise.all(batch.map(async (entry) => {
197
+ const fullPath = join(rootPath, entry.path);
198
+ try {
199
+ const content = await readFile(fullPath, "utf-8");
200
+ if (symbols) {
201
+ const fileSymbols = symbols
202
+ .filter((s) => s.file === entry.path)
203
+ .map((s) => ({ name: s.name, start_line: s.start_line, end_line: s.end_line }));
204
+ return chunkBySymbols(entry.path, content, repoName, fileSymbols);
205
+ }
206
+ return chunkFile(entry.path, content, repoName);
207
+ }
208
+ catch (err) {
209
+ const message = err instanceof Error ? err.message : String(err);
210
+ console.warn(`[codesift] Failed to read ${entry.path} for chunking: ${message}`);
211
+ return [];
212
+ }
213
+ }));
214
+ for (const chunks of batchResults) {
215
+ allChunks.push(...chunks);
216
+ }
217
+ }
218
+ return allChunks;
219
+ }
220
+ /**
221
+ * Embed file chunks using the configured embedding provider.
222
+ * Non-fatal — symbol-level and BM25 search still work if this fails.
223
+ */
224
+ async function embedChunks(fileEntries, rootPath, repoName, indexPath, config, symbols) {
225
+ if (!config.embeddingProvider)
226
+ return;
227
+ const chunkPath = getChunkPath(indexPath);
228
+ const chunkEmbeddingPath = getChunkEmbeddingPath(indexPath);
229
+ try {
230
+ const provider = createEmbeddingProvider(config.embeddingProvider, config);
231
+ const existingChunkEmbeddings = await loadChunkEmbeddings(chunkEmbeddingPath) ?? new Map();
232
+ const allChunks = await readAndChunkFiles(fileEntries, rootPath, repoName, symbols);
233
+ if (allChunks.length > 0) {
234
+ const chunkTexts = new Map(allChunks.map((c) => [c.id, c.text]));
235
+ const chunkEmbeddings = await batchEmbed(chunkTexts, existingChunkEmbeddings, provider.embed.bind(provider), CHUNK_EMBEDDING_BATCH_SIZE, `${repoName}:chunks`);
236
+ await saveChunks(chunkPath, allChunks);
237
+ await saveChunkEmbeddings(chunkEmbeddingPath, chunkEmbeddings);
238
+ }
239
+ }
240
+ catch (err) {
241
+ const message = err instanceof Error ? err.message : String(err);
242
+ console.error(`[codesift] Chunk embedding failed for ${repoName}: ${message}`);
243
+ }
244
+ }
130
245
  export async function indexFolder(folderPath, options) {
246
+ if (!folderPath || typeof folderPath !== "string") {
247
+ throw new Error("folderPath is required and must be a non-empty string");
248
+ }
131
249
  const config = loadConfig();
132
250
  const startTime = Date.now();
133
251
  const rootPath = resolve(folderPath);
134
252
  const repoName = getRepoName(rootPath);
135
253
  const indexPath = getIndexPath(config.dataDir, rootPath);
136
- // Check for incremental update
137
- if (options?.incremental) {
138
- const existing = await loadIndex(indexPath);
139
- if (existing) {
140
- // For now, incremental just returns existing stats.
141
- // Full incremental support comes via file watcher.
142
- return {
143
- repo: repoName,
144
- root: rootPath,
145
- file_count: existing.file_count,
146
- symbol_count: existing.symbol_count,
147
- duration_ms: Date.now() - startTime,
148
- };
254
+ // Walk directory and collect parseable files
255
+ const files = await walkDirectory(rootPath, {
256
+ includePaths: options?.include_paths,
257
+ fileFilter: (ext, name) => !!getLanguageForExtension(ext) || (name?.startsWith(".env") ?? false),
258
+ });
259
+ // mtime-based incremental: skip files unchanged since last index
260
+ const existing = await loadIndex(indexPath);
261
+ const mtimeMap = new Map();
262
+ if (existing) {
263
+ for (const f of existing.files) {
264
+ if (f.mtime_ms)
265
+ mtimeMap.set(f.path, f.mtime_ms);
149
266
  }
150
267
  }
151
- // Walk directory and collect parseable files
152
- const files = await walkDirectory(rootPath, options?.include_paths);
153
- // Parse all files and extract symbols
154
- const { symbols, fileEntries } = await parseFiles(files, rootPath, repoName);
155
- // Build and cache BM25 index
268
+ const filesToParse = [];
269
+ const keptSymbols = [];
270
+ const keptEntries = [];
271
+ if (mtimeMap.size > 0) {
272
+ const { stat } = await import("node:fs/promises");
273
+ for (const filePath of files) {
274
+ const relPath = relative(rootPath, filePath);
275
+ const prevMtime = mtimeMap.get(relPath);
276
+ if (prevMtime !== undefined) {
277
+ const fileEntry = existing.files.find((f) => f.path === relPath);
278
+ // Force re-parse if file is marked stale (callee signature changed)
279
+ if (fileEntry?.stale) {
280
+ filesToParse.push(filePath);
281
+ continue;
282
+ }
283
+ try {
284
+ const st = await stat(filePath);
285
+ if (Math.round(st.mtimeMs) === prevMtime) {
286
+ // File unchanged — keep existing symbols
287
+ const fileSymbols = existing.symbols.filter((s) => s.file === relPath);
288
+ if (fileEntry) {
289
+ keptSymbols.push(...fileSymbols);
290
+ keptEntries.push(fileEntry);
291
+ continue;
292
+ }
293
+ }
294
+ }
295
+ catch { /* file may have been deleted — reparse */ }
296
+ }
297
+ filesToParse.push(filePath);
298
+ }
299
+ }
300
+ else {
301
+ filesToParse.push(...files);
302
+ }
303
+ // Parse only changed/new files
304
+ const { symbols: parsedSymbols, fileEntries: parsedEntries } = await parseFiles(filesToParse, rootPath, repoName);
305
+ const symbols = [...keptSymbols, ...parsedSymbols];
306
+ const fileEntries = [...keptEntries, ...parsedEntries];
307
+ // Dirty propagation: detect signature changes and mark caller files stale
308
+ if (existing && filesToParse.length > 0 && filesToParse.length < files.length) {
309
+ const staleFiles = propagateDirtySignatures(existing.symbols, symbols, fileEntries);
310
+ if (staleFiles.size > 0) {
311
+ console.error(`[codesift] Dirty propagation: ${staleFiles.size} caller files marked stale`);
312
+ }
313
+ }
314
+ // Build and cache BM25 index; invalidate code index cache
156
315
  const bm25 = buildBM25Index(symbols);
157
316
  bm25Indexes.set(repoName, bm25);
158
- // Build code index
317
+ codeIndexes.delete(repoName);
318
+ // Sanity check: don't overwrite a complete index with a partial one
319
+ // (WASM crash or walk failure can produce truncated results)
320
+ const DROP_THRESHOLD = 0.5; // Reject if new index has <50% of old file count
321
+ if (existing && fileEntries.length < existing.file_count * DROP_THRESHOLD && existing.file_count > 50) {
322
+ console.error(`[codesift] SANITY CHECK FAILED for ${repoName}: ` +
323
+ `new index has ${fileEntries.length} files vs ${existing.file_count} previously. ` +
324
+ `Keeping old index. Use invalidate_cache + index_folder to force reindex.`);
325
+ return {
326
+ repo: repoName,
327
+ root: rootPath,
328
+ file_count: existing.file_count,
329
+ symbol_count: existing.symbol_count,
330
+ duration_ms: Date.now() - startTime,
331
+ };
332
+ }
333
+ // Build and save code index
159
334
  const codeIndex = {
160
335
  repo: repoName,
161
336
  root: rootPath,
@@ -166,67 +341,15 @@ export async function indexFolder(folderPath, options) {
166
341
  symbol_count: symbols.length,
167
342
  file_count: fileEntries.length,
168
343
  };
169
- // Save index to disk
170
344
  await saveIndex(indexPath, codeIndex);
171
- // Embed symbols if an embedding provider is configured (non-fatal if it fails)
172
- if (config.embeddingProvider) {
173
- const embeddingPath = getEmbeddingPath(indexPath);
174
- const metaPath = getEmbeddingMetaPath(indexPath);
175
- try {
176
- const provider = createEmbeddingProvider(config.embeddingProvider, config);
177
- const symbolTexts = new Map(symbols.map((s) => [s.id, buildSymbolText(s)]));
178
- const existing = await loadEmbeddings(embeddingPath);
179
- const embeddings = await batchEmbed(symbolTexts, existing, provider.embed.bind(provider), config.embeddingBatchSize);
180
- await saveEmbeddings(embeddingPath, embeddings);
181
- await saveEmbeddingMeta(metaPath, {
182
- model: provider.model,
183
- provider: config.embeddingProvider,
184
- dimensions: provider.dimensions,
185
- symbol_count: embeddings.size,
186
- updated_at: Date.now(),
187
- });
188
- embeddingCaches.set(repoName, embeddings);
189
- }
190
- catch (err) {
191
- const message = err instanceof Error ? err.message : String(err);
192
- console.error(`[codesift] Embedding failed for ${repoName}: ${message}`);
193
- // Non-fatal — BM25 search still works
194
- }
195
- }
196
- // Embed file chunks if an embedding provider is configured (non-fatal if it fails)
197
- if (config.embeddingProvider) {
198
- const chunkPath = getChunkPath(indexPath);
199
- const chunkEmbeddingPath = getChunkEmbeddingPath(indexPath);
200
- try {
201
- const provider = createEmbeddingProvider(config.embeddingProvider, config);
202
- // Build chunks for all indexed files
203
- const allChunks = [];
204
- for (const entry of fileEntries) {
205
- const fullPath = join(rootPath, entry.path);
206
- try {
207
- const content = await readFile(fullPath, "utf-8");
208
- const fileChunks = chunkFile(entry.path, content, repoName);
209
- allChunks.push(...fileChunks);
210
- }
211
- catch {
212
- // Skip unreadable files
213
- }
214
- }
215
- if (allChunks.length > 0) {
216
- // Load existing chunk embeddings to avoid re-embedding unchanged chunks
217
- const existingChunkEmbeddings = await loadChunkEmbeddings(chunkEmbeddingPath) ?? new Map();
218
- const chunkTexts = new Map(allChunks.map((c) => [c.id, c.text]));
219
- const chunkEmbeddings = await batchEmbed(chunkTexts, existingChunkEmbeddings, provider.embed.bind(provider), 96);
220
- await saveChunks(chunkPath, allChunks);
221
- await saveChunkEmbeddings(chunkEmbeddingPath, chunkEmbeddings);
222
- }
223
- }
224
- catch (err) {
225
- const message = err instanceof Error ? err.message : String(err);
226
- console.error(`[codesift] Chunk embedding failed for ${repoName}: ${message}`);
227
- // Non-fatal — symbol-level and BM25 search still work
228
- }
229
- }
345
+ // Embed symbols and chunks in background (non-fatal, don't block MCP response)
346
+ // Large repos (71K symbols) can take minutes — fire-and-forget to prevent timeout
347
+ embedSymbols(symbols, indexPath, repoName, config)
348
+ .then(() => embedChunks(fileEntries, rootPath, repoName, indexPath, config, symbols))
349
+ .catch((err) => {
350
+ const msg = err instanceof Error ? err.message : String(err);
351
+ console.error(`[codesift] Background embedding failed for ${repoName}: ${msg}`);
352
+ });
230
353
  // Register in the global registry
231
354
  const meta = {
232
355
  name: repoName,
@@ -237,20 +360,19 @@ export async function indexFolder(folderPath, options) {
237
360
  updated_at: Date.now(),
238
361
  };
239
362
  await registerRepo(config.registryPath, meta);
363
+ // Capture git HEAD for auto-refresh tracking
364
+ try {
365
+ const head = execFileSync("git", ["rev-parse", "HEAD"], {
366
+ cwd: rootPath, encoding: "utf-8", timeout: 5000,
367
+ }).trim();
368
+ await updateRepoMeta(config.registryPath, repoName, { last_git_commit: head });
369
+ }
370
+ catch {
371
+ // Not a git repo — skip
372
+ }
240
373
  // Start file watcher for incremental updates (unless disabled)
241
- const shouldWatch = options?.watch !== false;
242
- if (shouldWatch) {
243
- const existingWatcher = activeWatchers.get(repoName);
244
- if (existingWatcher) {
245
- await stopWatcher(existingWatcher);
246
- }
247
- const watcher = startWatcher(rootPath, (changedFile) => {
248
- handleFileChange(rootPath, repoName, indexPath, changedFile).catch((err) => {
249
- const message = err instanceof Error ? err.message : String(err);
250
- console.error(`[codesift] Watcher error for ${changedFile}: ${message}`);
251
- });
252
- });
253
- activeWatchers.set(repoName, watcher);
374
+ if (options?.watch !== false) {
375
+ await setupWatcher(rootPath, repoName, indexPath);
254
376
  }
255
377
  return {
256
378
  repo: repoName,
@@ -294,7 +416,7 @@ export async function indexRepo(url, options) {
294
416
  if (options?.branch)
295
417
  args.push("--branch", options.branch);
296
418
  args.push("--", url, cloneTarget);
297
- execFileSync("git", args, { stdio: "pipe", timeout: 120_000 });
419
+ execFileSync("git", args, { stdio: "pipe", timeout: GIT_CLONE_TIMEOUT_MS });
298
420
  }
299
421
  else {
300
422
  // Pull latest changes
@@ -302,22 +424,24 @@ export async function indexRepo(url, options) {
302
424
  if (options?.branch) {
303
425
  execFileSync("git", ["-C", cloneTarget, "checkout", options.branch], {
304
426
  stdio: "pipe",
305
- timeout: 30_000,
427
+ timeout: GIT_CHECKOUT_TIMEOUT_MS,
306
428
  });
307
429
  }
308
430
  execFileSync("git", ["-C", cloneTarget, "pull", "--ff-only"], {
309
431
  stdio: "pipe",
310
- timeout: 60_000,
432
+ timeout: GIT_PULL_TIMEOUT_MS,
311
433
  });
312
434
  }
313
- catch {
435
+ catch (err) {
314
436
  // Pull may fail if detached HEAD — force fresh clone
437
+ const message = err instanceof Error ? err.message : String(err);
438
+ console.warn(`[codesift] Git pull failed for ${urlBasename}, re-cloning: ${message}`);
315
439
  await rm(cloneTarget, { recursive: true, force: true });
316
440
  const args = ["clone", "--depth", "1"];
317
441
  if (options?.branch)
318
442
  args.push("--branch", options.branch);
319
443
  args.push("--", url, cloneTarget);
320
- execFileSync("git", args, { stdio: "pipe", timeout: 120_000 });
444
+ execFileSync("git", args, { stdio: "pipe", timeout: GIT_CLONE_TIMEOUT_MS });
321
445
  }
322
446
  }
323
447
  // Index the cloned repo (no watcher for remote repos)
@@ -326,50 +450,72 @@ export async function indexRepo(url, options) {
326
450
  watch: false,
327
451
  });
328
452
  }
453
/**
 * Install the file watcher for a repo, first stopping any watcher that is
 * already registered under the same repo name.
 *
 * Change and delete events are forwarded to `handleFileChange` and
 * `handleFileDelete`; a rejection from either handler is logged to stderr and
 * never propagates out of the watcher callback.
 *
 * @param rootPath - Directory passed to `startWatcher`.
 * @param repoName - Key under which the watcher is stored in `activeWatchers`.
 * @param indexPath - Index file path forwarded to both handlers.
 */
async function setupWatcher(rootPath, repoName, indexPath) {
    const describe = (err) => (err instanceof Error ? err.message : String(err));
    const onChange = (changedFile) => {
        handleFileChange(rootPath, repoName, indexPath, changedFile).catch((err) => {
            console.error(`[codesift] Watcher error for ${changedFile}: ${describe(err)}`);
        });
    };
    const onDelete = (deletedFile) => {
        handleFileDelete(repoName, indexPath, deletedFile).catch((err) => {
            console.error(`[codesift] Watcher delete error for ${deletedFile}: ${describe(err)}`);
        });
    };
    // Shut the previous watcher down before starting its replacement.
    const previous = activeWatchers.get(repoName);
    if (previous) {
        await stopWatcher(previous);
    }
    activeWatchers.set(repoName, startWatcher(rootPath, onChange, onDelete));
}
329
474
/**
 * Watcher callback for a changed file: re-parses that single file and writes
 * the result into the on-disk index.
 *
 * Returns without touching the index when `parseOneFile` yields nothing.
 * A failing secret scan is logged as a warning and does not abort the update.
 *
 * @param repoRoot - Root directory of the repo.
 * @param repoName - Name of the repo whose caches are invalidated.
 * @param indexPath - Path of the index file to update.
 * @param relativeFile - Changed file, relative to `repoRoot`.
 */
async function handleFileChange(repoRoot, repoName, indexPath, relativeFile) {
    const absoluteFile = join(repoRoot, relativeFile);
    // Invalidate cached findings so the next scan sees the updated file contents.
    scanOnChanged(repoName, relativeFile);
    const parsed = await parseOneFile(absoluteFile, repoRoot, repoName);
    if (!parsed) {
        return;
    }
    await saveIncremental(indexPath, relativeFile, parsed.symbols, parsed.entry);
    const { secretScanEnabled } = loadConfig();
    if (secretScanEnabled) {
        try {
            await scanFileForSecrets(absoluteFile, relativeFile, repoName, parsed.symbols);
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            console.warn(`[codesift] Secret scan failed for ${relativeFile}: ${reason}`);
        }
    }
    // Drop the in-memory caches; they are rebuilt lazily on the next query.
    for (const cache of [bm25Indexes, codeIndexes, embeddingCaches]) {
        cache.delete(repoName);
    }
}
370
- export async function listAllRepos() {
500
/**
 * Watcher callback for a deleted file.
 *
 * Removes the file from the on-disk index, drops the repo's in-memory caches
 * (rebuilt lazily on the next query) and finally notifies the scanner via
 * `scanOnDeleted`.
 *
 * @param repoName - Name of the repo the file belonged to.
 * @param indexPath - Path of the index file to update.
 * @param relativeFile - Deleted file, as reported by the watcher.
 */
async function handleFileDelete(repoName, indexPath, relativeFile) {
    await removeFileFromIndex(indexPath, relativeFile);
    for (const cache of [bm25Indexes, codeIndexes, embeddingCaches]) {
        cache.delete(repoName);
    }
    scanOnDeleted(repoName, relativeFile);
}
512
/**
 * List the repos recorded in the registry.
 *
 * @param options - Optional settings. Only `compact` is read.
 * @returns The full registry entries when `options.compact === false`;
 *   otherwise (the default) just each entry's `name`.
 */
export async function listAllRepos(options) {
    const { registryPath } = loadConfig();
    const registered = await listRegistryRepos(registryPath);
    const wantsFullEntries = options?.compact === false;
    // Default is the ultra-compact form: agents only need the identifier.
    return wantsFullEntries ? registered : registered.map((entry) => entry.name);
}
374
520
  export async function invalidateCache(repoName) {
375
521
  const config = loadConfig();
@@ -384,13 +530,15 @@ export async function invalidateCache(repoName) {
384
530
  }
385
531
  // Remove in-memory caches
386
532
  bm25Indexes.delete(repoName);
533
+ codeIndexes.delete(repoName);
387
534
  embeddingCaches.delete(repoName);
388
535
  // Delete index file + embedding files + chunk files
389
536
  const embeddingPath = getEmbeddingPath(meta.index_path);
390
537
  const embeddingMetaPath = getEmbeddingMetaPath(meta.index_path);
391
538
  const chunkPath = getChunkPath(meta.index_path);
392
539
  const chunkEmbeddingPath = getChunkEmbeddingPath(meta.index_path);
393
- for (const fp of [meta.index_path, embeddingPath, embeddingMetaPath, chunkPath, chunkEmbeddingPath]) {
540
+ const graphStorePath = getGraphPath(meta.index_path);
541
+ for (const fp of [meta.index_path, embeddingPath, embeddingMetaPath, chunkPath, chunkEmbeddingPath, graphStorePath]) {
394
542
  try {
395
543
  await unlink(fp);
396
544
  }
@@ -400,11 +548,159 @@ export async function invalidateCache(repoName) {
400
548
  await removeRepo(config.registryPath, repoName);
401
549
  return true;
402
550
  }
551
/**
 * Re-index a single file without walking the whole repo.
 *
 * The owning repo is the registered repo whose root contains the file; when
 * roots are nested, the longest (most specific) root wins. The file is
 * re-parsed and written into the on-disk index via `saveIncremental`,
 * optionally scanned for secrets, and the repo's in-memory BM25, code-index
 * and embedding caches are dropped so they are rebuilt lazily on next access.
 *
 * @param filePath - File to re-index; resolved with `path.resolve`.
 * @returns `{ repo, file, symbol_count, duration_ms }`, plus `skipped: true`
 *   when the stored mtime matches the file's current mtime, or
 *   `secrets_warning` when the secret scan reported findings.
 * @throws {Error} When no registered repo contains the file, or when
 *   `parseOneFile` returns nothing. Errors from `stat` (e.g. the file no
 *   longer exists) propagate unchanged.
 */
export async function indexFile(filePath) {
    const absPath = resolve(filePath);
    const config = loadConfig();
    const repos = await listRegistryRepos(config.registryPath);
    // Containment is decided with path.relative instead of a hard-coded "/"
    // prefix test, so it also works with Windows separators and with roots
    // stored with a trailing separator. A relative path that begins with a
    // ".." segment (or, on Windows, is absolute because the drives differ)
    // points outside the root.
    const escapesRoot = process.platform === "win32"
        ? /^(?:\.\.(?:[\\/]|$)|[A-Za-z]:|[\\/])/
        : /^\.\.(?:\/|$)/;
    const containsFile = (root) => {
        if (typeof root !== "string")
            return false;
        const rel = relative(root, absPath);
        // "" means absPath is the root itself, which the original check also accepted.
        return rel === "" || !escapesRoot.test(rel);
    };
    // Find the most specific repo root that contains this file
    const matchingRepo = repos
        .filter((r) => containsFile(r.root))
        .sort((a, b) => b.root.length - a.root.length)[0];
    if (!matchingRepo) {
        throw new Error(`No indexed repo contains "${absPath}". Run index_folder first.`);
    }
    const startTime = Date.now();
    const relPath = relative(matchingRepo.root, absPath);
    // mtime check — skip if unchanged
    const existing = await loadIndex(matchingRepo.index_path);
    if (existing) {
        const prevEntry = existing.files.find((f) => f.path === relPath);
        if (prevEntry?.mtime_ms) {
            const st = await stat(absPath);
            if (Math.round(st.mtimeMs) === prevEntry.mtime_ms) {
                return {
                    repo: matchingRepo.name,
                    file: relPath,
                    symbol_count: prevEntry.symbol_count,
                    duration_ms: Date.now() - startTime,
                    skipped: true,
                };
            }
        }
    }
    const result = await parseOneFile(absPath, matchingRepo.root, matchingRepo.name);
    if (!result) {
        throw new Error(`Failed to parse "${relPath}"`);
    }
    await saveIncremental(matchingRepo.index_path, relPath, result.symbols, result.entry);
    // The secret scan is best-effort: a failure is logged and reported as zero findings.
    let secretFindingsCount = 0;
    if (config.secretScanEnabled) {
        try {
            secretFindingsCount = (await scanFileForSecrets(absPath, relPath, matchingRepo.name, result.symbols)).length;
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            console.warn(`[codesift] Secret scan failed for ${relPath}: ${message}`);
        }
    }
    // Invalidate caches — lazy rebuild on next query via getBM25Index()
    bm25Indexes.delete(matchingRepo.name);
    codeIndexes.delete(matchingRepo.name);
    embeddingCaches.delete(matchingRepo.name);
    let secretsWarning;
    if (secretFindingsCount > 0) {
        secretsWarning = `\u26A0 ${secretFindingsCount} potential secret(s) detected`;
    }
    return {
        repo: matchingRepo.name,
        file: relPath,
        symbol_count: result.symbols.length,
        duration_ms: Date.now() - startTime,
        ...(secretsWarning ? { secrets_warning: secretsWarning } : {}),
    };
}
617
// ---------------------------------------------------------------------------
// Git-based auto-refresh — transparent freshness check before index access
// ---------------------------------------------------------------------------
/** Repo name → `Date.now()` timestamp of that repo's most recent freshness check. */
const freshnessChecked = new Map();
/** Minimum time between two freshness checks of the same repo, in milliseconds. */
const FRESHNESS_INTERVAL_MS = 60_000;
/** Largest number of changed files that is still re-indexed file by file. */
const MAX_DIFF_FILES = 50;
623
/**
 * Ensure the index for a repo is fresh relative to git HEAD.
 *
 * Throttled per repo by `FRESHNESS_INTERVAL_MS`. When HEAD differs from the
 * commit stored in the registry, the files changed between the two commits are
 * re-indexed one by one. A full incremental `indexFolder` run is used instead
 * when there is no stored commit, when more than `MAX_DIFF_FILES` files
 * changed, or when the diff against the stored commit cannot be computed
 * (for example after a rebase removed that commit).
 *
 * @param repoName - Registry name of the repo to check.
 * @returns `{ status: "fresh" }` when throttled or HEAD is unchanged,
 *   `{ status: "skipped" }` when the repo is not registered or `git rev-parse`
 *   fails, otherwise `{ status: "refreshed", files_updated }` where
 *   `files_updated` is the number of paths reported by `git diff`.
 */
export async function ensureIndexFresh(repoName) {
    const lastCheck = freshnessChecked.get(repoName);
    if (lastCheck && Date.now() - lastCheck < FRESHNESS_INTERVAL_MS) {
        return { status: "fresh" };
    }
    const config = loadConfig();
    const meta = await getRepo(config.registryPath, repoName);
    if (!meta)
        return { status: "skipped" };
    let currentCommit;
    try {
        currentCommit = execFileSync("git", ["rev-parse", "HEAD"], {
            cwd: meta.root, encoding: "utf-8", timeout: 5000,
        }).trim();
    }
    catch {
        // git could not report a HEAD for this root — record the check so it is throttled too
        freshnessChecked.set(repoName, Date.now());
        return { status: "skipped" };
    }
    if (meta.last_git_commit === currentCommit) {
        freshnessChecked.set(repoName, Date.now());
        return { status: "fresh" };
    }
    // HEAD moved — find changed files
    let changedFiles = [];
    // Without a stored baseline commit there is nothing to diff against.
    let needsFullReindex = !meta.last_git_commit;
    if (meta.last_git_commit) {
        try {
            // NOTE(review): ACMR leaves out deletions, so files removed between the
            // two commits are not dropped from the index on this path — confirm
            // that another mechanism covers them.
            const diff = execFileSync("git", [
                "diff", "--name-only", "--diff-filter=ACMR",
                `${meta.last_git_commit}..${currentCommit}`,
            ], {
                cwd: meta.root, encoding: "utf-8", timeout: 10_000,
            });
            changedFiles = diff.trim().split("\n").filter(Boolean);
        }
        catch (err) {
            // Stored commit gone (rebase/squash) — fall back to a full incremental
            // reindex. Previously this case reindexed nothing, yet the stored
            // commit was still advanced and "refreshed" was reported.
            const message = err instanceof Error ? err.message : String(err);
            console.warn(`[codesift] git diff failed for ${repoName}, running full incremental reindex: ${message}`);
            changedFiles = [];
            needsFullReindex = true;
        }
    }
    if (changedFiles.length > MAX_DIFF_FILES) {
        needsFullReindex = true;
    }
    if (needsFullReindex) {
        await indexFolder(meta.root, { incremental: true, watch: false });
    }
    else {
        for (const file of changedFiles) {
            try {
                await indexFile(join(meta.root, file));
            }
            catch {
                // File deleted or unparseable — skip
            }
        }
    }
    await updateRepoMeta(config.registryPath, repoName, {
        last_git_commit: currentCommit,
        updated_at: Date.now(),
    });
    bm25Indexes.delete(repoName);
    codeIndexes.delete(repoName);
    embeddingCaches.delete(repoName);
    freshnessChecked.set(repoName, Date.now());
    return { status: "refreshed", files_updated: changedFiles.length };
}
691
/**
 * Clear every recorded freshness-check timestamp, so the next
 * `ensureIndexFresh` call for any repo is not throttled.
 * Exported for testing.
 */
export function resetFreshnessCache() {
    freshnessChecked.clear();
}
695
+ // ---------------------------------------------------------------------------
696
+ // Index access — with auto-refresh
697
+ // ---------------------------------------------------------------------------
403
698
  /**
404
699
  * Get the in-memory BM25 index for a repo.
405
- * Loads from disk if not cached.
700
+ * Loads from disk if not cached. Auto-refreshes if git HEAD moved.
406
701
  */
407
702
  export async function getBM25Index(repoName) {
703
+ await ensureIndexFresh(repoName);
408
704
  const cached = bm25Indexes.get(repoName);
409
705
  if (cached)
410
706
  return cached;
@@ -421,13 +717,25 @@ export async function getBM25Index(repoName) {
421
717
  }
422
718
/**
 * Get the code index for a repo.
 *
 * Runs the git freshness check first, then serves the index from the
 * in-memory `codeIndexes` cache, loading it from disk (and caching it) on a
 * miss.
 *
 * @param repoName - Registry name of the repo.
 * @returns The code index, or `null` when the repo is not registered or no
 *   index could be loaded from its `index_path`.
 */
export async function getCodeIndex(repoName) {
    await ensureIndexFresh(repoName);
    const memoized = codeIndexes.get(repoName);
    if (memoized) {
        return memoized;
    }
    const { registryPath } = loadConfig();
    const repoMeta = await getRepo(registryPath, repoName);
    const loaded = repoMeta ? await loadIndex(repoMeta.index_path) : null;
    if (!loaded) {
        return null;
    }
    codeIndexes.set(repoName, loaded);
    return loaded;
}
432
740
  /**
433
741
  * Get the in-memory embedding cache for a repo.