codesift-mcp 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299)
  1. package/LICENSE +66 -21
  2. package/README.md +346 -56
  3. package/dist/cli/args.d.ts +2 -0
  4. package/dist/cli/args.d.ts.map +1 -1
  5. package/dist/cli/args.js +11 -0
  6. package/dist/cli/args.js.map +1 -1
  7. package/dist/cli/commands.d.ts.map +1 -1
  8. package/dist/cli/commands.js +177 -67
  9. package/dist/cli/commands.js.map +1 -1
  10. package/dist/cli/help.d.ts +1 -1
  11. package/dist/cli/help.d.ts.map +1 -1
  12. package/dist/cli/help.js +157 -0
  13. package/dist/cli/help.js.map +1 -1
  14. package/dist/cli/hooks.d.ts +3 -0
  15. package/dist/cli/hooks.d.ts.map +1 -0
  16. package/dist/cli/hooks.js +163 -0
  17. package/dist/cli/hooks.js.map +1 -0
  18. package/dist/cli/setup.d.ts +25 -0
  19. package/dist/cli/setup.d.ts.map +1 -0
  20. package/dist/cli/setup.js +400 -0
  21. package/dist/cli/setup.js.map +1 -0
  22. package/dist/config.d.ts +2 -0
  23. package/dist/config.d.ts.map +1 -1
  24. package/dist/config.js +2 -0
  25. package/dist/config.js.map +1 -1
  26. package/dist/formatters-shortening.d.ts +7 -0
  27. package/dist/formatters-shortening.d.ts.map +1 -0
  28. package/dist/formatters-shortening.js +68 -0
  29. package/dist/formatters-shortening.js.map +1 -0
  30. package/dist/formatters.d.ts +314 -0
  31. package/dist/formatters.d.ts.map +1 -0
  32. package/dist/formatters.js +396 -0
  33. package/dist/formatters.js.map +1 -0
  34. package/dist/instructions.d.ts +6 -0
  35. package/dist/instructions.d.ts.map +1 -0
  36. package/dist/instructions.js +72 -0
  37. package/dist/instructions.js.map +1 -0
  38. package/dist/lsp/lsp-client.d.ts +21 -0
  39. package/dist/lsp/lsp-client.d.ts.map +1 -0
  40. package/dist/lsp/lsp-client.js +122 -0
  41. package/dist/lsp/lsp-client.js.map +1 -0
  42. package/dist/lsp/lsp-manager.d.ts +12 -0
  43. package/dist/lsp/lsp-manager.d.ts.map +1 -0
  44. package/dist/lsp/lsp-manager.js +82 -0
  45. package/dist/lsp/lsp-manager.js.map +1 -0
  46. package/dist/lsp/lsp-servers.d.ts +13 -0
  47. package/dist/lsp/lsp-servers.d.ts.map +1 -0
  48. package/dist/lsp/lsp-servers.js +57 -0
  49. package/dist/lsp/lsp-servers.js.map +1 -0
  50. package/dist/lsp/lsp-tools.d.ts +67 -0
  51. package/dist/lsp/lsp-tools.d.ts.map +1 -0
  52. package/dist/lsp/lsp-tools.js +359 -0
  53. package/dist/lsp/lsp-tools.js.map +1 -0
  54. package/dist/parser/extractors/_shared.d.ts +11 -0
  55. package/dist/parser/extractors/_shared.d.ts.map +1 -0
  56. package/dist/parser/extractors/_shared.js +38 -0
  57. package/dist/parser/extractors/_shared.js.map +1 -0
  58. package/dist/parser/extractors/astro.d.ts +15 -0
  59. package/dist/parser/extractors/astro.d.ts.map +1 -0
  60. package/dist/parser/extractors/astro.js +104 -0
  61. package/dist/parser/extractors/astro.js.map +1 -0
  62. package/dist/parser/extractors/conversation.d.ts +16 -0
  63. package/dist/parser/extractors/conversation.d.ts.map +1 -0
  64. package/dist/parser/extractors/conversation.js +196 -0
  65. package/dist/parser/extractors/conversation.js.map +1 -0
  66. package/dist/parser/extractors/go.d.ts.map +1 -1
  67. package/dist/parser/extractors/go.js +22 -45
  68. package/dist/parser/extractors/go.js.map +1 -1
  69. package/dist/parser/extractors/python.d.ts +1 -1
  70. package/dist/parser/extractors/python.d.ts.map +1 -1
  71. package/dist/parser/extractors/python.js +19 -50
  72. package/dist/parser/extractors/python.js.map +1 -1
  73. package/dist/parser/extractors/rust.d.ts +1 -1
  74. package/dist/parser/extractors/rust.d.ts.map +1 -1
  75. package/dist/parser/extractors/rust.js +7 -34
  76. package/dist/parser/extractors/rust.js.map +1 -1
  77. package/dist/parser/extractors/typescript.d.ts +1 -1
  78. package/dist/parser/extractors/typescript.d.ts.map +1 -1
  79. package/dist/parser/extractors/typescript.js +99 -68
  80. package/dist/parser/extractors/typescript.js.map +1 -1
  81. package/dist/parser/parser-manager.d.ts.map +1 -1
  82. package/dist/parser/parser-manager.js +12 -2
  83. package/dist/parser/parser-manager.js.map +1 -1
  84. package/dist/parser/symbol-extractor.d.ts +2 -0
  85. package/dist/parser/symbol-extractor.d.ts.map +1 -1
  86. package/dist/parser/symbol-extractor.js +2 -0
  87. package/dist/parser/symbol-extractor.js.map +1 -1
  88. package/dist/register-tools.d.ts +127 -0
  89. package/dist/register-tools.d.ts.map +1 -0
  90. package/dist/register-tools.js +1453 -0
  91. package/dist/register-tools.js.map +1 -0
  92. package/dist/retrieval/codebase-retrieval.d.ts +4 -26
  93. package/dist/retrieval/codebase-retrieval.d.ts.map +1 -1
  94. package/dist/retrieval/codebase-retrieval.js +105 -403
  95. package/dist/retrieval/codebase-retrieval.js.map +1 -1
  96. package/dist/retrieval/retrieval-constants.d.ts +27 -0
  97. package/dist/retrieval/retrieval-constants.d.ts.map +1 -0
  98. package/dist/retrieval/retrieval-constants.js +27 -0
  99. package/dist/retrieval/retrieval-constants.js.map +1 -0
  100. package/dist/retrieval/retrieval-schemas.d.ts +107 -0
  101. package/dist/retrieval/retrieval-schemas.d.ts.map +1 -0
  102. package/dist/retrieval/retrieval-schemas.js +102 -0
  103. package/dist/retrieval/retrieval-schemas.js.map +1 -0
  104. package/dist/retrieval/retrieval-utils.d.ts +40 -0
  105. package/dist/retrieval/retrieval-utils.d.ts.map +1 -0
  106. package/dist/retrieval/retrieval-utils.js +139 -0
  107. package/dist/retrieval/retrieval-utils.js.map +1 -0
  108. package/dist/retrieval/semantic-handlers.d.ts +8 -0
  109. package/dist/retrieval/semantic-handlers.d.ts.map +1 -0
  110. package/dist/retrieval/semantic-handlers.js +152 -0
  111. package/dist/retrieval/semantic-handlers.js.map +1 -0
  112. package/dist/search/bm25.d.ts +6 -1
  113. package/dist/search/bm25.d.ts.map +1 -1
  114. package/dist/search/bm25.js +95 -32
  115. package/dist/search/bm25.js.map +1 -1
  116. package/dist/search/chunker.d.ts +10 -0
  117. package/dist/search/chunker.d.ts.map +1 -1
  118. package/dist/search/chunker.js +63 -11
  119. package/dist/search/chunker.js.map +1 -1
  120. package/dist/search/reranker.d.ts +15 -0
  121. package/dist/search/reranker.d.ts.map +1 -0
  122. package/dist/search/reranker.js +126 -0
  123. package/dist/search/reranker.js.map +1 -0
  124. package/dist/search/semantic.d.ts +1 -1
  125. package/dist/search/semantic.d.ts.map +1 -1
  126. package/dist/search/semantic.js +40 -45
  127. package/dist/search/semantic.js.map +1 -1
  128. package/dist/server-helpers.d.ts +29 -0
  129. package/dist/server-helpers.d.ts.map +1 -0
  130. package/dist/server-helpers.js +312 -0
  131. package/dist/server-helpers.js.map +1 -0
  132. package/dist/server.d.ts +1 -1
  133. package/dist/server.d.ts.map +1 -1
  134. package/dist/server.js +11 -271
  135. package/dist/server.js.map +1 -1
  136. package/dist/storage/_shared.d.ts +9 -0
  137. package/dist/storage/_shared.d.ts.map +1 -0
  138. package/dist/storage/_shared.js +26 -0
  139. package/dist/storage/_shared.js.map +1 -0
  140. package/dist/storage/chunk-store.d.ts.map +1 -1
  141. package/dist/storage/chunk-store.js +23 -63
  142. package/dist/storage/chunk-store.js.map +1 -1
  143. package/dist/storage/embedding-store.d.ts +6 -3
  144. package/dist/storage/embedding-store.d.ts.map +1 -1
  145. package/dist/storage/embedding-store.js +54 -30
  146. package/dist/storage/embedding-store.js.map +1 -1
  147. package/dist/storage/graph-store.d.ts +48 -0
  148. package/dist/storage/graph-store.d.ts.map +1 -0
  149. package/dist/storage/graph-store.js +52 -0
  150. package/dist/storage/graph-store.js.map +1 -0
  151. package/dist/storage/index-store.d.ts +5 -0
  152. package/dist/storage/index-store.d.ts.map +1 -1
  153. package/dist/storage/index-store.js +28 -16
  154. package/dist/storage/index-store.js.map +1 -1
  155. package/dist/storage/registry.d.ts +4 -0
  156. package/dist/storage/registry.d.ts.map +1 -1
  157. package/dist/storage/registry.js +16 -16
  158. package/dist/storage/registry.js.map +1 -1
  159. package/dist/storage/usage-stats.d.ts +6 -0
  160. package/dist/storage/usage-stats.d.ts.map +1 -1
  161. package/dist/storage/usage-stats.js +59 -11
  162. package/dist/storage/usage-stats.js.map +1 -1
  163. package/dist/storage/usage-tracker.d.ts +3 -0
  164. package/dist/storage/usage-tracker.d.ts.map +1 -1
  165. package/dist/storage/usage-tracker.js +50 -132
  166. package/dist/storage/usage-tracker.js.map +1 -1
  167. package/dist/storage/watcher.d.ts +2 -1
  168. package/dist/storage/watcher.d.ts.map +1 -1
  169. package/dist/storage/watcher.js +16 -16
  170. package/dist/storage/watcher.js.map +1 -1
  171. package/dist/tools/ast-query-tools.d.ts +29 -0
  172. package/dist/tools/ast-query-tools.d.ts.map +1 -0
  173. package/dist/tools/ast-query-tools.js +110 -0
  174. package/dist/tools/ast-query-tools.js.map +1 -0
  175. package/dist/tools/boundary-tools.d.ts +31 -0
  176. package/dist/tools/boundary-tools.d.ts.map +1 -0
  177. package/dist/tools/boundary-tools.js +62 -0
  178. package/dist/tools/boundary-tools.js.map +1 -0
  179. package/dist/tools/clone-tools.d.ts +35 -0
  180. package/dist/tools/clone-tools.d.ts.map +1 -0
  181. package/dist/tools/clone-tools.js +181 -0
  182. package/dist/tools/clone-tools.js.map +1 -0
  183. package/dist/tools/community-tools.d.ts +23 -0
  184. package/dist/tools/community-tools.d.ts.map +1 -0
  185. package/dist/tools/community-tools.js +297 -0
  186. package/dist/tools/community-tools.js.map +1 -0
  187. package/dist/tools/complexity-tools.d.ts +34 -0
  188. package/dist/tools/complexity-tools.d.ts.map +1 -0
  189. package/dist/tools/complexity-tools.js +135 -0
  190. package/dist/tools/complexity-tools.js.map +1 -0
  191. package/dist/tools/context-tools.d.ts +44 -3
  192. package/dist/tools/context-tools.d.ts.map +1 -1
  193. package/dist/tools/context-tools.js +329 -99
  194. package/dist/tools/context-tools.js.map +1 -1
  195. package/dist/tools/conversation-tools.d.ts +107 -0
  196. package/dist/tools/conversation-tools.d.ts.map +1 -0
  197. package/dist/tools/conversation-tools.js +419 -0
  198. package/dist/tools/conversation-tools.js.map +1 -0
  199. package/dist/tools/coordinator-tools.d.ts +73 -0
  200. package/dist/tools/coordinator-tools.d.ts.map +1 -0
  201. package/dist/tools/coordinator-tools.js +153 -0
  202. package/dist/tools/coordinator-tools.js.map +1 -0
  203. package/dist/tools/cross-repo-tools.d.ts +43 -0
  204. package/dist/tools/cross-repo-tools.d.ts.map +1 -0
  205. package/dist/tools/cross-repo-tools.js +55 -0
  206. package/dist/tools/cross-repo-tools.js.map +1 -0
  207. package/dist/tools/diff-tools.d.ts +4 -1
  208. package/dist/tools/diff-tools.d.ts.map +1 -1
  209. package/dist/tools/diff-tools.js +23 -5
  210. package/dist/tools/diff-tools.js.map +1 -1
  211. package/dist/tools/frequency-tools.d.ts +46 -0
  212. package/dist/tools/frequency-tools.d.ts.map +1 -0
  213. package/dist/tools/frequency-tools.js +184 -0
  214. package/dist/tools/frequency-tools.js.map +1 -0
  215. package/dist/tools/generate-tools.d.ts.map +1 -1
  216. package/dist/tools/generate-tools.js +13 -2
  217. package/dist/tools/generate-tools.js.map +1 -1
  218. package/dist/tools/graph-tools.d.ts +44 -11
  219. package/dist/tools/graph-tools.d.ts.map +1 -1
  220. package/dist/tools/graph-tools.js +147 -104
  221. package/dist/tools/graph-tools.js.map +1 -1
  222. package/dist/tools/hotspot-tools.d.ts +24 -0
  223. package/dist/tools/hotspot-tools.d.ts.map +1 -0
  224. package/dist/tools/hotspot-tools.js +122 -0
  225. package/dist/tools/hotspot-tools.js.map +1 -0
  226. package/dist/tools/impact-tools.d.ts +13 -0
  227. package/dist/tools/impact-tools.d.ts.map +1 -0
  228. package/dist/tools/impact-tools.js +238 -0
  229. package/dist/tools/impact-tools.js.map +1 -0
  230. package/dist/tools/index-tools.d.ts +44 -3
  231. package/dist/tools/index-tools.d.ts.map +1 -1
  232. package/dist/tools/index-tools.js +530 -222
  233. package/dist/tools/index-tools.js.map +1 -1
  234. package/dist/tools/memory-tools.d.ts +35 -0
  235. package/dist/tools/memory-tools.d.ts.map +1 -0
  236. package/dist/tools/memory-tools.js +229 -0
  237. package/dist/tools/memory-tools.js.map +1 -0
  238. package/dist/tools/outline-tools.d.ts +24 -13
  239. package/dist/tools/outline-tools.d.ts.map +1 -1
  240. package/dist/tools/outline-tools.js +113 -87
  241. package/dist/tools/outline-tools.js.map +1 -1
  242. package/dist/tools/pattern-tools.d.ts +32 -0
  243. package/dist/tools/pattern-tools.d.ts.map +1 -0
  244. package/dist/tools/pattern-tools.js +116 -0
  245. package/dist/tools/pattern-tools.js.map +1 -0
  246. package/dist/tools/report-tools.d.ts +5 -0
  247. package/dist/tools/report-tools.d.ts.map +1 -0
  248. package/dist/tools/report-tools.js +167 -0
  249. package/dist/tools/report-tools.js.map +1 -0
  250. package/dist/tools/review-diff-tools.d.ts +148 -0
  251. package/dist/tools/review-diff-tools.d.ts.map +1 -0
  252. package/dist/tools/review-diff-tools.js +852 -0
  253. package/dist/tools/review-diff-tools.js.map +1 -0
  254. package/dist/tools/route-tools.d.ts +32 -0
  255. package/dist/tools/route-tools.d.ts.map +1 -0
  256. package/dist/tools/route-tools.js +276 -0
  257. package/dist/tools/route-tools.js.map +1 -0
  258. package/dist/tools/search-ranker.d.ts +5 -0
  259. package/dist/tools/search-ranker.d.ts.map +1 -0
  260. package/dist/tools/search-ranker.js +142 -0
  261. package/dist/tools/search-ranker.js.map +1 -0
  262. package/dist/tools/search-tools.d.ts +24 -1
  263. package/dist/tools/search-tools.d.ts.map +1 -1
  264. package/dist/tools/search-tools.js +459 -225
  265. package/dist/tools/search-tools.js.map +1 -1
  266. package/dist/tools/secret-tools.d.ts +104 -0
  267. package/dist/tools/secret-tools.d.ts.map +1 -0
  268. package/dist/tools/secret-tools.js +410 -0
  269. package/dist/tools/secret-tools.js.map +1 -0
  270. package/dist/tools/symbol-tools.d.ts +90 -2
  271. package/dist/tools/symbol-tools.d.ts.map +1 -1
  272. package/dist/tools/symbol-tools.js +576 -42
  273. package/dist/tools/symbol-tools.js.map +1 -1
  274. package/dist/types.d.ts +34 -1
  275. package/dist/types.d.ts.map +1 -1
  276. package/dist/utils/framework-detect.d.ts +5 -0
  277. package/dist/utils/framework-detect.d.ts.map +1 -0
  278. package/dist/utils/framework-detect.js +36 -0
  279. package/dist/utils/framework-detect.js.map +1 -0
  280. package/dist/utils/glob.d.ts +19 -0
  281. package/dist/utils/glob.d.ts.map +1 -0
  282. package/dist/utils/glob.js +74 -0
  283. package/dist/utils/glob.js.map +1 -0
  284. package/dist/utils/import-graph.d.ts +29 -0
  285. package/dist/utils/import-graph.d.ts.map +1 -0
  286. package/dist/utils/import-graph.js +125 -0
  287. package/dist/utils/import-graph.js.map +1 -0
  288. package/dist/utils/test-file.d.ts.map +1 -1
  289. package/dist/utils/test-file.js +1 -0
  290. package/dist/utils/test-file.js.map +1 -1
  291. package/dist/utils/walk.d.ts +45 -0
  292. package/dist/utils/walk.d.ts.map +1 -0
  293. package/dist/utils/walk.js +87 -0
  294. package/dist/utils/walk.js.map +1 -0
  295. package/package.json +10 -4
  296. package/rules/codesift.md +187 -0
  297. package/rules/codesift.mdc +192 -0
  298. package/rules/codex.md +187 -0
  299. package/rules/gemini.md +187 -0
@@ -1,18 +1,36 @@
1
- import { readFile, readdir, stat } from "node:fs/promises";
2
- import { join, relative, extname } from "node:path";
1
+ import { readFile } from "node:fs/promises";
2
+ import { execFileSync } from "node:child_process";
3
+ import { join } from "node:path";
3
4
  import { getBM25Index, getCodeIndex } from "./index-tools.js";
4
- import { searchBM25 } from "../search/bm25.js";
5
+ import { searchBM25, applyCutoff } from "../search/bm25.js";
5
6
  import { loadConfig } from "../config.js";
6
- const DEFAULT_MAX_TEXT_MATCHES = 500;
7
- const MAX_FILE_SIZE = 1_000_000; // 1MB — skip giant files
7
+ import { walkDirectory } from "../utils/walk.js";
8
+ import { matchFilePattern } from "../utils/glob.js";
9
+ const DEFAULT_MAX_TEXT_MATCHES = 200;
8
10
  const MAX_WALK_FILES = 50_000; // Safety limit — stop walking after this many files
9
- /** Directories to skip during text search file walk */
10
- const IGNORE_DIRS = new Set([
11
- "node_modules", ".git", "dist", "build", "coverage",
12
- ".codesift", ".next", "__pycache__", ".pytest_cache",
13
- ".venv", "venv", ".tox", ".mypy_cache", ".turbo",
14
- "generated", "audit-results", ".backup", "jscpd-report",
15
- ]);
11
+ const SEARCH_TIMEOUT_MS = 30_000; // Abort search after 30s to prevent 100s+ hangs
12
+ const AUTO_GROUP_THRESHOLD = 50; // Auto-switch to group_by_file above this match count
13
+ const MAX_RESPONSE_CHARS = 80_000; // ~20K tokens — force group_by_file above this
14
+ const MAX_FIRST_MATCH_CHARS = 300; // Cap first_match preview in grouped output
15
+ const MAX_LINE_CHARS = 500; // Truncate individual match lines (minified JS/JSON can be 100K+)
16
+ const DEFAULT_TOP_K_WITH_SOURCE = 10; // Cap results when include_source=true without file_pattern
17
+ const BM25_FILTER_MULTIPLIER = 5; // Widen BM25 candidate set when filters active
18
+ const BM25_FILTER_MIN_K = 200; // Minimum candidate set size when filters active
19
+ const DEFAULT_SOURCE_CHARS_NARROW = 200; // Source truncation without file_pattern (reduce waste)
20
+ const DEFAULT_SOURCE_CHARS_WIDE = 500; // Source truncation with file_pattern
21
+ const CHARS_PER_TOKEN = 3.5; // Approximate chars-per-token for budget calculation
22
+ const DEFAULT_MAX_REGEX_RESULTS = 50; // Regex without file_pattern — tighter cap to limit timeout
23
+ const JSON_OVERHEAD_PER_MATCH = 40; // Estimated JSON serialization overhead per TextMatch
24
+ // SEC-003: Detect common catastrophic backtracking patterns (ReDoS)
25
+ const REDOS_PATTERNS = [
26
+ /\(.*[+*].*\)[+*]/, // Nested quantifiers: (a+)+ or (a*)*
27
+ /\(.*\|.*\)[+*]/, // Alternation with quantifier: (a|b)+
28
+ /\(.*[+*].*\)\{/, // Nested quantifier with range: (a+){2,}
29
+ /\([^)]*\\[dDwWsS][+*].*\)[+*]/, // Character class with nested quantifier
30
+ ];
31
+ function isSafeRegex(pattern) {
32
+ return !REDOS_PATTERNS.some((p) => p.test(pattern));
33
+ }
16
34
  /** Binary/non-text extensions to skip during text search */
17
35
  const BINARY_EXTENSIONS = new Set([
18
36
  ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".ico", ".svg",
@@ -25,128 +43,318 @@ const BINARY_EXTENSIONS = new Set([
25
43
  ".db", ".sqlite", ".sqlite3",
26
44
  ".lock",
27
45
  ]);
46
+ // ── Private helpers ─────────────────────────────────────
47
+ /** Check if a symbol matches the active kind and file_pattern filters. */
48
+ function matchesSymbolFilters(symbol, options) {
49
+ if (options?.kind && symbol.kind !== options.kind)
50
+ return false;
51
+ if (options?.file_pattern && !matchFilePattern(symbol.file, options.file_pattern))
52
+ return false;
53
+ return true;
54
+ }
28
55
  /**
29
- * Match a file path against a simple glob pattern.
30
- * Supports: "*.ts", "src/*.ts", "src/**\/*.ts", "**\/*.test.ts"
56
+ * Apply detail-level shaping, source truncation, and field cleanup.
57
+ * Compact: ~15 tok/result. Standard: signature + truncated source. Full: unlimited.
31
58
  */
32
- function matchFilePattern(filePath, pattern) {
33
- // Exact match
34
- if (filePath === pattern)
35
- return true;
36
- // "**\/" prefix — match anywhere in path
37
- if (pattern.startsWith("**/")) {
38
- const suffix = pattern.slice(3);
39
- // Recursively match the suffix against every segment tail
40
- return matchFilePattern(filePath, suffix) ||
41
- filePath.includes("/" + suffix) ||
42
- matchFileSuffix(filePath, suffix);
59
+ function shapeSearchResults(results, detail, includeSource, options) {
60
+ if (detail === "compact") {
61
+ return results.map((r) => ({
62
+ symbol: {
63
+ id: r.symbol.id,
64
+ name: r.symbol.name,
65
+ kind: r.symbol.kind,
66
+ file: r.symbol.file,
67
+ start_line: r.symbol.start_line,
68
+ },
69
+ score: r.score,
70
+ }));
43
71
  }
44
- // "*" at the start — match extension-style patterns like "*.ts"
45
- if (pattern.startsWith("*") && !pattern.includes("/")) {
46
- const suffix = pattern.slice(1);
47
- return filePath.endsWith(suffix);
72
+ let shaped = results;
73
+ if (!includeSource) {
74
+ shaped = shaped.map((r) => {
75
+ const { source: _source, ...symbolWithoutSource } = r.symbol;
76
+ return { ...r, symbol: symbolWithoutSource };
77
+ });
48
78
  }
49
- // "dir/**" match everything under directory (e.g., "src/**")
50
- if (pattern.endsWith("/**")) {
51
- const prefix = pattern.slice(0, -3);
52
- return filePath.startsWith(prefix + "/") || filePath === prefix;
79
+ const defaultSourceChars = detail === "full" ? undefined
80
+ : (includeSource && !options?.file_pattern) ? DEFAULT_SOURCE_CHARS_NARROW : DEFAULT_SOURCE_CHARS_WIDE;
81
+ const sourceChars = options?.source_chars ?? (includeSource ? defaultSourceChars : undefined);
82
+ if (includeSource && sourceChars !== undefined && sourceChars > 0) {
83
+ shaped = shaped.map((r) => {
84
+ const source = r.symbol.source;
85
+ if (source && source.length > sourceChars) {
86
+ return { ...r, symbol: { ...r.symbol, source: source.slice(0, sourceChars) + "..." } };
87
+ }
88
+ return r;
89
+ });
53
90
  }
54
- // Pattern with "**" in the middle (e.g., "src/**/*.ts")
55
- if (pattern.includes("/**/")) {
56
- const [prefix, suffix] = splitFirst(pattern, "/**/");
57
- if (!filePath.startsWith(prefix + "/") && filePath !== prefix)
58
- return false;
59
- const rest = filePath.slice(prefix.length + 1);
60
- return matchFilePattern(rest, suffix) ||
61
- matchFilePattern(rest, "**/" + suffix);
91
+ return shaped.map((r) => {
92
+ const { tokens: _tokens, repo: _repo, ...cleanSymbol } = r.symbol;
93
+ return { ...r, symbol: cleanSymbol };
94
+ });
95
+ }
96
+ /** Validate regex for ReDoS safety and compile without g/y flags, or throw descriptive error. */
97
+ function compileSearchRegex(query) {
98
+ if (!isSafeRegex(query)) {
99
+ throw new Error("Regex pattern rejected: potential catastrophic backtracking (ReDoS)");
62
100
  }
63
- // Simple directory prefix + filename pattern (e.g., "src/*.ts")
64
- if (pattern.includes("/") && pattern.includes("*")) {
65
- const lastSlash = pattern.lastIndexOf("/");
66
- const dirPart = pattern.slice(0, lastSlash);
67
- const filePart = pattern.slice(lastSlash + 1);
68
- const fileLastSlash = filePath.lastIndexOf("/");
69
- const fileDir = fileLastSlash >= 0 ? filePath.slice(0, fileLastSlash) : "";
70
- const fileName = fileLastSlash >= 0 ? filePath.slice(fileLastSlash + 1) : filePath;
71
- if (fileDir !== dirPart)
72
- return false;
73
- return matchFilePattern(fileName, filePart);
101
+ try {
102
+ // No g/y flags — the regex is reused across files; stateful flags cause alternating matches
103
+ return new RegExp(query);
74
104
  }
75
- // No wildcards: substring match on the full path
76
- // "risk.service.ts" matches "src/lib/services/risk/risk.service.ts"
77
- // "validators" matches "src/lib/validators/schema.ts"
78
- if (!pattern.includes("*")) {
79
- return filePath.includes(pattern);
105
+ catch (err) {
106
+ const message = err instanceof Error ? err.message : String(err);
107
+ throw new Error(`Invalid regex pattern: ${message}`);
80
108
  }
81
- return false;
82
109
  }
83
- function matchFileSuffix(filePath, suffix) {
84
- if (suffix.startsWith("*")) {
85
- const ext = suffix.slice(1);
86
- return filePath.endsWith(ext);
110
+ // ── Ripgrep backend ────────────────────────────────────
111
+ /** Directories always excluded from ripgrep search */
112
+ const RG_EXCLUDE_DIRS = [
113
+ "node_modules", ".git", ".next", "dist", ".codesift", "coverage",
114
+ ".playwright-mcp", "__pycache__", ".mypy_cache", ".tox",
115
+ ];
116
+ /** Detect whether `rg` (ripgrep) is available on this system. Cached at module level. */
117
+ let rgAvailable = null;
118
+ function hasRipgrep() {
119
+ if (rgAvailable !== null)
120
+ return rgAvailable;
121
+ try {
122
+ execFileSync("rg", ["--version"], { stdio: "pipe", timeout: 2000 });
123
+ rgAvailable = true;
87
124
  }
88
- return filePath.endsWith("/" + suffix) || filePath === suffix;
89
- }
90
- function splitFirst(str, sep) {
91
- const idx = str.indexOf(sep);
92
- if (idx < 0)
93
- return [str, ""];
94
- return [str.slice(0, idx), str.slice(idx + sep.length)];
125
+ catch {
126
+ rgAvailable = false;
127
+ }
128
+ return rgAvailable;
95
129
  }
96
130
  /**
97
- * Walk a directory tree collecting all text files.
98
- * Returns relative paths from rootPath.
99
- * Unlike the index walk, this includes ALL text files (not just parseable ones).
131
+ * Search via ripgrep — fast C-based search, parses `rg -n` output.
132
+ * Falls back to Node.js search if rg is not available.
100
133
  */
101
- async function walkAllTextFiles(rootPath) {
102
- const files = [];
103
- let limitReached = false;
104
- async function walk(dirPath) {
105
- if (limitReached)
106
- return;
107
- let entries;
108
- try {
109
- entries = await readdir(dirPath, { withFileTypes: true });
134
+ function searchWithRipgrep(root, query, options) {
135
+ const args = [
136
+ "-n", // line numbers
137
+ "--no-heading", // flat output
138
+ "--max-columns", String(MAX_LINE_CHARS),
139
+ "--max-columns-preview", // show truncated preview
140
+ "--max-count", String(Math.min(options.maxResults * 2, 5000)), // per-file cap (generous to hit global max)
141
+ ];
142
+ if (!options.regex) {
143
+ args.push("-F"); // fixed string (literal)
144
+ }
145
+ if (options.contextLines > 0) {
146
+ args.push("-C", String(options.contextLines));
147
+ }
148
+ // File pattern → rg glob
149
+ if (options.filePattern) {
150
+ // Handle patterns like "src/**" or "*.ts"
151
+ args.push("--glob", options.filePattern);
152
+ }
153
+ // Exclude dirs
154
+ for (const dir of RG_EXCLUDE_DIRS) {
155
+ args.push("--glob", `!${dir}`);
156
+ }
157
+ args.push("--", query, root);
158
+ let stdout;
159
+ try {
160
+ stdout = execFileSync("rg", args, {
161
+ encoding: "utf-8",
162
+ maxBuffer: 20 * 1024 * 1024, // 20MB
163
+ timeout: SEARCH_TIMEOUT_MS,
164
+ });
165
+ }
166
+ catch (err) {
167
+ // rg exits 1 = no matches, 2 = error
168
+ if (err && typeof err === "object" && "status" in err) {
169
+ const exitCode = err.status;
170
+ if (exitCode === 1)
171
+ return []; // no matches
172
+ if ("stdout" in err && typeof err.stdout === "string") {
173
+ stdout = err.stdout;
174
+ if (!stdout)
175
+ return [];
176
+ }
177
+ else {
178
+ return [];
179
+ }
110
180
  }
111
- catch {
112
- return; // permission denied, etc.
181
+ else {
182
+ return [];
113
183
  }
114
- for (const entry of entries) {
115
- if (limitReached)
116
- return;
117
- const fullPath = join(dirPath, entry.name);
118
- if (entry.isDirectory()) {
119
- if (IGNORE_DIRS.has(entry.name) || entry.name.startsWith(".")) {
120
- continue;
121
- }
122
- await walk(fullPath);
184
+ }
185
+ const matches = [];
186
+ const rootPrefix = root.endsWith("/") ? root : root + "/";
187
+ // Parse context blocks: lines separated by "--" separators
188
+ const blocks = options.contextLines > 0
189
+ ? stdout.split(/^--$/m)
190
+ : [stdout];
191
+ for (const block of blocks) {
192
+ if (matches.length >= options.maxResults)
193
+ break;
194
+ const lines = block.split("\n").filter(Boolean);
195
+ // In context mode, find the actual match line (has `:` separator) vs context (has `-` separator)
196
+ // In non-context mode, all lines are matches
197
+ for (const rawLine of lines) {
198
+ if (matches.length >= options.maxResults)
199
+ break;
200
+ // rg format: /abs/path/file.ts:42:content (match)
201
+ // rg format: /abs/path/file.ts-40-content (context, only with -C)
202
+ // We only want match lines (with `:` after line number)
203
+ const matchResult = rawLine.match(/^(.+?):(\d+):(.*)/);
204
+ if (!matchResult)
205
+ continue;
206
+ const [, absPath, lineNumStr, content] = matchResult;
207
+ if (!absPath || !lineNumStr || content === undefined)
208
+ continue;
209
+ const relPath = absPath.startsWith(rootPrefix)
210
+ ? absPath.slice(rootPrefix.length)
211
+ : absPath;
212
+ matches.push({
213
+ file: relPath,
214
+ line: parseInt(lineNumStr, 10),
215
+ content: content,
216
+ });
217
+ }
218
+ }
219
+ // For context mode, we need to re-parse to attach context_before/context_after
220
+ // But context_lines=0 is the default now, so this path is rarely hit
221
+ if (options.contextLines > 0 && blocks.length > 1) {
222
+ return parseRipgrepContextBlocks(stdout, rootPrefix, options.maxResults, options.contextLines);
223
+ }
224
+ return matches;
225
+ }
226
+ /**
227
+ * Parse rg output with context lines (-C N) into TextMatch[] with context_before/context_after.
228
+ */
229
+ function parseRipgrepContextBlocks(stdout, rootPrefix, maxResults, contextLines) {
230
+ const matches = [];
231
+ const blocks = stdout.split(/^--$/m);
232
+ for (const block of blocks) {
233
+ if (matches.length >= maxResults)
234
+ break;
235
+ const lines = block.split("\n").filter(Boolean);
236
+ // Separate match lines from context lines
237
+ // Match: path:line:content Context: path-line-content
238
+ const parsed = [];
239
+ for (const raw of lines) {
240
+ // Try match line first (colon after line number)
241
+ const matchLine = raw.match(/^(.+?):(\d+):(.*)/);
242
+ if (matchLine && matchLine[1] && matchLine[2] && matchLine[3] !== undefined) {
243
+ parsed.push({
244
+ path: matchLine[1].startsWith(rootPrefix) ? matchLine[1].slice(rootPrefix.length) : matchLine[1],
245
+ line: parseInt(matchLine[2], 10),
246
+ content: matchLine[3],
247
+ isMatch: true,
248
+ });
249
+ continue;
250
+ }
251
+ // Try context line (hyphen after line number)
252
+ const ctxLine = raw.match(/^(.+?)-(\d+)-(.*)/);
253
+ if (ctxLine && ctxLine[1] && ctxLine[2] && ctxLine[3] !== undefined) {
254
+ parsed.push({
255
+ path: ctxLine[1].startsWith(rootPrefix) ? ctxLine[1].slice(rootPrefix.length) : ctxLine[1],
256
+ line: parseInt(ctxLine[2], 10),
257
+ content: ctxLine[3],
258
+ isMatch: false,
259
+ });
260
+ }
261
+ }
262
+ // Build TextMatch for each match line with surrounding context
263
+ for (let i = 0; i < parsed.length; i++) {
264
+ const p = parsed[i];
265
+ if (!p.isMatch)
266
+ continue;
267
+ if (matches.length >= maxResults)
268
+ break;
269
+ const contextBefore = [];
270
+ const contextAfter = [];
271
+ // Collect context before
272
+ for (let j = Math.max(0, i - contextLines); j < i; j++) {
273
+ const ctx = parsed[j];
274
+ if (ctx && !ctx.isMatch)
275
+ contextBefore.push(ctx.content);
123
276
  }
124
- else if (entry.isFile()) {
125
- const ext = extname(entry.name);
126
- // Skip binary files
127
- if (BINARY_EXTENSIONS.has(ext))
128
- continue;
129
- // Skip files that are too large
130
- try {
131
- const fileStat = await stat(fullPath);
132
- if (fileStat.size > MAX_FILE_SIZE)
133
- continue;
134
- }
135
- catch {
136
- continue;
137
- }
138
- files.push(relative(rootPath, fullPath));
139
- if (files.length >= MAX_WALK_FILES) {
140
- console.warn(`[codesift] walkAllTextFiles: reached ${MAX_WALK_FILES} file limit, returning partial results`);
141
- limitReached = true;
142
- return;
143
- }
277
+ // Collect context after
278
+ for (let j = i + 1; j <= Math.min(parsed.length - 1, i + contextLines); j++) {
279
+ const ctx = parsed[j];
280
+ if (ctx && !ctx.isMatch)
281
+ contextAfter.push(ctx.content);
144
282
  }
283
+ const match = { file: p.path, line: p.line, content: p.content };
284
+ if (contextBefore.length > 0)
285
+ match.context_before = contextBefore;
286
+ if (contextAfter.length > 0)
287
+ match.context_after = contextAfter;
288
+ matches.push(match);
289
+ }
290
+ }
291
+ return matches;
292
+ }
293
+ // ── Node.js fallback search ───────────────────────────
294
+ /** Search file content for line matches, collecting context lines around each hit. */
295
+ function searchFileForMatches(content, filePath, query, regex, contextLines, maxMatches) {
296
+ const lines = content.split("\n");
297
+ const matches = [];
298
+ for (let i = 0; i < lines.length; i++) {
299
+ if (matches.length >= maxMatches)
300
+ break;
301
+ const line = lines[i];
302
+ if (line === undefined)
303
+ continue;
304
+ const isMatch = regex ? regex.test(line) : line.includes(query);
305
+ if (!isMatch)
306
+ continue;
307
+ const contextBefore = [];
308
+ for (let j = Math.max(0, i - contextLines); j < i; j++) {
309
+ const ctxLine = lines[j];
310
+ if (ctxLine !== undefined)
311
+ contextBefore.push(ctxLine);
312
+ }
313
+ const contextAfter = [];
314
+ for (let j = i + 1; j <= Math.min(lines.length - 1, i + contextLines); j++) {
315
+ const ctxLine = lines[j];
316
+ if (ctxLine !== undefined)
317
+ contextAfter.push(ctxLine);
318
+ }
319
+ const truncLine = line.length > MAX_LINE_CHARS
320
+ ? line.slice(0, MAX_LINE_CHARS) + "..."
321
+ : line;
322
+ const match = {
323
+ file: filePath,
324
+ line: i + 1,
325
+ content: truncLine,
326
+ };
327
+ if (contextBefore.length > 0)
328
+ match.context_before = contextBefore;
329
+ if (contextAfter.length > 0)
330
+ match.context_after = contextAfter;
331
+ matches.push(match);
332
+ }
333
+ return matches;
334
+ }
335
+ /** Aggregate flat TextMatch[] into per-file groups with counts and first_match preview. */
336
+ function groupMatchesByFile(matches) {
337
+ const groups = new Map();
338
+ for (const m of matches) {
339
+ const existing = groups.get(m.file);
340
+ if (existing) {
341
+ existing.count++;
342
+ existing.lines.push(m.line);
343
+ }
344
+ else {
345
+ groups.set(m.file, {
346
+ file: m.file,
347
+ count: 1,
348
+ lines: [m.line],
349
+ first_match: m.content.length > MAX_FIRST_MATCH_CHARS
350
+ ? m.content.slice(0, MAX_FIRST_MATCH_CHARS) + "..."
351
+ : m.content,
352
+ });
145
353
  }
146
354
  }
147
- await walk(rootPath);
148
- return files;
355
+ return [...groups.values()];
149
356
  }
357
+ // ── Public API ──────────────────────────────────────────
150
358
  /**
151
359
  * Search symbols by name/signature/docstring using BM25 ranking.
152
360
  * Supports filtering by symbol kind and file pattern.
@@ -161,149 +369,175 @@ export async function searchSymbols(repo, query, options) {
161
369
  throw new Error(`Repository "${repo}" not found. Run index_folder first.`);
162
370
  }
163
371
  const config = loadConfig();
164
- const topK = options?.top_k ?? config.defaultTopK;
165
372
  const includeSource = options?.include_source ?? true;
166
- const hasKindFilter = !!options?.kind;
167
- const hasFileFilter = !!options?.file_pattern;
168
- const hasFilters = hasKindFilter || hasFileFilter;
373
+ const defaultK = (includeSource && !options?.file_pattern) ? DEFAULT_TOP_K_WITH_SOURCE : config.defaultTopK;
374
+ const topK = options?.top_k ?? defaultK;
375
+ const hasFilters = !!options?.kind || !!options?.file_pattern;
169
376
  let results;
170
377
  if (!query.trim()) {
171
- // Empty query: return all symbols matching filters (no BM25 scoring)
172
378
  const allSymbols = [...index.symbols.values()];
173
- let filtered = allSymbols;
174
- if (hasKindFilter) {
175
- const kind = options.kind;
176
- filtered = filtered.filter((s) => s.kind === kind);
177
- }
178
- if (hasFileFilter) {
179
- const pattern = options.file_pattern;
180
- filtered = filtered.filter((s) => matchFilePattern(s.file, pattern));
181
- }
182
- results = filtered.slice(0, topK).map((symbol) => ({
183
- symbol,
184
- score: 0,
185
- }));
379
+ const filtered = allSymbols.filter((s) => matchesSymbolFilters(s, options));
380
+ results = filtered.slice(0, topK).map((symbol) => ({ symbol, score: 0 }));
186
381
  }
187
382
  else {
188
- // When filters are active, search a wider candidate set from BM25
189
- // so that post-filter truncation doesn't lose relevant results.
190
- const searchTopK = hasFilters ? Math.max(topK * 5, 200) : topK;
383
+ const searchTopK = hasFilters ? Math.max(topK * BM25_FILTER_MULTIPLIER, BM25_FILTER_MIN_K) : topK;
191
384
  results = searchBM25(index, query, searchTopK, config.bm25FieldWeights);
192
- // Filter by symbol kind
193
- if (hasKindFilter) {
194
- const kind = options.kind;
195
- results = results.filter((r) => r.symbol.kind === kind);
196
- }
197
- // Filter by file pattern
198
- if (hasFileFilter) {
199
- const pattern = options.file_pattern;
200
- results = results.filter((r) => matchFilePattern(r.symbol.file, pattern));
201
- }
202
- // Re-truncate to requested top_k after filtering
385
+ results = results.filter((r) => matchesSymbolFilters(r.symbol, options));
203
386
  results = results.slice(0, topK);
387
+ results = applyCutoff(results);
204
388
  }
205
- // Strip source if not requested
206
- if (!includeSource) {
207
- results = results.map((r) => {
208
- const { source: _source, ...symbolWithoutSource } = r.symbol;
209
- return { ...r, symbol: symbolWithoutSource };
210
- });
389
+ if (options?.rerank && results.length > 1) {
390
+ const { rerankResults } = await import("../search/reranker.js");
391
+ results = await rerankResults(query, results);
211
392
  }
212
- // Truncate source to source_chars limit (default 500 when include_source=true)
213
- const sourceChars = options?.source_chars ?? (includeSource ? 500 : undefined);
214
- if (includeSource && sourceChars !== undefined && sourceChars > 0) {
215
- results = results.map((r) => {
216
- const source = r.symbol.source;
217
- if (source && source.length > sourceChars) {
218
- return {
219
- ...r,
220
- symbol: { ...r.symbol, source: source.slice(0, sourceChars) + "..." },
221
- };
222
- }
223
- return r;
224
- });
393
+ const detail = options?.detail_level ?? "standard";
394
+ const shaped = shapeSearchResults(results, detail, includeSource, options);
395
+ // Token budget: greedily pack results until budget exhausted
396
+ const budget = options?.token_budget;
397
+ if (budget && budget > 0) {
398
+ const packed = [];
399
+ let used = 0;
400
+ for (const r of shaped) {
401
+ const tok = Math.ceil(JSON.stringify(r).length / CHARS_PER_TOKEN);
402
+ if (used + tok > budget)
403
+ break;
404
+ packed.push(r);
405
+ used += tok;
406
+ }
407
+ return packed;
225
408
  }
226
- return results;
409
+ return shaped;
227
410
  }
228
- /**
229
- * Full-text search across all files in a repository.
230
- * Walks the filesystem to search ALL text files, not just indexed ones.
231
- */
232
411
  export async function searchText(repo, query, options) {
233
412
  const index = await getCodeIndex(repo);
234
413
  if (!index) {
235
414
  throw new Error(`Repository "${repo}" not found. Run index_folder first.`);
236
415
  }
237
- const contextLines = options?.context_lines ?? 2;
238
416
  const useRegex = options?.regex ?? false;
239
417
  const filePattern = options?.file_pattern;
240
- const maxResults = options?.max_results ?? DEFAULT_MAX_TEXT_MATCHES;
241
- let regex = null;
418
+ const maxResults = options?.max_results
419
+ ?? (useRegex && !filePattern ? DEFAULT_MAX_REGEX_RESULTS : DEFAULT_MAX_TEXT_MATCHES);
420
+ const contextLines = options?.context_lines ?? 0; // OPT-2: default 0 (was 2) — saves ~30 tokens/match
421
+ // Validate regex safety before passing to ripgrep
242
422
  if (useRegex) {
243
- try {
244
- regex = new RegExp(query);
245
- }
246
- catch (err) {
247
- const message = err instanceof Error ? err.message : String(err);
248
- throw new Error(`Invalid regex pattern: ${message}`);
249
- }
423
+ compileSearchRegex(query); // throws on ReDoS patterns
250
424
  }
251
- // Walk the filesystem to find ALL text files (not just indexed/parseable ones)
252
- const allFiles = await walkAllTextFiles(index.root);
253
- const matches = [];
254
- for (const filePath of allFiles) {
255
- if (matches.length >= maxResults)
256
- break;
257
- // Filter by file pattern
258
- if (filePattern && !matchFilePattern(filePath, filePattern)) {
259
- continue;
260
- }
261
- const fullPath = join(index.root, filePath);
262
- let content;
263
- try {
264
- content = await readFile(fullPath, "utf-8");
425
+ let matches;
426
+ // OPT-1: Use ripgrep when available (10x faster)
427
+ if (hasRipgrep()) {
428
+ matches = searchWithRipgrep(index.root, query, {
429
+ regex: useRegex,
430
+ filePattern: filePattern,
431
+ maxResults: maxResults,
432
+ contextLines: contextLines,
433
+ });
434
+ }
435
+ else {
436
+ // Node.js fallback
437
+ const regex = useRegex ? compileSearchRegex(query) : null;
438
+ let allFiles;
439
+ if (filePattern) {
440
+ allFiles = index.files.map((f) => f.path);
265
441
  }
266
- catch {
267
- continue; // File may have been deleted or moved
442
+ else {
443
+ allFiles = await walkDirectory(index.root, {
444
+ fileFilter: (ext) => !BINARY_EXTENSIONS.has(ext),
445
+ maxFiles: MAX_WALK_FILES,
446
+ relative: true,
447
+ });
268
448
  }
269
- const lines = content.split("\n");
270
- for (let i = 0; i < lines.length; i++) {
449
+ matches = [];
450
+ const searchStart = Date.now();
451
+ for (const filePath of allFiles) {
271
452
  if (matches.length >= maxResults)
272
453
  break;
273
- const line = lines[i];
274
- if (line === undefined)
275
- continue;
276
- const isMatch = regex ? regex.test(line) : line.includes(query);
277
- if (!isMatch)
454
+ if (Date.now() - searchStart > SEARCH_TIMEOUT_MS)
455
+ break;
456
+ if (filePattern && !matchFilePattern(filePath, filePattern))
278
457
  continue;
279
- const contextBefore = [];
280
- const contextAfter = [];
281
- for (let j = Math.max(0, i - contextLines); j < i; j++) {
282
- const ctxLine = lines[j];
283
- if (ctxLine !== undefined) {
284
- contextBefore.push(ctxLine);
285
- }
458
+ const fullPath = join(index.root, filePath);
459
+ let content;
460
+ try {
461
+ content = await readFile(fullPath, "utf-8");
286
462
  }
287
- for (let j = i + 1; j <= Math.min(lines.length - 1, i + contextLines); j++) {
288
- const ctxLine = lines[j];
289
- if (ctxLine !== undefined) {
290
- contextAfter.push(ctxLine);
291
- }
463
+ catch {
464
+ continue;
292
465
  }
293
- const match = {
294
- file: filePath,
295
- line: i + 1, // 1-based
296
- content: line,
297
- };
298
- if (contextBefore.length > 0) {
299
- match.context_before = contextBefore;
466
+ const fileMatches = searchFileForMatches(content, filePath, query, regex, contextLines, maxResults - matches.length);
467
+ matches.push(...fileMatches);
468
+ }
469
+ }
470
+ // Ranked mode: classify hits with symbol context, deduplicate, and sort by centrality.
471
+ // Takes precedence over auto_group/compact — returns TextMatch[] with containing_symbol.
472
+ if (options?.ranked && matches.length > 0) {
473
+ try {
474
+ const { classifyHitsWithSymbols } = await import("./search-ranker.js");
475
+ const bm25Idx = await getBM25Index(repo);
476
+ if (bm25Idx) {
477
+ matches = await classifyHitsWithSymbols(matches, index, { centrality: bm25Idx.centrality });
300
478
  }
301
- if (contextAfter.length > 0) {
302
- match.context_after = contextAfter;
479
+ }
480
+ catch {
481
+ // Graceful fallback — return unranked matches if pipeline fails
482
+ }
483
+ return matches;
484
+ }
485
+ // OPT-3: Compact format — grep-like `file:line: content` output, ~50% less tokens than JSON
486
+ // Auto-enable when auto_group is set (caller is optimization-aware) and results are small
487
+ const useCompact = options?.compact
488
+ ?? (options?.auto_group && contextLines === 0 && matches.length > 0 && matches.length <= AUTO_GROUP_THRESHOLD);
489
+ if (useCompact && !options?.group_by_file) {
490
+ // Group by file to avoid repeating long paths (saves ~30% on multi-match files)
491
+ const groups = new Map();
492
+ for (const m of matches) {
493
+ let g = groups.get(m.file);
494
+ if (!g) {
495
+ g = [];
496
+ groups.set(m.file, g);
303
497
  }
304
- matches.push(match);
498
+ g.push(` ${m.line}: ${m.content}`);
499
+ }
500
+ if (groups.size === matches.length) {
501
+ // Each file has 1 match — flat format is fine
502
+ return matches.map((m) => `${m.file}:${m.line}: ${m.content}`).join("\n");
305
503
  }
504
+ // Grouped: file header + indented matches
505
+ const parts = [];
506
+ for (const [file, lines] of groups) {
507
+ parts.push(`${file}\n${lines.join("\n")}`);
508
+ }
509
+ return parts.join("\n");
510
+ }
511
+ // Estimate response size; force grouping when output would be enormous
512
+ const estimatedChars = matches.reduce((sum, m) => {
513
+ let chars = m.file.length + m.content.length + JSON_OVERHEAD_PER_MATCH;
514
+ if (m.context_before)
515
+ chars += m.context_before.reduce((s, l) => s + l.length, 0);
516
+ if (m.context_after)
517
+ chars += m.context_after.reduce((s, l) => s + l.length, 0);
518
+ return sum + chars;
519
+ }, 0);
520
+ const shouldGroup = options?.group_by_file
521
+ || (options?.auto_group && matches.length > AUTO_GROUP_THRESHOLD)
522
+ || estimatedChars > MAX_RESPONSE_CHARS;
523
+ if (shouldGroup) {
524
+ return groupMatchesByFile(matches);
306
525
  }
307
526
  return matches;
308
527
  }
528
+ // ---------------------------------------------------------------------------
529
+ // Semantic search — standalone wrapper around retrieval infrastructure
530
+ // ---------------------------------------------------------------------------
531
+ export async function semanticSearch(repo, query, options) {
532
+ const { handleSemanticQuery } = await import("../retrieval/semantic-handlers.js");
533
+ const result = await handleSemanticQuery(repo, {
534
+ type: "semantic",
535
+ query,
536
+ top_k: options?.top_k,
537
+ file_filter: options?.file_pattern,
538
+ exclude_tests: options?.exclude_tests,
539
+ rerank: options?.rerank,
540
+ });
541
+ return typeof result.data === "string" ? result.data : JSON.stringify(result.data);
542
+ }
309
543
  //# sourceMappingURL=search-tools.js.map