npm - codesift-mcp - Versions diffs - 0.1.0 → 0.2.0 - Mend

codesift-mcp 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (299) hide show

package/LICENSE +66 -21
package/README.md +346 -56
package/dist/cli/args.d.ts +2 -0
package/dist/cli/args.d.ts.map +1 -1
package/dist/cli/args.js +11 -0
package/dist/cli/args.js.map +1 -1
package/dist/cli/commands.d.ts.map +1 -1
package/dist/cli/commands.js +177 -67
package/dist/cli/commands.js.map +1 -1
package/dist/cli/help.d.ts +1 -1
package/dist/cli/help.d.ts.map +1 -1
package/dist/cli/help.js +157 -0
package/dist/cli/help.js.map +1 -1
package/dist/cli/hooks.d.ts +3 -0
package/dist/cli/hooks.d.ts.map +1 -0
package/dist/cli/hooks.js +163 -0
package/dist/cli/hooks.js.map +1 -0
package/dist/cli/setup.d.ts +25 -0
package/dist/cli/setup.d.ts.map +1 -0
package/dist/cli/setup.js +400 -0
package/dist/cli/setup.js.map +1 -0
package/dist/config.d.ts +2 -0
package/dist/config.d.ts.map +1 -1
package/dist/config.js +2 -0
package/dist/config.js.map +1 -1
package/dist/formatters-shortening.d.ts +7 -0
package/dist/formatters-shortening.d.ts.map +1 -0
package/dist/formatters-shortening.js +68 -0
package/dist/formatters-shortening.js.map +1 -0
package/dist/formatters.d.ts +314 -0
package/dist/formatters.d.ts.map +1 -0
package/dist/formatters.js +396 -0
package/dist/formatters.js.map +1 -0
package/dist/instructions.d.ts +6 -0
package/dist/instructions.d.ts.map +1 -0
package/dist/instructions.js +72 -0
package/dist/instructions.js.map +1 -0
package/dist/lsp/lsp-client.d.ts +21 -0
package/dist/lsp/lsp-client.d.ts.map +1 -0
package/dist/lsp/lsp-client.js +122 -0
package/dist/lsp/lsp-client.js.map +1 -0
package/dist/lsp/lsp-manager.d.ts +12 -0
package/dist/lsp/lsp-manager.d.ts.map +1 -0
package/dist/lsp/lsp-manager.js +82 -0
package/dist/lsp/lsp-manager.js.map +1 -0
package/dist/lsp/lsp-servers.d.ts +13 -0
package/dist/lsp/lsp-servers.d.ts.map +1 -0
package/dist/lsp/lsp-servers.js +57 -0
package/dist/lsp/lsp-servers.js.map +1 -0
package/dist/lsp/lsp-tools.d.ts +67 -0
package/dist/lsp/lsp-tools.d.ts.map +1 -0
package/dist/lsp/lsp-tools.js +359 -0
package/dist/lsp/lsp-tools.js.map +1 -0
package/dist/parser/extractors/_shared.d.ts +11 -0
package/dist/parser/extractors/_shared.d.ts.map +1 -0
package/dist/parser/extractors/_shared.js +38 -0
package/dist/parser/extractors/_shared.js.map +1 -0
package/dist/parser/extractors/astro.d.ts +15 -0
package/dist/parser/extractors/astro.d.ts.map +1 -0
package/dist/parser/extractors/astro.js +104 -0
package/dist/parser/extractors/astro.js.map +1 -0
package/dist/parser/extractors/conversation.d.ts +16 -0
package/dist/parser/extractors/conversation.d.ts.map +1 -0
package/dist/parser/extractors/conversation.js +196 -0
package/dist/parser/extractors/conversation.js.map +1 -0
package/dist/parser/extractors/go.d.ts.map +1 -1
package/dist/parser/extractors/go.js +22 -45
package/dist/parser/extractors/go.js.map +1 -1
package/dist/parser/extractors/python.d.ts +1 -1
package/dist/parser/extractors/python.d.ts.map +1 -1
package/dist/parser/extractors/python.js +19 -50
package/dist/parser/extractors/python.js.map +1 -1
package/dist/parser/extractors/rust.d.ts +1 -1
package/dist/parser/extractors/rust.d.ts.map +1 -1
package/dist/parser/extractors/rust.js +7 -34
package/dist/parser/extractors/rust.js.map +1 -1
package/dist/parser/extractors/typescript.d.ts +1 -1
package/dist/parser/extractors/typescript.d.ts.map +1 -1
package/dist/parser/extractors/typescript.js +99 -68
package/dist/parser/extractors/typescript.js.map +1 -1
package/dist/parser/parser-manager.d.ts.map +1 -1
package/dist/parser/parser-manager.js +12 -2
package/dist/parser/parser-manager.js.map +1 -1
package/dist/parser/symbol-extractor.d.ts +2 -0
package/dist/parser/symbol-extractor.d.ts.map +1 -1
package/dist/parser/symbol-extractor.js +2 -0
package/dist/parser/symbol-extractor.js.map +1 -1
package/dist/register-tools.d.ts +127 -0
package/dist/register-tools.d.ts.map +1 -0
package/dist/register-tools.js +1453 -0
package/dist/register-tools.js.map +1 -0
package/dist/retrieval/codebase-retrieval.d.ts +4 -26
package/dist/retrieval/codebase-retrieval.d.ts.map +1 -1
package/dist/retrieval/codebase-retrieval.js +105 -403
package/dist/retrieval/codebase-retrieval.js.map +1 -1
package/dist/retrieval/retrieval-constants.d.ts +27 -0
package/dist/retrieval/retrieval-constants.d.ts.map +1 -0
package/dist/retrieval/retrieval-constants.js +27 -0
package/dist/retrieval/retrieval-constants.js.map +1 -0
package/dist/retrieval/retrieval-schemas.d.ts +107 -0
package/dist/retrieval/retrieval-schemas.d.ts.map +1 -0
package/dist/retrieval/retrieval-schemas.js +102 -0
package/dist/retrieval/retrieval-schemas.js.map +1 -0
package/dist/retrieval/retrieval-utils.d.ts +40 -0
package/dist/retrieval/retrieval-utils.d.ts.map +1 -0
package/dist/retrieval/retrieval-utils.js +139 -0
package/dist/retrieval/retrieval-utils.js.map +1 -0
package/dist/retrieval/semantic-handlers.d.ts +8 -0
package/dist/retrieval/semantic-handlers.d.ts.map +1 -0
package/dist/retrieval/semantic-handlers.js +152 -0
package/dist/retrieval/semantic-handlers.js.map +1 -0
package/dist/search/bm25.d.ts +6 -1
package/dist/search/bm25.d.ts.map +1 -1
package/dist/search/bm25.js +95 -32
package/dist/search/bm25.js.map +1 -1
package/dist/search/chunker.d.ts +10 -0
package/dist/search/chunker.d.ts.map +1 -1
package/dist/search/chunker.js +63 -11
package/dist/search/chunker.js.map +1 -1
package/dist/search/reranker.d.ts +15 -0
package/dist/search/reranker.d.ts.map +1 -0
package/dist/search/reranker.js +126 -0
package/dist/search/reranker.js.map +1 -0
package/dist/search/semantic.d.ts +1 -1
package/dist/search/semantic.d.ts.map +1 -1
package/dist/search/semantic.js +40 -45
package/dist/search/semantic.js.map +1 -1
package/dist/server-helpers.d.ts +29 -0
package/dist/server-helpers.d.ts.map +1 -0
package/dist/server-helpers.js +312 -0
package/dist/server-helpers.js.map +1 -0
package/dist/server.d.ts +1 -1
package/dist/server.d.ts.map +1 -1
package/dist/server.js +11 -271
package/dist/server.js.map +1 -1
package/dist/storage/_shared.d.ts +9 -0
package/dist/storage/_shared.d.ts.map +1 -0
package/dist/storage/_shared.js +26 -0
package/dist/storage/_shared.js.map +1 -0
package/dist/storage/chunk-store.d.ts.map +1 -1
package/dist/storage/chunk-store.js +23 -63
package/dist/storage/chunk-store.js.map +1 -1
package/dist/storage/embedding-store.d.ts +6 -3
package/dist/storage/embedding-store.d.ts.map +1 -1
package/dist/storage/embedding-store.js +54 -30
package/dist/storage/embedding-store.js.map +1 -1
package/dist/storage/graph-store.d.ts +48 -0
package/dist/storage/graph-store.d.ts.map +1 -0
package/dist/storage/graph-store.js +52 -0
package/dist/storage/graph-store.js.map +1 -0
package/dist/storage/index-store.d.ts +5 -0
package/dist/storage/index-store.d.ts.map +1 -1
package/dist/storage/index-store.js +28 -16
package/dist/storage/index-store.js.map +1 -1
package/dist/storage/registry.d.ts +4 -0
package/dist/storage/registry.d.ts.map +1 -1
package/dist/storage/registry.js +16 -16
package/dist/storage/registry.js.map +1 -1
package/dist/storage/usage-stats.d.ts +6 -0
package/dist/storage/usage-stats.d.ts.map +1 -1
package/dist/storage/usage-stats.js +59 -11
package/dist/storage/usage-stats.js.map +1 -1
package/dist/storage/usage-tracker.d.ts +3 -0
package/dist/storage/usage-tracker.d.ts.map +1 -1
package/dist/storage/usage-tracker.js +50 -132
package/dist/storage/usage-tracker.js.map +1 -1
package/dist/storage/watcher.d.ts +2 -1
package/dist/storage/watcher.d.ts.map +1 -1
package/dist/storage/watcher.js +16 -16
package/dist/storage/watcher.js.map +1 -1
package/dist/tools/ast-query-tools.d.ts +29 -0
package/dist/tools/ast-query-tools.d.ts.map +1 -0
package/dist/tools/ast-query-tools.js +110 -0
package/dist/tools/ast-query-tools.js.map +1 -0
package/dist/tools/boundary-tools.d.ts +31 -0
package/dist/tools/boundary-tools.d.ts.map +1 -0
package/dist/tools/boundary-tools.js +62 -0
package/dist/tools/boundary-tools.js.map +1 -0
package/dist/tools/clone-tools.d.ts +35 -0
package/dist/tools/clone-tools.d.ts.map +1 -0
package/dist/tools/clone-tools.js +181 -0
package/dist/tools/clone-tools.js.map +1 -0
package/dist/tools/community-tools.d.ts +23 -0
package/dist/tools/community-tools.d.ts.map +1 -0
package/dist/tools/community-tools.js +297 -0
package/dist/tools/community-tools.js.map +1 -0
package/dist/tools/complexity-tools.d.ts +34 -0
package/dist/tools/complexity-tools.d.ts.map +1 -0
package/dist/tools/complexity-tools.js +135 -0
package/dist/tools/complexity-tools.js.map +1 -0
package/dist/tools/context-tools.d.ts +44 -3
package/dist/tools/context-tools.d.ts.map +1 -1
package/dist/tools/context-tools.js +329 -99
package/dist/tools/context-tools.js.map +1 -1
package/dist/tools/conversation-tools.d.ts +107 -0
package/dist/tools/conversation-tools.d.ts.map +1 -0
package/dist/tools/conversation-tools.js +419 -0
package/dist/tools/conversation-tools.js.map +1 -0
package/dist/tools/coordinator-tools.d.ts +73 -0
package/dist/tools/coordinator-tools.d.ts.map +1 -0
package/dist/tools/coordinator-tools.js +153 -0
package/dist/tools/coordinator-tools.js.map +1 -0
package/dist/tools/cross-repo-tools.d.ts +43 -0
package/dist/tools/cross-repo-tools.d.ts.map +1 -0
package/dist/tools/cross-repo-tools.js +55 -0
package/dist/tools/cross-repo-tools.js.map +1 -0
package/dist/tools/diff-tools.d.ts +4 -1
package/dist/tools/diff-tools.d.ts.map +1 -1
package/dist/tools/diff-tools.js +23 -5
package/dist/tools/diff-tools.js.map +1 -1
package/dist/tools/frequency-tools.d.ts +46 -0
package/dist/tools/frequency-tools.d.ts.map +1 -0
package/dist/tools/frequency-tools.js +184 -0
package/dist/tools/frequency-tools.js.map +1 -0
package/dist/tools/generate-tools.d.ts.map +1 -1
package/dist/tools/generate-tools.js +13 -2
package/dist/tools/generate-tools.js.map +1 -1
package/dist/tools/graph-tools.d.ts +44 -11
package/dist/tools/graph-tools.d.ts.map +1 -1
package/dist/tools/graph-tools.js +147 -104
package/dist/tools/graph-tools.js.map +1 -1
package/dist/tools/hotspot-tools.d.ts +24 -0
package/dist/tools/hotspot-tools.d.ts.map +1 -0
package/dist/tools/hotspot-tools.js +122 -0
package/dist/tools/hotspot-tools.js.map +1 -0
package/dist/tools/impact-tools.d.ts +13 -0
package/dist/tools/impact-tools.d.ts.map +1 -0
package/dist/tools/impact-tools.js +238 -0
package/dist/tools/impact-tools.js.map +1 -0
package/dist/tools/index-tools.d.ts +44 -3
package/dist/tools/index-tools.d.ts.map +1 -1
package/dist/tools/index-tools.js +530 -222
package/dist/tools/index-tools.js.map +1 -1
package/dist/tools/memory-tools.d.ts +35 -0
package/dist/tools/memory-tools.d.ts.map +1 -0
package/dist/tools/memory-tools.js +229 -0
package/dist/tools/memory-tools.js.map +1 -0
package/dist/tools/outline-tools.d.ts +24 -13
package/dist/tools/outline-tools.d.ts.map +1 -1
package/dist/tools/outline-tools.js +113 -87
package/dist/tools/outline-tools.js.map +1 -1
package/dist/tools/pattern-tools.d.ts +32 -0
package/dist/tools/pattern-tools.d.ts.map +1 -0
package/dist/tools/pattern-tools.js +116 -0
package/dist/tools/pattern-tools.js.map +1 -0
package/dist/tools/report-tools.d.ts +5 -0
package/dist/tools/report-tools.d.ts.map +1 -0
package/dist/tools/report-tools.js +167 -0
package/dist/tools/report-tools.js.map +1 -0
package/dist/tools/review-diff-tools.d.ts +148 -0
package/dist/tools/review-diff-tools.d.ts.map +1 -0
package/dist/tools/review-diff-tools.js +852 -0
package/dist/tools/review-diff-tools.js.map +1 -0
package/dist/tools/route-tools.d.ts +32 -0
package/dist/tools/route-tools.d.ts.map +1 -0
package/dist/tools/route-tools.js +276 -0
package/dist/tools/route-tools.js.map +1 -0
package/dist/tools/search-ranker.d.ts +5 -0
package/dist/tools/search-ranker.d.ts.map +1 -0
package/dist/tools/search-ranker.js +142 -0
package/dist/tools/search-ranker.js.map +1 -0
package/dist/tools/search-tools.d.ts +24 -1
package/dist/tools/search-tools.d.ts.map +1 -1
package/dist/tools/search-tools.js +459 -225
package/dist/tools/search-tools.js.map +1 -1
package/dist/tools/secret-tools.d.ts +104 -0
package/dist/tools/secret-tools.d.ts.map +1 -0
package/dist/tools/secret-tools.js +410 -0
package/dist/tools/secret-tools.js.map +1 -0
package/dist/tools/symbol-tools.d.ts +90 -2
package/dist/tools/symbol-tools.d.ts.map +1 -1
package/dist/tools/symbol-tools.js +576 -42
package/dist/tools/symbol-tools.js.map +1 -1
package/dist/types.d.ts +34 -1
package/dist/types.d.ts.map +1 -1
package/dist/utils/framework-detect.d.ts +5 -0
package/dist/utils/framework-detect.d.ts.map +1 -0
package/dist/utils/framework-detect.js +36 -0
package/dist/utils/framework-detect.js.map +1 -0
package/dist/utils/glob.d.ts +19 -0
package/dist/utils/glob.d.ts.map +1 -0
package/dist/utils/glob.js +74 -0
package/dist/utils/glob.js.map +1 -0
package/dist/utils/import-graph.d.ts +29 -0
package/dist/utils/import-graph.d.ts.map +1 -0
package/dist/utils/import-graph.js +125 -0
package/dist/utils/import-graph.js.map +1 -0
package/dist/utils/test-file.d.ts.map +1 -1
package/dist/utils/test-file.js +1 -0
package/dist/utils/test-file.js.map +1 -1
package/dist/utils/walk.d.ts +45 -0
package/dist/utils/walk.d.ts.map +1 -0
package/dist/utils/walk.js +87 -0
package/dist/utils/walk.js.map +1 -0
package/package.json +10 -4
package/rules/codesift.md +187 -0
package/rules/codesift.mdc +192 -0
package/rules/codex.md +187 -0
package/rules/gemini.md +187 -0

package/dist/tools/search-tools.js CHANGED Viewed

@@ -1,18 +1,36 @@
-import { readFile, readdir, stat } from "node:fs/promises";
-import { join, relative, extname } from "node:path";
+import { readFile } from "node:fs/promises";
+import { execFileSync } from "node:child_process";
+import { join } from "node:path";
 import { getBM25Index, getCodeIndex } from "./index-tools.js";
-import { searchBM25 } from "../search/bm25.js";
+import { searchBM25, applyCutoff } from "../search/bm25.js";
 import { loadConfig } from "../config.js";
-const DEFAULT_MAX_TEXT_MATCHES = 500;
-const MAX_FILE_SIZE = 1_000_000; // 1MB — skip giant files
+import { walkDirectory } from "../utils/walk.js";
+import { matchFilePattern } from "../utils/glob.js";
+const DEFAULT_MAX_TEXT_MATCHES = 200;
 const MAX_WALK_FILES = 50_000; // Safety limit — stop walking after this many files
-/** Directories to skip during text search file walk */
-const IGNORE_DIRS = new Set([
-    "node_modules", ".git", "dist", "build", "coverage",
-    ".codesift", ".next", "__pycache__", ".pytest_cache",
-    ".venv", "venv", ".tox", ".mypy_cache", ".turbo",
-    "generated", "audit-results", ".backup", "jscpd-report",
-]);
+const SEARCH_TIMEOUT_MS = 30_000; // Abort search after 30s to prevent 100s+ hangs
+const AUTO_GROUP_THRESHOLD = 50; // Auto-switch to group_by_file above this match count
+const MAX_RESPONSE_CHARS = 80_000; // ~20K tokens — force group_by_file above this
+const MAX_FIRST_MATCH_CHARS = 300; // Cap first_match preview in grouped output
+const MAX_LINE_CHARS = 500; // Truncate individual match lines (minified JS/JSON can be 100K+)
+const DEFAULT_TOP_K_WITH_SOURCE = 10; // Cap results when include_source=true without file_pattern
+const BM25_FILTER_MULTIPLIER = 5; // Widen BM25 candidate set when filters active
+const BM25_FILTER_MIN_K = 200; // Minimum candidate set size when filters active
+const DEFAULT_SOURCE_CHARS_NARROW = 200; // Source truncation without file_pattern (reduce waste)
+const DEFAULT_SOURCE_CHARS_WIDE = 500; // Source truncation with file_pattern
+const CHARS_PER_TOKEN = 3.5; // Approximate chars-per-token for budget calculation
+const DEFAULT_MAX_REGEX_RESULTS = 50; // Regex without file_pattern — tighter cap to limit timeout
+const JSON_OVERHEAD_PER_MATCH = 40; // Estimated JSON serialization overhead per TextMatch
+// SEC-003: Detect common catastrophic backtracking patterns (ReDoS)
+const REDOS_PATTERNS = [
+    /\(.*[+*].*\)[+*]/, // Nested quantifiers: (a+)+ or (a*)*
+    /\(.*\|.*\)[+*]/, // Alternation with quantifier: (a|b)+
+    /\(.*[+*].*\)\{/, // Nested quantifier with range: (a+){2,}
+    /\([^)]*\\[dDwWsS][+*].*\)[+*]/, // Character class with nested quantifier
+];
+function isSafeRegex(pattern) {
+    return !REDOS_PATTERNS.some((p) => p.test(pattern));
+}
 /** Binary/non-text extensions to skip during text search */
 const BINARY_EXTENSIONS = new Set([
     ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".ico", ".svg",
@@ -25,128 +43,318 @@ const BINARY_EXTENSIONS = new Set([
     ".db", ".sqlite", ".sqlite3",
     ".lock",
 ]);
+// ── Private helpers ─────────────────────────────────────
+/** Check if a symbol matches the active kind and file_pattern filters. */
+function matchesSymbolFilters(symbol, options) {
+    if (options?.kind && symbol.kind !== options.kind)
+        return false;
+    if (options?.file_pattern && !matchFilePattern(symbol.file, options.file_pattern))
+        return false;
+    return true;
+}
 /**
- * Match a file path against a simple glob pattern.
- * Supports: "*.ts", "src/*.ts", "src/**\/*.ts", "**\/*.test.ts"
+ * Apply detail-level shaping, source truncation, and field cleanup.
+ * Compact: ~15 tok/result. Standard: signature + truncated source. Full: unlimited.
  */
-function matchFilePattern(filePath, pattern) {
-    // Exact match
-    if (filePath === pattern)
-        return true;
-    // "**\/" prefix — match anywhere in path
-    if (pattern.startsWith("**/")) {
-        const suffix = pattern.slice(3);
-        // Recursively match the suffix against every segment tail
-        return matchFilePattern(filePath, suffix) ||
-            filePath.includes("/" + suffix) ||
-            matchFileSuffix(filePath, suffix);
+function shapeSearchResults(results, detail, includeSource, options) {
+    if (detail === "compact") {
+        return results.map((r) => ({
+            symbol: {
+                id: r.symbol.id,
+                name: r.symbol.name,
+                kind: r.symbol.kind,
+                file: r.symbol.file,
+                start_line: r.symbol.start_line,
+            },
+            score: r.score,
+        }));
     }
-    // "*" at the start — match extension-style patterns like "*.ts"
-    if (pattern.startsWith("*") && !pattern.includes("/")) {
-        const suffix = pattern.slice(1);
-        return filePath.endsWith(suffix);
+    let shaped = results;
+    if (!includeSource) {
+        shaped = shaped.map((r) => {
+            const { source: _source, ...symbolWithoutSource } = r.symbol;
+            return { ...r, symbol: symbolWithoutSource };
+        });
     }
-    // "dir/**" — match everything under directory (e.g., "src/**")
-    if (pattern.endsWith("/**")) {
-        const prefix = pattern.slice(0, -3);
-        return filePath.startsWith(prefix + "/") || filePath === prefix;
+    const defaultSourceChars = detail === "full" ? undefined
+        : (includeSource && !options?.file_pattern) ? DEFAULT_SOURCE_CHARS_NARROW : DEFAULT_SOURCE_CHARS_WIDE;
+    const sourceChars = options?.source_chars ?? (includeSource ? defaultSourceChars : undefined);
+    if (includeSource && sourceChars !== undefined && sourceChars > 0) {
+        shaped = shaped.map((r) => {
+            const source = r.symbol.source;
+            if (source && source.length > sourceChars) {
+                return { ...r, symbol: { ...r.symbol, source: source.slice(0, sourceChars) + "..." } };
+            }
+            return r;
+        });
     }
-    // Pattern with "**" in the middle (e.g., "src/**/*.ts")
-    if (pattern.includes("/**/")) {
-        const [prefix, suffix] = splitFirst(pattern, "/**/");
-        if (!filePath.startsWith(prefix + "/") && filePath !== prefix)
-            return false;
-        const rest = filePath.slice(prefix.length + 1);
-        return matchFilePattern(rest, suffix) ||
-            matchFilePattern(rest, "**/" + suffix);
+    return shaped.map((r) => {
+        const { tokens: _tokens, repo: _repo, ...cleanSymbol } = r.symbol;
+        return { ...r, symbol: cleanSymbol };
+    });
+}
+/** Validate regex for ReDoS safety and compile without g/y flags, or throw descriptive error. */
+function compileSearchRegex(query) {
+    if (!isSafeRegex(query)) {
+        throw new Error("Regex pattern rejected: potential catastrophic backtracking (ReDoS)");
     }
-    // Simple directory prefix + filename pattern (e.g., "src/*.ts")
-    if (pattern.includes("/") && pattern.includes("*")) {
-        const lastSlash = pattern.lastIndexOf("/");
-        const dirPart = pattern.slice(0, lastSlash);
-        const filePart = pattern.slice(lastSlash + 1);
-        const fileLastSlash = filePath.lastIndexOf("/");
-        const fileDir = fileLastSlash >= 0 ? filePath.slice(0, fileLastSlash) : "";
-        const fileName = fileLastSlash >= 0 ? filePath.slice(fileLastSlash + 1) : filePath;
-        if (fileDir !== dirPart)
-            return false;
-        return matchFilePattern(fileName, filePart);
+    try {
+        // No g/y flags — regex is reused across files; stateful flags cause alternating matches
+        return new RegExp(query);
     }
-    // No wildcards: substring match on the full path
-    // "risk.service.ts" matches "src/lib/services/risk/risk.service.ts"
-    // "validators" matches "src/lib/validators/schema.ts"
-    if (!pattern.includes("*")) {
-        return filePath.includes(pattern);
+    catch (err) {
+        const message = err instanceof Error ? err.message : String(err);
+        throw new Error(`Invalid regex pattern: ${message}`);
     }
-    return false;
 }
-function matchFileSuffix(filePath, suffix) {
-    if (suffix.startsWith("*")) {
-        const ext = suffix.slice(1);
-        return filePath.endsWith(ext);
+// ── Ripgrep backend ────────────────────────────────────
+/** Directories always excluded from ripgrep search */
+const RG_EXCLUDE_DIRS = [
+    "node_modules", ".git", ".next", "dist", ".codesift", "coverage",
+    ".playwright-mcp", "__pycache__", ".mypy_cache", ".tox",
+];
+/** Detect whether `rg` (ripgrep) is available on this system. Cached at module level. */
+let rgAvailable = null;
+function hasRipgrep() {
+    if (rgAvailable !== null)
+        return rgAvailable;
+    try {
+        execFileSync("rg", ["--version"], { stdio: "pipe", timeout: 2000 });
+        rgAvailable = true;
     }
-    return filePath.endsWith("/" + suffix) || filePath === suffix;
-}
-function splitFirst(str, sep) {
-    const idx = str.indexOf(sep);
-    if (idx < 0)
-        return [str, ""];
-    return [str.slice(0, idx), str.slice(idx + sep.length)];
+    catch {
+        rgAvailable = false;
+    }
+    return rgAvailable;
 }
 /**
- * Walk a directory tree collecting all text files.
- * Returns relative paths from rootPath.
- * Unlike the index walk, this includes ALL text files (not just parseable ones).
+ * Search via ripgrep — fast C-based search, parses `rg -n` output.
+ * Falls back to Node.js search if rg is not available.
  */
-async function walkAllTextFiles(rootPath) {
-    const files = [];
-    let limitReached = false;
-    async function walk(dirPath) {
-        if (limitReached)
-            return;
-        let entries;
-        try {
-            entries = await readdir(dirPath, { withFileTypes: true });
+function searchWithRipgrep(root, query, options) {
+    const args = [
+        "-n", // line numbers
+        "--no-heading", // flat output
+        "--max-columns", String(MAX_LINE_CHARS),
+        "--max-columns-preview", // show truncated preview
+        "--max-count", String(Math.min(options.maxResults * 2, 5000)), // per-file cap (generous to hit global max)
+    ];
+    if (!options.regex) {
+        args.push("-F"); // fixed string (literal)
+    }
+    if (options.contextLines > 0) {
+        args.push("-C", String(options.contextLines));
+    }
+    // File pattern → rg glob
+    if (options.filePattern) {
+        // Handle patterns like "src/**" or "*.ts"
+        args.push("--glob", options.filePattern);
+    }
+    // Exclude dirs
+    for (const dir of RG_EXCLUDE_DIRS) {
+        args.push("--glob", `!${dir}`);
+    }
+    args.push("--", query, root);
+    let stdout;
+    try {
+        stdout = execFileSync("rg", args, {
+            encoding: "utf-8",
+            maxBuffer: 20 * 1024 * 1024, // 20MB
+            timeout: SEARCH_TIMEOUT_MS,
+        });
+    }
+    catch (err) {
+        // rg exits 1 = no matches, 2 = error
+        if (err && typeof err === "object" && "status" in err) {
+            const exitCode = err.status;
+            if (exitCode === 1)
+                return []; // no matches
+            if ("stdout" in err && typeof err.stdout === "string") {
+                stdout = err.stdout;
+                if (!stdout)
+                    return [];
+            }
+            else {
+                return [];
+            }
         }
-        catch {
-            return; // permission denied, etc.
+        else {
+            return [];
         }
-        for (const entry of entries) {
-            if (limitReached)
-                return;
-            const fullPath = join(dirPath, entry.name);
-            if (entry.isDirectory()) {
-                if (IGNORE_DIRS.has(entry.name) || entry.name.startsWith(".")) {
-                    continue;
-                }
-                await walk(fullPath);
+    }
+    const matches = [];
+    const rootPrefix = root.endsWith("/") ? root : root + "/";
+    // Parse context blocks: lines separated by "--" separators
+    const blocks = options.contextLines > 0
+        ? stdout.split(/^--$/m)
+        : [stdout];
+    for (const block of blocks) {
+        if (matches.length >= options.maxResults)
+            break;
+        const lines = block.split("\n").filter(Boolean);
+        // In context mode, find the actual match line (has `:` separator) vs context (has `-` separator)
+        // In non-context mode, all lines are matches
+        for (const rawLine of lines) {
+            if (matches.length >= options.maxResults)
+                break;
+            // rg format: /abs/path/file.ts:42:content  (match)
+            // rg format: /abs/path/file.ts-40-content   (context, only with -C)
+            // We only want match lines (with `:` after line number)
+            const matchResult = rawLine.match(/^(.+?):(\d+):(.*)/);
+            if (!matchResult)
+                continue;
+            const [, absPath, lineNumStr, content] = matchResult;
+            if (!absPath || !lineNumStr || content === undefined)
+                continue;
+            const relPath = absPath.startsWith(rootPrefix)
+                ? absPath.slice(rootPrefix.length)
+                : absPath;
+            matches.push({
+                file: relPath,
+                line: parseInt(lineNumStr, 10),
+                content: content,
+            });
+        }
+    }
+    // For context mode, we need to re-parse to attach context_before/context_after
+    // But context_lines=0 is the default now, so this path is rarely hit
+    if (options.contextLines > 0 && blocks.length > 1) {
+        return parseRipgrepContextBlocks(stdout, rootPrefix, options.maxResults, options.contextLines);
+    }
+    return matches;
+}
+/**
+ * Parse rg output with context lines (-C N) into TextMatch[] with context_before/context_after.
+ */
+function parseRipgrepContextBlocks(stdout, rootPrefix, maxResults, contextLines) {
+    const matches = [];
+    const blocks = stdout.split(/^--$/m);
+    for (const block of blocks) {
+        if (matches.length >= maxResults)
+            break;
+        const lines = block.split("\n").filter(Boolean);
+        // Separate match lines from context lines
+        // Match: path:line:content  Context: path-line-content
+        const parsed = [];
+        for (const raw of lines) {
+            // Try match line first (colon after line number)
+            const matchLine = raw.match(/^(.+?):(\d+):(.*)/);
+            if (matchLine && matchLine[1] && matchLine[2] && matchLine[3] !== undefined) {
+                parsed.push({
+                    path: matchLine[1].startsWith(rootPrefix) ? matchLine[1].slice(rootPrefix.length) : matchLine[1],
+                    line: parseInt(matchLine[2], 10),
+                    content: matchLine[3],
+                    isMatch: true,
+                });
+                continue;
+            }
+            // Try context line (hyphen after line number)
+            const ctxLine = raw.match(/^(.+?)-(\d+)-(.*)/);
+            if (ctxLine && ctxLine[1] && ctxLine[2] && ctxLine[3] !== undefined) {
+                parsed.push({
+                    path: ctxLine[1].startsWith(rootPrefix) ? ctxLine[1].slice(rootPrefix.length) : ctxLine[1],
+                    line: parseInt(ctxLine[2], 10),
+                    content: ctxLine[3],
+                    isMatch: false,
+                });
+            }
+        }
+        // Build TextMatch for each match line with surrounding context
+        for (let i = 0; i < parsed.length; i++) {
+            const p = parsed[i];
+            if (!p.isMatch)
+                continue;
+            if (matches.length >= maxResults)
+                break;
+            const contextBefore = [];
+            const contextAfter = [];
+            // Collect context before
+            for (let j = Math.max(0, i - contextLines); j < i; j++) {
+                const ctx = parsed[j];
+                if (ctx && !ctx.isMatch)
+                    contextBefore.push(ctx.content);
             }
-            else if (entry.isFile()) {
-                const ext = extname(entry.name);
-                // Skip binary files
-                if (BINARY_EXTENSIONS.has(ext))
-                    continue;
-                // Skip files that are too large
-                try {
-                    const fileStat = await stat(fullPath);
-                    if (fileStat.size > MAX_FILE_SIZE)
-                        continue;
-                }
-                catch {
-                    continue;
-                }
-                files.push(relative(rootPath, fullPath));
-                if (files.length >= MAX_WALK_FILES) {
-                    console.warn(`[codesift] walkAllTextFiles: reached ${MAX_WALK_FILES} file limit, returning partial results`);
-                    limitReached = true;
-                    return;
-                }
+            // Collect context after
+            for (let j = i + 1; j <= Math.min(parsed.length - 1, i + contextLines); j++) {
+                const ctx = parsed[j];
+                if (ctx && !ctx.isMatch)
+                    contextAfter.push(ctx.content);
             }
+            const match = { file: p.path, line: p.line, content: p.content };
+            if (contextBefore.length > 0)
+                match.context_before = contextBefore;
+            if (contextAfter.length > 0)
+                match.context_after = contextAfter;
+            matches.push(match);
+        }
+    }
+    return matches;
+}
+// ── Node.js fallback search ───────────────────────────
+/** Search file content for line matches, collecting context lines around each hit. */
+function searchFileForMatches(content, filePath, query, regex, contextLines, maxMatches) {
+    const lines = content.split("\n");
+    const matches = [];
+    for (let i = 0; i < lines.length; i++) {
+        if (matches.length >= maxMatches)
+            break;
+        const line = lines[i];
+        if (line === undefined)
+            continue;
+        const isMatch = regex ? regex.test(line) : line.includes(query);
+        if (!isMatch)
+            continue;
+        const contextBefore = [];
+        for (let j = Math.max(0, i - contextLines); j < i; j++) {
+            const ctxLine = lines[j];
+            if (ctxLine !== undefined)
+                contextBefore.push(ctxLine);
+        }
+        const contextAfter = [];
+        for (let j = i + 1; j <= Math.min(lines.length - 1, i + contextLines); j++) {
+            const ctxLine = lines[j];
+            if (ctxLine !== undefined)
+                contextAfter.push(ctxLine);
+        }
+        const truncLine = line.length > MAX_LINE_CHARS
+            ? line.slice(0, MAX_LINE_CHARS) + "..."
+            : line;
+        const match = {
+            file: filePath,
+            line: i + 1,
+            content: truncLine,
+        };
+        if (contextBefore.length > 0)
+            match.context_before = contextBefore;
+        if (contextAfter.length > 0)
+            match.context_after = contextAfter;
+        matches.push(match);
+    }
+    return matches;
+}
+/** Aggregate flat TextMatch[] into per-file groups with counts and first_match preview. */
+function groupMatchesByFile(matches) {
+    const groups = new Map();
+    for (const m of matches) {
+        const existing = groups.get(m.file);
+        if (existing) {
+            existing.count++;
+            existing.lines.push(m.line);
+        }
+        else {
+            groups.set(m.file, {
+                file: m.file,
+                count: 1,
+                lines: [m.line],
+                first_match: m.content.length > MAX_FIRST_MATCH_CHARS
+                    ? m.content.slice(0, MAX_FIRST_MATCH_CHARS) + "..."
+                    : m.content,
+            });
         }
     }
-    await walk(rootPath);
-    return files;
+    return [...groups.values()];
 }
+// ── Public API ──────────────────────────────────────────
 /**
  * Search symbols by name/signature/docstring using BM25 ranking.
  * Supports filtering by symbol kind and file pattern.
@@ -161,149 +369,175 @@ export async function searchSymbols(repo, query, options) {
         throw new Error(`Repository "${repo}" not found. Run index_folder first.`);
     }
     const config = loadConfig();
-    const topK = options?.top_k ?? config.defaultTopK;
     const includeSource = options?.include_source ?? true;
-    const hasKindFilter = !!options?.kind;
-    const hasFileFilter = !!options?.file_pattern;
-    const hasFilters = hasKindFilter || hasFileFilter;
+    const defaultK = (includeSource && !options?.file_pattern) ? DEFAULT_TOP_K_WITH_SOURCE : config.defaultTopK;
+    const topK = options?.top_k ?? defaultK;
+    const hasFilters = !!options?.kind || !!options?.file_pattern;
     let results;
     if (!query.trim()) {
-        // Empty query: return all symbols matching filters (no BM25 scoring)
         const allSymbols = [...index.symbols.values()];
-        let filtered = allSymbols;
-        if (hasKindFilter) {
-            const kind = options.kind;
-            filtered = filtered.filter((s) => s.kind === kind);
-        }
-        if (hasFileFilter) {
-            const pattern = options.file_pattern;
-            filtered = filtered.filter((s) => matchFilePattern(s.file, pattern));
-        }
-        results = filtered.slice(0, topK).map((symbol) => ({
-            symbol,
-            score: 0,
-        }));
+        const filtered = allSymbols.filter((s) => matchesSymbolFilters(s, options));
+        results = filtered.slice(0, topK).map((symbol) => ({ symbol, score: 0 }));
     }
     else {
-        // When filters are active, search a wider candidate set from BM25
-        // so that post-filter truncation doesn't lose relevant results.
-        const searchTopK = hasFilters ? Math.max(topK * 5, 200) : topK;
+        const searchTopK = hasFilters ? Math.max(topK * BM25_FILTER_MULTIPLIER, BM25_FILTER_MIN_K) : topK;
         results = searchBM25(index, query, searchTopK, config.bm25FieldWeights);
-        // Filter by symbol kind
-        if (hasKindFilter) {
-            const kind = options.kind;
-            results = results.filter((r) => r.symbol.kind === kind);
-        }
-        // Filter by file pattern
-        if (hasFileFilter) {
-            const pattern = options.file_pattern;
-            results = results.filter((r) => matchFilePattern(r.symbol.file, pattern));
-        }
-        // Re-truncate to requested top_k after filtering
+        results = results.filter((r) => matchesSymbolFilters(r.symbol, options));
         results = results.slice(0, topK);
+        results = applyCutoff(results);
     }
-    // Strip source if not requested
-    if (!includeSource) {
-        results = results.map((r) => {
-            const { source: _source, ...symbolWithoutSource } = r.symbol;
-            return { ...r, symbol: symbolWithoutSource };
-        });
+    if (options?.rerank && results.length > 1) {
+        const { rerankResults } = await import("../search/reranker.js");
+        results = await rerankResults(query, results);
     }
-    // Truncate source to source_chars limit (default 500 when include_source=true)
-    const sourceChars = options?.source_chars ?? (includeSource ? 500 : undefined);
-    if (includeSource && sourceChars !== undefined && sourceChars > 0) {
-        results = results.map((r) => {
-            const source = r.symbol.source;
-            if (source && source.length > sourceChars) {
-                return {
-                    ...r,
-                    symbol: { ...r.symbol, source: source.slice(0, sourceChars) + "..." },
-                };
-            }
-            return r;
-        });
+    const detail = options?.detail_level ?? "standard";
+    const shaped = shapeSearchResults(results, detail, includeSource, options);
+    // Token budget: greedily pack results until budget exhausted
+    const budget = options?.token_budget;
+    if (budget && budget > 0) {
+        const packed = [];
+        let used = 0;
+        for (const r of shaped) {
+            const tok = Math.ceil(JSON.stringify(r).length / CHARS_PER_TOKEN);
+            if (used + tok > budget)
+                break;
+            packed.push(r);
+            used += tok;
+        }
+        return packed;
     }
-    return results;
+    return shaped;
 }
-/**
- * Full-text search across all files in a repository.
- * Walks the filesystem to search ALL text files, not just indexed ones.
- */
+/**
+ * Text search over a repository that has already been indexed.
+ *
+ * Matches come from `searchWithRipgrep` when `hasRipgrep()` is true; otherwise
+ * candidate files are read and scanned in-process via `searchFileForMatches`.
+ *
+ * The shape of the return value depends on `options`:
+ * - `ranked` with at least one match: the match array, passed through
+ *   `classifyHitsWithSymbols` when a BM25 index is available.
+ * - compact mode without `group_by_file`: a single newline-joined string.
+ * - `group_by_file`, `auto_group` with more than AUTO_GROUP_THRESHOLD matches,
+ *   or an estimated size above MAX_RESPONSE_CHARS: the result of
+ *   `groupMatchesByFile`.
+ * - otherwise: the flat match array.
+ *
+ * @param repo - Repository identifier passed to `getCodeIndex`.
+ * @param query - Search text; validated as a regex pattern when `options.regex` is true.
+ * @param options - Optional settings read here: `regex`, `file_pattern`,
+ *   `max_results`, `context_lines`, `ranked`, `compact`, `auto_group`,
+ *   `group_by_file`.
+ * @throws Error when no index exists for `repo`; also propagates whatever
+ *   `compileSearchRegex` throws for a rejected pattern.
+ */
 export async function searchText(repo, query, options) {
     const index = await getCodeIndex(repo);
     if (!index) {
         throw new Error(`Repository "${repo}" not found. Run index_folder first.`);
     }
-    const contextLines = options?.context_lines ?? 2;
     const useRegex = options?.regex ?? false;
     const filePattern = options?.file_pattern;
-    const maxResults = options?.max_results ?? DEFAULT_MAX_TEXT_MATCHES;
-    let regex = null;
+    // An explicit max_results wins; otherwise a regex search with no file
+    // pattern uses DEFAULT_MAX_REGEX_RESULTS and everything else uses
+    // DEFAULT_MAX_TEXT_MATCHES.
+    const maxResults = options?.max_results
+        ?? (useRegex && !filePattern ? DEFAULT_MAX_REGEX_RESULTS : DEFAULT_MAX_TEXT_MATCHES);
+    const contextLines = options?.context_lines ?? 0; // OPT-2: default 0 (was 2) — saves ~30 tokens/match
+    // Validate regex safety before passing to ripgrep
     if (useRegex) {
-        try {
-            regex = new RegExp(query);
-        }
-        catch (err) {
-            const message = err instanceof Error ? err.message : String(err);
-            throw new Error(`Invalid regex pattern: ${message}`);
-        }
+        compileSearchRegex(query); // throws on ReDoS patterns
     }
-    // Walk the filesystem to find ALL text files (not just indexed/parseable ones)
-    const allFiles = await walkAllTextFiles(index.root);
-    const matches = [];
-    for (const filePath of allFiles) {
-        if (matches.length >= maxResults)
-            break;
-        // Filter by file pattern
-        if (filePattern && !matchFilePattern(filePath, filePattern)) {
-            continue;
-        }
-        const fullPath = join(index.root, filePath);
-        let content;
-        try {
-            content = await readFile(fullPath, "utf-8");
+    let matches;
+    // OPT-1: Use ripgrep when available (10x faster)
+    if (hasRipgrep()) {
+        matches = searchWithRipgrep(index.root, query, {
+            regex: useRegex,
+            filePattern: filePattern,
+            maxResults: maxResults,
+            contextLines: contextLines,
+        });
+    }
+    else {
+        // Node.js fallback
+        const regex = useRegex ? compileSearchRegex(query) : null;
+        let allFiles;
+        // With a file pattern, candidates are the paths recorded in the index;
+        // without one, the tree under index.root is walked with a filter that
+        // rejects extensions listed in BINARY_EXTENSIONS.
+        if (filePattern) {
+            allFiles = index.files.map((f) => f.path);
         }
-        catch {
-            continue; // File may have been deleted or moved
+        else {
+            allFiles = await walkDirectory(index.root, {
+                fileFilter: (ext) => !BINARY_EXTENSIONS.has(ext),
+                maxFiles: MAX_WALK_FILES,
+                relative: true,
+            });
         }
-        const lines = content.split("\n");
-        for (let i = 0; i < lines.length; i++) {
+        matches = [];
+        const searchStart = Date.now();
+        for (const filePath of allFiles) {
             if (matches.length >= maxResults)
                 break;
-            const line = lines[i];
-            if (line === undefined)
-                continue;
-            const isMatch = regex ? regex.test(line) : line.includes(query);
-            if (!isMatch)
+            // Stop visiting further files once SEARCH_TIMEOUT_MS has elapsed;
+            // matches collected so far are kept.
+            if (Date.now() - searchStart > SEARCH_TIMEOUT_MS)
+                break;
+            if (filePattern && !matchFilePattern(filePath, filePattern))
                 continue;
-            const contextBefore = [];
-            const contextAfter = [];
-            for (let j = Math.max(0, i - contextLines); j < i; j++) {
-                const ctxLine = lines[j];
-                if (ctxLine !== undefined) {
-                    contextBefore.push(ctxLine);
-                }
+            const fullPath = join(index.root, filePath);
+            let content;
+            try {
+                content = await readFile(fullPath, "utf-8");
             }
-            for (let j = i + 1; j <= Math.min(lines.length - 1, i + contextLines); j++) {
-                const ctxLine = lines[j];
-                if (ctxLine !== undefined) {
-                    contextAfter.push(ctxLine);
-                }
+            catch {
+                continue;
             }
-            const match = {
-                file: filePath,
-                line: i + 1, // 1-based
-                content: line,
-            };
-            if (contextBefore.length > 0) {
-                match.context_before = contextBefore;
+            // Last argument is the number of matches still allowed before
+            // maxResults is reached.
+            const fileMatches = searchFileForMatches(content, filePath, query, regex, contextLines, maxResults - matches.length);
+            matches.push(...fileMatches);
+        }
+    }
+    // Ranked mode: classify hits with symbol context, deduplicate, and sort by centrality.
+    // Takes precedence over auto_group/compact — returns TextMatch[] with containing_symbol.
+    if (options?.ranked && matches.length > 0) {
+        try {
+            const { classifyHitsWithSymbols } = await import("./search-ranker.js");
+            const bm25Idx = await getBM25Index(repo);
+            if (bm25Idx) {
+                matches = await classifyHitsWithSymbols(matches, index, { centrality: bm25Idx.centrality });
             }
-            if (contextAfter.length > 0) {
-                match.context_after = contextAfter;
+        }
+        catch {
+            // Graceful fallback — return unranked matches if pipeline fails
+        }
+        return matches;
+    }
+    // OPT-3: Compact format — grep-like `file:line: content` output, ~50% fewer tokens than JSON
+    // Auto-enable when auto_group is set (caller is optimization-aware) and results are small
+    // An explicit options.compact (true or false) overrides the auto_group heuristic,
+    // because `??` only falls through on null/undefined.
+    const useCompact = options?.compact
+        ?? (options?.auto_group && contextLines === 0 && matches.length > 0 && matches.length <= AUTO_GROUP_THRESHOLD);
+    if (useCompact && !options?.group_by_file) {
+        // Group by file to avoid repeating long paths (saves ~30% on multi-match files)
+        const groups = new Map();
+        for (const m of matches) {
+            let g = groups.get(m.file);
+            if (!g) {
+                g = [];
+                groups.set(m.file, g);
             }
-            matches.push(match);
+            g.push(`  ${m.line}: ${m.content}`);
+        }
+        if (groups.size === matches.length) {
+            // Each file has 1 match — flat format is fine
+            return matches.map((m) => `${m.file}:${m.line}: ${m.content}`).join("\n");
         }
+        // Grouped: file header + indented matches
+        const parts = [];
+        for (const [file, lines] of groups) {
+            parts.push(`${file}\n${lines.join("\n")}`);
+        }
+        return parts.join("\n");
+    }
+    // Estimate response size; force grouping when output would be enormous
+    const estimatedChars = matches.reduce((sum, m) => {
+        let chars = m.file.length + m.content.length + JSON_OVERHEAD_PER_MATCH;
+        if (m.context_before)
+            chars += m.context_before.reduce((s, l) => s + l.length, 0);
+        if (m.context_after)
+            chars += m.context_after.reduce((s, l) => s + l.length, 0);
+        return sum + chars;
+    }, 0);
+    const shouldGroup = options?.group_by_file
+        || (options?.auto_group && matches.length > AUTO_GROUP_THRESHOLD)
+        || estimatedChars > MAX_RESPONSE_CHARS;
+    if (shouldGroup) {
+        return groupMatchesByFile(matches);
     }
     return matches;
 }
+// ---------------------------------------------------------------------------
+// Semantic search — standalone wrapper around retrieval infrastructure
+// ---------------------------------------------------------------------------
+/**
+ * Forward a semantic query to `handleSemanticQuery` and return its payload as text.
+ *
+ * @param repo - Repository identifier handed to the handler unchanged.
+ * @param query - Query text placed in the request's `query` field.
+ * @param options - Optional settings; `top_k`, `file_pattern` (sent as
+ *   `file_filter`), `exclude_tests` and `rerank` are forwarded.
+ * @returns The handler's `data` when it is already a string, otherwise its
+ *   JSON serialization.
+ */
+export async function semanticSearch(repo, query, options) {
+    // Loaded on demand rather than at module load.
+    const { handleSemanticQuery } = await import("../retrieval/semantic-handlers.js");
+    const request = {
+        type: "semantic",
+        query: query,
+        top_k: options?.top_k,
+        file_filter: options?.file_pattern,
+        exclude_tests: options?.exclude_tests,
+        rerank: options?.rerank,
+    };
+    const response = await handleSemanticQuery(repo, request);
+    if (typeof response.data === "string") {
+        return response.data;
+    }
+    return JSON.stringify(response.data);
+}
 //# sourceMappingURL=search-tools.js.map