codesift-mcp 0.1.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +66 -21
- package/README.md +402 -56
- package/dist/cli/args.d.ts +2 -0
- package/dist/cli/args.d.ts.map +1 -1
- package/dist/cli/args.js +11 -0
- package/dist/cli/args.js.map +1 -1
- package/dist/cli/commands.d.ts.map +1 -1
- package/dist/cli/commands.js +177 -67
- package/dist/cli/commands.js.map +1 -1
- package/dist/cli/help.d.ts +1 -1
- package/dist/cli/help.d.ts.map +1 -1
- package/dist/cli/help.js +157 -0
- package/dist/cli/help.js.map +1 -1
- package/dist/cli/hooks.d.ts +3 -0
- package/dist/cli/hooks.d.ts.map +1 -0
- package/dist/cli/hooks.js +163 -0
- package/dist/cli/hooks.js.map +1 -0
- package/dist/cli/setup.d.ts +25 -0
- package/dist/cli/setup.d.ts.map +1 -0
- package/dist/cli/setup.js +400 -0
- package/dist/cli/setup.js.map +1 -0
- package/dist/config.d.ts +2 -0
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +2 -0
- package/dist/config.js.map +1 -1
- package/dist/formatters-shortening.d.ts +7 -0
- package/dist/formatters-shortening.d.ts.map +1 -0
- package/dist/formatters-shortening.js +68 -0
- package/dist/formatters-shortening.js.map +1 -0
- package/dist/formatters.d.ts +314 -0
- package/dist/formatters.d.ts.map +1 -0
- package/dist/formatters.js +396 -0
- package/dist/formatters.js.map +1 -0
- package/dist/instructions.d.ts +6 -0
- package/dist/instructions.d.ts.map +1 -0
- package/dist/instructions.js +72 -0
- package/dist/instructions.js.map +1 -0
- package/dist/lsp/lsp-client.d.ts +21 -0
- package/dist/lsp/lsp-client.d.ts.map +1 -0
- package/dist/lsp/lsp-client.js +122 -0
- package/dist/lsp/lsp-client.js.map +1 -0
- package/dist/lsp/lsp-manager.d.ts +12 -0
- package/dist/lsp/lsp-manager.d.ts.map +1 -0
- package/dist/lsp/lsp-manager.js +82 -0
- package/dist/lsp/lsp-manager.js.map +1 -0
- package/dist/lsp/lsp-servers.d.ts +13 -0
- package/dist/lsp/lsp-servers.d.ts.map +1 -0
- package/dist/lsp/lsp-servers.js +57 -0
- package/dist/lsp/lsp-servers.js.map +1 -0
- package/dist/lsp/lsp-tools.d.ts +67 -0
- package/dist/lsp/lsp-tools.d.ts.map +1 -0
- package/dist/lsp/lsp-tools.js +359 -0
- package/dist/lsp/lsp-tools.js.map +1 -0
- package/dist/parser/extractors/_shared.d.ts +11 -0
- package/dist/parser/extractors/_shared.d.ts.map +1 -0
- package/dist/parser/extractors/_shared.js +38 -0
- package/dist/parser/extractors/_shared.js.map +1 -0
- package/dist/parser/extractors/astro.d.ts +15 -0
- package/dist/parser/extractors/astro.d.ts.map +1 -0
- package/dist/parser/extractors/astro.js +104 -0
- package/dist/parser/extractors/astro.js.map +1 -0
- package/dist/parser/extractors/conversation.d.ts +16 -0
- package/dist/parser/extractors/conversation.d.ts.map +1 -0
- package/dist/parser/extractors/conversation.js +196 -0
- package/dist/parser/extractors/conversation.js.map +1 -0
- package/dist/parser/extractors/go.d.ts.map +1 -1
- package/dist/parser/extractors/go.js +22 -45
- package/dist/parser/extractors/go.js.map +1 -1
- package/dist/parser/extractors/python.d.ts +1 -1
- package/dist/parser/extractors/python.d.ts.map +1 -1
- package/dist/parser/extractors/python.js +19 -50
- package/dist/parser/extractors/python.js.map +1 -1
- package/dist/parser/extractors/rust.d.ts +1 -1
- package/dist/parser/extractors/rust.d.ts.map +1 -1
- package/dist/parser/extractors/rust.js +7 -34
- package/dist/parser/extractors/rust.js.map +1 -1
- package/dist/parser/extractors/typescript.d.ts +1 -1
- package/dist/parser/extractors/typescript.d.ts.map +1 -1
- package/dist/parser/extractors/typescript.js +99 -68
- package/dist/parser/extractors/typescript.js.map +1 -1
- package/dist/parser/parser-manager.d.ts.map +1 -1
- package/dist/parser/parser-manager.js +12 -2
- package/dist/parser/parser-manager.js.map +1 -1
- package/dist/parser/symbol-extractor.d.ts +2 -0
- package/dist/parser/symbol-extractor.d.ts.map +1 -1
- package/dist/parser/symbol-extractor.js +2 -0
- package/dist/parser/symbol-extractor.js.map +1 -1
- package/dist/register-tools.d.ts +127 -0
- package/dist/register-tools.d.ts.map +1 -0
- package/dist/register-tools.js +1453 -0
- package/dist/register-tools.js.map +1 -0
- package/dist/retrieval/codebase-retrieval.d.ts +4 -26
- package/dist/retrieval/codebase-retrieval.d.ts.map +1 -1
- package/dist/retrieval/codebase-retrieval.js +105 -403
- package/dist/retrieval/codebase-retrieval.js.map +1 -1
- package/dist/retrieval/retrieval-constants.d.ts +27 -0
- package/dist/retrieval/retrieval-constants.d.ts.map +1 -0
- package/dist/retrieval/retrieval-constants.js +27 -0
- package/dist/retrieval/retrieval-constants.js.map +1 -0
- package/dist/retrieval/retrieval-schemas.d.ts +107 -0
- package/dist/retrieval/retrieval-schemas.d.ts.map +1 -0
- package/dist/retrieval/retrieval-schemas.js +102 -0
- package/dist/retrieval/retrieval-schemas.js.map +1 -0
- package/dist/retrieval/retrieval-utils.d.ts +40 -0
- package/dist/retrieval/retrieval-utils.d.ts.map +1 -0
- package/dist/retrieval/retrieval-utils.js +139 -0
- package/dist/retrieval/retrieval-utils.js.map +1 -0
- package/dist/retrieval/semantic-handlers.d.ts +8 -0
- package/dist/retrieval/semantic-handlers.d.ts.map +1 -0
- package/dist/retrieval/semantic-handlers.js +152 -0
- package/dist/retrieval/semantic-handlers.js.map +1 -0
- package/dist/search/bm25.d.ts +6 -1
- package/dist/search/bm25.d.ts.map +1 -1
- package/dist/search/bm25.js +95 -32
- package/dist/search/bm25.js.map +1 -1
- package/dist/search/chunker.d.ts +10 -0
- package/dist/search/chunker.d.ts.map +1 -1
- package/dist/search/chunker.js +63 -11
- package/dist/search/chunker.js.map +1 -1
- package/dist/search/reranker.d.ts +15 -0
- package/dist/search/reranker.d.ts.map +1 -0
- package/dist/search/reranker.js +126 -0
- package/dist/search/reranker.js.map +1 -0
- package/dist/search/semantic.d.ts +1 -1
- package/dist/search/semantic.d.ts.map +1 -1
- package/dist/search/semantic.js +40 -45
- package/dist/search/semantic.js.map +1 -1
- package/dist/server-helpers.d.ts +29 -0
- package/dist/server-helpers.d.ts.map +1 -0
- package/dist/server-helpers.js +312 -0
- package/dist/server-helpers.js.map +1 -0
- package/dist/server.d.ts +1 -1
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +11 -271
- package/dist/server.js.map +1 -1
- package/dist/storage/_shared.d.ts +9 -0
- package/dist/storage/_shared.d.ts.map +1 -0
- package/dist/storage/_shared.js +26 -0
- package/dist/storage/_shared.js.map +1 -0
- package/dist/storage/chunk-store.d.ts.map +1 -1
- package/dist/storage/chunk-store.js +23 -63
- package/dist/storage/chunk-store.js.map +1 -1
- package/dist/storage/embedding-store.d.ts +6 -3
- package/dist/storage/embedding-store.d.ts.map +1 -1
- package/dist/storage/embedding-store.js +54 -30
- package/dist/storage/embedding-store.js.map +1 -1
- package/dist/storage/graph-store.d.ts +48 -0
- package/dist/storage/graph-store.d.ts.map +1 -0
- package/dist/storage/graph-store.js +52 -0
- package/dist/storage/graph-store.js.map +1 -0
- package/dist/storage/index-store.d.ts +5 -0
- package/dist/storage/index-store.d.ts.map +1 -1
- package/dist/storage/index-store.js +28 -16
- package/dist/storage/index-store.js.map +1 -1
- package/dist/storage/registry.d.ts +4 -0
- package/dist/storage/registry.d.ts.map +1 -1
- package/dist/storage/registry.js +16 -16
- package/dist/storage/registry.js.map +1 -1
- package/dist/storage/usage-stats.d.ts +6 -0
- package/dist/storage/usage-stats.d.ts.map +1 -1
- package/dist/storage/usage-stats.js +59 -11
- package/dist/storage/usage-stats.js.map +1 -1
- package/dist/storage/usage-tracker.d.ts +3 -0
- package/dist/storage/usage-tracker.d.ts.map +1 -1
- package/dist/storage/usage-tracker.js +50 -132
- package/dist/storage/usage-tracker.js.map +1 -1
- package/dist/storage/watcher.d.ts +2 -1
- package/dist/storage/watcher.d.ts.map +1 -1
- package/dist/storage/watcher.js +16 -16
- package/dist/storage/watcher.js.map +1 -1
- package/dist/tools/ast-query-tools.d.ts +29 -0
- package/dist/tools/ast-query-tools.d.ts.map +1 -0
- package/dist/tools/ast-query-tools.js +110 -0
- package/dist/tools/ast-query-tools.js.map +1 -0
- package/dist/tools/boundary-tools.d.ts +31 -0
- package/dist/tools/boundary-tools.d.ts.map +1 -0
- package/dist/tools/boundary-tools.js +62 -0
- package/dist/tools/boundary-tools.js.map +1 -0
- package/dist/tools/clone-tools.d.ts +35 -0
- package/dist/tools/clone-tools.d.ts.map +1 -0
- package/dist/tools/clone-tools.js +181 -0
- package/dist/tools/clone-tools.js.map +1 -0
- package/dist/tools/community-tools.d.ts +23 -0
- package/dist/tools/community-tools.d.ts.map +1 -0
- package/dist/tools/community-tools.js +297 -0
- package/dist/tools/community-tools.js.map +1 -0
- package/dist/tools/complexity-tools.d.ts +34 -0
- package/dist/tools/complexity-tools.d.ts.map +1 -0
- package/dist/tools/complexity-tools.js +135 -0
- package/dist/tools/complexity-tools.js.map +1 -0
- package/dist/tools/context-tools.d.ts +44 -3
- package/dist/tools/context-tools.d.ts.map +1 -1
- package/dist/tools/context-tools.js +329 -99
- package/dist/tools/context-tools.js.map +1 -1
- package/dist/tools/conversation-tools.d.ts +107 -0
- package/dist/tools/conversation-tools.d.ts.map +1 -0
- package/dist/tools/conversation-tools.js +419 -0
- package/dist/tools/conversation-tools.js.map +1 -0
- package/dist/tools/coordinator-tools.d.ts +73 -0
- package/dist/tools/coordinator-tools.d.ts.map +1 -0
- package/dist/tools/coordinator-tools.js +153 -0
- package/dist/tools/coordinator-tools.js.map +1 -0
- package/dist/tools/cross-repo-tools.d.ts +43 -0
- package/dist/tools/cross-repo-tools.d.ts.map +1 -0
- package/dist/tools/cross-repo-tools.js +55 -0
- package/dist/tools/cross-repo-tools.js.map +1 -0
- package/dist/tools/diff-tools.d.ts +4 -1
- package/dist/tools/diff-tools.d.ts.map +1 -1
- package/dist/tools/diff-tools.js +23 -5
- package/dist/tools/diff-tools.js.map +1 -1
- package/dist/tools/frequency-tools.d.ts +46 -0
- package/dist/tools/frequency-tools.d.ts.map +1 -0
- package/dist/tools/frequency-tools.js +184 -0
- package/dist/tools/frequency-tools.js.map +1 -0
- package/dist/tools/generate-tools.d.ts.map +1 -1
- package/dist/tools/generate-tools.js +13 -2
- package/dist/tools/generate-tools.js.map +1 -1
- package/dist/tools/graph-tools.d.ts +44 -11
- package/dist/tools/graph-tools.d.ts.map +1 -1
- package/dist/tools/graph-tools.js +147 -104
- package/dist/tools/graph-tools.js.map +1 -1
- package/dist/tools/hotspot-tools.d.ts +24 -0
- package/dist/tools/hotspot-tools.d.ts.map +1 -0
- package/dist/tools/hotspot-tools.js +122 -0
- package/dist/tools/hotspot-tools.js.map +1 -0
- package/dist/tools/impact-tools.d.ts +13 -0
- package/dist/tools/impact-tools.d.ts.map +1 -0
- package/dist/tools/impact-tools.js +238 -0
- package/dist/tools/impact-tools.js.map +1 -0
- package/dist/tools/index-tools.d.ts +44 -3
- package/dist/tools/index-tools.d.ts.map +1 -1
- package/dist/tools/index-tools.js +530 -222
- package/dist/tools/index-tools.js.map +1 -1
- package/dist/tools/memory-tools.d.ts +35 -0
- package/dist/tools/memory-tools.d.ts.map +1 -0
- package/dist/tools/memory-tools.js +229 -0
- package/dist/tools/memory-tools.js.map +1 -0
- package/dist/tools/outline-tools.d.ts +24 -13
- package/dist/tools/outline-tools.d.ts.map +1 -1
- package/dist/tools/outline-tools.js +113 -87
- package/dist/tools/outline-tools.js.map +1 -1
- package/dist/tools/pattern-tools.d.ts +32 -0
- package/dist/tools/pattern-tools.d.ts.map +1 -0
- package/dist/tools/pattern-tools.js +116 -0
- package/dist/tools/pattern-tools.js.map +1 -0
- package/dist/tools/report-tools.d.ts +5 -0
- package/dist/tools/report-tools.d.ts.map +1 -0
- package/dist/tools/report-tools.js +167 -0
- package/dist/tools/report-tools.js.map +1 -0
- package/dist/tools/review-diff-tools.d.ts +148 -0
- package/dist/tools/review-diff-tools.d.ts.map +1 -0
- package/dist/tools/review-diff-tools.js +852 -0
- package/dist/tools/review-diff-tools.js.map +1 -0
- package/dist/tools/route-tools.d.ts +32 -0
- package/dist/tools/route-tools.d.ts.map +1 -0
- package/dist/tools/route-tools.js +276 -0
- package/dist/tools/route-tools.js.map +1 -0
- package/dist/tools/search-ranker.d.ts +5 -0
- package/dist/tools/search-ranker.d.ts.map +1 -0
- package/dist/tools/search-ranker.js +142 -0
- package/dist/tools/search-ranker.js.map +1 -0
- package/dist/tools/search-tools.d.ts +24 -1
- package/dist/tools/search-tools.d.ts.map +1 -1
- package/dist/tools/search-tools.js +459 -225
- package/dist/tools/search-tools.js.map +1 -1
- package/dist/tools/secret-tools.d.ts +104 -0
- package/dist/tools/secret-tools.d.ts.map +1 -0
- package/dist/tools/secret-tools.js +410 -0
- package/dist/tools/secret-tools.js.map +1 -0
- package/dist/tools/symbol-tools.d.ts +90 -2
- package/dist/tools/symbol-tools.d.ts.map +1 -1
- package/dist/tools/symbol-tools.js +576 -42
- package/dist/tools/symbol-tools.js.map +1 -1
- package/dist/types.d.ts +34 -1
- package/dist/types.d.ts.map +1 -1
- package/dist/utils/framework-detect.d.ts +5 -0
- package/dist/utils/framework-detect.d.ts.map +1 -0
- package/dist/utils/framework-detect.js +36 -0
- package/dist/utils/framework-detect.js.map +1 -0
- package/dist/utils/glob.d.ts +19 -0
- package/dist/utils/glob.d.ts.map +1 -0
- package/dist/utils/glob.js +74 -0
- package/dist/utils/glob.js.map +1 -0
- package/dist/utils/import-graph.d.ts +29 -0
- package/dist/utils/import-graph.d.ts.map +1 -0
- package/dist/utils/import-graph.js +125 -0
- package/dist/utils/import-graph.js.map +1 -0
- package/dist/utils/test-file.d.ts.map +1 -1
- package/dist/utils/test-file.js +1 -0
- package/dist/utils/test-file.js.map +1 -1
- package/dist/utils/walk.d.ts +45 -0
- package/dist/utils/walk.d.ts.map +1 -0
- package/dist/utils/walk.js +87 -0
- package/dist/utils/walk.js.map +1 -0
- package/package.json +12 -5
- package/rules/codesift.md +187 -0
- package/rules/codesift.mdc +192 -0
- package/rules/codex.md +187 -0
- package/rules/gemini.md +187 -0
|
@@ -1,18 +1,36 @@
|
|
|
1
|
-
import { readFile
|
|
2
|
-
import {
|
|
1
|
+
import { readFile } from "node:fs/promises";
|
|
2
|
+
import { execFileSync } from "node:child_process";
|
|
3
|
+
import { join } from "node:path";
|
|
3
4
|
import { getBM25Index, getCodeIndex } from "./index-tools.js";
|
|
4
|
-
import { searchBM25 } from "../search/bm25.js";
|
|
5
|
+
import { searchBM25, applyCutoff } from "../search/bm25.js";
|
|
5
6
|
import { loadConfig } from "../config.js";
|
|
6
|
-
|
|
7
|
-
|
|
7
|
+
import { walkDirectory } from "../utils/walk.js";
|
|
8
|
+
import { matchFilePattern } from "../utils/glob.js";
|
|
9
|
+
const DEFAULT_MAX_TEXT_MATCHES = 200;
|
|
8
10
|
const MAX_WALK_FILES = 50_000; // Safety limit — stop walking after this many files
|
|
9
|
-
|
|
10
|
-
const
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
11
|
+
const SEARCH_TIMEOUT_MS = 30_000; // Abort search after 30s to prevent 100s+ hangs
|
|
12
|
+
const AUTO_GROUP_THRESHOLD = 50; // Auto-switch to group_by_file above this match count
|
|
13
|
+
const MAX_RESPONSE_CHARS = 80_000; // ~20K tokens — force group_by_file above this
|
|
14
|
+
const MAX_FIRST_MATCH_CHARS = 300; // Cap first_match preview in grouped output
|
|
15
|
+
const MAX_LINE_CHARS = 500; // Truncate individual match lines (minified JS/JSON can be 100K+)
|
|
16
|
+
const DEFAULT_TOP_K_WITH_SOURCE = 10; // Cap results when include_source=true without file_pattern
|
|
17
|
+
const BM25_FILTER_MULTIPLIER = 5; // Widen BM25 candidate set when filters active
|
|
18
|
+
const BM25_FILTER_MIN_K = 200; // Minimum candidate set size when filters active
|
|
19
|
+
const DEFAULT_SOURCE_CHARS_NARROW = 200; // Source truncation without file_pattern (reduce waste)
|
|
20
|
+
const DEFAULT_SOURCE_CHARS_WIDE = 500; // Source truncation with file_pattern
|
|
21
|
+
const CHARS_PER_TOKEN = 3.5; // Approximate chars-per-token for budget calculation
|
|
22
|
+
const DEFAULT_MAX_REGEX_RESULTS = 50; // Regex without file_pattern — tighter cap to limit timeout
|
|
23
|
+
const JSON_OVERHEAD_PER_MATCH = 40; // Estimated JSON serialization overhead per TextMatch
|
|
24
|
+
// SEC-003: Detect common catastrophic backtracking patterns (ReDoS)
|
|
25
|
+
const REDOS_PATTERNS = [
|
|
26
|
+
/\(.*[+*].*\)[+*]/, // Nested quantifiers: (a+)+ or (a*)*
|
|
27
|
+
/\(.*\|.*\)[+*]/, // Alternation with quantifier: (a|b)+
|
|
28
|
+
/\(.*[+*].*\)\{/, // Nested quantifier with range: (a+){2,}
|
|
29
|
+
/\([^)]*\\[dDwWsS][+*].*\)[+*]/, // Character class with nested quantifier
|
|
30
|
+
];
|
|
31
|
+
function isSafeRegex(pattern) {
|
|
32
|
+
return !REDOS_PATTERNS.some((p) => p.test(pattern));
|
|
33
|
+
}
|
|
16
34
|
/** Binary/non-text extensions to skip during text search */
|
|
17
35
|
const BINARY_EXTENSIONS = new Set([
|
|
18
36
|
".png", ".jpg", ".jpeg", ".gif", ".bmp", ".ico", ".svg",
|
|
@@ -25,128 +43,318 @@ const BINARY_EXTENSIONS = new Set([
|
|
|
25
43
|
".db", ".sqlite", ".sqlite3",
|
|
26
44
|
".lock",
|
|
27
45
|
]);
|
|
46
|
+
// ── Private helpers ─────────────────────────────────────
|
|
47
|
+
/** Check if a symbol matches the active kind and file_pattern filters. */
|
|
48
|
+
function matchesSymbolFilters(symbol, options) {
|
|
49
|
+
if (options?.kind && symbol.kind !== options.kind)
|
|
50
|
+
return false;
|
|
51
|
+
if (options?.file_pattern && !matchFilePattern(symbol.file, options.file_pattern))
|
|
52
|
+
return false;
|
|
53
|
+
return true;
|
|
54
|
+
}
|
|
28
55
|
/**
|
|
29
|
-
*
|
|
30
|
-
*
|
|
56
|
+
* Apply detail-level shaping, source truncation, and field cleanup.
|
|
57
|
+
* Compact: ~15 tok/result. Standard: signature + truncated source. Full: unlimited.
|
|
31
58
|
*/
|
|
32
|
-
function
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
59
|
+
function shapeSearchResults(results, detail, includeSource, options) {
|
|
60
|
+
if (detail === "compact") {
|
|
61
|
+
return results.map((r) => ({
|
|
62
|
+
symbol: {
|
|
63
|
+
id: r.symbol.id,
|
|
64
|
+
name: r.symbol.name,
|
|
65
|
+
kind: r.symbol.kind,
|
|
66
|
+
file: r.symbol.file,
|
|
67
|
+
start_line: r.symbol.start_line,
|
|
68
|
+
},
|
|
69
|
+
score: r.score,
|
|
70
|
+
}));
|
|
43
71
|
}
|
|
44
|
-
|
|
45
|
-
if (
|
|
46
|
-
|
|
47
|
-
|
|
72
|
+
let shaped = results;
|
|
73
|
+
if (!includeSource) {
|
|
74
|
+
shaped = shaped.map((r) => {
|
|
75
|
+
const { source: _source, ...symbolWithoutSource } = r.symbol;
|
|
76
|
+
return { ...r, symbol: symbolWithoutSource };
|
|
77
|
+
});
|
|
48
78
|
}
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
79
|
+
const defaultSourceChars = detail === "full" ? undefined
|
|
80
|
+
: (includeSource && !options?.file_pattern) ? DEFAULT_SOURCE_CHARS_NARROW : DEFAULT_SOURCE_CHARS_WIDE;
|
|
81
|
+
const sourceChars = options?.source_chars ?? (includeSource ? defaultSourceChars : undefined);
|
|
82
|
+
if (includeSource && sourceChars !== undefined && sourceChars > 0) {
|
|
83
|
+
shaped = shaped.map((r) => {
|
|
84
|
+
const source = r.symbol.source;
|
|
85
|
+
if (source && source.length > sourceChars) {
|
|
86
|
+
return { ...r, symbol: { ...r.symbol, source: source.slice(0, sourceChars) + "..." } };
|
|
87
|
+
}
|
|
88
|
+
return r;
|
|
89
|
+
});
|
|
53
90
|
}
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
91
|
+
return shaped.map((r) => {
|
|
92
|
+
const { tokens: _tokens, repo: _repo, ...cleanSymbol } = r.symbol;
|
|
93
|
+
return { ...r, symbol: cleanSymbol };
|
|
94
|
+
});
|
|
95
|
+
}
|
|
96
|
+
/** Validate regex for ReDoS safety and compile without g/y flags, or throw descriptive error. */
|
|
97
|
+
function compileSearchRegex(query) {
|
|
98
|
+
if (!isSafeRegex(query)) {
|
|
99
|
+
throw new Error("Regex pattern rejected: potential catastrophic backtracking (ReDoS)");
|
|
62
100
|
}
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
const dirPart = pattern.slice(0, lastSlash);
|
|
67
|
-
const filePart = pattern.slice(lastSlash + 1);
|
|
68
|
-
const fileLastSlash = filePath.lastIndexOf("/");
|
|
69
|
-
const fileDir = fileLastSlash >= 0 ? filePath.slice(0, fileLastSlash) : "";
|
|
70
|
-
const fileName = fileLastSlash >= 0 ? filePath.slice(fileLastSlash + 1) : filePath;
|
|
71
|
-
if (fileDir !== dirPart)
|
|
72
|
-
return false;
|
|
73
|
-
return matchFilePattern(fileName, filePart);
|
|
101
|
+
try {
|
|
102
|
+
// No g/y flags — regex is reused across files; stateful flags cause alternating matches
|
|
103
|
+
return new RegExp(query);
|
|
74
104
|
}
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
if (!pattern.includes("*")) {
|
|
79
|
-
return filePath.includes(pattern);
|
|
105
|
+
catch (err) {
|
|
106
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
107
|
+
throw new Error(`Invalid regex pattern: ${message}`);
|
|
80
108
|
}
|
|
81
|
-
return false;
|
|
82
109
|
}
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
110
|
+
// ── Ripgrep backend ────────────────────────────────────
|
|
111
|
+
/** Directories always excluded from ripgrep search */
|
|
112
|
+
const RG_EXCLUDE_DIRS = [
|
|
113
|
+
"node_modules", ".git", ".next", "dist", ".codesift", "coverage",
|
|
114
|
+
".playwright-mcp", "__pycache__", ".mypy_cache", ".tox",
|
|
115
|
+
];
|
|
116
|
+
/** Detect whether `rg` (ripgrep) is available on this system. Cached at module level. */
|
|
117
|
+
let rgAvailable = null;
|
|
118
|
+
function hasRipgrep() {
|
|
119
|
+
if (rgAvailable !== null)
|
|
120
|
+
return rgAvailable;
|
|
121
|
+
try {
|
|
122
|
+
execFileSync("rg", ["--version"], { stdio: "pipe", timeout: 2000 });
|
|
123
|
+
rgAvailable = true;
|
|
87
124
|
}
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
if (idx < 0)
|
|
93
|
-
return [str, ""];
|
|
94
|
-
return [str.slice(0, idx), str.slice(idx + sep.length)];
|
|
125
|
+
catch {
|
|
126
|
+
rgAvailable = false;
|
|
127
|
+
}
|
|
128
|
+
return rgAvailable;
|
|
95
129
|
}
|
|
96
130
|
/**
|
|
97
|
-
*
|
|
98
|
-
*
|
|
99
|
-
* Unlike the index walk, this includes ALL text files (not just parseable ones).
|
|
131
|
+
* Search via ripgrep — fast C-based search, parses `rg -n` output.
|
|
132
|
+
* Falls back to Node.js search if rg is not available.
|
|
100
133
|
*/
|
|
101
|
-
|
|
102
|
-
const
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
134
|
+
function searchWithRipgrep(root, query, options) {
|
|
135
|
+
const args = [
|
|
136
|
+
"-n", // line numbers
|
|
137
|
+
"--no-heading", // flat output
|
|
138
|
+
"--max-columns", String(MAX_LINE_CHARS),
|
|
139
|
+
"--max-columns-preview", // show truncated preview
|
|
140
|
+
"--max-count", String(Math.min(options.maxResults * 2, 5000)), // per-file cap (generous to hit global max)
|
|
141
|
+
];
|
|
142
|
+
if (!options.regex) {
|
|
143
|
+
args.push("-F"); // fixed string (literal)
|
|
144
|
+
}
|
|
145
|
+
if (options.contextLines > 0) {
|
|
146
|
+
args.push("-C", String(options.contextLines));
|
|
147
|
+
}
|
|
148
|
+
// File pattern → rg glob
|
|
149
|
+
if (options.filePattern) {
|
|
150
|
+
// Handle patterns like "src/**" or "*.ts"
|
|
151
|
+
args.push("--glob", options.filePattern);
|
|
152
|
+
}
|
|
153
|
+
// Exclude dirs
|
|
154
|
+
for (const dir of RG_EXCLUDE_DIRS) {
|
|
155
|
+
args.push("--glob", `!${dir}`);
|
|
156
|
+
}
|
|
157
|
+
args.push("--", query, root);
|
|
158
|
+
let stdout;
|
|
159
|
+
try {
|
|
160
|
+
stdout = execFileSync("rg", args, {
|
|
161
|
+
encoding: "utf-8",
|
|
162
|
+
maxBuffer: 20 * 1024 * 1024, // 20MB
|
|
163
|
+
timeout: SEARCH_TIMEOUT_MS,
|
|
164
|
+
});
|
|
165
|
+
}
|
|
166
|
+
catch (err) {
|
|
167
|
+
// rg exits 1 = no matches, 2 = error
|
|
168
|
+
if (err && typeof err === "object" && "status" in err) {
|
|
169
|
+
const exitCode = err.status;
|
|
170
|
+
if (exitCode === 1)
|
|
171
|
+
return []; // no matches
|
|
172
|
+
if ("stdout" in err && typeof err.stdout === "string") {
|
|
173
|
+
stdout = err.stdout;
|
|
174
|
+
if (!stdout)
|
|
175
|
+
return [];
|
|
176
|
+
}
|
|
177
|
+
else {
|
|
178
|
+
return [];
|
|
179
|
+
}
|
|
110
180
|
}
|
|
111
|
-
|
|
112
|
-
return;
|
|
181
|
+
else {
|
|
182
|
+
return [];
|
|
113
183
|
}
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
184
|
+
}
|
|
185
|
+
const matches = [];
|
|
186
|
+
const rootPrefix = root.endsWith("/") ? root : root + "/";
|
|
187
|
+
// Parse context blocks: lines separated by "--" separators
|
|
188
|
+
const blocks = options.contextLines > 0
|
|
189
|
+
? stdout.split(/^--$/m)
|
|
190
|
+
: [stdout];
|
|
191
|
+
for (const block of blocks) {
|
|
192
|
+
if (matches.length >= options.maxResults)
|
|
193
|
+
break;
|
|
194
|
+
const lines = block.split("\n").filter(Boolean);
|
|
195
|
+
// In context mode, find the actual match line (has `:` separator) vs context (has `-` separator)
|
|
196
|
+
// In non-context mode, all lines are matches
|
|
197
|
+
for (const rawLine of lines) {
|
|
198
|
+
if (matches.length >= options.maxResults)
|
|
199
|
+
break;
|
|
200
|
+
// rg format: /abs/path/file.ts:42:content (match)
|
|
201
|
+
// rg format: /abs/path/file.ts-40-content (context, only with -C)
|
|
202
|
+
// We only want match lines (with `:` after line number)
|
|
203
|
+
const matchResult = rawLine.match(/^(.+?):(\d+):(.*)/);
|
|
204
|
+
if (!matchResult)
|
|
205
|
+
continue;
|
|
206
|
+
const [, absPath, lineNumStr, content] = matchResult;
|
|
207
|
+
if (!absPath || !lineNumStr || content === undefined)
|
|
208
|
+
continue;
|
|
209
|
+
const relPath = absPath.startsWith(rootPrefix)
|
|
210
|
+
? absPath.slice(rootPrefix.length)
|
|
211
|
+
: absPath;
|
|
212
|
+
matches.push({
|
|
213
|
+
file: relPath,
|
|
214
|
+
line: parseInt(lineNumStr, 10),
|
|
215
|
+
content: content,
|
|
216
|
+
});
|
|
217
|
+
}
|
|
218
|
+
}
|
|
219
|
+
// For context mode, we need to re-parse to attach context_before/context_after
|
|
220
|
+
// But context_lines=0 is the default now, so this path is rarely hit
|
|
221
|
+
if (options.contextLines > 0 && blocks.length > 1) {
|
|
222
|
+
return parseRipgrepContextBlocks(stdout, rootPrefix, options.maxResults, options.contextLines);
|
|
223
|
+
}
|
|
224
|
+
return matches;
|
|
225
|
+
}
|
|
226
|
+
/**
|
|
227
|
+
* Parse rg output with context lines (-C N) into TextMatch[] with context_before/context_after.
|
|
228
|
+
*/
|
|
229
|
+
function parseRipgrepContextBlocks(stdout, rootPrefix, maxResults, contextLines) {
|
|
230
|
+
const matches = [];
|
|
231
|
+
const blocks = stdout.split(/^--$/m);
|
|
232
|
+
for (const block of blocks) {
|
|
233
|
+
if (matches.length >= maxResults)
|
|
234
|
+
break;
|
|
235
|
+
const lines = block.split("\n").filter(Boolean);
|
|
236
|
+
// Separate match lines from context lines
|
|
237
|
+
// Match: path:line:content Context: path-line-content
|
|
238
|
+
const parsed = [];
|
|
239
|
+
for (const raw of lines) {
|
|
240
|
+
// Try match line first (colon after line number)
|
|
241
|
+
const matchLine = raw.match(/^(.+?):(\d+):(.*)/);
|
|
242
|
+
if (matchLine && matchLine[1] && matchLine[2] && matchLine[3] !== undefined) {
|
|
243
|
+
parsed.push({
|
|
244
|
+
path: matchLine[1].startsWith(rootPrefix) ? matchLine[1].slice(rootPrefix.length) : matchLine[1],
|
|
245
|
+
line: parseInt(matchLine[2], 10),
|
|
246
|
+
content: matchLine[3],
|
|
247
|
+
isMatch: true,
|
|
248
|
+
});
|
|
249
|
+
continue;
|
|
250
|
+
}
|
|
251
|
+
// Try context line (hyphen after line number)
|
|
252
|
+
const ctxLine = raw.match(/^(.+?)-(\d+)-(.*)/);
|
|
253
|
+
if (ctxLine && ctxLine[1] && ctxLine[2] && ctxLine[3] !== undefined) {
|
|
254
|
+
parsed.push({
|
|
255
|
+
path: ctxLine[1].startsWith(rootPrefix) ? ctxLine[1].slice(rootPrefix.length) : ctxLine[1],
|
|
256
|
+
line: parseInt(ctxLine[2], 10),
|
|
257
|
+
content: ctxLine[3],
|
|
258
|
+
isMatch: false,
|
|
259
|
+
});
|
|
260
|
+
}
|
|
261
|
+
}
|
|
262
|
+
// Build TextMatch for each match line with surrounding context
|
|
263
|
+
for (let i = 0; i < parsed.length; i++) {
|
|
264
|
+
const p = parsed[i];
|
|
265
|
+
if (!p.isMatch)
|
|
266
|
+
continue;
|
|
267
|
+
if (matches.length >= maxResults)
|
|
268
|
+
break;
|
|
269
|
+
const contextBefore = [];
|
|
270
|
+
const contextAfter = [];
|
|
271
|
+
// Collect context before
|
|
272
|
+
for (let j = Math.max(0, i - contextLines); j < i; j++) {
|
|
273
|
+
const ctx = parsed[j];
|
|
274
|
+
if (ctx && !ctx.isMatch)
|
|
275
|
+
contextBefore.push(ctx.content);
|
|
123
276
|
}
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
if (
|
|
128
|
-
|
|
129
|
-
// Skip files that are too large
|
|
130
|
-
try {
|
|
131
|
-
const fileStat = await stat(fullPath);
|
|
132
|
-
if (fileStat.size > MAX_FILE_SIZE)
|
|
133
|
-
continue;
|
|
134
|
-
}
|
|
135
|
-
catch {
|
|
136
|
-
continue;
|
|
137
|
-
}
|
|
138
|
-
files.push(relative(rootPath, fullPath));
|
|
139
|
-
if (files.length >= MAX_WALK_FILES) {
|
|
140
|
-
console.warn(`[codesift] walkAllTextFiles: reached ${MAX_WALK_FILES} file limit, returning partial results`);
|
|
141
|
-
limitReached = true;
|
|
142
|
-
return;
|
|
143
|
-
}
|
|
277
|
+
// Collect context after
|
|
278
|
+
for (let j = i + 1; j <= Math.min(parsed.length - 1, i + contextLines); j++) {
|
|
279
|
+
const ctx = parsed[j];
|
|
280
|
+
if (ctx && !ctx.isMatch)
|
|
281
|
+
contextAfter.push(ctx.content);
|
|
144
282
|
}
|
|
283
|
+
const match = { file: p.path, line: p.line, content: p.content };
|
|
284
|
+
if (contextBefore.length > 0)
|
|
285
|
+
match.context_before = contextBefore;
|
|
286
|
+
if (contextAfter.length > 0)
|
|
287
|
+
match.context_after = contextAfter;
|
|
288
|
+
matches.push(match);
|
|
289
|
+
}
|
|
290
|
+
}
|
|
291
|
+
return matches;
|
|
292
|
+
}
|
|
293
|
+
// ── Node.js fallback search ───────────────────────────
|
|
294
|
+
/** Search file content for line matches, collecting context lines around each hit. */
|
|
295
|
+
function searchFileForMatches(content, filePath, query, regex, contextLines, maxMatches) {
|
|
296
|
+
const lines = content.split("\n");
|
|
297
|
+
const matches = [];
|
|
298
|
+
for (let i = 0; i < lines.length; i++) {
|
|
299
|
+
if (matches.length >= maxMatches)
|
|
300
|
+
break;
|
|
301
|
+
const line = lines[i];
|
|
302
|
+
if (line === undefined)
|
|
303
|
+
continue;
|
|
304
|
+
const isMatch = regex ? regex.test(line) : line.includes(query);
|
|
305
|
+
if (!isMatch)
|
|
306
|
+
continue;
|
|
307
|
+
const contextBefore = [];
|
|
308
|
+
for (let j = Math.max(0, i - contextLines); j < i; j++) {
|
|
309
|
+
const ctxLine = lines[j];
|
|
310
|
+
if (ctxLine !== undefined)
|
|
311
|
+
contextBefore.push(ctxLine);
|
|
312
|
+
}
|
|
313
|
+
const contextAfter = [];
|
|
314
|
+
for (let j = i + 1; j <= Math.min(lines.length - 1, i + contextLines); j++) {
|
|
315
|
+
const ctxLine = lines[j];
|
|
316
|
+
if (ctxLine !== undefined)
|
|
317
|
+
contextAfter.push(ctxLine);
|
|
318
|
+
}
|
|
319
|
+
const truncLine = line.length > MAX_LINE_CHARS
|
|
320
|
+
? line.slice(0, MAX_LINE_CHARS) + "..."
|
|
321
|
+
: line;
|
|
322
|
+
const match = {
|
|
323
|
+
file: filePath,
|
|
324
|
+
line: i + 1,
|
|
325
|
+
content: truncLine,
|
|
326
|
+
};
|
|
327
|
+
if (contextBefore.length > 0)
|
|
328
|
+
match.context_before = contextBefore;
|
|
329
|
+
if (contextAfter.length > 0)
|
|
330
|
+
match.context_after = contextAfter;
|
|
331
|
+
matches.push(match);
|
|
332
|
+
}
|
|
333
|
+
return matches;
|
|
334
|
+
}
|
|
335
|
+
/**
 * Collapse a flat list of text matches into one summary entry per file.
 *
 * Each entry carries the file path, the number of hits, the hit line numbers in
 * encounter order, and a preview of the first hit capped at MAX_FIRST_MATCH_CHARS.
 *
 * @param {Array<{file: string, line: number, content: string}>} matches
 * @returns {Array<{file: string, count: number, lines: number[], first_match: string}>}
 *   Groups ordered by the first appearance of each file.
 */
function groupMatchesByFile(matches) {
    const byFile = new Map();
    for (const { file, line, content } of matches) {
        const group = byFile.get(file);
        if (!group) {
            // First hit for this file: seed the group and derive the preview.
            const preview = content.length > MAX_FIRST_MATCH_CHARS
                ? content.slice(0, MAX_FIRST_MATCH_CHARS) + "..."
                : content;
            byFile.set(file, { file, count: 1, lines: [line], first_match: preview });
            continue;
        }
        group.count += 1;
        group.lines.push(line);
    }
    return Array.from(byFile.values());
}
|
|
357
|
+
// ── Public API ──────────────────────────────────────────
|
|
150
358
|
/**
|
|
151
359
|
* Search symbols by name/signature/docstring using BM25 ranking.
|
|
152
360
|
* Supports filtering by symbol kind and file pattern.
|
|
@@ -161,149 +369,175 @@ export async function searchSymbols(repo, query, options) {
|
|
|
161
369
|
throw new Error(`Repository "${repo}" not found. Run index_folder first.`);
|
|
162
370
|
}
|
|
163
371
|
const config = loadConfig();
|
|
164
|
-
const topK = options?.top_k ?? config.defaultTopK;
|
|
165
372
|
const includeSource = options?.include_source ?? true;
|
|
166
|
-
const
|
|
167
|
-
const
|
|
168
|
-
const hasFilters =
|
|
373
|
+
const defaultK = (includeSource && !options?.file_pattern) ? DEFAULT_TOP_K_WITH_SOURCE : config.defaultTopK;
|
|
374
|
+
const topK = options?.top_k ?? defaultK;
|
|
375
|
+
const hasFilters = !!options?.kind || !!options?.file_pattern;
|
|
169
376
|
let results;
|
|
170
377
|
if (!query.trim()) {
|
|
171
|
-
// Empty query: return all symbols matching filters (no BM25 scoring)
|
|
172
378
|
const allSymbols = [...index.symbols.values()];
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
const kind = options.kind;
|
|
176
|
-
filtered = filtered.filter((s) => s.kind === kind);
|
|
177
|
-
}
|
|
178
|
-
if (hasFileFilter) {
|
|
179
|
-
const pattern = options.file_pattern;
|
|
180
|
-
filtered = filtered.filter((s) => matchFilePattern(s.file, pattern));
|
|
181
|
-
}
|
|
182
|
-
results = filtered.slice(0, topK).map((symbol) => ({
|
|
183
|
-
symbol,
|
|
184
|
-
score: 0,
|
|
185
|
-
}));
|
|
379
|
+
const filtered = allSymbols.filter((s) => matchesSymbolFilters(s, options));
|
|
380
|
+
results = filtered.slice(0, topK).map((symbol) => ({ symbol, score: 0 }));
|
|
186
381
|
}
|
|
187
382
|
else {
|
|
188
|
-
|
|
189
|
-
// so that post-filter truncation doesn't lose relevant results.
|
|
190
|
-
const searchTopK = hasFilters ? Math.max(topK * 5, 200) : topK;
|
|
383
|
+
const searchTopK = hasFilters ? Math.max(topK * BM25_FILTER_MULTIPLIER, BM25_FILTER_MIN_K) : topK;
|
|
191
384
|
results = searchBM25(index, query, searchTopK, config.bm25FieldWeights);
|
|
192
|
-
|
|
193
|
-
if (hasKindFilter) {
|
|
194
|
-
const kind = options.kind;
|
|
195
|
-
results = results.filter((r) => r.symbol.kind === kind);
|
|
196
|
-
}
|
|
197
|
-
// Filter by file pattern
|
|
198
|
-
if (hasFileFilter) {
|
|
199
|
-
const pattern = options.file_pattern;
|
|
200
|
-
results = results.filter((r) => matchFilePattern(r.symbol.file, pattern));
|
|
201
|
-
}
|
|
202
|
-
// Re-truncate to requested top_k after filtering
|
|
385
|
+
results = results.filter((r) => matchesSymbolFilters(r.symbol, options));
|
|
203
386
|
results = results.slice(0, topK);
|
|
387
|
+
results = applyCutoff(results);
|
|
204
388
|
}
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
results = results
|
|
208
|
-
const { source: _source, ...symbolWithoutSource } = r.symbol;
|
|
209
|
-
return { ...r, symbol: symbolWithoutSource };
|
|
210
|
-
});
|
|
389
|
+
if (options?.rerank && results.length > 1) {
|
|
390
|
+
const { rerankResults } = await import("../search/reranker.js");
|
|
391
|
+
results = await rerankResults(query, results);
|
|
211
392
|
}
|
|
212
|
-
|
|
213
|
-
const
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
393
|
+
const detail = options?.detail_level ?? "standard";
|
|
394
|
+
const shaped = shapeSearchResults(results, detail, includeSource, options);
|
|
395
|
+
// Token budget: greedily pack results until budget exhausted
|
|
396
|
+
const budget = options?.token_budget;
|
|
397
|
+
if (budget && budget > 0) {
|
|
398
|
+
const packed = [];
|
|
399
|
+
let used = 0;
|
|
400
|
+
for (const r of shaped) {
|
|
401
|
+
const tok = Math.ceil(JSON.stringify(r).length / CHARS_PER_TOKEN);
|
|
402
|
+
if (used + tok > budget)
|
|
403
|
+
break;
|
|
404
|
+
packed.push(r);
|
|
405
|
+
used += tok;
|
|
406
|
+
}
|
|
407
|
+
return packed;
|
|
225
408
|
}
|
|
226
|
-
return
|
|
409
|
+
return shaped;
|
|
227
410
|
}
|
|
228
|
-
/**
|
|
229
|
-
* Full-text search across all files in a repository.
|
|
230
|
-
* Walks the filesystem to search ALL text files, not just indexed ones.
|
|
231
|
-
*/
|
|
232
411
|
/**
 * Full-text search across the files of an indexed repository.
 *
 * Uses ripgrep when `hasRipgrep()` reports it, otherwise reads files in Node.js.
 * The return shape depends on the options:
 *  - `ranked` (with at least one hit): matches passed through
 *    `classifyHitsWithSymbols`; falls back to the raw matches if that step fails.
 *  - `compact` (or `auto_group` with no context and a small result set), unless
 *    `group_by_file` is set: a grep-like string.
 *  - `group_by_file`, `auto_group` above the threshold, or an oversized
 *    response: per-file groups from `groupMatchesByFile`.
 *  - otherwise: the flat match array.
 *
 * @param {string} repo - Repository identifier passed to `getCodeIndex`.
 * @param {string} query - Literal text, or a regex source when `options.regex` is set.
 * @param {object} [options] - regex, file_pattern, max_results, context_lines,
 *   ranked, compact, auto_group, group_by_file.
 * @returns {Promise<Array<object>|string>}
 * @throws {Error} When the repository has no index, or when `compileSearchRegex`
 *   rejects the pattern.
 */
export async function searchText(repo, query, options) {
    const index = await getCodeIndex(repo);
    if (!index) {
        throw new Error(`Repository "${repo}" not found. Run index_folder first.`);
    }
    const useRegex = options?.regex ?? false;
    const filePattern = options?.file_pattern;
    const maxResults = options?.max_results
        ?? (useRegex && !filePattern ? DEFAULT_MAX_REGEX_RESULTS : DEFAULT_MAX_TEXT_MATCHES);
    const contextLines = options?.context_lines ?? 0; // OPT-2: default 0 (was 2) — saves ~30 tokens/match
    // Compile once, up front: this validates the pattern before it reaches
    // ripgrep (compileSearchRegex throws on rejected patterns) and the same
    // object is reused by the Node.js fallback.
    const regex = useRegex ? compileSearchRegex(query) : null;
    let matches;
    // OPT-1: Use ripgrep when available (10x faster)
    if (hasRipgrep()) {
        matches = searchWithRipgrep(index.root, query, {
            regex: useRegex,
            filePattern: filePattern,
            maxResults: maxResults,
            contextLines: contextLines,
        });
    }
    else {
        matches = await searchTextInNode(index, query, regex, filePattern, contextLines, maxResults);
    }
    // Ranked mode: classify hits with symbol context, deduplicate, and sort by centrality.
    // Takes precedence over auto_group/compact — returns TextMatch[] with containing_symbol.
    if (options?.ranked && matches.length > 0) {
        try {
            const { classifyHitsWithSymbols } = await import("./search-ranker.js");
            const bm25Idx = await getBM25Index(repo);
            if (bm25Idx) {
                matches = await classifyHitsWithSymbols(matches, index, { centrality: bm25Idx.centrality });
            }
        }
        catch {
            // Graceful fallback — return unranked matches if pipeline fails
        }
        return matches;
    }
    // OPT-3: Compact format — grep-like `file:line: content` output, ~50% less tokens than JSON
    // Auto-enable when auto_group is set (caller is optimization-aware) and results are small
    const useCompact = options?.compact
        ?? (options?.auto_group && contextLines === 0 && matches.length > 0 && matches.length <= AUTO_GROUP_THRESHOLD);
    if (useCompact && !options?.group_by_file) {
        return formatCompactTextMatches(matches);
    }
    // Estimate response size; force grouping when output would be enormous
    const estimatedChars = estimateTextMatchChars(matches);
    const shouldGroup = options?.group_by_file
        || (options?.auto_group && matches.length > AUTO_GROUP_THRESHOLD)
        || estimatedChars > MAX_RESPONSE_CHARS;
    if (shouldGroup) {
        return groupMatchesByFile(matches);
    }
    return matches;
}
/** Node.js fallback for searchText: scan candidate files one by one until the result cap or time budget is hit. */
async function searchTextInNode(index, query, regex, filePattern, contextLines, maxResults) {
    let allFiles;
    if (filePattern) {
        // With a pattern, candidates come from the index's file list.
        allFiles = index.files.map((f) => f.path);
    }
    else {
        allFiles = await walkDirectory(index.root, {
            fileFilter: (ext) => !BINARY_EXTENSIONS.has(ext),
            maxFiles: MAX_WALK_FILES,
            relative: true,
        });
    }
    const matches = [];
    const searchStart = Date.now();
    for (const filePath of allFiles) {
        if (matches.length >= maxResults)
            break;
        // Time budget exceeded: return what has been collected so far.
        if (Date.now() - searchStart > SEARCH_TIMEOUT_MS)
            break;
        if (filePattern && !matchFilePattern(filePath, filePattern))
            continue;
        const fullPath = join(index.root, filePath);
        let content;
        try {
            content = await readFile(fullPath, "utf-8");
        }
        catch {
            // Unreadable file: skip it and keep searching (best effort).
            continue;
        }
        const fileMatches = searchFileForMatches(content, filePath, query, regex, contextLines, maxResults - matches.length);
        matches.push(...fileMatches);
    }
    return matches;
}
/** Render matches as grep-like text, grouping under a file header when any file has more than one hit. */
function formatCompactTextMatches(matches) {
    // Group by file to avoid repeating long paths (saves ~30% on multi-match files)
    const groups = new Map();
    for (const m of matches) {
        let g = groups.get(m.file);
        if (!g) {
            g = [];
            groups.set(m.file, g);
        }
        g.push(` ${m.line}: ${m.content}`);
    }
    if (groups.size === matches.length) {
        // Each file has 1 match — flat format is fine
        return matches.map((m) => `${m.file}:${m.line}: ${m.content}`).join("\n");
    }
    // Grouped: file header + indented matches
    const parts = [];
    for (const [file, lines] of groups) {
        parts.push(`${file}\n${lines.join("\n")}`);
    }
    return parts.join("\n");
}
/** Rough character count of the JSON response for a list of matches, including context lines. */
function estimateTextMatchChars(matches) {
    return matches.reduce((sum, m) => {
        let chars = m.file.length + m.content.length + JSON_OVERHEAD_PER_MATCH;
        if (m.context_before)
            chars += m.context_before.reduce((s, l) => s + l.length, 0);
        if (m.context_after)
            chars += m.context_after.reduce((s, l) => s + l.length, 0);
        return sum + chars;
    }, 0);
}
|
|
528
|
+
// ---------------------------------------------------------------------------
|
|
529
|
+
// Semantic search — standalone wrapper around retrieval infrastructure
|
|
530
|
+
// ---------------------------------------------------------------------------
|
|
531
|
+
/**
 * Run a semantic query against a repository and return the result as a string.
 *
 * Builds a `{ type: "semantic", ... }` request from the caller's options and
 * forwards it to `handleSemanticQuery`, which is loaded lazily from the
 * retrieval module.
 *
 * @param {string} repo - Repository identifier forwarded to the handler.
 * @param {string} query - Query text.
 * @param {object} [options] - top_k, file_pattern, exclude_tests, rerank.
 * @returns {Promise<string>} The handler's `data` as-is when it is already a
 *   string, otherwise its JSON serialization.
 */
export async function semanticSearch(repo, query, options) {
    const { handleSemanticQuery } = await import("../retrieval/semantic-handlers.js");
    const { top_k, file_pattern, exclude_tests, rerank } = options ?? {};
    const request = {
        type: "semantic",
        query,
        top_k,
        file_filter: file_pattern, // the handler names this field file_filter
        exclude_tests,
        rerank,
    };
    const result = await handleSemanticQuery(repo, request);
    const payload = result.data;
    if (typeof payload === "string") {
        return payload;
    }
    return JSON.stringify(payload);
}
|
|
309
543
|
//# sourceMappingURL=search-tools.js.map
|