codetool-explore 0.5.0__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codetool_explore/__init__.py +35 -0
- codetool_explore/_bin/codetool-explore-rust-windows-x86_64.exe +0 -0
- codetool_explore/api.py +266 -0
- codetool_explore/cli.py +188 -0
- codetool_explore/compression.py +150 -0
- codetool_explore/cursor.py +71 -0
- codetool_explore/errors.py +23 -0
- codetool_explore/explorer.py +497 -0
- codetool_explore/ignore.py +222 -0
- codetool_explore/py.typed +0 -0
- codetool_explore/python_backend/__init__.py +154 -0
- codetool_explore/python_backend/case.py +19 -0
- codetool_explore/python_backend/config.py +35 -0
- codetool_explore/python_backend/constants.py +39 -0
- codetool_explore/python_backend/file_search.py +51 -0
- codetool_explore/python_backend/ignore_rules.py +40 -0
- codetool_explore/python_backend/literal.py +79 -0
- codetool_explore/python_backend/matcher.py +79 -0
- codetool_explore/python_backend/models.py +49 -0
- codetool_explore/python_backend/output.py +82 -0
- codetool_explore/python_backend/regex_search.py +63 -0
- codetool_explore/python_backend/search.py +327 -0
- codetool_explore/python_backend/text.py +39 -0
- codetool_explore/python_backend/walker.py +119 -0
- codetool_explore/ranking.py +384 -0
- codetool_explore/roots.py +148 -0
- codetool_explore/rust_backend.py +308 -0
- codetool_explore/text_output.py +475 -0
- codetool_explore-0.5.0.dist-info/METADATA +240 -0
- codetool_explore-0.5.0.dist-info/RECORD +33 -0
- codetool_explore-0.5.0.dist-info/WHEEL +4 -0
- codetool_explore-0.5.0.dist-info/entry_points.txt +2 -0
- codetool_explore-0.5.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"""Content and path matcher construction."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from typing import TypeAlias
|
|
8
|
+
|
|
9
|
+
from .literal import LiteralMatcher
|
|
10
|
+
from .regex_search import RegexLineMatcher
|
|
11
|
+
|
|
12
|
+
# Either concrete matcher type that ``build_content_matcher`` may return.
SearchMatcher: TypeAlias = LiteralMatcher | RegexLineMatcher
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def build_content_matcher(
    pattern: str,
    *,
    regex: bool,
    case_sensitive: bool,
) -> SearchMatcher:
    """Create the matcher object used to search file contents.

    Args:
        pattern: The search text, or the regex source when ``regex`` is set.
        regex: Whether ``pattern`` is a regular expression.
        case_sensitive: Whether matching distinguishes letter case.

    Returns:
        A ``RegexLineMatcher`` wrapping the compiled pattern for regex
        searches, otherwise a ``LiteralMatcher`` holding the UTF-8 encoded
        needle.

    Raises:
        re.error: If ``regex`` is set and ``pattern`` does not compile.
    """

    if not regex:
        # ``surrogatepass`` lets lone surrogates in the pattern be encoded
        # instead of raising ``UnicodeEncodeError``.
        return LiteralMatcher(
            needle=pattern.encode("utf-8", errors="surrogatepass"),
            case_sensitive=case_sensitive,
        )
    regex_flags = re.IGNORECASE if not case_sensitive else 0
    return RegexLineMatcher(re.compile(pattern, regex_flags))
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass(frozen=True)
class PathMatcher:
    """Prepared path matcher state.

    Holds the raw pattern together with the options that control how it is
    compared against a relative path. For regex matchers the compiled
    pattern is stored in ``compiled``; it is filled in automatically when
    the instance is created without one.
    """

    # Literal text or regex source to look for in the path.
    pattern: str
    # Whether ``pattern`` is a regular expression.
    regex: bool
    # Whether matching distinguishes letter case.
    case_sensitive: bool
    # Compiled form of ``pattern``; only used when ``regex`` is true.
    compiled: re.Pattern[str] | None = None

    def __post_init__(self) -> None:
        """Compile the regex when the instance was created without one.

        This keeps ``is_match`` usable for instances constructed directly
        rather than through ``build``.

        Raises:
            re.error: If ``regex`` is set and ``pattern`` does not compile.
        """

        if self.regex and self.compiled is None:
            flags = 0 if self.case_sensitive else re.IGNORECASE
            # The dataclass is frozen, so bypass the generated ``__setattr__``.
            object.__setattr__(self, "compiled", re.compile(self.pattern, flags))

    @classmethod
    def build(
        cls,
        pattern: str,
        *,
        regex: bool,
        case_sensitive: bool,
    ) -> "PathMatcher":
        """Build a path matcher, compiling regexes when requested.

        Args:
            pattern: Literal text or regex source.
            regex: Whether ``pattern`` is a regular expression.
            case_sensitive: Whether matching distinguishes letter case.

        Returns:
            A new ``PathMatcher``; ``compiled`` is ``None`` for literal
            matchers.

        Raises:
            re.error: If ``regex`` is set and ``pattern`` does not compile.
        """

        compiled = None
        if regex:
            flags = 0 if case_sensitive else re.IGNORECASE
            compiled = re.compile(pattern, flags)
        return cls(
            pattern=pattern,
            regex=regex,
            case_sensitive=case_sensitive,
            compiled=compiled,
        )

    def is_match(self, rel_path: str, path_scope: str) -> bool:
        """Return whether ``rel_path`` matches this matcher.

        The text that is actually compared is chosen by
        ``path_match_subject(rel_path, path_scope)``. Regex matchers use
        ``search`` (match anywhere); literal matchers use a substring test,
        case-folded on both sides when the matcher is case-insensitive. An
        empty literal pattern matches every path.
        """

        text = path_match_subject(rel_path, path_scope)
        if self.regex:
            compiled = self.compiled
            # Guaranteed by ``__post_init__``; narrows the type for checkers.
            assert compiled is not None
            return compiled.search(text) is not None
        if not self.pattern:
            return True
        if self.case_sensitive:
            return self.pattern in text
        return self.pattern.casefold() in text.casefold()
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def path_match_subject(rel_path: str, path_scope: str) -> str:
    """Pick the portion of ``rel_path`` that path matching should inspect.

    With ``path_scope == "basename"`` only the text after the last ``/`` is
    returned; any other scope returns ``rel_path`` unchanged.
    """

    if path_scope != "basename":
        return rel_path
    _, _, basename = rel_path.rpartition("/")
    return basename
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"""Data models shared by the Python backend modules."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class BinaryFileError(ValueError):
    """Internal signal that a candidate file appears to contain binary data."""
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass(frozen=True)
class CandidateFile:
    """Immutable record describing one file yielded by the scanner."""

    # Filesystem location of the file.
    path: str
    # Path of the file as it is reported in search results.
    rel_path: str
    # File size; NOTE(review): presumably in bytes — confirm against the walker.
    size: int
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass(frozen=True)
class IgnorePatterns:
    """Immutable pair of ignore-pattern groups, split by the path they apply to."""

    # NOTE(review): presumably patterns checked against every candidate path —
    # confirm against the ignore-rule code.
    common: tuple[str, ...] = ()
    # NOTE(review): presumably patterns checked against root-relative paths —
    # confirm against the ignore-rule code.
    root: tuple[str, ...] = ()
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
class SearchCounters:
    """Running tallies that are updated in place while candidates are searched."""

    # Files and bytes that were actually examined.
    scanned_files: int = 0
    scanned_bytes: int = 0
    # Files that matched by path and by content respectively.
    path_match_count: int = 0
    content_match_count: int = 0
    # Files that were skipped, grouped by reason.
    skipped_binary: int = 0
    skipped_huge: int = 0
    skipped_errors: int = 0

    @property
    def skipped(self) -> dict[str, int]:
        """Expose the skip tallies under their public result keys."""

        return dict(
            binary=self.skipped_binary,
            huge=self.skipped_huge,
            errors=self.skipped_errors,
        )
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
"""Result-shaping helpers for the Python backend."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def path_file_match(
    rel_path: str,
    *,
    mode: str,
    match_kind: str = "path",
) -> dict[str, object]:
    """Create the file-level result row for a file that matched by path.

    Args:
        rel_path: Path stored under the row's ``"path"`` key.
        mode: Result mode; ``"count"`` adds a ``"count"`` of 1 to the row.
        match_kind: Value stored under the row's ``"match_kind"`` key.

    Returns:
        A new dict with ``"path"``, ``"kind"`` (always ``"file"``) and
        ``"match_kind"``, plus ``"count"`` in count mode.
    """

    row: dict[str, object] = dict(
        path=rel_path,
        kind="file",
        match_kind=match_kind,
    )
    if mode != "count":
        return row
    row["count"] = 1
    return row
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def record_path_only_match(
    file_matches: list[dict[str, object]],
    snippet_matches: list[dict[str, object]],
    rel_path: str,
    *,
    mode: str,
    collect_snippets: bool,
) -> None:
    """Add a path-only row for ``rel_path`` to the result collections.

    The row built by ``path_file_match`` is always appended to
    ``file_matches``. When ``collect_snippets`` is set, a separate shallow
    copy is appended to ``snippet_matches`` so the two lists do not share
    one dict object.
    """

    row = path_file_match(rel_path, mode=mode)
    file_matches.append(row)
    if not collect_snippets:
        return
    snippet_matches.append(row.copy())
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def mark_snippets_for_content_or_path_target(
    snippets: list[dict[str, object]],
    *,
    path_matches: bool,
) -> None:
    """Tag content snippet rows, in place, with file kind and match kind.

    Every row gets ``"kind": "file"`` and a ``"match_kind"`` of
    ``"content_and_path"`` when the file's path matched as well, otherwise
    ``"content"``.
    """

    if path_matches:
        label = "content_and_path"
    else:
        label = "content"
    for row in snippets:
        row.update(kind="file", match_kind=label)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def base_result(
    *,
    pattern: str,
    root: str | list[str],
    regex: bool,
    target: str,
    path_scope: str,
    mode: str,
    requested_case: str,
    effective_case: str,
    limit: int,
    cursor: str | int | None,
) -> dict[str, object]:
    """Assemble the metadata fields that open every backend result.

    The request parameters are echoed back (``regex`` coerced to ``bool``,
    ``cursor`` stringified unless it is ``None``) together with
    ``"backend": "python"``. ``"path_scope"`` is appended only when the
    target is something other than ``"content"``.
    """

    cursor_text = str(cursor) if cursor is not None else None
    result: dict[str, object] = dict(
        pattern=pattern,
        root=root,
        regex=bool(regex),
        target=target,
        mode=mode,
        case=requested_case,
        effective_case=effective_case,
        limit=limit,
        cursor=cursor_text,
        backend="python",
    )
    if target == "content":
        return result
    result["path_scope"] = path_scope
    return result
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""Regex content matching for the Python backend."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
|
|
8
|
+
from .constants import MAX_SNIPPETS_PER_FILE
|
|
9
|
+
from .models import CandidateFile
|
|
10
|
+
from .text import context_for_lines, crop, decode_line
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass(frozen=True)
class RegexLineMatcher:
    """Immutable wrapper around the compiled regex used for line search."""

    # Pattern that is run against each decoded line.
    compiled: re.Pattern[str]
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def search_regex_file(
    candidate: CandidateFile,
    data: bytes,
    matcher: RegexLineMatcher,
    *,
    context_lines: int,
    collect_snippets: bool,
) -> tuple[dict[str, object] | None, list[dict[str, object]]]:
    """Run a compiled regex over each line of one file's raw bytes.

    Args:
        candidate: The file being searched; its ``rel_path`` labels the rows.
        data: Raw file content, split into lines with ``bytes.splitlines``.
        matcher: Holder of the compiled pattern applied to each decoded line.
        context_lines: Context size handed to ``context_for_lines``.
        collect_snippets: Whether snippet rows should be produced.

    Returns:
        ``(None, [])`` when nothing matched. Otherwise a file row with the
        total match count and the 1-based first matching line, plus up to
        ``MAX_SNIPPETS_PER_FILE`` snippet rows (empty unless
        ``collect_snippets`` is set).
    """

    raw_lines = data.splitlines()

    total = 0
    first_hit: int | None = None
    collected: list[dict[str, object]] = []

    for line_number, raw in enumerate(raw_lines, start=1):
        text = decode_line(raw)
        hits = sum(1 for _ in matcher.compiled.finditer(text))
        if hits == 0:
            continue
        total += hits
        if first_hit is None:
            first_hit = line_number
        # Matches keep being counted after the snippet cap is reached.
        if not collect_snippets or len(collected) >= MAX_SNIPPETS_PER_FILE:
            continue
        row: dict[str, object] = {
            "path": candidate.rel_path,
            "line": line_number,
            "snippet": crop(text),
        }
        surrounding = context_for_lines(raw_lines, line_number - 1, context_lines)
        if surrounding:
            row["context"] = surrounding
        collected.append(row)

    if first_hit is None or total == 0:
        return None, []

    summary: dict[str, object] = {
        "path": candidate.rel_path,
        "count": total,
        "first_line": first_hit,
    }
    return summary, collected
|
|
@@ -0,0 +1,327 @@
|
|
|
1
|
+
"""Top-level pure-Python backend search orchestration."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import re
|
|
7
|
+
from collections.abc import Iterable
|
|
8
|
+
|
|
9
|
+
from ..cursor import normalize_limit, page_items
|
|
10
|
+
from ..errors import ExploreArgumentError, ExplorePatternError
|
|
11
|
+
from ..ignore import normalize_patterns
|
|
12
|
+
from ..ranking import file_sort_key, snippet_sort_key
|
|
13
|
+
from ..roots import RootInput, normalize_search_roots
|
|
14
|
+
from .case import resolve_case
|
|
15
|
+
from .config import normalize_mode, normalize_path_scope, normalize_target
|
|
16
|
+
from .constants import max_file_bytes
|
|
17
|
+
from .file_search import search_file
|
|
18
|
+
from .matcher import PathMatcher, build_content_matcher
|
|
19
|
+
from .models import BinaryFileError, CandidateFile, SearchCounters
|
|
20
|
+
from .output import (
|
|
21
|
+
base_result,
|
|
22
|
+
mark_snippets_for_content_or_path_target,
|
|
23
|
+
record_path_only_match,
|
|
24
|
+
)
|
|
25
|
+
from .walker import iter_candidate_files
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def search_python(
    pattern: str,
    root: RootInput = ".",
    *,
    regex: bool = False,
    target: str = "content",
    path_scope: str = "path",
    glob: str | Iterable[str] | None = None,
    exclude: str | Iterable[str] | None = None,
    case: str = "smart",
    mode: str = "files",
    context_lines: int = 0,
    limit: int = 50,
    cursor: str | int | None = None,
) -> dict[str, object]:
    """Run a pure-Python search over file contents, file paths, or both.

    Args:
        pattern: Literal text, or regex source when ``regex`` is set.
        root: One or more search roots, normalised by
            ``normalize_search_roots``.
        regex: Whether ``pattern`` is a regular expression.
        target: ``"content"``, ``"path"`` or ``"content_or_path"`` after
            normalisation.
        path_scope: Which part of the relative path is matched.
        glob: Include pattern(s) forwarded to the file walker.
        exclude: Exclude pattern(s) forwarded to the file walker.
        case: Requested case mode; defaults to ``"smart"`` when falsy.
        mode: Result mode; ``"snippets"`` pages snippet rows, any other
            mode pages file rows.
        context_lines: Context size for snippets; negative values become 0.
        limit: Page size, normalised by ``normalize_limit``.
        cursor: Paging cursor handed to ``page_items``.

    Returns:
        A dict that starts with the ``base_result`` metadata and adds the
        page of matches, totals, paging state and scan counters. ``"glob"``
        and ``"exclude"`` are included only when patterns were given.

    Raises:
        ExploreArgumentError: If ``pattern`` is not a string, or snippets
            mode is combined with the ``"path"`` target.
        ExplorePatternError: If ``pattern`` is empty for a content target,
            or the regex does not compile.
    """

    # Validation and normalisation run in a fixed order so that the first
    # problem encountered decides which error is raised.
    if not isinstance(pattern, str):
        raise ExploreArgumentError("pattern must be a string")
    normalised_target = normalize_target(target)
    wants_content = normalised_target in {"content", "content_or_path"}
    wants_path = normalised_target in {"path", "content_or_path"}
    if wants_content and pattern == "":
        raise ExplorePatternError("pattern must not be empty")

    root_set = normalize_search_roots(root)

    normalised_mode = normalize_mode(mode)
    collect_snippets = normalised_mode == "snippets"
    if collect_snippets and normalised_target == "path":
        raise ExploreArgumentError(
            "mode='snippets' is not supported for target='path'; "
            "use target='content' or target='content_or_path'"
        )
    safe_limit = normalize_limit(limit)
    safe_context_lines = max(0, int(context_lines or 0))
    requested_case = str(case or "smart").lower()
    effective_case, case_sensitive = resolve_case(requested_case, pattern)
    normalised_path_scope = normalize_path_scope(path_scope)
    glob_patterns = normalize_patterns(glob)
    exclude_patterns = normalize_patterns(exclude)

    base = base_result(
        pattern=pattern,
        root=root_set.display,
        regex=regex,
        target=normalised_target,
        path_scope=normalised_path_scope,
        mode=normalised_mode,
        requested_case=requested_case,
        effective_case=effective_case,
        limit=safe_limit,
        cursor=cursor,
    )

    # Only the matchers that the target needs are built; both builders can
    # raise ``re.error`` for an invalid regex.
    try:
        content_matcher = None
        if wants_content:
            content_matcher = build_content_matcher(
                pattern,
                regex=regex,
                case_sensitive=case_sensitive,
            )
        path_matcher = None
        if wants_path:
            path_matcher = PathMatcher.build(
                pattern,
                regex=regex,
                case_sensitive=case_sensitive,
            )
    except re.error as exc:
        raise ExplorePatternError(f"invalid regex: {exc}") from exc

    file_matches: list[dict[str, object]] = []
    snippet_matches: list[dict[str, object]] = []
    counters = SearchCounters()
    visited: set[str] = set()

    for search_root in root_set.roots:
        for candidate in iter_candidate_files(
            search_root.abs_path,
            rel_base=root_set.rel_base,
            glob_patterns=glob_patterns,
            exclude_patterns=exclude_patterns,
        ):
            # The same file can be reached from more than one root; search
            # it only once.
            identity = os.path.normcase(os.path.abspath(candidate.path))
            if identity in visited:
                continue
            visited.add(identity)
            _search_candidate(
                candidate,
                content_matcher=content_matcher,
                path_matcher=path_matcher,
                path_scope=normalised_path_scope,
                target=normalised_target,
                mode=normalised_mode,
                context_lines=safe_context_lines,
                collect_snippets=collect_snippets,
                counters=counters,
                file_matches=file_matches,
                snippet_matches=snippet_matches,
            )

    file_matches.sort(key=lambda row: file_sort_key(row, pattern))
    snippet_matches.sort(key=lambda row: snippet_sort_key(row, pattern))

    total_files = len(file_matches)

    # Path-only rows count as one occurrence unless they carry a count;
    # other rows contribute only when a count is present.
    content_occurrences = 0
    path_only_occurrences = 0
    for row in file_matches:
        if row.get("match_kind") == "path":
            path_only_occurrences += int(row.get("count", 1))
        elif "count" in row:
            content_occurrences += int(row["count"])

    total_occurrences = content_occurrences
    if normalised_target != "content":
        total_occurrences += path_only_occurrences

    all_matches = snippet_matches if collect_snippets else file_matches
    total_matches = len(all_matches)

    page, truncated, next_cursor, offset = page_items(
        all_matches, limit=safe_limit, cursor=cursor
    )

    result = dict(base)
    result.update(
        {
            "matches": page,
            "returned": len(page),
            "total_files": total_files,
            "total_matches": total_matches,
            "count": total_occurrences,
            "content_count": content_occurrences,
            "content_files": counters.content_match_count,
            "path_matches": counters.path_match_count,
            "truncated": truncated,
            "next_cursor": next_cursor,
            "offset": offset,
            "scanned_files": counters.scanned_files,
            "scanned_bytes": counters.scanned_bytes,
            "skipped": counters.skipped,
        }
    )
    if glob_patterns:
        result["glob"] = list(glob_patterns)
    if exclude_patterns:
        result["exclude"] = list(exclude_patterns)
    return result
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def _search_candidate(
|
|
192
|
+
candidate: CandidateFile,
|
|
193
|
+
*,
|
|
194
|
+
content_matcher: object | None,
|
|
195
|
+
path_matcher: PathMatcher | None,
|
|
196
|
+
path_scope: str,
|
|
197
|
+
target: str,
|
|
198
|
+
mode: str,
|
|
199
|
+
context_lines: int,
|
|
200
|
+
collect_snippets: bool,
|
|
201
|
+
counters: SearchCounters,
|
|
202
|
+
file_matches: list[dict[str, object]],
|
|
203
|
+
snippet_matches: list[dict[str, object]],
|
|
204
|
+
) -> None:
|
|
205
|
+
path_matches = (
|
|
206
|
+
path_matcher.is_match(candidate.rel_path, path_scope)
|
|
207
|
+
if path_matcher is not None
|
|
208
|
+
else False
|
|
209
|
+
)
|
|
210
|
+
|
|
211
|
+
if target == "path":
|
|
212
|
+
counters.scanned_files += 1
|
|
213
|
+
if path_matches:
|
|
214
|
+
record_path_only_match(
|
|
215
|
+
file_matches,
|
|
216
|
+
snippet_matches,
|
|
217
|
+
candidate.rel_path,
|
|
218
|
+
mode=mode,
|
|
219
|
+
collect_snippets=False,
|
|
220
|
+
)
|
|
221
|
+
counters.path_match_count += 1
|
|
222
|
+
return
|
|
223
|
+
|
|
224
|
+
if candidate.size > max_file_bytes():
|
|
225
|
+
if target == "content_or_path" and path_matches:
|
|
226
|
+
_record_content_or_path_target_path_fallback(
|
|
227
|
+
file_matches,
|
|
228
|
+
snippet_matches,
|
|
229
|
+
candidate.rel_path,
|
|
230
|
+
mode=mode,
|
|
231
|
+
collect_snippets=collect_snippets,
|
|
232
|
+
counters=counters,
|
|
233
|
+
)
|
|
234
|
+
counters.skipped_huge += 1
|
|
235
|
+
return
|
|
236
|
+
|
|
237
|
+
counters.scanned_files += 1
|
|
238
|
+
counters.scanned_bytes += candidate.size
|
|
239
|
+
try:
|
|
240
|
+
assert content_matcher is not None
|
|
241
|
+
file_match, snippets = search_file(
|
|
242
|
+
candidate,
|
|
243
|
+
content_matcher,
|
|
244
|
+
context_lines=context_lines,
|
|
245
|
+
collect_snippets=collect_snippets,
|
|
246
|
+
)
|
|
247
|
+
except BinaryFileError:
|
|
248
|
+
if target == "content_or_path" and path_matches:
|
|
249
|
+
_record_content_or_path_target_path_fallback(
|
|
250
|
+
file_matches,
|
|
251
|
+
snippet_matches,
|
|
252
|
+
candidate.rel_path,
|
|
253
|
+
mode=mode,
|
|
254
|
+
collect_snippets=collect_snippets,
|
|
255
|
+
counters=counters,
|
|
256
|
+
)
|
|
257
|
+
counters.skipped_binary += 1
|
|
258
|
+
return
|
|
259
|
+
except OverflowError:
|
|
260
|
+
if target == "content_or_path" and path_matches:
|
|
261
|
+
_record_content_or_path_target_path_fallback(
|
|
262
|
+
file_matches,
|
|
263
|
+
snippet_matches,
|
|
264
|
+
candidate.rel_path,
|
|
265
|
+
mode=mode,
|
|
266
|
+
collect_snippets=collect_snippets,
|
|
267
|
+
counters=counters,
|
|
268
|
+
)
|
|
269
|
+
counters.skipped_huge += 1
|
|
270
|
+
return
|
|
271
|
+
except (OSError, UnicodeError, re.error):
|
|
272
|
+
if target == "content_or_path" and path_matches:
|
|
273
|
+
_record_content_or_path_target_path_fallback(
|
|
274
|
+
file_matches,
|
|
275
|
+
snippet_matches,
|
|
276
|
+
candidate.rel_path,
|
|
277
|
+
mode=mode,
|
|
278
|
+
collect_snippets=collect_snippets,
|
|
279
|
+
counters=counters,
|
|
280
|
+
)
|
|
281
|
+
counters.skipped_errors += 1
|
|
282
|
+
return
|
|
283
|
+
|
|
284
|
+
if file_match is None:
|
|
285
|
+
if target == "content_or_path" and path_matches:
|
|
286
|
+
_record_content_or_path_target_path_fallback(
|
|
287
|
+
file_matches,
|
|
288
|
+
snippet_matches,
|
|
289
|
+
candidate.rel_path,
|
|
290
|
+
mode=mode,
|
|
291
|
+
collect_snippets=collect_snippets,
|
|
292
|
+
counters=counters,
|
|
293
|
+
)
|
|
294
|
+
return
|
|
295
|
+
|
|
296
|
+
if target == "content_or_path":
|
|
297
|
+
file_match["kind"] = "file"
|
|
298
|
+
file_match["match_kind"] = "content_and_path" if path_matches else "content"
|
|
299
|
+
if collect_snippets:
|
|
300
|
+
mark_snippets_for_content_or_path_target(
|
|
301
|
+
snippets,
|
|
302
|
+
path_matches=path_matches,
|
|
303
|
+
)
|
|
304
|
+
if path_matches:
|
|
305
|
+
counters.path_match_count += 1
|
|
306
|
+
counters.content_match_count += 1
|
|
307
|
+
file_matches.append(file_match)
|
|
308
|
+
snippet_matches.extend(snippets)
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def _record_content_or_path_target_path_fallback(
    file_matches: list[dict[str, object]],
    snippet_matches: list[dict[str, object]],
    rel_path: str,
    *,
    mode: str,
    collect_snippets: bool,
    counters: SearchCounters,
) -> None:
    """Record a path-only row for ``rel_path`` and count it as a path match.

    The row is added through ``record_path_only_match``; afterwards
    ``counters.path_match_count`` is incremented by one.
    """

    record_path_only_match(
        file_matches, snippet_matches, rel_path,
        mode=mode, collect_snippets=collect_snippets,
    )
    counters.path_match_count += 1
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""Text decoding, cropping, and snippet context helpers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from .constants import MAX_SNIPPET_CHARS
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def decode_line(line: bytes) -> str:
    """Turn one raw line into display text.

    Bytes that are not valid UTF-8 are replaced with U+FFFD and trailing
    carriage returns are dropped.
    """

    decoded = str(line, encoding="utf-8", errors="replace")
    return decoded.rstrip("\r")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def crop(text: str, *, max_chars: int = MAX_SNIPPET_CHARS) -> str:
    """Compact and crop snippet text.

    Tabs are replaced with single spaces and surrounding whitespace is
    stripped. Text longer than ``max_chars`` is cut to ``max_chars - 1``
    characters, right-stripped, and suffixed with an ellipsis.

    Args:
        text: The text to compact.
        max_chars: Maximum length of the result. Values of 1 or below leave
            no room for an ellipsis, so the text is simply truncated;
            negative values are treated as 0.

    Returns:
        The compacted text, never longer than ``max(max_chars, 0)``.
    """

    text = text.replace("\t", " ").strip()
    if len(text) <= max_chars:
        return text
    if max_chars <= 1:
        # Clamp to 0: a negative slice bound counts from the end of the
        # string and would return text longer than the limit.
        return text[: max(max_chars, 0)]
    return text[: max_chars - 1].rstrip() + "…"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def context_for_lines(
    lines: list[bytes],
    line_index: int,
    context_lines: int,
) -> list[dict[str, object]]:
    """Build the context rows for the window around ``line_index``.

    The window spans ``context_lines`` lines on either side of the 0-based
    ``line_index`` (that line included) and is clipped to the bounds of
    ``lines``. Each row holds the 1-based line number and the decoded,
    cropped text. A non-positive ``context_lines`` gives an empty list.
    """

    if context_lines <= 0:
        return []
    first = max(0, line_index - context_lines)
    stop = min(len(lines), line_index + context_lines + 1)
    rows: list[dict[str, object]] = []
    for position in range(first, stop):
        rows.append(
            {"line": position + 1, "text": crop(decode_line(lines[position]))}
        )
    return rows
|