codetool-explore 0.5.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. codetool_explore/__init__.py +35 -0
  2. codetool_explore/_bin/codetool-explore-rust-windows-x86_64.exe +0 -0
  3. codetool_explore/api.py +266 -0
  4. codetool_explore/cli.py +188 -0
  5. codetool_explore/compression.py +150 -0
  6. codetool_explore/cursor.py +71 -0
  7. codetool_explore/errors.py +23 -0
  8. codetool_explore/explorer.py +497 -0
  9. codetool_explore/ignore.py +222 -0
  10. codetool_explore/py.typed +0 -0
  11. codetool_explore/python_backend/__init__.py +154 -0
  12. codetool_explore/python_backend/case.py +19 -0
  13. codetool_explore/python_backend/config.py +35 -0
  14. codetool_explore/python_backend/constants.py +39 -0
  15. codetool_explore/python_backend/file_search.py +51 -0
  16. codetool_explore/python_backend/ignore_rules.py +40 -0
  17. codetool_explore/python_backend/literal.py +79 -0
  18. codetool_explore/python_backend/matcher.py +79 -0
  19. codetool_explore/python_backend/models.py +49 -0
  20. codetool_explore/python_backend/output.py +82 -0
  21. codetool_explore/python_backend/regex_search.py +63 -0
  22. codetool_explore/python_backend/search.py +327 -0
  23. codetool_explore/python_backend/text.py +39 -0
  24. codetool_explore/python_backend/walker.py +119 -0
  25. codetool_explore/ranking.py +384 -0
  26. codetool_explore/roots.py +148 -0
  27. codetool_explore/rust_backend.py +308 -0
  28. codetool_explore/text_output.py +475 -0
  29. codetool_explore-0.5.0.dist-info/METADATA +240 -0
  30. codetool_explore-0.5.0.dist-info/RECORD +33 -0
  31. codetool_explore-0.5.0.dist-info/WHEEL +4 -0
  32. codetool_explore-0.5.0.dist-info/entry_points.txt +2 -0
  33. codetool_explore-0.5.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,222 @@
1
+ """Ignore, glob, and path-normalisation helpers.
2
+
3
+ The implementation deliberately avoids external dependencies. It is not a full
4
+ Git wildmatch clone; it provides fast common ignores plus simple shell-style
5
+ patterns that are good enough for coding-agent search.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import fnmatch
11
+ import os
12
+ from collections.abc import Iterable
13
+
14
# Directory names that are hard-pruned wherever they appear in a path.
COMMON_IGNORED_DIRS: frozenset[str] = frozenset(
    (
        # Version-control metadata.
        ".git",
        ".hg",
        ".svn",
        # Python environments and tool caches.
        ".tox",
        ".nox",
        ".venv",
        "venv",
        "env",
        "__pycache__",
        ".mypy_cache",
        ".pytest_cache",
        ".ruff_cache",
        ".cache",
        # JavaScript dependency trees.
        "node_modules",
        "bower_components",
        # Build outputs.
        "dist",
        "build",
        "target",
        ".next",
        ".nuxt",
        # Editor/IDE settings.
        ".idea",
        ".vscode",
    )
)

# File names that are skipped regardless of where they live.
COMMON_IGNORED_FILES: frozenset[str] = frozenset((".DS_Store", "Thumbs.db"))

# Ignore files consulted by ``load_ignore_patterns``, in reading order.
IGNORE_FILES: tuple[str, ...] = (".pbiignore", ".gitignore")
49
+
50
+
51
def normalize_relpath(path: str) -> str:
    """Return ``path`` with ``/`` separators and no leading ``./`` prefixes.

    A path that reduces to ``"."`` is returned as the empty string.
    """

    text = os.fspath(path)
    # Backslashes are always converted, then whatever separators the running
    # platform defines (``os.altsep`` may be ``None``).
    for separator in ("\\", os.sep, os.altsep):
        if separator:
            text = text.replace(separator, "/")
    while text[:2] == "./":
        text = text[2:]
    if text == ".":
        return ""
    return text
60
+
61
+
62
def relative_path(path: str, root: str) -> str:
    """Express ``path`` relative to ``root`` using forward slashes.

    When ``os.path.relpath`` raises ``ValueError`` (for example, paths on
    different Windows drives) the normalised absolute path is returned instead.
    """

    try:
        candidate = os.path.relpath(path, root)
    except ValueError:
        candidate = os.path.abspath(path)
    return normalize_relpath(candidate)
70
+
71
+
72
def normalize_patterns(patterns: str | Iterable[str] | None) -> tuple[str, ...]:
    """Coerce a public glob/exclude argument into a tuple of clean patterns.

    ``None`` yields an empty tuple and a single string is treated as one
    pattern. ``None`` entries and entries that are empty after stripping and
    normalisation are dropped.
    """

    if patterns is None:
        return ()
    candidates = (patterns,) if isinstance(patterns, str) else tuple(patterns)
    cleaned = (
        normalize_relpath(str(item).strip())
        for item in candidates
        if item is not None
    )
    return tuple(text for text in cleaned if text)
90
+
91
+
92
def load_ignore_patterns(root: str) -> tuple[str, ...]:
    """Load simple root-level patterns from ``.pbiignore`` and ``.gitignore``.

    Supported syntax is intentionally small: comments and blank lines are
    ignored, lines starting with ``!`` (negation) are ignored, and a trailing
    slash means the directory and everything under it (emitted as ``name``
    plus ``name/**``).

    Files that cannot be opened or read are skipped silently. A leading
    UTF-8 byte-order mark is discarded so that it does not become part of the
    first pattern.
    """

    patterns: list[str] = []
    for filename in IGNORE_FILES:
        path = os.path.join(root, filename)
        try:
            # "utf-8-sig" decodes exactly like "utf-8" but strips a leading
            # BOM, which some editors write at the start of the file.
            with open(path, encoding="utf-8-sig", errors="replace") as handle:
                lines = handle.readlines()
        except OSError:
            # Missing or unreadable ignore files are simply not applied.
            continue
        for line in lines:
            text = line.strip()
            if not text or text.startswith(("#", "!")):
                continue
            text = normalize_relpath(text)
            if text.endswith("/"):
                text = text.rstrip("/")
                patterns.extend((text, f"{text}/**"))
            else:
                patterns.append(text)
    return tuple(patterns)
119
+
120
+
121
def _path_segments(rel_path: str) -> tuple[str, ...]:
    """Split a relative path into its non-empty ``/``-separated components."""

    parts = normalize_relpath(rel_path).split("/")
    return tuple(filter(None, parts))
125
+
126
+
127
def is_common_ignored_dir(name: str) -> bool:
    """Tell whether ``name`` is listed in ``COMMON_IGNORED_DIRS``."""

    hard_pruned = COMMON_IGNORED_DIRS
    return name in hard_pruned
131
+
132
+
133
def is_common_ignored_file(name: str) -> bool:
    """Tell whether ``name`` is listed in ``COMMON_IGNORED_FILES``."""

    unhelpful = COMMON_IGNORED_FILES
    return name in unhelpful
137
+
138
+
139
def path_matches_pattern(rel_path: str, pattern: str) -> bool:
    """Check one relative path against one simple shell-style pattern.

    Both arguments are normalised first. An empty pattern never matches.
    The path matches when any of the following holds:

    * the whole path or its final component matches via ``fnmatchcase``;
    * the pattern contains no ``/`` and equals one of the path's segments;
    * the pattern (minus a trailing ``/**``) equals the path or is a
      directory prefix of it.
    """

    candidate = normalize_relpath(rel_path)
    glob = normalize_relpath(pattern)
    if not glob:
        return False

    _, _, leaf = candidate.rpartition("/")
    if fnmatch.fnmatchcase(candidate, glob):
        return True
    if fnmatch.fnmatchcase(leaf, glob):
        return True

    # A bare name such as "build" matches that name at any depth.
    if "/" not in glob and glob in _path_segments(candidate):
        return True

    # "dir" and "dir/**" both cover the directory and everything beneath it.
    directory = glob[:-3] if glob.endswith("/**") else glob
    if not directory:
        return False
    return candidate == directory or candidate.startswith(
        directory.rstrip("/") + "/"
    )
162
+
163
+
164
def path_matches_any(rel_path: str, patterns: Iterable[str]) -> bool:
    """Report whether at least one of ``patterns`` matches ``rel_path``."""

    for pattern in patterns:
        if path_matches_pattern(rel_path, pattern):
            return True
    return False
168
+
169
+
170
def pattern_targets_path_or_descendant(path: str, pattern: str) -> bool:
    """Report whether ``pattern`` applies to ``path`` or something below it.

    Both values are normalised and stripped of surrounding slashes. The
    result is false when either is empty afterwards.
    """

    anchor = normalize_relpath(path).strip("/")
    candidate = normalize_relpath(pattern).strip("/")
    if not (anchor and candidate):
        return False
    if path_matches_pattern(anchor, candidate):
        return True
    # The pattern is spelled with ``path`` as its leading directory.
    return candidate.startswith(f"{anchor}/")
178
+
179
+
180
def matches_glob(rel_path: str, glob_patterns: Iterable[str]) -> bool:
    """Apply the optional glob filters; no filters means everything passes."""

    active = tuple(glob_patterns)
    return not active or path_matches_any(rel_path, active)
187
+
188
+
189
def should_ignore_path(
    rel_path: str,
    *,
    is_dir: bool,
    exclude_patterns: Iterable[str] = (),
    ignore_patterns: Iterable[str] = (),
    root_ignore_patterns: Iterable[str] = (),
    common_rel_path: str | None = None,
) -> bool:
    """Decide whether a file or directory should be skipped.

    ``common_rel_path`` (defaulting to ``rel_path``) is the path used for the
    built-in ignore lists and for ``root_ignore_patterns``;
    ``exclude_patterns`` and ``ignore_patterns`` are matched against
    ``rel_path`` itself.
    """

    rel_path = normalize_relpath(rel_path)
    common_path = normalize_relpath(
        rel_path if common_rel_path is None else common_rel_path
    )

    if is_dir:
        # The final component is one of the segments, so a single segment
        # scan covers both the directory's own name and its ancestors.
        if not COMMON_IGNORED_DIRS.isdisjoint(_path_segments(common_path)):
            return True
    elif common_path.rpartition("/")[2] in COMMON_IGNORED_FILES:
        return True

    explicit = (*exclude_patterns, *ignore_patterns)
    if explicit and path_matches_any(rel_path, explicit):
        return True

    return bool(root_ignore_patterns) and path_matches_any(
        common_path, root_ignore_patterns
    )
File without changes
@@ -0,0 +1,154 @@
1
+ """Pure-Python stdlib backend for workspace search.
2
+
3
+ The package mirrors the Rust helper's ``rust/src`` organization so equivalent
4
+ backend responsibilities live in equivalent module names:
5
+
6
+ * ``constants`` and ``models`` define shared backend data;
7
+ * ``case`` and ``config`` validate public search options;
8
+ * ``walker`` and ``ignore_rules`` enumerate candidate files;
9
+ * ``matcher``, ``literal``, ``regex_search``, ``file_search``, and ``text``
10
+ perform path/content matching; and
11
+ * ``search`` coordinates ranking, pagination, and result assembly.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import re
17
+
18
+ from .case import resolve_case
19
+ from .config import normalize_mode, normalize_path_scope, normalize_target
20
+ from .constants import (
21
+ BINARY_CHECK_BYTES,
22
+ MAX_FILE_BYTES,
23
+ MAX_SNIPPETS_PER_FILE,
24
+ MAX_SNIPPET_CHARS,
25
+ VALID_MODES,
26
+ VALID_PATH_SCOPES,
27
+ VALID_TARGETS,
28
+ )
29
+ from .models import BinaryFileError, CandidateFile, IgnorePatterns
30
+ from .file_search import read_text_candidate
31
+ from .ignore_rules import ignore_patterns_for_root
32
+ from .literal import LiteralMatcher, count_non_overlapping, search_literal_file
33
+ from .matcher import PathMatcher, path_match_subject
34
+ from .output import (
35
+ base_result,
36
+ mark_snippets_for_content_or_path_target,
37
+ path_file_match,
38
+ record_path_only_match,
39
+ )
40
+ from .regex_search import RegexLineMatcher, search_regex_file
41
+ from .search import search_python
42
+ from .text import context_for_lines, crop, decode_line
43
+ from .walker import iter_candidate_files
44
+
45
# Names exported by ``from codetool_explore.python_backend import *``.
__all__ = [
    # Tunable limits and accepted option values.
    "BINARY_CHECK_BYTES",
    "MAX_FILE_BYTES",
    "MAX_SNIPPETS_PER_FILE",
    "MAX_SNIPPET_CHARS",
    "VALID_MODES",
    "VALID_PATH_SCOPES",
    "VALID_TARGETS",
    # Shared backend models.
    "BinaryFileError",
    "CandidateFile",
    "IgnorePatterns",
    # Functions.
    "iter_candidate_files",
    "normalize_mode",
    "normalize_path_scope",
    "normalize_target",
    "resolve_case",
    "search_python",
]
63
+
64
+
65
def _read_text_candidate(path: str, size: int) -> bytes:
    """Path/size shim around ``read_text_candidate`` kept for compatibility.

    ``path`` is used for both the candidate's ``path`` and ``rel_path``.
    """

    candidate = CandidateFile(path=path, rel_path=path, size=size)
    return read_text_candidate(candidate)
69
+
70
+
71
def _search_literal_file(
    path: str,
    rel_path: str,
    size: int,
    *,
    pattern: str,
    needle: bytes,
    case_sensitive: bool,
    context_lines: int,
    collect_snippets: bool,
) -> tuple[dict[str, object] | None, list[dict[str, object]]]:
    """Flat-argument shim around ``search_literal_file`` kept for compatibility."""

    # ``pattern`` is accepted only to keep the old signature; it is unused.
    del pattern
    candidate = CandidateFile(path=path, rel_path=rel_path, size=size)
    data = read_text_candidate(candidate)
    matcher = LiteralMatcher(needle=needle, case_sensitive=case_sensitive)
    return search_literal_file(
        candidate,
        data,
        matcher,
        context_lines=context_lines,
        collect_snippets=collect_snippets,
    )
93
+
94
+
95
def _search_regex_file(
    path: str,
    rel_path: str,
    size: int,
    *,
    compiled: re.Pattern[str],
    context_lines: int,
    collect_snippets: bool,
) -> tuple[dict[str, object] | None, list[dict[str, object]]]:
    """Flat-argument shim around ``search_regex_file`` kept for compatibility."""

    candidate = CandidateFile(path=path, rel_path=rel_path, size=size)
    data = read_text_candidate(candidate)
    matcher = RegexLineMatcher(compiled=compiled)
    return search_regex_file(
        candidate,
        data,
        matcher,
        context_lines=context_lines,
        collect_snippets=collect_snippets,
    )
114
+
115
+
116
def _path_matches_literal(
    rel_path: str,
    *,
    pattern: str,
    case_sensitive: bool,
    path_scope: str,
) -> bool:
    """Literal path-match shim built on ``PathMatcher``, kept for compatibility."""

    matcher = PathMatcher.build(
        pattern,
        regex=False,
        case_sensitive=case_sensitive,
    )
    return matcher.is_match(rel_path, path_scope)
130
+
131
+
132
def _path_matches_regex(
    rel_path: str,
    *,
    compiled: re.Pattern[str],
    path_scope: str,
) -> bool:
    """Regex path-match shim kept for compatibility.

    Searches ``compiled`` within the subject that ``path_match_subject``
    derives from ``rel_path`` and ``path_scope``.
    """

    subject = path_match_subject(rel_path, path_scope)
    return compiled.search(subject) is not None
141
+
142
+
143
# Underscore-prefixed aliases for helpers that now live in submodules.
_base_result = base_result
_context_for_lines = context_for_lines
_count_non_overlapping = count_non_overlapping
_crop = crop
_decode_line = decode_line
_ignore_patterns_for_root = ignore_patterns_for_root
_mark_snippets_for_content_or_path_target = mark_snippets_for_content_or_path_target  # noqa: E501
_path_file_match = path_file_match
_path_match_subject = path_match_subject
_record_path_only_match = record_path_only_match
@@ -0,0 +1,19 @@
1
+ """Case-sensitivity handling for the Python backend."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from ..errors import ExploreArgumentError
6
+
7
+
8
def resolve_case(case: str, pattern: str) -> tuple[str, bool]:
    """Resolve a case option to ``(effective_case, case_sensitive)``.

    A falsy ``case`` means ``"smart"``: the search is case-sensitive exactly
    when ``pattern`` contains an uppercase character. Option names are
    compared case-insensitively, and several aliases are accepted for the
    sensitive and insensitive settings.

    Raises:
        ExploreArgumentError: if ``case`` is not a recognised option.
    """

    requested = str(case or "smart").lower()
    if requested == "smart":
        has_upper = any(letter.isupper() for letter in pattern)
        return ("sensitive", True) if has_upper else ("insensitive", False)
    if requested in ("sensitive", "case-sensitive", "exact"):
        return "sensitive", True
    if requested in (
        "insensitive",
        "ignore",
        "ignorecase",
        "case-insensitive",
        "i",
    ):
        return "insensitive", False
    raise ExploreArgumentError("case must be one of: smart, sensitive, insensitive")
@@ -0,0 +1,35 @@
1
+ """Public-option normalization for the Python backend."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from ..errors import ExploreArgumentError
6
+ from .constants import VALID_MODES, VALID_PATH_SCOPES, VALID_TARGETS
7
+
8
+
9
def normalize_mode(mode: str) -> str:
    """Lower-case and validate an output mode; a falsy value means ``files``.

    Raises:
        ExploreArgumentError: if the mode is not in ``VALID_MODES``.
    """

    candidate = str(mode or "files").lower()
    if candidate in VALID_MODES:
        return candidate
    raise ExploreArgumentError("mode must be one of: files, snippets, count")
16
+
17
+
18
def normalize_target(target: str) -> str:
    """Lower-case and validate a search target; a falsy value means ``content``.

    Raises:
        ExploreArgumentError: if the target is not in ``VALID_TARGETS``.
    """

    candidate = str(target or "content").lower()
    if candidate in VALID_TARGETS:
        return candidate
    raise ExploreArgumentError(
        "target must be one of: content, path, content_or_path"
    )
27
+
28
+
29
def normalize_path_scope(path_scope: str) -> str:
    """Lower-case and validate a path scope; a falsy value means ``path``.

    Raises:
        ExploreArgumentError: if the scope is not in ``VALID_PATH_SCOPES``.
    """

    candidate = str(path_scope or "path").lower()
    if candidate in VALID_PATH_SCOPES:
        return candidate
    raise ExploreArgumentError("path_scope must be one of: path, basename")
@@ -0,0 +1,39 @@
1
+ """Constants used by the Python backend."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+ from typing import Any
7
+
8
# Built-in defaults; ``_runtime_constant`` compares overrides against these.
DEFAULT_MAX_FILE_BYTES = 5 * 1024**2  # 5 MiB
DEFAULT_BINARY_CHECK_BYTES = 8 * 1024  # 8 KiB

# Tunable values, initialised to the defaults above.
MAX_FILE_BYTES = DEFAULT_MAX_FILE_BYTES
BINARY_CHECK_BYTES = DEFAULT_BINARY_CHECK_BYTES

# Snippet limits.
MAX_SNIPPETS_PER_FILE = 3
MAX_SNIPPET_CHARS = 180

# Accepted values for the public search options.
VALID_MODES = frozenset(("files", "snippets", "count"))
VALID_TARGETS = frozenset(("content", "path", "content_or_path"))
VALID_PATH_SCOPES = frozenset(("path", "basename"))
18
+
19
+
20
def _runtime_constant(name: str, default: int) -> int:
    """Look up a tunable integer, honouring package-level overrides.

    The attribute ``name`` on the ``codetool_explore.python_backend`` package
    wins when that package is loaded and the attribute differs from
    ``default``. Otherwise this module's own global of that name is used,
    falling back to ``default`` when it is absent.
    """

    module_value: Any = globals().get(name, default)
    package = sys.modules.get("codetool_explore.python_backend")
    if package:
        package_value: Any = getattr(package, name, default)
        if package_value != default:
            return int(package_value)
    return int(module_value)
28
+
29
+
30
def max_file_bytes() -> int:
    """Return the currently effective ``MAX_FILE_BYTES`` limit."""

    limit = _runtime_constant("MAX_FILE_BYTES", DEFAULT_MAX_FILE_BYTES)
    return limit
34
+
35
+
36
def binary_check_bytes() -> int:
    """Return the currently effective ``BINARY_CHECK_BYTES`` probe size."""

    probe_size = _runtime_constant("BINARY_CHECK_BYTES", DEFAULT_BINARY_CHECK_BYTES)
    return probe_size
@@ -0,0 +1,51 @@
1
+ """Read text candidates and dispatch content searchers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from .constants import binary_check_bytes, max_file_bytes
6
+ from .literal import LiteralMatcher, search_literal_file
7
+ from .matcher import SearchMatcher
8
+ from .models import BinaryFileError, CandidateFile
9
+ from .regex_search import RegexLineMatcher, search_regex_file
10
+
11
+
12
def read_text_candidate(candidate: CandidateFile) -> bytes:
    """Return the full bytes of ``candidate`` unless a guard rejects it.

    Raises:
        OverflowError: if ``candidate.size`` exceeds ``max_file_bytes()``.
        BinaryFileError: if a NUL byte occurs within the first
            ``binary_check_bytes()`` bytes of the file.
    """

    if candidate.size > max_file_bytes():
        raise OverflowError("huge file")
    with open(candidate.path, "rb") as stream:
        # Probe only the head of the file before committing to a full read.
        head = stream.read(binary_check_bytes())
        if b"\x00" in head:
            raise BinaryFileError("binary file")
        return head + stream.read()
23
+
24
+
25
def search_file(
    candidate: CandidateFile,
    matcher: SearchMatcher,
    *,
    context_lines: int,
    collect_snippets: bool,
) -> tuple[dict[str, object] | None, list[dict[str, object]]]:
    """Read one candidate file and run the searcher that fits ``matcher``.

    Raises:
        TypeError: if ``matcher`` is neither a ``LiteralMatcher`` nor a
            ``RegexLineMatcher``. The file is read before this check.
    """

    data = read_text_candidate(candidate)
    if isinstance(matcher, LiteralMatcher):
        searcher = search_literal_file
    elif isinstance(matcher, RegexLineMatcher):
        searcher = search_regex_file
    else:
        raise TypeError(f"unsupported matcher: {type(matcher).__name__}")
    return searcher(
        candidate,
        data,
        matcher,
        context_lines=context_lines,
        collect_snippets=collect_snippets,
    )
@@ -0,0 +1,40 @@
1
+ """Ignore-pattern loading for Python backend candidate walking."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+
7
+ from ..ignore import (
8
+ load_ignore_patterns,
9
+ pattern_targets_path_or_descendant,
10
+ relative_path,
11
+ )
12
+ from .models import IgnorePatterns
13
+
14
+
15
def ignore_patterns_for_root(
    root_abs: str,
    *,
    rel_base_abs: str | None,
    is_file: bool,
) -> IgnorePatterns:
    """Collect the ignore patterns that apply to one search root.

    Root-local patterns are loaded from the root directory, or from its
    parent when the root is a file. When ``rel_base_abs`` is an existing
    directory its patterns are loaded as the common set, leaving out any
    pattern that targets the search root itself or something beneath it.
    """

    if is_file:
        filter_root = os.path.dirname(root_abs)
    else:
        filter_root = root_abs

    if rel_base_abs is None:
        return IgnorePatterns(root=load_ignore_patterns(filter_root))

    root_prefix = relative_path(filter_root, rel_base_abs)
    # Dicts serve as insertion-ordered sets to drop duplicate patterns.
    common: dict[str, None] = {}
    if os.path.isdir(rel_base_abs):
        for pattern in load_ignore_patterns(rel_base_abs):
            if not pattern_targets_path_or_descendant(root_prefix, pattern):
                common.setdefault(pattern, None)

    root_local = dict.fromkeys(load_ignore_patterns(filter_root))
    return IgnorePatterns(common=tuple(common), root=tuple(root_local))
@@ -0,0 +1,79 @@
1
+ """Literal content matching for the Python backend."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+
7
+ from .constants import MAX_SNIPPETS_PER_FILE
8
+ from .models import CandidateFile
9
+ from .text import context_for_lines, crop, decode_line
10
+
11
+
12
@dataclass(frozen=True)
class LiteralMatcher:
    """Immutable inputs for a literal content search."""

    # Byte sequence to look for.
    needle: bytes
    # Whether matching should respect letter case.
    case_sensitive: bool
18
+
19
+
20
def count_non_overlapping(haystack: bytes, needle: bytes) -> int:
    """Count non-overlapping occurrences of ``needle`` in ``haystack``.

    Delegates to ``bytes.count``, which scans left to right and skips past
    each match. An empty ``needle`` therefore yields ``len(haystack) + 1``,
    the same result as the earlier hand-written loop.
    """

    return haystack.count(needle)
33
+
34
def search_literal_file(
    candidate: CandidateFile,
    data: bytes,
    matcher: LiteralMatcher,
    *,
    context_lines: int,
    collect_snippets: bool,
) -> tuple[dict[str, object] | None, list[dict[str, object]]]:
    """Scan ``data`` line by line for the matcher's literal needle.

    Returns ``(None, [])`` when nothing matches. Otherwise returns a file
    record with ``path``, total ``count`` and 1-based ``first_line``, plus up
    to ``MAX_SNIPPETS_PER_FILE`` snippet records when ``collect_snippets`` is
    true. Case-insensitive matching lower-cases both sides with
    ``bytes.lower``.
    """

    respect_case = matcher.case_sensitive
    target = matcher.needle if respect_case else matcher.needle.lower()
    lines = data.splitlines()

    total = 0
    first_hit: int | None = None
    snippets: list[dict[str, object]] = []

    for number, line in enumerate(lines, start=1):
        probe = line if respect_case else line.lower()
        hits = count_non_overlapping(probe, target)
        if hits == 0:
            continue
        total += hits
        if first_hit is None:
            first_hit = number
        if not collect_snippets or len(snippets) >= MAX_SNIPPETS_PER_FILE:
            continue
        entry: dict[str, object] = {
            "path": candidate.rel_path,
            "line": number,
            "snippet": crop(decode_line(line)),
        }
        # ``context_for_lines`` takes the 0-based index of the matching line.
        context = context_for_lines(lines, number - 1, context_lines)
        if context:
            entry["context"] = context
        snippets.append(entry)

    if total == 0 or first_hit is None:
        return None, []

    return (
        {
            "path": candidate.rel_path,
            "count": total,
            "first_line": first_hit,
        },
        snippets,
    )