codetool-explore 0.5.0__py3-none-win_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. codetool_explore/__init__.py +35 -0
  2. codetool_explore/_bin/codetool-explore-rust-windows-arm64.exe +0 -0
  3. codetool_explore/api.py +266 -0
  4. codetool_explore/cli.py +188 -0
  5. codetool_explore/compression.py +150 -0
  6. codetool_explore/cursor.py +71 -0
  7. codetool_explore/errors.py +23 -0
  8. codetool_explore/explorer.py +497 -0
  9. codetool_explore/ignore.py +222 -0
  10. codetool_explore/py.typed +0 -0
  11. codetool_explore/python_backend/__init__.py +154 -0
  12. codetool_explore/python_backend/case.py +19 -0
  13. codetool_explore/python_backend/config.py +35 -0
  14. codetool_explore/python_backend/constants.py +39 -0
  15. codetool_explore/python_backend/file_search.py +51 -0
  16. codetool_explore/python_backend/ignore_rules.py +40 -0
  17. codetool_explore/python_backend/literal.py +79 -0
  18. codetool_explore/python_backend/matcher.py +79 -0
  19. codetool_explore/python_backend/models.py +49 -0
  20. codetool_explore/python_backend/output.py +82 -0
  21. codetool_explore/python_backend/regex_search.py +63 -0
  22. codetool_explore/python_backend/search.py +327 -0
  23. codetool_explore/python_backend/text.py +39 -0
  24. codetool_explore/python_backend/walker.py +119 -0
  25. codetool_explore/ranking.py +384 -0
  26. codetool_explore/roots.py +148 -0
  27. codetool_explore/rust_backend.py +308 -0
  28. codetool_explore/text_output.py +475 -0
  29. codetool_explore-0.5.0.dist-info/METADATA +240 -0
  30. codetool_explore-0.5.0.dist-info/RECORD +33 -0
  31. codetool_explore-0.5.0.dist-info/WHEEL +4 -0
  32. codetool_explore-0.5.0.dist-info/entry_points.txt +2 -0
  33. codetool_explore-0.5.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,79 @@
1
+ """Content and path matcher construction."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from dataclasses import dataclass
7
+ from typing import TypeAlias
8
+
9
+ from .literal import LiteralMatcher
10
+ from .regex_search import RegexLineMatcher
11
+
12
+ SearchMatcher: TypeAlias = LiteralMatcher | RegexLineMatcher
13
+
14
+
15
def build_content_matcher(
    pattern: str,
    *,
    regex: bool,
    case_sensitive: bool,
) -> SearchMatcher:
    """Create the content matcher for ``pattern``.

    A literal search wraps the UTF-8 encoded pattern in a ``LiteralMatcher``;
    a regex search compiles the pattern (adding ``re.IGNORECASE`` when the
    search is case-insensitive) and wraps it in a ``RegexLineMatcher``.
    ``re.error`` from an invalid regex propagates to the caller.
    """

    if not regex:
        # ``surrogatepass`` lets lone surrogates in the pattern be encoded
        # instead of raising UnicodeEncodeError.
        encoded = pattern.encode("utf-8", errors="surrogatepass")
        return LiteralMatcher(needle=encoded, case_sensitive=case_sensitive)

    regex_flags = re.IGNORECASE if not case_sensitive else 0
    return RegexLineMatcher(re.compile(pattern, regex_flags))
28
+
29
+
30
@dataclass(frozen=True)
class PathMatcher:
    """Immutable state needed to test relative paths against a pattern."""

    # Raw pattern text as supplied by the caller.
    pattern: str
    # True when ``pattern`` is a regular expression rather than a literal.
    regex: bool
    # True when matching must respect letter case.
    case_sensitive: bool
    # Compiled regex; ``build`` fills this in only when ``regex`` is true.
    compiled: re.Pattern[str] | None = None

    @classmethod
    def build(
        cls,
        pattern: str,
        *,
        regex: bool,
        case_sensitive: bool,
    ) -> "PathMatcher":
        """Create a matcher, compiling ``pattern`` first when ``regex`` is set.

        ``re.error`` from an invalid regex propagates to the caller.
        """

        prepared = None
        if regex:
            prepared = re.compile(
                pattern,
                re.IGNORECASE if not case_sensitive else 0,
            )
        return cls(
            pattern=pattern,
            regex=regex,
            case_sensitive=case_sensitive,
            compiled=prepared,
        )

    def is_match(self, rel_path: str, path_scope: str) -> bool:
        """Tell whether ``rel_path`` (narrowed by ``path_scope``) matches."""

        subject = path_match_subject(rel_path, path_scope)
        if not self.regex:
            # Literal mode: an empty pattern matches every path.
            if not self.pattern:
                return True
            if self.case_sensitive:
                return self.pattern in subject
            return self.pattern.casefold() in subject.casefold()

        assert self.compiled is not None
        return self.compiled.search(subject) is not None
72
+
73
+
74
def path_match_subject(rel_path: str, path_scope: str) -> str:
    """Pick the part of ``rel_path`` that ``path_scope`` asks to match.

    For ``"basename"`` this is everything after the last ``/``; any other
    scope yields the full relative path unchanged.
    """

    if path_scope != "basename":
        return rel_path
    # rpartition returns the whole string as the tail when no "/" is present.
    _, _, basename = rel_path.rpartition("/")
    return basename
@@ -0,0 +1,49 @@
1
+ """Data models shared by the Python backend modules."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+
7
+
8
class BinaryFileError(ValueError):
    """Internal signal that a candidate file appears to hold binary data."""
10
+
11
+
12
@dataclass(frozen=True)
class CandidateFile:
    """Immutable record describing one file produced by the scanner."""

    # Path used to open the file.
    path: str
    # Path reported in results and used for path matching.
    rel_path: str
    # File size in bytes.
    size: int
19
+
20
+
21
@dataclass(frozen=True)
class IgnorePatterns:
    """Ignore patterns grouped by which path they are matched against."""

    # Patterns in the "common" group; empty by default.
    common: tuple[str, ...] = ()
    # Patterns in the "root" group; empty by default.
    root: tuple[str, ...] = ()
27
+
28
+
29
@dataclass
class SearchCounters:
    """Running tallies updated in place while candidates are searched."""

    scanned_files: int = 0
    scanned_bytes: int = 0
    path_match_count: int = 0
    content_match_count: int = 0
    skipped_binary: int = 0
    skipped_huge: int = 0
    skipped_errors: int = 0

    @property
    def skipped(self) -> dict[str, int]:
        """Expose the skip tallies under their public result keys."""

        summary: dict[str, int] = {}
        summary["binary"] = self.skipped_binary
        summary["huge"] = self.skipped_huge
        summary["errors"] = self.skipped_errors
        return summary
@@ -0,0 +1,82 @@
1
+ """Result-shaping helpers for the Python backend."""
2
+
3
+ from __future__ import annotations
4
+
5
+
6
def path_file_match(
    rel_path: str,
    *,
    mode: str,
    match_kind: str = "path",
) -> dict[str, object]:
    """Create the file-level result row for a file matched by its path.

    In ``"count"`` mode the row also carries a ``count`` of 1.
    """

    row: dict[str, object] = {
        "path": rel_path,
        "kind": "file",
        "match_kind": match_kind,
    }
    if mode != "count":
        return row
    row["count"] = 1
    return row
22
+
23
+
24
def record_path_only_match(
    file_matches: list[dict[str, object]],
    snippet_matches: list[dict[str, object]],
    rel_path: str,
    *,
    mode: str,
    collect_snippets: bool,
) -> None:
    """Add a path-only row to ``file_matches`` and optionally to snippets.

    When ``collect_snippets`` is true, ``snippet_matches`` receives an
    independent copy of the row so later edits to one list's entry do not
    leak into the other.
    """

    row = path_file_match(rel_path, mode=mode)
    file_matches.append(row)
    if not collect_snippets:
        return
    snippet_matches.append(row.copy())
38
+
39
+
40
def mark_snippets_for_content_or_path_target(
    snippets: list[dict[str, object]],
    *,
    path_matches: bool,
) -> None:
    """Tag each snippet row in place with ``kind`` and ``match_kind``.

    ``match_kind`` is ``"content_and_path"`` when the file's path matched as
    well, otherwise ``"content"``.
    """

    label = "content"
    if path_matches:
        label = "content_and_path"
    for row in snippets:
        row.update({"kind": "file", "match_kind": label})
51
+
52
+
53
def base_result(
    *,
    pattern: str,
    root: str | list[str],
    regex: bool,
    target: str,
    path_scope: str,
    mode: str,
    requested_case: str,
    effective_case: str,
    limit: int,
    cursor: str | int | None,
) -> dict[str, object]:
    """Assemble the metadata fields that lead every backend result.

    ``regex`` is coerced to ``bool`` and a non-``None`` ``cursor`` is
    stringified. ``path_scope`` is included only when the target involves
    paths (anything other than ``"content"``).
    """

    cursor_text = None if cursor is None else str(cursor)
    metadata: dict[str, object] = {
        "pattern": pattern,
        "root": root,
        "regex": bool(regex),
        "target": target,
        "mode": mode,
        "case": requested_case,
        "effective_case": effective_case,
        "limit": limit,
        "cursor": cursor_text,
        "backend": "python",
    }
    if target == "content":
        return metadata
    metadata["path_scope"] = path_scope
    return metadata
@@ -0,0 +1,63 @@
1
+ """Regex content matching for the Python backend."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from dataclasses import dataclass
7
+
8
+ from .constants import MAX_SNIPPETS_PER_FILE
9
+ from .models import CandidateFile
10
+ from .text import context_for_lines, crop, decode_line
11
+
12
+
13
@dataclass(frozen=True)
class RegexLineMatcher:
    """Immutable holder for a compiled regex applied line by line."""

    # Pattern compiled by the caller, including any case flags.
    compiled: re.Pattern[str]
18
+
19
+
20
def search_regex_file(
    candidate: CandidateFile,
    data: bytes,
    matcher: RegexLineMatcher,
    *,
    context_lines: int,
    collect_snippets: bool,
) -> tuple[dict[str, object] | None, list[dict[str, object]]]:
    """Run the compiled regex over every decoded line of one file.

    Returns ``(file_row, snippets)``. ``file_row`` holds the relative path,
    the total number of regex matches, and the 1-based number of the first
    matching line; it is ``None`` (with an empty snippet list) when nothing
    matched. Snippet rows are gathered only when ``collect_snippets`` is
    true, up to ``MAX_SNIPPETS_PER_FILE`` per file.
    """

    raw_lines = data.splitlines()
    find_all = matcher.compiled.finditer
    rel_path = candidate.rel_path

    total_hits = 0
    first_hit_line: int | None = None
    collected: list[dict[str, object]] = []

    for line_number, raw in enumerate(raw_lines, start=1):
        text = decode_line(raw)
        hits = sum(1 for _ in find_all(text))
        if hits == 0:
            continue

        total_hits += hits
        if first_hit_line is None:
            first_hit_line = line_number

        # Keep counting matches even after the snippet quota is used up.
        if not collect_snippets or len(collected) >= MAX_SNIPPETS_PER_FILE:
            continue

        row: dict[str, object] = {
            "path": rel_path,
            "line": line_number,
            "snippet": crop(text),
        }
        surrounding = context_for_lines(raw_lines, line_number - 1, context_lines)
        if surrounding:
            row["context"] = surrounding
        collected.append(row)

    if total_hits == 0 or first_hit_line is None:
        return None, []

    file_row: dict[str, object] = {
        "path": rel_path,
        "count": total_hits,
        "first_line": first_hit_line,
    }
    return file_row, collected
@@ -0,0 +1,327 @@
1
+ """Top-level pure-Python backend search orchestration."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ import re
7
+ from collections.abc import Iterable
8
+
9
+ from ..cursor import normalize_limit, page_items
10
+ from ..errors import ExploreArgumentError, ExplorePatternError
11
+ from ..ignore import normalize_patterns
12
+ from ..ranking import file_sort_key, snippet_sort_key
13
+ from ..roots import RootInput, normalize_search_roots
14
+ from .case import resolve_case
15
+ from .config import normalize_mode, normalize_path_scope, normalize_target
16
+ from .constants import max_file_bytes
17
+ from .file_search import search_file
18
+ from .matcher import PathMatcher, build_content_matcher
19
+ from .models import BinaryFileError, CandidateFile, SearchCounters
20
+ from .output import (
21
+ base_result,
22
+ mark_snippets_for_content_or_path_target,
23
+ record_path_only_match,
24
+ )
25
+ from .walker import iter_candidate_files
26
+
27
+
28
def search_python(
    pattern: str,
    root: RootInput = ".",
    *,
    regex: bool = False,
    target: str = "content",
    path_scope: str = "path",
    glob: str | Iterable[str] | None = None,
    exclude: str | Iterable[str] | None = None,
    case: str = "smart",
    mode: str = "files",
    context_lines: int = 0,
    limit: int = 50,
    cursor: str | int | None = None,
) -> dict[str, object]:
    """Search file contents, paths, or content/path union using Python.

    Arguments are validated and normalised first, then every candidate file
    under each search root is examined once, and finally the collected rows
    are sorted, paged, and wrapped in a result dictionary.

    Returns:
        A dict that starts with the ``base_result`` metadata and adds
        ``matches`` (the current page), ``returned``, ``total_files``,
        ``total_matches``, ``count``, ``content_count``, ``content_files``,
        ``path_matches``, ``truncated``, ``next_cursor``, ``offset``,
        ``scanned_files``, ``scanned_bytes`` and ``skipped``. ``glob`` and
        ``exclude`` are echoed back only when non-empty.

    Raises:
        ExploreArgumentError: ``pattern`` is not a string, or
            ``mode='snippets'`` is combined with ``target='path'``.
        ExplorePatternError: ``pattern`` is empty for a content-bearing
            target, or ``regex`` is true and the pattern fails to compile.

    The ``normalize_*`` helpers are defined elsewhere and may presumably
    raise for invalid values too (not visible here).
    """

    if not isinstance(pattern, str):
        raise ExploreArgumentError("pattern must be a string")
    normalised_target = normalize_target(target)
    # An empty pattern is rejected only when content is searched; a pure
    # path search lets it through.
    if pattern == "" and normalised_target in {"content", "content_or_path"}:
        raise ExplorePatternError("pattern must not be empty")

    root_set = normalize_search_roots(root)

    normalised_mode = normalize_mode(mode)
    if normalised_mode == "snippets" and normalised_target == "path":
        raise ExploreArgumentError(
            "mode='snippets' is not supported for target='path'; "
            "use target='content' or target='content_or_path'"
        )
    safe_limit = normalize_limit(limit)
    # ``None``/falsy becomes 0 and negative values are clamped to 0.
    safe_context_lines = max(0, int(context_lines or 0))
    requested_case = str(case or "smart").lower()
    effective_case, case_sensitive = resolve_case(requested_case, pattern)
    normalised_path_scope = normalize_path_scope(path_scope)
    glob_patterns = normalize_patterns(glob)
    exclude_patterns = normalize_patterns(exclude)

    base = base_result(
        pattern=pattern,
        root=root_set.display,
        regex=regex,
        target=normalised_target,
        path_scope=normalised_path_scope,
        mode=normalised_mode,
        requested_case=requested_case,
        effective_case=effective_case,
        limit=safe_limit,
        cursor=cursor,
    )

    # Build only the matchers the target needs; the other stays ``None``.
    content_enabled = normalised_target in {"content", "content_or_path"}
    path_enabled = normalised_target in {"path", "content_or_path"}
    try:
        content_matcher = (
            build_content_matcher(
                pattern,
                regex=regex,
                case_sensitive=case_sensitive,
            )
            if content_enabled
            else None
        )
        path_matcher = (
            PathMatcher.build(
                pattern,
                regex=regex,
                case_sensitive=case_sensitive,
            )
            if path_enabled
            else None
        )
    except re.error as exc:
        # Surface regex compilation failures as the package's pattern error.
        raise ExplorePatternError(f"invalid regex: {exc}") from exc

    collect_snippets = normalised_mode == "snippets"
    file_matches: list[dict[str, object]] = []
    snippet_matches: list[dict[str, object]] = []
    counters = SearchCounters()
    # Normalised absolute paths already processed, so a file reachable from
    # more than one root is searched only once.
    seen_candidate_paths: set[str] = set()

    for search_root in root_set.roots:
        candidates = iter_candidate_files(
            search_root.abs_path,
            rel_base=root_set.rel_base,
            glob_patterns=glob_patterns,
            exclude_patterns=exclude_patterns,
        )
        for candidate in candidates:
            candidate_key = os.path.normcase(os.path.abspath(candidate.path))
            if candidate_key in seen_candidate_paths:
                continue
            seen_candidate_paths.add(candidate_key)
            # Appends to file_matches/snippet_matches and updates counters
            # in place.
            _search_candidate(
                candidate,
                content_matcher=content_matcher,
                path_matcher=path_matcher,
                path_scope=normalised_path_scope,
                target=normalised_target,
                mode=normalised_mode,
                context_lines=safe_context_lines,
                collect_snippets=collect_snippets,
                counters=counters,
                file_matches=file_matches,
                snippet_matches=snippet_matches,
            )

    # Rank before paging so every page is cut from the same ordering.
    file_matches.sort(key=lambda item: file_sort_key(item, pattern))
    snippet_matches.sort(key=lambda item: snippet_sort_key(item, pattern))

    total_files = len(file_matches)
    # Occurrences from rows that matched on content (alone or with path).
    content_occurrences = sum(
        int(match["count"])
        for match in file_matches
        if match.get("match_kind") != "path" and "count" in match
    )
    # Path-only rows count as one occurrence each unless they carry a count.
    path_only_occurrences = sum(
        int(match.get("count", 1))
        for match in file_matches
        if match.get("match_kind") == "path"
    )
    total_occurrences = (
        content_occurrences
        if normalised_target == "content"
        else content_occurrences + path_only_occurrences
    )

    # Snippets mode pages over snippet rows; every other mode pages files.
    if normalised_mode == "snippets":
        all_matches = snippet_matches
        total_matches = len(snippet_matches)
    else:
        all_matches = file_matches
        total_matches = total_files

    page, truncated, next_cursor, offset = page_items(
        all_matches, limit=safe_limit, cursor=cursor
    )

    result = {
        **base,
        "matches": page,
        "returned": len(page),
        "total_files": total_files,
        "total_matches": total_matches,
        "count": total_occurrences,
        "content_count": content_occurrences,
        "content_files": counters.content_match_count,
        "path_matches": counters.path_match_count,
        "truncated": truncated,
        "next_cursor": next_cursor,
        "offset": offset,
        "scanned_files": counters.scanned_files,
        "scanned_bytes": counters.scanned_bytes,
        "skipped": counters.skipped,
    }
    # Echo the filters back only when the caller actually supplied some.
    if glob_patterns:
        result["glob"] = list(glob_patterns)
    if exclude_patterns:
        result["exclude"] = list(exclude_patterns)
    return result
189
+
190
+
191
def _search_candidate(
    candidate: CandidateFile,
    *,
    content_matcher: object | None,
    path_matcher: PathMatcher | None,
    path_scope: str,
    target: str,
    mode: str,
    context_lines: int,
    collect_snippets: bool,
    counters: SearchCounters,
    file_matches: list[dict[str, object]],
    snippet_matches: list[dict[str, object]],
) -> None:
    """Examine one candidate and record its matches and counters in place.

    For ``target == "path"`` only the path is tested. Otherwise the file's
    content is searched; when the content cannot be searched or yields no
    match, a ``content_or_path`` target still records the file as a
    path-only match if its path matched.
    """

    rel_path = candidate.rel_path
    path_matches = path_matcher is not None and path_matcher.is_match(
        rel_path, path_scope
    )

    if target == "path":
        counters.scanned_files += 1
        if path_matches:
            # Path-only searches never collect snippets.
            record_path_only_match(
                file_matches,
                snippet_matches,
                rel_path,
                mode=mode,
                collect_snippets=False,
            )
            counters.path_match_count += 1
        return

    union_target = target == "content_or_path"

    def record_path_fallback() -> None:
        # Keep a path hit for union targets when content gave no match row.
        if union_target and path_matches:
            _record_content_or_path_target_path_fallback(
                file_matches,
                snippet_matches,
                rel_path,
                mode=mode,
                collect_snippets=collect_snippets,
                counters=counters,
            )

    if candidate.size > max_file_bytes():
        record_path_fallback()
        counters.skipped_huge += 1
        return

    counters.scanned_files += 1
    counters.scanned_bytes += candidate.size
    try:
        assert content_matcher is not None
        file_match, snippets = search_file(
            candidate,
            content_matcher,
            context_lines=context_lines,
            collect_snippets=collect_snippets,
        )
    except BinaryFileError:
        # Must come before the UnicodeError clause: both derive from
        # ValueError, and binary files have their own counter.
        record_path_fallback()
        counters.skipped_binary += 1
        return
    except OverflowError:
        record_path_fallback()
        counters.skipped_huge += 1
        return
    except (OSError, UnicodeError, re.error):
        record_path_fallback()
        counters.skipped_errors += 1
        return

    if file_match is None:
        record_path_fallback()
        return

    if union_target:
        file_match["kind"] = "file"
        file_match["match_kind"] = "content_and_path" if path_matches else "content"
        if collect_snippets:
            mark_snippets_for_content_or_path_target(
                snippets,
                path_matches=path_matches,
            )
        if path_matches:
            counters.path_match_count += 1
    counters.content_match_count += 1
    file_matches.append(file_match)
    snippet_matches.extend(snippets)
309
+
310
+
311
def _record_content_or_path_target_path_fallback(
    file_matches: list[dict[str, object]],
    snippet_matches: list[dict[str, object]],
    rel_path: str,
    *,
    mode: str,
    collect_snippets: bool,
    counters: SearchCounters,
) -> None:
    """Record ``rel_path`` as a path-only match and count the path hit."""

    forwarded = {"mode": mode, "collect_snippets": collect_snippets}
    record_path_only_match(file_matches, snippet_matches, rel_path, **forwarded)
    # Incremented only after the row has been recorded successfully.
    counters.path_match_count += 1
@@ -0,0 +1,39 @@
1
+ """Text decoding, cropping, and snippet context helpers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from .constants import MAX_SNIPPET_CHARS
6
+
7
+
8
def decode_line(line: bytes) -> str:
    """Turn one raw line into display text.

    Bytes are decoded as UTF-8 with undecodable sequences replaced, and any
    trailing carriage returns are removed.
    """

    decoded = line.decode("utf-8", "replace")
    return decoded.rstrip("\r")
12
+
13
+
14
def crop(text: str, *, max_chars: int = MAX_SNIPPET_CHARS) -> str:
    """Compact and crop snippet text.

    Tabs are replaced by single spaces and surrounding whitespace is
    stripped. Text that still exceeds ``max_chars`` is cut to
    ``max_chars - 1`` characters, right-stripped, and terminated with an
    ellipsis. With a limit of 0 or 1 there is no room for the ellipsis, so
    the text is simply truncated.

    A negative ``max_chars`` is treated as 0. Without the clamp it would be
    used as a slice end and keep all but the last few characters.
    """

    limit = max(0, max_chars)
    compact = text.replace("\t", " ").strip()
    if len(compact) <= limit:
        return compact
    if limit <= 1:
        return compact[:limit]
    return compact[: limit - 1].rstrip() + "…"
23
+
24
+
25
def context_for_lines(
    lines: list[bytes],
    line_index: int,
    context_lines: int,
) -> list[dict[str, object]]:
    """Build cropped context rows for the window around ``line_index``.

    The window spans ``context_lines`` lines on each side of (and including)
    the 0-based ``line_index``, clipped to the bounds of ``lines``. Each row
    carries the 1-based line number and the decoded, cropped text. A
    non-positive ``context_lines`` yields no rows.
    """

    if context_lines <= 0:
        return []

    first = max(line_index - context_lines, 0)
    stop = min(line_index + context_lines + 1, len(lines))

    rows: list[dict[str, object]] = []
    for position in range(first, stop):
        shown = crop(decode_line(lines[position]))
        rows.append({"line": position + 1, "text": shown})
    return rows