codetool-explore 0.5.0__py3-none-win_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. codetool_explore/__init__.py +35 -0
  2. codetool_explore/_bin/codetool-explore-rust-windows-arm64.exe +0 -0
  3. codetool_explore/api.py +266 -0
  4. codetool_explore/cli.py +188 -0
  5. codetool_explore/compression.py +150 -0
  6. codetool_explore/cursor.py +71 -0
  7. codetool_explore/errors.py +23 -0
  8. codetool_explore/explorer.py +497 -0
  9. codetool_explore/ignore.py +222 -0
  10. codetool_explore/py.typed +0 -0
  11. codetool_explore/python_backend/__init__.py +154 -0
  12. codetool_explore/python_backend/case.py +19 -0
  13. codetool_explore/python_backend/config.py +35 -0
  14. codetool_explore/python_backend/constants.py +39 -0
  15. codetool_explore/python_backend/file_search.py +51 -0
  16. codetool_explore/python_backend/ignore_rules.py +40 -0
  17. codetool_explore/python_backend/literal.py +79 -0
  18. codetool_explore/python_backend/matcher.py +79 -0
  19. codetool_explore/python_backend/models.py +49 -0
  20. codetool_explore/python_backend/output.py +82 -0
  21. codetool_explore/python_backend/regex_search.py +63 -0
  22. codetool_explore/python_backend/search.py +327 -0
  23. codetool_explore/python_backend/text.py +39 -0
  24. codetool_explore/python_backend/walker.py +119 -0
  25. codetool_explore/ranking.py +384 -0
  26. codetool_explore/roots.py +148 -0
  27. codetool_explore/rust_backend.py +308 -0
  28. codetool_explore/text_output.py +475 -0
  29. codetool_explore-0.5.0.dist-info/METADATA +240 -0
  30. codetool_explore-0.5.0.dist-info/RECORD +33 -0
  31. codetool_explore-0.5.0.dist-info/WHEEL +4 -0
  32. codetool_explore-0.5.0.dist-info/entry_points.txt +2 -0
  33. codetool_explore-0.5.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,119 @@
1
+ """Candidate-file walking for the Python backend."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ from collections.abc import Iterable, Iterator
7
+
8
+ from ..ignore import matches_glob, relative_path, should_ignore_path
9
+ from .ignore_rules import ignore_patterns_for_root
10
+ from .models import CandidateFile
11
+
12
+
13
def iter_candidate_files(
    root: str,
    *,
    rel_base: str | None = None,
    glob_patterns: Iterable[str] = (),
    exclude_patterns: Iterable[str] = (),
) -> Iterator[CandidateFile]:
    """Yield candidate files for a directory or single-file ``root``.

    A file ``root`` is delegated to ``_iter_single_file_root``. A directory
    ``root`` is walked iteratively with ``os.scandir`` and an explicit stack.
    Symbolic links are not followed: only entries that are directories or
    regular files under ``follow_symlinks=False`` are considered.

    Args:
        root: Directory or file to search.
        rel_base: Optional base directory that yielded relative paths are
            computed against; for a directory root it defaults to ``root``.
        glob_patterns: Patterns handed to ``matches_glob`` for every file.
        exclude_patterns: Patterns handed to ``should_ignore_path`` for
            every directory and file.

    Yields:
        ``CandidateFile(entry.path, rel_path, st_size)`` for each file that
        is not ignored and matches ``glob_patterns``.

    Entries or directories that raise ``OSError`` while being inspected are
    skipped silently.
    """

    # The patterns are consulted once per directory entry. A one-shot
    # iterable (for example a generator) would be exhausted after the first
    # entry and filtering would silently stop, so materialise them once.
    # A plain string is passed through untouched so the downstream helpers
    # see exactly what the caller supplied.
    if not isinstance(glob_patterns, str):
        glob_patterns = tuple(glob_patterns)
    if not isinstance(exclude_patterns, str):
        exclude_patterns = tuple(exclude_patterns)

    root_abs = os.path.abspath(os.fspath(root))
    rel_base_abs = os.path.abspath(os.fspath(rel_base)) if rel_base else None
    if os.path.isfile(root_abs):
        yield from _iter_single_file_root(
            root_abs,
            rel_base_abs=rel_base_abs,
            glob_patterns=glob_patterns,
            exclude_patterns=exclude_patterns,
        )
        return

    base_root = rel_base_abs or root_abs
    ignore_patterns = ignore_patterns_for_root(
        root_abs,
        rel_base_abs=rel_base_abs,
        is_file=False,
    )
    # Directories still to be scanned; popping from the end gives a
    # depth-first traversal without recursion.
    stack = [root_abs]

    while stack:
        current = stack.pop()
        try:
            with os.scandir(current) as entries:
                for entry in entries:
                    try:
                        # ``rel_path`` is relative to the reporting base,
                        # ``common_rel_path`` is always relative to this root.
                        rel_path = relative_path(entry.path, base_root)
                        common_rel_path = (
                            relative_path(entry.path, root_abs)
                            if rel_base_abs is not None
                            else rel_path
                        )
                        if entry.is_dir(follow_symlinks=False):
                            if should_ignore_path(
                                rel_path,
                                is_dir=True,
                                exclude_patterns=exclude_patterns,
                                ignore_patterns=ignore_patterns.common,
                                root_ignore_patterns=ignore_patterns.root,
                                common_rel_path=common_rel_path,
                            ):
                                continue
                            stack.append(entry.path)
                        elif entry.is_file(follow_symlinks=False):
                            if should_ignore_path(
                                rel_path,
                                is_dir=False,
                                exclude_patterns=exclude_patterns,
                                ignore_patterns=ignore_patterns.common,
                                root_ignore_patterns=ignore_patterns.root,
                                common_rel_path=common_rel_path,
                            ):
                                continue
                            if not matches_glob(rel_path, glob_patterns):
                                continue
                            try:
                                stat_result = entry.stat(follow_symlinks=False)
                            except OSError:
                                # File vanished or is unreadable: skip it.
                                continue
                            yield CandidateFile(
                                entry.path,
                                rel_path,
                                stat_result.st_size,
                            )
                    except OSError:
                        # Best effort: one bad entry must not abort the walk.
                        continue
        except OSError:
            # Directory could not be listed (permissions, removed, ...).
            continue
89
+
90
+
91
def _iter_single_file_root(
    root_abs: str,
    *,
    rel_base_abs: str | None,
    glob_patterns: Iterable[str],
    exclude_patterns: Iterable[str],
) -> Iterator[CandidateFile]:
    """Yield at most one ``CandidateFile`` for a root that is a single file.

    The file is reported relative to ``rel_base_abs`` when given, otherwise
    relative to its own parent directory. Nothing is yielded when the file is
    ignored, does not match ``glob_patterns``, or cannot be stat'ed.
    """
    parent_dir = os.path.dirname(root_abs) or os.curdir
    display_path = relative_path(root_abs, rel_base_abs or parent_dir)
    patterns = ignore_patterns_for_root(
        root_abs,
        rel_base_abs=rel_base_abs,
        is_file=True,
    )

    ignored = should_ignore_path(
        display_path,
        is_dir=False,
        exclude_patterns=exclude_patterns,
        ignore_patterns=patterns.common,
        root_ignore_patterns=patterns.root,
        common_rel_path=relative_path(root_abs, parent_dir),
    )
    if ignored:
        return
    if not matches_glob(display_path, glob_patterns):
        return

    try:
        size = os.stat(root_abs).st_size
    except OSError:
        return
    yield CandidateFile(root_abs, display_path, size)
@@ -0,0 +1,384 @@
1
+ """Ranking helpers for search results.
2
+
3
+ The ranking favours source files, path relevance, definitions, and concise
4
+ matches. It is intentionally deterministic so pagination remains stable.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import os
10
+ import re
11
+ from collections.abc import Mapping
12
+
13
# File extensions treated as source/documentation by ``is_source_path``.
# Entries are lowercase and include the leading dot because the lookup is
# done on ``os.path.splitext`` of the lowercased basename.
SOURCE_EXTENSIONS: frozenset[str] = frozenset(
    {
        ".py",
        ".pyi",
        ".rs",
        ".go",
        ".js",
        ".jsx",
        ".ts",
        ".tsx",
        ".java",
        ".kt",
        ".c",
        ".h",
        ".cc",
        ".cpp",
        ".hpp",
        ".cs",
        ".rb",
        ".php",
        ".swift",
        ".scala",
        ".sh",
        ".bash",
        ".zsh",
        ".fish",
        ".toml",
        ".yaml",
        ".yml",
        ".json",
        ".md",
        ".rst",
    }
)

# Lowercase path segments that make ``is_generated_path`` return true when
# any one of them appears anywhere in the path.
GENERATED_SEGMENTS: frozenset[str] = frozenset(
    {
        "generated",
        "vendor",
        "vendors",
        "third_party",
        "third-party",
        "coverage",
        "htmlcov",
        "site-packages",
        "dist-packages",
    }
)

# Lowercase path segments that make ``is_test_path`` return true.
TEST_SEGMENTS: frozenset[str] = frozenset(
    {"test", "tests", "spec", "specs", "__tests__"}
)

# Line prefixes that ``definition_bonus`` treats as a definition. The snippet
# is stripped and lowercased before ``str.startswith`` is applied, so every
# prefix here is lowercase and ends with a space.
DEFINITION_PREFIXES: tuple[str, ...] = (
    "def ",
    "async def ",
    "class ",
    "fn ",
    "pub fn ",
    "pub(crate) fn ",
    "pub(super) fn ",
    "async fn ",
    "pub async fn ",
    "pub(crate) async fn ",
    "pub(super) async fn ",
    "unsafe fn ",
    "pub unsafe fn ",
    "pub(crate) unsafe fn ",
    "function ",
    "export function ",
    "export async function ",
    "export default function ",
    "export default async function ",
    "export class ",
    "export default class ",
    "const ",
    "pub const ",
    "pub(crate) const ",
    "pub(super) const ",
    "static ",
    "pub static ",
    "pub(crate) static ",
    "pub(super) static ",
    "let ",
    "export const ",
    "export let ",
    "export var ",
    "var ",
    "type ",
    "pub type ",
    "pub(crate) type ",
    "pub(super) type ",
    "export type ",
    "interface ",
    "export interface ",
    "struct ",
    "pub struct ",
    "pub(crate) struct ",
    "pub(super) struct ",
    "enum ",
    "pub enum ",
    "pub(crate) enum ",
    "pub(super) enum ",
    "trait ",
    "pub trait ",
    "pub(crate) trait ",
    "pub(super) trait ",
    "export enum ",
    "impl ",
)

# Query terms that make ``query_mentions_tests`` return true, which in turn
# switches off the test-path penalty in the sort keys.
TEST_INTENT_TERMS: frozenset[str] = frozenset(
    {
        "test",
        "tests",
        "testing",
        "spec",
        "specs",
        "fixture",
        "fixtures",
        "mock",
        "mocks",
        "assert",
        "pytest",
        "unittest",
    }
)
140
+
141
+
142
+ def _segments(path: str) -> tuple[str, ...]:
143
+ return tuple(
144
+ segment.lower()
145
+ for segment in path.replace(os.sep, "/").replace("\\", "/").split("/")
146
+ if segment
147
+ )
148
+
149
+
150
+ def _basename(path: str) -> str:
151
+ return path.replace(os.sep, "/").replace("\\", "/").rsplit("/", 1)[-1]
152
+
153
+
154
def is_generated_path(path: str) -> bool:
    """Return true for generated/vendor/minified-looking paths.

    A path qualifies when its basename ends in ``.min.js`` or ``.map``, or
    when any path segment is listed in ``GENERATED_SEGMENTS``.
    """

    name = _basename(path).lower()
    if name.endswith((".min.js", ".map")):
        return True
    return not GENERATED_SEGMENTS.isdisjoint(_segments(path))
163
+
164
+
165
def is_test_path(path: str) -> bool:
    """Return true for common test/spec file paths.

    Either a path segment is listed in ``TEST_SEGMENTS``, or the basename
    without its final extension starts with ``test_`` or ends with ``_test``,
    ``.test`` or ``.spec``.
    """

    name = _basename(path).lower()
    stem = name.rsplit(".", 1)[0]
    if not TEST_SEGMENTS.isdisjoint(_segments(path)):
        return True
    if stem.startswith("test_"):
        return True
    return stem.endswith(("_test", ".test", ".spec"))
177
+
178
+
179
def is_source_path(path: str) -> bool:
    """Return true when the file extension is listed in ``SOURCE_EXTENSIONS``."""

    lowered_name = _basename(path).lower()
    suffix = os.path.splitext(lowered_name)[1]
    return suffix in SOURCE_EXTENSIONS
184
+
185
+
186
def query_mentions_tests(query: str) -> bool:
    """Return true if any term of the query is a test/spec intent term."""

    return not TEST_INTENT_TERMS.isdisjoint(_query_terms(query))
190
+
191
+
192
def path_relevance(path: str, query: str) -> int:
    """Score how well ``path`` matches ``query``; lower is better.

    An empty query scores 50. Otherwise the literal (lowercased) query is
    tried against the file name, then the directory segments, then the whole
    path. If none of those match, a separator-free "compact" form of the
    query is tried against the file name, and finally the per-term scoring of
    ``_term_path_relevance`` is used.
    """

    if not query:
        return 50

    needle = query.lower()
    unified = path.replace(os.sep, "/").replace("\\", "/").lower()
    name = _basename(unified)
    stem = name.rsplit(".", 1)[0]
    parts = _segments(unified)
    parent_parts = parts[:-1]
    file_names = (stem, name)

    # Literal matches, from most to least specific.
    if needle in file_names:
        return 0
    if any(candidate.startswith(needle) for candidate in file_names):
        return 5
    if any(needle in candidate for candidate in file_names):
        return 10
    if any(part.startswith(needle) for part in parent_parts):
        return 15
    if any(needle in part for part in parent_parts):
        return 20
    if needle in unified:
        return 30

    # Compact matches: only for queries made of ASCII alphanumerics and
    # "_-.", and only when at least two characters survive compaction.
    if _query_allows_compact_variant(query):
        squeezed = _compact_alnum(query)
        if len(squeezed) >= 2:
            compact_names = (_compact_alnum(stem), _compact_alnum(name))
            if squeezed in compact_names:
                return 0
            if any(candidate.startswith(squeezed) for candidate in compact_names):
                return 5
            if any(squeezed in candidate for candidate in compact_names):
                return 10

    return _term_path_relevance(unified, stem, parts, query)
236
+
237
+
238
def _term_path_relevance(
    normalised: str, stem: str, segments: tuple[str, ...], query: str
) -> int:
    """Score a path by how many individual query terms it contains.

    Terms are counted against the file stem, the joined directory segments
    and the whole path. The first rule that applies decides the score; 50 is
    returned when the query has no terms or nothing matches.
    """
    terms = _query_terms(query)
    if not terms:
        return 50

    def hit_count(text: str) -> int:
        squeezed = _compact_alnum(text)
        return sum(1 for term in terms if _text_matches_term(text, squeezed, term))

    total = len(terms)
    in_stem = hit_count(stem)
    in_dirs = hit_count("/".join(segments[:-1]))
    in_path = hit_count(normalised)

    # Ordered (condition, score) rules; earlier rules win.
    ladder = (
        (in_stem == total, 8),
        (in_stem >= 2, 12),
        (in_dirs == total, 15),
        (in_path == total, 18),
        (in_stem == 1, 22),
        (in_dirs >= 1, 26),
        (in_path >= 1, 35),
    )
    return next((score for matched, score in ladder if matched), 50)
271
+
272
+
273
+ def _text_matches_term(text: str, compact: str, term: str) -> bool:
274
+ return term in text or term in compact
275
+
276
+
277
def _query_terms(query: str) -> tuple[str, ...]:
    """Return the identifier terms of ``query``.

    When the query qualifies for a compact variant, its compact form is
    appended as one more term (subject to ``_append_unique``'s rules).
    """
    collected = [*_split_identifier_terms(query)]
    if not _query_allows_compact_variant(query):
        return tuple(collected)
    _append_unique(collected, _compact_alnum(query))
    return tuple(collected)
282
+
283
+
284
def _split_identifier_terms(text: str) -> tuple[str, ...]:
    """Split ``text`` into unique lowercase alphanumeric terms.

    Regex escapes are blanked first, then boundaries are inserted at
    lower/digit-to-upper transitions and before the last capital of an
    uppercase run, and finally the text is split on non-alphanumerics.
    """
    cleaned = _regex_escapes_as_separators(text)
    for boundary in (r"(?<=[a-z0-9])(?=[A-Z])", r"(?<=[A-Z])(?=[A-Z][a-z])"):
        cleaned = re.sub(boundary, " ", cleaned)

    found: list[str] = []
    for piece in re.split(r"[^0-9A-Za-z]+", cleaned):
        _append_unique(found, piece.lower())
    return tuple(found)
292
+
293
+
294
+ def _regex_escapes_as_separators(text: str) -> str:
295
+ chars: list[str] = []
296
+ index = 0
297
+ while index < len(text):
298
+ character = text[index]
299
+ if character == "\\":
300
+ chars.append(" ")
301
+ index += 1
302
+ if index < len(text):
303
+ if (
304
+ text[index] in {"p", "P"}
305
+ and index + 1 < len(text)
306
+ and text[index + 1] == "{"
307
+ ):
308
+ index += 2
309
+ while index < len(text) and text[index] != "}":
310
+ index += 1
311
+ if index < len(text):
312
+ index += 1
313
+ else:
314
+ index += 1
315
+ continue
316
+
317
+ chars.append(character)
318
+ index += 1
319
+ return "".join(chars)
320
+
321
+
322
+ def _query_allows_compact_variant(query: str) -> bool:
323
+ return all(
324
+ (character.isascii() and character.isalnum()) or character in "_-."
325
+ for character in query
326
+ )
327
+
328
+
329
+ def _compact_alnum(text: str) -> str:
330
+ return "".join(
331
+ character.lower()
332
+ for character in text
333
+ if character.isascii() and character.isalnum()
334
+ )
335
+
336
+
337
+ def _append_unique(terms: list[str], term: str) -> None:
338
+ if len(term) >= 2 and term not in terms:
339
+ terms.append(term)
340
+
341
+
342
def definition_bonus(snippet: str) -> int:
    """Return -10 for definition-like snippets and 0 otherwise.

    The snippet is stripped and lowercased before it is compared against
    ``DEFINITION_PREFIXES``.
    """

    candidate = snippet.strip().lower()
    if candidate.startswith(DEFINITION_PREFIXES):
        return -10
    return 0
347
+
348
+
349
def file_sort_key(match: Mapping[str, object], query: str) -> tuple[object, ...]:
    """Build the sort key for file/count mode result rows.

    Reads ``path``, ``count`` and ``first_line`` from ``match``; missing or
    falsy numeric values count as 0. Smaller keys sort first.
    """

    path = str(match.get("path", ""))
    hits = int(match.get("count", 0) or 0)
    earliest_line = int(match.get("first_line", 0) or 0)
    query_wants_tests = query_mentions_tests(query)

    generated_penalty = 10 if is_generated_path(path) else 0
    non_source_penalty = 0 if is_source_path(path) else 5
    # Test files are only pushed down when the query is not about tests.
    test_penalty = 5 if is_test_path(path) and not query_wants_tests else 0
    # fewer matches are often more precise
    capped_hits = min(hits, 20)

    return (
        generated_penalty,
        non_source_penalty,
        test_penalty,
        path_relevance(path, query),
        capped_hits,
        earliest_line,
        len(path),
        path,
    )
366
+
367
+
368
def snippet_sort_key(match: Mapping[str, object], query: str) -> tuple[object, ...]:
    """Build the sort key for snippet mode result rows.

    Reads ``path``, ``snippet`` and ``line`` from ``match``; a missing or
    falsy line counts as 0. Smaller keys sort first.
    """

    path = str(match.get("path", ""))
    text = str(match.get("snippet", ""))
    line_number = int(match.get("line", 0) or 0)
    query_wants_tests = query_mentions_tests(query)

    generated_penalty = 10 if is_generated_path(path) else 0
    non_source_penalty = 0 if is_source_path(path) else 5
    # Test files are only pushed down when the query is not about tests.
    test_penalty = 5 if is_test_path(path) and not query_wants_tests else 0

    return (
        generated_penalty,
        non_source_penalty,
        test_penalty,
        path_relevance(path, query),
        definition_bonus(text),
        line_number,
        len(path),
        path,
    )
@@ -0,0 +1,148 @@
1
+ """Root argument normalisation shared by search backends."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ import shlex
7
+ from collections.abc import Iterable
8
+ from dataclasses import dataclass
9
+
10
+ from .errors import ExploreArgumentError, ExploreRootError
11
+
12
+
13
# One filesystem path: a plain string or an ``os.PathLike`` producing ``str``.
Pathish = str | os.PathLike[str]
# What ``normalize_search_roots`` accepts: one path or an iterable of paths.
RootInput = Pathish | Iterable[Pathish]
15
+
16
+
17
@dataclass(frozen=True)
class SearchRoot:
    """One validated root path."""

    # Path text as supplied by the caller, after ``os.fspath`` coercion.
    raw: str
    # ``os.path.abspath`` of ``raw``.
    abs_path: str
23
+
24
+
25
@dataclass(frozen=True)
class NormalizedRoots:
    """Validated root set plus multi-root display/path metadata."""

    # Validated roots, in input order.
    roots: tuple[SearchRoot, ...]
    # True when the input was an iterable, or a string that was split into
    # several roots.
    from_sequence: bool
    # Common base directory when there is more than one root, else ``None``.
    rel_base: str | None
    # Raw path list when ``from_sequence`` is true, else the single raw path.
    display: str | list[str]

    @property
    def has_multiple(self) -> bool:
        """Whether two or more roots are present."""
        return len(self.roots) >= 2
37
+
38
+
39
+ def _is_root_sequence(value: object) -> bool:
40
+ return isinstance(value, Iterable) and not isinstance(
41
+ value, (str, bytes, os.PathLike)
42
+ )
43
+
44
+
45
+ def _coerce_path(value: object) -> str:
46
+ try:
47
+ path = os.fspath(value)
48
+ except TypeError as exc:
49
+ raise ExploreArgumentError(
50
+ "root must be a path string or a list of path strings"
51
+ ) from exc
52
+ if not isinstance(path, str):
53
+ raise ExploreArgumentError(
54
+ "root must be a path string or a list of path strings"
55
+ )
56
+ return path
57
+
58
+
59
+ def _is_searchable_path(raw_path: str) -> bool:
60
+ abs_path = os.path.abspath(raw_path)
61
+ return os.path.isdir(abs_path) or os.path.isfile(abs_path)
62
+
63
+
64
+ def _strip_matching_quotes(value: str) -> str:
65
+ if len(value) >= 2 and value[0] == value[-1] and value[0] in {"'", '"'}:
66
+ return value[1:-1]
67
+ return value
68
+
69
+
70
def _split_space_separated_roots(raw_path: str) -> tuple[str, ...] | None:
    """Split a mistaken space-separated root string when it is unambiguous.

    Returns the individual paths only when the string contains whitespace, is
    not itself an existing file or directory, tokenises cleanly into two or
    more non-empty parts, and every part is an existing file or directory.
    In every other case ``None`` is returned.
    """

    has_content = bool(raw_path.strip())
    has_whitespace = any(symbol.isspace() for symbol in raw_path)
    if not (has_content and has_whitespace):
        return None
    # A real path that merely contains spaces must never be split.
    if _is_searchable_path(raw_path):
        return None

    try:
        tokens = shlex.split(raw_path, posix=False)
    except ValueError:
        return None
    # Non-POSIX mode keeps the quotes on each token, so remove them here.
    parts = tuple(_strip_matching_quotes(token) for token in tokens)

    if len(parts) < 2:
        return None
    if any(not part for part in parts):
        return None
    return parts if all(_is_searchable_path(part) for part in parts) else None
88
+
89
+
90
+ def _common_rel_base(abs_roots: tuple[str, ...]) -> str:
91
+ common_inputs = [
92
+ os.path.dirname(path) if os.path.isfile(path) else path for path in abs_roots
93
+ ]
94
+ try:
95
+ common = os.path.commonpath(common_inputs)
96
+ except ValueError:
97
+ common = os.path.abspath(os.curdir)
98
+ if os.path.isfile(common):
99
+ common = os.path.dirname(common)
100
+ return common or os.path.abspath(os.curdir)
101
+
102
+
103
def normalize_search_roots(root: RootInput) -> NormalizedRoots:
    """Validate ``root`` as one path or a non-empty iterable of paths.

    A plain ``str`` that is not itself an existing path but splits cleanly
    into several existing paths is treated as a list of roots.

    Raises:
        ExploreArgumentError: the iterable is empty, or a value is not a
            ``str``-valued path.
        ExploreRootError: a path does not exist or is neither a directory
            nor a file.
    """

    if _is_root_sequence(root):
        from_sequence = True
        candidates = tuple(root)  # type: ignore[arg-type]
        if len(candidates) == 0:
            raise ExploreArgumentError("root list must not be empty")
    else:
        single = _coerce_path(root)
        # Only genuine strings may be split; path-like objects are taken
        # literally.
        pieces = _split_space_separated_roots(single) if isinstance(root, str) else None
        from_sequence = pieces is not None
        candidates = (single,) if pieces is None else pieces

    # Coerce every value before touching the filesystem so that type errors
    # surface ahead of missing-path errors.
    raw_paths = tuple(map(_coerce_path, candidates))

    search_roots: list[SearchRoot] = []
    for raw_path in raw_paths:
        resolved = os.path.abspath(raw_path)
        if os.path.isdir(resolved) or os.path.isfile(resolved):
            search_roots.append(SearchRoot(raw=raw_path, abs_path=resolved))
            continue
        if os.path.exists(resolved):
            raise ExploreRootError(
                f"root is neither a directory nor file: {raw_path!r}"
            )
        raise ExploreRootError(f"root does not exist: {raw_path!r}")

    if len(search_roots) > 1:
        rel_base = _common_rel_base(tuple(item.abs_path for item in search_roots))
    else:
        rel_base = None

    display: str | list[str]
    if from_sequence:
        display = list(raw_paths)
    else:
        display = raw_paths[0]

    return NormalizedRoots(
        roots=tuple(search_roots),
        from_sequence=from_sequence,
        rel_base=rel_base,
        display=display,
    )