codetool-explore 0.5.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. codetool_explore/__init__.py +35 -0
  2. codetool_explore/_bin/codetool-explore-rust-windows-x86_64.exe +0 -0
  3. codetool_explore/api.py +266 -0
  4. codetool_explore/cli.py +188 -0
  5. codetool_explore/compression.py +150 -0
  6. codetool_explore/cursor.py +71 -0
  7. codetool_explore/errors.py +23 -0
  8. codetool_explore/explorer.py +497 -0
  9. codetool_explore/ignore.py +222 -0
  10. codetool_explore/py.typed +0 -0
  11. codetool_explore/python_backend/__init__.py +154 -0
  12. codetool_explore/python_backend/case.py +19 -0
  13. codetool_explore/python_backend/config.py +35 -0
  14. codetool_explore/python_backend/constants.py +39 -0
  15. codetool_explore/python_backend/file_search.py +51 -0
  16. codetool_explore/python_backend/ignore_rules.py +40 -0
  17. codetool_explore/python_backend/literal.py +79 -0
  18. codetool_explore/python_backend/matcher.py +79 -0
  19. codetool_explore/python_backend/models.py +49 -0
  20. codetool_explore/python_backend/output.py +82 -0
  21. codetool_explore/python_backend/regex_search.py +63 -0
  22. codetool_explore/python_backend/search.py +327 -0
  23. codetool_explore/python_backend/text.py +39 -0
  24. codetool_explore/python_backend/walker.py +119 -0
  25. codetool_explore/ranking.py +384 -0
  26. codetool_explore/roots.py +148 -0
  27. codetool_explore/rust_backend.py +308 -0
  28. codetool_explore/text_output.py +475 -0
  29. codetool_explore-0.5.0.dist-info/METADATA +240 -0
  30. codetool_explore-0.5.0.dist-info/RECORD +33 -0
  31. codetool_explore-0.5.0.dist-info/WHEEL +4 -0
  32. codetool_explore-0.5.0.dist-info/entry_points.txt +2 -0
  33. codetool_explore-0.5.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,71 @@
1
+ """Cursor and pagination helpers for compact search result pages."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import Sequence
6
+ from typing import TypeVar
7
+
8
+ from .errors import ExploreArgumentError
9
+
10
# Generic element type that ``page_items`` carries from input to page.
T = TypeVar("T")

# Page size used by ``normalize_limit`` when the caller passes ``None``.
DEFAULT_LIMIT = 50
# Hard ceiling that ``normalize_limit`` clamps larger requests down to.
MAX_LIMIT = 1_000
14
+
15
+
16
def normalize_limit(limit: int | None) -> int:
    """Return a safe positive result limit.

    ``None`` selects ``DEFAULT_LIMIT``. Any other value is coerced with
    ``int()`` and clamped to ``MAX_LIMIT`` so an accidental huge request
    cannot flood an agent context window.

    Raises:
        ExploreArgumentError: if ``limit`` cannot be converted to an integer
            (including non-finite floats) or is not strictly positive.
    """

    if limit is None:
        return DEFAULT_LIMIT
    try:
        value = int(limit)
    except (TypeError, ValueError, OverflowError) as exc:
        # int(float("inf")) raises OverflowError rather than ValueError;
        # report it as the same controlled argument error as any other
        # unusable limit instead of leaking a raw built-in exception.
        raise ExploreArgumentError("limit must be a positive integer") from exc
    if value <= 0:
        raise ExploreArgumentError("limit must be a positive integer")
    return min(value, MAX_LIMIT)
32
+
33
+
34
def decode_cursor(cursor: str | int | None) -> int:
    """Turn a pagination cursor back into a non-negative item offset.

    A cursor is just the decimal text of an offset. ``None``, the empty
    string, and anything that does not parse as a base-10 integer all mean
    "start from the first page" (offset ``0``); negative values are raised
    to ``0`` as well. This function never raises for a bad cursor.
    """

    if cursor in (None, ""):
        return 0
    try:
        parsed = int(str(cursor), 10)
    except (TypeError, ValueError):
        # Unparseable cursors restart pagination rather than fail the search.
        parsed = 0
    return parsed if parsed > 0 else 0
49
+
50
+
51
def encode_cursor(offset: int) -> str:
    """Render ``offset`` as cursor text, flooring negative values at zero."""

    value = int(offset)
    if value < 0:
        value = 0
    return str(value)
55
+
56
+
57
def page_items(
    items: Sequence[T],
    *,
    limit: int | None = DEFAULT_LIMIT,
    cursor: str | int | None = None,
) -> tuple[list[T], bool, str | None, int]:
    """Slice one page out of ``items``.

    Returns ``(page, truncated, next_cursor, offset)`` where ``offset`` is the
    index the page starts at (clamped to ``len(items)``), ``truncated`` tells
    whether items remain after the page, and ``next_cursor`` is the encoded
    offset of the following page or ``None`` when this is the last one.
    """

    page_size = normalize_limit(limit)
    total = len(items)
    # A cursor pointing past the end yields an empty final page, not an error.
    offset = min(decode_cursor(cursor), total)
    stop = offset + page_size
    has_more = stop < total
    page = list(items[offset:stop])
    following = encode_cursor(stop) if has_more else None
    return page, has_more, following, offset
@@ -0,0 +1,23 @@
1
+ """Public exception taxonomy for codetool-explore."""
2
+
3
+ from __future__ import annotations
4
+
5
+
6
class ExploreError(Exception):
    """Base class for controlled codetool-explore failures.

    The other exception classes in this module derive from it, so catching
    ``ExploreError`` handles all of them at once.
    """
8
+
9
+
10
class ExploreArgumentError(ExploreError, ValueError):
    """Raised for invalid public explore arguments.

    Also a ``ValueError``, so generic ``except ValueError`` handlers catch it.
    """
12
+
13
+
14
class ExplorePatternError(ExploreArgumentError):
    """Raised for invalid or unsupported search patterns.

    A specialization of ``ExploreArgumentError`` (and therefore also a
    ``ValueError``).
    """
16
+
17
+
18
class ExploreRootError(ExploreError, OSError):
    """Raised when the requested root/path cannot be explored.

    Also an ``OSError``: a handler that must treat it separately has to list
    ``except ExploreRootError`` before any ``except OSError`` clause.
    """
20
+
21
+
22
class ExploreBackendError(ExploreError, RuntimeError):
    """Raised when a selected backend fails at runtime.

    Also a ``RuntimeError``, so generic ``except RuntimeError`` handlers
    catch it.
    """
@@ -0,0 +1,497 @@
1
+ """Read-only workspace exploration targets for the public API."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import codecs
6
+ import os
7
+ from collections.abc import Iterable
8
+ from dataclasses import dataclass
9
+ from typing import Any
10
+
11
+ from .cursor import normalize_limit, page_items
12
+ from .errors import ExploreArgumentError, ExploreRootError
13
+ from .ignore import (
14
+ matches_glob,
15
+ normalize_patterns,
16
+ normalize_relpath,
17
+ relative_path,
18
+ should_ignore_path,
19
+ )
20
+ from .python_backend.constants import binary_check_bytes
21
+ from .python_backend.ignore_rules import ignore_patterns_for_root
22
+ from .roots import RootInput, normalize_search_roots
23
+
24
# Character budget for the text returned by one ``_read_text_range`` call.
MAX_READ_CHARS = 100_000
# Number of bytes requested per ``read()`` call while streaming a file.
READ_CHUNK_BYTES = 8192
26
+
27
+
28
@dataclass(frozen=True)
class ResolvedExplorePath:
    """One read/list path resolved against a single root.

    Instances are built by ``_resolve_explore_path`` and are immutable.
    """

    # Path text as supplied by the caller ("." is substituted for an empty
    # path when the caller allows empty paths).
    query: str
    # Absolute filesystem path of the target.
    abs_path: str
    # Path text reported back in results, produced by ``_display_path``.
    display_path: str
    # Display form of the root, taken from the normalized root set.
    root_display: str | list[str]
    # Directory that displayed paths are made relative to; ``None`` when an
    # absolute query falls outside the root.
    display_base: str | None
    # The root itself when it is a directory, otherwise its parent directory.
    root_base: str
38
+
39
+
40
def read_file_target(
    path: object,
    *,
    root: RootInput = ".",
    start_line: int = 1,
    limit: int = 50,
    cursor: str | int | None = None,
) -> dict[str, object]:
    """Read a bounded range of lines from a single text file.

    ``path`` is resolved against ``root``. A non-empty ``cursor`` takes
    precedence over ``start_line`` as the 1-based first line to return, and
    at most ``limit`` lines (after normalization) are read.

    Returns a result mapping containing the joined text, the number of lines
    returned, truncation flags, and ``next_cursor`` (the next line number as
    text) when more lines follow.

    Raises:
        ExploreArgumentError: for an invalid ``limit``, ``start_line`` or path.
        ExploreRootError: if the target is missing, is a directory, is not a
            regular file, looks binary, is not UTF-8, or cannot be read.
    """

    line_cap = normalize_limit(limit)
    first_line = _normalize_start_line(start_line, cursor=cursor)
    target = _resolve_explore_path(path, root=root, target="read")
    abs_path = target.abs_path

    if not os.path.exists(abs_path):
        raise ExploreRootError(f"file does not exist: {target.query!r}")
    if os.path.isdir(abs_path):
        raise ExploreRootError(f"read target is a directory: {target.query!r}")
    if not os.path.isfile(abs_path):
        raise ExploreRootError(f"read target is not a file: {target.query!r}")

    _raise_if_binary(abs_path, target.display_path)
    text, line_total, more_lines, clipped = _read_text_range(
        abs_path,
        display_path=target.display_path,
        start_line=first_line,
        limit=line_cap,
    )

    # A continuation cursor is only offered when whole lines remain and this
    # page actually advanced.
    next_cursor = None
    if more_lines and line_total > 0:
        next_cursor = str(first_line + line_total)

    echoed_cursor = None if cursor in (None, "") else str(cursor)

    return {
        "pattern": target.query,
        "root": target.root_display,
        "target": "read",
        "mode": "read",
        "path": target.display_path,
        "start_line": first_line,
        "limit": line_cap,
        "cursor": echoed_cursor,
        "text": text,
        "returned": line_total,
        "line_count": line_total,
        "count": line_total,
        "truncated": bool(more_lines or clipped),
        "content_truncated": clipped,
        "next_cursor": next_cursor,
        "offset": first_line - 1,
        "backend": "python",
    }
93
+
94
+
95
def list_path_target(
    path: object,
    *,
    root: RootInput = ".",
    glob: str | Iterable[str] | None = None,
    exclude: str | Iterable[str] | None = None,
    limit: int = 50,
    cursor: str | int | None = None,
) -> dict[str, object]:
    """List a single file, or one level of a directory, as a paged result.

    ``path`` is resolved against ``root`` (an empty path means the root
    itself). Entries are filtered by ``glob`` / ``exclude`` and the ignore
    rules, then paged with ``limit`` and ``cursor``.

    Returns a result mapping with the page of entries, totals for all
    matching entries, and pagination fields. ``glob`` and ``exclude`` keys
    are present only when the corresponding patterns were supplied.

    Raises:
        ExploreArgumentError: for an invalid ``limit`` or path argument.
        ExploreRootError: if the target does not exist, is neither a file nor
            a directory, or cannot be listed.
    """

    page_size = normalize_limit(limit)
    target = _resolve_explore_path(path, root=root, target="list", allow_empty=True)
    glob_patterns = normalize_patterns(glob)
    exclude_patterns = normalize_patterns(exclude)

    if not os.path.exists(target.abs_path):
        raise ExploreRootError(f"list target does not exist: {target.query!r}")
    if os.path.isfile(target.abs_path):
        lister = _list_single_file
    elif os.path.isdir(target.abs_path):
        lister = _list_directory
    else:
        raise ExploreRootError(
            f"list target is neither a directory nor file: {target.query!r}"
        )
    entries = lister(
        target,
        glob_patterns=glob_patterns,
        exclude_patterns=exclude_patterns,
    )

    page, truncated, next_cursor, offset = page_items(
        entries, limit=page_size, cursor=cursor
    )
    # Totals describe every matching entry, not just the returned page.
    kinds = [entry.get("kind") for entry in entries]
    entry_total = len(entries)
    echoed_cursor = None if cursor in (None, "") else str(cursor)

    result: dict[str, object] = {
        "pattern": target.query,
        "root": target.root_display,
        "target": "list",
        "mode": "list",
        "path": target.display_path,
        "entries": page,
        "returned": len(page),
        "total_entries": entry_total,
        "total_matches": entry_total,
        "count": entry_total,
        "total_files": kinds.count("file"),
        "total_dirs": kinds.count("dir"),
        "truncated": truncated,
        "next_cursor": next_cursor,
        "offset": offset,
        "limit": page_size,
        "cursor": echoed_cursor,
        "backend": "python",
    }
    if glob_patterns:
        result["glob"] = list(glob_patterns)
    if exclude_patterns:
        result["exclude"] = list(exclude_patterns)
    return result
161
+
162
+
163
def _resolve_explore_path(
    path: object,
    *,
    root: RootInput,
    target: str,
    allow_empty: bool = False,
) -> ResolvedExplorePath:
    """Resolve a caller-supplied path against exactly one root.

    ``target`` is only used to word error messages. An empty path becomes
    ``"."`` when ``allow_empty`` is true. Relative paths are joined onto the
    root base (the root directory, or the parent directory of a file root);
    absolute paths are used as given.

    Raises:
        ExploreArgumentError: if several roots were supplied, if ``path`` is
            not a string path, or if it is empty and ``allow_empty`` is false.
    """

    root_set = normalize_search_roots(root)
    if root_set.has_multiple:
        raise ExploreArgumentError(f"target={target!r} supports one root at a time")

    try:
        query = os.fspath(path)
    except TypeError as exc:
        raise ExploreArgumentError(f"{target} path must be a string") from exc
    if not isinstance(query, str):
        # os.fspath may also hand back bytes, which are not accepted here.
        raise ExploreArgumentError(f"{target} path must be a string")
    if not query:
        if not allow_empty:
            raise ExploreArgumentError(f"{target} path must not be empty")
        query = "."

    root_abs = root_set.roots[0].abs_path
    root_base = root_abs if os.path.isdir(root_abs) else os.path.dirname(root_abs)

    query_is_absolute = os.path.isabs(query)
    candidate = query if query_is_absolute else os.path.join(root_base, query)
    abs_path = os.path.abspath(candidate)
    # Only an absolute query that lands outside the root loses its display
    # base; relative queries are always displayed relative to the root base.
    if query_is_absolute and not _is_under(abs_path, root_base):
        display_base = None
    else:
        display_base = root_base

    return ResolvedExplorePath(
        query=query,
        abs_path=abs_path,
        display_path=_display_path(abs_path, display_base),
        root_display=root_set.display,
        display_base=display_base,
        root_base=root_base,
    )
204
+
205
+
206
+ def _normalize_start_line(start_line: int, *, cursor: str | int | None) -> int:
207
+ if cursor not in (None, ""):
208
+ try:
209
+ value = int(str(cursor), 10)
210
+ except (TypeError, ValueError):
211
+ return 1
212
+ return max(1, value)
213
+ try:
214
+ value = int(start_line)
215
+ except (TypeError, ValueError) as exc:
216
+ raise ExploreArgumentError("start_line must be a positive integer") from exc
217
+ if value <= 0:
218
+ raise ExploreArgumentError("start_line must be a positive integer")
219
+ return value
220
+
221
+
222
+ def _is_under(path: str, base: str) -> bool:
223
+ try:
224
+ common_path = os.path.commonpath(
225
+ (os.path.abspath(path), os.path.abspath(base))
226
+ )
227
+ except ValueError:
228
+ return False
229
+ return common_path == os.path.abspath(base)
230
+
231
+
232
def _display_path(path: str, base: str | None) -> str:
    """Return the path text to show for ``path``.

    With a ``base`` the result is ``path`` relative to it; without one it is
    the normalized absolute path. An empty result is reported as ``"."``.
    """

    if base is None:
        shown = normalize_relpath(os.path.abspath(path))
    else:
        shown = relative_path(path, base)
    return shown or "."
236
+
237
+
238
def _raise_if_binary(path: str, display_path: str) -> None:
    """Reject files whose leading bytes contain a NUL byte.

    Reads the first ``binary_check_bytes()`` bytes of ``path``.

    Raises:
        ExploreRootError: if the file cannot be opened or read, or if the
            probed bytes contain ``\\x00``.
    """

    try:
        with open(path, "rb") as stream:
            head = stream.read(binary_check_bytes())
    except OSError as exc:
        # Always raises; converts the OS failure into ExploreRootError.
        _raise_read_error(display_path, exc)
    else:
        if head.find(b"\x00") != -1:
            raise ExploreRootError(f"file appears to be binary: {display_path!r}")
246
+
247
+
248
def _read_text_range(
    path: str,
    *,
    display_path: str,
    start_line: int,
    limit: int,
) -> tuple[str, int, bool, bool]:
    """Stream ``path`` as UTF-8 and collect a bounded range of lines.

    The file is read in ``READ_CHUNK_BYTES`` pieces and decoded
    incrementally. Lines before the 1-based ``start_line`` are skipped, then
    up to ``limit`` lines are collected while the total output (line text
    plus one separator per line after the first) stays within
    ``MAX_READ_CHARS``.

    Returns:
        ``(text, line_count, has_more_lines, content_truncated)`` where
        ``text`` is the collected lines joined with ``"\\n"``,
        ``has_more_lines`` is set when reading stopped with further content
        still unread after the collected lines, and ``content_truncated`` is
        set when the character budget cut the output short.

    Raises:
        ExploreRootError: if a chunk contains a NUL byte, the bytes are not
            valid UTF-8, or the file cannot be read.
    """
    lines: list[str] = []
    # Pieces of the line currently being assembled (a line may span chunks).
    current_parts: list[str] = []
    # Characters committed to the output so far, separators included.
    output_chars = 0
    current_line_started = False
    # 1-based number of the line the scanner is currently positioned on.
    line_number = 1
    has_more_lines = False
    content_truncated = False
    stopped_early = False

    def start_output_line() -> bool:
        """Open a new output line; False when the line or char cap forbids it."""
        nonlocal current_line_started, output_chars, content_truncated
        if current_line_started:
            return True
        if len(lines) >= limit:
            return False
        # Every line after the first costs one character for the "\n" joiner.
        separator_chars = 1 if lines else 0
        if output_chars + separator_chars > MAX_READ_CHARS:
            content_truncated = True
            return False
        output_chars += separator_chars
        current_line_started = True
        return True

    def append_fragment(fragment: str) -> bool:
        """Add text to the open line; False once the char budget is exhausted."""
        nonlocal output_chars, content_truncated
        if not start_output_line():
            return False
        remaining = MAX_READ_CHARS - output_chars
        if remaining <= 0:
            content_truncated = True
            return False
        if len(fragment) > remaining:
            # Keep what still fits, then signal the caller to stop reading.
            current_parts.append(fragment[:remaining])
            output_chars += remaining
            content_truncated = True
            return False
        current_parts.append(fragment)
        output_chars += len(fragment)
        return True

    def finish_output_line() -> None:
        """Close the open line and store it, dropping one trailing ``\\r``."""
        nonlocal current_line_started, current_parts
        line = "".join(current_parts)
        if line.endswith("\r"):
            line = line[:-1]
        lines.append(line)
        current_parts = []
        current_line_started = False

    def process_text(text: str) -> bool:
        """Consume one decoded chunk; False means stop reading the file."""
        nonlocal content_truncated, has_more_lines, line_number
        while text:
            if line_number < start_line:
                # Still skipping: discard up to and including the next newline.
                newline_index = text.find("\n")
                if newline_index < 0:
                    return True
                line_number += 1
                text = text[newline_index + 1 :]
                continue

            if not current_line_started and len(lines) >= limit:
                # The page is full and there is still text left to read.
                has_more_lines = True
                return False

            newline_index = text.find("\n")
            if newline_index < 0:
                # No newline in the rest of this chunk: the line continues in
                # the next chunk (or ends at end of file).
                if not append_fragment(text):
                    return False
                return True

            fragment = text[:newline_index]
            # NOTE(review): when the character cap stops a non-empty line
            # here, has_more_lines stays False, unlike the empty-line branch
            # below which sets it. Confirm that is intended.
            if fragment and not append_fragment(fragment):
                return False
            if not fragment and not start_output_line():
                has_more_lines = True
                return False
            finish_output_line()
            line_number += 1
            text = text[newline_index + 1 :]
        return True

    try:
        decoder = codecs.getincrementaldecoder("utf-8")()
        with open(path, "rb") as handle:
            while True:
                chunk = handle.read(READ_CHUNK_BYTES)
                if not chunk:
                    break
                if b"\x00" in chunk:
                    raise ExploreRootError(
                        f"file appears to be binary: {display_path!r}"
                    )
                text = decoder.decode(chunk)
                if text and not process_text(text):
                    stopped_early = True
                    break
            # Flush the decoder only after a complete read; the final decode
            # raises UnicodeDecodeError on an incomplete trailing sequence.
            else_text = "" if stopped_early else decoder.decode(b"", final=True)
            if else_text and not (has_more_lines or content_truncated):
                process_text(else_text)
            # Store a line still open when reading stopped (no final newline,
            # or a line cut short by the character budget).
            if current_line_started and len(lines) < limit:
                finish_output_line()
    except UnicodeDecodeError as exc:
        raise ExploreRootError(
            f"file is not valid UTF-8 text: {display_path!r}"
        ) from exc
    except ExploreRootError:
        # ExploreRootError is itself an OSError; re-raise it untouched so the
        # generic OSError handler below does not re-wrap it.
        raise
    except OSError as exc:
        _raise_read_error(display_path, exc)

    return "\n".join(lines), len(lines), has_more_lines, content_truncated
366
+
367
+
368
def _raise_read_error(display_path: str, exc: OSError) -> None:
    """Re-raise ``exc`` as an ``ExploreRootError`` naming ``display_path``.

    The message uses ``exc.strerror`` when it is set and non-empty, otherwise
    ``str(exc)``. This function always raises.
    """

    reason = getattr(exc, "strerror", None)
    if not reason:
        reason = str(exc)
    raise ExploreRootError(f"cannot read file {display_path!r}: {reason}") from exc
371
+
372
+
373
def _list_single_file(
    resolved: ResolvedExplorePath,
    *,
    glob_patterns: tuple[str, ...],
    exclude_patterns: tuple[str, ...],
) -> list[dict[str, object]]:
    """Return a one-entry listing for a file target, or an empty list.

    The file is dropped when it is ignored by the rules gathered for the
    listing ignore base, by the rules gathered for the file itself, by
    ``exclude_patterns``, or when it does not match ``glob_patterns``.
    """

    file_path = resolved.abs_path
    containing_dir = os.path.dirname(file_path) or os.curdir
    shown = _display_path(file_path, resolved.display_base)
    api_base = _listing_api_ignore_base(resolved, is_file=True)
    api_rules = ignore_patterns_for_root(
        api_base,
        rel_base_abs=None,
        is_file=False,
    )
    local_rules = ignore_patterns_for_root(
        file_path,
        rel_base_abs=None,
        is_file=True,
    )

    # The checks run in a fixed order; the first one that rejects wins.
    if should_ignore_path(
        shown,
        is_dir=False,
        exclude_patterns=exclude_patterns,
        root_ignore_patterns=api_rules.root,
        common_rel_path=relative_path(file_path, api_base),
    ):
        return []
    if _is_ignored_by_local_listing_patterns(
        file_path,
        rel_path=shown,
        is_dir=False,
        local_base=containing_dir,
        root_ignore_patterns=local_rules.root,
    ):
        return []
    if not matches_glob(shown, glob_patterns):
        return []
    return [{"path": shown, "kind": "file"}]
407
+
408
+
409
def _list_directory(
    resolved: ResolvedExplorePath,
    *,
    glob_patterns: tuple[str, ...],
    exclude_patterns: tuple[str, ...],
) -> list[dict[str, object]]:
    """Return the sorted, filtered entries of one directory level.

    Only directories and regular files are reported, and symlinks are not
    followed when classifying entries. Directory paths get a trailing ``/``.
    Entries whose type cannot be determined are skipped.

    Raises:
        ExploreRootError: if the directory cannot be scanned.
    """

    directory = resolved.abs_path
    api_base = _listing_api_ignore_base(resolved, is_file=False)
    api_rules = ignore_patterns_for_root(
        api_base,
        rel_base_abs=None,
        is_file=False,
    )
    local_rules = ignore_patterns_for_root(
        directory,
        rel_base_abs=None,
        is_file=False,
    )

    found: list[dict[str, object]] = []
    try:
        with os.scandir(directory) as scanner:
            for item in scanner:
                try:
                    is_dir = item.is_dir(follow_symlinks=False)
                    is_file = item.is_file(follow_symlinks=False)
                except OSError:
                    # Entry type could not be determined; leave it out.
                    continue
                if not (is_dir or is_file):
                    continue

                shown = _display_path(item.path, resolved.display_base)
                if should_ignore_path(
                    shown,
                    is_dir=is_dir,
                    exclude_patterns=exclude_patterns,
                    root_ignore_patterns=api_rules.root,
                    common_rel_path=relative_path(item.path, api_base),
                ):
                    continue
                if _is_ignored_by_local_listing_patterns(
                    item.path,
                    rel_path=shown,
                    is_dir=is_dir,
                    local_base=directory,
                    root_ignore_patterns=local_rules.root,
                ):
                    continue
                if not matches_glob(shown, glob_patterns):
                    continue

                if is_dir:
                    found.append({"path": f"{shown}/", "kind": "dir"})
                else:
                    found.append({"path": shown, "kind": "file"})
    except OSError as exc:
        reason = getattr(exc, "strerror", None) or str(exc)
        raise ExploreRootError(
            f"cannot list directory {resolved.display_path!r}: {reason}"
        ) from exc

    found.sort(key=_entry_sort_key)
    return found
467
+
468
+
469
def _listing_api_ignore_base(resolved: ResolvedExplorePath, *, is_file: bool) -> str:
    """Pick the directory whose ignore rules apply to a listing.

    Targets inside the root base use the root base. Targets outside it use
    their own directory: the parent for a file, the target itself otherwise.
    """

    target = resolved.abs_path
    if _is_under(target, resolved.root_base):
        return resolved.root_base
    return (os.path.dirname(target) or os.curdir) if is_file else target
475
+
476
+
477
def _is_ignored_by_local_listing_patterns(
    path: str,
    *,
    rel_path: str,
    is_dir: bool,
    local_base: str,
    root_ignore_patterns: tuple[str, ...],
) -> bool:
    """Check ``path`` against ignore patterns local to the listed location.

    With no patterns the answer is ``False`` and nothing else is evaluated;
    otherwise the decision is delegated to ``should_ignore_path`` with
    ``path`` expressed relative to ``local_base``.
    """

    return bool(root_ignore_patterns) and should_ignore_path(
        rel_path,
        is_dir=is_dir,
        root_ignore_patterns=root_ignore_patterns,
        common_rel_path=relative_path(path, local_base),
    )
493
+
494
+
495
+ def _entry_sort_key(entry: dict[str, Any]) -> tuple[str, str]:
496
+ path = str(entry.get("path", ""))
497
+ return (path.rstrip("/").casefold(), path)