PyPI - codetool-explore - Versions diffs - 0.5.0__py3-none-win_amd64.whl - Mend

codetool-explore 0.5.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

codetool_explore/__init__.py +35 -0
codetool_explore/_bin/codetool-explore-rust-windows-x86_64.exe +0 -0
codetool_explore/api.py +266 -0
codetool_explore/cli.py +188 -0
codetool_explore/compression.py +150 -0
codetool_explore/cursor.py +71 -0
codetool_explore/errors.py +23 -0
codetool_explore/explorer.py +497 -0
codetool_explore/ignore.py +222 -0
codetool_explore/py.typed +0 -0
codetool_explore/python_backend/__init__.py +154 -0
codetool_explore/python_backend/case.py +19 -0
codetool_explore/python_backend/config.py +35 -0
codetool_explore/python_backend/constants.py +39 -0
codetool_explore/python_backend/file_search.py +51 -0
codetool_explore/python_backend/ignore_rules.py +40 -0
codetool_explore/python_backend/literal.py +79 -0
codetool_explore/python_backend/matcher.py +79 -0
codetool_explore/python_backend/models.py +49 -0
codetool_explore/python_backend/output.py +82 -0
codetool_explore/python_backend/regex_search.py +63 -0
codetool_explore/python_backend/search.py +327 -0
codetool_explore/python_backend/text.py +39 -0
codetool_explore/python_backend/walker.py +119 -0
codetool_explore/ranking.py +384 -0
codetool_explore/roots.py +148 -0
codetool_explore/rust_backend.py +308 -0
codetool_explore/text_output.py +475 -0
codetool_explore-0.5.0.dist-info/METADATA +240 -0
codetool_explore-0.5.0.dist-info/RECORD +33 -0
codetool_explore-0.5.0.dist-info/WHEEL +4 -0
codetool_explore-0.5.0.dist-info/entry_points.txt +2 -0
codetool_explore-0.5.0.dist-info/licenses/LICENSE +21 -0

codetool_explore/cursor.py ADDED Viewed

@@ -0,0 +1,71 @@
+"""Cursor and pagination helpers for compact search result pages."""
+from __future__ import annotations
+from collections.abc import Sequence
+from typing import TypeVar
+from .errors import ExploreArgumentError
+# Element type of the sequences paginated by ``page_items``.
+T = TypeVar("T")
+# Page size used when a caller passes ``limit=None``.
+DEFAULT_LIMIT = 50
+# Hard upper bound that ``normalize_limit`` clamps every requested limit to.
+MAX_LIMIT = 1_000
+def normalize_limit(limit: int | None) -> int:
+    """Return a safe positive result limit.
+    ``None`` selects ``DEFAULT_LIMIT``. Any other value is coerced with
+    ``int()`` and clamped to ``MAX_LIMIT`` so an accidental huge request
+    cannot flood an agent context window.
+    Raises:
+        ExploreArgumentError: if ``limit`` cannot be converted to an integer
+            (infinite floats included) or is not strictly positive.
+    """
+    if limit is None:
+        return DEFAULT_LIMIT
+    try:
+        value = int(limit)
+    except (TypeError, ValueError, OverflowError) as exc:
+        # int(float("inf")) raises OverflowError, not ValueError; report it
+        # through the same controlled argument error as any other bad limit.
+        raise ExploreArgumentError("limit must be a positive integer") from exc
+    if value <= 0:
+        raise ExploreArgumentError("limit must be a positive integer")
+    return min(value, MAX_LIMIT)
+def decode_cursor(cursor: str | int | None) -> int:
+    """Turn a cursor value into a non-negative result offset.
+    A cursor is just a decimal offset, which keeps it compact, identical
+    between Python and the std-only Rust CLI, and recoverable from logs.
+    A missing, empty, or unparsable cursor means "first page" (offset 0)
+    rather than an error, and negative offsets are clamped to 0.
+    """
+    if cursor in (None, ""):
+        return 0
+    try:
+        parsed = int(str(cursor), 10)
+    except (TypeError, ValueError):
+        # Bad cursors restart from the beginning instead of failing a search.
+        return 0
+    return parsed if parsed > 0 else 0
+def encode_cursor(offset: int) -> str:
+    """Render the next result offset as a decimal cursor string (never negative)."""
+    position = int(offset)
+    if position < 0:
+        position = 0
+    return str(position)
+def page_items(
+    items: Sequence[T],
+    *,
+    limit: int | None = DEFAULT_LIMIT,
+    cursor: str | int | None = None,
+) -> tuple[list[T], bool, str | None, int]:
+    """Slice one page out of ``items``.
+    Returns ``(page, truncated, next_cursor, offset)`` where ``offset`` is the
+    decoded cursor clamped to ``len(items)``, ``truncated`` tells whether items
+    remain after the page, and ``next_cursor`` is the encoded offset of the
+    first remaining item (``None`` when nothing remains).
+    """
+    page_size = normalize_limit(limit)
+    total = len(items)
+    start = min(decode_cursor(cursor), total)
+    stop = start + page_size
+    window = list(items[start:stop])
+    has_more = stop < total
+    if has_more:
+        return window, True, encode_cursor(stop), start
+    return window, False, None, start

codetool_explore/errors.py ADDED Viewed

@@ -0,0 +1,23 @@
+"""Public exception taxonomy for codetool-explore."""
+from __future__ import annotations
+class ExploreError(Exception):
+    """Base class for controlled codetool-explore failures.
+    Every other exception defined in this module derives from it.
+    """
+class ExploreArgumentError(ExploreError, ValueError):
+    """Raised for invalid public explore arguments.
+    Also a ``ValueError``, so ``except ValueError`` handlers catch it too.
+    """
+class ExplorePatternError(ExploreArgumentError):
+    """Raised for invalid or unsupported search patterns.
+    A specialization of ``ExploreArgumentError`` (and therefore a ``ValueError``).
+    """
+class ExploreRootError(ExploreError, OSError):
+    """Raised when the requested root/path cannot be explored.
+    Also an ``OSError``, so ``except OSError`` handlers catch it too.
+    """
+class ExploreBackendError(ExploreError, RuntimeError):
+    """Raised when a selected backend fails at runtime.
+    Also a ``RuntimeError``, so ``except RuntimeError`` handlers catch it too.
+    """

codetool_explore/explorer.py ADDED Viewed

@@ -0,0 +1,497 @@
+"""Read-only workspace exploration targets for the public API."""
+from __future__ import annotations
+import codecs
+import os
+from collections.abc import Iterable
+from dataclasses import dataclass
+from typing import Any
+from .cursor import normalize_limit, page_items
+from .errors import ExploreArgumentError, ExploreRootError
+from .ignore import (
+    matches_glob,
+    normalize_patterns,
+    normalize_relpath,
+    relative_path,
+    should_ignore_path,
+)
+from .python_backend.constants import binary_check_bytes
+from .python_backend.ignore_rules import ignore_patterns_for_root
+from .roots import RootInput, normalize_search_roots
+# Cap on the characters (line separators included) returned by one read.
+MAX_READ_CHARS = 100_000
+# Number of bytes requested from the file per read while streaming its text.
+READ_CHUNK_BYTES = 8192
+@dataclass(frozen=True)
+class ResolvedExplorePath:
+    """One read/list path resolved against a single root.
+    Instances are immutable and are built by ``_resolve_explore_path``.
+    """
+    # Caller-supplied path as a string ("." when an empty path was allowed).
+    query: str
+    # Absolute filesystem location of the target (``os.path.abspath`` result).
+    abs_path: str
+    # Path reported in results, as computed by ``_display_path``.
+    display_path: str
+    # Display form of the root, copied from the normalized root set.
+    root_display: str | list[str]
+    # Base that display paths are made relative to; None when an absolute
+    # query lies outside ``root_base``.
+    display_base: str | None
+    # The root itself when it is a directory, otherwise its parent directory.
+    root_base: str
+def read_file_target(
+    path: object,
+    *,
+    root: RootInput = ".",
+    start_line: int = 1,
+    limit: int = 50,
+    cursor: str | int | None = None,
+) -> dict[str, object]:
+    """Read a bounded range of lines from a single text file.
+    ``limit`` caps the number of lines; the first line is ``start_line``
+    unless a non-empty ``cursor`` is supplied, in which case the cursor wins.
+    The result dict carries the text, line counts, truncation flags, and a
+    ``next_cursor`` line number when more lines follow the returned range.
+    Raises:
+        ExploreRootError: if the target is missing, is a directory, is not a
+            regular file, looks binary, or cannot be read as UTF-8 text.
+        ExploreArgumentError: for invalid ``limit``, ``start_line``, ``path``
+            or a multi-root ``root``.
+    """
+    line_cap = normalize_limit(limit)
+    first_line = _normalize_start_line(start_line, cursor=cursor)
+    target = _resolve_explore_path(path, root=root, target="read")
+    file_path = target.abs_path
+    # Reject anything that is not an existing regular file, most specific
+    # message first.
+    if not os.path.exists(file_path):
+        raise ExploreRootError(f"file does not exist: {target.query!r}")
+    if os.path.isdir(file_path):
+        raise ExploreRootError(f"read target is a directory: {target.query!r}")
+    if not os.path.isfile(file_path):
+        raise ExploreRootError(f"read target is not a file: {target.query!r}")
+    _raise_if_binary(file_path, target.display_path)
+    text, returned_lines, has_more_lines, content_truncated = _read_text_range(
+        file_path,
+        display_path=target.display_path,
+        start_line=first_line,
+        limit=line_cap,
+    )
+    # A continuation cursor is only meaningful when lines were returned and
+    # more lines follow them.
+    if has_more_lines and returned_lines > 0:
+        next_cursor = str(first_line + returned_lines)
+    else:
+        next_cursor = None
+    echoed_cursor = None if cursor in (None, "") else str(cursor)
+    payload: dict[str, object] = {}
+    payload["pattern"] = target.query
+    payload["root"] = target.root_display
+    payload["target"] = "read"
+    payload["mode"] = "read"
+    payload["path"] = target.display_path
+    payload["start_line"] = first_line
+    payload["limit"] = line_cap
+    payload["cursor"] = echoed_cursor
+    payload["text"] = text
+    # The same line count is exposed under three keys.
+    payload["returned"] = returned_lines
+    payload["line_count"] = returned_lines
+    payload["count"] = returned_lines
+    payload["truncated"] = bool(has_more_lines or content_truncated)
+    payload["content_truncated"] = content_truncated
+    payload["next_cursor"] = next_cursor
+    payload["offset"] = first_line - 1
+    payload["backend"] = "python"
+    return payload
+def list_path_target(
+    path: object,
+    *,
+    root: RootInput = ".",
+    glob: str | Iterable[str] | None = None,
+    exclude: str | Iterable[str] | None = None,
+    limit: int = 50,
+    cursor: str | int | None = None,
+) -> dict[str, object]:
+    """List a single file, or one level of a directory, as a paginated page.
+    Entries are filtered by the ``glob``/``exclude`` patterns and the ignore
+    rules, then paged with ``limit`` and ``cursor``. Totals in the result
+    describe the full filtered listing, not just the returned page.
+    Raises:
+        ExploreRootError: if the target does not exist or is neither a
+            regular file nor a directory.
+    """
+    safe_limit = normalize_limit(limit)
+    resolved = _resolve_explore_path(path, root=root, target="list", allow_empty=True)
+    glob_patterns = normalize_patterns(glob)
+    exclude_patterns = normalize_patterns(exclude)
+    location = resolved.abs_path
+    if not os.path.exists(location):
+        raise ExploreRootError(f"list target does not exist: {resolved.query!r}")
+    # Pick the lister matching the kind of target; both share one signature.
+    if os.path.isfile(location):
+        lister = _list_single_file
+    elif os.path.isdir(location):
+        lister = _list_directory
+    else:
+        raise ExploreRootError(
+            f"list target is neither a directory nor file: {resolved.query!r}"
+        )
+    entries = lister(
+        resolved,
+        glob_patterns=glob_patterns,
+        exclude_patterns=exclude_patterns,
+    )
+    page, truncated, next_cursor, offset = page_items(
+        entries, limit=safe_limit, cursor=cursor
+    )
+    kinds = [entry.get("kind") for entry in entries]
+    entry_total = len(entries)
+    result: dict[str, object] = dict(
+        pattern=resolved.query,
+        root=resolved.root_display,
+        target="list",
+        mode="list",
+        path=resolved.display_path,
+        entries=page,
+        returned=len(page),
+        total_entries=entry_total,
+        total_matches=entry_total,
+        count=entry_total,
+        total_files=kinds.count("file"),
+        total_dirs=kinds.count("dir"),
+        truncated=truncated,
+        next_cursor=next_cursor,
+        offset=offset,
+        limit=safe_limit,
+        cursor=None if cursor in (None, "") else str(cursor),
+        backend="python",
+    )
+    # Echo the active filters only when the caller actually supplied some.
+    if glob_patterns:
+        result["glob"] = list(glob_patterns)
+    if exclude_patterns:
+        result["exclude"] = list(exclude_patterns)
+    return result
+def _resolve_explore_path(
+    path: object,
+    *,
+    root: RootInput,
+    target: str,
+    allow_empty: bool = False,
+) -> ResolvedExplorePath:
+    """Resolve ``path`` against exactly one root into a ``ResolvedExplorePath``.
+    Relative paths are joined onto the root's base directory. Absolute paths
+    are used as given and keep a display base only when they lie under that
+    base directory. ``target`` is only used to word error messages.
+    Raises:
+        ExploreArgumentError: if several roots are given, ``path`` is not a
+            string path, or ``path`` is empty while ``allow_empty`` is false.
+    """
+    root_set = normalize_search_roots(root)
+    if root_set.has_multiple:
+        raise ExploreArgumentError(f"target={target!r} supports one root at a time")
+    try:
+        query = os.fspath(path)
+    except TypeError as exc:
+        raise ExploreArgumentError(f"{target} path must be a string") from exc
+    # os.fspath may hand back bytes; only text paths are accepted.
+    if not isinstance(query, str):
+        raise ExploreArgumentError(f"{target} path must be a string")
+    if not query:
+        if not allow_empty:
+            raise ExploreArgumentError(f"{target} path must not be empty")
+        query = "."
+    root_abs = root_set.roots[0].abs_path
+    # A file root contributes its parent directory as the base.
+    if os.path.isdir(root_abs):
+        root_base = root_abs
+    else:
+        root_base = os.path.dirname(root_abs)
+    if not os.path.isabs(query):
+        abs_path = os.path.abspath(os.path.join(root_base, query))
+        display_base = root_base
+    else:
+        abs_path = os.path.abspath(query)
+        display_base = None
+        if _is_under(abs_path, root_base):
+            display_base = root_base
+    return ResolvedExplorePath(
+        query=query,
+        abs_path=abs_path,
+        display_path=_display_path(abs_path, display_base),
+        root_display=root_set.display,
+        display_base=display_base,
+        root_base=root_base,
+    )
+def _normalize_start_line(start_line: int, *, cursor: str | int | None) -> int:
+    """Return the 1-based line number at which a read should start.
+    A non-empty ``cursor`` takes precedence over ``start_line``: it is parsed
+    as a decimal line number and clamped to at least 1, and an unparsable
+    cursor falls back to line 1. Without a cursor, ``start_line`` is validated
+    strictly.
+    Raises:
+        ExploreArgumentError: if no cursor is given and ``start_line`` cannot
+            be converted to a positive integer (infinite floats included).
+    """
+    if cursor not in (None, ""):
+        try:
+            value = int(str(cursor), 10)
+        except (TypeError, ValueError):
+            # Cursors are forgiving: a bad one restarts from the first line.
+            return 1
+        return max(1, value)
+    try:
+        value = int(start_line)
+    except (TypeError, ValueError, OverflowError) as exc:
+        # int(float("inf")) raises OverflowError, not ValueError; surface it as
+        # the same controlled argument error as any other bad start_line.
+        raise ExploreArgumentError("start_line must be a positive integer") from exc
+    if value <= 0:
+        raise ExploreArgumentError("start_line must be a positive integer")
+    return value
+def _is_under(path: str, base: str) -> bool:
+    """Tell whether ``path`` is ``base`` itself or located somewhere below it.
+    Both arguments are made absolute first. Paths that share no common
+    ancestor (``os.path.commonpath`` raising ``ValueError``) are not under
+    ``base``.
+    """
+    try:
+        candidate = os.path.abspath(path)
+        anchor = os.path.abspath(base)
+        shared = os.path.commonpath((candidate, anchor))
+    except ValueError:
+        return False
+    return shared == anchor
+def _display_path(path: str, base: str | None) -> str:
+    """Return the path shown to callers, never an empty string.
+    With a ``base`` the result comes from ``relative_path(path, base)``;
+    without one it is the normalized absolute path. An empty result is
+    reported as ``"."``.
+    """
+    if base is None:
+        shown = normalize_relpath(os.path.abspath(path))
+    else:
+        shown = relative_path(path, base)
+    return shown if shown else "."
+def _raise_if_binary(path: str, display_path: str) -> None:
+    """Reject a file whose leading bytes contain a NUL byte.
+    Only the first ``binary_check_bytes()`` bytes are probed. Read failures
+    are reported through ``_raise_read_error``.
+    Raises:
+        ExploreRootError: if the probe contains ``\\x00`` or the file cannot
+            be opened or read.
+    """
+    try:
+        with open(path, "rb") as stream:
+            head = stream.read(binary_check_bytes())
+    except OSError as exc:
+        _raise_read_error(display_path, exc)
+    if head.find(b"\x00") != -1:
+        raise ExploreRootError(f"file appears to be binary: {display_path!r}")
+def _read_text_range(
+    path: str,
+    *,
+    display_path: str,
+    start_line: int,
+    limit: int,
+) -> tuple[str, int, bool, bool]:
+    """Stream a UTF-8 file and collect up to ``limit`` lines from ``start_line``.
+    The file is read in ``READ_CHUNK_BYTES`` binary chunks and decoded
+    incrementally, so lines before ``start_line`` are skipped without being
+    stored. Collected output is capped at ``MAX_READ_CHARS`` characters,
+    counting the ``"\\n"`` separators used to join the lines; a line that hits
+    the cap is cut short. A trailing ``"\\r"`` is removed from each line.
+    Returns:
+        ``(text, line_count, has_more_lines, content_truncated)`` where
+        ``text`` is the collected lines joined with ``"\\n"``,
+        ``has_more_lines`` is set when more text was seen after the collected
+        lines, and ``content_truncated`` is set when the character cap
+        stopped the read.
+    Raises:
+        ExploreRootError: if a chunk contains a NUL byte, the bytes are not
+            valid UTF-8, or the file cannot be opened/read.
+    """
+    # Completed output lines, without their line terminators.
+    lines: list[str] = []
+    # Pieces of the line currently being assembled (a line may span chunks).
+    current_parts: list[str] = []
+    # Characters committed to the output so far, including join separators.
+    output_chars = 0
+    # True while an output line has been opened but not yet finished.
+    current_line_started = False
+    # 1-based number of the file line currently being scanned.
+    line_number = 1
+    has_more_lines = False
+    content_truncated = False
+    # Set when process_text asked to stop before the file was exhausted.
+    stopped_early = False
+    def start_output_line() -> bool:
+        # Open a new output line if the line and character budgets allow it;
+        # returns False when no further line may be started.
+        nonlocal current_line_started, output_chars, content_truncated
+        if current_line_started:
+            return True
+        if len(lines) >= limit:
+            return False
+        # Every line after the first costs one character for the "\n" join.
+        separator_chars = 1 if lines else 0
+        if output_chars + separator_chars > MAX_READ_CHARS:
+            content_truncated = True
+            return False
+        output_chars += separator_chars
+        current_line_started = True
+        return True
+    def append_fragment(fragment: str) -> bool:
+        # Add text to the current line, cutting it at the character cap;
+        # returns False when reading must stop.
+        nonlocal output_chars, content_truncated
+        if not start_output_line():
+            return False
+        remaining = MAX_READ_CHARS - output_chars
+        if remaining <= 0:
+            content_truncated = True
+            return False
+        if len(fragment) > remaining:
+            # Keep only what still fits and flag the truncation.
+            current_parts.append(fragment[:remaining])
+            output_chars += remaining
+            content_truncated = True
+            return False
+        current_parts.append(fragment)
+        output_chars += len(fragment)
+        return True
+    def finish_output_line() -> None:
+        # Close the current line and move it into ``lines``.
+        nonlocal current_line_started, current_parts
+        line = "".join(current_parts)
+        # Drop the "\r" left behind by a "\r\n" line ending.
+        if line.endswith("\r"):
+            line = line[:-1]
+        lines.append(line)
+        current_parts = []
+        current_line_started = False
+    def process_text(text: str) -> bool:
+        # Consume one decoded chunk; returns False once reading should stop
+        # (line limit reached or character cap hit), True to keep reading.
+        nonlocal content_truncated, has_more_lines, line_number
+        while text:
+            if line_number < start_line:
+                # Still before the requested range: discard up to the next
+                # newline without storing anything.
+                newline_index = text.find("\n")
+                if newline_index < 0:
+                    return True
+                line_number += 1
+                text = text[newline_index + 1 :]
+                continue
+            if not current_line_started and len(lines) >= limit:
+                # The page is full and there is still text left in the file.
+                has_more_lines = True
+                return False
+            newline_index = text.find("\n")
+            if newline_index < 0:
+                # No line end in this chunk: buffer it and wait for more data.
+                if not append_fragment(text):
+                    return False
+                return True
+            fragment = text[:newline_index]
+            if fragment and not append_fragment(fragment):
+                return False
+            # An empty fragment still has to open a line so that blank lines
+            # (and lines whose text ended exactly at a chunk edge) are emitted.
+            if not fragment and not start_output_line():
+                has_more_lines = True
+                return False
+            finish_output_line()
+            line_number += 1
+            text = text[newline_index + 1 :]
+        return True
+    try:
+        # Incremental decoding keeps multi-byte sequences that straddle a
+        # chunk boundary intact.
+        decoder = codecs.getincrementaldecoder("utf-8")()
+        with open(path, "rb") as handle:
+            while True:
+                chunk = handle.read(READ_CHUNK_BYTES)
+                if not chunk:
+                    break
+                # A NUL byte in any chunk that gets read marks the file as binary.
+                if b"\x00" in chunk:
+                    raise ExploreRootError(
+                        f"file appears to be binary: {display_path!r}"
+                    )
+                text = decoder.decode(chunk)
+                if text and not process_text(text):
+                    stopped_early = True
+                    break
+            # When the whole file was read, flush the decoder; final=True marks
+            # the input as complete, so a dangling partial sequence surfaces as
+            # UnicodeDecodeError (handled below).
+            else_text = "" if stopped_early else decoder.decode(b"", final=True)
+            if else_text and not (has_more_lines or content_truncated):
+                process_text(else_text)
+        # Emit a still-open line: the last line of a file without a trailing
+        # newline, or a line that was cut at the character cap.
+        if current_line_started and len(lines) < limit:
+            finish_output_line()
+    except UnicodeDecodeError as exc:
+        raise ExploreRootError(
+            f"file is not valid UTF-8 text: {display_path!r}"
+        ) from exc
+    except ExploreRootError:
+        # ExploreRootError is itself an OSError; re-raise it untouched so the
+        # generic OSError handler below does not rewrap it.
+        raise
+    except OSError as exc:
+        _raise_read_error(display_path, exc)
+    return "\n".join(lines), len(lines), has_more_lines, content_truncated
+def _raise_read_error(display_path: str, exc: OSError) -> None:
+    """Always raise ``ExploreRootError`` describing a failed file read.
+    The message uses ``exc.strerror`` when it is set and non-empty, otherwise
+    ``str(exc)``; the original exception is chained as the cause.
+    """
+    detail = getattr(exc, "strerror", None)
+    if not detail:
+        detail = str(exc)
+    raise ExploreRootError(f"cannot read file {display_path!r}: {detail}") from exc
+def _list_single_file(
+    resolved: ResolvedExplorePath,
+    *,
+    glob_patterns: tuple[str, ...],
+    exclude_patterns: tuple[str, ...],
+) -> list[dict[str, object]]:
+    """Build the listing for a target that is a single regular file.
+    Returns a one-element list describing the file, or an empty list when the
+    file is excluded, ignored by the ignore rules of either base, or does not
+    match the glob patterns.
+    """
+    file_abs = resolved.abs_path
+    containing_dir = os.path.dirname(file_abs) or os.curdir
+    shown_path = _display_path(file_abs, resolved.display_base)
+    ignore_base = _listing_api_ignore_base(resolved, is_file=True)
+    # Both rule sets are loaded up front, before any filtering decision.
+    base_rules = ignore_patterns_for_root(
+        ignore_base,
+        rel_base_abs=None,
+        is_file=False,
+    )
+    local_rules = ignore_patterns_for_root(
+        file_abs,
+        rel_base_abs=None,
+        is_file=True,
+    )
+    # Filters run in a fixed order and the first hit hides the file.
+    if should_ignore_path(
+        shown_path,
+        is_dir=False,
+        exclude_patterns=exclude_patterns,
+        root_ignore_patterns=base_rules.root,
+        common_rel_path=relative_path(file_abs, ignore_base),
+    ):
+        return []
+    if _is_ignored_by_local_listing_patterns(
+        file_abs,
+        rel_path=shown_path,
+        is_dir=False,
+        local_base=containing_dir,
+        root_ignore_patterns=local_rules.root,
+    ):
+        return []
+    if not matches_glob(shown_path, glob_patterns):
+        return []
+    return [{"path": shown_path, "kind": "file"}]
+def _list_directory(
+    resolved: ResolvedExplorePath,
+    *,
+    glob_patterns: tuple[str, ...],
+    exclude_patterns: tuple[str, ...],
+) -> list[dict[str, object]]:
+    """List the immediate children of a directory target, sorted by path.
+    Only regular files and directories are reported, with symlinks not
+    followed when classifying; children whose type cannot be determined are
+    skipped. Directory paths get a trailing ``"/"``. Children that are
+    excluded, ignored, or do not match the glob patterns are left out.
+    Raises:
+        ExploreRootError: if the directory cannot be scanned.
+    """
+    ignore_base = _listing_api_ignore_base(resolved, is_file=False)
+    base_rules = ignore_patterns_for_root(
+        ignore_base,
+        rel_base_abs=None,
+        is_file=False,
+    )
+    local_rules = ignore_patterns_for_root(
+        resolved.abs_path,
+        rel_base_abs=None,
+        is_file=False,
+    )
+    listing: list[dict[str, object]] = []
+    try:
+        with os.scandir(resolved.abs_path) as scanner:
+            for child in scanner:
+                try:
+                    child_is_dir = child.is_dir(follow_symlinks=False)
+                    child_is_file = child.is_file(follow_symlinks=False)
+                except OSError:
+                    # Unreadable entry: leave it out rather than fail the listing.
+                    continue
+                if not (child_is_dir or child_is_file):
+                    continue
+                shown_path = _display_path(child.path, resolved.display_base)
+                if should_ignore_path(
+                    shown_path,
+                    is_dir=child_is_dir,
+                    exclude_patterns=exclude_patterns,
+                    root_ignore_patterns=base_rules.root,
+                    common_rel_path=relative_path(child.path, ignore_base),
+                ):
+                    continue
+                if _is_ignored_by_local_listing_patterns(
+                    child.path,
+                    rel_path=shown_path,
+                    is_dir=child_is_dir,
+                    local_base=resolved.abs_path,
+                    root_ignore_patterns=local_rules.root,
+                ):
+                    continue
+                if not matches_glob(shown_path, glob_patterns):
+                    continue
+                if child_is_dir:
+                    record: dict[str, object] = {"path": f"{shown_path}/", "kind": "dir"}
+                else:
+                    record = {"path": shown_path, "kind": "file"}
+                listing.append(record)
+    except OSError as exc:
+        detail = getattr(exc, "strerror", None) or str(exc)
+        raise ExploreRootError(
+            f"cannot list directory {resolved.display_path!r}: {detail}"
+        ) from exc
+    listing.sort(key=_entry_sort_key)
+    return listing
+def _listing_api_ignore_base(resolved: ResolvedExplorePath, *, is_file: bool) -> str:
+    """Choose the directory whose ignore rules govern a listing.
+    Targets located under the root base use the root base. Targets outside it
+    use their own directory: the parent for a file, the path itself for a
+    directory.
+    """
+    target_abs = resolved.abs_path
+    if _is_under(target_abs, resolved.root_base):
+        return resolved.root_base
+    if not is_file:
+        return target_abs
+    return os.path.dirname(target_abs) or os.curdir
+def _is_ignored_by_local_listing_patterns(
+    path: str,
+    *,
+    rel_path: str,
+    is_dir: bool,
+    local_base: str,
+    root_ignore_patterns: tuple[str, ...],
+) -> bool:
+    """Apply the listed directory's own ignore patterns to one entry.
+    Returns ``False`` straight away when there are no patterns; otherwise
+    defers to ``should_ignore_path`` with the entry's path made relative to
+    ``local_base``.
+    """
+    if root_ignore_patterns:
+        return should_ignore_path(
+            rel_path,
+            is_dir=is_dir,
+            root_ignore_patterns=root_ignore_patterns,
+            common_rel_path=relative_path(path, local_base),
+        )
+    return False
+def _entry_sort_key(entry: dict[str, Any]) -> tuple[str, str]:
+    """Sort key for listing entries: case-insensitive path, then exact path.
+    Trailing slashes are ignored in the primary key so a directory sorts next
+    to a same-named file; the raw path breaks ties deterministically.
+    """
+    raw_path = str(entry.get("path", ""))
+    folded = raw_path.rstrip("/").casefold()
+    return folded, raw_path