PyPI - codetool-explore - Versions diffs - 0.5.0__py3-none-win_arm64.whl - Mend

codetool-explore 0.5.0__py3-none-win_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

codetool_explore/__init__.py +35 -0
codetool_explore/_bin/codetool-explore-rust-windows-arm64.exe +0 -0
codetool_explore/api.py +266 -0
codetool_explore/cli.py +188 -0
codetool_explore/compression.py +150 -0
codetool_explore/cursor.py +71 -0
codetool_explore/errors.py +23 -0
codetool_explore/explorer.py +497 -0
codetool_explore/ignore.py +222 -0
codetool_explore/py.typed +0 -0
codetool_explore/python_backend/__init__.py +154 -0
codetool_explore/python_backend/case.py +19 -0
codetool_explore/python_backend/config.py +35 -0
codetool_explore/python_backend/constants.py +39 -0
codetool_explore/python_backend/file_search.py +51 -0
codetool_explore/python_backend/ignore_rules.py +40 -0
codetool_explore/python_backend/literal.py +79 -0
codetool_explore/python_backend/matcher.py +79 -0
codetool_explore/python_backend/models.py +49 -0
codetool_explore/python_backend/output.py +82 -0
codetool_explore/python_backend/regex_search.py +63 -0
codetool_explore/python_backend/search.py +327 -0
codetool_explore/python_backend/text.py +39 -0
codetool_explore/python_backend/walker.py +119 -0
codetool_explore/ranking.py +384 -0
codetool_explore/roots.py +148 -0
codetool_explore/rust_backend.py +308 -0
codetool_explore/text_output.py +475 -0
codetool_explore-0.5.0.dist-info/METADATA +240 -0
codetool_explore-0.5.0.dist-info/RECORD +33 -0
codetool_explore-0.5.0.dist-info/WHEEL +4 -0
codetool_explore-0.5.0.dist-info/entry_points.txt +2 -0
codetool_explore-0.5.0.dist-info/licenses/LICENSE +21 -0

codetool_explore/__init__.py ADDED Viewed

@@ -0,0 +1,35 @@
+"""Fast workspace search for coding-agent tools.
+The main public API is intentionally a single function:
+```
+from codetool_explore import explore
+```
+`explore()` can target file contents, file paths, a content/path union, read-only
+file ranges, or one-level directory listings. Patterns are regexes by default
+for search targets. ``backend="auto"`` dispatches searchable targets to a
+bundled or development Rust CLI accelerator when available, with the pure-Python
+stdlib backend as fallback. Use ``result_format="text"`` for maximally compact
+raw text output. Controlled failures raise ``ExploreError`` subclasses.
+"""
+from __future__ import annotations
+from .api import explore
+from .errors import (
+    ExploreArgumentError,
+    ExploreBackendError,
+    ExploreError,
+    ExplorePatternError,
+    ExploreRootError,
+)
+# Names re-exported as the package's public surface: the single ``explore``
+# entry point plus the exception classes imported from ``.errors``.
+__all__ = [
+    "explore",
+    "ExploreArgumentError",
+    "ExploreBackendError",
+    "ExploreError",
+    "ExplorePatternError",
+    "ExploreRootError",
+]

codetool_explore/_bin/codetool-explore-rust-windows-arm64.exe ADDED Viewed

Binary file

codetool_explore/api.py ADDED Viewed

@@ -0,0 +1,266 @@
+"""Public API for codetool-explore."""
+from __future__ import annotations
+import os
+import re
+from collections.abc import Iterable
+from .compression import compress_result
+from .errors import ExploreArgumentError, ExploreBackendError, ExplorePatternError
+from .explorer import list_path_target, read_file_target
+from .python_backend import resolve_case, search_python
+from .roots import RootInput
+from .rust_backend import RustBackendUnavailable, find_rust_binary, search_rust
+from .text_output import format_text_result
+# Backend selectors accepted by ``explore(backend=...)``; ``"rust"`` and
+# ``"native"`` are handled by the same branch in ``explore``.
+BACKENDS = frozenset({"auto", "python", "rust", "native"})
+# Targets that run a pattern search through ``search_python`` / ``search_rust``.
+SEARCH_TARGETS = frozenset({"content", "path", "content_or_path"})
+# Targets served by ``read_file_target`` / ``list_path_target`` instead of a
+# search backend; they default to text output.
+EXPLORATION_TARGETS = frozenset({"read", "list"})
+TARGETS = SEARCH_TARGETS | EXPLORATION_TARGETS
+# Canonical output format names.
+RESULT_FORMATS = frozenset({"compressed", "full", "text"})
+# Alternative spellings mapped onto a canonical format before validation.
+RESULT_FORMAT_ALIASES = {
+    "raw": "text",
+    "plain": "text",
+    "plaintext": "text",
+}
+# Probe strings scanned by ``_regex_can_produce_empty_match`` to detect
+# patterns that yield zero-width matches.
+EMPTY_REGEX_SAMPLES = ("", "a", "abc", " a ", "_", "0", "é")
+def _normalize_api_target(target: str) -> str:
+    selected_target = str(target or "content").lower()
+    if selected_target not in TARGETS:
+        raise ExploreArgumentError(
+            "target must be one of: content, path, content_or_path, read, list"
+        )
+    return selected_target
+def _normalize_result_format(
+    result_format: str | None,
+    *,
+    target: str = "content",
+) -> str:
+    default = "text" if target in EXPLORATION_TARGETS else "compressed"
+    selected_format = str(result_format or default).lower()
+    selected_format = RESULT_FORMAT_ALIASES.get(selected_format, selected_format)
+    if selected_format not in RESULT_FORMATS:
+        raise ExploreArgumentError(
+            "result_format must be one of: compressed, full, text"
+        )
+    return selected_format
+def _finalize_result(result: dict[str, object], result_format: str) -> dict[str, object] | str:
+    selected_format = _normalize_result_format(
+        result_format,
+        target=str(result.get("target", "content")),
+    )
+    if selected_format == "full":
+        return result
+    if selected_format == "text":
+        return format_text_result(result)
+    return compress_result(result)
+def _regex_can_produce_empty_match(pattern: str, *, case: str) -> bool:
+    requested_case = str(case or "smart").lower()
+    _, case_sensitive = resolve_case(requested_case, pattern)
+    flags = 0 if case_sensitive else re.IGNORECASE
+    try:
+        compiled = re.compile(pattern, flags)
+    except re.error as exc:
+        raise ExplorePatternError(f"invalid regex: {exc}") from exc
+    return any(
+        any(match.start() == match.end() for match in compiled.finditer(sample))
+        for sample in EMPTY_REGEX_SAMPLES
+    )
+def _materialize_root_for_fallback(root: RootInput) -> RootInput:
+    if isinstance(root, Iterable) and not isinstance(root, (str, bytes, os.PathLike)):
+        return tuple(root)
+    return root
+def explore(
+    pattern: str,
+    root: RootInput = ".",
+    target: str = "content",
+    regex: bool = True,
+    path_scope: str = "path",
+    glob: str | Iterable[str] | None = None,
+    exclude: str | Iterable[str] | None = None,
+    case: str = "smart",
+    mode: str = "files",
+    context_lines: int = 0,
+    limit: int = 50,
+    cursor: str | int | None = None,
+    backend: str = "auto",
+    result_format: str | None = None,
+    start_line: int = 1,
+) -> dict[str, object] | str:
+    """Search, read, or list workspace files below ``root``.
+    ``target`` selects what the pattern is matched against:
+    ``"content"`` searches file contents, ``"path"`` searches relative file
+    paths without opening files, and ``"content_or_path"`` returns files
+    matching either.
+    ``"read"`` treats ``pattern`` as one file path and returns a controlled
+    line range. ``"list"`` treats ``pattern`` as one file/directory path and
+    returns a one-level listing.
+    Patterns are interpreted as regex by default; pass ``regex=False`` for exact
+    literal search. ``root`` may be one directory/file path or a non-empty list
+    of directory/file paths. Multi-root searches report paths relative to the
+    roots' common base, so sibling roots keep prefixes such as ``src/...``.
+    To tolerate common JSON/tool-call mistakes, a whitespace-separated root
+    string is treated as multiple roots only when that exact path does not
+    exist and every split token is an existing file or directory.
+    ``backend="auto"`` prefers the optional Rust CLI accelerator when available
+    and falls back to the pure-Python backend otherwise. Regex searches use the
+    Rust helper when it supports the requested syntax; Python remains the
+    compatibility fallback.
+    Search results are returned in a compact structured shape by default; read
+    results default to plain text and list results default to tree-compressed
+    text. Pass
+    ``result_format="full"`` to receive the pre-compression backend result
+    dictionary unchanged. Pass ``result_format="text"`` (or ``"raw"``) for an
+    RTK-inspired plain-text rendering optimized for token compression.
+    """
+    selected = str(backend or "auto").lower()
+    if selected not in BACKENDS:
+        raise ExploreArgumentError(
+            "backend must be one of: auto, python, rust, native"
+        )
+    normalised_target = _normalize_api_target(target)
+    selected_format = _normalize_result_format(
+        result_format,
+        target=normalised_target,
+    )
+    if normalised_target == "read":
+        result = read_file_target(
+            pattern,
+            root=root,
+            start_line=start_line,
+            limit=limit,
+            cursor=cursor,
+        )
+        if selected != "python":
+            result["backend_requested"] = selected
+        return _finalize_result(result, selected_format)
+    if normalised_target == "list":
+        result = list_path_target(
+            pattern,
+            root=root,
+            glob=glob,
+            exclude=exclude,
+            limit=limit,
+            cursor=cursor,
+        )
+        if selected != "python":
+            result["backend_requested"] = selected
+        return _finalize_result(result, selected_format)
+    if selected == "python":
+        result = search_python(
+            pattern,
+            root=root,
+            regex=regex,
+            target=normalised_target,
+            path_scope=path_scope,
+            glob=glob,
+            exclude=exclude,
+            case=case,
+            mode=mode,
+            context_lines=context_lines,
+            limit=limit,
+            cursor=cursor,
+        )
+        return _finalize_result(result, selected_format)
+    if selected in {"rust", "native"}:
+        result = search_rust(
+            pattern,
+            root=root,
+            regex=regex,
+            target=normalised_target,
+            path_scope=path_scope,
+            glob=glob,
+            exclude=exclude,
+            case=case,
+            mode=mode,
+            context_lines=context_lines,
+            limit=limit,
+            cursor=cursor,
+        )
+        return _finalize_result(result, selected_format)
+    # auto: prefer Rust if discoverable, then fall back to Python. The fallback
+    # preserves Python-regex compatibility for syntax unsupported by Rust's
+    # regex engine.
+    root_for_auto = _materialize_root_for_fallback(root)
+    rust_binary = find_rust_binary()
+    if rust_binary:
+        fallback_reason: str | None = None
+        if (
+            regex
+            and normalised_target in {"content", "content_or_path"}
+            and isinstance(pattern, str)
+            and _regex_can_produce_empty_match(pattern, case=case)
+        ):
+            fallback_reason = (
+                "Rust backend skipped for regex patterns that can match empty "
+                "spans; Python preserves re.finditer count semantics"
+            )
+        if fallback_reason is None:
+            try:
+                result = search_rust(
+                    pattern,
+                    root=root_for_auto,
+                    regex=regex,
+                    target=normalised_target,
+                    path_scope=path_scope,
+                    glob=glob,
+                    exclude=exclude,
+                    case=case,
+                    mode=mode,
+                    context_lines=context_lines,
+                    limit=limit,
+                    cursor=cursor,
+                    binary=rust_binary,
+                )
+                result["backend_requested"] = "auto"
+                return _finalize_result(result, selected_format)
+            except (
+                RustBackendUnavailable,
+                ExploreBackendError,
+                ExplorePatternError,
+                ExploreArgumentError,
+                RuntimeError,
+                ValueError,
+            ) as exc:
+                fallback_reason = str(exc)
+    else:
+        fallback_reason = "Rust backend unavailable"
+    result = search_python(
+        pattern,
+        root=root_for_auto,
+        regex=regex,
+        target=normalised_target,
+        path_scope=path_scope,
+        glob=glob,
+        exclude=exclude,
+        case=case,
+        mode=mode,
+        context_lines=context_lines,
+        limit=limit,
+        cursor=cursor,
+    )
+    result["backend_requested"] = "auto"
+    result["backend_fallback"] = fallback_reason
+    return _finalize_result(result, selected_format)

codetool_explore/cli.py ADDED Viewed

@@ -0,0 +1,188 @@
+"""Command-line interface for codetool-explore."""
+from __future__ import annotations
+import argparse
+import json
+import sys
+from collections.abc import Sequence
+from .api import explore
+from .errors import ExploreError
+def build_parser() -> argparse.ArgumentParser:
+    parser = argparse.ArgumentParser(
+        prog="codetool-explore",
+        description=(
+            "Search file contents, file paths, read files, or list directories "
+            "with compact JSON or raw text output."
+        ),
+    )
+    parser.add_argument("pattern", nargs="?", help="text or regex pattern to search")
+    parser.add_argument("path", nargs="?", help="root directory/file (default: .)")
+    parser.add_argument("--pattern", dest="pattern_flag", help="text or regex pattern")
+    action_group = parser.add_mutually_exclusive_group()
+    action_group.add_argument(
+        "--read",
+        dest="read_path",
+        help="read one known file path with controlled line output",
+    )
+    action_group.add_argument(
+        "--list",
+        dest="list_path",
+        help="list one directory level or one file path",
+    )
+    parser.add_argument(
+        "--root",
+        dest="roots",
+        action="append",
+        default=None,
+        help="root directory/file (repeat for multiple roots; default: .)",
+    )
+    parser.add_argument(
+        "--target",
+        choices=("content", "path", "content_or_path", "read", "list"),
+        default="content",
+        help="search target (default: content)",
+    )
+    parser.add_argument(
+        "--path-scope",
+        choices=("path", "basename"),
+        default="path",
+        help=(
+            "path field matched when --target path/content_or_path "
+            "(default: path)"
+        ),
+    )
+    parser.add_argument(
+        "--format",
+        choices=("compressed", "full", "text", "raw", "plain"),
+        default=None,
+        help=(
+            "output format (default: compressed JSON for search, plain text "
+            "for read, tree text for list)"
+        ),
+    )
+    parser.add_argument(
+        "--raw",
+        action="store_true",
+        help='shortcut for --format text; prints "No Match" when empty',
+    )
+    parser.add_argument(
+        "--mode",
+        choices=("files", "snippets", "count"),
+        default="files",
+        help="result mode (default: files)",
+    )
+    parser.add_argument("--context-lines", type=int, default=0)
+    parser.add_argument("--limit", type=int, default=50)
+    parser.add_argument("--cursor")
+    parser.add_argument("--start-line", type=int, default=1)
+    parser.add_argument("--glob", action="append")
+    parser.add_argument("--exclude", action="append")
+    parser.add_argument(
+        "--case",
+        default="smart",
+        choices=(
+            "smart",
+            "sensitive",
+            "case-sensitive",
+            "exact",
+            "insensitive",
+            "ignore",
+            "ignorecase",
+            "case-insensitive",
+            "i",
+        ),
+    )
+    parser.add_argument(
+        "--backend",
+        default="auto",
+        choices=("auto", "python", "rust", "native"),
+    )
+    regex_group = parser.add_mutually_exclusive_group()
+    regex_group.add_argument(
+        "--regex",
+        dest="regex",
+        action="store_true",
+        default=True,
+        help="interpret pattern as regex (default)",
+    )
+    regex_group.add_argument(
+        "-F",
+        "--literal",
+        dest="regex",
+        action="store_false",
+        help="interpret pattern literally",
+    )
+    return parser
+def main(argv: Sequence[str] | None = None) -> int:
+    parser = build_parser()
+    args = parser.parse_args(argv)
+    cli_root = None
+    if args.roots is not None:
+        cli_root = args.roots[0] if len(args.roots) == 1 else args.roots
+    action_target = None
+    if args.read_path is not None:
+        action_target = "read"
+        if args.pattern_flag is not None or args.pattern is not None or args.path is not None:
+            parser.error("--read cannot be combined with positional pattern/path or --pattern")
+        pattern = args.read_path
+        root = cli_root or "."
+    elif args.list_path is not None:
+        action_target = "list"
+        if args.pattern_flag is not None or args.pattern is not None or args.path is not None:
+            parser.error("--list cannot be combined with positional pattern/path or --pattern")
+        pattern = args.list_path
+        root = cli_root or "."
+    elif args.pattern_flag is not None:
+        pattern = args.pattern_flag
+        root = cli_root or args.path or args.pattern or "."
+    else:
+        pattern = args.pattern
+        root = cli_root or args.path or "."
+    if pattern is None:
+        parser.error("missing path" if args.target in {"read", "list"} else "missing pattern")
+    result_format = "text" if args.raw else args.format
+    target = action_target or args.target
+    try:
+        result = explore(
+            pattern,
+            root=root,
+            target=target,
+            regex=args.regex,
+            path_scope=args.path_scope,
+            glob=args.glob,
+            exclude=args.exclude,
+            case=args.case,
+            mode=args.mode,
+            context_lines=args.context_lines,
+            limit=args.limit,
+            cursor=args.cursor,
+            start_line=args.start_line,
+            backend=args.backend,
+            result_format=result_format,
+        )
+    except ExploreError as exc:
+        print(f"codetool-explore: {exc}", file=sys.stderr)
+        return 2
+    if isinstance(result, str):
+        sys.stdout.write(result)
+        if not result.endswith("\n"):
+            sys.stdout.write("\n")
+    else:
+        print(json.dumps(result, sort_keys=True, separators=(",", ":")))
+    return 0
+def run() -> None:
+    raise SystemExit(main(sys.argv[1:]))
+# Allow the module to be executed directly as a script.
+if __name__ == "__main__":
+    run()

codetool_explore/compression.py ADDED Viewed

@@ -0,0 +1,150 @@
+"""Compact result shaping for public search output.
+The search backends return the full, compatibility-preserving result shape.
+This module owns the API output compression layer that runs after any backend
+finishes, keeping location/count/pagination data while abbreviating low-value
+metadata for coding-agent token efficiency.
+"""
+from __future__ import annotations
+def _next_request(result: dict[str, object]) -> dict[str, object] | None:
+    next_cursor = result.get("next_cursor")
+    if next_cursor is None:
+        return None
+    request: dict[str, object] = {
+        "pattern": result.get("pattern"),
+        "root": result.get("root"),
+        "cursor": next_cursor,
+        "limit": result.get("limit"),
+        "backend": result.get("backend_requested", result.get("backend")),
+    }
+    for key in ("target", "path_scope", "mode", "case", "regex", "glob", "exclude"):
+        if key in result:
+            request[key] = result[key]
+    return request
+def _backend_info(result: dict[str, object]) -> dict[str, object]:
+    backend: dict[str, object] = {"selected": result.get("backend")}
+    if "backend_requested" in result:
+        backend["requested"] = result["backend_requested"]
+    if "backend_fallback" in result:
+        backend["fallback"] = result["backend_fallback"]
+    return backend
+def _page_info(result: dict[str, object], returned: int) -> dict[str, object]:
+    page: dict[str, object] = {
+        "returned": result.get("returned", returned),
+        "limit": result.get("limit"),
+        "offset": result.get("offset"),
+        "truncated": result.get("truncated", False),
+        "next_cursor": result.get("next_cursor"),
+    }
+    next_request = _next_request(result)
+    if next_request is not None:
+        page["next_request"] = next_request
+    return page
+def _compress_read_result(result: dict[str, object]) -> dict[str, object]:
+    output: dict[str, object] = {
+        "format": "compressed",
+        "target": "read",
+        "backend": _backend_info(result),
+        "path": result.get("path"),
+        "start_line": result.get("start_line"),
+        "line_count": result.get("line_count", result.get("returned", 0)),
+        "page": _page_info(result, int(result.get("returned", 0) or 0)),
+        "text": result.get("text", ""),
+    }
+    if result.get("content_truncated"):
+        output["content_truncated"] = True
+    return output
+def _compress_list_result(result: dict[str, object]) -> dict[str, object]:
+    entries: list[dict[str, object]] = []
+    for entry in result.get("entries", []):
+        if not isinstance(entry, dict):
+            continue
+        compact: dict[str, object] = {}
+        if "path" in entry:
+            compact["p"] = entry["path"]
+        if "kind" in entry:
+            compact["k"] = entry["kind"]
+        entries.append(compact)
+    return {
+        "format": "compressed",
+        "target": "list",
+        "backend": _backend_info(result),
+        "path": result.get("path"),
+        "page": _page_info(result, len(entries)),
+        "totals": {
+            "entries": result.get("total_entries", 0),
+            "files": result.get("total_files", 0),
+            "dirs": result.get("total_dirs", 0),
+        },
+        "entries": entries,
+    }
+def compress_result(result: dict[str, object]) -> dict[str, object]:
+    """Return the default compact structured search result shape."""
+    target = result.get("target", "content")
+    if target == "read":
+        return _compress_read_result(result)
+    if target == "list":
+        return _compress_list_result(result)
+    mode = str(result.get("mode", "files"))
+    compact_matches: list[dict[str, object]] = []
+    for match in result.get("matches", []):
+        if not isinstance(match, dict):
+            continue
+        compact: dict[str, object] = {}
+        if "path" in match:
+            compact["p"] = match["path"]
+        if mode != "files":
+            if "line" in match:
+                compact["l"] = match["line"]
+            elif "first_line" in match:
+                compact["l"] = match["first_line"]
+            if "count" in match:
+                compact["c"] = match["count"]
+        if "snippet" in match:
+            compact["s"] = match["snippet"]
+        if "context" in match:
+            compact["ctx"] = match["context"]
+        if "match_kind" in match:
+            compact["m"] = match["match_kind"]
+        if "kind" in match and match.get("kind") != "file":
+            compact["k"] = match["kind"]
+        compact_matches.append(compact)
+    totals: dict[str, object] = {
+        "files": result.get("total_files", 0),
+        "matches": result.get("total_matches", 0),
+        "count": result.get("count", 0),
+    }
+    target = result.get("target", "content")
+    if target != "content":
+        totals["path"] = result.get("path_matches", 0)
+        totals["content_files"] = result.get("content_files", 0)
+        totals["content_count"] = result.get("content_count", 0)
+    output: dict[str, object] = {
+        "format": "compressed",
+        "mode": mode,
+        "backend": _backend_info(result),
+        "totals": totals,
+        "page": _page_info(result, len(compact_matches)),
+        "matches": compact_matches,
+    }
+    if target != "content" and "target" in result:
+        output["target"] = result["target"]
+    return output