PyPI - codetool-explore - Versions diffs - 0.5.0__py3-none-win_arm64.whl - Mend

codetool-explore 0.5.0__py3-none-win_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

codetool_explore/__init__.py +35 -0
codetool_explore/_bin/codetool-explore-rust-windows-arm64.exe +0 -0
codetool_explore/api.py +266 -0
codetool_explore/cli.py +188 -0
codetool_explore/compression.py +150 -0
codetool_explore/cursor.py +71 -0
codetool_explore/errors.py +23 -0
codetool_explore/explorer.py +497 -0
codetool_explore/ignore.py +222 -0
codetool_explore/py.typed +0 -0
codetool_explore/python_backend/__init__.py +154 -0
codetool_explore/python_backend/case.py +19 -0
codetool_explore/python_backend/config.py +35 -0
codetool_explore/python_backend/constants.py +39 -0
codetool_explore/python_backend/file_search.py +51 -0
codetool_explore/python_backend/ignore_rules.py +40 -0
codetool_explore/python_backend/literal.py +79 -0
codetool_explore/python_backend/matcher.py +79 -0
codetool_explore/python_backend/models.py +49 -0
codetool_explore/python_backend/output.py +82 -0
codetool_explore/python_backend/regex_search.py +63 -0
codetool_explore/python_backend/search.py +327 -0
codetool_explore/python_backend/text.py +39 -0
codetool_explore/python_backend/walker.py +119 -0
codetool_explore/ranking.py +384 -0
codetool_explore/roots.py +148 -0
codetool_explore/rust_backend.py +308 -0
codetool_explore/text_output.py +475 -0
codetool_explore-0.5.0.dist-info/METADATA +240 -0
codetool_explore-0.5.0.dist-info/RECORD +33 -0
codetool_explore-0.5.0.dist-info/WHEEL +4 -0
codetool_explore-0.5.0.dist-info/entry_points.txt +2 -0
codetool_explore-0.5.0.dist-info/licenses/LICENSE +21 -0

codetool_explore/python_backend/walker.py ADDED Viewed

@@ -0,0 +1,119 @@
+"""Candidate-file walking for the Python backend."""
+from __future__ import annotations
+import os
+from collections.abc import Iterable, Iterator
+from ..ignore import matches_glob, relative_path, should_ignore_path
+from .ignore_rules import ignore_patterns_for_root
+from .models import CandidateFile
+def iter_candidate_files(
+    root: str,
+    *,
+    rel_base: str | None = None,
+    glob_patterns: Iterable[str] = (),
+    exclude_patterns: Iterable[str] = (),
+) -> Iterator[CandidateFile]:
+    """Yield candidate files for a directory or single-file ``root``."""
+    root_abs = os.path.abspath(os.fspath(root))
+    rel_base_abs = os.path.abspath(os.fspath(rel_base)) if rel_base else None
+    if os.path.isfile(root_abs):
+        yield from _iter_single_file_root(
+            root_abs,
+            rel_base_abs=rel_base_abs,
+            glob_patterns=glob_patterns,
+            exclude_patterns=exclude_patterns,
+        )
+        return
+    base_root = rel_base_abs or root_abs
+    ignore_patterns = ignore_patterns_for_root(
+        root_abs,
+        rel_base_abs=rel_base_abs,
+        is_file=False,
+    )
+    stack = [root_abs]
+    while stack:
+        current = stack.pop()
+        try:
+            with os.scandir(current) as entries:
+                for entry in entries:
+                    try:
+                        rel_path = relative_path(entry.path, base_root)
+                        common_rel_path = (
+                            relative_path(entry.path, root_abs)
+                            if rel_base_abs is not None
+                            else rel_path
+                        )
+                        if entry.is_dir(follow_symlinks=False):
+                            if should_ignore_path(
+                                rel_path,
+                                is_dir=True,
+                                exclude_patterns=exclude_patterns,
+                                ignore_patterns=ignore_patterns.common,
+                                root_ignore_patterns=ignore_patterns.root,
+                                common_rel_path=common_rel_path,
+                            ):
+                                continue
+                            stack.append(entry.path)
+                        elif entry.is_file(follow_symlinks=False):
+                            if should_ignore_path(
+                                rel_path,
+                                is_dir=False,
+                                exclude_patterns=exclude_patterns,
+                                ignore_patterns=ignore_patterns.common,
+                                root_ignore_patterns=ignore_patterns.root,
+                                common_rel_path=common_rel_path,
+                            ):
+                                continue
+                            if not matches_glob(rel_path, glob_patterns):
+                                continue
+                            try:
+                                stat_result = entry.stat(follow_symlinks=False)
+                            except OSError:
+                                continue
+                            yield CandidateFile(
+                                entry.path,
+                                rel_path,
+                                stat_result.st_size,
+                            )
+                    except OSError:
+                        continue
+        except OSError:
+            continue
+def _iter_single_file_root(
+    root_abs: str,
+    *,
+    rel_base_abs: str | None,
+    glob_patterns: Iterable[str],
+    exclude_patterns: Iterable[str],
+) -> Iterator[CandidateFile]:
+    filter_root = os.path.dirname(root_abs) or os.curdir
+    base_root = rel_base_abs or filter_root
+    rel_path = relative_path(root_abs, base_root)
+    ignore_patterns = ignore_patterns_for_root(
+        root_abs,
+        rel_base_abs=rel_base_abs,
+        is_file=True,
+    )
+    if should_ignore_path(
+        rel_path,
+        is_dir=False,
+        exclude_patterns=exclude_patterns,
+        ignore_patterns=ignore_patterns.common,
+        root_ignore_patterns=ignore_patterns.root,
+        common_rel_path=relative_path(root_abs, filter_root),
+    ) or not matches_glob(rel_path, glob_patterns):
+        return
+    try:
+        stat_result = os.stat(root_abs)
+    except OSError:
+        return
+    yield CandidateFile(root_abs, rel_path, stat_result.st_size)

codetool_explore/ranking.py ADDED Viewed

@@ -0,0 +1,384 @@
+"""Ranking helpers for search results.
+The ranking favours source files, path relevance, definitions, and concise
+matches. It is intentionally deterministic so pagination remains stable.
+"""
+from __future__ import annotations
+import os
+import re
+from collections.abc import Mapping
+# Lower-case file extensions (with leading dot) that ``is_source_path``
+# accepts as source or documentation files.
+SOURCE_EXTENSIONS: frozenset[str] = frozenset(
+    {
+        ".py",
+        ".pyi",
+        ".rs",
+        ".go",
+        ".js",
+        ".jsx",
+        ".ts",
+        ".tsx",
+        ".java",
+        ".kt",
+        ".c",
+        ".h",
+        ".cc",
+        ".cpp",
+        ".hpp",
+        ".cs",
+        ".rb",
+        ".php",
+        ".swift",
+        ".scala",
+        ".sh",
+        ".bash",
+        ".zsh",
+        ".fish",
+        ".toml",
+        ".yaml",
+        ".yml",
+        ".json",
+        ".md",
+        ".rst",
+    }
+)
+# Lower-case path segments that make ``is_generated_path`` classify a path
+# as generated or vendored; compared against whole segments, not substrings.
+GENERATED_SEGMENTS: frozenset[str] = frozenset(
+    {
+        "generated",
+        "vendor",
+        "vendors",
+        "third_party",
+        "third-party",
+        "coverage",
+        "htmlcov",
+        "site-packages",
+        "dist-packages",
+    }
+)
+# Lower-case path segments that make ``is_test_path`` classify a path as
+# test code; compared against whole segments, not substrings.
+TEST_SEGMENTS: frozenset[str] = frozenset(
+    {"test", "tests", "spec", "specs", "__tests__"}
+)
+# Lower-case prefixes that ``definition_bonus`` treats as the start of a
+# definition. They are matched with ``str.startswith`` against the stripped,
+# lower-cased snippet, which is why this is a tuple and every entry ends
+# with a space.
+DEFINITION_PREFIXES: tuple[str, ...] = (
+    "def ",
+    "async def ",
+    "class ",
+    "fn ",
+    "pub fn ",
+    "pub(crate) fn ",
+    "pub(super) fn ",
+    "async fn ",
+    "pub async fn ",
+    "pub(crate) async fn ",
+    "pub(super) async fn ",
+    "unsafe fn ",
+    "pub unsafe fn ",
+    "pub(crate) unsafe fn ",
+    "function ",
+    "export function ",
+    "export async function ",
+    "export default function ",
+    "export default async function ",
+    "export class ",
+    "export default class ",
+    "const ",
+    "pub const ",
+    "pub(crate) const ",
+    "pub(super) const ",
+    "static ",
+    "pub static ",
+    "pub(crate) static ",
+    "pub(super) static ",
+    "let ",
+    "export const ",
+    "export let ",
+    "export var ",
+    "var ",
+    "type ",
+    "pub type ",
+    "pub(crate) type ",
+    "pub(super) type ",
+    "export type ",
+    "interface ",
+    "export interface ",
+    "struct ",
+    "pub struct ",
+    "pub(crate) struct ",
+    "pub(super) struct ",
+    "enum ",
+    "pub enum ",
+    "pub(crate) enum ",
+    "pub(super) enum ",
+    "trait ",
+    "pub trait ",
+    "pub(crate) trait ",
+    "pub(super) trait ",
+    "export enum ",
+    "impl ",
+)
+# Lower-case query terms that ``query_mentions_tests`` takes as a sign that
+# the query itself is about tests; the sort keys then skip the test-path
+# penalty.
+TEST_INTENT_TERMS: frozenset[str] = frozenset(
+    {
+        "test",
+        "tests",
+        "testing",
+        "spec",
+        "specs",
+        "fixture",
+        "fixtures",
+        "mock",
+        "mocks",
+        "assert",
+        "pytest",
+        "unittest",
+    }
+)
+def _segments(path: str) -> tuple[str, ...]:
+    return tuple(
+        segment.lower()
+        for segment in path.replace(os.sep, "/").replace("\\", "/").split("/")
+        if segment
+    )
+def _basename(path: str) -> str:
+    return path.replace(os.sep, "/").replace("\\", "/").rsplit("/", 1)[-1]
+def is_generated_path(path: str) -> bool:
+    """Return true for generated/vendor/minified-looking paths."""
+    basename = _basename(path).lower()
+    return (
+        basename.endswith(".min.js")
+        or basename.endswith(".map")
+        or any(segment in GENERATED_SEGMENTS for segment in _segments(path))
+    )
+def is_test_path(path: str) -> bool:
+    """Return true for common test/spec file paths."""
+    basename = _basename(path).lower()
+    stem = basename.rsplit(".", 1)[0]
+    return (
+        any(segment in TEST_SEGMENTS for segment in _segments(path))
+        or stem.startswith("test_")
+        or stem.endswith("_test")
+        or stem.endswith(".test")
+        or stem.endswith(".spec")
+    )
+def is_source_path(path: str) -> bool:
+    """Return true for common source/documentation file extensions."""
+    _, extension = os.path.splitext(_basename(path).lower())
+    return extension in SOURCE_EXTENSIONS
+def query_mentions_tests(query: str) -> bool:
+    """Return true if the query itself appears test/spec oriented."""
+    return any(term in TEST_INTENT_TERMS for term in _query_terms(query))
+def path_relevance(path: str, query: str) -> int:
+    """Lower score is better for how well ``path`` matches ``query``."""
+    if not query:
+        return 50
+    query_lower = query.lower()
+    normalised = path.replace(os.sep, "/").replace("\\", "/").lower()
+    basename = _basename(normalised)
+    stem = basename.rsplit(".", 1)[0]
+    segments = _segments(normalised)
+    if stem == query_lower or basename == query_lower:
+        return 0
+    if stem.startswith(query_lower) or basename.startswith(query_lower):
+        return 5
+    if query_lower in stem or query_lower in basename:
+        return 10
+    if any(
+        query_lower == segment or segment.startswith(query_lower)
+        for segment in segments[:-1]
+    ):
+        return 15
+    if any(query_lower in segment for segment in segments[:-1]):
+        return 20
+    if query_lower in normalised:
+        return 30
+    query_compact = _compact_alnum(query) if _query_allows_compact_variant(query) else ""
+    basename_compact = _compact_alnum(basename)
+    stem_compact = _compact_alnum(stem)
+    if len(query_compact) >= 2 and query_compact in {stem_compact, basename_compact}:
+        return 0
+    if len(query_compact) >= 2 and (
+        stem_compact.startswith(query_compact)
+        or basename_compact.startswith(query_compact)
+    ):
+        return 5
+    if len(query_compact) >= 2 and (
+        query_compact in stem_compact or query_compact in basename_compact
+    ):
+        return 10
+    return _term_path_relevance(normalised, stem, segments, query)
+def _term_path_relevance(
+    normalised: str, stem: str, segments: tuple[str, ...], query: str
+) -> int:
+    terms = _query_terms(query)
+    if not terms:
+        return 50
+    stem_compact = _compact_alnum(stem)
+    path_compact = _compact_alnum(normalised)
+    directory_text = "/".join(segments[:-1])
+    directory_compact = _compact_alnum(directory_text)
+    stem_hits = sum(_text_matches_term(stem, stem_compact, term) for term in terms)
+    directory_hits = sum(
+        _text_matches_term(directory_text, directory_compact, term) for term in terms
+    )
+    path_hits = sum(_text_matches_term(normalised, path_compact, term) for term in terms)
+    if stem_hits == len(terms):
+        return 8
+    if stem_hits >= 2:
+        return 12
+    if directory_hits == len(terms):
+        return 15
+    if path_hits == len(terms):
+        return 18
+    if stem_hits == 1:
+        return 22
+    if directory_hits >= 1:
+        return 26
+    if path_hits >= 1:
+        return 35
+    return 50
+def _text_matches_term(text: str, compact: str, term: str) -> bool:
+    return term in text or term in compact
+def _query_terms(query: str) -> tuple[str, ...]:
+    terms = list(_split_identifier_terms(query))
+    if _query_allows_compact_variant(query):
+        _append_unique(terms, _compact_alnum(query))
+    return tuple(terms)
+def _split_identifier_terms(text: str) -> tuple[str, ...]:
+    text = _regex_escapes_as_separators(text)
+    spaced = re.sub(r"(?<=[a-z0-9])(?=[A-Z])", " ", text)
+    spaced = re.sub(r"(?<=[A-Z])(?=[A-Z][a-z])", " ", spaced)
+    terms: list[str] = []
+    for term in re.split(r"[^0-9A-Za-z]+", spaced):
+        _append_unique(terms, term.lower())
+    return tuple(terms)
+def _regex_escapes_as_separators(text: str) -> str:
+    chars: list[str] = []
+    index = 0
+    while index < len(text):
+        character = text[index]
+        if character == "\\":
+            chars.append(" ")
+            index += 1
+            if index < len(text):
+                if (
+                    text[index] in {"p", "P"}
+                    and index + 1 < len(text)
+                    and text[index + 1] == "{"
+                ):
+                    index += 2
+                    while index < len(text) and text[index] != "}":
+                        index += 1
+                    if index < len(text):
+                        index += 1
+                else:
+                    index += 1
+            continue
+        chars.append(character)
+        index += 1
+    return "".join(chars)
+def _query_allows_compact_variant(query: str) -> bool:
+    return all(
+        (character.isascii() and character.isalnum()) or character in "_-."
+        for character in query
+    )
+def _compact_alnum(text: str) -> str:
+    return "".join(
+        character.lower()
+        for character in text
+        if character.isascii() and character.isalnum()
+    )
+def _append_unique(terms: list[str], term: str) -> None:
+    if len(term) >= 2 and term not in terms:
+        terms.append(term)
+def definition_bonus(snippet: str) -> int:
+    """Return a negative score for definition-like snippets."""
+    stripped = snippet.strip().lower()
+    return -10 if stripped.startswith(DEFINITION_PREFIXES) else 0
+def file_sort_key(match: Mapping[str, object], query: str) -> tuple[object, ...]:
+    """Sort key for file/count mode result rows."""
+    path = str(match.get("path", ""))
+    count = int(match.get("count", 0) or 0)
+    first_line = int(match.get("first_line", 0) or 0)
+    mentions_tests = query_mentions_tests(query)
+    return (
+        10 if is_generated_path(path) else 0,
+        0 if is_source_path(path) else 5,
+        5 if is_test_path(path) and not mentions_tests else 0,
+        path_relevance(path, query),
+        min(count, 20),  # fewer matches are often more precise
+        first_line,
+        len(path),
+        path,
+    )
+def snippet_sort_key(match: Mapping[str, object], query: str) -> tuple[object, ...]:
+    """Sort key for snippet mode result rows."""
+    path = str(match.get("path", ""))
+    snippet = str(match.get("snippet", ""))
+    line = int(match.get("line", 0) or 0)
+    mentions_tests = query_mentions_tests(query)
+    return (
+        10 if is_generated_path(path) else 0,
+        0 if is_source_path(path) else 5,
+        5 if is_test_path(path) and not mentions_tests else 0,
+        path_relevance(path, query),
+        definition_bonus(snippet),
+        line,
+        len(path),
+        path,
+    )

codetool_explore/roots.py ADDED Viewed

@@ -0,0 +1,148 @@
+"""Root argument normalisation shared by search backends."""
+from __future__ import annotations
+import os
+import shlex
+from collections.abc import Iterable
+from dataclasses import dataclass
+from .errors import ExploreArgumentError, ExploreRootError
+# One root: a string path or an ``os.PathLike`` that yields ``str``.
+Pathish = str | os.PathLike[str]
+# What callers may pass as ``root``: one path or an iterable of paths.
+RootInput = Pathish | Iterable[Pathish]
+@dataclass(frozen=True)
+class SearchRoot:
+    """One validated root path."""
+    # Path text as supplied by the caller, after ``os.fspath`` coercion.
+    raw: str
+    # ``os.path.abspath`` of ``raw``.
+    abs_path: str
+@dataclass(frozen=True)
+class NormalizedRoots:
+    """Validated root set plus multi-root display/path metadata."""
+    # Validated roots, in the order they were supplied.
+    roots: tuple[SearchRoot, ...]
+    # True when the caller passed an iterable of roots, or when a single
+    # string was split into several roots by ``normalize_search_roots``.
+    from_sequence: bool
+    # Common base directory of all roots when there is more than one root,
+    # otherwise ``None``.
+    rel_base: str | None
+    # Raw paths as a list when ``from_sequence`` is true, otherwise the
+    # single raw path string.
+    display: str | list[str]
+    @property
+    def has_multiple(self) -> bool:
+        # True when more than one root was validated.
+        return len(self.roots) > 1
+def _is_root_sequence(value: object) -> bool:
+    return isinstance(value, Iterable) and not isinstance(
+        value, (str, bytes, os.PathLike)
+    )
+def _coerce_path(value: object) -> str:
+    try:
+        path = os.fspath(value)
+    except TypeError as exc:
+        raise ExploreArgumentError(
+            "root must be a path string or a list of path strings"
+        ) from exc
+    if not isinstance(path, str):
+        raise ExploreArgumentError(
+            "root must be a path string or a list of path strings"
+        )
+    return path
+def _is_searchable_path(raw_path: str) -> bool:
+    abs_path = os.path.abspath(raw_path)
+    return os.path.isdir(abs_path) or os.path.isfile(abs_path)
+def _strip_matching_quotes(value: str) -> str:
+    if len(value) >= 2 and value[0] == value[-1] and value[0] in {"'", '"'}:
+        return value[1:-1]
+    return value
+def _split_space_separated_roots(raw_path: str) -> tuple[str, ...] | None:
+    """Split a mistaken space-separated root string when it is unambiguous."""
+    if not raw_path.strip() or not any(char.isspace() for char in raw_path):
+        return None
+    if _is_searchable_path(raw_path):
+        return None
+    try:
+        parts = tuple(
+            _strip_matching_quotes(part) for part in shlex.split(raw_path, posix=False)
+        )
+    except ValueError:
+        return None
+    if len(parts) < 2 or not all(parts):
+        return None
+    if not all(_is_searchable_path(part) for part in parts):
+        return None
+    return parts
+def _common_rel_base(abs_roots: tuple[str, ...]) -> str:
+    common_inputs = [
+        os.path.dirname(path) if os.path.isfile(path) else path for path in abs_roots
+    ]
+    try:
+        common = os.path.commonpath(common_inputs)
+    except ValueError:
+        common = os.path.abspath(os.curdir)
+    if os.path.isfile(common):
+        common = os.path.dirname(common)
+    return common or os.path.abspath(os.curdir)
+def normalize_search_roots(root: RootInput) -> NormalizedRoots:
+    """Validate ``root`` as one path or a non-empty iterable of paths."""
+    from_sequence = _is_root_sequence(root)
+    if from_sequence:
+        raw_values = tuple(root)  # type: ignore[arg-type]
+        if not raw_values:
+            raise ExploreArgumentError("root list must not be empty")
+    else:
+        allow_implicit_split = isinstance(root, str)
+        raw_path = _coerce_path(root)
+        split_roots = (
+            _split_space_separated_roots(raw_path)
+            if allow_implicit_split
+            else None
+        )
+        if split_roots is None:
+            raw_values = (raw_path,)
+        else:
+            raw_values = split_roots
+            from_sequence = True
+    raw_paths = tuple(_coerce_path(value) for value in raw_values)
+    search_roots: list[SearchRoot] = []
+    for raw_path in raw_paths:
+        abs_path = os.path.abspath(raw_path)
+        if not (os.path.isdir(abs_path) or os.path.isfile(abs_path)):
+            if not os.path.exists(abs_path):
+                raise ExploreRootError(f"root does not exist: {raw_path!r}")
+            raise ExploreRootError(
+                f"root is neither a directory nor file: {raw_path!r}"
+            )
+        search_roots.append(SearchRoot(raw=raw_path, abs_path=abs_path))
+    rel_base = (
+        _common_rel_base(tuple(item.abs_path for item in search_roots))
+        if len(search_roots) > 1
+        else None
+    )
+    display: str | list[str] = list(raw_paths) if from_sequence else raw_paths[0]
+    return NormalizedRoots(
+        roots=tuple(search_roots),
+        from_sequence=from_sequence,
+        rel_base=rel_base,
+        display=display,
+    )