PyPI - codetool-explore - Versions diffs - 0.5.0__py3-none-win_arm64.whl - Mend

codetool-explore 0.5.0__py3-none-win_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

codetool_explore/__init__.py +35 -0
codetool_explore/_bin/codetool-explore-rust-windows-arm64.exe +0 -0
codetool_explore/api.py +266 -0
codetool_explore/cli.py +188 -0
codetool_explore/compression.py +150 -0
codetool_explore/cursor.py +71 -0
codetool_explore/errors.py +23 -0
codetool_explore/explorer.py +497 -0
codetool_explore/ignore.py +222 -0
codetool_explore/py.typed +0 -0
codetool_explore/python_backend/__init__.py +154 -0
codetool_explore/python_backend/case.py +19 -0
codetool_explore/python_backend/config.py +35 -0
codetool_explore/python_backend/constants.py +39 -0
codetool_explore/python_backend/file_search.py +51 -0
codetool_explore/python_backend/ignore_rules.py +40 -0
codetool_explore/python_backend/literal.py +79 -0
codetool_explore/python_backend/matcher.py +79 -0
codetool_explore/python_backend/models.py +49 -0
codetool_explore/python_backend/output.py +82 -0
codetool_explore/python_backend/regex_search.py +63 -0
codetool_explore/python_backend/search.py +327 -0
codetool_explore/python_backend/text.py +39 -0
codetool_explore/python_backend/walker.py +119 -0
codetool_explore/ranking.py +384 -0
codetool_explore/roots.py +148 -0
codetool_explore/rust_backend.py +308 -0
codetool_explore/text_output.py +475 -0
codetool_explore-0.5.0.dist-info/METADATA +240 -0
codetool_explore-0.5.0.dist-info/RECORD +33 -0
codetool_explore-0.5.0.dist-info/WHEEL +4 -0
codetool_explore-0.5.0.dist-info/entry_points.txt +2 -0
codetool_explore-0.5.0.dist-info/licenses/LICENSE +21 -0

codetool_explore/text_output.py ADDED Viewed

@@ -0,0 +1,475 @@
+"""Token-compact plain-text rendering for search and exploration results."""
+from __future__ import annotations
+import re
+from collections import defaultdict
+from collections.abc import Iterable
+from dataclasses import dataclass
+from typing import Any
+# Text returned by format_text_result when the page holds no match records.
+NO_MATCH = "No Match"
+# Default character budget for a rendered line before it is cropped.
+MAX_TEXT_CHARS = 80
+# Tighter character budget applied to non-matching context lines.
+MAX_CONTEXT_CHARS = 48
+# Below this many snippet matches both flat and tree renderings are always
+# produced; at or above it the flat rendering may be skipped.
+SNIPPET_TREE_ONLY_MIN_MATCHES = 20
+@dataclass(frozen=True)
+class _Row:
+    """One renderable line: a path, a trailing suffix and its source record."""
+    # Path used for display and for splitting into tree segments on "/".
+    path: str
+    # Text appended directly after the path or leaf name; the row builders in
+    # this module use "", " x<count>" or "/".
+    suffix: str
+    # The dict the row was built from; the snippet tree stores its grouped
+    # matches here under the "snippet_matches" key.
+    match: dict[str, object]
+def format_text_result(result: dict[str, object]) -> str:
+    """Return an rg/RTK-inspired compact text representation.
+    The renderer deliberately omits backend and totals metadata. It emits only
+    the current page of matches, factoring repeated path prefixes when that is
+    estimated to save tokens. A one-line pagination hint is included only when
+    another page exists.
+    ``target`` values ``"read"`` and ``"list"`` are delegated to their own
+    renderers; any other target is rendered as a match listing whose ``mode``
+    (``"snippets"``, ``"count"``, otherwise files) selects the row format.
+    Returns ``NO_MATCH`` when the page holds no dict-shaped matches, including
+    when ``matches`` is missing or null.
+    """
+    target = str(result.get("target", "content"))
+    if target == "read":
+        return _format_read_text(result)
+    if target == "list":
+        return _format_list_text(result)
+    # ``matches`` may be present but null; ``or ()`` keeps the iteration below
+    # from raising TypeError in that case.
+    raw_matches = result.get("matches") or ()
+    matches = [item for item in raw_matches if isinstance(item, dict)]
+    if not matches:
+        return NO_MATCH
+    mode = str(result.get("mode", "files"))
+    if mode == "snippets":
+        # A null pattern must not turn into the literal text "None", which
+        # would then steer snippet cropping.
+        raw_pattern = result.get("pattern")
+        candidates = _snippet_candidates(
+            matches,
+            pattern="" if raw_pattern is None else str(raw_pattern),
+            regex=bool(result.get("regex", False)),
+            ignore_case=str(result.get("effective_case", "")) == "insensitive",
+        )
+    else:
+        # Count and files listings differ only in how a row is built.
+        make_row = _count_row if mode == "count" else _files_row
+        rows = [make_row(match) for match in matches]
+        candidates = [_format_flat(rows), _format_tree(rows)]
+    # Keep whichever rendering is cheapest by the byte-length proxy; on a tie
+    # the first candidate is kept.
+    text = min(candidates, key=_score)
+    return _with_pagination_header(text, result)
+def _with_pagination_header(text: str, result: dict[str, object]) -> str:
+    """Prefix *text* with a ``-- more`` cursor line when another page exists.
+    With no cursor (``None`` or ``""``) the text is returned untouched; with a
+    cursor but empty text only the hint line is returned.
+    """
+    cursor = result.get("next_cursor")
+    has_more = cursor not in (None, "")
+    if not has_more:
+        return text
+    hint = f"-- more: cursor={cursor}"
+    return f"{hint}\n{text}" if text else hint
+def _format_read_text(result: dict[str, object]) -> str:
+    """Render a ``read`` result: optional marker lines followed by the text.
+    A ``-- more`` line is emitted when ``next_cursor`` is set and a
+    ``-- content omitted`` line when ``content_truncated`` is truthy. The text
+    is appended when it is non-empty, or when ``returned`` is a positive count
+    (so an empty text can still contribute a line).
+    A null ``text`` is treated as empty and a non-numeric ``returned`` as zero.
+    """
+    markers: list[str] = []
+    next_cursor = result.get("next_cursor")
+    if next_cursor not in (None, ""):
+        markers.append(f"-- more: cursor={next_cursor}")
+    if result.get("content_truncated"):
+        markers.append("-- content omitted: output cap")
+    raw_text = result.get("text")
+    # str(None) would leak the literal "None" into the output.
+    text = "" if raw_text is None else str(raw_text)
+    # _int tolerates junk values where a bare int() would raise.
+    if text or _int(result.get("returned"), 0) > 0:
+        markers.append(text)
+    return "\n".join(markers)
+def _format_list_text(result: dict[str, object]) -> str:
+    """Render a ``list`` result as flat or tree text, whichever scores lower.
+    Only dict-shaped items of ``entries`` are rendered; a missing or null
+    ``entries`` value is treated as an empty listing. The pagination hint is
+    added when ``next_cursor`` is set.
+    """
+    # ``entries`` may be present but null; ``or ()`` avoids a TypeError.
+    raw_entries = result.get("entries") or ()
+    rows = [_list_row(entry) for entry in raw_entries if isinstance(entry, dict)]
+    candidates = [_format_flat(rows), _format_tree(rows)]
+    text = min(candidates, key=_score)
+    return _with_pagination_header(text, result)
+def _score(text: str) -> tuple[int, int]:
+    """Rank a rendering by UTF-8 byte length, then by character count.
+    This stands in for a token count without pulling in a tokenizer; lower
+    tuples are preferred by the callers' ``min``.
+    """
+    encoded = text.encode("utf-8")
+    byte_count = len(encoded)
+    char_count = len(text)
+    return byte_count, char_count
+def _clean_text(
+    value: object,
+    *,
+    pattern: str = "",
+    regex: bool = False,
+    ignore_case: bool = False,
+    max_chars: int = MAX_TEXT_CHARS,
+) -> str:
+    """Flatten *value* to a single trimmed line and crop it to *max_chars*.
+    Carriage returns and newlines each become one space before trimming; the
+    keyword arguments are forwarded to ``_crop_text`` unchanged.
+    """
+    single_line = str(value)
+    for line_break in ("\r", "\n"):
+        single_line = single_line.replace(line_break, " ")
+    trimmed = single_line.strip()
+    return _crop_text(
+        trimmed,
+        pattern=pattern,
+        regex=regex,
+        ignore_case=ignore_case,
+        max_chars=max_chars,
+    )
+def _crop_text(
+    text: str,
+    *,
+    pattern: str = "",
+    regex: bool = False,
+    ignore_case: bool = False,
+    max_chars: int = MAX_TEXT_CHARS,
+) -> str:
+    """Shorten *text* to roughly *max_chars*, keeping the match in view.
+    Text already within the budget is returned unchanged. When the pattern is
+    found, a window of *max_chars* characters is cut so the match starts about
+    one third of the way in, and an ellipsis marks each side that was cut off
+    (so the result can exceed *max_chars* by up to two characters). When the
+    pattern is not found the head of the text is kept with a trailing ellipsis.
+    """
+    total = len(text)
+    if total <= max_chars:
+        return text
+    if max_chars <= 1:
+        # No room for an ellipsis; hard cut.
+        return text[:max_chars]
+    hit = _match_index(
+        text,
+        pattern=pattern,
+        regex=regex,
+        ignore_case=ignore_case,
+    )
+    if hit < 0:
+        return text[: max_chars - 1].rstrip() + "…"
+    window_start = max(0, hit - max_chars // 3)
+    window_end = min(total, window_start + max_chars)
+    if window_end == total:
+        # The window ran off the end; slide it back to use the full budget.
+        window_start = max(0, window_end - max_chars)
+    body = text[window_start:window_end].strip()
+    lead = "…" if window_start > 0 else ""
+    tail = "…" if window_end < total else ""
+    return f"{lead}{body}{tail}"
+def _match_index(
+    text: str, *, pattern: str = "", regex: bool = False, ignore_case: bool = False
+) -> int:
+    """Return the offset of the first *pattern* hit in *text*, or ``-1``.
+    In regex mode the pattern is tried as a regular expression first; if it is
+    invalid or does not match, the search falls back to a literal substring
+    lookup. *ignore_case* applies to both the regex and the literal lookup.
+    """
+    if not pattern:
+        return -1
+    if regex:
+        try:
+            found = re.search(pattern, text, re.IGNORECASE if ignore_case else 0)
+        except re.error:
+            # Invalid expression: fall through to the literal lookup.
+            found = None
+        if found is not None:
+            return found.start()
+    if ignore_case:
+        return text.lower().find(pattern.lower())
+    return text.find(pattern)
+def _path(value: object) -> str:
+    """Normalise a path for display: forward slashes, no leading ``./``.
+    Falsy input and paths that reduce to nothing are rendered as ``"."``.
+    """
+    # Convert separators first so a Windows-style ".\" prefix is stripped by
+    # the same loop that handles "./".
+    path = str(value or "").replace("\\", "/")
+    while path.startswith("./"):
+        path = path[2:]
+    return path or "."
+def _int(value: object, default: int = 0) -> int:
+    """Coerce *value* to ``int``, returning *default* when that is impossible."""
+    try:
+        return int(value)  # type: ignore[arg-type]
+    except (TypeError, ValueError, OverflowError):
+        # TypeError: None or other non-numeric objects; ValueError: unparsable
+        # strings and NaN; OverflowError: float infinity.
+        return default
+def _files_row(match: dict[str, object]) -> _Row:
+    """Build a suffix-less row for a files-mode match."""
+    normalized = _path(match.get("path"))
+    return _Row(path=normalized, suffix="", match=match)
+def _count_row(match: dict[str, object]) -> _Row:
+    """Build a row whose suffix is `` x<count>`` for a count-mode match."""
+    normalized = _path(match.get("path"))
+    hits = _int(match.get("count"), 0)
+    return _Row(path=normalized, suffix=f" x{hits}", match=match)
+def _list_row(entry: dict[str, object]) -> _Row:
+    """Build a listing row; directories get a single trailing ``/`` suffix."""
+    normalized = _path(entry.get("path"))
+    is_directory = entry.get("kind") == "dir"
+    if not is_directory:
+        return _Row(normalized, "", entry)
+    # Drop any trailing slashes from the path so the suffix supplies the only
+    # one; a path made solely of slashes collapses to ".".
+    bare = normalized.rstrip("/") or "."
+    return _Row(bare, "/", entry)
+def _format_flat(rows: Iterable[_Row]) -> str:
+    """Render each row as ``path`` + ``suffix`` on its own line."""
+    rendered = [f"{row.path}{row.suffix}" for row in rows]
+    return "\n".join(rendered)
+class _TreeNode:
+    """A path segment in the prefix tree used by the tree renderers."""
+    def __init__(self) -> None:
+        # Child segments keyed by name.
+        self.children: dict[str, _TreeNode] = {}
+        # Rows whose path ends exactly at this node.
+        self.rows: list[_Row] = []
+        # Smallest input index of any row passing through this node; starts at
+        # a large sentinel and is lowered with min() as rows are inserted.
+        self.first_index = 1_000_000_000
+def _format_tree(rows: Iterable[_Row]) -> str:
+    """Render rows as an indented tree that shares common path prefixes.
+    Each row's path is split on ``/`` and inserted into a prefix tree; every
+    node on the way records the earliest row index that touched it so the
+    output keeps first-seen order.
+    """
+    root = _TreeNode()
+    for position, row in enumerate(rows):
+        root.first_index = min(root.first_index, position)
+        cursor = root
+        for segment in row.path.split("/"):
+            cursor = cursor.children.setdefault(segment, _TreeNode())
+            cursor.first_index = min(cursor.first_index, position)
+        cursor.rows.append(row)
+    rendered: list[str] = []
+    _render_tree(root, rendered, depth=0)
+    return "\n".join(rendered)
+def _ordered_children(node: _TreeNode) -> list[tuple[str, _TreeNode]]:
+    """Return ``(name, child)`` pairs ordered by each child's ``first_index``."""
+    pairs = list(node.children.items())
+    # list.sort is stable, so ties keep their insertion order.
+    pairs.sort(key=lambda pair: pair[1].first_index)
+    return pairs
+def _render_tree(node: _TreeNode, lines: list[str], *, depth: int) -> None:
+    """Append the subtree under *node* to *lines*, one space of indent per level.
+    A child with rows and no children is a leaf and emits one line per row
+    (name plus the row's suffix). Any other child emits ``name/`` and is
+    rendered recursively one level deeper.
+    """
+    pad = " " * depth
+    for name, child in _ordered_children(node):
+        is_leaf = bool(child.rows) and not child.children
+        if not is_leaf:
+            lines.append(f"{pad}{name}/")
+            _render_tree(child, lines, depth=depth + 1)
+            continue
+        lines.extend(f"{pad}{name}{row.suffix}" for row in child.rows)
+def _format_snippets_flat(
+    matches: list[dict[str, object]],
+    *,
+    pattern: str,
+    regex: bool,
+    ignore_case: bool,
+) -> str:
+    """Render snippet matches one after another without grouping by path.
+    A match with a non-empty ``context`` list emits its path followed by the
+    indented context entries. A match with neither ``snippet`` nor ``line``
+    emits the bare path. Every other match emits ``path:line:snippet``.
+    """
+    rendered: list[str] = []
+    for match in matches:
+        path = _path(match.get("path"))
+        context = match.get("context")
+        if isinstance(context, list) and context:
+            entries = _context_entries(
+                [match],
+                pattern=pattern,
+                regex=regex,
+                ignore_case=ignore_case,
+            )
+            rendered.append(path)
+            rendered.extend(f" {entry}" for entry in entries)
+            continue
+        has_location = "snippet" in match or "line" in match
+        if not has_location:
+            rendered.append(path)
+            continue
+        line_number = _int(match.get("line"), 0)
+        cropped = _clean_text(
+            match.get("snippet", ""),
+            pattern=pattern,
+            regex=regex,
+            ignore_case=ignore_case,
+        )
+        rendered.append(f"{path}:{line_number}:{cropped}")
+    return "\n".join(rendered)
+def _snippet_candidates(
+    matches: list[dict[str, object]],
+    *,
+    pattern: str,
+    regex: bool,
+    ignore_case: bool,
+) -> list[str]:
+    """Return the snippet renderings the caller should choose between.
+    When the tree layout is predicted to win, only the tree rendering is
+    produced; otherwise the flat rendering comes first, then the tree.
+    """
+    if _snippet_tree_likely_wins(matches):
+        renderers = (_format_snippets_tree,)
+    else:
+        renderers = (_format_snippets_flat, _format_snippets_tree)
+    return [
+        render(matches, pattern=pattern, regex=regex, ignore_case=ignore_case)
+        for render in renderers
+    ]
+def _snippet_tree_likely_wins(matches: list[dict[str, object]]) -> bool:
+    """Predict whether the tree layout beats repeating the path on every line.
+    Pages smaller than ``SNIPPET_TREE_ONLY_MIN_MATCHES`` always answer False.
+    Otherwise the bytes spent repeating each match's path (plus one separator
+    byte per match) are compared with the bytes of a tree built from the
+    distinct paths; the tree wins when it is no larger.
+    """
+    if len(matches) < SNIPPET_TREE_ONLY_MIN_MATCHES:
+        return False
+    paths = [_path(match.get("path")) for match in matches]
+    if len(paths) <= 1:
+        return False
+    # dict.fromkeys de-duplicates while keeping first-seen order.
+    distinct_paths = list(dict.fromkeys(paths))
+    flat_cost = sum(len(path.encode("utf-8")) + 1 for path in paths)
+    tree_text = _format_tree(_Row(path, "", {}) for path in distinct_paths)
+    tree_cost = len(tree_text.encode("utf-8"))
+    return tree_cost <= flat_cost
+def _format_snippets_tree(
+    matches: list[dict[str, object]],
+    *,
+    pattern: str,
+    regex: bool,
+    ignore_case: bool,
+) -> str:
+    """Render snippet matches grouped by file inside an indented path tree.
+    Matches are bucketed by normalised path in first-seen order, each bucket
+    becomes one row carrying its matches under ``snippet_matches``, and the
+    rows are inserted into a prefix tree that is then rendered with snippets.
+    """
+    # defaultdict is a dict subclass, so iteration follows insertion order.
+    by_path: dict[str, list[dict[str, object]]] = defaultdict(list)
+    for match in matches:
+        by_path[_path(match.get("path"))].append(match)
+    root = _TreeNode()
+    for position, (path, bucket) in enumerate(by_path.items()):
+        row = _Row(path, "", {"snippet_matches": bucket})
+        root.first_index = min(root.first_index, position)
+        cursor = root
+        for segment in row.path.split("/"):
+            cursor = cursor.children.setdefault(segment, _TreeNode())
+            cursor.first_index = min(cursor.first_index, position)
+        cursor.rows.append(row)
+    rendered: list[str] = []
+    _render_snippet_tree(
+        root,
+        rendered,
+        depth=0,
+        pattern=pattern,
+        regex=regex,
+        ignore_case=ignore_case,
+    )
+    return "\n".join(rendered)
+def _render_snippet_tree(
+    node: _TreeNode,
+    lines: list[str],
+    *,
+    depth: int,
+    pattern: str,
+    regex: bool,
+    ignore_case: bool,
+) -> None:
+    """Append the snippet subtree under *node* to *lines*.
+    A child with rows and no children is a file: its name is emitted, then its
+    merged snippet/context entries one extra space deeper. Any other child is
+    emitted as ``name/`` and rendered recursively one level deeper.
+    """
+    pad = " " * depth
+    for name, child in _ordered_children(node):
+        is_file = bool(child.rows) and not child.children
+        if not is_file:
+            lines.append(f"{pad}{name}/")
+            _render_snippet_tree(
+                child,
+                lines,
+                depth=depth + 1,
+                pattern=pattern,
+                regex=regex,
+                ignore_case=ignore_case,
+            )
+            continue
+        for row in child.rows:
+            lines.append(f"{pad}{name}")
+            bucket = row.match.get("snippet_matches", [])
+            if not isinstance(bucket, list):
+                continue
+            entries = _context_entries(
+                bucket,
+                pattern=pattern,
+                regex=regex,
+                ignore_case=ignore_case,
+            )
+            lines.extend(f"{pad} {entry}" for entry in entries)
+def _context_entries(
+    matches: list[Any],
+    *,
+    pattern: str = "",
+    regex: bool = False,
+    ignore_case: bool = False,
+) -> list[str]:
+    """Collapse snippet matches and their context into ordered display lines.
+    Matched lines are rendered as ``line:text``; context-only lines are
+    rendered as bare text and dropped when empty. A line reported by several
+    matches is emitted once, and a match always takes precedence over context.
+    Entries come back in ascending line-number order.
+    """
+    # line number -> (display text, whether the line is a match)
+    merged: dict[int, tuple[str, bool]] = {}
+    for item in matches:
+        if not isinstance(item, dict):
+            continue
+        hit_line = _int(item.get("line"), 0)
+        context = item.get("context")
+        for ctx in context if isinstance(context, list) else ():
+            if not isinstance(ctx, dict):
+                continue
+            number = _int(ctx.get("line"), 0)
+            if number <= 0:
+                continue
+            if number == hit_line:
+                # The context window contains the matched line itself: show
+                # the snippet, cropped around the pattern, and mark it a match.
+                merged[number] = (
+                    _clean_text(
+                        item.get("snippet", ""),
+                        pattern=pattern,
+                        regex=regex,
+                        ignore_case=ignore_case,
+                        max_chars=MAX_TEXT_CHARS,
+                    ),
+                    True,
+                )
+                continue
+            context_text = _clean_text(
+                ctx.get("text", ""),
+                max_chars=MAX_CONTEXT_CHARS,
+            )
+            # The first sighting of a context line is kept, and a line already
+            # recorded as a match is never downgraded.
+            merged.setdefault(number, (context_text, False))
+        if hit_line > 0:
+            # The match line is always recorded, even without any context.
+            merged[hit_line] = (
+                _clean_text(
+                    item.get("snippet", ""),
+                    pattern=pattern,
+                    regex=regex,
+                    ignore_case=ignore_case,
+                ),
+                True,
+            )
+    entries: list[str] = []
+    for number in sorted(merged):
+        text, is_match = merged[number]
+        if is_match:
+            entries.append(f"{number}:{text}")
+        elif text != "":
+            entries.append(text)
+    return entries