emergent-translator 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,448 @@
+ """AST-based code skeletonization for Python source files.
+
+ Replaces non-focal function/method bodies with ``...`` to reduce token usage
+ when feeding source code to LLMs. Uses only stdlib ``ast`` — zero extra
+ dependencies.
+
+ Example::
+
+     >>> from emergent_translator.code_skeleton import skeletonize
+     >>> print(skeletonize('''
+     ... class Foo:
+     ...     def bar(self, x: int) -> int:
+     ...         \"\"\"Return x squared.\"\"\"
+     ...         return x * x
+     ... '''))
+     <BLANKLINE>
+     class Foo:
+         def bar(self, x: int) -> int:
+             \"\"\"Return x squared.\"\"\"
+             ...
+     <BLANKLINE>
+ """
+
+ from __future__ import annotations
+
+ import ast
+ import fnmatch
+ import os
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import Any, Dict, List, Optional, Set, Union
+
+
+ # ---------------------------------------------------------------------------
+ # Private helpers & dataclass
+ # ---------------------------------------------------------------------------
+
+ @dataclass(frozen=True)
+ class _BodyRange:
+     """Line-range metadata for a single function/async-function definition."""
+     qual_name: str
+     decorator_start: int          # 1-based line of first decorator (or def line)
+     def_line: int                 # 1-based line of def/async def
+     body_start: int               # 1-based first line of body
+     body_end: int                 # 1-based last line of body (inclusive)
+     docstring_end: Optional[int]  # 1-based last line of docstring, or None
+     indent: int                   # column offset of body statements
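+
+ # A minimal illustration (hypothetical function, illustrative values) of what
+ # one ``_BodyRange`` captures for a short module-level definition::
+ #
+ #     def square(x):            # line 1 -> decorator_start = def_line = 1
+ #         """Return x * x."""   # line 2 -> body_start = 2, docstring_end = 2
+ #         return x * x          # line 3 -> body_end = 3, indent = 4
+ #
+ #     _BodyRange(qual_name="square", decorator_start=1, def_line=1,
+ #                body_start=2, body_end=3, docstring_end=2, indent=4)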
+
+
+ def _is_docstring(node: ast.AST) -> bool:
+     """True if *node* is a string-literal expression (i.e. a docstring)."""
+     return (
+         isinstance(node, ast.Expr)
+         and isinstance(node.value, ast.Constant)
+         and isinstance(node.value.value, str)
+     )
+
+
+ def _get_docstring_end(body: list[ast.stmt]) -> Optional[int]:
+     """Return ``end_lineno`` of docstring if the first statement is one."""
+     if body and _is_docstring(body[0]):
+         return body[0].end_lineno
+     return None
+
+
+ def _body_indent(lines: list[str], body_start_0: int) -> int:
+     """Detect indentation width (number of leading spaces) at *body_start_0* (0-based)."""
+     if 0 <= body_start_0 < len(lines):
+         line = lines[body_start_0]
+         return len(line) - len(line.lstrip())
+     return 0
+
+
+ def _collect_ranges(tree: ast.Module, lines: list[str]) -> List[_BodyRange]:
+     """Walk the AST and return a ``_BodyRange`` for every function definition."""
+     ranges: List[_BodyRange] = []
+
+     def _walk(node: ast.AST, prefix: str = "") -> None:
+         for child in ast.iter_child_nodes(node):
+             if isinstance(child, (ast.FunctionDef, ast.AsyncFunctionDef)):
+                 qual = f"{prefix}{child.name}" if not prefix else f"{prefix}.{child.name}"
+                 dec_start = (
+                     child.decorator_list[0].lineno
+                     if child.decorator_list
+                     else child.lineno
+                 )
+                 body_start = child.body[0].lineno
+                 body_end = child.body[-1].end_lineno
+                 ds_end = _get_docstring_end(child.body)
+                 indent = _body_indent(lines, body_start - 1)
+
+                 ranges.append(_BodyRange(
+                     qual_name=qual,
+                     decorator_start=dec_start,
+                     def_line=child.lineno,
+                     body_start=body_start,
+                     body_end=body_end,
+                     docstring_end=ds_end,
+                     indent=indent,
+                 ))
+                 # Recurse into the function body for nested defs
+                 _walk(child, qual)
+             elif isinstance(child, ast.ClassDef):
+                 cls_qual = f"{prefix}{child.name}" if not prefix else f"{prefix}.{child.name}"
+                 _walk(child, cls_qual)
+             else:
+                 _walk(child, prefix)
+
+     _walk(tree)
+     return ranges
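+
+ # Sketch of the qualified names produced above (illustrative input): for
+ #
+ #     class Foo:
+ #         def bar(self): ...
+ #         def baz(self):
+ #             def inner(): ...
+ #
+ # the collected ``qual_name`` values are "Foo.bar", "Foo.baz" and
+ # "Foo.baz.inner"; classes contribute a prefix but no range of their own.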
+
+
+ def _is_focal(qual_name: str, focal: Set[str]) -> bool:
+     """Check if *qual_name* matches any entry in *focal*.
+
+     Matching rules:
+     - Exact qualified name: ``"MyClass.my_method"``
+     - Simple (unqualified) name: ``"my_method"`` matches any ``*.my_method``
+     - Glob pattern: ``"MyClass.*"`` matches ``"MyClass.foo"``
+     """
+     simple_name = qual_name.rsplit(".", 1)[-1]
+     for pattern in focal:
+         if pattern == qual_name:
+             return True
+         if pattern == simple_name:
+             return True
+         if fnmatch.fnmatch(qual_name, pattern):
+             return True
+     return False
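+
+ # A few illustrative matches (hypothetical names), following the rules above:
+ #
+ #     >>> _is_focal("OrderManager.place_order", {"OrderManager.place_order"})
+ #     True
+ #     >>> _is_focal("OrderManager.place_order", {"place_order"})
+ #     True
+ #     >>> _is_focal("OrderManager.place_order", {"OrderManager.*"})
+ #     True
+ #     >>> _is_focal("OrderManager.cancel_order", {"place_order"})
+ #     False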
+
+
+ def _build_skeleton_lines(
+     lines: list[str],
+     ranges: List[_BodyRange],
+     focal: Set[str],
+     keep_docstrings: bool,
+ ) -> list[str]:
+     """Core line-surgery engine.
+
+     Returns a new list of lines with non-focal function bodies replaced by
+     ``...`` (preserving docstrings if requested).
+     """
+     if not ranges:
+         return list(lines)
+
+     # Determine which 1-based line numbers to suppress.
+     suppress: set[int] = set()
+     ellipsis_at: dict[int, int] = {}  # line_no -> indent for placing `...`
+
+     for r in ranges:
+         if _is_focal(r.qual_name, focal):
+             continue
+
+         # Single-line function (def and body on same line) — keep as-is.
+         if r.body_start == r.def_line:
+             continue
+
+         # Determine where replacement starts (after docstring if keeping them).
+         if keep_docstrings and r.docstring_end is not None:
+             replace_start = r.docstring_end + 1
+         else:
+             replace_start = r.body_start
+
+         if replace_start > r.body_end:
+             # Body is only a docstring — nothing to replace.
+             continue
+
+         for lineno in range(replace_start, r.body_end + 1):
+             suppress.add(lineno)
+
+         # Place `...` at the replacement start line.
+         ellipsis_at[replace_start] = r.indent
+
+     # Un-suppress lines that belong to focal functions (nested focal inside
+     # non-focal parent).
+     for r in ranges:
+         if _is_focal(r.qual_name, focal):
+             for lineno in range(r.decorator_start, r.body_end + 1):
+                 suppress.discard(lineno)
+
+     # Build output.
+     out: list[str] = []
+     for i, line in enumerate(lines):
+         lineno = i + 1  # 1-based
+         if lineno in ellipsis_at and lineno not in suppress:
+             # Edge case: this line is an ellipsis insertion point but wasn't
+             # suppressed (e.g. focal un-suppressed it). Just keep the line.
+             out.append(line)
+         elif lineno in ellipsis_at:
+             indent = ellipsis_at[lineno]
+             out.append(" " * indent + "...\n")
+         elif lineno not in suppress:
+             out.append(line)
+
+     return out
+
+
+ # ---------------------------------------------------------------------------
+ # Public dataclass
+ # ---------------------------------------------------------------------------
+
+ @dataclass(frozen=True)
+ class SkeletonResult:
+     """Result of skeletonizing a single file."""
+     source: str
+     original_lines: int
+     skeleton_lines: int
+     reduction_pct: float
+     functions_total: int
+     functions_skeletonized: int
+     functions_focal: int
+     original_tokens: int
+     skeleton_tokens: int
+     token_reduction_pct: float
+
+
+ # ---------------------------------------------------------------------------
+ # Free functions
+ # ---------------------------------------------------------------------------
+
+ def skeletonize(
+     source: str,
+     focal: Optional[list[str]] = None,
+     keep_docstrings: bool = True,
+ ) -> str:
+     """Return a skeletonized version of Python *source*.
+
+     Non-focal function/method bodies are replaced with ``...``. Focal
+     functions (matched by name, qualified name, or glob) keep their full
+     implementation.
+
+     Parameters
+     ----------
+     source:
+         Python source code as a string.
+     focal:
+         List of function/method names to keep. Supports qualified names
+         (``"Class.method"``), simple names (``"method"``), and globs
+         (``"Class.*"``).
+     keep_docstrings:
+         If True (default), docstrings are preserved even in skeletonized
+         functions.
+     """
+     if not source or not source.strip():
+         return source
+
+     try:
+         tree = ast.parse(source)
+     except SyntaxError:
+         return source
+
+     lines = source.splitlines(keepends=True)
+     # Ensure last line has a newline for consistent processing.
+     if lines and not lines[-1].endswith("\n"):
+         lines[-1] += "\n"
+         trailing_newline = False
+     else:
+         trailing_newline = True
+
+     focal_set: Set[str] = set(focal) if focal else set()
+     ranges = _collect_ranges(tree, lines)
+     result_lines = _build_skeleton_lines(lines, ranges, focal_set, keep_docstrings)
+     result = "".join(result_lines)
+
+     if not trailing_newline and result.endswith("\n"):
+         result = result[:-1]
+
+     return result
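+
+ # Usage sketch (illustrative source string and focal name):
+ #
+ #     >>> src = "def keep(x):\n    return x\n\ndef drop(y):\n    return y * 2\n"
+ #     >>> print(skeletonize(src, focal=["keep"]))
+ #     def keep(x):
+ #         return x
+ #     <BLANKLINE>
+ #     def drop(y):
+ #         ...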
+
+
+ def skeletonize_file(
+     path: Union[str, Path],
+     focal: Optional[list[str]] = None,
+     keep_docstrings: bool = True,
+ ) -> SkeletonResult:
+     """Read a Python file and return a :class:`SkeletonResult` with stats."""
+     from .claude_compression import estimate_tokens
+
+     path = Path(path)
+     source = path.read_text(encoding="utf-8")
+     skeleton = skeletonize(source, focal=focal, keep_docstrings=keep_docstrings)
+
+     focal_set: Set[str] = set(focal) if focal else set()
+
+     try:
+         tree = ast.parse(source)
+     except SyntaxError:
+         tree = ast.Module(body=[], type_ignores=[])
+
+     lines = source.splitlines(keepends=True)
+     ranges = _collect_ranges(tree, lines)
+     total = len(ranges)
+     focal_count = sum(1 for r in ranges if _is_focal(r.qual_name, focal_set))
+     skeletonized = total - focal_count
+
+     orig_lines = len(source.splitlines())
+     skel_lines = len(skeleton.splitlines())
+     reduction = (1 - skel_lines / max(orig_lines, 1)) * 100
+
+     orig_tokens = estimate_tokens(source)
+     skel_tokens = estimate_tokens(skeleton)
+     token_reduction = (1 - skel_tokens / max(orig_tokens, 1)) * 100
+
+     return SkeletonResult(
+         source=skeleton,
+         original_lines=orig_lines,
+         skeleton_lines=skel_lines,
+         reduction_pct=round(reduction, 1),
+         functions_total=total,
+         functions_skeletonized=skeletonized,
+         functions_focal=focal_count,
+         original_tokens=orig_tokens,
+         skeleton_tokens=skel_tokens,
+         token_reduction_pct=round(token_reduction, 1),
+     )
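+
+ # Usage sketch (hypothetical path); the returned ``SkeletonResult`` carries
+ # both the skeletonized source and line/token statistics:
+ #
+ #     result = skeletonize_file("src/order_manager.py", focal=["place_order"])
+ #     print(result.reduction_pct, result.token_reduction_pct)
+ #     print(result.source)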
+
+
+ def skeletonize_dir(
+     root: Union[str, Path],
+     focal: Optional[list[str]] = None,
+     keep_docstrings: bool = True,
+     exclude: Optional[list[str]] = None,
+ ) -> Dict[str, SkeletonResult]:
+     """Skeletonize all ``.py`` files under *root*.
+
+     Parameters
+     ----------
+     root:
+         Directory to walk.
+     focal:
+         Focal function names (applied to all files).
+     keep_docstrings:
+         Preserve docstrings in skeletonized functions.
+     exclude:
+         Glob patterns for paths to skip (matched against the path relative
+         to *root*).
+     """
+     root = Path(root)
+     exclude = exclude or []
+     results: Dict[str, SkeletonResult] = {}
+
+     for dirpath, _dirnames, filenames in os.walk(root):
+         for fname in sorted(filenames):
+             if not fname.endswith(".py"):
+                 continue
+             full = Path(dirpath) / fname
+             rel = str(full.relative_to(root))
+
+             if any(fnmatch.fnmatch(rel, pat) for pat in exclude):
+                 continue
+
+             try:
+                 results[rel] = skeletonize_file(full, focal=focal, keep_docstrings=keep_docstrings)
+             except Exception:
+                 # Skip files that can't be read/parsed.
+                 continue
+
+     return results
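+
+ # Usage sketch (hypothetical directory layout and exclude patterns):
+ #
+ #     results = skeletonize_dir(
+ #         "src/",
+ #         focal=["OrderManager.place_order"],
+ #         exclude=["tests/*", "*_generated.py"],
+ #     )
+ #     for rel_path, res in results.items():
+ #         print(rel_path, res.reduction_pct)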
+
+
+ # ---------------------------------------------------------------------------
+ # Class wrapper
+ # ---------------------------------------------------------------------------
+
+ class CodeSkeleton:
+     """Stateful wrapper around the skeletonization free functions.
+
+     Example::
+
+         >>> skel = CodeSkeleton(focal=["place_order"])
+         >>> result = skel.skeletonize_file("order_manager.py")
+     """
+
+     def __init__(
+         self,
+         focal: Optional[list[str]] = None,
+         keep_docstrings: bool = True,
+         exclude: Optional[list[str]] = None,
+     ):
+         self._focal: Set[str] = set(focal) if focal else set()
+         self._keep_docstrings = keep_docstrings
+         self._exclude = list(exclude) if exclude else []
+
+     # -- focal management ---------------------------------------------------
+
+     @property
+     def focal(self) -> Set[str]:
+         """Current set of focal function names."""
+         return set(self._focal)
+
+     def add_focal(self, *names: str) -> None:
+         """Add names to the focal set."""
+         self._focal.update(names)
+
+     def remove_focal(self, *names: str) -> None:
+         """Remove names from the focal set."""
+         self._focal -= set(names)
+
+     # -- delegation ---------------------------------------------------------
+
+     def skeletonize(self, source: str) -> str:
+         """Skeletonize Python *source* using current settings."""
+         return skeletonize(
+             source,
+             focal=list(self._focal),
+             keep_docstrings=self._keep_docstrings,
+         )
+
+     def skeletonize_file(self, path: Union[str, Path]) -> SkeletonResult:
+         """Skeletonize a file and return a :class:`SkeletonResult`."""
+         return skeletonize_file(
+             path,
+             focal=list(self._focal),
+             keep_docstrings=self._keep_docstrings,
+         )
+
+     def skeletonize_dir(self, root: Union[str, Path]) -> Dict[str, SkeletonResult]:
+         """Skeletonize all ``.py`` files under *root*."""
+         return skeletonize_dir(
+             root,
+             focal=list(self._focal),
+             keep_docstrings=self._keep_docstrings,
+             exclude=self._exclude,
+         )
+
+     @staticmethod
+     def summary(results: Dict[str, SkeletonResult]) -> Dict[str, Any]:
+         """Aggregate stats across multiple :class:`SkeletonResult` values."""
+         total_orig_lines = sum(r.original_lines for r in results.values())
+         total_skel_lines = sum(r.skeleton_lines for r in results.values())
+         total_orig_tokens = sum(r.original_tokens for r in results.values())
+         total_skel_tokens = sum(r.skeleton_tokens for r in results.values())
+         total_funcs = sum(r.functions_total for r in results.values())
+         total_skel = sum(r.functions_skeletonized for r in results.values())
+         total_focal = sum(r.functions_focal for r in results.values())
+
+         return {
+             "files": len(results),
+             "original_lines": total_orig_lines,
+             "skeleton_lines": total_skel_lines,
+             "reduction_pct": round((1 - total_skel_lines / max(total_orig_lines, 1)) * 100, 1),
+             "functions_total": total_funcs,
+             "functions_skeletonized": total_skel,
+             "functions_focal": total_focal,
+             "original_tokens": total_orig_tokens,
+             "skeleton_tokens": total_skel_tokens,
+             "token_reduction_pct": round((1 - total_skel_tokens / max(total_orig_tokens, 1)) * 100, 1),
+         }
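+
+ # Usage sketch (hypothetical directory): aggregate stats across a whole tree.
+ #
+ #     skel = CodeSkeleton(focal=["place_order"], exclude=["tests/*"])
+ #     results = skel.skeletonize_dir("src/")
+ #     stats = CodeSkeleton.summary(results)
+ #     # stats["files"], stats["reduction_pct"], stats["token_reduction_pct"], ...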