PyPI - brevix - Versions diffs - 0.4.0__py3-none-any.whl - Mend

brevix 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

brevix/__init__.py +25 -0
brevix/accuracy_guard.py +124 -0
brevix/adaptive.py +37 -0
brevix/cli.py +243 -0
brevix/compressor.py +297 -0
brevix/file_compress.py +80 -0
brevix/install.py +306 -0
brevix/session_logs.py +176 -0
brevix/stats.py +141 -0
brevix/templates/__init__.py +1 -0
brevix/templates/brevix_rules.md +71 -0
brevix/tokens.py +29 -0
brevix-0.4.0.dist-info/METADATA +272 -0
brevix-0.4.0.dist-info/RECORD +18 -0
brevix-0.4.0.dist-info/WHEEL +5 -0
brevix-0.4.0.dist-info/entry_points.txt +2 -0
brevix-0.4.0.dist-info/licenses/LICENSE +21 -0
brevix-0.4.0.dist-info/top_level.txt +1 -0

brevix/__init__.py ADDED Viewed

@@ -0,0 +1,25 @@
+"""Brevix — compress LLM output safely."""
+from brevix.compressor import Compressor, CompressionMode, CompressionResult
+from brevix.accuracy_guard import AccuracyGuard, GuardResult
+from brevix.stats import Stats
+from brevix.adaptive import pick_mode, AdaptiveResult
+from brevix.tokens import count_tokens, count_tokens_method
+from brevix.install import install, list_targets, TARGETS
+# Package version; the CLI imports this value for its --version output.
+__version__ = "0.4.0"
+# Public API of the package: every name re-exported by the imports above.
+__all__ = [
+    "Compressor",
+    "CompressionMode",
+    "CompressionResult",
+    "AccuracyGuard",
+    "GuardResult",
+    "Stats",
+    "pick_mode",
+    "AdaptiveResult",
+    "count_tokens",
+    "count_tokens_method",
+    "install",
+    "list_targets",
+    "TARGETS",
+]

brevix/accuracy_guard.py ADDED Viewed

@@ -0,0 +1,124 @@
+"""Accuracy Guard — semantic similarity check between original and compressed text.
+Verifies that rule-based compression preserves meaning. Uses local
+sentence-transformers (no API cost). Falls back to a content-word
+containment metric tailored for compression (NOT Jaccard, which
+structurally penalizes legitimate compression).
+This is what separates safe production-grade compression from a blind
+text-stripper: every output is scored against the original, and the
+caller can choose to warn, fall back, or block when meaning would be lost.
+"""
+from __future__ import annotations
+import re
+from dataclasses import dataclass
+from typing import Optional
+# Closed-class words that compression is allowed to drop without
+# meaning loss. Excluded from the lexical similarity calculation.
+#
+# The guard tokenizes with r"[a-z0-9_]+", which splits contractions at the
+# apostrophe ("we'll" -> "we", "ll"). The whole-contraction entries are kept
+# for compatibility, but the fragments that survive tokenization ("ll",
+# "ve", "re") must be listed too, otherwise dropping "I'll" or "we're"
+# would count as losing a content word. Single-letter fragments ("d", "m",
+# "s") are already discarded by the length filter.
+_STOPWORDS = frozenset({
+    "a", "an", "the", "is", "are", "was", "were", "be", "been", "being",
+    "am", "do", "does", "did", "have", "has", "had", "of", "to", "in",
+    "on", "at", "for", "by", "with", "from", "as", "and", "or", "but",
+    "so", "if", "then", "than", "that", "this", "these", "those",
+    "it", "its", "i", "you", "he", "she", "we", "they", "them",
+    "your", "my", "our", "his", "her", "their",
+    "just", "really", "basically", "actually", "simply", "very",
+    "quite", "perhaps", "maybe", "essentially", "literally",
+    "however", "therefore", "thus", "hence", "moreover",
+    # "of" is already listed above; together with "course" it covers "of course".
+    "sure", "certainly", "course",
+    "i'd", "i'll", "i've", "i'm", "we'll", "we're",
+    # Contraction fragments produced by the apostrophe-splitting tokenizer.
+    "ll", "ve", "re",
+    "let", "let's",
+})
+@dataclass
+class GuardResult:
+    """Outcome of a single Accuracy Guard comparison."""
+    # Similarity score reported by the guard for this pair of texts.
+    similarity: float
+    # Minimum similarity that was required to pass.
+    threshold: float
+    # Whether the guard accepted the compressed text.
+    passed: bool
+    # Label of the scoring method that produced `similarity`.
+    method: str
+    @property
+    def warning(self) -> Optional[str]:
+        """Return a human-readable warning when the check failed, else None."""
+        if not self.passed:
+            headline = (
+                f"Accuracy Guard: similarity {self.similarity:.2f} below threshold "
+                f"{self.threshold:.2f} ({self.method})."
+            )
+            return headline + " Compression may have lost meaning."
+        return None
+class AccuracyGuard:
+    """Check whether compressed text preserves the meaning of the original.
+    Scores with sentence-transformers embeddings when that package can be
+    imported; otherwise scores with a lexical content-word containment
+    metric. A result passes when its score is at least `threshold`.
+    """
+    def __init__(self, threshold: float = 0.85, model_name: str = "all-MiniLM-L6-v2") -> None:
+        """Store the pass threshold and the embedding model name.
+        The model is not loaded here; loading is deferred to the first check.
+        """
+        self.threshold = threshold
+        self.model_name = model_name
+        # None = load not attempted yet; False = sentence-transformers is missing.
+        self._model = None
+    def _load_model(self):
+        """Return the cached embedding model, or False if it cannot be imported."""
+        if self._model is None:
+            try:
+                from sentence_transformers import SentenceTransformer
+                self._model = SentenceTransformer(self.model_name)
+            except ImportError:
+                # Remember the failure so the import is not retried on every call.
+                self._model = False
+        return self._model
+    def check(self, original: str, compressed: str) -> GuardResult:
+        """Score `compressed` against `original` and compare with the threshold.
+        Returns a GuardResult whose `method` is "empty" when both texts are
+        blank, "semantic" when embeddings were used, and
+        "content-containment" for the lexical metric.
+        """
+        original_blank = not original.strip()
+        compressed_blank = not compressed.strip()
+        if original_blank and compressed_blank:
+            # Nothing to compare and nothing was lost.
+            return GuardResult(similarity=1.0, threshold=self.threshold, passed=True, method="empty")
+        if original_blank or compressed_blank:
+            # Exactly one side is blank. This used to pass unconditionally, which
+            # let a compression that erased all content through the guard. Score
+            # it lexically instead: an emptied text with content words scores 0.0,
+            # while dropping pure filler still scores 1.0.
+            similarity = self._content_containment(original, compressed)
+            method = "content-containment"
+        else:
+            model = self._load_model()
+            if model:
+                similarity = self._semantic_similarity(original, compressed, model)
+                method = "semantic"
+            else:
+                similarity = self._content_containment(original, compressed)
+                method = "content-containment"
+        return GuardResult(
+            similarity=similarity,
+            threshold=self.threshold,
+            passed=similarity >= self.threshold,
+            method=method,
+        )
+    @staticmethod
+    def _semantic_similarity(a: str, b: str, model) -> float:
+        """Return the cosine similarity of the two texts' embeddings, clamped to [0, 1]."""
+        from sentence_transformers import util
+        emb = model.encode([a, b], convert_to_tensor=True, show_progress_bar=False)
+        score = util.cos_sim(emb[0], emb[1]).item()
+        # Cosine similarity can be negative; the guard reports scores in [0, 1].
+        return float(max(0.0, min(1.0, score)))
+    @staticmethod
+    def _tokenize(text: str) -> list[str]:
+        """Lower-case `text` and split it into runs of letters, digits and underscores."""
+        return re.findall(r"[a-z0-9_]+", text.lower())
+    @classmethod
+    def _content_tokens(cls, text: str) -> set[str]:
+        """Return the distinct tokens of `text` that are not stopwords and are longer than one character."""
+        return {t for t in cls._tokenize(text) if t not in _STOPWORDS and len(t) > 1}
+    @classmethod
+    def _content_containment(cls, original: str, compressed: str) -> float:
+        """Fraction of original content words preserved in compressed text.
+        Designed for compression: dropping stopwords/articles is expected and
+        does NOT lower the score. Score drops only when meaningful content
+        words disappear or new unrelated terms appear.
+        When the original has no content words, returns 1.0 if the compressed
+        text has none either, else 0.5.
+        """
+        orig_tokens = cls._content_tokens(original)
+        comp_tokens = cls._content_tokens(compressed)
+        if not orig_tokens:
+            return 1.0 if not comp_tokens else 0.5
+        kept = len(orig_tokens & comp_tokens) / len(orig_tokens)
+        # Share of compressed content words that never appeared in the original.
+        spurious = len(comp_tokens - orig_tokens) / len(comp_tokens) if comp_tokens else 0.0
+        # Spurious terms are penalised at half weight; clamp the result to [0, 1].
+        return max(0.0, min(1.0, kept - 0.5 * spurious))

brevix/adaptive.py ADDED Viewed

@@ -0,0 +1,37 @@
+"""Adaptive mode — auto-pick compression level per text characteristics.
+Heuristic: pick the most aggressive mode that still passes Accuracy Guard.
+When sentence-transformers is not installed, the guard itself scores with
+its lexical content-containment metric instead of embeddings.
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+from brevix.accuracy_guard import AccuracyGuard, GuardResult
+from brevix.compressor import Compressor, CompressionMode, CompressionResult
+@dataclass
+class AdaptiveResult:
+    # Result bundle returned by pick_mode.
+    # Compression mode that produced `compression`.
+    chosen_mode: CompressionMode
+    # Output of running the compressor in `chosen_mode`.
+    compression: CompressionResult
+    # Guard verdict for `compression` against the input text.
+    guard: GuardResult
+def pick_mode(text: str, threshold: float = 0.85, guard: AccuracyGuard | None = None) -> AdaptiveResult:
+    """Choose the strongest compression mode whose output passes the guard.
+    Modes are tried in the order ultra, full, lite. The first passing attempt
+    is returned; if none passes, the lite attempt is returned together with
+    its failing guard verdict. `threshold` is only used to build a guard when
+    `guard` is not supplied.
+    """
+    checker = guard or AccuracyGuard(threshold=threshold)
+    ladder = (CompressionMode.ULTRA, CompressionMode.FULL, CompressionMode.LITE)
+    candidate: AdaptiveResult | None = None
+    for level in ladder:
+        squeezed = Compressor(level).compress(text)
+        verdict = checker.check(text, squeezed.compressed)
+        candidate = AdaptiveResult(chosen_mode=level, compression=squeezed, guard=verdict)
+        if verdict.passed:
+            # Most aggressive passing mode found; stop descending the ladder.
+            break
+    assert candidate is not None
+    return candidate

brevix/cli.py ADDED Viewed

@@ -0,0 +1,243 @@
+"""Brevix CLI entrypoint."""
+from __future__ import annotations
+import argparse
+import sys
+from pathlib import Path
+from brevix import (
+    Compressor,
+    CompressionMode,
+    AccuracyGuard,
+    Stats,
+    pick_mode,
+    count_tokens,
+    count_tokens_method,
+    install as install_target,
+    list_targets,
+    TARGETS,
+    __version__,
+)
+from brevix.file_compress import compress_file
+def _cmd_compress(args: argparse.Namespace) -> int:
+    """Handle `brevix compress`: compress text and print the result to stdout.
+    The text comes from `args.text`, or from stdin when it is "-" or empty.
+    With `--mode auto` the mode is chosen by pick_mode; otherwise the given
+    mode is used and the guard runs only when `--guard` is set. A guard
+    failure is always reported on stderr. Returns 2 when `--strict` caused
+    the original text to be emitted instead, 0 otherwise.
+    """
+    text = args.text
+    if text == "-" or not text:
+        text = sys.stdin.read()
+    if args.mode == "auto":
+        adaptive = pick_mode(text, threshold=args.threshold)
+        result = adaptive.compression
+        guard_result = adaptive.guard
+        chosen = adaptive.chosen_mode
+        if not guard_result.passed:
+            if args.strict:
+                sys.stderr.write(
+                    f"[brevix] auto: no mode passed guard ({guard_result.similarity:.2f} < {args.threshold:.2f}). "
+                    f"Emitting original.\n"
+                )
+                print(text)
+                return 2
+            # Non-strict auto mode still emits the compressed text, so warn the
+            # same way the explicit --guard path does rather than stay silent.
+            sys.stderr.write(guard_result.warning + "\n")
+        if args.verbose:
+            sys.stderr.write(f"[brevix] auto picked mode={chosen.value}\n")
+    else:
+        chosen = CompressionMode(args.mode)
+        result = Compressor(mode=chosen).compress(text)
+        guard_result = None
+        if args.guard:
+            guard = AccuracyGuard(threshold=args.threshold)
+            guard_result = guard.check(result.original, result.compressed)
+            if not guard_result.passed:
+                sys.stderr.write(guard_result.warning + "\n")
+                if args.strict:
+                    sys.stderr.write("Strict mode: emitting original instead.\n")
+                    print(result.original)
+                    return 2
+    # Stats are recorded only for compressions that are actually emitted.
+    if not args.no_stats:
+        Stats().record(
+            mode=chosen.value,
+            chars_saved=result.char_savings,
+            tokens_saved=result.token_savings_estimate,
+        )
+    print(result.compressed)
+    if args.verbose:
+        # Diagnostics go to stderr so stdout carries only the compressed text.
+        orig_tok = count_tokens(result.original)
+        comp_tok = count_tokens(result.compressed)
+        method = count_tokens_method()
+        sys.stderr.write(
+            f"\n[brevix] mode={chosen.value} "
+            f"chars: {len(result.original)}→{len(result.compressed)} "
+            f"({result.char_savings_pct}% saved) "
+            f"tokens ({method}): {orig_tok}→{comp_tok}\n"
+        )
+        if guard_result:
+            sys.stderr.write(
+                f"[brevix] guard: sim={guard_result.similarity:.3f} "
+                f"({guard_result.method}) pass={guard_result.passed}\n"
+            )
+    return 0
+def _cmd_compress_file(args: argparse.Namespace) -> int:
+    """Handle `brevix compress-file`: compress one file and report the outcome.
+    Returns 2 when the path is missing or is a directory, 1 when the file
+    was skipped, and 0 otherwise.
+    """
+    chosen_mode = CompressionMode(args.mode)
+    try:
+        outcome = compress_file(
+            args.path,
+            mode=chosen_mode,
+            guard=not args.no_guard,
+            threshold=args.threshold,
+            dry_run=args.dry_run,
+            force=args.force,
+        )
+    except (FileNotFoundError, IsADirectoryError) as exc:
+        sys.stderr.write(f"Error: {exc}\n")
+        return 2
+    if outcome.skipped:
+        sys.stderr.write(f"Skipped: {outcome.reason}\n")
+        return 1
+    dry_tag = " (dry-run)" if args.dry_run else ""
+    savings = outcome.compression
+    print(
+        f"Compressed {outcome.path}{dry_tag}: "
+        f"{savings.char_savings} chars saved "
+        f"({savings.char_savings_pct}%)"
+    )
+    # Backup and guard lines are printed only when the result carries them.
+    backup_path = outcome.backup
+    if backup_path:
+        print(f"Backup: {backup_path}")
+    verdict = outcome.guard
+    if verdict:
+        print(
+            f"Guard: sim={verdict.similarity:.3f} "
+            f"({verdict.method}) pass={verdict.passed}"
+        )
+    return 0
+def _cmd_stats(args: argparse.Namespace) -> int:
+    """Handle `brevix stats`: print the stats summary, or reset with `--reset`.
+    Returns 2 when building the summary raises ValueError, 0 otherwise.
+    """
+    tracker = Stats()
+    if not args.reset:
+        try:
+            print(tracker.summary(since=args.since, real=args.real, share=args.share))
+        except ValueError as err:
+            sys.stderr.write(f"Error: {err}\n")
+            return 2
+        return 0
+    tracker.reset()
+    print("Stats reset.")
+    return 0
+def _cmd_check(args: argparse.Namespace) -> int:
+    """Handle `brevix check`: score two texts and print the guard verdict.
+    Returns 0 when the guard passes and 1 when it fails.
+    """
+    verdict = AccuracyGuard(threshold=args.threshold).check(args.original, args.compressed)
+    report = (
+        f"Similarity: {verdict.similarity:.4f}  "
+        f"Threshold: {verdict.threshold:.2f}  "
+        f"Passed: {verdict.passed}  "
+        f"Method: {verdict.method}"
+    )
+    print(report)
+    if verdict.passed:
+        return 0
+    return 1
+def _cmd_count(args: argparse.Namespace) -> int:
+    """Handle `brevix count`: print the token count, counting method and character count.
+    Reads stdin when the text argument is "-" or empty. Always returns 0.
+    """
+    payload = args.text
+    if not payload or payload == "-":
+        payload = sys.stdin.read()
+    token_total = count_tokens(payload)
+    method = count_tokens_method()
+    print(f"{token_total} tokens ({method}, {len(payload)} chars)")
+    return 0
+def _cmd_install(args: argparse.Namespace) -> int:
+    """Handle `brevix install`: install rules for a target tool into a project.
+    `--list` prints the available targets and `--dry-run` only previews the
+    action. Returns 2 when the target is missing or unknown, 0 otherwise.
+    """
+    if args.list:
+        print(list_targets())
+        return 0
+    target = args.target
+    if target is None:
+        sys.stderr.write("Error: target required (or use --list).\n")
+        return 2
+    # "all" is accepted in addition to the names registered in TARGETS.
+    if not (target == "all" or target in TARGETS):
+        sys.stderr.write(f"Error: unknown target '{target}'. Run `brevix install --list`.\n")
+        return 2
+    root = Path(args.path).resolve()
+    if args.dry_run:
+        print(f"[dry-run] Would install '{target}' into {root}.")
+        return 0
+    written_files = install_target(target, root)
+    print(f"Brevix installed for target '{target}' in {root}:")
+    for written in written_files:
+        # Show paths relative to the project root when they live under it.
+        try:
+            shown = written.relative_to(root)
+        except ValueError:
+            shown = written
+        print(f"  + {shown}")
+    return 0
+def main(argv: list[str] | None = None) -> int:
+    """Build the `brevix` argument parser and dispatch to the chosen sub-command.
+    `argv` is passed to argparse unchanged; with None, argparse reads the
+    process arguments. Returns the integer exit code produced by the
+    selected handler.
+    """
+    parser = argparse.ArgumentParser(
+        prog="brevix",
+        description="Compress LLM output safely. Save tokens without breaking your code.",
+    )
+    parser.add_argument("--version", action="version", version=f"brevix {__version__}")
+    # A sub-command is mandatory; each one stores its handler in `func`.
+    sub = parser.add_subparsers(dest="command", required=True)
+    # compress: text from an argument or stdin, optional guard and strict fallback.
+    p_compress = sub.add_parser("compress", help="Compress text")
+    p_compress.add_argument("text", nargs="?", default="-", help="Text to compress, or '-' for stdin")
+    p_compress.add_argument("--mode", choices=["lite", "full", "ultra", "auto"], default="full")
+    p_compress.add_argument("--guard", action="store_true", help="Enable Accuracy Guard")
+    p_compress.add_argument("--strict", action="store_true", help="Fall back to original if guard fails")
+    p_compress.add_argument("--threshold", type=float, default=0.85)
+    p_compress.add_argument("--no-stats", action="store_true", help="Don't record to local stats")
+    p_compress.add_argument("-v", "--verbose", action="store_true")
+    p_compress.set_defaults(func=_cmd_compress)
+    # compress-file: no "auto" mode here; the guard is on unless --no-guard is given.
+    p_cf = sub.add_parser("compress-file", help="Compress a file in place (with .original backup)")
+    p_cf.add_argument("path")
+    p_cf.add_argument("--mode", choices=["lite", "full", "ultra"], default="full")
+    p_cf.add_argument("--threshold", type=float, default=0.85)
+    p_cf.add_argument("--no-guard", action="store_true")
+    p_cf.add_argument("--dry-run", action="store_true")
+    p_cf.add_argument("--force", action="store_true", help="Overwrite even if guard fails")
+    p_cf.set_defaults(func=_cmd_compress_file)
+    # stats: show or reset the locally recorded savings.
+    p_stats = sub.add_parser("stats", help="Show local stats")
+    p_stats.add_argument("--reset", action="store_true")
+    p_stats.add_argument("--since", default="all", help="Time window: 7d, 24h, 30m, all")
+    p_stats.add_argument("--real", action="store_true", help="Parse real Claude Code session logs")
+    p_stats.add_argument("--share", action="store_true", help="One-line tweet-ready output")
+    p_stats.set_defaults(func=_cmd_stats)
+    # check: run the guard directly on two given texts.
+    p_check = sub.add_parser("check", help="Check similarity between two texts")
+    p_check.add_argument("original")
+    p_check.add_argument("compressed")
+    p_check.add_argument("--threshold", type=float, default=0.85)
+    p_check.set_defaults(func=_cmd_check)
+    # count: token count for an argument or stdin.
+    p_count = sub.add_parser("count", help="Count tokens in text")
+    p_count.add_argument("text", nargs="?", default="-")
+    p_count.set_defaults(func=_cmd_count)
+    # install: the target is optional at parse time so that --list works alone;
+    # the handler rejects a missing target itself.
+    p_install = sub.add_parser(
+        "install",
+        help="Install Brevix rules into a project for a specific LLM coding tool",
+    )
+    p_install.add_argument(
+        "target",
+        nargs="?",
+        help="Target tool. Use --list to see all options.",
+    )
+    p_install.add_argument("--path", default=".", help="Project root (default: cwd)")
+    p_install.add_argument("--list", action="store_true", help="List available targets")
+    p_install.add_argument("--dry-run", action="store_true", help="Preview without writing")
+    p_install.set_defaults(func=_cmd_install)
+    args = parser.parse_args(argv)
+    # Dispatch to the handler registered by the selected sub-command.
+    return args.func(args)
+if __name__ == "__main__":
+    sys.exit(main())