PyPI - entroly - Versions diffs - 0.2.0__py3-none-any.whl - Mend

entroly 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

entroly/__init__.py +27 -0
entroly/adaptive_pruner.py +161 -0
entroly/checkpoint.py +302 -0
entroly/config.py +74 -0
entroly/multimodal.py +880 -0
entroly/prefetch.py +297 -0
entroly/provenance.py +184 -0
entroly/query_refiner.py +160 -0
entroly/server.py +1171 -0
entroly-0.2.0.dist-info/METADATA +302 -0
entroly-0.2.0.dist-info/RECORD +13 -0
entroly-0.2.0.dist-info/WHEEL +4 -0
entroly-0.2.0.dist-info/entry_points.txt +2 -0

entroly/__init__.py ADDED Viewed

@@ -0,0 +1,27 @@
+"""
+Entroly — Information-Theoretic Context Optimization for Agentic AI
+========================================================================
+An MCP server that mathematically optimizes what goes into an LLM's
+context window. Uses knapsack dynamic programming, Shannon entropy scoring,
+SimHash deduplication, and predictive pre-fetching to cut token costs by
+50–70% while improving agent accuracy.
+Quick Setup (Cursor)::
+    Add to .cursor/mcp.json:
+    {
+      "mcpServers": {
+        "entroly": {
+          "command": "entroly"
+        }
+      }
+    }
+Quick Setup (Claude Code)::
+    claude mcp add entroly -- entroly
+"""
+__version__ = "0.2.0"

entroly/adaptive_pruner.py ADDED Viewed

@@ -0,0 +1,161 @@
+"""
+AdaptivePruner Bridge for Entroly
+======================================
+Wires ebbiforge_core.AdaptivePruner into the feedback loop.
+The key addition: `historical_success` — a dimension that entroly's
+Rust engine doesn't have. Over time, the RL weight updates learn which
+scoring features matter most for THIS user's codebase.
+Weight update rule (from ebbiforge Rust source):
+    weight += lr * feedback * feature_value  (clamped to [-1, 1])
+Falls back to no-op if ebbiforge_core is not installed.
+"""
+from __future__ import annotations
+import logging
+from typing import Optional, Dict, Any
+logger = logging.getLogger(__name__)
+try:
+    from ebbiforge_core import AdaptivePruner as _RustPruner, ContextFragment
+    _PRUNER_AVAILABLE = True
+except ImportError:
+    _PRUNER_AVAILABLE = False
+    _RustPruner = None
+try:
+    from ebbiforge_core import CodeQualityGuard as _RustGuard
+    _GUARD_AVAILABLE = True
+except ImportError:
+    _GUARD_AVAILABLE = False
+    _RustGuard = None
+class EntrolyPruner:
+    """
+    Adaptive RL pruner backed by ebbiforge_core.AdaptivePruner.
+    Extends entroly's Wilson-score feedback with a `historical_success`
+    dimension: fragments that previously helped get boosted, those that didn't
+    get down-weighted over time.
+    Zero-config: if ebbiforge_core is unavailable, all methods are no-ops.
+    """
+    def __init__(self):
+        self._pruner = _RustPruner() if _PRUNER_AVAILABLE else None
+        self._fragment_features: Dict[str, Dict[str, float]] = {}
+        if _PRUNER_AVAILABLE:
+            logger.info("AdaptivePruner: ebbiforge_core available — RL weight learning active")
+        else:
+            logger.info("AdaptivePruner: ebbiforge_core not available — using static weights")
+    @property
+    def available(self) -> bool:
+        return _PRUNER_AVAILABLE and self._pruner is not None
+    def record_fragment_features(
+        self,
+        fragment_id: str,
+        recency: float,
+        relevance: float,
+        complexity: float,
+        was_selected: bool,
+    ) -> None:
+        """
+        Record the scoring features for a fragment at selection time.
+        Called from optimize_context for each selected fragment.
+        These are stored until feedback arrives.
+        """
+        self._fragment_features[fragment_id] = {
+            "recency": recency,
+            "relevance": relevance,
+            "complexity": complexity,
+            "was_selected": was_selected,
+        }
+    def apply_feedback(self, fragment_id: str, feedback: float) -> bool:
+        """
+        Apply user feedback to update RL weights for this fragment's features.
+        Args:
+            fragment_id: The fragment that received feedback.
+            feedback:    +1.0 = helpful, -1.0 = not helpful, 0.0 = neutral.
+        Returns:
+            True if weights were updated, False if no feature record found.
+        """
+        if not self.available:
+            return False
+        features = self._fragment_features.get(fragment_id)
+        if not features:
+            return False
+        # historical_success: 1.0 if this fragment was previously selected, else 0.5
+        historical_success = 1.0 if features.get("was_selected") else 0.5
+        self._pruner.update_policy(
+            feedback=feedback,
+            recency=features["recency"],
+            relevance=features["relevance"],
+            historical_success=historical_success,
+            complexity=features["complexity"],
+        )
+        return True
+    def score_fragment(
+        self,
+        recency: float,
+        relevance: float,
+        historical_success: float,
+        complexity: float,
+    ) -> Optional[float]:
+        """
+        Score a fragment using current learned RL weights.
+        Returns None if pruner unavailable (use entroly's own scoring).
+        """
+        if not self.available:
+            return None
+        return self._pruner.score_fragment(recency, relevance, historical_success, complexity)
+class FragmentGuard:
+    """
+    Code quality scanner backed by ebbiforge_core.CodeQualityGuard.
+    Scans each ingested fragment for:
+    - Hardcoded API secrets  (sk-..., API_KEY = "...")
+    - unsafe Rust blocks
+    - TODO comments
+    - Console spam (>5 log statements)
+    Returns a list of issues — empty means clean.
+    Zero-config: no-op if ebbiforge_core unavailable.
+    """
+    def __init__(self):
+        self._guard = _RustGuard() if _GUARD_AVAILABLE else None
+        if _GUARD_AVAILABLE:
+            logger.info("FragmentGuard: CodeQualityGuard active — scanning ingested fragments")
+    @property
+    def available(self) -> bool:
+        return _GUARD_AVAILABLE and self._guard is not None
+    def scan(self, content: str, source: str = "") -> list[str]:
+        """
+        Scan fragment content for code quality issues.
+        Returns list of issue strings (empty = clean).
+        """
+        if not self.available or not content:
+            return []
+        try:
+            return list(self._guard.review_code(content, source))
+        except Exception:
+            return []

entroly/checkpoint.py ADDED Viewed

@@ -0,0 +1,302 @@
+"""
+Checkpoint & Resume System
+===========================
+Serializes the full agent state to disk so that multi-step tasks
+can resume from the last checkpoint instead of restarting from scratch.
+The Problem:
+  An agent working on a 10-step refactoring task fails at step 7
+  (API timeout, context overflow, rate limit). Today, the developer
+  must restart the entire task — re-reading files, re-planning,
+  re-executing steps 1-6 — wasting time and tokens.
+The Solution:
+  Entroly automatically checkpoints after every N tool calls:
+    - All tracked context fragments (with scores)
+    - The dedup index state
+    - Co-access patterns from the pre-fetcher
+    - Custom metadata (task plan, current step, etc.)
+  On resume, the full state is restored in <100ms, and the agent
+  picks up exactly where it left off.
+Storage Format:
+  JSON for human readability and debuggability. Gzipped for
+  space efficiency. Typical checkpoint: 50-200 KB compressed.
+References:
+  - Agentic Plan Caching (arXiv 2025) — reusing structured plans
+  - SagaLLM (arXiv 2025) — transactional guarantees for multi-agent planning
+"""
+from __future__ import annotations
+import gzip
+import json
+import os
+import time
+from dataclasses import asdict, dataclass
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+from entroly_core import ContextFragment
+@dataclass
+class Checkpoint:
+    """
+    A serialized snapshot of the Entroly state.
+    Plain data holder: every field is a JSON-compatible value, because
+    CheckpointManager writes these fields to a gzipped JSON file and rebuilds
+    the object from that file on load. Fragments are kept as dicts here, not
+    as ContextFragment objects.
+    """
+    checkpoint_id: str
+    """Unique ID for this checkpoint (timestamp-based)."""
+    timestamp: float
+    """Unix timestamp when this checkpoint was created."""
+    current_turn: int
+    """The turn number at checkpoint time."""
+    fragments: List[Dict[str, Any]]
+    """Serialized context fragments."""
+    dedup_fingerprints: Dict[str, int]
+    """fragment_id → SimHash fingerprint mapping."""
+    co_access_data: Dict[str, Dict[str, int]]
+    """Pre-fetcher co-access counts."""
+    metadata: Dict[str, Any]
+    """Custom metadata (task plan, current step, etc.)."""
+    stats: Dict[str, Any]
+    """Performance stats at checkpoint time."""
+def _fragment_to_dict(frag: ContextFragment) -> Dict[str, Any]:
+    """Serialize a ContextFragment to a JSON-safe dict."""
+    return {
+        "fragment_id": frag.fragment_id,
+        "content": frag.content,
+        "token_count": frag.token_count,
+        "source": frag.source,
+        "recency_score": round(frag.recency_score, 6),
+        "frequency_score": round(frag.frequency_score, 6),
+        "semantic_score": round(frag.semantic_score, 6),
+        "entropy_score": round(frag.entropy_score, 6),
+        "turn_created": frag.turn_created,
+        "turn_last_accessed": frag.turn_last_accessed,
+        "access_count": frag.access_count,
+        "is_pinned": frag.is_pinned,
+        "simhash": frag.simhash,
+    }
+def _dict_to_fragment(d: Dict[str, Any]) -> ContextFragment:
+    """Deserialize a dict back to a ContextFragment."""
+    frag = ContextFragment(
+        fragment_id=d["fragment_id"],
+        content=d["content"],
+        token_count=d["token_count"],
+        source=d.get("source", ""),
+    )
+    frag.recency_score = d.get("recency_score", 0.0)
+    frag.frequency_score = d.get("frequency_score", 0.0)
+    frag.semantic_score = d.get("semantic_score", 0.0)
+    frag.entropy_score = d.get("entropy_score", 0.5)
+    frag.turn_created = d.get("turn_created", 0)
+    frag.turn_last_accessed = d.get("turn_last_accessed", 0)
+    frag.access_count = d.get("access_count", 0)
+    frag.is_pinned = d.get("is_pinned", False)
+    frag.simhash = d.get("simhash", 0)
+    return frag
+class CheckpointManager:
+    """
+    Manages saving and restoring Entroly state.
+    Checkpoints are stored as gzipped JSON files in the checkpoint
+    directory. Each checkpoint includes the full state needed to
+    resume a session without any data loss.
+    Auto-checkpoint:
+      If auto_interval is set, the manager automatically creates
+      a checkpoint every N tool calls. This provides crash recovery
+      without explicit save calls.
+    Retention:
+      Keeps the last `max_checkpoints` checkpoints and deletes older
+      ones to prevent unbounded disk usage.
+    """
+    def __init__(
+        self,
+        checkpoint_dir: str | Path,
+        auto_interval: int = 5,
+        max_checkpoints: int = 10,
+    ):
+        self.checkpoint_dir = Path(checkpoint_dir)
+        self.auto_interval = auto_interval
+        self.max_checkpoints = max_checkpoints
+        self._tool_calls_since_checkpoint = 0
+        self._total_checkpoints_created = 0
+        # Ensure directory exists
+        self.checkpoint_dir.mkdir(parents=True, exist_ok=True)
+    def should_auto_checkpoint(self) -> bool:
+        """Check if an auto-checkpoint is due."""
+        self._tool_calls_since_checkpoint += 1
+        return self._tool_calls_since_checkpoint >= self.auto_interval
+    def save(
+        self,
+        fragments: List[ContextFragment],
+        dedup_fingerprints: Dict[str, int],
+        co_access_data: Dict[str, Dict[str, int]],
+        current_turn: int,
+        metadata: Optional[Dict[str, Any]] = None,
+        stats: Optional[Dict[str, Any]] = None,
+    ) -> str:
+        """
+        Save a checkpoint to disk.
+        Returns the checkpoint file path.
+        """
+        checkpoint_id = f"ckpt_{int(time.time())}_{self._total_checkpoints_created}"
+        checkpoint = Checkpoint(
+            checkpoint_id=checkpoint_id,
+            timestamp=time.time(),
+            current_turn=current_turn,
+            fragments=[_fragment_to_dict(f) for f in fragments],
+            dedup_fingerprints={k: v for k, v in dedup_fingerprints.items()},
+            co_access_data={
+                k: dict(v) for k, v in co_access_data.items()
+            },
+            metadata=metadata or {},
+            stats=stats or {},
+        )
+        # Serialize to gzipped JSON
+        filepath = self.checkpoint_dir / f"{checkpoint_id}.json.gz"
+        data = json.dumps({
+            "checkpoint_id": checkpoint.checkpoint_id,
+            "timestamp": checkpoint.timestamp,
+            "current_turn": checkpoint.current_turn,
+            "fragments": checkpoint.fragments,
+            "dedup_fingerprints": checkpoint.dedup_fingerprints,
+            "co_access_data": checkpoint.co_access_data,
+            "metadata": checkpoint.metadata,
+            "stats": checkpoint.stats,
+        }, separators=(",", ":"))
+        with gzip.open(filepath, "wt", encoding="utf-8") as f:
+            f.write(data)
+        self._tool_calls_since_checkpoint = 0
+        self._total_checkpoints_created += 1
+        # Enforce retention policy
+        self._prune_old_checkpoints()
+        return str(filepath)
+    def load_latest(self) -> Optional[Checkpoint]:
+        """
+        Load the most recent checkpoint.
+        Returns None if no checkpoints exist or all are unreadable.
+        """
+        checkpoints = sorted(
+            self.checkpoint_dir.glob("ckpt_*.json.gz"),
+            key=lambda p: p.stat().st_mtime,
+            reverse=True,
+        )
+        for cp in checkpoints:
+            result = self._load_file(cp)
+            if result is not None:
+                return result
+        return None
+    def load_by_id(self, checkpoint_id: str) -> Optional[Checkpoint]:
+        """Load a specific checkpoint by its ID."""
+        filepath = self.checkpoint_dir / f"{checkpoint_id}.json.gz"
+        if not filepath.exists():
+            return None
+        return self._load_file(filepath)
+    def list_checkpoints(self) -> List[Dict[str, Any]]:
+        """List all available checkpoints with metadata."""
+        checkpoints = sorted(
+            self.checkpoint_dir.glob("ckpt_*.json.gz"),
+            key=lambda p: p.stat().st_mtime,
+            reverse=True,
+        )
+        result = []
+        for cp_path in checkpoints:
+            try:
+                stat = cp_path.stat()
+                result.append({
+                    "checkpoint_id": cp_path.stem.replace(".json", ""),
+                    "path": str(cp_path),
+                    "size_bytes": stat.st_size,
+                    "created": stat.st_mtime,
+                })
+            except OSError:
+                continue
+        return result
+    def restore_fragments(self, checkpoint: Checkpoint) -> List[ContextFragment]:
+        """Extract ContextFragment objects from a checkpoint."""
+        return [_dict_to_fragment(d) for d in checkpoint.fragments]
+    def _load_file(self, filepath: Path) -> Optional[Checkpoint]:
+        """Load and parse a checkpoint file. Returns None if corrupted."""
+        try:
+            with gzip.open(filepath, "rt", encoding="utf-8") as f:
+                data = json.loads(f.read())
+        except (EOFError, gzip.BadGzipFile, json.JSONDecodeError, OSError):
+            return None
+        return Checkpoint(
+            checkpoint_id=data["checkpoint_id"],
+            timestamp=data["timestamp"],
+            current_turn=data["current_turn"],
+            fragments=data["fragments"],
+            dedup_fingerprints=data.get("dedup_fingerprints", {}),
+            co_access_data=data.get("co_access_data", {}),
+            metadata=data.get("metadata", {}),
+            stats=data.get("stats", {}),
+        )
+    def _prune_old_checkpoints(self) -> None:
+        """Remove old checkpoints beyond the retention limit."""
+        checkpoints = sorted(
+            self.checkpoint_dir.glob("ckpt_*.json.gz"),
+            key=lambda p: p.stat().st_mtime,
+            reverse=True,
+        )
+        for old_cp in checkpoints[self.max_checkpoints:]:
+            try:
+                old_cp.unlink()
+            except OSError:
+                pass
+    def stats(self) -> dict:
+        checkpoints = list(self.checkpoint_dir.glob("ckpt_*.json.gz"))
+        total_size = sum(cp.stat().st_size for cp in checkpoints)
+        return {
+            "total_checkpoints": len(checkpoints),
+            "total_size_bytes": total_size,
+            "total_size_mb": round(total_size / (1024 * 1024), 2),
+            "tool_calls_since_last": self._tool_calls_since_checkpoint,
+            "auto_interval": self.auto_interval,
+        }

entroly/config.py ADDED Viewed

@@ -0,0 +1,74 @@
+"""
+Entroly Configuration
+==========================
+Central configuration for the context optimization engine.
+All tunable parameters live here — no magic numbers buried in code.
+"""
+from dataclasses import dataclass, field
+from pathlib import Path
+import os
+@dataclass
+class EntrolyConfig:
+    """Configuration for the Entroly MCP server."""
+    # ── Token Budget ────────────────────────────────────────────────────
+    default_token_budget: int = 128_000
+    """Default max tokens for context optimization (matches GPT-4 Turbo)."""
+    max_fragments: int = 10_000
+    """Maximum context fragments tracked per session."""
+    # ── Knapsack Optimizer Weights ──────────────────────────────────────
+    weight_recency: float = 0.30
+    """How much to weight recency (turns since last access)."""
+    weight_frequency: float = 0.25
+    """How much to weight access frequency."""
+    weight_semantic_sim: float = 0.25
+    """How much to weight semantic similarity to current query."""
+    weight_entropy: float = 0.20
+    """How much to weight information density (Shannon entropy)."""
+    # ── Ebbinghaus Decay ────────────────────────────────────────────────
+    decay_half_life_turns: int = 15
+    """Number of turns for a fragment's relevance to halve."""
+    min_relevance_threshold: float = 0.05
+    """Fragments below this relevance get evicted entirely."""
+    # ── Deduplication ───────────────────────────────────────────────────
+    dedup_similarity_threshold: float = 0.92
+    """SimHash Jaccard threshold above which fragments are considered duplicates."""
+    # ── Predictive Pre-fetch ────────────────────────────────────────────
+    prefetch_depth: int = 2
+    """How many hops in the call graph to pre-fetch."""
+    max_prefetch_fragments: int = 10
+    """Maximum fragments to pre-fetch per symbol lookup."""
+    # ── Checkpoint ──────────────────────────────────────────────────────
+    checkpoint_dir: Path = field(
+        default_factory=lambda: Path(
+            os.environ.get(
+                "ENTROLY_DIR",
+                os.path.expanduser("~/.entroly/checkpoints"),
+            )
+        )
+    )
+    """Directory for persisting checkpoint state."""
+    auto_checkpoint_interval: int = 5
+    """Auto-checkpoint every N tool calls."""
+    # ── Server ──────────────────────────────────────────────────────────
+    server_name: str = "entroly"
+    server_version: str = field(
+        default_factory=lambda: __import__("entroly", fromlist=["__version__"]).__version__
+    )