entroly 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
entroly/__init__.py ADDED
@@ -0,0 +1,27 @@
1
+ """
2
+ Entroly — Information-Theoretic Context Optimization for Agentic AI
3
+ ========================================================================
4
+
5
+ An MCP server that mathematically optimizes what goes into an LLM's
6
+ context window. Uses knapsack dynamic programming, Shannon entropy scoring,
7
+ SimHash deduplication, and predictive pre-fetching to cut token costs by
8
+ 50–70% while improving agent accuracy.
9
+
10
+ Quick Setup (Cursor)::
11
+
12
+ Add to .cursor/mcp.json:
13
+ {
14
+ "mcpServers": {
15
+ "entroly": {
16
+ "command": "entroly"
17
+ }
18
+ }
19
+ }
20
+
21
+ Quick Setup (Claude Code)::
22
+
23
+ claude mcp add entroly -- entroly
24
+
25
+ """
26
+
27
+ __version__ = "0.2.0"
@@ -0,0 +1,161 @@
1
+ """
2
+ AdaptivePruner Bridge for Entroly
3
+ ======================================
4
+
5
+ Wires ebbiforge_core.AdaptivePruner into the feedback loop.
6
+
7
+ The key addition: `historical_success` — a dimension that entroly's
8
+ Rust engine doesn't have. Over time, the RL weight updates learn which
9
+ scoring features matter most for THIS user's codebase.
10
+
11
+ Weight update rule (from ebbiforge Rust source):
12
+ weight += lr * feedback * feature_value (clamped to [-1, 1])
13
+
14
+ Falls back to no-op if ebbiforge_core is not installed.
15
+ """
16
+
17
+ from __future__ import annotations
18
+ import logging
19
+ from typing import Optional, Dict, Any
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+ try:
24
+ from ebbiforge_core import AdaptivePruner as _RustPruner, ContextFragment
25
+ _PRUNER_AVAILABLE = True
26
+ except ImportError:
27
+ _PRUNER_AVAILABLE = False
28
+ _RustPruner = None
29
+
30
+ try:
31
+ from ebbiforge_core import CodeQualityGuard as _RustGuard
32
+ _GUARD_AVAILABLE = True
33
+ except ImportError:
34
+ _GUARD_AVAILABLE = False
35
+ _RustGuard = None
36
+
37
+
38
class EntrolyPruner:
    """
    Adaptive RL pruner backed by ebbiforge_core.AdaptivePruner.

    Remembers the scoring features each fragment had when it was considered
    for selection, and replays them to the Rust pruner's ``update_policy``
    when feedback for that fragment arrives. A ``historical_success`` value
    (1.0 for fragments that were selected, 0.5 otherwise) is passed along
    with the recorded features.

    Zero-config: if ebbiforge_core is unavailable, ``apply_feedback`` and
    ``score_fragment`` are no-ops (returning ``False`` / ``None``).

    Args:
        max_tracked_fragments: Upper bound on the number of fragments whose
            features are remembered. When the bound is exceeded the least
            recently recorded fragment is forgotten, so the map cannot grow
            without limit in a long-running server. ``None`` disables the
            bound.
    """

    def __init__(self, max_tracked_fragments: Optional[int] = 10_000):
        self._pruner = _RustPruner() if _PRUNER_AVAILABLE else None
        self._max_tracked_fragments = max_tracked_fragments
        # fragment_id -> features captured at selection time. Insertion order
        # doubles as recency order for eviction (oldest entry first).
        self._fragment_features: Dict[str, Dict[str, Any]] = {}
        if _PRUNER_AVAILABLE:
            logger.info("AdaptivePruner: ebbiforge_core available — RL weight learning active")
        else:
            logger.info("AdaptivePruner: ebbiforge_core not available — using static weights")

    @property
    def available(self) -> bool:
        """True when the Rust pruner was imported and instantiated."""
        return _PRUNER_AVAILABLE and self._pruner is not None

    def record_fragment_features(
        self,
        fragment_id: str,
        recency: float,
        relevance: float,
        complexity: float,
        was_selected: bool,
    ) -> None:
        """
        Record the scoring features for a fragment at selection time.

        The record is kept so that a later ``apply_feedback`` call for the
        same ``fragment_id`` can replay it. Recording the same id again
        replaces the previous record and marks it as the most recent one.

        Args:
            fragment_id: Identifier the feedback will later refer to.
            recency: Recency feature value at selection time.
            relevance: Relevance feature value at selection time.
            complexity: Complexity feature value at selection time.
            was_selected: Whether the fragment made it into the context.
        """
        tracked = self._fragment_features
        # Drop any stale record first so the re-insert lands at the "newest"
        # end of the dict instead of keeping its old position.
        tracked.pop(fragment_id, None)
        tracked[fragment_id] = {
            "recency": recency,
            "relevance": relevance,
            "complexity": complexity,
            "was_selected": was_selected,
        }

        limit = self._max_tracked_fragments
        if limit is None:
            return
        limit = max(limit, 0)
        while len(tracked) > limit:
            # Dicts iterate in insertion order, so the first key is the oldest.
            del tracked[next(iter(tracked))]

    def apply_feedback(self, fragment_id: str, feedback: float) -> bool:
        """
        Apply user feedback to update RL weights for this fragment's features.

        Args:
            fragment_id: The fragment that received feedback.
            feedback: +1.0 = helpful, -1.0 = not helpful, 0.0 = neutral.

        Returns:
            True if ``update_policy`` was called; False if the pruner is
            unavailable or no feature record exists for ``fragment_id``
            (never recorded, or already evicted).
        """
        if not self.available:
            return False

        features = self._fragment_features.get(fragment_id)
        if features is None:
            return False

        # historical_success: 1.0 if this fragment was selected, else 0.5
        historical_success = 1.0 if features.get("was_selected") else 0.5

        self._pruner.update_policy(
            feedback=feedback,
            recency=features["recency"],
            relevance=features["relevance"],
            historical_success=historical_success,
            complexity=features["complexity"],
        )
        return True

    def score_fragment(
        self,
        recency: float,
        relevance: float,
        historical_success: float,
        complexity: float,
    ) -> Optional[float]:
        """
        Score a fragment using the pruner's current weights.

        Returns:
            The value returned by the Rust pruner's ``score_fragment``, or
            None if the pruner is unavailable (callers should then fall back
            to entroly's own scoring).
        """
        if not self.available:
            return None
        return self._pruner.score_fragment(recency, relevance, historical_success, complexity)
125
+
126
+
127
class FragmentGuard:
    """
    Code quality scanner backed by ebbiforge_core.CodeQualityGuard.

    Per the upstream description (not verifiable from this module), the
    guard looks for:
      - Hardcoded API secrets (sk-..., API_KEY = "...")
      - unsafe Rust blocks
      - TODO comments
      - Console spam (>5 log statements)

    ``scan`` returns a list of issues — empty means nothing was reported.
    Zero-config: no-op if ebbiforge_core is unavailable.
    """

    def __init__(self):
        self._guard = _RustGuard() if _GUARD_AVAILABLE else None
        if _GUARD_AVAILABLE:
            logger.info("FragmentGuard: CodeQualityGuard active — scanning ingested fragments")

    @property
    def available(self) -> bool:
        """True when the Rust guard was imported and instantiated."""
        return _GUARD_AVAILABLE and self._guard is not None

    def scan(self, content: str, source: str = "") -> list[str]:
        """
        Scan fragment content for code quality issues.

        Scanning is best-effort: a failure inside the guard never propagates
        to the caller. It is logged at WARNING level (with traceback) so that
        "the scanner crashed" can be told apart from "the code is clean".

        Args:
            content: Fragment text to scan. Empty content is not scanned.
            source: Origin of the fragment, forwarded to the guard as-is.

        Returns:
            List of issue strings; empty when the guard is unavailable, the
            content is empty, nothing was reported, or the scan failed.
        """
        if not self.available or not content:
            return []
        try:
            return list(self._guard.review_code(content, source))
        except Exception:
            # Deliberately broad: the guard is an optional native extension
            # and must not break ingestion. Leave a trace instead of hiding it.
            logger.warning(
                "FragmentGuard: scan failed for source %r; fragment left unscanned",
                source,
                exc_info=True,
            )
            return []
entroly/checkpoint.py ADDED
@@ -0,0 +1,302 @@
1
+ """
2
+ Checkpoint & Resume System
3
+ ===========================
4
+
5
+ Serializes the full agent state to disk so that multi-step tasks
6
+ can resume from the last checkpoint instead of restarting from scratch.
7
+
8
+ The Problem:
9
+ An agent working on a 10-step refactoring task fails at step 7
10
+ (API timeout, context overflow, rate limit). Today, the developer
11
+ must restart the entire task — re-reading files, re-planning,
12
+ re-executing steps 1-6 — wasting time and tokens.
13
+
14
+ The Solution:
15
+ Entroly automatically checkpoints after every N tool calls:
16
+ - All tracked context fragments (with scores)
17
+ - The dedup index state
18
+ - Co-access patterns from the pre-fetcher
19
+ - Custom metadata (task plan, current step, etc.)
20
+
21
+ On resume, the full state is restored in <100ms, and the agent
22
+ picks up exactly where it left off.
23
+
24
+ Storage Format:
25
+ JSON for human readability and debuggability. Gzipped for
26
+ space efficiency. Typical checkpoint: 50-200 KB compressed.
27
+
28
+ References:
29
+ - Agentic Plan Caching (arXiv 2025) — reusing structured plans
30
+ - SagaLLM (arXiv 2025) — transactional guarantees for multi-agent planning
31
+ """
32
+
33
+ from __future__ import annotations
34
+
35
+ import gzip
36
+ import json
37
+ import os
38
+ import time
39
+ from dataclasses import asdict, dataclass
40
+ from pathlib import Path
41
+ from typing import Any, Dict, List, Optional
42
+
43
+ from entroly_core import ContextFragment
44
+
45
+
46
+
47
@dataclass
class Checkpoint:
    """
    Point-in-time snapshot of the Entroly state.

    Attributes:
        checkpoint_id: Unique ID for this checkpoint (timestamp-based).
        timestamp: Unix timestamp when this checkpoint was created.
        current_turn: The turn number at checkpoint time.
        fragments: Serialized context fragments, one dict per fragment.
        dedup_fingerprints: fragment_id → SimHash fingerprint mapping.
        co_access_data: Pre-fetcher co-access counts.
        metadata: Custom metadata (task plan, current step, etc.).
        stats: Performance stats at checkpoint time.
    """

    # Identity and position in the session.
    checkpoint_id: str
    timestamp: float
    current_turn: int

    # Restorable engine state.
    fragments: List[Dict[str, Any]]
    dedup_fingerprints: Dict[str, int]
    co_access_data: Dict[str, Dict[str, int]]

    # Free-form extras supplied by the caller.
    metadata: Dict[str, Any]
    stats: Dict[str, Any]
74
+
75
+
76
+ def _fragment_to_dict(frag: ContextFragment) -> Dict[str, Any]:
77
+ """Serialize a ContextFragment to a JSON-safe dict."""
78
+ return {
79
+ "fragment_id": frag.fragment_id,
80
+ "content": frag.content,
81
+ "token_count": frag.token_count,
82
+ "source": frag.source,
83
+ "recency_score": round(frag.recency_score, 6),
84
+ "frequency_score": round(frag.frequency_score, 6),
85
+ "semantic_score": round(frag.semantic_score, 6),
86
+ "entropy_score": round(frag.entropy_score, 6),
87
+ "turn_created": frag.turn_created,
88
+ "turn_last_accessed": frag.turn_last_accessed,
89
+ "access_count": frag.access_count,
90
+ "is_pinned": frag.is_pinned,
91
+ "simhash": frag.simhash,
92
+ }
93
+
94
+
95
def _dict_to_fragment(d: Dict[str, Any]) -> ContextFragment:
    """
    Rebuild a ContextFragment from its serialized dict form.

    ``fragment_id``, ``content`` and ``token_count`` are required keys;
    ``source`` defaults to an empty string. Every remaining attribute is
    optional in the dict and falls back to the default listed below.
    """
    restored = ContextFragment(
        fragment_id=d["fragment_id"],
        content=d["content"],
        token_count=d["token_count"],
        source=d.get("source", ""),
    )

    # (attribute, fallback used when the key is absent), applied in this order.
    optional_attrs = (
        ("recency_score", 0.0),
        ("frequency_score", 0.0),
        ("semantic_score", 0.0),
        ("entropy_score", 0.5),
        ("turn_created", 0),
        ("turn_last_accessed", 0),
        ("access_count", 0),
        ("is_pinned", False),
        ("simhash", 0),
    )
    for attr, fallback in optional_attrs:
        setattr(restored, attr, d.get(attr, fallback))

    return restored
113
+
114
+
115
class CheckpointManager:
    """
    Manages saving and restoring Entroly state.

    Each checkpoint is one gzipped JSON file named
    ``<checkpoint_id>.json.gz`` inside ``checkpoint_dir``.

    Auto-checkpoint:
        ``should_auto_checkpoint()`` counts tool calls and reports when
        ``auto_interval`` of them have happened since the last ``save()``.

    Retention:
        After every ``save()`` only the ``max_checkpoints`` most recently
        modified checkpoint files are kept; older ones are deleted.

    Robustness:
        Checkpoints are written to a temporary file and moved into place, so
        an interrupted ``save()`` does not leave a truncated checkpoint under
        the final name. Unreadable or malformed files are skipped on load.
    """

    _FILE_SUFFIX = ".json.gz"
    _FILE_GLOB = "ckpt_*.json.gz"

    def __init__(
        self,
        checkpoint_dir: str | Path,
        auto_interval: int = 5,
        max_checkpoints: int = 10,
    ):
        self.checkpoint_dir = Path(checkpoint_dir)
        self.auto_interval = auto_interval
        self.max_checkpoints = max_checkpoints

        self._tool_calls_since_checkpoint = 0
        self._total_checkpoints_created = 0

        # Ensure directory exists
        self.checkpoint_dir.mkdir(parents=True, exist_ok=True)

    def should_auto_checkpoint(self) -> bool:
        """
        Count one tool call and report whether an auto-checkpoint is due.

        Note that this is not a pure query: every call increments the
        tool-call counter, which ``save()`` resets to zero.
        """
        self._tool_calls_since_checkpoint += 1
        return self._tool_calls_since_checkpoint >= self.auto_interval

    def save(
        self,
        fragments: List[ContextFragment],
        dedup_fingerprints: Dict[str, int],
        co_access_data: Dict[str, Dict[str, int]],
        current_turn: int,
        metadata: Optional[Dict[str, Any]] = None,
        stats: Optional[Dict[str, Any]] = None,
    ) -> str:
        """
        Save a checkpoint to disk.

        Args:
            fragments: Fragments to serialize via ``_fragment_to_dict``.
            dedup_fingerprints: fragment_id → fingerprint mapping (copied).
            co_access_data: Co-access counts (copied one level deep).
            current_turn: Turn number to record.
            metadata: Optional caller-defined data; must be JSON-serializable.
            stats: Optional stats; must be JSON-serializable.

        Returns:
            The checkpoint file path.

        Raises:
            TypeError / ValueError: if the payload cannot be JSON-encoded
                (nothing is written in that case).
            OSError: if the file cannot be written.
        """
        # One clock read so the id and the stored timestamp always agree.
        now = time.time()
        checkpoint_id = f"ckpt_{int(now)}_{self._total_checkpoints_created}"

        # Serialize first: if encoding fails, the disk is never touched.
        payload = json.dumps(
            {
                "checkpoint_id": checkpoint_id,
                "timestamp": now,
                "current_turn": current_turn,
                "fragments": [_fragment_to_dict(f) for f in fragments],
                "dedup_fingerprints": dict(dedup_fingerprints),
                "co_access_data": {k: dict(v) for k, v in co_access_data.items()},
                "metadata": metadata or {},
                "stats": stats or {},
            },
            separators=(",", ":"),
        )

        filepath = self.checkpoint_dir / f"{checkpoint_id}{self._FILE_SUFFIX}"
        # The ".tmp" suffix keeps the partial file out of the "ckpt_*.json.gz"
        # glob, so loaders and the retention pass never see it.
        tmp_path = filepath.with_name(filepath.name + ".tmp")
        try:
            with gzip.open(tmp_path, "wt", encoding="utf-8") as fh:
                fh.write(payload)
            os.replace(tmp_path, filepath)
        finally:
            # After a successful replace the temp file is already gone; this
            # only cleans up when the write or the rename failed.
            try:
                tmp_path.unlink()
            except OSError:
                pass

        self._tool_calls_since_checkpoint = 0
        self._total_checkpoints_created += 1

        # Enforce retention policy
        self._prune_old_checkpoints()

        return str(filepath)

    def load_latest(self) -> Optional[Checkpoint]:
        """
        Load the most recent checkpoint.

        Files are tried newest-first (by modification time); unreadable or
        malformed ones are skipped.

        Returns None if no checkpoints exist or all are unreadable.
        """
        for path, _ in self._checkpoint_files():
            loaded = self._load_file(path)
            if loaded is not None:
                return loaded
        return None

    def load_by_id(self, checkpoint_id: str) -> Optional[Checkpoint]:
        """
        Load a specific checkpoint by its ID.

        Returns None if the ID is empty, contains path components, or does
        not name a readable checkpoint in ``checkpoint_dir``.
        """
        # IDs may originate from tool-call arguments; refuse anything that
        # would make the lookup leave checkpoint_dir (e.g. "../other").
        if not checkpoint_id or Path(checkpoint_id).name != checkpoint_id:
            return None
        # A missing file surfaces as OSError inside _load_file -> None.
        return self._load_file(self.checkpoint_dir / f"{checkpoint_id}{self._FILE_SUFFIX}")

    def list_checkpoints(self) -> List[Dict[str, Any]]:
        """
        List all available checkpoints, newest first.

        Each entry has ``checkpoint_id``, ``path``, ``size_bytes`` and
        ``created`` (the file's modification time). File contents are not
        read, so corrupted checkpoints are listed too.
        """
        suffix_len = len(self._FILE_SUFFIX)
        return [
            {
                "checkpoint_id": path.name[:-suffix_len],
                "path": str(path),
                "size_bytes": info.st_size,
                "created": info.st_mtime,
            }
            for path, info in self._checkpoint_files()
        ]

    def restore_fragments(self, checkpoint: Checkpoint) -> List[ContextFragment]:
        """Extract ContextFragment objects from a checkpoint."""
        return [_dict_to_fragment(d) for d in checkpoint.fragments]

    def _checkpoint_files(self) -> List[tuple]:
        """Return ``(path, stat_result)`` pairs for checkpoint files, newest first."""
        entries = []
        for path in self.checkpoint_dir.glob(self._FILE_GLOB):
            try:
                entries.append((path, path.stat()))
            except OSError:
                # The file vanished or became unreadable between glob() and
                # stat() (e.g. pruned by another process); ignore it.
                continue
        entries.sort(key=lambda entry: entry[1].st_mtime, reverse=True)
        return entries

    def _load_file(self, filepath: Path) -> Optional[Checkpoint]:
        """
        Load and parse a checkpoint file.

        Returns None if the file is missing, is not valid gzip, does not
        decode as UTF-8 JSON, or does not contain a JSON object with the
        required keys (``checkpoint_id``, ``timestamp``, ``current_turn``,
        ``fragments``).
        """
        import zlib  # local import: only needed to name the error below

        try:
            with gzip.open(filepath, "rt", encoding="utf-8") as fh:
                data = json.load(fh)
        except (EOFError, OSError, ValueError, zlib.error):
            # OSError covers missing files and gzip.BadGzipFile; ValueError
            # covers json.JSONDecodeError and UnicodeDecodeError; zlib.error
            # is raised for a damaged deflate stream behind a valid header.
            return None

        # Valid JSON is not necessarily a valid checkpoint.
        if not isinstance(data, dict):
            return None
        try:
            checkpoint_id = data["checkpoint_id"]
            timestamp = data["timestamp"]
            current_turn = data["current_turn"]
            fragments = data["fragments"]
        except KeyError:
            return None

        return Checkpoint(
            checkpoint_id=checkpoint_id,
            timestamp=timestamp,
            current_turn=current_turn,
            fragments=fragments,
            dedup_fingerprints=data.get("dedup_fingerprints", {}),
            co_access_data=data.get("co_access_data", {}),
            metadata=data.get("metadata", {}),
            stats=data.get("stats", {}),
        )

    def _prune_old_checkpoints(self) -> None:
        """Remove old checkpoints beyond the retention limit."""
        for old_path, _ in self._checkpoint_files()[self.max_checkpoints:]:
            try:
                old_path.unlink()
            except OSError:
                # Best-effort: a file that cannot be removed now is retried
                # on the next save().
                pass

    def stats(self) -> dict:
        """Return checkpoint counts, total on-disk size and auto-checkpoint progress."""
        entries = self._checkpoint_files()
        total_size = sum(info.st_size for _, info in entries)
        return {
            "total_checkpoints": len(entries),
            "total_size_bytes": total_size,
            "total_size_mb": round(total_size / (1024 * 1024), 2),
            "tool_calls_since_last": self._tool_calls_since_checkpoint,
            "auto_interval": self.auto_interval,
        }
entroly/config.py ADDED
@@ -0,0 +1,74 @@
1
+ """
2
+ Entroly Configuration
3
+ ==========================
4
+
5
+ Central configuration for the context optimization engine.
6
+ All tunable parameters live here — no magic numbers buried in code.
7
+ """
8
+
9
+ from dataclasses import dataclass, field
10
+ from pathlib import Path
11
+ import os
12
+
13
+
14
def _default_checkpoint_dir() -> Path:
    """Return ``$ENTROLY_DIR`` if set, otherwise ``~/.entroly/checkpoints``."""
    home_default = os.path.expanduser("~/.entroly/checkpoints")
    return Path(os.environ.get("ENTROLY_DIR", home_default))


def _package_version() -> str:
    """Look up ``entroly.__version__`` when a config instance is created."""
    package = __import__("entroly", fromlist=["__version__"])
    return package.__version__


@dataclass
class EntrolyConfig:
    """Tunable parameters of the Entroly MCP server, grouped by subsystem."""

    # ── Token Budget ────────────────────────────────────────────────────
    # Default max tokens for context optimization (matches GPT-4 Turbo).
    default_token_budget: int = 128_000
    # Maximum context fragments tracked per session.
    max_fragments: int = 10_000

    # ── Knapsack Optimizer Weights ──────────────────────────────────────
    # Weight of recency (turns since last access).
    weight_recency: float = 0.30
    # Weight of access frequency.
    weight_frequency: float = 0.25
    # Weight of semantic similarity to the current query.
    weight_semantic_sim: float = 0.25
    # Weight of information density (Shannon entropy).
    weight_entropy: float = 0.20

    # ── Ebbinghaus Decay ────────────────────────────────────────────────
    # Number of turns for a fragment's relevance to halve.
    decay_half_life_turns: int = 15
    # Fragments below this relevance get evicted entirely.
    min_relevance_threshold: float = 0.05

    # ── Deduplication ───────────────────────────────────────────────────
    # SimHash similarity threshold above which fragments count as duplicates.
    dedup_similarity_threshold: float = 0.92

    # ── Predictive Pre-fetch ────────────────────────────────────────────
    # How many hops in the call graph to pre-fetch.
    prefetch_depth: int = 2
    # Maximum fragments to pre-fetch per symbol lookup.
    max_prefetch_fragments: int = 10

    # ── Checkpoint ──────────────────────────────────────────────────────
    # Directory for persisting checkpoint state; resolved per instance so a
    # change to $ENTROLY_DIR is picked up by configs created afterwards.
    checkpoint_dir: Path = field(default_factory=_default_checkpoint_dir)
    # Auto-checkpoint every N tool calls.
    auto_checkpoint_interval: int = 5

    # ── Server ──────────────────────────────────────────────────────────
    server_name: str = "entroly"
    # Defaults to the installed package's __version__, imported lazily.
    server_version: str = field(default_factory=_package_version)