sharp-context 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sharp_context/__init__.py +27 -0
- sharp_context/checkpoint.py +295 -0
- sharp_context/config.py +72 -0
- sharp_context/dedup.py +239 -0
- sharp_context/entropy.py +277 -0
- sharp_context/knapsack.py +348 -0
- sharp_context/prefetch.py +297 -0
- sharp_context/server.py +624 -0
- sharp_context-0.1.0.dist-info/METADATA +201 -0
- sharp_context-0.1.0.dist-info/RECORD +12 -0
- sharp_context-0.1.0.dist-info/WHEEL +4 -0
- sharp_context-0.1.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Predictive Context Pre-fetcher
|
|
3
|
+
===============================
|
|
4
|
+
|
|
5
|
+
When an agent reads a code symbol, predict what it will need next
|
|
6
|
+
and pre-load it into the context cache — BEFORE it asks.
|
|
7
|
+
|
|
8
|
+
This is **CPU cache prefetching applied to LLM context windows**.
|
|
9
|
+
|
|
10
|
+
The Problem:
|
|
11
|
+
An agent debugging function `process_payment()` will inevitably need:
|
|
12
|
+
1. Callers of `process_payment()` — who triggers this?
|
|
13
|
+
2. Callees from `process_payment()` — what does it depend on?
|
|
14
|
+
3. Test file for `process_payment()` — how is it tested?
|
|
15
|
+
4. Type definitions used — what are the data structures?
|
|
16
|
+
|
|
17
|
+
Without pre-fetching, the agent makes 4 sequential tool calls,
|
|
18
|
+
each adding latency and token cost. With pre-fetching, these are
|
|
19
|
+
already in the context cache when the agent asks.
|
|
20
|
+
|
|
21
|
+
Heuristics:
|
|
22
|
+
1. **Static call graph**: Extract function/method calls from source
|
|
23
|
+
2. **Import graph**: Follow import statements to related modules
|
|
24
|
+
3. **Naming conventions**: foo.py → test_foo.py, foo_test.py
|
|
25
|
+
4. **Co-access patterns**: Track which files are accessed together
|
|
26
|
+
across sessions (associative learning)
|
|
27
|
+
|
|
28
|
+
References:
|
|
29
|
+
- CPU prefetch: Smith, J. "Sequential Program Prefetching" (1978)
|
|
30
|
+
- Agentic Plan Caching (arXiv 2025) — reusing structured plans
|
|
31
|
+
- Proximity (arXiv 2026) — LSH-bucketed pre-warming for caches
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
from __future__ import annotations
|
|
35
|
+
|
|
36
|
+
import os
|
|
37
|
+
import re
|
|
38
|
+
from collections import Counter, defaultdict
|
|
39
|
+
from dataclasses import dataclass, field
|
|
40
|
+
from pathlib import Path
|
|
41
|
+
from typing import Dict, List, Optional, Set, Tuple
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass
class PrefetchResult:
    """One predicted context fragment that the agent may need next.

    Attributes:
        path: File path or symbol identifier of the predicted fragment.
        reason: Why it was predicted (e.g. 'callee', 'test_file',
            'co_access').
        confidence: Prediction confidence, intended to lie in [0, 1].
            The value is stored as given; nothing here validates the range.
        content: Pre-loaded content when available, otherwise ``None``.
    """

    # Where the predicted fragment lives.
    path: str

    # Which heuristic produced the prediction.
    reason: str

    # Score used by callers to rank predictions.
    confidence: float

    # Filled in only when the fragment has already been loaded.
    content: Optional[str] = None
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# ── Static Analysis Patterns ───────────────────────────────────────────
|
|
62
|
+
|
|
63
|
+
# Python function/method calls
|
|
64
|
+
_PY_CALL_RE = re.compile(
|
|
65
|
+
r"(?:self\.)?([a-zA-Z_]\w*)\s*\(", re.MULTILINE
|
|
66
|
+
)
|
|
67
|
+
|
|
68
|
+
# Python imports
|
|
69
|
+
_PY_IMPORT_RE = re.compile(
|
|
70
|
+
r"(?:from\s+([\w.]+)\s+import|import\s+([\w.]+))", re.MULTILINE
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
# Python class inheritance
|
|
74
|
+
_PY_CLASS_RE = re.compile(
|
|
75
|
+
r"class\s+\w+\s*\(\s*([\w.,\s]+)\s*\)\s*:", re.MULTILINE
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
# Rust use/mod statements
|
|
79
|
+
_RS_USE_RE = re.compile(
|
|
80
|
+
r"(?:use\s+([\w:]+)|mod\s+(\w+))", re.MULTILINE
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
# TypeScript/JavaScript imports
|
|
84
|
+
_TS_IMPORT_RE = re.compile(
|
|
85
|
+
r"(?:import\s+.*?\s+from\s+['\"]([^'\"]+)['\"]|"
|
|
86
|
+
r"require\s*\(\s*['\"]([^'\"]+)['\"]\s*\))",
|
|
87
|
+
re.MULTILINE,
|
|
88
|
+
)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def extract_callees(source: str, language: str = "python") -> List[str]:
    """
    Extract function/method names called from a source code fragment.

    Returns a list of callee names (not fully qualified — just the
    function name as it appears in source), in order of appearance and
    with repeats kept. Only Python is supported; any other language
    yields an empty list.

    The scan is purely textual: every identifier directly followed by
    "(" is reported, so a definition such as ``def foo(`` also
    contributes ``foo``. Reserved words (``if (x)``, ``return (y)``,
    ``not (z)``) are filtered out because they can never be callables.
    """
    if language != "python":
        return []

    # Local import keeps this function self-contained without touching
    # the module's import block.
    import keyword

    return [
        name
        for name in _PY_CALL_RE.findall(source)
        if not keyword.iskeyword(name)
    ]
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def extract_imports(source: str, language: str = "python") -> List[str]:
    """
    Collect the import targets mentioned in a source code fragment.

    Supported languages are "python", "typescript"/"javascript" and
    "rust"; anything else produces an empty list. Each returned string is
    the module or path text captured by the language's import pattern, in
    order of appearance, and could be resolved to a file by the caller.
    """
    # Choose the pattern for the requested language; bail out early when
    # the language is not recognised.
    if language == "python":
        pattern = _PY_IMPORT_RE
    elif language in ("typescript", "javascript"):
        pattern = _TS_IMPORT_RE
    elif language == "rust":
        pattern = _RS_USE_RE
    else:
        return []

    # Every pattern has exactly two alternative capture groups; whichever
    # one took part in the match carries the target.
    captured = (hit.group(1) or hit.group(2) for hit in pattern.finditer(source))
    return [target for target in captured if target]
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def infer_test_files(file_path: str) -> List[str]:
    """
    Infer likely test file paths from a source file path.

    Heuristics:
        foo.py → test_foo.py, foo_test.py, tests/test_foo.py
        utils/bar.py → tests/test_bar.py, utils/test_bar.py
        src/baz.rs → tests/baz.rs, src/tests/baz.rs, src/baz_test.rs

    Returns candidate paths in priority order with duplicates removed.
    The paths are guesses only; nothing checks that they exist.
    """
    path = Path(file_path)
    stem = path.stem
    suffix = path.suffix
    parent = path.parent

    test_name = f"test_{stem}{suffix}"
    candidates = [
        parent / test_name,
        parent / f"{stem}_test{suffix}",
        parent / "tests" / test_name,
        parent.parent / "tests" / test_name,
    ]

    # Rust-specific: test files commonly keep the module's own name, either
    # in a tests/ directory next to the file or in one beside its parent
    # directory (e.g. tests/ alongside src/).
    if suffix == ".rs":
        same_name = f"{stem}{suffix}"
        candidates.append(parent / "tests" / same_name)
        candidates.append(parent.parent / "tests" / same_name)

    # For a file with no directory component, parent and parent.parent are
    # both ".", which would repeat the tests/ entries. dict.fromkeys drops
    # the repeats while keeping the first-seen order.
    return list(dict.fromkeys(str(candidate) for candidate in candidates))
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def module_to_file_candidates(
    module_path: str,
    base_dir: str = "",
    language: str = "python",
) -> List[str]:
    """
    Convert a module path (e.g., 'utils.helpers') to candidate file paths.

    Python: utils.helpers → utils/helpers.py, utils/helpers/__init__.py

    Relative module strings are understood as well: one leading dot
    refers to ``base_dir`` itself and every additional dot climbs one
    directory, so ``..pkg`` resolves next to the parent of ``base_dir``.
    A string made only of dots (from ``from . import x``) names a package,
    so just its ``__init__.py`` is returned.

    Args:
        module_path: Dotted module name, optionally with leading dots.
        base_dir: Directory the candidates are joined onto.
        language: Only "python" is handled; other values return [].

    Returns:
        Candidate paths, module file first and package ``__init__.py``
        second. The paths are guesses; nothing checks that they exist.
    """
    if language != "python":
        return []

    dotted = module_path.lstrip(".")
    level = len(module_path) - len(dotted)
    # Drop empty segments so stray dots never become path components.
    parts = [segment for segment in dotted.split(".") if segment]

    root = base_dir
    if level > 1:
        # Each dot beyond the first means "one package up".
        climb = [os.pardir] * (level - 1)
        root = os.path.normpath(os.path.join(base_dir, *climb))

    if not parts:
        if level == 0:
            # Empty module string: nothing to resolve.
            return []
        return [os.path.join(root, "__init__.py")]

    return [
        os.path.join(root, *parts) + ".py",
        os.path.join(root, *parts, "__init__.py"),
    ]
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
class PrefetchEngine:
    """
    Predictive pre-fetcher that learns co-access patterns across
    sessions and combines them with static analysis for predictions.

    Two prediction strategies:
    1. **Static**: Parse imports and apply test-file naming conventions
    2. **Learned**: Track which files are accessed together and
       predict based on historical co-access frequency

    The learned component uses a simple co-occurrence counter:
    When file A and file B are accessed within K turns of each other,
    increment co_access[A][B] and co_access[B][A].

    Confidence is computed as:
    - Static predictions: fixed confidence (0.7 for imports, 0.5 for tests)
    - Learned predictions: co-access count divided by the highest count
      for the current file, scaled so the most frequent partner gets 0.8
    """

    def __init__(self, co_access_window: int = 5):
        """
        Create an engine with empty history.

        Args:
            co_access_window: Maximum distance in turns (K) between two
                accesses for them to count as accessed together.
        """
        self.co_access_window = co_access_window

        # co_access[file_a][file_b] = count of times accessed together
        self._co_access: Dict[str, Counter] = defaultdict(Counter)

        # Recent access history for learning
        self._recent_accesses: List[Tuple[str, int]] = []  # (path, turn)

    def record_access(self, file_path: str, turn: int) -> None:
        """
        Record that a file was accessed at a given turn.

        Updates co-access counts with all files accessed within
        the co-access window. Each remembered access counts on its own,
        so a file that appears several times in the recent history is
        incremented once per appearance.
        """
        window = self.co_access_window
        for prev_path, prev_turn in self._recent_accesses:
            if prev_path == file_path:
                continue
            # Compare the absolute distance: if turn numbers ever go
            # backwards (e.g. a counter that restarts), a plain
            # subtraction is negative and would link the new access to
            # every remembered file regardless of how far apart they are.
            if abs(turn - prev_turn) <= window:
                self._co_access[file_path][prev_path] += 1
                self._co_access[prev_path][file_path] += 1

        self._recent_accesses.append((file_path, turn))

        # Prune old accesses (keep last 100)
        if len(self._recent_accesses) > 100:
            self._recent_accesses = self._recent_accesses[-100:]

    def predict(
        self,
        file_path: str,
        source_content: str,
        language: str = "python",
        max_results: int = 10,
    ) -> List[PrefetchResult]:
        """
        Predict what context fragments will be needed next, given
        that the agent just accessed `file_path` with `source_content`.

        Combines static analysis and learned co-access patterns.
        Results are sorted by confidence (highest first) and truncated
        to `max_results`; a non-positive `max_results` yields an empty
        list. The file being read is never predicted for itself.
        """
        # A negative limit would otherwise slice off only the last item.
        if max_results <= 0:
            return []

        predictions: List[PrefetchResult] = []
        # Seeding with the current file stops a candidate that resolves to
        # exactly this path string from being suggested.
        seen_paths: Set[str] = {file_path}

        # 1. Import graph (confidence: 0.7)
        base_dir = str(Path(file_path).parent)
        for imp in extract_imports(source_content, language):
            for candidate in module_to_file_candidates(imp, base_dir, language):
                if candidate in seen_paths:
                    continue
                seen_paths.add(candidate)
                predictions.append(PrefetchResult(
                    path=candidate,
                    reason="import",
                    confidence=0.70,
                ))

        # 2. Test files (confidence: 0.5)
        for tc in infer_test_files(file_path):
            if tc in seen_paths:
                continue
            seen_paths.add(tc)
            predictions.append(PrefetchResult(
                path=tc,
                reason="test_file",
                confidence=0.50,
            ))

        # 3. Learned co-access patterns (confidence: normalized count)
        # The membership test avoids creating an empty defaultdict entry
        # for a file that has never been recorded.
        if file_path in self._co_access:
            co_counts = self._co_access[file_path]
            if co_counts:
                max_count = max(co_counts.values())
                for co_path, count in co_counts.most_common(max_results):
                    if co_path in seen_paths:
                        continue
                    seen_paths.add(co_path)
                    confidence = min(count / max(max_count, 1), 1.0) * 0.80
                    predictions.append(PrefetchResult(
                        path=co_path,
                        reason="co_access",
                        confidence=round(confidence, 2),
                    ))

        # Sort by confidence and limit results. The sort is stable, so
        # equally confident predictions keep their insertion order.
        predictions.sort(key=lambda p: p.confidence, reverse=True)
        return predictions[:max_results]

    def stats(self) -> dict:
        """
        Summarize the learned state.

        Returns a dict with the number of files that have co-access
        entries, the number of undirected co-access pairs, and the length
        of the recent-access history.
        """
        total_pairs = sum(
            len(targets) for targets in self._co_access.values()
        )
        return {
            "tracked_files": len(self._co_access),
            # Every pair is stored in both directions, hence the halving.
            "co_access_pairs": total_pairs // 2,  # Undirected
            "recent_accesses": len(self._recent_accesses),
        }
|