sharp-context 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,297 @@
1
+ """
2
+ Predictive Context Pre-fetcher
3
+ ===============================
4
+
5
+ When an agent reads a code symbol, predict what it will need next
6
+ and pre-load it into the context cache — BEFORE it asks.
7
+
8
+ This is **CPU cache prefetching applied to LLM context windows**.
9
+
10
+ The Problem:
11
+ An agent debugging function `process_payment()` will inevitably need:
12
+ 1. Callers of `process_payment()` — who triggers this?
13
+ 2. Callees from `process_payment()` — what does it depend on?
14
+ 3. Test file for `process_payment()` — how is it tested?
15
+ 4. Type definitions used — what are the data structures?
16
+
17
+ Without pre-fetching, the agent makes 4 sequential tool calls,
18
+ each adding latency and token cost. With pre-fetching, these are
19
+ already in the context cache when the agent asks.
20
+
21
+ Heuristics:
22
+ 1. **Static call graph**: Extract function/method calls from source
23
+ 2. **Import graph**: Follow import statements to related modules
24
+ 3. **Naming conventions**: foo.py → test_foo.py, foo_test.py
25
+ 4. **Co-access patterns**: Track which files are accessed together
26
+ across sessions (associative learning)
27
+
28
+ References:
29
+ - CPU prefetch: Smith, J. "Sequential Program Prefetching" (1978)
30
+ - Agentic Plan Caching (arXiv 2025) — reusing structured plans
31
+ - Proximity (arXiv 2026) — LSH-bucketed pre-warming for caches
32
+ """
33
+
34
+ from __future__ import annotations
35
+
36
+ import os
37
+ import re
38
+ from collections import Counter, defaultdict
39
+ from dataclasses import dataclass, field
40
+ from pathlib import Path
41
+ from typing import Dict, List, Optional, Set, Tuple
42
+
43
+
44
@dataclass
class PrefetchResult:
    """
    One predicted context fragment that the agent may need next.

    Fields:
        path: File path or symbol identifier.
        reason: Why this was predicted (e.g., 'callee', 'test_file',
            'co_access').
        confidence: Prediction confidence in [0, 1].
        content: Pre-loaded content, if available; ``None`` otherwise.
    """

    path: str
    reason: str
    confidence: float
    content: Optional[str] = None
59
+
60
+
61
# ── Static Analysis Patterns ───────────────────────────────────────────
#
# All patterns below are purely textual: they are applied to raw source
# text with no parsing, so matches inside comments or string literals are
# reported as well. None of the patterns uses ^ or $, so re.MULTILINE has
# no effect on what they match.

# Python function/method calls.
# Group 1 is any identifier directly followed by "(" (an optional leading
# "self." is skipped). Definitions such as "def name(" are captured too,
# because the pattern only looks at "identifier + open paren".
_PY_CALL_RE = re.compile(
    r"(?:self\.)?([a-zA-Z_]\w*)\s*\(", re.MULTILINE
)

# Python imports.
# Group 1 is the module in "from <module> import ..."; group 2 is the
# module in "import <module>". Only the first module of a comma-separated
# "import a, b" statement is captured.
_PY_IMPORT_RE = re.compile(
    r"(?:from\s+([\w.]+)\s+import|import\s+([\w.]+))", re.MULTILINE
)

# Python class inheritance.
# Group 1 is the raw text of the base-class list between the parentheses.
# NOTE(review): not referenced by any code visible in this section.
_PY_CLASS_RE = re.compile(
    r"class\s+\w+\s*\(\s*([\w.,\s]+)\s*\)\s*:", re.MULTILINE
)

# Rust use/mod statements.
# Group 1 is the path in "use a::b::c"; group 2 is the name in "mod name".
_RS_USE_RE = re.compile(
    r"(?:use\s+([\w:]+)|mod\s+(\w+))", re.MULTILINE
)

# TypeScript/JavaScript imports.
# Group 1 is the quoted specifier in "import ... from '<spec>'"; group 2 is
# the quoted specifier in "require('<spec>')". A bare side-effect import
# ("import '<spec>'") has no "from" clause and is therefore not matched.
_TS_IMPORT_RE = re.compile(
    r"(?:import\s+.*?\s+from\s+['\"]([^'\"]+)['\"]|"
    r"require\s*\(\s*['\"]([^'\"]+)['\"]\s*\))",
    re.MULTILINE,
)
89
+
90
+
91
def extract_callees(source: str, language: str = "python") -> List[str]:
    """
    List the function/method names that appear to be called in `source`.

    Names are returned exactly as written in the source (unqualified),
    in order of appearance, duplicates included. Only Python is
    supported; any other language yields an empty list.
    """
    if language != "python":
        return []
    # The pattern has a single, mandatory capture group: the callee name.
    return [hit.group(1) for hit in _PY_CALL_RE.finditer(source)]
101
+
102
+
103
def extract_imports(source: str, language: str = "python") -> List[str]:
    """
    Collect the import targets mentioned in a source code fragment.

    Supported languages are "python", "typescript"/"javascript" and
    "rust"; anything else yields an empty list. The returned strings are
    module names or path specifiers, in order of appearance, that a
    caller may try to resolve to files.
    """
    # Pick the pattern for the language; every pattern exposes the target
    # in either group 1 or group 2, depending on which alternative matched.
    if language == "python":
        pattern = _PY_IMPORT_RE
    elif language in ("typescript", "javascript"):
        pattern = _TS_IMPORT_RE
    elif language == "rust":
        pattern = _RS_USE_RE
    else:
        return []

    targets = (
        found.group(1) or found.group(2)
        for found in pattern.finditer(source)
    )
    return [target for target in targets if target]
131
+
132
+
133
def infer_test_files(file_path: str) -> List[str]:
    """
    Infer likely test file paths from a source file path.

    Heuristics:
        foo.py       → test_foo.py, foo_test.py, tests/test_foo.py
        utils/bar.py → utils/test_bar.py, utils/bar_test.py,
                       utils/tests/test_bar.py, tests/test_bar.py
        src/baz.rs   → (the above, with .rs) plus src/tests/baz.rs and
                       tests/baz.rs

    Args:
        file_path: Path of the source file the agent accessed.

    Returns:
        Candidate test paths in priority order, without duplicates. The
        paths are guesses only; nothing is checked against the filesystem.
    """
    path = Path(file_path)
    stem = path.stem
    suffix = path.suffix
    parent = path.parent
    grandparent = parent.parent
    test_name = f"test_{stem}{suffix}"

    candidates = [
        parent / test_name,
        parent / f"{stem}_test{suffix}",
        parent / "tests" / test_name,
        grandparent / "tests" / test_name,
    ]

    # Rust-specific: tests are commonly named after the module itself,
    # either next to the source or in a tests/ directory one level up
    # (the sibling of src/).
    if suffix == ".rs":
        candidates.append(parent / "tests" / f"{stem}{suffix}")
        candidates.append(grandparent / "tests" / f"{stem}{suffix}")

    # When the file has no parent directory, parent and grandparent are the
    # same path, which would repeat candidates. dict.fromkeys drops the
    # repeats while preserving first-seen order.
    return list(dict.fromkeys(str(candidate) for candidate in candidates))
159
+
160
+
161
def module_to_file_candidates(
    module_path: str,
    base_dir: str = "",
    language: str = "python",
) -> List[str]:
    """
    Convert a module path (e.g., 'utils.helpers') to candidate file paths.

    Python:
        utils.helpers → utils/helpers.py, utils/helpers/__init__.py
        .helpers      → helpers.py, helpers/__init__.py  (same package)
        ..core        → ../core.py, ../core/__init__.py  (one level up)
        .             → __init__.py                      (the package itself)

    Args:
        module_path: Dotted module name as written in an import statement.
            Leading dots are treated as a relative import: one dot means
            `base_dir` itself, each additional dot one directory higher.
        base_dir: Directory the candidates are resolved against.
        language: Source language; only "python" is supported.

    Returns:
        Candidate file paths (not checked for existence). Empty for
        unsupported languages and for an empty module path.
    """
    if language != "python":
        return []

    dotted = module_path.lstrip(".")
    level = len(module_path) - len(dotted)
    # Drop empty components so stray dots never produce names like ".py".
    parts = [part for part in dotted.split(".") if part]

    if not parts and level == 0:
        return []

    root = base_dir
    if level > 1:
        # Each dot beyond the first climbs one directory above base_dir.
        climb = [os.pardir] * (level - 1)
        root = os.path.normpath(os.path.join(base_dir, *climb))

    if not parts:
        # Bare relative import ("from . import x"): the package itself.
        return [os.path.join(root, "__init__.py")]

    module_stem = os.path.join(root, *parts)
    return [
        module_stem + ".py",
        os.path.join(module_stem, "__init__.py"),
    ]
179
+
180
+
181
class PrefetchEngine:
    """
    Predictive pre-fetcher that learns co-access patterns across
    sessions and combines them with static analysis for predictions.

    Two prediction strategies:
        1. **Static**: Parse imports and apply test-file naming conventions
        2. **Learned**: Track which files are accessed together and
           predict based on historical co-access frequency

    The learned component uses a simple co-occurrence counter:
        When file A and file B are accessed within K turns of each other,
        increment co_access[A][B] and co_access[B][A].

    Confidence is computed as:
        - Static predictions: fixed confidence (0.7 for imports, 0.5 for tests)
        - Learned predictions: co-access count normalized by the highest
          count for that file, scaled to a maximum of 0.8
    """

    # Fixed confidences of the static heuristics, and the ceiling applied
    # to learned (co-access) predictions.
    IMPORT_CONFIDENCE = 0.70
    TEST_FILE_CONFIDENCE = 0.50
    CO_ACCESS_MAX_CONFIDENCE = 0.80

    def __init__(self, co_access_window: int = 5, max_history: int = 100):
        """
        Args:
            co_access_window: Maximum distance, in turns, at which two
                accesses still count as "together" (K above).
            max_history: Number of most recent accesses retained for
                learning; older entries are discarded.
        """
        self.co_access_window = co_access_window
        self.max_history = max(max_history, 0)

        # co_access[file_a][file_b] = count of times accessed together
        self._co_access: Dict[str, Counter] = defaultdict(Counter)

        # Recent access history for learning
        self._recent_accesses: List[Tuple[str, int]] = []  # (path, turn)

    def record_access(self, file_path: str, turn: int) -> None:
        """
        Record that a file was accessed at a given turn.

        Updates co-access counts (symmetrically) with every retained
        access to a *different* file that lies within the co-access window.
        """
        for prev_path, prev_turn in self._recent_accesses:
            if prev_path == file_path:
                continue
            # Compare the absolute distance: if the turn counter restarts
            # (e.g. a new session), a plain `turn - prev_turn` is negative
            # and would wrongly pair the new access with all of history.
            if abs(turn - prev_turn) <= self.co_access_window:
                self._co_access[file_path][prev_path] += 1
                self._co_access[prev_path][file_path] += 1

        self._recent_accesses.append((file_path, turn))

        # Prune old accesses, keeping only the newest `max_history`.
        excess = len(self._recent_accesses) - self.max_history
        if excess > 0:
            del self._recent_accesses[:excess]

    def predict(
        self,
        file_path: str,
        source_content: str,
        language: str = "python",
        max_results: int = 10,
    ) -> List[PrefetchResult]:
        """
        Predict what context fragments will be needed next, given
        that the agent just accessed `file_path` with `source_content`.

        Combines static analysis and learned co-access patterns.

        Args:
            file_path: Path of the file that was just accessed.
            source_content: Text of that file, scanned for imports.
            language: Language used for import extraction and resolution.
            max_results: Upper bound on returned predictions; values
                below zero are treated as zero.

        Returns:
            Predictions sorted by confidence (highest first); ties keep
            the order imports → test files → co-access. Each path is
            reported at most once. Paths are candidates only and are not
            checked against the filesystem.
        """
        limit = max(max_results, 0)
        predictions: List[PrefetchResult] = []
        seen_paths: Set[str] = set()

        # 1. Import graph: resolve each import relative to the file's folder.
        base_dir = str(Path(file_path).parent)
        for imp in extract_imports(source_content, language):
            for candidate in module_to_file_candidates(imp, base_dir, language):
                if candidate in seen_paths:
                    continue
                seen_paths.add(candidate)
                predictions.append(PrefetchResult(
                    path=candidate,
                    reason="import",
                    confidence=self.IMPORT_CONFIDENCE,
                ))

        # 2. Test files inferred from naming conventions.
        for tc in infer_test_files(file_path):
            if tc in seen_paths:
                continue
            seen_paths.add(tc)
            predictions.append(PrefetchResult(
                path=tc,
                reason="test_file",
                confidence=self.TEST_FILE_CONFIDENCE,
            ))

        # 3. Learned co-access patterns. The membership test avoids
        #    inserting an empty Counter into the defaultdict on a miss.
        if file_path in self._co_access:
            co_counts = self._co_access[file_path]
            if co_counts:
                max_count = max(max(co_counts.values()), 1)
                for co_path, count in co_counts.most_common(limit):
                    if co_path in seen_paths:
                        continue
                    seen_paths.add(co_path)
                    ratio = min(count / max_count, 1.0)
                    confidence = ratio * self.CO_ACCESS_MAX_CONFIDENCE
                    predictions.append(PrefetchResult(
                        path=co_path,
                        reason="co_access",
                        confidence=round(confidence, 2),
                    ))

        # Sort by confidence (stable, so ties keep insertion order) and
        # limit the number of results.
        predictions.sort(key=lambda p: p.confidence, reverse=True)
        return predictions[:limit]

    def stats(self) -> dict:
        """
        Summarize the learned state.

        Returns:
            A dict with the number of files that have co-access data
            ("tracked_files"), the number of distinct co-accessed pairs
            ("co_access_pairs") and the length of the retained access
            history ("recent_accesses").
        """
        total_pairs = sum(
            len(targets) for targets in self._co_access.values()
        )
        return {
            "tracked_files": len(self._co_access),
            # Every pair is stored in both directions, hence the halving.
            "co_access_pairs": total_pairs // 2,  # Undirected
            "recent_accesses": len(self._recent_accesses),
        }