sharp-context 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sharp_context/__init__.py +27 -0
- sharp_context/checkpoint.py +295 -0
- sharp_context/config.py +72 -0
- sharp_context/dedup.py +239 -0
- sharp_context/entropy.py +277 -0
- sharp_context/knapsack.py +348 -0
- sharp_context/prefetch.py +297 -0
- sharp_context/server.py +624 -0
- sharp_context-0.1.0.dist-info/METADATA +201 -0
- sharp_context-0.1.0.dist-info/RECORD +12 -0
- sharp_context-0.1.0.dist-info/WHEEL +4 -0
- sharp_context-0.1.0.dist-info/entry_points.txt +2 -0
sharp_context/entropy.py
ADDED
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Shannon Entropy Scorer
|
|
3
|
+
======================
|
|
4
|
+
|
|
5
|
+
Measures the **information density** of each context fragment using
|
|
6
|
+
Shannon entropy, TF-IDF cross-fragment redundancy, and a novel
|
|
7
|
+
"information surprise" metric.
|
|
8
|
+
|
|
9
|
+
The core insight (from ICPC, arXiv 2025): not all tokens carry equal
|
|
10
|
+
information. Boilerplate code (`import os`, `def __init__`) has near-zero
|
|
11
|
+
entropy — it's predictable. But a specific error message or a novel
|
|
12
|
+
algorithm implementation has high entropy — it's surprising and informative.
|
|
13
|
+
|
|
14
|
+
Three scoring dimensions:
|
|
15
|
+
1. **Character-level Shannon entropy**: Raw information density
|
|
16
|
+
2. **Token-level surprisal**: How unexpected each token is given
|
|
17
|
+
the session's token distribution (self-information)
|
|
18
|
+
3. **Cross-fragment redundancy**: TF-IDF-style scoring that
|
|
19
|
+
penalizes fragments containing information already present
|
|
20
|
+
in other fragments
|
|
21
|
+
|
|
22
|
+
By combining these, we can rank fragments not just by recency or
|
|
23
|
+
relevance, but by how much UNIQUE INFORMATION they contribute.
|
|
24
|
+
|
|
25
|
+
References:
|
|
26
|
+
- Shannon, C. "A Mathematical Theory of Communication" (1948)
|
|
27
|
+
- ICPC: "In-context Prompt Compression" (arXiv 2025)
|
|
28
|
+
- LLMLingua: "Compressing Prompts for Accelerated Inference" (EMNLP 2023)
|
|
29
|
+
- ILRe: "Intermediate Layer Retrieval" (ICLR 2026)
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
from __future__ import annotations
|
|
33
|
+
|
|
34
|
+
import math
|
|
35
|
+
import re
|
|
36
|
+
from collections import Counter
|
|
37
|
+
from typing import Dict, List, Optional
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# ── Common boilerplate patterns (low information, high predictability) ──
|
|
41
|
+
_BOILERPLATE_PATTERNS = [
|
|
42
|
+
r"^\s*import\s+\w+",
|
|
43
|
+
r"^\s*from\s+\w+\s+import",
|
|
44
|
+
r"^\s*def\s+__\w+__\s*\(", # dunder methods
|
|
45
|
+
r"^\s*class\s+\w+\s*(\(.*\))?\s*:",
|
|
46
|
+
r"^\s*return\s+(None|self|True|False)\s*$",
|
|
47
|
+
r"^\s*pass\s*$",
|
|
48
|
+
r"^\s*\.\.\.\s*$",
|
|
49
|
+
r"^\s*#\s*(TODO|FIXME|HACK|XXX|NOTE)\b",
|
|
50
|
+
r"^\s*(\"\"\"|\'\'\')$",
|
|
51
|
+
r"^\s*\}\s*$",
|
|
52
|
+
r"^\s*\)\s*$",
|
|
53
|
+
r"^\s*\]\s*$",
|
|
54
|
+
]
|
|
55
|
+
_BOILERPLATE_RE = [re.compile(p) for p in _BOILERPLATE_PATTERNS]
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def shannon_entropy(text: str) -> float:
|
|
59
|
+
"""
|
|
60
|
+
Compute the character-level Shannon entropy of a text string.
|
|
61
|
+
|
|
62
|
+
H(X) = -Σ p(xᵢ) · log₂(p(xᵢ))
|
|
63
|
+
|
|
64
|
+
Returns bits per character. English text averages ~4.2 bits/char.
|
|
65
|
+
Code typically ranges from 3.5 (boilerplate) to 5.5 (novel logic).
|
|
66
|
+
|
|
67
|
+
A perfectly random string of 256 possible bytes would have H = 8.0.
|
|
68
|
+
A string of all identical characters has H = 0.0.
|
|
69
|
+
"""
|
|
70
|
+
if not text:
|
|
71
|
+
return 0.0
|
|
72
|
+
|
|
73
|
+
counts = Counter(text)
|
|
74
|
+
length = len(text)
|
|
75
|
+
entropy = 0.0
|
|
76
|
+
|
|
77
|
+
for count in counts.values():
|
|
78
|
+
p = count / length
|
|
79
|
+
if p > 0:
|
|
80
|
+
entropy -= p * math.log2(p)
|
|
81
|
+
|
|
82
|
+
return entropy
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def normalized_entropy(text: str, max_entropy: float = 6.0) -> float:
|
|
86
|
+
"""
|
|
87
|
+
Normalize Shannon entropy to [0, 1] range.
|
|
88
|
+
|
|
89
|
+
Max entropy for source code is empirically ~6.0 bits/char.
|
|
90
|
+
Values above this are clipped to 1.0.
|
|
91
|
+
"""
|
|
92
|
+
if not text:
|
|
93
|
+
return 0.0
|
|
94
|
+
raw = shannon_entropy(text)
|
|
95
|
+
return min(raw / max_entropy, 1.0)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def boilerplate_ratio(text: str) -> float:
|
|
99
|
+
"""
|
|
100
|
+
Compute the fraction of lines that match common boilerplate patterns.
|
|
101
|
+
|
|
102
|
+
Returns a value in [0, 1]:
|
|
103
|
+
0.0 = no boilerplate (all novel code)
|
|
104
|
+
1.0 = all boilerplate (imports, pass, empty blocks)
|
|
105
|
+
|
|
106
|
+
This penalizes fragments full of imports or stub implementations
|
|
107
|
+
that contribute little unique information to the context.
|
|
108
|
+
"""
|
|
109
|
+
lines = text.strip().split("\n")
|
|
110
|
+
if not lines:
|
|
111
|
+
return 0.0
|
|
112
|
+
|
|
113
|
+
boilerplate_count = 0
|
|
114
|
+
for line in lines:
|
|
115
|
+
stripped = line.strip()
|
|
116
|
+
if not stripped:
|
|
117
|
+
continue
|
|
118
|
+
for pattern in _BOILERPLATE_RE:
|
|
119
|
+
if pattern.match(stripped):
|
|
120
|
+
boilerplate_count += 1
|
|
121
|
+
break
|
|
122
|
+
|
|
123
|
+
non_empty = sum(1 for l in lines if l.strip())
|
|
124
|
+
if non_empty == 0:
|
|
125
|
+
return 1.0
|
|
126
|
+
|
|
127
|
+
return boilerplate_count / non_empty
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def token_surprisal(
|
|
131
|
+
text: str,
|
|
132
|
+
global_token_counts: Dict[str, int],
|
|
133
|
+
total_tokens: int,
|
|
134
|
+
) -> float:
|
|
135
|
+
"""
|
|
136
|
+
Compute the average self-information (surprisal) of tokens in this
|
|
137
|
+
fragment relative to the global session distribution.
|
|
138
|
+
|
|
139
|
+
Surprisal of token t:
|
|
140
|
+
I(t) = -log₂(p(t))
|
|
141
|
+
where p(t) = count(t, global) / total_tokens
|
|
142
|
+
|
|
143
|
+
Tokens that are rare in the session (e.g., a specific variable name
|
|
144
|
+
that appears only in this fragment) have HIGH surprisal — they carry
|
|
145
|
+
the most information.
|
|
146
|
+
|
|
147
|
+
Tokens that are common (e.g., 'the', 'def', 'return') have LOW
|
|
148
|
+
surprisal — they're predictable and carry little information.
|
|
149
|
+
|
|
150
|
+
This is the dual of Shannon entropy: entropy measures the average
|
|
151
|
+
surprise of a SOURCE, while surprisal measures the surprise of
|
|
152
|
+
individual TOKENS relative to a background distribution.
|
|
153
|
+
|
|
154
|
+
Reference: Shannon (1948), Information Theory applied to lexical
|
|
155
|
+
diversity measurement.
|
|
156
|
+
"""
|
|
157
|
+
if total_tokens == 0 or not text:
|
|
158
|
+
return 0.0
|
|
159
|
+
|
|
160
|
+
# Simple whitespace tokenization (fast, no dependencies)
|
|
161
|
+
tokens = text.lower().split()
|
|
162
|
+
if not tokens:
|
|
163
|
+
return 0.0
|
|
164
|
+
|
|
165
|
+
total_surprisal = 0.0
|
|
166
|
+
for token in tokens:
|
|
167
|
+
count = global_token_counts.get(token, 0)
|
|
168
|
+
if count == 0:
|
|
169
|
+
# Unseen token → maximum surprisal (capped)
|
|
170
|
+
total_surprisal += 20.0 # ~2^20 = 1M vocabulary
|
|
171
|
+
else:
|
|
172
|
+
p = count / total_tokens
|
|
173
|
+
total_surprisal += -math.log2(p)
|
|
174
|
+
|
|
175
|
+
return total_surprisal / len(tokens)
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def cross_fragment_redundancy(
|
|
179
|
+
fragment_text: str,
|
|
180
|
+
other_fragments: List[str],
|
|
181
|
+
ngram_size: int = 3,
|
|
182
|
+
) -> float:
|
|
183
|
+
"""
|
|
184
|
+
Compute how much of this fragment's information is already present
|
|
185
|
+
in other fragments using n-gram overlap (TF-IDF inspired).
|
|
186
|
+
|
|
187
|
+
Returns a value in [0, 1]:
|
|
188
|
+
0.0 = completely unique information (no overlap with others)
|
|
189
|
+
1.0 = completely redundant (all n-grams found in other fragments)
|
|
190
|
+
|
|
191
|
+
Uses word-level n-grams (trigrams by default) for a balance between
|
|
192
|
+
exact matching (too strict) and unigram matching (too loose).
|
|
193
|
+
|
|
194
|
+
This catches cases where the same function definition appears in
|
|
195
|
+
multiple tool results — a common source of context bloat.
|
|
196
|
+
"""
|
|
197
|
+
if not fragment_text or not other_fragments:
|
|
198
|
+
return 0.0
|
|
199
|
+
|
|
200
|
+
# Extract n-grams from this fragment
|
|
201
|
+
words = fragment_text.lower().split()
|
|
202
|
+
if len(words) < ngram_size:
|
|
203
|
+
return 0.0
|
|
204
|
+
|
|
205
|
+
fragment_ngrams = set()
|
|
206
|
+
for i in range(len(words) - ngram_size + 1):
|
|
207
|
+
fragment_ngrams.add(tuple(words[i : i + ngram_size]))
|
|
208
|
+
|
|
209
|
+
if not fragment_ngrams:
|
|
210
|
+
return 0.0
|
|
211
|
+
|
|
212
|
+
# Build n-gram set from all other fragments
|
|
213
|
+
other_ngrams = set()
|
|
214
|
+
for other in other_fragments:
|
|
215
|
+
other_words = other.lower().split()
|
|
216
|
+
for i in range(len(other_words) - ngram_size + 1):
|
|
217
|
+
other_ngrams.add(tuple(other_words[i : i + ngram_size]))
|
|
218
|
+
|
|
219
|
+
# Compute overlap ratio
|
|
220
|
+
overlap = fragment_ngrams & other_ngrams
|
|
221
|
+
return len(overlap) / len(fragment_ngrams)
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def compute_information_score(
|
|
225
|
+
text: str,
|
|
226
|
+
global_token_counts: Optional[Dict[str, int]] = None,
|
|
227
|
+
total_tokens: int = 0,
|
|
228
|
+
other_fragments: Optional[List[str]] = None,
|
|
229
|
+
) -> float:
|
|
230
|
+
"""
|
|
231
|
+
Compute the final information density score for a context fragment.
|
|
232
|
+
|
|
233
|
+
Combines three metrics:
|
|
234
|
+
1. Shannon entropy (40% weight) — raw information density
|
|
235
|
+
2. Boilerplate penalty (30% weight) — penalizes predictable code
|
|
236
|
+
3. Cross-fragment redundancy (30% weight) — penalizes duplicated info
|
|
237
|
+
|
|
238
|
+
If global token counts are available, token surprisal is blended
|
|
239
|
+
into the Shannon entropy component for a more session-aware score.
|
|
240
|
+
|
|
241
|
+
Returns a value in [0, 1] where:
|
|
242
|
+
1.0 = maximally informative, unique, surprising content
|
|
243
|
+
0.0 = empty, boilerplate, or fully redundant content
|
|
244
|
+
"""
|
|
245
|
+
if not text.strip():
|
|
246
|
+
return 0.0
|
|
247
|
+
|
|
248
|
+
# 1. Shannon entropy (normalized)
|
|
249
|
+
ent = normalized_entropy(text)
|
|
250
|
+
|
|
251
|
+
# Blend with token surprisal if available
|
|
252
|
+
if global_token_counts and total_tokens > 0:
|
|
253
|
+
surprisal = token_surprisal(text, global_token_counts, total_tokens)
|
|
254
|
+
# Normalize surprisal (typical range 5-15, cap at 20)
|
|
255
|
+
norm_surprisal = min(surprisal / 20.0, 1.0)
|
|
256
|
+
# 60% entropy, 40% surprisal
|
|
257
|
+
ent = 0.6 * ent + 0.4 * norm_surprisal
|
|
258
|
+
|
|
259
|
+
# 2. Boilerplate penalty
|
|
260
|
+
bp = boilerplate_ratio(text)
|
|
261
|
+
boilerplate_score = 1.0 - bp # Invert: low boilerplate → high score
|
|
262
|
+
|
|
263
|
+
# 3. Cross-fragment redundancy
|
|
264
|
+
if other_fragments:
|
|
265
|
+
redundancy = cross_fragment_redundancy(text, other_fragments)
|
|
266
|
+
uniqueness_score = 1.0 - redundancy
|
|
267
|
+
else:
|
|
268
|
+
uniqueness_score = 1.0
|
|
269
|
+
|
|
270
|
+
# Weighted combination
|
|
271
|
+
score = (
|
|
272
|
+
0.40 * ent
|
|
273
|
+
+ 0.30 * boilerplate_score
|
|
274
|
+
+ 0.30 * uniqueness_score
|
|
275
|
+
)
|
|
276
|
+
|
|
277
|
+
return round(max(0.0, min(1.0, score)), 4)
|
|
@@ -0,0 +1,348 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Knapsack Context Optimizer
|
|
3
|
+
==========================
|
|
4
|
+
|
|
5
|
+
Solves the **0/1 Knapsack Problem** to select the mathematically optimal
|
|
6
|
+
subset of context fragments that fit within a token budget.
|
|
7
|
+
|
|
8
|
+
Every existing AI coding tool does naive FIFO truncation — cut from the top
|
|
9
|
+
when the context is full. SharpContext treats this as a constrained
|
|
10
|
+
optimization problem and solves it optimally.
|
|
11
|
+
|
|
12
|
+
Mathematical Foundation:
|
|
13
|
+
Given N fragments, each with token cost c(i) and relevance score r(i),
|
|
14
|
+
and a total token budget B, we want to:
|
|
15
|
+
|
|
16
|
+
Maximize: Σ r(i) · x(i) for i in [1..N]
|
|
17
|
+
Subject to: Σ c(i) · x(i) ≤ B
|
|
18
|
+
where x(i) ∈ {0, 1}
|
|
19
|
+
|
|
20
|
+
This is the classic 0/1 Knapsack Problem.
|
|
21
|
+
|
|
22
|
+
For typical coding sessions (N ≈ 500 fragments, B ≈ 128K tokens),
|
|
23
|
+
the DP solution runs in under 1ms.
|
|
24
|
+
|
|
25
|
+
For very large sessions (N > 2000), we fall back to a greedy
|
|
26
|
+
approximation with a provable 0.5 optimality guarantee.
|
|
27
|
+
|
|
28
|
+
References:
|
|
29
|
+
- Kellerer, Pferschy, Pisinger. "Knapsack Problems" (Springer, 2004)
|
|
30
|
+
- Dantzig. "Discrete-Variable Extremum Problems" (Operations Research, 1957)
|
|
31
|
+
- ICPC (arXiv 2025) — per-token information content scoring
|
|
32
|
+
- Active Context Compression for LLM Agents (arXiv 2025)
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
from __future__ import annotations
|
|
36
|
+
|
|
37
|
+
import math
|
|
38
|
+
from dataclasses import dataclass, field
|
|
39
|
+
from typing import List, Optional, Tuple
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@dataclass
|
|
43
|
+
class ContextFragment:
|
|
44
|
+
"""A single piece of context (code snippet, file content, tool result, etc.)."""
|
|
45
|
+
|
|
46
|
+
fragment_id: str
|
|
47
|
+
"""Unique identifier for this fragment."""
|
|
48
|
+
|
|
49
|
+
content: str
|
|
50
|
+
"""The raw text content."""
|
|
51
|
+
|
|
52
|
+
token_count: int
|
|
53
|
+
"""Number of tokens in this fragment (pre-computed)."""
|
|
54
|
+
|
|
55
|
+
source: str = ""
|
|
56
|
+
"""Origin label (e.g., 'file:utils.py', 'tool:grep', 'user:message')."""
|
|
57
|
+
|
|
58
|
+
# ── Scoring components (all normalized to [0, 1]) ──────────────────
|
|
59
|
+
recency_score: float = 1.0
|
|
60
|
+
"""1.0 = just accessed, decays toward 0 over turns (Ebbinghaus)."""
|
|
61
|
+
|
|
62
|
+
frequency_score: float = 0.0
|
|
63
|
+
"""Normalized access frequency (0 = never, 1 = most accessed)."""
|
|
64
|
+
|
|
65
|
+
semantic_score: float = 0.0
|
|
66
|
+
"""Cosine similarity to the current query/task (via SimHash)."""
|
|
67
|
+
|
|
68
|
+
entropy_score: float = 0.5
|
|
69
|
+
"""Shannon entropy normalized to [0, 1]. High = unique info."""
|
|
70
|
+
|
|
71
|
+
# ── Metadata ────────────────────────────────────────────────────────
|
|
72
|
+
turn_created: int = 0
|
|
73
|
+
"""Turn number when this fragment was first added."""
|
|
74
|
+
|
|
75
|
+
turn_last_accessed: int = 0
|
|
76
|
+
"""Turn number when this fragment was last accessed."""
|
|
77
|
+
|
|
78
|
+
access_count: int = 0
|
|
79
|
+
"""Total number of times this fragment was accessed."""
|
|
80
|
+
|
|
81
|
+
is_pinned: bool = False
|
|
82
|
+
"""If True, this fragment is always included (user override)."""
|
|
83
|
+
|
|
84
|
+
simhash: int = 0
|
|
85
|
+
"""SimHash fingerprint for O(1) deduplication."""
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def compute_relevance(
|
|
89
|
+
frag: ContextFragment,
|
|
90
|
+
w_recency: float = 0.30,
|
|
91
|
+
w_frequency: float = 0.25,
|
|
92
|
+
w_semantic: float = 0.25,
|
|
93
|
+
w_entropy: float = 0.20,
|
|
94
|
+
) -> float:
|
|
95
|
+
"""
|
|
96
|
+
Compute the composite relevance score for a fragment.
|
|
97
|
+
|
|
98
|
+
The score is a weighted combination of four dimensions:
|
|
99
|
+
- Recency: How recently was this fragment accessed?
|
|
100
|
+
- Frequency: How often has this fragment been accessed?
|
|
101
|
+
- Semantic: How similar is this fragment to the current query?
|
|
102
|
+
- Entropy: How much unique information does this fragment contain?
|
|
103
|
+
|
|
104
|
+
All weights must sum to 1.0 (enforced via normalization).
|
|
105
|
+
|
|
106
|
+
Inspired by the ICPC (In-context Prompt Compression) paper (arXiv 2025)
|
|
107
|
+
which shows that per-token information content is a better predictor
|
|
108
|
+
of importance than position or recency alone.
|
|
109
|
+
"""
|
|
110
|
+
total_weight = w_recency + w_frequency + w_semantic + w_entropy
|
|
111
|
+
if total_weight == 0:
|
|
112
|
+
return 0.0
|
|
113
|
+
|
|
114
|
+
score = (
|
|
115
|
+
w_recency * frag.recency_score
|
|
116
|
+
+ w_frequency * frag.frequency_score
|
|
117
|
+
+ w_semantic * frag.semantic_score
|
|
118
|
+
+ w_entropy * frag.entropy_score
|
|
119
|
+
) / total_weight
|
|
120
|
+
|
|
121
|
+
return score
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def knapsack_optimize(
|
|
125
|
+
fragments: List[ContextFragment],
|
|
126
|
+
token_budget: int,
|
|
127
|
+
w_recency: float = 0.30,
|
|
128
|
+
w_frequency: float = 0.25,
|
|
129
|
+
w_semantic: float = 0.25,
|
|
130
|
+
w_entropy: float = 0.20,
|
|
131
|
+
) -> Tuple[List[ContextFragment], dict]:
|
|
132
|
+
"""
|
|
133
|
+
Select the optimal subset of context fragments that fits within
|
|
134
|
+
the token budget while maximizing total relevance.
|
|
135
|
+
|
|
136
|
+
Algorithm Selection:
|
|
137
|
+
- N ≤ 2000 → Exact DP (O(N·B) with quantized budget)
|
|
138
|
+
- N > 2000 → Greedy with density sorting (O(N·log N))
|
|
139
|
+
|
|
140
|
+
The DP solution uses budget quantization to keep the table small:
|
|
141
|
+
we divide the budget into 1000 bins, solving a coarser problem
|
|
142
|
+
that's still ~99.9% optimal but runs in constant memory.
|
|
143
|
+
|
|
144
|
+
Args:
|
|
145
|
+
fragments: List of context fragments to choose from.
|
|
146
|
+
token_budget: Maximum total tokens allowed.
|
|
147
|
+
w_*: Weight parameters for the relevance scoring function.
|
|
148
|
+
|
|
149
|
+
Returns:
|
|
150
|
+
(selected_fragments, stats_dict)
|
|
151
|
+
where stats_dict contains optimization metrics.
|
|
152
|
+
"""
|
|
153
|
+
if not fragments:
|
|
154
|
+
return [], {"total_tokens": 0, "total_relevance": 0.0, "method": "empty"}
|
|
155
|
+
|
|
156
|
+
# Separate pinned fragments (always included)
|
|
157
|
+
pinned = [f for f in fragments if f.is_pinned]
|
|
158
|
+
candidates = [f for f in fragments if not f.is_pinned]
|
|
159
|
+
|
|
160
|
+
pinned_tokens = sum(f.token_count for f in pinned)
|
|
161
|
+
remaining_budget = token_budget - pinned_tokens
|
|
162
|
+
|
|
163
|
+
if remaining_budget <= 0:
|
|
164
|
+
# Budget exhausted by pinned fragments alone
|
|
165
|
+
return pinned, {
|
|
166
|
+
"total_tokens": pinned_tokens,
|
|
167
|
+
"total_relevance": sum(
|
|
168
|
+
compute_relevance(f, w_recency, w_frequency, w_semantic, w_entropy)
|
|
169
|
+
for f in pinned
|
|
170
|
+
),
|
|
171
|
+
"method": "pinned_only",
|
|
172
|
+
"candidates_evaluated": 0,
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
# Compute relevance scores
|
|
176
|
+
scored = []
|
|
177
|
+
for frag in candidates:
|
|
178
|
+
rel = compute_relevance(frag, w_recency, w_frequency, w_semantic, w_entropy)
|
|
179
|
+
if rel > 0 and frag.token_count > 0:
|
|
180
|
+
scored.append((frag, rel))
|
|
181
|
+
|
|
182
|
+
if not scored:
|
|
183
|
+
return pinned, {
|
|
184
|
+
"total_tokens": pinned_tokens,
|
|
185
|
+
"total_relevance": 0.0,
|
|
186
|
+
"method": "no_candidates",
|
|
187
|
+
"candidates_evaluated": 0,
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
n = len(scored)
|
|
191
|
+
|
|
192
|
+
if n <= 2000:
|
|
193
|
+
selected = _knapsack_dp(scored, remaining_budget)
|
|
194
|
+
method = "exact_dp"
|
|
195
|
+
else:
|
|
196
|
+
selected = _knapsack_greedy(scored, remaining_budget)
|
|
197
|
+
method = "greedy_approx"
|
|
198
|
+
|
|
199
|
+
result = pinned + selected
|
|
200
|
+
total_tokens = sum(f.token_count for f in result)
|
|
201
|
+
total_relevance = sum(
|
|
202
|
+
compute_relevance(f, w_recency, w_frequency, w_semantic, w_entropy)
|
|
203
|
+
for f in result
|
|
204
|
+
)
|
|
205
|
+
|
|
206
|
+
return result, {
|
|
207
|
+
"total_tokens": total_tokens,
|
|
208
|
+
"total_relevance": round(total_relevance, 4),
|
|
209
|
+
"method": method,
|
|
210
|
+
"candidates_evaluated": n,
|
|
211
|
+
"selected_count": len(selected),
|
|
212
|
+
"pinned_count": len(pinned),
|
|
213
|
+
"budget_utilization": round(total_tokens / token_budget, 4) if token_budget > 0 else 0,
|
|
214
|
+
"tokens_saved": token_budget - total_tokens,
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def _knapsack_dp(
|
|
219
|
+
scored: List[Tuple[ContextFragment, float]],
|
|
220
|
+
budget: int,
|
|
221
|
+
) -> List[ContextFragment]:
|
|
222
|
+
"""
|
|
223
|
+
Exact 0/1 knapsack via dynamic programming with budget quantization.
|
|
224
|
+
|
|
225
|
+
We quantize the budget into Q bins (default 1000) so the DP table
|
|
226
|
+
is at most N × Q instead of N × B. This reduces memory from
|
|
227
|
+
~500MB (for 128K budget) to ~8MB while losing <0.1% optimality.
|
|
228
|
+
|
|
229
|
+
The quantization granularity is:
|
|
230
|
+
g = max(1, budget // Q)
|
|
231
|
+
|
|
232
|
+
Each fragment's cost is rounded up to the nearest multiple of g.
|
|
233
|
+
This ensures we never exceed the real budget (conservative rounding).
|
|
234
|
+
"""
|
|
235
|
+
Q = 1000 # Number of budget bins
|
|
236
|
+
g = max(1, budget // Q) # Granularity
|
|
237
|
+
quantized_budget = budget // g
|
|
238
|
+
|
|
239
|
+
n = len(scored)
|
|
240
|
+
|
|
241
|
+
# Scale relevance to integers for DP precision (multiply by 10000)
|
|
242
|
+
items = []
|
|
243
|
+
for frag, rel in scored:
|
|
244
|
+
quantized_cost = (frag.token_count + g - 1) // g # Ceiling division
|
|
245
|
+
if quantized_cost <= quantized_budget:
|
|
246
|
+
items.append((frag, int(rel * 10000), quantized_cost))
|
|
247
|
+
|
|
248
|
+
if not items:
|
|
249
|
+
return []
|
|
250
|
+
|
|
251
|
+
n = len(items)
|
|
252
|
+
|
|
253
|
+
# DP with rolling array (only need previous row)
|
|
254
|
+
prev = [0] * (quantized_budget + 1)
|
|
255
|
+
curr = [0] * (quantized_budget + 1)
|
|
256
|
+
|
|
257
|
+
# Track selections with bit arrays (memory-efficient)
|
|
258
|
+
# For N ≤ 2000, this is manageable
|
|
259
|
+
keep = [[False] * (quantized_budget + 1) for _ in range(n)]
|
|
260
|
+
|
|
261
|
+
for i in range(n):
|
|
262
|
+
_, value, cost = items[i]
|
|
263
|
+
for w in range(quantized_budget + 1):
|
|
264
|
+
if cost <= w and prev[w - cost] + value > prev[w]:
|
|
265
|
+
curr[w] = prev[w - cost] + value
|
|
266
|
+
keep[i][w] = True
|
|
267
|
+
else:
|
|
268
|
+
curr[w] = prev[w]
|
|
269
|
+
prev, curr = curr, [0] * (quantized_budget + 1)
|
|
270
|
+
|
|
271
|
+
# Backtrack to find selected items
|
|
272
|
+
selected = []
|
|
273
|
+
w = quantized_budget
|
|
274
|
+
for i in range(n - 1, -1, -1):
|
|
275
|
+
if keep[i][w]:
|
|
276
|
+
frag, _, cost = items[i]
|
|
277
|
+
selected.append(frag)
|
|
278
|
+
w -= cost
|
|
279
|
+
|
|
280
|
+
return selected
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def _knapsack_greedy(
|
|
284
|
+
scored: List[Tuple[ContextFragment, float]],
|
|
285
|
+
budget: int,
|
|
286
|
+
) -> List[ContextFragment]:
|
|
287
|
+
"""
|
|
288
|
+
Greedy approximation for large fragment sets (N > 2000).
|
|
289
|
+
|
|
290
|
+
Sorts by relevance density (relevance / token_cost) and greedily
|
|
291
|
+
selects fragments until the budget is exhausted.
|
|
292
|
+
|
|
293
|
+
This has a provable 0.5 optimality guarantee for the 0/1 knapsack.
|
|
294
|
+
In practice, it's typically within 95% of optimal for context
|
|
295
|
+
selection because fragment sizes don't vary as wildly as in
|
|
296
|
+
classical knapsack instances.
|
|
297
|
+
|
|
298
|
+
Reference: Dantzig (1957) — the fractional relaxation bound.
|
|
299
|
+
"""
|
|
300
|
+
# Sort by density (relevance per token), highest first
|
|
301
|
+
density_sorted = sorted(
|
|
302
|
+
scored,
|
|
303
|
+
key=lambda x: x[1] / max(x[0].token_count, 1),
|
|
304
|
+
reverse=True,
|
|
305
|
+
)
|
|
306
|
+
|
|
307
|
+
selected = []
|
|
308
|
+
remaining = budget
|
|
309
|
+
|
|
310
|
+
for frag, rel in density_sorted:
|
|
311
|
+
if frag.token_count <= remaining:
|
|
312
|
+
selected.append(frag)
|
|
313
|
+
remaining -= frag.token_count
|
|
314
|
+
|
|
315
|
+
if remaining <= 0:
|
|
316
|
+
break
|
|
317
|
+
|
|
318
|
+
return selected
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
def apply_ebbinghaus_decay(
|
|
322
|
+
fragments: List[ContextFragment],
|
|
323
|
+
current_turn: int,
|
|
324
|
+
half_life: int = 15,
|
|
325
|
+
) -> None:
|
|
326
|
+
"""
|
|
327
|
+
Apply Ebbinghaus forgetting curve to fragment recency scores.
|
|
328
|
+
|
|
329
|
+
The recency score decays exponentially based on turns since last access:
|
|
330
|
+
|
|
331
|
+
recency(t) = exp(-λ · Δt)
|
|
332
|
+
where λ = ln(2) / half_life
|
|
333
|
+
and Δt = current_turn - turn_last_accessed
|
|
334
|
+
|
|
335
|
+
This models the psychological Ebbinghaus forgetting curve but applied
|
|
336
|
+
to context fragments rather than human memories.
|
|
337
|
+
|
|
338
|
+
Fragments accessed frequently get a frequency boost that counteracts
|
|
339
|
+
the decay (spaced repetition effect).
|
|
340
|
+
|
|
341
|
+
Reference: Ebbinghaus, H. "Memory: A Contribution to Experimental
|
|
342
|
+
Psychology" (1885), as applied in our hippocampus-sharp-memory engine.
|
|
343
|
+
"""
|
|
344
|
+
decay_rate = math.log(2) / max(half_life, 1)
|
|
345
|
+
|
|
346
|
+
for frag in fragments:
|
|
347
|
+
dt = max(0, current_turn - frag.turn_last_accessed)
|
|
348
|
+
frag.recency_score = math.exp(-decay_rate * dt)
|