pyagent-context 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyagent_context/__init__.py +29 -0
- pyagent_context/compression.py +132 -0
- pyagent_context/item.py +132 -0
- pyagent_context/ledger.py +136 -0
- pyagent_context/lifecycle.py +157 -0
- pyagent_context/memory/__init__.py +12 -0
- pyagent_context/memory/semantic.py +153 -0
- pyagent_context/memory/session.py +151 -0
- pyagent_context/memory/working.py +69 -0
- pyagent_context/py.typed +0 -0
- pyagent_context/redaction.py +71 -0
- pyagent_context/retrieval.py +121 -0
- pyagent_context-0.1.0.dist-info/METADATA +286 -0
- pyagent_context-0.1.0.dist-info/RECORD +15 -0
- pyagent_context-0.1.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
"""SemanticMemory: vector-indexed long-term store (Protocol + in-memory TF-IDF impl)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import math
|
|
6
|
+
from collections import Counter
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from typing import Protocol, runtime_checkable
|
|
9
|
+
|
|
10
|
+
from pyagent_context.item import ContextItem
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass(frozen=True)
class SearchResult:
    """One hit returned by a semantic-memory search.

    Instances are immutable (``frozen=True``), so a result can be shared
    between callers without risk of in-place modification.

    Attributes:
        item: The context item that matched the query.
        score: Relevance of ``item`` to the query; larger means more
            relevant. Nominally within 0.0–1.0.
    """

    # The stored item that matched.
    item: ContextItem
    # Similarity between the query and ``item``.
    score: float
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@runtime_checkable
class SemanticMemoryProtocol(Protocol):
    """Structural contract that every semantic memory backend satisfies.

    The protocol is ``runtime_checkable``, so ``isinstance(obj,
    SemanticMemoryProtocol)`` succeeds for any object exposing these
    members (only their presence is checked, not their signatures).
    """

    def add(self, item: ContextItem) -> None:
        """Store ``item`` so that later searches can return it."""
        ...

    def search(self, query: str, top_k: int = 5) -> list[SearchResult]:
        """Return up to ``top_k`` results most relevant to ``query``."""
        ...

    def remove(self, item_id: str) -> bool:
        """Drop the item with ``item_id``; report whether it was present."""
        ...

    def clear(self) -> None:
        """Discard every stored item."""
        ...

    def __len__(self) -> int: ...
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class InMemorySemanticStore:
    """In-memory semantic store using TF-IDF cosine similarity.

    No external dependencies — suitable for testing and small datasets.
    For production, use a vector DB adapter (ChromaDB, Pinecone, etc.).

    Args:
        stop_words: Optional set of words to ignore in scoring. ``None``
            selects the built-in English list; an explicitly empty set
            disables stop-word filtering altogether.
    """

    # Characters removed from both ends of each whitespace-separated token.
    _PUNCTUATION = ".,!?;:\"'()[]{}"

    # Built-in English stop words used when the caller supplies none.
    _DEFAULT_STOP_WORDS = frozenset({
        "a", "an", "the", "is", "are", "was", "were", "be", "been",
        "being", "have", "has", "had", "do", "does", "did", "will",
        "would", "could", "should", "may", "might", "can", "shall",
        "to", "of", "in", "for", "on", "with", "at", "by", "from",
        "as", "into", "through", "during", "before", "after", "and",
        "but", "or", "nor", "not", "so", "yet", "both", "either",
        "neither", "each", "every", "all", "any", "few", "more",
        "most", "other", "some", "such", "no", "only", "own", "same",
        "than", "too", "very", "just", "because", "about", "between",
        "it", "its", "this", "that", "these", "those", "i", "me",
        "my", "we", "our", "you", "your", "he", "him", "his", "she",
        "her", "they", "them", "their", "what", "which", "who",
    })

    def __init__(self, stop_words: set[str] | None = None) -> None:
        self._items: dict[str, ContextItem] = {}
        # Per-item term frequencies, computed once when the item is added.
        self._tf_cache: dict[str, dict[str, float]] = {}
        # ``is None`` rather than truthiness: an empty set is a valid request
        # for "no stop words" and must not silently fall back to the default.
        if stop_words is None:
            self._stop_words: set[str] = set(self._DEFAULT_STOP_WORDS)
        else:
            self._stop_words = stop_words

    def add(self, item: ContextItem) -> None:
        """Index ``item`` under ``item.id``, replacing any previous entry."""
        self._items[item.id] = item
        self._tf_cache[item.id] = self._compute_tf(item.content)

    def search(self, query: str, top_k: int = 5) -> list[SearchResult]:
        """Rank stored items by TF-IDF cosine similarity to ``query``.

        Args:
            query: Free-text query; tokenized the same way as item content.
            top_k: Maximum number of results. Values ``<= 0`` yield ``[]``.

        Returns:
            Results with a strictly positive score, best first.
        """
        if top_k <= 0 or not self._items:
            return []

        query_tf = self._compute_tf(query)
        if not query_tf:
            # Query was empty or consisted solely of stop words/punctuation.
            return []

        idf = self._compute_idf()
        # Query terms unknown to the corpus get weight 0 and are ignored.
        query_vec = {w: tf * idf.get(w, 0.0) for w, tf in query_tf.items()}

        results: list[SearchResult] = []
        for item_id, item in self._items.items():
            doc_tf = self._tf_cache.get(item_id, {})
            doc_vec = {w: tf * idf.get(w, 0.0) for w, tf in doc_tf.items()}
            score = self._cosine_similarity(query_vec, doc_vec)
            if score > 0:
                results.append(SearchResult(item=item, score=score))

        results.sort(key=lambda r: r.score, reverse=True)
        return results[:top_k]

    def remove(self, item_id: str) -> bool:
        """Remove an item by ID. Returns ``True`` if it was present."""
        if item_id not in self._items:
            return False
        del self._items[item_id]
        self._tf_cache.pop(item_id, None)
        return True

    def clear(self) -> None:
        """Remove all items and their cached term frequencies."""
        self._items.clear()
        self._tf_cache.clear()

    def __len__(self) -> int:
        return len(self._items)

    # ------------------------------------------------------------------
    # TF-IDF helpers
    # ------------------------------------------------------------------

    def _tokenize(self, text: str) -> list[str]:
        """Lowercase, split on whitespace, trim punctuation, drop stop words."""
        tokens: list[str] = []
        for raw in text.lower().split():
            word = raw.strip(self._PUNCTUATION)
            # Punctuation-only words strip down to "" and carry no meaning.
            if word and word not in self._stop_words:
                tokens.append(word)
        return tokens

    def _compute_tf(self, text: str) -> dict[str, float]:
        """Return each token's share of the total token count in ``text``."""
        tokens = self._tokenize(text)
        if not tokens:
            return {}
        total = len(tokens)
        return {word: count / total for word, count in Counter(tokens).items()}

    def _compute_idf(self) -> dict[str, float]:
        """Return a smoothed inverse document frequency for every known term.

        Uses ``log((1 + n) / (1 + df)) + 1``, which is always positive. The
        unsmoothed ``log(n / (1 + df))`` is zero or negative for small
        corpora (e.g. two documents, term in one of them), which zeroed out
        document vectors and made exact matches unfindable.
        """
        n_docs = len(self._items)
        if n_docs == 0:
            return {}
        doc_freq: Counter[str] = Counter()
        for tf in self._tf_cache.values():
            doc_freq.update(tf.keys())
        return {
            word: math.log((1 + n_docs) / (1 + freq)) + 1.0
            for word, freq in doc_freq.items()
        }

    @staticmethod
    def _cosine_similarity(a: dict[str, float], b: dict[str, float]) -> float:
        """Cosine similarity of two sparse vectors keyed by term."""
        if not a or not b:
            return 0.0
        common_keys = set(a) & set(b)
        if not common_keys:
            return 0.0
        dot = sum(a[k] * b[k] for k in common_keys)
        mag_a = math.sqrt(sum(v ** 2 for v in a.values()))
        mag_b = math.sqrt(sum(v ** 2 for v in b.values()))
        if mag_a == 0 or mag_b == 0:
            return 0.0
        # Clamp away floating-point overshoot so scores never exceed 1.0.
        return min(1.0, dot / (mag_a * mag_b))
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
"""SessionMemory: persisted across turns via JSON file or SQLite."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import sqlite3
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from pyagent_context.item import ContextItem
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class SessionMemory:
    """Session-scoped memory that persists across turns.

    Supports two backends:
    - ``json``: simple JSON file per session (default)
    - ``sqlite``: one shared ``sessions.db`` file holding rows for all sessions

    Any ``backend`` value other than ``"sqlite"`` is treated as ``"json"``.
    Items are loaded lazily on first access.

    Args:
        session_id: Unique session identifier. It is used verbatim in the
            JSON file name, so it should not contain path separators.
        backend: ``"json"`` or ``"sqlite"``.
        storage_path: Directory for storage files. Defaults to
            ``".pyagent_sessions"``. Created if it does not exist.
    """

    _SQLITE_DB_NAME = "sessions.db"
    _CREATE_TABLE_SQL = (
        "CREATE TABLE IF NOT EXISTS context_items "
        "(session_id TEXT, item_id TEXT PRIMARY KEY, data TEXT)"
    )

    def __init__(
        self,
        session_id: str,
        backend: str = "json",
        storage_path: str | Path = ".pyagent_sessions",
    ) -> None:
        self._session_id = session_id
        self._backend = backend
        self._storage_path = Path(storage_path)
        self._storage_path.mkdir(parents=True, exist_ok=True)
        self._items: list[ContextItem] = []
        # Flipped once storage has been read (or deliberately cleared).
        self._loaded = False

    @property
    def session_id(self) -> str:
        return self._session_id

    def add(self, item: ContextItem) -> None:
        """Add an item to the session (in memory; call ``save`` to persist)."""
        self._ensure_loaded()
        self._items.append(item)

    def get_all(self) -> list[ContextItem]:
        """Return a copy of all items in this session."""
        self._ensure_loaded()
        return list(self._items)

    def save(self) -> None:
        """Persist current items to storage."""
        # Load first so that saving a fresh, untouched instance cannot
        # overwrite previously persisted items with an empty list.
        self._ensure_loaded()
        if self._backend == "sqlite":
            self._save_sqlite()
        else:
            self._save_json()

    def load(self) -> None:
        """Load items from storage, replacing whatever is held in memory."""
        if self._backend == "sqlite":
            self._load_sqlite()
        else:
            self._load_json()
        self._loaded = True

    def clear(self) -> None:
        """Remove all items and delete persisted data for this session."""
        self._items.clear()
        if self._backend == "sqlite":
            # SQLite rows live in the shared sessions.db, not in a
            # per-session file, so they must be deleted by session id.
            self._clear_sqlite()
        else:
            path = self._file_path
            if path.exists():
                path.unlink()
        # Storage is now known to be empty; do not lazily reload stale data.
        self._loaded = True

    # ------------------------------------------------------------------
    # JSON backend
    # ------------------------------------------------------------------

    def _save_json(self) -> None:
        data = [item.to_dict() for item in self._items]
        self._file_path.write_text(json.dumps(data, indent=2), encoding="utf-8")

    def _load_json(self) -> None:
        path = self._file_path
        if not path.exists():
            self._items = []
            return
        data = json.loads(path.read_text(encoding="utf-8"))
        self._items = [ContextItem.from_dict(d) for d in data]

    # ------------------------------------------------------------------
    # SQLite backend
    # ------------------------------------------------------------------

    def _save_sqlite(self) -> None:
        """Replace this session's rows with the current in-memory items."""
        conn = sqlite3.connect(str(self._db_path))
        try:
            conn.execute(self._CREATE_TABLE_SQL)
            conn.execute(
                "DELETE FROM context_items WHERE session_id = ?",
                (self._session_id,),
            )
            conn.executemany(
                "INSERT INTO context_items (session_id, item_id, data) VALUES (?, ?, ?)",
                [
                    (self._session_id, item.id, json.dumps(item.to_dict()))
                    for item in self._items
                ],
            )
            conn.commit()
        finally:
            conn.close()

    def _load_sqlite(self) -> None:
        """Read this session's rows in insertion (rowid) order."""
        db_path = self._db_path
        if not db_path.exists():
            self._items = []
            return
        conn = sqlite3.connect(str(db_path))
        try:
            conn.execute(self._CREATE_TABLE_SQL)
            cursor = conn.execute(
                "SELECT data FROM context_items WHERE session_id = ? ORDER BY rowid",
                (self._session_id,),
            )
            self._items = [ContextItem.from_dict(json.loads(row[0])) for row in cursor]
        finally:
            conn.close()

    def _clear_sqlite(self) -> None:
        """Delete this session's rows; other sessions are left untouched."""
        db_path = self._db_path
        if not db_path.exists():
            return
        conn = sqlite3.connect(str(db_path))
        try:
            conn.execute(self._CREATE_TABLE_SQL)
            conn.execute(
                "DELETE FROM context_items WHERE session_id = ?",
                (self._session_id,),
            )
            conn.commit()
        finally:
            conn.close()

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    @property
    def _db_path(self) -> Path:
        """Location of the SQLite database shared by all sessions."""
        return self._storage_path / self._SQLITE_DB_NAME

    @property
    def _file_path(self) -> Path:
        """Per-session file path; only the JSON backend stores data here."""
        suffix = ".db" if self._backend == "sqlite" else ".json"
        return self._storage_path / f"{self._session_id}{suffix}"

    def _ensure_loaded(self) -> None:
        if not self._loaded:
            self.load()

    def __len__(self) -> int:
        self._ensure_loaded()
        return len(self._items)
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""WorkingMemory: bounded deque with automatic eviction and token limit."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections import deque
|
|
6
|
+
|
|
7
|
+
from pyagent_context.item import ContextItem
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class WorkingMemory:
    """Bounded working memory for a single pattern run.

    Evicts the oldest items when capacity is exceeded (either by count
    or by total token estimate).

    Args:
        max_items: Maximum number of items to keep.
        max_tokens: Maximum total token estimate before eviction.
    """

    def __init__(
        self,
        max_items: int = 100,
        max_tokens: int = 50_000,
    ) -> None:
        self._max_items = max_items
        self._max_tokens = max_tokens
        # Oldest item sits at the left end so eviction is an O(1) popleft.
        self._items: deque[ContextItem] = deque()

    def add(self, item: ContextItem) -> list[ContextItem]:
        """Add an item, evicting oldest if necessary.

        The count limit is enforced first, then the token budget. Token
        eviction always leaves at least one item in place, so a single
        oversized item is retained even though it exceeds ``max_tokens``.

        Returns:
            List of evicted items, oldest first (empty if none were evicted).
        """
        evicted: list[ContextItem] = []
        self._items.append(item)

        # Evict by count
        while len(self._items) > self._max_items:
            evicted.append(self._items.popleft())

        # Evict by token budget. The total is summed once and then adjusted
        # per eviction instead of re-summing the whole deque every iteration.
        remaining_tokens = self.total_tokens
        while remaining_tokens > self._max_tokens and len(self._items) > 1:
            oldest = self._items.popleft()
            remaining_tokens -= oldest.token_estimate
            evicted.append(oldest)

        return evicted

    @property
    def total_tokens(self) -> int:
        """Sum of ``token_estimate`` over all items currently held."""
        return sum(item.token_estimate for item in self._items)

    @property
    def items(self) -> list[ContextItem]:
        """Snapshot of the held items, oldest first."""
        return list(self._items)

    @property
    def utilization(self) -> float:
        """Token utilization as a fraction of max_tokens (0.0 if max_tokens <= 0)."""
        return self.total_tokens / self._max_tokens if self._max_tokens > 0 else 0.0

    def clear(self) -> None:
        """Drop every item."""
        self._items.clear()

    def __len__(self) -> int:
        return len(self._items)

    def __bool__(self) -> bool:
        return len(self._items) > 0
|
pyagent_context/py.typed
ADDED
|
File without changes
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""ContextRedactor: field-level redaction by sensitivity tier."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pyagent_context.item import ContextItem, Sensitivity, SENSITIVITY_ORDER
|
|
6
|
+
from pyagent_context.ledger import ContextLedger
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class ContextRedactor:
    """Apply a sensitivity ceiling to context items.

    Any item whose sensitivity ranks higher than the configured ceiling is
    either replaced by a placeholder copy or dropped, depending on
    ``exclude_above``. Items at or below the ceiling pass through untouched.

    Args:
        max_sensitivity: Highest sensitivity level allowed through
            unchanged.
        redaction_text: Content used for the placeholder copy.
        exclude_above: When ``True``, over-ceiling items are dropped
            rather than replaced by a placeholder.
    """

    def __init__(
        self,
        max_sensitivity: Sensitivity = Sensitivity.INTERNAL,
        redaction_text: str = "[REDACTED]",
        exclude_above: bool = False,
    ) -> None:
        self._max_sensitivity = max_sensitivity
        self._redaction_text = redaction_text
        self._exclude_above = exclude_above

    def redact_item(self, item: ContextItem) -> ContextItem | None:
        """Apply the sensitivity ceiling to one item.

        Returns:
            ``item`` itself when it is within the ceiling; otherwise
            ``None`` if ``exclude_above`` is set, else a new item carrying
            the redaction text, the original's source, timestamp, trust
            level, sensitivity and expiry, and ``derived_from`` set to the
            original's id.
        """
        # NOTE(review): a sensitivity missing from SENSITIVITY_ORDER falls
        # back to level 0 — presumably the least sensitive tier, which would
        # let unknown levels through unredacted; confirm against the mapping.
        level = SENSITIVITY_ORDER.get(item.sensitivity, 0)
        ceiling = SENSITIVITY_ORDER.get(self._max_sensitivity, 0)

        if level > ceiling:
            if self._exclude_above:
                return None
            placeholder = self._redaction_text
            return ContextItem(
                content=placeholder,
                source=item.source,
                timestamp=item.timestamp,
                trust_level=item.trust_level,
                sensitivity=item.sensitivity,
                expires_at=item.expires_at,
                derived_from=item.id,
                # Rough estimate of four characters per token, at least 1.
                token_estimate=max(1, len(placeholder) // 4),
            )

        return item

    def redact_ledger(self, ledger: ContextLedger) -> ContextLedger:
        """Build a new ledger from ``ledger`` with the ceiling applied.

        Returns:
            A ``ContextLedger`` holding, in the original order, every item
            that survived ``redact_item`` (unchanged or as a placeholder).
        """
        processed = (self.redact_item(entry) for entry in ledger.items)
        kept: list[ContextItem] = [entry for entry in processed if entry is not None]
        return ContextLedger(items=kept)
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
"""TrustAwareRetriever: score candidates by trust × recency × relevance."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import math
|
|
6
|
+
import time
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
|
|
9
|
+
from pyagent_context.item import ContextItem, TRUST_ORDER
|
|
10
|
+
from pyagent_context.ledger import ContextLedger
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass(frozen=True)
class ScoredItem:
    """Immutable pairing of a context item with its retrieval scores.

    Attributes:
        item: The context item that was scored.
        score: Composite ranking score combining the three components
            below.
        trust_score: Contribution derived from the item's trust level.
        recency_score: Contribution derived from the item's age.
        relevance_score: Contribution derived from keyword overlap with
            the query.
    """

    # The scored item.
    item: ContextItem
    # Overall score used for ranking.
    score: float
    # Individual components, kept so callers can explain a ranking.
    trust_score: float
    recency_score: float
    relevance_score: float
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class TrustAwareRetriever:
    """Retrieve context items scored by trust, recency, and keyword relevance.

    Scoring formula:
        ``score = w_trust * trust + w_recency * recency + w_relevance * relevance``

    Where:
        - **trust** = ``TRUST_ORDER[item.trust_level] / 3.0``
        - **recency** = ``exp(-age_seconds / half_life)``, with age clamped
          at zero so items stamped in the future score exactly 1.0
        - **relevance** = keyword overlap ratio with query

    Args:
        weight_trust: Weight for trust component.
        weight_recency: Weight for recency component.
        weight_relevance: Weight for relevance component.
        recency_half_life: Decay time constant in seconds. Despite the
            name, with the formula above the recency score falls to
            ``1/e`` (about 0.37), not 0.5, after this many seconds. A
            value ``<= 0`` disables recency (the component is 0.0).
    """

    # Characters removed from both ends of each word before matching.
    _PUNCTUATION = ".,!?;:\"'()[]{}"

    def __init__(
        self,
        weight_trust: float = 0.3,
        weight_recency: float = 0.3,
        weight_relevance: float = 0.4,
        recency_half_life: float = 3600.0,
    ) -> None:
        self._w_trust = weight_trust
        self._w_recency = weight_recency
        self._w_relevance = weight_relevance
        self._half_life = recency_half_life

    def retrieve(
        self,
        ledger: ContextLedger,
        query: str,
        *,
        top_k: int = 10,
        min_score: float = 0.0,
    ) -> list[ScoredItem]:
        """Score and rank all items in the ledger against a query.

        Expired items (``item.is_expired``) are skipped.

        Args:
            ledger: The context ledger to search.
            query: The query string for relevance scoring.
            top_k: Maximum results to return. Values ``<= 0`` yield ``[]``.
            min_score: Minimum composite score to include.

        Returns:
            Ranked list of ``ScoredItem`` objects, highest score first.
        """
        if top_k <= 0:
            return []

        now = time.time()
        query_words = self._words(query)
        results: list[ScoredItem] = []

        for item in ledger.items:
            if item.is_expired:
                continue

            trust = TRUST_ORDER.get(item.trust_level, 0) / 3.0
            # Clamp at zero: a timestamp ahead of the clock (skew, or a
            # value in milliseconds) would otherwise push recency above 1
            # and can overflow math.exp.
            age = max(0.0, now - item.timestamp)
            recency = math.exp(-age / self._half_life) if self._half_life > 0 else 0.0
            relevance = self._keyword_relevance(item.content, query_words)

            score = (
                self._w_trust * trust
                + self._w_recency * recency
                + self._w_relevance * relevance
            )

            if score >= min_score:
                results.append(
                    ScoredItem(
                        item=item,
                        score=score,
                        trust_score=trust,
                        recency_score=recency,
                        relevance_score=relevance,
                    )
                )

        results.sort(key=lambda r: r.score, reverse=True)
        return results[:top_k]

    @staticmethod
    def _words(text: str) -> set[str]:
        """Lowercase words of ``text`` with surrounding punctuation removed."""
        stripped = (
            raw.strip(TrustAwareRetriever._PUNCTUATION)
            for raw in text.lower().split()
        )
        return {word for word in stripped if word}

    @staticmethod
    def _keyword_relevance(content: str, query_words: set[str]) -> float:
        """Fraction of ``query_words`` that occur as words in ``content``.

        Content words are matched both verbatim and with surrounding
        punctuation stripped, so ``"hello,"`` in the content satisfies the
        query word ``"hello"``. Returns 0.0 when ``query_words`` is empty.
        """
        if not query_words:
            return 0.0
        # Keep the raw tokens too, so a query word that itself carries
        # punctuation still matches exactly as it did before.
        content_words = set(content.lower().split())
        content_words |= TrustAwareRetriever._words(content)
        overlap = query_words & content_words
        return len(overlap) / len(query_words)
|