pyagent-context 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,153 @@
1
+ """SemanticMemory: vector-indexed long-term store (Protocol + in-memory TF-IDF impl)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import math
6
+ from collections import Counter
7
+ from dataclasses import dataclass, field
8
+ from typing import Protocol, runtime_checkable
9
+
10
+ from pyagent_context.item import ContextItem
11
+
12
+
13
@dataclass(frozen=True)
class SearchResult:
    """One hit produced by a semantic search.

    Instances are immutable (the dataclass is frozen).

    Attributes:
        item: The context item that matched the query.
        score: Relevance score (0.0–1.0); higher means more relevant.
    """

    item: ContextItem
    score: float
24
+
25
+
26
@runtime_checkable
class SemanticMemoryProtocol(Protocol):
    """Structural interface for any vector-backed semantic memory store.

    The protocol is runtime-checkable, so ``isinstance`` can be used to test
    whether an object provides all of the methods below.
    """

    def add(self, item: ContextItem) -> None:
        """Index ``item`` so that later searches can return it."""
        ...

    def search(self, query: str, top_k: int = 5) -> list[SearchResult]:
        """Return the items most relevant to ``query`` (``top_k`` defaults to 5)."""
        ...

    def remove(self, item_id: str) -> bool:
        """Delete the item with the given ID; return True if it was found."""
        ...

    def clear(self) -> None:
        """Discard every stored item."""
        ...

    def __len__(self) -> int:
        ...
47
+
48
+
49
class InMemorySemanticStore:
    """In-memory semantic store using TF-IDF cosine similarity.

    No external dependencies — suitable for testing and small datasets.
    For production, use a vector DB adapter (ChromaDB, Pinecone, etc.).

    Documents and queries are lower-cased, split on whitespace, trimmed of
    surrounding punctuation and filtered against a stop-word list. Term
    weights use a smoothed inverse document frequency,
    ``log((1 + n_docs) / (1 + doc_freq)) + 1``, which is always positive, so
    a term keeps a usable weight even when it occurs in every stored document.

    Args:
        stop_words: Optional set of words to ignore in scoring. ``None``
            selects the built-in English list; an empty set disables
            stop-word filtering altogether.
    """

    # Characters trimmed from both ends of every whitespace-separated token.
    _PUNCTUATION = ".,!?;:\"'()[]{}"

    # Built-in English stop words used when the caller supplies none.
    _DEFAULT_STOP_WORDS = frozenset({
        "a", "an", "the", "is", "are", "was", "were", "be", "been",
        "being", "have", "has", "had", "do", "does", "did", "will",
        "would", "could", "should", "may", "might", "can", "shall",
        "to", "of", "in", "for", "on", "with", "at", "by", "from",
        "as", "into", "through", "during", "before", "after", "and",
        "but", "or", "nor", "not", "so", "yet", "both", "either",
        "neither", "each", "every", "all", "any", "few", "more",
        "most", "other", "some", "such", "no", "only", "own", "same",
        "than", "too", "very", "just", "because", "about", "between",
        "it", "its", "this", "that", "these", "those", "i", "me",
        "my", "we", "our", "you", "your", "he", "him", "his", "she",
        "her", "they", "them", "their", "what", "which", "who",
    })

    def __init__(self, stop_words: set[str] | None = None) -> None:
        self._items: dict[str, ContextItem] = {}
        self._tf_cache: dict[str, dict[str, float]] = {}
        # Compare against None rather than truthiness so that an explicitly
        # empty set is honoured instead of being replaced by the defaults.
        self._stop_words = (
            set(self._DEFAULT_STOP_WORDS) if stop_words is None else stop_words
        )

    def add(self, item: ContextItem) -> None:
        """Store ``item`` and cache its term frequencies.

        An item whose ID is already present replaces the earlier one.
        """
        self._items[item.id] = item
        self._tf_cache[item.id] = self._compute_tf(item.content)

    def search(self, query: str, top_k: int = 5) -> list[SearchResult]:
        """Rank stored items by TF-IDF cosine similarity to ``query``.

        Args:
            query: Free-text query.
            top_k: Maximum number of results; values <= 0 yield no results.

        Returns:
            Up to ``top_k`` results with a score above zero, best first.
        """
        if not self._items or top_k <= 0:
            return []

        idf = self._compute_idf()
        # Query terms that occur in no stored document get weight 0.
        query_tfidf = {
            w: tf * idf.get(w, 0.0) for w, tf in self._compute_tf(query).items()
        }

        results: list[SearchResult] = []
        for item_id, item in self._items.items():
            doc_tf = self._tf_cache.get(item_id, {})
            doc_tfidf = {w: tf * idf.get(w, 0.0) for w, tf in doc_tf.items()}
            score = self._cosine_similarity(query_tfidf, doc_tfidf)
            if score > 0:
                results.append(SearchResult(item=item, score=score))

        results.sort(key=lambda r: r.score, reverse=True)
        return results[:top_k]

    def remove(self, item_id: str) -> bool:
        """Remove an item by ID. Returns True if it was present."""
        if item_id in self._items:
            del self._items[item_id]
            self._tf_cache.pop(item_id, None)
            return True
        return False

    def clear(self) -> None:
        """Remove all items and their cached term frequencies."""
        self._items.clear()
        self._tf_cache.clear()

    def __len__(self) -> int:
        return len(self._items)

    # ------------------------------------------------------------------
    # TF-IDF helpers
    # ------------------------------------------------------------------

    def _tokenize(self, text: str) -> list[str]:
        """Split ``text`` into lower-case tokens without stop words.

        Tokens that consist only of punctuation (for example ``"..."``)
        become empty after trimming and are dropped.
        """
        trimmed = (w.strip(self._PUNCTUATION) for w in text.lower().split())
        return [w for w in trimmed if w and w not in self._stop_words]

    def _compute_tf(self, text: str) -> dict[str, float]:
        """Return each token's share of all tokens in ``text``."""
        tokens = self._tokenize(text)
        if not tokens:
            return {}
        total = len(tokens)
        return {word: count / total for word, count in Counter(tokens).items()}

    def _compute_idf(self) -> dict[str, float]:
        """Return the smoothed inverse document frequency of every known term.

        The previous form ``log(n / (1 + df))`` is zero when ``df == n - 1``
        and negative when ``df == n``; in a two-document store that zeroed
        every term unique to one document. The smoothed form below is
        strictly positive.
        """
        n_docs = len(self._items)
        if n_docs == 0:
            return {}
        doc_freq: Counter[str] = Counter()
        for tf in self._tf_cache.values():
            doc_freq.update(tf.keys())
        return {
            word: math.log((1 + n_docs) / (1 + freq)) + 1.0
            for word, freq in doc_freq.items()
        }

    @staticmethod
    def _cosine_similarity(a: dict[str, float], b: dict[str, float]) -> float:
        """Return the cosine similarity of two sparse vectors.

        Returns 0.0 when either vector is empty, has zero magnitude, or the
        two share no keys.
        """
        if not a or not b:
            return 0.0
        common_keys = a.keys() & b.keys()
        if not common_keys:
            return 0.0
        dot = sum(a[k] * b[k] for k in common_keys)
        mag_a = math.sqrt(sum(v ** 2 for v in a.values()))
        mag_b = math.sqrt(sum(v ** 2 for v in b.values()))
        if mag_a == 0 or mag_b == 0:
            return 0.0
        return dot / (mag_a * mag_b)
@@ -0,0 +1,151 @@
1
+ """SessionMemory: persisted across turns via JSON file or SQLite."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import sqlite3
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ from pyagent_context.item import ContextItem
11
+
12
+
13
class SessionMemory:
    """Session-scoped memory that persists across turns.

    Supports two backends:
    - ``json``: simple JSON file per session (default), stored as
      ``<storage_path>/<session_id>.json``
    - ``sqlite``: rows in a shared ``<storage_path>/sessions.db`` database,
      keyed by session ID

    Items are loaded lazily on first access and are only written back when
    :meth:`save` is called.

    Args:
        session_id: Unique session identifier.
        backend: ``"json"`` or ``"sqlite"``. Any value other than
            ``"sqlite"`` selects the JSON backend.
        storage_path: Directory for storage files. Defaults to
            ``".pyagent_sessions"``. Created if it does not exist.
    """

    # Schema shared by every SQLite operation of this class.
    _CREATE_TABLE_SQL = (
        "CREATE TABLE IF NOT EXISTS context_items "
        "(session_id TEXT, item_id TEXT PRIMARY KEY, data TEXT)"
    )

    def __init__(
        self,
        session_id: str,
        backend: str = "json",
        storage_path: str | Path = ".pyagent_sessions",
    ) -> None:
        self._session_id = session_id
        self._backend = backend
        self._storage_path = Path(storage_path)
        self._storage_path.mkdir(parents=True, exist_ok=True)
        self._items: list[ContextItem] = []
        self._loaded = False

    @property
    def session_id(self) -> str:
        """The identifier this session was created with."""
        return self._session_id

    def add(self, item: ContextItem) -> None:
        """Add an item to the session (in memory only until ``save()``)."""
        self._ensure_loaded()
        self._items.append(item)

    def get_all(self) -> list[ContextItem]:
        """Return a copy of the list of all items in this session."""
        self._ensure_loaded()
        return list(self._items)

    def save(self) -> None:
        """Persist current items to storage, replacing what was stored."""
        if self._backend == "sqlite":
            self._save_sqlite()
        else:
            self._save_json()

    def load(self) -> None:
        """Load items from storage, discarding the in-memory list."""
        if self._backend == "sqlite":
            self._load_sqlite()
        else:
            self._load_json()
        self._loaded = True

    def clear(self) -> None:
        """Remove all items and delete this session's persisted data.

        For the JSON backend the session file is deleted. For the SQLite
        backend only the rows belonging to this session are deleted; the
        shared ``sessions.db`` file and other sessions are left untouched.
        """
        self._items.clear()
        # The empty in-memory list is now authoritative, so a later access
        # must not lazily reload anything.
        self._loaded = True
        if self._backend == "sqlite":
            self._clear_sqlite()
        else:
            path = self._file_path
            if path.exists():
                path.unlink()

    # ------------------------------------------------------------------
    # JSON backend
    # ------------------------------------------------------------------

    def _save_json(self) -> None:
        """Write all items to the session's JSON file."""
        data = [item.to_dict() for item in self._items]
        # json.dumps escapes non-ASCII by default; the explicit encoding keeps
        # the file independent of the platform's locale encoding.
        self._file_path.write_text(json.dumps(data, indent=2), encoding="utf-8")

    def _load_json(self) -> None:
        """Read items from the session's JSON file, if it exists."""
        path = self._file_path
        if path.exists():
            data = json.loads(path.read_text(encoding="utf-8"))
            self._items = [ContextItem.from_dict(d) for d in data]
        else:
            self._items = []

    # ------------------------------------------------------------------
    # SQLite backend
    # ------------------------------------------------------------------

    def _save_sqlite(self) -> None:
        """Replace this session's rows in the shared database.

        The delete and the inserts are committed together; if an insert
        fails, closing the connection without committing leaves the
        previously stored rows in place.
        """
        conn = sqlite3.connect(str(self._db_path))
        try:
            conn.execute(self._CREATE_TABLE_SQL)
            conn.execute(
                "DELETE FROM context_items WHERE session_id = ?",
                (self._session_id,),
            )
            conn.executemany(
                "INSERT INTO context_items (session_id, item_id, data) VALUES (?, ?, ?)",
                (
                    (self._session_id, item.id, json.dumps(item.to_dict()))
                    for item in self._items
                ),
            )
            conn.commit()
        finally:
            conn.close()

    def _load_sqlite(self) -> None:
        """Read this session's rows, in insertion order, from the database."""
        db_path = self._db_path
        if not db_path.exists():
            self._items = []
            return
        conn = sqlite3.connect(str(db_path))
        try:
            conn.execute(self._CREATE_TABLE_SQL)
            cursor = conn.execute(
                "SELECT data FROM context_items WHERE session_id = ? ORDER BY rowid",
                (self._session_id,),
            )
            self._items = [ContextItem.from_dict(json.loads(row[0])) for row in cursor]
        finally:
            conn.close()

    def _clear_sqlite(self) -> None:
        """Delete this session's rows; do nothing if no database exists."""
        db_path = self._db_path
        if not db_path.exists():
            return
        conn = sqlite3.connect(str(db_path))
        try:
            conn.execute(self._CREATE_TABLE_SQL)
            conn.execute(
                "DELETE FROM context_items WHERE session_id = ?",
                (self._session_id,),
            )
            conn.commit()
        finally:
            conn.close()

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    @property
    def _db_path(self) -> Path:
        """Location of the SQLite database shared by all sessions."""
        return self._storage_path / "sessions.db"

    @property
    def _file_path(self) -> Path:
        """Per-session file path derived from the session ID and backend.

        Only the JSON backend stores data at this path; the SQLite backend
        keeps its data in ``_db_path``.
        """
        suffix = ".db" if self._backend == "sqlite" else ".json"
        return self._storage_path / f"{self._session_id}{suffix}"

    def _ensure_loaded(self) -> None:
        """Load from storage on first access."""
        if not self._loaded:
            self.load()

    def __len__(self) -> int:
        self._ensure_loaded()
        return len(self._items)
@@ -0,0 +1,69 @@
1
+ """WorkingMemory: bounded deque with automatic eviction and token limit."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections import deque
6
+
7
+ from pyagent_context.item import ContextItem
8
+
9
+
10
class WorkingMemory:
    """Bounded working memory for a single pattern run.

    Evicts the oldest items when capacity is exceeded (either by count
    or by total token estimate). Token-based eviction always keeps the most
    recently added item, even if that item alone exceeds ``max_tokens``.

    Args:
        max_items: Maximum number of items to keep. With a value of zero or
            less, every added item is evicted immediately.
        max_tokens: Maximum total token estimate before eviction.
    """

    def __init__(
        self,
        max_items: int = 100,
        max_tokens: int = 50_000,
    ) -> None:
        self._max_items = max_items
        self._max_tokens = max_tokens
        self._items: deque[ContextItem] = deque()

    def add(self, item: ContextItem) -> list[ContextItem]:
        """Add an item, evicting oldest if necessary.

        Returns:
            List of evicted items, oldest first (empty if none were evicted).
        """
        evicted: list[ContextItem] = []
        queue = self._items
        queue.append(item)

        # Evict by count. The emptiness check stops a negative max_items from
        # calling popleft() on an empty deque.
        while queue and len(queue) > self._max_items:
            evicted.append(queue.popleft())

        # Evict by token budget. Keep a running total so the deque is summed
        # once instead of once per evicted item.
        total = sum(entry.token_estimate for entry in queue)
        while total > self._max_tokens and len(queue) > 1:
            oldest = queue.popleft()
            total -= oldest.token_estimate
            evicted.append(oldest)

        return evicted

    @property
    def total_tokens(self) -> int:
        """Sum of ``token_estimate`` over all items currently held."""
        return sum(item.token_estimate for item in self._items)

    @property
    def items(self) -> list[ContextItem]:
        """A copy of the held items, oldest first."""
        return list(self._items)

    @property
    def utilization(self) -> float:
        """Token utilization as a fraction of max_tokens (0.0 if max_tokens <= 0)."""
        return self.total_tokens / self._max_tokens if self._max_tokens > 0 else 0.0

    def clear(self) -> None:
        """Remove all items."""
        self._items.clear()

    def __len__(self) -> int:
        return len(self._items)

    def __bool__(self) -> bool:
        return len(self._items) > 0
File without changes
@@ -0,0 +1,71 @@
1
+ """ContextRedactor: field-level redaction by sensitivity tier."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pyagent_context.item import ContextItem, Sensitivity, SENSITIVITY_ORDER
6
+ from pyagent_context.ledger import ContextLedger
7
+
8
+
9
class ContextRedactor:
    """Filter or mask context items whose sensitivity exceeds a threshold.

    An item ranked above ``max_sensitivity`` (per ``SENSITIVITY_ORDER``) is
    either replaced by a copy whose content is the redaction text, or left
    out entirely, depending on ``exclude_above``.

    Args:
        max_sensitivity: Highest sensitivity level that passes through
            unchanged.
        redaction_text: Content used for the redacted copy of an item.
        exclude_above: When ``True``, items over the threshold are dropped
            rather than replaced by a redacted copy.
    """

    def __init__(
        self,
        max_sensitivity: Sensitivity = Sensitivity.INTERNAL,
        redaction_text: str = "[REDACTED]",
        exclude_above: bool = False,
    ) -> None:
        self._max_sensitivity = max_sensitivity
        self._redaction_text = redaction_text
        self._exclude_above = exclude_above

    def redact_item(self, item: ContextItem) -> ContextItem | None:
        """Apply the sensitivity policy to one item.

        Returns:
            ``item`` itself when it is at or below the threshold; ``None``
            when it is above and ``exclude_above`` is set; otherwise a new
            ``ContextItem`` carrying the redaction text, the original's
            source, timestamp, trust level, sensitivity and expiry, and
            ``derived_from`` set to the original's ID.
        """
        # NOTE(review): a sensitivity missing from SENSITIVITY_ORDER is
        # treated as level 0 — confirm this default is intended.
        rank = SENSITIVITY_ORDER.get(item.sensitivity, 0)
        threshold = SENSITIVITY_ORDER.get(self._max_sensitivity, 0)

        if rank > threshold:
            if self._exclude_above:
                return None
            placeholder = self._redaction_text
            return ContextItem(
                content=placeholder,
                source=item.source,
                timestamp=item.timestamp,
                trust_level=item.trust_level,
                sensitivity=item.sensitivity,
                expires_at=item.expires_at,
                derived_from=item.id,
                # Estimate of one token per four characters, at least one.
                token_estimate=max(1, len(placeholder) // 4),
            )

        return item

    def redact_ledger(self, ledger: ContextLedger) -> ContextLedger:
        """Build a new ledger from ``ledger`` with the policy applied.

        Returns:
            A ``ContextLedger`` holding, in the original order, every item
            that ``redact_item`` did not exclude.
        """
        processed = (self.redact_item(entry) for entry in ledger.items)
        kept: list[ContextItem] = [entry for entry in processed if entry is not None]
        return ContextLedger(items=kept)
@@ -0,0 +1,121 @@
1
+ """TrustAwareRetriever: score candidates by trust × recency × relevance."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import math
6
+ import time
7
+ from dataclasses import dataclass
8
+
9
+ from pyagent_context.item import ContextItem, TRUST_ORDER
10
+ from pyagent_context.ledger import ContextLedger
11
+
12
+
13
@dataclass(frozen=True)
class ScoredItem:
    """A context item paired with its retrieval scores.

    Instances are immutable (the dataclass is frozen).

    Attributes:
        item: The context item that was scored.
        score: Composite score (0.0–1.0).
        trust_score: The trust component that went into ``score``.
        recency_score: The recency component that went into ``score``.
        relevance_score: The relevance component that went into ``score``.
    """

    item: ContextItem
    score: float
    trust_score: float
    recency_score: float
    relevance_score: float
30
+
31
+
32
class TrustAwareRetriever:
    """Retrieve context items scored by trust, recency, and keyword relevance.

    Scoring formula:
        ``score = w_trust * trust + w_recency * recency + w_relevance * relevance``

    Where:
        - **trust** = ``TRUST_ORDER[item.trust_level] / 3.0`` (0 for an
          unknown trust level)
        - **recency** = ``exp(-age_seconds / recency_half_life)``, with the
          age clamped at zero for timestamps that lie in the future
        - **relevance** = fraction of the query's keywords found in the
          item's content (case-insensitive, surrounding punctuation ignored)

    Args:
        weight_trust: Weight for trust component.
        weight_recency: Weight for recency component.
        weight_relevance: Weight for relevance component.
        recency_half_life: Time constant of the recency decay, in seconds.
            With the formula above the recency score falls to ``1/e``
            (about 0.37), not to one half, after this many seconds. A
            value <= 0 makes the recency component 0.
    """

    # Characters trimmed from both ends of every whitespace-separated word.
    _PUNCTUATION = ".,!?;:\"'()[]{}"

    def __init__(
        self,
        weight_trust: float = 0.3,
        weight_recency: float = 0.3,
        weight_relevance: float = 0.4,
        recency_half_life: float = 3600.0,
    ) -> None:
        self._w_trust = weight_trust
        self._w_recency = weight_recency
        self._w_relevance = weight_relevance
        self._half_life = recency_half_life

    def retrieve(
        self,
        ledger: ContextLedger,
        query: str,
        *,
        top_k: int = 10,
        min_score: float = 0.0,
    ) -> list[ScoredItem]:
        """Score and rank all non-expired items in the ledger against a query.

        Args:
            ledger: The context ledger to search.
            query: The query string for relevance scoring.
            top_k: Maximum results to return; values <= 0 yield no results.
            min_score: Minimum composite score to include.

        Returns:
            Ranked list of ``ScoredItem`` objects, highest score first.
        """
        now = time.time()
        query_words = self._keywords(query)
        results: list[ScoredItem] = []

        for item in ledger.items:
            if item.is_expired:
                continue

            trust = TRUST_ORDER.get(item.trust_level, 0) / 3.0
            # Clamp so a future timestamp (clock skew) cannot push recency
            # above 1.0 or overflow exp().
            age = max(0.0, now - item.timestamp)
            recency = math.exp(-age / self._half_life) if self._half_life > 0 else 0.0
            relevance = self._keyword_relevance(item.content, query_words)

            score = (
                self._w_trust * trust
                + self._w_recency * recency
                + self._w_relevance * relevance
            )

            if score >= min_score:
                results.append(
                    ScoredItem(
                        item=item,
                        score=score,
                        trust_score=trust,
                        recency_score=recency,
                        relevance_score=relevance,
                    )
                )

        results.sort(key=lambda r: r.score, reverse=True)
        # A negative top_k would otherwise slice from the end of the list.
        return results[: max(top_k, 0)]

    @staticmethod
    def _keywords(text: str) -> set[str]:
        """Return the distinct lower-cased words of ``text``.

        Surrounding punctuation is trimmed from each word, and words that
        consist only of punctuation are dropped.
        """
        trimmed = (
            word.strip(TrustAwareRetriever._PUNCTUATION)
            for word in text.lower().split()
        )
        return {word for word in trimmed if word}

    @staticmethod
    def _keyword_relevance(content: str, query_words: set[str]) -> float:
        """Return the fraction of ``query_words`` that occur in ``content``.

        ``query_words`` is expected to be lower-cased already. Returns 0.0
        for an empty query.
        """
        if not query_words:
            return 0.0
        content_words = TrustAwareRetriever._keywords(content)
        return len(query_words & content_words) / len(query_words)