pyagent-context 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyagent_context/__init__.py +29 -0
- pyagent_context/compression.py +132 -0
- pyagent_context/item.py +132 -0
- pyagent_context/ledger.py +136 -0
- pyagent_context/lifecycle.py +157 -0
- pyagent_context/memory/__init__.py +12 -0
- pyagent_context/memory/semantic.py +153 -0
- pyagent_context/memory/session.py +151 -0
- pyagent_context/memory/working.py +69 -0
- pyagent_context/py.typed +0 -0
- pyagent_context/redaction.py +71 -0
- pyagent_context/retrieval.py +121 -0
- pyagent_context-0.1.0.dist-info/METADATA +286 -0
- pyagent_context-0.1.0.dist-info/RECORD +15 -0
- pyagent_context-0.1.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""PyAgent Context — three-tier memory with trust-aware context ledger."""
|
|
2
|
+
|
|
3
|
+
from pyagent_context.compression import CompressionPolicy, ContextCompressor
|
|
4
|
+
from pyagent_context.item import ContextItem, Sensitivity, TrustLevel
|
|
5
|
+
from pyagent_context.ledger import ContextLedger
|
|
6
|
+
from pyagent_context.lifecycle import ContextLifecycle
|
|
7
|
+
from pyagent_context.memory.semantic import InMemorySemanticStore, SemanticMemoryProtocol
|
|
8
|
+
from pyagent_context.memory.session import SessionMemory
|
|
9
|
+
from pyagent_context.memory.working import WorkingMemory
|
|
10
|
+
from pyagent_context.redaction import ContextRedactor
|
|
11
|
+
from pyagent_context.retrieval import ScoredItem, TrustAwareRetriever
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"CompressionPolicy",
|
|
15
|
+
"ContextCompressor",
|
|
16
|
+
"ContextItem",
|
|
17
|
+
"ContextLedger",
|
|
18
|
+
"ContextLifecycle",
|
|
19
|
+
"ContextRedactor",
|
|
20
|
+
"InMemorySemanticStore",
|
|
21
|
+
"ScoredItem",
|
|
22
|
+
"SemanticMemoryProtocol",
|
|
23
|
+
"Sensitivity",
|
|
24
|
+
"SessionMemory",
|
|
25
|
+
"TrustAwareRetriever",
|
|
26
|
+
"TrustLevel",
|
|
27
|
+
"WorkingMemory",
|
|
28
|
+
]
|
|
29
|
+
__version__ = "0.1.0"
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
"""ContextCompressor: compression policies for context ledger management."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from enum import StrEnum
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
7
|
+
|
|
8
|
+
from pyagent_context.item import ContextItem, TrustLevel
|
|
9
|
+
from pyagent_context.ledger import ContextLedger
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
pass
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class CompressionPolicy(StrEnum):
|
|
16
|
+
"""Available compression strategies."""
|
|
17
|
+
|
|
18
|
+
NONE = "none"
|
|
19
|
+
FIFO = "fifo" # drop oldest items
|
|
20
|
+
SEMANTIC_LOSSLESS = "semantic_lossless" # compress text, keep Knowledge block
|
|
21
|
+
SAWTOOTH = "sawtooth" # compress to floor, grow again
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class ContextCompressor:
|
|
25
|
+
"""Apply compression policies to a ContextLedger.
|
|
26
|
+
|
|
27
|
+
The compressor monitors token usage and compresses when a threshold
|
|
28
|
+
is reached.
|
|
29
|
+
|
|
30
|
+
Args:
|
|
31
|
+
policy: Compression strategy.
|
|
32
|
+
threshold_tokens: Token count that triggers compression.
|
|
33
|
+
floor_tokens: Token target after compression (for SAWTOOTH/FIFO).
|
|
34
|
+
preserve_trust: Items at or above this trust level are never dropped.
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
def __init__(
|
|
38
|
+
self,
|
|
39
|
+
policy: CompressionPolicy = CompressionPolicy.FIFO,
|
|
40
|
+
threshold_tokens: int = 10_000,
|
|
41
|
+
floor_tokens: int = 5_000,
|
|
42
|
+
preserve_trust: TrustLevel = TrustLevel.VERIFIED,
|
|
43
|
+
) -> None:
|
|
44
|
+
self._policy = policy
|
|
45
|
+
self._threshold = threshold_tokens
|
|
46
|
+
self._floor = floor_tokens
|
|
47
|
+
self._preserve_trust = preserve_trust
|
|
48
|
+
|
|
49
|
+
@property
|
|
50
|
+
def policy(self) -> CompressionPolicy:
|
|
51
|
+
return self._policy
|
|
52
|
+
|
|
53
|
+
def should_compress(self, ledger: ContextLedger) -> bool:
|
|
54
|
+
"""Check if the ledger's token count exceeds the threshold."""
|
|
55
|
+
if self._policy == CompressionPolicy.NONE:
|
|
56
|
+
return False
|
|
57
|
+
return ledger.total_tokens >= self._threshold
|
|
58
|
+
|
|
59
|
+
def compress(self, ledger: ContextLedger) -> ContextLedger:
|
|
60
|
+
"""Apply the compression policy and return a new (compressed) ledger.
|
|
61
|
+
|
|
62
|
+
The original ledger is not mutated.
|
|
63
|
+
"""
|
|
64
|
+
if self._policy == CompressionPolicy.NONE:
|
|
65
|
+
return ledger
|
|
66
|
+
if self._policy == CompressionPolicy.FIFO:
|
|
67
|
+
return self._compress_fifo(ledger)
|
|
68
|
+
if self._policy == CompressionPolicy.SEMANTIC_LOSSLESS:
|
|
69
|
+
return self._compress_semantic(ledger)
|
|
70
|
+
if self._policy == CompressionPolicy.SAWTOOTH:
|
|
71
|
+
return self._compress_sawtooth(ledger)
|
|
72
|
+
return ledger
|
|
73
|
+
|
|
74
|
+
def _compress_fifo(self, ledger: ContextLedger) -> ContextLedger:
|
|
75
|
+
"""Drop oldest items until under floor, preserving high-trust items."""
|
|
76
|
+
items = list(ledger.items)
|
|
77
|
+
tokens = ledger.total_tokens
|
|
78
|
+
|
|
79
|
+
# Remove from front (oldest) until under floor
|
|
80
|
+
new_items: list[ContextItem] = []
|
|
81
|
+
for item in items:
|
|
82
|
+
if tokens <= self._floor:
|
|
83
|
+
new_items.append(item)
|
|
84
|
+
elif item.trust_level >= self._preserve_trust:
|
|
85
|
+
new_items.append(item) # keep high-trust
|
|
86
|
+
else:
|
|
87
|
+
tokens -= item.token_estimate # drop
|
|
88
|
+
|
|
89
|
+
return ContextLedger(items=new_items)
|
|
90
|
+
|
|
91
|
+
def _compress_semantic(self, ledger: ContextLedger) -> ContextLedger:
|
|
92
|
+
"""Compress item content text while preserving verified items unchanged.
|
|
93
|
+
|
|
94
|
+
Uses simple sentence extraction — for full compression, integrate
|
|
95
|
+
``MessageCompressor`` from ``pyagent-compress``.
|
|
96
|
+
"""
|
|
97
|
+
items = list(ledger.items)
|
|
98
|
+
new_items: list[ContextItem] = []
|
|
99
|
+
|
|
100
|
+
for item in items:
|
|
101
|
+
if item.trust_level >= self._preserve_trust:
|
|
102
|
+
new_items.append(item)
|
|
103
|
+
else:
|
|
104
|
+
# Simple compression: keep first sentence only
|
|
105
|
+
sentences = item.content.split(". ")
|
|
106
|
+
compressed = sentences[0] + ("." if len(sentences) > 1 else "")
|
|
107
|
+
new_item = ContextItem(
|
|
108
|
+
content=compressed,
|
|
109
|
+
source=item.source,
|
|
110
|
+
timestamp=item.timestamp,
|
|
111
|
+
trust_level=item.trust_level,
|
|
112
|
+
sensitivity=item.sensitivity,
|
|
113
|
+
expires_at=item.expires_at,
|
|
114
|
+
derived_from=item.id,
|
|
115
|
+
)
|
|
116
|
+
new_items.append(new_item)
|
|
117
|
+
|
|
118
|
+
return ContextLedger(items=new_items)
|
|
119
|
+
|
|
120
|
+
def _compress_sawtooth(self, ledger: ContextLedger) -> ContextLedger:
|
|
121
|
+
"""Compress to floor, then allow growth again.
|
|
122
|
+
|
|
123
|
+
Combines FIFO eviction with semantic compression for remaining items.
|
|
124
|
+
"""
|
|
125
|
+
# First pass: FIFO eviction
|
|
126
|
+
fifo_result = self._compress_fifo(ledger)
|
|
127
|
+
|
|
128
|
+
# If still over floor, apply semantic compression
|
|
129
|
+
if fifo_result.total_tokens > self._floor:
|
|
130
|
+
return self._compress_semantic(fifo_result)
|
|
131
|
+
|
|
132
|
+
return fifo_result
|
pyagent_context/item.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
"""ContextItem: content + metadata with trust, sensitivity, and lifecycle."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import time
|
|
6
|
+
import uuid
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from enum import StrEnum
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TrustLevel(StrEnum):
|
|
12
|
+
"""Trust classification for context items."""
|
|
13
|
+
|
|
14
|
+
VERIFIED = "verified" # from trusted source, validated
|
|
15
|
+
INFERRED = "inferred" # LLM-generated, not validated
|
|
16
|
+
USER_PROVIDED = "user" # direct user input
|
|
17
|
+
EXTERNAL = "external" # from tool/API call
|
|
18
|
+
|
|
19
|
+
def __ge__(self, other: TrustLevel) -> bool:
|
|
20
|
+
order = {TrustLevel.INFERRED: 0, TrustLevel.EXTERNAL: 1, TrustLevel.USER_PROVIDED: 2, TrustLevel.VERIFIED: 3}
|
|
21
|
+
return order.get(self, 0) >= order.get(other, 0)
|
|
22
|
+
|
|
23
|
+
def __gt__(self, other: TrustLevel) -> bool:
|
|
24
|
+
return self != other and self.__ge__(other)
|
|
25
|
+
|
|
26
|
+
def __le__(self, other: TrustLevel) -> bool:
|
|
27
|
+
return other.__ge__(self)
|
|
28
|
+
|
|
29
|
+
def __lt__(self, other: TrustLevel) -> bool:
|
|
30
|
+
return self != other and self.__le__(other)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# Trust ranks used for scoring, from least trusted (0) to most trusted (3).
TRUST_ORDER: dict[TrustLevel, int] = {
    level: rank
    for rank, level in enumerate(
        (
            TrustLevel.INFERRED,
            TrustLevel.EXTERNAL,
            TrustLevel.USER_PROVIDED,
            TrustLevel.VERIFIED,
        )
    )
}
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class Sensitivity(StrEnum):
|
|
43
|
+
"""Data sensitivity classification."""
|
|
44
|
+
|
|
45
|
+
PUBLIC = "public"
|
|
46
|
+
INTERNAL = "internal"
|
|
47
|
+
CONFIDENTIAL = "confidential"
|
|
48
|
+
RESTRICTED = "restricted"
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
# Sensitivity ranks, from least restricted (0) to most restricted (3).
SENSITIVITY_ORDER: dict[Sensitivity, int] = {
    level: rank
    for rank, level in enumerate(
        (
            Sensitivity.PUBLIC,
            Sensitivity.INTERNAL,
            Sensitivity.CONFIDENTIAL,
            Sensitivity.RESTRICTED,
        )
    )
}
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@dataclass
class ContextItem:
    """One unit of context plus the metadata that governs its use.

    Attributes:
        content: Text payload.
        source: Where the item came from: an agent name, a tool name, or
            ``"user"``.
        timestamp: Creation time; defaults to ``time.time()``.
        trust_level: How far the content can be trusted.
        sensitivity: Data classification, used for redaction decisions.
        expires_at: Timestamp after which the item counts as expired, or
            ``None`` if it never expires.
        derived_from: ID of the parent item when this one was derived from
            another.
        token_estimate: Approximate token count. When left at ``0`` it is
            filled in as ``max(1, len(content) // 4)``.
    """

    content: str
    source: str
    timestamp: float = field(default_factory=time.time)
    trust_level: TrustLevel = TrustLevel.INFERRED
    sensitivity: Sensitivity = Sensitivity.INTERNAL
    expires_at: float | None = None
    derived_from: str | None = None
    token_estimate: int = 0
    # Short random identifier: the first 12 hex characters of a UUID4.
    _id: str = field(default_factory=lambda: uuid.uuid4().hex[:12])

    def __post_init__(self) -> None:
        """Fill in the token estimate when the caller did not supply one."""
        if self.token_estimate != 0:
            return
        # Roughly four characters per token, and never less than one token.
        self.token_estimate = max(1, len(self.content) // 4)

    @property
    def id(self) -> str:
        """Identifier of this item."""
        return self._id

    @property
    def is_expired(self) -> bool:
        """Whether the current time is past ``expires_at``."""
        return self.expires_at is not None and time.time() > self.expires_at

    @property
    def age_seconds(self) -> float:
        """Seconds elapsed since ``timestamp``."""
        created = self.timestamp
        return time.time() - created

    def to_dict(self) -> dict:
        """Return a JSON-compatible dict describing this item."""
        return dict(
            id=self._id,
            content=self.content,
            source=self.source,
            timestamp=self.timestamp,
            trust_level=self.trust_level.value,
            sensitivity=self.sensitivity.value,
            expires_at=self.expires_at,
            derived_from=self.derived_from,
            token_estimate=self.token_estimate,
        )

    @classmethod
    def from_dict(cls, data: dict) -> ContextItem:
        """Rebuild an item from a dict produced by ``to_dict``.

        ``content``, ``source``, ``timestamp``, ``trust_level``,
        ``sensitivity`` and ``id`` are required keys; the remaining keys are
        optional.
        """
        return cls(
            content=data["content"],
            source=data["source"],
            timestamp=data["timestamp"],
            trust_level=TrustLevel(data["trust_level"]),
            sensitivity=Sensitivity(data["sensitivity"]),
            expires_at=data.get("expires_at"),
            derived_from=data.get("derived_from"),
            token_estimate=data.get("token_estimate", 0),
            # Restore the stored identifier instead of generating a new one.
            _id=data["id"],
        )
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
"""ContextLedger: append-only log of ContextItems with query, conversion, and snapshot."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import time
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from pyagent_patterns.base import Message
|
|
9
|
+
from pyagent_context.item import ContextItem, TrustLevel, TRUST_ORDER
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ContextLedger:
    """Ordered log of context items.

    Items can be filtered by trust level, age and source, converted to a
    list of ``Message`` objects for use with patterns, and serialized to or
    restored from a snapshot dict.

    Args:
        items: Optional initial items; they are copied into a new list.
    """

    def __init__(self, items: list[ContextItem] | None = None) -> None:
        self._items: list[ContextItem] = [] if not items else list(items)

    def append(self, item: ContextItem) -> None:
        """Append an existing item to the end of the ledger."""
        self._items.append(item)

    def add(
        self,
        content: str,
        source: str,
        trust_level: TrustLevel = TrustLevel.INFERRED,
        **kwargs: Any,
    ) -> ContextItem:
        """Build a ``ContextItem`` and append it in one call.

        Extra keyword arguments are forwarded to ``ContextItem``.

        Returns:
            The newly created ``ContextItem``.
        """
        created = ContextItem(
            content=content,
            source=source,
            trust_level=trust_level,
            **kwargs,
        )
        self._items.append(created)
        return created

    def query(
        self,
        *,
        min_trust: TrustLevel | None = None,
        max_age_seconds: float | None = None,
        source: str | None = None,
    ) -> list[ContextItem]:
        """Return the items that satisfy every filter that was given.

        Args:
            min_trust: Keep only items at or above this trust level.
            max_age_seconds: Keep only items no older than this many seconds.
            source: Keep only items whose source equals this value.

        Returns:
            Matching items in ledger order.
        """
        reference_time = time.time()

        def _passes(item: ContextItem) -> bool:
            # Each filter applies only when its argument was supplied.
            if min_trust is not None:
                if TRUST_ORDER.get(item.trust_level, 0) < TRUST_ORDER.get(min_trust, 0):
                    return False
            if max_age_seconds is not None:
                if (reference_time - item.timestamp) > max_age_seconds:
                    return False
            if source is not None and item.source != source:
                return False
            return True

        return [item for item in self._items if _passes(item)]

    @property
    def total_tokens(self) -> int:
        """Total of ``token_estimate`` over every item in the ledger."""
        return sum(entry.token_estimate for entry in self._items)

    def to_messages(self, max_tokens: int | None = None) -> list[Message]:
        """Render ledger items as assistant ``Message`` objects.

        Without a budget every item is rendered. With ``max_tokens`` set,
        items are taken from the newest backward and selection stops at the
        first item that no longer fits; the result is returned in ledger
        order.

        Args:
            max_tokens: Optional token budget.

        Returns:
            List of ``Message`` objects.
        """
        if max_tokens is None:
            chosen = self._items
        else:
            remaining = max_tokens
            newest_first: list[ContextItem] = []
            for entry in reversed(self._items):
                if entry.token_estimate > remaining:
                    break
                newest_first.append(entry)
                remaining -= entry.token_estimate
            # Flip back so the messages come out oldest first.
            chosen = newest_first[::-1]

        return [Message.assistant(entry.content, name=entry.source) for entry in chosen]

    def snapshot(self) -> dict:
        """Return a JSON-compatible dict holding every item plus totals."""
        serialized = [entry.to_dict() for entry in self._items]
        return {
            "items": serialized,
            "total_tokens": self.total_tokens,
            "count": len(self._items),
        }

    @classmethod
    def from_snapshot(cls, data: dict) -> ContextLedger:
        """Build a ledger from the ``"items"`` list of a snapshot dict."""
        return cls(items=[ContextItem.from_dict(raw) for raw in data["items"]])

    @property
    def items(self) -> list[ContextItem]:
        """A copy of the item list; mutating it does not affect the ledger."""
        return list(self._items)

    def __len__(self) -> int:
        return len(self._items)

    def __bool__(self) -> bool:
        return bool(self._items)

    def clear(self) -> None:
        """Drop every item from the ledger."""
        self._items.clear()
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
"""ContextLifecycle: expiration sweep, consolidation, and freshness decay."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import time
|
|
6
|
+
from collections import defaultdict
|
|
7
|
+
|
|
8
|
+
from pyagent_context.item import ContextItem
|
|
9
|
+
from pyagent_context.ledger import ContextLedger
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ContextLifecycle:
    """Manage the lifecycle of context items.

    Provides:
    - **Expiry sweep**: remove items past their ``expires_at``.
    - **Freshness decay**: reduce token budgets for old items.
    - **Consolidation**: merge items from the same source with similar content.

    None of the operations mutate the ledger they are given; each returns a
    new ``ContextLedger``.

    Args:
        consolidation_threshold: Minimum keyword overlap ratio (0.0–1.0)
            to consider two items similar enough to merge.
    """

    def __init__(self, consolidation_threshold: float = 0.6) -> None:
        self._consolidation_threshold = consolidation_threshold

    def sweep_expired(self, ledger: ContextLedger) -> tuple[ContextLedger, list[ContextItem]]:
        """Remove expired items from the ledger.

        Args:
            ledger: The context ledger.

        Returns:
            Tuple of (new ledger without expired items, list of expired items).
        """
        now = time.time()
        kept: list[ContextItem] = []
        expired: list[ContextItem] = []

        for item in ledger.items:
            is_stale = item.expires_at is not None and now > item.expires_at
            (expired if is_stale else kept).append(item)

        return ContextLedger(items=kept), expired

    @staticmethod
    def _decay_factor(age_seconds: float, half_life_seconds: float) -> float:
        """Return the fraction of a token estimate left after ``age_seconds``.

        The factor halves every ``half_life_seconds``. A non-positive
        half-life disables decay, and a negative age (timestamp in the
        future) is treated as zero so the factor never exceeds 1.0.
        """
        if half_life_seconds <= 0:
            return 1.0
        return 0.5 ** (max(0.0, age_seconds) / half_life_seconds)

    def apply_freshness_decay(
        self,
        ledger: ContextLedger,
        half_life_seconds: float = 3600.0,
        min_tokens: int = 1,
    ) -> ContextLedger:
        """Reduce token estimates based on age.

        Older items get smaller token budgets. Each item's estimate is
        multiplied by ``0.5 ** (age / half_life_seconds)``, so it is halved
        once per half-life, and is never reduced below ``min_tokens``.
        Item IDs are preserved.

        Note: ``ContextItem`` treats an estimate of ``0`` as "not set" and
        recomputes it from the content length, so a ``min_tokens`` below 1
        does not produce zero-token items.

        Args:
            ledger: The context ledger.
            half_life_seconds: Seconds for token estimate to halve. A value
                of zero or less disables decay.
            min_tokens: Minimum token estimate after decay.

        Returns:
            New ledger with decayed token estimates.
        """
        now = time.time()
        new_items: list[ContextItem] = []

        for item in ledger.items:
            factor = self._decay_factor(now - item.timestamp, half_life_seconds)
            decayed_tokens = max(min_tokens, int(item.token_estimate * factor))
            new_items.append(
                ContextItem(
                    content=item.content,
                    source=item.source,
                    timestamp=item.timestamp,
                    trust_level=item.trust_level,
                    sensitivity=item.sensitivity,
                    expires_at=item.expires_at,
                    derived_from=item.derived_from,
                    token_estimate=decayed_tokens,
                    _id=item.id,
                )
            )

        return ContextLedger(items=new_items)

    def consolidate(self, ledger: ContextLedger) -> ContextLedger:
        """Merge similar items from the same source.

        Within each source, an item absorbs every later item whose keyword
        overlap with it reaches the consolidation threshold. The merged item
        has the contents joined by newlines, the latest timestamp, the
        highest trust level and the most restrictive sensitivity of the
        group. Its ``expires_at`` comes from the first item of the group and
        its ``derived_from`` is that item's ID. Items that merge with
        nothing are carried over unchanged.

        Args:
            ledger: The context ledger.

        Returns:
            New ledger with consolidated items, sorted by timestamp.
        """
        # Imported locally: only this method needs the sensitivity ranking.
        from pyagent_context.item import SENSITIVITY_ORDER

        by_source: dict[str, list[ContextItem]] = defaultdict(list)
        for item in ledger.items:
            by_source[item.source].append(item)

        consolidated: list[ContextItem] = []

        for source, items in by_source.items():
            merged_indices: set[int] = set()

            for i, item_a in enumerate(items):
                if i in merged_indices:
                    continue

                group: list[ContextItem] = [item_a]
                for j in range(i + 1, len(items)):
                    if j in merged_indices:
                        continue
                    item_b = items[j]
                    # Candidates are always compared against the first item
                    # of the group, not the accumulated text.
                    if self._similarity(item_a.content, item_b.content) >= self._consolidation_threshold:
                        group.append(item_b)
                        merged_indices.add(j)

                if len(group) == 1:
                    # Nothing merged: keep the item as is, including its ID,
                    # derived_from and token estimate.
                    consolidated.append(item_a)
                    continue

                best_trust = item_a.trust_level
                for member in group[1:]:
                    if member.trust_level > best_trust:
                        best_trust = member.trust_level

                # The merged text contains every member's content, so it must
                # carry the most restrictive classification among them.
                strictest = max(
                    (member.sensitivity for member in group),
                    key=lambda level: SENSITIVITY_ORDER.get(level, 0),
                )

                consolidated.append(
                    ContextItem(
                        content="\n".join(member.content for member in group),
                        source=source,
                        timestamp=max(member.timestamp for member in group),
                        trust_level=best_trust,
                        sensitivity=strictest,
                        expires_at=item_a.expires_at,
                        derived_from=item_a.id,
                    )
                )

        # Sort by timestamp to maintain chronological order
        consolidated.sort(key=lambda x: x.timestamp)
        return ContextLedger(items=consolidated)

    @staticmethod
    def _similarity(text_a: str, text_b: str) -> float:
        """Return the Jaccard overlap of the two texts' lowercased word sets.

        Returns 0.0 when either text contains no words.
        """
        words_a = set(text_a.lower().split())
        words_b = set(text_b.lower().split())
        if not words_a or not words_b:
            return 0.0
        intersection = words_a & words_b
        union = words_a | words_b
        return len(intersection) / len(union)
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""Three-tier memory: working, session, and semantic."""
|
|
2
|
+
|
|
3
|
+
from pyagent_context.memory.working import WorkingMemory
|
|
4
|
+
from pyagent_context.memory.session import SessionMemory
|
|
5
|
+
from pyagent_context.memory.semantic import SemanticMemoryProtocol, InMemorySemanticStore
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
"InMemorySemanticStore",
|
|
9
|
+
"SemanticMemoryProtocol",
|
|
10
|
+
"SessionMemory",
|
|
11
|
+
"WorkingMemory",
|
|
12
|
+
]
|