chuk-ai-session-manager 0.7.1__py3-none-any.whl → 0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. chuk_ai_session_manager/__init__.py +84 -40
  2. chuk_ai_session_manager/api/__init__.py +1 -1
  3. chuk_ai_session_manager/api/simple_api.py +53 -59
  4. chuk_ai_session_manager/exceptions.py +31 -17
  5. chuk_ai_session_manager/guards/__init__.py +118 -0
  6. chuk_ai_session_manager/guards/bindings.py +217 -0
  7. chuk_ai_session_manager/guards/cache.py +163 -0
  8. chuk_ai_session_manager/guards/manager.py +819 -0
  9. chuk_ai_session_manager/guards/models.py +498 -0
  10. chuk_ai_session_manager/guards/ungrounded.py +159 -0
  11. chuk_ai_session_manager/infinite_conversation.py +86 -79
  12. chuk_ai_session_manager/memory/__init__.py +247 -0
  13. chuk_ai_session_manager/memory/artifacts_bridge.py +469 -0
  14. chuk_ai_session_manager/memory/context_packer.py +347 -0
  15. chuk_ai_session_manager/memory/fault_handler.py +507 -0
  16. chuk_ai_session_manager/memory/manifest.py +307 -0
  17. chuk_ai_session_manager/memory/models.py +1084 -0
  18. chuk_ai_session_manager/memory/mutation_log.py +186 -0
  19. chuk_ai_session_manager/memory/pack_cache.py +206 -0
  20. chuk_ai_session_manager/memory/page_table.py +275 -0
  21. chuk_ai_session_manager/memory/prefetcher.py +192 -0
  22. chuk_ai_session_manager/memory/tlb.py +247 -0
  23. chuk_ai_session_manager/memory/vm_prompts.py +238 -0
  24. chuk_ai_session_manager/memory/working_set.py +574 -0
  25. chuk_ai_session_manager/models/__init__.py +21 -9
  26. chuk_ai_session_manager/models/event_source.py +3 -1
  27. chuk_ai_session_manager/models/event_type.py +10 -1
  28. chuk_ai_session_manager/models/session.py +103 -68
  29. chuk_ai_session_manager/models/session_event.py +69 -68
  30. chuk_ai_session_manager/models/session_metadata.py +9 -10
  31. chuk_ai_session_manager/models/session_run.py +21 -22
  32. chuk_ai_session_manager/models/token_usage.py +76 -76
  33. chuk_ai_session_manager/procedural_memory/__init__.py +70 -0
  34. chuk_ai_session_manager/procedural_memory/formatter.py +407 -0
  35. chuk_ai_session_manager/procedural_memory/manager.py +523 -0
  36. chuk_ai_session_manager/procedural_memory/models.py +371 -0
  37. chuk_ai_session_manager/sample_tools.py +79 -46
  38. chuk_ai_session_manager/session_aware_tool_processor.py +27 -16
  39. chuk_ai_session_manager/session_manager.py +238 -197
  40. chuk_ai_session_manager/session_prompt_builder.py +163 -111
  41. chuk_ai_session_manager/session_storage.py +45 -52
  42. {chuk_ai_session_manager-0.7.1.dist-info → chuk_ai_session_manager-0.8.dist-info}/METADATA +79 -3
  43. chuk_ai_session_manager-0.8.dist-info/RECORD +45 -0
  44. {chuk_ai_session_manager-0.7.1.dist-info → chuk_ai_session_manager-0.8.dist-info}/WHEEL +1 -1
  45. chuk_ai_session_manager-0.7.1.dist-info/RECORD +0 -22
  46. {chuk_ai_session_manager-0.7.1.dist-info → chuk_ai_session_manager-0.8.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,186 @@
1
+ # chuk_ai_session_manager/memory/mutation_log.py
2
+ """
3
+ Lite Mutation Log for AI Virtual Memory.
4
+
5
+ Append-only log of page operations for:
6
+ - Debugging: "What was in context for turn T?"
7
+ - Replay: Reconstruct state for testing
8
+ - Grounding story: Prove what the model saw
9
+
10
+ This is a lite version focused on basic tracking.
11
+ Full event-sourcing with time-travel comes in v0.15.
12
+ """
13
+
14
+ import uuid
15
+ from collections import defaultdict
16
+ from datetime import datetime
17
+ from typing import Dict, List, Optional
18
+
19
+ from pydantic import BaseModel, Field, PrivateAttr
20
+
21
+ from .models import (
22
+ Actor,
23
+ MutationType,
24
+ PageMutation,
25
+ StorageTier,
26
+ )
27
+
28
+
29
class ContextSnapshot(BaseModel):
    """Snapshot of what pages were in context at a given turn.

    Created by MutationLogLite.record_context_at_turn() so that
    "what was in context for turn T?" queries can be answered later
    via MutationLogLite.get_context_at_turn().
    """

    # Turn number this snapshot belongs to.
    turn: int
    # Page IDs that were in context at this turn.
    page_ids: List[str] = Field(default_factory=list)
    # When the snapshot was taken (naive UTC via datetime.utcnow).
    timestamp: datetime = Field(default_factory=datetime.utcnow)
35
+
36
+
37
class MutationLogLite(BaseModel):
    """
    Append-only log of page operations.

    Not full event-sourcing, but enough for:
    - Debugging: "what was in context for turn T?"
    - Replay: Reconstruct state for testing
    - Grounding story: Prove what the model saw

    Query methods return copies of internal state so callers cannot
    mutate the log by accident.
    """

    session_id: str = Field(default="")

    # Append-only mutation list, in chronological (append) order.
    _mutations: List[PageMutation] = PrivateAttr(default_factory=list)

    # Index: page_id -> list of mutations, for per-page history lookups.
    _by_page: Dict[str, List[PageMutation]] = PrivateAttr(
        default_factory=lambda: defaultdict(list)
    )

    # Context snapshots per turn (turn number -> snapshot).
    _context_snapshots: Dict[int, ContextSnapshot] = PrivateAttr(default_factory=dict)

    def append(self, mutation: PageMutation) -> None:
        """Append a mutation to the log and to the per-page index."""
        self._mutations.append(mutation)
        self._by_page[mutation.page_id].append(mutation)

    def record_mutation(
        self,
        page_id: str,
        mutation_type: MutationType,
        tier_after: StorageTier,
        tier_before: Optional[StorageTier] = None,
        actor: Actor = Actor.SYSTEM,
        cause: Optional[str] = None,
        turn: int = 0,
    ) -> PageMutation:
        """Create and append a mutation record.

        Args:
            page_id: ID of the page being mutated.
            mutation_type: What happened (create, fault-in, evict, ...).
            tier_after: Storage tier the page ends up in.
            tier_before: Tier the page came from, if any.
            actor: Who triggered the mutation (defaults to SYSTEM).
            cause: Optional free-text reason, for debugging.
            turn: Conversation turn the mutation happened in.

        Returns:
            The PageMutation that was created and logged.
        """
        # NOTE: 8-char uuid4 prefix keeps IDs short; collision risk is
        # negligible at the scale of a single session's log.
        mutation = PageMutation(
            mutation_id=str(uuid.uuid4())[:8],
            page_id=page_id,
            turn=turn,
            mutation_type=mutation_type,
            tier_before=tier_before,
            tier_after=tier_after,
            actor=actor,
            cause=cause,
        )
        self.append(mutation)
        return mutation

    def record_context_at_turn(self, turn: int, page_ids: List[str]) -> None:
        """
        Snapshot the context (L0 pages) at a turn.

        Call this at the start or end of each turn to enable
        "what was in context for turn T?" queries.
        """
        # Copy the list so later caller-side mutation can't alter the snapshot.
        self._context_snapshots[turn] = ContextSnapshot(
            turn=turn,
            page_ids=list(page_ids),
        )

    def get_context_at_turn(self, turn: int) -> List[str]:
        """
        Replay: what page_ids were in L0 at turn T?

        Returns empty list if no snapshot for that turn.
        """
        snapshot = self._context_snapshots.get(turn)
        if snapshot:
            # Return a copy, consistent with get_history()/get_all_mutations(),
            # so callers cannot mutate the stored snapshot.
            return list(snapshot.page_ids)
        return []

    def get_history(self, page_id: str) -> List[PageMutation]:
        """All mutations for a page (copy, chronological order)."""
        return list(self._by_page.get(page_id, []))

    def get_mutations_by_actor(self, actor: Actor) -> List[PageMutation]:
        """All mutations by a specific actor."""
        return [m for m in self._mutations if m.actor == actor]

    def get_mutations_by_type(self, mutation_type: MutationType) -> List[PageMutation]:
        """All mutations of a specific type."""
        return [m for m in self._mutations if m.mutation_type == mutation_type]

    def get_mutations_in_turn(self, turn: int) -> List[PageMutation]:
        """All mutations in a specific turn."""
        return [m for m in self._mutations if m.turn == turn]

    def _page_ids_in_turn(self, turn: int, mutation_type: MutationType) -> List[str]:
        """Internal: page IDs that had a mutation of the given type in a turn."""
        return [
            m.page_id
            for m in self._mutations
            if m.turn == turn and m.mutation_type == mutation_type
        ]

    def get_pages_created_in_turn(self, turn: int) -> List[str]:
        """Get page IDs created in a specific turn."""
        return self._page_ids_in_turn(turn, MutationType.CREATE)

    def get_pages_faulted_in_turn(self, turn: int) -> List[str]:
        """Get page IDs faulted in during a specific turn."""
        return self._page_ids_in_turn(turn, MutationType.FAULT_IN)

    def get_pages_evicted_in_turn(self, turn: int) -> List[str]:
        """Get page IDs evicted in a specific turn."""
        return self._page_ids_in_turn(turn, MutationType.EVICT)

    def get_all_mutations(self) -> List[PageMutation]:
        """Get all mutations in chronological order (copy)."""
        return list(self._mutations)

    def mutation_count(self) -> int:
        """Total number of mutations logged."""
        return len(self._mutations)

    def page_count(self) -> int:
        """Number of unique pages with mutations."""
        return len(self._by_page)

    def clear(self) -> None:
        """Clear all mutations and snapshots (for testing)."""
        self._mutations.clear()
        self._by_page.clear()
        self._context_snapshots.clear()

    def get_summary(self) -> Dict[str, int]:
        """Get summary statistics keyed by human-readable labels."""
        by_type: Dict[MutationType, int] = defaultdict(int)
        for m in self._mutations:
            by_type[m.mutation_type] += 1

        # Use .get() rather than [] so absent types stay 0 without
        # inserting keys into the defaultdict.
        return {
            "total_mutations": len(self._mutations),
            "unique_pages": len(self._by_page),
            "context_snapshots": len(self._context_snapshots),
            "creates": by_type.get(MutationType.CREATE, 0),
            "faults": by_type.get(MutationType.FAULT_IN, 0),
            "evictions": by_type.get(MutationType.EVICT, 0),
            "compressions": by_type.get(MutationType.COMPRESS, 0),
            "pins": by_type.get(MutationType.PIN, 0),
            "unpins": by_type.get(MutationType.UNPIN, 0),
        }
@@ -0,0 +1,206 @@
1
+ # chuk_ai_session_manager/memory/pack_cache.py
2
+ """
3
+ Context Pack Cache - caches packed context to avoid re-packing on small incremental turns.
4
+
5
+ The TLB caches address translations. But in LLM terms, the expensive part is often:
6
+ - Selecting pages
7
+ - Choosing compression levels
8
+ - Rendering into VM:CONTEXT
9
+
10
+ This cache stores the packed context output to avoid recomputing it when:
11
+ - User says something short
12
+ - Assistant responds
13
+ - No pages changed
14
+
15
+ Cache is keyed by: (session_id, model_id, token_budget, working_set_hash)
16
+ """
17
+
18
+ import hashlib
19
+ from collections import OrderedDict
20
+ from datetime import datetime
21
+ from typing import Dict, List, Optional
22
+
23
+ from pydantic import BaseModel, Field
24
+
25
+
26
class PackedContext(BaseModel):
    """A cached packed context result.

    Holds the rendered VM:CONTEXT / VM:MANIFEST_JSON output plus the
    cache-key components it was produced from, so entries can be
    debugged and invalidated per session by ContextPackCache.
    """

    # The packed content
    vm_context: str = Field(..., description="The VM:CONTEXT formatted content")
    vm_manifest_json: str = Field(..., description="The VM:MANIFEST_JSON content")

    # Metadata
    page_ids: List[str] = Field(default_factory=list, description="Pages included")
    # Token count of the packed output (0 if not measured).
    tokens_used: int = Field(default=0)
    # Naive-UTC creation time (datetime.utcnow).
    created_at: datetime = Field(default_factory=datetime.utcnow)

    # Cache key components (for debugging); stamped by ContextPackCache.put().
    session_id: str = Field(default="")
    model_id: str = Field(default="")
    token_budget: int = Field(default=0)
    working_set_hash: str = Field(default="")
43
+
44
+
45
class CacheEntry(BaseModel):
    """Internal cache entry with LRU tracking.

    Wraps a PackedContext with access metadata used for statistics;
    LRU ordering itself is kept by the cache's OrderedDict.
    """

    # The cached packed-context payload.
    packed: PackedContext
    # Last time this entry was returned by a cache hit (naive UTC).
    last_accessed: datetime = Field(default_factory=datetime.utcnow)
    # Number of cache hits served from this entry.
    access_count: int = Field(default=0)
51
+
52
+
53
class ContextPackCache:
    """
    Cache packed context to avoid re-packing on small incremental turns.
    Invalidate on working set changes.

    This drastically reduces overhead for "small incremental turns" -
    user says something short, assistant responds, no pages changed, reuse the pack.

    Entries live in an OrderedDict used as an LRU: a hit moves the entry
    to the end, and eviction pops from the front (least recently used).
    """

    def __init__(self, max_entries: int = 32):
        # Maximum number of cached packs before LRU eviction.
        self.max_entries = max_entries
        # key -> CacheEntry; insertion/access order doubles as LRU order.
        self._cache: OrderedDict[str, CacheEntry] = OrderedDict()
        self._stats = {
            "hits": 0,
            "misses": 0,
            "evictions": 0,
            "invalidations": 0,
        }

    def _make_key(
        self,
        session_id: str,
        model_id: str,
        token_budget: int,
        working_set_hash: str,
    ) -> str:
        """Create a stable 16-hex-char cache key from the key components."""
        key_str = f"{session_id}:{model_id}:{token_budget}:{working_set_hash}"
        return hashlib.sha256(key_str.encode()).hexdigest()[:16]

    @staticmethod
    def compute_working_set_hash(
        page_ids: List[str], versions: Optional[Dict[str, int]] = None
    ) -> str:
        """
        Compute a hash of the working set for cache key.

        Args:
            page_ids: List of page IDs in working set
            versions: Optional dict of page_id -> version number

        Returns:
            Hash string representing the working set state
        """
        # Sort for consistency (hash is order-independent).
        sorted_ids = sorted(page_ids)

        if versions:
            # Include versions in hash so edited pages invalidate the pack.
            parts = [f"{pid}:{versions.get(pid, 0)}" for pid in sorted_ids]
        else:
            parts = sorted_ids

        content = "|".join(parts)
        return hashlib.sha256(content.encode()).hexdigest()[:16]

    def get(
        self,
        session_id: str,
        model_id: str,
        token_budget: int,
        working_set_hash: str,
    ) -> Optional[PackedContext]:
        """
        O(1) lookup for cached pack.

        Returns None if not found.
        """
        key = self._make_key(session_id, model_id, token_budget, working_set_hash)

        if key not in self._cache:
            self._stats["misses"] += 1
            return None

        # Move to end (most recently used) and refresh access metadata.
        self._cache.move_to_end(key)

        entry = self._cache[key]
        entry.last_accessed = datetime.utcnow()
        entry.access_count += 1

        self._stats["hits"] += 1
        return entry.packed

    def put(
        self,
        session_id: str,
        model_id: str,
        token_budget: int,
        working_set_hash: str,
        packed: PackedContext,
    ) -> None:
        """Store packed context, evicting LRU entries if the cache is full."""
        key = self._make_key(session_id, model_id, token_budget, working_set_hash)

        # Stamp the key components onto the packed result (for debugging,
        # and for invalidate_session, which matches on packed.session_id).
        packed.session_id = session_id
        packed.model_id = model_id
        packed.token_budget = token_budget
        packed.working_set_hash = working_set_hash

        # Replacing an existing key must not evict an unrelated entry:
        # remove the old entry first so the capacity check below is accurate.
        # (Re-inserting also moves the key to the most-recently-used end;
        # plain reassignment on an OrderedDict keeps the old position.)
        if key in self._cache:
            del self._cache[key]

        # Evict least-recently-used entries (front of the OrderedDict)
        # until there is room for the new entry.
        while len(self._cache) >= self.max_entries:
            self._cache.popitem(last=False)
            self._stats["evictions"] += 1

        self._cache[key] = CacheEntry(packed=packed)

    def invalidate_session(self, session_id: str) -> int:
        """
        Invalidate all cached packs for session.

        Called when working set changes.
        Returns number of entries invalidated.
        """
        # Collect first, then delete: can't remove while iterating the dict.
        keys_to_remove = [
            key
            for key, entry in self._cache.items()
            if entry.packed.session_id == session_id
        ]

        for key in keys_to_remove:
            del self._cache[key]
            self._stats["invalidations"] += 1

        return len(keys_to_remove)

    def invalidate_all(self) -> None:
        """Clear entire cache."""
        count = len(self._cache)
        self._cache.clear()
        self._stats["invalidations"] += count

    @property
    def size(self) -> int:
        """Current number of cached entries."""
        return len(self._cache)

    @property
    def hit_rate(self) -> float:
        """Cache hit rate (0.0 when there has been no traffic)."""
        total = self._stats["hits"] + self._stats["misses"]
        if total == 0:
            return 0.0
        return self._stats["hits"] / total

    def get_stats(self) -> Dict[str, int]:
        """Get cache statistics (counters plus current/max size)."""
        return {
            **self._stats,
            "size": len(self._cache),
            "max_size": self.max_entries,
        }
@@ -0,0 +1,275 @@
1
+ # chuk_ai_session_manager/memory/page_table.py
2
+ """
3
+ Page Table implementation for AI Virtual Memory.
4
+
5
+ The PageTable maps page IDs to their metadata (location, state, access patterns).
6
+ It's the core data structure for tracking all pages in the system.
7
+
8
+ Like an OS page table, it doesn't hold content - just metadata about where
9
+ content lives and its current state.
10
+
11
+ Design principles:
12
+ - Pydantic-native: BaseModel subclass with proper validation
13
+ - No magic strings: Uses StorageTier and Modality enums
14
+ - Type-safe: Full type annotations throughout
15
+ """
16
+
17
+ from datetime import datetime
18
+ from typing import Dict, List, Optional, Set
19
+
20
+ from pydantic import BaseModel, Field, PrivateAttr
21
+
22
+ from .models import (
23
+ CompressionLevel,
24
+ MemoryPage,
25
+ Modality,
26
+ PageTableEntry,
27
+ PageTableStats,
28
+ StorageTier,
29
+ )
30
+
31
+
32
class PageTable(BaseModel):
    """
    Maps page IDs to their current location and state.

    The page table is the source of truth for:
    - Where each page lives (which tier)
    - Whether it's dirty (needs flush)
    - Access patterns (for eviction decisions)

    Invariants maintained by _add_entry()/remove()/update_location():
    - entries[pid].tier matches the _by_tier index containing pid
    - entries[pid].modality matches the _by_modality index containing pid
    - pid is in _dirty_pages iff entries[pid].dirty is True
    """

    # Core mapping: page_id -> metadata entry (the authoritative record).
    entries: Dict[str, PageTableEntry] = Field(default_factory=dict)

    # Indexes for fast lookup - use PrivateAttr for internal state.
    # tier -> set of page_ids currently resident in that tier.
    _by_tier: Dict[StorageTier, Set[str]] = PrivateAttr(
        default_factory=lambda: {t: set() for t in StorageTier}
    )
    # modality -> set of page_ids of that modality.
    _by_modality: Dict[Modality, Set[str]] = PrivateAttr(
        default_factory=lambda: {m: set() for m in Modality}
    )
    # page_ids whose entry is dirty (modified, pending flush).
    _dirty_pages: Set[str] = PrivateAttr(default_factory=set)

    # Private attrs hold sets keyed by enums; allow non-pydantic types.
    model_config = {"arbitrary_types_allowed": True}

    def __len__(self) -> int:
        # Number of registered pages.
        return len(self.entries)

    def __contains__(self, page_id: str) -> bool:
        # Membership test: `page_id in table`.
        return page_id in self.entries

    def lookup(self, page_id: str) -> Optional[PageTableEntry]:
        """
        Look up a page entry by ID.

        Returns None if page doesn't exist.
        Does NOT update access tracking (use mark_accessed for that).
        """
        return self.entries.get(page_id)

    def register(self, page: MemoryPage) -> PageTableEntry:
        """
        Register a new page in the table.

        Creates a PageTableEntry from the MemoryPage and adds it to indexes.
        Re-registering an existing page_id replaces the old entry.
        """
        entry = PageTableEntry(
            page_id=page.page_id,
            tier=page.storage_tier,
            artifact_id=page.artifact_id,
            compression_level=page.compression_level,
            dirty=page.dirty,
            last_accessed=page.last_accessed,
            access_count=page.access_count,
            # Fall back to an estimate when the page has no recorded size
            # (also recomputes when size_tokens is 0, which is falsy).
            size_tokens=page.size_tokens or page.estimate_tokens(),
            modality=page.modality,
            # v0.8 fields
            page_type=page.page_type,
            provenance=page.provenance,
            pinned=page.pinned,
        )

        self._add_entry(entry)
        return entry

    def register_entry(self, entry: PageTableEntry) -> None:
        """Register an existing entry directly (e.g. restored state)."""
        self._add_entry(entry)

    def _add_entry(self, entry: PageTableEntry) -> None:
        """Internal: add entry to table and indexes.

        Handles both fresh inserts and in-place replacement: stale index
        membership from a previous entry for the same page_id is removed
        first so the indexes stay consistent with `entries`.
        """
        page_id = entry.page_id

        # Remove from old indexes if updating
        if page_id in self.entries:
            old_entry = self.entries[page_id]
            # .get(..., set()) tolerates tiers/modalities missing from the
            # index; discard() is a no-op if the id isn't present.
            self._by_tier.get(old_entry.tier, set()).discard(page_id)
            self._by_modality.get(old_entry.modality, set()).discard(page_id)
            if old_entry.dirty:
                self._dirty_pages.discard(page_id)

        # Add to table
        self.entries[page_id] = entry

        # Update indexes (create buckets lazily in case an enum value was
        # not pre-seeded by the default factories).
        if entry.tier not in self._by_tier:
            self._by_tier[entry.tier] = set()
        self._by_tier[entry.tier].add(page_id)

        if entry.modality not in self._by_modality:
            self._by_modality[entry.modality] = set()
        self._by_modality[entry.modality].add(page_id)

        if entry.dirty:
            self._dirty_pages.add(page_id)

    def remove(self, page_id: str) -> Optional[PageTableEntry]:
        """
        Remove a page from the table.

        Returns the removed entry, or None if not found.
        Also drops the page from all indexes.
        """
        entry = self.entries.pop(page_id, None)
        if entry:
            self._by_tier.get(entry.tier, set()).discard(page_id)
            self._by_modality.get(entry.modality, set()).discard(page_id)
            self._dirty_pages.discard(page_id)
        return entry

    def update_location(
        self,
        page_id: str,
        tier: StorageTier,
        artifact_id: Optional[str] = None,
        compression_level: Optional[CompressionLevel] = None,
    ) -> bool:
        """
        Update a page's location (tier movement).

        Optionally also updates artifact_id and compression_level when
        provided. Does not touch the dirty flag or access tracking.

        Returns True if successful, False if page not found.
        """
        entry = self.entries.get(page_id)
        if not entry:
            return False

        # Update tier index: move the id from the old tier bucket to the new.
        self._by_tier.get(entry.tier, set()).discard(page_id)
        entry.tier = tier
        if tier not in self._by_tier:
            self._by_tier[tier] = set()
        self._by_tier[tier].add(page_id)

        # Update other fields if provided
        if artifact_id is not None:
            entry.artifact_id = artifact_id
        if compression_level is not None:
            entry.compression_level = compression_level

        return True

    def mark_accessed(self, page_id: str) -> bool:
        """
        Mark a page as accessed (updates LRU tracking).

        Delegates to the entry's own mark_accessed() for the actual
        timestamp/count update.

        Returns True if successful, False if page not found.
        """
        entry = self.entries.get(page_id)
        if not entry:
            return False

        entry.mark_accessed()
        return True

    def mark_dirty(self, page_id: str) -> bool:
        """
        Mark a page as dirty (modified).

        Returns True if successful, False if page not found.
        """
        entry = self.entries.get(page_id)
        if not entry:
            return False

        entry.dirty = True
        self._dirty_pages.add(page_id)
        return True

    def mark_clean(self, page_id: str) -> bool:
        """
        Mark a page as clean (flushed).

        Records the flush time on the entry (naive UTC via datetime.utcnow).

        Returns True if successful, False if page not found.
        """
        entry = self.entries.get(page_id)
        if not entry:
            return False

        entry.dirty = False
        entry.last_flushed = datetime.utcnow()
        self._dirty_pages.discard(page_id)
        return True

    def get_by_tier(self, tier: StorageTier) -> List[PageTableEntry]:
        """Get all entries in a specific tier (new list, arbitrary order)."""
        page_ids = self._by_tier.get(tier, set())
        # Guard `pid in self.entries` in case an index is ever stale.
        return [self.entries[pid] for pid in page_ids if pid in self.entries]

    def get_by_modality(self, modality: Modality) -> List[PageTableEntry]:
        """Get all entries of a specific modality (new list)."""
        page_ids = self._by_modality.get(modality, set())
        return [self.entries[pid] for pid in page_ids if pid in self.entries]

    def get_dirty_pages(self) -> List[PageTableEntry]:
        """Get all dirty (modified) pages."""
        return [self.entries[pid] for pid in self._dirty_pages if pid in self.entries]

    def get_working_set(self) -> List[PageTableEntry]:
        """Get all pages in L0 and L1 (the working set)."""
        l0 = self.get_by_tier(StorageTier.L0)
        l1 = self.get_by_tier(StorageTier.L1)
        return l0 + l1

    def get_eviction_candidates(
        self,
        tier: StorageTier = StorageTier.L1,
        limit: int = 10,
    ) -> List[PageTableEntry]:
        """
        Get pages that are candidates for eviction, sorted by LRU.

        Returns oldest-accessed pages first, at most `limit` of them.
        """
        entries = self.get_by_tier(tier)
        # Sort by last_accessed (oldest first); sorting the fresh list
        # returned by get_by_tier leaves internal state untouched.
        entries.sort(key=lambda e: e.last_accessed)
        return entries[:limit]

    def get_stats(self) -> "PageTableStats":
        """Get page table statistics (counts per tier/modality, dirty count)."""
        return PageTableStats(
            total_pages=len(self.entries),
            dirty_pages=len(self._dirty_pages),
            pages_by_tier={t: len(self._by_tier.get(t, set())) for t in StorageTier},
            pages_by_modality={
                m: len(self._by_modality.get(m, set())) for m in Modality
            },
        )

    def get_total_tokens(self, tiers: Optional[List[StorageTier]] = None) -> int:
        """
        Get total estimated tokens across specified tiers.

        If tiers is None, counts all pages. Entries with a falsy
        size_tokens (None or 0) contribute nothing.
        """
        total = 0
        entries_list = list(self.entries.values())

        if tiers:
            entries_list = [e for e in entries_list if e.tier in tiers]

        for entry in entries_list:
            if entry.size_tokens:
                total += entry.size_tokens

        return total