chuk-ai-session-manager 0.7__py3-none-any.whl → 0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. chuk_ai_session_manager/__init__.py +84 -40
  2. chuk_ai_session_manager/api/__init__.py +1 -1
  3. chuk_ai_session_manager/api/simple_api.py +53 -59
  4. chuk_ai_session_manager/exceptions.py +31 -17
  5. chuk_ai_session_manager/guards/__init__.py +118 -0
  6. chuk_ai_session_manager/guards/bindings.py +217 -0
  7. chuk_ai_session_manager/guards/cache.py +163 -0
  8. chuk_ai_session_manager/guards/manager.py +819 -0
  9. chuk_ai_session_manager/guards/models.py +498 -0
  10. chuk_ai_session_manager/guards/ungrounded.py +159 -0
  11. chuk_ai_session_manager/infinite_conversation.py +86 -79
  12. chuk_ai_session_manager/memory/__init__.py +247 -0
  13. chuk_ai_session_manager/memory/artifacts_bridge.py +469 -0
  14. chuk_ai_session_manager/memory/context_packer.py +347 -0
  15. chuk_ai_session_manager/memory/fault_handler.py +507 -0
  16. chuk_ai_session_manager/memory/manifest.py +307 -0
  17. chuk_ai_session_manager/memory/models.py +1084 -0
  18. chuk_ai_session_manager/memory/mutation_log.py +186 -0
  19. chuk_ai_session_manager/memory/pack_cache.py +206 -0
  20. chuk_ai_session_manager/memory/page_table.py +275 -0
  21. chuk_ai_session_manager/memory/prefetcher.py +192 -0
  22. chuk_ai_session_manager/memory/tlb.py +247 -0
  23. chuk_ai_session_manager/memory/vm_prompts.py +238 -0
  24. chuk_ai_session_manager/memory/working_set.py +574 -0
  25. chuk_ai_session_manager/models/__init__.py +21 -9
  26. chuk_ai_session_manager/models/event_source.py +3 -1
  27. chuk_ai_session_manager/models/event_type.py +10 -1
  28. chuk_ai_session_manager/models/session.py +103 -68
  29. chuk_ai_session_manager/models/session_event.py +69 -68
  30. chuk_ai_session_manager/models/session_metadata.py +9 -10
  31. chuk_ai_session_manager/models/session_run.py +21 -22
  32. chuk_ai_session_manager/models/token_usage.py +76 -76
  33. chuk_ai_session_manager/procedural_memory/__init__.py +70 -0
  34. chuk_ai_session_manager/procedural_memory/formatter.py +407 -0
  35. chuk_ai_session_manager/procedural_memory/manager.py +523 -0
  36. chuk_ai_session_manager/procedural_memory/models.py +371 -0
  37. chuk_ai_session_manager/sample_tools.py +79 -46
  38. chuk_ai_session_manager/session_aware_tool_processor.py +27 -16
  39. chuk_ai_session_manager/session_manager.py +238 -197
  40. chuk_ai_session_manager/session_prompt_builder.py +163 -111
  41. chuk_ai_session_manager/session_storage.py +45 -52
  42. {chuk_ai_session_manager-0.7.dist-info → chuk_ai_session_manager-0.8.dist-info}/METADATA +78 -2
  43. chuk_ai_session_manager-0.8.dist-info/RECORD +45 -0
  44. {chuk_ai_session_manager-0.7.dist-info → chuk_ai_session_manager-0.8.dist-info}/WHEEL +1 -1
  45. chuk_ai_session_manager-0.7.dist-info/RECORD +0 -22
  46. {chuk_ai_session_manager-0.7.dist-info → chuk_ai_session_manager-0.8.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1084 @@
1
+ # chuk_ai_session_manager/memory/models.py
2
+ """
3
+ Core models for the AI Virtual Memory system.
4
+
5
+ These models represent the fundamental abstractions for OS-style memory management:
6
+ - MemoryPage: Atomic unit of content (like an OS page)
7
+ - PageTableEntry: Metadata about a page's location and state
8
+ - CompressionLevel: Standard compression levels per modality
9
+ - TokenBudget: Token allocation tracking
10
+
11
+ Design principles:
12
+ - Pydantic-native: All models are BaseModel subclasses
13
+ - No magic strings: Use Enums for all categorical values
14
+ - Type-safe: Full type annotations throughout
15
+ """
16
+
17
+ from datetime import datetime
18
+ from enum import Enum, IntEnum
19
+ from typing import Any, Dict, List, Optional, Tuple, Union
20
+
21
+ from pydantic import BaseModel, Field
22
+
23
+
24
+ # =============================================================================
25
+ # Enums
26
+ # =============================================================================
27
+
28
+
29
class CompressionLevel(IntEnum):
    """
    Ordered fidelity levels that page content can be held at.

    The numeric value grows as detail shrinks: 0 keeps everything (and
    costs the most tokens), 3 keeps only a reference to the page.
    """

    # Complete content: full text, full resolution, full audio.
    FULL = 0
    # Reduced content: excerpts, thumbnail, transcript.
    REDUCED = 1
    # Abstract/summary: key points, caption, summary.
    ABSTRACT = 2
    # Reference only: topic tags, page_id only.
    REFERENCE = 3
41
+
42
+
43
class Modality(str, Enum):
    """Kinds of content a page can carry (string-valued enum)."""

    TEXT = "text"
    IMAGE = "image"
    AUDIO = "audio"
    VIDEO = "video"
    STRUCTURED = "structured"
51
+
52
+
53
class StorageTier(str, Enum):
    """Levels of the storage hierarchy, named by analogy with CPU caches."""

    L0 = "L0"  # "Registers": the current prompt context window
    L1 = "L1"  # "Cache": recent pages, hot data (session state)
    L2 = "L2"  # "RAM": session storage (chuk-sessions)
    L3 = "L3"  # "Disk": artifact storage (chuk-artifacts filesystem)
    L4 = "L4"  # "Cold": archive storage (chuk-artifacts S3)
61
+
62
+
63
class Affinity(str, Enum):
    """Locality hint attached to a page for distributed storage."""

    LOCAL = "local"
    REMOTE = "remote"
    SHARED = "shared"
69
+
70
+
71
class VMMode(str, Enum):
    """Operating modes of the virtual memory layer."""

    # No hallucinated memory, citations required.
    STRICT = "strict"
    # VM-aware but more conversational.
    RELAXED = "relaxed"
    # No tools, runtime handles everything.
    PASSIVE = "passive"
77
+
78
+
79
class MessageRole(str, Enum):
    """Speaker roles that can appear in conversation context."""

    USER = "user"
    ASSISTANT = "assistant"
    TOOL = "tool"
    SYSTEM = "system"
86
+
87
+
88
class PageType(str, Enum):
    """
    Category of a memory page.

    The category drives how eagerly a page is evicted or compressed:

    - transcript: raw turns and tool outputs (normal eviction)
    - summary:    LLM-generated summaries (low eviction, rebuildable)
    - artifact:   tool-created content (normal eviction)
    - claim:      decisions, facts, conclusions (very low eviction)
    - procedure:  "when calling tool X, we do Y" (low eviction)
    - index:      page metadata for search (very low eviction)
    """

    TRANSCRIPT = "transcript"
    SUMMARY = "summary"
    ARTIFACT = "artifact"
    CLAIM = "claim"  # high-value
    PROCEDURE = "procedure"  # learned patterns for tool usage
    INDEX = "index"

    # Legacy names kept for backwards compatibility.  Because each one
    # reuses an existing value, Enum treats it as an alias of the canonical
    # member above rather than as a new member.
    MESSAGE = "transcript"
    TOOL_RESULT = "transcript"
    MEDIA = "artifact"
    CHECKPOINT = "index"
113
+
114
+
115
class FaultReason(str, Enum):
    """
    Why a page fault was raised.

    Recording the reason makes it possible to separate faults the user
    asked for from faults caused by the model guessing.
    """

    # e.g. "What did we say about X?"
    USER_REQUESTED_RECALL = "user_requested_recall"
    # The model referenced a page_id.
    RESOLVE_REFERENCE = "resolve_reference"
    # A tool needs this page.
    TOOL_PREREQUISITE = "tool_prereq"
    # The page might be relevant.
    SPECULATIVE = "speculative"
126
+
127
+
128
class MutationType(str, Enum):
    """Kinds of page change recorded in the mutation log."""

    CREATE = "create"
    FAULT_IN = "fault_in"
    EVICT = "evict"
    COMPRESS = "compress"
    PIN = "pin"
    UNPIN = "unpin"
    UPDATE = "update"
    DELETE = "delete"
139
+
140
+
141
class Actor(str, Enum):
    """The party responsible for a page mutation."""

    USER = "user"
    MODEL = "model"
    TOOL = "tool"
    SYSTEM = "system"
148
+
149
+
150
class ContextPrefix(str, Enum):
    """Single-character prefixes used in the VM:CONTEXT format."""

    USER = "U"
    ASSISTANT = "A"
    TOOL = "T"
    SUMMARY = "S"
    IMAGE = "I"
    # "A" already belongs to ASSISTANT, so audio uses the "D" of auDio.
    AUDIO = "D"
    VIDEO = "V"
    # JSON / structured data.
    STRUCTURED = "J"
    UNKNOWN = "?"
162
+
163
+
164
class ToolType(str, Enum):
    """Kinds of tool definition; ``function`` is the only one declared."""

    FUNCTION = "function"
168
+
169
+
170
+ # =============================================================================
171
+ # Constants
172
+ # =============================================================================
173
+
174
# MIME types used for VM storage.
MEMORY_PAGE_MIME_TYPE = "application/x-memory-page"
VM_CHECKPOINT_MIME_TYPE = "application/x-vm-checkpoint"

# Integer value of every CompressionLevel member, in definition order
# (convenient for iteration).
ALL_COMPRESSION_LEVELS: List[int] = [member.value for member in CompressionLevel]
180
+
181
+
182
+ # =============================================================================
183
+ # Stats Models
184
+ # =============================================================================
185
+
186
+
187
class TLBStats(BaseModel):
    """Snapshot of TLB occupancy and hit/miss counters."""

    # Occupancy
    size: int = Field(default=0, description="Current number of entries")
    max_size: int = Field(default=512, description="Maximum entries")
    utilization: float = Field(
        default=0.0, description="Current utilization (0-1)"
    )

    # Lookup counters
    hits: int = Field(default=0, description="Total cache hits")
    misses: int = Field(default=0, description="Total cache misses")
    hit_rate: float = Field(default=0.0, description="Hit rate (0-1)")
196
+
197
+
198
class WorkingSetStats(BaseModel):
    """Snapshot of working-set page counts and token usage."""

    # Page counts
    l0_pages: int = Field(default=0, description="Pages in L0 (context)")
    l1_pages: int = Field(default=0, description="Pages in L1 (cache)")
    total_pages: int = Field(default=0, description="Total pages in working set")

    # Token accounting
    tokens_used: int = Field(default=0, description="Tokens currently used")
    tokens_available: int = Field(default=0, description="Tokens available")
    utilization: float = Field(default=0.0, description="Token utilization (0-1)")
    needs_eviction: bool = Field(
        default=False,
        description="Whether eviction is needed",
    )

    # Token usage split per modality; empty by default.
    tokens_by_modality: Dict[Modality, int] = Field(default_factory=dict)
211
+
212
+
213
class StorageStats(BaseModel):
    """Summary of a storage backend; only ``backend`` is required."""

    backend: str = Field(..., description="Backend type name")
    persistent: bool = Field(
        default=False, description="Whether storage persists"
    )
    session_id: Optional[str] = Field(
        default=None, description="Associated session"
    )
    pages_stored: int = Field(default=0, description="Number of pages stored")
220
+
221
+
222
class CombinedPageTableStats(BaseModel):
    """Bundle of page-table statistics and TLB statistics."""

    # Written as a string forward reference because PageTableStats is
    # declared further down in this module.
    page_table: "PageTableStats"
    tlb: TLBStats
227
+
228
+
229
class PageTableStats(BaseModel):
    """Aggregate counts describing the page table; all fields are required."""

    total_pages: int
    dirty_pages: int
    pages_by_tier: Dict[StorageTier, int]
    pages_by_modality: Dict[Modality, int]

    @property
    def working_set_size(self) -> int:
        """Return the number of pages in L0 plus L1 (absent tiers count as 0)."""
        tier_counts = self.pages_by_tier
        return sum(
            tier_counts.get(tier, 0) for tier in (StorageTier.L0, StorageTier.L1)
        )
243
+
244
+
245
class FaultMetrics(BaseModel):
    """Counters describing page-fault handling; every field has a default."""

    # Per-turn view
    faults_this_turn: int = 0
    max_faults_per_turn: int = 2
    faults_remaining: int = 2

    # Lifetime view
    total_faults: int = 0
    tlb_hit_rate: float = 0.0
253
+
254
+
255
+ # =============================================================================
256
+ # Content Models (for tool results)
257
+ # =============================================================================
258
+
259
+
260
class TextContent(BaseModel):
    """Text payload; ``text`` defaults to the empty string."""

    text: str = ""
264
+
265
+
266
class ImageContent(BaseModel):
    """Image payload; every field is optional and defaults to None."""

    caption: Optional[str] = None
    url: Optional[str] = None
    base64: Optional[str] = None
    embedding: Optional[List[float]] = None
273
+
274
+
275
class AudioContent(BaseModel):
    """Audio payload; every field is optional and defaults to None."""

    transcript: Optional[str] = None
    timestamps: Optional[List[Dict[str, Any]]] = None
    duration_seconds: Optional[float] = None
281
+
282
+
283
class VideoContent(BaseModel):
    """Video payload: a list of scene dicts plus optional transcript/duration."""

    scenes: List[Dict[str, Any]] = Field(default_factory=list)
    transcript: Optional[str] = None
    duration_seconds: Optional[float] = None
289
+
290
+
291
class StructuredContent(BaseModel):
    """Structured-data payload: a free-form dict and an optional schema name."""

    data: Dict[str, Any] = Field(default_factory=dict)
    schema_name: Optional[str] = None
296
+
297
+
298
# Anything a page's content may be: one of the typed content models above,
# or a plain dict.  Member order is kept exactly as declared.
PageContent = Union[
    TextContent, ImageContent, AudioContent, VideoContent, StructuredContent,
    Dict[str, Any],
]
307
+
308
+
309
class PageMeta(BaseModel):
    """Descriptive metadata accompanying a page in a tool result."""

    source_tier: str = "unknown"
    mime_type: Optional[str] = None
    size_bytes: Optional[int] = None
    dimensions: Optional[List[int]] = None
    duration_seconds: Optional[float] = None
    latency_ms: Optional[float] = None
318
+
319
+
320
class PageData(BaseModel):
    """Page payload as carried inside a tool-result envelope."""

    # Identification and placement are plain strings/ints here, not enums.
    page_id: str
    modality: str
    level: int
    tier: str

    content: PageContent
    # A fresh PageMeta (all defaults) is created when none is supplied.
    meta: PageMeta = Field(default_factory=PageMeta)
329
+
330
+
331
class FaultEffects(BaseModel):
    """Side effects reported for a fault operation."""

    promoted_to_working_set: bool = False
    tokens_est: int = 0
    # None (the default) when no eviction list is reported.
    evictions: Optional[List[str]] = None
337
+
338
+
339
class SearchResultEntry(BaseModel):
    """One hit in a page search result."""

    page_id: str
    modality: str
    tier: str
    levels: List[int] = Field(default_factory=list)
    hint: str = ""
    relevance: float = 0.0
348
+
349
+
350
+ # =============================================================================
351
+ # Tool Definition Models
352
+ # =============================================================================
353
+
354
+
355
class ToolParameter(BaseModel):
    """Schema of one parameter inside a tool definition."""

    # Required
    type: str
    description: str

    # Optional constraints; each defaults to None.
    enum: Optional[List[str]] = None
    minimum: Optional[int] = None
    maximum: Optional[int] = None
    default: Optional[Any] = None
364
+
365
+
366
class ToolParameters(BaseModel):
    """Parameter schema of a tool: named properties plus the required names."""

    type: str = "object"
    properties: Dict[str, ToolParameter] = Field(default_factory=dict)
    required: List[str] = Field(default_factory=list)
372
+
373
+
374
class ToolFunction(BaseModel):
    """Name, description and parameter schema of a tool's function."""

    name: str
    description: str
    parameters: ToolParameters
380
+
381
+
382
class ToolDefinition(BaseModel):
    """A full tool definition for the Chat Completions API."""

    # Defaults to the only declared tool type.
    type: ToolType = ToolType.FUNCTION
    function: ToolFunction
387
+
388
+
389
+ # =============================================================================
390
+ # Formatted Output Models
391
+ # =============================================================================
392
+
393
+
394
class FormattedPage(BaseModel):
    """A page rendered to a context string, with its estimated token cost."""

    content: str = Field(..., description="Formatted content string")
    tokens_est: int = Field(
        default=0,
        description="Estimated token count",
    )
399
+
400
+
401
+ # =============================================================================
402
+ # Core Page Models
403
+ # =============================================================================
404
+
405
+
406
class MemoryPage(BaseModel):
    """
    Atomic unit of content in the virtual memory system.

    A page holds one piece of content of any modality (text, image, audio,
    video, structured) together with its identity, storage location,
    size/access bookkeeping, dirty/pinned state, and references to stored
    representations at other compression levels.

    Only ``page_id`` and ``modality`` are required; every other field has a
    default.
    """

    # Identity
    page_id: str = Field(..., description="Unique identifier for this page")
    session_id: Optional[str] = Field(default=None, description="Owning session")

    # Content type
    modality: Modality = Field(..., description="Content modality")

    # Page type (critical for eviction/compression decisions)
    page_type: PageType = Field(
        default=PageType.TRANSCRIPT,
        description="Page type determines eviction/compression behavior",
    )

    # Provenance: what pages justify this one (for claims, summaries)
    provenance: List[str] = Field(
        default_factory=list,
        description="page_ids that this page derives from (for claims/summaries)",
    )

    # Representation linking (for compression chain)
    represents: Optional[str] = Field(
        default=None,
        description="page_id this is a compressed version of",
    )
    representation_level: int = Field(
        default=0,
        description="0=full, 1=reduced, 2=abstract, 3=reference",
    )

    # Location
    storage_tier: StorageTier = Field(
        default=StorageTier.L1, description="Current storage tier"
    )
    artifact_id: Optional[str] = Field(
        default=None, description="Reference to chuk-artifacts storage"
    )

    # Content (when loaded into L0/L1)
    content: Optional[Any] = Field(
        default=None, description="Actual content when in working set"
    )
    compression_level: CompressionLevel = Field(
        default=CompressionLevel.FULL, description="Current compression level"
    )

    # Multi-resolution representations
    # Maps compression level -> artifact_id for stored representations
    representations: Dict[CompressionLevel, str] = Field(
        default_factory=dict, description="artifact_id for each compression level"
    )

    # Size tracking
    size_bytes: int = Field(default=0, description="Size in bytes")
    size_tokens: Optional[int] = Field(
        default=None, description="Estimated token count (for text/transcript)"
    )

    # Access tracking (for LRU/eviction)
    # NOTE(review): datetime.utcnow() produces naive datetimes and is
    # deprecated since Python 3.12.  It is kept here because switching to
    # timezone-aware values could break comparisons against naive timestamps
    # held elsewhere; migrate all timestamp producers together.
    created_at: datetime = Field(default_factory=datetime.utcnow)
    last_accessed: datetime = Field(default_factory=datetime.utcnow)
    access_count: int = Field(default=0, description="Number of times accessed")

    # Importance (affects eviction priority), constrained to [0.0, 1.0].
    # NOTE(review): the original comment says claims default to a higher
    # importance; this class applies 0.5 to every page type, so presumably
    # callers raise it for claims - confirm.
    importance: float = Field(
        default=0.5,
        ge=0.0,
        le=1.0,
        description="Importance score for eviction decisions",
    )

    # State tracking
    dirty: bool = Field(default=False, description="Has been modified since last flush")
    pinned: bool = Field(default=False, description="Pinned pages are never evicted")

    # Lineage (legacy, use provenance/represents instead)
    parent_page_id: Optional[str] = Field(
        default=None, description="Parent page if derived (e.g., summary of original)"
    )

    # Modality-specific metadata
    mime_type: Optional[str] = Field(default=None)
    duration_seconds: Optional[float] = Field(
        default=None, description="Duration for audio/video"
    )
    dimensions: Optional[Tuple[int, int]] = Field(
        default=None, description="Width x height for image/video"
    )
    transcript: Optional[str] = Field(
        default=None, description="Transcript for audio/video (L1 representation)"
    )
    caption: Optional[str] = Field(
        default=None, description="Caption for image (L2 representation)"
    )

    # Custom metadata
    metadata: Dict[str, Any] = Field(default_factory=dict)

    def mark_accessed(self) -> None:
        """Stamp ``last_accessed`` with the current time and bump ``access_count``."""
        self.last_accessed = datetime.utcnow()
        self.access_count += 1

    def mark_dirty(self) -> None:
        """Flag the page as modified since the last flush."""
        self.dirty = True

    def mark_clean(self) -> None:
        """Clear the dirty flag (the page has been flushed)."""
        self.dirty = False

    def estimate_tokens(self) -> int:
        """
        Estimate the token count of the current content.

        Resolution order:

        1. ``size_tokens`` when it has been set explicitly.
        2. ``0`` when there is no content.
        3. ``len(text) // 4`` for string content (roughly 4 chars per token).
        4. ``len(serialized) // 4`` for dict content.
        5. Otherwise ``size_bytes // 4`` when ``size_bytes`` is positive,
           else a flat 100.

        Returns:
            A non-exact token estimate; never raises for unserializable
            dict content.
        """
        if self.size_tokens is not None:
            return self.size_tokens

        payload = self.content
        if payload is None:
            return 0

        if isinstance(payload, str):
            # Rough estimate: 4 chars per token
            return len(payload) // 4

        if isinstance(payload, dict):
            import json

            # ``content`` is typed Any, so a dict may hold values (datetime,
            # bytes, models, ...) or keys that JSON cannot encode.  Only the
            # length matters here, so stringify such values and fall back to
            # the dict's repr if encoding still fails (e.g. tuple keys or
            # circular references).  JSON-native dicts give the same result
            # as a plain json.dumps().
            try:
                serialized = json.dumps(payload, default=str)
            except (TypeError, ValueError):
                serialized = str(payload)
            return len(serialized) // 4

        return self.size_bytes // 4 if self.size_bytes > 0 else 100
548
+
549
+
550
class PageTableEntry(BaseModel):
    """
    Page-table record for one page.

    Holds where the page lives, its state flags and its access counters,
    but not the page content itself.
    """

    page_id: str

    # Where the page lives
    tier: StorageTier
    artifact_id: Optional[str] = None
    compression_level: CompressionLevel = CompressionLevel.FULL

    # Page type, consulted when computing eviction priority
    page_type: PageType = Field(
        default=PageType.TRANSCRIPT,
        description="Page type for eviction/compression decisions",
    )

    # Source pages this page can be traced back to
    provenance: List[str] = Field(
        default_factory=list,
        description="page_ids this page derives from",
    )

    # State flags
    dirty: bool = Field(default=False, description="Modified since last flush")
    pinned: bool = Field(default=False, description="Pinned pages are never evicted")
    last_flushed: Optional[datetime] = Field(default=None)

    # Access bookkeeping
    last_accessed: datetime = Field(default_factory=datetime.utcnow)
    access_count: int = Field(default=0)

    # Size
    size_tokens: Optional[int] = None

    # Modality, used for filtering
    modality: Modality = Modality.TEXT

    # Locality hint
    affinity: Affinity = Field(
        default=Affinity.LOCAL,
        description="Locality hint for distributed storage",
    )

    def mark_accessed(self) -> None:
        """Stamp ``last_accessed`` with the current time and bump ``access_count``."""
        self.last_accessed = datetime.utcnow()
        self.access_count += 1

    @property
    def eviction_priority(self) -> float:
        """
        Eviction priority derived from the page type.

        A lower value means the page is less likely to be evicted.  Pinned
        pages always score 0.0; page types without an entry in the table
        below score 0.5.
        """
        # Pinned pages are never evicted, whatever their type.
        if self.pinned:
            return 0.0

        weight_by_type = {
            PageType.CLAIM: 0.1,  # very low: claims are precious
            PageType.INDEX: 0.2,  # very low: indexes are needed for search
            PageType.PROCEDURE: 0.3,  # low: procedures help tool usage
            PageType.SUMMARY: 0.4,  # low: rebuildable but useful
            PageType.ARTIFACT: 0.6,  # normal
            PageType.TRANSCRIPT: 0.7,  # normal
        }
        return weight_by_type.get(self.page_type, 0.5)
622
+
623
+
624
class TokenBudget(BaseModel):
    """
    Per-modality token accounting against a fixed context-window limit.

    ``total_limit - reserved`` is the usable budget; ``tokens_by_modality``
    records how many tokens each modality currently consumes.  ``add``,
    ``remove`` and ``set_tokens`` all clamp a modality's count at zero.
    """

    total_limit: int = Field(default=128000, description="Total context window size")
    reserved: int = Field(
        default=4000, description="Reserved for system prompt, tools, etc."
    )

    # Current usage by modality - stored as dict for Pydantic serialization.
    # Every Modality starts at 0.
    tokens_by_modality: Dict[Modality, int] = Field(
        default_factory=lambda: {m: 0 for m in Modality}
    )

    @property
    def text_tokens(self) -> int:
        """Tokens currently attributed to text."""
        return self.tokens_by_modality.get(Modality.TEXT, 0)

    @property
    def image_tokens(self) -> int:
        """Tokens currently attributed to images."""
        return self.tokens_by_modality.get(Modality.IMAGE, 0)

    @property
    def audio_tokens(self) -> int:
        """Tokens currently attributed to audio."""
        return self.tokens_by_modality.get(Modality.AUDIO, 0)

    @property
    def video_tokens(self) -> int:
        """Tokens currently attributed to video."""
        return self.tokens_by_modality.get(Modality.VIDEO, 0)

    @property
    def structured_tokens(self) -> int:
        """Tokens currently attributed to structured data."""
        return self.tokens_by_modality.get(Modality.STRUCTURED, 0)

    @property
    def used(self) -> int:
        """Total tokens currently used, summed over all modalities."""
        return sum(self.tokens_by_modality.values())

    @property
    def available(self) -> int:
        """Tokens available for new content (never negative)."""
        return max(0, self.total_limit - self.reserved - self.used)

    @property
    def utilization(self) -> float:
        """
        Current utilization as a fraction of the usable budget, capped at 1.0.

        Returns 1.0 when the usable budget (``total_limit - reserved``) is
        zero or negative.
        """
        usable = self.total_limit - self.reserved
        if usable <= 0:
            return 1.0
        return min(1.0, self.used / usable)

    def can_fit(self, tokens: int) -> bool:
        """Return True when ``tokens`` does not exceed ``available``."""
        return tokens <= self.available

    def add(self, tokens: int, modality: Modality) -> bool:
        """
        Add ``tokens`` to ``modality``.

        Returns:
            True when the tokens fit and were recorded, False when they do
            not fit (in which case nothing changes).
        """
        if not self.can_fit(tokens):
            return False

        current = self.tokens_by_modality.get(modality, 0)
        # A negative ``tokens`` always "fits"; clamp so the stored count
        # cannot drop below zero, matching remove() and set_tokens().
        self.tokens_by_modality[modality] = max(0, current + tokens)
        return True

    def remove(self, tokens: int, modality: Modality) -> None:
        """Subtract ``tokens`` from ``modality``, clamping the count at zero."""
        current = self.tokens_by_modality.get(modality, 0)
        self.tokens_by_modality[modality] = max(0, current - tokens)

    def get_tokens(self, modality: Modality) -> int:
        """Return the token count for ``modality`` (0 when absent)."""
        return self.tokens_by_modality.get(modality, 0)

    def set_tokens(self, modality: Modality, tokens: int) -> None:
        """Set the token count for ``modality``, clamping at zero."""
        self.tokens_by_modality[modality] = max(0, tokens)
706
+
707
+
708
class VMMetrics(BaseModel):
    """
    Counters for monitoring VM health and performance.

    Tracks faults, TLB hits/misses, evictions, token usage and page
    distribution.  The ``*_this_turn`` counters are reset by ``new_turn``.
    """

    # Fault tracking
    faults_total: int = Field(default=0)
    faults_this_turn: int = Field(default=0)

    # TLB stats
    tlb_hits: int = Field(default=0)
    tlb_misses: int = Field(default=0)

    # Eviction stats
    evictions_total: int = Field(default=0)
    evictions_this_turn: int = Field(default=0)

    # Token tracking
    tokens_in_working_set: int = Field(default=0)
    tokens_available: int = Field(default=0)

    # Page distribution - use Enums as keys; every key starts at 0
    pages_by_tier: Dict[StorageTier, int] = Field(
        default_factory=lambda: {t: 0 for t in StorageTier}
    )
    pages_by_modality: Dict[Modality, int] = Field(
        default_factory=lambda: {m: 0 for m in Modality}
    )

    @property
    def fault_rate(self) -> float:
        """
        Faults recorded in the current turn.

        Turns are not counted by this model, so the value is simply
        ``faults_this_turn``, converted to float to honour the declared
        return type.
        """
        return float(self.faults_this_turn)

    @property
    def tlb_hit_rate(self) -> float:
        """TLB hit rate as a fraction in [0, 1]; 0.0 when nothing was looked up."""
        total = self.tlb_hits + self.tlb_misses
        if total == 0:
            return 0.0
        return self.tlb_hits / total

    def record_fault(self) -> None:
        """Count one page fault in both the lifetime and per-turn counters."""
        self.faults_total += 1
        self.faults_this_turn += 1

    def record_tlb_hit(self) -> None:
        """Count one TLB hit."""
        self.tlb_hits += 1

    def record_tlb_miss(self) -> None:
        """Count one TLB miss."""
        self.tlb_misses += 1

    def record_eviction(self) -> None:
        """Count one eviction in both the lifetime and per-turn counters."""
        self.evictions_total += 1
        self.evictions_this_turn += 1

    def new_turn(self) -> None:
        """Reset the per-turn fault and eviction counters."""
        self.faults_this_turn = 0
        self.evictions_this_turn = 0
772
+
773
+
774
+ # =============================================================================
775
+ # Fault Policy Models
776
+ # =============================================================================
777
+
778
+
779
class FaultConfidenceThreshold(str, Enum):
    """How certain the need for a page must be before a fault is raised."""

    # Only fault when the page_id is directly requested.
    EXPLICIT = "explicit"
    # Fault if the page content is referenced/needed.
    REFERENCED = "referenced"
    # Fault on potential relevance (aggressive).
    SPECULATIVE = "speculative"
785
+
786
+
787
class FaultPolicy(BaseModel):
    """
    Per-turn limits on page faults.

    Caps both the number of faults and the tokens they may load in a single
    turn, and carries the running per-turn totals those caps are checked
    against.
    """

    # Cap on the number of faults per turn
    max_faults_per_turn: int = Field(default=3)

    # Cap on the tokens loaded by faults per turn
    max_fault_tokens_per_turn: int = Field(
        default=8192,
        description="Don't let faults blow the token budget",
    )

    # How strongly a page must be needed before faulting
    fault_confidence_threshold: FaultConfidenceThreshold = Field(
        default=FaultConfidenceThreshold.REFERENCED
    )

    # Running totals for the current turn
    tokens_used_this_turn: int = Field(default=0)
    faults_this_turn: int = Field(default=0)

    def can_fault(self, estimated_tokens: int) -> bool:
        """
        Return True when one more fault of ``estimated_tokens`` is allowed.

        The fault is refused when the per-turn fault count has reached its
        cap, or when loading ``estimated_tokens`` would push the per-turn
        token total above its cap.
        """
        if self.faults_this_turn >= self.max_faults_per_turn:
            return False
        projected_tokens = self.tokens_used_this_turn + estimated_tokens
        return not (projected_tokens > self.max_fault_tokens_per_turn)

    def record_fault(self, tokens: int) -> None:
        """Count one fault and add ``tokens`` to the per-turn token total."""
        self.faults_this_turn += 1
        self.tokens_used_this_turn += tokens

    def new_turn(self) -> None:
        """Zero the per-turn fault and token totals."""
        self.faults_this_turn = 0
        self.tokens_used_this_turn = 0
829
+
830
+
831
class FaultRecord(BaseModel):
    """Metrics record describing one page fault."""

    # What faulted, why, and in which turn
    page_id: str
    reason: FaultReason
    turn: int

    # Cost of resolving it
    tokens_loaded: int
    latency_ms: float = 0.0

    # Defaults to the creation time (naive datetime from datetime.utcnow).
    timestamp: datetime = Field(default_factory=datetime.utcnow)
840
+
841
+
842
+ # =============================================================================
843
+ # Mutation Log Models
844
+ # =============================================================================
845
+
846
+
847
class PageMutation(BaseModel):
    """
    Log record of one change to a page.

    Intended to support debugging, replay and grounding questions such as
    "What was in context for turn T?" and "Who changed what and why?".

    NOTE(review): the record is described as immutable, but nothing visible
    in this class freezes the model - confirm whether that is enforced
    elsewhere or only by convention.
    """

    # Which mutation, on which page, and when
    mutation_id: str
    page_id: str
    timestamp: datetime = Field(default_factory=datetime.utcnow)
    turn: int = 0

    mutation_type: MutationType

    # Storage tier before (None allowed) and after the mutation
    tier_before: Optional[StorageTier] = None
    tier_after: StorageTier

    # Who caused the mutation, and optionally why
    actor: Actor
    cause: Optional[str] = Field(
        default=None,
        description="e.g., 'eviction_pressure', 'page_fault', 'explicit_request'",
    )
873
+
874
+
875
+ # =============================================================================
876
+ # Memory ABI Models
877
+ # =============================================================================
878
+
879
+
880
class PageManifestEntry(BaseModel):
    """Manifest row describing one page.

    Carries the page's identity, modality and type, its compression level,
    token count and importance, the page ids it was derived from, and flags
    saying whether it may be evicted or compressed.
    """

    page_id: str
    modality: str
    # One of: transcript, summary, artifact, claim, procedure, index
    page_type: str
    compression_level: int
    tokens: int
    importance: float
    # Source page_ids this page was derived from
    provenance: List[str] = Field(default_factory=list)
    # Both flags default to permissive
    can_evict: bool = True
    can_compress: bool = True
892
+
893
+
894
class MemoryABI(BaseModel):
    """
    Memory negotiation contract ("Application Binary Interface").

    Describes which pages are in context, whether faults are permitted,
    and the token budgets and reservations that apply, so that different
    models and tool processors can reason about memory cost.
    """

    # Pages currently in context
    pages: List[PageManifestEntry] = Field(default_factory=list)

    # Capabilities
    faults_allowed: bool = True
    upgrade_budget_tokens: int = Field(
        default=2048, description="Tokens reserved for fault resolution"
    )

    # Hard limits
    max_context_tokens: int = 128000
    reserved_tokens: int = Field(default=2000, description="System prompt, etc.")

    # Tool schemas are budgeted separately (often the hidden token hog)
    tool_schema_tokens_reserved: int = Field(
        default=0, description="Tokens consumed by tool definitions"
    )
    active_toolset_hash: Optional[str] = Field(
        default=None, description="For cache invalidation when tools change"
    )

    # Relative preference per modality
    modality_weights: Dict[str, float] = Field(
        default_factory=lambda: {
            "text": 1.0,
            "image": 0.8,
            "audio": 0.6,
            "video": 0.4,
        }
    )

    @property
    def available_tokens(self) -> int:
        """Return the tokens left for content, clamped so it is never negative.

        Computed as ``max_context_tokens`` minus ``reserved_tokens`` minus
        ``tool_schema_tokens_reserved``.
        """
        remaining = (
            self.max_context_tokens
            - self.reserved_tokens
            - self.tool_schema_tokens_reserved
        )
        return remaining if remaining > 0 else 0
943
+
944
+
945
+ # =============================================================================
946
+ # UX Metrics Models
947
+ # =============================================================================
948
+
949
+
950
class RecallAttempt(BaseModel):
    """One recall attempt, kept so a success rate can be computed later."""

    turn: int
    # What the user asked to recall
    query: str
    page_ids_cited: List[str] = Field(default_factory=list)
    # True when the user had to correct the recalled answer
    user_corrected: bool = False
    # NOTE(review): datetime.utcnow returns a naive datetime and is
    # deprecated since Python 3.12; consider a timezone-aware default.
    timestamp: datetime = Field(default_factory=datetime.utcnow)
958
+
959
+
960
class UserExperienceMetrics(BaseModel):
    """
    Metrics that correlate with user satisfaction.

    Collects recall attempts, page faults, and per-turn context statistics,
    and derives summary numbers (recall success rate, thrash index,
    effective-token ratio, fault breakdown) from them.
    """

    # Recall tracking
    recall_attempts: List[RecallAttempt] = Field(default_factory=list)

    # Fault history for thrash calculation
    fault_history: List[FaultRecord] = Field(default_factory=list)

    # Page references and context size per turn (for effective tokens)
    pages_referenced_per_turn: Dict[int, List[str]] = Field(default_factory=dict)
    tokens_in_context_per_turn: Dict[int, int] = Field(default_factory=dict)

    def recall_success_rate(self) -> float:
        """
        Fraction of recall attempts that needed no user correction.

        Returns:
            1.0 when no attempts have been recorded, otherwise the share of
            attempts whose ``user_corrected`` flag is false.
        """
        if not self.recall_attempts:
            return 1.0
        successes = sum(1 for r in self.recall_attempts if not r.user_corrected)
        return successes / len(self.recall_attempts)

    def thrash_index(self, window_turns: int = 5) -> float:
        """
        Repeat faults per turn over the most recent window.

        A fault counts as a "thrash" fault when the same page already
        faulted earlier inside the window. The window covers faults whose
        turn is at least ``latest_turn - window_turns`` (floored at 0),
        where ``latest_turn`` is the highest turn in ``fault_history``.

        Args:
            window_turns: Window size; also the divisor of the result.

        Returns:
            Thrash faults divided by ``window_turns``; 0.0 when there is no
            fault history or ``window_turns`` is not positive.
            Low = stable working set. High = constantly missing pages.
        """
        if window_turns <= 0 or not self.fault_history:
            return 0.0

        latest_turn = max(f.turn for f in self.fault_history)
        earliest_turn = max(0, latest_turn - window_turns)
        recent_page_ids = [
            f.page_id for f in self.fault_history if f.turn >= earliest_turn
        ]

        # Every fault beyond the first for a given page is a repeat, so the
        # repeat count is total faults minus distinct pages.
        thrash_faults = len(recent_page_ids) - len(set(recent_page_ids))
        return thrash_faults / window_turns

    def effective_tokens_ratio(self, turn: int) -> float:
        """
        Estimate what fraction of context tokens contributed to the answer.

        This is a simplified estimate: each referenced page is assumed to
        cost about 200 tokens rather than summing real page sizes.

        Args:
            turn: Turn to evaluate.

        Returns:
            A value in [0.0, 1.0]; 0.0 when no positive context size was
            recorded for ``turn``.
        """
        context_tokens = self.tokens_in_context_per_turn.get(turn, 0)
        # A non-positive context size cannot yield a meaningful ratio (and a
        # negative one would otherwise produce a negative result).
        if context_tokens <= 0:
            return 0.0

        referenced_pages = self.pages_referenced_per_turn.get(turn, [])
        tokens_per_referenced_page = 200  # rough per-page estimate
        referenced_estimate = len(referenced_pages) * tokens_per_referenced_page
        return min(1.0, referenced_estimate / context_tokens)

    def record_recall_attempt(
        self,
        turn: int,
        query: str,
        page_ids_cited: List[str],
        user_corrected: bool = False,
    ) -> None:
        """
        Append a recall attempt to ``recall_attempts``.

        Args:
            turn: Turn in which the recall happened.
            query: What the user asked to recall.
            page_ids_cited: Page ids cited in the answer.
            user_corrected: Whether the user had to correct the answer.
        """
        self.recall_attempts.append(
            RecallAttempt(
                turn=turn,
                query=query,
                page_ids_cited=page_ids_cited,
                user_corrected=user_corrected,
            )
        )

    def record_fault(
        self,
        page_id: str,
        reason: FaultReason,
        turn: int,
        tokens_loaded: int,
        latency_ms: float = 0.0,
    ) -> None:
        """
        Append a fault to ``fault_history`` for thrash tracking.

        Args:
            page_id: Page that faulted.
            reason: Why the fault happened.
            turn: Turn in which the fault happened.
            tokens_loaded: Tokens loaded to resolve the fault.
            latency_ms: Resolution latency in milliseconds.
        """
        self.fault_history.append(
            FaultRecord(
                page_id=page_id,
                reason=reason,
                turn=turn,
                tokens_loaded=tokens_loaded,
                latency_ms=latency_ms,
            )
        )

    def record_turn_context(
        self,
        turn: int,
        tokens_in_context: int,
        pages_referenced: List[str],
    ) -> None:
        """
        Store the context size and referenced pages for a turn.

        Overwrites any values previously recorded for the same turn.

        Args:
            turn: Turn being recorded.
            tokens_in_context: Total tokens in context for that turn.
            pages_referenced: Page ids referenced during that turn.
        """
        self.tokens_in_context_per_turn[turn] = tokens_in_context
        # Keep a private copy so later mutation of the caller's list cannot
        # silently rewrite recorded metrics.
        self.pages_referenced_per_turn[turn] = list(pages_referenced)

    def get_fault_reason_breakdown(self) -> Dict[FaultReason, int]:
        """
        Count recorded faults per reason.

        Returns:
            A dict containing every ``FaultReason`` member (zero when that
            reason never occurred) mapped to its number of faults.
        """
        breakdown: Dict[FaultReason, int] = {reason: 0 for reason in FaultReason}
        for fault in self.fault_history:
            breakdown[fault.reason] = breakdown.get(fault.reason, 0) + 1
        return breakdown