chuk-ai-session-manager 0.7.1__py3-none-any.whl → 0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chuk_ai_session_manager/__init__.py +84 -40
- chuk_ai_session_manager/api/__init__.py +1 -1
- chuk_ai_session_manager/api/simple_api.py +53 -59
- chuk_ai_session_manager/exceptions.py +31 -17
- chuk_ai_session_manager/guards/__init__.py +118 -0
- chuk_ai_session_manager/guards/bindings.py +217 -0
- chuk_ai_session_manager/guards/cache.py +163 -0
- chuk_ai_session_manager/guards/manager.py +819 -0
- chuk_ai_session_manager/guards/models.py +498 -0
- chuk_ai_session_manager/guards/ungrounded.py +159 -0
- chuk_ai_session_manager/infinite_conversation.py +86 -79
- chuk_ai_session_manager/memory/__init__.py +247 -0
- chuk_ai_session_manager/memory/artifacts_bridge.py +469 -0
- chuk_ai_session_manager/memory/context_packer.py +347 -0
- chuk_ai_session_manager/memory/fault_handler.py +507 -0
- chuk_ai_session_manager/memory/manifest.py +307 -0
- chuk_ai_session_manager/memory/models.py +1084 -0
- chuk_ai_session_manager/memory/mutation_log.py +186 -0
- chuk_ai_session_manager/memory/pack_cache.py +206 -0
- chuk_ai_session_manager/memory/page_table.py +275 -0
- chuk_ai_session_manager/memory/prefetcher.py +192 -0
- chuk_ai_session_manager/memory/tlb.py +247 -0
- chuk_ai_session_manager/memory/vm_prompts.py +238 -0
- chuk_ai_session_manager/memory/working_set.py +574 -0
- chuk_ai_session_manager/models/__init__.py +21 -9
- chuk_ai_session_manager/models/event_source.py +3 -1
- chuk_ai_session_manager/models/event_type.py +10 -1
- chuk_ai_session_manager/models/session.py +103 -68
- chuk_ai_session_manager/models/session_event.py +69 -68
- chuk_ai_session_manager/models/session_metadata.py +9 -10
- chuk_ai_session_manager/models/session_run.py +21 -22
- chuk_ai_session_manager/models/token_usage.py +76 -76
- chuk_ai_session_manager/procedural_memory/__init__.py +70 -0
- chuk_ai_session_manager/procedural_memory/formatter.py +407 -0
- chuk_ai_session_manager/procedural_memory/manager.py +523 -0
- chuk_ai_session_manager/procedural_memory/models.py +371 -0
- chuk_ai_session_manager/sample_tools.py +79 -46
- chuk_ai_session_manager/session_aware_tool_processor.py +27 -16
- chuk_ai_session_manager/session_manager.py +238 -197
- chuk_ai_session_manager/session_prompt_builder.py +163 -111
- chuk_ai_session_manager/session_storage.py +45 -52
- {chuk_ai_session_manager-0.7.1.dist-info → chuk_ai_session_manager-0.8.dist-info}/METADATA +79 -3
- chuk_ai_session_manager-0.8.dist-info/RECORD +45 -0
- {chuk_ai_session_manager-0.7.1.dist-info → chuk_ai_session_manager-0.8.dist-info}/WHEEL +1 -1
- chuk_ai_session_manager-0.7.1.dist-info/RECORD +0 -22
- {chuk_ai_session_manager-0.7.1.dist-info → chuk_ai_session_manager-0.8.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,1084 @@
|
|
|
1
|
+
# chuk_ai_session_manager/memory/models.py
|
|
2
|
+
"""
|
|
3
|
+
Core models for the AI Virtual Memory system.
|
|
4
|
+
|
|
5
|
+
These models represent the fundamental abstractions for OS-style memory management:
|
|
6
|
+
- MemoryPage: Atomic unit of content (like an OS page)
|
|
7
|
+
- PageTableEntry: Metadata about a page's location and state
|
|
8
|
+
- CompressionLevel: Standard compression levels per modality
|
|
9
|
+
- TokenBudget: Token allocation tracking
|
|
10
|
+
|
|
11
|
+
Design principles:
|
|
12
|
+
- Pydantic-native: All models are BaseModel subclasses
|
|
13
|
+
- No magic strings: Use Enums for all categorical values
|
|
14
|
+
- Type-safe: Full type annotations throughout
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from datetime import datetime
|
|
18
|
+
from enum import Enum, IntEnum
|
|
19
|
+
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
20
|
+
|
|
21
|
+
from pydantic import BaseModel, Field
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# =============================================================================
|
|
25
|
+
# Enums
|
|
26
|
+
# =============================================================================
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class CompressionLevel(IntEnum):
    """
    Fidelity levels at which a page's content can be represented.

    The integer value grows as detail shrinks: a lower level carries more
    detail and more tokens, a higher level less detail and fewer tokens.
    """

    # Complete content (full text, full resolution, full audio)
    FULL = 0
    # Reduced content (excerpts, thumbnail, transcript)
    REDUCED = 1
    # Abstract/summary (key points, caption, summary)
    ABSTRACT = 2
    # Reference only (topic tags, page_id only)
    REFERENCE = 3
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class Modality(str, Enum):
    """Kinds of content a page can hold."""

    TEXT = "text"
    IMAGE = "image"
    AUDIO = "audio"
    VIDEO = "video"
    STRUCTURED = "structured"
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class StorageTier(str, Enum):
    """Tiers of the storage hierarchy, modelled on CPU cache levels."""

    # Registers - current prompt context window
    L0 = "L0"
    # Cache - recent pages, hot data (session state)
    L1 = "L1"
    # RAM - session storage (chuk-sessions)
    L2 = "L2"
    # Disk - artifact storage (chuk-artifacts filesystem)
    L3 = "L3"
    # Cold - archive storage (chuk-artifacts S3)
    L4 = "L4"
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class Affinity(str, Enum):
    """Locality hints for distributed storage (NUMA-style awareness)."""

    LOCAL = "local"
    REMOTE = "remote"
    SHARED = "shared"
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class VMMode(str, Enum):
    """Operating modes of the virtual memory system."""

    # No hallucinated memory, citations required
    STRICT = "strict"
    # VM-aware but more conversational
    RELAXED = "relaxed"
    # No tools, runtime handles everything
    PASSIVE = "passive"
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class MessageRole(str, Enum):
    """Roles a message can have within the conversation context."""

    USER = "user"
    ASSISTANT = "assistant"
    TOOL = "tool"
    SYSTEM = "system"
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
class PageType(str, Enum):
    """
    Categories of memory page.

    The page type drives eviction/compression rules:
    - transcript: Raw turns, tool outputs (normal eviction)
    - summary: LLM-generated summaries (low eviction, rebuildable)
    - artifact: Tool-created content (normal eviction)
    - claim: Decisions, facts, conclusions (very low eviction, high-value)
    - procedure: "When calling tool X, we do Y" (low eviction)
    - index: Page metadata for search (very low eviction)
    """

    # Raw turns, tool outputs
    TRANSCRIPT = "transcript"
    # LLM-generated summaries
    SUMMARY = "summary"
    # Tool-created content
    ARTIFACT = "artifact"
    # Decisions, facts, conclusions (high-value)
    CLAIM = "claim"
    # Learned patterns for tool usage
    PROCEDURE = "procedure"
    # Page metadata for search
    INDEX = "index"

    # Legacy names kept for backwards compatibility. Because each reuses the
    # value of a member defined above, Enum treats it as an alias of that
    # member rather than as a new member.
    MESSAGE = "transcript"  # alias of TRANSCRIPT
    TOOL_RESULT = "transcript"  # alias of TRANSCRIPT
    MEDIA = "artifact"  # alias of ARTIFACT
    CHECKPOINT = "index"  # alias of INDEX
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class FaultReason(str, Enum):
    """
    Why a page fault was raised.

    Recording the reason makes it possible to measure why faults happen and
    to tell good faults (user asked) from bad ones (model guessing).
    """

    # "What did we say about X?"
    USER_REQUESTED_RECALL = "user_requested_recall"
    # Model references page_id
    RESOLVE_REFERENCE = "resolve_reference"
    # Tool needs this page
    TOOL_PREREQUISITE = "tool_prereq"
    # Might be relevant
    SPECULATIVE = "speculative"
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
class MutationType(str, Enum):
    """Kinds of page mutation recorded in the mutation log."""

    CREATE = "create"
    FAULT_IN = "fault_in"
    EVICT = "evict"
    COMPRESS = "compress"
    PIN = "pin"
    UNPIN = "unpin"
    UPDATE = "update"
    DELETE = "delete"
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
class Actor(str, Enum):
    """The party responsible for a mutation."""

    USER = "user"
    MODEL = "model"
    TOOL = "tool"
    SYSTEM = "system"
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
class ContextPrefix(str, Enum):
    """Single-character prefixes used in the VM:CONTEXT format."""

    USER = "U"
    ASSISTANT = "A"
    TOOL = "T"
    SUMMARY = "S"
    IMAGE = "I"
    # 'D' for auDio, because 'A' is already taken by ASSISTANT
    AUDIO = "D"
    VIDEO = "V"
    # JSON/structured
    STRUCTURED = "J"
    UNKNOWN = "?"
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
class ToolType(str, Enum):
    """Kinds of tool definition; only function tools are declared."""

    FUNCTION = "function"
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
# =============================================================================
|
|
171
|
+
# Constants
|
|
172
|
+
# =============================================================================
|
|
173
|
+
|
|
174
|
+
# MIME types for VM storage
MEMORY_PAGE_MIME_TYPE = "application/x-memory-page"
VM_CHECKPOINT_MIME_TYPE = "application/x-vm-checkpoint"

# Integer value of every compression level, in declaration order (for iteration)
ALL_COMPRESSION_LEVELS: List[int] = [int(level) for level in CompressionLevel]
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
# =============================================================================
|
|
183
|
+
# Stats Models
|
|
184
|
+
# =============================================================================
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
class TLBStats(BaseModel):
    """Snapshot of TLB occupancy and hit/miss counters."""

    # Occupancy
    size: int = Field(default=0, description="Current number of entries")
    max_size: int = Field(default=512, description="Maximum entries")
    utilization: float = Field(
        default=0.0, description="Current utilization (0-1)"
    )

    # Lookup counters
    hits: int = Field(default=0, description="Total cache hits")
    misses: int = Field(default=0, description="Total cache misses")
    hit_rate: float = Field(default=0.0, description="Hit rate (0-1)")
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
class WorkingSetStats(BaseModel):
    """Snapshot of the working set: page counts and token usage."""

    # Page counts
    l0_pages: int = Field(default=0, description="Pages in L0 (context)")
    l1_pages: int = Field(default=0, description="Pages in L1 (cache)")
    total_pages: int = Field(default=0, description="Total pages in working set")

    # Token accounting
    tokens_used: int = Field(default=0, description="Tokens currently used")
    tokens_available: int = Field(default=0, description="Tokens available")
    utilization: float = Field(default=0.0, description="Token utilization (0-1)")
    needs_eviction: bool = Field(
        default=False,
        description="Whether eviction is needed",
    )

    # Per-modality token breakdown; empty unless the producer fills it in
    tokens_by_modality: Dict[Modality, int] = Field(default_factory=dict)
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
class StorageStats(BaseModel):
    """Snapshot describing a storage backend."""

    # Required: no default is provided for the backend name
    backend: str = Field(..., description="Backend type name")
    persistent: bool = Field(
        default=False, description="Whether storage persists"
    )
    session_id: Optional[str] = Field(
        default=None, description="Associated session"
    )
    pages_stored: int = Field(default=0, description="Number of pages stored")
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
class CombinedPageTableStats(BaseModel):
    """Page table statistics and TLB statistics bundled together."""

    # Quoted forward reference: PageTableStats is declared after this class
    page_table: "PageTableStats"
    tlb: TLBStats
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
class PageTableStats(BaseModel):
    """Counts describing the current state of the page table."""

    total_pages: int
    dirty_pages: int
    pages_by_tier: Dict[StorageTier, int]
    pages_by_modality: Dict[Modality, int]

    @property
    def working_set_size(self) -> int:
        """Number of pages in L0 plus L1; a tier missing from the map counts as 0."""
        working_tiers = (StorageTier.L0, StorageTier.L1)
        return sum(self.pages_by_tier.get(tier, 0) for tier in working_tiers)
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
class FaultMetrics(BaseModel):
    """Counters describing page fault handling."""

    # Per-turn counters; the remaining allowance defaults to the per-turn maximum
    faults_this_turn: int = 0
    max_faults_per_turn: int = 2
    faults_remaining: int = 2

    # Aggregate counters
    total_faults: int = 0
    tlb_hit_rate: float = 0.0
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
# =============================================================================
|
|
256
|
+
# Content Models (for tool results)
|
|
257
|
+
# =============================================================================
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
class TextContent(BaseModel):
    """Text payload of a page; defaults to the empty string."""

    text: str = ""
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
class ImageContent(BaseModel):
    """Image payload of a page; every field is optional."""

    caption: Optional[str] = None
    url: Optional[str] = None
    base64: Optional[str] = None
    embedding: Optional[List[float]] = None
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
class AudioContent(BaseModel):
    """Audio payload of a page; every field is optional."""

    transcript: Optional[str] = None
    timestamps: Optional[List[Dict[str, Any]]] = None
    duration_seconds: Optional[float] = None
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
class VideoContent(BaseModel):
    """Video payload of a page."""

    # A fresh empty list per instance
    scenes: List[Dict[str, Any]] = Field(default_factory=list)
    transcript: Optional[str] = None
    duration_seconds: Optional[float] = None
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
class StructuredContent(BaseModel):
    """Structured-data payload of a page."""

    # A fresh empty dict per instance
    data: Dict[str, Any] = Field(default_factory=dict)
    schema_name: Optional[str] = None
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
# Any of the typed content models, or a plain dict as the untyped fallback.
# Member order is kept as declared.
PageContent = Union[
    TextContent,
    ImageContent,
    AudioContent,
    VideoContent,
    StructuredContent,
    Dict[str, Any],
]
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
class PageMeta(BaseModel):
    """Metadata attached to a page returned in a tool result."""

    source_tier: str = "unknown"
    mime_type: Optional[str] = None
    size_bytes: Optional[int] = None
    dimensions: Optional[List[int]] = None
    duration_seconds: Optional[float] = None
    latency_ms: Optional[float] = None
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
class PageData(BaseModel):
    """A page as carried inside the tool result envelope."""

    # Identity and placement; modality, level and tier are plain str/int here
    page_id: str
    modality: str
    level: int
    tier: str

    # Payload plus its metadata (a default PageMeta is built when omitted)
    content: PageContent
    meta: PageMeta = Field(default_factory=PageMeta)
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
class FaultEffects(BaseModel):
    """Side effects produced by a fault operation."""

    promoted_to_working_set: bool = False
    tokens_est: int = 0
    # None by default, as opposed to an empty list
    evictions: Optional[List[str]] = None
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
class SearchResultEntry(BaseModel):
    """One hit within a set of search results."""

    # Required identification of the matched page
    page_id: str
    modality: str
    tier: str

    # Optional details
    levels: List[int] = Field(default_factory=list)
    hint: str = ""
    relevance: float = 0.0
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
# =============================================================================
|
|
351
|
+
# Tool Definition Models
|
|
352
|
+
# =============================================================================
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
class ToolParameter(BaseModel):
    """Schema of one parameter within a tool definition."""

    # Required
    type: str
    description: str

    # Optional constraints and default value
    enum: Optional[List[str]] = None
    minimum: Optional[int] = None
    maximum: Optional[int] = None
    default: Optional[Any] = None
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
class ToolParameters(BaseModel):
    """Parameter schema of a tool: named properties plus the required names."""

    type: str = "object"
    properties: Dict[str, ToolParameter] = Field(default_factory=dict)
    required: List[str] = Field(default_factory=list)
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
class ToolFunction(BaseModel):
    """The function part of a tool: name, description and parameter schema."""

    name: str
    description: str
    parameters: ToolParameters
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
class ToolDefinition(BaseModel):
    """Full tool definition in the shape used by the Chat Completions API."""

    type: ToolType = ToolType.FUNCTION
    function: ToolFunction
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
# =============================================================================
|
|
390
|
+
# Formatted Output Models
|
|
391
|
+
# =============================================================================
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
class FormattedPage(BaseModel):
    """A page rendered for inclusion in context, with its estimated token cost."""

    content: str = Field(
        ...,
        description="Formatted content string",
    )
    tokens_est: int = Field(
        default=0,
        description="Estimated token count",
    )
|
|
399
|
+
|
|
400
|
+
|
|
401
|
+
# =============================================================================
|
|
402
|
+
# Core Page Models
|
|
403
|
+
# =============================================================================
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
class MemoryPage(BaseModel):
    """
    Atomic unit of content in the virtual memory system.

    A page represents any piece of content (text, image, audio, video,
    structured) with identity, versioning, and multi-resolution
    representations.

    This is the RIGHT abstraction boundary because it enables:
    - Cross-modal coherence
    - Versioning and dirty tracking
    - Copy-on-write
    - Checkpoint consistency
    """

    # Identity
    page_id: str = Field(..., description="Unique identifier for this page")
    session_id: Optional[str] = Field(default=None, description="Owning session")

    # Content type
    modality: Modality = Field(..., description="Content modality")

    # Page type (critical for eviction/compression decisions)
    page_type: PageType = Field(
        default=PageType.TRANSCRIPT,
        description="Page type determines eviction/compression behavior",
    )

    # Provenance: what pages justify this one (for claims, summaries)
    provenance: List[str] = Field(
        default_factory=list,
        description="page_ids that this page derives from (for claims/summaries)",
    )

    # Representation linking (for compression chain)
    represents: Optional[str] = Field(
        default=None,
        description="page_id this is a compressed version of",
    )
    representation_level: int = Field(
        default=0,
        description="0=full, 1=reduced, 2=abstract, 3=reference",
    )

    # Location
    storage_tier: StorageTier = Field(
        default=StorageTier.L1, description="Current storage tier"
    )
    artifact_id: Optional[str] = Field(
        default=None, description="Reference to chuk-artifacts storage"
    )

    # Content (when loaded into L0/L1)
    content: Optional[Any] = Field(
        default=None, description="Actual content when in working set"
    )
    compression_level: CompressionLevel = Field(
        default=CompressionLevel.FULL, description="Current compression level"
    )

    # Multi-resolution representations
    # Maps compression level -> artifact_id for stored representations
    representations: Dict[CompressionLevel, str] = Field(
        default_factory=dict, description="artifact_id for each compression level"
    )

    # Size tracking
    size_bytes: int = Field(default=0, description="Size in bytes")
    size_tokens: Optional[int] = Field(
        default=None, description="Estimated token count (for text/transcript)"
    )

    # Access tracking (for LRU/eviction)
    # NOTE(review): datetime.utcnow yields naive datetimes and is deprecated
    # since Python 3.12. Switching to timezone-aware values would have to be
    # done together with every caller that compares these fields.
    created_at: datetime = Field(default_factory=datetime.utcnow)
    last_accessed: datetime = Field(default_factory=datetime.utcnow)
    access_count: int = Field(default=0, description="Number of times accessed")

    # Importance (affects eviction priority)
    # Claims default to higher importance
    importance: float = Field(
        default=0.5,
        ge=0.0,
        le=1.0,
        description="Importance score for eviction decisions",
    )

    # State tracking
    dirty: bool = Field(default=False, description="Has been modified since last flush")
    pinned: bool = Field(default=False, description="Pinned pages are never evicted")

    # Lineage (legacy, use provenance/represents instead)
    parent_page_id: Optional[str] = Field(
        default=None, description="Parent page if derived (e.g., summary of original)"
    )

    # Modality-specific metadata
    mime_type: Optional[str] = Field(default=None)
    duration_seconds: Optional[float] = Field(
        default=None, description="Duration for audio/video"
    )
    dimensions: Optional[Tuple[int, int]] = Field(
        default=None, description="Width x height for image/video"
    )
    transcript: Optional[str] = Field(
        default=None, description="Transcript for audio/video (L1 representation)"
    )
    caption: Optional[str] = Field(
        default=None, description="Caption for image (L2 representation)"
    )

    # Custom metadata
    metadata: Dict[str, Any] = Field(default_factory=dict)

    def mark_accessed(self) -> None:
        """Record an access: refresh ``last_accessed`` and bump ``access_count``."""
        self.last_accessed = datetime.utcnow()
        self.access_count += 1

    def mark_dirty(self) -> None:
        """Mark page as modified."""
        self.dirty = True

    def mark_clean(self) -> None:
        """Mark page as flushed/clean."""
        self.dirty = False

    def estimate_tokens(self) -> int:
        """
        Estimate the token count of the current content.

        Resolution order:
        1. ``size_tokens`` when it has been set (returned as-is).
        2. ``0`` when there is no content.
        3. ``len(text) // 4`` for ``str`` content (rough 4 chars per token).
        4. ``len(json) // 4`` for ``dict`` content, measured on its JSON dump.
        5. Otherwise ``size_bytes // 4``, or ``100`` when ``size_bytes`` is
           not positive.

        Returns:
            The estimated number of tokens.
        """
        if self.size_tokens is not None:
            return self.size_tokens

        if self.content is None:
            return 0

        if isinstance(self.content, str):
            # Rough estimate: 4 chars per token
            return len(self.content) // 4

        if isinstance(self.content, dict):
            import json

            # Only the serialized length matters here, so values json cannot
            # encode natively (datetime, bytes, ...) are stringified via
            # default=str instead of raising TypeError. The result is
            # unchanged for dicts that were already JSON-serializable.
            return len(json.dumps(self.content, default=str)) // 4

        return self.size_bytes // 4 if self.size_bytes > 0 else 100
|
|
548
|
+
|
|
549
|
+
|
|
550
|
+
class PageTableEntry(BaseModel):
    """
    Per-page record kept by the page table.

    Holds where a page lives, its state flags and its access statistics;
    the page content itself is not stored here.
    """

    page_id: str

    # Location
    tier: StorageTier
    artifact_id: Optional[str] = None
    compression_level: CompressionLevel = CompressionLevel.FULL

    # Page type (for eviction decisions)
    page_type: PageType = Field(
        default=PageType.TRANSCRIPT,
        description="Page type for eviction/compression decisions",
    )

    # Provenance (for tracing back to source)
    provenance: List[str] = Field(
        default_factory=list,
        description="page_ids this page derives from",
    )

    # State
    dirty: bool = Field(default=False, description="Modified since last flush")
    pinned: bool = Field(default=False, description="Pinned pages are never evicted")
    last_flushed: Optional[datetime] = None

    # Access tracking
    last_accessed: datetime = Field(default_factory=datetime.utcnow)
    access_count: int = 0

    # Size
    size_tokens: Optional[int] = None

    # Modality (for filtering)
    modality: Modality = Modality.TEXT

    # Locality hints (for NUMA awareness)
    affinity: Affinity = Field(
        default=Affinity.LOCAL, description="Locality hint for distributed storage"
    )

    def mark_accessed(self) -> None:
        """Record an access: refresh ``last_accessed`` and bump ``access_count``."""
        self.access_count += 1
        self.last_accessed = datetime.utcnow()

    @property
    def eviction_priority(self) -> float:
        """
        Eviction priority derived from the page type.

        A lower value means the page is less likely to be evicted. Pinned
        pages always report ``0.0``; a page type missing from the weight
        table falls back to ``0.5``.
        """
        # Pinned pages get 0 priority (never evict)
        if self.pinned:
            return 0.0

        weight_for_type = {
            PageType.CLAIM: 0.1,  # Very low - claims are precious
            PageType.INDEX: 0.2,  # Very low - indexes are needed for search
            PageType.PROCEDURE: 0.3,  # Low - procedures help tool usage
            PageType.SUMMARY: 0.4,  # Low - rebuildable but useful
            PageType.ARTIFACT: 0.6,  # Normal
            PageType.TRANSCRIPT: 0.7,  # Normal
        }
        return weight_for_type.get(self.page_type, 0.5)
|
|
622
|
+
|
|
623
|
+
|
|
624
|
+
class TokenBudget(BaseModel):
    """
    Token allocation tracking across modalities.

    Helps manage context window usage and decide compression levels.
    The usable budget is ``total_limit - reserved``; per-modality counts are
    kept in ``tokens_by_modality`` and are never allowed to drop below zero
    by ``add``, ``remove`` or ``set_tokens``.
    """

    total_limit: int = Field(default=128000, description="Total context window size")
    reserved: int = Field(
        default=4000, description="Reserved for system prompt, tools, etc."
    )

    # Current usage by modality - stored as dict for Pydantic serialization.
    # Every modality starts at 0.
    tokens_by_modality: Dict[Modality, int] = Field(
        default_factory=lambda: {m: 0 for m in Modality}
    )

    @property
    def text_tokens(self) -> int:
        """Tokens currently attributed to text."""
        return self.tokens_by_modality.get(Modality.TEXT, 0)

    @property
    def image_tokens(self) -> int:
        """Tokens currently attributed to images."""
        return self.tokens_by_modality.get(Modality.IMAGE, 0)

    @property
    def audio_tokens(self) -> int:
        """Tokens currently attributed to audio."""
        return self.tokens_by_modality.get(Modality.AUDIO, 0)

    @property
    def video_tokens(self) -> int:
        """Tokens currently attributed to video."""
        return self.tokens_by_modality.get(Modality.VIDEO, 0)

    @property
    def structured_tokens(self) -> int:
        """Tokens currently attributed to structured data."""
        return self.tokens_by_modality.get(Modality.STRUCTURED, 0)

    @property
    def used(self) -> int:
        """Total tokens currently used, summed over all modalities."""
        return sum(self.tokens_by_modality.values())

    @property
    def available(self) -> int:
        """Tokens available for new content (never negative)."""
        return max(0, self.total_limit - self.reserved - self.used)

    @property
    def utilization(self) -> float:
        """
        Current utilization as a fraction between 0 and 1.

        Reports 1.0 when nothing is usable (``total_limit <= reserved``) and
        is capped at 1.0 when usage exceeds the usable budget.
        """
        usable = self.total_limit - self.reserved
        if usable <= 0:
            return 1.0
        return min(1.0, self.used / usable)

    def can_fit(self, tokens: int) -> bool:
        """Return True if ``tokens`` more tokens fit in the available budget."""
        return tokens <= self.available

    def add(self, tokens: int, modality: Modality) -> bool:
        """
        Add tokens for a modality.

        Args:
            tokens: Number of tokens to add.
            modality: Modality the tokens are attributed to.

        Returns:
            True if the tokens fit and were recorded, False if they do not
            fit (in which case nothing is changed).
        """
        if not self.can_fit(tokens):
            return False

        current = self.tokens_by_modality.get(modality, 0)
        # Clamp at zero, as remove() and set_tokens() do, so a negative
        # `tokens` argument can never leave a negative per-modality count
        # (which would make `used` under-report and `available` over-report).
        self.tokens_by_modality[modality] = max(0, current + tokens)
        return True

    def remove(self, tokens: int, modality: Modality) -> None:
        """Remove tokens for a modality; the count is floored at zero."""
        current = self.tokens_by_modality.get(modality, 0)
        self.tokens_by_modality[modality] = max(0, current - tokens)

    def get_tokens(self, modality: Modality) -> int:
        """Get token count for a specific modality (0 if not tracked)."""
        return self.tokens_by_modality.get(modality, 0)

    def set_tokens(self, modality: Modality, tokens: int) -> None:
        """Set token count for a specific modality; negative values become 0."""
        self.tokens_by_modality[modality] = max(0, tokens)
|
|
706
|
+
|
|
707
|
+
|
|
708
|
+
class VMMetrics(BaseModel):
    """
    Metrics for monitoring VM health and performance.

    Holds lifetime totals alongside per-turn counters; ``new_turn`` resets
    the per-turn counters and leaves the totals untouched.
    """

    # Fault tracking
    faults_total: int = Field(default=0)
    faults_this_turn: int = Field(default=0)

    # TLB stats
    tlb_hits: int = Field(default=0)
    tlb_misses: int = Field(default=0)

    # Eviction stats
    evictions_total: int = Field(default=0)
    evictions_this_turn: int = Field(default=0)

    # Token tracking
    tokens_in_working_set: int = Field(default=0)
    tokens_available: int = Field(default=0)

    # Page distribution - use Enums as keys; every key starts at 0
    pages_by_tier: Dict[StorageTier, int] = Field(
        default_factory=lambda: {t: 0 for t in StorageTier}
    )
    pages_by_modality: Dict[Modality, int] = Field(
        default_factory=lambda: {m: 0 for m in Modality}
    )

    @property
    def fault_rate(self) -> float:
        """
        Faults recorded in the current turn, as a float.

        No turn count is tracked here, so this is the current turn's fault
        count rather than an average over turns.
        """
        # Convert explicitly so the value matches the declared float return type.
        return float(self.faults_this_turn)

    @property
    def tlb_hit_rate(self) -> float:
        """TLB hit rate as a fraction between 0 and 1 (0.0 before any lookup)."""
        total = self.tlb_hits + self.tlb_misses
        if total == 0:
            return 0.0
        return self.tlb_hits / total

    def record_fault(self) -> None:
        """Record a page fault in both the lifetime and per-turn counters."""
        self.faults_total += 1
        self.faults_this_turn += 1

    def record_tlb_hit(self) -> None:
        """Record a TLB hit."""
        self.tlb_hits += 1

    def record_tlb_miss(self) -> None:
        """Record a TLB miss."""
        self.tlb_misses += 1

    def record_eviction(self) -> None:
        """Record an eviction in both the lifetime and per-turn counters."""
        self.evictions_total += 1
        self.evictions_this_turn += 1

    def new_turn(self) -> None:
        """Reset per-turn counters."""
        self.faults_this_turn = 0
        self.evictions_this_turn = 0
|
|
772
|
+
|
|
773
|
+
|
|
774
|
+
# =============================================================================
|
|
775
|
+
# Fault Policy Models
|
|
776
|
+
# =============================================================================
|
|
777
|
+
|
|
778
|
+
|
|
779
|
+
class FaultConfidenceThreshold(str, Enum):
    """
    Confidence threshold for page faults.

    Members also subclass ``str``, so each one compares equal to its
    string value (e.g. ``"explicit"``).
    """

    EXPLICIT = "explicit"  # Only fault when page_id is directly requested
    REFERENCED = "referenced"  # Fault if page content is referenced/needed
    SPECULATIVE = "speculative"  # Fault on potential relevance (aggressive)
|
|
785
|
+
|
|
786
|
+
|
|
787
|
+
class FaultPolicy(BaseModel):
    """
    Per-turn limits on page faults, to stop fault spirals and budget blowouts.

    The model stores the configured caps together with the running usage
    for the current turn; ``new_turn`` clears the usage counters.
    """

    # Existing
    max_faults_per_turn: int = Field(default=3)

    # Token budget for fault resolution
    max_fault_tokens_per_turn: int = Field(
        default=8192, description="Don't let faults blow the token budget"
    )

    # Confidence threshold - only fault if explicitly needed
    fault_confidence_threshold: FaultConfidenceThreshold = Field(
        default=FaultConfidenceThreshold.REFERENCED
    )

    # Track tokens used this turn for fault resolution
    tokens_used_this_turn: int = Field(default=0)
    faults_this_turn: int = Field(default=0)

    def can_fault(self, estimated_tokens: int) -> bool:
        """
        Report whether one more fault costing ``estimated_tokens`` is allowed.

        Returns False once the per-turn fault count has reached its cap, or
        when the tokens already used plus ``estimated_tokens`` would exceed
        the per-turn token cap.
        """
        # Count cap is checked first, before any token arithmetic.
        if not (self.faults_this_turn < self.max_faults_per_turn):
            return False
        projected_tokens = self.tokens_used_this_turn + estimated_tokens
        return not (projected_tokens > self.max_fault_tokens_per_turn)

    def record_fault(self, tokens: int) -> None:
        """Count one fault for this turn and add ``tokens`` to the turn's usage."""
        self.faults_this_turn = self.faults_this_turn + 1
        self.tokens_used_this_turn = self.tokens_used_this_turn + tokens

    def new_turn(self) -> None:
        """Zero the per-turn fault count and token usage."""
        self.faults_this_turn = self.tokens_used_this_turn = 0
|
|
829
|
+
|
|
830
|
+
|
|
831
|
+
class FaultRecord(BaseModel):
    """
    Record of a single page fault for metrics.

    ``page_id``, ``reason``, ``turn`` and ``tokens_loaded`` are required;
    ``latency_ms`` and ``timestamp`` have defaults.
    """

    page_id: str
    reason: FaultReason
    turn: int
    tokens_loaded: int
    latency_ms: float = Field(default=0.0)
    # NOTE(review): datetime.utcnow returns a naive datetime and is deprecated
    # since Python 3.12 - consider a timezone-aware default if callers allow it.
    timestamp: datetime = Field(default_factory=datetime.utcnow)
|
|
840
|
+
|
|
841
|
+
|
|
842
|
+
# =============================================================================
|
|
843
|
+
# Mutation Log Models
|
|
844
|
+
# =============================================================================
|
|
845
|
+
|
|
846
|
+
|
|
847
|
+
class PageMutation(BaseModel):
    """
    Immutable record of a page change.

    Enables debugging, replay, and grounding story:
    - "What was in context for turn T?"
    - "Who changed what and why?"

    NOTE(review): no frozen/immutable model configuration is visible in this
    class body - confirm immutability is enforced by convention or elsewhere.
    """

    mutation_id: str
    page_id: str
    # NOTE(review): datetime.utcnow returns a naive datetime and is deprecated
    # since Python 3.12.
    timestamp: datetime = Field(default_factory=datetime.utcnow)
    turn: int = Field(default=0)

    mutation_type: MutationType

    # Context at mutation time; tier_before is optional and defaults to None,
    # tier_after is required.
    tier_before: Optional[StorageTier] = None
    tier_after: StorageTier

    # Who caused it
    actor: Actor
    cause: Optional[str] = Field(
        default=None,
        description="e.g., 'eviction_pressure', 'page_fault', 'explicit_request'",
    )
|
|
873
|
+
|
|
874
|
+
|
|
875
|
+
# =============================================================================
|
|
876
|
+
# Memory ABI Models
|
|
877
|
+
# =============================================================================
|
|
878
|
+
|
|
879
|
+
|
|
880
|
+
class PageManifestEntry(BaseModel):
    """
    Entry in the memory manifest for a page.

    Only ``provenance``, ``can_evict`` and ``can_compress`` have defaults;
    every other field is required.
    """

    page_id: str
    # Declared as a plain string here, not as the Modality enum.
    modality: str
    page_type: str  # transcript, summary, artifact, claim, procedure, index
    compression_level: int
    tokens: int
    importance: float
    provenance: List[str] = Field(default_factory=list)  # source page_ids
    can_evict: bool = Field(default=True)
    can_compress: bool = Field(default=True)
|
|
892
|
+
|
|
893
|
+
|
|
894
|
+
class MemoryABI(BaseModel):
    """
    Application Binary Interface for memory negotiation.

    Describes what is in context (``pages``), what the consumer may do
    (faults, upgrade budget), the hard token constraints, and per-modality
    preference weights, so that different models can negotiate memory
    requirements:
    - smaller models survive with aggressive compression
    - tool processors can reason about memory cost
    """

    # What's in context
    pages: List[PageManifestEntry] = Field(default_factory=list)

    # Capabilities
    faults_allowed: bool = Field(default=True)
    upgrade_budget_tokens: int = Field(
        default=2048, description="Tokens reserved for fault resolution"
    )

    # Constraints
    max_context_tokens: int = Field(default=128000)
    reserved_tokens: int = Field(default=2000, description="System prompt, etc.")

    # Tool schema budget (often the hidden token hog)
    tool_schema_tokens_reserved: int = Field(
        default=0, description="Tokens consumed by tool definitions"
    )
    active_toolset_hash: Optional[str] = Field(
        default=None, description="For cache invalidation when tools change"
    )

    # Preferences
    modality_weights: Dict[str, float] = Field(
        default_factory=lambda: {
            "text": 1.0,
            "image": 0.8,
            "audio": 0.6,
            "video": 0.4,
        }
    )

    @property
    def available_tokens(self) -> int:
        """
        Tokens left for content once reservations are subtracted.

        Computed as ``max_context_tokens - reserved_tokens -
        tool_schema_tokens_reserved`` and floored at 0.
        """
        remaining = (
            self.max_context_tokens
            - self.reserved_tokens
            - self.tool_schema_tokens_reserved
        )
        # Reservations may exceed the context size; never report a negative.
        return remaining if remaining > 0 else 0
|
|
943
|
+
|
|
944
|
+
|
|
945
|
+
# =============================================================================
|
|
946
|
+
# UX Metrics Models
|
|
947
|
+
# =============================================================================
|
|
948
|
+
|
|
949
|
+
|
|
950
|
+
class RecallAttempt(BaseModel):
    """
    Record of a recall attempt for tracking success rate.

    ``turn`` and ``query`` are required; the remaining fields have defaults.
    """

    turn: int
    query: str  # What user asked to recall
    page_ids_cited: List[str] = Field(default_factory=list)
    # False by default, i.e. an attempt counts as uncorrected unless flagged.
    user_corrected: bool = Field(default=False)
    # NOTE(review): datetime.utcnow returns a naive datetime and is deprecated
    # since Python 3.12.
    timestamp: datetime = Field(default_factory=datetime.utcnow)
|
|
958
|
+
|
|
959
|
+
|
|
960
|
+
class UserExperienceMetrics(BaseModel):
|
|
961
|
+
"""
|
|
962
|
+
Metrics that correlate with user satisfaction.
|
|
963
|
+
|
|
964
|
+
These tell you whether the system "feels good" to users.
|
|
965
|
+
"""
|
|
966
|
+
|
|
967
|
+
# Recall tracking
|
|
968
|
+
recall_attempts: List[RecallAttempt] = Field(default_factory=list)
|
|
969
|
+
|
|
970
|
+
# Fault history for thrash calculation
|
|
971
|
+
fault_history: List[FaultRecord] = Field(default_factory=list)
|
|
972
|
+
|
|
973
|
+
# Page references per turn (for effective tokens)
|
|
974
|
+
pages_referenced_per_turn: Dict[int, List[str]] = Field(default_factory=dict)
|
|
975
|
+
tokens_in_context_per_turn: Dict[int, int] = Field(default_factory=dict)
|
|
976
|
+
|
|
977
|
+
def recall_success_rate(self) -> float:
    """
    Fraction of recorded recall attempts that the user did not correct.

    Returns 1.0 when no attempts have been recorded.
    """
    attempts = self.recall_attempts
    if not attempts:
        return 1.0
    uncorrected = 0
    for attempt in attempts:
        if not attempt.user_corrected:
            uncorrected += 1
    return uncorrected / len(attempts)
|
|
986
|
+
|
|
987
|
+
def thrash_index(self, window_turns: int = 5) -> float:
    """
    Repeat faults per turn over the most recent fault window.

    Low = stable working set. High = constantly missing what we need.

    The window runs from ``max(0, latest - window_turns)`` through
    ``latest``, where ``latest`` is the highest turn in ``fault_history``.
    Within it, the first fault seen for a page is free and every later
    fault on the same page counts as thrash. History is scanned in stored
    order.

    Args:
        window_turns: Window length; also the divisor of the result.

    Returns:
        Thrash faults divided by ``window_turns``. 0.0 when there is no
        fault history or ``window_turns`` is not positive.
    """
    if not self.fault_history or window_turns <= 0:
        return 0.0

    latest_turn = max(f.turn for f in self.fault_history)
    window_start = max(0, latest_turn - window_turns)

    seen_pages: set = set()
    thrash_faults = 0
    for fault in self.fault_history:
        if fault.turn < window_start:
            continue  # older than the window
        if fault.page_id in seen_pages:
            thrash_faults += 1
        else:
            seen_pages.add(fault.page_id)

    return thrash_faults / window_turns
|
|
1016
|
+
|
|
1017
|
+
def effective_tokens_ratio(self, turn: int) -> float:
    """
    Estimate the share of a turn's context tokens that were actually used.

    Returns 0.0 when no context-token count is recorded for ``turn`` (or
    it is 0). Otherwise each page referenced in that turn is assumed to
    cost about 200 tokens, and the resulting ratio is capped at 1.0.
    """
    total_context = self.tokens_in_context_per_turn.get(turn, 0)
    if total_context == 0:
        return 0.0

    # Simplified: per-page token counts are not summed here, so a flat
    # ~200 tokens per referenced page is used as the estimate.
    page_count = len(self.pages_referenced_per_turn.get(turn, []))
    ratio = (page_count * 200) / total_context
    return min(1.0, ratio)
|
|
1032
|
+
|
|
1033
|
+
def record_recall_attempt(
    self,
    turn: int,
    query: str,
    page_ids_cited: List[str],
    user_corrected: bool = False,
) -> None:
    """
    Append a new ``RecallAttempt`` built from the arguments to ``recall_attempts``.

    The attempt's timestamp is left to the model's default.
    """
    attempt = RecallAttempt(
        turn=turn,
        query=query,
        page_ids_cited=page_ids_cited,
        user_corrected=user_corrected,
    )
    self.recall_attempts.append(attempt)
|
|
1049
|
+
|
|
1050
|
+
def record_fault(
    self,
    page_id: str,
    reason: FaultReason,
    turn: int,
    tokens_loaded: int,
    latency_ms: float = 0.0,
) -> None:
    """
    Append a new ``FaultRecord`` built from the arguments to ``fault_history``.

    The history feeds the thrash calculation; the record's timestamp is
    left to the model's default.
    """
    entry = FaultRecord(
        page_id=page_id,
        reason=reason,
        turn=turn,
        tokens_loaded=tokens_loaded,
        latency_ms=latency_ms,
    )
    self.fault_history.append(entry)
|
|
1068
|
+
|
|
1069
|
+
def record_turn_context(
    self,
    turn: int,
    tokens_in_context: int,
    pages_referenced: List[str],
) -> None:
    """
    Store the context size and referenced pages for ``turn``.

    Any values previously stored for the same turn are overwritten. The
    given list is stored as-is (not copied).
    """
    self.tokens_in_context_per_turn.update({turn: tokens_in_context})
    self.pages_referenced_per_turn.update({turn: pages_referenced})
|
|
1078
|
+
|
|
1079
|
+
def get_fault_reason_breakdown(self) -> Dict[FaultReason, int]:
    """
    Count the faults in ``fault_history`` by reason.

    Every ``FaultReason`` member is present in the result, with 0 for
    reasons that never occurred.
    """
    counts: Dict[FaultReason, int] = dict.fromkeys(FaultReason, 0)
    for record in self.fault_history:
        counts[record.reason] = counts.get(record.reason, 0) + 1
    return counts
|