proxilion 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- proxilion/__init__.py +136 -0
- proxilion/audit/__init__.py +133 -0
- proxilion/audit/base_exporters.py +527 -0
- proxilion/audit/compliance/__init__.py +130 -0
- proxilion/audit/compliance/base.py +457 -0
- proxilion/audit/compliance/eu_ai_act.py +603 -0
- proxilion/audit/compliance/iso27001.py +544 -0
- proxilion/audit/compliance/soc2.py +491 -0
- proxilion/audit/events.py +493 -0
- proxilion/audit/explainability.py +1173 -0
- proxilion/audit/exporters/__init__.py +58 -0
- proxilion/audit/exporters/aws_s3.py +636 -0
- proxilion/audit/exporters/azure_storage.py +608 -0
- proxilion/audit/exporters/cloud_base.py +468 -0
- proxilion/audit/exporters/gcp_storage.py +570 -0
- proxilion/audit/exporters/multi_exporter.py +498 -0
- proxilion/audit/hash_chain.py +652 -0
- proxilion/audit/logger.py +543 -0
- proxilion/caching/__init__.py +49 -0
- proxilion/caching/tool_cache.py +633 -0
- proxilion/context/__init__.py +73 -0
- proxilion/context/context_window.py +556 -0
- proxilion/context/message_history.py +505 -0
- proxilion/context/session.py +735 -0
- proxilion/contrib/__init__.py +51 -0
- proxilion/contrib/anthropic.py +609 -0
- proxilion/contrib/google.py +1012 -0
- proxilion/contrib/langchain.py +641 -0
- proxilion/contrib/mcp.py +893 -0
- proxilion/contrib/openai.py +646 -0
- proxilion/core.py +3058 -0
- proxilion/decorators.py +966 -0
- proxilion/engines/__init__.py +287 -0
- proxilion/engines/base.py +266 -0
- proxilion/engines/casbin_engine.py +412 -0
- proxilion/engines/opa_engine.py +493 -0
- proxilion/engines/simple.py +437 -0
- proxilion/exceptions.py +887 -0
- proxilion/guards/__init__.py +54 -0
- proxilion/guards/input_guard.py +522 -0
- proxilion/guards/output_guard.py +634 -0
- proxilion/observability/__init__.py +198 -0
- proxilion/observability/cost_tracker.py +866 -0
- proxilion/observability/hooks.py +683 -0
- proxilion/observability/metrics.py +798 -0
- proxilion/observability/session_cost_tracker.py +1063 -0
- proxilion/policies/__init__.py +67 -0
- proxilion/policies/base.py +304 -0
- proxilion/policies/builtin.py +486 -0
- proxilion/policies/registry.py +376 -0
- proxilion/providers/__init__.py +201 -0
- proxilion/providers/adapter.py +468 -0
- proxilion/providers/anthropic_adapter.py +330 -0
- proxilion/providers/gemini_adapter.py +391 -0
- proxilion/providers/openai_adapter.py +294 -0
- proxilion/py.typed +0 -0
- proxilion/resilience/__init__.py +81 -0
- proxilion/resilience/degradation.py +615 -0
- proxilion/resilience/fallback.py +555 -0
- proxilion/resilience/retry.py +554 -0
- proxilion/scheduling/__init__.py +57 -0
- proxilion/scheduling/priority_queue.py +419 -0
- proxilion/scheduling/scheduler.py +459 -0
- proxilion/security/__init__.py +244 -0
- proxilion/security/agent_trust.py +968 -0
- proxilion/security/behavioral_drift.py +794 -0
- proxilion/security/cascade_protection.py +869 -0
- proxilion/security/circuit_breaker.py +428 -0
- proxilion/security/cost_limiter.py +690 -0
- proxilion/security/idor_protection.py +460 -0
- proxilion/security/intent_capsule.py +849 -0
- proxilion/security/intent_validator.py +495 -0
- proxilion/security/memory_integrity.py +767 -0
- proxilion/security/rate_limiter.py +509 -0
- proxilion/security/scope_enforcer.py +680 -0
- proxilion/security/sequence_validator.py +636 -0
- proxilion/security/trust_boundaries.py +784 -0
- proxilion/streaming/__init__.py +70 -0
- proxilion/streaming/detector.py +761 -0
- proxilion/streaming/transformer.py +674 -0
- proxilion/timeouts/__init__.py +55 -0
- proxilion/timeouts/decorators.py +477 -0
- proxilion/timeouts/manager.py +545 -0
- proxilion/tools/__init__.py +69 -0
- proxilion/tools/decorators.py +493 -0
- proxilion/tools/registry.py +732 -0
- proxilion/types.py +339 -0
- proxilion/validation/__init__.py +93 -0
- proxilion/validation/pydantic_schema.py +351 -0
- proxilion/validation/schema.py +651 -0
- proxilion-0.0.1.dist-info/METADATA +872 -0
- proxilion-0.0.1.dist-info/RECORD +94 -0
- proxilion-0.0.1.dist-info/WHEEL +4 -0
- proxilion-0.0.1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,556 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Context window management for LLM interactions.
|
|
3
|
+
|
|
4
|
+
Provides strategies for fitting conversation history within
|
|
5
|
+
LLM token limits while preserving important context.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
from enum import Enum
|
|
12
|
+
from typing import Any, Protocol
|
|
13
|
+
|
|
14
|
+
from proxilion.context.message_history import Message, MessageRole
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class ContextStrategy(Enum):
    """Enumerates the built-in approaches for trimming conversation history."""

    # Keep most recent messages that fit.
    SLIDING_WINDOW = "sliding_window"

    # Keep all system messages + most recent other messages.
    KEEP_SYSTEM_RECENT = "keep_system_recent"

    # Summarize older messages (requires callback).
    SUMMARIZE_OLD = "summarize_old"

    # Keep first and last messages, remove middle.
    KEEP_FIRST_LAST = "keep_first_last"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class FitStrategy(Protocol):
    """Structural interface every context-fitting strategy must satisfy."""

    def fit(self, messages: list[Message], max_tokens: int) -> list[Message]:
        """
        Reduce messages so they fit within a token budget.

        Args:
            messages: The messages to fit.
            max_tokens: Maximum tokens allowed.

        Returns:
            List of messages that fit within the limit.
        """
        ...
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class SlidingWindowStrategy:
    """
    Keep most recent messages that fit within token limit.

    This strategy removes messages from the start of the conversation
    until the remaining messages fit within the token limit.

    Example:
        >>> strategy = SlidingWindowStrategy()
        >>> fitted = strategy.fit(messages, max_tokens=4000)
    """

    def fit(self, messages: list[Message], max_tokens: int) -> list[Message]:
        """
        Fit messages using sliding window approach.

        Messages whose ``token_count`` is None are treated as zero-cost.

        Args:
            messages: The messages to fit.
            max_tokens: Maximum tokens allowed.

        Returns:
            List of most recent messages that fit, in original order.
        """
        if not messages:
            return []

        kept: list[Message] = []
        total = 0

        # Walk newest-to-oldest, accumulating until the budget is exceeded.
        # Append then reverse once at the end: list.insert(0, ...) inside the
        # loop would make this quadratic for long histories.
        for msg in reversed(messages):
            msg_tokens = msg.token_count or 0
            if total + msg_tokens > max_tokens:
                break
            kept.append(msg)
            total += msg_tokens

        kept.reverse()
        return kept
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class KeepSystemRecentStrategy:
    """
    Keep all system messages plus most recent user/assistant messages.

    System prompts take priority; whatever token budget remains after
    them is filled with the newest non-system messages.

    Example:
        >>> strategy = KeepSystemRecentStrategy()
        >>> fitted = strategy.fit(messages, max_tokens=4000)
    """

    def fit(self, messages: list[Message], max_tokens: int) -> list[Message]:
        """
        Fit messages keeping system messages and recent others.

        Args:
            messages: The messages to fit.
            max_tokens: Maximum tokens allowed.

        Returns:
            List of system messages + recent messages that fit.
        """
        if not messages:
            return []

        # Partition while preserving relative order within each group.
        system_msgs: list[Message] = []
        other_msgs: list[Message] = []
        for m in messages:
            (system_msgs if m.role == MessageRole.SYSTEM else other_msgs).append(m)

        system_tokens = sum(m.token_count or 0 for m in system_msgs)

        if system_tokens > max_tokens:
            # System prompts alone overflow the budget: keep only the
            # most recent system messages that fit, drop everything else.
            trimmed: list[Message] = []
            used = 0
            for m in reversed(system_msgs):
                cost = m.token_count or 0
                if used + cost > max_tokens:
                    break
                trimmed.insert(0, m)
                used += cost
            return trimmed

        # Fill the leftover budget with the newest non-system messages.
        budget = max_tokens - system_tokens
        tail: list[Message] = []
        spent = 0
        for m in reversed(other_msgs):
            cost = m.token_count or 0
            if spent + cost > budget:
                break
            tail.insert(0, m)
            spent += cost

        return system_msgs + tail
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
class KeepFirstLastStrategy:
    """
    Keep first and last messages, removing middle messages.

    This strategy preserves the beginning context and recent
    conversation while removing intermediate messages.

    Attributes:
        keep_first: Number of messages to keep from start.
        keep_last: Number of messages to keep from end.

    Example:
        >>> strategy = KeepFirstLastStrategy(keep_first=2, keep_last=5)
        >>> fitted = strategy.fit(messages, max_tokens=4000)
    """

    def __init__(self, keep_first: int = 1, keep_last: int = 5) -> None:
        """
        Initialize strategy.

        Args:
            keep_first: Number of messages to keep from start.
            keep_last: Number of messages to keep from end.
        """
        self.keep_first = keep_first
        self.keep_last = keep_last

    def fit(self, messages: list[Message], max_tokens: int) -> list[Message]:
        """
        Fit messages keeping first and last.

        Args:
            messages: The messages to fit.
            max_tokens: Maximum tokens allowed.

        Returns:
            List of first and last messages that fit.
        """
        if not messages:
            return []

        if len(messages) <= self.keep_first + self.keep_last:
            # All messages fit in the keep regions
            total = sum(m.token_count or 0 for m in messages)
            if total <= max_tokens:
                return messages
            # Still need to truncate - use sliding window
            return SlidingWindowStrategy().fit(messages, max_tokens)

        # Get first and last messages.
        first_msgs = messages[: self.keep_first]
        # BUG FIX: with keep_last == 0, messages[-0:] is the WHOLE list,
        # which duplicated the head messages in the result. Guard it.
        last_msgs = messages[-self.keep_last :] if self.keep_last > 0 else []

        # Calculate tokens
        first_tokens = sum(m.token_count or 0 for m in first_msgs)
        last_tokens = sum(m.token_count or 0 for m in last_msgs)

        # If first+last exceed limit, prioritize last
        if first_tokens + last_tokens > max_tokens:
            # Try to keep at least some first messages
            available = max_tokens - last_tokens
            if available > 0:
                kept_first: list[Message] = []
                total = 0
                for msg in first_msgs:
                    msg_tokens = msg.token_count or 0
                    if total + msg_tokens > available:
                        break
                    kept_first.append(msg)
                    total += msg_tokens
                return kept_first + last_msgs
            else:
                # Can only fit last messages
                return SlidingWindowStrategy().fit(last_msgs, max_tokens)

        return first_msgs + last_msgs
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
@dataclass
class SummarizeOldStrategy:
    """
    Summarize older messages to fit within token limit.

    Requires a summarization callback that condenses a list of messages
    into a single summary string; the most recent messages are kept
    verbatim so context is preserved without storing every message.

    Attributes:
        summarize_callback: Function to summarize messages.
        summary_prefix: Prefix for summary message.
        keep_recent: Number of recent messages to keep without summarizing.

    Example:
        >>> def summarize(messages):
        ...     return f"Previously discussed: {len(messages)} messages"
        >>> strategy = SummarizeOldStrategy(summarize, keep_recent=5)
        >>> fitted = strategy.fit(messages, max_tokens=4000)
    """

    summarize_callback: Any  # Callable[[list[Message]], str]
    summary_prefix: str = "[Summary of previous conversation]"
    keep_recent: int = 10

    def fit(self, messages: list[Message], max_tokens: int) -> list[Message]:
        """
        Fit messages by summarizing older ones.

        Args:
            messages: The messages to fit.
            max_tokens: Maximum tokens allowed.

        Returns:
            List of messages with older ones summarized.
        """
        if not messages:
            return []

        # Everything already fits: nothing to summarize.
        if sum(m.token_count or 0 for m in messages) <= max_tokens:
            return messages

        # Too few messages to split into old/recent; fall back to a window.
        if len(messages) <= self.keep_recent:
            return SlidingWindowStrategy().fit(messages, max_tokens)

        old = messages[: -self.keep_recent]
        recent = messages[-self.keep_recent :]

        # Collapse the old portion into a single system message.
        summary_msg = Message(
            role=MessageRole.SYSTEM,
            content=f"{self.summary_prefix}\n{self.summarize_callback(old)}",
        )

        combined = [summary_msg, *recent]
        if sum(m.token_count or 0 for m in combined) <= max_tokens:
            return combined

        # Summary plus all recent messages still overflow; shrink the
        # recent slice to whatever budget the summary leaves over.
        budget = max_tokens - (summary_msg.token_count or 0)
        return [summary_msg, *SlidingWindowStrategy().fit(recent, budget)]
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
class ContextWindow:
    """
    Manages context window for LLM calls.

    Applies a configurable fitting strategy so conversation history
    stays within a model's token budget, keeping a reserve for the
    generated output.

    Attributes:
        max_tokens: Maximum tokens for context.
        strategy: The fitting strategy to use.
        reserve_output: Tokens to reserve for output generation.

    Example:
        >>> window = ContextWindow(
        ...     max_tokens=8000,
        ...     strategy=ContextStrategy.KEEP_SYSTEM_RECENT,
        ...     reserve_output=1000
        ... )
        >>> messages = [...]
        >>> fitted = window.fit_messages(messages)
        >>> available = window.get_available_tokens()
    """

    def __init__(
        self,
        max_tokens: int,
        strategy: ContextStrategy | FitStrategy = ContextStrategy.SLIDING_WINDOW,
        reserve_output: int = 1000,
    ) -> None:
        """
        Initialize context window.

        Args:
            max_tokens: Maximum tokens for context.
            strategy: The fitting strategy to use.
            reserve_output: Tokens to reserve for output generation.
        """
        self.max_tokens = max_tokens
        self.reserve_output = reserve_output
        self._strategy = strategy
        self._fit_strategy = self._get_strategy(strategy)

    def _get_strategy(
        self, strategy: ContextStrategy | FitStrategy
    ) -> FitStrategy:
        """Resolve an enum member (or custom object) to a concrete strategy."""
        if not isinstance(strategy, ContextStrategy):
            # Caller supplied their own FitStrategy implementation.
            return strategy
        builders = {
            ContextStrategy.SLIDING_WINDOW: SlidingWindowStrategy,
            ContextStrategy.KEEP_SYSTEM_RECENT: KeepSystemRecentStrategy,
            ContextStrategy.KEEP_FIRST_LAST: KeepFirstLastStrategy,
        }
        # Unsupported members (e.g. SUMMARIZE_OLD, which needs a callback)
        # fall back to the sliding-window default.
        return builders.get(strategy, SlidingWindowStrategy)()

    @property
    def strategy(self) -> ContextStrategy | FitStrategy:
        """The currently configured strategy."""
        return self._strategy

    @strategy.setter
    def strategy(self, value: ContextStrategy | FitStrategy) -> None:
        """Swap the strategy and rebuild its implementation."""
        self._strategy = value
        self._fit_strategy = self._get_strategy(value)

    def fit_messages(self, messages: list[Message]) -> list[Message]:
        """
        Fit messages within token limit using configured strategy.

        Args:
            messages: The messages to fit.

        Returns:
            List of messages that fit within the limit.
        """
        return self._fit_strategy.fit(messages, self.get_available_tokens())

    def get_available_tokens(self) -> int:
        """
        Get available tokens for context (after reserving output tokens).

        Returns:
            Number of tokens available for context (never negative).
        """
        return max(0, self.max_tokens - self.reserve_output)

    def should_truncate(self, messages: list[Message]) -> bool:
        """
        Check if messages need truncation.

        Args:
            messages: The messages to check.

        Returns:
            True if messages exceed available tokens.
        """
        return self.get_token_count(messages) > self.get_available_tokens()

    def get_token_count(self, messages: list[Message]) -> int:
        """
        Get total token count for messages.

        Messages with an unknown (None) count contribute zero.

        Args:
            messages: The messages to count.

        Returns:
            Total token count.
        """
        return sum(m.token_count or 0 for m in messages)

    def get_overflow(self, messages: list[Message]) -> int:
        """
        Get number of tokens over the limit.

        Args:
            messages: The messages to check.

        Returns:
            Number of tokens over limit (0 if under).
        """
        overflow = self.get_token_count(messages) - self.get_available_tokens()
        return overflow if overflow > 0 else 0

    def to_dict(self) -> dict[str, Any]:
        """
        Serialize context window to dictionary.

        Custom strategy objects serialize as the string "custom".

        Returns:
            Dictionary representation.
        """
        if isinstance(self._strategy, ContextStrategy):
            strategy_value = self._strategy.value
        else:
            strategy_value = "custom"
        return {
            "max_tokens": self.max_tokens,
            "strategy": strategy_value,
            "reserve_output": self.reserve_output,
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> ContextWindow:
        """
        Deserialize context window from dictionary.

        Args:
            data: Dictionary with context window data.

        Returns:
            ContextWindow instance.
        """
        try:
            strategy = ContextStrategy(data.get("strategy", "sliding_window"))
        except ValueError:
            # Unknown or "custom" strategy strings fall back to the default.
            strategy = ContextStrategy.SLIDING_WINDOW
        return cls(
            max_tokens=data.get("max_tokens", 8000),
            strategy=strategy,
            reserve_output=data.get("reserve_output", 1000),
        )
|
|
478
|
+
|
|
479
|
+
|
|
480
|
+
# Convenience functions for common context window configurations
|
|
481
|
+
|
|
482
|
+
|
|
483
|
+
def create_gpt4_context_window(
    strategy: ContextStrategy = ContextStrategy.KEEP_SYSTEM_RECENT,
) -> ContextWindow:
    """
    Create context window configured for GPT-4 (8K context).

    Args:
        strategy: The fitting strategy to use.

    Returns:
        Configured ContextWindow.
    """
    # 8K model: keep 1K of the budget free for the completion.
    return ContextWindow(max_tokens=8192, strategy=strategy, reserve_output=1024)
|
|
500
|
+
|
|
501
|
+
|
|
502
|
+
def create_gpt4_32k_context_window(
    strategy: ContextStrategy = ContextStrategy.KEEP_SYSTEM_RECENT,
) -> ContextWindow:
    """
    Create context window configured for GPT-4-32K.

    Args:
        strategy: The fitting strategy to use.

    Returns:
        Configured ContextWindow.
    """
    # 32K model: reserve 2K tokens for the completion.
    return ContextWindow(max_tokens=32768, strategy=strategy, reserve_output=2048)
|
|
519
|
+
|
|
520
|
+
|
|
521
|
+
def create_claude_context_window(
    strategy: ContextStrategy = ContextStrategy.KEEP_SYSTEM_RECENT,
) -> ContextWindow:
    """
    Create context window configured for Claude (200K context).

    Args:
        strategy: The fitting strategy to use.

    Returns:
        Configured ContextWindow.
    """
    # 200K model: reserve 4K tokens for the completion.
    return ContextWindow(max_tokens=200000, strategy=strategy, reserve_output=4096)
|
|
538
|
+
|
|
539
|
+
|
|
540
|
+
def create_gemini_context_window(
    strategy: ContextStrategy = ContextStrategy.KEEP_SYSTEM_RECENT,
) -> ContextWindow:
    """
    Create context window configured for Gemini 1.5 (1M context).

    Args:
        strategy: The fitting strategy to use.

    Returns:
        Configured ContextWindow.
    """
    # 1M model: reserve 8K tokens for the completion.
    return ContextWindow(max_tokens=1000000, strategy=strategy, reserve_output=8192)
|