deepagents 0.3.7__py3-none-any.whl → 0.3.7a1__py3-none-any.whl
This diff compares the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their public registries.
- deepagents/backends/filesystem.py +7 -55
- deepagents/graph.py +10 -29
- deepagents/middleware/__init__.py +1 -3
- deepagents/middleware/filesystem.py +75 -76
- deepagents/middleware/memory.py +7 -11
- deepagents/middleware/skills.py +2 -4
- deepagents/middleware/subagents.py +19 -35
- {deepagents-0.3.7.dist-info → deepagents-0.3.7a1.dist-info}/METADATA +1 -1
- deepagents-0.3.7a1.dist-info/RECORD +21 -0
- {deepagents-0.3.7.dist-info → deepagents-0.3.7a1.dist-info}/WHEEL +1 -1
- deepagents/middleware/summarization.py +0 -758
- deepagents-0.3.7.dist-info/RECORD +0 -22
- {deepagents-0.3.7.dist-info → deepagents-0.3.7a1.dist-info}/top_level.txt +0 -0
@@ -1,758 +0,0 @@
-"""Summarization middleware for offloading conversation history.
-
-Persists conversation history to a backend prior to summarization, enabling retrieval of
-full context if needed later by an agent.
-
-## Usage
-
-```python
-from deepagents import create_deep_agent
-from deepagents.middleware.summarization import SummarizationMiddleware
-from deepagents.backends import FilesystemBackend
-
-backend = FilesystemBackend(root_dir="/data")
-
-middleware = SummarizationMiddleware(
-    model="gpt-4o-mini",
-    backend=backend,
-    trigger=("fraction", 0.85),
-    keep=("fraction", 0.10),
-)
-
-agent = create_deep_agent(middleware=[middleware])
-```
-
-## Storage
-
-Offloaded messages are stored as markdown at `/conversation_history/{thread_id}.md`.
-
-Each summarization event appends a new section to this file, creating a running log
-of all evicted messages.
-"""
-
-from __future__ import annotations
-
-import logging
-import uuid
-from datetime import UTC, datetime
-from typing import TYPE_CHECKING, Any, cast
-
-from langchain.agents.middleware.summarization import (
-    _DEFAULT_MESSAGES_TO_KEEP,
-    _DEFAULT_TRIM_TOKEN_LIMIT,
-    DEFAULT_SUMMARY_PROMPT,
-    ContextSize,
-    SummarizationMiddleware as BaseSummarizationMiddleware,
-    TokenCounter,
-)
-from langchain.tools import ToolRuntime
-from langchain_core.messages import AIMessage, AnyMessage, HumanMessage, RemoveMessage, get_buffer_string
-from langchain_core.messages.utils import count_tokens_approximately
-from langgraph.config import get_config
-from langgraph.graph.message import REMOVE_ALL_MESSAGES
-from typing_extensions import TypedDict, override
-
-if TYPE_CHECKING:
-    from langchain.agents.middleware.types import AgentState
-    from langchain.chat_models import BaseChatModel
-    from langchain_core.runnables.config import RunnableConfig
-    from langgraph.runtime import Runtime
-
-    from deepagents.backends.protocol import BACKEND_TYPES, BackendProtocol
-
-logger = logging.getLogger(__name__)
-
-
-class TruncateArgsSettings(TypedDict, total=False):
-    """Settings for truncating large tool arguments in old messages.
-
-    Attributes:
-        trigger: Threshold to trigger argument truncation. If None, truncation is disabled.
-        keep: Context retention policy for message truncation (defaults to last 20 messages).
-        max_length: Maximum character length for tool arguments before truncation (defaults to 2000).
-        truncation_text: Text to replace truncated arguments with (defaults to "...(argument truncated)").
-    """
-
-    trigger: ContextSize | None
-    keep: ContextSize
-    max_length: int
-    truncation_text: str
-
-
-class SummarizationMiddleware(BaseSummarizationMiddleware):
-    """Summarization middleware with backend for conversation history offloading."""
-
-    def __init__(
-        self,
-        model: str | BaseChatModel,
-        *,
-        backend: BACKEND_TYPES,
-        trigger: ContextSize | list[ContextSize] | None = None,
-        keep: ContextSize = ("messages", _DEFAULT_MESSAGES_TO_KEEP),
-        token_counter: TokenCounter = count_tokens_approximately,
-        summary_prompt: str = DEFAULT_SUMMARY_PROMPT,
-        trim_tokens_to_summarize: int | None = _DEFAULT_TRIM_TOKEN_LIMIT,
-        history_path_prefix: str = "/conversation_history",
-        truncate_args_settings: TruncateArgsSettings | None = None,
-        **deprecated_kwargs: Any,
-    ) -> None:
-        """Initialize summarization middleware with backend support.
-
-        Args:
-            model: The language model to use for generating summaries.
-            backend: Backend instance or factory for persisting conversation history.
-            trigger: Threshold(s) that trigger summarization.
-            keep: Context retention policy after summarization.
-
-                Defaults to keeping the last 20 messages.
-            token_counter: Function to count tokens in messages.
-            summary_prompt: Prompt template for generating summaries.
-            trim_tokens_to_summarize: Max tokens to include when generating the summary.
-
-                Defaults to 4000.
-            truncate_args_settings: Settings for truncating large tool arguments in old messages.
-
-                Provide a [`TruncateArgsSettings`][deepagents.middleware.summarization.TruncateArgsSettings]
-                dictionary to configure when and how to truncate tool arguments. If `None`,
-                argument truncation is disabled.
-
-                !!! example
-
-                    ```python
-                    # Truncate once 50 messages are reached, sparing the last 20 messages
-                    {"trigger": ("messages", 50), "keep": ("messages", 20), "max_length": 2000, "truncation_text": "...(truncated)"}
-
-                    # Truncate once 50% of the context window is reached, sparing messages in the last 10% of the window
-                    {"trigger": ("fraction", 0.5), "keep": ("fraction", 0.1), "max_length": 2000, "truncation_text": "...(truncated)"}
-                    ```
-            history_path_prefix: Path prefix for storing conversation history.
-
-        Example:
-            ```python
-            from deepagents.middleware.summarization import SummarizationMiddleware
-            from deepagents.backends import StateBackend
-
-            middleware = SummarizationMiddleware(
-                model="gpt-4o-mini",
-                backend=lambda tool_runtime: StateBackend(tool_runtime),
-                trigger=("tokens", 100000),
-                keep=("messages", 20),
-            )
-            ```
-        """
-        super().__init__(
-            model=model,
-            trigger=trigger,
-            keep=keep,
-            token_counter=token_counter,
-            summary_prompt=summary_prompt,
-            trim_tokens_to_summarize=trim_tokens_to_summarize,
-            **deprecated_kwargs,
-        )
-        self._backend = backend
-        self._history_path_prefix = history_path_prefix
-
-        # Parse truncate_args_settings
-        if truncate_args_settings is None:
-            self._truncate_args_trigger = None
-            self._truncate_args_keep = ("messages", 20)
-            self._max_arg_length = 2000
-            self._truncation_text = "...(argument truncated)"
-        else:
-            self._truncate_args_trigger = truncate_args_settings.get("trigger")
-            self._truncate_args_keep = truncate_args_settings.get("keep", ("messages", 20))
-            self._max_arg_length = truncate_args_settings.get("max_length", 2000)
-            self._truncation_text = truncate_args_settings.get("truncation_text", "...(argument truncated)")
-
-    def _get_backend(
-        self,
-        state: AgentState[Any],
-        runtime: Runtime,
-    ) -> BackendProtocol:
-        """Resolve backend from instance or factory.
-
-        Args:
-            state: Current agent state.
-            runtime: Runtime context for factory functions.
-
-        Returns:
-            Resolved backend instance.
-        """
-        if callable(self._backend):
-            # Because we're using `before_model`, which doesn't receive `config` as a
-            # parameter, we access it via `runtime.config` instead.
-            # Cast is safe: empty dict `{}` is a valid `RunnableConfig` (all fields are
-            # optional in TypedDict).
-            config = cast("RunnableConfig", getattr(runtime, "config", {}))
-
-            tool_runtime = ToolRuntime(
-                state=state,
-                context=runtime.context,
-                stream_writer=runtime.stream_writer,
-                store=runtime.store,
-                config=config,
-                tool_call_id=None,
-            )
-            return self._backend(tool_runtime)
-        return self._backend
-
-    def _get_thread_id(self) -> str:
-        """Extract `thread_id` from langgraph config.
-
-        Uses `get_config()` to access the `RunnableConfig` from langgraph's
-        `contextvar`. Falls back to a generated session ID if not available.
-
-        Returns:
-            Thread ID string from config, or a generated session ID
-            (e.g., `'session_a1b2c3d4'`) if not in a runnable context.
-        """
-        try:
-            config = get_config()
-            thread_id = config.get("configurable", {}).get("thread_id")
-            if thread_id is not None:
-                return str(thread_id)
-        except RuntimeError:
-            # Not in a runnable context
-            pass
-
-        # Fallback: generate session ID
-        generated_id = f"session_{uuid.uuid4().hex[:8]}"
-        logger.debug("No thread_id found, using generated session ID: %s", generated_id)
-        return generated_id
-
-    def _get_history_path(self) -> str:
-        """Generate path for storing conversation history.
-
-        Returns a single file per thread that gets appended to over time.
-
-        Returns:
-            Path string like `'/conversation_history/{thread_id}.md'`
-        """
-        thread_id = self._get_thread_id()
-        return f"{self._history_path_prefix}/{thread_id}.md"
-
-    def _is_summary_message(self, msg: AnyMessage) -> bool:
-        """Check if a message is a previous summarization message.
-
-        Summary messages are `HumanMessage` objects with `lc_source='summarization'` in
-        `additional_kwargs`. These should be filtered from offloads to avoid redundant
-        storage during chained summarization.
-
-        Args:
-            msg: Message to check.
-
-        Returns:
-            Whether this is a summary `HumanMessage` from a previous summarization.
-        """
-        if not isinstance(msg, HumanMessage):
-            return False
-        return msg.additional_kwargs.get("lc_source") == "summarization"
-
-    def _filter_summary_messages(self, messages: list[AnyMessage]) -> list[AnyMessage]:
-        """Filter out previous summary messages from a message list.
-
-        When chained summarization occurs, we don't want to re-offload the previous
-        summary `HumanMessage` since the original messages are already stored in the
-        backend.
-
-        Args:
-            messages: List of messages to filter.
-
-        Returns:
-            Messages without previous summary `HumanMessage` objects.
-        """
-        return [msg for msg in messages if not self._is_summary_message(msg)]
-
-    def _build_new_messages_with_path(self, summary: str, file_path: str | None) -> list[AnyMessage]:
-        """Build the summary message with optional file path reference.
-
-        Args:
-            summary: The generated summary text.
-            file_path: Path where conversation history was stored, or `None`.
-
-                Optional since offloading may fail.
-
-        Returns:
-            List containing the summary `HumanMessage`.
-        """
-        if file_path is not None:
-            content = f"""\
-You are in the middle of a conversation that has been summarized.
-
-The full conversation history has been saved to {file_path} should you need to refer back to it for details.
-
-A condensed summary follows:
-
-<summary>
-{summary}
-</summary>"""
-        else:
-            content = f"Here is a summary of the conversation to date:\n\n{summary}"
-
-        return [
-            HumanMessage(
-                content=content,
-                additional_kwargs={"lc_source": "summarization"},
-            )
-        ]
-
-    def _should_truncate_args(self, messages: list[AnyMessage], total_tokens: int) -> bool:
-        """Check if argument truncation should be triggered.
-
-        Args:
-            messages: Current message history.
-            total_tokens: Total token count of messages.
-
-        Returns:
-            True if truncation should occur, False otherwise.
-        """
-        if self._truncate_args_trigger is None:
-            return False
-
-        trigger_type, trigger_value = self._truncate_args_trigger
-
-        if trigger_type == "messages":
-            return len(messages) >= trigger_value
-        if trigger_type == "tokens":
-            return total_tokens >= trigger_value
-        if trigger_type == "fraction":
-            max_input_tokens = self._get_profile_limits()
-            if max_input_tokens is None:
-                return False
-            threshold = int(max_input_tokens * trigger_value)
-            if threshold <= 0:
-                threshold = 1
-            return total_tokens >= threshold
-
-        return False
-
-    def _determine_truncate_cutoff_index(self, messages: list[AnyMessage]) -> int:  # noqa: PLR0911
-        """Determine the cutoff index for argument truncation based on keep policy.
-
-        Messages at index >= cutoff should be preserved without truncation.
-        Messages at index < cutoff can have their tool args truncated.
-
-        Args:
-            messages: Current message history.
-
-        Returns:
-            Index where truncation cutoff occurs. Messages before this index
-            should have args truncated; messages at/after should be preserved.
-        """
-        keep_type, keep_value = self._truncate_args_keep
-
-        if keep_type == "messages":
-            # Keep the most recent N messages
-            if len(messages) <= keep_value:
-                return len(messages)  # All messages are recent
-            return len(messages) - keep_value
-
-        if keep_type in {"tokens", "fraction"}:
-            # Calculate target token count
-            if keep_type == "fraction":
-                max_input_tokens = self._get_profile_limits()
-                if max_input_tokens is None:
-                    # Fallback to message count if profile not available
-                    messages_to_keep = 20
-                    if len(messages) <= messages_to_keep:
-                        return len(messages)
-                    return len(messages) - messages_to_keep
-                target_token_count = int(max_input_tokens * keep_value)
-            else:
-                target_token_count = int(keep_value)
-
-            if target_token_count <= 0:
-                target_token_count = 1
-
-            # Keep recent messages up to token limit
-            tokens_kept = 0
-            for i in range(len(messages) - 1, -1, -1):
-                msg_tokens = self.token_counter([messages[i]])
-                if tokens_kept + msg_tokens > target_token_count:
-                    return i + 1
-                tokens_kept += msg_tokens
-            return 0  # All messages are within token limit
-
-        return len(messages)
-
-    def _truncate_tool_call(self, tool_call: dict[str, Any]) -> dict[str, Any]:
-        """Truncate large arguments in a single tool call.
-
-        Args:
-            tool_call: The tool call dictionary to truncate.
-
-        Returns:
-            A copy of the tool call with large arguments truncated.
-        """
-        args = tool_call.get("args", {})
-
-        truncated_args = {}
-        modified = False
-
-        for key, value in args.items():
-            if isinstance(value, str) and len(value) > self._max_arg_length:
-                truncated_args[key] = value[:20] + self._truncation_text
-                modified = True
-            else:
-                truncated_args[key] = value
-
-        if modified:
-            return {
-                **tool_call,
-                "args": truncated_args,
-            }
-        return tool_call
-
-    def _truncate_args(self, messages: list[AnyMessage]) -> tuple[list[AnyMessage], bool]:
-        """Truncate large tool call arguments in old messages.
-
-        Args:
-            messages: Messages to potentially truncate.
-
-        Returns:
-            Tuple of (truncated_messages, modified). If modified is False,
-            truncated_messages is the same as the input messages.
-        """
-        total_tokens = self.token_counter(messages)
-        if not self._should_truncate_args(messages, total_tokens):
-            return messages, False
-
-        cutoff_index = self._determine_truncate_cutoff_index(messages)
-        if cutoff_index >= len(messages):
-            return messages, False
-
-        # Process messages before the cutoff
-        truncated_messages = []
-        modified = False
-
-        for i, msg in enumerate(messages):
-            if i < cutoff_index and isinstance(msg, AIMessage) and msg.tool_calls:
-                # Check if this AIMessage has tool calls we need to truncate
-                truncated_tool_calls = []
-                msg_modified = False
-
-                for tool_call in msg.tool_calls:
-                    if tool_call["name"] in {"write_file", "edit_file"}:
-                        truncated_call = self._truncate_tool_call(tool_call)
-                        if truncated_call != tool_call:
-                            msg_modified = True
-                        truncated_tool_calls.append(truncated_call)
-                    else:
-                        truncated_tool_calls.append(tool_call)
-
-                if msg_modified:
-                    # Create a new AIMessage with truncated tool calls
-                    truncated_msg = msg.model_copy()
-                    truncated_msg.tool_calls = truncated_tool_calls
-                    truncated_messages.append(truncated_msg)
-                    modified = True
-                else:
-                    truncated_messages.append(msg)
-            else:
-                truncated_messages.append(msg)
-
-        return truncated_messages, modified
-
-    def _offload_to_backend(
-        self,
-        backend: BackendProtocol,
-        messages: list[AnyMessage],
-    ) -> str | None:
-        """Persist messages to backend before summarization.
-
-        Appends evicted messages to a single markdown file per thread. Each
-        summarization event adds a new section with a timestamp header.
-
-        Previous summary messages are filtered out to avoid redundant storage during
-        chained summarization events.
-
-        Args:
-            backend: Backend to write to.
-            messages: Messages being summarized.
-
-        Returns:
-            The file path where history was stored, or `None` if the write failed.
-        """
-        path = self._get_history_path()
-
-        # Filter out previous summary messages to avoid redundant storage
-        filtered_messages = self._filter_summary_messages(messages)
-
-        timestamp = datetime.now(UTC).isoformat()
-        new_section = f"## Summarized at {timestamp}\n\n{get_buffer_string(filtered_messages)}\n\n"
-
-        # Read existing content (if any) and append
-        # Note: We use download_files() instead of read() because read() returns
-        # line-numbered content (for LLM consumption), but edit() expects raw content.
-        existing_content = ""
-        try:
-            responses = backend.download_files([path])
-            if responses and responses[0].content is not None and responses[0].error is None:
-                existing_content = responses[0].content.decode("utf-8")
-        except Exception as e:  # noqa: BLE001
-            # File likely doesn't exist yet, but log for observability
-            logger.debug(
-                "Exception reading existing history from %s (treating as new file): %s: %s",
-                path,
-                type(e).__name__,
-                e,
-            )
-
-        combined_content = existing_content + new_section
-
-        try:
-            result = backend.edit(path, existing_content, combined_content) if existing_content else backend.write(path, combined_content)
-            if result is None or result.error:
-                error_msg = result.error if result else "backend returned None"
-                logger.warning(
-                    "Failed to offload conversation history to %s (%d messages): %s",
-                    path,
-                    len(filtered_messages),
-                    error_msg,
-                )
-                return None
-        except Exception as e:  # noqa: BLE001
-            logger.warning(
-                "Exception offloading conversation history to %s (%d messages): %s: %s",
-                path,
-                len(filtered_messages),
-                type(e).__name__,
-                e,
-            )
-            return None
-        else:
-            logger.debug("Offloaded %d messages to %s", len(filtered_messages), path)
-            return path
-
-    async def _aoffload_to_backend(
-        self,
-        backend: BackendProtocol,
-        messages: list[AnyMessage],
-    ) -> str | None:
-        """Persist messages to backend before summarization (async).
-
-        Appends evicted messages to a single markdown file per thread. Each
-        summarization event adds a new section with a timestamp header.
-
-        Previous summary messages are filtered out to avoid redundant storage during
-        chained summarization events.
-
-        Args:
-            backend: Backend to write to.
-            messages: Messages being summarized.
-
-        Returns:
-            The file path where history was stored, or `None` if the write failed.
-        """
-        path = self._get_history_path()
-
-        # Filter out previous summary messages to avoid redundant storage
-        filtered_messages = self._filter_summary_messages(messages)
-
-        timestamp = datetime.now(UTC).isoformat()
-        new_section = f"## Summarized at {timestamp}\n\n{get_buffer_string(filtered_messages)}\n\n"
-
-        # Read existing content (if any) and append
-        # Note: We use adownload_files() instead of aread() because read() returns
-        # line-numbered content (for LLM consumption), but edit() expects raw content.
-        existing_content = ""
-        try:
-            responses = await backend.adownload_files([path])
-            if responses and responses[0].content is not None and responses[0].error is None:
-                existing_content = responses[0].content.decode("utf-8")
-        except Exception as e:  # noqa: BLE001
-            # File likely doesn't exist yet, but log for observability
-            logger.debug(
-                "Exception reading existing history from %s (treating as new file): %s: %s",
-                path,
-                type(e).__name__,
-                e,
-            )
-
-        combined_content = existing_content + new_section
-
-        try:
-            result = (
-                await backend.aedit(path, existing_content, combined_content) if existing_content else await backend.awrite(path, combined_content)
-            )
-            if result is None or result.error:
-                error_msg = result.error if result else "backend returned None"
-                logger.warning(
-                    "Failed to offload conversation history to %s (%d messages): %s",
-                    path,
-                    len(filtered_messages),
-                    error_msg,
-                )
-                return None
-        except Exception as e:  # noqa: BLE001
-            logger.warning(
-                "Exception offloading conversation history to %s (%d messages): %s: %s",
-                path,
-                len(filtered_messages),
-                type(e).__name__,
-                e,
-            )
-            return None
-        else:
-            logger.debug("Offloaded %d messages to %s", len(filtered_messages), path)
-            return path
-
-    @override
-    def before_model(
-        self,
-        state: AgentState[Any],
-        runtime: Runtime,
-    ) -> dict[str, Any] | None:
-        """Process messages before model invocation, with history offloading and arg truncation.
-
-        First truncates large tool arguments in old messages if configured.
-        Then offloads messages to the backend before summarization if thresholds are met.
-        The summary message includes a reference to the file path where the full
-        conversation history was stored.
-
-        Args:
-            state: The agent state.
-            runtime: The runtime environment.
-
-        Returns:
-            Updated state with truncated/summarized messages if processing was performed.
-        """
-        messages = state["messages"]
-        self._ensure_message_ids(messages)
-
-        # Step 1: Truncate args if configured
-        truncated_messages, args_were_truncated = self._truncate_args(messages)
-
-        # Step 2: Check if summarization should happen
-        total_tokens = self.token_counter(truncated_messages)
-        should_summarize = self._should_summarize(truncated_messages, total_tokens)
-
-        # If only truncation happened (no summarization)
-        if args_were_truncated and not should_summarize:
-            return {
-                "messages": [
-                    RemoveMessage(id=REMOVE_ALL_MESSAGES),
-                    *truncated_messages,
-                ]
-            }
-
-        # If no truncation and no summarization
-        if not should_summarize:
-            return None
-
-        # Step 3: Perform summarization
-        cutoff_index = self._determine_cutoff_index(truncated_messages)
-        if cutoff_index <= 0:
-            # If truncation happened but we can't summarize, still return truncated messages
-            if args_were_truncated:
-                return {
-                    "messages": [
-                        RemoveMessage(id=REMOVE_ALL_MESSAGES),
-                        *truncated_messages,
-                    ]
-                }
-            return None
-
-        messages_to_summarize, preserved_messages = self._partition_messages(truncated_messages, cutoff_index)
-
-        # Offload to backend first; abort summarization if this fails, to prevent data loss
-        backend = self._get_backend(state, runtime)
-        file_path = self._offload_to_backend(backend, messages_to_summarize)
-        if file_path is None:
-            # Offloading failed; don't proceed with summarization, so messages are preserved
-            return None
-
-        # Generate summary
-        summary = self._create_summary(messages_to_summarize)
-
-        # Build summary message with file path reference
-        new_messages = self._build_new_messages_with_path(summary, file_path)
-
-        return {
-            "messages": [
-                RemoveMessage(id=REMOVE_ALL_MESSAGES),
-                *new_messages,
-                *preserved_messages,
-            ]
-        }
-
-    @override
-    async def abefore_model(
-        self,
-        state: AgentState[Any],
-        runtime: Runtime,
-    ) -> dict[str, Any] | None:
-        """Process messages before model invocation, with history offloading and arg truncation (async).
-
-        First truncates large tool arguments in old messages if configured.
-        Then offloads messages to the backend before summarization if thresholds are met.
-        The summary message includes a reference to the file path where the full
-        conversation history was stored.
-
-        Args:
-            state: The agent state.
-            runtime: The runtime environment.
-
-        Returns:
-            Updated state with truncated/summarized messages if processing was performed.
-        """
-        messages = state["messages"]
-        self._ensure_message_ids(messages)
-
-        # Step 1: Truncate args if configured
-        truncated_messages, args_were_truncated = self._truncate_args(messages)
-
-        # Step 2: Check if summarization should happen
-        total_tokens = self.token_counter(truncated_messages)
-        should_summarize = self._should_summarize(truncated_messages, total_tokens)
-
-        # If only truncation happened (no summarization)
-        if args_were_truncated and not should_summarize:
-            return {
-                "messages": [
-                    RemoveMessage(id=REMOVE_ALL_MESSAGES),
-                    *truncated_messages,
-                ]
-            }
-
-        # If no truncation and no summarization
-        if not should_summarize:
-            return None
-
-        # Step 3: Perform summarization
-        cutoff_index = self._determine_cutoff_index(truncated_messages)
-        if cutoff_index <= 0:
-            # If truncation happened but we can't summarize, still return truncated messages
-            if args_were_truncated:
-                return {
-                    "messages": [
-                        RemoveMessage(id=REMOVE_ALL_MESSAGES),
-                        *truncated_messages,
-                    ]
-                }
-            return None
-
-        messages_to_summarize, preserved_messages = self._partition_messages(truncated_messages, cutoff_index)
-
-        # Offload to backend first; abort summarization if this fails, to prevent data loss
-        backend = self._get_backend(state, runtime)
-        file_path = await self._aoffload_to_backend(backend, messages_to_summarize)
-        if file_path is None:
-            # Offloading failed; don't proceed with summarization, so messages are preserved
-            return None
-
-        # Generate summary
-        summary = await self._acreate_summary(messages_to_summarize)
-
-        # Build summary message with file path reference
-        new_messages = self._build_new_messages_with_path(summary, file_path)
-
-        return {
-            "messages": [
-                RemoveMessage(id=REMOVE_ALL_MESSAGES),
-                *new_messages,
-                *preserved_messages,
-            ]
-        }
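For reference, the removed module documents its summarization thresholds and its optional tool-argument truncation policy in separate docstring examples, but never shows them combined. Below is a minimal sketch of how they would compose under the 0.3.7 API shown in the diff above; the model string, root directory, and threshold values are illustrative rather than prescribed.

```python
# A hedged sketch combining the two configuration surfaces documented in the
# removed module (not an official example from the package).
from deepagents import create_deep_agent
from deepagents.backends import FilesystemBackend
from deepagents.middleware.summarization import SummarizationMiddleware

backend = FilesystemBackend(root_dir="/data")

middleware = SummarizationMiddleware(
    model="gpt-4o-mini",
    backend=backend,
    # Summarize once 85% of the context window is used; keep the last 10%.
    trigger=("fraction", 0.85),
    keep=("fraction", 0.10),
    # Independently truncate large write_file/edit_file arguments in older
    # messages once the history reaches 50 messages, sparing the last 20.
    truncate_args_settings={
        "trigger": ("messages", 50),
        "keep": ("messages", 20),
        "max_length": 2000,
        "truncation_text": "...(argument truncated)",
    },
)

agent = create_deep_agent(middleware=[middleware])
```

Per `before_model` in the removed file, argument truncation runs first, summarization is then re-evaluated against the truncated history, and if offloading to the backend fails the summarization step is skipped so that no messages are lost.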