camel-ai 0.2.78__py3-none-any.whl → 0.2.79a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic.
- camel/__init__.py +1 -1
- camel/agents/_utils.py +38 -0
- camel/agents/chat_agent.py +1112 -287
- camel/datasets/base_generator.py +39 -10
- camel/environments/single_step.py +28 -3
- camel/memories/__init__.py +1 -2
- camel/memories/agent_memories.py +34 -0
- camel/memories/base.py +26 -0
- camel/memories/blocks/chat_history_block.py +117 -17
- camel/memories/context_creators/score_based.py +25 -384
- camel/messages/base.py +26 -0
- camel/models/aws_bedrock_model.py +1 -17
- camel/models/azure_openai_model.py +113 -67
- camel/models/model_factory.py +17 -1
- camel/models/moonshot_model.py +102 -5
- camel/models/openai_compatible_model.py +62 -32
- camel/models/openai_model.py +61 -35
- camel/models/samba_model.py +34 -15
- camel/models/sglang_model.py +41 -11
- camel/societies/workforce/__init__.py +2 -0
- camel/societies/workforce/events.py +122 -0
- camel/societies/workforce/role_playing_worker.py +15 -11
- camel/societies/workforce/single_agent_worker.py +143 -291
- camel/societies/workforce/utils.py +2 -1
- camel/societies/workforce/workflow_memory_manager.py +772 -0
- camel/societies/workforce/workforce.py +513 -188
- camel/societies/workforce/workforce_callback.py +74 -0
- camel/societies/workforce/workforce_logger.py +144 -140
- camel/societies/workforce/workforce_metrics.py +33 -0
- camel/storages/vectordb_storages/oceanbase.py +5 -4
- camel/toolkits/file_toolkit.py +166 -0
- camel/toolkits/message_integration.py +15 -13
- camel/toolkits/terminal_toolkit/terminal_toolkit.py +112 -79
- camel/types/enums.py +1 -0
- camel/utils/context_utils.py +201 -2
- {camel_ai-0.2.78.dist-info → camel_ai-0.2.79a1.dist-info}/METADATA +14 -13
- {camel_ai-0.2.78.dist-info → camel_ai-0.2.79a1.dist-info}/RECORD +39 -35
- {camel_ai-0.2.78.dist-info → camel_ai-0.2.79a1.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.78.dist-info → camel_ai-0.2.79a1.dist-info}/licenses/LICENSE +0 -0
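The headline change in camel/agents/chat_agent.py below is automatic context compression: ChatAgent gains summarize_threshold and summary_window_ratio constructor parameters plus summarize()/asummarize() methods. A minimal usage sketch based only on the new __init__ signature shown in this diff (the system message and model configuration are illustrative assumptions, not taken from the release):

from camel.agents import ChatAgent

# Summarize once the context passes 50% of the model's token limit, and
# cap accumulated summaries at 60% of the window (the defaults in the diff).
agent = ChatAgent(
    system_message="You are a helpful assistant.",  # illustrative
    summarize_threshold=50,
    summary_window_ratio=0.6,
)
result = agent.summarize()
# Per the diff, result is a dict with "summary", "file_path", and "status" keys.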
camel/agents/chat_agent.py
CHANGED
@@ -20,7 +20,6 @@ import concurrent.futures
 import hashlib
 import inspect
 import json
-import math
 import os
 import random
 import re
@@ -57,6 +56,7 @@ from pydantic import BaseModel, ValidationError
 
 from camel.agents._types import ModelResponse, ToolCallRequest
 from camel.agents._utils import (
+    build_default_summary_prompt,
     convert_to_function_tool,
     convert_to_schema,
     get_info_dict,
@@ -68,10 +68,10 @@ from camel.logger import get_logger
 from camel.memories import (
     AgentMemory,
     ChatHistoryMemory,
+    ContextRecord,
     MemoryRecord,
     ScoreBasedContextCreator,
 )
-from camel.memories.blocks.chat_history_block import EmptyMemoryWarning
 from camel.messages import (
     BaseMessage,
     FunctionCallingMessage,
@@ -103,6 +103,16 @@ from camel.utils import (
 from camel.utils.commons import dependencies_required
 from camel.utils.context_utils import ContextUtility
 
+TOKEN_LIMIT_ERROR_MARKERS = (
+    "context_length_exceeded",
+    "prompt is too long",
+    "exceeded your current quota",
+    "tokens must be reduced",
+    "context length",
+    "token count",
+    "context limit",
+)
+
 if TYPE_CHECKING:
     from camel.terminators import ResponseTerminator
 
@@ -355,9 +365,9 @@ class ChatAgent(BaseAgent):
         message_window_size (int, optional): The maximum number of previous
             messages to include in the context window. If `None`, no windowing
             is performed. (default: :obj:`None`)
-
-
-
+        summarize_threshold (int, optional): The percentage of the context
+            window that triggers summarization. If `None`, will trigger
+            summarization when the context window is full.
             (default: :obj:`None`)
         output_language (str, optional): The language to be output by the
             agent. (default: :obj:`None`)
@@ -415,6 +425,10 @@ class ChatAgent(BaseAgent):
             updates return accumulated content (current behavior). When False,
             partial updates return only the incremental delta. (default:
             :obj:`True`)
+        summary_window_ratio (float, optional): Maximum fraction of the total
+            context window that can be occupied by summary information. Used
+            to limit how much of the model's context is reserved for
+            summarization results. (default: :obj:`0.6`)
     """
 
     def __init__(
@@ -437,6 +451,7 @@ class ChatAgent(BaseAgent):
         ] = None,
         memory: Optional[AgentMemory] = None,
         message_window_size: Optional[int] = None,
+        summarize_threshold: Optional[int] = 50,
        token_limit: Optional[int] = None,
         output_language: Optional[str] = None,
         tools: Optional[List[Union[FunctionTool, Callable]]] = None,
@@ -459,6 +474,7 @@ class ChatAgent(BaseAgent):
         retry_delay: float = 1.0,
         step_timeout: Optional[float] = None,
         stream_accumulate: bool = True,
+        summary_window_ratio: float = 0.6,
     ) -> None:
         if isinstance(model, ModelManager):
             self.model_backend = model
@@ -477,7 +493,7 @@ class ChatAgent(BaseAgent):
         # Set up memory
         context_creator = ScoreBasedContextCreator(
             self.model_backend.token_counter,
-
+            self.model_backend.token_limit,
         )
 
         self._memory: AgentMemory = memory or ChatHistoryMemory(
@@ -492,9 +508,7 @@ class ChatAgent(BaseAgent):
 
         # Set up system message and initialize messages
         self._original_system_message = (
-            BaseMessage.
-                role_name="Assistant", content=system_message
-            )
+            BaseMessage.make_system_message(system_message)
             if isinstance(system_message, str)
             else system_message
         )
@@ -504,6 +518,21 @@ class ChatAgent(BaseAgent):
         )
         self.init_messages()
 
+        # Set up summarize threshold with validation
+        if summarize_threshold is not None:
+            if not (0 < summarize_threshold <= 100):
+                raise ValueError(
+                    f"summarize_threshold must be between 0 and 100, "
+                    f"got {summarize_threshold}"
+                )
+            logger.info(
+                f"Automatic context compression is enabled. Will trigger "
+                f"summarization when context window exceeds "
+                f"{summarize_threshold}% of the total token limit."
+            )
+        self.summarize_threshold = summarize_threshold
+        self._reset_summary_state()
+
         # Set up role name and role type
         self.role_name: str = (
             getattr(self.system_message, "role_name", None) or "assistant"
@@ -551,11 +580,16 @@ class ChatAgent(BaseAgent):
         self._context_utility: Optional[ContextUtility] = None
         self._context_summary_agent: Optional["ChatAgent"] = None
         self.stream_accumulate = stream_accumulate
+        self._last_tool_call_record: Optional[ToolCallingRecord] = None
+        self._last_tool_call_signature: Optional[str] = None
+        self._last_token_limit_tool_signature: Optional[str] = None
+        self.summary_window_ratio = summary_window_ratio
 
     def reset(self):
         r"""Resets the :obj:`ChatAgent` to its initial state."""
         self.terminated = False
         self.init_messages()
+        self._reset_summary_state()
         for terminator in self.response_terminators:
             terminator.reset()
 
@@ -762,6 +796,329 @@ class ChatAgent(BaseAgent):
             for func_tool in self._internal_tools.values()
         ]
 
+    @staticmethod
+    def _is_token_limit_error(error: Exception) -> bool:
+        r"""Return True when the exception message indicates a token limit."""
+        error_message = str(error).lower()
+        return any(
+            marker in error_message for marker in TOKEN_LIMIT_ERROR_MARKERS
+        )
+
+    @staticmethod
+    def _is_tool_related_record(record: MemoryRecord) -> bool:
+        r"""Determine whether the given memory record
+        belongs to a tool call."""
+        if record.role_at_backend in {
+            OpenAIBackendRole.TOOL,
+            OpenAIBackendRole.FUNCTION,
+        }:
+            return True
+
+        if (
+            record.role_at_backend == OpenAIBackendRole.ASSISTANT
+            and isinstance(record.message, FunctionCallingMessage)
+        ):
+            return True
+
+        return False
+
+    def _find_indices_to_remove_for_last_tool_pair(
+        self, recent_records: List[ContextRecord]
+    ) -> List[int]:
+        """Find indices of records that should be removed to clean up the most
+        recent incomplete tool interaction pair.
+
+        This method identifies tool call/result pairs by tool_call_id and
+        returns the exact indices to remove, allowing non-contiguous deletions.
+
+        Logic:
+        - If the last record is a tool result (TOOL/FUNCTION) with a
+          tool_call_id, find the matching assistant call anywhere in history
+          and return both indices.
+        - If the last record is an assistant tool call without a result yet,
+          return just that index.
+        - For normal messages (non tool-related): remove just the last one.
+        - Fallback: If no tool_call_id is available, use heuristic (last 2 if
+          tool-related, otherwise last 1).
+
+        Returns:
+            List[int]: Indices to remove (may be non-contiguous).
+        """
+        if not recent_records:
+            return []
+
+        last_idx = len(recent_records) - 1
+        last_record = recent_records[last_idx].memory_record
+
+        # Case A: Last is an ASSISTANT tool call with no result yet
+        if (
+            last_record.role_at_backend == OpenAIBackendRole.ASSISTANT
+            and isinstance(last_record.message, FunctionCallingMessage)
+            and last_record.message.result is None
+        ):
+            return [last_idx]
+
+        # Case B: Last is TOOL/FUNCTION result, try id-based pairing
+        if last_record.role_at_backend in {
+            OpenAIBackendRole.TOOL,
+            OpenAIBackendRole.FUNCTION,
+        }:
+            tool_id = None
+            if isinstance(last_record.message, FunctionCallingMessage):
+                tool_id = last_record.message.tool_call_id
+
+            if tool_id:
+                for idx in range(len(recent_records) - 2, -1, -1):
+                    rec = recent_records[idx].memory_record
+                    if rec.role_at_backend != OpenAIBackendRole.ASSISTANT:
+                        continue
+
+                    # Check if this assistant message contains the tool_call_id
+                    matched = False
+
+                    # Case 1: FunctionCallingMessage (single tool call)
+                    if isinstance(rec.message, FunctionCallingMessage):
+                        if rec.message.tool_call_id == tool_id:
+                            matched = True
+
+                    # Case 2: BaseMessage with multiple tool_calls in meta_dict
+                    elif (
+                        hasattr(rec.message, "meta_dict")
+                        and rec.message.meta_dict
+                    ):
+                        tool_calls_list = rec.message.meta_dict.get(
+                            "tool_calls", []
+                        )
+                        if isinstance(tool_calls_list, list):
+                            for tc in tool_calls_list:
+                                if (
+                                    isinstance(tc, dict)
+                                    and tc.get("id") == tool_id
+                                ):
+                                    matched = True
+                                    break
+
+                    if matched:
+                        # Return both assistant call and tool result indices
+                        return [idx, last_idx]
+
+            # Fallback: no tool_call_id, use heuristic
+            if self._is_tool_related_record(last_record):
+                # Remove last 2 (assume they are paired)
+                return [last_idx - 1, last_idx] if last_idx > 0 else [last_idx]
+            else:
+                return [last_idx]
+
+        # Default: non tool-related tail => remove last one
+        return [last_idx]
+
+    @staticmethod
+    def _serialize_tool_args(args: Dict[str, Any]) -> str:
+        try:
+            return json.dumps(args, ensure_ascii=False, sort_keys=True)
+        except TypeError:
+            return str(args)
+
+    @classmethod
+    def _build_tool_signature(
+        cls, func_name: str, args: Dict[str, Any]
+    ) -> str:
+        args_repr = cls._serialize_tool_args(args)
+        return f"{func_name}:{args_repr}"
+
+    def _describe_tool_call(
+        self, record: Optional[ToolCallingRecord]
+    ) -> Optional[str]:
+        if record is None:
+            return None
+        args_repr = self._serialize_tool_args(record.args)
+        return f"Tool `{record.tool_name}` invoked with arguments {args_repr}."
+
+    def _update_last_tool_call_state(
+        self, record: Optional[ToolCallingRecord]
+    ) -> None:
+        """Track the most recent tool call and its identifying signature."""
+        self._last_tool_call_record = record
+        if record is None:
+            self._last_tool_call_signature = None
+            return
+
+        args = (
+            record.args
+            if isinstance(record.args, dict)
+            else {"_raw": record.args}
+        )
+        try:
+            signature = self._build_tool_signature(record.tool_name, args)
+        except Exception:  # pragma: no cover - defensive guard
+            signature = None
+        self._last_tool_call_signature = signature
+
+    def _format_tool_limit_notice(self) -> Optional[str]:
+        record = self._last_tool_call_record
+        description = self._describe_tool_call(record)
+        if description is None:
+            return None
+        notice_lines = [
+            "[Tool Call Causing Token Limit]",
+            description,
+        ]
+
+        if record is not None:
+            result = record.result
+            if isinstance(result, bytes):
+                result_repr = result.decode(errors="replace")
+            elif isinstance(result, str):
+                result_repr = result
+            else:
+                try:
+                    result_repr = json.dumps(
+                        result, ensure_ascii=False, sort_keys=True
+                    )
+                except (TypeError, ValueError):
+                    result_repr = str(result)
+
+            result_length = len(result_repr)
+            notice_lines.append(f"Tool result length: {result_length}")
+        if self.model_backend.token_limit != 999999999:
+            notice_lines.append(
+                f"Token limit: {self.model_backend.token_limit}"
+            )
+
+        return "\n".join(notice_lines)
+
+    @staticmethod
+    def _append_user_messages_section(
+        summary_content: str, user_messages: List[str]
+    ) -> str:
+        section_title = "- **All User Messages**:"
+        sanitized_messages: List[str] = []
+        for msg in user_messages:
+            if not isinstance(msg, str):
+                msg = str(msg)
+            cleaned = " ".join(msg.strip().splitlines())
+            if cleaned:
+                sanitized_messages.append(cleaned)
+
+        bullet_block = (
+            "\n".join(f"- {m}" for m in sanitized_messages)
+            if sanitized_messages
+            else "- None noted"
+        )
+        user_section = f"{section_title}\n{bullet_block}"
+
+        summary_clean = summary_content.rstrip()
+        separator = "\n\n" if summary_clean else ""
+        return f"{summary_clean}{separator}{user_section}"
+
+    def _reset_summary_state(self) -> None:
+        self._summary_token_count = 0  # Total tokens in summary messages
+
+    def _calculate_next_summary_threshold(self) -> int:
+        r"""Calculate the next token threshold that should trigger
+        summarization.
+
+        The threshold calculation follows a progressive strategy:
+        - First time: token_limit * (summarize_threshold / 100)
+        - Subsequent times: (limit - summary_token) / 2 + summary_token
+
+        This ensures that as summaries accumulate, the threshold adapts
+        to maintain a reasonable balance between context and summaries.
+
+        Returns:
+            int: The token count threshold for next summarization.
+        """
+        token_limit = self.model_backend.token_limit
+        summary_token_count = self._summary_token_count
+
+        # First summarization: use the percentage threshold
+        if summary_token_count == 0:
+            threshold = int(token_limit * self.summarize_threshold / 100)
+        else:
+            # Subsequent summarizations: adaptive threshold
+            threshold = int(
+                (token_limit - summary_token_count)
+                * self.summarize_threshold
+                / 100
+                + summary_token_count
+            )
+
+        return threshold
+
+    def _update_memory_with_summary(
+        self, summary: str, include_summaries: bool = False
+    ) -> None:
+        r"""Update memory with summary result.
+
+        This method handles memory clearing and restoration of summaries based
+        on whether it's a progressive or full compression.
+        """
+
+        summary_content: str = summary
+
+        existing_summaries = []
+        if not include_summaries:
+            messages, _ = self.memory.get_context()
+            for msg in messages:
+                content = msg.get('content', '')
+                if isinstance(content, str) and content.startswith(
+                    '[CONTEXT_SUMMARY]'
+                ):
+                    existing_summaries.append(msg)
+
+        # Clear memory
+        self.clear_memory()
+
+        # Restore old summaries (for progressive compression)
+        for old_summary in existing_summaries:
+            content = old_summary.get('content', '')
+            if not isinstance(content, str):
+                content = str(content)
+            summary_msg = BaseMessage.make_assistant_message(
+                role_name="assistant", content=content
+            )
+            self.update_memory(summary_msg, OpenAIBackendRole.ASSISTANT)
+
+        # Add new summary
+        new_summary_msg = BaseMessage.make_assistant_message(
+            role_name="assistant", content=summary_content
+        )
+        self.update_memory(new_summary_msg, OpenAIBackendRole.ASSISTANT)
+        input_message = BaseMessage.make_assistant_message(
+            role_name="assistant",
+            content=(
+                "Please continue the conversation from "
+                "where we left it off without asking the user any further "
+                "questions. Continue with the last task that you were "
+                "asked to work on."
+            ),
+        )
+        self.update_memory(input_message, OpenAIBackendRole.ASSISTANT)
+        # Update token count
+        try:
+            summary_tokens = (
+                self.model_backend.token_counter.count_tokens_from_messages(
+                    [{"role": "assistant", "content": summary_content}]
+                )
+            )
+
+            if include_summaries:  # Full compression - reset count
+                self._summary_token_count = summary_tokens
+                logger.info(
+                    f"Full compression: Summary with {summary_tokens} tokens. "
+                    f"Total summary tokens reset to: {summary_tokens}"
+                )
+            else:  # Progressive compression - accumulate
+                self._summary_token_count += summary_tokens
+                logger.info(
+                    f"Progressive compression: New summary "
+                    f"with {summary_tokens} tokens. "
+                    f"Total summary tokens: "
+                    f"{self._summary_token_count}"
+                )
+        except Exception as e:
+            logger.warning(f"Failed to count summary tokens: {e}")
+
     def _get_external_tool_names(self) -> Set[str]:
         r"""Returns a set of external tool names."""
         return set(self._external_tool_schemas.keys())
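A worked example of the progressive threshold above (illustrative numbers, following the code rather than the docstring's "/ 2" shorthand): with token_limit = 100,000 and summarize_threshold = 50, the first summarization triggers once the context passes 50,000 tokens; if the accumulated summaries then occupy 10,000 tokens, the next trigger point is int((100,000 - 10,000) * 50 / 100 + 10,000) = 55,000 tokens.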
@@ -823,16 +1180,6 @@ class ChatAgent(BaseAgent):
     ) -> None:
         r"""Updates the agent memory with a new message.
 
-        If the single *message* exceeds the model's context window, it will
-        be **automatically split into multiple smaller chunks** before being
-        written into memory. This prevents later failures in
-        `ScoreBasedContextCreator` where an over-sized message cannot fit
-        into the available token budget at all.
-
-        This slicing logic handles both regular text messages (in the
-        `content` field) and long tool call results (in the `result` field of
-        a `FunctionCallingMessage`).
-
         Args:
             message (BaseMessage): The new message to add to the stored
                 messages.
@@ -842,153 +1189,15 @@ class ChatAgent(BaseAgent):
             (default: :obj:`None`)
             (default: obj:`None`)
         """
-
-
-
-
-        ):
-            self.memory.write_record(
-                MemoryRecord(
-                    message=message,
-                    role_at_backend=role,
-                    timestamp=timestamp,
-                    agent_id=self.agent_id,
-                )
-            )
-
-        base_ts = (
-            timestamp
+        record = MemoryRecord(
+            message=message,
+            role_at_backend=role,
+            timestamp=timestamp
             if timestamp is not None
-            else time.time_ns() / 1_000_000_000
-
-
-        # 2. Get token handling utilities, fallback if unavailable
-        try:
-            context_creator = self.memory.get_context_creator()
-            token_counter = context_creator.token_counter
-            token_limit = context_creator.token_limit
-        except AttributeError:
-            _write_single_record(message, role, base_ts)
-            return
-
-        # 3. Check if slicing is necessary
-        try:
-            current_tokens = token_counter.count_tokens_from_messages(
-                [message.to_openai_message(role)]
-            )
-
-            with warnings.catch_warnings():
-                warnings.filterwarnings("ignore", category=EmptyMemoryWarning)
-                _, ctx_tokens = self.memory.get_context()
-
-            remaining_budget = max(0, token_limit - ctx_tokens)
-
-            if current_tokens <= remaining_budget:
-                _write_single_record(message, role, base_ts)
-                return
-        except Exception as e:
-            logger.warning(
-                f"Token calculation failed before chunking, "
-                f"writing message as-is. Error: {e}"
-            )
-            _write_single_record(message, role, base_ts)
-            return
-
-        # 4. Perform slicing
-        logger.warning(
-            f"Message with {current_tokens} tokens exceeds remaining budget "
-            f"of {remaining_budget}. Slicing into smaller chunks."
+            else time.time_ns() / 1_000_000_000,  # Nanosecond precision
+            agent_id=self.agent_id,
         )
-
-        text_to_chunk: Optional[str] = None
-        is_function_result = False
-
-        if isinstance(message, FunctionCallingMessage) and isinstance(
-            message.result, str
-        ):
-            text_to_chunk = message.result
-            is_function_result = True
-        elif isinstance(message.content, str):
-            text_to_chunk = message.content
-
-        if not text_to_chunk or not text_to_chunk.strip():
-            _write_single_record(message, role, base_ts)
-            return
-        # Encode the entire text to get a list of all token IDs
-        try:
-            all_token_ids = token_counter.encode(text_to_chunk)
-        except Exception as e:
-            logger.error(f"Failed to encode text for chunking: {e}")
-            _write_single_record(message, role, base_ts)  # Fallback
-            return
-
-        if not all_token_ids:
-            _write_single_record(message, role, base_ts)  # Nothing to chunk
-            return
-
-        # 1. Base chunk size: one-tenth of the smaller of (a) total token
-        #    limit and (b) current remaining budget. This prevents us from
-        #    creating chunks that are guaranteed to overflow the
-        #    immediate context window.
-        base_chunk_size = max(1, remaining_budget) // 10
-
-        # 2. Each chunk gets a textual prefix such as:
-        #    "[chunk 3/12 of a long message]\n"
-        #    The prefix itself consumes tokens, so if we do not subtract its
-        #    length the *total* tokens of the outgoing message (prefix + body)
-        #    can exceed the intended bound. We estimate the prefix length
-        #    with a representative example that is safely long enough for the
-        #    vast majority of cases (three-digit indices).
-        sample_prefix = "[chunk 1/1000 of a long message]\n"
-        prefix_token_len = len(token_counter.encode(sample_prefix))
-
-        # 3. The real capacity for the message body is therefore the base
-        #    chunk size minus the prefix length. Fallback to at least one
-        #    token to avoid zero or negative sizes.
-        chunk_body_limit = max(1, base_chunk_size - prefix_token_len)
-
-        # 4. Calculate how many chunks we will need with this body size.
-        num_chunks = math.ceil(len(all_token_ids) / chunk_body_limit)
-        group_id = str(uuid.uuid4())
-
-        for i in range(num_chunks):
-            start_idx = i * chunk_body_limit
-            end_idx = start_idx + chunk_body_limit
-            chunk_token_ids = all_token_ids[start_idx:end_idx]
-
-            chunk_body = token_counter.decode(chunk_token_ids)
-
-            prefix = f"[chunk {i + 1}/{num_chunks} of a long message]\n"
-            new_body = prefix + chunk_body
-
-            if is_function_result and isinstance(
-                message, FunctionCallingMessage
-            ):
-                new_msg: BaseMessage = FunctionCallingMessage(
-                    role_name=message.role_name,
-                    role_type=message.role_type,
-                    meta_dict=message.meta_dict,
-                    content=message.content,
-                    func_name=message.func_name,
-                    args=message.args,
-                    result=new_body,
-                    tool_call_id=message.tool_call_id,
-                )
-            else:
-                new_msg = message.create_new_instance(new_body)
-
-            meta = (new_msg.meta_dict or {}).copy()
-            meta.update(
-                {
-                    "chunk_idx": i + 1,
-                    "chunk_total": num_chunks,
-                    "chunk_group_id": group_id,
-                }
-            )
-            new_msg.meta_dict = meta
-
-            # Increment timestamp slightly to maintain order
-            _write_single_record(new_msg, role, base_ts + i * 1e-6)
+        self.memory.write_record(record)
 
     def load_memory(self, memory: AgentMemory) -> None:
         r"""Load the provided memory into the agent.
@@ -1042,40 +1251,333 @@ class ChatAgent(BaseAgent):
                     f"Skipping invalid record: malformed message "
                     f"structure in {record_dict}"
                 )
-                continue
+                continue
+
+            try:
+                record = MemoryRecord.from_dict(record_dict)
+                self.memory.write_records([record])
+            except Exception as e:
+                logger.warning(
+                    f"Error converting record to MemoryRecord: {e}. "
+                    f"Record: {record_dict}"
+                )
+        logger.info(f"Memory loaded from {path}")
+
+    def save_memory(self, path: str) -> None:
+        r"""Retrieves the current conversation data from memory and writes it
+        into a JSON file using JsonStorage.
+
+        Args:
+            path (str): Target file path to store JSON data.
+        """
+        json_store = JsonStorage(Path(path))
+        context_records = self.memory.retrieve()
+        to_save = [cr.memory_record.to_dict() for cr in context_records]
+        json_store.save(to_save)
+        logger.info(f"Memory saved to {path}")
+
+    def summarize(
+        self,
+        filename: Optional[str] = None,
+        summary_prompt: Optional[str] = None,
+        response_format: Optional[Type[BaseModel]] = None,
+        working_directory: Optional[Union[str, Path]] = None,
+        include_summaries: bool = False,
+        add_user_messages: bool = True,
+    ) -> Dict[str, Any]:
+        r"""Summarize the agent's current conversation context and persist it
+        to a markdown file.
+
+        .. deprecated:: 0.2.80
+            Use :meth:`asummarize` for async/await support and better
+            performance in parallel summarization workflows.
+
+        Args:
+            filename (Optional[str]): The base filename (without extension) to
+                use for the markdown file. Defaults to a timestamped name when
+                not provided.
+            summary_prompt (Optional[str]): Custom prompt for the summarizer.
+                When omitted, a default prompt highlighting key decisions,
+                action items, and open questions is used.
+            response_format (Optional[Type[BaseModel]]): A Pydantic model
+                defining the expected structure of the response. If provided,
+                the summary will be generated as structured output and included
+                in the result.
+            include_summaries (bool): Whether to include previously generated
+                summaries in the content to be summarized. If False (default),
+                only non-summary messages will be summarized. If True, all
+                messages including previous summaries will be summarized
+                (full compression). (default: :obj:`False`)
+            working_directory (Optional[str|Path]): Optional directory to save
+                the markdown summary file. If provided, overrides the default
+                directory used by ContextUtility.
+            add_user_messages (bool): Whether add user messages to summary.
+                (default: :obj:`True`)
+        Returns:
+            Dict[str, Any]: A dictionary containing the summary text, file
+                path, status message, and optionally structured_summary if
+                response_format was provided.
+
+        See Also:
+            :meth:`asummarize`: Async version for non-blocking LLM calls.
+        """
+
+        warnings.warn(
+            "summarize() is synchronous. Consider using asummarize() "
+            "for async/await support and better performance.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+
+        result: Dict[str, Any] = {
+            "summary": "",
+            "file_path": None,
+            "status": "",
+        }
+
+        try:
+            # Use external context if set, otherwise create local one
+            if self._context_utility is None:
+                if working_directory is not None:
+                    self._context_utility = ContextUtility(
+                        working_directory=str(working_directory)
+                    )
+                else:
+                    self._context_utility = ContextUtility()
+            context_util = self._context_utility
+
+            # Get conversation directly from agent's memory
+            messages, _ = self.memory.get_context()
+
+            if not messages:
+                status_message = (
+                    "No conversation context available to summarize."
+                )
+                result["status"] = status_message
+                return result
+
+            # Convert messages to conversation text
+            conversation_lines = []
+            user_messages: List[str] = []
+            for message in messages:
+                role = message.get('role', 'unknown')
+                content = message.get('content', '')
+
+                # Skip summary messages if include_summaries is False
+                if not include_summaries and isinstance(content, str):
+                    # Check if this is a summary message by looking for marker
+                    if content.startswith('[CONTEXT_SUMMARY]'):
+                        continue
+
+                # Handle tool call messages (assistant calling tools)
+                tool_calls = message.get('tool_calls')
+                if tool_calls and isinstance(tool_calls, (list, tuple)):
+                    for tool_call in tool_calls:
+                        # Handle both dict and object formats
+                        if isinstance(tool_call, dict):
+                            func_name = tool_call.get('function', {}).get(
+                                'name', 'unknown_tool'
+                            )
+                            func_args_str = tool_call.get('function', {}).get(
+                                'arguments', '{}'
+                            )
+                        else:
+                            # Handle object format (Pydantic or similar)
+                            func_name = getattr(
+                                getattr(tool_call, 'function', None),
+                                'name',
+                                'unknown_tool',
+                            )
+                            func_args_str = getattr(
+                                getattr(tool_call, 'function', None),
+                                'arguments',
+                                '{}',
+                            )
+
+                        # Parse and format arguments for readability
+                        try:
+                            import json
+
+                            args_dict = json.loads(func_args_str)
+                            args_formatted = ', '.join(
+                                f"{k}={v}" for k, v in args_dict.items()
+                            )
+                        except (json.JSONDecodeError, ValueError, TypeError):
+                            args_formatted = func_args_str
+
+                        conversation_lines.append(
+                            f"[TOOL CALL] {func_name}({args_formatted})"
+                        )
+
+                # Handle tool response messages
+                elif role == 'tool':
+                    tool_name = message.get('name', 'unknown_tool')
+                    if not content:
+                        content = str(message.get('content', ''))
+                    conversation_lines.append(
+                        f"[TOOL RESULT] {tool_name} → {content}"
+                    )
+
+                # Handle regular content messages (user/assistant/system)
+                elif content:
+                    content = str(content)
+                    if role == 'user':
+                        user_messages.append(content)
+                    conversation_lines.append(f"{role}: {content}")
+
+            conversation_text = "\n".join(conversation_lines).strip()
+
+            if not conversation_text:
+                status_message = (
+                    "Conversation context is empty; skipping summary."
+                )
+                result["status"] = status_message
+                return result
+
+            if self._context_summary_agent is None:
+                self._context_summary_agent = ChatAgent(
+                    system_message=(
+                        "You are a helpful assistant that summarizes "
+                        "conversations"
+                    ),
+                    model=self.model_backend,
+                    agent_id=f"{self.agent_id}_context_summarizer",
+                )
+            else:
+                self._context_summary_agent.reset()
+
+            if summary_prompt:
+                prompt_text = (
+                    f"{summary_prompt.rstrip()}\n\n"
+                    f"AGENT CONVERSATION TO BE SUMMARIZED:\n"
+                    f"{conversation_text}"
+                )
+            else:
+                prompt_text = build_default_summary_prompt(conversation_text)
+
+            try:
+                # Use structured output if response_format is provided
+                if response_format:
+                    response = self._context_summary_agent.step(
+                        prompt_text, response_format=response_format
+                    )
+                else:
+                    response = self._context_summary_agent.step(prompt_text)
+            except Exception as step_exc:
+                error_message = (
+                    f"Failed to generate summary using model: {step_exc}"
+                )
+                logger.error(error_message)
+                result["status"] = error_message
+                return result
+
+            if not response.msgs:
+                status_message = (
+                    "Failed to generate summary from model response."
+                )
+                result["status"] = status_message
+                return result
+
+            summary_content = response.msgs[-1].content.strip()
+            if not summary_content:
+                status_message = "Generated summary is empty."
+                result["status"] = status_message
+                return result
+
+            # handle structured output if response_format was provided
+            structured_output = None
+            if response_format and response.msgs[-1].parsed:
+                structured_output = response.msgs[-1].parsed
+
+            # determine filename: use provided filename, or extract from
+            # structured output, or generate timestamp
+            if filename:
+                base_filename = filename
+            elif structured_output and hasattr(
+                structured_output, 'task_title'
+            ):
+                # use task_title from structured output for filename
+                task_title = structured_output.task_title
+                clean_title = ContextUtility.sanitize_workflow_filename(
+                    task_title
+                )
+                base_filename = (
+                    f"{clean_title}_workflow" if clean_title else "workflow"
+                )
+            else:
+                base_filename = f"context_summary_{datetime.now().strftime('%Y%m%d_%H%M%S')}"  # noqa: E501
+
+            base_filename = Path(base_filename).with_suffix("").name
+
+            metadata = context_util.get_session_metadata()
+            metadata.update(
+                {
+                    "agent_id": self.agent_id,
+                    "message_count": len(messages),
+                }
+            )
+
+            # convert structured output to custom markdown if present
+            if structured_output:
+                # convert structured output to custom markdown
+                summary_content = context_util.structured_output_to_markdown(
+                    structured_data=structured_output, metadata=metadata
+                )
+            if add_user_messages:
+                summary_content = self._append_user_messages_section(
+                    summary_content, user_messages
+                )
 
-
-
-
-
-
-
-
-
-
+            # Save the markdown (either custom structured or default)
+            save_status = context_util.save_markdown_file(
+                base_filename,
+                summary_content,
+                title="Conversation Summary"
+                if not structured_output
+                else None,
+                metadata=metadata if not structured_output else None,
+            )
 
-
-
-
+            file_path = (
+                context_util.get_working_directory() / f"{base_filename}.md"
+            )
+            summary_content = (
+                f"[CONTEXT_SUMMARY] The following is a summary of our "
+                f"conversation from a previous session: {summary_content}"
+            )
+            # Prepare result dictionary
+            result_dict = {
+                "summary": summary_content,
+                "file_path": str(file_path),
+                "status": save_status,
+                "structured_summary": structured_output,
+            }
 
-
-
-
-        json_store = JsonStorage(Path(path))
-        context_records = self.memory.retrieve()
-        to_save = [cr.memory_record.to_dict() for cr in context_records]
-        json_store.save(to_save)
-        logger.info(f"Memory saved to {path}")
+            result.update(result_dict)
+            logger.info("Conversation summary saved to %s", file_path)
+            return result
 
-
+        except Exception as exc:
+            error_message = f"Failed to summarize conversation context: {exc}"
+            logger.error(error_message)
+            result["status"] = error_message
+            return result
+
+    async def asummarize(
         self,
         filename: Optional[str] = None,
         summary_prompt: Optional[str] = None,
         response_format: Optional[Type[BaseModel]] = None,
         working_directory: Optional[Union[str, Path]] = None,
+        include_summaries: bool = False,
+        add_user_messages: bool = True,
     ) -> Dict[str, Any]:
-        r"""
-        to a markdown file.
+        r"""Asynchronously summarize the agent's current conversation context
+        and persist it to a markdown file.
+
+        This is the async version of summarize() that uses astep() for
+        non-blocking LLM calls, enabling parallel summarization of multiple
+        agents.
 
         Args:
             filename (Optional[str]): The base filename (without extension) to
@@ -1091,7 +1593,13 @@ class ChatAgent(BaseAgent):
             working_directory (Optional[str|Path]): Optional directory to save
                 the markdown summary file. If provided, overrides the default
                 directory used by ContextUtility.
-
+            include_summaries (bool): Whether to include previously generated
+                summaries in the content to be summarized. If False (default),
+                only non-summary messages will be summarized. If True, all
+                messages including previous summaries will be summarized
+                (full compression). (default: :obj:`False`)
+            add_user_messages (bool): Whether add user messages to summary.
+                (default: :obj:`True`)
         Returns:
             Dict[str, Any]: A dictionary containing the summary text, file
                 path, status message, and optionally structured_summary if
@@ -1127,10 +1635,17 @@ class ChatAgent(BaseAgent):
 
             # Convert messages to conversation text
             conversation_lines = []
+            user_messages: List[str] = []
             for message in messages:
                 role = message.get('role', 'unknown')
                 content = message.get('content', '')
 
+                # Skip summary messages if include_summaries is False
+                if not include_summaries and isinstance(content, str):
+                    # Check if this is a summary message by looking for marker
+                    if content.startswith('[CONTEXT_SUMMARY]'):
+                        continue
+
                 # Handle tool call messages (assistant calling tools)
                 tool_calls = message.get('tool_calls')
                 if tool_calls and isinstance(tool_calls, (list, tuple)):
@@ -1182,6 +1697,9 @@ class ChatAgent(BaseAgent):
 
                 # Handle regular content messages (user/assistant/system)
                 elif content:
+                    content = str(content)
+                    if role == 'user':
+                        user_messages.append(content)
                     conversation_lines.append(f"{role}: {content}")
 
             conversation_text = "\n".join(conversation_lines).strip()
@@ -1212,20 +1730,25 @@ class ChatAgent(BaseAgent):
                     f"{conversation_text}"
                 )
             else:
-                prompt_text = (
-                    "Summarize the context information in concise markdown "
-                    "bullet points highlighting key decisions, action items.\n"
-                    f"Context information:\n{conversation_text}"
-                )
+                prompt_text = build_default_summary_prompt(conversation_text)
 
             try:
                 # Use structured output if response_format is provided
                 if response_format:
-                    response = self._context_summary_agent.
+                    response = await self._context_summary_agent.astep(
                         prompt_text, response_format=response_format
                     )
                 else:
-                    response = self._context_summary_agent.
+                    response = await self._context_summary_agent.astep(
+                        prompt_text
+                    )
+
+                # Handle streaming response
+                if isinstance(response, AsyncStreamingChatAgentResponse):
+                    # Collect final response
+                    final_response = await response
+                    response = final_response
+
             except Exception as step_exc:
                 error_message = (
                     f"Failed to generate summary using model: {step_exc}"
@@ -1247,11 +1770,29 @@ class ChatAgent(BaseAgent):
                 result["status"] = status_message
                 return result
 
-
-
-
-
-
+            # handle structured output if response_format was provided
+            structured_output = None
+            if response_format and response.msgs[-1].parsed:
+                structured_output = response.msgs[-1].parsed
+
+            # determine filename: use provided filename, or extract from
+            # structured output, or generate timestamp
+            if filename:
+                base_filename = filename
+            elif structured_output and hasattr(
+                structured_output, 'task_title'
+            ):
+                # use task_title from structured output for filename
+                task_title = structured_output.task_title
+                clean_title = ContextUtility.sanitize_workflow_filename(
+                    task_title
+                )
+                base_filename = (
+                    f"{clean_title}_workflow" if clean_title else "workflow"
+                )
+            else:
+                base_filename = f"context_summary_{datetime.now().strftime('%Y%m%d_%H%M%S')}"  # noqa: E501
 
             base_filename = Path(base_filename).with_suffix("").name
 
             metadata = context_util.get_session_metadata()
@@ -1262,14 +1803,16 @@ class ChatAgent(BaseAgent):
                 }
             )
 
-            #
-            structured_output
-
-            structured_output = response.msgs[-1].parsed
-            # Convert structured output to custom markdown
+            # convert structured output to custom markdown if present
+            if structured_output:
+                # convert structured output to custom markdown
                 summary_content = context_util.structured_output_to_markdown(
                     structured_data=structured_output, metadata=metadata
                 )
+            if add_user_messages:
+                summary_content = self._append_user_messages_section(
+                    summary_content, user_messages
+                )
 
             # Save the markdown (either custom structured or default)
             save_status = context_util.save_markdown_file(
@@ -1285,6 +1828,11 @@ class ChatAgent(BaseAgent):
                 context_util.get_working_directory() / f"{base_filename}.md"
             )
 
+            summary_content = (
+                f"[CONTEXT_SUMMARY] The following is a summary of our "
+                f"conversation from a previous session: {summary_content}"
+            )
+
             # Prepare result dictionary
             result_dict = {
                 "summary": summary_content,
@@ -1312,7 +1860,14 @@ class ChatAgent(BaseAgent):
         self.memory.clear()
 
         if self.system_message is not None:
-            self.
+            self.memory.write_record(
+                MemoryRecord(
+                    message=self.system_message,
+                    role_at_backend=OpenAIBackendRole.SYSTEM,
+                    timestamp=time.time_ns() / 1_000_000_000,
+                    agent_id=self.agent_id,
+                )
+            )
 
     def _generate_system_message_for_output_language(
         self,
@@ -1337,26 +1892,70 @@ class ChatAgent(BaseAgent):
             content = self._original_system_message.content + language_prompt
             return self._original_system_message.create_new_instance(content)
         else:
-            return BaseMessage.
-                role_name="Assistant",
-                content=language_prompt,
-            )
+            return BaseMessage.make_system_message(language_prompt)
 
     def init_messages(self) -> None:
         r"""Initializes the stored messages list with the current system
         message.
         """
-        self.
-
-
-
-
-
-
-
-
-
-
+        self._reset_summary_state()
+        self.clear_memory()
+
+    def update_system_message(
+        self,
+        system_message: Union[BaseMessage, str],
+        reset_memory: bool = True,
+    ) -> None:
+        r"""Update the system message.
+        It will reset conversation with new system message.
+
+        Args:
+            system_message (Union[BaseMessage, str]): The new system message.
+                Can be either a BaseMessage object or a string.
+                If a string is provided, it will be converted
+                into a BaseMessage object.
+            reset_memory (bool):
+                Whether to reinitialize conversation messages after updating
+                the system message. Defaults to True.
+        """
+        if system_message is None:
+            raise ValueError("system_message is required and cannot be None. ")
+        self._original_system_message = (
+            BaseMessage.make_system_message(system_message)
+            if isinstance(system_message, str)
+            else system_message
+        )
+        self._system_message = (
+            self._generate_system_message_for_output_language()
+        )
+        if reset_memory:
+            self.init_messages()
+
+    def append_to_system_message(
+        self, content: str, reset_memory: bool = True
+    ) -> None:
+        """Append additional context to existing system message.
+
+        Args:
+            content (str): The additional system message.
+            reset_memory (bool):
+                Whether to reinitialize conversation messages after appending
+                additional context. Defaults to True.
+        """
+        original_content = (
+            self._original_system_message.content
+            if self._original_system_message
+            else ""
+        )
+        new_system_message = original_content + '\n' + content
+        self._original_system_message = BaseMessage.make_system_message(
+            new_system_message
+        )
+        self._system_message = (
+            self._generate_system_message_for_output_language()
+        )
+        if reset_memory:
+            self.init_messages()
 
     def reset_to_original_system_message(self) -> None:
         r"""Reset system message to original, removing any appended context.
@@ -1780,22 +2379,122 @@ class ChatAgent(BaseAgent):
 
             try:
                 openai_messages, num_tokens = self.memory.get_context()
+                if self.summarize_threshold is not None:
+                    threshold = self._calculate_next_summary_threshold()
+                    summary_token_count = self._summary_token_count
+                    token_limit = self.model_backend.token_limit
+
+                    if num_tokens <= token_limit:
+                        if (
+                            summary_token_count
+                            > token_limit * self.summary_window_ratio
+                        ):
+                            logger.info(
+                                f"Summary tokens ({summary_token_count}) "
+                                f"exceed limit, full compression."
+                            )
+                            # Summarize everything (including summaries)
+                            summary = self.summarize(include_summaries=True)
+                            self._update_memory_with_summary(
+                                summary.get("summary", ""),
+                                include_summaries=True,
+                            )
+                        elif num_tokens > threshold:
+                            logger.info(
+                                f"Token count ({num_tokens}) exceed threshold "
+                                f"({threshold}). Triggering summarization."
+                            )
+                            # Only summarize non-summary content
+                            summary = self.summarize(include_summaries=False)
+                            self._update_memory_with_summary(
+                                summary.get("summary", ""),
+                                include_summaries=False,
+                            )
                 accumulated_context_tokens += num_tokens
             except RuntimeError as e:
                 return self._step_terminate(
                     e.args[1], tool_call_records, "max_tokens_exceeded"
                 )
-            # Get response from model backend
-
-
-
-
-
-
-
-
-
+            # Get response from model backend with token limit error handling
+            try:
+                response = self._get_model_response(
+                    openai_messages,
+                    num_tokens=num_tokens,
+                    current_iteration=iteration_count,
+                    response_format=response_format,
+                    tool_schemas=[]
+                    if disable_tools
+                    else self._get_full_tool_schemas(),
+                    prev_num_openai_messages=prev_num_openai_messages,
+                )
+            except Exception as exc:
+                logger.exception("Model error: %s", exc)
+
+                if self._is_token_limit_error(exc):
+                    tool_signature = self._last_tool_call_signature
+                    if (
+                        tool_signature is not None
+                        and tool_signature
+                        == self._last_token_limit_tool_signature
+                    ):
+                        description = self._describe_tool_call(
+                            self._last_tool_call_record
+                        )
+                        repeated_msg = (
+                            "Context exceeded again by the same tool call."
+                        )
+                        if description:
+                            repeated_msg += f" {description}"
+                        raise RuntimeError(repeated_msg) from exc
+
+                    user_message_count = sum(
+                        1
+                        for msg in openai_messages
+                        if getattr(msg, "role", None) == "user"
+                    )
+                    if (
+                        user_message_count == 1
+                        and getattr(openai_messages[-1], "role", None)
+                        == "user"
+                    ):
+                        raise RuntimeError(
+                            "The provided user input alone exceeds the "
+                            "context window. Please shorten the input."
+                        ) from exc
+
+                    logger.warning(
+                        "Token limit exceeded error detected. "
+                        "Summarizing context."
+                    )
+
+                    recent_records: List[ContextRecord]
+                    try:
+                        recent_records = self.memory.retrieve()
+                    except Exception:  # pragma: no cover - defensive guard
+                        recent_records = []
+
+                    indices_to_remove = (
+                        self._find_indices_to_remove_for_last_tool_pair(
+                            recent_records
+                        )
+                    )
+                    self.memory.remove_records_by_indices(indices_to_remove)
+
+                    summary = self.summarize(include_summaries=False)
+                    tool_notice = self._format_tool_limit_notice()
+                    summary_messages = summary.get("summary", "")
+
+                    if tool_notice:
+                        summary_messages += "\n\n" + tool_notice
+
+                    self._update_memory_with_summary(
+                        summary_messages, include_summaries=False
+                    )
+                    self._last_token_limit_tool_signature = tool_signature
+                    return self._step_impl(input_message, response_format)
+
+                raise
+
             prev_num_openai_messages = len(openai_messages)
             iteration_count += 1
 
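The hunk above makes the synchronous step loop summarize proactively: before each model call it compares the live token count against a rolling threshold and checks whether earlier summaries have themselves grown past a window ratio of the model's context. The standalone sketch below mirrors that decision rule; the function name and the concrete numbers are illustrative assumptions, not camel defaults.

    # Hedged sketch of the summarization decision; names mirror the diff,
    # values are hypothetical.
    def pick_summarization_mode(
        num_tokens: int,
        threshold: int,
        summary_tokens: int,
        token_limit: int,
        window_ratio: float,
    ) -> str:
        if num_tokens > token_limit:
            return "none"  # already over the hard limit; handled by the error path
        if summary_tokens > token_limit * window_ratio:
            return "full"  # summaries too large: re-summarize everything
        if num_tokens > threshold:
            return "partial"  # compress only non-summary history
        return "none"

    print(pick_summarization_mode(90_000, 80_000, 5_000, 128_000, 0.6))    # partial
    print(pick_summarization_mode(100_000, 80_000, 80_000, 128_000, 0.6))  # full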
@@ -1990,6 +2689,7 @@ class ChatAgent(BaseAgent):
         step_token_usage = self._create_token_usage_tracker()
         iteration_count: int = 0
         prev_num_openai_messages: int = 0
+
         while True:
             if self.pause_event is not None and not self.pause_event.is_set():
                 if isinstance(self.pause_event, asyncio.Event):
@@ -2000,21 +2700,128 @@ class ChatAgent(BaseAgent):
                     await loop.run_in_executor(None, self.pause_event.wait)
             try:
                 openai_messages, num_tokens = self.memory.get_context()
+                if self.summarize_threshold is not None:
+                    threshold = self._calculate_next_summary_threshold()
+                    summary_token_count = self._summary_token_count
+                    token_limit = self.model_backend.token_limit
+
+                    if num_tokens <= token_limit:
+                        if (
+                            summary_token_count
+                            > token_limit * self.summary_window_ratio
+                        ):
+                            logger.info(
+                                f"Summary tokens ({summary_token_count}) "
+                                f"exceed limit, full compression."
+                            )
+                            # Summarize everything (including summaries)
+                            summary = await self.asummarize(
+                                include_summaries=True
+                            )
+                            self._update_memory_with_summary(
+                                summary.get("summary", ""),
+                                include_summaries=True,
+                            )
+                        elif num_tokens > threshold:
+                            logger.info(
+                                f"Token count ({num_tokens}) exceed threshold "
+                                "({threshold}). Triggering summarization."
+                            )
+                            # Only summarize non-summary content
+                            summary = await self.asummarize(
+                                include_summaries=False
+                            )
+                            self._update_memory_with_summary(
+                                summary.get("summary", ""),
+                                include_summaries=False,
+                            )
                 accumulated_context_tokens += num_tokens
             except RuntimeError as e:
                 return self._step_terminate(
                     e.args[1], tool_call_records, "max_tokens_exceeded"
                 )
-            response
-
-
-
-
-
-
-
-
-
+            # Get response from model backend with token limit error handling
+            try:
+                response = await self._aget_model_response(
+                    openai_messages,
+                    num_tokens=num_tokens,
+                    current_iteration=iteration_count,
+                    response_format=response_format,
+                    tool_schemas=[]
+                    if disable_tools
+                    else self._get_full_tool_schemas(),
+                    prev_num_openai_messages=prev_num_openai_messages,
+                )
+            except Exception as exc:
+                logger.exception("Model error: %s", exc)
+
+                if self._is_token_limit_error(exc):
+                    tool_signature = self._last_tool_call_signature
+                    if (
+                        tool_signature is not None
+                        and tool_signature
+                        == self._last_token_limit_tool_signature
+                    ):
+                        description = self._describe_tool_call(
+                            self._last_tool_call_record
+                        )
+                        repeated_msg = (
+                            "Context exceeded again by the same tool call."
+                        )
+                        if description:
+                            repeated_msg += f" {description}"
+                        raise RuntimeError(repeated_msg) from exc
+
+                    user_message_count = sum(
+                        1
+                        for msg in openai_messages
+                        if getattr(msg, "role", None) == "user"
+                    )
+                    if (
+                        user_message_count == 1
+                        and getattr(openai_messages[-1], "role", None)
+                        == "user"
+                    ):
+                        raise RuntimeError(
+                            "The provided user input alone exceeds the"
+                            "context window. Please shorten the input."
+                        ) from exc
+
+                    logger.warning(
+                        "Token limit exceeded error detected. "
+                        "Summarizing context."
+                    )
+
+                    recent_records: List[ContextRecord]
+                    try:
+                        recent_records = self.memory.retrieve()
+                    except Exception:  # pragma: no cover - defensive guard
+                        recent_records = []
+
+                    indices_to_remove = (
+                        self._find_indices_to_remove_for_last_tool_pair(
+                            recent_records
+                        )
+                    )
+                    self.memory.remove_records_by_indices(indices_to_remove)
+
+                    summary = await self.asummarize()
+
+                    tool_notice = self._format_tool_limit_notice()
+                    summary_messages = summary.get("summary", "")
+
+                    if tool_notice:
+                        summary_messages += "\n\n" + tool_notice
+                    self._update_memory_with_summary(
+                        summary_messages, include_summaries=False
+                    )
+                    self._last_token_limit_tool_signature = tool_signature
+                    return await self._astep_non_streaming_task(
+                        input_message, response_format
+                    )
+
+                raise
+
             prev_num_openai_messages = len(openai_messages)
             iteration_count += 1
 
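The asynchronous loop above mirrors the synchronous change: a context-window failure from the model backend drops the last tool call/result pair from memory, summarizes what remains, and retries the step, while a second failure from the same tool call is surfaced as a hard error. A rough, self-contained sketch of that recovery sequence; the record list, `summarize`, and `retry_step` are stand-ins, not camel APIs.

    def recover_and_retry(records, tool_pair_indices, signature,
                          last_failed_signature, summarize, retry_step):
        # Same tool call already overflowed once: do not loop forever.
        if signature is not None and signature == last_failed_signature:
            raise RuntimeError("Context exceeded again by the same tool call.")
        # Drop the offending tool call/result pair, then compress the rest.
        kept = [r for i, r in enumerate(records) if i not in set(tool_pair_indices)]
        return retry_step(summarize(kept)), signature

    records = ["system", "user", "assistant tool_call", "oversized tool result"]
    result, failed_sig = recover_and_retry(
        records, [2, 3], "call_1", None,
        lambda kept: "summary of earlier turns",
        lambda summary: f"retried with: {summary}",
    )
    print(result)  # retried with: summary of earlier turns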
@@ -2091,6 +2898,8 @@ class ChatAgent(BaseAgent):
         if self.prune_tool_calls_from_memory and tool_call_records:
             self.memory.clean_tool_calls()
 
+        self._last_token_limit_user_signature = None
+
         return self._convert_to_chatagent_response(
             response,
             tool_call_records,
@@ -2187,6 +2996,8 @@ class ChatAgent(BaseAgent):
                 if response:
                     break
             except RateLimitError as e:
+                if self._is_token_limit_error(e):
+                    raise
                 last_error = e
                 if attempt < self.retry_attempts - 1:
                     delay = min(self.retry_delay * (2**attempt), 60.0)
@@ -2204,7 +3015,6 @@ class ChatAgent(BaseAgent):
             except Exception:
                 logger.error(
                     f"Model error: {self.model_backend.model_type}",
-                    exc_info=True,
                 )
                 raise
             else:
@@ -2251,6 +3061,8 @@ class ChatAgent(BaseAgent):
                 if response:
                     break
             except RateLimitError as e:
+                if self._is_token_limit_error(e):
+                    raise
                 last_error = e
                 if attempt < self.retry_attempts - 1:
                     delay = min(self.retry_delay * (2**attempt), 60.0)
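Both retry loops now re-raise a RateLimitError immediately when `_is_token_limit_error` indicates the 429 is really a context overflow, since backing off and retrying the identical request can never succeed. A sketch of what such a classifier might look like; the substrings below are assumptions for illustration, not the library's actual checks.

    def looks_like_token_limit_error(exc: Exception) -> bool:
        # Providers sometimes report context overflow under a rate-limit status.
        text = str(exc).lower()
        return any(
            marker in text
            for marker in ("context length", "maximum context", "too many tokens")
        )

    print(looks_like_token_limit_error(
        ValueError("This model's maximum context length is 8192 tokens")))  # True
    print(looks_like_token_limit_error(
        ValueError("Rate limit reached, please retry after 20s")))          # False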
@@ -2758,6 +3570,7 @@ class ChatAgent(BaseAgent):
             tool_call_id=tool_call_id,
         )
 
+        self._update_last_tool_call_state(tool_record)
         return tool_record
 
     def _stream(
@@ -3319,12 +4132,14 @@ class ChatAgent(BaseAgent):
                 timestamp=base_timestamp + 1e-6,
             )
 
-
+            tool_record = ToolCallingRecord(
                 tool_name=function_name,
                 args=args,
                 result=result,
                 tool_call_id=tool_call_id,
             )
+            self._update_last_tool_call_state(tool_record)
+            return tool_record
 
         except Exception as e:
             error_msg = (
@@ -3346,12 +4161,14 @@ class ChatAgent(BaseAgent):
 
             self.update_memory(func_msg, OpenAIBackendRole.FUNCTION)
 
-
+            tool_record = ToolCallingRecord(
                 tool_name=function_name,
                 args=args,
                 result=result,
                 tool_call_id=tool_call_id,
             )
+            self._update_last_tool_call_state(tool_record)
+            return tool_record
         else:
             logger.warning(
                 f"Tool '{function_name}' not found in internal tools"
@@ -3373,6 +4190,23 @@ class ChatAgent(BaseAgent):
             tool_call_id = tool_call_data['id']
 
             if function_name in self._internal_tools:
+                # Create the tool call message
+                assist_msg = FunctionCallingMessage(
+                    role_name=self.role_name,
+                    role_type=self.role_type,
+                    meta_dict=None,
+                    content="",
+                    func_name=function_name,
+                    args=args,
+                    tool_call_id=tool_call_id,
+                )
+                assist_ts = time.time_ns() / 1_000_000_000
+                self.update_memory(
+                    assist_msg,
+                    OpenAIBackendRole.ASSISTANT,
+                    timestamp=assist_ts,
+                )
+
                 tool = self._internal_tools[function_name]
                 try:
                     # Try different invocation paths in order of preference
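The hunk above records the assistant's tool-call message in memory before the tool is invoked, with its own timestamp, so the call is preserved in order even if the tool later raises or its result overflows the context. A minimal illustration of that ordering guarantee, using a plain list as a stand-in for agent memory.

    import time

    history = []

    def record(kind, payload):
        # Each write carries its own timestamp, mirroring update_memory above.
        history.append((time.time_ns() / 1_000_000_000, kind, payload))

    record("assistant_tool_call", {"name": "search", "args": {"q": "camel"}})
    try:
        raise TimeoutError("tool failed")          # simulated tool failure
    except TimeoutError as e:
        record("tool_result", {"error": str(e)})   # failure still lands after the call

    print([kind for _, kind, _ in sorted(history)])
    # ['assistant_tool_call', 'tool_result']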
@@ -3401,19 +4235,8 @@ class ChatAgent(BaseAgent):
                     else:
                         # Fallback: synchronous call
                         result = tool(**args)
-                    # First, create and record the assistant message with tool
-                    # call
-                    assist_msg = FunctionCallingMessage(
-                        role_name=self.role_name,
-                        role_type=self.role_type,
-                        meta_dict=None,
-                        content="",
-                        func_name=function_name,
-                        args=args,
-                        tool_call_id=tool_call_id,
-                    )
 
-                    #
+                    # Create the tool response message
                     func_msg = FunctionCallingMessage(
                         role_name=self.role_name,
                         role_type=self.role_type,
@@ -3423,31 +4246,21 @@ class ChatAgent(BaseAgent):
                         result=result,
                         tool_call_id=tool_call_id,
                     )
-
-                    # Record both messages with precise timestamps to ensure
-                    # correct ordering
-                    current_time_ns = time.time_ns()
-                    base_timestamp = (
-                        current_time_ns / 1_000_000_000
-                    )  # Convert to seconds
-
-                    self.update_memory(
-                        assist_msg,
-                        OpenAIBackendRole.ASSISTANT,
-                        timestamp=base_timestamp,
-                    )
+                    func_ts = time.time_ns() / 1_000_000_000
                     self.update_memory(
                         func_msg,
                         OpenAIBackendRole.FUNCTION,
-                        timestamp=
+                        timestamp=func_ts,
                     )
 
-
+                    tool_record = ToolCallingRecord(
                         tool_name=function_name,
                         args=args,
                         result=result,
                         tool_call_id=tool_call_id,
                     )
+                    self._update_last_tool_call_state(tool_record)
+                    return tool_record
 
                 except Exception as e:
                     error_msg = (
@@ -3466,15 +4279,21 @@ class ChatAgent(BaseAgent):
                 result=result,
                 tool_call_id=tool_call_id,
             )
+            func_ts = time.time_ns() / 1_000_000_000
+            self.update_memory(
+                func_msg,
+                OpenAIBackendRole.FUNCTION,
+                timestamp=func_ts,
+            )
 
-
-
-            return ToolCallingRecord(
+            tool_record = ToolCallingRecord(
                 tool_name=function_name,
                 args=args,
                 result=result,
                 tool_call_id=tool_call_id,
             )
+            self._update_last_tool_call_state(tool_record)
+            return tool_record
         else:
             logger.warning(
                 f"Tool '{function_name}' not found in internal tools"
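Every place that builds a ToolCallingRecord above now passes it through `_update_last_tool_call_state`, which is what lets the token-limit recovery path name the specific call that blew the context. A small sketch of that bookkeeping; the classes below are illustrative stand-ins, not the camel types.

    from dataclasses import dataclass
    from typing import Any, Dict, Optional, Tuple

    @dataclass
    class ToolRecord:
        tool_name: str
        args: Dict[str, Any]
        tool_call_id: str

    class LastToolCallTracker:
        def __init__(self) -> None:
            self.record: Optional[ToolRecord] = None

        def update(self, record: ToolRecord) -> None:
            self.record = record

        def signature(self) -> Optional[Tuple[str, str]]:
            # Stable identity for "the same call": tool name plus call id.
            if self.record is None:
                return None
            return (self.record.tool_name, self.record.tool_call_id)

    tracker = LastToolCallTracker()
    tracker.update(ToolRecord("search", {"q": "camel"}, "call_1"))
    print(tracker.signature())  # ('search', 'call_1')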
@@ -4176,23 +4995,29 @@ class ChatAgent(BaseAgent):
                     # Toolkit doesn't support cloning, use original
                     cloned_toolkits[toolkit_id] = toolkit_instance
 
-                if getattr(
-                    tool.func, "__message_integration_enhanced__", False
-                ):
-                    cloned_tools.append(
-                        FunctionTool(
-                            func=tool.func,
-                            openai_tool_schema=tool.get_openai_tool_schema(),
-                        )
-                    )
-                    continue
-
                 # Get the method from the cloned (or original) toolkit
                 toolkit = cloned_toolkits[toolkit_id]
                 method_name = tool.func.__name__
 
+                # Check if toolkit was actually cloned or just reused
+                toolkit_was_cloned = toolkit is not toolkit_instance
+
                 if hasattr(toolkit, method_name):
                     new_method = getattr(toolkit, method_name)
+
+                    # If toolkit wasn't cloned (stateless), preserve the
+                    # original function to maintain any enhancements/wrappers
+                    if not toolkit_was_cloned:
+                        # Toolkit is stateless, safe to reuse original function
+                        cloned_tools.append(
+                            FunctionTool(
+                                func=tool.func,
+                                openai_tool_schema=tool.get_openai_tool_schema(),
+                            )
+                        )
+                        continue
+
+                    # Toolkit was cloned, use the new method
                     # Wrap cloned method into a new FunctionTool,
                     # preserving schema
                     try: