chuk-ai-session-manager 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,316 @@
+ # chuk_ai_session_manager/models/token_usage.py
+ """
+ Token usage tracking models for the chuk session manager.
+
+ This module provides models for tracking token usage in LLM interactions
+ with proper async support.
+ """
+ from __future__ import annotations
+
+ import asyncio
+ from typing import Dict, Optional
+
+ from pydantic import BaseModel, ConfigDict, Field
+
+ # Try to import tiktoken, but make it optional
+ try:
+     import tiktoken
+     TIKTOKEN_AVAILABLE = True
+ except ImportError:
+     TIKTOKEN_AVAILABLE = False
+
+
+ class TokenUsage(BaseModel):
+     """
+     Tracks token usage for LLM interactions.
+
+     Attributes:
+         prompt_tokens: Number of tokens in the prompt/input
+         completion_tokens: Number of tokens in the completion/output
+         total_tokens: Total tokens (prompt + completion)
+         model: The model used for the interaction (helps with pricing calculations)
+         estimated_cost_usd: Estimated cost in USD (if pricing info is available)
+     """
+     model_config = ConfigDict(arbitrary_types_allowed=True)
+
+     prompt_tokens: int = 0
+     completion_tokens: int = 0
+     total_tokens: int = Field(default=0)
+     model: str = ""
+     estimated_cost_usd: Optional[float] = None
+
+     def __init__(self, **data):
+         super().__init__(**data)
+         # Auto-calculate total tokens if not explicitly provided
+         if self.total_tokens == 0 and (self.prompt_tokens > 0 or self.completion_tokens > 0):
+             self.total_tokens = self.prompt_tokens + self.completion_tokens
+
+         # Auto-calculate estimated cost if model is provided
+         if self.model and self.estimated_cost_usd is None:
+             self.estimated_cost_usd = self._calculate_cost_sync()
+
+     def _calculate_cost_sync(self) -> float:
+         """
+         Synchronous implementation of calculate_cost.
+
+         Returns:
+             Estimated cost in USD
+         """
+         # Model pricing per 1000 tokens (approximate as of May 2025)
+         pricing = {
+             # OpenAI models
+             "gpt-4": {"input": 0.03, "output": 0.06},
+             "gpt-4-turbo": {"input": 0.01, "output": 0.03},
+             "gpt-3.5-turbo": {"input": 0.0005, "output": 0.0015},
+
+             # Claude models
+             "claude-3-opus": {"input": 0.015, "output": 0.075},
+             "claude-3-sonnet": {"input": 0.003, "output": 0.015},
+             "claude-3-haiku": {"input": 0.00025, "output": 0.00125},
+
+             # Fallback for unknown models
+             "default": {"input": 0.001, "output": 0.002}
+         }
+
+         # Get pricing for this model or use default
+         model_pricing = pricing.get(self.model.lower(), pricing["default"])
+
+         # Calculate cost
+         input_cost = (self.prompt_tokens / 1000) * model_pricing["input"]
+         output_cost = (self.completion_tokens / 1000) * model_pricing["output"]
+
+         return round(input_cost + output_cost, 6)
+
+     async def calculate_cost(self) -> float:
+         """
+         Async version of calculate_cost.
+
+         Returns:
+             Estimated cost in USD
+         """
+         # Cost calculation is CPU-bound, so run it in an executor
+         loop = asyncio.get_running_loop()
+         return await loop.run_in_executor(None, self._calculate_cost_sync)
+
+     def _update_sync(self, prompt_tokens: int = 0, completion_tokens: int = 0) -> None:
+         """
+         Synchronous implementation of update.
+
+         Args:
+             prompt_tokens: Additional prompt tokens to add
+             completion_tokens: Additional completion tokens to add
+         """
+         self.prompt_tokens += prompt_tokens
+         self.completion_tokens += completion_tokens
+         self.total_tokens = self.prompt_tokens + self.completion_tokens
+
+         if self.model:
+             self.estimated_cost_usd = self._calculate_cost_sync()
+
+     async def update(self, prompt_tokens: int = 0, completion_tokens: int = 0) -> None:
+         """
+         Async version of update.
+
+         Args:
+             prompt_tokens: Additional prompt tokens to add
+             completion_tokens: Additional completion tokens to add
+         """
+         self.prompt_tokens += prompt_tokens
+         self.completion_tokens += completion_tokens
+         self.total_tokens = self.prompt_tokens + self.completion_tokens
+
+         if self.model:
+             self.estimated_cost_usd = await self.calculate_cost()
+
+     @classmethod
+     def _from_text_sync(
+         cls,
+         prompt: str,
+         completion: Optional[str] = None,
+         model: str = "gpt-3.5-turbo"
+     ) -> TokenUsage:
+         """
+         Synchronous implementation of from_text.
+
+         Args:
+             prompt: The prompt/input text
+             completion: The completion/output text (optional)
+             model: The model name to use for counting and pricing
+
+         Returns:
+             A TokenUsage instance with token counts
+         """
+         prompt_tokens = cls._count_tokens_sync(prompt, model)
+         completion_tokens = cls._count_tokens_sync(completion, model) if completion else 0
+
+         return cls(
+             prompt_tokens=prompt_tokens,
+             completion_tokens=completion_tokens,
+             model=model
+         )
+
+     @classmethod
+     async def from_text(
+         cls,
+         prompt: str,
+         completion: Optional[str] = None,
+         model: str = "gpt-3.5-turbo"
+     ) -> TokenUsage:
+         """
+         Async version of from_text.
+
+         Args:
+             prompt: The prompt/input text
+             completion: The completion/output text (optional)
+             model: The model name to use for counting and pricing
+
+         Returns:
+             A TokenUsage instance with token counts
+         """
+         # Run token counting in an executor since it's CPU-bound
+         loop = asyncio.get_running_loop()
+         return await loop.run_in_executor(
+             None,
+             lambda: cls._from_text_sync(prompt, completion, model)
+         )
+
+     @staticmethod
+     def _count_tokens_sync(text: Optional[str], model: str = "gpt-3.5-turbo") -> int:
+         """
+         Synchronous implementation of count_tokens.
+
+         Args:
+             text: The text to count tokens for
+             model: The model name to use for counting
+
+         Returns:
+             The number of tokens
+         """
+         if text is None:
+             return 0
+
+         if TIKTOKEN_AVAILABLE:
+             try:
+                 encoding = tiktoken.encoding_for_model(model)
+                 return len(encoding.encode(text))
+             except (KeyError, ValueError):
+                 # Fall back to cl100k_base encoding if the model is not found
+                 try:
+                     encoding = tiktoken.get_encoding("cl100k_base")
+                     return len(encoding.encode(text))
+                 except Exception:
+                     # If all else fails, use the approximation
+                     pass
+
+         # Simple approximation: ~4 chars per token for English text
+         return len(text) // 4
+
+     @staticmethod
+     async def count_tokens(text: Optional[str], model: str = "gpt-3.5-turbo") -> int:
+         """
+         Async version of count_tokens.
+
+         Args:
+             text: The text to count tokens for
+             model: The model name to use for counting
+
+         Returns:
+             The number of tokens
+         """
+         # Run in an executor since token counting is CPU-bound
+         loop = asyncio.get_running_loop()
+         return await loop.run_in_executor(
+             None,
+             lambda: TokenUsage._count_tokens_sync(text, model)
+         )
+
+     def __add__(self, other: TokenUsage) -> TokenUsage:
+         """
+         Add two TokenUsage instances together.
+
+         Args:
+             other: Another TokenUsage instance
+
+         Returns:
+             A new TokenUsage instance with combined counts
+         """
+         # Use the model from self if it exists, otherwise use the other's model
+         model = self.model if self.model else other.model
+
+         return TokenUsage(
+             prompt_tokens=self.prompt_tokens + other.prompt_tokens,
+             completion_tokens=self.completion_tokens + other.completion_tokens,
+             model=model
+         )
+
+
+ class TokenSummary(BaseModel):
+     """
+     Summarizes token usage across multiple interactions.
+
+     Attributes:
+         total_prompt_tokens: Total tokens used in prompts
+         total_completion_tokens: Total tokens used in completions
+         total_tokens: Total tokens overall
+         usage_by_model: Breakdown of usage by model
+         total_estimated_cost_usd: Total estimated cost across all models
+     """
+     total_prompt_tokens: int = 0
+     total_completion_tokens: int = 0
+     total_tokens: int = 0
+     usage_by_model: Dict[str, TokenUsage] = Field(default_factory=dict)
+     total_estimated_cost_usd: float = 0.0
+
+     def _add_usage_sync(self, usage: TokenUsage) -> None:
+         """
+         Synchronous implementation of add_usage.
+
+         Args:
+             usage: The TokenUsage to add
+         """
+         self.total_prompt_tokens += usage.prompt_tokens
+         self.total_completion_tokens += usage.completion_tokens
+         self.total_tokens += usage.total_tokens
+
+         if usage.estimated_cost_usd is not None:
+             self.total_estimated_cost_usd += usage.estimated_cost_usd
+
+         if usage.model:
+             if usage.model in self.usage_by_model:
+                 self.usage_by_model[usage.model]._update_sync(
+                     prompt_tokens=usage.prompt_tokens,
+                     completion_tokens=usage.completion_tokens
+                 )
+             else:
+                 self.usage_by_model[usage.model] = TokenUsage(
+                     prompt_tokens=usage.prompt_tokens,
+                     completion_tokens=usage.completion_tokens,
+                     model=usage.model
+                 )
+
+     async def add_usage(self, usage: TokenUsage) -> None:
+         """
+         Async version of add_usage.
+
+         Args:
+             usage: The TokenUsage to add
+         """
+         self.total_prompt_tokens += usage.prompt_tokens
+         self.total_completion_tokens += usage.completion_tokens
+         self.total_tokens += usage.total_tokens
+
+         if usage.estimated_cost_usd is not None:
+             self.total_estimated_cost_usd += usage.estimated_cost_usd
+
+         if usage.model:
+             if usage.model in self.usage_by_model:
+                 await self.usage_by_model[usage.model].update(
+                     prompt_tokens=usage.prompt_tokens,
+                     completion_tokens=usage.completion_tokens
+                 )
+             else:
+                 self.usage_by_model[usage.model] = TokenUsage(
+                     prompt_tokens=usage.prompt_tokens,
+                     completion_tokens=usage.completion_tokens,
+                     model=usage.model
+                 )
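
A minimal usage sketch for the models above (illustrative only, not part of the package; it uses nothing beyond the TokenUsage/TokenSummary API shown in this file). With the bundled pricing table, 1,000 prompt tokens plus 500 completion tokens on gpt-3.5-turbo come to (1000/1000) * 0.0005 + (500/1000) * 0.0015 = $0.00125.

import asyncio

from chuk_ai_session_manager.models.token_usage import TokenSummary, TokenUsage

async def main() -> None:
    # Counts with tiktoken when installed, else the ~4 chars/token fallback
    usage = await TokenUsage.from_text(
        prompt="What is the capital of France?",
        completion="The capital of France is Paris.",
        model="gpt-3.5-turbo",
    )
    summary = TokenSummary()
    await summary.add_usage(usage)
    print(usage.total_tokens, usage.estimated_cost_usd)
    print(summary.total_tokens, summary.total_estimated_cost_usd)

asyncio.run(main())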
@@ -0,0 +1,194 @@
+ # sample_tools.py
+ """
+ Sample tools for chuk session manager demos - corrected version following registry example
+ """
+
+ import asyncio
+ import random
+ from datetime import datetime
+ from typing import Dict, Any
+
+ from chuk_tool_processor.registry import register_tool
+
+
+ @register_tool(name="calculator", namespace="default", description="Perform basic arithmetic operations")
+ class CalculatorTool:
+     """Calculator tool for basic arithmetic."""
+
+     async def execute(self, operation: str, a: float, b: float) -> Dict[str, Any]:
+         """
+         Perform a basic arithmetic operation.
+
+         Args:
+             operation: One of "add", "subtract", "multiply", "divide"
+             a: First operand
+             b: Second operand
+
+         Returns:
+             Dictionary with the result
+         """
+         print(f"🧮 Calculator executing: {a} {operation} {b}")
+
+         if operation == "add":
+             result = a + b
+         elif operation == "subtract":
+             result = a - b
+         elif operation == "multiply":
+             result = a * b
+         elif operation == "divide":
+             if b == 0:
+                 raise ValueError("Cannot divide by zero")
+             result = a / b
+         else:
+             raise ValueError(f"Unknown operation: {operation}")
+
+         return {
+             "operation": operation,
+             "a": a,
+             "b": b,
+             "result": result,
+             "timestamp": datetime.now().isoformat()
+         }
+
+
+ @register_tool(name="weather", namespace="default", description="Get current weather information for a location")
+ class WeatherTool:
+     """Weather tool that returns mock weather data."""
+
+     async def execute(self, location: str) -> Dict[str, Any]:
+         """
+         Get weather information for a specific location.
+
+         Args:
+             location: The city or location to get weather for
+
+         Returns:
+             Dictionary with weather information
+         """
+         print(f"🌤️ Weather tool executing for: {location}")
+         await asyncio.sleep(0.1)  # Simulate API delay
+
+         # Mock realistic weather based on location
+         base_temp = 15  # Default moderate temperature
+         if any(city in location.lower() for city in ["miami", "phoenix", "dubai", "singapore"]):
+             base_temp = 28
+         elif any(city in location.lower() for city in ["moscow", "montreal", "oslo", "anchorage"]):
+             base_temp = -5
+         elif any(city in location.lower() for city in ["london", "seattle", "vancouver"]):
+             base_temp = 12
+         elif any(city in location.lower() for city in ["tokyo", "new york", "paris", "berlin"]):
+             base_temp = 18
+
+         # Add some randomness
+         temperature = base_temp + random.randint(-8, 12)
+         conditions = ["Sunny", "Partly Cloudy", "Cloudy", "Light Rain", "Heavy Rain", "Snow", "Thunderstorm", "Foggy"]
+         condition = random.choice(conditions)
+         humidity = random.randint(35, 85)
+         wind_speed = random.uniform(2.0, 25.0)
+         feels_like = temperature + random.randint(-3, 3)
+
+         # Adjust conditions based on temperature
+         if temperature < 0:
+             condition = random.choice(["Snow", "Cloudy", "Partly Cloudy"])
+         elif temperature > 30:
+             condition = random.choice(["Sunny", "Partly Cloudy", "Hot"])
+
+         description = f"Current weather in {location} is {condition.lower()} with temperature {temperature}°C"
+
+         return {
+             "location": location,
+             "temperature": float(temperature),
+             "condition": condition,
+             "humidity": humidity,
+             "wind_speed": round(wind_speed, 1),
+             "description": description,
+             "feels_like": float(feels_like),
+             "timestamp": datetime.now().isoformat()
+         }
+
+
+ @register_tool(name="search", namespace="default", description="Search for information on the internet")
+ class SearchTool:
+     """Search tool that returns mock search results."""
+
+     async def execute(self, query: str, max_results: int = 3) -> Dict[str, Any]:
+         """
+         Search for information on the internet.
+
+         Args:
+             query: Search query
+             max_results: Maximum number of results to return
+
+         Returns:
+             Dictionary with search results
+         """
+         print(f"🔍 Search tool executing for: {query}")
+         await asyncio.sleep(0.2)  # Simulate API delay
+
+         query_lower = query.lower()
+
+         # Generate contextually relevant mock results based on query
+         if "climate" in query_lower or "environment" in query_lower:
+             result_templates = [
+                 {
+                     "title": "Climate Change Adaptation Strategies - IPCC Report",
+                     "url": "https://www.ipcc.ch/adaptation-strategies",
+                     "snippet": "Comprehensive guide to climate change adaptation strategies for communities, businesses, and governments. Includes resilience planning and risk assessment."
+                 },
+                 {
+                     "title": "Environmental Adaptation Solutions | Climate.gov",
+                     "url": "https://www.climate.gov/adaptation-solutions",
+                     "snippet": "Evidence-based climate adaptation solutions including infrastructure improvements, ecosystem restoration, and community planning approaches."
+                 },
+                 {
+                     "title": "Building Climate Resilience: A Practical Guide",
+                     "url": "https://www.resilience.org/climate-guide",
+                     "snippet": "Practical steps for building climate resilience in your community. Covers early warning systems, green infrastructure, and adaptation planning."
+                 }
+             ]
+         elif "weather" in query_lower:
+             result_templates = [
+                 {
+                     "title": "Weather Forecast and Current Conditions",
+                     "url": "https://weather.com/forecast",
+                     "snippet": "Get accurate weather forecasts, current conditions, and severe weather alerts for your location."
+                 },
+                 {
+                     "title": "Climate and Weather Patterns Explained",
+                     "url": "https://www.weatherpatterns.org",
+                     "snippet": "Understanding weather patterns, climate systems, and meteorological phenomena that affect daily weather."
+                 }
+             ]
+         else:
+             # Generic results for other queries
+             result_templates = [
+                 {
+                     "title": f"Everything You Need to Know About {query.title()}",
+                     "url": f"https://encyclopedia.com/{query.lower().replace(' ', '-')}",
+                     "snippet": f"Comprehensive information and resources about {query}. Expert insights, latest research, and practical applications."
+                 },
+                 {
+                     "title": f"{query.title()} - Latest News and Updates",
+                     "url": f"https://news.example.com/{query.lower().replace(' ', '-')}",
+                     "snippet": f"Stay up to date with the latest news, trends, and developments related to {query}."
+                 },
+                 {
+                     "title": f"Guide to {query.title()} - Best Practices",
+                     "url": f"https://guides.com/{query.lower().replace(' ', '-')}",
+                     "snippet": f"Expert guide covering best practices, tips, and strategies for {query}. Includes real-world examples and case studies."
+                 }
+             ]
+
+         # Select results up to max_results
+         selected_results = result_templates[:max_results]
+
+         return {
+             "query": query,
+             "results_count": len(selected_results),
+             "results": selected_results,
+             "timestamp": datetime.now().isoformat()
+         }
+
+
+ print("✅ sample_tools.py: 3 tools defined with @register_tool decorator (corrected version)")
@@ -0,0 +1,178 @@
+ #!/usr/bin/env python3
+ # chuk_ai_session_manager/session_aware_tool_processor.py
+ """Session-aware tool processor for chuk_tool_processor 0.1.x.
+
+ * Converts OpenAI `tool_calls` → `ToolCall` objects.
+ * Executes them with **ToolProcessor().executor.execute**.
+ * Adds caching / retry.
+ * Logs every call into the session tree, storing the **string repr**
+   of the result (this is what the prompt builder currently expects).
+ """
+
+ from __future__ import annotations
+
+ import asyncio
+ import hashlib
+ import json
+ import logging
+ from typing import Any, Dict, List
+
+ from chuk_tool_processor.core.processor import ToolProcessor
+ from chuk_tool_processor.models.tool_call import ToolCall
+ from chuk_tool_processor.models.tool_result import ToolResult
+
+ from chuk_ai_session_manager.models.event_source import EventSource
+ from chuk_ai_session_manager.models.event_type import EventType
+ from chuk_ai_session_manager.models.session_event import SessionEvent
+ from chuk_ai_session_manager.storage import SessionStoreProvider
+
+ logger = logging.getLogger(__name__)
+
30
+
31
+ class SessionAwareToolProcessor:
32
+ """Run tool-calls, add retry/caching, and log into a session."""
33
+
34
+ # ─────────────────────────── construction ──────────────────────────
35
+ def __init__(
36
+ self,
37
+ session_id: str,
38
+ *,
39
+ enable_caching: bool = True,
40
+ max_retries: int = 2,
41
+ retry_delay: float = 1.0,
42
+ ) -> None:
43
+ self.session_id = session_id
44
+ self.enable_caching = enable_caching
45
+ self.max_retries = max_retries
46
+ self.retry_delay = retry_delay
47
+ self.cache: Dict[str, ToolResult] = {}
48
+
49
+ self._tp = ToolProcessor()
50
+ if not hasattr(self._tp, "executor"):
51
+ raise AttributeError("Installed chuk_tool_processor is too old - missing `.executor`")
52
+
53
+ @classmethod
54
+ async def create(cls, session_id: str, **kwargs):
55
+ store = SessionStoreProvider.get_store()
56
+ if not await store.get(session_id):
57
+ raise ValueError(f"Session {session_id} not found")
58
+ return cls(session_id=session_id, **kwargs)
59
+
60
+ # ─────────────────────────── internals ─────────────────────────────
61
+ async def _maybe_await(self, val: Any) -> Any:
62
+ return await val if asyncio.iscoroutine(val) else val
63
+
64
+ async def _exec_calls(self, calls: List[Dict[str, Any]]) -> List[ToolResult]:
65
+ """Convert dicts → ToolCall and drive the executor."""
66
+ tool_calls: list[ToolCall] = []
67
+ for c in calls:
68
+ fn = c.get("function", {})
69
+ name = fn.get("name", "tool")
70
+ try:
71
+ args = json.loads(fn.get("arguments", "{}"))
72
+ except json.JSONDecodeError:
73
+ args = {"raw": fn.get("arguments")}
74
+ tool_calls.append(ToolCall(tool=name, arguments=args))
75
+
76
+ results = await self._tp.executor.execute(tool_calls)
77
+ for r in results:
78
+ r.result = await self._maybe_await(r.result)
79
+ return results
80
+
81
+ async def _log_event(
82
+ self,
83
+ session,
84
+ parent_id: str,
85
+ res: ToolResult,
86
+ attempt: int,
87
+ *,
88
+ cached: bool,
89
+ failed: bool = False,
90
+ ) -> None:
91
+ """Persist TOOL_CALL with *string* result (prompt-friendly)."""
92
+ result_str = str(res.result) if res.result is not None else "null"
93
+
94
+ ev = await SessionEvent.create_with_tokens(
95
+ message={
96
+ "tool": res.tool,
97
+ "arguments": getattr(res, "arguments", None),
98
+ "result": result_str,
99
+ "error": res.error,
100
+ "cached": cached,
101
+ },
102
+ prompt=f"{res.tool}({json.dumps(getattr(res, 'arguments', None), default=str)})",
103
+ completion=result_str,
104
+ model="tool-execution",
105
+ source=EventSource.SYSTEM,
106
+ type=EventType.TOOL_CALL,
107
+ )
108
+ await ev.update_metadata("parent_event_id", parent_id)
109
+ await ev.update_metadata("call_id", getattr(res, "id", "cid"))
110
+ await ev.update_metadata("attempt", attempt)
111
+ if failed:
112
+ await ev.update_metadata("failed", True)
113
+ await session.add_event_and_save(ev)
114
+
115
+ # ─────────────────────────── public API ────────────────────────────
116
+ async def process_llm_message(self, llm_msg: Dict[str, Any], _) -> List[ToolResult]:
117
+ store = SessionStoreProvider.get_store()
118
+ session = await store.get(self.session_id)
119
+ if not session:
120
+ raise ValueError(f"Session {self.session_id} not found")
121
+
122
+ parent_evt = await SessionEvent.create_with_tokens(
123
+ message=llm_msg,
124
+ prompt="",
125
+ completion=json.dumps(llm_msg, ensure_ascii=False),
126
+ model="gpt-4o-mini",
127
+ source=EventSource.LLM,
128
+ type=EventType.MESSAGE,
129
+ )
130
+ await session.add_event_and_save(parent_evt)
131
+
132
+ calls = llm_msg.get("tool_calls", [])
133
+ if not calls:
134
+ return []
135
+
136
+ out: list[ToolResult] = []
137
+ for call in calls:
138
+ fn = call.get("function", {})
139
+ name = fn.get("name", "tool")
140
+ try:
141
+ args = json.loads(fn.get("arguments", "{}"))
142
+ except json.JSONDecodeError:
143
+ args = {"raw": fn.get("arguments")}
144
+
145
+ cache_key = (
146
+ hashlib.md5(f"{name}:{json.dumps(args, sort_keys=True)}".encode()).hexdigest()
147
+ if self.enable_caching else None
148
+ )
149
+
150
+ # 1) cache hit --------------------------------------------------
151
+ if cache_key and (cached := self.cache.get(cache_key)):
152
+ await self._log_event(session, parent_evt.id, cached, 1, cached=True)
153
+ out.append(cached)
154
+ continue
155
+
156
+ # 2) execute with retry ----------------------------------------
157
+ last_err: str | None = None
158
+ for attempt in range(1, self.max_retries + 2):
159
+ try:
160
+ res = (await self._exec_calls([call]))[0]
161
+ if cache_key:
162
+ self.cache[cache_key] = res
163
+ await self._log_event(session, parent_evt.id, res, attempt, cached=False)
164
+ out.append(res)
165
+ break
166
+ except Exception as exc: # noqa: BLE001
167
+ last_err = str(exc)
168
+ if attempt <= self.max_retries:
169
+ await asyncio.sleep(self.retry_delay)
170
+ continue
171
+ err_res = ToolResult(tool=name, result=None, error=last_err) # type: ignore[arg-type]
172
+ await self._log_event(
173
+ session, parent_evt.id, err_res, attempt,
174
+ cached=False, failed=True
175
+ )
176
+ out.append(err_res)
177
+
178
+ return out
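
A sketch of driving the processor end to end (illustrative only: it assumes a session with the given id already exists in the configured store, and it hand-builds an OpenAI-style assistant message in the `tool_calls` shape that `_exec_calls` parses above):

import asyncio
import json

from chuk_ai_session_manager.session_aware_tool_processor import SessionAwareToolProcessor

async def run_calls(session_id: str) -> None:
    # create() verifies the session exists before constructing the processor
    proc = await SessionAwareToolProcessor.create(session_id, max_retries=1)
    llm_msg = {
        "content": None,
        "tool_calls": [
            {"function": {"name": "calculator",
                          "arguments": json.dumps({"operation": "add", "a": 2, "b": 3})}}
        ],
    }
    # Executes the calls with caching/retry and logs TOOL_CALL events to the session
    results = await proc.process_llm_message(llm_msg, None)
    for res in results:
        print(res.tool, res.result, res.error)

# asyncio.run(run_calls("existing-session-id"))  # requires a real session id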