hindsight-api 0.4.7__py3-none-any.whl → 0.4.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -303,8 +303,10 @@ class MemoryEngine(MemoryEngineInterface):
303
303
  db_url = db_url or config.database_url
304
304
  memory_llm_provider = memory_llm_provider or config.llm_provider
305
305
  memory_llm_api_key = memory_llm_api_key or config.llm_api_key
306
- # Ollama and mock don't require an API key
307
- if not memory_llm_api_key and memory_llm_provider not in ("ollama", "mock"):
306
+ # Ollama, openai-codex, claude-code, and mock don't require an API key
307
+ # openai-codex uses OAuth tokens from ~/.codex/auth.json
308
+ # claude-code uses OAuth tokens from macOS Keychain
309
+ if not memory_llm_api_key and memory_llm_provider not in ("ollama", "openai-codex", "claude-code", "mock"):
308
310
  raise ValueError("LLM API key is required. Set HINDSIGHT_API_LLM_API_KEY environment variable.")
309
311
  memory_llm_model = memory_llm_model or config.llm_model
310
312
  memory_llm_base_url = memory_llm_base_url or config.get_llm_base_url() or None
@@ -457,7 +459,11 @@ class MemoryEngine(MemoryEngineInterface):
457
459
  # Store operation validator extension (optional)
458
460
  self._operation_validator = operation_validator
459
461
 
460
- # Store tenant extension (optional)
462
+ # Store tenant extension (always set, use default if none provided)
463
+ if tenant_extension is None:
464
+ from ..extensions.builtin.tenant import DefaultTenantExtension
465
+
466
+ tenant_extension = DefaultTenantExtension(config={})
461
467
  self._tenant_extension = tenant_extension
462
468
 
463
469
  async def _validate_operation(self, validation_coro) -> None:
@@ -495,22 +501,18 @@ class MemoryEngine(MemoryEngineInterface):
495
501
  Raises:
496
502
  AuthenticationError: If authentication fails or request_context is missing when required.
497
503
  """
498
- if self._tenant_extension is None:
499
- _current_schema.set("public")
500
- return "public"
501
-
502
504
  from hindsight_api.extensions import AuthenticationError
503
505
 
504
506
  if request_context is None:
505
- raise AuthenticationError("RequestContext is required when tenant extension is configured")
507
+ raise AuthenticationError("RequestContext is required")
506
508
 
507
509
  # For internal/background operations (e.g., worker tasks), skip extension authentication.
508
510
  # The task was already authenticated at submission time, and execute_task sets _current_schema
509
- # from the task's _schema field. For public schema tasks, _current_schema keeps its default "public".
511
+ # from the task's _schema field.
510
512
  if request_context.internal:
511
513
  return _current_schema.get()
512
514
 
513
- # Let AuthenticationError propagate - HTTP layer will convert to 401
515
+ # Authenticate through tenant extension (always set, may be default no-auth extension)
514
516
  tenant_context = await self._tenant_extension.authenticate(request_context)
515
517
 
516
518
  _current_schema.set(tenant_context.schema_name)
@@ -536,10 +538,15 @@ class MemoryEngine(MemoryEngineInterface):
536
538
  f"[BATCH_RETAIN_TASK] Starting background batch retain for bank_id={bank_id}, {len(contents)} items"
537
539
  )
538
540
 
539
- # Use internal request context for background tasks (skips tenant auth when schema is pre-set)
541
+ # Restore tenant_id/api_key_id from task payload so downstream operations
542
+ # (e.g., consolidation and mental model refreshes) can attribute usage.
540
543
  from hindsight_api.models import RequestContext
541
544
 
542
- internal_context = RequestContext(internal=True)
545
+ internal_context = RequestContext(
546
+ internal=True,
547
+ tenant_id=task_dict.get("_tenant_id"),
548
+ api_key_id=task_dict.get("_api_key_id"),
549
+ )
543
550
  await self.retain_batch_async(bank_id=bank_id, contents=contents, request_context=internal_context)
544
551
 
545
552
  logger.info(f"[BATCH_RETAIN_TASK] Completed background batch retain for bank_id={bank_id}")
@@ -565,7 +572,13 @@ class MemoryEngine(MemoryEngineInterface):
565
572
 
566
573
  from .consolidation import run_consolidation_job
567
574
 
568
- internal_context = RequestContext(internal=True)
575
+ # Restore tenant_id/api_key_id from task payload so downstream operations
576
+ # (e.g., mental model refreshes) can attribute usage to the correct org.
577
+ internal_context = RequestContext(
578
+ internal=True,
579
+ tenant_id=task_dict.get("_tenant_id"),
580
+ api_key_id=task_dict.get("_api_key_id"),
581
+ )
569
582
  result = await run_consolidation_job(
570
583
  memory_engine=self,
571
584
  bank_id=bank_id,
@@ -926,30 +939,34 @@ class MemoryEngine(MemoryEngineInterface):
926
939
 
927
940
  if not self.db_url:
928
941
  raise ValueError("Database URL is required for migrations")
929
- logger.info("Running database migrations...")
930
- # Use configured database schema for migrations (defaults to "public")
931
- run_migrations(self.db_url, schema=get_config().database_schema)
932
942
 
933
- # Migrate all existing tenant schemas (if multi-tenant)
934
- if self._tenant_extension is not None:
935
- try:
936
- tenants = await self._tenant_extension.list_tenants()
937
- if tenants:
938
- logger.info(f"Running migrations on {len(tenants)} tenant schemas...")
939
- for tenant in tenants:
940
- schema = tenant.schema
941
- if schema and schema != "public":
942
- try:
943
- run_migrations(self.db_url, schema=schema)
944
- except Exception as e:
945
- logger.warning(f"Failed to migrate tenant schema {schema}: {e}")
946
- logger.info("Tenant schema migrations completed")
947
- except Exception as e:
948
- logger.warning(f"Failed to run tenant schema migrations: {e}")
949
-
950
- # Ensure embedding column dimension matches the model's dimension
951
- # This is done after migrations and after embeddings.initialize()
952
- ensure_embedding_dimension(self.db_url, self.embeddings.dimension, schema=get_config().database_schema)
943
+ # Migrate all schemas from the tenant extension
944
+ # The tenant extension is the single source of truth for which schemas exist
945
+ logger.info("Running database migrations...")
946
+ try:
947
+ tenants = await self._tenant_extension.list_tenants()
948
+ if tenants:
949
+ logger.info(f"Running migrations on {len(tenants)} schema(s)...")
950
+ for tenant in tenants:
951
+ schema = tenant.schema
952
+ if schema:
953
+ try:
954
+ run_migrations(self.db_url, schema=schema)
955
+ except Exception as e:
956
+ logger.warning(f"Failed to migrate schema {schema}: {e}")
957
+ logger.info("Schema migrations completed")
958
+
959
+ # Ensure embedding column dimension matches the model's dimension
960
+ # This is done after migrations and after embeddings.initialize()
961
+ for tenant in tenants:
962
+ schema = tenant.schema
963
+ if schema:
964
+ try:
965
+ ensure_embedding_dimension(self.db_url, self.embeddings.dimension, schema=schema)
966
+ except Exception as e:
967
+ logger.warning(f"Failed to ensure embedding dimension for schema {schema}: {e}")
968
+ except Exception as e:
969
+ logger.warning(f"Failed to run schema migrations: {e}")
953
970
 
954
971
  logger.info(f"Connecting to PostgreSQL at {self.db_url}")
955
972
 
@@ -5458,6 +5475,13 @@ class MemoryEngine(MemoryEngineInterface):
5458
5475
  task_payload: dict[str, Any] = {"contents": contents}
5459
5476
  if document_tags:
5460
5477
  task_payload["document_tags"] = document_tags
5478
+ # Pass tenant_id and api_key_id through task payload so the worker
5479
+ # can propagate request context to downstream operations (e.g.,
5480
+ # consolidation and mental model refreshes triggered after retain).
5481
+ if request_context.tenant_id:
5482
+ task_payload["_tenant_id"] = request_context.tenant_id
5483
+ if request_context.api_key_id:
5484
+ task_payload["_api_key_id"] = request_context.api_key_id
5461
5485
 
5462
5486
  result = await self._submit_async_operation(
5463
5487
  bank_id=bank_id,
@@ -5490,11 +5514,21 @@ class MemoryEngine(MemoryEngineInterface):
5490
5514
  Dict with operation_id
5491
5515
  """
5492
5516
  await self._authenticate_tenant(request_context)
5517
+
5518
+ # Pass tenant_id and api_key_id through task payload so the worker
5519
+ # can provide request context to extension hooks (e.g., usage metering
5520
+ # for mental model refreshes triggered by consolidation).
5521
+ task_payload: dict[str, Any] = {}
5522
+ if request_context.tenant_id:
5523
+ task_payload["_tenant_id"] = request_context.tenant_id
5524
+ if request_context.api_key_id:
5525
+ task_payload["_api_key_id"] = request_context.api_key_id
5526
+
5493
5527
  return await self._submit_async_operation(
5494
5528
  bank_id=bank_id,
5495
5529
  operation_type="consolidation",
5496
5530
  task_type="consolidation",
5497
- task_payload={},
5531
+ task_payload=task_payload,
5498
5532
  dedupe_by_bank=True,
5499
5533
  )
5500
5534
 
@@ -0,0 +1,14 @@
1
+ """
2
+ LLM provider implementations.
3
+
4
+ This package contains concrete implementations of the LLMInterface for various providers.
5
+ """
6
+
7
+ from .anthropic_llm import AnthropicLLM
8
+ from .claude_code_llm import ClaudeCodeLLM
9
+ from .codex_llm import CodexLLM
10
+ from .gemini_llm import GeminiLLM
11
+ from .mock_llm import MockLLM
12
+ from .openai_compatible_llm import OpenAICompatibleLLM
13
+
14
+ __all__ = ["AnthropicLLM", "ClaudeCodeLLM", "CodexLLM", "GeminiLLM", "MockLLM", "OpenAICompatibleLLM"]
@@ -0,0 +1,434 @@
1
+ """
2
+ Anthropic LLM provider using the Anthropic Python SDK.
3
+
4
+ This provider enables using Claude models from Anthropic with support for:
5
+ - Structured JSON output
6
+ - Tool/function calling with proper format conversion
7
+ - Extended thinking mode
8
+ - Retry logic with exponential backoff
9
+ """
10
+
11
+ import asyncio
12
+ import json
13
+ import logging
14
+ import time
15
+ from typing import Any
16
+
17
+ from hindsight_api.engine.llm_interface import LLMInterface, OutputTooLongError
18
+ from hindsight_api.engine.response_models import LLMToolCall, LLMToolCallResult, TokenUsage
19
+ from hindsight_api.metrics import get_metrics_collector
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+
24
class AnthropicLLM(LLMInterface):
    """
    LLM provider backed by Anthropic's Claude models.

    Accepts OpenAI-style chat messages and tool definitions and converts them
    to the Anthropic Messages API format. Structured output is obtained by
    appending the JSON schema of the requested response model to the system
    prompt and parsing the returned text as JSON.
    """

    # Fallback for ``max_tokens`` when the caller does not specify a limit.
    _DEFAULT_MAX_TOKENS = 4096

    def __init__(
        self,
        provider: str,
        api_key: str,
        base_url: str,
        model: str,
        reasoning_effort: str = "low",
        timeout: float = 300.0,
        **kwargs: Any,
    ):
        """
        Initialize Anthropic LLM provider.

        Args:
            provider: Provider name (should be "anthropic").
            api_key: Anthropic API key.
            base_url: Base URL for the API (optional, uses Anthropic default if empty).
            model: Model name (e.g., "claude-sonnet-4-20250514").
            reasoning_effort: Reasoning effort level (forwarded to the base class only).
            timeout: Request timeout in seconds; a falsy value keeps the SDK default.
            **kwargs: Additional provider-specific parameters, forwarded to the base class.

        Raises:
            ValueError: If no API key is available.
            RuntimeError: If the Anthropic SDK is not installed.
        """
        super().__init__(provider, api_key, base_url, model, reasoning_effort, **kwargs)

        if not self.api_key:
            raise ValueError("API key is required for Anthropic provider")

        # The SDK is imported lazily so it is only required when this provider is used.
        try:
            from anthropic import AsyncAnthropic
        except ImportError as e:
            raise RuntimeError("Anthropic SDK not installed. Run: uv add anthropic or pip install anthropic") from e

        client_kwargs: dict[str, Any] = {"api_key": self.api_key}
        if self.base_url:
            client_kwargs["base_url"] = self.base_url
        if timeout:
            client_kwargs["timeout"] = timeout

        self._client = AsyncAnthropic(**client_kwargs)
        logger.info(f"Anthropic client initialized for model: {self.model}")

    async def verify_connection(self) -> None:
        """
        Verify that the Anthropic provider is configured correctly by making a simple test call.

        Raises:
            RuntimeError: If the connection test fails.
        """
        try:
            test_messages = [{"role": "user", "content": "test"}]
            await self.call(
                messages=test_messages,
                max_completion_tokens=10,
                temperature=0.0,
                scope="test",
                max_retries=0,
            )
            logger.info("Anthropic connection verified successfully")
        except Exception as e:
            logger.error(f"Anthropic connection verification failed: {e}")
            raise RuntimeError(f"Failed to verify Anthropic connection: {e}") from e

    @staticmethod
    def _parse_json_content(content: str) -> Any:
        """Parse JSON from model output, tolerating a surrounding markdown code fence."""
        candidate = content
        if "```json" in content:
            candidate = content.split("```json")[1].split("```")[0].strip()
        elif "```" in content:
            candidate = content.split("```")[1].strip()

        try:
            return json.loads(candidate)
        except json.JSONDecodeError:
            # Fence stripping may have cut valid JSON; fall back to the raw text.
            return json.loads(content)

    @staticmethod
    def _usage_counts(response: Any) -> tuple[int, int]:
        """Return (input_tokens, output_tokens) from a response, or zeros when usage is absent."""
        usage = getattr(response, "usage", None)
        if not usage:
            return 0, 0
        return usage.input_tokens or 0, usage.output_tokens or 0

    @staticmethod
    def _is_retryable(error: Exception) -> bool:
        """Return True for connection errors, rate limits, and 5xx responses."""
        from anthropic import APIConnectionError, APIStatusError, RateLimitError

        if isinstance(error, (APIConnectionError, RateLimitError)):
            return True
        return isinstance(error, APIStatusError) and error.status_code >= 500

    @staticmethod
    def _parse_tool_arguments(raw: Any) -> Any:
        """Decode OpenAI-style tool-call arguments, accepting a JSON string, a dict, or nothing."""
        if isinstance(raw, dict):
            return raw
        if raw is None or (isinstance(raw, str) and not raw.strip()):
            return {}
        return json.loads(raw)

    @staticmethod
    def _convert_tool_choice(tool_choice: str | dict[str, Any]) -> dict[str, Any] | None:
        """
        Map an OpenAI-style ``tool_choice`` to the Anthropic format.

        Returns None for "auto" (and unrecognized values) so that the request
        is sent without an explicit ``tool_choice`` field.
        """
        if isinstance(tool_choice, dict):
            name = (tool_choice.get("function") or {}).get("name") or tool_choice.get("name")
            return {"type": "tool", "name": name} if name else None
        if tool_choice == "required":
            return {"type": "any"}
        if tool_choice == "none":
            return {"type": "none"}
        return None

    async def call(
        self,
        messages: list[dict[str, str]],
        response_format: Any | None = None,
        max_completion_tokens: int | None = None,
        temperature: float | None = None,
        scope: str = "memory",
        max_retries: int = 10,
        initial_backoff: float = 1.0,
        max_backoff: float = 60.0,
        skip_validation: bool = False,
        strict_schema: bool = False,
        return_usage: bool = False,
    ) -> Any:
        """
        Make an LLM API call with retry logic.

        Args:
            messages: List of message dicts with 'role' and 'content'.
            response_format: Optional Pydantic model for structured output.
            max_completion_tokens: Maximum tokens in response (defaults to 4096).
            temperature: Sampling temperature; omitted from the request when None.
            scope: Scope identifier for tracking.
            max_retries: Maximum retry attempts.
            initial_backoff: Initial backoff time in seconds.
            max_backoff: Maximum backoff time in seconds.
            skip_validation: Return raw JSON without Pydantic validation.
            strict_schema: Accepted for interface compatibility; not used by this provider.
            return_usage: If True, return tuple (result, TokenUsage) instead of just result.

        Returns:
            If return_usage=False: Parsed response if response_format is provided, otherwise text content.
            If return_usage=True: Tuple of (result, TokenUsage) with token counts.

        Raises:
            json.JSONDecodeError: If structured output is still invalid JSON after all retries.
            Exception: Re-raises API errors that are not retryable or once retries are exhausted.
        """
        from anthropic import APIConnectionError, APIStatusError, RateLimitError

        start_time = time.time()

        # Anthropic takes the system prompt as a separate parameter, not as a message.
        system_prompt = None
        anthropic_messages = []

        for msg in messages:
            role = msg.get("role", "user")
            content = msg.get("content", "")

            if role == "system":
                if system_prompt:
                    system_prompt += "\n\n" + content
                else:
                    system_prompt = content
            else:
                anthropic_messages.append({"role": role, "content": content})

        # Structured output is requested by describing the schema in the system prompt.
        if response_format is not None and hasattr(response_format, "model_json_schema"):
            schema = response_format.model_json_schema()
            schema_msg = f"\n\nYou must respond with valid JSON matching this schema:\n{json.dumps(schema, indent=2)}"
            if system_prompt:
                system_prompt += schema_msg
            else:
                system_prompt = schema_msg

        call_params: dict[str, Any] = {
            "model": self.model,
            "messages": anthropic_messages,
            "max_tokens": max_completion_tokens if max_completion_tokens is not None else self._DEFAULT_MAX_TOKENS,
        }

        if system_prompt:
            call_params["system"] = system_prompt

        if temperature is not None:
            call_params["temperature"] = temperature

        last_exception = None

        for attempt in range(max_retries + 1):
            try:
                response = await self._client.messages.create(**call_params)

                # The response content is a list of blocks; only text blocks are used here.
                content = "".join(block.text for block in response.content if block.type == "text")

                if response_format is not None:
                    json_data = self._parse_json_content(content)
                    if skip_validation:
                        result = json_data
                    else:
                        result = response_format.model_validate(json_data)
                else:
                    result = content

                duration = time.time() - start_time
                input_tokens, output_tokens = self._usage_counts(response)
                total_tokens = input_tokens + output_tokens

                metrics = get_metrics_collector()
                metrics.record_llm_call(
                    provider=self.provider,
                    model=self.model,
                    scope=scope,
                    duration=duration,
                    input_tokens=input_tokens,
                    output_tokens=output_tokens,
                    success=True,
                )

                if duration > 10.0:
                    logger.info(
                        f"slow llm call: scope={scope}, model={self.provider}/{self.model}, "
                        f"input_tokens={input_tokens}, output_tokens={output_tokens}, "
                        f"time={duration:.3f}s"
                    )

                if return_usage:
                    token_usage = TokenUsage(
                        input_tokens=input_tokens,
                        output_tokens=output_tokens,
                        total_tokens=total_tokens,
                    )
                    return result, token_usage
                return result

            except json.JSONDecodeError as e:
                last_exception = e
                if attempt < max_retries:
                    logger.warning("Anthropic returned invalid JSON, retrying...")
                    backoff = min(initial_backoff * (2**attempt), max_backoff)
                    await asyncio.sleep(backoff)
                    continue
                logger.error(f"Anthropic returned invalid JSON after {max_retries + 1} attempts")
                raise

            except (APIConnectionError, RateLimitError, APIStatusError) as e:
                # Authentication/authorization failures will not succeed on retry.
                if isinstance(e, APIStatusError) and e.status_code in (401, 403):
                    logger.error(f"Anthropic auth error (HTTP {e.status_code}), not retrying: {str(e)}")
                    raise

                last_exception = e
                if attempt < max_retries and self._is_retryable(e):
                    backoff = min(initial_backoff * (2**attempt), max_backoff)
                    # +/-20% jitter derived from the clock's fractional second.
                    jitter = backoff * 0.2 * (2 * (time.time() % 1) - 1)
                    await asyncio.sleep(backoff + jitter)
                    continue

                # Report the attempts actually made; non-retryable errors fail on the first one.
                logger.error(f"Anthropic API error after {attempt + 1} attempt(s): {str(e)}")
                raise

            except Exception as e:
                logger.error(f"Unexpected error during Anthropic call: {type(e).__name__}: {str(e)}")
                raise

        # Only reachable when the loop body never ran (negative max_retries).
        if last_exception:
            raise last_exception
        raise RuntimeError("Anthropic call failed after all retries")

    async def call_with_tools(
        self,
        messages: list[dict[str, Any]],
        tools: list[dict[str, Any]],
        max_completion_tokens: int | None = None,
        temperature: float | None = None,
        scope: str = "tools",
        max_retries: int = 5,
        initial_backoff: float = 1.0,
        max_backoff: float = 30.0,
        tool_choice: str | dict[str, Any] = "auto",
    ) -> LLMToolCallResult:
        """
        Make an LLM API call with tool/function calling support.

        Args:
            messages: List of message dicts. Can include tool results with role='tool'.
            tools: List of tool definitions in OpenAI format.
            max_completion_tokens: Maximum tokens in response (defaults to 4096).
            temperature: Sampling temperature; omitted from the request when None.
            scope: Scope identifier for tracking.
            max_retries: Maximum retry attempts.
            initial_backoff: Initial backoff time in seconds.
            max_backoff: Maximum backoff time in seconds.
            tool_choice: How to choose tools - "auto", "none", "required", or specific function.

        Returns:
            LLMToolCallResult with content and/or tool_calls.

        Raises:
            Exception: Re-raises API errors that are not retryable or once retries are exhausted.
        """
        from anthropic import APIConnectionError, APIStatusError

        start_time = time.time()

        # OpenAI nests the definition under "function"; Anthropic uses a flat shape.
        anthropic_tools = []
        for tool in tools:
            func = tool.get("function", {})
            anthropic_tools.append(
                {
                    "name": func.get("name", ""),
                    "description": func.get("description", ""),
                    "input_schema": func.get("parameters", {"type": "object", "properties": {}}),
                }
            )

        system_prompt = None
        anthropic_messages = []
        for msg in messages:
            role = msg.get("role", "user")
            content = msg.get("content", "")

            if role == "system":
                system_prompt = (system_prompt + "\n\n" + content) if system_prompt else content
            elif role == "tool":
                # Tool results are sent as tool_result blocks inside a user message.
                anthropic_messages.append(
                    {
                        "role": "user",
                        "content": [
                            {"type": "tool_result", "tool_use_id": msg.get("tool_call_id", ""), "content": content}
                        ],
                    }
                )
            elif role == "assistant" and msg.get("tool_calls"):
                assistant_blocks: list[dict[str, Any]] = []
                # Keep any text the assistant produced alongside its tool calls.
                if isinstance(content, str) and content.strip():
                    assistant_blocks.append({"type": "text", "text": content})
                for tc in msg["tool_calls"]:
                    function = tc.get("function", {})
                    assistant_blocks.append(
                        {
                            "type": "tool_use",
                            "id": tc.get("id", ""),
                            "name": function.get("name", ""),
                            "input": self._parse_tool_arguments(function.get("arguments", "{}")),
                        }
                    )
                anthropic_messages.append({"role": "assistant", "content": assistant_blocks})
            else:
                anthropic_messages.append({"role": role, "content": content})

        call_params: dict[str, Any] = {
            "model": self.model,
            "messages": anthropic_messages,
            "tools": anthropic_tools,
            "max_tokens": max_completion_tokens or self._DEFAULT_MAX_TOKENS,
        }
        if system_prompt:
            call_params["system"] = system_prompt

        if temperature is not None:
            call_params["temperature"] = temperature

        # "auto" sends no explicit tool_choice, leaving the request as it was before.
        anthropic_tool_choice = self._convert_tool_choice(tool_choice)
        if anthropic_tool_choice is not None and anthropic_tools:
            call_params["tool_choice"] = anthropic_tool_choice

        last_exception = None
        for attempt in range(max_retries + 1):
            try:
                response = await self._client.messages.create(**call_params)

                content_parts = []
                tool_calls: list[LLMToolCall] = []

                for block in response.content:
                    if block.type == "text":
                        content_parts.append(block.text)
                    elif block.type == "tool_use":
                        tool_calls.append(LLMToolCall(id=block.id, name=block.name, arguments=block.input or {}))

                content = "".join(content_parts) if content_parts else None
                finish_reason = "tool_calls" if tool_calls else "stop"

                input_tokens, output_tokens = self._usage_counts(response)

                metrics = get_metrics_collector()
                metrics.record_llm_call(
                    provider=self.provider,
                    model=self.model,
                    scope=scope,
                    duration=time.time() - start_time,
                    input_tokens=input_tokens,
                    output_tokens=output_tokens,
                    success=True,
                )

                return LLMToolCallResult(
                    content=content,
                    tool_calls=tool_calls,
                    finish_reason=finish_reason,
                    input_tokens=input_tokens,
                    output_tokens=output_tokens,
                )

            except (APIConnectionError, APIStatusError) as e:
                # Authentication/authorization failures will not succeed on retry.
                if isinstance(e, APIStatusError) and e.status_code in (401, 403):
                    raise
                last_exception = e
                # Same retry policy as call(): other 4xx errors are raised immediately.
                if attempt < max_retries and self._is_retryable(e):
                    await asyncio.sleep(min(initial_backoff * (2**attempt), max_backoff))
                    continue
                raise

        # Only reachable when the loop body never ran (negative max_retries).
        if last_exception:
            raise last_exception
        raise RuntimeError("Anthropic tool call failed")

    async def cleanup(self) -> None:
        """Clean up resources (close Anthropic client connections)."""
        # _client is missing if __init__ failed before the client was created.
        if hasattr(self, "_client") and self._client:
            await self._client.close()