hindsight-api 0.4.6__py3-none-any.whl → 0.4.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. hindsight_api/__init__.py +1 -1
  2. hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +16 -2
  3. hindsight_api/api/http.py +83 -1
  4. hindsight_api/banner.py +3 -0
  5. hindsight_api/config.py +44 -6
  6. hindsight_api/daemon.py +18 -112
  7. hindsight_api/engine/llm_interface.py +146 -0
  8. hindsight_api/engine/llm_wrapper.py +304 -1327
  9. hindsight_api/engine/memory_engine.py +125 -41
  10. hindsight_api/engine/providers/__init__.py +14 -0
  11. hindsight_api/engine/providers/anthropic_llm.py +434 -0
  12. hindsight_api/engine/providers/claude_code_llm.py +352 -0
  13. hindsight_api/engine/providers/codex_llm.py +527 -0
  14. hindsight_api/engine/providers/gemini_llm.py +502 -0
  15. hindsight_api/engine/providers/mock_llm.py +234 -0
  16. hindsight_api/engine/providers/openai_compatible_llm.py +745 -0
  17. hindsight_api/engine/retain/fact_extraction.py +13 -9
  18. hindsight_api/engine/retain/fact_storage.py +5 -3
  19. hindsight_api/extensions/__init__.py +10 -0
  20. hindsight_api/extensions/builtin/tenant.py +36 -0
  21. hindsight_api/extensions/operation_validator.py +129 -0
  22. hindsight_api/main.py +6 -21
  23. hindsight_api/migrations.py +75 -0
  24. hindsight_api/worker/main.py +41 -11
  25. hindsight_api/worker/poller.py +26 -14
  26. {hindsight_api-0.4.6.dist-info → hindsight_api-0.4.8.dist-info}/METADATA +2 -1
  27. {hindsight_api-0.4.6.dist-info → hindsight_api-0.4.8.dist-info}/RECORD +29 -21
  28. {hindsight_api-0.4.6.dist-info → hindsight_api-0.4.8.dist-info}/WHEEL +0 -0
  29. {hindsight_api-0.4.6.dist-info → hindsight_api-0.4.8.dist-info}/entry_points.txt +0 -0
hindsight_api/engine/providers/anthropic_llm.py
@@ -0,0 +1,434 @@
+ """
+ Anthropic LLM provider using the Anthropic Python SDK.
+
+ This provider enables using Claude models from Anthropic with support for:
+ - Structured JSON output
+ - Tool/function calling with proper format conversion
+ - Extended thinking mode
+ - Retry logic with exponential backoff
+ """
+
+ import asyncio
+ import json
+ import logging
+ import time
+ from typing import Any
+
+ from hindsight_api.engine.llm_interface import LLMInterface, OutputTooLongError
+ from hindsight_api.engine.response_models import LLMToolCall, LLMToolCallResult, TokenUsage
+ from hindsight_api.metrics import get_metrics_collector
+
+ logger = logging.getLogger(__name__)
+
+
+ class AnthropicLLM(LLMInterface):
+     """
+     LLM provider using Anthropic's Claude models.
+
+     Supports structured output, tool calling, and extended thinking mode.
+     Handles format conversion between OpenAI-style messages and Anthropic's format.
+     """
+
+     def __init__(
+         self,
+         provider: str,
+         api_key: str,
+         base_url: str,
+         model: str,
+         reasoning_effort: str = "low",
+         timeout: float = 300.0,
+         **kwargs: Any,
+     ):
+         """
+         Initialize Anthropic LLM provider.
+
+         Args:
+             provider: Provider name (should be "anthropic").
+             api_key: Anthropic API key.
+             base_url: Base URL for the API (optional, uses Anthropic default if empty).
+             model: Model name (e.g., "claude-sonnet-4-20250514").
+             reasoning_effort: Reasoning effort level (not used by Anthropic).
+             timeout: Request timeout in seconds.
+             **kwargs: Additional provider-specific parameters.
+         """
+         super().__init__(provider, api_key, base_url, model, reasoning_effort, **kwargs)
+
+         if not self.api_key:
+             raise ValueError("API key is required for Anthropic provider")
+
+         # Import and initialize Anthropic client
+         try:
+             from anthropic import AsyncAnthropic
+
+             client_kwargs: dict[str, Any] = {"api_key": self.api_key}
+             if self.base_url:
+                 client_kwargs["base_url"] = self.base_url
+             if timeout:
+                 client_kwargs["timeout"] = timeout
+
+             self._client = AsyncAnthropic(**client_kwargs)
+             logger.info(f"Anthropic client initialized for model: {self.model}")
+         except ImportError as e:
+             raise RuntimeError("Anthropic SDK not installed. Run: uv add anthropic or pip install anthropic") from e
+
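For orientation, a minimal construction sketch. Only the `__init__` signature above is taken from the diff; the key, model, and timeout values are placeholders:

    # Hypothetical usage; assumes only the __init__ signature shown above.
    llm = AnthropicLLM(
        provider="anthropic",
        api_key="sk-ant-...",                  # your Anthropic API key
        base_url="",                           # empty -> SDK default endpoint
        model="claude-sonnet-4-20250514",
        timeout=120.0,                         # seconds; the default is 300.0
    )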
+     async def verify_connection(self) -> None:
+         """
+         Verify that the Anthropic provider is configured correctly by making a simple test call.
+
+         Raises:
+             RuntimeError: If the connection test fails.
+         """
+         try:
+             test_messages = [{"role": "user", "content": "test"}]
+             await self.call(
+                 messages=test_messages,
+                 max_completion_tokens=10,
+                 temperature=0.0,
+                 scope="test",
+                 max_retries=0,
+             )
+             logger.info("Anthropic connection verified successfully")
+         except Exception as e:
+             logger.error(f"Anthropic connection verification failed: {e}")
+             raise RuntimeError(f"Failed to verify Anthropic connection: {e}") from e
+
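A plausible way to wire this in at startup, continuing the hypothetical instance from the sketch above: verify once at boot so credential problems surface immediately rather than on the first real request.

    # Hypothetical boot-time check; verify_connection() raises RuntimeError on failure.
    try:
        await llm.verify_connection()
    except RuntimeError as exc:
        raise SystemExit(f"Anthropic provider misconfigured: {exc}")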
+     async def call(
+         self,
+         messages: list[dict[str, str]],
+         response_format: Any | None = None,
+         max_completion_tokens: int | None = None,
+         temperature: float | None = None,
+         scope: str = "memory",
+         max_retries: int = 10,
+         initial_backoff: float = 1.0,
+         max_backoff: float = 60.0,
+         skip_validation: bool = False,
+         strict_schema: bool = False,
+         return_usage: bool = False,
+     ) -> Any:
+         """
+         Make an LLM API call with retry logic.
+
+         Args:
+             messages: List of message dicts with 'role' and 'content'.
+             response_format: Optional Pydantic model for structured output.
+             max_completion_tokens: Maximum tokens in response.
+             temperature: Sampling temperature (Anthropic accepts 0.0-1.0).
+             scope: Scope identifier for tracking.
+             max_retries: Maximum retry attempts.
+             initial_backoff: Initial backoff time in seconds.
+             max_backoff: Maximum backoff time in seconds.
+             skip_validation: Return raw JSON without Pydantic validation.
+             strict_schema: Use strict JSON schema enforcement (not supported by Anthropic).
+             return_usage: If True, return tuple (result, TokenUsage) instead of just result.
+
+         Returns:
+             If return_usage=False: Parsed response if response_format is provided, otherwise text content.
+             If return_usage=True: Tuple of (result, TokenUsage) with token counts.
+
+         Raises:
+             OutputTooLongError: If output exceeds token limits.
+             Exception: Re-raises API errors after retries are exhausted.
+         """
+         from anthropic import APIConnectionError, APIStatusError, RateLimitError
+
+         start_time = time.time()
+
+         # Convert OpenAI-style messages to Anthropic format: system messages are
+         # hoisted into the top-level `system` parameter.
+         system_prompt = None
+         anthropic_messages = []
+
+         for msg in messages:
+             role = msg.get("role", "user")
+             content = msg.get("content", "")
+
+             if role == "system":
+                 if system_prompt:
+                     system_prompt += "\n\n" + content
+                 else:
+                     system_prompt = content
+             else:
+                 anthropic_messages.append({"role": role, "content": content})
+
+         # Add a JSON schema instruction if response_format is provided
+         if response_format is not None and hasattr(response_format, "model_json_schema"):
+             schema = response_format.model_json_schema()
+             schema_msg = f"\n\nYou must respond with valid JSON matching this schema:\n{json.dumps(schema, indent=2)}"
+             if system_prompt:
+                 system_prompt += schema_msg
+             else:
+                 system_prompt = schema_msg
+
+         # Prepare request parameters
+         call_params: dict[str, Any] = {
+             "model": self.model,
+             "messages": anthropic_messages,
+             "max_tokens": max_completion_tokens if max_completion_tokens is not None else 4096,
+         }
+
+         if system_prompt:
+             call_params["system"] = system_prompt
+
+         if temperature is not None:
+             call_params["temperature"] = temperature
+
+         last_exception = None
+
+         for attempt in range(max_retries + 1):
+             try:
+                 response = await self._client.messages.create(**call_params)
+
+                 # Anthropic response content is a list of blocks
+                 content = ""
+                 for block in response.content:
+                     if block.type == "text":
+                         content += block.text
+
+                 if response_format is not None:
+                     # A truncated response cannot contain complete JSON, so surface the
+                     # documented error instead of retrying a parse that cannot succeed.
+                     if response.stop_reason == "max_tokens":
+                         raise OutputTooLongError(
+                             f"Anthropic response truncated at max_tokens={call_params['max_tokens']}"
+                         )
+
+                     # Models may wrap JSON in markdown code blocks
+                     clean_content = content
+                     if "```json" in content:
+                         clean_content = content.split("```json")[1].split("```")[0].strip()
+                     elif "```" in content:
+                         clean_content = content.split("```")[1].split("```")[0].strip()
+
+                     try:
+                         json_data = json.loads(clean_content)
+                     except json.JSONDecodeError:
+                         # Fall back to parsing raw content if markdown stripping failed
+                         json_data = json.loads(content)
+
+                     if skip_validation:
+                         result = json_data
+                     else:
+                         result = response_format.model_validate(json_data)
+                 else:
+                     result = content
+
+                 # Record metrics and log slow calls
+                 duration = time.time() - start_time
+                 input_tokens = (response.usage.input_tokens or 0) if response.usage else 0
+                 output_tokens = (response.usage.output_tokens or 0) if response.usage else 0
+                 total_tokens = input_tokens + output_tokens
+
+                 # Record LLM metrics
+                 metrics = get_metrics_collector()
+                 metrics.record_llm_call(
+                     provider=self.provider,
+                     model=self.model,
+                     scope=scope,
+                     duration=duration,
+                     input_tokens=input_tokens,
+                     output_tokens=output_tokens,
+                     success=True,
+                 )
+
+                 # Log slow calls
+                 if duration > 10.0:
+                     logger.info(
+                         f"slow llm call: scope={scope}, model={self.provider}/{self.model}, "
+                         f"input_tokens={input_tokens}, output_tokens={output_tokens}, "
+                         f"time={duration:.3f}s"
+                     )
+
+                 if return_usage:
+                     token_usage = TokenUsage(
+                         input_tokens=input_tokens,
+                         output_tokens=output_tokens,
+                         total_tokens=total_tokens,
+                     )
+                     return result, token_usage
+                 return result
+
+             except json.JSONDecodeError as e:
+                 last_exception = e
+                 if attempt < max_retries:
+                     logger.warning("Anthropic returned invalid JSON, retrying...")
+                     backoff = min(initial_backoff * (2**attempt), max_backoff)
+                     await asyncio.sleep(backoff)
+                     continue
+                 else:
+                     logger.error(f"Anthropic returned invalid JSON after {max_retries + 1} attempts")
+                     raise
+
+             except (APIConnectionError, RateLimitError, APIStatusError) as e:
+                 # Fail fast on auth errors (401/403)
+                 if isinstance(e, APIStatusError) and e.status_code in (401, 403):
+                     logger.error(f"Anthropic auth error (HTTP {e.status_code}), not retrying: {str(e)}")
+                     raise
+
+                 last_exception = e
+                 if attempt < max_retries:
+                     # Retry connection problems, rate limits, and server errors
+                     should_retry = isinstance(e, (APIConnectionError, RateLimitError)) or (
+                         isinstance(e, APIStatusError) and e.status_code >= 500
+                     )
+
+                     if should_retry:
+                         backoff = min(initial_backoff * (2**attempt), max_backoff)
+                         # Clock-derived jitter in [-0.2, +0.2] * backoff to spread out retries
+                         jitter = backoff * 0.2 * (2 * (time.time() % 1) - 1)
+                         await asyncio.sleep(backoff + jitter)
+                         continue
+
+                 logger.error(f"Anthropic API error after {max_retries + 1} attempts: {str(e)}")
+                 raise
+
+             except OutputTooLongError:
+                 # Documented error; propagate without the generic "unexpected" logging
+                 raise
+
+             except Exception as e:
+                 logger.error(f"Unexpected error during Anthropic call: {type(e).__name__}: {str(e)}")
+                 raise
+
+         if last_exception:
+             raise last_exception
+         raise RuntimeError("Anthropic call failed after all retries")
+
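To illustrate the structured-output path, a sketch of a call with a Pydantic response_format; the `Fact` model and message contents are invented for the example. The schema is appended to the system prompt, the reply is de-fenced and parsed, and with return_usage=True the token counts come back alongside the result:

    # Hypothetical structured call; `Fact` is an example model, not part of the diff.
    from pydantic import BaseModel

    class Fact(BaseModel):
        subject: str
        claim: str

    result, usage = await llm.call(
        messages=[
            {"role": "system", "content": "Extract one fact."},   # hoisted into `system`
            {"role": "user", "content": "Paris is the capital of France."},
        ],
        response_format=Fact,         # JSON schema is injected into the system prompt
        max_completion_tokens=256,
        temperature=0.0,
        return_usage=True,            # -> (Fact instance, TokenUsage)
    )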
+     async def call_with_tools(
+         self,
+         messages: list[dict[str, Any]],
+         tools: list[dict[str, Any]],
+         max_completion_tokens: int | None = None,
+         temperature: float | None = None,
+         scope: str = "tools",
+         max_retries: int = 5,
+         initial_backoff: float = 1.0,
+         max_backoff: float = 30.0,
+         tool_choice: str | dict[str, Any] = "auto",
+     ) -> LLMToolCallResult:
+         """
+         Make an LLM API call with tool/function calling support.
+
+         Args:
+             messages: List of message dicts. Can include tool results with role='tool'.
+             tools: List of tool definitions in OpenAI format.
+             max_completion_tokens: Maximum tokens in response.
+             temperature: Sampling temperature (Anthropic accepts 0.0-1.0).
+             scope: Scope identifier for tracking.
+             max_retries: Maximum retry attempts.
+             initial_backoff: Initial backoff time in seconds.
+             max_backoff: Maximum backoff time in seconds.
+             tool_choice: How to choose tools - "auto", "none", "required", or a specific function.
+
+         Returns:
+             LLMToolCallResult with content and/or tool_calls.
+         """
+         from anthropic import APIConnectionError, APIStatusError
+
+         start_time = time.time()
+
+         # Convert OpenAI tool format to Anthropic format
+         anthropic_tools = []
+         for tool in tools:
+             func = tool.get("function", {})
+             anthropic_tools.append(
+                 {
+                     "name": func.get("name", ""),
+                     "description": func.get("description", ""),
+                     "input_schema": func.get("parameters", {"type": "object", "properties": {}}),
+                 }
+             )
+
+         # Convert messages - handle tool results
+         system_prompt = None
+         anthropic_messages = []
+         for msg in messages:
+             role = msg.get("role", "user")
+             content = msg.get("content", "")
+
+             if role == "system":
+                 system_prompt = (system_prompt + "\n\n" + content) if system_prompt else content
+             elif role == "tool":
+                 # Anthropic uses tool_result blocks
+                 anthropic_messages.append(
+                     {
+                         "role": "user",
+                         "content": [
+                             {"type": "tool_result", "tool_use_id": msg.get("tool_call_id", ""), "content": content}
+                         ],
+                     }
+                 )
+             elif role == "assistant" and msg.get("tool_calls"):
+                 # Convert assistant tool calls
+                 tool_use_blocks = []
+                 for tc in msg["tool_calls"]:
+                     tool_use_blocks.append(
+                         {
+                             "type": "tool_use",
+                             "id": tc.get("id", ""),
+                             "name": tc.get("function", {}).get("name", ""),
+                             "input": json.loads(tc.get("function", {}).get("arguments", "{}")),
+                         }
+                     )
+                 anthropic_messages.append({"role": "assistant", "content": tool_use_blocks})
+             else:
+                 anthropic_messages.append({"role": role, "content": content})
+
+         call_params: dict[str, Any] = {
+             "model": self.model,
+             "messages": anthropic_messages,
+             "tools": anthropic_tools,
+             "max_tokens": max_completion_tokens or 4096,
+         }
+         if system_prompt:
+             call_params["system"] = system_prompt
+
+         if temperature is not None:
+             call_params["temperature"] = temperature
+
+         # Map the OpenAI-style tool_choice onto Anthropic's equivalents
+         # ("auto" is Anthropic's default and needs no mapping; "none" is
+         # left unmapped here - callers that need it can omit tools instead).
+         if tool_choice == "required":
+             call_params["tool_choice"] = {"type": "any"}
+         elif isinstance(tool_choice, dict):
+             forced_name = tool_choice.get("function", {}).get("name", "")
+             if forced_name:
+                 call_params["tool_choice"] = {"type": "tool", "name": forced_name}
+
+         last_exception = None
+         for attempt in range(max_retries + 1):
+             try:
+                 response = await self._client.messages.create(**call_params)
+
+                 # Extract text content and tool calls
+                 content_parts = []
+                 tool_calls: list[LLMToolCall] = []
+
+                 for block in response.content:
+                     if block.type == "text":
+                         content_parts.append(block.text)
+                     elif block.type == "tool_use":
+                         tool_calls.append(LLMToolCall(id=block.id, name=block.name, arguments=block.input or {}))
+
+                 content = "".join(content_parts) if content_parts else None
+                 finish_reason = "tool_calls" if tool_calls else "stop"
+
+                 # Extract token usage
+                 input_tokens = response.usage.input_tokens or 0
+                 output_tokens = response.usage.output_tokens or 0
+
+                 # Record metrics
+                 metrics = get_metrics_collector()
+                 metrics.record_llm_call(
+                     provider=self.provider,
+                     model=self.model,
+                     scope=scope,
+                     duration=time.time() - start_time,
+                     input_tokens=input_tokens,
+                     output_tokens=output_tokens,
+                     success=True,
+                 )
+
+                 return LLMToolCallResult(
+                     content=content,
+                     tool_calls=tool_calls,
+                     finish_reason=finish_reason,
+                     input_tokens=input_tokens,
+                     output_tokens=output_tokens,
+                 )
+
+             except (APIConnectionError, APIStatusError) as e:
+                 # Fail fast on auth errors (401/403)
+                 if isinstance(e, APIStatusError) and e.status_code in (401, 403):
+                     raise
+                 last_exception = e
+                 if attempt < max_retries:
+                     await asyncio.sleep(min(initial_backoff * (2**attempt), max_backoff))
+                     continue
+                 raise
+
+         if last_exception:
+             raise last_exception
+         raise RuntimeError("Anthropic tool call failed")
+
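A sketch of the tool-calling round trip under the conversions above; the `get_weather` tool, its arguments, and the tool output are invented. Tools go in OpenAI format, returned tool_use blocks surface as LLMToolCall objects, and the follow-up message with role='tool' is converted back into an Anthropic tool_result block:

    # Hypothetical round trip; `get_weather` is an example tool, not part of the diff.
    import json

    tools = [{
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Look up current weather for a city.",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }]
    first = await llm.call_with_tools(
        messages=[{"role": "user", "content": "Weather in Paris?"}],
        tools=tools,
    )
    if first.tool_calls:
        tc = first.tool_calls[0]
        followup = await llm.call_with_tools(
            messages=[
                {"role": "user", "content": "Weather in Paris?"},
                {"role": "assistant", "tool_calls": [{
                    "id": tc.id,
                    "function": {"name": tc.name, "arguments": json.dumps(tc.arguments)},
                }]},
                {"role": "tool", "tool_call_id": tc.id, "content": '{"temp_c": 18}'},
            ],
            tools=tools,
        )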
+     async def cleanup(self) -> None:
+         """Clean up resources (close Anthropic client connections)."""
+         if hasattr(self, "_client") and self._client:
+             await self._client.close()
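A closing note on the retry cadence both methods share: the backoff doubles from initial_backoff up to max_backoff, and call() adds ±20% clock-derived jitter on top. A quick sketch of the schedule with call()'s defaults (initial_backoff=1.0, max_backoff=60.0):

    # Sleep before each retry (0-indexed attempt), ignoring jitter:
    #   attempt 0 fails -> sleep min(1 * 2**0, 60) = 1s
    #   attempt 1 fails -> sleep min(1 * 2**1, 60) = 2s
    #   attempt 5 fails -> sleep min(1 * 2**5, 60) = 32s
    #   attempt 6+      -> capped at 60s
    for attempt in range(7):
        backoff = min(1.0 * (2 ** attempt), 60.0)
        print(attempt, backoff)

And when the engine shuts down, `await llm.cleanup()` closes the client's underlying HTTP connections.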