hindsight-api 0.4.6__py3-none-any.whl → 0.4.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. hindsight_api/__init__.py +1 -1
  2. hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +16 -2
  3. hindsight_api/api/http.py +83 -1
  4. hindsight_api/banner.py +3 -0
  5. hindsight_api/config.py +44 -6
  6. hindsight_api/daemon.py +18 -112
  7. hindsight_api/engine/llm_interface.py +146 -0
  8. hindsight_api/engine/llm_wrapper.py +304 -1327
  9. hindsight_api/engine/memory_engine.py +125 -41
  10. hindsight_api/engine/providers/__init__.py +14 -0
  11. hindsight_api/engine/providers/anthropic_llm.py +434 -0
  12. hindsight_api/engine/providers/claude_code_llm.py +352 -0
  13. hindsight_api/engine/providers/codex_llm.py +527 -0
  14. hindsight_api/engine/providers/gemini_llm.py +502 -0
  15. hindsight_api/engine/providers/mock_llm.py +234 -0
  16. hindsight_api/engine/providers/openai_compatible_llm.py +745 -0
  17. hindsight_api/engine/retain/fact_extraction.py +13 -9
  18. hindsight_api/engine/retain/fact_storage.py +5 -3
  19. hindsight_api/extensions/__init__.py +10 -0
  20. hindsight_api/extensions/builtin/tenant.py +36 -0
  21. hindsight_api/extensions/operation_validator.py +129 -0
  22. hindsight_api/main.py +6 -21
  23. hindsight_api/migrations.py +75 -0
  24. hindsight_api/worker/main.py +41 -11
  25. hindsight_api/worker/poller.py +26 -14
  26. {hindsight_api-0.4.6.dist-info → hindsight_api-0.4.8.dist-info}/METADATA +2 -1
  27. {hindsight_api-0.4.6.dist-info → hindsight_api-0.4.8.dist-info}/RECORD +29 -21
  28. {hindsight_api-0.4.6.dist-info → hindsight_api-0.4.8.dist-info}/WHEEL +0 -0
  29. {hindsight_api-0.4.6.dist-info → hindsight_api-0.4.8.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,352 @@
1
+ """
2
+ Claude Code LLM provider using Claude Agent SDK.
3
+
4
+ This provider enables using Claude Pro/Max subscriptions for API calls
5
+ via the Claude CLI authentication. It uses the Claude Agent SDK which
6
+ automatically handles authentication via `claude auth login` credentials.
7
+ """
8
+
9
+ import asyncio
10
+ import json
11
+ import logging
12
+ import time
13
+ from typing import Any
14
+
15
+ from hindsight_api.engine.llm_interface import LLMInterface, OutputTooLongError
16
+ from hindsight_api.engine.response_models import LLMToolCall, LLMToolCallResult, TokenUsage
17
+ from hindsight_api.metrics import get_metrics_collector
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
class ClaudeCodeLLM(LLMInterface):
    """
    LLM provider using Claude Code authentication.

    Authenticates using Claude Pro/Max credentials via `claude auth login`
    and makes API calls through the Claude Agent SDK. The SDK does not
    report exact token counts, so usage figures returned by this provider
    are character-count estimates (~4 chars per token).
    """

    def __init__(
        self,
        provider: str,
        api_key: str,  # Will be ignored, uses CLI auth
        base_url: str,
        model: str,
        reasoning_effort: str = "low",
        **kwargs: Any,
    ):
        """
        Initialize Claude Code LLM provider.

        Args:
            provider: Provider identifier used for metrics/logging.
            api_key: Ignored; authentication comes from the Claude CLI login.
            base_url: Passed through to the base class (unused by the SDK).
            model: Model identifier used for metrics/logging.
            reasoning_effort: Passed through to the base class.
            **kwargs: Extra options forwarded to the base class.

        Raises:
            RuntimeError: If the Claude Agent SDK is not installed/usable,
                with setup instructions appended to the message.
        """
        super().__init__(provider, api_key, base_url, model, reasoning_effort, **kwargs)

        # Fail fast with actionable setup instructions if the SDK is missing.
        try:
            self._verify_claude_code_available()
            logger.info("Claude Code: Using Claude Agent SDK (authentication via claude auth login)")
        except Exception as e:
            raise RuntimeError(
                f"Failed to initialize Claude Code provider: {e}\n\n"
                "To set up Claude Code authentication:\n"
                # NOTE: the CLI is published under the @anthropic-ai npm scope
                # (the previous "@anthropics/claude-code" package does not exist).
                "1. Install Claude Code CLI: npm install -g @anthropic-ai/claude-code\n"
                "2. Login with your Pro/Max plan: claude auth login\n"
                "3. Verify authentication: claude --version\n\n"
                "Or use a different provider (anthropic, openai, gemini) with API keys."
            ) from e

        # Metrics collector is imported at module level

    def _verify_claude_code_available(self) -> None:
        """
        Verify that Claude Agent SDK can be imported and is properly configured.

        Raises:
            ImportError: If Claude Agent SDK is not installed.
            RuntimeError: If Claude Code is not authenticated.
        """
        try:
            # Import Claude Agent SDK
            # Reduce Claude Agent SDK logging verbosity
            import logging as sdk_logging

            from claude_agent_sdk import query  # noqa: F401

            sdk_logging.getLogger("claude_agent_sdk").setLevel(sdk_logging.WARNING)
            sdk_logging.getLogger("claude_agent_sdk._internal").setLevel(sdk_logging.WARNING)

            logger.debug("Claude Agent SDK imported successfully")
        except ImportError as e:
            raise ImportError(
                "Claude Agent SDK not installed. Run: uv add claude-agent-sdk or pip install claude-agent-sdk"
            ) from e

        # SDK will automatically check for authentication when first used
        # No need to verify here - let it fail gracefully on first call with helpful error

    async def verify_connection(self) -> None:
        """
        Verify that the Claude Code provider is configured correctly by making a simple test call.

        Raises:
            RuntimeError: If the connection test fails.
        """
        try:
            test_messages = [{"role": "user", "content": "test"}]
            await self.call(
                messages=test_messages,
                max_completion_tokens=10,
                temperature=0.0,
                scope="test",
                max_retries=0,
            )
            logger.info("Claude Code connection verified successfully")
        except Exception as e:
            logger.error(f"Claude Code connection verification failed: {e}")
            raise RuntimeError(f"Failed to verify Claude Code connection: {e}") from e

    @staticmethod
    def _flatten_messages(messages: list[dict[str, str]]) -> tuple[str, str]:
        """
        Flatten a chat transcript into a (system_prompt, user_content) pair.

        The Claude Agent SDK `query()` API is effectively single-turn, so
        assistant turns are inlined into the user content as bracketed
        context rather than sent as separate messages.
        """
        system_prompt = ""
        user_content = ""
        for msg in messages:
            role = msg.get("role", "user")
            content = msg.get("content", "")
            if role == "system":
                system_prompt += ("\n\n" + content) if system_prompt else content
            elif role == "user":
                user_content += ("\n\n" + content) if user_content else content
            elif role == "assistant":
                # Claude Agent SDK doesn't support multi-turn easily in query()
                # For now, prepend assistant messages to user content
                user_content += f"\n\n[Previous assistant response: {content}]"
        return system_prompt, user_content

    @staticmethod
    def _strip_markdown_fences(text: str) -> str:
        """Return the JSON payload from *text*, stripping a markdown code fence if present."""
        if "```json" in text:
            return text.split("```json")[1].split("```")[0].strip()
        if "```" in text:
            return text.split("```")[1].split("```")[0].strip()
        return text

    async def call(
        self,
        messages: list[dict[str, str]],
        response_format: Any | None = None,
        max_completion_tokens: int | None = None,
        temperature: float | None = None,
        scope: str = "memory",
        max_retries: int = 10,
        initial_backoff: float = 1.0,
        max_backoff: float = 60.0,
        skip_validation: bool = False,
        strict_schema: bool = False,
        return_usage: bool = False,
    ) -> Any:
        """
        Make an LLM API call with retry logic.

        Args:
            messages: List of message dicts with 'role' and 'content'.
            response_format: Optional Pydantic model for structured output.
            max_completion_tokens: Maximum tokens in response (ignored by Claude Agent SDK).
            temperature: Sampling temperature (ignored by Claude Agent SDK).
            scope: Scope identifier for tracking.
            max_retries: Maximum retry attempts.
            initial_backoff: Initial backoff time in seconds.
            max_backoff: Maximum backoff time in seconds.
            skip_validation: Return raw JSON without Pydantic validation.
            strict_schema: Use strict JSON schema enforcement (not supported).
            return_usage: If True, return tuple (result, TokenUsage) instead of just result.

        Returns:
            If return_usage=False: Parsed response if response_format is provided, otherwise text content.
            If return_usage=True: Tuple of (result, TokenUsage) with estimated token counts.

        Raises:
            OutputTooLongError: If output exceeds token limits (not supported by Claude Agent SDK).
            Exception: Re-raises API errors after retries exhausted.
        """
        from claude_agent_sdk import AssistantMessage, ClaudeAgentOptions, TextBlock, query

        start_time = time.time()

        system_prompt, user_content = self._flatten_messages(messages)

        # Structured output: the SDK has no native response_format support,
        # so instruct the model to emit schema-conforming JSON and parse it.
        if response_format is not None and hasattr(response_format, "model_json_schema"):
            schema = response_format.model_json_schema()
            schema_instruction = (
                f"\n\nYou must respond with valid JSON matching this schema:\n{json.dumps(schema, indent=2)}\n\n"
                "Respond with ONLY the JSON, no markdown formatting."
            )
            user_content += schema_instruction

        # Configure SDK options
        options = ClaudeAgentOptions(
            system_prompt=system_prompt if system_prompt else None,
            max_turns=1,  # Single-turn for API-style interactions
            allowed_tools=[],  # Disable tools for standard LLM calls
        )

        # Call Claude Agent SDK
        last_exception: Exception | None = None
        for attempt in range(max_retries + 1):
            try:
                # Drain the streaming response into a single string.
                full_text = ""

                async for message in query(prompt=user_content, options=options):
                    if isinstance(message, AssistantMessage):
                        for block in message.content:
                            if isinstance(block, TextBlock):
                                full_text += block.text

                # Handle structured output
                if response_format is not None:
                    # Models may wrap JSON in markdown
                    clean_text = self._strip_markdown_fences(full_text)

                    try:
                        json_data = json.loads(clean_text)
                    except json.JSONDecodeError as e:
                        logger.warning(f"Claude Code JSON parse error (attempt {attempt + 1}/{max_retries + 1}): {e}")
                        if attempt < max_retries:
                            backoff = min(initial_backoff * (2**attempt), max_backoff)
                            await asyncio.sleep(backoff)
                            last_exception = e
                            continue
                        raise

                    if skip_validation:
                        result = json_data
                    else:
                        result = response_format.model_validate(json_data)
                else:
                    result = full_text

                # Record metrics
                duration = time.time() - start_time
                metrics = get_metrics_collector()

                # Estimate token usage (Claude Agent SDK doesn't report exact counts)
                # Use character count / 4 as rough estimate (1 token ≈ 4 characters)
                # `or ""` guards against an explicit content=None in a message.
                estimated_input = sum(len(m.get("content") or "") for m in messages) // 4
                estimated_output = len(full_text) // 4

                metrics.record_llm_call(
                    provider=self.provider,
                    model=self.model,
                    scope=scope,
                    duration=duration,
                    input_tokens=estimated_input,
                    output_tokens=estimated_output,
                    success=True,
                )

                # Log slow calls
                if duration > 10.0:
                    logger.info(
                        f"slow llm call: scope={scope}, model={self.provider}/{self.model}, time={duration:.3f}s"
                    )

                if return_usage:
                    token_usage = TokenUsage(
                        input_tokens=estimated_input,
                        output_tokens=estimated_output,
                        total_tokens=estimated_input + estimated_output,
                    )
                    return result, token_usage

                return result

            except Exception as e:
                last_exception = e

                # Auth failures will not resolve by retrying — surface them
                # immediately with remediation instructions.
                error_str = str(e).lower()
                if "auth" in error_str or "login" in error_str or "credential" in error_str:
                    logger.error(f"Claude Code authentication error: {e}")
                    raise RuntimeError(
                        f"Claude Code authentication failed: {e}\n\n"
                        "Run 'claude auth login' to authenticate with Claude Pro/Max."
                    ) from e

                if attempt < max_retries:
                    backoff = min(initial_backoff * (2**attempt), max_backoff)
                    logger.warning(f"Claude Code error (attempt {attempt + 1}/{max_retries + 1}): {e}")
                    await asyncio.sleep(backoff)
                    continue
                else:
                    logger.error(f"Claude Code error after {max_retries + 1} attempts: {e}")
                    raise

        # Defensive fallthrough: the loop always returns or raises above.
        if last_exception:
            raise last_exception
        raise RuntimeError("Claude Code call failed after all retries")

    async def call_with_tools(
        self,
        messages: list[dict[str, Any]],
        tools: list[dict[str, Any]],
        max_completion_tokens: int | None = None,
        temperature: float | None = None,
        scope: str = "tools",
        max_retries: int = 5,
        initial_backoff: float = 1.0,
        max_backoff: float = 30.0,
        tool_choice: str | dict[str, Any] = "auto",
    ) -> LLMToolCallResult:
        """
        Make an LLM API call with tool/function calling support.

        Note: This is a simplified implementation. Full tool support would require
        integrating with Claude Agent SDK's tool system; currently the tools and
        tool_choice arguments are ignored and a plain text completion is returned.

        Args:
            messages: List of message dicts. Can include tool results with role='tool'.
            tools: List of tool definitions in OpenAI format.
            max_completion_tokens: Maximum tokens in response.
            temperature: Sampling temperature.
            scope: Scope identifier for tracking.
            max_retries: Maximum retry attempts.
            initial_backoff: Initial backoff time in seconds.
            max_backoff: Maximum backoff time in seconds.
            tool_choice: How to choose tools - "auto", "none", "required", or specific function.

        Returns:
            LLMToolCallResult with content and/or tool_calls (tool_calls is always empty).
        """
        # For now, use regular call without tools
        # Full implementation would require mapping OpenAI tool format to Claude Agent SDK tools
        logger.warning(
            "Claude Code provider does not fully support tool calling yet. Falling back to regular text completion."
        )

        result = await self.call(
            messages=messages,
            response_format=None,
            max_completion_tokens=max_completion_tokens,
            temperature=temperature,
            scope=scope,
            max_retries=max_retries,
            initial_backoff=initial_backoff,
            max_backoff=max_backoff,
            return_usage=True,
        )

        if isinstance(result, tuple):
            text, usage = result
            return LLMToolCallResult(
                content=text,
                tool_calls=[],
                finish_reason="stop",
                input_tokens=usage.input_tokens,
                output_tokens=usage.output_tokens,
            )
        else:
            # Fallback if return_usage didn't work as expected
            return LLMToolCallResult(
                content=str(result),
                tool_calls=[],
                finish_reason="stop",
                input_tokens=0,
                output_tokens=0,
            )

    async def cleanup(self) -> None:
        """Clean up resources (no HTTP client to close for Claude Agent SDK)."""
        pass