hindsight-api 0.4.6__py3-none-any.whl → 0.4.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hindsight_api/__init__.py +1 -1
- hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +16 -2
- hindsight_api/api/http.py +83 -1
- hindsight_api/banner.py +3 -0
- hindsight_api/config.py +44 -6
- hindsight_api/daemon.py +18 -112
- hindsight_api/engine/llm_interface.py +146 -0
- hindsight_api/engine/llm_wrapper.py +304 -1327
- hindsight_api/engine/memory_engine.py +125 -41
- hindsight_api/engine/providers/__init__.py +14 -0
- hindsight_api/engine/providers/anthropic_llm.py +434 -0
- hindsight_api/engine/providers/claude_code_llm.py +352 -0
- hindsight_api/engine/providers/codex_llm.py +527 -0
- hindsight_api/engine/providers/gemini_llm.py +502 -0
- hindsight_api/engine/providers/mock_llm.py +234 -0
- hindsight_api/engine/providers/openai_compatible_llm.py +745 -0
- hindsight_api/engine/retain/fact_extraction.py +13 -9
- hindsight_api/engine/retain/fact_storage.py +5 -3
- hindsight_api/extensions/__init__.py +10 -0
- hindsight_api/extensions/builtin/tenant.py +36 -0
- hindsight_api/extensions/operation_validator.py +129 -0
- hindsight_api/main.py +6 -21
- hindsight_api/migrations.py +75 -0
- hindsight_api/worker/main.py +41 -11
- hindsight_api/worker/poller.py +26 -14
- {hindsight_api-0.4.6.dist-info → hindsight_api-0.4.8.dist-info}/METADATA +2 -1
- {hindsight_api-0.4.6.dist-info → hindsight_api-0.4.8.dist-info}/RECORD +29 -21
- {hindsight_api-0.4.6.dist-info → hindsight_api-0.4.8.dist-info}/WHEEL +0 -0
- {hindsight_api-0.4.6.dist-info → hindsight_api-0.4.8.dist-info}/entry_points.txt +0 -0
hindsight_api/engine/providers/claude_code_llm.py
@@ -0,0 +1,352 @@
+"""
+Claude Code LLM provider using Claude Agent SDK.
+
+This provider enables using Claude Pro/Max subscriptions for API calls
+via the Claude CLI authentication. It uses the Claude Agent SDK which
+automatically handles authentication via `claude auth login` credentials.
+"""
+
+import asyncio
+import json
+import logging
+import time
+from typing import Any
+
+from hindsight_api.engine.llm_interface import LLMInterface, OutputTooLongError
+from hindsight_api.engine.response_models import LLMToolCall, LLMToolCallResult, TokenUsage
+from hindsight_api.metrics import get_metrics_collector
+
+logger = logging.getLogger(__name__)
+
+
+class ClaudeCodeLLM(LLMInterface):
+    """
+    LLM provider using Claude Code authentication.
+
+    Authenticates using Claude Pro/Max credentials via `claude auth login`
+    and makes API calls through the Claude Agent SDK.
+    """
+
+    def __init__(
+        self,
+        provider: str,
+        api_key: str,  # Will be ignored, uses CLI auth
+        base_url: str,
+        model: str,
+        reasoning_effort: str = "low",
+        **kwargs: Any,
+    ):
+        """Initialize Claude Code LLM provider."""
+        super().__init__(provider, api_key, base_url, model, reasoning_effort, **kwargs)
+
+        # Verify Claude Agent SDK is available
+        try:
+            self._verify_claude_code_available()
+            logger.info("Claude Code: Using Claude Agent SDK (authentication via claude auth login)")
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to initialize Claude Code provider: {e}\n\n"
+                "To set up Claude Code authentication:\n"
+                "1. Install Claude Code CLI: npm install -g @anthropics/claude-code\n"
+                "2. Login with your Pro/Max plan: claude auth login\n"
+                "3. Verify authentication: claude --version\n\n"
+                "Or use a different provider (anthropic, openai, gemini) with API keys."
+            ) from e
+
+        # Metrics collector is imported at module level
+
+    def _verify_claude_code_available(self) -> None:
+        """
+        Verify that Claude Agent SDK can be imported and is properly configured.
+
+        Raises:
+            ImportError: If Claude Agent SDK is not installed.
+            RuntimeError: If Claude Code is not authenticated.
+        """
+        try:
+            # Import Claude Agent SDK
+            # Reduce Claude Agent SDK logging verbosity
+            import logging as sdk_logging
+
+            from claude_agent_sdk import query  # noqa: F401
+
+            sdk_logging.getLogger("claude_agent_sdk").setLevel(sdk_logging.WARNING)
+            sdk_logging.getLogger("claude_agent_sdk._internal").setLevel(sdk_logging.WARNING)
+
+            logger.debug("Claude Agent SDK imported successfully")
+        except ImportError as e:
+            raise ImportError(
+                "Claude Agent SDK not installed. Run: uv add claude-agent-sdk or pip install claude-agent-sdk"
+            ) from e
+
+        # SDK will automatically check for authentication when first used
+        # No need to verify here - let it fail gracefully on first call with helpful error
+
+    async def verify_connection(self) -> None:
+        """
+        Verify that the Claude Code provider is configured correctly by making a simple test call.
+
+        Raises:
+            RuntimeError: If the connection test fails.
+        """
+        try:
+            test_messages = [{"role": "user", "content": "test"}]
+            await self.call(
+                messages=test_messages,
+                max_completion_tokens=10,
+                temperature=0.0,
+                scope="test",
+                max_retries=0,
+            )
+            logger.info("Claude Code connection verified successfully")
+        except Exception as e:
+            logger.error(f"Claude Code connection verification failed: {e}")
+            raise RuntimeError(f"Failed to verify Claude Code connection: {e}") from e
+
+    async def call(
+        self,
+        messages: list[dict[str, str]],
+        response_format: Any | None = None,
+        max_completion_tokens: int | None = None,
+        temperature: float | None = None,
+        scope: str = "memory",
+        max_retries: int = 10,
+        initial_backoff: float = 1.0,
+        max_backoff: float = 60.0,
+        skip_validation: bool = False,
+        strict_schema: bool = False,
+        return_usage: bool = False,
+    ) -> Any:
+        """
+        Make an LLM API call with retry logic.
+
+        Args:
+            messages: List of message dicts with 'role' and 'content'.
+            response_format: Optional Pydantic model for structured output.
+            max_completion_tokens: Maximum tokens in response (ignored by Claude Agent SDK).
+            temperature: Sampling temperature (ignored by Claude Agent SDK).
+            scope: Scope identifier for tracking.
+            max_retries: Maximum retry attempts.
+            initial_backoff: Initial backoff time in seconds.
+            max_backoff: Maximum backoff time in seconds.
+            skip_validation: Return raw JSON without Pydantic validation.
+            strict_schema: Use strict JSON schema enforcement (not supported).
+            return_usage: If True, return tuple (result, TokenUsage) instead of just result.
+
+        Returns:
+            If return_usage=False: Parsed response if response_format is provided, otherwise text content.
+            If return_usage=True: Tuple of (result, TokenUsage) with estimated token counts.
+
+        Raises:
+            OutputTooLongError: If output exceeds token limits (not supported by Claude Agent SDK).
+            Exception: Re-raises API errors after retries exhausted.
+        """
+        from claude_agent_sdk import AssistantMessage, ClaudeAgentOptions, TextBlock, query
+
+        start_time = time.time()
+
+        # Build system prompt
+        system_prompt = ""
+        user_content = ""
+
+        for msg in messages:
+            role = msg.get("role", "user")
+            content = msg.get("content", "")
+
+            if role == "system":
+                system_prompt += ("\n\n" + content) if system_prompt else content
+            elif role == "user":
+                user_content += ("\n\n" + content) if user_content else content
+            elif role == "assistant":
+                # Claude Agent SDK doesn't support multi-turn easily in query()
+                # For now, prepend assistant messages to user content
+                user_content += f"\n\n[Previous assistant response: {content}]"
+
+        # Add JSON schema instruction if response_format is provided
+        if response_format is not None and hasattr(response_format, "model_json_schema"):
+            schema = response_format.model_json_schema()
+            schema_instruction = (
+                f"\n\nYou must respond with valid JSON matching this schema:\n{json.dumps(schema, indent=2)}\n\n"
+                "Respond with ONLY the JSON, no markdown formatting."
+            )
+            user_content += schema_instruction
+
+        # Configure SDK options
+        options = ClaudeAgentOptions(
+            system_prompt=system_prompt if system_prompt else None,
+            max_turns=1,  # Single-turn for API-style interactions
+            allowed_tools=[],  # Disable tools for standard LLM calls
+        )
+
+        # Call Claude Agent SDK
+        last_exception = None
+        for attempt in range(max_retries + 1):
+            try:
+                # Collect streaming response
+                full_text = ""
+
+                async for message in query(prompt=user_content, options=options):
+                    if isinstance(message, AssistantMessage):
+                        for block in message.content:
+                            if isinstance(block, TextBlock):
+                                full_text += block.text
+
+                # Handle structured output
+                if response_format is not None:
+                    # Models may wrap JSON in markdown
+                    clean_text = full_text
+                    if "```json" in full_text:
+                        clean_text = full_text.split("```json")[1].split("```")[0].strip()
+                    elif "```" in full_text:
+                        clean_text = full_text.split("```")[1].split("```")[0].strip()
+
+                    try:
+                        json_data = json.loads(clean_text)
+                    except json.JSONDecodeError as e:
+                        logger.warning(f"Claude Code JSON parse error (attempt {attempt + 1}/{max_retries + 1}): {e}")
+                        if attempt < max_retries:
+                            backoff = min(initial_backoff * (2**attempt), max_backoff)
+                            await asyncio.sleep(backoff)
+                            last_exception = e
+                            continue
+                        raise
+
+                    if skip_validation:
+                        result = json_data
+                    else:
+                        result = response_format.model_validate(json_data)
+                else:
+                    result = full_text
+
+                # Record metrics
+                duration = time.time() - start_time
+                metrics = get_metrics_collector()
+
+                # Estimate token usage (Claude Agent SDK doesn't report exact counts)
+                # Use character count / 4 as rough estimate (1 token ≈ 4 characters)
+                estimated_input = sum(len(m.get("content", "")) for m in messages) // 4
+                estimated_output = len(full_text) // 4
+
+                metrics.record_llm_call(
+                    provider=self.provider,
+                    model=self.model,
+                    scope=scope,
+                    duration=duration,
+                    input_tokens=estimated_input,
+                    output_tokens=estimated_output,
+                    success=True,
+                )
+
+                # Log slow calls
+                if duration > 10.0:
+                    logger.info(
+                        f"slow llm call: scope={scope}, model={self.provider}/{self.model}, time={duration:.3f}s"
+                    )
+
+                if return_usage:
+                    token_usage = TokenUsage(
+                        input_tokens=estimated_input,
+                        output_tokens=estimated_output,
+                        total_tokens=estimated_input + estimated_output,
+                    )
+                    return result, token_usage
+
+                return result
+
+            except Exception as e:
+                last_exception = e
+
+                # Check for authentication errors
+                error_str = str(e).lower()
+                if "auth" in error_str or "login" in error_str or "credential" in error_str:
+                    logger.error(f"Claude Code authentication error: {e}")
+                    raise RuntimeError(
+                        f"Claude Code authentication failed: {e}\n\n"
+                        "Run 'claude auth login' to authenticate with Claude Pro/Max."
+                    ) from e
+
+                if attempt < max_retries:
+                    backoff = min(initial_backoff * (2**attempt), max_backoff)
+                    logger.warning(f"Claude Code error (attempt {attempt + 1}/{max_retries + 1}): {e}")
+                    await asyncio.sleep(backoff)
+                    continue
+                else:
+                    logger.error(f"Claude Code error after {max_retries + 1} attempts: {e}")
+                    raise
+
+        if last_exception:
+            raise last_exception
+        raise RuntimeError("Claude Code call failed after all retries")
+
+    async def call_with_tools(
+        self,
+        messages: list[dict[str, Any]],
+        tools: list[dict[str, Any]],
+        max_completion_tokens: int | None = None,
+        temperature: float | None = None,
+        scope: str = "tools",
+        max_retries: int = 5,
+        initial_backoff: float = 1.0,
+        max_backoff: float = 30.0,
+        tool_choice: str | dict[str, Any] = "auto",
+    ) -> LLMToolCallResult:
+        """
+        Make an LLM API call with tool/function calling support.
+
+        Note: This is a simplified implementation. Full tool support would require
+        integrating with Claude Agent SDK's tool system.
+
+        Args:
+            messages: List of message dicts. Can include tool results with role='tool'.
+            tools: List of tool definitions in OpenAI format.
+            max_completion_tokens: Maximum tokens in response.
+            temperature: Sampling temperature.
+            scope: Scope identifier for tracking.
+            max_retries: Maximum retry attempts.
+            initial_backoff: Initial backoff time in seconds.
+            max_backoff: Maximum backoff time in seconds.
+            tool_choice: How to choose tools - "auto", "none", "required", or specific function.
+
+        Returns:
+            LLMToolCallResult with content and/or tool_calls.
+        """
+        # For now, use regular call without tools
+        # Full implementation would require mapping OpenAI tool format to Claude Agent SDK tools
+        logger.warning(
+            "Claude Code provider does not fully support tool calling yet. Falling back to regular text completion."
+        )
+
+        result = await self.call(
+            messages=messages,
+            response_format=None,
+            max_completion_tokens=max_completion_tokens,
+            temperature=temperature,
+            scope=scope,
+            max_retries=max_retries,
+            initial_backoff=initial_backoff,
+            max_backoff=max_backoff,
+            return_usage=True,
+        )
+
+        if isinstance(result, tuple):
+            text, usage = result
+            return LLMToolCallResult(
+                content=text,
+                tool_calls=[],
+                finish_reason="stop",
+                input_tokens=usage.input_tokens,
+                output_tokens=usage.output_tokens,
+            )
+        else:
+            # Fallback if return_usage didn't work as expected
+            return LLMToolCallResult(
+                content=str(result),
+                tool_calls=[],
+                finish_reason="stop",
+                input_tokens=0,
+                output_tokens=0,
+            )
+
+    async def cleanup(self) -> None:
+        """Clean up resources (no HTTP client to close for Claude Agent SDK)."""
+        pass
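For orientation, here is a minimal usage sketch of the new provider, based only on the signatures visible in this diff. It assumes hindsight-api 0.4.8 and claude-agent-sdk are installed and that `claude auth login` has already been run; the Pydantic model, the provider/model strings, and the prompt are illustrative and not taken from the package.

# Illustrative sketch, not part of the package.
import asyncio

from pydantic import BaseModel

from hindsight_api.engine.providers.claude_code_llm import ClaudeCodeLLM


class Fact(BaseModel):
    """Hypothetical structured-output schema for this example."""
    subject: str
    claim: str


async def main() -> None:
    llm = ClaudeCodeLLM(
        provider="claude_code",  # label used for metrics; exact value is an assumption
        api_key="",              # ignored: authentication comes from the Claude CLI login
        base_url="",             # unused by this provider
        model="claude-sonnet",   # illustrative model name
    )
    await llm.verify_connection()

    # The JSON schema of Fact is appended to the prompt and the reply is
    # validated into a Fact instance (or returned raw with skip_validation=True).
    fact = await llm.call(
        messages=[{"role": "user", "content": "State one fact about the Moon."}],
        response_format=Fact,
        scope="example",
    )
    print(fact)

    await llm.cleanup()


asyncio.run(main())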