hindsight-api 0.4.6__py3-none-any.whl → 0.4.8__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as published.
Files changed (29)
  1. hindsight_api/__init__.py +1 -1
  2. hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +16 -2
  3. hindsight_api/api/http.py +83 -1
  4. hindsight_api/banner.py +3 -0
  5. hindsight_api/config.py +44 -6
  6. hindsight_api/daemon.py +18 -112
  7. hindsight_api/engine/llm_interface.py +146 -0
  8. hindsight_api/engine/llm_wrapper.py +304 -1327
  9. hindsight_api/engine/memory_engine.py +125 -41
  10. hindsight_api/engine/providers/__init__.py +14 -0
  11. hindsight_api/engine/providers/anthropic_llm.py +434 -0
  12. hindsight_api/engine/providers/claude_code_llm.py +352 -0
  13. hindsight_api/engine/providers/codex_llm.py +527 -0
  14. hindsight_api/engine/providers/gemini_llm.py +502 -0
  15. hindsight_api/engine/providers/mock_llm.py +234 -0
  16. hindsight_api/engine/providers/openai_compatible_llm.py +745 -0
  17. hindsight_api/engine/retain/fact_extraction.py +13 -9
  18. hindsight_api/engine/retain/fact_storage.py +5 -3
  19. hindsight_api/extensions/__init__.py +10 -0
  20. hindsight_api/extensions/builtin/tenant.py +36 -0
  21. hindsight_api/extensions/operation_validator.py +129 -0
  22. hindsight_api/main.py +6 -21
  23. hindsight_api/migrations.py +75 -0
  24. hindsight_api/worker/main.py +41 -11
  25. hindsight_api/worker/poller.py +26 -14
  26. {hindsight_api-0.4.6.dist-info → hindsight_api-0.4.8.dist-info}/METADATA +2 -1
  27. {hindsight_api-0.4.6.dist-info → hindsight_api-0.4.8.dist-info}/RECORD +29 -21
  28. {hindsight_api-0.4.6.dist-info → hindsight_api-0.4.8.dist-info}/WHEEL +0 -0
  29. {hindsight_api-0.4.6.dist-info → hindsight_api-0.4.8.dist-info}/entry_points.txt +0 -0
hindsight_api/engine/providers/codex_llm.py (new file)
@@ -0,0 +1,527 @@
+"""
+OpenAI Codex LLM provider using ChatGPT Plus/Pro OAuth authentication.
+
+This provider enables using ChatGPT Plus/Pro subscriptions for API calls
+without separate OpenAI Platform API credits. It uses OAuth tokens from
+~/.codex/auth.json and communicates with the ChatGPT backend API.
+"""
+
+import asyncio
+import json
+import logging
+import os
+import time
+import uuid
+from pathlib import Path
+from typing import Any
+
+import httpx
+
+from hindsight_api.engine.llm_interface import LLMInterface, OutputTooLongError
+from hindsight_api.engine.response_models import LLMToolCall, LLMToolCallResult, TokenUsage
+from hindsight_api.metrics import get_metrics_collector
+
+logger = logging.getLogger(__name__)
+
+
+class CodexLLM(LLMInterface):
+    """
+    LLM provider using OpenAI Codex OAuth authentication.
+
+    Authenticates using ChatGPT Plus/Pro credentials stored in ~/.codex/auth.json
+    and makes API calls to chatgpt.com/backend-api/codex/responses.
+    """
+
+    def __init__(
+        self,
+        provider: str,
+        api_key: str,  # Ignored; credentials are read from ~/.codex/auth.json
+        base_url: str,
+        model: str,
+        reasoning_effort: str = "low",
+        **kwargs: Any,
+    ):
+        """Initialize Codex LLM provider."""
+        super().__init__(provider, api_key, base_url, model, reasoning_effort, **kwargs)
+
+        # Load Codex OAuth credentials
+        try:
+            self.access_token, self.account_id = self._load_codex_auth()
+            logger.info(f"Loaded Codex OAuth credentials for account: {self.account_id}")
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to load Codex OAuth credentials from ~/.codex/auth.json: {e}\n\n"
+                "To set up Codex authentication:\n"
+                "1. Install Codex CLI: npm install -g @openai/codex\n"
+                "2. Login: codex auth login\n"
+                "3. Verify: ls ~/.codex/auth.json\n\n"
+                "Or use a different provider (openai, anthropic, gemini) with API keys."
+            ) from e
+
+        # Use the ChatGPT backend API endpoint unless one was configured
+        if not self.base_url:
+            self.base_url = "https://chatgpt.com/backend-api"
+
+        # Normalize model name (strip openai/ prefix if present)
+        if self.model.startswith("openai/"):
+            self.model = self.model[len("openai/") :]
+
+        # Map reasoning effort to Codex reasoning summary format
+        # Codex supports: "auto", "concise", "detailed"
+        self.reasoning_summary = self._map_reasoning_effort(reasoning_effort)
+
+        # HTTP client for SSE streaming
+        self._client = httpx.AsyncClient(timeout=120.0)
+
+    def _load_codex_auth(self) -> tuple[str, str]:
+        """
+        Load OAuth credentials from ~/.codex/auth.json.
+
+        Returns:
+            Tuple of (access_token, account_id).
+
+        Raises:
+            FileNotFoundError: If auth file doesn't exist.
+            ValueError: If auth file is invalid.
+        """
+        auth_file = Path.home() / ".codex" / "auth.json"
+
+        if not auth_file.exists():
+            raise FileNotFoundError(
+                f"Codex auth file not found: {auth_file}\nRun 'codex auth login' to authenticate with ChatGPT Plus/Pro."
+            )
+
+        with open(auth_file) as f:
+            data = json.load(f)
+
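+        # Sketch of the auth.json shape this parser expects (inferred from the
+        # fields read below; real files may carry additional keys):
+        # {
+        #   "auth_mode": "chatgpt",
+        #   "tokens": {"access_token": "eyJ...", "account_id": "..."}
+        # }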
+        # Validate auth structure
+        auth_mode = data.get("auth_mode")
+        if auth_mode != "chatgpt":
+            raise ValueError(f"Expected auth_mode='chatgpt', got: {auth_mode}")
+
+        tokens = data.get("tokens", {})
+        access_token = tokens.get("access_token")
+        account_id = tokens.get("account_id")
+
+        if not access_token:
+            raise ValueError("No access_token found in Codex auth file. Run 'codex auth login' again.")
+        if not account_id:
+            raise ValueError("No account_id found in Codex auth file. Run 'codex auth login' again.")
+
+        return access_token, account_id
+
+    def _map_reasoning_effort(self, effort: str) -> str:
+        """
+        Map standard reasoning effort to Codex reasoning summary format.
+
+        Args:
+            effort: Standard effort level ("low", "medium", "high", "xhigh").
+
+        Returns:
+            Codex reasoning summary: "concise", "detailed", or "auto".
+        """
+        mapping = {
+            "low": "concise",
+            "medium": "auto",
+            "high": "detailed",
+            "xhigh": "detailed",
+        }
+        return mapping.get(effort.lower(), "auto")
+
+    async def verify_connection(self) -> None:
+        """Verify Codex connection by making a simple test call."""
+        try:
+            logger.info(f"Verifying Codex LLM: model={self.model}, account={self.account_id}...")
+            await self.call(
+                messages=[{"role": "user", "content": "Say 'ok'"}],
+                max_completion_tokens=10,
+                max_retries=2,
+                initial_backoff=0.5,
+                max_backoff=2.0,
+            )
+            logger.info(f"Codex LLM verified: {self.model}")
+        except Exception as e:
+            raise RuntimeError(f"Codex LLM connection verification failed for {self.model}: {e}") from e
+
+    async def call(
+        self,
+        messages: list[dict[str, str]],
+        response_format: Any | None = None,
+        max_completion_tokens: int | None = None,
+        temperature: float | None = None,
+        scope: str = "memory",
+        max_retries: int = 10,
+        initial_backoff: float = 1.0,
+        max_backoff: float = 60.0,
+        skip_validation: bool = False,
+        strict_schema: bool = False,
+        return_usage: bool = False,
+    ) -> Any:
+        """Make API call to Codex backend with SSE streaming."""
+        start_time = time.time()
+
+        # Prepare system instructions
+        system_instruction = ""
+        user_messages = []
+
+        for msg in messages:
+            role = msg.get("role", "user")
+            content = msg.get("content", "")
+
+            if role == "system":
+                system_instruction += ("\n\n" + content) if system_instruction else content
+            else:
+                user_messages.append(msg)
+
+        # Add JSON schema instruction if response_format is provided
+        if response_format is not None and hasattr(response_format, "model_json_schema"):
+            schema = response_format.model_json_schema()
+            schema_msg = f"\n\nYou must respond with valid JSON matching this schema:\n{json.dumps(schema, indent=2)}"
+            system_instruction += schema_msg
+
+        # Build Codex request payload
+        payload = {
+            "model": self.model,
+            "instructions": system_instruction,
+            "input": [
+                {
+                    "type": "message",
+                    "role": msg.get("role", "user"),
+                    "content": msg.get("content", ""),
+                }
+                for msg in user_messages
+            ],
+            "tools": [],
+            "tool_choice": "auto",
+            "parallel_tool_calls": True,
+            "reasoning": {"summary": self.reasoning_summary},
+            "store": False,  # Codex uses stateless mode
+            "stream": True,  # SSE streaming
+            "include": ["reasoning.encrypted_content"],
+            "prompt_cache_key": str(uuid.uuid4()),
+        }
+
+        headers = {
+            "Authorization": f"Bearer {self.access_token}",
+            "Content-Type": "application/json",
+            "OpenAI-Account-ID": self.account_id,
+            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)",
+            "Origin": "https://chatgpt.com",
+        }
+
+        url = f"{self.base_url}/codex/responses"
+        last_exception = None
+
+        for attempt in range(max_retries + 1):
+            try:
+                response = await self._client.post(url, json=payload, headers=headers, timeout=120.0)
+                response.raise_for_status()
+
+                # Parse SSE stream
+                content = await self._parse_sse_stream(response)
+
+                # Handle structured output
+                if response_format is not None:
+                    # Models may wrap JSON in markdown
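+                    # e.g. a reply of '```json\n{"x": 1}\n```' is reduced to '{"x": 1}'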
+                    clean_content = content
+                    if "```json" in content:
+                        clean_content = content.split("```json")[1].split("```")[0].strip()
+                    elif "```" in content:
+                        clean_content = content.split("```")[1].split("```")[0].strip()
+
+                    try:
+                        json_data = json.loads(clean_content)
+                    except json.JSONDecodeError as e:
+                        logger.warning(f"Codex JSON parse error (attempt {attempt + 1}/{max_retries + 1}): {e}")
+                        if attempt < max_retries:
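+                            # Exponential backoff: with the 1.0s default this
+                            # waits 1s, 2s, 4s, ... capped at max_backoff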
+                            backoff = min(initial_backoff * (2**attempt), max_backoff)
+                            await asyncio.sleep(backoff)
+                            last_exception = e
+                            continue
+                        raise
+
+                    if skip_validation:
+                        result = json_data
+                    else:
+                        result = response_format.model_validate(json_data)
+                else:
+                    result = content
+
+                # Record metrics
+                duration = time.time() - start_time
+                metrics = get_metrics_collector()
+                metrics.record_llm_call(
+                    provider=self.provider,
+                    model=self.model,
+                    scope=scope,
+                    duration=duration,
+                    input_tokens=0,  # Codex doesn't report token counts in SSE
+                    output_tokens=0,
+                    success=True,
+                )
+
+                if return_usage:
+                    # Codex doesn't provide token counts, estimate based on content
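+                    # (rough heuristic: ~4 characters per token for English text)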
+                    estimated_input = sum(len(m.get("content", "")) for m in messages) // 4
+                    estimated_output = len(content) // 4
+                    token_usage = TokenUsage(
+                        input_tokens=estimated_input,
+                        output_tokens=estimated_output,
+                        total_tokens=estimated_input + estimated_output,
+                    )
+                    return result, token_usage
+
+                return result
+
+            except httpx.HTTPStatusError as e:
+                last_exception = e
+                status_code = e.response.status_code
+
+                # Fast fail on auth errors
+                if status_code in (401, 403):
+                    logger.error(f"Codex auth error (HTTP {status_code}): {e.response.text[:200]}")
+                    raise RuntimeError(
+                        "Codex authentication failed. Your OAuth token may have expired.\n"
+                        "Run 'codex auth login' to re-authenticate."
+                    ) from e
+
+                if attempt < max_retries:
+                    backoff = min(initial_backoff * (2**attempt), max_backoff)
+                    logger.warning(f"Codex HTTP error {status_code} (attempt {attempt + 1}/{max_retries + 1})")
+                    await asyncio.sleep(backoff)
+                    continue
+                else:
+                    logger.error(f"Codex HTTP error after {max_retries + 1} attempts: {e}")
+                    raise
+
+            except httpx.RequestError as e:
+                last_exception = e
+                if attempt < max_retries:
+                    backoff = min(initial_backoff * (2**attempt), max_backoff)
+                    logger.warning(f"Codex connection error (attempt {attempt + 1}/{max_retries + 1}): {e}")
+                    await asyncio.sleep(backoff)
+                    continue
+                else:
+                    logger.error(f"Codex connection error after {max_retries + 1} attempts: {e}")
+                    raise
+
+            except Exception as e:
+                logger.error(f"Unexpected Codex error: {type(e).__name__}: {e}")
+                raise
+
+        if last_exception:
+            raise last_exception
+        raise RuntimeError("Codex call failed after all retries")
+
+    async def _parse_sse_stream(self, response: httpx.Response) -> str:
+        """
+        Parse Server-Sent Events (SSE) stream from Codex API.
+
+        Args:
+            response: HTTP response with SSE stream.
+
+        Returns:
+            Extracted text content from stream.
+        """
+        full_text = ""
+        event_type = None
+
+        async for line in response.aiter_lines():
+            if not line:
+                continue
+
+            # Track event type
+            if line.startswith("event: "):
+                event_type = line[7:]
+
+            # Parse data
+            elif line.startswith("data: "):
+                data_str = line[6:]
+                if data_str == "[DONE]":
+                    break
+
+                try:
+                    data = json.loads(data_str)
+
+                    # Extract content based on event type
+                    if event_type == "response.text.delta" and "delta" in data:
+                        full_text += data["delta"]
+                    elif event_type == "response.content_part.delta" and "delta" in data:
+                        full_text += data["delta"]
+                    # Check for item content
+                    elif "item" in data:
+                        item = data["item"]
+                        if "content" in item:
+                            content = item["content"]
+                            if isinstance(content, list):
+                                for part in content:
+                                    if isinstance(part, dict) and "text" in part:
+                                        full_text += part["text"]
+                            elif isinstance(content, str):
+                                full_text += content
+
+                except json.JSONDecodeError:
+                    # Skip malformed JSON events
+                    pass
+
+        return full_text
+
+    async def call_with_tools(
+        self,
+        messages: list[dict[str, Any]],
+        tools: list[dict[str, Any]],
+        max_completion_tokens: int | None = None,
+        temperature: float | None = None,
+        scope: str = "tools",
+        max_retries: int = 5,
+        initial_backoff: float = 1.0,
+        max_backoff: float = 30.0,
+        tool_choice: str | dict[str, Any] = "auto",
+    ) -> LLMToolCallResult:
+        """
+        Make API call with tool calling support.
+
+        Note: This is a basic implementation. Full tool calling support for Codex
+        may require additional SSE event parsing.
+        """
+        start_time = time.time()
+
+        # Prepare system instructions
+        system_instruction = ""
+        user_messages = []
+
+        for msg in messages:
+            role = msg.get("role", "user")
+            content = msg.get("content", "")
+
+            if role == "system":
+                system_instruction += ("\n\n" + content) if system_instruction else content
+            elif role == "tool":
+                # Handle tool results
+                user_messages.append(
+                    {
+                        "type": "message",
+                        "role": "user",
+                        "content": f"Tool result: {content}",
+                    }
+                )
+            else:
+                user_messages.append(
+                    {
+                        "type": "message",
+                        "role": role,
+                        "content": content,
+                    }
+                )
+
+        # Convert tools to Codex format
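+        # (expects OpenAI-style tool specs, e.g.
+        #  {"type": "function", "function": {"name": ..., "description": ..., "parameters": {...}}})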
+        codex_tools = []
+        for tool in tools:
+            func = tool.get("function", {})
+            codex_tools.append(
+                {
+                    "type": "function",
+                    "function": {
+                        "name": func.get("name", ""),
+                        "description": func.get("description", ""),
+                        "parameters": func.get("parameters", {}),
+                    },
+                }
+            )
+
+        payload = {
+            "model": self.model,
+            "instructions": system_instruction,
+            "input": user_messages,
+            "tools": codex_tools,
+            "tool_choice": tool_choice,
+            "parallel_tool_calls": True,
+            "reasoning": {"summary": self.reasoning_summary},
+            "store": False,
+            "stream": True,
+            "include": ["reasoning.encrypted_content"],
+            "prompt_cache_key": str(uuid.uuid4()),
+        }
+
+        headers = {
+            "Authorization": f"Bearer {self.access_token}",
+            "Content-Type": "application/json",
+            "OpenAI-Account-ID": self.account_id,
+            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)",
+            "Origin": "https://chatgpt.com",
+        }
+
+        url = f"{self.base_url}/codex/responses"
+
+        try:
+            response = await self._client.post(url, json=payload, headers=headers, timeout=120.0)
+            response.raise_for_status()
+
+            # Parse SSE for tool calls and content
+            content, tool_calls = await self._parse_sse_tool_stream(response)
+
+            duration = time.time() - start_time
+            metrics = get_metrics_collector()
+            metrics.record_llm_call(
+                provider=self.provider,
+                model=self.model,
+                scope=scope,
+                duration=duration,
+                input_tokens=0,
+                output_tokens=0,
+                success=True,
+            )
+
+            return LLMToolCallResult(
+                content=content,
+                tool_calls=tool_calls,
+                finish_reason="tool_calls" if tool_calls else "stop",
+                input_tokens=0,
+                output_tokens=0,
+            )
+
+        except Exception as e:
+            logger.error(f"Codex tool call error: {e}")
+            raise
+
+    async def _parse_sse_tool_stream(self, response: httpx.Response) -> tuple[str | None, list[LLMToolCall]]:
+        """
+        Parse SSE stream for tool calls and content.
+
+        Returns:
+            Tuple of (content, tool_calls).
+        """
+        content = ""
+        tool_calls: list[LLMToolCall] = []
+        event_type = None
+
+        async for line in response.aiter_lines():
+            if not line:
+                continue
+
+            if line.startswith("event: "):
+                event_type = line[7:]
+
+            elif line.startswith("data: "):
+                data_str = line[6:]
+                if data_str == "[DONE]":
+                    break
+
+                try:
+                    data = json.loads(data_str)
+
+                    # Extract text content
+                    if event_type == "response.text.delta" and "delta" in data:
+                        content += data["delta"]
+
+                    # Extract tool calls
+                    elif event_type == "response.function_call_arguments.delta":
+                        # Handle tool call events (implementation depends on actual Codex SSE format)
+                        pass
+
+                except json.JSONDecodeError:
+                    pass
+
+        return content if content else None, tool_calls
+
+    async def cleanup(self) -> None:
+        """Clean up HTTP client."""
+        await self._client.aclose()