hindsight-api 0.4.6__py3-none-any.whl → 0.4.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. hindsight_api/__init__.py +1 -1
  2. hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +16 -2
  3. hindsight_api/api/http.py +83 -1
  4. hindsight_api/banner.py +3 -0
  5. hindsight_api/config.py +44 -6
  6. hindsight_api/daemon.py +18 -112
  7. hindsight_api/engine/llm_interface.py +146 -0
  8. hindsight_api/engine/llm_wrapper.py +304 -1327
  9. hindsight_api/engine/memory_engine.py +125 -41
  10. hindsight_api/engine/providers/__init__.py +14 -0
  11. hindsight_api/engine/providers/anthropic_llm.py +434 -0
  12. hindsight_api/engine/providers/claude_code_llm.py +352 -0
  13. hindsight_api/engine/providers/codex_llm.py +527 -0
  14. hindsight_api/engine/providers/gemini_llm.py +502 -0
  15. hindsight_api/engine/providers/mock_llm.py +234 -0
  16. hindsight_api/engine/providers/openai_compatible_llm.py +745 -0
  17. hindsight_api/engine/retain/fact_extraction.py +13 -9
  18. hindsight_api/engine/retain/fact_storage.py +5 -3
  19. hindsight_api/extensions/__init__.py +10 -0
  20. hindsight_api/extensions/builtin/tenant.py +36 -0
  21. hindsight_api/extensions/operation_validator.py +129 -0
  22. hindsight_api/main.py +6 -21
  23. hindsight_api/migrations.py +75 -0
  24. hindsight_api/worker/main.py +41 -11
  25. hindsight_api/worker/poller.py +26 -14
  26. {hindsight_api-0.4.6.dist-info → hindsight_api-0.4.8.dist-info}/METADATA +2 -1
  27. {hindsight_api-0.4.6.dist-info → hindsight_api-0.4.8.dist-info}/RECORD +29 -21
  28. {hindsight_api-0.4.6.dist-info → hindsight_api-0.4.8.dist-info}/WHEEL +0 -0
  29. {hindsight_api-0.4.6.dist-info → hindsight_api-0.4.8.dist-info}/entry_points.txt +0 -0
hindsight_api/__init__.py CHANGED
@@ -46,4 +46,4 @@ __all__ = [
     "RemoteTEICrossEncoder",
     "LLMConfig",
 ]
-__version__ = "0.4.6"
+__version__ = "0.4.8"
hindsight_api/alembic/versions/5a366d414dce_initial_schema.py CHANGED
@@ -11,6 +11,7 @@ from collections.abc import Sequence
 import sqlalchemy as sa
 from alembic import op
 from pgvector.sqlalchemy import Vector
+from sqlalchemy import text
 from sqlalchemy.dialects import postgresql
 
 # revision identifiers, used by Alembic.
@@ -23,8 +24,21 @@ depends_on: str | Sequence[str] | None = None
 def upgrade() -> None:
     """Upgrade schema - create all tables from scratch."""
 
-    # Enable required extensions
-    op.execute("CREATE EXTENSION IF NOT EXISTS vector")
+    # Note: pgvector extension is installed globally BEFORE migrations run
+    # See migrations.py:run_migrations() - this ensures the extension is available
+    # to all schemas, not just the one being migrated
+
+    # We keep this here as a fallback for backwards compatibility
+    # This may fail if user lacks permissions, which is fine if extension already exists
+    try:
+        op.execute("CREATE EXTENSION IF NOT EXISTS vector")
+    except Exception:
+        # Extension might already exist or user lacks permissions - verify it exists
+        conn = op.get_bind()
+        result = conn.execute(text("SELECT 1 FROM pg_extension WHERE extname = 'vector'")).fetchone()
+        if not result:
+            # Extension truly doesn't exist - re-raise the error
+            raise
 
     # Create banks table
     op.create_table(
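The comment above points at the new `migrations.py` (+75 lines), which installs pgvector once before Alembic runs. That file is not shown in this diff, so the following is only a sketch of the pattern: `run_migrations` is named in the comment, but the connection handling and `asyncpg` usage here are assumptions for illustration.

```python
# Hypothetical sketch of "install the extension before migrations run";
# not the actual contents of hindsight_api/migrations.py.
import asyncpg


async def run_migrations(database_url: str, schema: str = "public") -> None:
    conn = await asyncpg.connect(database_url)
    try:
        # Installed once at the database level, so every schema migrated
        # afterwards can create vector columns.
        await conn.execute("CREATE EXTENSION IF NOT EXISTS vector")
    finally:
        await conn.close()
    # ...then run Alembic against the target schema as usual.
```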
hindsight_api/api/http.py CHANGED
@@ -1398,14 +1398,19 @@ def create_app(
 
     # Start worker poller if enabled (standalone mode)
     if config.worker_enabled and memory._pool is not None:
+        from ..config import DEFAULT_DATABASE_SCHEMA
+
        worker_id = config.worker_id or socket.gethostname()
+        # Convert default schema to None for SQL compatibility (no schema prefix)
+        schema = None if config.database_schema == DEFAULT_DATABASE_SCHEMA else config.database_schema
         poller = WorkerPoller(
             pool=memory._pool,
             worker_id=worker_id,
             executor=memory.execute_task,
             poll_interval_ms=config.worker_poll_interval_ms,
             max_retries=config.worker_max_retries,
-            tenant_extension=getattr(memory, "_tenant_extension", None),
+            schema=schema,
+            tenant_extension=memory._tenant_extension,
             max_slots=config.worker_max_slots,
             consolidation_max_slots=config.worker_consolidation_max_slots,
         )
@@ -2285,6 +2290,23 @@ def _register_routes(app: FastAPI):
     ):
         """Get a mental model by ID."""
         try:
+            # Pre-operation validation hook
+            validator = app.state.memory._operation_validator
+            if validator:
+                from hindsight_api.extensions.operation_validator import MentalModelGetContext
+
+                ctx = MentalModelGetContext(
+                    bank_id=bank_id,
+                    mental_model_id=mental_model_id,
+                    request_context=request_context,
+                )
+                validation = await validator.validate_mental_model_get(ctx)
+                if not validation.allowed:
+                    raise OperationValidationError(
+                        validation.reason or "Operation not allowed",
+                        status_code=validation.status_code,
+                    )
+
             mental_model = await app.state.memory.get_mental_model(
                 bank_id=bank_id,
                 mental_model_id=mental_model_id,
@@ -2292,9 +2314,31 @@ def _register_routes(app: FastAPI):
             )
             if mental_model is None:
                 raise HTTPException(status_code=404, detail=f"Mental model '{mental_model_id}' not found")
+
+            # Post-operation hook
+            if validator:
+                from hindsight_api.extensions.operation_validator import MentalModelGetResult
+
+                content = mental_model.get("content", "")
+                output_tokens = len(content) // 4 if content else 0
+
+                result_ctx = MentalModelGetResult(
+                    bank_id=bank_id,
+                    mental_model_id=mental_model_id,
+                    request_context=request_context,
+                    output_tokens=output_tokens,
+                    success=True,
+                )
+                try:
+                    await validator.on_mental_model_get_complete(result_ctx)
+                except Exception as hook_err:
+                    logger.warning(f"Post-mental-model-get hook error (non-fatal): {hook_err}")
+
             return MentalModelResponse(**mental_model)
         except (AuthenticationError, HTTPException):
             raise
+        except OperationValidationError as e:
+            raise HTTPException(status_code=e.status_code, detail=e.reason)
         except Exception as e:
             import traceback
 
@@ -2319,6 +2363,23 @@ def _register_routes(app: FastAPI):
     ):
         """Create a mental model (async - returns operation_id)."""
         try:
+            # Pre-operation validation hook
+            validator = app.state.memory._operation_validator
+            if validator:
+                from hindsight_api.extensions.operation_validator import MentalModelRefreshContext
+
+                ctx = MentalModelRefreshContext(
+                    bank_id=bank_id,
+                    mental_model_id=None,  # Not yet created
+                    request_context=request_context,
+                )
+                validation = await validator.validate_mental_model_refresh(ctx)
+                if not validation.allowed:
+                    raise OperationValidationError(
+                        validation.reason or "Operation not allowed",
+                        status_code=validation.status_code,
+                    )
+
             # 1. Create the mental model with placeholder content
             mental_model = await app.state.memory.create_mental_model(
                 bank_id=bank_id,
@@ -2341,6 +2402,8 @@ def _register_routes(app: FastAPI):
             raise HTTPException(status_code=400, detail=str(e))
         except (AuthenticationError, HTTPException):
             raise
+        except OperationValidationError as e:
+            raise HTTPException(status_code=e.status_code, detail=e.reason)
         except Exception as e:
             import traceback
 
@@ -2363,6 +2426,23 @@ def _register_routes(app: FastAPI):
     ):
         """Refresh a mental model by re-running its source query (async)."""
         try:
+            # Pre-operation validation hook
+            validator = app.state.memory._operation_validator
+            if validator:
+                from hindsight_api.extensions.operation_validator import MentalModelRefreshContext
+
+                ctx = MentalModelRefreshContext(
+                    bank_id=bank_id,
+                    mental_model_id=mental_model_id,
+                    request_context=request_context,
+                )
+                validation = await validator.validate_mental_model_refresh(ctx)
+                if not validation.allowed:
+                    raise OperationValidationError(
+                        validation.reason or "Operation not allowed",
+                        status_code=validation.status_code,
+                    )
+
             result = await app.state.memory.submit_async_refresh_mental_model(
                 bank_id=bank_id,
                 mental_model_id=mental_model_id,
@@ -2373,6 +2453,8 @@ def _register_routes(app: FastAPI):
             raise HTTPException(status_code=404, detail=str(e))
         except (AuthenticationError, HTTPException):
             raise
+        except OperationValidationError as e:
+            raise HTTPException(status_code=e.status_code, detail=e.reason)
         except Exception as e:
             import traceback
 
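All three routes follow the same shape: a pre-operation `validate_*` call that can veto the request, and a best-effort post-operation hook whose failures are logged and swallowed. Note that `output_tokens` is estimated with the rough 4-characters-per-token heuristic. A minimal sketch of a validator that plugs into this, duck-typed against the methods the routes call; the real base class and result type live in the new `extensions/operation_validator.py` (+129 lines) and are not shown here, so the field names below are inferred from the route code (`allowed`, `reason`, `status_code`, `output_tokens`).

```python
# Hypothetical validator; only the mental-model-get hooks are sketched.
from dataclasses import dataclass


@dataclass
class ValidationResult:  # stand-in for the real result type
    allowed: bool
    reason: str | None = None
    status_code: int = 403


class QuotaValidator:
    """Deny reads once an (illustrative) output-token budget is spent."""

    def __init__(self, max_output_tokens: int):
        self.max_output_tokens = max_output_tokens
        self.tokens_used = 0

    async def validate_mental_model_get(self, ctx) -> ValidationResult:
        if self.tokens_used >= self.max_output_tokens:
            return ValidationResult(False, "Token quota exceeded", status_code=429)
        return ValidationResult(True)

    async def on_mental_model_get_complete(self, result_ctx) -> None:
        # Post-hooks are best-effort: the route logs and ignores errors here.
        self.tokens_used += result_ctx.output_tokens
```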
hindsight_api/banner.py CHANGED
@@ -83,9 +83,12 @@ def print_startup_info(
     embeddings_provider: str,
     reranker_provider: str,
     mcp_enabled: bool = False,
+    version: str | None = None,
 ):
     """Print styled startup information."""
     print(color_start("Starting Hindsight API..."))
+    if version:
+        print(f" {dim('Version:')} {color(f'v{version}', 0.1)}")
     print(f" {dim('URL:')} {color(f'http://{host}:{port}', 0.2)}")
     print(f" {dim('Database:')} {color(database_url, 0.4)}")
     print(f" {dim('LLM:')} {color(f'{llm_provider} / {llm_model}', 0.6)}")
hindsight_api/config.py CHANGED
@@ -154,7 +154,21 @@ ENV_REFLECT_MAX_ITERATIONS = "HINDSIGHT_API_REFLECT_MAX_ITERATIONS"
 DEFAULT_DATABASE_URL = "pg0"
 DEFAULT_DATABASE_SCHEMA = "public"
 DEFAULT_LLM_PROVIDER = "openai"
-DEFAULT_LLM_MODEL = "gpt-5-mini"
+
+# Provider-specific default models
+PROVIDER_DEFAULT_MODELS = {
+    "openai": "o3-mini",
+    "anthropic": "claude-haiku-4-5-20251001",
+    "gemini": "gemini-2.5-flash",
+    "groq": "openai/gpt-oss-120b",
+    "ollama": "gemma3:12b",
+    "lmstudio": "local-model",
+    "vertexai": "gemini-2.0-flash-001",
+    "openai-codex": "gpt-5.2-codex",
+    "claude-code": "claude-sonnet-4-5-20250929",
+    "mock": "mock-model",
+}
+DEFAULT_LLM_MODEL = "o3-mini"  # Fallback if provider not in table
 DEFAULT_LLM_MAX_CONCURRENT = 32
 DEFAULT_LLM_MAX_RETRIES = 10  # Max retry attempts for LLM API calls
 DEFAULT_LLM_INITIAL_BACKOFF = 1.0  # Initial backoff in seconds for retry exponential backoff
@@ -303,6 +317,11 @@ def _validate_extraction_mode(mode: str) -> str:
     return mode_lower
 
 
+def _get_default_model_for_provider(provider: str) -> str:
+    """Get the default model for a given provider."""
+    return PROVIDER_DEFAULT_MODELS.get(provider.lower(), DEFAULT_LLM_MODEL)
+
+
 @dataclass
 class HindsightConfig:
     """Configuration container for Hindsight API."""
@@ -431,14 +450,18 @@ class HindsightConfig:
     @classmethod
     def from_env(cls) -> "HindsightConfig":
         """Create configuration from environment variables."""
+        # Get provider first to determine default model
+        llm_provider = os.getenv(ENV_LLM_PROVIDER, DEFAULT_LLM_PROVIDER)
+        llm_model = os.getenv(ENV_LLM_MODEL) or _get_default_model_for_provider(llm_provider)
+
         return cls(
             # Database
             database_url=os.getenv(ENV_DATABASE_URL, DEFAULT_DATABASE_URL),
             database_schema=os.getenv(ENV_DATABASE_SCHEMA, DEFAULT_DATABASE_SCHEMA),
             # LLM
-            llm_provider=os.getenv(ENV_LLM_PROVIDER, DEFAULT_LLM_PROVIDER),
+            llm_provider=llm_provider,
             llm_api_key=os.getenv(ENV_LLM_API_KEY),
-            llm_model=os.getenv(ENV_LLM_MODEL, DEFAULT_LLM_MODEL),
+            llm_model=llm_model,
             llm_base_url=os.getenv(ENV_LLM_BASE_URL) or None,
             llm_max_concurrent=int(os.getenv(ENV_LLM_MAX_CONCURRENT, str(DEFAULT_LLM_MAX_CONCURRENT))),
             llm_max_retries=int(os.getenv(ENV_LLM_MAX_RETRIES, str(DEFAULT_LLM_MAX_RETRIES))),
@@ -453,7 +476,12 @@ class HindsightConfig:
             # Per-operation LLM config (None = use default)
             retain_llm_provider=os.getenv(ENV_RETAIN_LLM_PROVIDER) or None,
             retain_llm_api_key=os.getenv(ENV_RETAIN_LLM_API_KEY) or None,
-            retain_llm_model=os.getenv(ENV_RETAIN_LLM_MODEL) or None,
+            retain_llm_model=os.getenv(ENV_RETAIN_LLM_MODEL)
+            or (
+                _get_default_model_for_provider(os.getenv(ENV_RETAIN_LLM_PROVIDER))
+                if os.getenv(ENV_RETAIN_LLM_PROVIDER)
+                else None
+            ),
             retain_llm_base_url=os.getenv(ENV_RETAIN_LLM_BASE_URL) or None,
             retain_llm_max_concurrent=int(os.getenv(ENV_RETAIN_LLM_MAX_CONCURRENT))
             if os.getenv(ENV_RETAIN_LLM_MAX_CONCURRENT)
@@ -470,7 +498,12 @@ class HindsightConfig:
             retain_llm_timeout=float(os.getenv(ENV_RETAIN_LLM_TIMEOUT)) if os.getenv(ENV_RETAIN_LLM_TIMEOUT) else None,
             reflect_llm_provider=os.getenv(ENV_REFLECT_LLM_PROVIDER) or None,
             reflect_llm_api_key=os.getenv(ENV_REFLECT_LLM_API_KEY) or None,
-            reflect_llm_model=os.getenv(ENV_REFLECT_LLM_MODEL) or None,
+            reflect_llm_model=os.getenv(ENV_REFLECT_LLM_MODEL)
+            or (
+                _get_default_model_for_provider(os.getenv(ENV_REFLECT_LLM_PROVIDER))
+                if os.getenv(ENV_REFLECT_LLM_PROVIDER)
+                else None
+            ),
             reflect_llm_base_url=os.getenv(ENV_REFLECT_LLM_BASE_URL) or None,
             reflect_llm_max_concurrent=int(os.getenv(ENV_REFLECT_LLM_MAX_CONCURRENT))
             if os.getenv(ENV_REFLECT_LLM_MAX_CONCURRENT)
@@ -489,7 +522,12 @@ class HindsightConfig:
             else None,
             consolidation_llm_provider=os.getenv(ENV_CONSOLIDATION_LLM_PROVIDER) or None,
             consolidation_llm_api_key=os.getenv(ENV_CONSOLIDATION_LLM_API_KEY) or None,
-            consolidation_llm_model=os.getenv(ENV_CONSOLIDATION_LLM_MODEL) or None,
+            consolidation_llm_model=os.getenv(ENV_CONSOLIDATION_LLM_MODEL)
+            or (
+                _get_default_model_for_provider(os.getenv(ENV_CONSOLIDATION_LLM_PROVIDER))
+                if os.getenv(ENV_CONSOLIDATION_LLM_PROVIDER)
+                else None
+            ),
             consolidation_llm_base_url=os.getenv(ENV_CONSOLIDATION_LLM_BASE_URL) or None,
             consolidation_llm_max_concurrent=int(os.getenv(ENV_CONSOLIDATION_LLM_MAX_CONCURRENT))
             if os.getenv(ENV_CONSOLIDATION_LLM_MAX_CONCURRENT)
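The practical effect: setting only a provider now picks a matching default model instead of the old single `gpt-5-mini` fallback. A quick illustration of the resolution order, with the environment variable names assumed from the `HINDSIGHT_API_*` pattern visible elsewhere in config.py:

```python
# Illustrative: mirrors _get_default_model_for_provider + from_env above.
import os

PROVIDER_DEFAULT_MODELS = {"anthropic": "claude-haiku-4-5-20251001"}
DEFAULT_LLM_MODEL = "o3-mini"

os.environ["HINDSIGHT_API_LLM_PROVIDER"] = "anthropic"  # assumed var name
os.environ.pop("HINDSIGHT_API_LLM_MODEL", None)  # no explicit model set

provider = os.getenv("HINDSIGHT_API_LLM_PROVIDER", "openai")
model = os.getenv("HINDSIGHT_API_LLM_MODEL") or PROVIDER_DEFAULT_MODELS.get(
    provider.lower(), DEFAULT_LLM_MODEL
)
print(model)  # -> claude-haiku-4-5-20251001
```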
hindsight_api/daemon.py CHANGED
@@ -1,11 +1,10 @@
 """
 Daemon mode support for Hindsight API.
 
-Provides idle timeout and lockfile management for running as a background daemon.
+Provides idle timeout for running as a background daemon.
 """
 
 import asyncio
-import fcntl
 import logging
 import os
 import sys
@@ -15,10 +14,11 @@ from pathlib import Path
 logger = logging.getLogger(__name__)
 
 # Default daemon configuration
-DEFAULT_DAEMON_PORT = 8889
+DEFAULT_DAEMON_PORT = 8888
 DEFAULT_IDLE_TIMEOUT = 0  # 0 = no auto-exit (hindsight-embed passes its own timeout)
-LOCKFILE_PATH = Path.home() / ".hindsight" / "daemon.lock"
-DAEMON_LOG_PATH = Path.home() / ".hindsight" / "daemon.log"
+
+# Allow override via environment variable for profile-specific logs
+DAEMON_LOG_PATH = Path(os.getenv("HINDSIGHT_API_DAEMON_LOG", str(Path.home() / ".hindsight" / "daemon.log")))
 
 
 class IdleTimeoutMiddleware:
@@ -58,97 +58,27 @@ class IdleTimeoutMiddleware:
         os.kill(os.getpid(), signal.SIGTERM)
 
 
-class DaemonLock:
-    """
-    File-based lock to prevent multiple daemon instances.
-
-    Uses fcntl.flock for atomic locking on Unix systems.
-    """
-
-    def __init__(self, lockfile: Path = LOCKFILE_PATH):
-        self.lockfile = lockfile
-        self._fd = None
-
-    def acquire(self) -> bool:
-        """
-        Try to acquire the daemon lock.
-
-        Returns True if lock acquired, False if another daemon is running.
-        """
-        self.lockfile.parent.mkdir(parents=True, exist_ok=True)
-
-        try:
-            self._fd = open(self.lockfile, "w")
-            fcntl.flock(self._fd.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
-            # Write PID for debugging
-            self._fd.write(str(os.getpid()))
-            self._fd.flush()
-            return True
-        except (IOError, OSError):
-            # Lock is held by another process
-            if self._fd:
-                self._fd.close()
-                self._fd = None
-            return False
-
-    def release(self):
-        """Release the daemon lock."""
-        if self._fd:
-            try:
-                fcntl.flock(self._fd.fileno(), fcntl.LOCK_UN)
-                self._fd.close()
-            except Exception:
-                pass
-            finally:
-                self._fd = None
-            # Remove lockfile
-            try:
-                self.lockfile.unlink()
-            except Exception:
-                pass
-
-    def is_locked(self) -> bool:
-        """Check if the lock is held by another process."""
-        if not self.lockfile.exists():
-            return False
-
-        try:
-            fd = open(self.lockfile, "r")
-            fcntl.flock(fd.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
-            # We got the lock, so no one else has it
-            fcntl.flock(fd.fileno(), fcntl.LOCK_UN)
-            fd.close()
-            return False
-        except (IOError, OSError):
-            return True
-
-    def get_pid(self) -> int | None:
-        """Get the PID of the daemon holding the lock."""
-        if not self.lockfile.exists():
-            return None
-        try:
-            with open(self.lockfile, "r") as f:
-                return int(f.read().strip())
-        except (ValueError, IOError):
-            return None
-
-
 def daemonize():
     """
     Fork the current process into a background daemon.
 
     Uses double-fork technique to properly detach from terminal.
     """
-    # First fork
-    pid = os.fork()
-    if pid > 0:
-        # Parent exits
-        sys.exit(0)
-
-    # Create new session
+    # First fork - detach from parent
+    try:
+        pid = os.fork()
+        if pid > 0:
+            sys.exit(0)
+    except OSError as e:
+        sys.stderr.write(f"fork #1 failed: {e}\n")
+        sys.exit(1)
+
+    # Decouple from parent environment
+    os.chdir("/")
     os.setsid()
+    os.umask(0)
 
-    # Second fork to prevent zombie processes
+    # Second fork - prevent zombie
     pid = os.fork()
     if pid > 0:
         sys.exit(0)
@@ -181,27 +111,3 @@ def check_daemon_running(port: int = DEFAULT_DAEMON_PORT) -> bool:
         return result == 0
     except Exception:
         return False
-
-
-def stop_daemon(port: int = DEFAULT_DAEMON_PORT) -> bool:
-    """Stop a running daemon by sending SIGTERM to the process."""
-    lock = DaemonLock()
-    pid = lock.get_pid()
-
-    if pid is None:
-        return False
-
-    try:
-        import signal
-
-        os.kill(pid, signal.SIGTERM)
-        # Wait for process to exit
-        for _ in range(50):  # Wait up to 5 seconds
-            time.sleep(0.1)
-            try:
-                os.kill(pid, 0)  # Check if process exists
-            except OSError:
-                return True  # Process exited
-        return False
-    except OSError:
-        return False
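With `DaemonLock` and `stop_daemon` removed, detecting a running daemon rests entirely on the TCP probe in `check_daemon_running` (its body sits mostly outside these hunks; the visible context suggests a connect check). A sketch of that style of probe, with the localhost target and timeout assumed:

```python
import socket


def check_daemon_running(port: int = 8888) -> bool:
    # connect_ex returns 0 when something is listening on the port.
    try:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            sock.settimeout(0.5)
            result = sock.connect_ex(("127.0.0.1", port))
        return result == 0
    except Exception:
        return False
```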
hindsight_api/engine/llm_interface.py ADDED
@@ -0,0 +1,146 @@
+"""
+Abstract interface for LLM providers.
+
+This module defines the interface that all LLM providers must implement,
+enabling support for multiple LLM backends (OpenAI, Anthropic, Gemini, Codex, etc.)
+"""
+
+from abc import ABC, abstractmethod
+from typing import Any
+
+from .response_models import LLMToolCallResult, TokenUsage
+
+
+class LLMInterface(ABC):
+    """
+    Abstract interface for LLM providers.
+
+    All LLM provider implementations must inherit from this class and implement
+    the required methods.
+    """
+
+    def __init__(
+        self,
+        provider: str,
+        api_key: str,
+        base_url: str,
+        model: str,
+        reasoning_effort: str = "low",
+        **kwargs: Any,
+    ):
+        """
+        Initialize LLM provider.
+
+        Args:
+            provider: Provider name (e.g., "openai", "codex", "anthropic", "gemini").
+            api_key: API key or authentication token.
+            base_url: Base URL for the API.
+            model: Model name.
+            reasoning_effort: Reasoning effort level for supported providers.
+            **kwargs: Additional provider-specific parameters.
+        """
+        self.provider = provider.lower()
+        self.api_key = api_key
+        self.base_url = base_url
+        self.model = model
+        self.reasoning_effort = reasoning_effort
+
+    @abstractmethod
+    async def verify_connection(self) -> None:
+        """
+        Verify that the LLM provider is configured correctly by making a simple test call.
+
+        Raises:
+            RuntimeError: If the connection test fails.
+        """
+        pass
+
+    @abstractmethod
+    async def call(
+        self,
+        messages: list[dict[str, str]],
+        response_format: Any | None = None,
+        max_completion_tokens: int | None = None,
+        temperature: float | None = None,
+        scope: str = "memory",
+        max_retries: int = 10,
+        initial_backoff: float = 1.0,
+        max_backoff: float = 60.0,
+        skip_validation: bool = False,
+        strict_schema: bool = False,
+        return_usage: bool = False,
+    ) -> Any:
+        """
+        Make an LLM API call with retry logic.
+
+        Args:
+            messages: List of message dicts with 'role' and 'content'.
+            response_format: Optional Pydantic model for structured output.
+            max_completion_tokens: Maximum tokens in response.
+            temperature: Sampling temperature (0.0-2.0).
+            scope: Scope identifier for tracking.
+            max_retries: Maximum retry attempts.
+            initial_backoff: Initial backoff time in seconds.
+            max_backoff: Maximum backoff time in seconds.
+            skip_validation: Return raw JSON without Pydantic validation.
+            strict_schema: Use strict JSON schema enforcement (OpenAI only).
+            return_usage: If True, return tuple (result, TokenUsage) instead of just result.
+
+        Returns:
+            If return_usage=False: Parsed response if response_format is provided, otherwise text content.
+            If return_usage=True: Tuple of (result, TokenUsage) with token counts.
+
+        Raises:
+            OutputTooLongError: If output exceeds token limits.
+            Exception: Re-raises API errors after retries exhausted.
+        """
+        pass
+
+    @abstractmethod
+    async def call_with_tools(
+        self,
+        messages: list[dict[str, Any]],
+        tools: list[dict[str, Any]],
+        max_completion_tokens: int | None = None,
+        temperature: float | None = None,
+        scope: str = "tools",
+        max_retries: int = 5,
+        initial_backoff: float = 1.0,
+        max_backoff: float = 30.0,
+        tool_choice: str | dict[str, Any] = "auto",
+    ) -> LLMToolCallResult:
+        """
+        Make an LLM API call with tool/function calling support.
+
+        Args:
+            messages: List of message dicts. Can include tool results with role='tool'.
+            tools: List of tool definitions in OpenAI format.
+            max_completion_tokens: Maximum tokens in response.
+            temperature: Sampling temperature (0.0-2.0).
+            scope: Scope identifier for tracking.
+            max_retries: Maximum retry attempts.
+            initial_backoff: Initial backoff time in seconds.
+            max_backoff: Maximum backoff time in seconds.
+            tool_choice: How to choose tools - "auto", "none", "required", or specific function.
+
+        Returns:
+            LLMToolCallResult with content and/or tool_calls.
+        """
+        pass
+
+    @abstractmethod
+    async def cleanup(self) -> None:
+        """Clean up resources (close connections, etc.)."""
+        pass
+
+
+class OutputTooLongError(Exception):
+    """
+    Bridge exception raised when LLM output exceeds token limits.
+
+    This wraps provider-specific errors (e.g., OpenAI's LengthFinishReasonError)
+    to allow callers to handle output length issues without depending on
+    provider-specific implementations.
+    """
+
+    pass
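The ABC above is the seam the six new `providers/*` modules implement. A minimal conforming subclass, in the spirit of the new `mock_llm.py` (whose actual contents are not shown in this diff, so the return shapes and `LLMToolCallResult` constructor arguments below are assumptions):

```python
# Illustrative stub against the LLMInterface contract above.
from typing import Any


class EchoLLM(LLMInterface):
    """Echoes the last user message; useful only for wiring tests."""

    def __init__(self, **kwargs: Any):
        super().__init__(provider="mock", api_key="", base_url="", model="echo", **kwargs)

    async def verify_connection(self) -> None:
        return None  # nothing to verify for a local stub

    async def call(self, messages, response_format=None, **kwargs: Any) -> Any:
        # A real provider would call its API here and validate the reply
        # against response_format when one is given.
        return messages[-1]["content"]

    async def call_with_tools(self, messages, tools, **kwargs: Any):
        # Constructor arguments assumed; the real LLMToolCallResult lives in
        # engine/response_models.py.
        return LLMToolCallResult(content=messages[-1]["content"], tool_calls=[])

    async def cleanup(self) -> None:
        return None
```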