hindsight-api 0.4.6__py3-none-any.whl → 0.4.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hindsight_api/__init__.py +1 -1
- hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +16 -2
- hindsight_api/api/http.py +83 -1
- hindsight_api/banner.py +3 -0
- hindsight_api/config.py +44 -6
- hindsight_api/daemon.py +18 -112
- hindsight_api/engine/llm_interface.py +146 -0
- hindsight_api/engine/llm_wrapper.py +304 -1327
- hindsight_api/engine/memory_engine.py +125 -41
- hindsight_api/engine/providers/__init__.py +14 -0
- hindsight_api/engine/providers/anthropic_llm.py +434 -0
- hindsight_api/engine/providers/claude_code_llm.py +352 -0
- hindsight_api/engine/providers/codex_llm.py +527 -0
- hindsight_api/engine/providers/gemini_llm.py +502 -0
- hindsight_api/engine/providers/mock_llm.py +234 -0
- hindsight_api/engine/providers/openai_compatible_llm.py +745 -0
- hindsight_api/engine/retain/fact_extraction.py +13 -9
- hindsight_api/engine/retain/fact_storage.py +5 -3
- hindsight_api/extensions/__init__.py +10 -0
- hindsight_api/extensions/builtin/tenant.py +36 -0
- hindsight_api/extensions/operation_validator.py +129 -0
- hindsight_api/main.py +6 -21
- hindsight_api/migrations.py +75 -0
- hindsight_api/worker/main.py +41 -11
- hindsight_api/worker/poller.py +26 -14
- {hindsight_api-0.4.6.dist-info → hindsight_api-0.4.8.dist-info}/METADATA +2 -1
- {hindsight_api-0.4.6.dist-info → hindsight_api-0.4.8.dist-info}/RECORD +29 -21
- {hindsight_api-0.4.6.dist-info → hindsight_api-0.4.8.dist-info}/WHEEL +0 -0
- {hindsight_api-0.4.6.dist-info → hindsight_api-0.4.8.dist-info}/entry_points.txt +0 -0
hindsight_api/alembic/versions/5a366d414dce_initial_schema.py
CHANGED
@@ -11,6 +11,7 @@ from collections.abc import Sequence
 import sqlalchemy as sa
 from alembic import op
 from pgvector.sqlalchemy import Vector
+from sqlalchemy import text
 from sqlalchemy.dialects import postgresql
 
 # revision identifiers, used by Alembic.
@@ -23,8 +24,21 @@ depends_on: str | Sequence[str] | None = None
 def upgrade() -> None:
     """Upgrade schema - create all tables from scratch."""
 
-    #
-
+    # Note: pgvector extension is installed globally BEFORE migrations run
+    # See migrations.py:run_migrations() - this ensures the extension is available
+    # to all schemas, not just the one being migrated
+
+    # We keep this here as a fallback for backwards compatibility
+    # This may fail if user lacks permissions, which is fine if extension already exists
+    try:
+        op.execute("CREATE EXTENSION IF NOT EXISTS vector")
+    except Exception:
+        # Extension might already exist or user lacks permissions - verify it exists
+        conn = op.get_bind()
+        result = conn.execute(text("SELECT 1 FROM pg_extension WHERE extname = 'vector'")).fetchone()
+        if not result:
+            # Extension truly doesn't exist - re-raise the error
+            raise
 
     # Create banks table
     op.create_table(
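The try/verify/re-raise pattern above generalizes to any extension that an under-privileged role may be unable to create. A minimal standalone sketch of the same idea outside Alembic, assuming a synchronous SQLAlchemy engine; ensure_extension and the DSN are illustrative, not part of hindsight-api:

# Hypothetical helper mirroring the fallback pattern in the migration above.
from sqlalchemy import create_engine, text

def ensure_extension(engine, name: str) -> None:
    # AUTOCOMMIT so a failed CREATE EXTENSION cannot poison a transaction:
    # PostgreSQL rejects further statements in an aborted transaction
    with engine.connect().execution_options(isolation_level="AUTOCOMMIT") as conn:
        try:
            conn.execute(text(f'CREATE EXTENSION IF NOT EXISTS "{name}"'))
        except Exception:
            row = conn.execute(
                text("SELECT 1 FROM pg_extension WHERE extname = :name"),
                {"name": name},
            ).fetchone()
            if row is None:
                raise  # truly missing and we lack the privilege to create it

engine = create_engine("postgresql+psycopg://localhost/hindsight")  # placeholder DSN
ensure_extension(engine, "vector")

Running the probe on an AUTOCOMMIT connection matters here: after a failed statement, PostgreSQL refuses further commands in the same transaction until rollback.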
hindsight_api/api/http.py
CHANGED
@@ -1398,14 +1398,19 @@ def create_app(
 
     # Start worker poller if enabled (standalone mode)
     if config.worker_enabled and memory._pool is not None:
+        from ..config import DEFAULT_DATABASE_SCHEMA
+
         worker_id = config.worker_id or socket.gethostname()
+        # Convert default schema to None for SQL compatibility (no schema prefix)
+        schema = None if config.database_schema == DEFAULT_DATABASE_SCHEMA else config.database_schema
         poller = WorkerPoller(
             pool=memory._pool,
             worker_id=worker_id,
             executor=memory.execute_task,
             poll_interval_ms=config.worker_poll_interval_ms,
             max_retries=config.worker_max_retries,
-
+            schema=schema,
+            tenant_extension=memory._tenant_extension,
             max_slots=config.worker_max_slots,
             consolidation_max_slots=config.worker_consolidation_max_slots,
         )
@@ -2285,6 +2290,23 @@ def _register_routes(app: FastAPI):
     ):
         """Get a mental model by ID."""
         try:
+            # Pre-operation validation hook
+            validator = app.state.memory._operation_validator
+            if validator:
+                from hindsight_api.extensions.operation_validator import MentalModelGetContext
+
+                ctx = MentalModelGetContext(
+                    bank_id=bank_id,
+                    mental_model_id=mental_model_id,
+                    request_context=request_context,
+                )
+                validation = await validator.validate_mental_model_get(ctx)
+                if not validation.allowed:
+                    raise OperationValidationError(
+                        validation.reason or "Operation not allowed",
+                        status_code=validation.status_code,
+                    )
+
             mental_model = await app.state.memory.get_mental_model(
                 bank_id=bank_id,
                 mental_model_id=mental_model_id,
@@ -2292,9 +2314,31 @@ def _register_routes(app: FastAPI):
             )
             if mental_model is None:
                 raise HTTPException(status_code=404, detail=f"Mental model '{mental_model_id}' not found")
+
+            # Post-operation hook
+            if validator:
+                from hindsight_api.extensions.operation_validator import MentalModelGetResult
+
+                content = mental_model.get("content", "")
+                output_tokens = len(content) // 4 if content else 0
+
+                result_ctx = MentalModelGetResult(
+                    bank_id=bank_id,
+                    mental_model_id=mental_model_id,
+                    request_context=request_context,
+                    output_tokens=output_tokens,
+                    success=True,
+                )
+                try:
+                    await validator.on_mental_model_get_complete(result_ctx)
+                except Exception as hook_err:
+                    logger.warning(f"Post-mental-model-get hook error (non-fatal): {hook_err}")
+
             return MentalModelResponse(**mental_model)
         except (AuthenticationError, HTTPException):
             raise
+        except OperationValidationError as e:
+            raise HTTPException(status_code=e.status_code, detail=e.reason)
         except Exception as e:
             import traceback
 
@@ -2319,6 +2363,23 @@ def _register_routes(app: FastAPI):
     ):
         """Create a mental model (async - returns operation_id)."""
         try:
+            # Pre-operation validation hook
+            validator = app.state.memory._operation_validator
+            if validator:
+                from hindsight_api.extensions.operation_validator import MentalModelRefreshContext
+
+                ctx = MentalModelRefreshContext(
+                    bank_id=bank_id,
+                    mental_model_id=None,  # Not yet created
+                    request_context=request_context,
+                )
+                validation = await validator.validate_mental_model_refresh(ctx)
+                if not validation.allowed:
+                    raise OperationValidationError(
+                        validation.reason or "Operation not allowed",
+                        status_code=validation.status_code,
+                    )
+
             # 1. Create the mental model with placeholder content
             mental_model = await app.state.memory.create_mental_model(
                 bank_id=bank_id,
@@ -2341,6 +2402,8 @@ def _register_routes(app: FastAPI):
             raise HTTPException(status_code=400, detail=str(e))
         except (AuthenticationError, HTTPException):
             raise
+        except OperationValidationError as e:
+            raise HTTPException(status_code=e.status_code, detail=e.reason)
         except Exception as e:
             import traceback
 
@@ -2363,6 +2426,23 @@ def _register_routes(app: FastAPI):
     ):
         """Refresh a mental model by re-running its source query (async)."""
         try:
+            # Pre-operation validation hook
+            validator = app.state.memory._operation_validator
+            if validator:
+                from hindsight_api.extensions.operation_validator import MentalModelRefreshContext
+
+                ctx = MentalModelRefreshContext(
+                    bank_id=bank_id,
+                    mental_model_id=mental_model_id,
+                    request_context=request_context,
+                )
+                validation = await validator.validate_mental_model_refresh(ctx)
+                if not validation.allowed:
+                    raise OperationValidationError(
+                        validation.reason or "Operation not allowed",
+                        status_code=validation.status_code,
+                    )
+
             result = await app.state.memory.submit_async_refresh_mental_model(
                 bank_id=bank_id,
                 mental_model_id=mental_model_id,
@@ -2373,6 +2453,8 @@ def _register_routes(app: FastAPI):
             raise HTTPException(status_code=404, detail=str(e))
         except (AuthenticationError, HTTPException):
             raise
+        except OperationValidationError as e:
+            raise HTTPException(status_code=e.status_code, detail=e.reason)
         except Exception as e:
             import traceback
 
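Taken together, these hunks imply the extension contract: a pre-hook that returns an allow/deny decision, and a best-effort post-hook fed a rough token count. A sketch of a validator that could plug into this, assuming the context objects from hindsight_api.extensions.operation_validator shown above; the ValidationResult shape and QuotaValidator are inferred from the .allowed/.reason/.status_code reads in the route code, not taken from the package:

# Hypothetical validator extension; shapes inferred from the route code above.
from dataclasses import dataclass

@dataclass
class ValidationResult:
    # Inferred shape: routes read .allowed, .reason, and .status_code
    allowed: bool
    reason: str | None = None
    status_code: int = 403

class QuotaValidator:
    """Deny mental-model reads for banks over quota, then meter usage."""

    def __init__(self, quota_store):
        self._quota = quota_store  # assumed async store with .exhausted()/.record()

    async def validate_mental_model_get(self, ctx) -> ValidationResult:
        if await self._quota.exhausted(ctx.bank_id):
            return ValidationResult(False, "Quota exceeded", status_code=429)
        return ValidationResult(True)

    async def on_mental_model_get_complete(self, result_ctx) -> None:
        # Post-hook errors are swallowed by the route (logged as non-fatal),
        # so metering here cannot break the request path
        await self._quota.record(result_ctx.bank_id, result_ctx.output_tokens)

Since output_tokens comes from the len(content) // 4 heuristic in the route, anything built on it should treat the count as approximate.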
hindsight_api/banner.py
CHANGED
|
@@ -83,9 +83,12 @@ def print_startup_info(
|
|
|
83
83
|
embeddings_provider: str,
|
|
84
84
|
reranker_provider: str,
|
|
85
85
|
mcp_enabled: bool = False,
|
|
86
|
+
version: str | None = None,
|
|
86
87
|
):
|
|
87
88
|
"""Print styled startup information."""
|
|
88
89
|
print(color_start("Starting Hindsight API..."))
|
|
90
|
+
if version:
|
|
91
|
+
print(f" {dim('Version:')} {color(f'v{version}', 0.1)}")
|
|
89
92
|
print(f" {dim('URL:')} {color(f'http://{host}:{port}', 0.2)}")
|
|
90
93
|
print(f" {dim('Database:')} {color(database_url, 0.4)}")
|
|
91
94
|
print(f" {dim('LLM:')} {color(f'{llm_provider} / {llm_model}', 0.6)}")
|
hindsight_api/config.py
CHANGED
@@ -154,7 +154,21 @@ ENV_REFLECT_MAX_ITERATIONS = "HINDSIGHT_API_REFLECT_MAX_ITERATIONS"
 DEFAULT_DATABASE_URL = "pg0"
 DEFAULT_DATABASE_SCHEMA = "public"
 DEFAULT_LLM_PROVIDER = "openai"
-
+
+# Provider-specific default models
+PROVIDER_DEFAULT_MODELS = {
+    "openai": "o3-mini",
+    "anthropic": "claude-haiku-4-5-20251001",
+    "gemini": "gemini-2.5-flash",
+    "groq": "openai/gpt-oss-120b",
+    "ollama": "gemma3:12b",
+    "lmstudio": "local-model",
+    "vertexai": "gemini-2.0-flash-001",
+    "openai-codex": "gpt-5.2-codex",
+    "claude-code": "claude-sonnet-4-5-20250929",
+    "mock": "mock-model",
+}
+DEFAULT_LLM_MODEL = "o3-mini"  # Fallback if provider not in table
 DEFAULT_LLM_MAX_CONCURRENT = 32
 DEFAULT_LLM_MAX_RETRIES = 10  # Max retry attempts for LLM API calls
 DEFAULT_LLM_INITIAL_BACKOFF = 1.0  # Initial backoff in seconds for retry exponential backoff
@@ -303,6 +317,11 @@ def _validate_extraction_mode(mode: str) -> str:
     return mode_lower
 
 
+def _get_default_model_for_provider(provider: str) -> str:
+    """Get the default model for a given provider."""
+    return PROVIDER_DEFAULT_MODELS.get(provider.lower(), DEFAULT_LLM_MODEL)
+
+
 @dataclass
 class HindsightConfig:
     """Configuration container for Hindsight API."""
@@ -431,14 +450,18 @@ class HindsightConfig:
     @classmethod
     def from_env(cls) -> "HindsightConfig":
         """Create configuration from environment variables."""
+        # Get provider first to determine default model
+        llm_provider = os.getenv(ENV_LLM_PROVIDER, DEFAULT_LLM_PROVIDER)
+        llm_model = os.getenv(ENV_LLM_MODEL) or _get_default_model_for_provider(llm_provider)
+
         return cls(
             # Database
             database_url=os.getenv(ENV_DATABASE_URL, DEFAULT_DATABASE_URL),
             database_schema=os.getenv(ENV_DATABASE_SCHEMA, DEFAULT_DATABASE_SCHEMA),
             # LLM
-            llm_provider=
+            llm_provider=llm_provider,
             llm_api_key=os.getenv(ENV_LLM_API_KEY),
-            llm_model=
+            llm_model=llm_model,
             llm_base_url=os.getenv(ENV_LLM_BASE_URL) or None,
             llm_max_concurrent=int(os.getenv(ENV_LLM_MAX_CONCURRENT, str(DEFAULT_LLM_MAX_CONCURRENT))),
             llm_max_retries=int(os.getenv(ENV_LLM_MAX_RETRIES, str(DEFAULT_LLM_MAX_RETRIES))),
@@ -453,7 +476,12 @@ class HindsightConfig:
             # Per-operation LLM config (None = use default)
             retain_llm_provider=os.getenv(ENV_RETAIN_LLM_PROVIDER) or None,
             retain_llm_api_key=os.getenv(ENV_RETAIN_LLM_API_KEY) or None,
-            retain_llm_model=os.getenv(ENV_RETAIN_LLM_MODEL)
+            retain_llm_model=os.getenv(ENV_RETAIN_LLM_MODEL)
+            or (
+                _get_default_model_for_provider(os.getenv(ENV_RETAIN_LLM_PROVIDER))
+                if os.getenv(ENV_RETAIN_LLM_PROVIDER)
+                else None
+            ),
             retain_llm_base_url=os.getenv(ENV_RETAIN_LLM_BASE_URL) or None,
             retain_llm_max_concurrent=int(os.getenv(ENV_RETAIN_LLM_MAX_CONCURRENT))
             if os.getenv(ENV_RETAIN_LLM_MAX_CONCURRENT)
@@ -470,7 +498,12 @@ class HindsightConfig:
             retain_llm_timeout=float(os.getenv(ENV_RETAIN_LLM_TIMEOUT)) if os.getenv(ENV_RETAIN_LLM_TIMEOUT) else None,
             reflect_llm_provider=os.getenv(ENV_REFLECT_LLM_PROVIDER) or None,
             reflect_llm_api_key=os.getenv(ENV_REFLECT_LLM_API_KEY) or None,
-            reflect_llm_model=os.getenv(ENV_REFLECT_LLM_MODEL)
+            reflect_llm_model=os.getenv(ENV_REFLECT_LLM_MODEL)
+            or (
+                _get_default_model_for_provider(os.getenv(ENV_REFLECT_LLM_PROVIDER))
+                if os.getenv(ENV_REFLECT_LLM_PROVIDER)
+                else None
+            ),
             reflect_llm_base_url=os.getenv(ENV_REFLECT_LLM_BASE_URL) or None,
             reflect_llm_max_concurrent=int(os.getenv(ENV_REFLECT_LLM_MAX_CONCURRENT))
             if os.getenv(ENV_REFLECT_LLM_MAX_CONCURRENT)
@@ -489,7 +522,12 @@ class HindsightConfig:
             else None,
             consolidation_llm_provider=os.getenv(ENV_CONSOLIDATION_LLM_PROVIDER) or None,
             consolidation_llm_api_key=os.getenv(ENV_CONSOLIDATION_LLM_API_KEY) or None,
-            consolidation_llm_model=os.getenv(ENV_CONSOLIDATION_LLM_MODEL)
+            consolidation_llm_model=os.getenv(ENV_CONSOLIDATION_LLM_MODEL)
+            or (
+                _get_default_model_for_provider(os.getenv(ENV_CONSOLIDATION_LLM_PROVIDER))
+                if os.getenv(ENV_CONSOLIDATION_LLM_PROVIDER)
+                else None
+            ),
             consolidation_llm_base_url=os.getenv(ENV_CONSOLIDATION_LLM_BASE_URL) or None,
             consolidation_llm_max_concurrent=int(os.getenv(ENV_CONSOLIDATION_LLM_MAX_CONCURRENT))
             if os.getenv(ENV_CONSOLIDATION_LLM_MAX_CONCURRENT)
hindsight_api/daemon.py
CHANGED
@@ -1,11 +1,10 @@
 """
 Daemon mode support for Hindsight API.
 
-Provides idle timeout
+Provides idle timeout for running as a background daemon.
 """
 
 import asyncio
-import fcntl
 import logging
 import os
 import sys
@@ -15,10 +14,11 @@ from pathlib import Path
 logger = logging.getLogger(__name__)
 
 # Default daemon configuration
-DEFAULT_DAEMON_PORT =
+DEFAULT_DAEMON_PORT = 8888
 DEFAULT_IDLE_TIMEOUT = 0  # 0 = no auto-exit (hindsight-embed passes its own timeout)
-
-
+
+# Allow override via environment variable for profile-specific logs
+DAEMON_LOG_PATH = Path(os.getenv("HINDSIGHT_API_DAEMON_LOG", str(Path.home() / ".hindsight" / "daemon.log")))
 
 
 class IdleTimeoutMiddleware:
@@ -58,97 +58,27 @@ class IdleTimeoutMiddleware:
         os.kill(os.getpid(), signal.SIGTERM)
 
 
-class DaemonLock:
-    """
-    File-based lock to prevent multiple daemon instances.
-
-    Uses fcntl.flock for atomic locking on Unix systems.
-    """
-
-    def __init__(self, lockfile: Path = LOCKFILE_PATH):
-        self.lockfile = lockfile
-        self._fd = None
-
-    def acquire(self) -> bool:
-        """
-        Try to acquire the daemon lock.
-
-        Returns True if lock acquired, False if another daemon is running.
-        """
-        self.lockfile.parent.mkdir(parents=True, exist_ok=True)
-
-        try:
-            self._fd = open(self.lockfile, "w")
-            fcntl.flock(self._fd.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
-            # Write PID for debugging
-            self._fd.write(str(os.getpid()))
-            self._fd.flush()
-            return True
-        except (IOError, OSError):
-            # Lock is held by another process
-            if self._fd:
-                self._fd.close()
-                self._fd = None
-            return False
-
-    def release(self):
-        """Release the daemon lock."""
-        if self._fd:
-            try:
-                fcntl.flock(self._fd.fileno(), fcntl.LOCK_UN)
-                self._fd.close()
-            except Exception:
-                pass
-            finally:
-                self._fd = None
-        # Remove lockfile
-        try:
-            self.lockfile.unlink()
-        except Exception:
-            pass
-
-    def is_locked(self) -> bool:
-        """Check if the lock is held by another process."""
-        if not self.lockfile.exists():
-            return False
-
-        try:
-            fd = open(self.lockfile, "r")
-            fcntl.flock(fd.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
-            # We got the lock, so no one else has it
-            fcntl.flock(fd.fileno(), fcntl.LOCK_UN)
-            fd.close()
-            return False
-        except (IOError, OSError):
-            return True
-
-    def get_pid(self) -> int | None:
-        """Get the PID of the daemon holding the lock."""
-        if not self.lockfile.exists():
-            return None
-        try:
-            with open(self.lockfile, "r") as f:
-                return int(f.read().strip())
-        except (ValueError, IOError):
-            return None
-
-
 def daemonize():
     """
     Fork the current process into a background daemon.
 
     Uses double-fork technique to properly detach from terminal.
     """
-    # First fork
-
-
-
-
-
+    # First fork - detach from parent
+    try:
+        pid = os.fork()
+        if pid > 0:
+            sys.exit(0)
+    except OSError as e:
+        sys.stderr.write(f"fork #1 failed: {e}\n")
+        sys.exit(1)
+
+    # Decouple from parent environment
+    os.chdir("/")
     os.setsid()
+    os.umask(0)
 
-    # Second fork
+    # Second fork - prevent zombie
     pid = os.fork()
     if pid > 0:
         sys.exit(0)
@@ -181,27 +111,3 @@ def check_daemon_running(port: int = DEFAULT_DAEMON_PORT) -> bool:
         return result == 0
     except Exception:
         return False
-
-
-def stop_daemon(port: int = DEFAULT_DAEMON_PORT) -> bool:
-    """Stop a running daemon by sending SIGTERM to the process."""
-    lock = DaemonLock()
-    pid = lock.get_pid()
-
-    if pid is None:
-        return False
-
-    try:
-        import signal
-
-        os.kill(pid, signal.SIGTERM)
-        # Wait for process to exit
-        for _ in range(50):  # Wait up to 5 seconds
-            time.sleep(0.1)
-            try:
-                os.kill(pid, 0)  # Check if process exists
-            except OSError:
-                return True  # Process exited
-        return False
-    except OSError:
-        return False
hindsight_api/engine/llm_interface.py
ADDED

@@ -0,0 +1,146 @@
+"""
+Abstract interface for LLM providers.
+
+This module defines the interface that all LLM providers must implement,
+enabling support for multiple LLM backends (OpenAI, Anthropic, Gemini, Codex, etc.)
+"""
+
+from abc import ABC, abstractmethod
+from typing import Any
+
+from .response_models import LLMToolCallResult, TokenUsage
+
+
+class LLMInterface(ABC):
+    """
+    Abstract interface for LLM providers.
+
+    All LLM provider implementations must inherit from this class and implement
+    the required methods.
+    """
+
+    def __init__(
+        self,
+        provider: str,
+        api_key: str,
+        base_url: str,
+        model: str,
+        reasoning_effort: str = "low",
+        **kwargs: Any,
+    ):
+        """
+        Initialize LLM provider.
+
+        Args:
+            provider: Provider name (e.g., "openai", "codex", "anthropic", "gemini").
+            api_key: API key or authentication token.
+            base_url: Base URL for the API.
+            model: Model name.
+            reasoning_effort: Reasoning effort level for supported providers.
+            **kwargs: Additional provider-specific parameters.
+        """
+        self.provider = provider.lower()
+        self.api_key = api_key
+        self.base_url = base_url
+        self.model = model
+        self.reasoning_effort = reasoning_effort
+
+    @abstractmethod
+    async def verify_connection(self) -> None:
+        """
+        Verify that the LLM provider is configured correctly by making a simple test call.
+
+        Raises:
+            RuntimeError: If the connection test fails.
+        """
+        pass
+
+    @abstractmethod
+    async def call(
+        self,
+        messages: list[dict[str, str]],
+        response_format: Any | None = None,
+        max_completion_tokens: int | None = None,
+        temperature: float | None = None,
+        scope: str = "memory",
+        max_retries: int = 10,
+        initial_backoff: float = 1.0,
+        max_backoff: float = 60.0,
+        skip_validation: bool = False,
+        strict_schema: bool = False,
+        return_usage: bool = False,
+    ) -> Any:
+        """
+        Make an LLM API call with retry logic.
+
+        Args:
+            messages: List of message dicts with 'role' and 'content'.
+            response_format: Optional Pydantic model for structured output.
+            max_completion_tokens: Maximum tokens in response.
+            temperature: Sampling temperature (0.0-2.0).
+            scope: Scope identifier for tracking.
+            max_retries: Maximum retry attempts.
+            initial_backoff: Initial backoff time in seconds.
+            max_backoff: Maximum backoff time in seconds.
+            skip_validation: Return raw JSON without Pydantic validation.
+            strict_schema: Use strict JSON schema enforcement (OpenAI only).
+            return_usage: If True, return tuple (result, TokenUsage) instead of just result.
+
+        Returns:
+            If return_usage=False: Parsed response if response_format is provided, otherwise text content.
+            If return_usage=True: Tuple of (result, TokenUsage) with token counts.
+
+        Raises:
+            OutputTooLongError: If output exceeds token limits.
+            Exception: Re-raises API errors after retries exhausted.
+        """
+        pass
+
+    @abstractmethod
+    async def call_with_tools(
+        self,
+        messages: list[dict[str, Any]],
+        tools: list[dict[str, Any]],
+        max_completion_tokens: int | None = None,
+        temperature: float | None = None,
+        scope: str = "tools",
+        max_retries: int = 5,
+        initial_backoff: float = 1.0,
+        max_backoff: float = 30.0,
+        tool_choice: str | dict[str, Any] = "auto",
+    ) -> LLMToolCallResult:
+        """
+        Make an LLM API call with tool/function calling support.
+
+        Args:
+            messages: List of message dicts. Can include tool results with role='tool'.
+            tools: List of tool definitions in OpenAI format.
+            max_completion_tokens: Maximum tokens in response.
+            temperature: Sampling temperature (0.0-2.0).
+            scope: Scope identifier for tracking.
+            max_retries: Maximum retry attempts.
+            initial_backoff: Initial backoff time in seconds.
+            max_backoff: Maximum backoff time in seconds.
+            tool_choice: How to choose tools - "auto", "none", "required", or specific function.
+
+        Returns:
+            LLMToolCallResult with content and/or tool_calls.
+        """
+        pass
+
+    @abstractmethod
+    async def cleanup(self) -> None:
+        """Clean up resources (close connections, etc.)."""
+        pass
+
+
+class OutputTooLongError(Exception):
+    """
+    Bridge exception raised when LLM output exceeds token limits.
+
+    This wraps provider-specific errors (e.g., OpenAI's LengthFinishReasonError)
+    to allow callers to handle output length issues without depending on
+    provider-specific implementations.
+    """
+
+    pass