hindsight-api 0.4.7__tar.gz → 0.4.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122)
  1. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/PKG-INFO +2 -1
  2. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/__init__.py +1 -1
  3. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +16 -2
  4. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/api/http.py +39 -1
  5. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/banner.py +3 -0
  6. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/config.py +44 -6
  7. hindsight_api-0.4.8/hindsight_api/daemon.py +113 -0
  8. hindsight_api-0.4.8/hindsight_api/engine/llm_interface.py +146 -0
  9. hindsight_api-0.4.8/hindsight_api/engine/llm_wrapper.py +597 -0
  10. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/memory_engine.py +71 -37
  11. hindsight_api-0.4.8/hindsight_api/engine/providers/__init__.py +14 -0
  12. hindsight_api-0.4.8/hindsight_api/engine/providers/anthropic_llm.py +434 -0
  13. hindsight_api-0.4.8/hindsight_api/engine/providers/claude_code_llm.py +352 -0
  14. hindsight_api-0.4.8/hindsight_api/engine/providers/codex_llm.py +527 -0
  15. hindsight_api-0.4.8/hindsight_api/engine/providers/gemini_llm.py +502 -0
  16. hindsight_api-0.4.8/hindsight_api/engine/providers/mock_llm.py +234 -0
  17. hindsight_api-0.4.8/hindsight_api/engine/providers/openai_compatible_llm.py +745 -0
  18. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/extensions/__init__.py +2 -0
  19. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/extensions/builtin/tenant.py +36 -0
  20. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/extensions/operation_validator.py +26 -0
  21. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/main.py +6 -21
  22. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/migrations.py +75 -0
  23. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/worker/main.py +35 -10
  24. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/worker/poller.py +15 -11
  25. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/pyproject.toml +11 -10
  26. hindsight_api-0.4.7/hindsight_api/daemon.py +0 -207
  27. hindsight_api-0.4.7/hindsight_api/engine/llm_wrapper.py +0 -1620
  28. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/.gitignore +0 -0
  29. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/README.md +0 -0
  30. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/admin/__init__.py +0 -0
  31. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/admin/cli.py +0 -0
  32. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/README +0 -0
  33. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/env.py +0 -0
  34. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/script.py.mako +0 -0
  35. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py +0 -0
  36. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py +0 -0
  37. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py +0 -0
  38. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/e0a1b2c3d4e5_disposition_to_3_traits.py +0 -0
  39. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/f1a2b3c4d5e6_add_memory_links_composite_index.py +0 -0
  40. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/g2a3b4c5d6e7_add_tags_column.py +0 -0
  41. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/h3c4d5e6f7g8_mental_models_v4.py +0 -0
  42. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/i4d5e6f7g8h9_delete_opinions.py +0 -0
  43. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/j5e6f7g8h9i0_mental_model_versions.py +0 -0
  44. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/k6f7g8h9i0j1_add_directive_subtype.py +0 -0
  45. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/l7g8h9i0j1k2_add_worker_columns.py +0 -0
  46. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/m8h9i0j1k2l3_mental_model_id_to_text.py +0 -0
  47. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/n9i0j1k2l3m4_learnings_and_pinned_reflections.py +0 -0
  48. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/o0j1k2l3m4n5_migrate_mental_models_data.py +0 -0
  49. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/p1k2l3m4n5o6_new_knowledge_architecture.py +0 -0
  50. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/q2l3m4n5o6p7_fix_mental_model_fact_type.py +0 -0
  51. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/r3m4n5o6p7q8_add_reflect_response_to_reflections.py +0 -0
  52. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/rename_personality_to_disposition.py +0 -0
  53. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/s4n5o6p7q8r9_add_consolidated_at_to_memory_units.py +0 -0
  54. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/t5o6p7q8r9s0_rename_mental_models_to_observations.py +0 -0
  55. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/u6p7q8r9s0t1_mental_models_text_id.py +0 -0
  56. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/alembic/versions/v7q8r9s0t1u2_add_max_tokens_to_mental_models.py +0 -0
  57. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/api/__init__.py +0 -0
  58. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/api/mcp.py +0 -0
  59. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/__init__.py +0 -0
  60. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/consolidation/__init__.py +0 -0
  61. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/consolidation/consolidator.py +0 -0
  62. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/consolidation/prompts.py +0 -0
  63. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/cross_encoder.py +0 -0
  64. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/db_budget.py +0 -0
  65. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/db_utils.py +0 -0
  66. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/directives/__init__.py +0 -0
  67. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/directives/models.py +0 -0
  68. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/embeddings.py +0 -0
  69. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/entity_resolver.py +0 -0
  70. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/interface.py +0 -0
  71. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/mental_models/__init__.py +0 -0
  72. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/mental_models/models.py +0 -0
  73. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/query_analyzer.py +0 -0
  74. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/reflect/__init__.py +0 -0
  75. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/reflect/agent.py +0 -0
  76. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/reflect/models.py +0 -0
  77. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/reflect/observations.py +0 -0
  78. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/reflect/prompts.py +0 -0
  79. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/reflect/tools.py +0 -0
  80. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/reflect/tools_schema.py +0 -0
  81. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/response_models.py +0 -0
  82. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/retain/__init__.py +0 -0
  83. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/retain/bank_utils.py +0 -0
  84. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/retain/chunk_storage.py +0 -0
  85. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/retain/deduplication.py +0 -0
  86. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/retain/embedding_processing.py +0 -0
  87. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/retain/embedding_utils.py +0 -0
  88. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/retain/entity_processing.py +0 -0
  89. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/retain/fact_extraction.py +0 -0
  90. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/retain/fact_storage.py +0 -0
  91. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/retain/link_creation.py +0 -0
  92. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/retain/link_utils.py +0 -0
  93. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/retain/orchestrator.py +0 -0
  94. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/retain/types.py +0 -0
  95. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/search/__init__.py +0 -0
  96. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/search/fusion.py +0 -0
  97. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/search/graph_retrieval.py +0 -0
  98. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/search/link_expansion_retrieval.py +0 -0
  99. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/search/mpfp_retrieval.py +0 -0
  100. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/search/reranking.py +0 -0
  101. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/search/retrieval.py +0 -0
  102. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/search/tags.py +0 -0
  103. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/search/temporal_extraction.py +0 -0
  104. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/search/think_utils.py +0 -0
  105. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/search/trace.py +0 -0
  106. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/search/tracer.py +0 -0
  107. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/search/types.py +0 -0
  108. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/task_backend.py +0 -0
  109. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/engine/utils.py +0 -0
  110. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/extensions/base.py +0 -0
  111. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/extensions/builtin/__init__.py +0 -0
  112. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/extensions/context.py +0 -0
  113. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/extensions/http.py +0 -0
  114. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/extensions/loader.py +0 -0
  115. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/extensions/tenant.py +0 -0
  116. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/mcp_local.py +0 -0
  117. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/mcp_tools.py +0 -0
  118. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/metrics.py +0 -0
  119. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/models.py +0 -0
  120. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/pg0.py +0 -0
  121. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/server.py +0 -0
  122. {hindsight_api-0.4.7 → hindsight_api-0.4.8}/hindsight_api/worker/__init__.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hindsight-api
3
- Version: 0.4.7
3
+ Version: 0.4.8
4
4
  Summary: Hindsight: Agent Memory That Works Like Human Memory
5
5
  Requires-Python: >=3.11
6
6
  Requires-Dist: aiohttp>=3.13.3
@@ -8,6 +8,7 @@ Requires-Dist: alembic>=1.17.1
8
8
  Requires-Dist: anthropic>=0.40.0
9
9
  Requires-Dist: asyncpg>=0.29.0
10
10
  Requires-Dist: authlib>=1.6.6
11
+ Requires-Dist: claude-agent-sdk>=0.1.27
11
12
  Requires-Dist: cohere>=5.0.0
12
13
  Requires-Dist: dateparser>=1.2.2
13
14
  Requires-Dist: fastapi[standard]>=0.120.3
@@ -46,4 +46,4 @@ __all__ = [
46
46
  "RemoteTEICrossEncoder",
47
47
  "LLMConfig",
48
48
  ]
49
- __version__ = "0.4.7"
49
+ __version__ = "0.4.8"
@@ -11,6 +11,7 @@ from collections.abc import Sequence
11
11
  import sqlalchemy as sa
12
12
  from alembic import op
13
13
  from pgvector.sqlalchemy import Vector
14
+ from sqlalchemy import text
14
15
  from sqlalchemy.dialects import postgresql
15
16
 
16
17
  # revision identifiers, used by Alembic.
@@ -23,8 +24,21 @@ depends_on: str | Sequence[str] | None = None
23
24
  def upgrade() -> None:
24
25
  """Upgrade schema - create all tables from scratch."""
25
26
 
26
- # Enable required extensions
27
- op.execute("CREATE EXTENSION IF NOT EXISTS vector")
27
+ # Note: pgvector extension is installed globally BEFORE migrations run
28
+ # See migrations.py:run_migrations() - this ensures the extension is available
29
+ # to all schemas, not just the one being migrated
30
+
31
+ # We keep this here as a fallback for backwards compatibility
32
+ # This may fail if user lacks permissions, which is fine if extension already exists
33
+ try:
34
+ op.execute("CREATE EXTENSION IF NOT EXISTS vector")
35
+ except Exception:
36
+ # Extension might already exist or user lacks permissions - verify it exists
37
+ conn = op.get_bind()
38
+ result = conn.execute(text("SELECT 1 FROM pg_extension WHERE extname = 'vector'")).fetchone()
39
+ if not result:
40
+ # Extension truly doesn't exist - re-raise the error
41
+ raise
28
42
 
29
43
  # Create banks table
30
44
  op.create_table(
@@ -1410,7 +1410,7 @@ def create_app(
1410
1410
  poll_interval_ms=config.worker_poll_interval_ms,
1411
1411
  max_retries=config.worker_max_retries,
1412
1412
  schema=schema,
1413
- tenant_extension=getattr(memory, "_tenant_extension", None),
1413
+ tenant_extension=memory._tenant_extension,
1414
1414
  max_slots=config.worker_max_slots,
1415
1415
  consolidation_max_slots=config.worker_consolidation_max_slots,
1416
1416
  )
@@ -2363,6 +2363,23 @@ def _register_routes(app: FastAPI):
2363
2363
  ):
2364
2364
  """Create a mental model (async - returns operation_id)."""
2365
2365
  try:
2366
+ # Pre-operation validation hook
2367
+ validator = app.state.memory._operation_validator
2368
+ if validator:
2369
+ from hindsight_api.extensions.operation_validator import MentalModelRefreshContext
2370
+
2371
+ ctx = MentalModelRefreshContext(
2372
+ bank_id=bank_id,
2373
+ mental_model_id=None, # Not yet created
2374
+ request_context=request_context,
2375
+ )
2376
+ validation = await validator.validate_mental_model_refresh(ctx)
2377
+ if not validation.allowed:
2378
+ raise OperationValidationError(
2379
+ validation.reason or "Operation not allowed",
2380
+ status_code=validation.status_code,
2381
+ )
2382
+
2366
2383
  # 1. Create the mental model with placeholder content
2367
2384
  mental_model = await app.state.memory.create_mental_model(
2368
2385
  bank_id=bank_id,
@@ -2385,6 +2402,8 @@ def _register_routes(app: FastAPI):
2385
2402
  raise HTTPException(status_code=400, detail=str(e))
2386
2403
  except (AuthenticationError, HTTPException):
2387
2404
  raise
2405
+ except OperationValidationError as e:
2406
+ raise HTTPException(status_code=e.status_code, detail=e.reason)
2388
2407
  except Exception as e:
2389
2408
  import traceback
2390
2409
 
@@ -2407,6 +2426,23 @@ def _register_routes(app: FastAPI):
2407
2426
  ):
2408
2427
  """Refresh a mental model by re-running its source query (async)."""
2409
2428
  try:
2429
+ # Pre-operation validation hook
2430
+ validator = app.state.memory._operation_validator
2431
+ if validator:
2432
+ from hindsight_api.extensions.operation_validator import MentalModelRefreshContext
2433
+
2434
+ ctx = MentalModelRefreshContext(
2435
+ bank_id=bank_id,
2436
+ mental_model_id=mental_model_id,
2437
+ request_context=request_context,
2438
+ )
2439
+ validation = await validator.validate_mental_model_refresh(ctx)
2440
+ if not validation.allowed:
2441
+ raise OperationValidationError(
2442
+ validation.reason or "Operation not allowed",
2443
+ status_code=validation.status_code,
2444
+ )
2445
+
2410
2446
  result = await app.state.memory.submit_async_refresh_mental_model(
2411
2447
  bank_id=bank_id,
2412
2448
  mental_model_id=mental_model_id,
@@ -2417,6 +2453,8 @@ def _register_routes(app: FastAPI):
2417
2453
  raise HTTPException(status_code=404, detail=str(e))
2418
2454
  except (AuthenticationError, HTTPException):
2419
2455
  raise
2456
+ except OperationValidationError as e:
2457
+ raise HTTPException(status_code=e.status_code, detail=e.reason)
2420
2458
  except Exception as e:
2421
2459
  import traceback
2422
2460
 
@@ -83,9 +83,12 @@ def print_startup_info(
83
83
  embeddings_provider: str,
84
84
  reranker_provider: str,
85
85
  mcp_enabled: bool = False,
86
+ version: str | None = None,
86
87
  ):
87
88
  """Print styled startup information."""
88
89
  print(color_start("Starting Hindsight API..."))
90
+ if version:
91
+ print(f" {dim('Version:')} {color(f'v{version}', 0.1)}")
89
92
  print(f" {dim('URL:')} {color(f'http://{host}:{port}', 0.2)}")
90
93
  print(f" {dim('Database:')} {color(database_url, 0.4)}")
91
94
  print(f" {dim('LLM:')} {color(f'{llm_provider} / {llm_model}', 0.6)}")
@@ -154,7 +154,21 @@ ENV_REFLECT_MAX_ITERATIONS = "HINDSIGHT_API_REFLECT_MAX_ITERATIONS"
154
154
  DEFAULT_DATABASE_URL = "pg0"
155
155
  DEFAULT_DATABASE_SCHEMA = "public"
156
156
  DEFAULT_LLM_PROVIDER = "openai"
157
- DEFAULT_LLM_MODEL = "gpt-5-mini"
157
+
158
+ # Provider-specific default models
159
+ PROVIDER_DEFAULT_MODELS = {
160
+ "openai": "o3-mini",
161
+ "anthropic": "claude-haiku-4-5-20251001",
162
+ "gemini": "gemini-2.5-flash",
163
+ "groq": "openai/gpt-oss-120b",
164
+ "ollama": "gemma3:12b",
165
+ "lmstudio": "local-model",
166
+ "vertexai": "gemini-2.0-flash-001",
167
+ "openai-codex": "gpt-5.2-codex",
168
+ "claude-code": "claude-sonnet-4-5-20250929",
169
+ "mock": "mock-model",
170
+ }
171
+ DEFAULT_LLM_MODEL = "o3-mini" # Fallback if provider not in table
158
172
  DEFAULT_LLM_MAX_CONCURRENT = 32
159
173
  DEFAULT_LLM_MAX_RETRIES = 10 # Max retry attempts for LLM API calls
160
174
  DEFAULT_LLM_INITIAL_BACKOFF = 1.0 # Initial backoff in seconds for retry exponential backoff
@@ -303,6 +317,11 @@ def _validate_extraction_mode(mode: str) -> str:
303
317
  return mode_lower
304
318
 
305
319
 
320
+ def _get_default_model_for_provider(provider: str) -> str:
321
+ """Get the default model for a given provider."""
322
+ return PROVIDER_DEFAULT_MODELS.get(provider.lower(), DEFAULT_LLM_MODEL)
323
+
324
+
306
325
  @dataclass
307
326
  class HindsightConfig:
308
327
  """Configuration container for Hindsight API."""
@@ -431,14 +450,18 @@ class HindsightConfig:
431
450
  @classmethod
432
451
  def from_env(cls) -> "HindsightConfig":
433
452
  """Create configuration from environment variables."""
453
+ # Get provider first to determine default model
454
+ llm_provider = os.getenv(ENV_LLM_PROVIDER, DEFAULT_LLM_PROVIDER)
455
+ llm_model = os.getenv(ENV_LLM_MODEL) or _get_default_model_for_provider(llm_provider)
456
+
434
457
  return cls(
435
458
  # Database
436
459
  database_url=os.getenv(ENV_DATABASE_URL, DEFAULT_DATABASE_URL),
437
460
  database_schema=os.getenv(ENV_DATABASE_SCHEMA, DEFAULT_DATABASE_SCHEMA),
438
461
  # LLM
439
- llm_provider=os.getenv(ENV_LLM_PROVIDER, DEFAULT_LLM_PROVIDER),
462
+ llm_provider=llm_provider,
440
463
  llm_api_key=os.getenv(ENV_LLM_API_KEY),
441
- llm_model=os.getenv(ENV_LLM_MODEL, DEFAULT_LLM_MODEL),
464
+ llm_model=llm_model,
442
465
  llm_base_url=os.getenv(ENV_LLM_BASE_URL) or None,
443
466
  llm_max_concurrent=int(os.getenv(ENV_LLM_MAX_CONCURRENT, str(DEFAULT_LLM_MAX_CONCURRENT))),
444
467
  llm_max_retries=int(os.getenv(ENV_LLM_MAX_RETRIES, str(DEFAULT_LLM_MAX_RETRIES))),
@@ -453,7 +476,12 @@ class HindsightConfig:
453
476
  # Per-operation LLM config (None = use default)
454
477
  retain_llm_provider=os.getenv(ENV_RETAIN_LLM_PROVIDER) or None,
455
478
  retain_llm_api_key=os.getenv(ENV_RETAIN_LLM_API_KEY) or None,
456
- retain_llm_model=os.getenv(ENV_RETAIN_LLM_MODEL) or None,
479
+ retain_llm_model=os.getenv(ENV_RETAIN_LLM_MODEL)
480
+ or (
481
+ _get_default_model_for_provider(os.getenv(ENV_RETAIN_LLM_PROVIDER))
482
+ if os.getenv(ENV_RETAIN_LLM_PROVIDER)
483
+ else None
484
+ ),
457
485
  retain_llm_base_url=os.getenv(ENV_RETAIN_LLM_BASE_URL) or None,
458
486
  retain_llm_max_concurrent=int(os.getenv(ENV_RETAIN_LLM_MAX_CONCURRENT))
459
487
  if os.getenv(ENV_RETAIN_LLM_MAX_CONCURRENT)
@@ -470,7 +498,12 @@ class HindsightConfig:
470
498
  retain_llm_timeout=float(os.getenv(ENV_RETAIN_LLM_TIMEOUT)) if os.getenv(ENV_RETAIN_LLM_TIMEOUT) else None,
471
499
  reflect_llm_provider=os.getenv(ENV_REFLECT_LLM_PROVIDER) or None,
472
500
  reflect_llm_api_key=os.getenv(ENV_REFLECT_LLM_API_KEY) or None,
473
- reflect_llm_model=os.getenv(ENV_REFLECT_LLM_MODEL) or None,
501
+ reflect_llm_model=os.getenv(ENV_REFLECT_LLM_MODEL)
502
+ or (
503
+ _get_default_model_for_provider(os.getenv(ENV_REFLECT_LLM_PROVIDER))
504
+ if os.getenv(ENV_REFLECT_LLM_PROVIDER)
505
+ else None
506
+ ),
474
507
  reflect_llm_base_url=os.getenv(ENV_REFLECT_LLM_BASE_URL) or None,
475
508
  reflect_llm_max_concurrent=int(os.getenv(ENV_REFLECT_LLM_MAX_CONCURRENT))
476
509
  if os.getenv(ENV_REFLECT_LLM_MAX_CONCURRENT)
@@ -489,7 +522,12 @@ class HindsightConfig:
489
522
  else None,
490
523
  consolidation_llm_provider=os.getenv(ENV_CONSOLIDATION_LLM_PROVIDER) or None,
491
524
  consolidation_llm_api_key=os.getenv(ENV_CONSOLIDATION_LLM_API_KEY) or None,
492
- consolidation_llm_model=os.getenv(ENV_CONSOLIDATION_LLM_MODEL) or None,
525
+ consolidation_llm_model=os.getenv(ENV_CONSOLIDATION_LLM_MODEL)
526
+ or (
527
+ _get_default_model_for_provider(os.getenv(ENV_CONSOLIDATION_LLM_PROVIDER))
528
+ if os.getenv(ENV_CONSOLIDATION_LLM_PROVIDER)
529
+ else None
530
+ ),
493
531
  consolidation_llm_base_url=os.getenv(ENV_CONSOLIDATION_LLM_BASE_URL) or None,
494
532
  consolidation_llm_max_concurrent=int(os.getenv(ENV_CONSOLIDATION_LLM_MAX_CONCURRENT))
495
533
  if os.getenv(ENV_CONSOLIDATION_LLM_MAX_CONCURRENT)
@@ -0,0 +1,113 @@
1
+ """
2
+ Daemon mode support for Hindsight API.
3
+
4
+ Provides idle timeout for running as a background daemon.
5
+ """
6
+
7
+ import asyncio
8
+ import logging
9
+ import os
10
+ import sys
11
+ import time
12
+ from pathlib import Path
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+ # Default daemon configuration
17
+ DEFAULT_DAEMON_PORT = 8888
18
+ DEFAULT_IDLE_TIMEOUT = 0 # 0 = no auto-exit (hindsight-embed passes its own timeout)
19
+
20
+ # Allow override via environment variable for profile-specific logs
21
+ DAEMON_LOG_PATH = Path(os.getenv("HINDSIGHT_API_DAEMON_LOG", str(Path.home() / ".hindsight" / "daemon.log")))
22
+
23
+
24
+ class IdleTimeoutMiddleware:
25
+ """ASGI middleware that tracks activity and exits after idle timeout."""
26
+
27
+ def __init__(self, app, idle_timeout: int = DEFAULT_IDLE_TIMEOUT):
28
+ self.app = app
29
+ self.idle_timeout = idle_timeout
30
+ self.last_activity = time.time()
31
+ self._checker_task = None
32
+
33
+ async def __call__(self, scope, receive, send):
34
+ # Update activity timestamp on each request
35
+ self.last_activity = time.time()
36
+ await self.app(scope, receive, send)
37
+
38
+ def start_idle_checker(self):
39
+ """Start the background task that checks for idle timeout."""
40
+ self._checker_task = asyncio.create_task(self._check_idle())
41
+
42
+ async def _check_idle(self):
43
+ """Background task that exits the process after idle timeout."""
44
+ # If idle_timeout is 0, don't auto-exit
45
+ if self.idle_timeout <= 0:
46
+ return
47
+
48
+ while True:
49
+ await asyncio.sleep(30) # Check every 30 seconds
50
+ idle_time = time.time() - self.last_activity
51
+ if idle_time > self.idle_timeout:
52
+ logger.info(f"Idle timeout reached ({self.idle_timeout}s), shutting down daemon")
53
+ # Give a moment for any in-flight requests
54
+ await asyncio.sleep(1)
55
+ # Send SIGTERM to ourselves to trigger graceful shutdown
56
+ import signal
57
+
58
+ os.kill(os.getpid(), signal.SIGTERM)
59
+
def daemonize():
    """
    Fork the current process into a background daemon.

    Uses the classic double-fork technique to fully detach from the
    controlling terminal, then redirects stdin to /dev/null and
    stdout/stderr to DAEMON_LOG_PATH.

    Exits the parent (and intermediate) process via sys.exit(0); on fork
    failure, writes the error to stderr and exits with status 1.
    """
    # First fork - detach from parent
    try:
        if os.fork() > 0:
            sys.exit(0)
    except OSError as e:
        sys.stderr.write(f"fork #1 failed: {e}\n")
        sys.exit(1)

    # Decouple from parent environment
    os.chdir("/")
    os.setsid()
    os.umask(0)

    # Second fork - prevent zombie / reacquisition of a controlling terminal.
    # Guarded like fork #1: an unhandled OSError here would propagate after we
    # have already detached, leaving a half-daemonized process.
    try:
        if os.fork() > 0:
            sys.exit(0)
    except OSError as e:
        sys.stderr.write(f"fork #2 failed: {e}\n")
        sys.exit(1)

    # Redirect standard file descriptors to log file
    DAEMON_LOG_PATH.parent.mkdir(parents=True, exist_ok=True)

    sys.stdout.flush()
    sys.stderr.flush()

    # Redirect stdin to /dev/null
    with open("/dev/null", "r") as devnull:
        os.dup2(devnull.fileno(), sys.stdin.fileno())

    # Redirect stdout/stderr to log file. dup2 gives fds 1 and 2 their own
    # copies of the descriptor, so the original file object can be closed
    # deterministically instead of being left to the garbage collector.
    with open(DAEMON_LOG_PATH, "a", encoding="utf-8") as log_file:
        os.dup2(log_file.fileno(), sys.stdout.fileno())
        os.dup2(log_file.fileno(), sys.stderr.fileno())
def check_daemon_running(port: int = DEFAULT_DAEMON_PORT) -> bool:
    """
    Check if a daemon is running and responsive on the given port.

    Attempts a TCP connection to 127.0.0.1:port with a 1-second timeout.

    Args:
        port: TCP port to probe (defaults to DEFAULT_DAEMON_PORT).

    Returns:
        True if something accepted the connection, False otherwise
        (including on any socket error).
    """
    import socket

    try:
        # Context manager guarantees the socket is closed even if
        # settimeout/connect_ex raises (the original leaked it on error).
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            sock.settimeout(1)
            # connect_ex returns 0 on success instead of raising.
            return sock.connect_ex(("127.0.0.1", port)) == 0
    except Exception:
        return False
@@ -0,0 +1,146 @@
1
+ """
2
+ Abstract interface for LLM providers.
3
+
4
+ This module defines the interface that all LLM providers must implement,
5
+ enabling support for multiple LLM backends (OpenAI, Anthropic, Gemini, Codex, etc.)
6
+ """
7
+
8
+ from abc import ABC, abstractmethod
9
+ from typing import Any
10
+
11
+ from .response_models import LLMToolCallResult, TokenUsage
12
+
13
+
class LLMInterface(ABC):
    """
    Base contract shared by every LLM backend.

    Concrete providers (OpenAI, Anthropic, Gemini, Codex, ...) subclass this
    and implement each abstract method below.
    """

    def __init__(
        self,
        provider: str,
        api_key: str,
        base_url: str,
        model: str,
        reasoning_effort: str = "low",
        **kwargs: Any,
    ):
        """
        Store the configuration common to all providers.

        Args:
            provider: Provider name (e.g., "openai", "codex", "anthropic", "gemini").
            api_key: API key or authentication token.
            base_url: Base URL for the API.
            model: Model name.
            reasoning_effort: Reasoning effort level for providers that support it.
            **kwargs: Extra provider-specific options (unused by the base class).
        """
        # Normalized so provider comparisons are case-insensitive.
        self.provider = provider.lower()
        self.api_key = api_key
        self.base_url = base_url
        self.model = model
        self.reasoning_effort = reasoning_effort

    @abstractmethod
    async def verify_connection(self) -> None:
        """
        Issue a minimal test call to confirm the provider is reachable and configured.

        Raises:
            RuntimeError: If the connectivity check fails.
        """
        pass

    @abstractmethod
    async def call(
        self,
        messages: list[dict[str, str]],
        response_format: Any | None = None,
        max_completion_tokens: int | None = None,
        temperature: float | None = None,
        scope: str = "memory",
        max_retries: int = 10,
        initial_backoff: float = 1.0,
        max_backoff: float = 60.0,
        skip_validation: bool = False,
        strict_schema: bool = False,
        return_usage: bool = False,
    ) -> Any:
        """
        Perform a chat-completion request with retry/backoff handling.

        Args:
            messages: Chat messages as dicts with 'role' and 'content'.
            response_format: Pydantic model to parse structured output into, if any.
            max_completion_tokens: Cap on tokens generated in the response.
            temperature: Sampling temperature (0.0-2.0).
            scope: Label used for usage tracking.
            max_retries: Upper bound on retry attempts.
            initial_backoff: First backoff delay for exponential backoff, in seconds.
            max_backoff: Ceiling on the backoff delay, in seconds.
            skip_validation: If True, return raw JSON without Pydantic validation.
            strict_schema: Enforce strict JSON schema (OpenAI only).
            return_usage: If True, also return token usage alongside the result.

        Returns:
            The parsed response when response_format is given, otherwise the
            text content; when return_usage is True, a (result, TokenUsage)
            tuple instead.

        Raises:
            OutputTooLongError: If the output exceeds token limits.
            Exception: The underlying API error once retries are exhausted.
        """
        pass

    @abstractmethod
    async def call_with_tools(
        self,
        messages: list[dict[str, Any]],
        tools: list[dict[str, Any]],
        max_completion_tokens: int | None = None,
        temperature: float | None = None,
        scope: str = "tools",
        max_retries: int = 5,
        initial_backoff: float = 1.0,
        max_backoff: float = 30.0,
        tool_choice: str | dict[str, Any] = "auto",
    ) -> LLMToolCallResult:
        """
        Perform a chat-completion request with tool/function calling enabled.

        Args:
            messages: Chat messages; entries with role='tool' carry tool results.
            tools: Tool definitions in OpenAI function-calling format.
            max_completion_tokens: Cap on tokens generated in the response.
            temperature: Sampling temperature (0.0-2.0).
            scope: Label used for usage tracking.
            max_retries: Upper bound on retry attempts.
            initial_backoff: First backoff delay, in seconds.
            max_backoff: Ceiling on the backoff delay, in seconds.
            tool_choice: Tool selection policy - "auto", "none", "required",
                or a specific function spec.

        Returns:
            LLMToolCallResult carrying text content and/or requested tool calls.
        """
        pass

    @abstractmethod
    async def cleanup(self) -> None:
        """Release held resources (e.g., close open connections)."""
        pass
class OutputTooLongError(Exception):
    """
    Bridge exception raised when LLM output exceeds token limits.

    Wraps provider-specific failures (such as OpenAI's
    LengthFinishReasonError) so callers can react to over-long output
    without importing any provider-specific error types.
    """

    pass