hindsight-api 0.4.0__tar.gz → 0.4.1__tar.gz

This diff shows the changes between publicly released versions of the package as published to its registry. It is provided for informational purposes only.
Files changed (112)
  1. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/.gitignore +4 -1
  2. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/PKG-INFO +1 -1
  3. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/__init__.py +1 -1
  4. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/api/http.py +3 -2
  5. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/config.py +29 -1
  6. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/consolidation/consolidator.py +114 -47
  7. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/consolidation/prompts.py +21 -13
  8. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/cross_encoder.py +50 -24
  9. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/embeddings.py +45 -19
  10. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/memory_engine.py +11 -5
  11. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/reflect/tools.py +1 -1
  12. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/extensions/builtin/tenant.py +8 -5
  13. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/main.py +12 -0
  14. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/pyproject.toml +1 -1
  15. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/README.md +0 -0
  16. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/admin/__init__.py +0 -0
  17. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/admin/cli.py +0 -0
  18. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/README +0 -0
  19. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/env.py +0 -0
  20. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/script.py.mako +0 -0
  21. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +0 -0
  22. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py +0 -0
  23. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py +0 -0
  24. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py +0 -0
  25. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/e0a1b2c3d4e5_disposition_to_3_traits.py +0 -0
  26. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/f1a2b3c4d5e6_add_memory_links_composite_index.py +0 -0
  27. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/g2a3b4c5d6e7_add_tags_column.py +0 -0
  28. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/h3c4d5e6f7g8_mental_models_v4.py +0 -0
  29. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/i4d5e6f7g8h9_delete_opinions.py +0 -0
  30. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/j5e6f7g8h9i0_mental_model_versions.py +0 -0
  31. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/k6f7g8h9i0j1_add_directive_subtype.py +0 -0
  32. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/l7g8h9i0j1k2_add_worker_columns.py +0 -0
  33. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/m8h9i0j1k2l3_mental_model_id_to_text.py +0 -0
  34. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/n9i0j1k2l3m4_learnings_and_pinned_reflections.py +0 -0
  35. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/o0j1k2l3m4n5_migrate_mental_models_data.py +0 -0
  36. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/p1k2l3m4n5o6_new_knowledge_architecture.py +0 -0
  37. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/q2l3m4n5o6p7_fix_mental_model_fact_type.py +0 -0
  38. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/r3m4n5o6p7q8_add_reflect_response_to_reflections.py +0 -0
  39. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/rename_personality_to_disposition.py +0 -0
  40. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/s4n5o6p7q8r9_add_consolidated_at_to_memory_units.py +0 -0
  41. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/t5o6p7q8r9s0_rename_mental_models_to_observations.py +0 -0
  42. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/u6p7q8r9s0t1_mental_models_text_id.py +0 -0
  43. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/alembic/versions/v7q8r9s0t1u2_add_max_tokens_to_mental_models.py +0 -0
  44. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/api/__init__.py +0 -0
  45. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/api/mcp.py +0 -0
  46. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/banner.py +0 -0
  47. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/daemon.py +0 -0
  48. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/__init__.py +0 -0
  49. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/consolidation/__init__.py +0 -0
  50. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/db_budget.py +0 -0
  51. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/db_utils.py +0 -0
  52. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/directives/__init__.py +0 -0
  53. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/directives/models.py +0 -0
  54. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/entity_resolver.py +0 -0
  55. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/interface.py +0 -0
  56. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/llm_wrapper.py +0 -0
  57. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/mental_models/__init__.py +0 -0
  58. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/mental_models/models.py +0 -0
  59. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/query_analyzer.py +0 -0
  60. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/reflect/__init__.py +0 -0
  61. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/reflect/agent.py +0 -0
  62. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/reflect/models.py +0 -0
  63. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/reflect/observations.py +0 -0
  64. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/reflect/prompts.py +0 -0
  65. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/reflect/tools_schema.py +0 -0
  66. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/response_models.py +0 -0
  67. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/retain/__init__.py +0 -0
  68. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/retain/bank_utils.py +0 -0
  69. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/retain/chunk_storage.py +0 -0
  70. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/retain/deduplication.py +0 -0
  71. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/retain/embedding_processing.py +0 -0
  72. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/retain/embedding_utils.py +0 -0
  73. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/retain/entity_processing.py +0 -0
  74. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/retain/fact_extraction.py +0 -0
  75. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/retain/fact_storage.py +0 -0
  76. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/retain/link_creation.py +0 -0
  77. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/retain/link_utils.py +0 -0
  78. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/retain/orchestrator.py +0 -0
  79. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/retain/types.py +0 -0
  80. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/search/__init__.py +0 -0
  81. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/search/fusion.py +0 -0
  82. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/search/graph_retrieval.py +0 -0
  83. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/search/link_expansion_retrieval.py +0 -0
  84. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/search/mpfp_retrieval.py +0 -0
  85. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/search/reranking.py +0 -0
  86. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/search/retrieval.py +0 -0
  87. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/search/tags.py +0 -0
  88. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/search/temporal_extraction.py +0 -0
  89. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/search/think_utils.py +0 -0
  90. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/search/trace.py +0 -0
  91. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/search/tracer.py +0 -0
  92. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/search/types.py +0 -0
  93. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/task_backend.py +0 -0
  94. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/engine/utils.py +0 -0
  95. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/extensions/__init__.py +0 -0
  96. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/extensions/base.py +0 -0
  97. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/extensions/builtin/__init__.py +0 -0
  98. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/extensions/context.py +0 -0
  99. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/extensions/http.py +0 -0
  100. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/extensions/loader.py +0 -0
  101. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/extensions/operation_validator.py +0 -0
  102. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/extensions/tenant.py +0 -0
  103. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/mcp_local.py +0 -0
  104. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/mcp_tools.py +0 -0
  105. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/metrics.py +0 -0
  106. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/migrations.py +0 -0
  107. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/models.py +0 -0
  108. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/pg0.py +0 -0
  109. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/server.py +0 -0
  110. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/worker/__init__.py +0 -0
  111. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/worker/main.py +0 -0
  112. {hindsight_api-0.4.0 → hindsight_api-0.4.1}/hindsight_api/worker/poller.py +0 -0
--- hindsight_api-0.4.0/.gitignore
+++ hindsight_api-0.4.1/.gitignore
@@ -45,9 +45,12 @@ hindsight-docs/static/llms-full.txt
 
 hindsight-dev/benchmarks/locomo/results/
 hindsight-dev/benchmarks/longmemeval/results/
+hindsight-dev/benchmarks/consolidation/results/
+benchmarks/results/
 hindsight-cli/target
 hindsight-clients/rust/target
 .claude
 whats-next.md
 TASK.md
-CHANGELOG.md
+# Changelog is now tracked in hindsight-docs/src/pages/changelog.md
+# CHANGELOG.md
--- hindsight_api-0.4.0/PKG-INFO
+++ hindsight_api-0.4.1/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hindsight-api
-Version: 0.4.0
+Version: 0.4.1
 Summary: Hindsight: Agent Memory That Works Like Human Memory
 Requires-Python: >=3.11
 Requires-Dist: aiohttp>=3.13.3
--- hindsight_api-0.4.0/hindsight_api/__init__.py
+++ hindsight_api-0.4.1/hindsight_api/__init__.py
@@ -46,4 +46,4 @@ __all__ = [
     "RemoteTEICrossEncoder",
     "LLMConfig",
 ]
-__version__ = "0.1.0"
+__version__ = "0.4.1"
--- hindsight_api-0.4.0/hindsight_api/api/http.py
+++ hindsight_api-0.4.1/hindsight_api/api/http.py
@@ -1323,7 +1323,7 @@ class VersionResponse(BaseModel):
     model_config = ConfigDict(
         json_schema_extra={
             "example": {
-                "api_version": "1.0.0",
+                "api_version": "0.4.0",
                 "features": {
                     "observations": False,
                     "mcp": True,
@@ -1567,11 +1567,12 @@ def _register_routes(app: FastAPI):
         Returns version info and feature flags that can be used by clients
         to determine which capabilities are available.
         """
+        from hindsight_api import __version__
         from hindsight_api.config import get_config
 
         config = get_config()
         return VersionResponse(
-            api_version="1.0.0",
+            api_version=__version__,
             features=FeaturesInfo(
                 observations=config.enable_observations,
                 mcp=config.mcp_enabled,
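
Note: the two http.py hunks above single-source the reported version; the endpoint now imports __version__ instead of hardcoding "1.0.0". A minimal sketch of the pattern (illustrative only; the real endpoint returns a VersionResponse with feature flags):

    from fastapi import FastAPI

    from hindsight_api import __version__

    app = FastAPI()

    @app.get("/version")
    def version() -> dict:
        # Importing __version__ keeps the endpoint in lockstep with
        # pyproject.toml/PKG-INFO on every release, instead of drifting
        # the way the old hardcoded "1.0.0" did.
        return {"api_version": __version__}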
--- hindsight_api-0.4.0/hindsight_api/config.py
+++ hindsight_api-0.4.1/hindsight_api/config.py
@@ -20,6 +20,7 @@ logger = logging.getLogger(__name__)
 
 # Environment variable names
 ENV_DATABASE_URL = "HINDSIGHT_API_DATABASE_URL"
+ENV_DATABASE_SCHEMA = "HINDSIGHT_API_DATABASE_SCHEMA"
 ENV_LLM_PROVIDER = "HINDSIGHT_API_LLM_PROVIDER"
 ENV_LLM_API_KEY = "HINDSIGHT_API_LLM_API_KEY"
 ENV_LLM_MODEL = "HINDSIGHT_API_LLM_MODEL"
@@ -46,6 +47,7 @@ ENV_CONSOLIDATION_LLM_BASE_URL = "HINDSIGHT_API_CONSOLIDATION_LLM_BASE_URL"
 
 ENV_EMBEDDINGS_PROVIDER = "HINDSIGHT_API_EMBEDDINGS_PROVIDER"
 ENV_EMBEDDINGS_LOCAL_MODEL = "HINDSIGHT_API_EMBEDDINGS_LOCAL_MODEL"
+ENV_EMBEDDINGS_LOCAL_FORCE_CPU = "HINDSIGHT_API_EMBEDDINGS_LOCAL_FORCE_CPU"
 ENV_EMBEDDINGS_TEI_URL = "HINDSIGHT_API_EMBEDDINGS_TEI_URL"
 ENV_EMBEDDINGS_OPENAI_API_KEY = "HINDSIGHT_API_EMBEDDINGS_OPENAI_API_KEY"
 ENV_EMBEDDINGS_OPENAI_MODEL = "HINDSIGHT_API_EMBEDDINGS_OPENAI_MODEL"
@@ -65,6 +67,7 @@ ENV_RERANKER_LITELLM_MODEL = "HINDSIGHT_API_RERANKER_LITELLM_MODEL"
 
 ENV_RERANKER_PROVIDER = "HINDSIGHT_API_RERANKER_PROVIDER"
 ENV_RERANKER_LOCAL_MODEL = "HINDSIGHT_API_RERANKER_LOCAL_MODEL"
+ENV_RERANKER_LOCAL_FORCE_CPU = "HINDSIGHT_API_RERANKER_LOCAL_FORCE_CPU"
 ENV_RERANKER_LOCAL_MAX_CONCURRENT = "HINDSIGHT_API_RERANKER_LOCAL_MAX_CONCURRENT"
 ENV_RERANKER_TEI_URL = "HINDSIGHT_API_RERANKER_TEI_URL"
 ENV_RERANKER_TEI_BATCH_SIZE = "HINDSIGHT_API_RERANKER_TEI_BATCH_SIZE"
@@ -98,6 +101,7 @@ ENV_RETAIN_OBSERVATIONS_ASYNC = "HINDSIGHT_API_RETAIN_OBSERVATIONS_ASYNC"
 # Observations settings (consolidated knowledge from facts)
 ENV_ENABLE_OBSERVATIONS = "HINDSIGHT_API_ENABLE_OBSERVATIONS"
 ENV_CONSOLIDATION_BATCH_SIZE = "HINDSIGHT_API_CONSOLIDATION_BATCH_SIZE"
+ENV_CONSOLIDATION_MAX_TOKENS = "HINDSIGHT_API_CONSOLIDATION_MAX_TOKENS"
 
 # Optimization flags
 ENV_SKIP_LLM_VERIFICATION = "HINDSIGHT_API_SKIP_LLM_VERIFICATION"
@@ -125,6 +129,7 @@ ENV_REFLECT_MAX_ITERATIONS = "HINDSIGHT_API_REFLECT_MAX_ITERATIONS"
 
 # Default values
 DEFAULT_DATABASE_URL = "pg0"
+DEFAULT_DATABASE_SCHEMA = "public"
 DEFAULT_LLM_PROVIDER = "openai"
 DEFAULT_LLM_MODEL = "gpt-5-mini"
 DEFAULT_LLM_MAX_CONCURRENT = 32
@@ -132,11 +137,13 @@ DEFAULT_LLM_TIMEOUT = 120.0  # seconds
 
 DEFAULT_EMBEDDINGS_PROVIDER = "local"
 DEFAULT_EMBEDDINGS_LOCAL_MODEL = "BAAI/bge-small-en-v1.5"
+DEFAULT_EMBEDDINGS_LOCAL_FORCE_CPU = False  # Force CPU mode for local embeddings (avoids MPS/XPC issues on macOS)
 DEFAULT_EMBEDDINGS_OPENAI_MODEL = "text-embedding-3-small"
 DEFAULT_EMBEDDING_DIMENSION = 384
 
 DEFAULT_RERANKER_PROVIDER = "local"
 DEFAULT_RERANKER_LOCAL_MODEL = "cross-encoder/ms-marco-MiniLM-L-6-v2"
+DEFAULT_RERANKER_LOCAL_FORCE_CPU = False  # Force CPU mode for local reranker (avoids MPS/XPC issues on macOS)
 DEFAULT_RERANKER_LOCAL_MAX_CONCURRENT = 4  # Limit concurrent CPU-bound reranking to prevent thrashing
 DEFAULT_RERANKER_TEI_BATCH_SIZE = 128
 DEFAULT_RERANKER_TEI_MAX_CONCURRENT = 8
@@ -177,6 +184,7 @@ DEFAULT_RETAIN_OBSERVATIONS_ASYNC = False  # Run observation generation async (a
 # Observations defaults (consolidated knowledge from facts)
 DEFAULT_ENABLE_OBSERVATIONS = True  # Observations enabled by default
 DEFAULT_CONSOLIDATION_BATCH_SIZE = 50  # Memories to load per batch (internal memory optimization)
+DEFAULT_CONSOLIDATION_MAX_TOKENS = 1024  # Max tokens for recall when finding related observations
 
 # Database migrations
 DEFAULT_RUN_MIGRATIONS_ON_STARTUP = True
@@ -270,6 +278,7 @@ class HindsightConfig:
 
     # Database
    database_url: str
+    database_schema: str
 
     # LLM (default, used as fallback for per-operation config)
     llm_provider: str
@@ -298,6 +307,7 @@ class HindsightConfig:
     # Embeddings
     embeddings_provider: str
     embeddings_local_model: str
+    embeddings_local_force_cpu: bool
     embeddings_tei_url: str | None
     embeddings_openai_base_url: str | None
     embeddings_cohere_base_url: str | None
@@ -305,6 +315,8 @@ class HindsightConfig:
     # Reranker
     reranker_provider: str
     reranker_local_model: str
+    reranker_local_force_cpu: bool
+    reranker_local_max_concurrent: int
     reranker_tei_url: str | None
     reranker_tei_batch_size: int
     reranker_tei_max_concurrent: int
@@ -336,6 +348,7 @@ class HindsightConfig:
     # Observations settings (consolidated knowledge from facts)
     enable_observations: bool
     consolidation_batch_size: int
+    consolidation_max_tokens: int
 
     # Optimization flags
     skip_llm_verification: bool
@@ -367,6 +380,7 @@ class HindsightConfig:
         return cls(
             # Database
             database_url=os.getenv(ENV_DATABASE_URL, DEFAULT_DATABASE_URL),
+            database_schema=os.getenv(ENV_DATABASE_SCHEMA, DEFAULT_DATABASE_SCHEMA),
             # LLM
             llm_provider=os.getenv(ENV_LLM_PROVIDER, DEFAULT_LLM_PROVIDER),
             llm_api_key=os.getenv(ENV_LLM_API_KEY),
@@ -390,12 +404,23 @@ class HindsightConfig:
             # Embeddings
             embeddings_provider=os.getenv(ENV_EMBEDDINGS_PROVIDER, DEFAULT_EMBEDDINGS_PROVIDER),
             embeddings_local_model=os.getenv(ENV_EMBEDDINGS_LOCAL_MODEL, DEFAULT_EMBEDDINGS_LOCAL_MODEL),
+            embeddings_local_force_cpu=os.getenv(
+                ENV_EMBEDDINGS_LOCAL_FORCE_CPU, str(DEFAULT_EMBEDDINGS_LOCAL_FORCE_CPU)
+            ).lower()
+            in ("true", "1"),
             embeddings_tei_url=os.getenv(ENV_EMBEDDINGS_TEI_URL),
             embeddings_openai_base_url=os.getenv(ENV_EMBEDDINGS_OPENAI_BASE_URL) or None,
             embeddings_cohere_base_url=os.getenv(ENV_EMBEDDINGS_COHERE_BASE_URL) or None,
             # Reranker
             reranker_provider=os.getenv(ENV_RERANKER_PROVIDER, DEFAULT_RERANKER_PROVIDER),
             reranker_local_model=os.getenv(ENV_RERANKER_LOCAL_MODEL, DEFAULT_RERANKER_LOCAL_MODEL),
+            reranker_local_force_cpu=os.getenv(
+                ENV_RERANKER_LOCAL_FORCE_CPU, str(DEFAULT_RERANKER_LOCAL_FORCE_CPU)
+            ).lower()
+            in ("true", "1"),
+            reranker_local_max_concurrent=int(
+                os.getenv(ENV_RERANKER_LOCAL_MAX_CONCURRENT, str(DEFAULT_RERANKER_LOCAL_MAX_CONCURRENT))
+            ),
             reranker_tei_url=os.getenv(ENV_RERANKER_TEI_URL),
             reranker_tei_batch_size=int(os.getenv(ENV_RERANKER_TEI_BATCH_SIZE, str(DEFAULT_RERANKER_TEI_BATCH_SIZE))),
             reranker_tei_max_concurrent=int(
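
Note: both new force-CPU flags are parsed with the same truthy-string idiom: lower-case the raw value and accept "true" or "1". A standalone sketch of that rule (the helper name _env_bool is hypothetical, not part of the package):

    import os

    def _env_bool(name: str, default: bool) -> bool:
        # Mirrors the parsing above: "true"/"1" (case-insensitive) enables the
        # flag; anything else, including "yes", reads as False.
        return os.getenv(name, str(default)).lower() in ("true", "1")

    # e.g. HINDSIGHT_API_RERANKER_LOCAL_FORCE_CPU=1 -> True
    force_cpu = _env_bool("HINDSIGHT_API_RERANKER_LOCAL_FORCE_CPU", False)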
@@ -444,6 +469,9 @@ class HindsightConfig:
             consolidation_batch_size=int(
                 os.getenv(ENV_CONSOLIDATION_BATCH_SIZE, str(DEFAULT_CONSOLIDATION_BATCH_SIZE))
             ),
+            consolidation_max_tokens=int(
+                os.getenv(ENV_CONSOLIDATION_MAX_TOKENS, str(DEFAULT_CONSOLIDATION_MAX_TOKENS))
+            ),
             # Database migrations
             run_migrations_on_startup=os.getenv(ENV_RUN_MIGRATIONS_ON_STARTUP, "true").lower() == "true",
             # Database connection pool
@@ -515,7 +543,7 @@ class HindsightConfig:
 
     def log_config(self) -> None:
         """Log the current configuration (without sensitive values)."""
-        logger.info(f"Database: {self.database_url}")
+        logger.info(f"Database: {self.database_url} (schema: {self.database_schema})")
         logger.info(f"LLM: provider={self.llm_provider}, model={self.llm_model}")
         if self.retain_llm_provider or self.retain_llm_model:
             retain_provider = self.retain_llm_provider or self.llm_provider
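
Note: the new database_schema setting surfaces in log_config() and presumably feeds the fq_table() helper used in the consolidator hunks below. A hypothetical sketch of how such a helper could schema-qualify table names (the real fq_table() body is not part of this diff):

    from hindsight_api.config import get_config

    def fq_table(table: str) -> str:
        # Qualify every table reference with the configured schema so the
        # API can run in a non-default Postgres schema ("public" by default).
        schema = get_config().database_schema
        return f'"{schema}"."{table}"'

    # fq_table("memory_units") -> '"public"."memory_units"'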
--- hindsight_api-0.4.0/hindsight_api/engine/consolidation/consolidator.py
+++ hindsight_api-0.4.1/hindsight_api/engine/consolidation/consolidator.py
@@ -639,28 +639,27 @@ async def _find_related_observations(
     request_context: "RequestContext",
 ) -> list[dict[str, Any]]:
     """
-    Find observations related to the given query using the full recall system.
+    Find observations related to the given query using optimized recall.
 
     IMPORTANT: We do NOT filter by tags here. Consolidation needs to see ALL
     potentially related observations regardless of scope, so the LLM can
     decide on tag routing (same scope update vs cross-scope create).
 
-    This leverages:
-    - Semantic search (embedding similarity)
-    - BM25 text search (keyword matching)
-    - Entity-based retrieval (shared entities)
-    - Graph traversal (connected via entity links)
+    Uses max_tokens to naturally limit observations (no artificial count limit).
+    Includes source memories with dates for LLM context.
 
     Returns:
-        List of related observations with their tags for LLM tag routing
+        List of related observations with their tags, source memories, and dates
     """
-    # Use recall to find related observations
-    # NO tags parameter - we want ALL observations regardless of scope
-    # Use low max_tokens since we only need observations, not memories
+    # Use recall to find related observations with token budget
+    # max_tokens naturally limits how many observations are returned
+    from ...config import get_config
+
+    config = get_config()
     recall_result = await memory_engine.recall_async(
         bank_id=bank_id,
         query=query,
-        max_tokens=5000,  # Token budget for observations
+        max_tokens=config.consolidation_max_tokens,  # Token budget for observations (configurable)
         fact_type=["observation"],  # Only retrieve observations
         request_context=request_context,
         _quiet=True,  # Suppress logging
@@ -668,43 +667,82 @@ async def _find_related_observations(
     )
 
     # If no observations returned, return empty list
-    # When fact_type=["observation"], results come back in `results` field
     if not recall_result.results:
         return []
 
-    # Trust recall's relevance filtering - fetch full data for each observation
+    # Batch fetch all observations in a single query (no artificial limit)
+    observation_ids = [uuid.UUID(obs.id) for obs in recall_result.results]
+
+    rows = await conn.fetch(
+        f"""
+        SELECT id, text, proof_count, history, tags, source_memory_ids, created_at, updated_at,
+               occurred_start, occurred_end, mentioned_at
+        FROM {fq_table("memory_units")}
+        WHERE id = ANY($1) AND bank_id = $2 AND fact_type = 'observation'
+        """,
+        observation_ids,
+        bank_id,
+    )
+
+    # Build results list preserving recall order
+    id_to_row = {row["id"]: row for row in rows}
     results = []
-    for obs in recall_result.results:
-        # Fetch full observation data from DB to get history, source_memory_ids, tags
-        row = await conn.fetchrow(
-            f"""
-            SELECT id, text, proof_count, history, tags, source_memory_ids, created_at, updated_at
-            FROM {fq_table("memory_units")}
-            WHERE id = $1 AND bank_id = $2 AND fact_type = 'observation'
-            """,
-            uuid.UUID(obs.id),
-            bank_id,
-        )
 
-        if row:
-            history = row["history"]
-            if isinstance(history, str):
-                history = json.loads(history)
-            elif history is None:
-                history = []
-
-            results.append(
-                {
-                    "id": row["id"],
-                    "text": row["text"],
-                    "proof_count": row["proof_count"] or 1,
-                    "history": history,
-                    "tags": row["tags"] or [],  # Include tags for LLM tag routing
-                    "source_memory_ids": row["source_memory_ids"] or [],
-                    "similarity": 1.0,  # Retrieved via recall so assumed relevant
-                }
+    for obs in recall_result.results:
+        obs_id = uuid.UUID(obs.id)
+        if obs_id not in id_to_row:
+            continue
+
+        row = id_to_row[obs_id]
+        history = row["history"]
+        if isinstance(history, str):
+            history = json.loads(history)
+        elif history is None:
+            history = []
+
+        # Fetch source memories to include their text and dates
+        source_memory_ids = row["source_memory_ids"] or []
+        source_memories = []
+
+        if source_memory_ids:
+            source_rows = await conn.fetch(
+                f"""
+                SELECT text, occurred_start, occurred_end, mentioned_at, event_date
+                FROM {fq_table("memory_units")}
+                WHERE id = ANY($1) AND bank_id = $2
+                ORDER BY created_at ASC
+                LIMIT 5
+                """,
+                source_memory_ids[:5],  # Limit to first 5 source memories for token efficiency
+                bank_id,
             )
 
+            for src_row in source_rows:
+                source_memories.append(
+                    {
+                        "text": src_row["text"],
+                        "occurred_start": src_row["occurred_start"],
+                        "occurred_end": src_row["occurred_end"],
+                        "mentioned_at": src_row["mentioned_at"],
+                        "event_date": src_row["event_date"],
+                    }
+                )
+
+        results.append(
+            {
+                "id": row["id"],
+                "text": row["text"],
+                "proof_count": row["proof_count"] or 1,
+                "tags": row["tags"] or [],
+                "source_memories": source_memories,
+                "occurred_start": row["occurred_start"],
+                "occurred_end": row["occurred_end"],
+                "mentioned_at": row["mentioned_at"],
+                "created_at": row["created_at"],
+                "updated_at": row["updated_at"],
+            }
+        )
+
     return results
 
 
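Note: the rewrite above replaces a per-observation fetchrow loop (an N+1 query pattern) with a single ANY($1) fetch, then restores recall's ranking order in memory, since Postgres does not guarantee row order for ANY(...). A minimal standalone sketch of that pattern, assuming an asyncpg connection and a list of UUIDs:

    async def fetch_preserving_order(conn, ids: list) -> list:
        # One round-trip instead of len(ids) round-trips.
        rows = await conn.fetch(
            "SELECT id, text FROM memory_units WHERE id = ANY($1)", ids
        )
        # Rebuild the caller's ordering; drop ids the query didn't return.
        by_id = {row["id"]: row for row in rows}
        return [by_id[i] for i in ids if i in by_id]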
@@ -732,14 +770,43 @@ async def _consolidate_with_llm(
     - {"action": "create", "text": "...", "reason": "..."}
     - [] if fact is purely ephemeral (no durable knowledge)
     """
-    # Format observations WITH their tags (or "None" if empty)
+    # Format observations as JSON with source memories and dates
     if observations:
-        observations_text = "\n".join(
-            f'- ID: {obs["id"]}, Tags: {json.dumps(obs["tags"])}, Text: "{obs["text"]}" (proof_count: {obs["proof_count"]})'
-            for obs in observations
-        )
+        obs_list = []
+        for obs in observations:
+            obs_data = {
+                "id": str(obs["id"]),
+                "text": obs["text"],
+                "proof_count": obs["proof_count"],
+                "tags": obs["tags"],
+                "created_at": obs["created_at"].isoformat() if obs.get("created_at") else None,
+                "updated_at": obs["updated_at"].isoformat() if obs.get("updated_at") else None,
+            }
+
+            # Include temporal info if available
+            if obs.get("occurred_start"):
+                obs_data["occurred_start"] = obs["occurred_start"].isoformat()
+            if obs.get("occurred_end"):
+                obs_data["occurred_end"] = obs["occurred_end"].isoformat()
+            if obs.get("mentioned_at"):
+                obs_data["mentioned_at"] = obs["mentioned_at"].isoformat()
+
+            # Include source memories (up to 3 for brevity)
+            if obs.get("source_memories"):
+                obs_data["source_memories"] = [
+                    {
+                        "text": sm["text"],
+                        "event_date": sm["event_date"].isoformat() if sm.get("event_date") else None,
+                        "occurred_start": sm["occurred_start"].isoformat() if sm.get("occurred_start") else None,
+                    }
+                    for sm in obs["source_memories"][:3]  # Limit to 3 for token efficiency
+                ]
+
+            obs_list.append(obs_data)
+
+        observations_text = json.dumps(obs_list, indent=2)
     else:
-        observations_text = "None (this is a new topic - create if fact contains durable knowledge)"
+        observations_text = "[]"
 
     # Only include mission section if mission is set and not the default
     mission_section = ""
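
Note: with this change the consolidation LLM sees a JSON array instead of one bullet line per observation. An illustrative example of the serialized payload (field names match the formatting code above; all values are invented):

    import json

    obs_list = [
        {
            "id": "8b0c4a52-0000-0000-0000-000000000000",  # made-up UUID
            "text": "User prefers Postgres for new services",
            "proof_count": 3,
            "tags": ["infra"],
            "created_at": "2025-01-05T12:00:00+00:00",
            "updated_at": None,
            "occurred_start": "2025-01-04T00:00:00+00:00",
            "source_memories": [
                {
                    "text": "Chose Postgres over MySQL for the billing service",
                    "event_date": "2025-01-04T00:00:00+00:00",
                    "occurred_start": None,
                }
            ],
        }
    ]
    observations_text = json.dumps(obs_list, indent=2)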
--- hindsight_api-0.4.0/hindsight_api/engine/consolidation/prompts.py
+++ hindsight_api-0.4.1/hindsight_api/engine/consolidation/prompts.py
@@ -47,23 +47,31 @@ CONSOLIDATION_USER_PROMPT = """Analyze this new fact and consolidate into knowle
 {mission_section}
 NEW FACT: {fact_text}
 
-EXISTING OBSERVATIONS:
+EXISTING OBSERVATIONS (JSON array with source memories and dates):
 {observations_text}
 
-Instructions:
-1. First, extract the DURABLE KNOWLEDGE from the fact (not ephemeral state like "user is at X")
-2. Then compare with existing observations:
-   - If an observation covers the same topic: UPDATE it with the new knowledge
-   - If no observation covers the topic: CREATE a new one
+Each observation includes:
+- id: unique identifier for updating
+- text: the observation content
+- proof_count: number of supporting memories
+- tags: visibility scope (handled automatically)
+- created_at/updated_at: when observation was created/modified
+- occurred_start/occurred_end: temporal range of source facts
+- source_memories: array of supporting facts with their text and dates
 
-Output JSON array of actions (ALWAYS an array, even for single action):
+Instructions:
+1. Extract DURABLE KNOWLEDGE from the new fact (not ephemeral state)
+2. Review source_memories in existing observations to understand evidence
+3. Check dates to detect contradictions or updates
+4. Compare with observations:
+   - Same topic → UPDATE with learning_id
+   - New topic → CREATE new observation
+   - Purely ephemeral → return []
+
+Output JSON array of actions:
 [
-  {{"action": "update", "learning_id": "uuid", "text": "updated durable knowledge", "reason": "..."}},
+  {{"action": "update", "learning_id": "uuid-from-observations", "text": "updated knowledge", "reason": "..."}},
   {{"action": "create", "text": "new durable knowledge", "reason": "..."}}
 ]
 
-If NO consolidation is needed (fact is purely ephemeral with no durable knowledge):
-[]
-
-If no observations exist and fact contains durable knowledge:
-[{{"action": "create", "text": "durable knowledge text", "reason": "new topic"}}]"""
+Return [] if fact contains no durable knowledge."""
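
Note: the tightened prompt now promises exactly one output shape: a JSON array of update/create actions, or []. A defensive parse on the consumer side might look like this (hypothetical helper, not from the package):

    import json

    def parse_actions(raw: str) -> list[dict]:
        # Reject anything that is not the promised array-of-actions shape
        # before acting on it.
        actions = json.loads(raw)
        if not isinstance(actions, list):
            raise ValueError("consolidation LLM must return a JSON array")
        for action in actions:
            if action.get("action") not in ("update", "create"):
                raise ValueError(f"unknown action: {action!r}")
            if action["action"] == "update" and "learning_id" not in action:
                raise ValueError("update action requires learning_id")
        return actions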
--- hindsight_api-0.4.0/hindsight_api/engine/cross_encoder.py
+++ hindsight_api-0.4.1/hindsight_api/engine/cross_encoder.py
@@ -20,6 +20,7 @@ from ..config import (
     DEFAULT_RERANKER_FLASHRANK_CACHE_DIR,
     DEFAULT_RERANKER_FLASHRANK_MODEL,
     DEFAULT_RERANKER_LITELLM_MODEL,
+    DEFAULT_RERANKER_LOCAL_FORCE_CPU,
     DEFAULT_RERANKER_LOCAL_MAX_CONCURRENT,
     DEFAULT_RERANKER_LOCAL_MODEL,
     DEFAULT_RERANKER_PROVIDER,
@@ -33,6 +34,7 @@ from ..config import (
     ENV_RERANKER_FLASHRANK_CACHE_DIR,
     ENV_RERANKER_FLASHRANK_MODEL,
     ENV_RERANKER_LITELLM_MODEL,
+    ENV_RERANKER_LOCAL_FORCE_CPU,
     ENV_RERANKER_LOCAL_MAX_CONCURRENT,
     ENV_RERANKER_LOCAL_MODEL,
     ENV_RERANKER_PROVIDER,
@@ -99,7 +101,7 @@ class LocalSTCrossEncoder(CrossEncoderModel):
     _executor: ThreadPoolExecutor | None = None
     _max_concurrent: int = 4  # Limit concurrent CPU-bound reranking calls
 
-    def __init__(self, model_name: str | None = None, max_concurrent: int = 4):
+    def __init__(self, model_name: str | None = None, max_concurrent: int = 4, force_cpu: bool = False):
         """
         Initialize local SentenceTransformers cross-encoder.
 
@@ -108,8 +110,11 @@ class LocalSTCrossEncoder(CrossEncoderModel):
                 Default: cross-encoder/ms-marco-MiniLM-L-6-v2
             max_concurrent: Maximum concurrent reranking calls (default: 2).
                 Higher values may cause CPU thrashing under load.
+            force_cpu: Force CPU mode (avoids MPS/XPC issues on macOS in daemon mode).
+                Default: False
         """
         self.model_name = model_name or DEFAULT_RERANKER_LOCAL_MODEL
+        self.force_cpu = force_cpu
         self._model = None
         LocalSTCrossEncoder._max_concurrent = max_concurrent
 
@@ -139,13 +144,23 @@ class LocalSTCrossEncoder(CrossEncoderModel):
         # after loading, which conflicts with accelerate's device_map handling.
         import torch
 
-        # Check for GPU (CUDA) or Apple Silicon (MPS)
-        has_gpu = torch.cuda.is_available() or (hasattr(torch.backends, "mps") and torch.backends.mps.is_available())
-
-        if has_gpu:
-            device = None  # Let sentence-transformers auto-detect GPU/MPS
-        else:
+        # Force CPU mode if configured (used in daemon mode to avoid MPS/XPC issues on macOS)
+        if self.force_cpu:
             device = "cpu"
+            logger.info("Reranker: forcing CPU mode (HINDSIGHT_API_RERANKER_LOCAL_FORCE_CPU=1)")
+        else:
+            # Check for GPU (CUDA) or Apple Silicon (MPS)
+            # Wrap in try-except to gracefully handle any device detection issues
+            # (e.g., in CI environments or when PyTorch is built without GPU support)
+            device = "cpu"  # Default to CPU
+            try:
+                has_gpu = torch.cuda.is_available() or (
+                    hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
+                )
+                if has_gpu:
+                    device = None  # Let sentence-transformers auto-detect GPU/MPS
+            except Exception as e:
+                logger.warning(f"Failed to detect GPU/MPS, falling back to CPU: {e}")
 
         self._model = CrossEncoder(
             self.model_name,
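
Note: the device-selection logic now has three outcomes: forced CPU, auto-detect (device=None lets sentence-transformers pick CUDA/MPS), and CPU fallback when detection itself raises. The same logic factored into a standalone helper, as a sketch (the package inlines it as shown above and again in the reinit hunk below):

    import logging

    logger = logging.getLogger(__name__)

    def resolve_device(force_cpu: bool) -> str | None:
        # None means "let sentence-transformers auto-detect GPU/MPS";
        # "cpu" pins the model to CPU.
        if force_cpu:
            return "cpu"
        try:
            import torch

            if torch.cuda.is_available() or (
                hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
            ):
                return None
        except Exception as e:
            logger.warning(f"Failed to detect GPU/MPS, falling back to CPU: {e}")
        return "cpu"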
@@ -211,12 +226,19 @@ class LocalSTCrossEncoder(CrossEncoderModel):
         )
 
         # Determine device based on hardware availability
-        has_gpu = torch.cuda.is_available() or (hasattr(torch.backends, "mps") and torch.backends.mps.is_available())
-
-        if has_gpu:
-            device = None  # Let sentence-transformers auto-detect GPU/MPS
-        else:
+        if self.force_cpu:
             device = "cpu"
+        else:
+            # Wrap in try-except to gracefully handle any device detection issues
+            device = "cpu"  # Default to CPU
+            try:
+                has_gpu = torch.cuda.is_available() or (
+                    hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
+                )
+                if has_gpu:
+                    device = None  # Let sentence-transformers auto-detect GPU/MPS
+            except Exception as e:
+                logger.warning(f"Failed to detect GPU/MPS during reinit, falling back to CPU: {e}")
 
         self._model = CrossEncoder(
             self.model_name,
@@ -873,29 +895,33 @@ class LiteLLMCrossEncoder(CrossEncoderModel):
 
 def create_cross_encoder_from_env() -> CrossEncoderModel:
     """
-    Create a CrossEncoderModel instance based on environment variables.
+    Create a CrossEncoderModel instance based on configuration.
 
-    See hindsight_api.config for environment variable names and defaults.
+    Reads configuration via get_config() to ensure consistency across the codebase.
 
     Returns:
         Configured CrossEncoderModel instance
     """
-    provider = os.environ.get(ENV_RERANKER_PROVIDER, DEFAULT_RERANKER_PROVIDER).lower()
+    from ..config import get_config
+
+    config = get_config()
+    provider = config.reranker_provider.lower()
 
     if provider == "tei":
-        url = os.environ.get(ENV_RERANKER_TEI_URL)
+        url = config.reranker_tei_url
         if not url:
             raise ValueError(f"{ENV_RERANKER_TEI_URL} is required when {ENV_RERANKER_PROVIDER} is 'tei'")
-        batch_size = int(os.environ.get(ENV_RERANKER_TEI_BATCH_SIZE, str(DEFAULT_RERANKER_TEI_BATCH_SIZE)))
-        max_concurrent = int(os.environ.get(ENV_RERANKER_TEI_MAX_CONCURRENT, str(DEFAULT_RERANKER_TEI_MAX_CONCURRENT)))
-        return RemoteTEICrossEncoder(base_url=url, batch_size=batch_size, max_concurrent=max_concurrent)
+        return RemoteTEICrossEncoder(
+            base_url=url,
+            batch_size=config.reranker_tei_batch_size,
+            max_concurrent=config.reranker_tei_max_concurrent,
+        )
     elif provider == "local":
-        model = os.environ.get(ENV_RERANKER_LOCAL_MODEL)
-        model_name = model or DEFAULT_RERANKER_LOCAL_MODEL
-        max_concurrent = int(
-            os.environ.get(ENV_RERANKER_LOCAL_MAX_CONCURRENT, str(DEFAULT_RERANKER_LOCAL_MAX_CONCURRENT))
+        return LocalSTCrossEncoder(
+            model_name=config.reranker_local_model,
+            max_concurrent=config.reranker_local_max_concurrent,
+            force_cpu=config.reranker_local_force_cpu,
         )
-        return LocalSTCrossEncoder(model_name=model_name, max_concurrent=max_concurrent)
     elif provider == "cohere":
         api_key = os.environ.get(ENV_COHERE_API_KEY)
         if not api_key: