hindsight-api 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
@@ -20,6 +20,7 @@ from ..config import (
     DEFAULT_RERANKER_FLASHRANK_CACHE_DIR,
     DEFAULT_RERANKER_FLASHRANK_MODEL,
     DEFAULT_RERANKER_LITELLM_MODEL,
+    DEFAULT_RERANKER_LOCAL_FORCE_CPU,
     DEFAULT_RERANKER_LOCAL_MAX_CONCURRENT,
     DEFAULT_RERANKER_LOCAL_MODEL,
     DEFAULT_RERANKER_PROVIDER,
@@ -33,6 +34,7 @@ from ..config import (
     ENV_RERANKER_FLASHRANK_CACHE_DIR,
     ENV_RERANKER_FLASHRANK_MODEL,
     ENV_RERANKER_LITELLM_MODEL,
+    ENV_RERANKER_LOCAL_FORCE_CPU,
     ENV_RERANKER_LOCAL_MAX_CONCURRENT,
     ENV_RERANKER_LOCAL_MODEL,
     ENV_RERANKER_PROVIDER,
@@ -99,7 +101,7 @@ class LocalSTCrossEncoder(CrossEncoderModel):
     _executor: ThreadPoolExecutor | None = None
     _max_concurrent: int = 4  # Limit concurrent CPU-bound reranking calls
 
-    def __init__(self, model_name: str | None = None, max_concurrent: int = 4):
+    def __init__(self, model_name: str | None = None, max_concurrent: int = 4, force_cpu: bool = False):
         """
         Initialize local SentenceTransformers cross-encoder.
 
@@ -108,8 +110,11 @@ class LocalSTCrossEncoder(CrossEncoderModel):
                 Default: cross-encoder/ms-marco-MiniLM-L-6-v2
             max_concurrent: Maximum concurrent reranking calls (default: 2).
                 Higher values may cause CPU thrashing under load.
+            force_cpu: Force CPU mode (avoids MPS/XPC issues on macOS in daemon mode).
+                Default: False
         """
         self.model_name = model_name or DEFAULT_RERANKER_LOCAL_MODEL
+        self.force_cpu = force_cpu
         self._model = None
         LocalSTCrossEncoder._max_concurrent = max_concurrent
 
@@ -139,13 +144,23 @@ class LocalSTCrossEncoder(CrossEncoderModel):
         # after loading, which conflicts with accelerate's device_map handling.
         import torch
 
-        # Check for GPU (CUDA) or Apple Silicon (MPS)
-        has_gpu = torch.cuda.is_available() or (hasattr(torch.backends, "mps") and torch.backends.mps.is_available())
-
-        if has_gpu:
-            device = None  # Let sentence-transformers auto-detect GPU/MPS
-        else:
+        # Force CPU mode if configured (used in daemon mode to avoid MPS/XPC issues on macOS)
+        if self.force_cpu:
             device = "cpu"
+            logger.info("Reranker: forcing CPU mode (HINDSIGHT_API_RERANKER_LOCAL_FORCE_CPU=1)")
+        else:
+            # Check for GPU (CUDA) or Apple Silicon (MPS)
+            # Wrap in try-except to gracefully handle any device detection issues
+            # (e.g., in CI environments or when PyTorch is built without GPU support)
+            device = "cpu"  # Default to CPU
+            try:
+                has_gpu = torch.cuda.is_available() or (
+                    hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
+                )
+                if has_gpu:
+                    device = None  # Let sentence-transformers auto-detect GPU/MPS
+            except Exception as e:
+                logger.warning(f"Failed to detect GPU/MPS, falling back to CPU: {e}")
 
         self._model = CrossEncoder(
             self.model_name,
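The new device-selection logic in this hunk is a small, reusable pattern: honor an explicit CPU override, otherwise probe for CUDA/MPS inside a try-except and fall back to CPU. A minimal standalone sketch of that pattern (the select_device helper name is illustrative, not part of the package):

import logging

import torch

logger = logging.getLogger(__name__)


def select_device(force_cpu: bool = False) -> str | None:
    """Pick a device for sentence-transformers: "cpu", or None to let it auto-detect."""
    if force_cpu:
        logger.info("forcing CPU mode")
        return "cpu"
    device = "cpu"  # default when detection fails or no accelerator is present
    try:
        has_gpu = torch.cuda.is_available() or (
            hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
        )
        if has_gpu:
            device = None  # auto-detect CUDA/MPS downstream
    except Exception as e:  # detection itself can fail on stripped-down builds
        logger.warning(f"Failed to detect GPU/MPS, falling back to CPU: {e}")
    return device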
@@ -163,101 +178,16 @@ class LocalSTCrossEncoder(CrossEncoderModel):
         else:
             logger.info("Reranker: local provider initialized (using existing executor)")
 
-    def _is_xpc_error(self, error: Exception) -> bool:
-        """
-        Check if an error is an XPC connection error (macOS daemon issue).
-
-        On macOS, long-running daemons can lose XPC connections to system services
-        when the process is idle for extended periods.
-        """
-        error_str = str(error).lower()
-        return "xpc_error_connection_invalid" in error_str or "xpc error" in error_str
-
-    def _reinitialize_model_sync(self) -> None:
-        """
-        Clear and reinitialize the cross-encoder model synchronously.
-
-        This is used to recover from XPC errors on macOS where the
-        PyTorch/MPS backend loses its connection to system services.
-        """
-        logger.warning(f"Reinitializing reranker model {self.model_name} due to backend error")
-
-        # Clear existing model
-        self._model = None
-
-        # Force garbage collection to free resources
-        import gc
-
-        import torch
-
-        gc.collect()
-
-        # If using CUDA/MPS, clear the cache
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-        elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
-            try:
-                torch.mps.empty_cache()
-            except AttributeError:
-                pass  # Method might not exist in all PyTorch versions
-
-        # Reinitialize the model
-        try:
-            from sentence_transformers import CrossEncoder
-        except ImportError:
-            raise ImportError(
-                "sentence-transformers is required for LocalSTCrossEncoder. "
-                "Install it with: pip install sentence-transformers"
-            )
-
-        # Determine device based on hardware availability
-        has_gpu = torch.cuda.is_available() or (hasattr(torch.backends, "mps") and torch.backends.mps.is_available())
-
-        if has_gpu:
-            device = None  # Let sentence-transformers auto-detect GPU/MPS
-        else:
-            device = "cpu"
-
-        self._model = CrossEncoder(
-            self.model_name,
-            device=device,
-            model_kwargs={"low_cpu_mem_usage": False},
-        )
-
-        logger.info("Reranker: local provider reinitialized successfully")
-
-    def _predict_with_recovery(self, pairs: list[tuple[str, str]]) -> list[float]:
-        """
-        Predict with automatic recovery from XPC errors.
-
-        This runs synchronously in the thread pool.
-        """
-        max_retries = 1
-        for attempt in range(max_retries + 1):
-            try:
-                scores = self._model.predict(pairs, show_progress_bar=False)
-                return scores.tolist() if hasattr(scores, "tolist") else list(scores)
-            except Exception as e:
-                # Check if this is an XPC error (macOS daemon issue)
-                if self._is_xpc_error(e) and attempt < max_retries:
-                    logger.warning(f"XPC error detected in reranker (attempt {attempt + 1}): {e}")
-                    try:
-                        self._reinitialize_model_sync()
-                        logger.info("Reranker reinitialized successfully, retrying prediction")
-                        continue
-                    except Exception as reinit_error:
-                        logger.error(f"Failed to reinitialize reranker: {reinit_error}")
-                        raise Exception(f"Failed to recover from XPC error: {str(e)}")
-                else:
-                    # Not an XPC error or out of retries
-                    raise
+    def _predict_sync(self, pairs: list[tuple[str, str]]) -> list[float]:
+        """Synchronous prediction wrapper for thread pool execution."""
+        scores = self._model.predict(pairs, show_progress_bar=False)
+        return scores.tolist() if hasattr(scores, "tolist") else list(scores)
 
     async def predict(self, pairs: list[tuple[str, str]]) -> list[float]:
         """
         Score query-document pairs for relevance.
 
         Uses a dedicated thread pool with limited workers to prevent CPU thrashing.
-        Automatically recovers from XPC errors on macOS by reinitializing the model.
 
         Args:
             pairs: List of (query, document) tuples to score
@@ -272,7 +202,7 @@ class LocalSTCrossEncoder(CrossEncoderModel):
         loop = asyncio.get_event_loop()
         return await loop.run_in_executor(
             LocalSTCrossEncoder._executor,
-            self._predict_with_recovery,
+            self._predict_sync,
            pairs,
        )
 
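With the XPC-recovery path removed, the async predict path reduces to offloading a blocking model call onto a small shared thread pool. A self-contained sketch of that pattern, with score_pairs standing in for the real CrossEncoder.predict call:

import asyncio
from concurrent.futures import ThreadPoolExecutor

# One small shared pool so CPU-bound scoring cannot monopolize the event loop's default workers.
_executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="reranker")


def score_pairs(pairs: list[tuple[str, str]]) -> list[float]:
    # Placeholder for the blocking cross-encoder call (model.predict in the real class).
    return [0.0 for _ in pairs]


async def predict(pairs: list[tuple[str, str]]) -> list[float]:
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(_executor, score_pairs, pairs)


if __name__ == "__main__":
    print(asyncio.run(predict([("query", "document")])))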
@@ -873,29 +803,33 @@ class LiteLLMCrossEncoder(CrossEncoderModel):
 
 def create_cross_encoder_from_env() -> CrossEncoderModel:
     """
-    Create a CrossEncoderModel instance based on environment variables.
+    Create a CrossEncoderModel instance based on configuration.
 
-    See hindsight_api.config for environment variable names and defaults.
+    Reads configuration via get_config() to ensure consistency across the codebase.
 
     Returns:
         Configured CrossEncoderModel instance
     """
-    provider = os.environ.get(ENV_RERANKER_PROVIDER, DEFAULT_RERANKER_PROVIDER).lower()
+    from ..config import get_config
+
+    config = get_config()
+    provider = config.reranker_provider.lower()
 
     if provider == "tei":
-        url = os.environ.get(ENV_RERANKER_TEI_URL)
+        url = config.reranker_tei_url
         if not url:
             raise ValueError(f"{ENV_RERANKER_TEI_URL} is required when {ENV_RERANKER_PROVIDER} is 'tei'")
-        batch_size = int(os.environ.get(ENV_RERANKER_TEI_BATCH_SIZE, str(DEFAULT_RERANKER_TEI_BATCH_SIZE)))
-        max_concurrent = int(os.environ.get(ENV_RERANKER_TEI_MAX_CONCURRENT, str(DEFAULT_RERANKER_TEI_MAX_CONCURRENT)))
-        return RemoteTEICrossEncoder(base_url=url, batch_size=batch_size, max_concurrent=max_concurrent)
+        return RemoteTEICrossEncoder(
+            base_url=url,
+            batch_size=config.reranker_tei_batch_size,
+            max_concurrent=config.reranker_tei_max_concurrent,
+        )
     elif provider == "local":
-        model = os.environ.get(ENV_RERANKER_LOCAL_MODEL)
-        model_name = model or DEFAULT_RERANKER_LOCAL_MODEL
-        max_concurrent = int(
-            os.environ.get(ENV_RERANKER_LOCAL_MAX_CONCURRENT, str(DEFAULT_RERANKER_LOCAL_MAX_CONCURRENT))
+        return LocalSTCrossEncoder(
+            model_name=config.reranker_local_model,
+            max_concurrent=config.reranker_local_max_concurrent,
+            force_cpu=config.reranker_local_force_cpu,
         )
-        return LocalSTCrossEncoder(model_name=model_name, max_concurrent=max_concurrent)
     elif provider == "cohere":
         api_key = os.environ.get(ENV_COHERE_API_KEY)
         if not api_key:
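The factory now reads a single typed config object instead of parsing os.environ at each call site. A toy sketch of that pattern, not the real HindsightConfig; only HINDSIGHT_API_RERANKER_LOCAL_FORCE_CPU appears verbatim in this diff, the other variable names below are assumed:

import os
from dataclasses import dataclass
from functools import lru_cache


@dataclass(frozen=True)
class RerankerSettings:
    provider: str
    local_model: str
    local_max_concurrent: int
    local_force_cpu: bool


@lru_cache(maxsize=1)
def get_settings() -> RerankerSettings:
    # One place that parses and types the environment, so every caller sees the same values.
    return RerankerSettings(
        provider=os.environ.get("HINDSIGHT_API_RERANKER_PROVIDER", "local"),  # assumed name
        local_model=os.environ.get(
            "HINDSIGHT_API_RERANKER_LOCAL_MODEL", "cross-encoder/ms-marco-MiniLM-L-6-v2"  # assumed name
        ),
        local_max_concurrent=int(os.environ.get("HINDSIGHT_API_RERANKER_LOCAL_MAX_CONCURRENT", "4")),  # assumed name
        local_force_cpu=os.environ.get("HINDSIGHT_API_RERANKER_LOCAL_FORCE_CPU", "0") == "1",
    )


print(get_settings().local_force_cpu)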
@@ -18,6 +18,7 @@ import httpx
 from ..config import (
     DEFAULT_EMBEDDINGS_COHERE_MODEL,
     DEFAULT_EMBEDDINGS_LITELLM_MODEL,
+    DEFAULT_EMBEDDINGS_LOCAL_FORCE_CPU,
     DEFAULT_EMBEDDINGS_LOCAL_MODEL,
     DEFAULT_EMBEDDINGS_OPENAI_MODEL,
     DEFAULT_EMBEDDINGS_PROVIDER,
@@ -26,6 +27,7 @@ from ..config import (
     ENV_EMBEDDINGS_COHERE_BASE_URL,
     ENV_EMBEDDINGS_COHERE_MODEL,
     ENV_EMBEDDINGS_LITELLM_MODEL,
+    ENV_EMBEDDINGS_LOCAL_FORCE_CPU,
     ENV_EMBEDDINGS_LOCAL_MODEL,
     ENV_EMBEDDINGS_OPENAI_API_KEY,
     ENV_EMBEDDINGS_OPENAI_BASE_URL,
@@ -92,15 +94,18 @@ class LocalSTEmbeddings(Embeddings):
     The embedding dimension is auto-detected from the model.
     """
 
-    def __init__(self, model_name: str | None = None):
+    def __init__(self, model_name: str | None = None, force_cpu: bool = False):
         """
         Initialize local SentenceTransformers embeddings.
 
         Args:
             model_name: Name of the SentenceTransformer model to use.
                 Default: BAAI/bge-small-en-v1.5
+            force_cpu: Force CPU mode (avoids MPS/XPC issues on macOS in daemon mode).
+                Default: False
         """
         self.model_name = model_name or DEFAULT_EMBEDDINGS_LOCAL_MODEL
+        self.force_cpu = force_cpu
         self._model = None
         self._dimension: int | None = None
 
@@ -134,13 +139,23 @@ class LocalSTEmbeddings(Embeddings):
         # which can cause issues when accelerate is installed but no GPU is available.
         import torch
 
-        # Check for GPU (CUDA) or Apple Silicon (MPS)
-        has_gpu = torch.cuda.is_available() or (hasattr(torch.backends, "mps") and torch.backends.mps.is_available())
-
-        if has_gpu:
-            device = None  # Let sentence-transformers auto-detect GPU/MPS
-        else:
+        # Force CPU mode if configured (used in daemon mode to avoid MPS/XPC issues on macOS)
+        if self.force_cpu:
             device = "cpu"
+            logger.info("Embeddings: forcing CPU mode")
+        else:
+            # Check for GPU (CUDA) or Apple Silicon (MPS)
+            # Wrap in try-except to gracefully handle any device detection issues
+            # (e.g., in CI environments or when PyTorch is built without GPU support)
+            device = "cpu"  # Default to CPU
+            try:
+                has_gpu = torch.cuda.is_available() or (
+                    hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
+                )
+                if has_gpu:
+                    device = None  # Let sentence-transformers auto-detect GPU/MPS
+            except Exception as e:
+                logger.warning(f"Failed to detect GPU/MPS, falling back to CPU: {e}")
 
         self._model = SentenceTransformer(
             self.model_name,
@@ -151,75 +166,10 @@ class LocalSTEmbeddings(Embeddings):
         self._dimension = self._model.get_sentence_embedding_dimension()
         logger.info(f"Embeddings: local provider initialized (dim: {self._dimension})")
 
-    def _is_xpc_error(self, error: Exception) -> bool:
-        """
-        Check if an error is an XPC connection error (macOS daemon issue).
-
-        On macOS, long-running daemons can lose XPC connections to system services
-        when the process is idle for extended periods.
-        """
-        error_str = str(error).lower()
-        return "xpc_error_connection_invalid" in error_str or "xpc error" in error_str
-
-    def _reinitialize_model_sync(self) -> None:
-        """
-        Clear and reinitialize the embedding model synchronously.
-
-        This is used to recover from XPC errors on macOS where the
-        PyTorch/MPS backend loses its connection to system services.
-        """
-        logger.warning(f"Reinitializing embedding model {self.model_name} due to backend error")
-
-        # Clear existing model
-        self._model = None
-
-        # Force garbage collection to free resources
-        import gc
-
-        import torch
-
-        gc.collect()
-
-        # If using CUDA/MPS, clear the cache
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-        elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
-            try:
-                torch.mps.empty_cache()
-            except AttributeError:
-                pass  # Method might not exist in all PyTorch versions
-
-        # Reinitialize the model (inline version of initialize() but synchronous)
-        try:
-            from sentence_transformers import SentenceTransformer
-        except ImportError:
-            raise ImportError(
-                "sentence-transformers is required for LocalSTEmbeddings. "
-                "Install it with: pip install sentence-transformers"
-            )
-
-        # Determine device based on hardware availability
-        has_gpu = torch.cuda.is_available() or (hasattr(torch.backends, "mps") and torch.backends.mps.is_available())
-
-        if has_gpu:
-            device = None  # Let sentence-transformers auto-detect GPU/MPS
-        else:
-            device = "cpu"
-
-        self._model = SentenceTransformer(
-            self.model_name,
-            device=device,
-            model_kwargs={"low_cpu_mem_usage": False},
-        )
-
-        logger.info("Embeddings: local provider reinitialized successfully")
-
     def encode(self, texts: list[str]) -> list[list[float]]:
         """
         Generate embeddings for a list of texts.
 
-        Automatically recovers from XPC errors on macOS by reinitializing the model.
-
         Args:
             texts: List of text strings to encode
 
@@ -229,26 +179,8 @@ class LocalSTEmbeddings(Embeddings):
         if self._model is None:
             raise RuntimeError("Embeddings not initialized. Call initialize() first.")
 
-        # Try encoding with automatic recovery from XPC errors
-        max_retries = 1
-        for attempt in range(max_retries + 1):
-            try:
-                embeddings = self._model.encode(texts, convert_to_numpy=True, show_progress_bar=False)
-                return [emb.tolist() for emb in embeddings]
-            except Exception as e:
-                # Check if this is an XPC error (macOS daemon issue)
-                if self._is_xpc_error(e) and attempt < max_retries:
-                    logger.warning(f"XPC error detected in embedding generation (attempt {attempt + 1}): {e}")
-                    try:
-                        self._reinitialize_model_sync()
-                        logger.info("Model reinitialized successfully, retrying embedding generation")
-                        continue
-                    except Exception as reinit_error:
-                        logger.error(f"Failed to reinitialize model: {reinit_error}")
-                        raise Exception(f"Failed to recover from XPC error: {str(e)}")
-                else:
-                    # Not an XPC error or out of retries
-                    raise
+        embeddings = self._model.encode(texts, convert_to_numpy=True, show_progress_bar=False)
+        return [emb.tolist() for emb in embeddings]
 
 
 class RemoteTEIEmbeddings(Embeddings):
@@ -770,24 +702,28 @@ class LiteLLMEmbeddings(Embeddings):
 
 def create_embeddings_from_env() -> Embeddings:
     """
-    Create an Embeddings instance based on environment variables.
+    Create an Embeddings instance based on configuration.
 
-    See hindsight_api.config for environment variable names and defaults.
+    Reads configuration via get_config() to ensure consistency across the codebase.
 
     Returns:
         Configured Embeddings instance
     """
-    provider = os.environ.get(ENV_EMBEDDINGS_PROVIDER, DEFAULT_EMBEDDINGS_PROVIDER).lower()
+    from ..config import get_config
+
+    config = get_config()
+    provider = config.embeddings_provider.lower()
 
     if provider == "tei":
-        url = os.environ.get(ENV_EMBEDDINGS_TEI_URL)
+        url = config.embeddings_tei_url
         if not url:
             raise ValueError(f"{ENV_EMBEDDINGS_TEI_URL} is required when {ENV_EMBEDDINGS_PROVIDER} is 'tei'")
         return RemoteTEIEmbeddings(base_url=url)
     elif provider == "local":
-        model = os.environ.get(ENV_EMBEDDINGS_LOCAL_MODEL)
-        model_name = model or DEFAULT_EMBEDDINGS_LOCAL_MODEL
-        return LocalSTEmbeddings(model_name=model_name)
+        return LocalSTEmbeddings(
+            model_name=config.embeddings_local_model,
+            force_cpu=config.embeddings_local_force_cpu,
+        )
     elif provider == "openai":
         # Use dedicated embeddings API key, or fall back to LLM API key
         api_key = os.environ.get(ENV_EMBEDDINGS_OPENAI_API_KEY) or os.environ.get(ENV_LLM_API_KEY)
@@ -23,12 +23,17 @@ from ..metrics import get_metrics_collector
 from .db_budget import budgeted_operation
 
 # Context variable for current schema (async-safe, per-task isolation)
-_current_schema: contextvars.ContextVar[str] = contextvars.ContextVar("current_schema", default="public")
+# Note: default is None, actual default comes from config via get_current_schema()
+_current_schema: contextvars.ContextVar[str | None] = contextvars.ContextVar("current_schema", default=None)
 
 
 def get_current_schema() -> str:
-    """Get the current schema from context (default: 'public')."""
-    return _current_schema.get()
+    """Get the current schema from context (falls back to config default)."""
+    schema = _current_schema.get()
+    if schema is None:
+        # Fall back to configured default schema
+        return get_config().database_schema
+    return schema
 
 
 def fq_table(table_name: str) -> str:
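The schema handling here is a per-task ContextVar whose unset state defers to configuration rather than a hard-coded 'public'. A self-contained sketch of the same idea, with a module constant standing in for get_config().database_schema:

import asyncio
import contextvars

DEFAULT_SCHEMA = "public"  # stand-in for get_config().database_schema

_current_schema: contextvars.ContextVar[str | None] = contextvars.ContextVar("current_schema", default=None)


def get_current_schema() -> str:
    schema = _current_schema.get()
    return DEFAULT_SCHEMA if schema is None else schema


def fq_table(table_name: str) -> str:
    # Fully qualify a table with whatever schema the current task is bound to.
    return f"{get_current_schema()}.{table_name}"


async def handle_request(tenant_schema: str | None) -> str:
    # Each asyncio task gets its own copy of the context, so this set() does not leak across tasks.
    if tenant_schema is not None:
        _current_schema.set(tenant_schema)
    return fq_table("memories")


async def main() -> None:
    # Prints ['tenant_a.memories', 'public.memories']: the second task falls back to the default.
    print(await asyncio.gather(handle_request("tenant_a"), handle_request(None)))


asyncio.run(main())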
@@ -881,11 +886,12 @@ class MemoryEngine(MemoryEngineInterface):
         if not self.db_url:
             raise ValueError("Database URL is required for migrations")
         logger.info("Running database migrations...")
-        run_migrations(self.db_url)
+        # Use configured database schema for migrations (defaults to "public")
+        run_migrations(self.db_url, schema=get_config().database_schema)
 
         # Ensure embedding column dimension matches the model's dimension
         # This is done after migrations and after embeddings.initialize()
-        ensure_embedding_dimension(self.db_url, self.embeddings.dimension)
+        ensure_embedding_dimension(self.db_url, self.embeddings.dimension, schema=get_config().database_schema)
 
         logger.info(f"Connecting to PostgreSQL at {self.db_url}")
 
@@ -69,7 +69,7 @@ async def tool_search_mental_models(
         next_param += 1
 
     if exclude_ids:
-        filters += f" AND id != ALL(${next_param}::uuid[])"
+        filters += f" AND id != ALL(${next_param}::text[])"
         params.append(exclude_ids)
         next_param += 1
 
@@ -782,12 +782,28 @@ Text:
     usage = TokenUsage()  # Track cumulative usage across retries
     for attempt in range(max_retries):
        try:
+            # Use retain-specific overrides if set, otherwise fall back to global LLM config
+            max_retries = (
+                config.retain_llm_max_retries if config.retain_llm_max_retries is not None else config.llm_max_retries
+            )
+            initial_backoff = (
+                config.retain_llm_initial_backoff
+                if config.retain_llm_initial_backoff is not None
+                else config.llm_initial_backoff
+            )
+            max_backoff = (
+                config.retain_llm_max_backoff if config.retain_llm_max_backoff is not None else config.llm_max_backoff
+            )
+
             extraction_response_json, call_usage = await llm_config.call(
                 messages=[{"role": "system", "content": prompt}, {"role": "user", "content": user_message}],
                 response_format=response_schema,
                 scope="memory_extract_facts",
                 temperature=0.1,
                 max_completion_tokens=config.retain_max_completion_tokens,
+                max_retries=max_retries,
+                initial_backoff=initial_backoff,
+                max_backoff=max_backoff,
                 skip_validation=True,  # Get raw JSON, we'll validate leniently
                 return_usage=True,
             )
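The added lines implement a "per-stage override, else global default" rule for the retain stage's retry settings. A compact sketch of that resolution logic in isolation, with illustrative default values (the real values come from HindsightConfig):

from dataclasses import dataclass


@dataclass
class LLMRetrySettings:
    # Global defaults always have a value; per-stage overrides are optional.
    llm_max_retries: int = 3
    llm_initial_backoff: float = 1.0
    llm_max_backoff: float = 30.0
    retain_llm_max_retries: int | None = None
    retain_llm_initial_backoff: float | None = None
    retain_llm_max_backoff: float | None = None


def resolve_retain_retries(cfg: LLMRetrySettings) -> tuple[int, float, float]:
    """Return (max_retries, initial_backoff, max_backoff) for the retain stage."""
    max_retries = cfg.retain_llm_max_retries if cfg.retain_llm_max_retries is not None else cfg.llm_max_retries
    initial_backoff = (
        cfg.retain_llm_initial_backoff if cfg.retain_llm_initial_backoff is not None else cfg.llm_initial_backoff
    )
    max_backoff = cfg.retain_llm_max_backoff if cfg.retain_llm_max_backoff is not None else cfg.llm_max_backoff
    return max_retries, initial_backoff, max_backoff


print(resolve_retain_retries(LLMRetrySettings(retain_llm_max_retries=5)))  # (5, 1.0, 30.0)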
@@ -1,5 +1,6 @@
 """Built-in tenant extension implementations."""
 
+from hindsight_api.config import get_config
 from hindsight_api.extensions.tenant import AuthenticationError, Tenant, TenantContext, TenantExtension
 from hindsight_api.models import RequestContext
 
@@ -10,11 +11,13 @@ class ApiKeyTenantExtension(TenantExtension):
 
     This is a simple implementation that:
     1. Validates the API key matches HINDSIGHT_API_TENANT_API_KEY
-    2. Returns 'public' as the schema for all authenticated requests
+    2. Returns the configured schema (HINDSIGHT_API_DATABASE_SCHEMA, default 'public')
+       for all authenticated requests
 
     Configuration:
         HINDSIGHT_API_TENANT_EXTENSION=hindsight_api.extensions.builtin.tenant:ApiKeyTenantExtension
         HINDSIGHT_API_TENANT_API_KEY=your-secret-key
+        HINDSIGHT_API_DATABASE_SCHEMA=your-schema (optional, defaults to 'public')
 
     For multi-tenant setups with separate schemas per tenant, implement a custom
     TenantExtension that looks up the schema based on the API key or token claims.
@@ -27,11 +30,11 @@ class ApiKeyTenantExtension(TenantExtension):
             raise ValueError("HINDSIGHT_API_TENANT_API_KEY is required when using ApiKeyTenantExtension")
 
     async def authenticate(self, context: RequestContext) -> TenantContext:
-        """Validate API key and return public schema context."""
+        """Validate API key and return configured schema context."""
         if context.api_key != self.expected_api_key:
             raise AuthenticationError("Invalid API key")
-        return TenantContext(schema_name="public")
+        return TenantContext(schema_name=get_config().database_schema)
 
     async def list_tenants(self) -> list[Tenant]:
-        """Return public schema for single-tenant setup."""
-        return [Tenant(schema="public")]
+        """Return configured schema for single-tenant setup."""
+        return [Tenant(schema=get_config().database_schema)]
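The docstring above suggests a custom TenantExtension for schema-per-tenant setups. A rough sketch of what that could look like, reusing only the interface visible in the built-in class; the class name, the HINDSIGHT_API_TENANT_MAP variable, and the no-argument constructor are assumptions for illustration:

import os

from hindsight_api.extensions.tenant import AuthenticationError, Tenant, TenantContext, TenantExtension
from hindsight_api.models import RequestContext


class StaticMappingTenantExtension(TenantExtension):
    """Illustrative extension: map each API key to its own schema via an env var such as
    HINDSIGHT_API_TENANT_MAP="key1:tenant_a,key2:tenant_b" (variable name is hypothetical)."""

    def __init__(self) -> None:
        raw = os.environ.get("HINDSIGHT_API_TENANT_MAP", "")
        self._schemas_by_key = dict(pair.split(":", 1) for pair in raw.split(",") if ":" in pair)

    async def authenticate(self, context: RequestContext) -> TenantContext:
        schema = self._schemas_by_key.get(context.api_key or "")
        if schema is None:
            raise AuthenticationError("Invalid API key")
        return TenantContext(schema_name=schema)

    async def list_tenants(self) -> list[Tenant]:
        return [Tenant(schema=schema) for schema in sorted(set(self._schemas_by_key.values()))]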
hindsight_api/main.py CHANGED
@@ -170,31 +170,53 @@ def main():
     if args.log_level != config.log_level:
         config = HindsightConfig(
             database_url=config.database_url,
+            database_schema=config.database_schema,
             llm_provider=config.llm_provider,
             llm_api_key=config.llm_api_key,
             llm_model=config.llm_model,
             llm_base_url=config.llm_base_url,
             llm_max_concurrent=config.llm_max_concurrent,
+            llm_max_retries=config.llm_max_retries,
+            llm_initial_backoff=config.llm_initial_backoff,
+            llm_max_backoff=config.llm_max_backoff,
             llm_timeout=config.llm_timeout,
             retain_llm_provider=config.retain_llm_provider,
             retain_llm_api_key=config.retain_llm_api_key,
             retain_llm_model=config.retain_llm_model,
             retain_llm_base_url=config.retain_llm_base_url,
+            retain_llm_max_concurrent=config.retain_llm_max_concurrent,
+            retain_llm_max_retries=config.retain_llm_max_retries,
+            retain_llm_initial_backoff=config.retain_llm_initial_backoff,
+            retain_llm_max_backoff=config.retain_llm_max_backoff,
+            retain_llm_timeout=config.retain_llm_timeout,
             reflect_llm_provider=config.reflect_llm_provider,
             reflect_llm_api_key=config.reflect_llm_api_key,
             reflect_llm_model=config.reflect_llm_model,
             reflect_llm_base_url=config.reflect_llm_base_url,
+            reflect_llm_max_concurrent=config.reflect_llm_max_concurrent,
+            reflect_llm_max_retries=config.reflect_llm_max_retries,
+            reflect_llm_initial_backoff=config.reflect_llm_initial_backoff,
+            reflect_llm_max_backoff=config.reflect_llm_max_backoff,
+            reflect_llm_timeout=config.reflect_llm_timeout,
             consolidation_llm_provider=config.consolidation_llm_provider,
             consolidation_llm_api_key=config.consolidation_llm_api_key,
             consolidation_llm_model=config.consolidation_llm_model,
             consolidation_llm_base_url=config.consolidation_llm_base_url,
+            consolidation_llm_max_concurrent=config.consolidation_llm_max_concurrent,
+            consolidation_llm_max_retries=config.consolidation_llm_max_retries,
+            consolidation_llm_initial_backoff=config.consolidation_llm_initial_backoff,
+            consolidation_llm_max_backoff=config.consolidation_llm_max_backoff,
+            consolidation_llm_timeout=config.consolidation_llm_timeout,
             embeddings_provider=config.embeddings_provider,
             embeddings_local_model=config.embeddings_local_model,
+            embeddings_local_force_cpu=config.embeddings_local_force_cpu,
             embeddings_tei_url=config.embeddings_tei_url,
             embeddings_openai_base_url=config.embeddings_openai_base_url,
             embeddings_cohere_base_url=config.embeddings_cohere_base_url,
             reranker_provider=config.reranker_provider,
             reranker_local_model=config.reranker_local_model,
+            reranker_local_force_cpu=config.reranker_local_force_cpu,
+            reranker_local_max_concurrent=config.reranker_local_max_concurrent,
             reranker_tei_url=config.reranker_tei_url,
             reranker_tei_batch_size=config.reranker_tei_batch_size,
             reranker_tei_max_concurrent=config.reranker_tei_max_concurrent,
@@ -217,6 +239,7 @@ def main():
             retain_observations_async=config.retain_observations_async,
             enable_observations=config.enable_observations,
             consolidation_batch_size=config.consolidation_batch_size,
+            consolidation_max_tokens=config.consolidation_max_tokens,
             skip_llm_verification=config.skip_llm_verification,
             lazy_reranker=config.lazy_reranker,
             run_migrations_on_startup=config.run_migrations_on_startup,
@@ -341,6 +364,7 @@ def main():
     # Start idle checker in daemon mode
     if idle_middleware is not None:
         # Start the idle checker in a background thread with its own event loop
+        import logging
         import threading
 
         def run_idle_checker():
@@ -351,8 +375,8 @@ def main():
                 loop = asyncio.new_event_loop()
                 asyncio.set_event_loop(loop)
                 loop.run_until_complete(idle_middleware._check_idle())
-            except Exception:
-                pass
+            except Exception as e:
+                logging.error(f"Idle checker error: {e}", exc_info=True)
 
         threading.Thread(target=run_idle_checker, daemon=True).start()
 
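The idle-checker change replaces a silent except Exception: pass with a logged error while keeping the background-thread pattern. A self-contained sketch of that pattern, with check_idle standing in for idle_middleware._check_idle():

import asyncio
import logging
import threading


async def check_idle() -> None:
    # Stand-in for the real periodic idle check.
    await asyncio.sleep(0)


def run_idle_checker() -> None:
    try:
        # The background thread gets its own event loop, separate from the server's loop.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        loop.run_until_complete(check_idle())
    except Exception as e:
        # Logging instead of swallowing means a crashed checker is visible in the daemon logs.
        logging.error(f"Idle checker error: {e}", exc_info=True)


threading.Thread(target=run_idle_checker, daemon=True).start()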
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hindsight-api
-Version: 0.4.0
+Version: 0.4.2
 Summary: Hindsight: Agent Memory That Works Like Human Memory
 Requires-Python: >=3.11
 Requires-Dist: aiohttp>=3.13.3