hindsight-api 0.0.21__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. hindsight_api/__init__.py +10 -2
  2. hindsight_api/alembic/README +1 -0
  3. hindsight_api/alembic/env.py +146 -0
  4. hindsight_api/alembic/script.py.mako +28 -0
  5. hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +274 -0
  6. hindsight_api/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py +70 -0
  7. hindsight_api/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py +39 -0
  8. hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py +48 -0
  9. hindsight_api/alembic/versions/e0a1b2c3d4e5_disposition_to_3_traits.py +62 -0
  10. hindsight_api/alembic/versions/rename_personality_to_disposition.py +65 -0
  11. hindsight_api/api/__init__.py +2 -4
  12. hindsight_api/api/http.py +112 -164
  13. hindsight_api/api/mcp.py +2 -1
  14. hindsight_api/config.py +154 -0
  15. hindsight_api/engine/__init__.py +7 -2
  16. hindsight_api/engine/cross_encoder.py +225 -16
  17. hindsight_api/engine/embeddings.py +198 -19
  18. hindsight_api/engine/entity_resolver.py +56 -29
  19. hindsight_api/engine/llm_wrapper.py +147 -106
  20. hindsight_api/engine/memory_engine.py +337 -192
  21. hindsight_api/engine/response_models.py +15 -17
  22. hindsight_api/engine/retain/bank_utils.py +25 -35
  23. hindsight_api/engine/retain/entity_processing.py +5 -5
  24. hindsight_api/engine/retain/fact_extraction.py +86 -24
  25. hindsight_api/engine/retain/fact_storage.py +1 -1
  26. hindsight_api/engine/retain/link_creation.py +12 -6
  27. hindsight_api/engine/retain/link_utils.py +50 -56
  28. hindsight_api/engine/retain/observation_regeneration.py +264 -0
  29. hindsight_api/engine/retain/orchestrator.py +31 -44
  30. hindsight_api/engine/retain/types.py +14 -0
  31. hindsight_api/engine/search/reranking.py +6 -10
  32. hindsight_api/engine/search/retrieval.py +2 -2
  33. hindsight_api/engine/search/think_utils.py +59 -30
  34. hindsight_api/engine/search/tracer.py +1 -1
  35. hindsight_api/main.py +201 -0
  36. hindsight_api/migrations.py +61 -39
  37. hindsight_api/models.py +1 -2
  38. hindsight_api/pg0.py +17 -36
  39. hindsight_api/server.py +43 -0
  40. {hindsight_api-0.0.21.dist-info → hindsight_api-0.1.1.dist-info}/METADATA +2 -3
  41. hindsight_api-0.1.1.dist-info/RECORD +60 -0
  42. hindsight_api-0.1.1.dist-info/entry_points.txt +2 -0
  43. hindsight_api/cli.py +0 -128
  44. hindsight_api/web/__init__.py +0 -12
  45. hindsight_api/web/server.py +0 -109
  46. hindsight_api-0.0.21.dist-info/RECORD +0 -50
  47. hindsight_api-0.0.21.dist-info/entry_points.txt +0 -2
  48. {hindsight_api-0.0.21.dist-info → hindsight_api-0.1.1.dist-info}/WHEEL +0 -0
hindsight_api/engine/embeddings.py

@@ -5,15 +5,26 @@ Provides an interface for generating embeddings with different backends.
 
 IMPORTANT: All embeddings must produce 384-dimensional vectors to match
 the database schema (pgvector column defined as vector(384)).
+
+Configuration via environment variables - see hindsight_api.config for all env var names.
 """
 from abc import ABC, abstractmethod
-from typing import List
+from typing import List, Optional
 import logging
+import os
 
-logger = logging.getLogger(__name__)
+import httpx
 
-# Fixed embedding dimension required by database schema
-EMBEDDING_DIMENSION = 384
+from ..config import (
+    ENV_EMBEDDINGS_PROVIDER,
+    ENV_EMBEDDINGS_LOCAL_MODEL,
+    ENV_EMBEDDINGS_TEI_URL,
+    DEFAULT_EMBEDDINGS_PROVIDER,
+    DEFAULT_EMBEDDINGS_LOCAL_MODEL,
+    EMBEDDING_DIMENSION,
+)
+
+logger = logging.getLogger(__name__)
 
 
 class Embeddings(ABC):
@@ -24,12 +35,18 @@ class Embeddings(ABC):
     the database schema.
     """
 
+    @property
+    @abstractmethod
+    def provider_name(self) -> str:
+        """Return a human-readable name for this provider (e.g., 'local', 'tei')."""
+        pass
+
     @abstractmethod
-    def load(self) -> None:
+    async def initialize(self) -> None:
         """
-        Load the embedding model.
+        Initialize the embedding model asynchronously.
 
-        This should be called during initialization to load the model
+        This should be called during startup to load/connect to the model
         and avoid cold start latency on first encode() call.
         """
         pass
@@ -48,29 +65,33 @@ class Embeddings(ABC):
         pass
 
 
-class SentenceTransformersEmbeddings(Embeddings):
+class LocalSTEmbeddings(Embeddings):
     """
-    Embeddings implementation using SentenceTransformers.
+    Local embeddings implementation using SentenceTransformers.
 
-    Call load() during initialization to load the model and avoid cold starts.
+    Call initialize() during startup to load the model and avoid cold starts.
 
     Default model is BAAI/bge-small-en-v1.5 which produces 384-dimensional
     embeddings matching the database schema.
     """
 
-    def __init__(self, model_name: str = "BAAI/bge-small-en-v1.5"):
+    def __init__(self, model_name: Optional[str] = None):
         """
-        Initialize SentenceTransformers embeddings.
+        Initialize local SentenceTransformers embeddings.
 
         Args:
             model_name: Name of the SentenceTransformer model to use.
                 Must produce 384-dimensional embeddings.
                 Default: BAAI/bge-small-en-v1.5
         """
-        self.model_name = model_name
+        self.model_name = model_name or DEFAULT_EMBEDDINGS_LOCAL_MODEL
         self._model = None
 
-    def load(self) -> None:
+    @property
+    def provider_name(self) -> str:
+        return "local"
+
+    async def initialize(self) -> None:
         """Load the embedding model."""
         if self._model is not None:
             return
@@ -79,12 +100,17 @@ class SentenceTransformersEmbeddings(Embeddings):
             from sentence_transformers import SentenceTransformer
         except ImportError:
             raise ImportError(
-                "sentence-transformers is required for SentenceTransformersEmbeddings. "
+                "sentence-transformers is required for LocalSTEmbeddings. "
                 "Install it with: pip install sentence-transformers"
             )
 
-        logger.info(f"Loading embedding model: {self.model_name}...")
-        self._model = SentenceTransformer(self.model_name)
+        logger.info(f"Embeddings: initializing local provider with model {self.model_name}")
+        # Disable lazy loading (meta tensors) which causes issues with newer transformers/accelerate
+        # Setting low_cpu_mem_usage=False and device_map=None ensures tensors are fully materialized
+        self._model = SentenceTransformer(
+            self.model_name,
+            model_kwargs={"low_cpu_mem_usage": False, "device_map": None},
+        )
 
         # Validate dimension matches database schema
         model_dim = self._model.get_sentence_embedding_dimension()
@@ -95,7 +121,7 @@ class SentenceTransformersEmbeddings(Embeddings):
                 f"Use a model that produces {EMBEDDING_DIMENSION}-dimensional embeddings."
             )
 
-        logger.info(f"Model loaded (embedding dim: {model_dim})")
+        logger.info(f"Embeddings: local provider initialized (dim: {model_dim})")
 
     def encode(self, texts: List[str]) -> List[List[float]]:
         """
@@ -108,6 +134,159 @@ class SentenceTransformersEmbeddings(Embeddings):
             List of 384-dimensional embedding vectors
         """
         if self._model is None:
-            self.load()
+            raise RuntimeError("Embeddings not initialized. Call initialize() first.")
         embeddings = self._model.encode(texts, convert_to_numpy=True, show_progress_bar=False)
         return [emb.tolist() for emb in embeddings]
+
+
+class RemoteTEIEmbeddings(Embeddings):
+    """
+    Remote embeddings implementation using HuggingFace Text Embeddings Inference (TEI) HTTP API.
+
+    TEI provides a high-performance inference server for embedding models.
+    See: https://github.com/huggingface/text-embeddings-inference
+
+    The server should be running a model that produces 384-dimensional embeddings.
+    """
+
+    def __init__(
+        self,
+        base_url: str,
+        timeout: float = 30.0,
+        batch_size: int = 32,
+        max_retries: int = 3,
+        retry_delay: float = 0.5,
+    ):
+        """
+        Initialize remote TEI embeddings client.
+
+        Args:
+            base_url: Base URL of the TEI server (e.g., "http://localhost:8080")
+            timeout: Request timeout in seconds (default: 30.0)
+            batch_size: Maximum batch size for embedding requests (default: 32)
+            max_retries: Maximum number of retries for failed requests (default: 3)
+            retry_delay: Initial delay between retries in seconds, doubles each retry (default: 0.5)
+        """
+        self.base_url = base_url.rstrip("/")
+        self.timeout = timeout
+        self.batch_size = batch_size
+        self.max_retries = max_retries
+        self.retry_delay = retry_delay
+        self._client: Optional[httpx.Client] = None
+        self._model_id: Optional[str] = None
+
+    @property
+    def provider_name(self) -> str:
+        return "tei"
+
+    def _request_with_retry(self, method: str, url: str, **kwargs) -> httpx.Response:
+        """Make an HTTP request with automatic retries on transient errors."""
+        import time
+        last_error = None
+        delay = self.retry_delay
+
+        for attempt in range(self.max_retries + 1):
+            try:
+                if method == "GET":
+                    response = self._client.get(url, **kwargs)
+                else:
+                    response = self._client.post(url, **kwargs)
+                response.raise_for_status()
+                return response
+            except (httpx.ConnectError, httpx.ReadTimeout, httpx.WriteTimeout) as e:
+                last_error = e
+                if attempt < self.max_retries:
+                    logger.warning(f"TEI request failed (attempt {attempt + 1}/{self.max_retries + 1}): {e}. Retrying in {delay}s...")
+                    time.sleep(delay)
+                    delay *= 2  # Exponential backoff
+            except httpx.HTTPStatusError as e:
+                # Retry on 5xx server errors
+                if e.response.status_code >= 500 and attempt < self.max_retries:
+                    last_error = e
+                    logger.warning(f"TEI server error (attempt {attempt + 1}/{self.max_retries + 1}): {e}. Retrying in {delay}s...")
+                    time.sleep(delay)
+                    delay *= 2
+                else:
+                    raise
+
+        raise last_error
+
+    async def initialize(self) -> None:
+        """Initialize the HTTP client and verify server connectivity."""
+        if self._client is not None:
+            return
+
+        logger.info(f"Embeddings: initializing TEI provider at {self.base_url}")
+        self._client = httpx.Client(timeout=self.timeout)
+
+        # Verify server is reachable and get model info
+        try:
+            response = self._request_with_retry("GET", f"{self.base_url}/info")
+            info = response.json()
+            self._model_id = info.get("model_id", "unknown")
+            logger.info(f"Embeddings: TEI provider initialized (model: {self._model_id})")
+        except httpx.HTTPError as e:
+            raise RuntimeError(f"Failed to connect to TEI server at {self.base_url}: {e}")
+
+    def encode(self, texts: List[str]) -> List[List[float]]:
+        """
+        Generate embeddings using the remote TEI server.
+
+        Args:
+            texts: List of text strings to encode
+
+        Returns:
+            List of embedding vectors
+        """
+        if self._client is None:
+            raise RuntimeError("Embeddings not initialized. Call initialize() first.")
+
+        if not texts:
+            return []
+
+        all_embeddings = []
+
+        # Process in batches
+        for i in range(0, len(texts), self.batch_size):
+            batch = texts[i:i + self.batch_size]
+
+            try:
+                response = self._request_with_retry(
+                    "POST",
+                    f"{self.base_url}/embed",
+                    json={"inputs": batch},
+                )
+                batch_embeddings = response.json()
+                all_embeddings.extend(batch_embeddings)
+            except httpx.HTTPError as e:
+                raise RuntimeError(f"TEI embedding request failed: {e}")
+
+        return all_embeddings
+
+
+def create_embeddings_from_env() -> Embeddings:
+    """
+    Create an Embeddings instance based on environment variables.
+
+    See hindsight_api.config for environment variable names and defaults.
+
+    Returns:
+        Configured Embeddings instance
+    """
+    provider = os.environ.get(ENV_EMBEDDINGS_PROVIDER, DEFAULT_EMBEDDINGS_PROVIDER).lower()
+
+    if provider == "tei":
+        url = os.environ.get(ENV_EMBEDDINGS_TEI_URL)
+        if not url:
+            raise ValueError(
+                f"{ENV_EMBEDDINGS_TEI_URL} is required when {ENV_EMBEDDINGS_PROVIDER} is 'tei'"
+            )
+        return RemoteTEIEmbeddings(base_url=url)
+    elif provider == "local":
+        model = os.environ.get(ENV_EMBEDDINGS_LOCAL_MODEL)
+        model_name = model or DEFAULT_EMBEDDINGS_LOCAL_MODEL
+        return LocalSTEmbeddings(model_name=model_name)
+    else:
+        raise ValueError(
+            f"Unknown embeddings provider: {provider}. Supported: 'local', 'tei'"
+        )
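
The new factory selects a provider from environment variables whose names are defined in hindsight_api.config; only the constant names (ENV_EMBEDDINGS_PROVIDER, ENV_EMBEDDINGS_TEI_URL, ...) appear in this diff, not the literal variable names. A minimal usage sketch (not part of the package), assuming those constants are importable from hindsight_api.config as the relative import above suggests and that a TEI server is listening on localhost:8080:

import asyncio
import os

# Constant names come from the diff; their string values live in hindsight_api.config.
from hindsight_api.config import ENV_EMBEDDINGS_PROVIDER, ENV_EMBEDDINGS_TEI_URL
from hindsight_api.engine.embeddings import create_embeddings_from_env


async def main() -> None:
    # Select the remote TEI provider; "local" (SentenceTransformers) is the default.
    os.environ[ENV_EMBEDDINGS_PROVIDER] = "tei"
    os.environ[ENV_EMBEDDINGS_TEI_URL] = "http://localhost:8080"

    embeddings = create_embeddings_from_env()
    await embeddings.initialize()  # connects to the TEI server and logs its model id

    vectors = embeddings.encode(["hello world"])
    print(len(vectors[0]))  # expected to be 384, matching the vector(384) column


asyncio.run(main())
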
hindsight_api/engine/entity_resolver.py

@@ -126,18 +126,20 @@ class EntityResolver:
 
         # Resolve each entity using pre-fetched candidates
         entity_ids = [None] * len(entities_data)
-        entities_to_update = []  # (entity_id, unit_event_date)
-        entities_to_create = []  # (idx, entity_data)
+        entities_to_update = []  # (entity_id, event_date)
+        entities_to_create = []  # (idx, entity_data, event_date)
 
         for idx, entity_data in enumerate(entities_data):
            entity_text = entity_data['text']
            nearby_entities = entity_data.get('nearby_entities', [])
+            # Use per-entity date if available, otherwise fall back to batch-level date
+            entity_event_date = entity_data.get('event_date', unit_event_date)
 
            candidates = all_candidates.get(entity_text, [])
 
            if not candidates:
                # Will create new entity
-                entities_to_create.append((idx, entity_data))
+                entities_to_create.append((idx, entity_data, entity_event_date))
                continue
 
            # Score candidates
@@ -165,9 +167,9 @@
                score += co_entity_score * 0.3
 
                # 3. Temporal proximity (0-0.2)
-                if last_seen:
+                if last_seen and entity_event_date:
                    # Normalize timezone awareness for comparison
-                    event_date_utc = unit_event_date if unit_event_date.tzinfo else unit_event_date.replace(tzinfo=timezone.utc)
+                    event_date_utc = entity_event_date if entity_event_date.tzinfo else entity_event_date.replace(tzinfo=timezone.utc)
                    last_seen_utc = last_seen if last_seen.tzinfo else last_seen.replace(tzinfo=timezone.utc)
                    days_diff = abs((event_date_utc - last_seen_utc).total_seconds() / 86400)
                    if days_diff < 7:
@@ -183,9 +185,9 @@
 
            if best_score > threshold:
                entity_ids[idx] = best_candidate
-                entities_to_update.append((best_candidate, unit_event_date))
+                entities_to_update.append((best_candidate, entity_event_date))
            else:
-                entities_to_create.append((idx, entity_data))
+                entities_to_create.append((idx, entity_data, entity_event_date))
 
        # Batch update existing entities
        if entities_to_update:
@@ -199,29 +201,54 @@
                entities_to_update
            )
 
-        # Create new entities using INSERT ... ON CONFLICT to handle race conditions
-        # This ensures that if two concurrent transactions try to create the same entity,
-        # only one succeeds and the other gets the existing ID
+        # Batch create new entities using COPY + INSERT for maximum speed
+        # This handles duplicates via ON CONFLICT and returns all IDs
        if entities_to_create:
-            for idx, entity_data in entities_to_create:
-                # Use INSERT ... ON CONFLICT to atomically get-or-create
-                # The unique index is on (bank_id, LOWER(canonical_name))
-                row = await conn.fetchrow(
-                    """
-                    INSERT INTO entities (bank_id, canonical_name, first_seen, last_seen, mention_count)
-                    VALUES ($1, $2, $3, $4, 1)
-                    ON CONFLICT (bank_id, LOWER(canonical_name))
-                    DO UPDATE SET
-                        mention_count = entities.mention_count + 1,
-                        last_seen = EXCLUDED.last_seen
-                    RETURNING id
-                    """,
-                    bank_id,
-                    entity_data['text'],
-                    unit_event_date,
-                    unit_event_date
-                )
-                entity_ids[idx] = row['id']
+            # Group entities by canonical name (lowercase) to handle duplicates within batch
+            # For duplicates, we only insert once and reuse the ID
+            unique_entities = {}  # lowercase_name -> (entity_data, event_date, [indices])
+            for idx, entity_data, event_date in entities_to_create:
+                name_lower = entity_data['text'].lower()
+                if name_lower not in unique_entities:
+                    unique_entities[name_lower] = (entity_data, event_date, [idx])
+                else:
+                    # Same entity appears multiple times - add index to list
+                    unique_entities[name_lower][2].append(idx)
+
+            # Batch insert unique entities and get their IDs
+            # Use a single query with unnest for speed
+            entity_names = []
+            entity_dates = []
+            indices_map = []  # Maps result index -> list of original indices
+
+            for name_lower, (entity_data, event_date, indices) in unique_entities.items():
+                entity_names.append(entity_data['text'])
+                entity_dates.append(event_date)
+                indices_map.append(indices)
+
+            # Batch INSERT ... ON CONFLICT with RETURNING
+            # This is much faster than individual inserts
+            rows = await conn.fetch(
+                """
+                INSERT INTO entities (bank_id, canonical_name, first_seen, last_seen, mention_count)
+                SELECT $1, name, event_date, event_date, 1
+                FROM unnest($2::text[], $3::timestamptz[]) AS t(name, event_date)
+                ON CONFLICT (bank_id, LOWER(canonical_name))
+                DO UPDATE SET
+                    mention_count = entities.mention_count + 1,
+                    last_seen = EXCLUDED.last_seen
+                RETURNING id
+                """,
+                bank_id,
+                entity_names,
+                entity_dates
+            )
+
+            # Map returned IDs back to original indices
+            for result_idx, row in enumerate(rows):
+                entity_id = row['id']
+                for original_idx in indices_map[result_idx]:
+                    entity_ids[original_idx] = entity_id
 
        return entity_ids
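
This change collapses one INSERT ... ON CONFLICT round trip per entity into a single statement that unnests parallel arrays and upserts the whole batch at once. Below is a standalone sketch of the same unnest-based upsert pattern with asyncpg, not the package's code: the DSN, table setup, and helper name are illustrative, and it assumes a unique index on (bank_id, LOWER(canonical_name)) exists, as the original code's comment describes. Unlike the diff, it returns canonical_name alongside id so results are matched by name rather than by row position:

import asyncio
from datetime import datetime, timezone

import asyncpg  # assumed available; the diff uses an asyncpg-style connection


async def upsert_entities(conn: asyncpg.Connection, bank_id: int,
                          names: list[str], dates: list[datetime]) -> dict[str, int]:
    # Names must already be de-duplicated (the diff groups by lowercase name first);
    # otherwise ON CONFLICT DO UPDATE would try to affect the same row twice and fail.
    rows = await conn.fetch(
        """
        INSERT INTO entities (bank_id, canonical_name, first_seen, last_seen, mention_count)
        SELECT $1, name, event_date, event_date, 1
        FROM unnest($2::text[], $3::timestamptz[]) AS t(name, event_date)
        ON CONFLICT (bank_id, LOWER(canonical_name))
        DO UPDATE SET
            mention_count = entities.mention_count + 1,
            last_seen = EXCLUDED.last_seen
        RETURNING id, canonical_name
        """,
        bank_id, names, dates,
    )
    # Map by returned name instead of relying on row order.
    return {r["canonical_name"]: r["id"] for r in rows}


async def main() -> None:
    conn = await asyncpg.connect("postgresql://localhost/example")  # illustrative DSN
    now = datetime.now(timezone.utc)
    ids = await upsert_entities(conn, 1, ["Ada Lovelace", "Alan Turing"], [now, now])
    print(ids)
    await conn.close()


asyncio.run(main())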