hindsight-api 0.1.5__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. hindsight_api/__init__.py +10 -9
  2. hindsight_api/alembic/env.py +5 -8
  3. hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +266 -180
  4. hindsight_api/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py +32 -32
  5. hindsight_api/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py +11 -11
  6. hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py +7 -12
  7. hindsight_api/alembic/versions/e0a1b2c3d4e5_disposition_to_3_traits.py +23 -15
  8. hindsight_api/alembic/versions/rename_personality_to_disposition.py +30 -21
  9. hindsight_api/api/__init__.py +10 -10
  10. hindsight_api/api/http.py +575 -593
  11. hindsight_api/api/mcp.py +30 -28
  12. hindsight_api/banner.py +13 -6
  13. hindsight_api/config.py +9 -13
  14. hindsight_api/engine/__init__.py +9 -9
  15. hindsight_api/engine/cross_encoder.py +22 -21
  16. hindsight_api/engine/db_utils.py +5 -4
  17. hindsight_api/engine/embeddings.py +22 -21
  18. hindsight_api/engine/entity_resolver.py +81 -75
  19. hindsight_api/engine/llm_wrapper.py +61 -79
  20. hindsight_api/engine/memory_engine.py +603 -625
  21. hindsight_api/engine/query_analyzer.py +100 -97
  22. hindsight_api/engine/response_models.py +105 -106
  23. hindsight_api/engine/retain/__init__.py +9 -16
  24. hindsight_api/engine/retain/bank_utils.py +34 -58
  25. hindsight_api/engine/retain/chunk_storage.py +4 -12
  26. hindsight_api/engine/retain/deduplication.py +9 -28
  27. hindsight_api/engine/retain/embedding_processing.py +4 -11
  28. hindsight_api/engine/retain/embedding_utils.py +3 -4
  29. hindsight_api/engine/retain/entity_processing.py +7 -17
  30. hindsight_api/engine/retain/fact_extraction.py +155 -165
  31. hindsight_api/engine/retain/fact_storage.py +11 -23
  32. hindsight_api/engine/retain/link_creation.py +11 -39
  33. hindsight_api/engine/retain/link_utils.py +166 -95
  34. hindsight_api/engine/retain/observation_regeneration.py +39 -52
  35. hindsight_api/engine/retain/orchestrator.py +72 -62
  36. hindsight_api/engine/retain/types.py +49 -43
  37. hindsight_api/engine/search/__init__.py +5 -5
  38. hindsight_api/engine/search/fusion.py +6 -15
  39. hindsight_api/engine/search/graph_retrieval.py +22 -23
  40. hindsight_api/engine/search/mpfp_retrieval.py +76 -92
  41. hindsight_api/engine/search/observation_utils.py +9 -16
  42. hindsight_api/engine/search/reranking.py +4 -7
  43. hindsight_api/engine/search/retrieval.py +87 -66
  44. hindsight_api/engine/search/scoring.py +5 -7
  45. hindsight_api/engine/search/temporal_extraction.py +8 -11
  46. hindsight_api/engine/search/think_utils.py +115 -39
  47. hindsight_api/engine/search/trace.py +68 -39
  48. hindsight_api/engine/search/tracer.py +44 -35
  49. hindsight_api/engine/search/types.py +20 -17
  50. hindsight_api/engine/task_backend.py +21 -26
  51. hindsight_api/engine/utils.py +25 -10
  52. hindsight_api/main.py +21 -40
  53. hindsight_api/mcp_local.py +190 -0
  54. hindsight_api/metrics.py +44 -30
  55. hindsight_api/migrations.py +10 -8
  56. hindsight_api/models.py +60 -72
  57. hindsight_api/pg0.py +22 -23
  58. hindsight_api/server.py +3 -6
  59. {hindsight_api-0.1.5.dist-info → hindsight_api-0.1.6.dist-info}/METADATA +2 -2
  60. hindsight_api-0.1.6.dist-info/RECORD +64 -0
  61. {hindsight_api-0.1.5.dist-info → hindsight_api-0.1.6.dist-info}/entry_points.txt +1 -0
  62. hindsight_api-0.1.5.dist-info/RECORD +0 -63
  63. {hindsight_api-0.1.5.dist-info → hindsight_api-0.1.6.dist-info}/WHEEL +0 -0
hindsight_api/engine/entity_resolver.py
@@ -4,12 +4,13 @@ Entity extraction and resolution for memory system.
 Uses spaCy for entity extraction and implements resolution logic
 to disambiguate entities across memory units.
 """
-import asyncpg
-from typing import List, Dict, Optional, Set, Any
+
+from datetime import UTC, datetime
 from difflib import SequenceMatcher
-from datetime import datetime, timezone
-from .db_utils import acquire_with_retry

+import asyncpg
+
+from .db_utils import acquire_with_retry

 # Load spaCy model (singleton)
 _nlp = None
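
A note on the import hunk above: datetime.UTC, which replaces timezone.utc later in this file, is an alias for datetime.timezone.utc that was added in Python 3.11, so the two spellings refer to the same object. A minimal standalone sketch of the equivalence (not taken from the package):

from datetime import UTC, datetime, timezone

# UTC is an alias for timezone.utc on Python 3.11+.
assert UTC is timezone.utc
assert datetime.now(UTC).tzinfo is datetime.now(timezone.utc).tzinfo
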
@@ -32,11 +33,11 @@ class EntityResolver:
     async def resolve_entities_batch(
         self,
         bank_id: str,
-        entities_data: List[Dict],
+        entities_data: list[dict],
         context: str,
         unit_event_date,
         conn=None,
-    ) -> List[str]:
+    ) -> list[str]:
         """
         Resolve multiple entities in batch (MUCH faster than sequential).

@@ -62,7 +63,9 @@ class EntityResolver:
         else:
             return await self._resolve_entities_batch_impl(conn, bank_id, entities_data, context, unit_event_date)

-    async def _resolve_entities_batch_impl(self, conn, bank_id: str, entities_data: List[Dict], context: str, unit_event_date) -> List[str]:
+    async def _resolve_entities_batch_impl(
+        self, conn, bank_id: str, entities_data: list[dict], context: str, unit_event_date
+    ) -> list[str]:
         # Query ALL candidates for this bank
         all_entities = await conn.fetch(
             """
@@ -70,11 +73,11 @@ class EntityResolver:
             FROM entities
             WHERE bank_id = $1
             """,
-            bank_id
+            bank_id,
         )

         # Build entity ID to name mapping for co-occurrence lookups
-        entity_id_to_name = {row['id']: row['canonical_name'].lower() for row in all_entities}
+        entity_id_to_name = {row["id"]: row["canonical_name"].lower() for row in all_entities}

         # Query ALL co-occurrences for this bank's entities in one query
         # This builds a map of entity_id -> set of co-occurring entity names
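
A recurring change throughout this diff is dropping from typing import List, Dict, Optional, Set, Any in favour of the built-in generics (list[dict], dict[str, set[str]]) and, further down, the str | None union. PEP 585 (Python 3.9) made the builtin containers subscriptable and PEP 604 (Python 3.10) added the | union syntax, so the typing import becomes unnecessary. A minimal illustration of the equivalent spellings (standalone, not from the package):

from typing import Dict, List, Optional  # needed only for the old spellings


def old_style(entities_data: List[Dict], entity_text: str) -> Optional[str]: ...


def new_style(entities_data: list[dict], entity_text: str) -> str | None: ...
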
@@ -85,13 +88,13 @@ class EntityResolver:
             WHERE ec.entity_id_1 IN (SELECT id FROM entities WHERE bank_id = $1)
                OR ec.entity_id_2 IN (SELECT id FROM entities WHERE bank_id = $1)
             """,
-            bank_id
+            bank_id,
         )

         # Build co-occurrence map: entity_id -> set of co-occurring entity names (lowercase)
-        cooccurrence_map: Dict[str, Set[str]] = {}
+        cooccurrence_map: dict[str, set[str]] = {}
         for row in all_cooccurrences:
-            eid1, eid2 = row['entity_id_1'], row['entity_id_2']
+            eid1, eid2 = row["entity_id_1"], row["entity_id_2"]
             # Add both directions
             if eid1 not in cooccurrence_map:
                 cooccurrence_map[eid1] = set()
@@ -105,22 +108,24 @@ class EntityResolver:

         # Build candidate map for each entity text
         all_candidates = {} # Maps entity_text -> list of candidates
-        entity_texts = list(set(e['text'] for e in entities_data))
+        entity_texts = list(set(e["text"] for e in entities_data))

         for entity_text in entity_texts:
             matching = []
             entity_text_lower = entity_text.lower()
             for row in all_entities:
-                canonical_name = row['canonical_name']
-                ent_id = row['id']
-                metadata = row['metadata']
-                last_seen = row['last_seen']
-                mention_count = row['mention_count']
+                canonical_name = row["canonical_name"]
+                ent_id = row["id"]
+                metadata = row["metadata"]
+                last_seen = row["last_seen"]
+                mention_count = row["mention_count"]
                 canonical_lower = canonical_name.lower()
                 # Match if exact or substring match
-                if (entity_text_lower == canonical_lower or
-                    entity_text_lower in canonical_lower or
-                    canonical_lower in entity_text_lower):
+                if (
+                    entity_text_lower == canonical_lower
+                    or entity_text_lower in canonical_lower
+                    or canonical_lower in entity_text_lower
+                ):
                     matching.append((ent_id, canonical_name, metadata, last_seen, mention_count))
             all_candidates[entity_text] = matching

@@ -130,10 +135,10 @@ class EntityResolver:
         entities_to_create = [] # (idx, entity_data, event_date)

         for idx, entity_data in enumerate(entities_data):
-            entity_text = entity_data['text']
-            nearby_entities = entity_data.get('nearby_entities', [])
+            entity_text = entity_data["text"]
+            nearby_entities = entity_data.get("nearby_entities", [])
             # Use per-entity date if available, otherwise fall back to batch-level date
-            entity_event_date = entity_data.get('event_date', unit_event_date)
+            entity_event_date = entity_data.get("event_date", unit_event_date)

             candidates = all_candidates.get(entity_text, [])

@@ -146,17 +151,13 @@ class EntityResolver:
             best_candidate = None
             best_score = 0.0

-            nearby_entity_set = {e['text'].lower() for e in nearby_entities if e['text'] != entity_text}
+            nearby_entity_set = {e["text"].lower() for e in nearby_entities if e["text"] != entity_text}

             for candidate_id, canonical_name, metadata, last_seen, mention_count in candidates:
                 score = 0.0

                 # 1. Name similarity (0-0.5)
-                name_similarity = SequenceMatcher(
-                    None,
-                    entity_text.lower(),
-                    canonical_name.lower()
-                ).ratio()
+                name_similarity = SequenceMatcher(None, entity_text.lower(), canonical_name.lower()).ratio()
                 score += name_similarity * 0.5

                 # 2. Co-occurring entities (0-0.3)
@@ -169,8 +170,10 @@ class EntityResolver:
                 # 3. Temporal proximity (0-0.2)
                 if last_seen and entity_event_date:
                     # Normalize timezone awareness for comparison
-                    event_date_utc = entity_event_date if entity_event_date.tzinfo else entity_event_date.replace(tzinfo=timezone.utc)
-                    last_seen_utc = last_seen if last_seen.tzinfo else last_seen.replace(tzinfo=timezone.utc)
+                    event_date_utc = (
+                        entity_event_date if entity_event_date.tzinfo else entity_event_date.replace(tzinfo=UTC)
+                    )
+                    last_seen_utc = last_seen if last_seen.tzinfo else last_seen.replace(tzinfo=UTC)
                     days_diff = abs((event_date_utc - last_seen_utc).total_seconds() / 86400)
                     if days_diff < 7:
                         temporal_score = max(0, 1.0 - (days_diff / 7))
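
Read together, the two hunks above show how a candidate entity is scored: SequenceMatcher name similarity contributes up to 0.5, overlap with nearby entities up to 0.3, and temporal proximity up to 0.2, decaying linearly to zero over seven days. The sketch below redoes that arithmetic standalone; the weights come from the comments above, the way the co-occurrence overlap is normalized is not visible in the diff and is assumed here, and all inputs are made up.

from datetime import UTC, datetime
from difflib import SequenceMatcher


def candidate_score(mention, canonical_name, nearby, co_entities, event_date, last_seen):
    # 1. Name similarity (0-0.5)
    score = SequenceMatcher(None, mention.lower(), canonical_name.lower()).ratio() * 0.5
    # 2. Co-occurring entities (0-0.3) -- assumed: share of nearby mentions already co-occurring
    if nearby:
        score += min(len(nearby & co_entities) / len(nearby), 1.0) * 0.3
    # 3. Temporal proximity (0-0.2) -- linear decay over a 7-day window
    days = abs((event_date - last_seen).total_seconds()) / 86400
    if days < 7:
        score += max(0.0, 1.0 - days / 7) * 0.2
    return score


print(candidate_score("Ada", "Ada Lovelace", {"turing"}, {"turing", "babbage"},
                      datetime(2024, 1, 3, tzinfo=UTC), datetime(2024, 1, 1, tzinfo=UTC)))
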
@@ -198,7 +201,7 @@ class EntityResolver:
                     last_seen = $2
                 WHERE id = $1::uuid
                 """,
-                entities_to_update
+                entities_to_update,
             )

         # Batch create new entities using COPY + INSERT for maximum speed
@@ -208,7 +211,7 @@ class EntityResolver:
             # For duplicates, we only insert once and reuse the ID
             unique_entities = {} # lowercase_name -> (entity_data, event_date, [indices])
             for idx, entity_data, event_date in entities_to_create:
-                name_lower = entity_data['text'].lower()
+                name_lower = entity_data["text"].lower()
                 if name_lower not in unique_entities:
                     unique_entities[name_lower] = (entity_data, event_date, [idx])
                 else:
@@ -222,7 +225,7 @@ class EntityResolver:
             indices_map = [] # Maps result index -> list of original indices

             for name_lower, (entity_data, event_date, indices) in unique_entities.items():
-                entity_names.append(entity_data['text'])
+                entity_names.append(entity_data["text"])
                 entity_dates.append(event_date)
                 indices_map.append(indices)

@@ -241,12 +244,12 @@ class EntityResolver:
                 """,
                 bank_id,
                 entity_names,
-                entity_dates
+                entity_dates,
             )

             # Map returned IDs back to original indices
             for result_idx, row in enumerate(rows):
-                entity_id = row['id']
+                entity_id = row["id"]
                 for original_idx in indices_map[result_idx]:
                     entity_ids[original_idx] = entity_id

@@ -257,7 +260,7 @@ class EntityResolver:
         bank_id: str,
         entity_text: str,
         context: str,
-        nearby_entities: List[Dict],
+        nearby_entities: list[dict],
         unit_event_date,
     ) -> str:
         """
@@ -287,14 +290,14 @@ class EntityResolver:
             )
             ORDER BY mention_count DESC
             """,
-            bank_id, entity_text, f"%{entity_text}%"
+            bank_id,
+            entity_text,
+            f"%{entity_text}%",
         )

         if not candidates:
             # New entity - create it
-            return await self._create_entity(
-                conn, bank_id, entity_text, unit_event_date
-            )
+            return await self._create_entity(conn, bank_id, entity_text, unit_event_date)

         # Score candidates based on:
         # 1. Name similarity
@@ -306,21 +309,17 @@ class EntityResolver:
         best_score = 0.0
         best_name_similarity = 0.0

-        nearby_entity_set = {e['text'].lower() for e in nearby_entities if e['text'] != entity_text}
+        nearby_entity_set = {e["text"].lower() for e in nearby_entities if e["text"] != entity_text}

         for row in candidates:
-            candidate_id = row['id']
-            canonical_name = row['canonical_name']
-            metadata = row['metadata']
-            last_seen = row['last_seen']
+            candidate_id = row["id"]
+            canonical_name = row["canonical_name"]
+            metadata = row["metadata"]
+            last_seen = row["last_seen"]
             score = 0.0

             # 1. Name similarity (0-1)
-            name_similarity = SequenceMatcher(
-                None,
-                entity_text.lower(),
-                canonical_name.lower()
-            ).ratio()
+            name_similarity = SequenceMatcher(None, entity_text.lower(), canonical_name.lower()).ratio()
            score += name_similarity * 0.5

             # 2. Co-occurring entities (0-0.5)
@@ -338,9 +337,9 @@ class EntityResolver:
                 )
                 WHERE ec.entity_id_1 = $1 OR ec.entity_id_2 = $1
                 """,
-                candidate_id
+                candidate_id,
             )
-            co_entities = {r['canonical_name'].lower() for r in co_entity_rows}
+            co_entities = {r["canonical_name"].lower() for r in co_entity_rows}

             # Check overlap with nearby entities
             overlap = len(nearby_entity_set & co_entities)
@@ -372,14 +371,13 @@ class EntityResolver:
                 last_seen = $1
                 WHERE id = $2
                 """,
-                unit_event_date, best_candidate
+                unit_event_date,
+                best_candidate,
             )
             return best_candidate
         else:
             # Not confident - create new entity
-            return await self._create_entity(
-                conn, bank_id, entity_text, unit_event_date
-            )
+            return await self._create_entity(conn, bank_id, entity_text, unit_event_date)

     async def _create_entity(
         self,
@@ -413,7 +411,10 @@ class EntityResolver:
                 last_seen = EXCLUDED.last_seen
             RETURNING id
             """,
-            bank_id, entity_text, event_date, event_date
+            bank_id,
+            entity_text,
+            event_date,
+            event_date,
         )
         return entity_id

@@ -434,7 +435,8 @@ class EntityResolver:
             VALUES ($1, $2)
             ON CONFLICT DO NOTHING
             """,
-            unit_id, entity_id
+            unit_id,
+            entity_id,
         )

         # Update co-occurrence cache: find other entities in this unit
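
The two hunks above only reflow argument lists, but they sit in the resolver's two conflict-handling patterns: entity creation is an INSERT ... ON CONFLICT ... DO UPDATE ... RETURNING id upsert (create the row or bump the existing one, and get its id back in a single round trip), while unit-to-entity linking uses ON CONFLICT DO NOTHING so repeated links are idempotent. A minimal sketch of the upsert shape with asyncpg, using a simplified, hypothetical table and conflict target rather than the package's actual schema:

import asyncpg


async def upsert_entity(conn: asyncpg.Connection, bank_id: str, name: str):
    # Insert, or bump the counter if (bank_id, canonical_name) already exists;
    # RETURNING id hands back the row id in either case.
    return await conn.fetchval(
        """
        INSERT INTO entities (bank_id, canonical_name, mention_count)
        VALUES ($1, $2, 1)
        ON CONFLICT (bank_id, canonical_name) DO UPDATE
            SET mention_count = entities.mention_count + 1
        RETURNING id
        """,
        bank_id,
        name,
    )
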
@@ -444,10 +446,11 @@ class EntityResolver:
             FROM unit_entities
             WHERE unit_id = $1 AND entity_id != $2
             """,
-            unit_id, entity_id
+            unit_id,
+            entity_id,
         )

-        other_entities = [row['entity_id'] for row in rows]
+        other_entities = [row["entity_id"] for row in rows]

         # Update co-occurrences for each pair
         for other_entity_id in other_entities:
@@ -477,10 +480,11 @@ class EntityResolver:
                     cooccurrence_count = entity_cooccurrences.cooccurrence_count + 1,
                     last_cooccurred = NOW()
                 """,
-                entity_id_1, entity_id_2
+                entity_id_1,
+                entity_id_2,
             )

-    async def link_units_to_entities_batch(self, unit_entity_pairs: List[tuple[str, str]], conn=None):
+    async def link_units_to_entities_batch(self, unit_entity_pairs: list[tuple[str, str]], conn=None):
         """
         Link multiple memory units to entities in batch (MUCH faster than sequential).

@@ -499,7 +503,7 @@ class EntityResolver:
         else:
             return await self._link_units_to_entities_batch_impl(conn, unit_entity_pairs)

-    async def _link_units_to_entities_batch_impl(self, conn, unit_entity_pairs: List[tuple[str, str]]):
+    async def _link_units_to_entities_batch_impl(self, conn, unit_entity_pairs: list[tuple[str, str]]):
         # Batch insert all unit-entity links
         await conn.executemany(
             """
@@ -507,7 +511,7 @@ class EntityResolver:
             VALUES ($1, $2)
             ON CONFLICT DO NOTHING
             """,
-            unit_entity_pairs
+            unit_entity_pairs,
         )

         # Build map of unit -> entities for co-occurrence calculation
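
The hunk above only reflows the argument list, but the call pattern is asyncpg's Connection.executemany: one parameterized statement plus an iterable of argument tuples, executed once per tuple, which is what lets the batch path link many unit/entity pairs without a round trip per pair. A minimal standalone sketch of the call shape, with a hypothetical DSN and placeholder ids:

import asyncpg


async def link_pairs(dsn: str, pairs: list[tuple[str, str]]) -> None:
    conn = await asyncpg.connect(dsn)
    try:
        # One statement, executed once per (unit_id, entity_id) tuple.
        await conn.executemany(
            "INSERT INTO unit_entities (unit_id, entity_id) VALUES ($1, $2) ON CONFLICT DO NOTHING",
            pairs,
        )
    finally:
        await conn.close()
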
@@ -524,7 +528,7 @@ class EntityResolver:
             entity_list = list(entity_ids) # Convert set to list for iteration
             # For each pair of entities in this unit, create co-occurrence
             for i, entity_id_1 in enumerate(entity_list):
-                for entity_id_2 in entity_list[i+1:]:
+                for entity_id_2 in entity_list[i + 1 :]:
                     # Skip if same entity (shouldn't happen with set, but be safe)
                     if entity_id_1 == entity_id_2:
                         continue
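
The only change in the hunk above is the spacing of the slice (entity_list[i + 1 :]), but the loop it sits in enumerates every unordered pair of entities in a unit, which is equivalent to itertools.combinations(entity_list, 2). A small standalone check (sample ids are made up):

from itertools import combinations

entity_list = ["e1", "e2", "e3"]
pairs = [(a, b) for i, a in enumerate(entity_list) for b in entity_list[i + 1 :]]
assert pairs == list(combinations(entity_list, 2))
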
@@ -535,7 +539,7 @@ class EntityResolver:

         # Batch update co-occurrences
         if cooccurrence_pairs:
-            now = datetime.now(timezone.utc)
+            now = datetime.now(UTC)
             await conn.executemany(
                 """
                 INSERT INTO entity_cooccurrences (entity_id_1, entity_id_2, cooccurrence_count, last_cooccurred)
@@ -545,10 +549,10 @@ class EntityResolver:
                     cooccurrence_count = entity_cooccurrences.cooccurrence_count + 1,
                     last_cooccurred = EXCLUDED.last_cooccurred
                 """,
-                [(e1, e2, 1, now) for e1, e2 in cooccurrence_pairs]
+                [(e1, e2, 1, now) for e1, e2 in cooccurrence_pairs],
            )

-    async def get_units_by_entity(self, entity_id: str, limit: int = 100) -> List[str]:
+    async def get_units_by_entity(self, entity_id: str, limit: int = 100) -> list[str]:
         """
         Get all units that mention an entity.

@@ -568,15 +572,16 @@ class EntityResolver:
                 ORDER BY unit_id
                 LIMIT $2
                 """,
-                entity_id, limit
+                entity_id,
+                limit,
             )
-            return [row['unit_id'] for row in rows]
+            return [row["unit_id"] for row in rows]

     async def get_entity_by_text(
         self,
         bank_id: str,
         entity_text: str,
-    ) -> Optional[str]:
+    ) -> str | None:
         """
         Find an entity by text (for query resolution).

@@ -596,7 +601,8 @@ class EntityResolver:
                 ORDER BY mention_count DESC
                 LIMIT 1
                 """,
-                bank_id, entity_text
+                bank_id,
+                entity_text,
             )

-            return row['id'] if row else None
+            return row["id"] if row else None