hindsight-api 0.1.5__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff compares publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between versions as they appear in the public registry.
Files changed (63)
  1. hindsight_api/__init__.py +10 -9
  2. hindsight_api/alembic/env.py +5 -8
  3. hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +266 -180
  4. hindsight_api/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py +32 -32
  5. hindsight_api/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py +11 -11
  6. hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py +7 -12
  7. hindsight_api/alembic/versions/e0a1b2c3d4e5_disposition_to_3_traits.py +23 -15
  8. hindsight_api/alembic/versions/rename_personality_to_disposition.py +30 -21
  9. hindsight_api/api/__init__.py +10 -10
  10. hindsight_api/api/http.py +575 -593
  11. hindsight_api/api/mcp.py +30 -28
  12. hindsight_api/banner.py +13 -6
  13. hindsight_api/config.py +9 -13
  14. hindsight_api/engine/__init__.py +9 -9
  15. hindsight_api/engine/cross_encoder.py +22 -21
  16. hindsight_api/engine/db_utils.py +5 -4
  17. hindsight_api/engine/embeddings.py +22 -21
  18. hindsight_api/engine/entity_resolver.py +81 -75
  19. hindsight_api/engine/llm_wrapper.py +61 -79
  20. hindsight_api/engine/memory_engine.py +603 -625
  21. hindsight_api/engine/query_analyzer.py +100 -97
  22. hindsight_api/engine/response_models.py +105 -106
  23. hindsight_api/engine/retain/__init__.py +9 -16
  24. hindsight_api/engine/retain/bank_utils.py +34 -58
  25. hindsight_api/engine/retain/chunk_storage.py +4 -12
  26. hindsight_api/engine/retain/deduplication.py +9 -28
  27. hindsight_api/engine/retain/embedding_processing.py +4 -11
  28. hindsight_api/engine/retain/embedding_utils.py +3 -4
  29. hindsight_api/engine/retain/entity_processing.py +7 -17
  30. hindsight_api/engine/retain/fact_extraction.py +155 -165
  31. hindsight_api/engine/retain/fact_storage.py +11 -23
  32. hindsight_api/engine/retain/link_creation.py +11 -39
  33. hindsight_api/engine/retain/link_utils.py +166 -95
  34. hindsight_api/engine/retain/observation_regeneration.py +39 -52
  35. hindsight_api/engine/retain/orchestrator.py +72 -62
  36. hindsight_api/engine/retain/types.py +49 -43
  37. hindsight_api/engine/search/__init__.py +5 -5
  38. hindsight_api/engine/search/fusion.py +6 -15
  39. hindsight_api/engine/search/graph_retrieval.py +22 -23
  40. hindsight_api/engine/search/mpfp_retrieval.py +76 -92
  41. hindsight_api/engine/search/observation_utils.py +9 -16
  42. hindsight_api/engine/search/reranking.py +4 -7
  43. hindsight_api/engine/search/retrieval.py +87 -66
  44. hindsight_api/engine/search/scoring.py +5 -7
  45. hindsight_api/engine/search/temporal_extraction.py +8 -11
  46. hindsight_api/engine/search/think_utils.py +115 -39
  47. hindsight_api/engine/search/trace.py +68 -39
  48. hindsight_api/engine/search/tracer.py +44 -35
  49. hindsight_api/engine/search/types.py +20 -17
  50. hindsight_api/engine/task_backend.py +21 -26
  51. hindsight_api/engine/utils.py +25 -10
  52. hindsight_api/main.py +21 -40
  53. hindsight_api/mcp_local.py +190 -0
  54. hindsight_api/metrics.py +44 -30
  55. hindsight_api/migrations.py +10 -8
  56. hindsight_api/models.py +60 -72
  57. hindsight_api/pg0.py +22 -23
  58. hindsight_api/server.py +3 -6
  59. {hindsight_api-0.1.5.dist-info → hindsight_api-0.1.6.dist-info}/METADATA +2 -2
  60. hindsight_api-0.1.6.dist-info/RECORD +64 -0
  61. {hindsight_api-0.1.5.dist-info → hindsight_api-0.1.6.dist-info}/entry_points.txt +1 -0
  62. hindsight_api-0.1.5.dist-info/RECORD +0 -63
  63. {hindsight_api-0.1.5.dist-info → hindsight_api-0.1.6.dist-info}/WHEEL +0 -0
hindsight_api/engine/retain/types.py

@@ -6,8 +6,7 @@ from content input to fact storage.
 """
 
 from dataclasses import dataclass, field
-from typing import List, Optional, Dict, Any
-from datetime import datetime
+from datetime import UTC, datetime
 from uuid import UUID
 
 
@@ -18,16 +17,18 @@ class RetainContent:
 
     Represents a single piece of content to extract facts from.
     """
+
     content: str
     context: str = ""
-    event_date: Optional[datetime] = None
-    metadata: Dict[str, str] = field(default_factory=dict)
+    event_date: datetime | None = None
+    metadata: dict[str, str] = field(default_factory=dict)
 
     def __post_init__(self):
         """Ensure event_date is set."""
         if self.event_date is None:
-            from datetime import datetime, timezone
-            self.event_date = datetime.now(timezone.utc)
+            from datetime import datetime
+
+            self.event_date = datetime.now(UTC)
 
 
 @dataclass
@@ -37,6 +38,7 @@ class ChunkMetadata:
 
     Used to track which facts were extracted from which chunks.
     """
+
     chunk_text: str
     fact_count: int
     content_index: int  # Index of the source content
@@ -50,9 +52,10 @@ class EntityRef:
 
     Entities are extracted by the LLM during fact extraction.
     """
+
     name: str
-    canonical_name: Optional[str] = None  # Resolved canonical name
-    entity_id: Optional[UUID] = None  # Resolved entity ID
+    canonical_name: str | None = None  # Resolved canonical name
+    entity_id: UUID | None = None  # Resolved entity ID
 
 
 @dataclass
@@ -62,6 +65,7 @@ class CausalRelation:
 
     Represents how one fact causes, enables, or prevents another.
     """
+
     relation_type: str  # "causes", "enables", "prevents", "caused_by"
     target_fact_index: int  # Index of the target fact in the batch
     strength: float = 1.0  # Strength of the causal relationship
@@ -74,20 +78,21 @@ class ExtractedFact:
 
     This is the raw output from fact extraction before processing.
     """
+
     fact_text: str
     fact_type: str  # "world", "experience", "opinion", "observation"
-    entities: List[str] = field(default_factory=list)
-    occurred_start: Optional[datetime] = None
-    occurred_end: Optional[datetime] = None
-    where: Optional[str] = None  # WHERE the fact occurred or is about
-    causal_relations: List[CausalRelation] = field(default_factory=list)
+    entities: list[str] = field(default_factory=list)
+    occurred_start: datetime | None = None
+    occurred_end: datetime | None = None
+    where: str | None = None  # WHERE the fact occurred or is about
+    causal_relations: list[CausalRelation] = field(default_factory=list)
 
     # Context from the content item
     content_index: int = 0  # Which content this fact came from
     chunk_index: int = 0  # Which chunk this fact came from
     context: str = ""
-    mentioned_at: Optional[datetime] = None
-    metadata: Dict[str, str] = field(default_factory=dict)
+    mentioned_at: datetime | None = None
+    metadata: dict[str, str] = field(default_factory=dict)
 
 
 @dataclass
@@ -97,37 +102,38 @@ class ProcessedFact:
 
     Includes resolved entities, embeddings, and all necessary fields.
     """
+
     # Core fact data
     fact_text: str
     fact_type: str
-    embedding: List[float]
+    embedding: list[float]
 
     # Temporal data
-    occurred_start: Optional[datetime]
-    occurred_end: Optional[datetime]
+    occurred_start: datetime | None
+    occurred_end: datetime | None
     mentioned_at: datetime
 
     # Context and metadata
     context: str
-    metadata: Dict[str, str]
+    metadata: dict[str, str]
 
     # Location data
-    where: Optional[str] = None
+    where: str | None = None
 
     # Entities
-    entities: List[EntityRef] = field(default_factory=list)
+    entities: list[EntityRef] = field(default_factory=list)
 
     # Causal relations
-    causal_relations: List[CausalRelation] = field(default_factory=list)
+    causal_relations: list[CausalRelation] = field(default_factory=list)
 
     # Chunk reference
-    chunk_id: Optional[str] = None
+    chunk_id: str | None = None
 
     # Document reference (denormalized for query performance)
-    document_id: Optional[str] = None
+    document_id: str | None = None
 
     # DB fields (set after insertion)
-    unit_id: Optional[UUID] = None
+    unit_id: UUID | None = None
 
     @property
     def is_duplicate(self) -> bool:
@@ -136,10 +142,8 @@ class ProcessedFact:
 
     @staticmethod
     def from_extracted_fact(
-        extracted_fact: 'ExtractedFact',
-        embedding: List[float],
-        chunk_id: Optional[str] = None
-    ) -> 'ProcessedFact':
+        extracted_fact: "ExtractedFact", embedding: list[float], chunk_id: str | None = None
+    ) -> "ProcessedFact":
         """
         Create ProcessedFact from ExtractedFact.
 
@@ -151,12 +155,12 @@ class ProcessedFact:
         Returns:
             ProcessedFact ready for storage
         """
-        from datetime import datetime, timezone
+        from datetime import datetime
 
         # Use occurred dates only if explicitly provided by LLM
        occurred_start = extracted_fact.occurred_start
        occurred_end = extracted_fact.occurred_end
-        mentioned_at = extracted_fact.mentioned_at or datetime.now(timezone.utc)
+        mentioned_at = extracted_fact.mentioned_at or datetime.now(UTC)
 
         # Convert entity strings to EntityRef objects
         entities = [EntityRef(name=name) for name in extracted_fact.entities]
@@ -172,7 +176,7 @@ class ProcessedFact:
             metadata=extracted_fact.metadata,
             entities=entities,
             causal_relations=extracted_fact.causal_relations,
-            chunk_id=chunk_id
+            chunk_id=chunk_id,
         )
 
 
@@ -183,10 +187,11 @@ class EntityLink:
 
     Used for entity-based graph connections in the memory graph.
     """
+
     from_unit_id: UUID
     to_unit_id: UUID
     entity_id: UUID
-    link_type: str = 'entity'
+    link_type: str = "entity"
     weight: float = 1.0
 
 
@@ -197,24 +202,25 @@ class RetainBatch:
 
     Tracks all facts, chunks, and metadata for a batch operation.
    """
+
    bank_id: str
-    contents: List[RetainContent]
-    document_id: Optional[str] = None
-    fact_type_override: Optional[str] = None
-    confidence_score: Optional[float] = None
+    contents: list[RetainContent]
+    document_id: str | None = None
+    fact_type_override: str | None = None
+    confidence_score: float | None = None
 
     # Extracted data (populated during processing)
-    extracted_facts: List[ExtractedFact] = field(default_factory=list)
-    processed_facts: List[ProcessedFact] = field(default_factory=list)
-    chunks: List[ChunkMetadata] = field(default_factory=list)
+    extracted_facts: list[ExtractedFact] = field(default_factory=list)
+    processed_facts: list[ProcessedFact] = field(default_factory=list)
+    chunks: list[ChunkMetadata] = field(default_factory=list)
 
     # Results (populated after storage)
-    unit_ids_by_content: List[List[str]] = field(default_factory=list)
+    unit_ids_by_content: list[list[str]] = field(default_factory=list)
 
-    def get_facts_for_content(self, content_index: int) -> List[ExtractedFact]:
+    def get_facts_for_content(self, content_index: int) -> list[ExtractedFact]:
         """Get all extracted facts for a specific content item."""
         return [f for f in self.extracted_facts if f.content_index == content_index]
 
-    def get_chunks_for_content(self, content_index: int) -> List[ChunkMetadata]:
+    def get_chunks_for_content(self, content_index: int) -> list[ChunkMetadata]:
         """Get all chunks for a specific content item."""
         return [c for c in self.chunks if c.content_index == content_index]
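Most of the changes in this file (and across the 0.1.6 diff) are a typing modernization: typing.Optional/List/Dict give way to PEP 604 unions and builtin generics, and timezone.utc is replaced by the datetime.UTC alias (Python 3.11+). A minimal sketch of the new style on a hypothetical dataclass, not taken from the package:

```python
from dataclasses import dataclass, field
from datetime import UTC, datetime


@dataclass
class ExampleFact:  # hypothetical, illustrating the style adopted in 0.1.6
    fact_text: str
    entities: list[str] = field(default_factory=list)       # builtin generic instead of typing.List
    occurred_at: datetime | None = None                      # PEP 604 union instead of Optional[datetime]
    metadata: dict[str, str] = field(default_factory=dict)   # builtin generic instead of typing.Dict

    def __post_init__(self) -> None:
        # datetime.UTC (Python 3.11+) replaces datetime.timezone.utc
        if self.occurred_at is None:
            self.occurred_at = datetime.now(UTC)
```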
hindsight_api/engine/search/__init__.py

@@ -7,15 +7,15 @@ Provides modular search architecture:
 - Reranking: Pluggable strategies (heuristic, cross-encoder)
 """
 
+from .graph_retrieval import BFSGraphRetriever, GraphRetriever
+from .mpfp_retrieval import MPFPGraphRetriever
+from .reranking import CrossEncoderReranker
 from .retrieval import (
-    retrieve_parallel,
+    ParallelRetrievalResult,
     get_default_graph_retriever,
+    retrieve_parallel,
     set_default_graph_retriever,
-    ParallelRetrievalResult,
 )
-from .graph_retrieval import GraphRetriever, BFSGraphRetriever
-from .mpfp_retrieval import MPFPGraphRetriever
-from .reranking import CrossEncoderReranker
 
 __all__ = [
     "retrieve_parallel",
hindsight_api/engine/search/fusion.py

@@ -2,15 +2,12 @@
 Helper functions for hybrid search (semantic + BM25 + graph).
 """
 
-from typing import List, Dict, Any, Tuple
-import asyncio
-from .types import RetrievalResult, MergedCandidate
+from typing import Any
 
+from .types import MergedCandidate, RetrievalResult
 
-def reciprocal_rank_fusion(
-    result_lists: List[List[RetrievalResult]],
-    k: int = 60
-) -> List[MergedCandidate]:
+
+def reciprocal_rank_fusion(result_lists: list[list[RetrievalResult]], k: int = 60) -> list[MergedCandidate]:
     """
     Merge multiple ranked result lists using Reciprocal Rank Fusion.
 
@@ -73,20 +70,14 @@ def reciprocal_rank_fusion(
         sorted(rrf_scores.items(), key=lambda x: x[1], reverse=True), start=1
     ):
         merged_candidate = MergedCandidate(
-            retrieval=all_retrievals[doc_id],
-            rrf_score=rrf_score,
-            rrf_rank=rrf_rank,
-            source_ranks=source_ranks[doc_id]
+            retrieval=all_retrievals[doc_id], rrf_score=rrf_score, rrf_rank=rrf_rank, source_ranks=source_ranks[doc_id]
         )
         merged_results.append(merged_candidate)
 
     return merged_results
 
 
-def normalize_scores_on_deltas(
-    results: List[Dict[str, Any]],
-    score_keys: List[str]
-) -> List[Dict[str, Any]]:
+def normalize_scores_on_deltas(results: list[dict[str, Any]], score_keys: list[str]) -> list[dict[str, Any]]:
     """
     Normalize scores based on deltas (min-max normalization within result set).
 
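The reciprocal_rank_fusion changes here are formatting only; the function implements standard Reciprocal Rank Fusion, where each document's fused score is the sum of 1 / (k + rank) over every result list it appears in, with k defaulting to 60. A self-contained sketch of that scoring over plain document ids (illustrative only; not the package's RetrievalResult/MergedCandidate types):

```python
from collections import defaultdict


def rrf_merge(result_lists: list[list[str]], k: int = 60) -> list[tuple[str, float]]:
    """Toy Reciprocal Rank Fusion over ranked lists of document ids (illustrative)."""
    scores: defaultdict[str, float] = defaultdict(float)
    for ranked in result_lists:
        for rank, doc_id in enumerate(ranked, start=1):
            scores[doc_id] += 1.0 / (k + rank)  # each appearance contributes 1 / (k + rank)
    return sorted(scores.items(), key=lambda item: item[1], reverse=True)


# "b" and "c" appear in both lists, so they outrank "a", which tops only one list.
print(rrf_merge([["a", "b", "c"], ["b", "c", "d"]]))
```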
 
hindsight_api/engine/search/graph_retrieval.py

@@ -6,13 +6,11 @@ allowing different algorithms (BFS spreading activation, PPR, etc.) to be
 swapped without changing the rest of the recall pipeline.
 """
 
-from abc import ABC, abstractmethod
-from typing import List, Optional
-from datetime import datetime
 import logging
+from abc import ABC, abstractmethod
 
-from .types import RetrievalResult
 from ..db_utils import acquire_with_retry
+from .types import RetrievalResult
 
 logger = logging.getLogger(__name__)
 
@@ -40,10 +38,10 @@ class GraphRetriever(ABC):
         bank_id: str,
         fact_type: str,
         budget: int,
-        query_text: Optional[str] = None,
-        semantic_seeds: Optional[List[RetrievalResult]] = None,
-        temporal_seeds: Optional[List[RetrievalResult]] = None,
-    ) -> List[RetrievalResult]:
+        query_text: str | None = None,
+        semantic_seeds: list[RetrievalResult] | None = None,
+        temporal_seeds: list[RetrievalResult] | None = None,
+    ) -> list[RetrievalResult]:
         """
         Retrieve relevant facts via graph traversal.
 
@@ -109,10 +107,10 @@ class BFSGraphRetriever(GraphRetriever):
         bank_id: str,
         fact_type: str,
         budget: int,
-        query_text: Optional[str] = None,
-        semantic_seeds: Optional[List[RetrievalResult]] = None,
-        temporal_seeds: Optional[List[RetrievalResult]] = None,
-    ) -> List[RetrievalResult]:
+        query_text: str | None = None,
+        semantic_seeds: list[RetrievalResult] | None = None,
+        temporal_seeds: list[RetrievalResult] | None = None,
+    ) -> list[RetrievalResult]:
         """
         Retrieve facts using BFS spreading activation.
 
@@ -127,9 +125,7 @@ class BFSGraphRetriever(GraphRetriever):
             for interface compatibility but not used.
         """
         async with acquire_with_retry(pool) as conn:
-            return await self._retrieve_with_conn(
-                conn, query_embedding_str, bank_id, fact_type, budget
-            )
+            return await self._retrieve_with_conn(conn, query_embedding_str, bank_id, fact_type, budget)
 
     async def _retrieve_with_conn(
         self,
@@ -138,7 +134,7 @@ class BFSGraphRetriever(GraphRetriever):
         bank_id: str,
         fact_type: str,
         budget: int,
-    ) -> List[RetrievalResult]:
+    ) -> list[RetrievalResult]:
         """Internal implementation with connection."""
 
         # Step 1: Find entry points
@@ -155,8 +151,11 @@ class BFSGraphRetriever(GraphRetriever):
            ORDER BY embedding <=> $1::vector
            LIMIT $5
            """,
-            query_embedding_str, bank_id, fact_type,
-            self.entry_point_threshold, self.entry_point_limit
+            query_embedding_str,
+            bank_id,
+            fact_type,
+            self.entry_point_threshold,
+            self.entry_point_limit,
         )
 
         if not entry_points:
@@ -165,10 +164,7 @@ class BFSGraphRetriever(GraphRetriever):
         # Step 2: BFS spreading activation
         visited = set()
         results = []
-        queue = [
-            (RetrievalResult.from_db_row(dict(r)), r["similarity"])
-            for r in entry_points
-        ]
+        queue = [(RetrievalResult.from_db_row(dict(r)), r["similarity"]) for r in entry_points]
         budget_remaining = budget
 
         while queue and budget_remaining > 0:
@@ -205,7 +201,10 @@ class BFSGraphRetriever(GraphRetriever):
                ORDER BY ml.weight DESC
                LIMIT $4
                """,
-                batch_nodes, self.min_activation, fact_type, max_neighbors
+                batch_nodes,
+                self.min_activation,
+                fact_type,
+                max_neighbors,
             )
 
             for n in neighbors:
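BFSGraphRetriever issues its queries against Postgres/pgvector (the embedding <=> $1::vector entry-point lookup and the ORDER BY ml.weight DESC neighbor expansion shown above); the control flow itself is a budgeted breadth-first spreading activation from embedding-similar seeds. A minimal in-memory sketch of that pattern, with an adjacency dict standing in for the SQL and an illustrative weight-decay rule (names and threshold are assumptions, not the package's implementation):

```python
from collections import deque


def bfs_spread(
    entry_points: list[tuple[str, float]],          # (node_id, similarity) seeds
    neighbors: dict[str, list[tuple[str, float]]],  # node_id -> [(neighbor_id, link_weight)]
    budget: int,
    min_activation: float = 0.1,
) -> list[tuple[str, float]]:
    """Budgeted BFS spreading activation over an in-memory graph (illustrative)."""
    visited: set[str] = set()
    results: list[tuple[str, float]] = []
    queue = deque(entry_points)
    while queue and budget > 0:
        node_id, activation = queue.popleft()
        if node_id in visited or activation < min_activation:
            continue
        visited.add(node_id)
        results.append((node_id, activation))
        budget -= 1
        for neighbor_id, weight in neighbors.get(node_id, []):
            if neighbor_id not in visited:
                queue.append((neighbor_id, activation * weight))  # activation decays along weaker links
    return results
```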