hindsight-api 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. hindsight_api/__init__.py +10 -9
  2. hindsight_api/alembic/env.py +5 -8
  3. hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +266 -180
  4. hindsight_api/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py +32 -32
  5. hindsight_api/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py +11 -11
  6. hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py +7 -12
  7. hindsight_api/alembic/versions/e0a1b2c3d4e5_disposition_to_3_traits.py +23 -15
  8. hindsight_api/alembic/versions/rename_personality_to_disposition.py +30 -21
  9. hindsight_api/api/__init__.py +10 -10
  10. hindsight_api/api/http.py +575 -593
  11. hindsight_api/api/mcp.py +31 -33
  12. hindsight_api/banner.py +13 -6
  13. hindsight_api/config.py +17 -12
  14. hindsight_api/engine/__init__.py +9 -9
  15. hindsight_api/engine/cross_encoder.py +23 -27
  16. hindsight_api/engine/db_utils.py +5 -4
  17. hindsight_api/engine/embeddings.py +22 -21
  18. hindsight_api/engine/entity_resolver.py +81 -75
  19. hindsight_api/engine/llm_wrapper.py +74 -88
  20. hindsight_api/engine/memory_engine.py +663 -673
  21. hindsight_api/engine/query_analyzer.py +100 -97
  22. hindsight_api/engine/response_models.py +105 -106
  23. hindsight_api/engine/retain/__init__.py +9 -16
  24. hindsight_api/engine/retain/bank_utils.py +34 -58
  25. hindsight_api/engine/retain/chunk_storage.py +4 -12
  26. hindsight_api/engine/retain/deduplication.py +9 -28
  27. hindsight_api/engine/retain/embedding_processing.py +4 -11
  28. hindsight_api/engine/retain/embedding_utils.py +3 -4
  29. hindsight_api/engine/retain/entity_processing.py +7 -17
  30. hindsight_api/engine/retain/fact_extraction.py +155 -165
  31. hindsight_api/engine/retain/fact_storage.py +11 -23
  32. hindsight_api/engine/retain/link_creation.py +11 -39
  33. hindsight_api/engine/retain/link_utils.py +166 -95
  34. hindsight_api/engine/retain/observation_regeneration.py +39 -52
  35. hindsight_api/engine/retain/orchestrator.py +72 -62
  36. hindsight_api/engine/retain/types.py +49 -43
  37. hindsight_api/engine/search/__init__.py +15 -1
  38. hindsight_api/engine/search/fusion.py +6 -15
  39. hindsight_api/engine/search/graph_retrieval.py +234 -0
  40. hindsight_api/engine/search/mpfp_retrieval.py +438 -0
  41. hindsight_api/engine/search/observation_utils.py +9 -16
  42. hindsight_api/engine/search/reranking.py +4 -7
  43. hindsight_api/engine/search/retrieval.py +388 -193
  44. hindsight_api/engine/search/scoring.py +5 -7
  45. hindsight_api/engine/search/temporal_extraction.py +8 -11
  46. hindsight_api/engine/search/think_utils.py +115 -39
  47. hindsight_api/engine/search/trace.py +68 -38
  48. hindsight_api/engine/search/tracer.py +49 -35
  49. hindsight_api/engine/search/types.py +22 -16
  50. hindsight_api/engine/task_backend.py +21 -26
  51. hindsight_api/engine/utils.py +25 -10
  52. hindsight_api/main.py +21 -40
  53. hindsight_api/mcp_local.py +190 -0
  54. hindsight_api/metrics.py +44 -30
  55. hindsight_api/migrations.py +10 -8
  56. hindsight_api/models.py +60 -72
  57. hindsight_api/pg0.py +64 -337
  58. hindsight_api/server.py +3 -6
  59. {hindsight_api-0.1.4.dist-info → hindsight_api-0.1.6.dist-info}/METADATA +6 -5
  60. hindsight_api-0.1.6.dist-info/RECORD +64 -0
  61. {hindsight_api-0.1.4.dist-info → hindsight_api-0.1.6.dist-info}/entry_points.txt +1 -0
  62. hindsight_api-0.1.4.dist-info/RECORD +0 -61
  63. {hindsight_api-0.1.4.dist-info → hindsight_api-0.1.6.dist-info}/WHEEL +0 -0
hindsight_api/models.py CHANGED
@@ -1,49 +1,47 @@
1
1
  """
2
2
  SQLAlchemy models for the memory system.
3
3
  """
4
+
4
5
  from datetime import datetime
5
- from typing import Optional
6
- from uuid import UUID as PyUUID, uuid4
6
+ from uuid import UUID as PyUUID
7
7
 
8
+ from pgvector.sqlalchemy import Vector
8
9
  from sqlalchemy import (
9
10
  CheckConstraint,
10
- Column,
11
11
  Float,
12
12
  ForeignKey,
13
13
  ForeignKeyConstraint,
14
14
  Index,
15
15
  Integer,
16
- PrimaryKeyConstraint,
17
16
  Text,
18
17
  func,
18
+ )
19
+ from sqlalchemy import (
19
20
  text as sql_text,
20
21
  )
21
22
  from sqlalchemy.dialects.postgresql import JSONB, TIMESTAMP, UUID
22
23
  from sqlalchemy.ext.asyncio import AsyncAttrs
23
24
  from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship
24
- from pgvector.sqlalchemy import Vector
25
25
 
26
26
 
27
27
  class Base(AsyncAttrs, DeclarativeBase):
28
28
  """Base class for all models."""
29
+
29
30
  pass
30
31
 
31
32
 
32
33
  class Document(Base):
33
34
  """Source documents for memory units."""
35
+
34
36
  __tablename__ = "documents"
35
37
 
36
38
  id: Mapped[str] = mapped_column(Text, primary_key=True)
37
39
  bank_id: Mapped[str] = mapped_column(Text, primary_key=True)
38
- original_text: Mapped[Optional[str]] = mapped_column(Text)
39
- content_hash: Mapped[Optional[str]] = mapped_column(Text)
40
+ original_text: Mapped[str | None] = mapped_column(Text)
41
+ content_hash: Mapped[str | None] = mapped_column(Text)
40
42
  doc_metadata: Mapped[dict] = mapped_column("metadata", JSONB, server_default=sql_text("'{}'::jsonb"))
41
- created_at: Mapped[datetime] = mapped_column(
42
- TIMESTAMP(timezone=True), server_default=func.now()
43
- )
44
- updated_at: Mapped[datetime] = mapped_column(
45
- TIMESTAMP(timezone=True), server_default=func.now()
46
- )
43
+ created_at: Mapped[datetime] = mapped_column(TIMESTAMP(timezone=True), server_default=func.now())
44
+ updated_at: Mapped[datetime] = mapped_column(TIMESTAMP(timezone=True), server_default=func.now())
47
45
 
48
46
  # Relationships
49
47
  memory_units = relationship("MemoryUnit", back_populates="document", cascade="all, delete-orphan")
@@ -56,45 +54,42 @@ class Document(Base):
56
54
 
57
55
  class MemoryUnit(Base):
58
56
  """Individual sentence-level memories."""
57
+
59
58
  __tablename__ = "memory_units"
60
59
 
61
60
  id: Mapped[PyUUID] = mapped_column(
62
61
  UUID(as_uuid=True), primary_key=True, server_default=sql_text("gen_random_uuid()")
63
62
  )
64
63
  bank_id: Mapped[str] = mapped_column(Text, nullable=False)
65
- document_id: Mapped[Optional[str]] = mapped_column(Text)
64
+ document_id: Mapped[str | None] = mapped_column(Text)
66
65
  text: Mapped[str] = mapped_column(Text, nullable=False)
67
66
  embedding = mapped_column(Vector(384)) # pgvector type
68
- context: Mapped[Optional[str]] = mapped_column(Text)
69
- event_date: Mapped[datetime] = mapped_column(TIMESTAMP(timezone=True), nullable=False) # Kept for backward compatibility
70
- occurred_start: Mapped[Optional[datetime]] = mapped_column(TIMESTAMP(timezone=True)) # When fact occurred (range start)
71
- occurred_end: Mapped[Optional[datetime]] = mapped_column(TIMESTAMP(timezone=True)) # When fact occurred (range end)
72
- mentioned_at: Mapped[Optional[datetime]] = mapped_column(TIMESTAMP(timezone=True)) # When fact was mentioned
67
+ context: Mapped[str | None] = mapped_column(Text)
68
+ event_date: Mapped[datetime] = mapped_column(
69
+ TIMESTAMP(timezone=True), nullable=False
70
+ ) # Kept for backward compatibility
71
+ occurred_start: Mapped[datetime | None] = mapped_column(
72
+ TIMESTAMP(timezone=True)
73
+ ) # When fact occurred (range start)
74
+ occurred_end: Mapped[datetime | None] = mapped_column(TIMESTAMP(timezone=True)) # When fact occurred (range end)
75
+ mentioned_at: Mapped[datetime | None] = mapped_column(TIMESTAMP(timezone=True)) # When fact was mentioned
73
76
  fact_type: Mapped[str] = mapped_column(Text, nullable=False, server_default="world")
74
- confidence_score: Mapped[Optional[float]] = mapped_column(Float)
77
+ confidence_score: Mapped[float | None] = mapped_column(Float)
75
78
  access_count: Mapped[int] = mapped_column(Integer, server_default="0")
76
- unit_metadata: Mapped[dict] = mapped_column("metadata", JSONB, server_default=sql_text("'{}'::jsonb")) # User-defined metadata (str->str)
77
- created_at: Mapped[datetime] = mapped_column(
78
- TIMESTAMP(timezone=True), server_default=func.now()
79
- )
80
- updated_at: Mapped[datetime] = mapped_column(
81
- TIMESTAMP(timezone=True), server_default=func.now()
82
- )
79
+ unit_metadata: Mapped[dict] = mapped_column(
80
+ "metadata", JSONB, server_default=sql_text("'{}'::jsonb")
81
+ ) # User-defined metadata (str->str)
82
+ created_at: Mapped[datetime] = mapped_column(TIMESTAMP(timezone=True), server_default=func.now())
83
+ updated_at: Mapped[datetime] = mapped_column(TIMESTAMP(timezone=True), server_default=func.now())
83
84
 
84
85
  # Relationships
85
86
  document = relationship("Document", back_populates="memory_units")
86
87
  unit_entities = relationship("UnitEntity", back_populates="memory_unit", cascade="all, delete-orphan")
87
88
  outgoing_links = relationship(
88
- "MemoryLink",
89
- foreign_keys="MemoryLink.from_unit_id",
90
- back_populates="from_unit",
91
- cascade="all, delete-orphan"
89
+ "MemoryLink", foreign_keys="MemoryLink.from_unit_id", back_populates="from_unit", cascade="all, delete-orphan"
92
90
  )
93
91
  incoming_links = relationship(
94
- "MemoryLink",
95
- foreign_keys="MemoryLink.to_unit_id",
96
- back_populates="to_unit",
97
- cascade="all, delete-orphan"
92
+ "MemoryLink", foreign_keys="MemoryLink.to_unit_id", back_populates="to_unit", cascade="all, delete-orphan"
98
93
  )
99
94
 
100
95
  __table_args__ = (
@@ -110,7 +105,7 @@ class MemoryUnit(Base):
110
105
  "(fact_type = 'opinion' AND confidence_score IS NOT NULL) OR "
111
106
  "(fact_type = 'observation') OR "
112
107
  "(fact_type NOT IN ('opinion', 'observation') AND confidence_score IS NULL)",
113
- name="confidence_score_fact_type_check"
108
+ name="confidence_score_fact_type_check",
114
109
  ),
115
110
  Index("idx_memory_units_bank_id", "bank_id"),
116
111
  Index("idx_memory_units_document_id", "document_id"),
@@ -119,39 +114,46 @@ class MemoryUnit(Base):
119
114
  Index("idx_memory_units_access_count", "access_count", postgresql_ops={"access_count": "DESC"}),
120
115
  Index("idx_memory_units_fact_type", "fact_type"),
121
116
  Index("idx_memory_units_bank_fact_type", "bank_id", "fact_type"),
122
- Index("idx_memory_units_bank_type_date", "bank_id", "fact_type", "event_date", postgresql_ops={"event_date": "DESC"}),
117
+ Index(
118
+ "idx_memory_units_bank_type_date",
119
+ "bank_id",
120
+ "fact_type",
121
+ "event_date",
122
+ postgresql_ops={"event_date": "DESC"},
123
+ ),
123
124
  Index(
124
125
  "idx_memory_units_opinion_confidence",
125
126
  "bank_id",
126
127
  "confidence_score",
127
128
  postgresql_where=sql_text("fact_type = 'opinion'"),
128
- postgresql_ops={"confidence_score": "DESC"}
129
+ postgresql_ops={"confidence_score": "DESC"},
129
130
  ),
130
131
  Index(
131
132
  "idx_memory_units_opinion_date",
132
133
  "bank_id",
133
134
  "event_date",
134
135
  postgresql_where=sql_text("fact_type = 'opinion'"),
135
- postgresql_ops={"event_date": "DESC"}
136
+ postgresql_ops={"event_date": "DESC"},
136
137
  ),
137
138
  Index(
138
139
  "idx_memory_units_observation_date",
139
140
  "bank_id",
140
141
  "event_date",
141
142
  postgresql_where=sql_text("fact_type = 'observation'"),
142
- postgresql_ops={"event_date": "DESC"}
143
+ postgresql_ops={"event_date": "DESC"},
143
144
  ),
144
145
  Index(
145
146
  "idx_memory_units_embedding",
146
147
  "embedding",
147
148
  postgresql_using="hnsw",
148
- postgresql_ops={"embedding": "vector_cosine_ops"}
149
+ postgresql_ops={"embedding": "vector_cosine_ops"},
149
150
  ),
150
151
  )
151
152
 
152
153
 
153
154
  class Entity(Base):
154
155
  """Resolved entities (people, organizations, locations, etc.)."""
156
+
155
157
  __tablename__ = "entities"
156
158
 
157
159
  id: Mapped[PyUUID] = mapped_column(
@@ -160,12 +162,8 @@ class Entity(Base):
160
162
  canonical_name: Mapped[str] = mapped_column(Text, nullable=False)
161
163
  bank_id: Mapped[str] = mapped_column(Text, nullable=False)
162
164
  entity_metadata: Mapped[dict] = mapped_column("metadata", JSONB, server_default=sql_text("'{}'::jsonb"))
163
- first_seen: Mapped[datetime] = mapped_column(
164
- TIMESTAMP(timezone=True), server_default=func.now()
165
- )
166
- last_seen: Mapped[datetime] = mapped_column(
167
- TIMESTAMP(timezone=True), server_default=func.now()
168
- )
165
+ first_seen: Mapped[datetime] = mapped_column(TIMESTAMP(timezone=True), server_default=func.now())
166
+ last_seen: Mapped[datetime] = mapped_column(TIMESTAMP(timezone=True), server_default=func.now())
169
167
  mention_count: Mapped[int] = mapped_column(Integer, server_default="1")
170
168
 
171
169
  # Relationships
@@ -175,13 +173,13 @@ class Entity(Base):
175
173
  "EntityCooccurrence",
176
174
  foreign_keys="EntityCooccurrence.entity_id_1",
177
175
  back_populates="entity_1",
178
- cascade="all, delete-orphan"
176
+ cascade="all, delete-orphan",
179
177
  )
180
178
  cooccurrences_2 = relationship(
181
179
  "EntityCooccurrence",
182
180
  foreign_keys="EntityCooccurrence.entity_id_2",
183
181
  back_populates="entity_2",
184
- cascade="all, delete-orphan"
182
+ cascade="all, delete-orphan",
185
183
  )
186
184
 
187
185
  __table_args__ = (
@@ -193,6 +191,7 @@ class Entity(Base):
193
191
 
194
192
  class UnitEntity(Base):
195
193
  """Association between memory units and entities."""
194
+
196
195
  __tablename__ = "unit_entities"
197
196
 
198
197
  unit_id: Mapped[PyUUID] = mapped_column(
@@ -214,6 +213,7 @@ class UnitEntity(Base):
214
213
 
215
214
  class EntityCooccurrence(Base):
216
215
  """Materialized cache of entity co-occurrences."""
216
+
217
217
  __tablename__ = "entity_cooccurrences"
218
218
 
219
219
  entity_id_1: Mapped[PyUUID] = mapped_column(
@@ -223,9 +223,7 @@ class EntityCooccurrence(Base):
223
223
  UUID(as_uuid=True), ForeignKey("entities.id", ondelete="CASCADE"), primary_key=True
224
224
  )
225
225
  cooccurrence_count: Mapped[int] = mapped_column(Integer, server_default="1")
226
- last_cooccurred: Mapped[datetime] = mapped_column(
227
- TIMESTAMP(timezone=True), server_default=func.now()
228
- )
226
+ last_cooccurred: Mapped[datetime] = mapped_column(TIMESTAMP(timezone=True), server_default=func.now())
229
227
 
230
228
  # Relationships
231
229
  entity_1 = relationship("Entity", foreign_keys=[entity_id_1], back_populates="cooccurrences_1")
@@ -241,6 +239,7 @@ class EntityCooccurrence(Base):
241
239
 
242
240
  class MemoryLink(Base):
243
241
  """Links between memory units (temporal, semantic, entity)."""
242
+
244
243
  __tablename__ = "memory_links"
245
244
 
246
245
  from_unit_id: Mapped[PyUUID] = mapped_column(
@@ -250,13 +249,11 @@ class MemoryLink(Base):
250
249
  UUID(as_uuid=True), ForeignKey("memory_units.id", ondelete="CASCADE"), primary_key=True
251
250
  )
252
251
  link_type: Mapped[str] = mapped_column(Text, primary_key=True)
253
- entity_id: Mapped[Optional[PyUUID]] = mapped_column(
252
+ entity_id: Mapped[PyUUID | None] = mapped_column(
254
253
  UUID(as_uuid=True), ForeignKey("entities.id", ondelete="CASCADE"), primary_key=True
255
254
  )
256
255
  weight: Mapped[float] = mapped_column(Float, nullable=False, server_default="1.0")
257
- created_at: Mapped[datetime] = mapped_column(
258
- TIMESTAMP(timezone=True), server_default=func.now()
259
- )
256
+ created_at: Mapped[datetime] = mapped_column(TIMESTAMP(timezone=True), server_default=func.now())
260
257
 
261
258
  # Relationships
262
259
  from_unit = relationship("MemoryUnit", foreign_keys=[from_unit_id], back_populates="outgoing_links")
@@ -266,7 +263,7 @@ class MemoryLink(Base):
266
263
  __table_args__ = (
267
264
  CheckConstraint(
268
265
  "link_type IN ('temporal', 'semantic', 'entity', 'causes', 'caused_by', 'enables', 'prevents')",
269
- name="memory_links_link_type_check"
266
+ name="memory_links_link_type_check",
270
267
  ),
271
268
  CheckConstraint("weight >= 0.0 AND weight <= 1.0", name="memory_links_weight_check"),
272
269
  Index("idx_memory_links_from", "from_unit_id"),
@@ -278,31 +275,22 @@ class MemoryLink(Base):
278
275
  "from_unit_id",
279
276
  "weight",
280
277
  postgresql_where=sql_text("weight >= 0.1"),
281
- postgresql_ops={"weight": "DESC"}
278
+ postgresql_ops={"weight": "DESC"},
282
279
  ),
283
280
  )
284
281
 
285
282
 
286
283
  class Bank(Base):
287
284
  """Memory bank profiles with disposition traits and background."""
285
+
288
286
  __tablename__ = "banks"
289
287
 
290
288
  bank_id: Mapped[str] = mapped_column(Text, primary_key=True)
291
289
  disposition: Mapped[dict] = mapped_column(
292
- JSONB,
293
- nullable=False,
294
- server_default=sql_text(
295
- '\'{"skepticism": 3, "literalism": 3, "empathy": 3}\'::jsonb'
296
- )
290
+ JSONB, nullable=False, server_default=sql_text('\'{"skepticism": 3, "literalism": 3, "empathy": 3}\'::jsonb')
297
291
  )
298
292
  background: Mapped[str] = mapped_column(Text, nullable=False, server_default="")
299
- created_at: Mapped[datetime] = mapped_column(
300
- TIMESTAMP(timezone=True), server_default=func.now()
301
- )
302
- updated_at: Mapped[datetime] = mapped_column(
303
- TIMESTAMP(timezone=True), server_default=func.now()
304
- )
293
+ created_at: Mapped[datetime] = mapped_column(TIMESTAMP(timezone=True), server_default=func.now())
294
+ updated_at: Mapped[datetime] = mapped_column(TIMESTAMP(timezone=True), server_default=func.now())
305
295
 
306
- __table_args__ = (
307
- Index("idx_banks_bank_id", "bank_id"),
308
- )
296
+ __table_args__ = (Index("idx_banks_bank_id", "bank_id"),)