hindsight-api 0.1.5__py3-none-any.whl → 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hindsight_api/__init__.py +10 -9
- hindsight_api/alembic/env.py +5 -8
- hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +266 -180
- hindsight_api/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py +32 -32
- hindsight_api/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py +11 -11
- hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py +7 -12
- hindsight_api/alembic/versions/e0a1b2c3d4e5_disposition_to_3_traits.py +23 -15
- hindsight_api/alembic/versions/rename_personality_to_disposition.py +30 -21
- hindsight_api/api/__init__.py +10 -10
- hindsight_api/api/http.py +575 -593
- hindsight_api/api/mcp.py +30 -28
- hindsight_api/banner.py +13 -6
- hindsight_api/config.py +9 -13
- hindsight_api/engine/__init__.py +9 -9
- hindsight_api/engine/cross_encoder.py +22 -21
- hindsight_api/engine/db_utils.py +5 -4
- hindsight_api/engine/embeddings.py +22 -21
- hindsight_api/engine/entity_resolver.py +81 -75
- hindsight_api/engine/llm_wrapper.py +61 -79
- hindsight_api/engine/memory_engine.py +603 -625
- hindsight_api/engine/query_analyzer.py +100 -97
- hindsight_api/engine/response_models.py +105 -106
- hindsight_api/engine/retain/__init__.py +9 -16
- hindsight_api/engine/retain/bank_utils.py +34 -58
- hindsight_api/engine/retain/chunk_storage.py +4 -12
- hindsight_api/engine/retain/deduplication.py +9 -28
- hindsight_api/engine/retain/embedding_processing.py +4 -11
- hindsight_api/engine/retain/embedding_utils.py +3 -4
- hindsight_api/engine/retain/entity_processing.py +7 -17
- hindsight_api/engine/retain/fact_extraction.py +155 -165
- hindsight_api/engine/retain/fact_storage.py +11 -23
- hindsight_api/engine/retain/link_creation.py +11 -39
- hindsight_api/engine/retain/link_utils.py +166 -95
- hindsight_api/engine/retain/observation_regeneration.py +39 -52
- hindsight_api/engine/retain/orchestrator.py +72 -62
- hindsight_api/engine/retain/types.py +49 -43
- hindsight_api/engine/search/__init__.py +5 -5
- hindsight_api/engine/search/fusion.py +6 -15
- hindsight_api/engine/search/graph_retrieval.py +22 -23
- hindsight_api/engine/search/mpfp_retrieval.py +76 -92
- hindsight_api/engine/search/observation_utils.py +9 -16
- hindsight_api/engine/search/reranking.py +4 -7
- hindsight_api/engine/search/retrieval.py +87 -66
- hindsight_api/engine/search/scoring.py +5 -7
- hindsight_api/engine/search/temporal_extraction.py +8 -11
- hindsight_api/engine/search/think_utils.py +115 -39
- hindsight_api/engine/search/trace.py +68 -39
- hindsight_api/engine/search/tracer.py +44 -35
- hindsight_api/engine/search/types.py +20 -17
- hindsight_api/engine/task_backend.py +21 -26
- hindsight_api/engine/utils.py +25 -10
- hindsight_api/main.py +21 -40
- hindsight_api/mcp_local.py +190 -0
- hindsight_api/metrics.py +44 -30
- hindsight_api/migrations.py +10 -8
- hindsight_api/models.py +60 -72
- hindsight_api/pg0.py +22 -23
- hindsight_api/server.py +3 -6
- hindsight_api-0.1.7.dist-info/METADATA +178 -0
- hindsight_api-0.1.7.dist-info/RECORD +64 -0
- {hindsight_api-0.1.5.dist-info → hindsight_api-0.1.7.dist-info}/entry_points.txt +1 -0
- hindsight_api-0.1.5.dist-info/METADATA +0 -42
- hindsight_api-0.1.5.dist-info/RECORD +0 -63
- {hindsight_api-0.1.5.dist-info → hindsight_api-0.1.7.dist-info}/WHEEL +0 -0
hindsight_api/models.py
CHANGED
|
@@ -1,49 +1,47 @@
|
|
|
1
1
|
"""
|
|
2
2
|
SQLAlchemy models for the memory system.
|
|
3
3
|
"""
|
|
4
|
+
|
|
4
5
|
from datetime import datetime
|
|
5
|
-
from
|
|
6
|
-
from uuid import UUID as PyUUID, uuid4
|
|
6
|
+
from uuid import UUID as PyUUID
|
|
7
7
|
|
|
8
|
+
from pgvector.sqlalchemy import Vector
|
|
8
9
|
from sqlalchemy import (
|
|
9
10
|
CheckConstraint,
|
|
10
|
-
Column,
|
|
11
11
|
Float,
|
|
12
12
|
ForeignKey,
|
|
13
13
|
ForeignKeyConstraint,
|
|
14
14
|
Index,
|
|
15
15
|
Integer,
|
|
16
|
-
PrimaryKeyConstraint,
|
|
17
16
|
Text,
|
|
18
17
|
func,
|
|
18
|
+
)
|
|
19
|
+
from sqlalchemy import (
|
|
19
20
|
text as sql_text,
|
|
20
21
|
)
|
|
21
22
|
from sqlalchemy.dialects.postgresql import JSONB, TIMESTAMP, UUID
|
|
22
23
|
from sqlalchemy.ext.asyncio import AsyncAttrs
|
|
23
24
|
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship
|
|
24
|
-
from pgvector.sqlalchemy import Vector
|
|
25
25
|
|
|
26
26
|
|
|
27
27
|
class Base(AsyncAttrs, DeclarativeBase):
|
|
28
28
|
"""Base class for all models."""
|
|
29
|
+
|
|
29
30
|
pass
|
|
30
31
|
|
|
31
32
|
|
|
32
33
|
class Document(Base):
|
|
33
34
|
"""Source documents for memory units."""
|
|
35
|
+
|
|
34
36
|
__tablename__ = "documents"
|
|
35
37
|
|
|
36
38
|
id: Mapped[str] = mapped_column(Text, primary_key=True)
|
|
37
39
|
bank_id: Mapped[str] = mapped_column(Text, primary_key=True)
|
|
38
|
-
original_text: Mapped[
|
|
39
|
-
content_hash: Mapped[
|
|
40
|
+
original_text: Mapped[str | None] = mapped_column(Text)
|
|
41
|
+
content_hash: Mapped[str | None] = mapped_column(Text)
|
|
40
42
|
doc_metadata: Mapped[dict] = mapped_column("metadata", JSONB, server_default=sql_text("'{}'::jsonb"))
|
|
41
|
-
created_at: Mapped[datetime] = mapped_column(
|
|
42
|
-
|
|
43
|
-
)
|
|
44
|
-
updated_at: Mapped[datetime] = mapped_column(
|
|
45
|
-
TIMESTAMP(timezone=True), server_default=func.now()
|
|
46
|
-
)
|
|
43
|
+
created_at: Mapped[datetime] = mapped_column(TIMESTAMP(timezone=True), server_default=func.now())
|
|
44
|
+
updated_at: Mapped[datetime] = mapped_column(TIMESTAMP(timezone=True), server_default=func.now())
|
|
47
45
|
|
|
48
46
|
# Relationships
|
|
49
47
|
memory_units = relationship("MemoryUnit", back_populates="document", cascade="all, delete-orphan")
|
|
@@ -56,45 +54,42 @@ class Document(Base):
|
|
|
56
54
|
|
|
57
55
|
class MemoryUnit(Base):
|
|
58
56
|
"""Individual sentence-level memories."""
|
|
57
|
+
|
|
59
58
|
__tablename__ = "memory_units"
|
|
60
59
|
|
|
61
60
|
id: Mapped[PyUUID] = mapped_column(
|
|
62
61
|
UUID(as_uuid=True), primary_key=True, server_default=sql_text("gen_random_uuid()")
|
|
63
62
|
)
|
|
64
63
|
bank_id: Mapped[str] = mapped_column(Text, nullable=False)
|
|
65
|
-
document_id: Mapped[
|
|
64
|
+
document_id: Mapped[str | None] = mapped_column(Text)
|
|
66
65
|
text: Mapped[str] = mapped_column(Text, nullable=False)
|
|
67
66
|
embedding = mapped_column(Vector(384)) # pgvector type
|
|
68
|
-
context: Mapped[
|
|
69
|
-
event_date: Mapped[datetime] = mapped_column(
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
67
|
+
context: Mapped[str | None] = mapped_column(Text)
|
|
68
|
+
event_date: Mapped[datetime] = mapped_column(
|
|
69
|
+
TIMESTAMP(timezone=True), nullable=False
|
|
70
|
+
) # Kept for backward compatibility
|
|
71
|
+
occurred_start: Mapped[datetime | None] = mapped_column(
|
|
72
|
+
TIMESTAMP(timezone=True)
|
|
73
|
+
) # When fact occurred (range start)
|
|
74
|
+
occurred_end: Mapped[datetime | None] = mapped_column(TIMESTAMP(timezone=True)) # When fact occurred (range end)
|
|
75
|
+
mentioned_at: Mapped[datetime | None] = mapped_column(TIMESTAMP(timezone=True)) # When fact was mentioned
|
|
73
76
|
fact_type: Mapped[str] = mapped_column(Text, nullable=False, server_default="world")
|
|
74
|
-
confidence_score: Mapped[
|
|
77
|
+
confidence_score: Mapped[float | None] = mapped_column(Float)
|
|
75
78
|
access_count: Mapped[int] = mapped_column(Integer, server_default="0")
|
|
76
|
-
unit_metadata: Mapped[dict] = mapped_column(
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
)
|
|
80
|
-
updated_at: Mapped[datetime] = mapped_column(
|
|
81
|
-
TIMESTAMP(timezone=True), server_default=func.now()
|
|
82
|
-
)
|
|
79
|
+
unit_metadata: Mapped[dict] = mapped_column(
|
|
80
|
+
"metadata", JSONB, server_default=sql_text("'{}'::jsonb")
|
|
81
|
+
) # User-defined metadata (str->str)
|
|
82
|
+
created_at: Mapped[datetime] = mapped_column(TIMESTAMP(timezone=True), server_default=func.now())
|
|
83
|
+
updated_at: Mapped[datetime] = mapped_column(TIMESTAMP(timezone=True), server_default=func.now())
|
|
83
84
|
|
|
84
85
|
# Relationships
|
|
85
86
|
document = relationship("Document", back_populates="memory_units")
|
|
86
87
|
unit_entities = relationship("UnitEntity", back_populates="memory_unit", cascade="all, delete-orphan")
|
|
87
88
|
outgoing_links = relationship(
|
|
88
|
-
"MemoryLink",
|
|
89
|
-
foreign_keys="MemoryLink.from_unit_id",
|
|
90
|
-
back_populates="from_unit",
|
|
91
|
-
cascade="all, delete-orphan"
|
|
89
|
+
"MemoryLink", foreign_keys="MemoryLink.from_unit_id", back_populates="from_unit", cascade="all, delete-orphan"
|
|
92
90
|
)
|
|
93
91
|
incoming_links = relationship(
|
|
94
|
-
"MemoryLink",
|
|
95
|
-
foreign_keys="MemoryLink.to_unit_id",
|
|
96
|
-
back_populates="to_unit",
|
|
97
|
-
cascade="all, delete-orphan"
|
|
92
|
+
"MemoryLink", foreign_keys="MemoryLink.to_unit_id", back_populates="to_unit", cascade="all, delete-orphan"
|
|
98
93
|
)
|
|
99
94
|
|
|
100
95
|
__table_args__ = (
|
|
@@ -110,7 +105,7 @@ class MemoryUnit(Base):
|
|
|
110
105
|
"(fact_type = 'opinion' AND confidence_score IS NOT NULL) OR "
|
|
111
106
|
"(fact_type = 'observation') OR "
|
|
112
107
|
"(fact_type NOT IN ('opinion', 'observation') AND confidence_score IS NULL)",
|
|
113
|
-
name="confidence_score_fact_type_check"
|
|
108
|
+
name="confidence_score_fact_type_check",
|
|
114
109
|
),
|
|
115
110
|
Index("idx_memory_units_bank_id", "bank_id"),
|
|
116
111
|
Index("idx_memory_units_document_id", "document_id"),
|
|
@@ -119,39 +114,46 @@ class MemoryUnit(Base):
|
|
|
119
114
|
Index("idx_memory_units_access_count", "access_count", postgresql_ops={"access_count": "DESC"}),
|
|
120
115
|
Index("idx_memory_units_fact_type", "fact_type"),
|
|
121
116
|
Index("idx_memory_units_bank_fact_type", "bank_id", "fact_type"),
|
|
122
|
-
Index(
|
|
117
|
+
Index(
|
|
118
|
+
"idx_memory_units_bank_type_date",
|
|
119
|
+
"bank_id",
|
|
120
|
+
"fact_type",
|
|
121
|
+
"event_date",
|
|
122
|
+
postgresql_ops={"event_date": "DESC"},
|
|
123
|
+
),
|
|
123
124
|
Index(
|
|
124
125
|
"idx_memory_units_opinion_confidence",
|
|
125
126
|
"bank_id",
|
|
126
127
|
"confidence_score",
|
|
127
128
|
postgresql_where=sql_text("fact_type = 'opinion'"),
|
|
128
|
-
postgresql_ops={"confidence_score": "DESC"}
|
|
129
|
+
postgresql_ops={"confidence_score": "DESC"},
|
|
129
130
|
),
|
|
130
131
|
Index(
|
|
131
132
|
"idx_memory_units_opinion_date",
|
|
132
133
|
"bank_id",
|
|
133
134
|
"event_date",
|
|
134
135
|
postgresql_where=sql_text("fact_type = 'opinion'"),
|
|
135
|
-
postgresql_ops={"event_date": "DESC"}
|
|
136
|
+
postgresql_ops={"event_date": "DESC"},
|
|
136
137
|
),
|
|
137
138
|
Index(
|
|
138
139
|
"idx_memory_units_observation_date",
|
|
139
140
|
"bank_id",
|
|
140
141
|
"event_date",
|
|
141
142
|
postgresql_where=sql_text("fact_type = 'observation'"),
|
|
142
|
-
postgresql_ops={"event_date": "DESC"}
|
|
143
|
+
postgresql_ops={"event_date": "DESC"},
|
|
143
144
|
),
|
|
144
145
|
Index(
|
|
145
146
|
"idx_memory_units_embedding",
|
|
146
147
|
"embedding",
|
|
147
148
|
postgresql_using="hnsw",
|
|
148
|
-
postgresql_ops={"embedding": "vector_cosine_ops"}
|
|
149
|
+
postgresql_ops={"embedding": "vector_cosine_ops"},
|
|
149
150
|
),
|
|
150
151
|
)
|
|
151
152
|
|
|
152
153
|
|
|
153
154
|
class Entity(Base):
|
|
154
155
|
"""Resolved entities (people, organizations, locations, etc.)."""
|
|
156
|
+
|
|
155
157
|
__tablename__ = "entities"
|
|
156
158
|
|
|
157
159
|
id: Mapped[PyUUID] = mapped_column(
|
|
@@ -160,12 +162,8 @@ class Entity(Base):
|
|
|
160
162
|
canonical_name: Mapped[str] = mapped_column(Text, nullable=False)
|
|
161
163
|
bank_id: Mapped[str] = mapped_column(Text, nullable=False)
|
|
162
164
|
entity_metadata: Mapped[dict] = mapped_column("metadata", JSONB, server_default=sql_text("'{}'::jsonb"))
|
|
163
|
-
first_seen: Mapped[datetime] = mapped_column(
|
|
164
|
-
|
|
165
|
-
)
|
|
166
|
-
last_seen: Mapped[datetime] = mapped_column(
|
|
167
|
-
TIMESTAMP(timezone=True), server_default=func.now()
|
|
168
|
-
)
|
|
165
|
+
first_seen: Mapped[datetime] = mapped_column(TIMESTAMP(timezone=True), server_default=func.now())
|
|
166
|
+
last_seen: Mapped[datetime] = mapped_column(TIMESTAMP(timezone=True), server_default=func.now())
|
|
169
167
|
mention_count: Mapped[int] = mapped_column(Integer, server_default="1")
|
|
170
168
|
|
|
171
169
|
# Relationships
|
|
@@ -175,13 +173,13 @@ class Entity(Base):
|
|
|
175
173
|
"EntityCooccurrence",
|
|
176
174
|
foreign_keys="EntityCooccurrence.entity_id_1",
|
|
177
175
|
back_populates="entity_1",
|
|
178
|
-
cascade="all, delete-orphan"
|
|
176
|
+
cascade="all, delete-orphan",
|
|
179
177
|
)
|
|
180
178
|
cooccurrences_2 = relationship(
|
|
181
179
|
"EntityCooccurrence",
|
|
182
180
|
foreign_keys="EntityCooccurrence.entity_id_2",
|
|
183
181
|
back_populates="entity_2",
|
|
184
|
-
cascade="all, delete-orphan"
|
|
182
|
+
cascade="all, delete-orphan",
|
|
185
183
|
)
|
|
186
184
|
|
|
187
185
|
__table_args__ = (
|
|
@@ -193,6 +191,7 @@ class Entity(Base):
|
|
|
193
191
|
|
|
194
192
|
class UnitEntity(Base):
|
|
195
193
|
"""Association between memory units and entities."""
|
|
194
|
+
|
|
196
195
|
__tablename__ = "unit_entities"
|
|
197
196
|
|
|
198
197
|
unit_id: Mapped[PyUUID] = mapped_column(
|
|
@@ -214,6 +213,7 @@ class UnitEntity(Base):
|
|
|
214
213
|
|
|
215
214
|
class EntityCooccurrence(Base):
|
|
216
215
|
"""Materialized cache of entity co-occurrences."""
|
|
216
|
+
|
|
217
217
|
__tablename__ = "entity_cooccurrences"
|
|
218
218
|
|
|
219
219
|
entity_id_1: Mapped[PyUUID] = mapped_column(
|
|
@@ -223,9 +223,7 @@ class EntityCooccurrence(Base):
|
|
|
223
223
|
UUID(as_uuid=True), ForeignKey("entities.id", ondelete="CASCADE"), primary_key=True
|
|
224
224
|
)
|
|
225
225
|
cooccurrence_count: Mapped[int] = mapped_column(Integer, server_default="1")
|
|
226
|
-
last_cooccurred: Mapped[datetime] = mapped_column(
|
|
227
|
-
TIMESTAMP(timezone=True), server_default=func.now()
|
|
228
|
-
)
|
|
226
|
+
last_cooccurred: Mapped[datetime] = mapped_column(TIMESTAMP(timezone=True), server_default=func.now())
|
|
229
227
|
|
|
230
228
|
# Relationships
|
|
231
229
|
entity_1 = relationship("Entity", foreign_keys=[entity_id_1], back_populates="cooccurrences_1")
|
|
@@ -241,6 +239,7 @@ class EntityCooccurrence(Base):
|
|
|
241
239
|
|
|
242
240
|
class MemoryLink(Base):
|
|
243
241
|
"""Links between memory units (temporal, semantic, entity)."""
|
|
242
|
+
|
|
244
243
|
__tablename__ = "memory_links"
|
|
245
244
|
|
|
246
245
|
from_unit_id: Mapped[PyUUID] = mapped_column(
|
|
@@ -250,13 +249,11 @@ class MemoryLink(Base):
|
|
|
250
249
|
UUID(as_uuid=True), ForeignKey("memory_units.id", ondelete="CASCADE"), primary_key=True
|
|
251
250
|
)
|
|
252
251
|
link_type: Mapped[str] = mapped_column(Text, primary_key=True)
|
|
253
|
-
entity_id: Mapped[
|
|
252
|
+
entity_id: Mapped[PyUUID | None] = mapped_column(
|
|
254
253
|
UUID(as_uuid=True), ForeignKey("entities.id", ondelete="CASCADE"), primary_key=True
|
|
255
254
|
)
|
|
256
255
|
weight: Mapped[float] = mapped_column(Float, nullable=False, server_default="1.0")
|
|
257
|
-
created_at: Mapped[datetime] = mapped_column(
|
|
258
|
-
TIMESTAMP(timezone=True), server_default=func.now()
|
|
259
|
-
)
|
|
256
|
+
created_at: Mapped[datetime] = mapped_column(TIMESTAMP(timezone=True), server_default=func.now())
|
|
260
257
|
|
|
261
258
|
# Relationships
|
|
262
259
|
from_unit = relationship("MemoryUnit", foreign_keys=[from_unit_id], back_populates="outgoing_links")
|
|
@@ -266,7 +263,7 @@ class MemoryLink(Base):
|
|
|
266
263
|
__table_args__ = (
|
|
267
264
|
CheckConstraint(
|
|
268
265
|
"link_type IN ('temporal', 'semantic', 'entity', 'causes', 'caused_by', 'enables', 'prevents')",
|
|
269
|
-
name="memory_links_link_type_check"
|
|
266
|
+
name="memory_links_link_type_check",
|
|
270
267
|
),
|
|
271
268
|
CheckConstraint("weight >= 0.0 AND weight <= 1.0", name="memory_links_weight_check"),
|
|
272
269
|
Index("idx_memory_links_from", "from_unit_id"),
|
|
@@ -278,31 +275,22 @@ class MemoryLink(Base):
|
|
|
278
275
|
"from_unit_id",
|
|
279
276
|
"weight",
|
|
280
277
|
postgresql_where=sql_text("weight >= 0.1"),
|
|
281
|
-
postgresql_ops={"weight": "DESC"}
|
|
278
|
+
postgresql_ops={"weight": "DESC"},
|
|
282
279
|
),
|
|
283
280
|
)
|
|
284
281
|
|
|
285
282
|
|
|
286
283
|
class Bank(Base):
|
|
287
284
|
"""Memory bank profiles with disposition traits and background."""
|
|
285
|
+
|
|
288
286
|
__tablename__ = "banks"
|
|
289
287
|
|
|
290
288
|
bank_id: Mapped[str] = mapped_column(Text, primary_key=True)
|
|
291
289
|
disposition: Mapped[dict] = mapped_column(
|
|
292
|
-
JSONB,
|
|
293
|
-
nullable=False,
|
|
294
|
-
server_default=sql_text(
|
|
295
|
-
'\'{"skepticism": 3, "literalism": 3, "empathy": 3}\'::jsonb'
|
|
296
|
-
)
|
|
290
|
+
JSONB, nullable=False, server_default=sql_text('\'{"skepticism": 3, "literalism": 3, "empathy": 3}\'::jsonb')
|
|
297
291
|
)
|
|
298
292
|
background: Mapped[str] = mapped_column(Text, nullable=False, server_default="")
|
|
299
|
-
created_at: Mapped[datetime] = mapped_column(
|
|
300
|
-
|
|
301
|
-
)
|
|
302
|
-
updated_at: Mapped[datetime] = mapped_column(
|
|
303
|
-
TIMESTAMP(timezone=True), server_default=func.now()
|
|
304
|
-
)
|
|
293
|
+
created_at: Mapped[datetime] = mapped_column(TIMESTAMP(timezone=True), server_default=func.now())
|
|
294
|
+
updated_at: Mapped[datetime] = mapped_column(TIMESTAMP(timezone=True), server_default=func.now())
|
|
305
295
|
|
|
306
|
-
__table_args__ = (
|
|
307
|
-
Index("idx_banks_bank_id", "bank_id"),
|
|
308
|
-
)
|
|
296
|
+
__table_args__ = (Index("idx_banks_bank_id", "bank_id"),)
|
hindsight_api/pg0.py
CHANGED
|
@@ -1,12 +1,10 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
import logging
|
|
3
|
-
from typing import Optional
|
|
4
3
|
|
|
5
4
|
from pg0 import Pg0
|
|
6
5
|
|
|
7
6
|
logger = logging.getLogger(__name__)
|
|
8
7
|
|
|
9
|
-
DEFAULT_PORT = 5555
|
|
10
8
|
DEFAULT_USERNAME = "hindsight"
|
|
11
9
|
DEFAULT_PASSWORD = "hindsight"
|
|
12
10
|
DEFAULT_DATABASE = "hindsight"
|
|
@@ -17,34 +15,38 @@ class EmbeddedPostgres:
|
|
|
17
15
|
|
|
18
16
|
def __init__(
|
|
19
17
|
self,
|
|
20
|
-
port: int =
|
|
18
|
+
port: int | None = None,
|
|
21
19
|
username: str = DEFAULT_USERNAME,
|
|
22
20
|
password: str = DEFAULT_PASSWORD,
|
|
23
21
|
database: str = DEFAULT_DATABASE,
|
|
24
22
|
name: str = "hindsight",
|
|
25
23
|
**kwargs,
|
|
26
24
|
):
|
|
27
|
-
self.port = port
|
|
25
|
+
self.port = port # None means pg0 will auto-assign
|
|
28
26
|
self.username = username
|
|
29
27
|
self.password = password
|
|
30
28
|
self.database = database
|
|
31
29
|
self.name = name
|
|
32
|
-
self._pg0:
|
|
30
|
+
self._pg0: Pg0 | None = None
|
|
33
31
|
|
|
34
32
|
def _get_pg0(self) -> Pg0:
|
|
35
33
|
if self._pg0 is None:
|
|
36
|
-
|
|
37
|
-
name
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
34
|
+
kwargs = {
|
|
35
|
+
"name": self.name,
|
|
36
|
+
"username": self.username,
|
|
37
|
+
"password": self.password,
|
|
38
|
+
"database": self.database,
|
|
39
|
+
}
|
|
40
|
+
# Only set port if explicitly specified
|
|
41
|
+
if self.port is not None:
|
|
42
|
+
kwargs["port"] = self.port
|
|
43
|
+
self._pg0 = Pg0(**kwargs)
|
|
43
44
|
return self._pg0
|
|
44
45
|
|
|
45
|
-
async def start(self, max_retries: int =
|
|
46
|
+
async def start(self, max_retries: int = 5, retry_delay: float = 4.0) -> str:
|
|
46
47
|
"""Start the PostgreSQL server with retry logic."""
|
|
47
|
-
|
|
48
|
+
port_info = f"port={self.port}" if self.port else "port=auto"
|
|
49
|
+
logger.info(f"Starting embedded PostgreSQL (name={self.name}, {port_info})...")
|
|
48
50
|
|
|
49
51
|
pg0 = self._get_pg0()
|
|
50
52
|
last_error = None
|
|
@@ -53,9 +55,9 @@ class EmbeddedPostgres:
|
|
|
53
55
|
try:
|
|
54
56
|
loop = asyncio.get_event_loop()
|
|
55
57
|
info = await loop.run_in_executor(None, pg0.start)
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
58
|
+
# Get URI from pg0 (includes auto-assigned port)
|
|
59
|
+
uri = info.uri
|
|
60
|
+
logger.info(f"PostgreSQL started: {uri}")
|
|
59
61
|
return uri
|
|
60
62
|
except Exception as e:
|
|
61
63
|
last_error = str(e)
|
|
@@ -68,8 +70,7 @@ class EmbeddedPostgres:
|
|
|
68
70
|
logger.debug(f"pg0 start attempt {attempt}/{max_retries} failed: {last_error}")
|
|
69
71
|
|
|
70
72
|
raise RuntimeError(
|
|
71
|
-
f"Failed to start embedded PostgreSQL after {max_retries} attempts. "
|
|
72
|
-
f"Last error: {last_error}"
|
|
73
|
+
f"Failed to start embedded PostgreSQL after {max_retries} attempts. Last error: {last_error}"
|
|
73
74
|
)
|
|
74
75
|
|
|
75
76
|
async def stop(self) -> None:
|
|
@@ -91,9 +92,7 @@ class EmbeddedPostgres:
|
|
|
91
92
|
pg0 = self._get_pg0()
|
|
92
93
|
loop = asyncio.get_event_loop()
|
|
93
94
|
info = await loop.run_in_executor(None, pg0.info)
|
|
94
|
-
|
|
95
|
-
uri = info.uri if info and info.uri else f"postgresql://{self.username}:{self.password}@localhost:{self.port}/{self.database}"
|
|
96
|
-
return uri
|
|
95
|
+
return info.uri
|
|
97
96
|
|
|
98
97
|
async def is_running(self) -> bool:
|
|
99
98
|
"""Check if the PostgreSQL server is currently running."""
|
|
@@ -112,7 +111,7 @@ class EmbeddedPostgres:
|
|
|
112
111
|
return await self.start()
|
|
113
112
|
|
|
114
113
|
|
|
115
|
-
_default_instance:
|
|
114
|
+
_default_instance: EmbeddedPostgres | None = None
|
|
116
115
|
|
|
117
116
|
|
|
118
117
|
def get_embedded_postgres() -> EmbeddedPostgres:
|
hindsight_api/server.py
CHANGED
|
@@ -6,6 +6,7 @@ This module provides the ASGI app for uvicorn import string usage:
|
|
|
6
6
|
|
|
7
7
|
For CLI usage, use the hindsight-api command instead.
|
|
8
8
|
"""
|
|
9
|
+
|
|
9
10
|
import os
|
|
10
11
|
import warnings
|
|
11
12
|
|
|
@@ -29,15 +30,11 @@ config.configure_logging()
|
|
|
29
30
|
_memory = MemoryEngine()
|
|
30
31
|
|
|
31
32
|
# Create unified app with both HTTP and optionally MCP
|
|
32
|
-
app = create_app(
|
|
33
|
-
memory=_memory,
|
|
34
|
-
http_api_enabled=True,
|
|
35
|
-
mcp_api_enabled=config.mcp_enabled,
|
|
36
|
-
mcp_mount_path="/mcp"
|
|
37
|
-
)
|
|
33
|
+
app = create_app(memory=_memory, http_api_enabled=True, mcp_api_enabled=config.mcp_enabled, mcp_mount_path="/mcp")
|
|
38
34
|
|
|
39
35
|
|
|
40
36
|
if __name__ == "__main__":
|
|
41
37
|
# When run directly, delegate to the CLI
|
|
42
38
|
from hindsight_api.main import main
|
|
39
|
+
|
|
43
40
|
main()
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: hindsight-api
|
|
3
|
+
Version: 0.1.7
|
|
4
|
+
Summary: Hindsight: Agent Memory That Works Like Human Memory
|
|
5
|
+
Requires-Python: >=3.11
|
|
6
|
+
Requires-Dist: alembic>=1.17.1
|
|
7
|
+
Requires-Dist: asyncpg>=0.29.0
|
|
8
|
+
Requires-Dist: dateparser>=1.2.2
|
|
9
|
+
Requires-Dist: fastapi[standard]>=0.120.3
|
|
10
|
+
Requires-Dist: fastmcp>=2.3.0
|
|
11
|
+
Requires-Dist: google-genai>=1.0.0
|
|
12
|
+
Requires-Dist: greenlet>=3.2.4
|
|
13
|
+
Requires-Dist: httpx>=0.27.0
|
|
14
|
+
Requires-Dist: langchain-text-splitters>=0.3.0
|
|
15
|
+
Requires-Dist: openai>=1.0.0
|
|
16
|
+
Requires-Dist: opentelemetry-api>=1.20.0
|
|
17
|
+
Requires-Dist: opentelemetry-exporter-prometheus>=0.41b0
|
|
18
|
+
Requires-Dist: opentelemetry-instrumentation-fastapi>=0.41b0
|
|
19
|
+
Requires-Dist: opentelemetry-sdk>=1.20.0
|
|
20
|
+
Requires-Dist: pg0-embedded>=0.11.0
|
|
21
|
+
Requires-Dist: pgvector>=0.4.1
|
|
22
|
+
Requires-Dist: psycopg2-binary>=2.9.11
|
|
23
|
+
Requires-Dist: pydantic>=2.0.0
|
|
24
|
+
Requires-Dist: python-dateutil>=2.8.0
|
|
25
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
26
|
+
Requires-Dist: rich>=13.0.0
|
|
27
|
+
Requires-Dist: sentence-transformers<3.3.0,>=3.0.0
|
|
28
|
+
Requires-Dist: sqlalchemy>=2.0.44
|
|
29
|
+
Requires-Dist: tiktoken>=0.12.0
|
|
30
|
+
Requires-Dist: torch>=2.0.0
|
|
31
|
+
Requires-Dist: transformers<4.46.0,>=4.30.0
|
|
32
|
+
Requires-Dist: uvicorn>=0.38.0
|
|
33
|
+
Requires-Dist: wsproto>=1.0.0
|
|
34
|
+
Provides-Extra: test
|
|
35
|
+
Requires-Dist: filelock>=3.0.0; extra == 'test'
|
|
36
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == 'test'
|
|
37
|
+
Requires-Dist: pytest-timeout>=2.4.0; extra == 'test'
|
|
38
|
+
Requires-Dist: pytest-xdist>=3.0.0; extra == 'test'
|
|
39
|
+
Requires-Dist: pytest>=7.0.0; extra == 'test'
|
|
40
|
+
Description-Content-Type: text/markdown
|
|
41
|
+
|
|
42
|
+
# Hindsight API
|
|
43
|
+
|
|
44
|
+
**Memory System for AI Agents** — Temporal + Semantic + Entity Memory Architecture using PostgreSQL with pgvector.
|
|
45
|
+
|
|
46
|
+
Hindsight gives AI agents persistent memory that works like human memory: it stores facts, tracks entities and relationships, handles temporal reasoning ("what happened last spring?"), and forms opinions based on configurable disposition traits.
|
|
47
|
+
|
|
48
|
+
## Installation
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
pip install hindsight-api
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## Quick Start
|
|
55
|
+
|
|
56
|
+
### Run the Server
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
# Set your LLM provider
|
|
60
|
+
export HINDSIGHT_API_LLM_PROVIDER=openai
|
|
61
|
+
export HINDSIGHT_API_LLM_API_KEY=sk-xxxxxxxxxxxx
|
|
62
|
+
|
|
63
|
+
# Start the server (uses embedded PostgreSQL by default)
|
|
64
|
+
hindsight-api
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
The server listens on http://localhost:8888 and exposes:
|
|
68
|
+
- REST API for memory operations
|
|
69
|
+
- MCP server at `/mcp` for tool-use integration
|
|
70
|
+
|
|
71
|
+
### Use the Python API
|
|
72
|
+
|
|
73
|
+
```python
|
|
74
|
+
from hindsight_api import MemoryEngine
|
|
75
|
+
|
|
76
|
+
# Create and initialize the memory engine
|
|
77
|
+
memory = MemoryEngine()
|
|
78
|
+
await memory.initialize()
|
|
79
|
+
|
|
80
|
+
# Create a memory bank for your agent
|
|
81
|
+
bank = await memory.create_memory_bank(
|
|
82
|
+
name="my-assistant",
|
|
83
|
+
background="A helpful coding assistant"
|
|
84
|
+
)
|
|
85
|
+
|
|
86
|
+
# Store a memory
|
|
87
|
+
await memory.retain(
|
|
88
|
+
memory_bank_id=bank.id,
|
|
89
|
+
content="The user prefers Python for data science projects"
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
# Recall memories
|
|
93
|
+
results = await memory.recall(
|
|
94
|
+
memory_bank_id=bank.id,
|
|
95
|
+
query="What programming language does the user prefer?"
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
# Reflect with reasoning
|
|
99
|
+
response = await memory.reflect(
|
|
100
|
+
memory_bank_id=bank.id,
|
|
101
|
+
query="Should I recommend Python or R for this ML project?"
|
|
102
|
+
)
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
## CLI Options
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
hindsight-api --help
|
|
109
|
+
|
|
110
|
+
# Common options
|
|
111
|
+
hindsight-api --port 9000 # Custom port (default: 8888)
|
|
112
|
+
hindsight-api --host 127.0.0.1 # Bind to localhost only
|
|
113
|
+
hindsight-api --workers 4 # Multiple worker processes
|
|
114
|
+
hindsight-api --log-level debug # Verbose logging
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
## Configuration
|
|
118
|
+
|
|
119
|
+
Configure via environment variables:
|
|
120
|
+
|
|
121
|
+
| Variable | Description | Default |
|
|
122
|
+
|----------|-------------|---------|
|
|
123
|
+
| `HINDSIGHT_API_DATABASE_URL` | PostgreSQL connection string | `pg0` (embedded) |
|
|
124
|
+
| `HINDSIGHT_API_LLM_PROVIDER` | `openai`, `groq`, `gemini`, `ollama` | `openai` |
|
|
125
|
+
| `HINDSIGHT_API_LLM_API_KEY` | API key for LLM provider | - |
|
|
126
|
+
| `HINDSIGHT_API_LLM_MODEL` | Model name | `gpt-4o-mini` |
|
|
127
|
+
| `HINDSIGHT_API_HOST` | Server bind address | `0.0.0.0` |
|
|
128
|
+
| `HINDSIGHT_API_PORT` | Server port | `8888` |
|
|
129
|
+
|
|
130
|
+
### Example with External PostgreSQL
|
|
131
|
+
|
|
132
|
+
```bash
|
|
133
|
+
export HINDSIGHT_API_DATABASE_URL=postgresql://user:pass@localhost:5432/hindsight
|
|
134
|
+
export HINDSIGHT_API_LLM_PROVIDER=groq
|
|
135
|
+
export HINDSIGHT_API_LLM_API_KEY=gsk_xxxxxxxxxxxx
|
|
136
|
+
|
|
137
|
+
hindsight-api
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
## Docker
|
|
141
|
+
|
|
142
|
+
```bash
|
|
143
|
+
docker run --rm -it -p 8888:8888 \
|
|
144
|
+
-e HINDSIGHT_API_LLM_API_KEY=$OPENAI_API_KEY \
|
|
145
|
+
-v $HOME/.hindsight-docker:/home/hindsight/.pg0 \
|
|
146
|
+
ghcr.io/vectorize-io/hindsight:latest
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
## MCP Server
|
|
150
|
+
|
|
151
|
+
For local MCP integration without running the full API server:
|
|
152
|
+
|
|
153
|
+
```bash
|
|
154
|
+
hindsight-local-mcp
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
This runs a stdio-based MCP server that can be used directly with MCP-compatible clients.
|
|
158
|
+
|
|
159
|
+
## Key Features
|
|
160
|
+
|
|
161
|
+
- **Multi-Strategy Retrieval (TEMPR)** — Semantic, keyword, graph, and temporal search combined via Reciprocal Rank Fusion (RRF)
|
|
162
|
+
- **Entity Graph** — Automatic entity extraction and relationship tracking
|
|
163
|
+
- **Temporal Reasoning** — Native support for time-based queries
|
|
164
|
+
- **Disposition Traits** — Configurable skepticism, literalism, and empathy influence opinion formation
|
|
165
|
+
- **Three Memory Types** — World facts, bank actions, and formed opinions with confidence scores
|
|
166
|
+
|
|
167
|
+
## Documentation
|
|
168
|
+
|
|
169
|
+
Full documentation: [https://hindsight.vectorize.io](https://hindsight.vectorize.io)
|
|
170
|
+
|
|
171
|
+
- [Installation Guide](https://hindsight.vectorize.io/developer/installation)
|
|
172
|
+
- [Configuration Reference](https://hindsight.vectorize.io/developer/configuration)
|
|
173
|
+
- [API Reference](https://hindsight.vectorize.io/api-reference)
|
|
174
|
+
- [Python SDK](https://hindsight.vectorize.io/sdks/python)
|
|
175
|
+
|
|
176
|
+
## License
|
|
177
|
+
|
|
178
|
+
Apache 2.0
|