hindsight-api 0.0.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hindsight_api/__init__.py +38 -0
- hindsight_api/api/__init__.py +105 -0
- hindsight_api/api/http.py +1872 -0
- hindsight_api/api/mcp.py +157 -0
- hindsight_api/engine/__init__.py +47 -0
- hindsight_api/engine/cross_encoder.py +97 -0
- hindsight_api/engine/db_utils.py +93 -0
- hindsight_api/engine/embeddings.py +113 -0
- hindsight_api/engine/entity_resolver.py +575 -0
- hindsight_api/engine/llm_wrapper.py +269 -0
- hindsight_api/engine/memory_engine.py +3095 -0
- hindsight_api/engine/query_analyzer.py +519 -0
- hindsight_api/engine/response_models.py +222 -0
- hindsight_api/engine/retain/__init__.py +50 -0
- hindsight_api/engine/retain/bank_utils.py +423 -0
- hindsight_api/engine/retain/chunk_storage.py +82 -0
- hindsight_api/engine/retain/deduplication.py +104 -0
- hindsight_api/engine/retain/embedding_processing.py +62 -0
- hindsight_api/engine/retain/embedding_utils.py +54 -0
- hindsight_api/engine/retain/entity_processing.py +90 -0
- hindsight_api/engine/retain/fact_extraction.py +1027 -0
- hindsight_api/engine/retain/fact_storage.py +176 -0
- hindsight_api/engine/retain/link_creation.py +121 -0
- hindsight_api/engine/retain/link_utils.py +651 -0
- hindsight_api/engine/retain/orchestrator.py +405 -0
- hindsight_api/engine/retain/types.py +206 -0
- hindsight_api/engine/search/__init__.py +15 -0
- hindsight_api/engine/search/fusion.py +122 -0
- hindsight_api/engine/search/observation_utils.py +132 -0
- hindsight_api/engine/search/reranking.py +103 -0
- hindsight_api/engine/search/retrieval.py +503 -0
- hindsight_api/engine/search/scoring.py +161 -0
- hindsight_api/engine/search/temporal_extraction.py +64 -0
- hindsight_api/engine/search/think_utils.py +255 -0
- hindsight_api/engine/search/trace.py +215 -0
- hindsight_api/engine/search/tracer.py +447 -0
- hindsight_api/engine/search/types.py +160 -0
- hindsight_api/engine/task_backend.py +223 -0
- hindsight_api/engine/utils.py +203 -0
- hindsight_api/metrics.py +227 -0
- hindsight_api/migrations.py +163 -0
- hindsight_api/models.py +309 -0
- hindsight_api/pg0.py +425 -0
- hindsight_api/web/__init__.py +12 -0
- hindsight_api/web/server.py +143 -0
- hindsight_api-0.0.13.dist-info/METADATA +41 -0
- hindsight_api-0.0.13.dist-info/RECORD +48 -0
- hindsight_api-0.0.13.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Fact storage for retain pipeline.
|
|
3
|
+
|
|
4
|
+
Handles insertion of facts into the database.
|
|
5
|
+
"""
|
|
6
|
+
import logging
|
|
7
|
+
import json
|
|
8
|
+
from typing import List, Optional
|
|
9
|
+
from uuid import UUID
|
|
10
|
+
|
|
11
|
+
from .types import ProcessedFact
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
async def insert_facts_batch(
    conn,
    bank_id: str,
    facts: List[ProcessedFact],
    document_id: Optional[str] = None
) -> List[str]:
    """
    Batch-insert a list of facts into ``memory_units``.

    Args:
        conn: Database connection
        bank_id: Bank identifier
        facts: List of ProcessedFact objects to insert
        document_id: Optional document ID to associate with facts

    Returns:
        List of unit IDs (UUIDs as strings) for the inserted facts
    """
    if not facts:
        return []

    # Column-wise payloads for the unnest-based batch insert below.
    fact_texts = [fact.fact_text for fact in facts]
    # asyncpg's vector type takes the embedding serialized as a string.
    embeddings = [str(fact.embedding) for fact in facts]
    # event_date falls back to mentioned_at when occurred_start is None,
    # which keeps backward compatibility with older rows.
    event_dates = [
        fact.occurred_start if fact.occurred_start is not None else fact.mentioned_at
        for fact in facts
    ]
    occurred_starts = [fact.occurred_start for fact in facts]
    occurred_ends = [fact.occurred_end for fact in facts]
    mentioned_ats = [fact.mentioned_at for fact in facts]
    contexts = [fact.context for fact in facts]
    fact_types = [fact.fact_type for fact in facts]
    # confidence_score is only meaningful for opinion facts.
    confidence_scores = [
        1.0 if fact.fact_type == 'opinion' else None for fact in facts
    ]
    # Every fact starts with a zero access count.
    access_counts = [0] * len(facts)
    metadata_jsons = [json.dumps(fact.metadata) for fact in facts]
    chunk_ids = [fact.chunk_id for fact in facts]
    # A per-fact document_id takes precedence over the batch-level one.
    document_ids = [
        fact.document_id if fact.document_id else document_id for fact in facts
    ]

    # Single round trip: unnest expands the parallel arrays into rows.
    rows = await conn.fetch(
        """
        INSERT INTO memory_units (bank_id, text, embedding, event_date, occurred_start, occurred_end, mentioned_at,
                                  context, fact_type, confidence_score, access_count, metadata, chunk_id, document_id)
        SELECT $1, * FROM unnest(
            $2::text[], $3::vector[], $4::timestamptz[], $5::timestamptz[], $6::timestamptz[], $7::timestamptz[],
            $8::text[], $9::text[], $10::float[], $11::int[], $12::jsonb[], $13::text[], $14::text[]
        )
        RETURNING id
        """,
        bank_id,
        fact_texts,
        embeddings,
        event_dates,  # event_date: occurred_start if available, else mentioned_at
        occurred_starts,
        occurred_ends,
        mentioned_ats,
        contexts,
        fact_types,
        confidence_scores,
        access_counts,
        metadata_jsons,
        chunk_ids,
        document_ids
    )

    return [str(row['id']) for row in rows]
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
async def ensure_bank_exists(conn, bank_id: str) -> None:
    """
    Guarantee a row exists in ``banks`` for the given bank.

    Missing banks are created with neutral (0.5) default personality values
    and an empty background; an existing bank only gets its ``updated_at``
    bumped.

    Args:
        conn: Database connection
        bank_id: Bank identifier
    """
    # Neutral Big-Five-style defaults used when the bank is first created.
    default_personality = '{"openness": 0.5, "conscientiousness": 0.5, "extraversion": 0.5, "agreeableness": 0.5, "neuroticism": 0.5, "bias_strength": 0.5}'
    default_background = ""

    await conn.execute(
        """
        INSERT INTO banks (bank_id, personality, background)
        VALUES ($1, $2::jsonb, $3)
        ON CONFLICT (bank_id) DO UPDATE
        SET updated_at = NOW()
        """,
        bank_id,
        default_personality,
        default_background
    )
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
async def handle_document_tracking(
    conn,
    bank_id: str,
    document_id: str,
    combined_content: str,
    is_first_batch: bool,
    retain_params: Optional[dict] = None
) -> None:
    """
    Record (or refresh) the document row backing a retain operation.

    Args:
        conn: Database connection
        bank_id: Bank identifier
        document_id: Document identifier
        combined_content: Combined content text from all content items
        is_first_batch: Whether this is the first batch (for chunked operations)
        retain_params: Optional parameters passed during retain (context, event_date, etc.)
    """
    import hashlib

    # Hash of the full content, used to detect re-ingestion of identical text.
    content_hash = hashlib.sha256(combined_content.encode()).hexdigest()

    # On the first batch, drop any prior version of this document so its
    # stale units and links go away via cascade. Later batches must skip the
    # delete, otherwise we would wipe rows inserted earlier in this run.
    if is_first_batch:
        await conn.fetchval(
            "DELETE FROM documents WHERE id = $1 AND bank_id = $2 RETURNING id",
            document_id, bank_id
        )

    # Empty/None retain_params are stored as SQL NULL rather than "{}".
    serialized_params = json.dumps(retain_params) if retain_params else None

    # Upsert: the conflict branch covers concurrent retains of the same id.
    await conn.execute(
        """
        INSERT INTO documents (id, bank_id, original_text, content_hash, metadata, retain_params)
        VALUES ($1, $2, $3, $4, $5, $6)
        ON CONFLICT (id, bank_id) DO UPDATE
        SET original_text = EXCLUDED.original_text,
            content_hash = EXCLUDED.content_hash,
            metadata = EXCLUDED.metadata,
            retain_params = EXCLUDED.retain_params,
            updated_at = NOW()
        """,
        document_id,
        bank_id,
        combined_content,
        content_hash,
        json.dumps({}),  # metadata starts out as an empty object
        serialized_params
    )
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Link creation for retain pipeline.
|
|
3
|
+
|
|
4
|
+
Handles creation of temporal, semantic, and causal links between facts.
|
|
5
|
+
"""
|
|
6
|
+
import logging
|
|
7
|
+
from typing import List
|
|
8
|
+
|
|
9
|
+
from .types import ProcessedFact, CausalRelation
|
|
10
|
+
from . import link_utils
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
async def create_temporal_links_batch(
    conn,
    bank_id: str,
    unit_ids: List[str]
) -> None:
    """
    Link facts that occurred close together in time.

    Delegates the per-fact linking work to link_utils; a no-op when there
    are no unit IDs.

    Args:
        conn: Database connection
        bank_id: Bank identifier
        unit_ids: List of unit IDs to create links for
    """
    if unit_ids:
        await link_utils.create_temporal_links_batch_per_fact(
            conn,
            bank_id,
            unit_ids,
            log_buffer=[],
        )
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
async def create_semantic_links_batch(
    conn,
    bank_id: str,
    unit_ids: List[str],
    embeddings: List[List[float]]
) -> None:
    """
    Link facts whose embeddings are semantically similar.

    Delegates the similarity linking to link_utils; a no-op when either
    input list is empty.

    Args:
        conn: Database connection
        bank_id: Bank identifier
        unit_ids: List of unit IDs to create links for
        embeddings: List of embedding vectors (same length as unit_ids)

    Raises:
        ValueError: If unit_ids and embeddings differ in length.
    """
    # Nothing to do without both IDs and vectors.
    if not (unit_ids and embeddings):
        return

    if len(unit_ids) != len(embeddings):
        raise ValueError(f"Mismatch between unit_ids ({len(unit_ids)}) and embeddings ({len(embeddings)})")

    await link_utils.create_semantic_links_batch(
        conn,
        bank_id,
        unit_ids,
        embeddings,
        log_buffer=[],
    )
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
async def create_causal_links_batch(
    conn,
    unit_ids: List[str],
    facts: List[ProcessedFact]
) -> int:
    """
    Link facts that stand in causal relationships (causes, enables, prevents).

    Args:
        conn: Database connection
        unit_ids: List of unit IDs (same length as facts)
        facts: List of ProcessedFact objects with causal_relations

    Returns:
        Number of causal links created

    Raises:
        ValueError: If unit_ids and facts differ in length.
    """
    if not (unit_ids and facts):
        return 0

    if len(unit_ids) != len(facts):
        raise ValueError(f"Mismatch between unit_ids ({len(unit_ids)}) and facts ({len(facts)})")

    # link_utils expects one list of relation dicts per fact, positionally
    # aligned with unit_ids (an empty list when a fact has no relations).
    causal_relations_per_fact = [
        [
            {
                'relation_type': rel.relation_type,
                'target_fact_index': rel.target_fact_index,
                'strength': rel.strength,
            }
            for rel in (fact.causal_relations or [])
        ]
        for fact in facts
    ]

    return await link_utils.create_causal_links_batch(
        conn,
        unit_ids,
        causal_relations_per_fact,
    )
|