hindsight_api-0.0.13-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. hindsight_api/__init__.py +38 -0
  2. hindsight_api/api/__init__.py +105 -0
  3. hindsight_api/api/http.py +1872 -0
  4. hindsight_api/api/mcp.py +157 -0
  5. hindsight_api/engine/__init__.py +47 -0
  6. hindsight_api/engine/cross_encoder.py +97 -0
  7. hindsight_api/engine/db_utils.py +93 -0
  8. hindsight_api/engine/embeddings.py +113 -0
  9. hindsight_api/engine/entity_resolver.py +575 -0
  10. hindsight_api/engine/llm_wrapper.py +269 -0
  11. hindsight_api/engine/memory_engine.py +3095 -0
  12. hindsight_api/engine/query_analyzer.py +519 -0
  13. hindsight_api/engine/response_models.py +222 -0
  14. hindsight_api/engine/retain/__init__.py +50 -0
  15. hindsight_api/engine/retain/bank_utils.py +423 -0
  16. hindsight_api/engine/retain/chunk_storage.py +82 -0
  17. hindsight_api/engine/retain/deduplication.py +104 -0
  18. hindsight_api/engine/retain/embedding_processing.py +62 -0
  19. hindsight_api/engine/retain/embedding_utils.py +54 -0
  20. hindsight_api/engine/retain/entity_processing.py +90 -0
  21. hindsight_api/engine/retain/fact_extraction.py +1027 -0
  22. hindsight_api/engine/retain/fact_storage.py +176 -0
  23. hindsight_api/engine/retain/link_creation.py +121 -0
  24. hindsight_api/engine/retain/link_utils.py +651 -0
  25. hindsight_api/engine/retain/orchestrator.py +405 -0
  26. hindsight_api/engine/retain/types.py +206 -0
  27. hindsight_api/engine/search/__init__.py +15 -0
  28. hindsight_api/engine/search/fusion.py +122 -0
  29. hindsight_api/engine/search/observation_utils.py +132 -0
  30. hindsight_api/engine/search/reranking.py +103 -0
  31. hindsight_api/engine/search/retrieval.py +503 -0
  32. hindsight_api/engine/search/scoring.py +161 -0
  33. hindsight_api/engine/search/temporal_extraction.py +64 -0
  34. hindsight_api/engine/search/think_utils.py +255 -0
  35. hindsight_api/engine/search/trace.py +215 -0
  36. hindsight_api/engine/search/tracer.py +447 -0
  37. hindsight_api/engine/search/types.py +160 -0
  38. hindsight_api/engine/task_backend.py +223 -0
  39. hindsight_api/engine/utils.py +203 -0
  40. hindsight_api/metrics.py +227 -0
  41. hindsight_api/migrations.py +163 -0
  42. hindsight_api/models.py +309 -0
  43. hindsight_api/pg0.py +425 -0
  44. hindsight_api/web/__init__.py +12 -0
  45. hindsight_api/web/server.py +143 -0
  46. hindsight_api-0.0.13.dist-info/METADATA +41 -0
  47. hindsight_api-0.0.13.dist-info/RECORD +48 -0
  48. hindsight_api-0.0.13.dist-info/WHEEL +4 -0
hindsight_api/engine/retain/fact_storage.py
@@ -0,0 +1,176 @@
+"""
+Fact storage for retain pipeline.
+
+Handles insertion of facts into the database.
+"""
+import logging
+import json
+from typing import List, Optional
+from uuid import UUID
+
+from .types import ProcessedFact
+
+logger = logging.getLogger(__name__)
+
+
+async def insert_facts_batch(
+    conn,
+    bank_id: str,
+    facts: List[ProcessedFact],
+    document_id: Optional[str] = None
+) -> List[str]:
+    """
+    Insert facts into the database in batch.
+
+    Args:
+        conn: Database connection
+        bank_id: Bank identifier
+        facts: List of ProcessedFact objects to insert
+        document_id: Optional document ID to associate with facts
+
+    Returns:
+        List of unit IDs (UUIDs as strings) for the inserted facts
+    """
+    if not facts:
+        return []
+
+    # Prepare data for batch insert
+    fact_texts = []
+    embeddings = []
+    event_dates = []
+    occurred_starts = []
+    occurred_ends = []
+    mentioned_ats = []
+    contexts = []
+    fact_types = []
+    confidence_scores = []
+    access_counts = []
+    metadata_jsons = []
+    chunk_ids = []
+    document_ids = []
+
+    for fact in facts:
+        fact_texts.append(fact.fact_text)
+        # Convert embedding to string for the asyncpg vector type
+        embeddings.append(str(fact.embedding))
+        # event_date: use occurred_start if available, otherwise mentioned_at.
+        # This maintains backward compatibility while handling a None occurred_start.
+        event_dates.append(fact.occurred_start if fact.occurred_start is not None else fact.mentioned_at)
+        occurred_starts.append(fact.occurred_start)
+        occurred_ends.append(fact.occurred_end)
+        mentioned_ats.append(fact.mentioned_at)
+        contexts.append(fact.context)
+        fact_types.append(fact.fact_type)
+        # confidence_score is only set for opinion facts
+        confidence_scores.append(1.0 if fact.fact_type == 'opinion' else None)
+        access_counts.append(0)  # Initial access count
+        metadata_jsons.append(json.dumps(fact.metadata))
+        chunk_ids.append(fact.chunk_id)
+        # Use the per-fact document_id if available, otherwise fall back to the batch-level document_id
+        document_ids.append(fact.document_id if fact.document_id else document_id)
+
+    # Batch insert all facts
+    results = await conn.fetch(
+        """
+        INSERT INTO memory_units (bank_id, text, embedding, event_date, occurred_start, occurred_end, mentioned_at,
+                                  context, fact_type, confidence_score, access_count, metadata, chunk_id, document_id)
+        SELECT $1, * FROM unnest(
+            $2::text[], $3::vector[], $4::timestamptz[], $5::timestamptz[], $6::timestamptz[], $7::timestamptz[],
+            $8::text[], $9::text[], $10::float[], $11::int[], $12::jsonb[], $13::text[], $14::text[]
+        )
+        RETURNING id
+        """,
+        bank_id,
+        fact_texts,
+        embeddings,
+        event_dates,  # event_date: occurred_start if available, else mentioned_at
+        occurred_starts,
+        occurred_ends,
+        mentioned_ats,
+        contexts,
+        fact_types,
+        confidence_scores,
+        access_counts,
+        metadata_jsons,
+        chunk_ids,
+        document_ids
+    )
+
+    unit_ids = [str(row['id']) for row in results]
+    return unit_ids
+
+
+async def ensure_bank_exists(conn, bank_id: str) -> None:
+    """
+    Ensure the bank exists in the database.
+
+    Creates the bank with default values if it doesn't exist.
+
+    Args:
+        conn: Database connection
+        bank_id: Bank identifier
+    """
+    await conn.execute(
+        """
+        INSERT INTO banks (bank_id, personality, background)
+        VALUES ($1, $2::jsonb, $3)
+        ON CONFLICT (bank_id) DO UPDATE
+        SET updated_at = NOW()
+        """,
+        bank_id,
+        '{"openness": 0.5, "conscientiousness": 0.5, "extraversion": 0.5, "agreeableness": 0.5, "neuroticism": 0.5, "bias_strength": 0.5}',
+        ""
+    )
+
+
+async def handle_document_tracking(
+    conn,
+    bank_id: str,
+    document_id: str,
+    combined_content: str,
+    is_first_batch: bool,
+    retain_params: Optional[dict] = None
+) -> None:
+    """
+    Handle document tracking in the database.
+
+    Args:
+        conn: Database connection
+        bank_id: Bank identifier
+        document_id: Document identifier
+        combined_content: Combined content text from all content items
+        is_first_batch: Whether this is the first batch (for chunked operations)
+        retain_params: Optional parameters passed during retain (context, event_date, etc.)
+    """
+    import hashlib
+
+    # Calculate content hash
+    content_hash = hashlib.sha256(combined_content.encode()).hexdigest()
+
+    # Delete the old document if it exists (cascades to its units and links),
+    # but only on the first batch so we don't delete data we just inserted.
+    if is_first_batch:
+        await conn.fetchval(
+            "DELETE FROM documents WHERE id = $1 AND bank_id = $2 RETURNING id",
+            document_id, bank_id
+        )
+
+    # Insert the document (or update it if a concurrent operation created it)
+    await conn.execute(
+        """
+        INSERT INTO documents (id, bank_id, original_text, content_hash, metadata, retain_params)
+        VALUES ($1, $2, $3, $4, $5, $6)
+        ON CONFLICT (id, bank_id) DO UPDATE
+        SET original_text = EXCLUDED.original_text,
+            content_hash = EXCLUDED.content_hash,
+            metadata = EXCLUDED.metadata,
+            retain_params = EXCLUDED.retain_params,
+            updated_at = NOW()
+        """,
+        document_id,
+        bank_id,
+        combined_content,
+        content_hash,
+        json.dumps({}),  # Empty metadata dict
+        json.dumps(retain_params) if retain_params else None
+    )
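Two implementation details in fact_storage.py are worth calling out. The batch insert avoids executemany: it binds fourteen parallel arrays and lets Postgres unnest() them server-side, so N facts cost a single round trip. And embeddings are passed as str(fact.embedding), which presumably works because pgvector's text input format accepts Python's list repr. The sketch below is illustrative, not part of the package: the DSN is a placeholder, and the ProcessedFact keyword arguments are inferred from the fields the insert loop reads, not a confirmed constructor signature from retain/types.py.

# Usage sketch (not part of the package): wiring the three fact_storage
# helpers together in one asyncpg transaction. Assumes a reachable Postgres
# with the hindsight schema and pgvector installed.
import asyncio
from datetime import datetime, timezone

import asyncpg

from hindsight_api.engine.retain.types import ProcessedFact
from hindsight_api.engine.retain.fact_storage import (
    ensure_bank_exists,
    handle_document_tracking,
    insert_facts_batch,
)


async def main() -> None:
    conn = await asyncpg.connect("postgresql://localhost/hindsight")  # placeholder DSN
    now = datetime.now(timezone.utc)
    fact = ProcessedFact(  # field names inferred from insert_facts_batch
        fact_text="Alice joined the platform team.",
        embedding=[0.0] * 768,  # dimension depends on the embedding model
        occurred_start=now,
        occurred_end=None,
        mentioned_at=now,
        context="onboarding notes",
        fact_type="event",
        metadata={},
        chunk_id=None,
        document_id=None,
    )
    async with conn.transaction():
        await ensure_bank_exists(conn, "bank-1")
        await handle_document_tracking(
            conn, "bank-1", "doc-1",
            combined_content=fact.fact_text,
            is_first_batch=True,
        )
        unit_ids = await insert_facts_batch(conn, "bank-1", [fact], document_id="doc-1")
    print(unit_ids)  # UUIDs of the inserted memory_units rows
    await conn.close()


asyncio.run(main())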
hindsight_api/engine/retain/link_creation.py
@@ -0,0 +1,121 @@
+"""
+Link creation for retain pipeline.
+
+Handles creation of temporal, semantic, and causal links between facts.
+"""
+import logging
+from typing import List
+
+from .types import ProcessedFact, CausalRelation
+from . import link_utils
+
+logger = logging.getLogger(__name__)
+
+
+async def create_temporal_links_batch(
+    conn,
+    bank_id: str,
+    unit_ids: List[str]
+) -> None:
+    """
+    Create temporal links between facts.
+
+    Links facts that occurred close in time to each other.
+
+    Args:
+        conn: Database connection
+        bank_id: Bank identifier
+        unit_ids: List of unit IDs to create links for
+    """
+    if not unit_ids:
+        return
+
+    await link_utils.create_temporal_links_batch_per_fact(
+        conn,
+        bank_id,
+        unit_ids,
+        log_buffer=[]
+    )
+
+
+async def create_semantic_links_batch(
+    conn,
+    bank_id: str,
+    unit_ids: List[str],
+    embeddings: List[List[float]]
+) -> None:
+    """
+    Create semantic links between facts.
+
+    Links facts that are semantically similar based on embeddings.
+
+    Args:
+        conn: Database connection
+        bank_id: Bank identifier
+        unit_ids: List of unit IDs to create links for
+        embeddings: List of embedding vectors (same length as unit_ids)
+    """
+    if not unit_ids or not embeddings:
+        return
+
+    if len(unit_ids) != len(embeddings):
+        raise ValueError(f"Mismatch between unit_ids ({len(unit_ids)}) and embeddings ({len(embeddings)})")
+
+    await link_utils.create_semantic_links_batch(
+        conn,
+        bank_id,
+        unit_ids,
+        embeddings,
+        log_buffer=[]
+    )
+
+
+async def create_causal_links_batch(
+    conn,
+    unit_ids: List[str],
+    facts: List[ProcessedFact]
+) -> int:
+    """
+    Create causal links between facts.
+
+    Links facts that have causal relationships (causes, enables, prevents).
+
+    Args:
+        conn: Database connection
+        unit_ids: List of unit IDs (same length as facts)
+        facts: List of ProcessedFact objects with causal_relations
+
+    Returns:
+        Number of causal links created
+    """
+    if not unit_ids or not facts:
+        return 0
+
+    if len(unit_ids) != len(facts):
+        raise ValueError(f"Mismatch between unit_ids ({len(unit_ids)}) and facts ({len(facts)})")
+
+    # Extract causal relations in the format expected by link_utils:
+    # a list of lists, where each inner list holds the causal relations for that fact.
+    causal_relations_per_fact = []
+    for fact in facts:
+        if fact.causal_relations:
+            # Convert CausalRelation objects to dicts
+            relations_dicts = [
+                {
+                    'relation_type': rel.relation_type,
+                    'target_fact_index': rel.target_fact_index,
+                    'strength': rel.strength
+                }
+                for rel in fact.causal_relations
+            ]
+            causal_relations_per_fact.append(relations_dicts)
+        else:
+            causal_relations_per_fact.append([])
+
+    link_count = await link_utils.create_causal_links_batch(
+        conn,
+        unit_ids,
+        causal_relations_per_fact
+    )
+
+    return link_count
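A plausible call site for these helpers is the retain orchestrator (orchestrator.py in the file list above), running after insert_facts_batch inside the same transaction. The sketch below shows the expected wiring under that assumption; link_new_facts is an illustrative name, not a function in the package.

# Usage sketch (not part of the package): linking a freshly inserted batch.
# `conn`, `unit_ids`, and `facts` are assumed to come from the fact_storage
# step shown earlier.
from hindsight_api.engine.retain.link_creation import (
    create_temporal_links_batch,
    create_semantic_links_batch,
    create_causal_links_batch,
)


async def link_new_facts(conn, bank_id: str, unit_ids: list, facts: list) -> int:
    # Temporal links need only the new unit IDs; neighbors are found in SQL
    # by link_utils.create_temporal_links_batch_per_fact.
    await create_temporal_links_batch(conn, bank_id, unit_ids)

    # Semantic links also need one embedding per unit ID, in the same order;
    # a length mismatch raises ValueError before any rows are written.
    embeddings = [fact.embedding for fact in facts]
    await create_semantic_links_batch(conn, bank_id, unit_ids, embeddings)

    # Causal links come from each fact's causal_relations, whose
    # target_fact_index refers to another fact within this same batch.
    return await create_causal_links_batch(conn, unit_ids, facts)

Note that create_causal_links_batch takes no bank_id: causal targets are resolved by position against unit_ids within the batch, not looked up per bank.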