hindsight-api 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hindsight_api/admin/cli.py +59 -0
- hindsight_api/alembic/versions/h3c4d5e6f7g8_mental_models_v4.py +112 -0
- hindsight_api/alembic/versions/i4d5e6f7g8h9_delete_opinions.py +41 -0
- hindsight_api/alembic/versions/j5e6f7g8h9i0_mental_model_versions.py +95 -0
- hindsight_api/alembic/versions/k6f7g8h9i0j1_add_directive_subtype.py +58 -0
- hindsight_api/alembic/versions/l7g8h9i0j1k2_add_worker_columns.py +109 -0
- hindsight_api/alembic/versions/m8h9i0j1k2l3_mental_model_id_to_text.py +41 -0
- hindsight_api/alembic/versions/n9i0j1k2l3m4_learnings_and_pinned_reflections.py +134 -0
- hindsight_api/alembic/versions/o0j1k2l3m4n5_migrate_mental_models_data.py +113 -0
- hindsight_api/alembic/versions/p1k2l3m4n5o6_new_knowledge_architecture.py +194 -0
- hindsight_api/alembic/versions/q2l3m4n5o6p7_fix_mental_model_fact_type.py +50 -0
- hindsight_api/alembic/versions/r3m4n5o6p7q8_add_reflect_response_to_reflections.py +47 -0
- hindsight_api/alembic/versions/s4n5o6p7q8r9_add_consolidated_at_to_memory_units.py +53 -0
- hindsight_api/alembic/versions/t5o6p7q8r9s0_rename_mental_models_to_observations.py +134 -0
- hindsight_api/alembic/versions/u6p7q8r9s0t1_mental_models_text_id.py +41 -0
- hindsight_api/alembic/versions/v7q8r9s0t1u2_add_max_tokens_to_mental_models.py +50 -0
- hindsight_api/api/http.py +1119 -93
- hindsight_api/api/mcp.py +11 -191
- hindsight_api/config.py +145 -45
- hindsight_api/engine/consolidation/__init__.py +5 -0
- hindsight_api/engine/consolidation/consolidator.py +859 -0
- hindsight_api/engine/consolidation/prompts.py +69 -0
- hindsight_api/engine/cross_encoder.py +114 -9
- hindsight_api/engine/directives/__init__.py +5 -0
- hindsight_api/engine/directives/models.py +37 -0
- hindsight_api/engine/embeddings.py +102 -5
- hindsight_api/engine/interface.py +32 -13
- hindsight_api/engine/llm_wrapper.py +505 -43
- hindsight_api/engine/memory_engine.py +2090 -1089
- hindsight_api/engine/mental_models/__init__.py +14 -0
- hindsight_api/engine/mental_models/models.py +53 -0
- hindsight_api/engine/reflect/__init__.py +18 -0
- hindsight_api/engine/reflect/agent.py +933 -0
- hindsight_api/engine/reflect/models.py +109 -0
- hindsight_api/engine/reflect/observations.py +186 -0
- hindsight_api/engine/reflect/prompts.py +483 -0
- hindsight_api/engine/reflect/tools.py +437 -0
- hindsight_api/engine/reflect/tools_schema.py +250 -0
- hindsight_api/engine/response_models.py +130 -4
- hindsight_api/engine/retain/bank_utils.py +79 -201
- hindsight_api/engine/retain/fact_extraction.py +81 -48
- hindsight_api/engine/retain/fact_storage.py +5 -8
- hindsight_api/engine/retain/link_utils.py +5 -8
- hindsight_api/engine/retain/orchestrator.py +1 -55
- hindsight_api/engine/retain/types.py +2 -2
- hindsight_api/engine/search/graph_retrieval.py +2 -2
- hindsight_api/engine/search/link_expansion_retrieval.py +164 -29
- hindsight_api/engine/search/mpfp_retrieval.py +1 -1
- hindsight_api/engine/search/retrieval.py +14 -14
- hindsight_api/engine/search/think_utils.py +41 -140
- hindsight_api/engine/search/trace.py +0 -1
- hindsight_api/engine/search/tracer.py +2 -5
- hindsight_api/engine/search/types.py +0 -3
- hindsight_api/engine/task_backend.py +112 -196
- hindsight_api/engine/utils.py +0 -151
- hindsight_api/extensions/__init__.py +10 -1
- hindsight_api/extensions/builtin/tenant.py +5 -1
- hindsight_api/extensions/operation_validator.py +81 -4
- hindsight_api/extensions/tenant.py +26 -0
- hindsight_api/main.py +16 -5
- hindsight_api/mcp_local.py +12 -53
- hindsight_api/mcp_tools.py +494 -0
- hindsight_api/models.py +0 -2
- hindsight_api/worker/__init__.py +11 -0
- hindsight_api/worker/main.py +296 -0
- hindsight_api/worker/poller.py +486 -0
- {hindsight_api-0.3.0.dist-info → hindsight_api-0.4.0.dist-info}/METADATA +12 -6
- hindsight_api-0.4.0.dist-info/RECORD +112 -0
- {hindsight_api-0.3.0.dist-info → hindsight_api-0.4.0.dist-info}/entry_points.txt +1 -0
- hindsight_api/engine/retain/observation_regeneration.py +0 -254
- hindsight_api/engine/search/observation_utils.py +0 -125
- hindsight_api/engine/search/scoring.py +0 -159
- hindsight_api-0.3.0.dist-info/RECORD +0 -82
- {hindsight_api-0.3.0.dist-info → hindsight_api-0.4.0.dist-info}/WHEEL +0 -0
|
@@ -1,254 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Observation regeneration for retain pipeline.
|
|
3
|
-
|
|
4
|
-
Regenerates entity observations as part of the retain transaction.
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
import logging
|
|
8
|
-
import time
|
|
9
|
-
import uuid
|
|
10
|
-
from datetime import UTC, datetime
|
|
11
|
-
|
|
12
|
-
from ...config import get_config
|
|
13
|
-
from ..memory_engine import fq_table
|
|
14
|
-
from ..search import observation_utils
|
|
15
|
-
from . import embedding_utils
|
|
16
|
-
from .types import EntityLink
|
|
17
|
-
|
|
18
|
-
logger = logging.getLogger(__name__)
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
def utcnow():
|
|
22
|
-
"""Get current UTC time."""
|
|
23
|
-
return datetime.now(UTC)
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
# Simple dataclass-like container for facts (avoid importing from memory_engine)
|
|
27
|
-
class MemoryFactForObservation:
|
|
28
|
-
def __init__(self, id: str, text: str, fact_type: str, context: str, occurred_start: str | None):
|
|
29
|
-
self.id = id
|
|
30
|
-
self.text = text
|
|
31
|
-
self.fact_type = fact_type
|
|
32
|
-
self.context = context
|
|
33
|
-
self.occurred_start = occurred_start
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
async def regenerate_observations_batch(
    conn, embeddings_model, llm_config, bank_id: str, entity_links: list[EntityLink], log_buffer: list[str] | None = None
) -> None:
    """
    Regenerate observations for the most-mentioned entities in this batch.

    Runs on the caller's connection so the work stays inside the retain
    transaction. Entities are ranked by how often they are linked in this
    batch; only the top ``observation_top_entities`` that also have at least
    ``observation_min_facts`` stored facts are processed.

    Failures are handled per entity: the error is logged and the remaining
    entities are still processed. Nothing is re-raised from here.

    NOTE(review): if a database statement fails inside the surrounding
    transaction, later statements on the same connection may be rejected
    until the caller rolls back - confirm how the retain transaction copes.

    Args:
        conn: Database connection (from the retain transaction)
        embeddings_model: Embeddings model for generating observation embeddings
        llm_config: LLM configuration for observation extraction
        bank_id: Bank identifier
        entity_links: Entity links from this batch
        log_buffer: Optional log buffer; receives one timing line when work was done
    """
    from collections import Counter

    if not entity_links:
        return

    # Count mentions per entity in this batch (links without an entity are ignored)
    entity_mention_counts = Counter(str(link.entity_id) for link in entity_links if link.entity_id)
    if not entity_mention_counts:
        return

    # Config is only needed once we know there is something to rank
    config = get_config()
    top_n_entities = config.observation_top_entities
    min_facts_threshold = config.observation_min_facts

    # most_common() sorts by count descending and keeps first-seen order for ties
    entities_to_process = [entity_id for entity_id, _ in entity_mention_counts.most_common()[:top_n_entities]]

    obs_start = time.time()

    # Parse each id once; the same UUID objects serve the queries and the lookups below
    uuid_by_entity_id = {entity_id: uuid.UUID(entity_id) for entity_id in entities_to_process}
    entity_uuids = list(uuid_by_entity_id.values())

    # Batch query for entity names
    entity_rows = await conn.fetch(
        f"""
        SELECT id, canonical_name FROM {fq_table("entities")}
        WHERE id = ANY($1) AND bank_id = $2
        """,
        entity_uuids,
        bank_id,
    )
    entity_names = {row["id"]: row["canonical_name"] for row in entity_rows}

    # Batch query for fact counts
    fact_counts = await conn.fetch(
        f"""
        SELECT ue.entity_id, COUNT(*) as cnt
        FROM {fq_table("unit_entities")} ue
        JOIN {fq_table("memory_units")} mu ON ue.unit_id = mu.id
        WHERE ue.entity_id = ANY($1) AND mu.bank_id = $2
        GROUP BY ue.entity_id
        """,
        entity_uuids,
        bank_id,
    )
    entity_fact_counts = {row["entity_id"]: row["cnt"] for row in fact_counts}

    # Keep entities that exist in this bank and meet the fact threshold
    entities_with_names = [
        (entity_id, entity_names[entity_uuid])
        for entity_id, entity_uuid in uuid_by_entity_id.items()
        if entity_uuid in entity_names and entity_fact_counts.get(entity_uuid, 0) >= min_facts_threshold
    ]

    if not entities_with_names:
        return

    # Process entities SEQUENTIALLY (asyncpg doesn't allow concurrent queries on same connection)
    # We must use the same connection to stay in the retain transaction
    total_observations = 0

    for entity_id, entity_name in entities_with_names:
        try:
            obs_ids = await _regenerate_entity_observations(
                conn, embeddings_model, llm_config, bank_id, entity_id, entity_name
            )
            total_observations += len(obs_ids)
        except Exception as e:
            # Best effort: one failing entity must not stop the others
            logger.error(f"[OBSERVATIONS] Error processing entity {entity_id}: {e}")

    obs_time = time.time() - obs_start
    if log_buffer is not None:
        log_buffer.append(
            f"[11] Observations: {total_observations} observations for {len(entities_with_names)} entities in {obs_time:.3f}s"
        )
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
async def _regenerate_entity_observations(
    conn, embeddings_model, llm_config, bank_id: str, entity_id: str, entity_name: str
) -> list[str]:
    """
    Regenerate observations for a single entity.

    Uses the provided connection (part of retain transaction). The entity's
    50 most recent 'world'/'experience' facts are sent to the LLM; the
    resulting observations replace any existing 'observation' units linked
    to the entity. Embeddings are computed before the old observations are
    deleted, so an embedding failure leaves the existing ones untouched.

    Args:
        conn: Database connection (from the retain transaction)
        embeddings_model: Embeddings model
        llm_config: LLM configuration
        bank_id: Bank identifier
        entity_id: Entity UUID (string or UUID object)
        entity_name: Canonical name of the entity

    Returns:
        List of created observation IDs (empty when there are no facts or
        the LLM produced no observations)

    Raises:
        ValueError: If the number of embeddings does not match the number of observations
    """
    entity_uuid = uuid.UUID(entity_id) if isinstance(entity_id, str) else entity_id

    # Get all facts mentioning this entity (exclude observations themselves)
    # NOTE(review): with DESC ordering PostgreSQL sorts NULL occurred_start first,
    # so undated facts may crowd out recent ones within the LIMIT - confirm intent.
    rows = await conn.fetch(
        f"""
        SELECT mu.id, mu.text, mu.context, mu.occurred_start, mu.fact_type
        FROM {fq_table("memory_units")} mu
        JOIN {fq_table("unit_entities")} ue ON mu.id = ue.unit_id
        WHERE mu.bank_id = $1
          AND ue.entity_id = $2
          AND mu.fact_type IN ('world', 'experience')
        ORDER BY mu.occurred_start DESC
        LIMIT 50
        """,
        bank_id,
        entity_uuid,
    )

    if not rows:
        return []

    # Convert to fact objects for observation extraction
    facts = [
        MemoryFactForObservation(
            id=str(row["id"]),
            text=row["text"],
            fact_type=row["fact_type"],
            context=row["context"],
            occurred_start=row["occurred_start"].isoformat() if row["occurred_start"] else None,
        )
        for row in rows
    ]

    # Extract observations using LLM
    observations = await observation_utils.extract_observations_from_facts(llm_config, entity_name, facts)

    if not observations:
        return []

    # Generate embeddings BEFORE touching stored data, and refuse a misaligned
    # result rather than silently dropping observations in zip()
    embeddings = list(await embedding_utils.generate_embeddings_batch(embeddings_model, observations))
    if len(embeddings) != len(observations):
        raise ValueError(
            f"Expected {len(observations)} embeddings for entity {entity_name}, got {len(embeddings)}"
        )

    # Delete old observations for this entity
    await conn.execute(
        f"""
        DELETE FROM {fq_table("memory_units")}
        WHERE id IN (
            SELECT mu.id
            FROM {fq_table("memory_units")} mu
            JOIN {fq_table("unit_entities")} ue ON mu.id = ue.unit_id
            WHERE mu.bank_id = $1
              AND mu.fact_type = 'observation'
              AND ue.entity_id = $2
        )
        """,
        bank_id,
        entity_uuid,
    )

    # Insert new observations; all four timestamp columns share one "now"
    current_time = utcnow()
    created_ids = []

    for obs_text, embedding in zip(observations, embeddings):
        result = await conn.fetchrow(
            f"""
            INSERT INTO {fq_table("memory_units")} (
                bank_id, text, embedding, context, event_date,
                occurred_start, occurred_end, mentioned_at,
                fact_type, access_count
            )
            VALUES ($1, $2, $3, $4, $5, $6, $7, $8, 'observation', 0)
            RETURNING id
            """,
            bank_id,
            obs_text,
            str(embedding),
            f"observation about {entity_name}",
            current_time,
            current_time,
            current_time,
            current_time,
        )
        new_unit_id = result["id"]
        created_ids.append(str(new_unit_id))

        # Link observation to entity
        await conn.execute(
            f"""
            INSERT INTO {fq_table("unit_entities")} (unit_id, entity_id)
            VALUES ($1, $2)
            """,
            new_unit_id,
            entity_uuid,
        )

    return created_ids
|
|
@@ -1,125 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Observation utilities for generating entity observations from facts.
|
|
3
|
-
|
|
4
|
-
Observations are objective facts synthesized from multiple memory facts
|
|
5
|
-
about an entity, without personality influence.
|
|
6
|
-
"""
|
|
7
|
-
|
|
8
|
-
import logging
|
|
9
|
-
|
|
10
|
-
from pydantic import BaseModel, Field
|
|
11
|
-
|
|
12
|
-
from ..response_models import MemoryFact
|
|
13
|
-
|
|
14
|
-
logger = logging.getLogger(__name__)
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
class Observation(BaseModel):
    """A single factual statement about an entity."""

    observation: str = Field(
        description="The observation text - a factual statement about the entity",
    )
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
class ObservationExtractionResponse(BaseModel):
    """Structured LLM response holding the observations extracted for an entity."""

    observations: list[Observation] = Field(
        default_factory=list,
        description="List of observations about the entity",
    )
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
def format_facts_for_observation_prompt(facts: list[MemoryFact]) -> str:
    """
    Format facts as a JSON array for the observation extraction prompt.

    Each fact becomes an object with a ``text`` key, plus ``context`` and
    ``occurred_at`` when the fact has truthy ``context`` / ``occurred_start``
    values. Non-ASCII characters are emitted as-is rather than as ``\\uXXXX``
    escapes, so the prompt stays readable.

    Args:
        facts: Objects exposing ``text``, ``context`` and ``occurred_start``

    Returns:
        Indented JSON text; "[]" when there are no facts
    """
    import json

    if not facts:
        return "[]"

    formatted = []
    for fact in facts:
        fact_obj = {"text": fact.text}

        # Optional fields are omitted entirely when empty
        if fact.context:
            fact_obj["context"] = fact.context
        if fact.occurred_start:
            fact_obj["occurred_at"] = fact.occurred_start

        formatted.append(fact_obj)

    return json.dumps(formatted, indent=2, ensure_ascii=False)
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
def build_observation_prompt(
    entity_name: str,
    facts_text: str,
) -> str:
    """Assemble the user prompt asking the LLM for observations about an entity."""
    heading_name = entity_name.upper()
    prompt = f"""Based on the following facts about "{entity_name}", generate a list of key observations.

FACTS ABOUT {heading_name}:
{facts_text}

Your task: Synthesize the facts into clear, objective observations about {entity_name}.

GUIDELINES:
1. Each observation should be a factual statement about {entity_name}
2. Combine related facts into single observations where appropriate
3. Be objective - do not add opinions, judgments, or interpretations
4. Focus on what we KNOW about {entity_name}, not what we assume
5. Include observations about: identity, characteristics, roles, relationships, activities
6. Write in third person (e.g., "John is..." not "I think John is...")
7. If there are conflicting facts, note the most recent or most supported one

EXAMPLES of good observations:
- "John works at Google as a software engineer"
- "John is detail-oriented and methodical in his approach"
- "John collaborates frequently with Sarah on the AI project"
- "John joined the company in 2023"

EXAMPLES of bad observations (avoid these):
- "John seems like a good person" (opinion/judgment)
- "John probably likes his job" (assumption)
- "I believe John is reliable" (first-person opinion)

Generate 3-7 observations based on the available facts. If there are very few facts, generate fewer observations."""
    return prompt
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
def get_observation_system_message() -> str:
    """Return the fixed system prompt used for observation extraction."""
    return (
        "You are an objective observer synthesizing facts about an entity. "
        "Generate clear, factual observations without opinions or personality influence. "
        "Be concise and accurate."
    )
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
async def extract_observations_from_facts(llm_config, entity_name: str, facts: list[MemoryFact]) -> list[str]:
    """
    Ask the LLM to synthesize observations about an entity from its facts.

    Any exception raised by the LLM call or while reading its response is
    logged as a warning and turned into an empty result.

    Args:
        llm_config: LLM configuration to use
        entity_name: Name of the entity to generate observations about
        facts: List of facts mentioning the entity

    Returns:
        List of observation strings (empty when there are no facts or on failure)
    """
    if not facts:
        return []

    prompt = build_observation_prompt(entity_name, format_facts_for_observation_prompt(facts))

    try:
        response = await llm_config.call(
            messages=[
                {"role": "system", "content": get_observation_system_message()},
                {"role": "user", "content": prompt},
            ],
            response_format=ObservationExtractionResponse,
            scope="memory_extract_observation",
        )
        return [item.observation for item in response.observations]
    except Exception as e:
        logger.warning(f"Failed to extract observations for {entity_name}: {str(e)}")
        return []
|
|
@@ -1,159 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Scoring functions for memory search and retrieval.
|
|
3
|
-
|
|
4
|
-
Includes recency weighting, frequency weighting, temporal proximity,
|
|
5
|
-
and similarity calculations used in memory activation and ranking.
|
|
6
|
-
"""
|
|
7
|
-
|
|
8
|
-
from datetime import datetime
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
def cosine_similarity(vec1: list[float], vec2: list[float]) -> float:
    """
    Calculate cosine similarity between two vectors.

    Args:
        vec1: First vector
        vec2: Second vector

    Returns:
        Similarity score in [-1.0, 1.0]; 0.0 when either vector has zero
        magnitude (including empty vectors)

    Raises:
        ValueError: If the vectors have different lengths
    """
    if len(vec1) != len(vec2):
        raise ValueError("Vectors must have same dimension")

    dot_product = sum(a * b for a, b in zip(vec1, vec2))
    magnitude1 = sum(a * a for a in vec1) ** 0.5
    magnitude2 = sum(b * b for b in vec2) ** 0.5

    if magnitude1 == 0 or magnitude2 == 0:
        return 0.0

    # Clamp: floating-point rounding can push the ratio slightly outside [-1, 1]
    return max(-1.0, min(1.0, dot_product / (magnitude1 * magnitude2)))
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
def calculate_recency_weight(days_since: float, half_life_days: float = 365.0) -> float:
    """
    Calculate recency weight using logarithmic decay.

    weight = 1 / (1 + ln(1 + days_since / half_life_days))

    The decay is much slower than exponential decay, so memories that are
    years apart still receive meaningfully different weights. Note that
    ``half_life_days`` is a scale parameter, not a true half-life: the weight
    is ~0.59 at ``half_life_days`` and reaches 0.5 at about
    1.72 x ``half_life_days``.

    Examples (default half_life_days=365):
        - Today (0 days): 1.0
        - 1 year (365 days): ~0.59
        - 2 years (730 days): ~0.48
        - 5 years (1825 days): ~0.36
        - 10 years (3650 days): ~0.29

    Args:
        days_since: Number of days since the memory was created; negative
            values (timestamps in the future) are treated as 0
        half_life_days: Time scale of the decay in days (default: 1 year)

    Returns:
        Weight in (0, 1]
    """
    import math

    # Future-dated memories would otherwise produce weights above 1 or a math domain error
    normalized_age = max(days_since, 0.0) / half_life_days
    return 1.0 / (1.0 + math.log1p(normalized_age))
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
def calculate_frequency_weight(access_count: int, max_boost: float = 2.0) -> float:
    """
    Calculate frequency weight based on access count.

    Frequently accessed memories are weighted higher. The boost grows with
    log10(access_count + 1) and is capped at ``max_boost``.

    Args:
        access_count: Number of times the memory was accessed
        max_boost: Maximum multiplier for frequently accessed memories

    Returns:
        Weight between 1.0 and max_boost (for max_boost >= 1.0)
    """
    import math

    if access_count <= 0:
        return 1.0

    # Logarithmic scaling: 1 + log10(access_count + 1), capped at max_boost.
    # With the default max_boost=2.0: 0 accesses = 1.0, 2 accesses ~= 1.48,
    # 9 or more accesses = 2.0 (the cap is already reached at 9).
    normalized = math.log10(access_count + 1)
    return 1.0 + min(normalized, max_boost - 1.0)
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
def calculate_temporal_anchor(occurred_start: datetime, occurred_end: datetime) -> datetime:
    """
    Collapse a temporal range into one representative datetime.

    The anchor is the midpoint of the range, so temporal proximity between
    facts can be computed point-to-point instead of range-to-range.

    Args:
        occurred_start: Start of temporal range
        occurred_end: End of temporal range

    Returns:
        The datetime halfway between occurred_start and occurred_end

    Examples:
        - Point event: start == end, so the anchor is that same instant
        - Feb 1 to Feb 28: anchor is Feb 14, 12:00
    """
    half_span = (occurred_end - occurred_start) / 2
    return occurred_start + half_span
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
def calculate_temporal_proximity(anchor_a: datetime, anchor_b: datetime, half_life_days: float = 30.0) -> float:
    """
    Calculate temporal proximity between two temporal anchors.

    Used for spreading activation to determine how "close" two facts are
    in time. Uses logarithmic decay so that temporal similarity doesn't
    drop off too quickly:

        proximity = 1 / (1 + ln(1 + days_apart / half_life_days))

    The distance is measured in whole days and is symmetric in its
    arguments. ``half_life_days`` is a scale parameter: proximity is ~0.59
    at that distance and reaches 0.5 at about 1.72 x ``half_life_days``.

    Args:
        anchor_a: Temporal anchor of first fact
        anchor_b: Temporal anchor of second fact
        half_life_days: Time scale of the decay in days
                       (default: 30 days = 1 month)

    Returns:
        Proximity score in (0, 1] where 1.0 means less than one day apart
        and the score approaches 0 for very distant anchors

    Examples (half_life_days=30):
        - Less than a day apart: 1.0
        - 1 week apart: ~0.83
        - 30 days apart: ~0.59
        - 1 year apart: ~0.28
    """
    import math

    # Take abs() of the timedelta itself: a negative timedelta normalizes to
    # days=-1 plus positive seconds, so abs(delta.days) would report 1 day for
    # a one-hour gap in one argument order and 0 days in the other.
    days_apart = abs(anchor_a - anchor_b).days

    if days_apart == 0:
        return 1.0

    normalized_distance = days_apart / half_life_days
    proximity = 1.0 / (1.0 + math.log1p(normalized_distance))

    return proximity
|
|
@@ -1,82 +0,0 @@
|
|
|
1
|
-
hindsight_api/__init__.py,sha256=lPhgtKMvT8qjORFKWlhlq-LVdwesIu0gbUYNPZQEFiI,1197
|
|
2
|
-
hindsight_api/banner.py,sha256=BXn-jhkXe4xi-YV4JeuaVvjYhTMs96O43XoOMv4Cd28,4591
|
|
3
|
-
hindsight_api/config.py,sha256=cq6-Np4HuDV9w5qvX9jfrW4HYH1yGnIrpF84eBN9Un4,19903
|
|
4
|
-
hindsight_api/daemon.py,sha256=3CKcO_ENQ57dIWrTsmYUj-V4zvoAB1toNtVh3EVkg-c,5982
|
|
5
|
-
hindsight_api/main.py,sha256=P1jpn2WWF2aZ5WcVBzcvXr9-BqCIqJR7xfEetPDrkvY,12736
|
|
6
|
-
hindsight_api/mcp_local.py,sha256=fL2hpwQSNExcjIwZn1E5vy5No6iZFmw78yRNXxJzri0,7371
|
|
7
|
-
hindsight_api/metrics.py,sha256=go3X7wyFAPkc55HFvu7esiaJXDrUsrSrC8Pq5NjcqU0,20692
|
|
8
|
-
hindsight_api/migrations.py,sha256=V4QL_N1cMe6kNF1ejJ3lPIPFXKU2Pzbaiviws7AyMIY,14624
|
|
9
|
-
hindsight_api/models.py,sha256=FrV6DicpmubfwU4h35Y01XM5Jt-n_RIGAmqzgdJH3eU,13011
|
|
10
|
-
hindsight_api/pg0.py,sha256=XORoiemECidQgBP53EBSCF3i0PJegLRRWKl2hU5UPhE,6390
|
|
11
|
-
hindsight_api/server.py,sha256=MU2ZvKe3KWfxKYZq8EEJPgKMmq5diPkRqfQBaz-yOQI,2483
|
|
12
|
-
hindsight_api/admin/__init__.py,sha256=RvaczuwTxg6ajc_Jlk0EhVz5JqlNB3T8su060gRQwfs,26
|
|
13
|
-
hindsight_api/admin/cli.py,sha256=6n3po14XiYBfFoTXBXQBvafU2--_7rMgs33qmtOl_Po,9353
|
|
14
|
-
hindsight_api/alembic/README,sha256=MVlc9TYmr57RbhXET6QxgyCcwWP7w-vLkEsirENqiIQ,38
|
|
15
|
-
hindsight_api/alembic/env.py,sha256=I4sGdtUo8xcXe95MyD36JQeMod_Bvp9JUkW64Ve4XSM,5808
|
|
16
|
-
hindsight_api/alembic/script.py.mako,sha256=04kgeBtNMa4cCnG8CfQcKt6P6rnloIfj8wy0u_DBydM,704
|
|
17
|
-
hindsight_api/alembic/versions/5a366d414dce_initial_schema.py,sha256=g3G7fV70Z10PZxwTrTmR34OAlEZjQTLJKr-Ol54JqrQ,17665
|
|
18
|
-
hindsight_api/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py,sha256=MaHFU4JczUIFLeUMBTKIV3ocuclil55N9fPPim-HRfk,2599
|
|
19
|
-
hindsight_api/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py,sha256=ChqkHANauZb4-nBt2uepoZN3q0vRzN6aRsWTGueULiA,1146
|
|
20
|
-
hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py,sha256=s5_B2D0JdaxO7WM-vWC5Yt6hAtTsAUzJhFGLFSkfuQU,1808
|
|
21
|
-
hindsight_api/alembic/versions/e0a1b2c3d4e5_disposition_to_3_traits.py,sha256=IdDP6fgsYj5fCXAF0QT-3t_wcKJsnf7B0mh7qS-cf_w,3806
|
|
22
|
-
hindsight_api/alembic/versions/f1a2b3c4d5e6_add_memory_links_composite_index.py,sha256=tqkOLVD_p1NXVsIRxAc1mBiNpEosU9WkwrNUEGbc9DY,1598
|
|
23
|
-
hindsight_api/alembic/versions/g2a3b4c5d6e7_add_tags_column.py,sha256=4P7OGJf2t9IWxI0wi8ibC3mrQzjWJaTZ5z5QPr67gig,1772
|
|
24
|
-
hindsight_api/alembic/versions/rename_personality_to_disposition.py,sha256=A29-nDJ2Re4u9jdp2sUw29It808j4h6BpcA4wDHJMJ8,2765
|
|
25
|
-
hindsight_api/api/__init__.py,sha256=npF0AAy8WJhHF5a9ehkNn9_iYLk7RQOk2gdkdFb49Hk,3840
|
|
26
|
-
hindsight_api/api/http.py,sha256=JAnsYU1ovOwksdcSTtsSUtTjMVDNFxIjdmG2cknfQqs,90545
|
|
27
|
-
hindsight_api/api/mcp.py,sha256=RMrCzkza6MzZLzIj-4a9os-OmAMJjRRxIKp0za5BIvE,14849
|
|
28
|
-
hindsight_api/engine/__init__.py,sha256=-BwaSwG9fTT_BBO0c_2MBkxG6-tGdclSzIqsgHw4cnw,1633
|
|
29
|
-
hindsight_api/engine/cross_encoder.py,sha256=UJsjlb8V0Q1QhzM92w8F86IrRVqWMMfjS2it4rtgiRM,31018
|
|
30
|
-
hindsight_api/engine/db_budget.py,sha256=1OmZiuszpuEaYz355QlOqwaupXPd9FrnbyENsFboBkg,8642
|
|
31
|
-
hindsight_api/engine/db_utils.py,sha256=Fq1pXETt8ZPhkWYjrcGbgL6glrwmCGWh3_lYJgHqQPo,3067
|
|
32
|
-
hindsight_api/engine/embeddings.py,sha256=qSaPEJb8-wQhI7njWhwDwb2kKtKjiJ-9CTix5i6xilk,25798
|
|
33
|
-
hindsight_api/engine/entity_resolver.py,sha256=qVvWJHnbGEfh0iUFtc1dbM3IUNwPMsQsmg2rMgiX2DY,23794
|
|
34
|
-
hindsight_api/engine/interface.py,sha256=egpn50n4CRNDOip-ggcGZakj4Ddoj15_BUO0c0bzjI0,16496
|
|
35
|
-
hindsight_api/engine/llm_wrapper.py,sha256=yn_SOmDkiYkD1imaWFqdUXdTn3_xiV04iukSO2Mq3kk,48875
|
|
36
|
-
hindsight_api/engine/memory_engine.py,sha256=DhFkKNz6u-1QS4sGJOCbwBAM2NZJ16Ay7PSCsHtG3nA,191874
|
|
37
|
-
hindsight_api/engine/query_analyzer.py,sha256=7APe0MjBcUxjivcMlM03PmMk_w5FjWvlEe20yAJlHlc,19741
|
|
38
|
-
hindsight_api/engine/response_models.py,sha256=84SyIkt0zITSupLdnRmwyTkge6MzFY6qi9s6TKORSsc,10595
|
|
39
|
-
hindsight_api/engine/task_backend.py,sha256=POs2wcBYJErFIKj3sWMGqs7bPwaiPKpE7q_86ttxhQA,12139
|
|
40
|
-
hindsight_api/engine/utils.py,sha256=IPOzdMh6GbLYmggxa4UAVupY4wh1BzB80pwuxE8KaQU,6994
|
|
41
|
-
hindsight_api/engine/retain/__init__.py,sha256=t6q3-_kf4iYTl9j2PVB6laqMSs6UuPeXBSYMW6HT1sA,1152
|
|
42
|
-
hindsight_api/engine/retain/bank_utils.py,sha256=JjrTE-bixHZKaUyl4uPQ6FV9O7hMOOEijXUnqXhOB5g,14097
|
|
43
|
-
hindsight_api/engine/retain/chunk_storage.py,sha256=zXAqbcFeYpjyWlOoi8zeK5G91zHpF75CUVF-6wsEJpU,2064
|
|
44
|
-
hindsight_api/engine/retain/deduplication.py,sha256=kqs7I7eIc_ppvgAF9GlzL6fSGuEEzrgw17-7NdyUDis,3099
|
|
45
|
-
hindsight_api/engine/retain/embedding_processing.py,sha256=R35oyKYIKjuqC-yZl5Ru56F8xRe0N6KW_9p5PZ9CBi0,1649
|
|
46
|
-
hindsight_api/engine/retain/embedding_utils.py,sha256=uulXIBiA7XNsj16K1VGawR3s5jV-hsAmvmoCi-IodpU,1565
|
|
47
|
-
hindsight_api/engine/retain/entity_processing.py,sha256=0x5b48Im7pWjeqg3xTMIRVhrzd4otc4rSkFBjxgOL9Y,3632
|
|
48
|
-
hindsight_api/engine/retain/fact_extraction.py,sha256=5YP-46fLfxYrJWMSoIMmvg9RXonWALNY1B1RpAFNnnM,61808
|
|
49
|
-
hindsight_api/engine/retain/fact_storage.py,sha256=yFOhC97wIAUcB5cU6KgTISqZ0c_kDAXcPzc6-BkPWtQ,6849
|
|
50
|
-
hindsight_api/engine/retain/link_creation.py,sha256=KP2kGU2VCymJptgw0hjaSdsjvncBgNp3P_A4OB_qx-w,3082
|
|
51
|
-
hindsight_api/engine/retain/link_utils.py,sha256=-QaFF5R03vlE8n2M2074k_sl_4L82J_K5lxjZu_zIes,33376
|
|
52
|
-
hindsight_api/engine/retain/observation_regeneration.py,sha256=GByj4cQ-kp5iM_juryWjKYRYN2H63ttsmcpoUJzIIaI,8259
|
|
53
|
-
hindsight_api/engine/retain/orchestrator.py,sha256=8DDv46x20GE3UcGjI9ZNcCgslpdXiOJ1aS1r4dy916Y,22679
|
|
54
|
-
hindsight_api/engine/retain/types.py,sha256=TZ4Zcpv7DP9Wn6-NnOpColPtnqO8rtHPTqYFnGoVrfo,7770
|
|
55
|
-
hindsight_api/engine/search/__init__.py,sha256=YPz_4g7IOabx078Xwg3RBfbOpJ649NRwNfe0gTI9P1U,802
|
|
56
|
-
hindsight_api/engine/search/fusion.py,sha256=cY81BH9U5RyWrPXbQnrDBghtelDMckZWCke9aqMyNnQ,4220
|
|
57
|
-
hindsight_api/engine/search/graph_retrieval.py,sha256=7VVv2LsGwMH9hMrPtoxDi0_qwAosXCBC3VZ2TJZJ7Ak,10192
|
|
58
|
-
hindsight_api/engine/search/link_expansion_retrieval.py,sha256=eYwnKXpw2Jx06DSqdLbHTwUVPLszEIuO7_09SEz_X8E,9438
|
|
59
|
-
hindsight_api/engine/search/mpfp_retrieval.py,sha256=1OlARy2F_QbkjEZ7Q5wZ34qOr6uyrMNr2KcIWcF9KaE,24475
|
|
60
|
-
hindsight_api/engine/search/observation_utils.py,sha256=rlvGA4oFomMZNCZiJvPIQ0iwGaq9XqhRM530unqziCE,4243
|
|
61
|
-
hindsight_api/engine/search/reranking.py,sha256=hNwte352lTB8A7wlez8-05cdL2_Ghy2kbTs93sGyug4,3929
|
|
62
|
-
hindsight_api/engine/search/retrieval.py,sha256=ydyPHczDIU0NRi00qjROZyf7BBRNU9kWPmtfezr1pfw,51774
|
|
63
|
-
hindsight_api/engine/search/scoring.py,sha256=7jbBtdnow7JU0d8xdW-ZqYvP4s-TYX2tqPhu2DiqHUI,5132
|
|
64
|
-
hindsight_api/engine/search/tags.py,sha256=3oxpm3VonwvowyOXn1FPVby50PakVfxvTT1FuEI_iDo,5843
|
|
65
|
-
hindsight_api/engine/search/temporal_extraction.py,sha256=j7hPqpx2jMdR2BqgFrL-rrV2Hzq8HV24MtjYLJqVl2U,1732
|
|
66
|
-
hindsight_api/engine/search/think_utils.py,sha256=ASbkbMxjIVgN5c3VurapHfiOjG2CaRZGDJvte0KylFA,14056
|
|
67
|
-
hindsight_api/engine/search/trace.py,sha256=cRms7r9u6yuUBAy7d421rvkvl_WuawA0JwgBdfKeuhk,11837
|
|
68
|
-
hindsight_api/engine/search/tracer.py,sha256=Oh8xLOIm5xXEmI0misleuNWknPk9ZYSajmwtjE_E_F0,16332
|
|
69
|
-
hindsight_api/engine/search/types.py,sha256=q03ckzA3LC19TnNeM16dhNu-Ym5dK0TzY-8P5ydQ51A,6554
|
|
70
|
-
hindsight_api/extensions/__init__.py,sha256=gt8RxBwz6JOjbwbPPJ1LGE7ugk1nYkEAlD-LN1ap7FE,1926
|
|
71
|
-
hindsight_api/extensions/base.py,sha256=M7zXuM-tbqDnUwXX1mxAxiFs1eXOzNqIJutKLiUE4mU,2357
|
|
72
|
-
hindsight_api/extensions/context.py,sha256=Qq-uy3hhxO6ioDmf6dPXdnIjs_pdm7lTspDiEhJJmPU,4469
|
|
73
|
-
hindsight_api/extensions/http.py,sha256=c-a1g6R6rzibyReyR-WHz8DjRRGr4rVSyV9KB4UxVVU,2907
|
|
74
|
-
hindsight_api/extensions/loader.py,sha256=UwGM0XH7zHGng_xfHUY0VbOQemj9DmjuDaMst1TrFi8,4170
|
|
75
|
-
hindsight_api/extensions/operation_validator.py,sha256=340M0NqA7juSZimOicIhkZ2j0lc9L4M3Uzr94iGnLKA,10720
|
|
76
|
-
hindsight_api/extensions/tenant.py,sha256=gvngBMn3cJtUfd4P0P_288faNJq00T8zPQkeldEsD3g,1903
|
|
77
|
-
hindsight_api/extensions/builtin/__init__.py,sha256=hLx2oFYZ1JtZhTWfab6AYcR02SWP2gIdbEqnZezT8ek,526
|
|
78
|
-
hindsight_api/extensions/builtin/tenant.py,sha256=lsS0GDEUXmfPBzqhqk2FpN4Z_k5cA3Y3PFNYyiiuZjU,1444
|
|
79
|
-
hindsight_api-0.3.0.dist-info/METADATA,sha256=_5AcFNHsdqfULphbm9ExiOScBGU1_rTWRMDSQZT9WvY,5584
|
|
80
|
-
hindsight_api-0.3.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
81
|
-
hindsight_api-0.3.0.dist-info/entry_points.txt,sha256=PD6Uc6yxrI2_XYZNKBqBQY6ZlxNUb0xjna6kVnVn1wA,156
|
|
82
|
-
hindsight_api-0.3.0.dist-info/RECORD,,
|
|
File without changes
|