hindsight-api 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hindsight_api/__init__.py +1 -1
- hindsight_api/api/http.py +7 -19
- hindsight_api/api/mcp.py +45 -5
- hindsight_api/config.py +115 -11
- hindsight_api/daemon.py +4 -1
- hindsight_api/engine/consolidation/consolidator.py +39 -3
- hindsight_api/engine/cross_encoder.py +7 -99
- hindsight_api/engine/embeddings.py +3 -93
- hindsight_api/engine/interface.py +0 -43
- hindsight_api/engine/llm_wrapper.py +93 -22
- hindsight_api/engine/memory_engine.py +37 -138
- hindsight_api/engine/response_models.py +1 -21
- hindsight_api/engine/retain/fact_extraction.py +19 -23
- hindsight_api/engine/retain/orchestrator.py +1 -4
- hindsight_api/engine/utils.py +0 -3
- hindsight_api/main.py +27 -12
- hindsight_api/mcp_tools.py +31 -12
- hindsight_api/metrics.py +3 -3
- hindsight_api/pg0.py +1 -1
- hindsight_api/worker/main.py +11 -11
- hindsight_api/worker/poller.py +226 -97
- {hindsight_api-0.4.1.dist-info → hindsight_api-0.4.3.dist-info}/METADATA +2 -1
- {hindsight_api-0.4.1.dist-info → hindsight_api-0.4.3.dist-info}/RECORD +25 -25
- {hindsight_api-0.4.1.dist-info → hindsight_api-0.4.3.dist-info}/WHEEL +0 -0
- {hindsight_api-0.4.1.dist-info → hindsight_api-0.4.3.dist-info}/entry_points.txt +0 -0
hindsight_api/engine/memory_engine.py
CHANGED

@@ -504,12 +504,11 @@ class MemoryEngine(MemoryEngineInterface):
         if request_context is None:
             raise AuthenticationError("RequestContext is required when tenant extension is configured")

-        # For internal/background operations (e.g., worker tasks), skip extension authentication
-        #
+        # For internal/background operations (e.g., worker tasks), skip extension authentication.
+        # The task was already authenticated at submission time, and execute_task sets _current_schema
+        # from the task's _schema field. For public schema tasks, _current_schema keeps its default "public".
         if request_context.internal:
-
-            if current and current != "public":
-                return current
+            return _current_schema.get()

         # Let AuthenticationError propagate - HTTP layer will convert to 401
         tenant_context = await self._tenant_extension.authenticate(request_context)
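
The new `return _current_schema.get()` path reads the active schema from a context variable instead of re-deriving it. A minimal, self-contained sketch of that pattern, assuming the ContextVar-based design the comments describe (the `resolve_schema` helper and the `tenant_acme` schema name are illustrative, not from the package):

import contextvars

# Context variable holding the tenant schema for the current task/request.
# Defaults to "public" so untenanted internal work runs against the public schema.
_current_schema: contextvars.ContextVar[str] = contextvars.ContextVar("schema", default="public")

def resolve_schema(internal: bool) -> str:
    # Internal/background work trusts whatever schema the task runner already set.
    if internal:
        return _current_schema.get()
    # External requests would go through tenant authentication instead.
    raise PermissionError("external requests must authenticate")

# A worker would set the schema before executing a tenant's task:
token = _current_schema.set("tenant_acme")
try:
    assert resolve_schema(internal=True) == "tenant_acme"
finally:
    _current_schema.reset(token)
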
@@ -789,7 +788,7 @@ class MemoryEngine(MemoryEngineInterface):
             kwargs = {"name": self._pg0_instance_name}
             if self._pg0_port is not None:
                 kwargs["port"] = self._pg0_port
-            pg0 = EmbeddedPostgres(**kwargs)
+            pg0 = EmbeddedPostgres(**kwargs)
             # Check if pg0 is already running before we start it
             was_already_running = await pg0.is_running()
             self.db_url = await pg0.ensure_running()
@@ -889,6 +888,23 @@ class MemoryEngine(MemoryEngineInterface):
         # Use configured database schema for migrations (defaults to "public")
         run_migrations(self.db_url, schema=get_config().database_schema)

+        # Migrate all existing tenant schemas (if multi-tenant)
+        if self._tenant_extension is not None:
+            try:
+                tenants = await self._tenant_extension.list_tenants()
+                if tenants:
+                    logger.info(f"Running migrations on {len(tenants)} tenant schemas...")
+                    for tenant in tenants:
+                        schema = tenant.schema
+                        if schema and schema != "public":
+                            try:
+                                run_migrations(self.db_url, schema=schema)
+                            except Exception as e:
+                                logger.warning(f"Failed to migrate tenant schema {schema}: {e}")
+                    logger.info("Tenant schema migrations completed")
+            except Exception as e:
+                logger.warning(f"Failed to run tenant schema migrations: {e}")
+
         # Ensure embedding column dimension matches the model's dimension
         # This is done after migrations and after embeddings.initialize()
         ensure_embedding_dimension(self.db_url, self.embeddings.dimension, schema=get_config().database_schema)
@@ -1175,15 +1191,15 @@ class MemoryEngine(MemoryEngineInterface):
             context: Context about when/why this memory was formed
             event_date: When the event occurred (defaults to now)
             document_id: Optional document ID for tracking (always upserts if document already exists)
-            fact_type_override: Override fact type ('world', 'experience'
-            confidence_score: Confidence score
+            fact_type_override: Override fact type ('world', 'experience')
+            confidence_score: Confidence score (0.0 to 1.0)
             request_context: Request context for authentication.

         Returns:
             List of created unit IDs
         """
         # Build content dict
-        content_dict: RetainContentDict = {"content": content, "context": context}
+        content_dict: RetainContentDict = {"content": content, "context": context}
         if event_date:
             content_dict["event_date"] = event_date
         if document_id:
@@ -1231,8 +1247,8 @@ class MemoryEngine(MemoryEngineInterface):
                 - "document_id" (optional): Document ID for this specific content item
             document_id: **DEPRECATED** - Use "document_id" key in each content dict instead.
                 Applies the same document_id to ALL content items that don't specify their own.
-            fact_type_override: Override fact type for all facts ('world', 'experience'
-            confidence_score: Confidence score
+            fact_type_override: Override fact type for all facts ('world', 'experience')
+            confidence_score: Confidence score (0.0 to 1.0)
             return_usage: If True, returns tuple of (unit_ids, TokenUsage). Default False for backward compatibility.

         Returns:
@@ -1554,16 +1570,16 @@ class MemoryEngine(MemoryEngineInterface):
         if fact_type is None:
             fact_type = list(VALID_RECALL_FACT_TYPES)

-        #
+        # Filter out 'opinion' early (deprecated, silently ignore)
+        fact_type = [ft for ft in fact_type if ft != "opinion"]
+
+        # Validate fact types
         invalid_types = set(fact_type) - VALID_RECALL_FACT_TYPES
         if invalid_types:
             raise ValueError(
                 f"Invalid fact type(s): {', '.join(sorted(invalid_types))}. "
                 f"Must be one of: {', '.join(sorted(VALID_RECALL_FACT_TYPES))}"
             )
-
-        # Filter out 'opinion' - opinions are no longer returned from recall
-        fact_type = [ft for ft in fact_type if ft != "opinion"]
         if not fact_type:
             # All requested types were opinions - return empty result
             return RecallResultModel(results=[], entities={}, chunks={})
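
Moving the opinion filter ahead of validation changes behavior for callers that still pass 'opinion': it is now stripped before the validity check instead of reaching it, so such requests degrade gracefully. A small stand-alone illustration of the ordering, with a placeholder set of valid types:

VALID = {"world", "experience"}

def normalize_fact_types(requested: list[str]) -> list[str]:
    # Drop the deprecated type first so it never trips validation.
    requested = [t for t in requested if t != "opinion"]
    invalid = set(requested) - VALID
    if invalid:
        raise ValueError(f"Invalid fact type(s): {', '.join(sorted(invalid))}")
    return requested

print(normalize_fact_types(["world", "opinion"]))  # ['world'] - no error
print(normalize_fact_types(["opinion"]))           # [] - caller returns an empty result
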
@@ -2219,44 +2235,15 @@ class MemoryEngine(MemoryEngineInterface):
                 )
                 top_results_dicts.append(result_dict)

-        # Get entities for each fact if include_entities is requested
-        fact_entity_map = {}  # unit_id -> list of (entity_id, entity_name)
-        if include_entities and top_scored:
-            unit_ids = [uuid.UUID(sr.id) for sr in top_scored]
-            if unit_ids:
-                async with acquire_with_retry(pool) as entity_conn:
-                    entity_rows = await entity_conn.fetch(
-                        f"""
-                        SELECT ue.unit_id, e.id as entity_id, e.canonical_name
-                        FROM {fq_table("unit_entities")} ue
-                        JOIN {fq_table("entities")} e ON ue.entity_id = e.id
-                        WHERE ue.unit_id = ANY($1::uuid[])
-                        """,
-                        unit_ids,
-                    )
-                    for row in entity_rows:
-                        unit_id = str(row["unit_id"])
-                        if unit_id not in fact_entity_map:
-                            fact_entity_map[unit_id] = []
-                        fact_entity_map[unit_id].append(
-                            {"entity_id": str(row["entity_id"]), "canonical_name": row["canonical_name"]}
-                        )
-
         # Convert results to MemoryFact objects
         memory_facts = []
         for result_dict in top_results_dicts:
-            result_id = str(result_dict.get("id"))
-            # Get entity names for this fact
-            entity_names = None
-            if include_entities and result_id in fact_entity_map:
-                entity_names = [e["canonical_name"] for e in fact_entity_map[result_id]]
-
             memory_facts.append(
                 MemoryFact(
-                    id=
+                    id=str(result_dict.get("id")),
                     text=result_dict.get("text"),
                     fact_type=result_dict.get("fact_type", "world"),
-                    entities=
+                    entities=None,  # Entity observations removed
                     context=result_dict.get("context"),
                     occurred_start=result_dict.get("occurred_start"),
                     occurred_end=result_dict.get("occurred_end"),
@@ -2267,38 +2254,12 @@ class MemoryEngine(MemoryEngineInterface):
                 )
             )

-        #
+        # Entity observations removed - always set to None
         entities_dict = None
-        total_entity_tokens = 0
-        total_chunk_tokens = 0
-        if include_entities and fact_entity_map:
-            # Collect unique entities in order of fact relevance (preserving order from top_scored)
-            # Use a list to maintain order, but track seen entities to avoid duplicates
-            entities_ordered = []  # list of (entity_id, entity_name) tuples
-            seen_entity_ids = set()
-
-            # Iterate through facts in relevance order
-            for sr in top_scored:
-                unit_id = sr.id
-                if unit_id in fact_entity_map:
-                    for entity in fact_entity_map[unit_id]:
-                        entity_id = entity["entity_id"]
-                        entity_name = entity["canonical_name"]
-                        if entity_id not in seen_entity_ids:
-                            entities_ordered.append((entity_id, entity_name))
-                            seen_entity_ids.add(entity_id)
-
-            # Return entities with empty observations (summaries now live in mental models)
-            entities_dict = {}
-            for entity_id, entity_name in entities_ordered:
-                entities_dict[entity_name] = EntityState(
-                    entity_id=entity_id,
-                    canonical_name=entity_name,
-                    observations=[],  # Mental models provide this now
-                )

         # Fetch chunks if requested
         chunks_dict = None
+        total_chunk_tokens = 0
         if include_chunks and top_scored:
             from .response_models import ChunkInfo

@@ -2367,7 +2328,6 @@ class MemoryEngine(MemoryEngineInterface):
         # Log final recall stats
         total_time = time.time() - recall_start
         num_chunks = len(chunks_dict) if chunks_dict else 0
-        num_entities = len(entities_dict) if entities_dict else 0
         # Include wait times in log if significant
         wait_parts = []
         if semaphore_wait > 0.01:
@@ -2376,7 +2336,7 @@ class MemoryEngine(MemoryEngineInterface):
             wait_parts.append(f"conn={max_conn_wait:.3f}s")
         wait_info = f" | waits: {', '.join(wait_parts)}" if wait_parts else ""
         log_buffer.append(
-            f"[RECALL {recall_id}] Complete: {len(top_scored)} facts ({total_tokens} tok), {num_chunks} chunks ({total_chunk_tokens} tok)
+            f"[RECALL {recall_id}] Complete: {len(top_scored)} facts ({total_tokens} tok), {num_chunks} chunks ({total_chunk_tokens} tok) | {fact_type_summary} | {total_time:.3f}s{wait_info}"
         )
         if not quiet:
             logger.info("\n" + "\n".join(log_buffer))
@@ -3550,7 +3510,6 @@ class MemoryEngine(MemoryEngineInterface):
             ReflectResult containing:
             - text: Plain text answer
             - based_on: Empty dict (agent retrieves facts dynamically)
-            - new_opinions: Empty list
             - structured_output: None (not yet supported for agentic reflect)
         """
         # Use cached LLM config
@@ -3875,7 +3834,6 @@ class MemoryEngine(MemoryEngineInterface):
         result = ReflectResult(
             text=agent_result.text,
             based_on=based_on,
-            new_opinions=[],  # Learnings stored as mental models
             structured_output=agent_result.structured_output,
             usage=usage,
             tool_trace=tool_trace_result,
@@ -3904,32 +3862,6 @@ class MemoryEngine(MemoryEngineInterface):

         return result

-    async def get_entity_observations(
-        self,
-        bank_id: str,
-        entity_id: str,
-        *,
-        limit: int = 10,
-        request_context: "RequestContext",
-    ) -> list[Any]:
-        """
-        Get observations for an entity.
-
-        NOTE: Entity observations/summaries have been moved to mental models.
-        This method returns an empty list. Use mental models for entity summaries.
-
-        Args:
-            bank_id: bank IDentifier
-            entity_id: Entity UUID to get observations for
-            limit: Ignored (kept for backwards compatibility)
-            request_context: Request context for authentication.
-
-        Returns:
-            Empty list (observations now in mental models)
-        """
-        await self._authenticate_tenant(request_context)
-        return []
-
     async def list_entities(
         self,
         bank_id: str,
@@ -4116,36 +4048,6 @@ class MemoryEngine(MemoryEngineInterface):
         await self._authenticate_tenant(request_context)
         return EntityState(entity_id=entity_id, canonical_name=entity_name, observations=[])

-    async def regenerate_entity_observations(
-        self,
-        bank_id: str,
-        entity_id: str,
-        entity_name: str,
-        *,
-        version: str | None = None,
-        conn=None,
-        request_context: "RequestContext",
-    ) -> list[str]:
-        """
-        Regenerate observations for an entity.
-
-        NOTE: Entity observations/summaries have been moved to mental models.
-        This method is now a no-op and returns an empty list.
-
-        Args:
-            bank_id: bank IDentifier
-            entity_id: Entity UUID
-            entity_name: Canonical name of the entity
-            version: Entity's last_seen timestamp when task was created (for deduplication)
-            conn: Optional database connection (ignored)
-            request_context: Request context for authentication.
-
-        Returns:
-            Empty list (observations now in mental models)
-        """
-        await self._authenticate_tenant(request_context)
-        return []
-
     # =========================================================================
     # Statistics & Operations (for HTTP API layer)
     # =========================================================================
@@ -4256,9 +4158,6 @@ class MemoryEngine(MemoryEngineInterface):
         if not entity_row:
             return None

-        # Get observations for the entity
-        observations = await self.get_entity_observations(bank_id, entity_id, limit=20, request_context=request_context)
-
         return {
             "id": str(entity_row["id"]),
             "canonical_name": entity_row["canonical_name"],
@@ -4266,7 +4165,7 @@ class MemoryEngine(MemoryEngineInterface):
             "first_seen": entity_row["first_seen"].isoformat() if entity_row["first_seen"] else None,
             "last_seen": entity_row["last_seen"].isoformat() if entity_row["last_seen"] else None,
             "metadata": entity_row["metadata"] or {},
-            "observations":
+            "observations": [],
         }

     def _parse_observations(self, observations_raw: list):
hindsight_api/engine/response_models.py
CHANGED

@@ -263,7 +263,6 @@ class ReflectResult(BaseModel):
                     }
                 ],
             },
-            "new_opinions": ["Machine learning has great potential in healthcare"],
             "structured_output": {"summary": "ML in healthcare", "confidence": 0.9},
             "usage": {"input_tokens": 1500, "output_tokens": 500, "total_tokens": 2000},
         }
@@ -272,9 +271,8 @@ class ReflectResult(BaseModel):

    text: str = Field(description="The formulated answer text")
    based_on: dict[str, Any] = Field(
-        description="Facts used to formulate the answer, organized by type (world, experience,
+        description="Facts used to formulate the answer, organized by type (world, experience, mental_models, directives)"
    )
-    new_opinions: list[str] = Field(default_factory=list, description="List of newly formed opinions during reflection")
    structured_output: dict[str, Any] | None = Field(
        default=None,
        description="Structured output parsed according to the provided response schema. Only present when response_schema was provided.",
@@ -297,24 +295,6 @@ class ReflectResult(BaseModel):
    )


-class Opinion(BaseModel):
-    """
-    An opinion with confidence score.
-
-    Opinions represent the bank's formed perspectives on topics,
-    with a confidence level indicating strength of belief.
-    """
-
-    model_config = ConfigDict(
-        json_schema_extra={
-            "example": {"text": "Machine learning has great potential in healthcare", "confidence": 0.85}
-        }
-    )
-
-    text: str = Field(description="The opinion text")
-    confidence: float = Field(description="Confidence score between 0.0 and 1.0")
-
-
class EntityObservation(BaseModel):
    """
    An observation about an entity.
hindsight_api/engine/retain/fact_extraction.py
CHANGED

@@ -693,7 +693,6 @@ async def _extract_facts_from_chunk(
    context: str,
    llm_config: "LLMConfig",
    agent_name: str = None,
-    extract_opinions: bool = False,
) -> tuple[list[dict[str, str]], TokenUsage]:
    """
    Extract facts from a single chunk (internal helper for parallel processing).
@@ -707,17 +706,9 @@ async def _extract_facts_from_chunk(

    logger = logging.getLogger(__name__)

-
-
-    # Determine which fact types to extract based on the flag
+    # Determine which fact types to extract
    # Note: We use "assistant" in the prompt but convert to "bank" for storage
-
-    # Opinion extraction uses a separate prompt (not this one)
-        fact_types_instruction = "Extract ONLY 'opinion' type facts (formed opinions, beliefs, and perspectives). DO NOT extract 'world' or 'assistant' facts."
-    else:
-        fact_types_instruction = (
-            "Extract ONLY 'world' and 'assistant' type facts. DO NOT extract opinions - those are extracted separately."
-        )
+    fact_types_instruction = "Extract ONLY 'world' and 'assistant' type facts."

    # Check config for extraction mode and causal link extraction
    config = get_config()
@@ -770,7 +761,6 @@ async def _extract_facts_from_chunk(
    # Format event_date with day of week for better temporal reasoning
    event_date_formatted = event_date.strftime("%A, %B %d, %Y")  # e.g., "Monday, June 10, 2024"
    user_message = f"""Extract facts from the following text chunk.
-{memory_bank_context}

Chunk: {chunk_index + 1}/{total_chunks}
Event Date: {event_date_formatted} ({event_date.isoformat()})
@@ -782,12 +772,28 @@ Text:
    usage = TokenUsage()  # Track cumulative usage across retries
    for attempt in range(max_retries):
        try:
+            # Use retain-specific overrides if set, otherwise fall back to global LLM config
+            max_retries = (
+                config.retain_llm_max_retries if config.retain_llm_max_retries is not None else config.llm_max_retries
+            )
+            initial_backoff = (
+                config.retain_llm_initial_backoff
+                if config.retain_llm_initial_backoff is not None
+                else config.llm_initial_backoff
+            )
+            max_backoff = (
+                config.retain_llm_max_backoff if config.retain_llm_max_backoff is not None else config.llm_max_backoff
+            )
+
            extraction_response_json, call_usage = await llm_config.call(
                messages=[{"role": "system", "content": prompt}, {"role": "user", "content": user_message}],
                response_format=response_schema,
                scope="memory_extract_facts",
                temperature=0.1,
                max_completion_tokens=config.retain_max_completion_tokens,
+                max_retries=max_retries,
+                initial_backoff=initial_backoff,
+                max_backoff=max_backoff,
                skip_validation=True,  # Get raw JSON, we'll validate leniently
                return_usage=True,
            )
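
The added block is an override-with-fallback pattern: a retain-specific setting wins when set, and None means "use the global value". A compact sketch of the same idea under assumed names (the `Settings` dataclass and `effective` helper are illustrative; only the field names mirror the diff):

from dataclasses import dataclass

@dataclass
class Settings:
    # Global defaults
    llm_max_retries: int = 3
    llm_initial_backoff: float = 1.0
    llm_max_backoff: float = 30.0
    # Optional retain-specific overrides (None means "use the global value")
    retain_llm_max_retries: int | None = None
    retain_llm_initial_backoff: float | None = None
    retain_llm_max_backoff: float | None = None

def effective(override, default):
    # None is the sentinel for "not configured"; 0 stays a valid override.
    return override if override is not None else default

cfg = Settings(retain_llm_max_retries=5)
print(effective(cfg.retain_llm_max_retries, cfg.llm_max_retries))          # 5
print(effective(cfg.retain_llm_initial_backoff, cfg.llm_initial_backoff))  # 1.0
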
@@ -1013,7 +1019,6 @@ async def _extract_facts_with_auto_split(
    context: str,
    llm_config: LLMConfig,
    agent_name: str = None,
-    extract_opinions: bool = False,
) -> tuple[list[dict[str, str]], TokenUsage]:
    """
    Extract facts from a chunk with automatic splitting if output exceeds token limits.
@@ -1029,7 +1034,6 @@ async def _extract_facts_with_auto_split(
        context: Context about the conversation/document
        llm_config: LLM configuration to use
        agent_name: Optional agent name (memory owner)
-        extract_opinions: If True, extract ONLY opinions. If False, extract world and agent facts (no opinions)

    Returns:
        Tuple of (facts list, token usage) extracted from the chunk (possibly from sub-chunks)
@@ -1048,7 +1052,6 @@ async def _extract_facts_with_auto_split(
            context=context,
            llm_config=llm_config,
            agent_name=agent_name,
-            extract_opinions=extract_opinions,
        )
    except OutputTooLongError:
        # Output exceeded token limits - split the chunk in half and retry
@@ -1093,7 +1096,6 @@ async def _extract_facts_with_auto_split(
                context=context,
                llm_config=llm_config,
                agent_name=agent_name,
-                extract_opinions=extract_opinions,
            ),
            _extract_facts_with_auto_split(
                chunk=second_half,
@@ -1103,7 +1105,6 @@ async def _extract_facts_with_auto_split(
                context=context,
                llm_config=llm_config,
                agent_name=agent_name,
-                extract_opinions=extract_opinions,
            ),
        ]

@@ -1127,7 +1128,6 @@ async def extract_facts_from_text(
    llm_config: LLMConfig,
    agent_name: str,
    context: str = "",
-    extract_opinions: bool = False,
) -> tuple[list[Fact], list[tuple[str, int]], TokenUsage]:
    """
    Extract semantic facts from conversational or narrative text using LLM.
@@ -1144,7 +1144,6 @@ async def extract_facts_from_text(
        context: Context about the conversation/document
        llm_config: LLM configuration to use
        agent_name: Agent name (memory owner)
-        extract_opinions: If True, extract ONLY opinions. If False, extract world and bank facts (no opinions)

    Returns:
        Tuple of (facts, chunks, usage) where:
@@ -1172,7 +1171,6 @@ async def extract_facts_from_text(
            context=context,
            llm_config=llm_config,
            agent_name=agent_name,
-            extract_opinions=extract_opinions,
        )
        for i, chunk in enumerate(chunks)
    ]
@@ -1204,7 +1202,7 @@ SECONDS_PER_FACT = 10


async def extract_facts_from_contents(
-    contents: list[RetainContent], llm_config, agent_name: str
+    contents: list[RetainContent], llm_config, agent_name: str
) -> tuple[list[ExtractedFactType], list[ChunkMetadata], TokenUsage]:
    """
    Extract facts from multiple content items in parallel.
|
@@ -1219,7 +1217,6 @@ async def extract_facts_from_contents(
|
|
|
1219
1217
|
contents: List of RetainContent objects to process
|
|
1220
1218
|
llm_config: LLM configuration for fact extraction
|
|
1221
1219
|
agent_name: Name of the agent (for agent-related fact detection)
|
|
1222
|
-
extract_opinions: If True, extract only opinions; otherwise world/bank facts
|
|
1223
1220
|
|
|
1224
1221
|
Returns:
|
|
1225
1222
|
Tuple of (extracted_facts, chunks_metadata, usage)
|
|
@@ -1238,7 +1235,6 @@ async def extract_facts_from_contents(
            context=item.context,
            llm_config=llm_config,
            agent_name=agent_name,
-            extract_opinions=extract_opinions,
        )
        fact_extraction_tasks.append(task)

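
Each content item still produces its own extraction coroutine that is appended to `fact_extraction_tasks`; the change only drops the `extract_opinions` argument. A generic sketch of that fan-out-and-gather pattern, with a hypothetical `extract_one` standing in for the per-item LLM call (not the package's API):

import asyncio

async def extract_one(content: str, context: str, agent_name: str) -> list[dict]:
    # Stand-in for a per-item LLM extraction call.
    await asyncio.sleep(0)
    return [{"text": f"fact about {content!r}", "fact_type": "world"}]

async def extract_all(items: list[tuple[str, str]], agent_name: str) -> list[dict]:
    tasks = [extract_one(content, context, agent_name) for content, context in items]
    # Run all per-item extractions concurrently and flatten the results.
    results = await asyncio.gather(*tasks)
    return [fact for facts in results for fact in facts]

facts = asyncio.run(extract_all([("hello", "greeting"), ("world", "noun")], agent_name="assistant"))
print(len(facts))  # 2
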
hindsight_api/engine/retain/orchestrator.py
CHANGED

@@ -101,11 +101,8 @@ async def retain_batch(

    # Step 1: Extract facts from all contents
    step_start = time.time()
-    extract_opinions = fact_type_override == "opinion"

-    extracted_facts, chunks, usage = await fact_extraction.extract_facts_from_contents(
-        contents, llm_config, agent_name, extract_opinions
-    )
+    extracted_facts, chunks, usage = await fact_extraction.extract_facts_from_contents(contents, llm_config, agent_name)
    log_buffer.append(
        f"[1] Extract facts: {len(extracted_facts)} facts, {len(chunks)} chunks from {len(contents)} contents in {time.time() - step_start:.3f}s"
    )
hindsight_api/engine/utils.py
CHANGED

@@ -19,7 +19,6 @@ async def extract_facts(
    context: str = "",
    llm_config: "LLMConfig" = None,
    agent_name: str = None,
-    extract_opinions: bool = False,
) -> tuple[list["Fact"], list[tuple[str, int]]]:
    """
    Extract semantic facts from text using LLM.
@@ -36,7 +35,6 @@ async def extract_facts(
        context: Context about the conversation/document
        llm_config: LLM configuration to use
        agent_name: Optional agent name to help identify agent-related facts
-        extract_opinions: If True, extract ONLY opinions. If False, extract world and agent facts (no opinions)

    Returns:
        Tuple of (facts, chunks) where:
@@ -55,7 +53,6 @@ async def extract_facts(
        context=context,
        llm_config=llm_config,
        agent_name=agent_name,
-        extract_opinions=extract_opinions,
    )

    if not facts:
hindsight_api/main.py
CHANGED

@@ -140,13 +140,6 @@ def main():
        args.port = DEFAULT_DAEMON_PORT
        args.host = "127.0.0.1"  # Only bind to localhost for security

-        # Force CPU mode for daemon to avoid macOS MPS/XPC issues
-        # MPS (Metal Performance Shaders) has unstable XPC connections in background processes
-        # that can cause assertion failures and process crashes at the C++ level
-        # (which Python exception handlers cannot catch)
-        os.environ["HINDSIGHT_API_EMBEDDINGS_LOCAL_FORCE_CPU"] = "1"
-        os.environ["HINDSIGHT_API_RERANKER_LOCAL_FORCE_CPU"] = "1"
-
        # Check if another daemon is already running
        daemon_lock = DaemonLock()
        if not daemon_lock.acquire():
@@ -183,19 +176,40 @@ def main():
        llm_model=config.llm_model,
        llm_base_url=config.llm_base_url,
        llm_max_concurrent=config.llm_max_concurrent,
+        llm_max_retries=config.llm_max_retries,
+        llm_initial_backoff=config.llm_initial_backoff,
+        llm_max_backoff=config.llm_max_backoff,
        llm_timeout=config.llm_timeout,
+        llm_vertexai_project_id=config.llm_vertexai_project_id,
+        llm_vertexai_region=config.llm_vertexai_region,
+        llm_vertexai_service_account_key=config.llm_vertexai_service_account_key,
        retain_llm_provider=config.retain_llm_provider,
        retain_llm_api_key=config.retain_llm_api_key,
        retain_llm_model=config.retain_llm_model,
        retain_llm_base_url=config.retain_llm_base_url,
+        retain_llm_max_concurrent=config.retain_llm_max_concurrent,
+        retain_llm_max_retries=config.retain_llm_max_retries,
+        retain_llm_initial_backoff=config.retain_llm_initial_backoff,
+        retain_llm_max_backoff=config.retain_llm_max_backoff,
+        retain_llm_timeout=config.retain_llm_timeout,
        reflect_llm_provider=config.reflect_llm_provider,
        reflect_llm_api_key=config.reflect_llm_api_key,
        reflect_llm_model=config.reflect_llm_model,
        reflect_llm_base_url=config.reflect_llm_base_url,
+        reflect_llm_max_concurrent=config.reflect_llm_max_concurrent,
+        reflect_llm_max_retries=config.reflect_llm_max_retries,
+        reflect_llm_initial_backoff=config.reflect_llm_initial_backoff,
+        reflect_llm_max_backoff=config.reflect_llm_max_backoff,
+        reflect_llm_timeout=config.reflect_llm_timeout,
        consolidation_llm_provider=config.consolidation_llm_provider,
        consolidation_llm_api_key=config.consolidation_llm_api_key,
        consolidation_llm_model=config.consolidation_llm_model,
        consolidation_llm_base_url=config.consolidation_llm_base_url,
+        consolidation_llm_max_concurrent=config.consolidation_llm_max_concurrent,
+        consolidation_llm_max_retries=config.consolidation_llm_max_retries,
+        consolidation_llm_initial_backoff=config.consolidation_llm_initial_backoff,
+        consolidation_llm_max_backoff=config.consolidation_llm_max_backoff,
+        consolidation_llm_timeout=config.consolidation_llm_timeout,
        embeddings_provider=config.embeddings_provider,
        embeddings_local_model=config.embeddings_local_model,
        embeddings_local_force_cpu=config.embeddings_local_force_cpu,
@@ -225,7 +239,6 @@ def main():
        retain_extract_causal_links=config.retain_extract_causal_links,
        retain_extraction_mode=config.retain_extraction_mode,
        retain_custom_instructions=config.retain_custom_instructions,
-        retain_observations_async=config.retain_observations_async,
        enable_observations=config.enable_observations,
        consolidation_batch_size=config.consolidation_batch_size,
        consolidation_max_tokens=config.consolidation_max_tokens,
@@ -240,8 +253,9 @@ def main():
        worker_id=config.worker_id,
        worker_poll_interval_ms=config.worker_poll_interval_ms,
        worker_max_retries=config.worker_max_retries,
-        worker_batch_size=config.worker_batch_size,
        worker_http_port=config.worker_http_port,
+        worker_max_slots=config.worker_max_slots,
+        worker_consolidation_max_slots=config.worker_consolidation_max_slots,
        reflect_max_iterations=config.reflect_max_iterations,
        mental_model_refresh_concurrency=config.mental_model_refresh_concurrency,
    )
@@ -353,6 +367,7 @@ def main():
    # Start idle checker in daemon mode
    if idle_middleware is not None:
        # Start the idle checker in a background thread with its own event loop
+        import logging
        import threading

        def run_idle_checker():
@@ -363,12 +378,12 @@ def main():
                loop = asyncio.new_event_loop()
                asyncio.set_event_loop(loop)
                loop.run_until_complete(idle_middleware._check_idle())
-            except Exception:
-
+            except Exception as e:
+                logging.error(f"Idle checker error: {e}", exc_info=True)

        threading.Thread(target=run_idle_checker, daemon=True).start()

-    uvicorn.run(**uvicorn_config)
+    uvicorn.run(**uvicorn_config)


if __name__ == "__main__":
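
The idle-checker change replaces a silent `except Exception:` with one that logs the failure. A minimal stand-alone sketch of the same background-thread-with-its-own-event-loop pattern (the `check_idle` coroutine is a placeholder, not the package's middleware):

import asyncio
import logging
import threading

async def check_idle() -> None:
    # Placeholder for an async idle check that may raise.
    await asyncio.sleep(0.1)

def run_idle_checker() -> None:
    try:
        # A background thread has no running loop, so create one just for this coroutine.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        loop.run_until_complete(check_idle())
    except Exception as e:
        # Log instead of swallowing the error so failures are visible in daemon logs.
        logging.error(f"Idle checker error: {e}", exc_info=True)

t = threading.Thread(target=run_idle_checker, daemon=True)
t.start()
t.join()  # join only so this sketch finishes deterministically
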