hindsight-api 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -504,12 +504,11 @@ class MemoryEngine(MemoryEngineInterface):
         if request_context is None:
             raise AuthenticationError("RequestContext is required when tenant extension is configured")

-        # For internal/background operations (e.g., worker tasks), skip extension authentication
-        # if the schema has already been set by execute_task via the _schema field.
+        # For internal/background operations (e.g., worker tasks), skip extension authentication.
+        # The task was already authenticated at submission time, and execute_task sets _current_schema
+        # from the task's _schema field. For public schema tasks, _current_schema keeps its default "public".
         if request_context.internal:
-            current = _current_schema.get()
-            if current and current != "public":
-                return current
+            return _current_schema.get()

         # Let AuthenticationError propagate - HTTP layer will convert to 401
         tenant_context = await self._tenant_extension.authenticate(request_context)
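The simplified "if request_context.internal" branch relies on execute_task having already restored the task's schema into the _current_schema contextvar before engine code runs. A minimal sketch of that contract, assuming a dict-shaped task payload and a generic handler callable (the package's real execute_task may differ):

import contextvars

# Default mirrors the new comment: tasks for the public schema leave the contextvar at "public".
_current_schema: contextvars.ContextVar[str] = contextvars.ContextVar("_current_schema", default="public")

async def execute_task(task: dict, handler) -> None:
    # The task was authenticated when it was submitted; here we only restore the
    # schema it was submitted under, then run the engine-facing handler.
    token = _current_schema.set(task.get("_schema") or "public")
    try:
        await handler(task)  # internal requests now read _current_schema.get() unconditionally
    finally:
        _current_schema.reset(token)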
@@ -789,7 +788,7 @@ class MemoryEngine(MemoryEngineInterface):
         kwargs = {"name": self._pg0_instance_name}
         if self._pg0_port is not None:
             kwargs["port"] = self._pg0_port
-        pg0 = EmbeddedPostgres(**kwargs)  # type: ignore[invalid-argument-type] - dict kwargs
+        pg0 = EmbeddedPostgres(**kwargs)
         # Check if pg0 is already running before we start it
         was_already_running = await pg0.is_running()
         self.db_url = await pg0.ensure_running()
@@ -889,6 +888,23 @@ class MemoryEngine(MemoryEngineInterface):
         # Use configured database schema for migrations (defaults to "public")
         run_migrations(self.db_url, schema=get_config().database_schema)

+        # Migrate all existing tenant schemas (if multi-tenant)
+        if self._tenant_extension is not None:
+            try:
+                tenants = await self._tenant_extension.list_tenants()
+                if tenants:
+                    logger.info(f"Running migrations on {len(tenants)} tenant schemas...")
+                    for tenant in tenants:
+                        schema = tenant.schema
+                        if schema and schema != "public":
+                            try:
+                                run_migrations(self.db_url, schema=schema)
+                            except Exception as e:
+                                logger.warning(f"Failed to migrate tenant schema {schema}: {e}")
+                    logger.info("Tenant schema migrations completed")
+            except Exception as e:
+                logger.warning(f"Failed to run tenant schema migrations: {e}")
+
         # Ensure embedding column dimension matches the model's dimension
         # This is done after migrations and after embeddings.initialize()
         ensure_embedding_dimension(self.db_url, self.embeddings.dimension, schema=get_config().database_schema)
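The new startup loop assumes the tenant extension exposes an async list_tenants() whose items carry a schema attribute, and it deliberately logs and skips per-schema failures so one broken tenant cannot block startup. A rough sketch of that assumed interface (only list_tenants and schema are taken from the diff; everything else here is illustrative):

from dataclasses import dataclass

@dataclass
class TenantRecord:
    tenant_id: str
    schema: str | None  # Postgres schema for the tenant; "public"/None is skipped by the loop above

class StaticTenantExtension:
    """Illustrative tenant extension backed by an in-memory list."""

    def __init__(self, records: list[TenantRecord]):
        self._records = records

    async def list_tenants(self) -> list[TenantRecord]:
        # A real extension would query a control-plane table or external service here.
        return list(self._records)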
@@ -1175,15 +1191,15 @@ class MemoryEngine(MemoryEngineInterface):
             context: Context about when/why this memory was formed
             event_date: When the event occurred (defaults to now)
             document_id: Optional document ID for tracking (always upserts if document already exists)
-            fact_type_override: Override fact type ('world', 'experience', 'opinion')
-            confidence_score: Confidence score for opinions (0.0 to 1.0)
+            fact_type_override: Override fact type ('world', 'experience')
+            confidence_score: Confidence score (0.0 to 1.0)
             request_context: Request context for authentication.

         Returns:
             List of created unit IDs
         """
         # Build content dict
-        content_dict: RetainContentDict = {"content": content, "context": context}  # type: ignore[typeddict-item] - building incrementally
+        content_dict: RetainContentDict = {"content": content, "context": context}
         if event_date:
             content_dict["event_date"] = event_date
         if document_id:
@@ -1231,8 +1247,8 @@ class MemoryEngine(MemoryEngineInterface):
                 - "document_id" (optional): Document ID for this specific content item
             document_id: **DEPRECATED** - Use "document_id" key in each content dict instead.
                 Applies the same document_id to ALL content items that don't specify their own.
-            fact_type_override: Override fact type for all facts ('world', 'experience', 'opinion')
-            confidence_score: Confidence score for opinions (0.0 to 1.0)
+            fact_type_override: Override fact type for all facts ('world', 'experience')
+            confidence_score: Confidence score (0.0 to 1.0)
             return_usage: If True, returns tuple of (unit_ids, TokenUsage). Default False for backward compatibility.

         Returns:
@@ -1554,16 +1570,16 @@ class MemoryEngine(MemoryEngineInterface):
         if fact_type is None:
             fact_type = list(VALID_RECALL_FACT_TYPES)

-        # Validate fact types early
+        # Filter out 'opinion' early (deprecated, silently ignore)
+        fact_type = [ft for ft in fact_type if ft != "opinion"]
+
+        # Validate fact types
         invalid_types = set(fact_type) - VALID_RECALL_FACT_TYPES
         if invalid_types:
             raise ValueError(
                 f"Invalid fact type(s): {', '.join(sorted(invalid_types))}. "
                 f"Must be one of: {', '.join(sorted(VALID_RECALL_FACT_TYPES))}"
             )
-
-        # Filter out 'opinion' - opinions are no longer returned from recall
-        fact_type = [ft for ft in fact_type if ft != "opinion"]
         if not fact_type:
             # All requested types were opinions - return empty result
             return RecallResultModel(results=[], entities={}, chunks={})
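Moving the filter ahead of validation changes the caller-visible behavior: a deprecated "opinion" entry is now dropped silently instead of tripping the ValueError, and only genuinely unknown types still raise. A self-contained sketch of the new ordering (the contents of VALID_RECALL_FACT_TYPES are assumed here, not quoted from the package):

VALID_RECALL_FACT_TYPES = {"world", "experience"}  # assumed set of accepted types

def normalize_fact_types(fact_type: list[str] | None) -> list[str]:
    if fact_type is None:
        fact_type = list(VALID_RECALL_FACT_TYPES)
    # Drop the deprecated type before validating, mirroring the reordered hunk above.
    fact_type = [ft for ft in fact_type if ft != "opinion"]
    invalid = set(fact_type) - VALID_RECALL_FACT_TYPES
    if invalid:
        raise ValueError(f"Invalid fact type(s): {', '.join(sorted(invalid))}")
    return fact_type

assert normalize_fact_types(["opinion", "world"]) == ["world"]  # "opinion" silently ignored
assert normalize_fact_types(["opinion"]) == []  # caller then returns an empty RecallResultModel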
@@ -2219,44 +2235,15 @@ class MemoryEngine(MemoryEngineInterface):
             )
             top_results_dicts.append(result_dict)

-        # Get entities for each fact if include_entities is requested
-        fact_entity_map = {}  # unit_id -> list of (entity_id, entity_name)
-        if include_entities and top_scored:
-            unit_ids = [uuid.UUID(sr.id) for sr in top_scored]
-            if unit_ids:
-                async with acquire_with_retry(pool) as entity_conn:
-                    entity_rows = await entity_conn.fetch(
-                        f"""
-                        SELECT ue.unit_id, e.id as entity_id, e.canonical_name
-                        FROM {fq_table("unit_entities")} ue
-                        JOIN {fq_table("entities")} e ON ue.entity_id = e.id
-                        WHERE ue.unit_id = ANY($1::uuid[])
-                        """,
-                        unit_ids,
-                    )
-                for row in entity_rows:
-                    unit_id = str(row["unit_id"])
-                    if unit_id not in fact_entity_map:
-                        fact_entity_map[unit_id] = []
-                    fact_entity_map[unit_id].append(
-                        {"entity_id": str(row["entity_id"]), "canonical_name": row["canonical_name"]}
-                    )
-
         # Convert results to MemoryFact objects
         memory_facts = []
         for result_dict in top_results_dicts:
-            result_id = str(result_dict.get("id"))
-            # Get entity names for this fact
-            entity_names = None
-            if include_entities and result_id in fact_entity_map:
-                entity_names = [e["canonical_name"] for e in fact_entity_map[result_id]]
-
             memory_facts.append(
                 MemoryFact(
-                    id=result_id,
+                    id=str(result_dict.get("id")),
                     text=result_dict.get("text"),
                     fact_type=result_dict.get("fact_type", "world"),
-                    entities=entity_names,
+                    entities=None,  # Entity observations removed
                     context=result_dict.get("context"),
                     occurred_start=result_dict.get("occurred_start"),
                     occurred_end=result_dict.get("occurred_end"),
@@ -2267,38 +2254,12 @@ class MemoryEngine(MemoryEngineInterface):
                 )
             )

-        # Fetch entity observations if requested
+        # Entity observations removed - always set to None
         entities_dict = None
-        total_entity_tokens = 0
-        total_chunk_tokens = 0
-        if include_entities and fact_entity_map:
-            # Collect unique entities in order of fact relevance (preserving order from top_scored)
-            # Use a list to maintain order, but track seen entities to avoid duplicates
-            entities_ordered = []  # list of (entity_id, entity_name) tuples
-            seen_entity_ids = set()
-
-            # Iterate through facts in relevance order
-            for sr in top_scored:
-                unit_id = sr.id
-                if unit_id in fact_entity_map:
-                    for entity in fact_entity_map[unit_id]:
-                        entity_id = entity["entity_id"]
-                        entity_name = entity["canonical_name"]
-                        if entity_id not in seen_entity_ids:
-                            entities_ordered.append((entity_id, entity_name))
-                            seen_entity_ids.add(entity_id)
-
-            # Return entities with empty observations (summaries now live in mental models)
-            entities_dict = {}
-            for entity_id, entity_name in entities_ordered:
-                entities_dict[entity_name] = EntityState(
-                    entity_id=entity_id,
-                    canonical_name=entity_name,
-                    observations=[],  # Mental models provide this now
-                )

         # Fetch chunks if requested
         chunks_dict = None
+        total_chunk_tokens = 0
         if include_chunks and top_scored:
             from .response_models import ChunkInfo

@@ -2367,7 +2328,6 @@ class MemoryEngine(MemoryEngineInterface):
         # Log final recall stats
         total_time = time.time() - recall_start
         num_chunks = len(chunks_dict) if chunks_dict else 0
-        num_entities = len(entities_dict) if entities_dict else 0
         # Include wait times in log if significant
         wait_parts = []
         if semaphore_wait > 0.01:
@@ -2376,7 +2336,7 @@ class MemoryEngine(MemoryEngineInterface):
             wait_parts.append(f"conn={max_conn_wait:.3f}s")
         wait_info = f" | waits: {', '.join(wait_parts)}" if wait_parts else ""
         log_buffer.append(
-            f"[RECALL {recall_id}] Complete: {len(top_scored)} facts ({total_tokens} tok), {num_chunks} chunks ({total_chunk_tokens} tok), {num_entities} entities ({total_entity_tokens} tok) | {fact_type_summary} | {total_time:.3f}s{wait_info}"
+            f"[RECALL {recall_id}] Complete: {len(top_scored)} facts ({total_tokens} tok), {num_chunks} chunks ({total_chunk_tokens} tok) | {fact_type_summary} | {total_time:.3f}s{wait_info}"
         )
         if not quiet:
             logger.info("\n" + "\n".join(log_buffer))
@@ -3550,7 +3510,6 @@ class MemoryEngine(MemoryEngineInterface):
             ReflectResult containing:
             - text: Plain text answer
             - based_on: Empty dict (agent retrieves facts dynamically)
-            - new_opinions: Empty list
             - structured_output: None (not yet supported for agentic reflect)
         """
         # Use cached LLM config
@@ -3875,7 +3834,6 @@ class MemoryEngine(MemoryEngineInterface):
         result = ReflectResult(
             text=agent_result.text,
             based_on=based_on,
-            new_opinions=[],  # Learnings stored as mental models
             structured_output=agent_result.structured_output,
             usage=usage,
             tool_trace=tool_trace_result,
@@ -3904,32 +3862,6 @@ class MemoryEngine(MemoryEngineInterface):

         return result

-    async def get_entity_observations(
-        self,
-        bank_id: str,
-        entity_id: str,
-        *,
-        limit: int = 10,
-        request_context: "RequestContext",
-    ) -> list[Any]:
-        """
-        Get observations for an entity.
-
-        NOTE: Entity observations/summaries have been moved to mental models.
-        This method returns an empty list. Use mental models for entity summaries.
-
-        Args:
-            bank_id: bank IDentifier
-            entity_id: Entity UUID to get observations for
-            limit: Ignored (kept for backwards compatibility)
-            request_context: Request context for authentication.
-
-        Returns:
-            Empty list (observations now in mental models)
-        """
-        await self._authenticate_tenant(request_context)
-        return []
-
     async def list_entities(
         self,
         bank_id: str,
@@ -4116,36 +4048,6 @@ class MemoryEngine(MemoryEngineInterface):
         await self._authenticate_tenant(request_context)
         return EntityState(entity_id=entity_id, canonical_name=entity_name, observations=[])

-    async def regenerate_entity_observations(
-        self,
-        bank_id: str,
-        entity_id: str,
-        entity_name: str,
-        *,
-        version: str | None = None,
-        conn=None,
-        request_context: "RequestContext",
-    ) -> list[str]:
-        """
-        Regenerate observations for an entity.
-
-        NOTE: Entity observations/summaries have been moved to mental models.
-        This method is now a no-op and returns an empty list.
-
-        Args:
-            bank_id: bank IDentifier
-            entity_id: Entity UUID
-            entity_name: Canonical name of the entity
-            version: Entity's last_seen timestamp when task was created (for deduplication)
-            conn: Optional database connection (ignored)
-            request_context: Request context for authentication.
-
-        Returns:
-            Empty list (observations now in mental models)
-        """
-        await self._authenticate_tenant(request_context)
-        return []
-
     # =========================================================================
     # Statistics & Operations (for HTTP API layer)
     # =========================================================================
@@ -4256,9 +4158,6 @@ class MemoryEngine(MemoryEngineInterface):
         if not entity_row:
             return None

-        # Get observations for the entity
-        observations = await self.get_entity_observations(bank_id, entity_id, limit=20, request_context=request_context)
-
         return {
             "id": str(entity_row["id"]),
             "canonical_name": entity_row["canonical_name"],
@@ -4266,7 +4165,7 @@ class MemoryEngine(MemoryEngineInterface):
             "first_seen": entity_row["first_seen"].isoformat() if entity_row["first_seen"] else None,
             "last_seen": entity_row["last_seen"].isoformat() if entity_row["last_seen"] else None,
             "metadata": entity_row["metadata"] or {},
-            "observations": observations,
+            "observations": [],
         }

     def _parse_observations(self, observations_raw: list):
@@ -263,7 +263,6 @@ class ReflectResult(BaseModel):
                     }
                 ],
             },
-            "new_opinions": ["Machine learning has great potential in healthcare"],
             "structured_output": {"summary": "ML in healthcare", "confidence": 0.9},
             "usage": {"input_tokens": 1500, "output_tokens": 500, "total_tokens": 2000},
         }
@@ -272,9 +271,8 @@ class ReflectResult(BaseModel):

    text: str = Field(description="The formulated answer text")
    based_on: dict[str, Any] = Field(
-        description="Facts used to formulate the answer, organized by type (world, experience, opinion, mental_models, directives)"
+        description="Facts used to formulate the answer, organized by type (world, experience, mental_models, directives)"
    )
-    new_opinions: list[str] = Field(default_factory=list, description="List of newly formed opinions during reflection")
    structured_output: dict[str, Any] | None = Field(
        default=None,
        description="Structured output parsed according to the provided response schema. Only present when response_schema was provided.",
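Callers that previously read new_opinions off the reflect result should drop that access; the field is removed from the model rather than deprecated-but-present. A tiny stand-in (only the fields relevant here, not the package's full model) showing the effect:

from typing import Any

from pydantic import BaseModel, Field

class ReflectResultStandIn(BaseModel):
    # Illustrative subset of the 0.4.3 ReflectResult - new_opinions no longer exists.
    text: str = Field(description="The formulated answer text")
    based_on: dict[str, Any] = Field(default_factory=dict)

result = ReflectResultStandIn(text="answer", based_on={"world": []})
assert not hasattr(result, "new_opinions")  # attribute access now fails instead of returning []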
@@ -297,24 +295,6 @@ class ReflectResult(BaseModel):
    )


-class Opinion(BaseModel):
-    """
-    An opinion with confidence score.
-
-    Opinions represent the bank's formed perspectives on topics,
-    with a confidence level indicating strength of belief.
-    """
-
-    model_config = ConfigDict(
-        json_schema_extra={
-            "example": {"text": "Machine learning has great potential in healthcare", "confidence": 0.85}
-        }
-    )
-
-    text: str = Field(description="The opinion text")
-    confidence: float = Field(description="Confidence score between 0.0 and 1.0")
-
-
 class EntityObservation(BaseModel):
    """
    An observation about an entity.
@@ -693,7 +693,6 @@ async def _extract_facts_from_chunk(
    context: str,
    llm_config: "LLMConfig",
    agent_name: str = None,
-    extract_opinions: bool = False,
 ) -> tuple[list[dict[str, str]], TokenUsage]:
    """
    Extract facts from a single chunk (internal helper for parallel processing).
@@ -707,17 +706,9 @@

    logger = logging.getLogger(__name__)

-    memory_bank_context = f"\n- Your name: {agent_name}" if agent_name and extract_opinions else ""
-
-    # Determine which fact types to extract based on the flag
+    # Determine which fact types to extract
    # Note: We use "assistant" in the prompt but convert to "bank" for storage
-    if extract_opinions:
-        # Opinion extraction uses a separate prompt (not this one)
-        fact_types_instruction = "Extract ONLY 'opinion' type facts (formed opinions, beliefs, and perspectives). DO NOT extract 'world' or 'assistant' facts."
-    else:
-        fact_types_instruction = (
-            "Extract ONLY 'world' and 'assistant' type facts. DO NOT extract opinions - those are extracted separately."
-        )
+    fact_types_instruction = "Extract ONLY 'world' and 'assistant' type facts."

    # Check config for extraction mode and causal link extraction
    config = get_config()
@@ -770,7 +761,6 @@
    # Format event_date with day of week for better temporal reasoning
    event_date_formatted = event_date.strftime("%A, %B %d, %Y")  # e.g., "Monday, June 10, 2024"
    user_message = f"""Extract facts from the following text chunk.
-{memory_bank_context}

Chunk: {chunk_index + 1}/{total_chunks}
Event Date: {event_date_formatted} ({event_date.isoformat()})
@@ -782,12 +772,28 @@ Text:
    usage = TokenUsage()  # Track cumulative usage across retries
    for attempt in range(max_retries):
        try:
+            # Use retain-specific overrides if set, otherwise fall back to global LLM config
+            max_retries = (
+                config.retain_llm_max_retries if config.retain_llm_max_retries is not None else config.llm_max_retries
+            )
+            initial_backoff = (
+                config.retain_llm_initial_backoff
+                if config.retain_llm_initial_backoff is not None
+                else config.llm_initial_backoff
+            )
+            max_backoff = (
+                config.retain_llm_max_backoff if config.retain_llm_max_backoff is not None else config.llm_max_backoff
+            )
+
            extraction_response_json, call_usage = await llm_config.call(
                messages=[{"role": "system", "content": prompt}, {"role": "user", "content": user_message}],
                response_format=response_schema,
                scope="memory_extract_facts",
                temperature=0.1,
                max_completion_tokens=config.retain_max_completion_tokens,
+                max_retries=max_retries,
+                initial_backoff=initial_backoff,
+                max_backoff=max_backoff,
                skip_validation=True,  # Get raw JSON, we'll validate leniently
                return_usage=True,
            )
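The added block applies the same "scoped override, else global" precedence that the main.py wiring further below now threads through for retain, reflect, and consolidation settings. A self-contained sketch of that precedence (field names mirror the diff; the default values are made up for illustration):

from dataclasses import dataclass

@dataclass
class _Cfg:
    llm_max_retries: int = 5                   # global default (illustrative value)
    retain_llm_max_retries: int | None = None  # None means "inherit the global value"

def effective_retain_retries(cfg: _Cfg) -> int:
    # The per-scope override wins only when it is explicitly set.
    return cfg.retain_llm_max_retries if cfg.retain_llm_max_retries is not None else cfg.llm_max_retries

assert effective_retain_retries(_Cfg()) == 5
assert effective_retain_retries(_Cfg(retain_llm_max_retries=2)) == 2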
@@ -1013,7 +1019,6 @@ async def _extract_facts_with_auto_split(
    context: str,
    llm_config: LLMConfig,
    agent_name: str = None,
-    extract_opinions: bool = False,
 ) -> tuple[list[dict[str, str]], TokenUsage]:
    """
    Extract facts from a chunk with automatic splitting if output exceeds token limits.
@@ -1029,7 +1034,6 @@
        context: Context about the conversation/document
        llm_config: LLM configuration to use
        agent_name: Optional agent name (memory owner)
-        extract_opinions: If True, extract ONLY opinions. If False, extract world and agent facts (no opinions)

    Returns:
        Tuple of (facts list, token usage) extracted from the chunk (possibly from sub-chunks)
@@ -1048,7 +1052,6 @@
            context=context,
            llm_config=llm_config,
            agent_name=agent_name,
-            extract_opinions=extract_opinions,
        )
    except OutputTooLongError:
        # Output exceeded token limits - split the chunk in half and retry
@@ -1093,7 +1096,6 @@
                context=context,
                llm_config=llm_config,
                agent_name=agent_name,
-                extract_opinions=extract_opinions,
            ),
            _extract_facts_with_auto_split(
                chunk=second_half,
@@ -1103,7 +1105,6 @@
                context=context,
                llm_config=llm_config,
                agent_name=agent_name,
-                extract_opinions=extract_opinions,
            ),
        ]

@@ -1127,7 +1128,6 @@ async def extract_facts_from_text(
    llm_config: LLMConfig,
    agent_name: str,
    context: str = "",
-    extract_opinions: bool = False,
 ) -> tuple[list[Fact], list[tuple[str, int]], TokenUsage]:
    """
    Extract semantic facts from conversational or narrative text using LLM.
@@ -1144,7 +1144,6 @@
        context: Context about the conversation/document
        llm_config: LLM configuration to use
        agent_name: Agent name (memory owner)
-        extract_opinions: If True, extract ONLY opinions. If False, extract world and bank facts (no opinions)

    Returns:
        Tuple of (facts, chunks, usage) where:
@@ -1172,7 +1171,6 @@
            context=context,
            llm_config=llm_config,
            agent_name=agent_name,
-            extract_opinions=extract_opinions,
        )
        for i, chunk in enumerate(chunks)
    ]
@@ -1204,7 +1202,7 @@ SECONDS_PER_FACT = 10


 async def extract_facts_from_contents(
-    contents: list[RetainContent], llm_config, agent_name: str, extract_opinions: bool = False
+    contents: list[RetainContent], llm_config, agent_name: str
 ) -> tuple[list[ExtractedFactType], list[ChunkMetadata], TokenUsage]:
    """
    Extract facts from multiple content items in parallel.
@@ -1219,7 +1217,6 @@ async def extract_facts_from_contents(
        contents: List of RetainContent objects to process
        llm_config: LLM configuration for fact extraction
        agent_name: Name of the agent (for agent-related fact detection)
-        extract_opinions: If True, extract only opinions; otherwise world/bank facts

    Returns:
        Tuple of (extracted_facts, chunks_metadata, usage)
@@ -1238,7 +1235,6 @@ async def extract_facts_from_contents(
            context=item.context,
            llm_config=llm_config,
            agent_name=agent_name,
-            extract_opinions=extract_opinions,
        )
        fact_extraction_tasks.append(task)

@@ -101,11 +101,8 @@ async def retain_batch(

    # Step 1: Extract facts from all contents
    step_start = time.time()
-    extract_opinions = fact_type_override == "opinion"

-    extracted_facts, chunks, usage = await fact_extraction.extract_facts_from_contents(
-        contents, llm_config, agent_name, extract_opinions
-    )
+    extracted_facts, chunks, usage = await fact_extraction.extract_facts_from_contents(contents, llm_config, agent_name)
    log_buffer.append(
        f"[1] Extract facts: {len(extracted_facts)} facts, {len(chunks)} chunks from {len(contents)} contents in {time.time() - step_start:.3f}s"
    )
@@ -19,7 +19,6 @@ async def extract_facts(
    context: str = "",
    llm_config: "LLMConfig" = None,
    agent_name: str = None,
-    extract_opinions: bool = False,
 ) -> tuple[list["Fact"], list[tuple[str, int]]]:
    """
    Extract semantic facts from text using LLM.
@@ -36,7 +35,6 @@
        context: Context about the conversation/document
        llm_config: LLM configuration to use
        agent_name: Optional agent name to help identify agent-related facts
-        extract_opinions: If True, extract ONLY opinions. If False, extract world and agent facts (no opinions)

    Returns:
        Tuple of (facts, chunks) where:
@@ -55,7 +53,6 @@
        context=context,
        llm_config=llm_config,
        agent_name=agent_name,
-        extract_opinions=extract_opinions,
    )

    if not facts:
hindsight_api/main.py CHANGED
@@ -140,13 +140,6 @@ def main():
        args.port = DEFAULT_DAEMON_PORT
        args.host = "127.0.0.1"  # Only bind to localhost for security

-        # Force CPU mode for daemon to avoid macOS MPS/XPC issues
-        # MPS (Metal Performance Shaders) has unstable XPC connections in background processes
-        # that can cause assertion failures and process crashes at the C++ level
-        # (which Python exception handlers cannot catch)
-        os.environ["HINDSIGHT_API_EMBEDDINGS_LOCAL_FORCE_CPU"] = "1"
-        os.environ["HINDSIGHT_API_RERANKER_LOCAL_FORCE_CPU"] = "1"
-
        # Check if another daemon is already running
        daemon_lock = DaemonLock()
        if not daemon_lock.acquire():
@@ -183,19 +176,40 @@ def main():
        llm_model=config.llm_model,
        llm_base_url=config.llm_base_url,
        llm_max_concurrent=config.llm_max_concurrent,
+        llm_max_retries=config.llm_max_retries,
+        llm_initial_backoff=config.llm_initial_backoff,
+        llm_max_backoff=config.llm_max_backoff,
        llm_timeout=config.llm_timeout,
+        llm_vertexai_project_id=config.llm_vertexai_project_id,
+        llm_vertexai_region=config.llm_vertexai_region,
+        llm_vertexai_service_account_key=config.llm_vertexai_service_account_key,
        retain_llm_provider=config.retain_llm_provider,
        retain_llm_api_key=config.retain_llm_api_key,
        retain_llm_model=config.retain_llm_model,
        retain_llm_base_url=config.retain_llm_base_url,
+        retain_llm_max_concurrent=config.retain_llm_max_concurrent,
+        retain_llm_max_retries=config.retain_llm_max_retries,
+        retain_llm_initial_backoff=config.retain_llm_initial_backoff,
+        retain_llm_max_backoff=config.retain_llm_max_backoff,
+        retain_llm_timeout=config.retain_llm_timeout,
        reflect_llm_provider=config.reflect_llm_provider,
        reflect_llm_api_key=config.reflect_llm_api_key,
        reflect_llm_model=config.reflect_llm_model,
        reflect_llm_base_url=config.reflect_llm_base_url,
+        reflect_llm_max_concurrent=config.reflect_llm_max_concurrent,
+        reflect_llm_max_retries=config.reflect_llm_max_retries,
+        reflect_llm_initial_backoff=config.reflect_llm_initial_backoff,
+        reflect_llm_max_backoff=config.reflect_llm_max_backoff,
+        reflect_llm_timeout=config.reflect_llm_timeout,
        consolidation_llm_provider=config.consolidation_llm_provider,
        consolidation_llm_api_key=config.consolidation_llm_api_key,
        consolidation_llm_model=config.consolidation_llm_model,
        consolidation_llm_base_url=config.consolidation_llm_base_url,
+        consolidation_llm_max_concurrent=config.consolidation_llm_max_concurrent,
+        consolidation_llm_max_retries=config.consolidation_llm_max_retries,
+        consolidation_llm_initial_backoff=config.consolidation_llm_initial_backoff,
+        consolidation_llm_max_backoff=config.consolidation_llm_max_backoff,
+        consolidation_llm_timeout=config.consolidation_llm_timeout,
        embeddings_provider=config.embeddings_provider,
        embeddings_local_model=config.embeddings_local_model,
        embeddings_local_force_cpu=config.embeddings_local_force_cpu,
@@ -225,7 +239,6 @@ def main():
        retain_extract_causal_links=config.retain_extract_causal_links,
        retain_extraction_mode=config.retain_extraction_mode,
        retain_custom_instructions=config.retain_custom_instructions,
-        retain_observations_async=config.retain_observations_async,
        enable_observations=config.enable_observations,
        consolidation_batch_size=config.consolidation_batch_size,
        consolidation_max_tokens=config.consolidation_max_tokens,
@@ -240,8 +253,9 @@
        worker_id=config.worker_id,
        worker_poll_interval_ms=config.worker_poll_interval_ms,
        worker_max_retries=config.worker_max_retries,
-        worker_batch_size=config.worker_batch_size,
        worker_http_port=config.worker_http_port,
+        worker_max_slots=config.worker_max_slots,
+        worker_consolidation_max_slots=config.worker_consolidation_max_slots,
        reflect_max_iterations=config.reflect_max_iterations,
        mental_model_refresh_concurrency=config.mental_model_refresh_concurrency,
    )
@@ -353,6 +367,7 @@ def main():
    # Start idle checker in daemon mode
    if idle_middleware is not None:
        # Start the idle checker in a background thread with its own event loop
+        import logging
        import threading

        def run_idle_checker():
@@ -363,12 +378,12 @@ def main():
                loop = asyncio.new_event_loop()
                asyncio.set_event_loop(loop)
                loop.run_until_complete(idle_middleware._check_idle())
-            except Exception:
-                pass
+            except Exception as e:
+                logging.error(f"Idle checker error: {e}", exc_info=True)

        threading.Thread(target=run_idle_checker, daemon=True).start()

-    uvicorn.run(**uvicorn_config)  # type: ignore[invalid-argument-type] - dict kwargs
+    uvicorn.run(**uvicorn_config)


 if __name__ == "__main__":