hindsight-api 0.3.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. hindsight_api/__init__.py +1 -1
  2. hindsight_api/admin/cli.py +59 -0
  3. hindsight_api/alembic/versions/h3c4d5e6f7g8_mental_models_v4.py +112 -0
  4. hindsight_api/alembic/versions/i4d5e6f7g8h9_delete_opinions.py +41 -0
  5. hindsight_api/alembic/versions/j5e6f7g8h9i0_mental_model_versions.py +95 -0
  6. hindsight_api/alembic/versions/k6f7g8h9i0j1_add_directive_subtype.py +58 -0
  7. hindsight_api/alembic/versions/l7g8h9i0j1k2_add_worker_columns.py +109 -0
  8. hindsight_api/alembic/versions/m8h9i0j1k2l3_mental_model_id_to_text.py +41 -0
  9. hindsight_api/alembic/versions/n9i0j1k2l3m4_learnings_and_pinned_reflections.py +134 -0
  10. hindsight_api/alembic/versions/o0j1k2l3m4n5_migrate_mental_models_data.py +113 -0
  11. hindsight_api/alembic/versions/p1k2l3m4n5o6_new_knowledge_architecture.py +194 -0
  12. hindsight_api/alembic/versions/q2l3m4n5o6p7_fix_mental_model_fact_type.py +50 -0
  13. hindsight_api/alembic/versions/r3m4n5o6p7q8_add_reflect_response_to_reflections.py +47 -0
  14. hindsight_api/alembic/versions/s4n5o6p7q8r9_add_consolidated_at_to_memory_units.py +53 -0
  15. hindsight_api/alembic/versions/t5o6p7q8r9s0_rename_mental_models_to_observations.py +134 -0
  16. hindsight_api/alembic/versions/u6p7q8r9s0t1_mental_models_text_id.py +41 -0
  17. hindsight_api/alembic/versions/v7q8r9s0t1u2_add_max_tokens_to_mental_models.py +50 -0
  18. hindsight_api/api/http.py +1120 -93
  19. hindsight_api/api/mcp.py +11 -191
  20. hindsight_api/config.py +174 -46
  21. hindsight_api/engine/consolidation/__init__.py +5 -0
  22. hindsight_api/engine/consolidation/consolidator.py +926 -0
  23. hindsight_api/engine/consolidation/prompts.py +77 -0
  24. hindsight_api/engine/cross_encoder.py +153 -22
  25. hindsight_api/engine/directives/__init__.py +5 -0
  26. hindsight_api/engine/directives/models.py +37 -0
  27. hindsight_api/engine/embeddings.py +136 -13
  28. hindsight_api/engine/interface.py +32 -13
  29. hindsight_api/engine/llm_wrapper.py +505 -43
  30. hindsight_api/engine/memory_engine.py +2101 -1094
  31. hindsight_api/engine/mental_models/__init__.py +14 -0
  32. hindsight_api/engine/mental_models/models.py +53 -0
  33. hindsight_api/engine/reflect/__init__.py +18 -0
  34. hindsight_api/engine/reflect/agent.py +933 -0
  35. hindsight_api/engine/reflect/models.py +109 -0
  36. hindsight_api/engine/reflect/observations.py +186 -0
  37. hindsight_api/engine/reflect/prompts.py +483 -0
  38. hindsight_api/engine/reflect/tools.py +437 -0
  39. hindsight_api/engine/reflect/tools_schema.py +250 -0
  40. hindsight_api/engine/response_models.py +130 -4
  41. hindsight_api/engine/retain/bank_utils.py +79 -201
  42. hindsight_api/engine/retain/fact_extraction.py +81 -48
  43. hindsight_api/engine/retain/fact_storage.py +5 -8
  44. hindsight_api/engine/retain/link_utils.py +5 -8
  45. hindsight_api/engine/retain/orchestrator.py +1 -55
  46. hindsight_api/engine/retain/types.py +2 -2
  47. hindsight_api/engine/search/graph_retrieval.py +2 -2
  48. hindsight_api/engine/search/link_expansion_retrieval.py +164 -29
  49. hindsight_api/engine/search/mpfp_retrieval.py +1 -1
  50. hindsight_api/engine/search/retrieval.py +14 -14
  51. hindsight_api/engine/search/think_utils.py +41 -140
  52. hindsight_api/engine/search/trace.py +0 -1
  53. hindsight_api/engine/search/tracer.py +2 -5
  54. hindsight_api/engine/search/types.py +0 -3
  55. hindsight_api/engine/task_backend.py +112 -196
  56. hindsight_api/engine/utils.py +0 -151
  57. hindsight_api/extensions/__init__.py +10 -1
  58. hindsight_api/extensions/builtin/tenant.py +11 -4
  59. hindsight_api/extensions/operation_validator.py +81 -4
  60. hindsight_api/extensions/tenant.py +26 -0
  61. hindsight_api/main.py +28 -5
  62. hindsight_api/mcp_local.py +12 -53
  63. hindsight_api/mcp_tools.py +494 -0
  64. hindsight_api/models.py +0 -2
  65. hindsight_api/worker/__init__.py +11 -0
  66. hindsight_api/worker/main.py +296 -0
  67. hindsight_api/worker/poller.py +486 -0
  68. {hindsight_api-0.3.0.dist-info → hindsight_api-0.4.1.dist-info}/METADATA +12 -6
  69. hindsight_api-0.4.1.dist-info/RECORD +112 -0
  70. {hindsight_api-0.3.0.dist-info → hindsight_api-0.4.1.dist-info}/entry_points.txt +1 -0
  71. hindsight_api/engine/retain/observation_regeneration.py +0 -254
  72. hindsight_api/engine/search/observation_utils.py +0 -125
  73. hindsight_api/engine/search/scoring.py +0 -159
  74. hindsight_api-0.3.0.dist-info/RECORD +0 -82
  75. {hindsight_api-0.3.0.dist-info → hindsight_api-0.4.1.dist-info}/WHEEL +0 -0
@@ -11,6 +11,7 @@ This implements a sophisticated memory architecture that combines:
11
11
 
12
12
  import asyncio
13
13
  import contextvars
14
+ import json
14
15
  import logging
15
16
  import time
16
17
  import uuid
@@ -22,12 +23,17 @@ from ..metrics import get_metrics_collector
22
23
  from .db_budget import budgeted_operation
23
24
 
24
25
  # Context variable for current schema (async-safe, per-task isolation)
25
- _current_schema: contextvars.ContextVar[str] = contextvars.ContextVar("current_schema", default="public")
26
+ # Note: default is None, actual default comes from config via get_current_schema()
27
+ _current_schema: contextvars.ContextVar[str | None] = contextvars.ContextVar("current_schema", default=None)
26
28
 
27
29
 
28
30
  def get_current_schema() -> str:
29
- """Get the current schema from context (default: 'public')."""
30
- return _current_schema.get()
31
+ """Get the current schema from context (falls back to config default)."""
32
+ schema = _current_schema.get()
33
+ if schema is None:
34
+ # Fall back to configured default schema
35
+ return get_config().database_schema
36
+ return schema
31
37
 
32
38
 
33
39
  def fq_table(table_name: str) -> str:
@@ -134,25 +140,31 @@ if TYPE_CHECKING:
134
140
 
135
141
  from enum import Enum
136
142
 
143
+ from ..metrics import get_metrics_collector
137
144
  from ..pg0 import EmbeddedPostgres, parse_pg0_url
138
145
  from .entity_resolver import EntityResolver
139
146
  from .llm_wrapper import LLMConfig
140
147
  from .query_analyzer import QueryAnalyzer
148
+ from .reflect import run_reflect_agent
149
+ from .reflect.tools import tool_expand, tool_recall, tool_search_mental_models, tool_search_observations
141
150
  from .response_models import (
142
151
  VALID_RECALL_FACT_TYPES,
143
152
  EntityObservation,
144
153
  EntityState,
154
+ LLMCallTrace,
145
155
  MemoryFact,
156
+ ObservationRef,
146
157
  ReflectResult,
147
158
  TokenUsage,
159
+ ToolCallTrace,
148
160
  )
149
161
  from .response_models import RecallResult as RecallResultModel
150
162
  from .retain import bank_utils, embedding_utils
151
163
  from .retain.types import RetainContentDict
152
- from .search import observation_utils, think_utils
164
+ from .search import think_utils
153
165
  from .search.reranking import CrossEncoderReranker
154
166
  from .search.tags import TagsMatch
155
- from .task_backend import AsyncIOQueueBackend, NoopTaskBackend, TaskBackend
167
+ from .task_backend import BrokerTaskBackend, SyncTaskBackend, TaskBackend
156
168
 
157
169
 
158
170
  class Budget(str, Enum):
@@ -214,6 +226,10 @@ class MemoryEngine(MemoryEngineInterface):
214
226
  reflect_llm_api_key: str | None = None,
215
227
  reflect_llm_model: str | None = None,
216
228
  reflect_llm_base_url: str | None = None,
229
+ consolidation_llm_provider: str | None = None,
230
+ consolidation_llm_api_key: str | None = None,
231
+ consolidation_llm_model: str | None = None,
232
+ consolidation_llm_base_url: str | None = None,
217
233
  embeddings: Embeddings | None = None,
218
234
  cross_encoder: CrossEncoderModel | None = None,
219
235
  query_analyzer: QueryAnalyzer | None = None,
@@ -222,8 +238,6 @@ class MemoryEngine(MemoryEngineInterface):
222
238
  db_command_timeout: int | None = None,
223
239
  db_acquire_timeout: int | None = None,
224
240
  task_backend: TaskBackend | None = None,
225
- task_batch_size: int | None = None,
226
- task_batch_interval: float | None = None,
227
241
  run_migrations: bool = True,
228
242
  operation_validator: "OperationValidatorExtension | None" = None,
229
243
  tenant_extension: "TenantExtension | None" = None,
@@ -251,6 +265,10 @@ class MemoryEngine(MemoryEngineInterface):
251
265
  reflect_llm_api_key: API key for reflect LLM. Falls back to memory_llm_api_key.
252
266
  reflect_llm_model: Model for reflect operations. Falls back to memory_llm_model.
253
267
  reflect_llm_base_url: Base URL for reflect LLM. Falls back to memory_llm_base_url.
268
+ consolidation_llm_provider: LLM provider for consolidation operations. Falls back to memory_llm_provider.
269
+ consolidation_llm_api_key: API key for consolidation LLM. Falls back to memory_llm_api_key.
270
+ consolidation_llm_model: Model for consolidation operations. Falls back to memory_llm_model.
271
+ consolidation_llm_base_url: Base URL for consolidation LLM. Falls back to memory_llm_base_url.
254
272
  embeddings: Embeddings implementation. If not provided, created from env vars.
255
273
  cross_encoder: Cross-encoder model. If not provided, created from env vars.
256
274
  query_analyzer: Query analyzer implementation. If not provided, uses DateparserQueryAnalyzer.
@@ -258,9 +276,7 @@ class MemoryEngine(MemoryEngineInterface):
258
276
  pool_max_size: Maximum number of connections in the pool. Defaults to HINDSIGHT_API_DB_POOL_MAX_SIZE.
259
277
  db_command_timeout: PostgreSQL command timeout in seconds. Defaults to HINDSIGHT_API_DB_COMMAND_TIMEOUT.
260
278
  db_acquire_timeout: Connection acquisition timeout in seconds. Defaults to HINDSIGHT_API_DB_ACQUIRE_TIMEOUT.
261
- task_backend: Custom task backend. If not provided, uses AsyncIOQueueBackend.
262
- task_batch_size: Background task batch size. Defaults to HINDSIGHT_API_TASK_BACKEND_MEMORY_BATCH_SIZE.
263
- task_batch_interval: Background task batch interval in seconds. Defaults to HINDSIGHT_API_TASK_BACKEND_MEMORY_BATCH_INTERVAL.
279
+ task_backend: Custom task backend. If not provided, uses BrokerTaskBackend for distributed processing.
264
280
  run_migrations: Whether to run database migrations during initialize(). Default: True
265
281
  operation_validator: Optional extension to validate operations before execution.
266
282
  If provided, retain/recall/reflect operations will be validated.
@@ -394,21 +410,37 @@ class MemoryEngine(MemoryEngineInterface):
394
410
  model=reflect_model,
395
411
  )
396
412
 
413
+ # Consolidation LLM config - for mental model consolidation (can use efficient models)
414
+ consolidation_provider = consolidation_llm_provider or config.consolidation_llm_provider or memory_llm_provider
415
+ consolidation_api_key = consolidation_llm_api_key or config.consolidation_llm_api_key or memory_llm_api_key
416
+ consolidation_model = consolidation_llm_model or config.consolidation_llm_model or memory_llm_model
417
+ consolidation_base_url = consolidation_llm_base_url or config.consolidation_llm_base_url or memory_llm_base_url
418
+ # Apply provider-specific base URL defaults for consolidation
419
+ if consolidation_base_url is None:
420
+ if consolidation_provider.lower() == "groq":
421
+ consolidation_base_url = "https://api.groq.com/openai/v1"
422
+ elif consolidation_provider.lower() == "ollama":
423
+ consolidation_base_url = "http://localhost:11434/v1"
424
+ else:
425
+ consolidation_base_url = ""
426
+
427
+ self._consolidation_llm_config = LLMConfig(
428
+ provider=consolidation_provider,
429
+ api_key=consolidation_api_key,
430
+ base_url=consolidation_base_url,
431
+ model=consolidation_model,
432
+ )
433
+
397
434
  # Initialize cross-encoder reranker (cached for performance)
398
435
  self._cross_encoder_reranker = CrossEncoderReranker(cross_encoder=cross_encoder)
399
436
 
400
437
  # Initialize task backend
401
- if task_backend:
402
- self._task_backend = task_backend
403
- elif config.task_backend == "noop":
404
- self._task_backend = NoopTaskBackend()
405
- else:
406
- # Default to memory (AsyncIOQueueBackend)
407
- _task_batch_size = task_batch_size if task_batch_size is not None else config.task_backend_memory_batch_size
408
- _task_batch_interval = (
409
- task_batch_interval if task_batch_interval is not None else config.task_backend_memory_batch_interval
410
- )
411
- self._task_backend = AsyncIOQueueBackend(batch_size=_task_batch_size, batch_interval=_task_batch_interval)
438
+ # If no custom backend provided, use BrokerTaskBackend which stores tasks in PostgreSQL
439
+ # The pool_getter lambda will return the pool once it's initialized
440
+ self._task_backend = task_backend or BrokerTaskBackend(
441
+ pool_getter=lambda: self._pool,
442
+ schema_getter=get_current_schema,
443
+ )
412
444
 
413
445
  # Backpressure mechanism: limit concurrent searches to prevent overwhelming the database
414
446
  # Configurable via HINDSIGHT_API_RECALL_MAX_CONCURRENT (default: 50)
@@ -472,35 +504,19 @@ class MemoryEngine(MemoryEngineInterface):
472
504
  if request_context is None:
473
505
  raise AuthenticationError("RequestContext is required when tenant extension is configured")
474
506
 
507
+ # For internal/background operations (e.g., worker tasks), skip extension authentication
508
+ # if the schema has already been set by execute_task via the _schema field.
509
+ if request_context.internal:
510
+ current = _current_schema.get()
511
+ if current and current != "public":
512
+ return current
513
+
475
514
  # Let AuthenticationError propagate - HTTP layer will convert to 401
476
515
  tenant_context = await self._tenant_extension.authenticate(request_context)
477
516
 
478
517
  _current_schema.set(tenant_context.schema_name)
479
518
  return tenant_context.schema_name
480
519
 
481
- async def _handle_access_count_update(self, task_dict: dict[str, Any]):
482
- """
483
- Handler for access count update tasks.
484
-
485
- Args:
486
- task_dict: Dict with 'node_ids' key containing list of node IDs to update
487
-
488
- Raises:
489
- Exception: Any exception from database operations (propagates to execute_task for retry)
490
- """
491
- node_ids = task_dict.get("node_ids", [])
492
- if not node_ids:
493
- return
494
-
495
- pool = await self._get_pool()
496
- # Convert string UUIDs to UUID type for faster matching
497
- uuid_list = [uuid.UUID(nid) for nid in node_ids]
498
- async with acquire_with_retry(pool) as conn:
499
- await conn.execute(
500
- f"UPDATE {fq_table('memory_units')} SET access_count = access_count + 1 WHERE id = ANY($1::uuid[])",
501
- uuid_list,
502
- )
503
-
504
520
  async def _handle_batch_retain(self, task_dict: dict[str, Any]):
505
521
  """
506
522
  Handler for batch retain tasks.
@@ -521,14 +537,113 @@ class MemoryEngine(MemoryEngineInterface):
521
537
  f"[BATCH_RETAIN_TASK] Starting background batch retain for bank_id={bank_id}, {len(contents)} items"
522
538
  )
523
539
 
524
- # Use internal request context for background tasks
540
+ # Use internal request context for background tasks (skips tenant auth when schema is pre-set)
525
541
  from hindsight_api.models import RequestContext
526
542
 
527
- internal_context = RequestContext()
543
+ internal_context = RequestContext(internal=True)
528
544
  await self.retain_batch_async(bank_id=bank_id, contents=contents, request_context=internal_context)
529
545
 
530
546
  logger.info(f"[BATCH_RETAIN_TASK] Completed background batch retain for bank_id={bank_id}")
531
547
 
548
+ async def _handle_consolidation(self, task_dict: dict[str, Any]):
549
+ """
550
+ Handler for consolidation tasks.
551
+
552
+ Consolidates new memories into mental models for a bank.
553
+
554
+ Args:
555
+ task_dict: Dict with 'bank_id'
556
+
557
+ Raises:
558
+ ValueError: If bank_id is missing
559
+ Exception: Any exception from consolidation (propagates to execute_task for retry)
560
+ """
561
+ bank_id = task_dict.get("bank_id")
562
+ if not bank_id:
563
+ raise ValueError("bank_id is required for consolidation task")
564
+
565
+ from hindsight_api.models import RequestContext
566
+
567
+ from .consolidation import run_consolidation_job
568
+
569
+ internal_context = RequestContext(internal=True)
570
+ result = await run_consolidation_job(
571
+ memory_engine=self,
572
+ bank_id=bank_id,
573
+ request_context=internal_context,
574
+ )
575
+
576
+ logger.info(f"[CONSOLIDATION] bank={bank_id} completed: {result.get('memories_processed', 0)} processed")
577
+
578
+ async def _handle_refresh_mental_model(self, task_dict: dict[str, Any]):
579
+ """
580
+ Handler for refresh_mental_model tasks.
581
+
582
+ Re-runs the source query through reflect and updates the mental model content.
583
+
584
+ Args:
585
+ task_dict: Dict with 'bank_id', 'mental_model_id', 'operation_id'
586
+
587
+ Raises:
588
+ ValueError: If required fields are missing
589
+ Exception: Any exception from reflect/update (propagates to execute_task for retry)
590
+ """
591
+ bank_id = task_dict.get("bank_id")
592
+ mental_model_id = task_dict.get("mental_model_id")
593
+
594
+ if not bank_id or not mental_model_id:
595
+ raise ValueError("bank_id and mental_model_id are required for refresh_mental_model task")
596
+
597
+ logger.info(f"[REFRESH_MENTAL_MODEL_TASK] Starting for bank_id={bank_id}, mental_model_id={mental_model_id}")
598
+
599
+ from hindsight_api.models import RequestContext
600
+
601
+ internal_context = RequestContext(internal=True)
602
+
603
+ # Get the current mental model to get source_query
604
+ mental_model = await self.get_mental_model(bank_id, mental_model_id, request_context=internal_context)
605
+ if not mental_model:
606
+ raise ValueError(f"Mental model {mental_model_id} not found in bank {bank_id}")
607
+
608
+ source_query = mental_model["source_query"]
609
+
610
+ # Run reflect to generate new content, excluding the mental model being refreshed
611
+ reflect_result = await self.reflect_async(
612
+ bank_id=bank_id,
613
+ query=source_query,
614
+ request_context=internal_context,
615
+ exclude_mental_model_ids=[mental_model_id],
616
+ )
617
+
618
+ generated_content = reflect_result.text or "No content generated"
619
+
620
+ # Build reflect_response payload to store
621
+ reflect_response = {
622
+ "text": reflect_result.text,
623
+ "based_on": {
624
+ fact_type: [
625
+ {
626
+ "id": str(fact.id),
627
+ "text": fact.text,
628
+ "type": fact_type,
629
+ }
630
+ for fact in facts
631
+ ]
632
+ for fact_type, facts in reflect_result.based_on.items()
633
+ },
634
+ }
635
+
636
+ # Update the mental model with the generated content and reflect_response
637
+ await self.update_mental_model(
638
+ bank_id=bank_id,
639
+ mental_model_id=mental_model_id,
640
+ content=generated_content,
641
+ reflect_response=reflect_response,
642
+ request_context=internal_context,
643
+ )
644
+
645
+ logger.info(f"[REFRESH_MENTAL_MODEL_TASK] Completed for bank_id={bank_id}, mental_model_id={mental_model_id}")
646
+
532
647
  async def execute_task(self, task_dict: dict[str, Any]):
533
648
  """
534
649
  Execute a task by routing it to the appropriate handler.
@@ -538,13 +653,18 @@ class MemoryEngine(MemoryEngineInterface):
538
653
 
539
654
  Args:
540
655
  task_dict: Task dictionary with 'type' key and other payload data
541
- Example: {'type': 'access_count_update', 'node_ids': [...]}
656
+ Example: {'type': 'batch_retain', 'bank_id': '...', 'contents': [...]}
542
657
  """
543
658
  task_type = task_dict.get("type")
544
659
  operation_id = task_dict.get("operation_id")
545
660
  retry_count = task_dict.get("retry_count", 0)
546
661
  max_retries = 3
547
662
 
663
+ # Set schema context for multi-tenant task execution
664
+ schema = task_dict.pop("_schema", None)
665
+ if schema:
666
+ _current_schema.set(schema)
667
+
548
668
  # Check if operation was cancelled (only for tasks with operation_id)
549
669
  if operation_id:
550
670
  try:
@@ -563,16 +683,12 @@ class MemoryEngine(MemoryEngineInterface):
563
683
  # Continue with processing if we can't check status
564
684
 
565
685
  try:
566
- if task_type == "access_count_update":
567
- await self._handle_access_count_update(task_dict)
568
- elif task_type == "reinforce_opinion":
569
- await self._handle_reinforce_opinion(task_dict)
570
- elif task_type == "form_opinion":
571
- await self._handle_form_opinion(task_dict)
572
- elif task_type == "batch_retain":
686
+ if task_type == "batch_retain":
573
687
  await self._handle_batch_retain(task_dict)
574
- elif task_type == "regenerate_observations":
575
- await self._handle_regenerate_observations(task_dict)
688
+ elif task_type == "consolidation":
689
+ await self._handle_consolidation(task_dict)
690
+ elif task_type == "refresh_mental_model":
691
+ await self._handle_refresh_mental_model(task_dict)
576
692
  else:
577
693
  logger.error(f"Unknown task type: {task_type}")
578
694
  # Don't retry unknown task types
@@ -580,9 +696,9 @@ class MemoryEngine(MemoryEngineInterface):
580
696
  await self._delete_operation_record(operation_id)
581
697
  return
582
698
 
583
- # Task succeeded - delete operation record
699
+ # Task succeeded - mark operation as completed
584
700
  if operation_id:
585
- await self._delete_operation_record(operation_id)
701
+ await self._mark_operation_completed(operation_id)
586
702
 
587
703
  except Exception as e:
588
704
  # Task failed - check if we should retry
@@ -628,7 +744,7 @@ class MemoryEngine(MemoryEngineInterface):
628
744
  await conn.execute(
629
745
  f"""
630
746
  UPDATE {fq_table("async_operations")}
631
- SET status = 'failed', error_message = $2
747
+ SET status = 'failed', error_message = $2, updated_at = NOW()
632
748
  WHERE operation_id = $1
633
749
  """,
634
750
  uuid.UUID(operation_id),
@@ -638,6 +754,23 @@ class MemoryEngine(MemoryEngineInterface):
638
754
  except Exception as e:
639
755
  logger.error(f"Failed to mark operation as failed {operation_id}: {e}")
640
756
 
757
+ async def _mark_operation_completed(self, operation_id: str):
758
+ """Helper to mark an operation as completed in the database."""
759
+ try:
760
+ pool = await self._get_pool()
761
+ async with acquire_with_retry(pool) as conn:
762
+ await conn.execute(
763
+ f"""
764
+ UPDATE {fq_table("async_operations")}
765
+ SET status = 'completed', updated_at = NOW(), completed_at = NOW()
766
+ WHERE operation_id = $1
767
+ """,
768
+ uuid.UUID(operation_id),
769
+ )
770
+ logger.info(f"Marked async operation as completed: {operation_id}")
771
+ except Exception as e:
772
+ logger.error(f"Failed to mark operation as completed {operation_id}: {e}")
773
+
641
774
  async def initialize(self):
642
775
  """Initialize the connection pool, models, and background workers.
643
776
 
@@ -710,6 +843,23 @@ class MemoryEngine(MemoryEngineInterface):
710
843
  )
711
844
  if reflect_is_different:
712
845
  await self._reflect_llm_config.verify_connection()
846
+ # Verify consolidation config if different from all others
847
+ consolidation_is_different = (
848
+ (
849
+ self._consolidation_llm_config.provider != self._llm_config.provider
850
+ or self._consolidation_llm_config.model != self._llm_config.model
851
+ )
852
+ and (
853
+ self._consolidation_llm_config.provider != self._retain_llm_config.provider
854
+ or self._consolidation_llm_config.model != self._retain_llm_config.model
855
+ )
856
+ and (
857
+ self._consolidation_llm_config.provider != self._reflect_llm_config.provider
858
+ or self._consolidation_llm_config.model != self._reflect_llm_config.model
859
+ )
860
+ )
861
+ if consolidation_is_different:
862
+ await self._consolidation_llm_config.verify_connection()
713
863
 
714
864
  # Build list of initialization tasks
715
865
  init_tasks = [
@@ -736,11 +886,12 @@ class MemoryEngine(MemoryEngineInterface):
736
886
  if not self.db_url:
737
887
  raise ValueError("Database URL is required for migrations")
738
888
  logger.info("Running database migrations...")
739
- run_migrations(self.db_url)
889
+ # Use configured database schema for migrations (defaults to "public")
890
+ run_migrations(self.db_url, schema=get_config().database_schema)
740
891
 
741
892
  # Ensure embedding column dimension matches the model's dimension
742
893
  # This is done after migrations and after embeddings.initialize()
743
- ensure_embedding_dimension(self.db_url, self.embeddings.dimension)
894
+ ensure_embedding_dimension(self.db_url, self.embeddings.dimension, schema=get_config().database_schema)
744
895
 
745
896
  logger.info(f"Connecting to PostgreSQL at {self.db_url}")
746
897
 
@@ -836,8 +987,7 @@ class MemoryEngine(MemoryEngineInterface):
836
987
  """
837
988
  Wait for all pending background tasks to complete.
838
989
 
839
- This is useful in tests to ensure background tasks (like opinion reinforcement)
840
- complete before making assertions.
990
+ This is useful in tests to ensure background tasks complete before making assertions.
841
991
  """
842
992
  if hasattr(self._task_backend, "wait_for_pending_tasks"):
843
993
  await self._task_backend.wait_for_pending_tasks()
@@ -1178,7 +1328,7 @@ class MemoryEngine(MemoryEngineInterface):
1178
1328
 
1179
1329
  logger.info(f"Split into {len(sub_batches)} sub-batches: {[len(b) for b in sub_batches]} items each")
1180
1330
 
1181
- # Process each sub-batch using internal method (skip chunking check)
1331
+ # Process each sub-batch
1182
1332
  all_results = []
1183
1333
  for i, sub_batch in enumerate(sub_batches, 1):
1184
1334
  sub_batch_chars = sum(len(item.get("content", "")) for item in sub_batch)
@@ -1235,6 +1385,17 @@ class MemoryEngine(MemoryEngineInterface):
1235
1385
  except Exception as e:
1236
1386
  logger.warning(f"Post-retain hook error (non-fatal): {e}")
1237
1387
 
1388
+ # Trigger consolidation as a tracked async operation if enabled
1389
+ from ..config import get_config
1390
+
1391
+ config = get_config()
1392
+ if config.enable_observations:
1393
+ try:
1394
+ await self.submit_async_consolidation(bank_id=bank_id, request_context=request_context)
1395
+ except Exception as e:
1396
+ # Log but don't fail the retain - consolidation is non-critical
1397
+ logger.warning(f"Failed to submit consolidation task for bank {bank_id}: {e}")
1398
+
1238
1399
  if return_usage:
1239
1400
  return result, total_usage
1240
1401
  return result
@@ -1280,7 +1441,6 @@ class MemoryEngine(MemoryEngineInterface):
1280
1441
  embeddings_model=self.embeddings,
1281
1442
  llm_config=self._retain_llm_config,
1282
1443
  entity_resolver=self.entity_resolver,
1283
- task_backend=self._task_backend,
1284
1444
  format_date_fn=self._format_readable_date,
1285
1445
  duplicate_checker_fn=self._find_duplicate_facts_batch,
1286
1446
  bank_id=bank_id,
@@ -1350,6 +1510,8 @@ class MemoryEngine(MemoryEngineInterface):
1350
1510
  request_context: "RequestContext",
1351
1511
  tags: list[str] | None = None,
1352
1512
  tags_match: TagsMatch = "any",
1513
+ _connection_budget: int | None = None,
1514
+ _quiet: bool = False,
1353
1515
  ) -> RecallResultModel:
1354
1516
  """
1355
1517
  Recall memories using N*4-way parallel retrieval (N fact types × 4 retrieval methods).
@@ -1400,6 +1562,12 @@ class MemoryEngine(MemoryEngineInterface):
1400
1562
  f"Must be one of: {', '.join(sorted(VALID_RECALL_FACT_TYPES))}"
1401
1563
  )
1402
1564
 
1565
+ # Filter out 'opinion' - opinions are no longer returned from recall
1566
+ fact_type = [ft for ft in fact_type if ft != "opinion"]
1567
+ if not fact_type:
1568
+ # All requested types were opinions - return empty result
1569
+ return RecallResultModel(results=[], entities={}, chunks={})
1570
+
1403
1571
  # Validate operation if validator is configured
1404
1572
  if self._operation_validator:
1405
1573
  from hindsight_api.extensions import RecallContext
@@ -1425,6 +1593,11 @@ class MemoryEngine(MemoryEngineInterface):
1425
1593
  effective_budget = budget if budget is not None else Budget.MID
1426
1594
  thinking_budget = budget_mapping[effective_budget]
1427
1595
 
1596
+ # Log recall start with tags if present (skip if quiet mode for internal operations)
1597
+ if not _quiet:
1598
+ tags_info = f", tags={tags} ({tags_match})" if tags else ""
1599
+ logger.info(f"[RECALL {bank_id[:8]}] Starting recall for query: {query[:50]}...{tags_info}")
1600
+
1428
1601
  # Backpressure: limit concurrent recalls to prevent overwhelming the database
1429
1602
  result = None
1430
1603
  error_msg = None
@@ -1451,6 +1624,8 @@ class MemoryEngine(MemoryEngineInterface):
1451
1624
  semaphore_wait=semaphore_wait,
1452
1625
  tags=tags,
1453
1626
  tags_match=tags_match,
1627
+ connection_budget=_connection_budget,
1628
+ quiet=_quiet,
1454
1629
  )
1455
1630
  break # Success - exit retry loop
1456
1631
  except Exception as e:
@@ -1571,6 +1746,8 @@ class MemoryEngine(MemoryEngineInterface):
1571
1746
  semaphore_wait: float = 0.0,
1572
1747
  tags: list[str] | None = None,
1573
1748
  tags_match: TagsMatch = "any",
1749
+ connection_budget: int | None = None,
1750
+ quiet: bool = False,
1574
1751
  ) -> RecallResultModel:
1575
1752
  """
1576
1753
  Search implementation with modular retrieval and reranking.
@@ -1645,8 +1822,11 @@ class MemoryEngine(MemoryEngineInterface):
1645
1822
 
1646
1823
  # Run optimized retrieval with connection budget
1647
1824
  config = get_config()
1825
+ effective_connection_budget = (
1826
+ connection_budget if connection_budget is not None else config.recall_connection_budget
1827
+ )
1648
1828
  async with budgeted_operation(
1649
- max_connections=config.recall_connection_budget,
1829
+ max_connections=effective_connection_budget,
1650
1830
  operation_id=f"recall-{recall_id}",
1651
1831
  ) as op:
1652
1832
  budgeted_pool = op.wrap_pool(pool)
@@ -1702,8 +1882,6 @@ class MemoryEngine(MemoryEngineInterface):
1702
1882
  # Capture temporal constraint (same across all fact types)
1703
1883
  if retrieval_result.temporal_constraint:
1704
1884
  detected_temporal_constraint = retrieval_result.temporal_constraint
1705
- # Collect MPFP timings
1706
- all_mpfp_timings.extend(retrieval_result.mpfp_timings)
1707
1885
 
1708
1886
  # If no temporal results from any fact type, set to None
1709
1887
  if not temporal_results:
@@ -1722,7 +1900,8 @@ class MemoryEngine(MemoryEngineInterface):
1722
1900
  retrieval_duration = time.time() - retrieval_start
1723
1901
 
1724
1902
  step_duration = time.time() - step_start
1725
- # Format per-method timings (these are the actual parallel retrieval times)
1903
+ total_retrievals = len(fact_type) * (4 if temporal_results else 3)
1904
+ # Format per-method timings
1726
1905
  timing_parts = [
1727
1906
  f"semantic={len(semantic_results)}({aggregated_timings['semantic']:.3f}s)",
1728
1907
  f"bm25={len(bm25_results)}({aggregated_timings['bm25']:.3f}s)",
@@ -1945,6 +2124,9 @@ class MemoryEngine(MemoryEngineInterface):
1945
2124
 
1946
2125
  # Re-sort by combined score
1947
2126
  scored_results.sort(key=lambda x: x.weight, reverse=True)
2127
+ log_buffer.append(
2128
+ " [4.6] Combined scoring: cross_encoder(0.6) + rrf(0.2) + temporal(0.1) + recency(0.1)"
2129
+ )
1948
2130
 
1949
2131
  # Add reranked results to tracer AFTER combined scoring (so normalized values are included)
1950
2132
  if tracer:
@@ -1963,6 +2145,7 @@ class MemoryEngine(MemoryEngineInterface):
1963
2145
  # Step 5: Truncate to thinking_budget * 2 for token filtering
1964
2146
  rerank_limit = thinking_budget * 2
1965
2147
  top_scored = scored_results[:rerank_limit]
2148
+ log_buffer.append(f" [5] Truncated to top {len(top_scored)} results")
1966
2149
 
1967
2150
  # Step 6: Token budget filtering
1968
2151
  step_start = time.time()
@@ -1977,7 +2160,7 @@ class MemoryEngine(MemoryEngineInterface):
1977
2160
 
1978
2161
  step_duration = time.time() - step_start
1979
2162
  log_buffer.append(
1980
- f" [5] Token filtering: {len(top_scored)} results, {total_tokens}/{max_tokens} tokens in {step_duration:.3f}s"
2163
+ f" [6] Token filtering: {len(top_scored)} results, {total_tokens}/{max_tokens} tokens in {step_duration:.3f}s"
1981
2164
  )
1982
2165
 
1983
2166
  if tracer:
@@ -1995,7 +2178,6 @@ class MemoryEngine(MemoryEngineInterface):
1995
2178
  text=sr.retrieval.text,
1996
2179
  context=sr.retrieval.context or "",
1997
2180
  event_date=sr.retrieval.occurred_start,
1998
- access_count=sr.retrieval.access_count,
1999
2181
  is_entry_point=(sr.id in [ep.node_id for ep in tracer.entry_points]),
2000
2182
  parent_node_id=None, # In parallel retrieval, there's no clear parent
2001
2183
  link_type=None,
@@ -2007,11 +2189,6 @@ class MemoryEngine(MemoryEngineInterface):
2007
2189
  final_weight=sr.weight,
2008
2190
  )
2009
2191
 
2010
- # Step 8: Queue access count updates for visited nodes
2011
- visited_ids = list(set([sr.id for sr in scored_results[:50]])) # Top 50
2012
- if visited_ids:
2013
- await self._task_backend.submit_task({"type": "access_count_update", "node_ids": visited_ids})
2014
-
2015
2192
  # Log fact_type distribution in results
2016
2193
  fact_type_counts = {}
2017
2194
  for sr in top_scored:
@@ -2043,7 +2220,6 @@ class MemoryEngine(MemoryEngineInterface):
2043
2220
  top_results_dicts.append(result_dict)
2044
2221
 
2045
2222
  # Get entities for each fact if include_entities is requested
2046
- step_start = time.time()
2047
2223
  fact_entity_map = {} # unit_id -> list of (entity_id, entity_name)
2048
2224
  if include_entities and top_scored:
2049
2225
  unit_ids = [uuid.UUID(sr.id) for sr in top_scored]
@@ -2065,7 +2241,6 @@ class MemoryEngine(MemoryEngineInterface):
2065
2241
  fact_entity_map[unit_id].append(
2066
2242
  {"entity_id": str(row["entity_id"]), "canonical_name": row["canonical_name"]}
2067
2243
  )
2068
- entity_map_duration = time.time() - step_start
2069
2244
 
2070
2245
  # Convert results to MemoryFact objects
2071
2246
  memory_facts = []
@@ -2093,7 +2268,6 @@ class MemoryEngine(MemoryEngineInterface):
2093
2268
  )
2094
2269
 
2095
2270
  # Fetch entity observations if requested
2096
- step_start = time.time()
2097
2271
  entities_dict = None
2098
2272
  total_entity_tokens = 0
2099
2273
  total_chunk_tokens = 0
@@ -2114,42 +2288,16 @@ class MemoryEngine(MemoryEngineInterface):
2114
2288
  entities_ordered.append((entity_id, entity_name))
2115
2289
  seen_entity_ids.add(entity_id)
2116
2290
 
2117
- # Fetch all observations in a single batched query
2118
- entity_ids = [eid for eid, _ in entities_ordered]
2119
- all_observations = await self.get_entity_observations_batch(
2120
- bank_id, entity_ids, limit_per_entity=5, request_context=request_context
2121
- )
2122
-
2123
- # Build entities_dict respecting token budget, in relevance order
2291
+ # Return entities with empty observations (summaries now live in mental models)
2124
2292
  entities_dict = {}
2125
- encoding = _get_tiktoken_encoding()
2126
-
2127
2293
  for entity_id, entity_name in entities_ordered:
2128
- if total_entity_tokens >= max_entity_tokens:
2129
- break
2130
-
2131
- observations = all_observations.get(entity_id, [])
2132
-
2133
- # Calculate tokens for this entity's observations
2134
- entity_tokens = 0
2135
- included_observations = []
2136
- for obs in observations:
2137
- obs_tokens = len(encoding.encode(obs.text))
2138
- if total_entity_tokens + entity_tokens + obs_tokens <= max_entity_tokens:
2139
- included_observations.append(obs)
2140
- entity_tokens += obs_tokens
2141
- else:
2142
- break
2143
-
2144
- if included_observations:
2145
- entities_dict[entity_name] = EntityState(
2146
- entity_id=entity_id, canonical_name=entity_name, observations=included_observations
2147
- )
2148
- total_entity_tokens += entity_tokens
2149
- entity_obs_duration = time.time() - step_start
2294
+ entities_dict[entity_name] = EntityState(
2295
+ entity_id=entity_id,
2296
+ canonical_name=entity_name,
2297
+ observations=[], # Mental models provide this now
2298
+ )
2150
2299
 
2151
2300
  # Fetch chunks if requested
2152
- step_start = time.time()
2153
2301
  chunks_dict = None
2154
2302
  if include_chunks and top_scored:
2155
2303
  from .response_models import ChunkInfo
@@ -2209,12 +2357,6 @@ class MemoryEngine(MemoryEngineInterface):
2209
2357
  chunk_text=chunk_text, chunk_index=row["chunk_index"], truncated=False
2210
2358
  )
2211
2359
  total_chunk_tokens += chunk_tokens
2212
- chunks_duration = time.time() - step_start
2213
-
2214
- # Log entity/chunk fetch timing (only if any enrichment was requested)
2215
- log_buffer.append(
2216
- f" [6] Response enrichment: entity_map={entity_map_duration:.3f}s, entity_obs={entity_obs_duration:.3f}s, chunks={chunks_duration:.3f}s"
2217
- )
2218
2360
 
2219
2361
  # Finalize trace if enabled
2220
2362
  trace_dict = None
@@ -2236,13 +2378,15 @@ class MemoryEngine(MemoryEngineInterface):
2236
2378
  log_buffer.append(
2237
2379
  f"[RECALL {recall_id}] Complete: {len(top_scored)} facts ({total_tokens} tok), {num_chunks} chunks ({total_chunk_tokens} tok), {num_entities} entities ({total_entity_tokens} tok) | {fact_type_summary} | {total_time:.3f}s{wait_info}"
2238
2380
  )
2239
- logger.info("\n" + "\n".join(log_buffer))
2381
+ if not quiet:
2382
+ logger.info("\n" + "\n".join(log_buffer))
2240
2383
 
2241
2384
  return RecallResultModel(results=memory_facts, trace=trace_dict, entities=entities_dict, chunks=chunks_dict)
2242
2385
 
2243
2386
  except Exception as e:
2244
2387
  log_buffer.append(f"[RECALL {recall_id}] ERROR after {time.time() - recall_start:.3f}s: {str(e)}")
2245
- logger.error("\n" + "\n".join(log_buffer))
2388
+ if not quiet:
2389
+ logger.error("\n" + "\n".join(log_buffer))
2246
2390
  raise Exception(f"Failed to search memories: {str(e)}")
2247
2391
 
2248
2392
  def _filter_by_token_budget(
@@ -2350,10 +2494,12 @@ class MemoryEngine(MemoryEngineInterface):
2350
2494
  pool = await self._get_pool()
2351
2495
  async with acquire_with_retry(pool) as conn:
2352
2496
  async with conn.transaction():
2353
- # Count units before deletion
2354
- units_count = await conn.fetchval(
2355
- f"SELECT COUNT(*) FROM {fq_table('memory_units')} WHERE document_id = $1", document_id
2497
+ # Get memory unit IDs before deletion (for mental model invalidation)
2498
+ unit_rows = await conn.fetch(
2499
+ f"SELECT id FROM {fq_table('memory_units')} WHERE document_id = $1", document_id
2356
2500
  )
2501
+ unit_ids = [str(row["id"]) for row in unit_rows]
2502
+ units_count = len(unit_ids)
2357
2503
 
2358
2504
  # Delete document (cascades to memory_units and all their links)
2359
2505
  deleted = await conn.fetchval(
@@ -2362,6 +2508,10 @@ class MemoryEngine(MemoryEngineInterface):
2362
2508
  bank_id,
2363
2509
  )
2364
2510
 
2511
+ # Invalidate deleted fact IDs from mental models
2512
+ if deleted and unit_ids:
2513
+ await self._invalidate_facts_from_mental_models(conn, bank_id, unit_ids)
2514
+
2365
2515
  return {"document_deleted": 1 if deleted else 0, "memory_units_deleted": units_count if deleted else 0}
2366
2516
 
2367
2517
  async def delete_memory_unit(
@@ -2389,11 +2539,18 @@ class MemoryEngine(MemoryEngineInterface):
2389
2539
  pool = await self._get_pool()
2390
2540
  async with acquire_with_retry(pool) as conn:
2391
2541
  async with conn.transaction():
2542
+ # Get bank_id before deletion (for mental model invalidation)
2543
+ bank_id = await conn.fetchval(f"SELECT bank_id FROM {fq_table('memory_units')} WHERE id = $1", unit_id)
2544
+
2392
2545
  # Delete the memory unit (cascades to links and associations)
2393
2546
  deleted = await conn.fetchval(
2394
2547
  f"DELETE FROM {fq_table('memory_units')} WHERE id = $1 RETURNING id", unit_id
2395
2548
  )
2396
2549
 
2550
+ # Invalidate deleted fact ID from mental models
2551
+ if deleted and bank_id:
2552
+ await self._invalidate_facts_from_mental_models(conn, bank_id, [str(deleted)])
2553
+
2397
2554
  return {
2398
2555
  "success": deleted is not None,
2399
2556
  "unit_id": str(deleted) if deleted else None,
@@ -2431,10 +2588,9 @@ class MemoryEngine(MemoryEngineInterface):
2431
2588
  await self._authenticate_tenant(request_context)
2432
2589
  pool = await self._get_pool()
2433
2590
  async with acquire_with_retry(pool) as conn:
2591
+ # Ensure connection is not in read-only mode (can happen with connection poolers)
2592
+ await conn.execute("SET SESSION CHARACTERISTICS AS TRANSACTION READ WRITE")
2434
2593
  async with conn.transaction():
2435
- # Ensure transaction is not in read-only mode (can happen with connection poolers)
2436
- # Using SET LOCAL so it only affects this transaction, not the session
2437
- await conn.execute("SET LOCAL transaction_read_only TO off")
2438
2594
  try:
2439
2595
  if fact_type:
2440
2596
  # Delete only memories of a specific fact type
@@ -2486,6 +2642,79 @@ class MemoryEngine(MemoryEngineInterface):
2486
2642
  except Exception as e:
2487
2643
  raise Exception(f"Failed to delete agent data: {str(e)}")
2488
2644
 
2645
+ async def clear_observations(
2646
+ self,
2647
+ bank_id: str,
2648
+ *,
2649
+ request_context: "RequestContext",
2650
+ ) -> dict[str, int]:
2651
+ """
2652
+ Clear all observations for a bank (consolidated knowledge).
2653
+
2654
+ Args:
2655
+ bank_id: Bank ID to clear observations for
2656
+ request_context: Request context for authentication.
2657
+
2658
+ Returns:
2659
+ Dictionary with count of deleted observations
2660
+ """
2661
+ await self._authenticate_tenant(request_context)
2662
+ pool = await self._get_pool()
2663
+ async with acquire_with_retry(pool) as conn:
2664
+ async with conn.transaction():
2665
+ # Count observations before deletion
2666
+ count = await conn.fetchval(
2667
+ f"SELECT COUNT(*) FROM {fq_table('memory_units')} WHERE bank_id = $1 AND fact_type = 'observation'",
2668
+ bank_id,
2669
+ )
2670
+
2671
+ # Delete all observations
2672
+ await conn.execute(
2673
+ f"DELETE FROM {fq_table('memory_units')} WHERE bank_id = $1 AND fact_type = 'observation'",
2674
+ bank_id,
2675
+ )
2676
+
2677
+ # Reset consolidation timestamp
2678
+ await conn.execute(
2679
+ f"UPDATE {fq_table('banks')} SET last_consolidated_at = NULL WHERE bank_id = $1",
2680
+ bank_id,
2681
+ )
2682
+
2683
+ return {"deleted_count": count or 0}
2684
+
2685
+ async def run_consolidation(
2686
+ self,
2687
+ bank_id: str,
2688
+ *,
2689
+ request_context: "RequestContext",
2690
+ ) -> dict[str, int]:
2691
+ """
2692
+ Run memory consolidation to create/update mental models.
2693
+
2694
+ Args:
2695
+ bank_id: Bank ID to run consolidation for
2696
+ request_context: Request context for authentication.
2697
+
2698
+ Returns:
2699
+ Dictionary with consolidation stats
2700
+ """
2701
+ await self._authenticate_tenant(request_context)
2702
+
2703
+ from .consolidation import run_consolidation_job
2704
+
2705
+ result = await run_consolidation_job(
2706
+ memory_engine=self,
2707
+ bank_id=bank_id,
2708
+ request_context=request_context,
2709
+ )
2710
+
2711
+ return {
2712
+ "processed": result.get("processed", 0),
2713
+ "created": result.get("created", 0),
2714
+ "updated": result.get("updated", 0),
2715
+ "skipped": result.get("skipped", 0),
2716
+ }
2717
+
2489
2718
  async def get_graph_data(
2490
2719
  self,
2491
2720
  bank_id: str | None = None,
@@ -2541,7 +2770,7 @@ class MemoryEngine(MemoryEngineInterface):
2541
2770
  param_count += 1
2542
2771
  units = await conn.fetch(
2543
2772
  f"""
2544
- SELECT id, text, event_date, context, occurred_start, occurred_end, mentioned_at, document_id, chunk_id, fact_type
2773
+ SELECT id, text, event_date, context, occurred_start, occurred_end, mentioned_at, document_id, chunk_id, fact_type, tags, created_at, proof_count, source_memory_ids
2545
2774
  FROM {fq_table("memory_units")}
2546
2775
  {where_clause}
2547
2776
  ORDER BY mentioned_at DESC NULLS LAST, event_date DESC
@@ -2554,7 +2783,18 @@ class MemoryEngine(MemoryEngineInterface):
2554
2783
  # Get links, filtering to only include links between units of the selected agent
2555
2784
  # Use DISTINCT ON with LEAST/GREATEST to deduplicate bidirectional links
2556
2785
  unit_ids = [row["id"] for row in units]
2557
- if unit_ids:
2786
+ unit_id_set = set(unit_ids)
2787
+
2788
+ # Collect source memory IDs from observations
2789
+ source_memory_ids = []
2790
+ for unit in units:
2791
+ if unit["source_memory_ids"]:
2792
+ source_memory_ids.extend(unit["source_memory_ids"])
2793
+ source_memory_ids = list(set(source_memory_ids)) # Deduplicate
2794
+
2795
+ # Fetch links involving both visible units AND source memories
2796
+ all_relevant_ids = unit_ids + source_memory_ids
2797
+ if all_relevant_ids:
2558
2798
  links = await conn.fetch(
2559
2799
  f"""
2560
2800
  SELECT DISTINCT ON (LEAST(ml.from_unit_id, ml.to_unit_id), GREATEST(ml.from_unit_id, ml.to_unit_id), ml.link_type, COALESCE(ml.entity_id, '00000000-0000-0000-0000-000000000000'::uuid))
@@ -2565,14 +2805,69 @@ class MemoryEngine(MemoryEngineInterface):
2565
2805
  e.canonical_name as entity_name
2566
2806
  FROM {fq_table("memory_links")} ml
2567
2807
  LEFT JOIN {fq_table("entities")} e ON ml.entity_id = e.id
2568
- WHERE ml.from_unit_id = ANY($1::uuid[]) AND ml.to_unit_id = ANY($1::uuid[])
2808
+ WHERE ml.from_unit_id = ANY($1::uuid[]) OR ml.to_unit_id = ANY($1::uuid[])
2569
2809
  ORDER BY LEAST(ml.from_unit_id, ml.to_unit_id), GREATEST(ml.from_unit_id, ml.to_unit_id), ml.link_type, COALESCE(ml.entity_id, '00000000-0000-0000-0000-000000000000'::uuid), ml.weight DESC
2570
2810
  """,
2571
- unit_ids,
2811
+ all_relevant_ids,
2572
2812
  )
2573
2813
  else:
2574
2814
  links = []
2575
2815
 
2816
+ # Copy links from source memories to observations
2817
+ # Observations inherit links from their source memories via source_memory_ids
2818
+ # Build a map from source_id to observation_ids
2819
+ source_to_observations = {}
2820
+ for unit in units:
2821
+ if unit["source_memory_ids"]:
2822
+ for source_id in unit["source_memory_ids"]:
2823
+ if source_id not in source_to_observations:
2824
+ source_to_observations[source_id] = []
2825
+ source_to_observations[source_id].append(unit["id"])
2826
+
2827
+ copied_links = []
2828
+ for link in links:
2829
+ from_id = link["from_unit_id"]
2830
+ to_id = link["to_unit_id"]
2831
+
2832
+ # Get observations that should inherit this link
2833
+ from_observations = source_to_observations.get(from_id, [])
2834
+ to_observations = source_to_observations.get(to_id, [])
2835
+
2836
+ # If from_id is a source memory, copy links to its observations
2837
+ if from_observations:
2838
+ for obs_id in from_observations:
2839
+ # Only include if the target is visible
2840
+ if to_id in unit_id_set or to_observations:
2841
+ target = to_observations[0] if to_observations and to_id not in unit_id_set else to_id
2842
+ if target in unit_id_set:
2843
+ copied_links.append(
2844
+ {
2845
+ "from_unit_id": obs_id,
2846
+ "to_unit_id": target,
2847
+ "link_type": link["link_type"],
2848
+ "weight": link["weight"],
2849
+ "entity_name": link["entity_name"],
2850
+ }
2851
+ )
2852
+
2853
+ # If to_id is a source memory, copy links to its observations
2854
+ if to_observations and from_id in unit_id_set:
2855
+ for obs_id in to_observations:
2856
+ copied_links.append(
2857
+ {
2858
+ "from_unit_id": from_id,
2859
+ "to_unit_id": obs_id,
2860
+ "link_type": link["link_type"],
2861
+ "weight": link["weight"],
2862
+ "entity_name": link["entity_name"],
2863
+ }
2864
+ )
2865
+
2866
+ # Keep only direct links between visible nodes
2867
+ direct_links = [
2868
+ link for link in links if link["from_unit_id"] in unit_id_set and link["to_unit_id"] in unit_id_set
2869
+ ]
2870
+
2576
2871
  # Get entity information
2577
2872
  unit_entities = await conn.fetch(f"""
2578
2873
  SELECT ue.unit_id, e.canonical_name
@@ -2590,6 +2885,18 @@ class MemoryEngine(MemoryEngineInterface):
2590
2885
  entity_map[unit_id] = []
2591
2886
  entity_map[unit_id].append(entity_name)
2592
2887
 
2888
+ # For observations, inherit entities from source memories
2889
+ for unit in units:
2890
+ if unit["source_memory_ids"] and unit["id"] not in entity_map:
2891
+ # Collect entities from all source memories
2892
+ source_entities = []
2893
+ for source_id in unit["source_memory_ids"]:
2894
+ if source_id in entity_map:
2895
+ source_entities.extend(entity_map[source_id])
2896
+ if source_entities:
2897
+ # Deduplicate while preserving order
2898
+ entity_map[unit["id"]] = list(dict.fromkeys(source_entities))
2899
+
2593
2900
  # Build nodes
2594
2901
  nodes = []
2595
2902
  for row in units:
@@ -2623,14 +2930,15 @@ class MemoryEngine(MemoryEngineInterface):
2623
2930
  }
2624
2931
  )
2625
2932
 
2626
- # Build edges
2933
+ # Build edges (combine direct links and copied links from sources)
2627
2934
  edges = []
2628
- for row in links:
2935
+ all_links = direct_links + copied_links
2936
+ for row in all_links:
2629
2937
  from_id = str(row["from_unit_id"])
2630
2938
  to_id = str(row["to_unit_id"])
2631
2939
  link_type = row["link_type"]
2632
2940
  weight = row["weight"]
2633
- entity_name = row["entity_name"]
2941
+ entity_name = row.get("entity_name")
2634
2942
 
2635
2943
  # Color by link type
2636
2944
  if link_type == "temporal":
@@ -2682,6 +2990,9 @@ class MemoryEngine(MemoryEngineInterface):
2682
2990
  "document_id": row["document_id"],
2683
2991
  "chunk_id": row["chunk_id"] if row["chunk_id"] else None,
2684
2992
  "fact_type": row["fact_type"],
2993
+ "tags": list(row["tags"]) if row["tags"] else [],
2994
+ "created_at": row["created_at"].isoformat() if row["created_at"] else None,
2995
+ "proof_count": row["proof_count"] if row["proof_count"] else None,
2685
2996
  }
2686
2997
  )
2687
2998
 
@@ -2834,11 +3145,11 @@ class MemoryEngine(MemoryEngineInterface):
2834
3145
  await self._authenticate_tenant(request_context)
2835
3146
  pool = await self._get_pool()
2836
3147
  async with acquire_with_retry(pool) as conn:
2837
- # Get the memory unit
3148
+ # Get the memory unit (include source_memory_ids for mental models)
2838
3149
  row = await conn.fetchrow(
2839
3150
  f"""
2840
3151
  SELECT id, text, context, event_date, occurred_start, occurred_end,
2841
- mentioned_at, fact_type, document_id, chunk_id, tags
3152
+ mentioned_at, fact_type, document_id, chunk_id, tags, source_memory_ids
2842
3153
  FROM {fq_table("memory_units")}
2843
3154
  WHERE id = $1 AND bank_id = $2
2844
3155
  """,
@@ -2861,7 +3172,7 @@ class MemoryEngine(MemoryEngineInterface):
2861
3172
  )
2862
3173
  entities = [r["canonical_name"] for r in entities_rows]
2863
3174
 
2864
- return {
3175
+ result = {
2865
3176
  "id": str(row["id"]),
2866
3177
  "text": row["text"],
2867
3178
  "context": row["context"] if row["context"] else "",
@@ -2876,6 +3187,35 @@ class MemoryEngine(MemoryEngineInterface):
2876
3187
  "tags": row["tags"] if row["tags"] else [],
2877
3188
  }
2878
3189
 
3190
+ # For observations, include source_memory_ids and fetch source_memories
3191
+ if row["fact_type"] == "observation" and row["source_memory_ids"]:
3192
+ source_ids = row["source_memory_ids"]
3193
+ result["source_memory_ids"] = [str(sid) for sid in source_ids]
3194
+
3195
+ # Fetch source memories
3196
+ source_rows = await conn.fetch(
3197
+ f"""
3198
+ SELECT id, text, fact_type, context, occurred_start, mentioned_at
3199
+ FROM {fq_table("memory_units")}
3200
+ WHERE id = ANY($1::uuid[])
3201
+ ORDER BY mentioned_at DESC NULLS LAST
3202
+ """,
3203
+ source_ids,
3204
+ )
3205
+ result["source_memories"] = [
3206
+ {
3207
+ "id": str(r["id"]),
3208
+ "text": r["text"],
3209
+ "type": r["fact_type"],
3210
+ "context": r["context"],
3211
+ "occurred_start": r["occurred_start"].isoformat() if r["occurred_start"] else None,
3212
+ "mentioned_at": r["mentioned_at"].isoformat() if r["mentioned_at"] else None,
3213
+ }
3214
+ for r in source_rows
3215
+ ]
3216
+
3217
+ return result
3218
+
2879
3219
  async def list_documents(
2880
3220
  self,
2881
3221
  bank_id: str,
@@ -3052,322 +3392,100 @@ class MemoryEngine(MemoryEngineInterface):
3052
3392
  "created_at": chunk["created_at"].isoformat() if chunk["created_at"] else "",
3053
3393
  }
3054
3394
 
3055
- async def _evaluate_opinion_update_async(
3395
+ # ==================== bank profile Methods ====================
3396
+
3397
+ async def get_bank_profile(
3056
3398
  self,
3057
- opinion_text: str,
3058
- opinion_confidence: float,
3059
- new_event_text: str,
3060
- entity_name: str,
3061
- ) -> dict[str, Any] | None:
3399
+ bank_id: str,
3400
+ *,
3401
+ request_context: "RequestContext",
3402
+ ) -> dict[str, Any]:
3062
3403
  """
3063
- Evaluate if an opinion should be updated based on a new event.
3404
+ Get bank profile (name, disposition + mission).
3405
+ Auto-creates agent with default values if not exists.
3064
3406
 
3065
3407
  Args:
3066
- opinion_text: Current opinion text (includes reasons)
3067
- opinion_confidence: Current confidence score (0.0-1.0)
3068
- new_event_text: Text of the new event
3069
- entity_name: Name of the entity this opinion is about
3408
+ bank_id: bank IDentifier
3409
+ request_context: Request context for authentication.
3070
3410
 
3071
3411
  Returns:
3072
- Dict with 'action' ('keep'|'update'), 'new_confidence', 'new_text' (if action=='update')
3073
- or None if no changes needed
3412
+ Dict with name, disposition traits, and mission
3074
3413
  """
3414
+ await self._authenticate_tenant(request_context)
3415
+ pool = await self._get_pool()
3416
+ profile = await bank_utils.get_bank_profile(pool, bank_id)
3417
+ disposition = profile["disposition"]
3418
+ return {
3419
+ "bank_id": bank_id,
3420
+ "name": profile["name"],
3421
+ "disposition": disposition,
3422
+ "mission": profile["mission"],
3423
+ }
3075
3424
 
3076
- class OpinionEvaluation(BaseModel):
3077
- """Evaluation of whether an opinion should be updated."""
3078
-
3079
- action: str = Field(description="Action to take: 'keep' (no change) or 'update' (modify opinion)")
3080
- reasoning: str = Field(description="Brief explanation of why this action was chosen")
3081
- new_confidence: float = Field(
3082
- description="New confidence score (0.0-1.0). Can be higher, lower, or same as before."
3083
- )
3084
- new_opinion_text: str | None = Field(
3085
- default=None,
3086
- description="If action is 'update', the revised opinion text that acknowledges the previous view. Otherwise None.",
3087
- )
3088
-
3089
- evaluation_prompt = f"""You are evaluating whether an existing opinion should be updated based on new information.
3090
-
3091
- ENTITY: {entity_name}
3092
-
3093
- EXISTING OPINION:
3094
- {opinion_text}
3095
- Current confidence: {opinion_confidence:.2f}
3096
-
3097
- NEW EVENT:
3098
- {new_event_text}
3099
-
3100
- Evaluate whether this new event:
3101
- 1. REINFORCES the opinion (increase confidence, keep text)
3102
- 2. WEAKENS the opinion (decrease confidence, keep text)
3103
- 3. CHANGES the opinion (update both text and confidence, noting "Previously I thought X, but now Y...")
3104
- 4. IRRELEVANT (keep everything as is)
3105
-
3106
- Guidelines:
3107
- - Only suggest 'update' action if the new event genuinely contradicts or significantly modifies the opinion
3108
- - If updating the text, acknowledge the previous opinion and explain the change
3109
- - Confidence should reflect accumulated evidence (0.0 = no confidence, 1.0 = very confident)
3110
- - Small changes in confidence are normal; large jumps should be rare"""
3111
-
3112
- try:
3113
- result = await self._reflect_llm_config.call(
3114
- messages=[
3115
- {"role": "system", "content": "You evaluate and update opinions based on new information."},
3116
- {"role": "user", "content": evaluation_prompt},
3117
- ],
3118
- response_format=OpinionEvaluation,
3119
- scope="memory_evaluate_opinion",
3120
- temperature=0.3, # Lower temperature for more consistent evaluation
3121
- )
3122
-
3123
- # Only return updates if something actually changed
3124
- if result.action == "keep" and abs(result.new_confidence - opinion_confidence) < 0.01:
3125
- return None
3126
-
3127
- return {
3128
- "action": result.action,
3129
- "reasoning": result.reasoning,
3130
- "new_confidence": result.new_confidence,
3131
- "new_text": result.new_opinion_text if result.action == "update" else None,
3132
- }
3133
-
3134
- except Exception as e:
3135
- logger.warning(f"Failed to evaluate opinion update: {str(e)}")
3136
- return None
3137
-
3138
- async def _handle_form_opinion(self, task_dict: dict[str, Any]):
3425
+ async def update_bank_disposition(
3426
+ self,
3427
+ bank_id: str,
3428
+ disposition: dict[str, int],
3429
+ *,
3430
+ request_context: "RequestContext",
3431
+ ) -> None:
3139
3432
  """
3140
- Handler for form opinion tasks.
3433
+ Update bank disposition traits.
3141
3434
 
3142
3435
  Args:
3143
- task_dict: Dict with keys: 'bank_id', 'answer_text', 'query', 'tenant_id'
3436
+ bank_id: Bank identifier
3437
+ disposition: Dict with skepticism, literalism, empathy (all 1-5)
3438
+ request_context: Request context for authentication.
3144
3439
  """
3145
- bank_id = task_dict["bank_id"]
3146
- answer_text = task_dict["answer_text"]
3147
- query = task_dict["query"]
3148
- tenant_id = task_dict.get("tenant_id")
3149
-
3150
- await self._extract_and_store_opinions_async(
3151
- bank_id=bank_id, answer_text=answer_text, query=query, tenant_id=tenant_id
3152
- )
3440
+ await self._authenticate_tenant(request_context)
3441
+ pool = await self._get_pool()
3442
+ await bank_utils.update_bank_disposition(pool, bank_id, disposition)
3153
3443
 
3154
- async def _handle_reinforce_opinion(self, task_dict: dict[str, Any]):
3444
+ async def set_bank_mission(
3445
+ self,
3446
+ bank_id: str,
3447
+ mission: str,
3448
+ *,
3449
+ request_context: "RequestContext",
3450
+ ) -> dict[str, Any]:
3155
3451
  """
3156
- Handler for reinforce opinion tasks.
3452
+ Set the mission for a bank.
3157
3453
 
3158
3454
  Args:
3159
- task_dict: Dict with keys: 'bank_id', 'created_unit_ids', 'unit_texts', 'unit_entities'
3455
+ bank_id: Bank identifier
3456
+ mission: The mission text
3457
+ request_context: Request context for authentication.
3458
+
3459
+ Returns:
3460
+ Dict with bank_id and mission.
3160
3461
  """
3161
- bank_id = task_dict["bank_id"]
3162
- created_unit_ids = task_dict["created_unit_ids"]
3163
- unit_texts = task_dict["unit_texts"]
3164
- unit_entities = task_dict["unit_entities"]
3462
+ await self._authenticate_tenant(request_context)
3463
+ pool = await self._get_pool()
3464
+ await bank_utils.set_bank_mission(pool, bank_id, mission)
3465
+ return {"bank_id": bank_id, "mission": mission}
3165
3466
 
3166
- await self._reinforce_opinions_async(
3167
- bank_id=bank_id, created_unit_ids=created_unit_ids, unit_texts=unit_texts, unit_entities=unit_entities
3168
- )
3169
-
3170
- async def _reinforce_opinions_async(
3171
- self,
3172
- bank_id: str,
3173
- created_unit_ids: list[str],
3174
- unit_texts: list[str],
3175
- unit_entities: list[list[dict[str, str]]],
3176
- ):
3177
- """
3178
- Background task to reinforce opinions based on newly ingested events.
3179
-
3180
- This runs asynchronously and does not block the put operation.
3181
-
3182
- Args:
3183
- bank_id: bank ID
3184
- created_unit_ids: List of newly created memory unit IDs
3185
- unit_texts: Texts of the newly created units
3186
- unit_entities: Entities extracted from each unit
3187
- """
3188
- try:
3189
- # Extract all unique entity names from the new units
3190
- entity_names = set()
3191
- for entities_list in unit_entities:
3192
- for entity in entities_list:
3193
- # Handle both Entity objects and dicts
3194
- if hasattr(entity, "text"):
3195
- entity_names.add(entity.text)
3196
- elif isinstance(entity, dict):
3197
- entity_names.add(entity["text"])
3198
-
3199
- if not entity_names:
3200
- return
3201
-
3202
- pool = await self._get_pool()
3203
- async with acquire_with_retry(pool) as conn:
3204
- # Find all opinions related to these entities
3205
- opinions = await conn.fetch(
3206
- f"""
3207
- SELECT DISTINCT mu.id, mu.text, mu.confidence_score, e.canonical_name
3208
- FROM {fq_table("memory_units")} mu
3209
- JOIN {fq_table("unit_entities")} ue ON mu.id = ue.unit_id
3210
- JOIN {fq_table("entities")} e ON ue.entity_id = e.id
3211
- WHERE mu.bank_id = $1
3212
- AND mu.fact_type = 'opinion'
3213
- AND e.canonical_name = ANY($2::text[])
3214
- """,
3215
- bank_id,
3216
- list(entity_names),
3217
- )
3218
-
3219
- if not opinions:
3220
- return
3221
-
3222
- # Use cached LLM config
3223
- if self._reflect_llm_config is None:
3224
- logger.error("[REINFORCE] LLM config not available, skipping opinion reinforcement")
3225
- return
3226
-
3227
- # Evaluate each opinion against the new events
3228
- updates_to_apply = []
3229
- for opinion in opinions:
3230
- opinion_id = str(opinion["id"])
3231
- opinion_text = opinion["text"]
3232
- opinion_confidence = opinion["confidence_score"]
3233
- entity_name = opinion["canonical_name"]
3234
-
3235
- # Find all new events mentioning this entity
3236
- relevant_events = []
3237
- for unit_text, entities_list in zip(unit_texts, unit_entities):
3238
- if any(e["text"] == entity_name for e in entities_list):
3239
- relevant_events.append(unit_text)
3240
-
3241
- if not relevant_events:
3242
- continue
3243
-
3244
- # Combine all relevant events
3245
- combined_events = "\n".join(relevant_events)
3246
-
3247
- # Evaluate if opinion should be updated
3248
- evaluation = await self._evaluate_opinion_update_async(
3249
- opinion_text, opinion_confidence, combined_events, entity_name
3250
- )
3251
-
3252
- if evaluation:
3253
- updates_to_apply.append({"opinion_id": opinion_id, "evaluation": evaluation})
3254
-
3255
- # Apply all updates in a single transaction
3256
- if updates_to_apply:
3257
- async with conn.transaction():
3258
- for update in updates_to_apply:
3259
- opinion_id = update["opinion_id"]
3260
- evaluation = update["evaluation"]
3261
-
3262
- if evaluation["action"] == "update" and evaluation["new_text"]:
3263
- # Update both text and confidence
3264
- await conn.execute(
3265
- f"""
3266
- UPDATE {fq_table("memory_units")}
3267
- SET text = $1, confidence_score = $2, updated_at = NOW()
3268
- WHERE id = $3
3269
- """,
3270
- evaluation["new_text"],
3271
- evaluation["new_confidence"],
3272
- uuid.UUID(opinion_id),
3273
- )
3274
- else:
3275
- # Only update confidence
3276
- await conn.execute(
3277
- f"""
3278
- UPDATE {fq_table("memory_units")}
3279
- SET confidence_score = $1, updated_at = NOW()
3280
- WHERE id = $2
3281
- """,
3282
- evaluation["new_confidence"],
3283
- uuid.UUID(opinion_id),
3284
- )
3285
-
3286
- else:
3287
- pass # No opinions to update
3288
-
3289
- except Exception as e:
3290
- logger.error(f"[REINFORCE] Error during opinion reinforcement: {str(e)}")
3291
- import traceback
3292
-
3293
- traceback.print_exc()
3294
-
3295
- # ==================== bank profile Methods ====================
3296
-
3297
- async def get_bank_profile(
3298
- self,
3299
- bank_id: str,
3300
- *,
3301
- request_context: "RequestContext",
3302
- ) -> dict[str, Any]:
3303
- """
3304
- Get bank profile (name, disposition + background).
3305
- Auto-creates agent with default values if not exists.
3306
-
3307
- Args:
3308
- bank_id: bank IDentifier
3309
- request_context: Request context for authentication.
3310
-
3311
- Returns:
3312
- Dict with name, disposition traits, and background
3313
- """
3314
- await self._authenticate_tenant(request_context)
3315
- pool = await self._get_pool()
3316
- profile = await bank_utils.get_bank_profile(pool, bank_id)
3317
- disposition = profile["disposition"]
3318
- return {
3319
- "bank_id": bank_id,
3320
- "name": profile["name"],
3321
- "disposition": disposition,
3322
- "background": profile["background"],
3323
- }
3324
-
3325
- async def update_bank_disposition(
3326
- self,
3327
- bank_id: str,
3328
- disposition: dict[str, int],
3329
- *,
3330
- request_context: "RequestContext",
3331
- ) -> None:
3332
- """
3333
- Update bank disposition traits.
3334
-
3335
- Args:
3336
- bank_id: bank IDentifier
3337
- disposition: Dict with skepticism, literalism, empathy (all 1-5)
3338
- request_context: Request context for authentication.
3339
- """
3340
- await self._authenticate_tenant(request_context)
3341
- pool = await self._get_pool()
3342
- await bank_utils.update_bank_disposition(pool, bank_id, disposition)
3343
-
3344
- async def merge_bank_background(
3467
+ async def merge_bank_mission(
3345
3468
  self,
3346
3469
  bank_id: str,
3347
3470
  new_info: str,
3348
3471
  *,
3349
- update_disposition: bool = True,
3350
3472
  request_context: "RequestContext",
3351
3473
  ) -> dict[str, Any]:
3352
3474
  """
3353
- Merge new background information with existing background using LLM.
3475
+ Merge new mission information with existing mission using LLM.
3354
3476
  Normalizes to first person ("I") and resolves conflicts.
3355
- Optionally infers disposition traits from the merged background.
3356
3477
 
3357
3478
  Args:
3358
3479
  bank_id: bank IDentifier
3359
- new_info: New background information to add/merge
3360
- update_disposition: If True, infer Big Five traits from background (default: True)
3480
+ new_info: New mission information to add/merge
3361
3481
  request_context: Request context for authentication.
3362
3482
 
3363
3483
  Returns:
3364
- Dict with 'background' (str) and optionally 'disposition' (dict) keys
3484
+ Dict with 'mission' (str) key
3365
3485
  """
3366
3486
  await self._authenticate_tenant(request_context)
3367
3487
  pool = await self._get_pool()
3368
- return await bank_utils.merge_bank_background(
3369
- pool, self._reflect_llm_config, bank_id, new_info, update_disposition
3370
- )
3488
+ return await bank_utils.merge_bank_mission(pool, self._reflect_llm_config, bank_id, new_info)
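Together with get_bank_profile, the methods above form the bank-profile write path. A hedged sketch of how a caller might use them (again assuming an engine instance and an authenticated RequestContext; trait names follow the docstrings above):

    async def configure_bank(engine, ctx) -> None:
        # Traits are integers in the 1-5 range per update_bank_disposition's docstring.
        await engine.update_bank_disposition(
            "bank-123",
            {"skepticism": 4, "literalism": 2, "empathy": 5},
            request_context=ctx,
        )
        # Replace the mission outright, or let the LLM merge new information into it.
        await engine.set_bank_mission("bank-123", "Track customer escalations.", request_context=ctx)
        merged = await engine.merge_bank_mission("bank-123", "Also watch for churn signals.", request_context=ctx)
        print(merged["mission"])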
3371
3489
 
3372
3490
  async def list_banks(
3373
3491
  self,
@@ -3381,7 +3499,7 @@ Guidelines:
3381
3499
  request_context: Request context for authentication.
3382
3500
 
3383
3501
  Returns:
3384
- List of dicts with bank_id, name, disposition, background, created_at, updated_at
3502
+ List of dicts with bank_id, name, disposition, mission, created_at, updated_at
3385
3503
  """
3386
3504
  await self._authenticate_tenant(request_context)
3387
3505
  pool = await self._get_pool()
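Per the Returns line above, each list_banks entry carries the profile fields plus timestamps; a small sketch of iterating over them (anything beyond request_context in the call is an assumption):

    async def print_banks(engine, ctx) -> None:
        for bank in await engine.list_banks(request_context=ctx):
            # Keys per the docstring: bank_id, name, disposition, mission, created_at, updated_at
            print(bank["bank_id"], bank["name"], bank.get("mission", ""))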
@@ -3401,32 +3519,39 @@ Guidelines:
3401
3519
  request_context: "RequestContext",
3402
3520
  tags: list[str] | None = None,
3403
3521
  tags_match: TagsMatch = "any",
3522
+ exclude_mental_model_ids: list[str] | None = None,
3404
3523
  ) -> ReflectResult:
3405
3524
  """
3406
- Reflect and formulate an answer using bank identity, world facts, and opinions.
3525
+ Reflect and formulate an answer using an agentic loop with tools.
3407
3526
 
3408
- This method:
3409
- 1. Retrieves experience (conversations and events)
3410
- 2. Retrieves world facts (general knowledge)
3411
- 3. Retrieves existing opinions (bank's formed perspectives)
3412
- 4. Uses LLM to formulate an answer
3413
- 5. Extracts and stores any new opinions formed during reflection
3414
- 6. Optionally generates structured output based on response_schema
3415
- 7. Returns plain text answer and the facts used
3527
+ The reflect agent iteratively uses tools to:
3528
+ 1. lookup: Get mental models (synthesized knowledge)
3529
+ 2. recall: Search facts (semantic + temporal retrieval)
3530
+ 3. learn: Create/update mental models with new insights
3531
+ 4. expand: Get chunk/document context for memories
3532
+
3533
+ The agent starts with empty context and must call tools to gather
3534
+ information. On the last iteration, tools are removed to force a
3535
+ final text response.
3416
3536
 
3417
3537
  Args:
3418
3538
  bank_id: bank identifier
3419
3539
  query: Question to answer
3420
- budget: Budget level for memory exploration (low=100, mid=300, high=600 units)
3421
- context: Additional context string to include in LLM prompt (not used in recall)
3422
- response_schema: Optional JSON Schema for structured output
3540
+ budget: Budget level; scales the agent's maximum tool-loop iterations (low=0.5x, mid=1x, high=2x of the configured base)
3541
+ context: Additional context string to include in agent prompt
3542
+ max_tokens: Maximum tokens for the final answer, forwarded to the reflect agent
3543
+ response_schema: Optional JSON Schema for structured output, forwarded to the reflect agent
3544
+ tags: Optional tags to filter memories
3545
+ tags_match: How to match tags - "any" (OR), "all" (AND)
3546
+ exclude_mental_model_ids: Optional list of mental model IDs to exclude from search
3547
+ (used when refreshing a mental model to avoid circular reference)
3423
3548
 
3424
3549
  Returns:
3425
3550
  ReflectResult containing:
3426
- - text: Plain text answer (no markdown)
3427
- - based_on: Dict with 'world', 'experience', and 'opinion' fact lists (MemoryFact objects)
3428
- - new_opinions: List of newly formed opinions
3429
- - structured_output: Optional dict if response_schema was provided
3551
+ - text: Plain text answer
3552
+ - based_on: Facts and mental models the agent actually used, grouped by fact type
3553
+ - new_opinions: Empty list (learnings are stored as mental models instead)
3554
+ - structured_output: Structured output dict when a response_schema was provided
3430
3555
  """
3431
3556
  # Use cached LLM config
3432
3557
  if self._reflect_llm_config is None:
@@ -3450,129 +3575,312 @@ Guidelines:
3450
3575
 
3451
3576
  reflect_start = time.time()
3452
3577
  reflect_id = f"{bank_id[:8]}-{int(time.time() * 1000) % 100000}"
3453
- log_buffer = []
3454
- log_buffer.append(f"[REFLECT {reflect_id}] Query: '{query[:50]}...'")
3578
+ tags_info = f", tags={tags} ({tags_match})" if tags else ""
3579
+ logger.info(f"[REFLECT {reflect_id}] Starting agentic reflect for query: {query[:50]}...{tags_info}")
3455
3580
 
3456
- # Steps 1-3: Run multi-fact-type search (12-way retrieval: 4 methods × 3 fact types)
3457
- recall_start = time.time()
3458
- metrics = get_metrics_collector()
3459
- with metrics.record_operation(
3460
- "recall", bank_id=bank_id, source="reflect", budget=budget.value if budget else None
3461
- ):
3462
- search_result = await self.recall_async(
3463
- bank_id=bank_id,
3464
- query=query,
3465
- budget=budget,
3466
- max_tokens=4096,
3467
- enable_trace=False,
3468
- fact_type=["experience", "world", "opinion"],
3469
- include_entities=True,
3470
- request_context=request_context,
3581
+ # Get bank profile for agent identity
3582
+ profile = await self.get_bank_profile(bank_id, request_context=request_context)
3583
+
3584
+ # NOTE: Mental models are NOT pre-loaded to keep the initial prompt small.
3585
+ # The agent can call lookup() to list available models if needed.
3586
+ # This is critical for banks with many mental models to avoid huge prompts.
3587
+
3588
+ # Compute max iterations based on budget
3589
+ config = get_config()
3590
+ base_max_iterations = config.reflect_max_iterations
3591
+ # Budget multipliers: low=0.5x, mid=1x, high=2x
3592
+ budget_multipliers = {Budget.LOW: 0.5, Budget.MID: 1.0, Budget.HIGH: 2.0}
3593
+ effective_budget = budget or Budget.LOW
3594
+ max_iterations = max(1, int(base_max_iterations * budget_multipliers.get(effective_budget, 1.0)))
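As a worked example of the multiplier arithmetic above, assuming a configured reflect_max_iterations of 10 (an illustrative value only):

    base = 10  # hypothetical config.reflect_max_iterations
    print({name: max(1, int(base * m)) for name, m in {"low": 0.5, "mid": 1.0, "high": 2.0}.items()})
    # -> {'low': 5, 'mid': 10, 'high': 20}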
3595
+
3596
+ # Run agentic loop - acquire connections only when needed for DB operations
3597
+ # (not held during LLM calls which can be slow)
3598
+ pool = await self._get_pool()
3599
+
3600
+ # Get bank stats for freshness info
3601
+ bank_stats = await self.get_bank_stats(bank_id, request_context=request_context)
3602
+ last_consolidated_at = bank_stats.last_consolidated_at if hasattr(bank_stats, "last_consolidated_at") else None
3603
+ pending_consolidation = bank_stats.pending_consolidation if hasattr(bank_stats, "pending_consolidation") else 0
3604
+
3605
+ # Create tool callbacks that acquire connections only when needed
3606
+ from .retain import embedding_utils
3607
+
3608
+ async def search_mental_models_fn(q: str, max_results: int = 5) -> dict[str, Any]:
3609
+ # Generate embedding for the query
3610
+ embeddings = await embedding_utils.generate_embeddings_batch(self.embeddings, [q])
3611
+ query_embedding = embeddings[0]
3612
+ async with pool.acquire() as conn:
3613
+ return await tool_search_mental_models(
3614
+ conn,
3615
+ bank_id,
3616
+ q,
3617
+ query_embedding,
3618
+ max_results=max_results,
3619
+ tags=tags,
3620
+ tags_match=tags_match,
3621
+ exclude_ids=exclude_mental_model_ids,
3622
+ )
3623
+
3624
+ async def search_observations_fn(q: str, max_tokens: int = 5000) -> dict[str, Any]:
3625
+ return await tool_search_observations(
3626
+ self,
3627
+ bank_id,
3628
+ q,
3629
+ request_context,
3630
+ max_tokens=max_tokens,
3471
3631
  tags=tags,
3472
3632
  tags_match=tags_match,
3633
+ last_consolidated_at=last_consolidated_at,
3634
+ pending_consolidation=pending_consolidation,
3473
3635
  )
3474
- recall_time = time.time() - recall_start
3475
3636
 
3476
- all_results = search_result.results
3637
+ async def recall_fn(q: str, max_tokens: int = 4096) -> dict[str, Any]:
3638
+ return await tool_recall(
3639
+ self, bank_id, q, request_context, max_tokens=max_tokens, tags=tags, tags_match=tags_match
3640
+ )
3477
3641
 
3478
- # Split results by fact type for structured response
3479
- agent_results = [r for r in all_results if r.fact_type == "experience"]
3480
- world_results = [r for r in all_results if r.fact_type == "world"]
3481
- opinion_results = [r for r in all_results if r.fact_type == "opinion"]
3642
+ async def expand_fn(memory_ids: list[str], depth: str) -> dict[str, Any]:
3643
+ async with pool.acquire() as conn:
3644
+ return await tool_expand(conn, bank_id, memory_ids, depth)
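Each callback above scopes its pool connection to the database call itself, so a slow LLM iteration never holds a connection. A minimal sketch of an additional tool callback written in the same style (the entity count query and its use of fq_table are assumptions modeled on the surrounding code, not part of the package):

    async def count_entities_fn() -> dict[str, Any]:
        # Acquire the connection only for the query; release it before returning to the LLM loop.
        async with pool.acquire() as conn:
            n = await conn.fetchval(
                f"SELECT COUNT(*) FROM {fq_table('entities')} WHERE bank_id = $1", bank_id
            )
        return {"entity_count": n}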
3482
3645
 
3483
- log_buffer.append(
3484
- f"[REFLECT {reflect_id}] Recall: {len(all_results)} facts (experience={len(agent_results)}, world={len(world_results)}, opinion={len(opinion_results)}) in {recall_time:.3f}s"
3646
+ # Load directives from the dedicated directives table
3647
+ # Directives are hard rules that must be followed in all responses
3648
+ directives_raw = await self.list_directives(
3649
+ bank_id=bank_id,
3650
+ tags=tags,
3651
+ tags_match=tags_match,
3652
+ active_only=True,
3653
+ request_context=request_context,
3485
3654
  )
3655
+ # Convert directive format to the expected format for reflect agent
3656
+ # The agent expects: name, description (optional), observations (list of {title, content})
3657
+ directives = [
3658
+ {
3659
+ "name": d["name"],
3660
+ "description": d["content"], # Use content as description
3661
+ "observations": [], # Directives use content directly, not observations
3662
+ }
3663
+ for d in directives_raw
3664
+ ]
3665
+ if directives:
3666
+ logger.info(f"[REFLECT {reflect_id}] Loaded {len(directives)} directives")
3486
3667
 
3487
- # Format facts for LLM
3488
- agent_facts_text = think_utils.format_facts_for_prompt(agent_results)
3489
- world_facts_text = think_utils.format_facts_for_prompt(world_results)
3490
- opinion_facts_text = think_utils.format_facts_for_prompt(opinion_results)
3668
+ # Check if the bank has any mental models
3669
+ async with pool.acquire() as conn:
3670
+ mental_model_count = await conn.fetchval(
3671
+ f"SELECT COUNT(*) FROM {fq_table('mental_models')} WHERE bank_id = $1",
3672
+ bank_id,
3673
+ )
3674
+ has_mental_models = mental_model_count > 0
3675
+ if has_mental_models:
3676
+ logger.info(f"[REFLECT {reflect_id}] Bank has {mental_model_count} mental models")
3491
3677
 
3492
- # Get bank profile (name, disposition + background)
3493
- profile = await self.get_bank_profile(bank_id, request_context=request_context)
3494
- name = profile["name"]
3495
- disposition = profile["disposition"] # Typed as DispositionTraits
3496
- background = profile["background"]
3497
-
3498
- # Build the prompt
3499
- prompt = think_utils.build_think_prompt(
3500
- agent_facts_text=agent_facts_text,
3501
- world_facts_text=world_facts_text,
3502
- opinion_facts_text=opinion_facts_text,
3678
+ # Run the agent
3679
+ agent_result = await run_reflect_agent(
3680
+ llm_config=self._reflect_llm_config,
3681
+ bank_id=bank_id,
3503
3682
  query=query,
3504
- name=name,
3505
- disposition=disposition,
3506
- background=background,
3683
+ bank_profile=profile,
3684
+ search_mental_models_fn=search_mental_models_fn,
3685
+ search_observations_fn=search_observations_fn,
3686
+ recall_fn=recall_fn,
3687
+ expand_fn=expand_fn,
3507
3688
  context=context,
3689
+ max_iterations=max_iterations,
3690
+ max_tokens=max_tokens,
3691
+ response_schema=response_schema,
3692
+ directives=directives,
3693
+ has_mental_models=has_mental_models,
3694
+ budget=effective_budget,
3508
3695
  )
3509
3696
 
3510
- log_buffer.append(f"[REFLECT {reflect_id}] Prompt: {len(prompt)} chars")
3511
-
3512
- system_message = think_utils.get_system_message(disposition)
3513
- messages = [{"role": "system", "content": system_message}, {"role": "user", "content": prompt}]
3514
-
3515
- # Prepare response_format if schema provided
3516
- response_format = None
3517
- if response_schema is not None:
3518
- # Wrapper class to provide Pydantic-like interface for raw JSON schemas
3519
- class JsonSchemaWrapper:
3520
- def __init__(self, schema: dict):
3521
- self._schema = schema
3522
-
3523
- def model_json_schema(self):
3524
- return self._schema
3525
-
3526
- response_format = JsonSchemaWrapper(response_schema)
3527
-
3528
- llm_start = time.time()
3529
- llm_result, usage = await self._reflect_llm_config.call(
3530
- messages=messages,
3531
- scope="memory_reflect",
3532
- max_completion_tokens=max_tokens,
3533
- response_format=response_format,
3534
- skip_validation=True if response_format else False,
3535
- # Don't enforce strict_schema - not all providers support it and may retry forever
3536
- # Soft enforcement (schema in prompt + json_object mode) is sufficient
3537
- strict_schema=False,
3538
- return_usage=True,
3697
+ total_time = time.time() - reflect_start
3698
+ logger.info(
3699
+ f"[REFLECT {reflect_id}] Complete: {len(agent_result.text)} chars, "
3700
+ f"{agent_result.iterations} iterations, {agent_result.tools_called} tool calls | {total_time:.3f}s"
3539
3701
  )
3540
- llm_time = time.time() - llm_start
3541
3702
 
3542
- # Handle response based on whether structured output was requested
3543
- if response_schema is not None:
3544
- structured_output = llm_result
3545
- answer_text = "" # Empty for backward compatibility
3546
- log_buffer.append(f"[REFLECT {reflect_id}] Structured output generated")
3547
- else:
3548
- structured_output = None
3549
- answer_text = llm_result.strip()
3703
+ # Convert agent tool trace to ToolCallTrace objects
3704
+ tool_trace_result = [
3705
+ ToolCallTrace(
3706
+ tool=tc.tool,
3707
+ reason=tc.reason,
3708
+ input=tc.input,
3709
+ output=tc.output,
3710
+ duration_ms=tc.duration_ms,
3711
+ iteration=tc.iteration,
3712
+ )
3713
+ for tc in agent_result.tool_trace
3714
+ ]
3550
3715
 
3551
- # Submit form_opinion task for background processing
3552
- # Pass tenant_id from request context for internal authentication in background task
3553
- await self._task_backend.submit_task(
3554
- {
3555
- "type": "form_opinion",
3556
- "bank_id": bank_id,
3557
- "answer_text": answer_text,
3558
- "query": query,
3559
- "tenant_id": getattr(request_context, "tenant_id", None) if request_context else None,
3560
- }
3561
- )
3716
+ # Convert agent LLM trace to LLMCallTrace objects
3717
+ llm_trace_result = [LLMCallTrace(scope=lc.scope, duration_ms=lc.duration_ms) for lc in agent_result.llm_trace]
3718
+
3719
+ # Extract memories from recall tool outputs - only include memories the agent actually used
3720
+ # agent_result.used_memory_ids contains validated IDs from the done action
3721
+ used_memory_ids_set = set(agent_result.used_memory_ids) if agent_result.used_memory_ids else set()
3722
+ based_on: dict[str, list[MemoryFact]] = {"world": [], "experience": [], "opinion": [], "observation": []}
3723
+ seen_memory_ids: set[str] = set()
3724
+ for tc in agent_result.tool_trace:
3725
+ if tc.tool == "recall" and "memories" in tc.output:
3726
+ for memory_data in tc.output["memories"]:
3727
+ memory_id = memory_data.get("id")
3728
+ # Only include memories that the agent declared as used (or all if none specified)
3729
+ if memory_id and memory_id not in seen_memory_ids:
3730
+ if used_memory_ids_set and memory_id not in used_memory_ids_set:
3731
+ continue # Skip memories not actually used by the agent
3732
+ seen_memory_ids.add(memory_id)
3733
+ fact_type = memory_data.get("type", "world")
3734
+ if fact_type in based_on:
3735
+ based_on[fact_type].append(
3736
+ MemoryFact(
3737
+ id=memory_id,
3738
+ text=memory_data.get("text", ""),
3739
+ fact_type=fact_type,
3740
+ context=None,
3741
+ occurred_start=memory_data.get("occurred"),
3742
+ occurred_end=memory_data.get("occurred"),
3743
+ )
3744
+ )
3562
3745
 
3563
- total_time = time.time() - reflect_start
3564
- log_buffer.append(
3565
- f"[REFLECT {reflect_id}] Complete: {len(answer_text)} chars response, LLM {llm_time:.3f}s, total {total_time:.3f}s"
3746
+ # Extract mental models from tool outputs - only include models the agent actually used
3747
+ # agent_result.used_mental_model_ids contains validated IDs from the done action
3748
+ used_model_ids_set = set(agent_result.used_mental_model_ids) if agent_result.used_mental_model_ids else set()
3749
+ based_on["mental-models"] = []
3750
+ seen_model_ids: set[str] = set()
3751
+ for tc in agent_result.tool_trace:
3752
+ if tc.tool == "get_mental_model":
3753
+ # Single model lookup (with full details)
3754
+ if tc.output.get("found") and "model" in tc.output:
3755
+ model = tc.output["model"]
3756
+ model_id = model.get("id")
3757
+ if model_id and model_id not in seen_model_ids:
3758
+ # Only include models that the agent declared as used (or all if none specified)
3759
+ if used_model_ids_set and model_id not in used_model_ids_set:
3760
+ continue # Skip models not actually used by the agent
3761
+ seen_model_ids.add(model_id)
3762
+ # Add to based_on as MemoryFact with type "mental-models"
3763
+ model_name = model.get("name", "")
3764
+ model_summary = model.get("summary") or model.get("description", "")
3765
+ based_on["mental-models"].append(
3766
+ MemoryFact(
3767
+ id=model_id,
3768
+ text=f"{model_name}: {model_summary}",
3769
+ fact_type="mental-models",
3770
+ context=f"{model.get('type', 'concept')} ({model.get('subtype', 'structural')})",
3771
+ occurred_start=None,
3772
+ occurred_end=None,
3773
+ )
3774
+ )
3775
+ elif tc.tool == "search_mental_models":
3776
+ # Search mental models - include all returned models (filtered by used_model_ids_set if specified)
3777
+ for model in tc.output.get("mental_models", []):
3778
+ model_id = model.get("id")
3779
+ if model_id and model_id not in seen_model_ids:
3780
+ # Only include models that the agent declared as used (or all if none specified)
3781
+ if used_model_ids_set and model_id not in used_model_ids_set:
3782
+ continue # Skip models not actually used by the agent
3783
+ seen_model_ids.add(model_id)
3784
+ # Add to based_on as MemoryFact with type "mental-models"
3785
+ model_name = model.get("name", "")
3786
+ model_summary = model.get("summary") or model.get("description", "")
3787
+ based_on["mental-models"].append(
3788
+ MemoryFact(
3789
+ id=model_id,
3790
+ text=f"{model_name}: {model_summary}",
3791
+ fact_type="mental-models",
3792
+ context=f"{model.get('type', 'concept')} ({model.get('subtype', 'structural')})",
3793
+ occurred_start=None,
3794
+ occurred_end=None,
3795
+ )
3796
+ )
3797
+ elif tc.tool == "search_mental_models":
3798
+ # Search mental models - include all returned mental models (filtered by used_mental_model_ids_set if specified)
3799
+ used_mental_model_ids_set = (
3800
+ set(agent_result.used_mental_model_ids) if agent_result.used_mental_model_ids else set()
3801
+ )
3802
+ for mental_model in tc.output.get("mental_models", []):
3803
+ mental_model_id = mental_model.get("id")
3804
+ if mental_model_id and mental_model_id not in seen_model_ids:
3805
+ # Only include mental models that the agent declared as used (or all if none specified)
3806
+ if used_mental_model_ids_set and mental_model_id not in used_mental_model_ids_set:
3807
+ continue # Skip mental models not actually used by the agent
3808
+ seen_model_ids.add(mental_model_id)
3809
+ # Add to based_on as MemoryFact with type "mental-models" (mental models are synthesized knowledge)
3810
+ mental_model_name = mental_model.get("name", "")
3811
+ mental_model_content = mental_model.get("content", "")
3812
+ based_on["mental-models"].append(
3813
+ MemoryFact(
3814
+ id=mental_model_id,
3815
+ text=f"{mental_model_name}: {mental_model_content}",
3816
+ fact_type="mental-models",
3817
+ context="mental model (user-curated)",
3818
+ occurred_start=None,
3819
+ occurred_end=None,
3820
+ )
3821
+ )
3822
+ # List all models lookup - don't add to based_on (too verbose, just a listing)
3823
+
3824
+ # Add directives to based_on["mental-models"] (they are mental models with subtype='directive')
3825
+ for directive in directives:
3826
+ # Extract summary from observations
3827
+ summary_parts: list[str] = []
3828
+ for obs in directive.get("observations", []):
3829
+ # Support both Pydantic Observation objects and dicts
3830
+ if hasattr(obs, "content"):
3831
+ content = obs.content
3832
+ title = obs.title
3833
+ else:
3834
+ content = obs.get("content", "")
3835
+ title = obs.get("title", "")
3836
+ if title and content:
3837
+ summary_parts.append(f"{title}: {content}")
3838
+ elif content:
3839
+ summary_parts.append(content)
3840
+
3841
+ # Fallback to description if no observations
3842
+ if not summary_parts and directive.get("description"):
3843
+ summary_parts.append(directive["description"])
3844
+
3845
+ directive_name = directive.get("name", "")
3846
+ directive_summary = "; ".join(summary_parts) if summary_parts else ""
3847
+ based_on["mental-models"].append(
3848
+ MemoryFact(
3849
+ id=directive.get("id", ""),
3850
+ text=f"{directive_name}: {directive_summary}",
3851
+ fact_type="mental-models",
3852
+ context="directive (directive)",
3853
+ occurred_start=None,
3854
+ occurred_end=None,
3855
+ )
3856
+ )
3857
+
3858
+ # Build directives_applied from agent result
3859
+ from hindsight_api.engine.response_models import DirectiveRef
3860
+
3861
+ directives_applied_result = [
3862
+ DirectiveRef(id=d.id, name=d.name, content=d.content) for d in agent_result.directives_applied
3863
+ ]
3864
+
3865
+ # Convert agent usage to TokenUsage format
3866
+ from hindsight_api.engine.response_models import TokenUsage
3867
+
3868
+ usage = TokenUsage(
3869
+ input_tokens=agent_result.usage.input_tokens,
3870
+ output_tokens=agent_result.usage.output_tokens,
3871
+ total_tokens=agent_result.usage.total_tokens,
3566
3872
  )
3567
- logger.info("\n" + "\n".join(log_buffer))
3568
3873
 
3569
- # Return response with facts split by type
3874
+ # Return response (compatible with existing API)
3570
3875
  result = ReflectResult(
3571
- text=answer_text,
3572
- based_on={"world": world_results, "experience": agent_results, "opinion": opinion_results},
3573
- new_opinions=[], # Opinions are being extracted asynchronously
3574
- structured_output=structured_output,
3876
+ text=agent_result.text,
3877
+ based_on=based_on,
3878
+ new_opinions=[], # Learnings stored as mental models
3879
+ structured_output=agent_result.structured_output,
3575
3880
  usage=usage,
3881
+ tool_trace=tool_trace_result,
3882
+ llm_trace=llm_trace_result,
3883
+ directives_applied=directives_applied_result,
3576
3884
  )
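Putting the pieces together, a hedged sketch of driving the agentic reflect path and reading the result fields assembled above (the public method name and exact call signature are assumptions; engine/ctx as in the earlier sketches):

    async def ask(engine, ctx) -> None:
        result = await engine.reflect(  # method name assumed for this module's public API
            bank_id="bank-123",
            query="What is the customer's current escalation status?",
            request_context=ctx,
        )
        print(result.text)
        # Facts and mental models the agent actually used, grouped by type:
        for fact in result.based_on.get("mental-models", []):
            print("-", fact.text)
        # Per-tool timing from the agent loop:
        for call in result.tool_trace:
            print(call.tool, f"{call.duration_ms}ms", f"iteration {call.iteration}")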
3577
3885
 
3578
3886
  # Call post-operation hook if validator is configured
@@ -3596,50 +3904,6 @@ Guidelines:
3596
3904
 
3597
3905
  return result
3598
3906
 
3599
- async def _extract_and_store_opinions_async(
3600
- self, bank_id: str, answer_text: str, query: str, tenant_id: str | None = None
3601
- ):
3602
- """
3603
- Background task to extract and store opinions from think response.
3604
-
3605
- This runs asynchronously and does not block the think response.
3606
-
3607
- Args:
3608
- bank_id: bank IDentifier
3609
- answer_text: The generated answer text
3610
- query: The original query
3611
- tenant_id: Tenant identifier for internal authentication
3612
- """
3613
- try:
3614
- # Extract opinions from the answer
3615
- new_opinions = await think_utils.extract_opinions_from_text(
3616
- self._reflect_llm_config, text=answer_text, query=query
3617
- )
3618
-
3619
- # Store new opinions
3620
- if new_opinions:
3621
- from datetime import datetime
3622
-
3623
- current_time = datetime.now(UTC)
3624
- # Use internal context with tenant_id for background authentication
3625
- # Extension can check internal=True to bypass normal auth
3626
- from hindsight_api.models import RequestContext
3627
-
3628
- internal_context = RequestContext(tenant_id=tenant_id, internal=True)
3629
- for opinion in new_opinions:
3630
- await self.retain_async(
3631
- bank_id=bank_id,
3632
- content=opinion.opinion,
3633
- context=f"formed during thinking about: {query}",
3634
- event_date=current_time,
3635
- fact_type_override="opinion",
3636
- confidence_score=opinion.confidence,
3637
- request_context=internal_context,
3638
- )
3639
-
3640
- except Exception as e:
3641
- logger.warning(f"[REFLECT] Failed to extract/store opinions: {str(e)}")
3642
-
3643
3907
  async def get_entity_observations(
3644
3908
  self,
3645
3909
  bank_id: str,
@@ -3649,99 +3913,22 @@ Guidelines:
3649
3913
  request_context: "RequestContext",
3650
3914
  ) -> list[Any]:
3651
3915
  """
3652
- Get observations linked to an entity.
3653
-
3654
- Args:
3655
- bank_id: bank IDentifier
3656
- entity_id: Entity UUID to get observations for
3657
- limit: Maximum number of observations to return
3658
- request_context: Request context for authentication.
3659
-
3660
- Returns:
3661
- List of EntityObservation objects
3662
- """
3663
- await self._authenticate_tenant(request_context)
3664
- pool = await self._get_pool()
3665
- async with acquire_with_retry(pool) as conn:
3666
- rows = await conn.fetch(
3667
- f"""
3668
- SELECT mu.text, mu.mentioned_at
3669
- FROM {fq_table("memory_units")} mu
3670
- JOIN {fq_table("unit_entities")} ue ON mu.id = ue.unit_id
3671
- WHERE mu.bank_id = $1
3672
- AND mu.fact_type = 'observation'
3673
- AND ue.entity_id = $2
3674
- ORDER BY mu.mentioned_at DESC
3675
- LIMIT $3
3676
- """,
3677
- bank_id,
3678
- uuid.UUID(entity_id),
3679
- limit,
3680
- )
3681
-
3682
- observations = []
3683
- for row in rows:
3684
- mentioned_at = row["mentioned_at"].isoformat() if row["mentioned_at"] else None
3685
- observations.append(EntityObservation(text=row["text"], mentioned_at=mentioned_at))
3686
- return observations
3916
+ Get observations for an entity.
3687
3917
 
3688
- async def get_entity_observations_batch(
3689
- self,
3690
- bank_id: str,
3691
- entity_ids: list[str],
3692
- *,
3693
- limit_per_entity: int = 5,
3694
- request_context: "RequestContext",
3695
- ) -> dict[str, list[Any]]:
3696
- """
3697
- Get observations for multiple entities in a single query.
3918
+ NOTE: Entity observations/summaries have been moved to mental models.
3919
+ This method returns an empty list. Use mental models for entity summaries.
3698
3920
 
3699
3921
  Args:
3700
3922
  bank_id: bank IDentifier
3701
- entity_ids: List of entity UUIDs to get observations for
3702
- limit_per_entity: Maximum observations per entity
3923
+ entity_id: Entity UUID to get observations for
3924
+ limit: Ignored (kept for backwards compatibility)
3703
3925
  request_context: Request context for authentication.
3704
3926
 
3705
3927
  Returns:
3706
- Dict mapping entity_id -> list of EntityObservation objects
3928
+ Empty list (observations now in mental models)
3707
3929
  """
3708
- if not entity_ids:
3709
- return {}
3710
-
3711
3930
  await self._authenticate_tenant(request_context)
3712
- pool = await self._get_pool()
3713
- async with acquire_with_retry(pool) as conn:
3714
- # Use window function to limit observations per entity
3715
- rows = await conn.fetch(
3716
- f"""
3717
- WITH ranked AS (
3718
- SELECT
3719
- ue.entity_id,
3720
- mu.text,
3721
- mu.mentioned_at,
3722
- ROW_NUMBER() OVER (PARTITION BY ue.entity_id ORDER BY mu.mentioned_at DESC) as rn
3723
- FROM {fq_table("memory_units")} mu
3724
- JOIN {fq_table("unit_entities")} ue ON mu.id = ue.unit_id
3725
- WHERE mu.bank_id = $1
3726
- AND mu.fact_type = 'observation'
3727
- AND ue.entity_id = ANY($2::uuid[])
3728
- )
3729
- SELECT entity_id, text, mentioned_at
3730
- FROM ranked
3731
- WHERE rn <= $3
3732
- ORDER BY entity_id, rn
3733
- """,
3734
- bank_id,
3735
- [uuid.UUID(eid) for eid in entity_ids],
3736
- limit_per_entity,
3737
- )
3738
-
3739
- result: dict[str, list[Any]] = {eid: [] for eid in entity_ids}
3740
- for row in rows:
3741
- entity_id = str(row["entity_id"])
3742
- mentioned_at = row["mentioned_at"].isoformat() if row["mentioned_at"] else None
3743
- result[entity_id].append(EntityObservation(text=row["text"], mentioned_at=mentioned_at))
3744
- return result
3931
+ return []
3745
3932
 
3746
3933
  async def list_entities(
3747
3934
  self,
@@ -3783,7 +3970,7 @@ Guidelines:
3783
3970
  SELECT id, canonical_name, mention_count, first_seen, last_seen, metadata
3784
3971
  FROM {fq_table("entities")}
3785
3972
  WHERE bank_id = $1
3786
- ORDER BY mention_count DESC, last_seen DESC
3973
+ ORDER BY mention_count DESC, last_seen DESC, id ASC
3787
3974
  LIMIT $2 OFFSET $3
3788
3975
  """,
3789
3976
  bank_id,
@@ -3911,22 +4098,23 @@ Guidelines:
3911
4098
  request_context: "RequestContext",
3912
4099
  ) -> EntityState:
3913
4100
  """
3914
- Get the current state (mental model) of an entity.
4101
+ Get the current state of an entity.
4102
+
4103
+ NOTE: Entity observations/summaries have been moved to mental models.
4104
+ This method returns an entity with empty observations.
3915
4105
 
3916
4106
  Args:
3917
4107
  bank_id: bank IDentifier
3918
4108
  entity_id: Entity UUID
3919
4109
  entity_name: Canonical name of the entity
3920
- limit: Maximum number of observations to include
4110
+ limit: Maximum number of observations to include (kept for backwards compat)
3921
4111
  request_context: Request context for authentication.
3922
4112
 
3923
4113
  Returns:
3924
- EntityState with observations
4114
+ EntityState with empty observations (summaries now in mental models)
3925
4115
  """
3926
- observations = await self.get_entity_observations(
3927
- bank_id, entity_id, limit=limit, request_context=request_context
3928
- )
3929
- return EntityState(entity_id=entity_id, canonical_name=entity_name, observations=observations)
4116
+ await self._authenticate_tenant(request_context)
4117
+ return EntityState(entity_id=entity_id, canonical_name=entity_name, observations=[])
3930
4118
 
3931
4119
  async def regenerate_entity_observations(
3932
4120
  self,
@@ -3937,535 +4125,1228 @@ Guidelines:
3937
4125
  version: str | None = None,
3938
4126
  conn=None,
3939
4127
  request_context: "RequestContext",
3940
- ) -> None:
4128
+ ) -> list[str]:
3941
4129
  """
3942
- Regenerate observations for an entity by:
3943
- 1. Checking version for deduplication (if provided)
3944
- 2. Searching all facts mentioning the entity
3945
- 3. Using LLM to synthesize observations (no personality)
3946
- 4. Deleting old observations for this entity
3947
- 5. Storing new observations linked to the entity
4130
+ Regenerate observations for an entity.
4131
+
4132
+ NOTE: Entity observations/summaries have been moved to mental models.
4133
+ This method is now a no-op and returns an empty list.
3948
4134
 
3949
4135
  Args:
3950
4136
  bank_id: bank IDentifier
3951
4137
  entity_id: Entity UUID
3952
4138
  entity_name: Canonical name of the entity
3953
4139
  version: Entity's last_seen timestamp when task was created (for deduplication)
3954
- conn: Optional database connection (for transactional atomicity with caller)
4140
+ conn: Optional database connection (ignored)
3955
4141
  request_context: Request context for authentication.
4142
+
4143
+ Returns:
4144
+ Empty list (observations now in mental models)
3956
4145
  """
3957
4146
  await self._authenticate_tenant(request_context)
3958
- pool = await self._get_pool()
3959
- entity_uuid = uuid.UUID(entity_id)
4147
+ return []
3960
4148
 
3961
- # Helper to run a query with provided conn or acquire one
3962
- async def fetch_with_conn(query, *args):
3963
- if conn is not None:
3964
- return await conn.fetch(query, *args)
3965
- else:
3966
- async with acquire_with_retry(pool) as acquired_conn:
3967
- return await acquired_conn.fetch(query, *args)
4149
+ # =========================================================================
4150
+ # Statistics & Operations (for HTTP API layer)
4151
+ # =========================================================================
3968
4152
 
3969
- async def fetchval_with_conn(query, *args):
3970
- if conn is not None:
3971
- return await conn.fetchval(query, *args)
3972
- else:
3973
- async with acquire_with_retry(pool) as acquired_conn:
3974
- return await acquired_conn.fetchval(query, *args)
4153
+ async def get_bank_stats(
4154
+ self,
4155
+ bank_id: str,
4156
+ *,
4157
+ request_context: "RequestContext",
4158
+ ) -> dict[str, Any]:
4159
+ """Get statistics about memory nodes and links for a bank."""
4160
+ await self._authenticate_tenant(request_context)
4161
+ pool = await self._get_pool()
3975
4162
 
3976
- # Step 1: Check version for deduplication
3977
- if version:
3978
- current_last_seen = await fetchval_with_conn(
4163
+ async with acquire_with_retry(pool) as conn:
4164
+ # Get node counts by fact_type
4165
+ node_stats = await conn.fetch(
3979
4166
  f"""
3980
- SELECT last_seen
3981
- FROM {fq_table("entities")}
3982
- WHERE id = $1 AND bank_id = $2
4167
+ SELECT fact_type, COUNT(*) as count
4168
+ FROM {fq_table("memory_units")}
4169
+ WHERE bank_id = $1
4170
+ GROUP BY fact_type
3983
4171
  """,
3984
- entity_uuid,
3985
4172
  bank_id,
3986
4173
  )
3987
4174
 
3988
- if current_last_seen and current_last_seen.isoformat() != version:
3989
- return []
3990
-
3991
- # Step 2: Get all facts mentioning this entity (exclude observations themselves)
3992
- rows = await fetch_with_conn(
3993
- f"""
3994
- SELECT mu.id, mu.text, mu.context, mu.occurred_start, mu.fact_type
3995
- FROM {fq_table("memory_units")} mu
3996
- JOIN {fq_table("unit_entities")} ue ON mu.id = ue.unit_id
3997
- WHERE mu.bank_id = $1
3998
- AND ue.entity_id = $2
3999
- AND mu.fact_type IN ('world', 'experience')
4000
- ORDER BY mu.occurred_start DESC
4001
- LIMIT 50
4002
- """,
4003
- bank_id,
4004
- entity_uuid,
4005
- )
4006
-
4007
- if not rows:
4008
- return []
4009
-
4010
- # Convert to MemoryFact objects for the observation extraction
4011
- facts = []
4012
- for row in rows:
4013
- occurred_start = row["occurred_start"].isoformat() if row["occurred_start"] else None
4014
- facts.append(
4015
- MemoryFact(
4016
- id=str(row["id"]),
4017
- text=row["text"],
4018
- fact_type=row["fact_type"],
4019
- context=row["context"],
4020
- occurred_start=occurred_start,
4021
- )
4022
- )
4023
-
4024
- # Step 3: Extract observations using LLM (no personality)
4025
- observations = await observation_utils.extract_observations_from_facts(
4026
- self._reflect_llm_config, entity_name, facts
4027
- )
4028
-
4029
- if not observations:
4030
- return []
4031
-
4032
- # Step 4: Delete old observations and insert new ones
4033
- # If conn provided, we're already in a transaction - don't start another
4034
- # If conn is None, acquire one and start a transaction
4035
- async def do_db_operations(db_conn):
4036
- # Delete old observations for this entity
4037
- await db_conn.execute(
4175
+ # Get link counts by link_type
4176
+ link_stats = await conn.fetch(
4038
4177
  f"""
4039
- DELETE FROM {fq_table("memory_units")}
4040
- WHERE id IN (
4041
- SELECT mu.id
4042
- FROM {fq_table("memory_units")} mu
4043
- JOIN {fq_table("unit_entities")} ue ON mu.id = ue.unit_id
4044
- WHERE mu.bank_id = $1
4045
- AND mu.fact_type = 'observation'
4046
- AND ue.entity_id = $2
4047
- )
4178
+ SELECT ml.link_type, COUNT(*) as count
4179
+ FROM {fq_table("memory_links")} ml
4180
+ JOIN {fq_table("memory_units")} mu ON ml.from_unit_id = mu.id
4181
+ WHERE mu.bank_id = $1
4182
+ GROUP BY ml.link_type
4048
4183
  """,
4049
4184
  bank_id,
4050
- entity_uuid,
4051
4185
  )
4052
4186
 
4053
- # Generate embeddings for new observations
4054
- embeddings = await embedding_utils.generate_embeddings_batch(self.embeddings, observations)
4187
+ # Get link counts by fact_type (from nodes)
4188
+ link_fact_type_stats = await conn.fetch(
4189
+ f"""
4190
+ SELECT mu.fact_type, COUNT(*) as count
4191
+ FROM {fq_table("memory_links")} ml
4192
+ JOIN {fq_table("memory_units")} mu ON ml.from_unit_id = mu.id
4193
+ WHERE mu.bank_id = $1
4194
+ GROUP BY mu.fact_type
4195
+ """,
4196
+ bank_id,
4197
+ )
4055
4198
 
4056
- # Insert new observations
4057
- current_time = utcnow()
4058
- created_ids = []
4199
+ # Get link counts by fact_type AND link_type
4200
+ link_breakdown_stats = await conn.fetch(
4201
+ f"""
4202
+ SELECT mu.fact_type, ml.link_type, COUNT(*) as count
4203
+ FROM {fq_table("memory_links")} ml
4204
+ JOIN {fq_table("memory_units")} mu ON ml.from_unit_id = mu.id
4205
+ WHERE mu.bank_id = $1
4206
+ GROUP BY mu.fact_type, ml.link_type
4207
+ """,
4208
+ bank_id,
4209
+ )
4059
4210
 
4060
- for obs_text, embedding in zip(observations, embeddings):
4061
- result = await db_conn.fetchrow(
4062
- f"""
4063
- INSERT INTO {fq_table("memory_units")} (
4064
- bank_id, text, embedding, context, event_date,
4065
- occurred_start, occurred_end, mentioned_at,
4066
- fact_type, access_count
4067
- )
4068
- VALUES ($1, $2, $3, $4, $5, $6, $7, $8, 'observation', 0)
4069
- RETURNING id
4070
- """,
4071
- bank_id,
4072
- obs_text,
4073
- str(embedding),
4074
- f"observation about {entity_name}",
4075
- current_time,
4076
- current_time,
4077
- current_time,
4078
- current_time,
4079
- )
4080
- obs_id = str(result["id"])
4081
- created_ids.append(obs_id)
4211
+ # Get pending and failed operations counts
4212
+ ops_stats = await conn.fetch(
4213
+ f"""
4214
+ SELECT status, COUNT(*) as count
4215
+ FROM {fq_table("async_operations")}
4216
+ WHERE bank_id = $1
4217
+ GROUP BY status
4218
+ """,
4219
+ bank_id,
4220
+ )
4082
4221
 
4083
- # Link observation to entity
4084
- await db_conn.execute(
4085
- f"""
4086
- INSERT INTO {fq_table("unit_entities")} (unit_id, entity_id)
4087
- VALUES ($1, $2)
4088
- """,
4089
- uuid.UUID(obs_id),
4090
- entity_uuid,
4222
+ return {
4223
+ "bank_id": bank_id,
4224
+ "node_counts": {row["fact_type"]: row["count"] for row in node_stats},
4225
+ "link_counts": {row["link_type"]: row["count"] for row in link_stats},
4226
+ "link_counts_by_fact_type": {row["fact_type"]: row["count"] for row in link_fact_type_stats},
4227
+ "link_breakdown": [
4228
+ {"fact_type": row["fact_type"], "link_type": row["link_type"], "count": row["count"]}
4229
+ for row in link_breakdown_stats
4230
+ ],
4231
+ "operations": {row["status"]: row["count"] for row in ops_stats},
4232
+ }
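From the queries above, the stats payload groups counts by fact type, link type and operation status. A sketch of consuming it (the count values and type names shown are illustrative, not real data):

    async def show_stats(engine, ctx) -> None:
        stats = await engine.get_bank_stats("bank-123", request_context=ctx)
        # e.g. {"bank_id": "bank-123",
        #       "node_counts": {"world": 120, "experience": 40, "observation": 7},
        #       "link_counts": {...}, "link_counts_by_fact_type": {...},
        #       "link_breakdown": [...], "operations": {"pending": 2, "completed": 57}}
        total_units = sum(stats["node_counts"].values())
        pending = stats["operations"].get("pending", 0)
        print(f"{total_units} memory units, {pending} pending operations")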
4233
+
4234
+ async def get_entity(
4235
+ self,
4236
+ bank_id: str,
4237
+ entity_id: str,
4238
+ *,
4239
+ request_context: "RequestContext",
4240
+ ) -> dict[str, Any] | None:
4241
+ """Get entity details including metadata and observations."""
4242
+ await self._authenticate_tenant(request_context)
4243
+ pool = await self._get_pool()
4244
+
4245
+ async with acquire_with_retry(pool) as conn:
4246
+ entity_row = await conn.fetchrow(
4247
+ f"""
4248
+ SELECT id, canonical_name, mention_count, first_seen, last_seen, metadata
4249
+ FROM {fq_table("entities")}
4250
+ WHERE bank_id = $1 AND id = $2
4251
+ """,
4252
+ bank_id,
4253
+ uuid.UUID(entity_id),
4254
+ )
4255
+
4256
+ if not entity_row:
4257
+ return None
4258
+
4259
+ # Get observations for the entity
4260
+ observations = await self.get_entity_observations(bank_id, entity_id, limit=20, request_context=request_context)
4261
+
4262
+ return {
4263
+ "id": str(entity_row["id"]),
4264
+ "canonical_name": entity_row["canonical_name"],
4265
+ "mention_count": entity_row["mention_count"],
4266
+ "first_seen": entity_row["first_seen"].isoformat() if entity_row["first_seen"] else None,
4267
+ "last_seen": entity_row["last_seen"].isoformat() if entity_row["last_seen"] else None,
4268
+ "metadata": entity_row["metadata"] or {},
4269
+ "observations": observations,
4270
+ }
4271
+
4272
+ def _parse_observations(self, observations_raw: list):
4273
+ """Parse raw observation dicts into typed Observation models.
4274
+
4275
+ Returns list of Observation models with computed trend/evidence_span/evidence_count.
4276
+ """
4277
+ from .reflect.observations import Observation, ObservationEvidence
4278
+
4279
+ observations: list[Observation] = []
4280
+ for obs in observations_raw:
4281
+ if not isinstance(obs, dict):
4282
+ continue
4283
+
4284
+ try:
4285
+ parsed = Observation(
4286
+ title=obs.get("title", ""),
4287
+ content=obs.get("content", ""),
4288
+ evidence=[
4289
+ ObservationEvidence(
4290
+ memory_id=ev.get("memory_id", ""),
4291
+ quote=ev.get("quote", ""),
4292
+ relevance=ev.get("relevance", ""),
4293
+ timestamp=ev.get("timestamp"),
4294
+ )
4295
+ for ev in obs.get("evidence", [])
4296
+ if isinstance(ev, dict)
4297
+ ],
4298
+ created_at=obs.get("created_at"),
4091
4299
  )
4300
+ observations.append(parsed)
4301
+ except Exception as e:
4302
+ logger.warning(f"Failed to parse observation: {e}")
4303
+ continue
4092
4304
 
4093
- return created_ids
4305
+ return observations
4094
4306
 
4095
- if conn is not None:
4096
- # Use provided connection (already in a transaction)
4097
- return await do_db_operations(conn)
4098
- else:
4099
- # Acquire connection and start our own transaction
4100
- async with acquire_with_retry(pool) as acquired_conn:
4101
- async with acquired_conn.transaction():
4102
- return await do_db_operations(acquired_conn)
4307
+ async def _count_memories_since(
4308
+ self,
4309
+ bank_id: str,
4310
+ since_timestamp: str | None,
4311
+ pool=None,
4312
+ ) -> int:
4313
+ """
4314
+ Count memories created after a given timestamp.
4315
+
4316
+ Args:
4317
+ bank_id: Bank identifier
4318
+ since_timestamp: ISO timestamp string. If None, returns total count.
4319
+ pool: Optional database pool (uses default if not provided)
4103
4320
 
4104
- async def _regenerate_observations_sync(
4321
+ Returns:
4322
+ Number of memories created since the timestamp
4323
+ """
4324
+ if pool is None:
4325
+ pool = await self._get_pool()
4326
+
4327
+ async with acquire_with_retry(pool) as conn:
4328
+ if since_timestamp:
4329
+ # Parse the timestamp
4330
+ from datetime import datetime
4331
+
4332
+ try:
4333
+ ts = datetime.fromisoformat(since_timestamp.replace("Z", "+00:00"))
4334
+ except ValueError:
4335
+ # Invalid timestamp, return total count
4336
+ ts = None
4337
+
4338
+ if ts:
4339
+ count = await conn.fetchval(
4340
+ f"SELECT COUNT(*) FROM {fq_table('memory_units')} WHERE bank_id = $1 AND created_at > $2",
4341
+ bank_id,
4342
+ ts,
4343
+ )
4344
+ return count or 0
4345
+
4346
+ # No timestamp or invalid, return total count
4347
+ count = await conn.fetchval(
4348
+ f"SELECT COUNT(*) FROM {fq_table('memory_units')} WHERE bank_id = $1",
4349
+ bank_id,
4350
+ )
4351
+ return count or 0
4352
+
4353
+ async def _invalidate_facts_from_mental_models(
4105
4354
  self,
4355
+ conn,
4106
4356
  bank_id: str,
4107
- entity_ids: list[str],
4108
- min_facts: int | None = None,
4109
- conn=None,
4110
- request_context: "RequestContext | None" = None,
4111
- ) -> None:
4357
+ fact_ids: list[str],
4358
+ ) -> int:
4112
4359
  """
4113
- Regenerate observations for entities synchronously (called during retain).
4360
+ Remove fact IDs from observation source_memory_ids when memories are deleted.
4114
4361
 
4115
- Processes entities in PARALLEL for faster execution.
4362
+ Observations are stored in memory_units with fact_type='observation'
4363
+ and have a source_memory_ids column (UUID[]) tracking their source memories.
4116
4364
 
4117
4365
  Args:
4366
+ conn: Database connection
4118
4367
  bank_id: Bank identifier
4119
- entity_ids: List of entity IDs to process
4120
- min_facts: Minimum facts required to regenerate observations (uses config default if None)
4121
- conn: Optional database connection (for transactional atomicity)
4368
+ fact_ids: List of fact IDs to remove from observations
4369
+
4370
+ Returns:
4371
+ Number of observations updated
4122
4372
  """
4123
- if not bank_id or not entity_ids:
4124
- return
4373
+ if not fact_ids:
4374
+ return 0
4375
+
4376
+ # Convert string IDs to UUIDs for the array comparison
4377
+ import uuid as uuid_module
4378
+
4379
+ fact_uuids = [uuid_module.UUID(fid) for fid in fact_ids]
4380
+
4381
+ # Update observations (memory_units with fact_type='observation')
4382
+ # by removing the deleted fact IDs from source_memory_ids
4383
+ # Use array subtraction: source_memory_ids - deleted_ids
4384
+ result = await conn.execute(
4385
+ f"""
4386
+ UPDATE {fq_table("memory_units")}
4387
+ SET source_memory_ids = (
4388
+ SELECT COALESCE(array_agg(elem), ARRAY[]::uuid[])
4389
+ FROM unnest(source_memory_ids) AS elem
4390
+ WHERE elem != ALL($2::uuid[])
4391
+ ),
4392
+ updated_at = NOW()
4393
+ WHERE bank_id = $1
4394
+ AND fact_type = 'observation'
4395
+ AND source_memory_ids && $2::uuid[]
4396
+ """,
4397
+ bank_id,
4398
+ fact_uuids,
4399
+ )
4400
+
4401
+ # Parse the result to get number of updated rows
4402
+ updated_count = int(result.split()[-1]) if result and "UPDATE" in result else 0
4403
+ if updated_count > 0:
4404
+ logger.info(
4405
+ f"[OBSERVATIONS] Invalidated {len(fact_ids)} fact IDs from {updated_count} observations in bank {bank_id}"
4406
+ )
4407
+ return updated_count
4408
+
4409
+ # =========================================================================
4410
+ # MENTAL MODELS (CONSOLIDATED) - Read-only access to auto-consolidated mental models
4411
+ # =========================================================================
4125
4412
 
4126
- # Use config default if min_facts not specified
4127
- if min_facts is None:
4128
- min_facts = get_config().observation_min_facts
4413
+ async def list_mental_models_consolidated(
4414
+ self,
4415
+ bank_id: str,
4416
+ *,
4417
+ tags: list[str] | None = None,
4418
+ tags_match: str = "any",
4419
+ limit: int = 100,
4420
+ offset: int = 0,
4421
+ request_context: "RequestContext",
4422
+ ) -> list[dict[str, Any]]:
4423
+ """List auto-consolidated observations for a bank.
4129
4424
 
4130
- # Convert to UUIDs
4131
- entity_uuids = [uuid.UUID(eid) if isinstance(eid, str) else eid for eid in entity_ids]
4425
+ Observations are stored in memory_units with fact_type='observation'.
4426
+ They are automatically created and updated by the consolidation engine.
4132
4427
 
4133
- # Use provided connection or acquire a new one
4134
- if conn is not None:
4135
- # Use the provided connection (transactional with caller)
4136
- entity_rows = await conn.fetch(
4428
+ Args:
4429
+ bank_id: Bank identifier
4430
+ tags: Optional tags to filter by
4431
+ tags_match: How to match tags - 'any', 'all', or 'exact'
4432
+ limit: Maximum number of results
4433
+ offset: Offset for pagination
4434
+ request_context: Request context for authentication
4435
+
4436
+ Returns:
4437
+ List of observation dicts
4438
+ """
4439
+ await self._authenticate_tenant(request_context)
4440
+ pool = await self._get_pool()
4441
+
4442
+ async with acquire_with_retry(pool) as conn:
4443
+ # Build tag filter
4444
+ tag_filter = ""
4445
+ params: list[Any] = [bank_id, limit, offset]
4446
+ if tags:
4447
+ if tags_match == "all":
4448
+ tag_filter = " AND tags @> $4::varchar[]"
4449
+ elif tags_match == "exact":
4450
+ tag_filter = " AND tags = $4::varchar[]"
4451
+ else: # any
4452
+ tag_filter = " AND tags && $4::varchar[]"
4453
+ params.append(tags)
4454
+
4455
+ rows = await conn.fetch(
4137
4456
  f"""
4138
- SELECT id, canonical_name FROM {fq_table("entities")}
4139
- WHERE id = ANY($1) AND bank_id = $2
4457
+ SELECT id, bank_id, text, proof_count, history, tags, source_memory_ids, created_at, updated_at
4458
+ FROM {fq_table("memory_units")}
4459
+ WHERE bank_id = $1 AND fact_type = 'observation' {tag_filter}
4460
+ ORDER BY updated_at DESC NULLS LAST
4461
+ LIMIT $2 OFFSET $3
4462
+ """,
4463
+ *params,
4464
+ )
4465
+
4466
+ return [self._row_to_observation_consolidated(row) for row in rows]
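The tag filter above maps tags_match onto the Postgres array operators: && (overlap) for 'any', @> (contains) for 'all', and = for 'exact'. A usage sketch of the listing call (engine/ctx assumed as before):

    async def billing_observations(engine, ctx) -> None:
        # 'all' requires every listed tag to be present on the observation (tags @> $4).
        rows = await engine.list_mental_models_consolidated(
            "bank-123",
            tags=["billing", "urgent"],
            tags_match="all",
            limit=20,
            request_context=ctx,
        )
        for obs in rows:
            print(obs["updated_at"], obs["text"][:80])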
4467
+
4468
+ async def get_observation_consolidated(
4469
+ self,
4470
+ bank_id: str,
4471
+ observation_id: str,
4472
+ *,
4473
+ include_source_memories: bool = True,
4474
+ request_context: "RequestContext",
4475
+ ) -> dict[str, Any] | None:
4476
+ """Get a single observation by ID.
4477
+
4478
+ Args:
4479
+ bank_id: Bank identifier
4480
+ observation_id: Observation ID
4481
+ include_source_memories: Whether to include full source memory details
4482
+ request_context: Request context for authentication
4483
+
4484
+ Returns:
4485
+ Observation dict or None if not found
4486
+ """
4487
+ await self._authenticate_tenant(request_context)
4488
+ pool = await self._get_pool()
4489
+
4490
+ async with acquire_with_retry(pool) as conn:
4491
+ row = await conn.fetchrow(
4492
+ f"""
4493
+ SELECT id, bank_id, text, proof_count, history, tags, source_memory_ids, created_at, updated_at
4494
+ FROM {fq_table("memory_units")}
4495
+ WHERE bank_id = $1 AND id = $2 AND fact_type = 'observation'
4140
4496
  """,
4141
- entity_uuids,
4142
4497
  bank_id,
4498
+ observation_id,
4499
+ )
4500
+
4501
+ if not row:
4502
+ return None
4503
+
4504
+ result = self._row_to_observation_consolidated(row)
4505
+
4506
+ # Fetch source memories if requested and source_memory_ids exist
4507
+ if include_source_memories and result.get("source_memory_ids"):
4508
+ source_ids = [uuid.UUID(sid) if isinstance(sid, str) else sid for sid in result["source_memory_ids"]]
4509
+ source_rows = await conn.fetch(
4510
+ f"""
4511
+ SELECT id, text, fact_type, context, occurred_start, mentioned_at
4512
+ FROM {fq_table("memory_units")}
4513
+ WHERE id = ANY($1::uuid[])
4514
+ ORDER BY mentioned_at DESC NULLS LAST
4515
+ """,
4516
+ source_ids,
4517
+ )
4518
+ result["source_memories"] = [
4519
+ {
4520
+ "id": str(r["id"]),
4521
+ "text": r["text"],
4522
+ "type": r["fact_type"],
4523
+ "context": r["context"],
4524
+ "occurred_start": r["occurred_start"].isoformat() if r["occurred_start"] else None,
4525
+ "mentioned_at": r["mentioned_at"].isoformat() if r["mentioned_at"] else None,
4526
+ }
4527
+ for r in source_rows
4528
+ ]
4529
+
4530
+ return result
4531
+
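For illustration, a minimal usage sketch of the observation read path; engine and ctx are assumed to be an initialized memory engine and request context, and the bank id is a placeholder:

async def show_observation(engine, ctx):
    # List consolidated observations tagged 'billing', newest first.
    observations = await engine.list_mental_models_consolidated(
        "bank-1", tags=["billing"], tags_match="any", request_context=ctx
    )
    if not observations:
        return None
    # Fetch one observation together with the memories it was consolidated from.
    detail = await engine.get_observation_consolidated(
        "bank-1", observations[0]["id"], include_source_memories=True, request_context=ctx
    )
    if detail:
        for mem in detail["source_memories"]:
            print(mem["id"], mem["text"])
    return detail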
4532
+ def _row_to_observation_consolidated(self, row: Any) -> dict[str, Any]:
4533
+ """Convert a database row to an observation dict."""
4534
+ import json
4535
+
4536
+ history = row["history"]
4537
+ if isinstance(history, str):
4538
+ history = json.loads(history)
4539
+ elif history is None:
4540
+ history = []
4541
+
4542
+ # Convert source_memory_ids to strings
4543
+ source_memory_ids = row.get("source_memory_ids") or []
4544
+ source_memory_ids = [str(sid) for sid in source_memory_ids]
4545
+
4546
+ return {
4547
+ "id": str(row["id"]),
4548
+ "bank_id": row["bank_id"],
4549
+ "text": row["text"],
4550
+ "proof_count": row["proof_count"] or 1,
4551
+ "history": history,
4552
+ "tags": row["tags"] or [],
4553
+ "source_memory_ids": source_memory_ids,
4554
+ "source_memories": [], # Populated separately when fetching full details
4555
+ "created_at": row["created_at"].isoformat() if row["created_at"] else None,
4556
+ "updated_at": row["updated_at"].isoformat() if row["updated_at"] else None,
4557
+ }
4558
+
4559
+ # =========================================================================
4560
+ # MENTAL MODELS CRUD
4561
+ # =========================================================================
4562
+
4563
+ async def list_mental_models(
4564
+ self,
4565
+ bank_id: str,
4566
+ *,
4567
+ tags: list[str] | None = None,
4568
+ tags_match: str = "any",
4569
+ limit: int = 100,
4570
+ offset: int = 0,
4571
+ request_context: "RequestContext",
4572
+ ) -> list[dict[str, Any]]:
4573
+ """List pinned mental models for a bank.
4574
+
4575
+ Args:
4576
+ bank_id: Bank identifier
4577
+ tags: Optional tags to filter by
4578
+ tags_match: How to match tags - 'any', 'all', or 'exact'
4579
+ limit: Maximum number of results
4580
+ offset: Offset for pagination
4581
+ request_context: Request context for authentication
4582
+
4583
+ Returns:
4584
+ List of pinned mental model dicts
4585
+ """
4586
+ await self._authenticate_tenant(request_context)
4587
+ pool = await self._get_pool()
4588
+
4589
+ async with acquire_with_retry(pool) as conn:
4590
+ # Build tag filter
4591
+ tag_filter = ""
4592
+ params: list[Any] = [bank_id, limit, offset]
4593
+ if tags:
4594
+ if tags_match == "all":
4595
+ tag_filter = " AND tags @> $4::varchar[]"
4596
+ elif tags_match == "exact":
4597
+ tag_filter = " AND tags = $4::varchar[]"
4598
+ else: # any
4599
+ tag_filter = " AND tags && $4::varchar[]"
4600
+ params.append(tags)
4601
+
4602
+ rows = await conn.fetch(
4603
+ f"""
4604
+ SELECT id, bank_id, name, source_query, content, tags,
4605
+ last_refreshed_at, created_at, reflect_response,
4606
+ max_tokens, trigger
4607
+ FROM {fq_table("mental_models")}
4608
+ WHERE bank_id = $1 {tag_filter}
4609
+ ORDER BY last_refreshed_at DESC
4610
+ LIMIT $2 OFFSET $3
4611
+ """,
4612
+ *params,
4143
4613
  )
4144
- entity_names = {row["id"]: row["canonical_name"] for row in entity_rows}
4145
4614
 
4146
- fact_counts = await conn.fetch(
4615
+ return [self._row_to_mental_model(row) for row in rows]
4616
+
4617
+ async def get_mental_model(
4618
+ self,
4619
+ bank_id: str,
4620
+ mental_model_id: str,
4621
+ *,
4622
+ request_context: "RequestContext",
4623
+ ) -> dict[str, Any] | None:
4624
+ """Get a single pinned mental model by ID.
4625
+
4626
+ Args:
4627
+ bank_id: Bank identifier
4628
+ mental_model_id: Pinned mental model UUID
4629
+ request_context: Request context for authentication
4630
+
4631
+ Returns:
4632
+ Pinned mental model dict or None if not found
4633
+ """
4634
+ await self._authenticate_tenant(request_context)
4635
+ pool = await self._get_pool()
4636
+
4637
+ async with acquire_with_retry(pool) as conn:
4638
+ row = await conn.fetchrow(
4147
4639
  f"""
4148
- SELECT ue.entity_id, COUNT(*) as cnt
4149
- FROM {fq_table("unit_entities")} ue
4150
- JOIN {fq_table("memory_units")} mu ON ue.unit_id = mu.id
4151
- WHERE ue.entity_id = ANY($1) AND mu.bank_id = $2
4152
- GROUP BY ue.entity_id
4640
+ SELECT id, bank_id, name, source_query, content, tags,
4641
+ last_refreshed_at, created_at, reflect_response,
4642
+ max_tokens, trigger
4643
+ FROM {fq_table("mental_models")}
4644
+ WHERE bank_id = $1 AND id = $2
4153
4645
  """,
4154
- entity_uuids,
4155
4646
  bank_id,
4647
+ mental_model_id,
4156
4648
  )
4157
- entity_fact_counts = {row["entity_id"]: row["cnt"] for row in fact_counts}
4158
- else:
4159
- # Acquire a new connection (standalone call)
4160
- pool = await self._get_pool()
4161
- async with pool.acquire() as acquired_conn:
4162
- entity_rows = await acquired_conn.fetch(
4649
+
4650
+ return self._row_to_mental_model(row) if row else None
4651
+
4652
+ async def create_mental_model(
4653
+ self,
4654
+ bank_id: str,
4655
+ name: str,
4656
+ source_query: str,
4657
+ content: str,
4658
+ *,
4659
+ mental_model_id: str | None = None,
4660
+ tags: list[str] | None = None,
4661
+ max_tokens: int | None = None,
4662
+ trigger: dict[str, Any] | None = None,
4663
+ request_context: "RequestContext",
4664
+ ) -> dict[str, Any]:
4665
+ """Create a new pinned mental model.
4666
+
4667
+ Args:
4668
+ bank_id: Bank identifier
4669
+ name: Human-readable name for the mental model
4670
+ source_query: The query that generated this mental model
4671
+ content: The synthesized content
4672
+ mental_model_id: Optional UUID for the mental model (auto-generated if not provided)
4673
+ tags: Optional tags for scoped visibility
4674
+ max_tokens: Token limit for content generation during refresh
4675
+ trigger: Trigger settings (e.g., refresh_after_consolidation)
4676
+ request_context: Request context for authentication
4677
+
4678
+ Returns:
4679
+ The created pinned mental model dict
4680
+ """
4681
+ await self._authenticate_tenant(request_context)
4682
+ pool = await self._get_pool()
4683
+
4684
+ # Generate embedding for the content
4685
+ embedding_text = f"{name} {content}"
4686
+ embedding = await embedding_utils.generate_embeddings_batch(self.embeddings, [embedding_text])
4687
+ # Convert embedding to string for asyncpg vector type
4688
+ embedding_str = str(embedding[0]) if embedding else None
4689
+
4690
+ async with acquire_with_retry(pool) as conn:
4691
+ if mental_model_id:
4692
+ row = await conn.fetchrow(
4163
4693
  f"""
4164
- SELECT id, canonical_name FROM {fq_table("entities")}
4165
- WHERE id = ANY($1) AND bank_id = $2
4694
+ INSERT INTO {fq_table("mental_models")}
4695
+ (id, bank_id, name, source_query, content, embedding, tags, max_tokens, trigger)
4696
+ VALUES ($1, $2, $3, $4, $5, $6, $7, COALESCE($8, 2048), COALESCE($9, '{{"refresh_after_consolidation": false}}'::jsonb))
4697
+ RETURNING id, bank_id, name, source_query, content, tags,
4698
+ last_refreshed_at, created_at, reflect_response,
4699
+ max_tokens, trigger
4166
4700
  """,
4167
- entity_uuids,
4701
+ mental_model_id,
4168
4702
  bank_id,
4703
+ name,
4704
+ source_query,
4705
+ content,
4706
+ embedding_str,
4707
+ tags or [],
4708
+ max_tokens,
4709
+ json.dumps(trigger) if trigger else None,
4169
4710
  )
4170
- entity_names = {row["id"]: row["canonical_name"] for row in entity_rows}
4171
-
4172
- fact_counts = await acquired_conn.fetch(
4711
+ else:
4712
+ row = await conn.fetchrow(
4173
4713
  f"""
4174
- SELECT ue.entity_id, COUNT(*) as cnt
4175
- FROM {fq_table("unit_entities")} ue
4176
- JOIN {fq_table("memory_units")} mu ON ue.unit_id = mu.id
4177
- WHERE ue.entity_id = ANY($1) AND mu.bank_id = $2
4178
- GROUP BY ue.entity_id
4714
+ INSERT INTO {fq_table("mental_models")}
4715
+ (bank_id, name, source_query, content, embedding, tags, max_tokens, trigger)
4716
+ VALUES ($1, $2, $3, $4, $5, $6, COALESCE($7, 2048), COALESCE($8, '{{"refresh_after_consolidation": false}}'::jsonb))
4717
+ RETURNING id, bank_id, name, source_query, content, tags,
4718
+ last_refreshed_at, created_at, reflect_response,
4719
+ max_tokens, trigger
4179
4720
  """,
4180
- entity_uuids,
4181
4721
  bank_id,
4722
+ name,
4723
+ source_query,
4724
+ content,
4725
+ embedding_str,
4726
+ tags or [],
4727
+ max_tokens,
4728
+ json.dumps(trigger) if trigger else None,
4182
4729
  )
4183
- entity_fact_counts = {row["entity_id"]: row["cnt"] for row in fact_counts}
4184
4730
 
4185
- # Filter entities that meet the threshold
4186
- entities_to_process = []
4187
- for entity_id in entity_ids:
4188
- entity_uuid = uuid.UUID(entity_id) if isinstance(entity_id, str) else entity_id
4189
- if entity_uuid not in entity_names:
4190
- continue
4191
- fact_count = entity_fact_counts.get(entity_uuid, 0)
4192
- if fact_count >= min_facts:
4193
- entities_to_process.append((entity_id, entity_names[entity_uuid]))
4731
+ logger.info(f"[MENTAL_MODELS] Created pinned mental model '{name}' for bank {bank_id}")
4732
+ return self._row_to_mental_model(row)
4194
4733
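A short usage sketch for creating a pinned mental model; the engine instance, request context, and identifiers below are illustrative assumptions, not part of the diff:

async def pin_model(engine, ctx):
    model = await engine.create_mental_model(
        "bank-1",
        name="Team priorities",
        source_query="What does the team currently care about most?",
        content="",  # populated later by a refresh
        tags=["team"],
        max_tokens=1024,
        trigger={"refresh_after_consolidation": True},
        request_context=ctx,
    )
    return model["id"]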
 
4195
- if not entities_to_process:
4196
- return
4734
+ async def refresh_mental_model(
4735
+ self,
4736
+ bank_id: str,
4737
+ mental_model_id: str,
4738
+ *,
4739
+ request_context: "RequestContext",
4740
+ ) -> dict[str, Any] | None:
4741
+ """Refresh a pinned mental model by re-running its source query.
4197
4742
 
4198
- # Use internal context if not provided (for internal/background calls)
4199
- from hindsight_api.models import RequestContext as RC
4743
+ This method:
4744
+ 1. Gets the pinned mental model
4745
+ 2. Runs the source_query through reflect
4746
+ 3. Updates the content with the new synthesis
4747
+ 4. Updates last_refreshed_at
4200
4748
 
4201
- ctx = request_context if request_context is not None else RC()
4749
+ Args:
4750
+ bank_id: Bank identifier
4751
+ mental_model_id: Pinned mental model UUID
4752
+ request_context: Request context for authentication
4753
+
4754
+ Returns:
4755
+ Updated pinned mental model dict or None if not found
4756
+ """
4757
+ await self._authenticate_tenant(request_context)
4758
+
4759
+ # Get the current mental model
4760
+ mental_model = await self.get_mental_model(bank_id, mental_model_id, request_context=request_context)
4761
+ if not mental_model:
4762
+ return None
4763
+
4764
+ # Run reflect with the source query, excluding the mental model being refreshed
4765
+ reflect_result = await self.reflect_async(
4766
+ bank_id=bank_id,
4767
+ query=mental_model["source_query"],
4768
+ request_context=request_context,
4769
+ exclude_mental_model_ids=[mental_model_id],
4770
+ )
4771
+
4772
+ # Build reflect_response payload to store
4773
+ reflect_response_payload = {
4774
+ "text": reflect_result.text,
4775
+ "based_on": {
4776
+ fact_type: [
4777
+ {
4778
+ "id": str(fact.id),
4779
+ "text": fact.text,
4780
+ "type": fact_type,
4781
+ }
4782
+ for fact in facts
4783
+ ]
4784
+ for fact_type, facts in reflect_result.based_on.items()
4785
+ },
4786
+ "mental_models": [], # Mental models are included in based_on["mental-models"]
4787
+ }
4788
+
4789
+ # Update the mental model with new content and reflect_response
4790
+ return await self.update_mental_model(
4791
+ bank_id,
4792
+ mental_model_id,
4793
+ content=reflect_result.text,
4794
+ reflect_response=reflect_response_payload,
4795
+ request_context=request_context,
4796
+ )
4797
+
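A sketch of the refresh flow described above, assuming an engine instance and request context; the stored reflect_response mirrors the reflect output (synthesized text plus the facts it was based on):

async def refresh(engine, ctx):
    updated = await engine.refresh_mental_model("bank-1", "model-123", request_context=ctx)
    if updated:
        print(updated["content"])                             # new synthesis
        print(list(updated["reflect_response"]["based_on"]))  # fact types it drew on
    return updated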
4798
+ async def update_mental_model(
4799
+ self,
4800
+ bank_id: str,
4801
+ mental_model_id: str,
4802
+ *,
4803
+ name: str | None = None,
4804
+ content: str | None = None,
4805
+ source_query: str | None = None,
4806
+ max_tokens: int | None = None,
4807
+ tags: list[str] | None = None,
4808
+ trigger: dict[str, Any] | None = None,
4809
+ reflect_response: dict[str, Any] | None = None,
4810
+ request_context: "RequestContext",
4811
+ ) -> dict[str, Any] | None:
4812
+ """Update a pinned mental model.
4813
+
4814
+ Args:
4815
+ bank_id: Bank identifier
4816
+ mental_model_id: Pinned mental model UUID
4817
+ name: New name (if changing)
4818
+ content: New content (if changing)
4819
+ source_query: New source query (if changing)
4820
+ max_tokens: New max tokens (if changing)
4821
+ tags: New tags (if changing)
4822
+ trigger: New trigger settings (if changing)
4823
+ reflect_response: Full reflect API response payload (if changing)
4824
+ request_context: Request context for authentication
4825
+
4826
+ Returns:
4827
+ Updated pinned mental model dict or None if not found
4828
+ """
4829
+ await self._authenticate_tenant(request_context)
4830
+ pool = await self._get_pool()
4831
+
4832
+ async with acquire_with_retry(pool) as conn:
4833
+ # Build dynamic update
4834
+ updates = []
4835
+ params: list[Any] = [bank_id, mental_model_id]
4836
+ param_idx = 3
4837
+
4838
+ if name is not None:
4839
+ updates.append(f"name = ${param_idx}")
4840
+ params.append(name)
4841
+ param_idx += 1
4842
+
4843
+ if content is not None:
4844
+ updates.append(f"content = ${param_idx}")
4845
+ params.append(content)
4846
+ param_idx += 1
4847
+ updates.append("last_refreshed_at = NOW()")
4848
+ # Also update embedding (convert to string for asyncpg vector type)
4849
+ embedding_text = f"{name or ''} {content}"
4850
+ embedding = await embedding_utils.generate_embeddings_batch(self.embeddings, [embedding_text])
4851
+ if embedding:
4852
+ updates.append(f"embedding = ${param_idx}")
4853
+ params.append(str(embedding[0]))
4854
+ param_idx += 1
4855
+
4856
+ if reflect_response is not None:
4857
+ updates.append(f"reflect_response = ${param_idx}")
4858
+ params.append(json.dumps(reflect_response))
4859
+ param_idx += 1
4860
+
4861
+ if source_query is not None:
4862
+ updates.append(f"source_query = ${param_idx}")
4863
+ params.append(source_query)
4864
+ param_idx += 1
4865
+
4866
+ if max_tokens is not None:
4867
+ updates.append(f"max_tokens = ${param_idx}")
4868
+ params.append(max_tokens)
4869
+ param_idx += 1
4870
+
4871
+ if tags is not None:
4872
+ updates.append(f"tags = ${param_idx}")
4873
+ params.append(tags)
4874
+ param_idx += 1
4875
+
4876
+ if trigger is not None:
4877
+ updates.append(f"trigger = ${param_idx}")
4878
+ params.append(json.dumps(trigger))
4879
+ param_idx += 1
4880
+
4881
+ if not updates:
4882
+ return None
4883
+
4884
+ query = f"""
4885
+ UPDATE {fq_table("mental_models")}
4886
+ SET {", ".join(updates)}
4887
+ WHERE bank_id = $1 AND id = $2
4888
+ RETURNING id, bank_id, name, source_query, content, tags,
4889
+ last_refreshed_at, created_at, reflect_response,
4890
+ max_tokens, trigger
4891
+ """
4892
+
4893
+ row = await conn.fetchrow(query, *params)
4894
+
4895
+ return self._row_to_mental_model(row) if row else None
4202
4896
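To make the dynamic builder concrete: for a call that only changes name and tags, the assembled statement looks roughly like the comment below (a sketch; fq_table() supplies the schema prefix and parameter numbering follows param_idx):

async def rename_model(engine, ctx):
    # Roughly produces:
    #   UPDATE mental_models SET name = $3, tags = $4
    #   WHERE bank_id = $1 AND id = $2 RETURNING ...
    return await engine.update_mental_model(
        "bank-1", "model-123",
        name="Team priorities (Q3)",
        tags=["team", "q3"],
        request_context=ctx,
    )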
 
4203
- # Process all entities in PARALLEL (LLM calls are the bottleneck)
4204
- async def process_entity(entity_id: str, entity_name: str):
4897
+ async def delete_mental_model(
4898
+ self,
4899
+ bank_id: str,
4900
+ mental_model_id: str,
4901
+ *,
4902
+ request_context: "RequestContext",
4903
+ ) -> bool:
4904
+ """Delete a pinned mental model.
4905
+
4906
+ Args:
4907
+ bank_id: Bank identifier
4908
+ mental_model_id: Pinned mental model UUID
4909
+ request_context: Request context for authentication
4910
+
4911
+ Returns:
4912
+ True if deleted, False if not found
4913
+ """
4914
+ await self._authenticate_tenant(request_context)
4915
+ pool = await self._get_pool()
4916
+
4917
+ async with acquire_with_retry(pool) as conn:
4918
+ result = await conn.execute(
4919
+ f"DELETE FROM {fq_table('mental_models')} WHERE bank_id = $1 AND id = $2",
4920
+ bank_id,
4921
+ mental_model_id,
4922
+ )
4923
+
4924
+ return result == "DELETE 1"
4925
+
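Note on the return check: asyncpg's execute() returns the server's command tag, so "DELETE 1" means exactly one row was removed and "DELETE 0" means nothing matched. A minimal sketch (engine and context assumed):

async def remove_model(engine, ctx):
    deleted = await engine.delete_mental_model("bank-1", "model-123", request_context=ctx)
    return deleted  # True only when the command tag was "DELETE 1"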
4926
+ def _row_to_mental_model(self, row) -> dict[str, Any]:
4927
+ """Convert a database row to a mental model dict."""
4928
+ reflect_response = row.get("reflect_response")
4929
+ # Parse JSON string to dict if needed (asyncpg may return JSONB as string)
4930
+ if isinstance(reflect_response, str):
4205
4931
  try:
4206
- await self.regenerate_entity_observations(
4207
- bank_id, entity_id, entity_name, version=None, conn=conn, request_context=ctx
4208
- )
4209
- except Exception as e:
4210
- logger.error(f"[OBSERVATIONS] Error processing entity {entity_id}: {e}")
4932
+ reflect_response = json.loads(reflect_response)
4933
+ except json.JSONDecodeError:
4934
+ reflect_response = None
4935
+ trigger = row.get("trigger")
4936
+ if isinstance(trigger, str):
4937
+ try:
4938
+ trigger = json.loads(trigger)
4939
+ except json.JSONDecodeError:
4940
+ trigger = None
4941
+ return {
4942
+ "id": str(row["id"]),
4943
+ "bank_id": row["bank_id"],
4944
+ "name": row["name"],
4945
+ "source_query": row["source_query"],
4946
+ "content": row["content"],
4947
+ "tags": row["tags"] or [],
4948
+ "max_tokens": row.get("max_tokens"),
4949
+ "trigger": trigger,
4950
+ "last_refreshed_at": row["last_refreshed_at"].isoformat() if row["last_refreshed_at"] else None,
4951
+ "created_at": row["created_at"].isoformat() if row["created_at"] else None,
4952
+ "reflect_response": reflect_response,
4953
+ }
4954
+
4955
+ # =========================================================================
4956
+ # Directives - Hard rules injected into prompts
4957
+ # =========================================================================
4958
+
4959
+ async def list_directives(
4960
+ self,
4961
+ bank_id: str,
4962
+ *,
4963
+ tags: list[str] | None = None,
4964
+ tags_match: str = "any",
4965
+ active_only: bool = True,
4966
+ limit: int = 100,
4967
+ offset: int = 0,
4968
+ request_context: "RequestContext",
4969
+ ) -> list[dict[str, Any]]:
4970
+ """List directives for a bank.
4211
4971
 
4212
- await asyncio.gather(*[process_entity(eid, name) for eid, name in entities_to_process])
4972
+ Args:
4973
+ bank_id: Bank identifier
4974
+ tags: Optional tags to filter by
4975
+ tags_match: How to match tags - 'any', 'all', or 'exact'
4976
+ active_only: Only return active directives (default True)
4977
+ limit: Maximum number of results
4978
+ offset: Offset for pagination
4979
+ request_context: Request context for authentication
4213
4980
 
4214
- async def _handle_regenerate_observations(self, task_dict: dict[str, Any]):
4981
+ Returns:
4982
+ List of directive dicts
4215
4983
  """
4216
- Handler for regenerate_observations tasks.
4984
+ await self._authenticate_tenant(request_context)
4985
+ pool = await self._get_pool()
4986
+
4987
+ async with acquire_with_retry(pool) as conn:
4988
+ # Build filters
4989
+ filters = ["bank_id = $1"]
4990
+ params: list[Any] = [bank_id]
4991
+ param_idx = 2
4992
+
4993
+ if active_only:
4994
+ filters.append("is_active = TRUE")
4995
+
4996
+ if tags:
4997
+ if tags_match == "all":
4998
+ filters.append(f"tags @> ${param_idx}::varchar[]")
4999
+ elif tags_match == "exact":
5000
+ filters.append(f"tags = ${param_idx}::varchar[]")
5001
+ else: # any
5002
+ filters.append(f"tags && ${param_idx}::varchar[]")
5003
+ params.append(tags)
5004
+ param_idx += 1
5005
+
5006
+ params.extend([limit, offset])
5007
+
5008
+ rows = await conn.fetch(
5009
+ f"""
5010
+ SELECT id, bank_id, name, content, priority, is_active, tags, created_at, updated_at
5011
+ FROM {fq_table("directives")}
5012
+ WHERE {" AND ".join(filters)}
5013
+ ORDER BY priority DESC, created_at DESC
5014
+ LIMIT ${param_idx} OFFSET ${param_idx + 1}
5015
+ """,
5016
+ *params,
5017
+ )
5018
+
5019
+ return [self._row_to_directive(row) for row in rows]
5020
+
5021
+ async def get_directive(
5022
+ self,
5023
+ bank_id: str,
5024
+ directive_id: str,
5025
+ *,
5026
+ request_context: "RequestContext",
5027
+ ) -> dict[str, Any] | None:
5028
+ """Get a single directive by ID.
4217
5029
 
4218
5030
  Args:
4219
- task_dict: Dict with 'bank_id' and either:
4220
- - 'entity_ids' (list): Process multiple entities
4221
- - 'entity_id', 'entity_name': Process single entity (legacy)
5031
+ bank_id: Bank identifier
5032
+ directive_id: Directive UUID
5033
+ request_context: Request context for authentication
4222
5034
 
4223
- Raises:
4224
- ValueError: If required fields are missing
4225
- Exception: Any exception from regenerate_entity_observations (propagates to execute_task for retry)
5035
+ Returns:
5036
+ Directive dict or None if not found
4226
5037
  """
4227
- bank_id = task_dict.get("bank_id")
4228
- # Use internal request context for background tasks
4229
- from hindsight_api.models import RequestContext
5038
+ await self._authenticate_tenant(request_context)
5039
+ pool = await self._get_pool()
5040
+
5041
+ async with acquire_with_retry(pool) as conn:
5042
+ row = await conn.fetchrow(
5043
+ f"""
5044
+ SELECT id, bank_id, name, content, priority, is_active, tags, created_at, updated_at
5045
+ FROM {fq_table("directives")}
5046
+ WHERE bank_id = $1 AND id = $2
5047
+ """,
5048
+ bank_id,
5049
+ directive_id,
5050
+ )
5051
+
5052
+ return self._row_to_directive(row) if row else None
5053
+
5054
+ async def create_directive(
5055
+ self,
5056
+ bank_id: str,
5057
+ name: str,
5058
+ content: str,
5059
+ *,
5060
+ priority: int = 0,
5061
+ is_active: bool = True,
5062
+ tags: list[str] | None = None,
5063
+ request_context: "RequestContext",
5064
+ ) -> dict[str, Any]:
5065
+ """Create a new directive.
4230
5066
 
4231
- internal_context = RequestContext()
5067
+ Args:
5068
+ bank_id: Bank identifier
5069
+ name: Human-readable name for the directive
5070
+ content: The directive text to inject into prompts
5071
+ priority: Higher priority directives are injected first (default 0)
5072
+ is_active: Whether this directive is active (default True)
5073
+ tags: Optional tags for filtering
5074
+ request_context: Request context for authentication
5075
+
5076
+ Returns:
5077
+ The created directive dict
5078
+ """
5079
+ await self._authenticate_tenant(request_context)
5080
+ pool = await self._get_pool()
5081
+
5082
+ async with acquire_with_retry(pool) as conn:
5083
+ row = await conn.fetchrow(
5084
+ f"""
5085
+ INSERT INTO {fq_table("directives")}
5086
+ (bank_id, name, content, priority, is_active, tags)
5087
+ VALUES ($1, $2, $3, $4, $5, $6)
5088
+ RETURNING id, bank_id, name, content, priority, is_active, tags, created_at, updated_at
5089
+ """,
5090
+ bank_id,
5091
+ name,
5092
+ content,
5093
+ priority,
5094
+ is_active,
5095
+ tags or [],
5096
+ )
5097
+
5098
+ logger.info(f"[DIRECTIVES] Created directive '{name}' for bank {bank_id}")
5099
+ return self._row_to_directive(row)
5100
+
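A brief usage sketch for directives (engine and context assumed); list_directives returns active directives ordered by priority DESC, matching the ORDER BY above:

async def add_rule(engine, ctx):
    await engine.create_directive(
        "bank-1",
        name="No speculation",
        content="Never present unverified information as fact.",
        priority=10,
        tags=["safety"],
        request_context=ctx,
    )
    return await engine.list_directives("bank-1", active_only=True, request_context=ctx)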
5101
+ async def update_directive(
5102
+ self,
5103
+ bank_id: str,
5104
+ directive_id: str,
5105
+ *,
5106
+ name: str | None = None,
5107
+ content: str | None = None,
5108
+ priority: int | None = None,
5109
+ is_active: bool | None = None,
5110
+ tags: list[str] | None = None,
5111
+ request_context: "RequestContext",
5112
+ ) -> dict[str, Any] | None:
5113
+ """Update a directive.
5114
+
5115
+ Args:
5116
+ bank_id: Bank identifier
5117
+ directive_id: Directive UUID
5118
+ name: New name (optional)
5119
+ content: New content (optional)
5120
+ priority: New priority (optional)
5121
+ is_active: New active status (optional)
5122
+ tags: New tags (optional)
5123
+ request_context: Request context for authentication
5124
+
5125
+ Returns:
5126
+ Updated directive dict or None if not found
5127
+ """
5128
+ await self._authenticate_tenant(request_context)
5129
+ pool = await self._get_pool()
4232
5130
 
4233
- # New format: multiple entity_ids
4234
- if "entity_ids" in task_dict:
4235
- entity_ids = task_dict.get("entity_ids", [])
4236
- min_facts = task_dict.get("min_facts", 5)
5131
+ # Build update query dynamically
5132
+ updates = ["updated_at = now()"]
5133
+ params: list[Any] = []
5134
+ param_idx = 1
4237
5135
 
4238
- if not bank_id or not entity_ids:
4239
- raise ValueError(f"[OBSERVATIONS] Missing required fields in task: {task_dict}")
5136
+ if name is not None:
5137
+ updates.append(f"name = ${param_idx}")
5138
+ params.append(name)
5139
+ param_idx += 1
4240
5140
 
4241
- # Process each entity
4242
- pool = await self._get_pool()
4243
- async with pool.acquire() as conn:
4244
- for entity_id in entity_ids:
4245
- try:
4246
- # Fetch entity name and check fact count
4247
- import uuid as uuid_module
5141
+ if content is not None:
5142
+ updates.append(f"content = ${param_idx}")
5143
+ params.append(content)
5144
+ param_idx += 1
4248
5145
 
4249
- entity_uuid = uuid_module.UUID(entity_id) if isinstance(entity_id, str) else entity_id
5146
+ if priority is not None:
5147
+ updates.append(f"priority = ${param_idx}")
5148
+ params.append(priority)
5149
+ param_idx += 1
4250
5150
 
4251
- # First check if entity exists
4252
- entity_exists = await conn.fetchrow(
4253
- f"SELECT canonical_name FROM {fq_table('entities')} WHERE id = $1 AND bank_id = $2",
4254
- entity_uuid,
4255
- bank_id,
4256
- )
5151
+ if is_active is not None:
5152
+ updates.append(f"is_active = ${param_idx}")
5153
+ params.append(is_active)
5154
+ param_idx += 1
4257
5155
 
4258
- if not entity_exists:
4259
- logger.debug(f"[OBSERVATIONS] Entity {entity_id} not yet in bank {bank_id}, skipping")
4260
- continue
5156
+ if tags is not None:
5157
+ updates.append(f"tags = ${param_idx}")
5158
+ params.append(tags)
5159
+ param_idx += 1
4261
5160
 
4262
- entity_name = entity_exists["canonical_name"]
5161
+ params.extend([bank_id, directive_id])
4263
5162
 
4264
- # Count facts linked to this entity
4265
- fact_count = (
4266
- await conn.fetchval(
4267
- f"SELECT COUNT(*) FROM {fq_table('unit_entities')} WHERE entity_id = $1",
4268
- entity_uuid,
4269
- )
4270
- or 0
4271
- )
5163
+ async with acquire_with_retry(pool) as conn:
5164
+ row = await conn.fetchrow(
5165
+ f"""
5166
+ UPDATE {fq_table("directives")}
5167
+ SET {", ".join(updates)}
5168
+ WHERE bank_id = ${param_idx} AND id = ${param_idx + 1}
5169
+ RETURNING id, bank_id, name, content, priority, is_active, tags, created_at, updated_at
5170
+ """,
5171
+ *params,
5172
+ )
4272
5173
 
4273
- # Only regenerate if entity has enough facts
4274
- if fact_count >= min_facts:
4275
- await self.regenerate_entity_observations(
4276
- bank_id, entity_id, entity_name, version=None, request_context=internal_context
4277
- )
4278
- else:
4279
- logger.debug(
4280
- f"[OBSERVATIONS] Skipping {entity_name} ({fact_count} facts < {min_facts} threshold)"
4281
- )
5174
+ return self._row_to_directive(row) if row else None
4282
5175
 
4283
- except Exception as e:
4284
- # Log but continue processing other entities - individual entity failures
4285
- # shouldn't fail the whole batch
4286
- logger.error(f"[OBSERVATIONS] Error processing entity {entity_id}: {e}")
4287
- continue
5176
+ async def delete_directive(
5177
+ self,
5178
+ bank_id: str,
5179
+ directive_id: str,
5180
+ *,
5181
+ request_context: "RequestContext",
5182
+ ) -> bool:
5183
+ """Delete a directive.
4288
5184
 
4289
- # Legacy format: single entity
4290
- else:
4291
- entity_id = task_dict.get("entity_id")
4292
- entity_name = task_dict.get("entity_name")
4293
- version = task_dict.get("version")
5185
+ Args:
5186
+ bank_id: Bank identifier
5187
+ directive_id: Directive UUID
5188
+ request_context: Request context for authentication
4294
5189
 
4295
- if not all([bank_id, entity_id, entity_name]):
4296
- raise ValueError(f"[OBSERVATIONS] Missing required fields in task: {task_dict}")
5190
+ Returns:
5191
+ True if deleted, False if not found
5192
+ """
5193
+ await self._authenticate_tenant(request_context)
5194
+ pool = await self._get_pool()
4297
5195
 
4298
- # Type assertions after validation
4299
- assert isinstance(bank_id, str) and isinstance(entity_id, str) and isinstance(entity_name, str)
4300
- await self.regenerate_entity_observations(
4301
- bank_id, entity_id, entity_name, version=version, request_context=internal_context
5196
+ async with acquire_with_retry(pool) as conn:
5197
+ result = await conn.execute(
5198
+ f"DELETE FROM {fq_table('directives')} WHERE bank_id = $1 AND id = $2",
5199
+ bank_id,
5200
+ directive_id,
4302
5201
  )
4303
5202
 
4304
- # =========================================================================
4305
- # Statistics & Operations (for HTTP API layer)
4306
- # =========================================================================
5203
+ return result == "DELETE 1"
4307
5204
 
4308
- async def get_bank_stats(
5205
+ def _row_to_directive(self, row) -> dict[str, Any]:
5206
+ """Convert a database row to a directive dict."""
5207
+ return {
5208
+ "id": str(row["id"]),
5209
+ "bank_id": row["bank_id"],
5210
+ "name": row["name"],
5211
+ "content": row["content"],
5212
+ "priority": row["priority"],
5213
+ "is_active": row["is_active"],
5214
+ "tags": row["tags"] or [],
5215
+ "created_at": row["created_at"].isoformat() if row["created_at"] else None,
5216
+ "updated_at": row["updated_at"].isoformat() if row["updated_at"] else None,
5217
+ }
5218
+
5219
+ async def list_operations(
4309
5220
  self,
4310
5221
  bank_id: str,
4311
5222
  *,
5223
+ status: str | None = None,
5224
+ limit: int = 20,
5225
+ offset: int = 0,
4312
5226
  request_context: "RequestContext",
4313
5227
  ) -> dict[str, Any]:
4314
- """Get statistics about memory nodes and links for a bank."""
5228
+ """List async operations for a bank with optional filtering and pagination.
5229
+
5230
+ Args:
5231
+ bank_id: Bank identifier
5232
+ status: Optional status filter (pending, completed, failed)
5233
+ limit: Maximum number of operations to return (default 20)
5234
+ offset: Number of operations to skip (default 0)
5235
+ request_context: Request context for authentication
5236
+
5237
+ Returns:
5238
+ Dict with total count and list of operations, sorted by most recent first
5239
+ """
4315
5240
  await self._authenticate_tenant(request_context)
4316
5241
  pool = await self._get_pool()
4317
5242
 
4318
5243
  async with acquire_with_retry(pool) as conn:
4319
- # Get node counts by fact_type
4320
- node_stats = await conn.fetch(
4321
- f"""
4322
- SELECT fact_type, COUNT(*) as count
4323
- FROM {fq_table("memory_units")}
4324
- WHERE bank_id = $1
4325
- GROUP BY fact_type
4326
- """,
4327
- bank_id,
4328
- )
5244
+ # Build WHERE clause
5245
+ where_conditions = ["bank_id = $1"]
5246
+ params: list[Any] = [bank_id]
4329
5247
 
4330
- # Get link counts by link_type
4331
- link_stats = await conn.fetch(
4332
- f"""
4333
- SELECT ml.link_type, COUNT(*) as count
4334
- FROM {fq_table("memory_links")} ml
4335
- JOIN {fq_table("memory_units")} mu ON ml.from_unit_id = mu.id
4336
- WHERE mu.bank_id = $1
4337
- GROUP BY ml.link_type
4338
- """,
4339
- bank_id,
4340
- )
5248
+ if status:
5249
+ # Map API status to DB statuses (pending includes processing)
5250
+ if status == "pending":
5251
+ where_conditions.append("status IN ('pending', 'processing')")
5252
+ else:
5253
+ where_conditions.append(f"status = ${len(params) + 1}")
5254
+ params.append(status)
4341
5255
 
4342
- # Get link counts by fact_type (from nodes)
4343
- link_fact_type_stats = await conn.fetch(
4344
- f"""
4345
- SELECT mu.fact_type, COUNT(*) as count
4346
- FROM {fq_table("memory_links")} ml
4347
- JOIN {fq_table("memory_units")} mu ON ml.from_unit_id = mu.id
4348
- WHERE mu.bank_id = $1
4349
- GROUP BY mu.fact_type
4350
- """,
4351
- bank_id,
4352
- )
5256
+ where_clause = " AND ".join(where_conditions)
4353
5257
 
4354
- # Get link counts by fact_type AND link_type
4355
- link_breakdown_stats = await conn.fetch(
4356
- f"""
4357
- SELECT mu.fact_type, ml.link_type, COUNT(*) as count
4358
- FROM {fq_table("memory_links")} ml
4359
- JOIN {fq_table("memory_units")} mu ON ml.from_unit_id = mu.id
4360
- WHERE mu.bank_id = $1
4361
- GROUP BY mu.fact_type, ml.link_type
4362
- """,
4363
- bank_id,
5258
+ # Get total count (with filter)
5259
+ total_row = await conn.fetchrow(
5260
+ f"SELECT COUNT(*) as total FROM {fq_table('async_operations')} WHERE {where_clause}",
5261
+ *params,
4364
5262
  )
5263
+ total = total_row["total"] if total_row else 0
4365
5264
 
4366
- # Get pending and failed operations counts
4367
- ops_stats = await conn.fetch(
5265
+ # Get operations with pagination
5266
+ operations = await conn.fetch(
4368
5267
  f"""
4369
- SELECT status, COUNT(*) as count
5268
+ SELECT operation_id, operation_type, created_at, status, error_message
4370
5269
  FROM {fq_table("async_operations")}
4371
- WHERE bank_id = $1
4372
- GROUP BY status
5270
+ WHERE {where_clause}
5271
+ ORDER BY created_at DESC
5272
+ LIMIT ${len(params) + 1} OFFSET ${len(params) + 2}
4373
5273
  """,
4374
- bank_id,
5274
+ *params,
5275
+ limit,
5276
+ offset,
4375
5277
  )
4376
5278
 
4377
5279
  return {
4378
- "bank_id": bank_id,
4379
- "node_counts": {row["fact_type"]: row["count"] for row in node_stats},
4380
- "link_counts": {row["link_type"]: row["count"] for row in link_stats},
4381
- "link_counts_by_fact_type": {row["fact_type"]: row["count"] for row in link_fact_type_stats},
4382
- "link_breakdown": [
4383
- {"fact_type": row["fact_type"], "link_type": row["link_type"], "count": row["count"]}
4384
- for row in link_breakdown_stats
5280
+ "total": total,
5281
+ "operations": [
5282
+ {
5283
+ "id": str(row["operation_id"]),
5284
+ "task_type": row["operation_type"],
5285
+ "items_count": 0,
5286
+ "document_id": None,
5287
+ "created_at": row["created_at"].isoformat(),
5288
+ # Map DB status to API status (processing -> pending for simplicity)
5289
+ "status": "pending" if row["status"] in ("pending", "processing") else row["status"],
5290
+ "error_message": row["error_message"],
5291
+ }
5292
+ for row in operations
4385
5293
  ],
4386
- "operations": {row["status"]: row["count"] for row in ops_stats},
4387
5294
  }
4388
5295
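A sketch of the operations listing (engine and context assumed); note that a 'pending' filter also matches rows the database marks 'processing', and those rows are reported back as 'pending':

async def show_pending(engine, ctx):
    page = await engine.list_operations("bank-1", status="pending", limit=10, request_context=ctx)
    print(page["total"])
    for op in page["operations"]:
        print(op["id"], op["task_type"], op["status"], op["created_at"])
    return page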
 
4389
- async def get_entity(
5296
+ async def get_operation_status(
4390
5297
  self,
4391
5298
  bank_id: str,
4392
- entity_id: str,
5299
+ operation_id: str,
4393
5300
  *,
4394
5301
  request_context: "RequestContext",
4395
- ) -> dict[str, Any] | None:
4396
- """Get entity details including metadata and observations."""
4397
- await self._authenticate_tenant(request_context)
4398
- pool = await self._get_pool()
4399
-
4400
- async with acquire_with_retry(pool) as conn:
4401
- entity_row = await conn.fetchrow(
4402
- f"""
4403
- SELECT id, canonical_name, mention_count, first_seen, last_seen, metadata
4404
- FROM {fq_table("entities")}
4405
- WHERE bank_id = $1 AND id = $2
4406
- """,
4407
- bank_id,
4408
- uuid.UUID(entity_id),
4409
- )
4410
-
4411
- if not entity_row:
4412
- return None
4413
-
4414
- # Get observations for the entity
4415
- observations = await self.get_entity_observations(bank_id, entity_id, limit=20, request_context=request_context)
4416
-
4417
- return {
4418
- "id": str(entity_row["id"]),
4419
- "canonical_name": entity_row["canonical_name"],
4420
- "mention_count": entity_row["mention_count"],
4421
- "first_seen": entity_row["first_seen"].isoformat() if entity_row["first_seen"] else None,
4422
- "last_seen": entity_row["last_seen"].isoformat() if entity_row["last_seen"] else None,
4423
- "metadata": entity_row["metadata"] or {},
4424
- "observations": observations,
4425
- }
5302
+ ) -> dict[str, Any]:
5303
+ """Get the status of a specific async operation.
4426
5304
 
4427
- async def list_operations(
4428
- self,
4429
- bank_id: str,
4430
- *,
4431
- request_context: "RequestContext",
4432
- ) -> list[dict[str, Any]]:
4433
- """List async operations for a bank."""
5305
+ Returns:
5306
+ - status: "pending", "completed", or "failed"
5307
+ - updated_at: last update timestamp
5308
+ - completed_at: completion timestamp (if completed)
5309
+ """
4434
5310
  await self._authenticate_tenant(request_context)
4435
5311
  pool = await self._get_pool()
4436
5312
 
5313
+ op_uuid = uuid.UUID(operation_id)
5314
+
4437
5315
  async with acquire_with_retry(pool) as conn:
4438
- operations = await conn.fetch(
5316
+ row = await conn.fetchrow(
4439
5317
  f"""
4440
- SELECT operation_id, bank_id, operation_type, created_at, status, error_message, result_metadata
5318
+ SELECT operation_id, operation_type, created_at, updated_at, completed_at, status, error_message
4441
5319
  FROM {fq_table("async_operations")}
4442
- WHERE bank_id = $1
4443
- ORDER BY created_at DESC
5320
+ WHERE operation_id = $1 AND bank_id = $2
4444
5321
  """,
5322
+ op_uuid,
4445
5323
  bank_id,
4446
5324
  )
4447
5325
 
4448
- def parse_metadata(metadata):
4449
- if metadata is None:
4450
- return {}
4451
- if isinstance(metadata, str):
4452
- import json
4453
-
4454
- return json.loads(metadata)
4455
- return metadata
4456
-
4457
- return [
4458
- {
4459
- "id": str(row["operation_id"]),
4460
- "task_type": row["operation_type"],
4461
- "items_count": parse_metadata(row["result_metadata"]).get("items_count", 0),
4462
- "document_id": parse_metadata(row["result_metadata"]).get("document_id"),
4463
- "created_at": row["created_at"].isoformat(),
4464
- "status": row["status"],
5326
+ if row:
5327
+ # Map DB status to API status (processing -> pending for simplicity)
5328
+ db_status = row["status"]
5329
+ api_status = "pending" if db_status in ("pending", "processing") else db_status
5330
+ return {
5331
+ "operation_id": operation_id,
5332
+ "status": api_status,
5333
+ "operation_type": row["operation_type"],
5334
+ "created_at": row["created_at"].isoformat() if row["created_at"] else None,
5335
+ "updated_at": row["updated_at"].isoformat() if row["updated_at"] else None,
5336
+ "completed_at": row["completed_at"].isoformat() if row["completed_at"] else None,
4465
5337
  "error_message": row["error_message"],
4466
5338
  }
4467
- for row in operations
4468
- ]
5339
+ else:
5340
+ # Operation not found
5341
+ return {
5342
+ "operation_id": operation_id,
5343
+ "status": "not_found",
5344
+ "operation_type": None,
5345
+ "created_at": None,
5346
+ "updated_at": None,
5347
+ "completed_at": None,
5348
+ "error_message": None,
5349
+ }
4469
5350
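A minimal polling sketch on top of get_operation_status (engine and context assumed; the bank id and sleep interval are arbitrary):

import asyncio

async def wait_for(engine, ctx, operation_id: str) -> dict:
    while True:
        status = await engine.get_operation_status("bank-1", operation_id, request_context=ctx)
        if status["status"] in ("completed", "failed", "not_found"):
            return status
        await asyncio.sleep(2)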
 
4470
5351
  async def cancel_operation(
4471
5352
  self,
@@ -4506,10 +5387,10 @@ Guidelines:
4506
5387
  bank_id: str,
4507
5388
  *,
4508
5389
  name: str | None = None,
4509
- background: str | None = None,
5390
+ mission: str | None = None,
4510
5391
  request_context: "RequestContext",
4511
5392
  ) -> dict[str, Any]:
4512
- """Update bank name and/or background."""
5393
+ """Update bank name and/or mission."""
4513
5394
  await self._authenticate_tenant(request_context)
4514
5395
  pool = await self._get_pool()
4515
5396
 
@@ -4525,34 +5406,72 @@ Guidelines:
4525
5406
  name,
4526
5407
  )
4527
5408
 
4528
- if background is not None:
5409
+ if mission is not None:
4529
5410
  await conn.execute(
4530
5411
  f"""
4531
5412
  UPDATE {fq_table("banks")}
4532
- SET background = $2, updated_at = NOW()
5413
+ SET mission = $2, updated_at = NOW()
4533
5414
  WHERE bank_id = $1
4534
5415
  """,
4535
5416
  bank_id,
4536
- background,
5417
+ mission,
4537
5418
  )
4538
5419
 
4539
5420
  # Return updated profile
4540
5421
  return await self.get_bank_profile(bank_id, request_context=request_context)
4541
5422
 
4542
- async def submit_async_retain(
5423
+ async def _submit_async_operation(
4543
5424
  self,
4544
5425
  bank_id: str,
4545
- contents: list[dict[str, Any]],
5426
+ operation_type: str,
5427
+ task_type: str,
5428
+ task_payload: dict[str, Any],
4546
5429
  *,
4547
- request_context: "RequestContext",
4548
- document_tags: list[str] | None = None,
5430
+ result_metadata: dict[str, Any] | None = None,
5431
+ dedupe_by_bank: bool = False,
4549
5432
  ) -> dict[str, Any]:
4550
- """Submit a batch retain operation to run asynchronously."""
4551
- await self._authenticate_tenant(request_context)
4552
- pool = await self._get_pool()
5433
+ """Generic helper to submit an async operation.
5434
+
5435
+ Args:
5436
+ bank_id: Bank identifier
5437
+ operation_type: Operation type for the async_operations record (e.g., 'consolidation', 'retain')
5438
+ task_type: Task type for the task payload (e.g., 'consolidation', 'batch_retain')
5439
+ task_payload: Additional task payload fields (operation_id and bank_id are added automatically)
5440
+ result_metadata: Optional metadata to store with the operation record
5441
+ dedupe_by_bank: If True, skip creating a new task if one is already pending for this bank+operation_type
4553
5442
 
5443
+ Returns:
5444
+ Dict with operation_id and optionally deduplicated=True if an existing task was found
5445
+ """
4554
5446
  import json
4555
5447
 
5448
+ pool = await self._get_pool()
5449
+
5450
+ # Check for existing pending task if deduplication is enabled
5451
+ # Note: We only check 'pending', not 'processing', because a processing task
5452
+ # uses a watermark from when it started - new memories added after that point
5453
+ # would need another consolidation run to be processed.
5454
+ if dedupe_by_bank:
5455
+ async with acquire_with_retry(pool) as conn:
5456
+ existing = await conn.fetchrow(
5457
+ f"""
5458
+ SELECT operation_id FROM {fq_table("async_operations")}
5459
+ WHERE bank_id = $1 AND operation_type = $2 AND status = 'pending'
5460
+ LIMIT 1
5461
+ """,
5462
+ bank_id,
5463
+ operation_type,
5464
+ )
5465
+ if existing:
5466
+ logger.debug(
5467
+ f"{operation_type} task already pending for bank_id={bank_id}, "
5468
+ f"skipping duplicate (existing operation_id={existing['operation_id']})"
5469
+ )
5470
+ return {
5471
+ "operation_id": str(existing["operation_id"]),
5472
+ "deduplicated": True,
5473
+ }
5474
+
4556
5475
  operation_id = uuid.uuid4()
4557
5476
 
4558
5477
  # Insert operation record into database
@@ -4564,25 +5483,113 @@ Guidelines:
4564
5483
  """,
4565
5484
  operation_id,
4566
5485
  bank_id,
4567
- "retain",
4568
- json.dumps({"items_count": len(contents)}),
5486
+ operation_type,
5487
+ json.dumps(result_metadata or {}),
4569
5488
  )
4570
5489
 
4571
- # Submit task to background queue
4572
- task_payload = {
4573
- "type": "batch_retain",
5490
+ # Build and submit task payload
5491
+ full_payload = {
5492
+ "type": task_type,
4574
5493
  "operation_id": str(operation_id),
4575
5494
  "bank_id": bank_id,
4576
- "contents": contents,
5495
+ **task_payload,
4577
5496
  }
4578
- if document_tags:
4579
- task_payload["document_tags"] = document_tags
4580
5497
 
4581
- await self._task_backend.submit_task(task_payload)
5498
+ await self._task_backend.submit_task(full_payload)
4582
5499
 
4583
- logger.info(f"Retain task queued for bank_id={bank_id}, {len(contents)} items, operation_id={operation_id}")
5500
+ logger.info(f"{operation_type} task queued for bank_id={bank_id}, operation_id={operation_id}")
4584
5501
 
4585
5502
  return {
4586
5503
  "operation_id": str(operation_id),
4587
- "items_count": len(contents),
4588
5504
  }
5505
+
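For reference, the payload this helper hands to the task backend has the following shape for a batch retain; the field values shown are illustrative only:

example_payload = {
    "type": "batch_retain",          # task_type
    "operation_id": "a1b2c3d4-...",  # str(uuid4) generated above
    "bank_id": "bank-1",
    # remaining keys are merged in from task_payload:
    "contents": [],                  # retain items as passed by the caller
    "document_tags": ["imported"],
}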
5506
+ async def submit_async_retain(
5507
+ self,
5508
+ bank_id: str,
5509
+ contents: list[dict[str, Any]],
5510
+ *,
5511
+ request_context: "RequestContext",
5512
+ document_tags: list[str] | None = None,
5513
+ ) -> dict[str, Any]:
5514
+ """Submit a batch retain operation to run asynchronously."""
5515
+ await self._authenticate_tenant(request_context)
5516
+
5517
+ task_payload: dict[str, Any] = {"contents": contents}
5518
+ if document_tags:
5519
+ task_payload["document_tags"] = document_tags
5520
+
5521
+ result = await self._submit_async_operation(
5522
+ bank_id=bank_id,
5523
+ operation_type="retain",
5524
+ task_type="batch_retain",
5525
+ task_payload=task_payload,
5526
+ result_metadata={"items_count": len(contents)},
5527
+ dedupe_by_bank=False,
5528
+ )
5529
+
5530
+ result["items_count"] = len(contents)
5531
+ return result
5532
+
5533
+ async def submit_async_consolidation(
5534
+ self,
5535
+ bank_id: str,
5536
+ *,
5537
+ request_context: "RequestContext",
5538
+ ) -> dict[str, Any]:
5539
+ """Submit a consolidation operation to run asynchronously.
5540
+
5541
+ Deduplicates by bank_id - if there's already a pending consolidation for this bank,
5542
+ returns the existing operation_id instead of creating a new one.
5543
+
5544
+ Args:
5545
+ bank_id: Bank identifier
5546
+ request_context: Request context for authentication
5547
+
5548
+ Returns:
5549
+ Dict with operation_id
5550
+ """
5551
+ await self._authenticate_tenant(request_context)
5552
+ return await self._submit_async_operation(
5553
+ bank_id=bank_id,
5554
+ operation_type="consolidation",
5555
+ task_type="consolidation",
5556
+ task_payload={},
5557
+ dedupe_by_bank=True,
5558
+ )
5559
+
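Illustration of the dedupe_by_bank path: submitting consolidation twice while the first task is still pending returns the original operation instead of queuing another (engine and context assumed):

async def kick_consolidation(engine, ctx):
    first = await engine.submit_async_consolidation("bank-1", request_context=ctx)
    second = await engine.submit_async_consolidation("bank-1", request_context=ctx)
    if second.get("deduplicated"):
        # The first task was still pending, so no new task was queued.
        assert second["operation_id"] == first["operation_id"]
    return first["operation_id"]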
5560
+ async def submit_async_refresh_mental_model(
5561
+ self,
5562
+ bank_id: str,
5563
+ mental_model_id: str,
5564
+ *,
5565
+ request_context: "RequestContext",
5566
+ ) -> dict[str, Any]:
5567
+ """Submit an async mental model refresh operation.
5568
+
5569
+ This schedules a background task to re-run the source query and update the content.
5570
+
5571
+ Args:
5572
+ bank_id: Bank identifier
5573
+ mental_model_id: Mental model UUID to refresh
5574
+ request_context: Request context for authentication
5575
+
5576
+ Returns:
5577
+ Dict with operation_id
5578
+ """
5579
+ await self._authenticate_tenant(request_context)
5580
+
5581
+ # Verify mental model exists
5582
+ mental_model = await self.get_mental_model(bank_id, mental_model_id, request_context=request_context)
5583
+ if not mental_model:
5584
+ raise ValueError(f"Mental model {mental_model_id} not found in bank {bank_id}")
5585
+
5586
+ return await self._submit_async_operation(
5587
+ bank_id=bank_id,
5588
+ operation_type="refresh_mental_model",
5589
+ task_type="refresh_mental_model",
5590
+ task_payload={
5591
+ "mental_model_id": mental_model_id,
5592
+ },
5593
+ result_metadata={"mental_model_id": mental_model_id, "name": mental_model["name"]},
5594
+ dedupe_by_bank=False,
5595
+ )
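Finally, a sketch tying the background refresh to the status endpoint (engine, context, and ids are placeholders):

async def refresh_in_background(engine, ctx):
    op = await engine.submit_async_refresh_mental_model(
        "bank-1", "model-123", request_context=ctx
    )
    # The returned operation_id can be polled via get_operation_status or list_operations.
    return await engine.get_operation_status("bank-1", op["operation_id"], request_context=ctx)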