hindsight-api 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. hindsight_api/admin/cli.py +59 -0
  2. hindsight_api/alembic/versions/h3c4d5e6f7g8_mental_models_v4.py +112 -0
  3. hindsight_api/alembic/versions/i4d5e6f7g8h9_delete_opinions.py +41 -0
  4. hindsight_api/alembic/versions/j5e6f7g8h9i0_mental_model_versions.py +95 -0
  5. hindsight_api/alembic/versions/k6f7g8h9i0j1_add_directive_subtype.py +58 -0
  6. hindsight_api/alembic/versions/l7g8h9i0j1k2_add_worker_columns.py +109 -0
  7. hindsight_api/alembic/versions/m8h9i0j1k2l3_mental_model_id_to_text.py +41 -0
  8. hindsight_api/alembic/versions/n9i0j1k2l3m4_learnings_and_pinned_reflections.py +134 -0
  9. hindsight_api/alembic/versions/o0j1k2l3m4n5_migrate_mental_models_data.py +113 -0
  10. hindsight_api/alembic/versions/p1k2l3m4n5o6_new_knowledge_architecture.py +194 -0
  11. hindsight_api/alembic/versions/q2l3m4n5o6p7_fix_mental_model_fact_type.py +50 -0
  12. hindsight_api/alembic/versions/r3m4n5o6p7q8_add_reflect_response_to_reflections.py +47 -0
  13. hindsight_api/alembic/versions/s4n5o6p7q8r9_add_consolidated_at_to_memory_units.py +53 -0
  14. hindsight_api/alembic/versions/t5o6p7q8r9s0_rename_mental_models_to_observations.py +134 -0
  15. hindsight_api/alembic/versions/u6p7q8r9s0t1_mental_models_text_id.py +41 -0
  16. hindsight_api/alembic/versions/v7q8r9s0t1u2_add_max_tokens_to_mental_models.py +50 -0
  17. hindsight_api/api/http.py +1119 -93
  18. hindsight_api/api/mcp.py +11 -191
  19. hindsight_api/config.py +145 -45
  20. hindsight_api/engine/consolidation/__init__.py +5 -0
  21. hindsight_api/engine/consolidation/consolidator.py +859 -0
  22. hindsight_api/engine/consolidation/prompts.py +69 -0
  23. hindsight_api/engine/cross_encoder.py +114 -9
  24. hindsight_api/engine/directives/__init__.py +5 -0
  25. hindsight_api/engine/directives/models.py +37 -0
  26. hindsight_api/engine/embeddings.py +102 -5
  27. hindsight_api/engine/interface.py +32 -13
  28. hindsight_api/engine/llm_wrapper.py +505 -43
  29. hindsight_api/engine/memory_engine.py +2090 -1089
  30. hindsight_api/engine/mental_models/__init__.py +14 -0
  31. hindsight_api/engine/mental_models/models.py +53 -0
  32. hindsight_api/engine/reflect/__init__.py +18 -0
  33. hindsight_api/engine/reflect/agent.py +933 -0
  34. hindsight_api/engine/reflect/models.py +109 -0
  35. hindsight_api/engine/reflect/observations.py +186 -0
  36. hindsight_api/engine/reflect/prompts.py +483 -0
  37. hindsight_api/engine/reflect/tools.py +437 -0
  38. hindsight_api/engine/reflect/tools_schema.py +250 -0
  39. hindsight_api/engine/response_models.py +130 -4
  40. hindsight_api/engine/retain/bank_utils.py +79 -201
  41. hindsight_api/engine/retain/fact_extraction.py +81 -48
  42. hindsight_api/engine/retain/fact_storage.py +5 -8
  43. hindsight_api/engine/retain/link_utils.py +5 -8
  44. hindsight_api/engine/retain/orchestrator.py +1 -55
  45. hindsight_api/engine/retain/types.py +2 -2
  46. hindsight_api/engine/search/graph_retrieval.py +2 -2
  47. hindsight_api/engine/search/link_expansion_retrieval.py +164 -29
  48. hindsight_api/engine/search/mpfp_retrieval.py +1 -1
  49. hindsight_api/engine/search/retrieval.py +14 -14
  50. hindsight_api/engine/search/think_utils.py +41 -140
  51. hindsight_api/engine/search/trace.py +0 -1
  52. hindsight_api/engine/search/tracer.py +2 -5
  53. hindsight_api/engine/search/types.py +0 -3
  54. hindsight_api/engine/task_backend.py +112 -196
  55. hindsight_api/engine/utils.py +0 -151
  56. hindsight_api/extensions/__init__.py +10 -1
  57. hindsight_api/extensions/builtin/tenant.py +5 -1
  58. hindsight_api/extensions/operation_validator.py +81 -4
  59. hindsight_api/extensions/tenant.py +26 -0
  60. hindsight_api/main.py +16 -5
  61. hindsight_api/mcp_local.py +12 -53
  62. hindsight_api/mcp_tools.py +494 -0
  63. hindsight_api/models.py +0 -2
  64. hindsight_api/worker/__init__.py +11 -0
  65. hindsight_api/worker/main.py +296 -0
  66. hindsight_api/worker/poller.py +486 -0
  67. {hindsight_api-0.3.0.dist-info → hindsight_api-0.4.0.dist-info}/METADATA +12 -6
  68. hindsight_api-0.4.0.dist-info/RECORD +112 -0
  69. {hindsight_api-0.3.0.dist-info → hindsight_api-0.4.0.dist-info}/entry_points.txt +1 -0
  70. hindsight_api/engine/retain/observation_regeneration.py +0 -254
  71. hindsight_api/engine/search/observation_utils.py +0 -125
  72. hindsight_api/engine/search/scoring.py +0 -159
  73. hindsight_api-0.3.0.dist-info/RECORD +0 -82
  74. {hindsight_api-0.3.0.dist-info → hindsight_api-0.4.0.dist-info}/WHEEL +0 -0
@@ -11,6 +11,7 @@ This implements a sophisticated memory architecture that combines:
 
  import asyncio
  import contextvars
+ import json
  import logging
  import time
  import uuid
@@ -134,25 +135,31 @@ if TYPE_CHECKING:
 
  from enum import Enum
 
+ from ..metrics import get_metrics_collector
  from ..pg0 import EmbeddedPostgres, parse_pg0_url
  from .entity_resolver import EntityResolver
  from .llm_wrapper import LLMConfig
  from .query_analyzer import QueryAnalyzer
+ from .reflect import run_reflect_agent
+ from .reflect.tools import tool_expand, tool_recall, tool_search_mental_models, tool_search_observations
  from .response_models import (
  VALID_RECALL_FACT_TYPES,
  EntityObservation,
  EntityState,
+ LLMCallTrace,
  MemoryFact,
+ ObservationRef,
  ReflectResult,
  TokenUsage,
+ ToolCallTrace,
  )
  from .response_models import RecallResult as RecallResultModel
  from .retain import bank_utils, embedding_utils
  from .retain.types import RetainContentDict
- from .search import observation_utils, think_utils
+ from .search import think_utils
  from .search.reranking import CrossEncoderReranker
  from .search.tags import TagsMatch
- from .task_backend import AsyncIOQueueBackend, NoopTaskBackend, TaskBackend
+ from .task_backend import BrokerTaskBackend, SyncTaskBackend, TaskBackend
 
 
  class Budget(str, Enum):
@@ -214,6 +221,10 @@ class MemoryEngine(MemoryEngineInterface):
  reflect_llm_api_key: str | None = None,
  reflect_llm_model: str | None = None,
  reflect_llm_base_url: str | None = None,
+ consolidation_llm_provider: str | None = None,
+ consolidation_llm_api_key: str | None = None,
+ consolidation_llm_model: str | None = None,
+ consolidation_llm_base_url: str | None = None,
  embeddings: Embeddings | None = None,
  cross_encoder: CrossEncoderModel | None = None,
  query_analyzer: QueryAnalyzer | None = None,
@@ -222,8 +233,6 @@ class MemoryEngine(MemoryEngineInterface):
  db_command_timeout: int | None = None,
  db_acquire_timeout: int | None = None,
  task_backend: TaskBackend | None = None,
- task_batch_size: int | None = None,
- task_batch_interval: float | None = None,
  run_migrations: bool = True,
  operation_validator: "OperationValidatorExtension | None" = None,
  tenant_extension: "TenantExtension | None" = None,
@@ -251,6 +260,10 @@ class MemoryEngine(MemoryEngineInterface):
  reflect_llm_api_key: API key for reflect LLM. Falls back to memory_llm_api_key.
  reflect_llm_model: Model for reflect operations. Falls back to memory_llm_model.
  reflect_llm_base_url: Base URL for reflect LLM. Falls back to memory_llm_base_url.
+ consolidation_llm_provider: LLM provider for consolidation operations. Falls back to memory_llm_provider.
+ consolidation_llm_api_key: API key for consolidation LLM. Falls back to memory_llm_api_key.
+ consolidation_llm_model: Model for consolidation operations. Falls back to memory_llm_model.
+ consolidation_llm_base_url: Base URL for consolidation LLM. Falls back to memory_llm_base_url.
  embeddings: Embeddings implementation. If not provided, created from env vars.
  cross_encoder: Cross-encoder model. If not provided, created from env vars.
  query_analyzer: Query analyzer implementation. If not provided, uses DateparserQueryAnalyzer.
@@ -258,9 +271,7 @@ class MemoryEngine(MemoryEngineInterface):
  pool_max_size: Maximum number of connections in the pool. Defaults to HINDSIGHT_API_DB_POOL_MAX_SIZE.
  db_command_timeout: PostgreSQL command timeout in seconds. Defaults to HINDSIGHT_API_DB_COMMAND_TIMEOUT.
  db_acquire_timeout: Connection acquisition timeout in seconds. Defaults to HINDSIGHT_API_DB_ACQUIRE_TIMEOUT.
- task_backend: Custom task backend. If not provided, uses AsyncIOQueueBackend.
- task_batch_size: Background task batch size. Defaults to HINDSIGHT_API_TASK_BACKEND_MEMORY_BATCH_SIZE.
- task_batch_interval: Background task batch interval in seconds. Defaults to HINDSIGHT_API_TASK_BACKEND_MEMORY_BATCH_INTERVAL.
+ task_backend: Custom task backend. If not provided, uses BrokerTaskBackend for distributed processing.
  run_migrations: Whether to run database migrations during initialize(). Default: True
  operation_validator: Optional extension to validate operations before execution.
  If provided, retain/recall/reflect operations will be validated.
@@ -394,21 +405,37 @@ class MemoryEngine(MemoryEngineInterface):
  model=reflect_model,
  )
 
+ # Consolidation LLM config - for mental model consolidation (can use efficient models)
+ consolidation_provider = consolidation_llm_provider or config.consolidation_llm_provider or memory_llm_provider
+ consolidation_api_key = consolidation_llm_api_key or config.consolidation_llm_api_key or memory_llm_api_key
+ consolidation_model = consolidation_llm_model or config.consolidation_llm_model or memory_llm_model
+ consolidation_base_url = consolidation_llm_base_url or config.consolidation_llm_base_url or memory_llm_base_url
+ # Apply provider-specific base URL defaults for consolidation
+ if consolidation_base_url is None:
+ if consolidation_provider.lower() == "groq":
+ consolidation_base_url = "https://api.groq.com/openai/v1"
+ elif consolidation_provider.lower() == "ollama":
+ consolidation_base_url = "http://localhost:11434/v1"
+ else:
+ consolidation_base_url = ""
+
+ self._consolidation_llm_config = LLMConfig(
+ provider=consolidation_provider,
+ api_key=consolidation_api_key,
+ base_url=consolidation_base_url,
+ model=consolidation_model,
+ )
+
  # Initialize cross-encoder reranker (cached for performance)
  self._cross_encoder_reranker = CrossEncoderReranker(cross_encoder=cross_encoder)
 
  # Initialize task backend
- if task_backend:
- self._task_backend = task_backend
- elif config.task_backend == "noop":
- self._task_backend = NoopTaskBackend()
- else:
- # Default to memory (AsyncIOQueueBackend)
- _task_batch_size = task_batch_size if task_batch_size is not None else config.task_backend_memory_batch_size
- _task_batch_interval = (
- task_batch_interval if task_batch_interval is not None else config.task_backend_memory_batch_interval
- )
- self._task_backend = AsyncIOQueueBackend(batch_size=_task_batch_size, batch_interval=_task_batch_interval)
+ # If no custom backend provided, use BrokerTaskBackend which stores tasks in PostgreSQL
+ # The pool_getter lambda will return the pool once it's initialized
+ self._task_backend = task_backend or BrokerTaskBackend(
+ pool_getter=lambda: self._pool,
+ schema_getter=get_current_schema,
+ )
 
  # Backpressure mechanism: limit concurrent searches to prevent overwhelming the database
  # Configurable via HINDSIGHT_API_RECALL_MAX_CONCURRENT (default: 50)
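The consolidation settings in the hunk above resolve in three steps: explicit constructor argument, then the consolidation_* config value, then the memory LLM default, with a provider-specific base URL filled in last. A minimal sketch of that fallback chain, assuming hypothetical helper and variable names (only the provider names and default URLs come from the hunk):

    # Sketch only: mirrors the precedence shown above; the helper name and inputs are illustrative.
    def resolve_base_url(explicit: str | None, config_value: str | None,
                         memory_default: str | None, provider: str) -> str:
        base_url = explicit or config_value or memory_default
        if base_url is None:
            if provider.lower() == "groq":
                base_url = "https://api.groq.com/openai/v1"
            elif provider.lower() == "ollama":
                base_url = "http://localhost:11434/v1"
            else:
                base_url = ""  # same empty-string default as the hunk
        return base_url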
@@ -472,35 +499,19 @@ class MemoryEngine(MemoryEngineInterface):
  if request_context is None:
  raise AuthenticationError("RequestContext is required when tenant extension is configured")
 
+ # For internal/background operations (e.g., worker tasks), skip extension authentication
+ # if the schema has already been set by execute_task via the _schema field.
+ if request_context.internal:
+ current = _current_schema.get()
+ if current and current != "public":
+ return current
+
  # Let AuthenticationError propagate - HTTP layer will convert to 401
  tenant_context = await self._tenant_extension.authenticate(request_context)
 
  _current_schema.set(tenant_context.schema_name)
  return tenant_context.schema_name
 
- async def _handle_access_count_update(self, task_dict: dict[str, Any]):
- """
- Handler for access count update tasks.
-
- Args:
- task_dict: Dict with 'node_ids' key containing list of node IDs to update
-
- Raises:
- Exception: Any exception from database operations (propagates to execute_task for retry)
- """
- node_ids = task_dict.get("node_ids", [])
- if not node_ids:
- return
-
- pool = await self._get_pool()
- # Convert string UUIDs to UUID type for faster matching
- uuid_list = [uuid.UUID(nid) for nid in node_ids]
- async with acquire_with_retry(pool) as conn:
- await conn.execute(
- f"UPDATE {fq_table('memory_units')} SET access_count = access_count + 1 WHERE id = ANY($1::uuid[])",
- uuid_list,
- )
-
  async def _handle_batch_retain(self, task_dict: dict[str, Any]):
  """
  Handler for batch retain tasks.
@@ -521,14 +532,113 @@ class MemoryEngine(MemoryEngineInterface):
  f"[BATCH_RETAIN_TASK] Starting background batch retain for bank_id={bank_id}, {len(contents)} items"
  )
 
- # Use internal request context for background tasks
+ # Use internal request context for background tasks (skips tenant auth when schema is pre-set)
  from hindsight_api.models import RequestContext
 
- internal_context = RequestContext()
+ internal_context = RequestContext(internal=True)
  await self.retain_batch_async(bank_id=bank_id, contents=contents, request_context=internal_context)
 
  logger.info(f"[BATCH_RETAIN_TASK] Completed background batch retain for bank_id={bank_id}")
 
+ async def _handle_consolidation(self, task_dict: dict[str, Any]):
+ """
+ Handler for consolidation tasks.
+
+ Consolidates new memories into mental models for a bank.
+
+ Args:
+ task_dict: Dict with 'bank_id'
+
+ Raises:
+ ValueError: If bank_id is missing
+ Exception: Any exception from consolidation (propagates to execute_task for retry)
+ """
+ bank_id = task_dict.get("bank_id")
+ if not bank_id:
+ raise ValueError("bank_id is required for consolidation task")
+
+ from hindsight_api.models import RequestContext
+
+ from .consolidation import run_consolidation_job
+
+ internal_context = RequestContext(internal=True)
+ result = await run_consolidation_job(
+ memory_engine=self,
+ bank_id=bank_id,
+ request_context=internal_context,
+ )
+
+ logger.info(f"[CONSOLIDATION] bank={bank_id} completed: {result.get('memories_processed', 0)} processed")
+
+ async def _handle_refresh_mental_model(self, task_dict: dict[str, Any]):
+ """
+ Handler for refresh_mental_model tasks.
+
+ Re-runs the source query through reflect and updates the mental model content.
+
+ Args:
+ task_dict: Dict with 'bank_id', 'mental_model_id', 'operation_id'
+
+ Raises:
+ ValueError: If required fields are missing
+ Exception: Any exception from reflect/update (propagates to execute_task for retry)
+ """
+ bank_id = task_dict.get("bank_id")
+ mental_model_id = task_dict.get("mental_model_id")
+
+ if not bank_id or not mental_model_id:
+ raise ValueError("bank_id and mental_model_id are required for refresh_mental_model task")
+
+ logger.info(f"[REFRESH_MENTAL_MODEL_TASK] Starting for bank_id={bank_id}, mental_model_id={mental_model_id}")
+
+ from hindsight_api.models import RequestContext
+
+ internal_context = RequestContext(internal=True)
+
+ # Get the current mental model to get source_query
+ mental_model = await self.get_mental_model(bank_id, mental_model_id, request_context=internal_context)
+ if not mental_model:
+ raise ValueError(f"Mental model {mental_model_id} not found in bank {bank_id}")
+
+ source_query = mental_model["source_query"]
+
+ # Run reflect to generate new content, excluding the mental model being refreshed
+ reflect_result = await self.reflect_async(
+ bank_id=bank_id,
+ query=source_query,
+ request_context=internal_context,
+ exclude_mental_model_ids=[mental_model_id],
+ )
+
+ generated_content = reflect_result.text or "No content generated"
+
+ # Build reflect_response payload to store
+ reflect_response = {
+ "text": reflect_result.text,
+ "based_on": {
+ fact_type: [
+ {
+ "id": str(fact.id),
+ "text": fact.text,
+ "type": fact_type,
+ }
+ for fact in facts
+ ]
+ for fact_type, facts in reflect_result.based_on.items()
+ },
+ }
+
+ # Update the mental model with the generated content and reflect_response
+ await self.update_mental_model(
+ bank_id=bank_id,
+ mental_model_id=mental_model_id,
+ content=generated_content,
+ reflect_response=reflect_response,
+ request_context=internal_context,
+ )
+
+ logger.info(f"[REFRESH_MENTAL_MODEL_TASK] Completed for bank_id={bank_id}, mental_model_id={mental_model_id}")
+
  async def execute_task(self, task_dict: dict[str, Any]):
  """
  Execute a task by routing it to the appropriate handler.
@@ -538,13 +648,18 @@ class MemoryEngine(MemoryEngineInterface):
 
  Args:
  task_dict: Task dictionary with 'type' key and other payload data
- Example: {'type': 'access_count_update', 'node_ids': [...]}
+ Example: {'type': 'batch_retain', 'bank_id': '...', 'contents': [...]}
  """
  task_type = task_dict.get("type")
  operation_id = task_dict.get("operation_id")
  retry_count = task_dict.get("retry_count", 0)
  max_retries = 3
 
+ # Set schema context for multi-tenant task execution
+ schema = task_dict.pop("_schema", None)
+ if schema:
+ _current_schema.set(schema)
+
  # Check if operation was cancelled (only for tasks with operation_id)
  if operation_id:
  try:
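execute_task() now receives whatever dict the broker delivers: it pops the tenant schema, checks for cancellation, then dispatches on the task type. A rough sketch of a payload for one of the new task types (the field names follow the handlers in this file; the values and the engine object are assumed):

    # Assumed payload shape for a consolidation task routed through execute_task().
    task = {
        "type": "consolidation",     # dispatched to _handle_consolidation
        "bank_id": "bank-123",       # required by the handler
        "operation_id": "...",       # async_operations row, marked completed on success
        "retry_count": 0,            # incremented by the retry path, max_retries = 3
        "_schema": "tenant_a",       # popped first to set the per-tenant schema
    }
    await engine.execute_task(task)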
@@ -563,16 +678,12 @@ class MemoryEngine(MemoryEngineInterface):
  # Continue with processing if we can't check status
 
  try:
- if task_type == "access_count_update":
- await self._handle_access_count_update(task_dict)
- elif task_type == "reinforce_opinion":
- await self._handle_reinforce_opinion(task_dict)
- elif task_type == "form_opinion":
- await self._handle_form_opinion(task_dict)
- elif task_type == "batch_retain":
+ if task_type == "batch_retain":
  await self._handle_batch_retain(task_dict)
- elif task_type == "regenerate_observations":
- await self._handle_regenerate_observations(task_dict)
+ elif task_type == "consolidation":
+ await self._handle_consolidation(task_dict)
+ elif task_type == "refresh_mental_model":
+ await self._handle_refresh_mental_model(task_dict)
  else:
  logger.error(f"Unknown task type: {task_type}")
  # Don't retry unknown task types
@@ -580,9 +691,9 @@ class MemoryEngine(MemoryEngineInterface):
  await self._delete_operation_record(operation_id)
  return
 
- # Task succeeded - delete operation record
+ # Task succeeded - mark operation as completed
  if operation_id:
- await self._delete_operation_record(operation_id)
+ await self._mark_operation_completed(operation_id)
 
  except Exception as e:
  # Task failed - check if we should retry
@@ -628,7 +739,7 @@ class MemoryEngine(MemoryEngineInterface):
  await conn.execute(
  f"""
  UPDATE {fq_table("async_operations")}
- SET status = 'failed', error_message = $2
+ SET status = 'failed', error_message = $2, updated_at = NOW()
  WHERE operation_id = $1
  """,
  uuid.UUID(operation_id),
@@ -638,6 +749,23 @@ class MemoryEngine(MemoryEngineInterface):
  except Exception as e:
  logger.error(f"Failed to mark operation as failed {operation_id}: {e}")
 
+ async def _mark_operation_completed(self, operation_id: str):
+ """Helper to mark an operation as completed in the database."""
+ try:
+ pool = await self._get_pool()
+ async with acquire_with_retry(pool) as conn:
+ await conn.execute(
+ f"""
+ UPDATE {fq_table("async_operations")}
+ SET status = 'completed', updated_at = NOW(), completed_at = NOW()
+ WHERE operation_id = $1
+ """,
+ uuid.UUID(operation_id),
+ )
+ logger.info(f"Marked async operation as completed: {operation_id}")
+ except Exception as e:
+ logger.error(f"Failed to mark operation as completed {operation_id}: {e}")
+
  async def initialize(self):
  """Initialize the connection pool, models, and background workers.
 
@@ -710,6 +838,23 @@ )
  )
  if reflect_is_different:
  await self._reflect_llm_config.verify_connection()
+ # Verify consolidation config if different from all others
+ consolidation_is_different = (
+ (
+ self._consolidation_llm_config.provider != self._llm_config.provider
+ or self._consolidation_llm_config.model != self._llm_config.model
+ )
+ and (
+ self._consolidation_llm_config.provider != self._retain_llm_config.provider
+ or self._consolidation_llm_config.model != self._retain_llm_config.model
+ )
+ and (
+ self._consolidation_llm_config.provider != self._reflect_llm_config.provider
+ or self._consolidation_llm_config.model != self._reflect_llm_config.model
+ )
+ )
+ if consolidation_is_different:
+ await self._consolidation_llm_config.verify_connection()
 
  # Build list of initialization tasks
  init_tasks = [
@@ -836,8 +981,7 @@
  """
  Wait for all pending background tasks to complete.
 
- This is useful in tests to ensure background tasks (like opinion reinforcement)
- complete before making assertions.
+ This is useful in tests to ensure background tasks complete before making assertions.
  """
  if hasattr(self._task_backend, "wait_for_pending_tasks"):
  await self._task_backend.wait_for_pending_tasks()
@@ -1178,7 +1322,7 @@
 
  logger.info(f"Split into {len(sub_batches)} sub-batches: {[len(b) for b in sub_batches]} items each")
 
- # Process each sub-batch using internal method (skip chunking check)
+ # Process each sub-batch
  all_results = []
  for i, sub_batch in enumerate(sub_batches, 1):
  sub_batch_chars = sum(len(item.get("content", "")) for item in sub_batch)
@@ -1235,6 +1379,17 @@
  except Exception as e:
  logger.warning(f"Post-retain hook error (non-fatal): {e}")
 
+ # Trigger consolidation as a tracked async operation if enabled
+ from ..config import get_config
+
+ config = get_config()
+ if config.enable_observations:
+ try:
+ await self.submit_async_consolidation(bank_id=bank_id, request_context=request_context)
+ except Exception as e:
+ # Log but don't fail the retain - consolidation is non-critical
+ logger.warning(f"Failed to submit consolidation task for bank {bank_id}: {e}")
+
  if return_usage:
  return result, total_usage
  return result
@@ -1280,7 +1435,6 @@
  embeddings_model=self.embeddings,
  llm_config=self._retain_llm_config,
  entity_resolver=self.entity_resolver,
- task_backend=self._task_backend,
  format_date_fn=self._format_readable_date,
  duplicate_checker_fn=self._find_duplicate_facts_batch,
  bank_id=bank_id,
@@ -1350,6 +1504,8 @@
  request_context: "RequestContext",
  tags: list[str] | None = None,
  tags_match: TagsMatch = "any",
+ _connection_budget: int | None = None,
+ _quiet: bool = False,
  ) -> RecallResultModel:
  """
  Recall memories using N*4-way parallel retrieval (N fact types × 4 retrieval methods).
@@ -1400,6 +1556,12 @@
  f"Must be one of: {', '.join(sorted(VALID_RECALL_FACT_TYPES))}"
  )
 
+ # Filter out 'opinion' - opinions are no longer returned from recall
+ fact_type = [ft for ft in fact_type if ft != "opinion"]
+ if not fact_type:
+ # All requested types were opinions - return empty result
+ return RecallResultModel(results=[], entities={}, chunks={})
+
  # Validate operation if validator is configured
  if self._operation_validator:
  from hindsight_api.extensions import RecallContext
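Because 'opinion' is filtered out before any retrieval runs, a recall that requests only that fact type short-circuits with an empty result. An illustrative call, with the method name and surrounding objects assumed rather than taken from this hunk:

    # Assumed usage: every requested type is 'opinion', so the filtered list is empty
    # and recall returns RecallResultModel(results=[], entities={}, chunks={}) without searching.
    result = await engine.recall(
        bank_id="bank-123",
        query="what does the team think about the migration?",
        fact_type=["opinion"],
        request_context=ctx,
    )
    assert result.results == []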
@@ -1425,6 +1587,11 @@
  effective_budget = budget if budget is not None else Budget.MID
  thinking_budget = budget_mapping[effective_budget]
 
+ # Log recall start with tags if present (skip if quiet mode for internal operations)
+ if not _quiet:
+ tags_info = f", tags={tags} ({tags_match})" if tags else ""
+ logger.info(f"[RECALL {bank_id[:8]}] Starting recall for query: {query[:50]}...{tags_info}")
+
  # Backpressure: limit concurrent recalls to prevent overwhelming the database
  result = None
  error_msg = None
@@ -1451,6 +1618,8 @@
  semaphore_wait=semaphore_wait,
  tags=tags,
  tags_match=tags_match,
+ connection_budget=_connection_budget,
+ quiet=_quiet,
  )
  break # Success - exit retry loop
  except Exception as e:
@@ -1571,6 +1740,8 @@
  semaphore_wait: float = 0.0,
  tags: list[str] | None = None,
  tags_match: TagsMatch = "any",
+ connection_budget: int | None = None,
+ quiet: bool = False,
  ) -> RecallResultModel:
  """
  Search implementation with modular retrieval and reranking.
@@ -1645,8 +1816,11 @@
 
  # Run optimized retrieval with connection budget
  config = get_config()
+ effective_connection_budget = (
+ connection_budget if connection_budget is not None else config.recall_connection_budget
+ )
  async with budgeted_operation(
- max_connections=config.recall_connection_budget,
+ max_connections=effective_connection_budget,
  operation_id=f"recall-{recall_id}",
  ) as op:
  budgeted_pool = op.wrap_pool(pool)
@@ -1702,8 +1876,6 @@
  # Capture temporal constraint (same across all fact types)
  if retrieval_result.temporal_constraint:
  detected_temporal_constraint = retrieval_result.temporal_constraint
- # Collect MPFP timings
- all_mpfp_timings.extend(retrieval_result.mpfp_timings)
 
  # If no temporal results from any fact type, set to None
  if not temporal_results:
@@ -1722,7 +1894,8 @@
  retrieval_duration = time.time() - retrieval_start
 
  step_duration = time.time() - step_start
- # Format per-method timings (these are the actual parallel retrieval times)
+ total_retrievals = len(fact_type) * (4 if temporal_results else 3)
+ # Format per-method timings
  timing_parts = [
  f"semantic={len(semantic_results)}({aggregated_timings['semantic']:.3f}s)",
  f"bm25={len(bm25_results)}({aggregated_timings['bm25']:.3f}s)",
@@ -1945,6 +2118,9 @@
 
  # Re-sort by combined score
  scored_results.sort(key=lambda x: x.weight, reverse=True)
+ log_buffer.append(
+ " [4.6] Combined scoring: cross_encoder(0.6) + rrf(0.2) + temporal(0.1) + recency(0.1)"
+ )
 
  # Add reranked results to tracer AFTER combined scoring (so normalized values are included)
  if tracer:
@@ -1963,6 +2139,7 @@
  # Step 5: Truncate to thinking_budget * 2 for token filtering
  rerank_limit = thinking_budget * 2
  top_scored = scored_results[:rerank_limit]
+ log_buffer.append(f" [5] Truncated to top {len(top_scored)} results")
 
  # Step 6: Token budget filtering
  step_start = time.time()
@@ -1977,7 +2154,7 @@
 
  step_duration = time.time() - step_start
  log_buffer.append(
- f" [5] Token filtering: {len(top_scored)} results, {total_tokens}/{max_tokens} tokens in {step_duration:.3f}s"
+ f" [6] Token filtering: {len(top_scored)} results, {total_tokens}/{max_tokens} tokens in {step_duration:.3f}s"
  )
 
  if tracer:
@@ -1995,7 +2172,6 @@
  text=sr.retrieval.text,
  context=sr.retrieval.context or "",
  event_date=sr.retrieval.occurred_start,
- access_count=sr.retrieval.access_count,
  is_entry_point=(sr.id in [ep.node_id for ep in tracer.entry_points]),
  parent_node_id=None, # In parallel retrieval, there's no clear parent
  link_type=None,
@@ -2007,11 +2183,6 @@
  final_weight=sr.weight,
  )
 
- # Step 8: Queue access count updates for visited nodes
- visited_ids = list(set([sr.id for sr in scored_results[:50]])) # Top 50
- if visited_ids:
- await self._task_backend.submit_task({"type": "access_count_update", "node_ids": visited_ids})
-
  # Log fact_type distribution in results
  fact_type_counts = {}
  for sr in top_scored:
@@ -2043,7 +2214,6 @@
  top_results_dicts.append(result_dict)
 
  # Get entities for each fact if include_entities is requested
- step_start = time.time()
  fact_entity_map = {} # unit_id -> list of (entity_id, entity_name)
  if include_entities and top_scored:
  unit_ids = [uuid.UUID(sr.id) for sr in top_scored]
@@ -2065,7 +2235,6 @@
  fact_entity_map[unit_id].append(
  {"entity_id": str(row["entity_id"]), "canonical_name": row["canonical_name"]}
  )
- entity_map_duration = time.time() - step_start
 
  # Convert results to MemoryFact objects
  memory_facts = []
@@ -2093,7 +2262,6 @@
  )
 
  # Fetch entity observations if requested
- step_start = time.time()
  entities_dict = None
  total_entity_tokens = 0
  total_chunk_tokens = 0
@@ -2114,42 +2282,16 @@
  entities_ordered.append((entity_id, entity_name))
  seen_entity_ids.add(entity_id)
 
- # Fetch all observations in a single batched query
- entity_ids = [eid for eid, _ in entities_ordered]
- all_observations = await self.get_entity_observations_batch(
- bank_id, entity_ids, limit_per_entity=5, request_context=request_context
- )
-
- # Build entities_dict respecting token budget, in relevance order
+ # Return entities with empty observations (summaries now live in mental models)
  entities_dict = {}
- encoding = _get_tiktoken_encoding()
  for entity_id, entity_name in entities_ordered:
- if total_entity_tokens >= max_entity_tokens:
- break
-
- observations = all_observations.get(entity_id, [])
-
- # Calculate tokens for this entity's observations
- entity_tokens = 0
- included_observations = []
- for obs in observations:
- obs_tokens = len(encoding.encode(obs.text))
- if total_entity_tokens + entity_tokens + obs_tokens <= max_entity_tokens:
- included_observations.append(obs)
- entity_tokens += obs_tokens
- else:
- break
-
- if included_observations:
- entities_dict[entity_name] = EntityState(
- entity_id=entity_id, canonical_name=entity_name, observations=included_observations
- )
- total_entity_tokens += entity_tokens
- entity_obs_duration = time.time() - step_start
+ entities_dict[entity_name] = EntityState(
+ entity_id=entity_id,
+ canonical_name=entity_name,
+ observations=[], # Mental models provide this now
+ )
 
  # Fetch chunks if requested
- step_start = time.time()
  chunks_dict = None
  if include_chunks and top_scored:
  from .response_models import ChunkInfo
@@ -2209,12 +2351,6 @@
  chunk_text=chunk_text, chunk_index=row["chunk_index"], truncated=False
  )
  total_chunk_tokens += chunk_tokens
- chunks_duration = time.time() - step_start
-
- # Log entity/chunk fetch timing (only if any enrichment was requested)
- log_buffer.append(
- f" [6] Response enrichment: entity_map={entity_map_duration:.3f}s, entity_obs={entity_obs_duration:.3f}s, chunks={chunks_duration:.3f}s"
- )
 
  # Finalize trace if enabled
  trace_dict = None
@@ -2236,13 +2372,15 @@
  log_buffer.append(
  f"[RECALL {recall_id}] Complete: {len(top_scored)} facts ({total_tokens} tok), {num_chunks} chunks ({total_chunk_tokens} tok), {num_entities} entities ({total_entity_tokens} tok) | {fact_type_summary} | {total_time:.3f}s{wait_info}"
  )
- logger.info("\n" + "\n".join(log_buffer))
+ if not quiet:
+ logger.info("\n" + "\n".join(log_buffer))
 
  return RecallResultModel(results=memory_facts, trace=trace_dict, entities=entities_dict, chunks=chunks_dict)
 
  except Exception as e:
  log_buffer.append(f"[RECALL {recall_id}] ERROR after {time.time() - recall_start:.3f}s: {str(e)}")
- logger.error("\n" + "\n".join(log_buffer))
+ if not quiet:
+ logger.error("\n" + "\n".join(log_buffer))
  raise Exception(f"Failed to search memories: {str(e)}")
 
  def _filter_by_token_budget(
@@ -2350,10 +2488,12 @@
  pool = await self._get_pool()
  async with acquire_with_retry(pool) as conn:
  async with conn.transaction():
- # Count units before deletion
- units_count = await conn.fetchval(
- f"SELECT COUNT(*) FROM {fq_table('memory_units')} WHERE document_id = $1", document_id
+ # Get memory unit IDs before deletion (for mental model invalidation)
+ unit_rows = await conn.fetch(
+ f"SELECT id FROM {fq_table('memory_units')} WHERE document_id = $1", document_id
  )
+ unit_ids = [str(row["id"]) for row in unit_rows]
+ units_count = len(unit_ids)
 
  # Delete document (cascades to memory_units and all their links)
  deleted = await conn.fetchval(
@@ -2362,6 +2502,10 @@
  bank_id,
  )
 
+ # Invalidate deleted fact IDs from mental models
+ if deleted and unit_ids:
+ await self._invalidate_facts_from_mental_models(conn, bank_id, unit_ids)
+
  return {"document_deleted": 1 if deleted else 0, "memory_units_deleted": units_count if deleted else 0}
 
  async def delete_memory_unit(
@@ -2389,11 +2533,18 @@
  pool = await self._get_pool()
  async with acquire_with_retry(pool) as conn:
  async with conn.transaction():
+ # Get bank_id before deletion (for mental model invalidation)
+ bank_id = await conn.fetchval(f"SELECT bank_id FROM {fq_table('memory_units')} WHERE id = $1", unit_id)
+
  # Delete the memory unit (cascades to links and associations)
  deleted = await conn.fetchval(
  f"DELETE FROM {fq_table('memory_units')} WHERE id = $1 RETURNING id", unit_id
  )
 
+ # Invalidate deleted fact ID from mental models
+ if deleted and bank_id:
+ await self._invalidate_facts_from_mental_models(conn, bank_id, [str(deleted)])
+
  return {
  "success": deleted is not None,
  "unit_id": str(deleted) if deleted else None,
@@ -2431,10 +2582,9 @@
  await self._authenticate_tenant(request_context)
  pool = await self._get_pool()
  async with acquire_with_retry(pool) as conn:
+ # Ensure connection is not in read-only mode (can happen with connection poolers)
+ await conn.execute("SET SESSION CHARACTERISTICS AS TRANSACTION READ WRITE")
  async with conn.transaction():
- # Ensure transaction is not in read-only mode (can happen with connection poolers)
- # Using SET LOCAL so it only affects this transaction, not the session
- await conn.execute("SET LOCAL transaction_read_only TO off")
  try:
  if fact_type:
  # Delete only memories of a specific fact type
2639
+ async def clear_observations(
2640
+ self,
2641
+ bank_id: str,
2642
+ *,
2643
+ request_context: "RequestContext",
2644
+ ) -> dict[str, int]:
2645
+ """
2646
+ Clear all observations for a bank (consolidated knowledge).
2647
+
2648
+ Args:
2649
+ bank_id: Bank ID to clear observations for
2650
+ request_context: Request context for authentication.
2651
+
2652
+ Returns:
2653
+ Dictionary with count of deleted observations
2654
+ """
2655
+ await self._authenticate_tenant(request_context)
2656
+ pool = await self._get_pool()
2657
+ async with acquire_with_retry(pool) as conn:
2658
+ async with conn.transaction():
2659
+ # Count observations before deletion
2660
+ count = await conn.fetchval(
2661
+ f"SELECT COUNT(*) FROM {fq_table('memory_units')} WHERE bank_id = $1 AND fact_type = 'observation'",
2662
+ bank_id,
2663
+ )
2664
+
2665
+ # Delete all observations
2666
+ await conn.execute(
2667
+ f"DELETE FROM {fq_table('memory_units')} WHERE bank_id = $1 AND fact_type = 'observation'",
2668
+ bank_id,
2669
+ )
2670
+
2671
+ # Reset consolidation timestamp
2672
+ await conn.execute(
2673
+ f"UPDATE {fq_table('banks')} SET last_consolidated_at = NULL WHERE bank_id = $1",
2674
+ bank_id,
2675
+ )
2676
+
2677
+ return {"deleted_count": count or 0}
2678
+
2679
+ async def run_consolidation(
2680
+ self,
2681
+ bank_id: str,
2682
+ *,
2683
+ request_context: "RequestContext",
2684
+ ) -> dict[str, int]:
2685
+ """
2686
+ Run memory consolidation to create/update mental models.
2687
+
2688
+ Args:
2689
+ bank_id: Bank ID to run consolidation for
2690
+ request_context: Request context for authentication.
2691
+
2692
+ Returns:
2693
+ Dictionary with consolidation stats
2694
+ """
2695
+ await self._authenticate_tenant(request_context)
2696
+
2697
+ from .consolidation import run_consolidation_job
2698
+
2699
+ result = await run_consolidation_job(
2700
+ memory_engine=self,
2701
+ bank_id=bank_id,
2702
+ request_context=request_context,
2703
+ )
2704
+
2705
+ return {
2706
+ "processed": result.get("processed", 0),
2707
+ "created": result.get("created", 0),
2708
+ "updated": result.get("updated", 0),
2709
+ "skipped": result.get("skipped", 0),
2710
+ }
2711
+
2489
2712
  async def get_graph_data(
2490
2713
  self,
2491
2714
  bank_id: str | None = None,
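clear_observations() and run_consolidation() give callers explicit control over the consolidated layer. A hedged usage sketch (the engine instance and request context are assumed to exist; the method names and returned keys are the ones added above):

    # Force a consolidation pass and inspect the counters it reports.
    stats = await engine.run_consolidation("bank-123", request_context=ctx)
    print(stats["processed"], stats["created"], stats["updated"], stats["skipped"])

    # Wipe consolidated observations and reset last_consolidated_at for the bank.
    cleared = await engine.clear_observations("bank-123", request_context=ctx)
    print(cleared["deleted_count"])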
@@ -2541,7 +2764,7 @@
  param_count += 1
  units = await conn.fetch(
  f"""
- SELECT id, text, event_date, context, occurred_start, occurred_end, mentioned_at, document_id, chunk_id, fact_type
+ SELECT id, text, event_date, context, occurred_start, occurred_end, mentioned_at, document_id, chunk_id, fact_type, tags, created_at, proof_count, source_memory_ids
  FROM {fq_table("memory_units")}
  {where_clause}
  ORDER BY mentioned_at DESC NULLS LAST, event_date DESC
@@ -2554,7 +2777,18 @@
  # Get links, filtering to only include links between units of the selected agent
  # Use DISTINCT ON with LEAST/GREATEST to deduplicate bidirectional links
  unit_ids = [row["id"] for row in units]
- if unit_ids:
+ unit_id_set = set(unit_ids)
+
+ # Collect source memory IDs from observations
+ source_memory_ids = []
+ for unit in units:
+ if unit["source_memory_ids"]:
+ source_memory_ids.extend(unit["source_memory_ids"])
+ source_memory_ids = list(set(source_memory_ids)) # Deduplicate
+
+ # Fetch links involving both visible units AND source memories
+ all_relevant_ids = unit_ids + source_memory_ids
+ if all_relevant_ids:
  links = await conn.fetch(
  f"""
  SELECT DISTINCT ON (LEAST(ml.from_unit_id, ml.to_unit_id), GREATEST(ml.from_unit_id, ml.to_unit_id), ml.link_type, COALESCE(ml.entity_id, '00000000-0000-0000-0000-000000000000'::uuid))
@@ -2565,14 +2799,69 @@
  e.canonical_name as entity_name
  FROM {fq_table("memory_links")} ml
  LEFT JOIN {fq_table("entities")} e ON ml.entity_id = e.id
- WHERE ml.from_unit_id = ANY($1::uuid[]) AND ml.to_unit_id = ANY($1::uuid[])
+ WHERE ml.from_unit_id = ANY($1::uuid[]) OR ml.to_unit_id = ANY($1::uuid[])
  ORDER BY LEAST(ml.from_unit_id, ml.to_unit_id), GREATEST(ml.from_unit_id, ml.to_unit_id), ml.link_type, COALESCE(ml.entity_id, '00000000-0000-0000-0000-000000000000'::uuid), ml.weight DESC
  """,
- unit_ids,
+ all_relevant_ids,
  )
  else:
  links = []
 
+ # Copy links from source memories to observations
+ # Observations inherit links from their source memories via source_memory_ids
+ # Build a map from source_id to observation_ids
+ source_to_observations = {}
+ for unit in units:
+ if unit["source_memory_ids"]:
+ for source_id in unit["source_memory_ids"]:
+ if source_id not in source_to_observations:
+ source_to_observations[source_id] = []
+ source_to_observations[source_id].append(unit["id"])
+
+ copied_links = []
+ for link in links:
+ from_id = link["from_unit_id"]
+ to_id = link["to_unit_id"]
+
+ # Get observations that should inherit this link
+ from_observations = source_to_observations.get(from_id, [])
+ to_observations = source_to_observations.get(to_id, [])
+
+ # If from_id is a source memory, copy links to its observations
+ if from_observations:
+ for obs_id in from_observations:
+ # Only include if the target is visible
+ if to_id in unit_id_set or to_observations:
+ target = to_observations[0] if to_observations and to_id not in unit_id_set else to_id
+ if target in unit_id_set:
+ copied_links.append(
+ {
+ "from_unit_id": obs_id,
+ "to_unit_id": target,
+ "link_type": link["link_type"],
+ "weight": link["weight"],
+ "entity_name": link["entity_name"],
+ }
+ )
+
+ # If to_id is a source memory, copy links to its observations
+ if to_observations and from_id in unit_id_set:
+ for obs_id in to_observations:
+ copied_links.append(
+ {
+ "from_unit_id": from_id,
+ "to_unit_id": obs_id,
+ "link_type": link["link_type"],
+ "weight": link["weight"],
+ "entity_name": link["entity_name"],
+ }
+ )
+
+ # Keep only direct links between visible nodes
+ direct_links = [
+ link for link in links if link["from_unit_id"] in unit_id_set and link["to_unit_id"] in unit_id_set
+ ]
+
  # Get entity information
  unit_entities = await conn.fetch(f"""
  SELECT ue.unit_id, e.canonical_name
@@ -2590,6 +2879,18 @@
  entity_map[unit_id] = []
  entity_map[unit_id].append(entity_name)
 
+ # For observations, inherit entities from source memories
+ for unit in units:
+ if unit["source_memory_ids"] and unit["id"] not in entity_map:
+ # Collect entities from all source memories
+ source_entities = []
+ for source_id in unit["source_memory_ids"]:
+ if source_id in entity_map:
+ source_entities.extend(entity_map[source_id])
+ if source_entities:
+ # Deduplicate while preserving order
+ entity_map[unit["id"]] = list(dict.fromkeys(source_entities))
+
  # Build nodes
  nodes = []
  for row in units:
@@ -2623,14 +2924,15 @@
  }
  )
 
- # Build edges
+ # Build edges (combine direct links and copied links from sources)
  edges = []
- for row in links:
+ all_links = direct_links + copied_links
+ for row in all_links:
  from_id = str(row["from_unit_id"])
  to_id = str(row["to_unit_id"])
  link_type = row["link_type"]
  weight = row["weight"]
- entity_name = row["entity_name"]
+ entity_name = row.get("entity_name")
 
  # Color by link type
  if link_type == "temporal":
@@ -2682,6 +2984,9 @@
  "document_id": row["document_id"],
  "chunk_id": row["chunk_id"] if row["chunk_id"] else None,
  "fact_type": row["fact_type"],
+ "tags": list(row["tags"]) if row["tags"] else [],
+ "created_at": row["created_at"].isoformat() if row["created_at"] else None,
+ "proof_count": row["proof_count"] if row["proof_count"] else None,
  }
  )
 
@@ -2834,11 +3139,11 @@
  await self._authenticate_tenant(request_context)
  pool = await self._get_pool()
  async with acquire_with_retry(pool) as conn:
- # Get the memory unit
+ # Get the memory unit (include source_memory_ids for mental models)
  row = await conn.fetchrow(
  f"""
  SELECT id, text, context, event_date, occurred_start, occurred_end,
- mentioned_at, fact_type, document_id, chunk_id, tags
+ mentioned_at, fact_type, document_id, chunk_id, tags, source_memory_ids
  FROM {fq_table("memory_units")}
  WHERE id = $1 AND bank_id = $2
  """,
@@ -2861,7 +3166,7 @@
  )
  entities = [r["canonical_name"] for r in entities_rows]
 
- return {
+ result = {
  "id": str(row["id"]),
  "text": row["text"],
  "context": row["context"] if row["context"] else "",
@@ -2876,6 +3181,35 @@
  "tags": row["tags"] if row["tags"] else [],
  }
 
+ # For observations, include source_memory_ids and fetch source_memories
+ if row["fact_type"] == "observation" and row["source_memory_ids"]:
+ source_ids = row["source_memory_ids"]
+ result["source_memory_ids"] = [str(sid) for sid in source_ids]
+
+ # Fetch source memories
+ source_rows = await conn.fetch(
+ f"""
+ SELECT id, text, fact_type, context, occurred_start, mentioned_at
+ FROM {fq_table("memory_units")}
+ WHERE id = ANY($1::uuid[])
+ ORDER BY mentioned_at DESC NULLS LAST
+ """,
+ source_ids,
+ )
+ result["source_memories"] = [
+ {
+ "id": str(r["id"]),
+ "text": r["text"],
+ "type": r["fact_type"],
+ "context": r["context"],
+ "occurred_start": r["occurred_start"].isoformat() if r["occurred_start"] else None,
+ "mentioned_at": r["mentioned_at"].isoformat() if r["mentioned_at"] else None,
+ }
+ for r in source_rows
+ ]
+
+ return result
+
  async def list_documents(
  self,
  bank_id: str,
@@ -3052,322 +3386,100 @@
  "created_at": chunk["created_at"].isoformat() if chunk["created_at"] else "",
  }
 
- async def _evaluate_opinion_update_async(
+ # ==================== bank profile Methods ====================
+
+ async def get_bank_profile(
  self,
- opinion_text: str,
- opinion_confidence: float,
- new_event_text: str,
- entity_name: str,
- ) -> dict[str, Any] | None:
+ bank_id: str,
+ *,
+ request_context: "RequestContext",
+ ) -> dict[str, Any]:
  """
- Evaluate if an opinion should be updated based on a new event.
+ Get bank profile (name, disposition + mission).
+ Auto-creates agent with default values if not exists.
 
  Args:
- opinion_text: Current opinion text (includes reasons)
- opinion_confidence: Current confidence score (0.0-1.0)
- new_event_text: Text of the new event
- entity_name: Name of the entity this opinion is about
+ bank_id: bank IDentifier
+ request_context: Request context for authentication.
 
  Returns:
- Dict with 'action' ('keep'|'update'), 'new_confidence', 'new_text' (if action=='update')
- or None if no changes needed
+ Dict with name, disposition traits, and mission
  """
+ await self._authenticate_tenant(request_context)
+ pool = await self._get_pool()
+ profile = await bank_utils.get_bank_profile(pool, bank_id)
+ disposition = profile["disposition"]
+ return {
+ "bank_id": bank_id,
+ "name": profile["name"],
+ "disposition": disposition,
+ "mission": profile["mission"],
+ }
 
- class OpinionEvaluation(BaseModel):
- """Evaluation of whether an opinion should be updated."""
-
- action: str = Field(description="Action to take: 'keep' (no change) or 'update' (modify opinion)")
- reasoning: str = Field(description="Brief explanation of why this action was chosen")
- new_confidence: float = Field(
- description="New confidence score (0.0-1.0). Can be higher, lower, or same as before."
- )
- new_opinion_text: str | None = Field(
- default=None,
- description="If action is 'update', the revised opinion text that acknowledges the previous view. Otherwise None.",
- )
-
- evaluation_prompt = f"""You are evaluating whether an existing opinion should be updated based on new information.
-
- ENTITY: {entity_name}
-
- EXISTING OPINION:
- {opinion_text}
- Current confidence: {opinion_confidence:.2f}
-
- NEW EVENT:
- {new_event_text}
-
- Evaluate whether this new event:
- 1. REINFORCES the opinion (increase confidence, keep text)
- 2. WEAKENS the opinion (decrease confidence, keep text)
- 3. CHANGES the opinion (update both text and confidence, noting "Previously I thought X, but now Y...")
- 4. IRRELEVANT (keep everything as is)
-
- Guidelines:
- - Only suggest 'update' action if the new event genuinely contradicts or significantly modifies the opinion
- - If updating the text, acknowledge the previous opinion and explain the change
- - Confidence should reflect accumulated evidence (0.0 = no confidence, 1.0 = very confident)
- - Small changes in confidence are normal; large jumps should be rare"""
-
- try:
- result = await self._reflect_llm_config.call(
- messages=[
- {"role": "system", "content": "You evaluate and update opinions based on new information."},
- {"role": "user", "content": evaluation_prompt},
- ],
- response_format=OpinionEvaluation,
- scope="memory_evaluate_opinion",
- temperature=0.3, # Lower temperature for more consistent evaluation
- )
-
- # Only return updates if something actually changed
- if result.action == "keep" and abs(result.new_confidence - opinion_confidence) < 0.01:
- return None
-
- return {
- "action": result.action,
- "reasoning": result.reasoning,
- "new_confidence": result.new_confidence,
- "new_text": result.new_opinion_text if result.action == "update" else None,
- }
-
- except Exception as e:
- logger.warning(f"Failed to evaluate opinion update: {str(e)}")
- return None
-
- async def _handle_form_opinion(self, task_dict: dict[str, Any]):
+ async def update_bank_disposition(
+ self,
+ bank_id: str,
+ disposition: dict[str, int],
+ *,
+ request_context: "RequestContext",
+ ) -> None:
  """
- Handler for form opinion tasks.
+ Update bank disposition traits.
 
  Args:
- task_dict: Dict with keys: 'bank_id', 'answer_text', 'query', 'tenant_id'
+ bank_id: bank IDentifier
+ disposition: Dict with skepticism, literalism, empathy (all 1-5)
+ request_context: Request context for authentication.
  """
- bank_id = task_dict["bank_id"]
- answer_text = task_dict["answer_text"]
- query = task_dict["query"]
- tenant_id = task_dict.get("tenant_id")
-
- await self._extract_and_store_opinions_async(
- bank_id=bank_id, answer_text=answer_text, query=query, tenant_id=tenant_id
- )
+ await self._authenticate_tenant(request_context)
+ pool = await self._get_pool()
+ await bank_utils.update_bank_disposition(pool, bank_id, disposition)
 
- async def _handle_reinforce_opinion(self, task_dict: dict[str, Any]):
+ async def set_bank_mission(
+ self,
+ bank_id: str,
+ mission: str,
+ *,
+ request_context: "RequestContext",
+ ) -> dict[str, Any]:
  """
- Handler for reinforce opinion tasks.
+ Set the mission for a bank.
 
  Args:
- task_dict: Dict with keys: 'bank_id', 'created_unit_ids', 'unit_texts', 'unit_entities'
+ bank_id: bank IDentifier
+ mission: The mission text
+ request_context: Request context for authentication.
+
+ Returns:
+ Dict with bank_id and mission.
  """
- bank_id = task_dict["bank_id"]
- created_unit_ids = task_dict["created_unit_ids"]
- unit_texts = task_dict["unit_texts"]
- unit_entities = task_dict["unit_entities"]
+ await self._authenticate_tenant(request_context)
+ pool = await self._get_pool()
+ await bank_utils.set_bank_mission(pool, bank_id, mission)
+ return {"bank_id": bank_id, "mission": mission}
 
- await self._reinforce_opinions_async(
- bank_id=bank_id, created_unit_ids=created_unit_ids, unit_texts=unit_texts, unit_entities=unit_entities
- )
-
- async def _reinforce_opinions_async(
- self,
- bank_id: str,
- created_unit_ids: list[str],
- unit_texts: list[str],
- unit_entities: list[list[dict[str, str]]],
- ):
- """
- Background task to reinforce opinions based on newly ingested events.
-
- This runs asynchronously and does not block the put operation.
-
- Args:
- bank_id: bank ID
- created_unit_ids: List of newly created memory unit IDs
- unit_texts: Texts of the newly created units
- unit_entities: Entities extracted from each unit
- """
- try:
- # Extract all unique entity names from the new units
- entity_names = set()
- for entities_list in unit_entities:
- for entity in entities_list:
- # Handle both Entity objects and dicts
- if hasattr(entity, "text"):
- entity_names.add(entity.text)
- elif isinstance(entity, dict):
- entity_names.add(entity["text"])
-
- if not entity_names:
- return
-
- pool = await self._get_pool()
- async with acquire_with_retry(pool) as conn:
- # Find all opinions related to these entities
- opinions = await conn.fetch(
- f"""
- SELECT DISTINCT mu.id, mu.text, mu.confidence_score, e.canonical_name
- FROM {fq_table("memory_units")} mu
- JOIN {fq_table("unit_entities")} ue ON mu.id = ue.unit_id
- JOIN {fq_table("entities")} e ON ue.entity_id = e.id
- WHERE mu.bank_id = $1
- AND mu.fact_type = 'opinion'
- AND e.canonical_name = ANY($2::text[])
- """,
- bank_id,
- list(entity_names),
- )
-
- if not opinions:
- return
-
- # Use cached LLM config
- if self._reflect_llm_config is None:
- logger.error("[REINFORCE] LLM config not available, skipping opinion reinforcement")
- return
-
- # Evaluate each opinion against the new events
- updates_to_apply = []
- for opinion in opinions:
- opinion_id = str(opinion["id"])
- opinion_text = opinion["text"]
- opinion_confidence = opinion["confidence_score"]
- entity_name = opinion["canonical_name"]
-
- # Find all new events mentioning this entity
- relevant_events = []
- for unit_text, entities_list in zip(unit_texts, unit_entities):
- if any(e["text"] == entity_name for e in entities_list):
- relevant_events.append(unit_text)
-
- if not relevant_events:
- continue
-
- # Combine all relevant events
- combined_events = "\n".join(relevant_events)
-
- # Evaluate if opinion should be updated
- evaluation = await self._evaluate_opinion_update_async(
- opinion_text, opinion_confidence, combined_events, entity_name
- )
-
- if evaluation:
- updates_to_apply.append({"opinion_id": opinion_id, "evaluation": evaluation})
-
- # Apply all updates in a single transaction
- if updates_to_apply:
- async with conn.transaction():
- for update in updates_to_apply:
- opinion_id = update["opinion_id"]
- evaluation = update["evaluation"]
-
- if evaluation["action"] == "update" and evaluation["new_text"]:
- # Update both text and confidence
- await conn.execute(
- f"""
- UPDATE {fq_table("memory_units")}
3267
- SET text = $1, confidence_score = $2, updated_at = NOW()
3268
- WHERE id = $3
3269
- """,
3270
- evaluation["new_text"],
3271
- evaluation["new_confidence"],
3272
- uuid.UUID(opinion_id),
3273
- )
3274
- else:
3275
- # Only update confidence
3276
- await conn.execute(
3277
- f"""
3278
- UPDATE {fq_table("memory_units")}
3279
- SET confidence_score = $1, updated_at = NOW()
3280
- WHERE id = $2
3281
- """,
3282
- evaluation["new_confidence"],
3283
- uuid.UUID(opinion_id),
3284
- )
3285
-
3286
- else:
3287
- pass # No opinions to update
3288
-
3289
- except Exception as e:
3290
- logger.error(f"[REINFORCE] Error during opinion reinforcement: {str(e)}")
3291
- import traceback
3292
-
3293
- traceback.print_exc()
3294
-
3295
- # ==================== bank profile Methods ====================
3296
-
3297
- async def get_bank_profile(
3298
- self,
3299
- bank_id: str,
3300
- *,
3301
- request_context: "RequestContext",
3302
- ) -> dict[str, Any]:
3303
- """
3304
- Get bank profile (name, disposition + background).
3305
- Auto-creates agent with default values if not exists.
3306
-
3307
- Args:
3308
- bank_id: bank IDentifier
3309
- request_context: Request context for authentication.
3310
-
3311
- Returns:
3312
- Dict with name, disposition traits, and background
3313
- """
3314
- await self._authenticate_tenant(request_context)
3315
- pool = await self._get_pool()
3316
- profile = await bank_utils.get_bank_profile(pool, bank_id)
3317
- disposition = profile["disposition"]
3318
- return {
3319
- "bank_id": bank_id,
3320
- "name": profile["name"],
3321
- "disposition": disposition,
3322
- "background": profile["background"],
3323
- }
3324
-
3325
- async def update_bank_disposition(
3326
- self,
3327
- bank_id: str,
3328
- disposition: dict[str, int],
3329
- *,
3330
- request_context: "RequestContext",
3331
- ) -> None:
3332
- """
3333
- Update bank disposition traits.
3334
-
3335
- Args:
3336
- bank_id: bank IDentifier
3337
- disposition: Dict with skepticism, literalism, empathy (all 1-5)
3338
- request_context: Request context for authentication.
3339
- """
3340
- await self._authenticate_tenant(request_context)
3341
- pool = await self._get_pool()
3342
- await bank_utils.update_bank_disposition(pool, bank_id, disposition)
3343
-
3344
- async def merge_bank_background(
3461
+ async def merge_bank_mission(
3345
3462
  self,
3346
3463
  bank_id: str,
3347
3464
  new_info: str,
3348
3465
  *,
3349
- update_disposition: bool = True,
3350
3466
  request_context: "RequestContext",
3351
3467
  ) -> dict[str, Any]:
3352
3468
  """
3353
- Merge new background information with existing background using LLM.
3469
+ Merge new mission information with existing mission using LLM.
3354
3470
  Normalizes to first person ("I") and resolves conflicts.
3355
- Optionally infers disposition traits from the merged background.
3356
3471
 
3357
3472
  Args:
3358
3473
  bank_id: bank IDentifier
3359
- new_info: New background information to add/merge
3360
- update_disposition: If True, infer Big Five traits from background (default: True)
3474
+ new_info: New mission information to add/merge
3361
3475
  request_context: Request context for authentication.
3362
3476
 
3363
3477
  Returns:
3364
- Dict with 'background' (str) and optionally 'disposition' (dict) keys
3478
+ Dict with 'mission' (str) key
3365
3479
  """
3366
3480
  await self._authenticate_tenant(request_context)
3367
3481
  pool = await self._get_pool()
3368
- return await bank_utils.merge_bank_background(
3369
- pool, self._reflect_llm_config, bank_id, new_info, update_disposition
3370
- )
3482
+ return await bank_utils.merge_bank_mission(pool, self._reflect_llm_config, bank_id, new_info)
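Illustrative only, reusing the `engine`/`ctx` placeholders from the sketch above: set a mission once, then merge new information into it; merge_bank_mission delegates the first-person normalization and conflict resolution to the LLM via bank_utils.

    await engine.set_bank_mission(
        "bank-abc", "I help the support team triage bugs.", request_context=ctx
    )
    merged = await engine.merge_bank_mission(
        "bank-abc", "I am now also responsible for billing questions.", request_context=ctx
    )
    print(merged["mission"])  # single merged, first-person mission text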
3371
3483
 
3372
3484
  async def list_banks(
3373
3485
  self,
@@ -3381,7 +3493,7 @@ Guidelines:
3381
3493
  request_context: Request context for authentication.
3382
3494
 
3383
3495
  Returns:
3384
- List of dicts with bank_id, name, disposition, background, created_at, updated_at
3496
+ List of dicts with bank_id, name, disposition, mission, created_at, updated_at
3385
3497
  """
3386
3498
  await self._authenticate_tenant(request_context)
3387
3499
  pool = await self._get_pool()
@@ -3401,32 +3513,39 @@ Guidelines:
3401
3513
  request_context: "RequestContext",
3402
3514
  tags: list[str] | None = None,
3403
3515
  tags_match: TagsMatch = "any",
3516
+ exclude_mental_model_ids: list[str] | None = None,
3404
3517
  ) -> ReflectResult:
3405
3518
  """
3406
- Reflect and formulate an answer using bank identity, world facts, and opinions.
3519
+ Reflect and formulate an answer using an agentic loop with tools.
3407
3520
 
3408
- This method:
3409
- 1. Retrieves experience (conversations and events)
3410
- 2. Retrieves world facts (general knowledge)
3411
- 3. Retrieves existing opinions (bank's formed perspectives)
3412
- 4. Uses LLM to formulate an answer
3413
- 5. Extracts and stores any new opinions formed during reflection
3414
- 6. Optionally generates structured output based on response_schema
3415
- 7. Returns plain text answer and the facts used
3521
+ The reflect agent iteratively uses tools to:
3522
+ 1. lookup: Get mental models (synthesized knowledge)
3523
+ 2. recall: Search facts (semantic + temporal retrieval)
3524
+ 3. learn: Create/update mental models with new insights
3525
+ 4. expand: Get chunk/document context for memories
3526
+
3527
+ The agent starts with empty context and must call tools to gather
3528
+ information. On the last iteration, tools are removed to force a
3529
+ final text response.
3416
3530
 
3417
3531
  Args:
3418
3532
  bank_id: bank identifier
3419
3533
  query: Question to answer
3420
- budget: Budget level for memory exploration (low=100, mid=300, high=600 units)
3421
- context: Additional context string to include in LLM prompt (not used in recall)
3422
- response_schema: Optional JSON Schema for structured output
3534
+ budget: Budget level; scales the agent's max iterations (low=0.5x, mid=1x, high=2x)
3535
+ context: Additional context string to include in agent prompt
3536
+ max_tokens: Max completion tokens, forwarded to the reflect agent
3537
+ response_schema: Optional JSON Schema for structured output, forwarded to the reflect agent
3538
+ tags: Optional tags to filter memories
3539
+ tags_match: How to match tags - "any" (OR), "all" (AND)
3540
+ exclude_mental_model_ids: Optional list of mental model IDs to exclude from search
3541
+ (used when refreshing a mental model to avoid circular reference)
3423
3542
 
3424
3543
  Returns:
3425
3544
  ReflectResult containing:
3426
- - text: Plain text answer (no markdown)
3427
- - based_on: Dict with 'world', 'experience', and 'opinion' fact lists (MemoryFact objects)
3428
- - new_opinions: List of newly formed opinions
3429
- - structured_output: Optional dict if response_schema was provided
3545
+ - text: Plain text answer
3546
+ - based_on: Facts and mental models the agent actually used, grouped by fact type
3547
+ - new_opinions: Empty list (learnings are stored as mental models instead)
3548
+ - structured_output: Structured output when a response_schema was provided, else None
3430
3549
  """
3431
3550
  # Use cached LLM config
3432
3551
  if self._reflect_llm_config is None:
@@ -3450,129 +3569,312 @@ Guidelines:
3450
3569
 
3451
3570
  reflect_start = time.time()
3452
3571
  reflect_id = f"{bank_id[:8]}-{int(time.time() * 1000) % 100000}"
3453
- log_buffer = []
3454
- log_buffer.append(f"[REFLECT {reflect_id}] Query: '{query[:50]}...'")
3572
+ tags_info = f", tags={tags} ({tags_match})" if tags else ""
3573
+ logger.info(f"[REFLECT {reflect_id}] Starting agentic reflect for query: {query[:50]}...{tags_info}")
3455
3574
 
3456
- # Steps 1-3: Run multi-fact-type search (12-way retrieval: 4 methods × 3 fact types)
3457
- recall_start = time.time()
3458
- metrics = get_metrics_collector()
3459
- with metrics.record_operation(
3460
- "recall", bank_id=bank_id, source="reflect", budget=budget.value if budget else None
3461
- ):
3462
- search_result = await self.recall_async(
3463
- bank_id=bank_id,
3464
- query=query,
3465
- budget=budget,
3466
- max_tokens=4096,
3467
- enable_trace=False,
3468
- fact_type=["experience", "world", "opinion"],
3469
- include_entities=True,
3470
- request_context=request_context,
3575
+ # Get bank profile for agent identity
3576
+ profile = await self.get_bank_profile(bank_id, request_context=request_context)
3577
+
3578
+ # NOTE: Mental models are NOT pre-loaded to keep the initial prompt small.
3579
+ # The agent can call lookup() to list available models if needed.
3580
+ # This is critical for banks with many mental models to avoid huge prompts.
3581
+
3582
+ # Compute max iterations based on budget
3583
+ config = get_config()
3584
+ base_max_iterations = config.reflect_max_iterations
3585
+ # Budget multipliers: low=0.5x, mid=1x, high=2x
3586
+ budget_multipliers = {Budget.LOW: 0.5, Budget.MID: 1.0, Budget.HIGH: 2.0}
3587
+ effective_budget = budget or Budget.LOW
3588
+ max_iterations = max(1, int(base_max_iterations * budget_multipliers.get(effective_budget, 1.0)))
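To make the multiplier arithmetic concrete, assuming a hypothetical reflect_max_iterations of 8 (the real base value comes from config and may differ):

    # max(1, int(8 * 0.5)) ->  4 iterations for Budget.LOW
    # max(1, int(8 * 1.0)) ->  8 iterations for Budget.MID
    # max(1, int(8 * 2.0)) -> 16 iterations for Budget.HIGH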
3589
+
3590
+ # Run agentic loop - acquire connections only when needed for DB operations
3591
+ # (not held during LLM calls which can be slow)
3592
+ pool = await self._get_pool()
3593
+
3594
+ # Get bank stats for freshness info
3595
+ bank_stats = await self.get_bank_stats(bank_id, request_context=request_context)
3596
+ last_consolidated_at = bank_stats.get("last_consolidated_at") if isinstance(bank_stats, dict) else getattr(bank_stats, "last_consolidated_at", None)
3597
+ pending_consolidation = bank_stats.get("pending_consolidation", 0) if isinstance(bank_stats, dict) else getattr(bank_stats, "pending_consolidation", 0)
3598
+
3599
+ # Create tool callbacks that acquire connections only when needed
3600
+ from .retain import embedding_utils
3601
+
3602
+ async def search_mental_models_fn(q: str, max_results: int = 5) -> dict[str, Any]:
3603
+ # Generate embedding for the query
3604
+ embeddings = await embedding_utils.generate_embeddings_batch(self.embeddings, [q])
3605
+ query_embedding = embeddings[0]
3606
+ async with pool.acquire() as conn:
3607
+ return await tool_search_mental_models(
3608
+ conn,
3609
+ bank_id,
3610
+ q,
3611
+ query_embedding,
3612
+ max_results=max_results,
3613
+ tags=tags,
3614
+ tags_match=tags_match,
3615
+ exclude_ids=exclude_mental_model_ids,
3616
+ )
3617
+
3618
+ async def search_observations_fn(q: str, max_tokens: int = 5000) -> dict[str, Any]:
3619
+ return await tool_search_observations(
3620
+ self,
3621
+ bank_id,
3622
+ q,
3623
+ request_context,
3624
+ max_tokens=max_tokens,
3471
3625
  tags=tags,
3472
3626
  tags_match=tags_match,
3627
+ last_consolidated_at=last_consolidated_at,
3628
+ pending_consolidation=pending_consolidation,
3473
3629
  )
3474
- recall_time = time.time() - recall_start
3475
3630
 
3476
- all_results = search_result.results
3631
+ async def recall_fn(q: str, max_tokens: int = 4096) -> dict[str, Any]:
3632
+ return await tool_recall(
3633
+ self, bank_id, q, request_context, max_tokens=max_tokens, tags=tags, tags_match=tags_match
3634
+ )
3477
3635
 
3478
- # Split results by fact type for structured response
3479
- agent_results = [r for r in all_results if r.fact_type == "experience"]
3480
- world_results = [r for r in all_results if r.fact_type == "world"]
3481
- opinion_results = [r for r in all_results if r.fact_type == "opinion"]
3636
+ async def expand_fn(memory_ids: list[str], depth: str) -> dict[str, Any]:
3637
+ async with pool.acquire() as conn:
3638
+ return await tool_expand(conn, bank_id, memory_ids, depth)
3482
3639
 
3483
- log_buffer.append(
3484
- f"[REFLECT {reflect_id}] Recall: {len(all_results)} facts (experience={len(agent_results)}, world={len(world_results)}, opinion={len(opinion_results)}) in {recall_time:.3f}s"
3640
+ # Load directives from the dedicated directives table
3641
+ # Directives are hard rules that must be followed in all responses
3642
+ directives_raw = await self.list_directives(
3643
+ bank_id=bank_id,
3644
+ tags=tags,
3645
+ tags_match=tags_match,
3646
+ active_only=True,
3647
+ request_context=request_context,
3485
3648
  )
3649
+ # Convert directive format to the expected format for reflect agent
3650
+ # The agent expects: name, description (optional), observations (list of {title, content})
3651
+ directives = [
3652
+ {
3653
+ "name": d["name"],
3654
+ "description": d["content"], # Use content as description
3655
+ "observations": [], # Directives use content directly, not observations
3656
+ }
3657
+ for d in directives_raw
3658
+ ]
3659
+ if directives:
3660
+ logger.info(f"[REFLECT {reflect_id}] Loaded {len(directives)} directives")
3486
3661
 
3487
- # Format facts for LLM
3488
- agent_facts_text = think_utils.format_facts_for_prompt(agent_results)
3489
- world_facts_text = think_utils.format_facts_for_prompt(world_results)
3490
- opinion_facts_text = think_utils.format_facts_for_prompt(opinion_results)
3662
+ # Check if the bank has any mental models
3663
+ async with pool.acquire() as conn:
3664
+ mental_model_count = await conn.fetchval(
3665
+ f"SELECT COUNT(*) FROM {fq_table('mental_models')} WHERE bank_id = $1",
3666
+ bank_id,
3667
+ )
3668
+ has_mental_models = mental_model_count > 0
3669
+ if has_mental_models:
3670
+ logger.info(f"[REFLECT {reflect_id}] Bank has {mental_model_count} mental models")
3491
3671
 
3492
- # Get bank profile (name, disposition + background)
3493
- profile = await self.get_bank_profile(bank_id, request_context=request_context)
3494
- name = profile["name"]
3495
- disposition = profile["disposition"] # Typed as DispositionTraits
3496
- background = profile["background"]
3497
-
3498
- # Build the prompt
3499
- prompt = think_utils.build_think_prompt(
3500
- agent_facts_text=agent_facts_text,
3501
- world_facts_text=world_facts_text,
3502
- opinion_facts_text=opinion_facts_text,
3672
+ # Run the agent
3673
+ agent_result = await run_reflect_agent(
3674
+ llm_config=self._reflect_llm_config,
3675
+ bank_id=bank_id,
3503
3676
  query=query,
3504
- name=name,
3505
- disposition=disposition,
3506
- background=background,
3677
+ bank_profile=profile,
3678
+ search_mental_models_fn=search_mental_models_fn,
3679
+ search_observations_fn=search_observations_fn,
3680
+ recall_fn=recall_fn,
3681
+ expand_fn=expand_fn,
3507
3682
  context=context,
3683
+ max_iterations=max_iterations,
3684
+ max_tokens=max_tokens,
3685
+ response_schema=response_schema,
3686
+ directives=directives,
3687
+ has_mental_models=has_mental_models,
3688
+ budget=effective_budget,
3508
3689
  )
3509
3690
 
3510
- log_buffer.append(f"[REFLECT {reflect_id}] Prompt: {len(prompt)} chars")
3511
-
3512
- system_message = think_utils.get_system_message(disposition)
3513
- messages = [{"role": "system", "content": system_message}, {"role": "user", "content": prompt}]
3514
-
3515
- # Prepare response_format if schema provided
3516
- response_format = None
3517
- if response_schema is not None:
3518
- # Wrapper class to provide Pydantic-like interface for raw JSON schemas
3519
- class JsonSchemaWrapper:
3520
- def __init__(self, schema: dict):
3521
- self._schema = schema
3522
-
3523
- def model_json_schema(self):
3524
- return self._schema
3525
-
3526
- response_format = JsonSchemaWrapper(response_schema)
3527
-
3528
- llm_start = time.time()
3529
- llm_result, usage = await self._reflect_llm_config.call(
3530
- messages=messages,
3531
- scope="memory_reflect",
3532
- max_completion_tokens=max_tokens,
3533
- response_format=response_format,
3534
- skip_validation=True if response_format else False,
3535
- # Don't enforce strict_schema - not all providers support it and may retry forever
3536
- # Soft enforcement (schema in prompt + json_object mode) is sufficient
3537
- strict_schema=False,
3538
- return_usage=True,
3691
+ total_time = time.time() - reflect_start
3692
+ logger.info(
3693
+ f"[REFLECT {reflect_id}] Complete: {len(agent_result.text)} chars, "
3694
+ f"{agent_result.iterations} iterations, {agent_result.tools_called} tool calls | {total_time:.3f}s"
3539
3695
  )
3540
- llm_time = time.time() - llm_start
3541
3696
 
3542
- # Handle response based on whether structured output was requested
3543
- if response_schema is not None:
3544
- structured_output = llm_result
3545
- answer_text = "" # Empty for backward compatibility
3546
- log_buffer.append(f"[REFLECT {reflect_id}] Structured output generated")
3547
- else:
3548
- structured_output = None
3549
- answer_text = llm_result.strip()
3697
+ # Convert agent tool trace to ToolCallTrace objects
3698
+ tool_trace_result = [
3699
+ ToolCallTrace(
3700
+ tool=tc.tool,
3701
+ reason=tc.reason,
3702
+ input=tc.input,
3703
+ output=tc.output,
3704
+ duration_ms=tc.duration_ms,
3705
+ iteration=tc.iteration,
3706
+ )
3707
+ for tc in agent_result.tool_trace
3708
+ ]
3550
3709
 
3551
- # Submit form_opinion task for background processing
3552
- # Pass tenant_id from request context for internal authentication in background task
3553
- await self._task_backend.submit_task(
3554
- {
3555
- "type": "form_opinion",
3556
- "bank_id": bank_id,
3557
- "answer_text": answer_text,
3558
- "query": query,
3559
- "tenant_id": getattr(request_context, "tenant_id", None) if request_context else None,
3560
- }
3561
- )
3710
+ # Convert agent LLM trace to LLMCallTrace objects
3711
+ llm_trace_result = [LLMCallTrace(scope=lc.scope, duration_ms=lc.duration_ms) for lc in agent_result.llm_trace]
3712
+
3713
+ # Extract memories from recall tool outputs - only include memories the agent actually used
3714
+ # agent_result.used_memory_ids contains validated IDs from the done action
3715
+ used_memory_ids_set = set(agent_result.used_memory_ids) if agent_result.used_memory_ids else set()
3716
+ based_on: dict[str, list[MemoryFact]] = {"world": [], "experience": [], "opinion": [], "observation": []}
3717
+ seen_memory_ids: set[str] = set()
3718
+ for tc in agent_result.tool_trace:
3719
+ if tc.tool == "recall" and "memories" in tc.output:
3720
+ for memory_data in tc.output["memories"]:
3721
+ memory_id = memory_data.get("id")
3722
+ # Only include memories that the agent declared as used (or all if none specified)
3723
+ if memory_id and memory_id not in seen_memory_ids:
3724
+ if used_memory_ids_set and memory_id not in used_memory_ids_set:
3725
+ continue # Skip memories not actually used by the agent
3726
+ seen_memory_ids.add(memory_id)
3727
+ fact_type = memory_data.get("type", "world")
3728
+ if fact_type in based_on:
3729
+ based_on[fact_type].append(
3730
+ MemoryFact(
3731
+ id=memory_id,
3732
+ text=memory_data.get("text", ""),
3733
+ fact_type=fact_type,
3734
+ context=None,
3735
+ occurred_start=memory_data.get("occurred"),
3736
+ occurred_end=memory_data.get("occurred"),
3737
+ )
3738
+ )
3562
3739
 
3563
- total_time = time.time() - reflect_start
3564
- log_buffer.append(
3565
- f"[REFLECT {reflect_id}] Complete: {len(answer_text)} chars response, LLM {llm_time:.3f}s, total {total_time:.3f}s"
3740
+ # Extract mental models from tool outputs - only include models the agent actually used
3741
+ # agent_result.used_mental_model_ids contains validated IDs from the done action
3742
+ used_model_ids_set = set(agent_result.used_mental_model_ids) if agent_result.used_mental_model_ids else set()
3743
+ based_on["mental-models"] = []
3744
+ seen_model_ids: set[str] = set()
3745
+ for tc in agent_result.tool_trace:
3746
+ if tc.tool == "get_mental_model":
3747
+ # Single model lookup (with full details)
3748
+ if tc.output.get("found") and "model" in tc.output:
3749
+ model = tc.output["model"]
3750
+ model_id = model.get("id")
3751
+ if model_id and model_id not in seen_model_ids:
3752
+ # Only include models that the agent declared as used (or all if none specified)
3753
+ if used_model_ids_set and model_id not in used_model_ids_set:
3754
+ continue # Skip models not actually used by the agent
3755
+ seen_model_ids.add(model_id)
3756
+ # Add to based_on as MemoryFact with type "mental-models"
3757
+ model_name = model.get("name", "")
3758
+ model_summary = model.get("summary") or model.get("description", "")
3759
+ based_on["mental-models"].append(
3760
+ MemoryFact(
3761
+ id=model_id,
3762
+ text=f"{model_name}: {model_summary}",
3763
+ fact_type="mental-models",
3764
+ context=f"{model.get('type', 'concept')} ({model.get('subtype', 'structural')})",
3765
+ occurred_start=None,
3766
+ occurred_end=None,
3767
+ )
3768
+ )
3769
+ elif tc.tool == "search_mental_models":
3770
+ # Search mental models - include all returned models (filtered by used_model_ids_set if specified)
3771
+ for model in tc.output.get("mental_models", []):
3772
+ model_id = model.get("id")
3773
+ if model_id and model_id not in seen_model_ids:
3774
+ # Only include models that the agent declared as used (or all if none specified)
3775
+ if used_model_ids_set and model_id not in used_model_ids_set:
3776
+ continue # Skip models not actually used by the agent
3777
+ seen_model_ids.add(model_id)
3778
+ # Add to based_on as MemoryFact with type "mental-models"
3779
+ model_name = model.get("name", "")
3780
+ model_summary = model.get("summary") or model.get("description", "")
3781
+ based_on["mental-models"].append(
3782
+ MemoryFact(
3783
+ id=model_id,
3784
+ text=f"{model_name}: {model_summary}",
3785
+ fact_type="mental-models",
3786
+ context=f"{model.get('type', 'concept')} ({model.get('subtype', 'structural')})",
3787
+ occurred_start=None,
3788
+ occurred_end=None,
3789
+ )
3790
+ )
3791
+ elif tc.tool == "search_mental_models":
3792
+ # Search mental models - include all returned mental models (filtered by used_mental_model_ids_set if specified)
3793
+ used_mental_model_ids_set = (
3794
+ set(agent_result.used_mental_model_ids) if agent_result.used_mental_model_ids else set()
3795
+ )
3796
+ for mental_model in tc.output.get("mental_models", []):
3797
+ mental_model_id = mental_model.get("id")
3798
+ if mental_model_id and mental_model_id not in seen_model_ids:
3799
+ # Only include mental models that the agent declared as used (or all if none specified)
3800
+ if used_mental_model_ids_set and mental_model_id not in used_mental_model_ids_set:
3801
+ continue # Skip mental models not actually used by the agent
3802
+ seen_model_ids.add(mental_model_id)
3803
+ # Add to based_on as MemoryFact with type "mental-models" (mental models are synthesized knowledge)
3804
+ mental_model_name = mental_model.get("name", "")
3805
+ mental_model_content = mental_model.get("content", "")
3806
+ based_on["mental-models"].append(
3807
+ MemoryFact(
3808
+ id=mental_model_id,
3809
+ text=f"{mental_model_name}: {mental_model_content}",
3810
+ fact_type="mental-models",
3811
+ context="mental model (user-curated)",
3812
+ occurred_start=None,
3813
+ occurred_end=None,
3814
+ )
3815
+ )
3816
+ # List all models lookup - don't add to based_on (too verbose, just a listing)
3817
+
3818
+ # Add directives to based_on["mental-models"] (they are mental models with subtype='directive')
3819
+ for directive in directives:
3820
+ # Extract summary from observations
3821
+ summary_parts: list[str] = []
3822
+ for obs in directive.get("observations", []):
3823
+ # Support both Pydantic Observation objects and dicts
3824
+ if hasattr(obs, "content"):
3825
+ content = obs.content
3826
+ title = obs.title
3827
+ else:
3828
+ content = obs.get("content", "")
3829
+ title = obs.get("title", "")
3830
+ if title and content:
3831
+ summary_parts.append(f"{title}: {content}")
3832
+ elif content:
3833
+ summary_parts.append(content)
3834
+
3835
+ # Fallback to description if no observations
3836
+ if not summary_parts and directive.get("description"):
3837
+ summary_parts.append(directive["description"])
3838
+
3839
+ directive_name = directive.get("name", "")
3840
+ directive_summary = "; ".join(summary_parts) if summary_parts else ""
3841
+ based_on["mental-models"].append(
3842
+ MemoryFact(
3843
+ id=directive.get("id", ""),
3844
+ text=f"{directive_name}: {directive_summary}",
3845
+ fact_type="mental-models",
3846
+ context="directive (directive)",
3847
+ occurred_start=None,
3848
+ occurred_end=None,
3849
+ )
3850
+ )
3851
+
3852
+ # Build directives_applied from agent result
3853
+ from hindsight_api.engine.response_models import DirectiveRef
3854
+
3855
+ directives_applied_result = [
3856
+ DirectiveRef(id=d.id, name=d.name, content=d.content) for d in agent_result.directives_applied
3857
+ ]
3858
+
3859
+ # Convert agent usage to TokenUsage format
3860
+ from hindsight_api.engine.response_models import TokenUsage
3861
+
3862
+ usage = TokenUsage(
3863
+ input_tokens=agent_result.usage.input_tokens,
3864
+ output_tokens=agent_result.usage.output_tokens,
3865
+ total_tokens=agent_result.usage.total_tokens,
3566
3866
  )
3567
- logger.info("\n" + "\n".join(log_buffer))
3568
3867
 
3569
- # Return response with facts split by type
3868
+ # Return response (compatible with existing API)
3570
3869
  result = ReflectResult(
3571
- text=answer_text,
3572
- based_on={"world": world_results, "experience": agent_results, "opinion": opinion_results},
3573
- new_opinions=[], # Opinions are being extracted asynchronously
3574
- structured_output=structured_output,
3870
+ text=agent_result.text,
3871
+ based_on=based_on,
3872
+ new_opinions=[], # Learnings stored as mental models
3873
+ structured_output=agent_result.structured_output,
3575
3874
  usage=usage,
3875
+ tool_trace=tool_trace_result,
3876
+ llm_trace=llm_trace_result,
3877
+ directives_applied=directives_applied_result,
3576
3878
  )
3577
3879
 
3578
3880
  # Call post-operation hook if validator is configured
@@ -3596,50 +3898,6 @@ Guidelines:
3596
3898
 
3597
3899
  return result
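A sketch of calling the agentic reflect path end to end; the method name (`engine.reflect`), bank id, query, and tags are illustrative assumptions since the signature lies outside this hunk, and `ctx` is the same placeholder RequestContext as above:

    result = await engine.reflect(
        bank_id="bank-abc",
        query="What did we decide about the Q3 launch?",
        budget=Budget.MID,             # scales the agent's max iterations via the multipliers above
        tags=["project-q3"],
        tags_match="any",
        request_context=ctx,
    )
    print(result.text)                                     # plain-text answer
    for fact in result.based_on.get("mental-models", []):
        print(fact.id, fact.text)                          # models and directives the agent used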
3598
3900
 
3599
- async def _extract_and_store_opinions_async(
3600
- self, bank_id: str, answer_text: str, query: str, tenant_id: str | None = None
3601
- ):
3602
- """
3603
- Background task to extract and store opinions from think response.
3604
-
3605
- This runs asynchronously and does not block the think response.
3606
-
3607
- Args:
3608
- bank_id: bank IDentifier
3609
- answer_text: The generated answer text
3610
- query: The original query
3611
- tenant_id: Tenant identifier for internal authentication
3612
- """
3613
- try:
3614
- # Extract opinions from the answer
3615
- new_opinions = await think_utils.extract_opinions_from_text(
3616
- self._reflect_llm_config, text=answer_text, query=query
3617
- )
3618
-
3619
- # Store new opinions
3620
- if new_opinions:
3621
- from datetime import datetime
3622
-
3623
- current_time = datetime.now(UTC)
3624
- # Use internal context with tenant_id for background authentication
3625
- # Extension can check internal=True to bypass normal auth
3626
- from hindsight_api.models import RequestContext
3627
-
3628
- internal_context = RequestContext(tenant_id=tenant_id, internal=True)
3629
- for opinion in new_opinions:
3630
- await self.retain_async(
3631
- bank_id=bank_id,
3632
- content=opinion.opinion,
3633
- context=f"formed during thinking about: {query}",
3634
- event_date=current_time,
3635
- fact_type_override="opinion",
3636
- confidence_score=opinion.confidence,
3637
- request_context=internal_context,
3638
- )
3639
-
3640
- except Exception as e:
3641
- logger.warning(f"[REFLECT] Failed to extract/store opinions: {str(e)}")
3642
-
3643
3901
  async def get_entity_observations(
3644
3902
  self,
3645
3903
  bank_id: str,
@@ -3649,99 +3907,22 @@ Guidelines:
3649
3907
  request_context: "RequestContext",
3650
3908
  ) -> list[Any]:
3651
3909
  """
3652
- Get observations linked to an entity.
3653
-
3654
- Args:
3655
- bank_id: bank IDentifier
3656
- entity_id: Entity UUID to get observations for
3657
- limit: Maximum number of observations to return
3658
- request_context: Request context for authentication.
3659
-
3660
- Returns:
3661
- List of EntityObservation objects
3662
- """
3663
- await self._authenticate_tenant(request_context)
3664
- pool = await self._get_pool()
3665
- async with acquire_with_retry(pool) as conn:
3666
- rows = await conn.fetch(
3667
- f"""
3668
- SELECT mu.text, mu.mentioned_at
3669
- FROM {fq_table("memory_units")} mu
3670
- JOIN {fq_table("unit_entities")} ue ON mu.id = ue.unit_id
3671
- WHERE mu.bank_id = $1
3672
- AND mu.fact_type = 'observation'
3673
- AND ue.entity_id = $2
3674
- ORDER BY mu.mentioned_at DESC
3675
- LIMIT $3
3676
- """,
3677
- bank_id,
3678
- uuid.UUID(entity_id),
3679
- limit,
3680
- )
3681
-
3682
- observations = []
3683
- for row in rows:
3684
- mentioned_at = row["mentioned_at"].isoformat() if row["mentioned_at"] else None
3685
- observations.append(EntityObservation(text=row["text"], mentioned_at=mentioned_at))
3686
- return observations
3910
+ Get observations for an entity.
3687
3911
 
3688
- async def get_entity_observations_batch(
3689
- self,
3690
- bank_id: str,
3691
- entity_ids: list[str],
3692
- *,
3693
- limit_per_entity: int = 5,
3694
- request_context: "RequestContext",
3695
- ) -> dict[str, list[Any]]:
3696
- """
3697
- Get observations for multiple entities in a single query.
3912
+ NOTE: Entity observations/summaries have been moved to mental models.
3913
+ This method returns an empty list. Use mental models for entity summaries.
3698
3914
 
3699
3915
  Args:
3700
3916
  bank_id: bank IDentifier
3701
- entity_ids: List of entity UUIDs to get observations for
3702
- limit_per_entity: Maximum observations per entity
3917
+ entity_id: Entity UUID to get observations for
3918
+ limit: Ignored (kept for backwards compatibility)
3703
3919
  request_context: Request context for authentication.
3704
3920
 
3705
3921
  Returns:
3706
- Dict mapping entity_id -> list of EntityObservation objects
3922
+ Empty list (observations now in mental models)
3707
3923
  """
3708
- if not entity_ids:
3709
- return {}
3710
-
3711
3924
  await self._authenticate_tenant(request_context)
3712
- pool = await self._get_pool()
3713
- async with acquire_with_retry(pool) as conn:
3714
- # Use window function to limit observations per entity
3715
- rows = await conn.fetch(
3716
- f"""
3717
- WITH ranked AS (
3718
- SELECT
3719
- ue.entity_id,
3720
- mu.text,
3721
- mu.mentioned_at,
3722
- ROW_NUMBER() OVER (PARTITION BY ue.entity_id ORDER BY mu.mentioned_at DESC) as rn
3723
- FROM {fq_table("memory_units")} mu
3724
- JOIN {fq_table("unit_entities")} ue ON mu.id = ue.unit_id
3725
- WHERE mu.bank_id = $1
3726
- AND mu.fact_type = 'observation'
3727
- AND ue.entity_id = ANY($2::uuid[])
3728
- )
3729
- SELECT entity_id, text, mentioned_at
3730
- FROM ranked
3731
- WHERE rn <= $3
3732
- ORDER BY entity_id, rn
3733
- """,
3734
- bank_id,
3735
- [uuid.UUID(eid) for eid in entity_ids],
3736
- limit_per_entity,
3737
- )
3738
-
3739
- result: dict[str, list[Any]] = {eid: [] for eid in entity_ids}
3740
- for row in rows:
3741
- entity_id = str(row["entity_id"])
3742
- mentioned_at = row["mentioned_at"].isoformat() if row["mentioned_at"] else None
3743
- result[entity_id].append(EntityObservation(text=row["text"], mentioned_at=mentioned_at))
3744
- return result
3925
+ return []
3745
3926
 
3746
3927
  async def list_entities(
3747
3928
  self,
@@ -3783,7 +3964,7 @@ Guidelines:
3783
3964
  SELECT id, canonical_name, mention_count, first_seen, last_seen, metadata
3784
3965
  FROM {fq_table("entities")}
3785
3966
  WHERE bank_id = $1
3786
- ORDER BY mention_count DESC, last_seen DESC
3967
+ ORDER BY mention_count DESC, last_seen DESC, id ASC
3787
3968
  LIMIT $2 OFFSET $3
3788
3969
  """,
3789
3970
  bank_id,
@@ -3911,22 +4092,23 @@ Guidelines:
3911
4092
  request_context: "RequestContext",
3912
4093
  ) -> EntityState:
3913
4094
  """
3914
- Get the current state (mental model) of an entity.
4095
+ Get the current state of an entity.
4096
+
4097
+ NOTE: Entity observations/summaries have been moved to mental models.
4098
+ This method returns an entity with empty observations.
3915
4099
 
3916
4100
  Args:
3917
4101
  bank_id: bank IDentifier
3918
4102
  entity_id: Entity UUID
3919
4103
  entity_name: Canonical name of the entity
3920
- limit: Maximum number of observations to include
4104
+ limit: Maximum number of observations to include (kept for backwards compat)
3921
4105
  request_context: Request context for authentication.
3922
4106
 
3923
4107
  Returns:
3924
- EntityState with observations
4108
+ EntityState with empty observations (summaries now in mental models)
3925
4109
  """
3926
- observations = await self.get_entity_observations(
3927
- bank_id, entity_id, limit=limit, request_context=request_context
3928
- )
3929
- return EntityState(entity_id=entity_id, canonical_name=entity_name, observations=observations)
4110
+ await self._authenticate_tenant(request_context)
4111
+ return EntityState(entity_id=entity_id, canonical_name=entity_name, observations=[])
3930
4112
 
3931
4113
  async def regenerate_entity_observations(
3932
4114
  self,
@@ -3937,535 +4119,1228 @@ Guidelines:
3937
4119
  version: str | None = None,
3938
4120
  conn=None,
3939
4121
  request_context: "RequestContext",
3940
- ) -> None:
4122
+ ) -> list[str]:
3941
4123
  """
3942
- Regenerate observations for an entity by:
3943
- 1. Checking version for deduplication (if provided)
3944
- 2. Searching all facts mentioning the entity
3945
- 3. Using LLM to synthesize observations (no personality)
3946
- 4. Deleting old observations for this entity
3947
- 5. Storing new observations linked to the entity
4124
+ Regenerate observations for an entity.
4125
+
4126
+ NOTE: Entity observations/summaries have been moved to mental models.
4127
+ This method is now a no-op and returns an empty list.
3948
4128
 
3949
4129
  Args:
3950
4130
  bank_id: bank IDentifier
3951
4131
  entity_id: Entity UUID
3952
4132
  entity_name: Canonical name of the entity
3953
4133
  version: Entity's last_seen timestamp when task was created (for deduplication)
3954
- conn: Optional database connection (for transactional atomicity with caller)
4134
+ conn: Optional database connection (ignored)
3955
4135
  request_context: Request context for authentication.
4136
+
4137
+ Returns:
4138
+ Empty list (observations now in mental models)
3956
4139
  """
3957
4140
  await self._authenticate_tenant(request_context)
3958
- pool = await self._get_pool()
3959
- entity_uuid = uuid.UUID(entity_id)
4141
+ return []
3960
4142
 
3961
- # Helper to run a query with provided conn or acquire one
3962
- async def fetch_with_conn(query, *args):
3963
- if conn is not None:
3964
- return await conn.fetch(query, *args)
3965
- else:
3966
- async with acquire_with_retry(pool) as acquired_conn:
3967
- return await acquired_conn.fetch(query, *args)
4143
+ # =========================================================================
4144
+ # Statistics & Operations (for HTTP API layer)
4145
+ # =========================================================================
3968
4146
 
3969
- async def fetchval_with_conn(query, *args):
3970
- if conn is not None:
3971
- return await conn.fetchval(query, *args)
3972
- else:
3973
- async with acquire_with_retry(pool) as acquired_conn:
3974
- return await acquired_conn.fetchval(query, *args)
4147
+ async def get_bank_stats(
4148
+ self,
4149
+ bank_id: str,
4150
+ *,
4151
+ request_context: "RequestContext",
4152
+ ) -> dict[str, Any]:
4153
+ """Get statistics about memory nodes and links for a bank."""
4154
+ await self._authenticate_tenant(request_context)
4155
+ pool = await self._get_pool()
3975
4156
 
3976
- # Step 1: Check version for deduplication
3977
- if version:
3978
- current_last_seen = await fetchval_with_conn(
4157
+ async with acquire_with_retry(pool) as conn:
4158
+ # Get node counts by fact_type
4159
+ node_stats = await conn.fetch(
3979
4160
  f"""
3980
- SELECT last_seen
3981
- FROM {fq_table("entities")}
3982
- WHERE id = $1 AND bank_id = $2
4161
+ SELECT fact_type, COUNT(*) as count
4162
+ FROM {fq_table("memory_units")}
4163
+ WHERE bank_id = $1
4164
+ GROUP BY fact_type
3983
4165
  """,
3984
- entity_uuid,
3985
4166
  bank_id,
3986
4167
  )
3987
4168
 
3988
- if current_last_seen and current_last_seen.isoformat() != version:
3989
- return []
3990
-
3991
- # Step 2: Get all facts mentioning this entity (exclude observations themselves)
3992
- rows = await fetch_with_conn(
3993
- f"""
3994
- SELECT mu.id, mu.text, mu.context, mu.occurred_start, mu.fact_type
3995
- FROM {fq_table("memory_units")} mu
3996
- JOIN {fq_table("unit_entities")} ue ON mu.id = ue.unit_id
3997
- WHERE mu.bank_id = $1
3998
- AND ue.entity_id = $2
3999
- AND mu.fact_type IN ('world', 'experience')
4000
- ORDER BY mu.occurred_start DESC
4001
- LIMIT 50
4002
- """,
4003
- bank_id,
4004
- entity_uuid,
4005
- )
4006
-
4007
- if not rows:
4008
- return []
4009
-
4010
- # Convert to MemoryFact objects for the observation extraction
4011
- facts = []
4012
- for row in rows:
4013
- occurred_start = row["occurred_start"].isoformat() if row["occurred_start"] else None
4014
- facts.append(
4015
- MemoryFact(
4016
- id=str(row["id"]),
4017
- text=row["text"],
4018
- fact_type=row["fact_type"],
4019
- context=row["context"],
4020
- occurred_start=occurred_start,
4021
- )
4022
- )
4023
-
4024
- # Step 3: Extract observations using LLM (no personality)
4025
- observations = await observation_utils.extract_observations_from_facts(
4026
- self._reflect_llm_config, entity_name, facts
4027
- )
4028
-
4029
- if not observations:
4030
- return []
4031
-
4032
- # Step 4: Delete old observations and insert new ones
4033
- # If conn provided, we're already in a transaction - don't start another
4034
- # If conn is None, acquire one and start a transaction
4035
- async def do_db_operations(db_conn):
4036
- # Delete old observations for this entity
4037
- await db_conn.execute(
4169
+ # Get link counts by link_type
4170
+ link_stats = await conn.fetch(
4038
4171
  f"""
4039
- DELETE FROM {fq_table("memory_units")}
4040
- WHERE id IN (
4041
- SELECT mu.id
4042
- FROM {fq_table("memory_units")} mu
4043
- JOIN {fq_table("unit_entities")} ue ON mu.id = ue.unit_id
4044
- WHERE mu.bank_id = $1
4045
- AND mu.fact_type = 'observation'
4046
- AND ue.entity_id = $2
4047
- )
4172
+ SELECT ml.link_type, COUNT(*) as count
4173
+ FROM {fq_table("memory_links")} ml
4174
+ JOIN {fq_table("memory_units")} mu ON ml.from_unit_id = mu.id
4175
+ WHERE mu.bank_id = $1
4176
+ GROUP BY ml.link_type
4048
4177
  """,
4049
4178
  bank_id,
4050
- entity_uuid,
4051
4179
  )
4052
4180
 
4053
- # Generate embeddings for new observations
4054
- embeddings = await embedding_utils.generate_embeddings_batch(self.embeddings, observations)
4181
+ # Get link counts by fact_type (from nodes)
4182
+ link_fact_type_stats = await conn.fetch(
4183
+ f"""
4184
+ SELECT mu.fact_type, COUNT(*) as count
4185
+ FROM {fq_table("memory_links")} ml
4186
+ JOIN {fq_table("memory_units")} mu ON ml.from_unit_id = mu.id
4187
+ WHERE mu.bank_id = $1
4188
+ GROUP BY mu.fact_type
4189
+ """,
4190
+ bank_id,
4191
+ )
4055
4192
 
4056
- # Insert new observations
4057
- current_time = utcnow()
4058
- created_ids = []
4193
+ # Get link counts by fact_type AND link_type
4194
+ link_breakdown_stats = await conn.fetch(
4195
+ f"""
4196
+ SELECT mu.fact_type, ml.link_type, COUNT(*) as count
4197
+ FROM {fq_table("memory_links")} ml
4198
+ JOIN {fq_table("memory_units")} mu ON ml.from_unit_id = mu.id
4199
+ WHERE mu.bank_id = $1
4200
+ GROUP BY mu.fact_type, ml.link_type
4201
+ """,
4202
+ bank_id,
4203
+ )
4059
4204
 
4060
- for obs_text, embedding in zip(observations, embeddings):
4061
- result = await db_conn.fetchrow(
4062
- f"""
4063
- INSERT INTO {fq_table("memory_units")} (
4064
- bank_id, text, embedding, context, event_date,
4065
- occurred_start, occurred_end, mentioned_at,
4066
- fact_type, access_count
4067
- )
4068
- VALUES ($1, $2, $3, $4, $5, $6, $7, $8, 'observation', 0)
4069
- RETURNING id
4070
- """,
4071
- bank_id,
4072
- obs_text,
4073
- str(embedding),
4074
- f"observation about {entity_name}",
4075
- current_time,
4076
- current_time,
4077
- current_time,
4078
- current_time,
4079
- )
4080
- obs_id = str(result["id"])
4081
- created_ids.append(obs_id)
4205
+ # Get pending and failed operations counts
4206
+ ops_stats = await conn.fetch(
4207
+ f"""
4208
+ SELECT status, COUNT(*) as count
4209
+ FROM {fq_table("async_operations")}
4210
+ WHERE bank_id = $1
4211
+ GROUP BY status
4212
+ """,
4213
+ bank_id,
4214
+ )
4082
4215
 
4083
- # Link observation to entity
4084
- await db_conn.execute(
4085
- f"""
4086
- INSERT INTO {fq_table("unit_entities")} (unit_id, entity_id)
4087
- VALUES ($1, $2)
4088
- """,
4089
- uuid.UUID(obs_id),
4090
- entity_uuid,
4216
+ return {
4217
+ "bank_id": bank_id,
4218
+ "node_counts": {row["fact_type"]: row["count"] for row in node_stats},
4219
+ "link_counts": {row["link_type"]: row["count"] for row in link_stats},
4220
+ "link_counts_by_fact_type": {row["fact_type"]: row["count"] for row in link_fact_type_stats},
4221
+ "link_breakdown": [
4222
+ {"fact_type": row["fact_type"], "link_type": row["link_type"], "count": row["count"]}
4223
+ for row in link_breakdown_stats
4224
+ ],
4225
+ "operations": {row["status"]: row["count"] for row in ops_stats},
4226
+ }
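For orientation, the returned stats dict has roughly this shape; the counts, link-type names, and operation statuses below are invented examples:

    {
        "bank_id": "bank-abc",
        "node_counts": {"world": 120, "experience": 45, "observation": 12},
        "link_counts": {"semantic": 300, "temporal": 80},
        "link_counts_by_fact_type": {"world": 250, "experience": 130},
        "link_breakdown": [{"fact_type": "world", "link_type": "semantic", "count": 200}],
        "operations": {"pending": 2, "completed": 57},
    }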
4227
+
4228
+ async def get_entity(
4229
+ self,
4230
+ bank_id: str,
4231
+ entity_id: str,
4232
+ *,
4233
+ request_context: "RequestContext",
4234
+ ) -> dict[str, Any] | None:
4235
+ """Get entity details including metadata and observations."""
4236
+ await self._authenticate_tenant(request_context)
4237
+ pool = await self._get_pool()
4238
+
4239
+ async with acquire_with_retry(pool) as conn:
4240
+ entity_row = await conn.fetchrow(
4241
+ f"""
4242
+ SELECT id, canonical_name, mention_count, first_seen, last_seen, metadata
4243
+ FROM {fq_table("entities")}
4244
+ WHERE bank_id = $1 AND id = $2
4245
+ """,
4246
+ bank_id,
4247
+ uuid.UUID(entity_id),
4248
+ )
4249
+
4250
+ if not entity_row:
4251
+ return None
4252
+
4253
+ # Get observations for the entity
4254
+ observations = await self.get_entity_observations(bank_id, entity_id, limit=20, request_context=request_context)
4255
+
4256
+ return {
4257
+ "id": str(entity_row["id"]),
4258
+ "canonical_name": entity_row["canonical_name"],
4259
+ "mention_count": entity_row["mention_count"],
4260
+ "first_seen": entity_row["first_seen"].isoformat() if entity_row["first_seen"] else None,
4261
+ "last_seen": entity_row["last_seen"].isoformat() if entity_row["last_seen"] else None,
4262
+ "metadata": entity_row["metadata"] or {},
4263
+ "observations": observations,
4264
+ }
4265
+
4266
+ def _parse_observations(self, observations_raw: list):
4267
+ """Parse raw observation dicts into typed Observation models.
4268
+
4269
+ Returns list of Observation models with computed trend/evidence_span/evidence_count.
4270
+ """
4271
+ from .reflect.observations import Observation, ObservationEvidence
4272
+
4273
+ observations: list[Observation] = []
4274
+ for obs in observations_raw:
4275
+ if not isinstance(obs, dict):
4276
+ continue
4277
+
4278
+ try:
4279
+ parsed = Observation(
4280
+ title=obs.get("title", ""),
4281
+ content=obs.get("content", ""),
4282
+ evidence=[
4283
+ ObservationEvidence(
4284
+ memory_id=ev.get("memory_id", ""),
4285
+ quote=ev.get("quote", ""),
4286
+ relevance=ev.get("relevance", ""),
4287
+ timestamp=ev.get("timestamp"),
4288
+ )
4289
+ for ev in obs.get("evidence", [])
4290
+ if isinstance(ev, dict)
4291
+ ],
4292
+ created_at=obs.get("created_at"),
4091
4293
  )
4294
+ observations.append(parsed)
4295
+ except Exception as e:
4296
+ logger.warning(f"Failed to parse observation: {e}")
4297
+ continue
4092
4298
 
4093
- return created_ids
4299
+ return observations
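An illustrative input for this parser, matching the keys read above; non-dict entries are skipped, and entries that fail validation are logged and skipped:

    raw = [
        {
            "title": "Prefers async updates",
            "content": "Asks for written summaries instead of meetings.",
            "evidence": [
                {"memory_id": "mem-123", "quote": "please send a summary", "relevance": "direct statement"}
            ],
            "created_at": "2024-05-01T12:00:00Z",
        },
        "not-a-dict",  # silently skipped by the isinstance check
    ]
    observations = engine._parse_observations(raw)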
4094
4300
 
4095
- if conn is not None:
4096
- # Use provided connection (already in a transaction)
4097
- return await do_db_operations(conn)
4098
- else:
4099
- # Acquire connection and start our own transaction
4100
- async with acquire_with_retry(pool) as acquired_conn:
4101
- async with acquired_conn.transaction():
4102
- return await do_db_operations(acquired_conn)
4301
+ async def _count_memories_since(
4302
+ self,
4303
+ bank_id: str,
4304
+ since_timestamp: str | None,
4305
+ pool=None,
4306
+ ) -> int:
4307
+ """
4308
+ Count memories created after a given timestamp.
4309
+
4310
+ Args:
4311
+ bank_id: Bank identifier
4312
+ since_timestamp: ISO timestamp string. If None, returns total count.
4313
+ pool: Optional database pool (uses default if not provided)
4103
4314
 
4104
- async def _regenerate_observations_sync(
4315
+ Returns:
4316
+ Number of memories created since the timestamp
4317
+ """
4318
+ if pool is None:
4319
+ pool = await self._get_pool()
4320
+
4321
+ async with acquire_with_retry(pool) as conn:
4322
+ if since_timestamp:
4323
+ # Parse the timestamp
4324
+ from datetime import datetime
4325
+
4326
+ try:
4327
+ ts = datetime.fromisoformat(since_timestamp.replace("Z", "+00:00"))
4328
+ except ValueError:
4329
+ # Invalid timestamp, return total count
4330
+ ts = None
4331
+
4332
+ if ts:
4333
+ count = await conn.fetchval(
4334
+ f"SELECT COUNT(*) FROM {fq_table('memory_units')} WHERE bank_id = $1 AND created_at > $2",
4335
+ bank_id,
4336
+ ts,
4337
+ )
4338
+ return count or 0
4339
+
4340
+ # No timestamp or invalid, return total count
4341
+ count = await conn.fetchval(
4342
+ f"SELECT COUNT(*) FROM {fq_table('memory_units')} WHERE bank_id = $1",
4343
+ bank_id,
4344
+ )
4345
+ return count or 0
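A sketch of how this internal helper behaves, with invented ids and timestamps: a trailing Z is normalized to +00:00 before parsing, and a missing or unparseable timestamp falls back to the bank's total count.

    recent = await engine._count_memories_since("bank-abc", "2024-05-01T00:00:00Z")  # created_at > ts
    total = await engine._count_memories_since("bank-abc", None)                     # all memories in the bank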
4346
+
4347
+ async def _invalidate_facts_from_mental_models(
4105
4348
  self,
4349
+ conn,
4106
4350
  bank_id: str,
4107
- entity_ids: list[str],
4108
- min_facts: int | None = None,
4109
- conn=None,
4110
- request_context: "RequestContext | None" = None,
4111
- ) -> None:
4351
+ fact_ids: list[str],
4352
+ ) -> int:
4112
4353
  """
4113
- Regenerate observations for entities synchronously (called during retain).
4354
+ Remove fact IDs from observation source_memory_ids when memories are deleted.
4114
4355
 
4115
- Processes entities in PARALLEL for faster execution.
4356
+ Observations are stored in memory_units with fact_type='observation'
4357
+ and have a source_memory_ids column (UUID[]) tracking their source memories.
4116
4358
 
4117
4359
  Args:
4360
+ conn: Database connection
4118
4361
  bank_id: Bank identifier
4119
- entity_ids: List of entity IDs to process
4120
- min_facts: Minimum facts required to regenerate observations (uses config default if None)
4121
- conn: Optional database connection (for transactional atomicity)
4362
+ fact_ids: List of fact IDs to remove from observations
4363
+
4364
+ Returns:
4365
+ Number of observations updated
4122
4366
  """
4123
- if not bank_id or not entity_ids:
4124
- return
4367
+ if not fact_ids:
4368
+ return 0
4369
+
4370
+ # Convert string IDs to UUIDs for the array comparison
4371
+ import uuid as uuid_module
4372
+
4373
+ fact_uuids = [uuid_module.UUID(fid) for fid in fact_ids]
4374
+
4375
+ # Update observations (memory_units with fact_type='observation')
4376
+ # by removing the deleted fact IDs from source_memory_ids
4377
+ # Use array subtraction: source_memory_ids - deleted_ids
4378
+ result = await conn.execute(
4379
+ f"""
4380
+ UPDATE {fq_table("memory_units")}
4381
+ SET source_memory_ids = (
4382
+ SELECT COALESCE(array_agg(elem), ARRAY[]::uuid[])
4383
+ FROM unnest(source_memory_ids) AS elem
4384
+ WHERE elem != ALL($2::uuid[])
4385
+ ),
4386
+ updated_at = NOW()
4387
+ WHERE bank_id = $1
4388
+ AND fact_type = 'observation'
4389
+ AND source_memory_ids && $2::uuid[]
4390
+ """,
4391
+ bank_id,
4392
+ fact_uuids,
4393
+ )
4394
+
4395
+ # Parse the result to get number of updated rows
4396
+ updated_count = int(result.split()[-1]) if result and "UPDATE" in result else 0
4397
+ if updated_count > 0:
4398
+ logger.info(
4399
+ f"[OBSERVATIONS] Invalidated {len(fact_ids)} fact IDs from {updated_count} observations in bank {bank_id}"
4400
+ )
4401
+ return updated_count
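The array-subtraction UPDATE above keeps every source_memory_ids element that is not in the deleted set; a Python equivalent of the per-row effect, shown only to clarify the intent:

    def remove_deleted_ids(source_memory_ids: list, deleted_ids: set) -> list:
        # Only rows whose array overlaps the deleted set (the && predicate) are touched;
        # the surviving IDs are re-aggregated, or an empty uuid[] if none remain.
        return [sid for sid in source_memory_ids if sid not in deleted_ids]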
4402
+
4403
+ # =========================================================================
4404
+ # MENTAL MODELS (CONSOLIDATED) - Read-only access to auto-consolidated mental models
4405
+ # =========================================================================
4125
4406
 
4126
- # Use config default if min_facts not specified
4127
- if min_facts is None:
4128
- min_facts = get_config().observation_min_facts
4407
+ async def list_mental_models_consolidated(
4408
+ self,
4409
+ bank_id: str,
4410
+ *,
4411
+ tags: list[str] | None = None,
4412
+ tags_match: str = "any",
4413
+ limit: int = 100,
4414
+ offset: int = 0,
4415
+ request_context: "RequestContext",
4416
+ ) -> list[dict[str, Any]]:
4417
+ """List auto-consolidated observations for a bank.
4129
4418
 
4130
- # Convert to UUIDs
4131
- entity_uuids = [uuid.UUID(eid) if isinstance(eid, str) else eid for eid in entity_ids]
4419
+ Observations are stored in memory_units with fact_type='observation'.
4420
+ They are automatically created and updated by the consolidation engine.
4132
4421
 
4133
- # Use provided connection or acquire a new one
4134
- if conn is not None:
4135
- # Use the provided connection (transactional with caller)
4136
- entity_rows = await conn.fetch(
4422
+ Args:
4423
+ bank_id: Bank identifier
4424
+ tags: Optional tags to filter by
4425
+ tags_match: How to match tags - 'any', 'all', or 'exact'
4426
+ limit: Maximum number of results
4427
+ offset: Offset for pagination
4428
+ request_context: Request context for authentication
4429
+
4430
+ Returns:
4431
+ List of observation dicts
4432
+ """
4433
+ await self._authenticate_tenant(request_context)
4434
+ pool = await self._get_pool()
4435
+
4436
+ async with acquire_with_retry(pool) as conn:
4437
+ # Build tag filter
4438
+ tag_filter = ""
4439
+ params: list[Any] = [bank_id, limit, offset]
4440
+ if tags:
4441
+ if tags_match == "all":
4442
+ tag_filter = " AND tags @> $4::varchar[]"
4443
+ elif tags_match == "exact":
4444
+ tag_filter = " AND tags = $4::varchar[]"
4445
+ else: # any
4446
+ tag_filter = " AND tags && $4::varchar[]"
4447
+ params.append(tags)
4448
+
4449
+ rows = await conn.fetch(
4137
4450
  f"""
4138
- SELECT id, canonical_name FROM {fq_table("entities")}
4139
- WHERE id = ANY($1) AND bank_id = $2
4451
+ SELECT id, bank_id, text, proof_count, history, tags, source_memory_ids, created_at, updated_at
4452
+ FROM {fq_table("memory_units")}
4453
+ WHERE bank_id = $1 AND fact_type = 'observation' {tag_filter}
4454
+ ORDER BY updated_at DESC NULLS LAST
4455
+ LIMIT $2 OFFSET $3
4456
+ """,
4457
+ *params,
4458
+ )
4459
+
4460
+ return [self._row_to_observation_consolidated(row) for row in rows]
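The three tags_match modes map onto PostgreSQL array operators; with invented stored tags ["billing", "q3"]:

    # tags=["billing"],       tags_match="any"   -> matches (&& overlap)
    # tags=["billing", "q3"], tags_match="all"   -> matches (@> contains)
    # tags=["billing"],       tags_match="exact" -> no match (= requires the identical array)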
4461
+
4462
+ async def get_observation_consolidated(
4463
+ self,
4464
+ bank_id: str,
4465
+ observation_id: str,
4466
+ *,
4467
+ include_source_memories: bool = True,
4468
+ request_context: "RequestContext",
4469
+ ) -> dict[str, Any] | None:
4470
+ """Get a single observation by ID.
4471
+
4472
+ Args:
4473
+ bank_id: Bank identifier
4474
+ observation_id: Observation ID
4475
+ include_source_memories: Whether to include full source memory details
4476
+ request_context: Request context for authentication
4477
+
4478
+ Returns:
4479
+ Observation dict or None if not found
4480
+ """
4481
+ await self._authenticate_tenant(request_context)
4482
+ pool = await self._get_pool()
4483
+
4484
+ async with acquire_with_retry(pool) as conn:
4485
+ row = await conn.fetchrow(
4486
+ f"""
4487
+ SELECT id, bank_id, text, proof_count, history, tags, source_memory_ids, created_at, updated_at
4488
+ FROM {fq_table("memory_units")}
4489
+ WHERE bank_id = $1 AND id = $2 AND fact_type = 'observation'
4140
4490
  """,
4141
- entity_uuids,
4142
4491
  bank_id,
4492
+ observation_id,
4493
+ )
4494
+
4495
+ if not row:
4496
+ return None
4497
+
4498
+ result = self._row_to_observation_consolidated(row)
4499
+
4500
+ # Fetch source memories if requested and source_memory_ids exist
4501
+ if include_source_memories and result.get("source_memory_ids"):
4502
+ source_ids = [uuid.UUID(sid) if isinstance(sid, str) else sid for sid in result["source_memory_ids"]]
4503
+ source_rows = await conn.fetch(
4504
+ f"""
4505
+ SELECT id, text, fact_type, context, occurred_start, mentioned_at
4506
+ FROM {fq_table("memory_units")}
4507
+ WHERE id = ANY($1::uuid[])
4508
+ ORDER BY mentioned_at DESC NULLS LAST
4509
+ """,
4510
+ source_ids,
4511
+ )
4512
+ result["source_memories"] = [
4513
+ {
4514
+ "id": str(r["id"]),
4515
+ "text": r["text"],
4516
+ "type": r["fact_type"],
4517
+ "context": r["context"],
4518
+ "occurred_start": r["occurred_start"].isoformat() if r["occurred_start"] else None,
4519
+ "mentioned_at": r["mentioned_at"].isoformat() if r["mentioned_at"] else None,
4520
+ }
4521
+ for r in source_rows
4522
+ ]
4523
+
4524
+ return result
4525
+
4526
+ def _row_to_observation_consolidated(self, row: Any) -> dict[str, Any]:
4527
+ """Convert a database row to an observation dict."""
4528
+ import json
4529
+
4530
+ history = row["history"]
4531
+ if isinstance(history, str):
4532
+ history = json.loads(history)
4533
+ elif history is None:
4534
+ history = []
4535
+
4536
+ # Convert source_memory_ids to strings
4537
+ source_memory_ids = row.get("source_memory_ids") or []
4538
+ source_memory_ids = [str(sid) for sid in source_memory_ids]
4539
+
4540
+ return {
4541
+ "id": str(row["id"]),
4542
+ "bank_id": row["bank_id"],
4543
+ "text": row["text"],
4544
+ "proof_count": row["proof_count"] or 1,
4545
+ "history": history,
4546
+ "tags": row["tags"] or [],
4547
+ "source_memory_ids": source_memory_ids,
4548
+ "source_memories": [], # Populated separately when fetching full details
4549
+ "created_at": row["created_at"].isoformat() if row["created_at"] else None,
4550
+ "updated_at": row["updated_at"].isoformat() if row["updated_at"] else None,
4551
+ }
4552
+
4553
+ # =========================================================================
4554
+ # MENTAL MODELS CRUD
4555
+ # =========================================================================
4556
+
4557
+ async def list_mental_models(
4558
+ self,
4559
+ bank_id: str,
4560
+ *,
4561
+ tags: list[str] | None = None,
4562
+ tags_match: str = "any",
4563
+ limit: int = 100,
4564
+ offset: int = 0,
4565
+ request_context: "RequestContext",
4566
+ ) -> list[dict[str, Any]]:
4567
+ """List pinned mental models for a bank.
4568
+
4569
+ Args:
4570
+ bank_id: Bank identifier
4571
+ tags: Optional tags to filter by
4572
+ tags_match: How to match tags - 'any', 'all', or 'exact'
4573
+ limit: Maximum number of results
4574
+ offset: Offset for pagination
4575
+ request_context: Request context for authentication
4576
+
4577
+ Returns:
4578
+ List of pinned mental model dicts
4579
+ """
4580
+ await self._authenticate_tenant(request_context)
4581
+ pool = await self._get_pool()
4582
+
4583
+ async with acquire_with_retry(pool) as conn:
4584
+ # Build tag filter
4585
+ tag_filter = ""
4586
+ params: list[Any] = [bank_id, limit, offset]
4587
+ if tags:
4588
+ if tags_match == "all":
4589
+ tag_filter = " AND tags @> $4::varchar[]"
4590
+ elif tags_match == "exact":
4591
+ tag_filter = " AND tags = $4::varchar[]"
4592
+ else: # any
4593
+ tag_filter = " AND tags && $4::varchar[]"
4594
+ params.append(tags)
4595
+
4596
+ rows = await conn.fetch(
4597
+ f"""
4598
+ SELECT id, bank_id, name, source_query, content, tags,
4599
+ last_refreshed_at, created_at, reflect_response,
4600
+ max_tokens, trigger
4601
+ FROM {fq_table("mental_models")}
4602
+ WHERE bank_id = $1 {tag_filter}
4603
+ ORDER BY last_refreshed_at DESC
4604
+ LIMIT $2 OFFSET $3
4605
+ """,
4606
+ *params,
4143
4607
  )
4144
- entity_names = {row["id"]: row["canonical_name"] for row in entity_rows}
4145
4608
 
4146
- fact_counts = await conn.fetch(
4609
+ return [self._row_to_mental_model(row) for row in rows]
4610
+
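The tags_match modes map directly onto Postgres array operators: 'any' uses overlap (&&), 'all' uses containment (@>), and 'exact' uses equality. A sketch of just that mapping (the helper name is illustrative):
def tag_filter_clause(tags_match: str, param_idx: int = 4) -> str:
    # Same operator choice as the query above.
    if tags_match == "all":
        return f" AND tags @> ${param_idx}::varchar[]"   # row must contain every requested tag
    if tags_match == "exact":
        return f" AND tags = ${param_idx}::varchar[]"    # row tags must equal the requested list
    return f" AND tags && ${param_idx}::varchar[]"       # 'any': at least one tag in common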
4611
+ async def get_mental_model(
4612
+ self,
4613
+ bank_id: str,
4614
+ mental_model_id: str,
4615
+ *,
4616
+ request_context: "RequestContext",
4617
+ ) -> dict[str, Any] | None:
4618
+ """Get a single pinned mental model by ID.
4619
+
4620
+ Args:
4621
+ bank_id: Bank identifier
4622
+ mental_model_id: Pinned mental model UUID
4623
+ request_context: Request context for authentication
4624
+
4625
+ Returns:
4626
+ Pinned mental model dict or None if not found
4627
+ """
4628
+ await self._authenticate_tenant(request_context)
4629
+ pool = await self._get_pool()
4630
+
4631
+ async with acquire_with_retry(pool) as conn:
4632
+ row = await conn.fetchrow(
4147
4633
  f"""
4148
- SELECT ue.entity_id, COUNT(*) as cnt
4149
- FROM {fq_table("unit_entities")} ue
4150
- JOIN {fq_table("memory_units")} mu ON ue.unit_id = mu.id
4151
- WHERE ue.entity_id = ANY($1) AND mu.bank_id = $2
4152
- GROUP BY ue.entity_id
4634
+ SELECT id, bank_id, name, source_query, content, tags,
4635
+ last_refreshed_at, created_at, reflect_response,
4636
+ max_tokens, trigger
4637
+ FROM {fq_table("mental_models")}
4638
+ WHERE bank_id = $1 AND id = $2
4153
4639
  """,
4154
- entity_uuids,
4155
4640
  bank_id,
4641
+ mental_model_id,
4156
4642
  )
4157
- entity_fact_counts = {row["entity_id"]: row["cnt"] for row in fact_counts}
4158
- else:
4159
- # Acquire a new connection (standalone call)
4160
- pool = await self._get_pool()
4161
- async with pool.acquire() as acquired_conn:
4162
- entity_rows = await acquired_conn.fetch(
4643
+
4644
+ return self._row_to_mental_model(row) if row else None
4645
+
4646
+ async def create_mental_model(
4647
+ self,
4648
+ bank_id: str,
4649
+ name: str,
4650
+ source_query: str,
4651
+ content: str,
4652
+ *,
4653
+ mental_model_id: str | None = None,
4654
+ tags: list[str] | None = None,
4655
+ max_tokens: int | None = None,
4656
+ trigger: dict[str, Any] | None = None,
4657
+ request_context: "RequestContext",
4658
+ ) -> dict[str, Any]:
4659
+ """Create a new pinned mental model.
4660
+
4661
+ Args:
4662
+ bank_id: Bank identifier
4663
+ name: Human-readable name for the mental model
4664
+ source_query: The query that generated this mental model
4665
+ content: The synthesized content
4666
+ mental_model_id: Optional UUID for the mental model (auto-generated if not provided)
4667
+ tags: Optional tags for scoped visibility
4668
+ max_tokens: Token limit for content generation during refresh
4669
+ trigger: Trigger settings (e.g., refresh_after_consolidation)
4670
+ request_context: Request context for authentication
4671
+
4672
+ Returns:
4673
+ The created pinned mental model dict
4674
+ """
4675
+ await self._authenticate_tenant(request_context)
4676
+ pool = await self._get_pool()
4677
+
4678
+ # Generate embedding for the content
4679
+ embedding_text = f"{name} {content}"
4680
+ embedding = await embedding_utils.generate_embeddings_batch(self.embeddings, [embedding_text])
4681
+ # Convert embedding to string for asyncpg vector type
4682
+ embedding_str = str(embedding[0]) if embedding else None
4683
+
4684
+ async with acquire_with_retry(pool) as conn:
4685
+ if mental_model_id:
4686
+ row = await conn.fetchrow(
4163
4687
  f"""
4164
- SELECT id, canonical_name FROM {fq_table("entities")}
4165
- WHERE id = ANY($1) AND bank_id = $2
4688
+ INSERT INTO {fq_table("mental_models")}
4689
+ (id, bank_id, name, source_query, content, embedding, tags, max_tokens, trigger)
4690
+ VALUES ($1, $2, $3, $4, $5, $6, $7, COALESCE($8, 2048), COALESCE($9, '{{"refresh_after_consolidation": false}}'::jsonb))
4691
+ RETURNING id, bank_id, name, source_query, content, tags,
4692
+ last_refreshed_at, created_at, reflect_response,
4693
+ max_tokens, trigger
4166
4694
  """,
4167
- entity_uuids,
4695
+ mental_model_id,
4168
4696
  bank_id,
4697
+ name,
4698
+ source_query,
4699
+ content,
4700
+ embedding_str,
4701
+ tags or [],
4702
+ max_tokens,
4703
+ json.dumps(trigger) if trigger else None,
4169
4704
  )
4170
- entity_names = {row["id"]: row["canonical_name"] for row in entity_rows}
4171
-
4172
- fact_counts = await acquired_conn.fetch(
4705
+ else:
4706
+ row = await conn.fetchrow(
4173
4707
  f"""
4174
- SELECT ue.entity_id, COUNT(*) as cnt
4175
- FROM {fq_table("unit_entities")} ue
4176
- JOIN {fq_table("memory_units")} mu ON ue.unit_id = mu.id
4177
- WHERE ue.entity_id = ANY($1) AND mu.bank_id = $2
4178
- GROUP BY ue.entity_id
4708
+ INSERT INTO {fq_table("mental_models")}
4709
+ (bank_id, name, source_query, content, embedding, tags, max_tokens, trigger)
4710
+ VALUES ($1, $2, $3, $4, $5, $6, COALESCE($7, 2048), COALESCE($8, '{{"refresh_after_consolidation": false}}'::jsonb))
4711
+ RETURNING id, bank_id, name, source_query, content, tags,
4712
+ last_refreshed_at, created_at, reflect_response,
4713
+ max_tokens, trigger
4179
4714
  """,
4180
- entity_uuids,
4181
4715
  bank_id,
4716
+ name,
4717
+ source_query,
4718
+ content,
4719
+ embedding_str,
4720
+ tags or [],
4721
+ max_tokens,
4722
+ json.dumps(trigger) if trigger else None,
4182
4723
  )
4183
- entity_fact_counts = {row["entity_id"]: row["cnt"] for row in fact_counts}
4184
4724
 
4185
- # Filter entities that meet the threshold
4186
- entities_to_process = []
4187
- for entity_id in entity_ids:
4188
- entity_uuid = uuid.UUID(entity_id) if isinstance(entity_id, str) else entity_id
4189
- if entity_uuid not in entity_names:
4190
- continue
4191
- fact_count = entity_fact_counts.get(entity_uuid, 0)
4192
- if fact_count >= min_facts:
4193
- entities_to_process.append((entity_id, entity_names[entity_uuid]))
4725
+ logger.info(f"[MENTAL_MODELS] Created pinned mental model '{name}' for bank {bank_id}")
4726
+ return self._row_to_mental_model(row)
4194
4727
 
4195
- if not entities_to_process:
4196
- return
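A hedged usage sketch of create_mental_model above; `engine` and `ctx` are assumed (engine instance plus RequestContext), and the bank and query strings are illustrative. When max_tokens and trigger are omitted, the INSERT's COALESCE defaults apply:
async def pin_pricing_model(engine, ctx):
    mm = await engine.create_mental_model(
        "my-bank",
        name="Pricing objections",
        source_query="What pricing objections come up most often?",
        content="(initial synthesis)",
        tags=["sales"],
        request_context=ctx,
    )
    # max_tokens defaults to 2048 and trigger to
    # {"refresh_after_consolidation": false} on the server side.
    return mm["id"]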
4728
+ async def refresh_mental_model(
4729
+ self,
4730
+ bank_id: str,
4731
+ mental_model_id: str,
4732
+ *,
4733
+ request_context: "RequestContext",
4734
+ ) -> dict[str, Any] | None:
4735
+ """Refresh a pinned mental model by re-running its source query.
4197
4736
 
4198
- # Use internal context if not provided (for internal/background calls)
4199
- from hindsight_api.models import RequestContext as RC
4737
+ This method:
4738
+ 1. Gets the pinned mental model
4739
+ 2. Runs the source_query through reflect
4740
+ 3. Updates the content with the new synthesis
4741
+ 4. Updates last_refreshed_at
4200
4742
 
4201
- ctx = request_context if request_context is not None else RC()
4743
+ Args:
4744
+ bank_id: Bank identifier
4745
+ mental_model_id: Pinned mental model UUID
4746
+ request_context: Request context for authentication
4747
+
4748
+ Returns:
4749
+ Updated pinned mental model dict or None if not found
4750
+ """
4751
+ await self._authenticate_tenant(request_context)
4752
+
4753
+ # Get the current mental model
4754
+ mental_model = await self.get_mental_model(bank_id, mental_model_id, request_context=request_context)
4755
+ if not mental_model:
4756
+ return None
4757
+
4758
+ # Run reflect with the source query, excluding the mental model being refreshed
4759
+ reflect_result = await self.reflect_async(
4760
+ bank_id=bank_id,
4761
+ query=mental_model["source_query"],
4762
+ request_context=request_context,
4763
+ exclude_mental_model_ids=[mental_model_id],
4764
+ )
4765
+
4766
+ # Build reflect_response payload to store
4767
+ reflect_response_payload = {
4768
+ "text": reflect_result.text,
4769
+ "based_on": {
4770
+ fact_type: [
4771
+ {
4772
+ "id": str(fact.id),
4773
+ "text": fact.text,
4774
+ "type": fact_type,
4775
+ }
4776
+ for fact in facts
4777
+ ]
4778
+ for fact_type, facts in reflect_result.based_on.items()
4779
+ },
4780
+ "mental_models": [], # Mental models are included in based_on["mental-models"]
4781
+ }
4782
+
4783
+ # Update the mental model with new content and reflect_response
4784
+ return await self.update_mental_model(
4785
+ bank_id,
4786
+ mental_model_id,
4787
+ content=reflect_result.text,
4788
+ reflect_response=reflect_response_payload,
4789
+ request_context=request_context,
4790
+ )
4791
+
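The reflect_response stored by refresh_mental_model mirrors the reflect output: the synthesized text plus supporting facts grouped by fact type, with pinned models appearing under based_on rather than the top-level mental_models list. A sketch of the payload shape; the fact-type key and all values below are illustrative:
reflect_response_example = {
    "text": "Synthesized answer to the source_query...",
    "based_on": {
        "some-fact-type": [                      # keys are whatever fact types reflect returns
            {"id": "fact-uuid", "text": "supporting fact", "type": "some-fact-type"},
        ],
    },
    "mental_models": [],                         # kept empty; models live in based_on instead
}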
4792
+ async def update_mental_model(
4793
+ self,
4794
+ bank_id: str,
4795
+ mental_model_id: str,
4796
+ *,
4797
+ name: str | None = None,
4798
+ content: str | None = None,
4799
+ source_query: str | None = None,
4800
+ max_tokens: int | None = None,
4801
+ tags: list[str] | None = None,
4802
+ trigger: dict[str, Any] | None = None,
4803
+ reflect_response: dict[str, Any] | None = None,
4804
+ request_context: "RequestContext",
4805
+ ) -> dict[str, Any] | None:
4806
+ """Update a pinned mental model.
4807
+
4808
+ Args:
4809
+ bank_id: Bank identifier
4810
+ mental_model_id: Pinned mental model UUID
4811
+ name: New name (if changing)
4812
+ content: New content (if changing)
4813
+ source_query: New source query (if changing)
4814
+ max_tokens: New max tokens (if changing)
4815
+ tags: New tags (if changing)
4816
+ trigger: New trigger settings (if changing)
4817
+ reflect_response: Full reflect API response payload (if changing)
4818
+ request_context: Request context for authentication
4819
+
4820
+ Returns:
4821
+ Updated pinned mental model dict or None if not found
4822
+ """
4823
+ await self._authenticate_tenant(request_context)
4824
+ pool = await self._get_pool()
4825
+
4826
+ async with acquire_with_retry(pool) as conn:
4827
+ # Build dynamic update
4828
+ updates = []
4829
+ params: list[Any] = [bank_id, mental_model_id]
4830
+ param_idx = 3
4831
+
4832
+ if name is not None:
4833
+ updates.append(f"name = ${param_idx}")
4834
+ params.append(name)
4835
+ param_idx += 1
4836
+
4837
+ if content is not None:
4838
+ updates.append(f"content = ${param_idx}")
4839
+ params.append(content)
4840
+ param_idx += 1
4841
+ updates.append("last_refreshed_at = NOW()")
4842
+ # Also update embedding (convert to string for asyncpg vector type)
4843
+ embedding_text = f"{name or ''} {content}"
4844
+ embedding = await embedding_utils.generate_embeddings_batch(self.embeddings, [embedding_text])
4845
+ if embedding:
4846
+ updates.append(f"embedding = ${param_idx}")
4847
+ params.append(str(embedding[0]))
4848
+ param_idx += 1
4849
+
4850
+ if reflect_response is not None:
4851
+ updates.append(f"reflect_response = ${param_idx}")
4852
+ params.append(json.dumps(reflect_response))
4853
+ param_idx += 1
4854
+
4855
+ if source_query is not None:
4856
+ updates.append(f"source_query = ${param_idx}")
4857
+ params.append(source_query)
4858
+ param_idx += 1
4859
+
4860
+ if max_tokens is not None:
4861
+ updates.append(f"max_tokens = ${param_idx}")
4862
+ params.append(max_tokens)
4863
+ param_idx += 1
4864
+
4865
+ if tags is not None:
4866
+ updates.append(f"tags = ${param_idx}")
4867
+ params.append(tags)
4868
+ param_idx += 1
4869
+
4870
+ if trigger is not None:
4871
+ updates.append(f"trigger = ${param_idx}")
4872
+ params.append(json.dumps(trigger))
4873
+ param_idx += 1
4874
+
4875
+ if not updates:
4876
+ return None
4877
+
4878
+ query = f"""
4879
+ UPDATE {fq_table("mental_models")}
4880
+ SET {", ".join(updates)}
4881
+ WHERE bank_id = $1 AND id = $2
4882
+ RETURNING id, bank_id, name, source_query, content, tags,
4883
+ last_refreshed_at, created_at, reflect_response,
4884
+ max_tokens, trigger
4885
+ """
4886
+
4887
+ row = await conn.fetchrow(query, *params)
4888
+
4889
+ return self._row_to_mental_model(row) if row else None
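A distilled, standalone sketch of the dynamic-UPDATE pattern used above: $1 and $2 stay reserved for bank_id and id, and each supplied field claims the next placeholder (the real method also bumps last_refreshed_at and the embedding when content changes; the names below are illustrative):
import json
from typing import Any

def build_update(bank_id: str, model_id: str, **fields: Any) -> tuple[str, list[Any]]:
    updates: list[str] = []
    params: list[Any] = [bank_id, model_id]
    idx = 3
    for column, value in fields.items():
        if value is None:
            continue
        if column in ("trigger", "reflect_response"):
            value = json.dumps(value)            # JSONB columns are passed as JSON strings
        updates.append(f"{column} = ${idx}")
        params.append(value)
        idx += 1
    if not updates:
        raise ValueError("nothing to update")
    sql = f"UPDATE mental_models SET {', '.join(updates)} WHERE bank_id = $1 AND id = $2"
    return sql, params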
4202
4890
 
4203
- # Process all entities in PARALLEL (LLM calls are the bottleneck)
4204
- async def process_entity(entity_id: str, entity_name: str):
4891
+ async def delete_mental_model(
4892
+ self,
4893
+ bank_id: str,
4894
+ mental_model_id: str,
4895
+ *,
4896
+ request_context: "RequestContext",
4897
+ ) -> bool:
4898
+ """Delete a pinned mental model.
4899
+
4900
+ Args:
4901
+ bank_id: Bank identifier
4902
+ mental_model_id: Pinned mental model UUID
4903
+ request_context: Request context for authentication
4904
+
4905
+ Returns:
4906
+ True if deleted, False if not found
4907
+ """
4908
+ await self._authenticate_tenant(request_context)
4909
+ pool = await self._get_pool()
4910
+
4911
+ async with acquire_with_retry(pool) as conn:
4912
+ result = await conn.execute(
4913
+ f"DELETE FROM {fq_table('mental_models')} WHERE bank_id = $1 AND id = $2",
4914
+ bank_id,
4915
+ mental_model_id,
4916
+ )
4917
+
4918
+ return result == "DELETE 1"
4919
+
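asyncpg's Connection.execute returns the command status tag, so comparing against "DELETE 1" is how the method distinguishes a deleted row from a missing one. A small sketch (the connection is assumed and the table name is simplified; the module resolves it via fq_table):
async def delete_once(conn, model_id):
    status = await conn.execute("DELETE FROM mental_models WHERE id = $1", model_id)
    return status == "DELETE 1"   # "DELETE 0" means no row matched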
4920
+ def _row_to_mental_model(self, row) -> dict[str, Any]:
4921
+ """Convert a database row to a mental model dict."""
4922
+ reflect_response = row.get("reflect_response")
4923
+ # Parse JSON string to dict if needed (asyncpg may return JSONB as string)
4924
+ if isinstance(reflect_response, str):
4205
4925
  try:
4206
- await self.regenerate_entity_observations(
4207
- bank_id, entity_id, entity_name, version=None, conn=conn, request_context=ctx
4208
- )
4209
- except Exception as e:
4210
- logger.error(f"[OBSERVATIONS] Error processing entity {entity_id}: {e}")
4926
+ reflect_response = json.loads(reflect_response)
4927
+ except json.JSONDecodeError:
4928
+ reflect_response = None
4929
+ trigger = row.get("trigger")
4930
+ if isinstance(trigger, str):
4931
+ try:
4932
+ trigger = json.loads(trigger)
4933
+ except json.JSONDecodeError:
4934
+ trigger = None
4935
+ return {
4936
+ "id": str(row["id"]),
4937
+ "bank_id": row["bank_id"],
4938
+ "name": row["name"],
4939
+ "source_query": row["source_query"],
4940
+ "content": row["content"],
4941
+ "tags": row["tags"] or [],
4942
+ "max_tokens": row.get("max_tokens"),
4943
+ "trigger": trigger,
4944
+ "last_refreshed_at": row["last_refreshed_at"].isoformat() if row["last_refreshed_at"] else None,
4945
+ "created_at": row["created_at"].isoformat() if row["created_at"] else None,
4946
+ "reflect_response": reflect_response,
4947
+ }
4948
+
4949
+ # =========================================================================
4950
+ # Directives - Hard rules injected into prompts
4951
+ # =========================================================================
4952
+
4953
+ async def list_directives(
4954
+ self,
4955
+ bank_id: str,
4956
+ *,
4957
+ tags: list[str] | None = None,
4958
+ tags_match: str = "any",
4959
+ active_only: bool = True,
4960
+ limit: int = 100,
4961
+ offset: int = 0,
4962
+ request_context: "RequestContext",
4963
+ ) -> list[dict[str, Any]]:
4964
+ """List directives for a bank.
4211
4965
 
4212
- await asyncio.gather(*[process_entity(eid, name) for eid, name in entities_to_process])
4966
+ Args:
4967
+ bank_id: Bank identifier
4968
+ tags: Optional tags to filter by
4969
+ tags_match: How to match tags - 'any', 'all', or 'exact'
4970
+ active_only: Only return active directives (default True)
4971
+ limit: Maximum number of results
4972
+ offset: Offset for pagination
4973
+ request_context: Request context for authentication
4213
4974
 
4214
- async def _handle_regenerate_observations(self, task_dict: dict[str, Any]):
4975
+ Returns:
4976
+ List of directive dicts
4215
4977
  """
4216
- Handler for regenerate_observations tasks.
4978
+ await self._authenticate_tenant(request_context)
4979
+ pool = await self._get_pool()
4980
+
4981
+ async with acquire_with_retry(pool) as conn:
4982
+ # Build filters
4983
+ filters = ["bank_id = $1"]
4984
+ params: list[Any] = [bank_id]
4985
+ param_idx = 2
4986
+
4987
+ if active_only:
4988
+ filters.append("is_active = TRUE")
4989
+
4990
+ if tags:
4991
+ if tags_match == "all":
4992
+ filters.append(f"tags @> ${param_idx}::varchar[]")
4993
+ elif tags_match == "exact":
4994
+ filters.append(f"tags = ${param_idx}::varchar[]")
4995
+ else: # any
4996
+ filters.append(f"tags && ${param_idx}::varchar[]")
4997
+ params.append(tags)
4998
+ param_idx += 1
4999
+
5000
+ params.extend([limit, offset])
5001
+
5002
+ rows = await conn.fetch(
5003
+ f"""
5004
+ SELECT id, bank_id, name, content, priority, is_active, tags, created_at, updated_at
5005
+ FROM {fq_table("directives")}
5006
+ WHERE {" AND ".join(filters)}
5007
+ ORDER BY priority DESC, created_at DESC
5008
+ LIMIT ${param_idx} OFFSET ${param_idx + 1}
5009
+ """,
5010
+ *params,
5011
+ )
5012
+
5013
+ return [self._row_to_directive(row) for row in rows]
5014
+
5015
+ async def get_directive(
5016
+ self,
5017
+ bank_id: str,
5018
+ directive_id: str,
5019
+ *,
5020
+ request_context: "RequestContext",
5021
+ ) -> dict[str, Any] | None:
5022
+ """Get a single directive by ID.
4217
5023
 
4218
5024
  Args:
4219
- task_dict: Dict with 'bank_id' and either:
4220
- - 'entity_ids' (list): Process multiple entities
4221
- - 'entity_id', 'entity_name': Process single entity (legacy)
5025
+ bank_id: Bank identifier
5026
+ directive_id: Directive UUID
5027
+ request_context: Request context for authentication
4222
5028
 
4223
- Raises:
4224
- ValueError: If required fields are missing
4225
- Exception: Any exception from regenerate_entity_observations (propagates to execute_task for retry)
5029
+ Returns:
5030
+ Directive dict or None if not found
4226
5031
  """
4227
- bank_id = task_dict.get("bank_id")
4228
- # Use internal request context for background tasks
4229
- from hindsight_api.models import RequestContext
5032
+ await self._authenticate_tenant(request_context)
5033
+ pool = await self._get_pool()
5034
+
5035
+ async with acquire_with_retry(pool) as conn:
5036
+ row = await conn.fetchrow(
5037
+ f"""
5038
+ SELECT id, bank_id, name, content, priority, is_active, tags, created_at, updated_at
5039
+ FROM {fq_table("directives")}
5040
+ WHERE bank_id = $1 AND id = $2
5041
+ """,
5042
+ bank_id,
5043
+ directive_id,
5044
+ )
5045
+
5046
+ return self._row_to_directive(row) if row else None
5047
+
5048
+ async def create_directive(
5049
+ self,
5050
+ bank_id: str,
5051
+ name: str,
5052
+ content: str,
5053
+ *,
5054
+ priority: int = 0,
5055
+ is_active: bool = True,
5056
+ tags: list[str] | None = None,
5057
+ request_context: "RequestContext",
5058
+ ) -> dict[str, Any]:
5059
+ """Create a new directive.
4230
5060
 
4231
- internal_context = RequestContext()
5061
+ Args:
5062
+ bank_id: Bank identifier
5063
+ name: Human-readable name for the directive
5064
+ content: The directive text to inject into prompts
5065
+ priority: Higher priority directives are injected first (default 0)
5066
+ is_active: Whether this directive is active (default True)
5067
+ tags: Optional tags for filtering
5068
+ request_context: Request context for authentication
5069
+
5070
+ Returns:
5071
+ The created directive dict
5072
+ """
5073
+ await self._authenticate_tenant(request_context)
5074
+ pool = await self._get_pool()
5075
+
5076
+ async with acquire_with_retry(pool) as conn:
5077
+ row = await conn.fetchrow(
5078
+ f"""
5079
+ INSERT INTO {fq_table("directives")}
5080
+ (bank_id, name, content, priority, is_active, tags)
5081
+ VALUES ($1, $2, $3, $4, $5, $6)
5082
+ RETURNING id, bank_id, name, content, priority, is_active, tags, created_at, updated_at
5083
+ """,
5084
+ bank_id,
5085
+ name,
5086
+ content,
5087
+ priority,
5088
+ is_active,
5089
+ tags or [],
5090
+ )
5091
+
5092
+ logger.info(f"[DIRECTIVES] Created directive '{name}' for bank {bank_id}")
5093
+ return self._row_to_directive(row)
5094
+
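A hedged usage sketch of the directive CRUD above; `engine` and `ctx` are assumed, and the names and tags are illustrative. Because list_directives orders by priority DESC, higher-priority rules are injected into prompts first:
async def add_hard_rule(engine, ctx):
    directive = await engine.create_directive(
        "my-bank",
        name="No speculation",
        content="Never speculate about unreleased features.",
        priority=10,                 # higher priority -> injected earlier
        tags=["support"],
        request_context=ctx,
    )
    active = await engine.list_directives(
        "my-bank", tags=["support"], request_context=ctx
    )
    return directive["id"], [d["name"] for d in active]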
5095
+ async def update_directive(
5096
+ self,
5097
+ bank_id: str,
5098
+ directive_id: str,
5099
+ *,
5100
+ name: str | None = None,
5101
+ content: str | None = None,
5102
+ priority: int | None = None,
5103
+ is_active: bool | None = None,
5104
+ tags: list[str] | None = None,
5105
+ request_context: "RequestContext",
5106
+ ) -> dict[str, Any] | None:
5107
+ """Update a directive.
5108
+
5109
+ Args:
5110
+ bank_id: Bank identifier
5111
+ directive_id: Directive UUID
5112
+ name: New name (optional)
5113
+ content: New content (optional)
5114
+ priority: New priority (optional)
5115
+ is_active: New active status (optional)
5116
+ tags: New tags (optional)
5117
+ request_context: Request context for authentication
5118
+
5119
+ Returns:
5120
+ Updated directive dict or None if not found
5121
+ """
5122
+ await self._authenticate_tenant(request_context)
5123
+ pool = await self._get_pool()
4232
5124
 
4233
- # New format: multiple entity_ids
4234
- if "entity_ids" in task_dict:
4235
- entity_ids = task_dict.get("entity_ids", [])
4236
- min_facts = task_dict.get("min_facts", 5)
5125
+ # Build update query dynamically
5126
+ updates = ["updated_at = now()"]
5127
+ params: list[Any] = []
5128
+ param_idx = 1
4237
5129
 
4238
- if not bank_id or not entity_ids:
4239
- raise ValueError(f"[OBSERVATIONS] Missing required fields in task: {task_dict}")
5130
+ if name is not None:
5131
+ updates.append(f"name = ${param_idx}")
5132
+ params.append(name)
5133
+ param_idx += 1
4240
5134
 
4241
- # Process each entity
4242
- pool = await self._get_pool()
4243
- async with pool.acquire() as conn:
4244
- for entity_id in entity_ids:
4245
- try:
4246
- # Fetch entity name and check fact count
4247
- import uuid as uuid_module
5135
+ if content is not None:
5136
+ updates.append(f"content = ${param_idx}")
5137
+ params.append(content)
5138
+ param_idx += 1
4248
5139
 
4249
- entity_uuid = uuid_module.UUID(entity_id) if isinstance(entity_id, str) else entity_id
5140
+ if priority is not None:
5141
+ updates.append(f"priority = ${param_idx}")
5142
+ params.append(priority)
5143
+ param_idx += 1
4250
5144
 
4251
- # First check if entity exists
4252
- entity_exists = await conn.fetchrow(
4253
- f"SELECT canonical_name FROM {fq_table('entities')} WHERE id = $1 AND bank_id = $2",
4254
- entity_uuid,
4255
- bank_id,
4256
- )
5145
+ if is_active is not None:
5146
+ updates.append(f"is_active = ${param_idx}")
5147
+ params.append(is_active)
5148
+ param_idx += 1
4257
5149
 
4258
- if not entity_exists:
4259
- logger.debug(f"[OBSERVATIONS] Entity {entity_id} not yet in bank {bank_id}, skipping")
4260
- continue
5150
+ if tags is not None:
5151
+ updates.append(f"tags = ${param_idx}")
5152
+ params.append(tags)
5153
+ param_idx += 1
4261
5154
 
4262
- entity_name = entity_exists["canonical_name"]
5155
+ params.extend([bank_id, directive_id])
4263
5156
 
4264
- # Count facts linked to this entity
4265
- fact_count = (
4266
- await conn.fetchval(
4267
- f"SELECT COUNT(*) FROM {fq_table('unit_entities')} WHERE entity_id = $1",
4268
- entity_uuid,
4269
- )
4270
- or 0
4271
- )
5157
+ async with acquire_with_retry(pool) as conn:
5158
+ row = await conn.fetchrow(
5159
+ f"""
5160
+ UPDATE {fq_table("directives")}
5161
+ SET {", ".join(updates)}
5162
+ WHERE bank_id = ${param_idx} AND id = ${param_idx + 1}
5163
+ RETURNING id, bank_id, name, content, priority, is_active, tags, created_at, updated_at
5164
+ """,
5165
+ *params,
5166
+ )
4272
5167
 
4273
- # Only regenerate if entity has enough facts
4274
- if fact_count >= min_facts:
4275
- await self.regenerate_entity_observations(
4276
- bank_id, entity_id, entity_name, version=None, request_context=internal_context
4277
- )
4278
- else:
4279
- logger.debug(
4280
- f"[OBSERVATIONS] Skipping {entity_name} ({fact_count} facts < {min_facts} threshold)"
4281
- )
5168
+ return self._row_to_directive(row) if row else None
4282
5169
 
4283
- except Exception as e:
4284
- # Log but continue processing other entities - individual entity failures
4285
- # shouldn't fail the whole batch
4286
- logger.error(f"[OBSERVATIONS] Error processing entity {entity_id}: {e}")
4287
- continue
5170
+ async def delete_directive(
5171
+ self,
5172
+ bank_id: str,
5173
+ directive_id: str,
5174
+ *,
5175
+ request_context: "RequestContext",
5176
+ ) -> bool:
5177
+ """Delete a directive.
4288
5178
 
4289
- # Legacy format: single entity
4290
- else:
4291
- entity_id = task_dict.get("entity_id")
4292
- entity_name = task_dict.get("entity_name")
4293
- version = task_dict.get("version")
5179
+ Args:
5180
+ bank_id: Bank identifier
5181
+ directive_id: Directive UUID
5182
+ request_context: Request context for authentication
4294
5183
 
4295
- if not all([bank_id, entity_id, entity_name]):
4296
- raise ValueError(f"[OBSERVATIONS] Missing required fields in task: {task_dict}")
5184
+ Returns:
5185
+ True if deleted, False if not found
5186
+ """
5187
+ await self._authenticate_tenant(request_context)
5188
+ pool = await self._get_pool()
4297
5189
 
4298
- # Type assertions after validation
4299
- assert isinstance(bank_id, str) and isinstance(entity_id, str) and isinstance(entity_name, str)
4300
- await self.regenerate_entity_observations(
4301
- bank_id, entity_id, entity_name, version=version, request_context=internal_context
5190
+ async with acquire_with_retry(pool) as conn:
5191
+ result = await conn.execute(
5192
+ f"DELETE FROM {fq_table('directives')} WHERE bank_id = $1 AND id = $2",
5193
+ bank_id,
5194
+ directive_id,
4302
5195
  )
4303
5196
 
4304
- # =========================================================================
4305
- # Statistics & Operations (for HTTP API layer)
4306
- # =========================================================================
5197
+ return result == "DELETE 1"
4307
5198
 
4308
- async def get_bank_stats(
5199
+ def _row_to_directive(self, row) -> dict[str, Any]:
5200
+ """Convert a database row to a directive dict."""
5201
+ return {
5202
+ "id": str(row["id"]),
5203
+ "bank_id": row["bank_id"],
5204
+ "name": row["name"],
5205
+ "content": row["content"],
5206
+ "priority": row["priority"],
5207
+ "is_active": row["is_active"],
5208
+ "tags": row["tags"] or [],
5209
+ "created_at": row["created_at"].isoformat() if row["created_at"] else None,
5210
+ "updated_at": row["updated_at"].isoformat() if row["updated_at"] else None,
5211
+ }
5212
+
5213
+ async def list_operations(
4309
5214
  self,
4310
5215
  bank_id: str,
4311
5216
  *,
5217
+ status: str | None = None,
5218
+ limit: int = 20,
5219
+ offset: int = 0,
4312
5220
  request_context: "RequestContext",
4313
5221
  ) -> dict[str, Any]:
4314
- """Get statistics about memory nodes and links for a bank."""
5222
+ """List async operations for a bank with optional filtering and pagination.
5223
+
5224
+ Args:
5225
+ bank_id: Bank identifier
5226
+ status: Optional status filter (pending, completed, failed)
5227
+ limit: Maximum number of operations to return (default 20)
5228
+ offset: Number of operations to skip (default 0)
5229
+ request_context: Request context for authentication
5230
+
5231
+ Returns:
5232
+ Dict with total count and list of operations, sorted by most recent first
5233
+ """
4315
5234
  await self._authenticate_tenant(request_context)
4316
5235
  pool = await self._get_pool()
4317
5236
 
4318
5237
  async with acquire_with_retry(pool) as conn:
4319
- # Get node counts by fact_type
4320
- node_stats = await conn.fetch(
4321
- f"""
4322
- SELECT fact_type, COUNT(*) as count
4323
- FROM {fq_table("memory_units")}
4324
- WHERE bank_id = $1
4325
- GROUP BY fact_type
4326
- """,
4327
- bank_id,
4328
- )
5238
+ # Build WHERE clause
5239
+ where_conditions = ["bank_id = $1"]
5240
+ params: list[Any] = [bank_id]
4329
5241
 
4330
- # Get link counts by link_type
4331
- link_stats = await conn.fetch(
4332
- f"""
4333
- SELECT ml.link_type, COUNT(*) as count
4334
- FROM {fq_table("memory_links")} ml
4335
- JOIN {fq_table("memory_units")} mu ON ml.from_unit_id = mu.id
4336
- WHERE mu.bank_id = $1
4337
- GROUP BY ml.link_type
4338
- """,
4339
- bank_id,
4340
- )
5242
+ if status:
5243
+ # Map API status to DB statuses (pending includes processing)
5244
+ if status == "pending":
5245
+ where_conditions.append("status IN ('pending', 'processing')")
5246
+ else:
5247
+ where_conditions.append(f"status = ${len(params) + 1}")
5248
+ params.append(status)
4341
5249
 
4342
- # Get link counts by fact_type (from nodes)
4343
- link_fact_type_stats = await conn.fetch(
4344
- f"""
4345
- SELECT mu.fact_type, COUNT(*) as count
4346
- FROM {fq_table("memory_links")} ml
4347
- JOIN {fq_table("memory_units")} mu ON ml.from_unit_id = mu.id
4348
- WHERE mu.bank_id = $1
4349
- GROUP BY mu.fact_type
4350
- """,
4351
- bank_id,
4352
- )
5250
+ where_clause = " AND ".join(where_conditions)
4353
5251
 
4354
- # Get link counts by fact_type AND link_type
4355
- link_breakdown_stats = await conn.fetch(
4356
- f"""
4357
- SELECT mu.fact_type, ml.link_type, COUNT(*) as count
4358
- FROM {fq_table("memory_links")} ml
4359
- JOIN {fq_table("memory_units")} mu ON ml.from_unit_id = mu.id
4360
- WHERE mu.bank_id = $1
4361
- GROUP BY mu.fact_type, ml.link_type
4362
- """,
4363
- bank_id,
5252
+ # Get total count (with filter)
5253
+ total_row = await conn.fetchrow(
5254
+ f"SELECT COUNT(*) as total FROM {fq_table('async_operations')} WHERE {where_clause}",
5255
+ *params,
4364
5256
  )
5257
+ total = total_row["total"] if total_row else 0
4365
5258
 
4366
- # Get pending and failed operations counts
4367
- ops_stats = await conn.fetch(
5259
+ # Get operations with pagination
5260
+ operations = await conn.fetch(
4368
5261
  f"""
4369
- SELECT status, COUNT(*) as count
5262
+ SELECT operation_id, operation_type, created_at, status, error_message
4370
5263
  FROM {fq_table("async_operations")}
4371
- WHERE bank_id = $1
4372
- GROUP BY status
5264
+ WHERE {where_clause}
5265
+ ORDER BY created_at DESC
5266
+ LIMIT ${len(params) + 1} OFFSET ${len(params) + 2}
4373
5267
  """,
4374
- bank_id,
5268
+ *params,
5269
+ limit,
5270
+ offset,
4375
5271
  )
4376
5272
 
4377
5273
  return {
4378
- "bank_id": bank_id,
4379
- "node_counts": {row["fact_type"]: row["count"] for row in node_stats},
4380
- "link_counts": {row["link_type"]: row["count"] for row in link_stats},
4381
- "link_counts_by_fact_type": {row["fact_type"]: row["count"] for row in link_fact_type_stats},
4382
- "link_breakdown": [
4383
- {"fact_type": row["fact_type"], "link_type": row["link_type"], "count": row["count"]}
4384
- for row in link_breakdown_stats
5274
+ "total": total,
5275
+ "operations": [
5276
+ {
5277
+ "id": str(row["operation_id"]),
5278
+ "task_type": row["operation_type"],
5279
+ "items_count": 0,
5280
+ "document_id": None,
5281
+ "created_at": row["created_at"].isoformat(),
5282
+ # Map DB status to API status (processing -> pending for simplicity)
5283
+ "status": "pending" if row["status"] in ("pending", "processing") else row["status"],
5284
+ "error_message": row["error_message"],
5285
+ }
5286
+ for row in operations
4385
5287
  ],
4386
- "operations": {row["status"]: row["count"] for row in ops_stats},
4387
5288
  }
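Two details above are easy to miss: the API collapses the database's 'processing' state into 'pending', and the LIMIT/OFFSET placeholders are computed from len(params) so they stay correct whether or not a status filter was added. The status mapping in isolation (the helper name is illustrative):
def to_api_status(db_status: str) -> str:
    # 'processing' is an internal state; callers only ever see 'pending'.
    return "pending" if db_status in ("pending", "processing") else db_status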
4388
5289
 
4389
- async def get_entity(
5290
+ async def get_operation_status(
4390
5291
  self,
4391
5292
  bank_id: str,
4392
- entity_id: str,
5293
+ operation_id: str,
4393
5294
  *,
4394
5295
  request_context: "RequestContext",
4395
- ) -> dict[str, Any] | None:
4396
- """Get entity details including metadata and observations."""
4397
- await self._authenticate_tenant(request_context)
4398
- pool = await self._get_pool()
4399
-
4400
- async with acquire_with_retry(pool) as conn:
4401
- entity_row = await conn.fetchrow(
4402
- f"""
4403
- SELECT id, canonical_name, mention_count, first_seen, last_seen, metadata
4404
- FROM {fq_table("entities")}
4405
- WHERE bank_id = $1 AND id = $2
4406
- """,
4407
- bank_id,
4408
- uuid.UUID(entity_id),
4409
- )
4410
-
4411
- if not entity_row:
4412
- return None
4413
-
4414
- # Get observations for the entity
4415
- observations = await self.get_entity_observations(bank_id, entity_id, limit=20, request_context=request_context)
4416
-
4417
- return {
4418
- "id": str(entity_row["id"]),
4419
- "canonical_name": entity_row["canonical_name"],
4420
- "mention_count": entity_row["mention_count"],
4421
- "first_seen": entity_row["first_seen"].isoformat() if entity_row["first_seen"] else None,
4422
- "last_seen": entity_row["last_seen"].isoformat() if entity_row["last_seen"] else None,
4423
- "metadata": entity_row["metadata"] or {},
4424
- "observations": observations,
4425
- }
5296
+ ) -> dict[str, Any]:
5297
+ """Get the status of a specific async operation.
4426
5298
 
4427
- async def list_operations(
4428
- self,
4429
- bank_id: str,
4430
- *,
4431
- request_context: "RequestContext",
4432
- ) -> list[dict[str, Any]]:
4433
- """List async operations for a bank."""
5299
+ Returns:
5300
+ - status: "pending", "completed", or "failed"
5301
+ - updated_at: last update timestamp
5302
+ - completed_at: completion timestamp (if completed)
5303
+ """
4434
5304
  await self._authenticate_tenant(request_context)
4435
5305
  pool = await self._get_pool()
4436
5306
 
5307
+ op_uuid = uuid.UUID(operation_id)
5308
+
4437
5309
  async with acquire_with_retry(pool) as conn:
4438
- operations = await conn.fetch(
5310
+ row = await conn.fetchrow(
4439
5311
  f"""
4440
- SELECT operation_id, bank_id, operation_type, created_at, status, error_message, result_metadata
5312
+ SELECT operation_id, operation_type, created_at, updated_at, completed_at, status, error_message
4441
5313
  FROM {fq_table("async_operations")}
4442
- WHERE bank_id = $1
4443
- ORDER BY created_at DESC
5314
+ WHERE operation_id = $1 AND bank_id = $2
4444
5315
  """,
5316
+ op_uuid,
4445
5317
  bank_id,
4446
5318
  )
4447
5319
 
4448
- def parse_metadata(metadata):
4449
- if metadata is None:
4450
- return {}
4451
- if isinstance(metadata, str):
4452
- import json
4453
-
4454
- return json.loads(metadata)
4455
- return metadata
4456
-
4457
- return [
4458
- {
4459
- "id": str(row["operation_id"]),
4460
- "task_type": row["operation_type"],
4461
- "items_count": parse_metadata(row["result_metadata"]).get("items_count", 0),
4462
- "document_id": parse_metadata(row["result_metadata"]).get("document_id"),
4463
- "created_at": row["created_at"].isoformat(),
4464
- "status": row["status"],
5320
+ if row:
5321
+ # Map DB status to API status (processing -> pending for simplicity)
5322
+ db_status = row["status"]
5323
+ api_status = "pending" if db_status in ("pending", "processing") else db_status
5324
+ return {
5325
+ "operation_id": operation_id,
5326
+ "status": api_status,
5327
+ "operation_type": row["operation_type"],
5328
+ "created_at": row["created_at"].isoformat() if row["created_at"] else None,
5329
+ "updated_at": row["updated_at"].isoformat() if row["updated_at"] else None,
5330
+ "completed_at": row["completed_at"].isoformat() if row["completed_at"] else None,
4465
5331
  "error_message": row["error_message"],
4466
5332
  }
4467
- for row in operations
4468
- ]
5333
+ else:
5334
+ # Operation not found
5335
+ return {
5336
+ "operation_id": operation_id,
5337
+ "status": "not_found",
5338
+ "operation_type": None,
5339
+ "created_at": None,
5340
+ "updated_at": None,
5341
+ "completed_at": None,
5342
+ "error_message": None,
5343
+ }
4469
5344
 
4470
5345
  async def cancel_operation(
4471
5346
  self,
@@ -4506,10 +5381,10 @@ Guidelines:
4506
5381
  bank_id: str,
4507
5382
  *,
4508
5383
  name: str | None = None,
4509
- background: str | None = None,
5384
+ mission: str | None = None,
4510
5385
  request_context: "RequestContext",
4511
5386
  ) -> dict[str, Any]:
4512
- """Update bank name and/or background."""
5387
+ """Update bank name and/or mission."""
4513
5388
  await self._authenticate_tenant(request_context)
4514
5389
  pool = await self._get_pool()
4515
5390
 
@@ -4525,34 +5400,72 @@ Guidelines:
4525
5400
  name,
4526
5401
  )
4527
5402
 
4528
- if background is not None:
5403
+ if mission is not None:
4529
5404
  await conn.execute(
4530
5405
  f"""
4531
5406
  UPDATE {fq_table("banks")}
4532
- SET background = $2, updated_at = NOW()
5407
+ SET mission = $2, updated_at = NOW()
4533
5408
  WHERE bank_id = $1
4534
5409
  """,
4535
5410
  bank_id,
4536
- background,
5411
+ mission,
4537
5412
  )
4538
5413
 
4539
5414
  # Return updated profile
4540
5415
  return await self.get_bank_profile(bank_id, request_context=request_context)
4541
5416
 
4542
- async def submit_async_retain(
5417
+ async def _submit_async_operation(
4543
5418
  self,
4544
5419
  bank_id: str,
4545
- contents: list[dict[str, Any]],
5420
+ operation_type: str,
5421
+ task_type: str,
5422
+ task_payload: dict[str, Any],
4546
5423
  *,
4547
- request_context: "RequestContext",
4548
- document_tags: list[str] | None = None,
5424
+ result_metadata: dict[str, Any] | None = None,
5425
+ dedupe_by_bank: bool = False,
4549
5426
  ) -> dict[str, Any]:
4550
- """Submit a batch retain operation to run asynchronously."""
4551
- await self._authenticate_tenant(request_context)
4552
- pool = await self._get_pool()
5427
+ """Generic helper to submit an async operation.
5428
+
5429
+ Args:
5430
+ bank_id: Bank identifier
5431
+ operation_type: Operation type for the async_operations record (e.g., 'consolidation', 'retain')
5432
+ task_type: Task type for the task payload (e.g., 'consolidation', 'batch_retain')
5433
+ task_payload: Additional task payload fields (operation_id and bank_id are added automatically)
5434
+ result_metadata: Optional metadata to store with the operation record
5435
+ dedupe_by_bank: If True, skip creating a new task if one is already pending for this bank+operation_type
4553
5436
 
5437
+ Returns:
5438
+ Dict with operation_id and optionally deduplicated=True if an existing task was found
5439
+ """
4554
5440
  import json
4555
5441
 
5442
+ pool = await self._get_pool()
5443
+
5444
+ # Check for existing pending task if deduplication is enabled
5445
+ # Note: We only check 'pending', not 'processing', because a processing task
5446
+ # uses a watermark from when it started - new memories added after that point
5447
+ # would need another consolidation run to be processed.
5448
+ if dedupe_by_bank:
5449
+ async with acquire_with_retry(pool) as conn:
5450
+ existing = await conn.fetchrow(
5451
+ f"""
5452
+ SELECT operation_id FROM {fq_table("async_operations")}
5453
+ WHERE bank_id = $1 AND operation_type = $2 AND status = 'pending'
5454
+ LIMIT 1
5455
+ """,
5456
+ bank_id,
5457
+ operation_type,
5458
+ )
5459
+ if existing:
5460
+ logger.debug(
5461
+ f"{operation_type} task already pending for bank_id={bank_id}, "
5462
+ f"skipping duplicate (existing operation_id={existing['operation_id']})"
5463
+ )
5464
+ return {
5465
+ "operation_id": str(existing["operation_id"]),
5466
+ "deduplicated": True,
5467
+ }
5468
+
4556
5469
  operation_id = uuid.uuid4()
4557
5470
 
4558
5471
  # Insert operation record into database
@@ -4564,25 +5477,113 @@ Guidelines:
4564
5477
  """,
4565
5478
  operation_id,
4566
5479
  bank_id,
4567
- "retain",
4568
- json.dumps({"items_count": len(contents)}),
5480
+ operation_type,
5481
+ json.dumps(result_metadata or {}),
4569
5482
  )
4570
5483
 
4571
- # Submit task to background queue
4572
- task_payload = {
4573
- "type": "batch_retain",
5484
+ # Build and submit task payload
5485
+ full_payload = {
5486
+ "type": task_type,
4574
5487
  "operation_id": str(operation_id),
4575
5488
  "bank_id": bank_id,
4576
- "contents": contents,
5489
+ **task_payload,
4577
5490
  }
4578
- if document_tags:
4579
- task_payload["document_tags"] = document_tags
4580
5491
 
4581
- await self._task_backend.submit_task(task_payload)
5492
+ await self._task_backend.submit_task(full_payload)
4582
5493
 
4583
- logger.info(f"Retain task queued for bank_id={bank_id}, {len(contents)} items, operation_id={operation_id}")
5494
+ logger.info(f"{operation_type} task queued for bank_id={bank_id}, operation_id={operation_id}")
4584
5495
 
4585
5496
  return {
4586
5497
  "operation_id": str(operation_id),
4587
- "items_count": len(contents),
4588
5498
  }
5499
+
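The helper above always wraps the caller's payload in a common envelope before handing it to the task backend. A sketch of the resulting task dict (all values are illustrative):
submitted_task = {
    "type": "consolidation",            # the task_type argument
    "operation_id": "generated-uuid",   # uuid4 created per submission
    "bank_id": "my-bank",
    # ...plus any task_payload fields, e.g. {"mental_model_id": "..."}
}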
5500
+ async def submit_async_retain(
5501
+ self,
5502
+ bank_id: str,
5503
+ contents: list[dict[str, Any]],
5504
+ *,
5505
+ request_context: "RequestContext",
5506
+ document_tags: list[str] | None = None,
5507
+ ) -> dict[str, Any]:
5508
+ """Submit a batch retain operation to run asynchronously."""
5509
+ await self._authenticate_tenant(request_context)
5510
+
5511
+ task_payload: dict[str, Any] = {"contents": contents}
5512
+ if document_tags:
5513
+ task_payload["document_tags"] = document_tags
5514
+
5515
+ result = await self._submit_async_operation(
5516
+ bank_id=bank_id,
5517
+ operation_type="retain",
5518
+ task_type="batch_retain",
5519
+ task_payload=task_payload,
5520
+ result_metadata={"items_count": len(contents)},
5521
+ dedupe_by_bank=False,
5522
+ )
5523
+
5524
+ result["items_count"] = len(contents)
5525
+ return result
5526
+
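A hedged end-to-end sketch: submit_async_retain returns immediately with an operation_id, and progress is then observable through get_operation_status. `engine` and `ctx` are assumed, and the shape of the content items is illustrative (see the retain API for the actual fields):
import asyncio

async def retain_and_wait(engine, ctx):
    op = await engine.submit_async_retain(
        "my-bank",
        contents=[{"content": "Customer asked about SSO pricing."}],  # illustrative item shape
        request_context=ctx,
    )
    while True:
        status = await engine.get_operation_status(
            "my-bank", op["operation_id"], request_context=ctx
        )
        if status["status"] != "pending":   # completed, failed, or not_found
            return status
        await asyncio.sleep(1)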
5527
+ async def submit_async_consolidation(
5528
+ self,
5529
+ bank_id: str,
5530
+ *,
5531
+ request_context: "RequestContext",
5532
+ ) -> dict[str, Any]:
5533
+ """Submit a consolidation operation to run asynchronously.
5534
+
5535
+ Deduplicates by bank_id - if there's already a pending consolidation for this bank,
5536
+ returns the existing operation_id instead of creating a new one.
5537
+
5538
+ Args:
5539
+ bank_id: Bank identifier
5540
+ request_context: Request context for authentication
5541
+
5542
+ Returns:
5543
+ Dict with operation_id
5544
+ """
5545
+ await self._authenticate_tenant(request_context)
5546
+ return await self._submit_async_operation(
5547
+ bank_id=bank_id,
5548
+ operation_type="consolidation",
5549
+ task_type="consolidation",
5550
+ task_payload={},
5551
+ dedupe_by_bank=True,
5552
+ )
5553
+
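With dedupe_by_bank=True, only an operation that is still 'pending' suppresses a new submission; a 'processing' one does not, since its watermark predates memories added after it started. A hedged sketch of the observable behavior (`engine` and `ctx` assumed):
async def queue_consolidation_twice(engine, ctx):
    first = await engine.submit_async_consolidation("my-bank", request_context=ctx)
    second = await engine.submit_async_consolidation("my-bank", request_context=ctx)
    # If the first task has not started yet, both calls share an operation_id
    # and the second response carries deduplicated=True.
    return first["operation_id"], second.get("deduplicated", False)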
5554
+ async def submit_async_refresh_mental_model(
5555
+ self,
5556
+ bank_id: str,
5557
+ mental_model_id: str,
5558
+ *,
5559
+ request_context: "RequestContext",
5560
+ ) -> dict[str, Any]:
5561
+ """Submit an async mental model refresh operation.
5562
+
5563
+ This schedules a background task to re-run the source query and update the content.
5564
+
5565
+ Args:
5566
+ bank_id: Bank identifier
5567
+ mental_model_id: Mental model UUID to refresh
5568
+ request_context: Request context for authentication
5569
+
5570
+ Returns:
5571
+ Dict with operation_id
5572
+ """
5573
+ await self._authenticate_tenant(request_context)
5574
+
5575
+ # Verify mental model exists
5576
+ mental_model = await self.get_mental_model(bank_id, mental_model_id, request_context=request_context)
5577
+ if not mental_model:
5578
+ raise ValueError(f"Mental model {mental_model_id} not found in bank {bank_id}")
5579
+
5580
+ return await self._submit_async_operation(
5581
+ bank_id=bank_id,
5582
+ operation_type="refresh_mental_model",
5583
+ task_type="refresh_mental_model",
5584
+ task_payload={
5585
+ "mental_model_id": mental_model_id,
5586
+ },
5587
+ result_metadata={"mental_model_id": mental_model_id, "name": mental_model["name"]},
5588
+ dedupe_by_bank=False,
5589
+ )