hindsight-api 0.4.6__tar.gz → 0.4.7__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/PKG-INFO +1 -1
  2. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/__init__.py +1 -1
  3. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/api/http.py +44 -0
  4. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/daemon.py +1 -1
  5. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/memory_engine.py +54 -4
  6. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/retain/fact_extraction.py +13 -9
  7. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/retain/fact_storage.py +5 -3
  8. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/extensions/__init__.py +8 -0
  9. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/extensions/operation_validator.py +103 -0
  10. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/worker/main.py +6 -1
  11. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/worker/poller.py +15 -7
  12. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/pyproject.toml +1 -1
  13. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/.gitignore +0 -0
  14. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/README.md +0 -0
  15. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/admin/__init__.py +0 -0
  16. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/admin/cli.py +0 -0
  17. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/alembic/README +0 -0
  18. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/alembic/env.py +0 -0
  19. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/alembic/script.py.mako +0 -0
  20. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +0 -0
  21. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py +0 -0
  22. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py +0 -0
  23. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py +0 -0
  24. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/alembic/versions/e0a1b2c3d4e5_disposition_to_3_traits.py +0 -0
  25. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/alembic/versions/f1a2b3c4d5e6_add_memory_links_composite_index.py +0 -0
  26. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/alembic/versions/g2a3b4c5d6e7_add_tags_column.py +0 -0
  27. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/alembic/versions/h3c4d5e6f7g8_mental_models_v4.py +0 -0
  28. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/alembic/versions/i4d5e6f7g8h9_delete_opinions.py +0 -0
  29. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/alembic/versions/j5e6f7g8h9i0_mental_model_versions.py +0 -0
  30. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/alembic/versions/k6f7g8h9i0j1_add_directive_subtype.py +0 -0
  31. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/alembic/versions/l7g8h9i0j1k2_add_worker_columns.py +0 -0
  32. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/alembic/versions/m8h9i0j1k2l3_mental_model_id_to_text.py +0 -0
  33. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/alembic/versions/n9i0j1k2l3m4_learnings_and_pinned_reflections.py +0 -0
  34. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/alembic/versions/o0j1k2l3m4n5_migrate_mental_models_data.py +0 -0
  35. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/alembic/versions/p1k2l3m4n5o6_new_knowledge_architecture.py +0 -0
  36. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/alembic/versions/q2l3m4n5o6p7_fix_mental_model_fact_type.py +0 -0
  37. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/alembic/versions/r3m4n5o6p7q8_add_reflect_response_to_reflections.py +0 -0
  38. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/alembic/versions/rename_personality_to_disposition.py +0 -0
  39. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/alembic/versions/s4n5o6p7q8r9_add_consolidated_at_to_memory_units.py +0 -0
  40. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/alembic/versions/t5o6p7q8r9s0_rename_mental_models_to_observations.py +0 -0
  41. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/alembic/versions/u6p7q8r9s0t1_mental_models_text_id.py +0 -0
  42. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/alembic/versions/v7q8r9s0t1u2_add_max_tokens_to_mental_models.py +0 -0
  43. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/api/__init__.py +0 -0
  44. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/api/mcp.py +0 -0
  45. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/banner.py +0 -0
  46. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/config.py +0 -0
  47. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/__init__.py +0 -0
  48. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/consolidation/__init__.py +0 -0
  49. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/consolidation/consolidator.py +0 -0
  50. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/consolidation/prompts.py +0 -0
  51. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/cross_encoder.py +0 -0
  52. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/db_budget.py +0 -0
  53. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/db_utils.py +0 -0
  54. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/directives/__init__.py +0 -0
  55. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/directives/models.py +0 -0
  56. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/embeddings.py +0 -0
  57. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/entity_resolver.py +0 -0
  58. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/interface.py +0 -0
  59. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/llm_wrapper.py +0 -0
  60. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/mental_models/__init__.py +0 -0
  61. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/mental_models/models.py +0 -0
  62. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/query_analyzer.py +0 -0
  63. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/reflect/__init__.py +0 -0
  64. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/reflect/agent.py +0 -0
  65. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/reflect/models.py +0 -0
  66. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/reflect/observations.py +0 -0
  67. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/reflect/prompts.py +0 -0
  68. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/reflect/tools.py +0 -0
  69. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/reflect/tools_schema.py +0 -0
  70. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/response_models.py +0 -0
  71. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/retain/__init__.py +0 -0
  72. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/retain/bank_utils.py +0 -0
  73. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/retain/chunk_storage.py +0 -0
  74. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/retain/deduplication.py +0 -0
  75. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/retain/embedding_processing.py +0 -0
  76. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/retain/embedding_utils.py +0 -0
  77. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/retain/entity_processing.py +0 -0
  78. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/retain/link_creation.py +0 -0
  79. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/retain/link_utils.py +0 -0
  80. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/retain/orchestrator.py +0 -0
  81. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/retain/types.py +0 -0
  82. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/search/__init__.py +0 -0
  83. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/search/fusion.py +0 -0
  84. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/search/graph_retrieval.py +0 -0
  85. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/search/link_expansion_retrieval.py +0 -0
  86. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/search/mpfp_retrieval.py +0 -0
  87. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/search/reranking.py +0 -0
  88. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/search/retrieval.py +0 -0
  89. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/search/tags.py +0 -0
  90. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/search/temporal_extraction.py +0 -0
  91. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/search/think_utils.py +0 -0
  92. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/search/trace.py +0 -0
  93. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/search/tracer.py +0 -0
  94. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/search/types.py +0 -0
  95. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/task_backend.py +0 -0
  96. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/engine/utils.py +0 -0
  97. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/extensions/base.py +0 -0
  98. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/extensions/builtin/__init__.py +0 -0
  99. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/extensions/builtin/tenant.py +0 -0
  100. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/extensions/context.py +0 -0
  101. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/extensions/http.py +0 -0
  102. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/extensions/loader.py +0 -0
  103. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/extensions/tenant.py +0 -0
  104. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/main.py +0 -0
  105. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/mcp_local.py +0 -0
  106. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/mcp_tools.py +0 -0
  107. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/metrics.py +0 -0
  108. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/migrations.py +0 -0
  109. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/models.py +0 -0
  110. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/pg0.py +0 -0
  111. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/server.py +0 -0
  112. {hindsight_api-0.4.6 → hindsight_api-0.4.7}/hindsight_api/worker/__init__.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hindsight-api
3
- Version: 0.4.6
3
+ Version: 0.4.7
4
4
  Summary: Hindsight: Agent Memory That Works Like Human Memory
5
5
  Requires-Python: >=3.11
6
6
  Requires-Dist: aiohttp>=3.13.3
@@ -46,4 +46,4 @@ __all__ = [
46
46
  "RemoteTEICrossEncoder",
47
47
  "LLMConfig",
48
48
  ]
49
- __version__ = "0.4.6"
49
+ __version__ = "0.4.7"
@@ -1398,13 +1398,18 @@ def create_app(
1398
1398
 
1399
1399
  # Start worker poller if enabled (standalone mode)
1400
1400
  if config.worker_enabled and memory._pool is not None:
1401
+ from ..config import DEFAULT_DATABASE_SCHEMA
1402
+
1401
1403
  worker_id = config.worker_id or socket.gethostname()
1404
+ # Convert default schema to None for SQL compatibility (no schema prefix)
1405
+ schema = None if config.database_schema == DEFAULT_DATABASE_SCHEMA else config.database_schema
1402
1406
  poller = WorkerPoller(
1403
1407
  pool=memory._pool,
1404
1408
  worker_id=worker_id,
1405
1409
  executor=memory.execute_task,
1406
1410
  poll_interval_ms=config.worker_poll_interval_ms,
1407
1411
  max_retries=config.worker_max_retries,
1412
+ schema=schema,
1408
1413
  tenant_extension=getattr(memory, "_tenant_extension", None),
1409
1414
  max_slots=config.worker_max_slots,
1410
1415
  consolidation_max_slots=config.worker_consolidation_max_slots,
@@ -2285,6 +2290,23 @@ def _register_routes(app: FastAPI):
2285
2290
  ):
2286
2291
  """Get a mental model by ID."""
2287
2292
  try:
2293
+ # Pre-operation validation hook
2294
+ validator = app.state.memory._operation_validator
2295
+ if validator:
2296
+ from hindsight_api.extensions.operation_validator import MentalModelGetContext
2297
+
2298
+ ctx = MentalModelGetContext(
2299
+ bank_id=bank_id,
2300
+ mental_model_id=mental_model_id,
2301
+ request_context=request_context,
2302
+ )
2303
+ validation = await validator.validate_mental_model_get(ctx)
2304
+ if not validation.allowed:
2305
+ raise OperationValidationError(
2306
+ validation.reason or "Operation not allowed",
2307
+ status_code=validation.status_code,
2308
+ )
2309
+
2288
2310
  mental_model = await app.state.memory.get_mental_model(
2289
2311
  bank_id=bank_id,
2290
2312
  mental_model_id=mental_model_id,
@@ -2292,9 +2314,31 @@ def _register_routes(app: FastAPI):
2292
2314
  )
2293
2315
  if mental_model is None:
2294
2316
  raise HTTPException(status_code=404, detail=f"Mental model '{mental_model_id}' not found")
2317
+
2318
+ # Post-operation hook
2319
+ if validator:
2320
+ from hindsight_api.extensions.operation_validator import MentalModelGetResult
2321
+
2322
+ content = mental_model.get("content", "")
2323
+ output_tokens = len(content) // 4 if content else 0
2324
+
2325
+ result_ctx = MentalModelGetResult(
2326
+ bank_id=bank_id,
2327
+ mental_model_id=mental_model_id,
2328
+ request_context=request_context,
2329
+ output_tokens=output_tokens,
2330
+ success=True,
2331
+ )
2332
+ try:
2333
+ await validator.on_mental_model_get_complete(result_ctx)
2334
+ except Exception as hook_err:
2335
+ logger.warning(f"Post-mental-model-get hook error (non-fatal): {hook_err}")
2336
+
2295
2337
  return MentalModelResponse(**mental_model)
2296
2338
  except (AuthenticationError, HTTPException):
2297
2339
  raise
2340
+ except OperationValidationError as e:
2341
+ raise HTTPException(status_code=e.status_code, detail=e.reason)
2298
2342
  except Exception as e:
2299
2343
  import traceback
2300
2344
 
@@ -15,7 +15,7 @@ from pathlib import Path
15
15
  logger = logging.getLogger(__name__)
16
16
 
17
17
  # Default daemon configuration
18
- DEFAULT_DAEMON_PORT = 8889
18
+ DEFAULT_DAEMON_PORT = 8888
19
19
  DEFAULT_IDLE_TIMEOUT = 0 # 0 = no auto-exit (hindsight-embed passes its own timeout)
20
20
  LOCKFILE_PATH = Path.home() / ".hindsight" / "daemon.lock"
21
21
  DAEMON_LOG_PATH = Path.home() / ".hindsight" / "daemon.log"
@@ -597,7 +597,13 @@ class MemoryEngine(MemoryEngineInterface):
597
597
 
598
598
  from hindsight_api.models import RequestContext
599
599
 
600
- internal_context = RequestContext(internal=True)
600
+ # Restore tenant_id/api_key_id from task payload so extensions can
601
+ # attribute the mental_model_refresh operation to the correct org.
602
+ internal_context = RequestContext(
603
+ internal=True,
604
+ tenant_id=task_dict.get("_tenant_id"),
605
+ api_key_id=task_dict.get("_api_key_id"),
606
+ )
601
607
 
602
608
  # Get the current mental model to get source_query
603
609
  mental_model = await self.get_mental_model(bank_id, mental_model_id, request_context=internal_context)
@@ -641,6 +647,42 @@ class MemoryEngine(MemoryEngineInterface):
641
647
  request_context=internal_context,
642
648
  )
643
649
 
650
+ # Call post-operation hook if validator is configured
651
+ if self._operation_validator:
652
+ from hindsight_api.extensions.operation_validator import MentalModelRefreshResult
653
+
654
+ # Count facts and mental models from based_on
655
+ facts_used = 0
656
+ mental_models_used = 0
657
+ if reflect_result.based_on:
658
+ for fact_type, facts in reflect_result.based_on.items():
659
+ if facts:
660
+ if fact_type == "mental_models":
661
+ mental_models_used += len(facts)
662
+ else:
663
+ facts_used += len(facts)
664
+
665
+ # Estimate tokens
666
+ query_tokens = len(source_query) // 4 if source_query else 0
667
+ output_tokens = len(generated_content) // 4 if generated_content else 0
668
+ context_tokens = 0 # refresh doesn't use additional context
669
+
670
+ result_ctx = MentalModelRefreshResult(
671
+ bank_id=bank_id,
672
+ mental_model_id=mental_model_id,
673
+ request_context=internal_context,
674
+ query_tokens=query_tokens,
675
+ output_tokens=output_tokens,
676
+ context_tokens=context_tokens,
677
+ facts_used=facts_used,
678
+ mental_models_used=mental_models_used,
679
+ success=True,
680
+ )
681
+ try:
682
+ await self._operation_validator.on_mental_model_refresh_complete(result_ctx)
683
+ except Exception as hook_err:
684
+ logger.warning(f"Post-mental-model-refresh hook error (non-fatal): {hook_err}")
685
+
644
686
  logger.info(f"[REFRESH_MENTAL_MODEL_TASK] Completed for bank_id={bank_id}, mental_model_id={mental_model_id}")
645
687
 
646
688
  async def execute_task(self, task_dict: dict[str, Any]):
@@ -5482,13 +5524,21 @@ class MemoryEngine(MemoryEngineInterface):
5482
5524
  if not mental_model:
5483
5525
  raise ValueError(f"Mental model {mental_model_id} not found in bank {bank_id}")
5484
5526
 
5527
+ # Pass tenant_id and api_key_id through task payload so the worker
5528
+ # can provide request context to extension hooks.
5529
+ task_payload: dict[str, Any] = {
5530
+ "mental_model_id": mental_model_id,
5531
+ }
5532
+ if request_context.tenant_id:
5533
+ task_payload["_tenant_id"] = request_context.tenant_id
5534
+ if request_context.api_key_id:
5535
+ task_payload["_api_key_id"] = request_context.api_key_id
5536
+
5485
5537
  return await self._submit_async_operation(
5486
5538
  bank_id=bank_id,
5487
5539
  operation_type="refresh_mental_model",
5488
5540
  task_type="refresh_mental_model",
5489
- task_payload={
5490
- "mental_model_id": mental_model_id,
5491
- },
5541
+ task_payload=task_payload,
5492
5542
  result_metadata={"mental_model_id": mental_model_id, "name": mental_model["name"]},
5493
5543
  dedupe_by_bank=False,
5494
5544
  )
@@ -57,21 +57,25 @@ def _infer_temporal_date(fact_text: str, event_date: datetime) -> str | None:
57
57
  return None
58
58
 
59
59
 
60
- def _sanitize_text(text: str) -> str:
60
+ def _sanitize_text(text: str | None) -> str | None:
61
61
  """
62
- Sanitize text by removing invalid Unicode surrogate characters.
62
+ Sanitize text by removing characters that break downstream systems.
63
63
 
64
- Surrogate characters (U+D800 to U+DFFF) are used in UTF-16 encoding
65
- but cannot be encoded in UTF-8. They can appear in Python strings
66
- from improperly decoded data (e.g., from JavaScript or broken files).
64
+ Removes:
65
+ - Null bytes (\\x00): Invalid in PostgreSQL UTF-8 encoding
66
+ - Unicode surrogates (U+D800-U+DFFF): Invalid in UTF-8, break LLM APIs
67
67
 
68
- This function removes unpaired surrogates to prevent UnicodeEncodeError
69
- when the text is sent to the LLM API.
68
+ Surrogate characters are used in UTF-16 encoding but cannot be encoded
69
+ in UTF-8. They can appear in Python strings from improperly decoded data
70
+ (e.g., from JavaScript or broken files). Null bytes commonly appear in
71
+ OCR output, PDF extraction, or copy-paste from binary sources.
70
72
  """
73
+ if text is None:
74
+ return None
71
75
  if not text:
72
76
  return text
73
- # Remove surrogate characters (U+D800 to U+DFFF) using regex
74
- # These are invalid in UTF-8 and cause encoding errors
77
+ # Remove null bytes and surrogate characters
78
+ text = text.replace("\x00", "")
75
79
  return re.sub(r"[\ud800-\udfff]", "", text)
76
80
 
77
81
 
@@ -8,6 +8,7 @@ import json
8
8
  import logging
9
9
 
10
10
  from ..memory_engine import fq_table
11
+ from .fact_extraction import _sanitize_text
11
12
  from .types import ProcessedFact
12
13
 
13
14
  logger = logging.getLogger(__name__)
@@ -47,7 +48,7 @@ async def insert_facts_batch(
47
48
  tags_list = []
48
49
 
49
50
  for fact in facts:
50
- fact_texts.append(fact.fact_text)
51
+ fact_texts.append(_sanitize_text(fact.fact_text))
51
52
  # Convert embedding to string for asyncpg vector type
52
53
  embeddings.append(str(fact.embedding))
53
54
  # event_date: Use occurred_start if available, otherwise use mentioned_at
@@ -56,7 +57,7 @@ async def insert_facts_batch(
56
57
  occurred_starts.append(fact.occurred_start)
57
58
  occurred_ends.append(fact.occurred_end)
58
59
  mentioned_ats.append(fact.mentioned_at)
59
- contexts.append(fact.context)
60
+ contexts.append(_sanitize_text(fact.context))
60
61
  fact_types.append(fact.fact_type)
61
62
  # confidence_score is only for opinion facts
62
63
  confidence_scores.append(1.0 if fact.fact_type == "opinion" else None)
@@ -157,7 +158,8 @@ async def handle_document_tracking(
157
158
  """
158
159
  import hashlib
159
160
 
160
- # Calculate content hash
161
+ # Sanitize and calculate content hash
162
+ combined_content = _sanitize_text(combined_content) or ""
161
163
  content_hash = hashlib.sha256(combined_content.encode()).hexdigest()
162
164
 
163
165
  # Always delete old document first if it exists (cascades to units and links)
@@ -24,6 +24,10 @@ from hindsight_api.extensions.operation_validator import (
24
24
  # Consolidation operation
25
25
  ConsolidateContext,
26
26
  ConsolidateResult,
27
+ # Mental Model operations
28
+ MentalModelGetContext,
29
+ MentalModelGetResult,
30
+ MentalModelRefreshResult,
27
31
  # Core operations
28
32
  OperationValidationError,
29
33
  OperationValidatorExtension,
@@ -65,6 +69,10 @@ __all__ = [
65
69
  # Operation Validator - Consolidation
66
70
  "ConsolidateContext",
67
71
  "ConsolidateResult",
72
+ # Operation Validator - Mental Model
73
+ "MentalModelGetContext",
74
+ "MentalModelGetResult",
75
+ "MentalModelRefreshResult",
68
76
  # Tenant/Auth
69
77
  "ApiKeyTenantExtension",
70
78
  "AuthenticationError",
@@ -196,6 +196,48 @@ class ConsolidateResult:
196
196
  error: str | None = None
197
197
 
198
198
 
199
+ # =============================================================================
200
+ # Mental Model Contexts
201
+ # =============================================================================
202
+
203
+
204
+ @dataclass
205
+ class MentalModelGetContext:
206
+ """Context for a mental model GET operation validation (pre-operation)."""
207
+
208
+ bank_id: str
209
+ mental_model_id: str
210
+ request_context: "RequestContext"
211
+
212
+
213
+ @dataclass
214
+ class MentalModelGetResult:
215
+ """Result context for post-mental-model-GET hook."""
216
+
217
+ bank_id: str
218
+ mental_model_id: str
219
+ request_context: "RequestContext"
220
+ output_tokens: int # tokens in the returned content
221
+ success: bool = True
222
+ error: str | None = None
223
+
224
+
225
+ @dataclass
226
+ class MentalModelRefreshResult:
227
+ """Result context for post-mental-model-refresh hook."""
228
+
229
+ bank_id: str
230
+ mental_model_id: str
231
+ request_context: "RequestContext"
232
+ query_tokens: int # tokens in source_query
233
+ output_tokens: int # tokens in generated content
234
+ context_tokens: int # tokens in context (if any)
235
+ facts_used: int # facts referenced in based_on
236
+ mental_models_used: int # mental models referenced in based_on
237
+ success: bool = True
238
+ error: str | None = None
239
+
240
+
199
241
  class OperationValidatorExtension(Extension, ABC):
200
242
  """
201
243
  Validates and hooks into retain/recall/reflect/consolidate operations.
@@ -402,3 +444,64 @@ class OperationValidatorExtension(Extension, ABC):
402
444
  - error: Error message (if failed)
403
445
  """
404
446
  pass
447
+
448
+ # =========================================================================
449
+ # Mental Model - Pre-operation validation hook (optional - override to implement)
450
+ # =========================================================================
451
+
452
+ async def validate_mental_model_get(self, ctx: MentalModelGetContext) -> ValidationResult:
453
+ """
454
+ Validate a mental model GET operation before execution.
455
+
456
+ Override to implement custom validation logic for mental model retrieval.
457
+
458
+ Args:
459
+ ctx: Context containing:
460
+ - bank_id: Bank identifier
461
+ - mental_model_id: Mental model identifier
462
+ - request_context: Request context with auth info
463
+
464
+ Returns:
465
+ ValidationResult indicating whether the operation is allowed.
466
+ """
467
+ return ValidationResult.accept()
468
+
469
+ # =========================================================================
470
+ # Mental Model - Post-operation hooks (optional - override to implement)
471
+ # =========================================================================
472
+
473
+ async def on_mental_model_get_complete(self, result: MentalModelGetResult) -> None:
474
+ """
475
+ Called after a mental model GET operation completes (success or failure).
476
+
477
+ Override to implement post-operation logic such as tracking or audit logging.
478
+
479
+ Args:
480
+ result: Result context containing:
481
+ - bank_id: Bank identifier
482
+ - mental_model_id: Mental model identifier
483
+ - output_tokens: Token count of the returned content
484
+ - success: Whether the operation succeeded
485
+ - error: Error message (if failed)
486
+ """
487
+ pass
488
+
489
+ async def on_mental_model_refresh_complete(self, result: MentalModelRefreshResult) -> None:
490
+ """
491
+ Called after a mental model refresh operation completes (success or failure).
492
+
493
+ Override to implement post-operation logic such as tracking or audit logging.
494
+
495
+ Args:
496
+ result: Result context containing:
497
+ - bank_id: Bank identifier
498
+ - mental_model_id: Mental model identifier
499
+ - query_tokens: Tokens in source_query
500
+ - output_tokens: Tokens in generated content
501
+ - context_tokens: Tokens in context
502
+ - facts_used: Number of facts referenced
503
+ - mental_models_used: Number of mental models referenced
504
+ - success: Whether the operation succeeded
505
+ - error: Error message (if failed)
506
+ """
507
+ pass
@@ -200,15 +200,20 @@ def main():
200
200
  if tenant_extension:
201
201
  print("Tenant extension loaded - schemas will be discovered dynamically on each poll")
202
202
  else:
203
- print("No tenant extension configured, using public schema only")
203
+ print(f"No tenant extension configured, using schema: {config.database_schema}")
204
204
 
205
205
  # Create a single poller that handles all schemas dynamically
206
+ # Convert default schema to None for SQL compatibility (no schema prefix)
207
+ from hindsight_api.config import DEFAULT_DATABASE_SCHEMA
208
+
209
+ schema = None if config.database_schema == DEFAULT_DATABASE_SCHEMA else config.database_schema
206
210
  poller = WorkerPoller(
207
211
  pool=memory._pool,
208
212
  worker_id=args.worker_id,
209
213
  executor=memory.execute_task,
210
214
  poll_interval_ms=args.poll_interval,
211
215
  max_retries=args.max_retries,
216
+ schema=schema,
212
217
  tenant_extension=tenant_extension,
213
218
  max_slots=config.worker_max_slots,
214
219
  consolidation_max_slots=config.worker_consolidation_max_slots,
@@ -99,11 +99,13 @@ class WorkerPoller:
99
99
  self._in_flight_by_type: dict[str, int] = {}
100
100
 
101
101
  async def _get_schemas(self) -> list[str | None]:
102
- """Get list of schemas to poll. Returns [None] for public schema."""
102
+ """Get list of schemas to poll. Returns [None] for default schema (no prefix)."""
103
103
  if self._tenant_extension is not None:
104
+ from ..config import DEFAULT_DATABASE_SCHEMA
105
+
104
106
  tenants = await self._tenant_extension.list_tenants()
105
- # Convert "public" to None for SQL compatibility, keep others as-is
106
- return [t.schema if t.schema != "public" else None for t in tenants]
107
+ # Convert default schema to None for SQL compatibility (no prefix), keep others as-is
108
+ return [t.schema if t.schema != DEFAULT_DATABASE_SCHEMA else None for t in tenants]
107
109
  # Single schema mode
108
110
  return [self._schema]
109
111
 
@@ -194,7 +196,9 @@ class WorkerPoller:
194
196
  try:
195
197
  return await self._claim_batch_for_schema_inner(schema, limit, consolidation_limit)
196
198
  except Exception as e:
197
- logger.warning(f"Worker {self._worker_id} failed to claim tasks for schema {schema or 'public'}: {e}")
199
+ # Format schema for logging: custom schemas in quotes, None as-is
200
+ schema_display = f'"{schema}"' if schema else str(schema)
201
+ logger.warning(f"Worker {self._worker_id} failed to claim tasks for schema {schema_display}: {e}")
198
202
  return []
199
203
 
200
204
  async def _claim_batch_for_schema_inner(
@@ -418,7 +422,9 @@ class WorkerPoller:
418
422
  count = int(result.split()[-1]) if result else 0
419
423
  total_count += count
420
424
  except Exception as e:
421
- logger.warning(f"Worker {self._worker_id} failed to recover tasks for schema {schema or 'public'}: {e}")
425
+ # Format schema for logging: custom schemas in quotes, None as-is
426
+ schema_display = f'"{schema}"' if schema else str(schema)
427
+ logger.warning(f"Worker {self._worker_id} failed to recover tasks for schema {schema_display}: {e}")
422
428
 
423
429
  if total_count > 0:
424
430
  logger.info(f"Worker {self._worker_id} recovered {total_count} stale tasks from previous run")
@@ -457,7 +463,8 @@ class WorkerPoller:
457
463
  consolidation_count += 1
458
464
 
459
465
  types_str = ", ".join(f"{k}:{v}" for k, v in task_types.items())
460
- schemas_str = ", ".join(s or "public" for s in schemas_seen)
466
+ # Display None as "default" in logs
467
+ schemas_str = ", ".join(s if s else "default" for s in schemas_seen)
461
468
  logger.info(
462
469
  f"Worker {self._worker_id} claimed {len(tasks)} tasks "
463
470
  f"({consolidation_count} consolidation): {types_str} (schemas: {schemas_str})"
@@ -591,7 +598,8 @@ class WorkerPoller:
591
598
  other_workers.append(f"{wid}:{cnt}")
592
599
  others_str = ", ".join(other_workers) if other_workers else "none"
593
600
 
594
- schemas_str = ", ".join(s or "public" for s in schemas)
601
+ # Display None as "default" in logs
602
+ schemas_str = ", ".join(s if s else "default" for s in schemas)
595
603
  logger.info(
596
604
  f"[WORKER_STATS] worker={self._worker_id} "
597
605
  f"slots={in_flight}/{self._max_slots} (consolidation={consolidation_count}/{self._consolidation_max_slots}) | "
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "hindsight-api"
7
- version = "0.4.6"
7
+ version = "0.4.7"
8
8
  description = "Hindsight: Agent Memory That Works Like Human Memory"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.11"
File without changes
File without changes