PyPI - hindsight-api - Versions diffs - 0.4.5__tar.gz → 0.4.7__tar.gz - Mend

hindsight-api 0.4.5.tar.gz → 0.4.7.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (112) hide show

{hindsight_api-0.4.5 → hindsight_api-0.4.7}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hindsight-api
-Version: 0.4.5
+Version: 0.4.7
 Summary: Hindsight: Agent Memory That Works Like Human Memory
 Requires-Python: >=3.11
 Requires-Dist: aiohttp>=3.13.3

{hindsight_api-0.4.5 → hindsight_api-0.4.7}/hindsight_api/__init__.py RENAMED Viewed

@@ -46,4 +46,4 @@ __all__ = [
     "RemoteTEICrossEncoder",
     "LLMConfig",
 ]
-__version__ = "0.4.5"
+__version__ = "0.4.7"

{hindsight_api-0.4.5 → hindsight_api-0.4.7}/hindsight_api/api/http.py RENAMED Viewed

@@ -1398,13 +1398,18 @@ def create_app(
         # Start worker poller if enabled (standalone mode)
         if config.worker_enabled and memory._pool is not None:
+            from ..config import DEFAULT_DATABASE_SCHEMA
             worker_id = config.worker_id or socket.gethostname()
+            # Convert default schema to None for SQL compatibility (no schema prefix)
+            schema = None if config.database_schema == DEFAULT_DATABASE_SCHEMA else config.database_schema
             poller = WorkerPoller(
                 pool=memory._pool,
                 worker_id=worker_id,
                 executor=memory.execute_task,
                 poll_interval_ms=config.worker_poll_interval_ms,
                 max_retries=config.worker_max_retries,
+                schema=schema,
                 tenant_extension=getattr(memory, "_tenant_extension", None),
                 max_slots=config.worker_max_slots,
                 consolidation_max_slots=config.worker_consolidation_max_slots,
@@ -2285,6 +2290,23 @@ def _register_routes(app: FastAPI):
     ):
         """Get a mental model by ID."""
         try:
+            # Pre-operation validation hook
+            validator = app.state.memory._operation_validator
+            if validator:
+                from hindsight_api.extensions.operation_validator import MentalModelGetContext
+                ctx = MentalModelGetContext(
+                    bank_id=bank_id,
+                    mental_model_id=mental_model_id,
+                    request_context=request_context,
+                )
+                validation = await validator.validate_mental_model_get(ctx)
+                if not validation.allowed:
+                    raise OperationValidationError(
+                        validation.reason or "Operation not allowed",
+                        status_code=validation.status_code,
+                    )
             mental_model = await app.state.memory.get_mental_model(
                 bank_id=bank_id,
                 mental_model_id=mental_model_id,
@@ -2292,9 +2314,31 @@ def _register_routes(app: FastAPI):
             )
             if mental_model is None:
                 raise HTTPException(status_code=404, detail=f"Mental model '{mental_model_id}' not found")
+            # Post-operation hook
+            if validator:
+                from hindsight_api.extensions.operation_validator import MentalModelGetResult
+                content = mental_model.get("content", "")
+                output_tokens = len(content) // 4 if content else 0
+                result_ctx = MentalModelGetResult(
+                    bank_id=bank_id,
+                    mental_model_id=mental_model_id,
+                    request_context=request_context,
+                    output_tokens=output_tokens,
+                    success=True,
+                )
+                try:
+                    await validator.on_mental_model_get_complete(result_ctx)
+                except Exception as hook_err:
+                    logger.warning(f"Post-mental-model-get hook error (non-fatal): {hook_err}")
             return MentalModelResponse(**mental_model)
         except (AuthenticationError, HTTPException):
             raise
+        except OperationValidationError as e:
+            raise HTTPException(status_code=e.status_code, detail=e.reason)
         except Exception as e:
             import traceback

{hindsight_api-0.4.5 → hindsight_api-0.4.7}/hindsight_api/daemon.py RENAMED Viewed

@@ -15,7 +15,7 @@ from pathlib import Path
 logger = logging.getLogger(__name__)
 # Default daemon configuration
-DEFAULT_DAEMON_PORT = 8889
+DEFAULT_DAEMON_PORT = 8888
 DEFAULT_IDLE_TIMEOUT = 0  # 0 = no auto-exit (hindsight-embed passes its own timeout)
 LOCKFILE_PATH = Path.home() / ".hindsight" / "daemon.lock"
 DAEMON_LOG_PATH = Path.home() / ".hindsight" / "daemon.log"

{hindsight_api-0.4.5 → hindsight_api-0.4.7}/hindsight_api/engine/memory_engine.py RENAMED Viewed

@@ -597,7 +597,13 @@ class MemoryEngine(MemoryEngineInterface):
         from hindsight_api.models import RequestContext
-        internal_context = RequestContext(internal=True)
+        # Restore tenant_id/api_key_id from task payload so extensions can
+        # attribute the mental_model_refresh operation to the correct org.
+        internal_context = RequestContext(
+            internal=True,
+            tenant_id=task_dict.get("_tenant_id"),
+            api_key_id=task_dict.get("_api_key_id"),
+        )
         # Get the current mental model to get source_query
         mental_model = await self.get_mental_model(bank_id, mental_model_id, request_context=internal_context)
@@ -641,6 +647,42 @@ class MemoryEngine(MemoryEngineInterface):
             request_context=internal_context,
         )
+        # Call post-operation hook if validator is configured
+        if self._operation_validator:
+            from hindsight_api.extensions.operation_validator import MentalModelRefreshResult
+            # Count facts and mental models from based_on
+            facts_used = 0
+            mental_models_used = 0
+            if reflect_result.based_on:
+                for fact_type, facts in reflect_result.based_on.items():
+                    if facts:
+                        if fact_type == "mental_models":
+                            mental_models_used += len(facts)
+                        else:
+                            facts_used += len(facts)
+            # Estimate tokens
+            query_tokens = len(source_query) // 4 if source_query else 0
+            output_tokens = len(generated_content) // 4 if generated_content else 0
+            context_tokens = 0  # refresh doesn't use additional context
+            result_ctx = MentalModelRefreshResult(
+                bank_id=bank_id,
+                mental_model_id=mental_model_id,
+                request_context=internal_context,
+                query_tokens=query_tokens,
+                output_tokens=output_tokens,
+                context_tokens=context_tokens,
+                facts_used=facts_used,
+                mental_models_used=mental_models_used,
+                success=True,
+            )
+            try:
+                await self._operation_validator.on_mental_model_refresh_complete(result_ctx)
+            except Exception as hook_err:
+                logger.warning(f"Post-mental-model-refresh hook error (non-fatal): {hook_err}")
         logger.info(f"[REFRESH_MENTAL_MODEL_TASK] Completed for bank_id={bank_id}, mental_model_id={mental_model_id}")
     async def execute_task(self, task_dict: dict[str, Any]):
@@ -5482,13 +5524,21 @@ class MemoryEngine(MemoryEngineInterface):
         if not mental_model:
             raise ValueError(f"Mental model {mental_model_id} not found in bank {bank_id}")
+        # Pass tenant_id and api_key_id through task payload so the worker
+        # can provide request context to extension hooks.
+        task_payload: dict[str, Any] = {
+            "mental_model_id": mental_model_id,
+        }
+        if request_context.tenant_id:
+            task_payload["_tenant_id"] = request_context.tenant_id
+        if request_context.api_key_id:
+            task_payload["_api_key_id"] = request_context.api_key_id
         return await self._submit_async_operation(
             bank_id=bank_id,
             operation_type="refresh_mental_model",
             task_type="refresh_mental_model",
-            task_payload={
-                "mental_model_id": mental_model_id,
-            },
+            task_payload=task_payload,
             result_metadata={"mental_model_id": mental_model_id, "name": mental_model["name"]},
             dedupe_by_bank=False,
         )

{hindsight_api-0.4.5 → hindsight_api-0.4.7}/hindsight_api/engine/retain/fact_extraction.py RENAMED Viewed

@@ -57,21 +57,25 @@ def _infer_temporal_date(fact_text: str, event_date: datetime) -> str | None:
     return None
-def _sanitize_text(text: str) -> str:
+def _sanitize_text(text: str | None) -> str | None:
     """
-    Sanitize text by removing invalid Unicode surrogate characters.
+    Sanitize text by removing characters that break downstream systems.
-    Surrogate characters (U+D800 to U+DFFF) are used in UTF-16 encoding
-    but cannot be encoded in UTF-8. They can appear in Python strings
-    from improperly decoded data (e.g., from JavaScript or broken files).
+    Removes:
+    - Null bytes (\\x00): Invalid in PostgreSQL UTF-8 encoding
+    - Unicode surrogates (U+D800-U+DFFF): Invalid in UTF-8, break LLM APIs
-    This function removes unpaired surrogates to prevent UnicodeEncodeError
-    when the text is sent to the LLM API.
+    Surrogate characters are used in UTF-16 encoding but cannot be encoded
+    in UTF-8. They can appear in Python strings from improperly decoded data
+    (e.g., from JavaScript or broken files). Null bytes commonly appear in
+    OCR output, PDF extraction, or copy-paste from binary sources.
     """
+    if text is None:
+        return None
     if not text:
         return text
-    # Remove surrogate characters (U+D800 to U+DFFF) using regex
-    # These are invalid in UTF-8 and cause encoding errors
+    # Remove null bytes and surrogate characters
+    text = text.replace("\x00", "")
     return re.sub(r"[\ud800-\udfff]", "", text)

{hindsight_api-0.4.5 → hindsight_api-0.4.7}/hindsight_api/engine/retain/fact_storage.py RENAMED Viewed

@@ -8,6 +8,7 @@ import json
 import logging
 from ..memory_engine import fq_table
+from .fact_extraction import _sanitize_text
 from .types import ProcessedFact
 logger = logging.getLogger(__name__)
@@ -47,7 +48,7 @@ async def insert_facts_batch(
     tags_list = []
     for fact in facts:
-        fact_texts.append(fact.fact_text)
+        fact_texts.append(_sanitize_text(fact.fact_text))
         # Convert embedding to string for asyncpg vector type
         embeddings.append(str(fact.embedding))
         # event_date: Use occurred_start if available, otherwise use mentioned_at
@@ -56,7 +57,7 @@ async def insert_facts_batch(
         occurred_starts.append(fact.occurred_start)
         occurred_ends.append(fact.occurred_end)
         mentioned_ats.append(fact.mentioned_at)
-        contexts.append(fact.context)
+        contexts.append(_sanitize_text(fact.context))
         fact_types.append(fact.fact_type)
         # confidence_score is only for opinion facts
         confidence_scores.append(1.0 if fact.fact_type == "opinion" else None)
@@ -157,7 +158,8 @@ async def handle_document_tracking(
     """
     import hashlib
-    # Calculate content hash
+    # Sanitize and calculate content hash
+    combined_content = _sanitize_text(combined_content) or ""
     content_hash = hashlib.sha256(combined_content.encode()).hexdigest()
     # Always delete old document first if it exists (cascades to units and links)

{hindsight_api-0.4.5 → hindsight_api-0.4.7}/hindsight_api/extensions/__init__.py RENAMED Viewed

@@ -24,6 +24,10 @@ from hindsight_api.extensions.operation_validator import (
     # Consolidation operation
     ConsolidateContext,
     ConsolidateResult,
+    # Mental Model operations
+    MentalModelGetContext,
+    MentalModelGetResult,
+    MentalModelRefreshResult,
     # Core operations
     OperationValidationError,
     OperationValidatorExtension,
@@ -65,6 +69,10 @@ __all__ = [
     # Operation Validator - Consolidation
     "ConsolidateContext",
     "ConsolidateResult",
+    # Operation Validator - Mental Model
+    "MentalModelGetContext",
+    "MentalModelGetResult",
+    "MentalModelRefreshResult",
     # Tenant/Auth
     "ApiKeyTenantExtension",
     "AuthenticationError",

{hindsight_api-0.4.5 → hindsight_api-0.4.7}/hindsight_api/extensions/operation_validator.py RENAMED Viewed

@@ -196,6 +196,48 @@ class ConsolidateResult:
     error: str | None = None
+# =============================================================================
+# Mental Model Contexts
+# =============================================================================
+@dataclass
+class MentalModelGetContext:
+    """Context for a mental model GET operation validation (pre-operation)."""
+    bank_id: str
+    mental_model_id: str
+    request_context: "RequestContext"
+@dataclass
+class MentalModelGetResult:
+    """Result context for post-mental-model-GET hook."""
+    bank_id: str
+    mental_model_id: str
+    request_context: "RequestContext"
+    output_tokens: int  # tokens in the returned content
+    success: bool = True
+    error: str | None = None
+@dataclass
+class MentalModelRefreshResult:
+    """Result context for post-mental-model-refresh hook."""
+    bank_id: str
+    mental_model_id: str
+    request_context: "RequestContext"
+    query_tokens: int  # tokens in source_query
+    output_tokens: int  # tokens in generated content
+    context_tokens: int  # tokens in context (if any)
+    facts_used: int  # facts referenced in based_on
+    mental_models_used: int  # mental models referenced in based_on
+    success: bool = True
+    error: str | None = None
 class OperationValidatorExtension(Extension, ABC):
     """
     Validates and hooks into retain/recall/reflect/consolidate operations.
@@ -402,3 +444,64 @@ class OperationValidatorExtension(Extension, ABC):
                 - error: Error message (if failed)
         """
         pass
+    # =========================================================================
+    # Mental Model - Pre-operation validation hook (optional - override to implement)
+    # =========================================================================
+    async def validate_mental_model_get(self, ctx: MentalModelGetContext) -> ValidationResult:
+        """
+        Validate a mental model GET operation before execution.
+        Override to implement custom validation logic for mental model retrieval.
+        Args:
+            ctx: Context containing:
+                - bank_id: Bank identifier
+                - mental_model_id: Mental model identifier
+                - request_context: Request context with auth info
+        Returns:
+            ValidationResult indicating whether the operation is allowed.
+        """
+        return ValidationResult.accept()
+    # =========================================================================
+    # Mental Model - Post-operation hooks (optional - override to implement)
+    # =========================================================================
+    async def on_mental_model_get_complete(self, result: MentalModelGetResult) -> None:
+        """
+        Called after a mental model GET operation completes (success or failure).
+        Override to implement post-operation logic such as tracking or audit logging.
+        Args:
+            result: Result context containing:
+                - bank_id: Bank identifier
+                - mental_model_id: Mental model identifier
+                - output_tokens: Token count of the returned content
+                - success: Whether the operation succeeded
+                - error: Error message (if failed)
+        """
+        pass
+    async def on_mental_model_refresh_complete(self, result: MentalModelRefreshResult) -> None:
+        """
+        Called after a mental model refresh operation completes (success or failure).
+        Override to implement post-operation logic such as tracking or audit logging.
+        Args:
+            result: Result context containing:
+                - bank_id: Bank identifier
+                - mental_model_id: Mental model identifier
+                - query_tokens: Tokens in source_query
+                - output_tokens: Tokens in generated content
+                - context_tokens: Tokens in context
+                - facts_used: Number of facts referenced
+                - mental_models_used: Number of mental models referenced
+                - success: Whether the operation succeeded
+                - error: Error message (if failed)
+        """
+        pass

{hindsight_api-0.4.5 → hindsight_api-0.4.7}/hindsight_api/worker/main.py RENAMED Viewed

@@ -200,15 +200,20 @@ def main():
         if tenant_extension:
             print("Tenant extension loaded - schemas will be discovered dynamically on each poll")
         else:
-            print("No tenant extension configured, using public schema only")
+            print(f"No tenant extension configured, using schema: {config.database_schema}")
         # Create a single poller that handles all schemas dynamically
+        # Convert default schema to None for SQL compatibility (no schema prefix)
+        from hindsight_api.config import DEFAULT_DATABASE_SCHEMA
+        schema = None if config.database_schema == DEFAULT_DATABASE_SCHEMA else config.database_schema
         poller = WorkerPoller(
             pool=memory._pool,
             worker_id=args.worker_id,
             executor=memory.execute_task,
             poll_interval_ms=args.poll_interval,
             max_retries=args.max_retries,
+            schema=schema,
             tenant_extension=tenant_extension,
             max_slots=config.worker_max_slots,
             consolidation_max_slots=config.worker_consolidation_max_slots,

{hindsight_api-0.4.5 → hindsight_api-0.4.7}/hindsight_api/worker/poller.py RENAMED Viewed

@@ -99,11 +99,13 @@ class WorkerPoller:
         self._in_flight_by_type: dict[str, int] = {}
     async def _get_schemas(self) -> list[str | None]:
-        """Get list of schemas to poll. Returns [None] for public schema."""
+        """Get list of schemas to poll. Returns [None] for default schema (no prefix)."""
         if self._tenant_extension is not None:
+            from ..config import DEFAULT_DATABASE_SCHEMA
             tenants = await self._tenant_extension.list_tenants()
-            # Convert "public" to None for SQL compatibility, keep others as-is
-            return [t.schema if t.schema != "public" else None for t in tenants]
+            # Convert default schema to None for SQL compatibility (no prefix), keep others as-is
+            return [t.schema if t.schema != DEFAULT_DATABASE_SCHEMA else None for t in tenants]
         # Single schema mode
         return [self._schema]
@@ -194,7 +196,9 @@ class WorkerPoller:
         try:
             return await self._claim_batch_for_schema_inner(schema, limit, consolidation_limit)
         except Exception as e:
-            logger.warning(f"Worker {self._worker_id} failed to claim tasks for schema {schema or 'public'}: {e}")
+            # Format schema for logging: custom schemas in quotes, None as-is
+            schema_display = f'"{schema}"' if schema else str(schema)
+            logger.warning(f"Worker {self._worker_id} failed to claim tasks for schema {schema_display}: {e}")
             return []
     async def _claim_batch_for_schema_inner(
@@ -418,7 +422,9 @@ class WorkerPoller:
                 count = int(result.split()[-1]) if result else 0
                 total_count += count
             except Exception as e:
-                logger.warning(f"Worker {self._worker_id} failed to recover tasks for schema {schema or 'public'}: {e}")
+                # Format schema for logging: custom schemas in quotes, None as-is
+                schema_display = f'"{schema}"' if schema else str(schema)
+                logger.warning(f"Worker {self._worker_id} failed to recover tasks for schema {schema_display}: {e}")
         if total_count > 0:
             logger.info(f"Worker {self._worker_id} recovered {total_count} stale tasks from previous run")
@@ -457,7 +463,8 @@ class WorkerPoller:
                             consolidation_count += 1
                     types_str = ", ".join(f"{k}:{v}" for k, v in task_types.items())
-                    schemas_str = ", ".join(s or "public" for s in schemas_seen)
+                    # Display None as "default" in logs
+                    schemas_str = ", ".join(s if s else "default" for s in schemas_seen)
                     logger.info(
                         f"Worker {self._worker_id} claimed {len(tasks)} tasks "
                         f"({consolidation_count} consolidation): {types_str} (schemas: {schemas_str})"
@@ -591,7 +598,8 @@ class WorkerPoller:
                     other_workers.append(f"{wid}:{cnt}")
             others_str = ", ".join(other_workers) if other_workers else "none"
-            schemas_str = ", ".join(s or "public" for s in schemas)
+            # Display None as "default" in logs
+            schemas_str = ", ".join(s if s else "default" for s in schemas)
             logger.info(
                 f"[WORKER_STATS] worker={self._worker_id} "
                 f"slots={in_flight}/{self._max_slots} (consolidation={consolidation_count}/{self._consolidation_max_slots}) | "

{hindsight_api-0.4.5 → hindsight_api-0.4.7}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "hindsight-api"
-version = "0.4.5"
+version = "0.4.7"
 description = "Hindsight: Agent Memory That Works Like Human Memory"
 readme = "README.md"
 requires-python = ">=3.11"