hindsight-api 0.2.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. hindsight_api/admin/__init__.py +1 -0
  2. hindsight_api/admin/cli.py +311 -0
  3. hindsight_api/alembic/versions/f1a2b3c4d5e6_add_memory_links_composite_index.py +44 -0
  4. hindsight_api/alembic/versions/g2a3b4c5d6e7_add_tags_column.py +48 -0
  5. hindsight_api/alembic/versions/h3c4d5e6f7g8_mental_models_v4.py +112 -0
  6. hindsight_api/alembic/versions/i4d5e6f7g8h9_delete_opinions.py +41 -0
  7. hindsight_api/alembic/versions/j5e6f7g8h9i0_mental_model_versions.py +95 -0
  8. hindsight_api/alembic/versions/k6f7g8h9i0j1_add_directive_subtype.py +58 -0
  9. hindsight_api/alembic/versions/l7g8h9i0j1k2_add_worker_columns.py +109 -0
  10. hindsight_api/alembic/versions/m8h9i0j1k2l3_mental_model_id_to_text.py +41 -0
  11. hindsight_api/alembic/versions/n9i0j1k2l3m4_learnings_and_pinned_reflections.py +134 -0
  12. hindsight_api/alembic/versions/o0j1k2l3m4n5_migrate_mental_models_data.py +113 -0
  13. hindsight_api/alembic/versions/p1k2l3m4n5o6_new_knowledge_architecture.py +194 -0
  14. hindsight_api/alembic/versions/q2l3m4n5o6p7_fix_mental_model_fact_type.py +50 -0
  15. hindsight_api/alembic/versions/r3m4n5o6p7q8_add_reflect_response_to_reflections.py +47 -0
  16. hindsight_api/alembic/versions/s4n5o6p7q8r9_add_consolidated_at_to_memory_units.py +53 -0
  17. hindsight_api/alembic/versions/t5o6p7q8r9s0_rename_mental_models_to_observations.py +134 -0
  18. hindsight_api/alembic/versions/u6p7q8r9s0t1_mental_models_text_id.py +41 -0
  19. hindsight_api/alembic/versions/v7q8r9s0t1u2_add_max_tokens_to_mental_models.py +50 -0
  20. hindsight_api/api/http.py +1406 -118
  21. hindsight_api/api/mcp.py +11 -196
  22. hindsight_api/config.py +359 -27
  23. hindsight_api/engine/consolidation/__init__.py +5 -0
  24. hindsight_api/engine/consolidation/consolidator.py +859 -0
  25. hindsight_api/engine/consolidation/prompts.py +69 -0
  26. hindsight_api/engine/cross_encoder.py +706 -88
  27. hindsight_api/engine/db_budget.py +284 -0
  28. hindsight_api/engine/db_utils.py +11 -0
  29. hindsight_api/engine/directives/__init__.py +5 -0
  30. hindsight_api/engine/directives/models.py +37 -0
  31. hindsight_api/engine/embeddings.py +553 -29
  32. hindsight_api/engine/entity_resolver.py +8 -5
  33. hindsight_api/engine/interface.py +40 -17
  34. hindsight_api/engine/llm_wrapper.py +744 -68
  35. hindsight_api/engine/memory_engine.py +2505 -1017
  36. hindsight_api/engine/mental_models/__init__.py +14 -0
  37. hindsight_api/engine/mental_models/models.py +53 -0
  38. hindsight_api/engine/query_analyzer.py +4 -3
  39. hindsight_api/engine/reflect/__init__.py +18 -0
  40. hindsight_api/engine/reflect/agent.py +933 -0
  41. hindsight_api/engine/reflect/models.py +109 -0
  42. hindsight_api/engine/reflect/observations.py +186 -0
  43. hindsight_api/engine/reflect/prompts.py +483 -0
  44. hindsight_api/engine/reflect/tools.py +437 -0
  45. hindsight_api/engine/reflect/tools_schema.py +250 -0
  46. hindsight_api/engine/response_models.py +168 -4
  47. hindsight_api/engine/retain/bank_utils.py +79 -201
  48. hindsight_api/engine/retain/fact_extraction.py +424 -195
  49. hindsight_api/engine/retain/fact_storage.py +35 -12
  50. hindsight_api/engine/retain/link_utils.py +29 -24
  51. hindsight_api/engine/retain/orchestrator.py +24 -43
  52. hindsight_api/engine/retain/types.py +11 -2
  53. hindsight_api/engine/search/graph_retrieval.py +43 -14
  54. hindsight_api/engine/search/link_expansion_retrieval.py +391 -0
  55. hindsight_api/engine/search/mpfp_retrieval.py +362 -117
  56. hindsight_api/engine/search/reranking.py +2 -2
  57. hindsight_api/engine/search/retrieval.py +848 -201
  58. hindsight_api/engine/search/tags.py +172 -0
  59. hindsight_api/engine/search/think_utils.py +42 -141
  60. hindsight_api/engine/search/trace.py +12 -1
  61. hindsight_api/engine/search/tracer.py +26 -6
  62. hindsight_api/engine/search/types.py +21 -3
  63. hindsight_api/engine/task_backend.py +113 -106
  64. hindsight_api/engine/utils.py +1 -152
  65. hindsight_api/extensions/__init__.py +10 -1
  66. hindsight_api/extensions/builtin/tenant.py +5 -1
  67. hindsight_api/extensions/context.py +10 -1
  68. hindsight_api/extensions/operation_validator.py +81 -4
  69. hindsight_api/extensions/tenant.py +26 -0
  70. hindsight_api/main.py +69 -6
  71. hindsight_api/mcp_local.py +12 -53
  72. hindsight_api/mcp_tools.py +494 -0
  73. hindsight_api/metrics.py +433 -48
  74. hindsight_api/migrations.py +141 -1
  75. hindsight_api/models.py +3 -3
  76. hindsight_api/pg0.py +53 -0
  77. hindsight_api/server.py +39 -2
  78. hindsight_api/worker/__init__.py +11 -0
  79. hindsight_api/worker/main.py +296 -0
  80. hindsight_api/worker/poller.py +486 -0
  81. {hindsight_api-0.2.1.dist-info → hindsight_api-0.4.0.dist-info}/METADATA +16 -6
  82. hindsight_api-0.4.0.dist-info/RECORD +112 -0
  83. {hindsight_api-0.2.1.dist-info → hindsight_api-0.4.0.dist-info}/entry_points.txt +2 -0
  84. hindsight_api/engine/retain/observation_regeneration.py +0 -254
  85. hindsight_api/engine/search/observation_utils.py +0 -125
  86. hindsight_api/engine/search/scoring.py +0 -159
  87. hindsight_api-0.2.1.dist-info/RECORD +0 -75
  88. {hindsight_api-0.2.1.dist-info → hindsight_api-0.4.0.dist-info}/WHEEL +0 -0
hindsight_api/config.py CHANGED
@@ -4,9 +4,17 @@ Centralized configuration for Hindsight API.
4
4
  All environment variables and their defaults are defined here.
5
5
  """
6
6
 
7
+ import json
7
8
  import logging
8
9
  import os
10
+ import sys
9
11
  from dataclasses import dataclass
12
+ from datetime import datetime, timezone
13
+
14
+ from dotenv import find_dotenv, load_dotenv
15
+
16
+ # Load .env file, searching current and parent directories (overrides existing env vars)
17
+ load_dotenv(find_dotenv(usecwd=True), override=True)
10
18
 
11
19
  logger = logging.getLogger(__name__)
12
20
 
@@ -18,31 +26,103 @@ ENV_LLM_MODEL = "HINDSIGHT_API_LLM_MODEL"
18
26
  ENV_LLM_BASE_URL = "HINDSIGHT_API_LLM_BASE_URL"
19
27
  ENV_LLM_MAX_CONCURRENT = "HINDSIGHT_API_LLM_MAX_CONCURRENT"
20
28
  ENV_LLM_TIMEOUT = "HINDSIGHT_API_LLM_TIMEOUT"
29
+ ENV_LLM_GROQ_SERVICE_TIER = "HINDSIGHT_API_LLM_GROQ_SERVICE_TIER"
30
+
31
+ # Per-operation LLM configuration (optional, falls back to global LLM config)
32
+ ENV_RETAIN_LLM_PROVIDER = "HINDSIGHT_API_RETAIN_LLM_PROVIDER"
33
+ ENV_RETAIN_LLM_API_KEY = "HINDSIGHT_API_RETAIN_LLM_API_KEY"
34
+ ENV_RETAIN_LLM_MODEL = "HINDSIGHT_API_RETAIN_LLM_MODEL"
35
+ ENV_RETAIN_LLM_BASE_URL = "HINDSIGHT_API_RETAIN_LLM_BASE_URL"
36
+
37
+ ENV_REFLECT_LLM_PROVIDER = "HINDSIGHT_API_REFLECT_LLM_PROVIDER"
38
+ ENV_REFLECT_LLM_API_KEY = "HINDSIGHT_API_REFLECT_LLM_API_KEY"
39
+ ENV_REFLECT_LLM_MODEL = "HINDSIGHT_API_REFLECT_LLM_MODEL"
40
+ ENV_REFLECT_LLM_BASE_URL = "HINDSIGHT_API_REFLECT_LLM_BASE_URL"
41
+
42
+ ENV_CONSOLIDATION_LLM_PROVIDER = "HINDSIGHT_API_CONSOLIDATION_LLM_PROVIDER"
43
+ ENV_CONSOLIDATION_LLM_API_KEY = "HINDSIGHT_API_CONSOLIDATION_LLM_API_KEY"
44
+ ENV_CONSOLIDATION_LLM_MODEL = "HINDSIGHT_API_CONSOLIDATION_LLM_MODEL"
45
+ ENV_CONSOLIDATION_LLM_BASE_URL = "HINDSIGHT_API_CONSOLIDATION_LLM_BASE_URL"
21
46
 
22
47
  ENV_EMBEDDINGS_PROVIDER = "HINDSIGHT_API_EMBEDDINGS_PROVIDER"
23
48
  ENV_EMBEDDINGS_LOCAL_MODEL = "HINDSIGHT_API_EMBEDDINGS_LOCAL_MODEL"
24
49
  ENV_EMBEDDINGS_TEI_URL = "HINDSIGHT_API_EMBEDDINGS_TEI_URL"
50
+ ENV_EMBEDDINGS_OPENAI_API_KEY = "HINDSIGHT_API_EMBEDDINGS_OPENAI_API_KEY"
51
+ ENV_EMBEDDINGS_OPENAI_MODEL = "HINDSIGHT_API_EMBEDDINGS_OPENAI_MODEL"
52
+ ENV_EMBEDDINGS_OPENAI_BASE_URL = "HINDSIGHT_API_EMBEDDINGS_OPENAI_BASE_URL"
53
+
54
+ ENV_COHERE_API_KEY = "HINDSIGHT_API_COHERE_API_KEY"
55
+ ENV_EMBEDDINGS_COHERE_MODEL = "HINDSIGHT_API_EMBEDDINGS_COHERE_MODEL"
56
+ ENV_EMBEDDINGS_COHERE_BASE_URL = "HINDSIGHT_API_EMBEDDINGS_COHERE_BASE_URL"
57
+ ENV_RERANKER_COHERE_MODEL = "HINDSIGHT_API_RERANKER_COHERE_MODEL"
58
+ ENV_RERANKER_COHERE_BASE_URL = "HINDSIGHT_API_RERANKER_COHERE_BASE_URL"
59
+
60
+ # LiteLLM gateway configuration (for embeddings and reranker via LiteLLM proxy)
61
+ ENV_LITELLM_API_BASE = "HINDSIGHT_API_LITELLM_API_BASE"
62
+ ENV_LITELLM_API_KEY = "HINDSIGHT_API_LITELLM_API_KEY"
63
+ ENV_EMBEDDINGS_LITELLM_MODEL = "HINDSIGHT_API_EMBEDDINGS_LITELLM_MODEL"
64
+ ENV_RERANKER_LITELLM_MODEL = "HINDSIGHT_API_RERANKER_LITELLM_MODEL"
25
65
 
26
66
  ENV_RERANKER_PROVIDER = "HINDSIGHT_API_RERANKER_PROVIDER"
27
67
  ENV_RERANKER_LOCAL_MODEL = "HINDSIGHT_API_RERANKER_LOCAL_MODEL"
68
+ ENV_RERANKER_LOCAL_MAX_CONCURRENT = "HINDSIGHT_API_RERANKER_LOCAL_MAX_CONCURRENT"
28
69
  ENV_RERANKER_TEI_URL = "HINDSIGHT_API_RERANKER_TEI_URL"
70
+ ENV_RERANKER_TEI_BATCH_SIZE = "HINDSIGHT_API_RERANKER_TEI_BATCH_SIZE"
71
+ ENV_RERANKER_TEI_MAX_CONCURRENT = "HINDSIGHT_API_RERANKER_TEI_MAX_CONCURRENT"
72
+ ENV_RERANKER_MAX_CANDIDATES = "HINDSIGHT_API_RERANKER_MAX_CANDIDATES"
73
+ ENV_RERANKER_FLASHRANK_MODEL = "HINDSIGHT_API_RERANKER_FLASHRANK_MODEL"
74
+ ENV_RERANKER_FLASHRANK_CACHE_DIR = "HINDSIGHT_API_RERANKER_FLASHRANK_CACHE_DIR"
29
75
 
30
76
  ENV_HOST = "HINDSIGHT_API_HOST"
31
77
  ENV_PORT = "HINDSIGHT_API_PORT"
32
78
  ENV_LOG_LEVEL = "HINDSIGHT_API_LOG_LEVEL"
79
+ ENV_LOG_FORMAT = "HINDSIGHT_API_LOG_FORMAT"
80
+ ENV_WORKERS = "HINDSIGHT_API_WORKERS"
33
81
  ENV_MCP_ENABLED = "HINDSIGHT_API_MCP_ENABLED"
34
82
  ENV_GRAPH_RETRIEVER = "HINDSIGHT_API_GRAPH_RETRIEVER"
83
+ ENV_MPFP_TOP_K_NEIGHBORS = "HINDSIGHT_API_MPFP_TOP_K_NEIGHBORS"
84
+ ENV_RECALL_MAX_CONCURRENT = "HINDSIGHT_API_RECALL_MAX_CONCURRENT"
85
+ ENV_RECALL_CONNECTION_BUDGET = "HINDSIGHT_API_RECALL_CONNECTION_BUDGET"
35
86
  ENV_MCP_LOCAL_BANK_ID = "HINDSIGHT_API_MCP_LOCAL_BANK_ID"
36
87
  ENV_MCP_INSTRUCTIONS = "HINDSIGHT_API_MCP_INSTRUCTIONS"
88
+ ENV_MENTAL_MODEL_REFRESH_CONCURRENCY = "HINDSIGHT_API_MENTAL_MODEL_REFRESH_CONCURRENCY"
89
+
90
+ # Retain settings
91
+ ENV_RETAIN_MAX_COMPLETION_TOKENS = "HINDSIGHT_API_RETAIN_MAX_COMPLETION_TOKENS"
92
+ ENV_RETAIN_CHUNK_SIZE = "HINDSIGHT_API_RETAIN_CHUNK_SIZE"
93
+ ENV_RETAIN_EXTRACT_CAUSAL_LINKS = "HINDSIGHT_API_RETAIN_EXTRACT_CAUSAL_LINKS"
94
+ ENV_RETAIN_EXTRACTION_MODE = "HINDSIGHT_API_RETAIN_EXTRACTION_MODE"
95
+ ENV_RETAIN_CUSTOM_INSTRUCTIONS = "HINDSIGHT_API_RETAIN_CUSTOM_INSTRUCTIONS"
96
+ ENV_RETAIN_OBSERVATIONS_ASYNC = "HINDSIGHT_API_RETAIN_OBSERVATIONS_ASYNC"
37
97
 
38
- # Observation thresholds
39
- ENV_OBSERVATION_MIN_FACTS = "HINDSIGHT_API_OBSERVATION_MIN_FACTS"
40
- ENV_OBSERVATION_TOP_ENTITIES = "HINDSIGHT_API_OBSERVATION_TOP_ENTITIES"
98
+ # Observations settings (consolidated knowledge from facts)
99
+ ENV_ENABLE_OBSERVATIONS = "HINDSIGHT_API_ENABLE_OBSERVATIONS"
100
+ ENV_CONSOLIDATION_BATCH_SIZE = "HINDSIGHT_API_CONSOLIDATION_BATCH_SIZE"
41
101
 
42
102
  # Optimization flags
43
103
  ENV_SKIP_LLM_VERIFICATION = "HINDSIGHT_API_SKIP_LLM_VERIFICATION"
44
104
  ENV_LAZY_RERANKER = "HINDSIGHT_API_LAZY_RERANKER"
45
105
 
106
+ # Database migrations
107
+ ENV_RUN_MIGRATIONS_ON_STARTUP = "HINDSIGHT_API_RUN_MIGRATIONS_ON_STARTUP"
108
+
109
+ # Database connection pool
110
+ ENV_DB_POOL_MIN_SIZE = "HINDSIGHT_API_DB_POOL_MIN_SIZE"
111
+ ENV_DB_POOL_MAX_SIZE = "HINDSIGHT_API_DB_POOL_MAX_SIZE"
112
+ ENV_DB_COMMAND_TIMEOUT = "HINDSIGHT_API_DB_COMMAND_TIMEOUT"
113
+ ENV_DB_ACQUIRE_TIMEOUT = "HINDSIGHT_API_DB_ACQUIRE_TIMEOUT"
114
+
115
+ # Worker configuration (distributed task processing)
116
+ ENV_WORKER_ENABLED = "HINDSIGHT_API_WORKER_ENABLED"
117
+ ENV_WORKER_ID = "HINDSIGHT_API_WORKER_ID"
118
+ ENV_WORKER_POLL_INTERVAL_MS = "HINDSIGHT_API_WORKER_POLL_INTERVAL_MS"
119
+ ENV_WORKER_MAX_RETRIES = "HINDSIGHT_API_WORKER_MAX_RETRIES"
120
+ ENV_WORKER_BATCH_SIZE = "HINDSIGHT_API_WORKER_BATCH_SIZE"
121
+ ENV_WORKER_HTTP_PORT = "HINDSIGHT_API_WORKER_HTTP_PORT"
122
+
123
+ # Reflect agent settings
124
+ ENV_REFLECT_MAX_ITERATIONS = "HINDSIGHT_API_REFLECT_MAX_ITERATIONS"
125
+
46
126
  # Default values
47
127
  DEFAULT_DATABASE_URL = "pg0"
48
128
  DEFAULT_LLM_PROVIDER = "openai"
@@ -52,20 +132,71 @@ DEFAULT_LLM_TIMEOUT = 120.0 # seconds
52
132
 
53
133
  DEFAULT_EMBEDDINGS_PROVIDER = "local"
54
134
  DEFAULT_EMBEDDINGS_LOCAL_MODEL = "BAAI/bge-small-en-v1.5"
135
+ DEFAULT_EMBEDDINGS_OPENAI_MODEL = "text-embedding-3-small"
136
+ DEFAULT_EMBEDDING_DIMENSION = 384
55
137
 
56
138
  DEFAULT_RERANKER_PROVIDER = "local"
57
139
  DEFAULT_RERANKER_LOCAL_MODEL = "cross-encoder/ms-marco-MiniLM-L-6-v2"
140
+ DEFAULT_RERANKER_LOCAL_MAX_CONCURRENT = 4 # Limit concurrent CPU-bound reranking to prevent thrashing
141
+ DEFAULT_RERANKER_TEI_BATCH_SIZE = 128
142
+ DEFAULT_RERANKER_TEI_MAX_CONCURRENT = 8
143
+ DEFAULT_RERANKER_MAX_CANDIDATES = 300
144
+ DEFAULT_RERANKER_FLASHRANK_MODEL = "ms-marco-MiniLM-L-12-v2" # Best balance of speed and quality
145
+ DEFAULT_RERANKER_FLASHRANK_CACHE_DIR = None # Use default cache directory
146
+
147
+ DEFAULT_EMBEDDINGS_COHERE_MODEL = "embed-english-v3.0"
148
+ DEFAULT_RERANKER_COHERE_MODEL = "rerank-english-v3.0"
149
+
150
+ # LiteLLM defaults
151
+ DEFAULT_LITELLM_API_BASE = "http://localhost:4000"
152
+ DEFAULT_EMBEDDINGS_LITELLM_MODEL = "text-embedding-3-small"
153
+ DEFAULT_RERANKER_LITELLM_MODEL = "cohere/rerank-english-v3.0"
58
154
 
59
155
  DEFAULT_HOST = "0.0.0.0"
60
156
  DEFAULT_PORT = 8888
61
157
  DEFAULT_LOG_LEVEL = "info"
158
+ DEFAULT_LOG_FORMAT = "text" # Options: "text", "json"
159
+ DEFAULT_WORKERS = 1
62
160
  DEFAULT_MCP_ENABLED = True
63
- DEFAULT_GRAPH_RETRIEVER = "bfs" # Options: "bfs", "mpfp"
161
+ DEFAULT_GRAPH_RETRIEVER = "link_expansion" # Options: "link_expansion", "mpfp", "bfs"
162
+ DEFAULT_MPFP_TOP_K_NEIGHBORS = 20 # Fan-out limit per node in MPFP graph traversal
163
+ DEFAULT_RECALL_MAX_CONCURRENT = 32 # Max concurrent recall operations per worker
164
+ DEFAULT_RECALL_CONNECTION_BUDGET = 4 # Max concurrent DB connections per recall operation
64
165
  DEFAULT_MCP_LOCAL_BANK_ID = "mcp"
65
-
66
- # Observation thresholds
67
- DEFAULT_OBSERVATION_MIN_FACTS = 5 # Min facts required to generate entity observations
68
- DEFAULT_OBSERVATION_TOP_ENTITIES = 5 # Max entities to process per retain batch
166
+ DEFAULT_MENTAL_MODEL_REFRESH_CONCURRENCY = 8 # Max concurrent mental model refreshes
167
+
168
+ # Retain settings
169
+ DEFAULT_RETAIN_MAX_COMPLETION_TOKENS = 64000 # Max tokens for fact extraction LLM call
170
+ DEFAULT_RETAIN_CHUNK_SIZE = 3000 # Max chars per chunk for fact extraction
171
+ DEFAULT_RETAIN_EXTRACT_CAUSAL_LINKS = True # Extract causal links between facts
172
+ DEFAULT_RETAIN_EXTRACTION_MODE = "concise" # Extraction mode: "concise", "verbose", or "custom"
173
+ RETAIN_EXTRACTION_MODES = ("concise", "verbose", "custom") # Allowed extraction modes
174
+ DEFAULT_RETAIN_CUSTOM_INSTRUCTIONS = None # Custom extraction guidelines (only used when mode="custom")
175
+ DEFAULT_RETAIN_OBSERVATIONS_ASYNC = False # Run observation generation async (after retain completes)
176
+
177
+ # Observations defaults (consolidated knowledge from facts)
178
+ DEFAULT_ENABLE_OBSERVATIONS = True # Observations enabled by default
179
+ DEFAULT_CONSOLIDATION_BATCH_SIZE = 50 # Memories to load per batch (internal memory optimization)
180
+
181
+ # Database migrations
182
+ DEFAULT_RUN_MIGRATIONS_ON_STARTUP = True
183
+
184
+ # Database connection pool
185
+ DEFAULT_DB_POOL_MIN_SIZE = 5
186
+ DEFAULT_DB_POOL_MAX_SIZE = 100
187
+ DEFAULT_DB_COMMAND_TIMEOUT = 60 # seconds
188
+ DEFAULT_DB_ACQUIRE_TIMEOUT = 30 # seconds
189
+
190
+ # Worker configuration (distributed task processing)
191
+ DEFAULT_WORKER_ENABLED = True # API runs worker by default (standalone mode)
192
+ DEFAULT_WORKER_ID = None # Will use hostname if not specified
193
+ DEFAULT_WORKER_POLL_INTERVAL_MS = 500 # Poll database every 500ms
194
+ DEFAULT_WORKER_MAX_RETRIES = 3 # Max retries before marking task failed
195
+ DEFAULT_WORKER_BATCH_SIZE = 10 # Tasks to claim per poll cycle
196
+ DEFAULT_WORKER_HTTP_PORT = 8889 # HTTP port for worker metrics/health
197
+
198
+ # Reflect agent settings
199
+ DEFAULT_REFLECT_MAX_ITERATIONS = 10 # Max tool call iterations before forcing response
69
200
 
70
201
  # Default MCP tool descriptions (can be customized via env vars)
71
202
  DEFAULT_MCP_RETAIN_DESCRIPTION = """Store important information to long-term memory.
@@ -87,8 +218,50 @@ Use this tool PROACTIVELY to:
87
218
  - Remember user's goals and context
88
219
  - Personalize responses based on past interactions"""
89
220
 
90
- # Required embedding dimension for database schema
91
- EMBEDDING_DIMENSION = 384
221
+ # Default embedding dimension (used by initial migration, adjusted at runtime)
222
+ EMBEDDING_DIMENSION = DEFAULT_EMBEDDING_DIMENSION
223
+
224
+
225
+ class JsonFormatter(logging.Formatter):
226
+ """JSON formatter for structured logging.
227
+
228
+ Outputs logs in JSON format with a 'severity' field that cloud logging
229
+ systems (GCP, AWS CloudWatch, etc.) can parse to correctly categorize log levels.
230
+ """
231
+
232
+ SEVERITY_MAP = {
233
+ logging.DEBUG: "DEBUG",
234
+ logging.INFO: "INFO",
235
+ logging.WARNING: "WARNING",
236
+ logging.ERROR: "ERROR",
237
+ logging.CRITICAL: "CRITICAL",
238
+ }
239
+
240
+ def format(self, record: logging.LogRecord) -> str:
241
+ log_entry = {
242
+ "severity": self.SEVERITY_MAP.get(record.levelno, "DEFAULT"),
243
+ "message": record.getMessage(),
244
+ "timestamp": datetime.now(timezone.utc).isoformat(),
245
+ "logger": record.name,
246
+ }
247
+
248
+ # Add exception info if present
249
+ if record.exc_info:
250
+ log_entry["exception"] = self.formatException(record.exc_info)
251
+
252
+ return json.dumps(log_entry)
253
+
254
+
255
+ def _validate_extraction_mode(mode: str) -> str:
256
+ """Validate and normalize extraction mode."""
257
+ mode_lower = mode.lower()
258
+ if mode_lower not in RETAIN_EXTRACTION_MODES:
259
+ logger.warning(
260
+ f"Invalid extraction mode '{mode}', must be one of {RETAIN_EXTRACTION_MODES}. "
261
+ f"Defaulting to '{DEFAULT_RETAIN_EXTRACTION_MODE}'."
262
+ )
263
+ return DEFAULT_RETAIN_EXTRACTION_MODE
264
+ return mode_lower
92
265
 
93
266
 
94
267
  @dataclass
@@ -98,7 +271,7 @@ class HindsightConfig:
98
271
  # Database
99
272
  database_url: str
100
273
 
101
- # LLM
274
+ # LLM (default, used as fallback for per-operation config)
102
275
  llm_provider: str
103
276
  llm_api_key: str | None
104
277
  llm_model: str
@@ -106,33 +279,88 @@ class HindsightConfig:
106
279
  llm_max_concurrent: int
107
280
  llm_timeout: float
108
281
 
282
+ # Per-operation LLM configuration (None = use default LLM config)
283
+ retain_llm_provider: str | None
284
+ retain_llm_api_key: str | None
285
+ retain_llm_model: str | None
286
+ retain_llm_base_url: str | None
287
+
288
+ reflect_llm_provider: str | None
289
+ reflect_llm_api_key: str | None
290
+ reflect_llm_model: str | None
291
+ reflect_llm_base_url: str | None
292
+
293
+ consolidation_llm_provider: str | None
294
+ consolidation_llm_api_key: str | None
295
+ consolidation_llm_model: str | None
296
+ consolidation_llm_base_url: str | None
297
+
109
298
  # Embeddings
110
299
  embeddings_provider: str
111
300
  embeddings_local_model: str
112
301
  embeddings_tei_url: str | None
302
+ embeddings_openai_base_url: str | None
303
+ embeddings_cohere_base_url: str | None
113
304
 
114
305
  # Reranker
115
306
  reranker_provider: str
116
307
  reranker_local_model: str
117
308
  reranker_tei_url: str | None
309
+ reranker_tei_batch_size: int
310
+ reranker_tei_max_concurrent: int
311
+ reranker_max_candidates: int
312
+ reranker_cohere_base_url: str | None
118
313
 
119
314
  # Server
120
315
  host: str
121
316
  port: int
122
317
  log_level: str
318
+ log_format: str
123
319
  mcp_enabled: bool
124
320
 
125
321
  # Recall
126
322
  graph_retriever: str
127
-
128
- # Observation thresholds
129
- observation_min_facts: int
130
- observation_top_entities: int
323
+ mpfp_top_k_neighbors: int
324
+ recall_max_concurrent: int
325
+ recall_connection_budget: int
326
+ mental_model_refresh_concurrency: int
327
+
328
+ # Retain settings
329
+ retain_max_completion_tokens: int
330
+ retain_chunk_size: int
331
+ retain_extract_causal_links: bool
332
+ retain_extraction_mode: str
333
+ retain_custom_instructions: str | None
334
+ retain_observations_async: bool
335
+
336
+ # Observations settings (consolidated knowledge from facts)
337
+ enable_observations: bool
338
+ consolidation_batch_size: int
131
339
 
132
340
  # Optimization flags
133
341
  skip_llm_verification: bool
134
342
  lazy_reranker: bool
135
343
 
344
+ # Database migrations
345
+ run_migrations_on_startup: bool
346
+
347
+ # Database connection pool
348
+ db_pool_min_size: int
349
+ db_pool_max_size: int
350
+ db_command_timeout: int
351
+ db_acquire_timeout: int
352
+
353
+ # Worker configuration (distributed task processing)
354
+ worker_enabled: bool
355
+ worker_id: str | None
356
+ worker_poll_interval_ms: int
357
+ worker_max_retries: int
358
+ worker_batch_size: int
359
+ worker_http_port: int
360
+
361
+ # Reflect agent settings
362
+ reflect_max_iterations: int
363
+
136
364
  @classmethod
137
365
  def from_env(cls) -> "HindsightConfig":
138
366
  """Create configuration from environment variables."""
@@ -146,29 +374,92 @@ class HindsightConfig:
146
374
  llm_base_url=os.getenv(ENV_LLM_BASE_URL) or None,
147
375
  llm_max_concurrent=int(os.getenv(ENV_LLM_MAX_CONCURRENT, str(DEFAULT_LLM_MAX_CONCURRENT))),
148
376
  llm_timeout=float(os.getenv(ENV_LLM_TIMEOUT, str(DEFAULT_LLM_TIMEOUT))),
377
+ # Per-operation LLM config (None = use default)
378
+ retain_llm_provider=os.getenv(ENV_RETAIN_LLM_PROVIDER) or None,
379
+ retain_llm_api_key=os.getenv(ENV_RETAIN_LLM_API_KEY) or None,
380
+ retain_llm_model=os.getenv(ENV_RETAIN_LLM_MODEL) or None,
381
+ retain_llm_base_url=os.getenv(ENV_RETAIN_LLM_BASE_URL) or None,
382
+ reflect_llm_provider=os.getenv(ENV_REFLECT_LLM_PROVIDER) or None,
383
+ reflect_llm_api_key=os.getenv(ENV_REFLECT_LLM_API_KEY) or None,
384
+ reflect_llm_model=os.getenv(ENV_REFLECT_LLM_MODEL) or None,
385
+ reflect_llm_base_url=os.getenv(ENV_REFLECT_LLM_BASE_URL) or None,
386
+ consolidation_llm_provider=os.getenv(ENV_CONSOLIDATION_LLM_PROVIDER) or None,
387
+ consolidation_llm_api_key=os.getenv(ENV_CONSOLIDATION_LLM_API_KEY) or None,
388
+ consolidation_llm_model=os.getenv(ENV_CONSOLIDATION_LLM_MODEL) or None,
389
+ consolidation_llm_base_url=os.getenv(ENV_CONSOLIDATION_LLM_BASE_URL) or None,
149
390
  # Embeddings
150
391
  embeddings_provider=os.getenv(ENV_EMBEDDINGS_PROVIDER, DEFAULT_EMBEDDINGS_PROVIDER),
151
392
  embeddings_local_model=os.getenv(ENV_EMBEDDINGS_LOCAL_MODEL, DEFAULT_EMBEDDINGS_LOCAL_MODEL),
152
393
  embeddings_tei_url=os.getenv(ENV_EMBEDDINGS_TEI_URL),
394
+ embeddings_openai_base_url=os.getenv(ENV_EMBEDDINGS_OPENAI_BASE_URL) or None,
395
+ embeddings_cohere_base_url=os.getenv(ENV_EMBEDDINGS_COHERE_BASE_URL) or None,
153
396
  # Reranker
154
397
  reranker_provider=os.getenv(ENV_RERANKER_PROVIDER, DEFAULT_RERANKER_PROVIDER),
155
398
  reranker_local_model=os.getenv(ENV_RERANKER_LOCAL_MODEL, DEFAULT_RERANKER_LOCAL_MODEL),
156
399
  reranker_tei_url=os.getenv(ENV_RERANKER_TEI_URL),
400
+ reranker_tei_batch_size=int(os.getenv(ENV_RERANKER_TEI_BATCH_SIZE, str(DEFAULT_RERANKER_TEI_BATCH_SIZE))),
401
+ reranker_tei_max_concurrent=int(
402
+ os.getenv(ENV_RERANKER_TEI_MAX_CONCURRENT, str(DEFAULT_RERANKER_TEI_MAX_CONCURRENT))
403
+ ),
404
+ reranker_max_candidates=int(os.getenv(ENV_RERANKER_MAX_CANDIDATES, str(DEFAULT_RERANKER_MAX_CANDIDATES))),
405
+ reranker_cohere_base_url=os.getenv(ENV_RERANKER_COHERE_BASE_URL) or None,
157
406
  # Server
158
407
  host=os.getenv(ENV_HOST, DEFAULT_HOST),
159
408
  port=int(os.getenv(ENV_PORT, DEFAULT_PORT)),
160
409
  log_level=os.getenv(ENV_LOG_LEVEL, DEFAULT_LOG_LEVEL),
410
+ log_format=os.getenv(ENV_LOG_FORMAT, DEFAULT_LOG_FORMAT).lower(),
161
411
  mcp_enabled=os.getenv(ENV_MCP_ENABLED, str(DEFAULT_MCP_ENABLED)).lower() == "true",
162
412
  # Recall
163
413
  graph_retriever=os.getenv(ENV_GRAPH_RETRIEVER, DEFAULT_GRAPH_RETRIEVER),
414
+ mpfp_top_k_neighbors=int(os.getenv(ENV_MPFP_TOP_K_NEIGHBORS, str(DEFAULT_MPFP_TOP_K_NEIGHBORS))),
415
+ recall_max_concurrent=int(os.getenv(ENV_RECALL_MAX_CONCURRENT, str(DEFAULT_RECALL_MAX_CONCURRENT))),
416
+ recall_connection_budget=int(
417
+ os.getenv(ENV_RECALL_CONNECTION_BUDGET, str(DEFAULT_RECALL_CONNECTION_BUDGET))
418
+ ),
419
+ mental_model_refresh_concurrency=int(
420
+ os.getenv(ENV_MENTAL_MODEL_REFRESH_CONCURRENCY, str(DEFAULT_MENTAL_MODEL_REFRESH_CONCURRENCY))
421
+ ),
164
422
  # Optimization flags
165
423
  skip_llm_verification=os.getenv(ENV_SKIP_LLM_VERIFICATION, "false").lower() == "true",
166
424
  lazy_reranker=os.getenv(ENV_LAZY_RERANKER, "false").lower() == "true",
167
- # Observation thresholds
168
- observation_min_facts=int(os.getenv(ENV_OBSERVATION_MIN_FACTS, str(DEFAULT_OBSERVATION_MIN_FACTS))),
169
- observation_top_entities=int(
170
- os.getenv(ENV_OBSERVATION_TOP_ENTITIES, str(DEFAULT_OBSERVATION_TOP_ENTITIES))
425
+ # Retain settings
426
+ retain_max_completion_tokens=int(
427
+ os.getenv(ENV_RETAIN_MAX_COMPLETION_TOKENS, str(DEFAULT_RETAIN_MAX_COMPLETION_TOKENS))
428
+ ),
429
+ retain_chunk_size=int(os.getenv(ENV_RETAIN_CHUNK_SIZE, str(DEFAULT_RETAIN_CHUNK_SIZE))),
430
+ retain_extract_causal_links=os.getenv(
431
+ ENV_RETAIN_EXTRACT_CAUSAL_LINKS, str(DEFAULT_RETAIN_EXTRACT_CAUSAL_LINKS)
432
+ ).lower()
433
+ == "true",
434
+ retain_extraction_mode=_validate_extraction_mode(
435
+ os.getenv(ENV_RETAIN_EXTRACTION_MODE, DEFAULT_RETAIN_EXTRACTION_MODE)
171
436
  ),
437
+ retain_custom_instructions=os.getenv(ENV_RETAIN_CUSTOM_INSTRUCTIONS) or DEFAULT_RETAIN_CUSTOM_INSTRUCTIONS,
438
+ retain_observations_async=os.getenv(
439
+ ENV_RETAIN_OBSERVATIONS_ASYNC, str(DEFAULT_RETAIN_OBSERVATIONS_ASYNC)
440
+ ).lower()
441
+ == "true",
442
+ # Observations settings (consolidated knowledge from facts)
443
+ enable_observations=os.getenv(ENV_ENABLE_OBSERVATIONS, str(DEFAULT_ENABLE_OBSERVATIONS)).lower() == "true",
444
+ consolidation_batch_size=int(
445
+ os.getenv(ENV_CONSOLIDATION_BATCH_SIZE, str(DEFAULT_CONSOLIDATION_BATCH_SIZE))
446
+ ),
447
+ # Database migrations
448
+ run_migrations_on_startup=os.getenv(ENV_RUN_MIGRATIONS_ON_STARTUP, "true").lower() == "true",
449
+ # Database connection pool
450
+ db_pool_min_size=int(os.getenv(ENV_DB_POOL_MIN_SIZE, str(DEFAULT_DB_POOL_MIN_SIZE))),
451
+ db_pool_max_size=int(os.getenv(ENV_DB_POOL_MAX_SIZE, str(DEFAULT_DB_POOL_MAX_SIZE))),
452
+ db_command_timeout=int(os.getenv(ENV_DB_COMMAND_TIMEOUT, str(DEFAULT_DB_COMMAND_TIMEOUT))),
453
+ db_acquire_timeout=int(os.getenv(ENV_DB_ACQUIRE_TIMEOUT, str(DEFAULT_DB_ACQUIRE_TIMEOUT))),
454
+ # Worker configuration
455
+ worker_enabled=os.getenv(ENV_WORKER_ENABLED, str(DEFAULT_WORKER_ENABLED)).lower() == "true",
456
+ worker_id=os.getenv(ENV_WORKER_ID) or DEFAULT_WORKER_ID,
457
+ worker_poll_interval_ms=int(os.getenv(ENV_WORKER_POLL_INTERVAL_MS, str(DEFAULT_WORKER_POLL_INTERVAL_MS))),
458
+ worker_max_retries=int(os.getenv(ENV_WORKER_MAX_RETRIES, str(DEFAULT_WORKER_MAX_RETRIES))),
459
+ worker_batch_size=int(os.getenv(ENV_WORKER_BATCH_SIZE, str(DEFAULT_WORKER_BATCH_SIZE))),
460
+ worker_http_port=int(os.getenv(ENV_WORKER_HTTP_PORT, str(DEFAULT_WORKER_HTTP_PORT))),
461
+ # Reflect agent settings
462
+ reflect_max_iterations=int(os.getenv(ENV_REFLECT_MAX_ITERATIONS, str(DEFAULT_REFLECT_MAX_ITERATIONS))),
172
463
  )
173
464
 
174
465
  def get_llm_base_url(self) -> str:
@@ -199,22 +490,63 @@ class HindsightConfig:
199
490
  return log_level_map.get(self.log_level.lower(), logging.INFO)
200
491
 
201
492
  def configure_logging(self) -> None:
202
- """Configure Python logging based on the log level."""
203
- logging.basicConfig(
204
- level=self.get_python_log_level(),
205
- format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
206
- force=True, # Override any existing configuration
207
- )
493
+ """Configure Python logging based on the log level and format.
494
+
495
+ When log_format is "json", outputs structured JSON logs with a severity
496
+ field that GCP Cloud Logging can parse for proper log level categorization.
497
+ """
498
+ root_logger = logging.getLogger()
499
+ root_logger.setLevel(self.get_python_log_level())
500
+
501
+ # Remove existing handlers
502
+ for handler in root_logger.handlers[:]:
503
+ root_logger.removeHandler(handler)
504
+
505
+ # Create handler writing to stdout (GCP treats stderr as ERROR)
506
+ handler = logging.StreamHandler(sys.stdout)
507
+ handler.setLevel(self.get_python_log_level())
508
+
509
+ if self.log_format == "json":
510
+ handler.setFormatter(JsonFormatter())
511
+ else:
512
+ handler.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s - %(name)s - %(message)s"))
513
+
514
+ root_logger.addHandler(handler)
208
515
 
209
516
  def log_config(self) -> None:
210
517
  """Log the current configuration (without sensitive values)."""
211
518
  logger.info(f"Database: {self.database_url}")
212
519
  logger.info(f"LLM: provider={self.llm_provider}, model={self.llm_model}")
520
+ if self.retain_llm_provider or self.retain_llm_model:
521
+ retain_provider = self.retain_llm_provider or self.llm_provider
522
+ retain_model = self.retain_llm_model or self.llm_model
523
+ logger.info(f"LLM (retain): provider={retain_provider}, model={retain_model}")
524
+ if self.reflect_llm_provider or self.reflect_llm_model:
525
+ reflect_provider = self.reflect_llm_provider or self.llm_provider
526
+ reflect_model = self.reflect_llm_model or self.llm_model
527
+ logger.info(f"LLM (reflect): provider={reflect_provider}, model={reflect_model}")
528
+ if self.consolidation_llm_provider or self.consolidation_llm_model:
529
+ consolidation_provider = self.consolidation_llm_provider or self.llm_provider
530
+ consolidation_model = self.consolidation_llm_model or self.llm_model
531
+ logger.info(f"LLM (consolidation): provider={consolidation_provider}, model={consolidation_model}")
213
532
  logger.info(f"Embeddings: provider={self.embeddings_provider}")
214
533
  logger.info(f"Reranker: provider={self.reranker_provider}")
215
534
  logger.info(f"Graph retriever: {self.graph_retriever}")
216
535
 
217
536
 
537
+ # Cached config instance
538
+ _config_cache: HindsightConfig | None = None
539
+
540
+
218
541
  def get_config() -> HindsightConfig:
219
- """Get the current configuration from environment variables."""
220
- return HindsightConfig.from_env()
542
+ """Get the cached configuration, loading from environment on first call."""
543
+ global _config_cache
544
+ if _config_cache is None:
545
+ _config_cache = HindsightConfig.from_env()
546
+ return _config_cache
547
+
548
+
549
+ def clear_config_cache() -> None:
550
+ """Clear the config cache. Useful for testing or reloading config."""
551
+ global _config_cache
552
+ _config_cache = None
@@ -0,0 +1,5 @@
1
+ """Consolidation engine for automatic learning creation from memories."""
2
+
3
+ from .consolidator import run_consolidation_job
4
+
5
+ __all__ = ["run_consolidation_job"]