PyPI - hindsight-api - Versions diffs - 0.4.2__tar.gz → 0.4.3__tar.gz - Mend

hindsight-api 0.4.2__tar.gz → 0.4.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (112) hide show

{hindsight_api-0.4.2 → hindsight_api-0.4.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hindsight-api
-Version: 0.4.2
+Version: 0.4.3
 Summary: Hindsight: Agent Memory That Works Like Human Memory
 Requires-Python: >=3.11
 Requires-Dist: aiohttp>=3.13.3
@@ -14,6 +14,7 @@ Requires-Dist: fastapi[standard]>=0.120.3
 Requires-Dist: fastmcp>=2.14.0
 Requires-Dist: filelock>=3.20.1
 Requires-Dist: flashrank>=0.2.0
+Requires-Dist: google-auth>=2.0.0
 Requires-Dist: google-genai>=1.0.0
 Requires-Dist: greenlet>=3.2.4
 Requires-Dist: httpx>=0.27.0

{hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/__init__.py RENAMED Viewed

@@ -46,4 +46,4 @@ __all__ = [
     "RemoteTEICrossEncoder",
     "LLMConfig",
 ]
-__version__ = "0.4.2"
+__version__ = "0.4.3"

{hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/api/http.py RENAMED Viewed

@@ -92,8 +92,7 @@ class RecallRequest(BaseModel):
     query: str
     types: list[str] | None = Field(
         default=None,
-        description="List of fact types to recall: 'world', 'experience', 'observation'. Defaults to world and experience if not specified. "
-        "Note: 'opinion' is accepted but ignored (opinions are excluded from recall).",
+        description="List of fact types to recall: 'world', 'experience', 'observation'. Defaults to world and experience if not specified.",
     )
     budget: Budget = Budget.MID
     max_tokens: int = 4096
@@ -504,13 +503,6 @@ class ReflectRequest(BaseModel):
     )
-class OpinionItem(BaseModel):
-    """Model for an opinion with confidence score."""
-    text: str
-    confidence: float
 class ReflectFact(BaseModel):
     """A fact used in think response."""
@@ -529,7 +521,7 @@ class ReflectFact(BaseModel):
     id: str | None = None
     text: str
-    type: str | None = None  # fact type: world, experience, opinion
+    type: str | None = None  # fact type: world, experience, observation
     context: str | None = None
     occurred_start: str | None = None
     occurred_end: str | None = None
@@ -1412,9 +1404,10 @@ def create_app(
                 worker_id=worker_id,
                 executor=memory.execute_task,
                 poll_interval_ms=config.worker_poll_interval_ms,
-                batch_size=config.worker_batch_size,
                 max_retries=config.worker_max_retries,
                 tenant_extension=getattr(memory, "_tenant_extension", None),
+                max_slots=config.worker_max_slots,
+                consolidation_max_slots=config.worker_consolidation_max_slots,
             )
             poller_task = asyncio.create_task(poller.run())
             logging.info(f"Worker poller started (worker_id={worker_id})")
@@ -1707,9 +1700,7 @@ def _register_routes(app: FastAPI):
         description="Recall memory using semantic similarity and spreading activation.\n\n"
         "The type parameter is optional and must be one of:\n"
         "- `world`: General knowledge about people, places, events, and things that happen\n"
-        "- `experience`: Memories about experience, conversations, actions taken, and tasks performed\n"
-        "- `opinion`: The bank's formed beliefs, perspectives, and viewpoints\n\n"
-        "Set `include_entities=true` to get entity observations alongside recall results.",
+        "- `experience`: Memories about experience, conversations, actions taken, and tasks performed",
         operation_id="recall_memories",
         tags=["Memory"],
     )
@@ -1723,10 +1714,8 @@ def _register_routes(app: FastAPI):
         metrics = get_metrics_collector()
         try:
-            # Default to world and experience if not specified (exclude observation and opinion)
-            # Filter out 'opinion' even if requested - opinions are excluded from recall
+            # Default to world and experience if not specified (exclude observation)
             fact_types = request.types if request.types else list(VALID_RECALL_FACT_TYPES)
-            fact_types = [ft for ft in fact_types if ft != "opinion"]
             # Parse query_timestamp if provided
             question_date = None
@@ -1858,8 +1847,7 @@ def _register_routes(app: FastAPI):
         "2. Retrieves world facts relevant to the query\n"
         "3. Retrieves existing opinions (bank's perspectives)\n"
         "4. Uses LLM to formulate a contextual answer\n"
-        "5. Extracts and stores any new opinions formed\n"
-        "6. Returns plain text answer, the facts used, and new opinions",
+        "5. Returns plain text answer and the facts used",
         operation_id="reflect",
         tags=["Memory"],
     )

{hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/api/mcp.py RENAMED Viewed

@@ -29,15 +29,26 @@ logger = logging.getLogger(__name__)
 # Default bank_id from environment variable
 DEFAULT_BANK_ID = os.environ.get("HINDSIGHT_MCP_BANK_ID", "default")
+# MCP authentication token (optional - if set, Bearer token auth is required)
+MCP_AUTH_TOKEN = os.environ.get("HINDSIGHT_API_MCP_AUTH_TOKEN")
 # Context variable to hold the current bank_id
 _current_bank_id: ContextVar[str | None] = ContextVar("current_bank_id", default=None)
+# Context variable to hold the current API key (for tenant auth propagation)
+_current_api_key: ContextVar[str | None] = ContextVar("current_api_key", default=None)
 def get_current_bank_id() -> str | None:
     """Get the current bank_id from context."""
     return _current_bank_id.get()
+def get_current_api_key() -> str | None:
+    """Get the current API key from context."""
+    return _current_api_key.get()
 def create_mcp_server(memory: MemoryEngine) -> FastMCP:
     """
     Create and configure the Hindsight MCP server.
@@ -54,6 +65,7 @@ def create_mcp_server(memory: MemoryEngine) -> FastMCP:
     # Configure and register tools using shared module
     config = MCPToolsConfig(
         bank_id_resolver=get_current_bank_id,
+        api_key_resolver=get_current_api_key,  # Propagate API key for tenant auth
         include_bank_id_param=True,  # HTTP MCP supports multi-bank via parameter
         tools=None,  # All tools
         retain_fire_and_forget=False,  # HTTP MCP supports sync/async modes
@@ -65,7 +77,11 @@ def create_mcp_server(memory: MemoryEngine) -> FastMCP:
 class MCPMiddleware:
-    """ASGI middleware that extracts bank_id from header or path and sets context.
+    """ASGI middleware that handles authentication and extracts bank_id from header or path.
+    Authentication:
+        If HINDSIGHT_API_MCP_AUTH_TOKEN is set, all requests must include a valid
+        Authorization header with Bearer token or direct token matching the configured value.
     Bank ID can be provided via:
     1. X-Bank-Id header (recommended for Claude Code)
@@ -74,7 +90,7 @@ class MCPMiddleware:
     For Claude Code, configure with:
         claude mcp add --transport http hindsight http://localhost:8888/mcp \\
-            --header "X-Bank-Id: my-bank"
+            --header "X-Bank-Id: my-bank" --header "Authorization: Bearer <token>"
     """
     def __init__(self, app, memory: MemoryEngine):
@@ -98,6 +114,22 @@ class MCPMiddleware:
             await self.mcp_app(scope, receive, send)
             return
+        # Extract auth token from header (for tenant auth propagation)
+        auth_header = self._get_header(scope, "Authorization")
+        auth_token: str | None = None
+        if auth_header:
+            # Support both "Bearer <token>" and direct token
+            auth_token = auth_header[7:].strip() if auth_header.startswith("Bearer ") else auth_header.strip()
+        # Authenticate if MCP_AUTH_TOKEN is configured
+        if MCP_AUTH_TOKEN:
+            if not auth_token:
+                await self._send_error(send, 401, "Authorization header required")
+                return
+            if auth_token != MCP_AUTH_TOKEN:
+                await self._send_error(send, 401, "Invalid authentication token")
+                return
         path = scope.get("path", "")
         # Strip any mount prefix (e.g., /mcp) that FastAPI might not have stripped
@@ -132,8 +164,10 @@ class MCPMiddleware:
             bank_id = DEFAULT_BANK_ID
             logger.debug(f"Using default bank_id: {bank_id}")
-        # Set bank_id context
-        token = _current_bank_id.set(bank_id)
+        # Set bank_id and api_key context
+        bank_id_token = _current_bank_id.set(bank_id)
+        # Store the auth token for tenant extension to validate
+        api_key_token = _current_api_key.set(auth_token) if auth_token else None
         try:
             new_scope = scope.copy()
             new_scope["path"] = new_path
@@ -152,7 +186,9 @@ class MCPMiddleware:
             await self.mcp_app(new_scope, receive, send_wrapper)
         finally:
-            _current_bank_id.reset(token)
+            _current_bank_id.reset(bank_id_token)
+            if api_key_token is not None:
+                _current_api_key.reset(api_key_token)
     async def _send_error(self, send, status: int, message: str):
         """Send an error response."""
@@ -176,6 +212,10 @@ def create_mcp_app(memory: MemoryEngine):
     """
     Create an ASGI app that handles MCP requests.
+    Authentication:
+        Set HINDSIGHT_API_MCP_AUTH_TOKEN to require Bearer token authentication.
+        If not set, MCP endpoint is open (for local development).
     Bank ID can be provided via:
     1. X-Bank-Id header: claude mcp add --transport http hindsight http://localhost:8888/mcp --header "X-Bank-Id: my-bank"
     2. URL path: /mcp/{bank_id}/

{hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/config.py RENAMED Viewed

@@ -108,13 +108,17 @@ ENV_MCP_LOCAL_BANK_ID = "HINDSIGHT_API_MCP_LOCAL_BANK_ID"
 ENV_MCP_INSTRUCTIONS = "HINDSIGHT_API_MCP_INSTRUCTIONS"
 ENV_MENTAL_MODEL_REFRESH_CONCURRENCY = "HINDSIGHT_API_MENTAL_MODEL_REFRESH_CONCURRENCY"
+# Vertex AI configuration
+ENV_LLM_VERTEXAI_PROJECT_ID = "HINDSIGHT_API_LLM_VERTEXAI_PROJECT_ID"
+ENV_LLM_VERTEXAI_REGION = "HINDSIGHT_API_LLM_VERTEXAI_REGION"
+ENV_LLM_VERTEXAI_SERVICE_ACCOUNT_KEY = "HINDSIGHT_API_LLM_VERTEXAI_SERVICE_ACCOUNT_KEY"
 # Retain settings
 ENV_RETAIN_MAX_COMPLETION_TOKENS = "HINDSIGHT_API_RETAIN_MAX_COMPLETION_TOKENS"
 ENV_RETAIN_CHUNK_SIZE = "HINDSIGHT_API_RETAIN_CHUNK_SIZE"
 ENV_RETAIN_EXTRACT_CAUSAL_LINKS = "HINDSIGHT_API_RETAIN_EXTRACT_CAUSAL_LINKS"
 ENV_RETAIN_EXTRACTION_MODE = "HINDSIGHT_API_RETAIN_EXTRACTION_MODE"
 ENV_RETAIN_CUSTOM_INSTRUCTIONS = "HINDSIGHT_API_RETAIN_CUSTOM_INSTRUCTIONS"
-ENV_RETAIN_OBSERVATIONS_ASYNC = "HINDSIGHT_API_RETAIN_OBSERVATIONS_ASYNC"
 # Observations settings (consolidated knowledge from facts)
 ENV_ENABLE_OBSERVATIONS = "HINDSIGHT_API_ENABLE_OBSERVATIONS"
@@ -139,8 +143,9 @@ ENV_WORKER_ENABLED = "HINDSIGHT_API_WORKER_ENABLED"
 ENV_WORKER_ID = "HINDSIGHT_API_WORKER_ID"
 ENV_WORKER_POLL_INTERVAL_MS = "HINDSIGHT_API_WORKER_POLL_INTERVAL_MS"
 ENV_WORKER_MAX_RETRIES = "HINDSIGHT_API_WORKER_MAX_RETRIES"
-ENV_WORKER_BATCH_SIZE = "HINDSIGHT_API_WORKER_BATCH_SIZE"
 ENV_WORKER_HTTP_PORT = "HINDSIGHT_API_WORKER_HTTP_PORT"
+ENV_WORKER_MAX_SLOTS = "HINDSIGHT_API_WORKER_MAX_SLOTS"
+ENV_WORKER_CONSOLIDATION_MAX_SLOTS = "HINDSIGHT_API_WORKER_CONSOLIDATION_MAX_SLOTS"
 # Reflect agent settings
 ENV_REFLECT_MAX_ITERATIONS = "HINDSIGHT_API_REFLECT_MAX_ITERATIONS"
@@ -156,6 +161,11 @@ DEFAULT_LLM_INITIAL_BACKOFF = 1.0  # Initial backoff in seconds for retry expone
 DEFAULT_LLM_MAX_BACKOFF = 60.0  # Max backoff cap in seconds for retry exponential backoff
 DEFAULT_LLM_TIMEOUT = 120.0  # seconds
+# Vertex AI defaults
+DEFAULT_LLM_VERTEXAI_PROJECT_ID = None  # Required for Vertex AI
+DEFAULT_LLM_VERTEXAI_REGION = "us-central1"
+DEFAULT_LLM_VERTEXAI_SERVICE_ACCOUNT_KEY = None  # Optional, uses ADC if not set
 DEFAULT_EMBEDDINGS_PROVIDER = "local"
 DEFAULT_EMBEDDINGS_LOCAL_MODEL = "BAAI/bge-small-en-v1.5"
 DEFAULT_EMBEDDINGS_LOCAL_FORCE_CPU = False  # Force CPU mode for local embeddings (avoids MPS/XPC issues on macOS)
@@ -200,7 +210,6 @@ DEFAULT_RETAIN_EXTRACT_CAUSAL_LINKS = True  # Extract causal links between facts
 DEFAULT_RETAIN_EXTRACTION_MODE = "concise"  # Extraction mode: "concise", "verbose", or "custom"
 RETAIN_EXTRACTION_MODES = ("concise", "verbose", "custom")  # Allowed extraction modes
 DEFAULT_RETAIN_CUSTOM_INSTRUCTIONS = None  # Custom extraction guidelines (only used when mode="custom")
-DEFAULT_RETAIN_OBSERVATIONS_ASYNC = False  # Run observation generation async (after retain completes)
 # Observations defaults (consolidated knowledge from facts)
 DEFAULT_ENABLE_OBSERVATIONS = True  # Observations enabled by default
@@ -221,8 +230,9 @@ DEFAULT_WORKER_ENABLED = True  # API runs worker by default (standalone mode)
 DEFAULT_WORKER_ID = None  # Will use hostname if not specified
 DEFAULT_WORKER_POLL_INTERVAL_MS = 500  # Poll database every 500ms
 DEFAULT_WORKER_MAX_RETRIES = 3  # Max retries before marking task failed
-DEFAULT_WORKER_BATCH_SIZE = 10  # Tasks to claim per poll cycle
 DEFAULT_WORKER_HTTP_PORT = 8889  # HTTP port for worker metrics/health
+DEFAULT_WORKER_MAX_SLOTS = 10  # Total concurrent tasks per worker
+DEFAULT_WORKER_CONSOLIDATION_MAX_SLOTS = 2  # Max concurrent consolidation tasks per worker
 # Reflect agent settings
 DEFAULT_REFLECT_MAX_ITERATIONS = 10  # Max tool call iterations before forcing response
@@ -312,6 +322,11 @@ class HindsightConfig:
     llm_max_backoff: float
     llm_timeout: float
+    # Vertex AI configuration
+    llm_vertexai_project_id: str | None
+    llm_vertexai_region: str
+    llm_vertexai_service_account_key: str | None
     # Per-operation LLM configuration (None = use default LLM config)
     retain_llm_provider: str | None
     retain_llm_api_key: str | None
@@ -382,7 +397,6 @@ class HindsightConfig:
     retain_extract_causal_links: bool
     retain_extraction_mode: str
     retain_custom_instructions: str | None
-    retain_observations_async: bool
     # Observations settings (consolidated knowledge from facts)
     enable_observations: bool
@@ -407,8 +421,9 @@ class HindsightConfig:
     worker_id: str | None
     worker_poll_interval_ms: int
     worker_max_retries: int
-    worker_batch_size: int
     worker_http_port: int
+    worker_max_slots: int
+    worker_consolidation_max_slots: int
     # Reflect agent settings
     reflect_max_iterations: int
@@ -430,6 +445,11 @@ class HindsightConfig:
             llm_initial_backoff=float(os.getenv(ENV_LLM_INITIAL_BACKOFF, str(DEFAULT_LLM_INITIAL_BACKOFF))),
             llm_max_backoff=float(os.getenv(ENV_LLM_MAX_BACKOFF, str(DEFAULT_LLM_MAX_BACKOFF))),
             llm_timeout=float(os.getenv(ENV_LLM_TIMEOUT, str(DEFAULT_LLM_TIMEOUT))),
+            # Vertex AI
+            llm_vertexai_project_id=os.getenv(ENV_LLM_VERTEXAI_PROJECT_ID) or DEFAULT_LLM_VERTEXAI_PROJECT_ID,
+            llm_vertexai_region=os.getenv(ENV_LLM_VERTEXAI_REGION, DEFAULT_LLM_VERTEXAI_REGION),
+            llm_vertexai_service_account_key=os.getenv(ENV_LLM_VERTEXAI_SERVICE_ACCOUNT_KEY)
+            or DEFAULT_LLM_VERTEXAI_SERVICE_ACCOUNT_KEY,
             # Per-operation LLM config (None = use default)
             retain_llm_provider=os.getenv(ENV_RETAIN_LLM_PROVIDER) or None,
             retain_llm_api_key=os.getenv(ENV_RETAIN_LLM_API_KEY) or None,
@@ -545,10 +565,6 @@ class HindsightConfig:
                 os.getenv(ENV_RETAIN_EXTRACTION_MODE, DEFAULT_RETAIN_EXTRACTION_MODE)
             ),
             retain_custom_instructions=os.getenv(ENV_RETAIN_CUSTOM_INSTRUCTIONS) or DEFAULT_RETAIN_CUSTOM_INSTRUCTIONS,
-            retain_observations_async=os.getenv(
-                ENV_RETAIN_OBSERVATIONS_ASYNC, str(DEFAULT_RETAIN_OBSERVATIONS_ASYNC)
-            ).lower()
-            == "true",
             # Observations settings (consolidated knowledge from facts)
             enable_observations=os.getenv(ENV_ENABLE_OBSERVATIONS, str(DEFAULT_ENABLE_OBSERVATIONS)).lower() == "true",
             consolidation_batch_size=int(
@@ -569,8 +585,11 @@ class HindsightConfig:
             worker_id=os.getenv(ENV_WORKER_ID) or DEFAULT_WORKER_ID,
             worker_poll_interval_ms=int(os.getenv(ENV_WORKER_POLL_INTERVAL_MS, str(DEFAULT_WORKER_POLL_INTERVAL_MS))),
             worker_max_retries=int(os.getenv(ENV_WORKER_MAX_RETRIES, str(DEFAULT_WORKER_MAX_RETRIES))),
-            worker_batch_size=int(os.getenv(ENV_WORKER_BATCH_SIZE, str(DEFAULT_WORKER_BATCH_SIZE))),
             worker_http_port=int(os.getenv(ENV_WORKER_HTTP_PORT, str(DEFAULT_WORKER_HTTP_PORT))),
+            worker_max_slots=int(os.getenv(ENV_WORKER_MAX_SLOTS, str(DEFAULT_WORKER_MAX_SLOTS))),
+            worker_consolidation_max_slots=int(
+                os.getenv(ENV_WORKER_CONSOLIDATION_MAX_SLOTS, str(DEFAULT_WORKER_CONSOLIDATION_MAX_SLOTS))
+            ),
             # Reflect agent settings
             reflect_max_iterations=int(os.getenv(ENV_REFLECT_MAX_ITERATIONS, str(DEFAULT_REFLECT_MAX_ITERATIONS))),
         )

{hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/consolidation/consolidator.py RENAMED Viewed

@@ -865,7 +865,14 @@ Focus on DURABLE knowledge that serves this mission, not ephemeral state.
         )
         # Parse JSON response - should be an array
         if isinstance(result, str):
-            result = json.loads(result)
+            # Strip markdown code fences (some models wrap JSON in ```json ... ```)
+            clean = result.strip()
+            if clean.startswith("```"):
+                clean = clean.split("\n", 1)[1] if "\n" in clean else clean[3:]
+                if clean.endswith("```"):
+                    clean = clean[:-3]
+                clean = clean.strip()
+            result = json.loads(clean)
         # Ensure result is a list
         if isinstance(result, list):
             return result

{hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/cross_encoder.py RENAMED Viewed

@@ -614,7 +614,7 @@ class FlashRankCrossEncoder(CrossEncoderModel):
             return
         try:
-            from flashrank import Ranker  # type: ignore[import-untyped]
+            from flashrank import Ranker
         except ImportError:
             raise ImportError("flashrank is required for FlashRankCrossEncoder. Install it with: pip install flashrank")
@@ -641,7 +641,7 @@ class FlashRankCrossEncoder(CrossEncoderModel):
     def _predict_sync(self, pairs: list[tuple[str, str]]) -> list[float]:
         """Synchronous predict - processes each query group."""
-        from flashrank import RerankRequest  # type: ignore[import-untyped]
+        from flashrank import RerankRequest
         if not pairs:
             return []

{hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/embeddings.py RENAMED Viewed

@@ -545,7 +545,7 @@ class CohereEmbeddings(Embeddings):
                 model=self.model,
                 input_type=self.input_type,
             )
-            if response.embeddings:
+            if response.embeddings and isinstance(response.embeddings, list):
                 self._dimension = len(response.embeddings[0])
         logger.info(f"Embeddings: Cohere provider initialized (model: {self.model}, dim: {self._dimension})")

{hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/interface.py RENAMED Viewed

@@ -442,49 +442,6 @@ class MemoryEngineInterface(ABC):
         """
         ...
-    @abstractmethod
-    async def get_entity_observations(
-        self,
-        bank_id: str,
-        entity_id: str,
-        *,
-        limit: int = 10,
-        request_context: "RequestContext",
-    ) -> list[Any]:
-        """
-        Get observations for an entity.
-        Args:
-            bank_id: The memory bank ID.
-            entity_id: The entity ID.
-            limit: Maximum observations.
-            request_context: Request context for authentication.
-        Returns:
-            List of EntityObservation objects.
-        """
-        ...
-    @abstractmethod
-    async def regenerate_entity_observations(
-        self,
-        bank_id: str,
-        entity_id: str,
-        entity_name: str,
-        *,
-        request_context: "RequestContext",
-    ) -> None:
-        """
-        Regenerate observations for an entity.
-        Args:
-            bank_id: The memory bank ID.
-            entity_id: The entity ID.
-            entity_name: The entity's canonical name.
-            request_context: Request context for authentication.
-        """
-        ...
     # =========================================================================
     # Statistics & Operations
     # =========================================================================

{hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/llm_wrapper.py RENAMED Viewed

@@ -16,6 +16,15 @@ from google.genai import errors as genai_errors
 from google.genai import types as genai_types
 from openai import APIConnectionError, APIStatusError, AsyncOpenAI, LengthFinishReasonError
+# Vertex AI imports (conditional)
+try:
+    import google.auth
+    from google.oauth2 import service_account
+    VERTEXAI_AVAILABLE = True
+except ImportError:
+    VERTEXAI_AVAILABLE = False
 from ..config import (
     DEFAULT_LLM_MAX_CONCURRENT,
     DEFAULT_LLM_TIMEOUT,
@@ -88,7 +97,7 @@ class LLMProvider:
         self.groq_service_tier = groq_service_tier or os.getenv(ENV_LLM_GROQ_SERVICE_TIER, "auto")
         # Validate provider
-        valid_providers = ["openai", "groq", "ollama", "gemini", "anthropic", "lmstudio", "mock"]
+        valid_providers = ["openai", "groq", "ollama", "gemini", "anthropic", "lmstudio", "vertexai", "mock"]
         if self.provider not in valid_providers:
             raise ValueError(f"Invalid LLM provider: {self.provider}. Must be one of: {', '.join(valid_providers)}")
@@ -105,8 +114,51 @@ class LLMProvider:
             elif self.provider == "lmstudio":
                 self.base_url = "http://localhost:1234/v1"
-        # Validate API key (not needed for ollama, lmstudio, or mock)
-        if self.provider not in ("ollama", "lmstudio", "mock") and not self.api_key:
+        # Vertex AI config — stored for client creation below
+        self._vertexai_project_id: str | None = None
+        self._vertexai_region: str | None = None
+        self._vertexai_credentials: Any = None
+        if self.provider == "vertexai":
+            from ..config import get_config
+            config = get_config()
+            self._vertexai_project_id = config.llm_vertexai_project_id
+            if not self._vertexai_project_id:
+                raise ValueError(
+                    "HINDSIGHT_API_LLM_VERTEXAI_PROJECT_ID is required for Vertex AI provider. "
+                    "Set it to your GCP project ID."
+                )
+            self._vertexai_region = config.llm_vertexai_region or "us-central1"
+            service_account_key = config.llm_vertexai_service_account_key
+            # Load explicit service account credentials if provided
+            if service_account_key:
+                if not VERTEXAI_AVAILABLE:
+                    raise ValueError(
+                        "Vertex AI service account auth requires 'google-auth' package. "
+                        "Install with: pip install google-auth"
+                    )
+                self._vertexai_credentials = service_account.Credentials.from_service_account_file(
+                    service_account_key,
+                    scopes=["https://www.googleapis.com/auth/cloud-platform"],
+                )
+                logger.info(f"Vertex AI: Using service account key: {service_account_key}")
+            # Strip google/ prefix from model name — native SDK uses bare names
+            # e.g. "google/gemini-2.0-flash-lite-001" -> "gemini-2.0-flash-lite-001"
+            if self.model.startswith("google/"):
+                self.model = self.model[len("google/") :]
+            logger.info(
+                f"Vertex AI: project={self._vertexai_project_id}, region={self._vertexai_region}, "
+                f"model={self.model}, auth={'service_account' if service_account_key else 'ADC'}"
+            )
+        # Validate API key (not needed for ollama, lmstudio, vertexai, or mock)
+        if self.provider not in ("ollama", "lmstudio", "vertexai", "mock") and not self.api_key:
             raise ValueError(f"API key not found for {self.provider}")
         # Get timeout config (set HINDSIGHT_API_LLM_TIMEOUT for local LLMs that need longer timeouts)
@@ -132,6 +184,17 @@ class LLMProvider:
             if self.timeout:
                 anthropic_kwargs["timeout"] = self.timeout
             self._anthropic_client = AsyncAnthropic(**anthropic_kwargs)
+        elif self.provider == "vertexai":
+            # Native genai SDK with Vertex AI — handles ADC automatically,
+            # or uses explicit service account credentials if provided
+            client_kwargs = {
+                "vertexai": True,
+                "project": self._vertexai_project_id,
+                "location": self._vertexai_region,
+            }
+            if self._vertexai_credentials is not None:
+                client_kwargs["credentials"] = self._vertexai_credentials
+            self._gemini_client = genai.Client(**client_kwargs)
         elif self.provider in ("ollama", "lmstudio"):
             # Use dummy key if not provided for local
             api_key = self.api_key or "local"
@@ -223,8 +286,8 @@ class LLMProvider:
                     return_usage,
                 )
-            # Handle Gemini provider separately
-            if self.provider == "gemini":
+            # Handle Gemini and Vertex AI providers (both use native genai SDK)
+            if self.provider in ("gemini", "vertexai"):
                 return await self._call_gemini(
                     messages,
                     response_format,
@@ -342,11 +405,13 @@ class LLMProvider:
                         schema_msg = f"\n\nYou must respond with valid JSON matching this schema:\n{json.dumps(schema, indent=2)}"
                         if call_params["messages"] and call_params["messages"][0].get("role") == "system":
-                            call_params["messages"][0]["content"] += schema_msg
+                            first_msg = call_params["messages"][0]
+                            if isinstance(first_msg, dict) and isinstance(first_msg.get("content"), str):
+                                first_msg["content"] += schema_msg
                         elif call_params["messages"]:
-                            call_params["messages"][0]["content"] = (
-                                schema_msg + "\n\n" + call_params["messages"][0]["content"]
-                            )
+                            first_msg = call_params["messages"][0]
+                            if isinstance(first_msg, dict) and isinstance(first_msg.get("content"), str):
+                                first_msg["content"] = schema_msg + "\n\n" + first_msg["content"]
                     if self.provider not in ("lmstudio", "ollama"):
                         # LM Studio and Ollama don't support json_object response format reliably
                         # We rely on the schema in the system message instead
@@ -586,8 +651,8 @@ class LLMProvider:
                     messages, tools, max_completion_tokens, max_retries, initial_backoff, max_backoff, start_time, scope
                 )
-            # Handle Gemini (convert to Gemini tool format)
-            if self.provider == "gemini":
+            # Handle Gemini and Vertex AI (convert to Gemini tool format)
+            if self.provider in ("gemini", "vertexai"):
                 return await self._call_with_tools_gemini(
                     messages, tools, max_retries, initial_backoff, max_backoff, start_time, scope
                 )
@@ -917,18 +982,20 @@ class LLMProvider:
                 tool_calls: list[LLMToolCall] = []
                 if response.candidates and response.candidates[0].content:
-                    for part in response.candidates[0].content.parts:
-                        if hasattr(part, "text") and part.text:
-                            content = part.text
-                        if hasattr(part, "function_call") and part.function_call:
-                            fc = part.function_call
-                            tool_calls.append(
-                                LLMToolCall(
-                                    id=f"gemini_{len(tool_calls)}",
-                                    name=fc.name,
-                                    arguments=dict(fc.args) if fc.args else {},
+                    parts = response.candidates[0].content.parts
+                    if parts:
+                        for part in parts:
+                            if hasattr(part, "text") and part.text:
+                                content = part.text
+                            if hasattr(part, "function_call") and part.function_call:
+                                fc = part.function_call
+                                tool_calls.append(
+                                    LLMToolCall(
+                                        id=f"gemini_{len(tool_calls)}",
+                                        name=fc.name,
+                                        arguments=dict(fc.args) if fc.args else {},
+                                    )
                                 )
-                            )
                 finish_reason = "tool_calls" if tool_calls else "stop"
@@ -1504,6 +1571,10 @@ class LLMProvider:
         """Clear the recorded mock calls."""
         self._mock_calls = []
+    async def cleanup(self) -> None:
+        """Clean up resources."""
+        pass
     @classmethod
     def for_memory(cls) -> "LLMProvider":
         """Create provider for memory operations from environment variables."""

hindsight-api 0.4.2__tar.gz → 0.4.3__tar.gz

hindsight-api 0.4.2__tar.gz → 0.4.3__tar.gz