hindsight-api 0.4.2__tar.gz → 0.4.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/PKG-INFO +2 -1
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/__init__.py +1 -1
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/api/http.py +7 -19
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/api/mcp.py +45 -5
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/config.py +30 -11
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/consolidation/consolidator.py +8 -1
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/cross_encoder.py +2 -2
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/embeddings.py +1 -1
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/interface.py +0 -43
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/llm_wrapper.py +93 -22
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/memory_engine.py +37 -138
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/response_models.py +1 -21
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/retain/fact_extraction.py +3 -23
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/retain/orchestrator.py +1 -4
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/utils.py +0 -3
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/main.py +6 -3
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/mcp_tools.py +31 -12
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/metrics.py +3 -3
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/pg0.py +1 -1
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/worker/main.py +11 -11
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/worker/poller.py +226 -97
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/pyproject.toml +7 -1
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/.gitignore +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/README.md +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/admin/__init__.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/admin/cli.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/README +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/env.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/script.py.mako +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/e0a1b2c3d4e5_disposition_to_3_traits.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/f1a2b3c4d5e6_add_memory_links_composite_index.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/g2a3b4c5d6e7_add_tags_column.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/h3c4d5e6f7g8_mental_models_v4.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/i4d5e6f7g8h9_delete_opinions.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/j5e6f7g8h9i0_mental_model_versions.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/k6f7g8h9i0j1_add_directive_subtype.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/l7g8h9i0j1k2_add_worker_columns.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/m8h9i0j1k2l3_mental_model_id_to_text.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/n9i0j1k2l3m4_learnings_and_pinned_reflections.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/o0j1k2l3m4n5_migrate_mental_models_data.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/p1k2l3m4n5o6_new_knowledge_architecture.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/q2l3m4n5o6p7_fix_mental_model_fact_type.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/r3m4n5o6p7q8_add_reflect_response_to_reflections.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/rename_personality_to_disposition.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/s4n5o6p7q8r9_add_consolidated_at_to_memory_units.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/t5o6p7q8r9s0_rename_mental_models_to_observations.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/u6p7q8r9s0t1_mental_models_text_id.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/v7q8r9s0t1u2_add_max_tokens_to_mental_models.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/api/__init__.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/banner.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/daemon.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/__init__.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/consolidation/__init__.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/consolidation/prompts.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/db_budget.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/db_utils.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/directives/__init__.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/directives/models.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/entity_resolver.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/mental_models/__init__.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/mental_models/models.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/query_analyzer.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/reflect/__init__.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/reflect/agent.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/reflect/models.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/reflect/observations.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/reflect/prompts.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/reflect/tools.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/reflect/tools_schema.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/retain/__init__.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/retain/bank_utils.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/retain/chunk_storage.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/retain/deduplication.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/retain/embedding_processing.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/retain/embedding_utils.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/retain/entity_processing.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/retain/fact_storage.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/retain/link_creation.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/retain/link_utils.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/retain/types.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/search/__init__.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/search/fusion.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/search/graph_retrieval.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/search/link_expansion_retrieval.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/search/mpfp_retrieval.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/search/reranking.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/search/retrieval.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/search/tags.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/search/temporal_extraction.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/search/think_utils.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/search/trace.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/search/tracer.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/search/types.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/task_backend.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/extensions/__init__.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/extensions/base.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/extensions/builtin/__init__.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/extensions/builtin/tenant.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/extensions/context.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/extensions/http.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/extensions/loader.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/extensions/operation_validator.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/extensions/tenant.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/mcp_local.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/migrations.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/models.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/server.py +0 -0
- {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/worker/__init__.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hindsight-api
|
|
3
|
-
Version: 0.4.2
|
|
3
|
+
Version: 0.4.3
|
|
4
4
|
Summary: Hindsight: Agent Memory That Works Like Human Memory
|
|
5
5
|
Requires-Python: >=3.11
|
|
6
6
|
Requires-Dist: aiohttp>=3.13.3
|
|
@@ -14,6 +14,7 @@ Requires-Dist: fastapi[standard]>=0.120.3
|
|
|
14
14
|
Requires-Dist: fastmcp>=2.14.0
|
|
15
15
|
Requires-Dist: filelock>=3.20.1
|
|
16
16
|
Requires-Dist: flashrank>=0.2.0
|
|
17
|
+
Requires-Dist: google-auth>=2.0.0
|
|
17
18
|
Requires-Dist: google-genai>=1.0.0
|
|
18
19
|
Requires-Dist: greenlet>=3.2.4
|
|
19
20
|
Requires-Dist: httpx>=0.27.0
|
|
@@ -92,8 +92,7 @@ class RecallRequest(BaseModel):
|
|
|
92
92
|
query: str
|
|
93
93
|
types: list[str] | None = Field(
|
|
94
94
|
default=None,
|
|
95
|
-
description="List of fact types to recall: 'world', 'experience', 'observation'. Defaults to world and experience if not specified. "
|
|
96
|
-
"Note: 'opinion' is accepted but ignored (opinions are excluded from recall).",
|
|
95
|
+
description="List of fact types to recall: 'world', 'experience', 'observation'. Defaults to world and experience if not specified.",
|
|
97
96
|
)
|
|
98
97
|
budget: Budget = Budget.MID
|
|
99
98
|
max_tokens: int = 4096
|
|
@@ -504,13 +503,6 @@ class ReflectRequest(BaseModel):
|
|
|
504
503
|
)
|
|
505
504
|
|
|
506
505
|
|
|
507
|
-
class OpinionItem(BaseModel):
|
|
508
|
-
"""Model for an opinion with confidence score."""
|
|
509
|
-
|
|
510
|
-
text: str
|
|
511
|
-
confidence: float
|
|
512
|
-
|
|
513
|
-
|
|
514
506
|
class ReflectFact(BaseModel):
|
|
515
507
|
"""A fact used in think response."""
|
|
516
508
|
|
|
@@ -529,7 +521,7 @@ class ReflectFact(BaseModel):
|
|
|
529
521
|
|
|
530
522
|
id: str | None = None
|
|
531
523
|
text: str
|
|
532
|
-
type: str | None = None # fact type: world, experience, opinion
|
|
524
|
+
type: str | None = None # fact type: world, experience, observation
|
|
533
525
|
context: str | None = None
|
|
534
526
|
occurred_start: str | None = None
|
|
535
527
|
occurred_end: str | None = None
|
|
@@ -1412,9 +1404,10 @@ def create_app(
|
|
|
1412
1404
|
worker_id=worker_id,
|
|
1413
1405
|
executor=memory.execute_task,
|
|
1414
1406
|
poll_interval_ms=config.worker_poll_interval_ms,
|
|
1415
|
-
batch_size=config.worker_batch_size,
|
|
1416
1407
|
max_retries=config.worker_max_retries,
|
|
1417
1408
|
tenant_extension=getattr(memory, "_tenant_extension", None),
|
|
1409
|
+
max_slots=config.worker_max_slots,
|
|
1410
|
+
consolidation_max_slots=config.worker_consolidation_max_slots,
|
|
1418
1411
|
)
|
|
1419
1412
|
poller_task = asyncio.create_task(poller.run())
|
|
1420
1413
|
logging.info(f"Worker poller started (worker_id={worker_id})")
|
|
@@ -1707,9 +1700,7 @@ def _register_routes(app: FastAPI):
|
|
|
1707
1700
|
description="Recall memory using semantic similarity and spreading activation.\n\n"
|
|
1708
1701
|
"The type parameter is optional and must be one of:\n"
|
|
1709
1702
|
"- `world`: General knowledge about people, places, events, and things that happen\n"
|
|
1710
|
-
"- `experience`: Memories about experience, conversations, actions taken, and tasks performed\n"
|
|
1711
|
-
"- `opinion`: The bank's formed beliefs, perspectives, and viewpoints\n\n"
|
|
1712
|
-
"Set `include_entities=true` to get entity observations alongside recall results.",
|
|
1703
|
+
"- `experience`: Memories about experience, conversations, actions taken, and tasks performed",
|
|
1713
1704
|
operation_id="recall_memories",
|
|
1714
1705
|
tags=["Memory"],
|
|
1715
1706
|
)
|
|
@@ -1723,10 +1714,8 @@ def _register_routes(app: FastAPI):
|
|
|
1723
1714
|
metrics = get_metrics_collector()
|
|
1724
1715
|
|
|
1725
1716
|
try:
|
|
1726
|
-
# Default to world and experience if not specified (exclude observation
|
|
1727
|
-
# Filter out 'opinion' even if requested - opinions are excluded from recall
|
|
1717
|
+
# Default to world and experience if not specified (exclude observation)
|
|
1728
1718
|
fact_types = request.types if request.types else list(VALID_RECALL_FACT_TYPES)
|
|
1729
|
-
fact_types = [ft for ft in fact_types if ft != "opinion"]
|
|
1730
1719
|
|
|
1731
1720
|
# Parse query_timestamp if provided
|
|
1732
1721
|
question_date = None
|
|
@@ -1858,8 +1847,7 @@ def _register_routes(app: FastAPI):
|
|
|
1858
1847
|
"2. Retrieves world facts relevant to the query\n"
|
|
1859
1848
|
"3. Retrieves existing opinions (bank's perspectives)\n"
|
|
1860
1849
|
"4. Uses LLM to formulate a contextual answer\n"
|
|
1861
|
-
"5.
|
|
1862
|
-
"6. Returns plain text answer, the facts used, and new opinions",
|
|
1850
|
+
"5. Returns plain text answer and the facts used",
|
|
1863
1851
|
operation_id="reflect",
|
|
1864
1852
|
tags=["Memory"],
|
|
1865
1853
|
)
|
|
@@ -29,15 +29,26 @@ logger = logging.getLogger(__name__)
|
|
|
29
29
|
# Default bank_id from environment variable
|
|
30
30
|
DEFAULT_BANK_ID = os.environ.get("HINDSIGHT_MCP_BANK_ID", "default")
|
|
31
31
|
|
|
32
|
+
# MCP authentication token (optional - if set, Bearer token auth is required)
|
|
33
|
+
MCP_AUTH_TOKEN = os.environ.get("HINDSIGHT_API_MCP_AUTH_TOKEN")
|
|
34
|
+
|
|
32
35
|
# Context variable to hold the current bank_id
|
|
33
36
|
_current_bank_id: ContextVar[str | None] = ContextVar("current_bank_id", default=None)
|
|
34
37
|
|
|
38
|
+
# Context variable to hold the current API key (for tenant auth propagation)
|
|
39
|
+
_current_api_key: ContextVar[str | None] = ContextVar("current_api_key", default=None)
|
|
40
|
+
|
|
35
41
|
|
|
36
42
|
def get_current_bank_id() -> str | None:
|
|
37
43
|
"""Get the current bank_id from context."""
|
|
38
44
|
return _current_bank_id.get()
|
|
39
45
|
|
|
40
46
|
|
|
47
|
+
def get_current_api_key() -> str | None:
|
|
48
|
+
"""Get the current API key from context."""
|
|
49
|
+
return _current_api_key.get()
|
|
50
|
+
|
|
51
|
+
|
|
41
52
|
def create_mcp_server(memory: MemoryEngine) -> FastMCP:
|
|
42
53
|
"""
|
|
43
54
|
Create and configure the Hindsight MCP server.
|
|
@@ -54,6 +65,7 @@ def create_mcp_server(memory: MemoryEngine) -> FastMCP:
|
|
|
54
65
|
# Configure and register tools using shared module
|
|
55
66
|
config = MCPToolsConfig(
|
|
56
67
|
bank_id_resolver=get_current_bank_id,
|
|
68
|
+
api_key_resolver=get_current_api_key, # Propagate API key for tenant auth
|
|
57
69
|
include_bank_id_param=True, # HTTP MCP supports multi-bank via parameter
|
|
58
70
|
tools=None, # All tools
|
|
59
71
|
retain_fire_and_forget=False, # HTTP MCP supports sync/async modes
|
|
@@ -65,7 +77,11 @@ def create_mcp_server(memory: MemoryEngine) -> FastMCP:
|
|
|
65
77
|
|
|
66
78
|
|
|
67
79
|
class MCPMiddleware:
|
|
68
|
-
"""ASGI middleware that extracts bank_id from header or path
|
|
80
|
+
"""ASGI middleware that handles authentication and extracts bank_id from header or path.
|
|
81
|
+
|
|
82
|
+
Authentication:
|
|
83
|
+
If HINDSIGHT_API_MCP_AUTH_TOKEN is set, all requests must include a valid
|
|
84
|
+
Authorization header with Bearer token or direct token matching the configured value.
|
|
69
85
|
|
|
70
86
|
Bank ID can be provided via:
|
|
71
87
|
1. X-Bank-Id header (recommended for Claude Code)
|
|
@@ -74,7 +90,7 @@ class MCPMiddleware:
|
|
|
74
90
|
|
|
75
91
|
For Claude Code, configure with:
|
|
76
92
|
claude mcp add --transport http hindsight http://localhost:8888/mcp \\
|
|
77
|
-
--header "X-Bank-Id: my-bank"
|
|
93
|
+
--header "X-Bank-Id: my-bank" --header "Authorization: Bearer <token>"
|
|
78
94
|
"""
|
|
79
95
|
|
|
80
96
|
def __init__(self, app, memory: MemoryEngine):
|
|
@@ -98,6 +114,22 @@ class MCPMiddleware:
|
|
|
98
114
|
await self.mcp_app(scope, receive, send)
|
|
99
115
|
return
|
|
100
116
|
|
|
117
|
+
# Extract auth token from header (for tenant auth propagation)
|
|
118
|
+
auth_header = self._get_header(scope, "Authorization")
|
|
119
|
+
auth_token: str | None = None
|
|
120
|
+
if auth_header:
|
|
121
|
+
# Support both "Bearer <token>" and direct token
|
|
122
|
+
auth_token = auth_header[7:].strip() if auth_header.startswith("Bearer ") else auth_header.strip()
|
|
123
|
+
|
|
124
|
+
# Authenticate if MCP_AUTH_TOKEN is configured
|
|
125
|
+
if MCP_AUTH_TOKEN:
|
|
126
|
+
if not auth_token:
|
|
127
|
+
await self._send_error(send, 401, "Authorization header required")
|
|
128
|
+
return
|
|
129
|
+
if auth_token != MCP_AUTH_TOKEN:
|
|
130
|
+
await self._send_error(send, 401, "Invalid authentication token")
|
|
131
|
+
return
|
|
132
|
+
|
|
101
133
|
path = scope.get("path", "")
|
|
102
134
|
|
|
103
135
|
# Strip any mount prefix (e.g., /mcp) that FastAPI might not have stripped
|
|
@@ -132,8 +164,10 @@ class MCPMiddleware:
|
|
|
132
164
|
bank_id = DEFAULT_BANK_ID
|
|
133
165
|
logger.debug(f"Using default bank_id: {bank_id}")
|
|
134
166
|
|
|
135
|
-
# Set bank_id context
|
|
136
|
-
|
|
167
|
+
# Set bank_id and api_key context
|
|
168
|
+
bank_id_token = _current_bank_id.set(bank_id)
|
|
169
|
+
# Store the auth token for tenant extension to validate
|
|
170
|
+
api_key_token = _current_api_key.set(auth_token) if auth_token else None
|
|
137
171
|
try:
|
|
138
172
|
new_scope = scope.copy()
|
|
139
173
|
new_scope["path"] = new_path
|
|
@@ -152,7 +186,9 @@ class MCPMiddleware:
|
|
|
152
186
|
|
|
153
187
|
await self.mcp_app(new_scope, receive, send_wrapper)
|
|
154
188
|
finally:
|
|
155
|
-
_current_bank_id.reset(
|
|
189
|
+
_current_bank_id.reset(bank_id_token)
|
|
190
|
+
if api_key_token is not None:
|
|
191
|
+
_current_api_key.reset(api_key_token)
|
|
156
192
|
|
|
157
193
|
async def _send_error(self, send, status: int, message: str):
|
|
158
194
|
"""Send an error response."""
|
|
@@ -176,6 +212,10 @@ def create_mcp_app(memory: MemoryEngine):
|
|
|
176
212
|
"""
|
|
177
213
|
Create an ASGI app that handles MCP requests.
|
|
178
214
|
|
|
215
|
+
Authentication:
|
|
216
|
+
Set HINDSIGHT_API_MCP_AUTH_TOKEN to require Bearer token authentication.
|
|
217
|
+
If not set, MCP endpoint is open (for local development).
|
|
218
|
+
|
|
179
219
|
Bank ID can be provided via:
|
|
180
220
|
1. X-Bank-Id header: claude mcp add --transport http hindsight http://localhost:8888/mcp --header "X-Bank-Id: my-bank"
|
|
181
221
|
2. URL path: /mcp/{bank_id}/
|
|
@@ -108,13 +108,17 @@ ENV_MCP_LOCAL_BANK_ID = "HINDSIGHT_API_MCP_LOCAL_BANK_ID"
|
|
|
108
108
|
ENV_MCP_INSTRUCTIONS = "HINDSIGHT_API_MCP_INSTRUCTIONS"
|
|
109
109
|
ENV_MENTAL_MODEL_REFRESH_CONCURRENCY = "HINDSIGHT_API_MENTAL_MODEL_REFRESH_CONCURRENCY"
|
|
110
110
|
|
|
111
|
+
# Vertex AI configuration
|
|
112
|
+
ENV_LLM_VERTEXAI_PROJECT_ID = "HINDSIGHT_API_LLM_VERTEXAI_PROJECT_ID"
|
|
113
|
+
ENV_LLM_VERTEXAI_REGION = "HINDSIGHT_API_LLM_VERTEXAI_REGION"
|
|
114
|
+
ENV_LLM_VERTEXAI_SERVICE_ACCOUNT_KEY = "HINDSIGHT_API_LLM_VERTEXAI_SERVICE_ACCOUNT_KEY"
|
|
115
|
+
|
|
111
116
|
# Retain settings
|
|
112
117
|
ENV_RETAIN_MAX_COMPLETION_TOKENS = "HINDSIGHT_API_RETAIN_MAX_COMPLETION_TOKENS"
|
|
113
118
|
ENV_RETAIN_CHUNK_SIZE = "HINDSIGHT_API_RETAIN_CHUNK_SIZE"
|
|
114
119
|
ENV_RETAIN_EXTRACT_CAUSAL_LINKS = "HINDSIGHT_API_RETAIN_EXTRACT_CAUSAL_LINKS"
|
|
115
120
|
ENV_RETAIN_EXTRACTION_MODE = "HINDSIGHT_API_RETAIN_EXTRACTION_MODE"
|
|
116
121
|
ENV_RETAIN_CUSTOM_INSTRUCTIONS = "HINDSIGHT_API_RETAIN_CUSTOM_INSTRUCTIONS"
|
|
117
|
-
ENV_RETAIN_OBSERVATIONS_ASYNC = "HINDSIGHT_API_RETAIN_OBSERVATIONS_ASYNC"
|
|
118
122
|
|
|
119
123
|
# Observations settings (consolidated knowledge from facts)
|
|
120
124
|
ENV_ENABLE_OBSERVATIONS = "HINDSIGHT_API_ENABLE_OBSERVATIONS"
|
|
@@ -139,8 +143,9 @@ ENV_WORKER_ENABLED = "HINDSIGHT_API_WORKER_ENABLED"
|
|
|
139
143
|
ENV_WORKER_ID = "HINDSIGHT_API_WORKER_ID"
|
|
140
144
|
ENV_WORKER_POLL_INTERVAL_MS = "HINDSIGHT_API_WORKER_POLL_INTERVAL_MS"
|
|
141
145
|
ENV_WORKER_MAX_RETRIES = "HINDSIGHT_API_WORKER_MAX_RETRIES"
|
|
142
|
-
ENV_WORKER_BATCH_SIZE = "HINDSIGHT_API_WORKER_BATCH_SIZE"
|
|
143
146
|
ENV_WORKER_HTTP_PORT = "HINDSIGHT_API_WORKER_HTTP_PORT"
|
|
147
|
+
ENV_WORKER_MAX_SLOTS = "HINDSIGHT_API_WORKER_MAX_SLOTS"
|
|
148
|
+
ENV_WORKER_CONSOLIDATION_MAX_SLOTS = "HINDSIGHT_API_WORKER_CONSOLIDATION_MAX_SLOTS"
|
|
144
149
|
|
|
145
150
|
# Reflect agent settings
|
|
146
151
|
ENV_REFLECT_MAX_ITERATIONS = "HINDSIGHT_API_REFLECT_MAX_ITERATIONS"
|
|
@@ -156,6 +161,11 @@ DEFAULT_LLM_INITIAL_BACKOFF = 1.0 # Initial backoff in seconds for retry expone
|
|
|
156
161
|
DEFAULT_LLM_MAX_BACKOFF = 60.0 # Max backoff cap in seconds for retry exponential backoff
|
|
157
162
|
DEFAULT_LLM_TIMEOUT = 120.0 # seconds
|
|
158
163
|
|
|
164
|
+
# Vertex AI defaults
|
|
165
|
+
DEFAULT_LLM_VERTEXAI_PROJECT_ID = None # Required for Vertex AI
|
|
166
|
+
DEFAULT_LLM_VERTEXAI_REGION = "us-central1"
|
|
167
|
+
DEFAULT_LLM_VERTEXAI_SERVICE_ACCOUNT_KEY = None # Optional, uses ADC if not set
|
|
168
|
+
|
|
159
169
|
DEFAULT_EMBEDDINGS_PROVIDER = "local"
|
|
160
170
|
DEFAULT_EMBEDDINGS_LOCAL_MODEL = "BAAI/bge-small-en-v1.5"
|
|
161
171
|
DEFAULT_EMBEDDINGS_LOCAL_FORCE_CPU = False # Force CPU mode for local embeddings (avoids MPS/XPC issues on macOS)
|
|
@@ -200,7 +210,6 @@ DEFAULT_RETAIN_EXTRACT_CAUSAL_LINKS = True # Extract causal links between facts
|
|
|
200
210
|
DEFAULT_RETAIN_EXTRACTION_MODE = "concise" # Extraction mode: "concise", "verbose", or "custom"
|
|
201
211
|
RETAIN_EXTRACTION_MODES = ("concise", "verbose", "custom") # Allowed extraction modes
|
|
202
212
|
DEFAULT_RETAIN_CUSTOM_INSTRUCTIONS = None # Custom extraction guidelines (only used when mode="custom")
|
|
203
|
-
DEFAULT_RETAIN_OBSERVATIONS_ASYNC = False # Run observation generation async (after retain completes)
|
|
204
213
|
|
|
205
214
|
# Observations defaults (consolidated knowledge from facts)
|
|
206
215
|
DEFAULT_ENABLE_OBSERVATIONS = True # Observations enabled by default
|
|
@@ -221,8 +230,9 @@ DEFAULT_WORKER_ENABLED = True # API runs worker by default (standalone mode)
|
|
|
221
230
|
DEFAULT_WORKER_ID = None # Will use hostname if not specified
|
|
222
231
|
DEFAULT_WORKER_POLL_INTERVAL_MS = 500 # Poll database every 500ms
|
|
223
232
|
DEFAULT_WORKER_MAX_RETRIES = 3 # Max retries before marking task failed
|
|
224
|
-
DEFAULT_WORKER_BATCH_SIZE = 10 # Tasks to claim per poll cycle
|
|
225
233
|
DEFAULT_WORKER_HTTP_PORT = 8889 # HTTP port for worker metrics/health
|
|
234
|
+
DEFAULT_WORKER_MAX_SLOTS = 10 # Total concurrent tasks per worker
|
|
235
|
+
DEFAULT_WORKER_CONSOLIDATION_MAX_SLOTS = 2 # Max concurrent consolidation tasks per worker
|
|
226
236
|
|
|
227
237
|
# Reflect agent settings
|
|
228
238
|
DEFAULT_REFLECT_MAX_ITERATIONS = 10 # Max tool call iterations before forcing response
|
|
@@ -312,6 +322,11 @@ class HindsightConfig:
|
|
|
312
322
|
llm_max_backoff: float
|
|
313
323
|
llm_timeout: float
|
|
314
324
|
|
|
325
|
+
# Vertex AI configuration
|
|
326
|
+
llm_vertexai_project_id: str | None
|
|
327
|
+
llm_vertexai_region: str
|
|
328
|
+
llm_vertexai_service_account_key: str | None
|
|
329
|
+
|
|
315
330
|
# Per-operation LLM configuration (None = use default LLM config)
|
|
316
331
|
retain_llm_provider: str | None
|
|
317
332
|
retain_llm_api_key: str | None
|
|
@@ -382,7 +397,6 @@ class HindsightConfig:
|
|
|
382
397
|
retain_extract_causal_links: bool
|
|
383
398
|
retain_extraction_mode: str
|
|
384
399
|
retain_custom_instructions: str | None
|
|
385
|
-
retain_observations_async: bool
|
|
386
400
|
|
|
387
401
|
# Observations settings (consolidated knowledge from facts)
|
|
388
402
|
enable_observations: bool
|
|
@@ -407,8 +421,9 @@ class HindsightConfig:
|
|
|
407
421
|
worker_id: str | None
|
|
408
422
|
worker_poll_interval_ms: int
|
|
409
423
|
worker_max_retries: int
|
|
410
|
-
worker_batch_size: int
|
|
411
424
|
worker_http_port: int
|
|
425
|
+
worker_max_slots: int
|
|
426
|
+
worker_consolidation_max_slots: int
|
|
412
427
|
|
|
413
428
|
# Reflect agent settings
|
|
414
429
|
reflect_max_iterations: int
|
|
@@ -430,6 +445,11 @@ class HindsightConfig:
|
|
|
430
445
|
llm_initial_backoff=float(os.getenv(ENV_LLM_INITIAL_BACKOFF, str(DEFAULT_LLM_INITIAL_BACKOFF))),
|
|
431
446
|
llm_max_backoff=float(os.getenv(ENV_LLM_MAX_BACKOFF, str(DEFAULT_LLM_MAX_BACKOFF))),
|
|
432
447
|
llm_timeout=float(os.getenv(ENV_LLM_TIMEOUT, str(DEFAULT_LLM_TIMEOUT))),
|
|
448
|
+
# Vertex AI
|
|
449
|
+
llm_vertexai_project_id=os.getenv(ENV_LLM_VERTEXAI_PROJECT_ID) or DEFAULT_LLM_VERTEXAI_PROJECT_ID,
|
|
450
|
+
llm_vertexai_region=os.getenv(ENV_LLM_VERTEXAI_REGION, DEFAULT_LLM_VERTEXAI_REGION),
|
|
451
|
+
llm_vertexai_service_account_key=os.getenv(ENV_LLM_VERTEXAI_SERVICE_ACCOUNT_KEY)
|
|
452
|
+
or DEFAULT_LLM_VERTEXAI_SERVICE_ACCOUNT_KEY,
|
|
433
453
|
# Per-operation LLM config (None = use default)
|
|
434
454
|
retain_llm_provider=os.getenv(ENV_RETAIN_LLM_PROVIDER) or None,
|
|
435
455
|
retain_llm_api_key=os.getenv(ENV_RETAIN_LLM_API_KEY) or None,
|
|
@@ -545,10 +565,6 @@ class HindsightConfig:
|
|
|
545
565
|
os.getenv(ENV_RETAIN_EXTRACTION_MODE, DEFAULT_RETAIN_EXTRACTION_MODE)
|
|
546
566
|
),
|
|
547
567
|
retain_custom_instructions=os.getenv(ENV_RETAIN_CUSTOM_INSTRUCTIONS) or DEFAULT_RETAIN_CUSTOM_INSTRUCTIONS,
|
|
548
|
-
retain_observations_async=os.getenv(
|
|
549
|
-
ENV_RETAIN_OBSERVATIONS_ASYNC, str(DEFAULT_RETAIN_OBSERVATIONS_ASYNC)
|
|
550
|
-
).lower()
|
|
551
|
-
== "true",
|
|
552
568
|
# Observations settings (consolidated knowledge from facts)
|
|
553
569
|
enable_observations=os.getenv(ENV_ENABLE_OBSERVATIONS, str(DEFAULT_ENABLE_OBSERVATIONS)).lower() == "true",
|
|
554
570
|
consolidation_batch_size=int(
|
|
@@ -569,8 +585,11 @@ class HindsightConfig:
|
|
|
569
585
|
worker_id=os.getenv(ENV_WORKER_ID) or DEFAULT_WORKER_ID,
|
|
570
586
|
worker_poll_interval_ms=int(os.getenv(ENV_WORKER_POLL_INTERVAL_MS, str(DEFAULT_WORKER_POLL_INTERVAL_MS))),
|
|
571
587
|
worker_max_retries=int(os.getenv(ENV_WORKER_MAX_RETRIES, str(DEFAULT_WORKER_MAX_RETRIES))),
|
|
572
|
-
worker_batch_size=int(os.getenv(ENV_WORKER_BATCH_SIZE, str(DEFAULT_WORKER_BATCH_SIZE))),
|
|
573
588
|
worker_http_port=int(os.getenv(ENV_WORKER_HTTP_PORT, str(DEFAULT_WORKER_HTTP_PORT))),
|
|
589
|
+
worker_max_slots=int(os.getenv(ENV_WORKER_MAX_SLOTS, str(DEFAULT_WORKER_MAX_SLOTS))),
|
|
590
|
+
worker_consolidation_max_slots=int(
|
|
591
|
+
os.getenv(ENV_WORKER_CONSOLIDATION_MAX_SLOTS, str(DEFAULT_WORKER_CONSOLIDATION_MAX_SLOTS))
|
|
592
|
+
),
|
|
574
593
|
# Reflect agent settings
|
|
575
594
|
reflect_max_iterations=int(os.getenv(ENV_REFLECT_MAX_ITERATIONS, str(DEFAULT_REFLECT_MAX_ITERATIONS))),
|
|
576
595
|
)
|
{hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/consolidation/consolidator.py
RENAMED
|
@@ -865,7 +865,14 @@ Focus on DURABLE knowledge that serves this mission, not ephemeral state.
|
|
|
865
865
|
)
|
|
866
866
|
# Parse JSON response - should be an array
|
|
867
867
|
if isinstance(result, str):
|
|
868
|
-
|
|
868
|
+
# Strip markdown code fences (some models wrap JSON in ```json ... ```)
|
|
869
|
+
clean = result.strip()
|
|
870
|
+
if clean.startswith("```"):
|
|
871
|
+
clean = clean.split("\n", 1)[1] if "\n" in clean else clean[3:]
|
|
872
|
+
if clean.endswith("```"):
|
|
873
|
+
clean = clean[:-3]
|
|
874
|
+
clean = clean.strip()
|
|
875
|
+
result = json.loads(clean)
|
|
869
876
|
# Ensure result is a list
|
|
870
877
|
if isinstance(result, list):
|
|
871
878
|
return result
|
|
@@ -614,7 +614,7 @@ class FlashRankCrossEncoder(CrossEncoderModel):
|
|
|
614
614
|
return
|
|
615
615
|
|
|
616
616
|
try:
|
|
617
|
-
from flashrank import Ranker
|
|
617
|
+
from flashrank import Ranker
|
|
618
618
|
except ImportError:
|
|
619
619
|
raise ImportError("flashrank is required for FlashRankCrossEncoder. Install it with: pip install flashrank")
|
|
620
620
|
|
|
@@ -641,7 +641,7 @@ class FlashRankCrossEncoder(CrossEncoderModel):
|
|
|
641
641
|
|
|
642
642
|
def _predict_sync(self, pairs: list[tuple[str, str]]) -> list[float]:
|
|
643
643
|
"""Synchronous predict - processes each query group."""
|
|
644
|
-
from flashrank import RerankRequest
|
|
644
|
+
from flashrank import RerankRequest
|
|
645
645
|
|
|
646
646
|
if not pairs:
|
|
647
647
|
return []
|
|
@@ -545,7 +545,7 @@ class CohereEmbeddings(Embeddings):
|
|
|
545
545
|
model=self.model,
|
|
546
546
|
input_type=self.input_type,
|
|
547
547
|
)
|
|
548
|
-
if response.embeddings:
|
|
548
|
+
if response.embeddings and isinstance(response.embeddings, list):
|
|
549
549
|
self._dimension = len(response.embeddings[0])
|
|
550
550
|
|
|
551
551
|
logger.info(f"Embeddings: Cohere provider initialized (model: {self.model}, dim: {self._dimension})")
|
|
@@ -442,49 +442,6 @@ class MemoryEngineInterface(ABC):
|
|
|
442
442
|
"""
|
|
443
443
|
...
|
|
444
444
|
|
|
445
|
-
@abstractmethod
|
|
446
|
-
async def get_entity_observations(
|
|
447
|
-
self,
|
|
448
|
-
bank_id: str,
|
|
449
|
-
entity_id: str,
|
|
450
|
-
*,
|
|
451
|
-
limit: int = 10,
|
|
452
|
-
request_context: "RequestContext",
|
|
453
|
-
) -> list[Any]:
|
|
454
|
-
"""
|
|
455
|
-
Get observations for an entity.
|
|
456
|
-
|
|
457
|
-
Args:
|
|
458
|
-
bank_id: The memory bank ID.
|
|
459
|
-
entity_id: The entity ID.
|
|
460
|
-
limit: Maximum observations.
|
|
461
|
-
request_context: Request context for authentication.
|
|
462
|
-
|
|
463
|
-
Returns:
|
|
464
|
-
List of EntityObservation objects.
|
|
465
|
-
"""
|
|
466
|
-
...
|
|
467
|
-
|
|
468
|
-
@abstractmethod
|
|
469
|
-
async def regenerate_entity_observations(
|
|
470
|
-
self,
|
|
471
|
-
bank_id: str,
|
|
472
|
-
entity_id: str,
|
|
473
|
-
entity_name: str,
|
|
474
|
-
*,
|
|
475
|
-
request_context: "RequestContext",
|
|
476
|
-
) -> None:
|
|
477
|
-
"""
|
|
478
|
-
Regenerate observations for an entity.
|
|
479
|
-
|
|
480
|
-
Args:
|
|
481
|
-
bank_id: The memory bank ID.
|
|
482
|
-
entity_id: The entity ID.
|
|
483
|
-
entity_name: The entity's canonical name.
|
|
484
|
-
request_context: Request context for authentication.
|
|
485
|
-
"""
|
|
486
|
-
...
|
|
487
|
-
|
|
488
445
|
# =========================================================================
|
|
489
446
|
# Statistics & Operations
|
|
490
447
|
# =========================================================================
|
|
@@ -16,6 +16,15 @@ from google.genai import errors as genai_errors
|
|
|
16
16
|
from google.genai import types as genai_types
|
|
17
17
|
from openai import APIConnectionError, APIStatusError, AsyncOpenAI, LengthFinishReasonError
|
|
18
18
|
|
|
19
|
+
# Vertex AI imports (conditional)
|
|
20
|
+
try:
|
|
21
|
+
import google.auth
|
|
22
|
+
from google.oauth2 import service_account
|
|
23
|
+
|
|
24
|
+
VERTEXAI_AVAILABLE = True
|
|
25
|
+
except ImportError:
|
|
26
|
+
VERTEXAI_AVAILABLE = False
|
|
27
|
+
|
|
19
28
|
from ..config import (
|
|
20
29
|
DEFAULT_LLM_MAX_CONCURRENT,
|
|
21
30
|
DEFAULT_LLM_TIMEOUT,
|
|
@@ -88,7 +97,7 @@ class LLMProvider:
|
|
|
88
97
|
self.groq_service_tier = groq_service_tier or os.getenv(ENV_LLM_GROQ_SERVICE_TIER, "auto")
|
|
89
98
|
|
|
90
99
|
# Validate provider
|
|
91
|
-
valid_providers = ["openai", "groq", "ollama", "gemini", "anthropic", "lmstudio", "mock"]
|
|
100
|
+
valid_providers = ["openai", "groq", "ollama", "gemini", "anthropic", "lmstudio", "vertexai", "mock"]
|
|
92
101
|
if self.provider not in valid_providers:
|
|
93
102
|
raise ValueError(f"Invalid LLM provider: {self.provider}. Must be one of: {', '.join(valid_providers)}")
|
|
94
103
|
|
|
@@ -105,8 +114,51 @@ class LLMProvider:
|
|
|
105
114
|
elif self.provider == "lmstudio":
|
|
106
115
|
self.base_url = "http://localhost:1234/v1"
|
|
107
116
|
|
|
108
|
-
#
|
|
109
|
-
|
|
117
|
+
# Vertex AI config — stored for client creation below
|
|
118
|
+
self._vertexai_project_id: str | None = None
|
|
119
|
+
self._vertexai_region: str | None = None
|
|
120
|
+
self._vertexai_credentials: Any = None
|
|
121
|
+
|
|
122
|
+
if self.provider == "vertexai":
|
|
123
|
+
from ..config import get_config
|
|
124
|
+
|
|
125
|
+
config = get_config()
|
|
126
|
+
|
|
127
|
+
self._vertexai_project_id = config.llm_vertexai_project_id
|
|
128
|
+
if not self._vertexai_project_id:
|
|
129
|
+
raise ValueError(
|
|
130
|
+
"HINDSIGHT_API_LLM_VERTEXAI_PROJECT_ID is required for Vertex AI provider. "
|
|
131
|
+
"Set it to your GCP project ID."
|
|
132
|
+
)
|
|
133
|
+
|
|
134
|
+
self._vertexai_region = config.llm_vertexai_region or "us-central1"
|
|
135
|
+
service_account_key = config.llm_vertexai_service_account_key
|
|
136
|
+
|
|
137
|
+
# Load explicit service account credentials if provided
|
|
138
|
+
if service_account_key:
|
|
139
|
+
if not VERTEXAI_AVAILABLE:
|
|
140
|
+
raise ValueError(
|
|
141
|
+
"Vertex AI service account auth requires 'google-auth' package. "
|
|
142
|
+
"Install with: pip install google-auth"
|
|
143
|
+
)
|
|
144
|
+
self._vertexai_credentials = service_account.Credentials.from_service_account_file(
|
|
145
|
+
service_account_key,
|
|
146
|
+
scopes=["https://www.googleapis.com/auth/cloud-platform"],
|
|
147
|
+
)
|
|
148
|
+
logger.info(f"Vertex AI: Using service account key: {service_account_key}")
|
|
149
|
+
|
|
150
|
+
# Strip google/ prefix from model name — native SDK uses bare names
|
|
151
|
+
# e.g. "google/gemini-2.0-flash-lite-001" -> "gemini-2.0-flash-lite-001"
|
|
152
|
+
if self.model.startswith("google/"):
|
|
153
|
+
self.model = self.model[len("google/") :]
|
|
154
|
+
|
|
155
|
+
logger.info(
|
|
156
|
+
f"Vertex AI: project={self._vertexai_project_id}, region={self._vertexai_region}, "
|
|
157
|
+
f"model={self.model}, auth={'service_account' if service_account_key else 'ADC'}"
|
|
158
|
+
)
|
|
159
|
+
|
|
160
|
+
# Validate API key (not needed for ollama, lmstudio, vertexai, or mock)
|
|
161
|
+
if self.provider not in ("ollama", "lmstudio", "vertexai", "mock") and not self.api_key:
|
|
110
162
|
raise ValueError(f"API key not found for {self.provider}")
|
|
111
163
|
|
|
112
164
|
# Get timeout config (set HINDSIGHT_API_LLM_TIMEOUT for local LLMs that need longer timeouts)
|
|
@@ -132,6 +184,17 @@ class LLMProvider:
|
|
|
132
184
|
if self.timeout:
|
|
133
185
|
anthropic_kwargs["timeout"] = self.timeout
|
|
134
186
|
self._anthropic_client = AsyncAnthropic(**anthropic_kwargs)
|
|
187
|
+
elif self.provider == "vertexai":
|
|
188
|
+
# Native genai SDK with Vertex AI — handles ADC automatically,
|
|
189
|
+
# or uses explicit service account credentials if provided
|
|
190
|
+
client_kwargs = {
|
|
191
|
+
"vertexai": True,
|
|
192
|
+
"project": self._vertexai_project_id,
|
|
193
|
+
"location": self._vertexai_region,
|
|
194
|
+
}
|
|
195
|
+
if self._vertexai_credentials is not None:
|
|
196
|
+
client_kwargs["credentials"] = self._vertexai_credentials
|
|
197
|
+
self._gemini_client = genai.Client(**client_kwargs)
|
|
135
198
|
elif self.provider in ("ollama", "lmstudio"):
|
|
136
199
|
# Use dummy key if not provided for local
|
|
137
200
|
api_key = self.api_key or "local"
|
|
@@ -223,8 +286,8 @@ class LLMProvider:
|
|
|
223
286
|
return_usage,
|
|
224
287
|
)
|
|
225
288
|
|
|
226
|
-
# Handle Gemini
|
|
227
|
-
if self.provider
|
|
289
|
+
# Handle Gemini and Vertex AI providers (both use native genai SDK)
|
|
290
|
+
if self.provider in ("gemini", "vertexai"):
|
|
228
291
|
return await self._call_gemini(
|
|
229
292
|
messages,
|
|
230
293
|
response_format,
|
|
@@ -342,11 +405,13 @@ class LLMProvider:
|
|
|
342
405
|
schema_msg = f"\n\nYou must respond with valid JSON matching this schema:\n{json.dumps(schema, indent=2)}"
|
|
343
406
|
|
|
344
407
|
if call_params["messages"] and call_params["messages"][0].get("role") == "system":
|
|
345
|
-
call_params["messages"][0]
|
|
408
|
+
first_msg = call_params["messages"][0]
|
|
409
|
+
if isinstance(first_msg, dict) and isinstance(first_msg.get("content"), str):
|
|
410
|
+
first_msg["content"] += schema_msg
|
|
346
411
|
elif call_params["messages"]:
|
|
347
|
-
call_params["messages"][0]
|
|
348
|
-
|
|
349
|
-
|
|
412
|
+
first_msg = call_params["messages"][0]
|
|
413
|
+
if isinstance(first_msg, dict) and isinstance(first_msg.get("content"), str):
|
|
414
|
+
first_msg["content"] = schema_msg + "\n\n" + first_msg["content"]
|
|
350
415
|
if self.provider not in ("lmstudio", "ollama"):
|
|
351
416
|
# LM Studio and Ollama don't support json_object response format reliably
|
|
352
417
|
# We rely on the schema in the system message instead
|
|
@@ -586,8 +651,8 @@ class LLMProvider:
|
|
|
586
651
|
messages, tools, max_completion_tokens, max_retries, initial_backoff, max_backoff, start_time, scope
|
|
587
652
|
)
|
|
588
653
|
|
|
589
|
-
# Handle Gemini (convert to Gemini tool format)
|
|
590
|
-
if self.provider
|
|
654
|
+
# Handle Gemini and Vertex AI (convert to Gemini tool format)
|
|
655
|
+
if self.provider in ("gemini", "vertexai"):
|
|
591
656
|
return await self._call_with_tools_gemini(
|
|
592
657
|
messages, tools, max_retries, initial_backoff, max_backoff, start_time, scope
|
|
593
658
|
)
|
|
@@ -917,18 +982,20 @@ class LLMProvider:
|
|
|
917
982
|
tool_calls: list[LLMToolCall] = []
|
|
918
983
|
|
|
919
984
|
if response.candidates and response.candidates[0].content:
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
985
|
+
parts = response.candidates[0].content.parts
|
|
986
|
+
if parts:
|
|
987
|
+
for part in parts:
|
|
988
|
+
if hasattr(part, "text") and part.text:
|
|
989
|
+
content = part.text
|
|
990
|
+
if hasattr(part, "function_call") and part.function_call:
|
|
991
|
+
fc = part.function_call
|
|
992
|
+
tool_calls.append(
|
|
993
|
+
LLMToolCall(
|
|
994
|
+
id=f"gemini_{len(tool_calls)}",
|
|
995
|
+
name=fc.name,
|
|
996
|
+
arguments=dict(fc.args) if fc.args else {},
|
|
997
|
+
)
|
|
930
998
|
)
|
|
931
|
-
)
|
|
932
999
|
|
|
933
1000
|
finish_reason = "tool_calls" if tool_calls else "stop"
|
|
934
1001
|
|
|
@@ -1504,6 +1571,10 @@ class LLMProvider:
|
|
|
1504
1571
|
"""Clear the recorded mock calls."""
|
|
1505
1572
|
self._mock_calls = []
|
|
1506
1573
|
|
|
1574
|
+
async def cleanup(self) -> None:
|
|
1575
|
+
"""Clean up resources."""
|
|
1576
|
+
pass
|
|
1577
|
+
|
|
1507
1578
|
@classmethod
|
|
1508
1579
|
def for_memory(cls) -> "LLMProvider":
|
|
1509
1580
|
"""Create provider for memory operations from environment variables."""
|