hindsight-api 0.4.2__tar.gz → 0.4.3__tar.gz

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (112)
  1. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/PKG-INFO +2 -1
  2. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/__init__.py +1 -1
  3. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/api/http.py +7 -19
  4. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/api/mcp.py +45 -5
  5. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/config.py +30 -11
  6. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/consolidation/consolidator.py +8 -1
  7. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/cross_encoder.py +2 -2
  8. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/embeddings.py +1 -1
  9. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/interface.py +0 -43
  10. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/llm_wrapper.py +93 -22
  11. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/memory_engine.py +37 -138
  12. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/response_models.py +1 -21
  13. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/retain/fact_extraction.py +3 -23
  14. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/retain/orchestrator.py +1 -4
  15. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/utils.py +0 -3
  16. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/main.py +6 -3
  17. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/mcp_tools.py +31 -12
  18. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/metrics.py +3 -3
  19. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/pg0.py +1 -1
  20. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/worker/main.py +11 -11
  21. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/worker/poller.py +226 -97
  22. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/pyproject.toml +7 -1
  23. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/.gitignore +0 -0
  24. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/README.md +0 -0
  25. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/admin/__init__.py +0 -0
  26. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/admin/cli.py +0 -0
  27. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/README +0 -0
  28. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/env.py +0 -0
  29. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/script.py.mako +0 -0
  30. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +0 -0
  31. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py +0 -0
  32. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py +0 -0
  33. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py +0 -0
  34. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/e0a1b2c3d4e5_disposition_to_3_traits.py +0 -0
  35. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/f1a2b3c4d5e6_add_memory_links_composite_index.py +0 -0
  36. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/g2a3b4c5d6e7_add_tags_column.py +0 -0
  37. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/h3c4d5e6f7g8_mental_models_v4.py +0 -0
  38. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/i4d5e6f7g8h9_delete_opinions.py +0 -0
  39. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/j5e6f7g8h9i0_mental_model_versions.py +0 -0
  40. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/k6f7g8h9i0j1_add_directive_subtype.py +0 -0
  41. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/l7g8h9i0j1k2_add_worker_columns.py +0 -0
  42. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/m8h9i0j1k2l3_mental_model_id_to_text.py +0 -0
  43. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/n9i0j1k2l3m4_learnings_and_pinned_reflections.py +0 -0
  44. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/o0j1k2l3m4n5_migrate_mental_models_data.py +0 -0
  45. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/p1k2l3m4n5o6_new_knowledge_architecture.py +0 -0
  46. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/q2l3m4n5o6p7_fix_mental_model_fact_type.py +0 -0
  47. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/r3m4n5o6p7q8_add_reflect_response_to_reflections.py +0 -0
  48. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/rename_personality_to_disposition.py +0 -0
  49. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/s4n5o6p7q8r9_add_consolidated_at_to_memory_units.py +0 -0
  50. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/t5o6p7q8r9s0_rename_mental_models_to_observations.py +0 -0
  51. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/u6p7q8r9s0t1_mental_models_text_id.py +0 -0
  52. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/alembic/versions/v7q8r9s0t1u2_add_max_tokens_to_mental_models.py +0 -0
  53. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/api/__init__.py +0 -0
  54. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/banner.py +0 -0
  55. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/daemon.py +0 -0
  56. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/__init__.py +0 -0
  57. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/consolidation/__init__.py +0 -0
  58. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/consolidation/prompts.py +0 -0
  59. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/db_budget.py +0 -0
  60. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/db_utils.py +0 -0
  61. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/directives/__init__.py +0 -0
  62. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/directives/models.py +0 -0
  63. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/entity_resolver.py +0 -0
  64. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/mental_models/__init__.py +0 -0
  65. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/mental_models/models.py +0 -0
  66. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/query_analyzer.py +0 -0
  67. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/reflect/__init__.py +0 -0
  68. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/reflect/agent.py +0 -0
  69. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/reflect/models.py +0 -0
  70. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/reflect/observations.py +0 -0
  71. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/reflect/prompts.py +0 -0
  72. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/reflect/tools.py +0 -0
  73. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/reflect/tools_schema.py +0 -0
  74. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/retain/__init__.py +0 -0
  75. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/retain/bank_utils.py +0 -0
  76. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/retain/chunk_storage.py +0 -0
  77. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/retain/deduplication.py +0 -0
  78. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/retain/embedding_processing.py +0 -0
  79. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/retain/embedding_utils.py +0 -0
  80. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/retain/entity_processing.py +0 -0
  81. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/retain/fact_storage.py +0 -0
  82. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/retain/link_creation.py +0 -0
  83. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/retain/link_utils.py +0 -0
  84. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/retain/types.py +0 -0
  85. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/search/__init__.py +0 -0
  86. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/search/fusion.py +0 -0
  87. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/search/graph_retrieval.py +0 -0
  88. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/search/link_expansion_retrieval.py +0 -0
  89. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/search/mpfp_retrieval.py +0 -0
  90. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/search/reranking.py +0 -0
  91. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/search/retrieval.py +0 -0
  92. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/search/tags.py +0 -0
  93. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/search/temporal_extraction.py +0 -0
  94. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/search/think_utils.py +0 -0
  95. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/search/trace.py +0 -0
  96. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/search/tracer.py +0 -0
  97. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/search/types.py +0 -0
  98. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/engine/task_backend.py +0 -0
  99. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/extensions/__init__.py +0 -0
  100. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/extensions/base.py +0 -0
  101. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/extensions/builtin/__init__.py +0 -0
  102. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/extensions/builtin/tenant.py +0 -0
  103. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/extensions/context.py +0 -0
  104. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/extensions/http.py +0 -0
  105. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/extensions/loader.py +0 -0
  106. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/extensions/operation_validator.py +0 -0
  107. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/extensions/tenant.py +0 -0
  108. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/mcp_local.py +0 -0
  109. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/migrations.py +0 -0
  110. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/models.py +0 -0
  111. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/server.py +0 -0
  112. {hindsight_api-0.4.2 → hindsight_api-0.4.3}/hindsight_api/worker/__init__.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hindsight-api
3
- Version: 0.4.2
3
+ Version: 0.4.3
4
4
  Summary: Hindsight: Agent Memory That Works Like Human Memory
5
5
  Requires-Python: >=3.11
6
6
  Requires-Dist: aiohttp>=3.13.3
@@ -14,6 +14,7 @@ Requires-Dist: fastapi[standard]>=0.120.3
14
14
  Requires-Dist: fastmcp>=2.14.0
15
15
  Requires-Dist: filelock>=3.20.1
16
16
  Requires-Dist: flashrank>=0.2.0
17
+ Requires-Dist: google-auth>=2.0.0
17
18
  Requires-Dist: google-genai>=1.0.0
18
19
  Requires-Dist: greenlet>=3.2.4
19
20
  Requires-Dist: httpx>=0.27.0
@@ -46,4 +46,4 @@ __all__ = [
46
46
  "RemoteTEICrossEncoder",
47
47
  "LLMConfig",
48
48
  ]
49
- __version__ = "0.4.2"
49
+ __version__ = "0.4.3"
@@ -92,8 +92,7 @@ class RecallRequest(BaseModel):
92
92
  query: str
93
93
  types: list[str] | None = Field(
94
94
  default=None,
95
- description="List of fact types to recall: 'world', 'experience', 'observation'. Defaults to world and experience if not specified. "
96
- "Note: 'opinion' is accepted but ignored (opinions are excluded from recall).",
95
+ description="List of fact types to recall: 'world', 'experience', 'observation'. Defaults to world and experience if not specified.",
97
96
  )
98
97
  budget: Budget = Budget.MID
99
98
  max_tokens: int = 4096
@@ -504,13 +503,6 @@ class ReflectRequest(BaseModel):
504
503
  )
505
504
 
506
505
 
507
- class OpinionItem(BaseModel):
508
- """Model for an opinion with confidence score."""
509
-
510
- text: str
511
- confidence: float
512
-
513
-
514
506
  class ReflectFact(BaseModel):
515
507
  """A fact used in think response."""
516
508
 
@@ -529,7 +521,7 @@ class ReflectFact(BaseModel):
529
521
 
530
522
  id: str | None = None
531
523
  text: str
532
- type: str | None = None # fact type: world, experience, opinion
524
+ type: str | None = None # fact type: world, experience, observation
533
525
  context: str | None = None
534
526
  occurred_start: str | None = None
535
527
  occurred_end: str | None = None
@@ -1412,9 +1404,10 @@ def create_app(
1412
1404
  worker_id=worker_id,
1413
1405
  executor=memory.execute_task,
1414
1406
  poll_interval_ms=config.worker_poll_interval_ms,
1415
- batch_size=config.worker_batch_size,
1416
1407
  max_retries=config.worker_max_retries,
1417
1408
  tenant_extension=getattr(memory, "_tenant_extension", None),
1409
+ max_slots=config.worker_max_slots,
1410
+ consolidation_max_slots=config.worker_consolidation_max_slots,
1418
1411
  )
1419
1412
  poller_task = asyncio.create_task(poller.run())
1420
1413
  logging.info(f"Worker poller started (worker_id={worker_id})")
@@ -1707,9 +1700,7 @@ def _register_routes(app: FastAPI):
1707
1700
  description="Recall memory using semantic similarity and spreading activation.\n\n"
1708
1701
  "The type parameter is optional and must be one of:\n"
1709
1702
  "- `world`: General knowledge about people, places, events, and things that happen\n"
1710
- "- `experience`: Memories about experience, conversations, actions taken, and tasks performed\n"
1711
- "- `opinion`: The bank's formed beliefs, perspectives, and viewpoints\n\n"
1712
- "Set `include_entities=true` to get entity observations alongside recall results.",
1703
+ "- `experience`: Memories about experience, conversations, actions taken, and tasks performed",
1713
1704
  operation_id="recall_memories",
1714
1705
  tags=["Memory"],
1715
1706
  )
@@ -1723,10 +1714,8 @@ def _register_routes(app: FastAPI):
1723
1714
  metrics = get_metrics_collector()
1724
1715
 
1725
1716
  try:
1726
- # Default to world and experience if not specified (exclude observation and opinion)
1727
- # Filter out 'opinion' even if requested - opinions are excluded from recall
1717
+ # Default to world and experience if not specified (exclude observation)
1728
1718
  fact_types = request.types if request.types else list(VALID_RECALL_FACT_TYPES)
1729
- fact_types = [ft for ft in fact_types if ft != "opinion"]
1730
1719
 
1731
1720
  # Parse query_timestamp if provided
1732
1721
  question_date = None
@@ -1858,8 +1847,7 @@ def _register_routes(app: FastAPI):
1858
1847
  "2. Retrieves world facts relevant to the query\n"
1859
1848
  "3. Retrieves existing opinions (bank's perspectives)\n"
1860
1849
  "4. Uses LLM to formulate a contextual answer\n"
1861
- "5. Extracts and stores any new opinions formed\n"
1862
- "6. Returns plain text answer, the facts used, and new opinions",
1850
+ "5. Returns plain text answer and the facts used",
1863
1851
  operation_id="reflect",
1864
1852
  tags=["Memory"],
1865
1853
  )
@@ -29,15 +29,26 @@ logger = logging.getLogger(__name__)
29
29
  # Default bank_id from environment variable
30
30
  DEFAULT_BANK_ID = os.environ.get("HINDSIGHT_MCP_BANK_ID", "default")
31
31
 
32
+ # MCP authentication token (optional - if set, Bearer token auth is required)
33
+ MCP_AUTH_TOKEN = os.environ.get("HINDSIGHT_API_MCP_AUTH_TOKEN")
34
+
32
35
  # Context variable to hold the current bank_id
33
36
  _current_bank_id: ContextVar[str | None] = ContextVar("current_bank_id", default=None)
34
37
 
38
+ # Context variable to hold the current API key (for tenant auth propagation)
39
+ _current_api_key: ContextVar[str | None] = ContextVar("current_api_key", default=None)
40
+
35
41
 
36
42
  def get_current_bank_id() -> str | None:
37
43
  """Get the current bank_id from context."""
38
44
  return _current_bank_id.get()
39
45
 
40
46
 
47
+ def get_current_api_key() -> str | None:
48
+ """Get the current API key from context."""
49
+ return _current_api_key.get()
50
+
51
+
41
52
  def create_mcp_server(memory: MemoryEngine) -> FastMCP:
42
53
  """
43
54
  Create and configure the Hindsight MCP server.
@@ -54,6 +65,7 @@ def create_mcp_server(memory: MemoryEngine) -> FastMCP:
54
65
  # Configure and register tools using shared module
55
66
  config = MCPToolsConfig(
56
67
  bank_id_resolver=get_current_bank_id,
68
+ api_key_resolver=get_current_api_key, # Propagate API key for tenant auth
57
69
  include_bank_id_param=True, # HTTP MCP supports multi-bank via parameter
58
70
  tools=None, # All tools
59
71
  retain_fire_and_forget=False, # HTTP MCP supports sync/async modes
@@ -65,7 +77,11 @@ def create_mcp_server(memory: MemoryEngine) -> FastMCP:
65
77
 
66
78
 
67
79
  class MCPMiddleware:
68
- """ASGI middleware that extracts bank_id from header or path and sets context.
80
+ """ASGI middleware that handles authentication and extracts bank_id from header or path.
81
+
82
+ Authentication:
83
+ If HINDSIGHT_API_MCP_AUTH_TOKEN is set, all requests must include a valid
84
+ Authorization header with Bearer token or direct token matching the configured value.
69
85
 
70
86
  Bank ID can be provided via:
71
87
  1. X-Bank-Id header (recommended for Claude Code)
@@ -74,7 +90,7 @@ class MCPMiddleware:
74
90
 
75
91
  For Claude Code, configure with:
76
92
  claude mcp add --transport http hindsight http://localhost:8888/mcp \\
77
- --header "X-Bank-Id: my-bank"
93
+ --header "X-Bank-Id: my-bank" --header "Authorization: Bearer <token>"
78
94
  """
79
95
 
80
96
  def __init__(self, app, memory: MemoryEngine):
@@ -98,6 +114,22 @@ class MCPMiddleware:
98
114
  await self.mcp_app(scope, receive, send)
99
115
  return
100
116
 
117
+ # Extract auth token from header (for tenant auth propagation)
118
+ auth_header = self._get_header(scope, "Authorization")
119
+ auth_token: str | None = None
120
+ if auth_header:
121
+ # Support both "Bearer <token>" and direct token
122
+ auth_token = auth_header[7:].strip() if auth_header.startswith("Bearer ") else auth_header.strip()
123
+
124
+ # Authenticate if MCP_AUTH_TOKEN is configured
125
+ if MCP_AUTH_TOKEN:
126
+ if not auth_token:
127
+ await self._send_error(send, 401, "Authorization header required")
128
+ return
129
+ if auth_token != MCP_AUTH_TOKEN:
130
+ await self._send_error(send, 401, "Invalid authentication token")
131
+ return
132
+
101
133
  path = scope.get("path", "")
102
134
 
103
135
  # Strip any mount prefix (e.g., /mcp) that FastAPI might not have stripped
@@ -132,8 +164,10 @@ class MCPMiddleware:
132
164
  bank_id = DEFAULT_BANK_ID
133
165
  logger.debug(f"Using default bank_id: {bank_id}")
134
166
 
135
- # Set bank_id context
136
- token = _current_bank_id.set(bank_id)
167
+ # Set bank_id and api_key context
168
+ bank_id_token = _current_bank_id.set(bank_id)
169
+ # Store the auth token for tenant extension to validate
170
+ api_key_token = _current_api_key.set(auth_token) if auth_token else None
137
171
  try:
138
172
  new_scope = scope.copy()
139
173
  new_scope["path"] = new_path
@@ -152,7 +186,9 @@ class MCPMiddleware:
152
186
 
153
187
  await self.mcp_app(new_scope, receive, send_wrapper)
154
188
  finally:
155
- _current_bank_id.reset(token)
189
+ _current_bank_id.reset(bank_id_token)
190
+ if api_key_token is not None:
191
+ _current_api_key.reset(api_key_token)
156
192
 
157
193
  async def _send_error(self, send, status: int, message: str):
158
194
  """Send an error response."""
@@ -176,6 +212,10 @@ def create_mcp_app(memory: MemoryEngine):
176
212
  """
177
213
  Create an ASGI app that handles MCP requests.
178
214
 
215
+ Authentication:
216
+ Set HINDSIGHT_API_MCP_AUTH_TOKEN to require Bearer token authentication.
217
+ If not set, MCP endpoint is open (for local development).
218
+
179
219
  Bank ID can be provided via:
180
220
  1. X-Bank-Id header: claude mcp add --transport http hindsight http://localhost:8888/mcp --header "X-Bank-Id: my-bank"
181
221
  2. URL path: /mcp/{bank_id}/
@@ -108,13 +108,17 @@ ENV_MCP_LOCAL_BANK_ID = "HINDSIGHT_API_MCP_LOCAL_BANK_ID"
108
108
  ENV_MCP_INSTRUCTIONS = "HINDSIGHT_API_MCP_INSTRUCTIONS"
109
109
  ENV_MENTAL_MODEL_REFRESH_CONCURRENCY = "HINDSIGHT_API_MENTAL_MODEL_REFRESH_CONCURRENCY"
110
110
 
111
+ # Vertex AI configuration
112
+ ENV_LLM_VERTEXAI_PROJECT_ID = "HINDSIGHT_API_LLM_VERTEXAI_PROJECT_ID"
113
+ ENV_LLM_VERTEXAI_REGION = "HINDSIGHT_API_LLM_VERTEXAI_REGION"
114
+ ENV_LLM_VERTEXAI_SERVICE_ACCOUNT_KEY = "HINDSIGHT_API_LLM_VERTEXAI_SERVICE_ACCOUNT_KEY"
115
+
111
116
  # Retain settings
112
117
  ENV_RETAIN_MAX_COMPLETION_TOKENS = "HINDSIGHT_API_RETAIN_MAX_COMPLETION_TOKENS"
113
118
  ENV_RETAIN_CHUNK_SIZE = "HINDSIGHT_API_RETAIN_CHUNK_SIZE"
114
119
  ENV_RETAIN_EXTRACT_CAUSAL_LINKS = "HINDSIGHT_API_RETAIN_EXTRACT_CAUSAL_LINKS"
115
120
  ENV_RETAIN_EXTRACTION_MODE = "HINDSIGHT_API_RETAIN_EXTRACTION_MODE"
116
121
  ENV_RETAIN_CUSTOM_INSTRUCTIONS = "HINDSIGHT_API_RETAIN_CUSTOM_INSTRUCTIONS"
117
- ENV_RETAIN_OBSERVATIONS_ASYNC = "HINDSIGHT_API_RETAIN_OBSERVATIONS_ASYNC"
118
122
 
119
123
  # Observations settings (consolidated knowledge from facts)
120
124
  ENV_ENABLE_OBSERVATIONS = "HINDSIGHT_API_ENABLE_OBSERVATIONS"
@@ -139,8 +143,9 @@ ENV_WORKER_ENABLED = "HINDSIGHT_API_WORKER_ENABLED"
139
143
  ENV_WORKER_ID = "HINDSIGHT_API_WORKER_ID"
140
144
  ENV_WORKER_POLL_INTERVAL_MS = "HINDSIGHT_API_WORKER_POLL_INTERVAL_MS"
141
145
  ENV_WORKER_MAX_RETRIES = "HINDSIGHT_API_WORKER_MAX_RETRIES"
142
- ENV_WORKER_BATCH_SIZE = "HINDSIGHT_API_WORKER_BATCH_SIZE"
143
146
  ENV_WORKER_HTTP_PORT = "HINDSIGHT_API_WORKER_HTTP_PORT"
147
+ ENV_WORKER_MAX_SLOTS = "HINDSIGHT_API_WORKER_MAX_SLOTS"
148
+ ENV_WORKER_CONSOLIDATION_MAX_SLOTS = "HINDSIGHT_API_WORKER_CONSOLIDATION_MAX_SLOTS"
144
149
 
145
150
  # Reflect agent settings
146
151
  ENV_REFLECT_MAX_ITERATIONS = "HINDSIGHT_API_REFLECT_MAX_ITERATIONS"
@@ -156,6 +161,11 @@ DEFAULT_LLM_INITIAL_BACKOFF = 1.0 # Initial backoff in seconds for retry expone
156
161
  DEFAULT_LLM_MAX_BACKOFF = 60.0 # Max backoff cap in seconds for retry exponential backoff
157
162
  DEFAULT_LLM_TIMEOUT = 120.0 # seconds
158
163
 
164
+ # Vertex AI defaults
165
+ DEFAULT_LLM_VERTEXAI_PROJECT_ID = None # Required for Vertex AI
166
+ DEFAULT_LLM_VERTEXAI_REGION = "us-central1"
167
+ DEFAULT_LLM_VERTEXAI_SERVICE_ACCOUNT_KEY = None # Optional, uses ADC if not set
168
+
159
169
  DEFAULT_EMBEDDINGS_PROVIDER = "local"
160
170
  DEFAULT_EMBEDDINGS_LOCAL_MODEL = "BAAI/bge-small-en-v1.5"
161
171
  DEFAULT_EMBEDDINGS_LOCAL_FORCE_CPU = False # Force CPU mode for local embeddings (avoids MPS/XPC issues on macOS)
@@ -200,7 +210,6 @@ DEFAULT_RETAIN_EXTRACT_CAUSAL_LINKS = True # Extract causal links between facts
200
210
  DEFAULT_RETAIN_EXTRACTION_MODE = "concise" # Extraction mode: "concise", "verbose", or "custom"
201
211
  RETAIN_EXTRACTION_MODES = ("concise", "verbose", "custom") # Allowed extraction modes
202
212
  DEFAULT_RETAIN_CUSTOM_INSTRUCTIONS = None # Custom extraction guidelines (only used when mode="custom")
203
- DEFAULT_RETAIN_OBSERVATIONS_ASYNC = False # Run observation generation async (after retain completes)
204
213
 
205
214
  # Observations defaults (consolidated knowledge from facts)
206
215
  DEFAULT_ENABLE_OBSERVATIONS = True # Observations enabled by default
@@ -221,8 +230,9 @@ DEFAULT_WORKER_ENABLED = True # API runs worker by default (standalone mode)
221
230
  DEFAULT_WORKER_ID = None # Will use hostname if not specified
222
231
  DEFAULT_WORKER_POLL_INTERVAL_MS = 500 # Poll database every 500ms
223
232
  DEFAULT_WORKER_MAX_RETRIES = 3 # Max retries before marking task failed
224
- DEFAULT_WORKER_BATCH_SIZE = 10 # Tasks to claim per poll cycle
225
233
  DEFAULT_WORKER_HTTP_PORT = 8889 # HTTP port for worker metrics/health
234
+ DEFAULT_WORKER_MAX_SLOTS = 10 # Total concurrent tasks per worker
235
+ DEFAULT_WORKER_CONSOLIDATION_MAX_SLOTS = 2 # Max concurrent consolidation tasks per worker
226
236
 
227
237
  # Reflect agent settings
228
238
  DEFAULT_REFLECT_MAX_ITERATIONS = 10 # Max tool call iterations before forcing response
@@ -312,6 +322,11 @@ class HindsightConfig:
312
322
  llm_max_backoff: float
313
323
  llm_timeout: float
314
324
 
325
+ # Vertex AI configuration
326
+ llm_vertexai_project_id: str | None
327
+ llm_vertexai_region: str
328
+ llm_vertexai_service_account_key: str | None
329
+
315
330
  # Per-operation LLM configuration (None = use default LLM config)
316
331
  retain_llm_provider: str | None
317
332
  retain_llm_api_key: str | None
@@ -382,7 +397,6 @@ class HindsightConfig:
382
397
  retain_extract_causal_links: bool
383
398
  retain_extraction_mode: str
384
399
  retain_custom_instructions: str | None
385
- retain_observations_async: bool
386
400
 
387
401
  # Observations settings (consolidated knowledge from facts)
388
402
  enable_observations: bool
@@ -407,8 +421,9 @@ class HindsightConfig:
407
421
  worker_id: str | None
408
422
  worker_poll_interval_ms: int
409
423
  worker_max_retries: int
410
- worker_batch_size: int
411
424
  worker_http_port: int
425
+ worker_max_slots: int
426
+ worker_consolidation_max_slots: int
412
427
 
413
428
  # Reflect agent settings
414
429
  reflect_max_iterations: int
@@ -430,6 +445,11 @@ class HindsightConfig:
430
445
  llm_initial_backoff=float(os.getenv(ENV_LLM_INITIAL_BACKOFF, str(DEFAULT_LLM_INITIAL_BACKOFF))),
431
446
  llm_max_backoff=float(os.getenv(ENV_LLM_MAX_BACKOFF, str(DEFAULT_LLM_MAX_BACKOFF))),
432
447
  llm_timeout=float(os.getenv(ENV_LLM_TIMEOUT, str(DEFAULT_LLM_TIMEOUT))),
448
+ # Vertex AI
449
+ llm_vertexai_project_id=os.getenv(ENV_LLM_VERTEXAI_PROJECT_ID) or DEFAULT_LLM_VERTEXAI_PROJECT_ID,
450
+ llm_vertexai_region=os.getenv(ENV_LLM_VERTEXAI_REGION, DEFAULT_LLM_VERTEXAI_REGION),
451
+ llm_vertexai_service_account_key=os.getenv(ENV_LLM_VERTEXAI_SERVICE_ACCOUNT_KEY)
452
+ or DEFAULT_LLM_VERTEXAI_SERVICE_ACCOUNT_KEY,
433
453
  # Per-operation LLM config (None = use default)
434
454
  retain_llm_provider=os.getenv(ENV_RETAIN_LLM_PROVIDER) or None,
435
455
  retain_llm_api_key=os.getenv(ENV_RETAIN_LLM_API_KEY) or None,
@@ -545,10 +565,6 @@ class HindsightConfig:
545
565
  os.getenv(ENV_RETAIN_EXTRACTION_MODE, DEFAULT_RETAIN_EXTRACTION_MODE)
546
566
  ),
547
567
  retain_custom_instructions=os.getenv(ENV_RETAIN_CUSTOM_INSTRUCTIONS) or DEFAULT_RETAIN_CUSTOM_INSTRUCTIONS,
548
- retain_observations_async=os.getenv(
549
- ENV_RETAIN_OBSERVATIONS_ASYNC, str(DEFAULT_RETAIN_OBSERVATIONS_ASYNC)
550
- ).lower()
551
- == "true",
552
568
  # Observations settings (consolidated knowledge from facts)
553
569
  enable_observations=os.getenv(ENV_ENABLE_OBSERVATIONS, str(DEFAULT_ENABLE_OBSERVATIONS)).lower() == "true",
554
570
  consolidation_batch_size=int(
@@ -569,8 +585,11 @@ class HindsightConfig:
569
585
  worker_id=os.getenv(ENV_WORKER_ID) or DEFAULT_WORKER_ID,
570
586
  worker_poll_interval_ms=int(os.getenv(ENV_WORKER_POLL_INTERVAL_MS, str(DEFAULT_WORKER_POLL_INTERVAL_MS))),
571
587
  worker_max_retries=int(os.getenv(ENV_WORKER_MAX_RETRIES, str(DEFAULT_WORKER_MAX_RETRIES))),
572
- worker_batch_size=int(os.getenv(ENV_WORKER_BATCH_SIZE, str(DEFAULT_WORKER_BATCH_SIZE))),
573
588
  worker_http_port=int(os.getenv(ENV_WORKER_HTTP_PORT, str(DEFAULT_WORKER_HTTP_PORT))),
589
+ worker_max_slots=int(os.getenv(ENV_WORKER_MAX_SLOTS, str(DEFAULT_WORKER_MAX_SLOTS))),
590
+ worker_consolidation_max_slots=int(
591
+ os.getenv(ENV_WORKER_CONSOLIDATION_MAX_SLOTS, str(DEFAULT_WORKER_CONSOLIDATION_MAX_SLOTS))
592
+ ),
574
593
  # Reflect agent settings
575
594
  reflect_max_iterations=int(os.getenv(ENV_REFLECT_MAX_ITERATIONS, str(DEFAULT_REFLECT_MAX_ITERATIONS))),
576
595
  )
@@ -865,7 +865,14 @@ Focus on DURABLE knowledge that serves this mission, not ephemeral state.
865
865
  )
866
866
  # Parse JSON response - should be an array
867
867
  if isinstance(result, str):
868
- result = json.loads(result)
868
+ # Strip markdown code fences (some models wrap JSON in ```json ... ```)
869
+ clean = result.strip()
870
+ if clean.startswith("```"):
871
+ clean = clean.split("\n", 1)[1] if "\n" in clean else clean[3:]
872
+ if clean.endswith("```"):
873
+ clean = clean[:-3]
874
+ clean = clean.strip()
875
+ result = json.loads(clean)
869
876
  # Ensure result is a list
870
877
  if isinstance(result, list):
871
878
  return result
@@ -614,7 +614,7 @@ class FlashRankCrossEncoder(CrossEncoderModel):
614
614
  return
615
615
 
616
616
  try:
617
- from flashrank import Ranker # type: ignore[import-untyped]
617
+ from flashrank import Ranker
618
618
  except ImportError:
619
619
  raise ImportError("flashrank is required for FlashRankCrossEncoder. Install it with: pip install flashrank")
620
620
 
@@ -641,7 +641,7 @@ class FlashRankCrossEncoder(CrossEncoderModel):
641
641
 
642
642
  def _predict_sync(self, pairs: list[tuple[str, str]]) -> list[float]:
643
643
  """Synchronous predict - processes each query group."""
644
- from flashrank import RerankRequest # type: ignore[import-untyped]
644
+ from flashrank import RerankRequest
645
645
 
646
646
  if not pairs:
647
647
  return []
@@ -545,7 +545,7 @@ class CohereEmbeddings(Embeddings):
545
545
  model=self.model,
546
546
  input_type=self.input_type,
547
547
  )
548
- if response.embeddings:
548
+ if response.embeddings and isinstance(response.embeddings, list):
549
549
  self._dimension = len(response.embeddings[0])
550
550
 
551
551
  logger.info(f"Embeddings: Cohere provider initialized (model: {self.model}, dim: {self._dimension})")
@@ -442,49 +442,6 @@ class MemoryEngineInterface(ABC):
442
442
  """
443
443
  ...
444
444
 
445
- @abstractmethod
446
- async def get_entity_observations(
447
- self,
448
- bank_id: str,
449
- entity_id: str,
450
- *,
451
- limit: int = 10,
452
- request_context: "RequestContext",
453
- ) -> list[Any]:
454
- """
455
- Get observations for an entity.
456
-
457
- Args:
458
- bank_id: The memory bank ID.
459
- entity_id: The entity ID.
460
- limit: Maximum observations.
461
- request_context: Request context for authentication.
462
-
463
- Returns:
464
- List of EntityObservation objects.
465
- """
466
- ...
467
-
468
- @abstractmethod
469
- async def regenerate_entity_observations(
470
- self,
471
- bank_id: str,
472
- entity_id: str,
473
- entity_name: str,
474
- *,
475
- request_context: "RequestContext",
476
- ) -> None:
477
- """
478
- Regenerate observations for an entity.
479
-
480
- Args:
481
- bank_id: The memory bank ID.
482
- entity_id: The entity ID.
483
- entity_name: The entity's canonical name.
484
- request_context: Request context for authentication.
485
- """
486
- ...
487
-
488
445
  # =========================================================================
489
446
  # Statistics & Operations
490
447
  # =========================================================================
@@ -16,6 +16,15 @@ from google.genai import errors as genai_errors
16
16
  from google.genai import types as genai_types
17
17
  from openai import APIConnectionError, APIStatusError, AsyncOpenAI, LengthFinishReasonError
18
18
 
19
+ # Vertex AI imports (conditional)
20
+ try:
21
+ import google.auth
22
+ from google.oauth2 import service_account
23
+
24
+ VERTEXAI_AVAILABLE = True
25
+ except ImportError:
26
+ VERTEXAI_AVAILABLE = False
27
+
19
28
  from ..config import (
20
29
  DEFAULT_LLM_MAX_CONCURRENT,
21
30
  DEFAULT_LLM_TIMEOUT,
@@ -88,7 +97,7 @@ class LLMProvider:
88
97
  self.groq_service_tier = groq_service_tier or os.getenv(ENV_LLM_GROQ_SERVICE_TIER, "auto")
89
98
 
90
99
  # Validate provider
91
- valid_providers = ["openai", "groq", "ollama", "gemini", "anthropic", "lmstudio", "mock"]
100
+ valid_providers = ["openai", "groq", "ollama", "gemini", "anthropic", "lmstudio", "vertexai", "mock"]
92
101
  if self.provider not in valid_providers:
93
102
  raise ValueError(f"Invalid LLM provider: {self.provider}. Must be one of: {', '.join(valid_providers)}")
94
103
 
@@ -105,8 +114,51 @@ class LLMProvider:
105
114
  elif self.provider == "lmstudio":
106
115
  self.base_url = "http://localhost:1234/v1"
107
116
 
108
- # Validate API key (not needed for ollama, lmstudio, or mock)
109
- if self.provider not in ("ollama", "lmstudio", "mock") and not self.api_key:
117
+ # Vertex AI config stored for client creation below
118
+ self._vertexai_project_id: str | None = None
119
+ self._vertexai_region: str | None = None
120
+ self._vertexai_credentials: Any = None
121
+
122
+ if self.provider == "vertexai":
123
+ from ..config import get_config
124
+
125
+ config = get_config()
126
+
127
+ self._vertexai_project_id = config.llm_vertexai_project_id
128
+ if not self._vertexai_project_id:
129
+ raise ValueError(
130
+ "HINDSIGHT_API_LLM_VERTEXAI_PROJECT_ID is required for Vertex AI provider. "
131
+ "Set it to your GCP project ID."
132
+ )
133
+
134
+ self._vertexai_region = config.llm_vertexai_region or "us-central1"
135
+ service_account_key = config.llm_vertexai_service_account_key
136
+
137
+ # Load explicit service account credentials if provided
138
+ if service_account_key:
139
+ if not VERTEXAI_AVAILABLE:
140
+ raise ValueError(
141
+ "Vertex AI service account auth requires 'google-auth' package. "
142
+ "Install with: pip install google-auth"
143
+ )
144
+ self._vertexai_credentials = service_account.Credentials.from_service_account_file(
145
+ service_account_key,
146
+ scopes=["https://www.googleapis.com/auth/cloud-platform"],
147
+ )
148
+ logger.info(f"Vertex AI: Using service account key: {service_account_key}")
149
+
150
+ # Strip google/ prefix from model name — native SDK uses bare names
151
+ # e.g. "google/gemini-2.0-flash-lite-001" -> "gemini-2.0-flash-lite-001"
152
+ if self.model.startswith("google/"):
153
+ self.model = self.model[len("google/") :]
154
+
155
+ logger.info(
156
+ f"Vertex AI: project={self._vertexai_project_id}, region={self._vertexai_region}, "
157
+ f"model={self.model}, auth={'service_account' if service_account_key else 'ADC'}"
158
+ )
159
+
160
+ # Validate API key (not needed for ollama, lmstudio, vertexai, or mock)
161
+ if self.provider not in ("ollama", "lmstudio", "vertexai", "mock") and not self.api_key:
110
162
  raise ValueError(f"API key not found for {self.provider}")
111
163
 
112
164
  # Get timeout config (set HINDSIGHT_API_LLM_TIMEOUT for local LLMs that need longer timeouts)
@@ -132,6 +184,17 @@ class LLMProvider:
132
184
  if self.timeout:
133
185
  anthropic_kwargs["timeout"] = self.timeout
134
186
  self._anthropic_client = AsyncAnthropic(**anthropic_kwargs)
187
+ elif self.provider == "vertexai":
188
+ # Native genai SDK with Vertex AI — handles ADC automatically,
189
+ # or uses explicit service account credentials if provided
190
+ client_kwargs = {
191
+ "vertexai": True,
192
+ "project": self._vertexai_project_id,
193
+ "location": self._vertexai_region,
194
+ }
195
+ if self._vertexai_credentials is not None:
196
+ client_kwargs["credentials"] = self._vertexai_credentials
197
+ self._gemini_client = genai.Client(**client_kwargs)
135
198
  elif self.provider in ("ollama", "lmstudio"):
136
199
  # Use dummy key if not provided for local
137
200
  api_key = self.api_key or "local"
@@ -223,8 +286,8 @@ class LLMProvider:
223
286
  return_usage,
224
287
  )
225
288
 
226
- # Handle Gemini provider separately
227
- if self.provider == "gemini":
289
+ # Handle Gemini and Vertex AI providers (both use native genai SDK)
290
+ if self.provider in ("gemini", "vertexai"):
228
291
  return await self._call_gemini(
229
292
  messages,
230
293
  response_format,
@@ -342,11 +405,13 @@ class LLMProvider:
342
405
  schema_msg = f"\n\nYou must respond with valid JSON matching this schema:\n{json.dumps(schema, indent=2)}"
343
406
 
344
407
  if call_params["messages"] and call_params["messages"][0].get("role") == "system":
345
- call_params["messages"][0]["content"] += schema_msg
408
+ first_msg = call_params["messages"][0]
409
+ if isinstance(first_msg, dict) and isinstance(first_msg.get("content"), str):
410
+ first_msg["content"] += schema_msg
346
411
  elif call_params["messages"]:
347
- call_params["messages"][0]["content"] = (
348
- schema_msg + "\n\n" + call_params["messages"][0]["content"]
349
- )
412
+ first_msg = call_params["messages"][0]
413
+ if isinstance(first_msg, dict) and isinstance(first_msg.get("content"), str):
414
+ first_msg["content"] = schema_msg + "\n\n" + first_msg["content"]
350
415
  if self.provider not in ("lmstudio", "ollama"):
351
416
  # LM Studio and Ollama don't support json_object response format reliably
352
417
  # We rely on the schema in the system message instead
@@ -586,8 +651,8 @@ class LLMProvider:
586
651
  messages, tools, max_completion_tokens, max_retries, initial_backoff, max_backoff, start_time, scope
587
652
  )
588
653
 
589
- # Handle Gemini (convert to Gemini tool format)
590
- if self.provider == "gemini":
654
+ # Handle Gemini and Vertex AI (convert to Gemini tool format)
655
+ if self.provider in ("gemini", "vertexai"):
591
656
  return await self._call_with_tools_gemini(
592
657
  messages, tools, max_retries, initial_backoff, max_backoff, start_time, scope
593
658
  )
@@ -917,18 +982,20 @@ class LLMProvider:
917
982
  tool_calls: list[LLMToolCall] = []
918
983
 
919
984
  if response.candidates and response.candidates[0].content:
920
- for part in response.candidates[0].content.parts:
921
- if hasattr(part, "text") and part.text:
922
- content = part.text
923
- if hasattr(part, "function_call") and part.function_call:
924
- fc = part.function_call
925
- tool_calls.append(
926
- LLMToolCall(
927
- id=f"gemini_{len(tool_calls)}",
928
- name=fc.name,
929
- arguments=dict(fc.args) if fc.args else {},
985
+ parts = response.candidates[0].content.parts
986
+ if parts:
987
+ for part in parts:
988
+ if hasattr(part, "text") and part.text:
989
+ content = part.text
990
+ if hasattr(part, "function_call") and part.function_call:
991
+ fc = part.function_call
992
+ tool_calls.append(
993
+ LLMToolCall(
994
+ id=f"gemini_{len(tool_calls)}",
995
+ name=fc.name,
996
+ arguments=dict(fc.args) if fc.args else {},
997
+ )
930
998
  )
931
- )
932
999
 
933
1000
  finish_reason = "tool_calls" if tool_calls else "stop"
934
1001
 
@@ -1504,6 +1571,10 @@ class LLMProvider:
1504
1571
  """Clear the recorded mock calls."""
1505
1572
  self._mock_calls = []
1506
1573
 
1574
+ async def cleanup(self) -> None:
1575
+ """Clean up resources."""
1576
+ pass
1577
+
1507
1578
  @classmethod
1508
1579
  def for_memory(cls) -> "LLMProvider":
1509
1580
  """Create provider for memory operations from environment variables."""