hindsight-api 0.0.17__py3-none-any.whl → 0.0.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -87,7 +87,7 @@ from .http import (
87
87
  ReflectRequest,
88
88
  ReflectResponse,
89
89
  CreateBankRequest,
90
- PersonalityTraits,
90
+ DispositionTraits,
91
91
  )
92
92
 
93
93
  __all__ = [
@@ -100,5 +100,5 @@ __all__ = [
100
100
  "ReflectRequest",
101
101
  "ReflectResponse",
102
102
  "CreateBankRequest",
103
- "PersonalityTraits",
103
+ "DispositionTraits",
104
104
  ]
hindsight_api/api/http.py CHANGED
@@ -84,7 +84,7 @@ class RecallRequest(BaseModel):
84
84
  model_config = ConfigDict(json_schema_extra={
85
85
  "example": {
86
86
  "query": "What did Alice say about machine learning?",
87
- "types": ["world", "bank"],
87
+ "types": ["world", "experience"],
88
88
  "budget": "mid",
89
89
  "max_tokens": 4096,
90
90
  "trace": True,
@@ -131,7 +131,7 @@ class RecallResult(BaseModel):
131
131
 
132
132
  id: str
133
133
  text: str
134
- type: Optional[str] = None # fact type: world, agent, opinion, observation
134
+ type: Optional[str] = None # fact type: world, experience, opinion, observation
135
135
  entities: Optional[List[str]] = None # Entity names mentioned in this fact
136
136
  context: Optional[str] = None
137
137
  occurred_start: Optional[str] = None # ISO format date when the event started
@@ -397,7 +397,7 @@ class ReflectFact(BaseModel):
397
397
 
398
398
  id: Optional[str] = None
399
399
  text: str
400
- type: Optional[str] = None # fact type: world, agent, opinion
400
+ type: Optional[str] = None # fact type: world, experience, opinion
401
401
  context: Optional[str] = None
402
402
  occurred_start: Optional[str] = None
403
403
  occurred_end: Optional[str] = None
@@ -417,7 +417,7 @@ class ReflectResponse(BaseModel):
417
417
  {
418
418
  "id": "456",
419
419
  "text": "I discussed AI applications last week",
420
- "type": "bank"
420
+ "type": "experience"
421
421
  }
422
422
  ]
423
423
  }
@@ -438,8 +438,8 @@ class BanksResponse(BaseModel):
438
438
  banks: List[str]
439
439
 
440
440
 
441
- class PersonalityTraits(BaseModel):
442
- """Personality traits based on Big Five model."""
441
+ class DispositionTraits(BaseModel):
442
+ """Disposition traits based on Big Five model."""
443
443
  model_config = ConfigDict(json_schema_extra={
444
444
  "example": {
445
445
  "openness": 0.8,
@@ -456,7 +456,7 @@ class PersonalityTraits(BaseModel):
456
456
  extraversion: float = Field(ge=0.0, le=1.0, description="Extraversion (0-1)")
457
457
  agreeableness: float = Field(ge=0.0, le=1.0, description="Agreeableness (0-1)")
458
458
  neuroticism: float = Field(ge=0.0, le=1.0, description="Neuroticism (0-1)")
459
- bias_strength: float = Field(ge=0.0, le=1.0, description="How strongly personality influences opinions (0-1)")
459
+ bias_strength: float = Field(ge=0.0, le=1.0, description="How strongly disposition influences opinions (0-1)")
460
460
 
461
461
 
462
462
  class BankProfileResponse(BaseModel):
@@ -465,7 +465,7 @@ class BankProfileResponse(BaseModel):
465
465
  "example": {
466
466
  "bank_id": "user123",
467
467
  "name": "Alice",
468
- "personality": {
468
+ "disposition": {
469
469
  "openness": 0.8,
470
470
  "conscientiousness": 0.6,
471
471
  "extraversion": 0.5,
@@ -479,13 +479,13 @@ class BankProfileResponse(BaseModel):
479
479
 
480
480
  bank_id: str
481
481
  name: str
482
- personality: PersonalityTraits
482
+ disposition: DispositionTraits
483
483
  background: str
484
484
 
485
485
 
486
- class UpdatePersonalityRequest(BaseModel):
487
- """Request model for updating personality traits."""
488
- personality: PersonalityTraits
486
+ class UpdateDispositionRequest(BaseModel):
487
+ """Request model for updating disposition traits."""
488
+ disposition: DispositionTraits
489
489
 
490
490
 
491
491
  class AddBackgroundRequest(BaseModel):
@@ -493,14 +493,14 @@ class AddBackgroundRequest(BaseModel):
493
493
  model_config = ConfigDict(json_schema_extra={
494
494
  "example": {
495
495
  "content": "I was born in Texas",
496
- "update_personality": True
496
+ "update_disposition": True
497
497
  }
498
498
  })
499
499
 
500
500
  content: str = Field(description="New background information to add or merge")
501
- update_personality: bool = Field(
501
+ update_disposition: bool = Field(
502
502
  default=True,
503
- description="If true, infer Big Five personality traits from the merged background (default: true)"
503
+ description="If true, infer Big Five disposition traits from the merged background (default: true)"
504
504
  )
505
505
 
506
506
 
@@ -509,7 +509,7 @@ class BackgroundResponse(BaseModel):
509
509
  model_config = ConfigDict(json_schema_extra={
510
510
  "example": {
511
511
  "background": "I was born in Texas. I am a software engineer with 10 years of experience.",
512
- "personality": {
512
+ "disposition": {
513
513
  "openness": 0.7,
514
514
  "conscientiousness": 0.6,
515
515
  "extraversion": 0.5,
@@ -521,14 +521,14 @@ class BackgroundResponse(BaseModel):
521
521
  })
522
522
 
523
523
  background: str
524
- personality: Optional[PersonalityTraits] = None
524
+ disposition: Optional[DispositionTraits] = None
525
525
 
526
526
 
527
527
  class BankListItem(BaseModel):
528
528
  """Bank list item with profile summary."""
529
529
  bank_id: str
530
530
  name: str
531
- personality: PersonalityTraits
531
+ disposition: DispositionTraits
532
532
  background: str
533
533
  created_at: Optional[str] = None
534
534
  updated_at: Optional[str] = None
@@ -542,7 +542,7 @@ class BankListResponse(BaseModel):
542
542
  {
543
543
  "bank_id": "user123",
544
544
  "name": "Alice",
545
- "personality": {
545
+ "disposition": {
546
546
  "openness": 0.5,
547
547
  "conscientiousness": 0.5,
548
548
  "extraversion": 0.5,
@@ -566,7 +566,7 @@ class CreateBankRequest(BaseModel):
566
566
  model_config = ConfigDict(json_schema_extra={
567
567
  "example": {
568
568
  "name": "Alice",
569
- "personality": {
569
+ "disposition": {
570
570
  "openness": 0.8,
571
571
  "conscientiousness": 0.6,
572
572
  "extraversion": 0.5,
@@ -579,7 +579,7 @@ class CreateBankRequest(BaseModel):
579
579
  })
580
580
 
581
581
  name: Optional[str] = None
582
- personality: Optional[PersonalityTraits] = None
582
+ disposition: Optional[DispositionTraits] = None
583
583
  background: Optional[str] = None
584
584
 
585
585
 
@@ -833,7 +833,7 @@ def _register_routes(app: FastAPI):
833
833
  "/v1/default/banks/{bank_id}/graph",
834
834
  response_model=GraphDataResponse,
835
835
  summary="Get memory graph data",
836
- description="Retrieve graph data for visualization, optionally filtered by type (world/agent/opinion). Limited to 1000 most recent items.",
836
+ description="Retrieve graph data for visualization, optionally filtered by type (world/experience/opinion). Limited to 1000 most recent items.",
837
837
  operation_id="get_graph"
838
838
  )
839
839
  async def api_graph(bank_id: str,
@@ -871,7 +871,7 @@ def _register_routes(app: FastAPI):
871
871
 
872
872
  Args:
873
873
  bank_id: Memory Bank ID (from path)
874
- type: Filter by fact type (world, agent, opinion)
874
+ type: Filter by fact type (world, experience, opinion)
875
875
  q: Search query for full-text search (searches text and context)
876
876
  limit: Maximum number of results (default: 100)
877
877
  offset: Offset for pagination (default: 0)
@@ -901,7 +901,7 @@ def _register_routes(app: FastAPI):
901
901
 
902
902
  The type parameter is optional and must be one of:
903
903
  - 'world': General knowledge about people, places, events, and things that happen
904
- - 'bank': Memories about what the AI agent did, actions taken, and tasks performed
904
+ - 'experience': Memories about experiences, conversations, actions taken, and tasks performed
905
905
  - 'opinion': The bank's formed beliefs, perspectives, and viewpoints
906
906
 
907
907
  Set include_entities=true to get entity observations alongside recall results.
@@ -914,10 +914,10 @@ def _register_routes(app: FastAPI):
914
914
 
915
915
  try:
916
916
  # Validate types
917
- valid_fact_types = ["world", "bank", "opinion"]
917
+ valid_fact_types = ["world", "experience", "opinion"]
918
918
 
919
- # Default to world, agent, opinion if not specified (exclude observation by default)
920
- fact_types = request.types if request.types else ["world", "bank", "opinion"]
919
+ # Default to world, experience, opinion if not specified (exclude observation by default)
920
+ fact_types = request.types if request.types else ["world", "experience", "opinion"]
921
921
  for ft in fact_types:
922
922
  if ft not in valid_fact_types:
923
923
  raise HTTPException(
@@ -1026,7 +1026,7 @@ def _register_routes(app: FastAPI):
1026
1026
  Reflect and formulate an answer using bank identity, world facts, and opinions.
1027
1027
 
1028
1028
  This endpoint:
1029
- 1. Retrieves agent facts (bank's identity)
1029
+ 1. Retrieves experience (conversations and events)
1030
1030
  2. Retrieves world facts relevant to the query
1031
1031
  3. Retrieves existing opinions (bank's perspectives)
1032
1032
  4. Uses LLM to formulate a contextual answer
@@ -1579,19 +1579,19 @@ This operation cannot be undone.
1579
1579
  "/v1/default/banks/{bank_id}/profile",
1580
1580
  response_model=BankProfileResponse,
1581
1581
  summary="Get memory bank profile",
1582
- description="Get personality traits and background for a memory bank. Auto-creates agent with defaults if not exists.",
1582
+ description="Get disposition traits and background for a memory bank. Auto-creates agent with defaults if it does not exist.",
1583
1583
  operation_id="get_bank_profile"
1584
1584
  )
1585
1585
  async def api_get_bank_profile(bank_id: str):
1586
- """Get memory bank profile (personality + background)."""
1586
+ """Get memory bank profile (disposition + background)."""
1587
1587
  try:
1588
1588
  profile = await app.state.memory.get_bank_profile(bank_id)
1589
- # Convert PersonalityTraits object to dict for Pydantic
1590
- personality_dict = profile["personality"].model_dump() if hasattr(profile["personality"], 'model_dump') else dict(profile["personality"])
1589
+ # Convert DispositionTraits object to dict for Pydantic
1590
+ disposition_dict = profile["disposition"].model_dump() if hasattr(profile["disposition"], 'model_dump') else dict(profile["disposition"])
1591
1591
  return BankProfileResponse(
1592
1592
  bank_id=bank_id,
1593
1593
  name=profile["name"],
1594
- personality=PersonalityTraits(**personality_dict),
1594
+ disposition=DispositionTraits(**disposition_dict),
1595
1595
  background=profile["background"]
1596
1596
  )
1597
1597
  except Exception as e:
@@ -1604,28 +1604,28 @@ This operation cannot be undone.
1604
1604
  @app.put(
1605
1605
  "/v1/default/banks/{bank_id}/profile",
1606
1606
  response_model=BankProfileResponse,
1607
- summary="Update memory bank personality",
1608
- description="Update bank's Big Five personality traits and bias strength",
1609
- operation_id="update_bank_personality"
1607
+ summary="Update memory bank disposition",
1608
+ description="Update bank's Big Five disposition traits and bias strength",
1609
+ operation_id="update_bank_disposition"
1610
1610
  )
1611
- async def api_update_bank_personality(bank_id: str,
1612
- request: UpdatePersonalityRequest
1611
+ async def api_update_bank_disposition(bank_id: str,
1612
+ request: UpdateDispositionRequest
1613
1613
  ):
1614
- """Update bank personality traits."""
1614
+ """Update bank disposition traits."""
1615
1615
  try:
1616
- # Update personality
1617
- await app.state.memory.update_bank_personality(
1616
+ # Update disposition
1617
+ await app.state.memory.update_bank_disposition(
1618
1618
  bank_id,
1619
- request.personality.model_dump()
1619
+ request.disposition.model_dump()
1620
1620
  )
1621
1621
 
1622
1622
  # Get updated profile
1623
1623
  profile = await app.state.memory.get_bank_profile(bank_id)
1624
- personality_dict = profile["personality"].model_dump() if hasattr(profile["personality"], 'model_dump') else dict(profile["personality"])
1624
+ disposition_dict = profile["disposition"].model_dump() if hasattr(profile["disposition"], 'model_dump') else dict(profile["disposition"])
1625
1625
  return BankProfileResponse(
1626
1626
  bank_id=bank_id,
1627
1627
  name=profile["name"],
1628
- personality=PersonalityTraits(**personality_dict),
1628
+ disposition=DispositionTraits(**disposition_dict),
1629
1629
  background=profile["background"]
1630
1630
  )
1631
1631
  except Exception as e:
@@ -1639,23 +1639,23 @@ This operation cannot be undone.
1639
1639
  "/v1/default/banks/{bank_id}/background",
1640
1640
  response_model=BackgroundResponse,
1641
1641
  summary="Add/merge memory bank background",
1642
- description="Add new background information or merge with existing. LLM intelligently resolves conflicts, normalizes to first person, and optionally infers personality traits.",
1642
+ description="Add new background information or merge with existing. LLM intelligently resolves conflicts, normalizes to first person, and optionally infers disposition traits.",
1643
1643
  operation_id="add_bank_background"
1644
1644
  )
1645
1645
  async def api_add_bank_background(bank_id: str,
1646
1646
  request: AddBackgroundRequest
1647
1647
  ):
1648
- """Add or merge bank background information. Optionally infer personality traits."""
1648
+ """Add or merge bank background information. Optionally infer disposition traits."""
1649
1649
  try:
1650
1650
  result = await app.state.memory.merge_bank_background(
1651
1651
  bank_id,
1652
1652
  request.content,
1653
- update_personality=request.update_personality
1653
+ update_disposition=request.update_disposition
1654
1654
  )
1655
1655
 
1656
1656
  response = BackgroundResponse(background=result["background"])
1657
- if "personality" in result:
1658
- response.personality = PersonalityTraits(**result["personality"])
1657
+ if "disposition" in result:
1658
+ response.disposition = DispositionTraits(**result["disposition"])
1659
1659
 
1660
1660
  return response
1661
1661
  except Exception as e:
@@ -1669,13 +1669,13 @@ This operation cannot be undone.
1669
1669
  "/v1/default/banks/{bank_id}",
1670
1670
  response_model=BankProfileResponse,
1671
1671
  summary="Create or update memory bank",
1672
- description="Create a new agent or update existing agent with personality and background. Auto-fills missing fields with defaults.",
1672
+ description="Create a new agent or update existing agent with disposition and background. Auto-fills missing fields with defaults.",
1673
1673
  operation_id="create_or_update_bank"
1674
1674
  )
1675
1675
  async def api_create_or_update_bank(bank_id: str,
1676
1676
  request: CreateBankRequest
1677
1677
  ):
1678
- """Create or update an agent with personality and background."""
1678
+ """Create or update an agent with disposition and background."""
1679
1679
  try:
1680
1680
  # Get existing profile or create with defaults
1681
1681
  profile = await app.state.memory.get_bank_profile(bank_id)
@@ -1696,13 +1696,13 @@ This operation cannot be undone.
1696
1696
  )
1697
1697
  profile["name"] = request.name
1698
1698
 
1699
- # Update personality if provided
1700
- if request.personality is not None:
1701
- await app.state.memory.update_bank_personality(
1699
+ # Update disposition if provided
1700
+ if request.disposition is not None:
1701
+ await app.state.memory.update_bank_disposition(
1702
1702
  bank_id,
1703
- request.personality.model_dump()
1703
+ request.disposition.model_dump()
1704
1704
  )
1705
- profile["personality"] = request.personality.model_dump()
1705
+ profile["disposition"] = request.disposition.model_dump()
1706
1706
 
1707
1707
  # Update background if provided (replace, not merge)
1708
1708
  if request.background is not None:
@@ -1722,11 +1722,11 @@ This operation cannot be undone.
1722
1722
 
1723
1723
  # Get final profile
1724
1724
  final_profile = await app.state.memory.get_bank_profile(bank_id)
1725
- personality_dict = final_profile["personality"].model_dump() if hasattr(final_profile["personality"], 'model_dump') else dict(final_profile["personality"])
1725
+ disposition_dict = final_profile["disposition"].model_dump() if hasattr(final_profile["disposition"], 'model_dump') else dict(final_profile["disposition"])
1726
1726
  return BankProfileResponse(
1727
1727
  bank_id=bank_id,
1728
1728
  name=final_profile["name"],
1729
- personality=PersonalityTraits(**personality_dict),
1729
+ disposition=DispositionTraits(**disposition_dict),
1730
1730
  background=final_profile["background"]
1731
1731
  )
1732
1732
  except Exception as e:
@@ -1852,11 +1852,11 @@ This operation cannot be undone.
1852
1852
  "/v1/default/banks/{bank_id}/memories",
1853
1853
  response_model=DeleteResponse,
1854
1854
  summary="Clear memory bank memories",
1855
- description="Delete memory units for a memory bank. Optionally filter by type (world, agent, opinion) to delete only specific types. This is a destructive operation that cannot be undone. The bank profile (personality and background) will be preserved.",
1855
+ description="Delete memory units for a memory bank. Optionally filter by type (world, experience, opinion) to delete only specific types. This is a destructive operation that cannot be undone. The bank profile (disposition and background) will be preserved.",
1856
1856
  operation_id="clear_bank_memories"
1857
1857
  )
1858
1858
  async def api_clear_bank_memories(bank_id: str,
1859
- type: Optional[str] = Query(None, description="Optional fact type filter (world, agent, opinion)")
1859
+ type: Optional[str] = Query(None, description="Optional fact type filter (world, experience, opinion)")
1860
1860
  ):
1861
1861
  """Clear memories for a memory bank, optionally filtered by type."""
1862
1862
  try:
hindsight_api/api/mcp.py CHANGED
@@ -90,7 +90,7 @@ def create_mcp_server(memory: MemoryEngine) -> FastMCP:
90
90
  search_result = await memory.recall_async(
91
91
  bank_id=bank_id,
92
92
  query=query,
93
- fact_type=["world", "bank", "opinion"],
93
+ fact_type=["world", "experience", "opinion"],
94
94
  budget=Budget.LOW
95
95
  )
96
96
 
@@ -6,6 +6,9 @@ import time
6
6
  import asyncio
7
7
  from typing import Optional, Any, Dict, List
8
8
  from openai import AsyncOpenAI, RateLimitError, APIError, APIStatusError, LengthFinishReasonError
9
+ from google import genai
10
+ from google.genai import types as genai_types
11
+ from google.genai import errors as genai_errors
9
12
  import logging
10
13
 
11
14
  logger = logging.getLogger(__name__)
@@ -53,9 +56,9 @@ class LLMConfig:
53
56
  self.model = model
54
57
 
55
58
  # Validate provider
56
- if self.provider not in ["openai", "groq", "ollama"]:
59
+ if self.provider not in ["openai", "groq", "ollama", "gemini"]:
57
60
  raise ValueError(
58
- f"Invalid LLM provider: {self.provider}. Must be 'openai', 'groq', or 'ollama'."
61
+ f"Invalid LLM provider: {self.provider}. Must be 'openai', 'groq', 'ollama', or 'gemini'."
59
62
  )
60
63
 
61
64
  # Set default base URLs
@@ -66,19 +69,25 @@ class LLMConfig:
66
69
  self.base_url = "http://localhost:11434/v1"
67
70
 
68
71
  # Validate API key (not needed for ollama)
69
- if self.provider != "ollama" and not self.api_key:
72
+ if self.provider not in ["ollama"] and not self.api_key:
70
73
  raise ValueError(
71
74
  f"API key not found for {self.provider}"
72
75
  )
73
76
 
74
77
  # Create client (private - use .call() method instead)
75
78
  # Disable automatic retries - we handle retries in the call() method
76
- if self.provider == "ollama":
79
+ if self.provider == "gemini":
80
+ self._gemini_client = genai.Client(api_key=self.api_key)
81
+ self._client = None # Not used for Gemini
82
+ elif self.provider == "ollama":
77
83
  self._client = AsyncOpenAI(api_key="ollama", base_url=self.base_url, max_retries=0)
84
+ self._gemini_client = None
78
85
  elif self.base_url:
79
86
  self._client = AsyncOpenAI(api_key=self.api_key, base_url=self.base_url, max_retries=0)
87
+ self._gemini_client = None
80
88
  else:
81
89
  self._client = AsyncOpenAI(api_key=self.api_key, max_retries=0)
90
+ self._gemini_client = None
82
91
 
83
92
  logger.info(
84
93
  f"Initialized LLM: provider={self.provider}, model={self.model}, base_url={self.base_url}"
@@ -116,6 +125,11 @@ class LLMConfig:
116
125
  # Use global semaphore to limit concurrent requests
117
126
  async with _global_llm_semaphore:
118
127
  start_time = time.time()
128
+ import json
129
+
130
+ # Handle Gemini provider separately
131
+ if self.provider == "gemini":
132
+ return await self._call_gemini(messages, response_format, max_retries, initial_backoff, max_backoff, skip_validation, start_time, **kwargs)
119
133
 
120
134
  call_params = {
121
135
  "model": self.model,
@@ -137,7 +151,6 @@ class LLMConfig:
137
151
  if response_format is not None:
138
152
  # Use JSON mode instead of strict parse for flexibility with optional fields
139
153
  # This allows the LLM to omit optional fields without validation errors
140
- import json
141
154
 
142
155
  # Add schema to the system message
143
156
  if hasattr(response_format, 'model_json_schema'):
@@ -215,6 +228,128 @@ class LLMConfig:
215
228
  raise last_exception
216
229
  raise RuntimeError(f"LLM call failed after all retries with no exception captured")
217
230
 
231
+ async def _call_gemini(
232
+ self,
233
+ messages: List[Dict[str, str]],
234
+ response_format: Optional[Any],
235
+ max_retries: int,
236
+ initial_backoff: float,
237
+ max_backoff: float,
238
+ skip_validation: bool,
239
+ start_time: float,
240
+ **kwargs
241
+ ) -> Any:
242
+ """Handle Gemini-specific API calls using google-genai SDK."""
243
+ import json
244
+
245
+ # Convert OpenAI-style messages to Gemini format
246
+ # Gemini uses 'user' and 'model' roles, and system instructions are separate
247
+ system_instruction = None
248
+ gemini_contents = []
249
+
250
+ for msg in messages:
251
+ role = msg.get('role', 'user')
252
+ content = msg.get('content', '')
253
+
254
+ if role == 'system':
255
+ # Accumulate system messages as system instruction
256
+ if system_instruction:
257
+ system_instruction += "\n\n" + content
258
+ else:
259
+ system_instruction = content
260
+ elif role == 'assistant':
261
+ gemini_contents.append(genai_types.Content(
262
+ role="model",
263
+ parts=[genai_types.Part(text=content)]
264
+ ))
265
+ else: # user or any other role
266
+ gemini_contents.append(genai_types.Content(
267
+ role="user",
268
+ parts=[genai_types.Part(text=content)]
269
+ ))
270
+
271
+ # Add JSON schema instruction if response_format is provided
272
+ if response_format is not None and hasattr(response_format, 'model_json_schema'):
273
+ schema = response_format.model_json_schema()
274
+ schema_msg = f"\n\nYou must respond with valid JSON matching this schema:\n{json.dumps(schema, indent=2)}"
275
+ if system_instruction:
276
+ system_instruction += schema_msg
277
+ else:
278
+ system_instruction = schema_msg
279
+
280
+ # Build generation config
281
+ config_kwargs = {}
282
+ if system_instruction:
283
+ config_kwargs['system_instruction'] = system_instruction
284
+ if 'temperature' in kwargs:
285
+ config_kwargs['temperature'] = kwargs['temperature']
286
+ if 'max_tokens' in kwargs:
287
+ config_kwargs['max_output_tokens'] = kwargs['max_tokens']
288
+ if response_format is not None:
289
+ config_kwargs['response_mime_type'] = 'application/json'
290
+
291
+ generation_config = genai_types.GenerateContentConfig(**config_kwargs) if config_kwargs else None
292
+
293
+ last_exception = None
294
+
295
+ for attempt in range(max_retries + 1):
296
+ try:
297
+ response = await self._gemini_client.aio.models.generate_content(
298
+ model=self.model,
299
+ contents=gemini_contents,
300
+ config=generation_config,
301
+ )
302
+
303
+ content = response.text
304
+
305
+ if response_format is not None:
306
+ # Parse the JSON response
307
+ json_data = json.loads(content)
308
+
309
+ # Return raw JSON if skip_validation is True, otherwise validate with Pydantic
310
+ if skip_validation:
311
+ result = json_data
312
+ else:
313
+ result = response_format.model_validate(json_data)
314
+ else:
315
+ result = content
316
+
317
+ # Log call details only if it takes more than 10 seconds
318
+ duration = time.time() - start_time
319
+ if duration > 10.0 and hasattr(response, 'usage_metadata') and response.usage_metadata:
320
+ usage = response.usage_metadata
321
+ logger.info(
322
+ f"slow llm call: model={self.provider}/{self.model}, "
323
+ f"input_tokens={usage.prompt_token_count}, output_tokens={usage.candidates_token_count}, "
324
+ f"time={duration:.3f}s"
325
+ )
326
+
327
+ return result
328
+
329
+ except genai_errors.APIError as e:
330
+ # Handle rate limits and server errors with retry
331
+ if e.code in (429, 503, 500):
332
+ last_exception = e
333
+ if attempt < max_retries:
334
+ backoff = min(initial_backoff * (2 ** attempt), max_backoff)
335
+ jitter = backoff * 0.2 * (2 * (time.time() % 1) - 1)
336
+ sleep_time = backoff + jitter
337
+ await asyncio.sleep(sleep_time)
338
+ else:
339
+ logger.error(f"Gemini API error after {max_retries + 1} attempts: {str(e)}")
340
+ raise
341
+ else:
342
+ logger.error(f"Gemini API error: {type(e).__name__}: {str(e)}")
343
+ raise
344
+
345
+ except Exception as e:
346
+ logger.error(f"Unexpected error during Gemini call: {type(e).__name__}: {str(e)}")
347
+ raise
348
+
349
+ if last_exception:
350
+ raise last_exception
351
+ raise RuntimeError(f"Gemini call failed after all retries with no exception captured")
352
+
218
353
  @classmethod
219
354
  def for_memory(cls) -> "LLMConfig":
220
355
  """Create configuration for memory operations from environment variables."""