hindsight-api 0.0.17__py3-none-any.whl → 0.0.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hindsight_api/api/__init__.py +2 -2
- hindsight_api/api/http.py +60 -60
- hindsight_api/api/mcp.py +1 -1
- hindsight_api/engine/llm_wrapper.py +140 -5
- hindsight_api/engine/memory_engine.py +33 -31
- hindsight_api/engine/response_models.py +6 -6
- hindsight_api/engine/retain/bank_utils.py +66 -66
- hindsight_api/engine/retain/fact_extraction.py +8 -8
- hindsight_api/engine/retain/fact_storage.py +1 -1
- hindsight_api/engine/retain/link_utils.py +112 -43
- hindsight_api/engine/retain/types.py +1 -1
- hindsight_api/engine/search/think_utils.py +20 -20
- hindsight_api/engine/search/trace.py +1 -1
- hindsight_api/models.py +5 -5
- {hindsight_api-0.0.17.dist-info → hindsight_api-0.0.20.dist-info}/METADATA +2 -1
- {hindsight_api-0.0.17.dist-info → hindsight_api-0.0.20.dist-info}/RECORD +18 -18
- {hindsight_api-0.0.17.dist-info → hindsight_api-0.0.20.dist-info}/WHEEL +0 -0
- {hindsight_api-0.0.17.dist-info → hindsight_api-0.0.20.dist-info}/entry_points.txt +0 -0
hindsight_api/api/__init__.py
CHANGED
@@ -87,7 +87,7 @@ from .http import (
     ReflectRequest,
     ReflectResponse,
     CreateBankRequest,
-    PersonalityTraits,
+    DispositionTraits,
 )

 __all__ = [
@@ -100,5 +100,5 @@ __all__ = [
     "ReflectRequest",
     "ReflectResponse",
     "CreateBankRequest",
-    "PersonalityTraits",
+    "DispositionTraits",
 ]
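For downstream users this rename is a straight import swap: code that previously imported PersonalityTraits from hindsight_api.api should now import DispositionTraits. A minimal sketch (the trait values are illustrative, not taken from this diff):

    from hindsight_api.api import DispositionTraits

    # Big Five traits plus bias_strength; every field is a float constrained to 0.0-1.0
    traits = DispositionTraits(
        openness=0.8,
        conscientiousness=0.6,
        extraversion=0.5,
        agreeableness=0.7,
        neuroticism=0.3,
        bias_strength=0.5,
    )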
hindsight_api/api/http.py
CHANGED
@@ -84,7 +84,7 @@ class RecallRequest(BaseModel):
     model_config = ConfigDict(json_schema_extra={
         "example": {
             "query": "What did Alice say about machine learning?",
-            "types": ["world", "
+            "types": ["world", "experience"],
             "budget": "mid",
             "max_tokens": 4096,
             "trace": True,
@@ -131,7 +131,7 @@ class RecallResult(BaseModel):

     id: str
     text: str
-    type: Optional[str] = None  # fact type: world,
+    type: Optional[str] = None  # fact type: world, experience, opinion, observation
     entities: Optional[List[str]] = None  # Entity names mentioned in this fact
     context: Optional[str] = None
     occurred_start: Optional[str] = None  # ISO format date when the event started
@@ -397,7 +397,7 @@ class ReflectFact(BaseModel):

     id: Optional[str] = None
     text: str
-    type: Optional[str] = None  # fact type: world,
+    type: Optional[str] = None  # fact type: world, experience, opinion
     context: Optional[str] = None
     occurred_start: Optional[str] = None
     occurred_end: Optional[str] = None
@@ -417,7 +417,7 @@ class ReflectResponse(BaseModel):
             {
                 "id": "456",
                 "text": "I discussed AI applications last week",
-                "type": "
+                "type": "experience"
             }
         ]
     }
@@ -438,8 +438,8 @@ class BanksResponse(BaseModel):
     banks: List[str]


-class PersonalityTraits(BaseModel):
-    """
+class DispositionTraits(BaseModel):
+    """Disposition traits based on Big Five model."""
     model_config = ConfigDict(json_schema_extra={
         "example": {
             "openness": 0.8,
@@ -456,7 +456,7 @@ class PersonalityTraits(BaseModel):
     extraversion: float = Field(ge=0.0, le=1.0, description="Extraversion (0-1)")
     agreeableness: float = Field(ge=0.0, le=1.0, description="Agreeableness (0-1)")
     neuroticism: float = Field(ge=0.0, le=1.0, description="Neuroticism (0-1)")
-    bias_strength: float = Field(ge=0.0, le=1.0, description="How strongly personality influences opinions (0-1)")
+    bias_strength: float = Field(ge=0.0, le=1.0, description="How strongly disposition influences opinions (0-1)")


 class BankProfileResponse(BaseModel):
@@ -465,7 +465,7 @@ class BankProfileResponse(BaseModel):
         "example": {
             "bank_id": "user123",
             "name": "Alice",
-            "personality": {
+            "disposition": {
                 "openness": 0.8,
                 "conscientiousness": 0.6,
                 "extraversion": 0.5,
@@ -479,13 +479,13 @@ class BankProfileResponse(BaseModel):

     bank_id: str
     name: str
-    personality: PersonalityTraits
+    disposition: DispositionTraits
     background: str


-class UpdatePersonalityRequest(BaseModel):
-    """Request model for updating personality traits."""
-    personality: PersonalityTraits
+class UpdateDispositionRequest(BaseModel):
+    """Request model for updating disposition traits."""
+    disposition: DispositionTraits


 class AddBackgroundRequest(BaseModel):
@@ -493,14 +493,14 @@ class AddBackgroundRequest(BaseModel):
     model_config = ConfigDict(json_schema_extra={
         "example": {
             "content": "I was born in Texas",
-            "update_personality": True
+            "update_disposition": True
         }
     })

     content: str = Field(description="New background information to add or merge")
-    update_personality: bool = Field(
+    update_disposition: bool = Field(
         default=True,
-        description="If true, infer Big Five personality traits from the merged background (default: true)"
+        description="If true, infer Big Five disposition traits from the merged background (default: true)"
     )


@@ -509,7 +509,7 @@ class BackgroundResponse(BaseModel):
     model_config = ConfigDict(json_schema_extra={
         "example": {
             "background": "I was born in Texas. I am a software engineer with 10 years of experience.",
-            "personality": {
+            "disposition": {
                 "openness": 0.7,
                 "conscientiousness": 0.6,
                 "extraversion": 0.5,
@@ -521,14 +521,14 @@ class BackgroundResponse(BaseModel):
     })

     background: str
-    personality: Optional[PersonalityTraits] = None
+    disposition: Optional[DispositionTraits] = None


 class BankListItem(BaseModel):
     """Bank list item with profile summary."""
     bank_id: str
     name: str
-    personality: PersonalityTraits
+    disposition: DispositionTraits
     background: str
     created_at: Optional[str] = None
     updated_at: Optional[str] = None
@@ -542,7 +542,7 @@ class BankListResponse(BaseModel):
             {
                 "bank_id": "user123",
                 "name": "Alice",
-                "personality": {
+                "disposition": {
                     "openness": 0.5,
                     "conscientiousness": 0.5,
                     "extraversion": 0.5,
@@ -566,7 +566,7 @@ class CreateBankRequest(BaseModel):
     model_config = ConfigDict(json_schema_extra={
         "example": {
             "name": "Alice",
-            "personality": {
+            "disposition": {
                 "openness": 0.8,
                 "conscientiousness": 0.6,
                 "extraversion": 0.5,
@@ -579,7 +579,7 @@ class CreateBankRequest(BaseModel):
     })

     name: Optional[str] = None
-    personality: Optional[PersonalityTraits] = None
+    disposition: Optional[DispositionTraits] = None
     background: Optional[str] = None

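The model changes above amount to one systematic rename: every request and response schema now nests the Big Five block under "disposition" rather than "personality". A sketch of building the new create-bank payload from these models (values illustrative; field names follow the hunks above):

    from hindsight_api.api.http import CreateBankRequest, DispositionTraits

    req = CreateBankRequest(
        name="Alice",
        disposition=DispositionTraits(
            openness=0.8, conscientiousness=0.6, extraversion=0.5,
            agreeableness=0.7, neuroticism=0.3, bias_strength=0.5,
        ),
        background="I was born in Texas",
    )
    print(req.model_dump_json(exclude_none=True))

The remaining hunks for http.py below apply the same rename to the route handlers.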
@@ -833,7 +833,7 @@ def _register_routes(app: FastAPI):
         "/v1/default/banks/{bank_id}/graph",
         response_model=GraphDataResponse,
         summary="Get memory graph data",
-        description="Retrieve graph data for visualization, optionally filtered by type (world/
+        description="Retrieve graph data for visualization, optionally filtered by type (world/experience/opinion). Limited to 1000 most recent items.",
         operation_id="get_graph"
     )
     async def api_graph(bank_id: str,
@@ -871,7 +871,7 @@ def _register_routes(app: FastAPI):

         Args:
             bank_id: Memory Bank ID (from path)
-            type: Filter by fact type (world,
+            type: Filter by fact type (world, experience, opinion)
             q: Search query for full-text search (searches text and context)
             limit: Maximum number of results (default: 100)
             offset: Offset for pagination (default: 0)
@@ -901,7 +901,7 @@ def _register_routes(app: FastAPI):

         The type parameter is optional and must be one of:
         - 'world': General knowledge about people, places, events, and things that happen
-        - '
+        - 'experience': Memories about experience, conversations, actions taken, and tasks performed
         - 'opinion': The bank's formed beliefs, perspectives, and viewpoints

         Set include_entities=true to get entity observations alongside recall results.
@@ -914,10 +914,10 @@ def _register_routes(app: FastAPI):

         try:
             # Validate types
-            valid_fact_types = ["world", "
+            valid_fact_types = ["world", "experience", "opinion"]

-            # Default to world,
-            fact_types = request.types if request.types else ["world", "
+            # Default to world, experience, opinion if not specified (exclude observation by default)
+            fact_types = request.types if request.types else ["world", "experience", "opinion"]
             for ft in fact_types:
                 if ft not in valid_fact_types:
                     raise HTTPException(
@@ -1026,7 +1026,7 @@ def _register_routes(app: FastAPI):
         Reflect and formulate an answer using bank identity, world facts, and opinions.

         This endpoint:
-        1. Retrieves
+        1. Retrieves experience (conversations and events)
         2. Retrieves world facts relevant to the query
         3. Retrieves existing opinions (bank's perspectives)
         4. Uses LLM to formulate a contextual answer
@@ -1579,19 +1579,19 @@ This operation cannot be undone.
         "/v1/default/banks/{bank_id}/profile",
         response_model=BankProfileResponse,
         summary="Get memory bank profile",
-        description="Get personality traits and background for a memory bank. Auto-creates agent with defaults if not exists.",
+        description="Get disposition traits and background for a memory bank. Auto-creates agent with defaults if not exists.",
         operation_id="get_bank_profile"
     )
     async def api_get_bank_profile(bank_id: str):
-        """Get memory bank profile (personality + background)."""
+        """Get memory bank profile (disposition + background)."""
         try:
             profile = await app.state.memory.get_bank_profile(bank_id)
-            # Convert PersonalityTraits object to dict for Pydantic
-            personality_dict = profile["personality"].model_dump() if hasattr(profile["personality"], 'model_dump') else dict(profile["personality"])
+            # Convert DispositionTraits object to dict for Pydantic
+            disposition_dict = profile["disposition"].model_dump() if hasattr(profile["disposition"], 'model_dump') else dict(profile["disposition"])
             return BankProfileResponse(
                 bank_id=bank_id,
                 name=profile["name"],
-                personality=PersonalityTraits(**personality_dict),
+                disposition=DispositionTraits(**disposition_dict),
                 background=profile["background"]
             )
         except Exception as e:
@@ -1604,28 +1604,28 @@ This operation cannot be undone.
     @app.put(
         "/v1/default/banks/{bank_id}/profile",
         response_model=BankProfileResponse,
-        summary="Update memory bank personality",
-        description="Update bank's Big Five personality traits and bias strength",
-        operation_id="update_bank_personality"
+        summary="Update memory bank disposition",
+        description="Update bank's Big Five disposition traits and bias strength",
+        operation_id="update_bank_disposition"
     )
-    async def api_update_bank_personality(bank_id: str,
-                                          request: UpdatePersonalityRequest
+    async def api_update_bank_disposition(bank_id: str,
+                                          request: UpdateDispositionRequest
     ):
-        """Update bank personality traits."""
+        """Update bank disposition traits."""
         try:
-            # Update personality
-            await app.state.memory.update_bank_personality(
+            # Update disposition
+            await app.state.memory.update_bank_disposition(
                 bank_id,
-                request.personality.model_dump()
+                request.disposition.model_dump()
             )

             # Get updated profile
             profile = await app.state.memory.get_bank_profile(bank_id)
-            personality_dict = profile["personality"].model_dump() if hasattr(profile["personality"], 'model_dump') else dict(profile["personality"])
+            disposition_dict = profile["disposition"].model_dump() if hasattr(profile["disposition"], 'model_dump') else dict(profile["disposition"])
             return BankProfileResponse(
                 bank_id=bank_id,
                 name=profile["name"],
-                personality=PersonalityTraits(**personality_dict),
+                disposition=DispositionTraits(**disposition_dict),
                 background=profile["background"]
             )
         except Exception as e:
@@ -1639,23 +1639,23 @@ This operation cannot be undone.
         "/v1/default/banks/{bank_id}/background",
         response_model=BackgroundResponse,
         summary="Add/merge memory bank background",
-        description="Add new background information or merge with existing. LLM intelligently resolves conflicts, normalizes to first person, and optionally infers personality traits.",
+        description="Add new background information or merge with existing. LLM intelligently resolves conflicts, normalizes to first person, and optionally infers disposition traits.",
         operation_id="add_bank_background"
     )
     async def api_add_bank_background(bank_id: str,
                                       request: AddBackgroundRequest
     ):
-        """Add or merge bank background information. Optionally infer personality traits."""
+        """Add or merge bank background information. Optionally infer disposition traits."""
         try:
             result = await app.state.memory.merge_bank_background(
                 bank_id,
                 request.content,
-                update_personality=request.update_personality
+                update_disposition=request.update_disposition
             )

             response = BackgroundResponse(background=result["background"])
-            if "personality" in result:
-                response.personality = PersonalityTraits(**result["personality"])
+            if "disposition" in result:
+                response.disposition = DispositionTraits(**result["disposition"])

             return response
         except Exception as e:
@@ -1669,13 +1669,13 @@ This operation cannot be undone.
         "/v1/default/banks/{bank_id}",
         response_model=BankProfileResponse,
         summary="Create or update memory bank",
-        description="Create a new agent or update existing agent with personality and background. Auto-fills missing fields with defaults.",
+        description="Create a new agent or update existing agent with disposition and background. Auto-fills missing fields with defaults.",
         operation_id="create_or_update_bank"
     )
     async def api_create_or_update_bank(bank_id: str,
                                         request: CreateBankRequest
     ):
-        """Create or update an agent with personality and background."""
+        """Create or update an agent with disposition and background."""
         try:
             # Get existing profile or create with defaults
             profile = await app.state.memory.get_bank_profile(bank_id)
@@ -1696,13 +1696,13 @@ This operation cannot be undone.
             )
             profile["name"] = request.name

-            # Update personality if provided
-            if request.personality is not None:
-                await app.state.memory.update_bank_personality(
+            # Update disposition if provided
+            if request.disposition is not None:
+                await app.state.memory.update_bank_disposition(
                     bank_id,
-                    request.personality.model_dump()
+                    request.disposition.model_dump()
                 )
-                profile["personality"] = request.personality.model_dump()
+                profile["disposition"] = request.disposition.model_dump()

             # Update background if provided (replace, not merge)
             if request.background is not None:
@@ -1722,11 +1722,11 @@ This operation cannot be undone.

             # Get final profile
             final_profile = await app.state.memory.get_bank_profile(bank_id)
-            personality_dict = final_profile["personality"].model_dump() if hasattr(final_profile["personality"], 'model_dump') else dict(final_profile["personality"])
+            disposition_dict = final_profile["disposition"].model_dump() if hasattr(final_profile["disposition"], 'model_dump') else dict(final_profile["disposition"])
             return BankProfileResponse(
                 bank_id=bank_id,
                 name=final_profile["name"],
-                personality=PersonalityTraits(**personality_dict),
+                disposition=DispositionTraits(**disposition_dict),
                 background=final_profile["background"]
             )
         except Exception as e:
@@ -1852,11 +1852,11 @@ This operation cannot be undone.
         "/v1/default/banks/{bank_id}/memories",
         response_model=DeleteResponse,
         summary="Clear memory bank memories",
-        description="Delete memory units for a memory bank. Optionally filter by type (world,
+        description="Delete memory units for a memory bank. Optionally filter by type (world, experience, opinion) to delete only specific types. This is a destructive operation that cannot be undone. The bank profile (personality and background) will be preserved.",
         operation_id="clear_bank_memories"
     )
     async def api_clear_bank_memories(bank_id: str,
-                                      type: Optional[str] = Query(None, description="Optional fact type filter (world,
+                                      type: Optional[str] = Query(None, description="Optional fact type filter (world, experience, opinion)")
     ):
         """Clear memories for a memory bank, optionally filtered by type."""
         try:
hindsight_api/engine/llm_wrapper.py
CHANGED
@@ -6,6 +6,9 @@ import time
 import asyncio
 from typing import Optional, Any, Dict, List
 from openai import AsyncOpenAI, RateLimitError, APIError, APIStatusError, LengthFinishReasonError
+from google import genai
+from google.genai import types as genai_types
+from google.genai import errors as genai_errors
 import logging

 logger = logging.getLogger(__name__)
@@ -53,9 +56,9 @@ class LLMConfig:
         self.model = model

         # Validate provider
-        if self.provider not in ["openai", "groq", "ollama"]:
+        if self.provider not in ["openai", "groq", "ollama", "gemini"]:
             raise ValueError(
-                f"Invalid LLM provider: {self.provider}. Must be 'openai', 'groq', or 'ollama'."
+                f"Invalid LLM provider: {self.provider}. Must be 'openai', 'groq', 'ollama', or 'gemini'."
             )

         # Set default base URLs
@@ -66,19 +69,25 @@ class LLMConfig:
             self.base_url = "http://localhost:11434/v1"

         # Validate API key (not needed for ollama)
-        if self.provider
+        if self.provider not in ["ollama"] and not self.api_key:
             raise ValueError(
                 f"API key not found for {self.provider}"
             )

         # Create client (private - use .call() method instead)
         # Disable automatic retries - we handle retries in the call() method
-        if self.provider == "ollama":
+        if self.provider == "gemini":
+            self._gemini_client = genai.Client(api_key=self.api_key)
+            self._client = None  # Not used for Gemini
+        elif self.provider == "ollama":
             self._client = AsyncOpenAI(api_key="ollama", base_url=self.base_url, max_retries=0)
+            self._gemini_client = None
         elif self.base_url:
             self._client = AsyncOpenAI(api_key=self.api_key, base_url=self.base_url, max_retries=0)
+            self._gemini_client = None
         else:
             self._client = AsyncOpenAI(api_key=self.api_key, max_retries=0)
+            self._gemini_client = None

         logger.info(
             f"Initialized LLM: provider={self.provider}, model={self.model}, base_url={self.base_url}"
@@ -116,6 +125,11 @@ class LLMConfig:
         # Use global semaphore to limit concurrent requests
         async with _global_llm_semaphore:
             start_time = time.time()
+            import json
+
+            # Handle Gemini provider separately
+            if self.provider == "gemini":
+                return await self._call_gemini(messages, response_format, max_retries, initial_backoff, max_backoff, skip_validation, start_time, **kwargs)

             call_params = {
                 "model": self.model,
@@ -137,7 +151,6 @@ class LLMConfig:
             if response_format is not None:
                 # Use JSON mode instead of strict parse for flexibility with optional fields
                 # This allows the LLM to omit optional fields without validation errors
-                import json

                 # Add schema to the system message
                 if hasattr(response_format, 'model_json_schema'):
@@ -215,6 +228,128 @@ class LLMConfig:
             raise last_exception
         raise RuntimeError(f"LLM call failed after all retries with no exception captured")

+    async def _call_gemini(
+        self,
+        messages: List[Dict[str, str]],
+        response_format: Optional[Any],
+        max_retries: int,
+        initial_backoff: float,
+        max_backoff: float,
+        skip_validation: bool,
+        start_time: float,
+        **kwargs
+    ) -> Any:
+        """Handle Gemini-specific API calls using google-genai SDK."""
+        import json
+
+        # Convert OpenAI-style messages to Gemini format
+        # Gemini uses 'user' and 'model' roles, and system instructions are separate
+        system_instruction = None
+        gemini_contents = []
+
+        for msg in messages:
+            role = msg.get('role', 'user')
+            content = msg.get('content', '')
+
+            if role == 'system':
+                # Accumulate system messages as system instruction
+                if system_instruction:
+                    system_instruction += "\n\n" + content
+                else:
+                    system_instruction = content
+            elif role == 'assistant':
+                gemini_contents.append(genai_types.Content(
+                    role="model",
+                    parts=[genai_types.Part(text=content)]
+                ))
+            else:  # user or any other role
+                gemini_contents.append(genai_types.Content(
+                    role="user",
+                    parts=[genai_types.Part(text=content)]
+                ))
+
+        # Add JSON schema instruction if response_format is provided
+        if response_format is not None and hasattr(response_format, 'model_json_schema'):
+            schema = response_format.model_json_schema()
+            schema_msg = f"\n\nYou must respond with valid JSON matching this schema:\n{json.dumps(schema, indent=2)}"
+            if system_instruction:
+                system_instruction += schema_msg
+            else:
+                system_instruction = schema_msg
+
+        # Build generation config
+        config_kwargs = {}
+        if system_instruction:
+            config_kwargs['system_instruction'] = system_instruction
+        if 'temperature' in kwargs:
+            config_kwargs['temperature'] = kwargs['temperature']
+        if 'max_tokens' in kwargs:
+            config_kwargs['max_output_tokens'] = kwargs['max_tokens']
+        if response_format is not None:
+            config_kwargs['response_mime_type'] = 'application/json'
+
+        generation_config = genai_types.GenerateContentConfig(**config_kwargs) if config_kwargs else None
+
+        last_exception = None
+
+        for attempt in range(max_retries + 1):
+            try:
+                response = await self._gemini_client.aio.models.generate_content(
+                    model=self.model,
+                    contents=gemini_contents,
+                    config=generation_config,
+                )
+
+                content = response.text
+
+                if response_format is not None:
+                    # Parse the JSON response
+                    json_data = json.loads(content)
+
+                    # Return raw JSON if skip_validation is True, otherwise validate with Pydantic
+                    if skip_validation:
+                        result = json_data
+                    else:
+                        result = response_format.model_validate(json_data)
+                else:
+                    result = content
+
+                # Log call details only if it takes more than 10 seconds
+                duration = time.time() - start_time
+                if duration > 10.0 and hasattr(response, 'usage_metadata') and response.usage_metadata:
+                    usage = response.usage_metadata
+                    logger.info(
+                        f"slow llm call: model={self.provider}/{self.model}, "
+                        f"input_tokens={usage.prompt_token_count}, output_tokens={usage.candidates_token_count}, "
+                        f"time={duration:.3f}s"
+                    )
+
+                return result
+
+            except genai_errors.APIError as e:
+                # Handle rate limits and server errors with retry
+                if e.code in (429, 503, 500):
+                    last_exception = e
+                    if attempt < max_retries:
+                        backoff = min(initial_backoff * (2 ** attempt), max_backoff)
+                        jitter = backoff * 0.2 * (2 * (time.time() % 1) - 1)
+                        sleep_time = backoff + jitter
+                        await asyncio.sleep(sleep_time)
+                    else:
+                        logger.error(f"Gemini API error after {max_retries + 1} attempts: {str(e)}")
+                        raise
+                else:
+                    logger.error(f"Gemini API error: {type(e).__name__}: {str(e)}")
+                    raise
+
+            except Exception as e:
+                logger.error(f"Unexpected error during Gemini call: {type(e).__name__}: {str(e)}")
+                raise
+
+        if last_exception:
+            raise last_exception
+        raise RuntimeError(f"Gemini call failed after all retries with no exception captured")
+
     @classmethod
     def for_memory(cls) -> "LLMConfig":
         """Create configuration for memory operations from environment variables."""