graphiti-core 0.20.4__py3-none-any.whl → 0.21.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of graphiti-core has been flagged as possibly problematic.
- graphiti_core/driver/driver.py +28 -0
- graphiti_core/driver/falkordb_driver.py +112 -0
- graphiti_core/driver/kuzu_driver.py +1 -0
- graphiti_core/driver/neo4j_driver.py +10 -2
- graphiti_core/driver/neptune_driver.py +4 -6
- graphiti_core/edges.py +67 -7
- graphiti_core/embedder/client.py +2 -1
- graphiti_core/graph_queries.py +35 -6
- graphiti_core/graphiti.py +27 -23
- graphiti_core/graphiti_types.py +0 -1
- graphiti_core/helpers.py +2 -2
- graphiti_core/llm_client/client.py +19 -4
- graphiti_core/llm_client/gemini_client.py +4 -2
- graphiti_core/llm_client/openai_base_client.py +3 -2
- graphiti_core/llm_client/openai_generic_client.py +3 -2
- graphiti_core/models/edges/edge_db_queries.py +36 -16
- graphiti_core/models/nodes/node_db_queries.py +30 -10
- graphiti_core/nodes.py +126 -25
- graphiti_core/prompts/dedupe_edges.py +40 -29
- graphiti_core/prompts/dedupe_nodes.py +51 -34
- graphiti_core/prompts/eval.py +3 -3
- graphiti_core/prompts/extract_edges.py +17 -9
- graphiti_core/prompts/extract_nodes.py +10 -9
- graphiti_core/prompts/prompt_helpers.py +3 -3
- graphiti_core/prompts/summarize_nodes.py +5 -5
- graphiti_core/search/search_filters.py +53 -0
- graphiti_core/search/search_helpers.py +5 -7
- graphiti_core/search/search_utils.py +227 -57
- graphiti_core/utils/bulk_utils.py +168 -69
- graphiti_core/utils/maintenance/community_operations.py +8 -20
- graphiti_core/utils/maintenance/dedup_helpers.py +262 -0
- graphiti_core/utils/maintenance/edge_operations.py +187 -50
- graphiti_core/utils/maintenance/graph_data_operations.py +9 -5
- graphiti_core/utils/maintenance/node_operations.py +244 -88
- graphiti_core/utils/maintenance/temporal_operations.py +0 -4
- {graphiti_core-0.20.4.dist-info → graphiti_core-0.21.0.dist-info}/METADATA +7 -1
- {graphiti_core-0.20.4.dist-info → graphiti_core-0.21.0.dist-info}/RECORD +39 -38
- {graphiti_core-0.20.4.dist-info → graphiti_core-0.21.0.dist-info}/WHEEL +0 -0
- {graphiti_core-0.20.4.dist-info → graphiti_core-0.21.0.dist-info}/licenses/LICENSE +0 -0
graphiti_core/driver/driver.py
CHANGED
@@ -16,13 +16,25 @@ limitations under the License.
 
 import copy
 import logging
+import os
 from abc import ABC, abstractmethod
 from collections.abc import Coroutine
 from enum import Enum
 from typing import Any
 
+from dotenv import load_dotenv
+
 logger = logging.getLogger(__name__)
 
+DEFAULT_SIZE = 10
+
+load_dotenv()
+
+ENTITY_INDEX_NAME = os.environ.get('ENTITY_INDEX_NAME', 'entities')
+EPISODE_INDEX_NAME = os.environ.get('EPISODE_INDEX_NAME', 'episodes')
+COMMUNITY_INDEX_NAME = os.environ.get('COMMUNITY_INDEX_NAME', 'communities')
+ENTITY_EDGE_INDEX_NAME = os.environ.get('ENTITY_EDGE_INDEX_NAME', 'entity_edges')
+
 
 class GraphProvider(Enum):
     NEO4J = 'neo4j'

@@ -61,6 +73,7 @@ class GraphDriver(ABC):
         ''  # Neo4j (default) syntax does not require a prefix for fulltext queries
     )
     _database: str
+    aoss_client: Any  # type: ignore
 
     @abstractmethod
     def execute_query(self, cypher_query_: str, **kwargs: Any) -> Coroutine:

@@ -87,3 +100,18 @@ class GraphDriver(ABC):
         cloned._database = database
 
         return cloned
+
+    def build_fulltext_query(
+        self, query: str, group_ids: list[str] | None = None, max_query_length: int = 128
+    ) -> str:
+        """
+        Specific fulltext query builder for database providers.
+        Only implemented by providers that need custom fulltext query building.
+        """
+        raise NotImplementedError(f'build_fulltext_query not implemented for {self.provider}')
+
+    async def save_to_aoss(self, name: str, data: list[dict]) -> int:
+        return 0
+
+    async def clear_aoss_indices(self):
+        return 1
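The new index-name constants are resolved from the environment at import time (after load_dotenv()), so they can be overridden before graphiti_core.driver.driver is first imported. A minimal sketch of that, with hypothetical override values:

import os

# Hypothetical overrides; the shipped defaults are 'entities', 'episodes', 'communities', 'entity_edges'.
os.environ['ENTITY_INDEX_NAME'] = 'my_entities'
os.environ['ENTITY_EDGE_INDEX_NAME'] = 'my_entity_edges'

from graphiti_core.driver.driver import ENTITY_EDGE_INDEX_NAME, ENTITY_INDEX_NAME

print(ENTITY_INDEX_NAME)       # 'my_entities'
print(ENTITY_EDGE_INDEX_NAME)  # 'my_entity_edges'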
graphiti_core/driver/falkordb_driver.py
CHANGED

@@ -36,6 +36,42 @@ from graphiti_core.utils.datetime_utils import convert_datetimes_to_strings
 
 logger = logging.getLogger(__name__)
 
+STOPWORDS = [
+    'a',
+    'is',
+    'the',
+    'an',
+    'and',
+    'are',
+    'as',
+    'at',
+    'be',
+    'but',
+    'by',
+    'for',
+    'if',
+    'in',
+    'into',
+    'it',
+    'no',
+    'not',
+    'of',
+    'on',
+    'or',
+    'such',
+    'that',
+    'their',
+    'then',
+    'there',
+    'these',
+    'they',
+    'this',
+    'to',
+    'was',
+    'will',
+    'with',
+]
+
 
 class FalkorDriverSession(GraphDriverSession):
     provider = GraphProvider.FALKORDB

@@ -74,6 +110,7 @@ class FalkorDriverSession(GraphDriverSession):
 
 class FalkorDriver(GraphDriver):
     provider = GraphProvider.FALKORDB
+    aoss_client: None = None
 
     def __init__(
         self,

@@ -166,3 +203,78 @@ class FalkorDriver(GraphDriver):
         cloned = FalkorDriver(falkor_db=self.client, database=database)
 
         return cloned
+
+    def sanitize(self, query: str) -> str:
+        """
+        Replace FalkorDB special characters with whitespace.
+        Based on FalkorDB tokenization rules: ,.<>{}[]"':;!@#$%^&*()-+=~
+        """
+        # FalkorDB separator characters that break text into tokens
+        separator_map = str.maketrans(
+            {
+                ',': ' ',
+                '.': ' ',
+                '<': ' ',
+                '>': ' ',
+                '{': ' ',
+                '}': ' ',
+                '[': ' ',
+                ']': ' ',
+                '"': ' ',
+                "'": ' ',
+                ':': ' ',
+                ';': ' ',
+                '!': ' ',
+                '@': ' ',
+                '#': ' ',
+                '$': ' ',
+                '%': ' ',
+                '^': ' ',
+                '&': ' ',
+                '*': ' ',
+                '(': ' ',
+                ')': ' ',
+                '-': ' ',
+                '+': ' ',
+                '=': ' ',
+                '~': ' ',
+                '?': ' ',
+            }
+        )
+        sanitized = query.translate(separator_map)
+        # Clean up multiple spaces
+        sanitized = ' '.join(sanitized.split())
+        return sanitized
+
+    def build_fulltext_query(
+        self, query: str, group_ids: list[str] | None = None, max_query_length: int = 128
+    ) -> str:
+        """
+        Build a fulltext query string for FalkorDB using RedisSearch syntax.
+        FalkorDB uses RedisSearch-like syntax where:
+        - Field queries use @ prefix: @field:value
+        - Multiple values for same field: (@field:value1|value2)
+        - Text search doesn't need @ prefix for content fields
+        - AND is implicit with space: (@group_id:value) (text)
+        - OR uses pipe within parentheses: (@group_id:value1|value2)
+        """
+        if group_ids is None or len(group_ids) == 0:
+            group_filter = ''
+        else:
+            group_values = '|'.join(group_ids)
+            group_filter = f'(@group_id:{group_values})'
+
+        sanitized_query = self.sanitize(query)
+
+        # Remove stopwords from the sanitized query
+        query_words = sanitized_query.split()
+        filtered_words = [word for word in query_words if word.lower() not in STOPWORDS]
+        sanitized_query = ' | '.join(filtered_words)
+
+        # If the query is too long return no query
+        if len(sanitized_query.split(' ')) + len(group_ids or '') >= max_query_length:
+            return ''
+
+        full_query = group_filter + ' (' + sanitized_query + ')'
+
+        return full_query
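To make the new FalkorDB query building concrete, here is a small sketch of what build_fulltext_query produces for one sample input. The no-argument FalkorDriver() constructor (local default connection) is an assumption; the resulting string follows the sanitize, stopword and group-filter logic shown above.

from graphiti_core.driver.falkordb_driver import FalkorDriver

driver = FalkorDriver()  # assumes a FalkorDB instance reachable with default connection settings

query = driver.build_fulltext_query('Alice works at Acme, Inc.', group_ids=['group-1', 'group-2'])

# sanitize() replaces ',' and '.' with whitespace  -> 'Alice works at Acme Inc'
# stopword filtering drops 'at', joins with ' | '  -> 'Alice | works | Acme | Inc'
# the group filter is prepended, so the result is:
# '(@group_id:group-1|group-2) (Alice | works | Acme | Inc)'
print(query)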
graphiti_core/driver/neo4j_driver.py
CHANGED

@@ -29,7 +29,13 @@ logger = logging.getLogger(__name__)
 class Neo4jDriver(GraphDriver):
     provider = GraphProvider.NEO4J
 
-    def __init__(
+    def __init__(
+        self,
+        uri: str,
+        user: str | None,
+        password: str | None,
+        database: str = 'neo4j',
+    ):
         super().__init__()
         self.client = AsyncGraphDatabase.driver(
             uri=uri,

@@ -37,6 +43,8 @@ class Neo4jDriver(GraphDriver):
         )
         self._database = database
 
+        self.aoss_client = None
+
     async def execute_query(self, cypher_query_: LiteralString, **kwargs: Any) -> EagerResult:
         # Check if database_ is provided in kwargs.
         # If not populated, set the value to retain backwards compatibility

@@ -60,7 +68,7 @@ class Neo4jDriver(GraphDriver):
     async def close(self) -> None:
         return await self.client.close()
 
-    def delete_all_indexes(self) -> Coroutine
+    def delete_all_indexes(self) -> Coroutine:
         return self.client.execute_query(
             'CALL db.indexes() YIELD name DROP INDEX name',
         )
graphiti_core/driver/neptune_driver.py
CHANGED

@@ -257,15 +257,13 @@
             if name.lower() == index['index_name']:
                 to_index = []
                 for d in data:
-                    item = {'_index': name}
+                    item = {'_index': name, '_id': d['uuid']}
                     for p in index['body']['mappings']['properties']:
-
+                        if p in d:
+                            item[p] = d[p]
                     to_index.append(item)
                 success, failed = helpers.bulk(self.aoss_client, to_index, stats_only=True)
-
-                return success
-            else:
-                return 0
+                return success
 
         return 0
 
graphiti_core/edges.py
CHANGED
@@ -25,7 +25,7 @@ from uuid import uuid4
 from pydantic import BaseModel, Field
 from typing_extensions import LiteralString
 
-from graphiti_core.driver.driver import GraphDriver, GraphProvider
+from graphiti_core.driver.driver import ENTITY_EDGE_INDEX_NAME, GraphDriver, GraphProvider
 from graphiti_core.embedder import EmbedderClient
 from graphiti_core.errors import EdgeNotFoundError, GroupsEdgesNotFoundError
 from graphiti_core.helpers import parse_db_date

@@ -77,6 +77,13 @@ class Edge(BaseModel, ABC):
             uuid=self.uuid,
         )
 
+        if driver.aoss_client:
+            await driver.aoss_client.delete(
+                index=ENTITY_EDGE_INDEX_NAME,
+                id=self.uuid,
+                params={'routing': self.group_id},
+            )
+
         logger.debug(f'Deleted Edge: {self.uuid}')
 
     @classmethod

@@ -108,6 +115,12 @@ class Edge(BaseModel, ABC):
             uuids=uuids,
         )
 
+        if driver.aoss_client:
+            await driver.aoss_client.delete_by_query(
+                index=ENTITY_EDGE_INDEX_NAME,
+                body={'query': {'terms': {'uuid': uuids}}},
+            )
+
         logger.debug(f'Deleted Edges: {uuids}')
 
     def __hash__(self):

@@ -255,6 +268,21 @@ class EntityEdge(Edge):
                 MATCH (n:Entity)-[e:RELATES_TO {uuid: $uuid}]->(m:Entity)
                 RETURN [x IN split(e.fact_embedding, ",") | toFloat(x)] as fact_embedding
             """
+        elif driver.aoss_client:
+            resp = await driver.aoss_client.search(
+                body={
+                    'query': {'multi_match': {'query': self.uuid, 'fields': ['uuid']}},
+                    'size': 1,
+                },
+                index=ENTITY_EDGE_INDEX_NAME,
+                params={'routing': self.group_id},
+            )
+
+            if resp['hits']['hits']:
+                self.fact_embedding = resp['hits']['hits'][0]['_source']['fact_embedding']
+                return
+            else:
+                raise EdgeNotFoundError(self.uuid)
 
         if driver.provider == GraphProvider.KUZU:
             query = """

@@ -292,14 +320,14 @@ class EntityEdge(Edge):
         if driver.provider == GraphProvider.KUZU:
             edge_data['attributes'] = json.dumps(self.attributes)
             result = await driver.execute_query(
-                get_entity_edge_save_query(driver.provider),
+                get_entity_edge_save_query(driver.provider, has_aoss=bool(driver.aoss_client)),
                 **edge_data,
             )
         else:
             edge_data.update(self.attributes or {})
 
-            if driver.
-                driver.save_to_aoss(
+            if driver.aoss_client:
+                await driver.save_to_aoss(ENTITY_EDGE_INDEX_NAME, [edge_data])  # pyright: ignore reportAttributeAccessIssue
 
             result = await driver.execute_query(
                 get_entity_edge_save_query(driver.provider),

@@ -336,6 +364,35 @@ class EntityEdge(Edge):
             raise EdgeNotFoundError(uuid)
         return edges[0]
 
+    @classmethod
+    async def get_between_nodes(
+        cls, driver: GraphDriver, source_node_uuid: str, target_node_uuid: str
+    ):
+        match_query = """
+            MATCH (n:Entity {uuid: $source_node_uuid})-[e:RELATES_TO]->(m:Entity {uuid: $target_node_uuid})
+        """
+        if driver.provider == GraphProvider.KUZU:
+            match_query = """
+                MATCH (n:Entity {uuid: $source_node_uuid})
+                    -[:RELATES_TO]->(e:RelatesToNode_)
+                    -[:RELATES_TO]->(m:Entity {uuid: $target_node_uuid})
+            """
+
+        records, _, _ = await driver.execute_query(
+            match_query
+            + """
+            RETURN
+            """
+            + get_entity_edge_return_query(driver.provider),
+            source_node_uuid=source_node_uuid,
+            target_node_uuid=target_node_uuid,
+            routing_='r',
+        )
+
+        edges = [get_entity_edge_from_record(record, driver.provider) for record in records]
+
+        return edges
+
     @classmethod
     async def get_by_uuids(cls, driver: GraphDriver, uuids: list[str]):
         if len(uuids) == 0:

@@ -587,8 +644,11 @@ def get_community_edge_from_record(record: Any):
 
 
 async def create_entity_edge_embeddings(embedder: EmbedderClient, edges: list[EntityEdge]):
-
+    # filter out falsey values from edges
+    filtered_edges = [edge for edge in edges if edge.fact]
+
+    if len(filtered_edges) == 0:
         return
-    fact_embeddings = await embedder.create_batch([edge.fact for edge in
-    for edge, fact_embedding in zip(
+    fact_embeddings = await embedder.create_batch([edge.fact for edge in filtered_edges])
+    for edge, fact_embedding in zip(filtered_edges, fact_embeddings, strict=True):
        edge.fact_embedding = fact_embedding
graphiti_core/embedder/client.py
CHANGED
@@ -14,12 +14,13 @@ See the License for the specific language governing permissions and
 limitations under the License.
 """
 
+import os
 from abc import ABC, abstractmethod
 from collections.abc import Iterable
 
 from pydantic import BaseModel, Field
 
-EMBEDDING_DIM = 1024
+EMBEDDING_DIM = int(os.getenv('EMBEDDING_DIM', 1024))
 
 
 class EmbedderConfig(BaseModel):
graphiti_core/graph_queries.py
CHANGED
@@ -71,12 +71,41 @@ def get_range_indices(provider: GraphProvider) -> list[LiteralString]:
 
 def get_fulltext_indices(provider: GraphProvider) -> list[LiteralString]:
     if provider == GraphProvider.FALKORDB:
-
-
-
-
-
-
+        from typing import cast
+
+        from graphiti_core.driver.falkordb_driver import STOPWORDS
+
+        # Convert to string representation for embedding in queries
+        stopwords_str = str(STOPWORDS)
+
+        # Use type: ignore to satisfy LiteralString requirement while maintaining single source of truth
+        return cast(
+            list[LiteralString],
+            [
+                f"""CALL db.idx.fulltext.createNodeIndex(
+                    {{
+                        label: 'Episodic',
+                        stopwords: {stopwords_str}
+                    }},
+                    'content', 'source', 'source_description', 'group_id'
+                )""",
+                f"""CALL db.idx.fulltext.createNodeIndex(
+                    {{
+                        label: 'Entity',
+                        stopwords: {stopwords_str}
+                    }},
+                    'name', 'summary', 'group_id'
+                )""",
+                f"""CALL db.idx.fulltext.createNodeIndex(
+                    {{
+                        label: 'Community',
+                        stopwords: {stopwords_str}
+                    }},
+                    'name', 'group_id'
+                )""",
+                """CREATE FULLTEXT INDEX FOR ()-[e:RELATES_TO]-() ON (e.name, e.fact, e.group_id)""",
+            ],
+        )
 
     if provider == GraphProvider.KUZU:
         return [
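The FalkorDB branch now builds its index-creation statements from the STOPWORDS list shared with falkordb_driver.py, so the node indices and build_fulltext_query filter the same words. A quick sketch of calling it (requires the FalkorDB driver dependency to be importable):

from graphiti_core.driver.driver import GraphProvider
from graphiti_core.graph_queries import get_fulltext_indices

queries = get_fulltext_indices(GraphProvider.FALKORDB)
print(len(queries))  # 4: Episodic, Entity and Community node indices plus the RELATES_TO edge index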
graphiti_core/graphiti.py
CHANGED
@@ -60,9 +60,7 @@ from graphiti_core.search.search_config_recipes import (
 from graphiti_core.search.search_filters import SearchFilters
 from graphiti_core.search.search_utils import (
     RELEVANT_SCHEMA_LIMIT,
-    get_edge_invalidation_candidates,
     get_mentioned_nodes,
-    get_relevant_edges,
 )
 from graphiti_core.telemetry import capture_event
 from graphiti_core.utils.bulk_utils import (

@@ -81,7 +79,6 @@ from graphiti_core.utils.maintenance.community_operations import (
     update_community,
 )
 from graphiti_core.utils.maintenance.edge_operations import (
-    build_duplicate_of_edges,
     build_episodic_edges,
     extract_edges,
     resolve_extracted_edge,

@@ -139,7 +136,6 @@ class Graphiti:
         store_raw_episode_content: bool = True,
         graph_driver: GraphDriver | None = None,
         max_coroutines: int | None = None,
-        ensure_ascii: bool = False,
     ):
         """
         Initialize a Graphiti instance.

@@ -172,10 +168,6 @@ class Graphiti:
         max_coroutines : int | None, optional
             The maximum number of concurrent operations allowed. Overrides SEMAPHORE_LIMIT set in the environment.
             If not set, the Graphiti default is used.
-        ensure_ascii : bool, optional
-            Whether to escape non-ASCII characters in JSON serialization for prompts. Defaults to False.
-            Set as False to preserve non-ASCII characters (e.g., Korean, Japanese, Chinese) in their
-            original form, making them readable in LLM logs and improving model understanding.
 
         Returns
         -------

@@ -205,7 +197,6 @@ class Graphiti:
 
         self.store_raw_episode_content = store_raw_episode_content
         self.max_coroutines = max_coroutines
-        self.ensure_ascii = ensure_ascii
         if llm_client:
             self.llm_client = llm_client
         else:

@@ -224,7 +215,6 @@ class Graphiti:
             llm_client=self.llm_client,
             embedder=self.embedder,
             cross_encoder=self.cross_encoder,
-            ensure_ascii=self.ensure_ascii,
         )
 
         # Capture telemetry event

@@ -458,12 +448,12 @@ class Graphiti:
             start = time()
             now = utc_now()
 
-            # if group_id is None, use the default group id by the provider
-            group_id = group_id or get_default_group_id(self.driver.provider)
             validate_entity_types(entity_types)
 
             validate_excluded_entity_types(excluded_entity_types, entity_types)
             validate_group_id(group_id)
+            # if group_id is None, use the default group id by the provider
+            group_id = group_id or get_default_group_id(self.driver.provider)
 
             previous_episodes = (
                 await self.retrieve_episodes(

@@ -505,7 +495,7 @@ class Graphiti:
             )
 
             # Extract edges and resolve nodes
-            (nodes, uuid_map,
+            (nodes, uuid_map, _), extracted_edges = await semaphore_gather(
                 resolve_extracted_nodes(
                     self.clients,
                     extracted_nodes,

@@ -542,9 +532,7 @@ class Graphiti:
                 max_coroutines=self.max_coroutines,
             )
 
-
-
-            entity_edges = resolved_edges + invalidated_edges + duplicate_of_edges
+            entity_edges = resolved_edges + invalidated_edges
 
             episodic_edges = build_episodic_edges(nodes, episode.uuid, now)
 

@@ -564,9 +552,7 @@ class Graphiti:
             if update_communities:
                 communities, community_edges = await semaphore_gather(
                     *[
-                        update_community(
-                            self.driver, self.llm_client, self.embedder, node, self.ensure_ascii
-                        )
+                        update_community(self.driver, self.llm_client, self.embedder, node)
                         for node in nodes
                     ],
                     max_coroutines=self.max_coroutines,

@@ -1037,10 +1023,28 @@ class Graphiti:
 
         updated_edge = resolve_edge_pointers([edge], uuid_map)[0]
 
-
+        valid_edges = await EntityEdge.get_between_nodes(
+            self.driver, edge.source_node_uuid, edge.target_node_uuid
+        )
+
+        related_edges = (
+            await search(
+                self.clients,
+                updated_edge.fact,
+                group_ids=[updated_edge.group_id],
+                config=EDGE_HYBRID_SEARCH_RRF,
+                search_filter=SearchFilters(edge_uuids=[edge.uuid for edge in valid_edges]),
+            )
+        ).edges
         existing_edges = (
-            await
-
+            await search(
+                self.clients,
+                updated_edge.fact,
+                group_ids=[updated_edge.group_id],
+                config=EDGE_HYBRID_SEARCH_RRF,
+                search_filter=SearchFilters(),
+            )
+        ).edges
 
         resolved_edge, invalidated_edges, _ = await resolve_extracted_edge(
             self.llm_client,

@@ -1057,7 +1061,7 @@ class Graphiti:
                 group_id=edge.group_id,
             ),
             None,
-
+            None,
         )
 
         edges: list[EntityEdge] = [resolved_edge] + invalidated_edges
graphiti_core/graphiti_types.py
CHANGED
graphiti_core/helpers.py
CHANGED
@@ -54,7 +54,7 @@ def get_default_group_id(provider: GraphProvider) -> str:
     For most databases, the default group id is an empty string, while there are database types that require a specific default group id.
     """
     if provider == GraphProvider.FALKORDB:
-        return '_'
+        return '\\_'
     else:
         return ''
 

@@ -116,7 +116,7 @@ async def semaphore_gather(
     return await asyncio.gather(*(_wrap_coroutine(coroutine) for coroutine in coroutines))
 
 
-def validate_group_id(group_id: str) -> bool:
+def validate_group_id(group_id: str | None) -> bool:
     """
     Validate that a group_id contains only ASCII alphanumeric characters, dashes, and underscores.
 
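A small sketch of the changed FalkorDB default group id (the escaping rationale is an assumption; presumably the backslash keeps the underscore intact in FalkorDB fulltext queries):

from graphiti_core.driver.driver import GraphProvider
from graphiti_core.helpers import get_default_group_id

print(get_default_group_id(GraphProvider.FALKORDB))     # \_
print(repr(get_default_group_id(GraphProvider.NEO4J)))  # ''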
graphiti_core/llm_client/client.py
CHANGED

@@ -32,9 +32,23 @@ from .errors import RateLimitError
 DEFAULT_TEMPERATURE = 0
 DEFAULT_CACHE_DIR = './llm_cache'
 
-
-
-
+
+def get_extraction_language_instruction(group_id: str | None = None) -> str:
+    """Returns instruction for language extraction behavior.
+
+    Override this function to customize language extraction:
+    - Return empty string to disable multilingual instructions
+    - Return custom instructions for specific language requirements
+    - Use group_id to provide different instructions per group/partition
+
+    Args:
+        group_id: Optional partition identifier for the graph
+
+    Returns:
+        str: Language instruction to append to system messages
+    """
+    return '\n\nAny extracted information should be returned in the same language as it was written in.'
+
 
 logger = logging.getLogger(__name__)
 

@@ -132,6 +146,7 @@ class LLMClient(ABC):
         response_model: type[BaseModel] | None = None,
         max_tokens: int | None = None,
         model_size: ModelSize = ModelSize.medium,
+        group_id: str | None = None,
     ) -> dict[str, typing.Any]:
         if max_tokens is None:
             max_tokens = self.max_tokens

@@ -145,7 +160,7 @@ class LLMClient(ABC):
         )
 
         # Add multilingual extraction instructions
-        messages[0].content +=
+        messages[0].content += get_extraction_language_instruction(group_id)
 
         if self.cache_enabled and self.cache_dir is not None:
             cache_key = self._get_cache_key(messages)
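The docstring above describes get_extraction_language_instruction as an override point. One way to do that is to replace the module-level function; this is a hedged sketch of that pattern, not an API documented by the package, and modules that import the function by name (for example gemini_client) bind it at import time and would need to be patched separately.

import graphiti_core.llm_client.client as llm_client_module


def fixed_language_instruction(group_id: str | None = None) -> str:
    # Hypothetical override: force extracted output into one language regardless of input.
    return '\n\nReturn all extracted information in English.'


llm_client_module.get_extraction_language_instruction = fixed_language_instruction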
graphiti_core/llm_client/gemini_client.py
CHANGED

@@ -23,7 +23,7 @@ from typing import TYPE_CHECKING, ClassVar
 from pydantic import BaseModel
 
 from ..prompts.models import Message
-from .client import
+from .client import LLMClient, get_extraction_language_instruction
 from .config import LLMConfig, ModelSize
 from .errors import RateLimitError
 

@@ -357,6 +357,7 @@ class GeminiClient(LLMClient):
         response_model: type[BaseModel] | None = None,
         max_tokens: int | None = None,
         model_size: ModelSize = ModelSize.medium,
+        group_id: str | None = None,
     ) -> dict[str, typing.Any]:
         """
         Generate a response from the Gemini language model with retry logic and error handling.

@@ -367,6 +368,7 @@ class GeminiClient(LLMClient):
             response_model (type[BaseModel] | None): An optional Pydantic model to parse the response into.
             max_tokens (int | None): The maximum number of tokens to generate in the response.
             model_size (ModelSize): The size of the model to use (small or medium).
+            group_id (str | None): Optional partition identifier for the graph.
 
         Returns:
             dict[str, typing.Any]: The response from the language model.

@@ -376,7 +378,7 @@ class GeminiClient(LLMClient):
         last_output = None
 
         # Add multilingual extraction instructions
-        messages[0].content +=
+        messages[0].content += get_extraction_language_instruction(group_id)
 
         while retry_count < self.MAX_RETRIES:
             try: