PyPI - graphiti-core - Versions diffs - 0.24.3__py3-none-any.whl → 0.25.3__py3-none-any.whl - Mend

graphiti-core 0.24.3__py3-none-any.whl → 0.25.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

graphiti_core/driver/neo4j_driver.py +17 -8
graphiti_core/graphiti.py +59 -11
graphiti_core/helpers.py +16 -1
graphiti_core/llm_client/openai_base_client.py +2 -2
graphiti_core/llm_client/openai_client.py +19 -9
graphiti_core/prompts/extract_edges.py +1 -1
graphiti_core/prompts/extract_nodes.py +3 -3
graphiti_core/search/search_filters.py +2 -1
graphiti_core/utils/content_chunking.py +702 -0
graphiti_core/utils/maintenance/edge_operations.py +26 -36
graphiti_core/utils/maintenance/node_operations.py +163 -65
{graphiti_core-0.24.3.dist-info → graphiti_core-0.25.3.dist-info}/METADATA +2 -2
{graphiti_core-0.24.3.dist-info → graphiti_core-0.25.3.dist-info}/RECORD +15 -14
{graphiti_core-0.24.3.dist-info → graphiti_core-0.25.3.dist-info}/WHEEL +0 -0
{graphiti_core-0.24.3.dist-info → graphiti_core-0.25.3.dist-info}/licenses/LICENSE +0 -0

graphiti_core/driver/neo4j_driver.py CHANGED Viewed

@@ -19,6 +19,7 @@ from collections.abc import Coroutine
 from typing import Any
 from neo4j import AsyncGraphDatabase, EagerResult
+from neo4j.exceptions import ClientError
 from typing_extensions import LiteralString
 from graphiti_core.driver.driver import GraphDriver, GraphDriverSession, GraphProvider
@@ -88,6 +89,21 @@ class Neo4jDriver(GraphDriver):
             'CALL db.indexes() YIELD name DROP INDEX name',
         )
+    async def _execute_index_query(self, query: LiteralString) -> EagerResult | None:
+        """Execute an index creation query, ignoring 'index already exists' errors.
+        Neo4j can raise EquivalentSchemaRuleAlreadyExists when concurrent CREATE INDEX
+        IF NOT EXISTS queries race, even though the index exists. This is safe to ignore.
+        """
+        try:
+            return await self.execute_query(query)
+        except ClientError as e:
+            # Ignore "equivalent index already exists" error (race condition with IF NOT EXISTS)
+            if 'EquivalentSchemaRuleAlreadyExists' in str(e):
+                logger.debug(f'Index already exists (concurrent creation): {query[:50]}...')
+                return None
+            raise
     async def build_indices_and_constraints(self, delete_existing: bool = False):
         if delete_existing:
             await self.delete_all_indexes()
@@ -98,14 +114,7 @@ class Neo4jDriver(GraphDriver):
         index_queries: list[LiteralString] = range_indices + fulltext_indices
-        await semaphore_gather(
-            *[
-                self.execute_query(
-                    query,
-                )
-                for query in index_queries
-            ]
-        )
+        await semaphore_gather(*[self._execute_index_query(query) for query in index_queries])
     async def health_check(self) -> None:
         """Check Neo4j connectivity by running the driver's verify_connectivity method."""

graphiti_core/graphiti.py CHANGED Viewed

@@ -35,6 +35,7 @@ from graphiti_core.edges import (
     create_entity_edge_embeddings,
 )
 from graphiti_core.embedder import EmbedderClient, OpenAIEmbedder
+from graphiti_core.errors import NodeNotFoundError
 from graphiti_core.graphiti_types import GraphitiClients
 from graphiti_core.helpers import (
     get_default_group_id,
@@ -384,6 +385,7 @@ class Graphiti:
         edge_types: dict[str, type[BaseModel]] | None,
         nodes: list[EntityNode],
         uuid_map: dict[str, str],
+        custom_extraction_instructions: str | None = None,
     ) -> tuple[list[EntityEdge], list[EntityEdge]]:
         """Extract edges from episode and resolve against existing graph."""
         extracted_edges = await extract_edges(
@@ -394,6 +396,7 @@ class Graphiti:
             edge_type_map,
             group_id,
             edge_types,
+            custom_extraction_instructions,
         )
         edges = resolve_edge_pointers(extracted_edges, uuid_map)
@@ -627,6 +630,7 @@ class Graphiti:
         previous_episode_uuids: list[str] | None = None,
         edge_types: dict[str, type[BaseModel]] | None = None,
         edge_type_map: dict[tuple[str, str], list[str]] | None = None,
+        custom_extraction_instructions: str | None = None,
     ) -> AddEpisodeResults:
         """
         Process an episode and update the graph.
@@ -661,6 +665,9 @@ class Graphiti:
         previous_episode_uuids : list[str] | None
             Optional.  list of episode uuids to use as the previous episodes. If this is not provided,
             the most recent episodes by created_at date will be used.
+        custom_extraction_instructions : str | None
+            Optional. Custom extraction instructions string to be included in the extract entities and extract edges prompts.
+            This allows for additional instructions or context to guide the extraction process.
         Returns
         -------
@@ -739,7 +746,12 @@ class Graphiti:
                 # Extract and resolve nodes
                 extracted_nodes = await extract_nodes(
-                    self.clients, episode, previous_episodes, entity_types, excluded_entity_types
+                    self.clients,
+                    episode,
+                    previous_episodes,
+                    entity_types,
+                    excluded_entity_types,
+                    custom_extraction_instructions,
                 )
                 nodes, uuid_map, _ = await resolve_extracted_nodes(
@@ -760,6 +772,7 @@ class Graphiti:
                     edge_types,
                     nodes,
                     uuid_map,
+                    custom_extraction_instructions,
                 )
                 # Extract node attributes
@@ -1176,12 +1189,47 @@ class Graphiti:
         if edge.fact_embedding is None:
             await edge.generate_embedding(self.embedder)
-        nodes, uuid_map, _ = await resolve_extracted_nodes(
-            self.clients,
-            [source_node, target_node],
-        )
+        try:
+            resolved_source = await EntityNode.get_by_uuid(self.driver, source_node.uuid)
+        except NodeNotFoundError:
+            resolved_source_nodes, _, _ = await resolve_extracted_nodes(
+                self.clients,
+                [source_node],
+            )
+            resolved_source = resolved_source_nodes[0]
+        try:
+            resolved_target = await EntityNode.get_by_uuid(self.driver, target_node.uuid)
+        except NodeNotFoundError:
+            resolved_target_nodes, _, _ = await resolve_extracted_nodes(
+                self.clients,
+                [target_node],
+            )
+            resolved_target = resolved_target_nodes[0]
+        nodes = [resolved_source, resolved_target]
+        # Merge user-provided properties from original nodes into resolved nodes (excluding uuid)
+        # Update attributes dictionary (merge rather than replace)
+        if source_node.attributes:
+            resolved_source.attributes.update(source_node.attributes)
+        if target_node.attributes:
+            resolved_target.attributes.update(target_node.attributes)
+        # Update summary if provided by user (non-empty string)
+        if source_node.summary:
+            resolved_source.summary = source_node.summary
+        if target_node.summary:
+            resolved_target.summary = target_node.summary
+        # Update labels (merge with existing)
+        if source_node.labels:
+            resolved_source.labels = list(set(resolved_source.labels) | set(source_node.labels))
+        if target_node.labels:
+            resolved_target.labels = list(set(resolved_target.labels) | set(target_node.labels))
-        updated_edge = resolve_edge_pointers([edge], uuid_map)[0]
+        edge.source_node_uuid = resolved_source.uuid
+        edge.target_node_uuid = resolved_target.uuid
         valid_edges = await EntityEdge.get_between_nodes(
             self.driver, edge.source_node_uuid, edge.target_node_uuid
@@ -1190,8 +1238,8 @@ class Graphiti:
         related_edges = (
             await search(
                 self.clients,
-                updated_edge.fact,
-                group_ids=[updated_edge.group_id],
+                edge.fact,
+                group_ids=[edge.group_id],
                 config=EDGE_HYBRID_SEARCH_RRF,
                 search_filter=SearchFilters(edge_uuids=[edge.uuid for edge in valid_edges]),
             )
@@ -1199,8 +1247,8 @@ class Graphiti:
         existing_edges = (
             await search(
                 self.clients,
-                updated_edge.fact,
-                group_ids=[updated_edge.group_id],
+                edge.fact,
+                group_ids=[edge.group_id],
                 config=EDGE_HYBRID_SEARCH_RRF,
                 search_filter=SearchFilters(),
             )
@@ -1208,7 +1256,7 @@ class Graphiti:
         resolved_edge, invalidated_edges, _ = await resolve_extracted_edge(
             self.llm_client,
-            updated_edge,
+            edge,
             related_edges,
             existing_edges,
             EpisodicNode(

graphiti_core/helpers.py CHANGED Viewed

@@ -34,9 +34,24 @@ load_dotenv()
 USE_PARALLEL_RUNTIME = bool(os.getenv('USE_PARALLEL_RUNTIME', False))
 SEMAPHORE_LIMIT = int(os.getenv('SEMAPHORE_LIMIT', 20))
-MAX_REFLEXION_ITERATIONS = int(os.getenv('MAX_REFLEXION_ITERATIONS', 0))
 DEFAULT_PAGE_LIMIT = 20
+# Content chunking configuration for entity extraction
+# Density-based chunking: only chunk high-density content (many entities per token)
+# This targets the failure case (large entity-dense inputs) while preserving
+# context for prose/narrative content
+CHUNK_TOKEN_SIZE = int(os.getenv('CHUNK_TOKEN_SIZE', 3000))
+CHUNK_OVERLAP_TOKENS = int(os.getenv('CHUNK_OVERLAP_TOKENS', 200))
+# Minimum tokens before considering chunking - short content processes fine regardless of density
+CHUNK_MIN_TOKENS = int(os.getenv('CHUNK_MIN_TOKENS', 1000))
+# Entity density threshold: chunk if estimated density > this value
+# For JSON: elements per 1000 tokens > threshold * 1000 (e.g., 0.15 = 150 elements/1000 tokens)
+# For Text: capitalized words per 1000 tokens > threshold * 500 (e.g., 0.15 = 75 caps/1000 tokens)
+# Higher values = more conservative (less chunking), targets P95+ density cases
+# Examples that trigger chunking at 0.15: AWS cost data (12mo), bulk data imports, entity-dense JSON
+# Examples that DON'T chunk at 0.15: meeting transcripts, news articles, documentation
+CHUNK_DENSITY_THRESHOLD = float(os.getenv('CHUNK_DENSITY_THRESHOLD', 0.15))
 def parse_db_date(input_date: neo4j_time.DateTime | str | None) -> datetime | None:
     if isinstance(input_date, neo4j_time.DateTime):

graphiti_core/llm_client/openai_base_client.py CHANGED Viewed

@@ -31,8 +31,8 @@ from .errors import RateLimitError, RefusalError
 logger = logging.getLogger(__name__)
-DEFAULT_MODEL = 'gpt-5-mini'
-DEFAULT_SMALL_MODEL = 'gpt-5-nano'
+DEFAULT_MODEL = 'gpt-4.1-mini'
+DEFAULT_SMALL_MODEL = 'gpt-4.1-nano'
 DEFAULT_REASONING = 'minimal'
 DEFAULT_VERBOSITY = 'low'

graphiti_core/llm_client/openai_client.py CHANGED Viewed

@@ -78,15 +78,25 @@ class OpenAIClient(BaseOpenAIClient):
             model.startswith('gpt-5') or model.startswith('o1') or model.startswith('o3')
         )
-        response = await self.client.responses.parse(
-            model=model,
-            input=messages,  # type: ignore
-            temperature=temperature if not is_reasoning_model else None,
-            max_output_tokens=max_tokens,
-            text_format=response_model,  # type: ignore
-            reasoning={'effort': reasoning} if reasoning is not None else None,  # type: ignore
-            text={'verbosity': verbosity} if verbosity is not None else None,  # type: ignore
-        )
+        request_kwargs = {
+            'model': model,
+            'input': messages,  # type: ignore
+            'max_output_tokens': max_tokens,
+            'text_format': response_model,  # type: ignore
+        }
+        temperature_value = temperature if not is_reasoning_model else None
+        if temperature_value is not None:
+            request_kwargs['temperature'] = temperature_value
+        # Only include reasoning and verbosity parameters for reasoning models
+        if is_reasoning_model and reasoning is not None:
+            request_kwargs['reasoning'] = {'effort': reasoning}  # type: ignore
+        if is_reasoning_model and verbosity is not None:
+            request_kwargs['text'] = {'verbosity': verbosity}  # type: ignore
+        response = await self.client.responses.parse(**request_kwargs)
         return response

graphiti_core/prompts/extract_edges.py CHANGED Viewed

@@ -110,7 +110,7 @@ Only extract facts that:
 You may use information from the PREVIOUS MESSAGES only to disambiguate references or support continuity.
-{context['custom_prompt']}
+{context['custom_extraction_instructions']}
 # EXTRACTION RULES

graphiti_core/prompts/extract_nodes.py CHANGED Viewed

@@ -124,7 +124,7 @@ reference entities. Only extract distinct entities from the CURRENT MESSAGE. Don
 5. **Formatting**:
    - Be **explicit and unambiguous** in naming entities (e.g., use full names when available).
-{context['custom_prompt']}
+{context['custom_extraction_instructions']}
 """
     return [
         Message(role='system', content=sys_prompt),
@@ -148,7 +148,7 @@ def extract_json(context: dict[str, Any]) -> list[Message]:
 {context['episode_content']}
 </JSON>
-{context['custom_prompt']}
+{context['custom_extraction_instructions']}
 Given the above source description and JSON, extract relevant entities from the provided JSON.
 For each entity extracted, also determine its entity type based on the provided ENTITY TYPES and their descriptions.
@@ -182,7 +182,7 @@ Given the above text, extract entities from the TEXT that are explicitly or impl
 For each entity extracted, also determine its entity type based on the provided ENTITY TYPES and their descriptions.
 Indicate the classified entity type by providing its entity_type_id.
-{context['custom_prompt']}
+{context['custom_extraction_instructions']}
 Guidelines:
 1. Extract significant entities, concepts, or actors mentioned in the conversation.

graphiti_core/search/search_filters.py CHANGED Viewed

@@ -35,7 +35,7 @@ class ComparisonOperator(Enum):
 class DateFilter(BaseModel):
-    date: datetime | None = Field(description='A datetime to filter on')
+    date: datetime | None = Field(default=None, description='A datetime to filter on')
     comparison_operator: ComparisonOperator = Field(
         description='Comparison operator for date filter'
     )
@@ -44,6 +44,7 @@ class DateFilter(BaseModel):
 class PropertyFilter(BaseModel):
     property_name: str = Field(description='Property name')
     property_value: str | int | float | None = Field(
+        default=None,
         description='Value you want to match on for the property'
     )
     comparison_operator: ComparisonOperator = Field(

graphiti-core 0.24.3__py3-none-any.whl → 0.25.3__py3-none-any.whl

graphiti-core 0.24.3__py3-none-any.whl → 0.25.3__py3-none-any.whl