graphiti-core 0.12.0rc1__py3-none-any.whl → 0.24.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. graphiti_core/cross_encoder/bge_reranker_client.py +12 -2
  2. graphiti_core/cross_encoder/gemini_reranker_client.py +161 -0
  3. graphiti_core/cross_encoder/openai_reranker_client.py +7 -5
  4. graphiti_core/decorators.py +110 -0
  5. graphiti_core/driver/__init__.py +19 -0
  6. graphiti_core/driver/driver.py +124 -0
  7. graphiti_core/driver/falkordb_driver.py +362 -0
  8. graphiti_core/driver/graph_operations/graph_operations.py +191 -0
  9. graphiti_core/driver/kuzu_driver.py +182 -0
  10. graphiti_core/driver/neo4j_driver.py +117 -0
  11. graphiti_core/driver/neptune_driver.py +305 -0
  12. graphiti_core/driver/search_interface/search_interface.py +89 -0
  13. graphiti_core/edges.py +287 -172
  14. graphiti_core/embedder/azure_openai.py +71 -0
  15. graphiti_core/embedder/client.py +2 -1
  16. graphiti_core/embedder/gemini.py +116 -22
  17. graphiti_core/embedder/voyage.py +13 -2
  18. graphiti_core/errors.py +8 -0
  19. graphiti_core/graph_queries.py +162 -0
  20. graphiti_core/graphiti.py +705 -193
  21. graphiti_core/graphiti_types.py +4 -2
  22. graphiti_core/helpers.py +87 -10
  23. graphiti_core/llm_client/__init__.py +16 -0
  24. graphiti_core/llm_client/anthropic_client.py +159 -56
  25. graphiti_core/llm_client/azure_openai_client.py +115 -0
  26. graphiti_core/llm_client/client.py +98 -21
  27. graphiti_core/llm_client/config.py +1 -1
  28. graphiti_core/llm_client/gemini_client.py +290 -41
  29. graphiti_core/llm_client/groq_client.py +14 -3
  30. graphiti_core/llm_client/openai_base_client.py +261 -0
  31. graphiti_core/llm_client/openai_client.py +56 -132
  32. graphiti_core/llm_client/openai_generic_client.py +91 -56
  33. graphiti_core/models/edges/edge_db_queries.py +259 -35
  34. graphiti_core/models/nodes/node_db_queries.py +311 -32
  35. graphiti_core/nodes.py +420 -205
  36. graphiti_core/prompts/dedupe_edges.py +46 -32
  37. graphiti_core/prompts/dedupe_nodes.py +67 -42
  38. graphiti_core/prompts/eval.py +4 -4
  39. graphiti_core/prompts/extract_edges.py +27 -16
  40. graphiti_core/prompts/extract_nodes.py +74 -31
  41. graphiti_core/prompts/prompt_helpers.py +39 -0
  42. graphiti_core/prompts/snippets.py +29 -0
  43. graphiti_core/prompts/summarize_nodes.py +23 -25
  44. graphiti_core/search/search.py +158 -82
  45. graphiti_core/search/search_config.py +39 -4
  46. graphiti_core/search/search_filters.py +126 -35
  47. graphiti_core/search/search_helpers.py +5 -6
  48. graphiti_core/search/search_utils.py +1405 -485
  49. graphiti_core/telemetry/__init__.py +9 -0
  50. graphiti_core/telemetry/telemetry.py +117 -0
  51. graphiti_core/tracer.py +193 -0
  52. graphiti_core/utils/bulk_utils.py +364 -285
  53. graphiti_core/utils/datetime_utils.py +13 -0
  54. graphiti_core/utils/maintenance/community_operations.py +67 -49
  55. graphiti_core/utils/maintenance/dedup_helpers.py +262 -0
  56. graphiti_core/utils/maintenance/edge_operations.py +339 -197
  57. graphiti_core/utils/maintenance/graph_data_operations.py +50 -114
  58. graphiti_core/utils/maintenance/node_operations.py +319 -238
  59. graphiti_core/utils/maintenance/temporal_operations.py +11 -3
  60. graphiti_core/utils/ontology_utils/entity_types_utils.py +1 -1
  61. graphiti_core/utils/text_utils.py +53 -0
  62. graphiti_core-0.24.3.dist-info/METADATA +726 -0
  63. graphiti_core-0.24.3.dist-info/RECORD +86 -0
  64. {graphiti_core-0.12.0rc1.dist-info → graphiti_core-0.24.3.dist-info}/WHEEL +1 -1
  65. graphiti_core-0.12.0rc1.dist-info/METADATA +0 -350
  66. graphiti_core-0.12.0rc1.dist-info/RECORD +0 -66
  67. /graphiti_core/{utils/maintenance/utils.py → migrations/__init__.py} +0 -0
  68. {graphiti_core-0.12.0rc1.dist-info → graphiti_core-0.24.3.dist-info/licenses}/LICENSE +0 -0
graphiti_core/prompts/dedupe_edges.py

@@ -14,22 +14,22 @@ See the License for the specific language governing permissions and
 limitations under the License.
 """
 
-import json
 from typing import Any, Protocol, TypedDict
 
 from pydantic import BaseModel, Field
 
 from .models import Message, PromptFunction, PromptVersion
+from .prompt_helpers import to_prompt_json
 
 
 class EdgeDuplicate(BaseModel):
-    duplicate_fact_id: int = Field(
+    duplicate_facts: list[int] = Field(
         ...,
-        description='id of the duplicate fact. If no duplicate facts are found, default to -1.',
+        description='List of idx values of any duplicate facts. If no duplicate facts are found, default to empty list.',
     )
     contradicted_facts: list[int] = Field(
         ...,
-        description='List of ids of facts that should be invalidated. If no facts should be invalidated, the list should be empty.',
+        description='List of idx values of facts that should be invalidated. If no facts should be invalidated, the list should be empty.',
     )
     fact_type: str = Field(..., description='One of the provided fact types or DEFAULT')
 
@@ -67,16 +67,17 @@ def edge(context: dict[str, Any]) -> list[Message]:
         Given the following context, determine whether the New Edge represents any of the edges in the list of Existing Edges.
 
         <EXISTING EDGES>
-        {json.dumps(context['related_edges'], indent=2)}
+        {to_prompt_json(context['related_edges'])}
         </EXISTING EDGES>
 
         <NEW EDGE>
-        {json.dumps(context['extracted_edges'], indent=2)}
+        {to_prompt_json(context['extracted_edges'])}
         </NEW EDGE>
-
+
         Task:
-        If the New Edges represents the same factual information as any edge in Existing Edges, return the id of the duplicate fact.
-        If the NEW EDGE is not a duplicate of any of the EXISTING EDGES, return -1.
+        If the New Edges represents the same factual information as any edge in Existing Edges, return the id of the duplicate fact
+        as part of the list of duplicate_facts.
+        If the NEW EDGE is not a duplicate of any of the EXISTING EDGES, return an empty list.
 
         Guidelines:
         1. The facts do not need to be completely identical to be duplicates, they just need to express the same information.
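Throughout these prompt modules, the `json.dumps(..., indent=2)` calls are swapped for the new `to_prompt_json` helper imported from `graphiti_core/prompts/prompt_helpers.py` (a file added in this release, +39 lines, not shown in this diff). A minimal sketch of what such a serialization wrapper might look like, assuming it only centralizes the JSON defaults used in prompts; the signature and behavior are assumptions, not the library's actual implementation:

```python
# Hypothetical sketch of a to_prompt_json-style helper; the real implementation
# in graphiti_core/prompts/prompt_helpers.py is not shown in this diff.
import json
from datetime import datetime
from typing import Any


def to_prompt_json(data: Any, indent: int | None = None) -> str:
    """Serialize prompt context to JSON, tolerating datetimes and other
    non-JSON-native values (assumed behavior)."""
    return json.dumps(data, indent=indent, default=str)


if __name__ == '__main__':
    # Compact output by default, unlike the old json.dumps(..., indent=2) calls.
    print(to_prompt_json({'fact': 'Alice works at Acme', 'valid_at': datetime(2024, 1, 1)}))
```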
@@ -97,7 +98,7 @@ def edge_list(context: dict[str, Any]) -> list[Message]:
         Given the following context, find all of the duplicates in a list of facts:
 
         Facts:
-        {json.dumps(context['edges'], indent=2)}
+        {to_prompt_json(context['edges'])}
 
         Task:
         If any facts in Facts is a duplicate of another fact, return a new fact with one of their uuid's.
@@ -123,35 +124,48 @@ def resolve_edge(context: dict[str, Any]) -> list[Message]:
         Message(
             role='user',
             content=f"""
-        <NEW FACT>
-        {context['new_edge']}
-        </NEW FACT>
-
+        Task:
+        You will receive TWO separate lists of facts. Each list uses 'idx' as its index field, starting from 0.
+
+        1. DUPLICATE DETECTION:
+           - If the NEW FACT represents identical factual information as any fact in EXISTING FACTS, return those idx values in duplicate_facts.
+           - Facts with similar information that contain key differences should NOT be marked as duplicates.
+           - Return idx values from EXISTING FACTS.
+           - If no duplicates, return an empty list for duplicate_facts.
+
+        2. FACT TYPE CLASSIFICATION:
+           - Given the predefined FACT TYPES, determine if the NEW FACT should be classified as one of these types.
+           - Return the fact type as fact_type or DEFAULT if NEW FACT is not one of the FACT TYPES.
+
+        3. CONTRADICTION DETECTION:
+           - Based on FACT INVALIDATION CANDIDATES and NEW FACT, determine which facts the new fact contradicts.
+           - Return idx values from FACT INVALIDATION CANDIDATES.
+           - If no contradictions, return an empty list for contradicted_facts.
+
+        IMPORTANT:
+        - duplicate_facts: Use ONLY 'idx' values from EXISTING FACTS
+        - contradicted_facts: Use ONLY 'idx' values from FACT INVALIDATION CANDIDATES
+        - These are two separate lists with independent idx ranges starting from 0
+
+        Guidelines:
+        1. Some facts may be very similar but will have key differences, particularly around numeric values in the facts.
+           Do not mark these facts as duplicates.
+
+        <FACT TYPES>
+        {context['edge_types']}
+        </FACT TYPES>
+
         <EXISTING FACTS>
         {context['existing_edges']}
         </EXISTING FACTS>
+
         <FACT INVALIDATION CANDIDATES>
         {context['edge_invalidation_candidates']}
         </FACT INVALIDATION CANDIDATES>
-
-        <FACT TYPES>
-        {context['edge_types']}
-        </FACT TYPES>
-
 
-        Task:
-        If the NEW FACT represents the same factual information as any fact in EXISTING FACTS, return the idx of the duplicate fact.
-        If the NEW FACT is not a duplicate of any of the EXISTING FACTS, return -1.
-
-        Given the predefined FACT TYPES, determine if the NEW FACT should be classified as one of these types.
-        Return the fact type as fact_type or DEFAULT if NEW FACT is not one of the FACT TYPES.
-
-        Based on the provided FACT INVALIDATION CANDIDATES and NEW FACT, determine which existing facts the new fact contradicts.
-        Return a list containing all idx's of the facts that are contradicted by the NEW FACT.
-        If there are no contradicted facts, return an empty list.
-
-        Guidelines:
-        1. The facts do not need to be completely identical to be duplicates, they just need to express the same information.
+        <NEW FACT>
+        {context['new_edge']}
+        </NEW FACT>
         """,
         ),
     ]
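The restructured resolve_edge prompt returns idx values drawn from two independently indexed lists, matching the updated EdgeDuplicate model above. A hedged sketch of validating such a response with the fields exactly as they appear in this diff (descriptions abridged); the sample payload is invented for illustration:

```python
# Sketch: validating a resolve_edge-style response against the EdgeDuplicate
# schema shown in this diff. Field names and types are taken from the diff
# (descriptions abridged); the sample values are invented.
from pydantic import BaseModel, Field


class EdgeDuplicate(BaseModel):
    duplicate_facts: list[int] = Field(
        ..., description='List of idx values of any duplicate facts, or an empty list.'
    )
    contradicted_facts: list[int] = Field(
        ..., description='List of idx values of facts that should be invalidated, or an empty list.'
    )
    fact_type: str = Field(..., description='One of the provided fact types or DEFAULT')


# Example model output: no duplicates among EXISTING FACTS, one contradiction
# at idx 0 of FACT INVALIDATION CANDIDATES, classified under a provided type.
resolution = EdgeDuplicate(duplicate_facts=[], contradicted_facts=[0], fact_type='WORKS_AT')
assert resolution.duplicate_facts == []      # idx values refer to EXISTING FACTS
assert resolution.contradicted_facts == [0]  # idx values refer to the invalidation candidates
```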
graphiti_core/prompts/dedupe_nodes.py

@@ -14,23 +14,27 @@ See the License for the specific language governing permissions and
 limitations under the License.
 """
 
-import json
 from typing import Any, Protocol, TypedDict
 
 from pydantic import BaseModel, Field
 
 from .models import Message, PromptFunction, PromptVersion
+from .prompt_helpers import to_prompt_json
 
 
 class NodeDuplicate(BaseModel):
     id: int = Field(..., description='integer id of the entity')
     duplicate_idx: int = Field(
         ...,
-        description='idx of the duplicate node. If no duplicate nodes are found, default to -1.',
+        description='idx of the duplicate entity. If no duplicate entities are found, default to -1.',
     )
     name: str = Field(
         ...,
-        description='Name of the entity. Should be the most complete and descriptive name possible.',
+        description='Name of the entity. Should be the most complete and descriptive name of the entity. Do not include any JSON formatting in the Entity name such as {}.',
+    )
+    duplicates: list[int] = Field(
+        ...,
+        description='idx of all entities that are a duplicate of the entity with the above id.',
     )
 
 
@@ -60,40 +64,51 @@ def node(context: dict[str, Any]) -> list[Message]:
             role='user',
             content=f"""
         <PREVIOUS MESSAGES>
-        {json.dumps([ep for ep in context['previous_episodes']], indent=2)}
+        {to_prompt_json([ep for ep in context['previous_episodes']])}
         </PREVIOUS MESSAGES>
         <CURRENT MESSAGE>
         {context['episode_content']}
         </CURRENT MESSAGE>
         <NEW ENTITY>
-        {json.dumps(context['extracted_node'], indent=2)}
+        {to_prompt_json(context['extracted_node'])}
         </NEW ENTITY>
         <ENTITY TYPE DESCRIPTION>
-        {json.dumps(context['entity_type_description'], indent=2)}
+        {to_prompt_json(context['entity_type_description'])}
         </ENTITY TYPE DESCRIPTION>
 
         <EXISTING ENTITIES>
-        {json.dumps(context['existing_nodes'], indent=2)}
+        {to_prompt_json(context['existing_nodes'])}
         </EXISTING ENTITIES>
 
         Given the above EXISTING ENTITIES and their attributes, MESSAGE, and PREVIOUS MESSAGES; Determine if the NEW ENTITY extracted from the conversation
         is a duplicate entity of one of the EXISTING ENTITIES.
 
         Entities should only be considered duplicates if they refer to the *same real-world object or concept*.
+        Semantic Equivalence: if a descriptive label in existing_entities clearly refers to a named entity in context, treat them as duplicates.
 
         Do NOT mark entities as duplicates if:
         - They are related but distinct.
         - They have similar names or purposes but refer to separate instances or concepts.
 
-        Task:
-        If the NEW ENTITY represents a duplicate entity of any entity in EXISTING ENTITIES, set duplicate_entity_id to the
-        id of the EXISTING ENTITY that is the duplicate.
-
-        If the NEW ENTITY is not a duplicate of any of the EXISTING ENTITIES,
-        duplicate_entity_id should be set to -1.
-
-        Also return the name that best describes the NEW ENTITY (whether it is the name of the NEW ENTITY, a node it
-        is a duplicate of, or a combination of the two).
+        TASK:
+        1. Compare `new_entity` against each item in `existing_entities`.
+        2. If it refers to the same real-world object or concept, collect its index.
+        3. Let `duplicate_idx` = the smallest collected index, or -1 if none.
+        4. Let `duplicates` = the sorted list of all collected indices (empty list if none).
+
+        Respond with a JSON object containing an "entity_resolutions" array with a single entry:
+        {{
+            "entity_resolutions": [
+                {{
+                    "id": integer id from NEW ENTITY,
+                    "name": the best full name for the entity,
+                    "duplicate_idx": integer index of the best duplicate in EXISTING ENTITIES, or -1 if none,
+                    "duplicates": sorted list of all duplicate indices you collected (deduplicate the list, use [] when none)
+                }}
+            ]
+        }}
+
+        Only reference indices that appear in EXISTING ENTITIES, and return [] / -1 when unsure.
         """,
         ),
     ]
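The rewritten node() prompt now spells out the exact JSON shape it expects, mirroring the duplicate_idx/duplicates fields added to NodeDuplicate above. An illustrative response that satisfies the stated invariants (duplicate_idx is the smallest collected index; duplicates is sorted and deduplicated; -1 and [] when there is no match); the entity data is made up:

```python
# Illustrative only: a node() response that satisfies the constraints stated in
# the rewritten prompt. The wrapper key "entity_resolutions" comes from the
# prompt text; the entity itself is invented sample data.
sample_response = {
    'entity_resolutions': [
        {
            'id': 0,                     # id taken from NEW ENTITY
            'name': 'Acme Corporation',  # most complete name available
            'duplicate_idx': 2,          # smallest collected index in EXISTING ENTITIES
            'duplicates': [2, 5],        # sorted, deduplicated indices
        }
    ]
}

resolution = sample_response['entity_resolutions'][0]
assert resolution['duplicates'] == sorted(set(resolution['duplicates']))
assert resolution['duplicate_idx'] == (
    min(resolution['duplicates']) if resolution['duplicates'] else -1
)
```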
@@ -104,41 +119,45 @@ def nodes(context: dict[str, Any]) -> list[Message]:
         Message(
             role='system',
             content='You are a helpful assistant that determines whether or not ENTITIES extracted from a conversation are duplicates'
-            'of existing entities.',
+            ' of existing entities.',
         ),
         Message(
             role='user',
             content=f"""
         <PREVIOUS MESSAGES>
-        {json.dumps([ep for ep in context['previous_episodes']], indent=2)}
+        {to_prompt_json([ep for ep in context['previous_episodes']])}
         </PREVIOUS MESSAGES>
         <CURRENT MESSAGE>
         {context['episode_content']}
         </CURRENT MESSAGE>
-
-
+
+
         Each of the following ENTITIES were extracted from the CURRENT MESSAGE.
         Each entity in ENTITIES is represented as a JSON object with the following structure:
         {{
             id: integer id of the entity,
             name: "name of the entity",
-            entity_type: "ontological classification of the entity",
-            entity_type_description: "Description of what the entity type represents",
-            duplication_candidates: [
-                {{
-                    idx: integer index of the candidate entity,
-                    name: "name of the candidate entity",
-                    entity_type: "ontological classification of the candidate entity",
-                    ...<additional attributes>
-                }}
-            ]
+            entity_type: ["Entity", "<optional additional label>", ...],
+            entity_type_description: "Description of what the entity type represents"
         }}
-
+
         <ENTITIES>
-        {json.dumps(context['extracted_nodes'], indent=2)}
+        {to_prompt_json(context['extracted_nodes'])}
         </ENTITIES>
 
-        For each of the above ENTITIES, determine if the entity is a duplicate of any of its duplication candidates.
+        <EXISTING ENTITIES>
+        {to_prompt_json(context['existing_nodes'])}
+        </EXISTING ENTITIES>
+
+        Each entry in EXISTING ENTITIES is an object with the following structure:
+        {{
+            idx: integer index of the candidate entity (use this when referencing a duplicate),
+            name: "name of the candidate entity",
+            entity_types: ["Entity", "<optional additional label>", ...],
+            ...<additional attributes such as summaries or metadata>
+        }}
+
+        For each of the above ENTITIES, determine if the entity is a duplicate of any of the EXISTING ENTITIES.
 
         Entities should only be considered duplicates if they refer to the *same real-world object or concept*.
 
@@ -147,14 +166,20 @@ def nodes(context: dict[str, Any]) -> list[Message]:
         - They have similar names or purposes but refer to separate instances or concepts.
 
         Task:
-        Your response will be a list called entity_resolutions which contains one entry for each entity.
-
-        For each entity, return the id of the entity as id, the name of the entity as name, and the duplicate_idx
-        as an integer.
-
-        - If an entity is a duplicate of one of its duplication_candidates, return the idx of the candidate it is a
-          duplicate of.
-        - If an entity is not a duplicate of one of its duplication candidates, return the -1 as the duplication_idx
+        ENTITIES contains {len(context['extracted_nodes'])} entities with IDs 0 through {len(context['extracted_nodes']) - 1}.
+        Your response MUST include EXACTLY {len(context['extracted_nodes'])} resolutions with IDs 0 through {len(context['extracted_nodes']) - 1}. Do not skip or add IDs.
+
+        For every entity, return an object with the following keys:
+        {{
+            "id": integer id from ENTITIES,
+            "name": the best full name for the entity (preserve the original name unless a duplicate has a more complete name),
+            "duplicate_idx": the idx of the EXISTING ENTITY that is the best duplicate match, or -1 if there is no duplicate,
+            "duplicates": a sorted list of all idx values from EXISTING ENTITIES that refer to duplicates (deduplicate the list, use [] when none or unsure)
+        }}
+
+        - Only use idx values that appear in EXISTING ENTITIES.
+        - Set duplicate_idx to the smallest idx you collected for that entity, or -1 if duplicates is empty.
+        - Never fabricate entities or indices.
         """,
         ),
     ]
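Because the bulk nodes() prompt now requires exactly one resolution per extracted entity (IDs 0 through N-1), a caller can cheaply verify coverage before trusting the output. This is a hypothetical client-side guard, not code from the package:

```python
# Hypothetical guard: check that a bulk dedupe response covers every extracted
# entity ID exactly once, as the rewritten nodes() prompt demands.
def covers_all_ids(resolutions: list[dict], extracted_count: int) -> bool:
    return sorted(r['id'] for r in resolutions) == list(range(extracted_count))


resolutions = [
    {'id': 0, 'name': 'Alice', 'duplicate_idx': -1, 'duplicates': []},
    {'id': 1, 'name': 'Acme Corporation', 'duplicate_idx': 3, 'duplicates': [3]},
]
assert covers_all_ids(resolutions, extracted_count=2)
assert not covers_all_ids(resolutions, extracted_count=3)  # ID 2 is missing
```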
@@ -172,7 +197,7 @@ def node_list(context: dict[str, Any]) -> list[Message]:
         Given the following context, deduplicate a list of nodes:
 
         Nodes:
-        {json.dumps(context['nodes'], indent=2)}
+        {to_prompt_json(context['nodes'])}
 
         Task:
         1. Group nodes together such that all duplicate nodes are in the same list of uuids
graphiti_core/prompts/eval.py

@@ -14,12 +14,12 @@ See the License for the specific language governing permissions and
 limitations under the License.
 """
 
-import json
 from typing import Any, Protocol, TypedDict
 
 from pydantic import BaseModel, Field
 
 from .models import Message, PromptFunction, PromptVersion
+from .prompt_helpers import to_prompt_json
 
 
 class QueryExpansion(BaseModel):
@@ -68,7 +68,7 @@ def query_expansion(context: dict[str, Any]) -> list[Message]:
     Bob is asking Alice a question, are you able to rephrase the question into a simpler one about Alice in the third person
     that maintains the relevant context?
     <QUESTION>
-    {json.dumps(context['query'])}
+    {to_prompt_json(context['query'])}
     </QUESTION>
     """
     return [
@@ -84,10 +84,10 @@ def qa_prompt(context: dict[str, Any]) -> list[Message]:
     Your task is to briefly answer the question in the way that you think Alice would answer the question.
     You are given the following entity summaries and facts to help you determine the answer to your question.
     <ENTITY_SUMMARIES>
-    {json.dumps(context['entity_summaries'])}
+    {to_prompt_json(context['entity_summaries'])}
     </ENTITY_SUMMARIES>
     <FACTS>
-    {json.dumps(context['facts'])}
+    {to_prompt_json(context['facts'])}
    </FACTS>
    <QUESTION>
    {context['query']}
graphiti_core/prompts/extract_edges.py

@@ -14,19 +14,26 @@ See the License for the specific language governing permissions and
 limitations under the License.
 """
 
-import json
 from typing import Any, Protocol, TypedDict
 
 from pydantic import BaseModel, Field
 
 from .models import Message, PromptFunction, PromptVersion
+from .prompt_helpers import to_prompt_json
 
 
 class Edge(BaseModel):
     relation_type: str = Field(..., description='FACT_PREDICATE_IN_SCREAMING_SNAKE_CASE')
-    source_entity_name: str = Field(..., description='The name of the source entity of the fact.')
-    target_entity_name: str = Field(..., description='The name of the target entity of the fact.')
-    fact: str = Field(..., description='')
+    source_entity_id: int = Field(
+        ..., description='The id of the source entity from the ENTITIES list'
+    )
+    target_entity_id: int = Field(
+        ..., description='The id of the target entity from the ENTITIES list'
+    )
+    fact: str = Field(
+        ...,
+        description='A natural language description of the relationship between the entities, paraphrased from the source text',
+    )
     valid_at: str | None = Field(
         None,
         description='The date and time when the relationship described by the edge fact became true or was established. Use ISO 8601 format (YYYY-MM-DDTHH:MM:SS.SSSSSSZ)',
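extract_edges.py now identifies edge endpoints by integer id into the ENTITIES list rather than by entity name. A brief sketch that reproduces the updated Edge fields from this diff (descriptions abridged) and constructs one edge against a tiny entity list; the sample entities and values are invented:

```python
# Sketch: the updated Edge schema references entities by their id in the
# ENTITIES list. Field names and types are taken from this diff (descriptions
# abridged); the entity list and values are invented for illustration.
from pydantic import BaseModel, Field


class Edge(BaseModel):
    relation_type: str = Field(..., description='FACT_PREDICATE_IN_SCREAMING_SNAKE_CASE')
    source_entity_id: int = Field(..., description='The id of the source entity from the ENTITIES list')
    target_entity_id: int = Field(..., description='The id of the target entity from the ENTITIES list')
    fact: str = Field(..., description='Natural language paraphrase of the relationship')
    valid_at: str | None = Field(None, description='ISO 8601 timestamp when the fact became true, if stated')


entities = [{'id': 0, 'name': 'Alice'}, {'id': 1, 'name': 'Acme Corporation'}]
edge = Edge(
    relation_type='WORKS_AT',
    source_entity_id=0,  # Alice
    target_entity_id=1,  # Acme Corporation
    fact='Alice works at Acme Corporation',
)
```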
@@ -68,8 +75,12 @@ def edge(context: dict[str, Any]) -> list[Message]:
         Message(
             role='user',
             content=f"""
+        <FACT TYPES>
+        {context['edge_types']}
+        </FACT TYPES>
+
         <PREVIOUS_MESSAGES>
-        {json.dumps([ep for ep in context['previous_episodes']], indent=2)}
+        {to_prompt_json([ep for ep in context['previous_episodes']])}
         </PREVIOUS_MESSAGES>
 
         <CURRENT_MESSAGE>
@@ -77,25 +88,24 @@ def edge(context: dict[str, Any]) -> list[Message]:
         </CURRENT_MESSAGE>
 
         <ENTITIES>
-        {context['nodes']}  # Each has: id, label (e.g., Person, Org), name, aliases
+        {to_prompt_json(context['nodes'])}
         </ENTITIES>
 
         <REFERENCE_TIME>
         {context['reference_time']}  # ISO 8601 (UTC); used to resolve relative time mentions
         </REFERENCE_TIME>
 
-        <FACT TYPES>
-        {context['edge_types']}
-        </FACT TYPES>
-
         # TASK
         Extract all factual relationships between the given ENTITIES based on the CURRENT MESSAGE.
         Only extract facts that:
         - involve two DISTINCT ENTITIES from the ENTITIES list,
         - are clearly stated or unambiguously implied in the CURRENT MESSAGE,
           and can be represented as edges in a knowledge graph.
-        - The FACT TYPES provide a list of the most important types of facts, make sure to extract any facts that
-          could be classified into one of the provided fact types
+        - Facts should include entity names rather than pronouns whenever possible.
+        - The FACT TYPES provide a list of the most important types of facts, make sure to extract facts of these types
+        - The FACT TYPES are not an exhaustive list, extract all facts from the message even if they do not fit into one
+          of the FACT TYPES
+        - The FACT TYPES each contain their fact_type_signature which represents the source and target entity types.
 
         You may use information from the PREVIOUS MESSAGES only to disambiguate references or support continuity.
 
@@ -104,11 +114,12 @@ You may use information from the PREVIOUS MESSAGES only to disambiguate referenc
 
         # EXTRACTION RULES
 
-        1. Only emit facts where both the subject and object match IDs in ENTITIES.
+        1. **Entity ID Validation**: `source_entity_id` and `target_entity_id` must use only the `id` values from the ENTITIES list provided above.
+           - **CRITICAL**: Using IDs not in the list will cause the edge to be rejected
         2. Each fact must involve two **distinct** entities.
         3. Use a SCREAMING_SNAKE_CASE string as the `relation_type` (e.g., FOUNDED, WORKS_AT).
         4. Do not emit duplicate or semantically redundant facts.
-        5. The `fact_text` should quote or closely paraphrase the original source sentence(s).
+        5. The `fact` should closely paraphrase the original source sentence(s). Do not verbatim quote the original text.
         6. Use `REFERENCE_TIME` to resolve vague or relative temporal expressions (e.g., "last week").
         7. Do **not** hallucinate or infer temporal bounds from unrelated events.
 
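Rule 1 now states that edges using ids outside the ENTITIES list will be rejected. A hypothetical post-extraction filter illustrating how a caller could enforce that rule; this is not the package's actual validation code:

```python
# Hypothetical post-extraction filter enforcing extraction rule 1: drop any
# edge whose source or target id does not appear in the ENTITIES list.
def filter_valid_edges(edges: list[dict], entities: list[dict]) -> list[dict]:
    valid_ids = {entity['id'] for entity in entities}
    return [
        e for e in edges
        if e['source_entity_id'] in valid_ids and e['target_entity_id'] in valid_ids
    ]


entities = [{'id': 0, 'name': 'Alice'}, {'id': 1, 'name': 'Acme Corporation'}]
edges = [
    {'relation_type': 'WORKS_AT', 'source_entity_id': 0, 'target_entity_id': 1,
     'fact': 'Alice works at Acme Corporation'},
    {'relation_type': 'FOUNDED', 'source_entity_id': 0, 'target_entity_id': 7,
     'fact': 'references an id that is not in ENTITIES'},
]
assert len(filter_valid_edges(edges, entities)) == 1  # the second edge is rejected
```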
@@ -130,7 +141,7 @@ def reflexion(context: dict[str, Any]) -> list[Message]:
 
     user_prompt = f"""
     <PREVIOUS MESSAGES>
-    {json.dumps([ep for ep in context['previous_episodes']], indent=2)}
+    {to_prompt_json([ep for ep in context['previous_episodes']])}
     </PREVIOUS MESSAGES>
     <CURRENT MESSAGE>
     {context['episode_content']}
@@ -164,7 +175,7 @@ def extract_attributes(context: dict[str, Any]) -> list[Message]:
             content=f"""
 
         <MESSAGE>
-        {json.dumps(context['episode_content'], indent=2)}
+        {to_prompt_json(context['episode_content'])}
         </MESSAGE>
         <REFERENCE TIME>
         {context['reference_time']}