graphiti-core 0.17.4__py3-none-any.whl → 0.24.3__py3-none-any.whl
This diff reflects the publicly released contents of the two package versions as published to a supported registry and is provided for informational purposes only.
- graphiti_core/cross_encoder/gemini_reranker_client.py +1 -1
- graphiti_core/cross_encoder/openai_reranker_client.py +1 -1
- graphiti_core/decorators.py +110 -0
- graphiti_core/driver/driver.py +62 -2
- graphiti_core/driver/falkordb_driver.py +215 -23
- graphiti_core/driver/graph_operations/graph_operations.py +191 -0
- graphiti_core/driver/kuzu_driver.py +182 -0
- graphiti_core/driver/neo4j_driver.py +61 -8
- graphiti_core/driver/neptune_driver.py +305 -0
- graphiti_core/driver/search_interface/search_interface.py +89 -0
- graphiti_core/edges.py +264 -132
- graphiti_core/embedder/azure_openai.py +10 -3
- graphiti_core/embedder/client.py +2 -1
- graphiti_core/graph_queries.py +114 -101
- graphiti_core/graphiti.py +582 -255
- graphiti_core/graphiti_types.py +2 -0
- graphiti_core/helpers.py +21 -14
- graphiti_core/llm_client/anthropic_client.py +142 -52
- graphiti_core/llm_client/azure_openai_client.py +57 -19
- graphiti_core/llm_client/client.py +83 -21
- graphiti_core/llm_client/config.py +1 -1
- graphiti_core/llm_client/gemini_client.py +75 -57
- graphiti_core/llm_client/openai_base_client.py +94 -50
- graphiti_core/llm_client/openai_client.py +28 -8
- graphiti_core/llm_client/openai_generic_client.py +91 -56
- graphiti_core/models/edges/edge_db_queries.py +259 -35
- graphiti_core/models/nodes/node_db_queries.py +311 -32
- graphiti_core/nodes.py +388 -164
- graphiti_core/prompts/dedupe_edges.py +42 -31
- graphiti_core/prompts/dedupe_nodes.py +56 -39
- graphiti_core/prompts/eval.py +4 -4
- graphiti_core/prompts/extract_edges.py +23 -14
- graphiti_core/prompts/extract_nodes.py +73 -32
- graphiti_core/prompts/prompt_helpers.py +39 -0
- graphiti_core/prompts/snippets.py +29 -0
- graphiti_core/prompts/summarize_nodes.py +23 -25
- graphiti_core/search/search.py +154 -74
- graphiti_core/search/search_config.py +39 -4
- graphiti_core/search/search_filters.py +109 -31
- graphiti_core/search/search_helpers.py +5 -6
- graphiti_core/search/search_utils.py +1360 -473
- graphiti_core/tracer.py +193 -0
- graphiti_core/utils/bulk_utils.py +216 -90
- graphiti_core/utils/datetime_utils.py +13 -0
- graphiti_core/utils/maintenance/community_operations.py +62 -38
- graphiti_core/utils/maintenance/dedup_helpers.py +262 -0
- graphiti_core/utils/maintenance/edge_operations.py +286 -126
- graphiti_core/utils/maintenance/graph_data_operations.py +44 -74
- graphiti_core/utils/maintenance/node_operations.py +320 -158
- graphiti_core/utils/maintenance/temporal_operations.py +11 -3
- graphiti_core/utils/ontology_utils/entity_types_utils.py +1 -1
- graphiti_core/utils/text_utils.py +53 -0
- {graphiti_core-0.17.4.dist-info → graphiti_core-0.24.3.dist-info}/METADATA +221 -87
- graphiti_core-0.24.3.dist-info/RECORD +86 -0
- {graphiti_core-0.17.4.dist-info → graphiti_core-0.24.3.dist-info}/WHEEL +1 -1
- graphiti_core-0.17.4.dist-info/RECORD +0 -77
- /graphiti_core/{utils/maintenance/utils.py → migrations/__init__.py} +0 -0
- {graphiti_core-0.17.4.dist-info → graphiti_core-0.24.3.dist-info}/licenses/LICENSE +0 -0
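Almost every prompt-module hunk below swaps a direct `json.dumps(..., indent=2)` call for a new `to_prompt_json` helper imported from the added `graphiti_core/prompts/prompt_helpers.py` (+39 lines). The helper's body is not part of this diff, so the following is only a minimal sketch of what such a wrapper might look like, assuming it is a thin layer over `json.dumps`; the shipped implementation may differ.

```python
# Hypothetical sketch of prompt_helpers.to_prompt_json; the real helper in
# graphiti-core 0.24.3 is not shown in this diff and may behave differently.
import json
from typing import Any


def to_prompt_json(data: Any, indent: int = 2) -> str:
    """Serialize prompt context (episodes, nodes, edges) into JSON text for an LLM prompt."""
    # default=str keeps datetimes and UUIDs from raising TypeError during serialization.
    return json.dumps(data, indent=indent, default=str)
```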
graphiti_core/prompts/dedupe_edges.py CHANGED

@@ -14,22 +14,22 @@ See the License for the specific language governing permissions and
 limitations under the License.
 """

-import json
 from typing import Any, Protocol, TypedDict

 from pydantic import BaseModel, Field

 from .models import Message, PromptFunction, PromptVersion
+from .prompt_helpers import to_prompt_json


 class EdgeDuplicate(BaseModel):
 duplicate_facts: list[int] = Field(
 ...,
-description='List of
+description='List of idx values of any duplicate facts. If no duplicate facts are found, default to empty list.',
 )
 contradicted_facts: list[int] = Field(
 ...,
-description='List of
+description='List of idx values of facts that should be invalidated. If no facts should be invalidated, the list should be empty.',
 )
 fact_type: str = Field(..., description='One of the provided fact types or DEFAULT')

@@ -67,13 +67,13 @@ def edge(context: dict[str, Any]) -> list[Message]:
 Given the following context, determine whether the New Edge represents any of the edges in the list of Existing Edges.

 <EXISTING EDGES>
-{
+{to_prompt_json(context['related_edges'])}
 </EXISTING EDGES>

 <NEW EDGE>
-{
+{to_prompt_json(context['extracted_edges'])}
 </NEW EDGE>
-
+
 Task:
 If the New Edges represents the same factual information as any edge in Existing Edges, return the id of the duplicate fact
 as part of the list of duplicate_facts.

@@ -98,7 +98,7 @@ def edge_list(context: dict[str, Any]) -> list[Message]:
 Given the following context, find all of the duplicates in a list of facts:

 Facts:
-{
+{to_prompt_json(context['edges'])}

 Task:
 If any facts in Facts is a duplicate of another fact, return a new fact with one of their uuid's.

@@ -124,37 +124,48 @@ def resolve_edge(context: dict[str, Any]) -> list[Message]:
 Message(
 role='user',
 content=f"""
-
-
-
-
+Task:
+You will receive TWO separate lists of facts. Each list uses 'idx' as its index field, starting from 0.
+
+1. DUPLICATE DETECTION:
+- If the NEW FACT represents identical factual information as any fact in EXISTING FACTS, return those idx values in duplicate_facts.
+- Facts with similar information that contain key differences should NOT be marked as duplicates.
+- Return idx values from EXISTING FACTS.
+- If no duplicates, return an empty list for duplicate_facts.
+
+2. FACT TYPE CLASSIFICATION:
+- Given the predefined FACT TYPES, determine if the NEW FACT should be classified as one of these types.
+- Return the fact type as fact_type or DEFAULT if NEW FACT is not one of the FACT TYPES.
+
+3. CONTRADICTION DETECTION:
+- Based on FACT INVALIDATION CANDIDATES and NEW FACT, determine which facts the new fact contradicts.
+- Return idx values from FACT INVALIDATION CANDIDATES.
+- If no contradictions, return an empty list for contradicted_facts.
+
+IMPORTANT:
+- duplicate_facts: Use ONLY 'idx' values from EXISTING FACTS
+- contradicted_facts: Use ONLY 'idx' values from FACT INVALIDATION CANDIDATES
+- These are two separate lists with independent idx ranges starting from 0
+
+Guidelines:
+1. Some facts may be very similar but will have key differences, particularly around numeric values in the facts.
+Do not mark these facts as duplicates.
+
+<FACT TYPES>
+{context['edge_types']}
+</FACT TYPES>
+
 <EXISTING FACTS>
 {context['existing_edges']}
 </EXISTING FACTS>
+
 <FACT INVALIDATION CANDIDATES>
 {context['edge_invalidation_candidates']}
 </FACT INVALIDATION CANDIDATES>
-
-<FACT TYPES>
-{context['edge_types']}
-</FACT TYPES>
-

-
-
-
-If the NEW FACT is not a duplicate of any of the EXISTING FACTS, return -1.
-
-Given the predefined FACT TYPES, determine if the NEW FACT should be classified as one of these types.
-Return the fact type as fact_type or DEFAULT if NEW FACT is not one of the FACT TYPES.
-
-Based on the provided FACT INVALIDATION CANDIDATES and NEW FACT, determine which existing facts the new fact contradicts.
-Return a list containing all idx's of the facts that are contradicted by the NEW FACT.
-If there are no contradicted facts, return an empty list.
-
-Guidelines:
-1. The facts do not need to be completely identical to be duplicates, they just need to express the same information.
-2. Some facts may be very similar but will have key differences, particularly around numeric values in the facts.
+<NEW FACT>
+{context['new_edge']}
+</NEW FACT>
 """,
 ),
 ]
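The reworked `resolve_edge` prompt pairs with the updated `EdgeDuplicate` response model from the first hunk: `duplicate_facts` and `contradicted_facts` now carry `idx` values drawn from two independently numbered lists. A response conforming to that contract could look like the following; the field definitions are copied from the hunks above, while the example values are invented.

```python
from pydantic import BaseModel, Field


class EdgeDuplicate(BaseModel):
    # Field definitions as shown in the dedupe_edges.py hunks above.
    duplicate_facts: list[int] = Field(
        ...,
        description='List of idx values of any duplicate facts. If no duplicate facts are found, default to empty list.',
    )
    contradicted_facts: list[int] = Field(
        ...,
        description='List of idx values of facts that should be invalidated. If no facts should be invalidated, the list should be empty.',
    )
    fact_type: str = Field(..., description='One of the provided fact types or DEFAULT')


# Illustrative LLM response: the NEW FACT duplicates EXISTING FACTS idx 2 and
# contradicts FACT INVALIDATION CANDIDATES idx 0 and 3 (all values invented).
resolution = EdgeDuplicate(duplicate_facts=[2], contradicted_facts=[0, 3], fact_type='DEFAULT')
print(resolution)
```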
graphiti_core/prompts/dedupe_nodes.py CHANGED

@@ -14,12 +14,12 @@ See the License for the specific language governing permissions and
 limitations under the License.
 """

-import json
 from typing import Any, Protocol, TypedDict

 from pydantic import BaseModel, Field

 from .models import Message, PromptFunction, PromptVersion
+from .prompt_helpers import to_prompt_json


 class NodeDuplicate(BaseModel):

@@ -34,7 +34,7 @@ class NodeDuplicate(BaseModel):
 )
 duplicates: list[int] = Field(
 ...,
-description='idx of all duplicate
+description='idx of all entities that are a duplicate of the entity with the above id.',
 )


@@ -64,20 +64,20 @@ def node(context: dict[str, Any]) -> list[Message]:
 role='user',
 content=f"""
 <PREVIOUS MESSAGES>
-{
+{to_prompt_json([ep for ep in context['previous_episodes']])}
 </PREVIOUS MESSAGES>
 <CURRENT MESSAGE>
 {context['episode_content']}
 </CURRENT MESSAGE>
 <NEW ENTITY>
-{
+{to_prompt_json(context['extracted_node'])}
 </NEW ENTITY>
 <ENTITY TYPE DESCRIPTION>
-{
+{to_prompt_json(context['entity_type_description'])}
 </ENTITY TYPE DESCRIPTION>

 <EXISTING ENTITIES>
-{
+{to_prompt_json(context['existing_nodes'])}
 </EXISTING ENTITIES>

 Given the above EXISTING ENTITIES and their attributes, MESSAGE, and PREVIOUS MESSAGES; Determine if the NEW ENTITY extracted from the conversation

@@ -92,12 +92,23 @@ def node(context: dict[str, Any]) -> list[Message]:

 TASK:
 1. Compare `new_entity` against each item in `existing_entities`.
-2. If it refers to the same real
-3. Let `duplicate_idx` = the
-4. Let `duplicates` = the list of
-
-
-
+2. If it refers to the same real-world object or concept, collect its index.
+3. Let `duplicate_idx` = the smallest collected index, or -1 if none.
+4. Let `duplicates` = the sorted list of all collected indices (empty list if none).
+
+Respond with a JSON object containing an "entity_resolutions" array with a single entry:
+{{
+"entity_resolutions": [
+{{
+"id": integer id from NEW ENTITY,
+"name": the best full name for the entity,
+"duplicate_idx": integer index of the best duplicate in EXISTING ENTITIES, or -1 if none,
+"duplicates": sorted list of all duplicate indices you collected (deduplicate the list, use [] when none)
+}}
+]
+}}
+
+Only reference indices that appear in EXISTING ENTITIES, and return [] / -1 when unsure.
 """,
 ),
 ]

@@ -108,44 +119,44 @@ def nodes(context: dict[str, Any]) -> list[Message]:
 Message(
 role='system',
 content='You are a helpful assistant that determines whether or not ENTITIES extracted from a conversation are duplicates'
-'of existing entities.',
+' of existing entities.',
 ),
 Message(
 role='user',
 content=f"""
 <PREVIOUS MESSAGES>
-{
+{to_prompt_json([ep for ep in context['previous_episodes']])}
 </PREVIOUS MESSAGES>
 <CURRENT MESSAGE>
 {context['episode_content']}
 </CURRENT MESSAGE>
-
-
+
+
 Each of the following ENTITIES were extracted from the CURRENT MESSAGE.
 Each entity in ENTITIES is represented as a JSON object with the following structure:
 {{
 id: integer id of the entity,
 name: "name of the entity",
-entity_type: "
-entity_type_description: "Description of what the entity type represents"
-duplication_candidates: [
-{{
-idx: integer index of the candidate entity,
-name: "name of the candidate entity",
-entity_type: "ontological classification of the candidate entity",
-...<additional attributes>
-}}
-]
+entity_type: ["Entity", "<optional additional label>", ...],
+entity_type_description: "Description of what the entity type represents"
 }}
-
+
 <ENTITIES>
-{
+{to_prompt_json(context['extracted_nodes'])}
 </ENTITIES>
-
+
 <EXISTING ENTITIES>
-{
+{to_prompt_json(context['existing_nodes'])}
 </EXISTING ENTITIES>

+Each entry in EXISTING ENTITIES is an object with the following structure:
+{{
+idx: integer index of the candidate entity (use this when referencing a duplicate),
+name: "name of the candidate entity",
+entity_types: ["Entity", "<optional additional label>", ...],
+...<additional attributes such as summaries or metadata>
+}}
+
 For each of the above ENTITIES, determine if the entity is a duplicate of any of the EXISTING ENTITIES.

 Entities should only be considered duplicates if they refer to the *same real-world object or concept*.

@@ -155,14 +166,20 @@ def nodes(context: dict[str, Any]) -> list[Message]:
 - They have similar names or purposes but refer to separate instances or concepts.

 Task:
-
-
-
-
-
-
-
-
+ENTITIES contains {len(context['extracted_nodes'])} entities with IDs 0 through {len(context['extracted_nodes']) - 1}.
+Your response MUST include EXACTLY {len(context['extracted_nodes'])} resolutions with IDs 0 through {len(context['extracted_nodes']) - 1}. Do not skip or add IDs.
+
+For every entity, return an object with the following keys:
+{{
+"id": integer id from ENTITIES,
+"name": the best full name for the entity (preserve the original name unless a duplicate has a more complete name),
+"duplicate_idx": the idx of the EXISTING ENTITY that is the best duplicate match, or -1 if there is no duplicate,
+"duplicates": a sorted list of all idx values from EXISTING ENTITIES that refer to duplicates (deduplicate the list, use [] when none or unsure)
+}}
+
+- Only use idx values that appear in EXISTING ENTITIES.
+- Set duplicate_idx to the smallest idx you collected for that entity, or -1 if duplicates is empty.
+- Never fabricate entities or indices.
 """,
 ),
 ]

@@ -180,7 +197,7 @@ def node_list(context: dict[str, Any]) -> list[Message]:
 Given the following context, deduplicate a list of nodes:

 Nodes:
-{
+{to_prompt_json(context['nodes'])}

 Task:
 1. Group nodes together such that all duplicate nodes are in the same list of uuids
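The rewritten `node` and `nodes` prompts above now pin the response shape to one resolution per extracted entity, with `duplicate_idx` and `duplicates` referring only to `idx` values from EXISTING ENTITIES. Purely as an illustration of that schema (all ids and names invented), a response for two extracted entities where the first matches existing entity idx 4 and the second has no match might be:

```python
# Example payload matching the entity_resolutions schema spelled out in the
# prompts above; every value here is invented for illustration.
example_response = {
    'entity_resolutions': [
        {'id': 0, 'name': 'Acme Corporation', 'duplicate_idx': 4, 'duplicates': [4]},
        {'id': 1, 'name': 'Berlin office', 'duplicate_idx': -1, 'duplicates': []},
    ]
}
```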
graphiti_core/prompts/eval.py CHANGED

@@ -14,12 +14,12 @@ See the License for the specific language governing permissions and
 limitations under the License.
 """

-import json
 from typing import Any, Protocol, TypedDict

 from pydantic import BaseModel, Field

 from .models import Message, PromptFunction, PromptVersion
+from .prompt_helpers import to_prompt_json


 class QueryExpansion(BaseModel):

@@ -68,7 +68,7 @@ def query_expansion(context: dict[str, Any]) -> list[Message]:
 Bob is asking Alice a question, are you able to rephrase the question into a simpler one about Alice in the third person
 that maintains the relevant context?
 <QUESTION>
-{
+{to_prompt_json(context['query'])}
 </QUESTION>
 """
 return [

@@ -84,10 +84,10 @@ def qa_prompt(context: dict[str, Any]) -> list[Message]:
 Your task is to briefly answer the question in the way that you think Alice would answer the question.
 You are given the following entity summaries and facts to help you determine the answer to your question.
 <ENTITY_SUMMARIES>
-{
+{to_prompt_json(context['entity_summaries'])}
 </ENTITY_SUMMARIES>
 <FACTS>
-{
+{to_prompt_json(context['facts'])}
 </FACTS>
 <QUESTION>
 {context['query']}
graphiti_core/prompts/extract_edges.py CHANGED

@@ -14,19 +14,26 @@ See the License for the specific language governing permissions and
 limitations under the License.
 """

-import json
 from typing import Any, Protocol, TypedDict

 from pydantic import BaseModel, Field

 from .models import Message, PromptFunction, PromptVersion
+from .prompt_helpers import to_prompt_json


 class Edge(BaseModel):
 relation_type: str = Field(..., description='FACT_PREDICATE_IN_SCREAMING_SNAKE_CASE')
-source_entity_id: int = Field(
-
-
+source_entity_id: int = Field(
+..., description='The id of the source entity from the ENTITIES list'
+)
+target_entity_id: int = Field(
+..., description='The id of the target entity from the ENTITIES list'
+)
+fact: str = Field(
+...,
+description='A natural language description of the relationship between the entities, paraphrased from the source text',
+)
 valid_at: str | None = Field(
 None,
 description='The date and time when the relationship described by the edge fact became true or was established. Use ISO 8601 format (YYYY-MM-DDTHH:MM:SS.SSSSSSZ)',
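The expanded `Edge` model above makes `source_entity_id` and `target_entity_id` explicit references into the ENTITIES list and asks for a paraphrased `fact` plus an ISO 8601 `valid_at`. As a sketch of what one extracted edge could look like under those rules (field names taken from the hunk, all values invented, fields not shown in the hunk omitted):

```python
# Illustrative extracted edge; all concrete values are invented.
example_edge = {
    'relation_type': 'WORKS_AT',  # SCREAMING_SNAKE_CASE predicate
    'source_entity_id': 0,        # id of the source entity in the ENTITIES list
    'target_entity_id': 3,        # id of the target entity in the ENTITIES list
    'fact': 'Alice works at Acme Corporation as of early 2024.',
    'valid_at': '2024-01-15T00:00:00.000000Z',  # resolved against REFERENCE_TIME
}
```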
@@ -68,8 +75,12 @@ def edge(context: dict[str, Any]) -> list[Message]:
 Message(
 role='user',
 content=f"""
+<FACT TYPES>
+{context['edge_types']}
+</FACT TYPES>
+
 <PREVIOUS_MESSAGES>
-{
+{to_prompt_json([ep for ep in context['previous_episodes']])}
 </PREVIOUS_MESSAGES>

 <CURRENT_MESSAGE>

@@ -77,23 +88,20 @@ def edge(context: dict[str, Any]) -> list[Message]:
 </CURRENT_MESSAGE>

 <ENTITIES>
-{context['nodes']}
+{to_prompt_json(context['nodes'])}
 </ENTITIES>

 <REFERENCE_TIME>
 {context['reference_time']} # ISO 8601 (UTC); used to resolve relative time mentions
 </REFERENCE_TIME>

-<FACT TYPES>
-{context['edge_types']}
-</FACT TYPES>
-
 # TASK
 Extract all factual relationships between the given ENTITIES based on the CURRENT MESSAGE.
 Only extract facts that:
 - involve two DISTINCT ENTITIES from the ENTITIES list,
 - are clearly stated or unambiguously implied in the CURRENT MESSAGE,
 and can be represented as edges in a knowledge graph.
+- Facts should include entity names rather than pronouns whenever possible.
 - The FACT TYPES provide a list of the most important types of facts, make sure to extract facts of these types
 - The FACT TYPES are not an exhaustive list, extract all facts from the message even if they do not fit into one
 of the FACT TYPES

@@ -106,11 +114,12 @@ You may use information from the PREVIOUS MESSAGES only to disambiguate referenc

 # EXTRACTION RULES

-1.
+1. **Entity ID Validation**: `source_entity_id` and `target_entity_id` must use only the `id` values from the ENTITIES list provided above.
+- **CRITICAL**: Using IDs not in the list will cause the edge to be rejected
 2. Each fact must involve two **distinct** entities.
 3. Use a SCREAMING_SNAKE_CASE string as the `relation_type` (e.g., FOUNDED, WORKS_AT).
 4. Do not emit duplicate or semantically redundant facts.
-5. The `
+5. The `fact` should closely paraphrase the original source sentence(s). Do not verbatim quote the original text.
 6. Use `REFERENCE_TIME` to resolve vague or relative temporal expressions (e.g., "last week").
 7. Do **not** hallucinate or infer temporal bounds from unrelated events.


@@ -132,7 +141,7 @@ def reflexion(context: dict[str, Any]) -> list[Message]:

 user_prompt = f"""
 <PREVIOUS MESSAGES>
-{
+{to_prompt_json([ep for ep in context['previous_episodes']])}
 </PREVIOUS MESSAGES>
 <CURRENT MESSAGE>
 {context['episode_content']}

@@ -166,7 +175,7 @@ def extract_attributes(context: dict[str, Any]) -> list[Message]:
 content=f"""

 <MESSAGE>
-{
+{to_prompt_json(context['episode_content'])}
 </MESSAGE>
 <REFERENCE TIME>
 {context['reference_time']}
graphiti_core/prompts/extract_nodes.py CHANGED

@@ -14,12 +14,15 @@ See the License for the specific language governing permissions and
 limitations under the License.
 """

-import json
 from typing import Any, Protocol, TypedDict

 from pydantic import BaseModel, Field

+from graphiti_core.utils.text_utils import MAX_SUMMARY_CHARS
+
 from .models import Message, PromptFunction, PromptVersion
+from .prompt_helpers import to_prompt_json
+from .snippets import summary_instructions


 class ExtractedEntity(BaseModel):

@@ -42,7 +45,8 @@ class EntityClassificationTriple(BaseModel):
 uuid: str = Field(description='UUID of the entity')
 name: str = Field(description='Name of the entity')
 entity_type: str | None = Field(
-default=None,
+default=None,
+description='Type of the entity. Must be one of the provided types or None',
 )


@@ -52,6 +56,13 @@ class EntityClassification(BaseModel):
 )


+class EntitySummary(BaseModel):
+summary: str = Field(
+...,
+description=f'Summary containing the important information about the entity. Under {MAX_SUMMARY_CHARS} characters.',
+)
+
+
 class Prompt(Protocol):
 extract_message: PromptVersion
 extract_json: PromptVersion

@@ -59,6 +70,7 @@ class Prompt(Protocol):
 reflexion: PromptVersion
 classify_nodes: PromptVersion
 extract_attributes: PromptVersion
+extract_summary: PromptVersion


 class Versions(TypedDict):

@@ -68,6 +80,7 @@ class Versions(TypedDict):
 reflexion: PromptFunction
 classify_nodes: PromptFunction
 extract_attributes: PromptFunction
+extract_summary: PromptFunction


 def extract_message(context: dict[str, Any]) -> list[Message]:

@@ -75,23 +88,23 @@ def extract_message(context: dict[str, Any]) -> list[Message]:
 Your primary task is to extract and classify the speaker and other significant entities mentioned in the conversation."""

 user_prompt = f"""
+<ENTITY TYPES>
+{context['entity_types']}
+</ENTITY TYPES>
+
 <PREVIOUS MESSAGES>
-{
+{to_prompt_json([ep for ep in context['previous_episodes']])}
 </PREVIOUS MESSAGES>

 <CURRENT MESSAGE>
 {context['episode_content']}
 </CURRENT MESSAGE>

-<ENTITY TYPES>
-{context['entity_types']}
-</ENTITY TYPES>
-
 Instructions:

 You are given a conversation context and a CURRENT MESSAGE. Your task is to extract **entity nodes** mentioned **explicitly or implicitly** in the CURRENT MESSAGE.
 Pronoun references such as he/she/they or this/that/those should be disambiguated to the names of the
-reference entities.
+reference entities. Only extract distinct entities from the CURRENT MESSAGE. Don't extract pronouns like you, me, he/she/they, we/us as entities.

 1. **Speaker Extraction**: Always extract the speaker (the part before the colon `:` in each dialogue line) as the first entity node.
 - If the speaker is mentioned again in the message, treat both mentions as a **single entity**.

@@ -124,15 +137,16 @@ def extract_json(context: dict[str, Any]) -> list[Message]:
 Your primary task is to extract and classify relevant entities from JSON files"""

 user_prompt = f"""
+<ENTITY TYPES>
+{context['entity_types']}
+</ENTITY TYPES>
+
 <SOURCE DESCRIPTION>:
 {context['source_description']}
 </SOURCE DESCRIPTION>
 <JSON>
 {context['episode_content']}
 </JSON>
-<ENTITY TYPES>
-{context['entity_types']}
-</ENTITY TYPES>

 {context['custom_prompt']}


@@ -141,8 +155,9 @@ For each entity extracted, also determine its entity type based on the provided
 Indicate the classified entity type by providing its entity_type_id.

 Guidelines:
-1.
-2.
+1. Extract all entities that the JSON represents. This will often be something like a "name" or "user" field
+2. Extract all entities mentioned in all other properties throughout the JSON structure
+3. Do NOT extract any properties that contain dates
 """
 return [
 Message(role='system', content=sys_prompt),

@@ -155,13 +170,14 @@ def extract_text(context: dict[str, Any]) -> list[Message]:
 Your primary task is to extract and classify the speaker and other significant entities mentioned in the provided text."""

 user_prompt = f"""
-<TEXT>
-{context['episode_content']}
-</TEXT>
 <ENTITY TYPES>
 {context['entity_types']}
 </ENTITY TYPES>

+<TEXT>
+{context['episode_content']}
+</TEXT>
+
 Given the above text, extract entities from the TEXT that are explicitly or implicitly mentioned.
 For each entity extracted, also determine its entity type based on the provided ENTITY TYPES and their descriptions.
 Indicate the classified entity type by providing its entity_type_id.

@@ -185,7 +201,7 @@ def reflexion(context: dict[str, Any]) -> list[Message]:

 user_prompt = f"""
 <PREVIOUS MESSAGES>
-{
+{to_prompt_json([ep for ep in context['previous_episodes']])}
 </PREVIOUS MESSAGES>
 <CURRENT MESSAGE>
 {context['episode_content']}

@@ -209,22 +225,22 @@ def classify_nodes(context: dict[str, Any]) -> list[Message]:

 user_prompt = f"""
 <PREVIOUS MESSAGES>
-{
+{to_prompt_json([ep for ep in context['previous_episodes']])}
 </PREVIOUS MESSAGES>
 <CURRENT MESSAGE>
 {context['episode_content']}
 </CURRENT MESSAGE>
-
+
 <EXTRACTED ENTITIES>
 {context['extracted_entities']}
 </EXTRACTED ENTITIES>
-
+
 <ENTITY TYPES>
 {context['entity_types']}
 </ENTITY TYPES>
-
+
 Given the above conversation, extracted entities, and provided entity types and their descriptions, classify the extracted entities.
-
+
 Guidelines:
 1. Each entity must have exactly one type
 2. Only use the provided ENTITY TYPES as types, do not use additional types to classify entities.

@@ -245,21 +261,45 @@ def extract_attributes(context: dict[str, Any]) -> list[Message]:
 Message(
 role='user',
 content=f"""
-
-<MESSAGES>
-{json.dumps(context['previous_episodes'], indent=2)}
-{json.dumps(context['episode_content'], indent=2)}
-</MESSAGES>
-
-Given the above MESSAGES and the following ENTITY, update any of its attributes based on the information provided
+Given the MESSAGES and the following ENTITY, update any of its attributes based on the information provided
 in MESSAGES. Use the provided attribute descriptions to better understand how each attribute should be determined.

 Guidelines:
 1. Do not hallucinate entity property values if they cannot be found in the current context.
 2. Only use the provided MESSAGES and ENTITY to set attribute values.
-
-
-
+
+<MESSAGES>
+{to_prompt_json(context['previous_episodes'])}
+{to_prompt_json(context['episode_content'])}
+</MESSAGES>
+
+<ENTITY>
+{context['node']}
+</ENTITY>
+""",
+),
+]
+
+
+def extract_summary(context: dict[str, Any]) -> list[Message]:
+return [
+Message(
+role='system',
+content='You are a helpful assistant that extracts entity summaries from the provided text.',
+),
+Message(
+role='user',
+content=f"""
+Given the MESSAGES and the ENTITY, update the summary that combines relevant information about the entity
+from the messages and relevant information from the existing summary.
+
+{summary_instructions}
+
+<MESSAGES>
+{to_prompt_json(context['previous_episodes'])}
+{to_prompt_json(context['episode_content'])}
+</MESSAGES>
+
 <ENTITY>
 {context['node']}
 </ENTITY>

@@ -273,6 +313,7 @@ versions: Versions = {
 'extract_json': extract_json,
 'extract_text': extract_text,
 'reflexion': reflexion,
+'extract_summary': extract_summary,
 'classify_nodes': classify_nodes,
 'extract_attributes': extract_attributes,
 }
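The new `extract_summary` entry rounds out the `versions` registry for this module. A minimal usage sketch follows, assuming the context keys shown in the hunk (`previous_episodes`, `episode_content`, `node`) are sufficient and that the module is imported as it appears in the wheel; the real graphiti-core call sites may pass richer context.

```python
# Sketch only: build the extract_summary message list through the versions registry.
from graphiti_core.prompts.extract_nodes import versions

context = {
    'previous_episodes': ['Alice: I moved to Berlin last year.'],       # invented sample data
    'episode_content': 'Alice: I just started at a robotics startup.',  # invented sample data
    'node': {'name': 'Alice', 'summary': ''},
}

messages = versions['extract_summary'](context)
for message in messages:
    print(message.role, message.content[:80])
```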