graphiti-core 0.10.5__py3-none-any.whl → 0.11.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of graphiti-core might be problematic. Click here for more details.
- graphiti_core/edges.py +32 -57
- graphiti_core/embedder/client.py +3 -0
- graphiti_core/embedder/gemini.py +10 -0
- graphiti_core/embedder/openai.py +6 -0
- graphiti_core/embedder/voyage.py +7 -0
- graphiti_core/graphiti.py +42 -138
- graphiti_core/graphiti_types.py +31 -0
- graphiti_core/helpers.py +6 -1
- graphiti_core/models/edges/edge_db_queries.py +1 -1
- graphiti_core/nodes.py +8 -2
- graphiti_core/prompts/dedupe_edges.py +5 -7
- graphiti_core/prompts/dedupe_nodes.py +8 -21
- graphiti_core/prompts/extract_edges.py +61 -26
- graphiti_core/prompts/extract_nodes.py +89 -18
- graphiti_core/prompts/invalidate_edges.py +11 -11
- graphiti_core/search/search.py +13 -5
- graphiti_core/search/search_utils.py +208 -82
- graphiti_core/utils/bulk_utils.py +10 -7
- graphiti_core/utils/maintenance/edge_operations.py +88 -40
- graphiti_core/utils/maintenance/graph_data_operations.py +9 -3
- graphiti_core/utils/maintenance/node_operations.py +217 -223
- graphiti_core/utils/maintenance/temporal_operations.py +4 -11
- {graphiti_core-0.10.5.dist-info → graphiti_core-0.11.1.dist-info}/METADATA +14 -8
- {graphiti_core-0.10.5.dist-info → graphiti_core-0.11.1.dist-info}/RECORD +26 -25
- {graphiti_core-0.10.5.dist-info → graphiti_core-0.11.1.dist-info}/LICENSE +0 -0
- {graphiti_core-0.10.5.dist-info → graphiti_core-0.11.1.dist-info}/WHEEL +0 -0
|
@@ -23,10 +23,18 @@ from .models import Message, PromptFunction, PromptVersion
|
|
|
23
23
|
|
|
24
24
|
|
|
25
25
|
class Edge(BaseModel):
|
|
26
|
-
relation_type: str = Field(..., description='
|
|
27
|
-
source_entity_name: str = Field(..., description='name of the source entity')
|
|
28
|
-
target_entity_name: str = Field(..., description='name of the target entity')
|
|
29
|
-
fact: str = Field(..., description='
|
|
26
|
+
relation_type: str = Field(..., description='FACT_PREDICATE_IN_SCREAMING_SNAKE_CASE')
|
|
27
|
+
source_entity_name: str = Field(..., description='The name of the source entity of the fact.')
|
|
28
|
+
target_entity_name: str = Field(..., description='The name of the target entity of the fact.')
|
|
29
|
+
fact: str = Field(..., description='')
|
|
30
|
+
valid_at: str | None = Field(
|
|
31
|
+
None,
|
|
32
|
+
description='The date and time when the relationship described by the edge fact became true or was established. Use ISO 8601 format (YYYY-MM-DDTHH:MM:SS.SSSSSSZ)',
|
|
33
|
+
)
|
|
34
|
+
invalid_at: str | None = Field(
|
|
35
|
+
None,
|
|
36
|
+
description='The date and time when the relationship described by the edge fact stopped being true or ended. Use ISO 8601 format (YYYY-MM-DDTHH:MM:SS.SSSSSSZ)',
|
|
37
|
+
)
|
|
30
38
|
|
|
31
39
|
|
|
32
40
|
class ExtractedEdges(BaseModel):
|
|
@@ -51,32 +59,59 @@ def edge(context: dict[str, Any]) -> list[Message]:
|
|
|
51
59
|
return [
|
|
52
60
|
Message(
|
|
53
61
|
role='system',
|
|
54
|
-
content='You are an expert fact extractor that extracts fact triples from text.'
|
|
62
|
+
content='You are an expert fact extractor that extracts fact triples from text. '
|
|
63
|
+
'1. Extracted fact triples should also be extracted with relevant date information.'
|
|
64
|
+
'2. Treat the CURRENT TIME as the time the CURRENT MESSAGE was sent. All temporal information should be extracted relative to this time.',
|
|
55
65
|
),
|
|
56
66
|
Message(
|
|
57
67
|
role='user',
|
|
58
68
|
content=f"""
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
69
|
+
<PREVIOUS_MESSAGES>
|
|
70
|
+
{json.dumps([ep for ep in context['previous_episodes']], indent=2)}
|
|
71
|
+
</PREVIOUS_MESSAGES>
|
|
72
|
+
|
|
73
|
+
<CURRENT_MESSAGE>
|
|
74
|
+
{context['episode_content']}
|
|
75
|
+
</CURRENT_MESSAGE>
|
|
76
|
+
|
|
77
|
+
<ENTITIES>
|
|
78
|
+
{context['nodes']} # Each has: id, label (e.g., Person, Org), name, aliases
|
|
79
|
+
</ENTITIES>
|
|
80
|
+
|
|
81
|
+
<REFERENCE_TIME>
|
|
82
|
+
{context['reference_time']} # ISO 8601 (UTC); used to resolve relative time mentions
|
|
83
|
+
</REFERENCE_TIME>
|
|
84
|
+
|
|
85
|
+
# TASK
|
|
86
|
+
Extract all factual relationships between the given ENTITIES based on the CURRENT MESSAGE.
|
|
87
|
+
Only extract facts that:
|
|
88
|
+
- involve two DISTINCT ENTITIES from the ENTITIES list,
|
|
89
|
+
- are clearly stated or unambiguously implied in the CURRENT MESSAGE,
|
|
90
|
+
- and can be represented as edges in a knowledge graph.
|
|
91
|
+
|
|
92
|
+
You may use information from the PREVIOUS MESSAGES only to disambiguate references or support continuity.
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
{context['custom_prompt']}
|
|
96
|
+
|
|
97
|
+
# EXTRACTION RULES
|
|
98
|
+
|
|
99
|
+
1. Only emit facts where both the subject and object match IDs in ENTITIES.
|
|
100
|
+
2. Each fact must involve two **distinct** entities.
|
|
101
|
+
3. Use a SCREAMING_SNAKE_CASE string as the `relation_type` (e.g., FOUNDED, WORKS_AT).
|
|
102
|
+
4. Do not emit duplicate or semantically redundant facts.
|
|
103
|
+
5. The `fact_text` should quote or closely paraphrase the original source sentence(s).
|
|
104
|
+
6. Use `REFERENCE_TIME` to resolve vague or relative temporal expressions (e.g., "last week").
|
|
105
|
+
7. Do **not** hallucinate or infer temporal bounds from unrelated events.
|
|
106
|
+
|
|
107
|
+
# DATETIME RULES
|
|
108
|
+
|
|
109
|
+
- Use ISO 8601 with “Z” suffix (UTC) (e.g., 2025-04-30T00:00:00Z).
|
|
110
|
+
- If the fact is ongoing (present tense), set `valid_at` to REFERENCE_TIME.
|
|
111
|
+
- If a change/termination is expressed, set `invalid_at` to the relevant timestamp.
|
|
112
|
+
- Leave both fields `null` if no explicit or resolvable time is stated.
|
|
113
|
+
- If only a date is mentioned (no time), assume 00:00:00.
|
|
114
|
+
- If only a year is mentioned, use January 1st at 00:00:00.
|
|
80
115
|
""",
|
|
81
116
|
),
|
|
82
117
|
]
|
|
@@ -22,8 +22,16 @@ from pydantic import BaseModel, Field
|
|
|
22
22
|
from .models import Message, PromptFunction, PromptVersion
|
|
23
23
|
|
|
24
24
|
|
|
25
|
-
class
|
|
26
|
-
|
|
25
|
+
class ExtractedEntity(BaseModel):
|
|
26
|
+
name: str = Field(..., description='Name of the extracted entity')
|
|
27
|
+
entity_type_id: int = Field(
|
|
28
|
+
description='ID of the classified entity type. '
|
|
29
|
+
'Must be one of the provided entity_type_id integers.',
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class ExtractedEntities(BaseModel):
|
|
34
|
+
extracted_entities: list[ExtractedEntity] = Field(..., description='List of extracted entities')
|
|
27
35
|
|
|
28
36
|
|
|
29
37
|
class MissedEntities(BaseModel):
|
|
@@ -50,6 +58,7 @@ class Prompt(Protocol):
|
|
|
50
58
|
extract_text: PromptVersion
|
|
51
59
|
reflexion: PromptVersion
|
|
52
60
|
classify_nodes: PromptVersion
|
|
61
|
+
extract_attributes: PromptVersion
|
|
53
62
|
|
|
54
63
|
|
|
55
64
|
class Versions(TypedDict):
|
|
@@ -58,31 +67,49 @@ class Versions(TypedDict):
|
|
|
58
67
|
extract_text: PromptFunction
|
|
59
68
|
reflexion: PromptFunction
|
|
60
69
|
classify_nodes: PromptFunction
|
|
70
|
+
extract_attributes: PromptFunction
|
|
61
71
|
|
|
62
72
|
|
|
63
73
|
def extract_message(context: dict[str, Any]) -> list[Message]:
|
|
64
|
-
sys_prompt = """You are an AI assistant that extracts entity nodes from conversational messages.
|
|
74
|
+
sys_prompt = """You are an AI assistant that extracts entity nodes from conversational messages.
|
|
75
|
+
Your primary task is to extract and classify the speaker and other significant entities mentioned in the conversation."""
|
|
65
76
|
|
|
66
77
|
user_prompt = f"""
|
|
67
78
|
<PREVIOUS MESSAGES>
|
|
68
79
|
{json.dumps([ep for ep in context['previous_episodes']], indent=2)}
|
|
69
80
|
</PREVIOUS MESSAGES>
|
|
81
|
+
|
|
70
82
|
<CURRENT MESSAGE>
|
|
71
83
|
{context['episode_content']}
|
|
72
84
|
</CURRENT MESSAGE>
|
|
73
85
|
|
|
74
|
-
|
|
86
|
+
<ENTITY TYPES>
|
|
87
|
+
{context['entity_types']}
|
|
88
|
+
</ENTITY TYPES>
|
|
75
89
|
|
|
76
|
-
|
|
90
|
+
Instructions:
|
|
77
91
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
92
|
+
You are given a conversation context and a CURRENT MESSAGE. Your task is to extract **entity nodes** mentioned **explicitly or implicitly** in the CURRENT MESSAGE.
|
|
93
|
+
|
|
94
|
+
1. **Speaker Extraction**: Always extract the speaker (the part before the colon `:` in each dialogue line) as the first entity node.
|
|
95
|
+
- If the speaker is mentioned again in the message, treat both mentions as a **single entity**.
|
|
96
|
+
|
|
97
|
+
2. **Entity Identification**:
|
|
98
|
+
- Extract all significant entities, concepts, or actors that are **explicitly or implicitly** mentioned in the CURRENT MESSAGE.
|
|
99
|
+
- **Exclude** entities mentioned only in the PREVIOUS MESSAGES (they are for context only).
|
|
100
|
+
|
|
101
|
+
3. **Entity Classification**:
|
|
102
|
+
- Use the descriptions in ENTITY TYPES to classify each extracted entity.
|
|
103
|
+
- Assign the appropriate `entity_type_id` for each one.
|
|
104
|
+
|
|
105
|
+
4. **Exclusions**:
|
|
106
|
+
- Do NOT extract entities representing relationships or actions.
|
|
107
|
+
- Do NOT extract dates, times, or other temporal information—these will be handled separately.
|
|
108
|
+
|
|
109
|
+
5. **Formatting**:
|
|
110
|
+
- Be **explicit and unambiguous** in naming entities (e.g., use full names when available).
|
|
111
|
+
|
|
112
|
+
{context['custom_prompt']}
|
|
86
113
|
"""
|
|
87
114
|
return [
|
|
88
115
|
Message(role='system', content=sys_prompt),
|
|
@@ -92,7 +119,7 @@ Guidelines:
|
|
|
92
119
|
|
|
93
120
|
def extract_json(context: dict[str, Any]) -> list[Message]:
|
|
94
121
|
sys_prompt = """You are an AI assistant that extracts entity nodes from JSON.
|
|
95
|
-
Your primary task is to
|
|
122
|
+
Your primary task is to extract and classify relevant entities from JSON files"""
|
|
96
123
|
|
|
97
124
|
user_prompt = f"""
|
|
98
125
|
<SOURCE DESCRIPTION>:
|
|
@@ -101,10 +128,15 @@ def extract_json(context: dict[str, Any]) -> list[Message]:
|
|
|
101
128
|
<JSON>
|
|
102
129
|
{context['episode_content']}
|
|
103
130
|
</JSON>
|
|
131
|
+
<ENTITY TYPES>
|
|
132
|
+
{context['entity_types']}
|
|
133
|
+
</ENTITY TYPES>
|
|
104
134
|
|
|
105
135
|
{context['custom_prompt']}
|
|
106
136
|
|
|
107
|
-
Given the above source description and JSON, extract relevant
|
|
137
|
+
Given the above source description and JSON, extract relevant entities from the provided JSON.
|
|
138
|
+
For each entity extracted, also determine its entity type based on the provided ENTITY TYPES and their descriptions.
|
|
139
|
+
Indicate the classified entity type by providing its entity_type_id.
|
|
108
140
|
|
|
109
141
|
Guidelines:
|
|
110
142
|
1. Always try to extract the entity that the JSON represents. This will often be something like a "name" or "user" field.
|
|
@@ -117,16 +149,22 @@ Guidelines:
|
|
|
117
149
|
|
|
118
150
|
|
|
119
151
|
def extract_text(context: dict[str, Any]) -> list[Message]:
|
|
120
|
-
sys_prompt = """You are an AI assistant that extracts entity nodes from text.
|
|
152
|
+
sys_prompt = """You are an AI assistant that extracts entity nodes from text.
|
|
153
|
+
Your primary task is to extract and classify the speaker and other significant entities mentioned in the provided text."""
|
|
121
154
|
|
|
122
155
|
user_prompt = f"""
|
|
123
156
|
<TEXT>
|
|
124
157
|
{context['episode_content']}
|
|
125
158
|
</TEXT>
|
|
159
|
+
<ENTITY TYPES>
|
|
160
|
+
{context['entity_types']}
|
|
161
|
+
</ENTITY TYPES>
|
|
126
162
|
|
|
127
|
-
|
|
163
|
+
Given the above text, extract entities from the TEXT that are explicitly or implicitly mentioned.
|
|
164
|
+
For each entity extracted, also determine its entity type based on the provided ENTITY TYPES and their descriptions.
|
|
165
|
+
Indicate the classified entity type by providing its entity_type_id.
|
|
128
166
|
|
|
129
|
-
|
|
167
|
+
{context['custom_prompt']}
|
|
130
168
|
|
|
131
169
|
Guidelines:
|
|
132
170
|
1. Extract significant entities, concepts, or actors mentioned in the conversation.
|
|
@@ -196,10 +234,43 @@ def classify_nodes(context: dict[str, Any]) -> list[Message]:
|
|
|
196
234
|
]
|
|
197
235
|
|
|
198
236
|
|
|
237
|
+
def extract_attributes(context: dict[str, Any]) -> list[Message]:
|
|
238
|
+
return [
|
|
239
|
+
Message(
|
|
240
|
+
role='system',
|
|
241
|
+
content='You are a helpful assistant that extracts entity properties from the provided text.',
|
|
242
|
+
),
|
|
243
|
+
Message(
|
|
244
|
+
role='user',
|
|
245
|
+
content=f"""
|
|
246
|
+
|
|
247
|
+
<MESSAGES>
|
|
248
|
+
{json.dumps(context['previous_episodes'], indent=2)}
|
|
249
|
+
{json.dumps(context['episode_content'], indent=2)}
|
|
250
|
+
</MESSAGES>
|
|
251
|
+
|
|
252
|
+
Given the above MESSAGES and the following ENTITY, update any of its attributes based on the information provided
|
|
253
|
+
in MESSAGES. Use the provided attribute descriptions to better understand how each attribute should be determined.
|
|
254
|
+
|
|
255
|
+
Guidelines:
|
|
256
|
+
1. Do not hallucinate entity property values if they cannot be found in the current context.
|
|
257
|
+
2. Only use the provided MESSAGES and ENTITY to set attribute values.
|
|
258
|
+
3. The summary attribute represents a summary of the ENTITY, and should be updated with new information about the Entity from the MESSAGES.
|
|
259
|
+
Summaries must be no longer than 200 words.
|
|
260
|
+
|
|
261
|
+
<ENTITY>
|
|
262
|
+
{context['node']}
|
|
263
|
+
</ENTITY>
|
|
264
|
+
""",
|
|
265
|
+
),
|
|
266
|
+
]
|
|
267
|
+
|
|
268
|
+
|
|
199
269
|
versions: Versions = {
|
|
200
270
|
'extract_message': extract_message,
|
|
201
271
|
'extract_json': extract_json,
|
|
202
272
|
'extract_text': extract_text,
|
|
203
273
|
'reflexion': reflexion,
|
|
204
274
|
'classify_nodes': classify_nodes,
|
|
275
|
+
'extract_attributes': extract_attributes,
|
|
205
276
|
}
|
|
@@ -21,14 +21,10 @@ from pydantic import BaseModel, Field
|
|
|
21
21
|
from .models import Message, PromptFunction, PromptVersion
|
|
22
22
|
|
|
23
23
|
|
|
24
|
-
class InvalidatedEdge(BaseModel):
|
|
25
|
-
uuid: str = Field(..., description='The UUID of the edge to be invalidated')
|
|
26
|
-
fact: str = Field(..., description='Updated fact of the edge')
|
|
27
|
-
|
|
28
|
-
|
|
29
24
|
class InvalidatedEdges(BaseModel):
|
|
30
|
-
|
|
31
|
-
...,
|
|
25
|
+
contradicted_facts: list[int] = Field(
|
|
26
|
+
...,
|
|
27
|
+
description='List of ids of facts that should be invalidated. If no facts should be invalidated, the list should be empty.',
|
|
32
28
|
)
|
|
33
29
|
|
|
34
30
|
|
|
@@ -78,18 +74,22 @@ def v2(context: dict[str, Any]) -> list[Message]:
|
|
|
78
74
|
return [
|
|
79
75
|
Message(
|
|
80
76
|
role='system',
|
|
81
|
-
content='You are an AI assistant that
|
|
77
|
+
content='You are an AI assistant that determines which facts contradict each other.',
|
|
82
78
|
),
|
|
83
79
|
Message(
|
|
84
80
|
role='user',
|
|
85
81
|
content=f"""
|
|
86
|
-
Based on the provided
|
|
82
|
+
Based on the provided EXISTING FACTS and a NEW FACT, determine which existing facts the new fact contradicts.
|
|
83
|
+
Return a list containing all ids of the facts that are contradicted by the NEW FACT.
|
|
84
|
+
If there are no contradicted facts, return an empty list.
|
|
87
85
|
|
|
88
|
-
|
|
86
|
+
<EXISTING FACTS>
|
|
89
87
|
{context['existing_edges']}
|
|
88
|
+
</EXISTING FACTS>
|
|
90
89
|
|
|
91
|
-
|
|
90
|
+
<NEW FACT>
|
|
92
91
|
{context['new_edge']}
|
|
92
|
+
</NEW FACT>
|
|
93
93
|
""",
|
|
94
94
|
),
|
|
95
95
|
]
|
graphiti_core/search/search.py
CHANGED
|
@@ -22,8 +22,8 @@ from neo4j import AsyncDriver
|
|
|
22
22
|
|
|
23
23
|
from graphiti_core.cross_encoder.client import CrossEncoderClient
|
|
24
24
|
from graphiti_core.edges import EntityEdge
|
|
25
|
-
from graphiti_core.embedder import EmbedderClient
|
|
26
25
|
from graphiti_core.errors import SearchRerankerError
|
|
26
|
+
from graphiti_core.graphiti_types import GraphitiClients
|
|
27
27
|
from graphiti_core.helpers import semaphore_gather
|
|
28
28
|
from graphiti_core.nodes import CommunityNode, EntityNode, EpisodicNode
|
|
29
29
|
from graphiti_core.search.search_config import (
|
|
@@ -62,17 +62,21 @@ logger = logging.getLogger(__name__)
|
|
|
62
62
|
|
|
63
63
|
|
|
64
64
|
async def search(
|
|
65
|
-
|
|
66
|
-
embedder: EmbedderClient,
|
|
67
|
-
cross_encoder: CrossEncoderClient,
|
|
65
|
+
clients: GraphitiClients,
|
|
68
66
|
query: str,
|
|
69
67
|
group_ids: list[str] | None,
|
|
70
68
|
config: SearchConfig,
|
|
71
69
|
search_filter: SearchFilters,
|
|
72
70
|
center_node_uuid: str | None = None,
|
|
73
71
|
bfs_origin_node_uuids: list[str] | None = None,
|
|
72
|
+
query_vector: list[float] | None = None,
|
|
74
73
|
) -> SearchResults:
|
|
75
74
|
start = time()
|
|
75
|
+
|
|
76
|
+
driver = clients.driver
|
|
77
|
+
embedder = clients.embedder
|
|
78
|
+
cross_encoder = clients.cross_encoder
|
|
79
|
+
|
|
76
80
|
if query.strip() == '':
|
|
77
81
|
return SearchResults(
|
|
78
82
|
edges=[],
|
|
@@ -80,7 +84,11 @@ async def search(
|
|
|
80
84
|
episodes=[],
|
|
81
85
|
communities=[],
|
|
82
86
|
)
|
|
83
|
-
query_vector =
|
|
87
|
+
query_vector = (
|
|
88
|
+
query_vector
|
|
89
|
+
if query_vector is not None
|
|
90
|
+
else await embedder.create(input_data=[query.replace('\n', ' ')])
|
|
91
|
+
)
|
|
84
92
|
|
|
85
93
|
# if group_ids is empty, set it to None
|
|
86
94
|
group_ids = group_ids if group_ids else None
|