graphiti-core 0.20.3__py3-none-any.whl → 0.21.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of graphiti-core might be problematic. Click here for more details.
- graphiti_core/driver/driver.py +28 -0
- graphiti_core/driver/falkordb_driver.py +112 -0
- graphiti_core/driver/kuzu_driver.py +1 -0
- graphiti_core/driver/neo4j_driver.py +10 -2
- graphiti_core/driver/neptune_driver.py +4 -6
- graphiti_core/edges.py +67 -7
- graphiti_core/embedder/client.py +2 -1
- graphiti_core/graph_queries.py +35 -6
- graphiti_core/graphiti.py +36 -24
- graphiti_core/graphiti_types.py +0 -1
- graphiti_core/helpers.py +2 -2
- graphiti_core/llm_client/client.py +19 -4
- graphiti_core/llm_client/gemini_client.py +4 -2
- graphiti_core/llm_client/openai_base_client.py +3 -2
- graphiti_core/llm_client/openai_generic_client.py +3 -2
- graphiti_core/models/edges/edge_db_queries.py +36 -16
- graphiti_core/models/nodes/node_db_queries.py +30 -10
- graphiti_core/nodes.py +126 -25
- graphiti_core/prompts/dedupe_edges.py +40 -29
- graphiti_core/prompts/dedupe_nodes.py +51 -34
- graphiti_core/prompts/eval.py +3 -3
- graphiti_core/prompts/extract_edges.py +17 -9
- graphiti_core/prompts/extract_nodes.py +10 -9
- graphiti_core/prompts/prompt_helpers.py +3 -3
- graphiti_core/prompts/summarize_nodes.py +5 -5
- graphiti_core/search/search_filters.py +53 -0
- graphiti_core/search/search_helpers.py +5 -7
- graphiti_core/search/search_utils.py +227 -57
- graphiti_core/utils/bulk_utils.py +168 -69
- graphiti_core/utils/maintenance/community_operations.py +8 -20
- graphiti_core/utils/maintenance/dedup_helpers.py +262 -0
- graphiti_core/utils/maintenance/edge_operations.py +187 -50
- graphiti_core/utils/maintenance/graph_data_operations.py +9 -5
- graphiti_core/utils/maintenance/node_operations.py +244 -88
- graphiti_core/utils/maintenance/temporal_operations.py +0 -4
- {graphiti_core-0.20.3.dist-info → graphiti_core-0.21.0.dist-info}/METADATA +7 -1
- {graphiti_core-0.20.3.dist-info → graphiti_core-0.21.0.dist-info}/RECORD +39 -38
- {graphiti_core-0.20.3.dist-info → graphiti_core-0.21.0.dist-info}/WHEEL +0 -0
- {graphiti_core-0.20.3.dist-info → graphiti_core-0.21.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -15,6 +15,7 @@ limitations under the License.
|
|
|
15
15
|
"""
|
|
16
16
|
|
|
17
17
|
import logging
|
|
18
|
+
from collections.abc import Awaitable, Callable
|
|
18
19
|
from time import time
|
|
19
20
|
from typing import Any
|
|
20
21
|
|
|
@@ -24,7 +25,12 @@ from graphiti_core.graphiti_types import GraphitiClients
|
|
|
24
25
|
from graphiti_core.helpers import MAX_REFLEXION_ITERATIONS, semaphore_gather
|
|
25
26
|
from graphiti_core.llm_client import LLMClient
|
|
26
27
|
from graphiti_core.llm_client.config import ModelSize
|
|
27
|
-
from graphiti_core.nodes import
|
|
28
|
+
from graphiti_core.nodes import (
|
|
29
|
+
EntityNode,
|
|
30
|
+
EpisodeType,
|
|
31
|
+
EpisodicNode,
|
|
32
|
+
create_entity_node_embeddings,
|
|
33
|
+
)
|
|
28
34
|
from graphiti_core.prompts import prompt_library
|
|
29
35
|
from graphiti_core.prompts.dedupe_nodes import NodeDuplicate, NodeResolutions
|
|
30
36
|
from graphiti_core.prompts.extract_nodes import (
|
|
@@ -38,28 +44,37 @@ from graphiti_core.search.search_config import SearchResults
|
|
|
38
44
|
from graphiti_core.search.search_config_recipes import NODE_HYBRID_SEARCH_RRF
|
|
39
45
|
from graphiti_core.search.search_filters import SearchFilters
|
|
40
46
|
from graphiti_core.utils.datetime_utils import utc_now
|
|
41
|
-
from graphiti_core.utils.maintenance.
|
|
47
|
+
from graphiti_core.utils.maintenance.dedup_helpers import (
|
|
48
|
+
DedupCandidateIndexes,
|
|
49
|
+
DedupResolutionState,
|
|
50
|
+
_build_candidate_indexes,
|
|
51
|
+
_resolve_with_similarity,
|
|
52
|
+
)
|
|
53
|
+
from graphiti_core.utils.maintenance.edge_operations import (
|
|
54
|
+
filter_existing_duplicate_of_edges,
|
|
55
|
+
)
|
|
42
56
|
|
|
43
57
|
logger = logging.getLogger(__name__)
|
|
44
58
|
|
|
59
|
+
NodeSummaryFilter = Callable[[EntityNode], Awaitable[bool]]
|
|
60
|
+
|
|
45
61
|
|
|
46
62
|
async def extract_nodes_reflexion(
|
|
47
63
|
llm_client: LLMClient,
|
|
48
64
|
episode: EpisodicNode,
|
|
49
65
|
previous_episodes: list[EpisodicNode],
|
|
50
66
|
node_names: list[str],
|
|
51
|
-
|
|
67
|
+
group_id: str | None = None,
|
|
52
68
|
) -> list[str]:
|
|
53
69
|
# Prepare context for LLM
|
|
54
70
|
context = {
|
|
55
71
|
'episode_content': episode.content,
|
|
56
72
|
'previous_episodes': [ep.content for ep in previous_episodes],
|
|
57
73
|
'extracted_entities': node_names,
|
|
58
|
-
'ensure_ascii': ensure_ascii,
|
|
59
74
|
}
|
|
60
75
|
|
|
61
76
|
llm_response = await llm_client.generate_response(
|
|
62
|
-
prompt_library.extract_nodes.reflexion(context), MissedEntities
|
|
77
|
+
prompt_library.extract_nodes.reflexion(context), MissedEntities, group_id=group_id
|
|
63
78
|
)
|
|
64
79
|
missed_entities = llm_response.get('missed_entities', [])
|
|
65
80
|
|
|
@@ -108,7 +123,6 @@ async def extract_nodes(
|
|
|
108
123
|
'custom_prompt': custom_prompt,
|
|
109
124
|
'entity_types': entity_types_context,
|
|
110
125
|
'source_description': episode.source_description,
|
|
111
|
-
'ensure_ascii': clients.ensure_ascii,
|
|
112
126
|
}
|
|
113
127
|
|
|
114
128
|
while entities_missed and reflexion_iterations <= MAX_REFLEXION_ITERATIONS:
|
|
@@ -116,14 +130,19 @@ async def extract_nodes(
|
|
|
116
130
|
llm_response = await llm_client.generate_response(
|
|
117
131
|
prompt_library.extract_nodes.extract_message(context),
|
|
118
132
|
response_model=ExtractedEntities,
|
|
133
|
+
group_id=episode.group_id,
|
|
119
134
|
)
|
|
120
135
|
elif episode.source == EpisodeType.text:
|
|
121
136
|
llm_response = await llm_client.generate_response(
|
|
122
|
-
prompt_library.extract_nodes.extract_text(context),
|
|
137
|
+
prompt_library.extract_nodes.extract_text(context),
|
|
138
|
+
response_model=ExtractedEntities,
|
|
139
|
+
group_id=episode.group_id,
|
|
123
140
|
)
|
|
124
141
|
elif episode.source == EpisodeType.json:
|
|
125
142
|
llm_response = await llm_client.generate_response(
|
|
126
|
-
prompt_library.extract_nodes.extract_json(context),
|
|
143
|
+
prompt_library.extract_nodes.extract_json(context),
|
|
144
|
+
response_model=ExtractedEntities,
|
|
145
|
+
group_id=episode.group_id,
|
|
127
146
|
)
|
|
128
147
|
|
|
129
148
|
response_object = ExtractedEntities(**llm_response)
|
|
@@ -137,7 +156,7 @@ async def extract_nodes(
|
|
|
137
156
|
episode,
|
|
138
157
|
previous_episodes,
|
|
139
158
|
[entity.name for entity in extracted_entities],
|
|
140
|
-
|
|
159
|
+
episode.group_id,
|
|
141
160
|
)
|
|
142
161
|
|
|
143
162
|
entities_missed = len(missing_entities) != 0
|
|
@@ -178,20 +197,16 @@ async def extract_nodes(
|
|
|
178
197
|
logger.debug(f'Created new node: {new_node.name} (UUID: {new_node.uuid})')
|
|
179
198
|
|
|
180
199
|
logger.debug(f'Extracted nodes: {[(n.name, n.uuid) for n in extracted_nodes]}')
|
|
200
|
+
|
|
181
201
|
return extracted_nodes
|
|
182
202
|
|
|
183
203
|
|
|
184
|
-
async def
|
|
204
|
+
async def _collect_candidate_nodes(
|
|
185
205
|
clients: GraphitiClients,
|
|
186
206
|
extracted_nodes: list[EntityNode],
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
existing_nodes_override: list[EntityNode] | None = None,
|
|
191
|
-
) -> tuple[list[EntityNode], dict[str, str], list[tuple[EntityNode, EntityNode]]]:
|
|
192
|
-
llm_client = clients.llm_client
|
|
193
|
-
driver = clients.driver
|
|
194
|
-
|
|
207
|
+
existing_nodes_override: list[EntityNode] | None,
|
|
208
|
+
) -> list[EntityNode]:
|
|
209
|
+
"""Search per extracted name and return unique candidates with overrides honored in order."""
|
|
195
210
|
search_results: list[SearchResults] = await semaphore_gather(
|
|
196
211
|
*[
|
|
197
212
|
search(
|
|
@@ -205,33 +220,43 @@ async def resolve_extracted_nodes(
|
|
|
205
220
|
]
|
|
206
221
|
)
|
|
207
222
|
|
|
208
|
-
candidate_nodes: list[EntityNode] =
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
)
|
|
223
|
+
candidate_nodes: list[EntityNode] = [node for result in search_results for node in result.nodes]
|
|
224
|
+
|
|
225
|
+
if existing_nodes_override is not None:
|
|
226
|
+
candidate_nodes.extend(existing_nodes_override)
|
|
213
227
|
|
|
214
|
-
|
|
228
|
+
seen_candidate_uuids: set[str] = set()
|
|
229
|
+
ordered_candidates: list[EntityNode] = []
|
|
230
|
+
for candidate in candidate_nodes:
|
|
231
|
+
if candidate.uuid in seen_candidate_uuids:
|
|
232
|
+
continue
|
|
233
|
+
seen_candidate_uuids.add(candidate.uuid)
|
|
234
|
+
ordered_candidates.append(candidate)
|
|
215
235
|
|
|
216
|
-
|
|
236
|
+
return ordered_candidates
|
|
217
237
|
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
238
|
+
|
|
239
|
+
async def _resolve_with_llm(
|
|
240
|
+
llm_client: LLMClient,
|
|
241
|
+
extracted_nodes: list[EntityNode],
|
|
242
|
+
indexes: DedupCandidateIndexes,
|
|
243
|
+
state: DedupResolutionState,
|
|
244
|
+
episode: EpisodicNode | None,
|
|
245
|
+
previous_episodes: list[EpisodicNode] | None,
|
|
246
|
+
entity_types: dict[str, type[BaseModel]] | None,
|
|
247
|
+
) -> None:
|
|
248
|
+
"""Escalate unresolved nodes to the dedupe prompt so the LLM can select or reject duplicates.
|
|
249
|
+
|
|
250
|
+
The guardrails below defensively ignore malformed or duplicate LLM responses so the
|
|
251
|
+
ingestion workflow remains deterministic even when the model misbehaves.
|
|
252
|
+
"""
|
|
253
|
+
if not state.unresolved_indices:
|
|
254
|
+
return
|
|
231
255
|
|
|
232
256
|
entity_types_dict: dict[str, type[BaseModel]] = entity_types if entity_types is not None else {}
|
|
233
257
|
|
|
234
|
-
|
|
258
|
+
llm_extracted_nodes = [extracted_nodes[i] for i in state.unresolved_indices]
|
|
259
|
+
|
|
235
260
|
extracted_nodes_context = [
|
|
236
261
|
{
|
|
237
262
|
'id': i,
|
|
@@ -242,17 +267,49 @@ async def resolve_extracted_nodes(
|
|
|
242
267
|
).__doc__
|
|
243
268
|
or 'Default Entity Type',
|
|
244
269
|
}
|
|
245
|
-
for i, node in enumerate(
|
|
270
|
+
for i, node in enumerate(llm_extracted_nodes)
|
|
271
|
+
]
|
|
272
|
+
|
|
273
|
+
sent_ids = [ctx['id'] for ctx in extracted_nodes_context]
|
|
274
|
+
logger.debug(
|
|
275
|
+
'Sending %d entities to LLM for deduplication with IDs 0-%d (actual IDs sent: %s)',
|
|
276
|
+
len(llm_extracted_nodes),
|
|
277
|
+
len(llm_extracted_nodes) - 1,
|
|
278
|
+
sent_ids if len(sent_ids) < 20 else f'{sent_ids[:10]}...{sent_ids[-10:]}',
|
|
279
|
+
)
|
|
280
|
+
if llm_extracted_nodes:
|
|
281
|
+
sample_size = min(3, len(extracted_nodes_context))
|
|
282
|
+
logger.debug(
|
|
283
|
+
'First %d entities: %s',
|
|
284
|
+
sample_size,
|
|
285
|
+
[(ctx['id'], ctx['name']) for ctx in extracted_nodes_context[:sample_size]],
|
|
286
|
+
)
|
|
287
|
+
if len(extracted_nodes_context) > 3:
|
|
288
|
+
logger.debug(
|
|
289
|
+
'Last %d entities: %s',
|
|
290
|
+
sample_size,
|
|
291
|
+
[(ctx['id'], ctx['name']) for ctx in extracted_nodes_context[-sample_size:]],
|
|
292
|
+
)
|
|
293
|
+
|
|
294
|
+
existing_nodes_context = [
|
|
295
|
+
{
|
|
296
|
+
**{
|
|
297
|
+
'idx': i,
|
|
298
|
+
'name': candidate.name,
|
|
299
|
+
'entity_types': candidate.labels,
|
|
300
|
+
},
|
|
301
|
+
**candidate.attributes,
|
|
302
|
+
}
|
|
303
|
+
for i, candidate in enumerate(indexes.existing_nodes)
|
|
246
304
|
]
|
|
247
305
|
|
|
248
306
|
context = {
|
|
249
307
|
'extracted_nodes': extracted_nodes_context,
|
|
250
308
|
'existing_nodes': existing_nodes_context,
|
|
251
309
|
'episode_content': episode.content if episode is not None else '',
|
|
252
|
-
'previous_episodes':
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
'ensure_ascii': clients.ensure_ascii,
|
|
310
|
+
'previous_episodes': (
|
|
311
|
+
[ep.content for ep in previous_episodes] if previous_episodes is not None else []
|
|
312
|
+
),
|
|
256
313
|
}
|
|
257
314
|
|
|
258
315
|
llm_response = await llm_client.generate_response(
|
|
@@ -262,41 +319,127 @@ async def resolve_extracted_nodes(
|
|
|
262
319
|
|
|
263
320
|
node_resolutions: list[NodeDuplicate] = NodeResolutions(**llm_response).entity_resolutions
|
|
264
321
|
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
322
|
+
valid_relative_range = range(len(state.unresolved_indices))
|
|
323
|
+
processed_relative_ids: set[int] = set()
|
|
324
|
+
|
|
325
|
+
received_ids = {r.id for r in node_resolutions}
|
|
326
|
+
expected_ids = set(valid_relative_range)
|
|
327
|
+
missing_ids = expected_ids - received_ids
|
|
328
|
+
extra_ids = received_ids - expected_ids
|
|
329
|
+
|
|
330
|
+
logger.debug(
|
|
331
|
+
'Received %d resolutions for %d entities',
|
|
332
|
+
len(node_resolutions),
|
|
333
|
+
len(state.unresolved_indices),
|
|
334
|
+
)
|
|
335
|
+
|
|
336
|
+
if missing_ids:
|
|
337
|
+
logger.warning('LLM did not return resolutions for IDs: %s', sorted(missing_ids))
|
|
338
|
+
|
|
339
|
+
if extra_ids:
|
|
340
|
+
logger.warning(
|
|
341
|
+
'LLM returned invalid IDs outside valid range 0-%d: %s (all returned IDs: %s)',
|
|
342
|
+
len(state.unresolved_indices) - 1,
|
|
343
|
+
sorted(extra_ids),
|
|
344
|
+
sorted(received_ids),
|
|
345
|
+
)
|
|
346
|
+
|
|
268
347
|
for resolution in node_resolutions:
|
|
269
|
-
|
|
348
|
+
relative_id: int = resolution.id
|
|
270
349
|
duplicate_idx: int = resolution.duplicate_idx
|
|
271
350
|
|
|
272
|
-
|
|
351
|
+
if relative_id not in valid_relative_range:
|
|
352
|
+
logger.warning(
|
|
353
|
+
'Skipping invalid LLM dedupe id %d (valid range: 0-%d, received %d resolutions)',
|
|
354
|
+
relative_id,
|
|
355
|
+
len(state.unresolved_indices) - 1,
|
|
356
|
+
len(node_resolutions),
|
|
357
|
+
)
|
|
358
|
+
continue
|
|
273
359
|
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
360
|
+
if relative_id in processed_relative_ids:
|
|
361
|
+
logger.warning('Duplicate LLM dedupe id %s received; ignoring.', relative_id)
|
|
362
|
+
continue
|
|
363
|
+
processed_relative_ids.add(relative_id)
|
|
364
|
+
|
|
365
|
+
original_index = state.unresolved_indices[relative_id]
|
|
366
|
+
extracted_node = extracted_nodes[original_index]
|
|
367
|
+
|
|
368
|
+
resolved_node: EntityNode
|
|
369
|
+
if duplicate_idx == -1:
|
|
370
|
+
resolved_node = extracted_node
|
|
371
|
+
elif 0 <= duplicate_idx < len(indexes.existing_nodes):
|
|
372
|
+
resolved_node = indexes.existing_nodes[duplicate_idx]
|
|
373
|
+
else:
|
|
374
|
+
logger.warning(
|
|
375
|
+
'Invalid duplicate_idx %s for extracted node %s; treating as no duplicate.',
|
|
376
|
+
duplicate_idx,
|
|
377
|
+
extracted_node.uuid,
|
|
378
|
+
)
|
|
379
|
+
resolved_node = extracted_node
|
|
380
|
+
|
|
381
|
+
state.resolved_nodes[original_index] = resolved_node
|
|
382
|
+
state.uuid_map[extracted_node.uuid] = resolved_node.uuid
|
|
383
|
+
if resolved_node.uuid != extracted_node.uuid:
|
|
384
|
+
state.duplicate_pairs.append((extracted_node, resolved_node))
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
async def resolve_extracted_nodes(
|
|
388
|
+
clients: GraphitiClients,
|
|
389
|
+
extracted_nodes: list[EntityNode],
|
|
390
|
+
episode: EpisodicNode | None = None,
|
|
391
|
+
previous_episodes: list[EpisodicNode] | None = None,
|
|
392
|
+
entity_types: dict[str, type[BaseModel]] | None = None,
|
|
393
|
+
existing_nodes_override: list[EntityNode] | None = None,
|
|
394
|
+
) -> tuple[list[EntityNode], dict[str, str], list[tuple[EntityNode, EntityNode]]]:
|
|
395
|
+
"""Search for existing nodes, resolve deterministic matches, then escalate holdouts to the LLM dedupe prompt."""
|
|
396
|
+
llm_client = clients.llm_client
|
|
397
|
+
driver = clients.driver
|
|
398
|
+
existing_nodes = await _collect_candidate_nodes(
|
|
399
|
+
clients,
|
|
400
|
+
extracted_nodes,
|
|
401
|
+
existing_nodes_override,
|
|
402
|
+
)
|
|
279
403
|
|
|
280
|
-
|
|
404
|
+
indexes: DedupCandidateIndexes = _build_candidate_indexes(existing_nodes)
|
|
281
405
|
|
|
282
|
-
|
|
283
|
-
|
|
406
|
+
state = DedupResolutionState(
|
|
407
|
+
resolved_nodes=[None] * len(extracted_nodes),
|
|
408
|
+
uuid_map={},
|
|
409
|
+
unresolved_indices=[],
|
|
410
|
+
)
|
|
284
411
|
|
|
285
|
-
|
|
286
|
-
if duplicate_idx not in duplicates and duplicate_idx > -1:
|
|
287
|
-
duplicates.append(duplicate_idx)
|
|
288
|
-
for idx in duplicates:
|
|
289
|
-
existing_node = existing_nodes[idx] if idx < len(existing_nodes) else resolved_node
|
|
412
|
+
_resolve_with_similarity(extracted_nodes, indexes, state)
|
|
290
413
|
|
|
291
|
-
|
|
414
|
+
await _resolve_with_llm(
|
|
415
|
+
llm_client,
|
|
416
|
+
extracted_nodes,
|
|
417
|
+
indexes,
|
|
418
|
+
state,
|
|
419
|
+
episode,
|
|
420
|
+
previous_episodes,
|
|
421
|
+
entity_types,
|
|
422
|
+
)
|
|
423
|
+
|
|
424
|
+
for idx, node in enumerate(extracted_nodes):
|
|
425
|
+
if state.resolved_nodes[idx] is None:
|
|
426
|
+
state.resolved_nodes[idx] = node
|
|
427
|
+
state.uuid_map[node.uuid] = node.uuid
|
|
292
428
|
|
|
293
|
-
logger.debug(
|
|
429
|
+
logger.debug(
|
|
430
|
+
'Resolved nodes: %s',
|
|
431
|
+
[(node.name, node.uuid) for node in state.resolved_nodes if node is not None],
|
|
432
|
+
)
|
|
294
433
|
|
|
295
434
|
new_node_duplicates: list[
|
|
296
435
|
tuple[EntityNode, EntityNode]
|
|
297
|
-
] = await filter_existing_duplicate_of_edges(driver,
|
|
436
|
+
] = await filter_existing_duplicate_of_edges(driver, state.duplicate_pairs)
|
|
298
437
|
|
|
299
|
-
return
|
|
438
|
+
return (
|
|
439
|
+
[node for node in state.resolved_nodes if node is not None],
|
|
440
|
+
state.uuid_map,
|
|
441
|
+
new_node_duplicates,
|
|
442
|
+
)
|
|
300
443
|
|
|
301
444
|
|
|
302
445
|
async def extract_attributes_from_nodes(
|
|
@@ -305,6 +448,7 @@ async def extract_attributes_from_nodes(
|
|
|
305
448
|
episode: EpisodicNode | None = None,
|
|
306
449
|
previous_episodes: list[EpisodicNode] | None = None,
|
|
307
450
|
entity_types: dict[str, type[BaseModel]] | None = None,
|
|
451
|
+
should_summarize_node: NodeSummaryFilter | None = None,
|
|
308
452
|
) -> list[EntityNode]:
|
|
309
453
|
llm_client = clients.llm_client
|
|
310
454
|
embedder = clients.embedder
|
|
@@ -315,10 +459,12 @@ async def extract_attributes_from_nodes(
|
|
|
315
459
|
node,
|
|
316
460
|
episode,
|
|
317
461
|
previous_episodes,
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
462
|
+
(
|
|
463
|
+
entity_types.get(next((item for item in node.labels if item != 'Entity'), ''))
|
|
464
|
+
if entity_types is not None
|
|
465
|
+
else None
|
|
466
|
+
),
|
|
467
|
+
should_summarize_node,
|
|
322
468
|
)
|
|
323
469
|
for node in nodes
|
|
324
470
|
]
|
|
@@ -335,7 +481,7 @@ async def extract_attributes_from_node(
|
|
|
335
481
|
episode: EpisodicNode | None = None,
|
|
336
482
|
previous_episodes: list[EpisodicNode] | None = None,
|
|
337
483
|
entity_type: type[BaseModel] | None = None,
|
|
338
|
-
|
|
484
|
+
should_summarize_node: NodeSummaryFilter | None = None,
|
|
339
485
|
) -> EntityNode:
|
|
340
486
|
node_context: dict[str, Any] = {
|
|
341
487
|
'name': node.name,
|
|
@@ -347,43 +493,53 @@ async def extract_attributes_from_node(
|
|
|
347
493
|
attributes_context: dict[str, Any] = {
|
|
348
494
|
'node': node_context,
|
|
349
495
|
'episode_content': episode.content if episode is not None else '',
|
|
350
|
-
'previous_episodes':
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
'ensure_ascii': ensure_ascii,
|
|
496
|
+
'previous_episodes': (
|
|
497
|
+
[ep.content for ep in previous_episodes] if previous_episodes is not None else []
|
|
498
|
+
),
|
|
354
499
|
}
|
|
355
500
|
|
|
356
501
|
summary_context: dict[str, Any] = {
|
|
357
502
|
'node': node_context,
|
|
358
503
|
'episode_content': episode.content if episode is not None else '',
|
|
359
|
-
'previous_episodes':
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
'ensure_ascii': ensure_ascii,
|
|
504
|
+
'previous_episodes': (
|
|
505
|
+
[ep.content for ep in previous_episodes] if previous_episodes is not None else []
|
|
506
|
+
),
|
|
363
507
|
}
|
|
364
508
|
|
|
509
|
+
has_entity_attributes: bool = bool(
|
|
510
|
+
entity_type is not None and len(entity_type.model_fields) != 0
|
|
511
|
+
)
|
|
512
|
+
|
|
365
513
|
llm_response = (
|
|
366
514
|
(
|
|
367
515
|
await llm_client.generate_response(
|
|
368
516
|
prompt_library.extract_nodes.extract_attributes(attributes_context),
|
|
369
517
|
response_model=entity_type,
|
|
370
518
|
model_size=ModelSize.small,
|
|
519
|
+
group_id=node.group_id,
|
|
371
520
|
)
|
|
372
521
|
)
|
|
373
|
-
if
|
|
522
|
+
if has_entity_attributes
|
|
374
523
|
else {}
|
|
375
524
|
)
|
|
376
525
|
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
526
|
+
# Determine if summary should be generated
|
|
527
|
+
generate_summary = True
|
|
528
|
+
if should_summarize_node is not None:
|
|
529
|
+
generate_summary = await should_summarize_node(node)
|
|
530
|
+
|
|
531
|
+
# Conditionally generate summary
|
|
532
|
+
if generate_summary:
|
|
533
|
+
summary_response = await llm_client.generate_response(
|
|
534
|
+
prompt_library.extract_nodes.extract_summary(summary_context),
|
|
535
|
+
response_model=EntitySummary,
|
|
536
|
+
model_size=ModelSize.small,
|
|
537
|
+
group_id=node.group_id,
|
|
538
|
+
)
|
|
539
|
+
node.summary = summary_response.get('summary', '')
|
|
382
540
|
|
|
383
|
-
if entity_type is not None:
|
|
541
|
+
if has_entity_attributes and entity_type is not None:
|
|
384
542
|
entity_type(**llm_response)
|
|
385
|
-
|
|
386
|
-
node.summary = summary_response.get('summary', '')
|
|
387
543
|
node_attributes = {key: value for key, value in llm_response.items()}
|
|
388
544
|
|
|
389
545
|
node.attributes.update(node_attributes)
|
|
@@ -35,14 +35,12 @@ async def extract_edge_dates(
|
|
|
35
35
|
edge: EntityEdge,
|
|
36
36
|
current_episode: EpisodicNode,
|
|
37
37
|
previous_episodes: list[EpisodicNode],
|
|
38
|
-
ensure_ascii: bool = False,
|
|
39
38
|
) -> tuple[datetime | None, datetime | None]:
|
|
40
39
|
context = {
|
|
41
40
|
'edge_fact': edge.fact,
|
|
42
41
|
'current_episode': current_episode.content,
|
|
43
42
|
'previous_episodes': [ep.content for ep in previous_episodes],
|
|
44
43
|
'reference_timestamp': current_episode.valid_at.isoformat(),
|
|
45
|
-
'ensure_ascii': ensure_ascii,
|
|
46
44
|
}
|
|
47
45
|
llm_response = await llm_client.generate_response(
|
|
48
46
|
prompt_library.extract_edge_dates.v1(context), response_model=EdgeDates
|
|
@@ -75,7 +73,6 @@ async def get_edge_contradictions(
|
|
|
75
73
|
llm_client: LLMClient,
|
|
76
74
|
new_edge: EntityEdge,
|
|
77
75
|
existing_edges: list[EntityEdge],
|
|
78
|
-
ensure_ascii: bool = False,
|
|
79
76
|
) -> list[EntityEdge]:
|
|
80
77
|
start = time()
|
|
81
78
|
|
|
@@ -87,7 +84,6 @@ async def get_edge_contradictions(
|
|
|
87
84
|
context = {
|
|
88
85
|
'new_edge': new_edge_context,
|
|
89
86
|
'existing_edges': existing_edge_context,
|
|
90
|
-
'ensure_ascii': ensure_ascii,
|
|
91
87
|
}
|
|
92
88
|
|
|
93
89
|
llm_response = await llm_client.generate_response(
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: graphiti-core
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.21.0
|
|
4
4
|
Summary: A temporal graph building library
|
|
5
5
|
Project-URL: Homepage, https://help.getzep.com/graphiti/graphiti/overview
|
|
6
6
|
Project-URL: Repository, https://github.com/getzep/graphiti
|
|
@@ -20,6 +20,7 @@ Provides-Extra: anthropic
|
|
|
20
20
|
Requires-Dist: anthropic>=0.49.0; extra == 'anthropic'
|
|
21
21
|
Provides-Extra: dev
|
|
22
22
|
Requires-Dist: anthropic>=0.49.0; extra == 'dev'
|
|
23
|
+
Requires-Dist: boto3>=1.39.16; extra == 'dev'
|
|
23
24
|
Requires-Dist: diskcache-stubs>=5.6.3.6.20240818; extra == 'dev'
|
|
24
25
|
Requires-Dist: falkordb<2.0.0,>=1.1.2; extra == 'dev'
|
|
25
26
|
Requires-Dist: google-genai>=1.8.0; extra == 'dev'
|
|
@@ -28,9 +29,11 @@ Requires-Dist: ipykernel>=6.29.5; extra == 'dev'
|
|
|
28
29
|
Requires-Dist: jupyterlab>=4.2.4; extra == 'dev'
|
|
29
30
|
Requires-Dist: kuzu>=0.11.2; extra == 'dev'
|
|
30
31
|
Requires-Dist: langchain-anthropic>=0.2.4; extra == 'dev'
|
|
32
|
+
Requires-Dist: langchain-aws>=0.2.29; extra == 'dev'
|
|
31
33
|
Requires-Dist: langchain-openai>=0.2.6; extra == 'dev'
|
|
32
34
|
Requires-Dist: langgraph>=0.2.15; extra == 'dev'
|
|
33
35
|
Requires-Dist: langsmith>=0.1.108; extra == 'dev'
|
|
36
|
+
Requires-Dist: opensearch-py>=3.0.0; extra == 'dev'
|
|
34
37
|
Requires-Dist: pyright>=1.1.404; extra == 'dev'
|
|
35
38
|
Requires-Dist: pytest-asyncio>=0.24.0; extra == 'dev'
|
|
36
39
|
Requires-Dist: pytest-xdist>=3.6.1; extra == 'dev'
|
|
@@ -47,6 +50,9 @@ Provides-Extra: groq
|
|
|
47
50
|
Requires-Dist: groq>=0.2.0; extra == 'groq'
|
|
48
51
|
Provides-Extra: kuzu
|
|
49
52
|
Requires-Dist: kuzu>=0.11.2; extra == 'kuzu'
|
|
53
|
+
Provides-Extra: neo4j-opensearch
|
|
54
|
+
Requires-Dist: boto3>=1.39.16; extra == 'neo4j-opensearch'
|
|
55
|
+
Requires-Dist: opensearch-py>=3.0.0; extra == 'neo4j-opensearch'
|
|
50
56
|
Provides-Extra: neptune
|
|
51
57
|
Requires-Dist: boto3>=1.39.16; extra == 'neptune'
|
|
52
58
|
Requires-Dist: langchain-aws>=0.2.29; extra == 'neptune'
|