graphiti-core 0.3.9__tar.gz → 0.3.12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of graphiti-core has been flagged as possibly problematic.
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/PKG-INFO +1 -1
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/edges.py +5 -5
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/embedder/openai.py +1 -1
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/embedder/voyage.py +1 -1
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/errors.py +8 -0
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/graphiti.py +11 -10
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/nodes.py +12 -6
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/search/search.py +1 -1
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/search/search_config_recipes.py +3 -2
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/search/search_utils.py +60 -65
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/utils/maintenance/community_operations.py +1 -1
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/utils/maintenance/edge_operations.py +5 -5
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/utils/maintenance/node_operations.py +5 -5
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/utils/maintenance/temporal_operations.py +1 -1
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/pyproject.toml +1 -1
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/LICENSE +0 -0
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/README.md +0 -0
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/__init__.py +0 -0
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/embedder/__init__.py +0 -0
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/embedder/client.py +0 -0
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/helpers.py +0 -0
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/llm_client/__init__.py +0 -0
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/llm_client/anthropic_client.py +0 -0
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/llm_client/client.py +0 -0
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/llm_client/config.py +0 -0
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/llm_client/errors.py +0 -0
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/llm_client/groq_client.py +0 -0
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/llm_client/openai_client.py +0 -0
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/llm_client/utils.py +0 -0
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/prompts/__init__.py +0 -0
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/prompts/dedupe_edges.py +0 -0
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/prompts/dedupe_nodes.py +0 -0
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/prompts/eval.py +0 -0
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/prompts/extract_edge_dates.py +0 -0
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/prompts/extract_edges.py +0 -0
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/prompts/extract_nodes.py +0 -0
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/prompts/invalidate_edges.py +0 -0
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/prompts/lib.py +0 -0
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/prompts/models.py +0 -0
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/prompts/summarize_nodes.py +0 -0
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/py.typed +0 -0
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/search/__init__.py +0 -0
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/search/search_config.py +0 -0
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/utils/__init__.py +0 -0
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/utils/bulk_utils.py +0 -0
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/utils/maintenance/__init__.py +0 -0
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/utils/maintenance/graph_data_operations.py +0 -0
- {graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/utils/maintenance/utils.py +0 -0
{graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/edges.py
RENAMED
@@ -51,7 +51,7 @@ class Edge(BaseModel, ABC):
 uuid=self.uuid,
 )

-logger.
+logger.debug(f'Deleted Edge: {self.uuid}')

 return result

@@ -83,7 +83,7 @@ class EpisodicEdge(Edge):
 created_at=self.created_at,
 )

-logger.
+logger.debug(f'Saved edge to neo4j: {self.uuid}')

 return result

@@ -178,7 +178,7 @@ class EntityEdge(Edge):
 self.fact_embedding = await embedder.create(input=[text])

 end = time()
-logger.
+logger.debug(f'embedded {text} in {end - start} ms')

 return self.fact_embedding

@@ -206,7 +206,7 @@ class EntityEdge(Edge):
 invalid_at=self.invalid_at,
 )

-logger.
+logger.debug(f'Saved edge to neo4j: {self.uuid}')

 return result

@@ -313,7 +313,7 @@ class CommunityEdge(Edge):
 created_at=self.created_at,
 )

-logger.
+logger.debug(f'Saved edge to neo4j: {self.uuid}')

 return result

{graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/embedder/openai.py
RENAMED
@@ -42,7 +42,7 @@ class OpenAIEmbedder(EmbedderClient):
 self.client = AsyncOpenAI(api_key=config.api_key, base_url=config.base_url)

 async def create(
-
+    self, input: str | List[str] | Iterable[int] | Iterable[Iterable[int]]
 ) -> list[float]:
 result = await self.client.embeddings.create(input=input, model=self.config.embedding_model)
 return result.data[0].embedding[: self.config.embedding_dim]
{graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/embedder/voyage.py
RENAMED
@@ -41,7 +41,7 @@ class VoyageAIEmbedder(EmbedderClient):
 self.client = voyageai.AsyncClient(api_key=config.api_key)

 async def create(
-
+    self, input: str | List[str] | Iterable[int] | Iterable[Iterable[int]]
 ) -> list[float]:
 result = await self.client.embed(input, model=self.config.embedding_model)
 return result.embeddings[0][: self.config.embedding_dim]
{graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/errors.py
RENAMED
@@ -35,6 +35,14 @@ class GroupsEdgesNotFoundError(GraphitiError):
 super().__init__(self.message)


+class GroupsNodesNotFoundError(GraphitiError):
+    """Raised when no nodes are found for a list of group ids."""
+
+    def __init__(self, group_ids: list[str]):
+        self.message = f'no nodes found for group ids {group_ids}'
+        super().__init__(self.message)
+
+
 class NodeNotFoundError(GraphitiError):
 """Raised when a node is not found."""

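The hunk above only defines the new GroupsNodesNotFoundError type; the call sites that raise it are not part of this diff. A minimal usage sketch (illustrative only; the lookup helper is hypothetical):

    from graphiti_core.errors import GroupsNodesNotFoundError

    async def nodes_for_groups(driver, group_ids: list[str]):
        # Hypothetical caller, not in the diff: shows how the new error
        # can be raised and later caught, with .message set in __init__.
        nodes = await fetch_nodes_by_group_ids(driver, group_ids)  # assumed helper
        if not nodes:
            raise GroupsNodesNotFoundError(group_ids)
        return nodes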
{graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/graphiti.py
RENAMED
@@ -319,13 +319,11 @@ class Graphiti:
 valid_at=reference_time,
 )
 episode.uuid = uuid if uuid is not None else episode.uuid
-if not self.store_raw_episode_content:
-    episode.content = ''

 # Extract entities as nodes

 extracted_nodes = await extract_nodes(self.llm_client, episode, previous_episodes)
-logger.
+logger.debug(f'Extracted nodes: {[(n.name, n.uuid) for n in extracted_nodes]}')

 # Calculate Embeddings

@@ -340,7 +338,7 @@ class Graphiti:
 )
 )

-logger.
+logger.debug(f'Extracted nodes: {[(n.name, n.uuid) for n in extracted_nodes]}')

 (mentioned_nodes, uuid_map), extracted_edges = await asyncio.gather(
 resolve_extracted_nodes(self.llm_client, extracted_nodes, existing_nodes_lists),
@@ -348,7 +346,7 @@ class Graphiti:
 self.llm_client, episode, extracted_nodes, previous_episodes, group_id
 ),
 )
-logger.
+logger.debug(f'Adjusted mentioned nodes: {[(n.name, n.uuid) for n in mentioned_nodes]}')
 nodes = mentioned_nodes

 extracted_edges_with_resolved_pointers = resolve_edge_pointers(
@@ -378,10 +376,10 @@ class Graphiti:
 ]
 )
 )
-logger.
+logger.debug(
 f'Related edges lists: {[(e.name, e.uuid) for edges_lst in related_edges_list for e in edges_lst]}'
 )
-logger.
+logger.debug(
 f'Extracted edges: {[(e.name, e.uuid) for e in extracted_edges_with_resolved_pointers]}'
 )

@@ -433,15 +431,18 @@ class Graphiti:

 entity_edges.extend(resolved_edges + invalidated_edges)

-logger.
+logger.debug(f'Resolved edges: {[(e.name, e.uuid) for e in resolved_edges]}')

 episodic_edges: list[EpisodicEdge] = build_episodic_edges(mentioned_nodes, episode, now)

-logger.
+logger.debug(f'Built episodic edges: {episodic_edges}')

 episode.entity_edges = [edge.uuid for edge in entity_edges]

 # Future optimization would be using batch operations to save nodes and edges
+if not self.store_raw_episode_content:
+    episode.content = ''
+
 await episode.save(self.driver)
 await asyncio.gather(*[node.save(self.driver) for node in nodes])
 await asyncio.gather(*[edge.save(self.driver) for edge in episodic_edges])
@@ -563,7 +564,7 @@ class Graphiti:
 edges = await dedupe_edges_bulk(
 self.driver, self.llm_client, extracted_edges_with_resolved_pointers
 )
-logger.
+logger.debug(f'extracted edge length: {len(edges)}')

 # invalidate edges

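Note that the store_raw_episode_content check moves from before entity extraction to just before the episode is persisted, so extraction still runs over the raw text even when it is not stored. A short sketch of the resulting order of operations (paraphrased from the hunks above, not a verbatim excerpt):

    # with store_raw_episode_content=False:
    extracted_nodes = await extract_nodes(self.llm_client, episode, previous_episodes)
    # ... nodes and edges are resolved and invalidated ...
    if not self.store_raw_episode_content:
        episode.content = ''  # raw text is dropped only at save time
    await episode.save(self.driver)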
{graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/nodes.py
RENAMED
@@ -86,7 +86,7 @@ class Node(BaseModel, ABC):
 uuid=self.uuid,
 )

-logger.
+logger.debug(f'Deleted Node: {self.uuid}')

 return result

@@ -135,7 +135,7 @@ class EpisodicNode(Node):
 source=self.source.value,
 )

-logger.
+logger.debug(f'Saved Node to neo4j: {self.uuid}')

 return result

@@ -217,7 +217,7 @@ class EntityNode(Node):
 text = self.name.replace('\n', ' ')
 self.name_embedding = await embedder.create(input=[text])
 end = time()
-logger.
+logger.debug(f'embedded {text} in {end - start} ms')

 return self.name_embedding

@@ -236,7 +236,7 @@ class EntityNode(Node):
 created_at=self.created_at,
 )

-logger.
+logger.debug(f'Saved Node to neo4j: {self.uuid}')

 return result

@@ -258,6 +258,9 @@ class EntityNode(Node):

 nodes = [get_entity_node_from_record(record) for record in records]

+if len(nodes) == 0:
+    raise NodeNotFoundError(uuid)
+
 return nodes[0]

 @classmethod
@@ -320,7 +323,7 @@ class CommunityNode(Node):
 created_at=self.created_at,
 )

-logger.
+logger.debug(f'Saved Node to neo4j: {self.uuid}')

 return result

@@ -329,7 +332,7 @@ class CommunityNode(Node):
 text = self.name.replace('\n', ' ')
 self.name_embedding = await embedder.create(input=[text])
 end = time()
-logger.
+logger.debug(f'embedded {text} in {end - start} ms')

 return self.name_embedding

@@ -351,6 +354,9 @@ class CommunityNode(Node):

 nodes = [get_community_node_from_record(record) for record in records]

+if len(nodes) == 0:
+    raise NodeNotFoundError(uuid)
+
 return nodes[0]

 @classmethod
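With the two guards above, an unknown uuid now raises NodeNotFoundError instead of failing on nodes[0] with an IndexError. A minimal handling sketch, assuming these hunks sit in the get_by_uuid classmethods (the method names are not visible in the hunks):

    from graphiti_core.errors import NodeNotFoundError
    from graphiti_core.nodes import EntityNode

    async def load_entity(driver, uuid: str) -> EntityNode | None:
        # returns None instead of propagating the lookup failure
        try:
            return await EntityNode.get_by_uuid(driver, uuid)
        except NodeNotFoundError:
            return None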
{graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/search/search.py
RENAMED
@@ -160,7 +160,7 @@ async def edge_search(
 for edge in sorted_results:
 source_to_edge_uuid_map[edge.source_node_uuid].append(edge.uuid)

-source_uuids = [
+source_uuids = [source_node_uuid for source_node_uuid in source_to_edge_uuid_map]

 reranked_node_uuids = await node_distance_reranker(driver, source_uuids, center_node_uuid)

{graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/search/search_config_recipes.py
RENAMED
@@ -68,7 +68,7 @@ EDGE_HYBRID_SEARCH_RRF = SearchConfig(
 )

 # performs a hybrid search over edges with mmr reranking
-
+EDGE_HYBRID_SEARCH_MMR = SearchConfig(
 edge_config=EdgeSearchConfig(
 search_methods=[EdgeSearchMethod.bm25, EdgeSearchMethod.cosine_similarity],
 reranker=EdgeReranker.mmr,
@@ -80,7 +80,8 @@ EDGE_HYBRID_SEARCH_NODE_DISTANCE = SearchConfig(
 edge_config=EdgeSearchConfig(
 search_methods=[EdgeSearchMethod.bm25, EdgeSearchMethod.cosine_similarity],
 reranker=EdgeReranker.node_distance,
-)
+),
+limit=30,
 )

 # performs a hybrid search over edges with episode mention reranking
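EDGE_HYBRID_SEARCH_NODE_DISTANCE now carries an explicit limit=30 on the recipe itself. A sketch of overriding that per call site without mutating the shared constant, assuming SearchConfig is a pydantic v2 model like the other graphiti_core models:

    from graphiti_core.search.search_config_recipes import EDGE_HYBRID_SEARCH_NODE_DISTANCE

    # derive a private copy with a smaller result limit (assumption: model_copy is available)
    my_config = EDGE_HYBRID_SEARCH_NODE_DISTANCE.model_copy(update={'limit': 10})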
{graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/search/search_utils.py
RENAMED
@@ -49,17 +49,17 @@ def fulltext_query(query: str, group_ids: list[str] | None = None):

 group_ids_filter += ' AND ' if group_ids_filter else ''

-
-full_query = group_ids_filter +
+lucene_query = lucene_sanitize(query)
+full_query = group_ids_filter + lucene_query

 return full_query


 async def get_episodes_by_mentions(
-
-
-
-
+    driver: AsyncDriver,
+    nodes: list[EntityNode],
+    edges: list[EntityEdge],
+    limit: int = RELEVANT_SCHEMA_LIMIT,
 ) -> list[EpisodicNode]:
 episode_uuids: list[str] = []
 for edge in edges:
@@ -71,7 +71,7 @@ async def get_episodes_by_mentions(


 async def get_mentioned_nodes(
-
+    driver: AsyncDriver, episodes: list[EpisodicNode]
 ) -> list[EntityNode]:
 episode_uuids = [episode.uuid for episode in episodes]
 records, _, _ = await driver.execute_query(
@@ -94,7 +94,7 @@ async def get_mentioned_nodes(


 async def get_communities_by_nodes(
-
+    driver: AsyncDriver, nodes: list[EntityNode]
 ) -> list[CommunityNode]:
 node_uuids = [node.uuid for node in nodes]
 records, _, _ = await driver.execute_query(
@@ -117,12 +117,12 @@ async def get_communities_by_nodes(


 async def edge_fulltext_search(
-
-
-
-
-
-
+    driver: AsyncDriver,
+    query: str,
+    source_node_uuid: str | None,
+    target_node_uuid: str | None,
+    group_ids: list[str] | None = None,
+    limit=RELEVANT_SCHEMA_LIMIT,
 ) -> list[EntityEdge]:
 # fulltext search over facts
 fuzzy_query = fulltext_query(query, group_ids)
@@ -162,13 +162,13 @@ async def edge_fulltext_search(


 async def edge_similarity_search(
-
-
-
-
-
-
-
+    driver: AsyncDriver,
+    search_vector: list[float],
+    source_node_uuid: str | None,
+    target_node_uuid: str | None,
+    group_ids: list[str] | None = None,
+    limit: int = RELEVANT_SCHEMA_LIMIT,
+    min_score: float = DEFAULT_MIN_SCORE,
 ) -> list[EntityEdge]:
 # vector similarity search over embedded facts
 query = Query("""
@@ -212,10 +212,10 @@ async def edge_similarity_search(


 async def node_fulltext_search(
-
-
-
-
+    driver: AsyncDriver,
+    query: str,
+    group_ids: list[str] | None = None,
+    limit=RELEVANT_SCHEMA_LIMIT,
 ) -> list[EntityNode]:
 # BM25 search to get top nodes
 fuzzy_query = fulltext_query(query, group_ids)
@@ -244,11 +244,11 @@ async def node_fulltext_search(


 async def node_similarity_search(
-
-
-
-
-
+    driver: AsyncDriver,
+    search_vector: list[float],
+    group_ids: list[str] | None = None,
+    limit=RELEVANT_SCHEMA_LIMIT,
+    min_score: float = DEFAULT_MIN_SCORE,
 ) -> list[EntityNode]:
 # vector similarity search over entity names
 records, _, _ = await driver.execute_query(
@@ -279,10 +279,10 @@ async def node_similarity_search(


 async def community_fulltext_search(
-
-
-
-
+    driver: AsyncDriver,
+    query: str,
+    group_ids: list[str] | None = None,
+    limit=RELEVANT_SCHEMA_LIMIT,
 ) -> list[CommunityNode]:
 # BM25 search to get top communities
 fuzzy_query = fulltext_query(query, group_ids)
@@ -311,11 +311,11 @@ async def community_fulltext_search(


 async def community_similarity_search(
-
-
-
-
-
+    driver: AsyncDriver,
+    search_vector: list[float],
+    group_ids: list[str] | None = None,
+    limit=RELEVANT_SCHEMA_LIMIT,
+    min_score=DEFAULT_MIN_SCORE,
 ) -> list[CommunityNode]:
 # vector similarity search over entity names
 records, _, _ = await driver.execute_query(
@@ -346,11 +346,11 @@ async def community_similarity_search(


 async def hybrid_node_search(
-
-
-
-
-
+    queries: list[str],
+    embeddings: list[list[float]],
+    driver: AsyncDriver,
+    group_ids: list[str] | None = None,
+    limit: int = RELEVANT_SCHEMA_LIMIT,
 ) -> list[EntityNode]:
 """
 Perform a hybrid search for nodes using both text queries and embeddings.
@@ -408,13 +408,13 @@ async def hybrid_node_search(
 relevant_nodes: list[EntityNode] = [node_uuid_map[uuid] for uuid in ranked_uuids]

 end = time()
-logger.
+logger.debug(f'Found relevant nodes: {ranked_uuids} in {(end - start) * 1000} ms')
 return relevant_nodes


 async def get_relevant_nodes(
-
-
+    nodes: list[EntityNode],
+    driver: AsyncDriver,
 ) -> list[EntityNode]:
 """
 Retrieve relevant nodes based on the provided list of EntityNodes.
@@ -451,11 +451,11 @@ async def get_relevant_nodes(


 async def get_relevant_edges(
-
-
-
-
-
+    driver: AsyncDriver,
+    edges: list[EntityEdge],
+    source_node_uuid: str | None,
+    target_node_uuid: str | None,
+    limit: int = RELEVANT_SCHEMA_LIMIT,
 ) -> list[EntityEdge]:
 start = time()
 relevant_edges: list[EntityEdge] = []
@@ -491,7 +491,7 @@ async def get_relevant_edges(
 relevant_edges.append(edge)

 end = time()
-logger.
+logger.debug(f'Found relevant edges: {relevant_edge_uuids} in {(end - start) * 1000} ms')

 return relevant_edges

@@ -512,7 +512,7 @@ def rrf(results: list[list[str]], rank_const=1) -> list[str]:


 async def node_distance_reranker(
-
+    driver: AsyncDriver, node_uuids: list[str], center_node_uuid: str
 ) -> list[str]:
 # filter out node_uuid center node node uuid
 filtered_uuids = list(filter(lambda uuid: uuid != center_node_uuid, node_uuids))
@@ -544,7 +544,7 @@ async def node_distance_reranker(
 # rerank on shortest distance
 filtered_uuids.sort(key=lambda cur_uuid: scores[cur_uuid])

-# add back in filtered center
+# add back in filtered center uuid
 filtered_uuids = [center_node_uuid] + filtered_uuids

 return filtered_uuids
@@ -582,19 +582,14 @@ async def episode_mentions_reranker(driver: AsyncDriver, node_uuids: list[list[s


 def maximal_marginal_relevance(
-
-
-
+    query_vector: list[float],
+    candidates: list[tuple[str, list[float]]],
+    mmr_lambda: float = DEFAULT_MMR_LAMBDA,
 ):
 candidates_with_mmr: list[tuple[str, float]] = []
 for candidate in candidates:
-max_sim = max(
-
-np.dot(normalize_l2(candidate[1]), normalize_l2(c[1]))
-for c in candidates
-]
-)
-mmr = mmr_lambda * np.dot(candidate[1], query_vector) + (1 - mmr_lambda) * max_sim
+max_sim = max([np.dot(normalize_l2(candidate[1]), normalize_l2(c[1])) for c in candidates])
+mmr = mmr_lambda * np.dot(candidate[1], query_vector) - (1 - mmr_lambda) * max_sim
 candidates_with_mmr.append((candidate[0], mmr))

 candidates_with_mmr.sort(reverse=True, key=lambda c: c[1])
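The maximal_marginal_relevance change is behavioural, not just cosmetic: the redundancy term is now subtracted, giving the usual MMR score λ·sim(query, c) − (1 − λ)·max_sim(c, other candidates). A self-contained sketch of that scoring (plain numpy; normalize_l2 here is a local stand-in for the library helper, and, as in the hunk above, max_sim is taken over all candidates including the current one):

    import numpy as np

    def normalize_l2(v):
        # local stand-in for graphiti_core's normalize_l2 helper
        arr = np.asarray(v, dtype=float)
        norm = np.linalg.norm(arr)
        return arr / norm if norm > 0 else arr

    def mmr_rank(query_vector, candidates, mmr_lambda=0.5):
        # candidates: list of (uuid, embedding) pairs, as in the diff above
        scored = []
        for uuid, emb in candidates:
            max_sim = max(
                float(np.dot(normalize_l2(emb), normalize_l2(other)))
                for _, other in candidates
            )
            # relevance minus redundancy: higher mmr_lambda favours query relevance
            score = mmr_lambda * float(np.dot(emb, query_vector)) - (1 - mmr_lambda) * max_sim
            scored.append((uuid, score))
        return sorted(scored, key=lambda c: c[1], reverse=True)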
{graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/utils/maintenance/community_operations.py
RENAMED
@@ -179,7 +179,7 @@ async def build_community(
 )
 community_edges = build_community_edges(community_cluster, community_node, now)

-logger.
+logger.debug((community_node, community_edges))

 return community_node, community_edges

{graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/utils/maintenance/edge_operations.py
RENAMED
@@ -97,7 +97,7 @@ async def extract_edges(
 edges_data = llm_response.get('edges', [])

 end = time()
-logger.
+logger.debug(f'Extracted new edges: {edges_data} in {(end - start) * 1000} ms')

 # Convert the extracted data into EntityEdge objects
 edges = []
@@ -115,7 +115,7 @@ async def extract_edges(
 invalid_at=None,
 )
 edges.append(edge)
-logger.
+logger.debug(
 f'Created new edge: {edge.name} from (UUID: {edge.source_node_uuid}) to (UUID: {edge.target_node_uuid})'
 )

@@ -144,7 +144,7 @@ async def dedupe_extracted_edges(

 llm_response = await llm_client.generate_response(prompt_library.dedupe_edges.v1(context))
 duplicate_data = llm_response.get('duplicates', [])
-logger.
+logger.debug(f'Extracted unique edges: {duplicate_data}')

 duplicate_uuid_map: dict[str, str] = {}
 for duplicate in duplicate_data:
@@ -299,7 +299,7 @@ async def dedupe_extracted_edge(
 edge = existing_edge

 end = time()
-logger.
+logger.debug(
 f'Resolved Edge: {extracted_edge.name} is {edge.name}, in {(end - start) * 1000} ms'
 )

@@ -326,7 +326,7 @@ async def dedupe_edge_list(
 unique_edges_data = llm_response.get('unique_facts', [])

 end = time()
-logger.
+logger.debug(f'Extracted edge duplicates: {unique_edges_data} in {(end - start) * 1000} ms ')

 # Get full edge data
 unique_edges = []
{graphiti_core-0.3.9 → graphiti_core-0.3.12}/graphiti_core/utils/maintenance/node_operations.py
RENAMED
@@ -104,7 +104,7 @@ async def extract_nodes(
 extracted_node_data = await extract_json_nodes(llm_client, episode)

 end = time()
-logger.
+logger.debug(f'Extracted new nodes: {extracted_node_data} in {(end - start) * 1000} ms')
 # Convert the extracted data into EntityNode objects
 new_nodes = []
 for node_data in extracted_node_data:
@@ -116,7 +116,7 @@ async def extract_nodes(
 created_at=datetime.now(),
 )
 new_nodes.append(new_node)
-logger.
+logger.debug(f'Created new node: {new_node.name} (UUID: {new_node.uuid})')

 return new_nodes

@@ -152,7 +152,7 @@ async def dedupe_extracted_nodes(
 duplicate_data = llm_response.get('duplicates', [])

 end = time()
-logger.
+logger.debug(f'Deduplicated nodes: {duplicate_data} in {(end - start) * 1000} ms')

 uuid_map: dict[str, str] = {}
 for duplicate in duplicate_data:
@@ -232,7 +232,7 @@ async def resolve_extracted_node(
 uuid_map[extracted_node.uuid] = existing_node.uuid

 end = time()
-logger.
+logger.debug(
 f'Resolved node: {extracted_node.name} is {node.name}, in {(end - start) * 1000} ms'
 )

@@ -266,7 +266,7 @@ async def dedupe_node_list(
 nodes_data = llm_response.get('nodes', [])

 end = time()
-logger.
+logger.debug(f'Deduplicated nodes: {nodes_data} in {(end - start) * 1000} ms')

 # Get full node data
 unique_nodes = []