graphiti-core 0.1.0-py3-none-any.whl → 0.2.0-py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only.

graphiti_core/graphiti.py CHANGED
@@ -29,6 +29,7 @@ from graphiti_core.llm_client.utils import generate_embedding
 from graphiti_core.nodes import EntityNode, EpisodeType, EpisodicNode
 from graphiti_core.search.search import Reranker, SearchConfig, SearchMethod, hybrid_search
 from graphiti_core.search.search_utils import (
+    RELEVANT_SCHEMA_LIMIT,
     get_relevant_edges,
     get_relevant_nodes,
     hybrid_node_search,
@@ -41,19 +42,23 @@ from graphiti_core.utils.bulk_utils import (
     RawEpisode,
     dedupe_edges_bulk,
     dedupe_nodes_bulk,
+    extract_edge_dates_bulk,
     extract_nodes_and_edges_bulk,
     resolve_edge_pointers,
     retrieve_previous_episodes_bulk,
 )
 from graphiti_core.utils.maintenance.edge_operations import (
-    dedupe_extracted_edges,
     extract_edges,
+    resolve_extracted_edges,
 )
 from graphiti_core.utils.maintenance.graph_data_operations import (
     EPISODE_WINDOW_LEN,
     build_indices_and_constraints,
 )
-from graphiti_core.utils.maintenance.node_operations import dedupe_extracted_nodes, extract_nodes
+from graphiti_core.utils.maintenance.node_operations import (
+    extract_nodes,
+    resolve_extracted_nodes,
+)
 from graphiti_core.utils.maintenance.temporal_operations import (
     extract_edge_dates,
     invalidate_edges,
@@ -175,9 +180,9 @@ class Graphiti:
         await build_indices_and_constraints(self.driver)

     async def retrieve_episodes(
-        self,
-        reference_time: datetime,
-        last_n: int = EPISODE_WINDOW_LEN,
+        self,
+        reference_time: datetime,
+        last_n: int = EPISODE_WINDOW_LEN,
     ) -> list[EpisodicNode]:
         """
         Retrieve the last n episodic nodes from the graph.
@@ -205,14 +210,14 @@ class Graphiti:
         return await retrieve_episodes(self.driver, reference_time, last_n)

     async def add_episode(
-        self,
-        name: str,
-        episode_body: str,
-        source_description: str,
-        reference_time: datetime,
-        source: EpisodeType = EpisodeType.message,
-        success_callback: Callable | None = None,
-        error_callback: Callable | None = None,
+        self,
+        name: str,
+        episode_body: str,
+        source_description: str,
+        reference_time: datetime,
+        source: EpisodeType = EpisodeType.message,
+        success_callback: Callable | None = None,
+        error_callback: Callable | None = None,
     ):
         """
         Process an episode and update the graph.
@@ -263,7 +268,6 @@ class Graphiti:

             nodes: list[EntityNode] = []
             entity_edges: list[EntityEdge] = []
-            episodic_edges: list[EpisodicEdge] = []
             embedder = self.llm_client.get_embedder()
             now = datetime.now()

@@ -278,6 +282,8 @@ class Graphiti:
                 valid_at=reference_time,
             )

+            # Extract entities as nodes
+
             extracted_nodes = await extract_nodes(self.llm_client, episode, previous_episodes)
             logger.info(f'Extracted nodes: {[(n.name, n.uuid) for n in extracted_nodes]}')

@@ -286,59 +292,82 @@ class Graphiti:
             await asyncio.gather(
                 *[node.generate_name_embedding(embedder) for node in extracted_nodes]
             )
-            existing_nodes = await get_relevant_nodes(extracted_nodes, self.driver)
+
+            # Resolve extracted nodes with nodes already in the graph
+            existing_nodes_lists: list[list[EntityNode]] = list(
+                await asyncio.gather(
+                    *[get_relevant_nodes([node], self.driver) for node in extracted_nodes]
+                )
+            )
+
             logger.info(f'Extracted nodes: {[(n.name, n.uuid) for n in extracted_nodes]}')
-            touched_nodes, _, brand_new_nodes = await dedupe_extracted_nodes(
-                self.llm_client, extracted_nodes, existing_nodes
+
+            mentioned_nodes, _ = await resolve_extracted_nodes(
+                self.llm_client, extracted_nodes, existing_nodes_lists
             )
-            logger.info(f'Adjusted touched nodes: {[(n.name, n.uuid) for n in touched_nodes]}')
-            nodes.extend(touched_nodes)
+            logger.info(f'Adjusted mentioned nodes: {[(n.name, n.uuid) for n in mentioned_nodes]}')
+            nodes.extend(mentioned_nodes)

+            # Extract facts as edges given entity nodes
             extracted_edges = await extract_edges(
-                self.llm_client, episode, touched_nodes, previous_episodes
+                self.llm_client, episode, mentioned_nodes, previous_episodes
             )

+            # calculate embeddings
             await asyncio.gather(*[edge.generate_embedding(embedder) for edge in extracted_edges])

-            existing_edges = await get_relevant_edges(extracted_edges, self.driver)
-            logger.info(f'Existing edges: {[(e.name, e.uuid) for e in existing_edges]}')
+            # Resolve extracted edges with edges already in the graph
+            existing_edges_list: list[list[EntityEdge]] = list(
+                await asyncio.gather(
+                    *[
+                        get_relevant_edges(
+                            [edge],
+                            self.driver,
+                            RELEVANT_SCHEMA_LIMIT,
+                            edge.source_node_uuid,
+                            edge.target_node_uuid,
+                        )
+                        for edge in extracted_edges
+                    ]
+                )
+            )
+            logger.info(
+                f'Existing edges lists: {[(e.name, e.uuid) for edges_lst in existing_edges_list for e in edges_lst]}'
+            )
             logger.info(f'Extracted edges: {[(e.name, e.uuid) for e in extracted_edges]}')

-            deduped_edges = await dedupe_extracted_edges(
-                self.llm_client,
-                extracted_edges,
-                existing_edges,
+            deduped_edges: list[EntityEdge] = await resolve_extracted_edges(
+                self.llm_client, extracted_edges, existing_edges_list
             )

-            edge_touched_node_uuids = [n.uuid for n in brand_new_nodes]
-            for edge in deduped_edges:
-                edge_touched_node_uuids.append(edge.source_node_uuid)
-                edge_touched_node_uuids.append(edge.target_node_uuid)
-
-            for edge in deduped_edges:
-                valid_at, invalid_at, _ = await extract_edge_dates(
-                    self.llm_client,
-                    edge,
-                    episode.valid_at,
-                    episode,
-                    previous_episodes,
-                )
-                edge.valid_at = valid_at
-                edge.invalid_at = invalid_at
-                if edge.invalid_at:
-                    edge.expired_at = datetime.now()
-            for edge in existing_edges:
-                valid_at, invalid_at, _ = await extract_edge_dates(
-                    self.llm_client,
-                    edge,
-                    episode.valid_at,
-                    episode,
-                    previous_episodes,
-                )
+            # Extract dates for the newly extracted edges
+            edge_dates = await asyncio.gather(
+                *[
+                    extract_edge_dates(
+                        self.llm_client,
+                        edge,
+                        episode,
+                        previous_episodes,
+                    )
+                    for edge in deduped_edges
+                ]
+            )
+
+            for i, edge in enumerate(deduped_edges):
+                valid_at = edge_dates[i][0]
+                invalid_at = edge_dates[i][1]
+
                 edge.valid_at = valid_at
                 edge.invalid_at = invalid_at
-                if edge.invalid_at:
-                    edge.expired_at = datetime.now()
+                if edge.invalid_at is not None:
+                    edge.expired_at = now
+
+            entity_edges.extend(deduped_edges)
+
+            existing_edges: list[EntityEdge] = [
+                e for edge_lst in existing_edges_list for e in edge_lst
+            ]
+
             (
                 old_edges_with_nodes_pending_invalidation,
                 new_edges_with_nodes,
@@ -361,30 +390,18 @@ class Graphiti:
                 for deduped_edge in deduped_edges:
                     if deduped_edge.uuid == edge.uuid:
                         deduped_edge.expired_at = edge.expired_at
-                edge_touched_node_uuids.append(edge.source_node_uuid)
-                edge_touched_node_uuids.append(edge.target_node_uuid)
             logger.info(f'Invalidated edges: {[(e.name, e.uuid) for e in invalidated_edges]}')

-            edges_to_save = existing_edges + deduped_edges
-
-            entity_edges.extend(edges_to_save)
-
-            edge_touched_node_uuids = list(set(edge_touched_node_uuids))
-            involved_nodes = [node for node in nodes if node.uuid in edge_touched_node_uuids]
-
-            logger.info(f'Edge touched nodes: {[(n.name, n.uuid) for n in involved_nodes]}')
+            entity_edges.extend(existing_edges)

             logger.info(f'Deduped edges: {[(e.name, e.uuid) for e in deduped_edges]}')

-            episodic_edges.extend(
-                build_episodic_edges(
-                    # There may be an overlap between new_nodes and affected_nodes, so we're deduplicating them
-                    involved_nodes,
-                    episode,
-                    now,
-                )
+            episodic_edges: list[EpisodicEdge] = build_episodic_edges(
+                mentioned_nodes,
+                episode,
+                now,
             )
-            # Important to append the episode to the nodes at the end so that self referencing episodic edges are not built
+
             logger.info(f'Built episodic edges: {episodic_edges}')

             # Future optimization would be using batch operations to save nodes and edges
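The hunks above replace 0.1.0's single batched lookups (`get_relevant_nodes(extracted_nodes, ...)` and its edge counterpart) with one candidate lookup per extracted node or edge, issued concurrently and collected into a list of candidate lists that `resolve_extracted_nodes` / `resolve_extracted_edges` consume. A minimal sketch of that fan-out/gather shape, with a hypothetical fetch_candidates() standing in for the real search helpers:

import asyncio

# fetch_candidates() is a hypothetical stand-in for get_relevant_nodes /
# get_relevant_edges; each call returns the candidate matches for ONE item.
async def fetch_candidates(item: str) -> list[str]:
    await asyncio.sleep(0)  # placeholder for a real database round trip
    return [f'candidate-for-{item}']

async def resolve_all(items: list[str]) -> list[list[str]]:
    # One lookup per item, issued concurrently; the result is a list of
    # candidate lists aligned index-for-index with `items`, which is the
    # shape the resolve_* functions expect.
    return list(await asyncio.gather(*[fetch_candidates(item) for item in items]))

print(asyncio.run(resolve_all(['alice', 'bob'])))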
@@ -395,9 +412,7 @@ class Graphiti:

             end = time()
             logger.info(f'Completed add_episode in {(end - start) * 1000} ms')
-            # for node in nodes:
-            #     if isinstance(node, EntityNode):
-            #         await node.update_summary(self.driver)
+
             if success_callback:
                 await success_callback(episode)
         except Exception as e:
@@ -407,8 +422,8 @@ class Graphiti:
             raise e

     async def add_episode_bulk(
-        self,
-        bulk_episodes: list[RawEpisode],
+        self,
+        bulk_episodes: list[RawEpisode],
     ):
         """
         Process multiple episodes in bulk and update the graph.
@@ -481,15 +496,18 @@ class Graphiti:
                 *[edge.generate_embedding(embedder) for edge in extracted_edges],
             )

-            # Dedupe extracted nodes
-            nodes, uuid_map = await dedupe_nodes_bulk(self.driver, self.llm_client, extracted_nodes)
+            # Dedupe extracted nodes, compress extracted edges
+            (nodes, uuid_map), extracted_edges_timestamped = await asyncio.gather(
+                dedupe_nodes_bulk(self.driver, self.llm_client, extracted_nodes),
+                extract_edge_dates_bulk(self.llm_client, extracted_edges, episode_pairs),
+            )

             # save nodes to KG
             await asyncio.gather(*[node.save(self.driver) for node in nodes])

             # re-map edge pointers so that they don't point to discard dupe nodes
             extracted_edges_with_resolved_pointers: list[EntityEdge] = resolve_edge_pointers(
-                extracted_edges, uuid_map
+                extracted_edges_timestamped, uuid_map
             )
             episodic_edges_with_resolved_pointers: list[EpisodicEdge] = resolve_edge_pointers(
                 episodic_edges, uuid_map
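This hunk overlaps two independent steps, node deduplication and bulk edge date extraction, under a single asyncio.gather, and the pointer re-mapping then consumes the timestamped edges rather than the raw extracted ones. A small sketch of the unpacking pattern, with hypothetical stand-ins for dedupe_nodes_bulk and extract_edge_dates_bulk:

import asyncio

async def dedupe_nodes() -> tuple[list[str], dict[str, str]]:
    # Stand-in for dedupe_nodes_bulk: returns (nodes, uuid_map).
    return ['node-a'], {'dupe-uuid': 'node-a'}

async def timestamp_edges() -> list[str]:
    # Stand-in for extract_edge_dates_bulk.
    return ['edge-1 with valid_at set']

async def main() -> None:
    # gather() preserves argument order, so each result can be unpacked
    # positionally, including the (nodes, uuid_map) tuple.
    (nodes, uuid_map), timestamped_edges = await asyncio.gather(
        dedupe_nodes(), timestamp_edges()
    )
    print(nodes, uuid_map, timestamped_edges)

asyncio.run(main())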
@@ -569,17 +587,19 @@ class Graphiti:
         return edges

     async def _search(
-        self,
-        query: str,
-        timestamp: datetime,
-        config: SearchConfig,
-        center_node_uuid: str | None = None,
+        self,
+        query: str,
+        timestamp: datetime,
+        config: SearchConfig,
+        center_node_uuid: str | None = None,
     ):
         return await hybrid_search(
             self.driver, self.llm_client.get_embedder(), query, timestamp, config, center_node_uuid
         )

-    async def get_nodes_by_query(self, query: str, limit: int | None = None) -> list[EntityNode]:
+    async def get_nodes_by_query(
+        self, query: str, limit: int = RELEVANT_SCHEMA_LIMIT
+    ) -> list[EntityNode]:
         """
         Retrieve nodes from the graph database based on a text query.

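With this change, get_nodes_by_query defaults its limit to RELEVANT_SCHEMA_LIMIT instead of None. A hedged usage sketch; the constructor arguments and connection details are illustrative, not taken from the diff:

from graphiti_core.graphiti import Graphiti

async def find_nodes() -> None:
    # Illustrative setup only: assumes a reachable Neo4j instance and
    # valid credentials.
    client = Graphiti('bolt://localhost:7687', 'neo4j', 'password')
    # limit now defaults to RELEVANT_SCHEMA_LIMIT; pass it only to override.
    default_hits = await client.get_nodes_by_query('Alice')
    top_three = await client.get_nodes_by_query('Alice', limit=3)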
@@ -60,6 +60,5 @@ class OpenAIClient(LLMClient):
             result = response.choices[0].message.content or ''
             return json.loads(result)
         except Exception as e:
-            print(openai_messages)
             logger.error(f'Error in generating LLM response: {e}')
             raise
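This hunk drops a stray print(openai_messages) from the exception path, leaving the usual log-and-re-raise shape. A generic sketch of that pattern (not the client's actual code beyond what the hunk shows):

import json
import logging

logger = logging.getLogger(__name__)

def parse_llm_response(raw: str) -> dict:
    # Log the failure for diagnosis and re-raise; avoid print()-ing raw
    # prompt messages, which may be large or contain user data.
    try:
        return json.loads(raw)
    except Exception as e:
        logger.error(f'Error in generating LLM response: {e}')
        raise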
@@ -23,12 +23,14 @@ from .models import Message, PromptFunction, PromptVersion
 class Prompt(Protocol):
     v1: PromptVersion
     v2: PromptVersion
+    v3: PromptVersion
     edge_list: PromptVersion


 class Versions(TypedDict):
     v1: PromptFunction
     v2: PromptFunction
+    v3: PromptFunction
     edge_list: PromptFunction


@@ -41,17 +43,17 @@ def v1(context: dict[str, Any]) -> list[Message]:
         Message(
             role='user',
             content=f"""
-        Given the following context, deduplicate facts from a list of new facts given a list of existing facts:
+        Given the following context, deduplicate facts from a list of new facts given a list of existing edges:

-        Existing Facts:
+        Existing Edges:
         {json.dumps(context['existing_edges'], indent=2)}

-        New Facts:
+        New Edges:
         {json.dumps(context['extracted_edges'], indent=2)}

         Task:
-        If any facts in New Facts is a duplicate of a fact in Existing Facts,
-        do not return it in the list of unique facts.
+        If any edge in New Edges is a duplicate of an edge in Existing Edges, add their uuids to the output list.
+        When finding duplicates edges, synthesize their facts into a short new fact.

         Guidelines:
         1. identical or near identical facts are duplicates
@@ -60,9 +62,11 @@ def v1(context: dict[str, Any]) -> list[Message]:

         Respond with a JSON object in the following format:
         {{
-            "unique_facts": [
+            "duplicates": [
                 {{
-                    "uuid": "unique identifier of the fact"
+                    "uuid": "uuid of the new node like 5d643020624c42fa9de13f97b1b3fa39",
+                    "duplicate_of": "uuid of the existing node",
+                    "fact": "one sentence description of the fact"
                 }}
             ]
         }}
@@ -113,6 +117,40 @@ def v2(context: dict[str, Any]) -> list[Message]:
     ]


+def v3(context: dict[str, Any]) -> list[Message]:
+    return [
+        Message(
+            role='system',
+            content='You are a helpful assistant that de-duplicates edges from edge lists.',
+        ),
+        Message(
+            role='user',
+            content=f"""
+        Given the following context, determine whether the New Edge represents any of the edges in the list of Existing Edges.
+
+        Existing Edges:
+        {json.dumps(context['existing_edges'], indent=2)}
+
+        New Edge:
+        {json.dumps(context['extracted_edges'], indent=2)}
+        Task:
+        1. If the New Edges represents the same factual information as any edge in Existing Edges, return 'is_duplicate: true' in the
+        response. Otherwise, return 'is_duplicate: false'
+        2. If is_duplicate is true, also return the uuid of the existing edge in the response
+
+        Guidelines:
+        1. The facts do not need to be completely identical to be duplicates, they just need to express the same information.
+
+        Respond with a JSON object in the following format:
+        {{
+            "is_duplicate": true or false,
+            "uuid": uuid of the existing edge like "5d643020624c42fa9de13f97b1b3fa39" or null,
+        }}
+        """,
+        ),
+    ]
+
+
 def edge_list(context: dict[str, Any]) -> list[Message]:
     return [
         Message(
@@ -151,4 +189,4 @@ def edge_list(context: dict[str, Any]) -> list[Message]:
     ]


-versions: Versions = {'v1': v1, 'v2': v2, 'edge_list': edge_list}
+versions: Versions = {'v1': v1, 'v2': v2, 'v3': v3, 'edge_list': edge_list}
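The new v3 turns edge dedupe into a per-edge yes/no question against a candidate list, matching the per-edge fan-out in add_episode. A sketch of rendering it; the module path and context keys are inferred from this diff and the repo layout, so treat them as assumptions:

from graphiti_core.prompts.dedupe_edges import versions

context = {
    'existing_edges': [{'uuid': 'abc123', 'fact': 'Alice works for Acme'}],
    'extracted_edges': {'uuid': 'def456', 'fact': 'Alice is employed by Acme'},
}
# versions['v3'] renders the [system, user] Message pair defined above.
for message in versions['v3'](context):
    print(message.role, message.content[:80])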
@@ -23,13 +23,15 @@ from .models import Message, PromptFunction, PromptVersion
 class Prompt(Protocol):
     v1: PromptVersion
     v2: PromptVersion
+    v3: PromptVersion
     node_list: PromptVersion


 class Versions(TypedDict):
     v1: PromptFunction
     v2: PromptFunction
-    node_list: PromptVersion
+    v3: PromptFunction
+    node_list: PromptFunction


 def v1(context: dict[str, Any]) -> list[Message]:
@@ -53,7 +55,9 @@ def v1(context: dict[str, Any]) -> list[Message]:
         1. start with the list of nodes from New Nodes
         2. If any node in New Nodes is a duplicate of a node in Existing Nodes, replace the new node with the existing
         node in the list
-        3. Respond with the resulting list of nodes
+        3. when deduplicating nodes, synthesize their summaries into a short new summary that contains the relevant information
+        of the summaries of the new and existing nodes
+        4. Respond with the resulting list of nodes

         Guidelines:
         1. Use both the name and summary of nodes to determine if they are duplicates,
@@ -64,6 +68,7 @@ def v1(context: dict[str, Any]) -> list[Message]:
             "new_nodes": [
                 {{
                     "name": "Unique identifier for the node",
+                    "summary": "Brief summary of the node's role or significance"
                 }}
             ]
         }}
@@ -91,20 +96,23 @@ def v2(context: dict[str, Any]) -> list[Message]:
         Important:
         If a node in the new nodes is describing the same entity as a node in the existing nodes, mark it as a duplicate!!!
         Task:
-        If any node in New Nodes is a duplicate of a node in Existing Nodes, add their names to the output list
+        If any node in New Nodes is a duplicate of a node in Existing Nodes, add their uuids to the output list
+        When finding duplicates nodes, synthesize their summaries into a short new summary that contains the
+        relevant information of the summaries of the new and existing nodes.

         Guidelines:
         1. Use both the name and summary of nodes to determine if they are duplicates,
         duplicate nodes may have different names
-        2. In the output, name should always be the name of the New Node that is a duplicate. duplicate_of should be
-        the name of the Existing Node.
+        2. In the output, uuid should always be the uuid of the New Node that is a duplicate. duplicate_of should be
+        the uuid of the Existing Node.

         Respond with a JSON object in the following format:
         {{
             "duplicates": [
                 {{
-                    "name": "name of the new node",
-                    "duplicate_of": "name of the existing node"
+                    "uuid": "uuid of the new node like 5d643020624c42fa9de13f97b1b3fa39",
+                    "duplicate_of": "uuid of the existing node",
+                    "summary": "Brief summary of the node's role or significance. Takes information from the new and existing nodes"
                 }}
             ]
         }}
@@ -113,6 +121,44 @@ def v2(context: dict[str, Any]) -> list[Message]:
     ]


+def v3(context: dict[str, Any]) -> list[Message]:
+    return [
+        Message(
+            role='system',
+            content='You are a helpful assistant that de-duplicates nodes from node lists.',
+        ),
+        Message(
+            role='user',
+            content=f"""
+        Given the following context, determine whether the New Node represents any of the entities in the list of Existing Nodes.
+
+        Existing Nodes:
+        {json.dumps(context['existing_nodes'], indent=2)}
+
+        New Node:
+        {json.dumps(context['extracted_nodes'], indent=2)}
+        Task:
+        1. If the New Node represents the same entity as any node in Existing Nodes, return 'is_duplicate: true' in the
+        response. Otherwise, return 'is_duplicate: false'
+        2. If is_duplicate is true, also return the uuid of the existing node in the response
+        3. If is_duplicate is true, return a summary that synthesizes the information in the New Node summary and the
+        summary of the Existing Node it is a duplicate of.
+
+        Guidelines:
+        1. Use both the name and summary of nodes to determine if the entities are duplicates,
+        duplicate nodes may have different names
+
+        Respond with a JSON object in the following format:
+        {{
+            "is_duplicate": true or false,
+            "uuid": "uuid of the existing node like 5d643020624c42fa9de13f97b1b3fa39 or null",
+            "summary": "Brief summary of the node's role or significance. Takes information from the new and existing node"
+        }}
+        """,
+        ),
+    ]
+
+
 def node_list(context: dict[str, Any]) -> list[Message]:
     return [
         Message(
@@ -128,18 +174,20 @@ def node_list(context: dict[str, Any]) -> list[Message]:
         {json.dumps(context['nodes'], indent=2)}

         Task:
-        1. Group nodes together such that all duplicate nodes are in the same list of names
-        2. All duplicate names should be grouped together in the same list
+        1. Group nodes together such that all duplicate nodes are in the same list of uuids
+        2. All duplicate uuids should be grouped together in the same list
+        3. Also return a new summary that synthesizes the summary into a new short summary

         Guidelines:
-        1. Each name from the list of nodes should appear EXACTLY once in your response
-        2. If a node has no duplicates, it should appear in the response in a list of only one name
+        1. Each uuid from the list of nodes should appear EXACTLY once in your response
+        2. If a node has no duplicates, it should appear in the response in a list of only one uuid

         Respond with a JSON object in the following format:
         {{
             "nodes": [
                 {{
-                    "names": ["myNode", "node that is a duplicate of myNode"],
+                    "uuids": ["5d643020624c42fa9de13f97b1b3fa39", "node that is a duplicate of 5d643020624c42fa9de13f97b1b3fa39"],
+                    "summary": "Brief summary of the node summaries that appear in the list of names."
                 }}
             ]
         }}
@@ -148,4 +196,4 @@ def node_list(context: dict[str, Any]) -> list[Message]:
     ]


-versions: Versions = {'v1': v1, 'v2': v2, 'node_list': node_list}
+versions: Versions = {'v1': v1, 'v2': v2, 'v3': v3, 'node_list': node_list}
@@ -110,10 +110,11 @@ def v2(context: dict[str, Any]) -> list[Message]:

         Guidelines:
         1. Create edges only between the provided nodes.
-        2. Each edge should represent a clear relationship between two nodes.
+        2. Each edge should represent a clear relationship between two DISTINCT nodes.
         3. The relation_type should be a concise, all-caps description of the relationship (e.g., LOVES, IS_FRIENDS_WITH, WORKS_FOR).
         4. Provide a more detailed fact describing the relationship.
         5. Consider temporal aspects of relationships when relevant.
+        6. Avoid using the same node as the source and target of a relationship

         Respond with a JSON object in the following format:
         {{
@@ -55,6 +55,7 @@ def v1(context: dict[str, Any]) -> list[Message]:
         1. Focus on entities, concepts, or actors that are central to the current episode.
         2. Avoid creating nodes for relationships or actions (these will be handled as edges later).
         3. Provide a brief but informative summary for each node.
+        4. Be as explicit as possible in your node names, using full names and avoiding abbreviations.

         Respond with a JSON object in the following format:
         {{
@@ -90,6 +91,7 @@ Guidelines:
         3. Provide concise but informative summaries for each extracted node.
         4. Avoid creating nodes for relationships or actions.
         5. Avoid creating nodes for temporal information like dates, times or years (these will be added to edges later).
+        6. Be as explicit as possible in your node names, using full names and avoiding abbreviations.

         Respond with a JSON object in the following format:
         {{
@@ -63,12 +63,12 @@ class SearchResults(BaseModel):


 async def hybrid_search(
-    driver: AsyncDriver,
-    embedder,
-    query: str,
-    timestamp: datetime,
-    config: SearchConfig,
-    center_node_uuid: str | None = None,
+    driver: AsyncDriver,
+    embedder,
+    query: str,
+    timestamp: datetime,
+    config: SearchConfig,
+    center_node_uuid: str | None = None,
 ) -> SearchResults:
     start = time()

@@ -83,7 +83,7 @@ async def hybrid_search(
     nodes.extend(await get_mentioned_nodes(driver, episodes))

     if SearchMethod.bm25 in config.search_methods:
-        text_search = await edge_fulltext_search(query, driver, 2 * config.num_edges)
+        text_search = await edge_fulltext_search(driver, query, 2 * config.num_edges)
         search_results.append(text_search)

     if SearchMethod.cosine_similarity in config.search_methods:
@@ -95,7 +95,7 @@ async def hybrid_search(
         )

         similarity_search = await edge_similarity_search(
-            search_vector, driver, 2 * config.num_edges
+            driver, search_vector, 2 * config.num_edges
         )
         search_results.append(similarity_search)
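The last two hunks are call-site fixes only: edge_fulltext_search and edge_similarity_search take the driver first, and 0.1.0 was passing the query or search vector in that slot. A short sketch of why keyword arguments guard against this class of swap (the signature shown is an assumption, not taken from search_utils):

# Assumed signature, for illustration only; since `driver` is untyped, a
# positional swap like the one fixed above fails only at runtime.
async def edge_fulltext_search(driver, query: str, limit: int) -> list:
    ...

# Calling with keywords makes the call site order-independent:
#     await edge_fulltext_search(driver=driver, query=query, limit=2 * config.num_edges)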