cognee 0.5.0.dev0__py3-none-any.whl → 0.5.0.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. cognee/api/client.py +1 -5
  2. cognee/api/v1/add/add.py +2 -1
  3. cognee/api/v1/cognify/cognify.py +24 -16
  4. cognee/api/v1/cognify/routers/__init__.py +0 -1
  5. cognee/api/v1/cognify/routers/get_cognify_router.py +3 -1
  6. cognee/api/v1/datasets/routers/get_datasets_router.py +3 -3
  7. cognee/api/v1/ontologies/ontologies.py +12 -37
  8. cognee/api/v1/ontologies/routers/get_ontology_router.py +27 -25
  9. cognee/api/v1/search/search.py +4 -0
  10. cognee/api/v1/ui/node_setup.py +360 -0
  11. cognee/api/v1/ui/npm_utils.py +50 -0
  12. cognee/api/v1/ui/ui.py +38 -68
  13. cognee/context_global_variables.py +61 -16
  14. cognee/eval_framework/Dockerfile +29 -0
  15. cognee/eval_framework/answer_generation/answer_generation_executor.py +10 -0
  16. cognee/eval_framework/answer_generation/run_question_answering_module.py +1 -1
  17. cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py +0 -2
  18. cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +4 -4
  19. cognee/eval_framework/eval_config.py +2 -2
  20. cognee/eval_framework/modal_run_eval.py +16 -28
  21. cognee/infrastructure/databases/dataset_database_handler/__init__.py +3 -0
  22. cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py +80 -0
  23. cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +18 -0
  24. cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +10 -0
  25. cognee/infrastructure/databases/graph/config.py +3 -0
  26. cognee/infrastructure/databases/graph/get_graph_engine.py +1 -0
  27. cognee/infrastructure/databases/graph/graph_db_interface.py +15 -0
  28. cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py +81 -0
  29. cognee/infrastructure/databases/graph/kuzu/adapter.py +228 -0
  30. cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +168 -0
  31. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +80 -1
  32. cognee/infrastructure/databases/utils/__init__.py +3 -0
  33. cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py +10 -0
  34. cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +62 -48
  35. cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py +10 -0
  36. cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py +30 -0
  37. cognee/infrastructure/databases/vector/config.py +2 -0
  38. cognee/infrastructure/databases/vector/create_vector_engine.py +1 -0
  39. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +8 -6
  40. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +9 -7
  41. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +11 -10
  42. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +2 -0
  43. cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py +50 -0
  44. cognee/infrastructure/databases/vector/vector_db_interface.py +35 -0
  45. cognee/infrastructure/files/storage/s3_config.py +2 -0
  46. cognee/infrastructure/llm/LLMGateway.py +5 -2
  47. cognee/infrastructure/llm/config.py +35 -0
  48. cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py +2 -2
  49. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +23 -8
  50. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +17 -16
  51. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py +5 -0
  52. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +153 -0
  53. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +40 -37
  54. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +39 -36
  55. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +19 -1
  56. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +11 -9
  57. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +23 -21
  58. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +42 -34
  59. cognee/memify_pipelines/create_triplet_embeddings.py +53 -0
  60. cognee/modules/cognify/config.py +2 -0
  61. cognee/modules/data/deletion/prune_system.py +52 -2
  62. cognee/modules/data/methods/delete_dataset.py +26 -0
  63. cognee/modules/engine/models/Triplet.py +9 -0
  64. cognee/modules/engine/models/__init__.py +1 -0
  65. cognee/modules/graph/cognee_graph/CogneeGraph.py +85 -37
  66. cognee/modules/graph/cognee_graph/CogneeGraphElements.py +8 -3
  67. cognee/modules/memify/memify.py +1 -7
  68. cognee/modules/pipelines/operations/pipeline.py +18 -2
  69. cognee/modules/retrieval/__init__.py +1 -1
  70. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +4 -0
  71. cognee/modules/retrieval/graph_completion_cot_retriever.py +4 -0
  72. cognee/modules/retrieval/graph_completion_retriever.py +10 -0
  73. cognee/modules/retrieval/graph_summary_completion_retriever.py +4 -0
  74. cognee/modules/retrieval/register_retriever.py +10 -0
  75. cognee/modules/retrieval/registered_community_retrievers.py +1 -0
  76. cognee/modules/retrieval/temporal_retriever.py +4 -0
  77. cognee/modules/retrieval/triplet_retriever.py +182 -0
  78. cognee/modules/retrieval/utils/brute_force_triplet_search.py +42 -10
  79. cognee/modules/run_custom_pipeline/run_custom_pipeline.py +8 -1
  80. cognee/modules/search/methods/get_search_type_tools.py +54 -8
  81. cognee/modules/search/methods/no_access_control_search.py +4 -0
  82. cognee/modules/search/methods/search.py +21 -0
  83. cognee/modules/search/types/SearchType.py +1 -1
  84. cognee/modules/settings/get_settings.py +19 -0
  85. cognee/modules/users/methods/get_authenticated_user.py +2 -2
  86. cognee/modules/users/models/DatasetDatabase.py +15 -3
  87. cognee/shared/logging_utils.py +4 -0
  88. cognee/shared/rate_limiting.py +30 -0
  89. cognee/tasks/documents/__init__.py +0 -1
  90. cognee/tasks/graph/extract_graph_from_data.py +9 -10
  91. cognee/tasks/memify/get_triplet_datapoints.py +289 -0
  92. cognee/tasks/storage/add_data_points.py +142 -2
  93. cognee/tests/integration/retrieval/test_triplet_retriever.py +84 -0
  94. cognee/tests/integration/tasks/test_add_data_points.py +139 -0
  95. cognee/tests/integration/tasks/test_get_triplet_datapoints.py +69 -0
  96. cognee/tests/test_cognee_server_start.py +2 -4
  97. cognee/tests/test_conversation_history.py +23 -1
  98. cognee/tests/test_dataset_database_handler.py +137 -0
  99. cognee/tests/test_dataset_delete.py +76 -0
  100. cognee/tests/test_edge_centered_payload.py +170 -0
  101. cognee/tests/test_pipeline_cache.py +164 -0
  102. cognee/tests/test_search_db.py +37 -1
  103. cognee/tests/unit/api/test_ontology_endpoint.py +77 -89
  104. cognee/tests/unit/infrastructure/llm/test_llm_config.py +46 -0
  105. cognee/tests/unit/infrastructure/mock_embedding_engine.py +3 -7
  106. cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +0 -5
  107. cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +2 -2
  108. cognee/tests/unit/modules/graph/cognee_graph_test.py +406 -0
  109. cognee/tests/unit/modules/memify_tasks/test_get_triplet_datapoints.py +214 -0
  110. cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +608 -0
  111. cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +83 -0
  112. cognee/tests/unit/tasks/storage/test_add_data_points.py +288 -0
  113. {cognee-0.5.0.dev0.dist-info → cognee-0.5.0.dev1.dist-info}/METADATA +76 -89
  114. {cognee-0.5.0.dev0.dist-info → cognee-0.5.0.dev1.dist-info}/RECORD +118 -97
  115. {cognee-0.5.0.dev0.dist-info → cognee-0.5.0.dev1.dist-info}/WHEEL +1 -1
  116. cognee/api/v1/cognify/code_graph_pipeline.py +0 -119
  117. cognee/api/v1/cognify/routers/get_code_pipeline_router.py +0 -90
  118. cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +0 -544
  119. cognee/modules/retrieval/code_retriever.py +0 -232
  120. cognee/tasks/code/enrich_dependency_graph_checker.py +0 -35
  121. cognee/tasks/code/get_local_dependencies_checker.py +0 -20
  122. cognee/tasks/code/get_repo_dependency_graph_checker.py +0 -35
  123. cognee/tasks/documents/check_permissions_on_dataset.py +0 -26
  124. cognee/tasks/repo_processor/__init__.py +0 -2
  125. cognee/tasks/repo_processor/get_local_dependencies.py +0 -335
  126. cognee/tasks/repo_processor/get_non_code_files.py +0 -158
  127. cognee/tasks/repo_processor/get_repo_file_dependencies.py +0 -243
  128. cognee/tests/test_delete_bmw_example.py +0 -60
  129. {cognee-0.5.0.dev0.dist-info → cognee-0.5.0.dev1.dist-info}/entry_points.txt +0 -0
  130. {cognee-0.5.0.dev0.dist-info → cognee-0.5.0.dev1.dist-info}/licenses/LICENSE +0 -0
  131. {cognee-0.5.0.dev0.dist-info → cognee-0.5.0.dev1.dist-info}/licenses/NOTICE.md +0 -0
@@ -1,17 +1,67 @@
1
+ from sqlalchemy.exc import OperationalError
2
+
3
+ from cognee.infrastructure.databases.exceptions import EntityNotFoundError
4
+ from cognee.context_global_variables import backend_access_control_enabled
1
5
  from cognee.infrastructure.databases.vector import get_vector_engine
2
6
  from cognee.infrastructure.databases.graph.get_graph_engine import get_graph_engine
3
7
  from cognee.infrastructure.databases.relational import get_relational_engine
8
+ from cognee.infrastructure.databases.utils import (
9
+ get_graph_dataset_database_handler,
10
+ get_vector_dataset_database_handler,
11
+ )
4
12
  from cognee.shared.cache import delete_cache
13
+ from cognee.modules.users.models import DatasetDatabase
14
+ from cognee.shared.logging_utils import get_logger
15
+
16
+ logger = get_logger()
17
+
18
+
19
+ async def prune_graph_databases():
20
+ db_engine = get_relational_engine()
21
+ try:
22
+ dataset_databases = await db_engine.get_all_data_from_table("dataset_database")
23
+ # Go through each dataset database and delete the graph database
24
+ for dataset_database in dataset_databases:
25
+ handler = get_graph_dataset_database_handler(dataset_database)
26
+ await handler["handler_instance"].delete_dataset(dataset_database)
27
+ except (OperationalError, EntityNotFoundError) as e:
28
+ logger.debug(
29
+ "Skipping pruning of graph DB. Error when accessing dataset_database table: %s",
30
+ e,
31
+ )
32
+ return
33
+
34
+
35
+ async def prune_vector_databases():
36
+ db_engine = get_relational_engine()
37
+ try:
38
+ dataset_databases = await db_engine.get_all_data_from_table("dataset_database")
39
+ # Go through each dataset database and delete the vector database
40
+ for dataset_database in dataset_databases:
41
+ handler = get_vector_dataset_database_handler(dataset_database)
42
+ await handler["handler_instance"].delete_dataset(dataset_database)
43
+ except (OperationalError, EntityNotFoundError) as e:
44
+ logger.debug(
45
+ "Skipping pruning of vector DB. Error when accessing dataset_database table: %s",
46
+ e,
47
+ )
48
+ return
5
49
 
6
50
 
7
51
  async def prune_system(graph=True, vector=True, metadata=True, cache=True):
8
- if graph:
52
+ # Note: prune system should not be available through the API, it has no permission checks and will
53
+ # delete all graph and vector databases if called. It should only be used in development or testing environments.
54
+ if graph and not backend_access_control_enabled():
9
55
  graph_engine = await get_graph_engine()
10
56
  await graph_engine.delete_graph()
57
+ elif graph and backend_access_control_enabled():
58
+ await prune_graph_databases()
11
59
 
12
- if vector:
60
+ if vector and not backend_access_control_enabled():
13
61
  vector_engine = get_vector_engine()
14
62
  await vector_engine.prune()
63
+ elif vector and backend_access_control_enabled():
64
+ await prune_vector_databases()
15
65
 
16
66
  if metadata:
17
67
  db_engine = get_relational_engine()
@@ -1,8 +1,34 @@
1
+ from cognee.modules.users.models import DatasetDatabase
2
+ from sqlalchemy import select
3
+
1
4
  from cognee.modules.data.models import Dataset
5
+ from cognee.infrastructure.databases.utils.get_vector_dataset_database_handler import (
6
+ get_vector_dataset_database_handler,
7
+ )
8
+ from cognee.infrastructure.databases.utils.get_graph_dataset_database_handler import (
9
+ get_graph_dataset_database_handler,
10
+ )
2
11
  from cognee.infrastructure.databases.relational import get_relational_engine
3
12
 
4
13
 
5
14
  async def delete_dataset(dataset: Dataset):
6
15
  db_engine = get_relational_engine()
7
16
 
17
+ async with db_engine.get_async_session() as session:
18
+ stmt = select(DatasetDatabase).where(
19
+ DatasetDatabase.dataset_id == dataset.id,
20
+ )
21
+ dataset_database: DatasetDatabase = await session.scalar(stmt)
22
+ if dataset_database:
23
+ graph_dataset_database_handler = get_graph_dataset_database_handler(dataset_database)
24
+ vector_dataset_database_handler = get_vector_dataset_database_handler(dataset_database)
25
+ await graph_dataset_database_handler["handler_instance"].delete_dataset(
26
+ dataset_database
27
+ )
28
+ await vector_dataset_database_handler["handler_instance"].delete_dataset(
29
+ dataset_database
30
+ )
31
+ # TODO: Remove dataset from pipeline_run_status in Data objects related to dataset as well
32
+ # This blocks recreation of the dataset with the same name and data after deletion as
33
+ # it's marked as completed and will be just skipped even though it's empty.
8
34
  return await db_engine.delete_entity_by_id(dataset.__tablename__, dataset.id)
@@ -0,0 +1,9 @@
1
+ from cognee.infrastructure.engine import DataPoint
2
+
3
+
4
+ class Triplet(DataPoint):
5
+ text: str
6
+ from_node_id: str
7
+ to_node_id: str
8
+
9
+ metadata: dict = {"index_fields": ["text"]}
@@ -7,3 +7,4 @@ from .ColumnValue import ColumnValue
7
7
  from .Timestamp import Timestamp
8
8
  from .Interval import Interval
9
9
  from .Event import Event
10
+ from .Triplet import Triplet
@@ -56,6 +56,68 @@ class CogneeGraph(CogneeAbstractGraph):
56
56
  def get_edges(self) -> List[Edge]:
57
57
  return self.edges
58
58
 
59
+ async def _get_nodeset_subgraph(
60
+ self,
61
+ adapter,
62
+ node_type,
63
+ node_name,
64
+ ):
65
+ """Retrieve subgraph based on node type and name."""
66
+ logger.info("Retrieving graph filtered by node type and node name (NodeSet).")
67
+ nodes_data, edges_data = await adapter.get_nodeset_subgraph(
68
+ node_type=node_type, node_name=node_name
69
+ )
70
+ if not nodes_data or not edges_data:
71
+ raise EntityNotFoundError(
72
+ message="Nodeset does not exist, or empty nodeset projected from the database."
73
+ )
74
+ return nodes_data, edges_data
75
+
76
+ async def _get_full_or_id_filtered_graph(
77
+ self,
78
+ adapter,
79
+ relevant_ids_to_filter,
80
+ ):
81
+ """Retrieve full or ID-filtered graph with fallback."""
82
+ if relevant_ids_to_filter is None:
83
+ logger.info("Retrieving full graph.")
84
+ nodes_data, edges_data = await adapter.get_graph_data()
85
+ if not nodes_data or not edges_data:
86
+ raise EntityNotFoundError(message="Empty graph projected from the database.")
87
+ return nodes_data, edges_data
88
+
89
+ get_graph_data_fn = getattr(adapter, "get_id_filtered_graph_data", adapter.get_graph_data)
90
+ if getattr(adapter.__class__, "get_id_filtered_graph_data", None):
91
+ logger.info("Retrieving ID-filtered graph from database.")
92
+ nodes_data, edges_data = await get_graph_data_fn(target_ids=relevant_ids_to_filter)
93
+ else:
94
+ logger.info("Retrieving full graph from database.")
95
+ nodes_data, edges_data = await get_graph_data_fn()
96
+ if hasattr(adapter, "get_id_filtered_graph_data") and (not nodes_data or not edges_data):
97
+ logger.warning(
98
+ "Id filtered graph returned empty, falling back to full graph retrieval."
99
+ )
100
+ logger.info("Retrieving full graph")
101
+ nodes_data, edges_data = await adapter.get_graph_data()
102
+
103
+ if not nodes_data or not edges_data:
104
+ raise EntityNotFoundError("Empty graph projected from the database.")
105
+ return nodes_data, edges_data
106
+
107
+ async def _get_filtered_graph(
108
+ self,
109
+ adapter,
110
+ memory_fragment_filter,
111
+ ):
112
+ """Retrieve graph filtered by attributes."""
113
+ logger.info("Retrieving graph filtered by memory fragment")
114
+ nodes_data, edges_data = await adapter.get_filtered_graph_data(
115
+ attribute_filters=memory_fragment_filter
116
+ )
117
+ if not nodes_data or not edges_data:
118
+ raise EntityNotFoundError(message="Empty filtered graph projected from the database.")
119
+ return nodes_data, edges_data
120
+
59
121
  async def project_graph_from_db(
60
122
  self,
61
123
  adapter: Union[GraphDBInterface],
@@ -67,40 +129,39 @@ class CogneeGraph(CogneeAbstractGraph):
67
129
  memory_fragment_filter=[],
68
130
  node_type: Optional[Type] = None,
69
131
  node_name: Optional[List[str]] = None,
132
+ relevant_ids_to_filter: Optional[List[str]] = None,
133
+ triplet_distance_penalty: float = 3.5,
70
134
  ) -> None:
71
135
  if node_dimension < 1 or edge_dimension < 1:
72
136
  raise InvalidDimensionsError()
73
137
  try:
74
- import time
75
-
76
- start_time = time.time()
77
-
78
- # Determine projection strategy
79
138
  if node_type is not None and node_name not in [None, [], ""]:
80
- nodes_data, edges_data = await adapter.get_nodeset_subgraph(
81
- node_type=node_type, node_name=node_name
139
+ nodes_data, edges_data = await self._get_nodeset_subgraph(
140
+ adapter, node_type, node_name
82
141
  )
83
- if not nodes_data or not edges_data:
84
- raise EntityNotFoundError(
85
- message="Nodeset does not exist, or empty nodetes projected from the database."
86
- )
87
142
  elif len(memory_fragment_filter) == 0:
88
- nodes_data, edges_data = await adapter.get_graph_data()
89
- if not nodes_data or not edges_data:
90
- raise EntityNotFoundError(message="Empty graph projected from the database.")
143
+ nodes_data, edges_data = await self._get_full_or_id_filtered_graph(
144
+ adapter, relevant_ids_to_filter
145
+ )
91
146
  else:
92
- nodes_data, edges_data = await adapter.get_filtered_graph_data(
93
- attribute_filters=memory_fragment_filter
147
+ nodes_data, edges_data = await self._get_filtered_graph(
148
+ adapter, memory_fragment_filter
94
149
  )
95
- if not nodes_data or not edges_data:
96
- raise EntityNotFoundError(
97
- message="Empty filtered graph projected from the database."
98
- )
99
150
 
151
+ import time
152
+
153
+ start_time = time.time()
100
154
  # Process nodes
101
155
  for node_id, properties in nodes_data:
102
156
  node_attributes = {key: properties.get(key) for key in node_properties_to_project}
103
- self.add_node(Node(str(node_id), node_attributes, dimension=node_dimension))
157
+ self.add_node(
158
+ Node(
159
+ str(node_id),
160
+ node_attributes,
161
+ dimension=node_dimension,
162
+ node_penalty=triplet_distance_penalty,
163
+ )
164
+ )
104
165
 
105
166
  # Process edges
106
167
  for source_id, target_id, relationship_type, properties in edges_data:
@@ -118,6 +179,7 @@ class CogneeGraph(CogneeAbstractGraph):
118
179
  attributes=edge_attributes,
119
180
  directed=directed,
120
181
  dimension=edge_dimension,
182
+ edge_penalty=triplet_distance_penalty,
121
183
  )
122
184
  self.add_edge(edge)
123
185
 
@@ -149,24 +211,10 @@ class CogneeGraph(CogneeAbstractGraph):
149
211
  node.add_attribute("vector_distance", score)
150
212
  mapped_nodes += 1
151
213
 
152
- async def map_vector_distances_to_graph_edges(
153
- self, vector_engine, query_vector, edge_distances
154
- ) -> None:
214
+ async def map_vector_distances_to_graph_edges(self, edge_distances) -> None:
155
215
  try:
156
- if query_vector is None or len(query_vector) == 0:
157
- raise ValueError("Failed to generate query embedding.")
158
-
159
216
  if edge_distances is None:
160
- start_time = time.time()
161
- edge_distances = await vector_engine.search(
162
- collection_name="EdgeType_relationship_name",
163
- query_vector=query_vector,
164
- limit=None,
165
- )
166
- projection_time = time.time() - start_time
167
- logger.info(
168
- f"Edge collection distances were calculated separately from nodes in {projection_time:.2f}s"
169
- )
217
+ return
170
218
 
171
219
  embedding_map = {result.payload["text"]: result.score for result in edge_distances}
172
220
 
@@ -20,13 +20,17 @@ class Node:
20
20
  status: np.ndarray
21
21
 
22
22
  def __init__(
23
- self, node_id: str, attributes: Optional[Dict[str, Any]] = None, dimension: int = 1
23
+ self,
24
+ node_id: str,
25
+ attributes: Optional[Dict[str, Any]] = None,
26
+ dimension: int = 1,
27
+ node_penalty: float = 3.5,
24
28
  ):
25
29
  if dimension <= 0:
26
30
  raise InvalidDimensionsError()
27
31
  self.id = node_id
28
32
  self.attributes = attributes if attributes is not None else {}
29
- self.attributes["vector_distance"] = float("inf")
33
+ self.attributes["vector_distance"] = node_penalty
30
34
  self.skeleton_neighbours = []
31
35
  self.skeleton_edges = []
32
36
  self.status = np.ones(dimension, dtype=int)
@@ -105,13 +109,14 @@ class Edge:
105
109
  attributes: Optional[Dict[str, Any]] = None,
106
110
  directed: bool = True,
107
111
  dimension: int = 1,
112
+ edge_penalty: float = 3.5,
108
113
  ):
109
114
  if dimension <= 0:
110
115
  raise InvalidDimensionsError()
111
116
  self.node1 = node1
112
117
  self.node2 = node2
113
118
  self.attributes = attributes if attributes is not None else {}
114
- self.attributes["vector_distance"] = float("inf")
119
+ self.attributes["vector_distance"] = edge_penalty
115
120
  self.directed = directed
116
121
  self.status = np.ones(dimension, dtype=int)
117
122
 
@@ -12,9 +12,6 @@ from cognee.modules.users.models import User
12
12
  from cognee.modules.pipelines.layers.resolve_authorized_user_datasets import (
13
13
  resolve_authorized_user_datasets,
14
14
  )
15
- from cognee.modules.pipelines.layers.reset_dataset_pipeline_run_status import (
16
- reset_dataset_pipeline_run_status,
17
- )
18
15
  from cognee.modules.engine.operations.setup import setup
19
16
  from cognee.modules.pipelines.layers.pipeline_execution_mode import get_pipeline_executor
20
17
  from cognee.tasks.memify.extract_subgraph_chunks import extract_subgraph_chunks
@@ -97,10 +94,6 @@ async def memify(
97
94
  *enrichment_tasks,
98
95
  ]
99
96
 
100
- await reset_dataset_pipeline_run_status(
101
- authorized_dataset.id, user, pipeline_names=["memify_pipeline"]
102
- )
103
-
104
97
  # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for
105
98
  pipeline_executor_func = get_pipeline_executor(run_in_background=run_in_background)
106
99
 
@@ -113,6 +106,7 @@ async def memify(
113
106
  datasets=authorized_dataset.id,
114
107
  vector_db_config=vector_db_config,
115
108
  graph_db_config=graph_db_config,
109
+ use_pipeline_cache=False,
116
110
  incremental_loading=False,
117
111
  pipeline_name="memify_pipeline",
118
112
  )
@@ -20,6 +20,9 @@ from cognee.modules.pipelines.layers.resolve_authorized_user_datasets import (
20
20
  from cognee.modules.pipelines.layers.check_pipeline_run_qualification import (
21
21
  check_pipeline_run_qualification,
22
22
  )
23
+ from cognee.modules.pipelines.models.PipelineRunInfo import (
24
+ PipelineRunStarted,
25
+ )
23
26
  from typing import Any
24
27
 
25
28
  logger = get_logger("cognee.pipeline")
@@ -35,6 +38,7 @@ async def run_pipeline(
35
38
  pipeline_name: str = "custom_pipeline",
36
39
  vector_db_config: dict = None,
37
40
  graph_db_config: dict = None,
41
+ use_pipeline_cache: bool = False,
38
42
  incremental_loading: bool = False,
39
43
  data_per_batch: int = 20,
40
44
  ):
@@ -51,6 +55,7 @@ async def run_pipeline(
51
55
  data=data,
52
56
  pipeline_name=pipeline_name,
53
57
  context={"dataset": dataset},
58
+ use_pipeline_cache=use_pipeline_cache,
54
59
  incremental_loading=incremental_loading,
55
60
  data_per_batch=data_per_batch,
56
61
  ):
@@ -64,6 +69,7 @@ async def run_pipeline_per_dataset(
64
69
  data=None,
65
70
  pipeline_name: str = "custom_pipeline",
66
71
  context: dict = None,
72
+ use_pipeline_cache=False,
67
73
  incremental_loading=False,
68
74
  data_per_batch: int = 20,
69
75
  ):
@@ -77,8 +83,18 @@ async def run_pipeline_per_dataset(
77
83
  if process_pipeline_status:
78
84
  # If pipeline was already processed or is currently being processed
79
85
  # return status information to async generator and finish execution
80
- yield process_pipeline_status
81
- return
86
+ if use_pipeline_cache:
87
+ # If pipeline caching is enabled we do not proceed with re-processing
88
+ yield process_pipeline_status
89
+ return
90
+ else:
91
+ # If pipeline caching is disabled we always return pipeline started information and proceed with re-processing
92
+ yield PipelineRunStarted(
93
+ pipeline_run_id=process_pipeline_status.pipeline_run_id,
94
+ dataset_id=dataset.id,
95
+ dataset_name=dataset.name,
96
+ payload=data,
97
+ )
82
98
 
83
99
  pipeline_run = run_tasks(
84
100
  tasks,
@@ -1 +1 @@
1
- from cognee.modules.retrieval.code_retriever import CodeRetriever
1
+
@@ -39,6 +39,8 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever):
39
39
  node_type: Optional[Type] = None,
40
40
  node_name: Optional[List[str]] = None,
41
41
  save_interaction: bool = False,
42
+ wide_search_top_k: Optional[int] = 100,
43
+ triplet_distance_penalty: Optional[float] = 3.5,
42
44
  ):
43
45
  super().__init__(
44
46
  user_prompt_path=user_prompt_path,
@@ -48,6 +50,8 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever):
48
50
  node_name=node_name,
49
51
  save_interaction=save_interaction,
50
52
  system_prompt=system_prompt,
53
+ wide_search_top_k=wide_search_top_k,
54
+ triplet_distance_penalty=triplet_distance_penalty,
51
55
  )
52
56
 
53
57
  async def get_completion(
@@ -65,6 +65,8 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
65
65
  node_type: Optional[Type] = None,
66
66
  node_name: Optional[List[str]] = None,
67
67
  save_interaction: bool = False,
68
+ wide_search_top_k: Optional[int] = 100,
69
+ triplet_distance_penalty: Optional[float] = 3.5,
68
70
  ):
69
71
  super().__init__(
70
72
  user_prompt_path=user_prompt_path,
@@ -74,6 +76,8 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
74
76
  node_type=node_type,
75
77
  node_name=node_name,
76
78
  save_interaction=save_interaction,
79
+ wide_search_top_k=wide_search_top_k,
80
+ triplet_distance_penalty=triplet_distance_penalty,
77
81
  )
78
82
  self.validation_system_prompt_path = validation_system_prompt_path
79
83
  self.validation_user_prompt_path = validation_user_prompt_path
@@ -47,6 +47,8 @@ class GraphCompletionRetriever(BaseGraphRetriever):
47
47
  node_type: Optional[Type] = None,
48
48
  node_name: Optional[List[str]] = None,
49
49
  save_interaction: bool = False,
50
+ wide_search_top_k: Optional[int] = 100,
51
+ triplet_distance_penalty: Optional[float] = 3.5,
50
52
  ):
51
53
  """Initialize retriever with prompt paths and search parameters."""
52
54
  self.save_interaction = save_interaction
@@ -54,8 +56,10 @@ class GraphCompletionRetriever(BaseGraphRetriever):
54
56
  self.system_prompt_path = system_prompt_path
55
57
  self.system_prompt = system_prompt
56
58
  self.top_k = top_k if top_k is not None else 5
59
+ self.wide_search_top_k = wide_search_top_k
57
60
  self.node_type = node_type
58
61
  self.node_name = node_name
62
+ self.triplet_distance_penalty = triplet_distance_penalty
59
63
 
60
64
  async def resolve_edges_to_text(self, retrieved_edges: list) -> str:
61
65
  """
@@ -105,6 +109,8 @@ class GraphCompletionRetriever(BaseGraphRetriever):
105
109
  collections=vector_index_collections or None,
106
110
  node_type=self.node_type,
107
111
  node_name=self.node_name,
112
+ wide_search_top_k=self.wide_search_top_k,
113
+ triplet_distance_penalty=self.triplet_distance_penalty,
108
114
  )
109
115
 
110
116
  return found_triplets
@@ -141,6 +147,10 @@ class GraphCompletionRetriever(BaseGraphRetriever):
141
147
 
142
148
  return triplets
143
149
 
150
+ async def convert_retrieved_objects_to_context(self, triplets: List[Edge]):
151
+ context = await self.resolve_edges_to_text(triplets)
152
+ return context
153
+
144
154
  async def get_completion(
145
155
  self,
146
156
  query: str,
@@ -26,6 +26,8 @@ class GraphSummaryCompletionRetriever(GraphCompletionRetriever):
26
26
  node_type: Optional[Type] = None,
27
27
  node_name: Optional[List[str]] = None,
28
28
  save_interaction: bool = False,
29
+ wide_search_top_k: Optional[int] = 100,
30
+ triplet_distance_penalty: Optional[float] = 3.5,
29
31
  ):
30
32
  """Initialize retriever with default prompt paths and search parameters."""
31
33
  super().__init__(
@@ -36,6 +38,8 @@ class GraphSummaryCompletionRetriever(GraphCompletionRetriever):
36
38
  node_name=node_name,
37
39
  save_interaction=save_interaction,
38
40
  system_prompt=system_prompt,
41
+ wide_search_top_k=wide_search_top_k,
42
+ triplet_distance_penalty=triplet_distance_penalty,
39
43
  )
40
44
  self.summarize_prompt_path = summarize_prompt_path
41
45
 
@@ -0,0 +1,10 @@
1
+ from typing import Type
2
+
3
+ from .base_retriever import BaseRetriever
4
+ from .registered_community_retrievers import registered_community_retrievers
5
+ from ..search.types import SearchType
6
+
7
+
8
+ def use_retriever(search_type: SearchType, retriever: Type[BaseRetriever]):
9
+ """Register a retriever class for a given search type."""
10
+ registered_community_retrievers[search_type] = retriever
@@ -0,0 +1 @@
1
+ registered_community_retrievers = {}
@@ -47,6 +47,8 @@ class TemporalRetriever(GraphCompletionRetriever):
47
47
  top_k: Optional[int] = 5,
48
48
  node_type: Optional[Type] = None,
49
49
  node_name: Optional[List[str]] = None,
50
+ wide_search_top_k: Optional[int] = 100,
51
+ triplet_distance_penalty: Optional[float] = 3.5,
50
52
  ):
51
53
  super().__init__(
52
54
  user_prompt_path=user_prompt_path,
@@ -54,6 +56,8 @@ class TemporalRetriever(GraphCompletionRetriever):
54
56
  top_k=top_k,
55
57
  node_type=node_type,
56
58
  node_name=node_name,
59
+ wide_search_top_k=wide_search_top_k,
60
+ triplet_distance_penalty=triplet_distance_penalty,
57
61
  )
58
62
  self.user_prompt_path = user_prompt_path
59
63
  self.system_prompt_path = system_prompt_path