cognee 0.5.0__py3-none-any.whl → 0.5.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. cognee/api/client.py +5 -1
  2. cognee/api/v1/add/add.py +1 -2
  3. cognee/api/v1/cognify/code_graph_pipeline.py +119 -0
  4. cognee/api/v1/cognify/cognify.py +16 -24
  5. cognee/api/v1/cognify/routers/__init__.py +1 -0
  6. cognee/api/v1/cognify/routers/get_code_pipeline_router.py +90 -0
  7. cognee/api/v1/cognify/routers/get_cognify_router.py +1 -3
  8. cognee/api/v1/datasets/routers/get_datasets_router.py +3 -3
  9. cognee/api/v1/ontologies/ontologies.py +37 -12
  10. cognee/api/v1/ontologies/routers/get_ontology_router.py +25 -27
  11. cognee/api/v1/search/search.py +0 -4
  12. cognee/api/v1/ui/ui.py +68 -38
  13. cognee/context_global_variables.py +16 -61
  14. cognee/eval_framework/answer_generation/answer_generation_executor.py +0 -10
  15. cognee/eval_framework/answer_generation/run_question_answering_module.py +1 -1
  16. cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py +2 -0
  17. cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +4 -4
  18. cognee/eval_framework/eval_config.py +2 -2
  19. cognee/eval_framework/modal_run_eval.py +28 -16
  20. cognee/infrastructure/databases/graph/config.py +0 -3
  21. cognee/infrastructure/databases/graph/get_graph_engine.py +0 -1
  22. cognee/infrastructure/databases/graph/graph_db_interface.py +0 -15
  23. cognee/infrastructure/databases/graph/kuzu/adapter.py +0 -228
  24. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +1 -80
  25. cognee/infrastructure/databases/utils/__init__.py +0 -3
  26. cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +48 -62
  27. cognee/infrastructure/databases/vector/config.py +0 -2
  28. cognee/infrastructure/databases/vector/create_vector_engine.py +0 -1
  29. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +6 -8
  30. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +7 -9
  31. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +10 -11
  32. cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +544 -0
  33. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +0 -2
  34. cognee/infrastructure/databases/vector/vector_db_interface.py +0 -35
  35. cognee/infrastructure/files/storage/s3_config.py +0 -2
  36. cognee/infrastructure/llm/LLMGateway.py +2 -5
  37. cognee/infrastructure/llm/config.py +0 -35
  38. cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py +2 -2
  39. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +8 -23
  40. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +16 -17
  41. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +37 -40
  42. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +36 -39
  43. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +1 -19
  44. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +9 -11
  45. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +21 -23
  46. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +34 -42
  47. cognee/modules/cognify/config.py +0 -2
  48. cognee/modules/data/deletion/prune_system.py +2 -52
  49. cognee/modules/data/methods/delete_dataset.py +0 -26
  50. cognee/modules/engine/models/__init__.py +0 -1
  51. cognee/modules/graph/cognee_graph/CogneeGraph.py +37 -85
  52. cognee/modules/graph/cognee_graph/CogneeGraphElements.py +3 -8
  53. cognee/modules/memify/memify.py +7 -1
  54. cognee/modules/pipelines/operations/pipeline.py +2 -18
  55. cognee/modules/retrieval/__init__.py +1 -1
  56. cognee/modules/retrieval/code_retriever.py +232 -0
  57. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +0 -4
  58. cognee/modules/retrieval/graph_completion_cot_retriever.py +0 -4
  59. cognee/modules/retrieval/graph_completion_retriever.py +0 -10
  60. cognee/modules/retrieval/graph_summary_completion_retriever.py +0 -4
  61. cognee/modules/retrieval/temporal_retriever.py +0 -4
  62. cognee/modules/retrieval/utils/brute_force_triplet_search.py +10 -42
  63. cognee/modules/run_custom_pipeline/run_custom_pipeline.py +1 -8
  64. cognee/modules/search/methods/get_search_type_tools.py +8 -54
  65. cognee/modules/search/methods/no_access_control_search.py +0 -4
  66. cognee/modules/search/methods/search.py +0 -21
  67. cognee/modules/search/types/SearchType.py +1 -1
  68. cognee/modules/settings/get_settings.py +0 -19
  69. cognee/modules/users/methods/get_authenticated_user.py +2 -2
  70. cognee/modules/users/models/DatasetDatabase.py +3 -15
  71. cognee/shared/logging_utils.py +0 -4
  72. cognee/tasks/code/enrich_dependency_graph_checker.py +35 -0
  73. cognee/tasks/code/get_local_dependencies_checker.py +20 -0
  74. cognee/tasks/code/get_repo_dependency_graph_checker.py +35 -0
  75. cognee/tasks/documents/__init__.py +1 -0
  76. cognee/tasks/documents/check_permissions_on_dataset.py +26 -0
  77. cognee/tasks/graph/extract_graph_from_data.py +10 -9
  78. cognee/tasks/repo_processor/__init__.py +2 -0
  79. cognee/tasks/repo_processor/get_local_dependencies.py +335 -0
  80. cognee/tasks/repo_processor/get_non_code_files.py +158 -0
  81. cognee/tasks/repo_processor/get_repo_file_dependencies.py +243 -0
  82. cognee/tasks/storage/add_data_points.py +2 -142
  83. cognee/tests/test_cognee_server_start.py +4 -2
  84. cognee/tests/test_conversation_history.py +1 -23
  85. cognee/tests/test_delete_bmw_example.py +60 -0
  86. cognee/tests/test_search_db.py +1 -37
  87. cognee/tests/unit/api/test_ontology_endpoint.py +89 -77
  88. cognee/tests/unit/infrastructure/mock_embedding_engine.py +7 -3
  89. cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +5 -0
  90. cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +2 -2
  91. cognee/tests/unit/modules/graph/cognee_graph_test.py +0 -406
  92. {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/METADATA +89 -76
  93. {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/RECORD +97 -118
  94. {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/WHEEL +1 -1
  95. cognee/api/v1/ui/node_setup.py +0 -360
  96. cognee/api/v1/ui/npm_utils.py +0 -50
  97. cognee/eval_framework/Dockerfile +0 -29
  98. cognee/infrastructure/databases/dataset_database_handler/__init__.py +0 -3
  99. cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py +0 -80
  100. cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +0 -18
  101. cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +0 -10
  102. cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py +0 -81
  103. cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +0 -168
  104. cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py +0 -10
  105. cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py +0 -10
  106. cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py +0 -30
  107. cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py +0 -50
  108. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py +0 -5
  109. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +0 -153
  110. cognee/memify_pipelines/create_triplet_embeddings.py +0 -53
  111. cognee/modules/engine/models/Triplet.py +0 -9
  112. cognee/modules/retrieval/register_retriever.py +0 -10
  113. cognee/modules/retrieval/registered_community_retrievers.py +0 -1
  114. cognee/modules/retrieval/triplet_retriever.py +0 -182
  115. cognee/shared/rate_limiting.py +0 -30
  116. cognee/tasks/memify/get_triplet_datapoints.py +0 -289
  117. cognee/tests/integration/retrieval/test_triplet_retriever.py +0 -84
  118. cognee/tests/integration/tasks/test_add_data_points.py +0 -139
  119. cognee/tests/integration/tasks/test_get_triplet_datapoints.py +0 -69
  120. cognee/tests/test_dataset_database_handler.py +0 -137
  121. cognee/tests/test_dataset_delete.py +0 -76
  122. cognee/tests/test_edge_centered_payload.py +0 -170
  123. cognee/tests/test_pipeline_cache.py +0 -164
  124. cognee/tests/unit/infrastructure/llm/test_llm_config.py +0 -46
  125. cognee/tests/unit/modules/memify_tasks/test_get_triplet_datapoints.py +0 -214
  126. cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +0 -608
  127. cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +0 -83
  128. cognee/tests/unit/tasks/storage/test_add_data_points.py +0 -288
  129. {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/entry_points.txt +0 -0
  130. {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/LICENSE +0 -0
  131. {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/NOTICE.md +0 -0
@@ -65,8 +65,6 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
65
65
  node_type: Optional[Type] = None,
66
66
  node_name: Optional[List[str]] = None,
67
67
  save_interaction: bool = False,
68
- wide_search_top_k: Optional[int] = 100,
69
- triplet_distance_penalty: Optional[float] = 3.5,
70
68
  ):
71
69
  super().__init__(
72
70
  user_prompt_path=user_prompt_path,
@@ -76,8 +74,6 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
76
74
  node_type=node_type,
77
75
  node_name=node_name,
78
76
  save_interaction=save_interaction,
79
- wide_search_top_k=wide_search_top_k,
80
- triplet_distance_penalty=triplet_distance_penalty,
81
77
  )
82
78
  self.validation_system_prompt_path = validation_system_prompt_path
83
79
  self.validation_user_prompt_path = validation_user_prompt_path
@@ -47,8 +47,6 @@ class GraphCompletionRetriever(BaseGraphRetriever):
47
47
  node_type: Optional[Type] = None,
48
48
  node_name: Optional[List[str]] = None,
49
49
  save_interaction: bool = False,
50
- wide_search_top_k: Optional[int] = 100,
51
- triplet_distance_penalty: Optional[float] = 3.5,
52
50
  ):
53
51
  """Initialize retriever with prompt paths and search parameters."""
54
52
  self.save_interaction = save_interaction
@@ -56,10 +54,8 @@ class GraphCompletionRetriever(BaseGraphRetriever):
56
54
  self.system_prompt_path = system_prompt_path
57
55
  self.system_prompt = system_prompt
58
56
  self.top_k = top_k if top_k is not None else 5
59
- self.wide_search_top_k = wide_search_top_k
60
57
  self.node_type = node_type
61
58
  self.node_name = node_name
62
- self.triplet_distance_penalty = triplet_distance_penalty
63
59
 
64
60
  async def resolve_edges_to_text(self, retrieved_edges: list) -> str:
65
61
  """
@@ -109,8 +105,6 @@ class GraphCompletionRetriever(BaseGraphRetriever):
109
105
  collections=vector_index_collections or None,
110
106
  node_type=self.node_type,
111
107
  node_name=self.node_name,
112
- wide_search_top_k=self.wide_search_top_k,
113
- triplet_distance_penalty=self.triplet_distance_penalty,
114
108
  )
115
109
 
116
110
  return found_triplets
@@ -147,10 +141,6 @@ class GraphCompletionRetriever(BaseGraphRetriever):
147
141
 
148
142
  return triplets
149
143
 
150
- async def convert_retrieved_objects_to_context(self, triplets: List[Edge]):
151
- context = await self.resolve_edges_to_text(triplets)
152
- return context
153
-
154
144
  async def get_completion(
155
145
  self,
156
146
  query: str,
@@ -26,8 +26,6 @@ class GraphSummaryCompletionRetriever(GraphCompletionRetriever):
26
26
  node_type: Optional[Type] = None,
27
27
  node_name: Optional[List[str]] = None,
28
28
  save_interaction: bool = False,
29
- wide_search_top_k: Optional[int] = 100,
30
- triplet_distance_penalty: Optional[float] = 3.5,
31
29
  ):
32
30
  """Initialize retriever with default prompt paths and search parameters."""
33
31
  super().__init__(
@@ -38,8 +36,6 @@ class GraphSummaryCompletionRetriever(GraphCompletionRetriever):
38
36
  node_name=node_name,
39
37
  save_interaction=save_interaction,
40
38
  system_prompt=system_prompt,
41
- wide_search_top_k=wide_search_top_k,
42
- triplet_distance_penalty=triplet_distance_penalty,
43
39
  )
44
40
  self.summarize_prompt_path = summarize_prompt_path
45
41
 
@@ -47,8 +47,6 @@ class TemporalRetriever(GraphCompletionRetriever):
47
47
  top_k: Optional[int] = 5,
48
48
  node_type: Optional[Type] = None,
49
49
  node_name: Optional[List[str]] = None,
50
- wide_search_top_k: Optional[int] = 100,
51
- triplet_distance_penalty: Optional[float] = 3.5,
52
50
  ):
53
51
  super().__init__(
54
52
  user_prompt_path=user_prompt_path,
@@ -56,8 +54,6 @@ class TemporalRetriever(GraphCompletionRetriever):
56
54
  top_k=top_k,
57
55
  node_type=node_type,
58
56
  node_name=node_name,
59
- wide_search_top_k=wide_search_top_k,
60
- triplet_distance_penalty=triplet_distance_penalty,
61
57
  )
62
58
  self.user_prompt_path = user_prompt_path
63
59
  self.system_prompt_path = system_prompt_path
@@ -58,8 +58,6 @@ async def get_memory_fragment(
58
58
  properties_to_project: Optional[List[str]] = None,
59
59
  node_type: Optional[Type] = None,
60
60
  node_name: Optional[List[str]] = None,
61
- relevant_ids_to_filter: Optional[List[str]] = None,
62
- triplet_distance_penalty: Optional[float] = 3.5,
63
61
  ) -> CogneeGraph:
64
62
  """Creates and initializes a CogneeGraph memory fragment with optional property projections."""
65
63
  if properties_to_project is None:
@@ -76,8 +74,6 @@ async def get_memory_fragment(
76
74
  edge_properties_to_project=["relationship_name", "edge_text"],
77
75
  node_type=node_type,
78
76
  node_name=node_name,
79
- relevant_ids_to_filter=relevant_ids_to_filter,
80
- triplet_distance_penalty=triplet_distance_penalty,
81
77
  )
82
78
 
83
79
  except EntityNotFoundError:
@@ -99,8 +95,6 @@ async def brute_force_triplet_search(
99
95
  memory_fragment: Optional[CogneeGraph] = None,
100
96
  node_type: Optional[Type] = None,
101
97
  node_name: Optional[List[str]] = None,
102
- wide_search_top_k: Optional[int] = 100,
103
- triplet_distance_penalty: Optional[float] = 3.5,
104
98
  ) -> List[Edge]:
105
99
  """
106
100
  Performs a brute force search to retrieve the top triplets from the graph.
@@ -113,8 +107,6 @@ async def brute_force_triplet_search(
113
107
  memory_fragment (Optional[CogneeGraph]): Existing memory fragment to reuse.
114
108
  node_type: node type to filter
115
109
  node_name: node name to filter
116
- wide_search_top_k (Optional[int]): Number of initial elements to retrieve from collections
117
- triplet_distance_penalty (Optional[float]): Default distance penalty in graph projection
118
110
 
119
111
  Returns:
120
112
  list: The top triplet results.
@@ -124,10 +116,10 @@ async def brute_force_triplet_search(
124
116
  if top_k <= 0:
125
117
  raise ValueError("top_k must be a positive integer.")
126
118
 
127
- # Setting wide search limit based on the parameters
128
- non_global_search = node_name is None
129
-
130
- wide_search_limit = wide_search_top_k if non_global_search else None
119
+ if memory_fragment is None:
120
+ memory_fragment = await get_memory_fragment(
121
+ properties_to_project, node_type=node_type, node_name=node_name
122
+ )
131
123
 
132
124
  if collections is None:
133
125
  collections = [
@@ -137,9 +129,6 @@ async def brute_force_triplet_search(
137
129
  "DocumentChunk_text",
138
130
  ]
139
131
 
140
- if "EdgeType_relationship_name" not in collections:
141
- collections.append("EdgeType_relationship_name")
142
-
143
132
  try:
144
133
  vector_engine = get_vector_engine()
145
134
  except Exception as e:
@@ -151,7 +140,7 @@ async def brute_force_triplet_search(
151
140
  async def search_in_collection(collection_name: str):
152
141
  try:
153
142
  return await vector_engine.search(
154
- collection_name=collection_name, query_vector=query_vector, limit=wide_search_limit
143
+ collection_name=collection_name, query_vector=query_vector, limit=None
155
144
  )
156
145
  except CollectionNotFoundError:
157
146
  return []
@@ -167,40 +156,19 @@ async def brute_force_triplet_search(
167
156
  return []
168
157
 
169
158
  # Final statistics
170
- vector_collection_search_time = time.time() - start_time
159
+ projection_time = time.time() - start_time
171
160
  logger.info(
172
- f"Vector collection retrieval completed: Retrieved distances from {sum(1 for res in results if res)} collections in {vector_collection_search_time:.2f}s"
161
+ f"Vector collection retrieval completed: Retrieved distances from {sum(1 for res in results if res)} collections in {projection_time:.2f}s"
173
162
  )
174
163
 
175
164
  node_distances = {collection: result for collection, result in zip(collections, results)}
176
165
 
177
166
  edge_distances = node_distances.get("EdgeType_relationship_name", None)
178
167
 
179
- if wide_search_limit is not None:
180
- relevant_ids_to_filter = list(
181
- {
182
- str(getattr(scored_node, "id"))
183
- for collection_name, score_collection in node_distances.items()
184
- if collection_name != "EdgeType_relationship_name"
185
- and isinstance(score_collection, (list, tuple))
186
- for scored_node in score_collection
187
- if getattr(scored_node, "id", None)
188
- }
189
- )
190
- else:
191
- relevant_ids_to_filter = None
192
-
193
- if memory_fragment is None:
194
- memory_fragment = await get_memory_fragment(
195
- properties_to_project=properties_to_project,
196
- node_type=node_type,
197
- node_name=node_name,
198
- relevant_ids_to_filter=relevant_ids_to_filter,
199
- triplet_distance_penalty=triplet_distance_penalty,
200
- )
201
-
202
168
  await memory_fragment.map_vector_distances_to_graph_nodes(node_distances=node_distances)
203
- await memory_fragment.map_vector_distances_to_graph_edges(edge_distances=edge_distances)
169
+ await memory_fragment.map_vector_distances_to_graph_edges(
170
+ vector_engine=vector_engine, query_vector=query_vector, edge_distances=edge_distances
171
+ )
204
172
 
205
173
  results = await memory_fragment.calculate_top_triplet_importances(k=top_k)
206
174
 
@@ -18,8 +18,6 @@ async def run_custom_pipeline(
18
18
  user: User = None,
19
19
  vector_db_config: Optional[dict] = None,
20
20
  graph_db_config: Optional[dict] = None,
21
- use_pipeline_cache: bool = False,
22
- incremental_loading: bool = False,
23
21
  data_per_batch: int = 20,
24
22
  run_in_background: bool = False,
25
23
  pipeline_name: str = "custom_pipeline",
@@ -42,10 +40,6 @@ async def run_custom_pipeline(
42
40
  user: User context for authentication and data access. Uses default if None.
43
41
  vector_db_config: Custom vector database configuration for embeddings storage.
44
42
  graph_db_config: Custom graph database configuration for relationship storage.
45
- use_pipeline_cache: If True, pipelines with the same ID that are currently executing and pipelines with the same ID that were completed won't process data again.
46
- Pipelines ID is created based on the generate_pipeline_id function. Pipeline status can be manually reset with the reset_dataset_pipeline_run_status function.
47
- incremental_loading: If True, only new or modified data will be processed to avoid duplication. (Only works if data is used with the Cognee python Data model).
48
- The incremental system stores and compares hashes of processed data in the Data model and skips data with the same content hash.
49
43
  data_per_batch: Number of data items to be processed in parallel.
50
44
  run_in_background: If True, starts processing asynchronously and returns immediately.
51
45
  If False, waits for completion before returning.
@@ -69,8 +63,7 @@ async def run_custom_pipeline(
69
63
  datasets=dataset,
70
64
  vector_db_config=vector_db_config,
71
65
  graph_db_config=graph_db_config,
72
- use_pipeline_cache=use_pipeline_cache,
73
- incremental_loading=incremental_loading,
66
+ incremental_loading=False,
74
67
  data_per_batch=data_per_batch,
75
68
  pipeline_name=pipeline_name,
76
69
  )
@@ -2,7 +2,6 @@ import os
2
2
  from typing import Callable, List, Optional, Type
3
3
 
4
4
  from cognee.modules.engine.models.node_set import NodeSet
5
- from cognee.modules.retrieval.triplet_retriever import TripletRetriever
6
5
  from cognee.modules.search.types import SearchType
7
6
  from cognee.modules.search.operations import select_search_type
8
7
  from cognee.modules.search.exceptions import UnsupportedSearchTypeError
@@ -23,6 +22,7 @@ from cognee.modules.retrieval.graph_completion_cot_retriever import GraphComplet
23
22
  from cognee.modules.retrieval.graph_completion_context_extension_retriever import (
24
23
  GraphCompletionContextExtensionRetriever,
25
24
  )
25
+ from cognee.modules.retrieval.code_retriever import CodeRetriever
26
26
  from cognee.modules.retrieval.cypher_search_retriever import CypherSearchRetriever
27
27
  from cognee.modules.retrieval.natural_language_retriever import NaturalLanguageRetriever
28
28
 
@@ -37,8 +37,6 @@ async def get_search_type_tools(
37
37
  node_name: Optional[List[str]] = None,
38
38
  save_interaction: bool = False,
39
39
  last_k: Optional[int] = None,
40
- wide_search_top_k: Optional[int] = 100,
41
- triplet_distance_penalty: Optional[float] = 3.5,
42
40
  ) -> list:
43
41
  search_tasks: dict[SearchType, List[Callable]] = {
44
42
  SearchType.SUMMARIES: [
@@ -61,18 +59,6 @@ async def get_search_type_tools(
61
59
  system_prompt=system_prompt,
62
60
  ).get_context,
63
61
  ],
64
- SearchType.TRIPLET_COMPLETION: [
65
- TripletRetriever(
66
- system_prompt_path=system_prompt_path,
67
- top_k=top_k,
68
- system_prompt=system_prompt,
69
- ).get_completion,
70
- TripletRetriever(
71
- system_prompt_path=system_prompt_path,
72
- top_k=top_k,
73
- system_prompt=system_prompt,
74
- ).get_context,
75
- ],
76
62
  SearchType.GRAPH_COMPLETION: [
77
63
  GraphCompletionRetriever(
78
64
  system_prompt_path=system_prompt_path,
@@ -81,8 +67,6 @@ async def get_search_type_tools(
81
67
  node_name=node_name,
82
68
  save_interaction=save_interaction,
83
69
  system_prompt=system_prompt,
84
- wide_search_top_k=wide_search_top_k,
85
- triplet_distance_penalty=triplet_distance_penalty,
86
70
  ).get_completion,
87
71
  GraphCompletionRetriever(
88
72
  system_prompt_path=system_prompt_path,
@@ -91,8 +75,6 @@ async def get_search_type_tools(
91
75
  node_name=node_name,
92
76
  save_interaction=save_interaction,
93
77
  system_prompt=system_prompt,
94
- wide_search_top_k=wide_search_top_k,
95
- triplet_distance_penalty=triplet_distance_penalty,
96
78
  ).get_context,
97
79
  ],
98
80
  SearchType.GRAPH_COMPLETION_COT: [
@@ -103,8 +85,6 @@ async def get_search_type_tools(
103
85
  node_name=node_name,
104
86
  save_interaction=save_interaction,
105
87
  system_prompt=system_prompt,
106
- wide_search_top_k=wide_search_top_k,
107
- triplet_distance_penalty=triplet_distance_penalty,
108
88
  ).get_completion,
109
89
  GraphCompletionCotRetriever(
110
90
  system_prompt_path=system_prompt_path,
@@ -113,8 +93,6 @@ async def get_search_type_tools(
113
93
  node_name=node_name,
114
94
  save_interaction=save_interaction,
115
95
  system_prompt=system_prompt,
116
- wide_search_top_k=wide_search_top_k,
117
- triplet_distance_penalty=triplet_distance_penalty,
118
96
  ).get_context,
119
97
  ],
120
98
  SearchType.GRAPH_COMPLETION_CONTEXT_EXTENSION: [
@@ -125,8 +103,6 @@ async def get_search_type_tools(
125
103
  node_name=node_name,
126
104
  save_interaction=save_interaction,
127
105
  system_prompt=system_prompt,
128
- wide_search_top_k=wide_search_top_k,
129
- triplet_distance_penalty=triplet_distance_penalty,
130
106
  ).get_completion,
131
107
  GraphCompletionContextExtensionRetriever(
132
108
  system_prompt_path=system_prompt_path,
@@ -135,8 +111,6 @@ async def get_search_type_tools(
135
111
  node_name=node_name,
136
112
  save_interaction=save_interaction,
137
113
  system_prompt=system_prompt,
138
- wide_search_top_k=wide_search_top_k,
139
- triplet_distance_penalty=triplet_distance_penalty,
140
114
  ).get_context,
141
115
  ],
142
116
  SearchType.GRAPH_SUMMARY_COMPLETION: [
@@ -147,8 +121,6 @@ async def get_search_type_tools(
147
121
  node_name=node_name,
148
122
  save_interaction=save_interaction,
149
123
  system_prompt=system_prompt,
150
- wide_search_top_k=wide_search_top_k,
151
- triplet_distance_penalty=triplet_distance_penalty,
152
124
  ).get_completion,
153
125
  GraphSummaryCompletionRetriever(
154
126
  system_prompt_path=system_prompt_path,
@@ -157,10 +129,12 @@ async def get_search_type_tools(
157
129
  node_name=node_name,
158
130
  save_interaction=save_interaction,
159
131
  system_prompt=system_prompt,
160
- wide_search_top_k=wide_search_top_k,
161
- triplet_distance_penalty=triplet_distance_penalty,
162
132
  ).get_context,
163
133
  ],
134
+ SearchType.CODE: [
135
+ CodeRetriever(top_k=top_k).get_completion,
136
+ CodeRetriever(top_k=top_k).get_context,
137
+ ],
164
138
  SearchType.CYPHER: [
165
139
  CypherSearchRetriever().get_completion,
166
140
  CypherSearchRetriever().get_context,
@@ -171,16 +145,8 @@ async def get_search_type_tools(
171
145
  ],
172
146
  SearchType.FEEDBACK: [UserQAFeedback(last_k=last_k).add_feedback],
173
147
  SearchType.TEMPORAL: [
174
- TemporalRetriever(
175
- top_k=top_k,
176
- wide_search_top_k=wide_search_top_k,
177
- triplet_distance_penalty=triplet_distance_penalty,
178
- ).get_completion,
179
- TemporalRetriever(
180
- top_k=top_k,
181
- wide_search_top_k=wide_search_top_k,
182
- triplet_distance_penalty=triplet_distance_penalty,
183
- ).get_context,
148
+ TemporalRetriever(top_k=top_k).get_completion,
149
+ TemporalRetriever(top_k=top_k).get_context,
184
150
  ],
185
151
  SearchType.CHUNKS_LEXICAL: (
186
152
  lambda _r=JaccardChunksRetriever(top_k=top_k): [
@@ -203,19 +169,7 @@ async def get_search_type_tools(
203
169
  ):
204
170
  raise UnsupportedSearchTypeError("Cypher query search types are disabled.")
205
171
 
206
- from cognee.modules.retrieval.registered_community_retrievers import (
207
- registered_community_retrievers,
208
- )
209
-
210
- if query_type in registered_community_retrievers:
211
- retriever = registered_community_retrievers[query_type]
212
- retriever_instance = retriever(top_k=top_k)
213
- search_type_tools = [
214
- retriever_instance.get_completion,
215
- retriever_instance.get_context,
216
- ]
217
- else:
218
- search_type_tools = search_tasks.get(query_type)
172
+ search_type_tools = search_tasks.get(query_type)
219
173
 
220
174
  if not search_type_tools:
221
175
  raise UnsupportedSearchTypeError(str(query_type))
@@ -24,8 +24,6 @@ async def no_access_control_search(
24
24
  last_k: Optional[int] = None,
25
25
  only_context: bool = False,
26
26
  session_id: Optional[str] = None,
27
- wide_search_top_k: Optional[int] = 100,
28
- triplet_distance_penalty: Optional[float] = 3.5,
29
27
  ) -> Tuple[Any, Union[str, List[Edge]], List[Dataset]]:
30
28
  search_tools = await get_search_type_tools(
31
29
  query_type=query_type,
@@ -37,8 +35,6 @@ async def no_access_control_search(
37
35
  node_name=node_name,
38
36
  save_interaction=save_interaction,
39
37
  last_k=last_k,
40
- wide_search_top_k=wide_search_top_k,
41
- triplet_distance_penalty=triplet_distance_penalty,
42
38
  )
43
39
  graph_engine = await get_graph_engine()
44
40
  is_empty = await graph_engine.is_empty()
@@ -47,8 +47,6 @@ async def search(
47
47
  only_context: bool = False,
48
48
  use_combined_context: bool = False,
49
49
  session_id: Optional[str] = None,
50
- wide_search_top_k: Optional[int] = 100,
51
- triplet_distance_penalty: Optional[float] = 3.5,
52
50
  ) -> Union[CombinedSearchResult, List[SearchResult]]:
53
51
  """
54
52
 
@@ -92,8 +90,6 @@ async def search(
92
90
  only_context=only_context,
93
91
  use_combined_context=use_combined_context,
94
92
  session_id=session_id,
95
- wide_search_top_k=wide_search_top_k,
96
- triplet_distance_penalty=triplet_distance_penalty,
97
93
  )
98
94
  else:
99
95
  search_results = [
@@ -109,8 +105,6 @@ async def search(
109
105
  last_k=last_k,
110
106
  only_context=only_context,
111
107
  session_id=session_id,
112
- wide_search_top_k=wide_search_top_k,
113
- triplet_distance_penalty=triplet_distance_penalty,
114
108
  )
115
109
  ]
116
110
 
@@ -225,8 +219,6 @@ async def authorized_search(
225
219
  only_context: bool = False,
226
220
  use_combined_context: bool = False,
227
221
  session_id: Optional[str] = None,
228
- wide_search_top_k: Optional[int] = 100,
229
- triplet_distance_penalty: Optional[float] = 3.5,
230
222
  ) -> Union[
231
223
  Tuple[Any, Union[List[Edge], str], List[Dataset]],
232
224
  List[Tuple[Any, Union[List[Edge], str], List[Dataset]]],
@@ -254,8 +246,6 @@ async def authorized_search(
254
246
  last_k=last_k,
255
247
  only_context=True,
256
248
  session_id=session_id,
257
- wide_search_top_k=wide_search_top_k,
258
- triplet_distance_penalty=triplet_distance_penalty,
259
249
  )
260
250
 
261
251
  context = {}
@@ -277,8 +267,6 @@ async def authorized_search(
277
267
  node_name=node_name,
278
268
  save_interaction=save_interaction,
279
269
  last_k=last_k,
280
- wide_search_top_k=wide_search_top_k,
281
- triplet_distance_penalty=triplet_distance_penalty,
282
270
  )
283
271
  search_tools = specific_search_tools
284
272
  if len(search_tools) == 2:
@@ -318,7 +306,6 @@ async def authorized_search(
318
306
  last_k=last_k,
319
307
  only_context=only_context,
320
308
  session_id=session_id,
321
- wide_search_top_k=wide_search_top_k,
322
309
  )
323
310
 
324
311
  return search_results
@@ -338,8 +325,6 @@ async def search_in_datasets_context(
338
325
  only_context: bool = False,
339
326
  context: Optional[Any] = None,
340
327
  session_id: Optional[str] = None,
341
- wide_search_top_k: Optional[int] = 100,
342
- triplet_distance_penalty: Optional[float] = 3.5,
343
328
  ) -> List[Tuple[Any, Union[str, List[Edge]], List[Dataset]]]:
344
329
  """
345
330
  Searches all provided datasets and handles setting up of appropriate database context based on permissions.
@@ -360,8 +345,6 @@ async def search_in_datasets_context(
360
345
  only_context: bool = False,
361
346
  context: Optional[Any] = None,
362
347
  session_id: Optional[str] = None,
363
- wide_search_top_k: Optional[int] = 100,
364
- triplet_distance_penalty: Optional[float] = 3.5,
365
348
  ) -> Tuple[Any, Union[str, List[Edge]], List[Dataset]]:
366
349
  # Set database configuration in async context for each dataset user has access for
367
350
  await set_database_global_context_variables(dataset.id, dataset.owner_id)
@@ -395,8 +378,6 @@ async def search_in_datasets_context(
395
378
  node_name=node_name,
396
379
  save_interaction=save_interaction,
397
380
  last_k=last_k,
398
- wide_search_top_k=wide_search_top_k,
399
- triplet_distance_penalty=triplet_distance_penalty,
400
381
  )
401
382
  search_tools = specific_search_tools
402
383
  if len(search_tools) == 2:
@@ -432,8 +413,6 @@ async def search_in_datasets_context(
432
413
  only_context=only_context,
433
414
  context=context,
434
415
  session_id=session_id,
435
- wide_search_top_k=wide_search_top_k,
436
- triplet_distance_penalty=triplet_distance_penalty,
437
416
  )
438
417
  )
439
418
 
@@ -5,9 +5,9 @@ class SearchType(Enum):
5
5
  SUMMARIES = "SUMMARIES"
6
6
  CHUNKS = "CHUNKS"
7
7
  RAG_COMPLETION = "RAG_COMPLETION"
8
- TRIPLET_COMPLETION = "TRIPLET_COMPLETION"
9
8
  GRAPH_COMPLETION = "GRAPH_COMPLETION"
10
9
  GRAPH_SUMMARY_COMPLETION = "GRAPH_SUMMARY_COMPLETION"
10
+ CODE = "CODE"
11
11
  CYPHER = "CYPHER"
12
12
  NATURAL_LANGUAGE = "NATURAL_LANGUAGE"
13
13
  GRAPH_COMPLETION_COT = "GRAPH_COMPLETION_COT"
@@ -16,7 +16,6 @@ class ModelName(Enum):
16
16
  anthropic = "anthropic"
17
17
  gemini = "gemini"
18
18
  mistral = "mistral"
19
- bedrock = "bedrock"
20
19
 
21
20
 
22
21
  class LLMConfig(BaseModel):
@@ -78,10 +77,6 @@ def get_settings() -> SettingsDict:
78
77
  "value": "mistral",
79
78
  "label": "Mistral",
80
79
  },
81
- {
82
- "value": "bedrock",
83
- "label": "Bedrock",
84
- },
85
80
  ]
86
81
 
87
82
  return SettingsDict.model_validate(
@@ -162,20 +157,6 @@ def get_settings() -> SettingsDict:
162
157
  "label": "Mistral Large 2.1",
163
158
  },
164
159
  ],
165
- "bedrock": [
166
- {
167
- "value": "eu.anthropic.claude-sonnet-4-5-20250929-v1:0",
168
- "label": "Claude 4.5 Sonnet",
169
- },
170
- {
171
- "value": "eu.anthropic.claude-haiku-4-5-20251001-v1:0",
172
- "label": "Claude 4.5 Haiku",
173
- },
174
- {
175
- "value": "eu.amazon.nova-lite-v1:0",
176
- "label": "Amazon Nova Lite",
177
- },
178
- ],
179
160
  },
180
161
  },
181
162
  vector_db={
@@ -12,8 +12,8 @@ logger = get_logger("get_authenticated_user")
12
12
 
13
13
  # Check environment variable to determine authentication requirement
14
14
  REQUIRE_AUTHENTICATION = (
15
- os.getenv("REQUIRE_AUTHENTICATION", "true").lower() == "true"
16
- or os.environ.get("ENABLE_BACKEND_ACCESS_CONTROL", "true").lower() == "true"
15
+ os.getenv("REQUIRE_AUTHENTICATION", "false").lower() == "true"
16
+ or backend_access_control_enabled()
17
17
  )
18
18
 
19
19
  fastapi_users = get_fastapi_users()
@@ -1,6 +1,6 @@
1
1
  from datetime import datetime, timezone
2
2
 
3
- from sqlalchemy import Column, DateTime, String, UUID, ForeignKey, JSON, text
3
+ from sqlalchemy import Column, DateTime, String, UUID, ForeignKey
4
4
  from cognee.infrastructure.databases.relational import Base
5
5
 
6
6
 
@@ -12,29 +12,17 @@ class DatasetDatabase(Base):
12
12
  UUID, ForeignKey("datasets.id", ondelete="CASCADE"), primary_key=True, index=True
13
13
  )
14
14
 
15
- vector_database_name = Column(String, unique=False, nullable=False)
16
- graph_database_name = Column(String, unique=False, nullable=False)
15
+ vector_database_name = Column(String, unique=True, nullable=False)
16
+ graph_database_name = Column(String, unique=True, nullable=False)
17
17
 
18
18
  vector_database_provider = Column(String, unique=False, nullable=False)
19
19
  graph_database_provider = Column(String, unique=False, nullable=False)
20
20
 
21
- graph_dataset_database_handler = Column(String, unique=False, nullable=False)
22
- vector_dataset_database_handler = Column(String, unique=False, nullable=False)
23
-
24
21
  vector_database_url = Column(String, unique=False, nullable=True)
25
22
  graph_database_url = Column(String, unique=False, nullable=True)
26
23
 
27
24
  vector_database_key = Column(String, unique=False, nullable=True)
28
25
  graph_database_key = Column(String, unique=False, nullable=True)
29
26
 
30
- # configuration details for different database types. This would make it more flexible to add new database types
31
- # without changing the database schema.
32
- graph_database_connection_info = Column(
33
- JSON, unique=False, nullable=False, server_default=text("'{}'")
34
- )
35
- vector_database_connection_info = Column(
36
- JSON, unique=False, nullable=False, server_default=text("'{}'")
37
- )
38
-
39
27
  created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
40
28
  updated_at = Column(DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc))
@@ -534,10 +534,6 @@ def setup_logging(log_level=None, name=None):
534
534
  # Get a configured logger and log system information
535
535
  logger = structlog.get_logger(name if name else __name__)
536
536
 
537
- logger.warning(
538
- "From version 0.5.0 onwards, Cognee will run with multi-user access control mode set to on by default. Data isolation between different users and datasets will be enforced and data created before multi-user access control mode was turned on won't be accessible by default. To disable multi-user access control mode and regain access to old data set the environment variable ENABLE_BACKEND_ACCESS_CONTROL to false before starting Cognee. For more information, please refer to the Cognee documentation."
539
- )
540
-
541
537
  if logs_dir is not None:
542
538
  logger.info(f"Log file created at: {log_file_path}", log_file=log_file_path)
543
539
 
@@ -0,0 +1,35 @@
1
+ import os
2
+ import asyncio
3
+ import argparse
4
+ from cognee.tasks.repo_processor.get_repo_file_dependencies import get_repo_file_dependencies
5
+ from cognee.tasks.repo_processor.enrich_dependency_graph import enrich_dependency_graph
6
+
7
+
8
+ def main():
9
+ """
10
+ Execute the main logic of the dependency graph processor.
11
+
12
+ This function sets up argument parsing to retrieve the repository path, checks the
13
+ existence of the specified path, and processes the repository to produce a dependency
14
+ graph. If the repository path does not exist, it logs an error message and terminates
15
+ without further execution.
16
+ """
17
+ parser = argparse.ArgumentParser()
18
+ parser.add_argument("repo_path", help="Path to the repository")
19
+ args = parser.parse_args()
20
+
21
+ repo_path = args.repo_path
22
+ if not os.path.exists(repo_path):
23
+ print(f"Error: The provided repository path does not exist: {repo_path}")
24
+ return
25
+
26
+ graph = asyncio.run(get_repo_file_dependencies(repo_path))
27
+ graph = asyncio.run(enrich_dependency_graph(graph))
28
+ for node in graph.nodes:
29
+ print(f"Node: {node}")
30
+ for _, target, data in graph.out_edges(node, data=True):
31
+ print(f" Edge to {target}, data: {data}")
32
+
33
+
34
+ if __name__ == "__main__":
35
+ main()