cognee 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (224)
  1. cognee/__init__.py +1 -0
  2. cognee/api/client.py +9 -5
  3. cognee/api/v1/add/add.py +2 -1
  4. cognee/api/v1/add/routers/get_add_router.py +3 -1
  5. cognee/api/v1/cognify/cognify.py +24 -16
  6. cognee/api/v1/cognify/routers/__init__.py +0 -1
  7. cognee/api/v1/cognify/routers/get_cognify_router.py +30 -1
  8. cognee/api/v1/datasets/routers/get_datasets_router.py +3 -3
  9. cognee/api/v1/ontologies/__init__.py +4 -0
  10. cognee/api/v1/ontologies/ontologies.py +158 -0
  11. cognee/api/v1/ontologies/routers/__init__.py +0 -0
  12. cognee/api/v1/ontologies/routers/get_ontology_router.py +109 -0
  13. cognee/api/v1/permissions/routers/get_permissions_router.py +41 -1
  14. cognee/api/v1/search/search.py +4 -0
  15. cognee/api/v1/ui/node_setup.py +360 -0
  16. cognee/api/v1/ui/npm_utils.py +50 -0
  17. cognee/api/v1/ui/ui.py +38 -68
  18. cognee/cli/commands/cognify_command.py +8 -1
  19. cognee/cli/config.py +1 -1
  20. cognee/context_global_variables.py +86 -9
  21. cognee/eval_framework/Dockerfile +29 -0
  22. cognee/eval_framework/answer_generation/answer_generation_executor.py +10 -0
  23. cognee/eval_framework/answer_generation/run_question_answering_module.py +1 -1
  24. cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py +0 -2
  25. cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +4 -4
  26. cognee/eval_framework/eval_config.py +2 -2
  27. cognee/eval_framework/modal_run_eval.py +16 -28
  28. cognee/infrastructure/databases/cache/config.py +3 -1
  29. cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +151 -0
  30. cognee/infrastructure/databases/cache/get_cache_engine.py +20 -10
  31. cognee/infrastructure/databases/dataset_database_handler/__init__.py +3 -0
  32. cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py +80 -0
  33. cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +18 -0
  34. cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +10 -0
  35. cognee/infrastructure/databases/exceptions/exceptions.py +16 -0
  36. cognee/infrastructure/databases/graph/config.py +7 -0
  37. cognee/infrastructure/databases/graph/get_graph_engine.py +3 -0
  38. cognee/infrastructure/databases/graph/graph_db_interface.py +15 -0
  39. cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py +81 -0
  40. cognee/infrastructure/databases/graph/kuzu/adapter.py +228 -0
  41. cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +168 -0
  42. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +80 -1
  43. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +9 -0
  44. cognee/infrastructure/databases/utils/__init__.py +3 -0
  45. cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py +10 -0
  46. cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +66 -18
  47. cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py +10 -0
  48. cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py +30 -0
  49. cognee/infrastructure/databases/vector/config.py +5 -0
  50. cognee/infrastructure/databases/vector/create_vector_engine.py +6 -1
  51. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +8 -6
  52. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +9 -7
  53. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +11 -13
  54. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +2 -0
  55. cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py +50 -0
  56. cognee/infrastructure/databases/vector/vector_db_interface.py +35 -0
  57. cognee/infrastructure/engine/models/Edge.py +13 -1
  58. cognee/infrastructure/files/storage/s3_config.py +2 -0
  59. cognee/infrastructure/files/utils/guess_file_type.py +4 -0
  60. cognee/infrastructure/llm/LLMGateway.py +5 -2
  61. cognee/infrastructure/llm/config.py +37 -0
  62. cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py +2 -2
  63. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +23 -8
  64. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +22 -18
  65. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py +5 -0
  66. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +153 -0
  67. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +47 -38
  68. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +46 -37
  69. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +20 -10
  70. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +23 -11
  71. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +36 -23
  72. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +47 -36
  73. cognee/infrastructure/loaders/LoaderEngine.py +1 -0
  74. cognee/infrastructure/loaders/core/__init__.py +2 -1
  75. cognee/infrastructure/loaders/core/csv_loader.py +93 -0
  76. cognee/infrastructure/loaders/core/text_loader.py +1 -2
  77. cognee/infrastructure/loaders/external/advanced_pdf_loader.py +0 -9
  78. cognee/infrastructure/loaders/supported_loaders.py +2 -1
  79. cognee/memify_pipelines/create_triplet_embeddings.py +53 -0
  80. cognee/memify_pipelines/persist_sessions_in_knowledge_graph.py +55 -0
  81. cognee/modules/chunking/CsvChunker.py +35 -0
  82. cognee/modules/chunking/models/DocumentChunk.py +2 -1
  83. cognee/modules/chunking/text_chunker_with_overlap.py +124 -0
  84. cognee/modules/cognify/config.py +2 -0
  85. cognee/modules/data/deletion/prune_system.py +52 -2
  86. cognee/modules/data/methods/__init__.py +1 -0
  87. cognee/modules/data/methods/create_dataset.py +4 -2
  88. cognee/modules/data/methods/delete_dataset.py +26 -0
  89. cognee/modules/data/methods/get_dataset_ids.py +5 -1
  90. cognee/modules/data/methods/get_unique_data_id.py +68 -0
  91. cognee/modules/data/methods/get_unique_dataset_id.py +66 -4
  92. cognee/modules/data/models/Dataset.py +2 -0
  93. cognee/modules/data/processing/document_types/CsvDocument.py +33 -0
  94. cognee/modules/data/processing/document_types/__init__.py +1 -0
  95. cognee/modules/engine/models/Triplet.py +9 -0
  96. cognee/modules/engine/models/__init__.py +1 -0
  97. cognee/modules/graph/cognee_graph/CogneeGraph.py +89 -39
  98. cognee/modules/graph/cognee_graph/CogneeGraphElements.py +8 -3
  99. cognee/modules/graph/utils/expand_with_nodes_and_edges.py +19 -2
  100. cognee/modules/graph/utils/resolve_edges_to_text.py +48 -49
  101. cognee/modules/ingestion/identify.py +4 -4
  102. cognee/modules/memify/memify.py +1 -7
  103. cognee/modules/notebooks/operations/run_in_local_sandbox.py +3 -0
  104. cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py +55 -23
  105. cognee/modules/pipelines/operations/pipeline.py +18 -2
  106. cognee/modules/pipelines/operations/run_tasks_data_item.py +1 -1
  107. cognee/modules/retrieval/EntityCompletionRetriever.py +10 -3
  108. cognee/modules/retrieval/__init__.py +1 -1
  109. cognee/modules/retrieval/base_graph_retriever.py +7 -3
  110. cognee/modules/retrieval/base_retriever.py +7 -3
  111. cognee/modules/retrieval/completion_retriever.py +11 -4
  112. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +10 -2
  113. cognee/modules/retrieval/graph_completion_cot_retriever.py +18 -51
  114. cognee/modules/retrieval/graph_completion_retriever.py +14 -1
  115. cognee/modules/retrieval/graph_summary_completion_retriever.py +4 -0
  116. cognee/modules/retrieval/register_retriever.py +10 -0
  117. cognee/modules/retrieval/registered_community_retrievers.py +1 -0
  118. cognee/modules/retrieval/temporal_retriever.py +13 -2
  119. cognee/modules/retrieval/triplet_retriever.py +182 -0
  120. cognee/modules/retrieval/utils/brute_force_triplet_search.py +43 -11
  121. cognee/modules/retrieval/utils/completion.py +2 -22
  122. cognee/modules/run_custom_pipeline/__init__.py +1 -0
  123. cognee/modules/run_custom_pipeline/run_custom_pipeline.py +76 -0
  124. cognee/modules/search/methods/get_search_type_tools.py +54 -8
  125. cognee/modules/search/methods/no_access_control_search.py +4 -0
  126. cognee/modules/search/methods/search.py +26 -3
  127. cognee/modules/search/types/SearchType.py +1 -1
  128. cognee/modules/settings/get_settings.py +19 -0
  129. cognee/modules/users/methods/create_user.py +12 -27
  130. cognee/modules/users/methods/get_authenticated_user.py +3 -2
  131. cognee/modules/users/methods/get_default_user.py +4 -2
  132. cognee/modules/users/methods/get_user.py +1 -1
  133. cognee/modules/users/methods/get_user_by_email.py +1 -1
  134. cognee/modules/users/models/DatasetDatabase.py +24 -3
  135. cognee/modules/users/models/Tenant.py +6 -7
  136. cognee/modules/users/models/User.py +6 -5
  137. cognee/modules/users/models/UserTenant.py +12 -0
  138. cognee/modules/users/models/__init__.py +1 -0
  139. cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +13 -13
  140. cognee/modules/users/roles/methods/add_user_to_role.py +3 -1
  141. cognee/modules/users/tenants/methods/__init__.py +1 -0
  142. cognee/modules/users/tenants/methods/add_user_to_tenant.py +21 -12
  143. cognee/modules/users/tenants/methods/create_tenant.py +22 -8
  144. cognee/modules/users/tenants/methods/select_tenant.py +62 -0
  145. cognee/shared/logging_utils.py +6 -0
  146. cognee/shared/rate_limiting.py +30 -0
  147. cognee/tasks/chunks/__init__.py +1 -0
  148. cognee/tasks/chunks/chunk_by_row.py +94 -0
  149. cognee/tasks/documents/__init__.py +0 -1
  150. cognee/tasks/documents/classify_documents.py +2 -0
  151. cognee/tasks/feedback/generate_improved_answers.py +3 -3
  152. cognee/tasks/graph/extract_graph_from_data.py +9 -10
  153. cognee/tasks/ingestion/ingest_data.py +1 -1
  154. cognee/tasks/memify/__init__.py +2 -0
  155. cognee/tasks/memify/cognify_session.py +41 -0
  156. cognee/tasks/memify/extract_user_sessions.py +73 -0
  157. cognee/tasks/memify/get_triplet_datapoints.py +289 -0
  158. cognee/tasks/storage/add_data_points.py +142 -2
  159. cognee/tasks/storage/index_data_points.py +33 -22
  160. cognee/tasks/storage/index_graph_edges.py +37 -57
  161. cognee/tests/integration/documents/CsvDocument_test.py +70 -0
  162. cognee/tests/integration/retrieval/test_triplet_retriever.py +84 -0
  163. cognee/tests/integration/tasks/test_add_data_points.py +139 -0
  164. cognee/tests/integration/tasks/test_get_triplet_datapoints.py +69 -0
  165. cognee/tests/tasks/entity_extraction/entity_extraction_test.py +1 -1
  166. cognee/tests/test_add_docling_document.py +2 -2
  167. cognee/tests/test_cognee_server_start.py +84 -3
  168. cognee/tests/test_conversation_history.py +68 -5
  169. cognee/tests/test_data/example_with_header.csv +3 -0
  170. cognee/tests/test_dataset_database_handler.py +137 -0
  171. cognee/tests/test_dataset_delete.py +76 -0
  172. cognee/tests/test_edge_centered_payload.py +170 -0
  173. cognee/tests/test_edge_ingestion.py +27 -0
  174. cognee/tests/test_feedback_enrichment.py +1 -1
  175. cognee/tests/test_library.py +6 -4
  176. cognee/tests/test_load.py +62 -0
  177. cognee/tests/test_multi_tenancy.py +165 -0
  178. cognee/tests/test_parallel_databases.py +2 -0
  179. cognee/tests/test_pipeline_cache.py +164 -0
  180. cognee/tests/test_relational_db_migration.py +54 -2
  181. cognee/tests/test_search_db.py +44 -2
  182. cognee/tests/unit/api/test_conditional_authentication_endpoints.py +12 -3
  183. cognee/tests/unit/api/test_ontology_endpoint.py +252 -0
  184. cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +5 -0
  185. cognee/tests/unit/infrastructure/databases/test_index_data_points.py +27 -0
  186. cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py +14 -16
  187. cognee/tests/unit/infrastructure/llm/test_llm_config.py +46 -0
  188. cognee/tests/unit/infrastructure/mock_embedding_engine.py +3 -7
  189. cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +0 -5
  190. cognee/tests/unit/modules/chunking/test_text_chunker.py +248 -0
  191. cognee/tests/unit/modules/chunking/test_text_chunker_with_overlap.py +324 -0
  192. cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +2 -2
  193. cognee/tests/unit/modules/graph/cognee_graph_test.py +406 -0
  194. cognee/tests/unit/modules/memify_tasks/test_cognify_session.py +111 -0
  195. cognee/tests/unit/modules/memify_tasks/test_extract_user_sessions.py +175 -0
  196. cognee/tests/unit/modules/memify_tasks/test_get_triplet_datapoints.py +214 -0
  197. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +0 -51
  198. cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +1 -0
  199. cognee/tests/unit/modules/retrieval/structured_output_test.py +204 -0
  200. cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +1 -1
  201. cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +0 -1
  202. cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +608 -0
  203. cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +83 -0
  204. cognee/tests/unit/modules/users/test_conditional_authentication.py +0 -63
  205. cognee/tests/unit/processing/chunks/chunk_by_row_test.py +52 -0
  206. cognee/tests/unit/tasks/storage/test_add_data_points.py +288 -0
  207. {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/METADATA +11 -7
  208. {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/RECORD +212 -160
  209. {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/entry_points.txt +0 -1
  210. cognee/api/v1/cognify/code_graph_pipeline.py +0 -119
  211. cognee/api/v1/cognify/routers/get_code_pipeline_router.py +0 -90
  212. cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +0 -544
  213. cognee/modules/retrieval/code_retriever.py +0 -232
  214. cognee/tasks/code/enrich_dependency_graph_checker.py +0 -35
  215. cognee/tasks/code/get_local_dependencies_checker.py +0 -20
  216. cognee/tasks/code/get_repo_dependency_graph_checker.py +0 -35
  217. cognee/tasks/documents/check_permissions_on_dataset.py +0 -26
  218. cognee/tasks/repo_processor/__init__.py +0 -2
  219. cognee/tasks/repo_processor/get_local_dependencies.py +0 -335
  220. cognee/tasks/repo_processor/get_non_code_files.py +0 -158
  221. cognee/tasks/repo_processor/get_repo_file_dependencies.py +0 -243
  222. {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/WHEEL +0 -0
  223. {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/licenses/LICENSE +0 -0
  224. {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/licenses/NOTICE.md +0 -0
cognee/modules/retrieval/utils/brute_force_triplet_search.py
@@ -58,6 +58,8 @@ async def get_memory_fragment(
     properties_to_project: Optional[List[str]] = None,
     node_type: Optional[Type] = None,
     node_name: Optional[List[str]] = None,
+    relevant_ids_to_filter: Optional[List[str]] = None,
+    triplet_distance_penalty: Optional[float] = 3.5,
 ) -> CogneeGraph:
     """Creates and initializes a CogneeGraph memory fragment with optional property projections."""
     if properties_to_project is None:
@@ -71,9 +73,11 @@ async def get_memory_fragment(
         await memory_fragment.project_graph_from_db(
             graph_engine,
             node_properties_to_project=properties_to_project,
-            edge_properties_to_project=["relationship_name"],
+            edge_properties_to_project=["relationship_name", "edge_text"],
             node_type=node_type,
             node_name=node_name,
+            relevant_ids_to_filter=relevant_ids_to_filter,
+            triplet_distance_penalty=triplet_distance_penalty,
         )

     except EntityNotFoundError:
@@ -95,6 +99,8 @@ async def brute_force_triplet_search(
     memory_fragment: Optional[CogneeGraph] = None,
     node_type: Optional[Type] = None,
     node_name: Optional[List[str]] = None,
+    wide_search_top_k: Optional[int] = 100,
+    triplet_distance_penalty: Optional[float] = 3.5,
 ) -> List[Edge]:
     """
     Performs a brute force search to retrieve the top triplets from the graph.
@@ -107,6 +113,8 @@ async def brute_force_triplet_search(
         memory_fragment (Optional[CogneeGraph]): Existing memory fragment to reuse.
         node_type: node type to filter
         node_name: node name to filter
+        wide_search_top_k (Optional[int]): Number of initial elements to retrieve from collections
+        triplet_distance_penalty (Optional[float]): Default distance penalty in graph projection

     Returns:
         list: The top triplet results.
@@ -116,10 +124,10 @@ async def brute_force_triplet_search(
     if top_k <= 0:
         raise ValueError("top_k must be a positive integer.")

-    if memory_fragment is None:
-        memory_fragment = await get_memory_fragment(
-            properties_to_project, node_type=node_type, node_name=node_name
-        )
+    # Setting wide search limit based on the parameters
+    non_global_search = node_name is None
+
+    wide_search_limit = wide_search_top_k if non_global_search else None

     if collections is None:
         collections = [
@@ -129,6 +137,9 @@ async def brute_force_triplet_search(
             "DocumentChunk_text",
         ]

+    if "EdgeType_relationship_name" not in collections:
+        collections.append("EdgeType_relationship_name")
+
     try:
         vector_engine = get_vector_engine()
     except Exception as e:
@@ -140,7 +151,7 @@ async def brute_force_triplet_search(
     async def search_in_collection(collection_name: str):
         try:
             return await vector_engine.search(
-                collection_name=collection_name, query_vector=query_vector, limit=None
+                collection_name=collection_name, query_vector=query_vector, limit=wide_search_limit
             )
         except CollectionNotFoundError:
             return []
@@ -156,19 +167,40 @@ async def brute_force_triplet_search(
         return []

     # Final statistics
-    projection_time = time.time() - start_time
+    vector_collection_search_time = time.time() - start_time
     logger.info(
-        f"Vector collection retrieval completed: Retrieved distances from {sum(1 for res in results if res)} collections in {projection_time:.2f}s"
+        f"Vector collection retrieval completed: Retrieved distances from {sum(1 for res in results if res)} collections in {vector_collection_search_time:.2f}s"
     )

     node_distances = {collection: result for collection, result in zip(collections, results)}

     edge_distances = node_distances.get("EdgeType_relationship_name", None)

+    if wide_search_limit is not None:
+        relevant_ids_to_filter = list(
+            {
+                str(getattr(scored_node, "id"))
+                for collection_name, score_collection in node_distances.items()
+                if collection_name != "EdgeType_relationship_name"
+                and isinstance(score_collection, (list, tuple))
+                for scored_node in score_collection
+                if getattr(scored_node, "id", None)
+            }
+        )
+    else:
+        relevant_ids_to_filter = None
+
+    if memory_fragment is None:
+        memory_fragment = await get_memory_fragment(
+            properties_to_project=properties_to_project,
+            node_type=node_type,
+            node_name=node_name,
+            relevant_ids_to_filter=relevant_ids_to_filter,
+            triplet_distance_penalty=triplet_distance_penalty,
+        )
+
     await memory_fragment.map_vector_distances_to_graph_nodes(node_distances=node_distances)
-    await memory_fragment.map_vector_distances_to_graph_edges(
-        vector_engine=vector_engine, query_vector=query_vector, edge_distances=edge_distances
-    )
+    await memory_fragment.map_vector_distances_to_graph_edges(edge_distances=edge_distances)

     results = await memory_fragment.calculate_top_triplet_importances(k=top_k)

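Note on the change above: the wide-search step now collects the IDs of the top-scoring hits from every node collection (the EdgeType_relationship_name collection is excluded) and hands them to get_memory_fragment as relevant_ids_to_filter, so the graph projection only materializes nodes the vector search already considers relevant. A minimal standalone sketch of that dedup step, using a hypothetical ScoredNode stand-in rather than cognee's real vector-search result type:

from dataclasses import dataclass
from typing import Optional


@dataclass
class ScoredNode:
    # Hypothetical stand-in for a vector search hit; cognee's actual result objects may differ.
    id: Optional[str]
    score: float


node_distances = {
    "Entity_name": [ScoredNode("a1", 0.12), ScoredNode("a2", 0.30)],
    "DocumentChunk_text": [ScoredNode("a1", 0.25), ScoredNode(None, 0.40)],
    "EdgeType_relationship_name": [ScoredNode("rel-1", 0.05)],  # edge hits are not node candidates
}

# Same shape as the diff: dedupe node ids across collections, skip edge hits and missing ids.
relevant_ids_to_filter = list(
    {
        str(getattr(scored_node, "id"))
        for collection_name, score_collection in node_distances.items()
        if collection_name != "EdgeType_relationship_name"
        and isinstance(score_collection, (list, tuple))
        for scored_node in score_collection
        if getattr(scored_node, "id", None)
    }
)

print(sorted(relevant_ids_to_filter))  # ['a1', 'a2']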
cognee/modules/retrieval/utils/completion.py
@@ -3,7 +3,7 @@ from cognee.infrastructure.llm.LLMGateway import LLMGateway
 from cognee.infrastructure.llm.prompts import render_prompt, read_query_prompt


-async def generate_structured_completion(
+async def generate_completion(
     query: str,
     context: str,
     user_prompt_path: str,
@@ -12,7 +12,7 @@ async def generate_structured_completion(
     conversation_history: Optional[str] = None,
     response_model: Type = str,
 ) -> Any:
-    """Generates a structured completion using LLM with given context and prompts."""
+    """Generates a completion using LLM with given context and prompts."""
     args = {"question": query, "context": context}
     user_prompt = render_prompt(user_prompt_path, args)
     system_prompt = system_prompt if system_prompt else read_query_prompt(system_prompt_path)
@@ -28,26 +28,6 @@ async def generate_structured_completion(
     )


-async def generate_completion(
-    query: str,
-    context: str,
-    user_prompt_path: str,
-    system_prompt_path: str,
-    system_prompt: Optional[str] = None,
-    conversation_history: Optional[str] = None,
-) -> str:
-    """Generates a completion using LLM with given context and prompts."""
-    return await generate_structured_completion(
-        query=query,
-        context=context,
-        user_prompt_path=user_prompt_path,
-        system_prompt_path=system_prompt_path,
-        system_prompt=system_prompt,
-        conversation_history=conversation_history,
-        response_model=str,
-    )
-
-
 async def summarize_text(
     text: str,
     system_prompt_path: str = "summarize_search_results.txt",
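Note on the change above: the thin generate_completion wrapper is gone; the former generate_structured_completion now carries that name and takes response_model (default str), so plain-text and structured calls go through one function. A hedged usage sketch; the Answer model and the prompt file names are illustrative assumptions, not files confirmed by this hunk:

from pydantic import BaseModel

from cognee.modules.retrieval.utils.completion import generate_completion


class Answer(BaseModel):
    # Hypothetical response schema, for illustration only.
    answer: str
    confidence: float


async def answer_question(question: str, context: str) -> Answer:
    # Leave response_model at its default (str) for a plain-text completion,
    # or pass a Pydantic model to get structured output back.
    return await generate_completion(
        query=question,
        context=context,
        user_prompt_path="context_for_question.txt",      # assumed prompt file name
        system_prompt_path="answer_simple_question.txt",  # assumed prompt file name
        response_model=Answer,
    )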
cognee/modules/run_custom_pipeline/__init__.py
@@ -0,0 +1 @@
+from .run_custom_pipeline import run_custom_pipeline
cognee/modules/run_custom_pipeline/run_custom_pipeline.py
@@ -0,0 +1,76 @@
+from typing import Union, Optional, List, Type, Any
+from uuid import UUID
+
+from cognee.shared.logging_utils import get_logger
+
+from cognee.modules.pipelines import run_pipeline
+from cognee.modules.pipelines.tasks.task import Task
+from cognee.modules.users.models import User
+from cognee.modules.pipelines.layers.pipeline_execution_mode import get_pipeline_executor
+
+logger = get_logger()
+
+
+async def run_custom_pipeline(
+    tasks: Union[List[Task], List[str]] = None,
+    data: Any = None,
+    dataset: Union[str, UUID] = "main_dataset",
+    user: User = None,
+    vector_db_config: Optional[dict] = None,
+    graph_db_config: Optional[dict] = None,
+    use_pipeline_cache: bool = False,
+    incremental_loading: bool = False,
+    data_per_batch: int = 20,
+    run_in_background: bool = False,
+    pipeline_name: str = "custom_pipeline",
+):
+    """
+    Custom pipeline in Cognee, can work with already built graphs. Data needs to be provided which can be processed
+    with provided tasks.
+
+    Provided tasks and data will be arranged to run the Cognee pipeline and execute graph enrichment/creation.
+
+    This is the core processing step in Cognee that converts raw text and documents
+    into an intelligent knowledge graph. It analyzes content, extracts entities and
+    relationships, and creates semantic connections for enhanced search and reasoning.
+
+    Args:
+        tasks: List of Cognee Tasks to execute.
+        data: The data to ingest. Can be anything when custom extraction and enrichment tasks are used.
+            Data provided here will be forwarded to the first extraction task in the pipeline as input.
+        dataset: Dataset name or dataset uuid to process.
+        user: User context for authentication and data access. Uses default if None.
+        vector_db_config: Custom vector database configuration for embeddings storage.
+        graph_db_config: Custom graph database configuration for relationship storage.
+        use_pipeline_cache: If True, pipelines with the same ID that are currently executing and pipelines with the same ID that were completed won't process data again.
+            Pipelines ID is created based on the generate_pipeline_id function. Pipeline status can be manually reset with the reset_dataset_pipeline_run_status function.
+        incremental_loading: If True, only new or modified data will be processed to avoid duplication. (Only works if data is used with the Cognee python Data model).
+            The incremental system stores and compares hashes of processed data in the Data model and skips data with the same content hash.
+        data_per_batch: Number of data items to be processed in parallel.
+        run_in_background: If True, starts processing asynchronously and returns immediately.
+            If False, waits for completion before returning.
+            Background mode recommended for large datasets (>100MB).
+            Use pipeline_run_id from return value to monitor progress.
+    """
+
+    custom_tasks = [
+        *tasks,
+    ]
+
+    # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for
+    pipeline_executor_func = get_pipeline_executor(run_in_background=run_in_background)
+
+    # Run the run_pipeline in the background or blocking based on executor
+    return await pipeline_executor_func(
+        pipeline=run_pipeline,
+        tasks=custom_tasks,
+        user=user,
+        data=data,
+        datasets=dataset,
+        vector_db_config=vector_db_config,
+        graph_db_config=graph_db_config,
+        use_pipeline_cache=use_pipeline_cache,
+        incremental_loading=incremental_loading,
+        data_per_batch=data_per_batch,
+        pipeline_name=pipeline_name,
+    )
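For orientation, a hedged usage sketch of the new entry point; the extract_facts task is hypothetical, and the sketch assumes Task still wraps a plain async callable as in earlier cognee releases:

import asyncio

from cognee.modules.pipelines.tasks.task import Task
from cognee.modules.run_custom_pipeline import run_custom_pipeline


async def extract_facts(data):
    # Hypothetical first task: receives the `data` argument passed to run_custom_pipeline.
    return [{"fact": item} for item in data]


async def main():
    await run_custom_pipeline(
        tasks=[Task(extract_facts)],
        data=["Cognee builds knowledge graphs.", "Triplets link entities."],
        dataset="main_dataset",
        use_pipeline_cache=True,
        run_in_background=False,
        pipeline_name="facts_pipeline",
    )


if __name__ == "__main__":
    asyncio.run(main())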
cognee/modules/search/methods/get_search_type_tools.py
@@ -2,6 +2,7 @@ import os
 from typing import Callable, List, Optional, Type

 from cognee.modules.engine.models.node_set import NodeSet
+from cognee.modules.retrieval.triplet_retriever import TripletRetriever
 from cognee.modules.search.types import SearchType
 from cognee.modules.search.operations import select_search_type
 from cognee.modules.search.exceptions import UnsupportedSearchTypeError
@@ -22,7 +23,6 @@ from cognee.modules.retrieval.graph_completion_cot_retriever import GraphComplet
 from cognee.modules.retrieval.graph_completion_context_extension_retriever import (
     GraphCompletionContextExtensionRetriever,
 )
-from cognee.modules.retrieval.code_retriever import CodeRetriever
 from cognee.modules.retrieval.cypher_search_retriever import CypherSearchRetriever
 from cognee.modules.retrieval.natural_language_retriever import NaturalLanguageRetriever

@@ -37,6 +37,8 @@ async def get_search_type_tools(
     node_name: Optional[List[str]] = None,
     save_interaction: bool = False,
     last_k: Optional[int] = None,
+    wide_search_top_k: Optional[int] = 100,
+    triplet_distance_penalty: Optional[float] = 3.5,
 ) -> list:
     search_tasks: dict[SearchType, List[Callable]] = {
         SearchType.SUMMARIES: [
@@ -59,6 +61,18 @@ async def get_search_type_tools(
                 system_prompt=system_prompt,
             ).get_context,
         ],
+        SearchType.TRIPLET_COMPLETION: [
+            TripletRetriever(
+                system_prompt_path=system_prompt_path,
+                top_k=top_k,
+                system_prompt=system_prompt,
+            ).get_completion,
+            TripletRetriever(
+                system_prompt_path=system_prompt_path,
+                top_k=top_k,
+                system_prompt=system_prompt,
+            ).get_context,
+        ],
         SearchType.GRAPH_COMPLETION: [
             GraphCompletionRetriever(
                 system_prompt_path=system_prompt_path,
@@ -67,6 +81,8 @@ async def get_search_type_tools(
                 node_name=node_name,
                 save_interaction=save_interaction,
                 system_prompt=system_prompt,
+                wide_search_top_k=wide_search_top_k,
+                triplet_distance_penalty=triplet_distance_penalty,
             ).get_completion,
             GraphCompletionRetriever(
                 system_prompt_path=system_prompt_path,
@@ -75,6 +91,8 @@ async def get_search_type_tools(
                 node_name=node_name,
                 save_interaction=save_interaction,
                 system_prompt=system_prompt,
+                wide_search_top_k=wide_search_top_k,
+                triplet_distance_penalty=triplet_distance_penalty,
             ).get_context,
         ],
         SearchType.GRAPH_COMPLETION_COT: [
@@ -85,6 +103,8 @@ async def get_search_type_tools(
                 node_name=node_name,
                 save_interaction=save_interaction,
                 system_prompt=system_prompt,
+                wide_search_top_k=wide_search_top_k,
+                triplet_distance_penalty=triplet_distance_penalty,
             ).get_completion,
             GraphCompletionCotRetriever(
                 system_prompt_path=system_prompt_path,
@@ -93,6 +113,8 @@ async def get_search_type_tools(
                 node_name=node_name,
                 save_interaction=save_interaction,
                 system_prompt=system_prompt,
+                wide_search_top_k=wide_search_top_k,
+                triplet_distance_penalty=triplet_distance_penalty,
             ).get_context,
         ],
         SearchType.GRAPH_COMPLETION_CONTEXT_EXTENSION: [
@@ -103,6 +125,8 @@ async def get_search_type_tools(
                 node_name=node_name,
                 save_interaction=save_interaction,
                 system_prompt=system_prompt,
+                wide_search_top_k=wide_search_top_k,
+                triplet_distance_penalty=triplet_distance_penalty,
            ).get_completion,
             GraphCompletionContextExtensionRetriever(
                 system_prompt_path=system_prompt_path,
@@ -111,6 +135,8 @@ async def get_search_type_tools(
                 node_name=node_name,
                 save_interaction=save_interaction,
                 system_prompt=system_prompt,
+                wide_search_top_k=wide_search_top_k,
+                triplet_distance_penalty=triplet_distance_penalty,
             ).get_context,
         ],
         SearchType.GRAPH_SUMMARY_COMPLETION: [
@@ -121,6 +147,8 @@ async def get_search_type_tools(
                 node_name=node_name,
                 save_interaction=save_interaction,
                 system_prompt=system_prompt,
+                wide_search_top_k=wide_search_top_k,
+                triplet_distance_penalty=triplet_distance_penalty,
             ).get_completion,
             GraphSummaryCompletionRetriever(
                 system_prompt_path=system_prompt_path,
@@ -129,12 +157,10 @@ async def get_search_type_tools(
                 node_name=node_name,
                 save_interaction=save_interaction,
                 system_prompt=system_prompt,
+                wide_search_top_k=wide_search_top_k,
+                triplet_distance_penalty=triplet_distance_penalty,
             ).get_context,
         ],
-        SearchType.CODE: [
-            CodeRetriever(top_k=top_k).get_completion,
-            CodeRetriever(top_k=top_k).get_context,
-        ],
         SearchType.CYPHER: [
             CypherSearchRetriever().get_completion,
             CypherSearchRetriever().get_context,
@@ -145,8 +171,16 @@ async def get_search_type_tools(
         ],
         SearchType.FEEDBACK: [UserQAFeedback(last_k=last_k).add_feedback],
         SearchType.TEMPORAL: [
-            TemporalRetriever(top_k=top_k).get_completion,
-            TemporalRetriever(top_k=top_k).get_context,
+            TemporalRetriever(
+                top_k=top_k,
+                wide_search_top_k=wide_search_top_k,
+                triplet_distance_penalty=triplet_distance_penalty,
+            ).get_completion,
+            TemporalRetriever(
+                top_k=top_k,
+                wide_search_top_k=wide_search_top_k,
+                triplet_distance_penalty=triplet_distance_penalty,
+            ).get_context,
         ],
         SearchType.CHUNKS_LEXICAL: (
             lambda _r=JaccardChunksRetriever(top_k=top_k): [
@@ -169,7 +203,19 @@ async def get_search_type_tools(
     ):
         raise UnsupportedSearchTypeError("Cypher query search types are disabled.")

-    search_type_tools = search_tasks.get(query_type)
+    from cognee.modules.retrieval.registered_community_retrievers import (
+        registered_community_retrievers,
+    )
+
+    if query_type in registered_community_retrievers:
+        retriever = registered_community_retrievers[query_type]
+        retriever_instance = retriever(top_k=top_k)
+        search_type_tools = [
+            retriever_instance.get_completion,
+            retriever_instance.get_context,
+        ]
+    else:
+        search_type_tools = search_tasks.get(query_type)

     if not search_type_tools:
         raise UnsupportedSearchTypeError(str(query_type))
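Note on the change above: community-registered retrievers are now checked before the built-in table, so a query type found in registered_community_retrievers is served by that retriever's get_completion/get_context pair. A minimal sketch of the dispatch pattern with hypothetical classes (cognee's register_retriever helper also changes in this release, but its signature is outside this hunk, so it is not shown):

from typing import Callable, Dict, List, Type

# Hypothetical registry keyed by query type, mirroring registered_community_retrievers.
registered_community_retrievers: Dict[str, Type] = {}


class MyCommunityRetriever:
    # Hypothetical retriever exposing the two callables the dispatcher expects.
    def __init__(self, top_k: int = 5):
        self.top_k = top_k

    async def get_completion(self, query: str) -> str:
        return f"completion for {query!r} (top_k={self.top_k})"

    async def get_context(self, query: str) -> str:
        return f"context for {query!r} (top_k={self.top_k})"


registered_community_retrievers["MY_COMMUNITY_SEARCH"] = MyCommunityRetriever


def resolve_tools(query_type: str, top_k: int) -> List[Callable]:
    # Same shape as the diff: registry first, built-in table as the fallback.
    if query_type in registered_community_retrievers:
        instance = registered_community_retrievers[query_type](top_k=top_k)
        return [instance.get_completion, instance.get_context]
    raise KeyError(query_type)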
cognee/modules/search/methods/no_access_control_search.py
@@ -24,6 +24,8 @@ async def no_access_control_search(
     last_k: Optional[int] = None,
     only_context: bool = False,
     session_id: Optional[str] = None,
+    wide_search_top_k: Optional[int] = 100,
+    triplet_distance_penalty: Optional[float] = 3.5,
 ) -> Tuple[Any, Union[str, List[Edge]], List[Dataset]]:
     search_tools = await get_search_type_tools(
         query_type=query_type,
@@ -35,6 +37,8 @@ async def no_access_control_search(
         node_name=node_name,
         save_interaction=save_interaction,
         last_k=last_k,
+        wide_search_top_k=wide_search_top_k,
+        triplet_distance_penalty=triplet_distance_penalty,
     )
     graph_engine = await get_graph_engine()
     is_empty = await graph_engine.is_empty()
cognee/modules/search/methods/search.py
@@ -1,4 +1,3 @@
-import os
 import json
 import asyncio
 from uuid import UUID
@@ -9,6 +8,7 @@ from cognee.infrastructure.databases.graph import get_graph_engine
 from cognee.shared.logging_utils import get_logger
 from cognee.shared.utils import send_telemetry
 from cognee.context_global_variables import set_database_global_context_variables
+from cognee.context_global_variables import backend_access_control_enabled

 from cognee.modules.engine.models.node_set import NodeSet
 from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
@@ -47,6 +47,8 @@ async def search(
     only_context: bool = False,
     use_combined_context: bool = False,
     session_id: Optional[str] = None,
+    wide_search_top_k: Optional[int] = 100,
+    triplet_distance_penalty: Optional[float] = 3.5,
 ) -> Union[CombinedSearchResult, List[SearchResult]]:
     """

@@ -74,7 +76,7 @@ async def search(
     )

     # Use search function filtered by permissions if access control is enabled
-    if os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true":
+    if backend_access_control_enabled():
         search_results = await authorized_search(
             query_type=query_type,
             query_text=query_text,
@@ -90,6 +92,8 @@ async def search(
             only_context=only_context,
             use_combined_context=use_combined_context,
             session_id=session_id,
+            wide_search_top_k=wide_search_top_k,
+            triplet_distance_penalty=triplet_distance_penalty,
         )
     else:
         search_results = [
@@ -105,6 +109,8 @@ async def search(
                 last_k=last_k,
                 only_context=only_context,
                 session_id=session_id,
+                wide_search_top_k=wide_search_top_k,
+                triplet_distance_penalty=triplet_distance_penalty,
             )
         ]

@@ -156,7 +162,7 @@ async def search(
         )
     else:
         # This is for maintaining backwards compatibility
-        if os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true":
+        if backend_access_control_enabled():
            return_value = []
            for search_result in search_results:
                prepared_search_results = await prepare_search_result(search_result)
@@ -172,6 +178,7 @@ async def search(
                         "search_result": [context] if context else None,
                         "dataset_id": datasets[0].id,
                         "dataset_name": datasets[0].name,
+                        "dataset_tenant_id": datasets[0].tenant_id,
                         "graphs": graphs,
                     }
                 )
@@ -181,6 +188,7 @@ async def search(
                         "search_result": [result] if result else None,
                         "dataset_id": datasets[0].id,
                         "dataset_name": datasets[0].name,
+                        "dataset_tenant_id": datasets[0].tenant_id,
                         "graphs": graphs,
                     }
                 )
@@ -217,6 +225,8 @@ async def authorized_search(
     only_context: bool = False,
     use_combined_context: bool = False,
     session_id: Optional[str] = None,
+    wide_search_top_k: Optional[int] = 100,
+    triplet_distance_penalty: Optional[float] = 3.5,
 ) -> Union[
     Tuple[Any, Union[List[Edge], str], List[Dataset]],
     List[Tuple[Any, Union[List[Edge], str], List[Dataset]]],
@@ -244,6 +254,8 @@ async def authorized_search(
             last_k=last_k,
             only_context=True,
             session_id=session_id,
+            wide_search_top_k=wide_search_top_k,
+            triplet_distance_penalty=triplet_distance_penalty,
         )

         context = {}
@@ -265,6 +277,8 @@ async def authorized_search(
             node_name=node_name,
             save_interaction=save_interaction,
             last_k=last_k,
+            wide_search_top_k=wide_search_top_k,
+            triplet_distance_penalty=triplet_distance_penalty,
         )
         search_tools = specific_search_tools
         if len(search_tools) == 2:
@@ -304,6 +318,7 @@ async def authorized_search(
             last_k=last_k,
             only_context=only_context,
             session_id=session_id,
+            wide_search_top_k=wide_search_top_k,
         )

     return search_results
@@ -323,6 +338,8 @@ async def search_in_datasets_context(
     only_context: bool = False,
     context: Optional[Any] = None,
     session_id: Optional[str] = None,
+    wide_search_top_k: Optional[int] = 100,
+    triplet_distance_penalty: Optional[float] = 3.5,
 ) -> List[Tuple[Any, Union[str, List[Edge]], List[Dataset]]]:
     """
     Searches all provided datasets and handles setting up of appropriate database context based on permissions.
@@ -343,6 +360,8 @@ async def search_in_datasets_context(
         only_context: bool = False,
         context: Optional[Any] = None,
         session_id: Optional[str] = None,
+        wide_search_top_k: Optional[int] = 100,
+        triplet_distance_penalty: Optional[float] = 3.5,
     ) -> Tuple[Any, Union[str, List[Edge]], List[Dataset]]:
         # Set database configuration in async context for each dataset user has access for
         await set_database_global_context_variables(dataset.id, dataset.owner_id)
@@ -376,6 +395,8 @@ async def search_in_datasets_context(
             node_name=node_name,
             save_interaction=save_interaction,
             last_k=last_k,
+            wide_search_top_k=wide_search_top_k,
+            triplet_distance_penalty=triplet_distance_penalty,
         )
         search_tools = specific_search_tools
         if len(search_tools) == 2:
@@ -411,6 +432,8 @@ async def search_in_datasets_context(
                 only_context=only_context,
                 context=context,
                 session_id=session_id,
+                wide_search_top_k=wide_search_top_k,
+                triplet_distance_penalty=triplet_distance_penalty,
             )
         )

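Note on the change above: search() no longer reads ENABLE_BACKEND_ACCESS_CONTROL inline; it delegates to backend_access_control_enabled() from cognee.context_global_variables (a file that also changes in this release). A minimal sketch of what such a helper could look like, assuming it keeps the same truthiness check the removed code performed:

import os


def backend_access_control_enabled() -> bool:
    # Assumed behaviour: same env-var check the removed inline code performed.
    return os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true"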
cognee/modules/search/types/SearchType.py
@@ -5,9 +5,9 @@ class SearchType(Enum):
     SUMMARIES = "SUMMARIES"
     CHUNKS = "CHUNKS"
     RAG_COMPLETION = "RAG_COMPLETION"
+    TRIPLET_COMPLETION = "TRIPLET_COMPLETION"
     GRAPH_COMPLETION = "GRAPH_COMPLETION"
     GRAPH_SUMMARY_COMPLETION = "GRAPH_SUMMARY_COMPLETION"
-    CODE = "CODE"
     CYPHER = "CYPHER"
     NATURAL_LANGUAGE = "NATURAL_LANGUAGE"
     GRAPH_COMPLETION_COT = "GRAPH_COMPLETION_COT"
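The new TRIPLET_COMPLETION value plugs into the public search API like any other search type. A hedged example, assuming cognee.search keeps its usual query_type/query_text/top_k keywords and forwards the tuning parameters added in search.py above:

import asyncio

import cognee
from cognee.modules.search.types import SearchType


async def main():
    results = await cognee.search(
        query_type=SearchType.TRIPLET_COMPLETION,
        query_text="How are entities connected to documents?",
        top_k=5,
        # New tuning knobs in this release; the values shown are the documented defaults.
        wide_search_top_k=100,
        triplet_distance_penalty=3.5,
    )
    print(results)


if __name__ == "__main__":
    asyncio.run(main())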
cognee/modules/settings/get_settings.py
@@ -16,6 +16,7 @@ class ModelName(Enum):
     anthropic = "anthropic"
     gemini = "gemini"
     mistral = "mistral"
+    bedrock = "bedrock"


 class LLMConfig(BaseModel):
@@ -77,6 +78,10 @@ def get_settings() -> SettingsDict:
            "value": "mistral",
            "label": "Mistral",
        },
+        {
+            "value": "bedrock",
+            "label": "Bedrock",
+        },
    ]

    return SettingsDict.model_validate(
@@ -157,6 +162,20 @@ def get_settings() -> SettingsDict:
                        "label": "Mistral Large 2.1",
                    },
                ],
+                "bedrock": [
+                    {
+                        "value": "eu.anthropic.claude-sonnet-4-5-20250929-v1:0",
+                        "label": "Claude 4.5 Sonnet",
+                    },
+                    {
+                        "value": "eu.anthropic.claude-haiku-4-5-20251001-v1:0",
+                        "label": "Claude 4.5 Haiku",
+                    },
+                    {
+                        "value": "eu.amazon.nova-lite-v1:0",
+                        "label": "Amazon Nova Lite",
+                    },
+                ],
            },
        },
        vector_db={
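Selecting the new provider should follow the existing provider pattern; a hedged sketch, assuming cognee's usual LLM_PROVIDER / LLM_MODEL environment settings also drive the Bedrock adapter added in this release (AWS credentials are expected to come from the standard AWS credential chain):

import os

# Assumed environment-based configuration; variable names follow cognee's existing LLM settings.
os.environ["LLM_PROVIDER"] = "bedrock"
os.environ["LLM_MODEL"] = "eu.anthropic.claude-sonnet-4-5-20250929-v1:0"  # one of the model IDs listed above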
cognee/modules/users/methods/create_user.py
@@ -18,7 +18,6 @@ from typing import Optional
 async def create_user(
     email: str,
     password: str,
-    tenant_id: Optional[str] = None,
     is_superuser: bool = False,
     is_active: bool = True,
     is_verified: bool = False,
@@ -30,37 +29,23 @@ async def create_user(
         async with relational_engine.get_async_session() as session:
             async with get_user_db_context(session) as user_db:
                 async with get_user_manager_context(user_db) as user_manager:
-                    if tenant_id:
-                        # Check if the tenant already exists
-                        result = await session.execute(select(Tenant).where(Tenant.id == tenant_id))
-                        tenant = result.scalars().first()
-                        if not tenant:
-                            raise TenantNotFoundError
-
-                        user = await user_manager.create(
-                            UserCreate(
-                                email=email,
-                                password=password,
-                                tenant_id=tenant.id,
-                                is_superuser=is_superuser,
-                                is_active=is_active,
-                                is_verified=is_verified,
-                            )
-                        )
-                    else:
-                        user = await user_manager.create(
-                            UserCreate(
-                                email=email,
-                                password=password,
-                                is_superuser=is_superuser,
-                                is_active=is_active,
-                                is_verified=is_verified,
-                            )
+                    user = await user_manager.create(
+                        UserCreate(
+                            email=email,
+                            password=password,
+                            is_superuser=is_superuser,
+                            is_active=is_active,
+                            is_verified=is_verified,
                         )
+                    )

                     if auto_login:
                         await session.refresh(user)

+                        # Update tenants and roles information for User object
+                        _ = await user.awaitable_attrs.tenants
+                        _ = await user.awaitable_attrs.roles
+
                     return user
     except UserAlreadyExists as error:
         print(f"User {email} already exists")