cognee 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in that registry.
Files changed (227)
  1. cognee/__init__.py +1 -0
  2. cognee/api/client.py +9 -5
  3. cognee/api/v1/add/add.py +2 -1
  4. cognee/api/v1/add/routers/get_add_router.py +3 -1
  5. cognee/api/v1/cognify/cognify.py +24 -16
  6. cognee/api/v1/cognify/routers/__init__.py +0 -1
  7. cognee/api/v1/cognify/routers/get_cognify_router.py +30 -1
  8. cognee/api/v1/datasets/routers/get_datasets_router.py +3 -3
  9. cognee/api/v1/ontologies/__init__.py +4 -0
  10. cognee/api/v1/ontologies/ontologies.py +158 -0
  11. cognee/api/v1/ontologies/routers/__init__.py +0 -0
  12. cognee/api/v1/ontologies/routers/get_ontology_router.py +109 -0
  13. cognee/api/v1/permissions/routers/get_permissions_router.py +41 -1
  14. cognee/api/v1/search/search.py +4 -0
  15. cognee/api/v1/ui/node_setup.py +360 -0
  16. cognee/api/v1/ui/npm_utils.py +50 -0
  17. cognee/api/v1/ui/ui.py +38 -68
  18. cognee/cli/commands/cognify_command.py +8 -1
  19. cognee/cli/config.py +1 -1
  20. cognee/context_global_variables.py +86 -9
  21. cognee/eval_framework/Dockerfile +29 -0
  22. cognee/eval_framework/answer_generation/answer_generation_executor.py +10 -0
  23. cognee/eval_framework/answer_generation/run_question_answering_module.py +1 -1
  24. cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py +0 -2
  25. cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +4 -4
  26. cognee/eval_framework/eval_config.py +2 -2
  27. cognee/eval_framework/modal_run_eval.py +16 -28
  28. cognee/infrastructure/databases/cache/config.py +3 -1
  29. cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +151 -0
  30. cognee/infrastructure/databases/cache/get_cache_engine.py +20 -10
  31. cognee/infrastructure/databases/dataset_database_handler/__init__.py +3 -0
  32. cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py +80 -0
  33. cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +18 -0
  34. cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +10 -0
  35. cognee/infrastructure/databases/exceptions/exceptions.py +16 -0
  36. cognee/infrastructure/databases/graph/config.py +7 -0
  37. cognee/infrastructure/databases/graph/get_graph_engine.py +3 -0
  38. cognee/infrastructure/databases/graph/graph_db_interface.py +15 -0
  39. cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py +81 -0
  40. cognee/infrastructure/databases/graph/kuzu/adapter.py +228 -0
  41. cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +168 -0
  42. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +80 -1
  43. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +9 -0
  44. cognee/infrastructure/databases/utils/__init__.py +3 -0
  45. cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py +10 -0
  46. cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +66 -18
  47. cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py +10 -0
  48. cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py +30 -0
  49. cognee/infrastructure/databases/vector/config.py +5 -0
  50. cognee/infrastructure/databases/vector/create_vector_engine.py +6 -1
  51. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +8 -6
  52. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +9 -7
  53. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +11 -10
  54. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +2 -0
  55. cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py +50 -0
  56. cognee/infrastructure/databases/vector/vector_db_interface.py +35 -0
  57. cognee/infrastructure/engine/models/Edge.py +13 -1
  58. cognee/infrastructure/files/storage/s3_config.py +2 -0
  59. cognee/infrastructure/files/utils/guess_file_type.py +4 -0
  60. cognee/infrastructure/llm/LLMGateway.py +5 -2
  61. cognee/infrastructure/llm/config.py +37 -0
  62. cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py +2 -2
  63. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +23 -8
  64. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +22 -18
  65. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py +5 -0
  66. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +153 -0
  67. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +47 -38
  68. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +46 -37
  69. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +20 -10
  70. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +23 -11
  71. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +36 -23
  72. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +47 -36
  73. cognee/infrastructure/loaders/LoaderEngine.py +1 -0
  74. cognee/infrastructure/loaders/core/__init__.py +2 -1
  75. cognee/infrastructure/loaders/core/csv_loader.py +93 -0
  76. cognee/infrastructure/loaders/core/text_loader.py +1 -2
  77. cognee/infrastructure/loaders/external/advanced_pdf_loader.py +0 -9
  78. cognee/infrastructure/loaders/supported_loaders.py +2 -1
  79. cognee/memify_pipelines/create_triplet_embeddings.py +53 -0
  80. cognee/memify_pipelines/persist_sessions_in_knowledge_graph.py +55 -0
  81. cognee/modules/chunking/CsvChunker.py +35 -0
  82. cognee/modules/chunking/models/DocumentChunk.py +2 -1
  83. cognee/modules/chunking/text_chunker_with_overlap.py +124 -0
  84. cognee/modules/cognify/config.py +2 -0
  85. cognee/modules/data/deletion/prune_system.py +52 -2
  86. cognee/modules/data/methods/__init__.py +1 -0
  87. cognee/modules/data/methods/create_dataset.py +4 -2
  88. cognee/modules/data/methods/delete_dataset.py +26 -0
  89. cognee/modules/data/methods/get_dataset_ids.py +5 -1
  90. cognee/modules/data/methods/get_unique_data_id.py +68 -0
  91. cognee/modules/data/methods/get_unique_dataset_id.py +66 -4
  92. cognee/modules/data/models/Dataset.py +2 -0
  93. cognee/modules/data/processing/document_types/CsvDocument.py +33 -0
  94. cognee/modules/data/processing/document_types/__init__.py +1 -0
  95. cognee/modules/engine/models/Triplet.py +9 -0
  96. cognee/modules/engine/models/__init__.py +1 -0
  97. cognee/modules/graph/cognee_graph/CogneeGraph.py +89 -39
  98. cognee/modules/graph/cognee_graph/CogneeGraphElements.py +8 -3
  99. cognee/modules/graph/utils/expand_with_nodes_and_edges.py +19 -2
  100. cognee/modules/graph/utils/resolve_edges_to_text.py +48 -49
  101. cognee/modules/ingestion/identify.py +4 -4
  102. cognee/modules/memify/memify.py +1 -7
  103. cognee/modules/notebooks/operations/run_in_local_sandbox.py +3 -0
  104. cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py +55 -23
  105. cognee/modules/pipelines/operations/pipeline.py +18 -2
  106. cognee/modules/pipelines/operations/run_tasks_data_item.py +1 -1
  107. cognee/modules/retrieval/EntityCompletionRetriever.py +10 -3
  108. cognee/modules/retrieval/__init__.py +1 -1
  109. cognee/modules/retrieval/base_graph_retriever.py +7 -3
  110. cognee/modules/retrieval/base_retriever.py +7 -3
  111. cognee/modules/retrieval/completion_retriever.py +11 -4
  112. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +10 -2
  113. cognee/modules/retrieval/graph_completion_cot_retriever.py +18 -51
  114. cognee/modules/retrieval/graph_completion_retriever.py +14 -1
  115. cognee/modules/retrieval/graph_summary_completion_retriever.py +4 -0
  116. cognee/modules/retrieval/register_retriever.py +10 -0
  117. cognee/modules/retrieval/registered_community_retrievers.py +1 -0
  118. cognee/modules/retrieval/temporal_retriever.py +13 -2
  119. cognee/modules/retrieval/triplet_retriever.py +182 -0
  120. cognee/modules/retrieval/utils/brute_force_triplet_search.py +43 -11
  121. cognee/modules/retrieval/utils/completion.py +2 -22
  122. cognee/modules/run_custom_pipeline/__init__.py +1 -0
  123. cognee/modules/run_custom_pipeline/run_custom_pipeline.py +76 -0
  124. cognee/modules/search/methods/get_search_type_tools.py +54 -8
  125. cognee/modules/search/methods/no_access_control_search.py +4 -0
  126. cognee/modules/search/methods/search.py +26 -3
  127. cognee/modules/search/types/SearchType.py +1 -1
  128. cognee/modules/settings/get_settings.py +19 -0
  129. cognee/modules/users/methods/create_user.py +12 -27
  130. cognee/modules/users/methods/get_authenticated_user.py +3 -2
  131. cognee/modules/users/methods/get_default_user.py +4 -2
  132. cognee/modules/users/methods/get_user.py +1 -1
  133. cognee/modules/users/methods/get_user_by_email.py +1 -1
  134. cognee/modules/users/models/DatasetDatabase.py +24 -3
  135. cognee/modules/users/models/Tenant.py +6 -7
  136. cognee/modules/users/models/User.py +6 -5
  137. cognee/modules/users/models/UserTenant.py +12 -0
  138. cognee/modules/users/models/__init__.py +1 -0
  139. cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +13 -13
  140. cognee/modules/users/roles/methods/add_user_to_role.py +3 -1
  141. cognee/modules/users/tenants/methods/__init__.py +1 -0
  142. cognee/modules/users/tenants/methods/add_user_to_tenant.py +21 -12
  143. cognee/modules/users/tenants/methods/create_tenant.py +22 -8
  144. cognee/modules/users/tenants/methods/select_tenant.py +62 -0
  145. cognee/shared/logging_utils.py +6 -0
  146. cognee/shared/rate_limiting.py +30 -0
  147. cognee/tasks/chunks/__init__.py +1 -0
  148. cognee/tasks/chunks/chunk_by_row.py +94 -0
  149. cognee/tasks/documents/__init__.py +0 -1
  150. cognee/tasks/documents/classify_documents.py +2 -0
  151. cognee/tasks/feedback/generate_improved_answers.py +3 -3
  152. cognee/tasks/graph/extract_graph_from_data.py +9 -10
  153. cognee/tasks/ingestion/ingest_data.py +1 -1
  154. cognee/tasks/memify/__init__.py +2 -0
  155. cognee/tasks/memify/cognify_session.py +41 -0
  156. cognee/tasks/memify/extract_user_sessions.py +73 -0
  157. cognee/tasks/memify/get_triplet_datapoints.py +289 -0
  158. cognee/tasks/storage/add_data_points.py +142 -2
  159. cognee/tasks/storage/index_data_points.py +33 -22
  160. cognee/tasks/storage/index_graph_edges.py +37 -57
  161. cognee/tests/integration/documents/CsvDocument_test.py +70 -0
  162. cognee/tests/integration/retrieval/test_triplet_retriever.py +84 -0
  163. cognee/tests/integration/tasks/test_add_data_points.py +139 -0
  164. cognee/tests/integration/tasks/test_get_triplet_datapoints.py +69 -0
  165. cognee/tests/integration/web_url_crawler/test_default_url_crawler.py +1 -1
  166. cognee/tests/integration/web_url_crawler/test_tavily_crawler.py +1 -1
  167. cognee/tests/integration/web_url_crawler/test_url_adding_e2e.py +13 -27
  168. cognee/tests/tasks/entity_extraction/entity_extraction_test.py +1 -1
  169. cognee/tests/test_add_docling_document.py +2 -2
  170. cognee/tests/test_cognee_server_start.py +84 -3
  171. cognee/tests/test_conversation_history.py +68 -5
  172. cognee/tests/test_data/example_with_header.csv +3 -0
  173. cognee/tests/test_dataset_database_handler.py +137 -0
  174. cognee/tests/test_dataset_delete.py +76 -0
  175. cognee/tests/test_edge_centered_payload.py +170 -0
  176. cognee/tests/test_edge_ingestion.py +27 -0
  177. cognee/tests/test_feedback_enrichment.py +1 -1
  178. cognee/tests/test_library.py +6 -4
  179. cognee/tests/test_load.py +62 -0
  180. cognee/tests/test_multi_tenancy.py +165 -0
  181. cognee/tests/test_parallel_databases.py +2 -0
  182. cognee/tests/test_pipeline_cache.py +164 -0
  183. cognee/tests/test_relational_db_migration.py +54 -2
  184. cognee/tests/test_search_db.py +44 -2
  185. cognee/tests/unit/api/test_conditional_authentication_endpoints.py +12 -3
  186. cognee/tests/unit/api/test_ontology_endpoint.py +252 -0
  187. cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +5 -0
  188. cognee/tests/unit/infrastructure/databases/test_index_data_points.py +27 -0
  189. cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py +14 -16
  190. cognee/tests/unit/infrastructure/llm/test_llm_config.py +46 -0
  191. cognee/tests/unit/infrastructure/mock_embedding_engine.py +3 -7
  192. cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +0 -5
  193. cognee/tests/unit/modules/chunking/test_text_chunker.py +248 -0
  194. cognee/tests/unit/modules/chunking/test_text_chunker_with_overlap.py +324 -0
  195. cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +2 -2
  196. cognee/tests/unit/modules/graph/cognee_graph_test.py +406 -0
  197. cognee/tests/unit/modules/memify_tasks/test_cognify_session.py +111 -0
  198. cognee/tests/unit/modules/memify_tasks/test_extract_user_sessions.py +175 -0
  199. cognee/tests/unit/modules/memify_tasks/test_get_triplet_datapoints.py +214 -0
  200. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +0 -51
  201. cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +1 -0
  202. cognee/tests/unit/modules/retrieval/structured_output_test.py +204 -0
  203. cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +1 -1
  204. cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +0 -1
  205. cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +608 -0
  206. cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +83 -0
  207. cognee/tests/unit/modules/users/test_conditional_authentication.py +0 -63
  208. cognee/tests/unit/processing/chunks/chunk_by_row_test.py +52 -0
  209. cognee/tests/unit/tasks/storage/test_add_data_points.py +288 -0
  210. {cognee-0.4.0.dist-info → cognee-0.5.0.dist-info}/METADATA +11 -6
  211. {cognee-0.4.0.dist-info → cognee-0.5.0.dist-info}/RECORD +215 -163
  212. {cognee-0.4.0.dist-info → cognee-0.5.0.dist-info}/WHEEL +1 -1
  213. {cognee-0.4.0.dist-info → cognee-0.5.0.dist-info}/entry_points.txt +0 -1
  214. cognee/api/v1/cognify/code_graph_pipeline.py +0 -119
  215. cognee/api/v1/cognify/routers/get_code_pipeline_router.py +0 -90
  216. cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +0 -544
  217. cognee/modules/retrieval/code_retriever.py +0 -232
  218. cognee/tasks/code/enrich_dependency_graph_checker.py +0 -35
  219. cognee/tasks/code/get_local_dependencies_checker.py +0 -20
  220. cognee/tasks/code/get_repo_dependency_graph_checker.py +0 -35
  221. cognee/tasks/documents/check_permissions_on_dataset.py +0 -26
  222. cognee/tasks/repo_processor/__init__.py +0 -2
  223. cognee/tasks/repo_processor/get_local_dependencies.py +0 -335
  224. cognee/tasks/repo_processor/get_non_code_files.py +0 -158
  225. cognee/tasks/repo_processor/get_repo_file_dependencies.py +0 -243
  226. {cognee-0.4.0.dist-info → cognee-0.5.0.dist-info}/licenses/LICENSE +0 -0
  227. {cognee-0.4.0.dist-info → cognee-0.5.0.dist-info}/licenses/NOTICE.md +0 -0
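
Several entries above (cognee/modules/retrieval/triplet_retriever.py, cognee/memify_pipelines/create_triplet_embeddings.py, and the SearchType change) introduce a triplet-level retrieval path. The sketch below is assembled only from calls exercised in cognee/tests/test_search_db.py further down; the dataset name and sample text are illustrative, and cognee.add is assumed to be the pre-existing ingestion entry point rather than something added in this release.

import asyncio

import cognee
from cognee.memify_pipelines.create_triplet_embeddings import create_triplet_embeddings
from cognee.modules.search.types import SearchType
from cognee.modules.users.methods import get_default_user


async def demo_triplet_search():
    # Ingest a small document and build the knowledge graph (dataset name is illustrative).
    await cognee.add("Germany is located next to the Netherlands.", "example_dataset")
    await cognee.cognify(["example_dataset"])

    # New in 0.5.0: embed whole triplets into the "Triplet_text" vector collection.
    user = await get_default_user()
    await create_triplet_embeddings(user=user, dataset="example_dataset", triplets_batch_size=5)

    # New in 0.5.0: TRIPLET_COMPLETION answers over the embedded triplets.
    results = await cognee.search(
        query_type=SearchType.TRIPLET_COMPLETION,
        query_text="Next to which country is Germany located?",
    )
    print(results)


asyncio.run(demo_triplet_search())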

cognee/tests/test_relational_db_migration.py (+54 -2)

@@ -1,6 +1,5 @@
 import pathlib
 import os
-from typing import List
 from cognee.infrastructure.databases.graph import get_graph_engine
 from cognee.infrastructure.databases.relational import (
     get_migration_relational_engine,
@@ -10,7 +9,7 @@ from cognee.infrastructure.databases.vector.pgvector import (
     create_db_and_tables as create_pgvector_db_and_tables,
 )
 from cognee.tasks.ingestion import migrate_relational_database
-from cognee.modules.search.types import SearchResult, SearchType
+from cognee.modules.search.types import SearchType
 import cognee


@@ -27,6 +26,9 @@ def normalize_node_name(node_name: str) -> str:


 async def setup_test_db():
+    # Disable backend access control to migrate relational data
+    os.environ["ENABLE_BACKEND_ACCESS_CONTROL"] = "false"
+
     await cognee.prune.prune_data()
     await cognee.prune.prune_system(metadata=True)

@@ -271,6 +273,55 @@ async def test_schema_only_migration():
     print(f"Edge counts: {edge_counts}")


+async def test_search_result_quality():
+    from cognee.infrastructure.databases.relational import (
+        get_migration_relational_engine,
+    )
+
+    # Get relational database with original data
+    migration_engine = get_migration_relational_engine()
+    from sqlalchemy import text
+
+    async with migration_engine.engine.connect() as conn:
+        result = await conn.execute(
+            text("""
+                SELECT
+                    c.CustomerId,
+                    c.FirstName,
+                    c.LastName,
+                    GROUP_CONCAT(i.InvoiceId, ',') AS invoice_ids
+                FROM Customer AS c
+                LEFT JOIN Invoice AS i ON c.CustomerId = i.CustomerId
+                GROUP BY c.CustomerId, c.FirstName, c.LastName
+            """)
+        )
+
+        for row in result:
+            # Get expected invoice IDs from relational DB for each Customer
+            customer_id = row.CustomerId
+            invoice_ids = row.invoice_ids.split(",") if row.invoice_ids else []
+            print(f"Relational DB Customer {customer_id}: {invoice_ids}")
+
+            # Use Cognee search to get invoice IDs for the same Customer but by providing Customer name
+            search_results = await cognee.search(
+                query_type=SearchType.GRAPH_COMPLETION,
+                query_text=f"List me all the invoices of Customer:{row.FirstName} {row.LastName}.",
+                top_k=50,
+                system_prompt="Just return me the invoiceID as a number without any text. This is an example output: ['1', '2', '3']. Where 1, 2, 3 are invoiceIDs of an invoice",
+            )
+            print(f"Cognee search result: {search_results}")
+
+            import ast
+
+            lst = ast.literal_eval(search_results[0])  # converts string -> Python list
+            # Transform both lists to int for comparison, sorting and type consistency
+            lst = sorted([int(x) for x in lst])
+            invoice_ids = sorted([int(x) for x in invoice_ids])
+            assert lst == invoice_ids, (
+                f"Search results {lst} do not match expected invoice IDs {invoice_ids} for Customer:{customer_id}"
+            )
+
+
 async def test_migration_sqlite():
     database_to_migrate_path = os.path.join(pathlib.Path(__file__).parent, "test_data/")

@@ -283,6 +334,7 @@ async def test_migration_sqlite():
     )

     await relational_db_migration()
+    await test_search_result_quality()
     await test_schema_only_migration()


cognee/tests/test_search_db.py (+44 -2)

@@ -2,6 +2,7 @@ import pathlib
 import os
 import cognee
 from cognee.infrastructure.databases.graph import get_graph_engine
+from cognee.infrastructure.databases.vector import get_vector_engine
 from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
 from cognee.modules.graph.utils import resolve_edges_to_text
 from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
@@ -12,8 +13,10 @@ from cognee.modules.retrieval.graph_completion_cot_retriever import GraphComplet
 from cognee.modules.retrieval.graph_summary_completion_retriever import (
     GraphSummaryCompletionRetriever,
 )
+from cognee.modules.retrieval.triplet_retriever import TripletRetriever
 from cognee.shared.logging_utils import get_logger
 from cognee.modules.search.types import SearchType
+from cognee.modules.users.methods import get_default_user
 from collections import Counter

 logger = get_logger()
@@ -37,6 +40,23 @@ async def main():

     await cognee.cognify([dataset_name])

+    user = await get_default_user()
+    from cognee.memify_pipelines.create_triplet_embeddings import create_triplet_embeddings
+
+    await create_triplet_embeddings(user=user, dataset=dataset_name, triplets_batch_size=5)
+
+    graph_engine = await get_graph_engine()
+    nodes, edges = await graph_engine.get_graph_data()
+
+    vector_engine = get_vector_engine()
+    collection = await vector_engine.search(
+        query_text="Test", limit=None, collection_name="Triplet_text"
+    )
+
+    assert len(edges) == len(collection), (
+        f"Expected {len(edges)} edges but got {len(collection)} in Triplet_text collection"
+    )
+
     context_gk = await GraphCompletionRetriever().get_context(
         query="Next to which country is Germany located?"
     )
@@ -49,6 +69,9 @@
     context_gk_sum = await GraphSummaryCompletionRetriever().get_context(
         query="Next to which country is Germany located?"
     )
+    context_triplet = await TripletRetriever().get_context(
+        query="Next to which country is Germany located?"
+    )

     for name, context in [
         ("GraphCompletionRetriever", context_gk),
@@ -65,6 +88,13 @@
             f"{name}: Context did not contain 'germany' or 'netherlands'; got: {context!r}"
         )

+    assert isinstance(context_triplet, str), "TripletRetriever: Context should be a string"
+    assert len(context_triplet) > 0, "TripletRetriever: Context should not be empty"
+    lower_triplet = context_triplet.lower()
+    assert "germany" in lower_triplet or "netherlands" in lower_triplet, (
+        f"TripletRetriever: Context did not contain 'germany' or 'netherlands'; got: {context_triplet!r}"
+    )
+
     triplets_gk = await GraphCompletionRetriever().get_triplets(
         query="Next to which country is Germany located?"
     )
@@ -129,6 +159,11 @@
         query_text="Next to which country is Germany located?",
         save_interaction=True,
     )
+    completion_triplet = await cognee.search(
+        query_type=SearchType.TRIPLET_COMPLETION,
+        query_text="Next to which country is Germany located?",
+        save_interaction=True,
+    )

     await cognee.search(
         query_type=SearchType.FEEDBACK,
@@ -141,12 +176,19 @@
         ("GRAPH_COMPLETION_COT", completion_cot),
         ("GRAPH_COMPLETION_CONTEXT_EXTENSION", completion_ext),
         ("GRAPH_SUMMARY_COMPLETION", completion_sum),
+        ("TRIPLET_COMPLETION", completion_triplet),
     ]:
         assert isinstance(search_results, list), f"{name}: should return a list"
         assert len(search_results) == 1, (
             f"{name}: expected single-element list, got {len(search_results)}"
         )
-        text = search_results[0]
+
+        from cognee.context_global_variables import backend_access_control_enabled
+
+        if backend_access_control_enabled():
+            text = search_results[0]["search_result"][0]
+        else:
+            text = search_results[0]
         assert isinstance(text, str), f"{name}: element should be a string"
         assert text.strip(), f"{name}: string should not be empty"
         assert "netherlands" in text.lower(), (
@@ -162,7 +204,7 @@

     # Assert there are exactly 4 CogneeUserInteraction nodes.
     assert type_counts.get("CogneeUserInteraction", 0) == 4, (
-        f"Expected exactly four DCogneeUserInteraction nodes, but found {type_counts.get('CogneeUserInteraction', 0)}"
+        f"Expected exactly four CogneeUserInteraction nodes, but found {type_counts.get('CogneeUserInteraction', 0)}"
     )


cognee/tests/unit/api/test_conditional_authentication_endpoints.py (+12 -3)

@@ -1,3 +1,4 @@
+import os
 import pytest
 from unittest.mock import patch, AsyncMock, MagicMock
 from uuid import uuid4
@@ -5,8 +6,6 @@ from fastapi.testclient import TestClient
 from types import SimpleNamespace
 import importlib

-from cognee.api.client import app
-

 # Fixtures for reuse across test classes
 @pytest.fixture
@@ -32,6 +31,10 @@ def mock_authenticated_user():
     )


+# To turn off authentication we need to set the environment variable before importing the module
+# Also both require_authentication and backend access control must be false
+os.environ["REQUIRE_AUTHENTICATION"] = "false"
+os.environ["ENABLE_BACKEND_ACCESS_CONTROL"] = "false"
 gau_mod = importlib.import_module("cognee.modules.users.methods.get_authenticated_user")


@@ -40,6 +43,8 @@ class TestConditionalAuthenticationEndpoints:

     @pytest.fixture
     def client(self):
+        from cognee.api.client import app
+
         """Create a test client."""
         return TestClient(app)

@@ -133,6 +138,8 @@ class TestConditionalAuthenticationBehavior:

     @pytest.fixture
     def client(self):
+        from cognee.api.client import app
+
         return TestClient(app)

     @pytest.mark.parametrize(
@@ -209,6 +216,8 @@ class TestConditionalAuthenticationErrorHandling:

     @pytest.fixture
     def client(self):
+        from cognee.api.client import app
+
         return TestClient(app)

     @patch.object(gau_mod, "get_default_user", new_callable=AsyncMock)
@@ -232,7 +241,7 @@
         # The exact error message may vary depending on the actual database connection
         # The important thing is that we get a 500 error when user creation fails

-    def test_current_environment_configuration(self):
+    def test_current_environment_configuration(self, client):
         """Test that current environment configuration is working properly."""
         # This tests the actual module state without trying to change it
         from cognee.modules.users.methods.get_authenticated_user import (

cognee/tests/unit/api/test_ontology_endpoint.py (+252 -0, new file)

@@ -0,0 +1,252 @@
+import pytest
+import uuid
+from fastapi.testclient import TestClient
+from unittest.mock import Mock
+from types import SimpleNamespace
+from cognee.api.client import app
+from cognee.modules.users.methods import get_authenticated_user
+
+
+@pytest.fixture(scope="session")
+def test_client():
+    # Keep a single TestClient (and event loop) for the whole module.
+    # Re-creating TestClient repeatedly can break async DB connections (asyncpg loop mismatch).
+    with TestClient(app) as c:
+        yield c
+
+
+@pytest.fixture
+def client(test_client, mock_default_user):
+    async def override_get_authenticated_user():
+        return mock_default_user
+
+    app.dependency_overrides[get_authenticated_user] = override_get_authenticated_user
+    yield test_client
+    app.dependency_overrides.pop(get_authenticated_user, None)
+
+
+@pytest.fixture
+def mock_user():
+    user = Mock()
+    user.id = "test-user-123"
+    return user
+
+
+@pytest.fixture
+def mock_default_user():
+    """Mock default user for testing."""
+    return SimpleNamespace(
+        id=str(uuid.uuid4()),
+        email="default@example.com",
+        is_active=True,
+        tenant_id=str(uuid.uuid4()),
+    )
+
+
+def test_upload_ontology_success(client):
+    """Test successful ontology upload"""
+    ontology_content = (
+        b"<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'></rdf:RDF>"
+    )
+    unique_key = f"test_ontology_{uuid.uuid4().hex[:8]}"
+
+    response = client.post(
+        "/api/v1/ontologies",
+        files=[("ontology_file", ("test.owl", ontology_content, "application/xml"))],
+        data={"ontology_key": unique_key, "description": "Test"},
+    )
+
+    assert response.status_code == 200
+    data = response.json()
+    assert data["uploaded_ontologies"][0]["ontology_key"] == unique_key
+    assert "uploaded_at" in data["uploaded_ontologies"][0]
+
+
+def test_upload_ontology_invalid_file(client):
+    """Test 400 response for non-.owl files"""
+    unique_key = f"test_ontology_{uuid.uuid4().hex[:8]}"
+    response = client.post(
+        "/api/v1/ontologies",
+        files={"ontology_file": ("test.txt", b"not xml")},
+        data={"ontology_key": unique_key},
+    )
+    assert response.status_code == 400
+
+
+def test_upload_ontology_missing_data(client):
+    """Test 400 response for missing file or key"""
+    # Missing file
+    response = client.post("/api/v1/ontologies", data={"ontology_key": "test"})
+    assert response.status_code == 400
+
+    # Missing key
+    response = client.post(
+        "/api/v1/ontologies", files=[("ontology_file", ("test.owl", b"xml", "application/xml"))]
+    )
+    assert response.status_code == 400
+
+
+def test_upload_ontology_without_auth_header(client):
+    """Test behavior when no explicit authentication header is provided."""
+    unique_key = f"test_ontology_{uuid.uuid4().hex[:8]}"
+    response = client.post(
+        "/api/v1/ontologies",
+        files=[("ontology_file", ("test.owl", b"<rdf></rdf>", "application/xml"))],
+        data={"ontology_key": unique_key},
+    )
+
+    assert response.status_code == 200
+    data = response.json()
+    assert data["uploaded_ontologies"][0]["ontology_key"] == unique_key
+    assert "uploaded_at" in data["uploaded_ontologies"][0]
+
+
+def test_upload_multiple_ontologies_in_single_request_is_rejected(client):
+    """Uploading multiple ontology files in a single request should fail."""
+    import io
+
+    file1_content = b"<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'></rdf:RDF>"
+    file2_content = b"<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'></rdf:RDF>"
+
+    files = [
+        ("ontology_file", ("vehicles.owl", io.BytesIO(file1_content), "application/xml")),
+        ("ontology_file", ("manufacturers.owl", io.BytesIO(file2_content), "application/xml")),
+    ]
+    data = {"ontology_key": "vehicles", "description": "Base vehicles"}
+
+    response = client.post("/api/v1/ontologies", files=files, data=data)
+
+    assert response.status_code == 400
+    assert "Only one ontology_file is allowed" in response.json()["error"]
+
+
+def test_upload_endpoint_rejects_array_style_fields(client):
+    """Array-style form values should be rejected (no backwards compatibility)."""
+    import io
+    import json
+
+    file_content = b"<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'></rdf:RDF>"
+
+    files = [("ontology_file", ("single.owl", io.BytesIO(file_content), "application/xml"))]
+    data = {
+        "ontology_key": json.dumps(["single_key"]),
+        "description": json.dumps(["Single ontology"]),
+    }
+
+    response = client.post("/api/v1/ontologies", files=files, data=data)
+
+    assert response.status_code == 400
+    assert "ontology_key must be a string" in response.json()["error"]
+
+
+def test_cognify_with_multiple_ontologies(client):
+    """Test cognify endpoint accepts multiple ontology keys"""
+    payload = {
+        "datasets": ["test_dataset"],
+        "ontology_key": ["ontology1", "ontology2"],  # Array instead of string
+        "run_in_background": False,
+    }
+
+    response = client.post("/api/v1/cognify", json=payload)
+
+    # Should not fail due to ontology_key type
+    assert response.status_code in [200, 400, 409]  # May fail for other reasons, not type
+
+
+def test_complete_multifile_workflow(client):
+    """Test workflow: upload ontologies one-by-one → cognify with multiple keys"""
+    import io
+
+    # Step 1: Upload two ontologies (one-by-one)
+    file1_content = b"""<?xml version="1.0"?>
+    <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+             xmlns:owl="http://www.w3.org/2002/07/owl#">
+        <owl:Class rdf:ID="Vehicle"/>
+    </rdf:RDF>"""
+
+    file2_content = b"""<?xml version="1.0"?>
+    <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+             xmlns:owl="http://www.w3.org/2002/07/owl#">
+        <owl:Class rdf:ID="Manufacturer"/>
+    </rdf:RDF>"""
+
+    upload_response_1 = client.post(
+        "/api/v1/ontologies",
+        files=[("ontology_file", ("vehicles.owl", io.BytesIO(file1_content), "application/xml"))],
+        data={"ontology_key": "vehicles", "description": "Vehicle ontology"},
+    )
+    assert upload_response_1.status_code == 200
+
+    upload_response_2 = client.post(
+        "/api/v1/ontologies",
+        files=[
+            ("ontology_file", ("manufacturers.owl", io.BytesIO(file2_content), "application/xml"))
+        ],
+        data={"ontology_key": "manufacturers", "description": "Manufacturer ontology"},
+    )
+    assert upload_response_2.status_code == 200
+
+    # Step 2: Verify ontologies are listed
+    list_response = client.get("/api/v1/ontologies")
+    assert list_response.status_code == 200
+    ontologies = list_response.json()
+    assert "vehicles" in ontologies
+    assert "manufacturers" in ontologies
+
+    # Step 3: Test cognify with multiple ontologies
+    cognify_payload = {
+        "datasets": ["test_dataset"],
+        "ontology_key": ["vehicles", "manufacturers"],
+        "run_in_background": False,
+    }
+
+    cognify_response = client.post("/api/v1/cognify", json=cognify_payload)
+    # Should not fail due to ontology handling (may fail for dataset reasons)
+    assert cognify_response.status_code != 400  # Not a validation error
+
+
+def test_upload_error_handling(client):
+    """Test error handling for invalid uploads (single-file endpoint)."""
+    import io
+    import json
+
+    # Array-style key should be rejected
+    file_content = b"<rdf:RDF></rdf:RDF>"
+    files = [("ontology_file", ("test.owl", io.BytesIO(file_content), "application/xml"))]
+    data = {
+        "ontology_key": json.dumps(["key1", "key2"]),
+        "description": "desc1",
+    }
+
+    response = client.post("/api/v1/ontologies", files=files, data=data)
+    assert response.status_code == 400
+    assert "ontology_key must be a string" in response.json()["error"]
+
+    # Duplicate key should be rejected
+    response_1 = client.post(
+        "/api/v1/ontologies",
+        files=[("ontology_file", ("test1.owl", io.BytesIO(file_content), "application/xml"))],
+        data={"ontology_key": "duplicate", "description": "desc1"},
+    )
+    assert response_1.status_code == 200
+
+    response_2 = client.post(
+        "/api/v1/ontologies",
+        files=[("ontology_file", ("test2.owl", io.BytesIO(file_content), "application/xml"))],
+        data={"ontology_key": "duplicate", "description": "desc2"},
+    )
+    assert response_2.status_code == 400
+    assert "already exists" in response_2.json()["error"]
+
+
+def test_cognify_missing_ontology_key(client):
+    """Test cognify with non-existent ontology key"""
+    payload = {
+        "datasets": ["test_dataset"],
+        "ontology_key": ["nonexistent_key"],
+        "run_in_background": False,
+    }
+
+    response = client.post("/api/v1/cognify", json=payload)
+    assert response.status_code == 409
+    assert "Ontology key 'nonexistent_key' not found" in response.json()["error"]

cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py (+5 -0)

@@ -8,6 +8,7 @@ def test_cache_config_defaults():
     """Test that CacheConfig has the correct default values."""
     config = CacheConfig()

+    assert config.cache_backend == "fs"
     assert config.caching is False
     assert config.shared_kuzu_lock is False
     assert config.cache_host == "localhost"
@@ -19,6 +20,7 @@ def test_cache_config_defaults():
 def test_cache_config_custom_values():
     """Test that CacheConfig accepts custom values."""
     config = CacheConfig(
+        cache_backend="redis",
         caching=True,
         shared_kuzu_lock=True,
         cache_host="redis.example.com",
@@ -27,6 +29,7 @@ def test_cache_config_custom_values():
         agentic_lock_timeout=180,
     )

+    assert config.cache_backend == "redis"
     assert config.caching is True
     assert config.shared_kuzu_lock is True
     assert config.cache_host == "redis.example.com"
@@ -38,6 +41,7 @@ def test_cache_config_custom_values():
 def test_cache_config_to_dict():
     """Test the to_dict method returns all configuration values."""
     config = CacheConfig(
+        cache_backend="fs",
         caching=True,
         shared_kuzu_lock=True,
         cache_host="test-host",
@@ -49,6 +53,7 @@ def test_cache_config_to_dict():
     config_dict = config.to_dict()

     assert config_dict == {
+        "cache_backend": "fs",
         "caching": True,
         "shared_kuzu_lock": True,
         "cache_host": "test-host",

cognee/tests/unit/infrastructure/databases/test_index_data_points.py (+27 -0, new file)

@@ -0,0 +1,27 @@
+import pytest
+from unittest.mock import AsyncMock, patch, MagicMock
+from cognee.tasks.storage.index_data_points import index_data_points
+from cognee.infrastructure.engine import DataPoint
+
+
+class TestDataPoint(DataPoint):
+    name: str
+    metadata: dict = {"index_fields": ["name"]}
+
+
+@pytest.mark.asyncio
+async def test_index_data_points_calls_vector_engine():
+    """Test that index_data_points creates vector index and indexes data."""
+    data_points = [TestDataPoint(name="test1")]
+
+    mock_vector_engine = AsyncMock()
+    mock_vector_engine.embedding_engine.get_batch_size = MagicMock(return_value=100)
+
+    with patch.dict(
+        index_data_points.__globals__,
+        {"get_vector_engine": lambda: mock_vector_engine},
+    ):
+        await index_data_points(data_points)
+
+    assert mock_vector_engine.create_vector_index.await_count >= 1
+    assert mock_vector_engine.index_data_points.await_count >= 1

cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py (+14 -16)

@@ -5,8 +5,7 @@ from cognee.tasks.storage.index_graph_edges import index_graph_edges

 @pytest.mark.asyncio
 async def test_index_graph_edges_success():
-    """Test that index_graph_edges uses the index datapoints and creates vector index."""
-    # Create the mocks for the graph and vector engines.
+    """Test that index_graph_edges retrieves edges and delegates to index_data_points."""
     mock_graph_engine = AsyncMock()
     mock_graph_engine.get_graph_data.return_value = (
         None,
@@ -15,26 +14,23 @@ async def test_index_graph_edges_success():
             [{"relationship_name": "rel2"}],
         ],
     )
-    mock_vector_engine = AsyncMock()
-    mock_vector_engine.embedding_engine.get_batch_size = MagicMock(return_value=100)
+    mock_index_data_points = AsyncMock()

-    # Patch the globals of the function so that when it does:
-    # vector_engine = get_vector_engine()
-    # graph_engine = await get_graph_engine()
-    # it uses the mocked versions.
     with patch.dict(
         index_graph_edges.__globals__,
         {
             "get_graph_engine": AsyncMock(return_value=mock_graph_engine),
-            "get_vector_engine": lambda: mock_vector_engine,
+            "index_data_points": mock_index_data_points,
         },
     ):
         await index_graph_edges()

-    # Assertions on the mock calls.
     mock_graph_engine.get_graph_data.assert_awaited_once()
-    assert mock_vector_engine.create_vector_index.await_count == 1
-    assert mock_vector_engine.index_data_points.await_count == 1
+    mock_index_data_points.assert_awaited_once()
+
+    call_args = mock_index_data_points.call_args[0][0]
+    assert len(call_args) == 2
+    assert all(hasattr(item, "relationship_name") for item in call_args)


 @pytest.mark.asyncio
@@ -42,20 +38,22 @@ async def test_index_graph_edges_no_relationships():
     """Test that index_graph_edges handles empty relationships correctly."""
     mock_graph_engine = AsyncMock()
     mock_graph_engine.get_graph_data.return_value = (None, [])
-    mock_vector_engine = AsyncMock()
+    mock_index_data_points = AsyncMock()

     with patch.dict(
         index_graph_edges.__globals__,
         {
             "get_graph_engine": AsyncMock(return_value=mock_graph_engine),
-            "get_vector_engine": lambda: mock_vector_engine,
+            "index_data_points": mock_index_data_points,
         },
     ):
         await index_graph_edges()

     mock_graph_engine.get_graph_data.assert_awaited_once()
-    mock_vector_engine.create_vector_index.assert_not_awaited()
-    mock_vector_engine.index_data_points.assert_not_awaited()
+    mock_index_data_points.assert_awaited_once()
+
+    call_args = mock_index_data_points.call_args[0][0]
+    assert len(call_args) == 0


 @pytest.mark.asyncio