cognee 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (224)
  1. cognee/__init__.py +1 -0
  2. cognee/api/client.py +9 -5
  3. cognee/api/v1/add/add.py +2 -1
  4. cognee/api/v1/add/routers/get_add_router.py +3 -1
  5. cognee/api/v1/cognify/cognify.py +24 -16
  6. cognee/api/v1/cognify/routers/__init__.py +0 -1
  7. cognee/api/v1/cognify/routers/get_cognify_router.py +30 -1
  8. cognee/api/v1/datasets/routers/get_datasets_router.py +3 -3
  9. cognee/api/v1/ontologies/__init__.py +4 -0
  10. cognee/api/v1/ontologies/ontologies.py +158 -0
  11. cognee/api/v1/ontologies/routers/__init__.py +0 -0
  12. cognee/api/v1/ontologies/routers/get_ontology_router.py +109 -0
  13. cognee/api/v1/permissions/routers/get_permissions_router.py +41 -1
  14. cognee/api/v1/search/search.py +4 -0
  15. cognee/api/v1/ui/node_setup.py +360 -0
  16. cognee/api/v1/ui/npm_utils.py +50 -0
  17. cognee/api/v1/ui/ui.py +38 -68
  18. cognee/cli/commands/cognify_command.py +8 -1
  19. cognee/cli/config.py +1 -1
  20. cognee/context_global_variables.py +86 -9
  21. cognee/eval_framework/Dockerfile +29 -0
  22. cognee/eval_framework/answer_generation/answer_generation_executor.py +10 -0
  23. cognee/eval_framework/answer_generation/run_question_answering_module.py +1 -1
  24. cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py +0 -2
  25. cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +4 -4
  26. cognee/eval_framework/eval_config.py +2 -2
  27. cognee/eval_framework/modal_run_eval.py +16 -28
  28. cognee/infrastructure/databases/cache/config.py +3 -1
  29. cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +151 -0
  30. cognee/infrastructure/databases/cache/get_cache_engine.py +20 -10
  31. cognee/infrastructure/databases/dataset_database_handler/__init__.py +3 -0
  32. cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py +80 -0
  33. cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +18 -0
  34. cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +10 -0
  35. cognee/infrastructure/databases/exceptions/exceptions.py +16 -0
  36. cognee/infrastructure/databases/graph/config.py +7 -0
  37. cognee/infrastructure/databases/graph/get_graph_engine.py +3 -0
  38. cognee/infrastructure/databases/graph/graph_db_interface.py +15 -0
  39. cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py +81 -0
  40. cognee/infrastructure/databases/graph/kuzu/adapter.py +228 -0
  41. cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +168 -0
  42. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +80 -1
  43. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +9 -0
  44. cognee/infrastructure/databases/utils/__init__.py +3 -0
  45. cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py +10 -0
  46. cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +66 -18
  47. cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py +10 -0
  48. cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py +30 -0
  49. cognee/infrastructure/databases/vector/config.py +5 -0
  50. cognee/infrastructure/databases/vector/create_vector_engine.py +6 -1
  51. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +8 -6
  52. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +9 -7
  53. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +11 -13
  54. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +2 -0
  55. cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py +50 -0
  56. cognee/infrastructure/databases/vector/vector_db_interface.py +35 -0
  57. cognee/infrastructure/engine/models/Edge.py +13 -1
  58. cognee/infrastructure/files/storage/s3_config.py +2 -0
  59. cognee/infrastructure/files/utils/guess_file_type.py +4 -0
  60. cognee/infrastructure/llm/LLMGateway.py +5 -2
  61. cognee/infrastructure/llm/config.py +37 -0
  62. cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py +2 -2
  63. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +23 -8
  64. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +22 -18
  65. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py +5 -0
  66. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +153 -0
  67. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +47 -38
  68. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +46 -37
  69. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +20 -10
  70. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +23 -11
  71. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +36 -23
  72. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +47 -36
  73. cognee/infrastructure/loaders/LoaderEngine.py +1 -0
  74. cognee/infrastructure/loaders/core/__init__.py +2 -1
  75. cognee/infrastructure/loaders/core/csv_loader.py +93 -0
  76. cognee/infrastructure/loaders/core/text_loader.py +1 -2
  77. cognee/infrastructure/loaders/external/advanced_pdf_loader.py +0 -9
  78. cognee/infrastructure/loaders/supported_loaders.py +2 -1
  79. cognee/memify_pipelines/create_triplet_embeddings.py +53 -0
  80. cognee/memify_pipelines/persist_sessions_in_knowledge_graph.py +55 -0
  81. cognee/modules/chunking/CsvChunker.py +35 -0
  82. cognee/modules/chunking/models/DocumentChunk.py +2 -1
  83. cognee/modules/chunking/text_chunker_with_overlap.py +124 -0
  84. cognee/modules/cognify/config.py +2 -0
  85. cognee/modules/data/deletion/prune_system.py +52 -2
  86. cognee/modules/data/methods/__init__.py +1 -0
  87. cognee/modules/data/methods/create_dataset.py +4 -2
  88. cognee/modules/data/methods/delete_dataset.py +26 -0
  89. cognee/modules/data/methods/get_dataset_ids.py +5 -1
  90. cognee/modules/data/methods/get_unique_data_id.py +68 -0
  91. cognee/modules/data/methods/get_unique_dataset_id.py +66 -4
  92. cognee/modules/data/models/Dataset.py +2 -0
  93. cognee/modules/data/processing/document_types/CsvDocument.py +33 -0
  94. cognee/modules/data/processing/document_types/__init__.py +1 -0
  95. cognee/modules/engine/models/Triplet.py +9 -0
  96. cognee/modules/engine/models/__init__.py +1 -0
  97. cognee/modules/graph/cognee_graph/CogneeGraph.py +89 -39
  98. cognee/modules/graph/cognee_graph/CogneeGraphElements.py +8 -3
  99. cognee/modules/graph/utils/expand_with_nodes_and_edges.py +19 -2
  100. cognee/modules/graph/utils/resolve_edges_to_text.py +48 -49
  101. cognee/modules/ingestion/identify.py +4 -4
  102. cognee/modules/memify/memify.py +1 -7
  103. cognee/modules/notebooks/operations/run_in_local_sandbox.py +3 -0
  104. cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py +55 -23
  105. cognee/modules/pipelines/operations/pipeline.py +18 -2
  106. cognee/modules/pipelines/operations/run_tasks_data_item.py +1 -1
  107. cognee/modules/retrieval/EntityCompletionRetriever.py +10 -3
  108. cognee/modules/retrieval/__init__.py +1 -1
  109. cognee/modules/retrieval/base_graph_retriever.py +7 -3
  110. cognee/modules/retrieval/base_retriever.py +7 -3
  111. cognee/modules/retrieval/completion_retriever.py +11 -4
  112. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +10 -2
  113. cognee/modules/retrieval/graph_completion_cot_retriever.py +18 -51
  114. cognee/modules/retrieval/graph_completion_retriever.py +14 -1
  115. cognee/modules/retrieval/graph_summary_completion_retriever.py +4 -0
  116. cognee/modules/retrieval/register_retriever.py +10 -0
  117. cognee/modules/retrieval/registered_community_retrievers.py +1 -0
  118. cognee/modules/retrieval/temporal_retriever.py +13 -2
  119. cognee/modules/retrieval/triplet_retriever.py +182 -0
  120. cognee/modules/retrieval/utils/brute_force_triplet_search.py +43 -11
  121. cognee/modules/retrieval/utils/completion.py +2 -22
  122. cognee/modules/run_custom_pipeline/__init__.py +1 -0
  123. cognee/modules/run_custom_pipeline/run_custom_pipeline.py +76 -0
  124. cognee/modules/search/methods/get_search_type_tools.py +54 -8
  125. cognee/modules/search/methods/no_access_control_search.py +4 -0
  126. cognee/modules/search/methods/search.py +26 -3
  127. cognee/modules/search/types/SearchType.py +1 -1
  128. cognee/modules/settings/get_settings.py +19 -0
  129. cognee/modules/users/methods/create_user.py +12 -27
  130. cognee/modules/users/methods/get_authenticated_user.py +3 -2
  131. cognee/modules/users/methods/get_default_user.py +4 -2
  132. cognee/modules/users/methods/get_user.py +1 -1
  133. cognee/modules/users/methods/get_user_by_email.py +1 -1
  134. cognee/modules/users/models/DatasetDatabase.py +24 -3
  135. cognee/modules/users/models/Tenant.py +6 -7
  136. cognee/modules/users/models/User.py +6 -5
  137. cognee/modules/users/models/UserTenant.py +12 -0
  138. cognee/modules/users/models/__init__.py +1 -0
  139. cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +13 -13
  140. cognee/modules/users/roles/methods/add_user_to_role.py +3 -1
  141. cognee/modules/users/tenants/methods/__init__.py +1 -0
  142. cognee/modules/users/tenants/methods/add_user_to_tenant.py +21 -12
  143. cognee/modules/users/tenants/methods/create_tenant.py +22 -8
  144. cognee/modules/users/tenants/methods/select_tenant.py +62 -0
  145. cognee/shared/logging_utils.py +6 -0
  146. cognee/shared/rate_limiting.py +30 -0
  147. cognee/tasks/chunks/__init__.py +1 -0
  148. cognee/tasks/chunks/chunk_by_row.py +94 -0
  149. cognee/tasks/documents/__init__.py +0 -1
  150. cognee/tasks/documents/classify_documents.py +2 -0
  151. cognee/tasks/feedback/generate_improved_answers.py +3 -3
  152. cognee/tasks/graph/extract_graph_from_data.py +9 -10
  153. cognee/tasks/ingestion/ingest_data.py +1 -1
  154. cognee/tasks/memify/__init__.py +2 -0
  155. cognee/tasks/memify/cognify_session.py +41 -0
  156. cognee/tasks/memify/extract_user_sessions.py +73 -0
  157. cognee/tasks/memify/get_triplet_datapoints.py +289 -0
  158. cognee/tasks/storage/add_data_points.py +142 -2
  159. cognee/tasks/storage/index_data_points.py +33 -22
  160. cognee/tasks/storage/index_graph_edges.py +37 -57
  161. cognee/tests/integration/documents/CsvDocument_test.py +70 -0
  162. cognee/tests/integration/retrieval/test_triplet_retriever.py +84 -0
  163. cognee/tests/integration/tasks/test_add_data_points.py +139 -0
  164. cognee/tests/integration/tasks/test_get_triplet_datapoints.py +69 -0
  165. cognee/tests/tasks/entity_extraction/entity_extraction_test.py +1 -1
  166. cognee/tests/test_add_docling_document.py +2 -2
  167. cognee/tests/test_cognee_server_start.py +84 -3
  168. cognee/tests/test_conversation_history.py +68 -5
  169. cognee/tests/test_data/example_with_header.csv +3 -0
  170. cognee/tests/test_dataset_database_handler.py +137 -0
  171. cognee/tests/test_dataset_delete.py +76 -0
  172. cognee/tests/test_edge_centered_payload.py +170 -0
  173. cognee/tests/test_edge_ingestion.py +27 -0
  174. cognee/tests/test_feedback_enrichment.py +1 -1
  175. cognee/tests/test_library.py +6 -4
  176. cognee/tests/test_load.py +62 -0
  177. cognee/tests/test_multi_tenancy.py +165 -0
  178. cognee/tests/test_parallel_databases.py +2 -0
  179. cognee/tests/test_pipeline_cache.py +164 -0
  180. cognee/tests/test_relational_db_migration.py +54 -2
  181. cognee/tests/test_search_db.py +44 -2
  182. cognee/tests/unit/api/test_conditional_authentication_endpoints.py +12 -3
  183. cognee/tests/unit/api/test_ontology_endpoint.py +252 -0
  184. cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +5 -0
  185. cognee/tests/unit/infrastructure/databases/test_index_data_points.py +27 -0
  186. cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py +14 -16
  187. cognee/tests/unit/infrastructure/llm/test_llm_config.py +46 -0
  188. cognee/tests/unit/infrastructure/mock_embedding_engine.py +3 -7
  189. cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +0 -5
  190. cognee/tests/unit/modules/chunking/test_text_chunker.py +248 -0
  191. cognee/tests/unit/modules/chunking/test_text_chunker_with_overlap.py +324 -0
  192. cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +2 -2
  193. cognee/tests/unit/modules/graph/cognee_graph_test.py +406 -0
  194. cognee/tests/unit/modules/memify_tasks/test_cognify_session.py +111 -0
  195. cognee/tests/unit/modules/memify_tasks/test_extract_user_sessions.py +175 -0
  196. cognee/tests/unit/modules/memify_tasks/test_get_triplet_datapoints.py +214 -0
  197. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +0 -51
  198. cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +1 -0
  199. cognee/tests/unit/modules/retrieval/structured_output_test.py +204 -0
  200. cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +1 -1
  201. cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +0 -1
  202. cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +608 -0
  203. cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +83 -0
  204. cognee/tests/unit/modules/users/test_conditional_authentication.py +0 -63
  205. cognee/tests/unit/processing/chunks/chunk_by_row_test.py +52 -0
  206. cognee/tests/unit/tasks/storage/test_add_data_points.py +288 -0
  207. {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/METADATA +11 -7
  208. {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/RECORD +212 -160
  209. {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/entry_points.txt +0 -1
  210. cognee/api/v1/cognify/code_graph_pipeline.py +0 -119
  211. cognee/api/v1/cognify/routers/get_code_pipeline_router.py +0 -90
  212. cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +0 -544
  213. cognee/modules/retrieval/code_retriever.py +0 -232
  214. cognee/tasks/code/enrich_dependency_graph_checker.py +0 -35
  215. cognee/tasks/code/get_local_dependencies_checker.py +0 -20
  216. cognee/tasks/code/get_repo_dependency_graph_checker.py +0 -35
  217. cognee/tasks/documents/check_permissions_on_dataset.py +0 -26
  218. cognee/tasks/repo_processor/__init__.py +0 -2
  219. cognee/tasks/repo_processor/get_local_dependencies.py +0 -335
  220. cognee/tasks/repo_processor/get_non_code_files.py +0 -158
  221. cognee/tasks/repo_processor/get_repo_file_dependencies.py +0 -243
  222. {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/WHEEL +0 -0
  223. {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/licenses/LICENSE +0 -0
  224. {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/licenses/NOTICE.md +0 -0
@@ -90,15 +90,17 @@ async def main():
     )
 
     search_results = await cognee.search(
-        query_type=SearchType.GRAPH_COMPLETION, query_text="What information do you contain?"
+        query_type=SearchType.GRAPH_COMPLETION,
+        query_text="What information do you contain?",
+        dataset_ids=[pipeline_run_obj.dataset_id],
     )
-    assert "Mark" in search_results[0], (
+    assert "Mark" in search_results[0]["search_result"][0], (
         "Failed to update document, no mention of Mark in search results"
     )
-    assert "Cindy" in search_results[0], (
+    assert "Cindy" in search_results[0]["search_result"][0], (
         "Failed to update document, no mention of Cindy in search results"
     )
-    assert "Artificial intelligence" not in search_results[0], (
+    assert "Artificial intelligence" not in search_results[0]["search_result"][0], (
        "Failed to update document, Artificial intelligence still mentioned in search results"
     )
 
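The reworked assertions above suggest that in 0.5.0 cognee.search can be scoped with dataset_ids and that each result is a per-dataset dictionary whose completions sit under a "search_result" key, rather than a flat list of strings. A minimal sketch of unwrapping both shapes; the helper name and any schema details beyond the "search_result" key seen in this diff are assumptions, not part of the release:

from typing import Any, List


def flatten_search_results(search_results: List[Any]) -> List[str]:
    """Collect plain completion strings from either the old (flat) or new (per-dataset dict) result shape."""
    texts: List[str] = []
    for item in search_results:
        if isinstance(item, dict) and "search_result" in item:
            # Shape seen in this diff: one dict per dataset holding a list of completions.
            texts.extend(item["search_result"])
        else:
            # Older shape: the item is already the completion string.
            texts.append(item)
    return texts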
@@ -0,0 +1,62 @@
+import os
+import pathlib
+import asyncio
+import time
+
+import cognee
+from cognee.modules.search.types import SearchType
+from cognee.shared.logging_utils import get_logger
+
+logger = get_logger()
+
+
+async def process_and_search(num_of_searches):
+    start_time = time.time()
+
+    await cognee.cognify()
+
+    await asyncio.gather(
+        *[
+            cognee.search(
+                query_text="Tell me about the document", query_type=SearchType.GRAPH_COMPLETION
+            )
+            for _ in range(num_of_searches)
+        ]
+    )
+
+    end_time = time.time()
+
+    return end_time - start_time
+
+
+async def main():
+    data_directory_path = os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_load")
+    cognee.config.data_root_directory(data_directory_path)
+
+    cognee_directory_path = os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_load")
+    cognee.config.system_root_directory(cognee_directory_path)
+
+    num_of_pdfs = 10
+    num_of_reps = 5
+    upper_boundary_minutes = 10
+    average_minutes = 8
+
+    recorded_times = []
+    for _ in range(num_of_reps):
+        await cognee.prune.prune_data()
+        await cognee.prune.prune_system(metadata=True)
+
+        s3_input = "s3://cognee-test-load-s3-bucket"
+        await cognee.add(s3_input)
+
+        recorded_times.append(await process_and_search(num_of_pdfs))
+
+    average_recorded_time = sum(recorded_times) / len(recorded_times)
+
+    assert average_recorded_time <= average_minutes * 60
+
+    assert all(rec_time <= upper_boundary_minutes * 60 for rec_time in recorded_times)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
@@ -0,0 +1,165 @@
+import cognee
+import pytest
+
+from cognee.modules.users.exceptions import PermissionDeniedError
+from cognee.modules.users.tenants.methods import select_tenant
+from cognee.modules.users.methods import get_user
+from cognee.shared.logging_utils import get_logger
+from cognee.modules.search.types import SearchType
+from cognee.modules.users.methods import create_user
+from cognee.modules.users.permissions.methods import authorized_give_permission_on_datasets
+from cognee.modules.users.roles.methods import add_user_to_role
+from cognee.modules.users.roles.methods import create_role
+from cognee.modules.users.tenants.methods import create_tenant
+from cognee.modules.users.tenants.methods import add_user_to_tenant
+from cognee.modules.engine.operations.setup import setup
+from cognee.shared.logging_utils import setup_logging, CRITICAL
+
+logger = get_logger()
+
+
+async def main():
+    # Create a clean slate for cognee -- reset data and system state
+    print("Resetting cognee data...")
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+    print("Data reset complete.\n")
+
+    # Set up the necessary databases and tables for user management.
+    await setup()
+
+    # Add document for user_1, add it under dataset name AI
+    text = """A quantum computer is a computer that takes advantage of quantum mechanical phenomena.
+    At small scales, physical matter exhibits properties of both particles and waves, and quantum computing leverages
+    this behavior, specifically quantum superposition and entanglement, using specialized hardware that supports the
+    preparation and manipulation of quantum state"""
+
+    print("Creating user_1: user_1@example.com")
+    user_1 = await create_user("user_1@example.com", "example")
+    await cognee.add([text], dataset_name="AI", user=user_1)
+
+    print("\nCreating user_2: user_2@example.com")
+    user_2 = await create_user("user_2@example.com", "example")
+
+    # Run cognify for both datasets as the appropriate user/owner
+    print("\nCreating different datasets for user_1 (AI dataset) and user_2 (QUANTUM dataset)")
+    ai_cognify_result = await cognee.cognify(["AI"], user=user_1)
+
+    # Extract dataset_ids from cognify results
+    def extract_dataset_id_from_cognify(cognify_result):
+        """Extract dataset_id from cognify output dictionary"""
+        for dataset_id, pipeline_result in cognify_result.items():
+            return dataset_id  # Return the first dataset_id
+        return None
+
+    # Get dataset IDs from cognify results
+    # Note: When we want to work with datasets from other users (search, add, cognify and etc.) we must supply dataset
+    # information through dataset_id using dataset name only looks for datasets owned by current user
+    ai_dataset_id = extract_dataset_id_from_cognify(ai_cognify_result)
+
+    # We can see here that user_1 can read his own dataset (AI dataset)
+    search_results = await cognee.search(
+        query_type=SearchType.GRAPH_COMPLETION,
+        query_text="What is in the document?",
+        user=user_1,
+        datasets=[ai_dataset_id],
+    )
+
+    # Verify that user_2 cannot access user_1's dataset without permission
+    with pytest.raises(PermissionDeniedError):
+        search_results = await cognee.search(
+            query_type=SearchType.GRAPH_COMPLETION,
+            query_text="What is in the document?",
+            user=user_2,
+            datasets=[ai_dataset_id],
+        )
+
+    # Create new tenant and role, add user_2 to tenant and role
+    tenant_id = await create_tenant("CogneeLab", user_1.id)
+    await select_tenant(user_id=user_1.id, tenant_id=tenant_id)
+    role_id = await create_role(role_name="Researcher", owner_id=user_1.id)
+    await add_user_to_tenant(
+        user_id=user_2.id, tenant_id=tenant_id, owner_id=user_1.id, set_as_active_tenant=True
+    )
+    await add_user_to_role(user_id=user_2.id, role_id=role_id, owner_id=user_1.id)
+
+    # Assert that user_1 cannot give permissions on his dataset to role before switching to the correct tenant
+    # AI dataset was made with default tenant and not CogneeLab tenant
+    with pytest.raises(PermissionDeniedError):
+        await authorized_give_permission_on_datasets(
+            role_id,
+            [ai_dataset_id],
+            "read",
+            user_1.id,
+        )
+
+    # We need to refresh the user object with changes made when switching tenants
+    user_1 = await get_user(user_1.id)
+    await cognee.add([text], dataset_name="AI_COGNEE_LAB", user=user_1)
+    ai_cognee_lab_cognify_result = await cognee.cognify(["AI_COGNEE_LAB"], user=user_1)
+
+    ai_cognee_lab_dataset_id = extract_dataset_id_from_cognify(ai_cognee_lab_cognify_result)
+
+    await authorized_give_permission_on_datasets(
+        role_id,
+        [ai_cognee_lab_dataset_id],
+        "read",
+        user_1.id,
+    )
+
+    search_results = await cognee.search(
+        query_type=SearchType.GRAPH_COMPLETION,
+        query_text="What is in the document?",
+        user=user_2,
+        dataset_ids=[ai_cognee_lab_dataset_id],
+    )
+    for result in search_results:
+        print(f"{result}\n")
+
+    # Let's test changing tenants
+    tenant_id = await create_tenant("CogneeLab2", user_1.id)
+    await select_tenant(user_id=user_1.id, tenant_id=tenant_id)
+
+    user_1 = await get_user(user_1.id)
+    await cognee.add([text], dataset_name="AI_COGNEE_LAB", user=user_1)
+    await cognee.cognify(["AI_COGNEE_LAB"], user=user_1)
+
+    search_results = await cognee.search(
+        query_type=SearchType.GRAPH_COMPLETION,
+        query_text="What is in the document?",
+        user=user_1,
+    )
+
+    # Assert only AI_COGNEE_LAB dataset from CogneeLab2 tenant is visible as the currently selected tenant
+    assert len(search_results) == 1, (
+        f"Search results must only contain one dataset from current tenant: {search_results}"
+    )
+    assert search_results[0]["dataset_name"] == "AI_COGNEE_LAB", (
+        f"Dict must contain dataset name 'AI_COGNEE_LAB': {search_results[0]}"
+    )
+    assert search_results[0]["dataset_tenant_id"] == user_1.tenant_id, (
+        f"Dataset tenant_id must be same as user_1 tenant_id: {search_results[0]}"
+    )
+
+    # Switch back to no tenant (default tenant)
+    await select_tenant(user_id=user_1.id, tenant_id=None)
+    # Refresh user_1 object
+    user_1 = await get_user(user_1.id)
+    search_results = await cognee.search(
+        query_type=SearchType.GRAPH_COMPLETION,
+        query_text="What is in the document?",
+        user=user_1,
+    )
+    assert len(search_results) == 1, (
+        f"Search results must only contain one dataset from default tenant: {search_results}"
+    )
+    assert search_results[0]["dataset_name"] == "AI", (
+        f"Dict must contain dataset name 'AI': {search_results[0]}"
+    )
+
+
+if __name__ == "__main__":
+    import asyncio
+
+    logger = setup_logging(log_level=CRITICAL)
+    asyncio.run(main())
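Taken together, the new multi-tenancy test above (cognee/tests/test_multi_tenancy.py in the file list) exercises a small flow: create a tenant, select it, re-fetch the user so the active tenant takes effect, then add and cognify data under that tenant. A condensed sketch of that flow using only the calls shown above; the user, tenant, and dataset names here are illustrative:

import cognee
from cognee.modules.users.methods import create_user, get_user
from cognee.modules.users.tenants.methods import create_tenant, select_tenant


async def tenant_scoped_ingest(text: str):
    # Owner creates a tenant and makes it the active one.
    owner = await create_user("owner@example.com", "example")
    tenant_id = await create_tenant("ExampleTenant", owner.id)
    await select_tenant(user_id=owner.id, tenant_id=tenant_id)

    # Re-fetch the user so the newly selected tenant is reflected on the object.
    owner = await get_user(owner.id)

    # Data added and cognified now lands in a dataset owned under the active tenant.
    await cognee.add([text], dataset_name="EXAMPLE_DATASET", user=owner)
    await cognee.cognify(["EXAMPLE_DATASET"], user=owner)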
@@ -33,11 +33,13 @@ async def main():
         "vector_db_url": "cognee1.test",
         "vector_db_key": "",
         "vector_db_provider": "lancedb",
+        "vector_db_name": "",
     }
     task_2_config = {
         "vector_db_url": "cognee2.test",
         "vector_db_key": "",
         "vector_db_provider": "lancedb",
+        "vector_db_name": "",
     }
 
     task_1_graph_config = {
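The two added lines indicate that per-task vector database configuration in 0.5.0 also carries a vector_db_name entry alongside the URL, key, and provider. A minimal config sketch; only the key names are taken from the test above, the values are placeholders:

# Placeholder values; only the key set is taken from the diff above.
task_vector_config = {
    "vector_db_url": "my-lancedb-instance.test",
    "vector_db_key": "",
    "vector_db_provider": "lancedb",
    "vector_db_name": "",  # key added in this release
}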
@@ -0,0 +1,164 @@
+"""
+Test suite for the pipeline_cache feature in Cognee pipelines.
+
+This module tests the behavior of the `pipeline_cache` parameter which controls
+whether a pipeline should skip re-execution when it has already been completed
+for the same dataset.
+
+Architecture Overview:
+---------------------
+The pipeline_cache mechanism works at the dataset level:
+1. When a pipeline runs, it logs its status (INITIATED -> STARTED -> COMPLETED)
+2. Before each run, `check_pipeline_run_qualification()` checks the pipeline status
+3. If `use_pipeline_cache=True` and status is COMPLETED/STARTED, the pipeline skips
+4. If `use_pipeline_cache=False`, the pipeline always re-executes regardless of status
+"""
+
+import pytest
+
+import cognee
+from cognee.modules.pipelines.tasks.task import Task
+from cognee.modules.pipelines import run_pipeline
+from cognee.modules.users.methods import get_default_user
+
+from cognee.modules.pipelines.layers.reset_dataset_pipeline_run_status import (
+    reset_dataset_pipeline_run_status,
+)
+from cognee.infrastructure.databases.relational import create_db_and_tables
+
+
+class ExecutionCounter:
+    """Helper class to track task execution counts."""
+
+    def __init__(self):
+        self.count = 0
+
+
+async def create_counting_task(data, counter: ExecutionCounter):
+    """Create a task that increments a counter from the ExecutionCounter instance when executed."""
+    counter.count += 1
+    return counter
+
+
+class TestPipelineCache:
+    """Tests for basic pipeline_cache on/off behavior."""
+
+    @pytest.mark.asyncio
+    async def test_pipeline_cache_off_allows_reexecution(self):
+        """
+        Test that with use_pipeline_cache=False, the pipeline re-executes
+        even when it has already completed for the dataset.
+
+        Expected behavior:
+        - First run: Pipeline executes fully, task runs once
+        - Second run: Pipeline executes again, task runs again (total: 2 times)
+        """
+        await cognee.prune.prune_data()
+        await cognee.prune.prune_system(metadata=True)
+        await create_db_and_tables()
+
+        counter = ExecutionCounter()
+        user = await get_default_user()
+
+        tasks = [Task(create_counting_task, counter=counter)]
+
+        # First run
+        pipeline_results_1 = []
+        async for result in run_pipeline(
+            tasks=tasks,
+            datasets="test_dataset_cache_off",
+            data=["sample data"],  # Data is necessary to trigger processing
+            user=user,
+            pipeline_name="test_cache_off_pipeline",
+            use_pipeline_cache=False,
+        ):
+            pipeline_results_1.append(result)
+
+        first_run_count = counter.count
+        assert first_run_count >= 1, "Task should have executed at least once on first run"
+
+        # Second run with pipeline_cache=False
+        pipeline_results_2 = []
+        async for result in run_pipeline(
+            tasks=tasks,
+            datasets="test_dataset_cache_off",
+            data=["sample data"],  # Data is necessary to trigger processing
+            user=user,
+            pipeline_name="test_cache_off_pipeline",
+            use_pipeline_cache=False,
+        ):
+            pipeline_results_2.append(result)
+
+        second_run_count = counter.count
+        assert second_run_count > first_run_count, (
+            f"With pipeline_cache=False, task should re-execute. "
+            f"First run: {first_run_count}, After second run: {second_run_count}"
+        )
+
+    @pytest.mark.asyncio
+    async def test_reset_pipeline_status_allows_reexecution_with_cache(self):
+        """
+        Test that resetting pipeline status allows re-execution even with
+        pipeline_cache=True.
+        """
+        await cognee.prune.prune_data()
+        await cognee.prune.prune_system(metadata=True)
+        await create_db_and_tables()
+
+        counter = ExecutionCounter()
+        user = await get_default_user()
+        dataset_name = "reset_status_test"
+        pipeline_name = "test_reset_pipeline"
+
+        tasks = [Task(create_counting_task, counter=counter)]
+
+        # First run
+        pipeline_result = []
+        async for result in run_pipeline(
+            tasks=tasks,
+            datasets=dataset_name,
+            user=user,
+            data=["sample data"],  # Data is necessary to trigger processing
+            pipeline_name=pipeline_name,
+            use_pipeline_cache=True,
+        ):
+            pipeline_result.append(result)
+
+        first_run_count = counter.count
+        assert first_run_count >= 1
+
+        # Second run without reset - should skip
+        async for _ in run_pipeline(
+            tasks=tasks,
+            datasets=dataset_name,
+            user=user,
+            data=["sample data"],  # Data is necessary to trigger processing
+            pipeline_name=pipeline_name,
+            use_pipeline_cache=True,
+        ):
+            pass
+
+        after_second_run = counter.count
+        assert after_second_run == first_run_count, "Should have skipped due to cache"
+
+        # Reset the pipeline status
+        await reset_dataset_pipeline_run_status(
+            pipeline_result[0].dataset_id, user, pipeline_names=[pipeline_name]
+        )
+
+        # Third run after reset - should execute
+        async for _ in run_pipeline(
+            tasks=tasks,
+            datasets=dataset_name,
+            user=user,
+            data=["sample data"],  # Data is necessary to trigger processing
+            pipeline_name=pipeline_name,
+            use_pipeline_cache=True,
+        ):
+            pass
+
+        after_reset_run = counter.count
+        assert after_reset_run > after_second_run, (
+            f"After reset, pipeline should re-execute. "
+            f"Before reset: {after_second_run}, After reset run: {after_reset_run}"
+        )
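As the docstring at the top of this new test (cognee/tests/test_pipeline_cache.py in the file list) explains, a run is skipped when use_pipeline_cache=True and a completed (or started) run is already recorded for the dataset. A condensed sketch of driving run_pipeline with the cache enabled, using only the call shape shown above; the task, dataset, and pipeline names are illustrative:

import cognee
from cognee.modules.pipelines import run_pipeline
from cognee.modules.pipelines.tasks.task import Task
from cognee.modules.users.methods import get_default_user


async def example_task(data):
    # Stand-in task; any coroutine wrapped in Task works here.
    return data


async def run_cached_pipeline():
    user = await get_default_user()
    # A second call with the same dataset and pipeline name should be skipped
    # while a completed run is recorded, per the docstring above.
    async for run_info in run_pipeline(
        tasks=[Task(example_task)],
        datasets="example_dataset",
        data=["sample data"],
        user=user,
        pipeline_name="example_pipeline",
        use_pipeline_cache=True,
    ):
        print(run_info)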
@@ -1,6 +1,5 @@
 import pathlib
 import os
-from typing import List
 from cognee.infrastructure.databases.graph import get_graph_engine
 from cognee.infrastructure.databases.relational import (
     get_migration_relational_engine,
@@ -10,7 +9,7 @@ from cognee.infrastructure.databases.vector.pgvector import (
     create_db_and_tables as create_pgvector_db_and_tables,
 )
 from cognee.tasks.ingestion import migrate_relational_database
-from cognee.modules.search.types import SearchResult, SearchType
+from cognee.modules.search.types import SearchType
 import cognee
 
 
@@ -27,6 +26,9 @@ def normalize_node_name(node_name: str) -> str:
 
 
 async def setup_test_db():
+    # Disable backend access control to migrate relational data
+    os.environ["ENABLE_BACKEND_ACCESS_CONTROL"] = "false"
+
     await cognee.prune.prune_data()
     await cognee.prune.prune_system(metadata=True)
 
@@ -271,6 +273,55 @@ async def test_schema_only_migration():
     print(f"Edge counts: {edge_counts}")
 
 
+async def test_search_result_quality():
+    from cognee.infrastructure.databases.relational import (
+        get_migration_relational_engine,
+    )
+
+    # Get relational database with original data
+    migration_engine = get_migration_relational_engine()
+    from sqlalchemy import text
+
+    async with migration_engine.engine.connect() as conn:
+        result = await conn.execute(
+            text("""
+                SELECT
+                    c.CustomerId,
+                    c.FirstName,
+                    c.LastName,
+                    GROUP_CONCAT(i.InvoiceId, ',') AS invoice_ids
+                FROM Customer AS c
+                LEFT JOIN Invoice AS i ON c.CustomerId = i.CustomerId
+                GROUP BY c.CustomerId, c.FirstName, c.LastName
+            """)
+        )
+
+        for row in result:
+            # Get expected invoice IDs from relational DB for each Customer
+            customer_id = row.CustomerId
+            invoice_ids = row.invoice_ids.split(",") if row.invoice_ids else []
+            print(f"Relational DB Customer {customer_id}: {invoice_ids}")
+
+            # Use Cognee search to get invoice IDs for the same Customer but by providing Customer name
+            search_results = await cognee.search(
+                query_type=SearchType.GRAPH_COMPLETION,
+                query_text=f"List me all the invoices of Customer:{row.FirstName} {row.LastName}.",
+                top_k=50,
+                system_prompt="Just return me the invoiceID as a number without any text. This is an example output: ['1', '2', '3']. Where 1, 2, 3 are invoiceIDs of an invoice",
+            )
+            print(f"Cognee search result: {search_results}")
+
+            import ast
+
+            lst = ast.literal_eval(search_results[0])  # converts string -> Python list
+            # Transfrom both lists to int for comparison, sorting and type consistency
+            lst = sorted([int(x) for x in lst])
+            invoice_ids = sorted([int(x) for x in invoice_ids])
+            assert lst == invoice_ids, (
+                f"Search results {lst} do not match expected invoice IDs {invoice_ids} for Customer:{customer_id}"
+            )
+
+
 async def test_migration_sqlite():
     database_to_migrate_path = os.path.join(pathlib.Path(__file__).parent, "test_data/")
 
@@ -283,6 +334,7 @@ async def test_migration_sqlite():
     )
 
     await relational_db_migration()
+    await test_search_result_quality()
     await test_schema_only_migration()
 
 
@@ -2,6 +2,7 @@ import pathlib
 import os
 import cognee
 from cognee.infrastructure.databases.graph import get_graph_engine
+from cognee.infrastructure.databases.vector import get_vector_engine
 from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
 from cognee.modules.graph.utils import resolve_edges_to_text
 from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
@@ -12,8 +13,10 @@ from cognee.modules.retrieval.graph_completion_cot_retriever import GraphComplet
 from cognee.modules.retrieval.graph_summary_completion_retriever import (
     GraphSummaryCompletionRetriever,
 )
+from cognee.modules.retrieval.triplet_retriever import TripletRetriever
 from cognee.shared.logging_utils import get_logger
 from cognee.modules.search.types import SearchType
+from cognee.modules.users.methods import get_default_user
 from collections import Counter
 
 logger = get_logger()
@@ -37,6 +40,23 @@ async def main():
 
     await cognee.cognify([dataset_name])
 
+    user = await get_default_user()
+    from cognee.memify_pipelines.create_triplet_embeddings import create_triplet_embeddings
+
+    await create_triplet_embeddings(user=user, dataset=dataset_name, triplets_batch_size=5)
+
+    graph_engine = await get_graph_engine()
+    nodes, edges = await graph_engine.get_graph_data()
+
+    vector_engine = get_vector_engine()
+    collection = await vector_engine.search(
+        query_text="Test", limit=None, collection_name="Triplet_text"
+    )
+
+    assert len(edges) == len(collection), (
+        f"Expected {len(edges)} edges but got {len(collection)} in Triplet_text collection"
+    )
+
     context_gk = await GraphCompletionRetriever().get_context(
         query="Next to which country is Germany located?"
     )
@@ -49,6 +69,9 @@ async def main():
     context_gk_sum = await GraphSummaryCompletionRetriever().get_context(
         query="Next to which country is Germany located?"
     )
+    context_triplet = await TripletRetriever().get_context(
+        query="Next to which country is Germany located?"
+    )
 
     for name, context in [
         ("GraphCompletionRetriever", context_gk),
@@ -65,6 +88,13 @@ async def main():
             f"{name}: Context did not contain 'germany' or 'netherlands'; got: {context!r}"
         )
 
+    assert isinstance(context_triplet, str), "TripletRetriever: Context should be a string"
+    assert len(context_triplet) > 0, "TripletRetriever: Context should not be empty"
+    lower_triplet = context_triplet.lower()
+    assert "germany" in lower_triplet or "netherlands" in lower_triplet, (
+        f"TripletRetriever: Context did not contain 'germany' or 'netherlands'; got: {context_triplet!r}"
+    )
+
     triplets_gk = await GraphCompletionRetriever().get_triplets(
         query="Next to which country is Germany located?"
     )
@@ -129,6 +159,11 @@ async def main():
         query_text="Next to which country is Germany located?",
         save_interaction=True,
     )
+    completion_triplet = await cognee.search(
+        query_type=SearchType.TRIPLET_COMPLETION,
+        query_text="Next to which country is Germany located?",
+        save_interaction=True,
+    )
 
     await cognee.search(
         query_type=SearchType.FEEDBACK,
@@ -141,12 +176,19 @@ async def main():
         ("GRAPH_COMPLETION_COT", completion_cot),
         ("GRAPH_COMPLETION_CONTEXT_EXTENSION", completion_ext),
         ("GRAPH_SUMMARY_COMPLETION", completion_sum),
+        ("TRIPLET_COMPLETION", completion_triplet),
     ]:
         assert isinstance(search_results, list), f"{name}: should return a list"
         assert len(search_results) == 1, (
             f"{name}: expected single-element list, got {len(search_results)}"
         )
-        text = search_results[0]
+
+        from cognee.context_global_variables import backend_access_control_enabled
+
+        if backend_access_control_enabled():
+            text = search_results[0]["search_result"][0]
+        else:
+            text = search_results[0]
         assert isinstance(text, str), f"{name}: element should be a string"
         assert text.strip(), f"{name}: string should not be empty"
         assert "netherlands" in text.lower(), (
@@ -162,7 +204,7 @@ async def main():
 
     # Assert there are exactly 4 CogneeUserInteraction nodes.
     assert type_counts.get("CogneeUserInteraction", 0) == 4, (
-        f"Expected exactly four DCogneeUserInteraction nodes, but found {type_counts.get('CogneeUserInteraction', 0)}"
+        f"Expected exactly four CogneeUserInteraction nodes, but found {type_counts.get('CogneeUserInteraction', 0)}"
     )
 
     # Assert there is exactly two CogneeUserFeedback nodes.
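The branches added in the hunks above show both changes at once: a new TRIPLET_COMPLETION search type and a result shape that depends on backend_access_control_enabled(). A small sketch of issuing the new search and normalizing either shape, using only names that appear in this diff; the helper itself is illustrative:

import cognee
from cognee.context_global_variables import backend_access_control_enabled
from cognee.modules.search.types import SearchType


async def triplet_completion_text(query: str) -> str:
    results = await cognee.search(
        query_type=SearchType.TRIPLET_COMPLETION,
        query_text=query,
    )
    # With backend access control enabled, results are per-dataset dicts;
    # otherwise they are plain completion strings (shapes taken from the test above).
    if backend_access_control_enabled():
        return results[0]["search_result"][0]
    return results[0]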
@@ -1,3 +1,4 @@
+import os
 import pytest
 from unittest.mock import patch, AsyncMock, MagicMock
 from uuid import uuid4
@@ -5,8 +6,6 @@ from fastapi.testclient import TestClient
 from types import SimpleNamespace
 import importlib
 
-from cognee.api.client import app
-
 
 # Fixtures for reuse across test classes
 @pytest.fixture
@@ -32,6 +31,10 @@ def mock_authenticated_user():
     )
 
 
+# To turn off authentication we need to set the environment variable before importing the module
+# Also both require_authentication and backend access control must be false
+os.environ["REQUIRE_AUTHENTICATION"] = "false"
+os.environ["ENABLE_BACKEND_ACCESS_CONTROL"] = "false"
 gau_mod = importlib.import_module("cognee.modules.users.methods.get_authenticated_user")
 
 
@@ -40,6 +43,8 @@ class TestConditionalAuthenticationEndpoints:
 
     @pytest.fixture
     def client(self):
+        from cognee.api.client import app
+
         """Create a test client."""
         return TestClient(app)
 
@@ -133,6 +138,8 @@ class TestConditionalAuthenticationBehavior:
 
     @pytest.fixture
     def client(self):
+        from cognee.api.client import app
+
         return TestClient(app)
 
     @pytest.mark.parametrize(
@@ -209,6 +216,8 @@ class TestConditionalAuthenticationErrorHandling:
 
     @pytest.fixture
     def client(self):
+        from cognee.api.client import app
+
         return TestClient(app)
 
     @patch.object(gau_mod, "get_default_user", new_callable=AsyncMock)
@@ -232,7 +241,7 @@ class TestConditionalAuthenticationErrorHandling:
         # The exact error message may vary depending on the actual database connection
         # The important thing is that we get a 500 error when user creation fails
 
-    def test_current_environment_configuration(self):
+    def test_current_environment_configuration(self, client):
         """Test that current environment configuration is working properly."""
         # This tests the actual module state without trying to change it
         from cognee.modules.users.methods.get_authenticated_user import (