cognee 0.4.1__py3-none-any.whl → 0.5.0.dev0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only and reflects only the changes between those published versions.
Files changed (135)
  1. cognee/__init__.py +1 -0
  2. cognee/api/client.py +8 -0
  3. cognee/api/v1/add/routers/get_add_router.py +3 -1
  4. cognee/api/v1/cognify/routers/get_cognify_router.py +28 -1
  5. cognee/api/v1/ontologies/__init__.py +4 -0
  6. cognee/api/v1/ontologies/ontologies.py +183 -0
  7. cognee/api/v1/ontologies/routers/__init__.py +0 -0
  8. cognee/api/v1/ontologies/routers/get_ontology_router.py +107 -0
  9. cognee/api/v1/permissions/routers/get_permissions_router.py +41 -1
  10. cognee/cli/commands/cognify_command.py +8 -1
  11. cognee/cli/config.py +1 -1
  12. cognee/context_global_variables.py +41 -9
  13. cognee/infrastructure/databases/cache/config.py +3 -1
  14. cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +151 -0
  15. cognee/infrastructure/databases/cache/get_cache_engine.py +20 -10
  16. cognee/infrastructure/databases/exceptions/exceptions.py +16 -0
  17. cognee/infrastructure/databases/graph/config.py +4 -0
  18. cognee/infrastructure/databases/graph/get_graph_engine.py +2 -0
  19. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +9 -0
  20. cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +37 -3
  21. cognee/infrastructure/databases/vector/config.py +3 -0
  22. cognee/infrastructure/databases/vector/create_vector_engine.py +5 -1
  23. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +1 -4
  24. cognee/infrastructure/engine/models/Edge.py +13 -1
  25. cognee/infrastructure/files/utils/guess_file_type.py +4 -0
  26. cognee/infrastructure/llm/config.py +2 -0
  27. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +5 -2
  28. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +7 -1
  29. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +7 -1
  30. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +8 -16
  31. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +12 -2
  32. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +13 -2
  33. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +5 -2
  34. cognee/infrastructure/loaders/LoaderEngine.py +1 -0
  35. cognee/infrastructure/loaders/core/__init__.py +2 -1
  36. cognee/infrastructure/loaders/core/csv_loader.py +93 -0
  37. cognee/infrastructure/loaders/core/text_loader.py +1 -2
  38. cognee/infrastructure/loaders/external/advanced_pdf_loader.py +0 -9
  39. cognee/infrastructure/loaders/supported_loaders.py +2 -1
  40. cognee/memify_pipelines/persist_sessions_in_knowledge_graph.py +55 -0
  41. cognee/modules/chunking/CsvChunker.py +35 -0
  42. cognee/modules/chunking/models/DocumentChunk.py +2 -1
  43. cognee/modules/chunking/text_chunker_with_overlap.py +124 -0
  44. cognee/modules/data/methods/__init__.py +1 -0
  45. cognee/modules/data/methods/create_dataset.py +4 -2
  46. cognee/modules/data/methods/get_dataset_ids.py +5 -1
  47. cognee/modules/data/methods/get_unique_data_id.py +68 -0
  48. cognee/modules/data/methods/get_unique_dataset_id.py +66 -4
  49. cognee/modules/data/models/Dataset.py +2 -0
  50. cognee/modules/data/processing/document_types/CsvDocument.py +33 -0
  51. cognee/modules/data/processing/document_types/__init__.py +1 -0
  52. cognee/modules/graph/cognee_graph/CogneeGraph.py +4 -2
  53. cognee/modules/graph/utils/expand_with_nodes_and_edges.py +19 -2
  54. cognee/modules/graph/utils/resolve_edges_to_text.py +48 -49
  55. cognee/modules/ingestion/identify.py +4 -4
  56. cognee/modules/notebooks/operations/run_in_local_sandbox.py +3 -0
  57. cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py +55 -23
  58. cognee/modules/pipelines/operations/run_tasks_data_item.py +1 -1
  59. cognee/modules/retrieval/EntityCompletionRetriever.py +10 -3
  60. cognee/modules/retrieval/base_graph_retriever.py +7 -3
  61. cognee/modules/retrieval/base_retriever.py +7 -3
  62. cognee/modules/retrieval/completion_retriever.py +11 -4
  63. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +6 -2
  64. cognee/modules/retrieval/graph_completion_cot_retriever.py +14 -51
  65. cognee/modules/retrieval/graph_completion_retriever.py +4 -1
  66. cognee/modules/retrieval/temporal_retriever.py +9 -2
  67. cognee/modules/retrieval/utils/brute_force_triplet_search.py +1 -1
  68. cognee/modules/retrieval/utils/completion.py +2 -22
  69. cognee/modules/run_custom_pipeline/__init__.py +1 -0
  70. cognee/modules/run_custom_pipeline/run_custom_pipeline.py +69 -0
  71. cognee/modules/search/methods/search.py +5 -3
  72. cognee/modules/users/methods/create_user.py +12 -27
  73. cognee/modules/users/methods/get_authenticated_user.py +2 -1
  74. cognee/modules/users/methods/get_default_user.py +4 -2
  75. cognee/modules/users/methods/get_user.py +1 -1
  76. cognee/modules/users/methods/get_user_by_email.py +1 -1
  77. cognee/modules/users/models/DatasetDatabase.py +9 -0
  78. cognee/modules/users/models/Tenant.py +6 -7
  79. cognee/modules/users/models/User.py +6 -5
  80. cognee/modules/users/models/UserTenant.py +12 -0
  81. cognee/modules/users/models/__init__.py +1 -0
  82. cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +13 -13
  83. cognee/modules/users/roles/methods/add_user_to_role.py +3 -1
  84. cognee/modules/users/tenants/methods/__init__.py +1 -0
  85. cognee/modules/users/tenants/methods/add_user_to_tenant.py +21 -12
  86. cognee/modules/users/tenants/methods/create_tenant.py +22 -8
  87. cognee/modules/users/tenants/methods/select_tenant.py +62 -0
  88. cognee/shared/logging_utils.py +2 -0
  89. cognee/tasks/chunks/__init__.py +1 -0
  90. cognee/tasks/chunks/chunk_by_row.py +94 -0
  91. cognee/tasks/documents/classify_documents.py +2 -0
  92. cognee/tasks/feedback/generate_improved_answers.py +3 -3
  93. cognee/tasks/ingestion/ingest_data.py +1 -1
  94. cognee/tasks/memify/__init__.py +2 -0
  95. cognee/tasks/memify/cognify_session.py +41 -0
  96. cognee/tasks/memify/extract_user_sessions.py +73 -0
  97. cognee/tasks/storage/index_data_points.py +33 -22
  98. cognee/tasks/storage/index_graph_edges.py +37 -57
  99. cognee/tests/integration/documents/CsvDocument_test.py +70 -0
  100. cognee/tests/tasks/entity_extraction/entity_extraction_test.py +1 -1
  101. cognee/tests/test_add_docling_document.py +2 -2
  102. cognee/tests/test_cognee_server_start.py +84 -1
  103. cognee/tests/test_conversation_history.py +45 -4
  104. cognee/tests/test_data/example_with_header.csv +3 -0
  105. cognee/tests/test_delete_bmw_example.py +60 -0
  106. cognee/tests/test_edge_ingestion.py +27 -0
  107. cognee/tests/test_feedback_enrichment.py +1 -1
  108. cognee/tests/test_library.py +6 -4
  109. cognee/tests/test_load.py +62 -0
  110. cognee/tests/test_multi_tenancy.py +165 -0
  111. cognee/tests/test_parallel_databases.py +2 -0
  112. cognee/tests/test_relational_db_migration.py +54 -2
  113. cognee/tests/test_search_db.py +7 -1
  114. cognee/tests/unit/api/test_conditional_authentication_endpoints.py +12 -3
  115. cognee/tests/unit/api/test_ontology_endpoint.py +264 -0
  116. cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +5 -0
  117. cognee/tests/unit/infrastructure/databases/test_index_data_points.py +27 -0
  118. cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py +14 -16
  119. cognee/tests/unit/modules/chunking/test_text_chunker.py +248 -0
  120. cognee/tests/unit/modules/chunking/test_text_chunker_with_overlap.py +324 -0
  121. cognee/tests/unit/modules/memify_tasks/test_cognify_session.py +111 -0
  122. cognee/tests/unit/modules/memify_tasks/test_extract_user_sessions.py +175 -0
  123. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +0 -51
  124. cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +1 -0
  125. cognee/tests/unit/modules/retrieval/structured_output_test.py +204 -0
  126. cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +1 -1
  127. cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +0 -1
  128. cognee/tests/unit/modules/users/test_conditional_authentication.py +0 -63
  129. cognee/tests/unit/processing/chunks/chunk_by_row_test.py +52 -0
  130. {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/METADATA +88 -71
  131. {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/RECORD +135 -104
  132. {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/WHEEL +1 -1
  133. {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/entry_points.txt +0 -1
  134. {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/LICENSE +0 -0
  135. {cognee-0.4.1.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/NOTICE.md +0 -0
@@ -7,6 +7,7 @@ import requests
  from pathlib import Path
  import sys
  import uuid
+ import json


  class TestCogneeServerStart(unittest.TestCase):
@@ -90,12 +91,71 @@ class TestCogneeServerStart(unittest.TestCase):
  )
  }

- payload = {"datasets": [dataset_name]}
+ ontology_key = f"test_ontology_{uuid.uuid4().hex[:8]}"
+ payload = {"datasets": [dataset_name], "ontology_key": [ontology_key]}

  add_response = requests.post(url, headers=headers, data=form_data, files=file, timeout=50)
  if add_response.status_code not in [200, 201]:
  add_response.raise_for_status()

+ ontology_content = b"""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:owl="http://www.w3.org/2002/07/owl#"
+ xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
+ xmlns="http://example.org/ontology#"
+ xml:base="http://example.org/ontology">
+
+ <owl:Ontology rdf:about="http://example.org/ontology"/>
+
+ <!-- Classes -->
+ <owl:Class rdf:ID="Problem"/>
+ <owl:Class rdf:ID="HardwareProblem"/>
+ <owl:Class rdf:ID="SoftwareProblem"/>
+ <owl:Class rdf:ID="Concept"/>
+ <owl:Class rdf:ID="Object"/>
+ <owl:Class rdf:ID="Joke"/>
+ <owl:Class rdf:ID="Image"/>
+ <owl:Class rdf:ID="Person"/>
+
+ <rdf:Description rdf:about="#HardwareProblem">
+ <rdfs:subClassOf rdf:resource="#Problem"/>
+ <rdfs:comment>A failure caused by physical components.</rdfs:comment>
+ </rdf:Description>
+
+ <rdf:Description rdf:about="#SoftwareProblem">
+ <rdfs:subClassOf rdf:resource="#Problem"/>
+ <rdfs:comment>An error caused by software logic or configuration.</rdfs:comment>
+ </rdf:Description>
+
+ <rdf:Description rdf:about="#Person">
+ <rdfs:comment>A human being or individual.</rdfs:comment>
+ </rdf:Description>
+
+ <!-- Individuals -->
+ <Person rdf:ID="programmers">
+ <rdfs:label>Programmers</rdfs:label>
+ </Person>
+
+ <Object rdf:ID="light_bulb">
+ <rdfs:label>Light Bulb</rdfs:label>
+ </Object>
+
+ <HardwareProblem rdf:ID="hardware_problem">
+ <rdfs:label>Hardware Problem</rdfs:label>
+ </HardwareProblem>
+
+ </rdf:RDF>"""
+
+ ontology_response = requests.post(
+ "http://127.0.0.1:8000/api/v1/ontologies",
+ headers=headers,
+ files=[("ontology_file", ("test.owl", ontology_content, "application/xml"))],
+ data={
+ "ontology_key": json.dumps([ontology_key]),
+ "description": json.dumps(["Test ontology"]),
+ },
+ )
+ self.assertEqual(ontology_response.status_code, 200)
+
  # Cognify request
  url = "http://127.0.0.1:8000/api/v1/cognify"
  headers = {
@@ -107,6 +167,29 @@ class TestCogneeServerStart(unittest.TestCase):
  if cognify_response.status_code not in [200, 201]:
  cognify_response.raise_for_status()

+ datasets_response = requests.get("http://127.0.0.1:8000/api/v1/datasets", headers=headers)
+
+ datasets = datasets_response.json()
+ dataset_id = None
+ for dataset in datasets:
+ if dataset["name"] == dataset_name:
+ dataset_id = dataset["id"]
+ break
+
+ graph_response = requests.get(
+ f"http://127.0.0.1:8000/api/v1/datasets/{dataset_id}/graph", headers=headers
+ )
+ self.assertEqual(graph_response.status_code, 200)
+
+ graph_data = graph_response.json()
+ ontology_nodes = [
+ node for node in graph_data.get("nodes") if node.get("properties").get("ontology_valid")
+ ]
+
+ self.assertGreater(
+ len(ontology_nodes), 0, "No ontology nodes found - ontology was not integrated"
+ )
+
  # TODO: Add test to verify cognify pipeline is complete before testing search

  # Search request
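
The two hunks above exercise the new ontology workflow end to end: upload an RDF/XML file under an ontology_key, pass that key in the cognify payload, then check that nodes in the dataset graph carry the ontology_valid property. Outside the test harness, the same HTTP flow looks roughly like the sketch below; the base URL, token, file path, and dataset name are placeholders, and only endpoints and fields visible in the test are used.

import json
import uuid
import requests

BASE = "http://127.0.0.1:8000/api/v1"
headers = {"Authorization": "Bearer <token>"}  # placeholder credentials

ontology_key = f"my_ontology_{uuid.uuid4().hex[:8]}"

# Upload an RDF/XML ontology under a key via the new /ontologies endpoint
with open("my_ontology.owl", "rb") as owl_file:
    upload = requests.post(
        f"{BASE}/ontologies",
        headers=headers,
        files=[("ontology_file", ("my_ontology.owl", owl_file, "application/xml"))],
        data={
            "ontology_key": json.dumps([ontology_key]),
            "description": json.dumps(["Example ontology"]),
        },
    )
upload.raise_for_status()

# Cognify a dataset against that ontology key
cognify = requests.post(
    f"{BASE}/cognify",
    headers=headers,
    json={"datasets": ["my_dataset"], "ontology_key": [ontology_key]},
)
cognify.raise_for_status()

# Verify that ontology-grounded nodes made it into the dataset graph
datasets = requests.get(f"{BASE}/datasets", headers=headers).json()
dataset_id = next(d["id"] for d in datasets if d["name"] == "my_dataset")
graph = requests.get(f"{BASE}/datasets/{dataset_id}/graph", headers=headers).json()
valid_nodes = [n for n in graph["nodes"] if n["properties"].get("ontology_valid")]
print(f"{len(valid_nodes)} ontology-grounded nodes")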
@@ -16,9 +16,11 @@ import cognee
  import pathlib

  from cognee.infrastructure.databases.cache import get_cache_engine
+ from cognee.infrastructure.databases.graph import get_graph_engine
  from cognee.modules.search.types import SearchType
  from cognee.shared.logging_utils import get_logger
  from cognee.modules.users.methods import get_default_user
+ from collections import Counter

  logger = get_logger()

@@ -54,10 +56,10 @@ async def main():
  """DataCo is a data analytics company. They help businesses make sense of their data."""
  )

- await cognee.add(text_1, dataset_name)
- await cognee.add(text_2, dataset_name)
+ await cognee.add(data=text_1, dataset_name=dataset_name)
+ await cognee.add(data=text_2, dataset_name=dataset_name)

- await cognee.cognify([dataset_name])
+ await cognee.cognify(datasets=[dataset_name])

  user = await get_default_user()

@@ -188,7 +190,6 @@
  f"GRAPH_SUMMARY_COMPLETION should return non-empty list, got: {result_summary!r}"
  )

- # Verify saved
  history_summary = await cache_engine.get_latest_qa(str(user.id), session_id_summary, last_n=10)
  our_qa_summary = [
  h for h in history_summary if h["question"] == "What are the key points about TechCorp?"
@@ -228,6 +229,46 @@
  assert "CONTEXT:" in formatted_history, "Formatted history should contain 'CONTEXT:' prefix"
  assert "ANSWER:" in formatted_history, "Formatted history should contain 'ANSWER:' prefix"

+ from cognee.memify_pipelines.persist_sessions_in_knowledge_graph import (
+ persist_sessions_in_knowledge_graph_pipeline,
+ )
+
+ logger.info("Starting persist_sessions_in_knowledge_graph tests")
+
+ await persist_sessions_in_knowledge_graph_pipeline(
+ user=user,
+ session_ids=[session_id_1, session_id_2],
+ dataset=dataset_name,
+ run_in_background=False,
+ )
+
+ graph_engine = await get_graph_engine()
+ graph = await graph_engine.get_graph_data()
+
+ type_counts = Counter(node_data[1].get("type", {}) for node_data in graph[0])
+
+ "Tests the correct number of NodeSet nodes after session persistence"
+ assert type_counts.get("NodeSet", 0) == 1, (
+ f"Number of NodeSets in the graph is incorrect, found {type_counts.get('NodeSet', 0)} but there should be exactly 1."
+ )
+
+ "Tests the correct number of DocumentChunk nodes after session persistence"
+ assert type_counts.get("DocumentChunk", 0) == 4, (
+ f"Number of DocumentChunk nodes in the graph is incorrect, found {type_counts.get('DocumentChunk', 0)} but there should be exactly 4 (2 original documents, 2 sessions)."
+ )
+
+ from cognee.infrastructure.databases.vector.get_vector_engine import get_vector_engine
+
+ vector_engine = get_vector_engine()
+ collection_size = await vector_engine.search(
+ collection_name="DocumentChunk_text",
+ query_text="test",
+ limit=1000,
+ )
+ assert len(collection_size) == 4, (
+ f"DocumentChunk_text collection should have exactly 4 embeddings, found {len(collection_size)}"
+ )
+
  await cognee.prune.prune_data()
  await cognee.prune.prune_system(metadata=True)

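The assertions above are the first coverage for persist_sessions_in_knowledge_graph (file 40 in the list), which, as the NodeSet and DocumentChunk counts suggest, folds cached Q&A sessions into the dataset's knowledge graph as additional chunks grouped under a NodeSet. A minimal standalone sketch using only the call signature visible in this test; the session IDs and dataset name are placeholders.

import asyncio

from cognee.memify_pipelines.persist_sessions_in_knowledge_graph import (
    persist_sessions_in_knowledge_graph_pipeline,
)
from cognee.modules.users.methods import get_default_user


async def persist_sessions():
    user = await get_default_user()
    # run_in_background=False blocks until the pipeline has finished
    await persist_sessions_in_knowledge_graph_pipeline(
        user=user,
        session_ids=["session_1", "session_2"],  # placeholder session IDs
        dataset="my_dataset",
        run_in_background=False,
    )


asyncio.run(persist_sessions())
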
@@ -0,0 +1,3 @@
+ id,name,age,city,country
+ 1,Eric,30,Beijing,China
+ 2,Joe,35,Berlin,Germany
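
This three-line fixture backs the new CSV ingestion path (csv_loader, CsvDocument, CsvChunker, and chunk_by_row in the file list above), which, judging by the names, chunks tabular data row by row. Ingesting such a file should go through the usual add/cognify flow; a sketch, with the path and dataset name as placeholders and the row-wise chunking assumed to happen internally once the loader classifies the file as CSV.

import asyncio

import cognee


async def ingest_csv():
    # A CSV path is routed to the new csv_loader during add
    await cognee.add(data="./example_with_header.csv", dataset_name="csv_demo")
    await cognee.cognify(datasets=["csv_demo"])


asyncio.run(ingest_csv())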
@@ -0,0 +1,60 @@
+ import os
+ import pathlib
+ from uuid import UUID
+
+ import cognee
+
+ from cognee.api.v1.datasets import datasets
+ from cognee.api.v1.visualize.visualize import visualize_graph
+ from cognee.context_global_variables import set_database_global_context_variables
+ from cognee.modules.engine.operations.setup import setup
+ from cognee.modules.users.methods import get_default_user
+
+ # from cognee.modules.engine.operations.setup import setup
+ from cognee.shared.logging_utils import get_logger
+
+ logger = get_logger()
+
+
+ async def main():
+ data_directory_path = os.path.join(
+ pathlib.Path(__file__).parent, ".data_storage/test_delete_bmw_example"
+ )
+ cognee.config.data_root_directory(data_directory_path)
+
+ cognee_directory_path = os.path.join(
+ pathlib.Path(__file__).parent, ".cognee_system/test_delete_bmw_example"
+ )
+ cognee.config.system_root_directory(cognee_directory_path)
+
+ # await cognee.prune.prune_data()
+ # await cognee.prune.prune_system(metadata=True)
+ # await setup()
+
+ # add_result = await cognee.add("Bmw is a german carmanufacturer")
+ # add_result = await cognee.add("Germany is located next to the netherlands")
+ # data_id = add_result.data_ingestion_info[0]["data_id"]
+
+ # cognify_result: dict = await cognee.cognify()
+ # dataset_id = list(cognify_result.keys())[0]
+
+ user = await get_default_user()
+ await set_database_global_context_variables("main_dataset", user.id)
+
+ graph_file_path = os.path.join(data_directory_path, "artifacts/graph-before.html")
+ await visualize_graph(graph_file_path)
+
+ await datasets.delete_data(
+ UUID("b52be2e1-9fdb-5be0-a317-d3a56e9a34c6"),
+ UUID("fdae2cbd-61e1-5e99-93ca-4f3e32ed0d02"),
+ user,
+ )
+
+ graph_file_path = os.path.join(data_directory_path, "artifacts/graph-after.html")
+ await visualize_graph(graph_file_path)
+
+
+ if __name__ == "__main__":
+ import asyncio
+
+ asyncio.run(main())
@@ -52,6 +52,33 @@ async def test_edge_ingestion():

  edge_type_counts = Counter(edge_type[2] for edge_type in graph[1])

+ "Tests edge_text presence and format"
+ contains_edges = [edge for edge in graph[1] if edge[2] == "contains"]
+ assert len(contains_edges) > 0, "Expected at least one contains edge for edge_text verification"
+
+ edge_properties = contains_edges[0][3]
+ assert "edge_text" in edge_properties, "Expected edge_text in edge properties"
+
+ edge_text = edge_properties["edge_text"]
+ assert "relationship_name: contains" in edge_text, (
+ f"Expected 'relationship_name: contains' in edge_text, got: {edge_text}"
+ )
+ assert "entity_name:" in edge_text, f"Expected 'entity_name:' in edge_text, got: {edge_text}"
+ assert "entity_description:" in edge_text, (
+ f"Expected 'entity_description:' in edge_text, got: {edge_text}"
+ )
+
+ all_edge_texts = [
+ edge[3].get("edge_text", "") for edge in contains_edges if "edge_text" in edge[3]
+ ]
+ expected_entities = ["dave", "ana", "bob", "dexter", "apples", "cognee"]
+ found_entity = any(
+ any(entity in text.lower() for entity in expected_entities) for text in all_edge_texts
+ )
+ assert found_entity, (
+ f"Expected to find at least one entity name in edge_text: {all_edge_texts[:3]}"
+ )
+
  "Tests the presence of basic nested edges"
  for basic_nested_edge in basic_nested_edges:
  assert edge_type_counts.get(basic_nested_edge, 0) >= 1, (
@@ -133,7 +133,7 @@ async def main():
  extraction_tasks=extraction_tasks,
  enrichment_tasks=enrichment_tasks,
  data=[{}],
- dataset="feedback_enrichment_test_memify",
+ dataset=dataset_name,
  )

  nodes_after, edges_after = await graph_engine.get_graph_data()
@@ -90,15 +90,17 @@ async def main():
  )

  search_results = await cognee.search(
- query_type=SearchType.GRAPH_COMPLETION, query_text="What information do you contain?"
+ query_type=SearchType.GRAPH_COMPLETION,
+ query_text="What information do you contain?",
+ dataset_ids=[pipeline_run_obj.dataset_id],
  )
- assert "Mark" in search_results[0], (
+ assert "Mark" in search_results[0]["search_result"][0], (
  "Failed to update document, no mention of Mark in search results"
  )
- assert "Cindy" in search_results[0], (
+ assert "Cindy" in search_results[0]["search_result"][0], (
  "Failed to update document, no mention of Cindy in search results"
  )
- assert "Artificial intelligence" not in search_results[0], (
+ assert "Artificial intelligence" not in search_results[0]["search_result"][0], (
  "Failed to update document, Artificial intelligence still mentioned in search results"
  )

@@ -0,0 +1,62 @@
+ import os
+ import pathlib
+ import asyncio
+ import time
+
+ import cognee
+ from cognee.modules.search.types import SearchType
+ from cognee.shared.logging_utils import get_logger
+
+ logger = get_logger()
+
+
+ async def process_and_search(num_of_searches):
+ start_time = time.time()
+
+ await cognee.cognify()
+
+ await asyncio.gather(
+ *[
+ cognee.search(
+ query_text="Tell me about the document", query_type=SearchType.GRAPH_COMPLETION
+ )
+ for _ in range(num_of_searches)
+ ]
+ )
+
+ end_time = time.time()
+
+ return end_time - start_time
+
+
+ async def main():
+ data_directory_path = os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_load")
+ cognee.config.data_root_directory(data_directory_path)
+
+ cognee_directory_path = os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_load")
+ cognee.config.system_root_directory(cognee_directory_path)
+
+ num_of_pdfs = 10
+ num_of_reps = 5
+ upper_boundary_minutes = 10
+ average_minutes = 8
+
+ recorded_times = []
+ for _ in range(num_of_reps):
+ await cognee.prune.prune_data()
+ await cognee.prune.prune_system(metadata=True)
+
+ s3_input = "s3://cognee-test-load-s3-bucket"
+ await cognee.add(s3_input)
+
+ recorded_times.append(await process_and_search(num_of_pdfs))
+
+ average_recorded_time = sum(recorded_times) / len(recorded_times)
+
+ assert average_recorded_time <= average_minutes * 60
+
+ assert all(rec_time <= upper_boundary_minutes * 60 for rec_time in recorded_times)
+
+
+ if __name__ == "__main__":
+ asyncio.run(main())
@@ -0,0 +1,165 @@
+ import cognee
+ import pytest
+
+ from cognee.modules.users.exceptions import PermissionDeniedError
+ from cognee.modules.users.tenants.methods import select_tenant
+ from cognee.modules.users.methods import get_user
+ from cognee.shared.logging_utils import get_logger
+ from cognee.modules.search.types import SearchType
+ from cognee.modules.users.methods import create_user
+ from cognee.modules.users.permissions.methods import authorized_give_permission_on_datasets
+ from cognee.modules.users.roles.methods import add_user_to_role
+ from cognee.modules.users.roles.methods import create_role
+ from cognee.modules.users.tenants.methods import create_tenant
+ from cognee.modules.users.tenants.methods import add_user_to_tenant
+ from cognee.modules.engine.operations.setup import setup
+ from cognee.shared.logging_utils import setup_logging, CRITICAL
+
+ logger = get_logger()
+
+
+ async def main():
+ # Create a clean slate for cognee -- reset data and system state
+ print("Resetting cognee data...")
+ await cognee.prune.prune_data()
+ await cognee.prune.prune_system(metadata=True)
+ print("Data reset complete.\n")
+
+ # Set up the necessary databases and tables for user management.
+ await setup()
+
+ # Add document for user_1, add it under dataset name AI
+ text = """A quantum computer is a computer that takes advantage of quantum mechanical phenomena.
+ At small scales, physical matter exhibits properties of both particles and waves, and quantum computing leverages
+ this behavior, specifically quantum superposition and entanglement, using specialized hardware that supports the
+ preparation and manipulation of quantum state"""
+
+ print("Creating user_1: user_1@example.com")
+ user_1 = await create_user("user_1@example.com", "example")
+ await cognee.add([text], dataset_name="AI", user=user_1)
+
+ print("\nCreating user_2: user_2@example.com")
+ user_2 = await create_user("user_2@example.com", "example")
+
+ # Run cognify for both datasets as the appropriate user/owner
+ print("\nCreating different datasets for user_1 (AI dataset) and user_2 (QUANTUM dataset)")
+ ai_cognify_result = await cognee.cognify(["AI"], user=user_1)
+
+ # Extract dataset_ids from cognify results
+ def extract_dataset_id_from_cognify(cognify_result):
+ """Extract dataset_id from cognify output dictionary"""
+ for dataset_id, pipeline_result in cognify_result.items():
+ return dataset_id  # Return the first dataset_id
+ return None
+
+ # Get dataset IDs from cognify results
+ # Note: When we want to work with datasets from other users (search, add, cognify, etc.) we must supply dataset
+ # information through dataset_id; using a dataset name only looks for datasets owned by the current user
+ ai_dataset_id = extract_dataset_id_from_cognify(ai_cognify_result)
+
+ # We can see here that user_1 can read his own dataset (AI dataset)
+ search_results = await cognee.search(
+ query_type=SearchType.GRAPH_COMPLETION,
+ query_text="What is in the document?",
+ user=user_1,
+ datasets=[ai_dataset_id],
+ )
+
+ # Verify that user_2 cannot access user_1's dataset without permission
+ with pytest.raises(PermissionDeniedError):
+ search_results = await cognee.search(
+ query_type=SearchType.GRAPH_COMPLETION,
+ query_text="What is in the document?",
+ user=user_2,
+ datasets=[ai_dataset_id],
+ )
+
+ # Create new tenant and role, add user_2 to tenant and role
+ tenant_id = await create_tenant("CogneeLab", user_1.id)
+ await select_tenant(user_id=user_1.id, tenant_id=tenant_id)
+ role_id = await create_role(role_name="Researcher", owner_id=user_1.id)
+ await add_user_to_tenant(
+ user_id=user_2.id, tenant_id=tenant_id, owner_id=user_1.id, set_as_active_tenant=True
+ )
+ await add_user_to_role(user_id=user_2.id, role_id=role_id, owner_id=user_1.id)
+
+ # Assert that user_1 cannot give permissions on his dataset to role before switching to the correct tenant
+ # AI dataset was made with default tenant and not CogneeLab tenant
+ with pytest.raises(PermissionDeniedError):
+ await authorized_give_permission_on_datasets(
+ role_id,
+ [ai_dataset_id],
+ "read",
+ user_1.id,
+ )
+
+ # We need to refresh the user object with changes made when switching tenants
+ user_1 = await get_user(user_1.id)
+ await cognee.add([text], dataset_name="AI_COGNEE_LAB", user=user_1)
+ ai_cognee_lab_cognify_result = await cognee.cognify(["AI_COGNEE_LAB"], user=user_1)
+
+ ai_cognee_lab_dataset_id = extract_dataset_id_from_cognify(ai_cognee_lab_cognify_result)
+
+ await authorized_give_permission_on_datasets(
+ role_id,
+ [ai_cognee_lab_dataset_id],
+ "read",
+ user_1.id,
+ )
+
+ search_results = await cognee.search(
+ query_type=SearchType.GRAPH_COMPLETION,
+ query_text="What is in the document?",
+ user=user_2,
+ dataset_ids=[ai_cognee_lab_dataset_id],
+ )
+ for result in search_results:
+ print(f"{result}\n")
+
+ # Let's test changing tenants
+ tenant_id = await create_tenant("CogneeLab2", user_1.id)
+ await select_tenant(user_id=user_1.id, tenant_id=tenant_id)
+
+ user_1 = await get_user(user_1.id)
+ await cognee.add([text], dataset_name="AI_COGNEE_LAB", user=user_1)
+ await cognee.cognify(["AI_COGNEE_LAB"], user=user_1)
+
+ search_results = await cognee.search(
+ query_type=SearchType.GRAPH_COMPLETION,
+ query_text="What is in the document?",
+ user=user_1,
+ )
+
+ # Assert only AI_COGNEE_LAB dataset from CogneeLab2 tenant is visible as the currently selected tenant
+ assert len(search_results) == 1, (
+ f"Search results must only contain one dataset from current tenant: {search_results}"
+ )
+ assert search_results[0]["dataset_name"] == "AI_COGNEE_LAB", (
+ f"Dict must contain dataset name 'AI_COGNEE_LAB': {search_results[0]}"
+ )
+ assert search_results[0]["dataset_tenant_id"] == user_1.tenant_id, (
+ f"Dataset tenant_id must be same as user_1 tenant_id: {search_results[0]}"
+ )
+
+ # Switch back to no tenant (default tenant)
+ await select_tenant(user_id=user_1.id, tenant_id=None)
+ # Refresh user_1 object
+ user_1 = await get_user(user_1.id)
+ search_results = await cognee.search(
+ query_type=SearchType.GRAPH_COMPLETION,
+ query_text="What is in the document?",
+ user=user_1,
+ )
+ assert len(search_results) == 1, (
+ f"Search results must only contain one dataset from default tenant: {search_results}"
+ )
+ assert search_results[0]["dataset_name"] == "AI", (
+ f"Dict must contain dataset name 'AI': {search_results[0]}"
+ )
+
+
+ if __name__ == "__main__":
+ import asyncio
+
+ logger = setup_logging(log_level=CRITICAL)
+ asyncio.run(main())
@@ -33,11 +33,13 @@ async def main():
  "vector_db_url": "cognee1.test",
  "vector_db_key": "",
  "vector_db_provider": "lancedb",
+ "vector_db_name": "",
  }
  task_2_config = {
  "vector_db_url": "cognee2.test",
  "vector_db_key": "",
  "vector_db_provider": "lancedb",
+ "vector_db_name": "",
  }

  task_1_graph_config = {
@@ -1,6 +1,5 @@
  import pathlib
  import os
- from typing import List
  from cognee.infrastructure.databases.graph import get_graph_engine
  from cognee.infrastructure.databases.relational import (
  get_migration_relational_engine,
@@ -10,7 +9,7 @@ from cognee.infrastructure.databases.vector.pgvector import (
  create_db_and_tables as create_pgvector_db_and_tables,
  )
  from cognee.tasks.ingestion import migrate_relational_database
- from cognee.modules.search.types import SearchResult, SearchType
+ from cognee.modules.search.types import SearchType
  import cognee


@@ -27,6 +26,9 @@ def normalize_node_name(node_name: str) -> str:


  async def setup_test_db():
+ # Disable backend access control to migrate relational data
+ os.environ["ENABLE_BACKEND_ACCESS_CONTROL"] = "false"
+
  await cognee.prune.prune_data()
  await cognee.prune.prune_system(metadata=True)

@@ -271,6 +273,55 @@
  print(f"Edge counts: {edge_counts}")


+ async def test_search_result_quality():
+ from cognee.infrastructure.databases.relational import (
+ get_migration_relational_engine,
+ )
+
+ # Get relational database with original data
+ migration_engine = get_migration_relational_engine()
+ from sqlalchemy import text
+
+ async with migration_engine.engine.connect() as conn:
+ result = await conn.execute(
+ text("""
+ SELECT
+ c.CustomerId,
+ c.FirstName,
+ c.LastName,
+ GROUP_CONCAT(i.InvoiceId, ',') AS invoice_ids
+ FROM Customer AS c
+ LEFT JOIN Invoice AS i ON c.CustomerId = i.CustomerId
+ GROUP BY c.CustomerId, c.FirstName, c.LastName
+ """)
+ )
+
+ for row in result:
+ # Get expected invoice IDs from relational DB for each Customer
+ customer_id = row.CustomerId
+ invoice_ids = row.invoice_ids.split(",") if row.invoice_ids else []
+ print(f"Relational DB Customer {customer_id}: {invoice_ids}")
+
+ # Use Cognee search to get invoice IDs for the same Customer, but by providing the Customer name
+ search_results = await cognee.search(
+ query_type=SearchType.GRAPH_COMPLETION,
+ query_text=f"List me all the invoices of Customer:{row.FirstName} {row.LastName}.",
+ top_k=50,
+ system_prompt="Just return me the invoiceID as a number without any text. This is an example output: ['1', '2', '3']. Where 1, 2, 3 are invoiceIDs of an invoice",
+ )
+ print(f"Cognee search result: {search_results}")
+
+ import ast
+
+ lst = ast.literal_eval(search_results[0])  # converts string -> Python list
+ # Transform both lists to int for comparison, sorting and type consistency
+ lst = sorted([int(x) for x in lst])
+ invoice_ids = sorted([int(x) for x in invoice_ids])
+ assert lst == invoice_ids, (
+ f"Search results {lst} do not match expected invoice IDs {invoice_ids} for Customer:{customer_id}"
+ )
+
+
  async def test_migration_sqlite():
  database_to_migrate_path = os.path.join(pathlib.Path(__file__).parent, "test_data/")

@@ -283,6 +334,7 @@ async def test_migration_sqlite():
  )

  await relational_db_migration()
+ await test_search_result_quality()
  await test_schema_only_migration()

@@ -146,7 +146,13 @@ async def main():
  assert len(search_results) == 1, (
  f"{name}: expected single-element list, got {len(search_results)}"
  )
- text = search_results[0]
+
+ from cognee.context_global_variables import backend_access_control_enabled
+
+ if backend_access_control_enabled():
+ text = search_results[0]["search_result"][0]
+ else:
+ text = search_results[0]
  assert isinstance(text, str), f"{name}: element should be a string"
  assert text.strip(), f"{name}: string should not be empty"
  assert "netherlands" in text.lower(), (
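
This hunk, like the feedback-enrichment one above, adapts to the result shape that changes with backend access control: when it is enabled (the migration test disables it via the ENABLE_BACKEND_ACCESS_CONTROL environment variable), cognee.search returns per-dataset dicts carrying a "search_result" list instead of a flat list of strings. A small helper in the same spirit, sketched only from the shapes these tests assert on:

from cognee.context_global_variables import backend_access_control_enabled


def first_result_text(search_results):
    """Return the first result string in either access-control mode."""
    if backend_access_control_enabled():
        # Access-control mode: list of per-dataset dicts holding a "search_result" list
        return search_results[0]["search_result"][0]
    # Legacy mode: flat list of result strings
    return search_results[0]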