cognee 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (227) hide show
  1. cognee/__init__.py +1 -0
  2. cognee/api/client.py +9 -5
  3. cognee/api/v1/add/add.py +2 -1
  4. cognee/api/v1/add/routers/get_add_router.py +3 -1
  5. cognee/api/v1/cognify/cognify.py +24 -16
  6. cognee/api/v1/cognify/routers/__init__.py +0 -1
  7. cognee/api/v1/cognify/routers/get_cognify_router.py +30 -1
  8. cognee/api/v1/datasets/routers/get_datasets_router.py +3 -3
  9. cognee/api/v1/ontologies/__init__.py +4 -0
  10. cognee/api/v1/ontologies/ontologies.py +158 -0
  11. cognee/api/v1/ontologies/routers/__init__.py +0 -0
  12. cognee/api/v1/ontologies/routers/get_ontology_router.py +109 -0
  13. cognee/api/v1/permissions/routers/get_permissions_router.py +41 -1
  14. cognee/api/v1/search/search.py +4 -0
  15. cognee/api/v1/ui/node_setup.py +360 -0
  16. cognee/api/v1/ui/npm_utils.py +50 -0
  17. cognee/api/v1/ui/ui.py +38 -68
  18. cognee/cli/commands/cognify_command.py +8 -1
  19. cognee/cli/config.py +1 -1
  20. cognee/context_global_variables.py +86 -9
  21. cognee/eval_framework/Dockerfile +29 -0
  22. cognee/eval_framework/answer_generation/answer_generation_executor.py +10 -0
  23. cognee/eval_framework/answer_generation/run_question_answering_module.py +1 -1
  24. cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py +0 -2
  25. cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +4 -4
  26. cognee/eval_framework/eval_config.py +2 -2
  27. cognee/eval_framework/modal_run_eval.py +16 -28
  28. cognee/infrastructure/databases/cache/config.py +3 -1
  29. cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +151 -0
  30. cognee/infrastructure/databases/cache/get_cache_engine.py +20 -10
  31. cognee/infrastructure/databases/dataset_database_handler/__init__.py +3 -0
  32. cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py +80 -0
  33. cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +18 -0
  34. cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +10 -0
  35. cognee/infrastructure/databases/exceptions/exceptions.py +16 -0
  36. cognee/infrastructure/databases/graph/config.py +7 -0
  37. cognee/infrastructure/databases/graph/get_graph_engine.py +3 -0
  38. cognee/infrastructure/databases/graph/graph_db_interface.py +15 -0
  39. cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py +81 -0
  40. cognee/infrastructure/databases/graph/kuzu/adapter.py +228 -0
  41. cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +168 -0
  42. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +80 -1
  43. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +9 -0
  44. cognee/infrastructure/databases/utils/__init__.py +3 -0
  45. cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py +10 -0
  46. cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +66 -18
  47. cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py +10 -0
  48. cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py +30 -0
  49. cognee/infrastructure/databases/vector/config.py +5 -0
  50. cognee/infrastructure/databases/vector/create_vector_engine.py +6 -1
  51. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +8 -6
  52. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +9 -7
  53. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +11 -10
  54. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +2 -0
  55. cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py +50 -0
  56. cognee/infrastructure/databases/vector/vector_db_interface.py +35 -0
  57. cognee/infrastructure/engine/models/Edge.py +13 -1
  58. cognee/infrastructure/files/storage/s3_config.py +2 -0
  59. cognee/infrastructure/files/utils/guess_file_type.py +4 -0
  60. cognee/infrastructure/llm/LLMGateway.py +5 -2
  61. cognee/infrastructure/llm/config.py +37 -0
  62. cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py +2 -2
  63. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +23 -8
  64. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +22 -18
  65. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py +5 -0
  66. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +153 -0
  67. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +47 -38
  68. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +46 -37
  69. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +20 -10
  70. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +23 -11
  71. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +36 -23
  72. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +47 -36
  73. cognee/infrastructure/loaders/LoaderEngine.py +1 -0
  74. cognee/infrastructure/loaders/core/__init__.py +2 -1
  75. cognee/infrastructure/loaders/core/csv_loader.py +93 -0
  76. cognee/infrastructure/loaders/core/text_loader.py +1 -2
  77. cognee/infrastructure/loaders/external/advanced_pdf_loader.py +0 -9
  78. cognee/infrastructure/loaders/supported_loaders.py +2 -1
  79. cognee/memify_pipelines/create_triplet_embeddings.py +53 -0
  80. cognee/memify_pipelines/persist_sessions_in_knowledge_graph.py +55 -0
  81. cognee/modules/chunking/CsvChunker.py +35 -0
  82. cognee/modules/chunking/models/DocumentChunk.py +2 -1
  83. cognee/modules/chunking/text_chunker_with_overlap.py +124 -0
  84. cognee/modules/cognify/config.py +2 -0
  85. cognee/modules/data/deletion/prune_system.py +52 -2
  86. cognee/modules/data/methods/__init__.py +1 -0
  87. cognee/modules/data/methods/create_dataset.py +4 -2
  88. cognee/modules/data/methods/delete_dataset.py +26 -0
  89. cognee/modules/data/methods/get_dataset_ids.py +5 -1
  90. cognee/modules/data/methods/get_unique_data_id.py +68 -0
  91. cognee/modules/data/methods/get_unique_dataset_id.py +66 -4
  92. cognee/modules/data/models/Dataset.py +2 -0
  93. cognee/modules/data/processing/document_types/CsvDocument.py +33 -0
  94. cognee/modules/data/processing/document_types/__init__.py +1 -0
  95. cognee/modules/engine/models/Triplet.py +9 -0
  96. cognee/modules/engine/models/__init__.py +1 -0
  97. cognee/modules/graph/cognee_graph/CogneeGraph.py +89 -39
  98. cognee/modules/graph/cognee_graph/CogneeGraphElements.py +8 -3
  99. cognee/modules/graph/utils/expand_with_nodes_and_edges.py +19 -2
  100. cognee/modules/graph/utils/resolve_edges_to_text.py +48 -49
  101. cognee/modules/ingestion/identify.py +4 -4
  102. cognee/modules/memify/memify.py +1 -7
  103. cognee/modules/notebooks/operations/run_in_local_sandbox.py +3 -0
  104. cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py +55 -23
  105. cognee/modules/pipelines/operations/pipeline.py +18 -2
  106. cognee/modules/pipelines/operations/run_tasks_data_item.py +1 -1
  107. cognee/modules/retrieval/EntityCompletionRetriever.py +10 -3
  108. cognee/modules/retrieval/__init__.py +1 -1
  109. cognee/modules/retrieval/base_graph_retriever.py +7 -3
  110. cognee/modules/retrieval/base_retriever.py +7 -3
  111. cognee/modules/retrieval/completion_retriever.py +11 -4
  112. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +10 -2
  113. cognee/modules/retrieval/graph_completion_cot_retriever.py +18 -51
  114. cognee/modules/retrieval/graph_completion_retriever.py +14 -1
  115. cognee/modules/retrieval/graph_summary_completion_retriever.py +4 -0
  116. cognee/modules/retrieval/register_retriever.py +10 -0
  117. cognee/modules/retrieval/registered_community_retrievers.py +1 -0
  118. cognee/modules/retrieval/temporal_retriever.py +13 -2
  119. cognee/modules/retrieval/triplet_retriever.py +182 -0
  120. cognee/modules/retrieval/utils/brute_force_triplet_search.py +43 -11
  121. cognee/modules/retrieval/utils/completion.py +2 -22
  122. cognee/modules/run_custom_pipeline/__init__.py +1 -0
  123. cognee/modules/run_custom_pipeline/run_custom_pipeline.py +76 -0
  124. cognee/modules/search/methods/get_search_type_tools.py +54 -8
  125. cognee/modules/search/methods/no_access_control_search.py +4 -0
  126. cognee/modules/search/methods/search.py +26 -3
  127. cognee/modules/search/types/SearchType.py +1 -1
  128. cognee/modules/settings/get_settings.py +19 -0
  129. cognee/modules/users/methods/create_user.py +12 -27
  130. cognee/modules/users/methods/get_authenticated_user.py +3 -2
  131. cognee/modules/users/methods/get_default_user.py +4 -2
  132. cognee/modules/users/methods/get_user.py +1 -1
  133. cognee/modules/users/methods/get_user_by_email.py +1 -1
  134. cognee/modules/users/models/DatasetDatabase.py +24 -3
  135. cognee/modules/users/models/Tenant.py +6 -7
  136. cognee/modules/users/models/User.py +6 -5
  137. cognee/modules/users/models/UserTenant.py +12 -0
  138. cognee/modules/users/models/__init__.py +1 -0
  139. cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +13 -13
  140. cognee/modules/users/roles/methods/add_user_to_role.py +3 -1
  141. cognee/modules/users/tenants/methods/__init__.py +1 -0
  142. cognee/modules/users/tenants/methods/add_user_to_tenant.py +21 -12
  143. cognee/modules/users/tenants/methods/create_tenant.py +22 -8
  144. cognee/modules/users/tenants/methods/select_tenant.py +62 -0
  145. cognee/shared/logging_utils.py +6 -0
  146. cognee/shared/rate_limiting.py +30 -0
  147. cognee/tasks/chunks/__init__.py +1 -0
  148. cognee/tasks/chunks/chunk_by_row.py +94 -0
  149. cognee/tasks/documents/__init__.py +0 -1
  150. cognee/tasks/documents/classify_documents.py +2 -0
  151. cognee/tasks/feedback/generate_improved_answers.py +3 -3
  152. cognee/tasks/graph/extract_graph_from_data.py +9 -10
  153. cognee/tasks/ingestion/ingest_data.py +1 -1
  154. cognee/tasks/memify/__init__.py +2 -0
  155. cognee/tasks/memify/cognify_session.py +41 -0
  156. cognee/tasks/memify/extract_user_sessions.py +73 -0
  157. cognee/tasks/memify/get_triplet_datapoints.py +289 -0
  158. cognee/tasks/storage/add_data_points.py +142 -2
  159. cognee/tasks/storage/index_data_points.py +33 -22
  160. cognee/tasks/storage/index_graph_edges.py +37 -57
  161. cognee/tests/integration/documents/CsvDocument_test.py +70 -0
  162. cognee/tests/integration/retrieval/test_triplet_retriever.py +84 -0
  163. cognee/tests/integration/tasks/test_add_data_points.py +139 -0
  164. cognee/tests/integration/tasks/test_get_triplet_datapoints.py +69 -0
  165. cognee/tests/integration/web_url_crawler/test_default_url_crawler.py +1 -1
  166. cognee/tests/integration/web_url_crawler/test_tavily_crawler.py +1 -1
  167. cognee/tests/integration/web_url_crawler/test_url_adding_e2e.py +13 -27
  168. cognee/tests/tasks/entity_extraction/entity_extraction_test.py +1 -1
  169. cognee/tests/test_add_docling_document.py +2 -2
  170. cognee/tests/test_cognee_server_start.py +84 -3
  171. cognee/tests/test_conversation_history.py +68 -5
  172. cognee/tests/test_data/example_with_header.csv +3 -0
  173. cognee/tests/test_dataset_database_handler.py +137 -0
  174. cognee/tests/test_dataset_delete.py +76 -0
  175. cognee/tests/test_edge_centered_payload.py +170 -0
  176. cognee/tests/test_edge_ingestion.py +27 -0
  177. cognee/tests/test_feedback_enrichment.py +1 -1
  178. cognee/tests/test_library.py +6 -4
  179. cognee/tests/test_load.py +62 -0
  180. cognee/tests/test_multi_tenancy.py +165 -0
  181. cognee/tests/test_parallel_databases.py +2 -0
  182. cognee/tests/test_pipeline_cache.py +164 -0
  183. cognee/tests/test_relational_db_migration.py +54 -2
  184. cognee/tests/test_search_db.py +44 -2
  185. cognee/tests/unit/api/test_conditional_authentication_endpoints.py +12 -3
  186. cognee/tests/unit/api/test_ontology_endpoint.py +252 -0
  187. cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +5 -0
  188. cognee/tests/unit/infrastructure/databases/test_index_data_points.py +27 -0
  189. cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py +14 -16
  190. cognee/tests/unit/infrastructure/llm/test_llm_config.py +46 -0
  191. cognee/tests/unit/infrastructure/mock_embedding_engine.py +3 -7
  192. cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +0 -5
  193. cognee/tests/unit/modules/chunking/test_text_chunker.py +248 -0
  194. cognee/tests/unit/modules/chunking/test_text_chunker_with_overlap.py +324 -0
  195. cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +2 -2
  196. cognee/tests/unit/modules/graph/cognee_graph_test.py +406 -0
  197. cognee/tests/unit/modules/memify_tasks/test_cognify_session.py +111 -0
  198. cognee/tests/unit/modules/memify_tasks/test_extract_user_sessions.py +175 -0
  199. cognee/tests/unit/modules/memify_tasks/test_get_triplet_datapoints.py +214 -0
  200. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +0 -51
  201. cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +1 -0
  202. cognee/tests/unit/modules/retrieval/structured_output_test.py +204 -0
  203. cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +1 -1
  204. cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +0 -1
  205. cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +608 -0
  206. cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +83 -0
  207. cognee/tests/unit/modules/users/test_conditional_authentication.py +0 -63
  208. cognee/tests/unit/processing/chunks/chunk_by_row_test.py +52 -0
  209. cognee/tests/unit/tasks/storage/test_add_data_points.py +288 -0
  210. {cognee-0.4.0.dist-info → cognee-0.5.0.dist-info}/METADATA +11 -6
  211. {cognee-0.4.0.dist-info → cognee-0.5.0.dist-info}/RECORD +215 -163
  212. {cognee-0.4.0.dist-info → cognee-0.5.0.dist-info}/WHEEL +1 -1
  213. {cognee-0.4.0.dist-info → cognee-0.5.0.dist-info}/entry_points.txt +0 -1
  214. cognee/api/v1/cognify/code_graph_pipeline.py +0 -119
  215. cognee/api/v1/cognify/routers/get_code_pipeline_router.py +0 -90
  216. cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +0 -544
  217. cognee/modules/retrieval/code_retriever.py +0 -232
  218. cognee/tasks/code/enrich_dependency_graph_checker.py +0 -35
  219. cognee/tasks/code/get_local_dependencies_checker.py +0 -20
  220. cognee/tasks/code/get_repo_dependency_graph_checker.py +0 -35
  221. cognee/tasks/documents/check_permissions_on_dataset.py +0 -26
  222. cognee/tasks/repo_processor/__init__.py +0 -2
  223. cognee/tasks/repo_processor/get_local_dependencies.py +0 -335
  224. cognee/tasks/repo_processor/get_non_code_files.py +0 -158
  225. cognee/tasks/repo_processor/get_repo_file_dependencies.py +0 -243
  226. {cognee-0.4.0.dist-info → cognee-0.5.0.dist-info}/licenses/LICENSE +0 -0
  227. {cognee-0.4.0.dist-info → cognee-0.5.0.dist-info}/licenses/NOTICE.md +0 -0
@@ -14,9 +14,7 @@ async def test_url_saves_as_html_file():
14
14
  await cognee.prune.prune_system(metadata=True)
15
15
 
16
16
  try:
17
- original_file_path = await save_data_item_to_storage(
18
- "https://en.wikipedia.org/wiki/Large_language_model"
19
- )
17
+ original_file_path = await save_data_item_to_storage("http://example.com/")
20
18
  file_path = get_data_file_path(original_file_path)
21
19
  assert file_path.endswith(".html")
22
20
  file = Path(file_path)
@@ -44,9 +42,7 @@ async def test_saved_html_is_valid():
44
42
  await cognee.prune.prune_system(metadata=True)
45
43
 
46
44
  try:
47
- original_file_path = await save_data_item_to_storage(
48
- "https://en.wikipedia.org/wiki/Large_language_model"
49
- )
45
+ original_file_path = await save_data_item_to_storage("http://example.com/")
50
46
  file_path = get_data_file_path(original_file_path)
51
47
  content = Path(file_path).read_text()
52
48
 
@@ -72,7 +68,7 @@ async def test_add_url():
72
68
  await cognee.prune.prune_data()
73
69
  await cognee.prune.prune_system(metadata=True)
74
70
 
75
- await cognee.add("https://en.wikipedia.org/wiki/Large_language_model")
71
+ await cognee.add("http://example.com/")
76
72
 
77
73
 
78
74
  skip_in_ci = pytest.mark.skipif(
@@ -88,7 +84,7 @@ async def test_add_url_with_tavily():
88
84
  await cognee.prune.prune_data()
89
85
  await cognee.prune.prune_system(metadata=True)
90
86
 
91
- await cognee.add("https://en.wikipedia.org/wiki/Large_language_model")
87
+ await cognee.add("http://example.com/")
92
88
 
93
89
 
94
90
  @pytest.mark.asyncio
@@ -98,7 +94,7 @@ async def test_add_url_without_incremental_loading():
98
94
 
99
95
  try:
100
96
  await cognee.add(
101
- "https://en.wikipedia.org/wiki/Large_language_model",
97
+ "http://example.com/",
102
98
  incremental_loading=False,
103
99
  )
104
100
  except Exception as e:
@@ -112,7 +108,7 @@ async def test_add_url_with_incremental_loading():
112
108
 
113
109
  try:
114
110
  await cognee.add(
115
- "https://en.wikipedia.org/wiki/Large_language_model",
111
+ "http://example.com/",
116
112
  incremental_loading=True,
117
113
  )
118
114
  except Exception as e:
@@ -125,7 +121,7 @@ async def test_add_url_can_define_preferred_loader_as_list_of_str():
125
121
  await cognee.prune.prune_system(metadata=True)
126
122
 
127
123
  await cognee.add(
128
- "https://en.wikipedia.org/wiki/Large_language_model",
124
+ "http://example.com/",
129
125
  preferred_loaders=["beautiful_soup_loader"],
130
126
  )
131
127
 
@@ -144,7 +140,7 @@ async def test_add_url_with_extraction_rules():
144
140
 
145
141
  try:
146
142
  await cognee.add(
147
- "https://en.wikipedia.org/wiki/Large_language_model",
143
+ "http://example.com/",
148
144
  preferred_loaders={"beautiful_soup_loader": {"extraction_rules": extraction_rules}},
149
145
  )
150
146
  except Exception as e:
@@ -163,9 +159,7 @@ async def test_loader_is_none_by_default():
163
159
  }
164
160
 
165
161
  try:
166
- original_file_path = await save_data_item_to_storage(
167
- "https://en.wikipedia.org/wiki/Large_language_model"
168
- )
162
+ original_file_path = await save_data_item_to_storage("http://example.com/")
169
163
  file_path = get_data_file_path(original_file_path)
170
164
  assert file_path.endswith(".html")
171
165
  file = Path(file_path)
@@ -196,9 +190,7 @@ async def test_beautiful_soup_loader_is_selected_loader_if_preferred_loader_prov
196
190
  }
197
191
 
198
192
  try:
199
- original_file_path = await save_data_item_to_storage(
200
- "https://en.wikipedia.org/wiki/Large_language_model"
201
- )
193
+ original_file_path = await save_data_item_to_storage("http://example.com/")
202
194
  file_path = get_data_file_path(original_file_path)
203
195
  assert file_path.endswith(".html")
204
196
  file = Path(file_path)
@@ -225,9 +217,7 @@ async def test_beautiful_soup_loader_works_with_and_without_arguments():
225
217
  await cognee.prune.prune_system(metadata=True)
226
218
 
227
219
  try:
228
- original_file_path = await save_data_item_to_storage(
229
- "https://en.wikipedia.org/wiki/Large_language_model"
230
- )
220
+ original_file_path = await save_data_item_to_storage("http://example.com/")
231
221
  file_path = get_data_file_path(original_file_path)
232
222
  assert file_path.endswith(".html")
233
223
  file = Path(file_path)
@@ -263,9 +253,7 @@ async def test_beautiful_soup_loader_successfully_loads_file_if_required_args_pr
263
253
  await cognee.prune.prune_system(metadata=True)
264
254
 
265
255
  try:
266
- original_file_path = await save_data_item_to_storage(
267
- "https://en.wikipedia.org/wiki/Large_language_model"
268
- )
256
+ original_file_path = await save_data_item_to_storage("http://example.com/")
269
257
  file_path = get_data_file_path(original_file_path)
270
258
  assert file_path.endswith(".html")
271
259
  file = Path(file_path)
@@ -302,9 +290,7 @@ async def test_beautiful_soup_loads_file_successfully():
302
290
  }
303
291
 
304
292
  try:
305
- original_file_path = await save_data_item_to_storage(
306
- "https://en.wikipedia.org/wiki/Large_language_model"
307
- )
293
+ original_file_path = await save_data_item_to_storage("http://example.com/")
308
294
  file_path = get_data_file_path(original_file_path)
309
295
  assert file_path.endswith(".html")
310
296
  original_file = Path(file_path)
@@ -55,7 +55,7 @@ async def main():
55
55
  classified_data = ingestion.classify(file)
56
56
 
57
57
  # data_id is the hash of original file contents + owner id to avoid duplicate data
58
- data_id = ingestion.identify(classified_data, await get_default_user())
58
+ data_id = await ingestion.identify(classified_data, await get_default_user())
59
59
 
60
60
  await cognee.add(file_path)
61
61
 
@@ -39,12 +39,12 @@ async def main():
39
39
 
40
40
  answer = await cognee.search("Do programmers change light bulbs?")
41
41
  assert len(answer) != 0
42
- lowercase_answer = answer[0].lower()
42
+ lowercase_answer = answer[0]["search_result"][0].lower()
43
43
  assert ("no" in lowercase_answer) or ("none" in lowercase_answer)
44
44
 
45
45
  answer = await cognee.search("What colours are there in the presentation table?")
46
46
  assert len(answer) != 0
47
- lowercase_answer = answer[0].lower()
47
+ lowercase_answer = answer[0]["search_result"][0].lower()
48
48
  assert (
49
49
  ("red" in lowercase_answer)
50
50
  and ("blue" in lowercase_answer)
@@ -7,6 +7,7 @@ import requests
7
7
  from pathlib import Path
8
8
  import sys
9
9
  import uuid
10
+ import json
10
11
 
11
12
 
12
13
  class TestCogneeServerStart(unittest.TestCase):
@@ -24,8 +25,6 @@ class TestCogneeServerStart(unittest.TestCase):
24
25
  "--port",
25
26
  "8000",
26
27
  ],
27
- stdout=subprocess.PIPE,
28
- stderr=subprocess.PIPE,
29
28
  preexec_fn=os.setsid,
30
29
  )
31
30
  # Give the server some time to start
@@ -90,12 +89,71 @@ class TestCogneeServerStart(unittest.TestCase):
90
89
  )
91
90
  }
92
91
 
93
- payload = {"datasets": [dataset_name]}
92
+ ontology_key = f"test_ontology_{uuid.uuid4().hex[:8]}"
93
+ payload = {"datasets": [dataset_name], "ontology_key": [ontology_key]}
94
94
 
95
95
  add_response = requests.post(url, headers=headers, data=form_data, files=file, timeout=50)
96
96
  if add_response.status_code not in [200, 201]:
97
97
  add_response.raise_for_status()
98
98
 
99
+ ontology_content = b"""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
100
+ xmlns:owl="http://www.w3.org/2002/07/owl#"
101
+ xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
102
+ xmlns="http://example.org/ontology#"
103
+ xml:base="http://example.org/ontology">
104
+
105
+ <owl:Ontology rdf:about="http://example.org/ontology"/>
106
+
107
+ <!-- Classes -->
108
+ <owl:Class rdf:ID="Problem"/>
109
+ <owl:Class rdf:ID="HardwareProblem"/>
110
+ <owl:Class rdf:ID="SoftwareProblem"/>
111
+ <owl:Class rdf:ID="Concept"/>
112
+ <owl:Class rdf:ID="Object"/>
113
+ <owl:Class rdf:ID="Joke"/>
114
+ <owl:Class rdf:ID="Image"/>
115
+ <owl:Class rdf:ID="Person"/>
116
+
117
+ <rdf:Description rdf:about="#HardwareProblem">
118
+ <rdfs:subClassOf rdf:resource="#Problem"/>
119
+ <rdfs:comment>A failure caused by physical components.</rdfs:comment>
120
+ </rdf:Description>
121
+
122
+ <rdf:Description rdf:about="#SoftwareProblem">
123
+ <rdfs:subClassOf rdf:resource="#Problem"/>
124
+ <rdfs:comment>An error caused by software logic or configuration.</rdfs:comment>
125
+ </rdf:Description>
126
+
127
+ <rdf:Description rdf:about="#Person">
128
+ <rdfs:comment>A human being or individual.</rdfs:comment>
129
+ </rdf:Description>
130
+
131
+ <!-- Individuals -->
132
+ <Person rdf:ID="programmers">
133
+ <rdfs:label>Programmers</rdfs:label>
134
+ </Person>
135
+
136
+ <Object rdf:ID="light_bulb">
137
+ <rdfs:label>Light Bulb</rdfs:label>
138
+ </Object>
139
+
140
+ <HardwareProblem rdf:ID="hardware_problem">
141
+ <rdfs:label>Hardware Problem</rdfs:label>
142
+ </HardwareProblem>
143
+
144
+ </rdf:RDF>"""
145
+
146
+ ontology_response = requests.post(
147
+ "http://127.0.0.1:8000/api/v1/ontologies",
148
+ headers=headers,
149
+ files=[("ontology_file", ("test.owl", ontology_content, "application/xml"))],
150
+ data={
151
+ "ontology_key": ontology_key,
152
+ "description": "Test ontology",
153
+ },
154
+ )
155
+ self.assertEqual(ontology_response.status_code, 200)
156
+
99
157
  # Cognify request
100
158
  url = "http://127.0.0.1:8000/api/v1/cognify"
101
159
  headers = {
@@ -107,6 +165,29 @@ class TestCogneeServerStart(unittest.TestCase):
107
165
  if cognify_response.status_code not in [200, 201]:
108
166
  cognify_response.raise_for_status()
109
167
 
168
+ datasets_response = requests.get("http://127.0.0.1:8000/api/v1/datasets", headers=headers)
169
+
170
+ datasets = datasets_response.json()
171
+ dataset_id = None
172
+ for dataset in datasets:
173
+ if dataset["name"] == dataset_name:
174
+ dataset_id = dataset["id"]
175
+ break
176
+
177
+ graph_response = requests.get(
178
+ f"http://127.0.0.1:8000/api/v1/datasets/{dataset_id}/graph", headers=headers
179
+ )
180
+ self.assertEqual(graph_response.status_code, 200)
181
+
182
+ graph_data = graph_response.json()
183
+ ontology_nodes = [
184
+ node for node in graph_data.get("nodes") if node.get("properties").get("ontology_valid")
185
+ ]
186
+
187
+ self.assertGreater(
188
+ len(ontology_nodes), 0, "No ontology nodes found - ontology was not integrated"
189
+ )
190
+
110
191
  # TODO: Add test to verify cognify pipeline is complete before testing search
111
192
 
112
193
  # Search request
@@ -8,17 +8,19 @@ Tests all retrievers that save conversation history to Redis cache:
8
8
  4. GRAPH_COMPLETION_CONTEXT_EXTENSION
9
9
  5. GRAPH_SUMMARY_COMPLETION
10
10
  6. TEMPORAL
11
+ 7. TRIPLET_COMPLETION
11
12
  """
12
13
 
13
14
  import os
14
- import shutil
15
15
  import cognee
16
16
  import pathlib
17
17
 
18
18
  from cognee.infrastructure.databases.cache import get_cache_engine
19
+ from cognee.infrastructure.databases.graph import get_graph_engine
19
20
  from cognee.modules.search.types import SearchType
20
21
  from cognee.shared.logging_utils import get_logger
21
22
  from cognee.modules.users.methods import get_default_user
23
+ from collections import Counter
22
24
 
23
25
  logger = get_logger()
24
26
 
@@ -54,13 +56,17 @@ async def main():
54
56
  """DataCo is a data analytics company. They help businesses make sense of their data."""
55
57
  )
56
58
 
57
- await cognee.add(text_1, dataset_name)
58
- await cognee.add(text_2, dataset_name)
59
+ await cognee.add(data=text_1, dataset_name=dataset_name)
60
+ await cognee.add(data=text_2, dataset_name=dataset_name)
59
61
 
60
- await cognee.cognify([dataset_name])
62
+ await cognee.cognify(datasets=[dataset_name])
61
63
 
62
64
  user = await get_default_user()
63
65
 
66
+ from cognee.memify_pipelines.create_triplet_embeddings import create_triplet_embeddings
67
+
68
+ await create_triplet_embeddings(user=user, dataset=dataset_name)
69
+
64
70
  cache_engine = get_cache_engine()
65
71
  assert cache_engine is not None, "Cache engine should be available for testing"
66
72
 
@@ -188,7 +194,6 @@ async def main():
188
194
  f"GRAPH_SUMMARY_COMPLETION should return non-empty list, got: {result_summary!r}"
189
195
  )
190
196
 
191
- # Verify saved
192
197
  history_summary = await cache_engine.get_latest_qa(str(user.id), session_id_summary, last_n=10)
193
198
  our_qa_summary = [
194
199
  h for h in history_summary if h["question"] == "What are the key points about TechCorp?"
@@ -215,6 +220,24 @@ async def main():
215
220
  ]
216
221
  assert len(our_qa_temporal) == 1, "Should find Temporal question in history"
217
222
 
223
+ session_id_triplet = "test_session_triplet"
224
+
225
+ result_triplet = await cognee.search(
226
+ query_type=SearchType.TRIPLET_COMPLETION,
227
+ query_text="What companies are mentioned?",
228
+ session_id=session_id_triplet,
229
+ )
230
+
231
+ assert isinstance(result_triplet, list) and len(result_triplet) > 0, (
232
+ f"TRIPLET_COMPLETION should return non-empty list, got: {result_triplet!r}"
233
+ )
234
+
235
+ history_triplet = await cache_engine.get_latest_qa(str(user.id), session_id_triplet, last_n=10)
236
+ our_qa_triplet = [
237
+ h for h in history_triplet if h["question"] == "What companies are mentioned?"
238
+ ]
239
+ assert len(our_qa_triplet) == 1, "Should find Triplet question in history"
240
+
218
241
  from cognee.modules.retrieval.utils.session_cache import (
219
242
  get_conversation_history,
220
243
  )
@@ -228,6 +251,46 @@ async def main():
228
251
  assert "CONTEXT:" in formatted_history, "Formatted history should contain 'CONTEXT:' prefix"
229
252
  assert "ANSWER:" in formatted_history, "Formatted history should contain 'ANSWER:' prefix"
230
253
 
254
+ from cognee.memify_pipelines.persist_sessions_in_knowledge_graph import (
255
+ persist_sessions_in_knowledge_graph_pipeline,
256
+ )
257
+
258
+ logger.info("Starting persist_sessions_in_knowledge_graph tests")
259
+
260
+ await persist_sessions_in_knowledge_graph_pipeline(
261
+ user=user,
262
+ session_ids=[session_id_1, session_id_2],
263
+ dataset=dataset_name,
264
+ run_in_background=False,
265
+ )
266
+
267
+ graph_engine = await get_graph_engine()
268
+ graph = await graph_engine.get_graph_data()
269
+
270
+ type_counts = Counter(node_data[1].get("type", {}) for node_data in graph[0])
271
+
272
+ "Tests the correct number of NodeSet nodes after session persistence"
273
+ assert type_counts.get("NodeSet", 0) == 1, (
274
+ f"Number of NodeSets in the graph is incorrect, found {type_counts.get('NodeSet', 0)} but there should be exactly 1."
275
+ )
276
+
277
+ "Tests the correct number of DocumentChunk nodes after session persistence"
278
+ assert type_counts.get("DocumentChunk", 0) == 4, (
279
+ f"Number of DocumentChunk ndoes in the graph is incorrect, found {type_counts.get('DocumentChunk', 0)} but there should be exactly 4 (2 original documents, 2 sessions)."
280
+ )
281
+
282
+ from cognee.infrastructure.databases.vector.get_vector_engine import get_vector_engine
283
+
284
+ vector_engine = get_vector_engine()
285
+ collection_size = await vector_engine.search(
286
+ collection_name="DocumentChunk_text",
287
+ query_text="test",
288
+ limit=1000,
289
+ )
290
+ assert len(collection_size) == 4, (
291
+ f"DocumentChunk_text collection should have exactly 4 embeddings, found {len(collection_size)}"
292
+ )
293
+
231
294
  await cognee.prune.prune_data()
232
295
  await cognee.prune.prune_system(metadata=True)
233
296
 
@@ -0,0 +1,3 @@
1
+ id,name,age,city,country
2
+ 1,Eric,30,Beijing,China
3
+ 2,Joe,35,Berlin,Germany
@@ -0,0 +1,137 @@
1
import asyncio
import os

# Register the custom dataset database handlers by name BEFORE importing
# cognee — presumably these env vars are read during cognee module
# initialization (TODO confirm), hence the deliberate placement above the
# cognee imports below.
os.environ["VECTOR_DATASET_DATABASE_HANDLER"] = "custom_lancedb_handler"
os.environ["GRAPH_DATASET_DATABASE_HANDLER"] = "custom_kuzu_handler"

import cognee
from cognee.modules.users.methods import get_default_user
from cognee.infrastructure.databases.dataset_database_handler import DatasetDatabaseHandlerInterface
from cognee.shared.logging_utils import setup_logging, ERROR
from cognee.api.v1.search import SearchType
13
+
14
+
15
class LanceDBTestDatasetDatabaseHandler(DatasetDatabaseHandlerInterface):
    """Custom vector-dataset handler used by this test.

    Provisions a LanceDB database file under the test's local
    ``.cognee_system`` directory, namespaced by the requesting user's id,
    and returns the connection settings cognee should use for the dataset.
    """

    @classmethod
    async def create_dataset(cls, dataset_id, user):
        import pathlib

        # Absolute path to the test-local system directory next to this file.
        system_root = str(
            (
                pathlib.Path(__file__).parent / ".cognee_system/test_dataset_database_handler"
            ).resolve()
        )
        per_user_dir = os.path.join(system_root, "databases", str(user.id))
        os.makedirs(per_user_dir, exist_ok=True)

        db_file = "test.lance.db"
        return {
            "vector_dataset_database_handler": "custom_lancedb_handler",
            "vector_database_name": db_file,
            "vector_database_url": os.path.join(per_user_dir, db_file),
            "vector_database_provider": "lancedb",
        }
38
+
39
+
40
class KuzuTestDatasetDatabaseHandler(DatasetDatabaseHandlerInterface):
    """Custom graph-dataset handler used by this test.

    Provisions a Kuzu database file under a per-user ``databases``
    directory and returns the connection settings cognee should use.

    NOTE(review): the path here is relative (so ``os.makedirs`` creates it
    under the current working directory), while the vector handler uses an
    absolute path — presumably cognee resolves ``graph_database_url``
    against its system root; confirm against the handler consumer.
    """

    @classmethod
    async def create_dataset(cls, dataset_id, user):
        per_user_dir = os.path.join("databases", str(user.id))
        os.makedirs(per_user_dir, exist_ok=True)

        db_file = "test.kuzu"
        return {
            "graph_dataset_database_handler": "custom_kuzu_handler",
            "graph_database_name": db_file,
            "graph_database_url": os.path.join(per_user_dir, db_file),
            "graph_database_provider": "kuzu",
        }
53
+
54
+
55
async def main():
    """End-to-end check of custom dataset database handlers.

    Builds a small knowledge graph from one text, runs a search, then
    asserts that the handler-provided Kuzu and LanceDB database files were
    created on disk under the per-user databases directory.
    """
    import pathlib

    def _abs_path(subdir):
        # Resolve a directory that lives next to this test file.
        return str((pathlib.Path(__file__).parent / subdir).resolve())

    data_directory_path = _abs_path(".data_storage/test_dataset_database_handler")
    cognee.config.data_root_directory(data_directory_path)
    cognee_directory_path = _abs_path(".cognee_system/test_dataset_database_handler")
    cognee.config.system_root_directory(cognee_directory_path)

    # Register the custom dataset database handlers under the names the
    # environment variables point at.
    from cognee.infrastructure.databases.dataset_database_handler.use_dataset_database_handler import (
        use_dataset_database_handler,
    )

    use_dataset_database_handler(
        "custom_lancedb_handler", LanceDBTestDatasetDatabaseHandler, "lancedb"
    )
    use_dataset_database_handler("custom_kuzu_handler", KuzuTestDatasetDatabaseHandler, "kuzu")

    # Start from a clean slate: wipe data and system state.
    print("Resetting cognee data...")
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)
    print("Data reset complete.\n")

    # The knowledge graph will be built from this text.
    text = """
    Natural language processing (NLP) is an interdisciplinary
    subfield of computer science and information retrieval.
    """

    print("Adding text to cognee:")
    print(text.strip())

    # Ingest the text so cognify can process it.
    await cognee.add(text)
    print("Text added successfully.\n")

    # Build the knowledge graph with the registered handlers in effect.
    await cognee.cognify()
    print("Cognify process complete.\n")

    query_text = "Tell me about NLP"
    print(f"Searching cognee for insights with query: '{query_text}'")
    search_results = await cognee.search(
        query_type=SearchType.GRAPH_COMPLETION, query_text=query_text
    )

    print("Search results:")
    for result_text in search_results:
        print(result_text)

    default_user = await get_default_user()
    # The custom handlers must have produced exactly these database files.
    assert os.path.exists(
        os.path.join(cognee_directory_path, "databases", str(default_user.id), "test.kuzu")
    ), "Graph database file not found."
    assert os.path.exists(
        os.path.join(cognee_directory_path, "databases", str(default_user.id), "test.lance.db")
    ), "Vector database file not found."
128
+
129
+
130
if __name__ == "__main__":
    logger = setup_logging(log_level=ERROR)
    # Run the async test on a dedicated event loop; always drain async
    # generators and close the loop so no resources leak on exit.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        loop.run_until_complete(main())
    finally:
        loop.run_until_complete(loop.shutdown_asyncgens())
        loop.close()  # fix: the loop was previously left open
@@ -0,0 +1,76 @@
1
+ import os
2
+ import asyncio
3
+ import pathlib
4
+ from uuid import UUID
5
+
6
+ import cognee
7
+ from cognee.shared.logging_utils import setup_logging, ERROR
8
+ from cognee.modules.data.methods.delete_dataset import delete_dataset
9
+ from cognee.modules.data.methods.get_dataset import get_dataset
10
+ from cognee.modules.users.methods import get_default_user
11
+
12
+
13
async def main():
    """Verify per-dataset database cleanup on dataset deletion.

    Ingests two datasets, cognifies them, asserts the per-dataset vector
    (LanceDB) and graph (pickle) database files exist, deletes each
    dataset, and asserts those files are removed.
    """
    base_dir = pathlib.Path(__file__).parent

    # Point cognee's data and system roots at test-local directories.
    data_directory_path = str((base_dir / ".data_storage/test_dataset_delete").resolve())
    cognee.config.data_root_directory(data_directory_path)
    cognee_directory_path = str((base_dir / ".cognee_system/test_dataset_delete").resolve())
    cognee.config.system_root_directory(cognee_directory_path)

    # Start from a clean slate: wipe data and system state.
    print("Resetting cognee data...")
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)
    print("Data reset complete.\n")

    # The knowledge graph will be built from this text.
    text = """
    Natural language processing (NLP) is an interdisciplinary
    subfield of computer science and information retrieval.
    """

    # Two separate datasets so deletion can be exercised per dataset.
    await cognee.add(text, "nlp_dataset")
    await cognee.add("Quantum computing is the study of quantum computers.", "quantum_dataset")

    # Each element of the cognify result stringifies to a dataset UUID.
    ret_val = await cognee.cognify()
    user = await get_default_user()

    databases_dir = os.path.join(cognee_directory_path, "databases", str(user.id))
    for dataset_uuid in ret_val:
        dataset_id = str(dataset_uuid)
        vector_db_path = os.path.join(databases_dir, dataset_id + ".lance.db")
        graph_db_path = os.path.join(databases_dir, dataset_id + ".pkl")

        # Per-dataset databases must exist before deletion...
        assert os.path.exists(graph_db_path), "Graph database file not found."
        assert os.path.exists(vector_db_path), "Vector database file not found."

        dataset = await get_dataset(user_id=user.id, dataset_id=UUID(dataset_id))
        await delete_dataset(dataset)

        # ...and must be gone afterwards.
        assert not os.path.exists(graph_db_path), "Graph database file found."
        assert not os.path.exists(vector_db_path), "Vector database file found."
67
+
68
+
69
if __name__ == "__main__":
    logger = setup_logging(log_level=ERROR)
    # Run the async test on a dedicated event loop; always drain async
    # generators and close the loop so no resources leak on exit.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        loop.run_until_complete(main())
    finally:
        loop.run_until_complete(loop.shutdown_asyncgens())
        loop.close()  # fix: the loop was previously left open