cognee 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (227) hide show
  1. cognee/__init__.py +1 -0
  2. cognee/api/client.py +9 -5
  3. cognee/api/v1/add/add.py +2 -1
  4. cognee/api/v1/add/routers/get_add_router.py +3 -1
  5. cognee/api/v1/cognify/cognify.py +24 -16
  6. cognee/api/v1/cognify/routers/__init__.py +0 -1
  7. cognee/api/v1/cognify/routers/get_cognify_router.py +30 -1
  8. cognee/api/v1/datasets/routers/get_datasets_router.py +3 -3
  9. cognee/api/v1/ontologies/__init__.py +4 -0
  10. cognee/api/v1/ontologies/ontologies.py +158 -0
  11. cognee/api/v1/ontologies/routers/__init__.py +0 -0
  12. cognee/api/v1/ontologies/routers/get_ontology_router.py +109 -0
  13. cognee/api/v1/permissions/routers/get_permissions_router.py +41 -1
  14. cognee/api/v1/search/search.py +4 -0
  15. cognee/api/v1/ui/node_setup.py +360 -0
  16. cognee/api/v1/ui/npm_utils.py +50 -0
  17. cognee/api/v1/ui/ui.py +38 -68
  18. cognee/cli/commands/cognify_command.py +8 -1
  19. cognee/cli/config.py +1 -1
  20. cognee/context_global_variables.py +86 -9
  21. cognee/eval_framework/Dockerfile +29 -0
  22. cognee/eval_framework/answer_generation/answer_generation_executor.py +10 -0
  23. cognee/eval_framework/answer_generation/run_question_answering_module.py +1 -1
  24. cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py +0 -2
  25. cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +4 -4
  26. cognee/eval_framework/eval_config.py +2 -2
  27. cognee/eval_framework/modal_run_eval.py +16 -28
  28. cognee/infrastructure/databases/cache/config.py +3 -1
  29. cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +151 -0
  30. cognee/infrastructure/databases/cache/get_cache_engine.py +20 -10
  31. cognee/infrastructure/databases/dataset_database_handler/__init__.py +3 -0
  32. cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py +80 -0
  33. cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +18 -0
  34. cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +10 -0
  35. cognee/infrastructure/databases/exceptions/exceptions.py +16 -0
  36. cognee/infrastructure/databases/graph/config.py +7 -0
  37. cognee/infrastructure/databases/graph/get_graph_engine.py +3 -0
  38. cognee/infrastructure/databases/graph/graph_db_interface.py +15 -0
  39. cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py +81 -0
  40. cognee/infrastructure/databases/graph/kuzu/adapter.py +228 -0
  41. cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +168 -0
  42. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +80 -1
  43. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +9 -0
  44. cognee/infrastructure/databases/utils/__init__.py +3 -0
  45. cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py +10 -0
  46. cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +66 -18
  47. cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py +10 -0
  48. cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py +30 -0
  49. cognee/infrastructure/databases/vector/config.py +5 -0
  50. cognee/infrastructure/databases/vector/create_vector_engine.py +6 -1
  51. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +8 -6
  52. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +9 -7
  53. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +11 -10
  54. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +2 -0
  55. cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py +50 -0
  56. cognee/infrastructure/databases/vector/vector_db_interface.py +35 -0
  57. cognee/infrastructure/engine/models/Edge.py +13 -1
  58. cognee/infrastructure/files/storage/s3_config.py +2 -0
  59. cognee/infrastructure/files/utils/guess_file_type.py +4 -0
  60. cognee/infrastructure/llm/LLMGateway.py +5 -2
  61. cognee/infrastructure/llm/config.py +37 -0
  62. cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py +2 -2
  63. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +23 -8
  64. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +22 -18
  65. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py +5 -0
  66. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +153 -0
  67. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +47 -38
  68. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +46 -37
  69. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +20 -10
  70. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +23 -11
  71. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +36 -23
  72. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +47 -36
  73. cognee/infrastructure/loaders/LoaderEngine.py +1 -0
  74. cognee/infrastructure/loaders/core/__init__.py +2 -1
  75. cognee/infrastructure/loaders/core/csv_loader.py +93 -0
  76. cognee/infrastructure/loaders/core/text_loader.py +1 -2
  77. cognee/infrastructure/loaders/external/advanced_pdf_loader.py +0 -9
  78. cognee/infrastructure/loaders/supported_loaders.py +2 -1
  79. cognee/memify_pipelines/create_triplet_embeddings.py +53 -0
  80. cognee/memify_pipelines/persist_sessions_in_knowledge_graph.py +55 -0
  81. cognee/modules/chunking/CsvChunker.py +35 -0
  82. cognee/modules/chunking/models/DocumentChunk.py +2 -1
  83. cognee/modules/chunking/text_chunker_with_overlap.py +124 -0
  84. cognee/modules/cognify/config.py +2 -0
  85. cognee/modules/data/deletion/prune_system.py +52 -2
  86. cognee/modules/data/methods/__init__.py +1 -0
  87. cognee/modules/data/methods/create_dataset.py +4 -2
  88. cognee/modules/data/methods/delete_dataset.py +26 -0
  89. cognee/modules/data/methods/get_dataset_ids.py +5 -1
  90. cognee/modules/data/methods/get_unique_data_id.py +68 -0
  91. cognee/modules/data/methods/get_unique_dataset_id.py +66 -4
  92. cognee/modules/data/models/Dataset.py +2 -0
  93. cognee/modules/data/processing/document_types/CsvDocument.py +33 -0
  94. cognee/modules/data/processing/document_types/__init__.py +1 -0
  95. cognee/modules/engine/models/Triplet.py +9 -0
  96. cognee/modules/engine/models/__init__.py +1 -0
  97. cognee/modules/graph/cognee_graph/CogneeGraph.py +89 -39
  98. cognee/modules/graph/cognee_graph/CogneeGraphElements.py +8 -3
  99. cognee/modules/graph/utils/expand_with_nodes_and_edges.py +19 -2
  100. cognee/modules/graph/utils/resolve_edges_to_text.py +48 -49
  101. cognee/modules/ingestion/identify.py +4 -4
  102. cognee/modules/memify/memify.py +1 -7
  103. cognee/modules/notebooks/operations/run_in_local_sandbox.py +3 -0
  104. cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py +55 -23
  105. cognee/modules/pipelines/operations/pipeline.py +18 -2
  106. cognee/modules/pipelines/operations/run_tasks_data_item.py +1 -1
  107. cognee/modules/retrieval/EntityCompletionRetriever.py +10 -3
  108. cognee/modules/retrieval/__init__.py +1 -1
  109. cognee/modules/retrieval/base_graph_retriever.py +7 -3
  110. cognee/modules/retrieval/base_retriever.py +7 -3
  111. cognee/modules/retrieval/completion_retriever.py +11 -4
  112. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +10 -2
  113. cognee/modules/retrieval/graph_completion_cot_retriever.py +18 -51
  114. cognee/modules/retrieval/graph_completion_retriever.py +14 -1
  115. cognee/modules/retrieval/graph_summary_completion_retriever.py +4 -0
  116. cognee/modules/retrieval/register_retriever.py +10 -0
  117. cognee/modules/retrieval/registered_community_retrievers.py +1 -0
  118. cognee/modules/retrieval/temporal_retriever.py +13 -2
  119. cognee/modules/retrieval/triplet_retriever.py +182 -0
  120. cognee/modules/retrieval/utils/brute_force_triplet_search.py +43 -11
  121. cognee/modules/retrieval/utils/completion.py +2 -22
  122. cognee/modules/run_custom_pipeline/__init__.py +1 -0
  123. cognee/modules/run_custom_pipeline/run_custom_pipeline.py +76 -0
  124. cognee/modules/search/methods/get_search_type_tools.py +54 -8
  125. cognee/modules/search/methods/no_access_control_search.py +4 -0
  126. cognee/modules/search/methods/search.py +26 -3
  127. cognee/modules/search/types/SearchType.py +1 -1
  128. cognee/modules/settings/get_settings.py +19 -0
  129. cognee/modules/users/methods/create_user.py +12 -27
  130. cognee/modules/users/methods/get_authenticated_user.py +3 -2
  131. cognee/modules/users/methods/get_default_user.py +4 -2
  132. cognee/modules/users/methods/get_user.py +1 -1
  133. cognee/modules/users/methods/get_user_by_email.py +1 -1
  134. cognee/modules/users/models/DatasetDatabase.py +24 -3
  135. cognee/modules/users/models/Tenant.py +6 -7
  136. cognee/modules/users/models/User.py +6 -5
  137. cognee/modules/users/models/UserTenant.py +12 -0
  138. cognee/modules/users/models/__init__.py +1 -0
  139. cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +13 -13
  140. cognee/modules/users/roles/methods/add_user_to_role.py +3 -1
  141. cognee/modules/users/tenants/methods/__init__.py +1 -0
  142. cognee/modules/users/tenants/methods/add_user_to_tenant.py +21 -12
  143. cognee/modules/users/tenants/methods/create_tenant.py +22 -8
  144. cognee/modules/users/tenants/methods/select_tenant.py +62 -0
  145. cognee/shared/logging_utils.py +6 -0
  146. cognee/shared/rate_limiting.py +30 -0
  147. cognee/tasks/chunks/__init__.py +1 -0
  148. cognee/tasks/chunks/chunk_by_row.py +94 -0
  149. cognee/tasks/documents/__init__.py +0 -1
  150. cognee/tasks/documents/classify_documents.py +2 -0
  151. cognee/tasks/feedback/generate_improved_answers.py +3 -3
  152. cognee/tasks/graph/extract_graph_from_data.py +9 -10
  153. cognee/tasks/ingestion/ingest_data.py +1 -1
  154. cognee/tasks/memify/__init__.py +2 -0
  155. cognee/tasks/memify/cognify_session.py +41 -0
  156. cognee/tasks/memify/extract_user_sessions.py +73 -0
  157. cognee/tasks/memify/get_triplet_datapoints.py +289 -0
  158. cognee/tasks/storage/add_data_points.py +142 -2
  159. cognee/tasks/storage/index_data_points.py +33 -22
  160. cognee/tasks/storage/index_graph_edges.py +37 -57
  161. cognee/tests/integration/documents/CsvDocument_test.py +70 -0
  162. cognee/tests/integration/retrieval/test_triplet_retriever.py +84 -0
  163. cognee/tests/integration/tasks/test_add_data_points.py +139 -0
  164. cognee/tests/integration/tasks/test_get_triplet_datapoints.py +69 -0
  165. cognee/tests/integration/web_url_crawler/test_default_url_crawler.py +1 -1
  166. cognee/tests/integration/web_url_crawler/test_tavily_crawler.py +1 -1
  167. cognee/tests/integration/web_url_crawler/test_url_adding_e2e.py +13 -27
  168. cognee/tests/tasks/entity_extraction/entity_extraction_test.py +1 -1
  169. cognee/tests/test_add_docling_document.py +2 -2
  170. cognee/tests/test_cognee_server_start.py +84 -3
  171. cognee/tests/test_conversation_history.py +68 -5
  172. cognee/tests/test_data/example_with_header.csv +3 -0
  173. cognee/tests/test_dataset_database_handler.py +137 -0
  174. cognee/tests/test_dataset_delete.py +76 -0
  175. cognee/tests/test_edge_centered_payload.py +170 -0
  176. cognee/tests/test_edge_ingestion.py +27 -0
  177. cognee/tests/test_feedback_enrichment.py +1 -1
  178. cognee/tests/test_library.py +6 -4
  179. cognee/tests/test_load.py +62 -0
  180. cognee/tests/test_multi_tenancy.py +165 -0
  181. cognee/tests/test_parallel_databases.py +2 -0
  182. cognee/tests/test_pipeline_cache.py +164 -0
  183. cognee/tests/test_relational_db_migration.py +54 -2
  184. cognee/tests/test_search_db.py +44 -2
  185. cognee/tests/unit/api/test_conditional_authentication_endpoints.py +12 -3
  186. cognee/tests/unit/api/test_ontology_endpoint.py +252 -0
  187. cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +5 -0
  188. cognee/tests/unit/infrastructure/databases/test_index_data_points.py +27 -0
  189. cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py +14 -16
  190. cognee/tests/unit/infrastructure/llm/test_llm_config.py +46 -0
  191. cognee/tests/unit/infrastructure/mock_embedding_engine.py +3 -7
  192. cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +0 -5
  193. cognee/tests/unit/modules/chunking/test_text_chunker.py +248 -0
  194. cognee/tests/unit/modules/chunking/test_text_chunker_with_overlap.py +324 -0
  195. cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +2 -2
  196. cognee/tests/unit/modules/graph/cognee_graph_test.py +406 -0
  197. cognee/tests/unit/modules/memify_tasks/test_cognify_session.py +111 -0
  198. cognee/tests/unit/modules/memify_tasks/test_extract_user_sessions.py +175 -0
  199. cognee/tests/unit/modules/memify_tasks/test_get_triplet_datapoints.py +214 -0
  200. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +0 -51
  201. cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +1 -0
  202. cognee/tests/unit/modules/retrieval/structured_output_test.py +204 -0
  203. cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +1 -1
  204. cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +0 -1
  205. cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +608 -0
  206. cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +83 -0
  207. cognee/tests/unit/modules/users/test_conditional_authentication.py +0 -63
  208. cognee/tests/unit/processing/chunks/chunk_by_row_test.py +52 -0
  209. cognee/tests/unit/tasks/storage/test_add_data_points.py +288 -0
  210. {cognee-0.4.0.dist-info → cognee-0.5.0.dist-info}/METADATA +11 -6
  211. {cognee-0.4.0.dist-info → cognee-0.5.0.dist-info}/RECORD +215 -163
  212. {cognee-0.4.0.dist-info → cognee-0.5.0.dist-info}/WHEEL +1 -1
  213. {cognee-0.4.0.dist-info → cognee-0.5.0.dist-info}/entry_points.txt +0 -1
  214. cognee/api/v1/cognify/code_graph_pipeline.py +0 -119
  215. cognee/api/v1/cognify/routers/get_code_pipeline_router.py +0 -90
  216. cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +0 -544
  217. cognee/modules/retrieval/code_retriever.py +0 -232
  218. cognee/tasks/code/enrich_dependency_graph_checker.py +0 -35
  219. cognee/tasks/code/get_local_dependencies_checker.py +0 -20
  220. cognee/tasks/code/get_repo_dependency_graph_checker.py +0 -35
  221. cognee/tasks/documents/check_permissions_on_dataset.py +0 -26
  222. cognee/tasks/repo_processor/__init__.py +0 -2
  223. cognee/tasks/repo_processor/get_local_dependencies.py +0 -335
  224. cognee/tasks/repo_processor/get_non_code_files.py +0 -158
  225. cognee/tasks/repo_processor/get_repo_file_dependencies.py +0 -243
  226. {cognee-0.4.0.dist-info → cognee-0.5.0.dist-info}/licenses/LICENSE +0 -0
  227. {cognee-0.4.0.dist-info → cognee-0.5.0.dist-info}/licenses/NOTICE.md +0 -0
@@ -107,29 +107,10 @@ class TestConditionalAuthenticationIntegration:
107
107
  # REQUIRE_AUTHENTICATION should be a boolean
108
108
  assert isinstance(REQUIRE_AUTHENTICATION, bool)
109
109
 
110
- # Currently should be False (optional authentication)
111
- assert not REQUIRE_AUTHENTICATION
112
-
113
110
 
114
111
  class TestConditionalAuthenticationEnvironmentVariables:
115
112
  """Test environment variable handling."""
116
113
 
117
- def test_require_authentication_default_false(self):
118
- """Test that REQUIRE_AUTHENTICATION defaults to false when imported with no env vars."""
119
- with patch.dict(os.environ, {}, clear=True):
120
- # Remove module from cache to force fresh import
121
- module_name = "cognee.modules.users.methods.get_authenticated_user"
122
- if module_name in sys.modules:
123
- del sys.modules[module_name]
124
-
125
- # Import after patching environment - module will see empty environment
126
- from cognee.modules.users.methods.get_authenticated_user import (
127
- REQUIRE_AUTHENTICATION,
128
- )
129
-
130
- importlib.invalidate_caches()
131
- assert not REQUIRE_AUTHENTICATION
132
-
133
114
  def test_require_authentication_true(self):
134
115
  """Test that REQUIRE_AUTHENTICATION=true is parsed correctly when imported."""
135
116
  with patch.dict(os.environ, {"REQUIRE_AUTHENTICATION": "true"}):
@@ -145,50 +126,6 @@ class TestConditionalAuthenticationEnvironmentVariables:
145
126
 
146
127
  assert REQUIRE_AUTHENTICATION
147
128
 
148
- def test_require_authentication_false_explicit(self):
149
- """Test that REQUIRE_AUTHENTICATION=false is parsed correctly when imported."""
150
- with patch.dict(os.environ, {"REQUIRE_AUTHENTICATION": "false"}):
151
- # Remove module from cache to force fresh import
152
- module_name = "cognee.modules.users.methods.get_authenticated_user"
153
- if module_name in sys.modules:
154
- del sys.modules[module_name]
155
-
156
- # Import after patching environment - module will see REQUIRE_AUTHENTICATION=false
157
- from cognee.modules.users.methods.get_authenticated_user import (
158
- REQUIRE_AUTHENTICATION,
159
- )
160
-
161
- assert not REQUIRE_AUTHENTICATION
162
-
163
- def test_require_authentication_case_insensitive(self):
164
- """Test that environment variable parsing is case insensitive when imported."""
165
- test_cases = ["TRUE", "True", "tRuE", "FALSE", "False", "fAlSe"]
166
-
167
- for case in test_cases:
168
- with patch.dict(os.environ, {"REQUIRE_AUTHENTICATION": case}):
169
- # Remove module from cache to force fresh import
170
- module_name = "cognee.modules.users.methods.get_authenticated_user"
171
- if module_name in sys.modules:
172
- del sys.modules[module_name]
173
-
174
- # Import after patching environment
175
- from cognee.modules.users.methods.get_authenticated_user import (
176
- REQUIRE_AUTHENTICATION,
177
- )
178
-
179
- expected = case.lower() == "true"
180
- assert REQUIRE_AUTHENTICATION == expected, f"Failed for case: {case}"
181
-
182
- def test_current_require_authentication_value(self):
183
- """Test that the current REQUIRE_AUTHENTICATION module value is as expected."""
184
- from cognee.modules.users.methods.get_authenticated_user import (
185
- REQUIRE_AUTHENTICATION,
186
- )
187
-
188
- # The module-level variable should currently be False (set at import time)
189
- assert isinstance(REQUIRE_AUTHENTICATION, bool)
190
- assert not REQUIRE_AUTHENTICATION
191
-
192
129
 
193
130
  class TestConditionalAuthenticationEdgeCases:
194
131
  """Test edge cases and error scenarios."""
@@ -0,0 +1,52 @@
1
+ from itertools import product
2
+
3
+ import numpy as np
4
+ import pytest
5
+
6
+ from cognee.infrastructure.databases.vector.embeddings import get_embedding_engine
7
+ from cognee.tasks.chunks import chunk_by_row
8
+
9
+ INPUT_TEXTS = "name: John, age: 30, city: New York, country: USA"
10
+ max_chunk_size_vals = [8, 32]
11
+
12
+
13
+ @pytest.mark.parametrize(
14
+ "input_text,max_chunk_size",
15
+ list(product([INPUT_TEXTS], max_chunk_size_vals)),
16
+ )
17
+ def test_chunk_by_row_isomorphism(input_text, max_chunk_size):
18
+ chunks = chunk_by_row(input_text, max_chunk_size)
19
+ reconstructed_text = ", ".join([chunk["text"] for chunk in chunks])
20
+ assert reconstructed_text == input_text, (
21
+ f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }"
22
+ )
23
+
24
+
25
+ @pytest.mark.parametrize(
26
+ "input_text,max_chunk_size",
27
+ list(product([INPUT_TEXTS], max_chunk_size_vals)),
28
+ )
29
+ def test_row_chunk_length(input_text, max_chunk_size):
30
+ chunks = list(chunk_by_row(data=input_text, max_chunk_size=max_chunk_size))
31
+ embedding_engine = get_embedding_engine()
32
+
33
+ chunk_lengths = np.array(
34
+ [embedding_engine.tokenizer.count_tokens(chunk["text"]) for chunk in chunks]
35
+ )
36
+
37
+ larger_chunks = chunk_lengths[chunk_lengths > max_chunk_size]
38
+ assert np.all(chunk_lengths <= max_chunk_size), (
39
+ f"{max_chunk_size = }: {larger_chunks} are too large"
40
+ )
41
+
42
+
43
+ @pytest.mark.parametrize(
44
+ "input_text,max_chunk_size",
45
+ list(product([INPUT_TEXTS], max_chunk_size_vals)),
46
+ )
47
+ def test_chunk_by_row_chunk_numbering(input_text, max_chunk_size):
48
+ chunks = chunk_by_row(data=input_text, max_chunk_size=max_chunk_size)
49
+ chunk_indices = np.array([chunk["chunk_index"] for chunk in chunks])
50
+ assert np.all(chunk_indices == np.arange(len(chunk_indices))), (
51
+ f"{chunk_indices = } are not monotonically increasing"
52
+ )
@@ -0,0 +1,288 @@
1
+ import pytest
2
+ from unittest.mock import AsyncMock, patch
3
+ import sys
4
+
5
+ from cognee.infrastructure.engine import DataPoint
6
+ from cognee.modules.engine.models import Triplet
7
+ from cognee.tasks.storage.add_data_points import (
8
+ add_data_points,
9
+ InvalidDataPointsInAddDataPointsError,
10
+ _extract_embeddable_text_from_datapoint,
11
+ _create_triplets_from_graph,
12
+ )
13
+
14
+ adp_module = sys.modules["cognee.tasks.storage.add_data_points"]
15
+
16
+
17
+ class SimplePoint(DataPoint):
18
+ text: str
19
+ metadata: dict = {"index_fields": ["text"]}
20
+
21
+
22
+ @pytest.mark.asyncio
23
+ @pytest.mark.parametrize("bad_input", [None, ["not_datapoint"]])
24
+ async def test_add_data_points_validates_inputs(bad_input):
25
+ with pytest.raises(InvalidDataPointsInAddDataPointsError):
26
+ await add_data_points(bad_input)
27
+
28
+
29
+ @pytest.mark.asyncio
30
+ @patch.object(adp_module, "index_graph_edges")
31
+ @patch.object(adp_module, "index_data_points")
32
+ @patch.object(adp_module, "get_graph_engine")
33
+ @patch.object(adp_module, "deduplicate_nodes_and_edges")
34
+ @patch.object(adp_module, "get_graph_from_model")
35
+ async def test_add_data_points_indexes_nodes_and_edges(
36
+ mock_get_graph, mock_dedup, mock_get_engine, mock_index_nodes, mock_index_edges
37
+ ):
38
+ dp1 = SimplePoint(text="first")
39
+ dp2 = SimplePoint(text="second")
40
+
41
+ edge1 = (str(dp1.id), str(dp2.id), "related_to", {"edge_text": "connects"})
42
+ custom_edges = [(str(dp2.id), str(dp1.id), "custom_edge", {})]
43
+
44
+ mock_get_graph.side_effect = [([dp1], [edge1]), ([dp2], [])]
45
+ mock_dedup.side_effect = lambda n, e: (n, e)
46
+ graph_engine = AsyncMock()
47
+ mock_get_engine.return_value = graph_engine
48
+
49
+ result = await add_data_points([dp1, dp2], custom_edges=custom_edges)
50
+
51
+ assert result == [dp1, dp2]
52
+ graph_engine.add_nodes.assert_awaited_once()
53
+ mock_index_nodes.assert_awaited_once()
54
+ assert graph_engine.add_edges.await_count == 2
55
+ assert edge1 in graph_engine.add_edges.await_args_list[0].args[0]
56
+ assert graph_engine.add_edges.await_args_list[1].args[0] == custom_edges
57
+ assert mock_index_edges.await_count == 2
58
+
59
+
60
+ @pytest.mark.asyncio
61
+ @patch.object(adp_module, "index_graph_edges")
62
+ @patch.object(adp_module, "index_data_points")
63
+ @patch.object(adp_module, "get_graph_engine")
64
+ @patch.object(adp_module, "deduplicate_nodes_and_edges")
65
+ @patch.object(adp_module, "get_graph_from_model")
66
+ async def test_add_data_points_indexes_triplets_when_enabled(
67
+ mock_get_graph, mock_dedup, mock_get_engine, mock_index_nodes, mock_index_edges
68
+ ):
69
+ dp1 = SimplePoint(text="source")
70
+ dp2 = SimplePoint(text="target")
71
+
72
+ edge1 = (str(dp1.id), str(dp2.id), "relates", {"edge_text": "describes"})
73
+
74
+ mock_get_graph.side_effect = [([dp1], [edge1]), ([dp2], [])]
75
+ mock_dedup.side_effect = lambda n, e: (n, e)
76
+ graph_engine = AsyncMock()
77
+ mock_get_engine.return_value = graph_engine
78
+
79
+ await add_data_points([dp1, dp2], embed_triplets=True)
80
+
81
+ assert mock_index_nodes.await_count == 2
82
+ nodes_arg = mock_index_nodes.await_args_list[0].args[0]
83
+ triplets_arg = mock_index_nodes.await_args_list[1].args[0]
84
+ assert nodes_arg == [dp1, dp2]
85
+ assert len(triplets_arg) == 1
86
+ assert isinstance(triplets_arg[0], Triplet)
87
+ mock_index_edges.assert_awaited_once()
88
+
89
+
90
+ @pytest.mark.asyncio
91
+ @patch.object(adp_module, "index_graph_edges")
92
+ @patch.object(adp_module, "index_data_points")
93
+ @patch.object(adp_module, "get_graph_engine")
94
+ @patch.object(adp_module, "deduplicate_nodes_and_edges")
95
+ @patch.object(adp_module, "get_graph_from_model")
96
+ async def test_add_data_points_with_empty_list(
97
+ mock_get_graph, mock_dedup, mock_get_engine, mock_index_nodes, mock_index_edges
98
+ ):
99
+ mock_dedup.side_effect = lambda n, e: (n, e)
100
+ graph_engine = AsyncMock()
101
+ mock_get_engine.return_value = graph_engine
102
+
103
+ result = await add_data_points([])
104
+
105
+ assert result == []
106
+ mock_get_graph.assert_not_called()
107
+ graph_engine.add_nodes.assert_awaited_once_with([])
108
+
109
+
110
+ @pytest.mark.asyncio
111
+ @patch.object(adp_module, "index_graph_edges")
112
+ @patch.object(adp_module, "index_data_points")
113
+ @patch.object(adp_module, "get_graph_engine")
114
+ @patch.object(adp_module, "deduplicate_nodes_and_edges")
115
+ @patch.object(adp_module, "get_graph_from_model")
116
+ async def test_add_data_points_with_single_datapoint(
117
+ mock_get_graph, mock_dedup, mock_get_engine, mock_index_nodes, mock_index_edges
118
+ ):
119
+ dp = SimplePoint(text="single")
120
+ mock_get_graph.side_effect = [([dp], [])]
121
+ mock_dedup.side_effect = lambda n, e: (n, e)
122
+ graph_engine = AsyncMock()
123
+ mock_get_engine.return_value = graph_engine
124
+
125
+ result = await add_data_points([dp])
126
+
127
+ assert result == [dp]
128
+ mock_get_graph.assert_called_once()
129
+ mock_index_nodes.assert_awaited_once()
130
+
131
+
132
+ def test_extract_embeddable_text_from_datapoint():
133
+ dp = SimplePoint(text="hello world")
134
+ text = _extract_embeddable_text_from_datapoint(dp)
135
+ assert text == "hello world"
136
+
137
+
138
+ def test_extract_embeddable_text_with_multiple_fields():
139
+ class MultiField(DataPoint):
140
+ title: str
141
+ description: str
142
+ metadata: dict = {"index_fields": ["title", "description"]}
143
+
144
+ dp = MultiField(title="Test", description="Description")
145
+ text = _extract_embeddable_text_from_datapoint(dp)
146
+ assert text == "Test Description"
147
+
148
+
149
+ def test_extract_embeddable_text_with_no_index_fields():
150
+ class NoIndex(DataPoint):
151
+ text: str
152
+ metadata: dict = {"index_fields": []}
153
+
154
+ dp = NoIndex(text="ignored")
155
+ text = _extract_embeddable_text_from_datapoint(dp)
156
+ assert text == ""
157
+
158
+
159
+ def test_create_triplets_from_graph():
160
+ dp1 = SimplePoint(text="source node")
161
+ dp2 = SimplePoint(text="target node")
162
+ edge = (str(dp1.id), str(dp2.id), "connects_to", {"edge_text": "links"})
163
+
164
+ triplets = _create_triplets_from_graph([dp1, dp2], [edge])
165
+
166
+ assert len(triplets) == 1
167
+ assert isinstance(triplets[0], Triplet)
168
+ assert triplets[0].from_node_id == str(dp1.id)
169
+ assert triplets[0].to_node_id == str(dp2.id)
170
+ assert "source node" in triplets[0].text
171
+ assert "target node" in triplets[0].text
172
+
173
+
174
+ def test_extract_embeddable_text_with_none_datapoint():
175
+ text = _extract_embeddable_text_from_datapoint(None)
176
+ assert text == ""
177
+
178
+
179
+ def test_extract_embeddable_text_without_metadata():
180
+ class NoMetadata(DataPoint):
181
+ text: str
182
+
183
+ dp = NoMetadata(text="test")
184
+ delattr(dp, "metadata")
185
+ text = _extract_embeddable_text_from_datapoint(dp)
186
+ assert text == ""
187
+
188
+
189
+ def test_extract_embeddable_text_with_whitespace_only():
190
+ class WhitespaceField(DataPoint):
191
+ text: str
192
+ metadata: dict = {"index_fields": ["text"]}
193
+
194
+ dp = WhitespaceField(text=" ")
195
+ text = _extract_embeddable_text_from_datapoint(dp)
196
+ assert text == ""
197
+
198
+
199
+ def test_create_triplets_skips_short_edge_tuples():
200
+ dp = SimplePoint(text="node")
201
+ incomplete_edge = (str(dp.id), str(dp.id))
202
+
203
+ triplets = _create_triplets_from_graph([dp], [incomplete_edge])
204
+
205
+ assert len(triplets) == 0
206
+
207
+
208
+ def test_create_triplets_skips_missing_source_node():
209
+ dp1 = SimplePoint(text="target")
210
+ edge = ("missing_id", str(dp1.id), "relates", {})
211
+
212
+ triplets = _create_triplets_from_graph([dp1], [edge])
213
+
214
+ assert len(triplets) == 0
215
+
216
+
217
+ def test_create_triplets_skips_missing_target_node():
218
+ dp1 = SimplePoint(text="source")
219
+ edge = (str(dp1.id), "missing_id", "relates", {})
220
+
221
+ triplets = _create_triplets_from_graph([dp1], [edge])
222
+
223
+ assert len(triplets) == 0
224
+
225
+
226
+ def test_create_triplets_skips_none_relationship():
227
+ dp1 = SimplePoint(text="source")
228
+ dp2 = SimplePoint(text="target")
229
+ edge = (str(dp1.id), str(dp2.id), None, {})
230
+
231
+ triplets = _create_triplets_from_graph([dp1, dp2], [edge])
232
+
233
+ assert len(triplets) == 0
234
+
235
+
236
+ def test_create_triplets_uses_relationship_name_when_no_edge_text():
237
+ dp1 = SimplePoint(text="source")
238
+ dp2 = SimplePoint(text="target")
239
+ edge = (str(dp1.id), str(dp2.id), "connects_to", {})
240
+
241
+ triplets = _create_triplets_from_graph([dp1, dp2], [edge])
242
+
243
+ assert len(triplets) == 1
244
+ assert "connects_to" in triplets[0].text
245
+
246
+
247
+ def test_create_triplets_prevents_duplicates():
248
+ dp1 = SimplePoint(text="source")
249
+ dp2 = SimplePoint(text="target")
250
+ edge = (str(dp1.id), str(dp2.id), "relates", {"edge_text": "links"})
251
+
252
+ triplets = _create_triplets_from_graph([dp1, dp2], [edge, edge])
253
+
254
+ assert len(triplets) == 1
255
+
256
+
257
+ def test_create_triplets_skips_nodes_without_id():
258
+ class NodeNoId:
259
+ pass
260
+
261
+ dp = SimplePoint(text="valid")
262
+ node_no_id = NodeNoId()
263
+ edge = (str(dp.id), "some_id", "relates", {})
264
+
265
+ triplets = _create_triplets_from_graph([dp, node_no_id], [edge])
266
+
267
+ assert len(triplets) == 0
268
+
269
+
270
+ @pytest.mark.asyncio
271
+ @patch.object(adp_module, "index_graph_edges")
272
+ @patch.object(adp_module, "index_data_points")
273
+ @patch.object(adp_module, "get_graph_engine")
274
+ @patch.object(adp_module, "deduplicate_nodes_and_edges")
275
+ @patch.object(adp_module, "get_graph_from_model")
276
+ async def test_add_data_points_with_empty_custom_edges(
277
+ mock_get_graph, mock_dedup, mock_get_engine, mock_index_nodes, mock_index_edges
278
+ ):
279
+ dp = SimplePoint(text="test")
280
+ mock_get_graph.side_effect = [([dp], [])]
281
+ mock_dedup.side_effect = lambda n, e: (n, e)
282
+ graph_engine = AsyncMock()
283
+ mock_get_engine.return_value = graph_engine
284
+
285
+ result = await add_data_points([dp], custom_edges=[])
286
+
287
+ assert result == [dp]
288
+ assert graph_engine.add_edges.await_count == 1
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cognee
3
- Version: 0.4.0
3
+ Version: 0.5.0
4
4
  Summary: Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning.
5
5
  Project-URL: Homepage, https://www.cognee.ai
6
6
  Project-URL: Repository, https://github.com/topoteretes/cognee
@@ -16,10 +16,13 @@ Classifier: Operating System :: Microsoft :: Windows
16
16
  Classifier: Operating System :: POSIX :: Linux
17
17
  Classifier: Topic :: Software Development :: Libraries
18
18
  Requires-Python: <3.14,>=3.10
19
- Requires-Dist: aiofiles<24.0.0,>=23.2.1
19
+ Requires-Dist: aiofiles>=23.2.1
20
20
  Requires-Dist: aiohttp<4.0.0,>=3.11.14
21
+ Requires-Dist: aiolimiter>=1.2.1
21
22
  Requires-Dist: aiosqlite<1.0.0,>=0.20.0
22
23
  Requires-Dist: alembic<2,>=1.13.3
24
+ Requires-Dist: diskcache>=5.6.3
25
+ Requires-Dist: fakeredis[lua]>=2.32.0
23
26
  Requires-Dist: fastapi-users[sqlalchemy]<15.0.0,>=14.0.1
24
27
  Requires-Dist: fastapi<1.0.0,>=0.116.2
25
28
  Requires-Dist: fastembed<=0.6.0
@@ -38,7 +41,7 @@ Requires-Dist: numpy<=4.0.0,>=1.26.4
38
41
  Requires-Dist: onnxruntime<=1.22.1
39
42
  Requires-Dist: openai>=1.80.1
40
43
  Requires-Dist: pydantic-settings<3,>=2.2.1
41
- Requires-Dist: pydantic<3.0.0,>=2.10.5
44
+ Requires-Dist: pydantic<2.12.0,>=2.10.5
42
45
  Requires-Dist: pylance<=0.36.0,>=0.22.0
43
46
  Requires-Dist: pympler<2.0.0,>=1.1
44
47
  Requires-Dist: pypdf<7.0.0,>=4.1.0
@@ -96,7 +99,8 @@ Provides-Extra: docling
96
99
  Requires-Dist: docling>=2.54; extra == 'docling'
97
100
  Requires-Dist: transformers>=4.55; extra == 'docling'
98
101
  Provides-Extra: docs
99
- Requires-Dist: lxml<6.0.0; extra == 'docs'
102
+ Requires-Dist: lxml<5,>=4.9.3; (python_version < '3.13') and extra == 'docs'
103
+ Requires-Dist: lxml<6,>=5; (python_version >= '3.13') and extra == 'docs'
100
104
  Requires-Dist: unstructured[csv,doc,docx,epub,md,odt,org,pdf,ppt,pptx,rst,rtf,tsv,xlsx]<19,>=0.18.1; extra == 'docs'
101
105
  Provides-Extra: evals
102
106
  Requires-Dist: gdown<6,>=5.2.0; extra == 'evals'
@@ -143,7 +147,8 @@ Requires-Dist: redis<6.0.0,>=5.0.3; extra == 'redis'
143
147
  Provides-Extra: scraping
144
148
  Requires-Dist: apscheduler<=3.11.0,>=3.10.0; extra == 'scraping'
145
149
  Requires-Dist: beautifulsoup4>=4.13.1; extra == 'scraping'
146
- Requires-Dist: lxml>=4.9.3; extra == 'scraping'
150
+ Requires-Dist: lxml<5,>=4.9.3; (python_version < '3.13') and extra == 'scraping'
151
+ Requires-Dist: lxml<6,>=5; (python_version >= '3.13') and extra == 'scraping'
147
152
  Requires-Dist: playwright>=1.9.0; extra == 'scraping'
148
153
  Requires-Dist: protego>=0.1; extra == 'scraping'
149
154
  Requires-Dist: tavily-python>=0.7.12; extra == 'scraping'
@@ -249,7 +254,7 @@ Let’s try Cognee in just a few lines of code. For detailed setup and configura
249
254
 
250
255
  ### Prerequisites
251
256
 
252
- - Python 3.10 to 3.12
257
+ - Python 3.10 to 3.13
253
258
 
254
259
  ### Step 1: Install Cognee
255
260