cognee 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (224)
  1. cognee/__init__.py +1 -0
  2. cognee/api/client.py +9 -5
  3. cognee/api/v1/add/add.py +2 -1
  4. cognee/api/v1/add/routers/get_add_router.py +3 -1
  5. cognee/api/v1/cognify/cognify.py +24 -16
  6. cognee/api/v1/cognify/routers/__init__.py +0 -1
  7. cognee/api/v1/cognify/routers/get_cognify_router.py +30 -1
  8. cognee/api/v1/datasets/routers/get_datasets_router.py +3 -3
  9. cognee/api/v1/ontologies/__init__.py +4 -0
  10. cognee/api/v1/ontologies/ontologies.py +158 -0
  11. cognee/api/v1/ontologies/routers/__init__.py +0 -0
  12. cognee/api/v1/ontologies/routers/get_ontology_router.py +109 -0
  13. cognee/api/v1/permissions/routers/get_permissions_router.py +41 -1
  14. cognee/api/v1/search/search.py +4 -0
  15. cognee/api/v1/ui/node_setup.py +360 -0
  16. cognee/api/v1/ui/npm_utils.py +50 -0
  17. cognee/api/v1/ui/ui.py +38 -68
  18. cognee/cli/commands/cognify_command.py +8 -1
  19. cognee/cli/config.py +1 -1
  20. cognee/context_global_variables.py +86 -9
  21. cognee/eval_framework/Dockerfile +29 -0
  22. cognee/eval_framework/answer_generation/answer_generation_executor.py +10 -0
  23. cognee/eval_framework/answer_generation/run_question_answering_module.py +1 -1
  24. cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py +0 -2
  25. cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +4 -4
  26. cognee/eval_framework/eval_config.py +2 -2
  27. cognee/eval_framework/modal_run_eval.py +16 -28
  28. cognee/infrastructure/databases/cache/config.py +3 -1
  29. cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +151 -0
  30. cognee/infrastructure/databases/cache/get_cache_engine.py +20 -10
  31. cognee/infrastructure/databases/dataset_database_handler/__init__.py +3 -0
  32. cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py +80 -0
  33. cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +18 -0
  34. cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +10 -0
  35. cognee/infrastructure/databases/exceptions/exceptions.py +16 -0
  36. cognee/infrastructure/databases/graph/config.py +7 -0
  37. cognee/infrastructure/databases/graph/get_graph_engine.py +3 -0
  38. cognee/infrastructure/databases/graph/graph_db_interface.py +15 -0
  39. cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py +81 -0
  40. cognee/infrastructure/databases/graph/kuzu/adapter.py +228 -0
  41. cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +168 -0
  42. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +80 -1
  43. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +9 -0
  44. cognee/infrastructure/databases/utils/__init__.py +3 -0
  45. cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py +10 -0
  46. cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +66 -18
  47. cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py +10 -0
  48. cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py +30 -0
  49. cognee/infrastructure/databases/vector/config.py +5 -0
  50. cognee/infrastructure/databases/vector/create_vector_engine.py +6 -1
  51. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +8 -6
  52. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +9 -7
  53. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +11 -13
  54. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +2 -0
  55. cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py +50 -0
  56. cognee/infrastructure/databases/vector/vector_db_interface.py +35 -0
  57. cognee/infrastructure/engine/models/Edge.py +13 -1
  58. cognee/infrastructure/files/storage/s3_config.py +2 -0
  59. cognee/infrastructure/files/utils/guess_file_type.py +4 -0
  60. cognee/infrastructure/llm/LLMGateway.py +5 -2
  61. cognee/infrastructure/llm/config.py +37 -0
  62. cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py +2 -2
  63. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +23 -8
  64. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +22 -18
  65. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py +5 -0
  66. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +153 -0
  67. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +47 -38
  68. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +46 -37
  69. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +20 -10
  70. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +23 -11
  71. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +36 -23
  72. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +47 -36
  73. cognee/infrastructure/loaders/LoaderEngine.py +1 -0
  74. cognee/infrastructure/loaders/core/__init__.py +2 -1
  75. cognee/infrastructure/loaders/core/csv_loader.py +93 -0
  76. cognee/infrastructure/loaders/core/text_loader.py +1 -2
  77. cognee/infrastructure/loaders/external/advanced_pdf_loader.py +0 -9
  78. cognee/infrastructure/loaders/supported_loaders.py +2 -1
  79. cognee/memify_pipelines/create_triplet_embeddings.py +53 -0
  80. cognee/memify_pipelines/persist_sessions_in_knowledge_graph.py +55 -0
  81. cognee/modules/chunking/CsvChunker.py +35 -0
  82. cognee/modules/chunking/models/DocumentChunk.py +2 -1
  83. cognee/modules/chunking/text_chunker_with_overlap.py +124 -0
  84. cognee/modules/cognify/config.py +2 -0
  85. cognee/modules/data/deletion/prune_system.py +52 -2
  86. cognee/modules/data/methods/__init__.py +1 -0
  87. cognee/modules/data/methods/create_dataset.py +4 -2
  88. cognee/modules/data/methods/delete_dataset.py +26 -0
  89. cognee/modules/data/methods/get_dataset_ids.py +5 -1
  90. cognee/modules/data/methods/get_unique_data_id.py +68 -0
  91. cognee/modules/data/methods/get_unique_dataset_id.py +66 -4
  92. cognee/modules/data/models/Dataset.py +2 -0
  93. cognee/modules/data/processing/document_types/CsvDocument.py +33 -0
  94. cognee/modules/data/processing/document_types/__init__.py +1 -0
  95. cognee/modules/engine/models/Triplet.py +9 -0
  96. cognee/modules/engine/models/__init__.py +1 -0
  97. cognee/modules/graph/cognee_graph/CogneeGraph.py +89 -39
  98. cognee/modules/graph/cognee_graph/CogneeGraphElements.py +8 -3
  99. cognee/modules/graph/utils/expand_with_nodes_and_edges.py +19 -2
  100. cognee/modules/graph/utils/resolve_edges_to_text.py +48 -49
  101. cognee/modules/ingestion/identify.py +4 -4
  102. cognee/modules/memify/memify.py +1 -7
  103. cognee/modules/notebooks/operations/run_in_local_sandbox.py +3 -0
  104. cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py +55 -23
  105. cognee/modules/pipelines/operations/pipeline.py +18 -2
  106. cognee/modules/pipelines/operations/run_tasks_data_item.py +1 -1
  107. cognee/modules/retrieval/EntityCompletionRetriever.py +10 -3
  108. cognee/modules/retrieval/__init__.py +1 -1
  109. cognee/modules/retrieval/base_graph_retriever.py +7 -3
  110. cognee/modules/retrieval/base_retriever.py +7 -3
  111. cognee/modules/retrieval/completion_retriever.py +11 -4
  112. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +10 -2
  113. cognee/modules/retrieval/graph_completion_cot_retriever.py +18 -51
  114. cognee/modules/retrieval/graph_completion_retriever.py +14 -1
  115. cognee/modules/retrieval/graph_summary_completion_retriever.py +4 -0
  116. cognee/modules/retrieval/register_retriever.py +10 -0
  117. cognee/modules/retrieval/registered_community_retrievers.py +1 -0
  118. cognee/modules/retrieval/temporal_retriever.py +13 -2
  119. cognee/modules/retrieval/triplet_retriever.py +182 -0
  120. cognee/modules/retrieval/utils/brute_force_triplet_search.py +43 -11
  121. cognee/modules/retrieval/utils/completion.py +2 -22
  122. cognee/modules/run_custom_pipeline/__init__.py +1 -0
  123. cognee/modules/run_custom_pipeline/run_custom_pipeline.py +76 -0
  124. cognee/modules/search/methods/get_search_type_tools.py +54 -8
  125. cognee/modules/search/methods/no_access_control_search.py +4 -0
  126. cognee/modules/search/methods/search.py +26 -3
  127. cognee/modules/search/types/SearchType.py +1 -1
  128. cognee/modules/settings/get_settings.py +19 -0
  129. cognee/modules/users/methods/create_user.py +12 -27
  130. cognee/modules/users/methods/get_authenticated_user.py +3 -2
  131. cognee/modules/users/methods/get_default_user.py +4 -2
  132. cognee/modules/users/methods/get_user.py +1 -1
  133. cognee/modules/users/methods/get_user_by_email.py +1 -1
  134. cognee/modules/users/models/DatasetDatabase.py +24 -3
  135. cognee/modules/users/models/Tenant.py +6 -7
  136. cognee/modules/users/models/User.py +6 -5
  137. cognee/modules/users/models/UserTenant.py +12 -0
  138. cognee/modules/users/models/__init__.py +1 -0
  139. cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +13 -13
  140. cognee/modules/users/roles/methods/add_user_to_role.py +3 -1
  141. cognee/modules/users/tenants/methods/__init__.py +1 -0
  142. cognee/modules/users/tenants/methods/add_user_to_tenant.py +21 -12
  143. cognee/modules/users/tenants/methods/create_tenant.py +22 -8
  144. cognee/modules/users/tenants/methods/select_tenant.py +62 -0
  145. cognee/shared/logging_utils.py +6 -0
  146. cognee/shared/rate_limiting.py +30 -0
  147. cognee/tasks/chunks/__init__.py +1 -0
  148. cognee/tasks/chunks/chunk_by_row.py +94 -0
  149. cognee/tasks/documents/__init__.py +0 -1
  150. cognee/tasks/documents/classify_documents.py +2 -0
  151. cognee/tasks/feedback/generate_improved_answers.py +3 -3
  152. cognee/tasks/graph/extract_graph_from_data.py +9 -10
  153. cognee/tasks/ingestion/ingest_data.py +1 -1
  154. cognee/tasks/memify/__init__.py +2 -0
  155. cognee/tasks/memify/cognify_session.py +41 -0
  156. cognee/tasks/memify/extract_user_sessions.py +73 -0
  157. cognee/tasks/memify/get_triplet_datapoints.py +289 -0
  158. cognee/tasks/storage/add_data_points.py +142 -2
  159. cognee/tasks/storage/index_data_points.py +33 -22
  160. cognee/tasks/storage/index_graph_edges.py +37 -57
  161. cognee/tests/integration/documents/CsvDocument_test.py +70 -0
  162. cognee/tests/integration/retrieval/test_triplet_retriever.py +84 -0
  163. cognee/tests/integration/tasks/test_add_data_points.py +139 -0
  164. cognee/tests/integration/tasks/test_get_triplet_datapoints.py +69 -0
  165. cognee/tests/tasks/entity_extraction/entity_extraction_test.py +1 -1
  166. cognee/tests/test_add_docling_document.py +2 -2
  167. cognee/tests/test_cognee_server_start.py +84 -3
  168. cognee/tests/test_conversation_history.py +68 -5
  169. cognee/tests/test_data/example_with_header.csv +3 -0
  170. cognee/tests/test_dataset_database_handler.py +137 -0
  171. cognee/tests/test_dataset_delete.py +76 -0
  172. cognee/tests/test_edge_centered_payload.py +170 -0
  173. cognee/tests/test_edge_ingestion.py +27 -0
  174. cognee/tests/test_feedback_enrichment.py +1 -1
  175. cognee/tests/test_library.py +6 -4
  176. cognee/tests/test_load.py +62 -0
  177. cognee/tests/test_multi_tenancy.py +165 -0
  178. cognee/tests/test_parallel_databases.py +2 -0
  179. cognee/tests/test_pipeline_cache.py +164 -0
  180. cognee/tests/test_relational_db_migration.py +54 -2
  181. cognee/tests/test_search_db.py +44 -2
  182. cognee/tests/unit/api/test_conditional_authentication_endpoints.py +12 -3
  183. cognee/tests/unit/api/test_ontology_endpoint.py +252 -0
  184. cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +5 -0
  185. cognee/tests/unit/infrastructure/databases/test_index_data_points.py +27 -0
  186. cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py +14 -16
  187. cognee/tests/unit/infrastructure/llm/test_llm_config.py +46 -0
  188. cognee/tests/unit/infrastructure/mock_embedding_engine.py +3 -7
  189. cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +0 -5
  190. cognee/tests/unit/modules/chunking/test_text_chunker.py +248 -0
  191. cognee/tests/unit/modules/chunking/test_text_chunker_with_overlap.py +324 -0
  192. cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +2 -2
  193. cognee/tests/unit/modules/graph/cognee_graph_test.py +406 -0
  194. cognee/tests/unit/modules/memify_tasks/test_cognify_session.py +111 -0
  195. cognee/tests/unit/modules/memify_tasks/test_extract_user_sessions.py +175 -0
  196. cognee/tests/unit/modules/memify_tasks/test_get_triplet_datapoints.py +214 -0
  197. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +0 -51
  198. cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +1 -0
  199. cognee/tests/unit/modules/retrieval/structured_output_test.py +204 -0
  200. cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +1 -1
  201. cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +0 -1
  202. cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +608 -0
  203. cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +83 -0
  204. cognee/tests/unit/modules/users/test_conditional_authentication.py +0 -63
  205. cognee/tests/unit/processing/chunks/chunk_by_row_test.py +52 -0
  206. cognee/tests/unit/tasks/storage/test_add_data_points.py +288 -0
  207. {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/METADATA +11 -7
  208. {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/RECORD +212 -160
  209. {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/entry_points.txt +0 -1
  210. cognee/api/v1/cognify/code_graph_pipeline.py +0 -119
  211. cognee/api/v1/cognify/routers/get_code_pipeline_router.py +0 -90
  212. cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +0 -544
  213. cognee/modules/retrieval/code_retriever.py +0 -232
  214. cognee/tasks/code/enrich_dependency_graph_checker.py +0 -35
  215. cognee/tasks/code/get_local_dependencies_checker.py +0 -20
  216. cognee/tasks/code/get_repo_dependency_graph_checker.py +0 -35
  217. cognee/tasks/documents/check_permissions_on_dataset.py +0 -26
  218. cognee/tasks/repo_processor/__init__.py +0 -2
  219. cognee/tasks/repo_processor/get_local_dependencies.py +0 -335
  220. cognee/tasks/repo_processor/get_non_code_files.py +0 -158
  221. cognee/tasks/repo_processor/get_repo_file_dependencies.py +0 -243
  222. {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/WHEEL +0 -0
  223. {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/licenses/LICENSE +0 -0
  224. {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/licenses/NOTICE.md +0 -0
@@ -0,0 +1,252 @@
1
+ import pytest
2
+ import uuid
3
+ from fastapi.testclient import TestClient
4
+ from unittest.mock import Mock
5
+ from types import SimpleNamespace
6
+ from cognee.api.client import app
7
+ from cognee.modules.users.methods import get_authenticated_user
8
+
9
+
10
+ @pytest.fixture(scope="session")
11
+ def test_client():
12
+ # Keep a single TestClient (and event loop) for the whole module.
13
+ # Re-creating TestClient repeatedly can break async DB connections (asyncpg loop mismatch).
14
+ with TestClient(app) as c:
15
+ yield c
16
+
17
+
18
+ @pytest.fixture
19
+ def client(test_client, mock_default_user):
20
+ async def override_get_authenticated_user():
21
+ return mock_default_user
22
+
23
+ app.dependency_overrides[get_authenticated_user] = override_get_authenticated_user
24
+ yield test_client
25
+ app.dependency_overrides.pop(get_authenticated_user, None)
26
+
27
+
28
+ @pytest.fixture
29
+ def mock_user():
30
+ user = Mock()
31
+ user.id = "test-user-123"
32
+ return user
33
+
34
+
35
+ @pytest.fixture
36
+ def mock_default_user():
37
+ """Mock default user for testing."""
38
+ return SimpleNamespace(
39
+ id=str(uuid.uuid4()),
40
+ email="default@example.com",
41
+ is_active=True,
42
+ tenant_id=str(uuid.uuid4()),
43
+ )
44
+
45
+
46
+ def test_upload_ontology_success(client):
47
+ """Test successful ontology upload"""
48
+ ontology_content = (
49
+ b"<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'></rdf:RDF>"
50
+ )
51
+ unique_key = f"test_ontology_{uuid.uuid4().hex[:8]}"
52
+
53
+ response = client.post(
54
+ "/api/v1/ontologies",
55
+ files=[("ontology_file", ("test.owl", ontology_content, "application/xml"))],
56
+ data={"ontology_key": unique_key, "description": "Test"},
57
+ )
58
+
59
+ assert response.status_code == 200
60
+ data = response.json()
61
+ assert data["uploaded_ontologies"][0]["ontology_key"] == unique_key
62
+ assert "uploaded_at" in data["uploaded_ontologies"][0]
63
+
64
+
65
+ def test_upload_ontology_invalid_file(client):
66
+ """Test 400 response for non-.owl files"""
67
+ unique_key = f"test_ontology_{uuid.uuid4().hex[:8]}"
68
+ response = client.post(
69
+ "/api/v1/ontologies",
70
+ files={"ontology_file": ("test.txt", b"not xml")},
71
+ data={"ontology_key": unique_key},
72
+ )
73
+ assert response.status_code == 400
74
+
75
+
76
+ def test_upload_ontology_missing_data(client):
77
+ """Test 400 response for missing file or key"""
78
+ # Missing file
79
+ response = client.post("/api/v1/ontologies", data={"ontology_key": "test"})
80
+ assert response.status_code == 400
81
+
82
+ # Missing key
83
+ response = client.post(
84
+ "/api/v1/ontologies", files=[("ontology_file", ("test.owl", b"xml", "application/xml"))]
85
+ )
86
+ assert response.status_code == 400
87
+
88
+
89
+ def test_upload_ontology_without_auth_header(client):
90
+ """Test behavior when no explicit authentication header is provided."""
91
+ unique_key = f"test_ontology_{uuid.uuid4().hex[:8]}"
92
+ response = client.post(
93
+ "/api/v1/ontologies",
94
+ files=[("ontology_file", ("test.owl", b"<rdf></rdf>", "application/xml"))],
95
+ data={"ontology_key": unique_key},
96
+ )
97
+
98
+ assert response.status_code == 200
99
+ data = response.json()
100
+ assert data["uploaded_ontologies"][0]["ontology_key"] == unique_key
101
+ assert "uploaded_at" in data["uploaded_ontologies"][0]
102
+
103
+
104
+ def test_upload_multiple_ontologies_in_single_request_is_rejected(client):
105
+ """Uploading multiple ontology files in a single request should fail."""
106
+ import io
107
+
108
+ file1_content = b"<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'></rdf:RDF>"
109
+ file2_content = b"<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'></rdf:RDF>"
110
+
111
+ files = [
112
+ ("ontology_file", ("vehicles.owl", io.BytesIO(file1_content), "application/xml")),
113
+ ("ontology_file", ("manufacturers.owl", io.BytesIO(file2_content), "application/xml")),
114
+ ]
115
+ data = {"ontology_key": "vehicles", "description": "Base vehicles"}
116
+
117
+ response = client.post("/api/v1/ontologies", files=files, data=data)
118
+
119
+ assert response.status_code == 400
120
+ assert "Only one ontology_file is allowed" in response.json()["error"]
121
+
122
+
123
+ def test_upload_endpoint_rejects_array_style_fields(client):
124
+ """Array-style form values should be rejected (no backwards compatibility)."""
125
+ import io
126
+ import json
127
+
128
+ file_content = b"<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'></rdf:RDF>"
129
+
130
+ files = [("ontology_file", ("single.owl", io.BytesIO(file_content), "application/xml"))]
131
+ data = {
132
+ "ontology_key": json.dumps(["single_key"]),
133
+ "description": json.dumps(["Single ontology"]),
134
+ }
135
+
136
+ response = client.post("/api/v1/ontologies", files=files, data=data)
137
+
138
+ assert response.status_code == 400
139
+ assert "ontology_key must be a string" in response.json()["error"]
140
+
141
+
142
+ def test_cognify_with_multiple_ontologies(client):
143
+ """Test cognify endpoint accepts multiple ontology keys"""
144
+ payload = {
145
+ "datasets": ["test_dataset"],
146
+ "ontology_key": ["ontology1", "ontology2"], # Array instead of string
147
+ "run_in_background": False,
148
+ }
149
+
150
+ response = client.post("/api/v1/cognify", json=payload)
151
+
152
+ # Should not fail due to ontology_key type
153
+ assert response.status_code in [200, 400, 409] # May fail for other reasons, not type
154
+
155
+
156
+ def test_complete_multifile_workflow(client):
157
+ """Test workflow: upload ontologies one-by-one → cognify with multiple keys"""
158
+ import io
159
+
160
+ # Step 1: Upload two ontologies (one-by-one)
161
+ file1_content = b"""<?xml version="1.0"?>
162
+ <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
163
+ xmlns:owl="http://www.w3.org/2002/07/owl#">
164
+ <owl:Class rdf:ID="Vehicle"/>
165
+ </rdf:RDF>"""
166
+
167
+ file2_content = b"""<?xml version="1.0"?>
168
+ <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
169
+ xmlns:owl="http://www.w3.org/2002/07/owl#">
170
+ <owl:Class rdf:ID="Manufacturer"/>
171
+ </rdf:RDF>"""
172
+
173
+ upload_response_1 = client.post(
174
+ "/api/v1/ontologies",
175
+ files=[("ontology_file", ("vehicles.owl", io.BytesIO(file1_content), "application/xml"))],
176
+ data={"ontology_key": "vehicles", "description": "Vehicle ontology"},
177
+ )
178
+ assert upload_response_1.status_code == 200
179
+
180
+ upload_response_2 = client.post(
181
+ "/api/v1/ontologies",
182
+ files=[
183
+ ("ontology_file", ("manufacturers.owl", io.BytesIO(file2_content), "application/xml"))
184
+ ],
185
+ data={"ontology_key": "manufacturers", "description": "Manufacturer ontology"},
186
+ )
187
+ assert upload_response_2.status_code == 200
188
+
189
+ # Step 2: Verify ontologies are listed
190
+ list_response = client.get("/api/v1/ontologies")
191
+ assert list_response.status_code == 200
192
+ ontologies = list_response.json()
193
+ assert "vehicles" in ontologies
194
+ assert "manufacturers" in ontologies
195
+
196
+ # Step 3: Test cognify with multiple ontologies
197
+ cognify_payload = {
198
+ "datasets": ["test_dataset"],
199
+ "ontology_key": ["vehicles", "manufacturers"],
200
+ "run_in_background": False,
201
+ }
202
+
203
+ cognify_response = client.post("/api/v1/cognify", json=cognify_payload)
204
+ # Should not fail due to ontology handling (may fail for dataset reasons)
205
+ assert cognify_response.status_code != 400 # Not a validation error
206
+
207
+
208
+ def test_upload_error_handling(client):
209
+ """Test error handling for invalid uploads (single-file endpoint)."""
210
+ import io
211
+ import json
212
+
213
+ # Array-style key should be rejected
214
+ file_content = b"<rdf:RDF></rdf:RDF>"
215
+ files = [("ontology_file", ("test.owl", io.BytesIO(file_content), "application/xml"))]
216
+ data = {
217
+ "ontology_key": json.dumps(["key1", "key2"]),
218
+ "description": "desc1",
219
+ }
220
+
221
+ response = client.post("/api/v1/ontologies", files=files, data=data)
222
+ assert response.status_code == 400
223
+ assert "ontology_key must be a string" in response.json()["error"]
224
+
225
+ # Duplicate key should be rejected
226
+ response_1 = client.post(
227
+ "/api/v1/ontologies",
228
+ files=[("ontology_file", ("test1.owl", io.BytesIO(file_content), "application/xml"))],
229
+ data={"ontology_key": "duplicate", "description": "desc1"},
230
+ )
231
+ assert response_1.status_code == 200
232
+
233
+ response_2 = client.post(
234
+ "/api/v1/ontologies",
235
+ files=[("ontology_file", ("test2.owl", io.BytesIO(file_content), "application/xml"))],
236
+ data={"ontology_key": "duplicate", "description": "desc2"},
237
+ )
238
+ assert response_2.status_code == 400
239
+ assert "already exists" in response_2.json()["error"]
240
+
241
+
242
+ def test_cognify_missing_ontology_key(client):
243
+ """Test cognify with non-existent ontology key"""
244
+ payload = {
245
+ "datasets": ["test_dataset"],
246
+ "ontology_key": ["nonexistent_key"],
247
+ "run_in_background": False,
248
+ }
249
+
250
+ response = client.post("/api/v1/cognify", json=payload)
251
+ assert response.status_code == 409
252
+ assert "Ontology key 'nonexistent_key' not found" in response.json()["error"]
@@ -8,6 +8,7 @@ def test_cache_config_defaults():
8
8
  """Test that CacheConfig has the correct default values."""
9
9
  config = CacheConfig()
10
10
 
11
+ assert config.cache_backend == "fs"
11
12
  assert config.caching is False
12
13
  assert config.shared_kuzu_lock is False
13
14
  assert config.cache_host == "localhost"
@@ -19,6 +20,7 @@ def test_cache_config_defaults():
19
20
  def test_cache_config_custom_values():
20
21
  """Test that CacheConfig accepts custom values."""
21
22
  config = CacheConfig(
23
+ cache_backend="redis",
22
24
  caching=True,
23
25
  shared_kuzu_lock=True,
24
26
  cache_host="redis.example.com",
@@ -27,6 +29,7 @@ def test_cache_config_custom_values():
27
29
  agentic_lock_timeout=180,
28
30
  )
29
31
 
32
+ assert config.cache_backend == "redis"
30
33
  assert config.caching is True
31
34
  assert config.shared_kuzu_lock is True
32
35
  assert config.cache_host == "redis.example.com"
@@ -38,6 +41,7 @@ def test_cache_config_custom_values():
38
41
  def test_cache_config_to_dict():
39
42
  """Test the to_dict method returns all configuration values."""
40
43
  config = CacheConfig(
44
+ cache_backend="fs",
41
45
  caching=True,
42
46
  shared_kuzu_lock=True,
43
47
  cache_host="test-host",
@@ -49,6 +53,7 @@ def test_cache_config_to_dict():
49
53
  config_dict = config.to_dict()
50
54
 
51
55
  assert config_dict == {
56
+ "cache_backend": "fs",
52
57
  "caching": True,
53
58
  "shared_kuzu_lock": True,
54
59
  "cache_host": "test-host",
@@ -0,0 +1,27 @@
1
+ import pytest
2
+ from unittest.mock import AsyncMock, patch, MagicMock
3
+ from cognee.tasks.storage.index_data_points import index_data_points
4
+ from cognee.infrastructure.engine import DataPoint
5
+
6
+
7
+ class TestDataPoint(DataPoint):
8
+ name: str
9
+ metadata: dict = {"index_fields": ["name"]}
10
+
11
+
12
+ @pytest.mark.asyncio
13
+ async def test_index_data_points_calls_vector_engine():
14
+ """Test that index_data_points creates vector index and indexes data."""
15
+ data_points = [TestDataPoint(name="test1")]
16
+
17
+ mock_vector_engine = AsyncMock()
18
+ mock_vector_engine.embedding_engine.get_batch_size = MagicMock(return_value=100)
19
+
20
+ with patch.dict(
21
+ index_data_points.__globals__,
22
+ {"get_vector_engine": lambda: mock_vector_engine},
23
+ ):
24
+ await index_data_points(data_points)
25
+
26
+ assert mock_vector_engine.create_vector_index.await_count >= 1
27
+ assert mock_vector_engine.index_data_points.await_count >= 1
@@ -5,8 +5,7 @@ from cognee.tasks.storage.index_graph_edges import index_graph_edges
5
5
 
6
6
  @pytest.mark.asyncio
7
7
  async def test_index_graph_edges_success():
8
- """Test that index_graph_edges uses the index datapoints and creates vector index."""
9
- # Create the mocks for the graph and vector engines.
8
+ """Test that index_graph_edges retrieves edges and delegates to index_data_points."""
10
9
  mock_graph_engine = AsyncMock()
11
10
  mock_graph_engine.get_graph_data.return_value = (
12
11
  None,
@@ -15,26 +14,23 @@ async def test_index_graph_edges_success():
15
14
  [{"relationship_name": "rel2"}],
16
15
  ],
17
16
  )
18
- mock_vector_engine = AsyncMock()
19
- mock_vector_engine.embedding_engine.get_batch_size = MagicMock(return_value=100)
17
+ mock_index_data_points = AsyncMock()
20
18
 
21
- # Patch the globals of the function so that when it does:
22
- # vector_engine = get_vector_engine()
23
- # graph_engine = await get_graph_engine()
24
- # it uses the mocked versions.
25
19
  with patch.dict(
26
20
  index_graph_edges.__globals__,
27
21
  {
28
22
  "get_graph_engine": AsyncMock(return_value=mock_graph_engine),
29
- "get_vector_engine": lambda: mock_vector_engine,
23
+ "index_data_points": mock_index_data_points,
30
24
  },
31
25
  ):
32
26
  await index_graph_edges()
33
27
 
34
- # Assertions on the mock calls.
35
28
  mock_graph_engine.get_graph_data.assert_awaited_once()
36
- assert mock_vector_engine.create_vector_index.await_count == 1
37
- assert mock_vector_engine.index_data_points.await_count == 1
29
+ mock_index_data_points.assert_awaited_once()
30
+
31
+ call_args = mock_index_data_points.call_args[0][0]
32
+ assert len(call_args) == 2
33
+ assert all(hasattr(item, "relationship_name") for item in call_args)
38
34
 
39
35
 
40
36
  @pytest.mark.asyncio
@@ -42,20 +38,22 @@ async def test_index_graph_edges_no_relationships():
42
38
  """Test that index_graph_edges handles empty relationships correctly."""
43
39
  mock_graph_engine = AsyncMock()
44
40
  mock_graph_engine.get_graph_data.return_value = (None, [])
45
- mock_vector_engine = AsyncMock()
41
+ mock_index_data_points = AsyncMock()
46
42
 
47
43
  with patch.dict(
48
44
  index_graph_edges.__globals__,
49
45
  {
50
46
  "get_graph_engine": AsyncMock(return_value=mock_graph_engine),
51
- "get_vector_engine": lambda: mock_vector_engine,
47
+ "index_data_points": mock_index_data_points,
52
48
  },
53
49
  ):
54
50
  await index_graph_edges()
55
51
 
56
52
  mock_graph_engine.get_graph_data.assert_awaited_once()
57
- mock_vector_engine.create_vector_index.assert_not_awaited()
58
- mock_vector_engine.index_data_points.assert_not_awaited()
53
+ mock_index_data_points.assert_awaited_once()
54
+
55
+ call_args = mock_index_data_points.call_args[0][0]
56
+ assert len(call_args) == 0
59
57
 
60
58
 
61
59
  @pytest.mark.asyncio
@@ -0,0 +1,46 @@
1
+ import pytest
2
+
3
+ from cognee.infrastructure.llm.config import LLMConfig
4
+
5
+
6
+ def test_strip_quotes_from_strings():
7
+ """
8
+ Test if the LLMConfig.strip_quotes_from_strings model validator behaves as expected.
9
+ """
10
+ config = LLMConfig(
11
+ # Strings with surrounding double quotes ("value" → value)
12
+ llm_api_key='"double_value"',
13
+ # Strings with surrounding single quotes ('value' → value)
14
+ llm_endpoint="'single_value'",
15
+ # Strings without quotes (value → value)
16
+ llm_api_version="no_quotes_value",
17
+ # Empty quoted strings ("" → empty string)
18
+ fallback_model='""',
19
+ # None values (should remain None)
20
+ baml_llm_api_key=None,
21
+ # Mixed quotes ("value' → unchanged)
22
+ fallback_endpoint="\"mixed_quote'",
23
+ # Strings with internal quotes ("internal\"quotes" → internal"quotes")
24
+ baml_llm_model='"internal"quotes"',
25
+ )
26
+
27
+ # Strings with surrounding double quotes ("value" → value)
28
+ assert config.llm_api_key == "double_value"
29
+
30
+ # Strings with surrounding single quotes ('value' → value)
31
+ assert config.llm_endpoint == "single_value"
32
+
33
+ # Strings without quotes (value → value)
34
+ assert config.llm_api_version == "no_quotes_value"
35
+
36
+ # Empty quoted strings ("" → empty string)
37
+ assert config.fallback_model == ""
38
+
39
+ # None values (should remain None)
40
+ assert config.baml_llm_api_key is None
41
+
42
+ # Mixed quotes ("value' → unchanged)
43
+ assert config.fallback_endpoint == "\"mixed_quote'"
44
+
45
+ # Strings with internal quotes ("internal\"quotes" → internal"quotes")
46
+ assert config.baml_llm_model == 'internal"quotes'
@@ -4,10 +4,7 @@ from typing import List
4
4
  from cognee.infrastructure.databases.vector.embeddings.LiteLLMEmbeddingEngine import (
5
5
  LiteLLMEmbeddingEngine,
6
6
  )
7
- from cognee.infrastructure.databases.vector.embeddings.embedding_rate_limiter import (
8
- embedding_rate_limit_async,
9
- embedding_sleep_and_retry_async,
10
- )
7
+ from cognee.shared.rate_limiting import embedding_rate_limiter_context_manager
11
8
 
12
9
 
13
10
  class MockEmbeddingEngine(LiteLLMEmbeddingEngine):
@@ -34,8 +31,6 @@ class MockEmbeddingEngine(LiteLLMEmbeddingEngine):
34
31
  self.fail_every_n_requests = fail_every_n_requests
35
32
  self.add_delay = add_delay
36
33
 
37
- @embedding_sleep_and_retry_async()
38
- @embedding_rate_limit_async
39
34
  async def embed_text(self, text: List[str]) -> List[List[float]]:
40
35
  """
41
36
  Mock implementation that returns fixed embeddings and can
@@ -52,4 +47,5 @@ class MockEmbeddingEngine(LiteLLMEmbeddingEngine):
52
47
  raise Exception(f"Mock failure on request #{self.request_count}")
53
48
 
54
49
  # Return mock embeddings of the correct dimension
55
- return [[0.1] * self.dimensions for _ in text]
50
+ async with embedding_rate_limiter_context_manager():
51
+ return [[0.1] * self.dimensions for _ in text]
@@ -6,9 +6,6 @@ import logging
6
6
  from cognee.infrastructure.llm.config import (
7
7
  get_llm_config,
8
8
  )
9
- from cognee.infrastructure.databases.vector.embeddings.embedding_rate_limiter import (
10
- EmbeddingRateLimiter,
11
- )
12
9
  from cognee.tests.unit.infrastructure.mock_embedding_engine import MockEmbeddingEngine
13
10
 
14
11
  # Configure logging
@@ -33,7 +30,6 @@ async def test_embedding_rate_limiting_realistic():
33
30
 
34
31
  # Clear the config and rate limiter caches to ensure our settings are applied
35
32
  get_llm_config.cache_clear()
36
- EmbeddingRateLimiter.reset_instance()
37
33
 
38
34
  # Create a fresh config instance and verify settings
39
35
  config = get_llm_config()
@@ -170,7 +166,6 @@ async def test_with_mock_failures():
170
166
 
171
167
  # Clear caches
172
168
  get_llm_config.cache_clear()
173
- EmbeddingRateLimiter.reset_instance()
174
169
 
175
170
  # Create a mock engine configured to fail every 3rd request
176
171
  engine = MockEmbeddingEngine()