cognee 0.3.6__py3-none-any.whl → 0.3.7.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182)
  1. cognee/__init__.py +1 -0
  2. cognee/api/health.py +2 -12
  3. cognee/api/v1/add/add.py +46 -6
  4. cognee/api/v1/add/routers/get_add_router.py +11 -2
  5. cognee/api/v1/cognify/cognify.py +29 -9
  6. cognee/api/v1/cognify/routers/get_cognify_router.py +2 -1
  7. cognee/api/v1/datasets/datasets.py +11 -0
  8. cognee/api/v1/datasets/routers/get_datasets_router.py +8 -0
  9. cognee/api/v1/delete/routers/get_delete_router.py +2 -0
  10. cognee/api/v1/memify/routers/get_memify_router.py +2 -1
  11. cognee/api/v1/permissions/routers/get_permissions_router.py +6 -0
  12. cognee/api/v1/responses/default_tools.py +0 -1
  13. cognee/api/v1/responses/dispatch_function.py +1 -1
  14. cognee/api/v1/responses/routers/default_tools.py +0 -1
  15. cognee/api/v1/search/routers/get_search_router.py +3 -3
  16. cognee/api/v1/search/search.py +11 -9
  17. cognee/api/v1/settings/routers/get_settings_router.py +7 -1
  18. cognee/api/v1/sync/routers/get_sync_router.py +3 -0
  19. cognee/api/v1/ui/ui.py +45 -16
  20. cognee/api/v1/update/routers/get_update_router.py +3 -1
  21. cognee/api/v1/update/update.py +3 -3
  22. cognee/api/v1/users/routers/get_visualize_router.py +2 -0
  23. cognee/cli/_cognee.py +61 -10
  24. cognee/cli/commands/add_command.py +3 -3
  25. cognee/cli/commands/cognify_command.py +3 -3
  26. cognee/cli/commands/config_command.py +9 -7
  27. cognee/cli/commands/delete_command.py +3 -3
  28. cognee/cli/commands/search_command.py +3 -7
  29. cognee/cli/config.py +0 -1
  30. cognee/context_global_variables.py +5 -0
  31. cognee/exceptions/exceptions.py +1 -1
  32. cognee/infrastructure/databases/cache/__init__.py +2 -0
  33. cognee/infrastructure/databases/cache/cache_db_interface.py +79 -0
  34. cognee/infrastructure/databases/cache/config.py +44 -0
  35. cognee/infrastructure/databases/cache/get_cache_engine.py +67 -0
  36. cognee/infrastructure/databases/cache/redis/RedisAdapter.py +243 -0
  37. cognee/infrastructure/databases/exceptions/__init__.py +1 -0
  38. cognee/infrastructure/databases/exceptions/exceptions.py +18 -2
  39. cognee/infrastructure/databases/graph/get_graph_engine.py +1 -1
  40. cognee/infrastructure/databases/graph/graph_db_interface.py +5 -0
  41. cognee/infrastructure/databases/graph/kuzu/adapter.py +76 -47
  42. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +13 -3
  43. cognee/infrastructure/databases/graph/neo4j_driver/deadlock_retry.py +1 -1
  44. cognee/infrastructure/databases/graph/neptune_driver/neptune_utils.py +1 -1
  45. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +1 -1
  46. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +21 -3
  47. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +17 -10
  48. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +17 -4
  49. cognee/infrastructure/databases/vector/embeddings/config.py +2 -3
  50. cognee/infrastructure/databases/vector/exceptions/exceptions.py +1 -1
  51. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +0 -1
  52. cognee/infrastructure/files/exceptions.py +1 -1
  53. cognee/infrastructure/files/storage/LocalFileStorage.py +9 -9
  54. cognee/infrastructure/files/storage/S3FileStorage.py +11 -11
  55. cognee/infrastructure/files/utils/guess_file_type.py +6 -0
  56. cognee/infrastructure/llm/prompts/feedback_reaction_prompt.txt +14 -0
  57. cognee/infrastructure/llm/prompts/feedback_report_prompt.txt +13 -0
  58. cognee/infrastructure/llm/prompts/feedback_user_context_prompt.txt +5 -0
  59. cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +0 -5
  60. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +19 -9
  61. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +17 -5
  62. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +17 -5
  63. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +32 -0
  64. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/__init__.py +0 -0
  65. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +109 -0
  66. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +33 -8
  67. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +40 -18
  68. cognee/infrastructure/loaders/LoaderEngine.py +27 -7
  69. cognee/infrastructure/loaders/external/__init__.py +7 -0
  70. cognee/infrastructure/loaders/external/advanced_pdf_loader.py +2 -8
  71. cognee/infrastructure/loaders/external/beautiful_soup_loader.py +310 -0
  72. cognee/infrastructure/loaders/supported_loaders.py +7 -0
  73. cognee/modules/data/exceptions/exceptions.py +1 -1
  74. cognee/modules/data/methods/__init__.py +3 -0
  75. cognee/modules/data/methods/get_dataset_data.py +4 -1
  76. cognee/modules/data/methods/has_dataset_data.py +21 -0
  77. cognee/modules/engine/models/TableRow.py +0 -1
  78. cognee/modules/ingestion/save_data_to_file.py +9 -2
  79. cognee/modules/pipelines/exceptions/exceptions.py +1 -1
  80. cognee/modules/pipelines/operations/pipeline.py +12 -1
  81. cognee/modules/pipelines/operations/run_tasks.py +25 -197
  82. cognee/modules/pipelines/operations/run_tasks_base.py +7 -0
  83. cognee/modules/pipelines/operations/run_tasks_data_item.py +260 -0
  84. cognee/modules/pipelines/operations/run_tasks_distributed.py +121 -38
  85. cognee/modules/pipelines/operations/run_tasks_with_telemetry.py +9 -1
  86. cognee/modules/retrieval/EntityCompletionRetriever.py +48 -8
  87. cognee/modules/retrieval/base_graph_retriever.py +3 -1
  88. cognee/modules/retrieval/base_retriever.py +3 -1
  89. cognee/modules/retrieval/chunks_retriever.py +5 -1
  90. cognee/modules/retrieval/code_retriever.py +20 -2
  91. cognee/modules/retrieval/completion_retriever.py +50 -9
  92. cognee/modules/retrieval/cypher_search_retriever.py +11 -1
  93. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +47 -8
  94. cognee/modules/retrieval/graph_completion_cot_retriever.py +152 -22
  95. cognee/modules/retrieval/graph_completion_retriever.py +54 -10
  96. cognee/modules/retrieval/lexical_retriever.py +20 -2
  97. cognee/modules/retrieval/natural_language_retriever.py +10 -1
  98. cognee/modules/retrieval/summaries_retriever.py +5 -1
  99. cognee/modules/retrieval/temporal_retriever.py +62 -10
  100. cognee/modules/retrieval/user_qa_feedback.py +3 -2
  101. cognee/modules/retrieval/utils/completion.py +30 -4
  102. cognee/modules/retrieval/utils/description_to_codepart_search.py +1 -1
  103. cognee/modules/retrieval/utils/session_cache.py +156 -0
  104. cognee/modules/search/methods/get_search_type_tools.py +0 -5
  105. cognee/modules/search/methods/no_access_control_search.py +12 -1
  106. cognee/modules/search/methods/search.py +51 -5
  107. cognee/modules/search/types/SearchType.py +0 -1
  108. cognee/modules/settings/get_settings.py +23 -0
  109. cognee/modules/users/methods/get_authenticated_user.py +3 -1
  110. cognee/modules/users/methods/get_default_user.py +1 -6
  111. cognee/modules/users/roles/methods/create_role.py +2 -2
  112. cognee/modules/users/tenants/methods/create_tenant.py +2 -2
  113. cognee/shared/exceptions/exceptions.py +1 -1
  114. cognee/shared/logging_utils.py +18 -11
  115. cognee/shared/utils.py +24 -2
  116. cognee/tasks/codingagents/coding_rule_associations.py +1 -2
  117. cognee/tasks/documents/exceptions/exceptions.py +1 -1
  118. cognee/tasks/feedback/__init__.py +13 -0
  119. cognee/tasks/feedback/create_enrichments.py +84 -0
  120. cognee/tasks/feedback/extract_feedback_interactions.py +230 -0
  121. cognee/tasks/feedback/generate_improved_answers.py +130 -0
  122. cognee/tasks/feedback/link_enrichments_to_feedback.py +67 -0
  123. cognee/tasks/feedback/models.py +26 -0
  124. cognee/tasks/graph/extract_graph_from_data.py +2 -0
  125. cognee/tasks/ingestion/data_item_to_text_file.py +3 -3
  126. cognee/tasks/ingestion/ingest_data.py +11 -5
  127. cognee/tasks/ingestion/save_data_item_to_storage.py +12 -1
  128. cognee/tasks/storage/add_data_points.py +3 -10
  129. cognee/tasks/storage/index_data_points.py +19 -14
  130. cognee/tasks/storage/index_graph_edges.py +25 -11
  131. cognee/tasks/web_scraper/__init__.py +34 -0
  132. cognee/tasks/web_scraper/config.py +26 -0
  133. cognee/tasks/web_scraper/default_url_crawler.py +446 -0
  134. cognee/tasks/web_scraper/models.py +46 -0
  135. cognee/tasks/web_scraper/types.py +4 -0
  136. cognee/tasks/web_scraper/utils.py +142 -0
  137. cognee/tasks/web_scraper/web_scraper_task.py +396 -0
  138. cognee/tests/cli_tests/cli_unit_tests/test_cli_utils.py +0 -1
  139. cognee/tests/integration/web_url_crawler/test_default_url_crawler.py +13 -0
  140. cognee/tests/integration/web_url_crawler/test_tavily_crawler.py +19 -0
  141. cognee/tests/integration/web_url_crawler/test_url_adding_e2e.py +344 -0
  142. cognee/tests/subprocesses/reader.py +25 -0
  143. cognee/tests/subprocesses/simple_cognify_1.py +31 -0
  144. cognee/tests/subprocesses/simple_cognify_2.py +31 -0
  145. cognee/tests/subprocesses/writer.py +32 -0
  146. cognee/tests/tasks/descriptive_metrics/metrics_test_utils.py +0 -2
  147. cognee/tests/tasks/descriptive_metrics/neo4j_metrics_test.py +8 -3
  148. cognee/tests/tasks/entity_extraction/entity_extraction_test.py +89 -0
  149. cognee/tests/tasks/web_scraping/web_scraping_test.py +172 -0
  150. cognee/tests/test_add_docling_document.py +56 -0
  151. cognee/tests/test_chromadb.py +7 -11
  152. cognee/tests/test_concurrent_subprocess_access.py +76 -0
  153. cognee/tests/test_conversation_history.py +240 -0
  154. cognee/tests/test_feedback_enrichment.py +174 -0
  155. cognee/tests/test_kuzu.py +27 -15
  156. cognee/tests/test_lancedb.py +7 -11
  157. cognee/tests/test_library.py +32 -2
  158. cognee/tests/test_neo4j.py +24 -16
  159. cognee/tests/test_neptune_analytics_vector.py +7 -11
  160. cognee/tests/test_permissions.py +9 -13
  161. cognee/tests/test_pgvector.py +4 -4
  162. cognee/tests/test_remote_kuzu.py +8 -11
  163. cognee/tests/test_s3_file_storage.py +1 -1
  164. cognee/tests/test_search_db.py +6 -8
  165. cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +89 -0
  166. cognee/tests/unit/modules/retrieval/conversation_history_test.py +154 -0
  167. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +51 -0
  168. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/METADATA +21 -6
  169. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/RECORD +178 -139
  170. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/entry_points.txt +1 -0
  171. distributed/Dockerfile +0 -3
  172. distributed/entrypoint.py +21 -9
  173. distributed/signal.py +5 -0
  174. distributed/workers/data_point_saving_worker.py +64 -34
  175. distributed/workers/graph_saving_worker.py +71 -47
  176. cognee/infrastructure/databases/graph/memgraph/memgraph_adapter.py +0 -1116
  177. cognee/modules/retrieval/insights_retriever.py +0 -133
  178. cognee/tests/test_memgraph.py +0 -109
  179. cognee/tests/unit/modules/retrieval/insights_retriever_test.py +0 -251
  180. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/WHEEL +0 -0
  181. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/licenses/LICENSE +0 -0
  182. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/licenses/NOTICE.md +0 -0
@@ -0,0 +1,243 @@
1
+ import asyncio
2
+ import redis
3
+ import redis.asyncio as aioredis
4
+ from contextlib import contextmanager
5
+ from cognee.infrastructure.databases.cache.cache_db_interface import CacheDBInterface
6
+ from cognee.infrastructure.databases.exceptions import CacheConnectionError
7
+ from cognee.shared.logging_utils import get_logger
8
+ from datetime import datetime
9
+ import json
10
+
11
+ logger = get_logger("RedisAdapter")
12
+
13
+
14
class RedisAdapter(CacheDBInterface):
    """Redis-backed cache adapter implementing CacheDBInterface.

    Provides two facilities:
      * A synchronous distributed lock (sync because Kuzu requires
        synchronous access around its database operations).
      * Asynchronous per-session Q/A history storage, backed by Redis
        lists keyed as ``agent_sessions:{user_id}:{session_id}``.
    """

    def __init__(
        self,
        host,
        port,
        lock_name="default_lock",
        username=None,
        password=None,
        timeout=240,
        blocking_timeout=300,
        connection_timeout=30,
    ):
        """Create sync and async Redis clients and validate connectivity.

        Args:
            host: Redis server hostname.
            port: Redis server port.
            lock_name: Name of the distributed lock (passed to the base class).
            username: Optional Redis username.
            password: Optional Redis password.
            timeout: Lock auto-expiry (seconds) once acquired.
            blocking_timeout: Max seconds to block waiting for the lock.
            connection_timeout: Socket connect/read timeout (seconds).

        Raises:
            CacheConnectionError: If the initial connection/ping fails.
        """
        super().__init__(host, port, lock_name)

        self.host = host
        self.port = port
        self.connection_timeout = connection_timeout
        # Ensure release_lock() is safe to call even if acquire_lock() was
        # never invoked (self.lock was previously left undefined until the
        # first acquire_lock() call).
        self.lock = None

        try:
            self.sync_redis = redis.Redis(
                host=host,
                port=port,
                username=username,
                password=password,
                socket_connect_timeout=connection_timeout,
                socket_timeout=connection_timeout,
            )
            # decode_responses=True so async reads return str, not bytes,
            # which json.loads() consumes directly.
            self.async_redis = aioredis.Redis(
                host=host,
                port=port,
                username=username,
                password=password,
                decode_responses=True,
                socket_connect_timeout=connection_timeout,
            )
            self.timeout = timeout
            self.blocking_timeout = blocking_timeout

            # Validate connection on initialization so misconfiguration
            # surfaces immediately instead of at first use.
            self._validate_connection()
            logger.info(f"Successfully connected to Redis at {host}:{port}")

        except (redis.ConnectionError, redis.TimeoutError) as e:
            error_msg = f"Failed to connect to Redis at {host}:{port}: {str(e)}"
            logger.error(error_msg)
            raise CacheConnectionError(error_msg) from e
        except Exception as e:
            error_msg = f"Unexpected error initializing Redis adapter: {str(e)}"
            logger.error(error_msg)
            raise CacheConnectionError(error_msg) from e

    def _validate_connection(self):
        """Validate Redis connection is available.

        Raises:
            CacheConnectionError: If the PING round-trip fails or times out.
        """
        try:
            self.sync_redis.ping()
        except (redis.ConnectionError, redis.TimeoutError) as e:
            raise CacheConnectionError(
                f"Cannot connect to Redis at {self.host}:{self.port}: {str(e)}"
            ) from e

    def acquire_lock(self):
        """
        Acquire the Redis lock manually. Raises if acquisition fails. (Sync because of Kuzu)

        Returns:
            The acquired redis lock object.

        Raises:
            RuntimeError: If the lock could not be acquired within
                ``blocking_timeout`` seconds.
        """
        self.lock = self.sync_redis.lock(
            name=self.lock_key,
            timeout=self.timeout,
            blocking_timeout=self.blocking_timeout,
        )

        acquired = self.lock.acquire()
        if not acquired:
            raise RuntimeError(f"Could not acquire Redis lock: {self.lock_key}")

        return self.lock

    def release_lock(self):
        """
        Release the Redis lock manually, if held. (Sync because of Kuzu)

        Releasing a lock that expired or is owned elsewhere raises
        LockError in redis-py; that is swallowed deliberately because the
        end state ("we no longer hold the lock") is the same.
        """
        if self.lock:
            try:
                self.lock.release()
                self.lock = None
            except redis.exceptions.LockError:
                pass

    @contextmanager
    def hold_lock(self):
        """
        Context manager for acquiring and releasing the Redis lock automatically. (Sync because of Kuzu)
        """
        # BUG FIX: previously called self.acquire() / self.release(), which
        # do not exist on this class — the correct methods are
        # acquire_lock() / release_lock().
        self.acquire_lock()
        try:
            yield
        finally:
            self.release_lock()

    async def add_qa(
        self,
        user_id: str,
        session_id: str,
        question: str,
        context: str,
        answer: str,
        ttl: int | None = 86400,
    ):
        """
        Add a Q/A/context triplet to a Redis list for this session.
        Creates the session if it doesn't exist.

        Args:
            user_id (str): The user ID.
            session_id: Unique identifier for the session.
            question: User question text.
            context: Context used to answer.
            answer: Assistant answer text.
            ttl: Optional time-to-live (seconds). If provided, the session expires after this time.

        Raises:
            CacheConnectionError: If Redis connection fails or times out.
        """
        try:
            session_key = f"agent_sessions:{user_id}:{session_id}"

            qa_entry = {
                # NOTE(review): datetime.utcnow() is deprecated in Python 3.12;
                # kept to preserve the stored timestamp format (no "+00:00"
                # offset suffix) — migrate to datetime.now(timezone.utc) once
                # consumers tolerate offset-aware ISO strings.
                "time": datetime.utcnow().isoformat(),
                "question": question,
                "context": context,
                "answer": answer,
            }

            await self.async_redis.rpush(session_key, json.dumps(qa_entry))

            # Each write refreshes the expiry, so the TTL is per-session
            # inactivity, not per-entry.
            if ttl is not None:
                await self.async_redis.expire(session_key, ttl)

        except (redis.ConnectionError, redis.TimeoutError) as e:
            error_msg = f"Redis connection error while adding Q&A: {str(e)}"
            logger.error(error_msg)
            raise CacheConnectionError(error_msg) from e
        except Exception as e:
            error_msg = f"Unexpected error while adding Q&A to Redis: {str(e)}"
            logger.error(error_msg)
            raise CacheConnectionError(error_msg) from e

    async def get_latest_qa(self, user_id: str, session_id: str, last_n: int = 5):
        """
        Retrieve the most recent Q/A/context triplet(s) for the given session.

        Args:
            user_id: The user ID.
            session_id: Unique identifier for the session.
            last_n: Number of most recent entries to return.

        Returns:
            A list of decoded Q/A entries (most recent last). NOTE: for an
            empty session this returns ``None`` when ``last_n == 1`` but
            ``[]`` otherwise — callers relying on the ``None`` sentinel
            prevent unifying the two branches.
        """
        session_key = f"agent_sessions:{user_id}:{session_id}"
        if last_n == 1:
            data = await self.async_redis.lindex(session_key, -1)
            return [json.loads(data)] if data else None
        else:
            data = await self.async_redis.lrange(session_key, -last_n, -1)
            return [json.loads(d) for d in data] if data else []

    async def get_all_qas(self, user_id: str, session_id: str):
        """
        Retrieve all Q/A/context triplets for the given session.

        Returns:
            A list of decoded Q/A entries in insertion order (empty list if
            the session does not exist).
        """
        session_key = f"agent_sessions:{user_id}:{session_id}"
        entries = await self.async_redis.lrange(session_key, 0, -1)
        return [json.loads(e) for e in entries]

    async def close(self):
        """
        Gracefully close the async Redis connection.
        """
        await self.async_redis.aclose()
185
+
186
+
187
async def main():
    """Manual smoke test: exercises RedisAdapter against a local Redis server.

    Requires a Redis instance listening on localhost:6379. Adds Q/A pairs
    to two demo sessions, then reads them back and prints the results.
    """
    adapter = RedisAdapter(host="localhost", port=6379)
    user_id = "demo_user_id"
    session_id = "demo_session"

    print("\nAdding sample Q/A pairs...")
    demo_pairs = [
        ("What is Redis?", "Basic DB context", "Redis is an in-memory data store."),
        ("Who created Redis?", "Historical context", "Salvatore Sanfilippo (antirez)."),
    ]
    for question, context, answer in demo_pairs:
        await adapter.add_qa(user_id, session_id, question, context, answer)

    print("\nLatest QA:")
    latest = await adapter.get_latest_qa(user_id, session_id)
    print(json.dumps(latest, indent=2))

    print("\nLast 2 QAs:")
    last_two = await adapter.get_latest_qa(user_id, session_id, last_n=2)
    print(json.dumps(last_two, indent=2))

    # Exercise a second, independent session end-to-end.
    session_id = "session_expire_demo"
    expire_pairs = [
        ("What is Redis?", "Database context", "Redis is an in-memory data store."),
        ("Who created Redis?", "History context", "Salvatore Sanfilippo (antirez)."),
    ]
    for question, context, answer in expire_pairs:
        await adapter.add_qa(user_id, session_id, question, context, answer)

    print(await adapter.get_all_qas(user_id, session_id))

    await adapter.close()


if __name__ == "__main__":
    asyncio.run(main())
@@ -11,4 +11,5 @@ from .exceptions import (
11
11
  EmbeddingException,
12
12
  MissingQueryParameterError,
13
13
  MutuallyExclusiveQueryParametersError,
14
+ CacheConnectionError,
14
15
  )
@@ -15,7 +15,7 @@ class DatabaseNotCreatedError(CogneeSystemError):
15
15
  self,
16
16
  message: str = "The database has not been created yet. Please call `await setup()` first.",
17
17
  name: str = "DatabaseNotCreatedError",
18
- status_code: int = status.HTTP_422_UNPROCESSABLE_ENTITY,
18
+ status_code: int = status.HTTP_422_UNPROCESSABLE_CONTENT,
19
19
  ):
20
20
  super().__init__(message, name, status_code)
21
21
 
@@ -99,7 +99,7 @@ class EmbeddingException(CogneeConfigurationError):
99
99
  self,
100
100
  message: str = "Embedding Exception.",
101
101
  name: str = "EmbeddingException",
102
- status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
102
+ status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
103
103
  ):
104
104
  super().__init__(message, name, status_code)
105
105
 
@@ -132,3 +132,19 @@ class MutuallyExclusiveQueryParametersError(CogneeValidationError):
132
132
  ):
133
133
  message = "The search function accepts either text or embedding as input, but not both."
134
134
  super().__init__(message, name, status_code)
135
+
136
+
137
+ class CacheConnectionError(CogneeConfigurationError):
138
+ """
139
+ Raised when connection to the cache database (e.g., Redis) fails.
140
+
141
+ This error indicates that the cache service is unavailable or misconfigured.
142
+ """
143
+
144
+ def __init__(
145
+ self,
146
+ message: str = "Failed to connect to cache database. Please check your cache configuration.",
147
+ name: str = "CacheConnectionError",
148
+ status_code: int = status.HTTP_503_SERVICE_UNAVAILABLE,
149
+ ):
150
+ super().__init__(message, name, status_code)
@@ -162,5 +162,5 @@ def create_graph_engine(
162
162
 
163
163
  raise EnvironmentError(
164
164
  f"Unsupported graph database provider: {graph_database_provider}. "
165
- f"Supported providers are: {', '.join(list(supported_databases.keys()) + ['neo4j', 'kuzu', 'kuzu-remote', 'memgraph', 'neptune', 'neptune_analytics'])}"
165
+ f"Supported providers are: {', '.join(list(supported_databases.keys()) + ['neo4j', 'kuzu', 'kuzu-remote', 'neptune', 'neptune_analytics'])}"
166
166
  )
@@ -159,6 +159,11 @@ class GraphDBInterface(ABC):
159
159
  - get_connections
160
160
  """
161
161
 
162
+ @abstractmethod
163
+ async def is_empty(self) -> bool:
164
+ logger.warning("is_empty() is not implemented")
165
+ return True
166
+
162
167
  @abstractmethod
163
168
  async def query(self, query: str, params: dict) -> List[Any]:
164
169
  """
@@ -4,7 +4,7 @@ import os
4
4
  import json
5
5
  import asyncio
6
6
  import tempfile
7
- from uuid import UUID
7
+ from uuid import UUID, uuid5, NAMESPACE_OID
8
8
  from kuzu import Connection
9
9
  from kuzu.database import Database
10
10
  from datetime import datetime, timezone
@@ -23,9 +23,14 @@ from cognee.infrastructure.engine import DataPoint
23
23
  from cognee.modules.storage.utils import JSONEncoder
24
24
  from cognee.modules.engine.utils.generate_timestamp_datapoint import date_to_int
25
25
  from cognee.tasks.temporal_graph.models import Timestamp
26
+ from cognee.infrastructure.databases.cache.config import get_cache_config
26
27
 
27
28
  logger = get_logger()
28
29
 
30
+ cache_config = get_cache_config()
31
+ if cache_config.shared_kuzu_lock:
32
+ from cognee.infrastructure.databases.cache.get_cache_engine import get_cache_engine
33
+
29
34
 
30
35
  class KuzuAdapter(GraphDBInterface):
31
36
  """
@@ -39,12 +44,20 @@ class KuzuAdapter(GraphDBInterface):
39
44
 
40
45
  def __init__(self, db_path: str):
41
46
  """Initialize Kuzu database connection and schema."""
47
+ self.open_connections = 0
48
+ self._is_closed = False
42
49
  self.db_path = db_path # Path for the database directory
43
50
  self.db: Optional[Database] = None
44
51
  self.connection: Optional[Connection] = None
45
- self.executor = ThreadPoolExecutor()
46
- self._initialize_connection()
52
+ if cache_config.shared_kuzu_lock:
53
+ self.redis_lock = get_cache_engine(
54
+ lock_key="kuzu-lock-" + str(uuid5(NAMESPACE_OID, db_path))
55
+ )
56
+ else:
57
+ self.executor = ThreadPoolExecutor()
58
+ self._initialize_connection()
47
59
  self.KUZU_ASYNC_LOCK = asyncio.Lock()
60
+ self._connection_change_lock = asyncio.Lock()
48
61
 
49
62
  def _initialize_connection(self) -> None:
50
63
  """Initialize the Kuzu database connection and schema."""
@@ -185,6 +198,15 @@ class KuzuAdapter(GraphDBInterface):
185
198
  except FileNotFoundError:
186
199
  logger.warning(f"Kuzu S3 storage file not found: {self.db_path}")
187
200
 
201
+ async def is_empty(self) -> bool:
202
+ query = """
203
+ MATCH (n)
204
+ RETURN true
205
+ LIMIT 1;
206
+ """
207
+ query_result = await self.query(query)
208
+ return len(query_result) == 0
209
+
188
210
  async def query(self, query: str, params: Optional[dict] = None) -> List[Tuple]:
189
211
  """
190
212
  Execute a Kuzu query asynchronously with automatic reconnection.
@@ -209,9 +231,13 @@ class KuzuAdapter(GraphDBInterface):
209
231
  params = params or {}
210
232
 
211
233
  def blocking_query():
234
+ lock_acquired = False
212
235
  try:
236
+ if cache_config.shared_kuzu_lock:
237
+ self.redis_lock.acquire_lock()
238
+ lock_acquired = True
213
239
  if not self.connection:
214
- logger.debug("Reconnecting to Kuzu database...")
240
+ logger.info("Reconnecting to Kuzu database...")
215
241
  self._initialize_connection()
216
242
 
217
243
  result = self.connection.execute(query, params)
@@ -225,12 +251,47 @@ class KuzuAdapter(GraphDBInterface):
225
251
  val = val.as_py()
226
252
  processed_rows.append(val)
227
253
  rows.append(tuple(processed_rows))
254
+
228
255
  return rows
229
256
  except Exception as e:
230
257
  logger.error(f"Query execution failed: {str(e)}")
231
258
  raise
232
-
233
- return await loop.run_in_executor(self.executor, blocking_query)
259
+ finally:
260
+ if cache_config.shared_kuzu_lock and lock_acquired:
261
+ try:
262
+ self.close()
263
+ finally:
264
+ self.redis_lock.release_lock()
265
+
266
+ if cache_config.shared_kuzu_lock:
267
+ async with self._connection_change_lock:
268
+ self.open_connections += 1
269
+ logger.info(f"Open connections after open: {self.open_connections}")
270
+ try:
271
+ result = blocking_query()
272
+ finally:
273
+ self.open_connections -= 1
274
+ logger.info(f"Open connections after close: {self.open_connections}")
275
+ return result
276
+ else:
277
+ result = await loop.run_in_executor(self.executor, blocking_query)
278
+ return result
279
+
280
+ def close(self):
281
+ if self.connection:
282
+ del self.connection
283
+ self.connection = None
284
+ if self.db:
285
+ del self.db
286
+ self.db = None
287
+ self._is_closed = True
288
+ logger.info("Kuzu database closed successfully")
289
+
290
+ def reopen(self):
291
+ if self._is_closed:
292
+ self._is_closed = False
293
+ self._initialize_connection()
294
+ logger.info("Kuzu database re-opened successfully")
234
295
 
235
296
  @asynccontextmanager
236
297
  async def get_session(self):
@@ -1305,9 +1366,15 @@ class KuzuAdapter(GraphDBInterface):
1305
1366
  params[param_name] = values
1306
1367
 
1307
1368
  where_clause = " AND ".join(where_clauses)
1308
- nodes_query = (
1309
- f"MATCH (n:Node) WHERE {where_clause} RETURN n.id, {{properties: n.properties}}"
1310
- )
1369
+ nodes_query = f"""
1370
+ MATCH (n:Node)
1371
+ WHERE {where_clause}
1372
+ RETURN n.id, {{
1373
+ name: n.name,
1374
+ type: n.type,
1375
+ properties: n.properties
1376
+ }}
1377
+ """
1311
1378
  edges_query = f"""
1312
1379
  MATCH (n1:Node)-[r:EDGE]->(n2:Node)
1313
1380
  WHERE {where_clause.replace("n.", "n1.")} AND {where_clause.replace("n.", "n2.")}
@@ -1557,44 +1624,6 @@ class KuzuAdapter(GraphDBInterface):
1557
1624
  logger.error(f"Failed to delete graph data: {e}")
1558
1625
  raise
1559
1626
 
1560
- async def clear_database(self) -> None:
1561
- """
1562
- Clear all data from the database by deleting the database files and reinitializing.
1563
-
1564
- This method removes all files associated with the database and reinitializes the Kuzu
1565
- database structure, ensuring a completely empty state. It handles exceptions that might
1566
- occur during file deletions or initializations carefully.
1567
- """
1568
- try:
1569
- if self.connection:
1570
- self.connection = None
1571
- if self.db:
1572
- self.db.close()
1573
- self.db = None
1574
-
1575
- db_dir = os.path.dirname(self.db_path)
1576
- db_name = os.path.basename(self.db_path)
1577
- file_storage = get_file_storage(db_dir)
1578
-
1579
- if await file_storage.file_exists(db_name):
1580
- await file_storage.remove_all()
1581
- logger.info(f"Deleted Kuzu database files at {self.db_path}")
1582
-
1583
- # Reinitialize the database
1584
- self._initialize_connection()
1585
- # Verify the database is empty
1586
- result = self.connection.execute("MATCH (n:Node) RETURN COUNT(n)")
1587
- count = result.get_next()[0] if result.has_next() else 0
1588
- if count > 0:
1589
- logger.warning(
1590
- f"Database still contains {count} nodes after clearing, forcing deletion"
1591
- )
1592
- self.connection.execute("MATCH (n:Node) DETACH DELETE n")
1593
- logger.info("Database cleared successfully")
1594
- except Exception as e:
1595
- logger.error(f"Error during database clearing: {e}")
1596
- raise
1597
-
1598
1627
  async def get_document_subgraph(self, data_id: str):
1599
1628
  """
1600
1629
  Get all nodes that should be deleted when removing a document.
@@ -68,6 +68,7 @@ class Neo4jAdapter(GraphDBInterface):
68
68
  auth=auth,
69
69
  max_connection_lifetime=120,
70
70
  notifications_min_severity="OFF",
71
+ keep_alive=True,
71
72
  )
72
73
 
73
74
  async def initialize(self) -> None:
@@ -86,6 +87,15 @@ class Neo4jAdapter(GraphDBInterface):
86
87
  async with self.driver.session(database=self.graph_database_name) as session:
87
88
  yield session
88
89
 
90
+ async def is_empty(self) -> bool:
91
+ query = """
92
+ RETURN EXISTS {
93
+ MATCH (n)
94
+ } AS node_exists;
95
+ """
96
+ query_result = await self.query(query)
97
+ return not query_result[0]["node_exists"]
98
+
89
99
  @deadlock_retry()
90
100
  async def query(
91
101
  self,
@@ -205,7 +215,7 @@ class Neo4jAdapter(GraphDBInterface):
205
215
  {
206
216
  "node_id": str(node.id),
207
217
  "label": type(node).__name__,
208
- "properties": self.serialize_properties(node.model_dump()),
218
+ "properties": self.serialize_properties(dict(node)),
209
219
  }
210
220
  for node in nodes
211
221
  ]
@@ -1066,7 +1076,7 @@ class Neo4jAdapter(GraphDBInterface):
1066
1076
  query_nodes = f"""
1067
1077
  MATCH (n)
1068
1078
  WHERE {where_clause}
1069
- RETURN ID(n) AS id, labels(n) AS labels, properties(n) AS properties
1079
+ RETURN n.id AS id, labels(n) AS labels, properties(n) AS properties
1070
1080
  """
1071
1081
  result_nodes = await self.query(query_nodes)
1072
1082
 
@@ -1081,7 +1091,7 @@ class Neo4jAdapter(GraphDBInterface):
1081
1091
  query_edges = f"""
1082
1092
  MATCH (n)-[r]->(m)
1083
1093
  WHERE {where_clause} AND {where_clause.replace("n.", "m.")}
1084
- RETURN ID(n) AS source, ID(m) AS target, TYPE(r) AS type, properties(r) AS properties
1094
+ RETURN n.id AS source, n.id AS target, TYPE(r) AS type, properties(r) AS properties
1085
1095
  """
1086
1096
  result_edges = await self.query(query_edges)
1087
1097
 
@@ -8,7 +8,7 @@ from cognee.infrastructure.utils.calculate_backoff import calculate_backoff
8
8
  logger = get_logger("deadlock_retry")
9
9
 
10
10
 
11
- def deadlock_retry(max_retries=5):
11
+ def deadlock_retry(max_retries=10):
12
12
  """
13
13
  Decorator that automatically retries an asynchronous function when rate limit errors occur.
14
14
 
@@ -53,7 +53,7 @@ def parse_neptune_url(url: str) -> Tuple[str, str]:
53
53
  return graph_id, region
54
54
 
55
55
  except Exception as e:
56
- raise ValueError(f"Failed to parse Neptune Analytics URL '{url}': {str(e)}")
56
+ raise ValueError(f"Failed to parse Neptune Analytics URL '{url}': {str(e)}") from e
57
57
 
58
58
 
59
59
  def validate_graph_id(graph_id: str) -> bool:
@@ -283,7 +283,7 @@ class SQLAlchemyAdapter:
283
283
  try:
284
284
  data_entity = (await session.scalars(select(Data).where(Data.id == data_id))).one()
285
285
  except (ValueError, NoResultFound) as e:
286
- raise EntityNotFoundError(message=f"Entity not found: {str(e)}")
286
+ raise EntityNotFoundError(message=f"Entity not found: {str(e)}") from e
287
287
 
288
288
  # Check if other data objects point to the same raw data location
289
289
  raw_data_location_entities = (
@@ -1,8 +1,17 @@
1
- from cognee.shared.logging_utils import get_logger
1
+ import os
2
+ import logging
2
3
  from typing import List, Optional
3
4
  from fastembed import TextEmbedding
4
5
  import litellm
5
- import os
6
+ from tenacity import (
7
+ retry,
8
+ stop_after_delay,
9
+ wait_exponential_jitter,
10
+ retry_if_not_exception_type,
11
+ before_sleep_log,
12
+ )
13
+
14
+ from cognee.shared.logging_utils import get_logger
6
15
  from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
7
16
  from cognee.infrastructure.databases.exceptions import EmbeddingException
8
17
  from cognee.infrastructure.llm.tokenizer.TikToken import (
@@ -57,6 +66,13 @@ class FastembedEmbeddingEngine(EmbeddingEngine):
57
66
  enable_mocking = str(enable_mocking).lower()
58
67
  self.mock = enable_mocking in ("true", "1", "yes")
59
68
 
69
+ @retry(
70
+ stop=stop_after_delay(128),
71
+ wait=wait_exponential_jitter(2, 128),
72
+ retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
73
+ before_sleep=before_sleep_log(logger, logging.DEBUG),
74
+ reraise=True,
75
+ )
60
76
  async def embed_text(self, text: List[str]) -> List[List[float]]:
61
77
  """
62
78
  Embed the given text into numerical vectors.
@@ -90,7 +106,9 @@ class FastembedEmbeddingEngine(EmbeddingEngine):
90
106
 
91
107
  except Exception as error:
92
108
  logger.error(f"Embedding error in FastembedEmbeddingEngine: {str(error)}")
93
- raise EmbeddingException(f"Failed to index data points using model {self.model}")
109
+ raise EmbeddingException(
110
+ f"Failed to index data points using model {self.model}"
111
+ ) from error
94
112
 
95
113
  def get_vector_size(self) -> int:
96
114
  """
@@ -1,15 +1,21 @@
1
1
  import asyncio
2
+ import logging
3
+
2
4
  from cognee.shared.logging_utils import get_logger
3
5
  from typing import List, Optional
4
6
  import numpy as np
5
7
  import math
8
+ from tenacity import (
9
+ retry,
10
+ stop_after_delay,
11
+ wait_exponential_jitter,
12
+ retry_if_not_exception_type,
13
+ before_sleep_log,
14
+ )
6
15
  import litellm
7
16
  import os
8
17
  from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
9
18
  from cognee.infrastructure.databases.exceptions import EmbeddingException
10
- from cognee.infrastructure.llm.tokenizer.Gemini import (
11
- GeminiTokenizer,
12
- )
13
19
  from cognee.infrastructure.llm.tokenizer.HuggingFace import (
14
20
  HuggingFaceTokenizer,
15
21
  )
@@ -19,10 +25,6 @@ from cognee.infrastructure.llm.tokenizer.Mistral import (
19
25
  from cognee.infrastructure.llm.tokenizer.TikToken import (
20
26
  TikTokenTokenizer,
21
27
  )
22
- from cognee.infrastructure.databases.vector.embeddings.embedding_rate_limiter import (
23
- embedding_rate_limit_async,
24
- embedding_sleep_and_retry_async,
25
- )
26
28
 
27
29
  litellm.set_verbose = False
28
30
  logger = get_logger("LiteLLMEmbeddingEngine")
@@ -76,8 +78,13 @@ class LiteLLMEmbeddingEngine(EmbeddingEngine):
76
78
  enable_mocking = str(enable_mocking).lower()
77
79
  self.mock = enable_mocking in ("true", "1", "yes")
78
80
 
79
- @embedding_sleep_and_retry_async()
80
- @embedding_rate_limit_async
81
+ @retry(
82
+ stop=stop_after_delay(128),
83
+ wait=wait_exponential_jitter(2, 128),
84
+ retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
85
+ before_sleep=before_sleep_log(logger, logging.DEBUG),
86
+ reraise=True,
87
+ )
81
88
  async def embed_text(self, text: List[str]) -> List[List[float]]:
82
89
  """
83
90
  Embed a list of text strings into vector representations.
@@ -150,7 +157,7 @@ class LiteLLMEmbeddingEngine(EmbeddingEngine):
150
157
  litellm.exceptions.NotFoundError,
151
158
  ) as e:
152
159
  logger.error(f"Embedding error with model {self.model}: {str(e)}")
153
- raise EmbeddingException(f"Failed to index data points using model {self.model}")
160
+ raise EmbeddingException(f"Failed to index data points using model {self.model}") from e
154
161
 
155
162
  except Exception as error:
156
163
  logger.error("Error embedding text: %s", str(error))