cognee 0.2.2.dev0__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214)
  1. cognee/api/client.py +41 -3
  2. cognee/api/health.py +332 -0
  3. cognee/api/v1/add/add.py +5 -2
  4. cognee/api/v1/add/routers/get_add_router.py +3 -0
  5. cognee/api/v1/cognify/code_graph_pipeline.py +3 -1
  6. cognee/api/v1/cognify/cognify.py +8 -0
  7. cognee/api/v1/cognify/routers/get_cognify_router.py +8 -1
  8. cognee/api/v1/config/config.py +3 -1
  9. cognee/api/v1/datasets/routers/get_datasets_router.py +1 -7
  10. cognee/api/v1/delete/delete.py +16 -12
  11. cognee/api/v1/responses/routers/get_responses_router.py +3 -1
  12. cognee/api/v1/search/search.py +10 -0
  13. cognee/api/v1/settings/routers/get_settings_router.py +0 -2
  14. cognee/base_config.py +1 -0
  15. cognee/eval_framework/evaluation/direct_llm_eval_adapter.py +5 -6
  16. cognee/infrastructure/databases/graph/config.py +2 -0
  17. cognee/infrastructure/databases/graph/get_graph_engine.py +58 -12
  18. cognee/infrastructure/databases/graph/graph_db_interface.py +15 -10
  19. cognee/infrastructure/databases/graph/kuzu/adapter.py +12 -7
  20. cognee/infrastructure/databases/graph/kuzu/kuzu_migrate.py +1 -1
  21. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +48 -13
  22. cognee/infrastructure/databases/graph/neptune_driver/__init__.py +15 -0
  23. cognee/infrastructure/databases/graph/neptune_driver/adapter.py +1427 -0
  24. cognee/infrastructure/databases/graph/neptune_driver/exceptions.py +115 -0
  25. cognee/infrastructure/databases/graph/neptune_driver/neptune_utils.py +224 -0
  26. cognee/infrastructure/databases/graph/networkx/adapter.py +3 -3
  27. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +449 -0
  28. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +1 -0
  29. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +8 -3
  30. cognee/infrastructure/databases/vector/create_vector_engine.py +31 -15
  31. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +3 -1
  32. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +21 -6
  33. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +4 -3
  34. cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +3 -1
  35. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +22 -16
  36. cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +36 -34
  37. cognee/infrastructure/databases/vector/vector_db_interface.py +78 -7
  38. cognee/infrastructure/files/utils/get_data_file_path.py +39 -0
  39. cognee/infrastructure/files/utils/guess_file_type.py +2 -2
  40. cognee/infrastructure/files/utils/open_data_file.py +4 -23
  41. cognee/infrastructure/llm/LLMGateway.py +137 -0
  42. cognee/infrastructure/llm/__init__.py +14 -4
  43. cognee/infrastructure/llm/config.py +29 -1
  44. cognee/infrastructure/llm/prompts/answer_hotpot_question.txt +1 -1
  45. cognee/infrastructure/llm/prompts/answer_hotpot_using_cognee_search.txt +1 -1
  46. cognee/infrastructure/llm/prompts/answer_simple_question.txt +1 -1
  47. cognee/infrastructure/llm/prompts/answer_simple_question_restricted.txt +1 -1
  48. cognee/infrastructure/llm/prompts/categorize_categories.txt +1 -1
  49. cognee/infrastructure/llm/prompts/classify_content.txt +1 -1
  50. cognee/infrastructure/llm/prompts/context_for_question.txt +1 -1
  51. cognee/infrastructure/llm/prompts/graph_context_for_question.txt +1 -1
  52. cognee/infrastructure/llm/prompts/natural_language_retriever_system.txt +1 -1
  53. cognee/infrastructure/llm/prompts/patch_gen_instructions.txt +1 -1
  54. cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +130 -0
  55. cognee/infrastructure/llm/prompts/summarize_code.txt +2 -2
  56. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/__init__.py +57 -0
  57. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/async_client.py +533 -0
  58. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/config.py +94 -0
  59. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/globals.py +37 -0
  60. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/inlinedbaml.py +21 -0
  61. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/parser.py +131 -0
  62. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/runtime.py +266 -0
  63. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/stream_types.py +137 -0
  64. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/sync_client.py +550 -0
  65. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/tracing.py +26 -0
  66. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_builder.py +962 -0
  67. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_map.py +52 -0
  68. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/types.py +166 -0
  69. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_categories.baml +109 -0
  70. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_content_graph.baml +343 -0
  71. cognee/{modules/data → infrastructure/llm/structured_output_framework/baml/baml_src}/extraction/__init__.py +1 -0
  72. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py +89 -0
  73. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +33 -0
  74. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/generators.baml +18 -0
  75. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py +3 -0
  76. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_categories.py +12 -0
  77. cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/extract_summary.py +16 -7
  78. cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/knowledge_graph/extract_content_graph.py +7 -6
  79. cognee/infrastructure/llm/{anthropic → structured_output_framework/litellm_instructor/llm/anthropic}/adapter.py +10 -4
  80. cognee/infrastructure/llm/{gemini → structured_output_framework/litellm_instructor/llm/gemini}/adapter.py +6 -5
  81. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/__init__.py +0 -0
  82. cognee/infrastructure/llm/{generic_llm_api → structured_output_framework/litellm_instructor/llm/generic_llm_api}/adapter.py +7 -3
  83. cognee/infrastructure/llm/{get_llm_client.py → structured_output_framework/litellm_instructor/llm/get_llm_client.py} +18 -6
  84. cognee/infrastructure/llm/{llm_interface.py → structured_output_framework/litellm_instructor/llm/llm_interface.py} +2 -2
  85. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/__init__.py +0 -0
  86. cognee/infrastructure/llm/{ollama → structured_output_framework/litellm_instructor/llm/ollama}/adapter.py +4 -2
  87. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/__init__.py +0 -0
  88. cognee/infrastructure/llm/{openai → structured_output_framework/litellm_instructor/llm/openai}/adapter.py +6 -4
  89. cognee/infrastructure/llm/{rate_limiter.py → structured_output_framework/litellm_instructor/llm/rate_limiter.py} +0 -5
  90. cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +4 -2
  91. cognee/infrastructure/llm/tokenizer/TikToken/adapter.py +7 -3
  92. cognee/infrastructure/llm/tokenizer/__init__.py +4 -0
  93. cognee/infrastructure/llm/utils.py +3 -1
  94. cognee/infrastructure/loaders/LoaderEngine.py +156 -0
  95. cognee/infrastructure/loaders/LoaderInterface.py +73 -0
  96. cognee/infrastructure/loaders/__init__.py +18 -0
  97. cognee/infrastructure/loaders/core/__init__.py +7 -0
  98. cognee/infrastructure/loaders/core/audio_loader.py +98 -0
  99. cognee/infrastructure/loaders/core/image_loader.py +114 -0
  100. cognee/infrastructure/loaders/core/text_loader.py +90 -0
  101. cognee/infrastructure/loaders/create_loader_engine.py +32 -0
  102. cognee/infrastructure/loaders/external/__init__.py +22 -0
  103. cognee/infrastructure/loaders/external/pypdf_loader.py +96 -0
  104. cognee/infrastructure/loaders/external/unstructured_loader.py +127 -0
  105. cognee/infrastructure/loaders/get_loader_engine.py +18 -0
  106. cognee/infrastructure/loaders/supported_loaders.py +18 -0
  107. cognee/infrastructure/loaders/use_loader.py +21 -0
  108. cognee/infrastructure/loaders/utils/__init__.py +0 -0
  109. cognee/modules/data/methods/__init__.py +1 -0
  110. cognee/modules/data/methods/get_authorized_dataset.py +23 -0
  111. cognee/modules/data/models/Data.py +11 -1
  112. cognee/modules/data/processing/document_types/AudioDocument.py +2 -2
  113. cognee/modules/data/processing/document_types/ImageDocument.py +2 -2
  114. cognee/modules/data/processing/document_types/PdfDocument.py +4 -11
  115. cognee/modules/engine/utils/generate_edge_id.py +5 -0
  116. cognee/modules/graph/cognee_graph/CogneeGraph.py +9 -18
  117. cognee/modules/graph/methods/get_formatted_graph_data.py +7 -1
  118. cognee/modules/graph/utils/get_graph_from_model.py +93 -101
  119. cognee/modules/ingestion/data_types/TextData.py +8 -2
  120. cognee/modules/ingestion/save_data_to_file.py +1 -1
  121. cognee/modules/pipelines/exceptions/__init__.py +1 -0
  122. cognee/modules/pipelines/exceptions/exceptions.py +12 -0
  123. cognee/modules/pipelines/models/DataItemStatus.py +5 -0
  124. cognee/modules/pipelines/models/PipelineRunInfo.py +6 -0
  125. cognee/modules/pipelines/models/__init__.py +1 -0
  126. cognee/modules/pipelines/operations/pipeline.py +10 -2
  127. cognee/modules/pipelines/operations/run_tasks.py +251 -19
  128. cognee/modules/retrieval/code_retriever.py +3 -5
  129. cognee/modules/retrieval/completion_retriever.py +1 -1
  130. cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +0 -2
  131. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +0 -2
  132. cognee/modules/retrieval/graph_completion_cot_retriever.py +8 -9
  133. cognee/modules/retrieval/natural_language_retriever.py +3 -5
  134. cognee/modules/retrieval/utils/completion.py +6 -9
  135. cognee/modules/retrieval/utils/description_to_codepart_search.py +2 -3
  136. cognee/modules/search/methods/search.py +5 -1
  137. cognee/modules/search/operations/__init__.py +1 -0
  138. cognee/modules/search/operations/select_search_type.py +42 -0
  139. cognee/modules/search/types/SearchType.py +1 -0
  140. cognee/modules/settings/get_settings.py +0 -4
  141. cognee/modules/settings/save_vector_db_config.py +1 -1
  142. cognee/shared/data_models.py +3 -1
  143. cognee/shared/logging_utils.py +0 -5
  144. cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py +2 -2
  145. cognee/tasks/documents/extract_chunks_from_documents.py +10 -12
  146. cognee/tasks/entity_completion/entity_extractors/llm_entity_extractor.py +4 -6
  147. cognee/tasks/graph/cascade_extract/utils/extract_content_nodes_and_relationship_names.py +4 -6
  148. cognee/tasks/graph/cascade_extract/utils/extract_edge_triplets.py +6 -7
  149. cognee/tasks/graph/cascade_extract/utils/extract_nodes.py +4 -7
  150. cognee/tasks/graph/extract_graph_from_code.py +3 -2
  151. cognee/tasks/graph/extract_graph_from_data.py +4 -3
  152. cognee/tasks/graph/infer_data_ontology.py +5 -6
  153. cognee/tasks/ingestion/data_item_to_text_file.py +79 -0
  154. cognee/tasks/ingestion/ingest_data.py +91 -61
  155. cognee/tasks/ingestion/resolve_data_directories.py +3 -0
  156. cognee/tasks/repo_processor/get_repo_file_dependencies.py +3 -0
  157. cognee/tasks/storage/index_data_points.py +1 -1
  158. cognee/tasks/storage/index_graph_edges.py +4 -1
  159. cognee/tasks/summarization/summarize_code.py +2 -3
  160. cognee/tasks/summarization/summarize_text.py +3 -2
  161. cognee/tests/test_cognee_server_start.py +12 -7
  162. cognee/tests/test_deduplication.py +2 -2
  163. cognee/tests/test_deletion.py +58 -17
  164. cognee/tests/test_graph_visualization_permissions.py +161 -0
  165. cognee/tests/test_neptune_analytics_graph.py +309 -0
  166. cognee/tests/test_neptune_analytics_hybrid.py +176 -0
  167. cognee/tests/{test_qdrant.py → test_neptune_analytics_vector.py} +86 -16
  168. cognee/tests/test_pgvector.py +5 -5
  169. cognee/tests/test_s3.py +1 -6
  170. cognee/tests/unit/infrastructure/databases/test_rate_limiter.py +11 -10
  171. cognee/tests/unit/infrastructure/databases/vector/__init__.py +0 -0
  172. cognee/tests/unit/infrastructure/mock_embedding_engine.py +1 -1
  173. cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +5 -5
  174. cognee/tests/unit/infrastructure/test_rate_limiting_realistic.py +6 -4
  175. cognee/tests/unit/infrastructure/test_rate_limiting_retry.py +1 -1
  176. cognee/tests/unit/interfaces/graph/get_graph_from_model_unit_test.py +61 -3
  177. cognee/tests/unit/modules/search/search_methods_test.py +55 -0
  178. {cognee-0.2.2.dev0.dist-info → cognee-0.2.3.dist-info}/METADATA +12 -6
  179. {cognee-0.2.2.dev0.dist-info → cognee-0.2.3.dist-info}/RECORD +195 -156
  180. cognee/infrastructure/databases/vector/pinecone/adapter.py +0 -8
  181. cognee/infrastructure/databases/vector/qdrant/QDrantAdapter.py +0 -514
  182. cognee/infrastructure/databases/vector/qdrant/__init__.py +0 -2
  183. cognee/infrastructure/databases/vector/weaviate_db/WeaviateAdapter.py +0 -527
  184. cognee/infrastructure/databases/vector/weaviate_db/__init__.py +0 -1
  185. cognee/modules/data/extraction/extract_categories.py +0 -14
  186. distributed/Dockerfile +0 -34
  187. distributed/app.py +0 -4
  188. distributed/entrypoint.py +0 -71
  189. distributed/entrypoint.sh +0 -5
  190. distributed/modal_image.py +0 -11
  191. distributed/queues.py +0 -5
  192. distributed/tasks/queued_add_data_points.py +0 -13
  193. distributed/tasks/queued_add_edges.py +0 -13
  194. distributed/tasks/queued_add_nodes.py +0 -13
  195. distributed/test.py +0 -28
  196. distributed/utils.py +0 -19
  197. distributed/workers/data_point_saving_worker.py +0 -93
  198. distributed/workers/graph_saving_worker.py +0 -104
  199. /cognee/infrastructure/databases/{graph/memgraph → hybrid/neptune_analytics}/__init__.py +0 -0
  200. /cognee/infrastructure/{llm → databases/vector/embeddings}/embedding_rate_limiter.py +0 -0
  201. /cognee/infrastructure/{databases/vector/pinecone → llm/structured_output_framework}/__init__.py +0 -0
  202. /cognee/infrastructure/llm/{anthropic → structured_output_framework/baml/baml_src}/__init__.py +0 -0
  203. /cognee/infrastructure/llm/{gemini/__init__.py → structured_output_framework/baml/baml_src/extraction/extract_categories.py} +0 -0
  204. /cognee/infrastructure/llm/{generic_llm_api → structured_output_framework/baml/baml_src/extraction/knowledge_graph}/__init__.py +0 -0
  205. /cognee/infrastructure/llm/{ollama → structured_output_framework/litellm_instructor}/__init__.py +0 -0
  206. /cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/knowledge_graph/__init__.py +0 -0
  207. /cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/texts.json +0 -0
  208. /cognee/infrastructure/llm/{openai → structured_output_framework/litellm_instructor/llm}/__init__.py +0 -0
  209. {distributed → cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic}/__init__.py +0 -0
  210. {distributed/tasks → cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini}/__init__.py +0 -0
  211. /cognee/modules/data/{extraction/knowledge_graph → methods}/add_model_class_to_graph.py +0 -0
  212. {cognee-0.2.2.dev0.dist-info → cognee-0.2.3.dist-info}/WHEEL +0 -0
  213. {cognee-0.2.2.dev0.dist-info → cognee-0.2.3.dist-info}/licenses/LICENSE +0 -0
  214. {cognee-0.2.2.dev0.dist-info → cognee-0.2.3.dist-info}/licenses/NOTICE.md +0 -0
cognee/api/client.py CHANGED
@@ -16,6 +16,7 @@ from fastapi.openapi.utils import get_openapi
16
16
 
17
17
  from cognee.exceptions import CogneeApiError
18
18
  from cognee.shared.logging_utils import get_logger, setup_logging
19
+ from cognee.api.health import health_checker, HealthStatus
19
20
  from cognee.api.v1.permissions.routers import get_permissions_router
20
21
  from cognee.api.v1.settings.routers import get_settings_router
21
22
  from cognee.api.v1.datasets.routers import get_datasets_router
@@ -161,11 +162,48 @@ async def root():
161
162
 
162
163
 
163
164
  @app.get("/health")
164
- def health_check():
165
+ async def health_check():
165
166
  """
166
- Health check endpoint that returns the server status.
167
+ Health check endpoint for liveness/readiness probes.
167
168
  """
168
- return Response(status_code=200)
169
+ try:
170
+ health_status = await health_checker.get_health_status(detailed=False)
171
+ status_code = 503 if health_status.status == HealthStatus.UNHEALTHY else 200
172
+
173
+ return JSONResponse(
174
+ status_code=status_code,
175
+ content={
176
+ "status": "ready" if status_code == 200 else "not ready",
177
+ "health": health_status.status,
178
+ "version": health_status.version,
179
+ },
180
+ )
181
+ except Exception as e:
182
+ return JSONResponse(
183
+ status_code=503,
184
+ content={"status": "not ready", "reason": f"health check failed: {str(e)}"},
185
+ )
186
+
187
+
188
+ @app.get("/health/detailed")
189
+ async def detailed_health_check():
190
+ """
191
+ Comprehensive health status with component details.
192
+ """
193
+ try:
194
+ health_status = await health_checker.get_health_status(detailed=True)
195
+ status_code = 200
196
+ if health_status.status == HealthStatus.UNHEALTHY:
197
+ status_code = 503
198
+ elif health_status.status == HealthStatus.DEGRADED:
199
+ status_code = 200 # Degraded is still operational
200
+
201
+ return JSONResponse(status_code=status_code, content=health_status.model_dump())
202
+ except Exception as e:
203
+ return JSONResponse(
204
+ status_code=503,
205
+ content={"status": "unhealthy", "error": f"Health check system failure: {str(e)}"},
206
+ )
169
207
 
170
208
 
171
209
  app.include_router(get_auth_router(), prefix="/api/v1/auth", tags=["auth"])
cognee/api/health.py ADDED
@@ -0,0 +1,332 @@
1
+ """Health check system for cognee API."""
2
+
3
+ import time
4
+ import asyncio
5
+ from datetime import datetime, timezone
6
+ from typing import Dict, Any, Optional
7
+ from enum import Enum
8
+ from pydantic import BaseModel
9
+
10
+ from cognee.version import get_cognee_version
11
+ from cognee.shared.logging_utils import get_logger
12
+
13
+ logger = get_logger()
14
+
15
+
16
+ class HealthStatus(str, Enum):
17
+ HEALTHY = "healthy"
18
+ DEGRADED = "degraded"
19
+ UNHEALTHY = "unhealthy"
20
+
21
+
22
+ class ComponentHealth(BaseModel):
23
+ status: HealthStatus
24
+ provider: str
25
+ response_time_ms: int
26
+ details: str
27
+
28
+
29
+ class HealthResponse(BaseModel):
30
+ status: HealthStatus
31
+ timestamp: str
32
+ version: str
33
+ uptime: int
34
+ components: Dict[str, ComponentHealth]
35
+
36
+
37
+ class HealthChecker:
38
+ def __init__(self):
39
+ self.start_time = time.time()
40
+
41
+ async def check_relational_db(self) -> ComponentHealth:
42
+ """Check relational database health."""
43
+ start_time = time.time()
44
+ try:
45
+ from cognee.infrastructure.databases.relational.get_relational_engine import (
46
+ get_relational_engine,
47
+ )
48
+ from cognee.infrastructure.databases.relational.config import get_relational_config
49
+
50
+ config = get_relational_config()
51
+ engine = get_relational_engine()
52
+
53
+ # Test connection by creating a session
54
+ session = engine.get_session()
55
+ if session:
56
+ await session.close()
57
+
58
+ response_time = int((time.time() - start_time) * 1000)
59
+ return ComponentHealth(
60
+ status=HealthStatus.HEALTHY,
61
+ provider=config.db_provider,
62
+ response_time_ms=response_time,
63
+ details="Connection successful",
64
+ )
65
+ except Exception as e:
66
+ response_time = int((time.time() - start_time) * 1000)
67
+ logger.error(f"Relational DB health check failed: {str(e)}", exc_info=True)
68
+ return ComponentHealth(
69
+ status=HealthStatus.UNHEALTHY,
70
+ provider="unknown",
71
+ response_time_ms=response_time,
72
+ details=f"Connection failed: {str(e)}",
73
+ )
74
+
75
+ async def check_vector_db(self) -> ComponentHealth:
76
+ """Check vector database health."""
77
+ start_time = time.time()
78
+ try:
79
+ from cognee.infrastructure.databases.vector.get_vector_engine import get_vector_engine
80
+ from cognee.infrastructure.databases.vector.config import get_vectordb_config
81
+
82
+ config = get_vectordb_config()
83
+ engine = get_vector_engine()
84
+
85
+ # Test basic operation - just check if engine is accessible
86
+ if hasattr(engine, "health_check"):
87
+ await engine.health_check()
88
+ elif hasattr(engine, "list_tables"):
89
+ # For LanceDB and similar
90
+ engine.list_tables()
91
+
92
+ response_time = int((time.time() - start_time) * 1000)
93
+ return ComponentHealth(
94
+ status=HealthStatus.HEALTHY,
95
+ provider=config.vector_db_provider,
96
+ response_time_ms=response_time,
97
+ details="Index accessible",
98
+ )
99
+ except Exception as e:
100
+ response_time = int((time.time() - start_time) * 1000)
101
+ logger.error(f"Vector DB health check failed: {str(e)}", exc_info=True)
102
+ return ComponentHealth(
103
+ status=HealthStatus.UNHEALTHY,
104
+ provider="unknown",
105
+ response_time_ms=response_time,
106
+ details=f"Connection failed: {str(e)}",
107
+ )
108
+
109
+ async def check_graph_db(self) -> ComponentHealth:
110
+ """Check graph database health."""
111
+ start_time = time.time()
112
+ try:
113
+ from cognee.infrastructure.databases.graph.get_graph_engine import get_graph_engine
114
+ from cognee.infrastructure.databases.graph.config import get_graph_config
115
+
116
+ config = get_graph_config()
117
+ engine = await get_graph_engine()
118
+
119
+ # Test basic operation with actual graph query
120
+ if hasattr(engine, "execute"):
121
+ # For SQL-like graph DBs (Neo4j, Memgraph)
122
+ await engine.execute("MATCH () RETURN count(*) LIMIT 1")
123
+ elif hasattr(engine, "query"):
124
+ # For other graph engines
125
+ engine.query("MATCH () RETURN count(*) LIMIT 1", {})
126
+ # If engine exists but no test method, consider it healthy
127
+
128
+ response_time = int((time.time() - start_time) * 1000)
129
+ return ComponentHealth(
130
+ status=HealthStatus.HEALTHY,
131
+ provider=config.graph_database_provider,
132
+ response_time_ms=response_time,
133
+ details="Schema validated",
134
+ )
135
+ except Exception as e:
136
+ response_time = int((time.time() - start_time) * 1000)
137
+ logger.error(f"Graph DB health check failed: {str(e)}", exc_info=True)
138
+ return ComponentHealth(
139
+ status=HealthStatus.UNHEALTHY,
140
+ provider="unknown",
141
+ response_time_ms=response_time,
142
+ details=f"Connection failed: {str(e)}",
143
+ )
144
+
145
+ async def check_file_storage(self) -> ComponentHealth:
146
+ """Check file storage health."""
147
+ start_time = time.time()
148
+ try:
149
+ import os
150
+ from cognee.infrastructure.files.storage.get_file_storage import get_file_storage
151
+ from cognee.base_config import get_base_config
152
+
153
+ base_config = get_base_config()
154
+ storage = get_file_storage(base_config.data_root_directory)
155
+
156
+ # Determine provider
157
+ provider = "s3" if base_config.data_root_directory.startswith("s3://") else "local"
158
+
159
+ # Test storage accessibility - for local storage, just check directory exists
160
+ if provider == "local":
161
+ os.makedirs(base_config.data_root_directory, exist_ok=True)
162
+ # Simple write/read test
163
+ test_file = os.path.join(base_config.data_root_directory, "health_check_test")
164
+ with open(test_file, "w") as f:
165
+ f.write("test")
166
+ os.remove(test_file)
167
+ else:
168
+ # For S3, test basic operations
169
+ test_path = "health_check_test"
170
+ await storage.store(test_path, b"test")
171
+ await storage.delete(test_path)
172
+
173
+ response_time = int((time.time() - start_time) * 1000)
174
+ return ComponentHealth(
175
+ status=HealthStatus.HEALTHY,
176
+ provider=provider,
177
+ response_time_ms=response_time,
178
+ details="Storage accessible",
179
+ )
180
+ except Exception as e:
181
+ response_time = int((time.time() - start_time) * 1000)
182
+ return ComponentHealth(
183
+ status=HealthStatus.UNHEALTHY,
184
+ provider="unknown",
185
+ response_time_ms=response_time,
186
+ details=f"Storage test failed: {str(e)}",
187
+ )
188
+
189
+ async def check_llm_provider(self) -> ComponentHealth:
190
+ """Check LLM provider health (non-critical)."""
191
+ start_time = time.time()
192
+ try:
193
+ from cognee.infrastructure.llm.get_llm_client import get_llm_client
194
+ from cognee.infrastructure.llm.config import get_llm_config
195
+
196
+ config = get_llm_config()
197
+
198
+ # Test actual API connection with minimal request
199
+ client = get_llm_client()
200
+ await client.show_prompt("test", "test")
201
+
202
+ response_time = int((time.time() - start_time) * 1000)
203
+ return ComponentHealth(
204
+ status=HealthStatus.HEALTHY,
205
+ provider=config.llm_provider,
206
+ response_time_ms=response_time,
207
+ details="API responding",
208
+ )
209
+ except Exception as e:
210
+ response_time = int((time.time() - start_time) * 1000)
211
+ logger.error(f"LLM provider health check failed: {str(e)}", exc_info=True)
212
+ return ComponentHealth(
213
+ status=HealthStatus.DEGRADED,
214
+ provider="unknown",
215
+ response_time_ms=response_time,
216
+ details=f"API check failed: {str(e)}",
217
+ )
218
+
219
+ async def check_embedding_service(self) -> ComponentHealth:
220
+ """Check embedding service health (non-critical)."""
221
+ start_time = time.time()
222
+ try:
223
+ from cognee.infrastructure.databases.vector.embeddings.get_embedding_engine import (
224
+ get_embedding_engine,
225
+ )
226
+
227
+ # Test actual embedding generation with minimal text
228
+ engine = get_embedding_engine()
229
+ await engine.embed_text("test")
230
+
231
+ response_time = int((time.time() - start_time) * 1000)
232
+ return ComponentHealth(
233
+ status=HealthStatus.HEALTHY,
234
+ provider="configured",
235
+ response_time_ms=response_time,
236
+ details="Embedding generation working",
237
+ )
238
+ except Exception as e:
239
+ response_time = int((time.time() - start_time) * 1000)
240
+ return ComponentHealth(
241
+ status=HealthStatus.DEGRADED,
242
+ provider="unknown",
243
+ response_time_ms=response_time,
244
+ details=f"Embedding test failed: {str(e)}",
245
+ )
246
+
247
+ async def get_health_status(self, detailed: bool = False) -> HealthResponse:
248
+ """Get comprehensive health status."""
249
+ components = {}
250
+
251
+ # Critical services
252
+ critical_components = [
253
+ "relational_db",
254
+ "vector_db",
255
+ "graph_db",
256
+ "file_storage",
257
+ "llm_provider",
258
+ "embedding_service",
259
+ ]
260
+
261
+ critical_checks = [
262
+ ("relational_db", self.check_relational_db()),
263
+ ("vector_db", self.check_vector_db()),
264
+ ("graph_db", self.check_graph_db()),
265
+ ("file_storage", self.check_file_storage()),
266
+ ("llm_provider", self.check_llm_provider()),
267
+ ("embedding_service", self.check_embedding_service()),
268
+ ]
269
+
270
+ # Non-critical services (only for detailed checks)
271
+ non_critical_checks = []
272
+
273
+ # Run critical checks
274
+ critical_results = await asyncio.gather(
275
+ *[check for _, check in critical_checks], return_exceptions=True
276
+ )
277
+
278
+ for (name, _), result in zip(critical_checks, critical_results):
279
+ if isinstance(result, Exception):
280
+ components[name] = ComponentHealth(
281
+ status=HealthStatus.UNHEALTHY,
282
+ provider="unknown",
283
+ response_time_ms=0,
284
+ details=f"Health check failed: {str(result)}",
285
+ )
286
+ else:
287
+ components[name] = result
288
+
289
+ # Run non-critical checks if detailed (currently none)
290
+ if detailed and non_critical_checks:
291
+ non_critical_results = await asyncio.gather(
292
+ *[check for _, check in non_critical_checks], return_exceptions=True
293
+ )
294
+
295
+ for (name, _), result in zip(non_critical_checks, non_critical_results):
296
+ if isinstance(result, Exception):
297
+ components[name] = ComponentHealth(
298
+ status=HealthStatus.DEGRADED,
299
+ provider="unknown",
300
+ response_time_ms=0,
301
+ details=f"Health check failed: {str(result)}",
302
+ )
303
+ else:
304
+ components[name] = result
305
+
306
+ # Determine overall status
307
+ critical_unhealthy = any(
308
+ comp.status == HealthStatus.UNHEALTHY
309
+ for name, comp in components.items()
310
+ if name in critical_components
311
+ )
312
+
313
+ has_degraded = any(comp.status == HealthStatus.DEGRADED for comp in components.values())
314
+
315
+ if critical_unhealthy:
316
+ overall_status = HealthStatus.UNHEALTHY
317
+ elif has_degraded:
318
+ overall_status = HealthStatus.DEGRADED
319
+ else:
320
+ overall_status = HealthStatus.HEALTHY
321
+
322
+ return HealthResponse(
323
+ status=overall_status,
324
+ timestamp=datetime.now(timezone.utc).isoformat(),
325
+ version=get_cognee_version(),
326
+ uptime=int(time.time() - self.start_time),
327
+ components=components,
328
+ )
329
+
330
+
331
+ # Global health checker instance
332
+ health_checker = HealthChecker()
cognee/api/v1/add/add.py CHANGED
@@ -15,6 +15,8 @@ async def add(
15
15
  vector_db_config: dict = None,
16
16
  graph_db_config: dict = None,
17
17
  dataset_id: Optional[UUID] = None,
18
+ preferred_loaders: List[str] = None,
19
+ incremental_loading: bool = True,
18
20
  ):
19
21
  """
20
22
  Add data to Cognee for knowledge graph processing.
@@ -129,7 +131,7 @@ async def add(
129
131
  - LLM_MODEL: Model name (default: "gpt-4o-mini")
130
132
  - DEFAULT_USER_EMAIL: Custom default user email
131
133
  - DEFAULT_USER_PASSWORD: Custom default user password
132
- - VECTOR_DB_PROVIDER: "lancedb" (default), "chromadb", "qdrant", "weaviate"
134
+ - VECTOR_DB_PROVIDER: "lancedb" (default), "chromadb", "pgvector"
133
135
  - GRAPH_DATABASE_PROVIDER: "kuzu" (default), "neo4j", "networkx"
134
136
 
135
137
  Raises:
@@ -140,7 +142,7 @@ async def add(
140
142
  """
141
143
  tasks = [
142
144
  Task(resolve_data_directories, include_subdirectories=True),
143
- Task(ingest_data, dataset_name, user, node_set, dataset_id),
145
+ Task(ingest_data, dataset_name, user, node_set, dataset_id, preferred_loaders),
144
146
  ]
145
147
 
146
148
  pipeline_run_info = None
@@ -153,6 +155,7 @@ async def add(
153
155
  pipeline_name="add_pipeline",
154
156
  vector_db_config=vector_db_config,
155
157
  graph_db_config=graph_db_config,
158
+ incremental_loading=incremental_loading,
156
159
  ):
157
160
  pipeline_run_info = run_info
158
161
 
@@ -11,6 +11,7 @@ from typing import List, Optional, Union, Literal
11
11
  from cognee.modules.users.models import User
12
12
  from cognee.modules.users.methods import get_authenticated_user
13
13
  from cognee.shared.utils import send_telemetry
14
+ from cognee.modules.pipelines.models import PipelineRunErrored
14
15
  from cognee.shared.logging_utils import get_logger
15
16
 
16
17
  logger = get_logger()
@@ -100,6 +101,8 @@ def get_add_router() -> APIRouter:
100
101
  else:
101
102
  add_run = await cognee_add(data, datasetName, user=user, dataset_id=datasetId)
102
103
 
104
+ if isinstance(add_run, PipelineRunErrored):
105
+ return JSONResponse(status_code=420, content=add_run.model_dump(mode="json"))
103
106
  return add_run.model_dump()
104
107
  except Exception as error:
105
108
  return JSONResponse(status_code=409, content={"error": str(error)})
@@ -79,7 +79,9 @@ async def run_code_graph_pipeline(repo_path, include_docs=False):
79
79
  async for run_status in non_code_pipeline_run:
80
80
  yield run_status
81
81
 
82
- async for run_status in run_tasks(tasks, dataset.id, repo_path, user, "cognify_code_pipeline"):
82
+ async for run_status in run_tasks(
83
+ tasks, dataset.id, repo_path, user, "cognify_code_pipeline", incremental_loading=False
84
+ ):
83
85
  yield run_status
84
86
 
85
87
 
@@ -39,6 +39,7 @@ async def cognify(
39
39
  vector_db_config: dict = None,
40
40
  graph_db_config: dict = None,
41
41
  run_in_background: bool = False,
42
+ incremental_loading: bool = True,
42
43
  ):
43
44
  """
44
45
  Transform ingested data into a structured knowledge graph.
@@ -194,6 +195,7 @@ async def cognify(
194
195
  datasets=datasets,
195
196
  vector_db_config=vector_db_config,
196
197
  graph_db_config=graph_db_config,
198
+ incremental_loading=incremental_loading,
197
199
  )
198
200
  else:
199
201
  return await run_cognify_blocking(
@@ -202,6 +204,7 @@ async def cognify(
202
204
  datasets=datasets,
203
205
  vector_db_config=vector_db_config,
204
206
  graph_db_config=graph_db_config,
207
+ incremental_loading=incremental_loading,
205
208
  )
206
209
 
207
210
 
@@ -211,6 +214,7 @@ async def run_cognify_blocking(
211
214
  datasets,
212
215
  graph_db_config: dict = None,
213
216
  vector_db_config: dict = False,
217
+ incremental_loading: bool = True,
214
218
  ):
215
219
  total_run_info = {}
216
220
 
@@ -221,6 +225,7 @@ async def run_cognify_blocking(
221
225
  pipeline_name="cognify_pipeline",
222
226
  graph_db_config=graph_db_config,
223
227
  vector_db_config=vector_db_config,
228
+ incremental_loading=incremental_loading,
224
229
  ):
225
230
  if run_info.dataset_id:
226
231
  total_run_info[run_info.dataset_id] = run_info
@@ -236,6 +241,7 @@ async def run_cognify_as_background_process(
236
241
  datasets,
237
242
  graph_db_config: dict = None,
238
243
  vector_db_config: dict = False,
244
+ incremental_loading: bool = True,
239
245
  ):
240
246
  # Convert dataset to list if it's a string
241
247
  if isinstance(datasets, str):
@@ -246,6 +252,7 @@ async def run_cognify_as_background_process(
246
252
 
247
253
  async def handle_rest_of_the_run(pipeline_list):
248
254
  # Execute all provided pipelines one by one to avoid database write conflicts
255
+ # TODO: Convert to async gather task instead of for loop when Queue mechanism for database is created
249
256
  for pipeline in pipeline_list:
250
257
  while True:
251
258
  try:
@@ -270,6 +277,7 @@ async def run_cognify_as_background_process(
270
277
  pipeline_name="cognify_pipeline",
271
278
  graph_db_config=graph_db_config,
272
279
  vector_db_config=vector_db_config,
280
+ incremental_loading=incremental_loading,
273
281
  )
274
282
 
275
283
  # Save dataset Pipeline run started info
@@ -16,7 +16,11 @@ from cognee.modules.graph.methods import get_formatted_graph_data
16
16
  from cognee.modules.users.get_user_manager import get_user_manager_context
17
17
  from cognee.infrastructure.databases.relational import get_relational_engine
18
18
  from cognee.modules.users.authentication.default.default_jwt_strategy import DefaultJWTStrategy
19
- from cognee.modules.pipelines.models.PipelineRunInfo import PipelineRunCompleted, PipelineRunInfo
19
+ from cognee.modules.pipelines.models.PipelineRunInfo import (
20
+ PipelineRunCompleted,
21
+ PipelineRunInfo,
22
+ PipelineRunErrored,
23
+ )
20
24
  from cognee.modules.pipelines.queues.pipeline_run_info_queues import (
21
25
  get_from_queue,
22
26
  initialize_queue,
@@ -105,6 +109,9 @@ def get_cognify_router() -> APIRouter:
105
109
  datasets, user, run_in_background=payload.run_in_background
106
110
  )
107
111
 
112
+ # If any cognify run errored, return a JSONResponse with the appropriate error status code
113
+ if any(isinstance(v, PipelineRunErrored) for v in cognify_run.values()):
114
+ return JSONResponse(status_code=420, content=cognify_run)
108
115
  return cognify_run
109
116
  except Exception as error:
110
117
  return JSONResponse(status_code=409, content={"error": str(error)})
@@ -7,7 +7,9 @@ from cognee.modules.cognify.config import get_cognify_config
7
7
  from cognee.infrastructure.data.chunking.config import get_chunk_config
8
8
  from cognee.infrastructure.databases.vector import get_vectordb_config
9
9
  from cognee.infrastructure.databases.graph.config import get_graph_config
10
- from cognee.infrastructure.llm.config import get_llm_config
10
+ from cognee.infrastructure.llm.config import (
11
+ get_llm_config,
12
+ )
11
13
  from cognee.infrastructure.databases.relational import get_relational_config, get_migration_config
12
14
 
13
15
 
@@ -283,14 +283,8 @@ def get_datasets_router() -> APIRouter:
283
283
  - **404 Not Found**: Dataset doesn't exist or user doesn't have access
284
284
  - **500 Internal Server Error**: Error retrieving graph data
285
285
  """
286
- from cognee.modules.data.methods import get_dataset
287
-
288
- dataset = await get_dataset(user.id, dataset_id)
289
-
290
- if dataset is None:
291
- raise DatasetNotFoundError(message=f"Dataset ({str(dataset_id)}) not found.")
292
286
 
293
- graph_data = await get_formatted_graph_data(dataset.id, user.id)
287
+ graph_data = await get_formatted_graph_data(dataset_id, user.id)
294
288
 
295
289
  return graph_data
296
290
 
@@ -16,7 +16,11 @@ from cognee.modules.users.methods import get_default_user
16
16
  from cognee.modules.data.methods import get_authorized_existing_datasets
17
17
  from cognee.context_global_variables import set_database_global_context_variables
18
18
 
19
- from .exceptions import DocumentNotFoundError, DatasetNotFoundError, DocumentSubgraphNotFoundError
19
+ from cognee.api.v1.delete.exceptions import (
20
+ DocumentNotFoundError,
21
+ DatasetNotFoundError,
22
+ DocumentSubgraphNotFoundError,
23
+ )
20
24
 
21
25
  logger = get_logger()
22
26
 
@@ -82,17 +86,17 @@ async def delete(
82
86
  raise DocumentNotFoundError(f"Data {data_id} not found in dataset {dataset_id}")
83
87
 
84
88
  # Get the data id (as a string) for deletion
85
- content_hash = data_point.content_hash
89
+ data_id = str(data_point.id)
86
90
 
87
91
  # Use the existing comprehensive deletion logic
88
- return await delete_single_document(content_hash, dataset.id, mode)
92
+ return await delete_single_document(data_id, dataset.id, mode)
89
93
 
90
94
 
91
- async def delete_single_document(content_hash: str, dataset_id: UUID = None, mode: str = "soft"):
95
+ async def delete_single_document(data_id: str, dataset_id: UUID = None, mode: str = "soft"):
92
96
  """Delete a single document by its content hash."""
93
97
 
94
98
  # Delete from graph database
95
- deletion_result = await delete_document_subgraph(content_hash, mode)
99
+ deletion_result = await delete_document_subgraph(data_id, mode)
96
100
 
97
101
  logger.info(f"Deletion result: {deletion_result}")
98
102
 
@@ -163,12 +167,12 @@ async def delete_single_document(content_hash: str, dataset_id: UUID = None, mod
163
167
 
164
168
  # Get the data point
165
169
  data_point = (
166
- await session.execute(select(Data).filter(Data.content_hash == content_hash))
170
+ await session.execute(select(Data).filter(Data.id == UUID(data_id)))
167
171
  ).scalar_one_or_none()
168
172
 
169
173
  if data_point is None:
170
174
  raise DocumentNotFoundError(
171
- f"Document not found in relational DB with content hash: {content_hash}"
175
+ f"Document not found in relational DB with data id: {data_id}"
172
176
  )
173
177
 
174
178
  doc_id = data_point.id
@@ -203,7 +207,7 @@ async def delete_single_document(content_hash: str, dataset_id: UUID = None, mod
203
207
  "status": "success",
204
208
  "message": "Document deleted from both graph and relational databases",
205
209
  "graph_deletions": deletion_result["deleted_counts"],
206
- "content_hash": content_hash,
210
+ "data_id": data_id,
207
211
  "dataset": dataset_id,
208
212
  "deleted_node_ids": [
209
213
  str(node_id) for node_id in deleted_node_ids
@@ -211,12 +215,12 @@ async def delete_single_document(content_hash: str, dataset_id: UUID = None, mod
211
215
  }
212
216
 
213
217
 
214
- async def delete_document_subgraph(content_hash: str, mode: str = "soft"):
218
+ async def delete_document_subgraph(document_id: str, mode: str = "soft"):
215
219
  """Delete a document and all its related nodes in the correct order."""
216
220
  graph_db = await get_graph_engine()
217
- subgraph = await graph_db.get_document_subgraph(content_hash)
221
+ subgraph = await graph_db.get_document_subgraph(document_id)
218
222
  if not subgraph:
219
- raise DocumentSubgraphNotFoundError(f"Document not found with content hash: {content_hash}")
223
+ raise DocumentSubgraphNotFoundError(f"Document not found with id: {document_id}")
220
224
 
221
225
  # Delete in the correct order to maintain graph integrity
222
226
  deletion_order = [
@@ -260,6 +264,6 @@ async def delete_document_subgraph(content_hash: str, mode: str = "soft"):
260
264
  return {
261
265
  "status": "success",
262
266
  "deleted_counts": deleted_counts,
263
- "content_hash": content_hash,
267
+ "document_id": document_id,
264
268
  "deleted_node_ids": deleted_node_ids,
265
269
  }
@@ -17,7 +17,9 @@ from cognee.api.v1.responses.models import (
17
17
  )
18
18
  from cognee.api.v1.responses.dispatch_function import dispatch_function
19
19
  from cognee.api.v1.responses.default_tools import DEFAULT_TOOLS
20
- from cognee.infrastructure.llm.config import get_llm_config
20
+ from cognee.infrastructure.llm.config import (
21
+ get_llm_config,
22
+ )
21
23
  from cognee.modules.users.models import User
22
24
  from cognee.modules.users.methods import get_authenticated_user
23
25