cognee 0.2.1.dev7__py3-none-any.whl → 0.2.2.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/api/client.py +44 -4
- cognee/api/health.py +332 -0
- cognee/api/v1/add/add.py +5 -2
- cognee/api/v1/add/routers/get_add_router.py +3 -0
- cognee/api/v1/cognify/code_graph_pipeline.py +3 -1
- cognee/api/v1/cognify/cognify.py +8 -0
- cognee/api/v1/cognify/routers/get_cognify_router.py +8 -1
- cognee/api/v1/config/config.py +3 -1
- cognee/api/v1/datasets/routers/get_datasets_router.py +2 -8
- cognee/api/v1/delete/delete.py +16 -12
- cognee/api/v1/responses/routers/get_responses_router.py +3 -1
- cognee/api/v1/search/search.py +10 -0
- cognee/api/v1/settings/routers/get_settings_router.py +0 -2
- cognee/base_config.py +1 -0
- cognee/eval_framework/evaluation/direct_llm_eval_adapter.py +5 -6
- cognee/infrastructure/databases/graph/config.py +2 -0
- cognee/infrastructure/databases/graph/get_graph_engine.py +58 -12
- cognee/infrastructure/databases/graph/graph_db_interface.py +15 -10
- cognee/infrastructure/databases/graph/kuzu/adapter.py +43 -16
- cognee/infrastructure/databases/graph/kuzu/kuzu_migrate.py +281 -0
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +151 -77
- cognee/infrastructure/databases/graph/neptune_driver/__init__.py +15 -0
- cognee/infrastructure/databases/graph/neptune_driver/adapter.py +1427 -0
- cognee/infrastructure/databases/graph/neptune_driver/exceptions.py +115 -0
- cognee/infrastructure/databases/graph/neptune_driver/neptune_utils.py +224 -0
- cognee/infrastructure/databases/graph/networkx/adapter.py +3 -3
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +449 -0
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +11 -3
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +8 -3
- cognee/infrastructure/databases/vector/create_vector_engine.py +31 -23
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +3 -1
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +21 -6
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +4 -3
- cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +3 -1
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +22 -16
- cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +36 -34
- cognee/infrastructure/databases/vector/vector_db_interface.py +78 -7
- cognee/infrastructure/files/utils/get_data_file_path.py +39 -0
- cognee/infrastructure/files/utils/guess_file_type.py +2 -2
- cognee/infrastructure/files/utils/open_data_file.py +4 -23
- cognee/infrastructure/llm/LLMGateway.py +137 -0
- cognee/infrastructure/llm/__init__.py +14 -4
- cognee/infrastructure/llm/config.py +29 -1
- cognee/infrastructure/llm/prompts/answer_hotpot_question.txt +1 -1
- cognee/infrastructure/llm/prompts/answer_hotpot_using_cognee_search.txt +1 -1
- cognee/infrastructure/llm/prompts/answer_simple_question.txt +1 -1
- cognee/infrastructure/llm/prompts/answer_simple_question_restricted.txt +1 -1
- cognee/infrastructure/llm/prompts/categorize_categories.txt +1 -1
- cognee/infrastructure/llm/prompts/classify_content.txt +1 -1
- cognee/infrastructure/llm/prompts/context_for_question.txt +1 -1
- cognee/infrastructure/llm/prompts/graph_context_for_question.txt +1 -1
- cognee/infrastructure/llm/prompts/natural_language_retriever_system.txt +1 -1
- cognee/infrastructure/llm/prompts/patch_gen_instructions.txt +1 -1
- cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +130 -0
- cognee/infrastructure/llm/prompts/summarize_code.txt +2 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/__init__.py +57 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/async_client.py +533 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/config.py +94 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/globals.py +37 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/inlinedbaml.py +21 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/parser.py +131 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/runtime.py +266 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/stream_types.py +137 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/sync_client.py +550 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/tracing.py +26 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_builder.py +962 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_map.py +52 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/types.py +166 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_categories.baml +109 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_content_graph.baml +343 -0
- cognee/{modules/data → infrastructure/llm/structured_output_framework/baml/baml_src}/extraction/__init__.py +1 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py +89 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +33 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/generators.baml +18 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py +3 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_categories.py +12 -0
- cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/extract_summary.py +16 -7
- cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/knowledge_graph/extract_content_graph.py +7 -6
- cognee/infrastructure/llm/{anthropic → structured_output_framework/litellm_instructor/llm/anthropic}/adapter.py +10 -4
- cognee/infrastructure/llm/{gemini → structured_output_framework/litellm_instructor/llm/gemini}/adapter.py +6 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/__init__.py +0 -0
- cognee/infrastructure/llm/{generic_llm_api → structured_output_framework/litellm_instructor/llm/generic_llm_api}/adapter.py +7 -3
- cognee/infrastructure/llm/{get_llm_client.py → structured_output_framework/litellm_instructor/llm/get_llm_client.py} +18 -6
- cognee/infrastructure/llm/{llm_interface.py → structured_output_framework/litellm_instructor/llm/llm_interface.py} +2 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/__init__.py +0 -0
- cognee/infrastructure/llm/{ollama → structured_output_framework/litellm_instructor/llm/ollama}/adapter.py +4 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/__init__.py +0 -0
- cognee/infrastructure/llm/{openai → structured_output_framework/litellm_instructor/llm/openai}/adapter.py +6 -4
- cognee/infrastructure/llm/{rate_limiter.py → structured_output_framework/litellm_instructor/llm/rate_limiter.py} +0 -5
- cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +4 -2
- cognee/infrastructure/llm/tokenizer/TikToken/adapter.py +7 -3
- cognee/infrastructure/llm/tokenizer/__init__.py +4 -0
- cognee/infrastructure/llm/utils.py +3 -1
- cognee/infrastructure/loaders/LoaderEngine.py +156 -0
- cognee/infrastructure/loaders/LoaderInterface.py +73 -0
- cognee/infrastructure/loaders/__init__.py +18 -0
- cognee/infrastructure/loaders/core/__init__.py +7 -0
- cognee/infrastructure/loaders/core/audio_loader.py +98 -0
- cognee/infrastructure/loaders/core/image_loader.py +114 -0
- cognee/infrastructure/loaders/core/text_loader.py +90 -0
- cognee/infrastructure/loaders/create_loader_engine.py +32 -0
- cognee/infrastructure/loaders/external/__init__.py +22 -0
- cognee/infrastructure/loaders/external/pypdf_loader.py +96 -0
- cognee/infrastructure/loaders/external/unstructured_loader.py +127 -0
- cognee/infrastructure/loaders/get_loader_engine.py +18 -0
- cognee/infrastructure/loaders/supported_loaders.py +18 -0
- cognee/infrastructure/loaders/use_loader.py +21 -0
- cognee/infrastructure/loaders/utils/__init__.py +0 -0
- cognee/modules/data/methods/__init__.py +1 -0
- cognee/modules/data/methods/get_authorized_dataset.py +23 -0
- cognee/modules/data/models/Data.py +13 -3
- cognee/modules/data/processing/document_types/AudioDocument.py +2 -2
- cognee/modules/data/processing/document_types/ImageDocument.py +2 -2
- cognee/modules/data/processing/document_types/PdfDocument.py +4 -11
- cognee/modules/data/processing/document_types/UnstructuredDocument.py +2 -5
- cognee/modules/engine/utils/generate_edge_id.py +5 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +45 -35
- cognee/modules/graph/methods/get_formatted_graph_data.py +8 -2
- cognee/modules/graph/utils/get_graph_from_model.py +93 -101
- cognee/modules/ingestion/data_types/TextData.py +8 -2
- cognee/modules/ingestion/save_data_to_file.py +1 -1
- cognee/modules/pipelines/exceptions/__init__.py +1 -0
- cognee/modules/pipelines/exceptions/exceptions.py +12 -0
- cognee/modules/pipelines/models/DataItemStatus.py +5 -0
- cognee/modules/pipelines/models/PipelineRunInfo.py +6 -0
- cognee/modules/pipelines/models/__init__.py +1 -0
- cognee/modules/pipelines/operations/pipeline.py +10 -2
- cognee/modules/pipelines/operations/run_tasks.py +252 -20
- cognee/modules/pipelines/operations/run_tasks_distributed.py +1 -1
- cognee/modules/retrieval/chunks_retriever.py +23 -1
- cognee/modules/retrieval/code_retriever.py +66 -9
- cognee/modules/retrieval/completion_retriever.py +11 -9
- cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +0 -2
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +0 -2
- cognee/modules/retrieval/graph_completion_cot_retriever.py +8 -9
- cognee/modules/retrieval/graph_completion_retriever.py +1 -1
- cognee/modules/retrieval/insights_retriever.py +4 -0
- cognee/modules/retrieval/natural_language_retriever.py +9 -15
- cognee/modules/retrieval/summaries_retriever.py +23 -1
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +23 -4
- cognee/modules/retrieval/utils/completion.py +6 -9
- cognee/modules/retrieval/utils/description_to_codepart_search.py +2 -3
- cognee/modules/search/methods/search.py +5 -1
- cognee/modules/search/operations/__init__.py +1 -0
- cognee/modules/search/operations/select_search_type.py +42 -0
- cognee/modules/search/types/SearchType.py +1 -0
- cognee/modules/settings/get_settings.py +0 -8
- cognee/modules/settings/save_vector_db_config.py +1 -1
- cognee/shared/data_models.py +3 -1
- cognee/shared/logging_utils.py +0 -5
- cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py +2 -2
- cognee/tasks/documents/extract_chunks_from_documents.py +10 -12
- cognee/tasks/entity_completion/entity_extractors/llm_entity_extractor.py +4 -6
- cognee/tasks/graph/cascade_extract/utils/extract_content_nodes_and_relationship_names.py +4 -6
- cognee/tasks/graph/cascade_extract/utils/extract_edge_triplets.py +6 -7
- cognee/tasks/graph/cascade_extract/utils/extract_nodes.py +4 -7
- cognee/tasks/graph/extract_graph_from_code.py +3 -2
- cognee/tasks/graph/extract_graph_from_data.py +4 -3
- cognee/tasks/graph/infer_data_ontology.py +5 -6
- cognee/tasks/ingestion/data_item_to_text_file.py +79 -0
- cognee/tasks/ingestion/ingest_data.py +91 -61
- cognee/tasks/ingestion/resolve_data_directories.py +3 -0
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +3 -0
- cognee/tasks/storage/index_data_points.py +1 -1
- cognee/tasks/storage/index_graph_edges.py +4 -1
- cognee/tasks/summarization/summarize_code.py +2 -3
- cognee/tasks/summarization/summarize_text.py +3 -2
- cognee/tests/test_cognee_server_start.py +12 -7
- cognee/tests/test_deduplication.py +2 -2
- cognee/tests/test_deletion.py +58 -17
- cognee/tests/test_graph_visualization_permissions.py +161 -0
- cognee/tests/test_neptune_analytics_graph.py +309 -0
- cognee/tests/test_neptune_analytics_hybrid.py +176 -0
- cognee/tests/{test_weaviate.py → test_neptune_analytics_vector.py} +86 -11
- cognee/tests/test_pgvector.py +5 -5
- cognee/tests/test_s3.py +1 -6
- cognee/tests/unit/infrastructure/databases/test_rate_limiter.py +11 -10
- cognee/tests/unit/infrastructure/databases/vector/__init__.py +0 -0
- cognee/tests/unit/infrastructure/mock_embedding_engine.py +1 -1
- cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +5 -5
- cognee/tests/unit/infrastructure/test_rate_limiting_realistic.py +6 -4
- cognee/tests/unit/infrastructure/test_rate_limiting_retry.py +1 -1
- cognee/tests/unit/interfaces/graph/get_graph_from_model_unit_test.py +61 -3
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +84 -9
- cognee/tests/unit/modules/search/search_methods_test.py +55 -0
- {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/METADATA +13 -9
- {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/RECORD +203 -164
- cognee/infrastructure/databases/vector/pinecone/adapter.py +0 -8
- cognee/infrastructure/databases/vector/qdrant/QDrantAdapter.py +0 -514
- cognee/infrastructure/databases/vector/qdrant/__init__.py +0 -2
- cognee/infrastructure/databases/vector/weaviate_db/WeaviateAdapter.py +0 -527
- cognee/infrastructure/databases/vector/weaviate_db/__init__.py +0 -1
- cognee/modules/data/extraction/extract_categories.py +0 -14
- cognee/tests/test_qdrant.py +0 -99
- distributed/Dockerfile +0 -34
- distributed/app.py +0 -4
- distributed/entrypoint.py +0 -71
- distributed/entrypoint.sh +0 -5
- distributed/modal_image.py +0 -11
- distributed/queues.py +0 -5
- distributed/tasks/queued_add_data_points.py +0 -13
- distributed/tasks/queued_add_edges.py +0 -13
- distributed/tasks/queued_add_nodes.py +0 -13
- distributed/test.py +0 -28
- distributed/utils.py +0 -19
- distributed/workers/data_point_saving_worker.py +0 -93
- distributed/workers/graph_saving_worker.py +0 -104
- /cognee/infrastructure/databases/{graph/memgraph → hybrid/neptune_analytics}/__init__.py +0 -0
- /cognee/infrastructure/{llm → databases/vector/embeddings}/embedding_rate_limiter.py +0 -0
- /cognee/infrastructure/{databases/vector/pinecone → llm/structured_output_framework}/__init__.py +0 -0
- /cognee/infrastructure/llm/{anthropic → structured_output_framework/baml/baml_src}/__init__.py +0 -0
- /cognee/infrastructure/llm/{gemini/__init__.py → structured_output_framework/baml/baml_src/extraction/extract_categories.py} +0 -0
- /cognee/infrastructure/llm/{generic_llm_api → structured_output_framework/baml/baml_src/extraction/knowledge_graph}/__init__.py +0 -0
- /cognee/infrastructure/llm/{ollama → structured_output_framework/litellm_instructor}/__init__.py +0 -0
- /cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/knowledge_graph/__init__.py +0 -0
- /cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/texts.json +0 -0
- /cognee/infrastructure/llm/{openai → structured_output_framework/litellm_instructor/llm}/__init__.py +0 -0
- {distributed → cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic}/__init__.py +0 -0
- {distributed/tasks → cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini}/__init__.py +0 -0
- /cognee/modules/data/{extraction/knowledge_graph → methods}/add_model_class_to_graph.py +0 -0
- {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/WHEEL +0 -0
- {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/licenses/NOTICE.md +0 -0
cognee/api/client.py
CHANGED
|
@@ -16,6 +16,7 @@ from fastapi.openapi.utils import get_openapi
|
|
|
16
16
|
|
|
17
17
|
from cognee.exceptions import CogneeApiError
|
|
18
18
|
from cognee.shared.logging_utils import get_logger, setup_logging
|
|
19
|
+
from cognee.api.health import health_checker, HealthStatus
|
|
19
20
|
from cognee.api.v1.permissions.routers import get_permissions_router
|
|
20
21
|
from cognee.api.v1.settings.routers import get_settings_router
|
|
21
22
|
from cognee.api.v1.datasets.routers import get_datasets_router
|
|
@@ -74,7 +75,9 @@ if CORS_ALLOWED_ORIGINS:
|
|
|
74
75
|
origin.strip() for origin in CORS_ALLOWED_ORIGINS.split(",") if origin.strip()
|
|
75
76
|
]
|
|
76
77
|
else:
|
|
77
|
-
allowed_origins = [
|
|
78
|
+
allowed_origins = [
|
|
79
|
+
"http://localhost:3000",
|
|
80
|
+
] # Block all except explicitly set origins
|
|
78
81
|
|
|
79
82
|
app.add_middleware(
|
|
80
83
|
CORSMiddleware,
|
|
@@ -159,11 +162,48 @@ async def root():
|
|
|
159
162
|
|
|
160
163
|
|
|
161
164
|
@app.get("/health")
|
|
162
|
-
def health_check():
|
|
165
|
+
async def health_check():
|
|
166
|
+
"""
|
|
167
|
+
Health check endpoint for liveness/readiness probes.
|
|
168
|
+
"""
|
|
169
|
+
try:
|
|
170
|
+
health_status = await health_checker.get_health_status(detailed=False)
|
|
171
|
+
status_code = 503 if health_status.status == HealthStatus.UNHEALTHY else 200
|
|
172
|
+
|
|
173
|
+
return JSONResponse(
|
|
174
|
+
status_code=status_code,
|
|
175
|
+
content={
|
|
176
|
+
"status": "ready" if status_code == 200 else "not ready",
|
|
177
|
+
"health": health_status.status,
|
|
178
|
+
"version": health_status.version,
|
|
179
|
+
},
|
|
180
|
+
)
|
|
181
|
+
except Exception as e:
|
|
182
|
+
return JSONResponse(
|
|
183
|
+
status_code=503,
|
|
184
|
+
content={"status": "not ready", "reason": f"health check failed: {str(e)}"},
|
|
185
|
+
)
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
@app.get("/health/detailed")
|
|
189
|
+
async def detailed_health_check():
|
|
163
190
|
"""
|
|
164
|
-
|
|
191
|
+
Comprehensive health status with component details.
|
|
165
192
|
"""
|
|
166
|
-
|
|
193
|
+
try:
|
|
194
|
+
health_status = await health_checker.get_health_status(detailed=True)
|
|
195
|
+
status_code = 200
|
|
196
|
+
if health_status.status == HealthStatus.UNHEALTHY:
|
|
197
|
+
status_code = 503
|
|
198
|
+
elif health_status.status == HealthStatus.DEGRADED:
|
|
199
|
+
status_code = 200 # Degraded is still operational
|
|
200
|
+
|
|
201
|
+
return JSONResponse(status_code=status_code, content=health_status.model_dump())
|
|
202
|
+
except Exception as e:
|
|
203
|
+
return JSONResponse(
|
|
204
|
+
status_code=503,
|
|
205
|
+
content={"status": "unhealthy", "error": f"Health check system failure: {str(e)}"},
|
|
206
|
+
)
|
|
167
207
|
|
|
168
208
|
|
|
169
209
|
app.include_router(get_auth_router(), prefix="/api/v1/auth", tags=["auth"])
|
cognee/api/health.py
ADDED
|
@@ -0,0 +1,332 @@
|
|
|
1
|
+
"""Health check system for cognee API."""
|
|
2
|
+
|
|
3
|
+
import time
|
|
4
|
+
import asyncio
|
|
5
|
+
from datetime import datetime, timezone
|
|
6
|
+
from typing import Dict, Any, Optional
|
|
7
|
+
from enum import Enum
|
|
8
|
+
from pydantic import BaseModel
|
|
9
|
+
|
|
10
|
+
from cognee.version import get_cognee_version
|
|
11
|
+
from cognee.shared.logging_utils import get_logger
|
|
12
|
+
|
|
13
|
+
logger = get_logger()
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class HealthStatus(str, Enum):
|
|
17
|
+
HEALTHY = "healthy"
|
|
18
|
+
DEGRADED = "degraded"
|
|
19
|
+
UNHEALTHY = "unhealthy"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class ComponentHealth(BaseModel):
|
|
23
|
+
status: HealthStatus
|
|
24
|
+
provider: str
|
|
25
|
+
response_time_ms: int
|
|
26
|
+
details: str
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class HealthResponse(BaseModel):
|
|
30
|
+
status: HealthStatus
|
|
31
|
+
timestamp: str
|
|
32
|
+
version: str
|
|
33
|
+
uptime: int
|
|
34
|
+
components: Dict[str, ComponentHealth]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class HealthChecker:
|
|
38
|
+
def __init__(self):
|
|
39
|
+
self.start_time = time.time()
|
|
40
|
+
|
|
41
|
+
async def check_relational_db(self) -> ComponentHealth:
|
|
42
|
+
"""Check relational database health."""
|
|
43
|
+
start_time = time.time()
|
|
44
|
+
try:
|
|
45
|
+
from cognee.infrastructure.databases.relational.get_relational_engine import (
|
|
46
|
+
get_relational_engine,
|
|
47
|
+
)
|
|
48
|
+
from cognee.infrastructure.databases.relational.config import get_relational_config
|
|
49
|
+
|
|
50
|
+
config = get_relational_config()
|
|
51
|
+
engine = get_relational_engine()
|
|
52
|
+
|
|
53
|
+
# Test connection by creating a session
|
|
54
|
+
session = engine.get_session()
|
|
55
|
+
if session:
|
|
56
|
+
await session.close()
|
|
57
|
+
|
|
58
|
+
response_time = int((time.time() - start_time) * 1000)
|
|
59
|
+
return ComponentHealth(
|
|
60
|
+
status=HealthStatus.HEALTHY,
|
|
61
|
+
provider=config.db_provider,
|
|
62
|
+
response_time_ms=response_time,
|
|
63
|
+
details="Connection successful",
|
|
64
|
+
)
|
|
65
|
+
except Exception as e:
|
|
66
|
+
response_time = int((time.time() - start_time) * 1000)
|
|
67
|
+
logger.error(f"Relational DB health check failed: {str(e)}", exc_info=True)
|
|
68
|
+
return ComponentHealth(
|
|
69
|
+
status=HealthStatus.UNHEALTHY,
|
|
70
|
+
provider="unknown",
|
|
71
|
+
response_time_ms=response_time,
|
|
72
|
+
details=f"Connection failed: {str(e)}",
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
async def check_vector_db(self) -> ComponentHealth:
|
|
76
|
+
"""Check vector database health."""
|
|
77
|
+
start_time = time.time()
|
|
78
|
+
try:
|
|
79
|
+
from cognee.infrastructure.databases.vector.get_vector_engine import get_vector_engine
|
|
80
|
+
from cognee.infrastructure.databases.vector.config import get_vectordb_config
|
|
81
|
+
|
|
82
|
+
config = get_vectordb_config()
|
|
83
|
+
engine = get_vector_engine()
|
|
84
|
+
|
|
85
|
+
# Test basic operation - just check if engine is accessible
|
|
86
|
+
if hasattr(engine, "health_check"):
|
|
87
|
+
await engine.health_check()
|
|
88
|
+
elif hasattr(engine, "list_tables"):
|
|
89
|
+
# For LanceDB and similar
|
|
90
|
+
engine.list_tables()
|
|
91
|
+
|
|
92
|
+
response_time = int((time.time() - start_time) * 1000)
|
|
93
|
+
return ComponentHealth(
|
|
94
|
+
status=HealthStatus.HEALTHY,
|
|
95
|
+
provider=config.vector_db_provider,
|
|
96
|
+
response_time_ms=response_time,
|
|
97
|
+
details="Index accessible",
|
|
98
|
+
)
|
|
99
|
+
except Exception as e:
|
|
100
|
+
response_time = int((time.time() - start_time) * 1000)
|
|
101
|
+
logger.error(f"Vector DB health check failed: {str(e)}", exc_info=True)
|
|
102
|
+
return ComponentHealth(
|
|
103
|
+
status=HealthStatus.UNHEALTHY,
|
|
104
|
+
provider="unknown",
|
|
105
|
+
response_time_ms=response_time,
|
|
106
|
+
details=f"Connection failed: {str(e)}",
|
|
107
|
+
)
|
|
108
|
+
|
|
109
|
+
async def check_graph_db(self) -> ComponentHealth:
|
|
110
|
+
"""Check graph database health."""
|
|
111
|
+
start_time = time.time()
|
|
112
|
+
try:
|
|
113
|
+
from cognee.infrastructure.databases.graph.get_graph_engine import get_graph_engine
|
|
114
|
+
from cognee.infrastructure.databases.graph.config import get_graph_config
|
|
115
|
+
|
|
116
|
+
config = get_graph_config()
|
|
117
|
+
engine = await get_graph_engine()
|
|
118
|
+
|
|
119
|
+
# Test basic operation with actual graph query
|
|
120
|
+
if hasattr(engine, "execute"):
|
|
121
|
+
# For SQL-like graph DBs (Neo4j, Memgraph)
|
|
122
|
+
await engine.execute("MATCH () RETURN count(*) LIMIT 1")
|
|
123
|
+
elif hasattr(engine, "query"):
|
|
124
|
+
# For other graph engines
|
|
125
|
+
engine.query("MATCH () RETURN count(*) LIMIT 1", {})
|
|
126
|
+
# If engine exists but no test method, consider it healthy
|
|
127
|
+
|
|
128
|
+
response_time = int((time.time() - start_time) * 1000)
|
|
129
|
+
return ComponentHealth(
|
|
130
|
+
status=HealthStatus.HEALTHY,
|
|
131
|
+
provider=config.graph_database_provider,
|
|
132
|
+
response_time_ms=response_time,
|
|
133
|
+
details="Schema validated",
|
|
134
|
+
)
|
|
135
|
+
except Exception as e:
|
|
136
|
+
response_time = int((time.time() - start_time) * 1000)
|
|
137
|
+
logger.error(f"Graph DB health check failed: {str(e)}", exc_info=True)
|
|
138
|
+
return ComponentHealth(
|
|
139
|
+
status=HealthStatus.UNHEALTHY,
|
|
140
|
+
provider="unknown",
|
|
141
|
+
response_time_ms=response_time,
|
|
142
|
+
details=f"Connection failed: {str(e)}",
|
|
143
|
+
)
|
|
144
|
+
|
|
145
|
+
async def check_file_storage(self) -> ComponentHealth:
|
|
146
|
+
"""Check file storage health."""
|
|
147
|
+
start_time = time.time()
|
|
148
|
+
try:
|
|
149
|
+
import os
|
|
150
|
+
from cognee.infrastructure.files.storage.get_file_storage import get_file_storage
|
|
151
|
+
from cognee.base_config import get_base_config
|
|
152
|
+
|
|
153
|
+
base_config = get_base_config()
|
|
154
|
+
storage = get_file_storage(base_config.data_root_directory)
|
|
155
|
+
|
|
156
|
+
# Determine provider
|
|
157
|
+
provider = "s3" if base_config.data_root_directory.startswith("s3://") else "local"
|
|
158
|
+
|
|
159
|
+
# Test storage accessibility - for local storage, just check directory exists
|
|
160
|
+
if provider == "local":
|
|
161
|
+
os.makedirs(base_config.data_root_directory, exist_ok=True)
|
|
162
|
+
# Simple write/read test
|
|
163
|
+
test_file = os.path.join(base_config.data_root_directory, "health_check_test")
|
|
164
|
+
with open(test_file, "w") as f:
|
|
165
|
+
f.write("test")
|
|
166
|
+
os.remove(test_file)
|
|
167
|
+
else:
|
|
168
|
+
# For S3, test basic operations
|
|
169
|
+
test_path = "health_check_test"
|
|
170
|
+
await storage.store(test_path, b"test")
|
|
171
|
+
await storage.delete(test_path)
|
|
172
|
+
|
|
173
|
+
response_time = int((time.time() - start_time) * 1000)
|
|
174
|
+
return ComponentHealth(
|
|
175
|
+
status=HealthStatus.HEALTHY,
|
|
176
|
+
provider=provider,
|
|
177
|
+
response_time_ms=response_time,
|
|
178
|
+
details="Storage accessible",
|
|
179
|
+
)
|
|
180
|
+
except Exception as e:
|
|
181
|
+
response_time = int((time.time() - start_time) * 1000)
|
|
182
|
+
return ComponentHealth(
|
|
183
|
+
status=HealthStatus.UNHEALTHY,
|
|
184
|
+
provider="unknown",
|
|
185
|
+
response_time_ms=response_time,
|
|
186
|
+
details=f"Storage test failed: {str(e)}",
|
|
187
|
+
)
|
|
188
|
+
|
|
189
|
+
async def check_llm_provider(self) -> ComponentHealth:
|
|
190
|
+
"""Check LLM provider health (non-critical)."""
|
|
191
|
+
start_time = time.time()
|
|
192
|
+
try:
|
|
193
|
+
from cognee.infrastructure.llm.get_llm_client import get_llm_client
|
|
194
|
+
from cognee.infrastructure.llm.config import get_llm_config
|
|
195
|
+
|
|
196
|
+
config = get_llm_config()
|
|
197
|
+
|
|
198
|
+
# Test actual API connection with minimal request
|
|
199
|
+
client = get_llm_client()
|
|
200
|
+
await client.show_prompt("test", "test")
|
|
201
|
+
|
|
202
|
+
response_time = int((time.time() - start_time) * 1000)
|
|
203
|
+
return ComponentHealth(
|
|
204
|
+
status=HealthStatus.HEALTHY,
|
|
205
|
+
provider=config.llm_provider,
|
|
206
|
+
response_time_ms=response_time,
|
|
207
|
+
details="API responding",
|
|
208
|
+
)
|
|
209
|
+
except Exception as e:
|
|
210
|
+
response_time = int((time.time() - start_time) * 1000)
|
|
211
|
+
logger.error(f"LLM provider health check failed: {str(e)}", exc_info=True)
|
|
212
|
+
return ComponentHealth(
|
|
213
|
+
status=HealthStatus.DEGRADED,
|
|
214
|
+
provider="unknown",
|
|
215
|
+
response_time_ms=response_time,
|
|
216
|
+
details=f"API check failed: {str(e)}",
|
|
217
|
+
)
|
|
218
|
+
|
|
219
|
+
async def check_embedding_service(self) -> ComponentHealth:
|
|
220
|
+
"""Check embedding service health (non-critical)."""
|
|
221
|
+
start_time = time.time()
|
|
222
|
+
try:
|
|
223
|
+
from cognee.infrastructure.databases.vector.embeddings.get_embedding_engine import (
|
|
224
|
+
get_embedding_engine,
|
|
225
|
+
)
|
|
226
|
+
|
|
227
|
+
# Test actual embedding generation with minimal text
|
|
228
|
+
engine = get_embedding_engine()
|
|
229
|
+
await engine.embed_text("test")
|
|
230
|
+
|
|
231
|
+
response_time = int((time.time() - start_time) * 1000)
|
|
232
|
+
return ComponentHealth(
|
|
233
|
+
status=HealthStatus.HEALTHY,
|
|
234
|
+
provider="configured",
|
|
235
|
+
response_time_ms=response_time,
|
|
236
|
+
details="Embedding generation working",
|
|
237
|
+
)
|
|
238
|
+
except Exception as e:
|
|
239
|
+
response_time = int((time.time() - start_time) * 1000)
|
|
240
|
+
return ComponentHealth(
|
|
241
|
+
status=HealthStatus.DEGRADED,
|
|
242
|
+
provider="unknown",
|
|
243
|
+
response_time_ms=response_time,
|
|
244
|
+
details=f"Embedding test failed: {str(e)}",
|
|
245
|
+
)
|
|
246
|
+
|
|
247
|
+
async def get_health_status(self, detailed: bool = False) -> HealthResponse:
|
|
248
|
+
"""Get comprehensive health status."""
|
|
249
|
+
components = {}
|
|
250
|
+
|
|
251
|
+
# Critical services
|
|
252
|
+
critical_components = [
|
|
253
|
+
"relational_db",
|
|
254
|
+
"vector_db",
|
|
255
|
+
"graph_db",
|
|
256
|
+
"file_storage",
|
|
257
|
+
"llm_provider",
|
|
258
|
+
"embedding_service",
|
|
259
|
+
]
|
|
260
|
+
|
|
261
|
+
critical_checks = [
|
|
262
|
+
("relational_db", self.check_relational_db()),
|
|
263
|
+
("vector_db", self.check_vector_db()),
|
|
264
|
+
("graph_db", self.check_graph_db()),
|
|
265
|
+
("file_storage", self.check_file_storage()),
|
|
266
|
+
("llm_provider", self.check_llm_provider()),
|
|
267
|
+
("embedding_service", self.check_embedding_service()),
|
|
268
|
+
]
|
|
269
|
+
|
|
270
|
+
# Non-critical services (only for detailed checks)
|
|
271
|
+
non_critical_checks = []
|
|
272
|
+
|
|
273
|
+
# Run critical checks
|
|
274
|
+
critical_results = await asyncio.gather(
|
|
275
|
+
*[check for _, check in critical_checks], return_exceptions=True
|
|
276
|
+
)
|
|
277
|
+
|
|
278
|
+
for (name, _), result in zip(critical_checks, critical_results):
|
|
279
|
+
if isinstance(result, Exception):
|
|
280
|
+
components[name] = ComponentHealth(
|
|
281
|
+
status=HealthStatus.UNHEALTHY,
|
|
282
|
+
provider="unknown",
|
|
283
|
+
response_time_ms=0,
|
|
284
|
+
details=f"Health check failed: {str(result)}",
|
|
285
|
+
)
|
|
286
|
+
else:
|
|
287
|
+
components[name] = result
|
|
288
|
+
|
|
289
|
+
# Run non-critical checks if detailed (currently none)
|
|
290
|
+
if detailed and non_critical_checks:
|
|
291
|
+
non_critical_results = await asyncio.gather(
|
|
292
|
+
*[check for _, check in non_critical_checks], return_exceptions=True
|
|
293
|
+
)
|
|
294
|
+
|
|
295
|
+
for (name, _), result in zip(non_critical_checks, non_critical_results):
|
|
296
|
+
if isinstance(result, Exception):
|
|
297
|
+
components[name] = ComponentHealth(
|
|
298
|
+
status=HealthStatus.DEGRADED,
|
|
299
|
+
provider="unknown",
|
|
300
|
+
response_time_ms=0,
|
|
301
|
+
details=f"Health check failed: {str(result)}",
|
|
302
|
+
)
|
|
303
|
+
else:
|
|
304
|
+
components[name] = result
|
|
305
|
+
|
|
306
|
+
# Determine overall status
|
|
307
|
+
critical_unhealthy = any(
|
|
308
|
+
comp.status == HealthStatus.UNHEALTHY
|
|
309
|
+
for name, comp in components.items()
|
|
310
|
+
if name in critical_components
|
|
311
|
+
)
|
|
312
|
+
|
|
313
|
+
has_degraded = any(comp.status == HealthStatus.DEGRADED for comp in components.values())
|
|
314
|
+
|
|
315
|
+
if critical_unhealthy:
|
|
316
|
+
overall_status = HealthStatus.UNHEALTHY
|
|
317
|
+
elif has_degraded:
|
|
318
|
+
overall_status = HealthStatus.DEGRADED
|
|
319
|
+
else:
|
|
320
|
+
overall_status = HealthStatus.HEALTHY
|
|
321
|
+
|
|
322
|
+
return HealthResponse(
|
|
323
|
+
status=overall_status,
|
|
324
|
+
timestamp=datetime.now(timezone.utc).isoformat(),
|
|
325
|
+
version=get_cognee_version(),
|
|
326
|
+
uptime=int(time.time() - self.start_time),
|
|
327
|
+
components=components,
|
|
328
|
+
)
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
# Global health checker instance
|
|
332
|
+
health_checker = HealthChecker()
|
cognee/api/v1/add/add.py
CHANGED
|
@@ -15,6 +15,8 @@ async def add(
|
|
|
15
15
|
vector_db_config: dict = None,
|
|
16
16
|
graph_db_config: dict = None,
|
|
17
17
|
dataset_id: Optional[UUID] = None,
|
|
18
|
+
preferred_loaders: List[str] = None,
|
|
19
|
+
incremental_loading: bool = True,
|
|
18
20
|
):
|
|
19
21
|
"""
|
|
20
22
|
Add data to Cognee for knowledge graph processing.
|
|
@@ -129,7 +131,7 @@ async def add(
|
|
|
129
131
|
- LLM_MODEL: Model name (default: "gpt-4o-mini")
|
|
130
132
|
- DEFAULT_USER_EMAIL: Custom default user email
|
|
131
133
|
- DEFAULT_USER_PASSWORD: Custom default user password
|
|
132
|
-
- VECTOR_DB_PROVIDER: "lancedb" (default), "chromadb", "
|
|
134
|
+
- VECTOR_DB_PROVIDER: "lancedb" (default), "chromadb", "pgvector"
|
|
133
135
|
- GRAPH_DATABASE_PROVIDER: "kuzu" (default), "neo4j", "networkx"
|
|
134
136
|
|
|
135
137
|
Raises:
|
|
@@ -140,7 +142,7 @@ async def add(
|
|
|
140
142
|
"""
|
|
141
143
|
tasks = [
|
|
142
144
|
Task(resolve_data_directories, include_subdirectories=True),
|
|
143
|
-
Task(ingest_data, dataset_name, user, node_set, dataset_id),
|
|
145
|
+
Task(ingest_data, dataset_name, user, node_set, dataset_id, preferred_loaders),
|
|
144
146
|
]
|
|
145
147
|
|
|
146
148
|
pipeline_run_info = None
|
|
@@ -153,6 +155,7 @@ async def add(
|
|
|
153
155
|
pipeline_name="add_pipeline",
|
|
154
156
|
vector_db_config=vector_db_config,
|
|
155
157
|
graph_db_config=graph_db_config,
|
|
158
|
+
incremental_loading=incremental_loading,
|
|
156
159
|
):
|
|
157
160
|
pipeline_run_info = run_info
|
|
158
161
|
|
|
@@ -11,6 +11,7 @@ from typing import List, Optional, Union, Literal
|
|
|
11
11
|
from cognee.modules.users.models import User
|
|
12
12
|
from cognee.modules.users.methods import get_authenticated_user
|
|
13
13
|
from cognee.shared.utils import send_telemetry
|
|
14
|
+
from cognee.modules.pipelines.models import PipelineRunErrored
|
|
14
15
|
from cognee.shared.logging_utils import get_logger
|
|
15
16
|
|
|
16
17
|
logger = get_logger()
|
|
@@ -100,6 +101,8 @@ def get_add_router() -> APIRouter:
|
|
|
100
101
|
else:
|
|
101
102
|
add_run = await cognee_add(data, datasetName, user=user, dataset_id=datasetId)
|
|
102
103
|
|
|
104
|
+
if isinstance(add_run, PipelineRunErrored):
|
|
105
|
+
return JSONResponse(status_code=420, content=add_run.model_dump(mode="json"))
|
|
103
106
|
return add_run.model_dump()
|
|
104
107
|
except Exception as error:
|
|
105
108
|
return JSONResponse(status_code=409, content={"error": str(error)})
|
|
@@ -79,7 +79,9 @@ async def run_code_graph_pipeline(repo_path, include_docs=False):
|
|
|
79
79
|
async for run_status in non_code_pipeline_run:
|
|
80
80
|
yield run_status
|
|
81
81
|
|
|
82
|
-
async for run_status in run_tasks(
|
|
82
|
+
async for run_status in run_tasks(
|
|
83
|
+
tasks, dataset.id, repo_path, user, "cognify_code_pipeline", incremental_loading=False
|
|
84
|
+
):
|
|
83
85
|
yield run_status
|
|
84
86
|
|
|
85
87
|
|
cognee/api/v1/cognify/cognify.py
CHANGED
|
@@ -39,6 +39,7 @@ async def cognify(
|
|
|
39
39
|
vector_db_config: dict = None,
|
|
40
40
|
graph_db_config: dict = None,
|
|
41
41
|
run_in_background: bool = False,
|
|
42
|
+
incremental_loading: bool = True,
|
|
42
43
|
):
|
|
43
44
|
"""
|
|
44
45
|
Transform ingested data into a structured knowledge graph.
|
|
@@ -194,6 +195,7 @@ async def cognify(
|
|
|
194
195
|
datasets=datasets,
|
|
195
196
|
vector_db_config=vector_db_config,
|
|
196
197
|
graph_db_config=graph_db_config,
|
|
198
|
+
incremental_loading=incremental_loading,
|
|
197
199
|
)
|
|
198
200
|
else:
|
|
199
201
|
return await run_cognify_blocking(
|
|
@@ -202,6 +204,7 @@ async def cognify(
|
|
|
202
204
|
datasets=datasets,
|
|
203
205
|
vector_db_config=vector_db_config,
|
|
204
206
|
graph_db_config=graph_db_config,
|
|
207
|
+
incremental_loading=incremental_loading,
|
|
205
208
|
)
|
|
206
209
|
|
|
207
210
|
|
|
@@ -211,6 +214,7 @@ async def run_cognify_blocking(
|
|
|
211
214
|
datasets,
|
|
212
215
|
graph_db_config: dict = None,
|
|
213
216
|
vector_db_config: dict = False,
|
|
217
|
+
incremental_loading: bool = True,
|
|
214
218
|
):
|
|
215
219
|
total_run_info = {}
|
|
216
220
|
|
|
@@ -221,6 +225,7 @@ async def run_cognify_blocking(
|
|
|
221
225
|
pipeline_name="cognify_pipeline",
|
|
222
226
|
graph_db_config=graph_db_config,
|
|
223
227
|
vector_db_config=vector_db_config,
|
|
228
|
+
incremental_loading=incremental_loading,
|
|
224
229
|
):
|
|
225
230
|
if run_info.dataset_id:
|
|
226
231
|
total_run_info[run_info.dataset_id] = run_info
|
|
@@ -236,6 +241,7 @@ async def run_cognify_as_background_process(
|
|
|
236
241
|
datasets,
|
|
237
242
|
graph_db_config: dict = None,
|
|
238
243
|
vector_db_config: dict = False,
|
|
244
|
+
incremental_loading: bool = True,
|
|
239
245
|
):
|
|
240
246
|
# Convert dataset to list if it's a string
|
|
241
247
|
if isinstance(datasets, str):
|
|
@@ -246,6 +252,7 @@ async def run_cognify_as_background_process(
|
|
|
246
252
|
|
|
247
253
|
async def handle_rest_of_the_run(pipeline_list):
|
|
248
254
|
# Execute all provided pipelines one by one to avoid database write conflicts
|
|
255
|
+
# TODO: Convert to async gather task instead of for loop when Queue mechanism for database is created
|
|
249
256
|
for pipeline in pipeline_list:
|
|
250
257
|
while True:
|
|
251
258
|
try:
|
|
@@ -270,6 +277,7 @@ async def run_cognify_as_background_process(
|
|
|
270
277
|
pipeline_name="cognify_pipeline",
|
|
271
278
|
graph_db_config=graph_db_config,
|
|
272
279
|
vector_db_config=vector_db_config,
|
|
280
|
+
incremental_loading=incremental_loading,
|
|
273
281
|
)
|
|
274
282
|
|
|
275
283
|
# Save dataset Pipeline run started info
|
|
@@ -16,7 +16,11 @@ from cognee.modules.graph.methods import get_formatted_graph_data
|
|
|
16
16
|
from cognee.modules.users.get_user_manager import get_user_manager_context
|
|
17
17
|
from cognee.infrastructure.databases.relational import get_relational_engine
|
|
18
18
|
from cognee.modules.users.authentication.default.default_jwt_strategy import DefaultJWTStrategy
|
|
19
|
-
from cognee.modules.pipelines.models.PipelineRunInfo import
|
|
19
|
+
from cognee.modules.pipelines.models.PipelineRunInfo import (
|
|
20
|
+
PipelineRunCompleted,
|
|
21
|
+
PipelineRunInfo,
|
|
22
|
+
PipelineRunErrored,
|
|
23
|
+
)
|
|
20
24
|
from cognee.modules.pipelines.queues.pipeline_run_info_queues import (
|
|
21
25
|
get_from_queue,
|
|
22
26
|
initialize_queue,
|
|
@@ -105,6 +109,9 @@ def get_cognify_router() -> APIRouter:
|
|
|
105
109
|
datasets, user, run_in_background=payload.run_in_background
|
|
106
110
|
)
|
|
107
111
|
|
|
112
|
+
# If any cognify run errored return JSONResponse with proper error status code
|
|
113
|
+
if any(isinstance(v, PipelineRunErrored) for v in cognify_run.values()):
|
|
114
|
+
return JSONResponse(status_code=420, content=cognify_run)
|
|
108
115
|
return cognify_run
|
|
109
116
|
except Exception as error:
|
|
110
117
|
return JSONResponse(status_code=409, content={"error": str(error)})
|
cognee/api/v1/config/config.py
CHANGED
|
@@ -7,7 +7,9 @@ from cognee.modules.cognify.config import get_cognify_config
|
|
|
7
7
|
from cognee.infrastructure.data.chunking.config import get_chunk_config
|
|
8
8
|
from cognee.infrastructure.databases.vector import get_vectordb_config
|
|
9
9
|
from cognee.infrastructure.databases.graph.config import get_graph_config
|
|
10
|
-
from cognee.infrastructure.llm.config import
|
|
10
|
+
from cognee.infrastructure.llm.config import (
|
|
11
|
+
get_llm_config,
|
|
12
|
+
)
|
|
11
13
|
from cognee.infrastructure.databases.relational import get_relational_config, get_migration_config
|
|
12
14
|
|
|
13
15
|
|
|
@@ -283,14 +283,8 @@ def get_datasets_router() -> APIRouter:
|
|
|
283
283
|
- **404 Not Found**: Dataset doesn't exist or user doesn't have access
|
|
284
284
|
- **500 Internal Server Error**: Error retrieving graph data
|
|
285
285
|
"""
|
|
286
|
-
from cognee.modules.data.methods import get_dataset
|
|
287
|
-
|
|
288
|
-
dataset = await get_dataset(user.id, dataset_id)
|
|
289
|
-
|
|
290
|
-
if dataset is None:
|
|
291
|
-
raise DatasetNotFoundError(message=f"Dataset ({str(dataset_id)}) not found.")
|
|
292
286
|
|
|
293
|
-
graph_data = await get_formatted_graph_data(
|
|
287
|
+
graph_data = await get_formatted_graph_data(dataset_id, user.id)
|
|
294
288
|
|
|
295
289
|
return graph_data
|
|
296
290
|
|
|
@@ -353,7 +347,7 @@ def get_datasets_router() -> APIRouter:
|
|
|
353
347
|
|
|
354
348
|
@router.get("/status", response_model=dict[str, PipelineRunStatus])
|
|
355
349
|
async def get_dataset_status(
|
|
356
|
-
datasets: Annotated[List[UUID], Query(alias="dataset")] =
|
|
350
|
+
datasets: Annotated[List[UUID], Query(alias="dataset")] = [],
|
|
357
351
|
user: User = Depends(get_authenticated_user),
|
|
358
352
|
):
|
|
359
353
|
"""
|
cognee/api/v1/delete/delete.py
CHANGED
|
@@ -16,7 +16,11 @@ from cognee.modules.users.methods import get_default_user
|
|
|
16
16
|
from cognee.modules.data.methods import get_authorized_existing_datasets
|
|
17
17
|
from cognee.context_global_variables import set_database_global_context_variables
|
|
18
18
|
|
|
19
|
-
from .exceptions import
|
|
19
|
+
from cognee.api.v1.delete.exceptions import (
|
|
20
|
+
DocumentNotFoundError,
|
|
21
|
+
DatasetNotFoundError,
|
|
22
|
+
DocumentSubgraphNotFoundError,
|
|
23
|
+
)
|
|
20
24
|
|
|
21
25
|
logger = get_logger()
|
|
22
26
|
|
|
@@ -82,17 +86,17 @@ async def delete(
|
|
|
82
86
|
raise DocumentNotFoundError(f"Data {data_id} not found in dataset {dataset_id}")
|
|
83
87
|
|
|
84
88
|
# Get the content hash for deletion
|
|
85
|
-
|
|
89
|
+
data_id = str(data_point.id)
|
|
86
90
|
|
|
87
91
|
# Use the existing comprehensive deletion logic
|
|
88
|
-
return await delete_single_document(
|
|
92
|
+
return await delete_single_document(data_id, dataset.id, mode)
|
|
89
93
|
|
|
90
94
|
|
|
91
|
-
async def delete_single_document(
|
|
95
|
+
async def delete_single_document(data_id: str, dataset_id: UUID = None, mode: str = "soft"):
|
|
92
96
|
"""Delete a single document by its content hash."""
|
|
93
97
|
|
|
94
98
|
# Delete from graph database
|
|
95
|
-
deletion_result = await delete_document_subgraph(
|
|
99
|
+
deletion_result = await delete_document_subgraph(data_id, mode)
|
|
96
100
|
|
|
97
101
|
logger.info(f"Deletion result: {deletion_result}")
|
|
98
102
|
|
|
@@ -163,12 +167,12 @@ async def delete_single_document(content_hash: str, dataset_id: UUID = None, mod
|
|
|
163
167
|
|
|
164
168
|
# Get the data point
|
|
165
169
|
data_point = (
|
|
166
|
-
await session.execute(select(Data).filter(Data.
|
|
170
|
+
await session.execute(select(Data).filter(Data.id == UUID(data_id)))
|
|
167
171
|
).scalar_one_or_none()
|
|
168
172
|
|
|
169
173
|
if data_point is None:
|
|
170
174
|
raise DocumentNotFoundError(
|
|
171
|
-
f"Document not found in relational DB with
|
|
175
|
+
f"Document not found in relational DB with data id: {data_id}"
|
|
172
176
|
)
|
|
173
177
|
|
|
174
178
|
doc_id = data_point.id
|
|
@@ -203,7 +207,7 @@ async def delete_single_document(content_hash: str, dataset_id: UUID = None, mod
|
|
|
203
207
|
"status": "success",
|
|
204
208
|
"message": "Document deleted from both graph and relational databases",
|
|
205
209
|
"graph_deletions": deletion_result["deleted_counts"],
|
|
206
|
-
"
|
|
210
|
+
"data_id": data_id,
|
|
207
211
|
"dataset": dataset_id,
|
|
208
212
|
"deleted_node_ids": [
|
|
209
213
|
str(node_id) for node_id in deleted_node_ids
|
|
@@ -211,12 +215,12 @@ async def delete_single_document(content_hash: str, dataset_id: UUID = None, mod
|
|
|
211
215
|
}
|
|
212
216
|
|
|
213
217
|
|
|
214
|
-
async def delete_document_subgraph(
|
|
218
|
+
async def delete_document_subgraph(document_id: str, mode: str = "soft"):
|
|
215
219
|
"""Delete a document and all its related nodes in the correct order."""
|
|
216
220
|
graph_db = await get_graph_engine()
|
|
217
|
-
subgraph = await graph_db.get_document_subgraph(
|
|
221
|
+
subgraph = await graph_db.get_document_subgraph(document_id)
|
|
218
222
|
if not subgraph:
|
|
219
|
-
raise DocumentSubgraphNotFoundError(f"Document not found with
|
|
223
|
+
raise DocumentSubgraphNotFoundError(f"Document not found with id: {document_id}")
|
|
220
224
|
|
|
221
225
|
# Delete in the correct order to maintain graph integrity
|
|
222
226
|
deletion_order = [
|
|
@@ -260,6 +264,6 @@ async def delete_document_subgraph(content_hash: str, mode: str = "soft"):
|
|
|
260
264
|
return {
|
|
261
265
|
"status": "success",
|
|
262
266
|
"deleted_counts": deleted_counts,
|
|
263
|
-
"
|
|
267
|
+
"document_id": document_id,
|
|
264
268
|
"deleted_node_ids": deleted_node_ids,
|
|
265
269
|
}
|
|
@@ -17,7 +17,9 @@ from cognee.api.v1.responses.models import (
|
|
|
17
17
|
)
|
|
18
18
|
from cognee.api.v1.responses.dispatch_function import dispatch_function
|
|
19
19
|
from cognee.api.v1.responses.default_tools import DEFAULT_TOOLS
|
|
20
|
-
from cognee.infrastructure.llm.config import
|
|
20
|
+
from cognee.infrastructure.llm.config import (
|
|
21
|
+
get_llm_config,
|
|
22
|
+
)
|
|
21
23
|
from cognee.modules.users.models import User
|
|
22
24
|
from cognee.modules.users.methods import get_authenticated_user
|
|
23
25
|
|