cognee 0.2.3.dev0__py3-none-any.whl → 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/__main__.py +4 -0
- cognee/api/v1/add/add.py +18 -11
- cognee/api/v1/cognify/code_graph_pipeline.py +7 -1
- cognee/api/v1/cognify/cognify.py +22 -115
- cognee/api/v1/cognify/routers/get_cognify_router.py +11 -3
- cognee/api/v1/config/config.py +5 -13
- cognee/api/v1/datasets/routers/get_datasets_router.py +2 -2
- cognee/api/v1/delete/delete.py +1 -1
- cognee/api/v1/exceptions/__init__.py +13 -0
- cognee/api/v1/{delete → exceptions}/exceptions.py +15 -12
- cognee/api/v1/responses/default_tools.py +4 -0
- cognee/api/v1/responses/dispatch_function.py +6 -1
- cognee/api/v1/responses/models.py +1 -1
- cognee/api/v1/search/search.py +6 -7
- cognee/cli/__init__.py +10 -0
- cognee/cli/_cognee.py +180 -0
- cognee/cli/commands/__init__.py +1 -0
- cognee/cli/commands/add_command.py +80 -0
- cognee/cli/commands/cognify_command.py +128 -0
- cognee/cli/commands/config_command.py +225 -0
- cognee/cli/commands/delete_command.py +80 -0
- cognee/cli/commands/search_command.py +149 -0
- cognee/cli/config.py +33 -0
- cognee/cli/debug.py +21 -0
- cognee/cli/echo.py +45 -0
- cognee/cli/exceptions.py +23 -0
- cognee/cli/minimal_cli.py +97 -0
- cognee/cli/reference.py +26 -0
- cognee/cli/suppress_logging.py +12 -0
- cognee/eval_framework/corpus_builder/corpus_builder_executor.py +2 -2
- cognee/eval_framework/eval_config.py +1 -1
- cognee/exceptions/__init__.py +5 -5
- cognee/exceptions/exceptions.py +37 -17
- cognee/infrastructure/data/exceptions/__init__.py +7 -0
- cognee/infrastructure/data/exceptions/exceptions.py +22 -0
- cognee/infrastructure/data/utils/extract_keywords.py +3 -3
- cognee/infrastructure/databases/exceptions/__init__.py +3 -0
- cognee/infrastructure/databases/exceptions/exceptions.py +57 -9
- cognee/infrastructure/databases/graph/get_graph_engine.py +4 -9
- cognee/infrastructure/databases/graph/kuzu/adapter.py +64 -2
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +49 -0
- cognee/infrastructure/databases/graph/neptune_driver/exceptions.py +15 -10
- cognee/infrastructure/databases/hybrid/falkordb/FalkorDBAdapter.py +2 -2
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +4 -5
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -2
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +5 -3
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +17 -8
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +5 -5
- cognee/infrastructure/databases/vector/embeddings/config.py +2 -2
- cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +6 -6
- cognee/infrastructure/databases/vector/exceptions/exceptions.py +3 -3
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +2 -2
- cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +4 -3
- cognee/infrastructure/files/utils/get_data_file_path.py +14 -9
- cognee/infrastructure/files/utils/get_file_metadata.py +2 -1
- cognee/infrastructure/llm/LLMGateway.py +14 -5
- cognee/infrastructure/llm/config.py +5 -5
- cognee/infrastructure/llm/exceptions.py +30 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +16 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_content_graph.py +19 -15
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +5 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +6 -6
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +2 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +24 -15
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +6 -4
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +9 -7
- cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +2 -2
- cognee/infrastructure/llm/tokenizer/HuggingFace/adapter.py +3 -3
- cognee/infrastructure/llm/tokenizer/Mistral/adapter.py +3 -3
- cognee/infrastructure/llm/tokenizer/TikToken/adapter.py +6 -6
- cognee/infrastructure/llm/utils.py +7 -7
- cognee/modules/data/exceptions/exceptions.py +18 -5
- cognee/modules/data/methods/__init__.py +2 -0
- cognee/modules/data/methods/create_authorized_dataset.py +19 -0
- cognee/modules/data/methods/delete_data.py +2 -4
- cognee/modules/data/methods/get_authorized_dataset.py +11 -5
- cognee/modules/data/methods/get_authorized_dataset_by_name.py +16 -0
- cognee/modules/data/methods/load_or_create_datasets.py +2 -20
- cognee/modules/data/processing/document_types/exceptions/exceptions.py +2 -2
- cognee/modules/graph/cognee_graph/CogneeGraph.py +6 -4
- cognee/modules/graph/cognee_graph/CogneeGraphElements.py +5 -10
- cognee/modules/graph/exceptions/__init__.py +2 -0
- cognee/modules/graph/exceptions/exceptions.py +25 -3
- cognee/modules/graph/methods/get_formatted_graph_data.py +3 -2
- cognee/modules/ingestion/exceptions/exceptions.py +2 -2
- cognee/modules/ontology/exceptions/exceptions.py +4 -4
- cognee/modules/pipelines/__init__.py +1 -1
- cognee/modules/pipelines/exceptions/exceptions.py +2 -2
- cognee/modules/pipelines/exceptions/tasks.py +18 -0
- cognee/modules/pipelines/layers/__init__.py +1 -0
- cognee/modules/pipelines/layers/check_pipeline_run_qualification.py +59 -0
- cognee/modules/pipelines/layers/pipeline_execution_mode.py +127 -0
- cognee/modules/pipelines/layers/reset_dataset_pipeline_run_status.py +12 -0
- cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +34 -0
- cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +55 -0
- cognee/modules/pipelines/layers/setup_and_check_environment.py +41 -0
- cognee/modules/pipelines/layers/validate_pipeline_tasks.py +20 -0
- cognee/modules/pipelines/methods/__init__.py +2 -0
- cognee/modules/pipelines/methods/get_pipeline_runs_by_dataset.py +34 -0
- cognee/modules/pipelines/methods/reset_pipeline_run_status.py +16 -0
- cognee/modules/pipelines/operations/__init__.py +0 -1
- cognee/modules/pipelines/operations/log_pipeline_run_initiated.py +1 -1
- cognee/modules/pipelines/operations/pipeline.py +23 -138
- cognee/modules/retrieval/base_feedback.py +11 -0
- cognee/modules/retrieval/cypher_search_retriever.py +1 -9
- cognee/modules/retrieval/exceptions/exceptions.py +12 -6
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +9 -2
- cognee/modules/retrieval/graph_completion_cot_retriever.py +13 -6
- cognee/modules/retrieval/graph_completion_retriever.py +89 -5
- cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
- cognee/modules/retrieval/natural_language_retriever.py +0 -4
- cognee/modules/retrieval/user_qa_feedback.py +83 -0
- cognee/modules/retrieval/utils/extract_uuid_from_node.py +18 -0
- cognee/modules/retrieval/utils/models.py +40 -0
- cognee/modules/search/exceptions/__init__.py +7 -0
- cognee/modules/search/exceptions/exceptions.py +15 -0
- cognee/modules/search/methods/search.py +47 -7
- cognee/modules/search/types/SearchType.py +1 -0
- cognee/modules/settings/get_settings.py +2 -2
- cognee/modules/users/exceptions/exceptions.py +6 -6
- cognee/shared/CodeGraphEntities.py +1 -0
- cognee/shared/exceptions/exceptions.py +2 -2
- cognee/shared/logging_utils.py +142 -31
- cognee/shared/utils.py +0 -1
- cognee/tasks/completion/exceptions/exceptions.py +3 -3
- cognee/tasks/documents/classify_documents.py +4 -0
- cognee/tasks/documents/exceptions/__init__.py +11 -0
- cognee/tasks/documents/exceptions/exceptions.py +36 -0
- cognee/tasks/documents/extract_chunks_from_documents.py +8 -2
- cognee/tasks/graph/exceptions/__init__.py +12 -0
- cognee/tasks/graph/exceptions/exceptions.py +41 -0
- cognee/tasks/graph/extract_graph_from_data.py +34 -2
- cognee/tasks/ingestion/exceptions/__init__.py +8 -0
- cognee/tasks/ingestion/exceptions/exceptions.py +12 -0
- cognee/tasks/ingestion/resolve_data_directories.py +5 -0
- cognee/tasks/repo_processor/get_local_dependencies.py +2 -0
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +120 -48
- cognee/tasks/storage/add_data_points.py +41 -3
- cognee/tasks/storage/exceptions/__init__.py +9 -0
- cognee/tasks/storage/exceptions/exceptions.py +13 -0
- cognee/tasks/storage/index_data_points.py +1 -1
- cognee/tasks/summarization/exceptions/__init__.py +9 -0
- cognee/tasks/summarization/exceptions/exceptions.py +14 -0
- cognee/tasks/summarization/summarize_text.py +8 -1
- cognee/tests/integration/cli/__init__.py +3 -0
- cognee/tests/integration/cli/test_cli_integration.py +331 -0
- cognee/tests/integration/documents/PdfDocument_test.py +2 -2
- cognee/tests/integration/documents/TextDocument_test.py +2 -4
- cognee/tests/integration/documents/UnstructuredDocument_test.py +5 -8
- cognee/tests/test_delete_by_id.py +1 -1
- cognee/tests/{test_deletion.py → test_delete_hard.py} +0 -37
- cognee/tests/test_delete_soft.py +85 -0
- cognee/tests/test_kuzu.py +2 -2
- cognee/tests/test_neo4j.py +2 -2
- cognee/tests/test_search_db.py +126 -7
- cognee/tests/unit/cli/__init__.py +3 -0
- cognee/tests/unit/cli/test_cli_commands.py +483 -0
- cognee/tests/unit/cli/test_cli_edge_cases.py +625 -0
- cognee/tests/unit/cli/test_cli_main.py +173 -0
- cognee/tests/unit/cli/test_cli_runner.py +62 -0
- cognee/tests/unit/cli/test_cli_utils.py +127 -0
- cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +5 -5
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +3 -3
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +3 -3
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +3 -3
- cognee/tests/unit/modules/search/search_methods_test.py +4 -2
- {cognee-0.2.3.dev0.dist-info → cognee-0.2.4.dist-info}/METADATA +7 -5
- {cognee-0.2.3.dev0.dist-info → cognee-0.2.4.dist-info}/RECORD +172 -121
- cognee-0.2.4.dist-info/entry_points.txt +2 -0
- cognee/infrastructure/databases/exceptions/EmbeddingException.py +0 -20
- cognee/infrastructure/databases/graph/networkx/__init__.py +0 -0
- cognee/infrastructure/databases/graph/networkx/adapter.py +0 -1017
- cognee/infrastructure/pipeline/models/Operation.py +0 -60
- cognee/infrastructure/pipeline/models/__init__.py +0 -0
- cognee/notebooks/github_analysis_step_by_step.ipynb +0 -37
- cognee/tests/tasks/descriptive_metrics/networkx_metrics_test.py +0 -7
- {cognee-0.2.3.dev0.dist-info → cognee-0.2.4.dist-info}/WHEEL +0 -0
- {cognee-0.2.3.dev0.dist-info → cognee-0.2.4.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.2.3.dev0.dist-info → cognee-0.2.4.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -36,6 +36,7 @@ class ClassDefinition(DataPoint):
|
|
|
36
36
|
class CodeFile(DataPoint):
|
|
37
37
|
name: str
|
|
38
38
|
file_path: str
|
|
39
|
+
language: Optional[str] = None # e.g., 'python', 'javascript', 'java', etc.
|
|
39
40
|
source_code: Optional[str] = None
|
|
40
41
|
part_of: Optional[Repository] = None
|
|
41
42
|
depends_on: Optional[List["ImportStatement"]] = []
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
from cognee.exceptions import
|
|
1
|
+
from cognee.exceptions import CogneeValidationError
|
|
2
2
|
from fastapi import status
|
|
3
3
|
|
|
4
4
|
|
|
5
|
-
class IngestionError(
|
|
5
|
+
class IngestionError(CogneeValidationError):
|
|
6
6
|
def __init__(
|
|
7
7
|
self,
|
|
8
8
|
message: str = "Failed to load data.",
|
cognee/shared/logging_utils.py
CHANGED
|
@@ -15,14 +15,43 @@ from typing import Protocol
|
|
|
15
15
|
# Configure external library logging
|
|
16
16
|
def configure_external_library_logging():
|
|
17
17
|
"""Configure logging for external libraries to reduce verbosity"""
|
|
18
|
+
# Set environment variables to suppress LiteLLM logging
|
|
19
|
+
os.environ.setdefault("LITELLM_LOG", "ERROR")
|
|
20
|
+
os.environ.setdefault("LITELLM_SET_VERBOSE", "False")
|
|
21
|
+
|
|
18
22
|
# Configure LiteLLM logging to reduce verbosity
|
|
19
23
|
try:
|
|
20
24
|
import litellm
|
|
21
25
|
|
|
26
|
+
# Disable verbose logging
|
|
22
27
|
litellm.set_verbose = False
|
|
23
28
|
|
|
24
|
-
#
|
|
25
|
-
|
|
29
|
+
# Set additional LiteLLM configuration
|
|
30
|
+
if hasattr(litellm, "suppress_debug_info"):
|
|
31
|
+
litellm.suppress_debug_info = True
|
|
32
|
+
if hasattr(litellm, "turn_off_message"):
|
|
33
|
+
litellm.turn_off_message = True
|
|
34
|
+
if hasattr(litellm, "_turn_on_debug"):
|
|
35
|
+
litellm._turn_on_debug = False
|
|
36
|
+
|
|
37
|
+
# Comprehensive logger suppression
|
|
38
|
+
loggers_to_suppress = [
|
|
39
|
+
"litellm",
|
|
40
|
+
"litellm.litellm_core_utils.logging_worker",
|
|
41
|
+
"litellm.litellm_core_utils",
|
|
42
|
+
"litellm.proxy",
|
|
43
|
+
"litellm.router",
|
|
44
|
+
"openai._base_client",
|
|
45
|
+
"LiteLLM", # Capital case variant
|
|
46
|
+
"LiteLLM.core",
|
|
47
|
+
"LiteLLM.logging_worker",
|
|
48
|
+
"litellm.logging_worker",
|
|
49
|
+
]
|
|
50
|
+
|
|
51
|
+
for logger_name in loggers_to_suppress:
|
|
52
|
+
logging.getLogger(logger_name).setLevel(logging.CRITICAL)
|
|
53
|
+
logging.getLogger(logger_name).disabled = True
|
|
54
|
+
|
|
26
55
|
except ImportError:
|
|
27
56
|
# LiteLLM not available, skip configuration
|
|
28
57
|
pass
|
|
@@ -173,29 +202,17 @@ def log_database_configuration(logger):
|
|
|
173
202
|
from cognee.infrastructure.databases.graph.config import get_graph_config
|
|
174
203
|
|
|
175
204
|
try:
|
|
176
|
-
#
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
vector_config = get_vectordb_config()
|
|
185
|
-
if vector_config.vector_db_provider == "lancedb":
|
|
186
|
-
logger.info(f"Vector database path: {vector_config.vector_db_url}")
|
|
187
|
-
else:
|
|
188
|
-
logger.info(f"Vector database URL: {vector_config.vector_db_url}")
|
|
189
|
-
|
|
190
|
-
# Log graph database configuration
|
|
191
|
-
graph_config = get_graph_config()
|
|
192
|
-
if graph_config.graph_database_provider == "kuzu":
|
|
193
|
-
logger.info(f"Graph database path: {graph_config.graph_file_path}")
|
|
194
|
-
else:
|
|
195
|
-
logger.info(f"Graph database URL: {graph_config.graph_database_url}")
|
|
205
|
+
# Get base database directory path
|
|
206
|
+
from cognee.base_config import get_base_config
|
|
207
|
+
|
|
208
|
+
base_config = get_base_config()
|
|
209
|
+
databases_path = os.path.join(base_config.system_root_directory, "databases")
|
|
210
|
+
|
|
211
|
+
# Log concise database info
|
|
212
|
+
logger.info(f"Database storage: {databases_path}")
|
|
196
213
|
|
|
197
214
|
except Exception as e:
|
|
198
|
-
logger.
|
|
215
|
+
logger.debug(f"Could not retrieve database configuration: {str(e)}")
|
|
199
216
|
|
|
200
217
|
|
|
201
218
|
def cleanup_old_logs(logs_dir, max_files):
|
|
@@ -216,13 +233,22 @@ def cleanup_old_logs(logs_dir, max_files):
|
|
|
216
233
|
|
|
217
234
|
# Remove old files that exceed the maximum
|
|
218
235
|
if len(log_files) > max_files:
|
|
236
|
+
deleted_count = 0
|
|
219
237
|
for old_file in log_files[max_files:]:
|
|
220
238
|
try:
|
|
221
239
|
old_file.unlink()
|
|
222
|
-
|
|
240
|
+
deleted_count += 1
|
|
241
|
+
# Only log individual files in non-CLI mode
|
|
242
|
+
if os.getenv("COGNEE_CLI_MODE") != "true":
|
|
243
|
+
logger.info(f"Deleted old log file: {old_file}")
|
|
223
244
|
except Exception as e:
|
|
245
|
+
# Always log errors
|
|
224
246
|
logger.error(f"Failed to delete old log file {old_file}: {e}")
|
|
225
247
|
|
|
248
|
+
# In CLI mode, show compact summary
|
|
249
|
+
if os.getenv("COGNEE_CLI_MODE") == "true" and deleted_count > 0:
|
|
250
|
+
logger.info(f"Cleaned up {deleted_count} old log files")
|
|
251
|
+
|
|
226
252
|
return True
|
|
227
253
|
except Exception as e:
|
|
228
254
|
logger.error(f"Error cleaning up log files: {e}")
|
|
@@ -241,11 +267,81 @@ def setup_logging(log_level=None, name=None):
|
|
|
241
267
|
"""
|
|
242
268
|
global _is_structlog_configured
|
|
243
269
|
|
|
270
|
+
# Regular detailed logging for non-CLI usage
|
|
244
271
|
log_level = log_level if log_level else log_levels[os.getenv("LOG_LEVEL", "INFO")]
|
|
245
272
|
|
|
246
273
|
# Configure external library logging early to suppress verbose output
|
|
247
274
|
configure_external_library_logging()
|
|
248
275
|
|
|
276
|
+
# Add custom filter to suppress LiteLLM worker cancellation errors
|
|
277
|
+
class LiteLLMCancellationFilter(logging.Filter):
|
|
278
|
+
"""Filter to suppress LiteLLM worker cancellation messages"""
|
|
279
|
+
|
|
280
|
+
def filter(self, record):
|
|
281
|
+
# Check if this is a LiteLLM-related logger
|
|
282
|
+
if hasattr(record, "name") and "litellm" in record.name.lower():
|
|
283
|
+
return False
|
|
284
|
+
|
|
285
|
+
# Check message content for cancellation errors
|
|
286
|
+
if hasattr(record, "msg") and record.msg:
|
|
287
|
+
msg_str = str(record.msg).lower()
|
|
288
|
+
if any(
|
|
289
|
+
keyword in msg_str
|
|
290
|
+
for keyword in [
|
|
291
|
+
"loggingworker cancelled",
|
|
292
|
+
"logging_worker.py",
|
|
293
|
+
"cancellederror",
|
|
294
|
+
"litellm:error",
|
|
295
|
+
]
|
|
296
|
+
):
|
|
297
|
+
return False
|
|
298
|
+
|
|
299
|
+
# Check formatted message
|
|
300
|
+
try:
|
|
301
|
+
if hasattr(record, "getMessage"):
|
|
302
|
+
formatted_msg = record.getMessage().lower()
|
|
303
|
+
if any(
|
|
304
|
+
keyword in formatted_msg
|
|
305
|
+
for keyword in [
|
|
306
|
+
"loggingworker cancelled",
|
|
307
|
+
"logging_worker.py",
|
|
308
|
+
"cancellederror",
|
|
309
|
+
"litellm:error",
|
|
310
|
+
]
|
|
311
|
+
):
|
|
312
|
+
return False
|
|
313
|
+
except Exception:
|
|
314
|
+
pass
|
|
315
|
+
|
|
316
|
+
return True
|
|
317
|
+
|
|
318
|
+
# Apply the filter to root logger and specific loggers
|
|
319
|
+
cancellation_filter = LiteLLMCancellationFilter()
|
|
320
|
+
logging.getLogger().addFilter(cancellation_filter)
|
|
321
|
+
logging.getLogger("litellm").addFilter(cancellation_filter)
|
|
322
|
+
|
|
323
|
+
# Add custom filter to suppress LiteLLM worker cancellation errors
|
|
324
|
+
class LiteLLMFilter(logging.Filter):
|
|
325
|
+
def filter(self, record):
|
|
326
|
+
# Suppress LiteLLM worker cancellation errors
|
|
327
|
+
if hasattr(record, "msg") and isinstance(record.msg, str):
|
|
328
|
+
msg_lower = record.msg.lower()
|
|
329
|
+
if any(
|
|
330
|
+
phrase in msg_lower
|
|
331
|
+
for phrase in [
|
|
332
|
+
"loggingworker cancelled",
|
|
333
|
+
"cancellederror",
|
|
334
|
+
"logging_worker.py",
|
|
335
|
+
"loggingerror",
|
|
336
|
+
]
|
|
337
|
+
):
|
|
338
|
+
return False
|
|
339
|
+
return True
|
|
340
|
+
|
|
341
|
+
# Apply filter to root logger
|
|
342
|
+
litellm_filter = LiteLLMFilter()
|
|
343
|
+
logging.getLogger().addFilter(litellm_filter)
|
|
344
|
+
|
|
249
345
|
def exception_handler(logger, method_name, event_dict):
|
|
250
346
|
"""Custom processor to handle uncaught exceptions."""
|
|
251
347
|
# Check if there's an exc_info that needs to be processed
|
|
@@ -298,11 +394,6 @@ def setup_logging(log_level=None, name=None):
|
|
|
298
394
|
# Hand back to the original hook → prints traceback and exits
|
|
299
395
|
sys.__excepthook__(exc_type, exc_value, traceback)
|
|
300
396
|
|
|
301
|
-
logger.info("Want to learn more? Visit the Cognee documentation: https://docs.cognee.ai")
|
|
302
|
-
logger.info(
|
|
303
|
-
"Need help? Reach out to us on our Discord server: https://discord.gg/NQPKmU5CCg"
|
|
304
|
-
)
|
|
305
|
-
|
|
306
397
|
# Install exception handlers
|
|
307
398
|
sys.excepthook = handle_exception
|
|
308
399
|
|
|
@@ -380,18 +471,38 @@ def setup_logging(log_level=None, name=None):
|
|
|
380
471
|
# Mark logging as configured
|
|
381
472
|
_is_structlog_configured = True
|
|
382
473
|
|
|
474
|
+
from cognee.infrastructure.databases.relational.config import get_relational_config
|
|
475
|
+
from cognee.infrastructure.databases.vector.config import get_vectordb_config
|
|
476
|
+
from cognee.infrastructure.databases.graph.config import get_graph_config
|
|
477
|
+
|
|
478
|
+
graph_config = get_graph_config()
|
|
479
|
+
vector_config = get_vectordb_config()
|
|
480
|
+
relational_config = get_relational_config()
|
|
481
|
+
|
|
482
|
+
try:
|
|
483
|
+
# Get base database directory path
|
|
484
|
+
from cognee.base_config import get_base_config
|
|
485
|
+
|
|
486
|
+
base_config = get_base_config()
|
|
487
|
+
databases_path = os.path.join(base_config.system_root_directory, "databases")
|
|
488
|
+
except Exception as e:
|
|
489
|
+
raise ValueError from e
|
|
490
|
+
|
|
383
491
|
# Get a configured logger and log system information
|
|
384
492
|
logger = structlog.get_logger(name if name else __name__)
|
|
493
|
+
# Detailed initialization for regular usage
|
|
385
494
|
logger.info(
|
|
386
495
|
"Logging initialized",
|
|
387
496
|
python_version=PYTHON_VERSION,
|
|
388
497
|
structlog_version=STRUCTLOG_VERSION,
|
|
389
498
|
cognee_version=COGNEE_VERSION,
|
|
390
499
|
os_info=OS_INFO,
|
|
500
|
+
database_path=databases_path,
|
|
501
|
+
graph_database_name=graph_config.graph_database_name,
|
|
502
|
+
vector_config=vector_config.vector_db_provider,
|
|
503
|
+
relational_config=relational_config.db_name,
|
|
391
504
|
)
|
|
392
505
|
|
|
393
|
-
logger.info("Want to learn more? Visit the Cognee documentation: https://docs.cognee.ai")
|
|
394
|
-
|
|
395
506
|
# Log database configuration
|
|
396
507
|
log_database_configuration(logger)
|
|
397
508
|
|
cognee/shared/utils.py
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
from cognee.exceptions import
|
|
1
|
+
from cognee.exceptions import CogneeValidationError
|
|
2
2
|
from fastapi import status
|
|
3
3
|
|
|
4
4
|
|
|
5
|
-
class NoRelevantDataError(
|
|
5
|
+
class NoRelevantDataError(CogneeValidationError):
|
|
6
6
|
"""
|
|
7
7
|
Represents an error when no relevant data is found during a search. This class is a
|
|
8
|
-
subclass of
|
|
8
|
+
subclass of CogneeValidationError.
|
|
9
9
|
|
|
10
10
|
Public methods:
|
|
11
11
|
|
|
@@ -10,6 +10,7 @@ from cognee.modules.data.processing.document_types import (
|
|
|
10
10
|
)
|
|
11
11
|
from cognee.modules.engine.models.node_set import NodeSet
|
|
12
12
|
from cognee.modules.engine.utils.generate_node_id import generate_node_id
|
|
13
|
+
from cognee.tasks.documents.exceptions import WrongDataDocumentInputError
|
|
13
14
|
|
|
14
15
|
EXTENSION_TO_DOCUMENT_CLASS = {
|
|
15
16
|
"pdf": PdfDocument, # Text documents
|
|
@@ -111,6 +112,9 @@ async def classify_documents(data_documents: list[Data]) -> list[Document]:
|
|
|
111
112
|
- list[Document]: A list of Document objects created based on the classified data
|
|
112
113
|
documents.
|
|
113
114
|
"""
|
|
115
|
+
if not isinstance(data_documents, list):
|
|
116
|
+
raise WrongDataDocumentInputError("data_documents")
|
|
117
|
+
|
|
114
118
|
documents = []
|
|
115
119
|
for data_item in data_documents:
|
|
116
120
|
document = EXTENSION_TO_DOCUMENT_CLASS[data_item.extension](
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
from cognee.exceptions import (
|
|
2
|
+
CogneeValidationError,
|
|
3
|
+
CogneeConfigurationError,
|
|
4
|
+
)
|
|
5
|
+
from fastapi import status
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class WrongDataDocumentInputError(CogneeValidationError):
|
|
9
|
+
"""Raised when a wrong data document is provided."""
|
|
10
|
+
|
|
11
|
+
def __init__(
|
|
12
|
+
self,
|
|
13
|
+
field: str,
|
|
14
|
+
name: str = "WrongDataDocumentInputError",
|
|
15
|
+
status_code: int = status.HTTP_422_UNPROCESSABLE_ENTITY,
|
|
16
|
+
):
|
|
17
|
+
message = f"Missing of invalid parameter: '{field}'."
|
|
18
|
+
super().__init__(message, name, status_code)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class InvalidChunkSizeError(CogneeValidationError):
|
|
22
|
+
def __init__(self, value):
|
|
23
|
+
super().__init__(
|
|
24
|
+
message=f"max_chunk_size must be a positive integer (got {value}).",
|
|
25
|
+
name="InvalidChunkSizeError",
|
|
26
|
+
status_code=status.HTTP_400_BAD_REQUEST,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class InvalidChunkerError(CogneeValidationError):
|
|
31
|
+
def __init__(self):
|
|
32
|
+
super().__init__(
|
|
33
|
+
message="chunker must be a valid Chunker class.",
|
|
34
|
+
name="InvalidChunkerError",
|
|
35
|
+
status_code=status.HTTP_400_BAD_REQUEST,
|
|
36
|
+
)
|
|
@@ -8,6 +8,7 @@ from cognee.modules.data.models import Data
|
|
|
8
8
|
from cognee.infrastructure.databases.relational import get_relational_engine
|
|
9
9
|
from cognee.modules.chunking.TextChunker import TextChunker
|
|
10
10
|
from cognee.modules.chunking.Chunker import Chunker
|
|
11
|
+
from cognee.tasks.documents.exceptions import InvalidChunkSizeError, InvalidChunkerError
|
|
11
12
|
|
|
12
13
|
|
|
13
14
|
async def update_document_token_count(document_id: UUID, token_count: int) -> None:
|
|
@@ -37,6 +38,13 @@ async def extract_chunks_from_documents(
|
|
|
37
38
|
- The `read` method of the `Document` class must be implemented to support the chunking operation.
|
|
38
39
|
- The `chunker` parameter determines the chunking logic and should align with the document type.
|
|
39
40
|
"""
|
|
41
|
+
if not isinstance(max_chunk_size, int) or max_chunk_size <= 0:
|
|
42
|
+
raise InvalidChunkSizeError(max_chunk_size)
|
|
43
|
+
if not isinstance(chunker, type):
|
|
44
|
+
raise InvalidChunkerError()
|
|
45
|
+
if not hasattr(chunker, "read"):
|
|
46
|
+
raise InvalidChunkerError()
|
|
47
|
+
|
|
40
48
|
for document in documents:
|
|
41
49
|
document_token_count = 0
|
|
42
50
|
|
|
@@ -48,5 +56,3 @@ async def extract_chunks_from_documents(
|
|
|
48
56
|
yield document_chunk
|
|
49
57
|
|
|
50
58
|
await update_document_token_count(document.id, document_token_count)
|
|
51
|
-
|
|
52
|
-
# todo rita
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Custom exceptions for the Cognee API.
|
|
3
|
+
|
|
4
|
+
This module defines a set of exceptions for handling various data errors
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from .exceptions import (
|
|
8
|
+
InvalidDataChunksError,
|
|
9
|
+
InvalidGraphModelError,
|
|
10
|
+
InvalidOntologyAdapterError,
|
|
11
|
+
InvalidChunkGraphInputError,
|
|
12
|
+
)
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
from cognee.exceptions import (
|
|
2
|
+
CogneeValidationError,
|
|
3
|
+
CogneeConfigurationError,
|
|
4
|
+
)
|
|
5
|
+
from fastapi import status
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class InvalidDataChunksError(CogneeValidationError):
|
|
9
|
+
def __init__(self, detail: str):
|
|
10
|
+
super().__init__(
|
|
11
|
+
message=f"Invalid data_chunks: {detail}",
|
|
12
|
+
name="InvalidDataChunksError",
|
|
13
|
+
status_code=status.HTTP_400_BAD_REQUEST,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class InvalidGraphModelError(CogneeValidationError):
|
|
18
|
+
def __init__(self, got):
|
|
19
|
+
super().__init__(
|
|
20
|
+
message=f"graph_model must be a subclass of BaseModel (got {got}).",
|
|
21
|
+
name="InvalidGraphModelError",
|
|
22
|
+
status_code=status.HTTP_400_BAD_REQUEST,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class InvalidOntologyAdapterError(CogneeConfigurationError):
|
|
27
|
+
def __init__(self, got):
|
|
28
|
+
super().__init__(
|
|
29
|
+
message=f"ontology_adapter lacks required interface (got {got}).",
|
|
30
|
+
name="InvalidOntologyAdapterError",
|
|
31
|
+
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class InvalidChunkGraphInputError(CogneeValidationError):
|
|
36
|
+
def __init__(self, detail: str):
|
|
37
|
+
super().__init__(
|
|
38
|
+
message=f"Invalid chunk inputs or LLM Chunkgraphs: {detail}",
|
|
39
|
+
name="InvalidChunkGraphInputError",
|
|
40
|
+
status_code=status.HTTP_400_BAD_REQUEST,
|
|
41
|
+
)
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
-
from typing import Type, List
|
|
2
|
+
from typing import Type, List, Optional
|
|
3
3
|
from pydantic import BaseModel
|
|
4
4
|
|
|
5
5
|
from cognee.infrastructure.databases.graph import get_graph_engine
|
|
@@ -12,6 +12,12 @@ from cognee.modules.graph.utils import (
|
|
|
12
12
|
)
|
|
13
13
|
from cognee.shared.data_models import KnowledgeGraph
|
|
14
14
|
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
|
15
|
+
from cognee.tasks.graph.exceptions import (
|
|
16
|
+
InvalidGraphModelError,
|
|
17
|
+
InvalidDataChunksError,
|
|
18
|
+
InvalidChunkGraphInputError,
|
|
19
|
+
InvalidOntologyAdapterError,
|
|
20
|
+
)
|
|
15
21
|
|
|
16
22
|
|
|
17
23
|
async def integrate_chunk_graphs(
|
|
@@ -21,6 +27,20 @@ async def integrate_chunk_graphs(
|
|
|
21
27
|
ontology_adapter: OntologyResolver,
|
|
22
28
|
) -> List[DocumentChunk]:
|
|
23
29
|
"""Updates DocumentChunk objects, integrates data points and edges into databases."""
|
|
30
|
+
|
|
31
|
+
if not isinstance(data_chunks, list) or not isinstance(chunk_graphs, list):
|
|
32
|
+
raise InvalidChunkGraphInputError("data_chunks and chunk_graphs must be lists.")
|
|
33
|
+
if len(data_chunks) != len(chunk_graphs):
|
|
34
|
+
raise InvalidChunkGraphInputError(
|
|
35
|
+
f"length mismatch: {len(data_chunks)} chunks vs {len(chunk_graphs)} graphs."
|
|
36
|
+
)
|
|
37
|
+
if not isinstance(graph_model, type) or not issubclass(graph_model, BaseModel):
|
|
38
|
+
raise InvalidGraphModelError(graph_model)
|
|
39
|
+
if ontology_adapter is None or not hasattr(ontology_adapter, "get_subgraph"):
|
|
40
|
+
raise InvalidOntologyAdapterError(
|
|
41
|
+
type(ontology_adapter).__name__ if ontology_adapter else "None"
|
|
42
|
+
)
|
|
43
|
+
|
|
24
44
|
graph_engine = await get_graph_engine()
|
|
25
45
|
|
|
26
46
|
if graph_model is not KnowledgeGraph:
|
|
@@ -51,12 +71,24 @@ async def extract_graph_from_data(
|
|
|
51
71
|
data_chunks: List[DocumentChunk],
|
|
52
72
|
graph_model: Type[BaseModel],
|
|
53
73
|
ontology_adapter: OntologyResolver = None,
|
|
74
|
+
custom_prompt: Optional[str] = None,
|
|
54
75
|
) -> List[DocumentChunk]:
|
|
55
76
|
"""
|
|
56
77
|
Extracts and integrates a knowledge graph from the text content of document chunks using a specified graph model.
|
|
57
78
|
"""
|
|
79
|
+
|
|
80
|
+
if not isinstance(data_chunks, list) or not data_chunks:
|
|
81
|
+
raise InvalidDataChunksError("must be a non-empty list of DocumentChunk.")
|
|
82
|
+
if not all(hasattr(c, "text") for c in data_chunks):
|
|
83
|
+
raise InvalidDataChunksError("each chunk must have a 'text' attribute")
|
|
84
|
+
if not isinstance(graph_model, type) or not issubclass(graph_model, BaseModel):
|
|
85
|
+
raise InvalidGraphModelError(graph_model)
|
|
86
|
+
|
|
58
87
|
chunk_graphs = await asyncio.gather(
|
|
59
|
-
*[
|
|
88
|
+
*[
|
|
89
|
+
LLMGateway.extract_content_graph(chunk.text, graph_model, custom_prompt=custom_prompt)
|
|
90
|
+
for chunk in data_chunks
|
|
91
|
+
]
|
|
60
92
|
)
|
|
61
93
|
|
|
62
94
|
# Note: Filter edges with missing source or target nodes
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
from cognee.exceptions import CogneeSystemError
|
|
2
|
+
from fastapi import status
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class S3FileSystemNotFoundError(CogneeSystemError):
|
|
6
|
+
def __init__(
|
|
7
|
+
self,
|
|
8
|
+
name: str = "S3FileSystemNotFoundError",
|
|
9
|
+
status_code: int = status.HTTP_500_INTERNAL_SERVER_ERROR,
|
|
10
|
+
):
|
|
11
|
+
message = "Could not find S3FileSystem."
|
|
12
|
+
super().__init__(message, name, status_code)
|
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
import os
|
|
2
2
|
from urllib.parse import urlparse
|
|
3
3
|
from typing import List, Union, BinaryIO
|
|
4
|
+
|
|
5
|
+
from cognee.tasks.ingestion.exceptions import S3FileSystemNotFoundError
|
|
6
|
+
from cognee.exceptions import CogneeSystemError
|
|
4
7
|
from cognee.infrastructure.files.storage.s3_config import get_s3_config
|
|
5
8
|
|
|
6
9
|
|
|
@@ -54,6 +57,8 @@ async def resolve_data_directories(
|
|
|
54
57
|
else:
|
|
55
58
|
s3_files.append(key)
|
|
56
59
|
resolved_data.extend(s3_files)
|
|
60
|
+
else:
|
|
61
|
+
raise S3FileSystemNotFoundError()
|
|
57
62
|
|
|
58
63
|
elif os.path.isdir(item): # If it's a directory
|
|
59
64
|
if include_subdirectories:
|
|
@@ -180,6 +180,7 @@ async def get_local_script_dependencies(
|
|
|
180
180
|
name=file_path_relative_to_repo,
|
|
181
181
|
source_code=source_code,
|
|
182
182
|
file_path=script_path,
|
|
183
|
+
language="python",
|
|
183
184
|
)
|
|
184
185
|
return code_file_node
|
|
185
186
|
|
|
@@ -188,6 +189,7 @@ async def get_local_script_dependencies(
|
|
|
188
189
|
name=file_path_relative_to_repo,
|
|
189
190
|
source_code=None,
|
|
190
191
|
file_path=script_path,
|
|
192
|
+
language="python",
|
|
191
193
|
)
|
|
192
194
|
|
|
193
195
|
async for part in extract_code_parts(source_code_tree.root_node, script_path=script_path):
|