cognee 0.2.3.dev0__py3-none-any.whl → 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (179)
  1. cognee/__main__.py +4 -0
  2. cognee/api/v1/add/add.py +18 -11
  3. cognee/api/v1/cognify/code_graph_pipeline.py +7 -1
  4. cognee/api/v1/cognify/cognify.py +22 -115
  5. cognee/api/v1/cognify/routers/get_cognify_router.py +11 -3
  6. cognee/api/v1/config/config.py +5 -13
  7. cognee/api/v1/datasets/routers/get_datasets_router.py +2 -2
  8. cognee/api/v1/delete/delete.py +1 -1
  9. cognee/api/v1/exceptions/__init__.py +13 -0
  10. cognee/api/v1/{delete → exceptions}/exceptions.py +15 -12
  11. cognee/api/v1/responses/default_tools.py +4 -0
  12. cognee/api/v1/responses/dispatch_function.py +6 -1
  13. cognee/api/v1/responses/models.py +1 -1
  14. cognee/api/v1/search/search.py +6 -7
  15. cognee/cli/__init__.py +10 -0
  16. cognee/cli/_cognee.py +180 -0
  17. cognee/cli/commands/__init__.py +1 -0
  18. cognee/cli/commands/add_command.py +80 -0
  19. cognee/cli/commands/cognify_command.py +128 -0
  20. cognee/cli/commands/config_command.py +225 -0
  21. cognee/cli/commands/delete_command.py +80 -0
  22. cognee/cli/commands/search_command.py +149 -0
  23. cognee/cli/config.py +33 -0
  24. cognee/cli/debug.py +21 -0
  25. cognee/cli/echo.py +45 -0
  26. cognee/cli/exceptions.py +23 -0
  27. cognee/cli/minimal_cli.py +97 -0
  28. cognee/cli/reference.py +26 -0
  29. cognee/cli/suppress_logging.py +12 -0
  30. cognee/eval_framework/corpus_builder/corpus_builder_executor.py +2 -2
  31. cognee/eval_framework/eval_config.py +1 -1
  32. cognee/exceptions/__init__.py +5 -5
  33. cognee/exceptions/exceptions.py +37 -17
  34. cognee/infrastructure/data/exceptions/__init__.py +7 -0
  35. cognee/infrastructure/data/exceptions/exceptions.py +22 -0
  36. cognee/infrastructure/data/utils/extract_keywords.py +3 -3
  37. cognee/infrastructure/databases/exceptions/__init__.py +3 -0
  38. cognee/infrastructure/databases/exceptions/exceptions.py +57 -9
  39. cognee/infrastructure/databases/graph/get_graph_engine.py +4 -9
  40. cognee/infrastructure/databases/graph/kuzu/adapter.py +64 -2
  41. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +49 -0
  42. cognee/infrastructure/databases/graph/neptune_driver/exceptions.py +15 -10
  43. cognee/infrastructure/databases/hybrid/falkordb/FalkorDBAdapter.py +2 -2
  44. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +4 -5
  45. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -2
  46. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +5 -3
  47. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +17 -8
  48. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +5 -5
  49. cognee/infrastructure/databases/vector/embeddings/config.py +2 -2
  50. cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +6 -6
  51. cognee/infrastructure/databases/vector/exceptions/exceptions.py +3 -3
  52. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +2 -2
  53. cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +4 -3
  54. cognee/infrastructure/files/utils/get_data_file_path.py +14 -9
  55. cognee/infrastructure/files/utils/get_file_metadata.py +2 -1
  56. cognee/infrastructure/llm/LLMGateway.py +14 -5
  57. cognee/infrastructure/llm/config.py +5 -5
  58. cognee/infrastructure/llm/exceptions.py +30 -2
  59. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +16 -5
  60. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_content_graph.py +19 -15
  61. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +5 -5
  62. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +6 -6
  63. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +2 -2
  64. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +24 -15
  65. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +6 -4
  66. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +9 -7
  67. cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +2 -2
  68. cognee/infrastructure/llm/tokenizer/HuggingFace/adapter.py +3 -3
  69. cognee/infrastructure/llm/tokenizer/Mistral/adapter.py +3 -3
  70. cognee/infrastructure/llm/tokenizer/TikToken/adapter.py +6 -6
  71. cognee/infrastructure/llm/utils.py +7 -7
  72. cognee/modules/data/exceptions/exceptions.py +18 -5
  73. cognee/modules/data/methods/__init__.py +2 -0
  74. cognee/modules/data/methods/create_authorized_dataset.py +19 -0
  75. cognee/modules/data/methods/delete_data.py +2 -4
  76. cognee/modules/data/methods/get_authorized_dataset.py +11 -5
  77. cognee/modules/data/methods/get_authorized_dataset_by_name.py +16 -0
  78. cognee/modules/data/methods/load_or_create_datasets.py +2 -20
  79. cognee/modules/data/processing/document_types/exceptions/exceptions.py +2 -2
  80. cognee/modules/graph/cognee_graph/CogneeGraph.py +6 -4
  81. cognee/modules/graph/cognee_graph/CogneeGraphElements.py +5 -10
  82. cognee/modules/graph/exceptions/__init__.py +2 -0
  83. cognee/modules/graph/exceptions/exceptions.py +25 -3
  84. cognee/modules/graph/methods/get_formatted_graph_data.py +3 -2
  85. cognee/modules/ingestion/exceptions/exceptions.py +2 -2
  86. cognee/modules/ontology/exceptions/exceptions.py +4 -4
  87. cognee/modules/pipelines/__init__.py +1 -1
  88. cognee/modules/pipelines/exceptions/exceptions.py +2 -2
  89. cognee/modules/pipelines/exceptions/tasks.py +18 -0
  90. cognee/modules/pipelines/layers/__init__.py +1 -0
  91. cognee/modules/pipelines/layers/check_pipeline_run_qualification.py +59 -0
  92. cognee/modules/pipelines/layers/pipeline_execution_mode.py +127 -0
  93. cognee/modules/pipelines/layers/reset_dataset_pipeline_run_status.py +12 -0
  94. cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +34 -0
  95. cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +55 -0
  96. cognee/modules/pipelines/layers/setup_and_check_environment.py +41 -0
  97. cognee/modules/pipelines/layers/validate_pipeline_tasks.py +20 -0
  98. cognee/modules/pipelines/methods/__init__.py +2 -0
  99. cognee/modules/pipelines/methods/get_pipeline_runs_by_dataset.py +34 -0
  100. cognee/modules/pipelines/methods/reset_pipeline_run_status.py +16 -0
  101. cognee/modules/pipelines/operations/__init__.py +0 -1
  102. cognee/modules/pipelines/operations/log_pipeline_run_initiated.py +1 -1
  103. cognee/modules/pipelines/operations/pipeline.py +23 -138
  104. cognee/modules/retrieval/base_feedback.py +11 -0
  105. cognee/modules/retrieval/cypher_search_retriever.py +1 -9
  106. cognee/modules/retrieval/exceptions/exceptions.py +12 -6
  107. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +9 -2
  108. cognee/modules/retrieval/graph_completion_cot_retriever.py +13 -6
  109. cognee/modules/retrieval/graph_completion_retriever.py +89 -5
  110. cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
  111. cognee/modules/retrieval/natural_language_retriever.py +0 -4
  112. cognee/modules/retrieval/user_qa_feedback.py +83 -0
  113. cognee/modules/retrieval/utils/extract_uuid_from_node.py +18 -0
  114. cognee/modules/retrieval/utils/models.py +40 -0
  115. cognee/modules/search/exceptions/__init__.py +7 -0
  116. cognee/modules/search/exceptions/exceptions.py +15 -0
  117. cognee/modules/search/methods/search.py +47 -7
  118. cognee/modules/search/types/SearchType.py +1 -0
  119. cognee/modules/settings/get_settings.py +2 -2
  120. cognee/modules/users/exceptions/exceptions.py +6 -6
  121. cognee/shared/CodeGraphEntities.py +1 -0
  122. cognee/shared/exceptions/exceptions.py +2 -2
  123. cognee/shared/logging_utils.py +142 -31
  124. cognee/shared/utils.py +0 -1
  125. cognee/tasks/completion/exceptions/exceptions.py +3 -3
  126. cognee/tasks/documents/classify_documents.py +4 -0
  127. cognee/tasks/documents/exceptions/__init__.py +11 -0
  128. cognee/tasks/documents/exceptions/exceptions.py +36 -0
  129. cognee/tasks/documents/extract_chunks_from_documents.py +8 -2
  130. cognee/tasks/graph/exceptions/__init__.py +12 -0
  131. cognee/tasks/graph/exceptions/exceptions.py +41 -0
  132. cognee/tasks/graph/extract_graph_from_data.py +34 -2
  133. cognee/tasks/ingestion/exceptions/__init__.py +8 -0
  134. cognee/tasks/ingestion/exceptions/exceptions.py +12 -0
  135. cognee/tasks/ingestion/resolve_data_directories.py +5 -0
  136. cognee/tasks/repo_processor/get_local_dependencies.py +2 -0
  137. cognee/tasks/repo_processor/get_repo_file_dependencies.py +120 -48
  138. cognee/tasks/storage/add_data_points.py +41 -3
  139. cognee/tasks/storage/exceptions/__init__.py +9 -0
  140. cognee/tasks/storage/exceptions/exceptions.py +13 -0
  141. cognee/tasks/storage/index_data_points.py +1 -1
  142. cognee/tasks/summarization/exceptions/__init__.py +9 -0
  143. cognee/tasks/summarization/exceptions/exceptions.py +14 -0
  144. cognee/tasks/summarization/summarize_text.py +8 -1
  145. cognee/tests/integration/cli/__init__.py +3 -0
  146. cognee/tests/integration/cli/test_cli_integration.py +331 -0
  147. cognee/tests/integration/documents/PdfDocument_test.py +2 -2
  148. cognee/tests/integration/documents/TextDocument_test.py +2 -4
  149. cognee/tests/integration/documents/UnstructuredDocument_test.py +5 -8
  150. cognee/tests/test_delete_by_id.py +1 -1
  151. cognee/tests/{test_deletion.py → test_delete_hard.py} +0 -37
  152. cognee/tests/test_delete_soft.py +85 -0
  153. cognee/tests/test_kuzu.py +2 -2
  154. cognee/tests/test_neo4j.py +2 -2
  155. cognee/tests/test_search_db.py +126 -7
  156. cognee/tests/unit/cli/__init__.py +3 -0
  157. cognee/tests/unit/cli/test_cli_commands.py +483 -0
  158. cognee/tests/unit/cli/test_cli_edge_cases.py +625 -0
  159. cognee/tests/unit/cli/test_cli_main.py +173 -0
  160. cognee/tests/unit/cli/test_cli_runner.py +62 -0
  161. cognee/tests/unit/cli/test_cli_utils.py +127 -0
  162. cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +5 -5
  163. cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +3 -3
  164. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +3 -3
  165. cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +3 -3
  166. cognee/tests/unit/modules/search/search_methods_test.py +4 -2
  167. {cognee-0.2.3.dev0.dist-info → cognee-0.2.4.dist-info}/METADATA +7 -5
  168. {cognee-0.2.3.dev0.dist-info → cognee-0.2.4.dist-info}/RECORD +172 -121
  169. cognee-0.2.4.dist-info/entry_points.txt +2 -0
  170. cognee/infrastructure/databases/exceptions/EmbeddingException.py +0 -20
  171. cognee/infrastructure/databases/graph/networkx/__init__.py +0 -0
  172. cognee/infrastructure/databases/graph/networkx/adapter.py +0 -1017
  173. cognee/infrastructure/pipeline/models/Operation.py +0 -60
  174. cognee/infrastructure/pipeline/models/__init__.py +0 -0
  175. cognee/notebooks/github_analysis_step_by_step.ipynb +0 -37
  176. cognee/tests/tasks/descriptive_metrics/networkx_metrics_test.py +0 -7
  177. {cognee-0.2.3.dev0.dist-info → cognee-0.2.4.dist-info}/WHEEL +0 -0
  178. {cognee-0.2.3.dev0.dist-info → cognee-0.2.4.dist-info}/licenses/LICENSE +0 -0
  179. {cognee-0.2.3.dev0.dist-info → cognee-0.2.4.dist-info}/licenses/NOTICE.md +0 -0
@@ -36,6 +36,7 @@ class ClassDefinition(DataPoint):
 class CodeFile(DataPoint):
     name: str
     file_path: str
+    language: Optional[str] = None  # e.g., 'python', 'javascript', 'java', etc.
     source_code: Optional[str] = None
     part_of: Optional[Repository] = None
     depends_on: Optional[List["ImportStatement"]] = []
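The hunk above adds an optional `language` field to the `CodeFile` data point. As a standalone sketch (a hypothetical stand-in model, not the actual cognee class), the effect is: existing records simply keep `language=None`, while new code-graph nodes can carry a language tag.

from typing import Optional
from pydantic import BaseModel


class CodeFileSketch(BaseModel):
    # Hypothetical mirror of the CodeFile fields shown in the hunk above.
    name: str
    file_path: str
    language: Optional[str] = None  # new optional field in 0.2.4
    source_code: Optional[str] = None


old_node = CodeFileSketch(name="utils.py", file_path="/repo/utils.py")
new_node = CodeFileSketch(name="utils.py", file_path="/repo/utils.py", language="python")
print(old_node.language, new_node.language)  # None python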
@@ -1,8 +1,8 @@
-from cognee.exceptions import CogneeApiError
+from cognee.exceptions import CogneeValidationError
 from fastapi import status


-class IngestionError(CogneeApiError):
+class IngestionError(CogneeValidationError):
     def __init__(
         self,
         message: str = "Failed to load data.",
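This hunk retargets `IngestionError` from the generic `CogneeApiError` base to the more specific `CogneeValidationError` (the wider exception split shows up in cognee/exceptions/exceptions.py in the file list). A rough illustration of what that means for callers; the import path for `IngestionError` is inferred from the changed-files list, not confirmed by the diff itself:

from cognee.exceptions import CogneeValidationError
from cognee.modules.ingestion.exceptions.exceptions import IngestionError

try:
    raise IngestionError()  # default message: "Failed to load data."
except CogneeValidationError as error:
    # In 0.2.4 the ingestion failure is caught by the validation-specific
    # branch instead of only by a catch-all CogneeApiError handler.
    print(type(error).__name__)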
@@ -15,14 +15,43 @@ from typing import Protocol
 # Configure external library logging
 def configure_external_library_logging():
     """Configure logging for external libraries to reduce verbosity"""
+    # Set environment variables to suppress LiteLLM logging
+    os.environ.setdefault("LITELLM_LOG", "ERROR")
+    os.environ.setdefault("LITELLM_SET_VERBOSE", "False")
+
     # Configure LiteLLM logging to reduce verbosity
     try:
         import litellm

+        # Disable verbose logging
         litellm.set_verbose = False

-        # Suppress LiteLLM ERROR logging using standard logging
-        logging.getLogger("litellm").setLevel(logging.CRITICAL)
+        # Set additional LiteLLM configuration
+        if hasattr(litellm, "suppress_debug_info"):
+            litellm.suppress_debug_info = True
+        if hasattr(litellm, "turn_off_message"):
+            litellm.turn_off_message = True
+        if hasattr(litellm, "_turn_on_debug"):
+            litellm._turn_on_debug = False
+
+        # Comprehensive logger suppression
+        loggers_to_suppress = [
+            "litellm",
+            "litellm.litellm_core_utils.logging_worker",
+            "litellm.litellm_core_utils",
+            "litellm.proxy",
+            "litellm.router",
+            "openai._base_client",
+            "LiteLLM",  # Capital case variant
+            "LiteLLM.core",
+            "LiteLLM.logging_worker",
+            "litellm.logging_worker",
+        ]
+
+        for logger_name in loggers_to_suppress:
+            logging.getLogger(logger_name).setLevel(logging.CRITICAL)
+            logging.getLogger(logger_name).disabled = True
+
     except ImportError:
         # LiteLLM not available, skip configuration
         pass
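For reference, the suppression strategy above boils down to two standard-library mechanisms: environment variables that LiteLLM reads at import time, and silencing named loggers. A minimal standalone sketch, independent of cognee:

import logging
import os

# Set the variables before litellm is imported anywhere.
os.environ.setdefault("LITELLM_LOG", "ERROR")
os.environ.setdefault("LITELLM_SET_VERBOSE", "False")

# Silence a few of the noisiest logger names listed in the hunk above.
for logger_name in ("litellm", "LiteLLM", "openai._base_client"):
    noisy_logger = logging.getLogger(logger_name)
    noisy_logger.setLevel(logging.CRITICAL)
    noisy_logger.disabled = True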
@@ -173,29 +202,17 @@ def log_database_configuration(logger):
     from cognee.infrastructure.databases.graph.config import get_graph_config

     try:
-        # Log relational database configuration
-        relational_config = get_relational_config()
-        if relational_config.db_provider == "postgres":
-            logger.info(f"Postgres host: {relational_config.db_host}:{relational_config.db_port}")
-        elif relational_config.db_provider == "sqlite":
-            logger.info(f"SQLite path: {relational_config.db_path}")
-
-        # Log vector database configuration
-        vector_config = get_vectordb_config()
-        if vector_config.vector_db_provider == "lancedb":
-            logger.info(f"Vector database path: {vector_config.vector_db_url}")
-        else:
-            logger.info(f"Vector database URL: {vector_config.vector_db_url}")
-
-        # Log graph database configuration
-        graph_config = get_graph_config()
-        if graph_config.graph_database_provider == "kuzu":
-            logger.info(f"Graph database path: {graph_config.graph_file_path}")
-        else:
-            logger.info(f"Graph database URL: {graph_config.graph_database_url}")
+        # Get base database directory path
+        from cognee.base_config import get_base_config
+
+        base_config = get_base_config()
+        databases_path = os.path.join(base_config.system_root_directory, "databases")
+
+        # Log concise database info
+        logger.info(f"Database storage: {databases_path}")

     except Exception as e:
-        logger.warning(f"Could not retrieve database configuration: {str(e)}")
+        logger.debug(f"Could not retrieve database configuration: {str(e)}")


 def cleanup_old_logs(logs_dir, max_files):
@@ -216,13 +233,22 @@ def cleanup_old_logs(logs_dir, max_files):

         # Remove old files that exceed the maximum
         if len(log_files) > max_files:
+            deleted_count = 0
             for old_file in log_files[max_files:]:
                 try:
                     old_file.unlink()
-                    logger.info(f"Deleted old log file: {old_file}")
+                    deleted_count += 1
+                    # Only log individual files in non-CLI mode
+                    if os.getenv("COGNEE_CLI_MODE") != "true":
+                        logger.info(f"Deleted old log file: {old_file}")
                 except Exception as e:
+                    # Always log errors
                     logger.error(f"Failed to delete old log file {old_file}: {e}")

+            # In CLI mode, show compact summary
+            if os.getenv("COGNEE_CLI_MODE") == "true" and deleted_count > 0:
+                logger.info(f"Cleaned up {deleted_count} old log files")
+
         return True
     except Exception as e:
         logger.error(f"Error cleaning up log files: {e}")
@@ -241,11 +267,81 @@ def setup_logging(log_level=None, name=None):
     """
     global _is_structlog_configured

+    # Regular detailed logging for non-CLI usage
     log_level = log_level if log_level else log_levels[os.getenv("LOG_LEVEL", "INFO")]

     # Configure external library logging early to suppress verbose output
     configure_external_library_logging()

+    # Add custom filter to suppress LiteLLM worker cancellation errors
+    class LiteLLMCancellationFilter(logging.Filter):
+        """Filter to suppress LiteLLM worker cancellation messages"""
+
+        def filter(self, record):
+            # Check if this is a LiteLLM-related logger
+            if hasattr(record, "name") and "litellm" in record.name.lower():
+                return False
+
+            # Check message content for cancellation errors
+            if hasattr(record, "msg") and record.msg:
+                msg_str = str(record.msg).lower()
+                if any(
+                    keyword in msg_str
+                    for keyword in [
+                        "loggingworker cancelled",
+                        "logging_worker.py",
+                        "cancellederror",
+                        "litellm:error",
+                    ]
+                ):
+                    return False
+
+            # Check formatted message
+            try:
+                if hasattr(record, "getMessage"):
+                    formatted_msg = record.getMessage().lower()
+                    if any(
+                        keyword in formatted_msg
+                        for keyword in [
+                            "loggingworker cancelled",
+                            "logging_worker.py",
+                            "cancellederror",
+                            "litellm:error",
+                        ]
+                    ):
+                        return False
+            except Exception:
+                pass
+
+            return True
+
+    # Apply the filter to root logger and specific loggers
+    cancellation_filter = LiteLLMCancellationFilter()
+    logging.getLogger().addFilter(cancellation_filter)
+    logging.getLogger("litellm").addFilter(cancellation_filter)
+
+    # Add custom filter to suppress LiteLLM worker cancellation errors
+    class LiteLLMFilter(logging.Filter):
+        def filter(self, record):
+            # Suppress LiteLLM worker cancellation errors
+            if hasattr(record, "msg") and isinstance(record.msg, str):
+                msg_lower = record.msg.lower()
+                if any(
+                    phrase in msg_lower
+                    for phrase in [
+                        "loggingworker cancelled",
+                        "cancellederror",
+                        "logging_worker.py",
+                        "loggingerror",
+                    ]
+                ):
+                    return False
+            return True
+
+    # Apply filter to root logger
+    litellm_filter = LiteLLMFilter()
+    logging.getLogger().addFilter(litellm_filter)
+
     def exception_handler(logger, method_name, event_dict):
         """Custom processor to handle uncaught exceptions."""
         # Check if there's an exc_info that needs to be processed
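Both filter classes above rely on the same standard-library mechanism: a `logging.Filter` whose `filter()` returns False drops the record before any handler sees it. A self-contained illustration of that mechanism (not cognee code):

import logging


class DropCancelledWorker(logging.Filter):
    def filter(self, record: logging.LogRecord) -> bool:
        # Returning False suppresses the record entirely.
        return "loggingworker cancelled" not in record.getMessage().lower()


logging.basicConfig(level=logging.INFO)
logging.getLogger().addFilter(DropCancelledWorker())

logging.error("LoggingWorker cancelled while flushing queue")  # suppressed
logging.info("regular message")  # still printed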
@@ -298,11 +394,6 @@ def setup_logging(log_level=None, name=None):
         # Hand back to the original hook → prints traceback and exits
         sys.__excepthook__(exc_type, exc_value, traceback)

-    logger.info("Want to learn more? Visit the Cognee documentation: https://docs.cognee.ai")
-    logger.info(
-        "Need help? Reach out to us on our Discord server: https://discord.gg/NQPKmU5CCg"
-    )
-
     # Install exception handlers
     sys.excepthook = handle_exception

@@ -380,18 +471,38 @@ def setup_logging(log_level=None, name=None):
     # Mark logging as configured
     _is_structlog_configured = True

+    from cognee.infrastructure.databases.relational.config import get_relational_config
+    from cognee.infrastructure.databases.vector.config import get_vectordb_config
+    from cognee.infrastructure.databases.graph.config import get_graph_config
+
+    graph_config = get_graph_config()
+    vector_config = get_vectordb_config()
+    relational_config = get_relational_config()
+
+    try:
+        # Get base database directory path
+        from cognee.base_config import get_base_config
+
+        base_config = get_base_config()
+        databases_path = os.path.join(base_config.system_root_directory, "databases")
+    except Exception as e:
+        raise ValueError from e
+
     # Get a configured logger and log system information
     logger = structlog.get_logger(name if name else __name__)
+    # Detailed initialization for regular usage
     logger.info(
         "Logging initialized",
         python_version=PYTHON_VERSION,
         structlog_version=STRUCTLOG_VERSION,
         cognee_version=COGNEE_VERSION,
         os_info=OS_INFO,
+        database_path=databases_path,
+        graph_database_name=graph_config.graph_database_name,
+        vector_config=vector_config.vector_db_provider,
+        relational_config=relational_config.db_name,
     )

-    logger.info("Want to learn more? Visit the Cognee documentation: https://docs.cognee.ai")
-
     # Log database configuration
     log_database_configuration(logger)

cognee/shared/utils.py CHANGED
@@ -3,7 +3,6 @@
 import os
 import requests
 from datetime import datetime, timezone
-import networkx as nx
 import matplotlib.pyplot as plt
 import http.server
 import socketserver
@@ -1,11 +1,11 @@
-from cognee.exceptions import CogneeApiError
+from cognee.exceptions import CogneeValidationError
 from fastapi import status


-class NoRelevantDataError(CogneeApiError):
+class NoRelevantDataError(CogneeValidationError):
     """
     Represents an error when no relevant data is found during a search. This class is a
-    subclass of CogneeApiError.
+    subclass of CogneeValidationError.

     Public methods:

@@ -10,6 +10,7 @@ from cognee.modules.data.processing.document_types import (
 )
 from cognee.modules.engine.models.node_set import NodeSet
 from cognee.modules.engine.utils.generate_node_id import generate_node_id
+from cognee.tasks.documents.exceptions import WrongDataDocumentInputError

 EXTENSION_TO_DOCUMENT_CLASS = {
     "pdf": PdfDocument,  # Text documents
@@ -111,6 +112,9 @@ async def classify_documents(data_documents: list[Data]) -> list[Document]:
     - list[Document]: A list of Document objects created based on the classified data
       documents.
     """
+    if not isinstance(data_documents, list):
+        raise WrongDataDocumentInputError("data_documents")
+
    documents = []
    for data_item in data_documents:
        document = EXTENSION_TO_DOCUMENT_CLASS[data_item.extension](
@@ -0,0 +1,11 @@
+"""
+Custom exceptions for the Cognee API.
+
+This module defines a set of exceptions for handling various data errors
+"""
+
+from .exceptions import (
+    WrongDataDocumentInputError,
+    InvalidChunkSizeError,
+    InvalidChunkerError,
+)
@@ -0,0 +1,36 @@
+from cognee.exceptions import (
+    CogneeValidationError,
+    CogneeConfigurationError,
+)
+from fastapi import status
+
+
+class WrongDataDocumentInputError(CogneeValidationError):
+    """Raised when a wrong data document is provided."""
+
+    def __init__(
+        self,
+        field: str,
+        name: str = "WrongDataDocumentInputError",
+        status_code: int = status.HTTP_422_UNPROCESSABLE_ENTITY,
+    ):
+        message = f"Missing of invalid parameter: '{field}'."
+        super().__init__(message, name, status_code)
+
+
+class InvalidChunkSizeError(CogneeValidationError):
+    def __init__(self, value):
+        super().__init__(
+            message=f"max_chunk_size must be a positive integer (got {value}).",
+            name="InvalidChunkSizeError",
+            status_code=status.HTTP_400_BAD_REQUEST,
+        )
+
+
+class InvalidChunkerError(CogneeValidationError):
+    def __init__(self):
+        super().__init__(
+            message="chunker must be a valid Chunker class.",
+            name="InvalidChunkerError",
+            status_code=status.HTTP_400_BAD_REQUEST,
+        )
@@ -8,6 +8,7 @@ from cognee.modules.data.models import Data
 from cognee.infrastructure.databases.relational import get_relational_engine
 from cognee.modules.chunking.TextChunker import TextChunker
 from cognee.modules.chunking.Chunker import Chunker
+from cognee.tasks.documents.exceptions import InvalidChunkSizeError, InvalidChunkerError


 async def update_document_token_count(document_id: UUID, token_count: int) -> None:
@@ -37,6 +38,13 @@ async def extract_chunks_from_documents(
     - The `read` method of the `Document` class must be implemented to support the chunking operation.
     - The `chunker` parameter determines the chunking logic and should align with the document type.
     """
+    if not isinstance(max_chunk_size, int) or max_chunk_size <= 0:
+        raise InvalidChunkSizeError(max_chunk_size)
+    if not isinstance(chunker, type):
+        raise InvalidChunkerError()
+    if not hasattr(chunker, "read"):
+        raise InvalidChunkerError()
+
     for document in documents:
         document_token_count = 0

@@ -48,5 +56,3 @@
             yield document_chunk

         await update_document_token_count(document.id, document_token_count)
-
-        # todo rita
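The new guards in `extract_chunks_from_documents` turn a silent misconfiguration into a typed error. A hedged sketch of how a bad `max_chunk_size` now surfaces, using the exception exactly as defined in the new module above (the import path is inferred from the changed-files list):

from cognee.tasks.documents.exceptions import InvalidChunkSizeError

max_chunk_size = 0  # invalid: must be a positive integer

try:
    if not isinstance(max_chunk_size, int) or max_chunk_size <= 0:
        raise InvalidChunkSizeError(max_chunk_size)
except InvalidChunkSizeError as error:
    # Message from the class above: "max_chunk_size must be a positive integer (got 0)."
    print(type(error).__name__)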
@@ -0,0 +1,12 @@
+"""
+Custom exceptions for the Cognee API.
+
+This module defines a set of exceptions for handling various data errors
+"""
+
+from .exceptions import (
+    InvalidDataChunksError,
+    InvalidGraphModelError,
+    InvalidOntologyAdapterError,
+    InvalidChunkGraphInputError,
+)
@@ -0,0 +1,41 @@
+from cognee.exceptions import (
+    CogneeValidationError,
+    CogneeConfigurationError,
+)
+from fastapi import status
+
+
+class InvalidDataChunksError(CogneeValidationError):
+    def __init__(self, detail: str):
+        super().__init__(
+            message=f"Invalid data_chunks: {detail}",
+            name="InvalidDataChunksError",
+            status_code=status.HTTP_400_BAD_REQUEST,
+        )
+
+
+class InvalidGraphModelError(CogneeValidationError):
+    def __init__(self, got):
+        super().__init__(
+            message=f"graph_model must be a subclass of BaseModel (got {got}).",
+            name="InvalidGraphModelError",
+            status_code=status.HTTP_400_BAD_REQUEST,
+        )
+
+
+class InvalidOntologyAdapterError(CogneeConfigurationError):
+    def __init__(self, got):
+        super().__init__(
+            message=f"ontology_adapter lacks required interface (got {got}).",
+            name="InvalidOntologyAdapterError",
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+        )
+
+
+class InvalidChunkGraphInputError(CogneeValidationError):
+    def __init__(self, detail: str):
+        super().__init__(
+            message=f"Invalid chunk inputs or LLM Chunkgraphs: {detail}",
+            name="InvalidChunkGraphInputError",
+            status_code=status.HTTP_400_BAD_REQUEST,
+        )
@@ -1,5 +1,5 @@
 import asyncio
-from typing import Type, List
+from typing import Type, List, Optional
 from pydantic import BaseModel

 from cognee.infrastructure.databases.graph import get_graph_engine
@@ -12,6 +12,12 @@ from cognee.modules.graph.utils import (
 )
 from cognee.shared.data_models import KnowledgeGraph
 from cognee.infrastructure.llm.LLMGateway import LLMGateway
+from cognee.tasks.graph.exceptions import (
+    InvalidGraphModelError,
+    InvalidDataChunksError,
+    InvalidChunkGraphInputError,
+    InvalidOntologyAdapterError,
+)


 async def integrate_chunk_graphs(
@@ -21,6 +27,20 @@ async def integrate_chunk_graphs(
     ontology_adapter: OntologyResolver,
 ) -> List[DocumentChunk]:
     """Updates DocumentChunk objects, integrates data points and edges into databases."""
+
+    if not isinstance(data_chunks, list) or not isinstance(chunk_graphs, list):
+        raise InvalidChunkGraphInputError("data_chunks and chunk_graphs must be lists.")
+    if len(data_chunks) != len(chunk_graphs):
+        raise InvalidChunkGraphInputError(
+            f"length mismatch: {len(data_chunks)} chunks vs {len(chunk_graphs)} graphs."
+        )
+    if not isinstance(graph_model, type) or not issubclass(graph_model, BaseModel):
+        raise InvalidGraphModelError(graph_model)
+    if ontology_adapter is None or not hasattr(ontology_adapter, "get_subgraph"):
+        raise InvalidOntologyAdapterError(
+            type(ontology_adapter).__name__ if ontology_adapter else "None"
+        )
+
     graph_engine = await get_graph_engine()

     if graph_model is not KnowledgeGraph:
@@ -51,12 +71,24 @@ async def extract_graph_from_data(
     data_chunks: List[DocumentChunk],
     graph_model: Type[BaseModel],
     ontology_adapter: OntologyResolver = None,
+    custom_prompt: Optional[str] = None,
 ) -> List[DocumentChunk]:
     """
     Extracts and integrates a knowledge graph from the text content of document chunks using a specified graph model.
     """
+
+    if not isinstance(data_chunks, list) or not data_chunks:
+        raise InvalidDataChunksError("must be a non-empty list of DocumentChunk.")
+    if not all(hasattr(c, "text") for c in data_chunks):
+        raise InvalidDataChunksError("each chunk must have a 'text' attribute")
+    if not isinstance(graph_model, type) or not issubclass(graph_model, BaseModel):
+        raise InvalidGraphModelError(graph_model)
+
     chunk_graphs = await asyncio.gather(
-        *[LLMGateway.extract_content_graph(chunk.text, graph_model) for chunk in data_chunks]
+        *[
+            LLMGateway.extract_content_graph(chunk.text, graph_model, custom_prompt=custom_prompt)
+            for chunk in data_chunks
+        ]
     )

     # Note: Filter edges with missing source or target nodes
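The added `custom_prompt` parameter is forwarded to `LLMGateway.extract_content_graph` for every chunk. A hedged usage sketch, assuming the task is invoked directly with already-prepared `DocumentChunk` objects and the built-in `KnowledgeGraph` model; argument names come from the signature above, and the call is left commented out because it needs a configured cognee environment:

import asyncio

from cognee.shared.data_models import KnowledgeGraph
from cognee.tasks.graph.extract_graph_from_data import extract_graph_from_data


async def extract_with_prompt(prepared_chunks):
    # prepared_chunks: List[DocumentChunk] produced by the chunking task.
    return await extract_graph_from_data(
        data_chunks=prepared_chunks,
        graph_model=KnowledgeGraph,
        custom_prompt="Focus on people, organizations, and the relationships between them.",
    )


# asyncio.run(extract_with_prompt(prepared_chunks))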
@@ -0,0 +1,8 @@
+"""
+Custom exceptions for the Cognee API.
+
+This module defines a set of exceptions for handling various application errors,
+such as System, Validation, Configuration or TransientErrors
+"""
+
+from .exceptions import S3FileSystemNotFoundError
@@ -0,0 +1,12 @@
+from cognee.exceptions import CogneeSystemError
+from fastapi import status
+
+
+class S3FileSystemNotFoundError(CogneeSystemError):
+    def __init__(
+        self,
+        name: str = "S3FileSystemNotFoundError",
+        status_code: int = status.HTTP_500_INTERNAL_SERVER_ERROR,
+    ):
+        message = "Could not find S3FileSystem."
+        super().__init__(message, name, status_code)
@@ -1,6 +1,9 @@
 import os
 from urllib.parse import urlparse
 from typing import List, Union, BinaryIO
+
+from cognee.tasks.ingestion.exceptions import S3FileSystemNotFoundError
+from cognee.exceptions import CogneeSystemError
 from cognee.infrastructure.files.storage.s3_config import get_s3_config


@@ -54,6 +57,8 @@ async def resolve_data_directories(
                     else:
                         s3_files.append(key)
                 resolved_data.extend(s3_files)
+            else:
+                raise S3FileSystemNotFoundError()

         elif os.path.isdir(item):  # If it's a directory
             if include_subdirectories:
@@ -180,6 +180,7 @@ async def get_local_script_dependencies(
             name=file_path_relative_to_repo,
             source_code=source_code,
             file_path=script_path,
+            language="python",
         )
         return code_file_node

@@ -188,6 +189,7 @@ async def get_local_script_dependencies(
             name=file_path_relative_to_repo,
             source_code=None,
             file_path=script_path,
+            language="python",
         )

     async for part in extract_code_parts(source_code_tree.root_node, script_path=script_path):