cognee 0.2.3.dev1__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (252)
  1. cognee/__init__.py +2 -0
  2. cognee/__main__.py +4 -0
  3. cognee/api/client.py +28 -3
  4. cognee/api/health.py +10 -13
  5. cognee/api/v1/add/add.py +20 -6
  6. cognee/api/v1/add/routers/get_add_router.py +12 -37
  7. cognee/api/v1/cloud/routers/__init__.py +1 -0
  8. cognee/api/v1/cloud/routers/get_checks_router.py +23 -0
  9. cognee/api/v1/cognify/code_graph_pipeline.py +14 -3
  10. cognee/api/v1/cognify/cognify.py +67 -105
  11. cognee/api/v1/cognify/routers/get_cognify_router.py +11 -3
  12. cognee/api/v1/datasets/routers/get_datasets_router.py +16 -5
  13. cognee/api/v1/memify/routers/__init__.py +1 -0
  14. cognee/api/v1/memify/routers/get_memify_router.py +100 -0
  15. cognee/api/v1/notebooks/routers/__init__.py +1 -0
  16. cognee/api/v1/notebooks/routers/get_notebooks_router.py +96 -0
  17. cognee/api/v1/responses/default_tools.py +4 -0
  18. cognee/api/v1/responses/dispatch_function.py +6 -1
  19. cognee/api/v1/responses/models.py +1 -1
  20. cognee/api/v1/search/routers/get_search_router.py +20 -1
  21. cognee/api/v1/search/search.py +17 -4
  22. cognee/api/v1/sync/__init__.py +17 -0
  23. cognee/api/v1/sync/routers/__init__.py +3 -0
  24. cognee/api/v1/sync/routers/get_sync_router.py +241 -0
  25. cognee/api/v1/sync/sync.py +877 -0
  26. cognee/api/v1/ui/__init__.py +1 -0
  27. cognee/api/v1/ui/ui.py +529 -0
  28. cognee/api/v1/users/routers/get_auth_router.py +13 -1
  29. cognee/base_config.py +10 -1
  30. cognee/cli/__init__.py +10 -0
  31. cognee/cli/_cognee.py +273 -0
  32. cognee/cli/commands/__init__.py +1 -0
  33. cognee/cli/commands/add_command.py +80 -0
  34. cognee/cli/commands/cognify_command.py +128 -0
  35. cognee/cli/commands/config_command.py +225 -0
  36. cognee/cli/commands/delete_command.py +80 -0
  37. cognee/cli/commands/search_command.py +149 -0
  38. cognee/cli/config.py +33 -0
  39. cognee/cli/debug.py +21 -0
  40. cognee/cli/echo.py +45 -0
  41. cognee/cli/exceptions.py +23 -0
  42. cognee/cli/minimal_cli.py +97 -0
  43. cognee/cli/reference.py +26 -0
  44. cognee/cli/suppress_logging.py +12 -0
  45. cognee/eval_framework/corpus_builder/corpus_builder_executor.py +2 -2
  46. cognee/eval_framework/eval_config.py +1 -1
  47. cognee/infrastructure/databases/graph/config.py +10 -4
  48. cognee/infrastructure/databases/graph/get_graph_engine.py +4 -9
  49. cognee/infrastructure/databases/graph/kuzu/adapter.py +199 -2
  50. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +138 -0
  51. cognee/infrastructure/databases/relational/__init__.py +2 -0
  52. cognee/infrastructure/databases/relational/get_async_session.py +15 -0
  53. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +6 -1
  54. cognee/infrastructure/databases/relational/with_async_session.py +25 -0
  55. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +1 -1
  56. cognee/infrastructure/databases/vector/config.py +13 -6
  57. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +6 -4
  58. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +16 -7
  59. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +5 -5
  60. cognee/infrastructure/databases/vector/embeddings/config.py +2 -2
  61. cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +2 -6
  62. cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +10 -7
  63. cognee/infrastructure/files/storage/LocalFileStorage.py +9 -0
  64. cognee/infrastructure/files/storage/S3FileStorage.py +5 -0
  65. cognee/infrastructure/files/storage/StorageManager.py +7 -1
  66. cognee/infrastructure/files/storage/storage.py +16 -0
  67. cognee/infrastructure/files/utils/get_data_file_path.py +14 -9
  68. cognee/infrastructure/files/utils/get_file_metadata.py +2 -1
  69. cognee/infrastructure/llm/LLMGateway.py +32 -5
  70. cognee/infrastructure/llm/config.py +6 -4
  71. cognee/infrastructure/llm/prompts/extract_query_time.txt +15 -0
  72. cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +25 -0
  73. cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +30 -0
  74. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +16 -5
  75. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py +2 -0
  76. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_event_entities.py +44 -0
  77. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/__init__.py +1 -0
  78. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_content_graph.py +19 -15
  79. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_event_graph.py +46 -0
  80. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +3 -3
  81. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +3 -3
  82. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +2 -2
  83. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +14 -8
  84. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +6 -4
  85. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +28 -4
  86. cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +2 -2
  87. cognee/infrastructure/llm/tokenizer/HuggingFace/adapter.py +3 -3
  88. cognee/infrastructure/llm/tokenizer/Mistral/adapter.py +3 -3
  89. cognee/infrastructure/llm/tokenizer/TikToken/adapter.py +6 -6
  90. cognee/infrastructure/llm/utils.py +7 -7
  91. cognee/infrastructure/utils/run_sync.py +8 -1
  92. cognee/modules/chunking/models/DocumentChunk.py +4 -3
  93. cognee/modules/cloud/exceptions/CloudApiKeyMissingError.py +15 -0
  94. cognee/modules/cloud/exceptions/CloudConnectionError.py +15 -0
  95. cognee/modules/cloud/exceptions/__init__.py +2 -0
  96. cognee/modules/cloud/operations/__init__.py +1 -0
  97. cognee/modules/cloud/operations/check_api_key.py +25 -0
  98. cognee/modules/data/deletion/prune_system.py +1 -1
  99. cognee/modules/data/methods/__init__.py +2 -0
  100. cognee/modules/data/methods/check_dataset_name.py +1 -1
  101. cognee/modules/data/methods/create_authorized_dataset.py +19 -0
  102. cognee/modules/data/methods/get_authorized_dataset.py +11 -5
  103. cognee/modules/data/methods/get_authorized_dataset_by_name.py +16 -0
  104. cognee/modules/data/methods/get_dataset_data.py +1 -1
  105. cognee/modules/data/methods/load_or_create_datasets.py +2 -20
  106. cognee/modules/engine/models/Event.py +16 -0
  107. cognee/modules/engine/models/Interval.py +8 -0
  108. cognee/modules/engine/models/Timestamp.py +13 -0
  109. cognee/modules/engine/models/__init__.py +3 -0
  110. cognee/modules/engine/utils/__init__.py +2 -0
  111. cognee/modules/engine/utils/generate_event_datapoint.py +46 -0
  112. cognee/modules/engine/utils/generate_timestamp_datapoint.py +51 -0
  113. cognee/modules/graph/cognee_graph/CogneeGraph.py +2 -2
  114. cognee/modules/graph/methods/get_formatted_graph_data.py +3 -2
  115. cognee/modules/graph/utils/__init__.py +1 -0
  116. cognee/modules/graph/utils/resolve_edges_to_text.py +71 -0
  117. cognee/modules/memify/__init__.py +1 -0
  118. cognee/modules/memify/memify.py +118 -0
  119. cognee/modules/notebooks/methods/__init__.py +5 -0
  120. cognee/modules/notebooks/methods/create_notebook.py +26 -0
  121. cognee/modules/notebooks/methods/delete_notebook.py +13 -0
  122. cognee/modules/notebooks/methods/get_notebook.py +21 -0
  123. cognee/modules/notebooks/methods/get_notebooks.py +18 -0
  124. cognee/modules/notebooks/methods/update_notebook.py +17 -0
  125. cognee/modules/notebooks/models/Notebook.py +53 -0
  126. cognee/modules/notebooks/models/__init__.py +1 -0
  127. cognee/modules/notebooks/operations/__init__.py +1 -0
  128. cognee/modules/notebooks/operations/run_in_local_sandbox.py +55 -0
  129. cognee/modules/pipelines/__init__.py +1 -1
  130. cognee/modules/pipelines/exceptions/tasks.py +18 -0
  131. cognee/modules/pipelines/layers/__init__.py +1 -0
  132. cognee/modules/pipelines/layers/check_pipeline_run_qualification.py +59 -0
  133. cognee/modules/pipelines/layers/pipeline_execution_mode.py +127 -0
  134. cognee/modules/pipelines/layers/reset_dataset_pipeline_run_status.py +28 -0
  135. cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +34 -0
  136. cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +55 -0
  137. cognee/modules/pipelines/layers/setup_and_check_environment.py +41 -0
  138. cognee/modules/pipelines/layers/validate_pipeline_tasks.py +20 -0
  139. cognee/modules/pipelines/methods/__init__.py +2 -0
  140. cognee/modules/pipelines/methods/get_pipeline_runs_by_dataset.py +34 -0
  141. cognee/modules/pipelines/methods/reset_pipeline_run_status.py +16 -0
  142. cognee/modules/pipelines/operations/__init__.py +0 -1
  143. cognee/modules/pipelines/operations/log_pipeline_run_initiated.py +1 -1
  144. cognee/modules/pipelines/operations/pipeline.py +24 -138
  145. cognee/modules/pipelines/operations/run_tasks.py +17 -41
  146. cognee/modules/retrieval/base_feedback.py +11 -0
  147. cognee/modules/retrieval/base_graph_retriever.py +18 -0
  148. cognee/modules/retrieval/base_retriever.py +1 -1
  149. cognee/modules/retrieval/code_retriever.py +8 -0
  150. cognee/modules/retrieval/coding_rules_retriever.py +31 -0
  151. cognee/modules/retrieval/completion_retriever.py +9 -3
  152. cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +1 -0
  153. cognee/modules/retrieval/cypher_search_retriever.py +1 -9
  154. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +29 -13
  155. cognee/modules/retrieval/graph_completion_cot_retriever.py +30 -13
  156. cognee/modules/retrieval/graph_completion_retriever.py +107 -56
  157. cognee/modules/retrieval/graph_summary_completion_retriever.py +5 -1
  158. cognee/modules/retrieval/insights_retriever.py +14 -3
  159. cognee/modules/retrieval/natural_language_retriever.py +0 -4
  160. cognee/modules/retrieval/summaries_retriever.py +1 -1
  161. cognee/modules/retrieval/temporal_retriever.py +152 -0
  162. cognee/modules/retrieval/user_qa_feedback.py +83 -0
  163. cognee/modules/retrieval/utils/brute_force_triplet_search.py +7 -32
  164. cognee/modules/retrieval/utils/completion.py +10 -3
  165. cognee/modules/retrieval/utils/extract_uuid_from_node.py +18 -0
  166. cognee/modules/retrieval/utils/models.py +40 -0
  167. cognee/modules/search/methods/get_search_type_tools.py +168 -0
  168. cognee/modules/search/methods/no_access_control_search.py +47 -0
  169. cognee/modules/search/methods/search.py +239 -118
  170. cognee/modules/search/types/SearchResult.py +21 -0
  171. cognee/modules/search/types/SearchType.py +3 -0
  172. cognee/modules/search/types/__init__.py +1 -0
  173. cognee/modules/search/utils/__init__.py +2 -0
  174. cognee/modules/search/utils/prepare_search_result.py +41 -0
  175. cognee/modules/search/utils/transform_context_to_graph.py +38 -0
  176. cognee/modules/settings/get_settings.py +2 -2
  177. cognee/modules/sync/__init__.py +1 -0
  178. cognee/modules/sync/methods/__init__.py +23 -0
  179. cognee/modules/sync/methods/create_sync_operation.py +53 -0
  180. cognee/modules/sync/methods/get_sync_operation.py +107 -0
  181. cognee/modules/sync/methods/update_sync_operation.py +248 -0
  182. cognee/modules/sync/models/SyncOperation.py +142 -0
  183. cognee/modules/sync/models/__init__.py +3 -0
  184. cognee/modules/users/__init__.py +0 -1
  185. cognee/modules/users/methods/__init__.py +4 -1
  186. cognee/modules/users/methods/create_user.py +26 -1
  187. cognee/modules/users/methods/get_authenticated_user.py +36 -42
  188. cognee/modules/users/methods/get_default_user.py +3 -1
  189. cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py +2 -1
  190. cognee/root_dir.py +19 -0
  191. cognee/shared/CodeGraphEntities.py +1 -0
  192. cognee/shared/logging_utils.py +143 -32
  193. cognee/shared/utils.py +0 -1
  194. cognee/tasks/codingagents/coding_rule_associations.py +127 -0
  195. cognee/tasks/graph/extract_graph_from_data.py +6 -2
  196. cognee/tasks/ingestion/save_data_item_to_storage.py +23 -0
  197. cognee/tasks/memify/__init__.py +2 -0
  198. cognee/tasks/memify/extract_subgraph.py +7 -0
  199. cognee/tasks/memify/extract_subgraph_chunks.py +11 -0
  200. cognee/tasks/repo_processor/get_local_dependencies.py +2 -0
  201. cognee/tasks/repo_processor/get_repo_file_dependencies.py +144 -47
  202. cognee/tasks/storage/add_data_points.py +33 -3
  203. cognee/tasks/temporal_graph/__init__.py +1 -0
  204. cognee/tasks/temporal_graph/add_entities_to_event.py +85 -0
  205. cognee/tasks/temporal_graph/enrich_events.py +34 -0
  206. cognee/tasks/temporal_graph/extract_events_and_entities.py +32 -0
  207. cognee/tasks/temporal_graph/extract_knowledge_graph_from_events.py +41 -0
  208. cognee/tasks/temporal_graph/models.py +49 -0
  209. cognee/tests/integration/cli/__init__.py +3 -0
  210. cognee/tests/integration/cli/test_cli_integration.py +331 -0
  211. cognee/tests/integration/documents/PdfDocument_test.py +2 -2
  212. cognee/tests/integration/documents/TextDocument_test.py +2 -4
  213. cognee/tests/integration/documents/UnstructuredDocument_test.py +5 -8
  214. cognee/tests/{test_deletion.py → test_delete_hard.py} +0 -37
  215. cognee/tests/test_delete_soft.py +85 -0
  216. cognee/tests/test_kuzu.py +2 -2
  217. cognee/tests/test_neo4j.py +2 -2
  218. cognee/tests/test_permissions.py +3 -3
  219. cognee/tests/test_relational_db_migration.py +7 -5
  220. cognee/tests/test_search_db.py +136 -23
  221. cognee/tests/test_temporal_graph.py +167 -0
  222. cognee/tests/unit/api/__init__.py +1 -0
  223. cognee/tests/unit/api/test_conditional_authentication_endpoints.py +246 -0
  224. cognee/tests/unit/cli/__init__.py +3 -0
  225. cognee/tests/unit/cli/test_cli_commands.py +483 -0
  226. cognee/tests/unit/cli/test_cli_edge_cases.py +625 -0
  227. cognee/tests/unit/cli/test_cli_main.py +173 -0
  228. cognee/tests/unit/cli/test_cli_runner.py +62 -0
  229. cognee/tests/unit/cli/test_cli_utils.py +127 -0
  230. cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +18 -2
  231. cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +12 -15
  232. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +10 -15
  233. cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +4 -3
  234. cognee/tests/unit/modules/retrieval/insights_retriever_test.py +4 -2
  235. cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +18 -2
  236. cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +225 -0
  237. cognee/tests/unit/modules/users/__init__.py +1 -0
  238. cognee/tests/unit/modules/users/test_conditional_authentication.py +277 -0
  239. cognee/tests/unit/processing/utils/utils_test.py +20 -1
  240. {cognee-0.2.3.dev1.dist-info → cognee-0.3.0.dist-info}/METADATA +13 -9
  241. {cognee-0.2.3.dev1.dist-info → cognee-0.3.0.dist-info}/RECORD +247 -135
  242. cognee-0.3.0.dist-info/entry_points.txt +2 -0
  243. cognee/infrastructure/databases/graph/networkx/adapter.py +0 -1017
  244. cognee/infrastructure/pipeline/models/Operation.py +0 -60
  245. cognee/notebooks/github_analysis_step_by_step.ipynb +0 -37
  246. cognee/tests/tasks/descriptive_metrics/networkx_metrics_test.py +0 -7
  247. cognee/tests/unit/modules/search/search_methods_test.py +0 -223
  248. /cognee/{infrastructure/databases/graph/networkx → api/v1/memify}/__init__.py +0 -0
  249. /cognee/{infrastructure/pipeline/models → tasks/codingagents}/__init__.py +0 -0
  250. {cognee-0.2.3.dev1.dist-info → cognee-0.3.0.dist-info}/WHEEL +0 -0
  251. {cognee-0.2.3.dev1.dist-info → cognee-0.3.0.dist-info}/licenses/LICENSE +0 -0
  252. {cognee-0.2.3.dev1.dist-info → cognee-0.3.0.dist-info}/licenses/NOTICE.md +0 -0
cognee/root_dir.py CHANGED
@@ -1,4 +1,5 @@
1
1
  from pathlib import Path
2
+ from typing import Optional
2
3
 
3
4
  ROOT_DIR = Path(__file__).resolve().parent
4
5
 
@@ -6,3 +7,21 @@ ROOT_DIR = Path(__file__).resolve().parent
6
7
  def get_absolute_path(path_from_root: str) -> str:
7
8
  absolute_path = ROOT_DIR / path_from_root
8
9
  return str(absolute_path.resolve())
10
+
11
+
12
def ensure_absolute_path(path: Optional[str]) -> str:
    """Return ``path`` as a resolved absolute path string, rejecting relative paths.

    A leading ``~`` is expanded before the check, so home-relative input is
    treated as absolute.

    Args:
        path: The path to validate. ``None`` is rejected explicitly, which is
            why the parameter is annotated as optional.

    Returns:
        The resolved absolute path as a string.

    Raises:
        ValueError: If ``path`` is ``None`` or is still relative after ``~``
            expansion (this includes the empty string).
    """
    if path is None:
        raise ValueError("Path cannot be None")

    candidate = Path(path).expanduser()
    if not candidate.is_absolute():
        raise ValueError(f"Path must be absolute. Got relative path: {path}")

    return str(candidate.resolve())
@@ -36,6 +36,7 @@ class ClassDefinition(DataPoint):
36
36
  class CodeFile(DataPoint):
37
37
  name: str
38
38
  file_path: str
39
+ language: Optional[str] = None # e.g., 'python', 'javascript', 'java', etc.
39
40
  source_code: Optional[str] = None
40
41
  part_of: Optional[Repository] = None
41
42
  depends_on: Optional[List["ImportStatement"]] = []
@@ -15,14 +15,43 @@ from typing import Protocol
15
15
  # Configure external library logging
16
16
  def configure_external_library_logging():
17
17
  """Configure logging for external libraries to reduce verbosity"""
18
+ # Set environment variables to suppress LiteLLM logging
19
+ os.environ.setdefault("LITELLM_LOG", "ERROR")
20
+ os.environ.setdefault("LITELLM_SET_VERBOSE", "False")
21
+
18
22
  # Configure LiteLLM logging to reduce verbosity
19
23
  try:
20
24
  import litellm
21
25
 
26
+ # Disable verbose logging
22
27
  litellm.set_verbose = False
23
28
 
24
- # Suppress LiteLLM ERROR logging using standard logging
25
- logging.getLogger("litellm").setLevel(logging.CRITICAL)
29
+ # Set additional LiteLLM configuration
30
+ if hasattr(litellm, "suppress_debug_info"):
31
+ litellm.suppress_debug_info = True
32
+ if hasattr(litellm, "turn_off_message"):
33
+ litellm.turn_off_message = True
34
+ if hasattr(litellm, "_turn_on_debug"):
35
+ litellm._turn_on_debug = False
36
+
37
+ # Comprehensive logger suppression
38
+ loggers_to_suppress = [
39
+ "litellm",
40
+ "litellm.litellm_core_utils.logging_worker",
41
+ "litellm.litellm_core_utils",
42
+ "litellm.proxy",
43
+ "litellm.router",
44
+ "openai._base_client",
45
+ "LiteLLM", # Capital case variant
46
+ "LiteLLM.core",
47
+ "LiteLLM.logging_worker",
48
+ "litellm.logging_worker",
49
+ ]
50
+
51
+ for logger_name in loggers_to_suppress:
52
+ logging.getLogger(logger_name).setLevel(logging.CRITICAL)
53
+ logging.getLogger(logger_name).disabled = True
54
+
26
55
  except ImportError:
27
56
  # LiteLLM not available, skip configuration
28
57
  pass
@@ -173,29 +202,17 @@ def log_database_configuration(logger):
173
202
  from cognee.infrastructure.databases.graph.config import get_graph_config
174
203
 
175
204
  try:
176
- # Log relational database configuration
177
- relational_config = get_relational_config()
178
- if relational_config.db_provider == "postgres":
179
- logger.info(f"Postgres host: {relational_config.db_host}:{relational_config.db_port}")
180
- elif relational_config.db_provider == "sqlite":
181
- logger.info(f"SQLite path: {relational_config.db_path}")
182
-
183
- # Log vector database configuration
184
- vector_config = get_vectordb_config()
185
- if vector_config.vector_db_provider == "lancedb":
186
- logger.info(f"Vector database path: {vector_config.vector_db_url}")
187
- else:
188
- logger.info(f"Vector database URL: {vector_config.vector_db_url}")
189
-
190
- # Log graph database configuration
191
- graph_config = get_graph_config()
192
- if graph_config.graph_database_provider == "kuzu":
193
- logger.info(f"Graph database path: {graph_config.graph_file_path}")
194
- else:
195
- logger.info(f"Graph database URL: {graph_config.graph_database_url}")
205
+ # Get base database directory path
206
+ from cognee.base_config import get_base_config
207
+
208
+ base_config = get_base_config()
209
+ databases_path = os.path.join(base_config.system_root_directory, "databases")
210
+
211
+ # Log concise database info
212
+ logger.info(f"Database storage: {databases_path}")
196
213
 
197
214
  except Exception as e:
198
- logger.warning(f"Could not retrieve database configuration: {str(e)}")
215
+ logger.debug(f"Could not retrieve database configuration: {str(e)}")
199
216
 
200
217
 
201
218
  def cleanup_old_logs(logs_dir, max_files):
@@ -216,13 +233,22 @@ def cleanup_old_logs(logs_dir, max_files):
216
233
 
217
234
  # Remove old files that exceed the maximum
218
235
  if len(log_files) > max_files:
236
+ deleted_count = 0
219
237
  for old_file in log_files[max_files:]:
220
238
  try:
221
239
  old_file.unlink()
222
- logger.info(f"Deleted old log file: {old_file}")
240
+ deleted_count += 1
241
+ # Only log individual files in non-CLI mode
242
+ if os.getenv("COGNEE_CLI_MODE") != "true":
243
+ logger.info(f"Deleted old log file: {old_file}")
223
244
  except Exception as e:
245
+ # Always log errors
224
246
  logger.error(f"Failed to delete old log file {old_file}: {e}")
225
247
 
248
+ # In CLI mode, show compact summary
249
+ if os.getenv("COGNEE_CLI_MODE") == "true" and deleted_count > 0:
250
+ logger.info(f"Cleaned up {deleted_count} old log files")
251
+
226
252
  return True
227
253
  except Exception as e:
228
254
  logger.error(f"Error cleaning up log files: {e}")
@@ -241,11 +267,81 @@ def setup_logging(log_level=None, name=None):
241
267
  """
242
268
  global _is_structlog_configured
243
269
 
244
- log_level = log_level if log_level else log_levels[os.getenv("LOG_LEVEL", "INFO")]
270
+ # Regular detailed logging for non-CLI usage
271
+ log_level = log_level if log_level else log_levels[os.getenv("LOG_LEVEL", "INFO").upper()]
245
272
 
246
273
  # Configure external library logging early to suppress verbose output
247
274
  configure_external_library_logging()
248
275
 
276
+ # Add custom filter to suppress LiteLLM worker cancellation errors
277
class LiteLLMCancellationFilter(logging.Filter):
    """Drop LiteLLM records and LiteLLM worker-cancellation noise.

    A record is rejected when its logger name contains ``litellm`` (case
    insensitive) or when its raw or formatted message contains one of the
    suppressed fragments. Everything else passes through.
    """

    # Lower-cased fragments; messages are lower-cased before matching.
    _SUPPRESSED_FRAGMENTS = (
        "loggingworker cancelled",
        "logging_worker.py",
        "cancellederror",
        "litellm:error",
    )

    def _is_suppressed(self, text):
        """Return True when ``text`` contains any suppressed fragment."""
        lowered = text.lower()
        return any(fragment in lowered for fragment in self._SUPPRESSED_FRAGMENTS)

    def filter(self, record):
        """Return False to drop ``record``, True to keep it."""
        logger_name = getattr(record, "name", None)
        if logger_name and "litellm" in logger_name.lower():
            return False

        # The raw template is checked separately from the formatted message so
        # a match is still found when formatting the record fails below.
        raw_msg = getattr(record, "msg", None)
        if raw_msg and self._is_suppressed(str(raw_msg)):
            return False

        try:
            if self._is_suppressed(record.getMessage()):
                return False
        except Exception:
            # Best effort: a record that cannot be rendered here (for example
            # mismatched format args) is kept rather than breaking logging.
            pass

        return True
317
+
318
+ # Apply the filter to root logger and specific loggers
319
+ cancellation_filter = LiteLLMCancellationFilter()
320
+ logging.getLogger().addFilter(cancellation_filter)
321
+ logging.getLogger("litellm").addFilter(cancellation_filter)
322
+
323
+ # Add custom filter to suppress LiteLLM worker cancellation errors
324
class LiteLLMFilter(logging.Filter):
    """Drop records whose raw string message mentions LiteLLM worker failures.

    Only ``record.msg`` is inspected, and only when it is a ``str``; records
    with any other message object are always kept.
    """

    # Lower-cased phrases; the message is lower-cased before matching.
    _SUPPRESSED_PHRASES = (
        "loggingworker cancelled",
        "cancellederror",
        "logging_worker.py",
        "loggingerror",
    )

    def filter(self, record):
        """Return False to drop ``record``, True to keep it."""
        message = getattr(record, "msg", None)
        if not isinstance(message, str):
            return True

        lowered = message.lower()
        return not any(phrase in lowered for phrase in self._SUPPRESSED_PHRASES)
340
+
341
+ # Apply filter to root logger
342
+ litellm_filter = LiteLLMFilter()
343
+ logging.getLogger().addFilter(litellm_filter)
344
+
249
345
  def exception_handler(logger, method_name, event_dict):
250
346
  """Custom processor to handle uncaught exceptions."""
251
347
  # Check if there's an exc_info that needs to be processed
@@ -298,11 +394,6 @@ def setup_logging(log_level=None, name=None):
298
394
  # Hand back to the original hook → prints traceback and exits
299
395
  sys.__excepthook__(exc_type, exc_value, traceback)
300
396
 
301
- logger.info("Want to learn more? Visit the Cognee documentation: https://docs.cognee.ai")
302
- logger.info(
303
- "Need help? Reach out to us on our Discord server: https://discord.gg/NQPKmU5CCg"
304
- )
305
-
306
397
  # Install exception handlers
307
398
  sys.excepthook = handle_exception
308
399
 
@@ -380,18 +471,38 @@ def setup_logging(log_level=None, name=None):
380
471
  # Mark logging as configured
381
472
  _is_structlog_configured = True
382
473
 
474
+ from cognee.infrastructure.databases.relational.config import get_relational_config
475
+ from cognee.infrastructure.databases.vector.config import get_vectordb_config
476
+ from cognee.infrastructure.databases.graph.config import get_graph_config
477
+
478
+ graph_config = get_graph_config()
479
+ vector_config = get_vectordb_config()
480
+ relational_config = get_relational_config()
481
+
482
+ try:
483
+ # Get base database directory path
484
+ from cognee.base_config import get_base_config
485
+
486
+ base_config = get_base_config()
487
+ databases_path = os.path.join(base_config.system_root_directory, "databases")
488
+ except Exception as e:
489
+ raise ValueError from e
490
+
383
491
  # Get a configured logger and log system information
384
492
  logger = structlog.get_logger(name if name else __name__)
493
+ # Detailed initialization for regular usage
385
494
  logger.info(
386
495
  "Logging initialized",
387
496
  python_version=PYTHON_VERSION,
388
497
  structlog_version=STRUCTLOG_VERSION,
389
498
  cognee_version=COGNEE_VERSION,
390
499
  os_info=OS_INFO,
500
+ database_path=databases_path,
501
+ graph_database_name=graph_config.graph_database_name,
502
+ vector_config=vector_config.vector_db_provider,
503
+ relational_config=relational_config.db_name,
391
504
  )
392
505
 
393
- logger.info("Want to learn more? Visit the Cognee documentation: https://docs.cognee.ai")
394
-
395
506
  # Log database configuration
396
507
  log_database_configuration(logger)
397
508
 
cognee/shared/utils.py CHANGED
@@ -3,7 +3,6 @@
3
3
  import os
4
4
  import requests
5
5
  from datetime import datetime, timezone
6
- import networkx as nx
7
6
  import matplotlib.pyplot as plt
8
7
  import http.server
9
8
  import socketserver
@@ -0,0 +1,127 @@
1
+ from uuid import NAMESPACE_OID, uuid5
2
+
3
+ from cognee.infrastructure.databases.graph import get_graph_engine
4
+ from cognee.infrastructure.databases.vector import get_vector_engine
5
+
6
+ from cognee.low_level import DataPoint
7
+ from cognee.infrastructure.llm import LLMGateway
8
+ from cognee.shared.logging_utils import get_logger
9
+ from cognee.modules.engine.models import NodeSet
10
+ from cognee.tasks.storage import add_data_points, index_graph_edges
11
+ from typing import Optional, List, Any
12
+ from pydantic import Field
13
+
14
+ logger = get_logger("coding_rule_association")
15
+
16
+
17
class Rule(DataPoint):
    """A single developer rule extracted from text."""

    # The rule content itself.
    text: str = Field(..., description="The coding rule associated with the conversation")
    # Node set this rule is grouped under; left unset at construction time.
    belongs_to_set: Optional[NodeSet] = None
    # The model has no `rule` attribute, so the index field must point at
    # `text`, the attribute that actually holds the rule content.
    # NOTE(review): assumes index_fields lists attribute names of this model
    # to be indexed - confirm against DataPoint before merging.
    metadata: dict = {"index_fields": ["text"]}
23
+
24
+
25
class RuleSet(DataPoint):
    """Container model for the full list of rules parsed from one input."""

    # Required field: every developer rule found in the input text.
    rules: List[Rule] = Field(
        ...,
        description=(
            "List of developer rules extracted from the input text. "
            "Each rule represents a coding best practice or guideline."
        ),
    )
32
+
33
+
34
async def get_existing_rules(rules_nodeset_name: str) -> List[str]:
    """Collect the ``text`` property of nodes in the named node set's subgraph.

    Args:
        rules_nodeset_name: Name of the NodeSet whose subgraph is queried.

    Returns:
        The ``text`` value of every returned node that is a 2-tuple whose
        second element is a dict containing a ``"text"`` key. Nodes of any
        other shape are ignored.
    """
    engine = await get_graph_engine()
    nodes, _edges = await engine.get_nodeset_subgraph(
        node_type=NodeSet, node_name=[rules_nodeset_name]
    )

    rule_texts = []
    for node in nodes:
        # Only (id, properties) style pairs are considered.
        if not isinstance(node, tuple) or len(node) != 2:
            continue

        properties = node[1]
        if isinstance(properties, dict) and "text" in properties:
            rule_texts.append(properties["text"])

    return rule_texts
50
+
51
+
52
async def get_origin_edges(data: str, rules: List[Rule]) -> list[Any]:
    """Build edges that link each rule to the chunk most similar to ``data``.

    The ``DocumentChunk_text`` collection is searched for the single best
    match to ``data``; that result's ``id`` becomes the target of one
    ``rule_associated_from`` edge per rule.

    Args:
        data: Text the rules were extracted from, used as the search query.
        rules: Rules to link. Items without an ``id`` are skipped.

    Returns:
        A list of ``(rule_id, origin_id, relationship_name, properties)``
        tuples. Empty when no origin chunk id could be read or when ``rules``
        is not a non-empty list or tuple.
    """
    vector_engine = get_vector_engine()

    origin_chunk = await vector_engine.search("DocumentChunk_text", data, limit=1)

    try:
        origin_id = origin_chunk[0].id
    except (AttributeError, KeyError, TypeError, IndexError):
        # No usable search hit; handled by the guard below.
        origin_id = None

    if not origin_id or not isinstance(rules, (list, tuple)) or len(rules) == 0:
        logger.info("No valid origin_id or rules provided.")
        return []

    rel_name = "rule_associated_from"
    relationships = []

    for rule in rules:
        try:
            rule_id = getattr(rule, "id", None)
        except Exception as e:
            # A skipped rule is a warning condition, so log it at that level
            # instead of at info with a "Warning:" prefix in the text.
            logger.warning(f"Skipping invalid rule due to error: {e}")
            continue

        if rule_id is None:
            continue

        relationships.append(
            (
                rule_id,
                origin_id,
                rel_name,
                {
                    "relationship_name": rel_name,
                    "source_node_id": rule_id,
                    "target_node_id": origin_id,
                    "ontology_valid": False,
                },
            )
        )

    return relationships
89
+
90
+
91
async def add_rule_associations(
    data: str,
    rules_nodeset_name: str,
    user_prompt_location: str = "coding_rule_association_agent_user.txt",
    system_prompt_location: str = "coding_rule_association_agent_system.txt",
):
    """Extract coding rules from ``data`` with the LLM and store them in the graph.

    Rules already present in the node set are rendered into the user prompt.
    The rules returned by the LLM are attached to the node set, saved as data
    points, linked to their origin chunk when one is found, and the graph
    edges are indexed afterwards.

    Args:
        data: Conversation text. A list is also accepted and is joined with
            single spaces before use.
        rules_nodeset_name: Name of the NodeSet the rules belong to.
        user_prompt_location: Template file for the user prompt.
        system_prompt_location: Template file for the system prompt.
    """
    chat_text = " ".join(data) if isinstance(data, list) else data

    graph_engine = await get_graph_engine()
    known_rules = await get_existing_rules(rules_nodeset_name=rules_nodeset_name)
    known_rules_listing = "\n".join(f"- {rule}" for rule in known_rules)

    user_prompt = LLMGateway.render_prompt(
        user_prompt_location,
        context={"chat": chat_text, "rules": known_rules_listing},
    )
    system_prompt = LLMGateway.render_prompt(system_prompt_location, context={})

    extracted = await LLMGateway.acreate_structured_output(
        text_input=user_prompt, system_prompt=system_prompt, response_model=RuleSet
    )

    # The id is derived from the name, so the same name always yields the
    # same NodeSet id.
    nodeset_id = uuid5(NAMESPACE_OID, name=rules_nodeset_name)
    target_nodeset = NodeSet(id=nodeset_id, name=rules_nodeset_name)
    for extracted_rule in extracted.rules:
        extracted_rule.belongs_to_set = target_nodeset

    origin_edges = await get_origin_edges(data=chat_text, rules=extracted.rules)

    await add_data_points(data_points=extracted.rules)

    if origin_edges:
        await graph_engine.add_edges(origin_edges)

    await index_graph_edges()
@@ -1,5 +1,5 @@
1
1
  import asyncio
2
- from typing import Type, List
2
+ from typing import Type, List, Optional
3
3
  from pydantic import BaseModel
4
4
 
5
5
  from cognee.infrastructure.databases.graph import get_graph_engine
@@ -71,6 +71,7 @@ async def extract_graph_from_data(
71
71
  data_chunks: List[DocumentChunk],
72
72
  graph_model: Type[BaseModel],
73
73
  ontology_adapter: OntologyResolver = None,
74
+ custom_prompt: Optional[str] = None,
74
75
  ) -> List[DocumentChunk]:
75
76
  """
76
77
  Extracts and integrates a knowledge graph from the text content of document chunks using a specified graph model.
@@ -84,7 +85,10 @@ async def extract_graph_from_data(
84
85
  raise InvalidGraphModelError(graph_model)
85
86
 
86
87
  chunk_graphs = await asyncio.gather(
87
- *[LLMGateway.extract_content_graph(chunk.text, graph_model) for chunk in data_chunks]
88
+ *[
89
+ LLMGateway.extract_content_graph(chunk.text, graph_model, custom_prompt=custom_prompt)
90
+ for chunk in data_chunks
91
+ ]
88
92
  )
89
93
 
90
94
  # Note: Filter edges with missing source or target nodes
@@ -1,11 +1,15 @@
1
1
  import os
2
+ from pathlib import Path
2
3
  from urllib.parse import urlparse
3
4
  from typing import Union, BinaryIO, Any
4
5
 
5
6
  from cognee.modules.ingestion.exceptions import IngestionError
6
7
  from cognee.modules.ingestion import save_data_to_file
8
+ from cognee.shared.logging_utils import get_logger
7
9
  from pydantic_settings import BaseSettings, SettingsConfigDict
8
10
 
11
+ logger = get_logger()
12
+
9
13
 
10
14
  class SaveDataSettings(BaseSettings):
11
15
  accept_local_file_path: bool = True
@@ -30,6 +34,16 @@ async def save_data_item_to_storage(data_item: Union[BinaryIO, str, Any]) -> str
30
34
  if isinstance(data_item, str):
31
35
  parsed_url = urlparse(data_item)
32
36
 
37
+ try:
38
+ # In case data item is a string with a relative path transform data item to absolute path and check
39
+ # if the file exists
40
+ abs_path = (Path.cwd() / Path(data_item)).resolve()
41
+ abs_path.is_file()
42
+ except (OSError, ValueError):
43
+ # In case file path is too long it's most likely not a relative path
44
+ logger.debug(f"Data item was too long to be a possible file path: {abs_path}")
45
+ abs_path = Path("")
46
+
33
47
  # data is s3 file path
34
48
  if parsed_url.scheme == "s3":
35
49
  return data_item
@@ -56,6 +70,15 @@ async def save_data_item_to_storage(data_item: Union[BinaryIO, str, Any]) -> str
56
70
  return file_path
57
71
  else:
58
72
  raise IngestionError(message="Local files are not accepted.")
73
+ # Data is a relative file path
74
+ elif abs_path.is_file():
75
+ if settings.accept_local_file_path:
76
+ # Normalize path separators before creating file URL
77
+ normalized_path = os.path.normpath(abs_path)
78
+ # Use forward slashes in file URLs for consistency
79
+ url_path = normalized_path.replace(os.sep, "/")
80
+ file_path = "file://" + url_path
81
+ return file_path
59
82
 
60
83
  # data is text, save it to data storage and return the file path
61
84
  return await save_data_to_file(data_item)
@@ -0,0 +1,2 @@
1
+ from .extract_subgraph import extract_subgraph
2
+ from .extract_subgraph_chunks import extract_subgraph_chunks
@@ -0,0 +1,7 @@
1
+ from cognee.modules.graph.cognee_graph.CogneeGraph import CogneeGraph
2
+
3
+
4
async def extract_subgraph(subgraphs: list[CogneeGraph]):
    """
    Stream the edges of the given subgraphs to the next consumer, one edge at a time.

    Subgraphs are visited in the order they are given; for each one, the items of its
    ``edges`` attribute are yielded in iteration order. Nothing is collected up front.
    """
    # Flatten "every subgraph -> every edge" into one lazy stream.
    every_edge = (edge for graph in subgraphs for edge in graph.edges)

    for edge in every_edge:
        yield edge
@@ -0,0 +1,11 @@
1
+ from cognee.modules.graph.cognee_graph.CogneeGraph import CogneeGraph
2
+
3
+
4
async def extract_subgraph_chunks(subgraphs: list[CogneeGraph]):
    """
    Get all Document Chunks from subgraphs and forward to next task in pipeline.

    Walks every node of every subgraph (via ``subgraph.nodes.values()``) and yields the
    ``"text"`` attribute of each node whose ``"type"`` attribute equals ``"DocumentChunk"``.

    Nodes that carry no ``"type"`` attribute are skipped rather than raising ``KeyError``,
    so one untyped node cannot abort the stream for the remaining chunks.

    Raises:
        KeyError: if a node typed ``"DocumentChunk"`` has no ``"text"`` attribute.
    """
    for subgraph in subgraphs:
        for node in subgraph.nodes.values():
            # NOTE(review): assumes node.attributes is a dict-like mapping - confirm against
            # the CogneeGraph node class.
            # .get(): a node without a "type" attribute is simply not a DocumentChunk.
            if node.attributes.get("type") == "DocumentChunk":
                # Kept strict on purpose: a chunk without text is a data error worth surfacing.
                yield node.attributes["text"]
@@ -180,6 +180,7 @@ async def get_local_script_dependencies(
180
180
  name=file_path_relative_to_repo,
181
181
  source_code=source_code,
182
182
  file_path=script_path,
183
+ language="python",
183
184
  )
184
185
  return code_file_node
185
186
 
@@ -188,6 +189,7 @@ async def get_local_script_dependencies(
188
189
  name=file_path_relative_to_repo,
189
190
  source_code=None,
190
191
  file_path=script_path,
192
+ language="python",
191
193
  )
192
194
 
193
195
  async for part in extract_code_parts(source_code_tree.root_node, script_path=script_path):