cognee 0.2.3.dev1__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/__init__.py +2 -0
- cognee/__main__.py +4 -0
- cognee/api/client.py +28 -3
- cognee/api/health.py +10 -13
- cognee/api/v1/add/add.py +20 -6
- cognee/api/v1/add/routers/get_add_router.py +12 -37
- cognee/api/v1/cloud/routers/__init__.py +1 -0
- cognee/api/v1/cloud/routers/get_checks_router.py +23 -0
- cognee/api/v1/cognify/code_graph_pipeline.py +14 -3
- cognee/api/v1/cognify/cognify.py +67 -105
- cognee/api/v1/cognify/routers/get_cognify_router.py +11 -3
- cognee/api/v1/datasets/routers/get_datasets_router.py +16 -5
- cognee/api/v1/memify/routers/__init__.py +1 -0
- cognee/api/v1/memify/routers/get_memify_router.py +100 -0
- cognee/api/v1/notebooks/routers/__init__.py +1 -0
- cognee/api/v1/notebooks/routers/get_notebooks_router.py +96 -0
- cognee/api/v1/responses/default_tools.py +4 -0
- cognee/api/v1/responses/dispatch_function.py +6 -1
- cognee/api/v1/responses/models.py +1 -1
- cognee/api/v1/search/routers/get_search_router.py +20 -1
- cognee/api/v1/search/search.py +17 -4
- cognee/api/v1/sync/__init__.py +17 -0
- cognee/api/v1/sync/routers/__init__.py +3 -0
- cognee/api/v1/sync/routers/get_sync_router.py +241 -0
- cognee/api/v1/sync/sync.py +877 -0
- cognee/api/v1/ui/__init__.py +1 -0
- cognee/api/v1/ui/ui.py +529 -0
- cognee/api/v1/users/routers/get_auth_router.py +13 -1
- cognee/base_config.py +10 -1
- cognee/cli/__init__.py +10 -0
- cognee/cli/_cognee.py +273 -0
- cognee/cli/commands/__init__.py +1 -0
- cognee/cli/commands/add_command.py +80 -0
- cognee/cli/commands/cognify_command.py +128 -0
- cognee/cli/commands/config_command.py +225 -0
- cognee/cli/commands/delete_command.py +80 -0
- cognee/cli/commands/search_command.py +149 -0
- cognee/cli/config.py +33 -0
- cognee/cli/debug.py +21 -0
- cognee/cli/echo.py +45 -0
- cognee/cli/exceptions.py +23 -0
- cognee/cli/minimal_cli.py +97 -0
- cognee/cli/reference.py +26 -0
- cognee/cli/suppress_logging.py +12 -0
- cognee/eval_framework/corpus_builder/corpus_builder_executor.py +2 -2
- cognee/eval_framework/eval_config.py +1 -1
- cognee/infrastructure/databases/graph/config.py +10 -4
- cognee/infrastructure/databases/graph/get_graph_engine.py +4 -9
- cognee/infrastructure/databases/graph/kuzu/adapter.py +199 -2
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +138 -0
- cognee/infrastructure/databases/relational/__init__.py +2 -0
- cognee/infrastructure/databases/relational/get_async_session.py +15 -0
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +6 -1
- cognee/infrastructure/databases/relational/with_async_session.py +25 -0
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +1 -1
- cognee/infrastructure/databases/vector/config.py +13 -6
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +6 -4
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +16 -7
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +5 -5
- cognee/infrastructure/databases/vector/embeddings/config.py +2 -2
- cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +2 -6
- cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +10 -7
- cognee/infrastructure/files/storage/LocalFileStorage.py +9 -0
- cognee/infrastructure/files/storage/S3FileStorage.py +5 -0
- cognee/infrastructure/files/storage/StorageManager.py +7 -1
- cognee/infrastructure/files/storage/storage.py +16 -0
- cognee/infrastructure/files/utils/get_data_file_path.py +14 -9
- cognee/infrastructure/files/utils/get_file_metadata.py +2 -1
- cognee/infrastructure/llm/LLMGateway.py +32 -5
- cognee/infrastructure/llm/config.py +6 -4
- cognee/infrastructure/llm/prompts/extract_query_time.txt +15 -0
- cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +25 -0
- cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +30 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +16 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py +2 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_event_entities.py +44 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/__init__.py +1 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_content_graph.py +19 -15
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_event_graph.py +46 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +3 -3
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +3 -3
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +2 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +14 -8
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +6 -4
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +28 -4
- cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +2 -2
- cognee/infrastructure/llm/tokenizer/HuggingFace/adapter.py +3 -3
- cognee/infrastructure/llm/tokenizer/Mistral/adapter.py +3 -3
- cognee/infrastructure/llm/tokenizer/TikToken/adapter.py +6 -6
- cognee/infrastructure/llm/utils.py +7 -7
- cognee/infrastructure/utils/run_sync.py +8 -1
- cognee/modules/chunking/models/DocumentChunk.py +4 -3
- cognee/modules/cloud/exceptions/CloudApiKeyMissingError.py +15 -0
- cognee/modules/cloud/exceptions/CloudConnectionError.py +15 -0
- cognee/modules/cloud/exceptions/__init__.py +2 -0
- cognee/modules/cloud/operations/__init__.py +1 -0
- cognee/modules/cloud/operations/check_api_key.py +25 -0
- cognee/modules/data/deletion/prune_system.py +1 -1
- cognee/modules/data/methods/__init__.py +2 -0
- cognee/modules/data/methods/check_dataset_name.py +1 -1
- cognee/modules/data/methods/create_authorized_dataset.py +19 -0
- cognee/modules/data/methods/get_authorized_dataset.py +11 -5
- cognee/modules/data/methods/get_authorized_dataset_by_name.py +16 -0
- cognee/modules/data/methods/get_dataset_data.py +1 -1
- cognee/modules/data/methods/load_or_create_datasets.py +2 -20
- cognee/modules/engine/models/Event.py +16 -0
- cognee/modules/engine/models/Interval.py +8 -0
- cognee/modules/engine/models/Timestamp.py +13 -0
- cognee/modules/engine/models/__init__.py +3 -0
- cognee/modules/engine/utils/__init__.py +2 -0
- cognee/modules/engine/utils/generate_event_datapoint.py +46 -0
- cognee/modules/engine/utils/generate_timestamp_datapoint.py +51 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +2 -2
- cognee/modules/graph/methods/get_formatted_graph_data.py +3 -2
- cognee/modules/graph/utils/__init__.py +1 -0
- cognee/modules/graph/utils/resolve_edges_to_text.py +71 -0
- cognee/modules/memify/__init__.py +1 -0
- cognee/modules/memify/memify.py +118 -0
- cognee/modules/notebooks/methods/__init__.py +5 -0
- cognee/modules/notebooks/methods/create_notebook.py +26 -0
- cognee/modules/notebooks/methods/delete_notebook.py +13 -0
- cognee/modules/notebooks/methods/get_notebook.py +21 -0
- cognee/modules/notebooks/methods/get_notebooks.py +18 -0
- cognee/modules/notebooks/methods/update_notebook.py +17 -0
- cognee/modules/notebooks/models/Notebook.py +53 -0
- cognee/modules/notebooks/models/__init__.py +1 -0
- cognee/modules/notebooks/operations/__init__.py +1 -0
- cognee/modules/notebooks/operations/run_in_local_sandbox.py +55 -0
- cognee/modules/pipelines/__init__.py +1 -1
- cognee/modules/pipelines/exceptions/tasks.py +18 -0
- cognee/modules/pipelines/layers/__init__.py +1 -0
- cognee/modules/pipelines/layers/check_pipeline_run_qualification.py +59 -0
- cognee/modules/pipelines/layers/pipeline_execution_mode.py +127 -0
- cognee/modules/pipelines/layers/reset_dataset_pipeline_run_status.py +28 -0
- cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +34 -0
- cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +55 -0
- cognee/modules/pipelines/layers/setup_and_check_environment.py +41 -0
- cognee/modules/pipelines/layers/validate_pipeline_tasks.py +20 -0
- cognee/modules/pipelines/methods/__init__.py +2 -0
- cognee/modules/pipelines/methods/get_pipeline_runs_by_dataset.py +34 -0
- cognee/modules/pipelines/methods/reset_pipeline_run_status.py +16 -0
- cognee/modules/pipelines/operations/__init__.py +0 -1
- cognee/modules/pipelines/operations/log_pipeline_run_initiated.py +1 -1
- cognee/modules/pipelines/operations/pipeline.py +24 -138
- cognee/modules/pipelines/operations/run_tasks.py +17 -41
- cognee/modules/retrieval/base_feedback.py +11 -0
- cognee/modules/retrieval/base_graph_retriever.py +18 -0
- cognee/modules/retrieval/base_retriever.py +1 -1
- cognee/modules/retrieval/code_retriever.py +8 -0
- cognee/modules/retrieval/coding_rules_retriever.py +31 -0
- cognee/modules/retrieval/completion_retriever.py +9 -3
- cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +1 -0
- cognee/modules/retrieval/cypher_search_retriever.py +1 -9
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +29 -13
- cognee/modules/retrieval/graph_completion_cot_retriever.py +30 -13
- cognee/modules/retrieval/graph_completion_retriever.py +107 -56
- cognee/modules/retrieval/graph_summary_completion_retriever.py +5 -1
- cognee/modules/retrieval/insights_retriever.py +14 -3
- cognee/modules/retrieval/natural_language_retriever.py +0 -4
- cognee/modules/retrieval/summaries_retriever.py +1 -1
- cognee/modules/retrieval/temporal_retriever.py +152 -0
- cognee/modules/retrieval/user_qa_feedback.py +83 -0
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +7 -32
- cognee/modules/retrieval/utils/completion.py +10 -3
- cognee/modules/retrieval/utils/extract_uuid_from_node.py +18 -0
- cognee/modules/retrieval/utils/models.py +40 -0
- cognee/modules/search/methods/get_search_type_tools.py +168 -0
- cognee/modules/search/methods/no_access_control_search.py +47 -0
- cognee/modules/search/methods/search.py +239 -118
- cognee/modules/search/types/SearchResult.py +21 -0
- cognee/modules/search/types/SearchType.py +3 -0
- cognee/modules/search/types/__init__.py +1 -0
- cognee/modules/search/utils/__init__.py +2 -0
- cognee/modules/search/utils/prepare_search_result.py +41 -0
- cognee/modules/search/utils/transform_context_to_graph.py +38 -0
- cognee/modules/settings/get_settings.py +2 -2
- cognee/modules/sync/__init__.py +1 -0
- cognee/modules/sync/methods/__init__.py +23 -0
- cognee/modules/sync/methods/create_sync_operation.py +53 -0
- cognee/modules/sync/methods/get_sync_operation.py +107 -0
- cognee/modules/sync/methods/update_sync_operation.py +248 -0
- cognee/modules/sync/models/SyncOperation.py +142 -0
- cognee/modules/sync/models/__init__.py +3 -0
- cognee/modules/users/__init__.py +0 -1
- cognee/modules/users/methods/__init__.py +4 -1
- cognee/modules/users/methods/create_user.py +26 -1
- cognee/modules/users/methods/get_authenticated_user.py +36 -42
- cognee/modules/users/methods/get_default_user.py +3 -1
- cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py +2 -1
- cognee/root_dir.py +19 -0
- cognee/shared/CodeGraphEntities.py +1 -0
- cognee/shared/logging_utils.py +143 -32
- cognee/shared/utils.py +0 -1
- cognee/tasks/codingagents/coding_rule_associations.py +127 -0
- cognee/tasks/graph/extract_graph_from_data.py +6 -2
- cognee/tasks/ingestion/save_data_item_to_storage.py +23 -0
- cognee/tasks/memify/__init__.py +2 -0
- cognee/tasks/memify/extract_subgraph.py +7 -0
- cognee/tasks/memify/extract_subgraph_chunks.py +11 -0
- cognee/tasks/repo_processor/get_local_dependencies.py +2 -0
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +144 -47
- cognee/tasks/storage/add_data_points.py +33 -3
- cognee/tasks/temporal_graph/__init__.py +1 -0
- cognee/tasks/temporal_graph/add_entities_to_event.py +85 -0
- cognee/tasks/temporal_graph/enrich_events.py +34 -0
- cognee/tasks/temporal_graph/extract_events_and_entities.py +32 -0
- cognee/tasks/temporal_graph/extract_knowledge_graph_from_events.py +41 -0
- cognee/tasks/temporal_graph/models.py +49 -0
- cognee/tests/integration/cli/__init__.py +3 -0
- cognee/tests/integration/cli/test_cli_integration.py +331 -0
- cognee/tests/integration/documents/PdfDocument_test.py +2 -2
- cognee/tests/integration/documents/TextDocument_test.py +2 -4
- cognee/tests/integration/documents/UnstructuredDocument_test.py +5 -8
- cognee/tests/{test_deletion.py → test_delete_hard.py} +0 -37
- cognee/tests/test_delete_soft.py +85 -0
- cognee/tests/test_kuzu.py +2 -2
- cognee/tests/test_neo4j.py +2 -2
- cognee/tests/test_permissions.py +3 -3
- cognee/tests/test_relational_db_migration.py +7 -5
- cognee/tests/test_search_db.py +136 -23
- cognee/tests/test_temporal_graph.py +167 -0
- cognee/tests/unit/api/__init__.py +1 -0
- cognee/tests/unit/api/test_conditional_authentication_endpoints.py +246 -0
- cognee/tests/unit/cli/__init__.py +3 -0
- cognee/tests/unit/cli/test_cli_commands.py +483 -0
- cognee/tests/unit/cli/test_cli_edge_cases.py +625 -0
- cognee/tests/unit/cli/test_cli_main.py +173 -0
- cognee/tests/unit/cli/test_cli_runner.py +62 -0
- cognee/tests/unit/cli/test_cli_utils.py +127 -0
- cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +18 -2
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +12 -15
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +10 -15
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +4 -3
- cognee/tests/unit/modules/retrieval/insights_retriever_test.py +4 -2
- cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +18 -2
- cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +225 -0
- cognee/tests/unit/modules/users/__init__.py +1 -0
- cognee/tests/unit/modules/users/test_conditional_authentication.py +277 -0
- cognee/tests/unit/processing/utils/utils_test.py +20 -1
- {cognee-0.2.3.dev1.dist-info → cognee-0.3.0.dist-info}/METADATA +13 -9
- {cognee-0.2.3.dev1.dist-info → cognee-0.3.0.dist-info}/RECORD +247 -135
- cognee-0.3.0.dist-info/entry_points.txt +2 -0
- cognee/infrastructure/databases/graph/networkx/adapter.py +0 -1017
- cognee/infrastructure/pipeline/models/Operation.py +0 -60
- cognee/notebooks/github_analysis_step_by_step.ipynb +0 -37
- cognee/tests/tasks/descriptive_metrics/networkx_metrics_test.py +0 -7
- cognee/tests/unit/modules/search/search_methods_test.py +0 -223
- /cognee/{infrastructure/databases/graph/networkx → api/v1/memify}/__init__.py +0 -0
- /cognee/{infrastructure/pipeline/models → tasks/codingagents}/__init__.py +0 -0
- {cognee-0.2.3.dev1.dist-info → cognee-0.3.0.dist-info}/WHEEL +0 -0
- {cognee-0.2.3.dev1.dist-info → cognee-0.3.0.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.2.3.dev1.dist-info → cognee-0.3.0.dist-info}/licenses/NOTICE.md +0 -0
cognee/root_dir.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
|
+
from typing import Optional
|
|
2
3
|
|
|
3
4
|
ROOT_DIR = Path(__file__).resolve().parent
|
|
4
5
|
|
|
@@ -6,3 +7,21 @@ ROOT_DIR = Path(__file__).resolve().parent
|
|
|
6
7
|
def get_absolute_path(path_from_root: str) -> str:
|
|
7
8
|
absolute_path = ROOT_DIR / path_from_root
|
|
8
9
|
return str(absolute_path.resolve())
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def ensure_absolute_path(path: str) -> str:
|
|
13
|
+
"""Ensures a path is absolute.
|
|
14
|
+
|
|
15
|
+
Args:
|
|
16
|
+
path: The path to validate.
|
|
17
|
+
|
|
18
|
+
Returns:
|
|
19
|
+
Absolute path as string
|
|
20
|
+
"""
|
|
21
|
+
if path is None:
|
|
22
|
+
raise ValueError("Path cannot be None")
|
|
23
|
+
path_obj = Path(path).expanduser()
|
|
24
|
+
if path_obj.is_absolute():
|
|
25
|
+
return str(path_obj.resolve())
|
|
26
|
+
|
|
27
|
+
raise ValueError(f"Path must be absolute. Got relative path: {path}")
|
|
cognee/shared/CodeGraphEntities.py
CHANGED
|
@@ -36,6 +36,7 @@ class ClassDefinition(DataPoint):
|
|
|
36
36
|
class CodeFile(DataPoint):
|
|
37
37
|
name: str
|
|
38
38
|
file_path: str
|
|
39
|
+
language: Optional[str] = None # e.g., 'python', 'javascript', 'java', etc.
|
|
39
40
|
source_code: Optional[str] = None
|
|
40
41
|
part_of: Optional[Repository] = None
|
|
41
42
|
depends_on: Optional[List["ImportStatement"]] = []
|
cognee/shared/logging_utils.py
CHANGED
|
@@ -15,14 +15,43 @@ from typing import Protocol
|
|
|
15
15
|
# Configure external library logging
|
|
16
16
|
def configure_external_library_logging():
|
|
17
17
|
"""Configure logging for external libraries to reduce verbosity"""
|
|
18
|
+
# Set environment variables to suppress LiteLLM logging
|
|
19
|
+
os.environ.setdefault("LITELLM_LOG", "ERROR")
|
|
20
|
+
os.environ.setdefault("LITELLM_SET_VERBOSE", "False")
|
|
21
|
+
|
|
18
22
|
# Configure LiteLLM logging to reduce verbosity
|
|
19
23
|
try:
|
|
20
24
|
import litellm
|
|
21
25
|
|
|
26
|
+
# Disable verbose logging
|
|
22
27
|
litellm.set_verbose = False
|
|
23
28
|
|
|
24
|
-
#
|
|
25
|
-
|
|
29
|
+
# Set additional LiteLLM configuration
|
|
30
|
+
if hasattr(litellm, "suppress_debug_info"):
|
|
31
|
+
litellm.suppress_debug_info = True
|
|
32
|
+
if hasattr(litellm, "turn_off_message"):
|
|
33
|
+
litellm.turn_off_message = True
|
|
34
|
+
if hasattr(litellm, "_turn_on_debug"):
|
|
35
|
+
litellm._turn_on_debug = False
|
|
36
|
+
|
|
37
|
+
# Comprehensive logger suppression
|
|
38
|
+
loggers_to_suppress = [
|
|
39
|
+
"litellm",
|
|
40
|
+
"litellm.litellm_core_utils.logging_worker",
|
|
41
|
+
"litellm.litellm_core_utils",
|
|
42
|
+
"litellm.proxy",
|
|
43
|
+
"litellm.router",
|
|
44
|
+
"openai._base_client",
|
|
45
|
+
"LiteLLM", # Capital case variant
|
|
46
|
+
"LiteLLM.core",
|
|
47
|
+
"LiteLLM.logging_worker",
|
|
48
|
+
"litellm.logging_worker",
|
|
49
|
+
]
|
|
50
|
+
|
|
51
|
+
for logger_name in loggers_to_suppress:
|
|
52
|
+
logging.getLogger(logger_name).setLevel(logging.CRITICAL)
|
|
53
|
+
logging.getLogger(logger_name).disabled = True
|
|
54
|
+
|
|
26
55
|
except ImportError:
|
|
27
56
|
# LiteLLM not available, skip configuration
|
|
28
57
|
pass
|
|
@@ -173,29 +202,17 @@ def log_database_configuration(logger):
|
|
|
173
202
|
from cognee.infrastructure.databases.graph.config import get_graph_config
|
|
174
203
|
|
|
175
204
|
try:
|
|
176
|
-
#
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
vector_config = get_vectordb_config()
|
|
185
|
-
if vector_config.vector_db_provider == "lancedb":
|
|
186
|
-
logger.info(f"Vector database path: {vector_config.vector_db_url}")
|
|
187
|
-
else:
|
|
188
|
-
logger.info(f"Vector database URL: {vector_config.vector_db_url}")
|
|
189
|
-
|
|
190
|
-
# Log graph database configuration
|
|
191
|
-
graph_config = get_graph_config()
|
|
192
|
-
if graph_config.graph_database_provider == "kuzu":
|
|
193
|
-
logger.info(f"Graph database path: {graph_config.graph_file_path}")
|
|
194
|
-
else:
|
|
195
|
-
logger.info(f"Graph database URL: {graph_config.graph_database_url}")
|
|
205
|
+
# Get base database directory path
|
|
206
|
+
from cognee.base_config import get_base_config
|
|
207
|
+
|
|
208
|
+
base_config = get_base_config()
|
|
209
|
+
databases_path = os.path.join(base_config.system_root_directory, "databases")
|
|
210
|
+
|
|
211
|
+
# Log concise database info
|
|
212
|
+
logger.info(f"Database storage: {databases_path}")
|
|
196
213
|
|
|
197
214
|
except Exception as e:
|
|
198
|
-
logger.
|
|
215
|
+
logger.debug(f"Could not retrieve database configuration: {str(e)}")
|
|
199
216
|
|
|
200
217
|
|
|
201
218
|
def cleanup_old_logs(logs_dir, max_files):
|
|
@@ -216,13 +233,22 @@ def cleanup_old_logs(logs_dir, max_files):
|
|
|
216
233
|
|
|
217
234
|
# Remove old files that exceed the maximum
|
|
218
235
|
if len(log_files) > max_files:
|
|
236
|
+
deleted_count = 0
|
|
219
237
|
for old_file in log_files[max_files:]:
|
|
220
238
|
try:
|
|
221
239
|
old_file.unlink()
|
|
222
|
-
|
|
240
|
+
deleted_count += 1
|
|
241
|
+
# Only log individual files in non-CLI mode
|
|
242
|
+
if os.getenv("COGNEE_CLI_MODE") != "true":
|
|
243
|
+
logger.info(f"Deleted old log file: {old_file}")
|
|
223
244
|
except Exception as e:
|
|
245
|
+
# Always log errors
|
|
224
246
|
logger.error(f"Failed to delete old log file {old_file}: {e}")
|
|
225
247
|
|
|
248
|
+
# In CLI mode, show compact summary
|
|
249
|
+
if os.getenv("COGNEE_CLI_MODE") == "true" and deleted_count > 0:
|
|
250
|
+
logger.info(f"Cleaned up {deleted_count} old log files")
|
|
251
|
+
|
|
226
252
|
return True
|
|
227
253
|
except Exception as e:
|
|
228
254
|
logger.error(f"Error cleaning up log files: {e}")
|
|
@@ -241,11 +267,81 @@ def setup_logging(log_level=None, name=None):
|
|
|
241
267
|
"""
|
|
242
268
|
global _is_structlog_configured
|
|
243
269
|
|
|
244
|
-
|
|
270
|
+
# Regular detailed logging for non-CLI usage
|
|
271
|
+
log_level = log_level if log_level else log_levels[os.getenv("LOG_LEVEL", "INFO").upper()]
|
|
245
272
|
|
|
246
273
|
# Configure external library logging early to suppress verbose output
|
|
247
274
|
configure_external_library_logging()
|
|
248
275
|
|
|
276
|
+
# Add custom filter to suppress LiteLLM worker cancellation errors
|
|
277
|
+
class LiteLLMCancellationFilter(logging.Filter):
|
|
278
|
+
"""Filter to suppress LiteLLM worker cancellation messages"""
|
|
279
|
+
|
|
280
|
+
def filter(self, record):
|
|
281
|
+
# Check if this is a LiteLLM-related logger
|
|
282
|
+
if hasattr(record, "name") and "litellm" in record.name.lower():
|
|
283
|
+
return False
|
|
284
|
+
|
|
285
|
+
# Check message content for cancellation errors
|
|
286
|
+
if hasattr(record, "msg") and record.msg:
|
|
287
|
+
msg_str = str(record.msg).lower()
|
|
288
|
+
if any(
|
|
289
|
+
keyword in msg_str
|
|
290
|
+
for keyword in [
|
|
291
|
+
"loggingworker cancelled",
|
|
292
|
+
"logging_worker.py",
|
|
293
|
+
"cancellederror",
|
|
294
|
+
"litellm:error",
|
|
295
|
+
]
|
|
296
|
+
):
|
|
297
|
+
return False
|
|
298
|
+
|
|
299
|
+
# Check formatted message
|
|
300
|
+
try:
|
|
301
|
+
if hasattr(record, "getMessage"):
|
|
302
|
+
formatted_msg = record.getMessage().lower()
|
|
303
|
+
if any(
|
|
304
|
+
keyword in formatted_msg
|
|
305
|
+
for keyword in [
|
|
306
|
+
"loggingworker cancelled",
|
|
307
|
+
"logging_worker.py",
|
|
308
|
+
"cancellederror",
|
|
309
|
+
"litellm:error",
|
|
310
|
+
]
|
|
311
|
+
):
|
|
312
|
+
return False
|
|
313
|
+
except Exception:
|
|
314
|
+
pass
|
|
315
|
+
|
|
316
|
+
return True
|
|
317
|
+
|
|
318
|
+
# Apply the filter to root logger and specific loggers
|
|
319
|
+
cancellation_filter = LiteLLMCancellationFilter()
|
|
320
|
+
logging.getLogger().addFilter(cancellation_filter)
|
|
321
|
+
logging.getLogger("litellm").addFilter(cancellation_filter)
|
|
322
|
+
|
|
323
|
+
# Add custom filter to suppress LiteLLM worker cancellation errors
|
|
324
|
+
class LiteLLMFilter(logging.Filter):
|
|
325
|
+
def filter(self, record):
|
|
326
|
+
# Suppress LiteLLM worker cancellation errors
|
|
327
|
+
if hasattr(record, "msg") and isinstance(record.msg, str):
|
|
328
|
+
msg_lower = record.msg.lower()
|
|
329
|
+
if any(
|
|
330
|
+
phrase in msg_lower
|
|
331
|
+
for phrase in [
|
|
332
|
+
"loggingworker cancelled",
|
|
333
|
+
"cancellederror",
|
|
334
|
+
"logging_worker.py",
|
|
335
|
+
"loggingerror",
|
|
336
|
+
]
|
|
337
|
+
):
|
|
338
|
+
return False
|
|
339
|
+
return True
|
|
340
|
+
|
|
341
|
+
# Apply filter to root logger
|
|
342
|
+
litellm_filter = LiteLLMFilter()
|
|
343
|
+
logging.getLogger().addFilter(litellm_filter)
|
|
344
|
+
|
|
249
345
|
def exception_handler(logger, method_name, event_dict):
|
|
250
346
|
"""Custom processor to handle uncaught exceptions."""
|
|
251
347
|
# Check if there's an exc_info that needs to be processed
|
|
@@ -298,11 +394,6 @@ def setup_logging(log_level=None, name=None):
|
|
|
298
394
|
# Hand back to the original hook → prints traceback and exits
|
|
299
395
|
sys.__excepthook__(exc_type, exc_value, traceback)
|
|
300
396
|
|
|
301
|
-
logger.info("Want to learn more? Visit the Cognee documentation: https://docs.cognee.ai")
|
|
302
|
-
logger.info(
|
|
303
|
-
"Need help? Reach out to us on our Discord server: https://discord.gg/NQPKmU5CCg"
|
|
304
|
-
)
|
|
305
|
-
|
|
306
397
|
# Install exception handlers
|
|
307
398
|
sys.excepthook = handle_exception
|
|
308
399
|
|
|
@@ -380,18 +471,38 @@ def setup_logging(log_level=None, name=None):
|
|
|
380
471
|
# Mark logging as configured
|
|
381
472
|
_is_structlog_configured = True
|
|
382
473
|
|
|
474
|
+
from cognee.infrastructure.databases.relational.config import get_relational_config
|
|
475
|
+
from cognee.infrastructure.databases.vector.config import get_vectordb_config
|
|
476
|
+
from cognee.infrastructure.databases.graph.config import get_graph_config
|
|
477
|
+
|
|
478
|
+
graph_config = get_graph_config()
|
|
479
|
+
vector_config = get_vectordb_config()
|
|
480
|
+
relational_config = get_relational_config()
|
|
481
|
+
|
|
482
|
+
try:
|
|
483
|
+
# Get base database directory path
|
|
484
|
+
from cognee.base_config import get_base_config
|
|
485
|
+
|
|
486
|
+
base_config = get_base_config()
|
|
487
|
+
databases_path = os.path.join(base_config.system_root_directory, "databases")
|
|
488
|
+
except Exception as e:
|
|
489
|
+
raise ValueError from e
|
|
490
|
+
|
|
383
491
|
# Get a configured logger and log system information
|
|
384
492
|
logger = structlog.get_logger(name if name else __name__)
|
|
493
|
+
# Detailed initialization for regular usage
|
|
385
494
|
logger.info(
|
|
386
495
|
"Logging initialized",
|
|
387
496
|
python_version=PYTHON_VERSION,
|
|
388
497
|
structlog_version=STRUCTLOG_VERSION,
|
|
389
498
|
cognee_version=COGNEE_VERSION,
|
|
390
499
|
os_info=OS_INFO,
|
|
500
|
+
database_path=databases_path,
|
|
501
|
+
graph_database_name=graph_config.graph_database_name,
|
|
502
|
+
vector_config=vector_config.vector_db_provider,
|
|
503
|
+
relational_config=relational_config.db_name,
|
|
391
504
|
)
|
|
392
505
|
|
|
393
|
-
logger.info("Want to learn more? Visit the Cognee documentation: https://docs.cognee.ai")
|
|
394
|
-
|
|
395
506
|
# Log database configuration
|
|
396
507
|
log_database_configuration(logger)
|
|
397
508
|
|
cognee/shared/utils.py
CHANGED
|
cognee/tasks/codingagents/coding_rule_associations.py
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
from uuid import NAMESPACE_OID, uuid5
|
|
2
|
+
|
|
3
|
+
from cognee.infrastructure.databases.graph import get_graph_engine
|
|
4
|
+
from cognee.infrastructure.databases.vector import get_vector_engine
|
|
5
|
+
|
|
6
|
+
from cognee.low_level import DataPoint
|
|
7
|
+
from cognee.infrastructure.llm import LLMGateway
|
|
8
|
+
from cognee.shared.logging_utils import get_logger
|
|
9
|
+
from cognee.modules.engine.models import NodeSet
|
|
10
|
+
from cognee.tasks.storage import add_data_points, index_graph_edges
|
|
11
|
+
from typing import Optional, List, Any
|
|
12
|
+
from pydantic import Field
|
|
13
|
+
|
|
14
|
+
logger = get_logger("coding_rule_association")
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class Rule(DataPoint):
|
|
18
|
+
"""A single developer rule extracted from text."""
|
|
19
|
+
|
|
20
|
+
text: str = Field(..., description="The coding rule associated with the conversation")
|
|
21
|
+
belongs_to_set: Optional[NodeSet] = None
|
|
22
|
+
metadata: dict = {"index_fields": ["rule"]}
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class RuleSet(DataPoint):
|
|
26
|
+
"""Collection of parsed rules."""
|
|
27
|
+
|
|
28
|
+
rules: List[Rule] = Field(
|
|
29
|
+
...,
|
|
30
|
+
description="List of developer rules extracted from the input text. Each rule represents a coding best practice or guideline.",
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
async def get_existing_rules(rules_nodeset_name: str) -> List[str]:
|
|
35
|
+
graph_engine = await get_graph_engine()
|
|
36
|
+
nodes_data, _ = await graph_engine.get_nodeset_subgraph(
|
|
37
|
+
node_type=NodeSet, node_name=[rules_nodeset_name]
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
existing_rules = [
|
|
41
|
+
item[1]["text"]
|
|
42
|
+
for item in nodes_data
|
|
43
|
+
if isinstance(item, tuple)
|
|
44
|
+
and len(item) == 2
|
|
45
|
+
and isinstance(item[1], dict)
|
|
46
|
+
and "text" in item[1]
|
|
47
|
+
]
|
|
48
|
+
|
|
49
|
+
return existing_rules
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
async def get_origin_edges(data: str, rules: List[Rule]) -> list[Any]:
|
|
53
|
+
vector_engine = get_vector_engine()
|
|
54
|
+
|
|
55
|
+
origin_chunk = await vector_engine.search("DocumentChunk_text", data, limit=1)
|
|
56
|
+
|
|
57
|
+
try:
|
|
58
|
+
origin_id = origin_chunk[0].id
|
|
59
|
+
except (AttributeError, KeyError, TypeError, IndexError):
|
|
60
|
+
origin_id = None
|
|
61
|
+
|
|
62
|
+
relationships = []
|
|
63
|
+
|
|
64
|
+
if origin_id and isinstance(rules, (list, tuple)) and len(rules) > 0:
|
|
65
|
+
for rule in rules:
|
|
66
|
+
try:
|
|
67
|
+
rule_id = getattr(rule, "id", None)
|
|
68
|
+
if rule_id is not None:
|
|
69
|
+
rel_name = "rule_associated_from"
|
|
70
|
+
relationships.append(
|
|
71
|
+
(
|
|
72
|
+
rule_id,
|
|
73
|
+
origin_id,
|
|
74
|
+
rel_name,
|
|
75
|
+
{
|
|
76
|
+
"relationship_name": rel_name,
|
|
77
|
+
"source_node_id": rule_id,
|
|
78
|
+
"target_node_id": origin_id,
|
|
79
|
+
"ontology_valid": False,
|
|
80
|
+
},
|
|
81
|
+
)
|
|
82
|
+
)
|
|
83
|
+
except Exception as e:
|
|
84
|
+
logger.info(f"Warning: Skipping invalid rule due to error: {e}")
|
|
85
|
+
else:
|
|
86
|
+
logger.info("No valid origin_id or rules provided.")
|
|
87
|
+
|
|
88
|
+
return relationships
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
async def add_rule_associations(
|
|
92
|
+
data: str,
|
|
93
|
+
rules_nodeset_name: str,
|
|
94
|
+
user_prompt_location: str = "coding_rule_association_agent_user.txt",
|
|
95
|
+
system_prompt_location: str = "coding_rule_association_agent_system.txt",
|
|
96
|
+
):
|
|
97
|
+
if isinstance(data, list):
|
|
98
|
+
# If data is a list of strings join all strings in list
|
|
99
|
+
data = " ".join(data)
|
|
100
|
+
|
|
101
|
+
graph_engine = await get_graph_engine()
|
|
102
|
+
existing_rules = await get_existing_rules(rules_nodeset_name=rules_nodeset_name)
|
|
103
|
+
existing_rules = "\n".join(f"- {rule}" for rule in existing_rules)
|
|
104
|
+
|
|
105
|
+
user_context = {"chat": data, "rules": existing_rules}
|
|
106
|
+
|
|
107
|
+
user_prompt = LLMGateway.render_prompt(user_prompt_location, context=user_context)
|
|
108
|
+
system_prompt = LLMGateway.render_prompt(system_prompt_location, context={})
|
|
109
|
+
|
|
110
|
+
rule_list = await LLMGateway.acreate_structured_output(
|
|
111
|
+
text_input=user_prompt, system_prompt=system_prompt, response_model=RuleSet
|
|
112
|
+
)
|
|
113
|
+
|
|
114
|
+
rules_nodeset = NodeSet(
|
|
115
|
+
id=uuid5(NAMESPACE_OID, name=rules_nodeset_name), name=rules_nodeset_name
|
|
116
|
+
)
|
|
117
|
+
for rule in rule_list.rules:
|
|
118
|
+
rule.belongs_to_set = rules_nodeset
|
|
119
|
+
|
|
120
|
+
edges_to_save = await get_origin_edges(data=data, rules=rule_list.rules)
|
|
121
|
+
|
|
122
|
+
await add_data_points(data_points=rule_list.rules)
|
|
123
|
+
|
|
124
|
+
if len(edges_to_save) > 0:
|
|
125
|
+
await graph_engine.add_edges(edges_to_save)
|
|
126
|
+
|
|
127
|
+
await index_graph_edges()
|
|
cognee/tasks/graph/extract_graph_from_data.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
-
from typing import Type, List
|
|
2
|
+
from typing import Type, List, Optional
|
|
3
3
|
from pydantic import BaseModel
|
|
4
4
|
|
|
5
5
|
from cognee.infrastructure.databases.graph import get_graph_engine
|
|
@@ -71,6 +71,7 @@ async def extract_graph_from_data(
|
|
|
71
71
|
data_chunks: List[DocumentChunk],
|
|
72
72
|
graph_model: Type[BaseModel],
|
|
73
73
|
ontology_adapter: OntologyResolver = None,
|
|
74
|
+
custom_prompt: Optional[str] = None,
|
|
74
75
|
) -> List[DocumentChunk]:
|
|
75
76
|
"""
|
|
76
77
|
Extracts and integrates a knowledge graph from the text content of document chunks using a specified graph model.
|
|
@@ -84,7 +85,10 @@ async def extract_graph_from_data(
|
|
|
84
85
|
raise InvalidGraphModelError(graph_model)
|
|
85
86
|
|
|
86
87
|
chunk_graphs = await asyncio.gather(
|
|
87
|
-
*[
|
|
88
|
+
*[
|
|
89
|
+
LLMGateway.extract_content_graph(chunk.text, graph_model, custom_prompt=custom_prompt)
|
|
90
|
+
for chunk in data_chunks
|
|
91
|
+
]
|
|
88
92
|
)
|
|
89
93
|
|
|
90
94
|
# Note: Filter edges with missing source or target nodes
|
|
@@ -1,11 +1,15 @@
|
|
|
1
1
|
import os
|
|
2
|
+
from pathlib import Path
|
|
2
3
|
from urllib.parse import urlparse
|
|
3
4
|
from typing import Union, BinaryIO, Any
|
|
4
5
|
|
|
5
6
|
from cognee.modules.ingestion.exceptions import IngestionError
|
|
6
7
|
from cognee.modules.ingestion import save_data_to_file
|
|
8
|
+
from cognee.shared.logging_utils import get_logger
|
|
7
9
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
8
10
|
|
|
11
|
+
logger = get_logger()
|
|
12
|
+
|
|
9
13
|
|
|
10
14
|
class SaveDataSettings(BaseSettings):
|
|
11
15
|
accept_local_file_path: bool = True
|
|
@@ -30,6 +34,16 @@ async def save_data_item_to_storage(data_item: Union[BinaryIO, str, Any]) -> str
|
|
|
30
34
|
if isinstance(data_item, str):
|
|
31
35
|
parsed_url = urlparse(data_item)
|
|
32
36
|
|
|
37
|
+
try:
|
|
38
|
+
# In case data item is a string with a relative path transform data item to absolute path and check
|
|
39
|
+
# if the file exists
|
|
40
|
+
abs_path = (Path.cwd() / Path(data_item)).resolve()
|
|
41
|
+
abs_path.is_file()
|
|
42
|
+
except (OSError, ValueError):
|
|
43
|
+
# In case file path is too long it's most likely not a relative path
|
|
44
|
+
logger.debug(f"Data item was too long to be a possible file path: {abs_path}")
|
|
45
|
+
abs_path = Path("")
|
|
46
|
+
|
|
33
47
|
# data is s3 file path
|
|
34
48
|
if parsed_url.scheme == "s3":
|
|
35
49
|
return data_item
|
|
@@ -56,6 +70,15 @@ async def save_data_item_to_storage(data_item: Union[BinaryIO, str, Any]) -> str
|
|
|
56
70
|
return file_path
|
|
57
71
|
else:
|
|
58
72
|
raise IngestionError(message="Local files are not accepted.")
|
|
73
|
+
# Data is a relative file path
|
|
74
|
+
elif abs_path.is_file():
|
|
75
|
+
if settings.accept_local_file_path:
|
|
76
|
+
# Normalize path separators before creating file URL
|
|
77
|
+
normalized_path = os.path.normpath(abs_path)
|
|
78
|
+
# Use forward slashes in file URLs for consistency
|
|
79
|
+
url_path = normalized_path.replace(os.sep, "/")
|
|
80
|
+
file_path = "file://" + url_path
|
|
81
|
+
return file_path
|
|
59
82
|
|
|
60
83
|
# data is text, save it to data storage and return the file path
|
|
61
84
|
return await save_data_to_file(data_item)
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
from cognee.modules.graph.cognee_graph.CogneeGraph import CogneeGraph
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
async def extract_subgraph_chunks(subgraphs: list[CogneeGraph]):
    """
    Get all Document Chunks from subgraphs and forward to next task in pipeline.

    Every subgraph's `nodes` mapping is walked in order. For each node whose
    `attributes["type"]` equals "DocumentChunk", the node's `attributes["text"]`
    value is yielded.

    Parameters:
    -----------

        - subgraphs (list[CogneeGraph]): Subgraphs whose nodes are scanned.

    Yields:
    -------

        The "text" attribute of each DocumentChunk node, in iteration order.
    """
    for subgraph in subgraphs:
        for node in subgraph.nodes.values():
            # A node without a "type" attribute cannot be a DocumentChunk: skip it
            # rather than abort the whole stream with a KeyError.
            if node.attributes.get("type") == "DocumentChunk":
                yield node.attributes["text"]
|
|
@@ -180,6 +180,7 @@ async def get_local_script_dependencies(
|
|
|
180
180
|
name=file_path_relative_to_repo,
|
|
181
181
|
source_code=source_code,
|
|
182
182
|
file_path=script_path,
|
|
183
|
+
language="python",
|
|
183
184
|
)
|
|
184
185
|
return code_file_node
|
|
185
186
|
|
|
@@ -188,6 +189,7 @@ async def get_local_script_dependencies(
|
|
|
188
189
|
name=file_path_relative_to_repo,
|
|
189
190
|
source_code=None,
|
|
190
191
|
file_path=script_path,
|
|
192
|
+
language="python",
|
|
191
193
|
)
|
|
192
194
|
|
|
193
195
|
async for part in extract_code_parts(source_code_tree.root_node, script_path=script_path):
|