cognee 0.5.1.dev0__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/__init__.py +2 -0
- cognee/alembic/README +1 -0
- cognee/alembic/env.py +107 -0
- cognee/alembic/script.py.mako +26 -0
- cognee/alembic/versions/1a58b986e6e1_enable_delete_for_old_tutorial_notebooks.py +52 -0
- cognee/alembic/versions/1d0bb7fede17_add_pipeline_run_status.py +33 -0
- cognee/alembic/versions/1daae0df1866_incremental_loading.py +48 -0
- cognee/alembic/versions/211ab850ef3d_add_sync_operations_table.py +118 -0
- cognee/alembic/versions/45957f0a9849_add_notebook_table.py +46 -0
- cognee/alembic/versions/46a6ce2bd2b2_expand_dataset_database_with_json_.py +333 -0
- cognee/alembic/versions/482cd6517ce4_add_default_user.py +30 -0
- cognee/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py +98 -0
- cognee/alembic/versions/8057ae7329c2_initial_migration.py +25 -0
- cognee/alembic/versions/9e7a3cb85175_loader_separation.py +104 -0
- cognee/alembic/versions/a1b2c3d4e5f6_add_label_column_to_data.py +38 -0
- cognee/alembic/versions/ab7e313804ae_permission_system_rework.py +236 -0
- cognee/alembic/versions/b9274c27a25a_kuzu_11_migration.py +75 -0
- cognee/alembic/versions/c946955da633_multi_tenant_support.py +137 -0
- cognee/alembic/versions/e1ec1dcb50b6_add_last_accessed_to_data.py +51 -0
- cognee/alembic/versions/e4ebee1091e7_expand_data_model_info.py +140 -0
- cognee/alembic.ini +117 -0
- cognee/api/v1/add/routers/get_add_router.py +2 -0
- cognee/api/v1/cognify/cognify.py +11 -6
- cognee/api/v1/cognify/routers/get_cognify_router.py +8 -0
- cognee/api/v1/config/config.py +60 -0
- cognee/api/v1/datasets/routers/get_datasets_router.py +45 -3
- cognee/api/v1/memify/routers/get_memify_router.py +2 -0
- cognee/api/v1/search/routers/get_search_router.py +21 -6
- cognee/api/v1/search/search.py +25 -5
- cognee/api/v1/sync/routers/get_sync_router.py +3 -3
- cognee/cli/commands/add_command.py +1 -1
- cognee/cli/commands/cognify_command.py +6 -0
- cognee/cli/commands/config_command.py +1 -1
- cognee/context_global_variables.py +5 -1
- cognee/eval_framework/answer_generation/answer_generation_executor.py +7 -8
- cognee/infrastructure/databases/cache/cache_db_interface.py +38 -1
- cognee/infrastructure/databases/cache/config.py +6 -0
- cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +21 -0
- cognee/infrastructure/databases/cache/get_cache_engine.py +9 -3
- cognee/infrastructure/databases/cache/redis/RedisAdapter.py +60 -1
- cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +7 -0
- cognee/infrastructure/databases/graph/get_graph_engine.py +29 -1
- cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +62 -27
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +17 -4
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +2 -1
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -0
- cognee/infrastructure/databases/vector/config.py +6 -0
- cognee/infrastructure/databases/vector/create_vector_engine.py +69 -22
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +64 -9
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +13 -2
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +16 -3
- cognee/infrastructure/databases/vector/models/ScoredResult.py +3 -3
- cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +16 -3
- cognee/infrastructure/databases/vector/pgvector/PGVectorDatasetDatabaseHandler.py +86 -0
- cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py +81 -2
- cognee/infrastructure/databases/vector/vector_db_interface.py +8 -0
- cognee/infrastructure/files/utils/get_data_file_path.py +33 -27
- cognee/infrastructure/llm/prompts/extract_query_time.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt.txt +2 -2
- cognee/infrastructure/llm/prompts/generate_graph_prompt_guided.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt_oneshot.txt +2 -2
- cognee/infrastructure/llm/prompts/generate_graph_prompt_simple.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt_strict.txt +1 -1
- cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +6 -6
- cognee/infrastructure/llm/prompts/test.txt +1 -1
- cognee/infrastructure/llm/prompts/translate_content.txt +19 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +24 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llama_cpp/adapter.py +191 -0
- cognee/modules/chunking/models/DocumentChunk.py +0 -1
- cognee/modules/cognify/config.py +2 -0
- cognee/modules/data/models/Data.py +1 -0
- cognee/modules/engine/models/Entity.py +0 -1
- cognee/modules/engine/operations/setup.py +6 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +150 -37
- cognee/modules/graph/cognee_graph/CogneeGraphElements.py +48 -2
- cognee/modules/graph/utils/__init__.py +1 -0
- cognee/modules/graph/utils/get_entity_nodes_from_triplets.py +12 -0
- cognee/modules/notebooks/methods/__init__.py +1 -0
- cognee/modules/notebooks/methods/create_notebook.py +0 -34
- cognee/modules/notebooks/methods/create_tutorial_notebooks.py +191 -0
- cognee/modules/notebooks/methods/get_notebooks.py +12 -8
- cognee/modules/notebooks/tutorials/cognee-basics/cell-1.md +3 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-2.md +10 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-3.md +7 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-4.py +28 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-5.py +3 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-6.py +9 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-7.py +17 -0
- cognee/modules/notebooks/tutorials/cognee-basics/config.json +4 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-1.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-10.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-11.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-12.py +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-13.md +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-14.py +6 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-15.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-16.py +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-2.md +9 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-3.md +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-4.md +9 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-5.md +5 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-6.py +13 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-7.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-8.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-9.py +31 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/config.json +4 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/copilot_conversations.json +107 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/guido_contributions.json +976 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/my_developer_rules.md +79 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/pep_style_guide.md +74 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/zen_principles.md +74 -0
- cognee/modules/retrieval/EntityCompletionRetriever.py +51 -38
- cognee/modules/retrieval/__init__.py +0 -1
- cognee/modules/retrieval/base_retriever.py +66 -10
- cognee/modules/retrieval/chunks_retriever.py +57 -49
- cognee/modules/retrieval/coding_rules_retriever.py +12 -5
- cognee/modules/retrieval/completion_retriever.py +29 -28
- cognee/modules/retrieval/cypher_search_retriever.py +25 -20
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +42 -46
- cognee/modules/retrieval/graph_completion_cot_retriever.py +68 -51
- cognee/modules/retrieval/graph_completion_retriever.py +78 -63
- cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
- cognee/modules/retrieval/lexical_retriever.py +34 -12
- cognee/modules/retrieval/natural_language_retriever.py +18 -15
- cognee/modules/retrieval/summaries_retriever.py +51 -34
- cognee/modules/retrieval/temporal_retriever.py +59 -49
- cognee/modules/retrieval/triplet_retriever.py +31 -32
- cognee/modules/retrieval/utils/access_tracking.py +88 -0
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +99 -85
- cognee/modules/retrieval/utils/node_edge_vector_search.py +174 -0
- cognee/modules/search/methods/__init__.py +1 -0
- cognee/modules/search/methods/get_retriever_output.py +53 -0
- cognee/modules/search/methods/get_search_type_retriever_instance.py +252 -0
- cognee/modules/search/methods/search.py +90 -215
- cognee/modules/search/models/SearchResultPayload.py +67 -0
- cognee/modules/search/types/SearchResult.py +1 -8
- cognee/modules/search/types/SearchType.py +1 -2
- cognee/modules/search/types/__init__.py +1 -1
- cognee/modules/search/utils/__init__.py +1 -2
- cognee/modules/search/utils/transform_insights_to_graph.py +2 -2
- cognee/modules/search/utils/{transform_context_to_graph.py → transform_triplets_to_graph.py} +2 -2
- cognee/modules/users/authentication/default/default_transport.py +11 -1
- cognee/modules/users/authentication/get_api_auth_backend.py +2 -1
- cognee/modules/users/authentication/get_client_auth_backend.py +2 -1
- cognee/modules/users/methods/create_user.py +0 -9
- cognee/modules/users/permissions/methods/has_user_management_permission.py +29 -0
- cognee/modules/visualization/cognee_network_visualization.py +1 -1
- cognee/run_migrations.py +48 -0
- cognee/shared/exceptions/__init__.py +1 -3
- cognee/shared/exceptions/exceptions.py +11 -1
- cognee/shared/usage_logger.py +332 -0
- cognee/shared/utils.py +12 -5
- cognee/tasks/chunks/__init__.py +9 -0
- cognee/tasks/cleanup/cleanup_unused_data.py +172 -0
- cognee/tasks/graph/__init__.py +7 -0
- cognee/tasks/memify/__init__.py +8 -0
- cognee/tasks/memify/extract_usage_frequency.py +613 -0
- cognee/tasks/summarization/models.py +0 -2
- cognee/tasks/temporal_graph/__init__.py +0 -1
- cognee/tasks/translation/__init__.py +96 -0
- cognee/tasks/translation/config.py +110 -0
- cognee/tasks/translation/detect_language.py +190 -0
- cognee/tasks/translation/exceptions.py +62 -0
- cognee/tasks/translation/models.py +72 -0
- cognee/tasks/translation/providers/__init__.py +44 -0
- cognee/tasks/translation/providers/azure_provider.py +192 -0
- cognee/tasks/translation/providers/base.py +85 -0
- cognee/tasks/translation/providers/google_provider.py +158 -0
- cognee/tasks/translation/providers/llm_provider.py +143 -0
- cognee/tasks/translation/translate_content.py +282 -0
- cognee/tasks/web_scraper/default_url_crawler.py +6 -2
- cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +1 -0
- cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +3 -0
- cognee/tests/integration/retrieval/test_brute_force_triplet_search_with_cognify.py +62 -0
- cognee/tests/integration/retrieval/test_chunks_retriever.py +115 -16
- cognee/tests/integration/retrieval/test_graph_completion_retriever.py +13 -5
- cognee/tests/integration/retrieval/test_graph_completion_retriever_context_extension.py +22 -20
- cognee/tests/integration/retrieval/test_graph_completion_retriever_cot.py +23 -24
- cognee/tests/integration/retrieval/test_rag_completion_retriever.py +70 -5
- cognee/tests/integration/retrieval/test_structured_output.py +62 -18
- cognee/tests/integration/retrieval/test_summaries_retriever.py +20 -9
- cognee/tests/integration/retrieval/test_temporal_retriever.py +38 -8
- cognee/tests/integration/retrieval/test_triplet_retriever.py +13 -4
- cognee/tests/integration/shared/test_usage_logger_integration.py +255 -0
- cognee/tests/tasks/translation/README.md +147 -0
- cognee/tests/tasks/translation/__init__.py +1 -0
- cognee/tests/tasks/translation/config_test.py +93 -0
- cognee/tests/tasks/translation/detect_language_test.py +118 -0
- cognee/tests/tasks/translation/providers_test.py +151 -0
- cognee/tests/tasks/translation/translate_content_test.py +213 -0
- cognee/tests/test_chromadb.py +1 -1
- cognee/tests/test_cleanup_unused_data.py +165 -0
- cognee/tests/test_delete_by_id.py +6 -6
- cognee/tests/test_extract_usage_frequency.py +308 -0
- cognee/tests/test_kuzu.py +17 -7
- cognee/tests/test_lancedb.py +3 -1
- cognee/tests/test_library.py +1 -1
- cognee/tests/test_neo4j.py +17 -7
- cognee/tests/test_neptune_analytics_vector.py +3 -1
- cognee/tests/test_permissions.py +172 -187
- cognee/tests/test_pgvector.py +3 -1
- cognee/tests/test_relational_db_migration.py +15 -1
- cognee/tests/test_remote_kuzu.py +3 -1
- cognee/tests/test_s3_file_storage.py +1 -1
- cognee/tests/test_search_db.py +97 -110
- cognee/tests/test_usage_logger_e2e.py +268 -0
- cognee/tests/unit/api/test_get_raw_data_endpoint.py +206 -0
- cognee/tests/unit/eval_framework/answer_generation_test.py +4 -3
- cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +2 -0
- cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +42 -2
- cognee/tests/unit/modules/graph/cognee_graph_test.py +329 -31
- cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +31 -59
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +70 -33
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +72 -52
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +27 -33
- cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +28 -15
- cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +37 -42
- cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +48 -64
- cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +263 -24
- cognee/tests/unit/modules/retrieval/test_node_edge_vector_search.py +273 -0
- cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +30 -16
- cognee/tests/unit/modules/search/test_get_search_type_retriever_instance.py +125 -0
- cognee/tests/unit/modules/search/test_search.py +176 -0
- cognee/tests/unit/modules/search/test_search_prepare_search_result_contract.py +190 -0
- cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +511 -297
- cognee/tests/unit/shared/test_usage_logger.py +241 -0
- cognee/tests/unit/users/permissions/test_has_user_management_permission.py +46 -0
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/METADATA +22 -17
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/RECORD +235 -147
- cognee/api/.env.example +0 -5
- cognee/modules/retrieval/base_graph_retriever.py +0 -24
- cognee/modules/search/methods/get_search_type_tools.py +0 -223
- cognee/modules/search/methods/no_access_control_search.py +0 -62
- cognee/modules/search/utils/prepare_search_result.py +0 -63
- cognee/tests/test_feedback_enrichment.py +0 -174
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/WHEEL +0 -0
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/entry_points.txt +0 -0
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/licenses/NOTICE.md +0 -0
cognee/shared/utils.py
CHANGED
|
@@ -8,7 +8,8 @@ import http.server
|
|
|
8
8
|
import socketserver
|
|
9
9
|
from threading import Thread
|
|
10
10
|
import pathlib
|
|
11
|
-
from
|
|
11
|
+
from typing import Union, Any, Dict, List
|
|
12
|
+
from uuid import uuid4, uuid5, NAMESPACE_OID, UUID
|
|
12
13
|
|
|
13
14
|
from cognee.base_config import get_base_config
|
|
14
15
|
from cognee.shared.logging_utils import get_logger
|
|
@@ -58,7 +59,7 @@ def get_anonymous_id():
|
|
|
58
59
|
return anonymous_id
|
|
59
60
|
|
|
60
61
|
|
|
61
|
-
def _sanitize_nested_properties(obj, property_names: list[str]):
|
|
62
|
+
def _sanitize_nested_properties(obj: Any, property_names: list[str]) -> Any:
|
|
62
63
|
"""
|
|
63
64
|
Recursively replaces any property whose key matches one of `property_names`
|
|
64
65
|
(e.g., ['url', 'path']) in a nested dict or list with a uuid5 hash
|
|
@@ -78,7 +79,9 @@ def _sanitize_nested_properties(obj, property_names: list[str]):
|
|
|
78
79
|
return obj
|
|
79
80
|
|
|
80
81
|
|
|
81
|
-
def send_telemetry(event_name: str, user_id, additional_properties: dict = {}):
|
|
82
|
+
def send_telemetry(event_name: str, user_id: Union[str, UUID], additional_properties: dict = {}):
|
|
83
|
+
if additional_properties is None:
|
|
84
|
+
additional_properties = {}
|
|
82
85
|
if os.getenv("TELEMETRY_DISABLED"):
|
|
83
86
|
return
|
|
84
87
|
|
|
@@ -108,7 +111,7 @@ def send_telemetry(event_name: str, user_id, additional_properties: dict = {}):
|
|
|
108
111
|
print(f"Error sending telemetry through proxy: {response.status_code}")
|
|
109
112
|
|
|
110
113
|
|
|
111
|
-
def embed_logo(p, layout_scale, logo_alpha, position):
|
|
114
|
+
def embed_logo(p: Any, layout_scale: float, logo_alpha: float, position: str):
|
|
112
115
|
"""
|
|
113
116
|
Embed a logo into the graph visualization as a watermark.
|
|
114
117
|
"""
|
|
@@ -138,7 +141,11 @@ def embed_logo(p, layout_scale, logo_alpha, position):
|
|
|
138
141
|
|
|
139
142
|
|
|
140
143
|
def start_visualization_server(
|
|
141
|
-
host="0.0.0.0",
|
|
144
|
+
host: str = "0.0.0.0",
|
|
145
|
+
port: int = 8001,
|
|
146
|
+
handler_class: type[
|
|
147
|
+
http.server.SimpleHTTPRequestHandler
|
|
148
|
+
] = http.server.SimpleHTTPRequestHandler,
|
|
142
149
|
):
|
|
143
150
|
"""
|
|
144
151
|
Spin up a simple HTTP server in a background thread to serve files.
|
cognee/tasks/chunks/__init__.py
CHANGED
|
@@ -1,3 +1,12 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Text chunking and chunk management tasks.
|
|
3
|
+
|
|
4
|
+
This module provides functionality for splitting text into chunks using
|
|
5
|
+
different strategies (word, sentence, paragraph, or row-based) and for
|
|
6
|
+
cleaning up disconnected or obsolete chunks to support downstream
|
|
7
|
+
processing and knowledge graph workflows.
|
|
8
|
+
"""
|
|
9
|
+
|
|
1
10
|
from .chunk_by_word import chunk_by_word
|
|
2
11
|
from .chunk_by_sentence import chunk_by_sentence
|
|
3
12
|
from .chunk_by_paragraph import chunk_by_paragraph
|
|
cognee/tasks/cleanup/cleanup_unused_data.py
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Task for automatically deleting unused data from the memify pipeline.
|
|
3
|
+
|
|
4
|
+
This task identifies and removes entire documents that haven't
|
|
5
|
+
been accessed by retrievers for a specified period, helping maintain system
|
|
6
|
+
efficiency and storage optimization through whole-document removal.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
from datetime import datetime, timezone, timedelta
|
|
11
|
+
from typing import Optional, Dict, Any
|
|
12
|
+
from uuid import UUID
|
|
13
|
+
import os
|
|
14
|
+
from cognee.infrastructure.databases.graph import get_graph_engine
|
|
15
|
+
from cognee.infrastructure.databases.vector import get_vector_engine
|
|
16
|
+
from cognee.infrastructure.databases.relational import get_relational_engine
|
|
17
|
+
from cognee.modules.data.models import Data, DatasetData
|
|
18
|
+
from cognee.shared.logging_utils import get_logger
|
|
19
|
+
from sqlalchemy import select, or_
|
|
20
|
+
import cognee
|
|
21
|
+
import sqlalchemy as sa
|
|
22
|
+
from cognee.modules.graph.cognee_graph.CogneeGraph import CogneeGraph
|
|
23
|
+
|
|
24
|
+
logger = get_logger(__name__)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
async def cleanup_unused_data(
|
|
28
|
+
minutes_threshold: Optional[int], dry_run: bool = True, user_id: Optional[UUID] = None
|
|
29
|
+
) -> Dict[str, Any]:
|
|
30
|
+
"""
|
|
31
|
+
Identify and remove unused data from the memify pipeline.
|
|
32
|
+
|
|
33
|
+
Parameters
|
|
34
|
+
----------
|
|
35
|
+
minutes_threshold : int
|
|
36
|
+
Minutes since last access to consider data unused
|
|
37
|
+
dry_run : bool
|
|
38
|
+
If True, only report what would be deleted without actually deleting (default: True)
|
|
39
|
+
user_id : UUID, optional
|
|
40
|
+
Limit cleanup to specific user's data (default: None)
|
|
41
|
+
|
|
42
|
+
Returns
|
|
43
|
+
-------
|
|
44
|
+
Dict[str, Any]
|
|
45
|
+
Cleanup results with status, counts, and timestamp
|
|
46
|
+
"""
|
|
47
|
+
# Check 1: Environment variable must be enabled
|
|
48
|
+
if os.getenv("ENABLE_LAST_ACCESSED", "false").lower() != "true":
|
|
49
|
+
logger.warning("Cleanup skipped: ENABLE_LAST_ACCESSED is not enabled.")
|
|
50
|
+
return {
|
|
51
|
+
"status": "skipped",
|
|
52
|
+
"reason": "ENABLE_LAST_ACCESSED not enabled",
|
|
53
|
+
"unused_count": 0,
|
|
54
|
+
"deleted_count": {},
|
|
55
|
+
"cleanup_date": datetime.now(timezone.utc).isoformat(),
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
# Check 2: Verify tracking has actually been running
|
|
59
|
+
db_engine = get_relational_engine()
|
|
60
|
+
async with db_engine.get_async_session() as session:
|
|
61
|
+
# Count records with non-NULL last_accessed
|
|
62
|
+
tracked_count = await session.execute(
|
|
63
|
+
select(sa.func.count(Data.id)).where(Data.last_accessed.isnot(None))
|
|
64
|
+
)
|
|
65
|
+
tracked_records = tracked_count.scalar()
|
|
66
|
+
|
|
67
|
+
if tracked_records == 0:
|
|
68
|
+
logger.warning(
|
|
69
|
+
"Cleanup skipped: No records have been tracked yet. "
|
|
70
|
+
"ENABLE_LAST_ACCESSED may have been recently enabled. "
|
|
71
|
+
"Wait for retrievers to update timestamps before running cleanup."
|
|
72
|
+
)
|
|
73
|
+
return {
|
|
74
|
+
"status": "skipped",
|
|
75
|
+
"reason": "No tracked records found - tracking may be newly enabled",
|
|
76
|
+
"unused_count": 0,
|
|
77
|
+
"deleted_count": {},
|
|
78
|
+
"cleanup_date": datetime.now(timezone.utc).isoformat(),
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
logger.info(
|
|
82
|
+
"Starting cleanup task",
|
|
83
|
+
minutes_threshold=minutes_threshold,
|
|
84
|
+
dry_run=dry_run,
|
|
85
|
+
user_id=str(user_id) if user_id else None,
|
|
86
|
+
)
|
|
87
|
+
|
|
88
|
+
# Calculate cutoff timestamp
|
|
89
|
+
cutoff_date = datetime.now(timezone.utc) - timedelta(minutes=minutes_threshold)
|
|
90
|
+
|
|
91
|
+
# Document-level approach (recommended)
|
|
92
|
+
return await _cleanup_via_sql(cutoff_date, dry_run, user_id)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
async def _cleanup_via_sql(
|
|
96
|
+
cutoff_date: datetime, dry_run: bool, user_id: Optional[UUID] = None
|
|
97
|
+
) -> Dict[str, Any]:
|
|
98
|
+
"""
|
|
99
|
+
SQL-based cleanup: Query Data table for unused documents and use cognee.delete().
|
|
100
|
+
|
|
101
|
+
Parameters
|
|
102
|
+
----------
|
|
103
|
+
cutoff_date : datetime
|
|
104
|
+
Cutoff date for last_accessed filtering
|
|
105
|
+
dry_run : bool
|
|
106
|
+
If True, only report what would be deleted
|
|
107
|
+
user_id : UUID, optional
|
|
108
|
+
Filter by user ID if provided
|
|
109
|
+
|
|
110
|
+
Returns
|
|
111
|
+
-------
|
|
112
|
+
Dict[str, Any]
|
|
113
|
+
Cleanup results
|
|
114
|
+
"""
|
|
115
|
+
db_engine = get_relational_engine()
|
|
116
|
+
|
|
117
|
+
async with db_engine.get_async_session() as session:
|
|
118
|
+
# Query for Data records with old last_accessed timestamps
|
|
119
|
+
query = (
|
|
120
|
+
select(Data, DatasetData)
|
|
121
|
+
.join(DatasetData, Data.id == DatasetData.data_id)
|
|
122
|
+
.where(or_(Data.last_accessed < cutoff_date, Data.last_accessed.is_(None)))
|
|
123
|
+
)
|
|
124
|
+
|
|
125
|
+
if user_id:
|
|
126
|
+
from cognee.modules.data.models import Dataset
|
|
127
|
+
|
|
128
|
+
query = query.join(Dataset, DatasetData.dataset_id == Dataset.id).where(
|
|
129
|
+
Dataset.owner_id == user_id
|
|
130
|
+
)
|
|
131
|
+
|
|
132
|
+
result = await session.execute(query)
|
|
133
|
+
unused_data = result.all()
|
|
134
|
+
|
|
135
|
+
logger.info(f"Found {len(unused_data)} unused documents in SQL")
|
|
136
|
+
|
|
137
|
+
if dry_run:
|
|
138
|
+
return {
|
|
139
|
+
"status": "dry_run",
|
|
140
|
+
"unused_count": len(unused_data),
|
|
141
|
+
"deleted_count": {"data_items": 0, "documents": 0},
|
|
142
|
+
"cleanup_date": datetime.now(timezone.utc).isoformat(),
|
|
143
|
+
"preview": {"documents": len(unused_data)},
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
# Delete each document using cognee.delete()
|
|
147
|
+
deleted_count = 0
|
|
148
|
+
from cognee.modules.users.methods import get_default_user
|
|
149
|
+
|
|
150
|
+
user = await get_default_user() if user_id is None else None
|
|
151
|
+
|
|
152
|
+
for data, dataset_data in unused_data:
|
|
153
|
+
try:
|
|
154
|
+
await cognee.delete(
|
|
155
|
+
data_id=data.id,
|
|
156
|
+
dataset_id=dataset_data.dataset_id,
|
|
157
|
+
mode="hard", # Use hard mode to also remove orphaned entities
|
|
158
|
+
user=user,
|
|
159
|
+
)
|
|
160
|
+
deleted_count += 1
|
|
161
|
+
logger.info(f"Deleted document {data.id} from dataset {dataset_data.dataset_id}")
|
|
162
|
+
except Exception as e:
|
|
163
|
+
logger.error(f"Failed to delete document {data.id}: {e}")
|
|
164
|
+
|
|
165
|
+
logger.info("Cleanup completed", deleted_count=deleted_count)
|
|
166
|
+
|
|
167
|
+
return {
|
|
168
|
+
"status": "completed",
|
|
169
|
+
"unused_count": len(unused_data),
|
|
170
|
+
"deleted_count": {"data_items": deleted_count, "documents": deleted_count},
|
|
171
|
+
"cleanup_date": datetime.now(timezone.utc).isoformat(),
|
|
172
|
+
}
|
cognee/tasks/graph/__init__.py
CHANGED
|
@@ -1,2 +1,9 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Graph extraction and manipulation tasks.
|
|
3
|
+
|
|
4
|
+
This module provides tasks for extracting knowledge graphs from data,
|
|
5
|
+
building relationships between entities, and managing graph structures.
|
|
6
|
+
"""
|
|
7
|
+
|
|
1
8
|
from .extract_graph_from_data import extract_graph_from_data
|
|
2
9
|
from .extract_graph_from_code import extract_graph_from_code
|
cognee/tasks/memify/__init__.py
CHANGED
|
@@ -1,3 +1,11 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Memory and subgraph extraction tasks.
|
|
3
|
+
|
|
4
|
+
This module provides tasks for extracting subgraphs, document chunks, and
|
|
5
|
+
user session data, as well as initiating session cognification workflows,
|
|
6
|
+
to support memory enrichment and downstream knowledge graph processing.
|
|
7
|
+
"""
|
|
8
|
+
|
|
1
9
|
from .extract_subgraph import extract_subgraph
|
|
2
10
|
from .extract_subgraph_chunks import extract_subgraph_chunks
|
|
3
11
|
from .cognify_session import cognify_session
|