cognee 0.5.1.dev0__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (241) hide show
  1. cognee/__init__.py +2 -0
  2. cognee/alembic/README +1 -0
  3. cognee/alembic/env.py +107 -0
  4. cognee/alembic/script.py.mako +26 -0
  5. cognee/alembic/versions/1a58b986e6e1_enable_delete_for_old_tutorial_notebooks.py +52 -0
  6. cognee/alembic/versions/1d0bb7fede17_add_pipeline_run_status.py +33 -0
  7. cognee/alembic/versions/1daae0df1866_incremental_loading.py +48 -0
  8. cognee/alembic/versions/211ab850ef3d_add_sync_operations_table.py +118 -0
  9. cognee/alembic/versions/45957f0a9849_add_notebook_table.py +46 -0
  10. cognee/alembic/versions/46a6ce2bd2b2_expand_dataset_database_with_json_.py +333 -0
  11. cognee/alembic/versions/482cd6517ce4_add_default_user.py +30 -0
  12. cognee/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py +98 -0
  13. cognee/alembic/versions/8057ae7329c2_initial_migration.py +25 -0
  14. cognee/alembic/versions/9e7a3cb85175_loader_separation.py +104 -0
  15. cognee/alembic/versions/a1b2c3d4e5f6_add_label_column_to_data.py +38 -0
  16. cognee/alembic/versions/ab7e313804ae_permission_system_rework.py +236 -0
  17. cognee/alembic/versions/b9274c27a25a_kuzu_11_migration.py +75 -0
  18. cognee/alembic/versions/c946955da633_multi_tenant_support.py +137 -0
  19. cognee/alembic/versions/e1ec1dcb50b6_add_last_accessed_to_data.py +51 -0
  20. cognee/alembic/versions/e4ebee1091e7_expand_data_model_info.py +140 -0
  21. cognee/alembic.ini +117 -0
  22. cognee/api/v1/add/routers/get_add_router.py +2 -0
  23. cognee/api/v1/cognify/cognify.py +11 -6
  24. cognee/api/v1/cognify/routers/get_cognify_router.py +8 -0
  25. cognee/api/v1/config/config.py +60 -0
  26. cognee/api/v1/datasets/routers/get_datasets_router.py +45 -3
  27. cognee/api/v1/memify/routers/get_memify_router.py +2 -0
  28. cognee/api/v1/search/routers/get_search_router.py +21 -6
  29. cognee/api/v1/search/search.py +25 -5
  30. cognee/api/v1/sync/routers/get_sync_router.py +3 -3
  31. cognee/cli/commands/add_command.py +1 -1
  32. cognee/cli/commands/cognify_command.py +6 -0
  33. cognee/cli/commands/config_command.py +1 -1
  34. cognee/context_global_variables.py +5 -1
  35. cognee/eval_framework/answer_generation/answer_generation_executor.py +7 -8
  36. cognee/infrastructure/databases/cache/cache_db_interface.py +38 -1
  37. cognee/infrastructure/databases/cache/config.py +6 -0
  38. cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +21 -0
  39. cognee/infrastructure/databases/cache/get_cache_engine.py +9 -3
  40. cognee/infrastructure/databases/cache/redis/RedisAdapter.py +60 -1
  41. cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +7 -0
  42. cognee/infrastructure/databases/graph/get_graph_engine.py +29 -1
  43. cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +62 -27
  44. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +17 -4
  45. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +2 -1
  46. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -0
  47. cognee/infrastructure/databases/vector/config.py +6 -0
  48. cognee/infrastructure/databases/vector/create_vector_engine.py +69 -22
  49. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +64 -9
  50. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +13 -2
  51. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +16 -3
  52. cognee/infrastructure/databases/vector/models/ScoredResult.py +3 -3
  53. cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +16 -3
  54. cognee/infrastructure/databases/vector/pgvector/PGVectorDatasetDatabaseHandler.py +86 -0
  55. cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py +81 -2
  56. cognee/infrastructure/databases/vector/vector_db_interface.py +8 -0
  57. cognee/infrastructure/files/utils/get_data_file_path.py +33 -27
  58. cognee/infrastructure/llm/prompts/extract_query_time.txt +1 -1
  59. cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +1 -1
  60. cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +1 -1
  61. cognee/infrastructure/llm/prompts/generate_graph_prompt.txt +2 -2
  62. cognee/infrastructure/llm/prompts/generate_graph_prompt_guided.txt +1 -1
  63. cognee/infrastructure/llm/prompts/generate_graph_prompt_oneshot.txt +2 -2
  64. cognee/infrastructure/llm/prompts/generate_graph_prompt_simple.txt +1 -1
  65. cognee/infrastructure/llm/prompts/generate_graph_prompt_strict.txt +1 -1
  66. cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +6 -6
  67. cognee/infrastructure/llm/prompts/test.txt +1 -1
  68. cognee/infrastructure/llm/prompts/translate_content.txt +19 -0
  69. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +24 -0
  70. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llama_cpp/adapter.py +191 -0
  71. cognee/modules/chunking/models/DocumentChunk.py +0 -1
  72. cognee/modules/cognify/config.py +2 -0
  73. cognee/modules/data/models/Data.py +1 -0
  74. cognee/modules/engine/models/Entity.py +0 -1
  75. cognee/modules/engine/operations/setup.py +6 -0
  76. cognee/modules/graph/cognee_graph/CogneeGraph.py +150 -37
  77. cognee/modules/graph/cognee_graph/CogneeGraphElements.py +48 -2
  78. cognee/modules/graph/utils/__init__.py +1 -0
  79. cognee/modules/graph/utils/get_entity_nodes_from_triplets.py +12 -0
  80. cognee/modules/notebooks/methods/__init__.py +1 -0
  81. cognee/modules/notebooks/methods/create_notebook.py +0 -34
  82. cognee/modules/notebooks/methods/create_tutorial_notebooks.py +191 -0
  83. cognee/modules/notebooks/methods/get_notebooks.py +12 -8
  84. cognee/modules/notebooks/tutorials/cognee-basics/cell-1.md +3 -0
  85. cognee/modules/notebooks/tutorials/cognee-basics/cell-2.md +10 -0
  86. cognee/modules/notebooks/tutorials/cognee-basics/cell-3.md +7 -0
  87. cognee/modules/notebooks/tutorials/cognee-basics/cell-4.py +28 -0
  88. cognee/modules/notebooks/tutorials/cognee-basics/cell-5.py +3 -0
  89. cognee/modules/notebooks/tutorials/cognee-basics/cell-6.py +9 -0
  90. cognee/modules/notebooks/tutorials/cognee-basics/cell-7.py +17 -0
  91. cognee/modules/notebooks/tutorials/cognee-basics/config.json +4 -0
  92. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-1.md +3 -0
  93. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-10.md +3 -0
  94. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-11.md +3 -0
  95. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-12.py +3 -0
  96. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-13.md +7 -0
  97. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-14.py +6 -0
  98. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-15.md +3 -0
  99. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-16.py +7 -0
  100. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-2.md +9 -0
  101. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-3.md +7 -0
  102. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-4.md +9 -0
  103. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-5.md +5 -0
  104. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-6.py +13 -0
  105. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-7.md +3 -0
  106. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-8.md +3 -0
  107. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-9.py +31 -0
  108. cognee/modules/notebooks/tutorials/python-development-with-cognee/config.json +4 -0
  109. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/copilot_conversations.json +107 -0
  110. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/guido_contributions.json +976 -0
  111. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/my_developer_rules.md +79 -0
  112. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/pep_style_guide.md +74 -0
  113. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/zen_principles.md +74 -0
  114. cognee/modules/retrieval/EntityCompletionRetriever.py +51 -38
  115. cognee/modules/retrieval/__init__.py +0 -1
  116. cognee/modules/retrieval/base_retriever.py +66 -10
  117. cognee/modules/retrieval/chunks_retriever.py +57 -49
  118. cognee/modules/retrieval/coding_rules_retriever.py +12 -5
  119. cognee/modules/retrieval/completion_retriever.py +29 -28
  120. cognee/modules/retrieval/cypher_search_retriever.py +25 -20
  121. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +42 -46
  122. cognee/modules/retrieval/graph_completion_cot_retriever.py +68 -51
  123. cognee/modules/retrieval/graph_completion_retriever.py +78 -63
  124. cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
  125. cognee/modules/retrieval/lexical_retriever.py +34 -12
  126. cognee/modules/retrieval/natural_language_retriever.py +18 -15
  127. cognee/modules/retrieval/summaries_retriever.py +51 -34
  128. cognee/modules/retrieval/temporal_retriever.py +59 -49
  129. cognee/modules/retrieval/triplet_retriever.py +31 -32
  130. cognee/modules/retrieval/utils/access_tracking.py +88 -0
  131. cognee/modules/retrieval/utils/brute_force_triplet_search.py +99 -85
  132. cognee/modules/retrieval/utils/node_edge_vector_search.py +174 -0
  133. cognee/modules/search/methods/__init__.py +1 -0
  134. cognee/modules/search/methods/get_retriever_output.py +53 -0
  135. cognee/modules/search/methods/get_search_type_retriever_instance.py +252 -0
  136. cognee/modules/search/methods/search.py +90 -215
  137. cognee/modules/search/models/SearchResultPayload.py +67 -0
  138. cognee/modules/search/types/SearchResult.py +1 -8
  139. cognee/modules/search/types/SearchType.py +1 -2
  140. cognee/modules/search/types/__init__.py +1 -1
  141. cognee/modules/search/utils/__init__.py +1 -2
  142. cognee/modules/search/utils/transform_insights_to_graph.py +2 -2
  143. cognee/modules/search/utils/{transform_context_to_graph.py → transform_triplets_to_graph.py} +2 -2
  144. cognee/modules/users/authentication/default/default_transport.py +11 -1
  145. cognee/modules/users/authentication/get_api_auth_backend.py +2 -1
  146. cognee/modules/users/authentication/get_client_auth_backend.py +2 -1
  147. cognee/modules/users/methods/create_user.py +0 -9
  148. cognee/modules/users/permissions/methods/has_user_management_permission.py +29 -0
  149. cognee/modules/visualization/cognee_network_visualization.py +1 -1
  150. cognee/run_migrations.py +48 -0
  151. cognee/shared/exceptions/__init__.py +1 -3
  152. cognee/shared/exceptions/exceptions.py +11 -1
  153. cognee/shared/usage_logger.py +332 -0
  154. cognee/shared/utils.py +12 -5
  155. cognee/tasks/chunks/__init__.py +9 -0
  156. cognee/tasks/cleanup/cleanup_unused_data.py +172 -0
  157. cognee/tasks/graph/__init__.py +7 -0
  158. cognee/tasks/memify/__init__.py +8 -0
  159. cognee/tasks/memify/extract_usage_frequency.py +613 -0
  160. cognee/tasks/summarization/models.py +0 -2
  161. cognee/tasks/temporal_graph/__init__.py +0 -1
  162. cognee/tasks/translation/__init__.py +96 -0
  163. cognee/tasks/translation/config.py +110 -0
  164. cognee/tasks/translation/detect_language.py +190 -0
  165. cognee/tasks/translation/exceptions.py +62 -0
  166. cognee/tasks/translation/models.py +72 -0
  167. cognee/tasks/translation/providers/__init__.py +44 -0
  168. cognee/tasks/translation/providers/azure_provider.py +192 -0
  169. cognee/tasks/translation/providers/base.py +85 -0
  170. cognee/tasks/translation/providers/google_provider.py +158 -0
  171. cognee/tasks/translation/providers/llm_provider.py +143 -0
  172. cognee/tasks/translation/translate_content.py +282 -0
  173. cognee/tasks/web_scraper/default_url_crawler.py +6 -2
  174. cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +1 -0
  175. cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +3 -0
  176. cognee/tests/integration/retrieval/test_brute_force_triplet_search_with_cognify.py +62 -0
  177. cognee/tests/integration/retrieval/test_chunks_retriever.py +115 -16
  178. cognee/tests/integration/retrieval/test_graph_completion_retriever.py +13 -5
  179. cognee/tests/integration/retrieval/test_graph_completion_retriever_context_extension.py +22 -20
  180. cognee/tests/integration/retrieval/test_graph_completion_retriever_cot.py +23 -24
  181. cognee/tests/integration/retrieval/test_rag_completion_retriever.py +70 -5
  182. cognee/tests/integration/retrieval/test_structured_output.py +62 -18
  183. cognee/tests/integration/retrieval/test_summaries_retriever.py +20 -9
  184. cognee/tests/integration/retrieval/test_temporal_retriever.py +38 -8
  185. cognee/tests/integration/retrieval/test_triplet_retriever.py +13 -4
  186. cognee/tests/integration/shared/test_usage_logger_integration.py +255 -0
  187. cognee/tests/tasks/translation/README.md +147 -0
  188. cognee/tests/tasks/translation/__init__.py +1 -0
  189. cognee/tests/tasks/translation/config_test.py +93 -0
  190. cognee/tests/tasks/translation/detect_language_test.py +118 -0
  191. cognee/tests/tasks/translation/providers_test.py +151 -0
  192. cognee/tests/tasks/translation/translate_content_test.py +213 -0
  193. cognee/tests/test_chromadb.py +1 -1
  194. cognee/tests/test_cleanup_unused_data.py +165 -0
  195. cognee/tests/test_delete_by_id.py +6 -6
  196. cognee/tests/test_extract_usage_frequency.py +308 -0
  197. cognee/tests/test_kuzu.py +17 -7
  198. cognee/tests/test_lancedb.py +3 -1
  199. cognee/tests/test_library.py +1 -1
  200. cognee/tests/test_neo4j.py +17 -7
  201. cognee/tests/test_neptune_analytics_vector.py +3 -1
  202. cognee/tests/test_permissions.py +172 -187
  203. cognee/tests/test_pgvector.py +3 -1
  204. cognee/tests/test_relational_db_migration.py +15 -1
  205. cognee/tests/test_remote_kuzu.py +3 -1
  206. cognee/tests/test_s3_file_storage.py +1 -1
  207. cognee/tests/test_search_db.py +97 -110
  208. cognee/tests/test_usage_logger_e2e.py +268 -0
  209. cognee/tests/unit/api/test_get_raw_data_endpoint.py +206 -0
  210. cognee/tests/unit/eval_framework/answer_generation_test.py +4 -3
  211. cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +2 -0
  212. cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +42 -2
  213. cognee/tests/unit/modules/graph/cognee_graph_test.py +329 -31
  214. cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +31 -59
  215. cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +70 -33
  216. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +72 -52
  217. cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +27 -33
  218. cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +28 -15
  219. cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +37 -42
  220. cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +48 -64
  221. cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +263 -24
  222. cognee/tests/unit/modules/retrieval/test_node_edge_vector_search.py +273 -0
  223. cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +30 -16
  224. cognee/tests/unit/modules/search/test_get_search_type_retriever_instance.py +125 -0
  225. cognee/tests/unit/modules/search/test_search.py +176 -0
  226. cognee/tests/unit/modules/search/test_search_prepare_search_result_contract.py +190 -0
  227. cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +511 -297
  228. cognee/tests/unit/shared/test_usage_logger.py +241 -0
  229. cognee/tests/unit/users/permissions/test_has_user_management_permission.py +46 -0
  230. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/METADATA +22 -17
  231. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/RECORD +235 -147
  232. cognee/api/.env.example +0 -5
  233. cognee/modules/retrieval/base_graph_retriever.py +0 -24
  234. cognee/modules/search/methods/get_search_type_tools.py +0 -223
  235. cognee/modules/search/methods/no_access_control_search.py +0 -62
  236. cognee/modules/search/utils/prepare_search_result.py +0 -63
  237. cognee/tests/test_feedback_enrichment.py +0 -174
  238. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/WHEEL +0 -0
  239. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/entry_points.txt +0 -0
  240. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/licenses/LICENSE +0 -0
  241. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dist-info}/licenses/NOTICE.md +0 -0
cognee/shared/utils.py CHANGED
@@ -8,7 +8,8 @@ import http.server
8
8
  import socketserver
9
9
  from threading import Thread
10
10
  import pathlib
11
- from uuid import uuid4, uuid5, NAMESPACE_OID
11
+ from typing import Union, Any, Dict, List
12
+ from uuid import uuid4, uuid5, NAMESPACE_OID, UUID
12
13
 
13
14
  from cognee.base_config import get_base_config
14
15
  from cognee.shared.logging_utils import get_logger
@@ -58,7 +59,7 @@ def get_anonymous_id():
58
59
  return anonymous_id
59
60
 
60
61
 
61
- def _sanitize_nested_properties(obj, property_names: list[str]):
62
+ def _sanitize_nested_properties(obj: Any, property_names: list[str]) -> Any:
62
63
  """
63
64
  Recursively replaces any property whose key matches one of `property_names`
64
65
  (e.g., ['url', 'path']) in a nested dict or list with a uuid5 hash
@@ -78,7 +79,9 @@ def _sanitize_nested_properties(obj, property_names: list[str]):
78
79
  return obj
79
80
 
80
81
 
81
- def send_telemetry(event_name: str, user_id, additional_properties: dict = {}):
82
+ def send_telemetry(event_name: str, user_id: Union[str, UUID], additional_properties: dict = {}):
83
+ if additional_properties is None:
84
+ additional_properties = {}
82
85
  if os.getenv("TELEMETRY_DISABLED"):
83
86
  return
84
87
 
@@ -108,7 +111,7 @@ def send_telemetry(event_name: str, user_id, additional_properties: dict = {}):
108
111
  print(f"Error sending telemetry through proxy: {response.status_code}")
109
112
 
110
113
 
111
- def embed_logo(p, layout_scale, logo_alpha, position):
114
+ def embed_logo(p: Any, layout_scale: float, logo_alpha: float, position: str):
112
115
  """
113
116
  Embed a logo into the graph visualization as a watermark.
114
117
  """
@@ -138,7 +141,11 @@ def embed_logo(p, layout_scale, logo_alpha, position):
138
141
 
139
142
 
140
143
  def start_visualization_server(
141
- host="0.0.0.0", port=8001, handler_class=http.server.SimpleHTTPRequestHandler
144
+ host: str = "0.0.0.0",
145
+ port: int = 8001,
146
+ handler_class: type[
147
+ http.server.SimpleHTTPRequestHandler
148
+ ] = http.server.SimpleHTTPRequestHandler,
142
149
  ):
143
150
  """
144
151
  Spin up a simple HTTP server in a background thread to serve files.
@@ -1,3 +1,12 @@
1
+ """
2
+ Text chunking and chunk management tasks.
3
+
4
+ This module provides functionality for splitting text into chunks using
5
+ different strategies (word, sentence, paragraph, or row-based) and for
6
+ cleaning up disconnected or obsolete chunks to support downstream
7
+ processing and knowledge graph workflows.
8
+ """
9
+
1
10
  from .chunk_by_word import chunk_by_word
2
11
  from .chunk_by_sentence import chunk_by_sentence
3
12
  from .chunk_by_paragraph import chunk_by_paragraph
@@ -0,0 +1,172 @@
1
+ """
2
+ Task for automatically deleting unused data from the memify pipeline.
3
+
4
+ This task identifies and removes entire documents that haven't
5
+ been accessed by retrievers for a specified period, helping maintain system
6
+ efficiency and storage optimization through whole-document removal.
7
+ """
8
+
9
+ import json
10
+ from datetime import datetime, timezone, timedelta
11
+ from typing import Optional, Dict, Any
12
+ from uuid import UUID
13
+ import os
14
+ from cognee.infrastructure.databases.graph import get_graph_engine
15
+ from cognee.infrastructure.databases.vector import get_vector_engine
16
+ from cognee.infrastructure.databases.relational import get_relational_engine
17
+ from cognee.modules.data.models import Data, DatasetData
18
+ from cognee.shared.logging_utils import get_logger
19
+ from sqlalchemy import select, or_
20
+ import cognee
21
+ import sqlalchemy as sa
22
+ from cognee.modules.graph.cognee_graph.CogneeGraph import CogneeGraph
23
+
24
+ logger = get_logger(__name__)
25
+
26
+
27
async def cleanup_unused_data(
    minutes_threshold: Optional[int], dry_run: bool = True, user_id: Optional[UUID] = None
) -> Dict[str, Any]:
    """
    Identify and remove unused data from the memify pipeline.

    The task returns a ``"skipped"`` result without touching any data unless
    the ``ENABLE_LAST_ACCESSED`` environment variable equals ``"true"``
    (case-insensitive) and at least one ``Data`` row already has a non-NULL
    ``last_accessed`` value. Otherwise the actual work is delegated to
    ``_cleanup_via_sql``.

    Parameters
    ----------
    minutes_threshold : int
        Minutes since last access to consider data unused. Must be a
        non-negative number; ``None`` is rejected once the skip checks pass.
    dry_run : bool
        If True, only report what would be deleted without actually deleting (default: True)
    user_id : UUID, optional
        Limit cleanup to specific user's data (default: None)

    Returns
    -------
    Dict[str, Any]
        Cleanup results with status, counts, and timestamp

    Raises
    ------
    TypeError
        If ``minutes_threshold`` is None when the cutoff has to be computed.
    ValueError
        If ``minutes_threshold`` is negative.
    """
    # Check 1: Environment variable must be enabled
    if os.getenv("ENABLE_LAST_ACCESSED", "false").lower() != "true":
        logger.warning("Cleanup skipped: ENABLE_LAST_ACCESSED is not enabled.")
        return {
            "status": "skipped",
            "reason": "ENABLE_LAST_ACCESSED not enabled",
            "unused_count": 0,
            "deleted_count": {},
            "cleanup_date": datetime.now(timezone.utc).isoformat(),
        }

    # Check 2: Verify tracking has actually been running
    db_engine = get_relational_engine()
    async with db_engine.get_async_session() as session:
        # Count records with non-NULL last_accessed
        tracked_count = await session.execute(
            select(sa.func.count(Data.id)).where(Data.last_accessed.isnot(None))
        )
        tracked_records = tracked_count.scalar()

        if tracked_records == 0:
            logger.warning(
                "Cleanup skipped: No records have been tracked yet. "
                "ENABLE_LAST_ACCESSED may have been recently enabled. "
                "Wait for retrievers to update timestamps before running cleanup."
            )
            return {
                "status": "skipped",
                "reason": "No tracked records found - tracking may be newly enabled",
                "unused_count": 0,
                "deleted_count": {},
                "cleanup_date": datetime.now(timezone.utc).isoformat(),
            }

    # Validate only after the skip checks so that calls which used to return a
    # "skipped" result keep doing so. TypeError matches what timedelta() itself
    # would raise for None, but with an actionable message.
    if minutes_threshold is None:
        raise TypeError("minutes_threshold must be a number of minutes, not None")
    # A negative threshold would place the cutoff in the future, which marks
    # every document as unused and would wipe all data on a non-dry run.
    if minutes_threshold < 0:
        raise ValueError(f"minutes_threshold must be non-negative, got {minutes_threshold}")

    logger.info(
        "Starting cleanup task",
        minutes_threshold=minutes_threshold,
        dry_run=dry_run,
        user_id=str(user_id) if user_id else None,
    )

    # Calculate cutoff timestamp (timezone-aware, UTC)
    cutoff_date = datetime.now(timezone.utc) - timedelta(minutes=minutes_threshold)

    # Document-level approach (recommended)
    return await _cleanup_via_sql(cutoff_date, dry_run, user_id)
+
94
+
95
+ async def _cleanup_via_sql(
96
+ cutoff_date: datetime, dry_run: bool, user_id: Optional[UUID] = None
97
+ ) -> Dict[str, Any]:
98
+ """
99
+ SQL-based cleanup: Query Data table for unused documents and use cognee.delete().
100
+
101
+ Parameters
102
+ ----------
103
+ cutoff_date : datetime
104
+ Cutoff date for last_accessed filtering
105
+ dry_run : bool
106
+ If True, only report what would be deleted
107
+ user_id : UUID, optional
108
+ Filter by user ID if provided
109
+
110
+ Returns
111
+ -------
112
+ Dict[str, Any]
113
+ Cleanup results
114
+ """
115
+ db_engine = get_relational_engine()
116
+
117
+ async with db_engine.get_async_session() as session:
118
+ # Query for Data records with old last_accessed timestamps
119
+ query = (
120
+ select(Data, DatasetData)
121
+ .join(DatasetData, Data.id == DatasetData.data_id)
122
+ .where(or_(Data.last_accessed < cutoff_date, Data.last_accessed.is_(None)))
123
+ )
124
+
125
+ if user_id:
126
+ from cognee.modules.data.models import Dataset
127
+
128
+ query = query.join(Dataset, DatasetData.dataset_id == Dataset.id).where(
129
+ Dataset.owner_id == user_id
130
+ )
131
+
132
+ result = await session.execute(query)
133
+ unused_data = result.all()
134
+
135
+ logger.info(f"Found {len(unused_data)} unused documents in SQL")
136
+
137
+ if dry_run:
138
+ return {
139
+ "status": "dry_run",
140
+ "unused_count": len(unused_data),
141
+ "deleted_count": {"data_items": 0, "documents": 0},
142
+ "cleanup_date": datetime.now(timezone.utc).isoformat(),
143
+ "preview": {"documents": len(unused_data)},
144
+ }
145
+
146
+ # Delete each document using cognee.delete()
147
+ deleted_count = 0
148
+ from cognee.modules.users.methods import get_default_user
149
+
150
+ user = await get_default_user() if user_id is None else None
151
+
152
+ for data, dataset_data in unused_data:
153
+ try:
154
+ await cognee.delete(
155
+ data_id=data.id,
156
+ dataset_id=dataset_data.dataset_id,
157
+ mode="hard", # Use hard mode to also remove orphaned entities
158
+ user=user,
159
+ )
160
+ deleted_count += 1
161
+ logger.info(f"Deleted document {data.id} from dataset {dataset_data.dataset_id}")
162
+ except Exception as e:
163
+ logger.error(f"Failed to delete document {data.id}: {e}")
164
+
165
+ logger.info("Cleanup completed", deleted_count=deleted_count)
166
+
167
+ return {
168
+ "status": "completed",
169
+ "unused_count": len(unused_data),
170
+ "deleted_count": {"data_items": deleted_count, "documents": deleted_count},
171
+ "cleanup_date": datetime.now(timezone.utc).isoformat(),
172
+ }
@@ -1,2 +1,9 @@
1
+ """
2
+ Graph extraction and manipulation tasks.
3
+
4
+ This module provides tasks for extracting knowledge graphs from data,
5
+ building relationships between entities, and managing graph structures.
6
+ """
7
+
1
8
  from .extract_graph_from_data import extract_graph_from_data
2
9
  from .extract_graph_from_code import extract_graph_from_code
@@ -1,3 +1,11 @@
1
+ """
2
+ Memory and subgraph extraction tasks.
3
+
4
+ This module provides tasks for extracting subgraphs, document chunks, and
5
+ user session data, as well as initiating session cognification workflows,
6
+ to support memory enrichment and downstream knowledge graph processing.
7
+ """
8
+
1
9
  from .extract_subgraph import extract_subgraph
2
10
  from .extract_subgraph_chunks import extract_subgraph_chunks
3
11
  from .cognify_session import cognify_session