cognee 0.5.1__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (265)
  1. cognee/__init__.py +2 -0
  2. cognee/alembic/README +1 -0
  3. cognee/alembic/env.py +107 -0
  4. cognee/alembic/script.py.mako +26 -0
  5. cognee/alembic/versions/1a58b986e6e1_enable_delete_for_old_tutorial_notebooks.py +52 -0
  6. cognee/alembic/versions/1d0bb7fede17_add_pipeline_run_status.py +33 -0
  7. cognee/alembic/versions/1daae0df1866_incremental_loading.py +48 -0
  8. cognee/alembic/versions/211ab850ef3d_add_sync_operations_table.py +118 -0
  9. cognee/alembic/versions/45957f0a9849_add_notebook_table.py +46 -0
  10. cognee/alembic/versions/46a6ce2bd2b2_expand_dataset_database_with_json_.py +333 -0
  11. cognee/alembic/versions/482cd6517ce4_add_default_user.py +30 -0
  12. cognee/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py +98 -0
  13. cognee/alembic/versions/8057ae7329c2_initial_migration.py +25 -0
  14. cognee/alembic/versions/9e7a3cb85175_loader_separation.py +104 -0
  15. cognee/alembic/versions/a1b2c3d4e5f6_add_label_column_to_data.py +38 -0
  16. cognee/alembic/versions/ab7e313804ae_permission_system_rework.py +236 -0
  17. cognee/alembic/versions/b9274c27a25a_kuzu_11_migration.py +75 -0
  18. cognee/alembic/versions/c946955da633_multi_tenant_support.py +137 -0
  19. cognee/alembic/versions/e1ec1dcb50b6_add_last_accessed_to_data.py +51 -0
  20. cognee/alembic/versions/e4ebee1091e7_expand_data_model_info.py +140 -0
  21. cognee/alembic.ini +117 -0
  22. cognee/api/v1/add/add.py +2 -1
  23. cognee/api/v1/add/routers/get_add_router.py +2 -0
  24. cognee/api/v1/cognify/cognify.py +11 -6
  25. cognee/api/v1/cognify/routers/get_cognify_router.py +8 -0
  26. cognee/api/v1/config/config.py +60 -0
  27. cognee/api/v1/datasets/routers/get_datasets_router.py +46 -3
  28. cognee/api/v1/memify/routers/get_memify_router.py +3 -0
  29. cognee/api/v1/search/routers/get_search_router.py +21 -6
  30. cognee/api/v1/search/search.py +21 -5
  31. cognee/api/v1/sync/routers/get_sync_router.py +3 -3
  32. cognee/cli/commands/add_command.py +1 -1
  33. cognee/cli/commands/cognify_command.py +6 -0
  34. cognee/cli/commands/config_command.py +1 -1
  35. cognee/context_global_variables.py +5 -1
  36. cognee/eval_framework/answer_generation/answer_generation_executor.py +7 -8
  37. cognee/infrastructure/databases/cache/cache_db_interface.py +38 -1
  38. cognee/infrastructure/databases/cache/config.py +6 -0
  39. cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +21 -0
  40. cognee/infrastructure/databases/cache/get_cache_engine.py +9 -3
  41. cognee/infrastructure/databases/cache/redis/RedisAdapter.py +60 -1
  42. cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +7 -0
  43. cognee/infrastructure/databases/graph/get_graph_engine.py +29 -1
  44. cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +62 -27
  45. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +17 -4
  46. cognee/infrastructure/databases/relational/config.py +16 -1
  47. cognee/infrastructure/databases/relational/create_relational_engine.py +13 -3
  48. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +26 -3
  49. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -0
  50. cognee/infrastructure/databases/vector/config.py +6 -0
  51. cognee/infrastructure/databases/vector/create_vector_engine.py +70 -16
  52. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +64 -9
  53. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +13 -2
  54. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +16 -3
  55. cognee/infrastructure/databases/vector/models/ScoredResult.py +3 -3
  56. cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +16 -3
  57. cognee/infrastructure/databases/vector/pgvector/PGVectorDatasetDatabaseHandler.py +86 -0
  58. cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py +81 -2
  59. cognee/infrastructure/databases/vector/vector_db_interface.py +8 -0
  60. cognee/infrastructure/files/utils/get_data_file_path.py +33 -27
  61. cognee/infrastructure/llm/LLMGateway.py +0 -13
  62. cognee/infrastructure/llm/prompts/extract_query_time.txt +1 -1
  63. cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +1 -1
  64. cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +1 -1
  65. cognee/infrastructure/llm/prompts/generate_graph_prompt.txt +2 -2
  66. cognee/infrastructure/llm/prompts/generate_graph_prompt_guided.txt +1 -1
  67. cognee/infrastructure/llm/prompts/generate_graph_prompt_oneshot.txt +2 -2
  68. cognee/infrastructure/llm/prompts/generate_graph_prompt_simple.txt +1 -1
  69. cognee/infrastructure/llm/prompts/generate_graph_prompt_strict.txt +1 -1
  70. cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +6 -6
  71. cognee/infrastructure/llm/prompts/test.txt +1 -1
  72. cognee/infrastructure/llm/prompts/translate_content.txt +19 -0
  73. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +17 -12
  74. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +31 -25
  75. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +132 -7
  76. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +29 -5
  77. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llama_cpp/adapter.py +191 -0
  78. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llm_interface.py +2 -6
  79. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +58 -13
  80. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +0 -1
  81. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +25 -131
  82. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/types.py +10 -0
  83. cognee/modules/chunking/models/DocumentChunk.py +0 -1
  84. cognee/modules/cognify/config.py +2 -0
  85. cognee/modules/data/models/Data.py +3 -1
  86. cognee/modules/engine/models/Entity.py +0 -1
  87. cognee/modules/engine/operations/setup.py +6 -0
  88. cognee/modules/graph/cognee_graph/CogneeGraph.py +150 -37
  89. cognee/modules/graph/cognee_graph/CogneeGraphElements.py +48 -2
  90. cognee/modules/graph/utils/__init__.py +1 -0
  91. cognee/modules/graph/utils/get_entity_nodes_from_triplets.py +12 -0
  92. cognee/modules/notebooks/methods/__init__.py +1 -0
  93. cognee/modules/notebooks/methods/create_notebook.py +0 -34
  94. cognee/modules/notebooks/methods/create_tutorial_notebooks.py +191 -0
  95. cognee/modules/notebooks/methods/get_notebooks.py +12 -8
  96. cognee/modules/notebooks/tutorials/cognee-basics/cell-1.md +3 -0
  97. cognee/modules/notebooks/tutorials/cognee-basics/cell-2.md +10 -0
  98. cognee/modules/notebooks/tutorials/cognee-basics/cell-3.md +7 -0
  99. cognee/modules/notebooks/tutorials/cognee-basics/cell-4.py +28 -0
  100. cognee/modules/notebooks/tutorials/cognee-basics/cell-5.py +3 -0
  101. cognee/modules/notebooks/tutorials/cognee-basics/cell-6.py +9 -0
  102. cognee/modules/notebooks/tutorials/cognee-basics/cell-7.py +17 -0
  103. cognee/modules/notebooks/tutorials/cognee-basics/config.json +4 -0
  104. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-1.md +3 -0
  105. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-10.md +3 -0
  106. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-11.md +3 -0
  107. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-12.py +3 -0
  108. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-13.md +7 -0
  109. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-14.py +6 -0
  110. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-15.md +3 -0
  111. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-16.py +7 -0
  112. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-2.md +9 -0
  113. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-3.md +7 -0
  114. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-4.md +9 -0
  115. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-5.md +5 -0
  116. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-6.py +13 -0
  117. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-7.md +3 -0
  118. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-8.md +3 -0
  119. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-9.py +31 -0
  120. cognee/modules/notebooks/tutorials/python-development-with-cognee/config.json +4 -0
  121. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/copilot_conversations.json +107 -0
  122. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/guido_contributions.json +976 -0
  123. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/my_developer_rules.md +79 -0
  124. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/pep_style_guide.md +74 -0
  125. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/zen_principles.md +74 -0
  126. cognee/modules/retrieval/EntityCompletionRetriever.py +51 -38
  127. cognee/modules/retrieval/__init__.py +0 -1
  128. cognee/modules/retrieval/base_retriever.py +66 -10
  129. cognee/modules/retrieval/chunks_retriever.py +57 -49
  130. cognee/modules/retrieval/coding_rules_retriever.py +12 -5
  131. cognee/modules/retrieval/completion_retriever.py +29 -28
  132. cognee/modules/retrieval/cypher_search_retriever.py +25 -20
  133. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +42 -46
  134. cognee/modules/retrieval/graph_completion_cot_retriever.py +68 -51
  135. cognee/modules/retrieval/graph_completion_retriever.py +78 -63
  136. cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
  137. cognee/modules/retrieval/lexical_retriever.py +34 -12
  138. cognee/modules/retrieval/natural_language_retriever.py +18 -15
  139. cognee/modules/retrieval/summaries_retriever.py +51 -34
  140. cognee/modules/retrieval/temporal_retriever.py +59 -49
  141. cognee/modules/retrieval/triplet_retriever.py +32 -33
  142. cognee/modules/retrieval/utils/access_tracking.py +88 -0
  143. cognee/modules/retrieval/utils/brute_force_triplet_search.py +99 -103
  144. cognee/modules/retrieval/utils/node_edge_vector_search.py +174 -0
  145. cognee/modules/search/methods/__init__.py +1 -0
  146. cognee/modules/search/methods/get_retriever_output.py +53 -0
  147. cognee/modules/search/methods/get_search_type_retriever_instance.py +252 -0
  148. cognee/modules/search/methods/search.py +90 -222
  149. cognee/modules/search/models/SearchResultPayload.py +67 -0
  150. cognee/modules/search/types/SearchResult.py +1 -8
  151. cognee/modules/search/types/SearchType.py +1 -2
  152. cognee/modules/search/types/__init__.py +1 -1
  153. cognee/modules/search/utils/__init__.py +1 -2
  154. cognee/modules/search/utils/transform_insights_to_graph.py +2 -2
  155. cognee/modules/search/utils/{transform_context_to_graph.py → transform_triplets_to_graph.py} +2 -2
  156. cognee/modules/users/authentication/default/default_transport.py +11 -1
  157. cognee/modules/users/authentication/get_api_auth_backend.py +2 -1
  158. cognee/modules/users/authentication/get_client_auth_backend.py +2 -1
  159. cognee/modules/users/methods/create_user.py +0 -9
  160. cognee/modules/users/permissions/methods/has_user_management_permission.py +29 -0
  161. cognee/modules/visualization/cognee_network_visualization.py +1 -1
  162. cognee/run_migrations.py +48 -0
  163. cognee/shared/exceptions/__init__.py +1 -3
  164. cognee/shared/exceptions/exceptions.py +11 -1
  165. cognee/shared/usage_logger.py +332 -0
  166. cognee/shared/utils.py +12 -5
  167. cognee/tasks/chunks/__init__.py +9 -0
  168. cognee/tasks/cleanup/cleanup_unused_data.py +172 -0
  169. cognee/tasks/graph/__init__.py +7 -0
  170. cognee/tasks/ingestion/data_item.py +8 -0
  171. cognee/tasks/ingestion/ingest_data.py +12 -1
  172. cognee/tasks/ingestion/save_data_item_to_storage.py +5 -0
  173. cognee/tasks/memify/__init__.py +8 -0
  174. cognee/tasks/memify/extract_usage_frequency.py +613 -0
  175. cognee/tasks/summarization/models.py +0 -2
  176. cognee/tasks/temporal_graph/__init__.py +0 -1
  177. cognee/tasks/translation/__init__.py +96 -0
  178. cognee/tasks/translation/config.py +110 -0
  179. cognee/tasks/translation/detect_language.py +190 -0
  180. cognee/tasks/translation/exceptions.py +62 -0
  181. cognee/tasks/translation/models.py +72 -0
  182. cognee/tasks/translation/providers/__init__.py +44 -0
  183. cognee/tasks/translation/providers/azure_provider.py +192 -0
  184. cognee/tasks/translation/providers/base.py +85 -0
  185. cognee/tasks/translation/providers/google_provider.py +158 -0
  186. cognee/tasks/translation/providers/llm_provider.py +143 -0
  187. cognee/tasks/translation/translate_content.py +282 -0
  188. cognee/tasks/web_scraper/default_url_crawler.py +6 -2
  189. cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +1 -0
  190. cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +3 -0
  191. cognee/tests/integration/retrieval/test_brute_force_triplet_search_with_cognify.py +62 -0
  192. cognee/tests/integration/retrieval/test_chunks_retriever.py +351 -0
  193. cognee/tests/integration/retrieval/test_graph_completion_retriever.py +276 -0
  194. cognee/tests/integration/retrieval/test_graph_completion_retriever_context_extension.py +228 -0
  195. cognee/tests/integration/retrieval/test_graph_completion_retriever_cot.py +217 -0
  196. cognee/tests/integration/retrieval/test_rag_completion_retriever.py +319 -0
  197. cognee/tests/integration/retrieval/test_structured_output.py +258 -0
  198. cognee/tests/integration/retrieval/test_summaries_retriever.py +195 -0
  199. cognee/tests/integration/retrieval/test_temporal_retriever.py +336 -0
  200. cognee/tests/integration/retrieval/test_triplet_retriever.py +45 -1
  201. cognee/tests/integration/shared/test_usage_logger_integration.py +255 -0
  202. cognee/tests/tasks/translation/README.md +147 -0
  203. cognee/tests/tasks/translation/__init__.py +1 -0
  204. cognee/tests/tasks/translation/config_test.py +93 -0
  205. cognee/tests/tasks/translation/detect_language_test.py +118 -0
  206. cognee/tests/tasks/translation/providers_test.py +151 -0
  207. cognee/tests/tasks/translation/translate_content_test.py +213 -0
  208. cognee/tests/test_chromadb.py +1 -1
  209. cognee/tests/test_cleanup_unused_data.py +165 -0
  210. cognee/tests/test_custom_data_label.py +68 -0
  211. cognee/tests/test_delete_by_id.py +6 -6
  212. cognee/tests/test_extract_usage_frequency.py +308 -0
  213. cognee/tests/test_kuzu.py +17 -7
  214. cognee/tests/test_lancedb.py +3 -1
  215. cognee/tests/test_library.py +1 -1
  216. cognee/tests/test_neo4j.py +17 -7
  217. cognee/tests/test_neptune_analytics_vector.py +3 -1
  218. cognee/tests/test_permissions.py +172 -187
  219. cognee/tests/test_pgvector.py +3 -1
  220. cognee/tests/test_relational_db_migration.py +15 -1
  221. cognee/tests/test_remote_kuzu.py +3 -1
  222. cognee/tests/test_s3_file_storage.py +1 -1
  223. cognee/tests/test_search_db.py +345 -205
  224. cognee/tests/test_usage_logger_e2e.py +268 -0
  225. cognee/tests/unit/api/test_get_raw_data_endpoint.py +206 -0
  226. cognee/tests/unit/eval_framework/answer_generation_test.py +4 -3
  227. cognee/tests/unit/eval_framework/benchmark_adapters_test.py +25 -0
  228. cognee/tests/unit/eval_framework/corpus_builder_test.py +33 -4
  229. cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +2 -0
  230. cognee/tests/unit/infrastructure/databases/relational/test_RelationalConfig.py +69 -0
  231. cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +42 -2
  232. cognee/tests/unit/modules/graph/cognee_graph_test.py +329 -31
  233. cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +122 -168
  234. cognee/tests/unit/modules/retrieval/conversation_history_test.py +338 -0
  235. cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +486 -157
  236. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +693 -155
  237. cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +619 -200
  238. cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +300 -171
  239. cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +184 -155
  240. cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +544 -79
  241. cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +476 -28
  242. cognee/tests/unit/modules/retrieval/test_completion.py +343 -0
  243. cognee/tests/unit/modules/retrieval/test_graph_summary_completion_retriever.py +157 -0
  244. cognee/tests/unit/modules/retrieval/test_node_edge_vector_search.py +273 -0
  245. cognee/tests/unit/modules/retrieval/test_user_qa_feedback.py +312 -0
  246. cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +267 -7
  247. cognee/tests/unit/modules/search/test_get_search_type_retriever_instance.py +125 -0
  248. cognee/tests/unit/modules/search/test_search.py +96 -20
  249. cognee/tests/unit/modules/search/test_search_prepare_search_result_contract.py +190 -0
  250. cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +511 -297
  251. cognee/tests/unit/shared/test_usage_logger.py +241 -0
  252. cognee/tests/unit/users/permissions/test_has_user_management_permission.py +46 -0
  253. {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/METADATA +22 -17
  254. {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/RECORD +258 -157
  255. cognee/api/.env.example +0 -5
  256. cognee/modules/retrieval/base_graph_retriever.py +0 -24
  257. cognee/modules/search/methods/get_search_type_tools.py +0 -223
  258. cognee/modules/search/methods/no_access_control_search.py +0 -62
  259. cognee/modules/search/utils/prepare_search_result.py +0 -63
  260. cognee/tests/test_feedback_enrichment.py +0 -174
  261. cognee/tests/unit/modules/retrieval/structured_output_test.py +0 -204
  262. {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/WHEEL +0 -0
  263. {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/entry_points.txt +0 -0
  264. {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/licenses/LICENSE +0 -0
  265. {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/licenses/NOTICE.md +0 -0
cognee/shared/utils.py CHANGED
@@ -8,7 +8,8 @@ import http.server
8
8
  import socketserver
9
9
  from threading import Thread
10
10
  import pathlib
11
- from uuid import uuid4, uuid5, NAMESPACE_OID
11
+ from typing import Union, Any, Dict, List
12
+ from uuid import uuid4, uuid5, NAMESPACE_OID, UUID
12
13
 
13
14
  from cognee.base_config import get_base_config
14
15
  from cognee.shared.logging_utils import get_logger
@@ -58,7 +59,7 @@ def get_anonymous_id():
58
59
  return anonymous_id
59
60
 
60
61
 
61
- def _sanitize_nested_properties(obj, property_names: list[str]):
62
+ def _sanitize_nested_properties(obj: Any, property_names: list[str]) -> Any:
62
63
  """
63
64
  Recursively replaces any property whose key matches one of `property_names`
64
65
  (e.g., ['url', 'path']) in a nested dict or list with a uuid5 hash
@@ -78,7 +79,9 @@ def _sanitize_nested_properties(obj, property_names: list[str]):
78
79
  return obj
79
80
 
80
81
 
81
- def send_telemetry(event_name: str, user_id, additional_properties: dict = {}):
82
+ def send_telemetry(event_name: str, user_id: Union[str, UUID], additional_properties: dict = {}):
83
+ if additional_properties is None:
84
+ additional_properties = {}
82
85
  if os.getenv("TELEMETRY_DISABLED"):
83
86
  return
84
87
 
@@ -108,7 +111,7 @@ def send_telemetry(event_name: str, user_id, additional_properties: dict = {}):
108
111
  print(f"Error sending telemetry through proxy: {response.status_code}")
109
112
 
110
113
 
111
- def embed_logo(p, layout_scale, logo_alpha, position):
114
+ def embed_logo(p: Any, layout_scale: float, logo_alpha: float, position: str):
112
115
  """
113
116
  Embed a logo into the graph visualization as a watermark.
114
117
  """
@@ -138,7 +141,11 @@ def embed_logo(p, layout_scale, logo_alpha, position):
138
141
 
139
142
 
140
143
  def start_visualization_server(
141
- host="0.0.0.0", port=8001, handler_class=http.server.SimpleHTTPRequestHandler
144
+ host: str = "0.0.0.0",
145
+ port: int = 8001,
146
+ handler_class: type[
147
+ http.server.SimpleHTTPRequestHandler
148
+ ] = http.server.SimpleHTTPRequestHandler,
142
149
  ):
143
150
  """
144
151
  Spin up a simple HTTP server in a background thread to serve files.
@@ -1,3 +1,12 @@
1
+ """
2
+ Text chunking and chunk management tasks.
3
+
4
+ This module provides functionality for splitting text into chunks using
5
+ different strategies (word, sentence, paragraph, or row-based) and for
6
+ cleaning up disconnected or obsolete chunks to support downstream
7
+ processing and knowledge graph workflows.
8
+ """
9
+
1
10
  from .chunk_by_word import chunk_by_word
2
11
  from .chunk_by_sentence import chunk_by_sentence
3
12
  from .chunk_by_paragraph import chunk_by_paragraph
@@ -0,0 +1,172 @@
1
+ """
2
+ Task for automatically deleting unused data from the memify pipeline.
3
+
4
+ This task identifies and removes entire documents that haven't
5
+ been accessed by retrievers for a specified period, helping maintain system
6
+ efficiency and storage optimization through whole-document removal.
7
+ """
8
+
9
+ import json
10
+ from datetime import datetime, timezone, timedelta
11
+ from typing import Optional, Dict, Any
12
+ from uuid import UUID
13
+ import os
14
+ from cognee.infrastructure.databases.graph import get_graph_engine
15
+ from cognee.infrastructure.databases.vector import get_vector_engine
16
+ from cognee.infrastructure.databases.relational import get_relational_engine
17
+ from cognee.modules.data.models import Data, DatasetData
18
+ from cognee.shared.logging_utils import get_logger
19
+ from sqlalchemy import select, or_
20
+ import cognee
21
+ import sqlalchemy as sa
22
+ from cognee.modules.graph.cognee_graph.CogneeGraph import CogneeGraph
23
+
24
+ logger = get_logger(__name__)
25
+
26
+
27
+ async def cleanup_unused_data(
28
+ minutes_threshold: Optional[int], dry_run: bool = True, user_id: Optional[UUID] = None
29
+ ) -> Dict[str, Any]:
30
+ """
31
+ Identify and remove unused data from the memify pipeline.
32
+
33
+ Parameters
34
+ ----------
35
+ minutes_threshold : int
36
+ Minutes since last access to consider data unused
37
+ dry_run : bool
38
+ If True, only report what would be deleted without actually deleting (default: True)
39
+ user_id : UUID, optional
40
+ Limit cleanup to specific user's data (default: None)
41
+
42
+ Returns
43
+ -------
44
+ Dict[str, Any]
45
+ Cleanup results with status, counts, and timestamp
46
+ """
47
+ # Check 1: Environment variable must be enabled
48
+ if os.getenv("ENABLE_LAST_ACCESSED", "false").lower() != "true":
49
+ logger.warning("Cleanup skipped: ENABLE_LAST_ACCESSED is not enabled.")
50
+ return {
51
+ "status": "skipped",
52
+ "reason": "ENABLE_LAST_ACCESSED not enabled",
53
+ "unused_count": 0,
54
+ "deleted_count": {},
55
+ "cleanup_date": datetime.now(timezone.utc).isoformat(),
56
+ }
57
+
58
+ # Check 2: Verify tracking has actually been running
59
+ db_engine = get_relational_engine()
60
+ async with db_engine.get_async_session() as session:
61
+ # Count records with non-NULL last_accessed
62
+ tracked_count = await session.execute(
63
+ select(sa.func.count(Data.id)).where(Data.last_accessed.isnot(None))
64
+ )
65
+ tracked_records = tracked_count.scalar()
66
+
67
+ if tracked_records == 0:
68
+ logger.warning(
69
+ "Cleanup skipped: No records have been tracked yet. "
70
+ "ENABLE_LAST_ACCESSED may have been recently enabled. "
71
+ "Wait for retrievers to update timestamps before running cleanup."
72
+ )
73
+ return {
74
+ "status": "skipped",
75
+ "reason": "No tracked records found - tracking may be newly enabled",
76
+ "unused_count": 0,
77
+ "deleted_count": {},
78
+ "cleanup_date": datetime.now(timezone.utc).isoformat(),
79
+ }
80
+
81
+ logger.info(
82
+ "Starting cleanup task",
83
+ minutes_threshold=minutes_threshold,
84
+ dry_run=dry_run,
85
+ user_id=str(user_id) if user_id else None,
86
+ )
87
+
88
+ # Calculate cutoff timestamp
89
+ cutoff_date = datetime.now(timezone.utc) - timedelta(minutes=minutes_threshold)
90
+
91
+ # Document-level approach (recommended)
92
+ return await _cleanup_via_sql(cutoff_date, dry_run, user_id)
93
+
94
+
95
+ async def _cleanup_via_sql(
96
+ cutoff_date: datetime, dry_run: bool, user_id: Optional[UUID] = None
97
+ ) -> Dict[str, Any]:
98
+ """
99
+ SQL-based cleanup: Query Data table for unused documents and use cognee.delete().
100
+
101
+ Parameters
102
+ ----------
103
+ cutoff_date : datetime
104
+ Cutoff date for last_accessed filtering
105
+ dry_run : bool
106
+ If True, only report what would be deleted
107
+ user_id : UUID, optional
108
+ Filter by user ID if provided
109
+
110
+ Returns
111
+ -------
112
+ Dict[str, Any]
113
+ Cleanup results
114
+ """
115
+ db_engine = get_relational_engine()
116
+
117
+ async with db_engine.get_async_session() as session:
118
+ # Query for Data records with old last_accessed timestamps
119
+ query = (
120
+ select(Data, DatasetData)
121
+ .join(DatasetData, Data.id == DatasetData.data_id)
122
+ .where(or_(Data.last_accessed < cutoff_date, Data.last_accessed.is_(None)))
123
+ )
124
+
125
+ if user_id:
126
+ from cognee.modules.data.models import Dataset
127
+
128
+ query = query.join(Dataset, DatasetData.dataset_id == Dataset.id).where(
129
+ Dataset.owner_id == user_id
130
+ )
131
+
132
+ result = await session.execute(query)
133
+ unused_data = result.all()
134
+
135
+ logger.info(f"Found {len(unused_data)} unused documents in SQL")
136
+
137
+ if dry_run:
138
+ return {
139
+ "status": "dry_run",
140
+ "unused_count": len(unused_data),
141
+ "deleted_count": {"data_items": 0, "documents": 0},
142
+ "cleanup_date": datetime.now(timezone.utc).isoformat(),
143
+ "preview": {"documents": len(unused_data)},
144
+ }
145
+
146
+ # Delete each document using cognee.delete()
147
+ deleted_count = 0
148
+ from cognee.modules.users.methods import get_default_user
149
+
150
+ user = await get_default_user() if user_id is None else None
151
+
152
+ for data, dataset_data in unused_data:
153
+ try:
154
+ await cognee.delete(
155
+ data_id=data.id,
156
+ dataset_id=dataset_data.dataset_id,
157
+ mode="hard", # Use hard mode to also remove orphaned entities
158
+ user=user,
159
+ )
160
+ deleted_count += 1
161
+ logger.info(f"Deleted document {data.id} from dataset {dataset_data.dataset_id}")
162
+ except Exception as e:
163
+ logger.error(f"Failed to delete document {data.id}: {e}")
164
+
165
+ logger.info("Cleanup completed", deleted_count=deleted_count)
166
+
167
+ return {
168
+ "status": "completed",
169
+ "unused_count": len(unused_data),
170
+ "deleted_count": {"data_items": deleted_count, "documents": deleted_count},
171
+ "cleanup_date": datetime.now(timezone.utc).isoformat(),
172
+ }
@@ -1,2 +1,9 @@
1
+ """
2
+ Graph extraction and manipulation tasks.
3
+
4
+ This module provides tasks for extracting knowledge graphs from data,
5
+ building relationships between entities, and managing graph structures.
6
+ """
7
+
1
8
  from .extract_graph_from_data import extract_graph_from_data
2
9
  from .extract_graph_from_code import extract_graph_from_code
@@ -0,0 +1,8 @@
1
+ from dataclasses import dataclass
2
+ from typing import Any, Optional
3
+
4
+
5
+ @dataclass
6
+ class DataItem:
7
+ data: Any
8
+ label: Optional[str] = None
@@ -20,6 +20,7 @@ from cognee.modules.data.methods import (
20
20
 
21
21
  from .save_data_item_to_storage import save_data_item_to_storage
22
22
  from .data_item_to_text_file import data_item_to_text_file
23
+ from .data_item import DataItem
23
24
 
24
25
 
25
26
  async def ingest_data(
@@ -78,8 +79,16 @@ async def ingest_data(
78
79
  dataset_data_map = {str(data.id): True for data in dataset_data}
79
80
 
80
81
  for data_item in data:
82
+ # Support for DataItem (custom label + data wrapper)
83
+ current_label = None
84
+ underlying_data = data_item
85
+
86
+ if isinstance(data_item, DataItem):
87
+ underlying_data = data_item.data
88
+ current_label = data_item.label
89
+
81
90
  # Get file path of data item or create a file if it doesn't exist
82
- original_file_path = await save_data_item_to_storage(data_item)
91
+ original_file_path = await save_data_item_to_storage(underlying_data)
83
92
  # Transform file path to be OS usable
84
93
  actual_file_path = get_data_file_path(original_file_path)
85
94
 
@@ -139,6 +148,7 @@ async def ingest_data(
139
148
  data_point.external_metadata = ext_metadata
140
149
  data_point.node_set = json.dumps(node_set) if node_set else None
141
150
  data_point.tenant_id = user.tenant_id if user.tenant_id else None
151
+ data_point.label = current_label
142
152
 
143
153
  # Check if data is already in dataset
144
154
  if str(data_point.id) in dataset_data_map:
@@ -169,6 +179,7 @@ async def ingest_data(
169
179
  tenant_id=user.tenant_id if user.tenant_id else None,
170
180
  pipeline_status={},
171
181
  token_count=-1,
182
+ label=current_label,
172
183
  )
173
184
 
174
185
  new_datapoints.append(data_point)
@@ -9,6 +9,7 @@ from cognee.shared.logging_utils import get_logger
9
9
  from pydantic_settings import BaseSettings, SettingsConfigDict
10
10
 
11
11
  from cognee.tasks.web_scraper.utils import fetch_page_content
12
+ from cognee.tasks.ingestion.data_item import DataItem
12
13
 
13
14
 
14
15
  logger = get_logger()
@@ -95,5 +96,9 @@ async def save_data_item_to_storage(data_item: Union[BinaryIO, str, Any]) -> str
95
96
  # data is text, save it to data storage and return the file path
96
97
  return await save_data_to_file(data_item)
97
98
 
99
+ if isinstance(data_item, DataItem):
100
+ # If instance is DataItem use the underlying data
101
+ return await save_data_item_to_storage(data_item.data)
102
+
98
103
  # data is not a supported type
99
104
  raise IngestionError(message=f"Data type not supported: {type(data_item)}")
@@ -1,3 +1,11 @@
1
+ """
2
+ Memory and subgraph extraction tasks.
3
+
4
+ This module provides tasks for extracting subgraphs, document chunks, and
5
+ user session data, as well as initiating session cognification workflows,
6
+ to support memory enrichment and downstream knowledge graph processing.
7
+ """
8
+
1
9
  from .extract_subgraph import extract_subgraph
2
10
  from .extract_subgraph_chunks import extract_subgraph_chunks
3
11
  from .cognify_session import cognify_session