cognee 0.5.1__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (265)
  1. cognee/__init__.py +2 -0
  2. cognee/alembic/README +1 -0
  3. cognee/alembic/env.py +107 -0
  4. cognee/alembic/script.py.mako +26 -0
  5. cognee/alembic/versions/1a58b986e6e1_enable_delete_for_old_tutorial_notebooks.py +52 -0
  6. cognee/alembic/versions/1d0bb7fede17_add_pipeline_run_status.py +33 -0
  7. cognee/alembic/versions/1daae0df1866_incremental_loading.py +48 -0
  8. cognee/alembic/versions/211ab850ef3d_add_sync_operations_table.py +118 -0
  9. cognee/alembic/versions/45957f0a9849_add_notebook_table.py +46 -0
  10. cognee/alembic/versions/46a6ce2bd2b2_expand_dataset_database_with_json_.py +333 -0
  11. cognee/alembic/versions/482cd6517ce4_add_default_user.py +30 -0
  12. cognee/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py +98 -0
  13. cognee/alembic/versions/8057ae7329c2_initial_migration.py +25 -0
  14. cognee/alembic/versions/9e7a3cb85175_loader_separation.py +104 -0
  15. cognee/alembic/versions/a1b2c3d4e5f6_add_label_column_to_data.py +38 -0
  16. cognee/alembic/versions/ab7e313804ae_permission_system_rework.py +236 -0
  17. cognee/alembic/versions/b9274c27a25a_kuzu_11_migration.py +75 -0
  18. cognee/alembic/versions/c946955da633_multi_tenant_support.py +137 -0
  19. cognee/alembic/versions/e1ec1dcb50b6_add_last_accessed_to_data.py +51 -0
  20. cognee/alembic/versions/e4ebee1091e7_expand_data_model_info.py +140 -0
  21. cognee/alembic.ini +117 -0
  22. cognee/api/v1/add/add.py +2 -1
  23. cognee/api/v1/add/routers/get_add_router.py +2 -0
  24. cognee/api/v1/cognify/cognify.py +11 -6
  25. cognee/api/v1/cognify/routers/get_cognify_router.py +8 -0
  26. cognee/api/v1/config/config.py +60 -0
  27. cognee/api/v1/datasets/routers/get_datasets_router.py +46 -3
  28. cognee/api/v1/memify/routers/get_memify_router.py +3 -0
  29. cognee/api/v1/search/routers/get_search_router.py +21 -6
  30. cognee/api/v1/search/search.py +21 -5
  31. cognee/api/v1/sync/routers/get_sync_router.py +3 -3
  32. cognee/cli/commands/add_command.py +1 -1
  33. cognee/cli/commands/cognify_command.py +6 -0
  34. cognee/cli/commands/config_command.py +1 -1
  35. cognee/context_global_variables.py +5 -1
  36. cognee/eval_framework/answer_generation/answer_generation_executor.py +7 -8
  37. cognee/infrastructure/databases/cache/cache_db_interface.py +38 -1
  38. cognee/infrastructure/databases/cache/config.py +6 -0
  39. cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +21 -0
  40. cognee/infrastructure/databases/cache/get_cache_engine.py +9 -3
  41. cognee/infrastructure/databases/cache/redis/RedisAdapter.py +60 -1
  42. cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +7 -0
  43. cognee/infrastructure/databases/graph/get_graph_engine.py +29 -1
  44. cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +62 -27
  45. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +17 -4
  46. cognee/infrastructure/databases/relational/config.py +16 -1
  47. cognee/infrastructure/databases/relational/create_relational_engine.py +13 -3
  48. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +26 -3
  49. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -0
  50. cognee/infrastructure/databases/vector/config.py +6 -0
  51. cognee/infrastructure/databases/vector/create_vector_engine.py +70 -16
  52. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +64 -9
  53. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +13 -2
  54. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +16 -3
  55. cognee/infrastructure/databases/vector/models/ScoredResult.py +3 -3
  56. cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +16 -3
  57. cognee/infrastructure/databases/vector/pgvector/PGVectorDatasetDatabaseHandler.py +86 -0
  58. cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py +81 -2
  59. cognee/infrastructure/databases/vector/vector_db_interface.py +8 -0
  60. cognee/infrastructure/files/utils/get_data_file_path.py +33 -27
  61. cognee/infrastructure/llm/LLMGateway.py +0 -13
  62. cognee/infrastructure/llm/prompts/extract_query_time.txt +1 -1
  63. cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +1 -1
  64. cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +1 -1
  65. cognee/infrastructure/llm/prompts/generate_graph_prompt.txt +2 -2
  66. cognee/infrastructure/llm/prompts/generate_graph_prompt_guided.txt +1 -1
  67. cognee/infrastructure/llm/prompts/generate_graph_prompt_oneshot.txt +2 -2
  68. cognee/infrastructure/llm/prompts/generate_graph_prompt_simple.txt +1 -1
  69. cognee/infrastructure/llm/prompts/generate_graph_prompt_strict.txt +1 -1
  70. cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +6 -6
  71. cognee/infrastructure/llm/prompts/test.txt +1 -1
  72. cognee/infrastructure/llm/prompts/translate_content.txt +19 -0
  73. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +17 -12
  74. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +31 -25
  75. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +132 -7
  76. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +29 -5
  77. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llama_cpp/adapter.py +191 -0
  78. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llm_interface.py +2 -6
  79. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +58 -13
  80. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +0 -1
  81. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +25 -131
  82. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/types.py +10 -0
  83. cognee/modules/chunking/models/DocumentChunk.py +0 -1
  84. cognee/modules/cognify/config.py +2 -0
  85. cognee/modules/data/models/Data.py +3 -1
  86. cognee/modules/engine/models/Entity.py +0 -1
  87. cognee/modules/engine/operations/setup.py +6 -0
  88. cognee/modules/graph/cognee_graph/CogneeGraph.py +150 -37
  89. cognee/modules/graph/cognee_graph/CogneeGraphElements.py +48 -2
  90. cognee/modules/graph/utils/__init__.py +1 -0
  91. cognee/modules/graph/utils/get_entity_nodes_from_triplets.py +12 -0
  92. cognee/modules/notebooks/methods/__init__.py +1 -0
  93. cognee/modules/notebooks/methods/create_notebook.py +0 -34
  94. cognee/modules/notebooks/methods/create_tutorial_notebooks.py +191 -0
  95. cognee/modules/notebooks/methods/get_notebooks.py +12 -8
  96. cognee/modules/notebooks/tutorials/cognee-basics/cell-1.md +3 -0
  97. cognee/modules/notebooks/tutorials/cognee-basics/cell-2.md +10 -0
  98. cognee/modules/notebooks/tutorials/cognee-basics/cell-3.md +7 -0
  99. cognee/modules/notebooks/tutorials/cognee-basics/cell-4.py +28 -0
  100. cognee/modules/notebooks/tutorials/cognee-basics/cell-5.py +3 -0
  101. cognee/modules/notebooks/tutorials/cognee-basics/cell-6.py +9 -0
  102. cognee/modules/notebooks/tutorials/cognee-basics/cell-7.py +17 -0
  103. cognee/modules/notebooks/tutorials/cognee-basics/config.json +4 -0
  104. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-1.md +3 -0
  105. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-10.md +3 -0
  106. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-11.md +3 -0
  107. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-12.py +3 -0
  108. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-13.md +7 -0
  109. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-14.py +6 -0
  110. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-15.md +3 -0
  111. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-16.py +7 -0
  112. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-2.md +9 -0
  113. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-3.md +7 -0
  114. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-4.md +9 -0
  115. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-5.md +5 -0
  116. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-6.py +13 -0
  117. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-7.md +3 -0
  118. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-8.md +3 -0
  119. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-9.py +31 -0
  120. cognee/modules/notebooks/tutorials/python-development-with-cognee/config.json +4 -0
  121. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/copilot_conversations.json +107 -0
  122. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/guido_contributions.json +976 -0
  123. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/my_developer_rules.md +79 -0
  124. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/pep_style_guide.md +74 -0
  125. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/zen_principles.md +74 -0
  126. cognee/modules/retrieval/EntityCompletionRetriever.py +51 -38
  127. cognee/modules/retrieval/__init__.py +0 -1
  128. cognee/modules/retrieval/base_retriever.py +66 -10
  129. cognee/modules/retrieval/chunks_retriever.py +57 -49
  130. cognee/modules/retrieval/coding_rules_retriever.py +12 -5
  131. cognee/modules/retrieval/completion_retriever.py +29 -28
  132. cognee/modules/retrieval/cypher_search_retriever.py +25 -20
  133. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +42 -46
  134. cognee/modules/retrieval/graph_completion_cot_retriever.py +68 -51
  135. cognee/modules/retrieval/graph_completion_retriever.py +78 -63
  136. cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
  137. cognee/modules/retrieval/lexical_retriever.py +34 -12
  138. cognee/modules/retrieval/natural_language_retriever.py +18 -15
  139. cognee/modules/retrieval/summaries_retriever.py +51 -34
  140. cognee/modules/retrieval/temporal_retriever.py +59 -49
  141. cognee/modules/retrieval/triplet_retriever.py +32 -33
  142. cognee/modules/retrieval/utils/access_tracking.py +88 -0
  143. cognee/modules/retrieval/utils/brute_force_triplet_search.py +99 -103
  144. cognee/modules/retrieval/utils/node_edge_vector_search.py +174 -0
  145. cognee/modules/search/methods/__init__.py +1 -0
  146. cognee/modules/search/methods/get_retriever_output.py +53 -0
  147. cognee/modules/search/methods/get_search_type_retriever_instance.py +252 -0
  148. cognee/modules/search/methods/search.py +90 -222
  149. cognee/modules/search/models/SearchResultPayload.py +67 -0
  150. cognee/modules/search/types/SearchResult.py +1 -8
  151. cognee/modules/search/types/SearchType.py +1 -2
  152. cognee/modules/search/types/__init__.py +1 -1
  153. cognee/modules/search/utils/__init__.py +1 -2
  154. cognee/modules/search/utils/transform_insights_to_graph.py +2 -2
  155. cognee/modules/search/utils/{transform_context_to_graph.py → transform_triplets_to_graph.py} +2 -2
  156. cognee/modules/users/authentication/default/default_transport.py +11 -1
  157. cognee/modules/users/authentication/get_api_auth_backend.py +2 -1
  158. cognee/modules/users/authentication/get_client_auth_backend.py +2 -1
  159. cognee/modules/users/methods/create_user.py +0 -9
  160. cognee/modules/users/permissions/methods/has_user_management_permission.py +29 -0
  161. cognee/modules/visualization/cognee_network_visualization.py +1 -1
  162. cognee/run_migrations.py +48 -0
  163. cognee/shared/exceptions/__init__.py +1 -3
  164. cognee/shared/exceptions/exceptions.py +11 -1
  165. cognee/shared/usage_logger.py +332 -0
  166. cognee/shared/utils.py +12 -5
  167. cognee/tasks/chunks/__init__.py +9 -0
  168. cognee/tasks/cleanup/cleanup_unused_data.py +172 -0
  169. cognee/tasks/graph/__init__.py +7 -0
  170. cognee/tasks/ingestion/data_item.py +8 -0
  171. cognee/tasks/ingestion/ingest_data.py +12 -1
  172. cognee/tasks/ingestion/save_data_item_to_storage.py +5 -0
  173. cognee/tasks/memify/__init__.py +8 -0
  174. cognee/tasks/memify/extract_usage_frequency.py +613 -0
  175. cognee/tasks/summarization/models.py +0 -2
  176. cognee/tasks/temporal_graph/__init__.py +0 -1
  177. cognee/tasks/translation/__init__.py +96 -0
  178. cognee/tasks/translation/config.py +110 -0
  179. cognee/tasks/translation/detect_language.py +190 -0
  180. cognee/tasks/translation/exceptions.py +62 -0
  181. cognee/tasks/translation/models.py +72 -0
  182. cognee/tasks/translation/providers/__init__.py +44 -0
  183. cognee/tasks/translation/providers/azure_provider.py +192 -0
  184. cognee/tasks/translation/providers/base.py +85 -0
  185. cognee/tasks/translation/providers/google_provider.py +158 -0
  186. cognee/tasks/translation/providers/llm_provider.py +143 -0
  187. cognee/tasks/translation/translate_content.py +282 -0
  188. cognee/tasks/web_scraper/default_url_crawler.py +6 -2
  189. cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +1 -0
  190. cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +3 -0
  191. cognee/tests/integration/retrieval/test_brute_force_triplet_search_with_cognify.py +62 -0
  192. cognee/tests/integration/retrieval/test_chunks_retriever.py +351 -0
  193. cognee/tests/integration/retrieval/test_graph_completion_retriever.py +276 -0
  194. cognee/tests/integration/retrieval/test_graph_completion_retriever_context_extension.py +228 -0
  195. cognee/tests/integration/retrieval/test_graph_completion_retriever_cot.py +217 -0
  196. cognee/tests/integration/retrieval/test_rag_completion_retriever.py +319 -0
  197. cognee/tests/integration/retrieval/test_structured_output.py +258 -0
  198. cognee/tests/integration/retrieval/test_summaries_retriever.py +195 -0
  199. cognee/tests/integration/retrieval/test_temporal_retriever.py +336 -0
  200. cognee/tests/integration/retrieval/test_triplet_retriever.py +45 -1
  201. cognee/tests/integration/shared/test_usage_logger_integration.py +255 -0
  202. cognee/tests/tasks/translation/README.md +147 -0
  203. cognee/tests/tasks/translation/__init__.py +1 -0
  204. cognee/tests/tasks/translation/config_test.py +93 -0
  205. cognee/tests/tasks/translation/detect_language_test.py +118 -0
  206. cognee/tests/tasks/translation/providers_test.py +151 -0
  207. cognee/tests/tasks/translation/translate_content_test.py +213 -0
  208. cognee/tests/test_chromadb.py +1 -1
  209. cognee/tests/test_cleanup_unused_data.py +165 -0
  210. cognee/tests/test_custom_data_label.py +68 -0
  211. cognee/tests/test_delete_by_id.py +6 -6
  212. cognee/tests/test_extract_usage_frequency.py +308 -0
  213. cognee/tests/test_kuzu.py +17 -7
  214. cognee/tests/test_lancedb.py +3 -1
  215. cognee/tests/test_library.py +1 -1
  216. cognee/tests/test_neo4j.py +17 -7
  217. cognee/tests/test_neptune_analytics_vector.py +3 -1
  218. cognee/tests/test_permissions.py +172 -187
  219. cognee/tests/test_pgvector.py +3 -1
  220. cognee/tests/test_relational_db_migration.py +15 -1
  221. cognee/tests/test_remote_kuzu.py +3 -1
  222. cognee/tests/test_s3_file_storage.py +1 -1
  223. cognee/tests/test_search_db.py +345 -205
  224. cognee/tests/test_usage_logger_e2e.py +268 -0
  225. cognee/tests/unit/api/test_get_raw_data_endpoint.py +206 -0
  226. cognee/tests/unit/eval_framework/answer_generation_test.py +4 -3
  227. cognee/tests/unit/eval_framework/benchmark_adapters_test.py +25 -0
  228. cognee/tests/unit/eval_framework/corpus_builder_test.py +33 -4
  229. cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +2 -0
  230. cognee/tests/unit/infrastructure/databases/relational/test_RelationalConfig.py +69 -0
  231. cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +42 -2
  232. cognee/tests/unit/modules/graph/cognee_graph_test.py +329 -31
  233. cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +122 -168
  234. cognee/tests/unit/modules/retrieval/conversation_history_test.py +338 -0
  235. cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +486 -157
  236. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +693 -155
  237. cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +619 -200
  238. cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +300 -171
  239. cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +184 -155
  240. cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +544 -79
  241. cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +476 -28
  242. cognee/tests/unit/modules/retrieval/test_completion.py +343 -0
  243. cognee/tests/unit/modules/retrieval/test_graph_summary_completion_retriever.py +157 -0
  244. cognee/tests/unit/modules/retrieval/test_node_edge_vector_search.py +273 -0
  245. cognee/tests/unit/modules/retrieval/test_user_qa_feedback.py +312 -0
  246. cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +267 -7
  247. cognee/tests/unit/modules/search/test_get_search_type_retriever_instance.py +125 -0
  248. cognee/tests/unit/modules/search/test_search.py +96 -20
  249. cognee/tests/unit/modules/search/test_search_prepare_search_result_contract.py +190 -0
  250. cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +511 -297
  251. cognee/tests/unit/shared/test_usage_logger.py +241 -0
  252. cognee/tests/unit/users/permissions/test_has_user_management_permission.py +46 -0
  253. {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/METADATA +22 -17
  254. {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/RECORD +258 -157
  255. cognee/api/.env.example +0 -5
  256. cognee/modules/retrieval/base_graph_retriever.py +0 -24
  257. cognee/modules/search/methods/get_search_type_tools.py +0 -223
  258. cognee/modules/search/methods/no_access_control_search.py +0 -62
  259. cognee/modules/search/utils/prepare_search_result.py +0 -63
  260. cognee/tests/test_feedback_enrichment.py +0 -174
  261. cognee/tests/unit/modules/retrieval/structured_output_test.py +0 -204
  262. {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/WHEEL +0 -0
  263. {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/entry_points.txt +0 -0
  264. {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/licenses/LICENSE +0 -0
  265. {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/licenses/NOTICE.md +0 -0
@@ -0,0 +1,613 @@
1
+ # cognee/tasks/memify/extract_usage_frequency.py
2
+ from typing import List, Dict, Any, Optional
3
+ from datetime import datetime, timedelta
4
+ from cognee.shared.logging_utils import get_logger
5
+ from cognee.modules.graph.cognee_graph.CogneeGraph import CogneeGraph
6
+ from cognee.modules.pipelines.tasks.task import Task
7
+ from cognee.infrastructure.databases.graph.graph_db_interface import GraphDBInterface
8
+
9
+ logger = get_logger("extract_usage_frequency")
10
+
11
+
12
def _parse_interaction_timestamp(timestamp_value: Any) -> datetime:
    """
    Convert a raw interaction timestamp into a naive, local-time datetime.

    Accepted inputs: ``datetime`` instances, epoch numbers (seconds or
    milliseconds), digit-only strings (treated as epoch values), ISO-8601
    strings (a trailing ``Z`` is accepted), objects exposing ``to_native()``,
    and objects exposing ``year``/``month``/``day`` attributes.

    :param timestamp_value: Raw value read from the node attributes
    :return: Naive datetime expressed in the system local time zone
    :raises ValueError, TypeError, AttributeError, OSError, OverflowError:
        when the value cannot be interpreted as a point in time
    """
    # Epoch values larger than this are interpreted as milliseconds rather
    # than seconds (same heuristic for numbers and digit-only strings).
    ms_threshold = 10000000000

    def from_epoch(number):
        seconds = number / 1000.0 if number > ms_threshold else number
        return datetime.fromtimestamp(seconds)

    def from_text(text):
        text = text.strip()
        if text.isdigit():
            return from_epoch(int(text))
        # datetime.fromisoformat only accepts a trailing "Z" from Python 3.11
        # onwards, so spell the UTC offset out explicitly.
        if text.endswith(("Z", "z")):
            text = text[:-1] + "+00:00"
        return datetime.fromisoformat(text)

    # Driver-specific temporal objects are unwrapped through to_native() first.
    if hasattr(timestamp_value, "to_native") and not isinstance(timestamp_value, datetime):
        timestamp_value = timestamp_value.to_native()

    parsed = None
    if isinstance(timestamp_value, datetime):
        parsed = timestamp_value
    elif isinstance(timestamp_value, (int, float)):
        parsed = from_epoch(timestamp_value)
    elif isinstance(timestamp_value, str):
        parsed = from_text(timestamp_value)
    elif hasattr(timestamp_value, "year") and hasattr(timestamp_value, "month"):
        # Datetime-like object: rebuild a datetime from its components.
        try:
            parsed = datetime(
                year=timestamp_value.year,
                month=timestamp_value.month,
                day=timestamp_value.day,
                hour=getattr(timestamp_value, "hour", 0),
                minute=getattr(timestamp_value, "minute", 0),
                second=getattr(timestamp_value, "second", 0),
                microsecond=getattr(timestamp_value, "microsecond", 0),
            )
        except (AttributeError, TypeError, ValueError):
            parsed = None

    if parsed is None:
        # Last resort: parse the string representation of the value.
        parsed = from_text(str(timestamp_value))

    if parsed.tzinfo is not None:
        # Convert to local time BEFORE dropping the offset. Simply discarding
        # tzinfo would shift the instant by the UTC offset when it is compared
        # against the naive local datetime.now() used for the cutoff.
        parsed = parsed.astimezone().replace(tzinfo=None)

    return parsed


async def extract_usage_frequency(
    subgraphs: List[CogneeGraph],
    time_window: timedelta = timedelta(days=7),
    min_interaction_threshold: int = 1,
) -> Dict[str, Any]:
    """
    Extract usage frequency from CogneeUserInteraction nodes.

    When save_interaction=True in cognee.search(), the system creates:
    - CogneeUserInteraction nodes (representing the query/answer interaction)
    - used_graph_element_to_answer edges (connecting interactions to graph elements used)

    This function tallies how often each graph element is referenced via these edges,
    enabling frequency-based ranking in downstream retrievers.

    The work is done in two passes so the result does not depend on the order in
    which edges are listed: first every in-window interaction is resolved to the
    graph elements it used, then all remaining edges touching one of those
    elements are counted.

    Interaction nodes without a ``timestamp``/``created_at`` attribute, or with a
    value that cannot be parsed, are ignored (the latter with a warning).

    :param subgraphs: List of CogneeGraph instances containing interaction data
    :param time_window: Time window to consider for interactions (default: 7 days)
    :param min_interaction_threshold: Minimum interactions to track (default: 1)
    :return: Dictionary with the keys ``node_frequencies``, ``edge_frequencies``,
        ``element_type_frequencies``, ``total_interactions``,
        ``interactions_in_window``, ``time_window_days``,
        ``last_processed_timestamp`` and ``cutoff_timestamp``
    """
    usage_relationship = "used_graph_element_to_answer"

    current_time = datetime.now()
    cutoff_time = current_time - time_window

    # Frequencies for graph elements (nodes and edges) and element types
    node_frequencies = {}
    edge_frequencies = {}
    element_type_frequencies = {}

    # Interaction metadata
    interaction_count = 0
    interactions_in_window = 0

    logger.info(f"Extracting usage frequencies from {len(subgraphs)} subgraphs")
    logger.info(f"Time window: {time_window}, Cutoff: {cutoff_time.isoformat()}")

    # Pass 1: locate interaction nodes and count the elements they reference.
    for subgraph in subgraphs:
        # Maps str(interaction node id) -> whether it falls inside the window.
        # Keys are stringified because edges are matched via str(edge.node1.id).
        interaction_in_window = {}

        for node_id, node in subgraph.nodes.items():
            node_type = node.attributes.get("type") or node.attributes.get("node_type")
            if node_type != "CogneeUserInteraction":
                continue

            timestamp_value = node.attributes.get("timestamp") or node.attributes.get(
                "created_at"
            )
            if timestamp_value is None:
                continue

            try:
                interaction_time = _parse_interaction_timestamp(timestamp_value)
            except (ValueError, TypeError, AttributeError, OSError, OverflowError) as e:
                # OverflowError is included because datetime.fromtimestamp raises
                # it for out-of-range epoch values; one bad node must not abort
                # the whole extraction.
                logger.warning(f"Failed to parse timestamp for interaction node {node_id}: {e}")
                logger.debug(
                    f"Timestamp value type: {type(timestamp_value)}, value: {timestamp_value}"
                )
                continue

            in_window = interaction_time >= cutoff_time
            interaction_in_window[str(node_id)] = in_window
            interaction_count += 1
            if in_window:
                interactions_in_window += 1

        for edge in subgraph.edges:
            if edge.attributes.get("relationship_type") != usage_relationship:
                continue

            # node1 should be the CogneeUserInteraction, node2 is the graph element
            source_id = str(edge.node1.id)
            if not interaction_in_window.get(source_id, False):
                continue

            target_id = str(edge.node2.id)
            node_frequencies[target_id] = node_frequencies.get(target_id, 0) + 1

            # Also track what type of element it is for analytics
            target_node = subgraph.get_node(target_id)
            if target_node:
                element_type = target_node.attributes.get(
                    "type"
                ) or target_node.attributes.get("node_type")
                if element_type:
                    element_type_frequencies[element_type] = (
                        element_type_frequencies.get(element_type, 0) + 1
                    )

    # Pass 2: count every other edge that touches a used element. This runs only
    # after node_frequencies is complete, so edges listed before the usage edge
    # that marks their endpoint are no longer missed.
    for subgraph in subgraphs:
        for edge in subgraph.edges:
            relationship_type = edge.attributes.get("relationship_type")
            if not relationship_type or relationship_type == usage_relationship:
                continue

            source_id = str(edge.node1.id)
            target_id = str(edge.node2.id)
            if source_id in node_frequencies or target_id in node_frequencies:
                edge_key = f"{relationship_type}:{source_id}:{target_id}"
                edge_frequencies[edge_key] = edge_frequencies.get(edge_key, 0) + 1

    # Filter frequencies above threshold
    filtered_node_frequencies = {
        node_id: freq
        for node_id, freq in node_frequencies.items()
        if freq >= min_interaction_threshold
    }

    filtered_edge_frequencies = {
        edge_key: freq
        for edge_key, freq in edge_frequencies.items()
        if freq >= min_interaction_threshold
    }

    logger.info(
        f"Processed {interactions_in_window}/{interaction_count} interactions in time window"
    )
    logger.info(
        f"Found {len(filtered_node_frequencies)} nodes and {len(filtered_edge_frequencies)} edges "
        f"above threshold (min: {min_interaction_threshold})"
    )
    logger.info(f"Element type distribution: {element_type_frequencies}")

    return {
        "node_frequencies": filtered_node_frequencies,
        "edge_frequencies": filtered_edge_frequencies,
        "element_type_frequencies": element_type_frequencies,
        "total_interactions": interaction_count,
        "interactions_in_window": interactions_in_window,
        # NOTE: timedelta.days truncates, so windows shorter than a day report 0.
        "time_window_days": time_window.days,
        "last_processed_timestamp": current_time.isoformat(),
        "cutoff_timestamp": cutoff_time.isoformat(),
    }
211
+
212
+
213
+ async def add_frequency_weights(
214
+ graph_adapter: GraphDBInterface, usage_frequencies: Dict[str, Any]
215
+ ) -> None:
216
+ """
217
+ Add frequency weights to graph nodes and edges using the graph adapter.
218
+
219
+ Uses direct Cypher queries for Neo4j adapter compatibility.
220
+ Writes frequency_weight properties back to the graph for use in:
221
+ - Ranking frequently referenced entities higher during retrieval
222
+ - Adjusting scoring for completion strategies
223
+ - Exposing usage metrics in dashboards or audits
224
+
225
+ :param graph_adapter: Graph database adapter interface
226
+ :param usage_frequencies: Calculated usage frequencies from extract_usage_frequency
227
+ """
228
+ node_frequencies = usage_frequencies.get("node_frequencies", {})
229
+ edge_frequencies = usage_frequencies.get("edge_frequencies", {})
230
+
231
+ logger.info(f"Adding frequency weights to {len(node_frequencies)} nodes")
232
+
233
+ # Check adapter type and use appropriate method
234
+ adapter_type = type(graph_adapter).__name__
235
+ logger.info(f"Using adapter: {adapter_type}")
236
+
237
+ nodes_updated = 0
238
+ nodes_failed = 0
239
+
240
+ # Determine which method to use based on adapter type
241
+ use_neo4j_cypher = adapter_type == "Neo4jAdapter" and hasattr(graph_adapter, "query")
242
+ use_kuzu_query = adapter_type == "KuzuAdapter" and hasattr(graph_adapter, "query")
243
+ use_get_update = hasattr(graph_adapter, "get_node_by_id") and hasattr(
244
+ graph_adapter, "update_node_properties"
245
+ )
246
+
247
+ # Method 1: Neo4j Cypher with SET (creates properties on the fly)
248
+ if use_neo4j_cypher:
249
+ try:
250
+ logger.info("Using Neo4j Cypher SET method")
251
+ last_updated = usage_frequencies.get("last_processed_timestamp")
252
+
253
+ for node_id, frequency in node_frequencies.items():
254
+ try:
255
+ query = """
256
+ MATCH (n)
257
+ WHERE n.id = $node_id
258
+ SET n.frequency_weight = $frequency,
259
+ n.frequency_updated_at = $updated_at
260
+ RETURN n.id as id
261
+ """
262
+
263
+ result = await graph_adapter.query(
264
+ query,
265
+ params={
266
+ "node_id": node_id,
267
+ "frequency": frequency,
268
+ "updated_at": last_updated,
269
+ },
270
+ )
271
+
272
+ if result and len(result) > 0:
273
+ nodes_updated += 1
274
+ else:
275
+ logger.warning(f"Node {node_id} not found or not updated")
276
+ nodes_failed += 1
277
+
278
+ except Exception as e:
279
+ logger.error(f"Error updating node {node_id}: {e}")
280
+ nodes_failed += 1
281
+
282
+ logger.info(f"Node update complete: {nodes_updated} succeeded, {nodes_failed} failed")
283
+
284
+ except Exception as e:
285
+ logger.error(f"Neo4j Cypher update failed: {e}")
286
+ use_neo4j_cypher = False
287
+
288
+ # Method 2: Kuzu - use get_node + add_node (updates via re-adding with same ID)
289
+ elif (
290
+ use_kuzu_query and hasattr(graph_adapter, "get_node") and hasattr(graph_adapter, "add_node")
291
+ ):
292
+ logger.info("Using Kuzu get_node + add_node method")
293
+ last_updated = usage_frequencies.get("last_processed_timestamp")
294
+
295
+ for node_id, frequency in node_frequencies.items():
296
+ try:
297
+ # Get the existing node (returns a dict)
298
+ existing_node_dict = await graph_adapter.get_node(node_id)
299
+
300
+ if existing_node_dict:
301
+ # Update the dict with new properties
302
+ existing_node_dict["frequency_weight"] = frequency
303
+ existing_node_dict["frequency_updated_at"] = last_updated
304
+
305
+ # Kuzu's add_node likely just takes the dict directly, not a Node object
306
+ # Try passing the dict directly first
307
+ try:
308
+ await graph_adapter.add_node(existing_node_dict)
309
+ nodes_updated += 1
310
+ except Exception as dict_error:
311
+ # If dict doesn't work, try creating a Node object
312
+ logger.debug(f"Dict add failed, trying Node object: {dict_error}")
313
+
314
+ try:
315
+ from cognee.infrastructure.engine import Node
316
+
317
+ # Try different Node constructor patterns
318
+ try:
319
+ # Pattern 1: Just properties
320
+ node_obj = Node(existing_node_dict)
321
+ except Exception:
322
+ # Pattern 2: Type and properties
323
+ node_obj = Node(
324
+ type=existing_node_dict.get("type", "Unknown"),
325
+ **existing_node_dict,
326
+ )
327
+
328
+ await graph_adapter.add_node(node_obj)
329
+ nodes_updated += 1
330
+ except Exception as node_error:
331
+ logger.error(f"Both dict and Node object failed: {node_error}")
332
+ nodes_failed += 1
333
+ else:
334
+ logger.warning(f"Node {node_id} not found in graph")
335
+ nodes_failed += 1
336
+
337
+ except Exception as e:
338
+ logger.error(f"Error updating node {node_id}: {e}")
339
+ nodes_failed += 1
340
+
341
+ logger.info(f"Node update complete: {nodes_updated} succeeded, {nodes_failed} failed")
342
+
343
+ # Method 3: Generic get_node_by_id + update_node_properties
344
+ elif use_get_update:
345
+ logger.info("Using get/update method for adapter")
346
+ for node_id, frequency in node_frequencies.items():
347
+ try:
348
+ # Get current node data
349
+ node_data = await graph_adapter.get_node_by_id(node_id)
350
+
351
+ if node_data:
352
+ # Tweak the properties dict - add frequency_weight
353
+ if isinstance(node_data, dict):
354
+ properties = node_data.get("properties", {})
355
+ else:
356
+ properties = getattr(node_data, "properties", {}) or {}
357
+
358
+ # Update with frequency weight
359
+ properties["frequency_weight"] = frequency
360
+ properties["frequency_updated_at"] = usage_frequencies.get(
361
+ "last_processed_timestamp"
362
+ )
363
+
364
+ # Write back via adapter
365
+ await graph_adapter.update_node_properties(node_id, properties)
366
+ nodes_updated += 1
367
+ else:
368
+ logger.warning(f"Node {node_id} not found in graph")
369
+ nodes_failed += 1
370
+
371
+ except Exception as e:
372
+ logger.error(f"Error updating node {node_id}: {e}")
373
+ nodes_failed += 1
374
+
375
+ logger.info(f"Node update complete: {nodes_updated} succeeded, {nodes_failed} failed")
376
+ for node_id, frequency in node_frequencies.items():
377
+ try:
378
+ # Get current node data
379
+ node_data = await graph_adapter.get_node_by_id(node_id)
380
+
381
+ if node_data:
382
+ # Tweak the properties dict - add frequency_weight
383
+ if isinstance(node_data, dict):
384
+ properties = node_data.get("properties", {})
385
+ else:
386
+ properties = getattr(node_data, "properties", {}) or {}
387
+
388
+ # Update with frequency weight
389
+ properties["frequency_weight"] = frequency
390
+ properties["frequency_updated_at"] = usage_frequencies.get(
391
+ "last_processed_timestamp"
392
+ )
393
+
394
+ # Write back via adapter
395
+ await graph_adapter.update_node_properties(node_id, properties)
396
+ nodes_updated += 1
397
+ else:
398
+ logger.warning(f"Node {node_id} not found in graph")
399
+ nodes_failed += 1
400
+
401
+ except Exception as e:
402
+ logger.error(f"Error updating node {node_id}: {e}")
403
+ nodes_failed += 1
404
+
405
+ # If no method is available
406
+ if not use_neo4j_cypher and not use_kuzu_query and not use_get_update:
407
+ logger.error(f"Adapter {adapter_type} does not support required update methods")
408
+ logger.error(
409
+ "Required: either 'query' method or both 'get_node_by_id' and 'update_node_properties'"
410
+ )
411
+ return
412
+
413
+ # Update edge frequencies
414
+ # Note: Edge property updates are backend-specific
415
+ if edge_frequencies:
416
+ logger.info(f"Processing {len(edge_frequencies)} edge frequency entries")
417
+
418
+ edges_updated = 0
419
+ edges_failed = 0
420
+
421
+ for edge_key, frequency in edge_frequencies.items():
422
+ try:
423
+ # Parse edge key: "relationship_type:source_id:target_id"
424
+ parts = edge_key.split(":", 2)
425
+ if len(parts) == 3:
426
+ relationship_type, source_id, target_id = parts
427
+
428
+ # Try to update edge if adapter supports it
429
+ if hasattr(graph_adapter, "update_edge_properties"):
430
+ edge_properties = {
431
+ "frequency_weight": frequency,
432
+ "frequency_updated_at": usage_frequencies.get(
433
+ "last_processed_timestamp"
434
+ ),
435
+ }
436
+
437
+ await graph_adapter.update_edge_properties(
438
+ source_id, target_id, relationship_type, edge_properties
439
+ )
440
+ edges_updated += 1
441
+ else:
442
+ # Fallback: store in metadata or log
443
+ logger.debug(
444
+ f"Adapter doesn't support update_edge_properties for "
445
+ f"{relationship_type} ({source_id} -> {target_id})"
446
+ )
447
+
448
+ except Exception as e:
449
+ logger.error(f"Error updating edge {edge_key}: {e}")
450
+ edges_failed += 1
451
+
452
+ if edges_updated > 0:
453
+ logger.info(f"Edge update complete: {edges_updated} succeeded, {edges_failed} failed")
454
+ else:
455
+ logger.info(
456
+ "Edge frequency updates skipped (adapter may not support edge property updates)"
457
+ )
458
+
459
+ # Store aggregate statistics as metadata if supported
460
+ if hasattr(graph_adapter, "set_metadata"):
461
+ try:
462
+ metadata = {
463
+ "element_type_frequencies": usage_frequencies.get("element_type_frequencies", {}),
464
+ "total_interactions": usage_frequencies.get("total_interactions", 0),
465
+ "interactions_in_window": usage_frequencies.get("interactions_in_window", 0),
466
+ "last_frequency_update": usage_frequencies.get("last_processed_timestamp"),
467
+ }
468
+ await graph_adapter.set_metadata("usage_frequency_stats", metadata)
469
+ logger.info("Stored usage frequency statistics as metadata")
470
+ except Exception as e:
471
+ logger.warning(f"Could not store usage statistics as metadata: {e}")
472
+
473
+
474
async def create_usage_frequency_pipeline(
    graph_adapter: GraphDBInterface,
    time_window: timedelta = timedelta(days=7),
    min_interaction_threshold: int = 1,
    batch_size: int = 100,
) -> tuple:
    """
    Assemble the task lists for a usage-frequency memify run.

    Two single-element lists are produced: one wrapping
    ``extract_usage_frequency`` and one wrapping ``add_frequency_weights``.
    Nothing is executed here; the function only constructs ``Task`` objects
    and logs the configuration it was given.

    Example:
        extraction_tasks, enrichment_tasks = await create_usage_frequency_pipeline(
            graph_adapter=my_adapter,
            time_window=timedelta(days=30),
            min_interaction_threshold=2,
        )

    :param graph_adapter: Graph database adapter handed to the enrichment task
    :param time_window: Forwarded to the extraction task (default: 7 days)
    :param min_interaction_threshold: Forwarded to the extraction task (default: 1)
    :param batch_size: Forwarded to the enrichment task as
        ``task_config={"batch_size": batch_size}`` (default: 100)
    :return: Tuple of (extraction_tasks, enrichment_tasks)
    """
    logger.info("Creating usage frequency pipeline")
    logger.info(f"Config: time_window={time_window}, threshold={min_interaction_threshold}")

    # Extraction step: carries the windowing/threshold settings.
    extraction_step = Task(
        extract_usage_frequency,
        time_window=time_window,
        min_interaction_threshold=min_interaction_threshold,
    )

    # Enrichment step: carries the adapter and the batch-size config.
    enrichment_step = Task(
        add_frequency_weights,
        graph_adapter=graph_adapter,
        task_config={"batch_size": batch_size},
    )

    return [extraction_step], [enrichment_step]
523
+
524
+
525
async def run_usage_frequency_update(
    graph_adapter: GraphDBInterface,
    subgraphs: List[CogneeGraph],
    time_window: timedelta = timedelta(days=7),
    min_interaction_threshold: int = 1,
) -> Dict[str, Any]:
    """
    Run extraction and weighting back to back and return the extracted stats.

    The function awaits ``extract_usage_frequency`` with the given subgraphs
    and settings, then awaits ``add_frequency_weights`` with the result, logs
    a short summary, and returns the extraction result unchanged.

    Example:
        stats = await run_usage_frequency_update(
            graph_adapter=graph_adapter,
            subgraphs=[graph],
            time_window=timedelta(days=30),
            min_interaction_threshold=2,
        )
        print(f"Updated {len(stats['node_frequencies'])} nodes")

    :param graph_adapter: Graph database adapter passed to ``add_frequency_weights``
    :param subgraphs: List of CogneeGraph instances passed to ``extract_usage_frequency``
    :param time_window: Time window forwarded to the extraction step
    :param min_interaction_threshold: Threshold forwarded to the extraction step
    :return: The dict produced by ``extract_usage_frequency``
    :raises Exception: Any error from either step (or from building the
        summary) is logged and then re-raised unchanged
    """
    logger.info("Starting usage frequency update")

    try:
        # Step 1: derive frequencies from the interaction data.
        frequency_stats = await extract_usage_frequency(
            subgraphs=subgraphs,
            time_window=time_window,
            min_interaction_threshold=min_interaction_threshold,
        )

        # Step 2: push the frequencies to the graph through the adapter.
        await add_frequency_weights(
            graph_adapter=graph_adapter,
            usage_frequencies=frequency_stats,
        )

        logger.info("Usage frequency update completed successfully")

        # Summary values are read with plain indexing, so a missing key
        # surfaces through the except branch below.
        processed_count = frequency_stats["interactions_in_window"]
        weighted_count = len(frequency_stats["node_frequencies"])
        logger.info(
            f"Summary: {processed_count} interactions processed, "
            f"{weighted_count} nodes weighted"
        )
    except Exception as error:
        logger.error(f"Error during usage frequency update: {error!s}")
        raise

    return frequency_stats
586
+
587
+
588
async def get_most_frequent_elements(
    graph_adapter: GraphDBInterface, top_n: int = 10, element_type: Optional[str] = None
) -> List[Dict[str, Any]]:
    """
    Placeholder for retrieving the most frequently accessed graph elements.

    In its current form the function does not query the adapter: it logs the
    request, logs a warning that an adapter-specific implementation is
    needed, and returns an empty list.

    :param graph_adapter: Graph database adapter (currently unused)
    :param top_n: Number of top elements requested; only used in the log message
    :param element_type: Optional element-type filter (currently unused)
    :return: Always an empty list
    """
    logger.info(f"Retrieving top {top_n} most frequent elements")

    # NOTE: a real implementation depends on the adapter's query capabilities.
    # The intended shape is a lookup ordered by the 'frequency_weight'
    # property, descending, limited to top_n, and optionally filtered by
    # element_type.
    logger.warning("get_most_frequent_elements needs adapter-specific implementation")

    no_results: List[Dict[str, Any]] = []
    return no_results
@@ -1,5 +1,4 @@
1
1
  from typing import Union
2
-
3
2
  from cognee.infrastructure.engine import DataPoint
4
3
  from cognee.modules.chunking.models import DocumentChunk
5
4
  from cognee.shared.CodeGraphEntities import CodeFile, CodePart
@@ -17,7 +16,6 @@ class TextSummary(DataPoint):
17
16
 
18
17
  text: str
19
18
  made_from: DocumentChunk
20
-
21
19
  metadata: dict = {"index_fields": ["text"]}
22
20
 
23
21
 
@@ -1 +0,0 @@
1
-