cognee 0.5.1.dev0__py3-none-any.whl → 0.5.2.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/__init__.py +2 -0
- cognee/alembic/README +1 -0
- cognee/alembic/env.py +107 -0
- cognee/alembic/script.py.mako +26 -0
- cognee/alembic/versions/1a58b986e6e1_enable_delete_for_old_tutorial_notebooks.py +52 -0
- cognee/alembic/versions/1d0bb7fede17_add_pipeline_run_status.py +33 -0
- cognee/alembic/versions/1daae0df1866_incremental_loading.py +48 -0
- cognee/alembic/versions/211ab850ef3d_add_sync_operations_table.py +118 -0
- cognee/alembic/versions/45957f0a9849_add_notebook_table.py +46 -0
- cognee/alembic/versions/46a6ce2bd2b2_expand_dataset_database_with_json_.py +333 -0
- cognee/alembic/versions/482cd6517ce4_add_default_user.py +30 -0
- cognee/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py +98 -0
- cognee/alembic/versions/8057ae7329c2_initial_migration.py +25 -0
- cognee/alembic/versions/9e7a3cb85175_loader_separation.py +104 -0
- cognee/alembic/versions/a1b2c3d4e5f6_add_label_column_to_data.py +38 -0
- cognee/alembic/versions/ab7e313804ae_permission_system_rework.py +236 -0
- cognee/alembic/versions/b9274c27a25a_kuzu_11_migration.py +75 -0
- cognee/alembic/versions/c946955da633_multi_tenant_support.py +137 -0
- cognee/alembic/versions/e1ec1dcb50b6_add_last_accessed_to_data.py +51 -0
- cognee/alembic/versions/e4ebee1091e7_expand_data_model_info.py +140 -0
- cognee/alembic.ini +117 -0
- cognee/api/v1/add/routers/get_add_router.py +2 -0
- cognee/api/v1/cognify/cognify.py +11 -6
- cognee/api/v1/cognify/routers/get_cognify_router.py +8 -0
- cognee/api/v1/config/config.py +60 -0
- cognee/api/v1/datasets/routers/get_datasets_router.py +45 -3
- cognee/api/v1/memify/routers/get_memify_router.py +2 -0
- cognee/api/v1/search/routers/get_search_router.py +21 -6
- cognee/api/v1/search/search.py +25 -5
- cognee/api/v1/sync/routers/get_sync_router.py +3 -3
- cognee/cli/commands/add_command.py +1 -1
- cognee/cli/commands/cognify_command.py +6 -0
- cognee/cli/commands/config_command.py +1 -1
- cognee/context_global_variables.py +5 -1
- cognee/eval_framework/answer_generation/answer_generation_executor.py +7 -8
- cognee/infrastructure/databases/cache/cache_db_interface.py +38 -1
- cognee/infrastructure/databases/cache/config.py +6 -0
- cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +21 -0
- cognee/infrastructure/databases/cache/get_cache_engine.py +9 -3
- cognee/infrastructure/databases/cache/redis/RedisAdapter.py +60 -1
- cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +7 -0
- cognee/infrastructure/databases/graph/get_graph_engine.py +29 -1
- cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +62 -27
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +17 -4
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +2 -1
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -0
- cognee/infrastructure/databases/vector/config.py +6 -0
- cognee/infrastructure/databases/vector/create_vector_engine.py +69 -22
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +64 -9
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +13 -2
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +16 -3
- cognee/infrastructure/databases/vector/models/ScoredResult.py +3 -3
- cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +16 -3
- cognee/infrastructure/databases/vector/pgvector/PGVectorDatasetDatabaseHandler.py +86 -0
- cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py +81 -2
- cognee/infrastructure/databases/vector/vector_db_interface.py +8 -0
- cognee/infrastructure/files/utils/get_data_file_path.py +33 -27
- cognee/infrastructure/llm/prompts/extract_query_time.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt.txt +2 -2
- cognee/infrastructure/llm/prompts/generate_graph_prompt_guided.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt_oneshot.txt +2 -2
- cognee/infrastructure/llm/prompts/generate_graph_prompt_simple.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt_strict.txt +1 -1
- cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +6 -6
- cognee/infrastructure/llm/prompts/test.txt +1 -1
- cognee/infrastructure/llm/prompts/translate_content.txt +19 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +24 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llama_cpp/adapter.py +191 -0
- cognee/modules/chunking/models/DocumentChunk.py +0 -1
- cognee/modules/cognify/config.py +2 -0
- cognee/modules/data/models/Data.py +1 -0
- cognee/modules/engine/models/Entity.py +0 -1
- cognee/modules/engine/operations/setup.py +6 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +150 -37
- cognee/modules/graph/cognee_graph/CogneeGraphElements.py +48 -2
- cognee/modules/graph/utils/__init__.py +1 -0
- cognee/modules/graph/utils/get_entity_nodes_from_triplets.py +12 -0
- cognee/modules/notebooks/methods/__init__.py +1 -0
- cognee/modules/notebooks/methods/create_notebook.py +0 -34
- cognee/modules/notebooks/methods/create_tutorial_notebooks.py +191 -0
- cognee/modules/notebooks/methods/get_notebooks.py +12 -8
- cognee/modules/notebooks/tutorials/cognee-basics/cell-1.md +3 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-2.md +10 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-3.md +7 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-4.py +28 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-5.py +3 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-6.py +9 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-7.py +17 -0
- cognee/modules/notebooks/tutorials/cognee-basics/config.json +4 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-1.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-10.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-11.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-12.py +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-13.md +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-14.py +6 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-15.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-16.py +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-2.md +9 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-3.md +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-4.md +9 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-5.md +5 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-6.py +13 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-7.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-8.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-9.py +31 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/config.json +4 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/copilot_conversations.json +107 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/guido_contributions.json +976 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/my_developer_rules.md +79 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/pep_style_guide.md +74 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/zen_principles.md +74 -0
- cognee/modules/retrieval/EntityCompletionRetriever.py +51 -38
- cognee/modules/retrieval/__init__.py +0 -1
- cognee/modules/retrieval/base_retriever.py +66 -10
- cognee/modules/retrieval/chunks_retriever.py +57 -49
- cognee/modules/retrieval/coding_rules_retriever.py +12 -5
- cognee/modules/retrieval/completion_retriever.py +29 -28
- cognee/modules/retrieval/cypher_search_retriever.py +25 -20
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +42 -46
- cognee/modules/retrieval/graph_completion_cot_retriever.py +68 -51
- cognee/modules/retrieval/graph_completion_retriever.py +78 -63
- cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
- cognee/modules/retrieval/lexical_retriever.py +34 -12
- cognee/modules/retrieval/natural_language_retriever.py +18 -15
- cognee/modules/retrieval/summaries_retriever.py +51 -34
- cognee/modules/retrieval/temporal_retriever.py +59 -49
- cognee/modules/retrieval/triplet_retriever.py +31 -32
- cognee/modules/retrieval/utils/access_tracking.py +88 -0
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +99 -85
- cognee/modules/retrieval/utils/node_edge_vector_search.py +174 -0
- cognee/modules/search/methods/__init__.py +1 -0
- cognee/modules/search/methods/get_retriever_output.py +53 -0
- cognee/modules/search/methods/get_search_type_retriever_instance.py +252 -0
- cognee/modules/search/methods/search.py +90 -215
- cognee/modules/search/models/SearchResultPayload.py +67 -0
- cognee/modules/search/types/SearchResult.py +1 -8
- cognee/modules/search/types/SearchType.py +1 -2
- cognee/modules/search/types/__init__.py +1 -1
- cognee/modules/search/utils/__init__.py +1 -2
- cognee/modules/search/utils/transform_insights_to_graph.py +2 -2
- cognee/modules/search/utils/{transform_context_to_graph.py → transform_triplets_to_graph.py} +2 -2
- cognee/modules/users/authentication/default/default_transport.py +11 -1
- cognee/modules/users/authentication/get_api_auth_backend.py +2 -1
- cognee/modules/users/authentication/get_client_auth_backend.py +2 -1
- cognee/modules/users/methods/create_user.py +0 -9
- cognee/modules/users/permissions/methods/has_user_management_permission.py +29 -0
- cognee/modules/visualization/cognee_network_visualization.py +1 -1
- cognee/run_migrations.py +48 -0
- cognee/shared/exceptions/__init__.py +1 -3
- cognee/shared/exceptions/exceptions.py +11 -1
- cognee/shared/usage_logger.py +332 -0
- cognee/shared/utils.py +12 -5
- cognee/tasks/cleanup/cleanup_unused_data.py +172 -0
- cognee/tasks/memify/extract_usage_frequency.py +613 -0
- cognee/tasks/summarization/models.py +0 -2
- cognee/tasks/temporal_graph/__init__.py +0 -1
- cognee/tasks/translation/__init__.py +96 -0
- cognee/tasks/translation/config.py +110 -0
- cognee/tasks/translation/detect_language.py +190 -0
- cognee/tasks/translation/exceptions.py +62 -0
- cognee/tasks/translation/models.py +72 -0
- cognee/tasks/translation/providers/__init__.py +44 -0
- cognee/tasks/translation/providers/azure_provider.py +192 -0
- cognee/tasks/translation/providers/base.py +85 -0
- cognee/tasks/translation/providers/google_provider.py +158 -0
- cognee/tasks/translation/providers/llm_provider.py +143 -0
- cognee/tasks/translation/translate_content.py +282 -0
- cognee/tasks/web_scraper/default_url_crawler.py +6 -2
- cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +1 -0
- cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +3 -0
- cognee/tests/integration/retrieval/test_brute_force_triplet_search_with_cognify.py +62 -0
- cognee/tests/integration/retrieval/test_chunks_retriever.py +115 -16
- cognee/tests/integration/retrieval/test_graph_completion_retriever.py +13 -5
- cognee/tests/integration/retrieval/test_graph_completion_retriever_context_extension.py +22 -20
- cognee/tests/integration/retrieval/test_graph_completion_retriever_cot.py +23 -24
- cognee/tests/integration/retrieval/test_rag_completion_retriever.py +70 -5
- cognee/tests/integration/retrieval/test_structured_output.py +62 -18
- cognee/tests/integration/retrieval/test_summaries_retriever.py +20 -9
- cognee/tests/integration/retrieval/test_temporal_retriever.py +38 -8
- cognee/tests/integration/retrieval/test_triplet_retriever.py +13 -4
- cognee/tests/integration/shared/test_usage_logger_integration.py +255 -0
- cognee/tests/tasks/translation/README.md +147 -0
- cognee/tests/tasks/translation/__init__.py +1 -0
- cognee/tests/tasks/translation/config_test.py +93 -0
- cognee/tests/tasks/translation/detect_language_test.py +118 -0
- cognee/tests/tasks/translation/providers_test.py +151 -0
- cognee/tests/tasks/translation/translate_content_test.py +213 -0
- cognee/tests/test_chromadb.py +1 -1
- cognee/tests/test_cleanup_unused_data.py +165 -0
- cognee/tests/test_delete_by_id.py +6 -6
- cognee/tests/test_extract_usage_frequency.py +308 -0
- cognee/tests/test_kuzu.py +17 -7
- cognee/tests/test_lancedb.py +3 -1
- cognee/tests/test_library.py +1 -1
- cognee/tests/test_neo4j.py +17 -7
- cognee/tests/test_neptune_analytics_vector.py +3 -1
- cognee/tests/test_permissions.py +172 -187
- cognee/tests/test_pgvector.py +3 -1
- cognee/tests/test_relational_db_migration.py +15 -1
- cognee/tests/test_remote_kuzu.py +3 -1
- cognee/tests/test_s3_file_storage.py +1 -1
- cognee/tests/test_search_db.py +97 -110
- cognee/tests/test_usage_logger_e2e.py +268 -0
- cognee/tests/unit/api/test_get_raw_data_endpoint.py +206 -0
- cognee/tests/unit/eval_framework/answer_generation_test.py +4 -3
- cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +2 -0
- cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +42 -2
- cognee/tests/unit/modules/graph/cognee_graph_test.py +329 -31
- cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +31 -59
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +70 -33
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +72 -52
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +27 -33
- cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +28 -15
- cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +37 -42
- cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +48 -64
- cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +263 -24
- cognee/tests/unit/modules/retrieval/test_node_edge_vector_search.py +273 -0
- cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +30 -16
- cognee/tests/unit/modules/search/test_get_search_type_retriever_instance.py +125 -0
- cognee/tests/unit/modules/search/test_search.py +176 -0
- cognee/tests/unit/modules/search/test_search_prepare_search_result_contract.py +190 -0
- cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +511 -297
- cognee/tests/unit/shared/test_usage_logger.py +241 -0
- cognee/tests/unit/users/permissions/test_has_user_management_permission.py +46 -0
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/METADATA +17 -10
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/RECORD +232 -144
- cognee/api/.env.example +0 -5
- cognee/modules/retrieval/base_graph_retriever.py +0 -24
- cognee/modules/search/methods/get_search_type_tools.py +0 -223
- cognee/modules/search/methods/no_access_control_search.py +0 -62
- cognee/modules/search/utils/prepare_search_result.py +0 -63
- cognee/tests/test_feedback_enrichment.py +0 -174
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/WHEEL +0 -0
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/entry_points.txt +0 -0
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/licenses/NOTICE.md +0 -0
@@ -0,0 +1,613 @@
+# cognee/tasks/memify/extract_usage_frequency.py
+from typing import List, Dict, Any, Optional
+from datetime import datetime, timedelta
+from cognee.shared.logging_utils import get_logger
+from cognee.modules.graph.cognee_graph.CogneeGraph import CogneeGraph
+from cognee.modules.pipelines.tasks.task import Task
+from cognee.infrastructure.databases.graph.graph_db_interface import GraphDBInterface
+
+logger = get_logger("extract_usage_frequency")
+
+
+async def extract_usage_frequency(
+    subgraphs: List[CogneeGraph],
+    time_window: timedelta = timedelta(days=7),
+    min_interaction_threshold: int = 1,
+) -> Dict[str, Any]:
+    """
+    Extract usage frequency from CogneeUserInteraction nodes.
+
+    When save_interaction=True in cognee.search(), the system creates:
+    - CogneeUserInteraction nodes (representing the query/answer interaction)
+    - used_graph_element_to_answer edges (connecting interactions to graph elements used)
+
+    This function tallies how often each graph element is referenced via these edges,
+    enabling frequency-based ranking in downstream retrievers.
+
+    :param subgraphs: List of CogneeGraph instances containing interaction data
+    :param time_window: Time window to consider for interactions (default: 7 days)
+    :param min_interaction_threshold: Minimum interactions to track (default: 1)
+    :return: Dictionary containing node frequencies, edge frequencies, and metadata
+    """
+    current_time = datetime.now()
+    cutoff_time = current_time - time_window
+
+    # Track frequencies for graph elements (nodes and edges)
+    node_frequencies = {}
+    edge_frequencies = {}
+    relationship_type_frequencies = {}
+
+    # Track interaction metadata
+    interaction_count = 0
+    interactions_in_window = 0
+
+    logger.info(f"Extracting usage frequencies from {len(subgraphs)} subgraphs")
+    logger.info(f"Time window: {time_window}, Cutoff: {cutoff_time.isoformat()}")
+
+    for subgraph in subgraphs:
+        # Find all CogneeUserInteraction nodes
+        interaction_nodes = {}
+        for node_id, node in subgraph.nodes.items():
+            node_type = node.attributes.get("type") or node.attributes.get("node_type")
+
+            if node_type == "CogneeUserInteraction":
+                # Parse and validate timestamp
+                timestamp_value = node.attributes.get("timestamp") or node.attributes.get(
+                    "created_at"
+                )
+                if timestamp_value is not None:
+                    try:
+                        # Handle various timestamp formats
+                        interaction_time = None
+
+                        if isinstance(timestamp_value, datetime):
+                            # Already a Python datetime
+                            interaction_time = timestamp_value
+                        elif isinstance(timestamp_value, (int, float)):
+                            # Unix timestamp (assume milliseconds if > 10 digits)
+                            if timestamp_value > 10000000000:
+                                # Milliseconds since epoch
+                                interaction_time = datetime.fromtimestamp(timestamp_value / 1000.0)
+                            else:
+                                # Seconds since epoch
+                                interaction_time = datetime.fromtimestamp(timestamp_value)
+                        elif isinstance(timestamp_value, str):
+                            # Try different string formats
+                            if timestamp_value.isdigit():
+                                # Numeric string - treat as Unix timestamp
+                                ts_int = int(timestamp_value)
+                                if ts_int > 10000000000:
+                                    interaction_time = datetime.fromtimestamp(ts_int / 1000.0)
+                                else:
+                                    interaction_time = datetime.fromtimestamp(ts_int)
+                            else:
+                                # ISO format string
+                                interaction_time = datetime.fromisoformat(timestamp_value)
+                        elif hasattr(timestamp_value, "to_native"):
+                            # Neo4j datetime object - convert to Python datetime
+                            interaction_time = timestamp_value.to_native()
+                        elif hasattr(timestamp_value, "year") and hasattr(timestamp_value, "month"):
+                            # Datetime-like object - extract components
+                            try:
+                                interaction_time = datetime(
+                                    year=timestamp_value.year,
+                                    month=timestamp_value.month,
+                                    day=timestamp_value.day,
+                                    hour=getattr(timestamp_value, "hour", 0),
+                                    minute=getattr(timestamp_value, "minute", 0),
+                                    second=getattr(timestamp_value, "second", 0),
+                                    microsecond=getattr(timestamp_value, "microsecond", 0),
+                                )
+                            except (AttributeError, ValueError):
+                                pass
+
+                        if interaction_time is None:
+                            # Last resort: try converting to string and parsing
+                            str_value = str(timestamp_value)
+                            if str_value.isdigit():
+                                ts_int = int(str_value)
+                                if ts_int > 10000000000:
+                                    interaction_time = datetime.fromtimestamp(ts_int / 1000.0)
+                                else:
+                                    interaction_time = datetime.fromtimestamp(ts_int)
+                            else:
+                                interaction_time = datetime.fromisoformat(str_value)
+
+                        if interaction_time is None:
+                            raise ValueError(f"Could not parse timestamp: {timestamp_value}")
+
+                        # Make sure it's timezone-naive for comparison
+                        if interaction_time.tzinfo is not None:
+                            interaction_time = interaction_time.replace(tzinfo=None)
+
+                        interaction_nodes[node_id] = {
+                            "node": node,
+                            "timestamp": interaction_time,
+                            "in_window": interaction_time >= cutoff_time,
+                        }
+                        interaction_count += 1
+                        if interaction_time >= cutoff_time:
+                            interactions_in_window += 1
+                    except (ValueError, TypeError, AttributeError, OSError) as e:
+                        logger.warning(
+                            f"Failed to parse timestamp for interaction node {node_id}: {e}"
+                        )
+                        logger.debug(
+                            f"Timestamp value type: {type(timestamp_value)}, value: {timestamp_value}"
+                        )
+
+        # Process edges to find graph elements used in interactions
+        for edge in subgraph.edges:
+            relationship_type = edge.attributes.get("relationship_type")
+
+            # Look for 'used_graph_element_to_answer' edges
+            if relationship_type == "used_graph_element_to_answer":
+                # node1 should be the CogneeUserInteraction, node2 is the graph element
+                source_id = str(edge.node1.id)
+                target_id = str(edge.node2.id)
+
+                # Check if source is an interaction node in our time window
+                if source_id in interaction_nodes:
+                    interaction_data = interaction_nodes[source_id]
+
+                    if interaction_data["in_window"]:
+                        # Count the graph element (target node) being used
+                        node_frequencies[target_id] = node_frequencies.get(target_id, 0) + 1
+
+                        # Also track what type of element it is for analytics
+                        target_node = subgraph.get_node(target_id)
+                        if target_node:
+                            element_type = target_node.attributes.get(
+                                "type"
+                            ) or target_node.attributes.get("node_type")
+                            if element_type:
+                                relationship_type_frequencies[element_type] = (
+                                    relationship_type_frequencies.get(element_type, 0) + 1
+                                )
+
+            # Also track general edge usage patterns
+            elif relationship_type and relationship_type != "used_graph_element_to_answer":
+                # Check if either endpoint is referenced in a recent interaction
+                source_id = str(edge.node1.id)
+                target_id = str(edge.node2.id)
+
+                # If this edge connects to any frequently accessed nodes, track the edge type
+                if source_id in node_frequencies or target_id in node_frequencies:
+                    edge_key = f"{relationship_type}:{source_id}:{target_id}"
+                    edge_frequencies[edge_key] = edge_frequencies.get(edge_key, 0) + 1
+
+    # Filter frequencies above threshold
+    filtered_node_frequencies = {
+        node_id: freq
+        for node_id, freq in node_frequencies.items()
+        if freq >= min_interaction_threshold
+    }
+
+    filtered_edge_frequencies = {
+        edge_key: freq
+        for edge_key, freq in edge_frequencies.items()
+        if freq >= min_interaction_threshold
+    }
+
+    logger.info(
+        f"Processed {interactions_in_window}/{interaction_count} interactions in time window"
+    )
+    logger.info(
+        f"Found {len(filtered_node_frequencies)} nodes and {len(filtered_edge_frequencies)} edges "
+        f"above threshold (min: {min_interaction_threshold})"
+    )
+    logger.info(f"Element type distribution: {relationship_type_frequencies}")
+
+    return {
+        "node_frequencies": filtered_node_frequencies,
+        "edge_frequencies": filtered_edge_frequencies,
+        "element_type_frequencies": relationship_type_frequencies,
+        "total_interactions": interaction_count,
+        "interactions_in_window": interactions_in_window,
+        "time_window_days": time_window.days,
+        "last_processed_timestamp": current_time.isoformat(),
+        "cutoff_timestamp": cutoff_time.isoformat(),
+    }
+
+
+async def add_frequency_weights(
+    graph_adapter: GraphDBInterface, usage_frequencies: Dict[str, Any]
+) -> None:
+    """
+    Add frequency weights to graph nodes and edges using the graph adapter.
+
+    Uses direct Cypher queries for Neo4j adapter compatibility.
+    Writes frequency_weight properties back to the graph for use in:
+    - Ranking frequently referenced entities higher during retrieval
+    - Adjusting scoring for completion strategies
+    - Exposing usage metrics in dashboards or audits
+
+    :param graph_adapter: Graph database adapter interface
+    :param usage_frequencies: Calculated usage frequencies from extract_usage_frequency
+    """
+    node_frequencies = usage_frequencies.get("node_frequencies", {})
+    edge_frequencies = usage_frequencies.get("edge_frequencies", {})
+
+    logger.info(f"Adding frequency weights to {len(node_frequencies)} nodes")
+
+    # Check adapter type and use appropriate method
+    adapter_type = type(graph_adapter).__name__
+    logger.info(f"Using adapter: {adapter_type}")
+
+    nodes_updated = 0
+    nodes_failed = 0
+
+    # Determine which method to use based on adapter type
+    use_neo4j_cypher = adapter_type == "Neo4jAdapter" and hasattr(graph_adapter, "query")
+    use_kuzu_query = adapter_type == "KuzuAdapter" and hasattr(graph_adapter, "query")
+    use_get_update = hasattr(graph_adapter, "get_node_by_id") and hasattr(
+        graph_adapter, "update_node_properties"
+    )
+
+    # Method 1: Neo4j Cypher with SET (creates properties on the fly)
+    if use_neo4j_cypher:
+        try:
+            logger.info("Using Neo4j Cypher SET method")
+            last_updated = usage_frequencies.get("last_processed_timestamp")
+
+            for node_id, frequency in node_frequencies.items():
+                try:
+                    query = """
+                    MATCH (n)
+                    WHERE n.id = $node_id
+                    SET n.frequency_weight = $frequency,
+                        n.frequency_updated_at = $updated_at
+                    RETURN n.id as id
+                    """
+
+                    result = await graph_adapter.query(
+                        query,
+                        params={
+                            "node_id": node_id,
+                            "frequency": frequency,
+                            "updated_at": last_updated,
+                        },
+                    )
+
+                    if result and len(result) > 0:
+                        nodes_updated += 1
+                    else:
+                        logger.warning(f"Node {node_id} not found or not updated")
+                        nodes_failed += 1
+
+                except Exception as e:
+                    logger.error(f"Error updating node {node_id}: {e}")
+                    nodes_failed += 1
+
+            logger.info(f"Node update complete: {nodes_updated} succeeded, {nodes_failed} failed")
+
+        except Exception as e:
+            logger.error(f"Neo4j Cypher update failed: {e}")
+            use_neo4j_cypher = False
+
+    # Method 2: Kuzu - use get_node + add_node (updates via re-adding with same ID)
+    elif (
+        use_kuzu_query and hasattr(graph_adapter, "get_node") and hasattr(graph_adapter, "add_node")
+    ):
+        logger.info("Using Kuzu get_node + add_node method")
+        last_updated = usage_frequencies.get("last_processed_timestamp")
+
+        for node_id, frequency in node_frequencies.items():
+            try:
+                # Get the existing node (returns a dict)
+                existing_node_dict = await graph_adapter.get_node(node_id)
+
+                if existing_node_dict:
+                    # Update the dict with new properties
+                    existing_node_dict["frequency_weight"] = frequency
+                    existing_node_dict["frequency_updated_at"] = last_updated
+
+                    # Kuzu's add_node likely just takes the dict directly, not a Node object
+                    # Try passing the dict directly first
+                    try:
+                        await graph_adapter.add_node(existing_node_dict)
+                        nodes_updated += 1
+                    except Exception as dict_error:
+                        # If dict doesn't work, try creating a Node object
+                        logger.debug(f"Dict add failed, trying Node object: {dict_error}")
+
+                        try:
+                            from cognee.infrastructure.engine import Node
+
+                            # Try different Node constructor patterns
+                            try:
+                                # Pattern 1: Just properties
+                                node_obj = Node(existing_node_dict)
+                            except Exception:
+                                # Pattern 2: Type and properties
+                                node_obj = Node(
+                                    type=existing_node_dict.get("type", "Unknown"),
+                                    **existing_node_dict,
+                                )
+
+                            await graph_adapter.add_node(node_obj)
+                            nodes_updated += 1
+                        except Exception as node_error:
+                            logger.error(f"Both dict and Node object failed: {node_error}")
+                            nodes_failed += 1
+                else:
+                    logger.warning(f"Node {node_id} not found in graph")
+                    nodes_failed += 1
+
+            except Exception as e:
+                logger.error(f"Error updating node {node_id}: {e}")
+                nodes_failed += 1
+
+        logger.info(f"Node update complete: {nodes_updated} succeeded, {nodes_failed} failed")
+
+    # Method 3: Generic get_node_by_id + update_node_properties
+    elif use_get_update:
+        logger.info("Using get/update method for adapter")
+        for node_id, frequency in node_frequencies.items():
+            try:
+                # Get current node data
+                node_data = await graph_adapter.get_node_by_id(node_id)
+
+                if node_data:
+                    # Tweak the properties dict - add frequency_weight
+                    if isinstance(node_data, dict):
+                        properties = node_data.get("properties", {})
+                    else:
+                        properties = getattr(node_data, "properties", {}) or {}
+
+                    # Update with frequency weight
+                    properties["frequency_weight"] = frequency
+                    properties["frequency_updated_at"] = usage_frequencies.get(
+                        "last_processed_timestamp"
+                    )
+
+                    # Write back via adapter
+                    await graph_adapter.update_node_properties(node_id, properties)
+                    nodes_updated += 1
+                else:
+                    logger.warning(f"Node {node_id} not found in graph")
+                    nodes_failed += 1
+
+            except Exception as e:
+                logger.error(f"Error updating node {node_id}: {e}")
+                nodes_failed += 1
+
+        logger.info(f"Node update complete: {nodes_updated} succeeded, {nodes_failed} failed")
+
+    # If no method is available
+    if not use_neo4j_cypher and not use_kuzu_query and not use_get_update:
+        logger.error(f"Adapter {adapter_type} does not support required update methods")
+        logger.error(
+            "Required: either 'query' method or both 'get_node_by_id' and 'update_node_properties'"
+        )
+        return
+
+    # Update edge frequencies
+    # Note: Edge property updates are backend-specific
+    if edge_frequencies:
+        logger.info(f"Processing {len(edge_frequencies)} edge frequency entries")
+
+        edges_updated = 0
+        edges_failed = 0
+
+        for edge_key, frequency in edge_frequencies.items():
+            try:
+                # Parse edge key: "relationship_type:source_id:target_id"
+                parts = edge_key.split(":", 2)
+                if len(parts) == 3:
+                    relationship_type, source_id, target_id = parts
+
+                    # Try to update edge if adapter supports it
+                    if hasattr(graph_adapter, "update_edge_properties"):
+                        edge_properties = {
+                            "frequency_weight": frequency,
+                            "frequency_updated_at": usage_frequencies.get(
+                                "last_processed_timestamp"
+                            ),
+                        }
+
+                        await graph_adapter.update_edge_properties(
+                            source_id, target_id, relationship_type, edge_properties
+                        )
+                        edges_updated += 1
+                    else:
+                        # Fallback: store in metadata or log
+                        logger.debug(
+                            f"Adapter doesn't support update_edge_properties for "
+                            f"{relationship_type} ({source_id} -> {target_id})"
+                        )
+
+            except Exception as e:
+                logger.error(f"Error updating edge {edge_key}: {e}")
+                edges_failed += 1
+
+        if edges_updated > 0:
+            logger.info(f"Edge update complete: {edges_updated} succeeded, {edges_failed} failed")
+        else:
+            logger.info(
+                "Edge frequency updates skipped (adapter may not support edge property updates)"
+            )
+
+    # Store aggregate statistics as metadata if supported
+    if hasattr(graph_adapter, "set_metadata"):
+        try:
+            metadata = {
+                "element_type_frequencies": usage_frequencies.get("element_type_frequencies", {}),
+                "total_interactions": usage_frequencies.get("total_interactions", 0),
+                "interactions_in_window": usage_frequencies.get("interactions_in_window", 0),
+                "last_frequency_update": usage_frequencies.get("last_processed_timestamp"),
+            }
+            await graph_adapter.set_metadata("usage_frequency_stats", metadata)
+            logger.info("Stored usage frequency statistics as metadata")
+        except Exception as e:
+            logger.warning(f"Could not store usage statistics as metadata: {e}")
+
+
+async def create_usage_frequency_pipeline(
+    graph_adapter: GraphDBInterface,
+    time_window: timedelta = timedelta(days=7),
+    min_interaction_threshold: int = 1,
+    batch_size: int = 100,
+) -> tuple:
+    """
+    Create memify pipeline entry for usage frequency tracking.
+
+    This follows the same pattern as feedback enrichment flows, allowing
+    the frequency update to run end-to-end in a custom memify pipeline.
+
+    Use case example:
+        extraction_tasks, enrichment_tasks = await create_usage_frequency_pipeline(
+            graph_adapter=my_adapter,
+            time_window=timedelta(days=30),
+            min_interaction_threshold=2
+        )
+
+        # Run in memify pipeline
+        pipeline = Pipeline(extraction_tasks + enrichment_tasks)
+        results = await pipeline.run()
+
+    :param graph_adapter: Graph database adapter
+    :param time_window: Time window for counting interactions (default: 7 days)
+    :param min_interaction_threshold: Minimum interactions to track (default: 1)
+    :param batch_size: Batch size for processing (default: 100)
+    :return: Tuple of (extraction_tasks, enrichment_tasks)
+    """
+    logger.info("Creating usage frequency pipeline")
+    logger.info(f"Config: time_window={time_window}, threshold={min_interaction_threshold}")
+
+    extraction_tasks = [
+        Task(
+            extract_usage_frequency,
+            time_window=time_window,
+            min_interaction_threshold=min_interaction_threshold,
+        )
+    ]
+
+    enrichment_tasks = [
+        Task(
+            add_frequency_weights,
+            graph_adapter=graph_adapter,
+            task_config={"batch_size": batch_size},
+        )
+    ]
+
+    return extraction_tasks, enrichment_tasks
+
+
+async def run_usage_frequency_update(
+    graph_adapter: GraphDBInterface,
+    subgraphs: List[CogneeGraph],
+    time_window: timedelta = timedelta(days=7),
+    min_interaction_threshold: int = 1,
+) -> Dict[str, Any]:
+    """
+    Convenience function to run the complete usage frequency update pipeline.
+
+    This is the main entry point for updating frequency weights on graph elements
+    based on CogneeUserInteraction data from cognee.search(save_interaction=True).
+
+    Example usage:
+        # After running searches with save_interaction=True
+        from cognee.tasks.memify.extract_usage_frequency import run_usage_frequency_update
+
+        # Get the graph with interactions
+        graph = await get_cognee_graph_with_interactions()
+
+        # Update frequency weights
+        stats = await run_usage_frequency_update(
+            graph_adapter=graph_adapter,
+            subgraphs=[graph],
+            time_window=timedelta(days=30),  # Last 30 days
+            min_interaction_threshold=2  # At least 2 uses
+        )
+
+        print(f"Updated {len(stats['node_frequencies'])} nodes")
+
+    :param graph_adapter: Graph database adapter
+    :param subgraphs: List of CogneeGraph instances with interaction data
+    :param time_window: Time window for counting interactions
+    :param min_interaction_threshold: Minimum interactions to track
+    :return: Usage frequency statistics
+    """
+    logger.info("Starting usage frequency update")
+
+    try:
+        # Extract frequencies from interaction data
+        usage_frequencies = await extract_usage_frequency(
+            subgraphs=subgraphs,
+            time_window=time_window,
+            min_interaction_threshold=min_interaction_threshold,
+        )
+
+        # Add frequency weights back to the graph
+        await add_frequency_weights(
+            graph_adapter=graph_adapter, usage_frequencies=usage_frequencies
+        )
+
+        logger.info("Usage frequency update completed successfully")
+        logger.info(
+            f"Summary: {usage_frequencies['interactions_in_window']} interactions processed, "
+            f"{len(usage_frequencies['node_frequencies'])} nodes weighted"
+        )
+
+        return usage_frequencies
+
+    except Exception as e:
+        logger.error(f"Error during usage frequency update: {str(e)}")
+        raise
+
+
+async def get_most_frequent_elements(
+    graph_adapter: GraphDBInterface, top_n: int = 10, element_type: Optional[str] = None
+) -> List[Dict[str, Any]]:
+    """
+    Retrieve the most frequently accessed graph elements.
+
+    Useful for analytics dashboards and understanding user behavior.
+
+    :param graph_adapter: Graph database adapter
+    :param top_n: Number of top elements to return
+    :param element_type: Optional filter by element type
+    :return: List of elements with their frequency weights
+    """
+    logger.info(f"Retrieving top {top_n} most frequent elements")
+
+    # This would need to be implemented based on the specific graph adapter's query capabilities
+    # Pseudocode:
+    # results = await graph_adapter.query_nodes_by_property(
+    #     property_name='frequency_weight',
+    #     order_by='DESC',
+    #     limit=top_n,
+    #     filters={'type': element_type} if element_type else None
+    # )
+
+    logger.warning("get_most_frequent_elements needs adapter-specific implementation")
+    return []
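The stub above returns an empty list pending an adapter-specific implementation. For orientation only, here is a hedged sketch of what the Neo4j path could look like. It reuses the graph_adapter.query(query, params=...) call and the frequency_weight property that add_frequency_weights already uses in this file; the Cypher text, the function name, and the assumption that nodes expose a type property are illustrative and not part of the package.

# Hedged Neo4j-only sketch; graph_adapter.query(query, params=...) is the same
# call add_frequency_weights makes above, and frequency_weight is the property
# it writes. The Cypher text and the n.type filter are assumptions.
from typing import Any, Dict, List, Optional

from cognee.infrastructure.databases.graph.graph_db_interface import GraphDBInterface


async def get_most_frequent_elements_neo4j(
    graph_adapter: GraphDBInterface, top_n: int = 10, element_type: Optional[str] = None
) -> List[Dict[str, Any]]:
    # Rank nodes by the frequency_weight written during enrichment.
    cypher = """
    MATCH (n)
    WHERE n.frequency_weight IS NOT NULL
      AND ($element_type IS NULL OR n.type = $element_type)
    RETURN n.id AS id, n.type AS type, n.frequency_weight AS frequency_weight
    ORDER BY n.frequency_weight DESC
    LIMIT $top_n
    """
    return await graph_adapter.query(
        cypher, params={"top_n": top_n, "element_type": element_type}
    )

Parameterizing top_n and element_type keeps the query plan reusable across calls rather than baking the values into the Cypher text.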
cognee/tasks/summarization/models.py
@@ -1,5 +1,4 @@
 from typing import Union
-
 from cognee.infrastructure.engine import DataPoint
 from cognee.modules.chunking.models import DocumentChunk
 from cognee.shared.CodeGraphEntities import CodeFile, CodePart
@@ -17,7 +16,6 @@ class TextSummary(DataPoint):
 
     text: str
     made_from: DocumentChunk
-
     metadata: dict = {"index_fields": ["text"]}
 
 
@@ -1 +0,0 @@
-
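Since the diff shows only the producer side of the usage-frequency feature, a final self-contained sketch of the data contract extract_usage_frequency expects: the function only reads nodes (a mapping of id to objects carrying an attributes dict), edges (objects with node1, node2, and attributes), and get_node, so duck-typed stand-ins are enough to exercise it. FakeGraph and the sample ids below are hypothetical; only the import path comes from the diff above.

# Self-contained sketch: exercises extract_usage_frequency with duck-typed
# stand-ins (hypothetical) that expose only the members the function reads.
import asyncio
from datetime import datetime
from types import SimpleNamespace

from cognee.tasks.memify.extract_usage_frequency import extract_usage_frequency


class FakeGraph:
    """Hypothetical stand-in for CogneeGraph: .nodes, .edges, .get_node."""

    def __init__(self, nodes, edges):
        self.nodes = nodes  # dict: node_id -> object with .attributes
        self.edges = edges  # list of objects with .node1 / .node2 / .attributes

    def get_node(self, node_id):
        return self.nodes.get(node_id)


async def main():
    # One interaction node (as written by cognee.search(..., save_interaction=True))
    # linked to one entity via a used_graph_element_to_answer edge.
    interaction = SimpleNamespace(
        id="q1", attributes={"type": "CogneeUserInteraction", "timestamp": datetime.now()}
    )
    entity = SimpleNamespace(id="e1", attributes={"type": "Entity"})
    edge = SimpleNamespace(
        node1=interaction,
        node2=entity,
        attributes={"relationship_type": "used_graph_element_to_answer"},
    )
    graph = FakeGraph({"q1": interaction, "e1": entity}, [edge])

    stats = await extract_usage_frequency([graph])
    print(stats["node_frequencies"])  # expected: {'e1': 1}


asyncio.run(main())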