cognee 0.2.3.dev1__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/__init__.py +2 -0
- cognee/__main__.py +4 -0
- cognee/api/client.py +28 -3
- cognee/api/health.py +10 -13
- cognee/api/v1/add/add.py +20 -6
- cognee/api/v1/add/routers/get_add_router.py +12 -37
- cognee/api/v1/cloud/routers/__init__.py +1 -0
- cognee/api/v1/cloud/routers/get_checks_router.py +23 -0
- cognee/api/v1/cognify/code_graph_pipeline.py +14 -3
- cognee/api/v1/cognify/cognify.py +67 -105
- cognee/api/v1/cognify/routers/get_cognify_router.py +11 -3
- cognee/api/v1/datasets/routers/get_datasets_router.py +16 -5
- cognee/api/v1/memify/routers/__init__.py +1 -0
- cognee/api/v1/memify/routers/get_memify_router.py +100 -0
- cognee/api/v1/notebooks/routers/__init__.py +1 -0
- cognee/api/v1/notebooks/routers/get_notebooks_router.py +96 -0
- cognee/api/v1/responses/default_tools.py +4 -0
- cognee/api/v1/responses/dispatch_function.py +6 -1
- cognee/api/v1/responses/models.py +1 -1
- cognee/api/v1/search/routers/get_search_router.py +20 -1
- cognee/api/v1/search/search.py +17 -4
- cognee/api/v1/sync/__init__.py +17 -0
- cognee/api/v1/sync/routers/__init__.py +3 -0
- cognee/api/v1/sync/routers/get_sync_router.py +241 -0
- cognee/api/v1/sync/sync.py +877 -0
- cognee/api/v1/ui/__init__.py +1 -0
- cognee/api/v1/ui/ui.py +529 -0
- cognee/api/v1/users/routers/get_auth_router.py +13 -1
- cognee/base_config.py +10 -1
- cognee/cli/__init__.py +10 -0
- cognee/cli/_cognee.py +273 -0
- cognee/cli/commands/__init__.py +1 -0
- cognee/cli/commands/add_command.py +80 -0
- cognee/cli/commands/cognify_command.py +128 -0
- cognee/cli/commands/config_command.py +225 -0
- cognee/cli/commands/delete_command.py +80 -0
- cognee/cli/commands/search_command.py +149 -0
- cognee/cli/config.py +33 -0
- cognee/cli/debug.py +21 -0
- cognee/cli/echo.py +45 -0
- cognee/cli/exceptions.py +23 -0
- cognee/cli/minimal_cli.py +97 -0
- cognee/cli/reference.py +26 -0
- cognee/cli/suppress_logging.py +12 -0
- cognee/eval_framework/corpus_builder/corpus_builder_executor.py +2 -2
- cognee/eval_framework/eval_config.py +1 -1
- cognee/infrastructure/databases/graph/config.py +10 -4
- cognee/infrastructure/databases/graph/get_graph_engine.py +4 -9
- cognee/infrastructure/databases/graph/kuzu/adapter.py +199 -2
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +138 -0
- cognee/infrastructure/databases/relational/__init__.py +2 -0
- cognee/infrastructure/databases/relational/get_async_session.py +15 -0
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +6 -1
- cognee/infrastructure/databases/relational/with_async_session.py +25 -0
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +1 -1
- cognee/infrastructure/databases/vector/config.py +13 -6
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +6 -4
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +16 -7
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +5 -5
- cognee/infrastructure/databases/vector/embeddings/config.py +2 -2
- cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +2 -6
- cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +10 -7
- cognee/infrastructure/files/storage/LocalFileStorage.py +9 -0
- cognee/infrastructure/files/storage/S3FileStorage.py +5 -0
- cognee/infrastructure/files/storage/StorageManager.py +7 -1
- cognee/infrastructure/files/storage/storage.py +16 -0
- cognee/infrastructure/files/utils/get_data_file_path.py +14 -9
- cognee/infrastructure/files/utils/get_file_metadata.py +2 -1
- cognee/infrastructure/llm/LLMGateway.py +32 -5
- cognee/infrastructure/llm/config.py +6 -4
- cognee/infrastructure/llm/prompts/extract_query_time.txt +15 -0
- cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +25 -0
- cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +30 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +16 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py +2 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_event_entities.py +44 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/__init__.py +1 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_content_graph.py +19 -15
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_event_graph.py +46 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +3 -3
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +3 -3
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +2 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +14 -8
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +6 -4
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +28 -4
- cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +2 -2
- cognee/infrastructure/llm/tokenizer/HuggingFace/adapter.py +3 -3
- cognee/infrastructure/llm/tokenizer/Mistral/adapter.py +3 -3
- cognee/infrastructure/llm/tokenizer/TikToken/adapter.py +6 -6
- cognee/infrastructure/llm/utils.py +7 -7
- cognee/infrastructure/utils/run_sync.py +8 -1
- cognee/modules/chunking/models/DocumentChunk.py +4 -3
- cognee/modules/cloud/exceptions/CloudApiKeyMissingError.py +15 -0
- cognee/modules/cloud/exceptions/CloudConnectionError.py +15 -0
- cognee/modules/cloud/exceptions/__init__.py +2 -0
- cognee/modules/cloud/operations/__init__.py +1 -0
- cognee/modules/cloud/operations/check_api_key.py +25 -0
- cognee/modules/data/deletion/prune_system.py +1 -1
- cognee/modules/data/methods/__init__.py +2 -0
- cognee/modules/data/methods/check_dataset_name.py +1 -1
- cognee/modules/data/methods/create_authorized_dataset.py +19 -0
- cognee/modules/data/methods/get_authorized_dataset.py +11 -5
- cognee/modules/data/methods/get_authorized_dataset_by_name.py +16 -0
- cognee/modules/data/methods/get_dataset_data.py +1 -1
- cognee/modules/data/methods/load_or_create_datasets.py +2 -20
- cognee/modules/engine/models/Event.py +16 -0
- cognee/modules/engine/models/Interval.py +8 -0
- cognee/modules/engine/models/Timestamp.py +13 -0
- cognee/modules/engine/models/__init__.py +3 -0
- cognee/modules/engine/utils/__init__.py +2 -0
- cognee/modules/engine/utils/generate_event_datapoint.py +46 -0
- cognee/modules/engine/utils/generate_timestamp_datapoint.py +51 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +2 -2
- cognee/modules/graph/methods/get_formatted_graph_data.py +3 -2
- cognee/modules/graph/utils/__init__.py +1 -0
- cognee/modules/graph/utils/resolve_edges_to_text.py +71 -0
- cognee/modules/memify/__init__.py +1 -0
- cognee/modules/memify/memify.py +118 -0
- cognee/modules/notebooks/methods/__init__.py +5 -0
- cognee/modules/notebooks/methods/create_notebook.py +26 -0
- cognee/modules/notebooks/methods/delete_notebook.py +13 -0
- cognee/modules/notebooks/methods/get_notebook.py +21 -0
- cognee/modules/notebooks/methods/get_notebooks.py +18 -0
- cognee/modules/notebooks/methods/update_notebook.py +17 -0
- cognee/modules/notebooks/models/Notebook.py +53 -0
- cognee/modules/notebooks/models/__init__.py +1 -0
- cognee/modules/notebooks/operations/__init__.py +1 -0
- cognee/modules/notebooks/operations/run_in_local_sandbox.py +55 -0
- cognee/modules/pipelines/__init__.py +1 -1
- cognee/modules/pipelines/exceptions/tasks.py +18 -0
- cognee/modules/pipelines/layers/__init__.py +1 -0
- cognee/modules/pipelines/layers/check_pipeline_run_qualification.py +59 -0
- cognee/modules/pipelines/layers/pipeline_execution_mode.py +127 -0
- cognee/modules/pipelines/layers/reset_dataset_pipeline_run_status.py +28 -0
- cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +34 -0
- cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +55 -0
- cognee/modules/pipelines/layers/setup_and_check_environment.py +41 -0
- cognee/modules/pipelines/layers/validate_pipeline_tasks.py +20 -0
- cognee/modules/pipelines/methods/__init__.py +2 -0
- cognee/modules/pipelines/methods/get_pipeline_runs_by_dataset.py +34 -0
- cognee/modules/pipelines/methods/reset_pipeline_run_status.py +16 -0
- cognee/modules/pipelines/operations/__init__.py +0 -1
- cognee/modules/pipelines/operations/log_pipeline_run_initiated.py +1 -1
- cognee/modules/pipelines/operations/pipeline.py +24 -138
- cognee/modules/pipelines/operations/run_tasks.py +17 -41
- cognee/modules/retrieval/base_feedback.py +11 -0
- cognee/modules/retrieval/base_graph_retriever.py +18 -0
- cognee/modules/retrieval/base_retriever.py +1 -1
- cognee/modules/retrieval/code_retriever.py +8 -0
- cognee/modules/retrieval/coding_rules_retriever.py +31 -0
- cognee/modules/retrieval/completion_retriever.py +9 -3
- cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +1 -0
- cognee/modules/retrieval/cypher_search_retriever.py +1 -9
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +29 -13
- cognee/modules/retrieval/graph_completion_cot_retriever.py +30 -13
- cognee/modules/retrieval/graph_completion_retriever.py +107 -56
- cognee/modules/retrieval/graph_summary_completion_retriever.py +5 -1
- cognee/modules/retrieval/insights_retriever.py +14 -3
- cognee/modules/retrieval/natural_language_retriever.py +0 -4
- cognee/modules/retrieval/summaries_retriever.py +1 -1
- cognee/modules/retrieval/temporal_retriever.py +152 -0
- cognee/modules/retrieval/user_qa_feedback.py +83 -0
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +7 -32
- cognee/modules/retrieval/utils/completion.py +10 -3
- cognee/modules/retrieval/utils/extract_uuid_from_node.py +18 -0
- cognee/modules/retrieval/utils/models.py +40 -0
- cognee/modules/search/methods/get_search_type_tools.py +168 -0
- cognee/modules/search/methods/no_access_control_search.py +47 -0
- cognee/modules/search/methods/search.py +239 -118
- cognee/modules/search/types/SearchResult.py +21 -0
- cognee/modules/search/types/SearchType.py +3 -0
- cognee/modules/search/types/__init__.py +1 -0
- cognee/modules/search/utils/__init__.py +2 -0
- cognee/modules/search/utils/prepare_search_result.py +41 -0
- cognee/modules/search/utils/transform_context_to_graph.py +38 -0
- cognee/modules/settings/get_settings.py +2 -2
- cognee/modules/sync/__init__.py +1 -0
- cognee/modules/sync/methods/__init__.py +23 -0
- cognee/modules/sync/methods/create_sync_operation.py +53 -0
- cognee/modules/sync/methods/get_sync_operation.py +107 -0
- cognee/modules/sync/methods/update_sync_operation.py +248 -0
- cognee/modules/sync/models/SyncOperation.py +142 -0
- cognee/modules/sync/models/__init__.py +3 -0
- cognee/modules/users/__init__.py +0 -1
- cognee/modules/users/methods/__init__.py +4 -1
- cognee/modules/users/methods/create_user.py +26 -1
- cognee/modules/users/methods/get_authenticated_user.py +36 -42
- cognee/modules/users/methods/get_default_user.py +3 -1
- cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py +2 -1
- cognee/root_dir.py +19 -0
- cognee/shared/CodeGraphEntities.py +1 -0
- cognee/shared/logging_utils.py +143 -32
- cognee/shared/utils.py +0 -1
- cognee/tasks/codingagents/coding_rule_associations.py +127 -0
- cognee/tasks/graph/extract_graph_from_data.py +6 -2
- cognee/tasks/ingestion/save_data_item_to_storage.py +23 -0
- cognee/tasks/memify/__init__.py +2 -0
- cognee/tasks/memify/extract_subgraph.py +7 -0
- cognee/tasks/memify/extract_subgraph_chunks.py +11 -0
- cognee/tasks/repo_processor/get_local_dependencies.py +2 -0
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +144 -47
- cognee/tasks/storage/add_data_points.py +33 -3
- cognee/tasks/temporal_graph/__init__.py +1 -0
- cognee/tasks/temporal_graph/add_entities_to_event.py +85 -0
- cognee/tasks/temporal_graph/enrich_events.py +34 -0
- cognee/tasks/temporal_graph/extract_events_and_entities.py +32 -0
- cognee/tasks/temporal_graph/extract_knowledge_graph_from_events.py +41 -0
- cognee/tasks/temporal_graph/models.py +49 -0
- cognee/tests/integration/cli/__init__.py +3 -0
- cognee/tests/integration/cli/test_cli_integration.py +331 -0
- cognee/tests/integration/documents/PdfDocument_test.py +2 -2
- cognee/tests/integration/documents/TextDocument_test.py +2 -4
- cognee/tests/integration/documents/UnstructuredDocument_test.py +5 -8
- cognee/tests/{test_deletion.py → test_delete_hard.py} +0 -37
- cognee/tests/test_delete_soft.py +85 -0
- cognee/tests/test_kuzu.py +2 -2
- cognee/tests/test_neo4j.py +2 -2
- cognee/tests/test_permissions.py +3 -3
- cognee/tests/test_relational_db_migration.py +7 -5
- cognee/tests/test_search_db.py +136 -23
- cognee/tests/test_temporal_graph.py +167 -0
- cognee/tests/unit/api/__init__.py +1 -0
- cognee/tests/unit/api/test_conditional_authentication_endpoints.py +246 -0
- cognee/tests/unit/cli/__init__.py +3 -0
- cognee/tests/unit/cli/test_cli_commands.py +483 -0
- cognee/tests/unit/cli/test_cli_edge_cases.py +625 -0
- cognee/tests/unit/cli/test_cli_main.py +173 -0
- cognee/tests/unit/cli/test_cli_runner.py +62 -0
- cognee/tests/unit/cli/test_cli_utils.py +127 -0
- cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +18 -2
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +12 -15
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +10 -15
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +4 -3
- cognee/tests/unit/modules/retrieval/insights_retriever_test.py +4 -2
- cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +18 -2
- cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +225 -0
- cognee/tests/unit/modules/users/__init__.py +1 -0
- cognee/tests/unit/modules/users/test_conditional_authentication.py +277 -0
- cognee/tests/unit/processing/utils/utils_test.py +20 -1
- {cognee-0.2.3.dev1.dist-info → cognee-0.3.0.dist-info}/METADATA +13 -9
- {cognee-0.2.3.dev1.dist-info → cognee-0.3.0.dist-info}/RECORD +247 -135
- cognee-0.3.0.dist-info/entry_points.txt +2 -0
- cognee/infrastructure/databases/graph/networkx/adapter.py +0 -1017
- cognee/infrastructure/pipeline/models/Operation.py +0 -60
- cognee/notebooks/github_analysis_step_by_step.ipynb +0 -37
- cognee/tests/tasks/descriptive_metrics/networkx_metrics_test.py +0 -7
- cognee/tests/unit/modules/search/search_methods_test.py +0 -223
- /cognee/{infrastructure/databases/graph/networkx → api/v1/memify}/__init__.py +0 -0
- /cognee/{infrastructure/pipeline/models → tasks/codingagents}/__init__.py +0 -0
- {cognee-0.2.3.dev1.dist-info → cognee-0.3.0.dist-info}/WHEEL +0 -0
- {cognee-0.2.3.dev1.dist-info → cognee-0.3.0.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.2.3.dev1.dist-info → cognee-0.3.0.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -1,1017 +0,0 @@
|
|
|
1
|
-
"""Adapter for NetworkX graph database."""
|
|
2
|
-
|
|
3
|
-
import os
|
|
4
|
-
import json
|
|
5
|
-
import asyncio
|
|
6
|
-
import numpy as np
|
|
7
|
-
from uuid import UUID
|
|
8
|
-
import networkx as nx
|
|
9
|
-
from datetime import datetime, timezone
|
|
10
|
-
from typing import Dict, Any, List, Union, Type, Tuple
|
|
11
|
-
|
|
12
|
-
from cognee.infrastructure.databases.exceptions.exceptions import NodesetFilterNotSupportedError
|
|
13
|
-
from cognee.infrastructure.files.storage import get_file_storage
|
|
14
|
-
from cognee.shared.logging_utils import get_logger
|
|
15
|
-
from cognee.infrastructure.databases.graph.graph_db_interface import (
|
|
16
|
-
GraphDBInterface,
|
|
17
|
-
record_graph_changes,
|
|
18
|
-
)
|
|
19
|
-
from cognee.infrastructure.engine import DataPoint
|
|
20
|
-
from cognee.infrastructure.engine.utils import parse_id
|
|
21
|
-
from cognee.modules.storage.utils import JSONEncoder
|
|
22
|
-
|
|
23
|
-
logger = get_logger()
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
class NetworkXAdapter(GraphDBInterface):
|
|
27
|
-
"""
|
|
28
|
-
Manage a singleton instance of a graph database interface, utilizing the NetworkX
|
|
29
|
-
library. Handles graph data access and manipulation, including nodes and edges
|
|
30
|
-
management, persistence, and auxiliary functionalities.
|
|
31
|
-
"""
|
|
32
|
-
|
|
33
|
-
_instance = None
|
|
34
|
-
graph = None # Class variable to store the singleton instance
|
|
35
|
-
|
|
36
|
-
def __new__(cls, filename):
|
|
37
|
-
if cls._instance is None:
|
|
38
|
-
cls._instance = super().__new__(cls)
|
|
39
|
-
cls._instance.filename = filename
|
|
40
|
-
return cls._instance
|
|
41
|
-
|
|
42
|
-
def __init__(self, filename="cognee_graph.pkl"):
|
|
43
|
-
self.filename = filename
|
|
44
|
-
|
|
45
|
-
async def get_graph_data(self):
|
|
46
|
-
"""
|
|
47
|
-
Retrieve graph data including nodes and edges.
|
|
48
|
-
|
|
49
|
-
Returns:
|
|
50
|
-
--------
|
|
51
|
-
|
|
52
|
-
A tuple containing a list of node data and a list of edge data.
|
|
53
|
-
"""
|
|
54
|
-
await self.load_graph_from_file()
|
|
55
|
-
return (list(self.graph.nodes(data=True)), list(self.graph.edges(data=True, keys=True)))
|
|
56
|
-
|
|
57
|
-
async def query(self, query: str, params: dict):
|
|
58
|
-
"""
|
|
59
|
-
Execute a query against the graph data. The specifics of the query execution need to be
|
|
60
|
-
implemented.
|
|
61
|
-
|
|
62
|
-
Parameters:
|
|
63
|
-
-----------
|
|
64
|
-
|
|
65
|
-
- query (str): The query string to run against the graph.
|
|
66
|
-
- params (dict): Parameters for the query, if necessary.
|
|
67
|
-
"""
|
|
68
|
-
pass
|
|
69
|
-
|
|
70
|
-
async def has_node(self, node_id: UUID) -> bool:
|
|
71
|
-
"""
|
|
72
|
-
Determine if a specific node exists in the graph.
|
|
73
|
-
|
|
74
|
-
Parameters:
|
|
75
|
-
-----------
|
|
76
|
-
|
|
77
|
-
- node_id (UUID): The identifier of the node to check.
|
|
78
|
-
|
|
79
|
-
Returns:
|
|
80
|
-
--------
|
|
81
|
-
|
|
82
|
-
- bool: True if the node exists, otherwise False.
|
|
83
|
-
"""
|
|
84
|
-
return self.graph.has_node(node_id)
|
|
85
|
-
|
|
86
|
-
async def add_node(self, node: DataPoint) -> None:
|
|
87
|
-
"""
|
|
88
|
-
Add a node to the graph and persist the graph state to the file.
|
|
89
|
-
|
|
90
|
-
Parameters:
|
|
91
|
-
-----------
|
|
92
|
-
|
|
93
|
-
- node (DataPoint): The node to be added, represented as a DataPoint object.
|
|
94
|
-
"""
|
|
95
|
-
self.graph.add_node(node.id, **node.model_dump())
|
|
96
|
-
|
|
97
|
-
await self.save_graph_to_file(self.filename)
|
|
98
|
-
|
|
99
|
-
@record_graph_changes
|
|
100
|
-
async def add_nodes(self, nodes: list[DataPoint]) -> None:
|
|
101
|
-
"""
|
|
102
|
-
Bulk add multiple nodes to the graph and persist the graph state to the file.
|
|
103
|
-
|
|
104
|
-
Parameters:
|
|
105
|
-
-----------
|
|
106
|
-
|
|
107
|
-
- nodes (list[DataPoint]): A list of DataPoint objects defining the nodes to be
|
|
108
|
-
added.
|
|
109
|
-
"""
|
|
110
|
-
nodes = [(node.id, node.model_dump()) for node in nodes]
|
|
111
|
-
self.graph.add_nodes_from(nodes)
|
|
112
|
-
await self.save_graph_to_file(self.filename)
|
|
113
|
-
|
|
114
|
-
async def get_graph(self):
|
|
115
|
-
"""
|
|
116
|
-
Retrieve the current state of the graph.
|
|
117
|
-
|
|
118
|
-
Returns:
|
|
119
|
-
--------
|
|
120
|
-
|
|
121
|
-
The current graph instance.
|
|
122
|
-
"""
|
|
123
|
-
return self.graph
|
|
124
|
-
|
|
125
|
-
async def has_edge(self, from_node: str, to_node: str, edge_label: str) -> bool:
|
|
126
|
-
"""
|
|
127
|
-
Check for the existence of a specific edge in the graph.
|
|
128
|
-
|
|
129
|
-
Parameters:
|
|
130
|
-
-----------
|
|
131
|
-
|
|
132
|
-
- from_node (str): The identifier of the source node.
|
|
133
|
-
- to_node (str): The identifier of the target node.
|
|
134
|
-
- edge_label (str): The label of the edge to check.
|
|
135
|
-
|
|
136
|
-
Returns:
|
|
137
|
-
--------
|
|
138
|
-
|
|
139
|
-
- bool: True if the edge exists, otherwise False.
|
|
140
|
-
"""
|
|
141
|
-
return self.graph.has_edge(from_node, to_node, key=edge_label)
|
|
142
|
-
|
|
143
|
-
async def has_edges(self, edges):
|
|
144
|
-
"""
|
|
145
|
-
Check for the existence of multiple edges in the graph.
|
|
146
|
-
|
|
147
|
-
Parameters:
|
|
148
|
-
-----------
|
|
149
|
-
|
|
150
|
-
- edges: A list of edges to check, defined as tuples of (from_node, to_node,
|
|
151
|
-
edge_label).
|
|
152
|
-
|
|
153
|
-
Returns:
|
|
154
|
-
--------
|
|
155
|
-
|
|
156
|
-
A list of edges that exist in the graph.
|
|
157
|
-
"""
|
|
158
|
-
result = []
|
|
159
|
-
|
|
160
|
-
for from_node, to_node, edge_label in edges:
|
|
161
|
-
if self.graph.has_edge(from_node, to_node, edge_label):
|
|
162
|
-
result.append((from_node, to_node, edge_label))
|
|
163
|
-
|
|
164
|
-
return result
|
|
165
|
-
|
|
166
|
-
@record_graph_changes
|
|
167
|
-
async def add_edge(
|
|
168
|
-
self,
|
|
169
|
-
from_node: str,
|
|
170
|
-
to_node: str,
|
|
171
|
-
relationship_name: str,
|
|
172
|
-
edge_properties: Dict[str, Any] = {},
|
|
173
|
-
) -> None:
|
|
174
|
-
"""
|
|
175
|
-
Add a single edge to the graph and persist the graph state to the file.
|
|
176
|
-
|
|
177
|
-
Parameters:
|
|
178
|
-
-----------
|
|
179
|
-
|
|
180
|
-
- from_node (str): The identifier of the source node for the edge.
|
|
181
|
-
- to_node (str): The identifier of the target node for the edge.
|
|
182
|
-
- relationship_name (str): The label for the relationship as the edge is created.
|
|
183
|
-
- edge_properties (Dict[str, Any]): Additional properties for the edge, if any.
|
|
184
|
-
(default {})
|
|
185
|
-
"""
|
|
186
|
-
edge_properties["updated_at"] = datetime.now(timezone.utc)
|
|
187
|
-
self.graph.add_edge(
|
|
188
|
-
from_node,
|
|
189
|
-
to_node,
|
|
190
|
-
key=relationship_name,
|
|
191
|
-
**(edge_properties if edge_properties else {}),
|
|
192
|
-
)
|
|
193
|
-
|
|
194
|
-
await self.save_graph_to_file(self.filename)
|
|
195
|
-
|
|
196
|
-
@record_graph_changes
|
|
197
|
-
async def add_edges(self, edges: list[tuple[str, str, str, dict]]) -> None:
|
|
198
|
-
"""
|
|
199
|
-
Bulk add multiple edges to the graph and persist the graph state to the file.
|
|
200
|
-
|
|
201
|
-
Parameters:
|
|
202
|
-
-----------
|
|
203
|
-
|
|
204
|
-
- edges (list[tuple[str, str, str, dict]]): A list of edges defined as tuples
|
|
205
|
-
containing (from_node, to_node, relationship_name, edge_properties).
|
|
206
|
-
"""
|
|
207
|
-
if not edges:
|
|
208
|
-
logger.debug("No edges to add")
|
|
209
|
-
return
|
|
210
|
-
|
|
211
|
-
try:
|
|
212
|
-
# Validate edge format and convert UUIDs to strings
|
|
213
|
-
processed_edges = []
|
|
214
|
-
for edge in edges:
|
|
215
|
-
if len(edge) < 3 or len(edge) > 4:
|
|
216
|
-
raise ValueError(
|
|
217
|
-
f"Invalid edge format: {edge}. Expected (from_node, to_node, relationship_name[, properties])"
|
|
218
|
-
)
|
|
219
|
-
|
|
220
|
-
# Convert UUIDs to strings if needed
|
|
221
|
-
from_node = str(edge[0]) if isinstance(edge[0], UUID) else edge[0]
|
|
222
|
-
to_node = str(edge[1]) if isinstance(edge[1], UUID) else edge[1]
|
|
223
|
-
relationship_name = edge[2]
|
|
224
|
-
|
|
225
|
-
if not all(isinstance(x, str) for x in [from_node, to_node, relationship_name]):
|
|
226
|
-
raise ValueError(
|
|
227
|
-
f"First three elements of edge must be strings or UUIDs: {edge}"
|
|
228
|
-
)
|
|
229
|
-
|
|
230
|
-
# Process edge with updated_at timestamp
|
|
231
|
-
processed_edge = (
|
|
232
|
-
from_node,
|
|
233
|
-
to_node,
|
|
234
|
-
relationship_name,
|
|
235
|
-
{
|
|
236
|
-
**(edge[3] if len(edge) == 4 else {}),
|
|
237
|
-
"updated_at": datetime.now(timezone.utc),
|
|
238
|
-
},
|
|
239
|
-
)
|
|
240
|
-
processed_edges.append(processed_edge)
|
|
241
|
-
|
|
242
|
-
# Add edges to graph
|
|
243
|
-
self.graph.add_edges_from(processed_edges)
|
|
244
|
-
logger.debug(f"Added {len(processed_edges)} edges to graph")
|
|
245
|
-
|
|
246
|
-
# Save changes
|
|
247
|
-
await self.save_graph_to_file(self.filename)
|
|
248
|
-
except Exception as e:
|
|
249
|
-
logger.error(f"Failed to add edges: {e}")
|
|
250
|
-
raise
|
|
251
|
-
|
|
252
|
-
async def get_edges(self, node_id: UUID):
|
|
253
|
-
"""
|
|
254
|
-
Retrieve edges connected to a specific node.
|
|
255
|
-
|
|
256
|
-
Parameters:
|
|
257
|
-
-----------
|
|
258
|
-
|
|
259
|
-
- node_id (UUID): The identifier of the node whose edges are to be retrieved.
|
|
260
|
-
|
|
261
|
-
Returns:
|
|
262
|
-
--------
|
|
263
|
-
|
|
264
|
-
A list of edges connected to the specified node.
|
|
265
|
-
"""
|
|
266
|
-
return list(self.graph.in_edges(node_id, data=True)) + list(
|
|
267
|
-
self.graph.out_edges(node_id, data=True)
|
|
268
|
-
)
|
|
269
|
-
|
|
270
|
-
async def delete_node(self, node_id: UUID) -> None:
|
|
271
|
-
"""
|
|
272
|
-
Remove a node and its associated edges from the graph, then persist the changes.
|
|
273
|
-
|
|
274
|
-
Parameters:
|
|
275
|
-
-----------
|
|
276
|
-
|
|
277
|
-
- node_id (UUID): The identifier of the node to delete.
|
|
278
|
-
"""
|
|
279
|
-
|
|
280
|
-
if self.graph.has_node(node_id):
|
|
281
|
-
# First remove all edges connected to the node
|
|
282
|
-
for edge in list(self.graph.edges(node_id, data=True)):
|
|
283
|
-
source, target, data = edge
|
|
284
|
-
self.graph.remove_edge(source, target, key=data.get("relationship_name"))
|
|
285
|
-
|
|
286
|
-
# Then remove the node itself
|
|
287
|
-
self.graph.remove_node(node_id)
|
|
288
|
-
|
|
289
|
-
# Save the updated graph state
|
|
290
|
-
await self.save_graph_to_file(self.filename)
|
|
291
|
-
else:
|
|
292
|
-
logger.error(f"Node {node_id} not found in graph")
|
|
293
|
-
|
|
294
|
-
async def delete_nodes(self, node_ids: List[UUID]) -> None:
|
|
295
|
-
"""
|
|
296
|
-
Bulk delete nodes from the graph and persist the changes.
|
|
297
|
-
|
|
298
|
-
Parameters:
|
|
299
|
-
-----------
|
|
300
|
-
|
|
301
|
-
- node_ids (List[UUID]): A list of node identifiers to delete.
|
|
302
|
-
"""
|
|
303
|
-
self.graph.remove_nodes_from(node_ids)
|
|
304
|
-
await self.save_graph_to_file(self.filename)
|
|
305
|
-
|
|
306
|
-
async def get_disconnected_nodes(self) -> List[str]:
|
|
307
|
-
"""
|
|
308
|
-
Identify nodes that are not connected to any other nodes in the graph.
|
|
309
|
-
|
|
310
|
-
Returns:
|
|
311
|
-
--------
|
|
312
|
-
|
|
313
|
-
- List[str]: A list of identifiers for disconnected nodes.
|
|
314
|
-
"""
|
|
315
|
-
connected_components = list(nx.weakly_connected_components(self.graph))
|
|
316
|
-
|
|
317
|
-
disconnected_nodes = []
|
|
318
|
-
biggest_subgraph = max(connected_components, key=len)
|
|
319
|
-
|
|
320
|
-
for component in connected_components:
|
|
321
|
-
if component != biggest_subgraph:
|
|
322
|
-
disconnected_nodes.extend(list(component))
|
|
323
|
-
|
|
324
|
-
return disconnected_nodes
|
|
325
|
-
|
|
326
|
-
async def extract_node(self, node_id: UUID) -> dict:
|
|
327
|
-
"""
|
|
328
|
-
Retrieve data for a specific node based on its identifier.
|
|
329
|
-
|
|
330
|
-
Parameters:
|
|
331
|
-
-----------
|
|
332
|
-
|
|
333
|
-
- node_id (UUID): The identifier of the node to retrieve.
|
|
334
|
-
|
|
335
|
-
Returns:
|
|
336
|
-
--------
|
|
337
|
-
|
|
338
|
-
- dict: The data of the specified node, or None if not found.
|
|
339
|
-
"""
|
|
340
|
-
if self.graph.has_node(node_id):
|
|
341
|
-
return self.graph.nodes[node_id]
|
|
342
|
-
|
|
343
|
-
return None
|
|
344
|
-
|
|
345
|
-
async def extract_nodes(self, node_ids: List[UUID]) -> List[dict]:
|
|
346
|
-
"""
|
|
347
|
-
Retrieve data for multiple nodes based on their identifiers.
|
|
348
|
-
|
|
349
|
-
Parameters:
|
|
350
|
-
-----------
|
|
351
|
-
|
|
352
|
-
- node_ids (List[UUID]): A list of node identifiers to retrieve data.
|
|
353
|
-
|
|
354
|
-
Returns:
|
|
355
|
-
--------
|
|
356
|
-
|
|
357
|
-
- List[dict]: A list of data for each node identified that exists in the graph.
|
|
358
|
-
"""
|
|
359
|
-
return [self.graph.nodes[node_id] for node_id in node_ids if self.graph.has_node(node_id)]
|
|
360
|
-
|
|
361
|
-
async def get_predecessors(self, node_id: UUID, edge_label: str = None) -> list:
|
|
362
|
-
"""
|
|
363
|
-
Retrieve the predecessor nodes of a specified node according to a specific edge label.
|
|
364
|
-
|
|
365
|
-
Parameters:
|
|
366
|
-
-----------
|
|
367
|
-
|
|
368
|
-
- node_id (UUID): The identifier of the node for which to find predecessors.
|
|
369
|
-
- edge_label (str): The label for the edges connecting to predecessors; if None, all
|
|
370
|
-
predecessors are retrieved. (default None)
|
|
371
|
-
|
|
372
|
-
Returns:
|
|
373
|
-
--------
|
|
374
|
-
|
|
375
|
-
- list: A list of predecessor nodes.
|
|
376
|
-
"""
|
|
377
|
-
if self.graph.has_node(node_id):
|
|
378
|
-
if edge_label is None:
|
|
379
|
-
return [
|
|
380
|
-
self.graph.nodes[predecessor]
|
|
381
|
-
for predecessor in list(self.graph.predecessors(node_id))
|
|
382
|
-
]
|
|
383
|
-
|
|
384
|
-
nodes = []
|
|
385
|
-
|
|
386
|
-
for predecessor_id in list(self.graph.predecessors(node_id)):
|
|
387
|
-
if self.graph.has_edge(predecessor_id, node_id, edge_label):
|
|
388
|
-
nodes.append(self.graph.nodes[predecessor_id])
|
|
389
|
-
|
|
390
|
-
return nodes
|
|
391
|
-
|
|
392
|
-
async def get_successors(self, node_id: UUID, edge_label: str = None) -> list:
|
|
393
|
-
"""
|
|
394
|
-
Retrieve the successor nodes of a specified node according to a specific edge label.
|
|
395
|
-
|
|
396
|
-
Parameters:
|
|
397
|
-
-----------
|
|
398
|
-
|
|
399
|
-
- node_id (UUID): The identifier of the node for which to find successors.
|
|
400
|
-
- edge_label (str): The label for the edges connecting to successors; if None, all
|
|
401
|
-
successors are retrieved. (default None)
|
|
402
|
-
|
|
403
|
-
Returns:
|
|
404
|
-
--------
|
|
405
|
-
|
|
406
|
-
- list: A list of successor nodes.
|
|
407
|
-
"""
|
|
408
|
-
if self.graph.has_node(node_id):
|
|
409
|
-
if edge_label is None:
|
|
410
|
-
return [
|
|
411
|
-
self.graph.nodes[successor]
|
|
412
|
-
for successor in list(self.graph.successors(node_id))
|
|
413
|
-
]
|
|
414
|
-
|
|
415
|
-
nodes = []
|
|
416
|
-
|
|
417
|
-
for successor_id in list(self.graph.successors(node_id)):
|
|
418
|
-
if self.graph.has_edge(node_id, successor_id, edge_label):
|
|
419
|
-
nodes.append(self.graph.nodes[successor_id])
|
|
420
|
-
|
|
421
|
-
return nodes
|
|
422
|
-
|
|
423
|
-
async def get_neighbors(self, node_id: UUID) -> list:
|
|
424
|
-
"""
|
|
425
|
-
Get the neighboring nodes of a specified node, including both predecessors and
|
|
426
|
-
successors.
|
|
427
|
-
|
|
428
|
-
Parameters:
|
|
429
|
-
-----------
|
|
430
|
-
|
|
431
|
-
- node_id (UUID): The identifier of the node whose neighbors are to be retrieved.
|
|
432
|
-
|
|
433
|
-
Returns:
|
|
434
|
-
--------
|
|
435
|
-
|
|
436
|
-
- list: A list of neighboring nodes.
|
|
437
|
-
"""
|
|
438
|
-
if not self.graph.has_node(node_id):
|
|
439
|
-
return []
|
|
440
|
-
|
|
441
|
-
predecessors, successors = await asyncio.gather(
|
|
442
|
-
self.get_predecessors(node_id),
|
|
443
|
-
self.get_successors(node_id),
|
|
444
|
-
)
|
|
445
|
-
|
|
446
|
-
neighbors = predecessors + successors
|
|
447
|
-
|
|
448
|
-
return neighbors
|
|
449
|
-
|
|
450
|
-
async def get_connections(self, node_id: UUID) -> list:
    """
    Get the connections of a specified node to its neighbors.

    Parameters:
    -----------

        - node_id (UUID): The identifier of the node for which to get connections.

    Returns:
    --------

        - list: (source_node, edge_properties, target_node) triples for every edge
          touching the specified node; empty if the node is missing or lacks an "id".
    """
    if not self.graph.has_node(node_id):
        return []

    node = self.graph.nodes[node_id]

    # Nodes without an "id" attribute cannot be matched against edge endpoints.
    if "id" not in node:
        return []

    predecessors, successors = await asyncio.gather(
        self.get_predecessors(node_id),
        self.get_successors(node_id),
    )

    connections = []

    def _collect(source_node, target_node):
        # get_edge_data returns a key -> properties mapping for parallel edges, or None.
        edge_data = self.graph.get_edge_data(source_node["id"], target_node["id"])
        if edge_data is None:
            return
        for edge_properties in edge_data.values():
            connections.append((source_node, edge_properties, target_node))

    # `or []` keeps behavior identical when a gather result is None.
    for neighbor in predecessors or []:
        if "id" in neighbor:
            _collect(neighbor, node)

    for neighbor in successors or []:
        if "id" in neighbor:
            _collect(node, neighbor)

    return connections
async def remove_connection_to_predecessors_of(
    self, node_ids: list[UUID], edge_label: str
) -> None:
    """
    Remove connections to predecessors of specified nodes based on an edge label and persist
    changes.

    Parameters:
    -----------

        - node_ids (list[UUID]): A list of node identifiers whose predecessor connections
          need to be removed.
        - edge_label (str): The label of the edges to remove.
    """
    for node_id in node_ids:
        if not self.graph.has_node(node_id):
            continue
        # Snapshot the predecessor list so edge removal cannot disturb iteration.
        for predecessor_id in list(self.graph.predecessors(node_id)):
            if self.graph.has_edge(predecessor_id, node_id, edge_label):
                self.graph.remove_edge(predecessor_id, node_id, edge_label)

    # Persist the mutated graph to the adapter's backing file.
    await self.save_graph_to_file(self.filename)
async def remove_connection_to_successors_of(
    self, node_ids: list[UUID], edge_label: str
) -> None:
    """
    Remove connections to successors of specified nodes based on an edge label and persist
    changes.

    Parameters:
    -----------

        - node_ids (list[UUID]): A list of node identifiers whose successor connections need
          to be removed.
        - edge_label (str): The label of the edges to remove.
    """
    for node_id in node_ids:
        if not self.graph.has_node(node_id):
            continue
        # Snapshot the successor list so edge removal cannot disturb iteration.
        for successor_id in list(self.graph.successors(node_id)):
            if self.graph.has_edge(node_id, successor_id, edge_label):
                self.graph.remove_edge(node_id, successor_id, edge_label)

    # Persist the mutated graph to the adapter's backing file.
    await self.save_graph_to_file(self.filename)
async def create_empty_graph(self, file_path: str) -> None:
    """
    Reset the in-memory graph to a fresh, empty MultiDiGraph and persist it.

    Parameters:
    -----------

        - file_path (str): The file path where the empty graph should be saved.
    """
    # Discard any previously loaded graph in favor of a brand-new empty multigraph,
    # then write it out so the on-disk state matches memory.
    self.graph = nx.MultiDiGraph()
    await self.save_graph_to_file(file_path)
async def save_graph_to_file(self, file_path: str = None) -> None:
    """
    Save the graph data asynchronously to a specified file in JSON format.

    Parameters:
    -----------

        - file_path (str): The file path to save the graph data; if None, saves to the
          default filename. (default None)
    """
    if not file_path:
        file_path = self.filename

    # Serialize via node-link format; JSONEncoder handles non-JSON-native values
    # (e.g. UUIDs / datetimes) present in node and edge attributes.
    serialized = json.dumps(
        nx.readwrite.json_graph.node_link_data(self.graph, edges="links"),
        cls=JSONEncoder,
    )

    directory, base_name = os.path.split(file_path)
    storage = get_file_storage(directory)

    await storage.store(base_name, serialized, overwrite=True)
async def load_graph_from_file(self, file_path: str = None):
    """
    Load graph data asynchronously from a specified file in JSON format.

    Node ids are normalized to UUID objects and `updated_at` fields on both nodes
    and edges are converted to timezone-aware datetimes before the graph is rebuilt.
    If the file is missing or loading fails, an empty graph is created instead.

    Parameters:
    -----------

        - file_path (str): The file path from which to load the graph data; if None, loads
          from the default filename. (default None)
    """
    if not file_path:
        file_path = self.filename
    try:
        file_dir_path = os.path.dirname(file_path)
        file_name = os.path.basename(file_path)

        file_storage = get_file_storage(file_dir_path)

        if await file_storage.file_exists(file_name):
            async with file_storage.open(file_name, "r") as file:
                graph_data = json.loads(file.read())
                for node in graph_data["nodes"]:
                    try:
                        if not isinstance(node["id"], UUID):
                            try:
                                node["id"] = UUID(node["id"])
                            except Exception:
                                # If conversion fails, keep the original id
                                pass
                    except Exception as e:
                        # A node without an "id" key is unrecoverable — surface it.
                        logger.error(e)
                        raise e

                    # Normalize `updated_at`: integers are epoch milliseconds,
                    # strings are ISO-8601 with microseconds and a UTC offset.
                    if isinstance(node.get("updated_at"), int):
                        node["updated_at"] = datetime.fromtimestamp(
                            node["updated_at"] / 1000, tz=timezone.utc
                        )
                    elif isinstance(node.get("updated_at"), str):
                        node["updated_at"] = datetime.strptime(
                            node["updated_at"], "%Y-%m-%dT%H:%M:%S.%f%z"
                        )

                for edge in graph_data["links"]:
                    try:
                        if not isinstance(edge["source"], UUID):
                            source_id = parse_id(edge["source"])
                        else:
                            source_id = edge["source"]

                        if not isinstance(edge["target"], UUID):
                            target_id = parse_id(edge["target"])
                        else:
                            target_id = edge["target"]

                        edge["source"] = source_id
                        edge["target"] = target_id
                        # Also expose the endpoints under the *_node_id key names so
                        # edge payloads carry ids after node_link_graph rebuilds them.
                        edge["source_node_id"] = source_id
                        edge["target_node_id"] = target_id
                    except Exception as e:
                        logger.error(e)
                        raise e

                    if isinstance(
                        edge.get("updated_at"), int
                    ):  # Handle timestamp in milliseconds
                        edge["updated_at"] = datetime.fromtimestamp(
                            edge["updated_at"] / 1000, tz=timezone.utc
                        )
                    elif isinstance(edge.get("updated_at"), str):
                        edge["updated_at"] = datetime.strptime(
                            edge["updated_at"], "%Y-%m-%dT%H:%M:%S.%f%z"
                        )

                self.graph = nx.readwrite.json_graph.node_link_graph(graph_data, edges="links")

                # Mirror each node key into its data dict so lookups can rely on
                # data["id"] matching the graph node key.
                for node_id, node_data in self.graph.nodes(data=True):
                    node_data["id"] = node_id
        else:
            # Log that the file does not exist and an empty graph is initialized
            logger.warning("File %s not found. Initializing an empty graph.", file_path)
            await self.create_empty_graph(file_path)

    except Exception:
        # Best-effort recovery: any load failure falls back to a fresh empty graph.
        logger.error("Failed to load graph from file: %s", file_path)

        await self.create_empty_graph(file_path)
async def delete_graph(self, file_path: str = None):
    """
    Delete the graph file from the filesystem asynchronously.

    Parameters:
    -----------

        - file_path (str): The file path of the graph to delete; if None, deletes the
          default graph file. (default None)
    """
    if file_path is None:
        # Fall back to the adapter's default graph file path.
        file_path = self.filename
    try:
        directory, base_name = os.path.split(file_path)

        storage = get_file_storage(directory)
        await storage.remove(base_name)

        # Drop the in-memory graph as well so it cannot outlive its backing file.
        self.graph = None
        logger.info("Graph deleted successfully.")
    except Exception as error:
        logger.error("Failed to delete graph: %s", error)
        raise error
async def get_nodeset_subgraph(
    self, node_type: Type[Any], node_name: List[str]
) -> Tuple[List[Tuple[int, dict]], List[Tuple[int, int, str, dict]]]:
    """
    Obtain a subgraph restricted to the given node type and names.

    Nodeset filtering is not supported by this adapter, so this method always raises.

    Parameters:
    -----------

        - node_type (Type[Any]): The type of nodes to include in the subgraph.
        - node_name (List[str]): A list of node names to filter by.

    Raises:
    -------

        - NodesetFilterNotSupportedError: Always raised by this implementation.
    """
    raise NodesetFilterNotSupportedError
async def get_filtered_graph_data(
    self, attribute_filters: List[Dict[str, List[Union[str, int]]]]
):
    """
    Fetch nodes and relationships filtered by specified attributes.

    Parameters:
    -----------

        - attribute_filters (List[Dict[str, List[Union[str, int]]]]): A list of dictionaries
          defining attributes to filter on. Only the first dictionary is applied; each
          entry maps an attribute name to its allowed values.

    Returns:
    --------

        A tuple of (filtered_nodes, filtered_edges). Edges are kept only when both
        endpoints satisfy every filter clause.
    """
    # Only the first filter dict is consulted, mirroring the adapter's contract.
    where_clauses = list(attribute_filters[0].items())

    def _matches(data):
        # A data dict matches when every filtered attribute holds an allowed value.
        return all(data.get(attribute) in allowed for attribute, allowed in where_clauses)

    filtered_nodes = [
        (node, data) for node, data in self.graph.nodes(data=True) if _matches(data)
    ]

    filtered_edges = [
        (source, target, data.get("relationship_type", "UNKNOWN"), data)
        for source, target, data in self.graph.edges(data=True)
        if _matches(self.graph.nodes[source]) and _matches(self.graph.nodes[target])
    ]

    return filtered_nodes, filtered_edges
async def get_graph_metrics(self, include_optional=False):
    """
    Calculate various metrics related to the graph, optionally including optional metrics.

    Parameters:
    -----------

        - include_optional: Whether to also compute the expensive optional metrics
          (self-loops, diameter, average shortest path length, clustering). When False,
          the optional entries are populated with -1 placeholders. (default False)

    Returns:
    --------

        A dictionary containing the calculated graph metrics.
    """
    graph = self.graph

    def _mean_degree(g):
        # Total degree (in + out) per node; 0 for a graph with no nodes.
        degree_values = [degree for _, degree in g.degree()]
        return np.mean(degree_values) if degree_values else 0

    def _edge_density(g):
        node_count = g.number_of_nodes()
        possible_edges = node_count * (node_count - 1)
        return g.number_of_edges() / possible_edges if possible_edges > 0 else 0

    def _safe(metric_fn, failure_message):
        # Optional metrics can fail (e.g. disconnected or empty graphs); degrade to None.
        try:
            return metric_fn()
        except Exception as error:
            logger.warning(failure_message, error)
            return None

    metrics = {
        "num_nodes": graph.number_of_nodes(),
        "num_edges": graph.number_of_edges(),
        "mean_degree": _mean_degree(graph),
        "edge_density": _edge_density(graph),
        "num_connected_components": nx.number_weakly_connected_components(graph),
        "sizes_of_connected_components": [
            len(component) for component in nx.weakly_connected_components(graph)
        ],
    }

    if include_optional:
        metrics.update(
            {
                "num_selfloops": sum(1 for u, v in graph.edges() if u == v),
                "diameter": _safe(
                    lambda: nx.diameter(nx.DiGraph(graph.to_undirected())),
                    "Failed to calculate diameter: %s",
                ),
                "avg_shortest_path_length": _safe(
                    lambda: nx.average_shortest_path_length(nx.DiGraph(graph.to_undirected())),
                    "Failed to calculate average shortest path length: %s",
                ),
                "avg_clustering": _safe(
                    lambda: nx.average_clustering(nx.DiGraph(graph.to_undirected())),
                    "Failed to calculate clustering coefficient: %s",
                ),
            }
        )
    else:
        metrics.update(
            {
                "num_selfloops": -1,
                "diameter": -1,
                "avg_shortest_path_length": -1,
                "avg_clustering": -1,
            }
        )

    return metrics
async def get_document_subgraph(self, data_id: str):
    """
    Retrieve all relevant nodes when a document is being deleted, including chunks and
    orphaned entities.

    Parameters:
    -----------

        - data_id(str): The data id identifying the document to fetch
          related nodes for.

    Returns:
    --------

        A dictionary containing the document, its chunks, orphan entities, made from nodes,
        and orphan types; None if no matching document node exists.
    """
    # Ensure graph is loaded
    if self.graph is None:
        await self.load_graph_from_file()

    # Find the document node by matching its "id" attribute against data_id.
    # NOTE(review): the original comment mentioned matching content_hash in the name
    # field, but the code compares attrs["id"] — presumably the comment was stale.
    document = None
    document_node_id = None
    for node_id, attrs in self.graph.nodes(data=True):
        if (
            attrs.get("type") in ["TextDocument", "PdfDocument"]
            and attrs.get("id") == f"{data_id}"
        ):
            document = {"id": str(node_id), **attrs}  # Convert UUID to string for consistency
            document_node_id = node_id  # Keep the original UUID
            break

    if not document:
        return None

    # Find chunks connected via is_part_of (chunks point TO document)
    chunks = []
    for source, target, edge_data in self.graph.in_edges(document_node_id, data=True):
        if edge_data.get("relationship_name") == "is_part_of":
            chunks.append({"id": source, **self.graph.nodes[source]})  # Keep as UUID object

    # Find entities connected to chunks (chunks point TO entities via contains)
    entities = []
    for chunk in chunks:
        chunk_id = chunk["id"]  # Already a UUID object
        for source, target, edge_data in self.graph.out_edges(chunk_id, data=True):
            if edge_data.get("relationship_name") == "contains":
                entities.append(
                    {"id": target, **self.graph.nodes[target]}
                )  # Keep as UUID object

    # Find orphaned entities (entities only connected to chunks we're deleting)
    orphan_entities = []
    for entity in entities:
        entity_id = entity["id"]  # Already a UUID object
        # Get all chunks that contain this entity
        containing_chunks = []
        for source, target, edge_data in self.graph.in_edges(entity_id, data=True):
            if edge_data.get("relationship_name") == "contains":
                containing_chunks.append(source)  # Keep as UUID object

        # Check if all containing chunks are in our chunks list
        chunk_ids = [chunk["id"] for chunk in chunks]
        if containing_chunks and all(c in chunk_ids for c in containing_chunks):
            orphan_entities.append(entity)

    # Find orphaned entity types: an EntityType node is orphaned when every entity
    # pointing at it via is_a/instance_of is itself in orphan_entities.
    orphan_types = []
    seen_types = set()  # Track seen types to avoid duplicates
    for entity in orphan_entities:
        entity_id = entity["id"]  # Already a UUID object
        for _, target, edge_data in self.graph.out_edges(entity_id, data=True):
            if edge_data.get("relationship_name") in ["is_a", "instance_of"]:
                # Check if this type is only connected to entities we're deleting
                type_node = self.graph.nodes[target]
                if type_node.get("type") == "EntityType" and target not in seen_types:
                    is_orphaned = True
                    # Get all incoming edges to this type node
                    for source, _, edge_data in self.graph.in_edges(target, data=True):
                        if edge_data.get("relationship_name") in ["is_a", "instance_of"]:
                            # Check if the source entity is not in our orphan_entities list
                            if source not in [e["id"] for e in orphan_entities]:
                                is_orphaned = False
                                break
                    if is_orphaned:
                        orphan_types.append({"id": target, **type_node})  # Keep as UUID object
                        seen_types.add(target)  # Mark as seen

    # Find nodes connected via made_from (chunks point TO summaries)
    made_from_nodes = []
    for chunk in chunks:
        chunk_id = chunk["id"]  # Already a UUID object
        for source, target, edge_data in self.graph.in_edges(chunk_id, data=True):
            if edge_data.get("relationship_name") == "made_from":
                made_from_nodes.append(
                    {"id": source, **self.graph.nodes[source]}
                )  # Keep as UUID object

    # Return UUIDs directly without string conversion
    return {
        "document": [{"id": document["id"], **{k: v for k, v in document.items() if k != "id"}}]
        if document
        else [],
        "chunks": [
            {"id": chunk["id"], **{k: v for k, v in chunk.items() if k != "id"}}
            for chunk in chunks
        ],
        "orphan_entities": [
            {"id": entity["id"], **{k: v for k, v in entity.items() if k != "id"}}
            for entity in orphan_entities
        ],
        "made_from_nodes": [
            {"id": node["id"], **{k: v for k, v in node.items() if k != "id"}}
            for node in made_from_nodes
        ],
        "orphan_types": [
            {"id": type_node["id"], **{k: v for k, v in type_node.items() if k != "id"}}
            for type_node in orphan_types
        ],
    }
async def get_degree_one_nodes(self, node_type: str):
    """
    Retrieve nodes that have only a single connection, filtered by node type.

    Parameters:
    -----------

        - node_type (str): Type of nodes to filter by ('Entity' or 'EntityType').

    Returns:
    --------

        A list of node-data dicts of the given type whose total degree (in + out) is 1.
    """
    if not node_type or node_type not in ["Entity", "EntityType"]:
        raise ValueError("node_type must be either 'Entity' or 'EntityType'")

    # Degree counts both incoming and outgoing edges.
    return [
        node_data
        for node_id, node_data in self.graph.nodes(data=True)
        if node_data.get("type") == node_type and self.graph.degree(node_id) == 1
    ]
async def get_node(self, node_id: UUID) -> dict:
    """
    Retrieve the details of a specific node identified by its identifier.

    Parameters:
    -----------

        - node_id (UUID): The identifier of the node to retrieve.

    Returns:
    --------

        - dict: The data of the specified node if found, otherwise None.
    """
    # Guard clause: unknown ids map to None rather than raising.
    if not self.graph.has_node(node_id):
        return None

    return self.graph.nodes[node_id]
async def get_nodes(self, node_ids: List[UUID] = None) -> List[dict]:
    """
    Retrieve data for multiple nodes by their identifiers, or all nodes if no identifiers
    are provided.

    Parameters:
    -----------

        - node_ids (List[UUID]): List of node identifiers to fetch data for; if None,
          retrieves all nodes in the graph. (default None)

    Returns:
    --------

        - List[dict]: Node data dicts (each including an "id" key) for every node found;
          requested ids missing from the graph are silently skipped.
    """
    if node_ids is None:
        # Full dump: every node in the graph, with its key injected as "id".
        return [
            {"id": node_id, **node_data} for node_id, node_data in self.graph.nodes(data=True)
        ]

    present = (node_id for node_id in node_ids if self.graph.has_node(node_id))
    return [{"id": node_id, **self.graph.nodes[node_id]} for node_id in present]