risk-network 0.0.10__py3-none-any.whl → 0.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- risk/__init__.py +1 -1
- risk/annotation/__init__.py +10 -0
- risk/{annotations/annotations.py → annotation/annotation.py} +62 -102
- risk/{annotations → annotation}/io.py +93 -92
- risk/annotation/nltk_setup.py +86 -0
- risk/log/__init__.py +1 -1
- risk/log/parameters.py +26 -27
- risk/neighborhoods/__init__.py +0 -1
- risk/neighborhoods/api.py +38 -38
- risk/neighborhoods/community.py +33 -4
- risk/neighborhoods/domains.py +26 -28
- risk/neighborhoods/neighborhoods.py +8 -2
- risk/neighborhoods/stats/__init__.py +13 -0
- risk/neighborhoods/stats/permutation/__init__.py +6 -0
- risk/{stats → neighborhoods/stats}/permutation/permutation.py +24 -21
- risk/{stats → neighborhoods/stats}/permutation/test_functions.py +5 -4
- risk/{stats/stat_tests.py → neighborhoods/stats/tests.py} +62 -54
- risk/network/__init__.py +0 -2
- risk/network/graph/__init__.py +0 -2
- risk/network/graph/api.py +19 -19
- risk/network/graph/graph.py +73 -68
- risk/{stats/significance.py → network/graph/stats.py} +2 -2
- risk/network/graph/summary.py +12 -13
- risk/network/io.py +163 -20
- risk/network/plotter/__init__.py +0 -2
- risk/network/plotter/api.py +1 -1
- risk/network/plotter/canvas.py +36 -36
- risk/network/plotter/contour.py +14 -15
- risk/network/plotter/labels.py +303 -294
- risk/network/plotter/network.py +6 -6
- risk/network/plotter/plotter.py +8 -10
- risk/network/plotter/utils/colors.py +15 -8
- risk/network/plotter/utils/layout.py +3 -3
- risk/risk.py +6 -7
- risk_network-0.0.12.dist-info/METADATA +122 -0
- risk_network-0.0.12.dist-info/RECORD +40 -0
- {risk_network-0.0.10.dist-info → risk_network-0.0.12.dist-info}/WHEEL +1 -1
- risk/annotations/__init__.py +0 -7
- risk/network/geometry.py +0 -150
- risk/stats/__init__.py +0 -15
- risk/stats/permutation/__init__.py +0 -6
- risk_network-0.0.10.dist-info/METADATA +0 -798
- risk_network-0.0.10.dist-info/RECORD +0 -40
- {risk_network-0.0.10.dist-info → risk_network-0.0.12.dist-info/licenses}/LICENSE +0 -0
- {risk_network-0.0.10.dist-info → risk_network-0.0.12.dist-info}/top_level.txt +0 -0
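The headline change in 0.0.12 is the rename of the `annotations` package and its public symbols to the singular `annotation` (`load_annotations` → `load_annotation`, `AnnotationsIO` → `AnnotationIO`), together with a reshuffle that moves the permutation statistics under `risk/neighborhoods/stats/` and the significance helpers to `risk/network/graph/stats.py`. A hedged migration sketch for downstream imports, inferred from the renamed files above rather than from official upgrade notes:

```python
# Hypothetical migration sketch inferred from the file renames in this diff;
# verify against your own imports before relying on it.

# risk-network 0.0.10 (plural names):
# from risk.annotations.io import AnnotationsIO
# from risk.annotations.annotations import load_annotations

# risk-network 0.0.12 (singular names, same call patterns):
from risk.annotation.io import AnnotationIO
from risk.annotation.annotation import load_annotation
```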
risk/{annotations/annotations.py → annotation/annotation.py}
RENAMED
@@ -1,88 +1,48 @@
 """
-risk/
-
+risk/annotation/annotation
+~~~~~~~~~~~~~~~~~~~~~~~~~~
 """
 
-import os
 import re
-import zipfile
 from collections import Counter
 from itertools import compress
 from typing import Any, Dict, List, Set
 
 import networkx as nx
-import nltk
 import numpy as np
 import pandas as pd
-from nltk.corpus import stopwords
-from nltk.stem import WordNetLemmatizer
 from nltk.tokenize import word_tokenize
+from scipy.sparse import coo_matrix
 
+from risk.annotation.nltk_setup import setup_nltk_resources
 from risk.log import logger
-from scipy.sparse import coo_matrix
 
 
-def
-    """
-
-
-    #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    # Look for a ZIP file in all known NLTK data directories.
-    for data_path in nltk.data.path:
-        zip_path = os.path.join(data_path, "corpora", f"{resource}.zip")
-        if os.path.isfile(zip_path):
-            print(f"Found ZIP file for '{resource}' at: {zip_path}")
-            target_dir = os.path.join(data_path, "corpora")
-            with zipfile.ZipFile(zip_path, "r") as z:
-                z.extractall(path=target_dir)
-            print(f"Unzipped '{resource}' successfully.")
-            break  # Stop after unzipping the first found ZIP.
-
-    # Final check: Try to check resource one last time. If it fails, rai
-    try:
-        nltk.data.find(resource_path)
-        print(f"Resource '{resource}' is now available.")
-    except LookupError:
-        raise LookupError(f"Resource '{resource}' could not be found, downloaded, or unzipped.")
-
-
-# Ensure the NLTK stopwords and WordNet resources are available
-# punkt is known to have issues with the default download method, so we use a custom function if it fails
-try:
-    ensure_nltk_resource("punkt")
-except LookupError:
-    nltk.download("punkt")
-ensure_nltk_resource("stopwords")
-ensure_nltk_resource("wordnet")
-# Use NLTK's stopwords - load all languages
-STOP_WORDS = set(word for lang in stopwords.fileids() for word in stopwords.words(lang))
-# Initialize the WordNet lemmatizer, which is used for normalizing words
-LEMMATIZER = WordNetLemmatizer()
-
-
-def load_annotations(
-    network: nx.Graph, annotations_input: Dict[str, Any], min_nodes_per_term: int = 2
+def initialize_nltk():
+    """Initialize all required NLTK components."""
+    setup_nltk_resources()
+
+    # After resources are available, initialize the components
+    from nltk.corpus import stopwords
+    from nltk.stem import WordNetLemmatizer
+
+    global STOP_WORDS, LEMMATIZER
+    STOP_WORDS = set(stopwords.words("english"))
+    LEMMATIZER = WordNetLemmatizer()
+
+
+# Initialize NLTK components
+initialize_nltk()
+
+
+def load_annotation(
+    network: nx.Graph, annotation_input: Dict[str, Any], min_nodes_per_term: int = 2
 ) -> Dict[str, Any]:
-    """Convert
+    """Convert annotation input to a sparse matrix and reindex based on the network's node labels.
 
     Args:
         network (nx.Graph): The network graph.
-
+        annotation_input (Dict[str, Any]): An annotation dictionary.
         min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
             term to be included. Defaults to 2.
 
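The removed block above downloaded and unzipped NLTK data eagerly at import time; 0.0.12 defers that to `setup_nltk_resources()` in the new `risk/annotation/nltk_setup.py` (not shown in this diff) and narrows the stop words from all languages to English only. A minimal standalone sketch of the ensure-then-initialize pattern, using stock NLTK APIs only (the resource names and lookup paths are NLTK's, not risk-network's):

```python
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

# Check each resource before building the module-level globals; download on miss.
RESOURCES = {
    "punkt": "tokenizers/punkt",
    "stopwords": "corpora/stopwords",
    "wordnet": "corpora/wordnet",
}
for name, path in RESOURCES.items():
    try:
        nltk.data.find(path)
    except LookupError:
        nltk.download(name)

STOP_WORDS = set(stopwords.words("english"))
LEMMATIZER = WordNetLemmatizer()

tokens = word_tokenize("Ribosome biogenesis and rRNA processing")
print([LEMMATIZER.lemmatize(t.lower()) for t in tokens if t.lower() not in STOP_WORDS])
# ['ribosome', 'biogenesis', 'rrna', 'processing']
```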
@@ -91,18 +51,18 @@ def load_annotations(
         matrix.
 
     Raises:
-        ValueError: If no
-        ValueError: If no
+        ValueError: If no annotation is found for the nodes in the network.
+        ValueError: If no annotation has at least min_nodes_per_term nodes in the network.
     """
     # Step 1: Map nodes and annotations to indices
     node_label_order = [attr["label"] for _, attr in network.nodes(data=True) if "label" in attr]
     node_to_idx = {node: i for i, node in enumerate(node_label_order)}
-    annotation_to_idx = {annotation: i for i, annotation in enumerate(
+    annotation_to_idx = {annotation: i for i, annotation in enumerate(annotation_input)}
     # Step 2: Construct a sparse binary matrix directly
     row = []
     col = []
     data = []
-    for annotation, nodes in
+    for annotation, nodes in annotation_input.items():
         for node in nodes:
             if node in node_to_idx and annotation in annotation_to_idx:
                 row.append(node_to_idx[node])
@@ -111,40 +71,40 @@ def load_annotations(
 
     # Create a sparse binary matrix
     num_nodes = len(node_to_idx)
-
-
+    num_annotation = len(annotation_to_idx)
+    annotation_pivot = coo_matrix((data, (row, col)), shape=(num_nodes, num_annotation)).tocsr()
     # Step 3: Filter out annotations with fewer than min_nodes_per_term occurrences
-
-
+    valid_annotation = annotation_pivot.sum(axis=0).A1 >= min_nodes_per_term
+    annotation_pivot = annotation_pivot[:, valid_annotation]
     # Step 4: Raise errors for empty matrices
-    if
+    if annotation_pivot.nnz == 0:
         raise ValueError("No terms found in the annotation file for the nodes in the network.")
 
-
-    if
+    num_remaining_annotation = annotation_pivot.shape[1]
+    if num_remaining_annotation == 0:
         raise ValueError(
             f"No annotation terms found with at least {min_nodes_per_term} nodes in the network."
         )
 
     # Step 5: Extract ordered nodes and annotations
     ordered_nodes = tuple(node_label_order)
-
-        annotation for annotation, is_valid in zip(annotation_to_idx,
+    ordered_annotation = tuple(
+        annotation for annotation, is_valid in zip(annotation_to_idx, valid_annotation) if is_valid
     )
 
     # Log the filtering details
     logger.info(f"Minimum number of nodes per annotation term: {min_nodes_per_term}")
-    logger.info(f"Number of input annotation terms: {
-    logger.info(f"Number of remaining annotation terms: {
+    logger.info(f"Number of input annotation terms: {num_annotation}")
+    logger.info(f"Number of remaining annotation terms: {num_remaining_annotation}")
 
     return {
         "ordered_nodes": ordered_nodes,
-        "
-        "matrix":
+        "ordered_annotation": ordered_annotation,
+        "matrix": annotation_pivot,
     }
 
 
-def
+def define_top_annotation(
     network: nx.Graph,
     ordered_annotation_labels: List[str],
     neighborhood_significance_sums: List[int],
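The rewritten loader never densifies: Step 2 collects COO triples, Step 3 filters term columns in sparse space. A standalone toy run of those steps (gene and term names are made up):

```python
from scipy.sparse import coo_matrix

node_to_idx = {"YAL001C": 0, "YBR002W": 1, "YCL003A": 2}
annotation_input = {
    "rRNA processing": ["YAL001C", "YBR002W"],  # 2 nodes -> kept
    "DNA repair": ["YCL003A"],                  # 1 node  -> filtered out
}
annotation_to_idx = {a: i for i, a in enumerate(annotation_input)}

# Step 2: one (row, col, 1) triple per node-term pair
row, col, data = [], [], []
for annotation, nodes in annotation_input.items():
    for node in nodes:
        row.append(node_to_idx[node])
        col.append(annotation_to_idx[annotation])
        data.append(1)

pivot = coo_matrix((data, (row, col)), shape=(3, 2)).tocsr()
# Step 3: keep only terms annotating at least min_nodes_per_term (= 2) nodes
valid = pivot.sum(axis=0).A1 >= 2
print(pivot.toarray().tolist())  # [[1, 0], [1, 0], [0, 1]]
print(valid.tolist())            # [True, False]
print(pivot[:, valid].shape)     # (3, 1)
```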
@@ -170,7 +130,7 @@ def define_top_annotations(
     # Sum the columns of the significant significance matrix (positive floating point values)
     significant_significance_scores = significant_significance_matrix.sum(axis=0)
     # Create DataFrame to store annotations, their neighborhood significance sums, and significance scores
-
+    annotation_significance_matrix = pd.DataFrame(
         {
             "id": range(len(ordered_annotation_labels)),
             "full_terms": ordered_annotation_labels,
@@ -178,29 +138,29 @@ def define_top_annotations(
             "significant_significance_score": significant_significance_scores,
         }
     )
-
+    annotation_significance_matrix["significant_annotation"] = False
     # Apply size constraints to identify potential significant annotations
-
+    annotation_significance_matrix.loc[
         (
-
+            annotation_significance_matrix["significant_neighborhood_significance_sums"]
             >= min_cluster_size
         )
         & (
-
+            annotation_significance_matrix["significant_neighborhood_significance_sums"]
             <= max_cluster_size
         ),
-        "
+        "significant_annotation",
     ] = True
     # Initialize columns for connected components analysis
-
-
-
+    annotation_significance_matrix["num_connected_components"] = 0
+    annotation_significance_matrix["size_connected_components"] = None
+    annotation_significance_matrix["size_connected_components"] = annotation_significance_matrix[
        "size_connected_components"
    ].astype(object)
-
+    annotation_significance_matrix["num_large_connected_components"] = 0
 
-    for attribute in
-
+    for attribute in annotation_significance_matrix.index.values[
+        annotation_significance_matrix["significant_annotation"]
    ]:
        # Identify significant neighborhoods based on the binary significance matrix
        significant_neighborhoods = list(
@@ -223,24 +183,24 @@ def define_top_annotations(
         num_large_connected_components = len(filtered_size_connected_components)
 
         # Assign the number of connected components
-
+        annotation_significance_matrix.loc[attribute, "num_connected_components"] = (
             num_connected_components
         )
         # Filter out attributes with more than one connected component
-
-
-        "
+        annotation_significance_matrix.loc[
+            annotation_significance_matrix["num_connected_components"] > 1,
+            "significant_annotation",
         ] = False
         # Assign the number of large connected components
-
+        annotation_significance_matrix.loc[attribute, "num_large_connected_components"] = (
             num_large_connected_components
         )
         # Assign the size of connected components, ensuring it is always a list
-
+        annotation_significance_matrix.at[attribute, "size_connected_components"] = (
             filtered_size_connected_components.tolist()
         )
 
-    return
+    return annotation_significance_matrix
 
 
 def get_weighted_description(words_column: pd.Series, scores_column: pd.Series) -> str:
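For orientation between the two files, a hedged end-to-end sketch of calling the renamed loader directly (assumes risk-network 0.0.12 is installed; nodes are indexed by their "label" attribute, per Step 1 above, and the toy names are hypothetical):

```python
import networkx as nx
from risk.annotation.annotation import load_annotation

network = nx.Graph()
network.add_nodes_from([
    (0, {"label": "YAL001C"}),
    (1, {"label": "YBR002W"}),
    (2, {"label": "YCL003A"}),
])
annotation_input = {"rRNA processing": ["YAL001C", "YBR002W"]}

result = load_annotation(network, annotation_input, min_nodes_per_term=2)
print(result["ordered_annotation"])  # ('rRNA processing',)
print(result["matrix"].shape)        # (3, 1) node-by-term CSR matrix
```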
risk/{annotations → annotation}/io.py
RENAMED
@@ -1,6 +1,6 @@
 """
-risk/
-
+risk/annotation/io
+~~~~~~~~~~~~~~~~~~
 """
 
 import json
@@ -9,48 +9,45 @@ from typing import Any, Dict
 import networkx as nx
 import pandas as pd
 
-from risk.
-from risk.log import
+from risk.annotation.annotation import load_annotation
+from risk.log import log_header, logger, params
 
 
-class
-    """Handles the loading and exporting of
+class AnnotationIO:
+    """Handles the loading and exporting of annotation in various file formats.
 
-    The
+    The AnnotationIO class provides methods to load annotation from different file types (JSON, CSV, Excel, etc.)
     and to export parameter data to various formats like JSON, CSV, and text files.
     """
 
-    def
-        pass
-
-    def load_json_annotation(
+    def load_annotation_json(
         self, network: nx.Graph, filepath: str, min_nodes_per_term: int = 2
     ) -> Dict[str, Any]:
-        """Load
+        """Load annotation from a JSON file and convert them to a DataFrame.
 
         Args:
-            network (NetworkX graph): The network to which the
-            filepath (str): Path to the JSON
+            network (NetworkX graph): The network to which the annotation is related.
+            filepath (str): Path to the JSON annotation file.
             min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
                 term to be included. Defaults to 2.
 
         Returns:
-            Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the
+            Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotation matrix.
         """
         filetype = "JSON"
         # Log the loading of the JSON file
-        params.
+        params.log_annotation(
             filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
         )
-        _log_loading(filetype, filepath=filepath)
+        self._log_loading(filetype, filepath=filepath)
 
         # Load the JSON file into a dictionary
         with open(filepath, "r", encoding="utf-8") as file:
-
+            annotation_input = json.load(file)
 
-        return
+        return load_annotation(network, annotation_input, min_nodes_per_term)
 
-    def
+    def load_annotation_excel(
         self,
         network: nx.Graph,
         filepath: str,
@@ -60,11 +57,11 @@ class AnnotationsIO:
         nodes_delimiter: str = ";",
         min_nodes_per_term: int = 2,
     ) -> Dict[str, Any]:
-        """Load
+        """Load annotation from an Excel file and associate them with the network.
 
         Args:
-            network (nx.Graph): The NetworkX graph to which the
-            filepath (str): Path to the Excel
+            network (nx.Graph): The NetworkX graph to which the annotation is related.
+            filepath (str): Path to the Excel annotation file.
             label_colname (str): Name of the column containing the labels (e.g., GO terms).
             nodes_colname (str): Name of the column containing the nodes associated with each label.
             sheet_name (str, optional): The name of the Excel sheet to load (default is 'Sheet1').
@@ -78,10 +75,10 @@ class AnnotationsIO:
         """
         filetype = "Excel"
         # Log the loading of the Excel file
-        params.
+        params.log_annotation(
             filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
         )
-        _log_loading(filetype, filepath=filepath)
+        self._log_loading(filetype, filepath=filepath)
 
         # Load the specified sheet from the Excel file
         annotation = pd.read_excel(filepath, sheet_name=sheet_name)
@@ -90,11 +87,11 @@ class AnnotationsIO:
             lambda x: x.split(nodes_delimiter)
         )
         # Convert the DataFrame to a dictionary pairing labels with their corresponding nodes
-
+        annotation_input = annotation.set_index(label_colname)[nodes_colname].to_dict()
 
-        return
+        return load_annotation(network, annotation_input, min_nodes_per_term)
 
-    def
+    def load_annotation_csv(
         self,
         network: nx.Graph,
         filepath: str,
@@ -103,11 +100,11 @@ class AnnotationsIO:
         nodes_delimiter: str = ";",
         min_nodes_per_term: int = 2,
     ) -> Dict[str, Any]:
-        """Load
+        """Load annotation from a CSV file and associate them with the network.
 
         Args:
-            network (nx.Graph): The NetworkX graph to which the
-            filepath (str): Path to the CSV
+            network (nx.Graph): The NetworkX graph to which the annotation is related.
+            filepath (str): Path to the CSV annotation file.
             label_colname (str): Name of the column containing the labels (e.g., GO terms).
             nodes_colname (str): Name of the column containing the nodes associated with each label.
             nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
@@ -120,19 +117,19 @@ class AnnotationsIO:
         """
         filetype = "CSV"
         # Log the loading of the CSV file
-        params.
+        params.log_annotation(
             filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
         )
-        _log_loading(filetype, filepath=filepath)
+        self._log_loading(filetype, filepath=filepath)
 
         # Load the CSV file into a dictionary
-
+        annotation_input = self._load_matrix_file(
             filepath, label_colname, nodes_colname, delimiter=",", nodes_delimiter=nodes_delimiter
         )
 
-        return
+        return load_annotation(network, annotation_input, min_nodes_per_term)
 
-    def
+    def load_annotation_tsv(
         self,
         network: nx.Graph,
         filepath: str,
@@ -141,11 +138,11 @@ class AnnotationsIO:
         nodes_delimiter: str = ";",
         min_nodes_per_term: int = 2,
     ) -> Dict[str, Any]:
-        """Load
+        """Load annotation from a TSV file and associate them with the network.
 
         Args:
-            network (nx.Graph): The NetworkX graph to which the
-            filepath (str): Path to the TSV
+            network (nx.Graph): The NetworkX graph to which the annotation is related.
+            filepath (str): Path to the TSV annotation file.
             label_colname (str): Name of the column containing the labels (e.g., GO terms).
             nodes_colname (str): Name of the column containing the nodes associated with each label.
             nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
@@ -158,31 +155,34 @@ class AnnotationsIO:
         """
         filetype = "TSV"
         # Log the loading of the TSV file
-        params.
+        params.log_annotation(
             filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
         )
-        _log_loading(filetype, filepath=filepath)
+        self._log_loading(filetype, filepath=filepath)
 
         # Load the TSV file into a dictionary
-
+        annotation_input = self._load_matrix_file(
             filepath, label_colname, nodes_colname, delimiter="\t", nodes_delimiter=nodes_delimiter
         )
 
-        return
+        return load_annotation(network, annotation_input, min_nodes_per_term)
 
-    def
+    def load_annotation_dict(
         self, network: nx.Graph, content: Dict[str, Any], min_nodes_per_term: int = 2
     ) -> Dict[str, Any]:
-        """Load
+        """Load annotation from a provided dictionary and convert them to a dictionary annotation.
 
         Args:
-            network (NetworkX graph): The network to which the
-            content (Dict[str, Any]): The
+            network (NetworkX graph): The network to which the annotation is related.
+            content (Dict[str, Any]): The annotation dictionary to load.
            min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
                term to be included. Defaults to 2.
 
         Returns:
-            Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the
+            Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotation matrix.
+
+        Raises:
+            TypeError: If the content is not a dictionary.
         """
         # Ensure the input content is a dictionary
         if not isinstance(content, dict):
@@ -191,50 +191,51 @@ class AnnotationsIO:
             )
 
         filetype = "Dictionary"
-        # Log the loading of the
-        params.
-        _log_loading(filetype, "In-memory dictionary")
-
-        # Load the annotations as a dictionary from the provided dictionary
-        return load_annotations(network, content, min_nodes_per_term)
-
-
-def _load_matrix_file(
-    filepath: str,
-    label_colname: str,
-    nodes_colname: str,
-    delimiter: str = ",",
-    nodes_delimiter: str = ";",
-) -> Dict[str, Any]:
-    """Load annotations from a CSV or TSV file and convert them to a dictionary.
-
-    Args:
-        filepath (str): Path to the annotation file.
-        label_colname (str): Name of the column containing the labels (e.g., GO terms).
-        nodes_colname (str): Name of the column containing the nodes associated with each label.
-        delimiter (str, optional): Delimiter used to separate columns in the file (default is ',').
-        nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
-
-    Returns:
-        Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes.
-    """
-    # Load the CSV or TSV file into a DataFrame
-    annotation = pd.read_csv(filepath, delimiter=delimiter)
-    # Split the nodes column by the nodes_delimiter to handle multiple nodes per label
-    annotation[nodes_colname] = annotation[nodes_colname].apply(lambda x: x.split(nodes_delimiter))
-    # Create a dictionary pairing labels with their corresponding list of nodes
-    label_node_dict = annotation.set_index(label_colname)[nodes_colname].to_dict()
-    return label_node_dict
+        # Log the loading of the annotation from the dictionary
+        params.log_annotation(filepath="In-memory dictionary", filetype=filetype)
+        self._log_loading(filetype, "In-memory dictionary")
 
+        # Load the annotation as a dictionary from the provided dictionary
+        return load_annotation(network, content, min_nodes_per_term)
 
-    def
-
+    def _load_matrix_file(
+        self,
+        filepath: str,
+        label_colname: str,
+        nodes_colname: str,
+        delimiter: str = ",",
+        nodes_delimiter: str = ";",
+    ) -> Dict[str, Any]:
+        """Load annotation from a CSV or TSV file and convert them to a dictionary.
 
-
-
-
-
-
-
-
+        Args:
+            filepath (str): Path to the annotation file.
+            label_colname (str): Name of the column containing the labels (e.g., GO terms).
+            nodes_colname (str): Name of the column containing the nodes associated with each label.
+            delimiter (str, optional): Delimiter used to separate columns in the file (default is ',').
+            nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
+
+        Returns:
+            Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes.
+        """
+        # Load the CSV or TSV file into a DataFrame
+        annotation = pd.read_csv(filepath, delimiter=delimiter)
+        # Split the nodes column by the nodes_delimiter to handle multiple nodes per label
+        annotation[nodes_colname] = annotation[nodes_colname].apply(
+            lambda x: x.split(nodes_delimiter)
+        )
+        # Create a dictionary pairing labels with their corresponding list of nodes
+        label_node_dict = annotation.set_index(label_colname)[nodes_colname].to_dict()
+        return label_node_dict
+
+    def _log_loading(self, filetype: str, filepath: str = "") -> None:
+        """Log information about the network file being loaded.
+
+        Args:
+            filetype (str): The type of the file being loaded (e.g., 'Cytoscape').
+            filepath (str, optional): The path to the file being loaded.
+        """
+        log_header("Loading annotation")
+        logger.debug(f"Filetype: {filetype}")
+        if filepath:
+            logger.debug(f"Filepath: {filepath}")