risk-network 0.0.11-py3-none-any.whl → 0.0.12-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- risk/__init__.py +1 -1
- risk/annotation/__init__.py +10 -0
- risk/{annotations/annotations.py → annotation/annotation.py} +44 -44
- risk/{annotations → annotation}/io.py +93 -92
- risk/{annotations → annotation}/nltk_setup.py +6 -5
- risk/log/__init__.py +1 -1
- risk/log/parameters.py +26 -27
- risk/neighborhoods/__init__.py +0 -1
- risk/neighborhoods/api.py +38 -38
- risk/neighborhoods/community.py +33 -4
- risk/neighborhoods/domains.py +26 -28
- risk/neighborhoods/neighborhoods.py +8 -2
- risk/neighborhoods/stats/__init__.py +13 -0
- risk/neighborhoods/stats/permutation/__init__.py +6 -0
- risk/{stats → neighborhoods/stats}/permutation/permutation.py +24 -21
- risk/{stats → neighborhoods/stats}/permutation/test_functions.py +4 -4
- risk/{stats/stat_tests.py → neighborhoods/stats/tests.py} +62 -54
- risk/network/__init__.py +0 -2
- risk/network/graph/__init__.py +0 -2
- risk/network/graph/api.py +19 -19
- risk/network/graph/graph.py +73 -68
- risk/{stats/significance.py → network/graph/stats.py} +2 -2
- risk/network/graph/summary.py +12 -13
- risk/network/io.py +163 -20
- risk/network/plotter/__init__.py +0 -2
- risk/network/plotter/api.py +1 -1
- risk/network/plotter/canvas.py +36 -36
- risk/network/plotter/contour.py +14 -15
- risk/network/plotter/labels.py +303 -294
- risk/network/plotter/network.py +6 -6
- risk/network/plotter/plotter.py +8 -10
- risk/network/plotter/utils/colors.py +15 -8
- risk/network/plotter/utils/layout.py +3 -3
- risk/risk.py +6 -6
- risk_network-0.0.12.dist-info/METADATA +122 -0
- risk_network-0.0.12.dist-info/RECORD +40 -0
- {risk_network-0.0.11.dist-info → risk_network-0.0.12.dist-info}/WHEEL +1 -1
- risk/annotations/__init__.py +0 -7
- risk/network/geometry.py +0 -150
- risk/stats/__init__.py +0 -15
- risk/stats/permutation/__init__.py +0 -6
- risk_network-0.0.11.dist-info/METADATA +0 -798
- risk_network-0.0.11.dist-info/RECORD +0 -41
- {risk_network-0.0.11.dist-info → risk_network-0.0.12.dist-info/licenses}/LICENSE +0 -0
- {risk_network-0.0.11.dist-info → risk_network-0.0.12.dist-info}/top_level.txt +0 -0
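The pattern running through this release is a rename from plural to singular: the risk/annotations package becomes risk/annotation, AnnotationsIO becomes AnnotationIO, and load_annotations/define_top_annotations drop the trailing "s". The statistics modules also move under risk/neighborhoods/stats, risk/stats/significance.py becomes risk/network/graph/stats.py, and risk/network/geometry.py is removed. A hedged import sketch of the rename follows; the public re-exports in risk/__init__.py are not visible in this diff, so treat these as module-level paths only:

```python
# Import-path sketch inferred from the renamed files listed above (0.0.11 paths
# are taken from the old-side hunk headers further down, not from the old wheel itself).

# 0.0.11 layout (plural module and class names):
# from risk.annotations.io import AnnotationsIO
# from risk.annotations.annotations import load_annotations, define_top_annotations

# 0.0.12 layout (singular names, same responsibilities):
from risk.annotation.io import AnnotationIO
from risk.annotation.annotation import load_annotation, define_top_annotation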
risk/__init__.py
CHANGED

risk/{annotations/annotations.py → annotation/annotation.py}
RENAMED

@@ -1,6 +1,6 @@
 """
-risk/
-
+risk/annotation/annotation
+~~~~~~~~~~~~~~~~~~~~~~~~~~
 """

 import re

@@ -14,7 +14,7 @@ import pandas as pd
 from nltk.tokenize import word_tokenize
 from scipy.sparse import coo_matrix

-from risk.
+from risk.annotation.nltk_setup import setup_nltk_resources
 from risk.log import logger


@@ -35,14 +35,14 @@ def initialize_nltk():
 initialize_nltk()


-def
-    network: nx.Graph,
+def load_annotation(
+    network: nx.Graph, annotation_input: Dict[str, Any], min_nodes_per_term: int = 2
 ) -> Dict[str, Any]:
-    """Convert
+    """Convert annotation input to a sparse matrix and reindex based on the network's node labels.

     Args:
         network (nx.Graph): The network graph.
-
+        annotation_input (Dict[str, Any]): An annotation dictionary.
         min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
             term to be included. Defaults to 2.

@@ -51,18 +51,18 @@ def load_annotations(
         matrix.

     Raises:
-        ValueError: If no
-        ValueError: If no
+        ValueError: If no annotation is found for the nodes in the network.
+        ValueError: If no annotation has at least min_nodes_per_term nodes in the network.
     """
     # Step 1: Map nodes and annotations to indices
     node_label_order = [attr["label"] for _, attr in network.nodes(data=True) if "label" in attr]
     node_to_idx = {node: i for i, node in enumerate(node_label_order)}
-    annotation_to_idx = {annotation: i for i, annotation in enumerate(
+    annotation_to_idx = {annotation: i for i, annotation in enumerate(annotation_input)}
     # Step 2: Construct a sparse binary matrix directly
     row = []
     col = []
     data = []
-    for annotation, nodes in
+    for annotation, nodes in annotation_input.items():
         for node in nodes:
             if node in node_to_idx and annotation in annotation_to_idx:
                 row.append(node_to_idx[node])

@@ -71,40 +71,40 @@ def load_annotations(

     # Create a sparse binary matrix
     num_nodes = len(node_to_idx)
-
-
+    num_annotation = len(annotation_to_idx)
+    annotation_pivot = coo_matrix((data, (row, col)), shape=(num_nodes, num_annotation)).tocsr()
     # Step 3: Filter out annotations with fewer than min_nodes_per_term occurrences
-
-
+    valid_annotation = annotation_pivot.sum(axis=0).A1 >= min_nodes_per_term
+    annotation_pivot = annotation_pivot[:, valid_annotation]
     # Step 4: Raise errors for empty matrices
-    if
+    if annotation_pivot.nnz == 0:
         raise ValueError("No terms found in the annotation file for the nodes in the network.")

-
-    if
+    num_remaining_annotation = annotation_pivot.shape[1]
+    if num_remaining_annotation == 0:
         raise ValueError(
             f"No annotation terms found with at least {min_nodes_per_term} nodes in the network."
         )

     # Step 5: Extract ordered nodes and annotations
     ordered_nodes = tuple(node_label_order)
-
-        annotation for annotation, is_valid in zip(annotation_to_idx,
+    ordered_annotation = tuple(
+        annotation for annotation, is_valid in zip(annotation_to_idx, valid_annotation) if is_valid
     )

     # Log the filtering details
     logger.info(f"Minimum number of nodes per annotation term: {min_nodes_per_term}")
-    logger.info(f"Number of input annotation terms: {
-    logger.info(f"Number of remaining annotation terms: {
+    logger.info(f"Number of input annotation terms: {num_annotation}")
+    logger.info(f"Number of remaining annotation terms: {num_remaining_annotation}")

     return {
         "ordered_nodes": ordered_nodes,
-        "
-        "matrix":
+        "ordered_annotation": ordered_annotation,
+        "matrix": annotation_pivot,
     }


-def
+def define_top_annotation(
     network: nx.Graph,
     ordered_annotation_labels: List[str],
     neighborhood_significance_sums: List[int],
@@ -130,7 +130,7 @@ def define_top_annotations(
     # Sum the columns of the significant significance matrix (positive floating point values)
     significant_significance_scores = significant_significance_matrix.sum(axis=0)
     # Create DataFrame to store annotations, their neighborhood significance sums, and significance scores
-
+    annotation_significance_matrix = pd.DataFrame(
         {
             "id": range(len(ordered_annotation_labels)),
             "full_terms": ordered_annotation_labels,

@@ -138,29 +138,29 @@ def define_top_annotations(
             "significant_significance_score": significant_significance_scores,
         }
     )
-
+    annotation_significance_matrix["significant_annotation"] = False
     # Apply size constraints to identify potential significant annotations
-
+    annotation_significance_matrix.loc[
         (
-
+            annotation_significance_matrix["significant_neighborhood_significance_sums"]
             >= min_cluster_size
         )
         & (
-
+            annotation_significance_matrix["significant_neighborhood_significance_sums"]
             <= max_cluster_size
         ),
-        "
+        "significant_annotation",
     ] = True
     # Initialize columns for connected components analysis
-
-
-
+    annotation_significance_matrix["num_connected_components"] = 0
+    annotation_significance_matrix["size_connected_components"] = None
+    annotation_significance_matrix["size_connected_components"] = annotation_significance_matrix[
        "size_connected_components"
     ].astype(object)
-
+    annotation_significance_matrix["num_large_connected_components"] = 0

-    for attribute in
-
+    for attribute in annotation_significance_matrix.index.values[
+        annotation_significance_matrix["significant_annotation"]
     ]:
         # Identify significant neighborhoods based on the binary significance matrix
         significant_neighborhoods = list(

@@ -183,24 +183,24 @@ def define_top_annotations(
         num_large_connected_components = len(filtered_size_connected_components)

         # Assign the number of connected components
-
+        annotation_significance_matrix.loc[attribute, "num_connected_components"] = (
             num_connected_components
         )
         # Filter out attributes with more than one connected component
-
-
-        "
+        annotation_significance_matrix.loc[
+            annotation_significance_matrix["num_connected_components"] > 1,
+            "significant_annotation",
         ] = False
         # Assign the number of large connected components
-
+        annotation_significance_matrix.loc[attribute, "num_large_connected_components"] = (
             num_large_connected_components
         )
         # Assign the size of connected components, ensuring it is always a list
-
+        annotation_significance_matrix.at[attribute, "size_connected_components"] = (
            filtered_size_connected_components.tolist()
        )

-    return
+    return annotation_significance_matrix


 def get_weighted_description(words_column: pd.Series, scores_column: pd.Series) -> str:
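Functionally, load_annotation (formerly load_annotations) still builds a binary node-by-term matrix with scipy's coo_matrix and then drops terms covering fewer than min_nodes_per_term nodes; the returned dictionary now uses the keys ordered_nodes, ordered_annotation, and matrix. Below is a standalone sketch of that construction on made-up data; the toy network, labels, and terms are illustrative and not taken from the package:

```python
import networkx as nx
from scipy.sparse import coo_matrix

# Toy inputs (hypothetical): a 4-node network and two annotation terms.
network = nx.path_graph(4)
for i in network.nodes:
    network.nodes[i]["label"] = f"gene_{i}"
annotation_input = {"term_A": ["gene_0", "gene_1", "gene_2"], "term_B": ["gene_3"]}
min_nodes_per_term = 2

# Map node labels and annotation terms to matrix indices (mirrors the hunks above).
node_label_order = [attr["label"] for _, attr in network.nodes(data=True) if "label" in attr]
node_to_idx = {node: i for i, node in enumerate(node_label_order)}
annotation_to_idx = {annotation: i for i, annotation in enumerate(annotation_input)}

# Collect (row, col) pairs only for nodes that actually exist in the network.
row, col, data = [], [], []
for annotation, nodes in annotation_input.items():
    for node in nodes:
        if node in node_to_idx and annotation in annotation_to_idx:
            row.append(node_to_idx[node])
            col.append(annotation_to_idx[annotation])
            data.append(1)

# Binary node-by-term matrix in CSR form, then drop terms covering too few nodes.
annotation_pivot = coo_matrix(
    (data, (row, col)), shape=(len(node_to_idx), len(annotation_to_idx))
).tocsr()
valid_annotation = annotation_pivot.sum(axis=0).A1 >= min_nodes_per_term
annotation_pivot = annotation_pivot[:, valid_annotation]

print(annotation_pivot.toarray())  # term_B covers only 1 node and is filtered out
```

define_top_annotation (previously define_top_annotations) then flags terms in the annotation_significance_matrix DataFrame using the cluster-size bounds and connected-component checks shown in the later hunks above.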
risk/{annotations → annotation}/io.py
RENAMED

@@ -1,6 +1,6 @@
 """
-risk/
-
+risk/annotation/io
+~~~~~~~~~~~~~~~~~~
 """

 import json

@@ -9,48 +9,45 @@ from typing import Any, Dict
 import networkx as nx
 import pandas as pd

-from risk.
-from risk.log import
+from risk.annotation.annotation import load_annotation
+from risk.log import log_header, logger, params


-class
-    """Handles the loading and exporting of
+class AnnotationIO:
+    """Handles the loading and exporting of annotation in various file formats.

-    The
+    The AnnotationIO class provides methods to load annotation from different file types (JSON, CSV, Excel, etc.)
     and to export parameter data to various formats like JSON, CSV, and text files.
     """

-    def
-        pass
-
-    def load_json_annotation(
+    def load_annotation_json(
         self, network: nx.Graph, filepath: str, min_nodes_per_term: int = 2
     ) -> Dict[str, Any]:
-        """Load
+        """Load annotation from a JSON file and convert them to a DataFrame.

         Args:
-            network (NetworkX graph): The network to which the
-            filepath (str): Path to the JSON
+            network (NetworkX graph): The network to which the annotation is related.
+            filepath (str): Path to the JSON annotation file.
            min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
                term to be included. Defaults to 2.

         Returns:
-            Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the
+            Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotation matrix.
         """
         filetype = "JSON"
         # Log the loading of the JSON file
-        params.
+        params.log_annotation(
             filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
         )
-        _log_loading(filetype, filepath=filepath)
+        self._log_loading(filetype, filepath=filepath)

         # Load the JSON file into a dictionary
         with open(filepath, "r", encoding="utf-8") as file:
-
+            annotation_input = json.load(file)

-        return
+        return load_annotation(network, annotation_input, min_nodes_per_term)

-    def
+    def load_annotation_excel(
         self,
         network: nx.Graph,
         filepath: str,
@@ -60,11 +57,11 @@ class AnnotationsIO:
         nodes_delimiter: str = ";",
         min_nodes_per_term: int = 2,
     ) -> Dict[str, Any]:
-        """Load
+        """Load annotation from an Excel file and associate them with the network.

         Args:
-            network (nx.Graph): The NetworkX graph to which the
-            filepath (str): Path to the Excel
+            network (nx.Graph): The NetworkX graph to which the annotation is related.
+            filepath (str): Path to the Excel annotation file.
             label_colname (str): Name of the column containing the labels (e.g., GO terms).
             nodes_colname (str): Name of the column containing the nodes associated with each label.
             sheet_name (str, optional): The name of the Excel sheet to load (default is 'Sheet1').

@@ -78,10 +75,10 @@ class AnnotationsIO:
         """
         filetype = "Excel"
         # Log the loading of the Excel file
-        params.
+        params.log_annotation(
             filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
         )
-        _log_loading(filetype, filepath=filepath)
+        self._log_loading(filetype, filepath=filepath)

         # Load the specified sheet from the Excel file
         annotation = pd.read_excel(filepath, sheet_name=sheet_name)

@@ -90,11 +87,11 @@ class AnnotationsIO:
             lambda x: x.split(nodes_delimiter)
         )
         # Convert the DataFrame to a dictionary pairing labels with their corresponding nodes
-
+        annotation_input = annotation.set_index(label_colname)[nodes_colname].to_dict()

-        return
+        return load_annotation(network, annotation_input, min_nodes_per_term)

-    def
+    def load_annotation_csv(
         self,
         network: nx.Graph,
         filepath: str,

@@ -103,11 +100,11 @@ class AnnotationsIO:
         nodes_delimiter: str = ";",
         min_nodes_per_term: int = 2,
     ) -> Dict[str, Any]:
-        """Load
+        """Load annotation from a CSV file and associate them with the network.

         Args:
-            network (nx.Graph): The NetworkX graph to which the
-            filepath (str): Path to the CSV
+            network (nx.Graph): The NetworkX graph to which the annotation is related.
+            filepath (str): Path to the CSV annotation file.
             label_colname (str): Name of the column containing the labels (e.g., GO terms).
             nodes_colname (str): Name of the column containing the nodes associated with each label.
             nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').

@@ -120,19 +117,19 @@ class AnnotationsIO:
         """
         filetype = "CSV"
         # Log the loading of the CSV file
-        params.
+        params.log_annotation(
             filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
         )
-        _log_loading(filetype, filepath=filepath)
+        self._log_loading(filetype, filepath=filepath)

         # Load the CSV file into a dictionary
-
+        annotation_input = self._load_matrix_file(
             filepath, label_colname, nodes_colname, delimiter=",", nodes_delimiter=nodes_delimiter
         )

-        return
+        return load_annotation(network, annotation_input, min_nodes_per_term)

-    def
+    def load_annotation_tsv(
         self,
         network: nx.Graph,
         filepath: str,

@@ -141,11 +138,11 @@ class AnnotationsIO:
         nodes_delimiter: str = ";",
         min_nodes_per_term: int = 2,
     ) -> Dict[str, Any]:
-        """Load
+        """Load annotation from a TSV file and associate them with the network.

         Args:
-            network (nx.Graph): The NetworkX graph to which the
-            filepath (str): Path to the TSV
+            network (nx.Graph): The NetworkX graph to which the annotation is related.
+            filepath (str): Path to the TSV annotation file.
             label_colname (str): Name of the column containing the labels (e.g., GO terms).
             nodes_colname (str): Name of the column containing the nodes associated with each label.
             nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').

@@ -158,31 +155,34 @@ class AnnotationsIO:
         """
         filetype = "TSV"
         # Log the loading of the TSV file
-        params.
+        params.log_annotation(
             filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
         )
-        _log_loading(filetype, filepath=filepath)
+        self._log_loading(filetype, filepath=filepath)

         # Load the TSV file into a dictionary
-
+        annotation_input = self._load_matrix_file(
             filepath, label_colname, nodes_colname, delimiter="\t", nodes_delimiter=nodes_delimiter
         )

-        return
+        return load_annotation(network, annotation_input, min_nodes_per_term)

-    def
+    def load_annotation_dict(
         self, network: nx.Graph, content: Dict[str, Any], min_nodes_per_term: int = 2
     ) -> Dict[str, Any]:
-        """Load
+        """Load annotation from a provided dictionary and convert them to a dictionary annotation.

         Args:
-            network (NetworkX graph): The network to which the
-            content (Dict[str, Any]): The
+            network (NetworkX graph): The network to which the annotation is related.
+            content (Dict[str, Any]): The annotation dictionary to load.
             min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
                 term to be included. Defaults to 2.

         Returns:
-            Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the
+            Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotation matrix.
+
+        Raises:
+            TypeError: If the content is not a dictionary.
         """
         # Ensure the input content is a dictionary
         if not isinstance(content, dict):

@@ -191,50 +191,51 @@ class AnnotationsIO:
             )

         filetype = "Dictionary"
-        # Log the loading of the
-        params.
-        _log_loading(filetype, "In-memory dictionary")
-
-        # Load the annotations as a dictionary from the provided dictionary
-        return load_annotations(network, content, min_nodes_per_term)
-
-
-    def _load_matrix_file(
-        filepath: str,
-        label_colname: str,
-        nodes_colname: str,
-        delimiter: str = ",",
-        nodes_delimiter: str = ";",
-    ) -> Dict[str, Any]:
-        """Load annotations from a CSV or TSV file and convert them to a dictionary.
-
-        Args:
-            filepath (str): Path to the annotation file.
-            label_colname (str): Name of the column containing the labels (e.g., GO terms).
-            nodes_colname (str): Name of the column containing the nodes associated with each label.
-            delimiter (str, optional): Delimiter used to separate columns in the file (default is ',').
-            nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
-
-        Returns:
-            Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes.
-        """
-        # Load the CSV or TSV file into a DataFrame
-        annotation = pd.read_csv(filepath, delimiter=delimiter)
-        # Split the nodes column by the nodes_delimiter to handle multiple nodes per label
-        annotation[nodes_colname] = annotation[nodes_colname].apply(lambda x: x.split(nodes_delimiter))
-        # Create a dictionary pairing labels with their corresponding list of nodes
-        label_node_dict = annotation.set_index(label_colname)[nodes_colname].to_dict()
-        return label_node_dict
+        # Log the loading of the annotation from the dictionary
+        params.log_annotation(filepath="In-memory dictionary", filetype=filetype)
+        self._log_loading(filetype, "In-memory dictionary")

+        # Load the annotation as a dictionary from the provided dictionary
+        return load_annotation(network, content, min_nodes_per_term)

-    def
-
+    def _load_matrix_file(
+        self,
+        filepath: str,
+        label_colname: str,
+        nodes_colname: str,
+        delimiter: str = ",",
+        nodes_delimiter: str = ";",
+    ) -> Dict[str, Any]:
+        """Load annotation from a CSV or TSV file and convert them to a dictionary.

-
-
-
-
-
-
-
-
+        Args:
+            filepath (str): Path to the annotation file.
+            label_colname (str): Name of the column containing the labels (e.g., GO terms).
+            nodes_colname (str): Name of the column containing the nodes associated with each label.
+            delimiter (str, optional): Delimiter used to separate columns in the file (default is ',').
+            nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
+
+        Returns:
+            Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes.
+        """
+        # Load the CSV or TSV file into a DataFrame
+        annotation = pd.read_csv(filepath, delimiter=delimiter)
+        # Split the nodes column by the nodes_delimiter to handle multiple nodes per label
+        annotation[nodes_colname] = annotation[nodes_colname].apply(
+            lambda x: x.split(nodes_delimiter)
+        )
+        # Create a dictionary pairing labels with their corresponding list of nodes
+        label_node_dict = annotation.set_index(label_colname)[nodes_colname].to_dict()
+        return label_node_dict
+
+    def _log_loading(self, filetype: str, filepath: str = "") -> None:
+        """Log information about the network file being loaded.
+
+        Args:
+            filetype (str): The type of the file being loaded (e.g., 'Cytoscape').
+            filepath (str, optional): The path to the file being loaded.
+        """
+        log_header("Loading annotation")
+        logger.debug(f"Filetype: {filetype}")
+        if filepath:
+            logger.debug(f"Filepath: {filepath}")
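The loader methods on AnnotationIO (formerly AnnotationsIO) are renamed to a load_annotation_<format> scheme — load_json_annotation becomes load_annotation_json, with CSV, TSV, Excel, and dict loaders following the same pattern — and the helpers _load_matrix_file and _log_loading are now instance methods. A usage sketch under those assumptions; the toy network, node labels, and term name below are hypothetical:

```python
import networkx as nx
from risk.annotation.io import AnnotationIO

# Toy network; the loaders match annotation node names against the "label" attribute.
network = nx.Graph()
network.add_node(0, label="gene_A")
network.add_node(1, label="gene_B")

io = AnnotationIO()
# 0.0.11: io.load_json_annotation(network, "annotation.json")
# 0.0.12: loaders are named load_annotation_<format> and share min_nodes_per_term.
result = io.load_annotation_dict(
    network, {"my_term": ["gene_A", "gene_B"]}, min_nodes_per_term=2
)
print(result["ordered_annotation"], result["matrix"].shape)
```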
risk/{annotations → annotation}/nltk_setup.py
RENAMED

@@ -1,19 +1,20 @@
 """
-risk/
-
+risk/annotation/nltk_setup
+~~~~~~~~~~~~~~~~~~~~~~~~~~
 """

 import os
 import zipfile
-from typing import List, Tuple
+from typing import List, Optional, Tuple

 import nltk
-from nltk.data import find
+from nltk.data import find
+from nltk.data import path as nltk_data_path

 from risk.log import logger


-def setup_nltk_resources(required_resources: List[Tuple[str, str]] = None) -> None:
+def setup_nltk_resources(required_resources: Optional[List[Tuple[str, str]]] = None) -> None:
     """Ensures all required NLTK resources are available and properly extracted.
     Uses NLTK's default paths and mechanisms.

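Besides renaming the module, this hunk imports nltk.data.path (aliased nltk_data_path) and tightens the signature of setup_nltk_resources from an implicit-Optional default to an explicit Optional[...]. The sketch below is general typing background rather than RISK-specific code; it only illustrates why the annotation changed:

```python
from typing import List, Optional, Tuple

# 0.0.11 style: the default is None, but the annotation promises a list.
# Type checkers that enforce PEP 484's "no implicit Optional" rule flag this.
def setup_old(required_resources: List[Tuple[str, str]] = None) -> None:
    ...

# 0.0.12 style: the annotation and the None default agree.
def setup_new(required_resources: Optional[List[Tuple[str, str]]] = None) -> None:
    ...
```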
risk/log/__init__.py
CHANGED
risk/log/parameters.py
CHANGED
@@ -11,7 +11,7 @@ from typing import Any, Dict

 import numpy as np

-from risk.log.console import
+from risk.log.console import log_header, logger

 # Suppress all warnings - this is to resolve warnings from multiprocessing
 warnings.filterwarnings("ignore")

@@ -21,7 +21,7 @@ class Params:
     """Handles the storage and logging of various parameters for network analysis.

     The Params class provides methods to log parameters related to different components of the analysis,
-    such as the network,
+    such as the network, annotation, neighborhoods, graph, and plotter settings. It also stores
     the current datetime when the parameters were initialized.
     """

@@ -33,7 +33,7 @@ class Params:
     def initialize(self) -> None:
         """Initialize the parameter dictionaries for different components."""
         self.network = {}
-        self.
+        self.annotation = {}
         self.neighborhoods = {}
         self.graph = {}
         self.plotter = {}

@@ -46,13 +46,13 @@ class Params:
         """
         self.network = {**self.network, **kwargs}

-    def
+    def log_annotation(self, **kwargs) -> None:
         """Log annotation-related parameters.

         Args:
             **kwargs: Annotation parameters to log.
         """
-        self.
+        self.annotation = {**self.annotation, **kwargs}

     def log_neighborhoods(self, **kwargs) -> None:
         """Log neighborhood-related parameters.

@@ -137,9 +137,9 @@ class Params:
             Dict[str, Any]: A dictionary containing the processed parameters.
         """
         log_header("Loading parameters")
-        return _convert_ndarray_to_list(
+        return self._convert_ndarray_to_list(
             {
-                "
+                "annotation": self.annotation,
                 "datetime": self.datetime,
                 "graph": self.graph,
                 "neighborhoods": self.neighborhoods,

@@ -148,25 +148,24 @@ class Params:
             }
         )

+    def _convert_ndarray_to_list(self, d: Dict[str, Any]) -> Dict[str, Any]:
+        """Recursively convert all np.ndarray values in the dictionary to lists.

-
-
-
-        Args:
-            d (Dict[str, Any]): The dictionary to process.
+        Args:
+            d (Dict[str, Any]): The dictionary to process.

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        Returns:
+            Dict[str, Any]: The processed dictionary with np.ndarray values converted to lists.
+        """
+        if isinstance(d, dict):
+            # Recursively process each value in the dictionary
+            return {k: self._convert_ndarray_to_list(v) for k, v in d.items()}
+        if isinstance(d, list):
+            # Recursively process each item in the list
+            return [self._convert_ndarray_to_list(v) for v in d]
+        if isinstance(d, np.ndarray):
+            # Convert numpy arrays to lists
+            return d.tolist()
+
+        # Return the value unchanged if it's not a dict, List, or ndarray
+        return d
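Params gains a log_annotation method and an annotation parameter bucket, and the ndarray-to-list conversion used by load() is now the instance method _convert_ndarray_to_list shown above. A standalone sketch of that conversion on a toy parameter dictionary; the keys and values below are made up:

```python
import numpy as np
from typing import Any


def convert_ndarray_to_list(d: Any) -> Any:
    """Standalone version of the recursive conversion shown in the diff above."""
    if isinstance(d, dict):
        return {k: convert_ndarray_to_list(v) for k, v in d.items()}
    if isinstance(d, list):
        return [convert_ndarray_to_list(v) for v in d]
    if isinstance(d, np.ndarray):
        return d.tolist()
    return d


params_snapshot = {  # toy parameter dict; top-level keys mirror Params.initialize()
    "network": {"layout": np.array([[0.0, 1.0], [1.0, 0.0]])},
    "annotation": {"min_nodes_per_term": 2},
}
print(convert_ndarray_to_list(params_snapshot))  # ndarrays become plain lists, JSON-safe
```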