risk-network: risk_network-0.0.9b4-py3-none-any.whl → risk_network-0.0.9b6-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- risk/__init__.py +1 -1
- risk/annotations/annotations.py +26 -4
- risk/annotations/io.py +27 -19
- {risk_network-0.0.9b4.dist-info → risk_network-0.0.9b6.dist-info}/METADATA +1 -1
- {risk_network-0.0.9b4.dist-info → risk_network-0.0.9b6.dist-info}/RECORD +8 -8
- {risk_network-0.0.9b4.dist-info → risk_network-0.0.9b6.dist-info}/LICENSE +0 -0
- {risk_network-0.0.9b4.dist-info → risk_network-0.0.9b6.dist-info}/WHEEL +0 -0
- {risk_network-0.0.9b4.dist-info → risk_network-0.0.9b6.dist-info}/top_level.txt +0 -0
risk/__init__.py
CHANGED
risk/annotations/annotations.py
CHANGED
@@ -15,6 +15,8 @@ import pandas as pd
|
|
15
15
|
from nltk.tokenize import word_tokenize
|
16
16
|
from nltk.corpus import stopwords
|
17
17
|
|
18
|
+
from risk.log import logger
|
19
|
+
|
18
20
|
|
19
21
|
def _setup_nltk():
|
20
22
|
"""Ensure necessary NLTK data is downloaded."""
|
@@ -35,15 +37,23 @@ _setup_nltk()
|
|
35
37
|
stop_words = set(stopwords.words("english"))
|
36
38
|
|
37
39
|
|
38
|
-
def load_annotations(
|
40
|
+
def load_annotations(
|
41
|
+
network: nx.Graph, annotations_input: Dict[str, Any], min_nodes_per_term: int = 2
|
42
|
+
) -> Dict[str, Any]:
|
39
43
|
"""Convert annotations input to a DataFrame and reindex based on the network's node labels.
|
40
44
|
|
41
45
|
Args:
|
42
46
|
network (nx.Graph): The network graph.
|
43
47
|
annotations_input (Dict[str, Any]): A dictionary with annotations.
|
48
|
+
min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
|
49
|
+
term to be included. Defaults to 2.
|
44
50
|
|
45
51
|
Returns:
|
46
52
|
Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the binary annotations matrix.
|
53
|
+
|
54
|
+
Raises:
|
55
|
+
ValueError: If no annotations are found for the nodes in the network.
|
56
|
+
ValueError: If no annotations have at least min_nodes_per_term nodes in the network.
|
47
57
|
"""
|
48
58
|
# Flatten the dictionary to a list of tuples for easier DataFrame creation
|
49
59
|
flattened_annotations = [
|
@@ -61,12 +71,24 @@ def load_annotations(network: nx.Graph, annotations_input: Dict[str, Any]) -> Di
|
|
61
71
|
annotations_pivot = annotations_pivot.reindex(index=node_label_order)
|
62
72
|
# Raise an error if no valid annotations are found for the nodes in the network
|
63
73
|
if annotations_pivot.notnull().sum().sum() == 0:
|
74
|
+
raise ValueError("No terms found in the annotation file for the nodes in the network.")
|
75
|
+
|
76
|
+
# Filter out annotations with fewer than min_nodes_per_term occurrences
|
77
|
+
# This assists in reducing noise and focusing on more relevant annotations for statistical analysis
|
78
|
+
num_terms_before_filtering = annotations_pivot.shape[1]
|
79
|
+
annotations_pivot = annotations_pivot.loc[
|
80
|
+
:, (annotations_pivot.sum(axis=0) >= min_nodes_per_term)
|
81
|
+
]
|
82
|
+
num_terms_after_filtering = annotations_pivot.shape[1]
|
83
|
+
# Log the number of annotations before and after filtering
|
84
|
+
logger.info(f"Minimum number of nodes per annotation term: {min_nodes_per_term}")
|
85
|
+
logger.info(f"Number of input annotation terms: {num_terms_before_filtering}")
|
86
|
+
logger.info(f"Number of remaining annotation terms: {num_terms_after_filtering}")
|
87
|
+
if num_terms_after_filtering == 0:
|
64
88
|
raise ValueError(
|
65
|
-
"No
|
89
|
+
f"No annotation terms found with at least {min_nodes_per_term} nodes in the network."
|
66
90
|
)
|
67
91
|
|
68
|
-
# Remove columns with all zeros to improve performance
|
69
|
-
annotations_pivot = annotations_pivot.loc[:, annotations_pivot.sum(axis=0) != 0]
|
70
92
|
# Extract ordered nodes and annotations
|
71
93
|
ordered_nodes = tuple(annotations_pivot.index)
|
72
94
|
ordered_annotations = tuple(annotations_pivot.columns)
|
risk/annotations/io.py
CHANGED
@@ -25,12 +25,16 @@ class AnnotationsIO:
|
|
25
25
|
def __init__(self):
|
26
26
|
pass
|
27
27
|
|
28
|
-
def load_json_annotation(
|
28
|
+
def load_json_annotation(
|
29
|
+
self, network: nx.Graph, filepath: str, min_nodes_per_term: int = 2
|
30
|
+
) -> Dict[str, Any]:
|
29
31
|
"""Load annotations from a JSON file and convert them to a DataFrame.
|
30
32
|
|
31
33
|
Args:
|
32
34
|
network (NetworkX graph): The network to which the annotations are related.
|
33
35
|
filepath (str): Path to the JSON annotations file.
|
36
|
+
min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
|
37
|
+
term to be included. Defaults to 2.
|
34
38
|
|
35
39
|
Returns:
|
36
40
|
Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotations matrix.
|
@@ -40,12 +44,11 @@ class AnnotationsIO:
|
|
40
44
|
params.log_annotations(filepath=filepath, filetype=filetype)
|
41
45
|
_log_loading(filetype, filepath=filepath)
|
42
46
|
|
43
|
-
#
|
47
|
+
# Load the JSON file into a dictionary
|
44
48
|
with open(filepath, "r") as file:
|
45
49
|
annotations_input = json.load(file)
|
46
50
|
|
47
|
-
|
48
|
-
return load_annotations(network, annotations_input)
|
51
|
+
return load_annotations(network, annotations_input, min_nodes_per_term)
|
49
52
|
|
50
53
|
def load_excel_annotation(
|
51
54
|
self,
|
@@ -55,6 +58,7 @@ class AnnotationsIO:
|
|
55
58
|
nodes_colname: str = "nodes",
|
56
59
|
sheet_name: str = "Sheet1",
|
57
60
|
nodes_delimiter: str = ";",
|
61
|
+
min_nodes_per_term: int = 2,
|
58
62
|
) -> Dict[str, Any]:
|
59
63
|
"""Load annotations from an Excel file and associate them with the network.
|
60
64
|
|
@@ -65,6 +69,8 @@ class AnnotationsIO:
|
|
65
69
|
nodes_colname (str): Name of the column containing the nodes associated with each label.
|
66
70
|
sheet_name (str, optional): The name of the Excel sheet to load (default is 'Sheet1').
|
67
71
|
nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
|
72
|
+
min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
|
73
|
+
term to be included. Defaults to 2.
|
68
74
|
|
69
75
|
Returns:
|
70
76
|
Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes,
|
@@ -82,10 +88,9 @@ class AnnotationsIO:
|
|
82
88
|
lambda x: x.split(nodes_delimiter)
|
83
89
|
)
|
84
90
|
# Convert the DataFrame to a dictionary pairing labels with their corresponding nodes
|
85
|
-
|
91
|
+
annotations_input = annotation.set_index(label_colname)[nodes_colname].to_dict()
|
86
92
|
|
87
|
-
|
88
|
-
return load_annotations(network, label_node_dict)
|
93
|
+
return load_annotations(network, annotations_input, min_nodes_per_term)
|
89
94
|
|
90
95
|
def load_csv_annotation(
|
91
96
|
self,
|
@@ -94,6 +99,7 @@ class AnnotationsIO:
|
|
94
99
|
label_colname: str = "label",
|
95
100
|
nodes_colname: str = "nodes",
|
96
101
|
nodes_delimiter: str = ";",
|
102
|
+
min_nodes_per_term: int = 2,
|
97
103
|
) -> Dict[str, Any]:
|
98
104
|
"""Load annotations from a CSV file and associate them with the network.
|
99
105
|
|
@@ -103,6 +109,8 @@ class AnnotationsIO:
|
|
103
109
|
label_colname (str): Name of the column containing the labels (e.g., GO terms).
|
104
110
|
nodes_colname (str): Name of the column containing the nodes associated with each label.
|
105
111
|
nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
|
112
|
+
min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
|
113
|
+
term to be included. Defaults to 2.
|
106
114
|
|
107
115
|
Returns:
|
108
116
|
Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes,
|
@@ -118,8 +126,7 @@ class AnnotationsIO:
|
|
118
126
|
filepath, label_colname, nodes_colname, delimiter=",", nodes_delimiter=nodes_delimiter
|
119
127
|
)
|
120
128
|
|
121
|
-
|
122
|
-
return load_annotations(network, annotations_input)
|
129
|
+
return load_annotations(network, annotations_input, min_nodes_per_term)
|
123
130
|
|
124
131
|
def load_tsv_annotation(
|
125
132
|
self,
|
@@ -128,6 +135,7 @@ class AnnotationsIO:
|
|
128
135
|
label_colname: str = "label",
|
129
136
|
nodes_colname: str = "nodes",
|
130
137
|
nodes_delimiter: str = ";",
|
138
|
+
min_nodes_per_term: int = 2,
|
131
139
|
) -> Dict[str, Any]:
|
132
140
|
"""Load annotations from a TSV file and associate them with the network.
|
133
141
|
|
@@ -137,6 +145,8 @@ class AnnotationsIO:
|
|
137
145
|
label_colname (str): Name of the column containing the labels (e.g., GO terms).
|
138
146
|
nodes_colname (str): Name of the column containing the nodes associated with each label.
|
139
147
|
nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
|
148
|
+
min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
|
149
|
+
term to be included. Defaults to 2.
|
140
150
|
|
141
151
|
Returns:
|
142
152
|
Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes,
|
@@ -152,15 +162,18 @@ class AnnotationsIO:
|
|
152
162
|
filepath, label_colname, nodes_colname, delimiter="\t", nodes_delimiter=nodes_delimiter
|
153
163
|
)
|
154
164
|
|
155
|
-
|
156
|
-
return load_annotations(network, annotations_input)
|
165
|
+
return load_annotations(network, annotations_input, min_nodes_per_term)
|
157
166
|
|
158
|
-
def load_dict_annotation(
|
167
|
+
def load_dict_annotation(
|
168
|
+
self, network: nx.Graph, content: Dict[str, Any], min_nodes_per_term: int = 2
|
169
|
+
) -> Dict[str, Any]:
|
159
170
|
"""Load annotations from a provided dictionary and convert them to a dictionary annotation.
|
160
171
|
|
161
172
|
Args:
|
162
173
|
network (NetworkX graph): The network to which the annotations are related.
|
163
174
|
content (Dict[str, Any]): The annotations dictionary to load.
|
175
|
+
min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
|
176
|
+
term to be included. Defaults to 2.
|
164
177
|
|
165
178
|
Returns:
|
166
179
|
Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotations matrix.
|
@@ -176,13 +189,8 @@ class AnnotationsIO:
|
|
176
189
|
params.log_annotations(filepath="In-memory dictionary", filetype=filetype)
|
177
190
|
_log_loading(filetype, "In-memory dictionary")
|
178
191
|
|
179
|
-
# Load the annotations
|
180
|
-
|
181
|
-
# Ensure the output is a dictionary
|
182
|
-
if not isinstance(annotations_dict, dict):
|
183
|
-
raise ValueError("Expected output to be a dictionary")
|
184
|
-
|
185
|
-
return annotations_dict
|
192
|
+
# Load the annotations as a dictionary from the provided dictionary
|
193
|
+
return load_annotations(network, content, min_nodes_per_term)
|
186
194
|
|
187
195
|
|
188
196
|
def _load_matrix_file(
|
@@ -1,9 +1,9 @@
|
|
1
|
-
risk/__init__.py,sha256=
|
1
|
+
risk/__init__.py,sha256=fpCtulKZFHI4Je7dm4qBJHyP9InK9uDRYmYYgog9BGQ,112
|
2
2
|
risk/constants.py,sha256=XInRaH78Slnw_sWgAsBFbUHkyA0h0jL0DKGuQNbOvjM,550
|
3
3
|
risk/risk.py,sha256=De1vn8Xc-TKz6aTL0bvJI-SVrIqU3k0IWAbKc7dde1c,23618
|
4
4
|
risk/annotations/__init__.py,sha256=kXgadEXaCh0z8OyhOhTj7c3qXGmWgOhaSZ4gSzSb59U,147
|
5
|
-
risk/annotations/annotations.py,sha256=
|
6
|
-
risk/annotations/io.py,sha256=
|
5
|
+
risk/annotations/annotations.py,sha256=WVT9wzTm8lTpMw_3SnbyljWR77yExo0rb1zVgJza8nw,14284
|
6
|
+
risk/annotations/io.py,sha256=Nj_RPmn-WM1zMsssm9bVGR94SHytkEBK-wcBJ3WhqkU,10310
|
7
7
|
risk/log/__init__.py,sha256=gy7C5L6D222AYUChq5lkc0LsCJ_QMQPaFiBJKbecdac,201
|
8
8
|
risk/log/console.py,sha256=C52s3FgQ2e9kQWcXL8m7rs_pnKXt5Yy8PBHmQkOTiNo,4537
|
9
9
|
risk/log/parameters.py,sha256=o4StqYCa0kt7_Ht4mKa1DwwvhGUwkC_dGBaiUIc0GB0,5683
|
@@ -32,8 +32,8 @@ risk/stats/stats.py,sha256=z8NrhiVj4BzJ250bVLfytpmfC7RzYu7mBuIZD_l0aCA,7222
|
|
32
32
|
risk/stats/permutation/__init__.py,sha256=neJp7FENC-zg_CGOXqv-iIvz1r5XUKI9Ruxhmq7kDOI,105
|
33
33
|
risk/stats/permutation/permutation.py,sha256=meBNSrbRa9P8WJ54n485l0H7VQJlMSfHqdN4aCKYCtQ,10105
|
34
34
|
risk/stats/permutation/test_functions.py,sha256=lftOude6hee0pyR80HlBD32522JkDoN5hrKQ9VEbuoY,2345
|
35
|
-
risk_network-0.0.
|
36
|
-
risk_network-0.0.
|
37
|
-
risk_network-0.0.
|
38
|
-
risk_network-0.0.
|
39
|
-
risk_network-0.0.
|
35
|
+
risk_network-0.0.9b6.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
|
36
|
+
risk_network-0.0.9b6.dist-info/METADATA,sha256=0YZ5Rd4bqOid3nSHpa-S6fBBtBhYPoAQ1SlACtmDVaw,47497
|
37
|
+
risk_network-0.0.9b6.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
|
38
|
+
risk_network-0.0.9b6.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
|
39
|
+
risk_network-0.0.9b6.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|