risk-network 0.0.8b18__py3-none-any.whl → 0.0.9b26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- risk/__init__.py +2 -2
- risk/annotations/__init__.py +2 -2
- risk/annotations/annotations.py +133 -72
- risk/annotations/io.py +50 -34
- risk/log/__init__.py +4 -2
- risk/log/{config.py → console.py} +5 -3
- risk/log/{params.py → parameters.py} +21 -46
- risk/neighborhoods/__init__.py +3 -5
- risk/neighborhoods/api.py +446 -0
- risk/neighborhoods/community.py +281 -96
- risk/neighborhoods/domains.py +92 -38
- risk/neighborhoods/neighborhoods.py +210 -149
- risk/network/__init__.py +1 -3
- risk/network/geometry.py +69 -58
- risk/network/graph/__init__.py +6 -0
- risk/network/graph/api.py +194 -0
- risk/network/graph/network.py +269 -0
- risk/network/graph/summary.py +254 -0
- risk/network/io.py +58 -48
- risk/network/plotter/__init__.py +6 -0
- risk/network/plotter/api.py +54 -0
- risk/network/{plot → plotter}/canvas.py +80 -26
- risk/network/{plot → plotter}/contour.py +43 -34
- risk/network/{plot → plotter}/labels.py +123 -113
- risk/network/plotter/network.py +424 -0
- risk/network/plotter/utils/colors.py +416 -0
- risk/network/plotter/utils/layout.py +94 -0
- risk/risk.py +11 -469
- risk/stats/__init__.py +8 -4
- risk/stats/binom.py +51 -0
- risk/stats/chi2.py +69 -0
- risk/stats/hypergeom.py +28 -18
- risk/stats/permutation/__init__.py +1 -1
- risk/stats/permutation/permutation.py +45 -39
- risk/stats/permutation/test_functions.py +25 -17
- risk/stats/poisson.py +17 -11
- risk/stats/stats.py +20 -16
- risk/stats/zscore.py +68 -0
- {risk_network-0.0.8b18.dist-info → risk_network-0.0.9b26.dist-info}/METADATA +9 -5
- risk_network-0.0.9b26.dist-info/RECORD +44 -0
- {risk_network-0.0.8b18.dist-info → risk_network-0.0.9b26.dist-info}/WHEEL +1 -1
- risk/network/graph.py +0 -159
- risk/network/plot/__init__.py +0 -6
- risk/network/plot/network.py +0 -282
- risk/network/plot/plotter.py +0 -137
- risk/network/plot/utils/color.py +0 -353
- risk/network/plot/utils/layout.py +0 -53
- risk_network-0.0.8b18.dist-info/RECORD +0 -37
- {risk_network-0.0.8b18.dist-info → risk_network-0.0.9b26.dist-info}/LICENSE +0 -0
- {risk_network-0.0.8b18.dist-info → risk_network-0.0.9b26.dist-info}/top_level.txt +0 -0
risk/__init__.py
CHANGED
risk/annotations/__init__.py
CHANGED
@@ -3,5 +3,5 @@ risk/annotations
 ~~~~~~~~~~~~~~~~
 """

-from .annotations import define_top_annotations,
-from .io import AnnotationsIO
+from risk.annotations.annotations import define_top_annotations, get_weighted_description
+from risk.annotations.io import AnnotationsIO
risk/annotations/annotations.py
CHANGED
@@ -3,6 +3,7 @@ risk/annotations/annotations
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 """

+import re
 from collections import Counter
 from itertools import compress
 from typing import Any, Dict, List, Set
@@ -14,6 +15,9 @@ import pandas as pd
 from nltk.tokenize import word_tokenize
 from nltk.corpus import stopwords

+from risk.log import logger
+from scipy.sparse import csr_matrix
+

 def _setup_nltk():
     """Ensure necessary NLTK data is downloaded."""
@@ -30,107 +34,144 @@ def _setup_nltk():

 # Ensure you have the necessary NLTK data
 _setup_nltk()
+# Initialize English stopwords
+stop_words = set(stopwords.words("english"))


-def load_annotations(
+def load_annotations(
+    network: nx.Graph, annotations_input: Dict[str, Any], min_nodes_per_term: int = 2
+) -> Dict[str, Any]:
     """Convert annotations input to a DataFrame and reindex based on the network's node labels.

     Args:
-
+        network (nx.Graph): The network graph.
+        annotations_input (Dict[str, Any]): A dictionary with annotations.
+        min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+            term to be included. Defaults to 2.
+        use_sparse (bool, optional): Whether to return the annotations matrix as a sparse matrix. Defaults to True.

     Returns:
-
+        Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the sparse binary annotations
+            matrix.
+
+    Raises:
+        ValueError: If no annotations are found for the nodes in the network.
+        ValueError: If no annotations have at least min_nodes_per_term nodes in the network.
     """
     # Flatten the dictionary to a list of tuples for easier DataFrame creation
     flattened_annotations = [
         (node, annotation) for annotation, nodes in annotations_input.items() for node in nodes
     ]
     # Create a DataFrame from the flattened list
-    annotations = pd.DataFrame(flattened_annotations, columns=["
-    annotations["
+    annotations = pd.DataFrame(flattened_annotations, columns=["node", "annotations"])
+    annotations["is_member"] = 1
     # Pivot to create a binary matrix with nodes as rows and annotations as columns
     annotations_pivot = annotations.pivot_table(
-        index="
+        index="node", columns="annotations", values="is_member", fill_value=0, dropna=False
     )
     # Reindex the annotations matrix based on the node labels from the network
-    node_label_order =
+    node_label_order = (attr["label"] for _, attr in network.nodes(data=True) if "label" in attr)
     annotations_pivot = annotations_pivot.reindex(index=node_label_order)
     # Raise an error if no valid annotations are found for the nodes in the network
     if annotations_pivot.notnull().sum().sum() == 0:
+        raise ValueError("No terms found in the annotation file for the nodes in the network.")
+
+    # Filter out annotations with fewer than min_nodes_per_term occurrences
+    num_terms_before_filtering = annotations_pivot.shape[1]
+    annotations_pivot = annotations_pivot.loc[
+        :, (annotations_pivot.sum(axis=0) >= min_nodes_per_term)
+    ]
+    num_terms_after_filtering = annotations_pivot.shape[1]
+    # Log the number of annotations before and after filtering
+    logger.info(f"Minimum number of nodes per annotation term: {min_nodes_per_term}")
+    logger.info(f"Number of input annotation terms: {num_terms_before_filtering}")
+    logger.info(f"Number of remaining annotation terms: {num_terms_after_filtering}")
+    if num_terms_after_filtering == 0:
         raise ValueError(
-            "No
+            f"No annotation terms found with at least {min_nodes_per_term} nodes in the network."
         )

-    # Remove columns with all zeros to improve performance
-    annotations_pivot = annotations_pivot.loc[:, annotations_pivot.sum(axis=0) != 0]
     # Extract ordered nodes and annotations
     ordered_nodes = tuple(annotations_pivot.index)
     ordered_annotations = tuple(annotations_pivot.columns)
-    # Convert the annotations_pivot matrix to a numpy array
-
+    # Convert the annotations_pivot matrix to a numpy array or sparse matrix
+    annotations_pivot_binary = (annotations_pivot.fillna(0).to_numpy() > 0).astype(int)
+    # Convert the binary annotations matrix to a sparse matrix
+    annotations_pivot_binary = csr_matrix(annotations_pivot_binary)

     return {
         "ordered_nodes": ordered_nodes,
         "ordered_annotations": ordered_annotations,
-        "matrix":
+        "matrix": annotations_pivot_binary,
     }


 def define_top_annotations(
     network: nx.Graph,
     ordered_annotation_labels: List[str],
-
-
+    neighborhood_significance_sums: List[int],
+    significant_significance_matrix: np.ndarray,
+    significant_binary_significance_matrix: np.ndarray,
     min_cluster_size: int = 5,
     max_cluster_size: int = 1000,
 ) -> pd.DataFrame:
-    """Define top annotations based on neighborhood
+    """Define top annotations based on neighborhood significance sums and binary significance matrix.

     Args:
         network (NetworkX graph): The network graph.
         ordered_annotation_labels (list of str): List of ordered annotation labels.
-
-
+        neighborhood_significance_sums (list of int): List of neighborhood significance sums.
+        significant_significance_matrix (np.ndarray): Enrichment matrix below alpha threshold.
+        significant_binary_significance_matrix (np.ndarray): Binary significance matrix below alpha threshold.
         min_cluster_size (int, optional): Minimum cluster size. Defaults to 5.
         max_cluster_size (int, optional): Maximum cluster size. Defaults to 1000.

     Returns:
         pd.DataFrame: DataFrame with top annotations and their properties.
     """
-    #
-
+    # Sum the columns of the significant significance matrix (positive floating point values)
+    significant_significance_scores = significant_significance_matrix.sum(axis=0)
+    # Create DataFrame to store annotations, their neighborhood significance sums, and significance scores
+    annotations_significance_matrix = pd.DataFrame(
         {
             "id": range(len(ordered_annotation_labels)),
-            "
-            "
+            "full_terms": ordered_annotation_labels,
+            "significant_neighborhood_significance_sums": neighborhood_significance_sums,
+            "significant_significance_score": significant_significance_scores,
         }
     )
-
-    # Apply size constraints to identify potential
-
-        (
-
-
+    annotations_significance_matrix["significant_annotations"] = False
+    # Apply size constraints to identify potential significant annotations
+    annotations_significance_matrix.loc[
+        (
+            annotations_significance_matrix["significant_neighborhood_significance_sums"]
+            >= min_cluster_size
+        )
+        & (
+            annotations_significance_matrix["significant_neighborhood_significance_sums"]
+            <= max_cluster_size
+        ),
+        "significant_annotations",
     ] = True
     # Initialize columns for connected components analysis
-
-
-
-        "
+    annotations_significance_matrix["num_connected_components"] = 0
+    annotations_significance_matrix["size_connected_components"] = None
+    annotations_significance_matrix["size_connected_components"] = annotations_significance_matrix[
+        "size_connected_components"
     ].astype(object)
-
+    annotations_significance_matrix["num_large_connected_components"] = 0

-    for attribute in
-
+    for attribute in annotations_significance_matrix.index.values[
+        annotations_significance_matrix["significant_annotations"]
     ]:
-        # Identify
-
-            compress(list(network),
+        # Identify significant neighborhoods based on the binary significance matrix
+        significant_neighborhoods = list(
+            compress(list(network), significant_binary_significance_matrix[:, attribute])
         )
-
-        # Analyze connected components within the
+        significant_network = nx.subgraph(network, significant_neighborhoods)
+        # Analyze connected components within the significant subnetwork
         connected_components = sorted(
-            nx.connected_components(
+            nx.connected_components(significant_network), key=len, reverse=True
         )
         size_connected_components = np.array([len(c) for c in connected_components])

@@ -144,55 +185,75 @@ def define_top_annotations(
         num_large_connected_components = len(filtered_size_connected_components)

         # Assign the number of connected components
-
+        annotations_significance_matrix.loc[attribute, "num_connected_components"] = (
             num_connected_components
         )
         # Filter out attributes with more than one connected component
-
-
+        annotations_significance_matrix.loc[
+            annotations_significance_matrix["num_connected_components"] > 1,
+            "significant_annotations",
         ] = False
         # Assign the number of large connected components
-
+        annotations_significance_matrix.loc[attribute, "num_large_connected_components"] = (
             num_large_connected_components
         )
         # Assign the size of connected components, ensuring it is always a list
-
+        annotations_significance_matrix.at[attribute, "size_connected_components"] = (
             filtered_size_connected_components.tolist()
         )

-    return
+    return annotations_significance_matrix


-def
-    """
-
+def get_weighted_description(words_column: pd.Series, scores_column: pd.Series) -> str:
+    """Generate a weighted description from words and their corresponding scores,
+    with support for stopwords filtering and improved weighting logic.

     Args:
         words_column (pd.Series): A pandas Series containing strings to process.
+        scores_column (pd.Series): A pandas Series containing significance scores to weigh the terms.

     Returns:
-        str: A coherent description formed from the most frequent and significant words.
+        str: A coherent description formed from the most frequent and significant words, weighed by significance scores.
     """
-    #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # Handle case where all scores are the same
+    if scores_column.max() == scores_column.min():
+        normalized_scores = pd.Series([1] * len(scores_column))
+    else:
+        # Normalize the significance scores to be between 0 and 1
+        normalized_scores = (scores_column - scores_column.min()) / (
+            scores_column.max() - scores_column.min()
+        )
+
+    # Combine words and normalized scores to create weighted words
+    weighted_words = []
+    for word, score in zip(words_column, normalized_scores):
+        word = str(word)
+        if word not in stop_words:  # Skip stopwords
+            weight = max(1, int((0 if pd.isna(score) else score) * 10))
+            weighted_words.extend([word] * weight)
+
+    # Tokenize the weighted words, but preserve number-word patterns like '4-alpha'
+    tokens = word_tokenize(" ".join(weighted_words))
+    # Ensure we treat "4-alpha" or other "number-word" patterns as single tokens
+    combined_tokens = []
+    for token in tokens:
+        # Match patterns like '4-alpha' or '5-hydroxy' and keep them together
+        if re.match(r"^\d+-\w+", token):
+            combined_tokens.append(token)
+        elif token.replace(".", "", 1).isdigit():  # Handle pure numeric tokens
+            # Ignore pure numbers as descriptions unless necessary
+            continue
+        else:
+            combined_tokens.append(token)
+
+    # Prevent descriptions like just '4' from being selected
+    if len(combined_tokens) == 1 and combined_tokens[0].isdigit():
+        return "N/A"  # Return "N/A" for cases where it's just a number
+
+    # Simplify the word list and generate the description
+    simplified_words = _simplify_word_list(combined_tokens)
+    description = _generate_coherent_description(simplified_words)

     return description

@@ -255,7 +316,7 @@ def _generate_coherent_description(words: List[str]) -> str:
     If there is only one unique entry, return it directly.

     Args:
-        words (
+        words (List): A list of words or numerical string values.

     Returns:
         str: A coherent description formed by arranging the words in a logical sequence.
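Note on the load_annotations change above: terms are now filtered by min_nodes_per_term and the "matrix" entry is returned as a SciPy csr_matrix rather than a dense NumPy array, so callers that previously indexed an ndarray need to densify or switch to sparse operations. A minimal usage sketch under those assumptions; the toy graph and term names below are hypothetical, not part of the package:

    import networkx as nx
    from risk.annotations.annotations import load_annotations

    # Hypothetical toy graph; load_annotations reindexes on each node's "label" attribute
    network = nx.Graph()
    for name in ("geneA", "geneB", "geneC"):
        network.add_node(name, label=name)

    annotations_input = {
        "term1": ["geneA", "geneB"],  # kept: meets min_nodes_per_term=2
        "term2": ["geneC"],           # dropped: only one node in the network
    }

    result = load_annotations(network, annotations_input, min_nodes_per_term=2)
    matrix = result["matrix"]         # scipy.sparse.csr_matrix in 0.0.9b26, dense array before
    dense = matrix.toarray()          # densify if older downstream code expects np.ndarray
    print(result["ordered_annotations"], dense.shape)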
risk/annotations/io.py
CHANGED
@@ -1,8 +1,6 @@
 """
 risk/annotations/io
 ~~~~~~~~~~~~~~~~~~~
-
-This file contains the code for the RISK class and command-line access.
 """

 import json
@@ -25,27 +23,32 @@ class AnnotationsIO:
     def __init__(self):
         pass

-    def load_json_annotation(
+    def load_json_annotation(
+        self, network: nx.Graph, filepath: str, min_nodes_per_term: int = 2
+    ) -> Dict[str, Any]:
         """Load annotations from a JSON file and convert them to a DataFrame.

         Args:
             network (NetworkX graph): The network to which the annotations are related.
             filepath (str): Path to the JSON annotations file.
+            min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+                term to be included. Defaults to 2.

         Returns:
-
+            Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotations matrix.
         """
         filetype = "JSON"
         # Log the loading of the JSON file
-        params.log_annotations(
+        params.log_annotations(
+            filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
+        )
         _log_loading(filetype, filepath=filepath)

-        #
-        with open(filepath, "r") as file:
+        # Load the JSON file into a dictionary
+        with open(filepath, "r", encoding="utf-8") as file:
             annotations_input = json.load(file)

-
-        return load_annotations(network, annotations_input)
+        return load_annotations(network, annotations_input, min_nodes_per_term)

     def load_excel_annotation(
         self,
@@ -55,6 +58,7 @@ class AnnotationsIO:
         nodes_colname: str = "nodes",
         sheet_name: str = "Sheet1",
         nodes_delimiter: str = ";",
+        min_nodes_per_term: int = 2,
     ) -> Dict[str, Any]:
         """Load annotations from an Excel file and associate them with the network.

@@ -65,6 +69,8 @@ class AnnotationsIO:
             nodes_colname (str): Name of the column containing the nodes associated with each label.
             sheet_name (str, optional): The name of the Excel sheet to load (default is 'Sheet1').
             nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
+            min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+                term to be included. Defaults to 2.

         Returns:
             Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes,
@@ -72,18 +78,21 @@ class AnnotationsIO:
         """
         filetype = "Excel"
         # Log the loading of the Excel file
-        params.log_annotations(
+        params.log_annotations(
+            filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
+        )
         _log_loading(filetype, filepath=filepath)

         # Load the specified sheet from the Excel file
-
+        annotation = pd.read_excel(filepath, sheet_name=sheet_name)
         # Split the nodes column by the specified nodes_delimiter
-
+        annotation[nodes_colname] = annotation[nodes_colname].apply(
+            lambda x: x.split(nodes_delimiter)
+        )
         # Convert the DataFrame to a dictionary pairing labels with their corresponding nodes
-
+        annotations_input = annotation.set_index(label_colname)[nodes_colname].to_dict()

-
-        return load_annotations(network, label_node_dict)
+        return load_annotations(network, annotations_input, min_nodes_per_term)

     def load_csv_annotation(
         self,
@@ -92,6 +101,7 @@ class AnnotationsIO:
         label_colname: str = "label",
         nodes_colname: str = "nodes",
         nodes_delimiter: str = ";",
+        min_nodes_per_term: int = 2,
     ) -> Dict[str, Any]:
         """Load annotations from a CSV file and associate them with the network.

@@ -101,6 +111,8 @@ class AnnotationsIO:
             label_colname (str): Name of the column containing the labels (e.g., GO terms).
             nodes_colname (str): Name of the column containing the nodes associated with each label.
             nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
+            min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+                term to be included. Defaults to 2.

         Returns:
             Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes,
@@ -108,7 +120,9 @@ class AnnotationsIO:
         """
         filetype = "CSV"
         # Log the loading of the CSV file
-        params.log_annotations(
+        params.log_annotations(
+            filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
+        )
         _log_loading(filetype, filepath=filepath)

         # Load the CSV file into a dictionary
@@ -116,8 +130,7 @@ class AnnotationsIO:
             filepath, label_colname, nodes_colname, delimiter=",", nodes_delimiter=nodes_delimiter
         )

-
-        return load_annotations(network, annotations_input)
+        return load_annotations(network, annotations_input, min_nodes_per_term)

     def load_tsv_annotation(
         self,
@@ -126,6 +139,7 @@ class AnnotationsIO:
         label_colname: str = "label",
         nodes_colname: str = "nodes",
         nodes_delimiter: str = ";",
+        min_nodes_per_term: int = 2,
     ) -> Dict[str, Any]:
         """Load annotations from a TSV file and associate them with the network.

@@ -135,6 +149,8 @@ class AnnotationsIO:
             label_colname (str): Name of the column containing the labels (e.g., GO terms).
             nodes_colname (str): Name of the column containing the nodes associated with each label.
             nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
+            min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+                term to be included. Defaults to 2.

         Returns:
             Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes,
@@ -142,7 +158,9 @@ class AnnotationsIO:
         """
         filetype = "TSV"
         # Log the loading of the TSV file
-        params.log_annotations(
+        params.log_annotations(
+            filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
+        )
         _log_loading(filetype, filepath=filepath)

         # Load the TSV file into a dictionary
@@ -150,18 +168,21 @@ class AnnotationsIO:
             filepath, label_colname, nodes_colname, delimiter="\t", nodes_delimiter=nodes_delimiter
         )

-
-        return load_annotations(network, annotations_input)
+        return load_annotations(network, annotations_input, min_nodes_per_term)

-    def load_dict_annotation(
+    def load_dict_annotation(
+        self, network: nx.Graph, content: Dict[str, Any], min_nodes_per_term: int = 2
+    ) -> Dict[str, Any]:
         """Load annotations from a provided dictionary and convert them to a dictionary annotation.

         Args:
             network (NetworkX graph): The network to which the annotations are related.
-            content (
+            content (Dict[str, Any]): The annotations dictionary to load.
+            min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+                term to be included. Defaults to 2.

         Returns:
-
+            Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotations matrix.
         """
         # Ensure the input content is a dictionary
         if not isinstance(content, dict):
@@ -174,13 +195,8 @@ class AnnotationsIO:
         params.log_annotations(filepath="In-memory dictionary", filetype=filetype)
         _log_loading(filetype, "In-memory dictionary")

-        # Load the annotations
-
-        # Ensure the output is a dictionary
-        if not isinstance(annotations_dict, dict):
-            raise ValueError("Expected output to be a dictionary")
-
-        return annotations_dict
+        # Load the annotations as a dictionary from the provided dictionary
+        return load_annotations(network, content, min_nodes_per_term)


 def _load_matrix_file(
@@ -203,11 +219,11 @@ def _load_matrix_file(
         Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes.
     """
     # Load the CSV or TSV file into a DataFrame
-
+    annotation = pd.read_csv(filepath, delimiter=delimiter)
     # Split the nodes column by the nodes_delimiter to handle multiple nodes per label
-
+    annotation[nodes_colname] = annotation[nodes_colname].apply(lambda x: x.split(nodes_delimiter))
     # Create a dictionary pairing labels with their corresponding list of nodes
-    label_node_dict =
+    label_node_dict = annotation.set_index(label_colname)[nodes_colname].to_dict()
     return label_node_dict

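All AnnotationsIO loaders above now accept and forward the same min_nodes_per_term cutoff. A hedged sketch of how a caller might pass it; the file paths are placeholders, and the keyword names for the CSV loader are assumed to mirror the JSON loader shown in the diff:

    import networkx as nx
    from risk.annotations.io import AnnotationsIO

    # Hypothetical graph whose node "label" attributes match the node names in the files
    network = nx.Graph()
    for name in ("geneA", "geneB", "geneC"):
        network.add_node(name, label=name)

    io = AnnotationsIO()
    # JSON file of the form {"TERM": ["geneA", "geneB", ...], ...}; path is a placeholder
    annotations = io.load_json_annotation(network, "annotations.json", min_nodes_per_term=3)

    # CSV with "label" and "nodes" columns, nodes separated by ";"
    annotations = io.load_csv_annotation(
        network=network,
        filepath="annotations.csv",
        label_colname="label",
        nodes_colname="nodes",
        nodes_delimiter=";",
        min_nodes_per_term=3,
    )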
risk/log/__init__.py
CHANGED
@@ -3,7 +3,9 @@ risk/log
 ~~~~~~~~
 """

-from .
-from .
+from risk.log.console import logger, log_header, set_global_verbosity
+from risk.log.parameters import Params

+# Initialize the global parameters logger
 params = Params()
+params.initialize()
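With the module renames (config.py → console.py, params.py → parameters.py), risk.log now re-exports a shared logger, the set_global_verbosity helper, and a module-level params registry that is initialized at import. A rough sketch of the resulting import surface; the boolean argument to set_global_verbosity is an assumption based on its name, and only calls shown elsewhere in this diff are used:

    from risk.log import logger, params, set_global_verbosity

    set_global_verbosity(True)  # assumed to toggle console output for the shared logger
    logger.info("Loading annotations...")

    # The loaders in risk/annotations/io.py record their inputs through this global registry
    params.log_annotations(filetype="JSON", filepath="annotations.json", min_nodes_per_term=2)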
risk/log/{config.py → console.py}
RENAMED
@@ -1,6 +1,6 @@
 """
-risk/log/
-
+risk/log/console
+~~~~~~~~~~~~~~~~
 """

 import logging
@@ -16,8 +16,10 @@ def in_jupyter():
         shell = get_ipython().__class__.__name__
         if shell == "ZMQInteractiveShell":  # Jupyter Notebook or QtConsole
             return True
-
+        if shell == "TerminalInteractiveShell":  # Terminal running IPython
             return False
+
+        return False  # Other type (?)
     except NameError:
         return False  # Not in Jupyter
