risk-network 0.0.8b26__py3-none-any.whl → 0.0.9b26__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
- risk/__init__.py +2 -2
- risk/annotations/__init__.py +2 -2
- risk/annotations/annotations.py +74 -47
- risk/annotations/io.py +47 -31
- risk/log/__init__.py +4 -2
- risk/log/{config.py → console.py} +5 -3
- risk/log/{params.py → parameters.py} +17 -42
- risk/neighborhoods/__init__.py +3 -5
- risk/neighborhoods/api.py +446 -0
- risk/neighborhoods/community.py +255 -77
- risk/neighborhoods/domains.py +62 -31
- risk/neighborhoods/neighborhoods.py +156 -160
- risk/network/__init__.py +1 -3
- risk/network/geometry.py +65 -57
- risk/network/graph/__init__.py +6 -0
- risk/network/graph/api.py +194 -0
- risk/network/{graph.py → graph/network.py} +87 -37
- risk/network/graph/summary.py +254 -0
- risk/network/io.py +56 -47
- risk/network/plotter/__init__.py +6 -0
- risk/network/plotter/api.py +54 -0
- risk/network/{plot → plotter}/canvas.py +7 -4
- risk/network/{plot → plotter}/contour.py +22 -19
- risk/network/{plot → plotter}/labels.py +69 -74
- risk/network/{plot → plotter}/network.py +170 -34
- risk/network/{plot/utils/color.py → plotter/utils/colors.py} +104 -112
- risk/network/{plot → plotter}/utils/layout.py +8 -5
- risk/risk.py +11 -500
- risk/stats/__init__.py +8 -4
- risk/stats/binom.py +51 -0
- risk/stats/chi2.py +69 -0
- risk/stats/hypergeom.py +27 -17
- risk/stats/permutation/__init__.py +1 -1
- risk/stats/permutation/permutation.py +44 -38
- risk/stats/permutation/test_functions.py +25 -17
- risk/stats/poisson.py +15 -9
- risk/stats/stats.py +15 -13
- risk/stats/zscore.py +68 -0
- {risk_network-0.0.8b26.dist-info → risk_network-0.0.9b26.dist-info}/METADATA +9 -5
- risk_network-0.0.9b26.dist-info/RECORD +44 -0
- {risk_network-0.0.8b26.dist-info → risk_network-0.0.9b26.dist-info}/WHEEL +1 -1
- risk/network/plot/__init__.py +0 -6
- risk/network/plot/plotter.py +0 -137
- risk_network-0.0.8b26.dist-info/RECORD +0 -37
- {risk_network-0.0.8b26.dist-info → risk_network-0.0.9b26.dist-info}/LICENSE +0 -0
- {risk_network-0.0.8b26.dist-info → risk_network-0.0.9b26.dist-info}/top_level.txt +0 -0
risk/__init__.py
CHANGED
risk/annotations/__init__.py
CHANGED
```diff
@@ -3,5 +3,5 @@ risk/annotations
 ~~~~~~~~~~~~~~~~
 """
 
-from .annotations import define_top_annotations, get_weighted_description
-from .io import AnnotationsIO
+from risk.annotations.annotations import define_top_annotations, get_weighted_description
+from risk.annotations.io import AnnotationsIO
```
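Only the import style changes here, from relative to absolute; the same names are still re-exported from the subpackage. A quick sanity check, assuming a standard install:

```python
# The re-exported public names are unchanged by the absolute-import switch.
from risk.annotations import AnnotationsIO, define_top_annotations, get_weighted_description
```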
risk/annotations/annotations.py
CHANGED
```diff
@@ -15,6 +15,9 @@ import pandas as pd
 from nltk.tokenize import word_tokenize
 from nltk.corpus import stopwords
 
+from risk.log import logger
+from scipy.sparse import csr_matrix
+
 
 def _setup_nltk():
     """Ensure necessary NLTK data is downloaded."""
@@ -35,15 +38,25 @@ _setup_nltk()
 stop_words = set(stopwords.words("english"))
 
 
-def load_annotations(network: nx.Graph, annotations_input: Dict[str, Any]) -> Dict[str, Any]:
+def load_annotations(
+    network: nx.Graph, annotations_input: Dict[str, Any], min_nodes_per_term: int = 2
+) -> Dict[str, Any]:
     """Convert annotations input to a DataFrame and reindex based on the network's node labels.
 
     Args:
         network (nx.Graph): The network graph.
        annotations_input (Dict[str, Any]): A dictionary with annotations.
+        min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+            term to be included. Defaults to 2.
+        use_sparse (bool, optional): Whether to return the annotations matrix as a sparse matrix. Defaults to True.
 
     Returns:
-        Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the binary annotations …
+        Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the sparse binary annotations
+            matrix.
+
+    Raises:
+        ValueError: If no annotations are found for the nodes in the network.
+        ValueError: If no annotations have at least min_nodes_per_term nodes in the network.
     """
     # Flatten the dictionary to a list of tuples for easier DataFrame creation
     flattened_annotations = [
@@ -57,95 +70,108 @@ def load_annotations(network: nx.Graph, annotations_input: Dict[str, Any]) -> Dict[str, Any]:
         index="node", columns="annotations", values="is_member", fill_value=0, dropna=False
     )
     # Reindex the annotations matrix based on the node labels from the network
-    node_label_order = …
+    node_label_order = (attr["label"] for _, attr in network.nodes(data=True) if "label" in attr)
     annotations_pivot = annotations_pivot.reindex(index=node_label_order)
     # Raise an error if no valid annotations are found for the nodes in the network
     if annotations_pivot.notnull().sum().sum() == 0:
+        raise ValueError("No terms found in the annotation file for the nodes in the network.")
+
+    # Filter out annotations with fewer than min_nodes_per_term occurrences
+    num_terms_before_filtering = annotations_pivot.shape[1]
+    annotations_pivot = annotations_pivot.loc[
+        :, (annotations_pivot.sum(axis=0) >= min_nodes_per_term)
+    ]
+    num_terms_after_filtering = annotations_pivot.shape[1]
+    # Log the number of annotations before and after filtering
+    logger.info(f"Minimum number of nodes per annotation term: {min_nodes_per_term}")
+    logger.info(f"Number of input annotation terms: {num_terms_before_filtering}")
+    logger.info(f"Number of remaining annotation terms: {num_terms_after_filtering}")
+    if num_terms_after_filtering == 0:
         raise ValueError(
-            "No …
+            f"No annotation terms found with at least {min_nodes_per_term} nodes in the network."
         )
 
-    # Remove columns with all zeros to improve performance
-    annotations_pivot = annotations_pivot.loc[:, annotations_pivot.sum(axis=0) != 0]
     # Extract ordered nodes and annotations
     ordered_nodes = tuple(annotations_pivot.index)
     ordered_annotations = tuple(annotations_pivot.columns)
-    # Convert the annotations_pivot matrix to a numpy array
-    …
+    # Convert the annotations_pivot matrix to a numpy array or sparse matrix
+    annotations_pivot_binary = (annotations_pivot.fillna(0).to_numpy() > 0).astype(int)
+    # Convert the binary annotations matrix to a sparse matrix
+    annotations_pivot_binary = csr_matrix(annotations_pivot_binary)
 
     return {
         "ordered_nodes": ordered_nodes,
         "ordered_annotations": ordered_annotations,
-        "matrix": …
+        "matrix": annotations_pivot_binary,
     }
 
 
 def define_top_annotations(
     network: nx.Graph,
     ordered_annotation_labels: List[str],
-    …
-    …
-    …
+    neighborhood_significance_sums: List[int],
+    significant_significance_matrix: np.ndarray,
+    significant_binary_significance_matrix: np.ndarray,
     min_cluster_size: int = 5,
     max_cluster_size: int = 1000,
 ) -> pd.DataFrame:
-    """Define top annotations based on neighborhood …
+    """Define top annotations based on neighborhood significance sums and binary significance matrix.
 
     Args:
         network (NetworkX graph): The network graph.
         ordered_annotation_labels (list of str): List of ordered annotation labels.
-        …
-        …
-        …
+        neighborhood_significance_sums (list of int): List of neighborhood significance sums.
+        significant_significance_matrix (np.ndarray): Enrichment matrix below alpha threshold.
+        significant_binary_significance_matrix (np.ndarray): Binary significance matrix below alpha threshold.
         min_cluster_size (int, optional): Minimum cluster size. Defaults to 5.
         max_cluster_size (int, optional): Maximum cluster size. Defaults to 1000.
 
     Returns:
         pd.DataFrame: DataFrame with top annotations and their properties.
     """
-    # Sum the columns of the significant …
-    …
-    # Create DataFrame to store annotations, their neighborhood …
-    …
+    # Sum the columns of the significant significance matrix (positive floating point values)
+    significant_significance_scores = significant_significance_matrix.sum(axis=0)
+    # Create DataFrame to store annotations, their neighborhood significance sums, and significance scores
+    annotations_significance_matrix = pd.DataFrame(
         {
             "id": range(len(ordered_annotation_labels)),
             "full_terms": ordered_annotation_labels,
-            "…
-            "…
+            "significant_neighborhood_significance_sums": neighborhood_significance_sums,
+            "significant_significance_score": significant_significance_scores,
         }
     )
-    …
+    annotations_significance_matrix["significant_annotations"] = False
     # Apply size constraints to identify potential significant annotations
-    …
+    annotations_significance_matrix.loc[
         (
-            …
+            annotations_significance_matrix["significant_neighborhood_significance_sums"]
             >= min_cluster_size
         )
         & (
-            …
+            annotations_significance_matrix["significant_neighborhood_significance_sums"]
             <= max_cluster_size
         ),
         "significant_annotations",
     ] = True
     # Initialize columns for connected components analysis
-    …
-    …
-    …
+    annotations_significance_matrix["num_connected_components"] = 0
+    annotations_significance_matrix["size_connected_components"] = None
+    annotations_significance_matrix["size_connected_components"] = annotations_significance_matrix[
         "size_connected_components"
     ].astype(object)
-    …
+    annotations_significance_matrix["num_large_connected_components"] = 0
 
-    for attribute in …
-    …
+    for attribute in annotations_significance_matrix.index.values[
+        annotations_significance_matrix["significant_annotations"]
     ]:
-        # Identify …
-        …
-            compress(list(network), …
+        # Identify significant neighborhoods based on the binary significance matrix
+        significant_neighborhoods = list(
+            compress(list(network), significant_binary_significance_matrix[:, attribute])
         )
-        …
-        # Analyze connected components within the …
+        significant_network = nx.subgraph(network, significant_neighborhoods)
+        # Analyze connected components within the significant subnetwork
         connected_components = sorted(
-            nx.connected_components( …
+            nx.connected_components(significant_network), key=len, reverse=True
         )
         size_connected_components = np.array([len(c) for c in connected_components])
 
@@ -159,23 +185,24 @@ def define_top_annotations(
         num_large_connected_components = len(filtered_size_connected_components)
 
         # Assign the number of connected components
-        …
+        annotations_significance_matrix.loc[attribute, "num_connected_components"] = (
             num_connected_components
         )
         # Filter out attributes with more than one connected component
-        …
-        …
+        annotations_significance_matrix.loc[
+            annotations_significance_matrix["num_connected_components"] > 1,
+            "significant_annotations",
         ] = False
         # Assign the number of large connected components
-        …
+        annotations_significance_matrix.loc[attribute, "num_large_connected_components"] = (
             num_large_connected_components
         )
         # Assign the size of connected components, ensuring it is always a list
-        …
+        annotations_significance_matrix.at[attribute, "size_connected_components"] = (
             filtered_size_connected_components.tolist()
         )
 
-    return …
+    return annotations_significance_matrix
 
 
 def get_weighted_description(words_column: pd.Series, scores_column: pd.Series) -> str:
@@ -184,16 +211,16 @@ def get_weighted_description(words_column: pd.Series, scores_column: pd.Series)
 
     Args:
         words_column (pd.Series): A pandas Series containing strings to process.
-        scores_column (pd.Series): A pandas Series containing …
+        scores_column (pd.Series): A pandas Series containing significance scores to weigh the terms.
 
     Returns:
-        str: A coherent description formed from the most frequent and significant words, weighed by …
+        str: A coherent description formed from the most frequent and significant words, weighed by significance scores.
     """
     # Handle case where all scores are the same
     if scores_column.max() == scores_column.min():
         normalized_scores = pd.Series([1] * len(scores_column))
     else:
-        # Normalize the …
+        # Normalize the significance scores to be between 0 and 1
         normalized_scores = (scores_column - scores_column.min()) / (
             scores_column.max() - scores_column.min()
         )
```
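The substantive change to `load_annotations` is the new `min_nodes_per_term` filter and the switch from a dense NumPy matrix to a `scipy.sparse.csr_matrix`. The sketch below distills just those added lines into a standalone helper; `filter_and_sparsify` is a hypothetical name, and the pivot/reindex steps that precede it in the real function are omitted.

```python
import pandas as pd
from scipy.sparse import csr_matrix

def filter_and_sparsify(annotations_pivot: pd.DataFrame, min_nodes_per_term: int = 2) -> csr_matrix:
    """Drop annotation terms covering too few nodes, then binarize and sparsify."""
    # Keep only columns (terms) annotating at least min_nodes_per_term nodes
    annotations_pivot = annotations_pivot.loc[:, annotations_pivot.sum(axis=0) >= min_nodes_per_term]
    if annotations_pivot.shape[1] == 0:
        raise ValueError(f"No annotation terms found with at least {min_nodes_per_term} nodes.")
    # Binarize (NaN -> 0, any positive count -> 1) and store in compressed sparse row form
    binary = (annotations_pivot.fillna(0).to_numpy() > 0).astype(int)
    return csr_matrix(binary)

# Three nodes, two terms; "GO:B" annotates a single node and is filtered out
pivot = pd.DataFrame({"GO:A": [1, 1, 0], "GO:B": [0, 1, 0]}, index=["n1", "n2", "n3"])
print(filter_and_sparsify(pivot).shape)  # (3, 1)
```

Annotation membership matrices are mostly zeros, so the CSR representation can cut memory substantially relative to the dense array the previous release returned.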
risk/annotations/io.py
CHANGED
```diff
@@ -1,8 +1,6 @@
 """
 risk/annotations/io
 ~~~~~~~~~~~~~~~~~~~
-
-This file contains the code for the RISK class and command-line access.
 """
 
 import json
@@ -25,27 +23,32 @@ class AnnotationsIO:
     def __init__(self):
         pass
 
-    def load_json_annotation(self, network: nx.Graph, filepath: str) -> Dict[str, Any]:
+    def load_json_annotation(
+        self, network: nx.Graph, filepath: str, min_nodes_per_term: int = 2
+    ) -> Dict[str, Any]:
         """Load annotations from a JSON file and convert them to a DataFrame.
 
         Args:
             network (NetworkX graph): The network to which the annotations are related.
             filepath (str): Path to the JSON annotations file.
+            min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+                term to be included. Defaults to 2.
 
         Returns:
             Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotations matrix.
         """
         filetype = "JSON"
         # Log the loading of the JSON file
-        params.log_annotations(filetype=filetype, filepath=filepath)
+        params.log_annotations(
+            filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
+        )
         _log_loading(filetype, filepath=filepath)
 
-        # …
-        with open(filepath, "r") as file:
+        # Load the JSON file into a dictionary
+        with open(filepath, "r", encoding="utf-8") as file:
             annotations_input = json.load(file)
 
-        …
-        return load_annotations(network, annotations_input)
+        return load_annotations(network, annotations_input, min_nodes_per_term)
 
     def load_excel_annotation(
         self,
@@ -55,6 +58,7 @@ class AnnotationsIO:
         nodes_colname: str = "nodes",
         sheet_name: str = "Sheet1",
         nodes_delimiter: str = ";",
+        min_nodes_per_term: int = 2,
     ) -> Dict[str, Any]:
         """Load annotations from an Excel file and associate them with the network.
 
@@ -65,6 +69,8 @@ class AnnotationsIO:
             nodes_colname (str): Name of the column containing the nodes associated with each label.
             sheet_name (str, optional): The name of the Excel sheet to load (default is 'Sheet1').
             nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
+            min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+                term to be included. Defaults to 2.
 
         Returns:
             Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes,
@@ -72,18 +78,21 @@ class AnnotationsIO:
         """
         filetype = "Excel"
         # Log the loading of the Excel file
-        params.log_annotations(filetype=filetype, filepath=filepath)
+        params.log_annotations(
+            filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
+        )
         _log_loading(filetype, filepath=filepath)
 
         # Load the specified sheet from the Excel file
-        …
+        annotation = pd.read_excel(filepath, sheet_name=sheet_name)
         # Split the nodes column by the specified nodes_delimiter
-        …
+        annotation[nodes_colname] = annotation[nodes_colname].apply(
+            lambda x: x.split(nodes_delimiter)
+        )
         # Convert the DataFrame to a dictionary pairing labels with their corresponding nodes
-        …
+        annotations_input = annotation.set_index(label_colname)[nodes_colname].to_dict()
 
-        …
-        return load_annotations(network, label_node_dict)
+        return load_annotations(network, annotations_input, min_nodes_per_term)
 
     def load_csv_annotation(
         self,
@@ -92,6 +101,7 @@ class AnnotationsIO:
         label_colname: str = "label",
         nodes_colname: str = "nodes",
         nodes_delimiter: str = ";",
+        min_nodes_per_term: int = 2,
     ) -> Dict[str, Any]:
         """Load annotations from a CSV file and associate them with the network.
 
@@ -101,6 +111,8 @@ class AnnotationsIO:
             label_colname (str): Name of the column containing the labels (e.g., GO terms).
             nodes_colname (str): Name of the column containing the nodes associated with each label.
             nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
+            min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+                term to be included. Defaults to 2.
 
         Returns:
             Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes,
@@ -108,7 +120,9 @@ class AnnotationsIO:
         """
         filetype = "CSV"
         # Log the loading of the CSV file
-        params.log_annotations(filetype=filetype, filepath=filepath)
+        params.log_annotations(
+            filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
+        )
         _log_loading(filetype, filepath=filepath)
 
         # Load the CSV file into a dictionary
@@ -116,8 +130,7 @@ class AnnotationsIO:
             filepath, label_colname, nodes_colname, delimiter=",", nodes_delimiter=nodes_delimiter
         )
 
-        …
-        return load_annotations(network, annotations_input)
+        return load_annotations(network, annotations_input, min_nodes_per_term)
 
     def load_tsv_annotation(
         self,
@@ -126,6 +139,7 @@ class AnnotationsIO:
         label_colname: str = "label",
         nodes_colname: str = "nodes",
         nodes_delimiter: str = ";",
+        min_nodes_per_term: int = 2,
     ) -> Dict[str, Any]:
         """Load annotations from a TSV file and associate them with the network.
 
@@ -135,6 +149,8 @@ class AnnotationsIO:
             label_colname (str): Name of the column containing the labels (e.g., GO terms).
             nodes_colname (str): Name of the column containing the nodes associated with each label.
             nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
+            min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+                term to be included. Defaults to 2.
 
         Returns:
             Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes,
@@ -142,7 +158,9 @@ class AnnotationsIO:
         """
         filetype = "TSV"
         # Log the loading of the TSV file
-        params.log_annotations(filetype=filetype, filepath=filepath)
+        params.log_annotations(
+            filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
+        )
         _log_loading(filetype, filepath=filepath)
 
         # Load the TSV file into a dictionary
@@ -150,15 +168,18 @@ class AnnotationsIO:
             filepath, label_colname, nodes_colname, delimiter="\t", nodes_delimiter=nodes_delimiter
        )
 
-        …
-        return load_annotations(network, annotations_input)
+        return load_annotations(network, annotations_input, min_nodes_per_term)
 
-    def load_dict_annotation(self, network: nx.Graph, content: Dict[str, Any]) -> Dict[str, Any]:
+    def load_dict_annotation(
+        self, network: nx.Graph, content: Dict[str, Any], min_nodes_per_term: int = 2
+    ) -> Dict[str, Any]:
         """Load annotations from a provided dictionary and convert them to a dictionary annotation.
 
         Args:
             network (NetworkX graph): The network to which the annotations are related.
             content (Dict[str, Any]): The annotations dictionary to load.
+            min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+                term to be included. Defaults to 2.
 
         Returns:
             Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotations matrix.
@@ -174,13 +195,8 @@ class AnnotationsIO:
         params.log_annotations(filepath="In-memory dictionary", filetype=filetype)
         _log_loading(filetype, "In-memory dictionary")
 
-        # Load the annotations …
-        …
-        # Ensure the output is a dictionary
-        if not isinstance(annotations_dict, dict):
-            raise ValueError("Expected output to be a dictionary")
-
-        return annotations_dict
+        # Load the annotations as a dictionary from the provided dictionary
+        return load_annotations(network, content, min_nodes_per_term)
 
 
 def _load_matrix_file(
@@ -203,11 +219,11 @@ def _load_matrix_file(
         Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes.
     """
     # Load the CSV or TSV file into a DataFrame
-    …
+    annotation = pd.read_csv(filepath, delimiter=delimiter)
     # Split the nodes column by the nodes_delimiter to handle multiple nodes per label
-    …
+    annotation[nodes_colname] = annotation[nodes_colname].apply(lambda x: x.split(nodes_delimiter))
     # Create a dictionary pairing labels with their corresponding list of nodes
-    label_node_dict = …
+    label_node_dict = annotation.set_index(label_colname)[nodes_colname].to_dict()
     return label_node_dict
```
risk/log/__init__.py
CHANGED
```diff
@@ -3,7 +3,9 @@ risk/log
 ~~~~~~~~
 """
 
-from .config import logger, log_header, set_global_verbosity
-from .params import Params
+from risk.log.console import logger, log_header, set_global_verbosity
+from risk.log.parameters import Params
 
+# Initialize the global parameters logger
 params = Params()
+params.initialize()
```
risk/log/{config.py → console.py}
CHANGED
```diff
@@ -1,6 +1,6 @@
 """
-risk/log/config
-~~~~~~~~~~~~~~~
+risk/log/console
+~~~~~~~~~~~~~~~~
 """
 
 import logging
@@ -16,8 +16,10 @@ def in_jupyter():
         shell = get_ipython().__class__.__name__
         if shell == "ZMQInteractiveShell":  # Jupyter Notebook or QtConsole
             return True
-        elif shell == "TerminalInteractiveShell":  # Terminal running IPython
+        if shell == "TerminalInteractiveShell":  # Terminal running IPython
             return False
+
+        return False  # Other type (?)
     except NameError:
         return False  # Not in Jupyter
 
```
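Reassembled, the patched `in_jupyter` now ends in an explicit fall-through instead of an `elif`, so unrecognized shells report `False` rather than the implicit `None` the old control flow produced. A sketch of the resulting logic; `get_ipython` exists only under IPython, and looking it up is what raises `NameError` elsewhere:

```python
def in_jupyter() -> bool:
    """Best-effort detection of a Jupyter (ZMQ) shell."""
    try:
        shell = get_ipython().__class__.__name__  # defined only under IPython
        if shell == "ZMQInteractiveShell":  # Jupyter Notebook or QtConsole
            return True
        if shell == "TerminalInteractiveShell":  # terminal IPython
            return False
        return False  # any other shell type
    except NameError:
        return False  # plain Python interpreter; not in Jupyter
```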
risk/log/{params.py → parameters.py}
CHANGED
```diff
@@ -1,50 +1,22 @@
 """
-risk/log/params
-~~~~~~~~~~~~~~~
+risk/log/parameters
+~~~~~~~~~~~~~~~~~~~
 """
 
 import csv
 import json
 import warnings
 from datetime import datetime
-from functools import wraps
 from typing import Any, Dict
 
 import numpy as np
 
-from .config import logger, log_header
+from risk.log.console import logger, log_header
 
 # Suppress all warnings - this is to resolve warnings from multiprocessing
 warnings.filterwarnings("ignore")
 
 
-def _safe_param_export(func):
-    """A decorator to wrap parameter export functions in a try-except block for safe execution.
-
-    Args:
-        func (function): The function to be wrapped.
-
-    Returns:
-        function: The wrapped function with error handling.
-    """
-
-    @wraps(func)
-    def wrapper(*args, **kwargs):
-        try:
-            result = func(*args, **kwargs)
-            filepath = (
-                kwargs.get("filepath") or args[1]
-            )  # Assuming filepath is always the second argument
-            logger.info(f"Parameters successfully exported to filepath: {filepath}")
-            return result
-        except Exception as e:
-            filepath = kwargs.get("filepath") or args[1]
-            logger.error(f"An error occurred while exporting parameters to {filepath}: {e}")
-            return None
-
-    return wrapper
-
-
 class Params:
     """Handles the storage and logging of various parameters for network analysis.
 
@@ -106,7 +78,6 @@ class Params:
         """
         self.plotter = {**self.plotter, **kwargs}
 
-    @_safe_param_export
     def to_csv(self, filepath: str) -> None:
         """Export the parameters to a CSV file.
 
@@ -116,7 +87,7 @@ class Params:
         # Load the parameter dictionary
         params = self.load()
         # Open the file in write mode
-        with open(filepath, "w", newline="") as csv_file:
+        with open(filepath, "w", encoding="utf-8", newline="") as csv_file:
             writer = csv.writer(csv_file)
             # Write the header
             writer.writerow(["parent_key", "child_key", "value"])
@@ -128,17 +99,19 @@ class Params:
             else:
                 writer.writerow([parent_key, "", parent_value])
 
-    @_safe_param_export
+        logger.info(f"Parameters exported to CSV file: {filepath}")
+
     def to_json(self, filepath: str) -> None:
         """Export the parameters to a JSON file.
 
         Args:
             filepath (str): The path where the JSON file will be saved.
         """
-        with open(filepath, "w") as json_file:
+        with open(filepath, "w", encoding="utf-8") as json_file:
             json.dump(self.load(), json_file, indent=4)
 
-    @_safe_param_export
+        logger.info(f"Parameters exported to JSON file: {filepath}")
+
     def to_txt(self, filepath: str) -> None:
         """Export the parameters to a text file.
 
@@ -148,13 +121,15 @@ class Params:
         # Load the parameter dictionary
         params = self.load()
         # Open the file in write mode
-        with open(filepath, "w") as txt_file:
+        with open(filepath, "w", encoding="utf-8") as txt_file:
             for key, value in params.items():
                 # Write the key and its corresponding value
                 txt_file.write(f"{key}: {value}\n")
                 # Add a blank line after each entry
                 txt_file.write("\n")
 
+        logger.info(f"Parameters exported to text file: {filepath}")
+
     def load(self) -> Dict[str, Any]:
         """Load and process various parameters, converting any np.ndarray values to lists.
 
@@ -186,12 +161,12 @@ def _convert_ndarray_to_list(d: Dict[str, Any]) -> Dict[str, Any]:
     if isinstance(d, dict):
         # Recursively process each value in the dictionary
         return {k: _convert_ndarray_to_list(v) for k, v in d.items()}
-    elif isinstance(d, list):
+    if isinstance(d, list):
         # Recursively process each item in the list
         return [_convert_ndarray_to_list(v) for v in d]
-    elif isinstance(d, np.ndarray):
+    if isinstance(d, np.ndarray):
         # Convert numpy arrays to lists
         return d.tolist()
-    else:
-        …
-        return d
+
+    # Return the value unchanged if it's not a dict, List, or ndarray
+    return d
```
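The decorator-based error swallowing (`_safe_param_export`) is gone; each `to_*` method now writes with an explicit `encoding="utf-8"` and logs its own success line, so exceptions propagate to the caller instead of being logged and converted to `None`. The `_convert_ndarray_to_list` change is purely stylistic (guard clauses instead of `if`/`elif`/`else`); assembled from the diff, it reads:

```python
from typing import Any
import numpy as np

def _convert_ndarray_to_list(d: Any) -> Any:
    """Recursively convert np.ndarray values inside dicts/lists to plain lists."""
    if isinstance(d, dict):
        return {k: _convert_ndarray_to_list(v) for k, v in d.items()}
    if isinstance(d, list):
        return [_convert_ndarray_to_list(v) for v in d]
    if isinstance(d, np.ndarray):
        return d.tolist()
    # Return the value unchanged if it's not a dict, list, or ndarray
    return d

print(_convert_ndarray_to_list({"a": np.arange(3), "b": [np.ones(2)]}))
# -> {'a': [0, 1, 2], 'b': [[1.0, 1.0]]}
```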
risk/neighborhoods/__init__.py
CHANGED
```diff
@@ -3,8 +3,6 @@ risk/neighborhoods
 ~~~~~~~~~~~~~~~~~~
 """
 
-from .domains import define_domains, …
-from .neighborhoods import (
-    …
-    process_neighborhoods,
-)
+from risk.neighborhoods.domains import define_domains, trim_domains
+from risk.neighborhoods.api import NeighborhoodsAPI
+from risk.neighborhoods.neighborhoods import process_neighborhoods
```
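The subpackage now exposes the new `NeighborhoodsAPI` (from the api.py added in this release, +446 lines) alongside `trim_domains` and `process_neighborhoods`; per the new `__init__.py` above, all of these resolve from the subpackage root:

```python
# Re-exports available after this release, taken directly from the new __init__.py
from risk.neighborhoods import NeighborhoodsAPI, define_domains, process_neighborhoods, trim_domains
```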
|