risk-network 0.0.13b3__py3-none-any.whl → 0.0.13b4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- risk/__init__.py +1 -1
- risk/annotation/annotation.py +22 -8
- risk/annotation/io.py +58 -20
- {risk_network-0.0.13b3.dist-info → risk_network-0.0.13b4.dist-info}/METADATA +2 -2
- {risk_network-0.0.13b3.dist-info → risk_network-0.0.13b4.dist-info}/RECORD +8 -8
- {risk_network-0.0.13b3.dist-info → risk_network-0.0.13b4.dist-info}/WHEEL +1 -1
- {risk_network-0.0.13b3.dist-info → risk_network-0.0.13b4.dist-info}/licenses/LICENSE +0 -0
- {risk_network-0.0.13b3.dist-info → risk_network-0.0.13b4.dist-info}/top_level.txt +0 -0
risk/__init__.py
CHANGED
risk/annotation/annotation.py
CHANGED
@@ -36,7 +36,10 @@ initialize_nltk()
|
|
36
36
|
|
37
37
|
|
38
38
|
def load_annotation(
|
39
|
-
network: nx.Graph,
|
39
|
+
network: nx.Graph,
|
40
|
+
annotation_input: Dict[str, Any],
|
41
|
+
min_nodes_per_term: int = 1,
|
42
|
+
max_nodes_per_term: int = 10_000,
|
40
43
|
) -> Dict[str, Any]:
|
41
44
|
"""Convert annotation input to a sparse matrix and reindex based on the network's node labels.
|
42
45
|
|
@@ -44,7 +47,9 @@ def load_annotation(
|
|
44
47
|
network (nx.Graph): The network graph.
|
45
48
|
annotation_input (Dict[str, Any]): An annotation dictionary.
|
46
49
|
min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
|
47
|
-
term to be included. Defaults to
|
50
|
+
term to be included. Defaults to 1.
|
51
|
+
max_nodes_per_term (int, optional): The maximum number of network nodes allowed for each annotation
|
52
|
+
term. Defaults to 10_000.
|
48
53
|
|
49
54
|
Returns:
|
50
55
|
Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the sparse binary annotations
|
@@ -52,7 +57,6 @@ def load_annotation(
|
|
52
57
|
|
53
58
|
Raises:
|
54
59
|
ValueError: If no annotation is found for the nodes in the network.
|
55
|
-
ValueError: If no annotation has at least min_nodes_per_term nodes in the network.
|
56
60
|
"""
|
57
61
|
# Step 1: Map nodes and annotations to indices
|
58
62
|
node_label_order = [attr["label"] for _, attr in network.nodes(data=True) if "label" in attr]
|
@@ -72,9 +76,18 @@ def load_annotation(
|
|
72
76
|
# Create a sparse binary matrix
|
73
77
|
num_nodes = len(node_to_idx)
|
74
78
|
num_annotation = len(annotation_to_idx)
|
75
|
-
|
76
|
-
|
77
|
-
|
79
|
+
# Convert to a sparse matrix and set the data type to uint8 for binary representation
|
80
|
+
annotation_pivot = (
|
81
|
+
coo_matrix((data, (row, col)), shape=(num_nodes, num_annotation)).tocsr().astype(np.uint8)
|
82
|
+
)
|
83
|
+
# Step 3: Filter out annotations with too few or too many nodes
|
84
|
+
valid_annotation = np.array(
|
85
|
+
[
|
86
|
+
annotation_pivot[:, i].sum() >= min_nodes_per_term
|
87
|
+
and annotation_pivot[:, i].sum() <= max_nodes_per_term
|
88
|
+
for i in range(num_annotation)
|
89
|
+
]
|
90
|
+
)
|
78
91
|
annotation_pivot = annotation_pivot[:, valid_annotation]
|
79
92
|
# Step 4: Raise errors for empty matrices
|
80
93
|
if annotation_pivot.nnz == 0:
|
@@ -83,7 +96,7 @@ def load_annotation(
|
|
83
96
|
num_remaining_annotation = annotation_pivot.shape[1]
|
84
97
|
if num_remaining_annotation == 0:
|
85
98
|
raise ValueError(
|
86
|
-
f"No annotation terms found with at least {min_nodes_per_term} nodes
|
99
|
+
f"No annotation terms found with at least {min_nodes_per_term} nodes and at most {max_nodes_per_term} nodes."
|
87
100
|
)
|
88
101
|
|
89
102
|
# Step 5: Extract ordered nodes and annotations
|
@@ -94,6 +107,7 @@ def load_annotation(
|
|
94
107
|
|
95
108
|
# Log the filtering details
|
96
109
|
logger.info(f"Minimum number of nodes per annotation term: {min_nodes_per_term}")
|
110
|
+
logger.info(f"Maximum number of nodes per annotation term: {max_nodes_per_term}")
|
97
111
|
logger.info(f"Number of input annotation terms: {num_annotation}")
|
98
112
|
logger.info(f"Number of remaining annotation terms: {num_remaining_annotation}")
|
99
113
|
|
@@ -122,7 +136,7 @@ def define_top_annotation(
|
|
122
136
|
significant_significance_matrix (np.ndarray): Enrichment matrix below alpha threshold.
|
123
137
|
significant_binary_significance_matrix (np.ndarray): Binary significance matrix below alpha threshold.
|
124
138
|
min_cluster_size (int, optional): Minimum cluster size. Defaults to 5.
|
125
|
-
max_cluster_size (int, optional): Maximum cluster size. Defaults to
|
139
|
+
max_cluster_size (int, optional): Maximum cluster size. Defaults to 10_000.
|
126
140
|
|
127
141
|
Returns:
|
128
142
|
pd.DataFrame: DataFrame with top annotations and their properties.
|
risk/annotation/io.py
CHANGED
@@ -21,7 +21,11 @@ class AnnotationIO:
|
|
21
21
|
"""
|
22
22
|
|
23
23
|
def load_annotation_json(
|
24
|
-
self,
|
24
|
+
self,
|
25
|
+
network: nx.Graph,
|
26
|
+
filepath: str,
|
27
|
+
min_nodes_per_term: int = 1,
|
28
|
+
max_nodes_per_term: int = 10_000,
|
25
29
|
) -> Dict[str, Any]:
|
26
30
|
"""Load annotation from a JSON file and convert them to a DataFrame.
|
27
31
|
|
@@ -29,7 +33,9 @@ class AnnotationIO:
|
|
29
33
|
network (NetworkX graph): The network to which the annotation is related.
|
30
34
|
filepath (str): Path to the JSON annotation file.
|
31
35
|
min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
|
32
|
-
term to be included. Defaults to
|
36
|
+
term to be included. Defaults to 1.
|
37
|
+
max_nodes_per_term (int, optional): The maximum number of network nodes allowed for each annotation
|
38
|
+
term to be included. Defaults to 10_000.
|
33
39
|
|
34
40
|
Returns:
|
35
41
|
Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotation matrix.
|
@@ -37,7 +43,10 @@ class AnnotationIO:
|
|
37
43
|
filetype = "JSON"
|
38
44
|
# Log the loading of the JSON file
|
39
45
|
params.log_annotation(
|
40
|
-
filetype=filetype,
|
46
|
+
filetype=filetype,
|
47
|
+
filepath=filepath,
|
48
|
+
min_nodes_per_term=min_nodes_per_term,
|
49
|
+
max_nodes_per_term=max_nodes_per_term,
|
41
50
|
)
|
42
51
|
self._log_loading_annotation(filetype, filepath=filepath)
|
43
52
|
|
@@ -45,7 +54,7 @@ class AnnotationIO:
|
|
45
54
|
with open(filepath, "r", encoding="utf-8") as file:
|
46
55
|
annotation_input = json.load(file)
|
47
56
|
|
48
|
-
return load_annotation(network, annotation_input, min_nodes_per_term)
|
57
|
+
return load_annotation(network, annotation_input, min_nodes_per_term, max_nodes_per_term)
|
49
58
|
|
50
59
|
def load_annotation_excel(
|
51
60
|
self,
|
@@ -55,7 +64,8 @@ class AnnotationIO:
|
|
55
64
|
nodes_colname: str = "nodes",
|
56
65
|
sheet_name: str = "Sheet1",
|
57
66
|
nodes_delimiter: str = ";",
|
58
|
-
min_nodes_per_term: int =
|
67
|
+
min_nodes_per_term: int = 1,
|
68
|
+
max_nodes_per_term: int = 10_000,
|
59
69
|
) -> Dict[str, Any]:
|
60
70
|
"""Load annotation from an Excel file and associate them with the network.
|
61
71
|
|
@@ -67,7 +77,9 @@ class AnnotationIO:
|
|
67
77
|
sheet_name (str, optional): The name of the Excel sheet to load (default is 'Sheet1').
|
68
78
|
nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
|
69
79
|
min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
|
70
|
-
term to be included. Defaults to
|
80
|
+
term to be included. Defaults to 1.
|
81
|
+
max_nodes_per_term (int, optional): The maximum number of network nodes allowed for each annotation
|
82
|
+
term to be included. Defaults to 10_000.
|
71
83
|
|
72
84
|
Returns:
|
73
85
|
Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes,
|
@@ -76,7 +88,10 @@ class AnnotationIO:
|
|
76
88
|
filetype = "Excel"
|
77
89
|
# Log the loading of the Excel file
|
78
90
|
params.log_annotation(
|
79
|
-
filetype=filetype,
|
91
|
+
filetype=filetype,
|
92
|
+
filepath=filepath,
|
93
|
+
min_nodes_per_term=min_nodes_per_term,
|
94
|
+
max_nodes_per_term=max_nodes_per_term,
|
80
95
|
)
|
81
96
|
self._log_loading_annotation(filetype, filepath=filepath)
|
82
97
|
|
@@ -89,7 +104,7 @@ class AnnotationIO:
|
|
89
104
|
# Convert the DataFrame to a dictionary pairing labels with their corresponding nodes
|
90
105
|
annotation_input = annotation.set_index(label_colname)[nodes_colname].to_dict()
|
91
106
|
|
92
|
-
return load_annotation(network, annotation_input, min_nodes_per_term)
|
107
|
+
return load_annotation(network, annotation_input, min_nodes_per_term, max_nodes_per_term)
|
93
108
|
|
94
109
|
def load_annotation_csv(
|
95
110
|
self,
|
@@ -98,7 +113,8 @@ class AnnotationIO:
|
|
98
113
|
label_colname: str = "label",
|
99
114
|
nodes_colname: str = "nodes",
|
100
115
|
nodes_delimiter: str = ";",
|
101
|
-
min_nodes_per_term: int =
|
116
|
+
min_nodes_per_term: int = 1,
|
117
|
+
max_nodes_per_term: int = 10_000,
|
102
118
|
) -> Dict[str, Any]:
|
103
119
|
"""Load annotation from a CSV file and associate them with the network.
|
104
120
|
|
@@ -109,7 +125,9 @@ class AnnotationIO:
|
|
109
125
|
nodes_colname (str): Name of the column containing the nodes associated with each label.
|
110
126
|
nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
|
111
127
|
min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
|
112
|
-
term to be included. Defaults to
|
128
|
+
term to be included. Defaults to 1.
|
129
|
+
max_nodes_per_term (int, optional): The maximum number of network nodes allowed for each annotation
|
130
|
+
term to be included. Defaults to 10_000.
|
113
131
|
|
114
132
|
Returns:
|
115
133
|
Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes,
|
@@ -118,7 +136,10 @@ class AnnotationIO:
|
|
118
136
|
filetype = "CSV"
|
119
137
|
# Log the loading of the CSV file
|
120
138
|
params.log_annotation(
|
121
|
-
filetype=filetype,
|
139
|
+
filetype=filetype,
|
140
|
+
filepath=filepath,
|
141
|
+
min_nodes_per_term=min_nodes_per_term,
|
142
|
+
max_nodes_per_term=max_nodes_per_term,
|
122
143
|
)
|
123
144
|
self._log_loading_annotation(filetype, filepath=filepath)
|
124
145
|
|
@@ -127,7 +148,7 @@ class AnnotationIO:
|
|
127
148
|
filepath, label_colname, nodes_colname, delimiter=",", nodes_delimiter=nodes_delimiter
|
128
149
|
)
|
129
150
|
|
130
|
-
return load_annotation(network, annotation_input, min_nodes_per_term)
|
151
|
+
return load_annotation(network, annotation_input, min_nodes_per_term, max_nodes_per_term)
|
131
152
|
|
132
153
|
def load_annotation_tsv(
|
133
154
|
self,
|
@@ -136,7 +157,8 @@ class AnnotationIO:
|
|
136
157
|
label_colname: str = "label",
|
137
158
|
nodes_colname: str = "nodes",
|
138
159
|
nodes_delimiter: str = ";",
|
139
|
-
min_nodes_per_term: int =
|
160
|
+
min_nodes_per_term: int = 1,
|
161
|
+
max_nodes_per_term: int = 10_000,
|
140
162
|
) -> Dict[str, Any]:
|
141
163
|
"""Load annotation from a TSV file and associate them with the network.
|
142
164
|
|
@@ -147,7 +169,9 @@ class AnnotationIO:
|
|
147
169
|
nodes_colname (str): Name of the column containing the nodes associated with each label.
|
148
170
|
nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
|
149
171
|
min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
|
150
|
-
term to be included. Defaults to
|
172
|
+
term to be included. Defaults to 1.
|
173
|
+
max_nodes_per_term (int, optional): The maximum number of network nodes allowed for each annotation
|
174
|
+
term to be included. Defaults to 10_000.
|
151
175
|
|
152
176
|
Returns:
|
153
177
|
Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes,
|
@@ -156,7 +180,10 @@ class AnnotationIO:
|
|
156
180
|
filetype = "TSV"
|
157
181
|
# Log the loading of the TSV file
|
158
182
|
params.log_annotation(
|
159
|
-
filetype=filetype,
|
183
|
+
filetype=filetype,
|
184
|
+
filepath=filepath,
|
185
|
+
min_nodes_per_term=min_nodes_per_term,
|
186
|
+
max_nodes_per_term=max_nodes_per_term,
|
160
187
|
)
|
161
188
|
self._log_loading_annotation(filetype, filepath=filepath)
|
162
189
|
|
@@ -165,10 +192,14 @@ class AnnotationIO:
|
|
165
192
|
filepath, label_colname, nodes_colname, delimiter="\t", nodes_delimiter=nodes_delimiter
|
166
193
|
)
|
167
194
|
|
168
|
-
return load_annotation(network, annotation_input, min_nodes_per_term)
|
195
|
+
return load_annotation(network, annotation_input, min_nodes_per_term, max_nodes_per_term)
|
169
196
|
|
170
197
|
def load_annotation_dict(
|
171
|
-
self,
|
198
|
+
self,
|
199
|
+
network: nx.Graph,
|
200
|
+
content: Dict[str, Any],
|
201
|
+
min_nodes_per_term: int = 1,
|
202
|
+
max_nodes_per_term: int = 10_000,
|
172
203
|
) -> Dict[str, Any]:
|
173
204
|
"""Load annotation from a provided dictionary and convert them to a dictionary annotation.
|
174
205
|
|
@@ -176,7 +207,9 @@ class AnnotationIO:
|
|
176
207
|
network (NetworkX graph): The network to which the annotation is related.
|
177
208
|
content (Dict[str, Any]): The annotation dictionary to load.
|
178
209
|
min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
|
179
|
-
term to be included. Defaults to
|
210
|
+
term to be included. Defaults to 1.
|
211
|
+
max_nodes_per_term (int, optional): The maximum number of network nodes allowed for each annotation
|
212
|
+
term to be included. Defaults to 10_000.
|
180
213
|
|
181
214
|
Returns:
|
182
215
|
Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotation matrix.
|
@@ -192,11 +225,16 @@ class AnnotationIO:
|
|
192
225
|
|
193
226
|
filetype = "Dictionary"
|
194
227
|
# Log the loading of the annotation from the dictionary
|
195
|
-
params.log_annotation(
|
228
|
+
params.log_annotation(
|
229
|
+
filepath="In-memory dictionary",
|
230
|
+
filetype=filetype,
|
231
|
+
min_nodes_per_term=min_nodes_per_term,
|
232
|
+
max_nodes_per_term=max_nodes_per_term,
|
233
|
+
)
|
196
234
|
self._log_loading_annotation(filetype, "In-memory dictionary")
|
197
235
|
|
198
236
|
# Load the annotation as a dictionary from the provided dictionary
|
199
|
-
return load_annotation(network, content, min_nodes_per_term)
|
237
|
+
return load_annotation(network, content, min_nodes_per_term, max_nodes_per_term)
|
200
238
|
|
201
239
|
def _load_matrix_file(
|
202
240
|
self,
|
@@ -1,7 +1,7 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: risk-network
|
3
|
-
Version: 0.0.
|
4
|
-
Summary: A Python package for
|
3
|
+
Version: 0.0.13b4
|
4
|
+
Summary: A Python package for scalable network analysis and high-quality visualization.
|
5
5
|
Author-email: Ira Horecka <ira89@icloud.com>
|
6
6
|
License: GPL-3.0-or-later
|
7
7
|
Project-URL: Homepage, https://github.com/riskportal/network
|
@@ -1,8 +1,8 @@
|
|
1
|
-
risk/__init__.py,sha256=
|
1
|
+
risk/__init__.py,sha256=64n4kde42cujId1bWBqXdxznWZaSEVq1NvS_gqlvt1g,127
|
2
2
|
risk/risk.py,sha256=Wjuxob5bI70Tpz9t71i05g94AQ3qXEMjfEcm5IV9HSY,1118
|
3
3
|
risk/annotation/__init__.py,sha256=1EbGo41ClQb5ESTtitjOhrZhaLzzwr5aT-RYDX8w-h4,185
|
4
|
-
risk/annotation/annotation.py,sha256=
|
5
|
-
risk/annotation/io.py,sha256=
|
4
|
+
risk/annotation/annotation.py,sha256=EExSfYbZu4EUyA5vl7EDadGefyf-sJw_UmUxgXbuKng,15151
|
5
|
+
risk/annotation/io.py,sha256=Rhob9GKgdfHZIMVyaRJa15YOAPMDbqg3y_b8vckPeoM,12391
|
6
6
|
risk/annotation/nltk_setup.py,sha256=14B6L56_dwIgAOC9Rl4dNd4-b-aEngUCoJP9L9kEilU,3572
|
7
7
|
risk/log/__init__.py,sha256=en-hKzuFtQWos4oZd8PxJ9u9Pe5bdihiqH9-qk_5ppw,217
|
8
8
|
risk/log/console.py,sha256=PgjyEvyhYLUSHXPUKEqOmxsDsfrjPICIgqo_cAHq0N8,4575
|
@@ -33,8 +33,8 @@ risk/network/plotter/network.py,sha256=c9rPQ5mjil0sxVQnprRaKMAUqT6PZmKiATWz0m-Tv
|
|
33
33
|
risk/network/plotter/plotter.py,sha256=WZcOrBW3vBQ_aLwv8c8pXJO8ZlyswHHHfEsiLxzEYaI,6121
|
34
34
|
risk/network/plotter/utils/colors.py,sha256=xZt4877ORTQqySiMh-tUGe0sXvhLbXO04iGNeBDkbbw,19144
|
35
35
|
risk/network/plotter/utils/layout.py,sha256=Lty16T-Q-oWwo9fXqm-nnS_dMS3BMhuFt4SFqxFC3Ng,3610
|
36
|
-
risk_network-0.0.
|
37
|
-
risk_network-0.0.
|
38
|
-
risk_network-0.0.
|
39
|
-
risk_network-0.0.
|
40
|
-
risk_network-0.0.
|
36
|
+
risk_network-0.0.13b4.dist-info/licenses/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
|
37
|
+
risk_network-0.0.13b4.dist-info/METADATA,sha256=Atc8HAHrKKIbdjfZUx4bwo5GifV3OGTrShjr0Ewd2T4,6853
|
38
|
+
risk_network-0.0.13b4.dist-info/WHEEL,sha256=GHB6lJx2juba1wDgXDNlMTyM13ckjBMKf-OnwgKOCtA,91
|
39
|
+
risk_network-0.0.13b4.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
|
40
|
+
risk_network-0.0.13b4.dist-info/RECORD,,
|
File without changes
|
File without changes
|