risk-network 0.0.12b1__tar.gz → 0.0.12b3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {risk_network-0.0.12b1/src/risk_network.egg-info → risk_network-0.0.12b3}/PKG-INFO +1 -1
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/src/risk/__init__.py +1 -1
- risk_network-0.0.12b3/src/risk/annotation/__init__.py +10 -0
- risk_network-0.0.12b1/src/risk/annotations/annotations.py → risk_network-0.0.12b3/src/risk/annotation/annotation.py +44 -44
- {risk_network-0.0.12b1/src/risk/annotations → risk_network-0.0.12b3/src/risk/annotation}/io.py +46 -46
- {risk_network-0.0.12b1/src/risk/annotations → risk_network-0.0.12b3/src/risk/annotation}/nltk_setup.py +4 -4
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/src/risk/log/parameters.py +5 -5
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/src/risk/neighborhoods/api.py +36 -36
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/src/risk/neighborhoods/domains.py +20 -24
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/src/risk/neighborhoods/neighborhoods.py +4 -4
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/src/risk/neighborhoods/stats/permutation/permutation.py +17 -17
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/src/risk/neighborhoods/stats/permutation/test_functions.py +2 -2
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/src/risk/neighborhoods/stats/tests.py +41 -41
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/src/risk/network/graph/api.py +17 -17
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/src/risk/network/graph/graph.py +17 -11
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/src/risk/network/graph/summary.py +10 -10
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/src/risk/network/io.py +12 -12
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/src/risk/network/plotter/canvas.py +1 -1
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/src/risk/network/plotter/contour.py +3 -3
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/src/risk/network/plotter/labels.py +72 -74
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/src/risk/network/plotter/network.py +6 -6
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/src/risk/network/plotter/plotter.py +6 -6
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/src/risk/network/plotter/utils/colors.py +12 -8
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/src/risk/network/plotter/utils/layout.py +3 -3
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/src/risk/risk.py +2 -2
- {risk_network-0.0.12b1 → risk_network-0.0.12b3/src/risk_network.egg-info}/PKG-INFO +1 -1
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/src/risk_network.egg-info/SOURCES.txt +5 -5
- risk_network-0.0.12b3/tests/test_load_annotation.py +291 -0
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/tests/test_load_graph.py +37 -43
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/tests/test_load_io_combinations.py +8 -8
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/tests/test_load_neighborhoods.py +60 -60
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/tests/test_load_network.py +22 -22
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/tests/test_log.py +6 -6
- risk_network-0.0.12b1/src/risk/annotations/__init__.py +0 -10
- risk_network-0.0.12b1/tests/test_load_annotations.py +0 -291
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/LICENSE +0 -0
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/README.md +0 -0
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/pyproject.toml +0 -0
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/setup.cfg +0 -0
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/src/risk/log/__init__.py +0 -0
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/src/risk/log/console.py +0 -0
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/src/risk/neighborhoods/__init__.py +0 -0
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/src/risk/neighborhoods/community.py +0 -0
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/src/risk/neighborhoods/stats/__init__.py +0 -0
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/src/risk/neighborhoods/stats/permutation/__init__.py +0 -0
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/src/risk/network/__init__.py +0 -0
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/src/risk/network/graph/__init__.py +0 -0
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/src/risk/network/graph/stats.py +0 -0
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/src/risk/network/plotter/__init__.py +0 -0
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/src/risk/network/plotter/api.py +0 -0
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/src/risk_network.egg-info/dependency_links.txt +0 -0
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/src/risk_network.egg-info/requires.txt +0 -0
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/src/risk_network.egg-info/top_level.txt +0 -0
- {risk_network-0.0.12b1 → risk_network-0.0.12b3}/tests/test_load_plotter.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
"""
|
2
|
-
risk/
|
3
|
-
|
2
|
+
risk/annotation/annotation
|
3
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
4
4
|
"""
|
5
5
|
|
6
6
|
import re
|
@@ -14,7 +14,7 @@ import pandas as pd
|
|
14
14
|
from nltk.tokenize import word_tokenize
|
15
15
|
from scipy.sparse import coo_matrix
|
16
16
|
|
17
|
-
from risk.
|
17
|
+
from risk.annotation.nltk_setup import setup_nltk_resources
|
18
18
|
from risk.log import logger
|
19
19
|
|
20
20
|
|
@@ -35,14 +35,14 @@ def initialize_nltk():
|
|
35
35
|
initialize_nltk()
|
36
36
|
|
37
37
|
|
38
|
-
def
|
39
|
-
network: nx.Graph,
|
38
|
+
def load_annotation(
|
39
|
+
network: nx.Graph, annotation_input: Dict[str, Any], min_nodes_per_term: int = 2
|
40
40
|
) -> Dict[str, Any]:
|
41
|
-
"""Convert
|
41
|
+
"""Convert annotation input to a sparse matrix and reindex based on the network's node labels.
|
42
42
|
|
43
43
|
Args:
|
44
44
|
network (nx.Graph): The network graph.
|
45
|
-
|
45
|
+
annotation_input (Dict[str, Any]): An annotation dictionary.
|
46
46
|
min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
|
47
47
|
term to be included. Defaults to 2.
|
48
48
|
|
@@ -51,18 +51,18 @@ def load_annotations(
|
|
51
51
|
matrix.
|
52
52
|
|
53
53
|
Raises:
|
54
|
-
ValueError: If no
|
55
|
-
ValueError: If no
|
54
|
+
ValueError: If no annotation is found for the nodes in the network.
|
55
|
+
ValueError: If no annotation has at least min_nodes_per_term nodes in the network.
|
56
56
|
"""
|
57
57
|
# Step 1: Map nodes and annotations to indices
|
58
58
|
node_label_order = [attr["label"] for _, attr in network.nodes(data=True) if "label" in attr]
|
59
59
|
node_to_idx = {node: i for i, node in enumerate(node_label_order)}
|
60
|
-
annotation_to_idx = {annotation: i for i, annotation in enumerate(
|
60
|
+
annotation_to_idx = {annotation: i for i, annotation in enumerate(annotation_input)}
|
61
61
|
# Step 2: Construct a sparse binary matrix directly
|
62
62
|
row = []
|
63
63
|
col = []
|
64
64
|
data = []
|
65
|
-
for annotation, nodes in
|
65
|
+
for annotation, nodes in annotation_input.items():
|
66
66
|
for node in nodes:
|
67
67
|
if node in node_to_idx and annotation in annotation_to_idx:
|
68
68
|
row.append(node_to_idx[node])
|
@@ -71,40 +71,40 @@ def load_annotations(
|
|
71
71
|
|
72
72
|
# Create a sparse binary matrix
|
73
73
|
num_nodes = len(node_to_idx)
|
74
|
-
|
75
|
-
|
74
|
+
num_annotation = len(annotation_to_idx)
|
75
|
+
annotation_pivot = coo_matrix((data, (row, col)), shape=(num_nodes, num_annotation)).tocsr()
|
76
76
|
# Step 3: Filter out annotations with fewer than min_nodes_per_term occurrences
|
77
|
-
|
78
|
-
|
77
|
+
valid_annotation = annotation_pivot.sum(axis=0).A1 >= min_nodes_per_term
|
78
|
+
annotation_pivot = annotation_pivot[:, valid_annotation]
|
79
79
|
# Step 4: Raise errors for empty matrices
|
80
|
-
if
|
80
|
+
if annotation_pivot.nnz == 0:
|
81
81
|
raise ValueError("No terms found in the annotation file for the nodes in the network.")
|
82
82
|
|
83
|
-
|
84
|
-
if
|
83
|
+
num_remaining_annotation = annotation_pivot.shape[1]
|
84
|
+
if num_remaining_annotation == 0:
|
85
85
|
raise ValueError(
|
86
86
|
f"No annotation terms found with at least {min_nodes_per_term} nodes in the network."
|
87
87
|
)
|
88
88
|
|
89
89
|
# Step 5: Extract ordered nodes and annotations
|
90
90
|
ordered_nodes = tuple(node_label_order)
|
91
|
-
|
92
|
-
annotation for annotation, is_valid in zip(annotation_to_idx,
|
91
|
+
ordered_annotation = tuple(
|
92
|
+
annotation for annotation, is_valid in zip(annotation_to_idx, valid_annotation) if is_valid
|
93
93
|
)
|
94
94
|
|
95
95
|
# Log the filtering details
|
96
96
|
logger.info(f"Minimum number of nodes per annotation term: {min_nodes_per_term}")
|
97
|
-
logger.info(f"Number of input annotation terms: {
|
98
|
-
logger.info(f"Number of remaining annotation terms: {
|
97
|
+
logger.info(f"Number of input annotation terms: {num_annotation}")
|
98
|
+
logger.info(f"Number of remaining annotation terms: {num_remaining_annotation}")
|
99
99
|
|
100
100
|
return {
|
101
101
|
"ordered_nodes": ordered_nodes,
|
102
|
-
"
|
103
|
-
"matrix":
|
102
|
+
"ordered_annotation": ordered_annotation,
|
103
|
+
"matrix": annotation_pivot,
|
104
104
|
}
|
105
105
|
|
106
106
|
|
107
|
-
def
|
107
|
+
def define_top_annotation(
|
108
108
|
network: nx.Graph,
|
109
109
|
ordered_annotation_labels: List[str],
|
110
110
|
neighborhood_significance_sums: List[int],
|
@@ -130,7 +130,7 @@ def define_top_annotations(
|
|
130
130
|
# Sum the columns of the significant significance matrix (positive floating point values)
|
131
131
|
significant_significance_scores = significant_significance_matrix.sum(axis=0)
|
132
132
|
# Create DataFrame to store annotations, their neighborhood significance sums, and significance scores
|
133
|
-
|
133
|
+
annotation_significance_matrix = pd.DataFrame(
|
134
134
|
{
|
135
135
|
"id": range(len(ordered_annotation_labels)),
|
136
136
|
"full_terms": ordered_annotation_labels,
|
@@ -138,29 +138,29 @@ def define_top_annotations(
|
|
138
138
|
"significant_significance_score": significant_significance_scores,
|
139
139
|
}
|
140
140
|
)
|
141
|
-
|
141
|
+
annotation_significance_matrix["significant_annotation"] = False
|
142
142
|
# Apply size constraints to identify potential significant annotations
|
143
|
-
|
143
|
+
annotation_significance_matrix.loc[
|
144
144
|
(
|
145
|
-
|
145
|
+
annotation_significance_matrix["significant_neighborhood_significance_sums"]
|
146
146
|
>= min_cluster_size
|
147
147
|
)
|
148
148
|
& (
|
149
|
-
|
149
|
+
annotation_significance_matrix["significant_neighborhood_significance_sums"]
|
150
150
|
<= max_cluster_size
|
151
151
|
),
|
152
|
-
"
|
152
|
+
"significant_annotation",
|
153
153
|
] = True
|
154
154
|
# Initialize columns for connected components analysis
|
155
|
-
|
156
|
-
|
157
|
-
|
155
|
+
annotation_significance_matrix["num_connected_components"] = 0
|
156
|
+
annotation_significance_matrix["size_connected_components"] = None
|
157
|
+
annotation_significance_matrix["size_connected_components"] = annotation_significance_matrix[
|
158
158
|
"size_connected_components"
|
159
159
|
].astype(object)
|
160
|
-
|
160
|
+
annotation_significance_matrix["num_large_connected_components"] = 0
|
161
161
|
|
162
|
-
for attribute in
|
163
|
-
|
162
|
+
for attribute in annotation_significance_matrix.index.values[
|
163
|
+
annotation_significance_matrix["significant_annotation"]
|
164
164
|
]:
|
165
165
|
# Identify significant neighborhoods based on the binary significance matrix
|
166
166
|
significant_neighborhoods = list(
|
@@ -183,24 +183,24 @@ def define_top_annotations(
|
|
183
183
|
num_large_connected_components = len(filtered_size_connected_components)
|
184
184
|
|
185
185
|
# Assign the number of connected components
|
186
|
-
|
186
|
+
annotation_significance_matrix.loc[attribute, "num_connected_components"] = (
|
187
187
|
num_connected_components
|
188
188
|
)
|
189
189
|
# Filter out attributes with more than one connected component
|
190
|
-
|
191
|
-
|
192
|
-
"
|
190
|
+
annotation_significance_matrix.loc[
|
191
|
+
annotation_significance_matrix["num_connected_components"] > 1,
|
192
|
+
"significant_annotation",
|
193
193
|
] = False
|
194
194
|
# Assign the number of large connected components
|
195
|
-
|
195
|
+
annotation_significance_matrix.loc[attribute, "num_large_connected_components"] = (
|
196
196
|
num_large_connected_components
|
197
197
|
)
|
198
198
|
# Assign the size of connected components, ensuring it is always a list
|
199
|
-
|
199
|
+
annotation_significance_matrix.at[attribute, "size_connected_components"] = (
|
200
200
|
filtered_size_connected_components.tolist()
|
201
201
|
)
|
202
202
|
|
203
|
-
return
|
203
|
+
return annotation_significance_matrix
|
204
204
|
|
205
205
|
|
206
206
|
def get_weighted_description(words_column: pd.Series, scores_column: pd.Series) -> str:
|
{risk_network-0.0.12b1/src/risk/annotations → risk_network-0.0.12b3/src/risk/annotation}/io.py
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
"""
|
2
|
-
risk/
|
3
|
-
|
2
|
+
risk/annotation/io
|
3
|
+
~~~~~~~~~~~~~~~~~~
|
4
4
|
"""
|
5
5
|
|
6
6
|
import json
|
@@ -9,45 +9,45 @@ from typing import Any, Dict
|
|
9
9
|
import networkx as nx
|
10
10
|
import pandas as pd
|
11
11
|
|
12
|
-
from risk.
|
12
|
+
from risk.annotation.annotation import load_annotation
|
13
13
|
from risk.log import log_header, logger, params
|
14
14
|
|
15
15
|
|
16
|
-
class
|
17
|
-
"""Handles the loading and exporting of
|
16
|
+
class AnnotationIO:
|
17
|
+
"""Handles the loading and exporting of annotation in various file formats.
|
18
18
|
|
19
|
-
The
|
19
|
+
The AnnotationIO class provides methods to load annotation from different file types (JSON, CSV, Excel, etc.)
|
20
20
|
and to export parameter data to various formats like JSON, CSV, and text files.
|
21
21
|
"""
|
22
22
|
|
23
|
-
def
|
23
|
+
def load_annotation_json(
|
24
24
|
self, network: nx.Graph, filepath: str, min_nodes_per_term: int = 2
|
25
25
|
) -> Dict[str, Any]:
|
26
|
-
"""Load
|
26
|
+
"""Load annotation from a JSON file and convert them to a DataFrame.
|
27
27
|
|
28
28
|
Args:
|
29
|
-
network (NetworkX graph): The network to which the
|
30
|
-
filepath (str): Path to the JSON
|
29
|
+
network (NetworkX graph): The network to which the annotation is related.
|
30
|
+
filepath (str): Path to the JSON annotation file.
|
31
31
|
min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
|
32
32
|
term to be included. Defaults to 2.
|
33
33
|
|
34
34
|
Returns:
|
35
|
-
Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the
|
35
|
+
Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotation matrix.
|
36
36
|
"""
|
37
37
|
filetype = "JSON"
|
38
38
|
# Log the loading of the JSON file
|
39
|
-
params.
|
39
|
+
params.log_annotation(
|
40
40
|
filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
|
41
41
|
)
|
42
42
|
self._log_loading(filetype, filepath=filepath)
|
43
43
|
|
44
44
|
# Load the JSON file into a dictionary
|
45
45
|
with open(filepath, "r", encoding="utf-8") as file:
|
46
|
-
|
46
|
+
annotation_input = json.load(file)
|
47
47
|
|
48
|
-
return
|
48
|
+
return load_annotation(network, annotation_input, min_nodes_per_term)
|
49
49
|
|
50
|
-
def
|
50
|
+
def load_annotation_excel(
|
51
51
|
self,
|
52
52
|
network: nx.Graph,
|
53
53
|
filepath: str,
|
@@ -57,11 +57,11 @@ class AnnotationsIO:
|
|
57
57
|
nodes_delimiter: str = ";",
|
58
58
|
min_nodes_per_term: int = 2,
|
59
59
|
) -> Dict[str, Any]:
|
60
|
-
"""Load
|
60
|
+
"""Load annotation from an Excel file and associate them with the network.
|
61
61
|
|
62
62
|
Args:
|
63
|
-
network (nx.Graph): The NetworkX graph to which the
|
64
|
-
filepath (str): Path to the Excel
|
63
|
+
network (nx.Graph): The NetworkX graph to which the annotation is related.
|
64
|
+
filepath (str): Path to the Excel annotation file.
|
65
65
|
label_colname (str): Name of the column containing the labels (e.g., GO terms).
|
66
66
|
nodes_colname (str): Name of the column containing the nodes associated with each label.
|
67
67
|
sheet_name (str, optional): The name of the Excel sheet to load (default is 'Sheet1').
|
@@ -75,7 +75,7 @@ class AnnotationsIO:
|
|
75
75
|
"""
|
76
76
|
filetype = "Excel"
|
77
77
|
# Log the loading of the Excel file
|
78
|
-
params.
|
78
|
+
params.log_annotation(
|
79
79
|
filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
|
80
80
|
)
|
81
81
|
self._log_loading(filetype, filepath=filepath)
|
@@ -87,11 +87,11 @@ class AnnotationsIO:
|
|
87
87
|
lambda x: x.split(nodes_delimiter)
|
88
88
|
)
|
89
89
|
# Convert the DataFrame to a dictionary pairing labels with their corresponding nodes
|
90
|
-
|
90
|
+
annotation_input = annotation.set_index(label_colname)[nodes_colname].to_dict()
|
91
91
|
|
92
|
-
return
|
92
|
+
return load_annotation(network, annotation_input, min_nodes_per_term)
|
93
93
|
|
94
|
-
def
|
94
|
+
def load_annotation_csv(
|
95
95
|
self,
|
96
96
|
network: nx.Graph,
|
97
97
|
filepath: str,
|
@@ -100,11 +100,11 @@ class AnnotationsIO:
|
|
100
100
|
nodes_delimiter: str = ";",
|
101
101
|
min_nodes_per_term: int = 2,
|
102
102
|
) -> Dict[str, Any]:
|
103
|
-
"""Load
|
103
|
+
"""Load annotation from a CSV file and associate them with the network.
|
104
104
|
|
105
105
|
Args:
|
106
|
-
network (nx.Graph): The NetworkX graph to which the
|
107
|
-
filepath (str): Path to the CSV
|
106
|
+
network (nx.Graph): The NetworkX graph to which the annotation is related.
|
107
|
+
filepath (str): Path to the CSV annotation file.
|
108
108
|
label_colname (str): Name of the column containing the labels (e.g., GO terms).
|
109
109
|
nodes_colname (str): Name of the column containing the nodes associated with each label.
|
110
110
|
nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
|
@@ -117,19 +117,19 @@ class AnnotationsIO:
|
|
117
117
|
"""
|
118
118
|
filetype = "CSV"
|
119
119
|
# Log the loading of the CSV file
|
120
|
-
params.
|
120
|
+
params.log_annotation(
|
121
121
|
filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
|
122
122
|
)
|
123
123
|
self._log_loading(filetype, filepath=filepath)
|
124
124
|
|
125
125
|
# Load the CSV file into a dictionary
|
126
|
-
|
126
|
+
annotation_input = self._load_matrix_file(
|
127
127
|
filepath, label_colname, nodes_colname, delimiter=",", nodes_delimiter=nodes_delimiter
|
128
128
|
)
|
129
129
|
|
130
|
-
return
|
130
|
+
return load_annotation(network, annotation_input, min_nodes_per_term)
|
131
131
|
|
132
|
-
def
|
132
|
+
def load_annotation_tsv(
|
133
133
|
self,
|
134
134
|
network: nx.Graph,
|
135
135
|
filepath: str,
|
@@ -138,11 +138,11 @@ class AnnotationsIO:
|
|
138
138
|
nodes_delimiter: str = ";",
|
139
139
|
min_nodes_per_term: int = 2,
|
140
140
|
) -> Dict[str, Any]:
|
141
|
-
"""Load
|
141
|
+
"""Load annotation from a TSV file and associate them with the network.
|
142
142
|
|
143
143
|
Args:
|
144
|
-
network (nx.Graph): The NetworkX graph to which the
|
145
|
-
filepath (str): Path to the TSV
|
144
|
+
network (nx.Graph): The NetworkX graph to which the annotation is related.
|
145
|
+
filepath (str): Path to the TSV annotation file.
|
146
146
|
label_colname (str): Name of the column containing the labels (e.g., GO terms).
|
147
147
|
nodes_colname (str): Name of the column containing the nodes associated with each label.
|
148
148
|
nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
|
@@ -155,31 +155,31 @@ class AnnotationsIO:
|
|
155
155
|
"""
|
156
156
|
filetype = "TSV"
|
157
157
|
# Log the loading of the TSV file
|
158
|
-
params.
|
158
|
+
params.log_annotation(
|
159
159
|
filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
|
160
160
|
)
|
161
161
|
self._log_loading(filetype, filepath=filepath)
|
162
162
|
|
163
163
|
# Load the TSV file into a dictionary
|
164
|
-
|
164
|
+
annotation_input = self._load_matrix_file(
|
165
165
|
filepath, label_colname, nodes_colname, delimiter="\t", nodes_delimiter=nodes_delimiter
|
166
166
|
)
|
167
167
|
|
168
|
-
return
|
168
|
+
return load_annotation(network, annotation_input, min_nodes_per_term)
|
169
169
|
|
170
|
-
def
|
170
|
+
def load_annotation_dict(
|
171
171
|
self, network: nx.Graph, content: Dict[str, Any], min_nodes_per_term: int = 2
|
172
172
|
) -> Dict[str, Any]:
|
173
|
-
"""Load
|
173
|
+
"""Load annotation from a provided dictionary and convert them to a dictionary annotation.
|
174
174
|
|
175
175
|
Args:
|
176
|
-
network (NetworkX graph): The network to which the
|
177
|
-
content (Dict[str, Any]): The
|
176
|
+
network (NetworkX graph): The network to which the annotation is related.
|
177
|
+
content (Dict[str, Any]): The annotation dictionary to load.
|
178
178
|
min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
|
179
179
|
term to be included. Defaults to 2.
|
180
180
|
|
181
181
|
Returns:
|
182
|
-
Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the
|
182
|
+
Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotation matrix.
|
183
183
|
|
184
184
|
Raises:
|
185
185
|
TypeError: If the content is not a dictionary.
|
@@ -191,12 +191,12 @@ class AnnotationsIO:
|
|
191
191
|
)
|
192
192
|
|
193
193
|
filetype = "Dictionary"
|
194
|
-
# Log the loading of the
|
195
|
-
params.
|
194
|
+
# Log the loading of the annotation from the dictionary
|
195
|
+
params.log_annotation(filepath="In-memory dictionary", filetype=filetype)
|
196
196
|
self._log_loading(filetype, "In-memory dictionary")
|
197
197
|
|
198
|
-
# Load the
|
199
|
-
return
|
198
|
+
# Load the annotation as a dictionary from the provided dictionary
|
199
|
+
return load_annotation(network, content, min_nodes_per_term)
|
200
200
|
|
201
201
|
def _load_matrix_file(
|
202
202
|
self,
|
@@ -206,7 +206,7 @@ class AnnotationsIO:
|
|
206
206
|
delimiter: str = ",",
|
207
207
|
nodes_delimiter: str = ";",
|
208
208
|
) -> Dict[str, Any]:
|
209
|
-
"""Load
|
209
|
+
"""Load annotation from a CSV or TSV file and convert them to a dictionary.
|
210
210
|
|
211
211
|
Args:
|
212
212
|
filepath (str): Path to the annotation file.
|
@@ -235,7 +235,7 @@ class AnnotationsIO:
|
|
235
235
|
filetype (str): The type of the file being loaded (e.g., 'Cytoscape').
|
236
236
|
filepath (str, optional): The path to the file being loaded.
|
237
237
|
"""
|
238
|
-
log_header("Loading
|
238
|
+
log_header("Loading annotation")
|
239
239
|
logger.debug(f"Filetype: {filetype}")
|
240
240
|
if filepath:
|
241
241
|
logger.debug(f"Filepath: {filepath}")
|
@@ -1,11 +1,11 @@
|
|
1
1
|
"""
|
2
|
-
risk/
|
3
|
-
|
2
|
+
risk/annotation/nltk_setup
|
3
|
+
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
4
4
|
"""
|
5
5
|
|
6
6
|
import os
|
7
7
|
import zipfile
|
8
|
-
from typing import List, Tuple
|
8
|
+
from typing import List, Optional, Tuple
|
9
9
|
|
10
10
|
import nltk
|
11
11
|
from nltk.data import find
|
@@ -14,7 +14,7 @@ from nltk.data import path as nltk_data_path
|
|
14
14
|
from risk.log import logger
|
15
15
|
|
16
16
|
|
17
|
-
def setup_nltk_resources(required_resources: List[Tuple[str, str]] = None) -> None:
|
17
|
+
def setup_nltk_resources(required_resources: Optional[List[Tuple[str, str]]] = None) -> None:
|
18
18
|
"""Ensures all required NLTK resources are available and properly extracted.
|
19
19
|
Uses NLTK's default paths and mechanisms.
|
20
20
|
|
@@ -21,7 +21,7 @@ class Params:
|
|
21
21
|
"""Handles the storage and logging of various parameters for network analysis.
|
22
22
|
|
23
23
|
The Params class provides methods to log parameters related to different components of the analysis,
|
24
|
-
such as the network,
|
24
|
+
such as the network, annotation, neighborhoods, graph, and plotter settings. It also stores
|
25
25
|
the current datetime when the parameters were initialized.
|
26
26
|
"""
|
27
27
|
|
@@ -33,7 +33,7 @@ class Params:
|
|
33
33
|
def initialize(self) -> None:
|
34
34
|
"""Initialize the parameter dictionaries for different components."""
|
35
35
|
self.network = {}
|
36
|
-
self.
|
36
|
+
self.annotation = {}
|
37
37
|
self.neighborhoods = {}
|
38
38
|
self.graph = {}
|
39
39
|
self.plotter = {}
|
@@ -46,13 +46,13 @@ class Params:
|
|
46
46
|
"""
|
47
47
|
self.network = {**self.network, **kwargs}
|
48
48
|
|
49
|
-
def
|
49
|
+
def log_annotation(self, **kwargs) -> None:
|
50
50
|
"""Log annotation-related parameters.
|
51
51
|
|
52
52
|
Args:
|
53
53
|
**kwargs: Annotation parameters to log.
|
54
54
|
"""
|
55
|
-
self.
|
55
|
+
self.annotation = {**self.annotation, **kwargs}
|
56
56
|
|
57
57
|
def log_neighborhoods(self, **kwargs) -> None:
|
58
58
|
"""Log neighborhood-related parameters.
|
@@ -139,7 +139,7 @@ class Params:
|
|
139
139
|
log_header("Loading parameters")
|
140
140
|
return self._convert_ndarray_to_list(
|
141
141
|
{
|
142
|
-
"
|
142
|
+
"annotation": self.annotation,
|
143
143
|
"datetime": self.datetime,
|
144
144
|
"graph": self.graph,
|
145
145
|
"neighborhoods": self.neighborhoods,
|