risk-network 0.0.11__py3-none-any.whl → 0.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. risk/__init__.py +1 -1
  2. risk/annotation/__init__.py +10 -0
  3. risk/{annotations/annotations.py → annotation/annotation.py} +44 -44
  4. risk/{annotations → annotation}/io.py +93 -92
  5. risk/{annotations → annotation}/nltk_setup.py +6 -5
  6. risk/log/__init__.py +1 -1
  7. risk/log/parameters.py +26 -27
  8. risk/neighborhoods/__init__.py +0 -1
  9. risk/neighborhoods/api.py +38 -38
  10. risk/neighborhoods/community.py +33 -4
  11. risk/neighborhoods/domains.py +26 -28
  12. risk/neighborhoods/neighborhoods.py +8 -2
  13. risk/neighborhoods/stats/__init__.py +13 -0
  14. risk/neighborhoods/stats/permutation/__init__.py +6 -0
  15. risk/{stats → neighborhoods/stats}/permutation/permutation.py +24 -21
  16. risk/{stats → neighborhoods/stats}/permutation/test_functions.py +4 -4
  17. risk/{stats/stat_tests.py → neighborhoods/stats/tests.py} +62 -54
  18. risk/network/__init__.py +0 -2
  19. risk/network/graph/__init__.py +0 -2
  20. risk/network/graph/api.py +19 -19
  21. risk/network/graph/graph.py +73 -68
  22. risk/{stats/significance.py → network/graph/stats.py} +2 -2
  23. risk/network/graph/summary.py +12 -13
  24. risk/network/io.py +163 -20
  25. risk/network/plotter/__init__.py +0 -2
  26. risk/network/plotter/api.py +1 -1
  27. risk/network/plotter/canvas.py +36 -36
  28. risk/network/plotter/contour.py +14 -15
  29. risk/network/plotter/labels.py +303 -294
  30. risk/network/plotter/network.py +6 -6
  31. risk/network/plotter/plotter.py +8 -10
  32. risk/network/plotter/utils/colors.py +15 -8
  33. risk/network/plotter/utils/layout.py +3 -3
  34. risk/risk.py +6 -6
  35. risk_network-0.0.12.dist-info/METADATA +122 -0
  36. risk_network-0.0.12.dist-info/RECORD +40 -0
  37. {risk_network-0.0.11.dist-info → risk_network-0.0.12.dist-info}/WHEEL +1 -1
  38. risk/annotations/__init__.py +0 -7
  39. risk/network/geometry.py +0 -150
  40. risk/stats/__init__.py +0 -15
  41. risk/stats/permutation/__init__.py +0 -6
  42. risk_network-0.0.11.dist-info/METADATA +0 -798
  43. risk_network-0.0.11.dist-info/RECORD +0 -41
  44. {risk_network-0.0.11.dist-info → risk_network-0.0.12.dist-info/licenses}/LICENSE +0 -0
  45. {risk_network-0.0.11.dist-info → risk_network-0.0.12.dist-info}/top_level.txt +0 -0
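The headline change in 0.0.12 is a plural-to-singular rename that runs through the public API: the annotations package becomes annotation, AnnotationsIO becomes AnnotationIO, the loader methods move from load_<format>_annotation to load_annotation_<format>, and returned keys such as "ordered_annotations" become "ordered_annotation". A migration sketch for a hypothetical downstream call site (the toy graph and term are illustrative, not from the package; the names on both sides come from the hunks below):

    # Hypothetical downstream call site; the API names on both sides come from this diff.
    import networkx as nx

    network = nx.Graph()
    network.add_node(0, label="RAD51")
    network.add_node(1, label="BRCA1")
    network.add_node(2, label="BRCA2")
    content = {"DNA repair": ["RAD51", "BRCA1", "BRCA2"]}

    # 0.0.11:
    #   from risk.annotations.io import AnnotationsIO
    #   result = AnnotationsIO().load_dict_annotation(network, content)

    # 0.0.12:
    from risk.annotation.io import AnnotationIO

    result = AnnotationIO().load_annotation_dict(network, content)
    print(result["ordered_annotation"])  # key renamed from "ordered_annotations"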
risk/__init__.py CHANGED
@@ -7,4 +7,4 @@ RISK: Regional Inference of Significant Kinships
 
 from risk.risk import RISK
 
-__version__ = "0.0.11"
+__version__ = "0.0.12"
risk/annotation/__init__.py ADDED
@@ -0,0 +1,10 @@
+"""
+risk/annotation
+~~~~~~~~~~~~~~~
+"""
+
+from risk.annotation.annotation import (
+    define_top_annotation,
+    get_weighted_description,
+)
+from risk.annotation.io import AnnotationIO
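The new package initializer re-exports the public names, so downstream code can import from the package root; a usage sketch against 0.0.12:

    # Usage sketch: the names re-exported by risk/annotation/__init__.py.
    from risk.annotation import AnnotationIO, define_top_annotation, get_weighted_description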
risk/{annotations/annotations.py → annotation/annotation.py} RENAMED
@@ -1,6 +1,6 @@
 """
-risk/annotations/annotations
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+risk/annotation/annotation
+~~~~~~~~~~~~~~~~~~~~~~~~~~
 """
 
 import re
@@ -14,7 +14,7 @@ import pandas as pd
 from nltk.tokenize import word_tokenize
 from scipy.sparse import coo_matrix
 
-from risk.annotations.nltk_setup import setup_nltk_resources
+from risk.annotation.nltk_setup import setup_nltk_resources
 from risk.log import logger
 
 
@@ -35,14 +35,14 @@ def initialize_nltk():
 initialize_nltk()
 
 
-def load_annotations(
-    network: nx.Graph, annotations_input: Dict[str, Any], min_nodes_per_term: int = 2
+def load_annotation(
+    network: nx.Graph, annotation_input: Dict[str, Any], min_nodes_per_term: int = 2
 ) -> Dict[str, Any]:
-    """Convert annotations input to a sparse matrix and reindex based on the network's node labels.
+    """Convert annotation input to a sparse matrix and reindex based on the network's node labels.
 
     Args:
         network (nx.Graph): The network graph.
-        annotations_input (Dict[str, Any]): A dictionary with annotations.
+        annotation_input (Dict[str, Any]): An annotation dictionary.
         min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
             term to be included. Defaults to 2.
 
@@ -51,18 +51,18 @@ def load_annotations(
         matrix.
 
     Raises:
-        ValueError: If no annotations are found for the nodes in the network.
-        ValueError: If no annotations have at least min_nodes_per_term nodes in the network.
+        ValueError: If no annotation is found for the nodes in the network.
+        ValueError: If no annotation has at least min_nodes_per_term nodes in the network.
     """
     # Step 1: Map nodes and annotations to indices
    node_label_order = [attr["label"] for _, attr in network.nodes(data=True) if "label" in attr]
     node_to_idx = {node: i for i, node in enumerate(node_label_order)}
-    annotation_to_idx = {annotation: i for i, annotation in enumerate(annotations_input)}
+    annotation_to_idx = {annotation: i for i, annotation in enumerate(annotation_input)}
     # Step 2: Construct a sparse binary matrix directly
     row = []
     col = []
     data = []
-    for annotation, nodes in annotations_input.items():
+    for annotation, nodes in annotation_input.items():
         for node in nodes:
             if node in node_to_idx and annotation in annotation_to_idx:
                 row.append(node_to_idx[node])
@@ -71,40 +71,40 @@ def load_annotations(
 
     # Create a sparse binary matrix
     num_nodes = len(node_to_idx)
-    num_annotations = len(annotation_to_idx)
-    annotations_pivot = coo_matrix((data, (row, col)), shape=(num_nodes, num_annotations)).tocsr()
+    num_annotation = len(annotation_to_idx)
+    annotation_pivot = coo_matrix((data, (row, col)), shape=(num_nodes, num_annotation)).tocsr()
     # Step 3: Filter out annotations with fewer than min_nodes_per_term occurrences
-    valid_annotations = annotations_pivot.sum(axis=0).A1 >= min_nodes_per_term
-    annotations_pivot = annotations_pivot[:, valid_annotations]
+    valid_annotation = annotation_pivot.sum(axis=0).A1 >= min_nodes_per_term
+    annotation_pivot = annotation_pivot[:, valid_annotation]
     # Step 4: Raise errors for empty matrices
-    if annotations_pivot.nnz == 0:
+    if annotation_pivot.nnz == 0:
         raise ValueError("No terms found in the annotation file for the nodes in the network.")
 
-    num_remaining_annotations = annotations_pivot.shape[1]
-    if num_remaining_annotations == 0:
+    num_remaining_annotation = annotation_pivot.shape[1]
+    if num_remaining_annotation == 0:
         raise ValueError(
             f"No annotation terms found with at least {min_nodes_per_term} nodes in the network."
         )
 
     # Step 5: Extract ordered nodes and annotations
     ordered_nodes = tuple(node_label_order)
-    ordered_annotations = tuple(
-        annotation for annotation, is_valid in zip(annotation_to_idx, valid_annotations) if is_valid
+    ordered_annotation = tuple(
+        annotation for annotation, is_valid in zip(annotation_to_idx, valid_annotation) if is_valid
     )
 
     # Log the filtering details
     logger.info(f"Minimum number of nodes per annotation term: {min_nodes_per_term}")
-    logger.info(f"Number of input annotation terms: {num_annotations}")
-    logger.info(f"Number of remaining annotation terms: {num_remaining_annotations}")
+    logger.info(f"Number of input annotation terms: {num_annotation}")
+    logger.info(f"Number of remaining annotation terms: {num_remaining_annotation}")
 
     return {
         "ordered_nodes": ordered_nodes,
-        "ordered_annotations": ordered_annotations,
-        "matrix": annotations_pivot,
+        "ordered_annotation": ordered_annotation,
+        "matrix": annotation_pivot,
     }
 
 
-def define_top_annotations(
+def define_top_annotation(
     network: nx.Graph,
     ordered_annotation_labels: List[str],
     neighborhood_significance_sums: List[int],
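Step 2 of load_annotation builds the node-by-term binary matrix from COO triplets and converts it to CSR, and Step 3 keeps only columns whose sums reach min_nodes_per_term. A standalone sketch of the same construction, with toy node and term names that are not from the package:

    # Standalone sketch of the COO-to-CSR construction used by load_annotation.
    from scipy.sparse import coo_matrix

    node_to_idx = {"A": 0, "B": 1, "C": 2}
    annotation_to_idx = {"term1": 0, "term2": 1}
    annotation_input = {"term1": ["A", "B"], "term2": ["B", "C", "missing"]}

    row, col, data = [], [], []
    for term, nodes in annotation_input.items():
        for node in nodes:
            # Unknown nodes (e.g. "missing") are skipped, exactly as above.
            if node in node_to_idx and term in annotation_to_idx:
                row.append(node_to_idx[node])
                col.append(annotation_to_idx[term])
                data.append(1)

    pivot = coo_matrix((data, (row, col)), shape=(3, 2)).tocsr()
    # Column sums (.A1 flattens the 1 x N result) drive the min_nodes_per_term filter.
    print(pivot.sum(axis=0).A1)  # [2 2]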
@@ -130,7 +130,7 @@ def define_top_annotations(
     # Sum the columns of the significant significance matrix (positive floating point values)
     significant_significance_scores = significant_significance_matrix.sum(axis=0)
     # Create DataFrame to store annotations, their neighborhood significance sums, and significance scores
-    annotations_significance_matrix = pd.DataFrame(
+    annotation_significance_matrix = pd.DataFrame(
         {
             "id": range(len(ordered_annotation_labels)),
             "full_terms": ordered_annotation_labels,
@@ -138,29 +138,29 @@ def define_top_annotations(
             "significant_significance_score": significant_significance_scores,
         }
     )
-    annotations_significance_matrix["significant_annotations"] = False
+    annotation_significance_matrix["significant_annotation"] = False
     # Apply size constraints to identify potential significant annotations
-    annotations_significance_matrix.loc[
+    annotation_significance_matrix.loc[
         (
-            annotations_significance_matrix["significant_neighborhood_significance_sums"]
+            annotation_significance_matrix["significant_neighborhood_significance_sums"]
             >= min_cluster_size
         )
         & (
-            annotations_significance_matrix["significant_neighborhood_significance_sums"]
+            annotation_significance_matrix["significant_neighborhood_significance_sums"]
             <= max_cluster_size
         ),
-        "significant_annotations",
+        "significant_annotation",
     ] = True
     # Initialize columns for connected components analysis
-    annotations_significance_matrix["num_connected_components"] = 0
-    annotations_significance_matrix["size_connected_components"] = None
-    annotations_significance_matrix["size_connected_components"] = annotations_significance_matrix[
+    annotation_significance_matrix["num_connected_components"] = 0
+    annotation_significance_matrix["size_connected_components"] = None
+    annotation_significance_matrix["size_connected_components"] = annotation_significance_matrix[
         "size_connected_components"
     ].astype(object)
-    annotations_significance_matrix["num_large_connected_components"] = 0
+    annotation_significance_matrix["num_large_connected_components"] = 0
 
-    for attribute in annotations_significance_matrix.index.values[
-        annotations_significance_matrix["significant_annotations"]
+    for attribute in annotation_significance_matrix.index.values[
+        annotation_significance_matrix["significant_annotation"]
     ]:
         # Identify significant neighborhoods based on the binary significance matrix
         significant_neighborhoods = list(
@@ -183,24 +183,24 @@ def define_top_annotations(
         num_large_connected_components = len(filtered_size_connected_components)
 
         # Assign the number of connected components
-        annotations_significance_matrix.loc[attribute, "num_connected_components"] = (
+        annotation_significance_matrix.loc[attribute, "num_connected_components"] = (
             num_connected_components
         )
         # Filter out attributes with more than one connected component
-        annotations_significance_matrix.loc[
-            annotations_significance_matrix["num_connected_components"] > 1,
-            "significant_annotations",
+        annotation_significance_matrix.loc[
+            annotation_significance_matrix["num_connected_components"] > 1,
+            "significant_annotation",
         ] = False
         # Assign the number of large connected components
-        annotations_significance_matrix.loc[attribute, "num_large_connected_components"] = (
+        annotation_significance_matrix.loc[attribute, "num_large_connected_components"] = (
             num_large_connected_components
         )
         # Assign the size of connected components, ensuring it is always a list
-        annotations_significance_matrix.at[attribute, "size_connected_components"] = (
+        annotation_significance_matrix.at[attribute, "size_connected_components"] = (
             filtered_size_connected_components.tolist()
         )
 
-    return annotations_significance_matrix
+    return annotation_significance_matrix
 
 
 def get_weighted_description(words_column: pd.Series, scores_column: pd.Series) -> str:
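define_top_annotation leans on two pandas idioms visible in the hunks above: a boolean mask written through .loc to flag terms whose neighborhood sums fall inside the cluster-size window, and single-cell .at assignment so list values survive in an object-dtype column. A toy illustration with invented data:

    # Toy illustration of the .loc mask / .at cell-assignment idioms; data invented.
    import pandas as pd

    df = pd.DataFrame({"sums": [1, 5, 12, 30]})
    df["significant"] = False
    df.loc[(df["sums"] >= 5) & (df["sums"] <= 20), "significant"] = True

    # .at writes one cell, so a list is stored as a list, not broadcast.
    df["sizes"] = None
    df["sizes"] = df["sizes"].astype(object)
    df.at[1, "sizes"] = [3, 2]
    print(df)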
risk/{annotations → annotation}/io.py RENAMED
@@ -1,6 +1,6 @@
 """
-risk/annotations/io
-~~~~~~~~~~~~~~~~~~~
+risk/annotation/io
+~~~~~~~~~~~~~~~~~~
 """
 
 import json
@@ -9,48 +9,45 @@ from typing import Any, Dict
 import networkx as nx
 import pandas as pd
 
-from risk.annotations.annotations import load_annotations
-from risk.log import params, logger, log_header
+from risk.annotation.annotation import load_annotation
+from risk.log import log_header, logger, params
 
 
-class AnnotationsIO:
-    """Handles the loading and exporting of annotations in various file formats.
+class AnnotationIO:
+    """Handles the loading and exporting of annotation in various file formats.
 
-    The AnnotationsIO class provides methods to load annotations from different file types (JSON, CSV, Excel, etc.)
+    The AnnotationIO class provides methods to load annotation from different file types (JSON, CSV, Excel, etc.)
     and to export parameter data to various formats like JSON, CSV, and text files.
     """
 
-    def __init__(self):
-        pass
-
-    def load_json_annotation(
+    def load_annotation_json(
         self, network: nx.Graph, filepath: str, min_nodes_per_term: int = 2
     ) -> Dict[str, Any]:
-        """Load annotations from a JSON file and convert them to a DataFrame.
+        """Load annotation from a JSON file and convert them to a DataFrame.
 
         Args:
-            network (NetworkX graph): The network to which the annotations are related.
-            filepath (str): Path to the JSON annotations file.
+            network (NetworkX graph): The network to which the annotation is related.
+            filepath (str): Path to the JSON annotation file.
             min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
                 term to be included. Defaults to 2.
 
         Returns:
-            Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotations matrix.
+            Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotation matrix.
         """
         filetype = "JSON"
         # Log the loading of the JSON file
-        params.log_annotations(
+        params.log_annotation(
             filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
         )
-        _log_loading(filetype, filepath=filepath)
+        self._log_loading(filetype, filepath=filepath)
 
         # Load the JSON file into a dictionary
         with open(filepath, "r", encoding="utf-8") as file:
-            annotations_input = json.load(file)
+            annotation_input = json.load(file)
 
-        return load_annotations(network, annotations_input, min_nodes_per_term)
+        return load_annotation(network, annotation_input, min_nodes_per_term)
 
-    def load_excel_annotation(
+    def load_annotation_excel(
         self,
         network: nx.Graph,
         filepath: str,
@@ -60,11 +57,11 @@ class AnnotationsIO:
         nodes_delimiter: str = ";",
         min_nodes_per_term: int = 2,
     ) -> Dict[str, Any]:
-        """Load annotations from an Excel file and associate them with the network.
+        """Load annotation from an Excel file and associate them with the network.
 
         Args:
-            network (nx.Graph): The NetworkX graph to which the annotations are related.
-            filepath (str): Path to the Excel annotations file.
+            network (nx.Graph): The NetworkX graph to which the annotation is related.
+            filepath (str): Path to the Excel annotation file.
             label_colname (str): Name of the column containing the labels (e.g., GO terms).
             nodes_colname (str): Name of the column containing the nodes associated with each label.
             sheet_name (str, optional): The name of the Excel sheet to load (default is 'Sheet1').
@@ -78,10 +75,10 @@ class AnnotationsIO:
         """
         filetype = "Excel"
         # Log the loading of the Excel file
-        params.log_annotations(
+        params.log_annotation(
             filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
         )
-        _log_loading(filetype, filepath=filepath)
+        self._log_loading(filetype, filepath=filepath)
 
         # Load the specified sheet from the Excel file
         annotation = pd.read_excel(filepath, sheet_name=sheet_name)
@@ -90,11 +87,11 @@ class AnnotationsIO:
             lambda x: x.split(nodes_delimiter)
         )
         # Convert the DataFrame to a dictionary pairing labels with their corresponding nodes
-        annotations_input = annotation.set_index(label_colname)[nodes_colname].to_dict()
+        annotation_input = annotation.set_index(label_colname)[nodes_colname].to_dict()
 
-        return load_annotations(network, annotations_input, min_nodes_per_term)
+        return load_annotation(network, annotation_input, min_nodes_per_term)
 
-    def load_csv_annotation(
+    def load_annotation_csv(
         self,
         network: nx.Graph,
         filepath: str,
@@ -103,11 +100,11 @@ class AnnotationsIO:
         nodes_delimiter: str = ";",
         min_nodes_per_term: int = 2,
     ) -> Dict[str, Any]:
-        """Load annotations from a CSV file and associate them with the network.
+        """Load annotation from a CSV file and associate them with the network.
 
         Args:
-            network (nx.Graph): The NetworkX graph to which the annotations are related.
-            filepath (str): Path to the CSV annotations file.
+            network (nx.Graph): The NetworkX graph to which the annotation is related.
+            filepath (str): Path to the CSV annotation file.
             label_colname (str): Name of the column containing the labels (e.g., GO terms).
             nodes_colname (str): Name of the column containing the nodes associated with each label.
             nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
@@ -120,19 +117,19 @@ class AnnotationsIO:
         """
         filetype = "CSV"
         # Log the loading of the CSV file
-        params.log_annotations(
+        params.log_annotation(
             filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
         )
-        _log_loading(filetype, filepath=filepath)
+        self._log_loading(filetype, filepath=filepath)
 
         # Load the CSV file into a dictionary
-        annotations_input = _load_matrix_file(
+        annotation_input = self._load_matrix_file(
             filepath, label_colname, nodes_colname, delimiter=",", nodes_delimiter=nodes_delimiter
         )
 
-        return load_annotations(network, annotations_input, min_nodes_per_term)
+        return load_annotation(network, annotation_input, min_nodes_per_term)
 
-    def load_tsv_annotation(
+    def load_annotation_tsv(
         self,
         network: nx.Graph,
         filepath: str,
@@ -141,11 +138,11 @@ class AnnotationsIO:
         nodes_delimiter: str = ";",
         min_nodes_per_term: int = 2,
     ) -> Dict[str, Any]:
-        """Load annotations from a TSV file and associate them with the network.
+        """Load annotation from a TSV file and associate them with the network.
 
         Args:
-            network (nx.Graph): The NetworkX graph to which the annotations are related.
-            filepath (str): Path to the TSV annotations file.
+            network (nx.Graph): The NetworkX graph to which the annotation is related.
+            filepath (str): Path to the TSV annotation file.
             label_colname (str): Name of the column containing the labels (e.g., GO terms).
             nodes_colname (str): Name of the column containing the nodes associated with each label.
             nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
@@ -158,31 +155,34 @@ class AnnotationsIO:
         """
         filetype = "TSV"
         # Log the loading of the TSV file
-        params.log_annotations(
+        params.log_annotation(
             filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
         )
-        _log_loading(filetype, filepath=filepath)
+        self._log_loading(filetype, filepath=filepath)
 
         # Load the TSV file into a dictionary
-        annotations_input = _load_matrix_file(
+        annotation_input = self._load_matrix_file(
             filepath, label_colname, nodes_colname, delimiter="\t", nodes_delimiter=nodes_delimiter
         )
 
-        return load_annotations(network, annotations_input, min_nodes_per_term)
+        return load_annotation(network, annotation_input, min_nodes_per_term)
 
-    def load_dict_annotation(
+    def load_annotation_dict(
         self, network: nx.Graph, content: Dict[str, Any], min_nodes_per_term: int = 2
     ) -> Dict[str, Any]:
-        """Load annotations from a provided dictionary and convert them to a dictionary annotation.
+        """Load annotation from a provided dictionary and convert them to a dictionary annotation.
 
         Args:
-            network (NetworkX graph): The network to which the annotations are related.
-            content (Dict[str, Any]): The annotations dictionary to load.
+            network (NetworkX graph): The network to which the annotation is related.
+            content (Dict[str, Any]): The annotation dictionary to load.
             min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
                 term to be included. Defaults to 2.
 
         Returns:
-            Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotations matrix.
+            Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotation matrix.
+
+        Raises:
+            TypeError: If the content is not a dictionary.
         """
         # Ensure the input content is a dictionary
         if not isinstance(content, dict):
@@ -191,50 +191,51 @@ class AnnotationsIO:
             )
 
         filetype = "Dictionary"
-        # Log the loading of the annotations from the dictionary
-        params.log_annotations(filepath="In-memory dictionary", filetype=filetype)
-        _log_loading(filetype, "In-memory dictionary")
-
-        # Load the annotations as a dictionary from the provided dictionary
-        return load_annotations(network, content, min_nodes_per_term)
-
-
-def _load_matrix_file(
-    filepath: str,
-    label_colname: str,
-    nodes_colname: str,
-    delimiter: str = ",",
-    nodes_delimiter: str = ";",
-) -> Dict[str, Any]:
-    """Load annotations from a CSV or TSV file and convert them to a dictionary.
-
-    Args:
-        filepath (str): Path to the annotation file.
-        label_colname (str): Name of the column containing the labels (e.g., GO terms).
-        nodes_colname (str): Name of the column containing the nodes associated with each label.
-        delimiter (str, optional): Delimiter used to separate columns in the file (default is ',').
-        nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
-
-    Returns:
-        Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes.
-    """
-    # Load the CSV or TSV file into a DataFrame
-    annotation = pd.read_csv(filepath, delimiter=delimiter)
-    # Split the nodes column by the nodes_delimiter to handle multiple nodes per label
-    annotation[nodes_colname] = annotation[nodes_colname].apply(lambda x: x.split(nodes_delimiter))
-    # Create a dictionary pairing labels with their corresponding list of nodes
-    label_node_dict = annotation.set_index(label_colname)[nodes_colname].to_dict()
-    return label_node_dict
+        # Log the loading of the annotation from the dictionary
+        params.log_annotation(filepath="In-memory dictionary", filetype=filetype)
+        self._log_loading(filetype, "In-memory dictionary")
 
+        # Load the annotation as a dictionary from the provided dictionary
+        return load_annotation(network, content, min_nodes_per_term)
 
-def _log_loading(filetype: str, filepath: str = "") -> None:
-    """Log information about the network file being loaded.
+    def _load_matrix_file(
+        self,
+        filepath: str,
+        label_colname: str,
+        nodes_colname: str,
+        delimiter: str = ",",
+        nodes_delimiter: str = ";",
+    ) -> Dict[str, Any]:
+        """Load annotation from a CSV or TSV file and convert them to a dictionary.
 
-    Args:
-        filetype (str): The type of the file being loaded (e.g., 'Cytoscape').
-        filepath (str, optional): The path to the file being loaded.
-    """
-    log_header("Loading annotations")
-    logger.debug(f"Filetype: {filetype}")
-    if filepath:
-        logger.debug(f"Filepath: {filepath}")
+        Args:
+            filepath (str): Path to the annotation file.
+            label_colname (str): Name of the column containing the labels (e.g., GO terms).
+            nodes_colname (str): Name of the column containing the nodes associated with each label.
+            delimiter (str, optional): Delimiter used to separate columns in the file (default is ',').
+            nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
+
+        Returns:
+            Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes.
+        """
+        # Load the CSV or TSV file into a DataFrame
+        annotation = pd.read_csv(filepath, delimiter=delimiter)
+        # Split the nodes column by the nodes_delimiter to handle multiple nodes per label
+        annotation[nodes_colname] = annotation[nodes_colname].apply(
+            lambda x: x.split(nodes_delimiter)
+        )
+        # Create a dictionary pairing labels with their corresponding list of nodes
+        label_node_dict = annotation.set_index(label_colname)[nodes_colname].to_dict()
+        return label_node_dict
+
+    def _log_loading(self, filetype: str, filepath: str = "") -> None:
+        """Log information about the network file being loaded.
+
+        Args:
+            filetype (str): The type of the file being loaded (e.g., 'Cytoscape').
+            filepath (str, optional): The path to the file being loaded.
+        """
+        log_header("Loading annotation")
+        logger.debug(f"Filetype: {filetype}")
+        if filepath:
+            logger.debug(f"Filepath: {filepath}")
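Beyond the renames, these io.py hunks fold the module-level helpers _load_matrix_file and _log_loading into AnnotationIO as instance methods and drop the placeholder __init__. A sketch of the CSV layout that _load_matrix_file parses; the column names and file contents here are illustrative:

    # Write an illustrative annotation CSV, then parse it through the 0.0.12 API.
    import csv

    rows = [
        {"label": "GO:0006281", "nodes": "RAD51;BRCA1;BRCA2"},
        {"label": "GO:0006310", "nodes": "RAD51;RAD52"},
    ]
    with open("annotation.csv", "w", newline="") as fh:
        writer = csv.DictWriter(fh, fieldnames=["label", "nodes"])
        writer.writeheader()
        writer.writerows(rows)

    # With a prepared NetworkX graph whose nodes carry "label" attributes:
    # from risk.annotation.io import AnnotationIO
    # result = AnnotationIO().load_annotation_csv(
    #     network, "annotation.csv", label_colname="label", nodes_colname="nodes"
    # )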
risk/{annotations → annotation}/nltk_setup.py RENAMED
@@ -1,19 +1,20 @@
 """
-risk/annotations/nltk_setup
-~~~~~~~~~~~~~~~~~~~~~~~~~~~
+risk/annotation/nltk_setup
+~~~~~~~~~~~~~~~~~~~~~~~~~~
 """
 
 import os
 import zipfile
-from typing import List, Tuple
+from typing import List, Optional, Tuple
 
 import nltk
-from nltk.data import find, path as nltk_data_path
+from nltk.data import find
+from nltk.data import path as nltk_data_path
 
 from risk.log import logger
 
 
-def setup_nltk_resources(required_resources: List[Tuple[str, str]] = None) -> None:
+def setup_nltk_resources(required_resources: Optional[List[Tuple[str, str]]] = None) -> None:
     """Ensures all required NLTK resources are available and properly extracted.
     Uses NLTK's default paths and mechanisms.
 
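The nltk_setup hunk also corrects the signature: a None default on a parameter annotated List[Tuple[str, str]] is flagged by type checkers with implicit Optional disabled, hence Optional[List[Tuple[str, str]]]. A minimal illustration of the pattern; the fallback resource below is an assumption, not the package's actual default:

    # Minimal illustration of the Optional-default pattern adopted above.
    from typing import List, Optional, Tuple

    def setup(resources: Optional[List[Tuple[str, str]]] = None) -> None:
        if resources is None:
            # Hypothetical fallback; the real default lives in setup_nltk_resources.
            resources = [("tokenizers/punkt", "punkt")]
        for locator, name in resources:
            print(f"would ensure {name} is available via {locator}")

    setup()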
risk/log/__init__.py CHANGED
@@ -3,7 +3,7 @@ risk/log
 ~~~~~~~~
 """
 
-from risk.log.console import logger, log_header, set_global_verbosity
+from risk.log.console import log_header, logger, set_global_verbosity
 from risk.log.parameters import Params
 
 # Initialize the global parameters logger
risk/log/parameters.py CHANGED
@@ -11,7 +11,7 @@ from typing import Any, Dict
 
 import numpy as np
 
-from risk.log.console import logger, log_header
+from risk.log.console import log_header, logger
 
 # Suppress all warnings - this is to resolve warnings from multiprocessing
 warnings.filterwarnings("ignore")
@@ -21,7 +21,7 @@ class Params:
     """Handles the storage and logging of various parameters for network analysis.
 
     The Params class provides methods to log parameters related to different components of the analysis,
-    such as the network, annotations, neighborhoods, graph, and plotter settings. It also stores
+    such as the network, annotation, neighborhoods, graph, and plotter settings. It also stores
     the current datetime when the parameters were initialized.
     """
 
@@ -33,7 +33,7 @@ class Params:
     def initialize(self) -> None:
         """Initialize the parameter dictionaries for different components."""
         self.network = {}
-        self.annotations = {}
+        self.annotation = {}
         self.neighborhoods = {}
         self.graph = {}
         self.plotter = {}
@@ -46,13 +46,13 @@ class Params:
         """
         self.network = {**self.network, **kwargs}
 
-    def log_annotations(self, **kwargs) -> None:
+    def log_annotation(self, **kwargs) -> None:
         """Log annotation-related parameters.
 
         Args:
             **kwargs: Annotation parameters to log.
         """
-        self.annotations = {**self.annotations, **kwargs}
+        self.annotation = {**self.annotation, **kwargs}
 
     def log_neighborhoods(self, **kwargs) -> None:
         """Log neighborhood-related parameters.
@@ -137,9 +137,9 @@ class Params:
             Dict[str, Any]: A dictionary containing the processed parameters.
         """
         log_header("Loading parameters")
-        return _convert_ndarray_to_list(
+        return self._convert_ndarray_to_list(
             {
-                "annotations": self.annotations,
+                "annotation": self.annotation,
                 "datetime": self.datetime,
                 "graph": self.graph,
                 "neighborhoods": self.neighborhoods,
@@ -148,25 +148,24 @@ class Params:
             }
         )
 
+    def _convert_ndarray_to_list(self, d: Dict[str, Any]) -> Dict[str, Any]:
+        """Recursively convert all np.ndarray values in the dictionary to lists.
 
-def _convert_ndarray_to_list(d: Dict[str, Any]) -> Dict[str, Any]:
-    """Recursively convert all np.ndarray values in the dictionary to lists.
-
-    Args:
-        d (Dict[str, Any]): The dictionary to process.
+        Args:
+            d (Dict[str, Any]): The dictionary to process.
 
-    Returns:
-        Dict[str, Any]: The processed dictionary with np.ndarray values converted to lists.
-    """
-    if isinstance(d, dict):
-        # Recursively process each value in the dictionary
-        return {k: _convert_ndarray_to_list(v) for k, v in d.items()}
-    if isinstance(d, list):
-        # Recursively process each item in the list
-        return [_convert_ndarray_to_list(v) for v in d]
-    if isinstance(d, np.ndarray):
-        # Convert numpy arrays to lists
-        return d.tolist()
-
-    # Return the value unchanged if it's not a dict, List, or ndarray
-    return d
+        Returns:
+            Dict[str, Any]: The processed dictionary with np.ndarray values converted to lists.
+        """
+        if isinstance(d, dict):
+            # Recursively process each value in the dictionary
+            return {k: self._convert_ndarray_to_list(v) for k, v in d.items()}
+        if isinstance(d, list):
+            # Recursively process each item in the list
+            return [self._convert_ndarray_to_list(v) for v in d]
+        if isinstance(d, np.ndarray):
+            # Convert numpy arrays to lists
+            return d.tolist()
+
+        # Return the value unchanged if it's not a dict, List, or ndarray
+        return d
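For reference, the relocated helper's recursion turns nested ndarrays inside the logged parameters into JSON-serializable lists; a standalone sketch of equivalent behavior:

    # Standalone sketch mirroring _convert_ndarray_to_list's recursion.
    import json

    import numpy as np

    def convert(d):
        if isinstance(d, dict):
            return {k: convert(v) for k, v in d.items()}
        if isinstance(d, list):
            return [convert(v) for v in d]
        if isinstance(d, np.ndarray):
            return d.tolist()
        return d

    params = {"graph": {"coords": np.eye(2)}, "network": [np.arange(3)]}
    print(json.dumps(convert(params)))  # now serializable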