risk-network 0.0.10__py3-none-any.whl → 0.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. risk/__init__.py +1 -1
  2. risk/annotation/__init__.py +10 -0
  3. risk/{annotations/annotations.py → annotation/annotation.py} +62 -102
  4. risk/{annotations → annotation}/io.py +93 -92
  5. risk/annotation/nltk_setup.py +86 -0
  6. risk/log/__init__.py +1 -1
  7. risk/log/parameters.py +26 -27
  8. risk/neighborhoods/__init__.py +0 -1
  9. risk/neighborhoods/api.py +38 -38
  10. risk/neighborhoods/community.py +33 -4
  11. risk/neighborhoods/domains.py +26 -28
  12. risk/neighborhoods/neighborhoods.py +8 -2
  13. risk/neighborhoods/stats/__init__.py +13 -0
  14. risk/neighborhoods/stats/permutation/__init__.py +6 -0
  15. risk/{stats → neighborhoods/stats}/permutation/permutation.py +24 -21
  16. risk/{stats → neighborhoods/stats}/permutation/test_functions.py +5 -4
  17. risk/{stats/stat_tests.py → neighborhoods/stats/tests.py} +62 -54
  18. risk/network/__init__.py +0 -2
  19. risk/network/graph/__init__.py +0 -2
  20. risk/network/graph/api.py +19 -19
  21. risk/network/graph/graph.py +73 -68
  22. risk/{stats/significance.py → network/graph/stats.py} +2 -2
  23. risk/network/graph/summary.py +12 -13
  24. risk/network/io.py +163 -20
  25. risk/network/plotter/__init__.py +0 -2
  26. risk/network/plotter/api.py +1 -1
  27. risk/network/plotter/canvas.py +36 -36
  28. risk/network/plotter/contour.py +14 -15
  29. risk/network/plotter/labels.py +303 -294
  30. risk/network/plotter/network.py +6 -6
  31. risk/network/plotter/plotter.py +8 -10
  32. risk/network/plotter/utils/colors.py +15 -8
  33. risk/network/plotter/utils/layout.py +3 -3
  34. risk/risk.py +6 -7
  35. risk_network-0.0.12.dist-info/METADATA +122 -0
  36. risk_network-0.0.12.dist-info/RECORD +40 -0
  37. {risk_network-0.0.10.dist-info → risk_network-0.0.12.dist-info}/WHEEL +1 -1
  38. risk/annotations/__init__.py +0 -7
  39. risk/network/geometry.py +0 -150
  40. risk/stats/__init__.py +0 -15
  41. risk/stats/permutation/__init__.py +0 -6
  42. risk_network-0.0.10.dist-info/METADATA +0 -798
  43. risk_network-0.0.10.dist-info/RECORD +0 -40
  44. {risk_network-0.0.10.dist-info → risk_network-0.0.12.dist-info/licenses}/LICENSE +0 -0
  45. {risk_network-0.0.10.dist-info → risk_network-0.0.12.dist-info}/top_level.txt +0 -0
risk/__init__.py CHANGED
@@ -7,4 +7,4 @@ RISK: Regional Inference of Significant Kinships
 
  from risk.risk import RISK
 
- __version__ = "0.0.10"
+ __version__ = "0.0.12"
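After upgrading, the installed release can be confirmed directly from this attribute (a minimal check, assuming the 0.0.12 wheel is installed in the current environment):

import risk

print(risk.__version__)  # expected to print "0.0.12" after the upgrade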
risk/annotation/__init__.py ADDED
@@ -0,0 +1,10 @@
+ """
+ risk/annotation
+ ~~~~~~~~~~~~~~~
+ """
+
+ from risk.annotation.annotation import (
+     define_top_annotation,
+     get_weighted_description,
+ )
+ from risk.annotation.io import AnnotationIO
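The new subpackage re-exports the renamed public names at the risk.annotation level, so imports that previously targeted risk.annotations need the singular module path. A minimal sketch of the updated imports (assumes risk-network 0.0.12 is installed):

from risk.annotation import AnnotationIO, define_top_annotation, get_weighted_description

io = AnnotationIO()  # 0.0.12 class name; 0.0.10 exposed this as AnnotationsIO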
risk/{annotations/annotations.py → annotation/annotation.py} RENAMED
@@ -1,88 +1,48 @@
  """
- risk/annotations/annotations
- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ risk/annotation/annotation
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~
  """
 
- import os
  import re
- import zipfile
  from collections import Counter
  from itertools import compress
  from typing import Any, Dict, List, Set
 
  import networkx as nx
- import nltk
  import numpy as np
  import pandas as pd
- from nltk.corpus import stopwords
- from nltk.stem import WordNetLemmatizer
  from nltk.tokenize import word_tokenize
+ from scipy.sparse import coo_matrix
 
+ from risk.annotation.nltk_setup import setup_nltk_resources
  from risk.log import logger
- from scipy.sparse import coo_matrix
 
 
- def ensure_nltk_resource(resource: str) -> None:
-     """Ensure the specified NLTK resource is available."""
-     # Define the path to the resource within the NLTK data directory
-     resource_path = f"corpora/{resource}"
-     # Check if the resource is already available.
-     try:
-         nltk.data.find(resource_path)
-         return
-     except LookupError:
-         print(f"Resource '{resource}' not found. Attempting to download...")
-
-     # Download the resource.
-     nltk.download(resource)
-     # Check again after downloading.
-     try:
-         nltk.data.find(resource_path)
-         return
-     except LookupError:
-         print(f"Resource '{resource}' still not found after download. Checking for a ZIP file...")
-
-     # Look for a ZIP file in all known NLTK data directories.
-     for data_path in nltk.data.path:
-         zip_path = os.path.join(data_path, "corpora", f"{resource}.zip")
-         if os.path.isfile(zip_path):
-             print(f"Found ZIP file for '{resource}' at: {zip_path}")
-             target_dir = os.path.join(data_path, "corpora")
-             with zipfile.ZipFile(zip_path, "r") as z:
-                 z.extractall(path=target_dir)
-             print(f"Unzipped '{resource}' successfully.")
-             break  # Stop after unzipping the first found ZIP.
-
-     # Final check: Try to check resource one last time. If it fails, rai
-     try:
-         nltk.data.find(resource_path)
-         print(f"Resource '{resource}' is now available.")
-     except LookupError:
-         raise LookupError(f"Resource '{resource}' could not be found, downloaded, or unzipped.")
-
-
- # Ensure the NLTK stopwords and WordNet resources are available
- # punkt is known to have issues with the default download method, so we use a custom function if it fails
- try:
-     ensure_nltk_resource("punkt")
- except LookupError:
-     nltk.download("punkt")
- ensure_nltk_resource("stopwords")
- ensure_nltk_resource("wordnet")
- # Use NLTK's stopwords - load all languages
- STOP_WORDS = set(word for lang in stopwords.fileids() for word in stopwords.words(lang))
- # Initialize the WordNet lemmatizer, which is used for normalizing words
- LEMMATIZER = WordNetLemmatizer()
-
-
- def load_annotations(
-     network: nx.Graph, annotations_input: Dict[str, Any], min_nodes_per_term: int = 2
+ def initialize_nltk():
+     """Initialize all required NLTK components."""
+     setup_nltk_resources()
+
+     # After resources are available, initialize the components
+     from nltk.corpus import stopwords
+     from nltk.stem import WordNetLemmatizer
+
+     global STOP_WORDS, LEMMATIZER
+     STOP_WORDS = set(stopwords.words("english"))
+     LEMMATIZER = WordNetLemmatizer()
+
+
+ # Initialize NLTK components
+ initialize_nltk()
+
+
+ def load_annotation(
+     network: nx.Graph, annotation_input: Dict[str, Any], min_nodes_per_term: int = 2
  ) -> Dict[str, Any]:
-     """Convert annotations input to a sparse matrix and reindex based on the network's node labels.
+     """Convert annotation input to a sparse matrix and reindex based on the network's node labels.
 
      Args:
          network (nx.Graph): The network graph.
-         annotations_input (Dict[str, Any]): A dictionary with annotations.
+         annotation_input (Dict[str, Any]): An annotation dictionary.
          min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
              term to be included. Defaults to 2.
 
@@ -91,18 +51,18 @@ def load_annotations(
              matrix.
 
      Raises:
-         ValueError: If no annotations are found for the nodes in the network.
-         ValueError: If no annotations have at least min_nodes_per_term nodes in the network.
+         ValueError: If no annotation is found for the nodes in the network.
+         ValueError: If no annotation has at least min_nodes_per_term nodes in the network.
      """
      # Step 1: Map nodes and annotations to indices
      node_label_order = [attr["label"] for _, attr in network.nodes(data=True) if "label" in attr]
      node_to_idx = {node: i for i, node in enumerate(node_label_order)}
-     annotation_to_idx = {annotation: i for i, annotation in enumerate(annotations_input)}
+     annotation_to_idx = {annotation: i for i, annotation in enumerate(annotation_input)}
      # Step 2: Construct a sparse binary matrix directly
      row = []
      col = []
      data = []
-     for annotation, nodes in annotations_input.items():
+     for annotation, nodes in annotation_input.items():
          for node in nodes:
              if node in node_to_idx and annotation in annotation_to_idx:
                  row.append(node_to_idx[node])
@@ -111,40 +71,40 @@ def load_annotations(
 
      # Create a sparse binary matrix
      num_nodes = len(node_to_idx)
-     num_annotations = len(annotation_to_idx)
-     annotations_pivot = coo_matrix((data, (row, col)), shape=(num_nodes, num_annotations)).tocsr()
+     num_annotation = len(annotation_to_idx)
+     annotation_pivot = coo_matrix((data, (row, col)), shape=(num_nodes, num_annotation)).tocsr()
      # Step 3: Filter out annotations with fewer than min_nodes_per_term occurrences
-     valid_annotations = annotations_pivot.sum(axis=0).A1 >= min_nodes_per_term
-     annotations_pivot = annotations_pivot[:, valid_annotations]
+     valid_annotation = annotation_pivot.sum(axis=0).A1 >= min_nodes_per_term
+     annotation_pivot = annotation_pivot[:, valid_annotation]
      # Step 4: Raise errors for empty matrices
-     if annotations_pivot.nnz == 0:
+     if annotation_pivot.nnz == 0:
          raise ValueError("No terms found in the annotation file for the nodes in the network.")
 
-     num_remaining_annotations = annotations_pivot.shape[1]
-     if num_remaining_annotations == 0:
+     num_remaining_annotation = annotation_pivot.shape[1]
+     if num_remaining_annotation == 0:
          raise ValueError(
              f"No annotation terms found with at least {min_nodes_per_term} nodes in the network."
          )
 
      # Step 5: Extract ordered nodes and annotations
      ordered_nodes = tuple(node_label_order)
-     ordered_annotations = tuple(
-         annotation for annotation, is_valid in zip(annotation_to_idx, valid_annotations) if is_valid
+     ordered_annotation = tuple(
+         annotation for annotation, is_valid in zip(annotation_to_idx, valid_annotation) if is_valid
      )
 
      # Log the filtering details
      logger.info(f"Minimum number of nodes per annotation term: {min_nodes_per_term}")
-     logger.info(f"Number of input annotation terms: {num_annotations}")
-     logger.info(f"Number of remaining annotation terms: {num_remaining_annotations}")
+     logger.info(f"Number of input annotation terms: {num_annotation}")
+     logger.info(f"Number of remaining annotation terms: {num_remaining_annotation}")
 
      return {
          "ordered_nodes": ordered_nodes,
-         "ordered_annotations": ordered_annotations,
-         "matrix": annotations_pivot,
+         "ordered_annotation": ordered_annotation,
+         "matrix": annotation_pivot,
      }
 
 
- def define_top_annotations(
+ def define_top_annotation(
      network: nx.Graph,
      ordered_annotation_labels: List[str],
      neighborhood_significance_sums: List[int],
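For orientation on the renamed loader and its return keys, here is a minimal sketch of calling load_annotation on a toy graph; the gene and term names are made up, and importing the module is assumed to trigger the NLTK setup shown above:

import networkx as nx
from risk.annotation.annotation import load_annotation

# Nodes must carry the "label" attribute that load_annotation indexes on
G = nx.Graph()
G.add_node(0, label="geneA")
G.add_node(1, label="geneB")
G.add_node(2, label="geneC")
annotation_input = {"GO:0001": ["geneA", "geneB"], "GO:0002": ["geneC"]}

result = load_annotation(G, annotation_input, min_nodes_per_term=2)
print(result["ordered_annotation"])  # ('GO:0001',) - "GO:0002" has fewer than 2 nodes
print(result["matrix"].shape)        # (3, 1) sparse binary node-by-term matrix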
@@ -170,7 +130,7 @@ def define_top_annotations(
      # Sum the columns of the significant significance matrix (positive floating point values)
      significant_significance_scores = significant_significance_matrix.sum(axis=0)
      # Create DataFrame to store annotations, their neighborhood significance sums, and significance scores
-     annotations_significance_matrix = pd.DataFrame(
+     annotation_significance_matrix = pd.DataFrame(
          {
              "id": range(len(ordered_annotation_labels)),
              "full_terms": ordered_annotation_labels,
@@ -178,29 +138,29 @@ def define_top_annotations(
              "significant_significance_score": significant_significance_scores,
          }
      )
-     annotations_significance_matrix["significant_annotations"] = False
+     annotation_significance_matrix["significant_annotation"] = False
      # Apply size constraints to identify potential significant annotations
-     annotations_significance_matrix.loc[
+     annotation_significance_matrix.loc[
          (
-             annotations_significance_matrix["significant_neighborhood_significance_sums"]
+             annotation_significance_matrix["significant_neighborhood_significance_sums"]
              >= min_cluster_size
          )
          & (
-             annotations_significance_matrix["significant_neighborhood_significance_sums"]
+             annotation_significance_matrix["significant_neighborhood_significance_sums"]
              <= max_cluster_size
          ),
-         "significant_annotations",
+         "significant_annotation",
      ] = True
      # Initialize columns for connected components analysis
-     annotations_significance_matrix["num_connected_components"] = 0
-     annotations_significance_matrix["size_connected_components"] = None
-     annotations_significance_matrix["size_connected_components"] = annotations_significance_matrix[
+     annotation_significance_matrix["num_connected_components"] = 0
+     annotation_significance_matrix["size_connected_components"] = None
+     annotation_significance_matrix["size_connected_components"] = annotation_significance_matrix[
          "size_connected_components"
      ].astype(object)
-     annotations_significance_matrix["num_large_connected_components"] = 0
+     annotation_significance_matrix["num_large_connected_components"] = 0
 
-     for attribute in annotations_significance_matrix.index.values[
-         annotations_significance_matrix["significant_annotations"]
+     for attribute in annotation_significance_matrix.index.values[
+         annotation_significance_matrix["significant_annotation"]
      ]:
          # Identify significant neighborhoods based on the binary significance matrix
          significant_neighborhoods = list(
@@ -223,24 +183,24 @@ def define_top_annotations(
          num_large_connected_components = len(filtered_size_connected_components)
 
          # Assign the number of connected components
-         annotations_significance_matrix.loc[attribute, "num_connected_components"] = (
+         annotation_significance_matrix.loc[attribute, "num_connected_components"] = (
              num_connected_components
          )
          # Filter out attributes with more than one connected component
-         annotations_significance_matrix.loc[
-             annotations_significance_matrix["num_connected_components"] > 1,
-             "significant_annotations",
+         annotation_significance_matrix.loc[
+             annotation_significance_matrix["num_connected_components"] > 1,
+             "significant_annotation",
          ] = False
          # Assign the number of large connected components
-         annotations_significance_matrix.loc[attribute, "num_large_connected_components"] = (
+         annotation_significance_matrix.loc[attribute, "num_large_connected_components"] = (
              num_large_connected_components
          )
          # Assign the size of connected components, ensuring it is always a list
-         annotations_significance_matrix.at[attribute, "size_connected_components"] = (
+         annotation_significance_matrix.at[attribute, "size_connected_components"] = (
              filtered_size_connected_components.tolist()
          )
 
-     return annotations_significance_matrix
+     return annotation_significance_matrix
 
 
  def get_weighted_description(words_column: pd.Series, scores_column: pd.Series) -> str:
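One behavioural change worth noting from the hunks above: 0.0.10 pooled stop words from every language shipped with NLTK, while 0.0.12 builds STOP_WORDS from English only. A hedged sketch of the difference, assuming the NLTK stopwords corpus is already downloaded:

from nltk.corpus import stopwords

stop_words_0_0_10 = set(word for lang in stopwords.fileids() for word in stopwords.words(lang))  # old behaviour
stop_words_0_0_12 = set(stopwords.words("english"))                                              # new behaviour
print(len(stop_words_0_0_12) < len(stop_words_0_0_10))  # True: the 0.0.12 set is much smaller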
risk/{annotations → annotation}/io.py RENAMED
@@ -1,6 +1,6 @@
  """
- risk/annotations/io
- ~~~~~~~~~~~~~~~~~~~
+ risk/annotation/io
+ ~~~~~~~~~~~~~~~~~~
  """
 
  import json
@@ -9,48 +9,45 @@ from typing import Any, Dict
  import networkx as nx
  import pandas as pd
 
- from risk.annotations.annotations import load_annotations
- from risk.log import params, logger, log_header
+ from risk.annotation.annotation import load_annotation
+ from risk.log import log_header, logger, params
 
 
- class AnnotationsIO:
-     """Handles the loading and exporting of annotations in various file formats.
+ class AnnotationIO:
+     """Handles the loading and exporting of annotation in various file formats.
 
-     The AnnotationsIO class provides methods to load annotations from different file types (JSON, CSV, Excel, etc.)
+     The AnnotationIO class provides methods to load annotation from different file types (JSON, CSV, Excel, etc.)
      and to export parameter data to various formats like JSON, CSV, and text files.
      """
 
-     def __init__(self):
-         pass
-
-     def load_json_annotation(
+     def load_annotation_json(
          self, network: nx.Graph, filepath: str, min_nodes_per_term: int = 2
      ) -> Dict[str, Any]:
-         """Load annotations from a JSON file and convert them to a DataFrame.
+         """Load annotation from a JSON file and convert them to a DataFrame.
 
          Args:
-             network (NetworkX graph): The network to which the annotations are related.
-             filepath (str): Path to the JSON annotations file.
+             network (NetworkX graph): The network to which the annotation is related.
+             filepath (str): Path to the JSON annotation file.
              min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
                  term to be included. Defaults to 2.
 
          Returns:
-             Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotations matrix.
+             Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotation matrix.
          """
          filetype = "JSON"
          # Log the loading of the JSON file
-         params.log_annotations(
+         params.log_annotation(
              filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
          )
-         _log_loading(filetype, filepath=filepath)
+         self._log_loading(filetype, filepath=filepath)
 
          # Load the JSON file into a dictionary
          with open(filepath, "r", encoding="utf-8") as file:
-             annotations_input = json.load(file)
+             annotation_input = json.load(file)
 
-         return load_annotations(network, annotations_input, min_nodes_per_term)
+         return load_annotation(network, annotation_input, min_nodes_per_term)
 
-     def load_excel_annotation(
+     def load_annotation_excel(
          self,
          network: nx.Graph,
          filepath: str,
@@ -60,11 +57,11 @@ class AnnotationsIO:
          nodes_delimiter: str = ";",
          min_nodes_per_term: int = 2,
      ) -> Dict[str, Any]:
-         """Load annotations from an Excel file and associate them with the network.
+         """Load annotation from an Excel file and associate them with the network.
 
          Args:
-             network (nx.Graph): The NetworkX graph to which the annotations are related.
-             filepath (str): Path to the Excel annotations file.
+             network (nx.Graph): The NetworkX graph to which the annotation is related.
+             filepath (str): Path to the Excel annotation file.
              label_colname (str): Name of the column containing the labels (e.g., GO terms).
              nodes_colname (str): Name of the column containing the nodes associated with each label.
              sheet_name (str, optional): The name of the Excel sheet to load (default is 'Sheet1').
@@ -78,10 +75,10 @@ class AnnotationsIO:
          """
          filetype = "Excel"
          # Log the loading of the Excel file
-         params.log_annotations(
+         params.log_annotation(
              filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
          )
-         _log_loading(filetype, filepath=filepath)
+         self._log_loading(filetype, filepath=filepath)
 
          # Load the specified sheet from the Excel file
          annotation = pd.read_excel(filepath, sheet_name=sheet_name)
@@ -90,11 +87,11 @@ class AnnotationsIO:
              lambda x: x.split(nodes_delimiter)
          )
          # Convert the DataFrame to a dictionary pairing labels with their corresponding nodes
-         annotations_input = annotation.set_index(label_colname)[nodes_colname].to_dict()
+         annotation_input = annotation.set_index(label_colname)[nodes_colname].to_dict()
 
-         return load_annotations(network, annotations_input, min_nodes_per_term)
+         return load_annotation(network, annotation_input, min_nodes_per_term)
 
-     def load_csv_annotation(
+     def load_annotation_csv(
          self,
          network: nx.Graph,
         filepath: str,
@@ -103,11 +100,11 @@ class AnnotationsIO:
          nodes_delimiter: str = ";",
          min_nodes_per_term: int = 2,
      ) -> Dict[str, Any]:
-         """Load annotations from a CSV file and associate them with the network.
+         """Load annotation from a CSV file and associate them with the network.
 
          Args:
-             network (nx.Graph): The NetworkX graph to which the annotations are related.
-             filepath (str): Path to the CSV annotations file.
+             network (nx.Graph): The NetworkX graph to which the annotation is related.
+             filepath (str): Path to the CSV annotation file.
              label_colname (str): Name of the column containing the labels (e.g., GO terms).
              nodes_colname (str): Name of the column containing the nodes associated with each label.
              nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
@@ -120,19 +117,19 @@ class AnnotationsIO:
          """
          filetype = "CSV"
          # Log the loading of the CSV file
-         params.log_annotations(
+         params.log_annotation(
              filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
          )
-         _log_loading(filetype, filepath=filepath)
+         self._log_loading(filetype, filepath=filepath)
 
          # Load the CSV file into a dictionary
-         annotations_input = _load_matrix_file(
+         annotation_input = self._load_matrix_file(
              filepath, label_colname, nodes_colname, delimiter=",", nodes_delimiter=nodes_delimiter
          )
 
-         return load_annotations(network, annotations_input, min_nodes_per_term)
+         return load_annotation(network, annotation_input, min_nodes_per_term)
 
-     def load_tsv_annotation(
+     def load_annotation_tsv(
          self,
          network: nx.Graph,
          filepath: str,
@@ -141,11 +138,11 @@ class AnnotationsIO:
          nodes_delimiter: str = ";",
          min_nodes_per_term: int = 2,
      ) -> Dict[str, Any]:
-         """Load annotations from a TSV file and associate them with the network.
+         """Load annotation from a TSV file and associate them with the network.
 
          Args:
-             network (nx.Graph): The NetworkX graph to which the annotations are related.
-             filepath (str): Path to the TSV annotations file.
+             network (nx.Graph): The NetworkX graph to which the annotation is related.
+             filepath (str): Path to the TSV annotation file.
              label_colname (str): Name of the column containing the labels (e.g., GO terms).
              nodes_colname (str): Name of the column containing the nodes associated with each label.
              nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
@@ -158,31 +155,34 @@ class AnnotationsIO:
          """
          filetype = "TSV"
          # Log the loading of the TSV file
-         params.log_annotations(
+         params.log_annotation(
              filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
          )
-         _log_loading(filetype, filepath=filepath)
+         self._log_loading(filetype, filepath=filepath)
 
          # Load the TSV file into a dictionary
-         annotations_input = _load_matrix_file(
+         annotation_input = self._load_matrix_file(
              filepath, label_colname, nodes_colname, delimiter="\t", nodes_delimiter=nodes_delimiter
          )
 
-         return load_annotations(network, annotations_input, min_nodes_per_term)
+         return load_annotation(network, annotation_input, min_nodes_per_term)
 
-     def load_dict_annotation(
+     def load_annotation_dict(
          self, network: nx.Graph, content: Dict[str, Any], min_nodes_per_term: int = 2
      ) -> Dict[str, Any]:
-         """Load annotations from a provided dictionary and convert them to a dictionary annotation.
+         """Load annotation from a provided dictionary and convert them to a dictionary annotation.
 
          Args:
-             network (NetworkX graph): The network to which the annotations are related.
-             content (Dict[str, Any]): The annotations dictionary to load.
+             network (NetworkX graph): The network to which the annotation is related.
+             content (Dict[str, Any]): The annotation dictionary to load.
              min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
                  term to be included. Defaults to 2.
 
          Returns:
-             Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotations matrix.
+             Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotation matrix.
+
+         Raises:
+             TypeError: If the content is not a dictionary.
          """
          # Ensure the input content is a dictionary
          if not isinstance(content, dict):
@@ -191,50 +191,51 @@ class AnnotationsIO:
              )
 
          filetype = "Dictionary"
-         # Log the loading of the annotations from the dictionary
-         params.log_annotations(filepath="In-memory dictionary", filetype=filetype)
-         _log_loading(filetype, "In-memory dictionary")
-
-         # Load the annotations as a dictionary from the provided dictionary
-         return load_annotations(network, content, min_nodes_per_term)
-
-
- def _load_matrix_file(
-     filepath: str,
-     label_colname: str,
-     nodes_colname: str,
-     delimiter: str = ",",
-     nodes_delimiter: str = ";",
- ) -> Dict[str, Any]:
-     """Load annotations from a CSV or TSV file and convert them to a dictionary.
-
-     Args:
-         filepath (str): Path to the annotation file.
-         label_colname (str): Name of the column containing the labels (e.g., GO terms).
-         nodes_colname (str): Name of the column containing the nodes associated with each label.
-         delimiter (str, optional): Delimiter used to separate columns in the file (default is ',').
-         nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
-
-     Returns:
-         Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes.
-     """
-     # Load the CSV or TSV file into a DataFrame
-     annotation = pd.read_csv(filepath, delimiter=delimiter)
-     # Split the nodes column by the nodes_delimiter to handle multiple nodes per label
-     annotation[nodes_colname] = annotation[nodes_colname].apply(lambda x: x.split(nodes_delimiter))
-     # Create a dictionary pairing labels with their corresponding list of nodes
-     label_node_dict = annotation.set_index(label_colname)[nodes_colname].to_dict()
-     return label_node_dict
+         # Log the loading of the annotation from the dictionary
+         params.log_annotation(filepath="In-memory dictionary", filetype=filetype)
+         self._log_loading(filetype, "In-memory dictionary")
 
+         # Load the annotation as a dictionary from the provided dictionary
+         return load_annotation(network, content, min_nodes_per_term)
 
- def _log_loading(filetype: str, filepath: str = "") -> None:
-     """Log information about the network file being loaded.
+     def _load_matrix_file(
+         self,
+         filepath: str,
+         label_colname: str,
+         nodes_colname: str,
+         delimiter: str = ",",
+         nodes_delimiter: str = ";",
+     ) -> Dict[str, Any]:
+         """Load annotation from a CSV or TSV file and convert them to a dictionary.
 
-     Args:
-         filetype (str): The type of the file being loaded (e.g., 'Cytoscape').
-         filepath (str, optional): The path to the file being loaded.
-     """
-     log_header("Loading annotations")
-     logger.debug(f"Filetype: {filetype}")
-     if filepath:
-         logger.debug(f"Filepath: {filepath}")
+         Args:
+             filepath (str): Path to the annotation file.
+             label_colname (str): Name of the column containing the labels (e.g., GO terms).
+             nodes_colname (str): Name of the column containing the nodes associated with each label.
+             delimiter (str, optional): Delimiter used to separate columns in the file (default is ',').
+             nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
+
+         Returns:
+             Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes.
+         """
+         # Load the CSV or TSV file into a DataFrame
+         annotation = pd.read_csv(filepath, delimiter=delimiter)
+         # Split the nodes column by the nodes_delimiter to handle multiple nodes per label
+         annotation[nodes_colname] = annotation[nodes_colname].apply(
+             lambda x: x.split(nodes_delimiter)
+         )
+         # Create a dictionary pairing labels with their corresponding list of nodes
+         label_node_dict = annotation.set_index(label_colname)[nodes_colname].to_dict()
+         return label_node_dict
+
+     def _log_loading(self, filetype: str, filepath: str = "") -> None:
+         """Log information about the network file being loaded.
+
+         Args:
+             filetype (str): The type of the file being loaded (e.g., 'Cytoscape').
+             filepath (str, optional): The path to the file being loaded.
+         """
+         log_header("Loading annotation")
+         logger.debug(f"Filetype: {filetype}")
+         if filepath:
+             logger.debug(f"Filepath: {filepath}")