risk-network 0.0.8b18__py3-none-any.whl → 0.0.9b26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. risk/__init__.py +2 -2
  2. risk/annotations/__init__.py +2 -2
  3. risk/annotations/annotations.py +133 -72
  4. risk/annotations/io.py +50 -34
  5. risk/log/__init__.py +4 -2
  6. risk/log/{config.py → console.py} +5 -3
  7. risk/log/{params.py → parameters.py} +21 -46
  8. risk/neighborhoods/__init__.py +3 -5
  9. risk/neighborhoods/api.py +446 -0
  10. risk/neighborhoods/community.py +281 -96
  11. risk/neighborhoods/domains.py +92 -38
  12. risk/neighborhoods/neighborhoods.py +210 -149
  13. risk/network/__init__.py +1 -3
  14. risk/network/geometry.py +69 -58
  15. risk/network/graph/__init__.py +6 -0
  16. risk/network/graph/api.py +194 -0
  17. risk/network/graph/network.py +269 -0
  18. risk/network/graph/summary.py +254 -0
  19. risk/network/io.py +58 -48
  20. risk/network/plotter/__init__.py +6 -0
  21. risk/network/plotter/api.py +54 -0
  22. risk/network/{plot → plotter}/canvas.py +80 -26
  23. risk/network/{plot → plotter}/contour.py +43 -34
  24. risk/network/{plot → plotter}/labels.py +123 -113
  25. risk/network/plotter/network.py +424 -0
  26. risk/network/plotter/utils/colors.py +416 -0
  27. risk/network/plotter/utils/layout.py +94 -0
  28. risk/risk.py +11 -469
  29. risk/stats/__init__.py +8 -4
  30. risk/stats/binom.py +51 -0
  31. risk/stats/chi2.py +69 -0
  32. risk/stats/hypergeom.py +28 -18
  33. risk/stats/permutation/__init__.py +1 -1
  34. risk/stats/permutation/permutation.py +45 -39
  35. risk/stats/permutation/test_functions.py +25 -17
  36. risk/stats/poisson.py +17 -11
  37. risk/stats/stats.py +20 -16
  38. risk/stats/zscore.py +68 -0
  39. {risk_network-0.0.8b18.dist-info → risk_network-0.0.9b26.dist-info}/METADATA +9 -5
  40. risk_network-0.0.9b26.dist-info/RECORD +44 -0
  41. {risk_network-0.0.8b18.dist-info → risk_network-0.0.9b26.dist-info}/WHEEL +1 -1
  42. risk/network/graph.py +0 -159
  43. risk/network/plot/__init__.py +0 -6
  44. risk/network/plot/network.py +0 -282
  45. risk/network/plot/plotter.py +0 -137
  46. risk/network/plot/utils/color.py +0 -353
  47. risk/network/plot/utils/layout.py +0 -53
  48. risk_network-0.0.8b18.dist-info/RECORD +0 -37
  49. {risk_network-0.0.8b18.dist-info → risk_network-0.0.9b26.dist-info}/LICENSE +0 -0
  50. {risk_network-0.0.8b18.dist-info → risk_network-0.0.9b26.dist-info}/top_level.txt +0 -0
risk/__init__.py CHANGED
@@ -2,9 +2,9 @@
 risk
 ~~~~
 
-RISK: RISK Infers Spatial Kinships
+RISK: Regional Inference of Significant Kinships
 """
 
 from risk.risk import RISK
 
-__version__ = "0.0.8-beta.18"
+__version__ = "0.0.9-beta.26"
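The only functional surface in this file is the re-exported RISK class and the version string, so the acronym rename and the version bump are the whole change. A minimal sketch of what this exposes to users (assumes the wheel is installed):

import risk
from risk import RISK  # re-exported via `from risk.risk import RISK`

# After upgrading, the module-level version string reflects the new beta
print(risk.__version__)  # "0.0.9-beta.26"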
risk/annotations/__init__.py CHANGED
@@ -3,5 +3,5 @@ risk/annotations
 ~~~~~~~~~~~~~~~~
 """
 
-from .annotations import define_top_annotations, get_description
-from .io import AnnotationsIO
+from risk.annotations.annotations import define_top_annotations, get_weighted_description
+from risk.annotations.io import AnnotationsIO
risk/annotations/annotations.py CHANGED
@@ -3,6 +3,7 @@ risk/annotations/annotations
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 """
 
+import re
 from collections import Counter
 from itertools import compress
 from typing import Any, Dict, List, Set
@@ -14,6 +15,9 @@ import pandas as pd
 from nltk.tokenize import word_tokenize
 from nltk.corpus import stopwords
 
+from risk.log import logger
+from scipy.sparse import csr_matrix
+
 
 def _setup_nltk():
     """Ensure necessary NLTK data is downloaded."""
@@ -30,107 +34,144 @@ def _setup_nltk():
 
 # Ensure you have the necessary NLTK data
 _setup_nltk()
+# Initialize English stopwords
+stop_words = set(stopwords.words("english"))
 
 
-def load_annotations(network: nx.Graph, annotations_input: Dict[str, Any]) -> Dict[str, Any]:
+def load_annotations(
+    network: nx.Graph, annotations_input: Dict[str, Any], min_nodes_per_term: int = 2
+) -> Dict[str, Any]:
     """Convert annotations input to a DataFrame and reindex based on the network's node labels.
 
     Args:
-        annotations_input (dict): A dictionary with annotations.
+        network (nx.Graph): The network graph.
+        annotations_input (Dict[str, Any]): A dictionary with annotations.
+        min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+            term to be included. Defaults to 2.
+        use_sparse (bool, optional): Whether to return the annotations matrix as a sparse matrix. Defaults to True.
 
     Returns:
-        dict: A dictionary containing ordered nodes, ordered annotations, and the binary annotations matrix.
+        Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the sparse binary annotations
+            matrix.
+
+    Raises:
+        ValueError: If no annotations are found for the nodes in the network.
+        ValueError: If no annotations have at least min_nodes_per_term nodes in the network.
     """
     # Flatten the dictionary to a list of tuples for easier DataFrame creation
     flattened_annotations = [
         (node, annotation) for annotation, nodes in annotations_input.items() for node in nodes
     ]
     # Create a DataFrame from the flattened list
-    annotations = pd.DataFrame(flattened_annotations, columns=["Node", "Annotations"])
-    annotations["Is Member"] = 1
+    annotations = pd.DataFrame(flattened_annotations, columns=["node", "annotations"])
+    annotations["is_member"] = 1
     # Pivot to create a binary matrix with nodes as rows and annotations as columns
     annotations_pivot = annotations.pivot_table(
-        index="Node", columns="Annotations", values="Is Member", fill_value=0, dropna=False
+        index="node", columns="annotations", values="is_member", fill_value=0, dropna=False
     )
     # Reindex the annotations matrix based on the node labels from the network
-    node_label_order = list(nx.get_node_attributes(network, "label").values())
+    node_label_order = (attr["label"] for _, attr in network.nodes(data=True) if "label" in attr)
     annotations_pivot = annotations_pivot.reindex(index=node_label_order)
     # Raise an error if no valid annotations are found for the nodes in the network
     if annotations_pivot.notnull().sum().sum() == 0:
+        raise ValueError("No terms found in the annotation file for the nodes in the network.")
+
+    # Filter out annotations with fewer than min_nodes_per_term occurrences
+    num_terms_before_filtering = annotations_pivot.shape[1]
+    annotations_pivot = annotations_pivot.loc[
+        :, (annotations_pivot.sum(axis=0) >= min_nodes_per_term)
+    ]
+    num_terms_after_filtering = annotations_pivot.shape[1]
+    # Log the number of annotations before and after filtering
+    logger.info(f"Minimum number of nodes per annotation term: {min_nodes_per_term}")
+    logger.info(f"Number of input annotation terms: {num_terms_before_filtering}")
+    logger.info(f"Number of remaining annotation terms: {num_terms_after_filtering}")
+    if num_terms_after_filtering == 0:
         raise ValueError(
-            "No annotations found in the annotations file for the nodes in the network."
+            f"No annotation terms found with at least {min_nodes_per_term} nodes in the network."
         )
 
-    # Remove columns with all zeros to improve performance
-    annotations_pivot = annotations_pivot.loc[:, annotations_pivot.sum(axis=0) != 0]
     # Extract ordered nodes and annotations
     ordered_nodes = tuple(annotations_pivot.index)
     ordered_annotations = tuple(annotations_pivot.columns)
-    # Convert the annotations_pivot matrix to a numpy array and ensure it's binary
-    annotations_pivot_numpy = (annotations_pivot.fillna(0).to_numpy() > 0).astype(int)
+    # Convert the annotations_pivot matrix to a numpy array or sparse matrix
+    annotations_pivot_binary = (annotations_pivot.fillna(0).to_numpy() > 0).astype(int)
+    # Convert the binary annotations matrix to a sparse matrix
+    annotations_pivot_binary = csr_matrix(annotations_pivot_binary)
 
     return {
         "ordered_nodes": ordered_nodes,
         "ordered_annotations": ordered_annotations,
-        "matrix": annotations_pivot_numpy,
+        "matrix": annotations_pivot_binary,
     }
 
 
 def define_top_annotations(
     network: nx.Graph,
     ordered_annotation_labels: List[str],
-    neighborhood_enrichment_sums: List[int],
-    binary_enrichment_matrix: np.ndarray,
+    neighborhood_significance_sums: List[int],
+    significant_significance_matrix: np.ndarray,
+    significant_binary_significance_matrix: np.ndarray,
     min_cluster_size: int = 5,
     max_cluster_size: int = 1000,
 ) -> pd.DataFrame:
-    """Define top annotations based on neighborhood enrichment sums and binary enrichment matrix.
+    """Define top annotations based on neighborhood significance sums and binary significance matrix.
 
     Args:
         network (NetworkX graph): The network graph.
         ordered_annotation_labels (list of str): List of ordered annotation labels.
-        neighborhood_enrichment_sums (list of int): List of neighborhood enrichment sums.
-        binary_enrichment_matrix (np.ndarray): Binary enrichment matrix below alpha threshold.
+        neighborhood_significance_sums (list of int): List of neighborhood significance sums.
+        significant_significance_matrix (np.ndarray): Enrichment matrix below alpha threshold.
+        significant_binary_significance_matrix (np.ndarray): Binary significance matrix below alpha threshold.
         min_cluster_size (int, optional): Minimum cluster size. Defaults to 5.
         max_cluster_size (int, optional): Maximum cluster size. Defaults to 1000.
 
     Returns:
         pd.DataFrame: DataFrame with top annotations and their properties.
     """
-    # Create DataFrame to store annotations and their neighborhood enrichment sums
-    annotations_enrichment_matrix = pd.DataFrame(
+    # Sum the columns of the significant significance matrix (positive floating point values)
+    significant_significance_scores = significant_significance_matrix.sum(axis=0)
+    # Create DataFrame to store annotations, their neighborhood significance sums, and significance scores
+    annotations_significance_matrix = pd.DataFrame(
         {
             "id": range(len(ordered_annotation_labels)),
-            "words": ordered_annotation_labels,
-            "neighborhood enrichment sums": neighborhood_enrichment_sums,
+            "full_terms": ordered_annotation_labels,
+            "significant_neighborhood_significance_sums": neighborhood_significance_sums,
+            "significant_significance_score": significant_significance_scores,
         }
     )
-    annotations_enrichment_matrix["top attributes"] = False
-    # Apply size constraints to identify potential top attributes
-    annotations_enrichment_matrix.loc[
-        (annotations_enrichment_matrix["neighborhood enrichment sums"] >= min_cluster_size)
-        & (annotations_enrichment_matrix["neighborhood enrichment sums"] <= max_cluster_size),
-        "top attributes",
+    annotations_significance_matrix["significant_annotations"] = False
+    # Apply size constraints to identify potential significant annotations
+    annotations_significance_matrix.loc[
+        (
+            annotations_significance_matrix["significant_neighborhood_significance_sums"]
+            >= min_cluster_size
+        )
+        & (
+            annotations_significance_matrix["significant_neighborhood_significance_sums"]
+            <= max_cluster_size
+        ),
+        "significant_annotations",
     ] = True
     # Initialize columns for connected components analysis
-    annotations_enrichment_matrix["num connected components"] = 0
-    annotations_enrichment_matrix["size connected components"] = None
-    annotations_enrichment_matrix["size connected components"] = annotations_enrichment_matrix[
-        "size connected components"
+    annotations_significance_matrix["num_connected_components"] = 0
+    annotations_significance_matrix["size_connected_components"] = None
+    annotations_significance_matrix["size_connected_components"] = annotations_significance_matrix[
+        "size_connected_components"
     ].astype(object)
-    annotations_enrichment_matrix["num large connected components"] = 0
+    annotations_significance_matrix["num_large_connected_components"] = 0
 
-    for attribute in annotations_enrichment_matrix.index.values[
-        annotations_enrichment_matrix["top attributes"]
+    for attribute in annotations_significance_matrix.index.values[
+        annotations_significance_matrix["significant_annotations"]
     ]:
-        # Identify enriched neighborhoods based on the binary enrichment matrix
-        enriched_neighborhoods = list(
-            compress(list(network), binary_enrichment_matrix[:, attribute])
+        # Identify significant neighborhoods based on the binary significance matrix
+        significant_neighborhoods = list(
+            compress(list(network), significant_binary_significance_matrix[:, attribute])
         )
-        enriched_network = nx.subgraph(network, enriched_neighborhoods)
-        # Analyze connected components within the enriched subnetwork
+        significant_network = nx.subgraph(network, significant_neighborhoods)
+        # Analyze connected components within the significant subnetwork
         connected_components = sorted(
-            nx.connected_components(enriched_network), key=len, reverse=True
+            nx.connected_components(significant_network), key=len, reverse=True
         )
         size_connected_components = np.array([len(c) for c in connected_components])
 
@@ -144,55 +185,75 @@ def define_top_annotations(
         num_large_connected_components = len(filtered_size_connected_components)
 
         # Assign the number of connected components
-        annotations_enrichment_matrix.loc[attribute, "num connected components"] = (
+        annotations_significance_matrix.loc[attribute, "num_connected_components"] = (
             num_connected_components
         )
         # Filter out attributes with more than one connected component
-        annotations_enrichment_matrix.loc[
-            annotations_enrichment_matrix["num connected components"] > 1, "top attributes"
+        annotations_significance_matrix.loc[
+            annotations_significance_matrix["num_connected_components"] > 1,
+            "significant_annotations",
         ] = False
         # Assign the number of large connected components
-        annotations_enrichment_matrix.loc[attribute, "num large connected components"] = (
+        annotations_significance_matrix.loc[attribute, "num_large_connected_components"] = (
             num_large_connected_components
         )
         # Assign the size of connected components, ensuring it is always a list
-        annotations_enrichment_matrix.at[attribute, "size connected components"] = (
+        annotations_significance_matrix.at[attribute, "size_connected_components"] = (
             filtered_size_connected_components.tolist()
         )
 
-    return annotations_enrichment_matrix
+    return annotations_significance_matrix
 
 
-def get_description(words_column: pd.Series) -> str:
-    """Process input Series to identify and return the top frequent, significant words,
-    filtering based on stopwords and gracefully handling numerical strings.
+def get_weighted_description(words_column: pd.Series, scores_column: pd.Series) -> str:
+    """Generate a weighted description from words and their corresponding scores,
+    with support for stopwords filtering and improved weighting logic.
 
     Args:
         words_column (pd.Series): A pandas Series containing strings to process.
+        scores_column (pd.Series): A pandas Series containing significance scores to weigh the terms.
 
     Returns:
-        str: A coherent description formed from the most frequent and significant words.
+        str: A coherent description formed from the most frequent and significant words, weighed by significance scores.
     """
-    # Concatenate all rows into a single string and tokenize into words
-    all_words = words_column.str.cat(sep=" ")
-    tokens = word_tokenize(all_words)
-
-    # Separate numeric tokens
-    numeric_tokens = [token for token in tokens if token.replace(".", "", 1).isdigit()]
-    # If there's only one unique numeric value, return it directly as a string
-    unique_numeric_values = set(numeric_tokens)
-    if len(unique_numeric_values) == 1:
-        return f"{list(unique_numeric_values)[0]}"
-
-    # Ensure that all values in 'words' are strings and include both alphabetic and numeric tokens
-    words = [
-        str(word)  # Convert to string to ensure consistent processing
-        for word in tokens
-        if word.isalpha()
-        or word.replace(".", "", 1).isdigit()  # Keep alphabetic words and numeric strings
-    ]
-    # Generate a coherent description from the processed words
-    description = _generate_coherent_description(words)
+    # Handle case where all scores are the same
+    if scores_column.max() == scores_column.min():
+        normalized_scores = pd.Series([1] * len(scores_column))
+    else:
+        # Normalize the significance scores to be between 0 and 1
+        normalized_scores = (scores_column - scores_column.min()) / (
+            scores_column.max() - scores_column.min()
+        )
+
+    # Combine words and normalized scores to create weighted words
+    weighted_words = []
+    for word, score in zip(words_column, normalized_scores):
+        word = str(word)
+        if word not in stop_words:  # Skip stopwords
+            weight = max(1, int((0 if pd.isna(score) else score) * 10))
+            weighted_words.extend([word] * weight)
+
+    # Tokenize the weighted words, but preserve number-word patterns like '4-alpha'
+    tokens = word_tokenize(" ".join(weighted_words))
+    # Ensure we treat "4-alpha" or other "number-word" patterns as single tokens
+    combined_tokens = []
+    for token in tokens:
+        # Match patterns like '4-alpha' or '5-hydroxy' and keep them together
+        if re.match(r"^\d+-\w+", token):
+            combined_tokens.append(token)
+        elif token.replace(".", "", 1).isdigit():  # Handle pure numeric tokens
+            # Ignore pure numbers as descriptions unless necessary
+            continue
+        else:
+            combined_tokens.append(token)
+
+    # Prevent descriptions like just '4' from being selected
+    if len(combined_tokens) == 1 and combined_tokens[0].isdigit():
+        return "N/A"  # Return "N/A" for cases where it's just a number
+
+    # Simplify the word list and generate the description
+    simplified_words = _simplify_word_list(combined_tokens)
+    description = _generate_coherent_description(simplified_words)
 
     return description
 
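get_weighted_description replaces get_description: each term is repeated in proportion to its normalized significance score before tokenization, so high-scoring terms dominate the generated label, and a bare number is no longer returned on its own. A minimal sketch of calling the new helper directly (illustrative inputs; assumes the package and its NLTK data are installed):

import pandas as pd
from risk.annotations.annotations import get_weighted_description

terms = pd.Series(["ribosome biogenesis", "ribosome assembly", "4-alpha glucanotransferase activity"])
scores = pd.Series([12.0, 8.5, 1.2])  # e.g., per-term significance scores

# Higher-scoring terms contribute more repetitions, so they carry more weight
# in the frequency-based description that comes back as a single string.
print(get_weighted_description(terms, scores))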
@@ -255,7 +316,7 @@ def _generate_coherent_description(words: List[str]) -> str:
     If there is only one unique entry, return it directly.
 
     Args:
-        words (list): A list of words or numerical string values.
+        words (List): A list of words or numerical string values.
 
     Returns:
         str: A coherent description formed by arranging the words in a logical sequence.
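load_annotations (first hunk of this file) now enforces min_nodes_per_term and returns the node-by-term membership matrix as a SciPy CSR matrix instead of a dense array. A minimal sketch of the new contract on a toy graph whose nodes carry the "label" attribute the reindexing step expects (names are illustrative):

import networkx as nx
from risk.annotations.annotations import load_annotations

network = nx.Graph()
for i, label in enumerate(["geneA", "geneB", "geneC"]):
    network.add_node(i, label=label)

annotations_input = {
    "term kept": ["geneA", "geneB"],  # covers 2 nodes, survives min_nodes_per_term=2
    "term dropped": ["geneC"],        # covers 1 node, filtered out (and logged)
}
result = load_annotations(network, annotations_input, min_nodes_per_term=2)

print(result["ordered_nodes"])        # node labels in network order
print(result["ordered_annotations"])  # terms that passed the filter
print(result["matrix"].shape)         # scipy.sparse.csr_matrix, nodes x terms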
risk/annotations/io.py CHANGED
@@ -1,8 +1,6 @@
 """
 risk/annotations/io
 ~~~~~~~~~~~~~~~~~~~
-
-This file contains the code for the RISK class and command-line access.
 """
 
 import json
@@ -25,27 +23,32 @@ class AnnotationsIO:
     def __init__(self):
         pass
 
-    def load_json_annotation(self, network: nx.Graph, filepath: str) -> Dict[str, Any]:
+    def load_json_annotation(
+        self, network: nx.Graph, filepath: str, min_nodes_per_term: int = 2
+    ) -> Dict[str, Any]:
         """Load annotations from a JSON file and convert them to a DataFrame.
 
         Args:
             network (NetworkX graph): The network to which the annotations are related.
             filepath (str): Path to the JSON annotations file.
+            min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+                term to be included. Defaults to 2.
 
         Returns:
-            dict: A dictionary containing ordered nodes, ordered annotations, and the annotations matrix.
+            Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotations matrix.
         """
         filetype = "JSON"
         # Log the loading of the JSON file
-        params.log_annotations(filepath=filepath, filetype=filetype)
+        params.log_annotations(
+            filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
+        )
         _log_loading(filetype, filepath=filepath)
 
-        # Open and read the JSON file
-        with open(filepath, "r") as file:
+        # Load the JSON file into a dictionary
+        with open(filepath, "r", encoding="utf-8") as file:
             annotations_input = json.load(file)
 
-        # Load the annotations into the provided network
-        return load_annotations(network, annotations_input)
+        return load_annotations(network, annotations_input, min_nodes_per_term)
 
     def load_excel_annotation(
         self,
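For the JSON loader above, the file layout implied by load_annotations is a plain mapping from annotation term to the list of node labels it covers; min_nodes_per_term is now threaded through to the filtering step. A short hedged sketch of the round trip (file name and term are illustrative):

import json
import networkx as nx
from risk.annotations.io import AnnotationsIO

network = nx.Graph()
network.add_node(0, label="geneA")
network.add_node(1, label="geneB")
with open("annotations.json", "w", encoding="utf-8") as handle:
    json.dump({"toy term": ["geneA", "geneB"]}, handle)

annotations = AnnotationsIO().load_json_annotation(
    network, filepath="annotations.json", min_nodes_per_term=2
)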
@@ -55,6 +58,7 @@ class AnnotationsIO:
         nodes_colname: str = "nodes",
         sheet_name: str = "Sheet1",
         nodes_delimiter: str = ";",
+        min_nodes_per_term: int = 2,
     ) -> Dict[str, Any]:
         """Load annotations from an Excel file and associate them with the network.
 
@@ -65,6 +69,8 @@ class AnnotationsIO:
             nodes_colname (str): Name of the column containing the nodes associated with each label.
             sheet_name (str, optional): The name of the Excel sheet to load (default is 'Sheet1').
             nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
+            min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+                term to be included. Defaults to 2.
 
         Returns:
             Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes,
@@ -72,18 +78,21 @@ class AnnotationsIO:
         """
         filetype = "Excel"
         # Log the loading of the Excel file
-        params.log_annotations(filepath=filepath, filetype=filetype)
+        params.log_annotations(
+            filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
+        )
         _log_loading(filetype, filepath=filepath)
 
         # Load the specified sheet from the Excel file
-        df = pd.read_excel(filepath, sheet_name=sheet_name)
+        annotation = pd.read_excel(filepath, sheet_name=sheet_name)
         # Split the nodes column by the specified nodes_delimiter
-        df[nodes_colname] = df[nodes_colname].apply(lambda x: x.split(nodes_delimiter))
+        annotation[nodes_colname] = annotation[nodes_colname].apply(
+            lambda x: x.split(nodes_delimiter)
+        )
         # Convert the DataFrame to a dictionary pairing labels with their corresponding nodes
-        label_node_dict = df.set_index(label_colname)[nodes_colname].to_dict()
+        annotations_input = annotation.set_index(label_colname)[nodes_colname].to_dict()
 
-        # Load the annotations into the provided network
-        return load_annotations(network, label_node_dict)
+        return load_annotations(network, annotations_input, min_nodes_per_term)
 
     def load_csv_annotation(
         self,
@@ -92,6 +101,7 @@ class AnnotationsIO:
         label_colname: str = "label",
         nodes_colname: str = "nodes",
         nodes_delimiter: str = ";",
+        min_nodes_per_term: int = 2,
     ) -> Dict[str, Any]:
         """Load annotations from a CSV file and associate them with the network.
 
@@ -101,6 +111,8 @@ class AnnotationsIO:
             label_colname (str): Name of the column containing the labels (e.g., GO terms).
             nodes_colname (str): Name of the column containing the nodes associated with each label.
             nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
+            min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+                term to be included. Defaults to 2.
 
         Returns:
             Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes,
@@ -108,7 +120,9 @@ class AnnotationsIO:
         """
         filetype = "CSV"
         # Log the loading of the CSV file
-        params.log_annotations(filepath=filepath, filetype=filetype)
+        params.log_annotations(
+            filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
+        )
         _log_loading(filetype, filepath=filepath)
 
         # Load the CSV file into a dictionary
@@ -116,8 +130,7 @@ class AnnotationsIO:
             filepath, label_colname, nodes_colname, delimiter=",", nodes_delimiter=nodes_delimiter
         )
 
-        # Load the annotations into the provided network
-        return load_annotations(network, annotations_input)
+        return load_annotations(network, annotations_input, min_nodes_per_term)
 
     def load_tsv_annotation(
         self,
@@ -126,6 +139,7 @@ class AnnotationsIO:
         label_colname: str = "label",
         nodes_colname: str = "nodes",
         nodes_delimiter: str = ";",
+        min_nodes_per_term: int = 2,
     ) -> Dict[str, Any]:
         """Load annotations from a TSV file and associate them with the network.
 
@@ -135,6 +149,8 @@ class AnnotationsIO:
             label_colname (str): Name of the column containing the labels (e.g., GO terms).
             nodes_colname (str): Name of the column containing the nodes associated with each label.
             nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
+            min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+                term to be included. Defaults to 2.
 
         Returns:
             Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes,
@@ -142,7 +158,9 @@ class AnnotationsIO:
         """
         filetype = "TSV"
         # Log the loading of the TSV file
-        params.log_annotations(filepath=filepath, filetype=filetype)
+        params.log_annotations(
+            filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
+        )
         _log_loading(filetype, filepath=filepath)
 
         # Load the TSV file into a dictionary
@@ -150,18 +168,21 @@ class AnnotationsIO:
             filepath, label_colname, nodes_colname, delimiter="\t", nodes_delimiter=nodes_delimiter
         )
 
-        # Load the annotations into the provided network
-        return load_annotations(network, annotations_input)
+        return load_annotations(network, annotations_input, min_nodes_per_term)
 
-    def load_dict_annotation(self, network: nx.Graph, content: Dict[str, Any]) -> Dict[str, Any]:
+    def load_dict_annotation(
+        self, network: nx.Graph, content: Dict[str, Any], min_nodes_per_term: int = 2
+    ) -> Dict[str, Any]:
         """Load annotations from a provided dictionary and convert them to a dictionary annotation.
 
         Args:
             network (NetworkX graph): The network to which the annotations are related.
-            content (dict): The annotations dictionary to load.
+            content (Dict[str, Any]): The annotations dictionary to load.
+            min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+                term to be included. Defaults to 2.
 
         Returns:
-            dict: A dictionary containing ordered nodes, ordered annotations, and the annotations matrix.
+            Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotations matrix.
         """
         # Ensure the input content is a dictionary
         if not isinstance(content, dict):
@@ -174,13 +195,8 @@ class AnnotationsIO:
         params.log_annotations(filepath="In-memory dictionary", filetype=filetype)
         _log_loading(filetype, "In-memory dictionary")
 
-        # Load the annotations into the provided network
-        annotations_dict = load_annotations(network, content)
-        # Ensure the output is a dictionary
-        if not isinstance(annotations_dict, dict):
-            raise ValueError("Expected output to be a dictionary")
-
-        return annotations_dict
+        # Load the annotations as a dictionary from the provided dictionary
+        return load_annotations(network, content, min_nodes_per_term)
 
 
 def _load_matrix_file(
@@ -203,11 +219,11 @@ def _load_matrix_file(
         Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes.
     """
     # Load the CSV or TSV file into a DataFrame
-    df = pd.read_csv(filepath, delimiter=delimiter)
+    annotation = pd.read_csv(filepath, delimiter=delimiter)
     # Split the nodes column by the nodes_delimiter to handle multiple nodes per label
-    df[nodes_colname] = df[nodes_colname].apply(lambda x: x.split(nodes_delimiter))
+    annotation[nodes_colname] = annotation[nodes_colname].apply(lambda x: x.split(nodes_delimiter))
     # Create a dictionary pairing labels with their corresponding list of nodes
-    label_node_dict = df.set_index(label_colname)[nodes_colname].to_dict()
+    label_node_dict = annotation.set_index(label_colname)[nodes_colname].to_dict()
     return label_node_dict
 
 
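_load_matrix_file spells out the tabular layout shared by the CSV and TSV loaders: one row per annotation term, with the member node labels packed into a single delimited column. A minimal, hedged sketch of a matching file and call, using the loaders' default column names (all values are illustrative):

import networkx as nx
from risk.annotations.io import AnnotationsIO

# Toy network whose nodes carry the "label" attribute used for reindexing
network = nx.Graph()
network.add_nodes_from(
    (i, {"label": name}) for i, name in enumerate(["geneA", "geneB", "geneC", "geneD"])
)

# Two-column layout: a term label and a ';'-delimited list of node labels
with open("annotations.csv", "w", encoding="utf-8") as handle:
    handle.write("label,nodes\nGO:0042254,geneA;geneB;geneC\nGO:0006412,geneB;geneD\n")

annotations = AnnotationsIO().load_csv_annotation(
    network,
    filepath="annotations.csv",
    label_colname="label",
    nodes_colname="nodes",
    nodes_delimiter=";",
    min_nodes_per_term=2,
)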
risk/log/__init__.py CHANGED
@@ -3,7 +3,9 @@ risk/log
 ~~~~~~~~
 """
 
-from .config import logger, log_header, set_global_verbosity
-from .params import Params
+from risk.log.console import logger, log_header, set_global_verbosity
+from risk.log.parameters import Params
 
+# Initialize the global parameters logger
 params = Params()
+params.initialize()
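risk.log re-exports the shared console logger and the Params recorder, and the module-level Params instance is now initialized at import time rather than lazily. A small hedged sketch of that surface (assuming set_global_verbosity takes a single boolean flag, as its name suggests):

from risk.log import logger, params, set_global_verbosity

set_global_verbosity(True)        # assumed boolean toggle for console output
logger.info("risk.log is ready")  # module-level logging.Logger re-exported above
# `params` is the shared Params() instance created (and now initialized) on import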
risk/log/{config.py → console.py} CHANGED
@@ -1,6 +1,6 @@
 """
-risk/log/config
-~~~~~~~~~~~~~~~
+risk/log/console
+~~~~~~~~~~~~~~~~
 """
 
 import logging
@@ -16,8 +16,10 @@ def in_jupyter():
         shell = get_ipython().__class__.__name__
         if shell == "ZMQInteractiveShell":  # Jupyter Notebook or QtConsole
             return True
-        elif shell == "TerminalInteractiveShell":  # Terminal running IPython
+        if shell == "TerminalInteractiveShell":  # Terminal running IPython
             return False
+
+        return False  # Other type (?)
     except NameError:
         return False  # Not in Jupyter
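The rewritten in_jupyter() drops the elif chain and adds an explicit fallback return, so an unrecognized interactive shell no longer falls through without a return value. A minimal check of the helper from a plain interpreter (assumes the package is installed):

from risk.log.console import in_jupyter

# Outside IPython/Jupyter, get_ipython() is undefined, the NameError is caught,
# and the helper returns False; unrecognized shells now also return False.
print(in_jupyter())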