risk-network 0.0.8b26__py3-none-any.whl → 0.0.9b26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. risk/__init__.py +2 -2
  2. risk/annotations/__init__.py +2 -2
  3. risk/annotations/annotations.py +74 -47
  4. risk/annotations/io.py +47 -31
  5. risk/log/__init__.py +4 -2
  6. risk/log/{config.py → console.py} +5 -3
  7. risk/log/{params.py → parameters.py} +17 -42
  8. risk/neighborhoods/__init__.py +3 -5
  9. risk/neighborhoods/api.py +446 -0
  10. risk/neighborhoods/community.py +255 -77
  11. risk/neighborhoods/domains.py +62 -31
  12. risk/neighborhoods/neighborhoods.py +156 -160
  13. risk/network/__init__.py +1 -3
  14. risk/network/geometry.py +65 -57
  15. risk/network/graph/__init__.py +6 -0
  16. risk/network/graph/api.py +194 -0
  17. risk/network/{graph.py → graph/network.py} +87 -37
  18. risk/network/graph/summary.py +254 -0
  19. risk/network/io.py +56 -47
  20. risk/network/plotter/__init__.py +6 -0
  21. risk/network/plotter/api.py +54 -0
  22. risk/network/{plot → plotter}/canvas.py +7 -4
  23. risk/network/{plot → plotter}/contour.py +22 -19
  24. risk/network/{plot → plotter}/labels.py +69 -74
  25. risk/network/{plot → plotter}/network.py +170 -34
  26. risk/network/{plot/utils/color.py → plotter/utils/colors.py} +104 -112
  27. risk/network/{plot → plotter}/utils/layout.py +8 -5
  28. risk/risk.py +11 -500
  29. risk/stats/__init__.py +8 -4
  30. risk/stats/binom.py +51 -0
  31. risk/stats/chi2.py +69 -0
  32. risk/stats/hypergeom.py +27 -17
  33. risk/stats/permutation/__init__.py +1 -1
  34. risk/stats/permutation/permutation.py +44 -38
  35. risk/stats/permutation/test_functions.py +25 -17
  36. risk/stats/poisson.py +15 -9
  37. risk/stats/stats.py +15 -13
  38. risk/stats/zscore.py +68 -0
  39. {risk_network-0.0.8b26.dist-info → risk_network-0.0.9b26.dist-info}/METADATA +9 -5
  40. risk_network-0.0.9b26.dist-info/RECORD +44 -0
  41. {risk_network-0.0.8b26.dist-info → risk_network-0.0.9b26.dist-info}/WHEEL +1 -1
  42. risk/network/plot/__init__.py +0 -6
  43. risk/network/plot/plotter.py +0 -137
  44. risk_network-0.0.8b26.dist-info/RECORD +0 -37
  45. {risk_network-0.0.8b26.dist-info → risk_network-0.0.9b26.dist-info}/LICENSE +0 -0
  46. {risk_network-0.0.8b26.dist-info → risk_network-0.0.9b26.dist-info}/top_level.txt +0 -0
risk/__init__.py CHANGED
@@ -2,9 +2,9 @@
  risk
  ~~~~

- RISK: RISK Infers Spatial Kinships
+ RISK: Regional Inference of Significant Kinships
  """

  from risk.risk import RISK

- __version__ = "0.0.8-beta.26"
+ __version__ = "0.0.9-beta.26"
risk/annotations/__init__.py CHANGED
@@ -3,5 +3,5 @@ risk/annotations
  ~~~~~~~~~~~~~~~~
  """

- from .annotations import define_top_annotations, get_weighted_description
- from .io import AnnotationsIO
+ from risk.annotations.annotations import define_top_annotations, get_weighted_description
+ from risk.annotations.io import AnnotationsIO
risk/annotations/annotations.py CHANGED
@@ -15,6 +15,9 @@ import pandas as pd
  from nltk.tokenize import word_tokenize
  from nltk.corpus import stopwords

+ from risk.log import logger
+ from scipy.sparse import csr_matrix
+

  def _setup_nltk():
  """Ensure necessary NLTK data is downloaded."""
@@ -35,15 +38,25 @@ _setup_nltk()
  stop_words = set(stopwords.words("english"))


- def load_annotations(network: nx.Graph, annotations_input: Dict[str, Any]) -> Dict[str, Any]:
+ def load_annotations(
+ network: nx.Graph, annotations_input: Dict[str, Any], min_nodes_per_term: int = 2
+ ) -> Dict[str, Any]:
  """Convert annotations input to a DataFrame and reindex based on the network's node labels.

  Args:
  network (nx.Graph): The network graph.
  annotations_input (Dict[str, Any]): A dictionary with annotations.
+ min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+ term to be included. Defaults to 2.
+ use_sparse (bool, optional): Whether to return the annotations matrix as a sparse matrix. Defaults to True.

  Returns:
- Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the binary annotations matrix.
+ Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the sparse binary annotations
+ matrix.
+
+ Raises:
+ ValueError: If no annotations are found for the nodes in the network.
+ ValueError: If no annotations have at least min_nodes_per_term nodes in the network.
  """
  # Flatten the dictionary to a list of tuples for easier DataFrame creation
  flattened_annotations = [
@@ -57,95 +70,108 @@ def load_annotations(network: nx.Graph, annotations_input: Dict[str, Any]) -> Di
  index="node", columns="annotations", values="is_member", fill_value=0, dropna=False
  )
  # Reindex the annotations matrix based on the node labels from the network
- node_label_order = list(nx.get_node_attributes(network, "label").values())
+ node_label_order = (attr["label"] for _, attr in network.nodes(data=True) if "label" in attr)
  annotations_pivot = annotations_pivot.reindex(index=node_label_order)
  # Raise an error if no valid annotations are found for the nodes in the network
  if annotations_pivot.notnull().sum().sum() == 0:
+ raise ValueError("No terms found in the annotation file for the nodes in the network.")
+
+ # Filter out annotations with fewer than min_nodes_per_term occurrences
+ num_terms_before_filtering = annotations_pivot.shape[1]
+ annotations_pivot = annotations_pivot.loc[
+ :, (annotations_pivot.sum(axis=0) >= min_nodes_per_term)
+ ]
+ num_terms_after_filtering = annotations_pivot.shape[1]
+ # Log the number of annotations before and after filtering
+ logger.info(f"Minimum number of nodes per annotation term: {min_nodes_per_term}")
+ logger.info(f"Number of input annotation terms: {num_terms_before_filtering}")
+ logger.info(f"Number of remaining annotation terms: {num_terms_after_filtering}")
+ if num_terms_after_filtering == 0:
  raise ValueError(
- "No annotations found in the annotations file for the nodes in the network."
+ f"No annotation terms found with at least {min_nodes_per_term} nodes in the network."
  )

- # Remove columns with all zeros to improve performance
- annotations_pivot = annotations_pivot.loc[:, annotations_pivot.sum(axis=0) != 0]
  # Extract ordered nodes and annotations
  ordered_nodes = tuple(annotations_pivot.index)
  ordered_annotations = tuple(annotations_pivot.columns)
- # Convert the annotations_pivot matrix to a numpy array and ensure it's binary
- annotations_pivot_numpy = (annotations_pivot.fillna(0).to_numpy() > 0).astype(int)
+ # Convert the annotations_pivot matrix to a numpy array or sparse matrix
+ annotations_pivot_binary = (annotations_pivot.fillna(0).to_numpy() > 0).astype(int)
+ # Convert the binary annotations matrix to a sparse matrix
+ annotations_pivot_binary = csr_matrix(annotations_pivot_binary)

  return {
  "ordered_nodes": ordered_nodes,
  "ordered_annotations": ordered_annotations,
- "matrix": annotations_pivot_numpy,
+ "matrix": annotations_pivot_binary,
  }


  def define_top_annotations(
  network: nx.Graph,
  ordered_annotation_labels: List[str],
- neighborhood_enrichment_sums: List[int],
- significant_enrichment_matrix: np.ndarray,
- significant_binary_enrichment_matrix: np.ndarray,
+ neighborhood_significance_sums: List[int],
+ significant_significance_matrix: np.ndarray,
+ significant_binary_significance_matrix: np.ndarray,
  min_cluster_size: int = 5,
  max_cluster_size: int = 1000,
  ) -> pd.DataFrame:
- """Define top annotations based on neighborhood enrichment sums and binary enrichment matrix.
+ """Define top annotations based on neighborhood significance sums and binary significance matrix.

  Args:
  network (NetworkX graph): The network graph.
  ordered_annotation_labels (list of str): List of ordered annotation labels.
- neighborhood_enrichment_sums (list of int): List of neighborhood enrichment sums.
- significant_enrichment_matrix (np.ndarray): Enrichment matrix below alpha threshold.
- significant_binary_enrichment_matrix (np.ndarray): Binary enrichment matrix below alpha threshold.
+ neighborhood_significance_sums (list of int): List of neighborhood significance sums.
+ significant_significance_matrix (np.ndarray): Enrichment matrix below alpha threshold.
+ significant_binary_significance_matrix (np.ndarray): Binary significance matrix below alpha threshold.
  min_cluster_size (int, optional): Minimum cluster size. Defaults to 5.
  max_cluster_size (int, optional): Maximum cluster size. Defaults to 1000.

  Returns:
  pd.DataFrame: DataFrame with top annotations and their properties.
  """
- # Sum the columns of the significant enrichment matrix (positive floating point values)
- significant_enrichment_scores = significant_enrichment_matrix.sum(axis=0)
- # Create DataFrame to store annotations, their neighborhood enrichment sums, and enrichment scores
- annotations_enrichment_matrix = pd.DataFrame(
+ # Sum the columns of the significant significance matrix (positive floating point values)
+ significant_significance_scores = significant_significance_matrix.sum(axis=0)
+ # Create DataFrame to store annotations, their neighborhood significance sums, and significance scores
+ annotations_significance_matrix = pd.DataFrame(
  {
  "id": range(len(ordered_annotation_labels)),
  "full_terms": ordered_annotation_labels,
- "significant_neighborhood_enrichment_sums": neighborhood_enrichment_sums,
- "significant_enrichment_score": significant_enrichment_scores,
+ "significant_neighborhood_significance_sums": neighborhood_significance_sums,
+ "significant_significance_score": significant_significance_scores,
  }
  )
- annotations_enrichment_matrix["significant_annotations"] = False
+ annotations_significance_matrix["significant_annotations"] = False
  # Apply size constraints to identify potential significant annotations
- annotations_enrichment_matrix.loc[
+ annotations_significance_matrix.loc[
  (
- annotations_enrichment_matrix["significant_neighborhood_enrichment_sums"]
+ annotations_significance_matrix["significant_neighborhood_significance_sums"]
  >= min_cluster_size
  )
  & (
- annotations_enrichment_matrix["significant_neighborhood_enrichment_sums"]
+ annotations_significance_matrix["significant_neighborhood_significance_sums"]
  <= max_cluster_size
  ),
  "significant_annotations",
  ] = True
  # Initialize columns for connected components analysis
- annotations_enrichment_matrix["num_connected_components"] = 0
- annotations_enrichment_matrix["size_connected_components"] = None
- annotations_enrichment_matrix["size_connected_components"] = annotations_enrichment_matrix[
+ annotations_significance_matrix["num_connected_components"] = 0
+ annotations_significance_matrix["size_connected_components"] = None
+ annotations_significance_matrix["size_connected_components"] = annotations_significance_matrix[
  "size_connected_components"
  ].astype(object)
- annotations_enrichment_matrix["num_large_connected_components"] = 0
+ annotations_significance_matrix["num_large_connected_components"] = 0

- for attribute in annotations_enrichment_matrix.index.values[
- annotations_enrichment_matrix["significant_annotations"]
+ for attribute in annotations_significance_matrix.index.values[
+ annotations_significance_matrix["significant_annotations"]
  ]:
- # Identify enriched neighborhoods based on the binary enrichment matrix
- enriched_neighborhoods = list(
- compress(list(network), significant_binary_enrichment_matrix[:, attribute])
+ # Identify significant neighborhoods based on the binary significance matrix
+ significant_neighborhoods = list(
+ compress(list(network), significant_binary_significance_matrix[:, attribute])
  )
- enriched_network = nx.subgraph(network, enriched_neighborhoods)
- # Analyze connected components within the enriched subnetwork
+ significant_network = nx.subgraph(network, significant_neighborhoods)
+ # Analyze connected components within the significant subnetwork
  connected_components = sorted(
- nx.connected_components(enriched_network), key=len, reverse=True
+ nx.connected_components(significant_network), key=len, reverse=True
  )
  size_connected_components = np.array([len(c) for c in connected_components])

@@ -159,23 +185,24 @@ def define_top_annotations(
  num_large_connected_components = len(filtered_size_connected_components)

  # Assign the number of connected components
- annotations_enrichment_matrix.loc[attribute, "num_connected_components"] = (
+ annotations_significance_matrix.loc[attribute, "num_connected_components"] = (
  num_connected_components
  )
  # Filter out attributes with more than one connected component
- annotations_enrichment_matrix.loc[
- annotations_enrichment_matrix["num_connected_components"] > 1, "significant_annotations"
+ annotations_significance_matrix.loc[
+ annotations_significance_matrix["num_connected_components"] > 1,
+ "significant_annotations",
  ] = False
  # Assign the number of large connected components
- annotations_enrichment_matrix.loc[attribute, "num_large_connected_components"] = (
+ annotations_significance_matrix.loc[attribute, "num_large_connected_components"] = (
  num_large_connected_components
  )
  # Assign the size of connected components, ensuring it is always a list
- annotations_enrichment_matrix.at[attribute, "size_connected_components"] = (
+ annotations_significance_matrix.at[attribute, "size_connected_components"] = (
  filtered_size_connected_components.tolist()
  )

- return annotations_enrichment_matrix
+ return annotations_significance_matrix


  def get_weighted_description(words_column: pd.Series, scores_column: pd.Series) -> str:
@@ -184,16 +211,16 @@ def get_weighted_description(words_column: pd.Series, scores_column: pd.Series)

  Args:
  words_column (pd.Series): A pandas Series containing strings to process.
- scores_column (pd.Series): A pandas Series containing enrichment scores to weigh the terms.
+ scores_column (pd.Series): A pandas Series containing significance scores to weigh the terms.

  Returns:
- str: A coherent description formed from the most frequent and significant words, weighed by enrichment scores.
+ str: A coherent description formed from the most frequent and significant words, weighed by significance scores.
  """
  # Handle case where all scores are the same
  if scores_column.max() == scores_column.min():
  normalized_scores = pd.Series([1] * len(scores_column))
  else:
- # Normalize the enrichment scores to be between 0 and 1
+ # Normalize the significance scores to be between 0 and 1
  normalized_scores = (scores_column - scores_column.min()) / (
  scores_column.max() - scores_column.min()
  )
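With the changes to risk/annotations/annotations.py above, `load_annotations` drops any term annotating fewer than `min_nodes_per_term` nodes and returns the binary membership matrix as a SciPy `csr_matrix` rather than a dense NumPy array. A minimal sketch of calling the updated function on a toy graph (the node labels, terms, and expected outputs in the comments are hypothetical, assuming the default behavior shown in the diff):

```python
import networkx as nx

from risk.annotations.annotations import load_annotations

# Toy graph whose nodes carry the "label" attribute used for reindexing
network = nx.Graph()
network.add_nodes_from(
    [(0, {"label": "geneA"}), (1, {"label": "geneB"}), (2, {"label": "geneC"})]
)

# Hypothetical term -> node-label mapping, the same shape the IO loaders produce
annotations_input = {
    "term1": ["geneA", "geneB"],  # kept: annotates 2 nodes (>= min_nodes_per_term)
    "term2": ["geneC"],           # dropped by the default min_nodes_per_term=2
}

result = load_annotations(network, annotations_input, min_nodes_per_term=2)
print(result["ordered_annotations"])  # expected: ('term1',)
print(type(result["matrix"]))         # scipy.sparse csr_matrix, not np.ndarray
print(result["matrix"].toarray())     # densify only for inspection
```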
risk/annotations/io.py CHANGED
@@ -1,8 +1,6 @@
  """
  risk/annotations/io
  ~~~~~~~~~~~~~~~~~~~
-
- This file contains the code for the RISK class and command-line access.
  """

  import json
@@ -25,27 +23,32 @@ class AnnotationsIO:
  def __init__(self):
  pass

- def load_json_annotation(self, network: nx.Graph, filepath: str) -> Dict[str, Any]:
+ def load_json_annotation(
+ self, network: nx.Graph, filepath: str, min_nodes_per_term: int = 2
+ ) -> Dict[str, Any]:
  """Load annotations from a JSON file and convert them to a DataFrame.

  Args:
  network (NetworkX graph): The network to which the annotations are related.
  filepath (str): Path to the JSON annotations file.
+ min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+ term to be included. Defaults to 2.

  Returns:
  Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotations matrix.
  """
  filetype = "JSON"
  # Log the loading of the JSON file
- params.log_annotations(filepath=filepath, filetype=filetype)
+ params.log_annotations(
+ filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
+ )
  _log_loading(filetype, filepath=filepath)

- # Open and read the JSON file
- with open(filepath, "r") as file:
+ # Load the JSON file into a dictionary
+ with open(filepath, "r", encoding="utf-8") as file:
  annotations_input = json.load(file)

- # Load the annotations into the provided network
- return load_annotations(network, annotations_input)
+ return load_annotations(network, annotations_input, min_nodes_per_term)

  def load_excel_annotation(
  self,
@@ -55,6 +58,7 @@ class AnnotationsIO:
  nodes_colname: str = "nodes",
  sheet_name: str = "Sheet1",
  nodes_delimiter: str = ";",
+ min_nodes_per_term: int = 2,
  ) -> Dict[str, Any]:
  """Load annotations from an Excel file and associate them with the network.

@@ -65,6 +69,8 @@ class AnnotationsIO:
  nodes_colname (str): Name of the column containing the nodes associated with each label.
  sheet_name (str, optional): The name of the Excel sheet to load (default is 'Sheet1').
  nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
+ min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+ term to be included. Defaults to 2.

  Returns:
  Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes,
@@ -72,18 +78,21 @@ class AnnotationsIO:
  """
  filetype = "Excel"
  # Log the loading of the Excel file
- params.log_annotations(filepath=filepath, filetype=filetype)
+ params.log_annotations(
+ filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
+ )
  _log_loading(filetype, filepath=filepath)

  # Load the specified sheet from the Excel file
- df = pd.read_excel(filepath, sheet_name=sheet_name)
+ annotation = pd.read_excel(filepath, sheet_name=sheet_name)
  # Split the nodes column by the specified nodes_delimiter
- df[nodes_colname] = df[nodes_colname].apply(lambda x: x.split(nodes_delimiter))
+ annotation[nodes_colname] = annotation[nodes_colname].apply(
+ lambda x: x.split(nodes_delimiter)
+ )
  # Convert the DataFrame to a dictionary pairing labels with their corresponding nodes
- label_node_dict = df.set_index(label_colname)[nodes_colname].to_dict()
+ annotations_input = annotation.set_index(label_colname)[nodes_colname].to_dict()

- # Load the annotations into the provided network
- return load_annotations(network, label_node_dict)
+ return load_annotations(network, annotations_input, min_nodes_per_term)

  def load_csv_annotation(
  self,
@@ -92,6 +101,7 @@ class AnnotationsIO:
  label_colname: str = "label",
  nodes_colname: str = "nodes",
  nodes_delimiter: str = ";",
+ min_nodes_per_term: int = 2,
  ) -> Dict[str, Any]:
  """Load annotations from a CSV file and associate them with the network.

@@ -101,6 +111,8 @@ class AnnotationsIO:
  label_colname (str): Name of the column containing the labels (e.g., GO terms).
  nodes_colname (str): Name of the column containing the nodes associated with each label.
  nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
+ min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+ term to be included. Defaults to 2.

  Returns:
  Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes,
@@ -108,7 +120,9 @@ class AnnotationsIO:
  """
  filetype = "CSV"
  # Log the loading of the CSV file
- params.log_annotations(filepath=filepath, filetype=filetype)
+ params.log_annotations(
+ filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
+ )
  _log_loading(filetype, filepath=filepath)

  # Load the CSV file into a dictionary
@@ -116,8 +130,7 @@ class AnnotationsIO:
  filepath, label_colname, nodes_colname, delimiter=",", nodes_delimiter=nodes_delimiter
  )

- # Load the annotations into the provided network
- return load_annotations(network, annotations_input)
+ return load_annotations(network, annotations_input, min_nodes_per_term)

  def load_tsv_annotation(
  self,
@@ -126,6 +139,7 @@ class AnnotationsIO:
  label_colname: str = "label",
  nodes_colname: str = "nodes",
  nodes_delimiter: str = ";",
+ min_nodes_per_term: int = 2,
  ) -> Dict[str, Any]:
  """Load annotations from a TSV file and associate them with the network.

@@ -135,6 +149,8 @@ class AnnotationsIO:
  label_colname (str): Name of the column containing the labels (e.g., GO terms).
  nodes_colname (str): Name of the column containing the nodes associated with each label.
  nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').
+ min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+ term to be included. Defaults to 2.

  Returns:
  Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes,
@@ -142,7 +158,9 @@ class AnnotationsIO:
  """
  filetype = "TSV"
  # Log the loading of the TSV file
- params.log_annotations(filepath=filepath, filetype=filetype)
+ params.log_annotations(
+ filetype=filetype, filepath=filepath, min_nodes_per_term=min_nodes_per_term
+ )
  _log_loading(filetype, filepath=filepath)

  # Load the TSV file into a dictionary
@@ -150,15 +168,18 @@ class AnnotationsIO:
  filepath, label_colname, nodes_colname, delimiter="\t", nodes_delimiter=nodes_delimiter
  )

- # Load the annotations into the provided network
- return load_annotations(network, annotations_input)
+ return load_annotations(network, annotations_input, min_nodes_per_term)

- def load_dict_annotation(self, network: nx.Graph, content: Dict[str, Any]) -> Dict[str, Any]:
+ def load_dict_annotation(
+ self, network: nx.Graph, content: Dict[str, Any], min_nodes_per_term: int = 2
+ ) -> Dict[str, Any]:
  """Load annotations from a provided dictionary and convert them to a dictionary annotation.

  Args:
  network (NetworkX graph): The network to which the annotations are related.
  content (Dict[str, Any]): The annotations dictionary to load.
+ min_nodes_per_term (int, optional): The minimum number of network nodes required for each annotation
+ term to be included. Defaults to 2.

  Returns:
  Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotations matrix.
@@ -174,13 +195,8 @@ class AnnotationsIO:
  params.log_annotations(filepath="In-memory dictionary", filetype=filetype)
  _log_loading(filetype, "In-memory dictionary")

- # Load the annotations into the provided network
- annotations_dict = load_annotations(network, content)
- # Ensure the output is a dictionary
- if not isinstance(annotations_dict, dict):
- raise ValueError("Expected output to be a dictionary")
-
- return annotations_dict
+ # Load the annotations as a dictionary from the provided dictionary
+ return load_annotations(network, content, min_nodes_per_term)


  def _load_matrix_file(
@@ -203,11 +219,11 @@ def _load_matrix_file(
  Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes.
  """
  # Load the CSV or TSV file into a DataFrame
- df = pd.read_csv(filepath, delimiter=delimiter)
+ annotation = pd.read_csv(filepath, delimiter=delimiter)
  # Split the nodes column by the nodes_delimiter to handle multiple nodes per label
- df[nodes_colname] = df[nodes_colname].apply(lambda x: x.split(nodes_delimiter))
+ annotation[nodes_colname] = annotation[nodes_colname].apply(lambda x: x.split(nodes_delimiter))
  # Create a dictionary pairing labels with their corresponding list of nodes
- label_node_dict = df.set_index(label_colname)[nodes_colname].to_dict()
+ label_node_dict = annotation.set_index(label_colname)[nodes_colname].to_dict()
  return label_node_dict

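Every loader on `AnnotationsIO` now threads `min_nodes_per_term` through to `load_annotations`. A hedged usage sketch for the CSV loader (the file written here is a throwaway placeholder, and the loader's earlier parameters such as `network` and `filepath` are assumed unchanged from 0.0.8):

```python
import networkx as nx
import pandas as pd

from risk.annotations.io import AnnotationsIO

# Small graph whose nodes carry the "label" attribute the loaders reindex on
network = nx.Graph()
network.add_nodes_from(
    [(i, {"label": name}) for i, name in enumerate(["geneA", "geneB", "geneC"])]
)

# Write a tiny placeholder annotations file in the expected label/nodes layout
pd.DataFrame(
    {"label": ["term1", "term2"], "nodes": ["geneA;geneB;geneC", "geneC"]}
).to_csv("annotations.csv", index=False)

annotations = AnnotationsIO().load_csv_annotation(
    network=network,
    filepath="annotations.csv",
    label_colname="label",
    nodes_colname="nodes",
    nodes_delimiter=";",
    min_nodes_per_term=2,  # term2 annotates only one node and is filtered out
)
```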
risk/log/__init__.py CHANGED
@@ -3,7 +3,9 @@ risk/log
  ~~~~~~~~
  """

- from .config import logger, log_header, set_global_verbosity
- from .params import Params
+ from risk.log.console import logger, log_header, set_global_verbosity
+ from risk.log.parameters import Params

+ # Initialize the global parameters logger
  params = Params()
+ params.initialize()
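Because `risk/log/__init__.py` now creates and initializes the shared `Params` instance at import time, callers only need to import it. A minimal sketch (the logged values are hypothetical):

```python
# The shared Params instance is ready as soon as risk.log is imported.
from risk.log import logger, params

logger.info("params is initialized at import time; no further setup is needed")
# Record a hypothetical annotations-loading call, mirroring what AnnotationsIO does internally
params.log_annotations(filetype="CSV", filepath="annotations.csv", min_nodes_per_term=2)
```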
risk/log/{config.py → console.py} RENAMED
@@ -1,6 +1,6 @@
  """
- risk/log/config
- ~~~~~~~~~~~~~~~
+ risk/log/console
+ ~~~~~~~~~~~~~~~~
  """

  import logging
@@ -16,8 +16,10 @@ def in_jupyter():
  shell = get_ipython().__class__.__name__
  if shell == "ZMQInteractiveShell": # Jupyter Notebook or QtConsole
  return True
- elif shell == "TerminalInteractiveShell": # Terminal running IPython
+ if shell == "TerminalInteractiveShell": # Terminal running IPython
  return False
+
+ return False # Other type (?)
  except NameError:
  return False # Not in Jupyter

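The patched `in_jupyter()` now falls through to `False` for any interactive shell it does not explicitly recognize, instead of implicitly returning `None`. A standalone, illustrative re-implementation of that detection pattern (the function name is ours, not the package's):

```python
def running_in_jupyter() -> bool:
    """Illustrative copy of the shell-detection pattern used in risk/log/console.py."""
    try:
        # get_ipython is only defined inside IPython/Jupyter sessions
        shell = get_ipython().__class__.__name__  # type: ignore[name-defined]
        if shell == "ZMQInteractiveShell":  # Jupyter Notebook or QtConsole
            return True
        if shell == "TerminalInteractiveShell":  # Terminal running IPython
            return False
        return False  # any other shell type
    except NameError:
        return False  # plain Python interpreter
```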
risk/log/{params.py → parameters.py} RENAMED
@@ -1,50 +1,22 @@
  """
- risk/log/params
- ~~~~~~~~~~~~~~~
+ risk/log/parameters
+ ~~~~~~~~~~~~~~~~~~~
  """

  import csv
  import json
  import warnings
  from datetime import datetime
- from functools import wraps
  from typing import Any, Dict

  import numpy as np

- from .config import logger, log_header
+ from risk.log.console import logger, log_header

  # Suppress all warnings - this is to resolve warnings from multiprocessing
  warnings.filterwarnings("ignore")


- def _safe_param_export(func):
- """A decorator to wrap parameter export functions in a try-except block for safe execution.
-
- Args:
- func (function): The function to be wrapped.
-
- Returns:
- function: The wrapped function with error handling.
- """
-
- @wraps(func)
- def wrapper(*args, **kwargs):
- try:
- result = func(*args, **kwargs)
- filepath = (
- kwargs.get("filepath") or args[1]
- ) # Assuming filepath is always the second argument
- logger.info(f"Parameters successfully exported to filepath: {filepath}")
- return result
- except Exception as e:
- filepath = kwargs.get("filepath") or args[1]
- logger.error(f"An error occurred while exporting parameters to {filepath}: {e}")
- return None
-
- return wrapper
-
-
  class Params:
  """Handles the storage and logging of various parameters for network analysis.

@@ -106,7 +78,6 @@ class Params:
  """
  self.plotter = {**self.plotter, **kwargs}

- @_safe_param_export
  def to_csv(self, filepath: str) -> None:
  """Export the parameters to a CSV file.

@@ -116,7 +87,7 @@ class Params:
  # Load the parameter dictionary
  params = self.load()
  # Open the file in write mode
- with open(filepath, "w", newline="") as csv_file:
+ with open(filepath, "w", encoding="utf-8", newline="") as csv_file:
  writer = csv.writer(csv_file)
  # Write the header
  writer.writerow(["parent_key", "child_key", "value"])
@@ -128,17 +99,19 @@ class Params:
  else:
  writer.writerow([parent_key, "", parent_value])

- @_safe_param_export
+ logger.info(f"Parameters exported to CSV file: {filepath}")
+
  def to_json(self, filepath: str) -> None:
  """Export the parameters to a JSON file.

  Args:
  filepath (str): The path where the JSON file will be saved.
  """
- with open(filepath, "w") as json_file:
+ with open(filepath, "w", encoding="utf-8") as json_file:
  json.dump(self.load(), json_file, indent=4)

- @_safe_param_export
+ logger.info(f"Parameters exported to JSON file: {filepath}")
+
  def to_txt(self, filepath: str) -> None:
  """Export the parameters to a text file.

@@ -148,13 +121,15 @@ class Params:
  # Load the parameter dictionary
  params = self.load()
  # Open the file in write mode
- with open(filepath, "w") as txt_file:
+ with open(filepath, "w", encoding="utf-8") as txt_file:
  for key, value in params.items():
  # Write the key and its corresponding value
  txt_file.write(f"{key}: {value}\n")
  # Add a blank line after each entry
  txt_file.write("\n")

+ logger.info(f"Parameters exported to text file: {filepath}")
+
  def load(self) -> Dict[str, Any]:
  """Load and process various parameters, converting any np.ndarray values to lists.

@@ -186,12 +161,12 @@ def _convert_ndarray_to_list(d: Dict[str, Any]) -> Dict[str, Any]:
  if isinstance(d, dict):
  # Recursively process each value in the dictionary
  return {k: _convert_ndarray_to_list(v) for k, v in d.items()}
- elif isinstance(d, list):
+ if isinstance(d, list):
  # Recursively process each item in the list
  return [_convert_ndarray_to_list(v) for v in d]
- elif isinstance(d, np.ndarray):
+ if isinstance(d, np.ndarray):
  # Convert numpy arrays to lists
  return d.tolist()
- else:
- # Return the value unchanged if it's not a dict, List, or ndarray
- return d
+
+ # Return the value unchanged if it's not a dict, List, or ndarray
+ return d
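With `_safe_param_export` removed, the exporters log success directly and any I/O error now propagates to the caller rather than being swallowed and logged. A hedged sketch of exporting the collected parameters (paths and the logged call are placeholders):

```python
from risk.log import params

# Record a hypothetical annotations-loading call, then export the collected parameters
params.log_annotations(filetype="JSON", filepath="annotations.json", min_nodes_per_term=2)

params.to_csv("parameters.csv")    # logs: Parameters exported to CSV file: parameters.csv
params.to_json("parameters.json")  # logs: Parameters exported to JSON file: parameters.json
params.to_txt("parameters.txt")    # logs: Parameters exported to text file: parameters.txt
# Unlike 0.0.8, a failure (e.g., an unwritable path) now raises instead of returning None.
```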
risk/neighborhoods/__init__.py CHANGED
@@ -3,8 +3,6 @@ risk/neighborhoods
  ~~~~~~~~~~~~~~~~~~
  """

- from .domains import define_domains, trim_domains_and_top_annotations
- from .neighborhoods import (
- get_network_neighborhoods,
- process_neighborhoods,
- )
+ from risk.neighborhoods.domains import define_domains, trim_domains
+ from risk.neighborhoods.api import NeighborhoodsAPI
+ from risk.neighborhoods.neighborhoods import process_neighborhoods
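The neighborhoods package now exposes `NeighborhoodsAPI` alongside the functional helpers, `trim_domains_and_top_annotations` is renamed to `trim_domains`, and `get_network_neighborhoods` is no longer re-exported. A sketch of the updated import surface (imports only, nothing is executed against real data):

```python
# Package-level imports available in 0.0.9b26
from risk.neighborhoods import (
    NeighborhoodsAPI,   # new class from risk/neighborhoods/api.py (+446 lines in this release)
    define_domains,
    process_neighborhoods,
    trim_domains,       # replaces trim_domains_and_top_annotations from 0.0.8
)
```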