risk-network 0.0.8b27__py3-none-any.whl → 0.0.9b2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
risk/__init__.py CHANGED
@@ -7,4 +7,4 @@ RISK: RISK Infers Spatial Kinships
 
 from risk.risk import RISK
 
-__version__ = "0.0.8-beta.27"
+__version__ = "0.0.9-beta.2"
@@ -83,69 +83,69 @@ def load_annotations(network: nx.Graph, annotations_input: Dict[str, Any]) -> Di
 def define_top_annotations(
     network: nx.Graph,
     ordered_annotation_labels: List[str],
-    neighborhood_enrichment_sums: List[int],
-    significant_enrichment_matrix: np.ndarray,
-    significant_binary_enrichment_matrix: np.ndarray,
+    neighborhood_significance_sums: List[int],
+    significant_significance_matrix: np.ndarray,
+    significant_binary_significance_matrix: np.ndarray,
     min_cluster_size: int = 5,
     max_cluster_size: int = 1000,
 ) -> pd.DataFrame:
-    """Define top annotations based on neighborhood enrichment sums and binary enrichment matrix.
+    """Define top annotations based on neighborhood significance sums and binary significance matrix.
 
     Args:
         network (NetworkX graph): The network graph.
         ordered_annotation_labels (list of str): List of ordered annotation labels.
-        neighborhood_enrichment_sums (list of int): List of neighborhood enrichment sums.
-        significant_enrichment_matrix (np.ndarray): Enrichment matrix below alpha threshold.
-        significant_binary_enrichment_matrix (np.ndarray): Binary enrichment matrix below alpha threshold.
+        neighborhood_significance_sums (list of int): List of neighborhood significance sums.
+        significant_significance_matrix (np.ndarray): Enrichment matrix below alpha threshold.
+        significant_binary_significance_matrix (np.ndarray): Binary significance matrix below alpha threshold.
         min_cluster_size (int, optional): Minimum cluster size. Defaults to 5.
         max_cluster_size (int, optional): Maximum cluster size. Defaults to 1000.
 
     Returns:
         pd.DataFrame: DataFrame with top annotations and their properties.
     """
-    # Sum the columns of the significant enrichment matrix (positive floating point values)
-    significant_enrichment_scores = significant_enrichment_matrix.sum(axis=0)
-    # Create DataFrame to store annotations, their neighborhood enrichment sums, and enrichment scores
-    annotations_enrichment_matrix = pd.DataFrame(
+    # Sum the columns of the significant significance matrix (positive floating point values)
+    significant_significance_scores = significant_significance_matrix.sum(axis=0)
+    # Create DataFrame to store annotations, their neighborhood significance sums, and significance scores
+    annotations_significance_matrix = pd.DataFrame(
         {
             "id": range(len(ordered_annotation_labels)),
             "full_terms": ordered_annotation_labels,
-            "significant_neighborhood_enrichment_sums": neighborhood_enrichment_sums,
-            "significant_enrichment_score": significant_enrichment_scores,
+            "significant_neighborhood_significance_sums": neighborhood_significance_sums,
+            "significant_significance_score": significant_significance_scores,
         }
     )
-    annotations_enrichment_matrix["significant_annotations"] = False
+    annotations_significance_matrix["significant_annotations"] = False
     # Apply size constraints to identify potential significant annotations
-    annotations_enrichment_matrix.loc[
+    annotations_significance_matrix.loc[
         (
-            annotations_enrichment_matrix["significant_neighborhood_enrichment_sums"]
+            annotations_significance_matrix["significant_neighborhood_significance_sums"]
             >= min_cluster_size
         )
         & (
-            annotations_enrichment_matrix["significant_neighborhood_enrichment_sums"]
+            annotations_significance_matrix["significant_neighborhood_significance_sums"]
             <= max_cluster_size
         ),
         "significant_annotations",
     ] = True
     # Initialize columns for connected components analysis
-    annotations_enrichment_matrix["num_connected_components"] = 0
-    annotations_enrichment_matrix["size_connected_components"] = None
-    annotations_enrichment_matrix["size_connected_components"] = annotations_enrichment_matrix[
+    annotations_significance_matrix["num_connected_components"] = 0
+    annotations_significance_matrix["size_connected_components"] = None
+    annotations_significance_matrix["size_connected_components"] = annotations_significance_matrix[
         "size_connected_components"
     ].astype(object)
-    annotations_enrichment_matrix["num_large_connected_components"] = 0
+    annotations_significance_matrix["num_large_connected_components"] = 0
 
-    for attribute in annotations_enrichment_matrix.index.values[
-        annotations_enrichment_matrix["significant_annotations"]
+    for attribute in annotations_significance_matrix.index.values[
+        annotations_significance_matrix["significant_annotations"]
     ]:
-        # Identify enriched neighborhoods based on the binary enrichment matrix
-        enriched_neighborhoods = list(
-            compress(list(network), significant_binary_enrichment_matrix[:, attribute])
+        # Identify significant neighborhoods based on the binary significance matrix
+        significant_neighborhoods = list(
+            compress(list(network), significant_binary_significance_matrix[:, attribute])
         )
-        enriched_network = nx.subgraph(network, enriched_neighborhoods)
-        # Analyze connected components within the enriched subnetwork
+        significant_network = nx.subgraph(network, significant_neighborhoods)
+        # Analyze connected components within the significant subnetwork
         connected_components = sorted(
-            nx.connected_components(enriched_network), key=len, reverse=True
+            nx.connected_components(significant_network), key=len, reverse=True
         )
         size_connected_components = np.array([len(c) for c in connected_components])
 
@@ -159,23 +159,24 @@ def define_top_annotations(
         num_large_connected_components = len(filtered_size_connected_components)
 
         # Assign the number of connected components
-        annotations_enrichment_matrix.loc[attribute, "num_connected_components"] = (
+        annotations_significance_matrix.loc[attribute, "num_connected_components"] = (
            num_connected_components
         )
         # Filter out attributes with more than one connected component
-        annotations_enrichment_matrix.loc[
-            annotations_enrichment_matrix["num_connected_components"] > 1, "significant_annotations"
+        annotations_significance_matrix.loc[
+            annotations_significance_matrix["num_connected_components"] > 1,
+            "significant_annotations",
         ] = False
         # Assign the number of large connected components
-        annotations_enrichment_matrix.loc[attribute, "num_large_connected_components"] = (
+        annotations_significance_matrix.loc[attribute, "num_large_connected_components"] = (
            num_large_connected_components
         )
         # Assign the size of connected components, ensuring it is always a list
-        annotations_enrichment_matrix.at[attribute, "size_connected_components"] = (
+        annotations_significance_matrix.at[attribute, "size_connected_components"] = (
            filtered_size_connected_components.tolist()
         )
 
-    return annotations_enrichment_matrix
+    return annotations_significance_matrix
 
 
 def get_weighted_description(words_column: pd.Series, scores_column: pd.Series) -> str:
@@ -184,16 +185,16 @@ def get_weighted_description(words_column: pd.Series, scores_column: pd.Series)
 
     Args:
         words_column (pd.Series): A pandas Series containing strings to process.
-        scores_column (pd.Series): A pandas Series containing enrichment scores to weigh the terms.
+        scores_column (pd.Series): A pandas Series containing significance scores to weigh the terms.
 
     Returns:
-        str: A coherent description formed from the most frequent and significant words, weighed by enrichment scores.
+        str: A coherent description formed from the most frequent and significant words, weighed by significance scores.
     """
     # Handle case where all scores are the same
     if scores_column.max() == scores_column.min():
         normalized_scores = pd.Series([1] * len(scores_column))
     else:
-        # Normalize the enrichment scores to be between 0 and 1
+        # Normalize the significance scores to be between 0 and 1
         normalized_scores = (scores_column - scores_column.min()) / (
             scores_column.max() - scores_column.min()
         )
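As a point of reference for the renamed API above, here is a minimal sketch of calling define_top_annotations with the new keyword arguments. The toy graph and matrices are illustrative, and the import path is an assumption (the diff does not name the module file):

```python
import networkx as nx
import numpy as np

# Import path assumed; define_top_annotations appears in this diff but its module file is not named here
from risk.annotations.annotations import define_top_annotations

network = nx.path_graph(4)  # toy network with nodes 0..3
labels = ["term A", "term B"]
# Toy significance values (rows = neighborhoods/nodes, columns = annotations)
significance = np.array([[1.2, 0.0], [0.8, 0.3], [0.0, 0.0], [0.5, 0.0]])
binary = (significance > 0).astype(int)

top_annotations = define_top_annotations(
    network=network,
    ordered_annotation_labels=labels,
    neighborhood_significance_sums=binary.sum(axis=0).tolist(),  # was neighborhood_enrichment_sums
    significant_significance_matrix=significance,                # was significant_enrichment_matrix
    significant_binary_significance_matrix=binary,               # was significant_binary_enrichment_matrix
    min_cluster_size=2,
)
print(top_annotations[["full_terms", "significant_annotations"]])
```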
risk/annotations/io.py CHANGED
@@ -76,11 +76,13 @@ class AnnotationsIO:
         _log_loading(filetype, filepath=filepath)
 
         # Load the specified sheet from the Excel file
-        df = pd.read_excel(filepath, sheet_name=sheet_name)
+        annotation = pd.read_excel(filepath, sheet_name=sheet_name)
         # Split the nodes column by the specified nodes_delimiter
-        df[nodes_colname] = df[nodes_colname].apply(lambda x: x.split(nodes_delimiter))
+        annotation[nodes_colname] = annotation[nodes_colname].apply(
+            lambda x: x.split(nodes_delimiter)
+        )
         # Convert the DataFrame to a dictionary pairing labels with their corresponding nodes
-        label_node_dict = df.set_index(label_colname)[nodes_colname].to_dict()
+        label_node_dict = annotation.set_index(label_colname)[nodes_colname].to_dict()
 
         # Load the annotations into the provided network
         return load_annotations(network, label_node_dict)
@@ -203,11 +205,11 @@ def _load_matrix_file(
         Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes.
     """
     # Load the CSV or TSV file into a DataFrame
-    df = pd.read_csv(filepath, delimiter=delimiter)
+    annotation = pd.read_csv(filepath, delimiter=delimiter)
     # Split the nodes column by the nodes_delimiter to handle multiple nodes per label
-    df[nodes_colname] = df[nodes_colname].apply(lambda x: x.split(nodes_delimiter))
+    annotation[nodes_colname] = annotation[nodes_colname].apply(lambda x: x.split(nodes_delimiter))
     # Create a dictionary pairing labels with their corresponding list of nodes
-    label_node_dict = df.set_index(label_colname)[nodes_colname].to_dict()
+    label_node_dict = annotation.set_index(label_colname)[nodes_colname].to_dict()
     return label_node_dict
 
 
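Both loaders above reduce to the same label-to-nodes mapping; here is a self-contained sketch of that transformation using pandas alone (column names, delimiter, and rows are illustrative):

```python
import io

import pandas as pd

# Illustrative annotation table: one row per label, nodes joined by ";"
csv_text = "label\tnodes\nGO:0006119\tATP1;ATP2;ATP3\nGO:0006457\tSSA1;HSP82\n"
annotation = pd.read_csv(io.StringIO(csv_text), delimiter="\t")

# Split the nodes column by the nodes delimiter
annotation["nodes"] = annotation["nodes"].apply(lambda x: x.split(";"))

# Pair each label with its list of nodes
label_node_dict = annotation.set_index("label")["nodes"].to_dict()
print(label_node_dict)
# {'GO:0006119': ['ATP1', 'ATP2', 'ATP3'], 'GO:0006457': ['SSA1', 'HSP82']}
```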
risk/log/__init__.py CHANGED
@@ -4,6 +4,8 @@ risk/log
 """
 
 from .console import logger, log_header, set_global_verbosity
-from .params import Params
+from .parameters import Params
 
+# Initialize the global parameters logger
 params = Params()
+params.initialize()
risk/log/params.py → risk/log/parameters.py RENAMED
@@ -1,50 +1,22 @@
 """
-risk/log/params
-~~~~~~~~~~~~~~~
+risk/log/parameters
+~~~~~~~~~~~~~~~~~~~
 """
 
 import csv
 import json
 import warnings
 from datetime import datetime
-from functools import wraps
 from typing import Any, Dict
 
 import numpy as np
 
-from .console import logger, log_header
+from risk.log.console import logger, log_header
 
 # Suppress all warnings - this is to resolve warnings from multiprocessing
 warnings.filterwarnings("ignore")
 
 
-def _safe_param_export(func):
-    """A decorator to wrap parameter export functions in a try-except block for safe execution.
-
-    Args:
-        func (function): The function to be wrapped.
-
-    Returns:
-        function: The wrapped function with error handling.
-    """
-
-    @wraps(func)
-    def wrapper(*args, **kwargs):
-        try:
-            result = func(*args, **kwargs)
-            filepath = (
-                kwargs.get("filepath") or args[1]
-            )  # Assuming filepath is always the second argument
-            logger.info(f"Parameters successfully exported to filepath: {filepath}")
-            return result
-        except Exception as e:
-            filepath = kwargs.get("filepath") or args[1]
-            logger.error(f"An error occurred while exporting parameters to {filepath}: {e}")
-            return None
-
-    return wrapper
-
-
 class Params:
     """Handles the storage and logging of various parameters for network analysis.
 
@@ -106,7 +78,6 @@ class Params:
         """
         self.plotter = {**self.plotter, **kwargs}
 
-    @_safe_param_export
     def to_csv(self, filepath: str) -> None:
         """Export the parameters to a CSV file.
 
@@ -128,7 +99,8 @@
                 else:
                     writer.writerow([parent_key, "", parent_value])
 
-    @_safe_param_export
+        logger.info(f"Parameters exported to CSV file: {filepath}")
+
     def to_json(self, filepath: str) -> None:
         """Export the parameters to a JSON file.
 
@@ -138,7 +110,8 @@
         with open(filepath, "w") as json_file:
             json.dump(self.load(), json_file, indent=4)
 
-    @_safe_param_export
+        logger.info(f"Parameters exported to JSON file: {filepath}")
+
     def to_txt(self, filepath: str) -> None:
         """Export the parameters to a text file.
 
@@ -155,6 +128,8 @@
             # Add a blank line after each entry
             txt_file.write("\n")
 
+        logger.info(f"Parameters exported to text file: {filepath}")
+
     def load(self) -> Dict[str, Any]:
         """Load and process various parameters, converting any np.ndarray values to lists.
 
@@ -20,17 +20,17 @@ from risk.log import logger
 
 def define_domains(
     top_annotations: pd.DataFrame,
-    significant_neighborhoods_enrichment: np.ndarray,
+    significant_neighborhoods_significance: np.ndarray,
     linkage_criterion: str,
     linkage_method: str,
     linkage_metric: str,
 ) -> pd.DataFrame:
-    """Define domains and assign nodes to these domains based on their enrichment scores and clustering,
+    """Define domains and assign nodes to these domains based on their significance scores and clustering,
     handling errors by assigning unique domains when clustering fails.
 
     Args:
         top_annotations (pd.DataFrame): DataFrame of top annotations data for the network nodes.
-        significant_neighborhoods_enrichment (np.ndarray): The binary enrichment matrix below alpha.
+        significant_neighborhoods_significance (np.ndarray): The binary significance matrix below alpha.
         linkage_criterion (str): The clustering criterion for defining groups.
         linkage_method (str): The linkage method for clustering.
         linkage_metric (str): The linkage metric for clustering.
@@ -40,7 +40,7 @@ def define_domains(
     """
     try:
         # Transpose the matrix to cluster annotations
-        m = significant_neighborhoods_enrichment[:, top_annotations["significant_annotations"]].T
+        m = significant_neighborhoods_significance[:, top_annotations["significant_annotations"]].T
         best_linkage, best_metric, best_threshold = _optimize_silhouette_across_linkage_and_metrics(
             m, linkage_criterion, linkage_method, linkage_metric
         )
@@ -65,13 +65,13 @@ def define_domains(
         top_annotations["domain"] = range(1, n_rows + 1)  # Assign unique domains
 
     # Create DataFrames to store domain information
-    node_to_enrichment = pd.DataFrame(
-        data=significant_neighborhoods_enrichment,
+    node_to_significance = pd.DataFrame(
+        data=significant_neighborhoods_significance,
         columns=[top_annotations.index.values, top_annotations["domain"]],
     )
-    node_to_domain = node_to_enrichment.groupby(level="domain", axis=1).sum()
+    node_to_domain = node_to_significance.groupby(level="domain", axis=1).sum()
 
-    # Find the maximum enrichment score for each node
+    # Find the maximum significance score for each node
     t_max = node_to_domain.loc[:, 1:].max(axis=1)
     t_idxmax = node_to_domain.loc[:, 1:].idxmax(axis=1)
     t_idxmax[t_max == 0] = 0
@@ -101,7 +101,7 @@ def trim_domains_and_top_annotations(
         max_cluster_size (int, optional): Maximum size of a cluster to be retained. Defaults to 1000.
 
     Returns:
-        Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: A tuple containing:
+        Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
             - Trimmed annotations (pd.DataFrame)
             - Trimmed domains (pd.DataFrame)
            - A DataFrame with domain labels (pd.DataFrame)
@@ -119,27 +119,27 @@ def trim_domains_and_top_annotations(
     top_annotations["domain"].replace(to_remove, invalid_domain_id, inplace=True)
     domains.loc[domains["primary_domain"].isin(to_remove), ["primary_domain"]] = invalid_domain_id
 
-    # Normalize "num enriched neighborhoods" by percentile for each domain and scale to 0-10
+    # Normalize "num significant neighborhoods" by percentile for each domain and scale to 0-10
     top_annotations["normalized_value"] = top_annotations.groupby("domain")[
-        "significant_neighborhood_enrichment_sums"
+        "significant_neighborhood_significance_sums"
     ].transform(lambda x: (x.rank(pct=True) * 10).apply(np.ceil).astype(int))
-    # Modify the lambda function to pass both full_terms and significant_enrichment_score
+    # Modify the lambda function to pass both full_terms and significant_significance_score
     top_annotations["combined_terms"] = top_annotations.apply(
         lambda row: " ".join([str(row["full_terms"])] * row["normalized_value"]), axis=1
     )
 
-    # Perform the groupby operation while retaining the other columns and adding the weighting with enrichment scores
+    # Perform the groupby operation while retaining the other columns and adding the weighting with significance scores
     domain_labels = (
         top_annotations.groupby("domain")
         .agg(
             full_terms=("full_terms", lambda x: list(x)),
-            enrichment_scores=("significant_enrichment_score", lambda x: list(x)),
+            significance_scores=("significant_significance_score", lambda x: list(x)),
         )
         .reset_index()
     )
     domain_labels["combined_terms"] = domain_labels.apply(
         lambda row: get_weighted_description(
-            pd.Series(row["full_terms"]), pd.Series(row["enrichment_scores"])
+            pd.Series(row["full_terms"]), pd.Series(row["significance_scores"])
         ),
         axis=1,
     )
@@ -150,7 +150,7 @@ def trim_domains_and_top_annotations(
             "domain": "id",
             "combined_terms": "normalized_description",
             "full_terms": "full_descriptions",
-            "enrichment_scores": "enrichment_scores",
+            "significance_scores": "significance_scores",
         }
     ).set_index("id")
 
@@ -177,7 +177,7 @@ def _optimize_silhouette_across_linkage_and_metrics(
         linkage_metric (str): Linkage metric for clustering.
 
     Returns:
-        Tuple[str, str, float]: A tuple containing:
+        Tuple[str, str, float]:
            - Best linkage method (str)
            - Best linkage metric (str)
            - Best threshold (float)
@@ -231,7 +231,7 @@ def _find_best_silhouette_score(
         resolution (float, optional): Desired resolution for the best threshold. Defaults to 0.001.
 
     Returns:
-        Tuple[float, float]: A tuple containing:
+        Tuple[float, float]:
            - Best threshold (float): The threshold that yields the best silhouette score.
            - Best silhouette score (float): The highest silhouette score achieved.
    """