risk-network 0.0.8b26.tar.gz → 0.0.9b1.tar.gz

This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (45)
  1. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/PKG-INFO +1 -1
  2. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/__init__.py +1 -1
  3. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/annotations/annotations.py +39 -38
  4. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/log/__init__.py +1 -1
  5. risk_network-0.0.8b26/risk/log/config.py → risk_network-0.0.9b1/risk/log/console.py +2 -2
  6. risk_network-0.0.9b1/risk/log/enrichment.py +18 -0
  7. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/log/params.py +1 -1
  8. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/neighborhoods/domains.py +15 -15
  9. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/neighborhoods/neighborhoods.py +101 -89
  10. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/network/graph.py +25 -25
  11. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/network/plot/canvas.py +3 -1
  12. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/network/plot/contour.py +1 -1
  13. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/network/plot/labels.py +1 -1
  14. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/network/plot/network.py +28 -28
  15. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/network/plot/utils/color.py +27 -27
  16. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/risk.py +20 -18
  17. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/stats/stats.py +13 -13
  18. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk_network.egg-info/PKG-INFO +1 -1
  19. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk_network.egg-info/SOURCES.txt +2 -1
  20. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/LICENSE +0 -0
  21. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/MANIFEST.in +0 -0
  22. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/README.md +0 -0
  23. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/pyproject.toml +0 -0
  24. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/annotations/__init__.py +0 -0
  25. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/annotations/io.py +0 -0
  26. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/constants.py +0 -0
  27. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/neighborhoods/__init__.py +0 -0
  28. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/neighborhoods/community.py +0 -0
  29. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/network/__init__.py +0 -0
  30. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/network/geometry.py +0 -0
  31. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/network/io.py +0 -0
  32. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/network/plot/__init__.py +0 -0
  33. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/network/plot/plotter.py +0 -0
  34. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/network/plot/utils/layout.py +0 -0
  35. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/stats/__init__.py +0 -0
  36. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/stats/hypergeom.py +0 -0
  37. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/stats/permutation/__init__.py +0 -0
  38. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/stats/permutation/permutation.py +0 -0
  39. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/stats/permutation/test_functions.py +0 -0
  40. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk/stats/poisson.py +0 -0
  41. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk_network.egg-info/dependency_links.txt +0 -0
  42. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk_network.egg-info/requires.txt +0 -0
  43. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/risk_network.egg-info/top_level.txt +0 -0
  44. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/setup.cfg +0 -0
  45. {risk_network-0.0.8b26 → risk_network-0.0.9b1}/setup.py +0 -0
--- risk_network-0.0.8b26/PKG-INFO
+++ risk_network-0.0.9b1/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: risk-network
-Version: 0.0.8b26
+Version: 0.0.9b1
 Summary: A Python package for biological network analysis
 Author: Ira Horecka
 Author-email: Ira Horecka <ira89@icloud.com>
--- risk_network-0.0.8b26/risk/__init__.py
+++ risk_network-0.0.9b1/risk/__init__.py
@@ -7,4 +7,4 @@ RISK: RISK Infers Spatial Kinships
 
 from risk.risk import RISK
 
-__version__ = "0.0.8-beta.26"
+__version__ = "0.0.9-beta.1"
--- risk_network-0.0.8b26/risk/annotations/annotations.py
+++ risk_network-0.0.9b1/risk/annotations/annotations.py
@@ -83,69 +83,69 @@ def load_annotations(network: nx.Graph, annotations_input: Dict[str, Any]) -> Di
 def define_top_annotations(
     network: nx.Graph,
     ordered_annotation_labels: List[str],
-    neighborhood_enrichment_sums: List[int],
-    significant_enrichment_matrix: np.ndarray,
-    significant_binary_enrichment_matrix: np.ndarray,
+    neighborhood_significance_sums: List[int],
+    significant_significance_matrix: np.ndarray,
+    significant_binary_significance_matrix: np.ndarray,
     min_cluster_size: int = 5,
     max_cluster_size: int = 1000,
 ) -> pd.DataFrame:
-    """Define top annotations based on neighborhood enrichment sums and binary enrichment matrix.
+    """Define top annotations based on neighborhood significance sums and binary significance matrix.
 
     Args:
         network (NetworkX graph): The network graph.
         ordered_annotation_labels (list of str): List of ordered annotation labels.
-        neighborhood_enrichment_sums (list of int): List of neighborhood enrichment sums.
-        significant_enrichment_matrix (np.ndarray): Enrichment matrix below alpha threshold.
-        significant_binary_enrichment_matrix (np.ndarray): Binary enrichment matrix below alpha threshold.
+        neighborhood_significance_sums (list of int): List of neighborhood significance sums.
+        significant_significance_matrix (np.ndarray): Enrichment matrix below alpha threshold.
+        significant_binary_significance_matrix (np.ndarray): Binary significance matrix below alpha threshold.
         min_cluster_size (int, optional): Minimum cluster size. Defaults to 5.
         max_cluster_size (int, optional): Maximum cluster size. Defaults to 1000.
 
     Returns:
         pd.DataFrame: DataFrame with top annotations and their properties.
     """
-    # Sum the columns of the significant enrichment matrix (positive floating point values)
-    significant_enrichment_scores = significant_enrichment_matrix.sum(axis=0)
-    # Create DataFrame to store annotations, their neighborhood enrichment sums, and enrichment scores
-    annotations_enrichment_matrix = pd.DataFrame(
+    # Sum the columns of the significant significance matrix (positive floating point values)
+    significant_significance_scores = significant_significance_matrix.sum(axis=0)
+    # Create DataFrame to store annotations, their neighborhood significance sums, and significance scores
+    annotations_significance_matrix = pd.DataFrame(
         {
             "id": range(len(ordered_annotation_labels)),
             "full_terms": ordered_annotation_labels,
-            "significant_neighborhood_enrichment_sums": neighborhood_enrichment_sums,
-            "significant_enrichment_score": significant_enrichment_scores,
+            "significant_neighborhood_significance_sums": neighborhood_significance_sums,
+            "significant_significance_score": significant_significance_scores,
         }
     )
-    annotations_enrichment_matrix["significant_annotations"] = False
+    annotations_significance_matrix["significant_annotations"] = False
     # Apply size constraints to identify potential significant annotations
-    annotations_enrichment_matrix.loc[
+    annotations_significance_matrix.loc[
         (
-            annotations_enrichment_matrix["significant_neighborhood_enrichment_sums"]
+            annotations_significance_matrix["significant_neighborhood_significance_sums"]
             >= min_cluster_size
         )
         & (
-            annotations_enrichment_matrix["significant_neighborhood_enrichment_sums"]
+            annotations_significance_matrix["significant_neighborhood_significance_sums"]
             <= max_cluster_size
         ),
         "significant_annotations",
     ] = True
     # Initialize columns for connected components analysis
-    annotations_enrichment_matrix["num_connected_components"] = 0
-    annotations_enrichment_matrix["size_connected_components"] = None
-    annotations_enrichment_matrix["size_connected_components"] = annotations_enrichment_matrix[
+    annotations_significance_matrix["num_connected_components"] = 0
+    annotations_significance_matrix["size_connected_components"] = None
+    annotations_significance_matrix["size_connected_components"] = annotations_significance_matrix[
         "size_connected_components"
     ].astype(object)
-    annotations_enrichment_matrix["num_large_connected_components"] = 0
+    annotations_significance_matrix["num_large_connected_components"] = 0
 
-    for attribute in annotations_enrichment_matrix.index.values[
-        annotations_enrichment_matrix["significant_annotations"]
+    for attribute in annotations_significance_matrix.index.values[
+        annotations_significance_matrix["significant_annotations"]
     ]:
-        # Identify enriched neighborhoods based on the binary enrichment matrix
-        enriched_neighborhoods = list(
-            compress(list(network), significant_binary_enrichment_matrix[:, attribute])
+        # Identify significant neighborhoods based on the binary significance matrix
+        significant_neighborhoods = list(
+            compress(list(network), significant_binary_significance_matrix[:, attribute])
         )
-        enriched_network = nx.subgraph(network, enriched_neighborhoods)
-        # Analyze connected components within the enriched subnetwork
+        significant_network = nx.subgraph(network, significant_neighborhoods)
+        # Analyze connected components within the significant subnetwork
         connected_components = sorted(
-            nx.connected_components(enriched_network), key=len, reverse=True
+            nx.connected_components(significant_network), key=len, reverse=True
        )
         size_connected_components = np.array([len(c) for c in connected_components])
@@ -159,23 +159,24 @@ def define_top_annotations(
         num_large_connected_components = len(filtered_size_connected_components)
 
         # Assign the number of connected components
-        annotations_enrichment_matrix.loc[attribute, "num_connected_components"] = (
+        annotations_significance_matrix.loc[attribute, "num_connected_components"] = (
             num_connected_components
         )
         # Filter out attributes with more than one connected component
-        annotations_enrichment_matrix.loc[
-            annotations_enrichment_matrix["num_connected_components"] > 1, "significant_annotations"
+        annotations_significance_matrix.loc[
+            annotations_significance_matrix["num_connected_components"] > 1,
+            "significant_annotations",
         ] = False
         # Assign the number of large connected components
-        annotations_enrichment_matrix.loc[attribute, "num_large_connected_components"] = (
+        annotations_significance_matrix.loc[attribute, "num_large_connected_components"] = (
             num_large_connected_components
         )
         # Assign the size of connected components, ensuring it is always a list
-        annotations_enrichment_matrix.at[attribute, "size_connected_components"] = (
+        annotations_significance_matrix.at[attribute, "size_connected_components"] = (
             filtered_size_connected_components.tolist()
         )
 
-    return annotations_enrichment_matrix
+    return annotations_significance_matrix
 
 
 def get_weighted_description(words_column: pd.Series, scores_column: pd.Series) -> str:
@@ -184,16 +185,16 @@ def get_weighted_description(words_column: pd.Series, scores_column: pd.Series)
 
     Args:
         words_column (pd.Series): A pandas Series containing strings to process.
-        scores_column (pd.Series): A pandas Series containing enrichment scores to weigh the terms.
+        scores_column (pd.Series): A pandas Series containing significance scores to weigh the terms.
 
     Returns:
-        str: A coherent description formed from the most frequent and significant words, weighed by enrichment scores.
+        str: A coherent description formed from the most frequent and significant words, weighed by significance scores.
     """
     # Handle case where all scores are the same
     if scores_column.max() == scores_column.min():
         normalized_scores = pd.Series([1] * len(scores_column))
     else:
-        # Normalize the enrichment scores to be between 0 and 1
+        # Normalize the significance scores to be between 0 and 1
         normalized_scores = (scores_column - scores_column.min()) / (
             scores_column.max() - scores_column.min()
         )
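
Note: the normalization branch above is plain min-max scaling with a guard for the degenerate all-equal case. A toy run (invented values, not package output):

    import pandas as pd

    scores_column = pd.Series([2.0, 5.0, 8.0])
    if scores_column.max() == scores_column.min():
        # All scores identical: weigh every term equally
        normalized_scores = pd.Series([1] * len(scores_column))
    else:
        # Rescale scores to the [0, 1] range
        normalized_scores = (scores_column - scores_column.min()) / (
            scores_column.max() - scores_column.min()
        )
    print(normalized_scores.tolist())  # [0.0, 0.5, 1.0]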
--- risk_network-0.0.8b26/risk/log/__init__.py
+++ risk_network-0.0.9b1/risk/log/__init__.py
@@ -3,7 +3,7 @@ risk/log
 ~~~~~~~~
 """
 
-from .config import logger, log_header, set_global_verbosity
+from .console import logger, log_header, set_global_verbosity
 from .params import Params
 
 params = Params()
--- risk_network-0.0.8b26/risk/log/config.py
+++ risk_network-0.0.9b1/risk/log/console.py
@@ -1,6 +1,6 @@
 """
-risk/log/config
-~~~~~~~~~~~~~~~
+risk/log/console
+~~~~~~~~~~~~~~~~
 """
 
 import logging
--- /dev/null
+++ risk_network-0.0.9b1/risk/log/enrichment.py
@@ -0,0 +1,18 @@
+"""
+risk/log/enrichment
+~~~~~~~~~~~~~~~~~~~
+"""
+
+import csv
+import json
+import warnings
+from datetime import datetime
+from functools import wraps
+from typing import Any, Dict
+
+import numpy as np
+
+from .console import logger, log_header
+
+# Suppress all warnings - this is to resolve warnings from multiprocessing
+warnings.filterwarnings("ignore")
--- risk_network-0.0.8b26/risk/log/params.py
+++ risk_network-0.0.9b1/risk/log/params.py
@@ -12,7 +12,7 @@ from typing import Any, Dict
 
 import numpy as np
 
-from .config import logger, log_header
+from .console import logger, log_header
 
 # Suppress all warnings - this is to resolve warnings from multiprocessing
 warnings.filterwarnings("ignore")
--- risk_network-0.0.8b26/risk/neighborhoods/domains.py
+++ risk_network-0.0.9b1/risk/neighborhoods/domains.py
@@ -20,17 +20,17 @@ from risk.log import logger
 
 def define_domains(
     top_annotations: pd.DataFrame,
-    significant_neighborhoods_enrichment: np.ndarray,
+    significant_neighborhoods_significance: np.ndarray,
     linkage_criterion: str,
     linkage_method: str,
     linkage_metric: str,
 ) -> pd.DataFrame:
-    """Define domains and assign nodes to these domains based on their enrichment scores and clustering,
+    """Define domains and assign nodes to these domains based on their significance scores and clustering,
     handling errors by assigning unique domains when clustering fails.
 
     Args:
         top_annotations (pd.DataFrame): DataFrame of top annotations data for the network nodes.
-        significant_neighborhoods_enrichment (np.ndarray): The binary enrichment matrix below alpha.
+        significant_neighborhoods_significance (np.ndarray): The binary significance matrix below alpha.
         linkage_criterion (str): The clustering criterion for defining groups.
         linkage_method (str): The linkage method for clustering.
         linkage_metric (str): The linkage metric for clustering.
@@ -40,7 +40,7 @@ def define_domains(
     """
     try:
         # Transpose the matrix to cluster annotations
-        m = significant_neighborhoods_enrichment[:, top_annotations["significant_annotations"]].T
+        m = significant_neighborhoods_significance[:, top_annotations["significant_annotations"]].T
         best_linkage, best_metric, best_threshold = _optimize_silhouette_across_linkage_and_metrics(
             m, linkage_criterion, linkage_method, linkage_metric
         )
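
Note: the transpose here puts annotations on the rows so they are the observations being clustered. The package's _optimize_silhouette_across_linkage_and_metrics helper is not shown in this diff; the sketch below substitutes a direct SciPy linkage/fcluster call with made-up data, purely to illustrate the shape handling.

    import numpy as np
    from scipy.cluster.hierarchy import fcluster, linkage

    rng = np.random.default_rng(0)
    # Toy node-by-annotation binary significance matrix (50 nodes, 8 annotations)
    significant_neighborhoods_significance = rng.integers(0, 2, size=(50, 8))
    m = significant_neighborhoods_significance.T.astype(bool)  # annotations x nodes

    # Cluster annotations by the similarity of their significant-node profiles
    Z = linkage(m, method="average", metric="jaccard")
    domains = fcluster(Z, t=0.5, criterion="distance")
    print(domains)  # one domain label per annotation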
@@ -65,13 +65,13 @@ def define_domains(
         top_annotations["domain"] = range(1, n_rows + 1)  # Assign unique domains
 
     # Create DataFrames to store domain information
-    node_to_enrichment = pd.DataFrame(
-        data=significant_neighborhoods_enrichment,
+    node_to_significance = pd.DataFrame(
+        data=significant_neighborhoods_significance,
         columns=[top_annotations.index.values, top_annotations["domain"]],
     )
-    node_to_domain = node_to_enrichment.groupby(level="domain", axis=1).sum()
+    node_to_domain = node_to_significance.groupby(level="domain", axis=1).sum()
 
-    # Find the maximum enrichment score for each node
+    # Find the maximum significance score for each node
     t_max = node_to_domain.loc[:, 1:].max(axis=1)
     t_idxmax = node_to_domain.loc[:, 1:].idxmax(axis=1)
     t_idxmax[t_max == 0] = 0
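
Note: the three lines after the groupby implement a per-node argmax with a zero fallback. A simplified toy version follows (columns stand in for domain IDs; the real code additionally skips domain 0 via .loc[:, 1:]):

    import pandas as pd

    # Columns = domains, rows = nodes; values = summed significance per domain
    node_to_domain = pd.DataFrame({1: [3, 0, 1], 2: [1, 0, 4]})
    t_max = node_to_domain.max(axis=1)
    t_idxmax = node_to_domain.idxmax(axis=1)
    t_idxmax[t_max == 0] = 0  # nodes scoring 0 everywhere get the sentinel domain 0
    print(t_idxmax.tolist())  # [1, 0, 2]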
@@ -119,27 +119,27 @@ def trim_domains_and_top_annotations(
     top_annotations["domain"].replace(to_remove, invalid_domain_id, inplace=True)
     domains.loc[domains["primary_domain"].isin(to_remove), ["primary_domain"]] = invalid_domain_id
 
-    # Normalize "num enriched neighborhoods" by percentile for each domain and scale to 0-10
+    # Normalize "num significant neighborhoods" by percentile for each domain and scale to 0-10
     top_annotations["normalized_value"] = top_annotations.groupby("domain")[
-        "significant_neighborhood_enrichment_sums"
+        "significant_neighborhood_significance_sums"
     ].transform(lambda x: (x.rank(pct=True) * 10).apply(np.ceil).astype(int))
-    # Modify the lambda function to pass both full_terms and significant_enrichment_score
+    # Modify the lambda function to pass both full_terms and significant_significance_score
     top_annotations["combined_terms"] = top_annotations.apply(
         lambda row: " ".join([str(row["full_terms"])] * row["normalized_value"]), axis=1
     )
 
-    # Perform the groupby operation while retaining the other columns and adding the weighting with enrichment scores
+    # Perform the groupby operation while retaining the other columns and adding the weighting with significance scores
     domain_labels = (
         top_annotations.groupby("domain")
         .agg(
             full_terms=("full_terms", lambda x: list(x)),
-            enrichment_scores=("significant_enrichment_score", lambda x: list(x)),
+            significance_scores=("significant_significance_score", lambda x: list(x)),
         )
         .reset_index()
     )
     domain_labels["combined_terms"] = domain_labels.apply(
         lambda row: get_weighted_description(
-            pd.Series(row["full_terms"]), pd.Series(row["enrichment_scores"])
+            pd.Series(row["full_terms"]), pd.Series(row["significance_scores"])
         ),
         axis=1,
     )
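
Note: the weighting trick above repeats each annotation's terms in proportion to its within-domain percentile rank (ceiled to the 1-10 range) before the terms are combined. A small invented example:

    import numpy as np
    import pandas as pd

    top = pd.DataFrame(
        {
            "domain": [1, 1],
            "full_terms": ["dna repair", "cell cycle"],
            "significant_neighborhood_significance_sums": [2, 9],
        }
    )
    # Percentile rank within each domain, scaled to 1-10
    top["normalized_value"] = top.groupby("domain")[
        "significant_neighborhood_significance_sums"
    ].transform(lambda x: (x.rank(pct=True) * 10).apply(np.ceil).astype(int))
    print(top["normalized_value"].tolist())  # [5, 10]
    # Each term string is repeated normalized_value times before joining, so
    # "cell cycle" contributes twice the weight of "dna repair" downstream
    top["combined_terms"] = top.apply(
        lambda row: " ".join([str(row["full_terms"])] * row["normalized_value"]), axis=1
    )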
@@ -150,7 +150,7 @@ def trim_domains_and_top_annotations(
             "domain": "id",
             "combined_terms": "normalized_description",
             "full_terms": "full_descriptions",
-            "enrichment_scores": "enrichment_scores",
+            "significance_scores": "significance_scores",
         }
     ).set_index("id")