risk-network 0.0.12b0__py3-none-any.whl → 0.0.12b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. risk/__init__.py +1 -1
  2. risk/annotations/__init__.py +10 -0
  3. risk/annotations/annotations.py +354 -0
  4. risk/annotations/io.py +241 -0
  5. risk/annotations/nltk_setup.py +86 -0
  6. risk/log/__init__.py +11 -0
  7. risk/log/console.py +141 -0
  8. risk/log/parameters.py +171 -0
  9. risk/neighborhoods/__init__.py +7 -0
  10. risk/neighborhoods/api.py +442 -0
  11. risk/neighborhoods/community.py +441 -0
  12. risk/neighborhoods/domains.py +360 -0
  13. risk/neighborhoods/neighborhoods.py +514 -0
  14. risk/neighborhoods/stats/__init__.py +13 -0
  15. risk/neighborhoods/stats/permutation/__init__.py +6 -0
  16. risk/neighborhoods/stats/permutation/permutation.py +240 -0
  17. risk/neighborhoods/stats/permutation/test_functions.py +70 -0
  18. risk/neighborhoods/stats/tests.py +275 -0
  19. risk/network/__init__.py +4 -0
  20. risk/network/graph/__init__.py +4 -0
  21. risk/network/graph/api.py +200 -0
  22. risk/network/graph/graph.py +268 -0
  23. risk/network/graph/stats.py +166 -0
  24. risk/network/graph/summary.py +253 -0
  25. risk/network/io.py +693 -0
  26. risk/network/plotter/__init__.py +4 -0
  27. risk/network/plotter/api.py +54 -0
  28. risk/network/plotter/canvas.py +291 -0
  29. risk/network/plotter/contour.py +329 -0
  30. risk/network/plotter/labels.py +935 -0
  31. risk/network/plotter/network.py +294 -0
  32. risk/network/plotter/plotter.py +141 -0
  33. risk/network/plotter/utils/colors.py +419 -0
  34. risk/network/plotter/utils/layout.py +94 -0
  35. risk_network-0.0.12b1.dist-info/METADATA +122 -0
  36. risk_network-0.0.12b1.dist-info/RECORD +40 -0
  37. {risk_network-0.0.12b0.dist-info → risk_network-0.0.12b1.dist-info}/WHEEL +1 -1
  38. risk_network-0.0.12b0.dist-info/METADATA +0 -796
  39. risk_network-0.0.12b0.dist-info/RECORD +0 -7
  40. {risk_network-0.0.12b0.dist-info → risk_network-0.0.12b1.dist-info}/licenses/LICENSE +0 -0
  41. {risk_network-0.0.12b0.dist-info → risk_network-0.0.12b1.dist-info}/top_level.txt +0 -0
risk/network/graph/summary.py
@@ -0,0 +1,253 @@
+"""
+risk/network/graph/summary
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+"""
+
+from typing import Any, Dict, Tuple, Union
+
+import numpy as np
+import pandas as pd
+from statsmodels.stats.multitest import fdrcorrection
+
+from risk.log.console import log_header, logger
+
+
+class Summary:
+    """Handles the processing, storage, and export of network analysis results.
+
+    The Summary class provides methods to process significance and depletion data, compute
+    FDR-corrected q-values, and structure information on domains and annotations into a
+    DataFrame. It also offers functionality to export the processed data in CSV, JSON,
+    and text formats for analysis and reporting.
+    """
+
+    def __init__(
+        self,
+        annotations: Dict[str, Any],
+        neighborhoods: Dict[str, Any],
+        graph,  # Avoid type hinting Graph to prevent circular imports
+    ):
+        """Initialize the Summary object with analysis components.
+
+        Args:
+            annotations (Dict[str, Any]): Annotation data, including ordered annotations and matrix of associations.
+            neighborhoods (Dict[str, Any]): Neighborhood data containing p-values for significance and depletion analysis.
+            graph (Graph): Graph object representing domain-to-node and node-to-label mappings.
+        """
+        self.annotations = annotations
+        self.neighborhoods = neighborhoods
+        self.graph = graph
+
+    def to_csv(self, filepath: str) -> None:
+        """Export significance results to a CSV file.
+
+        Args:
+            filepath (str): The path where the CSV file will be saved.
+        """
+        # Load results and export directly to CSV
+        results = self.load()
+        results.to_csv(filepath, index=False)
+        logger.info(f"Analysis summary exported to CSV file: {filepath}")
+
+    def to_json(self, filepath: str) -> None:
+        """Export significance results to a JSON file.
+
+        Args:
+            filepath (str): The path where the JSON file will be saved.
+        """
+        # Load results and export directly to JSON
+        results = self.load()
+        results.to_json(filepath, orient="records", indent=4)
+        logger.info(f"Analysis summary exported to JSON file: {filepath}")
+
+    def to_txt(self, filepath: str) -> None:
+        """Export significance results to a text file.
+
+        Args:
+            filepath (str): The path where the text file will be saved.
+        """
+        # Load results and export directly to text file
+        results = self.load()
+        with open(filepath, "w", encoding="utf-8") as txt_file:
+            txt_file.write(results.to_string(index=False))
+
+        logger.info(f"Analysis summary exported to text file: {filepath}")
+
+    def load(self) -> pd.DataFrame:
+        """Load and process domain and annotation data into a DataFrame with significance metrics.
+
+        Returns:
+            pd.DataFrame: Processed DataFrame containing significance scores, p-values, q-values,
+                and annotation member information.
+        """
+        log_header("Loading analysis summary")
+        # Calculate enrichment and depletion q-values from p-value matrices in `neighborhoods`
+        enrichment_pvals = self.neighborhoods["enrichment_pvals"]
+        depletion_pvals = self.neighborhoods["depletion_pvals"]
+        enrichment_qvals = self._calculate_qvalues(enrichment_pvals)
+        depletion_qvals = self._calculate_qvalues(depletion_pvals)
+
+        # Initialize DataFrame with domain and annotation details
+        results = pd.DataFrame(
+            [
+                {"Domain ID": domain_id, "Annotation": desc, "Summed Significance Score": score}
+                for domain_id, info in self.graph.domain_id_to_domain_info_map.items()
+                for desc, score in zip(info["full_descriptions"], info["significance_scores"])
+            ]
+        )
+        # Sort by Domain ID and Summed Significance Score
+        results = results.sort_values(
+            by=["Domain ID", "Summed Significance Score"], ascending=[True, False]
+        ).reset_index(drop=True)
+
+        # Add minimum p-values and q-values to DataFrame
+        results[
+            [
+                "Enrichment P-Value",
+                "Enrichment Q-value",
+                "Depletion P-Value",
+                "Depletion Q-value",
+            ]
+        ] = results.apply(
+            lambda row: self._get_significance_values(
+                row["Domain ID"],
+                row["Annotation"],
+                enrichment_pvals,
+                depletion_pvals,
+                enrichment_qvals,
+                depletion_qvals,
+            ),
+            axis=1,
+            result_type="expand",
+        )
+        # Add annotation members and their counts
+        results["Annotation Members in Network"] = results["Annotation"].apply(
+            lambda desc: self._get_annotation_members(desc)
+        )
+        results["Annotation Members in Network Count"] = results[
+            "Annotation Members in Network"
+        ].apply(lambda x: len(x.split(";")) if x else 0)
+
+        # Reorder columns and drop rows with NaN values
+        results = (
+            results[
+                [
+                    "Domain ID",
+                    "Annotation",
+                    "Annotation Members in Network",
+                    "Annotation Members in Network Count",
+                    "Summed Significance Score",
+                    "Enrichment P-Value",
+                    "Enrichment Q-value",
+                    "Depletion P-Value",
+                    "Depletion Q-value",
+                ]
+            ]
+            .dropna()
+            .reset_index(drop=True)
+        )
+
+        # Convert annotations list to a DataFrame for comparison then merge with results
+        ordered_annotations = pd.DataFrame({"Annotation": self.annotations["ordered_annotations"]})
+        # Merge to ensure all annotations are present, filling missing rows with defaults
+        results = pd.merge(ordered_annotations, results, on="Annotation", how="left").fillna(
+            {
+                "Domain ID": -1,
+                "Annotation Members in Network": "",
+                "Annotation Members in Network Count": 0,
+                "Summed Significance Score": 0.0,
+                "Enrichment P-Value": 1.0,
+                "Enrichment Q-value": 1.0,
+                "Depletion P-Value": 1.0,
+                "Depletion Q-value": 1.0,
+            }
+        )
+        # Convert "Domain ID" and "Annotation Members in Network Count" to integers
+        results["Domain ID"] = results["Domain ID"].astype(int)
+        results["Annotation Members in Network Count"] = results[
+            "Annotation Members in Network Count"
+        ].astype(int)
+
+        return results
+
+    def _calculate_qvalues(self, pvals: np.ndarray) -> np.ndarray:
+        """Calculate q-values (FDR) for each row of a p-value matrix.
+
+        Args:
+            pvals (np.ndarray): 2D array of p-values.
+
+        Returns:
+            np.ndarray: 2D array of q-values, with FDR correction applied row-wise.
+        """
+        return np.apply_along_axis(lambda row: fdrcorrection(row)[1], 1, pvals)
+
+    def _get_significance_values(
+        self,
+        domain_id: int,
+        description: str,
+        enrichment_pvals: np.ndarray,
+        depletion_pvals: np.ndarray,
+        enrichment_qvals: np.ndarray,
+        depletion_qvals: np.ndarray,
+    ) -> Tuple[Union[float, None], Union[float, None], Union[float, None], Union[float, None]]:
+        """Retrieve the most significant p-values and q-values (FDR) for a given annotation.
+
+        Args:
+            domain_id (int): The domain ID associated with the annotation.
+            description (str): The annotation description.
+            enrichment_pvals (np.ndarray): Matrix of significance p-values.
+            depletion_pvals (np.ndarray): Matrix of depletion p-values.
+            enrichment_qvals (np.ndarray): Matrix of significance q-values.
+            depletion_qvals (np.ndarray): Matrix of depletion q-values.
+
+        Returns:
+            Tuple[Union[float, None], Union[float, None], Union[float, None], Union[float, None]]:
+                Minimum significance p-value, significance q-value, depletion p-value, depletion q-value.
+        """
+        try:
+            annotation_idx = self.annotations["ordered_annotations"].index(description)
+        except ValueError:
+            return None, None, None, None  # Description not found
+
+        node_indices = self.graph.domain_id_to_node_ids_map.get(domain_id, [])
+        if not node_indices:
+            return None, None, None, None  # No associated nodes
+
+        sig_p = enrichment_pvals[node_indices, annotation_idx]
+        dep_p = depletion_pvals[node_indices, annotation_idx]
+        sig_q = enrichment_qvals[node_indices, annotation_idx]
+        dep_q = depletion_qvals[node_indices, annotation_idx]
+
+        return (
+            np.min(sig_p) if sig_p.size > 0 else None,
+            np.min(sig_q) if sig_q.size > 0 else None,
+            np.min(dep_p) if dep_p.size > 0 else None,
+            np.min(dep_q) if dep_q.size > 0 else None,
+        )
+
+    def _get_annotation_members(self, description: str) -> str:
+        """Retrieve node labels associated with a given annotation description.
+
+        Args:
+            description (str): The annotation description.
+
+        Returns:
+            str: ';'-separated string of node labels that are associated with the annotation.
+        """
+        try:
+            annotation_idx = self.annotations["ordered_annotations"].index(description)
+        except ValueError:
+            return ""  # Description not found
+
+        # Get the column (safely) from the sparse matrix
+        column = self.annotations["matrix"][:, annotation_idx]
+        # Convert the column to a dense array if needed
+        column = column.toarray().ravel()  # Convert to a 1D dense array
+        # Get nodes present for the annotation and sort by node label - use np.where on the dense array
+        nodes_present = np.where(column == 1)[0]
+        node_labels = sorted(
+            self.graph.node_id_to_node_label_map[node_id]
+            for node_id in nodes_present
+            if node_id in self.graph.node_id_to_node_label_map
+        )
+        return ";".join(node_labels)
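
A short, hedged sketch of how the new Summary class could be exercised on its own. Everything below is illustrative dummy data; in the released package these structures are normally assembled by the higher-level graph API (see risk/network/graph/api.py in the file list above) rather than built by hand, and only the key and attribute names that appear in the listing are assumed.

from types import SimpleNamespace

import numpy as np
from scipy.sparse import csr_matrix

from risk.network.graph.summary import Summary

# Two nodes and two annotations: node 0 carries "term A", node 1 carries "term B".
annotations = {
    "ordered_annotations": ["term A", "term B"],
    "matrix": csr_matrix(np.array([[1, 0], [0, 1]])),
}
# Per-node, per-annotation p-value matrices (rows = nodes, columns = annotations).
neighborhoods = {
    "enrichment_pvals": np.array([[0.01, 0.90], [0.80, 0.02]]),
    "depletion_pvals": np.array([[0.95, 0.10], [0.20, 0.99]]),
}
# Stand-in for the Graph object: a single domain containing both nodes.
graph = SimpleNamespace(
    domain_id_to_domain_info_map={
        1: {"full_descriptions": ["term A", "term B"], "significance_scores": [2.0, 1.5]}
    },
    domain_id_to_node_ids_map={1: [0, 1]},
    node_id_to_node_label_map={0: "gene_0", 1: "gene_1"},
)

summary = Summary(annotations, neighborhoods, graph)
results = summary.load()       # DataFrame with p-values, q-values, and member lists
summary.to_csv("summary.csv")  # same table written to disk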
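
One detail worth noting in the listing: _calculate_qvalues applies Benjamini-Hochberg FDR correction to each row of the p-value matrix independently (going by the indexing in _get_significance_values, a row holds one node's p-values across all annotations), using fdrcorrection from statsmodels, which returns a pair of reject flags and corrected p-values. A minimal standalone illustration with made-up p-values:

import numpy as np
from statsmodels.stats.multitest import fdrcorrection

# Two rows of hypothetical p-values; each row is corrected as its own family of tests.
pvals = np.array([
    [0.001, 0.020, 0.300, 0.800],
    [0.400, 0.050, 0.010, 0.900],
])

# fdrcorrection returns (reject_flags, corrected_pvalues); [1] keeps the q-values.
qvals = np.apply_along_axis(lambda row: fdrcorrection(row)[1], 1, pvals)
print(qvals.shape)  # (2, 4), same shape as the input matrix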