risk-network 0.0.7b0__py3-none-any.whl → 0.0.7b1__py3-none-any.whl

This diff shows the changes between two publicly available versions of this package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
risk/__init__.py CHANGED
@@ -7,4 +7,4 @@ RISK: RISK Infers Spatial Kinships
7
7
 
8
8
  from risk.risk import RISK
9
9
 
10
- __version__ = "0.0.7-beta.0"
10
+ __version__ = "0.0.7-beta.1"
@@ -9,6 +9,7 @@ from typing import Any, Dict, List, Tuple
9
9
  import networkx as nx
10
10
  import numpy as np
11
11
  from sklearn.exceptions import DataConversionWarning
12
+ from sklearn.metrics.pairwise import cosine_similarity
12
13
 
13
14
  from risk.neighborhoods.community import (
14
15
  calculate_dijkstra_neighborhoods,
@@ -93,7 +94,7 @@ def _create_percentile_limited_subgraph(G: nx.Graph, edge_length_percentile: flo
93
94
  def process_neighborhoods(
94
95
  network: nx.Graph,
95
96
  neighborhoods: Dict[str, Any],
96
- impute_depth: int = 1,
97
+ impute_depth: int = 0,
97
98
  prune_threshold: float = 0.0,
98
99
  ) -> Dict[str, Any]:
99
100
  """Process neighborhoods based on the imputation and pruning settings.
@@ -101,7 +102,7 @@ def process_neighborhoods(
101
102
  Args:
102
103
  network (nx.Graph): The network data structure used for imputing and pruning neighbors.
103
104
  neighborhoods (dict): Dictionary containing 'enrichment_matrix', 'binary_enrichment_matrix', and 'significant_enrichment_matrix'.
104
- impute_depth (int, optional): Depth for imputing neighbors. Defaults to 1.
105
+ impute_depth (int, optional): Depth for imputing neighbors. Defaults to 0.
105
106
  prune_threshold (float, optional): Distance threshold for pruning neighbors. Defaults to 0.0.
106
107
 
107
108
  Returns:
@@ -167,55 +168,135 @@ def _impute_neighbors(
167
168
  - np.ndarray: The imputed alpha threshold matrix.
168
169
  - np.ndarray: The significant enrichment matrix with non-significant entries set to zero.
169
170
  """
170
- # Calculate shortest distances for each node to determine the distance threshold
171
- shortest_distances = []
172
- for node in network.nodes():
173
- try:
174
- neighbors = [
175
- n for n in network.neighbors(node) if binary_enrichment_matrix[n].sum() != 0
176
- ]
177
- except IndexError as e:
178
- raise IndexError(
179
- f"Failed to find neighbors for node '{node}': Ensure that the node exists in the network and that the binary enrichment matrix is correctly indexed."
180
- ) from e
181
-
182
- # Calculate the shortest distance to a neighbor
183
- if neighbors:
184
- shortest_distance = min([_get_euclidean_distance(node, n, network) for n in neighbors])
185
- shortest_distances.append(shortest_distance)
171
+ # Calculate the distance threshold value based on the shortest distances
172
+ enrichment_matrix, binary_enrichment_matrix = _impute_neighbors_with_similarity(
173
+ network, enrichment_matrix, binary_enrichment_matrix, max_depth=max_depth
174
+ )
175
+ # Create a matrix where non-significant entries are set to zero
176
+ significant_enrichment_matrix = np.where(binary_enrichment_matrix == 1, enrichment_matrix, 0)
177
+
178
+ return enrichment_matrix, binary_enrichment_matrix, significant_enrichment_matrix
179
+
180
+
181
+ def _impute_neighbors_with_similarity(
182
+ network: nx.Graph,
183
+ enrichment_matrix: np.ndarray,
184
+ binary_enrichment_matrix: np.ndarray,
185
+ max_depth: int = 3,
186
+ ) -> Tuple[np.ndarray, np.ndarray]:
187
+ """Impute non-enriched nodes based on the closest enriched neighbors' profiles and their similarity.
186
188
 
189
+ Args:
190
+ network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
191
+ enrichment_matrix (np.ndarray): The enrichment matrix with rows to be imputed.
192
+ binary_enrichment_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
193
+ max_depth (int): Maximum depth of nodes to traverse for imputing values.
194
+
195
+ Returns:
196
+ Tuple[np.ndarray, np.ndarray]: A tuple containing:
197
+ - The imputed enrichment matrix.
198
+ - The imputed alpha threshold matrix.
199
+ """
187
200
  depth = 1
188
201
  rows_to_impute = np.where(binary_enrichment_matrix.sum(axis=1) == 0)[0]
189
202
  while len(rows_to_impute) and depth <= max_depth:
190
203
  next_rows_to_impute = []
191
- for row_index in rows_to_impute:
192
- neighbors = nx.single_source_shortest_path_length(network, row_index, cutoff=depth)
193
- valid_neighbors = [
194
- n
195
- for n in neighbors
196
- if n != row_index
197
- and binary_enrichment_matrix[n].sum() != 0
198
- and enrichment_matrix[n].sum() != 0
199
- ]
200
- if valid_neighbors:
201
- closest_neighbor = min(
202
- valid_neighbors, key=lambda n: _get_euclidean_distance(row_index, n, network)
204
+ # Iterate over all enriched nodes
205
+ for row_index in range(binary_enrichment_matrix.shape[0]):
206
+ if binary_enrichment_matrix[row_index].sum() != 0:
207
+ enrichment_matrix, binary_enrichment_matrix = _process_node_imputation(
208
+ row_index, network, enrichment_matrix, binary_enrichment_matrix, depth
203
209
  )
204
- # Impute the row with the closest valid neighbor's data
205
- enrichment_matrix[row_index] = enrichment_matrix[closest_neighbor]
206
- binary_enrichment_matrix[row_index] = binary_enrichment_matrix[
207
- closest_neighbor
208
- ] / np.sqrt(depth + 1)
209
- else:
210
- next_rows_to_impute.append(row_index)
211
210
 
212
- rows_to_impute = next_rows_to_impute
211
+ # Update rows to impute for the next iteration
212
+ rows_to_impute = np.where(binary_enrichment_matrix.sum(axis=1) == 0)[0]
213
213
  depth += 1
214
214
 
215
- # Create a matrix where non-significant entries are set to zero
216
- significant_enrichment_matrix = np.where(binary_enrichment_matrix == 1, enrichment_matrix, 0)
215
+ return enrichment_matrix, binary_enrichment_matrix
217
216
 
218
- return enrichment_matrix, binary_enrichment_matrix, significant_enrichment_matrix
217
+
218
+ def _process_node_imputation(
219
+ row_index: int,
220
+ network: nx.Graph,
221
+ enrichment_matrix: np.ndarray,
222
+ binary_enrichment_matrix: np.ndarray,
223
+ depth: int,
224
+ ) -> Tuple[np.ndarray, np.ndarray]:
225
+ """Process the imputation for a single node based on its enriched neighbors.
226
+
227
+ Args:
228
+ row_index (int): The index of the enriched node being processed.
229
+ network (nx.Graph): The network graph with nodes having IDs matching the matrix indices.
230
+ enrichment_matrix (np.ndarray): The enrichment matrix with rows to be imputed.
231
+ binary_enrichment_matrix (np.ndarray): The alpha threshold matrix to be imputed similarly.
232
+ depth (int): Current depth for traversal.
233
+
234
+ Returns:
235
+ Tuple[np.ndarray, np.ndarray]: The modified enrichment matrix and binary threshold matrix.
236
+ """
237
+ # Check neighbors at the current depth
238
+ neighbors = nx.single_source_shortest_path_length(network, row_index, cutoff=depth)
239
+ # Filter annotated neighbors (already enriched)
240
+ annotated_neighbors = [
241
+ n
242
+ for n in neighbors
243
+ if n != row_index
244
+ and binary_enrichment_matrix[n].sum() != 0
245
+ and enrichment_matrix[n].sum() != 0
246
+ ]
247
+ # Filter non-enriched neighbors
248
+ valid_neighbors = [
249
+ n
250
+ for n in neighbors
251
+ if n != row_index
252
+ and binary_enrichment_matrix[n].sum() == 0
253
+ and enrichment_matrix[n].sum() == 0
254
+ ]
255
+ # If there are valid non-enriched neighbors
256
+ if valid_neighbors and annotated_neighbors:
257
+ # Calculate distances to annotated neighbors
258
+ distances_to_annotated = [
259
+ _get_euclidean_distance(row_index, n, network) for n in annotated_neighbors
260
+ ]
261
+ # Calculate the IQR to identify outliers
262
+ q1, q3 = np.percentile(distances_to_annotated, [25, 75])
263
+ iqr = q3 - q1
264
+ lower_bound = q1 - 1.5 * iqr
265
+ upper_bound = q3 + 1.5 * iqr
266
+ # Filter valid non-enriched neighbors that fall within the IQR bounds
267
+ valid_neighbors_within_iqr = [
268
+ n
269
+ for n in valid_neighbors
270
+ if lower_bound <= _get_euclidean_distance(row_index, n, network) <= upper_bound
271
+ ]
272
+ # If there are any valid neighbors within the IQR
273
+ if valid_neighbors_within_iqr:
274
+ # If more than one valid neighbor is within the IQR, compute pairwise cosine similarities
275
+ if len(valid_neighbors_within_iqr) > 1:
276
+ # Find the most similar neighbor based on pairwise cosine similarities
277
+ def sum_pairwise_cosine_similarities(neighbor):
278
+ return sum(
279
+ cosine_similarity(
280
+ enrichment_matrix[neighbor].reshape(1, -1),
281
+ enrichment_matrix[other_neighbor].reshape(1, -1),
282
+ )[0][0]
283
+ for other_neighbor in valid_neighbors_within_iqr
284
+ if other_neighbor != neighbor
285
+ )
286
+
287
+ most_similar_neighbor = max(
288
+ valid_neighbors_within_iqr, key=sum_pairwise_cosine_similarities
289
+ )
290
+ else:
291
+ most_similar_neighbor = valid_neighbors_within_iqr[0]
292
+
293
+ # Impute the most similar non-enriched neighbor with the enriched node's data, scaled by depth
294
+ enrichment_matrix[most_similar_neighbor] = enrichment_matrix[row_index] / np.sqrt(
295
+ depth + 1
296
+ )
297
+ binary_enrichment_matrix[most_similar_neighbor] = binary_enrichment_matrix[row_index]
298
+
299
+ return enrichment_matrix, binary_enrichment_matrix
219
300
 
220
301
 
221
302
  def _prune_neighbors(
@@ -240,27 +321,27 @@ def _prune_neighbors(
240
321
  """
241
322
  # Identify indices with non-zero rows in the binary enrichment matrix
242
323
  non_zero_indices = np.where(binary_enrichment_matrix.sum(axis=1) != 0)[0]
243
- average_distances = []
324
+ median_distances = []
244
325
  for node in non_zero_indices:
245
326
  neighbors = [n for n in network.neighbors(node) if binary_enrichment_matrix[n].sum() != 0]
246
327
  if neighbors:
247
- average_distance = np.mean(
328
+ median_distance = np.median(
248
329
  [_get_euclidean_distance(node, n, network) for n in neighbors]
249
330
  )
250
- average_distances.append(average_distance)
331
+ median_distances.append(median_distance)
251
332
 
252
333
  # Calculate the distance threshold value based on rank
253
- distance_threshold_value = _calculate_threshold(average_distances, 1 - distance_threshold)
334
+ distance_threshold_value = _calculate_threshold(median_distances, 1 - distance_threshold)
254
335
  # Prune nodes that are outliers based on the distance threshold
255
336
  for row_index in non_zero_indices:
256
337
  neighbors = [
257
338
  n for n in network.neighbors(row_index) if binary_enrichment_matrix[n].sum() != 0
258
339
  ]
259
340
  if neighbors:
260
- average_distance = np.mean(
341
+ median_distance = np.median(
261
342
  [_get_euclidean_distance(row_index, n, network) for n in neighbors]
262
343
  )
263
- if average_distance >= distance_threshold_value:
344
+ if median_distance >= distance_threshold_value:
264
345
  enrichment_matrix[row_index] = 0
265
346
  binary_enrichment_matrix[row_index] = 0
266
347
 
@@ -305,18 +386,18 @@ def _get_node_position(network: nx.Graph, node: Any) -> np.ndarray:
305
386
  )
306
387
 
307
388
 
308
- def _calculate_threshold(average_distances: List, distance_threshold: float) -> float:
309
- """Calculate the distance threshold based on the given average distances and a percentile threshold.
389
+ def _calculate_threshold(median_distances: List, distance_threshold: float) -> float:
390
+ """Calculate the distance threshold based on the given median distances and a percentile threshold.
310
391
 
311
392
  Args:
312
- average_distances (list): An array of average distances.
393
+ median_distances (list): An array of median distances.
313
394
  distance_threshold (float): A percentile threshold (0 to 1) used to determine the distance cutoff.
314
395
 
315
396
  Returns:
316
397
  float: The calculated distance threshold value.
317
398
  """
318
- # Sort the average distances
319
- sorted_distances = np.sort(average_distances)
399
+ # Sort the median distances
400
+ sorted_distances = np.sort(median_distances)
320
401
  # Compute the rank percentiles for the sorted distances
321
402
  rank_percentiles = np.linspace(0, 1, len(sorted_distances))
322
403
  # Interpolating the ranks to 1000 evenly spaced percentiles
risk/risk.py CHANGED
@@ -237,7 +237,7 @@ class RISK(NetworkIO, AnnotationsIO):
237
237
  tail: str = "right", # OPTIONS: "right" (enrichment), "left" (depletion), "both"
238
238
  pval_cutoff: float = 0.01, # OPTIONS: Any value between 0 to 1
239
239
  fdr_cutoff: float = 0.9999, # OPTIONS: Any value between 0 to 1
240
- impute_depth: int = 1,
240
+ impute_depth: int = 0,
241
241
  prune_threshold: float = 0.0,
242
242
  linkage_criterion: str = "distance",
243
243
  linkage_method: str = "average",
@@ -254,7 +254,7 @@ class RISK(NetworkIO, AnnotationsIO):
254
254
  tail (str, optional): Type of significance tail ("right", "left", "both"). Defaults to "right".
255
255
  pval_cutoff (float, optional): p-value cutoff for significance. Defaults to 0.01.
256
256
  fdr_cutoff (float, optional): FDR cutoff for significance. Defaults to 0.9999.
257
- impute_depth (int, optional): Depth for imputing neighbors. Defaults to 1.
257
+ impute_depth (int, optional): Depth for imputing neighbors. Defaults to 0.
258
258
  prune_threshold (float, optional): Distance threshold for pruning neighbors. Defaults to 0.0.
259
259
  linkage_criterion (str, optional): Clustering criterion for defining domains. Defaults to "distance".
260
260
  linkage_method (str, optional): Clustering method to use. Defaults to "average".
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: risk-network
3
- Version: 0.0.7b0
3
+ Version: 0.0.7b1
4
4
  Summary: A Python package for biological network analysis
5
5
  Author: Ira Horecka
6
6
  Author-email: Ira Horecka <ira89@icloud.com>
@@ -1,6 +1,6 @@
1
- risk/__init__.py,sha256=Qyktssx5ZswUpqcPtSMq9Zn-zzJXl2fka6MqbHS-JxQ,112
1
+ risk/__init__.py,sha256=kKRKe-z54BZkkomARTvXCfcVgS-KX50Kgryn6By_kdc,112
2
2
  risk/constants.py,sha256=XInRaH78Slnw_sWgAsBFbUHkyA0h0jL0DKGuQNbOvjM,550
3
- risk/risk.py,sha256=CKDIzVo9Jvl-fgzIlk5ZtJL9pIBMma24WK6EYdVu5po,20648
3
+ risk/risk.py,sha256=jhfOv60iZdOssCF35tAjJ_br9w8aIqPFT2owVTehgtA,20648
4
4
  risk/annotations/__init__.py,sha256=vUpVvMRE5if01Ic8QY6M2Ae3EFGJHdugEe9PdEkAW4Y,138
5
5
  risk/annotations/annotations.py,sha256=K7cUA6vYTKYAvj0xHqrAwNEYtmPq4H7LDYENAOVQdQ0,11014
6
6
  risk/annotations/io.py,sha256=lo7NKqOVkeeBp58JBxWJHtA0xjL5Yoxqe9Ox0daKlZk,9457
@@ -10,7 +10,7 @@ risk/log/params.py,sha256=Rfdg5UcGCrG80m6V79FyORERWUqIzHFO7tGiY4zAImM,6347
10
10
  risk/neighborhoods/__init__.py,sha256=tKKEg4lsbqFukpgYlUGxU_v_9FOqK7V0uvM9T2QzoL0,206
11
11
  risk/neighborhoods/community.py,sha256=7ebo1Q5KokSQISnxZIh2SQxsKXdXm8aVkp-h_DiQ3K0,6818
12
12
  risk/neighborhoods/domains.py,sha256=bxJUxqFTynzX0mf3E8-AA4_Rfccje1reeVVhfzb1-pE,10672
13
- risk/neighborhoods/neighborhoods.py,sha256=sHmjFFl2U5qV9YbQCRbpbI36j7dS7IFfFwwRb1_-AuM,13945
13
+ risk/neighborhoods/neighborhoods.py,sha256=cEk4gDvIkBky5POZhtHnO78iV-NXu4BeV-e5XdhYOkM,17508
14
14
  risk/network/__init__.py,sha256=iEPeJdZfqp0toxtbElryB8jbz9_t_k4QQ3iDvKE8C_0,126
15
15
  risk/network/geometry.py,sha256=H1yGVVqgbfpzBzJwEheDLfvGLSA284jGQQTn612L4Vc,6759
16
16
  risk/network/graph.py,sha256=7haHu4M3fleqbrIzs6HC9jnKizSERzmmAYSmUwdoSXA,13953
@@ -23,8 +23,8 @@ risk/stats/stats.py,sha256=kvShov-94W6ffgDUTb522vB9hDJQSyTsYif_UIaFfSM,7059
23
23
  risk/stats/permutation/__init__.py,sha256=neJp7FENC-zg_CGOXqv-iIvz1r5XUKI9Ruxhmq7kDOI,105
24
24
  risk/stats/permutation/permutation.py,sha256=qLWdwxEY6nmkYPxpM8HLDcd2mbqYv9Qr7CKtJvhLqIM,9220
25
25
  risk/stats/permutation/test_functions.py,sha256=HuDIM-V1jkkfE1rlaIqrWWBSKZt3dQ1f-YEDjWpnLSE,2343
26
- risk_network-0.0.7b0.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
27
- risk_network-0.0.7b0.dist-info/METADATA,sha256=Yokjvu7qlqWV6F_qJQ9O6TIwKw_9XpD_2qgwQHyimRY,43142
28
- risk_network-0.0.7b0.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
29
- risk_network-0.0.7b0.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
30
- risk_network-0.0.7b0.dist-info/RECORD,,
26
+ risk_network-0.0.7b1.dist-info/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
27
+ risk_network-0.0.7b1.dist-info/METADATA,sha256=I0cAqenkwnGxhVcAkX_ipuB3rvmHV4OcR9S7tjOdaC8,43142
28
+ risk_network-0.0.7b1.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
29
+ risk_network-0.0.7b1.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
30
+ risk_network-0.0.7b1.dist-info/RECORD,,