risk-network 0.0.13b5__py3-none-any.whl → 0.0.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
risk/__init__.py CHANGED
@@ -5,7 +5,7 @@ risk
5
5
  RISK: Regional Inference of Significant Kinships
6
6
  """
7
7
 
8
- from .risk import RISK
8
+ from ._risk import RISK
9
9
 
10
10
  __all__ = ["RISK"]
11
- __version__ = "0.0.13-beta.5"
11
+ __version__ = "0.0.14"
@@ -7,4 +7,4 @@ from ._annotation import (
7
7
  define_top_annotation,
8
8
  get_weighted_description,
9
9
  )
10
- from ._io import AnnotationIO
10
+ from ._io import AnnotationHandler
risk/_annotation/_io.py CHANGED
@@ -13,11 +13,11 @@ from .._log import log_header, logger, params
13
13
  from ._annotation import load_annotation
14
14
 
15
15
 
16
- class AnnotationIO:
16
+ class AnnotationHandler:
17
17
  """
18
18
  Handles the loading and exporting of annotation in various file formats.
19
19
 
20
- The AnnotationIO class provides methods to load annotation from different file types (JSON, CSV, Excel, etc.)
20
+ The AnnotationHandler class provides methods to load annotation from different file types (JSON, CSV, Excel, etc.)
21
21
  and to export parameter data to various formats like JSON, CSV, and text files.
22
22
  """
23
23
 
@@ -29,9 +29,6 @@ class NeighborhoodsAPI:
29
29
  The NeighborhoodsAPI class provides methods to load neighborhood results from statistical tests.
30
30
  """
31
31
 
32
- def __init__(self) -> None:
33
- pass
34
-
35
32
  def load_neighborhoods_binom(
36
33
  self,
37
34
  network: nx.Graph,
@@ -54,37 +54,48 @@ def define_domains(
54
54
  Raises:
55
55
  ValueError: If the clustering criterion is set to "off" or if an error occurs during clustering.
56
56
  """
57
- try:
58
- if linkage_criterion == "off":
59
- raise ValueError("Clustering is turned off.")
57
+ # Validate args first; let user mistakes raise immediately
58
+ clustering_off = _validate_clustering_args(
59
+ linkage_criterion, linkage_method, linkage_metric, linkage_threshold
60
+ )
60
61
 
62
+ # If clustering is turned off, assign unique domains and skip
63
+ if clustering_off:
64
+ n_rows = len(top_annotation)
65
+ logger.warning("Clustering is turned off. Skipping clustering.")
66
+ top_annotation["domain"] = range(1, n_rows + 1)
67
+ else:
61
68
  # Transpose the matrix to cluster annotations
62
69
  m = significant_neighborhoods_significance[:, top_annotation["significant_annotation"]].T
63
70
  # Safeguard the matrix by replacing NaN, Inf, and -Inf values
64
71
  m = _safeguard_matrix(m)
65
- # Optimize silhouette score across different linkage methods and distance metrics
66
- best_linkage, best_metric, best_threshold = _optimize_silhouette_across_linkage_and_metrics(
67
- m, linkage_criterion, linkage_method, linkage_metric, linkage_threshold
68
- )
69
- # Perform hierarchical clustering
70
- Z = linkage(m, method=best_linkage, metric=best_metric)
71
- logger.warning(
72
- f"Linkage criterion: '{linkage_criterion}'\nLinkage method: '{best_linkage}'\nLinkage metric: '{best_metric}'\nLinkage threshold: {round(best_threshold, 3)}"
73
- )
74
- # Calculate the optimal threshold for clustering
75
- max_d_optimal = np.max(Z[:, 2]) * best_threshold
76
- # Assign domains to the annotation matrix
77
- domains = fcluster(Z, max_d_optimal, criterion=linkage_criterion)
78
- top_annotation["domain"] = 0
79
- top_annotation.loc[top_annotation["significant_annotation"], "domain"] = domains
80
- except (ValueError, LinAlgError):
81
- # If a ValueError is encountered, handle it by assigning unique domains
82
- n_rows = len(top_annotation)
83
- if linkage_criterion == "off":
84
- logger.warning("Clustering is turned off. Skipping clustering.")
85
- else:
86
- logger.error("Error encountered. Skipping clustering.")
87
- top_annotation["domain"] = range(1, n_rows + 1) # Assign unique domains
72
+ try:
73
+ # Optimize silhouette score across different linkage methods and distance metrics
74
+ (
75
+ best_linkage,
76
+ best_metric,
77
+ best_threshold,
78
+ ) = _optimize_silhouette_across_linkage_and_metrics(
79
+ m, linkage_criterion, linkage_method, linkage_metric, linkage_threshold
80
+ )
81
+ # Perform hierarchical clustering
82
+ Z = linkage(m, method=best_linkage, metric=best_metric)
83
+ logger.warning(
84
+ f"Linkage criterion: '{linkage_criterion}'\nLinkage method: '{best_linkage}'\nLinkage metric: '{best_metric}'\nLinkage threshold: {round(best_threshold, 3)}"
85
+ )
86
+ # Calculate the optimal threshold for clustering
87
+ max_d_optimal = np.max(Z[:, 2]) * best_threshold
88
+ # Assign domains to the annotation matrix
89
+ domains = fcluster(Z, max_d_optimal, criterion=linkage_criterion)
90
+ top_annotation["domain"] = 0
91
+ top_annotation.loc[top_annotation["significant_annotation"], "domain"] = domains
92
+ except (LinAlgError, ValueError):
93
+ # Numerical errors or degenerate input are handled gracefully (not user error)
94
+ n_rows = len(top_annotation)
95
+ logger.error(
96
+ "Clustering failed due to numerical or data degeneracy. Assigning unique domains."
97
+ )
98
+ top_annotation["domain"] = range(1, n_rows + 1)
88
99
 
89
100
  # Create DataFrames to store domain information
90
101
  node_to_significance = pd.DataFrame(
@@ -184,6 +195,46 @@ def trim_domains(
184
195
  return valid_domains, valid_trimmed_domains_matrix
185
196
 
186
197
 
198
+ def _validate_clustering_args(
199
+ linkage_criterion: str,
200
+ linkage_method: str,
201
+ linkage_metric: str,
202
+ linkage_threshold: Union[float, str],
203
+ ) -> bool:
204
+ """
205
+ Validate user-provided clustering arguments.
206
+
207
+ Returns:
208
+ bool: True if clustering is turned off (criterion == 'off'); False otherwise.
209
+
210
+ Raises:
211
+ ValueError: If any argument is invalid (user error).
212
+ """
213
+ # Allow opting out of clustering without raising
214
+ if linkage_criterion == "off":
215
+ return True
216
+ # Validate linkage method (allow "auto")
217
+ if linkage_method != "auto" and linkage_method not in LINKAGE_METHODS:
218
+ raise ValueError(
219
+ f"Invalid linkage_method '{linkage_method}'. Allowed values are 'auto' or one of: {sorted(LINKAGE_METHODS)}"
220
+ )
221
+ # Validate linkage metric (allow "auto")
222
+ if linkage_metric != "auto" and linkage_metric not in LINKAGE_METRICS:
223
+ raise ValueError(
224
+ f"Invalid linkage_metric '{linkage_metric}'. Allowed values are 'auto' or one of: {sorted(LINKAGE_METRICS)}"
225
+ )
226
+ # Validate linkage threshold (allow "auto"; otherwise must be float in (0, 1])
227
+ if linkage_threshold != "auto":
228
+ try:
229
+ lt = float(linkage_threshold)
230
+ except (TypeError, ValueError):
231
+ raise ValueError("linkage_threshold must be 'auto' or a float in the interval (0, 1].")
232
+ if not (0.0 < lt <= 1.0):
233
+ raise ValueError(f"linkage_threshold must be within (0, 1]. Received: {lt}")
234
+
235
+ return False
236
+
237
+
187
238
  def _safeguard_matrix(matrix: np.ndarray) -> np.ndarray:
188
239
  """
189
240
  Safeguard the matrix by replacing NaN, Inf, and -Inf values.
@@ -394,34 +394,33 @@ def _prune_neighbors(
394
394
  # Identify indices with non-zero rows in the binary significance matrix
395
395
  non_zero_indices = np.where(significant_binary_significance_matrix.sum(axis=1) != 0)[0]
396
396
  median_distances = []
397
+ distance_lookup = {}
397
398
  for node in non_zero_indices:
398
- neighbors = [
399
- n
400
- for n in network.neighbors(node)
401
- if significant_binary_significance_matrix[n].sum() != 0
402
- ]
403
- if neighbors:
404
- median_distance = np.median(
405
- [_get_euclidean_distance(node, n, network) for n in neighbors]
406
- )
407
- median_distances.append(median_distance)
399
+ dist = _median_distance_to_significant_neighbors(
400
+ node, network, significant_binary_significance_matrix
401
+ )
402
+ if dist is not None:
403
+ median_distances.append(dist)
404
+ distance_lookup[node] = dist
405
+
406
+ if not median_distances:
407
+ logger.warning("No significant neighbors found for pruning.")
408
+ significant_significance_matrix = np.where(
409
+ significant_binary_significance_matrix == 1, significance_matrix, 0
410
+ )
411
+ return (
412
+ significance_matrix,
413
+ significant_binary_significance_matrix,
414
+ significant_significance_matrix,
415
+ )
408
416
 
409
417
  # Calculate the distance threshold value based on rank
410
418
  distance_threshold_value = _calculate_threshold(median_distances, 1 - distance_threshold)
411
419
  # Prune nodes that are outliers based on the distance threshold
412
- for row_index in non_zero_indices:
413
- neighbors = [
414
- n
415
- for n in network.neighbors(row_index)
416
- if significant_binary_significance_matrix[n].sum() != 0
417
- ]
418
- if neighbors:
419
- median_distance = np.median(
420
- [_get_euclidean_distance(row_index, n, network) for n in neighbors]
421
- )
422
- if median_distance >= distance_threshold_value:
423
- significance_matrix[row_index] = 0
424
- significant_binary_significance_matrix[row_index] = 0
420
+ for node, dist in distance_lookup.items():
421
+ if dist >= distance_threshold_value:
422
+ significance_matrix[node] = 0
423
+ significant_binary_significance_matrix[node] = 0
425
424
 
426
425
  # Create a matrix where non-significant entries are set to zero
427
426
  significant_significance_matrix = np.where(
@@ -435,6 +434,29 @@ def _prune_neighbors(
435
434
  )
436
435
 
437
436
 
437
+ def _median_distance_to_significant_neighbors(
438
+ node, network, significance_mask
439
+ ) -> Union[float, None]:
440
+ """
441
+ Calculate the median distance from a node to its significant neighbors.
442
+
443
+ Args:
444
+ node (Any): The node for which the median distance is being calculated.
445
+ network (nx.Graph): The network graph containing the nodes.
446
+ significance_mask (np.ndarray): Binary matrix indicating significant nodes.
447
+
448
+ Returns:
449
+ Union[float, None]: The median distance to significant neighbors, or None if no significant neighbors exist.
450
+ """
451
+ neighbors = [n for n in network.neighbors(node) if significance_mask[n].sum() != 0]
452
+ if not neighbors:
453
+ return None
454
+ # Calculate distances to significant neighbors
455
+ distances = [_get_euclidean_distance(node, n, network) for n in neighbors]
456
+
457
+ return np.median(distances)
458
+
459
+
438
460
  def _get_euclidean_distance(node1: Any, node2: Any, network: nx.Graph) -> float:
439
461
  """
440
462
  Calculate the Euclidean distance between two nodes in the network.
risk/_network/__init__.py CHANGED
@@ -4,5 +4,5 @@ risk/_network
4
4
  """
5
5
 
6
6
  from ._graph import GraphAPI
7
- from ._io import NetworkIO
7
+ from ._io import NetworkAPI
8
8
  from ._plotter import PlotterAPI
@@ -27,9 +27,6 @@ class GraphAPI:
27
27
  The GraphAPI class provides methods to load and process network graphs, annotations, and neighborhoods.
28
28
  """
29
29
 
30
- def __init__(self) -> None:
31
- pass
32
-
33
30
  def load_graph(
34
31
  self,
35
32
  network: nx.Graph,
@@ -84,7 +84,7 @@ class Summary:
84
84
 
85
85
  Returns:
86
86
  pd.DataFrame: Processed DataFrame containing significance scores, p-values, q-values,
87
- and annotation member information.
87
+ and matched annotation members information.
88
88
  """
89
89
  log_header("Loading analysis summary")
90
90
  # Calculate significance and depletion q-values from p-value matrices in annotation
@@ -109,9 +109,9 @@ class Summary:
109
109
  # Add minimum p-values and q-values to DataFrame
110
110
  results[
111
111
  [
112
- "Enrichment P-Value",
112
+ "Enrichment P-value",
113
113
  "Enrichment Q-value",
114
- "Depletion P-Value",
114
+ "Depletion P-value",
115
115
  "Depletion Q-value",
116
116
  ]
117
117
  ] = results.apply(
@@ -126,13 +126,13 @@ class Summary:
126
126
  axis=1,
127
127
  result_type="expand",
128
128
  )
129
- # Add annotation members and their counts
130
- results["Annotation Members in Network"] = results["Annotation"].apply(
129
+ # Add matched annotation members and their counts
130
+ results["Matched Members"] = results["Annotation"].apply(
131
131
  lambda desc: self._get_annotation_members(desc)
132
132
  )
133
- results["Annotation Members in Network Count"] = results[
134
- "Annotation Members in Network"
135
- ].apply(lambda x: len(x.split(";")) if x else 0)
133
+ results["Matched Count"] = results["Matched Members"].apply(
134
+ lambda x: len(x.split(";")) if x else 0
135
+ )
136
136
 
137
137
  # Reorder columns and drop rows with NaN values
138
138
  results = (
@@ -140,12 +140,12 @@ class Summary:
140
140
  [
141
141
  "Domain ID",
142
142
  "Annotation",
143
- "Annotation Members in Network",
144
- "Annotation Members in Network Count",
143
+ "Matched Members",
144
+ "Matched Count",
145
145
  "Summed Significance Score",
146
- "Enrichment P-Value",
146
+ "Enrichment P-value",
147
147
  "Enrichment Q-value",
148
- "Depletion P-Value",
148
+ "Depletion P-value",
149
149
  "Depletion Q-value",
150
150
  ]
151
151
  ]
@@ -159,20 +159,18 @@ class Summary:
159
159
  results = pd.merge(ordered_annotation, results, on="Annotation", how="left").fillna(
160
160
  {
161
161
  "Domain ID": -1,
162
- "Annotation Members in Network": "",
163
- "Annotation Members in Network Count": 0,
162
+ "Matched Members": "",
163
+ "Matched Count": 0,
164
164
  "Summed Significance Score": 0.0,
165
- "Enrichment P-Value": 1.0,
165
+ "Enrichment P-value": 1.0,
166
166
  "Enrichment Q-value": 1.0,
167
- "Depletion P-Value": 1.0,
167
+ "Depletion P-value": 1.0,
168
168
  "Depletion Q-value": 1.0,
169
169
  }
170
170
  )
171
- # Convert "Domain ID" and "Annotation Members in Network Count" to integers
171
+ # Convert "Domain ID" and "Matched Count" to integers
172
172
  results["Domain ID"] = results["Domain ID"].astype(int)
173
- results["Annotation Members in Network Count"] = results[
174
- "Annotation Members in Network Count"
175
- ].astype(int)
173
+ results["Matched Count"] = results["Matched Count"].astype(int)
176
174
 
177
175
  return results
178
176
 
risk/_network/_io.py CHANGED
@@ -18,114 +18,188 @@ import pandas as pd
18
18
  from .._log import log_header, logger, params
19
19
 
20
20
 
21
- class NetworkIO:
21
+ class NetworkAPI:
22
22
  """
23
- A class for loading, processing, and managing network data.
24
-
25
- The NetworkIO class provides methods to load network data from various formats (e.g., GPickle, NetworkX)
26
- and process the network by adjusting node coordinates, calculating edge lengths, and validating graph structure.
23
+ Public-facing interface for loading and initializing network data.
24
+ Delegates to the NetworkIO worker class for actual I/O and processing.
27
25
  """
28
26
 
29
- def __init__(
27
+ def load_network_gpickle(
30
28
  self,
29
+ filepath: str,
31
30
  compute_sphere: bool = True,
32
31
  surface_depth: float = 0.0,
33
32
  min_edges_per_node: int = 0,
34
- ):
33
+ ) -> nx.Graph:
35
34
  """
36
- Initialize the NetworkIO class.
35
+ Load a network from a GPickle file via NetworkIO.
37
36
 
38
37
  Args:
39
- compute_sphere (bool, optional): Whether to map nodes to a sphere. Defaults to True.
40
- surface_depth (float, optional): Surface depth for the sphere. Defaults to 0.0.
41
- min_edges_per_node (int, optional): Minimum number of edges per node. Defaults to 0.
38
+ filepath (str): Path to the GPickle file.
39
+ compute_sphere (bool, optional): Override or use API default. Defaults to True.
40
+ surface_depth (float, optional): Override or use API default. Defaults to 0.0.
41
+ min_edges_per_node (int, optional): Override or use API default. Defaults to 0.
42
+ Returns:
43
+ nx.Graph: Loaded and processed network.
42
44
  """
43
- self.compute_sphere = compute_sphere
44
- self.surface_depth = surface_depth
45
- self.min_edges_per_node = min_edges_per_node
46
- # Log the initialization of the NetworkIO class
47
- params.log_network(
45
+ io = NetworkIO(
48
46
  compute_sphere=compute_sphere,
49
47
  surface_depth=surface_depth,
50
48
  min_edges_per_node=min_edges_per_node,
51
49
  )
50
+ return io.load_network_gpickle(filepath=filepath)
52
51
 
53
- def load_network_gpickle(
52
+ def load_network_networkx(
54
53
  self,
55
- filepath: str,
54
+ network: nx.Graph,
56
55
  compute_sphere: bool = True,
57
56
  surface_depth: float = 0.0,
58
57
  min_edges_per_node: int = 0,
59
58
  ) -> nx.Graph:
60
59
  """
61
- Load a network from a GPickle file.
60
+ Load a NetworkX graph via NetworkIO.
62
61
 
63
62
  Args:
64
- filepath (str): Path to the GPickle file.
65
- compute_sphere (bool, optional): Whether to map nodes to a sphere. Defaults to True.
66
- surface_depth (float, optional): Surface depth for the sphere. Defaults to 0.0.
67
- min_edges_per_node (int, optional): Minimum number of edges per node. Defaults to 0.
63
+ network (nx.Graph): A NetworkX graph object.
64
+ compute_sphere (bool, optional): Override or use API default. Defaults to True.
65
+ surface_depth (float, optional): Override or use API default. Defaults to 0.0.
66
+ min_edges_per_node (int, optional): Override or use API default. Defaults to 0.
67
+ Returns:
68
+ nx.Graph: Processed network.
69
+ """
70
+ io = NetworkIO(
71
+ compute_sphere=compute_sphere,
72
+ surface_depth=surface_depth,
73
+ min_edges_per_node=min_edges_per_node,
74
+ )
75
+ return io.load_network_networkx(network=network)
68
76
 
77
+ def load_network_cytoscape(
78
+ self,
79
+ filepath: str,
80
+ source_label: str = "source",
81
+ target_label: str = "target",
82
+ view_name: str = "",
83
+ compute_sphere: bool = True,
84
+ surface_depth: float = 0.0,
85
+ min_edges_per_node: int = 0,
86
+ ) -> nx.Graph:
87
+ """
88
+ Load a network from a Cytoscape file via NetworkIO.
89
+
90
+ Args:
91
+ filepath (str): Path to the Cytoscape file.
92
+ source_label (str, optional): Source node label. Defaults to "source".
93
+ target_label (str, optional): Target node label. Defaults to "target".
94
+ view_name (str, optional): Specific view name to load. Defaults to "".
95
+ compute_sphere (bool, optional): Override or use API default. Defaults to True.
96
+ surface_depth (float, optional): Override or use API default. Defaults to 0.0.
97
+ min_edges_per_node (int, optional): Override or use API default. Defaults to 0.
69
98
  Returns:
70
99
  nx.Graph: Loaded and processed network.
71
100
  """
72
- networkio = NetworkIO(
101
+ io = NetworkIO(
73
102
  compute_sphere=compute_sphere,
74
103
  surface_depth=surface_depth,
75
104
  min_edges_per_node=min_edges_per_node,
76
105
  )
77
- return networkio._load_network_gpickle(filepath=filepath)
106
+ return io.load_network_cytoscape(
107
+ filepath=filepath,
108
+ source_label=source_label,
109
+ target_label=target_label,
110
+ view_name=view_name,
111
+ )
78
112
 
79
- def _load_network_gpickle(self, filepath: str) -> nx.Graph:
113
+ def load_network_cyjs(
114
+ self,
115
+ filepath: str,
116
+ source_label: str = "source",
117
+ target_label: str = "target",
118
+ compute_sphere: bool = True,
119
+ surface_depth: float = 0.0,
120
+ min_edges_per_node: int = 0,
121
+ ) -> nx.Graph:
80
122
  """
81
- Private method to load a network from a GPickle file.
123
+ Load a network from a Cytoscape JSON (.cyjs) file via NetworkIO.
82
124
 
83
125
  Args:
84
- filepath (str): Path to the GPickle file.
85
-
126
+ filepath (str): Path to the Cytoscape JSON file.
127
+ source_label (str, optional): Source node label. Defaults to "source".
128
+ target_label (str, optional): Target node label. Defaults to "target".
129
+ compute_sphere (bool, optional): Override or use API default. Defaults to True.
130
+ surface_depth (float, optional): Override or use API default. Defaults to 0.0.
131
+ min_edges_per_node (int, optional): Override or use API default. Defaults to 0.
86
132
  Returns:
87
133
  nx.Graph: Loaded and processed network.
88
134
  """
89
- filetype = "GPickle"
90
- # Log the loading of the GPickle file
91
- params.log_network(filetype=filetype, filepath=filepath)
92
- self._log_loading_network(filetype, filepath=filepath)
135
+ io = NetworkIO(
136
+ compute_sphere=compute_sphere,
137
+ surface_depth=surface_depth,
138
+ min_edges_per_node=min_edges_per_node,
139
+ )
140
+ return io.load_network_cyjs(
141
+ filepath=filepath,
142
+ source_label=source_label,
143
+ target_label=target_label,
144
+ )
93
145
 
94
- with open(filepath, "rb") as f:
95
- G = pickle.load(f)
96
146
 
97
- # Initialize the graph
98
- return self._initialize_graph(G)
147
+ class NetworkIO:
148
+ """
149
+ A class for loading, processing, and managing network data.
99
150
 
100
- def load_network_networkx(
151
+ The NetworkIO class provides methods to load network data from various formats (e.g., GPickle, NetworkX)
152
+ and process the network by adjusting node coordinates, calculating edge lengths, and validating graph structure.
153
+ """
154
+
155
+ def __init__(
101
156
  self,
102
- network: nx.Graph,
103
157
  compute_sphere: bool = True,
104
158
  surface_depth: float = 0.0,
105
159
  min_edges_per_node: int = 0,
106
- ) -> nx.Graph:
160
+ ):
107
161
  """
108
- Load a NetworkX graph.
162
+ Initialize the NetworkIO class.
109
163
 
110
164
  Args:
111
- network (nx.Graph): A NetworkX graph object.
112
165
  compute_sphere (bool, optional): Whether to map nodes to a sphere. Defaults to True.
113
166
  surface_depth (float, optional): Surface depth for the sphere. Defaults to 0.0.
114
- min_edges_per_node (int, optional): Minimum number of edges per node. Defaults to 0.
115
-
116
- Returns:
117
- nx.Graph: Loaded and processed network.
167
+ min_edges_per_node (int, optional): Minimum number of edges per node (k-core threshold). Defaults to 0.
118
168
  """
119
- networkio = NetworkIO(
169
+ self.compute_sphere = compute_sphere
170
+ self.surface_depth = surface_depth
171
+ self.min_edges_per_node = min_edges_per_node
172
+ # Log the initialization of the NetworkIO class
173
+ params.log_network(
120
174
  compute_sphere=compute_sphere,
121
175
  surface_depth=surface_depth,
122
176
  min_edges_per_node=min_edges_per_node,
123
177
  )
124
- return networkio._load_network_networkx(network=network)
125
178
 
126
- def _load_network_networkx(self, network: nx.Graph) -> nx.Graph:
179
+ def load_network_gpickle(self, filepath: str) -> nx.Graph:
127
180
  """
128
- Private method to load a NetworkX graph.
181
+ Load a network from a GPickle file.
182
+
183
+ Args:
184
+ filepath (str): Path to the GPickle file.
185
+
186
+ Returns:
187
+ nx.Graph: Loaded and processed network.
188
+ """
189
+ filetype = "GPickle"
190
+ # Log the loading of the GPickle file
191
+ params.log_network(filetype=filetype, filepath=filepath)
192
+ self._log_loading_network(filetype, filepath=filepath)
193
+
194
+ with open(filepath, "rb") as f:
195
+ G = pickle.load(f)
196
+
197
+ # Initialize the graph
198
+ return self._initialize_graph(G)
199
+
200
+ def load_network_networkx(self, network: nx.Graph) -> nx.Graph:
201
+ """
202
+ Load a NetworkX graph.
129
203
 
130
204
  Args:
131
205
  network (nx.Graph): A NetworkX graph object.
@@ -149,47 +223,10 @@ class NetworkIO:
149
223
  source_label: str = "source",
150
224
  target_label: str = "target",
151
225
  view_name: str = "",
152
- compute_sphere: bool = True,
153
- surface_depth: float = 0.0,
154
- min_edges_per_node: int = 0,
155
226
  ) -> nx.Graph:
156
227
  """
157
228
  Load a network from a Cytoscape file.
158
229
 
159
- Args:
160
- filepath (str): Path to the Cytoscape file.
161
- source_label (str, optional): Source node label. Defaults to "source".
162
- target_label (str, optional): Target node label. Defaults to "target".
163
- view_name (str, optional): Specific view name to load. Defaults to "".
164
- compute_sphere (bool, optional): Whether to map nodes to a sphere. Defaults to True.
165
- surface_depth (float, optional): Surface depth for the sphere. Defaults to 0.0.
166
- min_edges_per_node (int, optional): Minimum number of edges per node. Defaults to 0.
167
-
168
- Returns:
169
- nx.Graph: Loaded and processed network.
170
- """
171
- networkio = NetworkIO(
172
- compute_sphere=compute_sphere,
173
- surface_depth=surface_depth,
174
- min_edges_per_node=min_edges_per_node,
175
- )
176
- return networkio._load_network_cytoscape(
177
- filepath=filepath,
178
- source_label=source_label,
179
- target_label=target_label,
180
- view_name=view_name,
181
- )
182
-
183
- def _load_network_cytoscape(
184
- self,
185
- filepath: str,
186
- source_label: str = "source",
187
- target_label: str = "target",
188
- view_name: str = "",
189
- ) -> nx.Graph:
190
- """
191
- Private method to load a network from a Cytoscape file.
192
-
193
230
  Args:
194
231
  filepath (str): Path to the Cytoscape file.
195
232
  source_label (str, optional): Source node label. Defaults to "source".
@@ -315,44 +352,10 @@ class NetworkIO:
315
352
  if os.path.exists(tmp_dir):
316
353
  shutil.rmtree(tmp_dir)
317
354
 
318
- def load_network_cyjs(
319
- self,
320
- filepath: str,
321
- source_label: str = "source",
322
- target_label: str = "target",
323
- compute_sphere: bool = True,
324
- surface_depth: float = 0.0,
325
- min_edges_per_node: int = 0,
326
- ) -> nx.Graph:
355
+ def load_network_cyjs(self, filepath, source_label="source", target_label="target"):
327
356
  """
328
357
  Load a network from a Cytoscape JSON (.cyjs) file.
329
358
 
330
- Args:
331
- filepath (str): Path to the Cytoscape JSON file.
332
- source_label (str, optional): Source node label. Default is "source".
333
- target_label (str, optional): Target node label. Default is "target".
334
- compute_sphere (bool, optional): Whether to map nodes to a sphere. Defaults to True.
335
- surface_depth (float, optional): Surface depth for the sphere. Defaults to 0.0.
336
- min_edges_per_node (int, optional): Minimum number of edges per node. Defaults to 0.
337
-
338
- Returns:
339
- NetworkX graph: Loaded and processed network.
340
- """
341
- networkio = NetworkIO(
342
- compute_sphere=compute_sphere,
343
- surface_depth=surface_depth,
344
- min_edges_per_node=min_edges_per_node,
345
- )
346
- return networkio._load_network_cyjs(
347
- filepath=filepath,
348
- source_label=source_label,
349
- target_label=target_label,
350
- )
351
-
352
- def _load_network_cyjs(self, filepath, source_label="source", target_label="target"):
353
- """
354
- Private method to load a network from a Cytoscape JSON (.cyjs) file.
355
-
356
359
  Args:
357
360
  filepath (str): Path to the Cytoscape JSON file.
358
361
  source_label (str, optional): Source node label. Default is "source".
@@ -437,20 +440,14 @@ class NetworkIO:
437
440
  num_initial_edges = G.number_of_edges()
438
441
  # Remove self-loops to ensure correct edge count
439
442
  G.remove_edges_from(nx.selfloop_edges(G))
440
- # Iteratively remove nodes with fewer edges than the threshold
441
- while True:
442
- nodes_to_remove = [
443
- node
444
- for node, degree in dict(G.degree()).items()
445
- if degree < self.min_edges_per_node
446
- ]
447
- if not nodes_to_remove:
448
- break # Exit loop if no nodes meet the condition
449
- G.remove_nodes_from(nodes_to_remove)
450
-
451
- # Remove isolated nodes
452
- isolates = list(nx.isolates(G))
453
- G.remove_nodes_from(isolates)
443
+ # Apply canonical node k-core pruning if requested
444
+ if self.min_edges_per_node > 0:
445
+ # networkx.k_core returns a subgraph; to preserve in-place behavior, copy back
446
+ core = nx.k_core(G, k=self.min_edges_per_node)
447
+ # Rebuild G in-place to keep external references valid
448
+ G.clear()
449
+ G.add_nodes_from(core.nodes(data=True))
450
+ G.add_edges_from(core.edges(data=True))
454
451
 
455
452
  # Log the number of nodes and edges before and after cleaning
456
453
  num_final_nodes = G.number_of_nodes()
@@ -19,9 +19,6 @@ class PlotterAPI:
19
19
  The PlotterAPI class provides methods to load and configure Plotter objects for plotting network graphs.
20
20
  """
21
21
 
22
- def __init__(self) -> None:
23
- pass
24
-
25
22
  def load_plotter(
26
23
  self,
27
24
  graph: Graph,
@@ -1,15 +1,15 @@
1
1
  """
2
- risk/risk
3
- ~~~~~~~~~
2
+ risk/_risk
3
+ ~~~~~~~~~~
4
4
  """
5
5
 
6
- from ._annotation import AnnotationIO
6
+ from ._annotation import AnnotationHandler
7
7
  from ._log import params, set_global_verbosity
8
8
  from ._neighborhoods import NeighborhoodsAPI
9
- from ._network import GraphAPI, NetworkIO, PlotterAPI
9
+ from ._network import GraphAPI, NetworkAPI, PlotterAPI
10
10
 
11
11
 
12
- class RISK(NetworkIO, AnnotationIO, NeighborhoodsAPI, GraphAPI, PlotterAPI):
12
+ class RISK(NetworkAPI, AnnotationHandler, NeighborhoodsAPI, GraphAPI, PlotterAPI):
13
13
  """
14
14
  RISK: A class for network analysis and visualization.
15
15
 
@@ -0,0 +1,115 @@
1
+ Metadata-Version: 2.4
2
+ Name: risk-network
3
+ Version: 0.0.14
4
+ Summary: A Python package for scalable network analysis and high-quality visualization.
5
+ Author-email: Ira Horecka <ira89@icloud.com>
6
+ License: GPL-3.0-or-later
7
+ Project-URL: Homepage, https://github.com/riskportal/network
8
+ Project-URL: Issues, https://github.com/riskportal/network/issues
9
+ Classifier: Development Status :: 4 - Beta
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.8
16
+ Classifier: Programming Language :: Python :: 3 :: Only
17
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
18
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
19
+ Classifier: Topic :: Scientific/Engineering :: Visualization
20
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
+ Requires-Python: >=3.8
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE
24
+ Requires-Dist: ipywidgets
25
+ Requires-Dist: leidenalg
26
+ Requires-Dist: markov_clustering
27
+ Requires-Dist: matplotlib
28
+ Requires-Dist: networkx
29
+ Requires-Dist: nltk
30
+ Requires-Dist: numpy
31
+ Requires-Dist: openpyxl
32
+ Requires-Dist: pandas
33
+ Requires-Dist: python-igraph
34
+ Requires-Dist: python-louvain
35
+ Requires-Dist: scikit-learn
36
+ Requires-Dist: scipy
37
+ Requires-Dist: statsmodels
38
+ Requires-Dist: threadpoolctl
39
+ Requires-Dist: tqdm
40
+ Dynamic: license-file
41
+
42
+ # RISK Network
43
+
44
+ <p align="center">
45
+ <img src="https://i.imgur.com/8TleEJs.png" width="50%" />
46
+ </p>
47
+
48
+ <br>
49
+
50
+ ![Python](https://img.shields.io/badge/python-3.8%2B-yellow)
51
+ [![pypiv](https://img.shields.io/pypi/v/risk-network.svg)](https://pypi.python.org/pypi/risk-network)
52
+ ![License](https://img.shields.io/badge/license-GPLv3-purple)
53
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.xxxxxxx.svg)](https://doi.org/10.5281/zenodo.xxxxxxx)
54
+ ![Downloads](https://img.shields.io/pypi/dm/risk-network)
55
+ ![Tests](https://github.com/riskportal/network/actions/workflows/ci.yml/badge.svg)
56
+
57
+ **RISK** (Regional Inference of Significant Kinships) is a next-generation tool for biological network annotation and visualization. It integrates community detection algorithms, rigorous overrepresentation analysis, and a modular framework for diverse network types. RISK identifies biologically coherent relationships within networks and generates publication-ready visualizations, making it a useful tool for biological and interdisciplinary network analysis.
58
+
59
+ For a full description of RISK and its applications, see:
60
+ <br>
61
+ **Horecka and Röst (2025)**, _"RISK: a next-generation tool for biological network annotation and visualization"_.
62
+ <br>
63
+ DOI: [10.5281/zenodo.xxxxxxx](https://doi.org/10.5281/zenodo.xxxxxxx)
64
+
65
+ ## Documentation and Tutorial
66
+
67
+ Full documentation is available at:
68
+
69
+ - **Docs:** [https://riskportal.github.io/network-tutorial](https://riskportal.github.io/network-tutorial)
70
+ - **Tutorial Jupyter Notebook Repository:** [https://github.com/riskportal/network-tutorial](https://github.com/riskportal/network-tutorial)
71
+
72
+ ## Installation
73
+
74
+ RISK is compatible with Python 3.8 or later and runs on all major operating systems. To install the latest version of RISK, run:
75
+
76
+ ```bash
77
+ pip install risk-network --upgrade
78
+ ```
79
+
80
+ ## Key Features of RISK
81
+
82
+ - **Broad Data Compatibility**: Accepts multiple network formats (NetworkX, Cytoscape, GPickle) and user-provided annotations formatted as term-to-gene membership tables (JSON, CSV, TSV, Excel, or Python dictionaries).
83
+ - **Flexible Clustering**: Offers Louvain, Leiden, Markov Clustering, Greedy Modularity, Label Propagation, Spinglass, and Walktrap, with user-defined resolution parameters to detect both coarse- and fine-grained modules.
84
+ - **Statistical Testing**: Provides hypergeometric, binomial, chi-squared, Poisson, z-score, and permutation tests, balancing speed with statistical rigor.
85
+ - **High-Resolution Visualization**: Generates publication-ready figures with contour overlays, customizable node/edge properties, and export to SVG, PNG, or PDF.
86
+
87
+ ## Example Usage
88
+
89
+ We applied RISK to a _Saccharomyces cerevisiae_ protein–protein interaction (PPI) network (Michaelis _et al._, 2023; 3,839 proteins, 30,955 interactions). RISK identified compact, functional modules overrepresented in Gene Ontology Biological Process (GO BP) terms (Ashburner _et al._, 2000), revealing biological organization that includes ribosomal assembly, mitochondrial organization, and RNA polymerase activity (P < 0.0001).
90
+
91
+ [![Yeast PPI network annotated with GO BP terms](https://i.imgur.com/jQKatLY.jpeg)](https://i.imgur.com/jQKatLY.jpeg)
92
+ **RISK workflow overview and analysis of the yeast PPI network**. GO BP terms are color-coded to represent key cellular processes—including ribosomal assembly, mitochondrial organization, and RNA polymerase activity (P < 0.0001).
93
+
94
+ ## Citation
95
+
96
+ If you use RISK in your research, please cite the following:
97
+
98
+ **Horecka and Röst (2025)**, _"RISK: a next-generation tool for biological network annotation and visualization"_.
99
+ <br>
100
+ DOI: [10.5281/zenodo.xxxxxxx](https://doi.org/10.5281/zenodo.xxxxxxx)
101
+
102
+ ## Contributing
103
+
104
+ We welcome contributions from the community:
105
+
106
+ - [Issues Tracker](https://github.com/riskportal/network/issues)
107
+ - [Source Code](https://github.com/riskportal/network/tree/main/risk)
108
+
109
+ ## Support
110
+
111
+ If you encounter issues or have suggestions for new features, please use the [Issues Tracker](https://github.com/riskportal/network/issues) on GitHub.
112
+
113
+ ## License
114
+
115
+ RISK is open source under the [GNU General Public License v3.0](https://www.gnu.org/licenses/gpl-3.0.en.html).
@@ -1,31 +1,31 @@
1
- risk/__init__.py,sha256=lyhdzjETYJ9Jg8da35gncJ_u2_QrElRsW5t1c69OIJY,142
2
- risk/risk.py,sha256=l7Btltgd-K00rmUV4_jTgJTwikSmMb-9A2IUQN5PnY8,1040
3
- risk/_annotation/__init__.py,sha256=LBLL5P_MdfwWaxkHBQHfQTPY-FF8hIGoUGKyHF1Wg4s,159
1
+ risk/__init__.py,sha256=dIsUIx_RUj6VLoSixswIhpxEO91S4LoLQT86GuAMllU,136
2
+ risk/_risk.py,sha256=VULCdM41BlWKM1ou4Qc579ffZ9dMZkfhAwKYgbaEeKM,1054
3
+ risk/_annotation/__init__.py,sha256=zr7w1DHkmvrkKFGKdPhrcvZHV-xsfd5TZOaWtFiP4Dc,164
4
4
  risk/_annotation/_annotation.py,sha256=03vcnkdi4HGH5UUyokUyOdyyjXOLoKSmLFuK7VAl41c,15174
5
- risk/_annotation/_io.py,sha256=gEq6STSWFIFjSWoGXJfwxTME4GDJZTOgPeXZABgSdXc,12447
5
+ risk/_annotation/_io.py,sha256=xic3dkEA54X82HbyWfCiXrUpAhPWFPBZ69R8jw31omQ,12457
6
6
  risk/_annotation/_nltk_setup.py,sha256=aHHnElLOKiouVDrZ3uON0CSFmBxvzmYfjYPi07v2rJM,3584
7
7
  risk/_log/__init__.py,sha256=LX6BsfcGOH0RbAdQaUmIU-LVMmArDdKwn0jFtj45FYo,205
8
8
  risk/_log/_console.py,sha256=1jSFzY3w0-vVqIBCgc-IhyJPNT6vRg8GSGxhyw_D9MI,4653
9
9
  risk/_log/_parameters.py,sha256=8FkeeBtULDFVw3UijLArK-G3OIjy6YXyRXmPPckK7fU,5893
10
10
  risk/_neighborhoods/__init__.py,sha256=eKwjpEUKSUmAirRZ_qPTVF7MLkvhCn_fulPVq158wM8,185
11
- risk/_neighborhoods/_api.py,sha256=6vt25y38wSvEhMvNxy9Rl-CrQc9ARSuvGwexWagg5z0,23344
11
+ risk/_neighborhoods/_api.py,sha256=s1f4d_nEPWc66KDmOUUpRNXzp6dfoevw45ewOg9eMNo,23298
12
12
  risk/_neighborhoods/_community.py,sha256=Tr-EHO91EWbMmNr_z21UCngiqWOlWIqcjwBig_VXI8c,17850
13
- risk/_neighborhoods/_domains.py,sha256=He8G2-E9-yYQB8ChUtMFr51HVlfRj5EaxGu3sGVNUCo,14630
14
- risk/_neighborhoods/_neighborhoods.py,sha256=9H7BickJx9GdnOo5d5wpdtXkcWyvzq2w6FAy1rwLBtk,20614
13
+ risk/_neighborhoods/_domains.py,sha256=Q3MUWW9KjuERpxs4H1dNFhalDjdatMkWSnB12BerUDU,16580
14
+ risk/_neighborhoods/_neighborhoods.py,sha256=9hpQCYG0d9fZLYj-fVACgLJBtw3dW8C-0YbE2OWuX-M,21436
15
15
  risk/_neighborhoods/_stats/__init__.py,sha256=nL83A3unzpCTzRDPanCiqU1RsKPJJNDe46S9igoe3pg,264
16
16
  risk/_neighborhoods/_stats/_tests.py,sha256=-ioHdyrsgW63YnypKFpanatauuKrF3LT7aMZ3b6otrU,12091
17
17
  risk/_neighborhoods/_stats/_permutation/__init__.py,sha256=nfTaW29CK8OZCdFnpMVlHnFaqr1E4AZp6mvhlUazHXM,140
18
18
  risk/_neighborhoods/_stats/_permutation/_permutation.py,sha256=e5qVuYWGhiAn5Jv8VILk-WYMOO4km48cGdRYTOl355M,10661
19
19
  risk/_neighborhoods/_stats/_permutation/_test_functions.py,sha256=lGI_MkdbW4UHI0jWN_T1OattRjXrq_qmzAmOfels670,3165
20
- risk/_network/__init__.py,sha256=LbXsJGU2-ydDMw5_qgwizE6YHMljGDuOGc6TO-jk4Pk,126
21
- risk/_network/_io.py,sha256=vOSfAWnj1Q4jQSVo9BqY-nwQIoEG-CYZ_Cv2clopVw0,28090
20
+ risk/_network/__init__.py,sha256=YrAMfhL0CMWQb3sY-mn1VxK44zZAWeFAvHrBONH9I-A,127
21
+ risk/_network/_io.py,sha256=wmokwNViz24Gv6o3OFRTYHftYYz4sN64VigGrdEGzy8,28057
22
22
  risk/_network/_graph/__init__.py,sha256=SFgxgxUiZK4vvw6bdQ04DSMXEr8xjMaQV-Wne6wAIqM,104
23
- risk/_network/_graph/_api.py,sha256=zH7n-ulqLcbgHdAfLu1yWErdR5G4LgSqR7DcN2qApco,8520
23
+ risk/_network/_graph/_api.py,sha256=sp3_mLJDP_xQexYBjyM17iyzLb2oGmiC050kcw-jVho,8474
24
24
  risk/_network/_graph/_graph.py,sha256=x2EWT_ZVwxh7m9a01yG4WMdmAxBxiaxX3CvkqP9QAXE,12486
25
25
  risk/_network/_graph/_stats.py,sha256=6mxZkuL6LJlwKDsBbP22DAVkNUEhq-JZwYMKhFKD08k,7359
26
- risk/_network/_graph/_summary.py,sha256=4eGhCArssePDg4LXr3sg5bUpNn7KFK9oPZcCz5lJKEQ,10334
26
+ risk/_network/_graph/_summary.py,sha256=I8FhMdpawGbvCJHPpsyvbtM7Qa0xXzwKvjnX9N8HSm8,10141
27
27
  risk/_network/_plotter/__init__.py,sha256=qFRtQKSBGIqmUGwmA7VPL7hTHBb9yvRIt0nLISXnwkY,84
28
- risk/_network/_plotter/_api.py,sha256=MHZIMTlul2u-Ve5m9r-eUljI2oC3IRklNn7AVlfmzGs,1773
28
+ risk/_network/_plotter/_api.py,sha256=OaV1CCRGsz98wEEzyEhaq2CqEuZh6t2qS7g_rY6HJJs,1727
29
29
  risk/_network/_plotter/_canvas.py,sha256=H7rPz4Gv7ED3bDHMif4cf2usdU4ifmxzXeug5A_no68,13599
30
30
  risk/_network/_plotter/_contour.py,sha256=E3ILjlv-VBSbK3ClwObB84TvP1D48_B47ODXwtApjIE,15557
31
31
  risk/_network/_plotter/_labels.py,sha256=8JXzEOIBQefwr1ngF-2ZYCnYLZXs2Erz-R1c28NnsL0,46915
@@ -34,8 +34,8 @@ risk/_network/_plotter/_plotter.py,sha256=F2hw-spUdsXjvuG36o0YFR3Pnd-CZOHYUq4vW0
34
34
  risk/_network/_plotter/_utils/__init__.py,sha256=JXgjKiBWvXx0X2IeFnrOh5YZQGQoELbhJZ0Zh2mFEOo,211
35
35
  risk/_network/_plotter/_utils/_colors.py,sha256=JCliSvz8_-TsjilaRHSEsqdXFBUYlzhXKOSRGdCm9Kw,19177
36
36
  risk/_network/_plotter/_utils/_layout.py,sha256=GyGLc2U1WWUVL1Te9uPi_CLqlW_E4TImXRAL5TeA5D8,3633
37
- risk_network-0.0.13b5.dist-info/licenses/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
38
- risk_network-0.0.13b5.dist-info/METADATA,sha256=dkYs8JCdMr945DZ-7bJ95MX0zuxfbkw1VpOhuLtCE_U,6853
39
- risk_network-0.0.13b5.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
40
- risk_network-0.0.13b5.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
41
- risk_network-0.0.13b5.dist-info/RECORD,,
37
+ risk_network-0.0.14.dist-info/licenses/LICENSE,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
38
+ risk_network-0.0.14.dist-info/METADATA,sha256=wdfJmWVphRRLwTAIURl5xSa9vLzl1SCQk8_HB5ezfG0,5698
39
+ risk_network-0.0.14.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
40
+ risk_network-0.0.14.dist-info/top_level.txt,sha256=NX7C2PFKTvC1JhVKv14DFlFAIFnKc6Lpsu1ZfxvQwVw,5
41
+ risk_network-0.0.14.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.3.1)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,125 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: risk-network
3
- Version: 0.0.13b5
4
- Summary: A Python package for scalable network analysis and high-quality visualization.
5
- Author-email: Ira Horecka <ira89@icloud.com>
6
- License: GPL-3.0-or-later
7
- Project-URL: Homepage, https://github.com/riskportal/network
8
- Project-URL: Issues, https://github.com/riskportal/network/issues
9
- Classifier: Development Status :: 4 - Beta
10
- Classifier: Intended Audience :: Developers
11
- Classifier: Intended Audience :: Science/Research
12
- Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
13
- Classifier: Operating System :: OS Independent
14
- Classifier: Programming Language :: Python :: 3
15
- Classifier: Programming Language :: Python :: 3.8
16
- Classifier: Programming Language :: Python :: 3 :: Only
17
- Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
18
- Classifier: Topic :: Scientific/Engineering :: Information Analysis
19
- Classifier: Topic :: Scientific/Engineering :: Visualization
20
- Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
- Requires-Python: >=3.8
22
- Description-Content-Type: text/markdown
23
- License-File: LICENSE
24
- Requires-Dist: ipywidgets
25
- Requires-Dist: leidenalg
26
- Requires-Dist: markov_clustering
27
- Requires-Dist: matplotlib
28
- Requires-Dist: networkx
29
- Requires-Dist: nltk
30
- Requires-Dist: numpy
31
- Requires-Dist: openpyxl
32
- Requires-Dist: pandas
33
- Requires-Dist: python-igraph
34
- Requires-Dist: python-louvain
35
- Requires-Dist: scikit-learn
36
- Requires-Dist: scipy
37
- Requires-Dist: statsmodels
38
- Requires-Dist: threadpoolctl
39
- Requires-Dist: tqdm
40
- Dynamic: license-file
41
-
42
- # RISK Network
43
-
44
- <p align="center">
45
- <img src="https://i.imgur.com/8TleEJs.png" width="50%" />
46
- </p>
47
-
48
- <br>
49
-
50
- ![Python](https://img.shields.io/badge/python-3.8%2B-yellow)
51
- [![pypiv](https://img.shields.io/pypi/v/risk-network.svg)](https://pypi.python.org/pypi/risk-network)
52
- ![License](https://img.shields.io/badge/license-GPLv3-purple)
53
- [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.xxxxxxx.svg)](https://doi.org/10.5281/zenodo.xxxxxxx)
54
- ![Downloads](https://img.shields.io/pypi/dm/risk-network)
55
- ![Tests](https://github.com/riskportal/network/actions/workflows/ci.yml/badge.svg)
56
-
57
- **RISK** (Regional Inference of Significant Kinships) is a next-generation tool for biological network annotation and visualization. RISK integrates community detection-based clustering, rigorous statistical enrichment analysis, and a modular framework to uncover biologically meaningful relationships and generate high-resolution visualizations. RISK supports diverse data formats and is optimized for large-scale network analysis, making it a valuable resource for researchers in systems biology and beyond.
58
-
59
- ## Documentation and Tutorial
60
-
61
- Full documentation is available at:
62
-
63
- - **Docs:** [https://riskportal.github.io/network-tutorial](https://riskportal.github.io/network-tutorial)
64
- - **Tutorial Jupyter Notebook Repository:** [https://github.com/riskportal/network-tutorial](https://github.com/riskportal/network-tutorial)
65
-
66
- ## Installation
67
-
68
- RISK is compatible with Python 3.8 or later and runs on all major operating systems. To install the latest version of RISK, run:
69
-
70
- ```bash
71
- pip install risk-network --upgrade
72
- ```
73
-
74
- ## Features
75
-
76
- - **Comprehensive Network Analysis**: Analyze biological networks (e.g., protein–protein interaction and genetic interaction networks) as well as non-biological networks.
77
- - **Advanced Clustering Algorithms**: Supports Louvain, Leiden, Markov Clustering, Greedy Modularity, Label Propagation, Spinglass, and Walktrap for identifying structured network regions.
78
- - **Flexible Visualization**: Produce customizable, high-resolution network visualizations with kernel density estimate overlays, adjustable node and edge attributes, and export options in SVG, PNG, and PDF formats.
79
- - **Efficient Data Handling**: Supports multiple input/output formats, including JSON, CSV, TSV, Excel, Cytoscape, and GPickle.
80
- - **Statistical Analysis**: Assess functional enrichment using hypergeometric, permutation (network-aware), binomial, chi-squared, Poisson, and z-score tests, ensuring statistical adaptability across datasets.
81
- - **Cross-Domain Applicability**: Suitable for network analysis across biological and non-biological domains, including social and communication networks.
82
-
83
- ## Example Usage
84
-
85
- We applied RISK to a *Saccharomyces cerevisiae* protein–protein interaction network from Michaelis et al. (2023), filtering for proteins with six or more interactions to emphasize core functional relationships. RISK identified compact, statistically enriched clusters corresponding to biological processes such as ribosomal assembly and mitochondrial organization.
86
-
87
- [![Figure 1](https://i.imgur.com/lJHJrJr.jpeg)](https://i.imgur.com/lJHJrJr.jpeg)
88
-
89
- This figure highlights RISK’s capability to detect both established and novel functional modules within the yeast interactome.
90
-
91
- ## Citation
92
-
93
- If you use RISK in your research, please reference the following:
94
-
95
- **Horecka et al.**, *"RISK: a next-generation tool for biological network annotation and visualization"*, 2025.
96
- DOI: [10.1234/zenodo.xxxxxxx](https://doi.org/10.1234/zenodo.xxxxxxx)
97
-
98
- ## Software Architecture and Implementation
99
-
100
- RISK features a streamlined, modular architecture designed to meet diverse research needs. RISK’s modular design enables users to run individual components—such as clustering, statistical testing, or visualization—independently or in combination, depending on the analysis workflow. It includes dedicated modules for:
101
-
102
- - **Data I/O**: Supports JSON, CSV, TSV, Excel, Cytoscape, and GPickle formats.
103
- - **Clustering**: Supports multiple clustering methods, including Louvain, Leiden, Markov Clustering, Greedy Modularity, Label Propagation, Spinglass, and Walktrap. Provides flexible distance metrics tailored to network structure.
104
- - **Statistical Analysis**: Provides a suite of tests for overrepresentation analysis of annotations.
105
- - **Visualization**: Offers customizable, high-resolution output in multiple formats, including SVG, PNG, and PDF.
106
- - **Configuration Management**: Centralized parameters in risk.params ensure reproducibility and easy tuning for large-scale analyses.
107
-
108
- ## Performance and Efficiency
109
-
110
- Benchmarking results demonstrate that RISK efficiently scales to networks exceeding hundreds of thousands of edges, maintaining low execution times and optimal memory usage across statistical tests.
111
-
112
- ## Contributing
113
-
114
- We welcome contributions from the community:
115
-
116
- - [Issues Tracker](https://github.com/riskportal/network/issues)
117
- - [Source Code](https://github.com/riskportal/network/tree/main/risk)
118
-
119
- ## Support
120
-
121
- If you encounter issues or have suggestions for new features, please use the [Issues Tracker](https://github.com/riskportal/network/issues) on GitHub.
122
-
123
- ## License
124
-
125
- RISK is open source under the [GNU General Public License v3.0](https://www.gnu.org/licenses/gpl-3.0.en.html).