risk-network 0.0.3b0__tar.gz → 0.0.3b1__tar.gz

This diff shows the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as published.
Files changed (36)
  1. {risk_network-0.0.3b0 → risk_network-0.0.3b1}/MANIFEST.in +1 -2
  2. {risk_network-0.0.3b0 → risk_network-0.0.3b1}/PKG-INFO +14 -8
  3. {risk_network-0.0.3b0 → risk_network-0.0.3b1}/README.md +12 -6
  4. {risk_network-0.0.3b0 → risk_network-0.0.3b1}/pyproject.toml +2 -2
  5. {risk_network-0.0.3b0 → risk_network-0.0.3b1}/risk/__init__.py +1 -1
  6. {risk_network-0.0.3b0 → risk_network-0.0.3b1}/risk/annotations/annotations.py +9 -9
  7. {risk_network-0.0.3b0 → risk_network-0.0.3b1}/risk/annotations/io.py +62 -49
  8. risk_network-0.0.3b0/risk/neighborhoods/graph.py → risk_network-0.0.3b1/risk/neighborhoods/community.py +2 -2
  9. {risk_network-0.0.3b0 → risk_network-0.0.3b1}/risk/neighborhoods/neighborhoods.py +1 -1
  10. {risk_network-0.0.3b0 → risk_network-0.0.3b1}/risk/network/io.py +2 -2
  11. {risk_network-0.0.3b0 → risk_network-0.0.3b1}/risk/network/plot.py +33 -42
  12. {risk_network-0.0.3b0 → risk_network-0.0.3b1}/risk/risk.py +1 -8
  13. {risk_network-0.0.3b0/risk/stats/permutation/_python → risk_network-0.0.3b1/risk/stats}/permutation.py +25 -20
  14. {risk_network-0.0.3b0 → risk_network-0.0.3b1}/risk/stats/stats.py +108 -104
  15. {risk_network-0.0.3b0 → risk_network-0.0.3b1}/risk_network.egg-info/PKG-INFO +14 -8
  16. {risk_network-0.0.3b0 → risk_network-0.0.3b1}/risk_network.egg-info/SOURCES.txt +2 -5
  17. {risk_network-0.0.3b0 → risk_network-0.0.3b1}/risk_network.egg-info/requires.txt +1 -1
  18. {risk_network-0.0.3b0 → risk_network-0.0.3b1}/setup.py +3 -15
  19. risk_network-0.0.3b0/risk/stats/permutation/__init__.py +0 -15
  20. risk_network-0.0.3b0/risk/stats/permutation/_cython/permutation.pyx +0 -82
  21. risk_network-0.0.3b0/risk/stats/permutation/_cython/setup.py +0 -11
  22. {risk_network-0.0.3b0 → risk_network-0.0.3b1}/LICENSE +0 -0
  23. {risk_network-0.0.3b0 → risk_network-0.0.3b1}/risk/annotations/__init__.py +0 -0
  24. {risk_network-0.0.3b0 → risk_network-0.0.3b1}/risk/constants.py +0 -0
  25. {risk_network-0.0.3b0 → risk_network-0.0.3b1}/risk/log/__init__.py +0 -0
  26. {risk_network-0.0.3b0 → risk_network-0.0.3b1}/risk/log/console.py +0 -0
  27. {risk_network-0.0.3b0 → risk_network-0.0.3b1}/risk/log/params.py +0 -0
  28. {risk_network-0.0.3b0 → risk_network-0.0.3b1}/risk/neighborhoods/__init__.py +0 -0
  29. {risk_network-0.0.3b0 → risk_network-0.0.3b1}/risk/neighborhoods/domains.py +0 -0
  30. {risk_network-0.0.3b0 → risk_network-0.0.3b1}/risk/network/__init__.py +0 -0
  31. {risk_network-0.0.3b0 → risk_network-0.0.3b1}/risk/network/geometry.py +0 -0
  32. {risk_network-0.0.3b0 → risk_network-0.0.3b1}/risk/network/graph.py +0 -0
  33. {risk_network-0.0.3b0 → risk_network-0.0.3b1}/risk/stats/__init__.py +0 -0
  34. {risk_network-0.0.3b0 → risk_network-0.0.3b1}/risk_network.egg-info/dependency_links.txt +0 -0
  35. {risk_network-0.0.3b0 → risk_network-0.0.3b1}/risk_network.egg-info/top_level.txt +0 -0
  36. {risk_network-0.0.3b0 → risk_network-0.0.3b1}/setup.cfg +0 -0
@@ -1,6 +1,5 @@
- # Include all Python and Cython source files
+ # Include all Python source files
  recursive-include risk *.py
- recursive-include risk *.pyx

  # Include important project files in the distribution
  include README.md
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: risk-network
- Version: 0.0.3b0
+ Version: 0.0.3b1
  Summary: A Python package for biological network analysis
  Author: Ira Horecka
  Author-email: Ira Horecka <ira89@icloud.com>
@@ -694,13 +694,13 @@ Classifier: Development Status :: 4 - Beta
  Requires-Python: >=3.7
  Description-Content-Type: text/markdown
  License-File: LICENSE
- Requires-Dist: cython
  Requires-Dist: ipywidgets
  Requires-Dist: markov_clustering
  Requires-Dist: matplotlib
  Requires-Dist: networkx
  Requires-Dist: nltk==3.8.1
  Requires-Dist: numpy
+ Requires-Dist: openpyxl
  Requires-Dist: pandas
  Requires-Dist: python-louvain
  Requires-Dist: scikit-learn
@@ -709,15 +709,21 @@ Requires-Dist: statsmodels
  Requires-Dist: threadpoolctl
  Requires-Dist: tqdm

- # RISK
-
- <ins>Regional Inference of Significant Kinships</ins>
+ <p align="center">
+     <img src="./docs/github/risk-logo-dark.png#gh-dark-mode-only" width="400" />
+     <img src="./docs/github/risk-logo-light.png#gh-light-mode-only" width="400" />
+ </p>

- <p align="left">
-     <img src="./docs/github/risk-logo-dark.png#gh-dark-mode-only" width="40%" />
-     <img src="./docs/github/risk-logo-light.png#gh-light-mode-only" width="40%" />
+ <p align="center">
+     <a href="https://pypi.python.org/pypi/risk-network"><img src="https://img.shields.io/pypi/v/risk-network.svg" alt="pypiv"></a>
+     <a href="https://www.python.org/downloads/"><img src="https://img.shields.io/badge/python-3.7+-blue.svg" alt="Python 3.7+"></a>
+     <a href="https://raw.githubusercontent.com/irahorecka/chrono24/main/LICENSE"><img src="https://img.shields.io/badge/License-GPLv3-blue.svg" alt="License: GPL v3"></a>
  </p>

+ ## RISK
+
+ #### Regional Inference of Significant Kinships
+
  RISK is a software tool for visualizing spatial relationships in networks. It aims to enhance network analysis by integrating advanced network annotation algorithms, such as Louvain and Markov Clustering, to identify key functional modules and pathways.

  ## Features
@@ -1,12 +1,18 @@
- # RISK
-
- <ins>Regional Inference of Significant Kinships</ins>
+ <p align="center">
+     <img src="./docs/github/risk-logo-dark.png#gh-dark-mode-only" width="400" />
+     <img src="./docs/github/risk-logo-light.png#gh-light-mode-only" width="400" />
+ </p>

- <p align="left">
-     <img src="./docs/github/risk-logo-dark.png#gh-dark-mode-only" width="40%" />
-     <img src="./docs/github/risk-logo-light.png#gh-light-mode-only" width="40%" />
+ <p align="center">
+     <a href="https://pypi.python.org/pypi/risk-network"><img src="https://img.shields.io/pypi/v/risk-network.svg" alt="pypiv"></a>
+     <a href="https://www.python.org/downloads/"><img src="https://img.shields.io/badge/python-3.7+-blue.svg" alt="Python 3.7+"></a>
+     <a href="https://raw.githubusercontent.com/irahorecka/chrono24/main/LICENSE"><img src="https://img.shields.io/badge/License-GPLv3-blue.svg" alt="License: GPL v3"></a>
  </p>

+ ## RISK
+
+ #### Regional Inference of Significant Kinships
+
  RISK is a software tool for visualizing spatial relationships in networks. It aims to enhance network analysis by integrating advanced network annotation algorithms, such as Louvain and Markov Clustering, to identify key functional modules and pathways.

  ## Features
@@ -1,5 +1,5 @@
  [build-system]
- requires = ["setuptools", "wheel", "Cython", "numpy"]
+ requires = ["setuptools", "wheel", "numpy"]
  build-backend = "setuptools.build_meta"

  [project]
@@ -26,13 +26,13 @@ classifiers = [
      "Development Status :: 4 - Beta",
  ]
  dependencies = [
-     "cython",
      "ipywidgets",
      "markov_clustering",
      "matplotlib",
      "networkx",
      "nltk==3.8.1",
      "numpy",
+     "openpyxl",
      "pandas",
      "python-louvain",
      "scikit-learn",
@@ -10,4 +10,4 @@ RISK: RISK Infers Spatial Kinship

  from risk.risk import RISK

- __version__ = "0.0.3-beta.0"
+ __version__ = "0.0.3-beta.1"
@@ -139,15 +139,15 @@ def define_top_annotations(
                  size_connected_components <= max_cluster_size,
              )
          )
-         annotations_enrichment_matrix.loc[
-             attribute, "num connected components"
-         ] = num_connected_components
-         annotations_enrichment_matrix.at[
-             attribute, "size connected components"
-         ] = size_connected_components
-         annotations_enrichment_matrix.loc[
-             attribute, "num large connected components"
-         ] = num_large_connected_components
+         annotations_enrichment_matrix.loc[attribute, "num connected components"] = (
+             num_connected_components
+         )
+         annotations_enrichment_matrix.at[attribute, "size connected components"] = (
+             size_connected_components
+         )
+         annotations_enrichment_matrix.loc[attribute, "num large connected components"] = (
+             num_large_connected_components
+         )

      # Filter out attributes with more than one connected component
      annotations_enrichment_matrix.loc[
@@ -45,66 +45,70 @@ class AnnotationsIO:
          # Process the JSON data and return it in the context of the network
          return load_annotations(network, annotations_input)

-     def load_csv_annotation(
+     def load_excel_annotation(
          self,
          filepath: str,
          network: nx.Graph,
          label_colname: str = "label",
          nodes_colname: str = "nodes",
-         delimiter: str = ";",
+         sheet_name: str = "Sheet1",
+         nodes_delimiter: str = ";",
      ) -> Dict[str, Any]:
-         """Load annotations from a CSV file and convert them to a DataFrame.
+         """Load annotations from an Excel file and associate them with the network.

          Args:
-             filepath (str): Path to the CSV annotations file.
-             network (NetworkX graph): The network to which the annotations are related.
-             label_colname (str): Name of the column containing the labels.
-             nodes_colname (str): Name of the column containing the nodes.
-             delimiter (str): Delimiter used to parse the nodes column (default is ';').
+             filepath (str): Path to the Excel annotations file.
+             network (nx.Graph): The NetworkX graph to which the annotations are related.
+             label_colname (str): Name of the column containing the labels (e.g., GO terms).
+             nodes_colname (str): Name of the column containing the nodes associated with each label.
+             sheet_name (str, optional): The name of the Excel sheet to load (default is 'Sheet1').
+             nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').

          Returns:
-             pd.DataFrame: DataFrame containing the labels and parsed nodes.
+             Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes,
+                 linked to the provided network.
          """
-         filetype = "CSV"
+         filetype = "Excel"
          params.log_annotations(filepath=filepath, filetype=filetype)
          _log_loading(filetype, filepath=filepath)
-         # Load the CSV file into a dictionary
-         annotations_input = _load_matrix_file(filepath, label_colname, nodes_colname, delimiter)
-         # Process and return the annotations in the context of the network
-         return load_annotations(network, annotations_input)
+         # Load the specified sheet from the Excel file
+         df = pd.read_excel(filepath, sheet_name=sheet_name)
+         # Split the nodes column by the specified nodes_delimiter
+         df[nodes_colname] = df[nodes_colname].apply(lambda x: x.split(nodes_delimiter))
+         # Convert the DataFrame to a dictionary pairing labels with their corresponding nodes
+         label_node_dict = df.set_index(label_colname)[nodes_colname].to_dict()
+         return load_annotations(network, label_node_dict)

-     def load_excel_annotation(
+     def load_csv_annotation(
          self,
          filepath: str,
          network: nx.Graph,
          label_colname: str = "label",
          nodes_colname: str = "nodes",
-         sheet_name: str = "Sheet1",
-         delimiter: str = ";",
+         nodes_delimiter: str = ";",
      ) -> Dict[str, Any]:
-         """Load annotations from an Excel file and convert them to a dictionary.
+         """Load annotations from a CSV file and associate them with the network.

          Args:
-             filepath (str): Path to the Excel annotations file.
-             network (NetworkX graph): The network to which the annotations are related.
-             label_colname (str): Name of the column containing the labels.
-             nodes_colname (str): Name of the column containing the nodes.
-             sheet_name (str): The name of the Excel sheet to load (default is 'Sheet1').
-             delimiter (str): Delimiter used to parse the nodes column (default is ';').
+             filepath (str): Path to the CSV annotations file.
+             network (nx.Graph): The NetworkX graph to which the annotations are related.
+             label_colname (str): Name of the column containing the labels (e.g., GO terms).
+             nodes_colname (str): Name of the column containing the nodes associated with each label.
+             nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').

          Returns:
-             dict: A dictionary where each label is paired with its respective list of nodes.
+             Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes,
+                 linked to the provided network.
          """
-         filetype = "Excel"
+         filetype = "CSV"
          params.log_annotations(filepath=filepath, filetype=filetype)
          _log_loading(filetype, filepath=filepath)
-         # Load the specified sheet from the Excel file
-         df = pd.read_excel(filepath, sheet_name=sheet_name)
-         # Split the nodes column by the specified delimiter
-         df[nodes_colname] = df[nodes_colname].apply(lambda x: x.split(delimiter))
-         # Convert the DataFrame to a dictionary pairing labels with their corresponding nodes
-         label_node_dict = df.set_index(label_colname)[nodes_colname].to_dict()
-         return load_annotations(network, label_node_dict)
+         # Load the CSV file into a dictionary
+         annotations_input = _load_matrix_file(
+             filepath, label_colname, nodes_colname, delimiter=",", nodes_delimiter=nodes_delimiter
+         )
+         # Process and return the annotations in the context of the network
+         return load_annotations(network, annotations_input)

      def load_tsv_annotation(
          self,
@@ -112,47 +116,56 @@ class AnnotationsIO:
          network: nx.Graph,
          label_colname: str = "label",
          nodes_colname: str = "nodes",
+         nodes_delimiter: str = ";",
      ) -> Dict[str, Any]:
-         """Load annotations from a TSV file and convert them to a DataFrame.
+         """Load annotations from a TSV file and associate them with the network.

          Args:
              filepath (str): Path to the TSV annotations file.
-             network (NetworkX graph): The network to which the annotations are related.
-             label_colname (str): Name of the column containing the labels.
-             nodes_colname (str): Name of the column containing the nodes.
+             network (nx.Graph): The NetworkX graph to which the annotations are related.
+             label_colname (str): Name of the column containing the labels (e.g., GO terms).
+             nodes_colname (str): Name of the column containing the nodes associated with each label.
+             nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').

          Returns:
-             pd.DataFrame: DataFrame containing the labels and parsed nodes.
+             Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes,
+                 linked to the provided network.
          """
          filetype = "TSV"
          params.log_annotations(filepath=filepath, filetype=filetype)
          _log_loading(filetype, filepath=filepath)
-         # Load the TSV file with tab delimiter and convert to dictionary
+         # Load the TSV file into a dictionary
          annotations_input = _load_matrix_file(
-             filepath, label_colname, nodes_colname, delimiter="\t"
+             filepath, label_colname, nodes_colname, delimiter="\t", nodes_delimiter=nodes_delimiter
          )
          # Process and return the annotations in the context of the network
          return load_annotations(network, annotations_input)


  def _load_matrix_file(
-     filepath: str, label_colname: str, nodes_colname: str, delimiter: str = ";"
+     filepath: str,
+     label_colname: str,
+     nodes_colname: str,
+     delimiter: str = ",",
+     nodes_delimiter: str = ";",
  ) -> Dict[str, Any]:
      """Load annotations from a CSV or TSV file and convert them to a dictionary.

      Args:
          filepath (str): Path to the annotation file.
-         label_colname (str): Name of the column containing the labels.
-         nodes_colname (str): Name of the column containing the nodes.
-         delimiter (str): Delimiter used to parse the nodes column (default is ';').
+         label_colname (str): Name of the column containing the labels (e.g., GO terms).
+         nodes_colname (str): Name of the column containing the nodes associated with each label.
+         delimiter (str, optional): Delimiter used to separate columns in the file (default is ',').
+         nodes_delimiter (str, optional): Delimiter used to separate multiple nodes within the nodes column (default is ';').

      Returns:
-         dict: A dictionary where each label is paired with its respective list of nodes.
+         Dict[str, Any]: A dictionary where each label is paired with its respective list of nodes.
      """
-     df = pd.read_csv(filepath)
-     # Split the nodes column by the delimiter
-     df[nodes_colname] = df[nodes_colname].apply(lambda x: x.split(delimiter))
-     # Create a dictionary pairing labels with their corresponding nodes
+     # Load the CSV or TSV file into a DataFrame
+     df = pd.read_csv(filepath, delimiter=delimiter)
+     # Split the nodes column by the nodes_delimiter to handle multiple nodes per label
+     df[nodes_colname] = df[nodes_colname].apply(lambda x: x.split(nodes_delimiter))
+     # Create a dictionary pairing labels with their corresponding list of nodes
      label_node_dict = df.set_index(label_colname)[nodes_colname].to_dict()
      return label_node_dict

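Taken together, the `risk/annotations/io.py` changes split the old `delimiter` argument into a per-filetype column delimiter (now fixed to "," for CSV and "\t" for TSV) and a `nodes_delimiter` that only splits the nodes column, while `load_excel_annotation` parses the worksheet itself via `pd.read_excel` (hence the new `openpyxl` dependency). A minimal usage sketch against the new signatures; the file names, column contents, and the zero-argument `RISK()` construction are illustrative assumptions, not taken from this diff:

```python
import networkx as nx
from risk import RISK

risk = RISK()  # RISK mixes in AnnotationsIO, so the loaders hang off this object
network = nx.karate_club_graph()  # stand-in for a real biological network

# Excel: choose the worksheet; cells like "geneA;geneB" are split on nodes_delimiter
excel_annotations = risk.load_excel_annotation(
    filepath="annotations.xlsx",  # hypothetical file
    network=network,
    label_colname="label",
    nodes_colname="nodes",
    sheet_name="Sheet1",
    nodes_delimiter=";",
)

# CSV/TSV: the column separator is now fixed per filetype; only the
# within-cell node separator is configurable
csv_annotations = risk.load_csv_annotation(
    filepath="annotations.csv", network=network, nodes_delimiter=";"
)
```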
1
1
  """
2
- risk/neighborhoods/graph
3
- ~~~~~~~~~~~~~~~~~~~~~~~~
2
+ risk/neighborhoods/community
3
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4
4
  """
5
5
 
6
6
  import community as community_louvain
@@ -10,7 +10,7 @@ import networkx as nx
10
10
  import numpy as np
11
11
  from sklearn.exceptions import DataConversionWarning
12
12
 
13
- from risk.neighborhoods.graph import (
13
+ from risk.neighborhoods.community import (
14
14
  calculate_dijkstra_neighborhoods,
15
15
  calculate_label_propagation_neighborhoods,
16
16
  calculate_louvain_neighborhoods,
@@ -317,10 +317,10 @@ class NetworkIO:
317
317
  print(f"Filetype: {filetype}")
318
318
  if filepath:
319
319
  print(f"Filepath: {filepath}")
320
- print(f"Project to sphere: {self.compute_sphere}")
320
+ print(f"Projection: {'Sphere' if self.compute_sphere else 'Plane'}")
321
321
  if self.compute_sphere:
322
322
  print(f"Surface depth: {self.surface_depth}")
323
323
  print(f"Edge length threshold: {self.edge_length_threshold}")
324
- print(f"Include edge weights: {self.include_edge_weight}")
324
+ print(f"Edge weight: {'Included' if self.include_edge_weight else 'Excluded'}")
325
325
  if self.include_edge_weight:
326
326
  print(f"Weight label: {self.weight_label}")
@@ -45,21 +45,24 @@ class NetworkPlotter:
45
45
  outline_scale (float, optional): Outline scaling factor for the perimeter diameter. Defaults to 1.0.
46
46
  """
47
47
  self.network_graph = network_graph
48
- self.ax = None # Initialize the axis attribute
49
- # Initialize the plot with the given parameters
50
- self._initialize_plot(figsize, background_color, plot_outline, outline_color, outline_scale)
48
+ # Initialize the plot with the specified parameters
49
+ self.ax = self._initialize_plot(
50
+ network_graph, figsize, background_color, plot_outline, outline_color, outline_scale
51
+ )
51
52
 
52
53
  def _initialize_plot(
53
54
  self,
55
+ network_graph: NetworkGraph,
54
56
  figsize: tuple,
55
57
  background_color: str,
56
58
  plot_outline: bool,
57
59
  outline_color: str,
58
60
  outline_scale: float,
59
- ) -> tuple:
61
+ ) -> plt.Axes:
60
62
  """Set up the plot with figure size, optional circle perimeter, and background color.
61
63
 
62
64
  Args:
65
+ network_graph (NetworkGraph): The network data and attributes to be visualized.
63
66
  figsize (tuple): Size of the figure in inches (width, height).
64
67
  background_color (str): Background color of the plot.
65
68
  plot_outline (bool): Whether to plot the network perimeter circle.
@@ -67,10 +70,10 @@ class NetworkPlotter:
67
70
  outline_scale (float): Outline scaling factor for the perimeter diameter.
68
71
 
69
72
  Returns:
70
- tuple: The created matplotlib figure and axis.
73
+ plt.Axes: The axis object for the plot.
71
74
  """
72
75
  # Extract node coordinates from the network graph
73
- node_coordinates = self.network_graph.node_coordinates
76
+ node_coordinates = network_graph.node_coordinates
74
77
  # Calculate the center and radius of the bounding box around the network
75
78
  center, radius = _calculate_bounding_box(node_coordinates)
76
79
  # Scale the radius by the outline_scale factor
@@ -107,9 +110,7 @@ class NetworkPlotter:
107
110
  ax.set_yticks([])
108
111
  ax.patch.set_visible(False) # Hide the axis background
109
112
 
110
- # Store the axis for further use and return the figure and axis
111
- self.ax = ax
112
- return fig, ax
113
+ return ax
113
114
 
114
115
  def plot_network(
115
116
  self,
@@ -436,7 +437,12 @@ class NetworkPlotter:
436
437
  arrow_color = self.get_annotated_contour_colors(color=arrow_color)
437
438
 
438
439
  # Calculate the center and radius of the network
439
- domain_centroids = self._calculate_domain_centroids()
440
+ domain_centroids = {}
441
+ for domain, nodes in self.network_graph.domain_to_nodes.items():
442
+ if nodes: # Skip if the domain has no nodes
443
+ domain_centroids[domain] = self._calculate_domain_centroid(nodes)
444
+
445
+ # Calculate the bounding box around the network
440
446
  center, radius = _calculate_bounding_box(
441
447
  self.network_graph.node_coordinates, radius_margin=perimeter_scale
442
448
  )
@@ -467,31 +473,26 @@ class NetworkPlotter:
467
473
  arrowprops=dict(arrowstyle="->", color=arrow_color[idx], linewidth=arrow_linewidth),
468
474
  )
469
475
 
470
- def _calculate_domain_centroids(self) -> Dict[Any, np.ndarray]:
471
- """Calculate the most centrally located node within each domain based on the node positions.
476
+ def _calculate_domain_centroid(self, nodes: list) -> tuple:
477
+ """Calculate the most centrally located node in .
478
+
479
+ Args:
480
+ nodes (list): List of node labels to include in the subnetwork.
472
481
 
473
482
  Returns:
474
- Dict[Any, np.ndarray]: A dictionary mapping each domain to its central node's coordinates.
483
+ tuple: A tuple containing the domain's central node coordinates.
475
484
  """
476
- domain_central_nodes = {}
477
- for domain, nodes in self.network_graph.domain_to_nodes.items():
478
- if not nodes: # Skip if the domain has no nodes
479
- continue
480
-
481
- # Extract positions of all nodes in the domain
482
- node_positions = self.network_graph.node_coordinates[nodes, :]
483
- # Calculate the pairwise distance matrix between all nodes in the domain
484
- distances_matrix = np.linalg.norm(
485
- node_positions[:, np.newaxis] - node_positions, axis=2
486
- )
487
- # Sum the distances for each node to all other nodes in the domain
488
- sum_distances = np.sum(distances_matrix, axis=1)
489
- # Identify the node with the smallest total distance to others (the centroid)
490
- central_node_idx = np.argmin(sum_distances)
491
- # Map the domain to the coordinates of its central node
492
- domain_central_nodes[domain] = node_positions[central_node_idx]
493
-
494
- return domain_central_nodes
485
+ # Extract positions of all nodes in the domain
486
+ node_positions = self.network_graph.node_coordinates[nodes, :]
487
+ # Calculate the pairwise distance matrix between all nodes in the domain
488
+ distances_matrix = np.linalg.norm(node_positions[:, np.newaxis] - node_positions, axis=2)
489
+ # Sum the distances for each node to all other nodes in the domain
490
+ sum_distances = np.sum(distances_matrix, axis=1)
491
+ # Identify the node with the smallest total distance to others (the centroid)
492
+ central_node_idx = np.argmin(sum_distances)
493
+ # Map the domain to the coordinates of its central node
494
+ domain_central_node = node_positions[central_node_idx]
495
+ return domain_central_node
495
496
 
496
497
  def get_annotated_node_colors(
497
498
  self, nonenriched_color: str = "white", random_seed: int = 888, **kwargs
@@ -604,16 +605,6 @@ class NetworkPlotter:
604
605
 
605
606
  return np.array(annotated_colors)
606
607
 
607
- @staticmethod
608
- def close(*args, **kwargs) -> None:
609
- """Close the current plot.
610
-
611
- Args:
612
- *args: Positional arguments passed to `plt.close`.
613
- **kwargs: Keyword arguments passed to `plt.close`.
614
- """
615
- plt.close(*args, **kwargs)
616
-
617
608
  @staticmethod
618
609
  def savefig(*args, **kwargs) -> None:
619
610
  """Save the current plot to a file.
@@ -98,7 +98,6 @@ class RISK(NetworkIO, AnnotationsIO):
          score_metric: str = "sum",
          null_distribution: str = "network",
          num_permutations: int = 1000,
-         use_cython=True,
          random_seed: int = 888,
          max_workers: int = 1,
      ) -> Dict[str, Any]:
@@ -122,7 +121,6 @@ class RISK(NetworkIO, AnnotationsIO):
              score_metric=score_metric,
              null_distribution=null_distribution,
              num_permutations=num_permutations,
-             use_cython=use_cython,
              random_seed=random_seed,
              max_workers=max_workers,
          )
@@ -153,7 +151,6 @@ class RISK(NetworkIO, AnnotationsIO):
              score_metric=score_metric,
              null_distribution=null_distribution,
              num_permutations=num_permutations,
-             use_cython=use_cython,
              random_seed=random_seed,
              max_workers=max_workers,
          )
@@ -167,7 +164,6 @@ class RISK(NetworkIO, AnnotationsIO):
          neighborhoods: Dict[str, Any],
          tail: str = "right",  # OPTIONS: "right" (enrichment), "left" (depletion), "both"
          pval_cutoff: float = 0.01,  # OPTIONS: Any value between 0 to 1
-         apply_fdr: bool = False,
          fdr_cutoff: float = 0.9999,  # OPTIONS: Any value between 0 to 1
          impute_depth: int = 1,
          prune_threshold: float = 0.0,
@@ -185,7 +181,6 @@ class RISK(NetworkIO, AnnotationsIO):
              neighborhoods (dict): Neighborhood enrichment data.
              tail (str, optional): Type of significance tail ("right", "left", "both"). Defaults to "right".
              pval_cutoff (float, optional): P-value cutoff for significance. Defaults to 0.01.
-             apply_fdr (bool, optional): Whether to apply FDR correction. Defaults to False.
              fdr_cutoff (float, optional): FDR cutoff for significance. Defaults to 0.9999.
              impute_depth (int, optional): Depth for imputing neighbors. Defaults to 1.
              prune_threshold (float, optional): Distance threshold for pruning neighbors. Defaults to 0.0.
@@ -203,7 +198,6 @@ class RISK(NetworkIO, AnnotationsIO):
          params.log_graph(
              tail=tail,
              pval_cutoff=pval_cutoff,
-             apply_fdr=apply_fdr,
              fdr_cutoff=fdr_cutoff,
              impute_depth=impute_depth,
              prune_threshold=prune_threshold,
@@ -215,7 +209,7 @@ class RISK(NetworkIO, AnnotationsIO):
          )

          print(f"P-value cutoff: {pval_cutoff}")
-         print(f"FDR cutoff: {'N/A' if not apply_fdr else apply_fdr}")
+         print(f"FDR BH cutoff: {fdr_cutoff}")
          print(
              f"Significance tail: '{tail}' ({'enrichment' if tail == 'right' else 'depletion' if tail == 'left' else 'both'})"
          )
@@ -225,7 +219,6 @@ class RISK(NetworkIO, AnnotationsIO):
              neighborhoods["enrichment_pvals"],
              tail=tail,
              pval_cutoff=pval_cutoff,
-             apply_fdr=apply_fdr,
              fdr_cutoff=fdr_cutoff,
          )

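For callers, the net effect of these hunks is a smaller surface: `use_cython` disappears (the pure-Python scorers are now the only implementation) and `apply_fdr` disappears (Benjamini-Hochberg correction is now keyed off `fdr_cutoff` alone, as the stats.py hunks below show). A runnable stand-in mirroring the new parameter list; the function here is a sketch for illustration, not the RISK method itself, whose name these hunks truncate:

```python
from typing import Any, Dict

def load_graph_sketch(  # stand-in mirroring the 0.0.3b1 parameters above
    neighborhoods: Dict[str, Any],
    tail: str = "right",
    pval_cutoff: float = 0.01,
    fdr_cutoff: float = 0.9999,  # < 1.0 by default, so BH correction stays on
    impute_depth: int = 1,
    prune_threshold: float = 0.0,
) -> None:
    # 0.0.3b0 also accepted apply_fdr (and, for permutations, use_cython);
    # both flags are gone in 0.0.3b1
    print(f"P-value cutoff: {pval_cutoff}")
    print(f"FDR BH cutoff: {fdr_cutoff}")  # the new summary line from this hunk

load_graph_sketch({"enrichment_pvals": None}, fdr_cutoff=0.05)
```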
@@ -1,12 +1,15 @@
  """
- risk/stats/permutation/_python/permutation
- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ risk/stats/permutation
+ ~~~~~~~~~~~~~~~~~~~~~~
  """

  import numpy as np

+ # Note: Cython optimizations provided minimal performance benefits.
+ # The final version with Cython is archived in the `cython_permutation` branch.

- def compute_neighborhood_score_by_sum_python(
+
+ def compute_neighborhood_score_by_sum(
      neighborhoods_matrix: np.ndarray, annotation_matrix: np.ndarray
  ) -> np.ndarray:
      """Compute the sum of attribute values for each neighborhood.
@@ -18,12 +21,12 @@ def compute_neighborhood_score_by_sum_python(
      Returns:
          np.ndarray: Sum of attribute values for each neighborhood.
      """
-     # Directly compute the dot product to get the sum of attribute values in each neighborhood
+     # Calculate the neighborhood score as the dot product of neighborhoods and annotations
      neighborhood_score = np.dot(neighborhoods_matrix, annotation_matrix)
      return neighborhood_score


- def compute_neighborhood_score_by_stdev_python(
+ def compute_neighborhood_score_by_stdev(
      neighborhoods_matrix: np.ndarray, annotation_matrix: np.ndarray
  ) -> np.ndarray:
      """Compute the standard deviation of neighborhood scores.
@@ -37,21 +40,20 @@ def compute_neighborhood_score_by_stdev_python(
      """
      # Calculate the neighborhood score as the dot product of neighborhoods and annotations
      neighborhood_score = np.dot(neighborhoods_matrix, annotation_matrix)
-     # Calculate the number of elements in each neighborhood and reshape for broadcasting
+     # Calculate the number of elements in each neighborhood
      N = np.sum(neighborhoods_matrix, axis=1)
-     N_reshaped = N[:, None]
      # Compute the mean of the neighborhood scores
-     M = neighborhood_score / N_reshaped
-     # Compute the mean of squares (EXX) for annotation values
-     EXX = np.dot(neighborhoods_matrix, np.power(annotation_matrix, 2)) / N_reshaped
+     M = neighborhood_score / N[:, None]
+     # Compute the mean of squares (EXX) directly using squared annotation matrix
+     EXX = np.dot(neighborhoods_matrix, annotation_matrix**2) / N[:, None]
      # Calculate variance as EXX - M^2
-     variance = EXX - np.power(M, 2)
+     variance = EXX - M**2
      # Compute the standard deviation as the square root of the variance
      stdev = np.sqrt(variance)
      return stdev


- def compute_neighborhood_score_by_z_score_python(
+ def compute_neighborhood_score_by_z_score(
      neighborhoods_matrix: np.ndarray, annotation_matrix: np.ndarray
  ) -> np.ndarray:
      """Compute Z-scores for neighborhood scores.
@@ -66,18 +68,21 @@ def compute_neighborhood_score_by_z_score_python(
      # Calculate the neighborhood score as the dot product of neighborhoods and annotations
      neighborhood_score = np.dot(neighborhoods_matrix, annotation_matrix)
      # Calculate the number of elements in each neighborhood
-     N = np.dot(neighborhoods_matrix, np.ones(annotation_matrix.shape))
+     N = np.dot(
+         neighborhoods_matrix, np.ones(annotation_matrix.shape[1], dtype=annotation_matrix.dtype)
+     )
      # Compute the mean of the neighborhood scores
      M = neighborhood_score / N
-     # Compute the mean of squares (EXX) and the squared mean (EEX)
-     EXX = np.dot(neighborhoods_matrix, np.power(annotation_matrix, 2)) / N
-     EEX = np.power(M, 2)
+     # Compute the mean of squares (EXX)
+     EXX = np.dot(neighborhoods_matrix, annotation_matrix**2) / N
      # Calculate the standard deviation for each neighborhood
-     std = np.sqrt(EXX - EEX)
+     variance = EXX - M**2
+     std = np.sqrt(variance)
      # Calculate Z-scores, handling cases where std is 0 or N is less than 3
      with np.errstate(divide="ignore", invalid="ignore"):
-         z_scores = np.divide(M, std)
-         z_scores[std == 0] = np.nan  # Handle division by zero
-         z_scores[N < 3] = np.nan  # Apply threshold for minimum number of elements
+         z_scores = M / std
+         z_scores[(std == 0) | (N < 3)] = (
+             np.nan
+         )  # Handle division by zero and apply minimum threshold

      return z_scores
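The renamed scorers are the former pure-Python implementations with minor vectorization cleanups; the Cython twins are gone. The "sum" metric is just a matrix product over binary membership matrices, which a tiny example makes concrete (the import path follows the new `risk/stats/permutation.py` module; the matrices are toy data):

```python
import numpy as np
from risk.stats.permutation import compute_neighborhood_score_by_sum

# 2 neighborhoods over 3 nodes, and 3 nodes annotated with 2 attributes
neighborhoods = np.array([[1, 1, 0],
                          [0, 1, 1]], dtype=np.float32)
annotations = np.array([[1, 0],
                        [1, 1],
                        [0, 1]], dtype=np.float32)

# Entry (i, j) counts how many nodes in neighborhood i carry attribute j
print(compute_neighborhood_score_by_sum(neighborhoods, annotations))
# [[2. 1.]
#  [1. 2.]]
```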
@@ -3,11 +3,14 @@ risk/stats/stats
  ~~~~~~~~~~~~~~~~
  """

- from multiprocessing import Pool, Lock
- from typing import Any, Callable, Union
+ import sys
+ from contextlib import contextmanager
+ from multiprocessing import get_context, Lock
+ from typing import Any, Callable, Generator, Union

  import numpy as np
  from statsmodels.stats.multitest import fdrcorrection
+ from threadpoolctl import threadpool_limits


  def _is_notebook() -> bool:
@@ -35,23 +38,15 @@ else:


  from risk.stats.permutation import (
-     compute_neighborhood_score_by_sum_cython,
-     compute_neighborhood_score_by_stdev_cython,
-     compute_neighborhood_score_by_z_score_cython,
-     compute_neighborhood_score_by_sum_python,
-     compute_neighborhood_score_by_stdev_python,
-     compute_neighborhood_score_by_z_score_python,
+     compute_neighborhood_score_by_sum,
+     compute_neighborhood_score_by_stdev,
+     compute_neighborhood_score_by_z_score,
  )

- CYTHON_DISPATCH_PERMUTATION_TABLE = {
-     "sum": compute_neighborhood_score_by_sum_cython,
-     "stdev": compute_neighborhood_score_by_stdev_cython,
-     "z_score": compute_neighborhood_score_by_z_score_cython,
- }
- PYTHON_DISPATCH_PERMUTATION_TABLE = {
-     "sum": compute_neighborhood_score_by_sum_python,
-     "stdev": compute_neighborhood_score_by_stdev_python,
-     "z_score": compute_neighborhood_score_by_z_score_python,
+ DISPATCH_PERMUTATION_TABLE = {
+     "sum": compute_neighborhood_score_by_sum,
+     "stdev": compute_neighborhood_score_by_stdev,
+     "z_score": compute_neighborhood_score_by_z_score,
  }


@@ -61,7 +56,6 @@ def compute_permutation(
      score_metric: str = "sum",
      null_distribution: str = "network",
      num_permutations: int = 1000,
-     use_cython: bool = True,
      random_seed: int = 888,
      max_workers: int = 1,
  ) -> dict:
@@ -74,7 +68,6 @@ def compute_permutation(
          null_distribution (str, optional): Type of null distribution ('network' or other). Defaults to "network".
          num_permutations (int, optional): Number of permutations to run. Defaults to 1000.
          random_seed (int, optional): Seed for random number generation. Defaults to 888.
-         use_cython (bool, optional): Whether to use Cython for computation. Defaults to True.
          max_workers (int, optional): Number of workers for multiprocessing. Defaults to 1.

      Returns:
@@ -83,11 +76,8 @@ def compute_permutation(
      # Ensure that the matrices are in the correct format and free of NaN values
      neighborhoods = neighborhoods.astype(np.float32)
      annotations = annotations.astype(np.float32)
-     # Retrieve the appropriate scoring function based on the metric and Cython usage
-     if use_cython:
-         neighborhood_score_func = CYTHON_DISPATCH_PERMUTATION_TABLE[score_metric]
-     else:
-         neighborhood_score_func = PYTHON_DISPATCH_PERMUTATION_TABLE[score_metric]
+     # Retrieve the appropriate neighborhood score function based on the metric
+     neighborhood_score_func = DISPATCH_PERMUTATION_TABLE[score_metric]
      # Run the permutation test to calculate depletion and enrichment counts
      counts_depletion, counts_enrichment = _run_permutation_test(
          neighborhoods=neighborhoods,
@@ -135,7 +125,7 @@ def _run_permutation_test(
      """
      # Set the random seed for reproducibility
      np.random.seed(random_seed)
-     # Determine indices based on null distribution type
+     # Determine the indices to use based on the null distribution type
      if null_distribution == "network":
          idxs = range(annotations.shape[0])
      else:
@@ -154,45 +144,29 @@ def _run_permutation_test(
      # Initialize count matrices for depletion and enrichment
      counts_depletion = np.zeros(observed_neighborhood_scores.shape)
      counts_enrichment = np.zeros(observed_neighborhood_scores.shape)
-     # Determine subset size for each worker
+     # Determine the number of permutations to run in each worker process
      subset_size = num_permutations // max_workers
      remainder = num_permutations % max_workers

-     if max_workers == 1:
-         # If single-threaded, run the permutation process directly
-         local_counts_depletion, local_counts_enrichment = _permutation_process_subset(
-             annotations,
-             np.array(idxs),
-             neighborhoods_matrix_obsv,
-             observed_neighborhood_scores,
-             neighborhood_score_func,
-             num_permutations,
-             0,
-             False,
-         )
-         counts_depletion = np.add(counts_depletion, local_counts_depletion)
-         counts_enrichment = np.add(counts_enrichment, local_counts_enrichment)
-     else:
-         # Prepare parameters for multiprocessing
-         params_list = [
-             (
-                 annotations,
-                 idxs,
-                 neighborhoods_matrix_obsv,
-                 observed_neighborhood_scores,
-                 neighborhood_score_func,
-                 subset_size + (1 if i < remainder else 0),
-                 i,
-                 True,
-             )
-             for i in range(max_workers)
-         ]
-
-         # Initialize a multiprocessing pool with a lock
-         lock = Lock()
-         with Pool(max_workers, initializer=_init, initargs=(lock,)) as pool:
+     # Use the spawn context for creating a new multiprocessing pool
+     ctx = get_context("spawn")
+     with ctx.Pool(max_workers, initializer=_init, initargs=(Lock(),)) as pool:
+         with threadpool_limits(limits=1, user_api="blas"):
+             params_list = [
+                 (
+                     annotations,
+                     np.array(idxs),
+                     neighborhoods_matrix_obsv,
+                     observed_neighborhood_scores,
+                     neighborhood_score_func,
+                     subset_size + (1 if i < remainder else 0),
+                     i,
+                     max_workers,
+                     True,
+                 )
+                 for i in range(max_workers)
+             ]
              results = pool.starmap(_permutation_process_subset, params_list)
-             # Accumulate results from each worker
              for local_counts_depletion, local_counts_enrichment in results:
                  counts_depletion = np.add(counts_depletion, local_counts_depletion)
                  counts_enrichment = np.add(counts_enrichment, local_counts_enrichment)
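The rewritten `_run_permutation_test` drops the single-worker special case: every run now goes through a pool created from the "spawn" start method (consistent behavior across Linux, macOS, and Windows), with `threadpool_limits` capping BLAS threading while the pool runs so the `np.dot`-heavy scorers don't oversubscribe cores. A standalone sketch of the same pattern; the worker body is a placeholder, not the package's:

```python
import numpy as np
from multiprocessing import get_context, Lock
from threadpoolctl import threadpool_limits

def _init(lock_):
    global lock
    lock = lock_  # Each worker receives the shared lock at startup

def worker(seed: int) -> float:
    # Stand-in for one permutation subset
    rng = np.random.default_rng(seed)
    return float(rng.random(1_000).sum())

if __name__ == "__main__":  # Required when using the spawn start method
    ctx = get_context("spawn")
    with ctx.Pool(4, initializer=_init, initargs=(Lock(),)) as pool:
        # Limit BLAS threads in this process while the pool is active
        with threadpool_limits(limits=1, user_api="blas"):
            results = pool.map(worker, range(4))
    print(results)
```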
@@ -208,6 +182,7 @@ def _permutation_process_subset(
      neighborhood_score_func: Callable,
      subset_size: int,
      worker_id: int,
+     max_workers: int,
      use_lock: bool,
  ) -> tuple:
      """Process a subset of permutations for the permutation test.
@@ -220,6 +195,7 @@ def _permutation_process_subset(
          neighborhood_score_func (Callable): Function to calculate neighborhood scores.
          subset_size (int): Number of permutations to run in this subset.
          worker_id (int): ID of the worker process.
+         max_workers (int): Number of worker processes.
          use_lock (bool): Whether to use a lock for multiprocessing synchronization.

      Returns:
@@ -229,52 +205,32 @@ def _permutation_process_subset(
      local_counts_depletion = np.zeros(observed_neighborhood_scores.shape)
      local_counts_enrichment = np.zeros(observed_neighborhood_scores.shape)

-     if _is_notebook():
-         # Hack to ensure progress bar displays correctly in Jupyter notebooks
-         print(" ", end="", flush=True)
-
      # Initialize progress bar for tracking permutation progress
-     text = f"Worker {worker_id + 1} Progress"
-     if use_lock:
-         with lock:
-             # Set mininterval to 0.1 to prevent rapid updates and improve performance
-             progress = tqdm(
-                 total=subset_size, desc=text, position=worker_id, leave=False, mininterval=0.1
+     text = f"Worker {worker_id + 1} of {max_workers} progress"
+     leave = worker_id == max_workers - 1  # Only leave the progress bar for the last worker
+
+     with _tqdm_context(
+         total=subset_size, desc=text, position=0, leave=leave, use_lock=use_lock
+     ) as progress:
+         for _ in range(subset_size):
+             # Permute the annotation matrix
+             annotation_matrix_permut = annotation_matrix[np.random.permutation(idxs)]
+             # Calculate permuted neighborhood scores
+             with np.errstate(invalid="ignore", divide="ignore"):
+                 permuted_neighborhood_scores = neighborhood_score_func(
+                     neighborhoods_matrix_obsv, annotation_matrix_permut
+                 )
+             # Update local depletion and enrichment counts based on permuted scores
+             local_counts_depletion = np.add(
+                 local_counts_depletion, permuted_neighborhood_scores <= observed_neighborhood_scores
              )
-     else:
-         progress = tqdm(
-             total=subset_size, desc=text, position=worker_id, leave=False, mininterval=0.1
-         )
-
-     for _ in range(subset_size):
-         # Permute the annotation matrix
-         annotation_matrix_permut = annotation_matrix[np.random.permutation(idxs)]
-         # Calculate permuted neighborhood scores
-         with np.errstate(invalid="ignore", divide="ignore"):
-             permuted_neighborhood_scores = neighborhood_score_func(
-                 neighborhoods_matrix_obsv, annotation_matrix_permut
+             local_counts_enrichment = np.add(
+                 local_counts_enrichment,
+                 permuted_neighborhood_scores >= observed_neighborhood_scores,
              )
-         # Update local depletion and enrichment counts based on permuted scores
-         local_counts_depletion = np.add(
-             local_counts_depletion, permuted_neighborhood_scores <= observed_neighborhood_scores
-         )
-         local_counts_enrichment = np.add(
-             local_counts_enrichment, permuted_neighborhood_scores >= observed_neighborhood_scores
-         )
-         # Update progress bar
-         if use_lock:
-             with lock:
-                 progress.update(1)
-         else:
+             # Update progress bar
              progress.update(1)

-     # Close the progress bar once processing is complete
-     if use_lock:
-         with lock:
-             progress.close()
-     else:
-         progress.close()
-
      return local_counts_depletion, local_counts_enrichment


@@ -288,12 +244,61 @@ def _init(lock_: Any) -> None:
      lock = lock_  # Assign the provided lock to a global variable


+ @contextmanager
+ def _tqdm_context(
+     total: int, desc: str, position: int, leave: bool = False, use_lock: bool = False
+ ) -> Generator:
+     """A context manager for a `tqdm` progress bar.
+
+     Args:
+         total (int): The total number of iterations for the progress bar.
+         desc (str): Description for the progress bar.
+         position (int): The position of the progress bar (useful for multiple bars).
+         leave (bool): Whether to leave the progress bar after completion.
+         use_lock (bool): Whether to use a lock for multiprocessing synchronization.
+
+     Yields:
+         tqdm: A `tqdm` progress bar object.
+     """
+     # Set default parameters for the progress bar
+     min_interval = 0.1
+     # Use a lock for multiprocessing synchronization if specified
+     if use_lock:
+         with lock:
+             # Create a progress bar with specified parameters and direct output to stderr
+             progress = tqdm(
+                 total=total,
+                 desc=desc,
+                 position=position,
+                 leave=leave,
+                 mininterval=min_interval,
+                 file=sys.stderr,
+             )
+         try:
+             yield progress  # Yield the progress bar to the calling context
+         finally:
+             progress.close()  # Ensure the progress bar is closed properly
+     else:
+         # Create a progress bar without using a lock
+         progress = tqdm(
+             total=total,
+             desc=desc,
+             position=position,
+             leave=leave,
+             mininterval=min_interval,
+             file=sys.stderr,
+         )
+         try:
+             yield progress  # Yield the progress bar to the calling context
+         finally:
+             progress.close()  # Ensure the progress bar is closed properly
+
+
  def calculate_significance_matrices(
      depletion_pvals: np.ndarray,
      enrichment_pvals: np.ndarray,
      tail: str = "right",
      pval_cutoff: float = 0.05,
-     apply_fdr: bool = False,
      fdr_cutoff: float = 0.05,
  ) -> dict:
      """Calculate significance matrices based on p-values and specified tail.
@@ -303,14 +308,13 @@ def calculate_significance_matrices(
          enrichment_pvals (np.ndarray): Matrix of enrichment p-values.
          tail (str, optional): The tail type for significance selection ('left', 'right', 'both'). Defaults to 'right'.
          pval_cutoff (float, optional): Cutoff for p-value significance. Defaults to 0.05.
-         apply_fdr (bool, optional): Whether to apply FDR correction. Defaults to False.
          fdr_cutoff (float, optional): Cutoff for FDR significance if applied. Defaults to 0.05.

      Returns:
          dict: Dictionary containing the enrichment matrix, binary significance matrix,
              and the matrix of significant enrichment values.
      """
-     if apply_fdr:
+     if fdr_cutoff < 1.0:
          # Apply FDR correction to depletion p-values
          depletion_qvals = np.apply_along_axis(fdrcorrection, 1, depletion_pvals)[:, 1, :]
          depletion_alpha_threshold_matrix = _compute_threshold_matrix(
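The new gate applies FDR correction whenever `fdr_cutoff < 1.0` instead of behind a separate `apply_fdr` flag; the correction itself is the same row-wise Benjamini-Hochberg call. A small sketch with toy p-values (indexing `[:, 1, :]` keeps the q-values and discards the boolean reject flags that `fdrcorrection` also returns):

```python
import numpy as np
from statsmodels.stats.multitest import fdrcorrection

pvals = np.array([[0.001, 0.02, 0.04, 0.80],
                  [0.50, 0.60, 0.70, 0.90]])

fdr_cutoff = 0.05
if fdr_cutoff < 1.0:  # the new gate: any cutoff below 1.0 activates BH correction
    qvals = np.apply_along_axis(fdrcorrection, 1, pvals)[:, 1, :]
    print(qvals.round(3))
    # [[0.004 0.04  0.053 0.8  ]
    #  [0.9   0.9   0.9   0.9  ]]
```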
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: risk-network
- Version: 0.0.3b0
+ Version: 0.0.3b1
  Summary: A Python package for biological network analysis
  Author: Ira Horecka
  Author-email: Ira Horecka <ira89@icloud.com>
@@ -694,13 +694,13 @@ Classifier: Development Status :: 4 - Beta
  Requires-Python: >=3.7
  Description-Content-Type: text/markdown
  License-File: LICENSE
- Requires-Dist: cython
  Requires-Dist: ipywidgets
  Requires-Dist: markov_clustering
  Requires-Dist: matplotlib
  Requires-Dist: networkx
  Requires-Dist: nltk==3.8.1
  Requires-Dist: numpy
+ Requires-Dist: openpyxl
  Requires-Dist: pandas
  Requires-Dist: python-louvain
  Requires-Dist: scikit-learn
@@ -709,15 +709,21 @@ Requires-Dist: statsmodels
  Requires-Dist: threadpoolctl
  Requires-Dist: tqdm

- # RISK
-
- <ins>Regional Inference of Significant Kinships</ins>
+ <p align="center">
+     <img src="./docs/github/risk-logo-dark.png#gh-dark-mode-only" width="400" />
+     <img src="./docs/github/risk-logo-light.png#gh-light-mode-only" width="400" />
+ </p>

- <p align="left">
-     <img src="./docs/github/risk-logo-dark.png#gh-dark-mode-only" width="40%" />
-     <img src="./docs/github/risk-logo-light.png#gh-light-mode-only" width="40%" />
+ <p align="center">
+     <a href="https://pypi.python.org/pypi/risk-network"><img src="https://img.shields.io/pypi/v/risk-network.svg" alt="pypiv"></a>
+     <a href="https://www.python.org/downloads/"><img src="https://img.shields.io/badge/python-3.7+-blue.svg" alt="Python 3.7+"></a>
+     <a href="https://raw.githubusercontent.com/irahorecka/chrono24/main/LICENSE"><img src="https://img.shields.io/badge/License-GPLv3-blue.svg" alt="License: GPL v3"></a>
  </p>

+ ## RISK
+
+ #### Regional Inference of Significant Kinships
+
  RISK is a software tool for visualizing spatial relationships in networks. It aims to enhance network analysis by integrating advanced network annotation algorithms, such as Louvain and Markov Clustering, to identify key functional modules and pathways.

  ## Features
@@ -13,8 +13,8 @@ risk/log/__init__.py
  risk/log/console.py
  risk/log/params.py
  risk/neighborhoods/__init__.py
+ risk/neighborhoods/community.py
  risk/neighborhoods/domains.py
- risk/neighborhoods/graph.py
  risk/neighborhoods/neighborhoods.py
  risk/network/__init__.py
  risk/network/geometry.py
@@ -22,11 +22,8 @@ risk/network/graph.py
  risk/network/io.py
  risk/network/plot.py
  risk/stats/__init__.py
+ risk/stats/permutation.py
  risk/stats/stats.py
- risk/stats/permutation/__init__.py
- risk/stats/permutation/_cython/permutation.pyx
- risk/stats/permutation/_cython/setup.py
- risk/stats/permutation/_python/permutation.py
  risk_network.egg-info/PKG-INFO
  risk_network.egg-info/SOURCES.txt
  risk_network.egg-info/dependency_links.txt
@@ -1,10 +1,10 @@
- cython
  ipywidgets
  markov_clustering
  matplotlib
  networkx
  nltk==3.8.1
  numpy
+ openpyxl
  pandas
  python-louvain
  scikit-learn
@@ -1,7 +1,5 @@
  import re
- from setuptools import setup, Extension, find_packages
- from Cython.Build import cythonize
-
+ from setuptools import setup, find_packages
  import numpy


@@ -14,15 +12,6 @@ def find_version():
      raise RuntimeError("Unable to find version string.")


- # Cython extension modules
- extensions = [
-     Extension(
-         name="risk.stats.permutation._cython.permutation",
-         sources=["risk/stats/permutation/_cython/permutation.pyx"],
-         include_dirs=[numpy.get_include()],
-     ),
- ]
-
  # Setup function
  setup(
      name="risk-network",
@@ -34,16 +23,15 @@ setup(
      long_description_content_type="text/markdown",
      license="GPL-3.0-or-later",
      packages=find_packages(),
-     ext_modules=cythonize(extensions),  # Compile Cython extensions
      include_package_data=True,
      install_requires=[
-         "cython",
-         "numpy",
          "ipywidgets",
          "markov_clustering",
          "matplotlib",
          "networkx",
          "nltk==3.8.1",
+         "numpy",
+         "openpyxl",
          "pandas",
          "python-louvain",
          "scikit-learn",
@@ -1,15 +0,0 @@
- """
- risk/stats/permutation
- ~~~~~~~~~~~~~~~~~~~~~~
- """
-
- from risk.stats.permutation._cython.permutation import (
-     compute_neighborhood_score_by_sum_cython,
-     compute_neighborhood_score_by_stdev_cython,
-     compute_neighborhood_score_by_z_score_cython,
- )
- from risk.stats.permutation._python.permutation import (
-     compute_neighborhood_score_by_sum_python,
-     compute_neighborhood_score_by_stdev_python,
-     compute_neighborhood_score_by_z_score_python,
- )
@@ -1,82 +0,0 @@
- # cython: language_level=3
- import numpy as np
- cimport numpy as np
- cimport cython
- from threadpoolctl import threadpool_limits
-
-
- @cython.boundscheck(False)  # Disable bounds checking for entire function
- @cython.wraparound(False)  # Disable negative index wrapping for entire function
- def compute_neighborhood_score_by_sum_cython(
-     np.ndarray[np.float32_t, ndim=2] neighborhoods,
-     np.ndarray[np.float32_t, ndim=2] annotation_matrix,
- ):
-     cdef np.float32_t[:, :] neighborhood_score
-     # Limit the number of threads used by np.dot
-     with threadpool_limits(limits=1, user_api='blas'):
-         neighborhood_score = np.dot(neighborhoods, annotation_matrix)
-
-     return np.asarray(neighborhood_score)
-
-
- @cython.boundscheck(False)
- @cython.wraparound(False)
- def compute_neighborhood_score_by_stdev_cython(
-     np.ndarray[np.float32_t, ndim=2] neighborhoods,
-     np.ndarray[np.float32_t, ndim=2] annotation_matrix,
- ):
-     cdef np.ndarray[np.float32_t, ndim=2] neighborhood_score
-     cdef np.ndarray[np.float32_t, ndim=2] EXX
-     # Perform dot product directly using the inputs with limited threads
-     with threadpool_limits(limits=1, user_api='blas'):
-         neighborhood_score = np.dot(neighborhoods, annotation_matrix)
-
-     # Sum across rows for neighborhoods to get N, reshape for broadcasting
-     cdef np.ndarray[np.float32_t, ndim=1] N = np.sum(neighborhoods, axis=1)
-     cdef np.ndarray[np.float32_t, ndim=2] N_reshaped = N[:, None]
-     # Mean of the dot product
-     cdef np.ndarray[np.float32_t, ndim=2] M = neighborhood_score / N_reshaped
-     # Compute the mean of squares (EXX) with limited threads
-     with threadpool_limits(limits=1, user_api='blas'):
-         EXX = np.dot(neighborhoods, np.power(annotation_matrix, 2)) / N_reshaped
-
-     # Variance computation
-     cdef np.ndarray[np.float32_t, ndim=2] variance = EXX - M**2
-     # Standard deviation computation
-     cdef np.ndarray[np.float32_t, ndim=2] stdev = np.sqrt(variance)
-
-     return stdev
-
-
- @cython.boundscheck(False)
- @cython.wraparound(False)
- def compute_neighborhood_score_by_z_score_cython(
-     np.ndarray[np.float32_t, ndim=2] neighborhoods,
-     np.ndarray[np.float32_t, ndim=2] annotation_matrix,
- ):
-     cdef np.ndarray[np.float32_t, ndim=2] neighborhood_score
-     cdef np.ndarray[np.float32_t, ndim=2] EXX
-     # Perform dot product directly using the inputs with limited threads
-     with threadpool_limits(limits=1, user_api='blas'):
-         neighborhood_score = np.dot(neighborhoods, annotation_matrix)
-
-     # Sum across rows for neighborhoods to get N, reshape for broadcasting
-     cdef np.ndarray[np.float32_t, ndim=1] N = np.sum(neighborhoods, axis=1)
-     cdef np.ndarray[np.float32_t, ndim=2] N_reshaped = N[:, None]
-     # Mean of the dot product
-     cdef np.ndarray[np.float32_t, ndim=2] M = neighborhood_score / N_reshaped
-     # Compute the mean of squares (EXX) with limited threads
-     with threadpool_limits(limits=1, user_api='blas'):
-         EXX = np.dot(neighborhoods, np.power(annotation_matrix, 2)) / N_reshaped
-
-     # Variance computation
-     cdef np.ndarray[np.float32_t, ndim=2] variance = EXX - M**2
-     # Standard deviation computation
-     cdef np.ndarray[np.float32_t, ndim=2] stdev = np.sqrt(variance)
-     # Z-score computation with error handling
-     with np.errstate(divide='ignore', invalid='ignore'):
-         neighborhood_score = np.divide(M, stdev)
-     # Handle divisions by zero or stdev == 0
-     neighborhood_score[np.isnan(neighborhood_score)] = 0  # Assuming requirement to reset NaN results to 0
-
-     return neighborhood_score
@@ -1,11 +0,0 @@
- """
- risk/stats/permutation/_cython/setup
- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- """
-
- # setup.py
- from setuptools import setup
- from Cython.Build import cythonize
- import numpy as np
-
- setup(ext_modules=cythonize("permutation.pyx"), include_dirs=[np.get_include()])