risk-network 0.0.7b12__tar.gz → 0.0.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. {risk_network-0.0.7b12 → risk_network-0.0.8}/PKG-INFO +84 -21
  2. risk_network-0.0.8/README.md +102 -0
  3. {risk_network-0.0.7b12 → risk_network-0.0.8}/risk/__init__.py +1 -1
  4. risk_network-0.0.8/risk/annotations/__init__.py +7 -0
  5. {risk_network-0.0.7b12 → risk_network-0.0.8}/risk/annotations/annotations.py +85 -53
  6. {risk_network-0.0.7b12 → risk_network-0.0.8}/risk/annotations/io.py +3 -3
  7. {risk_network-0.0.7b12 → risk_network-0.0.8}/risk/log/__init__.py +1 -1
  8. risk_network-0.0.7b12/risk/log/config.py → risk_network-0.0.8/risk/log/console.py +2 -2
  9. {risk_network-0.0.7b12 → risk_network-0.0.8}/risk/log/params.py +6 -6
  10. {risk_network-0.0.7b12 → risk_network-0.0.8}/risk/neighborhoods/community.py +68 -61
  11. {risk_network-0.0.7b12 → risk_network-0.0.8}/risk/neighborhoods/domains.py +41 -18
  12. {risk_network-0.0.7b12 → risk_network-0.0.8}/risk/neighborhoods/neighborhoods.py +134 -69
  13. {risk_network-0.0.7b12 → risk_network-0.0.8}/risk/network/geometry.py +5 -2
  14. risk_network-0.0.8/risk/network/graph.py +219 -0
  15. {risk_network-0.0.7b12 → risk_network-0.0.8}/risk/network/io.py +44 -6
  16. risk_network-0.0.8/risk/network/plot/__init__.py +6 -0
  17. risk_network-0.0.8/risk/network/plot/canvas.py +290 -0
  18. risk_network-0.0.8/risk/network/plot/contour.py +327 -0
  19. risk_network-0.0.8/risk/network/plot/labels.py +929 -0
  20. risk_network-0.0.8/risk/network/plot/network.py +288 -0
  21. risk_network-0.0.8/risk/network/plot/plotter.py +137 -0
  22. risk_network-0.0.8/risk/network/plot/utils/color.py +424 -0
  23. risk_network-0.0.8/risk/network/plot/utils/layout.py +91 -0
  24. {risk_network-0.0.7b12 → risk_network-0.0.8}/risk/risk.py +70 -37
  25. {risk_network-0.0.7b12 → risk_network-0.0.8}/risk/stats/hypergeom.py +1 -1
  26. {risk_network-0.0.7b12 → risk_network-0.0.8}/risk/stats/permutation/permutation.py +21 -8
  27. {risk_network-0.0.7b12 → risk_network-0.0.8}/risk/stats/poisson.py +2 -2
  28. {risk_network-0.0.7b12 → risk_network-0.0.8}/risk/stats/stats.py +12 -10
  29. {risk_network-0.0.7b12 → risk_network-0.0.8}/risk_network.egg-info/PKG-INFO +84 -21
  30. {risk_network-0.0.7b12 → risk_network-0.0.8}/risk_network.egg-info/SOURCES.txt +9 -2
  31. risk_network-0.0.7b12/README.md +0 -39
  32. risk_network-0.0.7b12/risk/annotations/__init__.py +0 -7
  33. risk_network-0.0.7b12/risk/network/graph.py +0 -385
  34. risk_network-0.0.7b12/risk/network/plot.py +0 -1450
  35. {risk_network-0.0.7b12 → risk_network-0.0.8}/LICENSE +0 -0
  36. {risk_network-0.0.7b12 → risk_network-0.0.8}/MANIFEST.in +0 -0
  37. {risk_network-0.0.7b12 → risk_network-0.0.8}/pyproject.toml +0 -0
  38. {risk_network-0.0.7b12 → risk_network-0.0.8}/risk/constants.py +0 -0
  39. {risk_network-0.0.7b12 → risk_network-0.0.8}/risk/neighborhoods/__init__.py +0 -0
  40. {risk_network-0.0.7b12 → risk_network-0.0.8}/risk/network/__init__.py +0 -0
  41. {risk_network-0.0.7b12 → risk_network-0.0.8}/risk/stats/__init__.py +0 -0
  42. {risk_network-0.0.7b12 → risk_network-0.0.8}/risk/stats/permutation/__init__.py +0 -0
  43. {risk_network-0.0.7b12 → risk_network-0.0.8}/risk/stats/permutation/test_functions.py +0 -0
  44. {risk_network-0.0.7b12 → risk_network-0.0.8}/risk_network.egg-info/dependency_links.txt +0 -0
  45. {risk_network-0.0.7b12 → risk_network-0.0.8}/risk_network.egg-info/requires.txt +0 -0
  46. {risk_network-0.0.7b12 → risk_network-0.0.8}/risk_network.egg-info/top_level.txt +0 -0
  47. {risk_network-0.0.7b12 → risk_network-0.0.8}/setup.cfg +0 -0
  48. {risk_network-0.0.7b12 → risk_network-0.0.8}/setup.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: risk-network
- Version: 0.0.7b12
+ Version: 0.0.8
  Summary: A Python package for biological network analysis
  Author: Ira Horecka
  Author-email: Ira Horecka <ira89@icloud.com>
@@ -709,42 +709,105 @@ Requires-Dist: statsmodels
  Requires-Dist: threadpoolctl
  Requires-Dist: tqdm
 
- <p align="center">
- <img src="https://i.imgur.com/Fo9EmnK.png" width="400" />
- </p>
+ # RISK Network
 
  <p align="center">
- <a href="https://pypi.python.org/pypi/risk-network"><img src="https://img.shields.io/pypi/v/risk-network.svg" alt="pypiv"></a>
- <a href="https://www.python.org/downloads/"><img src="https://img.shields.io/badge/python-3.8+-blue.svg" alt="Python 3.8+"></a>
- <a href="https://raw.githubusercontent.com/irahorecka/chrono24/main/LICENSE"><img src="https://img.shields.io/badge/License-GPLv3-blue.svg" alt="License: GPL v3"></a>
+ <img src="https://i.imgur.com/8TleEJs.png" width="50%" />
  </p>
 
- ## RISK
+ <br>
+
+ ![Python](https://img.shields.io/badge/python-3.8%2B-yellow)
+ [![pypiv](https://img.shields.io/pypi/v/risk-network.svg)](https://pypi.python.org/pypi/risk-network)
+ ![License](https://img.shields.io/badge/license-GPLv3-purple)
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.xxxxxxx.svg)](https://doi.org/10.5281/zenodo.xxxxxxx)
+ ![Downloads](https://img.shields.io/pypi/dm/risk-network)
+ ![Platforms](https://img.shields.io/badge/platform-linux%20%7C%20macos%20%7C%20windows-lightgrey)
+
+ **RISK (RISK Infers Spatial Kinships)** is a next-generation tool designed to streamline the analysis of biological and non-biological networks. RISK enhances network analysis with its modular architecture, extensive file format support, and advanced clustering algorithms. It simplifies the creation of publication-quality figures, making it an important tool for researchers across disciplines.
 
- #### RISK Infers Spatial Kinships
+ ## Documentation and Tutorial
+
+ - **Documentation**: Comprehensive documentation is available [here](Documentation link).
+ - **Tutorial**: An interactive Jupyter notebook tutorial can be found [here](https://github.com/riskportal/network-tutorial).
+ We highly recommend that new users consult the documentation and tutorial early on to fully leverage RISK's capabilities.
+
+ ## Installation
 
- RISK is a software tool for visualizing spatial relationships in networks. It aims to enhance network analysis by integrating advanced network annotation algorithms, such as Louvain and Markov Clustering, to identify key functional modules and pathways.
+ RISK is compatible with Python 3.8 and later versions and operates on all major operating systems. Install RISK via pip:
+
+ ```bash
+ pip install risk-network
+ ```
 
  ## Features
 
- - Spatial analysis of biological networks
- - Functional enrichment detection
- - Optimized performance
+ - **Comprehensive Network Analysis**: Analyze biological networks such as protein–protein interaction (PPI) and gene regulatory networks, as well as non-biological networks.
+ - **Advanced Clustering Algorithms**: Utilize algorithms like Louvain, Markov Clustering, Spinglass, and more to identify key functional modules.
+ - **Flexible Visualization**: Generate clear, publication-quality figures with customizable node and edge attributes, including colors, shapes, sizes, and labels.
+ - **Efficient Data Handling**: Optimized for large datasets, supporting multiple file formats such as JSON, CSV, TSV, Excel, Cytoscape, and GPickle.
+ - **Statistical Analysis**: Integrated statistical tests, including hypergeometric, permutation, and Poisson tests, to assess the significance of enriched regions.
+ - **Cross-Domain Applicability**: Suitable for network analysis across biological and non-biological domains, including social and communication networks.
 
- ## Example
+ ## Example Usage
 
- *Saccharomyces cerevisiae* proteins oriented by physical interactions discovered through affinity enrichment and mass spectrometry (Michaelis et al., 2023).
+ We applied RISK to a *Saccharomyces cerevisiae* protein–protein interaction network, revealing both established and novel functional relationships. The visualization below highlights key biological processes such as ribosomal assembly and mitochondrial organization.
 
- ![PPI Network Demo](https://i.imgur.com/NnyK6nO.png)
+ ![RISK Main Figure](https://i.imgur.com/5OP3Hqe.jpeg)
 
- ## Installation
+ RISK successfully detected both known and novel functional clusters within the yeast interactome. Clusters related to Golgi transport and actin nucleation were clearly defined and closely located, showcasing RISK's ability to map well-characterized interactions. Additionally, RISK identified links between mRNA processing pathways and vesicle trafficking proteins, consistent with recent studies demonstrating the role of vesicles in mRNA localization and stability.
+
+ ## Citation
+
+ If you use RISK in your research, please cite the following:
+
+ **Horecka**, *et al.*, "RISK: a next-generation tool for biological network annotation and visualization", **[Journal Name]**, 2024. DOI: [10.1234/zenodo.xxxxxxx](https://doi.org/10.1234/zenodo.xxxxxxx)
+
+ ## Software Architecture and Implementation
 
- Coming soon...
+ RISK features a streamlined, modular architecture designed to meet diverse research needs. Each module focuses on a specific task—such as network input/output, statistical analysis, or visualization—ensuring ease of adaptation and extension. This design enhances flexibility and reduces development overhead for users integrating RISK into their workflows.
 
- ## Usage
+ ### Supported Data Formats
 
- Coming soon...
+ - **Input/Output**: JSON, CSV, TSV, Excel, Cytoscape, GPickle.
+ - **Visualization Outputs**: SVG, PNG, PDF.
+
+ ### Clustering Algorithms
+
+ - **Available Algorithms**:
+   - Greedy Modularity
+   - Label Propagation
+   - Louvain
+   - Markov Clustering
+   - Spinglass
+   - Walktrap
+ - **Distance Metrics**: Supports both spherical and Euclidean distance metrics.
+
+ ### Statistical Tests
+
+ - **Hypergeometric Test**
+ - **Permutation Test** (single- or multi-process modes)
+ - **Poisson Test**
+
+ ## Performance and Efficiency
+
+ In benchmarking tests using the yeast interactome network, RISK demonstrated substantial improvements over previous tools in both computational performance and memory efficiency. RISK processed the dataset approximately **3.25 times faster**, reducing CPU time by **69%**, and required **25% less peak memory usage**, underscoring its efficient utilization of computational resources.
+
+ ## Contributing
+
+ We welcome contributions from the community. Please use the following resources:
+
+ - [Issues Tracker](https://github.com/irahorecka/risk/issues)
+ - [Source Code](https://github.com/irahorecka/risk/tree/main/risk)
+
+ ## Support
+
+ If you encounter issues or have suggestions for new features, please use the [Issues Tracker](https://github.com/irahorecka/risk/issues) on GitHub.
 
  ## License
 
- This project is licensed under the GPL-3.0 license.
+ RISK is freely available as open-source software under the [GNU General Public License v3.0](https://www.gnu.org/licenses/gpl-3.0.en.html).
+
+ ---
+
+ **Note**: For detailed documentation and to access the interactive tutorial, please visit the links provided in the [Documentation and Tutorial](#documentation-and-tutorial) section.
@@ -0,0 +1,102 @@
+ # RISK Network
+
+ <p align="center">
+ <img src="https://i.imgur.com/8TleEJs.png" width="50%" />
+ </p>
+
+ <br>
+
+ ![Python](https://img.shields.io/badge/python-3.8%2B-yellow)
+ [![pypiv](https://img.shields.io/pypi/v/risk-network.svg)](https://pypi.python.org/pypi/risk-network)
+ ![License](https://img.shields.io/badge/license-GPLv3-purple)
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.xxxxxxx.svg)](https://doi.org/10.5281/zenodo.xxxxxxx)
+ ![Downloads](https://img.shields.io/pypi/dm/risk-network)
+ ![Platforms](https://img.shields.io/badge/platform-linux%20%7C%20macos%20%7C%20windows-lightgrey)
+
+ **RISK (RISK Infers Spatial Kinships)** is a next-generation tool designed to streamline the analysis of biological and non-biological networks. RISK enhances network analysis with its modular architecture, extensive file format support, and advanced clustering algorithms. It simplifies the creation of publication-quality figures, making it an important tool for researchers across disciplines.
+
+ ## Documentation and Tutorial
+
+ - **Documentation**: Comprehensive documentation is available [here](Documentation link).
+ - **Tutorial**: An interactive Jupyter notebook tutorial can be found [here](https://github.com/riskportal/network-tutorial).
+ We highly recommend that new users consult the documentation and tutorial early on to fully leverage RISK's capabilities.
+
+ ## Installation
+
+ RISK is compatible with Python 3.8 and later versions and operates on all major operating systems. Install RISK via pip:
+
+ ```bash
+ pip install risk-network
+ ```
+
+ ## Features
+
+ - **Comprehensive Network Analysis**: Analyze biological networks such as protein–protein interaction (PPI) and gene regulatory networks, as well as non-biological networks.
+ - **Advanced Clustering Algorithms**: Utilize algorithms like Louvain, Markov Clustering, Spinglass, and more to identify key functional modules.
+ - **Flexible Visualization**: Generate clear, publication-quality figures with customizable node and edge attributes, including colors, shapes, sizes, and labels.
+ - **Efficient Data Handling**: Optimized for large datasets, supporting multiple file formats such as JSON, CSV, TSV, Excel, Cytoscape, and GPickle.
+ - **Statistical Analysis**: Integrated statistical tests, including hypergeometric, permutation, and Poisson tests, to assess the significance of enriched regions.
+ - **Cross-Domain Applicability**: Suitable for network analysis across biological and non-biological domains, including social and communication networks.
+
+ ## Example Usage
+
+ We applied RISK to a *Saccharomyces cerevisiae* protein–protein interaction network, revealing both established and novel functional relationships. The visualization below highlights key biological processes such as ribosomal assembly and mitochondrial organization.
+
+ ![RISK Main Figure](https://i.imgur.com/5OP3Hqe.jpeg)
+
+ RISK successfully detected both known and novel functional clusters within the yeast interactome. Clusters related to Golgi transport and actin nucleation were clearly defined and closely located, showcasing RISK's ability to map well-characterized interactions. Additionally, RISK identified links between mRNA processing pathways and vesicle trafficking proteins, consistent with recent studies demonstrating the role of vesicles in mRNA localization and stability.
+
+ ## Citation
+
+ If you use RISK in your research, please cite the following:
+
+ **Horecka**, *et al.*, "RISK: a next-generation tool for biological network annotation and visualization", **[Journal Name]**, 2024. DOI: [10.1234/zenodo.xxxxxxx](https://doi.org/10.1234/zenodo.xxxxxxx)
+
+ ## Software Architecture and Implementation
+
+ RISK features a streamlined, modular architecture designed to meet diverse research needs. Each module focuses on a specific task—such as network input/output, statistical analysis, or visualization—ensuring ease of adaptation and extension. This design enhances flexibility and reduces development overhead for users integrating RISK into their workflows.
+
+ ### Supported Data Formats
+
+ - **Input/Output**: JSON, CSV, TSV, Excel, Cytoscape, GPickle.
+ - **Visualization Outputs**: SVG, PNG, PDF.
+
+ ### Clustering Algorithms
+
+ - **Available Algorithms**:
+   - Greedy Modularity
+   - Label Propagation
+   - Louvain
+   - Markov Clustering
+   - Spinglass
+   - Walktrap
+ - **Distance Metrics**: Supports both spherical and Euclidean distance metrics.
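For orientation, the community-detection step named in the list above can be reproduced outside RISK with NetworkX's own Louvain implementation (available in networkx 2.8 and later). A minimal, illustrative sketch, not RISK's internal API; the karate club graph is a stand-in network:

```python
# Illustrative only: Louvain community detection via networkx, one of the
# algorithms listed in the README above.
import networkx as nx
from networkx.algorithms.community import louvain_communities

G = nx.karate_club_graph()  # stand-in network for illustration
communities = louvain_communities(G, seed=42)  # list of node sets, one per module
for i, members in enumerate(communities):
    print(f"module {i}: {len(members)} nodes")
```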
+
+ ### Statistical Tests
+
+ - **Hypergeometric Test**
+ - **Permutation Test** (single- or multi-process modes)
+ - **Poisson Test**
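The hypergeometric test above follows the standard enrichment setup: given a network of N nodes of which K carry an annotation, how surprising is it that a neighborhood of n nodes contains k annotated members? A minimal sketch with SciPy, using hypothetical counts; this is illustrative only, not RISK's implementation:

```python
# Illustrative only: the survival function gives P(X >= k), the enrichment p-value.
from scipy.stats import hypergeom

N, K, n, k = 5000, 120, 40, 8  # hypothetical network/annotation/neighborhood counts
p_value = hypergeom.sf(k - 1, N, K, n)
print(f"enrichment p-value: {p_value:.2e}")
```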
+
+ ## Performance and Efficiency
+
+ In benchmarking tests using the yeast interactome network, RISK demonstrated substantial improvements over previous tools in both computational performance and memory efficiency. RISK processed the dataset approximately **3.25 times faster**, reducing CPU time by **69%**, and required **25% less peak memory usage**, underscoring its efficient utilization of computational resources.
+
+ ## Contributing
+
+ We welcome contributions from the community. Please use the following resources:
+
+ - [Issues Tracker](https://github.com/irahorecka/risk/issues)
+ - [Source Code](https://github.com/irahorecka/risk/tree/main/risk)
+
+ ## Support
+
+ If you encounter issues or have suggestions for new features, please use the [Issues Tracker](https://github.com/irahorecka/risk/issues) on GitHub.
+
+ ## License
+
+ RISK is freely available as open-source software under the [GNU General Public License v3.0](https://www.gnu.org/licenses/gpl-3.0.en.html).
+
+ ---
+
+ **Note**: For detailed documentation and to access the interactive tutorial, please visit the links provided in the [Documentation and Tutorial](#documentation-and-tutorial) section.
@@ -7,4 +7,4 @@ RISK: RISK Infers Spatial Kinships
 
  from risk.risk import RISK
 
- __version__ = "0.0.7-beta.12"
+ __version__ = "0.0.8"
@@ -0,0 +1,7 @@
+ """
+ risk/annotations
+ ~~~~~~~~~~~~~~~~
+ """
+
+ from .annotations import define_top_annotations, get_weighted_description
+ from .io import AnnotationsIO
@@ -3,6 +3,7 @@ risk/annotations/annotations
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  """
 
+ import re
  from collections import Counter
  from itertools import compress
  from typing import Any, Dict, List, Set
@@ -30,27 +31,30 @@ def _setup_nltk():
 
  # Ensure you have the necessary NLTK data
  _setup_nltk()
+ # Initialize English stopwords
+ stop_words = set(stopwords.words("english"))
 
 
  def load_annotations(network: nx.Graph, annotations_input: Dict[str, Any]) -> Dict[str, Any]:
      """Convert annotations input to a DataFrame and reindex based on the network's node labels.
 
      Args:
-         annotations_input (dict): A dictionary with annotations.
+         network (nx.Graph): The network graph.
+         annotations_input (Dict[str, Any]): A dictionary with annotations.
 
      Returns:
-         dict: A dictionary containing ordered nodes, ordered annotations, and the binary annotations matrix.
+         Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the binary annotations matrix.
      """
      # Flatten the dictionary to a list of tuples for easier DataFrame creation
      flattened_annotations = [
          (node, annotation) for annotation, nodes in annotations_input.items() for node in nodes
      ]
      # Create a DataFrame from the flattened list
-     annotations = pd.DataFrame(flattened_annotations, columns=["Node", "Annotations"])
-     annotations["Is Member"] = 1
+     annotations = pd.DataFrame(flattened_annotations, columns=["node", "annotations"])
+     annotations["is_member"] = 1
      # Pivot to create a binary matrix with nodes as rows and annotations as columns
      annotations_pivot = annotations.pivot_table(
-         index="Node", columns="Annotations", values="Is Member", fill_value=0, dropna=False
+         index="node", columns="annotations", values="is_member", fill_value=0, dropna=False
      )
      # Reindex the annotations matrix based on the node labels from the network
      node_label_order = list(nx.get_node_attributes(network, "label").values())
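The flatten-and-pivot logic in the hunk above builds a binary node-by-annotation membership matrix. A standalone sketch of that step with hypothetical term and node names, mirroring the lines shown:

```python
# Mirrors the diff above with toy data; GO terms and gene names are hypothetical.
import pandas as pd

annotations_input = {"GO:0006914": ["ATG1", "ATG13"], "GO:0034045": ["ATG13"]}
flattened = [(node, term) for term, nodes in annotations_input.items() for node in nodes]
df = pd.DataFrame(flattened, columns=["node", "annotations"])
df["is_member"] = 1
# Pivot to a binary matrix: rows are nodes, columns are annotation terms
pivot = df.pivot_table(
    index="node", columns="annotations", values="is_member", fill_value=0, dropna=False
)
print(pivot)  # ATG1 row -> [1, 0]; ATG13 row -> [1, 1]
```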
@@ -80,7 +84,8 @@ def define_top_annotations(
      network: nx.Graph,
      ordered_annotation_labels: List[str],
      neighborhood_enrichment_sums: List[int],
-     binary_enrichment_matrix: np.ndarray,
+     significant_enrichment_matrix: np.ndarray,
+     significant_binary_enrichment_matrix: np.ndarray,
      min_cluster_size: int = 5,
      max_cluster_size: int = 1000,
  ) -> pd.DataFrame:
@@ -90,42 +95,52 @@ def define_top_annotations(
          network (NetworkX graph): The network graph.
          ordered_annotation_labels (list of str): List of ordered annotation labels.
          neighborhood_enrichment_sums (list of int): List of neighborhood enrichment sums.
-         binary_enrichment_matrix (np.ndarray): Binary enrichment matrix below alpha threshold.
+         significant_enrichment_matrix (np.ndarray): Enrichment matrix below alpha threshold.
+         significant_binary_enrichment_matrix (np.ndarray): Binary enrichment matrix below alpha threshold.
          min_cluster_size (int, optional): Minimum cluster size. Defaults to 5.
          max_cluster_size (int, optional): Maximum cluster size. Defaults to 1000.
 
      Returns:
          pd.DataFrame: DataFrame with top annotations and their properties.
      """
-     # Create DataFrame to store annotations and their neighborhood enrichment sums
+     # Sum the columns of the significant enrichment matrix (positive floating point values)
+     significant_enrichment_scores = significant_enrichment_matrix.sum(axis=0)
+     # Create DataFrame to store annotations, their neighborhood enrichment sums, and enrichment scores
      annotations_enrichment_matrix = pd.DataFrame(
          {
              "id": range(len(ordered_annotation_labels)),
-             "words": ordered_annotation_labels,
-             "neighborhood enrichment sums": neighborhood_enrichment_sums,
+             "full_terms": ordered_annotation_labels,
+             "significant_neighborhood_enrichment_sums": neighborhood_enrichment_sums,
+             "significant_enrichment_score": significant_enrichment_scores,
          }
      )
-     annotations_enrichment_matrix["top attributes"] = False
-     # Apply size constraints to identify potential top attributes
+     annotations_enrichment_matrix["significant_annotations"] = False
+     # Apply size constraints to identify potential significant annotations
      annotations_enrichment_matrix.loc[
-         (annotations_enrichment_matrix["neighborhood enrichment sums"] >= min_cluster_size)
-         & (annotations_enrichment_matrix["neighborhood enrichment sums"] <= max_cluster_size),
-         "top attributes",
+         (
+             annotations_enrichment_matrix["significant_neighborhood_enrichment_sums"]
+             >= min_cluster_size
+         )
+         & (
+             annotations_enrichment_matrix["significant_neighborhood_enrichment_sums"]
+             <= max_cluster_size
+         ),
+         "significant_annotations",
      ] = True
      # Initialize columns for connected components analysis
-     annotations_enrichment_matrix["num connected components"] = 0
-     annotations_enrichment_matrix["size connected components"] = None
-     annotations_enrichment_matrix["size connected components"] = annotations_enrichment_matrix[
-         "size connected components"
+     annotations_enrichment_matrix["num_connected_components"] = 0
+     annotations_enrichment_matrix["size_connected_components"] = None
+     annotations_enrichment_matrix["size_connected_components"] = annotations_enrichment_matrix[
+         "size_connected_components"
      ].astype(object)
-     annotations_enrichment_matrix["num large connected components"] = 0
+     annotations_enrichment_matrix["num_large_connected_components"] = 0
 
      for attribute in annotations_enrichment_matrix.index.values[
-         annotations_enrichment_matrix["top attributes"]
+         annotations_enrichment_matrix["significant_annotations"]
      ]:
          # Identify enriched neighborhoods based on the binary enrichment matrix
          enriched_neighborhoods = list(
-             compress(list(network), binary_enrichment_matrix[:, attribute])
+             compress(list(network), significant_binary_enrichment_matrix[:, attribute])
          )
          enriched_network = nx.subgraph(network, enriched_neighborhoods)
          # Analyze connected components within the enriched subnetwork
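The loop shown above subsets the network to the nodes flagged in one column of the binary matrix and then inspects the connected components of that subgraph; as the next hunk shows, annotations whose enriched nodes split into more than one component are marked not significant. A toy sketch of that check:

```python
# Illustrative only: two disconnected significant regions -> annotation rejected.
from itertools import compress

import networkx as nx
import numpy as np

network = nx.path_graph(6)  # toy chain 0-1-2-3-4-5
column = np.array([1, 1, 0, 0, 1, 1], dtype=bool)  # hypothetical significance flags
subnetwork = nx.subgraph(network, list(compress(list(network), column)))
sizes = [len(c) for c in nx.connected_components(subnetwork)]
print(sizes)  # [2, 2]: more than one component
```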
@@ -144,57 +159,74 @@ def define_top_annotations(
          num_large_connected_components = len(filtered_size_connected_components)
 
          # Assign the number of connected components
-         annotations_enrichment_matrix.loc[attribute, "num connected components"] = (
+         annotations_enrichment_matrix.loc[attribute, "num_connected_components"] = (
              num_connected_components
          )
          # Filter out attributes with more than one connected component
          annotations_enrichment_matrix.loc[
-             annotations_enrichment_matrix["num connected components"] > 1, "top attributes"
+             annotations_enrichment_matrix["num_connected_components"] > 1, "significant_annotations"
          ] = False
          # Assign the number of large connected components
-         annotations_enrichment_matrix.loc[attribute, "num large connected components"] = (
+         annotations_enrichment_matrix.loc[attribute, "num_large_connected_components"] = (
              num_large_connected_components
          )
          # Assign the size of connected components, ensuring it is always a list
-         annotations_enrichment_matrix.at[attribute, "size connected components"] = (
+         annotations_enrichment_matrix.at[attribute, "size_connected_components"] = (
              filtered_size_connected_components.tolist()
          )
 
      return annotations_enrichment_matrix
 
 
- def get_description(words_column: pd.Series) -> str:
-     """Process input Series to identify and return the top frequent, significant words,
-     filtering based on stopwords and gracefully handling numerical strings.
+ def get_weighted_description(words_column: pd.Series, scores_column: pd.Series) -> str:
+     """Generate a weighted description from words and their corresponding scores,
+     with support for stopwords filtering and improved weighting logic.
 
      Args:
          words_column (pd.Series): A pandas Series containing strings to process.
+         scores_column (pd.Series): A pandas Series containing enrichment scores to weigh the terms.
 
      Returns:
-         str: A coherent description formed from the most frequent and significant words.
+         str: A coherent description formed from the most frequent and significant words, weighed by enrichment scores.
      """
-     # Concatenate all rows into a single string and tokenize into words
-     all_words = words_column.str.cat(sep=" ")
-     tokens = word_tokenize(all_words)
-
-     # Separate numeric tokens
-     numeric_tokens = [token for token in tokens if token.replace(".", "", 1).isdigit()]
-     # If there's only one unique numeric value, return it directly as a string
-     unique_numeric_values = set(numeric_tokens)
-     if len(unique_numeric_values) == 1:
-         return f"{list(unique_numeric_values)[0]}"
-
-     # Ensure that all values in 'words' are strings and include both alphabetic and numeric tokens
-     words = [
-         str(
-             word.lower() if word.istitle() else word
-         )  # Convert to string and lowercase all words except proper nouns (e.g., RNA, mRNA)
-         for word in tokens
-         if word.isalpha()
-         or word.replace(".", "", 1).isdigit()  # Keep alphabetic words and numeric strings
-     ]
-     # Generate a coherent description from the processed words
-     description = _generate_coherent_description(words)
+     # Handle case where all scores are the same
+     if scores_column.max() == scores_column.min():
+         normalized_scores = pd.Series([1] * len(scores_column))
+     else:
+         # Normalize the enrichment scores to be between 0 and 1
+         normalized_scores = (scores_column - scores_column.min()) / (
+             scores_column.max() - scores_column.min()
+         )
+
+     # Combine words and normalized scores to create weighted words
+     weighted_words = []
+     for word, score in zip(words_column, normalized_scores):
+         word = str(word)
+         if word not in stop_words:  # Skip stopwords
+             weight = max(1, int((0 if pd.isna(score) else score) * 10))
+             weighted_words.extend([word] * weight)
+
+     # Tokenize the weighted words, but preserve number-word patterns like '4-alpha'
+     tokens = word_tokenize(" ".join(weighted_words))
+     # Ensure we treat "4-alpha" or other "number-word" patterns as single tokens
+     combined_tokens = []
+     for token in tokens:
+         # Match patterns like '4-alpha' or '5-hydroxy' and keep them together
+         if re.match(r"^\d+-\w+", token):
+             combined_tokens.append(token)
+         elif token.replace(".", "", 1).isdigit():  # Handle pure numeric tokens
+             # Ignore pure numbers as descriptions unless necessary
+             continue
+         else:
+             combined_tokens.append(token)
+
+     # Prevent descriptions like just '4' from being selected
+     if len(combined_tokens) == 1 and combined_tokens[0].isdigit():
+         return "N/A"  # Return "N/A" for cases where it's just a number
+
+     # Simplify the word list and generate the description
+     simplified_words = _simplify_word_list(combined_tokens)
+     description = _generate_coherent_description(simplified_words)
 
      return description
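The weighting logic added above min-max normalizes the enrichment scores and then repeats each non-stopword term between 1 and 10 times in proportion to its score before tokenizing. A standalone sketch of that step with toy values:

```python
# Mirrors the normalization and weighting lines above with toy scores.
import pandas as pd

scores = pd.Series([2.0, 5.0, 5.0, 11.0])
if scores.max() == scores.min():
    normalized = pd.Series([1] * len(scores))
else:
    normalized = (scores - scores.min()) / (scores.max() - scores.min())
weights = [max(1, int(s * 10)) for s in normalized]
print(weights)  # [1, 3, 3, 10]: each term is repeated `weight` times
```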
 
@@ -257,7 +289,7 @@ def _generate_coherent_description(words: List[str]) -> str:
      If there is only one unique entry, return it directly.
 
      Args:
-         words (list): A list of words or numerical string values.
+         words (List): A list of words or numerical string values.
 
      Returns:
          str: A coherent description formed by arranging the words in a logical sequence.
@@ -33,7 +33,7 @@ class AnnotationsIO:
              filepath (str): Path to the JSON annotations file.
 
          Returns:
-             dict: A dictionary containing ordered nodes, ordered annotations, and the annotations matrix.
+             Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotations matrix.
          """
          filetype = "JSON"
          # Log the loading of the JSON file
@@ -158,10 +158,10 @@ class AnnotationsIO:
 
          Args:
              network (NetworkX graph): The network to which the annotations are related.
-             content (dict): The annotations dictionary to load.
+             content (Dict[str, Any]): The annotations dictionary to load.
 
          Returns:
-             dict: A dictionary containing ordered nodes, ordered annotations, and the annotations matrix.
+             Dict[str, Any]: A dictionary containing ordered nodes, ordered annotations, and the annotations matrix.
          """
          # Ensure the input content is a dictionary
          if not isinstance(content, dict):
@@ -3,7 +3,7 @@ risk/log
  ~~~~~~~~
  """
 
- from .config import logger, log_header, set_global_verbosity
+ from .console import logger, log_header, set_global_verbosity
  from .params import Params
 
  params = Params()
@@ -1,6 +1,6 @@
  """
- risk/log/config
- ~~~~~~~~~~~~~~~
+ risk/log/console
+ ~~~~~~~~~~~~~~~~
  """
 
  import logging
@@ -12,7 +12,7 @@ from typing import Any, Dict
 
  import numpy as np
 
- from .config import logger, log_header
+ from .console import logger, log_header
 
  # Suppress all warnings - this is to resolve warnings from multiprocessing
  warnings.filterwarnings("ignore")
@@ -159,7 +159,7 @@ class Params:
          """Load and process various parameters, converting any np.ndarray values to lists.
 
          Returns:
-             dict: A dictionary containing the processed parameters.
+             Dict[str, Any]: A dictionary containing the processed parameters.
          """
          log_header("Loading parameters")
          return _convert_ndarray_to_list(
@@ -174,14 +174,14 @@ class Params:
          )
 
 
- def _convert_ndarray_to_list(d: Any) -> Any:
+ def _convert_ndarray_to_list(d: Dict[str, Any]) -> Dict[str, Any]:
      """Recursively convert all np.ndarray values in the dictionary to lists.
 
      Args:
-         d (dict): The dictionary to process.
+         d (Dict[str, Any]): The dictionary to process.
 
      Returns:
-         dict: The processed dictionary with np.ndarray values converted to lists.
+         Dict[str, Any]: The processed dictionary with np.ndarray values converted to lists.
      """
      if isinstance(d, dict):
          # Recursively process each value in the dictionary
@@ -193,5 +193,5 @@ def _convert_ndarray_to_list(d: Any) -> Any:
          # Convert numpy arrays to lists
          return d.tolist()
      else:
-         # Return the value unchanged if it's not a dict, list, or ndarray
+         # Return the value unchanged if it's not a dict, List, or ndarray
          return d
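For reference, the recursive conversion documented in these hunks can be mirrored in a few lines; a minimal sketch of the same behavior, not the package source:

```python
# Recursively replace np.ndarray values inside dicts/lists with plain lists.
from typing import Any

import numpy as np

def convert_ndarray_to_list(d: Any) -> Any:
    if isinstance(d, dict):
        return {k: convert_ndarray_to_list(v) for k, v in d.items()}
    if isinstance(d, list):
        return [convert_ndarray_to_list(v) for v in d]
    if isinstance(d, np.ndarray):
        return d.tolist()
    return d

print(convert_ndarray_to_list({"m": np.eye(2)}))  # {'m': [[1.0, 0.0], [0.0, 1.0]]}
```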