acore 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- acore-0.1.0/AUTHORS.rst +13 -0
- acore-0.1.0/CONTRIBUTING.rst +128 -0
- acore-0.1.0/HISTORY.rst +8 -0
- acore-0.1.0/LICENSE +33 -0
- acore-0.1.0/MANIFEST.in +11 -0
- acore-0.1.0/PKG-INFO +112 -0
- acore-0.1.0/README.rst +72 -0
- acore-0.1.0/acore/__init__.py +1 -0
- acore-0.1.0/acore/correlation_analysis.py +194 -0
- acore-0.1.0/acore/differential_regulation.py +743 -0
- acore-0.1.0/acore/enrichment_analysis.py +296 -0
- acore-0.1.0/acore/exploratory_analysis.py +297 -0
- acore-0.1.0/acore/imputation_analysis.py +111 -0
- acore-0.1.0/acore/kaplan_meier_analysis.py +108 -0
- acore-0.1.0/acore/multiple_testing.py +189 -0
- acore-0.1.0/acore/network_analysis.py +146 -0
- acore-0.1.0/acore/normalization_analysis.py +258 -0
- acore-0.1.0/acore/power_analysis.py +43 -0
- acore-0.1.0/acore/publications_analysis.py +62 -0
- acore-0.1.0/acore/tda_analysis.py +39 -0
- acore-0.1.0/acore/utils.py +297 -0
- acore-0.1.0/acore/wgcna_analysis.py +0 -0
- acore-0.1.0/acore.egg-info/PKG-INFO +112 -0
- acore-0.1.0/acore.egg-info/SOURCES.txt +42 -0
- acore-0.1.0/acore.egg-info/dependency_links.txt +1 -0
- acore-0.1.0/acore.egg-info/not-zip-safe +1 -0
- acore-0.1.0/acore.egg-info/requires.txt +19 -0
- acore-0.1.0/acore.egg-info/top_level.txt +1 -0
- acore-0.1.0/docs/authors.rst +1 -0
- acore-0.1.0/docs/conf.py +198 -0
- acore-0.1.0/docs/contributing.rst +1 -0
- acore-0.1.0/docs/index.rst +20 -0
- acore-0.1.0/docs/installation.rst +51 -0
- acore-0.1.0/docs/readme.rst +1 -0
- acore-0.1.0/docs/usage.rst +7 -0
- acore-0.1.0/pyproject.toml +61 -0
- acore-0.1.0/setup.cfg +26 -0
- acore-0.1.0/setup.py +18 -0
- acore-0.1.0/tests/__init__.py +0 -0
- acore-0.1.0/tests/test_correlation.py +44 -0
- acore-0.1.0/tests/test_differential_regulation.py +117 -0
- acore-0.1.0/tests/test_enrichment.py +35 -0
- acore-0.1.0/tests/test_exploratory.py +94 -0
acore-0.1.0/AUTHORS.rst
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
.. highlight:: shell
|
|
2
|
+
|
|
3
|
+
============
|
|
4
|
+
Contributing
|
|
5
|
+
============
|
|
6
|
+
|
|
7
|
+
Contributions are welcome, and they are greatly appreciated! Every little bit
|
|
8
|
+
helps, and credit will always be given.
|
|
9
|
+
|
|
10
|
+
You can contribute in many ways:
|
|
11
|
+
|
|
12
|
+
Types of Contributions
|
|
13
|
+
----------------------
|
|
14
|
+
|
|
15
|
+
Report Bugs
|
|
16
|
+
~~~~~~~~~~~
|
|
17
|
+
|
|
18
|
+
Report bugs at https://github.com/Multiomics-Analytics-Group/acore/issues.
|
|
19
|
+
|
|
20
|
+
If you are reporting a bug, please include:
|
|
21
|
+
|
|
22
|
+
* Your operating system name and version.
|
|
23
|
+
* Any details about your local setup that might be helpful in troubleshooting.
|
|
24
|
+
* Detailed steps to reproduce the bug.
|
|
25
|
+
|
|
26
|
+
Fix Bugs
|
|
27
|
+
~~~~~~~~
|
|
28
|
+
|
|
29
|
+
Look through the GitHub issues for bugs. Anything tagged with "bug" and "help
|
|
30
|
+
wanted" is open to whoever wants to implement it.
|
|
31
|
+
|
|
32
|
+
Implement Features
|
|
33
|
+
~~~~~~~~~~~~~~~~~~
|
|
34
|
+
|
|
35
|
+
Look through the GitHub issues for features. Anything tagged with "enhancement"
|
|
36
|
+
and "help wanted" is open to whoever wants to implement it.
|
|
37
|
+
|
|
38
|
+
Write Documentation
|
|
39
|
+
~~~~~~~~~~~~~~~~~~~
|
|
40
|
+
|
|
41
|
+
acore could always use more documentation, whether as part of the
|
|
42
|
+
official acore docs, in docstrings, or even on the web in blog posts,
|
|
43
|
+
articles, and such.
|
|
44
|
+
|
|
45
|
+
Submit Feedback
|
|
46
|
+
~~~~~~~~~~~~~~~
|
|
47
|
+
|
|
48
|
+
The best way to send feedback is to file an issue at https://github.com/Multiomics-Analytics-Group/acore/issues.
|
|
49
|
+
|
|
50
|
+
If you are proposing a feature:
|
|
51
|
+
|
|
52
|
+
* Explain in detail how it would work.
|
|
53
|
+
* Keep the scope as narrow as possible, to make it easier to implement.
|
|
54
|
+
* Remember that this is a volunteer-driven project, and that contributions
|
|
55
|
+
are welcome :)
|
|
56
|
+
|
|
57
|
+
Get Started!
|
|
58
|
+
------------
|
|
59
|
+
|
|
60
|
+
Ready to contribute? Here's how to set up `acore` for local development.
|
|
61
|
+
|
|
62
|
+
1. Fork the `acore` repo on GitHub.
|
|
63
|
+
2. Clone your fork locally::
|
|
64
|
+
|
|
65
|
+
$ git clone git@github.com:your_name_here/acore.git
|
|
66
|
+
|
|
67
|
+
3. Install your local copy into a virtualenv. Assuming you have virtualenvwrapper installed, this is how you set up your fork for local development::
|
|
68
|
+
|
|
69
|
+
$ mkvirtualenv acore
|
|
70
|
+
$ cd acore/
|
|
71
|
+
$ python setup.py develop
|
|
72
|
+
|
|
73
|
+
4. Create a branch for local development::
|
|
74
|
+
|
|
75
|
+
$ git checkout -b name-of-your-bugfix-or-feature
|
|
76
|
+
|
|
77
|
+
Now you can make your changes locally.
|
|
78
|
+
|
|
79
|
+
5. When you're done making changes, check that your changes pass flake8 and the
|
|
80
|
+
tests, including testing other Python versions with tox::
|
|
81
|
+
|
|
82
|
+
$ flake8 acore tests
|
|
83
|
+
$ python setup.py test or pytest
|
|
84
|
+
$ tox
|
|
85
|
+
|
|
86
|
+
To get flake8 and tox, just pip install them into your virtualenv.
|
|
87
|
+
|
|
88
|
+
6. Commit your changes and push your branch to GitHub::
|
|
89
|
+
|
|
90
|
+
$ git add .
|
|
91
|
+
$ git commit -m "Your detailed description of your changes."
|
|
92
|
+
$ git push origin name-of-your-bugfix-or-feature
|
|
93
|
+
|
|
94
|
+
7. Submit a pull request through the GitHub website.
|
|
95
|
+
|
|
96
|
+
Pull Request Guidelines
|
|
97
|
+
-----------------------
|
|
98
|
+
|
|
99
|
+
Before you submit a pull request, check that it meets these guidelines:
|
|
100
|
+
|
|
101
|
+
1. The pull request should include tests.
|
|
102
|
+
2. If the pull request adds functionality, the docs should be updated. Put
|
|
103
|
+
your new functionality into a function with a docstring, and add the
|
|
104
|
+
feature to the list in README.rst.
|
|
105
|
+
3. The pull request should work for Python 3.5, 3.6, 3.7 and 3.8, and for PyPy. Check
|
|
106
|
+
https://travis-ci.com/albsantosdel/acore/pull_requests
|
|
107
|
+
and make sure that the tests pass for all supported Python versions.
|
|
108
|
+
|
|
109
|
+
Tips
|
|
110
|
+
----
|
|
111
|
+
|
|
112
|
+
To run a subset of tests::
|
|
113
|
+
|
|
114
|
+
$ pytest tests/test_correlation.py
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
Deploying
|
|
118
|
+
---------
|
|
119
|
+
|
|
120
|
+
A reminder for the maintainers on how to deploy.
|
|
121
|
+
Make sure all your changes are committed (including an entry in HISTORY.rst).
|
|
122
|
+
Then run::
|
|
123
|
+
|
|
124
|
+
$ bump2version patch # possible: major / minor / patch
|
|
125
|
+
$ git push
|
|
126
|
+
$ git push --tags
|
|
127
|
+
|
|
128
|
+
Travis will then deploy to PyPI if tests pass.
|
acore-0.1.0/HISTORY.rst
ADDED
acore-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
GNU GENERAL PUBLIC LICENSE
|
|
2
|
+
Version 3, 29 June 2007
|
|
3
|
+
|
|
4
|
+
A Python package with statistical functions to analyse multimodal molecular data
|
|
5
|
+
Copyright (C) 2023 Alberto Santos Delgado
|
|
6
|
+
|
|
7
|
+
This program is free software: you can redistribute it and/or modify
|
|
8
|
+
it under the terms of the GNU General Public License as published by
|
|
9
|
+
the Free Software Foundation, either version 3 of the License, or
|
|
10
|
+
(at your option) any later version.
|
|
11
|
+
|
|
12
|
+
This program is distributed in the hope that it will be useful,
|
|
13
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
14
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
15
|
+
GNU General Public License for more details.
|
|
16
|
+
|
|
17
|
+
You should have received a copy of the GNU General Public License
|
|
18
|
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
19
|
+
|
|
20
|
+
Also add information on how to contact you by electronic and paper mail.
|
|
21
|
+
|
|
22
|
+
You should also get your employer (if you work as a programmer) or school,
|
|
23
|
+
if any, to sign a "copyright disclaimer" for the program, if necessary.
|
|
24
|
+
For more information on this, and how to apply and follow the GNU GPL, see
|
|
25
|
+
<http://www.gnu.org/licenses/>.
|
|
26
|
+
|
|
27
|
+
The GNU General Public License does not permit incorporating your program
|
|
28
|
+
into proprietary programs. If your program is a subroutine library, you
|
|
29
|
+
may consider it more useful to permit linking proprietary applications with
|
|
30
|
+
the library. If this is what you want to do, use the GNU Lesser General
|
|
31
|
+
Public License instead of this License. But first, please read
|
|
32
|
+
<http://www.gnu.org/philosophy/why-not-lgpl.html>.
|
|
33
|
+
|
acore-0.1.0/MANIFEST.in
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
include AUTHORS.rst
|
|
2
|
+
include CONTRIBUTING.rst
|
|
3
|
+
include HISTORY.rst
|
|
4
|
+
include LICENSE
|
|
5
|
+
include README.rst
|
|
6
|
+
|
|
7
|
+
recursive-include tests *
|
|
8
|
+
recursive-exclude * __pycache__
|
|
9
|
+
recursive-exclude * *.py[co]
|
|
10
|
+
|
|
11
|
+
recursive-include docs *.rst conf.py Makefile make.bat *.jpg *.png *.gif
|
acore-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: acore
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A Python package with statistical functions to analyse multimodal molecular data
|
|
5
|
+
Home-page: https://github.com/Multiomics-Analytics-Group/acore
|
|
6
|
+
Author-email: Alberto Santos Delgado <albsad@dtu.dk>
|
|
7
|
+
License: GNU General Public License v3
|
|
8
|
+
Project-URL: Homepage, https://github.com/Multiomics-Analytics-Group/acore
|
|
9
|
+
Project-URL: Issues, https://github.com/Multiomics-Analytics-Group/acore/issues
|
|
10
|
+
Project-URL: Documentation, https://analytics-core.readthedocs.io/
|
|
11
|
+
Classifier: Development Status :: 2 - Pre-Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
|
|
14
|
+
Classifier: Natural Language :: English
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Requires-Python: >=3.6
|
|
19
|
+
Description-Content-Type: text/x-rst
|
|
20
|
+
License-File: LICENSE
|
|
21
|
+
License-File: AUTHORS.rst
|
|
22
|
+
Requires-Dist: click>=7.0
|
|
23
|
+
Requires-Dist: numpy==1.23.2
|
|
24
|
+
Requires-Dist: pandas==2.0.2
|
|
25
|
+
Requires-Dist: scipy==1.10.1
|
|
26
|
+
Requires-Dist: networkx==3.1
|
|
27
|
+
Requires-Dist: biopython==1.81
|
|
28
|
+
Requires-Dist: combat==0.3.3
|
|
29
|
+
Requires-Dist: gseapy==1.0.4
|
|
30
|
+
Requires-Dist: kmapper==2.0.1
|
|
31
|
+
Requires-Dist: lifelines==0.27.7
|
|
32
|
+
Requires-Dist: pingouin==0.5.3
|
|
33
|
+
Requires-Dist: python-louvain==0.16
|
|
34
|
+
Requires-Dist: PyWGCNA==1.16.8
|
|
35
|
+
Requires-Dist: snfpy==0.2.2
|
|
36
|
+
Requires-Dist: umap-learn==0.5.3
|
|
37
|
+
Requires-Dist: statsmodels
|
|
38
|
+
Provides-Extra: dev
|
|
39
|
+
Requires-Dist: pytest>=3; extra == "dev"
|
|
40
|
+
|
|
41
|
+
==============
|
|
42
|
+
Analytics Core
|
|
43
|
+
==============
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
.. start-badges
|
|
47
|
+
|
|
48
|
+
.. list-table::
|
|
49
|
+
:stub-columns: 1
|
|
50
|
+
|
|
51
|
+
* - docs
|
|
52
|
+
- |docs|
|
|
53
|
+
* - tests
|
|
54
|
+
- | |Pylint|
|
|
55
|
+
* - package
|
|
56
|
+
- | |version| |wheel| |supported-versions| |supported-implementations|
|
|
57
|
+
|
|
58
|
+
.. |docs| image:: https://readthedocs.org/projects/acore/badge/?style=flat
|
|
59
|
+
:target: https://analytics-core.readthedocs.io/
|
|
60
|
+
:alt: Documentation Status
|
|
61
|
+
|
|
62
|
+
.. |Pylint| image:: https://github.com/Multiomics-Analytics-Group/acore/actions/workflows/tox-gha.yml/badge.svg
|
|
63
|
+
:alt: GitHub Actions Tox Status
|
|
64
|
+
:target: https://github.com/Multiomics-Analytics-Group/acore/actions/workflows/tox.yml
|
|
65
|
+
|
|
66
|
+
.. |version| image:: https://img.shields.io/pypi/v/acore.svg
|
|
67
|
+
:alt: PyPI Package latest release
|
|
68
|
+
:target: https://pypi.org/project/acore
|
|
69
|
+
|
|
70
|
+
.. |wheel| image:: https://img.shields.io/pypi/wheel/acore.svg
|
|
71
|
+
:alt: PyPI Wheel
|
|
72
|
+
:target: https://pypi.org/project/acore
|
|
73
|
+
|
|
74
|
+
.. |supported-versions| image:: https://img.shields.io/pypi/pyversions/acore.svg
|
|
75
|
+
:alt: Supported versions
|
|
76
|
+
:target: https://pypi.org/project/acore
|
|
77
|
+
|
|
78
|
+
.. |supported-implementations| image:: https://img.shields.io/pypi/implementation/acore.svg
|
|
79
|
+
:alt: Supported implementations
|
|
80
|
+
:target: https://pypi.org/project/acore
|
|
81
|
+
|
|
82
|
+
.. end-badges
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
A Python package with statistical functions to analyse multimodal molecular data
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
* Free software: GNU General Public License v3
|
|
91
|
+
* Documentation: https://analytics-core.readthedocs.io.
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
Installation
|
|
95
|
+
============
|
|
96
|
+
|
|
97
|
+
::
|
|
98
|
+
|
|
99
|
+
pip install acore
|
|
100
|
+
|
|
101
|
+
You can also install the in-development version with::
|
|
102
|
+
|
|
103
|
+
pip install https://github.com/Multiomics-Analytics-Group/acore/archive/main.zip
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
Development
|
|
108
|
+
===========
|
|
109
|
+
|
|
110
|
+
To run all the tests run::
|
|
111
|
+
|
|
112
|
+
pytest
|
acore-0.1.0/README.rst
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
==============
|
|
2
|
+
Analytics Core
|
|
3
|
+
==============
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
.. start-badges
|
|
7
|
+
|
|
8
|
+
.. list-table::
|
|
9
|
+
:stub-columns: 1
|
|
10
|
+
|
|
11
|
+
* - docs
|
|
12
|
+
- |docs|
|
|
13
|
+
* - tests
|
|
14
|
+
- | |Pylint|
|
|
15
|
+
* - package
|
|
16
|
+
- | |version| |wheel| |supported-versions| |supported-implementations|
|
|
17
|
+
|
|
18
|
+
.. |docs| image:: https://readthedocs.org/projects/acore/badge/?style=flat
|
|
19
|
+
:target: https://analytics-core.readthedocs.io/
|
|
20
|
+
:alt: Documentation Status
|
|
21
|
+
|
|
22
|
+
.. |Pylint| image:: https://github.com/Multiomics-Analytics-Group/acore/actions/workflows/tox-gha.yml/badge.svg
|
|
23
|
+
:alt: GitHub Actions Tox Status
|
|
24
|
+
:target: https://github.com/Multiomics-Analytics-Group/acore/actions/workflows/tox.yml
|
|
25
|
+
|
|
26
|
+
.. |version| image:: https://img.shields.io/pypi/v/acore.svg
|
|
27
|
+
:alt: PyPI Package latest release
|
|
28
|
+
:target: https://pypi.org/project/acore
|
|
29
|
+
|
|
30
|
+
.. |wheel| image:: https://img.shields.io/pypi/wheel/acore.svg
|
|
31
|
+
:alt: PyPI Wheel
|
|
32
|
+
:target: https://pypi.org/project/acore
|
|
33
|
+
|
|
34
|
+
.. |supported-versions| image:: https://img.shields.io/pypi/pyversions/acore.svg
|
|
35
|
+
:alt: Supported versions
|
|
36
|
+
:target: https://pypi.org/project/acore
|
|
37
|
+
|
|
38
|
+
.. |supported-implementations| image:: https://img.shields.io/pypi/implementation/acore.svg
|
|
39
|
+
:alt: Supported implementations
|
|
40
|
+
:target: https://pypi.org/project/acore
|
|
41
|
+
|
|
42
|
+
.. end-badges
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
A Python package with statistical functions to analyse multimodal molecular data
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
* Free software: GNU General Public License v3
|
|
51
|
+
* Documentation: https://analytics-core.readthedocs.io.
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
Installation
|
|
55
|
+
============
|
|
56
|
+
|
|
57
|
+
::
|
|
58
|
+
|
|
59
|
+
pip install acore
|
|
60
|
+
|
|
61
|
+
You can also install the in-development version with::
|
|
62
|
+
|
|
63
|
+
pip install https://github.com/Multiomics-Analytics-Group/acore/archive/main.zip
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
Development
|
|
68
|
+
===========
|
|
69
|
+
|
|
70
|
+
To run all the tests run::
|
|
71
|
+
|
|
72
|
+
pytest
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Version string of the acore package.
__version__ = "0.1.0"
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
import itertools
|
|
2
|
+
import numpy as np
|
|
3
|
+
import pandas as pd
|
|
4
|
+
from scipy import stats
|
|
5
|
+
import pingouin as pg
|
|
6
|
+
from scipy.special import betainc
|
|
7
|
+
import acore.utils as utils
|
|
8
|
+
from acore.multiple_testing import apply_pvalue_correction
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def calculate_correlations(x, y, method='pearson'):
    """
    Calculates a Spearman (nonparametric) or a Pearson (parametric) correlation coefficient and p-value to test for non-correlation.

    :param ndarray x: array 1
    :param ndarray y: array 2
    :param str method: chooses which kind of correlation method to run ('pearson' or 'spearman').
    :return: Tuple with two floats, correlation coefficient and two-tailed p-value.
    :raises ValueError: if method is neither 'pearson' nor 'spearman'.

    Example::

        result = calculate_correlations(x, y, method='pearson')
    """
    if method == "pearson":
        coefficient, pvalue = stats.pearsonr(x, y)
    elif method == "spearman":
        coefficient, pvalue = stats.spearmanr(x, y)
    else:
        # An unknown method used to fall through and fail with an
        # UnboundLocalError on the return statement; fail explicitly instead.
        raise ValueError(
            "Unsupported correlation method '{}'. Use 'pearson' or 'spearman'.".format(method)
        )

    return (coefficient, pvalue)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def run_correlation(df, alpha=0.05, subject='subject', group='group', method='pearson', correction='fdr_bh'):
    """
    This function calculates pairwise correlations for columns in dataframe, and returns it in the shape of an edge list with 'weight' as correlation score, and the adjusted p-values.

    When ``utils.check_is_paired`` reports a paired design, the repeated measurements
    correlation is used, but only if there are more than two distinct subjects and fewer
    than 200 columns; otherwise an empty dataframe is returned. For non-paired data, columns
    with missing values and non-numeric columns are dropped before correlating.

    :param df: pandas dataframe with samples as rows and features as columns.
    :param str subject: name of column containing subject identifiers.
    :param str group: name of column containing group identifiers.
    :param str method: method to use for correlation calculation ('pearson', 'spearman').
    :param float alpha: error rate. Values below alpha are considered significant.
    :param string correction: type of correction see apply_pvalue_correction for methods
    :return: Pandas dataframe restricted to the significant (rejected) pairs, with columns: 'node1', 'node2', 'weight', 'pvalue', 'padj' and 'rejected' ('pvalue' and 'padj' are rounded to 5 decimals and stored as strings in the non-paired case).

    Example::

        result = run_correlation(df, alpha=0.05, subject='subject', group='group', method='pearson', correction='fdr_bh')
    """
    correlation = pd.DataFrame()
    # ToDo
    # The Repeated measurements correlation calculation is too time consuming so it only runs if
    # the number of features is less than 200
    # NOTE(review): paired data with 200 or more columns (or two or fewer subjects)
    # currently yields an empty result rather than a fallback -- confirm this is intended.
    if utils.check_is_paired(df, subject, group):
        if len(df[subject].unique()) > 2:
            if len(df.columns) < 200:
                correlation = run_rm_correlation(df, alpha=alpha, subject=subject, correction=correction)
    else:
        df = df.dropna(axis=1)._get_numeric_data()
        if not df.empty:
            r, p = run_efficient_correlation(df, method=method)
            rdf = pd.DataFrame(r, index=df.columns, columns=df.columns)
            pdf = pd.DataFrame(p, index=df.columns, columns=df.columns)
            correlation = utils.convertToEdgeList(rdf, ["node1", "node2", "weight"])
            pvalues = utils.convertToEdgeList(pdf, ["node1", "node2", "pvalue"])
            correlation = pd.merge(correlation, pvalues, on=['node1', 'node2'])

            rejected, padj = apply_pvalue_correction(correlation["pvalue"].tolist(), alpha=alpha, method=correction)
            correlation["padj"] = padj
            correlation["rejected"] = rejected
            # Take an explicit copy of the filtered rows so the column assignments
            # below do not write into a slice (avoids SettingWithCopyWarning).
            correlation = correlation[correlation.rejected].copy()
            correlation["pvalue"] = correlation["pvalue"].apply(lambda x: str(round(x, 5)))
            correlation["padj"] = correlation["padj"].apply(lambda x: str(round(x, 5)))

    return correlation
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def run_multi_correlation(df_dict, alpha=0.05, subject='subject', on=['subject', 'biological_sample'], group='group', method='pearson', correction='fdr_bh'):
    """
    Inner-joins every dataframe found in the input dictionary and runs the pairwise correlation analysis on the merged table.

    Dictionary values that are not pandas dataframes are ignored.

    :param dict df_dict: dictionary of pandas dataframes with samples as rows and features as columns.
    :param str subject: name of the column containing subject identifiers.
    :param str group: name of the column containing group identifiers.
    :param list on: column names to join dataframes on (must be found in all dataframes).
    :param str method: method to use for correlation calculation ('pearson', 'spearman').
    :param float alpha: error rate. Values below alpha are considered significant.
    :param string correction: type of correction see apply_pvalue_correction for methods
    :return: Result of run_correlation on the merged dataframe, or None when the merged dataframe is empty.

    Example::

        result = run_multi_correlation(df_dict, alpha=0.05, subject='subject', on=['subject', 'biological_sample'] , group='group', method='pearson', correction='fdr_bh')
    """
    merged = pd.DataFrame()
    for frame in df_dict.values():
        if not isinstance(frame, pd.DataFrame):
            continue
        if merged.empty:
            # Nothing accumulated yet: this dataframe becomes the starting point.
            merged = frame
        else:
            merged = pd.merge(merged, frame, how='inner', on=on)

    if merged.empty:
        return None

    return run_correlation(merged, alpha=alpha, subject=subject, group=group, method=method, correction=correction)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def calculate_rm_correlation(df, x, y, subject):
    """
    Runs pingouin's repeated measures correlation between columns x and y of df.

    :param df: pandas dataframe holding the two feature columns and the subject column.
    :param str x: name of the first feature column.
    :param str y: name of the second feature column.
    :param subject: name of the column passed to pingouin as the subject identifier.
    :return: Tuple with values for: x, y, correlation, p-value and degrees of freedom.

    Example::

        result = calculate_rm_correlation(df, x='feature a', y='feature b', subject='subject')
    """
    rm_stats = pg.rm_corr(data=df, x=x, y=y, subject=subject)

    # Pull the first entry of each statistic column from the pingouin result.
    coefficient = rm_stats["r"].values[0]
    pvalue = rm_stats["pval"].values[0]
    dof = rm_stats["dof"].values[0]

    return (x, y, coefficient, pvalue, dof)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def run_rm_correlation(df, alpha=0.05, subject='subject', correction='fdr_bh'):
    """
    Computes pairwise repeated measurements correlations for all columns in dataframe, and returns results as an edge list with 'weight' as correlation score, p-values, degrees of freedom and adjusted p-values.

    Only numeric columns without missing values are correlated. Only the pairs
    flagged as rejected by the multiple testing correction are returned.

    :param df: pandas dataframe with samples as rows and features as columns.
    :param str subject: name of column containing subject identifiers.
    :param float alpha: error rate. Values below alpha are considered significant.
    :param string correction: type of correction see apply_pvalue_correction for methods
    :return: Pandas dataframe with columns: 'node1', 'node2', 'weight', 'dof', 'pvalue', 'CI95%', 'power', 'padj' and 'rejected' ('padj' is rounded to 5 decimals and stored as a string).

    Example::

        result = run_rm_correlation(df, alpha=0.05, subject='subject', correction='fdr_bh')
    """
    rows = []
    if not df.empty:
        df = df.set_index(subject)._get_numeric_data().dropna(axis=1)
        df.columns = df.columns.astype(str)
        # Feature pairs are taken before the subject column is restored, so the
        # subject column itself is never paired with a feature.
        combinations = itertools.combinations(df.columns, 2)
        df = df.reset_index()
        for x, y in combinations:
            row = [x, y]
            subset = df[[x, y, subject]]
            # The first row of the pingouin result supplies the values for the
            # 'weight', 'dof', 'pvalue', 'CI95%' and 'power' columns below.
            row.extend(pg.rm_corr(subset, x, y, subject).values.tolist()[0])
            rows.append(row)

    correlation = pd.DataFrame(rows, columns=["node1", "node2", "weight", "dof", "pvalue", "CI95%", "power"])
    rejected, padj = apply_pvalue_correction(correlation["pvalue"].tolist(), alpha=alpha, method=correction)
    correlation["padj"] = padj
    correlation["rejected"] = rejected
    # Take an explicit copy of the filtered rows so the assignment below does
    # not write into a slice (avoids SettingWithCopyWarning).
    correlation = correlation[correlation.rejected].copy()
    correlation["padj"] = correlation["padj"].apply(lambda x: str(round(x, 5)))

    return correlation
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def run_efficient_correlation(data, method='pearson'):
    """
    Calculates pairwise correlations and returns lower triangle of the matrix with correlation values and p-values.

    P-values are derived from the correlation coefficients with a t-test using
    (number of samples - 2) degrees of freedom. In both returned matrices the
    upper triangle is set to NaN; the diagonal of the p-value matrix is 1.

    :param data: pandas dataframe with samples as index and features as columns (numeric data only).
    :param str method: method to use for correlation calculation ('pearson', 'spearman').
    :return: Two numpy arrays: correlation and p-values.
    :raises ValueError: if method is neither 'pearson' nor 'spearman'.

    Example::

        result = run_efficient_correlation(data, method='pearson')
    """
    matrix = data.values
    if method == 'pearson':
        r = np.corrcoef(matrix, rowvar=False)
    elif method == 'spearman':
        r, _ = stats.spearmanr(matrix, axis=0)
    else:
        raise ValueError(
            "Unsupported correlation method '{}'. Use 'pearson' or 'spearman'.".format(method)
        )

    r = np.asarray(r, dtype=float)
    if r.ndim == 0:
        # A scalar comes back instead of a matrix when there are too few
        # features (e.g. spearmanr with exactly two columns); expand it to a
        # square matrix so the indexing below works.
        n_features = matrix.shape[1]
        coefficient = float(r)
        r = np.full((n_features, n_features), coefficient)
        np.fill_diagonal(r, 1.0)

    upper = np.triu_indices(r.shape[0], 1)
    rf = r[upper]
    # Columns are the variables (rowvar=False / axis=0), so the number of
    # observations is the number of rows.
    dof = matrix.shape[0] - 2
    ts = rf * rf * (dof / (1 - rf * rf))
    pf = betainc(0.5 * dof, 0.5, dof / (dof + ts))

    p = np.zeros(shape=r.shape)
    p[upper] = pf
    # Mirror through the transpose: assigning pf via tril_indices would pair
    # values with the wrong (row, column) positions for 4 or more features.
    p = p + p.T
    np.fill_diagonal(p, 1.0)

    r[upper] = np.nan
    p[upper] = np.nan

    return r, p
|