flexynesis 0.2.5__tar.gz → 0.2.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flexynesis-0.2.7/LICENCE +25 -0
- flexynesis-0.2.7/PKG-INFO +126 -0
- flexynesis-0.2.7/README.md +90 -0
- {flexynesis-0.2.5 → flexynesis-0.2.7}/flexynesis/__main__.py +7 -26
- flexynesis-0.2.7/flexynesis.egg-info/PKG-INFO +126 -0
- {flexynesis-0.2.5 → flexynesis-0.2.7}/flexynesis.egg-info/SOURCES.txt +1 -1
- {flexynesis-0.2.5 → flexynesis-0.2.7}/pyproject.toml +2 -1
- flexynesis-0.2.5/LICENCE.md +0 -404
- flexynesis-0.2.5/PKG-INFO +0 -273
- flexynesis-0.2.5/README.md +0 -237
- flexynesis-0.2.5/flexynesis.egg-info/PKG-INFO +0 -273
- {flexynesis-0.2.5 → flexynesis-0.2.7}/flexynesis/__init__.py +0 -0
- {flexynesis-0.2.5 → flexynesis-0.2.7}/flexynesis/cli.py +0 -0
- {flexynesis-0.2.5 → flexynesis-0.2.7}/flexynesis/config.py +0 -0
- {flexynesis-0.2.5 → flexynesis-0.2.7}/flexynesis/data.py +0 -0
- {flexynesis-0.2.5 → flexynesis-0.2.7}/flexynesis/feature_selection.py +0 -0
- {flexynesis-0.2.5 → flexynesis-0.2.7}/flexynesis/main.py +0 -0
- {flexynesis-0.2.5 → flexynesis-0.2.7}/flexynesis/models/__init__.py +0 -0
- {flexynesis-0.2.5 → flexynesis-0.2.7}/flexynesis/models/crossmodal_pred.py +0 -0
- {flexynesis-0.2.5 → flexynesis-0.2.7}/flexynesis/models/direct_pred.py +0 -0
- {flexynesis-0.2.5 → flexynesis-0.2.7}/flexynesis/models/gnn_early.py +0 -0
- {flexynesis-0.2.5 → flexynesis-0.2.7}/flexynesis/models/on_ice/direct_pred_cnn.py +0 -0
- {flexynesis-0.2.5 → flexynesis-0.2.7}/flexynesis/models/on_ice/direct_pred_gcnn.py +0 -0
- {flexynesis-0.2.5 → flexynesis-0.2.7}/flexynesis/models/on_ice/modules_on_ice.py +0 -0
- {flexynesis-0.2.5 → flexynesis-0.2.7}/flexynesis/models/supervised_vae.py +0 -0
- {flexynesis-0.2.5 → flexynesis-0.2.7}/flexynesis/models/triplet_encoder.py +0 -0
- {flexynesis-0.2.5 → flexynesis-0.2.7}/flexynesis/modules.py +0 -0
- {flexynesis-0.2.5 → flexynesis-0.2.7}/flexynesis/utils.py +0 -0
- {flexynesis-0.2.5 → flexynesis-0.2.7}/flexynesis.egg-info/dependency_links.txt +0 -0
- {flexynesis-0.2.5 → flexynesis-0.2.7}/flexynesis.egg-info/entry_points.txt +0 -0
- {flexynesis-0.2.5 → flexynesis-0.2.7}/flexynesis.egg-info/requires.txt +0 -0
- {flexynesis-0.2.5 → flexynesis-0.2.7}/flexynesis.egg-info/top_level.txt +0 -0
- {flexynesis-0.2.5 → flexynesis-0.2.7}/setup.cfg +0 -0
- {flexynesis-0.2.5 → flexynesis-0.2.7}/tests/__init__.py +0 -0
- {flexynesis-0.2.5 → flexynesis-0.2.7}/tests/unit/__init__.py +0 -0
- {flexynesis-0.2.5 → flexynesis-0.2.7}/tests/unit/test_smoke.py +0 -0
flexynesis-0.2.7/LICENCE
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
```txt
|
|
2
|
+
Modified MIT License for Academic and Non-Commercial Use
|
|
3
|
+
|
|
4
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
5
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
6
|
+
in the Software for academic, research, and educational purposes without
|
|
7
|
+
restriction, including without limitation the rights to use, copy, modify,
|
|
8
|
+
merge, publish, and distribute copies of the Software, and to permit persons
|
|
9
|
+
to whom the Software is furnished to do so, subject to the following conditions:
|
|
10
|
+
|
|
11
|
+
Commercial use of this software or any derivative works is prohibited without
|
|
12
|
+
explicit permission and a separate commercial license from the copyright holders.
|
|
13
|
+
|
|
14
|
+
The above copyright notice and this permission notice shall be included in all
|
|
15
|
+
copies or substantial portions of the Software.
|
|
16
|
+
|
|
17
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
18
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
19
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
20
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
21
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
22
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
23
|
+
SOFTWARE.
|
|
24
|
+
|
|
25
|
+
```
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: flexynesis
|
|
3
|
+
Version: 0.2.7
|
|
4
|
+
Summary: A deep-learning based multi-omics bulk sequencing data integration suite with a focus on (pre-)clinical endpoint prediction.
|
|
5
|
+
Author-email: Bora Uyar <bora.uyar@mdc-berlin.de>, Taras Savchyn <Taras.Savchyn@mdc-berlin.de>, Ricardo Wurmus <Ricardo.Wurmus@mdc-berlin.de>, Ahmet Sarigun <Ahmet.Sariguen@mdc-berlin.de>
|
|
6
|
+
Project-URL: homepage, https://github.com/BIMSBbioinfo/flexynesis
|
|
7
|
+
Classifier: Development Status :: 3 - Alpha
|
|
8
|
+
Classifier: Intended Audience :: Developers
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
10
|
+
Requires-Python: <3.12,>=3.11
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
License-File: LICENCE
|
|
13
|
+
Requires-Dist: matplotlib
|
|
14
|
+
Requires-Dist: numpy
|
|
15
|
+
Requires-Dist: pandas
|
|
16
|
+
Requires-Dist: lightning
|
|
17
|
+
Requires-Dist: pyyaml
|
|
18
|
+
Requires-Dist: scikit-optimize
|
|
19
|
+
Requires-Dist: scipy
|
|
20
|
+
Requires-Dist: seaborn
|
|
21
|
+
Requires-Dist: torch
|
|
22
|
+
Requires-Dist: torchvision
|
|
23
|
+
Requires-Dist: tqdm
|
|
24
|
+
Requires-Dist: umap-learn
|
|
25
|
+
Requires-Dist: rich
|
|
26
|
+
Requires-Dist: captum
|
|
27
|
+
Requires-Dist: ipywidgets
|
|
28
|
+
Requires-Dist: torch_geometric
|
|
29
|
+
Requires-Dist: ipykernel
|
|
30
|
+
Requires-Dist: lifelines
|
|
31
|
+
Requires-Dist: papermill
|
|
32
|
+
Requires-Dist: scikit-survival
|
|
33
|
+
Requires-Dist: python-louvain
|
|
34
|
+
Provides-Extra: test
|
|
35
|
+
Requires-Dist: pytest; extra == "test"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
<p align="center">
|
|
39
|
+
<img alt="logo" src="https://github.com/BIMSBbioinfo/flexynesis/raw/main/img/logo.png" width="50%" height="50%">
|
|
40
|
+
</p>
|
|
41
|
+
|
|
42
|
+
[](https://pepy.tech/project/flexynesis)
|
|
43
|
+

|
|
44
|
+

|
|
45
|
+
|
|
46
|
+
# flexynesis
|
|
47
|
+
|
|
48
|
+
A deep-learning based multi-omics bulk sequencing data integration suite with a focus on (pre-)clinical
|
|
49
|
+
endpoint prediction. The package includes multiple types of deep learning architectures such as simple
|
|
50
|
+
fully connected networks, supervised variational autoencoders, graph convolutional networks, multi-triplet networks
|
|
51
|
+
different options of data layer fusion, and automates feature selection and hyperparameter optimisation. The tools are continuously benchmarked on publicly available datasets mostly related to the study of cancer. Some of the applications of the methods
|
|
52
|
+
we develop are drug response modeling in cancer patients or preclinical models (such as cell lines and
|
|
53
|
+
patient-derived xenografts), cancer subtype prediction, or any other clinically relevant outcome prediction
|
|
54
|
+
that can be formulated as a regression, classification, survival, or cross-modality prediction problem.
|
|
55
|
+
|
|
56
|
+
<p align="center">
|
|
57
|
+
<img alt="workflow" src="https://github.com/BIMSBbioinfo/flexynesis/raw/main/img/graphical_abstract.jpg">
|
|
58
|
+
</p>
|
|
59
|
+
|
|
60
|
+
# Citing our work
|
|
61
|
+
|
|
62
|
+
In order to refer to our work, please cite our manuscript currently available at [BioRxiv](https://biorxiv.org/cgi/content/short/2024.07.16.603606v1).
|
|
63
|
+
|
|
64
|
+
# Getting started with Flexynesis
|
|
65
|
+
|
|
66
|
+
## Command-line tutorial
|
|
67
|
+
|
|
68
|
+
- [Getting Started with Flexynesis](https://bimsbstatic.mdc-berlin.de/akalin/buyar/flexynesis/site/getting_started/)
|
|
69
|
+
|
|
70
|
+
## Jupyter notebooks for interactive usage
|
|
71
|
+
|
|
72
|
+
- [Modeling Breast Cancer Subtypes](https://github.com/BIMSBbioinfo/flexynesis/blob/main/examples/tutorials/brca_subtypes.ipynb)
|
|
73
|
+
- [Survival Markers of Lower Grade Gliomas](https://github.com/BIMSBbioinfo/flexynesis/blob/main/examples/tutorials/survival_subtypes_LGG_GBM.ipynb)
|
|
74
|
+
- [Unsupervised Analysis of Bone Marrow Cells](https://github.com/BIMSBbioinfo/flexynesis/blob/main/examples/tutorials/unsupervised_analysis_single_cell.ipynb)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
# Benchmarks
|
|
78
|
+
|
|
79
|
+
For the latest benchmark results see:
|
|
80
|
+
https://bimsbstatic.mdc-berlin.de/akalin/buyar/flexynesis-benchmark-datasets/dashboard.html
|
|
81
|
+
|
|
82
|
+
The code for the benchmarking pipeline is at: https://github.com/BIMSBbioinfo/flexynesis-benchmarks
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
# Defining Kernel for the Jupyter Notebook
|
|
86
|
+
|
|
87
|
+
For interactively using flexynesis on Jupyter notebooks, one can define the kernel to make
|
|
88
|
+
flexynesis and its dependencies available on the jupyter session.
|
|
89
|
+
|
|
90
|
+
Assuming you have already defined an environment and installed the package:
|
|
91
|
+
```
|
|
92
|
+
conda activate flexynesisenv
|
|
93
|
+
python -m ipykernel install --user --name "flexynesisenv" --display-name "flexynesisenv"
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
# Compiling Notebooks
|
|
97
|
+
|
|
98
|
+
`papermill` can be used to compile the tutorials under `examples/tutorials`.
|
|
99
|
+
|
|
100
|
+
If the purpose is to quickly check whether the notebook can be run, set HPO_ITER to 1.
|
|
101
|
+
This sets hyperparameter optimisation steps to 1.
|
|
102
|
+
For longer training runs to see more meaningful results from the notebook, increase this number to e.g. 50.
|
|
103
|
+
|
|
104
|
+
Example:
|
|
105
|
+
|
|
106
|
+
```
|
|
107
|
+
papermill examples/tutorials/brca_subtypes.ipynb brca_subtypes.ipynb -p HPO_ITER 1
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
The output from papermill can be converted to an html file as follows:
|
|
111
|
+
|
|
112
|
+
```
|
|
113
|
+
jupyter nbconvert --to html brca_subtypes.ipynb
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
# Documentation
|
|
117
|
+
|
|
118
|
+
Documentation generated using [mkdocs](https://mkdocstrings.github.io/)
|
|
119
|
+
|
|
120
|
+
```
|
|
121
|
+
pip install mkdocstrings[python]
|
|
122
|
+
mkdocs build --clean
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
|
|
2
|
+
<p align="center">
|
|
3
|
+
<img alt="logo" src="https://github.com/BIMSBbioinfo/flexynesis/raw/main/img/logo.png" width="50%" height="50%">
|
|
4
|
+
</p>
|
|
5
|
+
|
|
6
|
+
[](https://pepy.tech/project/flexynesis)
|
|
7
|
+

|
|
8
|
+

|
|
9
|
+
|
|
10
|
+
# flexynesis
|
|
11
|
+
|
|
12
|
+
A deep-learning based multi-omics bulk sequencing data integration suite with a focus on (pre-)clinical
|
|
13
|
+
endpoint prediction. The package includes multiple types of deep learning architectures such as simple
|
|
14
|
+
fully connected networks, supervised variational autoencoders, graph convolutional networks, multi-triplet networks
|
|
15
|
+
different options of data layer fusion, and automates feature selection and hyperparameter optimisation. The tools are continuously benchmarked on publicly available datasets mostly related to the study of cancer. Some of the applications of the methods
|
|
16
|
+
we develop are drug response modeling in cancer patients or preclinical models (such as cell lines and
|
|
17
|
+
patient-derived xenografts), cancer subtype prediction, or any other clinically relevant outcome prediction
|
|
18
|
+
that can be formulated as a regression, classification, survival, or cross-modality prediction problem.
|
|
19
|
+
|
|
20
|
+
<p align="center">
|
|
21
|
+
<img alt="workflow" src="https://github.com/BIMSBbioinfo/flexynesis/raw/main/img/graphical_abstract.jpg">
|
|
22
|
+
</p>
|
|
23
|
+
|
|
24
|
+
# Citing our work
|
|
25
|
+
|
|
26
|
+
In order to refer to our work, please cite our manuscript currently available at [BioRxiv](https://biorxiv.org/cgi/content/short/2024.07.16.603606v1).
|
|
27
|
+
|
|
28
|
+
# Getting started with Flexynesis
|
|
29
|
+
|
|
30
|
+
## Command-line tutorial
|
|
31
|
+
|
|
32
|
+
- [Getting Started with Flexynesis](https://bimsbstatic.mdc-berlin.de/akalin/buyar/flexynesis/site/getting_started/)
|
|
33
|
+
|
|
34
|
+
## Jupyter notebooks for interactive usage
|
|
35
|
+
|
|
36
|
+
- [Modeling Breast Cancer Subtypes](https://github.com/BIMSBbioinfo/flexynesis/blob/main/examples/tutorials/brca_subtypes.ipynb)
|
|
37
|
+
- [Survival Markers of Lower Grade Gliomas](https://github.com/BIMSBbioinfo/flexynesis/blob/main/examples/tutorials/survival_subtypes_LGG_GBM.ipynb)
|
|
38
|
+
- [Unsupervised Analysis of Bone Marrow Cells](https://github.com/BIMSBbioinfo/flexynesis/blob/main/examples/tutorials/unsupervised_analysis_single_cell.ipynb)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# Benchmarks
|
|
42
|
+
|
|
43
|
+
For the latest benchmark results see:
|
|
44
|
+
https://bimsbstatic.mdc-berlin.de/akalin/buyar/flexynesis-benchmark-datasets/dashboard.html
|
|
45
|
+
|
|
46
|
+
The code for the benchmarking pipeline is at: https://github.com/BIMSBbioinfo/flexynesis-benchmarks
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# Defining Kernel for the Jupyter Notebook
|
|
50
|
+
|
|
51
|
+
For interactively using flexynesis on Jupyter notebooks, one can define the kernel to make
|
|
52
|
+
flexynesis and its dependencies available on the jupyter session.
|
|
53
|
+
|
|
54
|
+
Assuming you have already defined an environment and installed the package:
|
|
55
|
+
```
|
|
56
|
+
conda activate flexynesisenv
|
|
57
|
+
python -m ipykernel install --user --name "flexynesisenv" --display-name "flexynesisenv"
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
# Compiling Notebooks
|
|
61
|
+
|
|
62
|
+
`papermill` can be used to compile the tutorials under `examples/tutorials`.
|
|
63
|
+
|
|
64
|
+
If the purpose is to quickly check whether the notebook can be run, set HPO_ITER to 1.
|
|
65
|
+
This sets hyperparameter optimisation steps to 1.
|
|
66
|
+
For longer training runs to see more meaningful results from the notebook, increase this number to e.g. 50.
|
|
67
|
+
|
|
68
|
+
Example:
|
|
69
|
+
|
|
70
|
+
```
|
|
71
|
+
papermill examples/tutorials/brca_subtypes.ipynb brca_subtypes.ipynb -p HPO_ITER 1
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
The output from papermill can be converted to an html file as follows:
|
|
75
|
+
|
|
76
|
+
```
|
|
77
|
+
jupyter nbconvert --to html brca_subtypes.ipynb
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
# Documentation
|
|
81
|
+
|
|
82
|
+
Documentation generated using [mkdocs](https://mkdocstrings.github.io/)
|
|
83
|
+
|
|
84
|
+
```
|
|
85
|
+
pip install mkdocstrings[python]
|
|
86
|
+
mkdocs build --clean
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
|
|
@@ -21,12 +21,11 @@ def main():
|
|
|
21
21
|
--model_class (str): The kind of model class to instantiate. Choices are ["DirectPred", "GNN", "supervised_vae", "MultiTripletNetwork", "CrossModalPred", "RandomForest", "SVM", "RandomSurvivalForest"]. (Required)
|
|
22
22
|
--gnn_conv_type (str): If model_class is set to GNN, choose which graph convolution type to use. Choices are ["GC", "GCN", "SAGE"].
|
|
23
23
|
--target_variables (str): Which variables in 'clin.csv' to use for predictions, comma-separated if multiple. Optional if survival variables are not set to None.
|
|
24
|
-
--batch_variables (str): Which variables in 'clin.csv' to use for data integration/batch correction, comma-separated if multiple. Optional.
|
|
25
24
|
--surv_event_var (str): Which column in 'clin.csv' to use as event/status indicator for survival modeling.
|
|
26
25
|
--surv_time_var (str): Which column in 'clin.csv' to use as time/duration indicator for survival modeling.
|
|
27
26
|
--config_path (str): Optional path to an external hyperparameter configuration file in YAML format.
|
|
28
27
|
--fusion_type (str): How to fuse the omics layers. Choices are ["early", "intermediate"]. Default is 'intermediate'.
|
|
29
|
-
--hpo_iter (int): Number of iterations for hyperparameter optimisation. Default is
|
|
28
|
+
--hpo_iter (int): Number of iterations for hyperparameter optimisation. Default is 100.
|
|
30
29
|
--finetuning_samples (int): Number of samples from the test dataset to use for fine-tuning the model. Set to 0 to disable fine-tuning. Default is 0.
|
|
31
30
|
--variance_threshold (float): Variance threshold (as percentile) to drop low variance features. Default is 1; set to 0 for no variance filtering.
|
|
32
31
|
--correlation_threshold (float): Correlation threshold to drop highly redundant features. Default is 0.8; set to 1 for no redundancy filtering.
|
|
@@ -64,14 +63,11 @@ def main():
|
|
|
64
63
|
help="(Optional if survival variables are not set to None)."
|
|
65
64
|
"Which variables in 'clin.csv' to use for predictions, comma-separated if multiple",
|
|
66
65
|
type = str, default = None)
|
|
67
|
-
parser.add_argument("--batch_variables",
|
|
68
|
-
help="(Optional) Which variables in 'clin.csv' to use for data integration / batch correction, comma-separated if multiple",
|
|
69
|
-
type = str, default = None)
|
|
70
66
|
parser.add_argument("--surv_event_var", help="Which column in 'clin.csv' to use as event/status indicator for survival modeling", type = str, default = None)
|
|
71
67
|
parser.add_argument("--surv_time_var", help="Which column in 'clin.csv' to use as time/duration indicator for survival modeling", type = str, default = None)
|
|
72
68
|
parser.add_argument('--config_path', type=str, default=None, help='Optional path to an external hyperparameter configuration file in YAML format.')
|
|
73
69
|
parser.add_argument("--fusion_type", help="How to fuse the omics layers", type=str, choices=["early", "intermediate"], default = 'intermediate')
|
|
74
|
-
parser.add_argument("--hpo_iter", help="Number of iterations for hyperparameter optimisation", type=int, default =
|
|
70
|
+
parser.add_argument("--hpo_iter", help="Number of iterations for hyperparameter optimisation", type=int, default = 100)
|
|
75
71
|
parser.add_argument("--finetuning_samples", help="Number of samples from the test dataset to use for fine-tuning the model. Set to 0 to disable fine-tuning", type=int, default = 0)
|
|
76
72
|
parser.add_argument("--variance_threshold", help="Variance threshold (as percentile) to drop low variance features (default is 1; set to 0 for no variance filtering)", type=float, default = 1)
|
|
77
73
|
parser.add_argument("--correlation_threshold", help="Correlation threshold to drop highly redundant features (default is 0.8; set to 1 for no redundancy filtering)", type=float, default = 0.8)
|
|
@@ -94,7 +90,7 @@ def main():
|
|
|
94
90
|
parser.add_argument("--prefix", help="Job prefix to use for output files", type=str, default = 'job')
|
|
95
91
|
parser.add_argument("--log_transform", help="whether to apply log-transformation to input data matrices", type=str, choices=['True', 'False'], default = 'False')
|
|
96
92
|
parser.add_argument("--early_stop_patience", help="How many epochs to wait when no improvements in validation loss is observed (default 10; set to -1 to disable early stopping)", type=int, default = 10)
|
|
97
|
-
parser.add_argument("--hpo_patience", help="How many hyperparamater optimisation iterations to wait for when no improvements are observed (default is 10; set to 0 to disable early stopping)", type=int, default =
|
|
93
|
+
parser.add_argument("--hpo_patience", help="How many hyperparamater optimisation iterations to wait for when no improvements are observed (default is 10; set to 0 to disable early stopping)", type=int, default = 20)
|
|
98
94
|
parser.add_argument("--use_cv", action="store_true",
|
|
99
95
|
help="(Optional) If set, the a 5-fold cross-validation training will be done. Otherwise, a single trainig on 80 percent of the dataset is done.")
|
|
100
96
|
parser.add_argument("--use_loss_weighting", help="whether to apply loss-balancing using uncertainty weights method", type=str, choices=['True', 'False'], default = 'True')
|
|
@@ -119,11 +115,10 @@ def main():
|
|
|
119
115
|
|
|
120
116
|
# 2. Check for required variables for model classes
|
|
121
117
|
if args.model_class != "supervised_vae" and args.model_class != 'CrossModalPred':
|
|
122
|
-
if not any([args.target_variables, args.surv_event_var
|
|
118
|
+
if not any([args.target_variables, args.surv_event_var]):
|
|
123
119
|
parser.error(''.join(["When selecting a model other than 'supervised_vae' or 'CrossModalPred',",
|
|
124
120
|
"you must provide at least one of --target_variables, ",
|
|
125
|
-
"survival variables (--surv_event_var and --surv_time_var)"
|
|
126
|
-
"or --batch_variables."]))
|
|
121
|
+
"or survival variables (--surv_event_var and --surv_time_var)"]))
|
|
127
122
|
|
|
128
123
|
# 3. Check for compatibility of fusion_type with GNN
|
|
129
124
|
if args.fusion_type == "early":
|
|
@@ -274,7 +269,7 @@ def main():
|
|
|
274
269
|
tuner = flexynesis.HyperparameterTuning(dataset = train_dataset,
|
|
275
270
|
model_class = model_class,
|
|
276
271
|
target_variables = args.target_variables.strip().split(',') if args.target_variables is not None else [],
|
|
277
|
-
batch_variables =
|
|
272
|
+
batch_variables = None,
|
|
278
273
|
surv_event_var = args.surv_event_var,
|
|
279
274
|
surv_time_var = args.surv_time_var,
|
|
280
275
|
config_name = config_name,
|
|
@@ -322,7 +317,7 @@ def main():
|
|
|
322
317
|
embeddings_test.to_csv(os.path.join(args.outdir, '.'.join([args.prefix, 'embeddings_test.csv'])), header=True)
|
|
323
318
|
|
|
324
319
|
# evaluate predictions; (if any supervised learning happened)
|
|
325
|
-
if any([args.target_variables, args.surv_event_var
|
|
320
|
+
if any([args.target_variables, args.surv_event_var]):
|
|
326
321
|
if not args.disable_marker_finding: # unless marker discovery is disabled
|
|
327
322
|
# compute feature importance values
|
|
328
323
|
print("[INFO] Computing variable importance scores")
|
|
@@ -345,20 +340,6 @@ def main():
|
|
|
345
340
|
metrics_df.to_csv(os.path.join(args.outdir, '.'.join([args.prefix, 'stats.csv'])), header=True, index=False)
|
|
346
341
|
|
|
347
342
|
|
|
348
|
-
# also filter embeddings to remove batch-associated dims and only keep target-variable associated dims
|
|
349
|
-
if args.batch_variables is not None:
|
|
350
|
-
print("[INFO] Printing filtered embeddings")
|
|
351
|
-
embeddings_train_filtered = flexynesis.remove_batch_associated_variables(data = embeddings_train,
|
|
352
|
-
batch_dict={x: train_dataset.ann[x] for x in model.batch_variables} if model.batch_variables is not None else None,
|
|
353
|
-
target_dict={x: train_dataset.ann[x] for x in model.target_variables},
|
|
354
|
-
variable_types=train_dataset.variable_types)
|
|
355
|
-
# filter test embeddings to keep the same dims as the filtered training embeddings
|
|
356
|
-
embeddings_test_filtered = embeddings_test[embeddings_train_filtered.columns]
|
|
357
|
-
|
|
358
|
-
# save
|
|
359
|
-
embeddings_train_filtered.to_csv(os.path.join(args.outdir, '.'.join([args.prefix, 'embeddings_train.filtered.csv'])), header=True)
|
|
360
|
-
embeddings_test_filtered.to_csv(os.path.join(args.outdir, '.'.join([args.prefix, 'embeddings_test.filtered.csv'])), header=True)
|
|
361
|
-
|
|
362
343
|
# for architectures with decoders; print decoded output layers
|
|
363
344
|
if args.model_class == 'CrossModalPred':
|
|
364
345
|
print("[INFO] Printing decoded output layers")
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: flexynesis
|
|
3
|
+
Version: 0.2.7
|
|
4
|
+
Summary: A deep-learning based multi-omics bulk sequencing data integration suite with a focus on (pre-)clinical endpoint prediction.
|
|
5
|
+
Author-email: Bora Uyar <bora.uyar@mdc-berlin.de>, Taras Savchyn <Taras.Savchyn@mdc-berlin.de>, Ricardo Wurmus <Ricardo.Wurmus@mdc-berlin.de>, Ahmet Sarigun <Ahmet.Sariguen@mdc-berlin.de>
|
|
6
|
+
Project-URL: homepage, https://github.com/BIMSBbioinfo/flexynesis
|
|
7
|
+
Classifier: Development Status :: 3 - Alpha
|
|
8
|
+
Classifier: Intended Audience :: Developers
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
10
|
+
Requires-Python: <3.12,>=3.11
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
License-File: LICENCE
|
|
13
|
+
Requires-Dist: matplotlib
|
|
14
|
+
Requires-Dist: numpy
|
|
15
|
+
Requires-Dist: pandas
|
|
16
|
+
Requires-Dist: lightning
|
|
17
|
+
Requires-Dist: pyyaml
|
|
18
|
+
Requires-Dist: scikit-optimize
|
|
19
|
+
Requires-Dist: scipy
|
|
20
|
+
Requires-Dist: seaborn
|
|
21
|
+
Requires-Dist: torch
|
|
22
|
+
Requires-Dist: torchvision
|
|
23
|
+
Requires-Dist: tqdm
|
|
24
|
+
Requires-Dist: umap-learn
|
|
25
|
+
Requires-Dist: rich
|
|
26
|
+
Requires-Dist: captum
|
|
27
|
+
Requires-Dist: ipywidgets
|
|
28
|
+
Requires-Dist: torch_geometric
|
|
29
|
+
Requires-Dist: ipykernel
|
|
30
|
+
Requires-Dist: lifelines
|
|
31
|
+
Requires-Dist: papermill
|
|
32
|
+
Requires-Dist: scikit-survival
|
|
33
|
+
Requires-Dist: python-louvain
|
|
34
|
+
Provides-Extra: test
|
|
35
|
+
Requires-Dist: pytest; extra == "test"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
<p align="center">
|
|
39
|
+
<img alt="logo" src="https://github.com/BIMSBbioinfo/flexynesis/raw/main/img/logo.png" width="50%" height="50%">
|
|
40
|
+
</p>
|
|
41
|
+
|
|
42
|
+
[](https://pepy.tech/project/flexynesis)
|
|
43
|
+

|
|
44
|
+

|
|
45
|
+
|
|
46
|
+
# flexynesis
|
|
47
|
+
|
|
48
|
+
A deep-learning based multi-omics bulk sequencing data integration suite with a focus on (pre-)clinical
|
|
49
|
+
endpoint prediction. The package includes multiple types of deep learning architectures such as simple
|
|
50
|
+
fully connected networks, supervised variational autoencoders, graph convolutional networks, multi-triplet networks
|
|
51
|
+
different options of data layer fusion, and automates feature selection and hyperparameter optimisation. The tools are continuously benchmarked on publicly available datasets mostly related to the study of cancer. Some of the applications of the methods
|
|
52
|
+
we develop are drug response modeling in cancer patients or preclinical models (such as cell lines and
|
|
53
|
+
patient-derived xenografts), cancer subtype prediction, or any other clinically relevant outcome prediction
|
|
54
|
+
that can be formulated as a regression, classification, survival, or cross-modality prediction problem.
|
|
55
|
+
|
|
56
|
+
<p align="center">
|
|
57
|
+
<img alt="workflow" src="https://github.com/BIMSBbioinfo/flexynesis/raw/main/img/graphical_abstract.jpg">
|
|
58
|
+
</p>
|
|
59
|
+
|
|
60
|
+
# Citing our work
|
|
61
|
+
|
|
62
|
+
In order to refer to our work, please cite our manuscript currently available at [BioRxiv](https://biorxiv.org/cgi/content/short/2024.07.16.603606v1).
|
|
63
|
+
|
|
64
|
+
# Getting started with Flexynesis
|
|
65
|
+
|
|
66
|
+
## Command-line tutorial
|
|
67
|
+
|
|
68
|
+
- [Getting Started with Flexynesis](https://bimsbstatic.mdc-berlin.de/akalin/buyar/flexynesis/site/getting_started/)
|
|
69
|
+
|
|
70
|
+
## Jupyter notebooks for interactive usage
|
|
71
|
+
|
|
72
|
+
- [Modeling Breast Cancer Subtypes](https://github.com/BIMSBbioinfo/flexynesis/blob/main/examples/tutorials/brca_subtypes.ipynb)
|
|
73
|
+
- [Survival Markers of Lower Grade Gliomas](https://github.com/BIMSBbioinfo/flexynesis/blob/main/examples/tutorials/survival_subtypes_LGG_GBM.ipynb)
|
|
74
|
+
- [Unsupervised Analysis of Bone Marrow Cells](https://github.com/BIMSBbioinfo/flexynesis/blob/main/examples/tutorials/unsupervised_analysis_single_cell.ipynb)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
# Benchmarks
|
|
78
|
+
|
|
79
|
+
For the latest benchmark results see:
|
|
80
|
+
https://bimsbstatic.mdc-berlin.de/akalin/buyar/flexynesis-benchmark-datasets/dashboard.html
|
|
81
|
+
|
|
82
|
+
The code for the benchmarking pipeline is at: https://github.com/BIMSBbioinfo/flexynesis-benchmarks
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
# Defining Kernel for the Jupyter Notebook
|
|
86
|
+
|
|
87
|
+
For interactively using flexynesis on Jupyter notebooks, one can define the kernel to make
|
|
88
|
+
flexynesis and its dependencies available on the jupyter session.
|
|
89
|
+
|
|
90
|
+
Assuming you have already defined an environment and installed the package:
|
|
91
|
+
```
|
|
92
|
+
conda activate flexynesisenv
|
|
93
|
+
python -m ipykernel install --user --name "flexynesisenv" --display-name "flexynesisenv"
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
# Compiling Notebooks
|
|
97
|
+
|
|
98
|
+
`papermill` can be used to compile the tutorials under `examples/tutorials`.
|
|
99
|
+
|
|
100
|
+
If the purpose is to quickly check whether the notebook can be run, set HPO_ITER to 1.
|
|
101
|
+
This sets hyperparameter optimisation steps to 1.
|
|
102
|
+
For longer training runs to see more meaningful results from the notebook, increase this number to e.g. 50.
|
|
103
|
+
|
|
104
|
+
Example:
|
|
105
|
+
|
|
106
|
+
```
|
|
107
|
+
papermill examples/tutorials/brca_subtypes.ipynb brca_subtypes.ipynb -p HPO_ITER 1
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
The output from papermill can be converted to an html file as follows:
|
|
111
|
+
|
|
112
|
+
```
|
|
113
|
+
jupyter nbconvert --to html brca_subtypes.ipynb
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
# Documentation
|
|
117
|
+
|
|
118
|
+
Documentation generated using [mkdocs](https://mkdocstrings.github.io/)
|
|
119
|
+
|
|
120
|
+
```
|
|
121
|
+
pip install mkdocstrings[python]
|
|
122
|
+
mkdocs build --clean
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
|
|
@@ -4,7 +4,8 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "flexynesis"
|
|
7
|
-
version = "0.2.
|
|
7
|
+
version = "0.2.7"
|
|
8
|
+
license = {file = "LICENSE"}
|
|
8
9
|
authors = [
|
|
9
10
|
{name = "Bora Uyar", email = "bora.uyar@mdc-berlin.de"},
|
|
10
11
|
{name = "Taras Savchyn", email = "Taras.Savchyn@mdc-berlin.de"},
|