pertTF 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. perttf-0.1.1/.github/workflows/publish-pypi.yml +48 -0
  2. perttf-0.1.1/.github/workflows/publish-testpypi.yml +50 -0
  3. perttf-0.1.1/.gitignore +11 -0
  4. perttf-0.1.1/INSTALL.md +45 -0
  5. perttf-0.1.1/LICENSE +21 -0
  6. perttf-0.1.1/PKG-INFO +79 -0
  7. perttf-0.1.1/README.md +55 -0
  8. perttf-0.1.1/assets/img/LOGO.png +0 -0
  9. perttf-0.1.1/demos/run_on_hpc/.env_example +4 -0
  10. perttf-0.1.1/demos/run_on_hpc/README.md +21 -0
  11. perttf-0.1.1/demos/run_on_hpc/perturb_celltype.py +400 -0
  12. perttf-0.1.1/demos/run_on_hpc/pyproject.toml +27 -0
  13. perttf-0.1.1/demos/run_on_hpc/slurm_example.sh +84 -0
  14. perttf-0.1.1/demos/tutorials/INFERENCE.md +94 -0
  15. perttf-0.1.1/demos/tutorials/LORA_FINETUNING.md +172 -0
  16. perttf-0.1.1/demos/tutorials/lora_finetuning_tutorial.ipynb +2419 -0
  17. perttf-0.1.1/pertTF.egg-info/PKG-INFO +79 -0
  18. perttf-0.1.1/pertTF.egg-info/SOURCES.txt +42 -0
  19. perttf-0.1.1/pertTF.egg-info/dependency_links.txt +1 -0
  20. perttf-0.1.1/pertTF.egg-info/requires.txt +1 -0
  21. perttf-0.1.1/pertTF.egg-info/top_level.txt +1 -0
  22. perttf-0.1.1/perttf/__init__.py +3 -0
  23. perttf-0.1.1/perttf/custom_loss.py +702 -0
  24. perttf-0.1.1/perttf/model/base_model.py +495 -0
  25. perttf-0.1.1/perttf/model/composition_change_analysis.py +119 -0
  26. perttf-0.1.1/perttf/model/config_gen.py +74 -0
  27. perttf-0.1.1/perttf/model/expr_sampler.py +232 -0
  28. perttf-0.1.1/perttf/model/hf.py +772 -0
  29. perttf-0.1.1/perttf/model/modules.py +1147 -0
  30. perttf-0.1.1/perttf/model/pertTF.py +578 -0
  31. perttf-0.1.1/perttf/model/pert_emb.py +190 -0
  32. perttf-0.1.1/perttf/model/train_data_gen.py +64 -0
  33. perttf-0.1.1/perttf/model/train_function.py +1031 -0
  34. perttf-0.1.1/perttf/utils/custom_tokenizer.py +475 -0
  35. perttf-0.1.1/perttf/utils/load_pretrain.py +144 -0
  36. perttf-0.1.1/perttf/utils/logger.py +17 -0
  37. perttf-0.1.1/perttf/utils/misc.py +123 -0
  38. perttf-0.1.1/perttf/utils/ot.py +206 -0
  39. perttf-0.1.1/perttf/utils/pert_data_loader.py +770 -0
  40. perttf-0.1.1/perttf/utils/plot.py +269 -0
  41. perttf-0.1.1/perttf/utils/preprocessor.py +307 -0
  42. perttf-0.1.1/perttf/utils/set_optimizer.py +60 -0
  43. perttf-0.1.1/pyproject.toml +45 -0
  44. perttf-0.1.1/setup.cfg +4 -0
@@ -0,0 +1,48 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ workflow_dispatch:
5
+
6
+ permissions:
7
+ contents: read
8
+
9
+ jobs:
10
+ build:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - name: Checkout
14
+ uses: actions/checkout@v4
15
+
16
+ - name: Set up Python
17
+ uses: actions/setup-python@v5
18
+ with:
19
+ python-version: '3.11'
20
+
21
+ - name: Build package
22
+ run: |
23
+ python -m pip install --upgrade pip build
24
+ python -m build
25
+
26
+ - name: Upload artifacts
27
+ uses: actions/upload-artifact@v4
28
+ with:
29
+ name: python-package-distributions
30
+ path: dist/
31
+
32
+ publish-pypi:
33
+ runs-on: ubuntu-latest
34
+ needs: build
35
+ environment:
36
+ name: pypi
37
+ url: https://pypi.org/p/pertTF
38
+ permissions:
39
+ id-token: write
40
+ steps:
41
+ - name: Download artifacts
42
+ uses: actions/download-artifact@v4
43
+ with:
44
+ name: python-package-distributions
45
+ path: dist/
46
+
47
+ - name: Publish to PyPI
48
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,50 @@
1
+ name: Publish to TestPyPI
2
+
3
+ on:
4
+ workflow_dispatch:
5
+
6
+ permissions:
7
+ contents: read
8
+
9
+ jobs:
10
+ build:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - name: Checkout
14
+ uses: actions/checkout@v4
15
+
16
+ - name: Set up Python
17
+ uses: actions/setup-python@v5
18
+ with:
19
+ python-version: '3.11'
20
+
21
+ - name: Build package
22
+ run: |
23
+ python -m pip install --upgrade pip build
24
+ python -m build
25
+
26
+ - name: Upload artifacts
27
+ uses: actions/upload-artifact@v4
28
+ with:
29
+ name: python-package-distributions
30
+ path: dist/
31
+
32
+ publish-testpypi:
33
+ runs-on: ubuntu-latest
34
+ needs: build
35
+ environment:
36
+ name: testpypi
37
+ url: https://test.pypi.org/p/pertTF
38
+ permissions:
39
+ id-token: write
40
+ steps:
41
+ - name: Download artifacts
42
+ uses: actions/download-artifact@v4
43
+ with:
44
+ name: python-package-distributions
45
+ path: dist/
46
+
47
+ - name: Publish to TestPyPI
48
+ uses: pypa/gh-action-pypi-publish@release/v1
49
+ with:
50
+ repository-url: https://test.pypi.org/legacy/
@@ -0,0 +1,11 @@
1
+ scgpt
2
+ .DS_Store
3
+ *.h5ad
4
+ wandb
5
+ save
6
+ *__pycache__*
7
+ demos/run_on_hpc/.env
8
+ data/*.h5ad
9
+ runs/*
10
+ demos/run_on_hpc/slurm.sh
11
+
@@ -0,0 +1,45 @@
1
+
2
+ ## Install Newest PertTF version
3
+ **Newest Version of PertTF can use Flash Attention for at least > 2x speed up, works on GPU enabled machines**
4
+
5
+ This version still requires torchtext to be installed because scgpt imports it, but it will not throw errors for torch versions > 2.3.0; this allows flash-attention to be utilized. pertTF no longer imports torchtext components, so it will work fine
6
+
7
+ ### Colab Install
8
+ **NOTE for GOOGLE COLAB installation, check python, cuda and torch versions**
9
+ - Tesla T4s can only use flash v1
10
+ - V100s have no flash attn support (hopefully if SDPA layers are integrated into pertTF.py it can bring better performance)
11
+ - A100/L4 can use flash v2
12
+
13
+ Avoid installing flash v2 directly via `pip install flash-attn==xxx`, as that will force compilation, which takes forever; always install prebuilt wheels instead
14
+ currently colab comes with pytorch 2.8 and cuda 12.6
15
+ The following might work (probably needs some tweaking)
16
+ ```
17
+ pip install orbax==0.1.7
18
+ pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu12torch2.8cxx11abiFALSE-cp312-cp312-linux_x86_64.whl
19
+ pip install pandas scanpy "scvi-tools<1.0" numba datasets transformers==4.33.2 wandb cell-gears==0.1.2 torch_geometric pyarrow sentencepiece
20
+ ```
21
+
22
+ ### Clean Install on HPC
23
+ ```bash
24
+ mamba create -n torch_flashv2 python=3.10 cuda-toolkit=12.8 'gxx>=6.0.0,<12.0' cudnn jupyter ipykernel ca-certificates matplotlib -y -c pytorch -c nvidia -c conda-forge
25
+ pip install torch==2.6.0 torchvision orbax==0.1.7 torchdata torchmetrics
26
+ pip install pandas scanpy "scvi-tools<1.0" numba --upgrade "numpy<1.24" scib datasets transformers==4.33.2 wandb cell-gears==0.1.2 torch_geometric pyarrow sentencepiece
27
+
28
+ # Install Flash attention v1 with the following (2x speed up) uncommented
29
+ # pip install flash-attn==1.0.5 --no-build-isolation
30
+
31
+ ## Install Flash attention v2 (1.5x faster than v1) with the following (or find a wheel that fits your python, cuda and torch version in github)
32
+ pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
33
+
34
+ ```
35
+
36
+ #### INSTALL with FLASH ATTENTION v3 Beta (only on H200 architecture, 1.5-2x speed up over flash v2),
37
+ ```bash
38
+ mamba create -n pertTF_flashv3 python=3.10 'gxx>=6.0.0,<12.0' cudnn jupyter ipykernel ca-certificates matplotlib -y -c pytorch -c nvidia -c conda-forge
39
+ pip install torch==2.8.0 torchvision orbax==0.1.7 torchdata torchmetrics
40
+ pip install pandas scanpy "scvi-tools<1.0" numba --upgrade "numpy<1.24" scib datasets==2.14.5 transformers==4.33.2 wandb cell-gears==0.0.2 torch_geometric pyarrow==15.0.2 ninja packaging sentencepiece
41
+ # Installing flash attention v3 (1.5-2x speed up over v2) from git requires > 30 mins and > 400 GB RAM with 32 CPUs
42
+ git clone https://github.com/Dao-AILab/flash-attention.git
43
+ cd flash-attention
44
+ python setup.py install
45
+ ```
perttf-0.1.1/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Wei Li
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
perttf-0.1.1/PKG-INFO ADDED
@@ -0,0 +1,79 @@
1
+ Metadata-Version: 2.4
2
+ Name: pertTF
3
+ Version: 0.1.1
4
+ Summary: Transformer model with single-cell perturbation predictions
5
+ Author-email: Wei Li <wei@weililab.org>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/davidliwei/pertTF
8
+ Project-URL: Repository, https://github.com/davidliwei/pertTF
9
+ Keywords: single-cell,transformer,perturbation,bioinformatics
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.7
15
+ Classifier: Programming Language :: Python :: 3.8
16
+ Classifier: Programming Language :: Python :: 3.9
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Requires-Python: >=3.7
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Requires-Dist: scanpy>=1.10.0
23
+ Dynamic: license-file
24
+
25
+ <img src="assets/img/LOGO.png" alt="" width="800"/>
26
+
27
+ **pertTF is a transformer model designed for single-cell perturbation predictions.**
28
+ -----
29
+ # Installation
30
+ ## Prerequisite environment
31
+ pertTF requires `torch > 2.3.0` and `cuda > 12.0`
32
+
33
+ The best way to install is to set up a separate environment with conda or mamba
34
+ ```bash
35
+ # create independent environment (recommended)
36
+ mamba create -n pertTF_env python=3.10 cuda-toolkit=12.8 'gxx>=6.0.0,<12.0' cudnn ca-certificates -y -c pytorch -c nvidia -c conda-forge
37
+
38
+ # pip install required packages
39
+ # it is best to install torch == 2.6.0 to match the flash attention compiled wheel below
40
+ # higher versions of torch may present difficulties for installing flash attention 2
41
+ pip install torch==2.6.0 torchvision orbax==0.1.7 torchdata torchmetrics pandas scanpy numba --upgrade "numpy<1.24" datasets transformers==4.33.2 wandb torch_geometric pyarrow sentencepiece huggingface_hub omegaconf
42
+ ```
43
+ flash attention is strongly recommended for training or finetuning
44
+
45
+ ```bash
46
+ # flash attention 2 installation
47
+ #check ABI true/false first
48
+ python -c "import torch;print(torch._C._GLIBCXX_USE_CXX11_ABI)"
49
+ # install appropriate version (the example below is for ABI=FALSE)
50
+ pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
51
+
52
+ # flash attention 3 installation (recommended for torch > 2.6.0 and hopper GPUs)
53
+ # To install flash attention v3 (1.5-2x speed up over v2) requires > 30mins, > 400GB RAM, 32 CPUS (aim for more than this)
54
+ git clone https://github.com/Dao-AILab/flash-attention.git
55
+ cd flash-attention
56
+ python setup.py install
57
+ ```
58
+ ## pertTF installation
59
+ You can install and use pertTF in two ways.
60
+
61
+ The first way: pertTF is available on PyPI. Use the following command to install pertTF:
62
+ ```bash
63
+ pip install -i https://test.pypi.org/simple/ pertTF
64
+ ```
65
+ The second way is suitable if you want to run the most recent pertTF source code. First, clone our pertTF GitHub repository:
66
+
67
+ ```bash
68
+ git clone https://github.com/davidliwei/pertTF.git
69
+ ```
70
+ Then, in your python code, you can directly use the pertTF package:
71
+ ```python
72
+ import sys
73
+ sys.path.insert(0, '/content/pertTF/')
74
+ ```
75
+ -----------------------
76
+
77
+ ## Tutorials
78
+
79
+ - [Inference Tutorial](demos/tutorials/INFERENCE.md)
perttf-0.1.1/README.md ADDED
@@ -0,0 +1,55 @@
1
+ <img src="assets/img/LOGO.png" alt="" width="800"/>
2
+
3
+ **pertTF is a transformer model designed for single-cell perturbation predictions.**
4
+ -----
5
+ # Installation
6
+ ## Prerequisite environment
7
+ pertTF requires `torch > 2.3.0` and `cuda > 12.0`
8
+
9
+ The best way to install is to set up a separate environment with conda or mamba
10
+ ```bash
11
+ # create independent environment (recommended)
12
+ mamba create -n pertTF_env python=3.10 cuda-toolkit=12.8 'gxx>=6.0.0,<12.0' cudnn ca-certificates -y -c pytorch -c nvidia -c conda-forge
13
+
14
+ # pip install required packages
15
+ # it is best to install torch == 2.6.0 to match the flash attention compiled wheel below
16
+ # higher versions of torch may present difficulties for installing flash attention 2
17
+ pip install torch==2.6.0 torchvision orbax==0.1.7 torchdata torchmetrics pandas scanpy numba --upgrade "numpy<1.24" datasets transformers==4.33.2 wandb torch_geometric pyarrow sentencepiece huggingface_hub omegaconf
18
+ ```
19
+ flash attention is strongly recommended for training or finetuning
20
+
21
+ ```bash
22
+ # flash attention 2 installation
23
+ #check ABI true/false first
24
+ python -c "import torch;print(torch._C._GLIBCXX_USE_CXX11_ABI)"
25
+ # install appropriate version (the example below is for ABI=FALSE)
26
+ pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
27
+
28
+ # flash attention 3 installation (recommended for torch > 2.6.0 and hopper GPUs)
29
+ # To install flash attention v3 (1.5-2x speed up over v2) requires > 30mins, > 400GB RAM, 32 CPUS (aim for more than this)
30
+ git clone https://github.com/Dao-AILab/flash-attention.git
31
+ cd flash-attention
32
+ python setup.py install
33
+ ```
34
+ ## pertTF installation
35
+ You can install and use pertTF in two ways.
36
+
37
+ The first way: pertTF is available on PyPI. Use the following command to install pertTF:
38
+ ```bash
39
+ pip install -i https://test.pypi.org/simple/ pertTF
40
+ ```
41
+ The second way is suitable if you want to run the most recent pertTF source code. First, clone our pertTF GitHub repository:
42
+
43
+ ```bash
44
+ git clone https://github.com/davidliwei/pertTF.git
45
+ ```
46
+ Then, in your python code, you can directly use the pertTF package:
47
+ ```python
48
+ import sys
49
+ sys.path.insert(0, '/content/pertTF/')
50
+ ```
51
+ -----------------------
52
+
53
+ ## Tutorials
54
+
55
+ - [Inference Tutorial](demos/tutorials/INFERENCE.md)
Binary file
@@ -0,0 +1,4 @@
1
+ WANDB_API_KEY=<WANDB_API_KEY_GOES_HERE>
2
+ WANDB_SILENT=true
3
+ WANDB__EXECUTABLE=/usr/local/bin/python
4
+ KMP_WARNINGS=off
@@ -0,0 +1,21 @@
1
+ ### scGPT Slurm Job Submission on HPC
2
+
3
+ This repository contains the necessary scripts to run scGPT on a High Performance Computing (HPC) cluster using Slurm. The scripts are designed to be used with the [scGPT](https://github.com/bowang-lab/scGPT) repository.
4
+
5
+ ### Usage
6
+
7
+ 1. Clone this repository to your HPC cluster.
8
+ 2. Update the `.env_example` file with your wandb API key and save it as `.env` in the same directory.
9
+ 3. Create a `data` directory within the repository directory. Place the `.h5ad` file you want to use with scGPT into the `data` directory.
10
+ 4. Adjust the `slurm_example.sh` script to match the correct paths and parameters for your HPC cluster. Ensure you update `$DATA_PATH` in the same script to point to your `.h5ad` file.
11
+
12
+ You can then use the following command to submit a job to the HPC cluster:
13
+ ```bash
14
+ sbatch slurm.sh
15
+ ```
16
+
17
+ The `slurm.sh` file will check whether the required software is installed and will proceed to set up the working directory if it is not. This includes:
18
+
19
+ - Cloning the scGPT repository
20
+ - Downloading the necessary data
21
+ - Setting up the Python environment (using UV)