HInt-ppi 0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hint_ppi-0.1/HInt_ppi.egg-info/PKG-INFO +29 -0
- hint_ppi-0.1/HInt_ppi.egg-info/SOURCES.txt +8 -0
- hint_ppi-0.1/HInt_ppi.egg-info/dependency_links.txt +1 -0
- hint_ppi-0.1/HInt_ppi.egg-info/entry_points.txt +2 -0
- hint_ppi-0.1/HInt_ppi.egg-info/requires.txt +15 -0
- hint_ppi-0.1/HInt_ppi.egg-info/top_level.txt +1 -0
- hint_ppi-0.1/PKG-INFO +29 -0
- hint_ppi-0.1/README.md +159 -0
- hint_ppi-0.1/setup.cfg +4 -0
- hint_ppi-0.1/setup.py +36 -0
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: HInt-ppi
|
|
3
|
+
Version: 0.1
|
|
4
|
+
Summary: A tool to find homologous interactions and speed up AlphaFold-based structural modeling.
|
|
5
|
+
Home-page: https://github.com/Qrouger/HInt
|
|
6
|
+
Author: Quentin Rouger
|
|
7
|
+
Author-email: quentin.rouger@univ-rennes.fr
|
|
8
|
+
License: GPL-3.0 license
|
|
9
|
+
Requires-Dist: alphapulldown
|
|
10
|
+
Requires-Dist: matplotlib
|
|
11
|
+
Requires-Dist: nvidia-ml-py
|
|
12
|
+
Requires-Dist: torch==2.4.0
|
|
13
|
+
Requires-Dist: ihm
|
|
14
|
+
Requires-Dist: scipy==1.16.0
|
|
15
|
+
Requires-Dist: setuptools==80.9.0
|
|
16
|
+
Requires-Dist: jax[cuda12]==0.5.3
|
|
17
|
+
Requires-Dist: numpy==1.26.4
|
|
18
|
+
Requires-Dist: torchdata==0.9.0
|
|
19
|
+
Requires-Dist: pandas
|
|
20
|
+
Requires-Dist: pydantic
|
|
21
|
+
Requires-Dist: packaging
|
|
22
|
+
Requires-Dist: opt_einsum
|
|
23
|
+
Requires-Dist: torch-geometric
|
|
24
|
+
Dynamic: author
|
|
25
|
+
Dynamic: author-email
|
|
26
|
+
Dynamic: home-page
|
|
27
|
+
Dynamic: license
|
|
28
|
+
Dynamic: requires-dist
|
|
29
|
+
Dynamic: summary
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
hint_ppi-0.1/PKG-INFO
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: HInt-ppi
|
|
3
|
+
Version: 0.1
|
|
4
|
+
Summary: A tool to find homologous interactions and speed up AlphaFold-based structural modeling.
|
|
5
|
+
Home-page: https://github.com/Qrouger/HInt
|
|
6
|
+
Author: Quentin Rouger
|
|
7
|
+
Author-email: quentin.rouger@univ-rennes.fr
|
|
8
|
+
License: GPL-3.0 license
|
|
9
|
+
Requires-Dist: alphapulldown
|
|
10
|
+
Requires-Dist: matplotlib
|
|
11
|
+
Requires-Dist: nvidia-ml-py
|
|
12
|
+
Requires-Dist: torch==2.4.0
|
|
13
|
+
Requires-Dist: ihm
|
|
14
|
+
Requires-Dist: scipy==1.16.0
|
|
15
|
+
Requires-Dist: setuptools==80.9.0
|
|
16
|
+
Requires-Dist: jax[cuda12]==0.5.3
|
|
17
|
+
Requires-Dist: numpy==1.26.4
|
|
18
|
+
Requires-Dist: torchdata==0.9.0
|
|
19
|
+
Requires-Dist: pandas
|
|
20
|
+
Requires-Dist: pydantic
|
|
21
|
+
Requires-Dist: packaging
|
|
22
|
+
Requires-Dist: opt_einsum
|
|
23
|
+
Requires-Dist: torch-geometric
|
|
24
|
+
Dynamic: author
|
|
25
|
+
Dynamic: author-email
|
|
26
|
+
Dynamic: home-page
|
|
27
|
+
Dynamic: license
|
|
28
|
+
Dynamic: requires-dist
|
|
29
|
+
Dynamic: summary
|
hint_ppi-0.1/README.md
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
# <img src="https://github.com/user-attachments/assets/f4701588-b624-4afa-aa8f-9a3352a6572c" alt="HInt logo" width="200"/><br>
|
|
2
|
+
|
|
3
|
+
HInt is an optimized and scalable pipeline designed for high-throughput identification of homologous proteins that retain conserved functional interactions despite substantial sequence and structural divergence. By combining efficient MSA reuse, parallelized structure prediction, and automated interaction scoring, HInt significantly accelerates large-scale interaction screening while maintaining high predictive accuracy. This enables the systematic discovery of conserved interaction networks that remain undetectable through sequence or structural similarity alone.
|
|
4
|
+
|
|
5
|
+
# 1. Installation
|
|
6
|
+
```bash
|
|
7
|
+
conda create -n HInt -c conda-forge -c bioconda python==3.11 pdbfixer==1.9 mafft kalign2 hhsuite hmmer mmseqs2 git
|
|
8
|
+
conda activate HInt
|
|
9
|
+
pip install alphapulldown==2.1.4 nvidia-ml-py torch==2.4.0 ihm scipy==1.16.0 setuptools==80.9.0
|
|
10
|
+
pip install --no-warn-conflicts "colabfold[alphafold-minus-jax] @ git+https://github.com/sokrypton/ColabFold"
|
|
11
|
+
pip install -U "jax[cuda12]"==0.5.3 numpy==1.26.4
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
## A. Download the GPU-indexed MMseqs2 database (2 hours, 1.5T)
|
|
15
|
+
To accelerate MSA generation, it is strongly recommended to store the databases on NVMe or SSD drives rather than on HDD storage.<br>
|
|
16
|
+
```bash
|
|
17
|
+
wget https://raw.githubusercontent.com/sokrypton/ColabFold/main/setup_databases.sh
|
|
18
|
+
chmod +x setup_databases.sh
|
|
19
|
+
GPU=1 ./setup_databases.sh ./mmseq_database
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
To test the installation, run: `colabfold_search R388.fasta /data/colab_fold_data . --gpu 1 --db-load-mode 2` <br>
|
|
23
|
+
|
|
24
|
+
## B. Download AlphaFold database (3 hours, 2.7T)
|
|
25
|
+
```bash
|
|
26
|
+
sudo apt install aria2
|
|
27
|
+
git clone https://github.com/KosinskiLab/alphafold.git
|
|
28
|
+
cd alphafold
|
|
29
|
+
scripts/download_all_data.sh /<Database Directory> > download.log 2> download_all.log
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## C.1. Install deeplocpro (Prokaryote)
|
|
33
|
+
```bash
|
|
34
|
+
git clone https://github.com/Jaimomar99/deeplocpro
|
|
35
|
+
cd deeplocpro
|
|
36
|
+
pip install .
|
|
37
|
+
```
|
|
38
|
+
## C.2. Install deeploc (Eukaryote)
|
|
39
|
+
|
|
40
|
+
Download deeploc2 package here : https://services.healthtech.dtu.dk/services/DeepLoc-2.0/
|
|
41
|
+
```bash
|
|
42
|
+
cd deeploc2_package
|
|
43
|
+
pip install .
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## D. Install SignalP5
|
|
47
|
+
|
|
48
|
+
Download SignalP5 here : [https://services.healthtech.dtu.dk/services/SignalP-5.0/9-Downloads.php](https://services.healthtech.dtu.dk/cgi-bin/sw_request?software=signalp&version=5.0&packageversion=5.0b&platform=Darwin)<br>
|
|
49
|
+
```bash
|
|
50
|
+
tar -xvzf signalp-5.0b.Linux.tar.gz
|
|
51
|
+
cd signalp-5.0b/
|
|
52
|
+
sudo cp bin/signalp /usr/local/bin
|
|
53
|
+
sudo cp -r lib/* /usr/local/lib
|
|
54
|
+
```
|
|
55
|
+
## E. Install ccp4
|
|
56
|
+
Download ccp4 package here : https://www.ccp4.ac.uk/download/#os=linux
|
|
57
|
+
```bash
|
|
58
|
+
tar xvzf ccp4-9-setup.tar.gz
|
|
59
|
+
./ccp4-9-setup
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
<br>
|
|
63
|
+
|
|
64
|
+
# 2.Input parameters
|
|
65
|
+
## Setup HInt.txt
|
|
66
|
+
|
|
67
|
+
<br>
|
|
68
|
+
|
|
69
|
+
### The first part of the HInt.txt file contains all a priori information about the query protein.
|
|
70
|
+
|
|
71
|
+
**Signal_peptide** : Indicates whether the protein has a signal peptide (options: Yes, No or None). <br>
|
|
72
|
+
|
|
73
|
+
**DeepLoc** : Cellular localization(s) of the protein. Multiple localizations can be specified, separated by commas. <br>
|
|
74
|
+
|
|
75
|
+
- Eukaryotes : Cytoplasm, Nucleus, Extracellular, Cell membrane, Mitochondrion, Plastid, Endoplasmic reticulum, Lysosome/Vacuole, Golgi apparatus, Peroxisome.
|
|
76
|
+
- Prokaryotes : Cell wall & surface, Extracellular, Cytoplasmic, Cytoplasmic Membrane, Outer Membrane, Periplasmic.
|
|
77
|
+
|
|
78
|
+
**Max_protein_lenght** : Maximum length of the proteins you are searching for (integer). <br>
|
|
79
|
+
|
|
80
|
+
**Min_protein_lenght** : Minimum length of the proteins you are searching for (integer). <br>
|
|
81
|
+
|
|
82
|
+
**Homo-oligomer** : Known homo-oligomerization state of the protein (integer : 1 to 20). <br>
|
|
83
|
+
|
|
84
|
+
**Interact_with** : Names of proteins expected to interact with the query protein (UniprotID or protein fasta name).
|
|
85
|
+
<details>
|
|
86
|
+
<summary>Advanced bait uses and examples </summary>
|
|
87
|
+
|
|
88
|
+
One bait :
|
|
89
|
+
```
|
|
90
|
+
Interact_with : UniprotID1
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
Region of a bait :
|
|
94
|
+
```
|
|
95
|
+
Interact_with : UniprotID1(20-200)
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
Multiple baits : # The first one must be the principal bait. For now, a maximum of 3 different baits is supported.
|
|
99
|
+
```
|
|
100
|
+
Interact_with : UniprotID1, UniprotID2
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
Multimer bait : # Create a unique bait with multiple proteins
|
|
104
|
+
```
|
|
105
|
+
Interact_with : [UniprotID1, UniprotID2]
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
You can also mix all of these examples! <br>
|
|
109
|
+
/!\ HInt doesn't support multiple regions for bait proteins.
|
|
110
|
+
</details>
|
|
111
|
+
|
|
112
|
+
**Organism** : Organism of interest for SignalP5 and DeepLoc (arch, gram+, gram-, or euk). <br>
|
|
113
|
+
|
|
114
|
+
>[!TIP]
|
|
115
|
+
>If you don’t know the information or want to skip it, you can leave this field blank.
|
|
116
|
+
|
|
117
|
+
<br>
|
|
118
|
+
|
|
119
|
+
### The second part of the HInt.txt file contains paths.
|
|
120
|
+
|
|
121
|
+
**Path_AlphaFold_Data** : Path to the AlphaFold database (string).
|
|
122
|
+
|
|
123
|
+
**Path_ccp4** : Path to the CCP4 package (string). Defaults to /opt/xtal/ccp4-9.
|
|
124
|
+
|
|
125
|
+
**Path_MMseqs2_Data** : Path of GPU-indexed MMseqs2 database (string).
|
|
126
|
+
>[!NOTE]
|
|
127
|
+
>This path is not mandatory. If it is not set, MMseqs2-GPU will not be used.
|
|
128
|
+
|
|
129
|
+
**Path_Uniprot_ID** : Path to the protein sequence file (string).
|
|
130
|
+
|
|
131
|
+
**Path_Pickle_Feature** : Path where MSA files will be saved (string).
|
|
132
|
+
|
|
133
|
+
<br>
|
|
134
|
+
|
|
135
|
+
## Setup protein file
|
|
136
|
+
The protein file must contain all UniProt IDs or all sequences in FASTA format for both preys and baits. <br>
|
|
137
|
+
This can be an NCBI protein FASTA file, a classic FASTA file, UniProt IDs, or a combination of these. <br>
|
|
138
|
+
>Protein file example
|
|
139
|
+
|
|
140
|
+
>[!TIP]
|
|
141
|
+
>The use of UniprotIDs is recommended for pipeline speed.
|
|
142
|
+
|
|
143
|
+
<br>
|
|
144
|
+
|
|
145
|
+
## Run HInt
|
|
146
|
+
You need to be in the directory containing the HInt.txt file.
|
|
147
|
+
|
|
148
|
+
```bash
|
|
149
|
+
HInt --cpu <Integer> --gpu <Integer(s)> --multi_job_per_gpu <Boolean>
|
|
150
|
+
```
|
|
151
|
+
--cpu : Number of CPUs available for computation. Enables CPU parallelization. By default, set to half of the available CPUs. <br>
|
|
152
|
+
--gpu : Index(es) of the GPU(s) you want to use. Declaring multiple GPUs enables GPU parallelisation. By default, set to GPU 0. <br>
|
|
153
|
+
--multi_job_per_gpu : Allows multiple jobs to run on a single GPU, reducing modelling time. By default, set to True. <br>
|
|
154
|
+
|
|
155
|
+
<br>
|
|
156
|
+
|
|
157
|
+
## Folder structure
|
|
158
|
+
|
|
159
|
+
# 3.Example
|
hint_ppi-0.1/setup.cfg
ADDED
hint_ppi-0.1/setup.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""Install script for setuptools."""
|
|
2
|
+
from setuptools import find_packages
|
|
3
|
+
from setuptools import setup
|
|
4
|
+
|
|
5
|
+
setup(
|
|
6
|
+
name='HInt-ppi',
|
|
7
|
+
version='0.1',
|
|
8
|
+
description=(
|
|
9
|
+
'A tool to find homologous interactions and speed up AlphaFold-based structural modeling.'
|
|
10
|
+
),
|
|
11
|
+
author='Quentin Rouger',
|
|
12
|
+
author_email='quentin.rouger@univ-rennes.fr',
|
|
13
|
+
license='GPL-3.0 license',
|
|
14
|
+
url='https://github.com/Qrouger/HInt',
|
|
15
|
+
include_package_data=True,
|
|
16
|
+
packages=find_packages(),
|
|
17
|
+
install_requires=[
|
|
18
|
+
'alphapulldown',
|
|
19
|
+
'matplotlib',
|
|
20
|
+
'nvidia-ml-py',
|
|
21
|
+
'torch==2.4.0',
|
|
22
|
+
'ihm',
|
|
23
|
+
'scipy==1.16.0',
|
|
24
|
+
'setuptools==80.9.0',
|
|
25
|
+
#'colabfold[alphafold-minus-jax] @ git+https://github.com/sokrypton/ColabFold',
|
|
26
|
+
'jax[cuda12]==0.5.3',
|
|
27
|
+
'numpy==1.26.4',
|
|
28
|
+
'torchdata==0.9.0',
|
|
29
|
+
'pandas',
|
|
30
|
+
'pydantic',
|
|
31
|
+
'packaging',
|
|
32
|
+
'opt_einsum',
|
|
33
|
+
'torch-geometric'
|
|
34
|
+
],
|
|
35
|
+
entry_points={'console_scripts': ['PPIFold=PPIFold.PPIFold:main',],}
|
|
36
|
+
)
|