RP3Net 0.0.1__tar.gz → 0.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rp3net-0.0.1 → rp3net-0.0.2}/.github/workflows/pypi-publish.yml +1 -1
- {rp3net-0.0.1 → rp3net-0.0.2}/.github/workflows/python-app.yml +1 -1
- {rp3net-0.0.1 → rp3net-0.0.2}/.github/workflows/testpypi-publish.yml +1 -1
- rp3net-0.0.2/Dockerfile +26 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/PKG-INFO +18 -2
- {rp3net-0.0.1 → rp3net-0.0.2}/README.md +16 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/pyproject.toml +2 -2
- rp3net-0.0.2/rp3_colab.ipynb +221 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net.egg-info/PKG-INFO +18 -2
- {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net.egg-info/SOURCES.txt +2 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net.egg-info/requires.txt +1 -1
- {rp3net-0.0.1 → rp3net-0.0.2}/.github/workflows/python-app-training.yml +0 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/.gitignore +0 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/.vscode/launch.json +0 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/.vscode/settings.json +0 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/LICENSE +0 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/config/trainer_a.yml +0 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/config/trainer_b.yml +0 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/config/trainer_d.yml +0 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/config/trainer_ebi_test.yml +0 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/config/trainer_mac.yml +0 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/setup.cfg +0 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/__init__.py +0 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/fm_cfg/esm2_650m/config.json +0 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/fm_cfg/esm2_650m/special_tokens_map.json +0 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/fm_cfg/esm2_650m/tokenizer_config.json +0 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/fm_cfg/esm2_650m/vocab.txt +0 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/model/__init__.py +0 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/model/layers.py +0 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/model/model.py +0 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/rp3_main.py +0 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/rp3_train.py +0 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/training/__init__.py +0 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/training/cli.py +0 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/training/data.py +0 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/training/data_emlc.py +0 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/training/lm.py +0 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/training/lm_emlc.py +0 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/training/metrics.py +0 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/util/__init__.py +0 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/util/fasta.py +0 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/util/torch.py +0 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/util/util.py +0 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net.egg-info/dependency_links.txt +0 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net.egg-info/entry_points.txt +0 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net.egg-info/top_level.txt +0 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/tests/__init__.py +0 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/tests/rp3_test.py +0 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/tests/split_test.py +0 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/tests_ebi/__init__.py +0 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/tests_ebi/rp3_ebi_test.py +0 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/tests_training/__init__.py +0 -0
- {rp3net-0.0.1 → rp3net-0.0.2}/tests_training/rp3_public_checkpoint_test.py +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# This workflow will install Python dependencies, run tests and lint with a single version of Python
|
|
2
2
|
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
|
|
3
3
|
|
|
4
|
-
name:
|
|
4
|
+
name: CI
|
|
5
5
|
|
|
6
6
|
on:
|
|
7
7
|
push:
|
rp3net-0.0.2/Dockerfile
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# docker build --platform linux/amd64 -t rp3net:test_1 .
|
|
2
|
+
FROM pytorch/pytorch:2.5.1-cuda12.4-cudnn9-runtime
|
|
3
|
+
|
|
4
|
+
RUN apt-get update && \
|
|
5
|
+
apt-get install -y git wget && \
|
|
6
|
+
mkdir -p /rp3/python && \
|
|
7
|
+
cd /rp3 && \
|
|
8
|
+
python -m venv --clear --system-site-packages python && \
|
|
9
|
+
/rp3/python/bin/pip install --upgrade pip && \
|
|
10
|
+
/rp3/python/bin/pip install RP3Net jupyter && \
|
|
11
|
+
/rp3/python/bin/jupyter server --generate-config && \
|
|
12
|
+
wget -nv -nc https://ftp.ebi.ac.uk/pub/software/RP3Net/v0.1/checkpoints/rp3net_v0.1_d.ckpt && \
|
|
13
|
+
wget -nv -nc https://raw.githubusercontent.com/RP3Net/RP3Net/refs/heads/main/rp3_colab.ipynb
|
|
14
|
+
|
|
15
|
+
# /root/.jupyter/jupyter_server_config.py is generated by `jupyter server --generate-config`
|
|
16
|
+
RUN <<EOT cat >> /root/.jupyter/jupyter_server_config.py
|
|
17
|
+
c.ServerApp.ip = '0.0.0.0'
|
|
18
|
+
c.ServerApp.open_browser = False
|
|
19
|
+
c.ServerApp.allow_root = True
|
|
20
|
+
c.ExtensionApp.open_browser = False
|
|
21
|
+
c.IdentityProvider.token = ''
|
|
22
|
+
EOT
|
|
23
|
+
|
|
24
|
+
ENV PATH="/rp3/python/bin:$PATH"
|
|
25
|
+
WORKDIR /rp3
|
|
26
|
+
EXPOSE 8888
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: RP3Net
|
|
3
|
-
Version: 0.0.1
|
|
3
|
+
Version: 0.0.2
|
|
4
4
|
Summary: RP3Net: Modelling of recombinant soluble protein production in E. coli
|
|
5
5
|
Maintainer-email: Evgeny Tankhilevich <evgeny@ebi.ac.uk>
|
|
6
6
|
License: MIT
|
|
@@ -14,7 +14,7 @@ Requires-Dist: torch==2.5.*
|
|
|
14
14
|
Requires-Dist: pandas
|
|
15
15
|
Requires-Dist: transformers==4.46.*
|
|
16
16
|
Requires-Dist: ml_collections
|
|
17
|
-
Requires-Dist: peft
|
|
17
|
+
Requires-Dist: peft<0.18
|
|
18
18
|
Requires-Dist: tqdm
|
|
19
19
|
Provides-Extra: training
|
|
20
20
|
Requires-Dist: lightning[pytorch-extra]==2.4.*; extra == "training"
|
|
@@ -23,8 +23,24 @@ Requires-Dist: wandb; extra == "training"
|
|
|
23
23
|
Dynamic: license-file
|
|
24
24
|
|
|
25
25
|
# RP3Net
|
|
26
|
+
|
|
27
|
+
[](https://github.com/RP3Net/RP3Net/actions/workflows/python-app.yml)
|
|
28
|
+
[](https://colab.research.google.com/github/RP3Net/RP3Net/blob/main/rp3_colab.ipynb)
|
|
29
|
+
[](https://doi.org/10.1101/2025.05.13.652824)
|
|
30
|
+
[](https://pypi.org/project/RP3Net/)
|
|
31
|
+
|
|
26
32
|
RP3Net is an AI model for predicting the results of recombinant small-scale protein production in _E. coli_ from the construct sequence. See [the preprint](https://www.biorxiv.org/content/10.1101/2025.05.13.652824v1) and [supplements](https://ftp.ebi.ac.uk/pub/software/RP3Net/) for more details on how it works.
|
|
27
33
|
|
|
34
|
+
# Try it out
|
|
35
|
+
The simplest way to run the model inference is to open the [Colab notebook](https://colab.research.google.com/github/RP3Net/RP3Net/blob/main/rp3_colab.ipynb), paste the sequences in FASTA format into the first cell and hit `Runtime -> Run All`. Use a runtime with a GPU to speed things up.
|
|
36
|
+
|
|
37
|
+
## Docker
|
|
38
|
+
Another way to try out the model without disclosing the sequences is via the [Docker image](https://hub.docker.com/r/rp3net/rp3net): `docker pull rp3net/rp3net`. The image contains the binary installation of the package, the checkpoint, a Jupyter server and the notebook. It supports CUDA. To run the Docker container from the command line, using mounts to send the data in and out:
|
|
39
|
+
```
|
|
40
|
+
docker run -v /path/to/my/files:/mnt/rp3 rp3net/rp3net rp3 -p rp3net_v0.1_d.ckpt -f /mnt/rp3/sequences.fasta.gz -o /mnt/rp3/scores.csv.gz --log_file /dev/null
|
|
41
|
+
```
|
|
42
|
+
To interact with the container via the notebook, run `docker run -p 8888:8888 rp3net/rp3net jupyter lab`, open http://localhost:8888 in the browser and open `rp3_colab.ipynb`. There is no need to download the checkpoint and install the dependencies.
|
|
43
|
+
|
|
28
44
|
# Checkpoints
|
|
29
45
|
* https://ftp.ebi.ac.uk/pub/software/RP3Net/v0.1/checkpoints/
|
|
30
46
|
|
|
@@ -1,6 +1,22 @@
|
|
|
1
1
|
# RP3Net
|
|
2
|
+
|
|
3
|
+
[](https://github.com/RP3Net/RP3Net/actions/workflows/python-app.yml)
|
|
4
|
+
[](https://colab.research.google.com/github/RP3Net/RP3Net/blob/main/rp3_colab.ipynb)
|
|
5
|
+
[](https://doi.org/10.1101/2025.05.13.652824)
|
|
6
|
+
[](https://pypi.org/project/RP3Net/)
|
|
7
|
+
|
|
2
8
|
RP3Net is an AI model for predicting the results of recombinant small-scale protein production in _E. coli_ from the construct sequence. See [the preprint](https://www.biorxiv.org/content/10.1101/2025.05.13.652824v1) and [supplements](https://ftp.ebi.ac.uk/pub/software/RP3Net/) for more details on how it works.
|
|
3
9
|
|
|
10
|
+
# Try it out
|
|
11
|
+
The simplest way to run the model inference is to open the [Colab notebook](https://colab.research.google.com/github/RP3Net/RP3Net/blob/main/rp3_colab.ipynb), paste the sequences in FASTA format into the first cell and hit `Runtime -> Run All`. Use a runtime with a GPU to speed things up.
|
|
12
|
+
|
|
13
|
+
## Docker
|
|
14
|
+
Another way to try out the model without disclosing the sequences is via the [Docker image](https://hub.docker.com/r/rp3net/rp3net): `docker pull rp3net/rp3net`. The image contains the binary installation of the package, the checkpoint, a Jupyter server and the notebook. It supports CUDA. To run the Docker container from the command line, using mounts to send the data in and out:
|
|
15
|
+
```
|
|
16
|
+
docker run -v /path/to/my/files:/mnt/rp3 rp3net/rp3net rp3 -p rp3net_v0.1_d.ckpt -f /mnt/rp3/sequences.fasta.gz -o /mnt/rp3/scores.csv.gz --log_file /dev/null
|
|
17
|
+
```
|
|
18
|
+
To interact with the container via the notebook, run `docker run -p 8888:8888 rp3net/rp3net jupyter lab`, open http://localhost:8888 in the browser and open `rp3_colab.ipynb`. There is no need to download the checkpoint and install the dependencies.
|
|
19
|
+
|
|
4
20
|
# Checkpoints
|
|
5
21
|
* https://ftp.ebi.ac.uk/pub/software/RP3Net/v0.1/checkpoints/
|
|
6
22
|
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "RP3Net"
|
|
7
|
-
version = "0.0.1"
|
|
7
|
+
version = "0.0.2"
|
|
8
8
|
description = "RP3Net: Modelling of recombinant soluble protein production in E. coli"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10,<4.0"
|
|
@@ -22,7 +22,7 @@ dependencies = [
|
|
|
22
22
|
"pandas",
|
|
23
23
|
"transformers==4.46.*",
|
|
24
24
|
"ml_collections",
|
|
25
|
-
"peft",
|
|
25
|
+
"peft<0.18",
|
|
26
26
|
"tqdm"
|
|
27
27
|
]
|
|
28
28
|
|
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
{
|
|
2
|
+
"cells": [
|
|
3
|
+
{
|
|
4
|
+
"cell_type": "code",
|
|
5
|
+
"execution_count": 9,
|
|
6
|
+
"id": "48f4e51a",
|
|
7
|
+
"metadata": {
|
|
8
|
+
"id": "48f4e51a"
|
|
9
|
+
},
|
|
10
|
+
"outputs": [],
|
|
11
|
+
"source": [
|
|
12
|
+
"#@title Enter the construct sequences in FASTA format and hit `Runtime` -> `Run all`\n",
|
|
13
|
+
"batch_size = 4 # @param {\"type\":\"integer\"}\n",
|
|
14
|
+
"input_fasta = \"\"\"\n",
|
|
15
|
+
">CONSTR_000001\n",
|
|
16
|
+
"MTVFFVTRLVKKHDKLSKQQIEDFAEKLMTILFETYRSHWHSDCPSKGQAFRCIRINNNQ\n",
|
|
17
|
+
"NKDPILERACVESNVDFSHLGLPKEMTIWVDPFEVCCRYGEKNHPFTVASFKGRWEEWEL\n",
|
|
18
|
+
"YQQISYAVSRASSDVSSGTSCDEESCGSHHHHHH\n",
|
|
19
|
+
">CONSTR_000002\n",
|
|
20
|
+
"MDYTKPLEHPPVKRNEEAQVHDKLNSGMVSNMEGTAGGERPSVVNGDSGKSGGVGDPREP\n",
|
|
21
|
+
"LGCLQEGSGCHPTTESFEKSVREDASPLPHVCCCKQDALILQRGLHHEDGSQHIGLLHPG\n",
|
|
22
|
+
"DRGPDHEYVLVEEAECGSHHHHHH\n",
|
|
23
|
+
">CONSTR_000003\n",
|
|
24
|
+
"MHHHHHHENLYFQGSLEVRGQLQSALLILGEPKEGGMPMNISIMPSSLQMKTPEGCTEIQ\n",
|
|
25
|
+
"LPAEVRLVPSSCRGLQFVVGDGLHLRLQTQAKLGTKLISMFNQSSQTQE\n",
|
|
26
|
+
">CONSTR_000004\n",
|
|
27
|
+
"MECPEGQLPISSENDSTPTVSTSEVTSQQEPQILVDRGSETTYESSADIAGDEGTQIPAD\n",
|
|
28
|
+
"EDTQTDADSSAQAAAQAPENFQEGKDMSESQDEVPDEVENGSHHHHHH\n",
|
|
29
|
+
">CONSTR_000005\n",
|
|
30
|
+
"MSTAPSEDIWKKFELVPSPPTSPPWGLGPGAGDPAPGIGPPEPWPGGCTGDEAESRGHSK\n",
|
|
31
|
+
"GWGRNYASIIRRDCMWSGFSARERLERAVSDRLAPGAPRGNPPKASAAPDCTPSLEAGNP\n",
|
|
32
|
+
"APAAPCPLGEPKTQACSGSESPSDSENEEIDVVTVEKRQSLGIRKPVTITVRADPLDPCM\n",
|
|
33
|
+
"KHFHGSHHHHHH\n",
|
|
34
|
+
">CONSTR_000006\n",
|
|
35
|
+
"MEKARHETFAAEMRQNDKIMCILENRKKRDRKNLCRAINDFQQSFQKPETRREFDLSDPL\n",
|
|
36
|
+
"ALKKDLPARQSDNDVRNTISGMQGSHHHHHH\n",
|
|
37
|
+
">CONSTR_000007\n",
|
|
38
|
+
"MLMKKAYELSVLCDCEIALIIFNSANRLFQYASTDMDRVLLKYTEYSEPHESRTNTDILE\n",
|
|
39
|
+
"TLKRRGIGLDGPELEPDEGPEEPGEKFRRLAGEGGDPGSHHHHHH\n",
|
|
40
|
+
">CONSTR_000008\n",
|
|
41
|
+
"MPTESASCSTARQTKQKRKSHSLSIRRTNSSEQERTGLPRDMLEGQDSKLPSSVRSTLLE\n",
|
|
42
|
+
"LFGQIEREFENLYIENLELRREIDTLNERLAAEGQAIDGAELSKGQLKTKASHSTSQLSQ\n",
|
|
43
|
+
"KLKTTYKASTSKIVSSFKTTTSRAACQLVKEYIGHRDGIWDVSVAKTQPVVLGTASADHT\n",
|
|
44
|
+
"ALLWSIETGKCLVKYAGHVGSVNSIKFHPSEQLALTASGDQTAHIWRYAVQLPTPQPVAD\n",
|
|
45
|
+
"TSISGEDEVECSDKDEPDLDGDVSSDCPTIRVPLTSLKSHQGVVIASDWLVGGKQAVTAS\n",
|
|
46
|
+
"WDRTANLYDVETSELVHSLTGHDQELTHCCTHPTQRLVVTSSRDTTFRLWDFRDPSIHSV\n",
|
|
47
|
+
"NVFQGHTDTVTSAVFTVGDNVVSGSDDRTVKVWDLKNMRSPIATIRTDSAINRINVCVGQ\n",
|
|
48
|
+
"KIIALPHDNRQVRLFDMSGVRLARLPRSSRQGHRRMVCCSAWSEDHPVCNLFTCGFDRQA\n",
|
|
49
|
+
"IGWNINIPALLQEKGSHHHHHH\n",
|
|
50
|
+
">CONSTR_000009\n",
|
|
51
|
+
"MHHHHHHENLYFQGSPTESASCSTARQTKQKRKSHSLSIRRTNSSEQERTGLPRDMLEGQ\n",
|
|
52
|
+
"DSKLPSSVRSTLLELFGQIEREFENLYIENLELRREIDTLNERLAAEGQAIDGAELSKGQ\n",
|
|
53
|
+
"LKTKASHSTSQLSQKLKTTYKASTSKIVSSFKTTTSRAACQLVKEYIGHRDGIWDVSVAK\n",
|
|
54
|
+
"TQPVVLGTASADHTALLWSIETGKCLVKYAGHVGSVNSIKFHPSEQLALTASGDQTAHIW\n",
|
|
55
|
+
"RYAVQLPTPQPVADTSISGEDEVECSDKDEPDLDGDVSSDCPTIRVPLTSLKSHQGVVIA\n",
|
|
56
|
+
"SDWLVGGKQAVTASWDRTANLYDVETSELVHSLTGHDQELTHCCTHPTQRLVVTSSRDTT\n",
|
|
57
|
+
"FRLWDFRDPSIHSVNVFQGHTDTVTSAVFTVGDNVVSGSDDRTVKVWDLKNMRSPIATIR\n",
|
|
58
|
+
"TDSAINRINVCVGQKIIALPHDNRQVRLFDMSGVRLARLPRSSRQGHRRMVCCSAWSEDH\n",
|
|
59
|
+
"PVCNLFTCGFDRQAIGWNINIPALLQEK\n",
|
|
60
|
+
">CONSTR_000010\n",
|
|
61
|
+
"MRDEIATTVFFVTRLVKKHDKLSKQQIEDFAEKLMTILFETYRSHWHSDCPSKGQAFRCI\n",
|
|
62
|
+
"RINNNQNKDPILERACVESNVDFSHLGLPKEMTIWVDPFEVCCRYGEKNHPFTVASFKGR\n",
|
|
63
|
+
"WEEWELYQQISYAVSRASSDVSSGTSCDEESCSKEPRVIPKVSNPKSIYQVENLKQPFQS\n",
|
|
64
|
+
"WLQIPRKKNVVDGRVGLLGNTYHGSQKHPKCYRPAMHRLDRILGSHHHHHH\n",
|
|
65
|
+
"\"\"\""
|
|
66
|
+
]
|
|
67
|
+
},
|
|
68
|
+
{
|
|
69
|
+
"cell_type": "code",
|
|
70
|
+
"execution_count": null,
|
|
71
|
+
"id": "b5e9d39a",
|
|
72
|
+
"metadata": {
|
|
73
|
+
"cellView": "form",
|
|
74
|
+
"collapsed": true,
|
|
75
|
+
"id": "b5e9d39a"
|
|
76
|
+
},
|
|
77
|
+
"outputs": [],
|
|
78
|
+
"source": [
|
|
79
|
+
"#@title Install the dependencies and download the checkpoint\n",
|
|
80
|
+
"%%bash\n",
|
|
81
|
+
"\n",
|
|
82
|
+
"set -e\n",
|
|
83
|
+
"\n",
|
|
84
|
+
"pip install RP3Net 'torchvision==0.20.1'\n",
|
|
85
|
+
"wget -nv -nc https://ftp.ebi.ac.uk/pub/software/RP3Net/v0.1/checkpoints/rp3net_v0.1_d.ckpt"
|
|
86
|
+
]
|
|
87
|
+
},
|
|
88
|
+
{
|
|
89
|
+
"cell_type": "code",
|
|
90
|
+
"execution_count": 5,
|
|
91
|
+
"id": "15ec2247",
|
|
92
|
+
"metadata": {
|
|
93
|
+
"cellView": "form",
|
|
94
|
+
"id": "15ec2247"
|
|
95
|
+
},
|
|
96
|
+
"outputs": [],
|
|
97
|
+
"source": [
|
|
98
|
+
"#@title Imports\n",
|
|
99
|
+
"import re\n",
|
|
100
|
+
"import io\n",
|
|
101
|
+
"import pandas as pd\n",
|
|
102
|
+
"import RP3Net as rp3\n",
|
|
103
|
+
"from tqdm.notebook import tqdm\n",
|
|
104
|
+
"RE_FASTA_HEADER = re.compile(r'^>([\\w\\-.:#*]+)') # https://www.ncbi.nlm.nih.gov/genbank/fastaformat/"
|
|
105
|
+
]
|
|
106
|
+
},
|
|
107
|
+
{
|
|
108
|
+
"cell_type": "code",
|
|
109
|
+
"execution_count": 6,
|
|
110
|
+
"id": "c5d31cfc",
|
|
111
|
+
"metadata": {
|
|
112
|
+
"cellView": "form",
|
|
113
|
+
"id": "c5d31cfc"
|
|
114
|
+
},
|
|
115
|
+
"outputs": [],
|
|
116
|
+
"source": [
|
|
117
|
+
"#@title Helper functions\n",
|
|
118
|
+
"def iter_fasta(io):\n",
|
|
119
|
+
" fasta_id, sequence = None, None\n",
|
|
120
|
+
" for line in io:\n",
|
|
121
|
+
" line = line.strip()\n",
|
|
122
|
+
" if len(line) == 0:\n",
|
|
123
|
+
" continue\n",
|
|
124
|
+
" m = RE_FASTA_HEADER.match(line)\n",
|
|
125
|
+
" if m:\n",
|
|
126
|
+
" if fasta_id is not None:\n",
|
|
127
|
+
" yield fasta_id, ''.join(sequence)\n",
|
|
128
|
+
" sequence = []\n",
|
|
129
|
+
" fasta_id = m.group(1)\n",
|
|
130
|
+
" else:\n",
|
|
131
|
+
" sequence.append(line)\n",
|
|
132
|
+
" if fasta_id is not None:\n",
|
|
133
|
+
" yield fasta_id, ''.join(sequence)\n",
|
|
134
|
+
"\n",
|
|
135
|
+
"def parse_fasta(s):\n",
|
|
136
|
+
" return {id: seq for id, seq in iter_fasta(io.StringIO(s))}\n",
|
|
137
|
+
"\n",
|
|
138
|
+
"def batches():\n",
|
|
139
|
+
" fasta_map = parse_fasta(input_fasta)\n",
|
|
140
|
+
" fasta_keys = list(fasta_map.keys())\n",
|
|
141
|
+
" r = tqdm(range(0, len(fasta_map), batch_size), desc='RP3Net Inference')\n",
|
|
142
|
+
" for i in r:\n",
|
|
143
|
+
" yield {k: fasta_map[k] for k in fasta_keys[i:i + batch_size]}"
|
|
144
|
+
]
|
|
145
|
+
},
|
|
146
|
+
{
|
|
147
|
+
"cell_type": "code",
|
|
148
|
+
"execution_count": 7,
|
|
149
|
+
"id": "b9721c04",
|
|
150
|
+
"metadata": {
|
|
151
|
+
"id": "b9721c04"
|
|
152
|
+
},
|
|
153
|
+
"outputs": [],
|
|
154
|
+
"source": [
|
|
155
|
+
"#@title Load the model\n",
|
|
156
|
+
"m = rp3.load_model(rp3.RP3_DEFAULT_CONFIG, 'rp3net_v0.1_d.ckpt')\n"
|
|
157
|
+
]
|
|
158
|
+
},
|
|
159
|
+
{
|
|
160
|
+
"cell_type": "code",
|
|
161
|
+
"execution_count": null,
|
|
162
|
+
"id": "z1-PoDkqoVSc",
|
|
163
|
+
"metadata": {
|
|
164
|
+
"id": "z1-PoDkqoVSc",
|
|
165
|
+
"outputId": "5a788b91-2bdf-419f-c524-2c70cd56c2af"
|
|
166
|
+
},
|
|
167
|
+
"outputs": [],
|
|
168
|
+
"source": [
|
|
169
|
+
"#@title Run the prediction on GPU\n",
|
|
170
|
+
"m = m.to(device='cuda')\n",
|
|
171
|
+
"scores_map = dict()\n",
|
|
172
|
+
"for b in batches():\n",
|
|
173
|
+
" scores_map |= m.predict(b, device='cuda')\n"
|
|
174
|
+
]
|
|
175
|
+
},
|
|
176
|
+
{
|
|
177
|
+
"cell_type": "code",
|
|
178
|
+
"execution_count": 11,
|
|
179
|
+
"id": "BvfWdEhXpiGJ",
|
|
180
|
+
"metadata": {
|
|
181
|
+
"colab": {
|
|
182
|
+
"base_uri": "https://localhost:8080/"
|
|
183
|
+
},
|
|
184
|
+
"id": "BvfWdEhXpiGJ",
|
|
185
|
+
"outputId": "f062f810-c709-4b00-c566-1b5b232f2585"
|
|
186
|
+
},
|
|
187
|
+
"outputs": [
|
|
188
|
+
{
|
|
189
|
+
"name": "stdout",
|
|
190
|
+
"output_type": "stream",
|
|
191
|
+
"text": [
|
|
192
|
+
" id score\n",
|
|
193
|
+
"0 CONSTR_000001 0.691543\n",
|
|
194
|
+
"1 CONSTR_000002 0.971137\n",
|
|
195
|
+
"2 CONSTR_000003 0.931065\n",
|
|
196
|
+
"3 CONSTR_000004 0.972745\n",
|
|
197
|
+
"4 CONSTR_000005 0.928140\n",
|
|
198
|
+
"5 CONSTR_000006 0.977404\n",
|
|
199
|
+
"6 CONSTR_000007 0.744749\n",
|
|
200
|
+
"7 CONSTR_000008 0.009805\n",
|
|
201
|
+
"8 CONSTR_000009 0.009679\n",
|
|
202
|
+
"9 CONSTR_000010 0.433345\n"
|
|
203
|
+
]
|
|
204
|
+
}
|
|
205
|
+
],
|
|
206
|
+
"source": [
|
|
207
|
+
"#@title Print and save the results\n",
|
|
208
|
+
"df = pd.DataFrame([[id, score] for (id, score) in scores_map.items()], columns=['id', 'score'])\n",
|
|
209
|
+
"print(df)\n",
|
|
210
|
+
"df.to_csv(\"rp3_scores.csv\", index=False)"
|
|
211
|
+
]
|
|
212
|
+
}
|
|
213
|
+
],
|
|
214
|
+
"metadata": {
|
|
215
|
+
"language_info": {
|
|
216
|
+
"name": "python"
|
|
217
|
+
}
|
|
218
|
+
},
|
|
219
|
+
"nbformat": 4,
|
|
220
|
+
"nbformat_minor": 5
|
|
221
|
+
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: RP3Net
|
|
3
|
-
Version: 0.0.1
|
|
3
|
+
Version: 0.0.2
|
|
4
4
|
Summary: RP3Net: Modelling of recombinant soluble protein production in E. coli
|
|
5
5
|
Maintainer-email: Evgeny Tankhilevich <evgeny@ebi.ac.uk>
|
|
6
6
|
License: MIT
|
|
@@ -14,7 +14,7 @@ Requires-Dist: torch==2.5.*
|
|
|
14
14
|
Requires-Dist: pandas
|
|
15
15
|
Requires-Dist: transformers==4.46.*
|
|
16
16
|
Requires-Dist: ml_collections
|
|
17
|
-
Requires-Dist: peft
|
|
17
|
+
Requires-Dist: peft<0.18
|
|
18
18
|
Requires-Dist: tqdm
|
|
19
19
|
Provides-Extra: training
|
|
20
20
|
Requires-Dist: lightning[pytorch-extra]==2.4.*; extra == "training"
|
|
@@ -23,8 +23,24 @@ Requires-Dist: wandb; extra == "training"
|
|
|
23
23
|
Dynamic: license-file
|
|
24
24
|
|
|
25
25
|
# RP3Net
|
|
26
|
+
|
|
27
|
+
[](https://github.com/RP3Net/RP3Net/actions/workflows/python-app.yml)
|
|
28
|
+
[](https://colab.research.google.com/github/RP3Net/RP3Net/blob/main/rp3_colab.ipynb)
|
|
29
|
+
[](https://doi.org/10.1101/2025.05.13.652824)
|
|
30
|
+
[](https://pypi.org/project/RP3Net/)
|
|
31
|
+
|
|
26
32
|
RP3Net is an AI model for predicting the results of recombinant small-scale protein production in _E. coli_ from the construct sequence. See [the preprint](https://www.biorxiv.org/content/10.1101/2025.05.13.652824v1) and [supplements](https://ftp.ebi.ac.uk/pub/software/RP3Net/) for more details on how it works.
|
|
27
33
|
|
|
34
|
+
# Try it out
|
|
35
|
+
The simplest way to run the model inference is to open the [Colab notebook](https://colab.research.google.com/github/RP3Net/RP3Net/blob/main/rp3_colab.ipynb), paste the sequences in FASTA format into the first cell and hit `Runtime -> Run All`. Use a runtime with a GPU to speed things up.
|
|
36
|
+
|
|
37
|
+
## Docker
|
|
38
|
+
Another way to try out the model without disclosing the sequences is via the [Docker image](https://hub.docker.com/r/rp3net/rp3net): `docker pull rp3net/rp3net`. The image contains the binary installation of the package, the checkpoint, a Jupyter server and the notebook. It supports CUDA. To run the Docker container from the command line, using mounts to send the data in and out:
|
|
39
|
+
```
|
|
40
|
+
docker run -v /path/to/my/files:/mnt/rp3 rp3net/rp3net rp3 -p rp3net_v0.1_d.ckpt -f /mnt/rp3/sequences.fasta.gz -o /mnt/rp3/scores.csv.gz --log_file /dev/null
|
|
41
|
+
```
|
|
42
|
+
To interact with the container via the notebook, run `docker run -p 8888:8888 rp3net/rp3net jupyter lab`, open http://localhost:8888 in the browser and open `rp3_colab.ipynb`. There is no need to download the checkpoint and install the dependencies.
|
|
43
|
+
|
|
28
44
|
# Checkpoints
|
|
29
45
|
* https://ftp.ebi.ac.uk/pub/software/RP3Net/v0.1/checkpoints/
|
|
30
46
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|