RP3Net 0.0.1__tar.gz → 0.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. {rp3net-0.0.1 → rp3net-0.0.2}/.github/workflows/pypi-publish.yml +1 -1
  2. {rp3net-0.0.1 → rp3net-0.0.2}/.github/workflows/python-app.yml +1 -1
  3. {rp3net-0.0.1 → rp3net-0.0.2}/.github/workflows/testpypi-publish.yml +1 -1
  4. rp3net-0.0.2/Dockerfile +26 -0
  5. {rp3net-0.0.1 → rp3net-0.0.2}/PKG-INFO +18 -2
  6. {rp3net-0.0.1 → rp3net-0.0.2}/README.md +16 -0
  7. {rp3net-0.0.1 → rp3net-0.0.2}/pyproject.toml +2 -2
  8. rp3net-0.0.2/rp3_colab.ipynb +221 -0
  9. {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net.egg-info/PKG-INFO +18 -2
  10. {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net.egg-info/SOURCES.txt +2 -0
  11. {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net.egg-info/requires.txt +1 -1
  12. {rp3net-0.0.1 → rp3net-0.0.2}/.github/workflows/python-app-training.yml +0 -0
  13. {rp3net-0.0.1 → rp3net-0.0.2}/.gitignore +0 -0
  14. {rp3net-0.0.1 → rp3net-0.0.2}/.vscode/launch.json +0 -0
  15. {rp3net-0.0.1 → rp3net-0.0.2}/.vscode/settings.json +0 -0
  16. {rp3net-0.0.1 → rp3net-0.0.2}/LICENSE +0 -0
  17. {rp3net-0.0.1 → rp3net-0.0.2}/config/trainer_a.yml +0 -0
  18. {rp3net-0.0.1 → rp3net-0.0.2}/config/trainer_b.yml +0 -0
  19. {rp3net-0.0.1 → rp3net-0.0.2}/config/trainer_d.yml +0 -0
  20. {rp3net-0.0.1 → rp3net-0.0.2}/config/trainer_ebi_test.yml +0 -0
  21. {rp3net-0.0.1 → rp3net-0.0.2}/config/trainer_mac.yml +0 -0
  22. {rp3net-0.0.1 → rp3net-0.0.2}/setup.cfg +0 -0
  23. {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/__init__.py +0 -0
  24. {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/fm_cfg/esm2_650m/config.json +0 -0
  25. {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/fm_cfg/esm2_650m/special_tokens_map.json +0 -0
  26. {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/fm_cfg/esm2_650m/tokenizer_config.json +0 -0
  27. {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/fm_cfg/esm2_650m/vocab.txt +0 -0
  28. {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/model/__init__.py +0 -0
  29. {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/model/layers.py +0 -0
  30. {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/model/model.py +0 -0
  31. {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/rp3_main.py +0 -0
  32. {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/rp3_train.py +0 -0
  33. {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/training/__init__.py +0 -0
  34. {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/training/cli.py +0 -0
  35. {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/training/data.py +0 -0
  36. {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/training/data_emlc.py +0 -0
  37. {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/training/lm.py +0 -0
  38. {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/training/lm_emlc.py +0 -0
  39. {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/training/metrics.py +0 -0
  40. {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/util/__init__.py +0 -0
  41. {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/util/fasta.py +0 -0
  42. {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/util/torch.py +0 -0
  43. {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net/util/util.py +0 -0
  44. {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net.egg-info/dependency_links.txt +0 -0
  45. {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net.egg-info/entry_points.txt +0 -0
  46. {rp3net-0.0.1 → rp3net-0.0.2}/src/RP3Net.egg-info/top_level.txt +0 -0
  47. {rp3net-0.0.1 → rp3net-0.0.2}/tests/__init__.py +0 -0
  48. {rp3net-0.0.1 → rp3net-0.0.2}/tests/rp3_test.py +0 -0
  49. {rp3net-0.0.1 → rp3net-0.0.2}/tests/split_test.py +0 -0
  50. {rp3net-0.0.1 → rp3net-0.0.2}/tests_ebi/__init__.py +0 -0
  51. {rp3net-0.0.1 → rp3net-0.0.2}/tests_ebi/rp3_ebi_test.py +0 -0
  52. {rp3net-0.0.1 → rp3net-0.0.2}/tests_training/__init__.py +0 -0
  53. {rp3net-0.0.1 → rp3net-0.0.2}/tests_training/rp3_public_checkpoint_test.py +0 -0
@@ -1,4 +1,4 @@
1
- name: Publish Python distribution to PyPI and TestPyPI
1
+ name: Publish Python distribution to PyPI
2
2
 
3
3
  on:
4
4
  push:
@@ -1,7 +1,7 @@
1
1
  # This workflow will install Python dependencies, run tests and lint with a single version of Python
2
2
  # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
3
3
 
4
- name: Build and test RP3Net without training dependencies
4
+ name: CI
5
5
 
6
6
  on:
7
7
  push:
@@ -1,4 +1,4 @@
1
- name: Publish Python distribution to PyPI and TestPyPI
1
+ name: Publish Python distribution to TestPyPI
2
2
 
3
3
  on:
4
4
  push:
@@ -0,0 +1,26 @@
1
+ # docker build --platform linux/amd64 -t rp3net:test_1 .
2
+ FROM pytorch/pytorch:2.5.1-cuda12.4-cudnn9-runtime
3
+
4
+ RUN apt-get update && \
5
+ apt-get install -y git wget && \
6
+ mkdir -p /rp3/python && \
7
+ cd /rp3 && \
8
+ python -m venv --clear --system-site-packages python && \
9
+ /rp3/python/bin/pip install --upgrade pip && \
10
+ /rp3/python/bin/pip install RP3Net jupyter && \
11
+ /rp3/python/bin/jupyter server --generate-config && \
12
+ wget -nv -nc https://ftp.ebi.ac.uk/pub/software/RP3Net/v0.1/checkpoints/rp3net_v0.1_d.ckpt && \
13
+ wget -nv -nc https://raw.githubusercontent.com/RP3Net/RP3Net/refs/heads/main/rp3_colab.ipynb
14
+
15
+ # /root/.jupyter/jupyter_server_config.py is generated by `jupyter server --generate-config`
16
+ RUN <<EOT cat >> /root/.jupyter/jupyter_server_config.py
17
+ c.ServerApp.ip = '0.0.0.0'
18
+ c.ServerApp.open_browser = False
19
+ c.ServerApp.allow_root = True
20
+ c.ExtensionApp.open_browser = False
21
+ c.IdentityProvider.token = ''
22
+ EOT
23
+
24
+ ENV PATH="/rp3/python/bin:$PATH"
25
+ WORKDIR /rp3
26
+ EXPOSE 8888
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: RP3Net
3
- Version: 0.0.1
3
+ Version: 0.0.2
4
4
  Summary: RP3Net: Modelling of recombinant soluble protein production in E. coli
5
5
  Maintainer-email: Evgeny Tankhilevich <evgeny@ebi.ac.uk>
6
6
  License: MIT
@@ -14,7 +14,7 @@ Requires-Dist: torch==2.5.*
14
14
  Requires-Dist: pandas
15
15
  Requires-Dist: transformers==4.46.*
16
16
  Requires-Dist: ml_collections
17
- Requires-Dist: peft
17
+ Requires-Dist: peft<0.18
18
18
  Requires-Dist: tqdm
19
19
  Provides-Extra: training
20
20
  Requires-Dist: lightning[pytorch-extra]==2.4.*; extra == "training"
@@ -23,8 +23,24 @@ Requires-Dist: wandb; extra == "training"
23
23
  Dynamic: license-file
24
24
 
25
25
  # RP3Net
26
+
27
+ [![CI](https://github.com/RP3Net/RP3Net/actions/workflows/python-app.yml/badge.svg)](https://github.com/RP3Net/RP3Net/actions/workflows/python-app.yml)
28
+ [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/RP3Net/RP3Net/blob/main/rp3_colab.ipynb)
29
+ [![DOI:10.1101/2025.05.13.652824](http://img.shields.io/badge/DOI-10.1101/2025.05.13.652824-B31B1B.svg)](https://doi.org/10.1101/2025.05.13.652824)
30
+ [![PyPI - Version](https://img.shields.io/pypi/v/RP3Net)](https://pypi.org/project/RP3Net/)
31
+
26
32
  RP3Net is an AI model for predicting the results of recombinant small-scale protein production in _E. coli_ from the construct sequence. See [the preprint](https://www.biorxiv.org/content/10.1101/2025.05.13.652824v1) and [supplements](https://ftp.ebi.ac.uk/pub/software/RP3Net/) for more details on how it works.
27
33
 
34
+ # Try it out
35
+ The simplest way to run the model inference is to open the [Colab notebook](https://colab.research.google.com/github/RP3Net/RP3Net/blob/main/rp3_colab.ipynb), paste the sequences in FASTA format into the first cell and hit `Runtime -> Run All`. Use a runtime with a GPU to speed things up.
36
+
37
+ ## Docker
38
+ Another way to try out the model without disclosing the sequences is via the [Docker image](https://hub.docker.com/r/rp3net/rp3net): `docker pull rp3net/rp3net`. The image contains the binary installation of the package, the checkpoint, a Jupyter server and the notebook. It supports CUDA. To run the docker container from the command line, using mounts to send the data in and out:
39
+ ```
40
+ docker run -v /path/to/my/files:/mnt/rp3 rp3net/rp3net rp3 -p rp3net_v0.1_d.ckpt -f /mnt/rp3/sequences.fasta.gz -o /mnt/rp3/scores.csv.gz --log_file /dev/null
41
+ ```
42
+ To interact with the container via the notebook, run `docker run -p 8888:8888 rp3net/rp3net jupyter lab`, open http://localhost:8888 in the browser and open `rp3_colab.ipynb`. There is no need to download the checkpoint and install the dependencies.
43
+
28
44
  # Checkpoints
29
45
  * https://ftp.ebi.ac.uk/pub/software/RP3Net/v0.1/checkpoints/
30
46
 
@@ -1,6 +1,22 @@
1
1
  # RP3Net
2
+
3
+ [![CI](https://github.com/RP3Net/RP3Net/actions/workflows/python-app.yml/badge.svg)](https://github.com/RP3Net/RP3Net/actions/workflows/python-app.yml)
4
+ [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/RP3Net/RP3Net/blob/main/rp3_colab.ipynb)
5
+ [![DOI:10.1101/2025.05.13.652824](http://img.shields.io/badge/DOI-10.1101/2025.05.13.652824-B31B1B.svg)](https://doi.org/10.1101/2025.05.13.652824)
6
+ [![PyPI - Version](https://img.shields.io/pypi/v/RP3Net)](https://pypi.org/project/RP3Net/)
7
+
2
8
  RP3Net is an AI model for predicting the results of recombinant small-scale protein production in _E. coli_ from the construct sequence. See [the preprint](https://www.biorxiv.org/content/10.1101/2025.05.13.652824v1) and [supplements](https://ftp.ebi.ac.uk/pub/software/RP3Net/) for more details on how it works.
3
9
 
10
+ # Try it out
11
+ The simplest way to run the model inference is to open the [Colab notebook](https://colab.research.google.com/github/RP3Net/RP3Net/blob/main/rp3_colab.ipynb), paste the sequences in FASTA format into the first cell and hit `Runtime -> Run All`. Use a runtime with a GPU to speed things up.
12
+
13
+ ## Docker
14
+ Another way to try out the model without disclosing the sequences is via the [Docker image](https://hub.docker.com/r/rp3net/rp3net): `docker pull rp3net/rp3net`. The image contains the binary installation of the package, the checkpoint, a Jupyter server and the notebook. It supports CUDA. To run the docker container from the command line, using mounts to send the data in and out:
15
+ ```
16
+ docker run -v /path/to/my/files:/mnt/rp3 rp3net/rp3net rp3 -p rp3net_v0.1_d.ckpt -f /mnt/rp3/sequences.fasta.gz -o /mnt/rp3/scores.csv.gz --log_file /dev/null
17
+ ```
18
+ To interact with the container via the notebook, run `docker run -p 8888:8888 rp3net/rp3net jupyter lab`, open http://localhost:8888 in the browser and open `rp3_colab.ipynb`. There is no need to download the checkpoint and install the dependencies.
19
+
4
20
  # Checkpoints
5
21
  * https://ftp.ebi.ac.uk/pub/software/RP3Net/v0.1/checkpoints/
6
22
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "RP3Net"
7
- version = "0.0.1"
7
+ version = "0.0.2"
8
8
  description = "RP3Net: Modelling of recombinant soluble protein production in E. coli"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10,<4.0"
@@ -22,7 +22,7 @@ dependencies = [
22
22
  "pandas",
23
23
  "transformers==4.46.*",
24
24
  "ml_collections",
25
- "peft",
25
+ "peft<0.18",
26
26
  "tqdm"
27
27
  ]
28
28
 
@@ -0,0 +1,221 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 9,
6
+ "id": "48f4e51a",
7
+ "metadata": {
8
+ "id": "48f4e51a"
9
+ },
10
+ "outputs": [],
11
+ "source": [
12
+ "#@title Enter the construct sequences in FASTA format and hit `Runtime` -> `Run all`\n",
13
+ "batch_size = 4 # @param {\"type\":\"integer\"}\n",
14
+ "input_fasta = \"\"\"\n",
15
+ ">CONSTR_000001\n",
16
+ "MTVFFVTRLVKKHDKLSKQQIEDFAEKLMTILFETYRSHWHSDCPSKGQAFRCIRINNNQ\n",
17
+ "NKDPILERACVESNVDFSHLGLPKEMTIWVDPFEVCCRYGEKNHPFTVASFKGRWEEWEL\n",
18
+ "YQQISYAVSRASSDVSSGTSCDEESCGSHHHHHH\n",
19
+ ">CONSTR_000002\n",
20
+ "MDYTKPLEHPPVKRNEEAQVHDKLNSGMVSNMEGTAGGERPSVVNGDSGKSGGVGDPREP\n",
21
+ "LGCLQEGSGCHPTTESFEKSVREDASPLPHVCCCKQDALILQRGLHHEDGSQHIGLLHPG\n",
22
+ "DRGPDHEYVLVEEAECGSHHHHHH\n",
23
+ ">CONSTR_000003\n",
24
+ "MHHHHHHENLYFQGSLEVRGQLQSALLILGEPKEGGMPMNISIMPSSLQMKTPEGCTEIQ\n",
25
+ "LPAEVRLVPSSCRGLQFVVGDGLHLRLQTQAKLGTKLISMFNQSSQTQE\n",
26
+ ">CONSTR_000004\n",
27
+ "MECPEGQLPISSENDSTPTVSTSEVTSQQEPQILVDRGSETTYESSADIAGDEGTQIPAD\n",
28
+ "EDTQTDADSSAQAAAQAPENFQEGKDMSESQDEVPDEVENGSHHHHHH\n",
29
+ ">CONSTR_000005\n",
30
+ "MSTAPSEDIWKKFELVPSPPTSPPWGLGPGAGDPAPGIGPPEPWPGGCTGDEAESRGHSK\n",
31
+ "GWGRNYASIIRRDCMWSGFSARERLERAVSDRLAPGAPRGNPPKASAAPDCTPSLEAGNP\n",
32
+ "APAAPCPLGEPKTQACSGSESPSDSENEEIDVVTVEKRQSLGIRKPVTITVRADPLDPCM\n",
33
+ "KHFHGSHHHHHH\n",
34
+ ">CONSTR_000006\n",
35
+ "MEKARHETFAAEMRQNDKIMCILENRKKRDRKNLCRAINDFQQSFQKPETRREFDLSDPL\n",
36
+ "ALKKDLPARQSDNDVRNTISGMQGSHHHHHH\n",
37
+ ">CONSTR_000007\n",
38
+ "MLMKKAYELSVLCDCEIALIIFNSANRLFQYASTDMDRVLLKYTEYSEPHESRTNTDILE\n",
39
+ "TLKRRGIGLDGPELEPDEGPEEPGEKFRRLAGEGGDPGSHHHHHH\n",
40
+ ">CONSTR_000008\n",
41
+ "MPTESASCSTARQTKQKRKSHSLSIRRTNSSEQERTGLPRDMLEGQDSKLPSSVRSTLLE\n",
42
+ "LFGQIEREFENLYIENLELRREIDTLNERLAAEGQAIDGAELSKGQLKTKASHSTSQLSQ\n",
43
+ "KLKTTYKASTSKIVSSFKTTTSRAACQLVKEYIGHRDGIWDVSVAKTQPVVLGTASADHT\n",
44
+ "ALLWSIETGKCLVKYAGHVGSVNSIKFHPSEQLALTASGDQTAHIWRYAVQLPTPQPVAD\n",
45
+ "TSISGEDEVECSDKDEPDLDGDVSSDCPTIRVPLTSLKSHQGVVIASDWLVGGKQAVTAS\n",
46
+ "WDRTANLYDVETSELVHSLTGHDQELTHCCTHPTQRLVVTSSRDTTFRLWDFRDPSIHSV\n",
47
+ "NVFQGHTDTVTSAVFTVGDNVVSGSDDRTVKVWDLKNMRSPIATIRTDSAINRINVCVGQ\n",
48
+ "KIIALPHDNRQVRLFDMSGVRLARLPRSSRQGHRRMVCCSAWSEDHPVCNLFTCGFDRQA\n",
49
+ "IGWNINIPALLQEKGSHHHHHH\n",
50
+ ">CONSTR_000009\n",
51
+ "MHHHHHHENLYFQGSPTESASCSTARQTKQKRKSHSLSIRRTNSSEQERTGLPRDMLEGQ\n",
52
+ "DSKLPSSVRSTLLELFGQIEREFENLYIENLELRREIDTLNERLAAEGQAIDGAELSKGQ\n",
53
+ "LKTKASHSTSQLSQKLKTTYKASTSKIVSSFKTTTSRAACQLVKEYIGHRDGIWDVSVAK\n",
54
+ "TQPVVLGTASADHTALLWSIETGKCLVKYAGHVGSVNSIKFHPSEQLALTASGDQTAHIW\n",
55
+ "RYAVQLPTPQPVADTSISGEDEVECSDKDEPDLDGDVSSDCPTIRVPLTSLKSHQGVVIA\n",
56
+ "SDWLVGGKQAVTASWDRTANLYDVETSELVHSLTGHDQELTHCCTHPTQRLVVTSSRDTT\n",
57
+ "FRLWDFRDPSIHSVNVFQGHTDTVTSAVFTVGDNVVSGSDDRTVKVWDLKNMRSPIATIR\n",
58
+ "TDSAINRINVCVGQKIIALPHDNRQVRLFDMSGVRLARLPRSSRQGHRRMVCCSAWSEDH\n",
59
+ "PVCNLFTCGFDRQAIGWNINIPALLQEK\n",
60
+ ">CONSTR_000010\n",
61
+ "MRDEIATTVFFVTRLVKKHDKLSKQQIEDFAEKLMTILFETYRSHWHSDCPSKGQAFRCI\n",
62
+ "RINNNQNKDPILERACVESNVDFSHLGLPKEMTIWVDPFEVCCRYGEKNHPFTVASFKGR\n",
63
+ "WEEWELYQQISYAVSRASSDVSSGTSCDEESCSKEPRVIPKVSNPKSIYQVENLKQPFQS\n",
64
+ "WLQIPRKKNVVDGRVGLLGNTYHGSQKHPKCYRPAMHRLDRILGSHHHHHH\n",
65
+ "\"\"\""
66
+ ]
67
+ },
68
+ {
69
+ "cell_type": "code",
70
+ "execution_count": null,
71
+ "id": "b5e9d39a",
72
+ "metadata": {
73
+ "cellView": "form",
74
+ "collapsed": true,
75
+ "id": "b5e9d39a"
76
+ },
77
+ "outputs": [],
78
+ "source": [
79
+ "#@title Install the dependencies and download the checkpoint\n",
80
+ "%%bash\n",
81
+ "\n",
82
+ "set -e\n",
83
+ "\n",
84
+ "pip install RP3Net 'torchvision==0.20.1'\n",
85
+ "wget -nv -nc https://ftp.ebi.ac.uk/pub/software/RP3Net/v0.1/checkpoints/rp3net_v0.1_d.ckpt"
86
+ ]
87
+ },
88
+ {
89
+ "cell_type": "code",
90
+ "execution_count": 5,
91
+ "id": "15ec2247",
92
+ "metadata": {
93
+ "cellView": "form",
94
+ "id": "15ec2247"
95
+ },
96
+ "outputs": [],
97
+ "source": [
98
+ "#@title Imports\n",
99
+ "import re\n",
100
+ "import io\n",
101
+ "import pandas as pd\n",
102
+ "import RP3Net as rp3\n",
103
+ "from tqdm.notebook import tqdm\n",
104
+ "RE_FASTA_HEADER = re.compile(r'^>([\\w\\-.:#*]+)') # https://www.ncbi.nlm.nih.gov/genbank/fastaformat/"
105
+ ]
106
+ },
107
+ {
108
+ "cell_type": "code",
109
+ "execution_count": 6,
110
+ "id": "c5d31cfc",
111
+ "metadata": {
112
+ "cellView": "form",
113
+ "id": "c5d31cfc"
114
+ },
115
+ "outputs": [],
116
+ "source": [
117
+ "#@title Helper functions\n",
118
+ "def iter_fasta(io):\n",
119
+ " fasta_id, sequence = None, None\n",
120
+ " for line in io:\n",
121
+ " line = line.strip()\n",
122
+ " if len(line) == 0:\n",
123
+ " continue\n",
124
+ " m = RE_FASTA_HEADER.match(line)\n",
125
+ " if m:\n",
126
+ " if fasta_id is not None:\n",
127
+ " yield fasta_id, ''.join(sequence)\n",
128
+ " sequence = []\n",
129
+ " fasta_id = m.group(1)\n",
130
+ " else:\n",
131
+ " sequence.append(line)\n",
132
+ " if fasta_id is not None:\n",
133
+ " yield fasta_id, ''.join(sequence)\n",
134
+ "\n",
135
+ "def parse_fasta(s):\n",
136
+ " return {id: seq for id, seq in iter_fasta(io.StringIO(s))}\n",
137
+ "\n",
138
+ "def batches():\n",
139
+ " fasta_map = parse_fasta(input_fasta)\n",
140
+ " fasta_keys = list(fasta_map.keys())\n",
141
+ " r = tqdm(range(0, len(fasta_map), batch_size), desc='RP3Net Inference')\n",
142
+ " for i in r:\n",
143
+ " yield {k: fasta_map[k] for k in fasta_keys[i:i + batch_size]}"
144
+ ]
145
+ },
146
+ {
147
+ "cell_type": "code",
148
+ "execution_count": 7,
149
+ "id": "b9721c04",
150
+ "metadata": {
151
+ "id": "b9721c04"
152
+ },
153
+ "outputs": [],
154
+ "source": [
155
+ "#@title Load the model\n",
156
+ "m = rp3.load_model(rp3.RP3_DEFAULT_CONFIG, 'rp3net_v0.1_d.ckpt')\n"
157
+ ]
158
+ },
159
+ {
160
+ "cell_type": "code",
161
+ "execution_count": null,
162
+ "id": "z1-PoDkqoVSc",
163
+ "metadata": {
164
+ "id": "z1-PoDkqoVSc",
165
+ "outputId": "5a788b91-2bdf-419f-c524-2c70cd56c2af"
166
+ },
167
+ "outputs": [],
168
+ "source": [
169
+ "#@title Run the prediction on GPU\n",
170
+ "m = m.to(device='cuda')\n",
171
+ "scores_map = dict()\n",
172
+ "for b in batches():\n",
173
+ " scores_map |= m.predict(b, device='cuda')\n"
174
+ ]
175
+ },
176
+ {
177
+ "cell_type": "code",
178
+ "execution_count": 11,
179
+ "id": "BvfWdEhXpiGJ",
180
+ "metadata": {
181
+ "colab": {
182
+ "base_uri": "https://localhost:8080/"
183
+ },
184
+ "id": "BvfWdEhXpiGJ",
185
+ "outputId": "f062f810-c709-4b00-c566-1b5b232f2585"
186
+ },
187
+ "outputs": [
188
+ {
189
+ "name": "stdout",
190
+ "output_type": "stream",
191
+ "text": [
192
+ " id score\n",
193
+ "0 CONSTR_000001 0.691543\n",
194
+ "1 CONSTR_000002 0.971137\n",
195
+ "2 CONSTR_000003 0.931065\n",
196
+ "3 CONSTR_000004 0.972745\n",
197
+ "4 CONSTR_000005 0.928140\n",
198
+ "5 CONSTR_000006 0.977404\n",
199
+ "6 CONSTR_000007 0.744749\n",
200
+ "7 CONSTR_000008 0.009805\n",
201
+ "8 CONSTR_000009 0.009679\n",
202
+ "9 CONSTR_000010 0.433345\n"
203
+ ]
204
+ }
205
+ ],
206
+ "source": [
207
+ "#@title Print and save the results\n",
208
+ "df = pd.DataFrame([[id, score] for (id, score) in scores_map.items()], columns=['id', 'score'])\n",
209
+ "print(df)\n",
210
+ "df.to_csv(\"rp3_scores.csv\", index=False)"
211
+ ]
212
+ }
213
+ ],
214
+ "metadata": {
215
+ "language_info": {
216
+ "name": "python"
217
+ }
218
+ },
219
+ "nbformat": 4,
220
+ "nbformat_minor": 5
221
+ }
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: RP3Net
3
- Version: 0.0.1
3
+ Version: 0.0.2
4
4
  Summary: RP3Net: Modelling of recombinant soluble protein production in E. coli
5
5
  Maintainer-email: Evgeny Tankhilevich <evgeny@ebi.ac.uk>
6
6
  License: MIT
@@ -14,7 +14,7 @@ Requires-Dist: torch==2.5.*
14
14
  Requires-Dist: pandas
15
15
  Requires-Dist: transformers==4.46.*
16
16
  Requires-Dist: ml_collections
17
- Requires-Dist: peft
17
+ Requires-Dist: peft<0.18
18
18
  Requires-Dist: tqdm
19
19
  Provides-Extra: training
20
20
  Requires-Dist: lightning[pytorch-extra]==2.4.*; extra == "training"
@@ -23,8 +23,24 @@ Requires-Dist: wandb; extra == "training"
23
23
  Dynamic: license-file
24
24
 
25
25
  # RP3Net
26
+
27
+ [![CI](https://github.com/RP3Net/RP3Net/actions/workflows/python-app.yml/badge.svg)](https://github.com/RP3Net/RP3Net/actions/workflows/python-app.yml)
28
+ [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/RP3Net/RP3Net/blob/main/rp3_colab.ipynb)
29
+ [![DOI:10.1101/2025.05.13.652824](http://img.shields.io/badge/DOI-10.1101/2025.05.13.652824-B31B1B.svg)](https://doi.org/10.1101/2025.05.13.652824)
30
+ [![PyPI - Version](https://img.shields.io/pypi/v/RP3Net)](https://pypi.org/project/RP3Net/)
31
+
26
32
  RP3Net is an AI model for predicting the results of recombinant small-scale protein production in _E. coli_ from the construct sequence. See [the preprint](https://www.biorxiv.org/content/10.1101/2025.05.13.652824v1) and [supplements](https://ftp.ebi.ac.uk/pub/software/RP3Net/) for more details on how it works.
27
33
 
34
+ # Try it out
35
+ The simplest way to run the model inference is to open the [Colab notebook](https://colab.research.google.com/github/RP3Net/RP3Net/blob/main/rp3_colab.ipynb), paste the sequences in FASTA format into the first cell and hit `Runtime -> Run All`. Use a runtime with a GPU to speed things up.
36
+
37
+ ## Docker
38
+ Another way to try out the model without disclosing the sequences is via the [Docker image](https://hub.docker.com/r/rp3net/rp3net): `docker pull rp3net/rp3net`. The image contains the binary installation of the package, the checkpoint, a Jupyter server and the notebook. It supports CUDA. To run the docker container from the command line, using mounts to send the data in and out:
39
+ ```
40
+ docker run -v /path/to/my/files:/mnt/rp3 rp3net/rp3net rp3 -p rp3net_v0.1_d.ckpt -f /mnt/rp3/sequences.fasta.gz -o /mnt/rp3/scores.csv.gz --log_file /dev/null
41
+ ```
42
+ To interact with the container via the notebook, run `docker run -p 8888:8888 rp3net/rp3net jupyter lab`, open http://localhost:8888 in the browser and open `rp3_colab.ipynb`. There is no need to download the checkpoint and install the dependencies.
43
+
28
44
  # Checkpoints
29
45
  * https://ftp.ebi.ac.uk/pub/software/RP3Net/v0.1/checkpoints/
30
46
 
@@ -1,7 +1,9 @@
1
1
  .gitignore
2
+ Dockerfile
2
3
  LICENSE
3
4
  README.md
4
5
  pyproject.toml
6
+ rp3_colab.ipynb
5
7
  .github/workflows/pypi-publish.yml
6
8
  .github/workflows/python-app-training.yml
7
9
  .github/workflows/python-app.yml
@@ -2,7 +2,7 @@ torch==2.5.*
2
2
  pandas
3
3
  transformers==4.46.*
4
4
  ml_collections
5
- peft
5
+ peft<0.18
6
6
  tqdm
7
7
 
8
8
  [training]
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes