biointerface 0.2.3__tar.gz → 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {biointerface-0.2.3 → biointerface-0.3.1}/HISTORY.rst +25 -11
- {biointerface-0.2.3 → biointerface-0.3.1}/PKG-INFO +15 -7
- {biointerface-0.2.3 → biointerface-0.3.1}/README.rst +2 -3
- {biointerface-0.2.3 → biointerface-0.3.1}/docs/usage.rst +34 -3
- {biointerface-0.2.3 → biointerface-0.3.1}/pyproject.toml +54 -43
- {biointerface-0.2.3 → biointerface-0.3.1}/src/biointerface/__init__.py +1 -1
- {biointerface-0.2.3 → biointerface-0.3.1}/src/biointerface/core.py +127 -52
- {biointerface-0.2.3 → biointerface-0.3.1}/src/biointerface.egg-info/PKG-INFO +15 -7
- {biointerface-0.2.3 → biointerface-0.3.1}/src/biointerface.egg-info/SOURCES.txt +3 -1
- biointerface-0.3.1/src/biointerface.egg-info/requires.txt +18 -0
- biointerface-0.3.1/tests/data/gattaca.cif +547 -0
- {biointerface-0.2.3 → biointerface-0.3.1}/tests/test_biointerface.py +1 -0
- biointerface-0.3.1/tests/test_core.py +105 -0
- biointerface-0.2.3/src/biointerface.egg-info/requires.txt +0 -9
- {biointerface-0.2.3 → biointerface-0.3.1}/AUTHORS.rst +0 -0
- {biointerface-0.2.3 → biointerface-0.3.1}/CONTRIBUTING.rst +0 -0
- {biointerface-0.2.3 → biointerface-0.3.1}/LICENSE +0 -0
- {biointerface-0.2.3 → biointerface-0.3.1}/MANIFEST.in +0 -0
- {biointerface-0.2.3 → biointerface-0.3.1}/docs/Makefile +0 -0
- {biointerface-0.2.3 → biointerface-0.3.1}/docs/_build/html/_static/file.png +0 -0
- {biointerface-0.2.3 → biointerface-0.3.1}/docs/_build/html/_static/minus.png +0 -0
- {biointerface-0.2.3 → biointerface-0.3.1}/docs/_build/html/_static/plus.png +0 -0
- {biointerface-0.2.3 → biointerface-0.3.1}/docs/authors.rst +0 -0
- {biointerface-0.2.3 → biointerface-0.3.1}/docs/biointerface.rst +0 -0
- {biointerface-0.2.3 → biointerface-0.3.1}/docs/conf.py +0 -0
- {biointerface-0.2.3 → biointerface-0.3.1}/docs/contributing.rst +0 -0
- {biointerface-0.2.3 → biointerface-0.3.1}/docs/history.rst +0 -0
- {biointerface-0.2.3 → biointerface-0.3.1}/docs/index.rst +0 -0
- {biointerface-0.2.3 → biointerface-0.3.1}/docs/installation.rst +0 -0
- {biointerface-0.2.3 → biointerface-0.3.1}/docs/make.bat +0 -0
- {biointerface-0.2.3 → biointerface-0.3.1}/docs/modules.rst +0 -0
- {biointerface-0.2.3 → biointerface-0.3.1}/docs/readme.rst +0 -0
- {biointerface-0.2.3 → biointerface-0.3.1}/setup.cfg +0 -0
- {biointerface-0.2.3 → biointerface-0.3.1}/src/biointerface.egg-info/dependency_links.txt +0 -0
- {biointerface-0.2.3 → biointerface-0.3.1}/src/biointerface.egg-info/top_level.txt +0 -0
- {biointerface-0.2.3 → biointerface-0.3.1}/tests/__init__.py +0 -0
|
@@ -2,28 +2,32 @@
|
|
|
2
2
|
History
|
|
3
3
|
=======
|
|
4
4
|
|
|
5
|
-
0.1
|
|
5
|
+
0.3.1 (2025-03-05)
|
|
6
6
|
------------------
|
|
7
7
|
|
|
8
|
-
*
|
|
8
|
+
* Proper tests
|
|
9
9
|
|
|
10
10
|
|
|
11
|
-
0.
|
|
11
|
+
0.3.0 (2025-03-05)
|
|
12
12
|
------------------
|
|
13
13
|
|
|
14
|
-
*
|
|
14
|
+
* Feature: get all continuous protein-bound double-strand nucleic acids;
|
|
15
15
|
|
|
16
16
|
|
|
17
|
-
0.2.
|
|
17
|
+
0.2.3 (2025-03-05)
|
|
18
18
|
------------------
|
|
19
19
|
|
|
20
|
-
*
|
|
20
|
+
* fix: bump-my-version was updating pdbnucleicacids version too
|
|
21
|
+
|
|
22
|
+
* fix: import classes instead of modules
|
|
23
|
+
|
|
24
|
+
* chore: remove old tox environments
|
|
21
25
|
|
|
22
26
|
|
|
23
27
|
0.2.2 (2025-03-04)
|
|
24
28
|
------------------
|
|
25
29
|
|
|
26
|
-
* Interface methods to get residues
|
|
30
|
+
* Interface getter methods to get residues
|
|
27
31
|
|
|
28
32
|
* Interface raise error condition in case of no protein in the structure
|
|
29
33
|
|
|
@@ -32,11 +36,21 @@ History
|
|
|
32
36
|
* Full documentation
|
|
33
37
|
|
|
34
38
|
|
|
35
|
-
0.2.
|
|
39
|
+
0.2.1 (2025-03-01)
|
|
36
40
|
------------------
|
|
37
41
|
|
|
38
|
-
*
|
|
42
|
+
* Gitlab CI
|
|
39
43
|
|
|
40
|
-
* fix: import classes instead of modules
|
|
41
44
|
|
|
42
|
-
|
|
45
|
+
0.2.0 (2025-02-28)
|
|
46
|
+
------------------
|
|
47
|
+
|
|
48
|
+
* Features: Interface class, with getter methods for atoms, dataframes etc
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
0.1.0 (2025-02-08)
|
|
52
|
+
------------------
|
|
53
|
+
|
|
54
|
+
* First release on PyPI.
|
|
55
|
+
|
|
56
|
+
* Feature: interface as pandas dataframe
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: biointerface
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.3.1
|
|
4
4
|
Summary: BioInterface is a Biopython based package that extracts Protein-DNA interfaces in a PDB structures.
|
|
5
5
|
Author-email: Alessandro Pandolfi <alessandro.pandolfi@protonmail.com>
|
|
6
6
|
Maintainer-email: Alessandro Pandolfi <alessandro.pandolfi@protonmail.com>
|
|
@@ -14,12 +14,21 @@ License-File: LICENSE
|
|
|
14
14
|
License-File: AUTHORS.rst
|
|
15
15
|
Requires-Dist: pandas
|
|
16
16
|
Requires-Dist: biopython
|
|
17
|
-
Requires-Dist: pdbnucleicacids>=0.2.
|
|
17
|
+
Requires-Dist: pdbnucleicacids>=0.2.2
|
|
18
18
|
Provides-Extra: dev
|
|
19
|
-
Requires-Dist:
|
|
19
|
+
Requires-Dist: spyder-kernels; extra == "dev"
|
|
20
|
+
Requires-Dist: flake8; extra == "dev"
|
|
21
|
+
Requires-Dist: ruff; extra == "dev"
|
|
20
22
|
Requires-Dist: mypy; extra == "dev"
|
|
21
23
|
Requires-Dist: pytest; extra == "dev"
|
|
22
|
-
Requires-Dist:
|
|
24
|
+
Requires-Dist: tox; extra == "dev"
|
|
25
|
+
Requires-Dist: coverage; extra == "dev"
|
|
26
|
+
Requires-Dist: sphinx; extra == "dev"
|
|
27
|
+
Requires-Dist: watchdog; extra == "dev"
|
|
28
|
+
Requires-Dist: bump-my-version; extra == "dev"
|
|
29
|
+
Requires-Dist: wheel; extra == "dev"
|
|
30
|
+
Requires-Dist: build; extra == "dev"
|
|
31
|
+
Requires-Dist: twine; extra == "dev"
|
|
23
32
|
|
|
24
33
|
============
|
|
25
34
|
BioInterface
|
|
@@ -101,13 +110,12 @@ Feaures
|
|
|
101
110
|
|
|
102
111
|
* Interface data as ``pandas`` DataFrame;
|
|
103
112
|
|
|
113
|
+
* Get all continuous protein-bound double-strand nucleic acids;
|
|
114
|
+
|
|
104
115
|
|
|
105
116
|
TODO
|
|
106
117
|
--------
|
|
107
118
|
|
|
108
|
-
* Extract continous bound DNA sequence
|
|
109
|
-
|
|
110
|
-
* Proper tests (WIP)
|
|
111
119
|
|
|
112
120
|
|
|
113
121
|
Credits
|
|
@@ -35,7 +35,7 @@ You can extract a single Protein-DNA interface from a single protein chain.
|
|
|
35
35
|
# extract interface from a specific protein chain
|
|
36
36
|
face = Interface(
|
|
37
37
|
structure=structure,
|
|
38
|
-
protein_chain_id="
|
|
38
|
+
protein_chain_id="F",
|
|
39
39
|
search_radius=5.0
|
|
40
40
|
)
|
|
41
41
|
face
|
|
@@ -169,7 +169,7 @@ You can get all Protein-DNA interface features as a ``pandas`` DataFrame.
|
|
|
169
169
|
'prot_atom_coord_z', 'dna_chain_id', 'dna_res_hetfield',
|
|
170
170
|
'dna_res_number', 'dna_res_icode', 'dna_res_name', 'dna_atom_name',
|
|
171
171
|
'dna_atom_altloc', 'dna_atom_element', 'dna_atom_coord_x',
|
|
172
|
-
'dna_atom_coord_y', 'dna_atom_coord_z', '
|
|
172
|
+
'dna_atom_coord_y', 'dna_atom_coord_z', 'euclidean_distance'],
|
|
173
173
|
dtype='object')
|
|
174
174
|
|
|
175
175
|
.. code-block:: python
|
|
@@ -178,7 +178,7 @@ You can get all Protein-DNA interface features as a ``pandas`` DataFrame.
|
|
|
178
178
|
|
|
179
179
|
.. code-block:: console
|
|
180
180
|
|
|
181
|
-
protein_chain_id prot_res_hetfield prot_res_number ...
|
|
181
|
+
protein_chain_id prot_res_hetfield prot_res_number ... euclidean_distance
|
|
182
182
|
0 F 148 ... 4.458498
|
|
183
183
|
1 F 148 ... 3.964944
|
|
184
184
|
2 F 148 ... 4.066739
|
|
@@ -192,3 +192,34 @@ You can get all Protein-DNA interface features as a ``pandas`` DataFrame.
|
|
|
192
192
|
257 F 157 ... 4.299844
|
|
193
193
|
|
|
194
194
|
[258 rows x 23 columns]
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
Protein-Bound Nucleic Acids
|
|
198
|
+
---------------------------
|
|
199
|
+
|
|
200
|
+
BioInterface can extract all double-strand nucleic acids bound by
|
|
201
|
+
the input protein, as a ``DoubleStrandNucleicAcid`` class from the package
|
|
202
|
+
PDBNucleicAcids_.
|
|
203
|
+
|
|
204
|
+
.. code-block:: python
|
|
205
|
+
|
|
206
|
+
bound_dsna_list = face.get_bound_double_strands()
|
|
207
|
+
bound_dsna = bound_dsna_list[0]
|
|
208
|
+
bound_dsna
|
|
209
|
+
|
|
210
|
+
.. code-block:: console
|
|
211
|
+
|
|
212
|
+
<DoubleStrandNucleicAcid type='dsDNA' i-th strand='A' j-th strand='B'
|
|
213
|
+
length=9>
|
|
214
|
+
|
|
215
|
+
The ``DoubleStrandNucleicAcid`` class has other useful methods.
|
|
216
|
+
|
|
217
|
+
.. code-block:: python
|
|
218
|
+
|
|
219
|
+
bound_dsna.get_i_strand().get_seq()
|
|
220
|
+
|
|
221
|
+
.. code-block:: console
|
|
222
|
+
|
|
223
|
+
Seq('GTTTCATAG')
|
|
224
|
+
|
|
225
|
+
.. _PDBNucleicAcids: https://gitlab.com/MorfeoRenai/pdbnucleicacids
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "biointerface"
|
|
7
|
-
version = "0.
|
|
7
|
+
version = "0.3.1"
|
|
8
8
|
description = "BioInterface is a Biopython based package that extracts Protein-DNA interfaces in a PDB structures."
|
|
9
9
|
readme = "README.rst"
|
|
10
10
|
authors = [
|
|
@@ -20,15 +20,24 @@ license = {text = "MIT license"}
|
|
|
20
20
|
dependencies = [
|
|
21
21
|
"pandas",
|
|
22
22
|
"biopython",
|
|
23
|
-
"pdbnucleicacids >= 0.2.
|
|
23
|
+
"pdbnucleicacids >= 0.2.2",
|
|
24
24
|
]
|
|
25
25
|
|
|
26
26
|
[project.optional-dependencies]
|
|
27
27
|
dev = [
|
|
28
|
-
"
|
|
29
|
-
"
|
|
30
|
-
"
|
|
31
|
-
"
|
|
28
|
+
"spyder-kernels",
|
|
29
|
+
"flake8", # lint
|
|
30
|
+
"ruff", # lint & format
|
|
31
|
+
"mypy", # type check
|
|
32
|
+
"pytest", # test
|
|
33
|
+
"tox",
|
|
34
|
+
"coverage", # test
|
|
35
|
+
"sphinx", # docs
|
|
36
|
+
"watchdog", # docs
|
|
37
|
+
"bump-my-version", # tags & version
|
|
38
|
+
"wheel", # build
|
|
39
|
+
"build", # build
|
|
40
|
+
"twine", # deploy
|
|
32
41
|
]
|
|
33
42
|
|
|
34
43
|
[project.urls]
|
|
@@ -72,41 +81,6 @@ testpaths = [
|
|
|
72
81
|
]
|
|
73
82
|
|
|
74
83
|
|
|
75
|
-
# bump-my-version
|
|
76
|
-
# ----
|
|
77
|
-
|
|
78
|
-
[tool.bumpversion]
|
|
79
|
-
current_version = "0.2.3"
|
|
80
|
-
parse = "(?P<major>\\d+)\\.(?P<minor>\\d+)\\.(?P<patch>\\d+)"
|
|
81
|
-
serialize = ["{major}.{minor}.{patch}"]
|
|
82
|
-
search = "{current_version}"
|
|
83
|
-
replace = "{new_version}"
|
|
84
|
-
regex = false
|
|
85
|
-
ignore_missing_version = false
|
|
86
|
-
ignore_missing_files = false
|
|
87
|
-
tag = true
|
|
88
|
-
sign_tags = false
|
|
89
|
-
tag_name = "v{new_version}"
|
|
90
|
-
tag_message = "Bump version: {current_version} → {new_version}"
|
|
91
|
-
allow_dirty = false
|
|
92
|
-
commit = true
|
|
93
|
-
message = "Bump version: {current_version} → {new_version}"
|
|
94
|
-
commit_args = ""
|
|
95
|
-
setup_hooks = []
|
|
96
|
-
pre_commit_hooks = []
|
|
97
|
-
post_commit_hooks = []
|
|
98
|
-
|
|
99
|
-
[[tool.bumpversion.files]]
|
|
100
|
-
filename = "src/biointerface/__init__.py"
|
|
101
|
-
search = "{current_version}"
|
|
102
|
-
replace = "{new_version}"
|
|
103
|
-
|
|
104
|
-
[[tool.bumpversion.files]]
|
|
105
|
-
filename = "pyproject.toml"
|
|
106
|
-
search = 'version = "{current_version}"'
|
|
107
|
-
replace = 'version = "{new_version}"'
|
|
108
|
-
|
|
109
|
-
|
|
110
84
|
# ruff
|
|
111
85
|
# ----
|
|
112
86
|
|
|
@@ -134,8 +108,11 @@ envlist = ["py310", "py311", "py312", "py313"]
|
|
|
134
108
|
[tool.tox.env_run_base] # General configuration shared by all test environments
|
|
135
109
|
description = "Run the tests with pytest"
|
|
136
110
|
deps = [
|
|
137
|
-
|
|
138
|
-
"
|
|
111
|
+
"flake8", # lint
|
|
112
|
+
"ruff", # lint & format
|
|
113
|
+
"mypy", # type check
|
|
114
|
+
"pytest", # test
|
|
115
|
+
#"-r requirements-dev.txt" # If you have a requirements file for development
|
|
139
116
|
]
|
|
140
117
|
commands = [["pytest"]]
|
|
141
118
|
|
|
@@ -162,3 +139,37 @@ description = "Run the tests on Python 3.13"
|
|
|
162
139
|
#deps = ["flake8"]
|
|
163
140
|
#commands = ["flake8 src tests"]
|
|
164
141
|
|
|
142
|
+
|
|
143
|
+
# bump-my-version
|
|
144
|
+
# ----
|
|
145
|
+
|
|
146
|
+
[tool.bumpversion]
|
|
147
|
+
current_version = "0.3.1"
|
|
148
|
+
parse = "(?P<major>\\d+)\\.(?P<minor>\\d+)\\.(?P<patch>\\d+)"
|
|
149
|
+
serialize = ["{major}.{minor}.{patch}"]
|
|
150
|
+
search = "{current_version}"
|
|
151
|
+
replace = "{new_version}"
|
|
152
|
+
regex = false
|
|
153
|
+
ignore_missing_version = false
|
|
154
|
+
ignore_missing_files = false
|
|
155
|
+
tag = true
|
|
156
|
+
sign_tags = false
|
|
157
|
+
tag_name = "v{new_version}"
|
|
158
|
+
tag_message = "Bump version: {current_version} → {new_version}"
|
|
159
|
+
allow_dirty = false
|
|
160
|
+
commit = true
|
|
161
|
+
message = "Bump version: {current_version} → {new_version}"
|
|
162
|
+
commit_args = ""
|
|
163
|
+
setup_hooks = []
|
|
164
|
+
pre_commit_hooks = []
|
|
165
|
+
post_commit_hooks = []
|
|
166
|
+
|
|
167
|
+
[[tool.bumpversion.files]]
|
|
168
|
+
filename = "src/biointerface/__init__.py"
|
|
169
|
+
search = "{current_version}"
|
|
170
|
+
replace = "{new_version}"
|
|
171
|
+
|
|
172
|
+
[[tool.bumpversion.files]]
|
|
173
|
+
filename = "pyproject.toml"
|
|
174
|
+
search = 'version = "{current_version}"'
|
|
175
|
+
replace = 'version = "{new_version}"'
|
|
@@ -13,53 +13,15 @@ from Bio.PDB.PDBExceptions import PDBConstructionException
|
|
|
13
13
|
|
|
14
14
|
import pandas as pd
|
|
15
15
|
|
|
16
|
-
from PDBNucleicAcids.NucleicAcid import
|
|
16
|
+
from PDBNucleicAcids.NucleicAcid import (
|
|
17
|
+
NABuilder,
|
|
18
|
+
DSNABuilder,
|
|
19
|
+
DoubleStrandNucleicAcid,
|
|
20
|
+
)
|
|
17
21
|
|
|
22
|
+
import copy
|
|
18
23
|
|
|
19
|
-
|
|
20
|
-
"""
|
|
21
|
-
Extract all Protein-DNA interfaces found in a structure.
|
|
22
|
-
|
|
23
|
-
Parameters
|
|
24
|
-
----------
|
|
25
|
-
structure : Bio.PDB.Structure
|
|
26
|
-
Biopython Structure entity.
|
|
27
|
-
search_radius : float | int, optional
|
|
28
|
-
Search radius, measured in Armstrong, within which Protein-DNA
|
|
29
|
-
interactions are found. Default is 5.0
|
|
30
|
-
|
|
31
|
-
Returns
|
|
32
|
-
-------
|
|
33
|
-
list
|
|
34
|
-
List of all Protein-DNA interfaces found in a structure.
|
|
35
|
-
|
|
36
|
-
"""
|
|
37
|
-
# build nucleic acids
|
|
38
|
-
builder = NABuilder()
|
|
39
|
-
na_list = builder.build_nucleic_acids(structure)
|
|
40
|
-
if not na_list:
|
|
41
|
-
return []
|
|
42
|
-
|
|
43
|
-
# dna_chain_ids = list({na.get_chain_id() for na in na_list})
|
|
44
|
-
|
|
45
|
-
# build peptides
|
|
46
|
-
builder = PPBuilder()
|
|
47
|
-
pp_list = builder.build_peptides(structure)
|
|
48
|
-
if not pp_list:
|
|
49
|
-
return []
|
|
50
|
-
|
|
51
|
-
prot_chain_ids = list({pp[0].parent.id for pp in pp_list})
|
|
52
|
-
|
|
53
|
-
face_list = []
|
|
54
|
-
for prot_chain_id in prot_chain_ids:
|
|
55
|
-
face = Interface(
|
|
56
|
-
structure=structure,
|
|
57
|
-
protein_chain_id=prot_chain_id,
|
|
58
|
-
search_radius=search_radius,
|
|
59
|
-
)
|
|
60
|
-
face_list.append(face)
|
|
61
|
-
|
|
62
|
-
return face_list
|
|
24
|
+
import warnings
|
|
63
25
|
|
|
64
26
|
|
|
65
27
|
class Interface:
|
|
@@ -90,7 +52,7 @@ class Interface:
|
|
|
90
52
|
{atom.parent.parent.id for atom in dna_atoms}
|
|
91
53
|
)
|
|
92
54
|
|
|
93
|
-
def __repr__(self):
|
|
55
|
+
def __repr__(self) -> str:
|
|
94
56
|
"""Return string representation of the nucleic acid."""
|
|
95
57
|
return f"<Interface chains={self.protein_chain_id}:\
|
|
96
58
|
{''.join(self._dna_chain_ids)} contacts={len(self.contacts)} search_radius=\
|
|
@@ -98,7 +60,7 @@ class Interface:
|
|
|
98
60
|
|
|
99
61
|
def _extract_contacts(self) -> list[tuple[Atom]]:
|
|
100
62
|
"""
|
|
101
|
-
Extract interface contacts.
|
|
63
|
+
Extract interface contacts (PRIVATE).
|
|
102
64
|
|
|
103
65
|
Raises
|
|
104
66
|
------
|
|
@@ -262,7 +224,7 @@ chain id: {self.protein_chain_id}"
|
|
|
262
224
|
Atom element
|
|
263
225
|
Atomic coordinates (x, y, z)
|
|
264
226
|
From both protein and DNA atoms
|
|
265
|
-
Euclidean distance
|
|
227
|
+
Euclidean distance between atom pair in contact
|
|
266
228
|
|
|
267
229
|
Returns
|
|
268
230
|
-------
|
|
@@ -273,7 +235,6 @@ chain id: {self.protein_chain_id}"
|
|
|
273
235
|
data = []
|
|
274
236
|
|
|
275
237
|
for na_atom, prot_atom in self.contacts:
|
|
276
|
-
|
|
277
238
|
prot_res_hetfield = prot_atom.parent.id[0]
|
|
278
239
|
prot_res_number = prot_atom.parent.id[1]
|
|
279
240
|
prot_res_icode = prot_atom.parent.id[2]
|
|
@@ -297,7 +258,7 @@ chain id: {self.protein_chain_id}"
|
|
|
297
258
|
dna_atom_coord_y = na_atom.coord[1]
|
|
298
259
|
dna_atom_coord_z = na_atom.coord[2]
|
|
299
260
|
|
|
300
|
-
|
|
261
|
+
euclidean_distance = na_atom - prot_atom
|
|
301
262
|
|
|
302
263
|
row = (
|
|
303
264
|
self.protein_chain_id,
|
|
@@ -322,7 +283,7 @@ chain id: {self.protein_chain_id}"
|
|
|
322
283
|
dna_atom_coord_x,
|
|
323
284
|
dna_atom_coord_y,
|
|
324
285
|
dna_atom_coord_z,
|
|
325
|
-
|
|
286
|
+
euclidean_distance,
|
|
326
287
|
)
|
|
327
288
|
|
|
328
289
|
data.append(row)
|
|
@@ -352,12 +313,126 @@ chain id: {self.protein_chain_id}"
|
|
|
352
313
|
"dna_atom_coord_x",
|
|
353
314
|
"dna_atom_coord_y",
|
|
354
315
|
"dna_atom_coord_z",
|
|
355
|
-
"
|
|
316
|
+
"euclidean_distance",
|
|
356
317
|
],
|
|
357
318
|
)
|
|
358
319
|
|
|
359
320
|
return df
|
|
360
321
|
|
|
322
|
+
def get_bound_double_strands(self) -> list[DoubleStrandNucleicAcid]:
|
|
323
|
+
"""
|
|
324
|
+
Get all double-strand nucleic acids bound by the protein.
|
|
325
|
+
|
|
326
|
+
The output double stranded nucleic acids (DSNAs) are subsequences
|
|
327
|
+
of the full DSNAs found in the structure,
|
|
328
|
+
since proteins usually do not bind the whole DSNA.
|
|
329
|
+
|
|
330
|
+
This method allows for "gaps" of unbound base-pairs inside the
|
|
331
|
+
DSNA, only the base pairs at the ends are trimmed according
|
|
332
|
+
to being protein-bound or not.
|
|
333
|
+
|
|
334
|
+
A visual example of "gaps":
|
|
335
|
+
``Input full DSNA: GATATACAAGCCA``
|
|
336
|
+
|
|
337
|
+
``Protein-bound: **** **``
|
|
338
|
+
|
|
339
|
+
``Output protein-bound DSNA: TATACAAG``
|
|
340
|
+
|
|
341
|
+
Returns
|
|
342
|
+
-------
|
|
343
|
+
bound_dsna_list : list[DoubleStrandNucleicAcid]
|
|
344
|
+
List of double-strand nucleic acids bound by the protein.
|
|
345
|
+
|
|
346
|
+
"""
|
|
347
|
+
bound_nucleotides = self.get_nucleotides()
|
|
348
|
+
|
|
349
|
+
builder = DSNABuilder()
|
|
350
|
+
dsna_list = builder.build_double_strands(self.structure)
|
|
351
|
+
bound_dsna_list = []
|
|
352
|
+
for dsna in dsna_list:
|
|
353
|
+
bound_dsna = copy.copy(dsna)
|
|
354
|
+
while (
|
|
355
|
+
bound_dsna[0].i_res not in bound_nucleotides
|
|
356
|
+
and bound_dsna[0].j_res not in bound_nucleotides
|
|
357
|
+
):
|
|
358
|
+
# if the FIRST base pair isn't bound by protein
|
|
359
|
+
# then discard it and check the next FIRST base pair
|
|
360
|
+
bound_dsna.pop(0)
|
|
361
|
+
|
|
362
|
+
while (
|
|
363
|
+
bound_dsna[-1].i_res not in bound_nucleotides
|
|
364
|
+
and bound_dsna[-1].j_res not in bound_nucleotides
|
|
365
|
+
):
|
|
366
|
+
# if the LAST base pair isn't bound by protein
|
|
367
|
+
# then discard it and check the next LAST base pair
|
|
368
|
+
bound_dsna.pop(-1)
|
|
369
|
+
|
|
370
|
+
if len(bound_dsna) > 0:
|
|
371
|
+
# in this case, there is an actual bound DSNA
|
|
372
|
+
bound_dsna_list.append(bound_dsna)
|
|
373
|
+
|
|
374
|
+
for bp in bound_dsna:
|
|
375
|
+
if (
|
|
376
|
+
bp.i_res not in bound_nucleotides
|
|
377
|
+
and bp.j_res not in bound_nucleotides
|
|
378
|
+
):
|
|
379
|
+
warnings.warn(
|
|
380
|
+
f"Warning: there are unbound base-pairs \
|
|
381
|
+
inside the resulting DoubleStrandNucleicAcid - {bp}"
|
|
382
|
+
)
|
|
383
|
+
|
|
384
|
+
return bound_dsna_list
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
def build_interfaces(structure, search_radius=5.0) -> list[Interface]:
|
|
388
|
+
"""
|
|
389
|
+
Extract all Protein-DNA interfaces found in a structure.
|
|
390
|
+
|
|
391
|
+
Parameters
|
|
392
|
+
----------
|
|
393
|
+
structure : Bio.PDB.Structure
|
|
394
|
+
Biopython Structure entity.
|
|
395
|
+
search_radius : float | int, optional
|
|
396
|
+
Search radius, measured in Angstrom, within which Protein-DNA
|
|
397
|
+
interactions are found. Default is 5.0
|
|
398
|
+
|
|
399
|
+
Returns
|
|
400
|
+
-------
|
|
401
|
+
list
|
|
402
|
+
List of all Protein-DNA interfaces found in a structure.
|
|
403
|
+
|
|
404
|
+
"""
|
|
405
|
+
# build nucleic acids
|
|
406
|
+
builder = NABuilder()
|
|
407
|
+
na_list = builder.build_nucleic_acids(structure)
|
|
408
|
+
if not na_list:
|
|
409
|
+
return []
|
|
410
|
+
|
|
411
|
+
# dna_chain_ids = list({na.get_chain_id() for na in na_list})
|
|
412
|
+
|
|
413
|
+
# build peptides
|
|
414
|
+
builder = PPBuilder()
|
|
415
|
+
pp_list = builder.build_peptides(structure)
|
|
416
|
+
if not pp_list:
|
|
417
|
+
return []
|
|
418
|
+
|
|
419
|
+
prot_chain_ids = list({pp[0].parent.id for pp in pp_list})
|
|
420
|
+
|
|
421
|
+
face_list = []
|
|
422
|
+
for prot_chain_id in prot_chain_ids:
|
|
423
|
+
# extract interface
|
|
424
|
+
face = Interface(
|
|
425
|
+
structure=structure,
|
|
426
|
+
protein_chain_id=prot_chain_id,
|
|
427
|
+
search_radius=search_radius,
|
|
428
|
+
)
|
|
429
|
+
|
|
430
|
+
# skip empty interfaces
|
|
431
|
+
if len(face.get_atomic_contacts()) > 0:
|
|
432
|
+
face_list.append(face)
|
|
433
|
+
|
|
434
|
+
return face_list
|
|
435
|
+
|
|
361
436
|
|
|
362
437
|
# def export_atom_list(structure_id, atom_list, out_filepath):
|
|
363
438
|
# """Export atom list."""
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: biointerface
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.3.1
|
|
4
4
|
Summary: BioInterface is a Biopython based package that extracts Protein-DNA interfaces in a PDB structures.
|
|
5
5
|
Author-email: Alessandro Pandolfi <alessandro.pandolfi@protonmail.com>
|
|
6
6
|
Maintainer-email: Alessandro Pandolfi <alessandro.pandolfi@protonmail.com>
|
|
@@ -14,12 +14,21 @@ License-File: LICENSE
|
|
|
14
14
|
License-File: AUTHORS.rst
|
|
15
15
|
Requires-Dist: pandas
|
|
16
16
|
Requires-Dist: biopython
|
|
17
|
-
Requires-Dist: pdbnucleicacids>=0.2.
|
|
17
|
+
Requires-Dist: pdbnucleicacids>=0.2.2
|
|
18
18
|
Provides-Extra: dev
|
|
19
|
-
Requires-Dist:
|
|
19
|
+
Requires-Dist: spyder-kernels; extra == "dev"
|
|
20
|
+
Requires-Dist: flake8; extra == "dev"
|
|
21
|
+
Requires-Dist: ruff; extra == "dev"
|
|
20
22
|
Requires-Dist: mypy; extra == "dev"
|
|
21
23
|
Requires-Dist: pytest; extra == "dev"
|
|
22
|
-
Requires-Dist:
|
|
24
|
+
Requires-Dist: tox; extra == "dev"
|
|
25
|
+
Requires-Dist: coverage; extra == "dev"
|
|
26
|
+
Requires-Dist: sphinx; extra == "dev"
|
|
27
|
+
Requires-Dist: watchdog; extra == "dev"
|
|
28
|
+
Requires-Dist: bump-my-version; extra == "dev"
|
|
29
|
+
Requires-Dist: wheel; extra == "dev"
|
|
30
|
+
Requires-Dist: build; extra == "dev"
|
|
31
|
+
Requires-Dist: twine; extra == "dev"
|
|
23
32
|
|
|
24
33
|
============
|
|
25
34
|
BioInterface
|
|
@@ -101,13 +110,12 @@ Feaures
|
|
|
101
110
|
|
|
102
111
|
* Interface data as ``pandas`` DataFrame;
|
|
103
112
|
|
|
113
|
+
* Get all continuous protein-bound double-strand nucleic acids;
|
|
114
|
+
|
|
104
115
|
|
|
105
116
|
TODO
|
|
106
117
|
--------
|
|
107
118
|
|
|
108
|
-
* Extract continous bound DNA sequence
|
|
109
|
-
|
|
110
|
-
* Proper tests (WIP)
|
|
111
119
|
|
|
112
120
|
|
|
113
121
|
Credits
|