biointerface 0.3.2__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {biointerface-0.3.2 → biointerface-0.4.0}/CONTRIBUTING.rst +1 -1
- {biointerface-0.3.2 → biointerface-0.4.0}/HISTORY.rst +7 -0
- {biointerface-0.3.2 → biointerface-0.4.0}/PKG-INFO +3 -6
- {biointerface-0.3.2 → biointerface-0.4.0}/README.rst +0 -4
- {biointerface-0.3.2 → biointerface-0.4.0}/docs/installation.rst +2 -9
- {biointerface-0.3.2 → biointerface-0.4.0}/docs/usage.rst +1 -1
- {biointerface-0.3.2 → biointerface-0.4.0}/pyproject.toml +2 -2
- {biointerface-0.3.2 → biointerface-0.4.0}/src/biointerface/__init__.py +1 -1
- {biointerface-0.3.2 → biointerface-0.4.0}/src/biointerface/core.py +74 -75
- {biointerface-0.3.2 → biointerface-0.4.0}/src/biointerface.egg-info/PKG-INFO +3 -6
- {biointerface-0.3.2 → biointerface-0.4.0}/tests/test_core.py +2 -2
- {biointerface-0.3.2 → biointerface-0.4.0}/AUTHORS.rst +0 -0
- {biointerface-0.3.2 → biointerface-0.4.0}/LICENSE +0 -0
- {biointerface-0.3.2 → biointerface-0.4.0}/MANIFEST.in +0 -0
- {biointerface-0.3.2 → biointerface-0.4.0}/docs/Makefile +0 -0
- {biointerface-0.3.2 → biointerface-0.4.0}/docs/authors.rst +0 -0
- {biointerface-0.3.2 → biointerface-0.4.0}/docs/biointerface.rst +0 -0
- {biointerface-0.3.2 → biointerface-0.4.0}/docs/conf.py +0 -0
- {biointerface-0.3.2 → biointerface-0.4.0}/docs/contributing.rst +0 -0
- {biointerface-0.3.2 → biointerface-0.4.0}/docs/history.rst +0 -0
- {biointerface-0.3.2 → biointerface-0.4.0}/docs/index.rst +0 -0
- {biointerface-0.3.2 → biointerface-0.4.0}/docs/make.bat +0 -0
- {biointerface-0.3.2 → biointerface-0.4.0}/docs/modules.rst +0 -0
- {biointerface-0.3.2 → biointerface-0.4.0}/docs/readme.rst +0 -0
- {biointerface-0.3.2 → biointerface-0.4.0}/setup.cfg +0 -0
- {biointerface-0.3.2 → biointerface-0.4.0}/src/biointerface.egg-info/SOURCES.txt +0 -0
- {biointerface-0.3.2 → biointerface-0.4.0}/src/biointerface.egg-info/dependency_links.txt +0 -0
- {biointerface-0.3.2 → biointerface-0.4.0}/src/biointerface.egg-info/requires.txt +0 -0
- {biointerface-0.3.2 → biointerface-0.4.0}/src/biointerface.egg-info/top_level.txt +0 -0
- {biointerface-0.3.2 → biointerface-0.4.0}/tests/__init__.py +0 -0
- {biointerface-0.3.2 → biointerface-0.4.0}/tests/data/gattaca.cif +0 -0
- {biointerface-0.3.2 → biointerface-0.4.0}/tests/test_biointerface.py +0 -0
|
@@ -110,7 +110,7 @@ Before you submit a pull request, check that it meets these guidelines:
|
|
|
110
110
|
2. If the pull request adds functionality, the docs should be updated. Put
|
|
111
111
|
your new functionality into a function with a docstring, and add the
|
|
112
112
|
feature to the list in README.rst.
|
|
113
|
-
3. The pull request should work for Python 3.10, 3.11, 3.12 and 3.13
|
|
113
|
+
3. The pull request should work for Python 3.10, 3.11, 3.12 and 3.13
|
|
114
114
|
and for PyPy. Make sure that the tests pass for all supported Python versions.
|
|
115
115
|
|
|
116
116
|
Tips
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: biointerface
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Summary: BioInterface is a Biopython based package that extracts Protein-DNA interfaces in a PDB structures.
|
|
5
5
|
Author-email: Alessandro Pandolfi <alessandro.pandolfi@protonmail.com>
|
|
6
6
|
Maintainer-email: Alessandro Pandolfi <alessandro.pandolfi@protonmail.com>
|
|
@@ -29,6 +29,7 @@ Requires-Dist: bump-my-version; extra == "dev"
|
|
|
29
29
|
Requires-Dist: wheel; extra == "dev"
|
|
30
30
|
Requires-Dist: build; extra == "dev"
|
|
31
31
|
Requires-Dist: twine; extra == "dev"
|
|
32
|
+
Dynamic: license-file
|
|
32
33
|
|
|
33
34
|
============
|
|
34
35
|
BioInterface
|
|
@@ -46,10 +47,6 @@ BioInterface
|
|
|
46
47
|
:target: https://gitlab.com/MorfeoRenai/biointerface/-/commits/main
|
|
47
48
|
:alt: Coverage Status
|
|
48
49
|
|
|
49
|
-
.. image:: https://pyup.io/repos/github/MorfeoRenai/biointerface/shield.svg
|
|
50
|
-
:target: https://pyup.io/repos/github/MorfeoRenai/biointerface/
|
|
51
|
-
:alt: Updates
|
|
52
|
-
|
|
53
50
|
|
|
54
51
|
BioInterface is a `Biopython <https://biopython.org/>`_ based package that extracts Protein-DNA
|
|
55
52
|
interfaces in PDB structures.
|
|
@@ -14,10 +14,6 @@ BioInterface
|
|
|
14
14
|
:target: https://gitlab.com/MorfeoRenai/biointerface/-/commits/main
|
|
15
15
|
:alt: Coverage Status
|
|
16
16
|
|
|
17
|
-
.. image:: https://pyup.io/repos/github/MorfeoRenai/biointerface/shield.svg
|
|
18
|
-
:target: https://pyup.io/repos/github/MorfeoRenai/biointerface/
|
|
19
|
-
:alt: Updates
|
|
20
|
-
|
|
21
17
|
|
|
22
18
|
BioInterface is a `Biopython <https://biopython.org/>`_ based package that extracts Protein-DNA
|
|
23
19
|
interfaces in PDB structures.
|
|
@@ -32,15 +32,9 @@ You can either clone the public repository:
|
|
|
32
32
|
|
|
33
33
|
.. code-block:: console
|
|
34
34
|
|
|
35
|
-
$ git clone git
|
|
35
|
+
$ git clone git@gitlab.com:MorfeoRenai/biointerface.git
|
|
36
36
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
.. code-block:: console
|
|
40
|
-
|
|
41
|
-
$ curl -OJL https://gitlab.com/MorfeoRenai/biointerface/tarball/main
|
|
42
|
-
|
|
43
|
-
Once you have a copy of the source, you can install it with:
|
|
37
|
+
Once you have a copy of the source, change into its directory and install it with:
|
|
44
38
|
|
|
45
39
|
.. code-block:: console
|
|
46
40
|
|
|
@@ -48,4 +42,3 @@ Once you have a copy of the source, you can install it with:
|
|
|
48
42
|
|
|
49
43
|
|
|
50
44
|
.. _Gitlab repo: https://gitlab.com/MorfeoRenai/biointerface
|
|
51
|
-
.. _tarball: https://gitlab.com/MorfeoRenai/biointerface/tarball/main
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "biointerface"
|
|
7
|
-
version = "0.
|
|
7
|
+
version = "0.4.0"
|
|
8
8
|
description = "BioInterface is a Biopython based package that extracts Protein-DNA interfaces in a PDB structures."
|
|
9
9
|
readme = "README.rst"
|
|
10
10
|
authors = [
|
|
@@ -144,7 +144,7 @@ description = "Run the tests on Python 3.13"
|
|
|
144
144
|
# ----
|
|
145
145
|
|
|
146
146
|
[tool.bumpversion]
|
|
147
|
-
current_version = "0.
|
|
147
|
+
current_version = "0.4.0"
|
|
148
148
|
parse = "(?P<major>\\d+)\\.(?P<minor>\\d+)\\.(?P<patch>\\d+)"
|
|
149
149
|
serialize = ["{major}.{minor}.{patch}"]
|
|
150
150
|
search = "{current_version}"
|
|
@@ -6,18 +6,11 @@ from Bio.PDB.Residue import Residue
|
|
|
6
6
|
from Bio.PDB.Atom import Atom
|
|
7
7
|
from Bio.PDB.PDBExceptions import PDBConstructionException
|
|
8
8
|
|
|
9
|
-
# from Bio.PDB.Structure import Structure
|
|
10
|
-
# from Bio.PDB.Model import Model
|
|
11
|
-
# from Bio.PDB.Chain import Chain
|
|
12
|
-
# from Bio.PDB import MMCIFIO
|
|
13
|
-
|
|
14
9
|
import pandas as pd
|
|
15
10
|
|
|
16
|
-
from PDBNucleicAcids.NucleicAcid import
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
DoubleStrandNucleicAcid,
|
|
20
|
-
)
|
|
11
|
+
from PDBNucleicAcids.NucleicAcid import NABuilder
|
|
12
|
+
from PDBNucleicAcids.NucleicAcid import DSNABuilder
|
|
13
|
+
from PDBNucleicAcids.NucleicAcid import DoubleStrandNucleicAcid
|
|
21
14
|
|
|
22
15
|
import copy
|
|
23
16
|
|
|
@@ -36,11 +29,11 @@ class Interface:
|
|
|
36
29
|
Chain id of a protein that may interact with DNA.
|
|
37
30
|
search_radius : float | int, optional
|
|
38
31
|
Search radius, measured in angstroms, within which Protein-DNA
|
|
39
|
-
interactions are found. Default is
|
|
32
|
+
interactions are found. Default is 4.0
|
|
40
33
|
|
|
41
34
|
"""
|
|
42
35
|
|
|
43
|
-
def __init__(self, structure, protein_chain_id, search_radius=
|
|
36
|
+
def __init__(self, structure, protein_chain_id, search_radius=4.0) -> None:
|
|
44
37
|
self.structure = structure
|
|
45
38
|
self.protein_chain_id = protein_chain_id
|
|
46
39
|
self.search_radius = search_radius
|
|
@@ -210,7 +203,7 @@ chain id: {self.protein_chain_id}"
|
|
|
210
203
|
"""
|
|
211
204
|
return list({atom_pair[0].parent for atom_pair in self.contacts})
|
|
212
205
|
|
|
213
|
-
def
|
|
206
|
+
def as_dataframe(self) -> pd.DataFrame:
|
|
214
207
|
"""
|
|
215
208
|
Get all data from the interface, as a dataframe.
|
|
216
209
|
|
|
@@ -334,9 +327,9 @@ chain id: {self.protein_chain_id}"
|
|
|
334
327
|
A visual example of "gaps":
|
|
335
328
|
``Input full DSNA: GATATACAAGCCA``
|
|
336
329
|
|
|
337
|
-
``Protein-bound: ****
|
|
330
|
+
``Protein-bound: **** ** ``
|
|
338
331
|
|
|
339
|
-
``Output protein-bound DSNA: TATACAAG``
|
|
332
|
+
``Output protein-bound DSNA: TATACAAG ``
|
|
340
333
|
|
|
341
334
|
Returns
|
|
342
335
|
-------
|
|
@@ -373,20 +366,80 @@ chain id: {self.protein_chain_id}"
|
|
|
373
366
|
# in this case, there is an actual bound DSNA
|
|
374
367
|
bound_dsna_list.append(bound_dsna)
|
|
375
368
|
|
|
369
|
+
unbound_bps = []
|
|
376
370
|
for bp in bound_dsna:
|
|
377
371
|
if (
|
|
378
372
|
bp.i_res not in bound_nucleotides
|
|
379
373
|
and bp.j_res not in bound_nucleotides
|
|
380
374
|
):
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
375
|
+
unbound_bps.append(bp)
|
|
376
|
+
|
|
377
|
+
if unbound_bps:
|
|
378
|
+
warnings.warn(
|
|
379
|
+
f"Warning: There are {len(unbound_bps)} unbound \
|
|
380
|
+
base-pairs inside {bound_dsna} - {unbound_bps}"
|
|
381
|
+
)
|
|
385
382
|
|
|
386
383
|
return bound_dsna_list
|
|
387
384
|
|
|
385
|
+
def fixed_protein_atoms_number(self, num_atoms) -> None:
|
|
386
|
+
"""Filter contacts by a fixed number of protein atoms."""
|
|
387
|
+
# cast list into dataframe, ready to be sorted
|
|
388
|
+
df = pd.DataFrame(self.contacts, columns=["na_atom", "protein_atom"])
|
|
389
|
+
df["euclidean_distance"] = df.apply(
|
|
390
|
+
lambda row: row["na_atom"] - row["protein_atom"], axis=1
|
|
391
|
+
)
|
|
392
|
+
|
|
393
|
+
# aggregate: for each atom, its minimum distance from DSNA
|
|
394
|
+
agg = df.groupby(["protein_atom"]).min()
|
|
395
|
+
agg = agg.reset_index()
|
|
396
|
+
agg = agg.sort_values(by="euclidean_distance", ascending=True)
|
|
397
|
+
|
|
398
|
+
# get closest n atoms to DSNA
|
|
399
|
+
top_protein_atoms = agg.head(num_atoms)["protein_atom"].tolist()
|
|
400
|
+
|
|
401
|
+
if len(top_protein_atoms) <= num_atoms:
|
|
402
|
+
raise Exception("Not enough atoms.")
|
|
403
|
+
|
|
404
|
+
# select contacts by top n atoms
|
|
405
|
+
selected_contacts = [
|
|
406
|
+
(na_atom, protein_atom)
|
|
407
|
+
for na_atom, protein_atom in self.contacts
|
|
408
|
+
if protein_atom in top_protein_atoms
|
|
409
|
+
]
|
|
410
|
+
|
|
411
|
+
self.contacts = selected_contacts
|
|
412
|
+
|
|
413
|
+
def fixed_na_atoms_number(self, num_atoms) -> None:
|
|
414
|
+
"""Filter contacts by a fixed number of nucleic acid atoms."""
|
|
415
|
+
# cast list into dataframe, ready to be sorted
|
|
416
|
+
df = pd.DataFrame(self.contacts, columns=["na_atom", "protein_atom"])
|
|
417
|
+
df["euclidean_distance"] = df.apply(
|
|
418
|
+
lambda row: row["na_atom"] - row["protein_atom"], axis=1
|
|
419
|
+
)
|
|
420
|
+
|
|
421
|
+
# aggregate: for each nucleic acid atom, its minimum distance from the protein
|
|
422
|
+
agg = df.groupby(["na_atom"]).min()
|
|
423
|
+
agg = agg.reset_index()
|
|
424
|
+
agg = agg.sort_values(by="euclidean_distance", ascending=True)
|
|
425
|
+
|
|
426
|
+
# get the n nucleic acid atoms closest to the protein
|
|
427
|
+
top_na_atoms = agg.head(num_atoms)["na_atom"].tolist()
|
|
428
|
+
|
|
429
|
+
if len(top_na_atoms) <= num_atoms:
|
|
430
|
+
raise Exception("Not enough atoms.")
|
|
431
|
+
|
|
432
|
+
# select contacts by top n atoms
|
|
433
|
+
selected_contacts = [
|
|
434
|
+
(na_atom, protein_atom)
|
|
435
|
+
for na_atom, protein_atom in self.contacts
|
|
436
|
+
if na_atom in top_na_atoms
|
|
437
|
+
]
|
|
438
|
+
|
|
439
|
+
self.contacts = selected_contacts
|
|
440
|
+
|
|
388
441
|
|
|
389
|
-
def build_interfaces(structure, search_radius=
|
|
442
|
+
def build_interfaces(structure, search_radius=4.0) -> list[Interface]:
|
|
390
443
|
"""
|
|
391
444
|
Extract all Protein-DNA interfaces found in a structure.
|
|
392
445
|
|
|
@@ -396,7 +449,7 @@ def build_interfaces(structure, search_radius=5.0) -> list[Interface]:
|
|
|
396
449
|
Biopython Structure entity.
|
|
397
450
|
search_radius : float | int, optional
|
|
398
451
|
Search radius, measured in angstroms, within which Protein-DNA
|
|
399
|
-
interactions are found. Default is
|
|
452
|
+
interactions are found. Default is 4.0
|
|
400
453
|
|
|
401
454
|
Returns
|
|
402
455
|
-------
|
|
@@ -434,57 +487,3 @@ def build_interfaces(structure, search_radius=5.0) -> list[Interface]:
|
|
|
434
487
|
face_list.append(face)
|
|
435
488
|
|
|
436
489
|
return face_list
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
# def export_atom_list(structure_id, atom_list, out_filepath):
|
|
440
|
-
# """Export atom list."""
|
|
441
|
-
# # not in Path but in string
|
|
442
|
-
# out_filepath = str(out_filepath)
|
|
443
|
-
|
|
444
|
-
# new_structure = Structure.Structure(structure_id)
|
|
445
|
-
# for atom in atom_list:
|
|
446
|
-
# _add_atom_to_new_structure(atom, new_structure)
|
|
447
|
-
|
|
448
|
-
# # Prepare IO object
|
|
449
|
-
# io = MMCIFIO()
|
|
450
|
-
# io.set_structure(new_structure)
|
|
451
|
-
|
|
452
|
-
# # Esporta la nuova struttura in un file PDB
|
|
453
|
-
# # necessita di string type filepath
|
|
454
|
-
# io.save(out_filepath)
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
# def _add_atom_to_new_structure(atom, new_structure):
|
|
458
|
-
# model_id = (
|
|
459
|
-
# atom.get_parent().get_parent().get_parent().id
|
|
460
|
-
# ) # Ottieni l'ID del modello
|
|
461
|
-
# chain_id = atom.get_parent().get_parent().id # Ottieni l'ID della catena
|
|
462
|
-
# residue_id = atom.get_parent().id # Ottieni l'ID del residuo
|
|
463
|
-
# resname = atom.get_parent().resname # Nome del residuo
|
|
464
|
-
|
|
465
|
-
# # Controlla se il modello esiste già nel nuovo modello
|
|
466
|
-
# if model_id in [model.id for model in new_structure]:
|
|
467
|
-
# new_model = new_structure[model_id]
|
|
468
|
-
# else:
|
|
469
|
-
# new_model = Model.Model(model_id)
|
|
470
|
-
# new_structure.add(new_model)
|
|
471
|
-
|
|
472
|
-
# # Controlla se la catena esiste già nel nuovo modello
|
|
473
|
-
# if chain_id in [chain.id for chain in new_model]:
|
|
474
|
-
# new_chain = new_model[chain_id]
|
|
475
|
-
# else:
|
|
476
|
-
# new_chain = Chain.Chain(chain_id)
|
|
477
|
-
# new_model.add(new_chain)
|
|
478
|
-
|
|
479
|
-
# # Controlla se il residuo esiste già nella nuova catena
|
|
480
|
-
# if residue_id in [res.id for res in new_chain]:
|
|
481
|
-
# new_residue = new_chain[residue_id]
|
|
482
|
-
# else:
|
|
483
|
-
# new_residue = Residue.Residue(
|
|
484
|
-
# residue_id, resname, atom.get_parent().segid
|
|
485
|
-
# )
|
|
486
|
-
# new_chain.add(new_residue)
|
|
487
|
-
|
|
488
|
-
# # Copia l'atomo e aggiungilo al residuo
|
|
489
|
-
# new_atom = atom.copy()
|
|
490
|
-
# new_residue.add(new_atom)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: biointerface
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Summary: BioInterface is a Biopython based package that extracts Protein-DNA interfaces in a PDB structures.
|
|
5
5
|
Author-email: Alessandro Pandolfi <alessandro.pandolfi@protonmail.com>
|
|
6
6
|
Maintainer-email: Alessandro Pandolfi <alessandro.pandolfi@protonmail.com>
|
|
@@ -29,6 +29,7 @@ Requires-Dist: bump-my-version; extra == "dev"
|
|
|
29
29
|
Requires-Dist: wheel; extra == "dev"
|
|
30
30
|
Requires-Dist: build; extra == "dev"
|
|
31
31
|
Requires-Dist: twine; extra == "dev"
|
|
32
|
+
Dynamic: license-file
|
|
32
33
|
|
|
33
34
|
============
|
|
34
35
|
BioInterface
|
|
@@ -46,10 +47,6 @@ BioInterface
|
|
|
46
47
|
:target: https://gitlab.com/MorfeoRenai/biointerface/-/commits/main
|
|
47
48
|
:alt: Coverage Status
|
|
48
49
|
|
|
49
|
-
.. image:: https://pyup.io/repos/github/MorfeoRenai/biointerface/shield.svg
|
|
50
|
-
:target: https://pyup.io/repos/github/MorfeoRenai/biointerface/
|
|
51
|
-
:alt: Updates
|
|
52
|
-
|
|
53
50
|
|
|
54
51
|
BioInterface is a `Biopython <https://biopython.org/>`_ based package that extracts Protein-DNA
|
|
55
52
|
interfaces in PDB structures.
|
|
@@ -59,10 +59,10 @@ def test_get_residues():
|
|
|
59
59
|
) # Devono avere nomi di residui
|
|
60
60
|
|
|
61
61
|
|
|
62
|
-
def
|
|
62
|
+
def test_as_dataframe():
|
|
63
63
|
structure = get_test_structure()
|
|
64
64
|
face = Interface(structure=structure, protein_chain_id="C")
|
|
65
|
-
df = face.
|
|
65
|
+
df = face.as_dataframe()
|
|
66
66
|
assert df is not None
|
|
67
67
|
assert not df.empty
|
|
68
68
|
assert set(
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|