biotite 0.39.0__cp312-cp312-macosx_11_0_arm64.whl → 0.41.0__cp312-cp312-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +3 -3
- biotite/application/dssp/app.py +18 -18
- biotite/database/pubchem/download.py +23 -23
- biotite/database/pubchem/query.py +7 -7
- biotite/database/rcsb/download.py +19 -14
- biotite/file.py +17 -9
- biotite/sequence/align/banded.c +256 -235
- biotite/sequence/align/banded.cpython-312-darwin.so +0 -0
- biotite/sequence/align/cigar.py +60 -15
- biotite/sequence/align/kmeralphabet.c +241 -220
- biotite/sequence/align/kmeralphabet.cpython-312-darwin.so +0 -0
- biotite/sequence/align/kmersimilarity.c +213 -194
- biotite/sequence/align/kmersimilarity.cpython-312-darwin.so +0 -0
- biotite/sequence/align/kmertable.cpp +231 -203
- biotite/sequence/align/kmertable.cpython-312-darwin.so +0 -0
- biotite/sequence/align/localgapped.c +256 -235
- biotite/sequence/align/localgapped.cpython-312-darwin.so +0 -0
- biotite/sequence/align/localungapped.c +233 -212
- biotite/sequence/align/localungapped.cpython-312-darwin.so +0 -0
- biotite/sequence/align/multiple.c +253 -232
- biotite/sequence/align/multiple.cpython-312-darwin.so +0 -0
- biotite/sequence/align/pairwise.c +272 -251
- biotite/sequence/align/pairwise.cpython-312-darwin.so +0 -0
- biotite/sequence/align/permutation.c +213 -194
- biotite/sequence/align/permutation.cpython-312-darwin.so +0 -0
- biotite/sequence/align/selector.c +215 -195
- biotite/sequence/align/selector.cpython-312-darwin.so +0 -0
- biotite/sequence/align/tracetable.c +213 -193
- biotite/sequence/align/tracetable.cpython-312-darwin.so +0 -0
- biotite/sequence/annotation.py +2 -2
- biotite/sequence/codec.c +233 -212
- biotite/sequence/codec.cpython-312-darwin.so +0 -0
- biotite/sequence/io/fasta/convert.py +27 -24
- biotite/sequence/phylo/nj.c +213 -194
- biotite/sequence/phylo/nj.cpython-312-darwin.so +0 -0
- biotite/sequence/phylo/tree.c +225 -200
- biotite/sequence/phylo/tree.cpython-312-darwin.so +0 -0
- biotite/sequence/phylo/upgma.c +213 -194
- biotite/sequence/phylo/upgma.cpython-312-darwin.so +0 -0
- biotite/structure/__init__.py +2 -0
- biotite/structure/basepairs.py +7 -12
- biotite/structure/bonds.c +1435 -1277
- biotite/structure/bonds.cpython-312-darwin.so +0 -0
- biotite/structure/celllist.c +215 -195
- biotite/structure/celllist.cpython-312-darwin.so +0 -0
- biotite/structure/charges.c +1050 -1099
- biotite/structure/charges.cpython-312-darwin.so +0 -0
- biotite/structure/dotbracket.py +2 -0
- biotite/structure/filter.py +30 -37
- biotite/structure/info/__init__.py +5 -8
- biotite/structure/info/atoms.py +31 -68
- biotite/structure/info/bonds.py +47 -101
- biotite/structure/info/ccd/README.rst +8 -0
- biotite/structure/info/ccd/amino_acids.txt +1663 -0
- biotite/structure/info/ccd/carbohydrates.txt +1135 -0
- biotite/structure/info/ccd/components.bcif +0 -0
- biotite/structure/info/ccd/nucleotides.txt +798 -0
- biotite/structure/info/ccd.py +95 -0
- biotite/structure/info/groups.py +90 -0
- biotite/structure/info/masses.py +21 -20
- biotite/structure/info/misc.py +78 -25
- biotite/structure/info/standardize.py +17 -12
- biotite/structure/integrity.py +19 -70
- biotite/structure/io/__init__.py +2 -4
- biotite/structure/io/ctab.py +12 -106
- biotite/structure/io/general.py +167 -181
- biotite/structure/io/gro/file.py +16 -16
- biotite/structure/io/mmtf/__init__.py +3 -0
- biotite/structure/io/mmtf/convertarray.c +217 -196
- biotite/structure/io/mmtf/convertarray.cpython-312-darwin.so +0 -0
- biotite/structure/io/mmtf/convertfile.c +215 -195
- biotite/structure/io/mmtf/convertfile.cpython-312-darwin.so +0 -0
- biotite/structure/io/mmtf/decode.c +223 -202
- biotite/structure/io/mmtf/decode.cpython-312-darwin.so +0 -0
- biotite/structure/io/mmtf/encode.c +213 -194
- biotite/structure/io/mmtf/encode.cpython-312-darwin.so +0 -0
- biotite/structure/io/mmtf/file.py +34 -26
- biotite/structure/io/mol/__init__.py +4 -2
- biotite/structure/io/mol/convert.py +71 -7
- biotite/structure/io/mol/ctab.py +414 -0
- biotite/structure/io/mol/header.py +116 -0
- biotite/structure/io/mol/{file.py → mol.py} +69 -82
- biotite/structure/io/mol/sdf.py +909 -0
- biotite/structure/io/npz/__init__.py +3 -0
- biotite/structure/io/npz/file.py +21 -18
- biotite/structure/io/pdb/__init__.py +3 -3
- biotite/structure/io/pdb/file.py +89 -34
- biotite/structure/io/pdb/hybrid36.c +63 -43
- biotite/structure/io/pdb/hybrid36.cpython-312-darwin.so +0 -0
- biotite/structure/io/pdbqt/file.py +32 -32
- biotite/structure/io/pdbx/__init__.py +12 -6
- biotite/structure/io/pdbx/bcif.py +648 -0
- biotite/structure/io/pdbx/cif.py +1032 -0
- biotite/structure/io/pdbx/component.py +246 -0
- biotite/structure/io/pdbx/convert.py +858 -386
- biotite/structure/io/pdbx/encoding.c +112803 -0
- biotite/structure/io/pdbx/encoding.cpython-312-darwin.so +0 -0
- biotite/structure/io/pdbx/legacy.py +267 -0
- biotite/structure/molecules.py +151 -151
- biotite/structure/repair.py +253 -0
- biotite/structure/sasa.c +213 -194
- biotite/structure/sasa.cpython-312-darwin.so +0 -0
- biotite/structure/sequence.py +112 -0
- biotite/structure/superimpose.py +618 -116
- {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/METADATA +3 -3
- {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/RECORD +109 -103
- {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/WHEEL +1 -1
- biotite/structure/info/amino_acids.json +0 -1556
- biotite/structure/info/amino_acids.py +0 -42
- biotite/structure/info/carbohydrates.json +0 -1122
- biotite/structure/info/carbohydrates.py +0 -39
- biotite/structure/info/intra_bonds.msgpack +0 -0
- biotite/structure/info/link_types.msgpack +0 -1
- biotite/structure/info/nucleotides.json +0 -772
- biotite/structure/info/nucleotides.py +0 -39
- biotite/structure/info/residue_masses.msgpack +0 -0
- biotite/structure/info/residue_names.msgpack +0 -3
- biotite/structure/info/residues.msgpack +0 -0
- biotite/structure/io/pdbx/file.py +0 -652
- {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/LICENSE.rst +0 -0
- {biotite-0.39.0.dist-info → biotite-0.41.0.dist-info}/top_level.txt +0 -0
biotite/structure/io/__init__.py
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"""
|
|
6
6
|
A subpackage for reading and writing structure related data.
|
|
7
7
|
|
|
8
|
-
Macromolecular structure files (PDB, PDBx/mmCIF,
|
|
8
|
+
Macromolecular structure files (PDB, PDBx/mmCIF, BinaryCIF, etc.) and
|
|
9
9
|
small molecule files (MOL, SDF, etc.) can be used
|
|
10
10
|
to load an :class:`AtomArray` or :class:`AtomArrayStack`.
|
|
11
11
|
|
|
@@ -15,10 +15,8 @@ only one *altloc* can be chosen for each atom. Hence, the amount of
|
|
|
15
15
|
atoms may be lower in the atom array (stack) than in the respective
|
|
16
16
|
structure file.
|
|
17
17
|
|
|
18
|
-
The recommended format for reading structure files is
|
|
18
|
+
The recommended format for reading structure files is *BinaryCIF*.
|
|
19
19
|
It has by far the shortest parsing time and file size.
|
|
20
|
-
Furthermore, chemical bond information can be read from MMTF files
|
|
21
|
-
as :class:`BondList` instances.
|
|
22
20
|
|
|
23
21
|
Besides the mentioned structure formats, Gromacs trajectory files can be
|
|
24
22
|
loaded, if `mdtraj` is installed.
|
biotite/structure/io/ctab.py
CHANGED
|
@@ -2,46 +2,20 @@
|
|
|
2
2
|
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
3
|
# information.
|
|
4
4
|
|
|
5
|
-
"""
|
|
6
|
-
Functions for parsing and writing an :class:`AtomArray` from/to
|
|
7
|
-
*MDL* connection tables (Ctab).
|
|
8
|
-
"""
|
|
9
|
-
|
|
10
5
|
__name__ = "biotite.structure.io"
|
|
11
6
|
__author__ = "Patrick Kunzmann"
|
|
12
7
|
__all__ = ["read_structure_from_ctab", "write_structure_to_ctab"]
|
|
13
8
|
|
|
14
9
|
import warnings
|
|
15
|
-
|
|
16
|
-
from biotite.structure.error import BadStructureError
|
|
17
|
-
from ..atoms import AtomArray, AtomArrayStack
|
|
18
|
-
from ..bonds import BondList, BondType
|
|
19
|
-
|
|
20
|
-
BOND_TYPE_MAPPING = {
|
|
21
|
-
1: BondType.SINGLE,
|
|
22
|
-
2: BondType.DOUBLE,
|
|
23
|
-
3: BondType.TRIPLE,
|
|
24
|
-
6: BondType.SINGLE,
|
|
25
|
-
7: BondType.DOUBLE,
|
|
26
|
-
8: BondType.ANY,
|
|
27
|
-
}
|
|
28
|
-
BOND_TYPE_MAPPING_REV = {
|
|
29
|
-
BondType.SINGLE: 1,
|
|
30
|
-
BondType.DOUBLE: 2,
|
|
31
|
-
BondType.TRIPLE: 3,
|
|
32
|
-
BondType.AROMATIC_SINGLE: 1,
|
|
33
|
-
BondType.AROMATIC_DOUBLE: 2,
|
|
34
|
-
BondType.ANY: 8,
|
|
35
|
-
}
|
|
36
|
-
|
|
37
|
-
CHARGE_MAPPING = {0: 0, 1: 3, 2: 2, 3: 1, 5: -1, 6: -2, 7: -3}
|
|
38
|
-
CHARGE_MAPPING_REV = {val: key for key, val in CHARGE_MAPPING.items()}
|
|
10
|
+
from ..bonds import BondType
|
|
39
11
|
|
|
40
12
|
|
|
41
13
|
def read_structure_from_ctab(ctab_lines):
|
|
42
14
|
"""
|
|
43
15
|
Parse a *MDL* connection table (Ctab) to obtain an
|
|
44
|
-
:class:`AtomArray`. :footcite:`Dalby1992
|
|
16
|
+
:class:`AtomArray`. :footcite:`Dalby1992`.
|
|
17
|
+
|
|
18
|
+
DEPRECATED: Moved to :mod:`biotite.structure.io.mol.ctab`.
|
|
45
19
|
|
|
46
20
|
Parameters
|
|
47
21
|
----------
|
|
@@ -60,41 +34,9 @@ def read_structure_from_ctab(ctab_lines):
|
|
|
60
34
|
|
|
61
35
|
.. footbibliography::
|
|
62
36
|
"""
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
atoms = AtomArray(n_atoms)
|
|
68
|
-
atoms.add_annotation("charge", int)
|
|
69
|
-
for i, line in enumerate(atom_lines):
|
|
70
|
-
atoms.coord[i, 0] = float(line[0:10])
|
|
71
|
-
atoms.coord[i, 1] = float(line[10:20])
|
|
72
|
-
atoms.coord[i, 2] = float(line[20:30])
|
|
73
|
-
atoms.element[i] = line[31:34].strip().upper()
|
|
74
|
-
charge = CHARGE_MAPPING.get(int(line[36:39]))
|
|
75
|
-
if charge is None:
|
|
76
|
-
warnings.warn(
|
|
77
|
-
f"Cannot handle MDL charge type {int(line[36 : 39])}, "
|
|
78
|
-
f"0 is used instead"
|
|
79
|
-
)
|
|
80
|
-
charge = 0
|
|
81
|
-
atoms.charge[i] = charge
|
|
82
|
-
|
|
83
|
-
bond_array = np.zeros((n_bonds, 3), dtype=np.uint32)
|
|
84
|
-
for i, line in enumerate(bond_lines):
|
|
85
|
-
bond_type = BOND_TYPE_MAPPING.get(int(line[6:9]))
|
|
86
|
-
if bond_type is None:
|
|
87
|
-
warnings.warn(
|
|
88
|
-
f"Cannot handle MDL bond type {int(line[6 : 9])}, "
|
|
89
|
-
f"BondType.ANY is used instead"
|
|
90
|
-
)
|
|
91
|
-
bond_type = BondType.ANY
|
|
92
|
-
bond_array[i, 0] = int(line[0:3]) - 1
|
|
93
|
-
bond_array[i, 1] = int(line[3:6]) - 1
|
|
94
|
-
bond_array[i, 2] = bond_type
|
|
95
|
-
atoms.bonds = BondList(n_atoms, bond_array)
|
|
96
|
-
|
|
97
|
-
return atoms
|
|
37
|
+
warnings.warn("Moved to biotite.structure.io.mol.ctab", DeprecationWarning)
|
|
38
|
+
from biotite.structure.io.mol.ctab import read_structure_from_ctab
|
|
39
|
+
return read_structure_from_ctab(ctab_lines)
|
|
98
40
|
|
|
99
41
|
|
|
100
42
|
def write_structure_to_ctab(atoms, default_bond_type=BondType.ANY):
|
|
@@ -102,6 +44,8 @@ def write_structure_to_ctab(atoms, default_bond_type=BondType.ANY):
|
|
|
102
44
|
Convert an :class:`AtomArray` into a
|
|
103
45
|
*MDL* connection table (Ctab). :footcite:`Dalby1992`
|
|
104
46
|
|
|
47
|
+
DEPRECATED: Moved to :mod:`biotite.structure.io.mol.ctab`.
|
|
48
|
+
|
|
105
49
|
Parameters
|
|
106
50
|
----------
|
|
107
51
|
atoms : AtomArray
|
|
@@ -123,44 +67,6 @@ def write_structure_to_ctab(atoms, default_bond_type=BondType.ANY):
|
|
|
123
67
|
|
|
124
68
|
.. footbibliography::
|
|
125
69
|
"""
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
"but only a single model can be written"
|
|
130
|
-
)
|
|
131
|
-
if atoms.bonds is None:
|
|
132
|
-
raise BadStructureError("Input AtomArray has no associated BondList")
|
|
133
|
-
|
|
134
|
-
try:
|
|
135
|
-
charge = atoms.charge
|
|
136
|
-
except AttributeError:
|
|
137
|
-
charge = np.zeros(atoms.array_length(), dtype=int)
|
|
138
|
-
|
|
139
|
-
atom_lines = [
|
|
140
|
-
f"{atoms.coord[i,0]:>10.5f}"
|
|
141
|
-
f"{atoms.coord[i,1]:>10.5f}"
|
|
142
|
-
f"{atoms.coord[i,2]:>10.5f}"
|
|
143
|
-
f" {atoms.element[i]:>3}"
|
|
144
|
-
f" {CHARGE_MAPPING_REV.get(charge[i], 0):>3d}" + f"{0:>3d}" * 10
|
|
145
|
-
for i in range(atoms.array_length())
|
|
146
|
-
]
|
|
147
|
-
|
|
148
|
-
default_bond_value = BOND_TYPE_MAPPING_REV[default_bond_type]
|
|
149
|
-
|
|
150
|
-
bond_lines = [
|
|
151
|
-
f"{i+1:>3d}{j+1:>3d}"
|
|
152
|
-
f"{BOND_TYPE_MAPPING_REV.get(bond_type, default_bond_value):>3d}"
|
|
153
|
-
+ f"{0:>3d}" * 4
|
|
154
|
-
for i, j, bond_type in atoms.bonds.as_array()
|
|
155
|
-
]
|
|
156
|
-
|
|
157
|
-
counts_line = (
|
|
158
|
-
f"{len(atom_lines):>3d}{len(bond_lines):>3d}"
|
|
159
|
-
" 0 0 0 0 0 0 0 1 V2000"
|
|
160
|
-
)
|
|
161
|
-
|
|
162
|
-
return [counts_line] + atom_lines + bond_lines + ["M END"]
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
def _get_counts(counts_line):
|
|
166
|
-
return int(counts_line[0:3]), int(counts_line[3:6])
|
|
70
|
+
warnings.warn("Moved to biotite.structure.io.mol.ctab", DeprecationWarning)
|
|
71
|
+
from biotite.structure.io.mol.ctab import write_structure_to_ctab
|
|
72
|
+
return write_structure_to_ctab(atoms, default_bond_type)
|
biotite/structure/io/general.py
CHANGED
|
@@ -11,9 +11,10 @@ __name__ = "biotite.structure.io"
|
|
|
11
11
|
__author__ = "Patrick Kunzmann"
|
|
12
12
|
__all__ = ["load_structure", "save_structure"]
|
|
13
13
|
|
|
14
|
+
import datetime
|
|
14
15
|
import os.path
|
|
15
16
|
import io
|
|
16
|
-
from ..atoms import
|
|
17
|
+
from ..atoms import AtomArrayStack
|
|
17
18
|
|
|
18
19
|
|
|
19
20
|
def load_structure(file_path, template=None, **kwargs):
|
|
@@ -21,12 +22,12 @@ def load_structure(file_path, template=None, **kwargs):
|
|
|
21
22
|
Load an :class:`AtomArray` or :class:`AtomArrayStack` from a structure
|
|
22
23
|
file without the need to manually instantiate a :class:`File`
|
|
23
24
|
object.
|
|
24
|
-
|
|
25
|
+
|
|
25
26
|
Internally this function uses a :class:`File` object, based on the
|
|
26
27
|
file extension.
|
|
27
28
|
Trajectory files furthermore require specification of the `template`
|
|
28
29
|
parameter.
|
|
29
|
-
|
|
30
|
+
|
|
30
31
|
Parameters
|
|
31
32
|
----------
|
|
32
33
|
file_path : str
|
|
@@ -40,13 +41,13 @@ def load_structure(file_path, template=None, **kwargs):
|
|
|
40
41
|
This does not affect files given via the `template` parameter.
|
|
41
42
|
The only exception is the `atom_i`, which is applied to the template
|
|
42
43
|
as well if the number of atoms does not match.
|
|
43
|
-
|
|
44
|
+
|
|
44
45
|
Returns
|
|
45
46
|
-------
|
|
46
47
|
array : AtomArray or AtomArrayStack
|
|
47
48
|
If the file contains multiple models, an AtomArrayStack is
|
|
48
49
|
returned, otherwise an AtomArray is returned.
|
|
49
|
-
|
|
50
|
+
|
|
50
51
|
Raises
|
|
51
52
|
------
|
|
52
53
|
ValueError
|
|
@@ -61,91 +62,83 @@ def load_structure(file_path, template=None, **kwargs):
|
|
|
61
62
|
|
|
62
63
|
# We only need the suffix here
|
|
63
64
|
_, suffix = os.path.splitext(file_path)
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
return array
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
65
|
+
match suffix:
|
|
66
|
+
case ".pdb":
|
|
67
|
+
from .pdb import PDBFile
|
|
68
|
+
file = PDBFile.read(file_path)
|
|
69
|
+
array = file.get_structure(**kwargs)
|
|
70
|
+
return _as_single_model_if_possible(array)
|
|
71
|
+
case ".pdbqt":
|
|
72
|
+
from .pdbqt import PDBQTFile
|
|
73
|
+
file = PDBQTFile.read(file_path)
|
|
74
|
+
array = file.get_structure(**kwargs)
|
|
75
|
+
return _as_single_model_if_possible(array)
|
|
76
|
+
case ".cif" | ".pdbx":
|
|
77
|
+
from .pdbx import CIFFile, get_structure
|
|
78
|
+
file = CIFFile.read(file_path)
|
|
79
|
+
array = get_structure(file, **kwargs)
|
|
80
|
+
return _as_single_model_if_possible(array)
|
|
81
|
+
case ".bcif":
|
|
82
|
+
from .pdbx import BinaryCIFFile, get_structure
|
|
83
|
+
file = BinaryCIFFile.read(file_path)
|
|
84
|
+
array = get_structure(file, **kwargs)
|
|
85
|
+
return _as_single_model_if_possible(array)
|
|
86
|
+
case ".gro":
|
|
87
|
+
from .gro import GROFile
|
|
88
|
+
file = GROFile.read(file_path)
|
|
89
|
+
array = file.get_structure(**kwargs)
|
|
90
|
+
return _as_single_model_if_possible(array)
|
|
91
|
+
case ".mmtf":
|
|
92
|
+
from .mmtf import MMTFFile, get_structure
|
|
93
|
+
file = MMTFFile.read(file_path)
|
|
94
|
+
array = get_structure(file, **kwargs)
|
|
95
|
+
return _as_single_model_if_possible(array)
|
|
96
|
+
case ".npz":
|
|
97
|
+
from .npz import NpzFile
|
|
98
|
+
file = NpzFile.read(file_path)
|
|
99
|
+
array = file.get_structure(**kwargs)
|
|
100
|
+
return _as_single_model_if_possible(array)
|
|
101
|
+
case ".mol":
|
|
102
|
+
from .mol import MOLFile
|
|
103
|
+
file = MOLFile.read(file_path)
|
|
104
|
+
array = file.get_structure(**kwargs)
|
|
105
|
+
# MOL and SDF files only contain a single model
|
|
99
106
|
return array
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
if isinstance(array, AtomArrayStack) and array.stack_depth() == 1:
|
|
105
|
-
# Stack containing only one model -> return as atom array
|
|
106
|
-
return array[0]
|
|
107
|
-
else:
|
|
107
|
+
case ".sdf" | ".sd":
|
|
108
|
+
from .mol import SDFile, get_structure
|
|
109
|
+
file = SDFile.read(file_path)
|
|
110
|
+
array = get_structure(file, **kwargs)
|
|
108
111
|
return array
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
#
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
if suffix == ".tng":
|
|
140
|
-
traj_file_cls = TNGFile
|
|
141
|
-
if suffix == ".dcd":
|
|
142
|
-
traj_file_cls = DCDFile
|
|
143
|
-
if suffix == ".netcdf":
|
|
144
|
-
traj_file_cls = NetCDFFile
|
|
145
|
-
file = traj_file_cls.read(file_path, **kwargs)
|
|
146
|
-
return file.get_structure(template)
|
|
147
|
-
else:
|
|
148
|
-
raise ValueError(f"Unknown file format '{suffix}'")
|
|
112
|
+
case ".trr" | ".xtc" | ".tng" | ".dcd" | ".netcdf":
|
|
113
|
+
if template is None:
|
|
114
|
+
raise TypeError(
|
|
115
|
+
"Template must be specified for trajectory files"
|
|
116
|
+
)
|
|
117
|
+
# Filter template for atom ids, if an unfiltered template is given
|
|
118
|
+
if (
|
|
119
|
+
"atom_i" in kwargs
|
|
120
|
+
and template.shape[-1] != len(kwargs["atom_i"])
|
|
121
|
+
):
|
|
122
|
+
template = template[..., kwargs["atom_i"]]
|
|
123
|
+
from .trr import TRRFile
|
|
124
|
+
from .xtc import XTCFile
|
|
125
|
+
from .tng import TNGFile
|
|
126
|
+
from .dcd import DCDFile
|
|
127
|
+
from .netcdf import NetCDFFile
|
|
128
|
+
if suffix == ".trr":
|
|
129
|
+
traj_file_cls = TRRFile
|
|
130
|
+
if suffix == ".xtc":
|
|
131
|
+
traj_file_cls = XTCFile
|
|
132
|
+
if suffix == ".tng":
|
|
133
|
+
traj_file_cls = TNGFile
|
|
134
|
+
if suffix == ".dcd":
|
|
135
|
+
traj_file_cls = DCDFile
|
|
136
|
+
if suffix == ".netcdf":
|
|
137
|
+
traj_file_cls = NetCDFFile
|
|
138
|
+
file = traj_file_cls.read(file_path, **kwargs)
|
|
139
|
+
return file.get_structure(template)
|
|
140
|
+
case unknown_suffix:
|
|
141
|
+
raise ValueError(f"Unknown file format '{unknown_suffix}'")
|
|
149
142
|
|
|
150
143
|
|
|
151
144
|
def save_structure(file_path, array, **kwargs):
|
|
@@ -153,10 +146,10 @@ def save_structure(file_path, array, **kwargs):
|
|
|
153
146
|
Save an :class:`AtomArray` or :class:`AtomArrayStack` to a structure
|
|
154
147
|
file without the need to manually instantiate a :class:`File`
|
|
155
148
|
object.
|
|
156
|
-
|
|
149
|
+
|
|
157
150
|
Internally this function uses a :class:`File` object, based on the
|
|
158
151
|
file extension.
|
|
159
|
-
|
|
152
|
+
|
|
160
153
|
Parameters
|
|
161
154
|
----------
|
|
162
155
|
file_path : str
|
|
@@ -174,98 +167,91 @@ def save_structure(file_path, array, **kwargs):
|
|
|
174
167
|
"""
|
|
175
168
|
# We only need the suffix here
|
|
176
169
|
_, suffix = os.path.splitext(file_path)
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
170
|
+
match suffix:
|
|
171
|
+
case ".pdb":
|
|
172
|
+
from .pdb import PDBFile
|
|
173
|
+
file = PDBFile()
|
|
174
|
+
file.set_structure(array, **kwargs)
|
|
175
|
+
file.write(file_path)
|
|
176
|
+
case ".pdbqt":
|
|
177
|
+
from .pdbqt import PDBQTFile
|
|
178
|
+
file = PDBQTFile()
|
|
179
|
+
file.set_structure(array, **kwargs)
|
|
180
|
+
file.write(file_path)
|
|
181
|
+
case ".cif" | ".pdbx":
|
|
182
|
+
from .pdbx import CIFFile, set_structure
|
|
183
|
+
file = CIFFile()
|
|
184
|
+
set_structure(file, array, **kwargs)
|
|
185
|
+
file.write(file_path)
|
|
186
|
+
case ".bcif":
|
|
187
|
+
from .pdbx import BinaryCIFFile, set_structure
|
|
188
|
+
file = BinaryCIFFile()
|
|
189
|
+
set_structure(file, array, **kwargs)
|
|
190
|
+
file.write(file_path)
|
|
191
|
+
case ".gro":
|
|
192
|
+
from .gro import GROFile
|
|
193
|
+
file = GROFile()
|
|
194
|
+
file.set_structure(array, **kwargs)
|
|
195
|
+
file.write(file_path)
|
|
196
|
+
case ".mmtf":
|
|
197
|
+
from .mmtf import MMTFFile, set_structure
|
|
198
|
+
file = MMTFFile()
|
|
199
|
+
set_structure(file, array, **kwargs)
|
|
200
|
+
file.write(file_path)
|
|
201
|
+
case ".npz":
|
|
202
|
+
from .npz import NpzFile
|
|
203
|
+
file = NpzFile()
|
|
204
|
+
file.set_structure(array, **kwargs)
|
|
205
|
+
file.write(file_path)
|
|
206
|
+
case ".mol":
|
|
207
|
+
from .mol import MOLFile
|
|
208
|
+
file = MOLFile()
|
|
209
|
+
file.set_structure(array, **kwargs)
|
|
210
|
+
file.header = _mol_header()
|
|
211
|
+
file.write(file_path)
|
|
212
|
+
case ".sdf" | ".sd":
|
|
213
|
+
from .mol import SDFile, SDRecord, set_structure
|
|
214
|
+
record = SDRecord()
|
|
215
|
+
record.set_structure(array, **kwargs)
|
|
216
|
+
record.header = _mol_header()
|
|
217
|
+
file = SDFile({"Molecule": record})
|
|
218
|
+
file.write(file_path)
|
|
219
|
+
case ".trr" | ".xtc" | ".tng" | ".dcd" | ".netcdf":
|
|
220
|
+
from .trr import TRRFile
|
|
221
|
+
from .xtc import XTCFile
|
|
222
|
+
from .tng import TNGFile
|
|
223
|
+
from .dcd import DCDFile
|
|
224
|
+
from .netcdf import NetCDFFile
|
|
225
|
+
if suffix == ".trr":
|
|
226
|
+
traj_file_cls = TRRFile
|
|
227
|
+
if suffix == ".xtc":
|
|
228
|
+
traj_file_cls = XTCFile
|
|
229
|
+
if suffix == ".tng":
|
|
230
|
+
traj_file_cls = TNGFile
|
|
231
|
+
if suffix == ".dcd":
|
|
232
|
+
traj_file_cls = DCDFile
|
|
233
|
+
if suffix == ".netcdf":
|
|
234
|
+
traj_file_cls = NetCDFFile
|
|
235
|
+
file = traj_file_cls()
|
|
236
|
+
file.set_structure(array, **kwargs)
|
|
237
|
+
file.write(file_path)
|
|
238
|
+
case unknown_suffix:
|
|
239
|
+
raise ValueError(f"Unknown file format '{unknown_suffix}'")
|
|
234
240
|
|
|
235
|
-
# Helper function to estimate elements from atom names
|
|
236
|
-
_elements = [elem.upper() for elem in
|
|
237
|
-
["H", "He", "Li", "Be", "B", "C", "N", "O", "F", "Ne", "Na", "Mg",
|
|
238
|
-
"Al", "Si", "P", "S", "Cl", "Ar", "K", "Ca", "Sc", "Ti", "V", "Cr", "Mn", "Fe",
|
|
239
|
-
"Co", "Ni", "Cu", "Zn", "Ga", "Ge", "As", "Se", "Br", "Kr", "Rb", "Sr", "Y",
|
|
240
|
-
"Zr", "Nb", "Mo", "Tc", "Ru", "Rh", "Pd", "Ag", "Cd", "In", "Sn", "Sb", "Te",
|
|
241
|
-
"I", "Xe", "Cs", "Ba", "La", "Ce", "Pr", "Nd", "Pm", "Sm", "Eu", "Gd", "Tb",
|
|
242
|
-
"Dy", "Ho", "Er", "Tm", "Yb", "Lu", "Hf", "Ta", "W", "Re", "Os", "Ir", "Pt",
|
|
243
|
-
"Au", "Hg", "Tl", "Pb", "Bi", "Po", "At", "Rn", "Fr", "Ra", "Ac", "Th", "Pa",
|
|
244
|
-
"U", "Np", "Pu", "Am", "Cm", "Bk", "Cf", "Es", "Fm", "Md", "No", "Lr", "Rf",
|
|
245
|
-
"Db", "Sg", "Bh", "Hs", "Mt", "Ds", "Rg", "Cn", "Nh", "Fl", "Mc", "Lv", "Ts",
|
|
246
|
-
"Og"]
|
|
247
|
-
]
|
|
248
|
-
def _guess_element(atom_name):
|
|
249
|
-
# remove digits (1H -> H)
|
|
250
|
-
elem = "".join([i for i in atom_name if not i.isdigit()])
|
|
251
|
-
elem = elem.upper()
|
|
252
|
-
if len(elem) == 0:
|
|
253
|
-
return ""
|
|
254
241
|
|
|
255
|
-
|
|
256
|
-
if
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
242
|
+
def _as_single_model_if_possible(atoms):
|
|
243
|
+
if isinstance(atoms, AtomArrayStack) and atoms.stack_depth() == 1:
|
|
244
|
+
# Stack containing only one model -> return as atom array
|
|
245
|
+
return atoms[0]
|
|
246
|
+
else:
|
|
247
|
+
return atoms
|
|
260
248
|
|
|
261
|
-
# Exactly match element abbreviations
|
|
262
|
-
try:
|
|
263
|
-
return _elements[_elements.index(elem[:2])]
|
|
264
|
-
except ValueError:
|
|
265
|
-
try:
|
|
266
|
-
return _elements[_elements.index(elem[0])]
|
|
267
|
-
except ValueError:
|
|
268
|
-
pass
|
|
269
249
|
|
|
270
|
-
|
|
271
|
-
|
|
250
|
+
def _mol_header():
|
|
251
|
+
from .mol import Header
|
|
252
|
+
return Header(
|
|
253
|
+
mol_name="Molecule",
|
|
254
|
+
program="Biotite",
|
|
255
|
+
time=datetime.datetime.now(),
|
|
256
|
+
dimensions="3D",
|
|
257
|
+
)
|