biotite 0.41.2__cp310-cp310-macosx_11_0_arm64.whl → 1.0.1__cp310-cp310-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +2 -3
- biotite/application/__init__.py +1 -1
- biotite/application/application.py +20 -10
- biotite/application/autodock/__init__.py +1 -1
- biotite/application/autodock/app.py +74 -79
- biotite/application/blast/__init__.py +1 -1
- biotite/application/blast/alignment.py +19 -10
- biotite/application/blast/webapp.py +92 -85
- biotite/application/clustalo/__init__.py +1 -1
- biotite/application/clustalo/app.py +46 -61
- biotite/application/dssp/__init__.py +1 -1
- biotite/application/dssp/app.py +8 -11
- biotite/application/localapp.py +62 -60
- biotite/application/mafft/__init__.py +1 -1
- biotite/application/mafft/app.py +16 -22
- biotite/application/msaapp.py +78 -89
- biotite/application/muscle/__init__.py +1 -1
- biotite/application/muscle/app3.py +50 -64
- biotite/application/muscle/app5.py +23 -31
- biotite/application/sra/__init__.py +1 -1
- biotite/application/sra/app.py +64 -68
- biotite/application/tantan/__init__.py +1 -1
- biotite/application/tantan/app.py +22 -45
- biotite/application/util.py +7 -9
- biotite/application/viennarna/rnaalifold.py +34 -28
- biotite/application/viennarna/rnafold.py +24 -39
- biotite/application/viennarna/rnaplot.py +36 -21
- biotite/application/viennarna/util.py +17 -12
- biotite/application/webapp.py +13 -14
- biotite/copyable.py +13 -13
- biotite/database/__init__.py +1 -1
- biotite/database/entrez/__init__.py +1 -1
- biotite/database/entrez/check.py +2 -3
- biotite/database/entrez/dbnames.py +7 -5
- biotite/database/entrez/download.py +55 -49
- biotite/database/entrez/key.py +1 -1
- biotite/database/entrez/query.py +62 -23
- biotite/database/error.py +2 -1
- biotite/database/pubchem/__init__.py +1 -1
- biotite/database/pubchem/download.py +43 -45
- biotite/database/pubchem/error.py +2 -2
- biotite/database/pubchem/query.py +34 -31
- biotite/database/pubchem/throttle.py +3 -4
- biotite/database/rcsb/__init__.py +1 -1
- biotite/database/rcsb/download.py +44 -52
- biotite/database/rcsb/query.py +85 -80
- biotite/database/uniprot/check.py +6 -3
- biotite/database/uniprot/download.py +6 -11
- biotite/database/uniprot/query.py +115 -31
- biotite/file.py +12 -31
- biotite/sequence/__init__.py +3 -3
- biotite/sequence/align/__init__.py +2 -2
- biotite/sequence/align/alignment.py +99 -90
- biotite/sequence/align/banded.cpython-310-darwin.so +0 -0
- biotite/sequence/align/buckets.py +12 -10
- biotite/sequence/align/cigar.py +43 -52
- biotite/sequence/align/kmeralphabet.cpython-310-darwin.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +55 -51
- biotite/sequence/align/kmersimilarity.cpython-310-darwin.so +0 -0
- biotite/sequence/align/kmertable.cpython-310-darwin.so +0 -0
- biotite/sequence/align/kmertable.pyx +3 -2
- biotite/sequence/align/localgapped.cpython-310-darwin.so +0 -0
- biotite/sequence/align/localungapped.cpython-310-darwin.so +0 -0
- biotite/sequence/align/matrix.py +81 -82
- biotite/sequence/align/multiple.cpython-310-darwin.so +0 -0
- biotite/sequence/align/multiple.pyx +1 -1
- biotite/sequence/align/pairwise.cpython-310-darwin.so +0 -0
- biotite/sequence/align/permutation.cpython-310-darwin.so +0 -0
- biotite/sequence/align/permutation.pyx +12 -4
- biotite/sequence/align/selector.cpython-310-darwin.so +0 -0
- biotite/sequence/align/selector.pyx +52 -54
- biotite/sequence/align/statistics.py +32 -33
- biotite/sequence/align/tracetable.cpython-310-darwin.so +0 -0
- biotite/sequence/alphabet.py +51 -65
- biotite/sequence/annotation.py +78 -77
- biotite/sequence/codec.cpython-310-darwin.so +0 -0
- biotite/sequence/codon.py +90 -79
- biotite/sequence/graphics/__init__.py +1 -1
- biotite/sequence/graphics/alignment.py +184 -103
- biotite/sequence/graphics/colorschemes.py +10 -12
- biotite/sequence/graphics/dendrogram.py +79 -34
- biotite/sequence/graphics/features.py +133 -99
- biotite/sequence/graphics/logo.py +22 -28
- biotite/sequence/graphics/plasmid.py +229 -178
- biotite/sequence/io/fasta/__init__.py +1 -1
- biotite/sequence/io/fasta/convert.py +44 -33
- biotite/sequence/io/fasta/file.py +42 -55
- biotite/sequence/io/fastq/__init__.py +1 -1
- biotite/sequence/io/fastq/convert.py +11 -14
- biotite/sequence/io/fastq/file.py +68 -112
- biotite/sequence/io/genbank/__init__.py +2 -2
- biotite/sequence/io/genbank/annotation.py +12 -20
- biotite/sequence/io/genbank/file.py +74 -76
- biotite/sequence/io/genbank/metadata.py +74 -62
- biotite/sequence/io/genbank/sequence.py +13 -14
- biotite/sequence/io/general.py +39 -30
- biotite/sequence/io/gff/__init__.py +2 -2
- biotite/sequence/io/gff/convert.py +10 -15
- biotite/sequence/io/gff/file.py +81 -65
- biotite/sequence/phylo/__init__.py +1 -1
- biotite/sequence/phylo/nj.cpython-310-darwin.so +0 -0
- biotite/sequence/phylo/tree.cpython-310-darwin.so +0 -0
- biotite/sequence/phylo/upgma.cpython-310-darwin.so +0 -0
- biotite/sequence/profile.py +57 -28
- biotite/sequence/search.py +17 -15
- biotite/sequence/seqtypes.py +200 -164
- biotite/sequence/sequence.py +15 -17
- biotite/structure/__init__.py +3 -3
- biotite/structure/atoms.py +246 -236
- biotite/structure/basepairs.py +260 -271
- biotite/structure/bonds.cpython-310-darwin.so +0 -0
- biotite/structure/bonds.pyx +29 -32
- biotite/structure/box.py +67 -71
- biotite/structure/celllist.cpython-310-darwin.so +0 -0
- biotite/structure/chains.py +55 -39
- biotite/structure/charges.cpython-310-darwin.so +0 -0
- biotite/structure/compare.py +32 -32
- biotite/structure/density.py +13 -18
- biotite/structure/dotbracket.py +20 -22
- biotite/structure/error.py +10 -2
- biotite/structure/filter.py +83 -78
- biotite/structure/geometry.py +130 -119
- biotite/structure/graphics/atoms.py +60 -43
- biotite/structure/graphics/rna.py +81 -68
- biotite/structure/hbond.py +112 -93
- biotite/structure/info/__init__.py +0 -2
- biotite/structure/info/atoms.py +10 -11
- biotite/structure/info/bonds.py +41 -43
- biotite/structure/info/ccd.py +4 -5
- biotite/structure/info/groups.py +1 -3
- biotite/structure/info/masses.py +5 -10
- biotite/structure/info/misc.py +1 -1
- biotite/structure/info/radii.py +20 -20
- biotite/structure/info/standardize.py +15 -26
- biotite/structure/integrity.py +18 -71
- biotite/structure/io/__init__.py +3 -4
- biotite/structure/io/dcd/__init__.py +1 -1
- biotite/structure/io/dcd/file.py +22 -20
- biotite/structure/io/general.py +47 -61
- biotite/structure/io/gro/__init__.py +1 -1
- biotite/structure/io/gro/file.py +73 -72
- biotite/structure/io/mol/__init__.py +1 -1
- biotite/structure/io/mol/convert.py +8 -11
- biotite/structure/io/mol/ctab.py +37 -36
- biotite/structure/io/mol/header.py +14 -10
- biotite/structure/io/mol/mol.py +9 -53
- biotite/structure/io/mol/sdf.py +47 -50
- biotite/structure/io/netcdf/__init__.py +1 -1
- biotite/structure/io/netcdf/file.py +24 -23
- biotite/structure/io/pdb/__init__.py +1 -1
- biotite/structure/io/pdb/convert.py +32 -20
- biotite/structure/io/pdb/file.py +151 -172
- biotite/structure/io/pdb/hybrid36.cpython-310-darwin.so +0 -0
- biotite/structure/io/pdbqt/__init__.py +1 -1
- biotite/structure/io/pdbqt/convert.py +17 -11
- biotite/structure/io/pdbqt/file.py +128 -80
- biotite/structure/io/pdbx/__init__.py +1 -2
- biotite/structure/io/pdbx/bcif.py +36 -44
- biotite/structure/io/pdbx/cif.py +140 -110
- biotite/structure/io/pdbx/component.py +10 -16
- biotite/structure/io/pdbx/convert.py +260 -258
- biotite/structure/io/pdbx/encoding.cpython-310-darwin.so +0 -0
- biotite/structure/io/trajfile.py +90 -107
- biotite/structure/io/trr/__init__.py +1 -1
- biotite/structure/io/trr/file.py +12 -15
- biotite/structure/io/xtc/__init__.py +1 -1
- biotite/structure/io/xtc/file.py +11 -14
- biotite/structure/mechanics.py +9 -11
- biotite/structure/molecules.py +3 -4
- biotite/structure/pseudoknots.py +53 -67
- biotite/structure/rdf.py +23 -21
- biotite/structure/repair.py +137 -86
- biotite/structure/residues.py +26 -16
- biotite/structure/sasa.cpython-310-darwin.so +0 -0
- biotite/structure/{resutil.py → segments.py} +24 -23
- biotite/structure/sequence.py +10 -11
- biotite/structure/sse.py +100 -119
- biotite/structure/superimpose.py +39 -77
- biotite/structure/transform.py +97 -71
- biotite/structure/util.py +11 -13
- biotite/version.py +2 -2
- biotite/visualize.py +69 -55
- {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/METADATA +6 -5
- biotite-1.0.1.dist-info/RECORD +322 -0
- biotite/structure/io/ctab.py +0 -72
- biotite/structure/io/mmtf/__init__.py +0 -21
- biotite/structure/io/mmtf/assembly.py +0 -214
- biotite/structure/io/mmtf/convertarray.cpython-310-darwin.so +0 -0
- biotite/structure/io/mmtf/convertarray.pyx +0 -341
- biotite/structure/io/mmtf/convertfile.cpython-310-darwin.so +0 -0
- biotite/structure/io/mmtf/convertfile.pyx +0 -501
- biotite/structure/io/mmtf/decode.cpython-310-darwin.so +0 -0
- biotite/structure/io/mmtf/decode.pyx +0 -152
- biotite/structure/io/mmtf/encode.cpython-310-darwin.so +0 -0
- biotite/structure/io/mmtf/encode.pyx +0 -183
- biotite/structure/io/mmtf/file.py +0 -233
- biotite/structure/io/npz/__init__.py +0 -20
- biotite/structure/io/npz/file.py +0 -152
- biotite/structure/io/pdbx/legacy.py +0 -267
- biotite/structure/io/tng/__init__.py +0 -13
- biotite/structure/io/tng/file.py +0 -46
- biotite/temp.py +0 -86
- biotite-0.41.2.dist-info/RECORD +0 -340
- {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/WHEEL +0 -0
- {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/licenses/LICENSE.rst +0 -0
|
@@ -1,214 +0,0 @@
|
|
|
1
|
-
# This source code is part of the Biotite package and is distributed
|
|
2
|
-
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
-
# information.
|
|
4
|
-
|
|
5
|
-
# Module metadata: the module reports the name of its parent subpackage,
# and '__all__' lists the public functions defined in this file.
__name__ = "biotite.structure.io.mmtf"
__author__ = "Patrick Kunzmann"
__all__ = ["list_assemblies", "get_assembly"]
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
import numpy as np
|
|
11
|
-
from .convertfile import get_structure
|
|
12
|
-
from ...chains import get_chain_starts
|
|
13
|
-
from ...util import matrix_rotate
|
|
14
|
-
from ....file import InvalidFileError
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
def list_assemblies(file):
    """
    Return the IDs of all biological assemblies stored in the file.

    The IDs are read from the ``"name"`` entry of every element of the
    ``"bioAssemblyList"`` field.
    Consequently, this field must be present in the file.

    Parameters
    ----------
    file : MMTFFile
        The file object.

    Returns
    -------
    assemblies : list of str
        The available assembly IDs, in the order they appear in the
        file.

    Examples
    --------
    >>> import os.path
    >>> file = MMTFFile.read(os.path.join(path_to_structures, "1f2n.mmtf"))
    >>> print(list_assemblies(file))
    ['1', '2', '3', '4', '5', '6']
    """
    assembly_ids = []
    for entry in file["bioAssemblyList"]:
        assembly_ids.append(entry["name"])
    return assembly_ids
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
def get_assembly(file, assembly_id=None, model=None, altloc="first",
                 extra_fields=None, include_bonds=False):
    """
    Build the given biological assembly.

    This function receives the data from the ``bioAssemblyList`` field in
    the file.
    Consequently, this field must be present in the file.

    Parameters
    ----------
    file : MMTFFile
        The file object.
    assembly_id : str, optional
        The assembly to build.
        Available assembly IDs can be obtained via
        :func:`list_assemblies()`.
        If omitted, the first assembly listed in the file is built.
    model : int, optional
        If this parameter is given, the function will return an
        :class:`AtomArray` from the atoms corresponding to the given
        model number (starting at 1).
        Negative values are used to index models starting from the
        last model instead of the first model.
        If this parameter is omitted, an :class:`AtomArrayStack`
        containing all models will be returned, even if the
        structure contains only one model.
    altloc : {'first', 'occupancy', 'all'}
        This parameter defines how *altloc* IDs are handled:
            - ``'first'`` - Use atoms that have the first
              *altloc* ID appearing in a residue.
            - ``'occupancy'`` - Use atoms that have the *altloc* ID
              with the highest occupancy for a residue.
            - ``'all'`` - Use all atoms.
              Note that this leads to duplicate atoms.
              When this option is chosen, the ``altloc_id``
              annotation array is added to the returned structure.
    extra_fields : list of str, optional
        The strings in the list are optional annotation categories
        that should be stored in the output array or stack.
        These are valid values:
        ``'atom_id'``, ``'b_factor'``, ``'occupancy'`` and
        ``'charge'``.
        By default no extra annotation is requested.
    include_bonds : bool, optional
        If set to true, a :class:`BondList` will be created for the
        resulting :class:`AtomArray` containing the bond information
        from the file.

    Returns
    -------
    assembly : AtomArray or AtomArrayStack
        The assembly.
        The return type depends on the `model` parameter.

    Raises
    ------
    InvalidFileError
        If the file has no ``bioAssemblyList`` field.
    KeyError
        If no assembly with the given `assembly_id` exists in the file.
    NotImplementedError
        If any transformation required by the assembly only affects a
        part of the atoms (not every chain) and the number of chains
        as detected by :func:`get_chain_starts()` is different from
        the length of the ``chainNameList`` field.
        This limitation of this function exists, as the
        :class:`AtomArray` of the asymmetric unit used for constructing
        the assembly has not the chain index information required by the
        ``bioAssemblyList`` field.
        If you require reliable assembly building for any PDB entry,
        you should use the analogous function for PDB or mmCIF files
        instead.

    Examples
    --------

    >>> import os.path
    >>> file = MMTFFile.read(os.path.join(path_to_structures, "1f2n.mmtf"))
    >>> assembly = get_assembly(file, model=1)
    """
    # NOTE(review): the previous docstring stated that all bonds have
    # 'BondType.ANY' because the PDB format lacks bond orders; that
    # sentence looks copied from the PDB reader - confirm against
    # 'get_structure()' before re-adding it.

    # Create a fresh list per call instead of sharing one mutable default
    if extra_fields is None:
        extra_fields = []

    structure = get_structure(
        file, model, altloc, extra_fields, include_bonds
    )

    # Get transformations for chosen assembly
    if "bioAssemblyList" not in file:
        raise InvalidFileError(
            "File does not contain assembly information "
            "(missing 'bioAssemblyList')"
        )
    selected_assembly = None
    for candidate in file["bioAssemblyList"]:
        # Without an explicit ID the first assembly is taken
        if assembly_id is None or candidate["name"] == assembly_id:
            selected_assembly = candidate["transformList"]
            break
    if selected_assembly is None:
        raise KeyError(
            f"The assembly ID '{assembly_id}' is not found"
        )

    # In most cases the transformations in an assembly apply to all
    # atoms equally ('apply_to_all == True')
    # If this is the case, the selection of atoms for each
    # transformation can be omitted, improving the performance
    # If the number of affected chains matches the number of total
    # chains, all atoms are affected
    chain_index_count = len(file["chainNameList"])
    apply_to_all = all(
        len(transformation["chainIndexList"]) == chain_index_count
        for transformation in selected_assembly
    )
    # If the transformations in the assembly do not apply to all atoms,
    # but only to certain chains we need the ranges of these chains
    # in the base structure (the asymmetric unit)
    if not apply_to_all:
        chains_starts = get_chain_starts(
            structure, add_exclusive_stop=True
        )
        # Furthermore the number of chains determined by Biotite via
        # 'get_chain_starts()' must correspond to the number of chains
        # in the MMTF file
        # If this is not the case the assembly cannot be read using
        # this function due to the shortcoming in 'get_structure()'
        # 'chains_starts' carries the exclusive stop as an additional
        # last element (it is indexed with 'chain_i + 1' below), hence
        # it is one element longer than the number of detected chains
        if len(chains_starts) - 1 != chain_index_count:
            raise NotImplementedError(
                "The structure file is not suitable for this function, as the "
                "number of chains in the file do not match the automatically "
                "detected number of chains"
            )

    # Apply transformations for set of chains (or all chains) and add
    # the transformed atoms to assembly
    assembly = None
    for transformation in selected_assembly:
        if apply_to_all:
            sub_assembly = structure.copy()
        else:
            # Mask atoms affected by this transformation
            affected_mask = np.zeros(structure.array_length(), dtype=bool)
            for chain_i in transformation["chainIndexList"]:
                chain_start = chains_starts[chain_i]
                chain_stop = chains_starts[chain_i + 1]
                affected_mask[chain_start:chain_stop] = True
            # Select the affected atoms including their annotations,
            # so that the atom count matches the transformed coordinates
            sub_assembly = structure[..., affected_mask]
        # Apply the transformation
        sub_assembly.coord = _apply_transformation(
            sub_assembly.coord, transformation["matrix"]
        )
        # Add transformed coordinates to assembly
        if assembly is None:
            assembly = sub_assembly
        else:
            assembly += sub_assembly

    return assembly
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
def _apply_transformation(coord, mmtf_matrix):
    """
    Apply a flattened 4x4 transformation matrix to coordinates.

    The 16 values of `mmtf_matrix` are reshaped into a 4x4 matrix.
    Its upper left 3x3 block is used as rotation via
    :func:`matrix_rotate()`, the first three entries of its last column
    are added afterwards as translation.

    Parameters
    ----------
    coord : ndarray
        The coordinates to transform.
    mmtf_matrix : sequence of float
        The transformation matrix in flattened form.

    Returns
    -------
    transformed : ndarray
        The rotated and translated coordinates.
    """
    affine = np.reshape(np.array(mmtf_matrix), (4, 4))
    transformed = matrix_rotate(coord, affine[:3, :3])
    transformed += affine[:3, 3]
    return transformed
|
|
Binary file
|
|
@@ -1,341 +0,0 @@
|
|
|
1
|
-
# This source code is part of the Biotite package and is distributed
|
|
2
|
-
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
|
|
3
|
-
# information.
|
|
4
|
-
|
|
5
|
-
# Module metadata: the module reports the name of its parent subpackage,
# and '__all__' lists the only public function defined in this file.
__name__ = "biotite.structure.io.mmtf"
__author__ = "Patrick Kunzmann"
__all__ = ["set_structure"]
|
|
8
|
-
|
|
9
|
-
cimport cython
|
|
10
|
-
cimport numpy as np
|
|
11
|
-
|
|
12
|
-
import numpy as np
|
|
13
|
-
from .file import MMTFFile
|
|
14
|
-
from ...atoms import Atom, AtomArray, AtomArrayStack
|
|
15
|
-
from ...bonds import BondList
|
|
16
|
-
from ...error import BadStructureError
|
|
17
|
-
from ...residues import get_residue_starts
|
|
18
|
-
from ...box import unitcell_from_vectors
|
|
19
|
-
from ...info.misc import link_type
|
|
20
|
-
|
|
21
|
-
# Short aliases for the fixed-width NumPy C scalar types
ctypedef np.int8_t int8
ctypedef np.int16_t int16
ctypedef np.int32_t int32
ctypedef np.uint8_t uint8
ctypedef np.uint16_t uint16
ctypedef np.uint32_t uint32
ctypedef np.uint64_t uint64
ctypedef np.float32_t float32
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
def set_structure(file, array):
    """
    set_structure(file, array)

    Set the relevant fields of an MMTF file with the content of an
    :class:`AtomArray` or :class:`AtomArrayStack`.

    All required and some optional fields of the MMTF file will be set
    or overridden if the field does already exist. Fields are removed
    when they are optional and when setting the structure information
    could invalidate its content (e.g. altLocList).

    Parameters
    ----------
    file : MMTFFile
        The file object.
    array : AtomArray or AtomArrayStack
        The structure to be written. If a stack is given, each array in
        the stack will be in a separate model.

    Notes
    -----
    As the MMTF format only supports one unit cell, individual unit
    cells for each model are not supported.
    Instead only the first box in an :class:`AtomArrayStack` is written
    into the file.

    Examples
    --------

    >>> import os.path
    >>> file = MMTFFile()
    >>> set_structure(file, atom_array)
    >>> file.write(os.path.join(path_to_directory, "structure.mmtf"))

    """
    # Bonds are only written if the structure has a bond list
    cdef bint include_bonds = (array.bonds is not None)

    cdef int i=0, j=0
    cdef array_length = array.array_length()


    # Get annotation arrays from atom array (stack)
    cdef np.ndarray arr_chain_id = array.chain_id
    cdef np.ndarray arr_res_id = array.res_id
    cdef np.ndarray arr_ins_code = array.ins_code
    cdef np.ndarray arr_res_name = array.res_name
    cdef np.ndarray arr_hetero = array.hetero
    cdef np.ndarray arr_atom_name = array.atom_name
    cdef np.ndarray arr_element = array.element
    # The charge annotation is optional: 'None' marks its absence
    cdef np.ndarray arr_charge = None
    if "charge" in array.get_annotation_categories():
        arr_charge = array.charge


    # Residue start indices
    # Since the stop of i is the start of i+1,
    # The exclusive end of the atom array is appended
    # to enable convenient usage in the following loops
    cdef np.ndarray starts = np.append(get_residue_starts(array),
                                       [array_length])


    ### Preparing the group list ###
    # List of 'groupType' dicts for setting the file's 'groupList'
    cdef list residues
    # Maps 'groupType' values (not the keys) to the index in 'residues'
    # Necessary as 'groupType' entries are dictionaries,
    # which are not hashable
    cdef dict residue_dict
    # An entry in 'residues'
    cdef dict group_type
    # An entry in 'residue_dict'
    cdef tuple hashable_group_type
    # Index to list of residues
    cdef int residue_i
    # List of indices to list of residues
    cdef np.ndarray res_types
    # Start and exclusive stop of one residue interval
    cdef int start
    cdef int stop
    # Amount of atoms in a residue
    cdef int res_length
    # Name of a residue
    cdef res_name
    # BondList for inter-residue bonds
    # intra-residue bonds are successively removed
    if include_bonds:
        inter_bonds = array.bonds.copy()
    # 'len(starts)-1' since 'starts' has the end
    # of the atom array appended
    res_types = np.zeros(len(starts)-1, dtype=np.int32)
    residues = []
    residue_dict = {}
    for i in range(len(starts)-1):
        start = starts[i]
        stop = starts[i+1]
        # NOTE(review): 'res_length' is not read again in this function
        res_length = stop - start
        res_name = arr_res_name[start]
        # Get intra-residue bonds of this residue
        # NOTE(review): this slice is taken again below before it is
        # read - it looks redundant, confirm before removing
        if include_bonds:
            intra_bonds = array.bonds[start:stop]

        # Create 'groupType' dictionary for current residue
        # Tuples are used as values, so the values can be hashed below
        group_type = {}
        group_type["atomNameList"] = tuple(
            arr_atom_name[start:stop].tolist()
        )
        group_type["elementList"] = tuple(
            [e.capitalize() for e in arr_element[start:stop]]
        )
        if arr_charge is not None:
            group_type["formalChargeList"] = tuple(
                arr_charge[start:stop].tolist()
            )
        else:
            # Without charge annotation every atom gets a charge of 0
            group_type["formalChargeList"] = (0,) * (stop-start)
        group_type["groupName"] = res_name
        link = link_type(res_name)
        # Use 'NON-POLYMER' as default
        if link is None:
            link = "NON-POLYMER"
        group_type["chemCompType"] = link
        # Add intra-residue bonds
        if include_bonds:
            intra_bonds = array.bonds[start:stop]
            bond_array = intra_bonds.as_array()
            # The first two columns are flattened into the atom index
            # list, the third column is used as bond order
            group_type["bondAtomList"] = tuple(
                bond_array[:,:2].flatten().tolist()
            )
            group_type["bondOrderList"] = tuple(
                bond_array[:,2].tolist()
            )
        else:
            group_type["bondAtomList"] = ()
            group_type["bondOrderList"] = ()

        # Find index of current residue in later 'groupList'
        hashable_group_type = tuple(group_type.values())
        residue_i = residue_dict.get(hashable_group_type, -1)
        if residue_i == -1:
            # Add new residue if not yet existing in 'groupList'
            residue_i = len(residues)
            residues.append(group_type)
            residue_dict[hashable_group_type] = residue_i

        # Remove intra-residue bonds from all bonds
        # to obtain inter-residue bonds
        # Whether the residue is already known is irrelevant for this case
        if include_bonds:
            # Offset is required to obtain original indices
            # for bond removal
            intra_bonds.offset_indices(start)
            inter_bonds.remove_bonds(intra_bonds)
        # Put new or already known residue to sequence of residue types
        res_types[i] = residue_i


    ### Convert annotation arrays into MMTF arrays ###
    # Pessimistic assumption on length of arrays
    # -> At maximum as large as atom array
    cdef np.ndarray chain_names = np.zeros(array_length, dtype="U4")
    cdef np.ndarray res_per_chain = np.zeros(array_length, dtype=np.int32)
    # Variables for storing last and current chain ID
    # NOTE(review): indexing element 0 presumably fails for an empty
    # structure - confirm whether that case needs handling
    cdef last_chain_id = arr_chain_id[0]
    cdef curr_chain_id
    # Counter for chain length
    cdef int res_counter = 0
    i = 0
    j = 0
    # Walk over the residues in order: a change of the chain ID between
    # consecutive residues closes the current chain
    for i in range(len(starts)-1):
        start = starts[i]
        curr_chain_id = arr_chain_id[start]
        if curr_chain_id != last_chain_id:
            # New chain
            chain_names[j] = last_chain_id
            res_per_chain[j] = res_counter
            last_chain_id = curr_chain_id
            # Reset residue-per-chain counter
            res_counter = 1
            j += 1
        else:
            res_counter += 1
    # Add last element
    chain_names[j] = last_chain_id
    res_per_chain[j] = res_counter
    j += 1
    # Trim to correct size
    chain_names = chain_names[:j]
    res_per_chain = res_per_chain[:j]
    # Residue IDs from residue starts
    cdef np.ndarray res_ids = arr_res_id[starts[:-1]].astype(np.int32)
    cdef np.ndarray res_inscodes
    res_inscodes = arr_ins_code[starts[:-1]]

    ### Adapt arrays for multiple models
    cdef int model_count = 1
    # Taken before tiling, so it counts the chains of a single model
    cdef int chains_per_model = len(chain_names)
    if isinstance(array, AtomArrayStack):
        # Multi-model
        # The per-model arrays are repeated once for each model
        model_count = array.stack_depth()
        chain_names = np.tile(chain_names, model_count)
        res_per_chain = np.tile(res_per_chain, model_count)
        res_ids = np.tile(res_ids, model_count)
        res_inscodes = np.tile(res_inscodes, model_count)
        res_types = np.tile(res_types, model_count)


    ### Remove arrays from file ###
    # Arrays are removed if they are optional
    # and if setting the structure information invalidates its content
    _delete_record(file, "bondAtomList")
    _delete_record(file, "bondOrderList")
    _delete_record(file, "bFactorList")
    _delete_record(file, "atomIdList")
    _delete_record(file, "altLocList")
    _delete_record(file, "occupancyList")
    _delete_record(file, "secStructList")
    _delete_record(file, "insCodeList")


    ### Put prepared arrays into file ###
    # NOTE(review): the 'codec' and 'param' values are passed through to
    # 'MMTFFile.set_array()'; their meaning is not visible in this file
    cdef np.ndarray coord
    if isinstance(array, AtomArrayStack):
        # Collapse the model and atom dimension into a single one
        coord = array.coord.reshape(
            (array.stack_depth() * array.array_length(), 3)
        ).astype(np.float32, copy=False)
    else:
        coord = array.coord.astype(np.float32, copy=False)
    file.set_array("xCoordList", coord[:,0], codec=10, param=1000)
    file.set_array("yCoordList", coord[:,1], codec=10, param=1000)
    file.set_array("zCoordList", coord[:,2], codec=10, param=1000)

    file["numModels"] = model_count
    file["chainsPerModel"] = [chains_per_model] * model_count
    file["numChains"] = len(chain_names)
    # The same chain names are written as chain names and chain IDs
    file.set_array("chainNameList", chain_names, codec=5, param=4)
    file.set_array("chainIdList", chain_names, codec=5, param=4)
    file["groupsPerChain"] = res_per_chain.tolist()
    file["numGroups"] = len(res_ids)
    file.set_array("groupIdList", res_ids, codec=8)
    file.set_array("insCodeList", res_inscodes, codec=6)
    file.set_array("groupTypeList", res_types, codec=4)
    file["groupList"] = residues
    file["numAtoms"] = model_count * array_length

    # Optional annotation arrays
    # Each annotation is repeated for every model
    categories = array.get_annotation_categories()
    if "atom_id" in categories:
        file.set_array("atomIdList",
                       np.tile(array.atom_id.astype(np.int32), model_count),
                       codec=8)
    if "b_factor" in categories:
        file.set_array("bFactorList",
                       np.tile(array.b_factor.astype(np.float32), model_count),
                       codec=10, param=100)
    if "occupancy" in categories:
        file.set_array("occupancyList",
                       np.tile(array.occupancy.astype(np.float32), model_count),
                       codec=9, param=100)


    ### Add inter-residue bonds ###
    if include_bonds:
        all_inter_bonds = inter_bonds
        # Repeat the inter-residue bonds for each additional model
        for i in range(model_count-1):
            all_inter_bonds += inter_bonds
        bond_array = all_inter_bonds.as_array()
        file.set_array("bondAtomList",
                       bond_array[:,:2].flatten().astype(np.int32),
                       codec=4)
        file.set_array("bondOrderList",
                       bond_array[:,2].astype(np.int8),
                       codec=2)
        # Counts all bonds of the structure (intra- and inter-residue)
        file["numBonds"] = array.bonds.get_bond_count() * model_count
    else:
        file["numBonds"] = 0


    ### Add unit cell ###
    if array.box is not None:
        # NOTE(review): 'box' stays unassigned if 'array' is neither an
        # AtomArray nor an AtomArrayStack - presumably this cannot happen
        if isinstance(array, AtomArray):
            box = array.box
        elif isinstance(array, AtomArrayStack):
            # Use box of first model, since MMTF does not support
            # multiple boxes
            box = array.box[0]
        len_a, len_b, len_c, alpha, beta, gamma = unitcell_from_vectors(box)
        # The angles are converted with 'np.rad2deg()' before storing
        file["unitCell"] = [
            len_a, len_b, len_c,
            np.rad2deg(alpha), np.rad2deg(beta), np.rad2deg(gamma)
        ]


    ### Add additional information ###
    # Only set additional information, if not already set
    # 'val' is only read to probe whether the key exists
    try:
        val = file["mmtfVersion"]
    except KeyError:
        file["mmtfVersion"] = "1.0.0"
    try:
        val = file["mmtfProducer"]
    except KeyError:
        file["mmtfProducer"] = "UNKNOWN"
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
def _delete_record(file, record):
|
|
338
|
-
try:
|
|
339
|
-
del file[record]
|
|
340
|
-
except:
|
|
341
|
-
pass
|
|
Binary file
|