biotite 0.41.2__cp311-cp311-macosx_11_0_arm64.whl → 1.0.1__cp311-cp311-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +2 -3
- biotite/application/__init__.py +1 -1
- biotite/application/application.py +20 -10
- biotite/application/autodock/__init__.py +1 -1
- biotite/application/autodock/app.py +74 -79
- biotite/application/blast/__init__.py +1 -1
- biotite/application/blast/alignment.py +19 -10
- biotite/application/blast/webapp.py +92 -85
- biotite/application/clustalo/__init__.py +1 -1
- biotite/application/clustalo/app.py +46 -61
- biotite/application/dssp/__init__.py +1 -1
- biotite/application/dssp/app.py +8 -11
- biotite/application/localapp.py +62 -60
- biotite/application/mafft/__init__.py +1 -1
- biotite/application/mafft/app.py +16 -22
- biotite/application/msaapp.py +78 -89
- biotite/application/muscle/__init__.py +1 -1
- biotite/application/muscle/app3.py +50 -64
- biotite/application/muscle/app5.py +23 -31
- biotite/application/sra/__init__.py +1 -1
- biotite/application/sra/app.py +64 -68
- biotite/application/tantan/__init__.py +1 -1
- biotite/application/tantan/app.py +22 -45
- biotite/application/util.py +7 -9
- biotite/application/viennarna/rnaalifold.py +34 -28
- biotite/application/viennarna/rnafold.py +24 -39
- biotite/application/viennarna/rnaplot.py +36 -21
- biotite/application/viennarna/util.py +17 -12
- biotite/application/webapp.py +13 -14
- biotite/copyable.py +13 -13
- biotite/database/__init__.py +1 -1
- biotite/database/entrez/__init__.py +1 -1
- biotite/database/entrez/check.py +2 -3
- biotite/database/entrez/dbnames.py +7 -5
- biotite/database/entrez/download.py +55 -49
- biotite/database/entrez/key.py +1 -1
- biotite/database/entrez/query.py +62 -23
- biotite/database/error.py +2 -1
- biotite/database/pubchem/__init__.py +1 -1
- biotite/database/pubchem/download.py +43 -45
- biotite/database/pubchem/error.py +2 -2
- biotite/database/pubchem/query.py +34 -31
- biotite/database/pubchem/throttle.py +3 -4
- biotite/database/rcsb/__init__.py +1 -1
- biotite/database/rcsb/download.py +44 -52
- biotite/database/rcsb/query.py +85 -80
- biotite/database/uniprot/check.py +6 -3
- biotite/database/uniprot/download.py +6 -11
- biotite/database/uniprot/query.py +115 -31
- biotite/file.py +12 -31
- biotite/sequence/__init__.py +3 -3
- biotite/sequence/align/__init__.py +2 -2
- biotite/sequence/align/alignment.py +99 -90
- biotite/sequence/align/banded.cpython-311-darwin.so +0 -0
- biotite/sequence/align/buckets.py +12 -10
- biotite/sequence/align/cigar.py +43 -52
- biotite/sequence/align/kmeralphabet.cpython-311-darwin.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +55 -51
- biotite/sequence/align/kmersimilarity.cpython-311-darwin.so +0 -0
- biotite/sequence/align/kmertable.cpython-311-darwin.so +0 -0
- biotite/sequence/align/kmertable.pyx +3 -2
- biotite/sequence/align/localgapped.cpython-311-darwin.so +0 -0
- biotite/sequence/align/localungapped.cpython-311-darwin.so +0 -0
- biotite/sequence/align/matrix.py +81 -82
- biotite/sequence/align/multiple.cpython-311-darwin.so +0 -0
- biotite/sequence/align/multiple.pyx +1 -1
- biotite/sequence/align/pairwise.cpython-311-darwin.so +0 -0
- biotite/sequence/align/permutation.cpython-311-darwin.so +0 -0
- biotite/sequence/align/permutation.pyx +12 -4
- biotite/sequence/align/selector.cpython-311-darwin.so +0 -0
- biotite/sequence/align/selector.pyx +52 -54
- biotite/sequence/align/statistics.py +32 -33
- biotite/sequence/align/tracetable.cpython-311-darwin.so +0 -0
- biotite/sequence/alphabet.py +51 -65
- biotite/sequence/annotation.py +78 -77
- biotite/sequence/codec.cpython-311-darwin.so +0 -0
- biotite/sequence/codon.py +90 -79
- biotite/sequence/graphics/__init__.py +1 -1
- biotite/sequence/graphics/alignment.py +184 -103
- biotite/sequence/graphics/colorschemes.py +10 -12
- biotite/sequence/graphics/dendrogram.py +79 -34
- biotite/sequence/graphics/features.py +133 -99
- biotite/sequence/graphics/logo.py +22 -28
- biotite/sequence/graphics/plasmid.py +229 -178
- biotite/sequence/io/fasta/__init__.py +1 -1
- biotite/sequence/io/fasta/convert.py +44 -33
- biotite/sequence/io/fasta/file.py +42 -55
- biotite/sequence/io/fastq/__init__.py +1 -1
- biotite/sequence/io/fastq/convert.py +11 -14
- biotite/sequence/io/fastq/file.py +68 -112
- biotite/sequence/io/genbank/__init__.py +2 -2
- biotite/sequence/io/genbank/annotation.py +12 -20
- biotite/sequence/io/genbank/file.py +74 -76
- biotite/sequence/io/genbank/metadata.py +74 -62
- biotite/sequence/io/genbank/sequence.py +13 -14
- biotite/sequence/io/general.py +39 -30
- biotite/sequence/io/gff/__init__.py +2 -2
- biotite/sequence/io/gff/convert.py +10 -15
- biotite/sequence/io/gff/file.py +81 -65
- biotite/sequence/phylo/__init__.py +1 -1
- biotite/sequence/phylo/nj.cpython-311-darwin.so +0 -0
- biotite/sequence/phylo/tree.cpython-311-darwin.so +0 -0
- biotite/sequence/phylo/upgma.cpython-311-darwin.so +0 -0
- biotite/sequence/profile.py +57 -28
- biotite/sequence/search.py +17 -15
- biotite/sequence/seqtypes.py +200 -164
- biotite/sequence/sequence.py +15 -17
- biotite/structure/__init__.py +3 -3
- biotite/structure/atoms.py +246 -236
- biotite/structure/basepairs.py +260 -271
- biotite/structure/bonds.cpython-311-darwin.so +0 -0
- biotite/structure/bonds.pyx +29 -32
- biotite/structure/box.py +67 -71
- biotite/structure/celllist.cpython-311-darwin.so +0 -0
- biotite/structure/chains.py +55 -39
- biotite/structure/charges.cpython-311-darwin.so +0 -0
- biotite/structure/compare.py +32 -32
- biotite/structure/density.py +13 -18
- biotite/structure/dotbracket.py +20 -22
- biotite/structure/error.py +10 -2
- biotite/structure/filter.py +83 -78
- biotite/structure/geometry.py +130 -119
- biotite/structure/graphics/atoms.py +60 -43
- biotite/structure/graphics/rna.py +81 -68
- biotite/structure/hbond.py +112 -93
- biotite/structure/info/__init__.py +0 -2
- biotite/structure/info/atoms.py +10 -11
- biotite/structure/info/bonds.py +41 -43
- biotite/structure/info/ccd.py +4 -5
- biotite/structure/info/groups.py +1 -3
- biotite/structure/info/masses.py +5 -10
- biotite/structure/info/misc.py +1 -1
- biotite/structure/info/radii.py +20 -20
- biotite/structure/info/standardize.py +15 -26
- biotite/structure/integrity.py +18 -71
- biotite/structure/io/__init__.py +3 -4
- biotite/structure/io/dcd/__init__.py +1 -1
- biotite/structure/io/dcd/file.py +22 -20
- biotite/structure/io/general.py +47 -61
- biotite/structure/io/gro/__init__.py +1 -1
- biotite/structure/io/gro/file.py +73 -72
- biotite/structure/io/mol/__init__.py +1 -1
- biotite/structure/io/mol/convert.py +8 -11
- biotite/structure/io/mol/ctab.py +37 -36
- biotite/structure/io/mol/header.py +14 -10
- biotite/structure/io/mol/mol.py +9 -53
- biotite/structure/io/mol/sdf.py +47 -50
- biotite/structure/io/netcdf/__init__.py +1 -1
- biotite/structure/io/netcdf/file.py +24 -23
- biotite/structure/io/pdb/__init__.py +1 -1
- biotite/structure/io/pdb/convert.py +32 -20
- biotite/structure/io/pdb/file.py +151 -172
- biotite/structure/io/pdb/hybrid36.cpython-311-darwin.so +0 -0
- biotite/structure/io/pdbqt/__init__.py +1 -1
- biotite/structure/io/pdbqt/convert.py +17 -11
- biotite/structure/io/pdbqt/file.py +128 -80
- biotite/structure/io/pdbx/__init__.py +1 -2
- biotite/structure/io/pdbx/bcif.py +36 -44
- biotite/structure/io/pdbx/cif.py +140 -110
- biotite/structure/io/pdbx/component.py +10 -16
- biotite/structure/io/pdbx/convert.py +260 -258
- biotite/structure/io/pdbx/encoding.cpython-311-darwin.so +0 -0
- biotite/structure/io/trajfile.py +90 -107
- biotite/structure/io/trr/__init__.py +1 -1
- biotite/structure/io/trr/file.py +12 -15
- biotite/structure/io/xtc/__init__.py +1 -1
- biotite/structure/io/xtc/file.py +11 -14
- biotite/structure/mechanics.py +9 -11
- biotite/structure/molecules.py +3 -4
- biotite/structure/pseudoknots.py +53 -67
- biotite/structure/rdf.py +23 -21
- biotite/structure/repair.py +137 -86
- biotite/structure/residues.py +26 -16
- biotite/structure/sasa.cpython-311-darwin.so +0 -0
- biotite/structure/{resutil.py → segments.py} +24 -23
- biotite/structure/sequence.py +10 -11
- biotite/structure/sse.py +100 -119
- biotite/structure/superimpose.py +39 -77
- biotite/structure/transform.py +97 -71
- biotite/structure/util.py +11 -13
- biotite/version.py +2 -2
- biotite/visualize.py +69 -55
- {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/METADATA +6 -5
- biotite-1.0.1.dist-info/RECORD +322 -0
- biotite/structure/io/ctab.py +0 -72
- biotite/structure/io/mmtf/__init__.py +0 -21
- biotite/structure/io/mmtf/assembly.py +0 -214
- biotite/structure/io/mmtf/convertarray.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/convertarray.pyx +0 -341
- biotite/structure/io/mmtf/convertfile.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/convertfile.pyx +0 -501
- biotite/structure/io/mmtf/decode.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/decode.pyx +0 -152
- biotite/structure/io/mmtf/encode.cpython-311-darwin.so +0 -0
- biotite/structure/io/mmtf/encode.pyx +0 -183
- biotite/structure/io/mmtf/file.py +0 -233
- biotite/structure/io/npz/__init__.py +0 -20
- biotite/structure/io/npz/file.py +0 -152
- biotite/structure/io/pdbx/legacy.py +0 -267
- biotite/structure/io/tng/__init__.py +0 -13
- biotite/structure/io/tng/file.py +0 -46
- biotite/temp.py +0 -86
- biotite-0.41.2.dist-info/RECORD +0 -340
- {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/WHEEL +0 -0
- {biotite-0.41.2.dist-info → biotite-1.0.1.dist-info}/licenses/LICENSE.rst +0 -0
|
@@ -18,30 +18,41 @@ __all__ = [
|
|
|
18
18
|
import itertools
|
|
19
19
|
import warnings
|
|
20
20
|
import numpy as np
|
|
21
|
-
from
|
|
22
|
-
from
|
|
23
|
-
from
|
|
24
|
-
from
|
|
25
|
-
from
|
|
26
|
-
from
|
|
27
|
-
from
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
from .
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
21
|
+
from biotite.file import InvalidFileError
|
|
22
|
+
from biotite.sequence.seqtypes import NucleotideSequence, ProteinSequence
|
|
23
|
+
from biotite.structure.atoms import AtomArray, AtomArrayStack, repeat
|
|
24
|
+
from biotite.structure.bonds import BondList, BondType, connect_via_residue_names
|
|
25
|
+
from biotite.structure.box import unitcell_from_vectors, vectors_from_unitcell
|
|
26
|
+
from biotite.structure.error import BadStructureError
|
|
27
|
+
from biotite.structure.filter import (
|
|
28
|
+
filter_first_altloc,
|
|
29
|
+
filter_highest_occupancy_altloc,
|
|
30
|
+
)
|
|
31
|
+
from biotite.structure.io.pdbx.bcif import (
|
|
32
|
+
BinaryCIFBlock,
|
|
33
|
+
BinaryCIFColumn,
|
|
34
|
+
BinaryCIFFile,
|
|
35
|
+
)
|
|
36
|
+
from biotite.structure.io.pdbx.cif import CIFBlock, CIFFile
|
|
37
|
+
from biotite.structure.io.pdbx.component import MaskValue
|
|
38
|
+
from biotite.structure.io.pdbx.encoding import StringArrayEncoding
|
|
39
|
+
from biotite.structure.residues import get_residue_count, get_residue_starts_for
|
|
40
|
+
from biotite.structure.util import matrix_rotate
|
|
36
41
|
|
|
37
42
|
# Cond types in `struct_conn` category that refer to covalent bonds
|
|
38
43
|
PDBX_COVALENT_TYPES = [
|
|
39
|
-
"covale",
|
|
40
|
-
"
|
|
44
|
+
"covale",
|
|
45
|
+
"covale_base",
|
|
46
|
+
"covale_phosphate",
|
|
47
|
+
"covale_sugar",
|
|
48
|
+
"disulf",
|
|
49
|
+
"modres",
|
|
50
|
+
"modres_link",
|
|
51
|
+
"metalc",
|
|
41
52
|
]
|
|
42
53
|
# Map 'struct_conn' bond orders to 'BondType'...
|
|
43
54
|
PDBX_BOND_ORDER_TO_TYPE = {
|
|
44
|
-
"":
|
|
55
|
+
"": BondType.ANY,
|
|
45
56
|
"sing": BondType.SINGLE,
|
|
46
57
|
"doub": BondType.DOUBLE,
|
|
47
58
|
"trip": BondType.TRIPLE,
|
|
@@ -61,13 +72,13 @@ PDBX_BOND_TYPE_TO_ORDER = {
|
|
|
61
72
|
}
|
|
62
73
|
# Map 'chem_comp_bond' bond orders and aromaticity to 'BondType'...
|
|
63
74
|
COMP_BOND_ORDER_TO_TYPE = {
|
|
64
|
-
("SING", "N")
|
|
65
|
-
("DOUB", "N")
|
|
66
|
-
("TRIP", "N")
|
|
67
|
-
("QUAD", "N")
|
|
68
|
-
("SING", "Y")
|
|
69
|
-
("DOUB", "Y")
|
|
70
|
-
("TRIP", "Y")
|
|
75
|
+
("SING", "N"): BondType.SINGLE,
|
|
76
|
+
("DOUB", "N"): BondType.DOUBLE,
|
|
77
|
+
("TRIP", "N"): BondType.TRIPLE,
|
|
78
|
+
("QUAD", "N"): BondType.QUADRUPLE,
|
|
79
|
+
("SING", "Y"): BondType.AROMATIC_SINGLE,
|
|
80
|
+
("DOUB", "Y"): BondType.AROMATIC_DOUBLE,
|
|
81
|
+
("TRIP", "Y"): BondType.AROMATIC_TRIPLE,
|
|
71
82
|
}
|
|
72
83
|
# ...and vice versa
|
|
73
84
|
COMP_BOND_TYPE_TO_ORDER = {
|
|
@@ -98,16 +109,15 @@ def _filter(category, index):
|
|
|
98
109
|
Column = Category.subcomponent_class()
|
|
99
110
|
Data = Column.subcomponent_class()
|
|
100
111
|
|
|
101
|
-
return Category(
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
Data(column.mask.array[index])
|
|
106
|
-
if column.mask is not None else None
|
|
112
|
+
return Category(
|
|
113
|
+
{
|
|
114
|
+
key: Column(
|
|
115
|
+
Data(column.data.array[index]),
|
|
116
|
+
(Data(column.mask.array[index]) if column.mask is not None else None),
|
|
107
117
|
)
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
118
|
+
for key, column in category.items()
|
|
119
|
+
}
|
|
120
|
+
)
|
|
111
121
|
|
|
112
122
|
|
|
113
123
|
def get_sequence(pdbx_file, data_block=None):
|
|
@@ -134,26 +144,47 @@ def get_sequence(pdbx_file, data_block=None):
|
|
|
134
144
|
|
|
135
145
|
Returns
|
|
136
146
|
-------
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
(equivalent to
|
|
147
|
+
sequence_dict : Dictionary of Sequences
|
|
148
|
+
Dictionary keys are derived from ``entity_poly.pdbx_strand_id``
|
|
149
|
+
(often equivalent to chain_id and atom_site.auth_asym_id
|
|
150
|
+
in most cases). Dictionary values are sequences.
|
|
151
|
+
|
|
152
|
+
Notes
|
|
153
|
+
-----
|
|
154
|
+
The ``entity_poly.pdbx_seq_one_letter_code_can`` field contains the initial
|
|
155
|
+
complete sequence. If the structure represents a truncated or spliced
|
|
156
|
+
version of this initial sequence, it will include only a subset of the
|
|
157
|
+
initial sequence. Use biotite.structure.get_residues to retrieve only
|
|
158
|
+
the residues that are represented in the structure.
|
|
140
159
|
"""
|
|
160
|
+
|
|
141
161
|
block = _get_block(pdbx_file, data_block)
|
|
162
|
+
poly_category = block["entity_poly"]
|
|
142
163
|
|
|
143
|
-
poly_category= block["entity_poly"]
|
|
144
164
|
seq_string = poly_category["pdbx_seq_one_letter_code_can"].as_array(str)
|
|
145
165
|
seq_type = poly_category["type"].as_array(str)
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
166
|
+
|
|
167
|
+
sequences = [
|
|
168
|
+
_convert_string_to_sequence(string, stype)
|
|
169
|
+
for string, stype in zip(seq_string, seq_type)
|
|
170
|
+
]
|
|
171
|
+
|
|
172
|
+
strand_ids = poly_category["pdbx_strand_id"].as_array(str)
|
|
173
|
+
strand_ids = [strand_id.split(",") for strand_id in strand_ids]
|
|
174
|
+
|
|
175
|
+
sequence_dict = {
|
|
176
|
+
strand_id: sequence
|
|
177
|
+
for sequence, strand_ids in zip(sequences, strand_ids)
|
|
178
|
+
for strand_id in strand_ids
|
|
179
|
+
if sequence is not None
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
return sequence_dict
|
|
152
183
|
|
|
153
184
|
|
|
154
185
|
def get_model_count(pdbx_file, data_block=None):
|
|
155
186
|
"""
|
|
156
|
-
Get the number of models contained in a
|
|
187
|
+
Get the number of models contained in a file.
|
|
157
188
|
|
|
158
189
|
Parameters
|
|
159
190
|
----------
|
|
@@ -172,17 +203,23 @@ def get_model_count(pdbx_file, data_block=None):
|
|
|
172
203
|
The number of models.
|
|
173
204
|
"""
|
|
174
205
|
block = _get_block(pdbx_file, data_block)
|
|
175
|
-
return len(
|
|
176
|
-
block["atom_site"]["pdbx_PDB_model_num"].as_array(np.int32)
|
|
177
|
-
)
|
|
206
|
+
return len(
|
|
207
|
+
_get_model_starts(block["atom_site"]["pdbx_PDB_model_num"].as_array(np.int32))
|
|
208
|
+
)
|
|
178
209
|
|
|
179
210
|
|
|
180
|
-
def get_structure(
|
|
181
|
-
|
|
182
|
-
|
|
211
|
+
def get_structure(
|
|
212
|
+
pdbx_file,
|
|
213
|
+
model=None,
|
|
214
|
+
data_block=None,
|
|
215
|
+
altloc="first",
|
|
216
|
+
extra_fields=None,
|
|
217
|
+
use_author_fields=True,
|
|
218
|
+
include_bonds=False,
|
|
219
|
+
):
|
|
183
220
|
"""
|
|
184
221
|
Create an :class:`AtomArray` or :class:`AtomArrayStack` from the
|
|
185
|
-
``atom_site`` category in a
|
|
222
|
+
``atom_site`` category in a file.
|
|
186
223
|
|
|
187
224
|
Parameters
|
|
188
225
|
----------
|
|
@@ -228,7 +265,7 @@ def get_structure(pdbx_file, model=None, data_block=None, altloc="first",
|
|
|
228
265
|
for example both, ``label_seq_id`` and ``auth_seq_id`` describe
|
|
229
266
|
the ID of the residue.
|
|
230
267
|
While, the ``label_xxx`` fields can be used as official pointers
|
|
231
|
-
to other categories in the
|
|
268
|
+
to other categories in the file, the ``auth_xxx``
|
|
232
269
|
fields are set by the author(s) of the structure and are
|
|
233
270
|
consistent with the corresponding values in PDB files.
|
|
234
271
|
If `use_author_fields` is true, the annotation arrays will be
|
|
@@ -290,12 +327,21 @@ def get_structure(pdbx_file, model=None, data_block=None, altloc="first",
|
|
|
290
327
|
"instead"
|
|
291
328
|
)
|
|
292
329
|
|
|
293
|
-
atoms.coord[:, :, 0] =
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
330
|
+
atoms.coord[:, :, 0] = (
|
|
331
|
+
atom_site["Cartn_x"]
|
|
332
|
+
.as_array(np.float32)
|
|
333
|
+
.reshape((model_count, model_length))
|
|
334
|
+
)
|
|
335
|
+
atoms.coord[:, :, 1] = (
|
|
336
|
+
atom_site["Cartn_y"]
|
|
337
|
+
.as_array(np.float32)
|
|
338
|
+
.reshape((model_count, model_length))
|
|
339
|
+
)
|
|
340
|
+
atoms.coord[:, :, 2] = (
|
|
341
|
+
atom_site["Cartn_z"]
|
|
342
|
+
.as_array(np.float32)
|
|
343
|
+
.reshape((model_count, model_length))
|
|
344
|
+
)
|
|
299
345
|
|
|
300
346
|
box = _get_box(block)
|
|
301
347
|
if box is not None:
|
|
@@ -325,31 +371,25 @@ def get_structure(pdbx_file, model=None, data_block=None, altloc="first",
|
|
|
325
371
|
atoms.box = _get_box(block)
|
|
326
372
|
|
|
327
373
|
# The below part is the same for both, AtomArray and AtomArrayStack
|
|
328
|
-
_fill_annotations(
|
|
329
|
-
atoms, model_atom_site, extra_fields, use_author_fields
|
|
330
|
-
)
|
|
374
|
+
_fill_annotations(atoms, model_atom_site, extra_fields, use_author_fields)
|
|
331
375
|
if include_bonds:
|
|
332
376
|
if "chem_comp_bond" in block:
|
|
333
377
|
try:
|
|
334
|
-
custom_bond_dict = _parse_intra_residue_bonds(
|
|
335
|
-
block["chem_comp_bond"]
|
|
336
|
-
)
|
|
378
|
+
custom_bond_dict = _parse_intra_residue_bonds(block["chem_comp_bond"])
|
|
337
379
|
except KeyError:
|
|
338
380
|
warnings.warn(
|
|
339
381
|
"The 'chem_comp_bond' category has missing columns, "
|
|
340
382
|
"falling back to using Chemical Component Dictionary",
|
|
341
|
-
UserWarning
|
|
383
|
+
UserWarning,
|
|
342
384
|
)
|
|
343
385
|
custom_bond_dict = None
|
|
344
|
-
bonds = connect_via_residue_names(
|
|
345
|
-
atoms, custom_bond_dict=custom_bond_dict
|
|
346
|
-
)
|
|
386
|
+
bonds = connect_via_residue_names(atoms, custom_bond_dict=custom_bond_dict)
|
|
347
387
|
else:
|
|
348
388
|
bonds = connect_via_residue_names(atoms)
|
|
349
389
|
if "struct_conn" in block:
|
|
350
|
-
bonds = bonds.merge(
|
|
351
|
-
model_atom_site, block["struct_conn"]
|
|
352
|
-
)
|
|
390
|
+
bonds = bonds.merge(
|
|
391
|
+
_parse_inter_residue_bonds(model_atom_site, block["struct_conn"])
|
|
392
|
+
)
|
|
353
393
|
atoms.bonds = bonds
|
|
354
394
|
atoms = _filter_altloc(atoms, model_atom_site, altloc)
|
|
355
395
|
|
|
@@ -357,10 +397,6 @@ def get_structure(pdbx_file, model=None, data_block=None, altloc="first",
|
|
|
357
397
|
|
|
358
398
|
|
|
359
399
|
def _get_block(pdbx_component, block_name):
|
|
360
|
-
if isinstance(pdbx_component, PDBxFile):
|
|
361
|
-
# The deprecated 'PDBxFile' is a thin wrapper around 'CIFFile'
|
|
362
|
-
pdbx_component = pdbx_component.cif_file
|
|
363
|
-
|
|
364
400
|
if not isinstance(pdbx_component, (CIFBlock, BinaryCIFBlock)):
|
|
365
401
|
# Determine block
|
|
366
402
|
if block_name is None:
|
|
@@ -372,24 +408,24 @@ def _get_block(pdbx_component, block_name):
|
|
|
372
408
|
|
|
373
409
|
|
|
374
410
|
def _get_or_fallback(category, key, fallback_key):
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
411
|
+
"""
|
|
412
|
+
Return column related to key in category if it exists,
|
|
413
|
+
otherwise try to get the column related to fallback key.
|
|
414
|
+
"""
|
|
415
|
+
if key not in category:
|
|
416
|
+
warnings.warn(
|
|
417
|
+
f"Attribute '{key}' not found within 'atom_site' category. "
|
|
418
|
+
f"The fallback attribute '{fallback_key}' will be used instead",
|
|
419
|
+
UserWarning,
|
|
420
|
+
)
|
|
421
|
+
try:
|
|
422
|
+
return category[fallback_key]
|
|
423
|
+
except KeyError as key_exc:
|
|
424
|
+
raise InvalidFileError(
|
|
425
|
+
f"Fallback attribute '{fallback_key}' not found within "
|
|
426
|
+
"'atom_site' category"
|
|
427
|
+
) from key_exc
|
|
428
|
+
return category[key]
|
|
393
429
|
|
|
394
430
|
|
|
395
431
|
def _fill_annotations(array, atom_site, extra_fields, use_author_fields):
|
|
@@ -408,78 +444,52 @@ def _fill_annotations(array, atom_site, extra_fields, use_author_fields):
|
|
|
408
444
|
instead of ``label_``.
|
|
409
445
|
"""
|
|
410
446
|
|
|
411
|
-
prefix, alt_prefix = (
|
|
412
|
-
("auth", "label") if use_author_fields else ("label", "auth")
|
|
413
|
-
)
|
|
447
|
+
prefix, alt_prefix = ("auth", "label") if use_author_fields else ("label", "auth")
|
|
414
448
|
|
|
415
449
|
array.set_annotation(
|
|
416
450
|
"chain_id",
|
|
417
451
|
_get_or_fallback(
|
|
418
452
|
atom_site, f"{prefix}_asym_id", f"{alt_prefix}_asym_id"
|
|
419
|
-
).as_array(
|
|
453
|
+
).as_array(str),
|
|
420
454
|
)
|
|
421
455
|
array.set_annotation(
|
|
422
456
|
"res_id",
|
|
423
457
|
_get_or_fallback(
|
|
424
458
|
atom_site, f"{prefix}_seq_id", f"{alt_prefix}_seq_id"
|
|
425
|
-
).as_array(int, -1)
|
|
426
|
-
)
|
|
427
|
-
array.set_annotation(
|
|
428
|
-
"ins_code",
|
|
429
|
-
atom_site["pdbx_PDB_ins_code"].as_array("U1", "")
|
|
459
|
+
).as_array(int, -1),
|
|
430
460
|
)
|
|
461
|
+
array.set_annotation("ins_code", atom_site["pdbx_PDB_ins_code"].as_array(str, ""))
|
|
431
462
|
array.set_annotation(
|
|
432
463
|
"res_name",
|
|
433
464
|
_get_or_fallback(
|
|
434
465
|
atom_site, f"{prefix}_comp_id", f"{alt_prefix}_comp_id"
|
|
435
|
-
).as_array(
|
|
436
|
-
)
|
|
437
|
-
array.set_annotation(
|
|
438
|
-
"hetero",
|
|
439
|
-
atom_site["group_PDB"].as_array(str) == "HETATM"
|
|
466
|
+
).as_array(str),
|
|
440
467
|
)
|
|
468
|
+
array.set_annotation("hetero", atom_site["group_PDB"].as_array(str) == "HETATM")
|
|
441
469
|
array.set_annotation(
|
|
442
470
|
"atom_name",
|
|
443
471
|
_get_or_fallback(
|
|
444
472
|
atom_site, f"{prefix}_atom_id", f"{alt_prefix}_atom_id"
|
|
445
|
-
).as_array(
|
|
446
|
-
)
|
|
447
|
-
array.set_annotation(
|
|
448
|
-
"element",
|
|
449
|
-
atom_site["type_symbol"].as_array("U2")
|
|
473
|
+
).as_array(str),
|
|
450
474
|
)
|
|
475
|
+
array.set_annotation("element", atom_site["type_symbol"].as_array(str))
|
|
451
476
|
|
|
452
477
|
if "atom_id" in extra_fields:
|
|
453
|
-
array.set_annotation(
|
|
454
|
-
"atom_id",
|
|
455
|
-
atom_site["id"].as_array(int)
|
|
456
|
-
)
|
|
478
|
+
array.set_annotation("atom_id", atom_site["id"].as_array(int))
|
|
457
479
|
extra_fields.remove("atom_id")
|
|
458
480
|
if "b_factor" in extra_fields:
|
|
459
|
-
array.set_annotation(
|
|
460
|
-
"b_factor",
|
|
461
|
-
atom_site["B_iso_or_equiv"].as_array(float)
|
|
462
|
-
)
|
|
481
|
+
array.set_annotation("b_factor", atom_site["B_iso_or_equiv"].as_array(float))
|
|
463
482
|
extra_fields.remove("b_factor")
|
|
464
483
|
if "occupancy" in extra_fields:
|
|
465
|
-
array.set_annotation(
|
|
466
|
-
"occupancy",
|
|
467
|
-
atom_site["occupancy"].as_array(float)
|
|
468
|
-
)
|
|
484
|
+
array.set_annotation("occupancy", atom_site["occupancy"].as_array(float))
|
|
469
485
|
extra_fields.remove("occupancy")
|
|
470
486
|
if "charge" in extra_fields:
|
|
471
|
-
array.set_annotation(
|
|
472
|
-
"charge",
|
|
473
|
-
atom_site["pdbx_formal_charge"].as_array(int, 0)
|
|
474
|
-
)
|
|
487
|
+
array.set_annotation("charge", atom_site["pdbx_formal_charge"].as_array(int, 0))
|
|
475
488
|
extra_fields.remove("charge")
|
|
476
489
|
|
|
477
490
|
# Handle all remaining custom fields
|
|
478
491
|
for field in extra_fields:
|
|
479
|
-
array.set_annotation(
|
|
480
|
-
field,
|
|
481
|
-
atom_site[field].as_array(str)
|
|
482
|
-
)
|
|
492
|
+
array.set_annotation(field, atom_site[field].as_array(str))
|
|
483
493
|
|
|
484
494
|
|
|
485
495
|
def _parse_intra_residue_bonds(chem_comp_bond):
|
|
@@ -493,7 +503,7 @@ def _parse_intra_residue_bonds(chem_comp_bond):
|
|
|
493
503
|
chem_comp_bond["atom_id_1"].as_array(str),
|
|
494
504
|
chem_comp_bond["atom_id_2"].as_array(str),
|
|
495
505
|
chem_comp_bond["value_order"].as_array(str),
|
|
496
|
-
chem_comp_bond["pdbx_aromatic_flag"].as_array(str)
|
|
506
|
+
chem_comp_bond["pdbx_aromatic_flag"].as_array(str),
|
|
497
507
|
):
|
|
498
508
|
if res_name not in custom_bond_dict:
|
|
499
509
|
custom_bond_dict[res_name] = {}
|
|
@@ -514,33 +524,32 @@ def _parse_inter_residue_bonds(atom_site, struct_conn):
|
|
|
514
524
|
IDENTITY = "1_555"
|
|
515
525
|
# Columns in 'atom_site' that should be matched by 'struct_conn'
|
|
516
526
|
COLUMNS = [
|
|
517
|
-
"label_asym_id",
|
|
518
|
-
"
|
|
519
|
-
"
|
|
527
|
+
"label_asym_id",
|
|
528
|
+
"label_comp_id",
|
|
529
|
+
"label_seq_id",
|
|
530
|
+
"label_atom_id",
|
|
531
|
+
"label_alt_id",
|
|
532
|
+
"auth_asym_id",
|
|
533
|
+
"auth_comp_id",
|
|
534
|
+
"auth_seq_id",
|
|
535
|
+
"pdbx_PDB_ins_code",
|
|
520
536
|
]
|
|
521
537
|
|
|
522
538
|
covale_mask = np.isin(
|
|
523
539
|
struct_conn["conn_type_id"].as_array(str), PDBX_COVALENT_TYPES
|
|
524
540
|
)
|
|
525
541
|
if "ptnr1_symmetry" in struct_conn:
|
|
526
|
-
covale_mask &= (
|
|
527
|
-
struct_conn["ptnr1_symmetry"].as_array(str, IDENTITY) == IDENTITY
|
|
528
|
-
)
|
|
542
|
+
covale_mask &= struct_conn["ptnr1_symmetry"].as_array(str, IDENTITY) == IDENTITY
|
|
529
543
|
if "ptnr2_symmetry" in struct_conn:
|
|
530
|
-
covale_mask &= (
|
|
531
|
-
struct_conn["ptnr2_symmetry"].as_array(str, IDENTITY) == IDENTITY
|
|
532
|
-
)
|
|
544
|
+
covale_mask &= struct_conn["ptnr2_symmetry"].as_array(str, IDENTITY) == IDENTITY
|
|
533
545
|
|
|
534
546
|
atom_indices = [None] * 2
|
|
535
547
|
for i in range(2):
|
|
536
548
|
reference_arrays = []
|
|
537
549
|
query_arrays = []
|
|
538
550
|
for col_name in COLUMNS:
|
|
539
|
-
struct_conn_col_name = _get_struct_conn_col_name(col_name, i+1)
|
|
540
|
-
if
|
|
541
|
-
col_name not in atom_site
|
|
542
|
-
or struct_conn_col_name not in struct_conn
|
|
543
|
-
):
|
|
551
|
+
struct_conn_col_name = _get_struct_conn_col_name(col_name, i + 1)
|
|
552
|
+
if col_name not in atom_site or struct_conn_col_name not in struct_conn:
|
|
544
553
|
continue
|
|
545
554
|
# Ensure both arrays have the same dtype to allow comparison
|
|
546
555
|
reference = atom_site[col_name].as_array()
|
|
@@ -568,7 +577,7 @@ def _parse_inter_residue_bonds(atom_site, struct_conn):
|
|
|
568
577
|
atoms_indices_2 = atoms_indices_2[mapping_exists_mask]
|
|
569
578
|
|
|
570
579
|
# Interpret missing values as ANY bonds
|
|
571
|
-
bond_order = struct_conn["pdbx_value_order"].as_array(
|
|
580
|
+
bond_order = struct_conn["pdbx_value_order"].as_array(str, "")
|
|
572
581
|
# Consecutively apply the same masks as applied to the atom indices
|
|
573
582
|
# Logical combination does not work here,
|
|
574
583
|
# as the second mask was created based on already filtered data
|
|
@@ -577,7 +586,7 @@ def _parse_inter_residue_bonds(atom_site, struct_conn):
|
|
|
577
586
|
|
|
578
587
|
return BondList(
|
|
579
588
|
atom_site.row_count,
|
|
580
|
-
np.stack([atoms_indices_1, atoms_indices_2, bond_types], axis=-1)
|
|
589
|
+
np.stack([atoms_indices_1, atoms_indices_2, bond_types], axis=-1),
|
|
581
590
|
)
|
|
582
591
|
|
|
583
592
|
|
|
@@ -587,10 +596,13 @@ def _find_matches(query_arrays, reference_arrays):
|
|
|
587
596
|
`reference_arrays` where all query values the reference counterpart.
|
|
588
597
|
If no match is found for a query, the corresponding index is -1.
|
|
589
598
|
"""
|
|
590
|
-
match_masks_for_all_columns = np.stack(
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
599
|
+
match_masks_for_all_columns = np.stack(
|
|
600
|
+
[
|
|
601
|
+
query[:, np.newaxis] == reference[np.newaxis, :]
|
|
602
|
+
for query, reference in zip(query_arrays, reference_arrays)
|
|
603
|
+
],
|
|
604
|
+
axis=-1,
|
|
605
|
+
)
|
|
594
606
|
match_masks = np.all(match_masks_for_all_columns, axis=-1)
|
|
595
607
|
query_matches, reference_matches = np.where(match_masks)
|
|
596
608
|
|
|
@@ -664,14 +676,8 @@ def _filter_model(atom_site, model_starts, model):
|
|
|
664
676
|
Reduce the ``atom_site`` category to the values for the given
|
|
665
677
|
model.
|
|
666
678
|
"""
|
|
667
|
-
Category = type(atom_site)
|
|
668
|
-
Column = Category.subcomponent_class()
|
|
669
|
-
Data = Column.subcomponent_class()
|
|
670
|
-
|
|
671
679
|
# Append exclusive stop
|
|
672
|
-
model_starts = np.append(
|
|
673
|
-
model_starts, [atom_site.row_count]
|
|
674
|
-
)
|
|
680
|
+
model_starts = np.append(model_starts, [atom_site.row_count])
|
|
675
681
|
# Indexing starts at 0, but model number starts at 1
|
|
676
682
|
model_index = model - 1
|
|
677
683
|
index = slice(model_starts[model_index], model_starts[model_index + 1])
|
|
@@ -757,9 +763,7 @@ def set_structure(pdbx_file, array, data_block=None, include_bonds=False):
|
|
|
757
763
|
# Fill PDBx columns from information
|
|
758
764
|
# in structures' attribute arrays as good as possible
|
|
759
765
|
atom_site = Category()
|
|
760
|
-
atom_site["group_PDB"] = np.where(
|
|
761
|
-
array.hetero, "HETATM", "ATOM"
|
|
762
|
-
)
|
|
766
|
+
atom_site["group_PDB"] = np.where(array.hetero, "HETATM", "ATOM")
|
|
763
767
|
atom_site["type_symbol"] = np.copy(array.element)
|
|
764
768
|
atom_site["label_atom_id"] = np.copy(array.atom_name)
|
|
765
769
|
atom_site["label_alt_id"] = Column(
|
|
@@ -773,7 +777,7 @@ def set_structure(pdbx_file, array, data_block=None, include_bonds=False):
|
|
|
773
777
|
atom_site["label_seq_id"] = np.copy(array.res_id)
|
|
774
778
|
atom_site["pdbx_PDB_ins_code"] = Column(
|
|
775
779
|
np.copy(array.ins_code),
|
|
776
|
-
np.where(array.ins_code == "", MaskValue.INAPPLICABLE, MaskValue.PRESENT)
|
|
780
|
+
np.where(array.ins_code == "", MaskValue.INAPPLICABLE, MaskValue.PRESENT),
|
|
777
781
|
)
|
|
778
782
|
atom_site["auth_seq_id"] = atom_site["label_seq_id"]
|
|
779
783
|
atom_site["auth_comp_id"] = atom_site["label_comp_id"]
|
|
@@ -790,11 +794,11 @@ def set_structure(pdbx_file, array, data_block=None, include_bonds=False):
|
|
|
790
794
|
if "charge" in annot_categories:
|
|
791
795
|
atom_site["pdbx_formal_charge"] = Column(
|
|
792
796
|
np.array([f"{c:+d}" if c != 0 else "?" for c in array.charge]),
|
|
793
|
-
np.where(array.charge == 0, MaskValue.MISSING, MaskValue.PRESENT)
|
|
797
|
+
np.where(array.charge == 0, MaskValue.MISSING, MaskValue.PRESENT),
|
|
794
798
|
)
|
|
795
799
|
|
|
796
800
|
if array.bonds is not None:
|
|
797
|
-
struct_conn =
|
|
801
|
+
struct_conn = _set_inter_residue_bonds(array, atom_site)
|
|
798
802
|
if struct_conn is not None:
|
|
799
803
|
block["struct_conn"] = struct_conn
|
|
800
804
|
if include_bonds:
|
|
@@ -804,24 +808,20 @@ def set_structure(pdbx_file, array, data_block=None, include_bonds=False):
|
|
|
804
808
|
|
|
805
809
|
# In case of a single model handle each coordinate
|
|
806
810
|
# simply like a flattened array
|
|
807
|
-
if
|
|
808
|
-
|
|
811
|
+
if isinstance(array, AtomArray) or (
|
|
812
|
+
isinstance(array, AtomArrayStack) and array.stack_depth() == 1
|
|
809
813
|
):
|
|
810
814
|
# 'ravel' flattens coord without copy
|
|
811
815
|
# in case of stack with stack_depth = 1
|
|
812
816
|
atom_site["Cartn_x"] = np.copy(np.ravel(array.coord[..., 0]))
|
|
813
817
|
atom_site["Cartn_y"] = np.copy(np.ravel(array.coord[..., 1]))
|
|
814
818
|
atom_site["Cartn_z"] = np.copy(np.ravel(array.coord[..., 2]))
|
|
815
|
-
atom_site["pdbx_PDB_model_num"] = np.ones(
|
|
816
|
-
array.array_length(), dtype=np.int32
|
|
817
|
-
)
|
|
819
|
+
atom_site["pdbx_PDB_model_num"] = np.ones(array.array_length(), dtype=np.int32)
|
|
818
820
|
# In case of multiple models repeat annotations
|
|
819
821
|
# and use model specific coordinates
|
|
820
822
|
else:
|
|
821
823
|
atom_site = _repeat(atom_site, array.stack_depth())
|
|
822
|
-
coord = np.reshape(
|
|
823
|
-
array.coord, (array.stack_depth() * array.array_length(), 3)
|
|
824
|
-
)
|
|
824
|
+
coord = np.reshape(array.coord, (array.stack_depth() * array.array_length(), 3))
|
|
825
825
|
atom_site["Cartn_x"] = np.copy(coord[:, 0])
|
|
826
826
|
atom_site["Cartn_y"] = np.copy(coord[:, 1])
|
|
827
827
|
atom_site["Cartn_z"] = np.copy(coord[:, 2])
|
|
@@ -829,11 +829,9 @@ def set_structure(pdbx_file, array, data_block=None, include_bonds=False):
|
|
|
829
829
|
np.arange(1, array.stack_depth() + 1, dtype=np.int32),
|
|
830
830
|
repeats=array.array_length(),
|
|
831
831
|
)
|
|
832
|
-
if
|
|
832
|
+
if "atom_id" not in annot_categories:
|
|
833
833
|
# Count from 1
|
|
834
|
-
atom_site["id"] = np.arange(
|
|
835
|
-
1, len(atom_site["group_PDB"]) + 1
|
|
836
|
-
)
|
|
834
|
+
atom_site["id"] = np.arange(1, len(atom_site["group_PDB"]) + 1)
|
|
837
835
|
block["atom_site"] = atom_site
|
|
838
836
|
|
|
839
837
|
# Write box into file
|
|
@@ -870,10 +868,6 @@ def _check_non_empty(array):
|
|
|
870
868
|
|
|
871
869
|
|
|
872
870
|
def _get_or_create_block(pdbx_component, block_name):
|
|
873
|
-
if isinstance(pdbx_component, PDBxFile):
|
|
874
|
-
# The deprecated 'PDBxFile' is a thin wrapper around 'CIFFile'
|
|
875
|
-
pdbx_component = pdbx_component.cif_file
|
|
876
|
-
|
|
877
871
|
Block = pdbx_component.subcomponent_class()
|
|
878
872
|
|
|
879
873
|
if isinstance(pdbx_component, (CIFFile, BinaryCIFFile)):
|
|
@@ -901,7 +895,7 @@ def _determine_entity_id(chain_id):
|
|
|
901
895
|
for i in range(len(chain_id)):
|
|
902
896
|
try:
|
|
903
897
|
entity_id[i] = id_translation[chain_id[i]]
|
|
904
|
-
except:
|
|
898
|
+
except KeyError:
|
|
905
899
|
# chain_id is not in dictionary -> new entry
|
|
906
900
|
id_translation[chain_id[i]] = id
|
|
907
901
|
entity_id[i] = id_translation[chain_id[i]]
|
|
@@ -926,8 +920,11 @@ def _repeat(category, repetitions):
|
|
|
926
920
|
data = Data(np.tile(column.data.array, repetitions), data_encoding)
|
|
927
921
|
else:
|
|
928
922
|
data = Data(np.tile(column.data.array, repetitions))
|
|
929
|
-
mask =
|
|
930
|
-
|
|
923
|
+
mask = (
|
|
924
|
+
Data(np.tile(column.mask.array, repetitions))
|
|
925
|
+
if column.mask is not None
|
|
926
|
+
else None
|
|
927
|
+
)
|
|
931
928
|
category_dict[key] = Column(data, mask)
|
|
932
929
|
return Category(category_dict)
|
|
933
930
|
|
|
@@ -967,28 +964,37 @@ def _set_intra_residue_bonds(array, atom_site):
|
|
|
967
964
|
aromatic_flag[i] = aromatic
|
|
968
965
|
any_mask = bond_array[:, 2] == BondType.ANY
|
|
969
966
|
|
|
970
|
-
|
|
967
|
+
# Remove already existing residue and atom name combinations
|
|
968
|
+
# These appear when the structure contains a residue multiple times
|
|
969
|
+
atom_id_1 = array.atom_name[bond_array[:, 0]]
|
|
970
|
+
atom_id_2 = array.atom_name[bond_array[:, 1]]
|
|
971
971
|
# Take the residue name from the first atom index, as the residue
|
|
972
972
|
# name is the same for both atoms, since we have only intra bonds
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
973
|
+
comp_id = array.res_name[bond_array[:, 0]]
|
|
974
|
+
_, unique_indices = np.unique(
|
|
975
|
+
np.stack([comp_id, atom_id_1, atom_id_2], axis=-1), axis=0, return_index=True
|
|
976
|
+
)
|
|
977
|
+
unique_indices.sort()
|
|
978
|
+
|
|
979
|
+
chem_comp_bond = Category()
|
|
980
|
+
n_bonds = len(unique_indices)
|
|
981
|
+
chem_comp_bond["pdbx_ordinal"] = np.arange(1, n_bonds + 1, dtype=np.int32)
|
|
982
|
+
chem_comp_bond["comp_id"] = comp_id[unique_indices]
|
|
983
|
+
chem_comp_bond["atom_id_1"] = atom_id_1[unique_indices]
|
|
984
|
+
chem_comp_bond["atom_id_2"] = atom_id_2[unique_indices]
|
|
976
985
|
chem_comp_bond["value_order"] = Column(
|
|
977
|
-
value_order,
|
|
978
|
-
np.where(any_mask, MaskValue.MISSING, MaskValue.PRESENT)
|
|
986
|
+
value_order[unique_indices],
|
|
987
|
+
np.where(any_mask[unique_indices], MaskValue.MISSING, MaskValue.PRESENT),
|
|
979
988
|
)
|
|
980
989
|
chem_comp_bond["pdbx_aromatic_flag"] = Column(
|
|
981
|
-
aromatic_flag,
|
|
982
|
-
np.where(any_mask, MaskValue.MISSING, MaskValue.PRESENT)
|
|
990
|
+
aromatic_flag[unique_indices],
|
|
991
|
+
np.where(any_mask[unique_indices], MaskValue.MISSING, MaskValue.PRESENT),
|
|
983
992
|
)
|
|
984
993
|
# BondList does not contain stereo information
|
|
985
994
|
# -> all values are missing
|
|
986
995
|
chem_comp_bond["pdbx_stereo_config"] = Column(
|
|
987
|
-
np.zeros(
|
|
988
|
-
np.full(
|
|
989
|
-
)
|
|
990
|
-
chem_comp_bond["pdbx_ordinal"] = np.arange(
|
|
991
|
-
1, len(bond_array) + 1, dtype=np.int32
|
|
996
|
+
np.zeros(n_bonds, dtype="U1"),
|
|
997
|
+
np.full(n_bonds, MaskValue.MISSING),
|
|
992
998
|
)
|
|
993
999
|
return chem_comp_bond
|
|
994
1000
|
|
|
@@ -1001,8 +1007,11 @@ def _set_inter_residue_bonds(array, atom_site):
|
|
|
1001
1007
|
``atom_site`` category.
|
|
1002
1008
|
"""
|
|
1003
1009
|
COLUMNS = [
|
|
1004
|
-
"label_asym_id",
|
|
1005
|
-
"
|
|
1010
|
+
"label_asym_id",
|
|
1011
|
+
"label_comp_id",
|
|
1012
|
+
"label_seq_id",
|
|
1013
|
+
"label_atom_id",
|
|
1014
|
+
"pdbx_PDB_ins_code",
|
|
1006
1015
|
]
|
|
1007
1016
|
|
|
1008
1017
|
Category = type(atom_site)
|
|
@@ -1011,17 +1020,17 @@ def _set_inter_residue_bonds(array, atom_site):
|
|
|
1011
1020
|
bond_array = _filter_bonds(array, "inter")
|
|
1012
1021
|
if len(bond_array) == 0:
|
|
1013
1022
|
return None
|
|
1023
|
+
|
|
1014
1024
|
struct_conn = Category()
|
|
1015
1025
|
struct_conn["id"] = np.arange(1, len(bond_array) + 1)
|
|
1016
1026
|
struct_conn["conn_type_id"] = np.full(len(bond_array), "covale")
|
|
1017
1027
|
struct_conn["pdbx_value_order"] = Column(
|
|
1018
|
-
np.array(
|
|
1019
|
-
[PDBX_BOND_TYPE_TO_ORDER[btype] for btype in bond_array[:, 2]]
|
|
1020
|
-
),
|
|
1028
|
+
np.array([PDBX_BOND_TYPE_TO_ORDER[btype] for btype in bond_array[:, 2]]),
|
|
1021
1029
|
np.where(
|
|
1022
1030
|
bond_array[:, 2] == BondType.ANY,
|
|
1023
|
-
MaskValue.MISSING,
|
|
1024
|
-
|
|
1031
|
+
MaskValue.MISSING,
|
|
1032
|
+
MaskValue.PRESENT,
|
|
1033
|
+
),
|
|
1025
1034
|
)
|
|
1026
1035
|
# Write the identifying annotation...
|
|
1027
1036
|
for col_name in COLUMNS:
|
|
@@ -1029,8 +1038,9 @@ def _set_inter_residue_bonds(array, atom_site):
|
|
|
1029
1038
|
# ...for each bond partner
|
|
1030
1039
|
for i in range(2):
|
|
1031
1040
|
atom_indices = bond_array[:, i]
|
|
1032
|
-
struct_conn[_get_struct_conn_col_name(col_name, i+1)]
|
|
1033
|
-
|
|
1041
|
+
struct_conn[_get_struct_conn_col_name(col_name, i + 1)] = annot[
|
|
1042
|
+
atom_indices
|
|
1043
|
+
]
|
|
1034
1044
|
return struct_conn
|
|
1035
1045
|
|
|
1036
1046
|
|
|
@@ -1042,9 +1052,9 @@ def _filter_bonds(array, connection):
|
|
|
1042
1052
|
bond_array = array.bonds.as_array()
|
|
1043
1053
|
# To save computation time call 'get_residue_starts_for()' only once
|
|
1044
1054
|
# with indices of the first and second atom of each bond
|
|
1045
|
-
residue_starts_1, residue_starts_2 =
|
|
1046
|
-
array, bond_array[:, :2].flatten()
|
|
1047
|
-
)
|
|
1055
|
+
residue_starts_1, residue_starts_2 = (
|
|
1056
|
+
get_residue_starts_for(array, bond_array[:, :2].flatten()).reshape(-1, 2).T
|
|
1057
|
+
)
|
|
1048
1058
|
if connection == "intra":
|
|
1049
1059
|
return bond_array[residue_starts_1 == residue_starts_2]
|
|
1050
1060
|
elif connection == "inter":
|
|
@@ -1053,12 +1063,11 @@ def _filter_bonds(array, connection):
|
|
|
1053
1063
|
raise ValueError("Invalid 'connection' option")
|
|
1054
1064
|
|
|
1055
1065
|
|
|
1056
|
-
def get_component(pdbx_file, data_block=None, use_ideal_coord=True,
|
|
1057
|
-
res_name=None):
|
|
1066
|
+
def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=None):
|
|
1058
1067
|
"""
|
|
1059
1068
|
Create an :class:`AtomArray` for a chemical component from the
|
|
1060
1069
|
``chem_comp_atom`` and, if available, the ``chem_comp_bond``
|
|
1061
|
-
category in a
|
|
1070
|
+
category in a file.
|
|
1062
1071
|
|
|
1063
1072
|
Parameters
|
|
1064
1073
|
----------
|
|
@@ -1140,12 +1149,11 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True,
|
|
|
1140
1149
|
|
|
1141
1150
|
array = AtomArray(atom_category.row_count)
|
|
1142
1151
|
|
|
1143
|
-
array.hetero[
|
|
1144
|
-
array.res_name
|
|
1145
|
-
array.atom_name
|
|
1146
|
-
array.element
|
|
1147
|
-
array.
|
|
1148
|
-
array.charge = atom_category["charge"].as_array(int, 0)
|
|
1152
|
+
array.set_annotation("hetero", np.full(len(atom_category["comp_id"]), True))
|
|
1153
|
+
array.set_annotation("res_name", atom_category["comp_id"].as_array(str))
|
|
1154
|
+
array.set_annotation("atom_name", atom_category["atom_id"].as_array(str))
|
|
1155
|
+
array.set_annotation("element", atom_category["type_symbol"].as_array(str))
|
|
1156
|
+
array.set_annotation("charge", atom_category["charge"].as_array(int, 0))
|
|
1149
1157
|
|
|
1150
1158
|
coord_fields = [f"pdbx_model_Cartn_{dim}_ideal" for dim in ("x", "y", "z")]
|
|
1151
1159
|
alt_coord_fields = [f"model_Cartn_{dim}" for dim in ("x", "y", "z")]
|
|
@@ -1154,16 +1162,16 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True,
|
|
|
1154
1162
|
coord_fields, alt_coord_fields = alt_coord_fields, coord_fields
|
|
1155
1163
|
try:
|
|
1156
1164
|
for i, field in enumerate(coord_fields):
|
|
1157
|
-
array.coord[:,i] = atom_category[field].as_array(np.float32)
|
|
1165
|
+
array.coord[:, i] = atom_category[field].as_array(np.float32)
|
|
1158
1166
|
except KeyError as err:
|
|
1159
1167
|
key = err.args[0]
|
|
1160
1168
|
warnings.warn(
|
|
1161
1169
|
f"Attribute '{key}' not found within 'chem_comp_atom' category. "
|
|
1162
1170
|
f"The fallback coordinates will be used instead",
|
|
1163
|
-
UserWarning
|
|
1171
|
+
UserWarning,
|
|
1164
1172
|
)
|
|
1165
1173
|
for i, field in enumerate(alt_coord_fields):
|
|
1166
|
-
array.coord[:,i] = atom_category[field].as_array(np.float32)
|
|
1174
|
+
array.coord[:, i] = atom_category[field].as_array(np.float32)
|
|
1167
1175
|
|
|
1168
1176
|
try:
|
|
1169
1177
|
bond_category = block["chem_comp_bond"]
|
|
@@ -1173,9 +1181,8 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True,
|
|
|
1173
1181
|
)
|
|
1174
1182
|
except KeyError:
|
|
1175
1183
|
warnings.warn(
|
|
1176
|
-
|
|
1177
|
-
|
|
1178
|
-
UserWarning
|
|
1184
|
+
"Category 'chem_comp_bond' not found. " "No bonds will be parsed",
|
|
1185
|
+
UserWarning,
|
|
1179
1186
|
)
|
|
1180
1187
|
else:
|
|
1181
1188
|
bonds = BondList(array.array_length())
|
|
@@ -1183,7 +1190,7 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True,
|
|
|
1183
1190
|
bond_category["atom_id_1"].as_array(str),
|
|
1184
1191
|
bond_category["atom_id_2"].as_array(str),
|
|
1185
1192
|
bond_category["value_order"].as_array(str),
|
|
1186
|
-
bond_category["pdbx_aromatic_flag"].as_array(str)
|
|
1193
|
+
bond_category["pdbx_aromatic_flag"].as_array(str),
|
|
1187
1194
|
):
|
|
1188
1195
|
atom_i = np.where(array.atom_name == atom1)[0][0]
|
|
1189
1196
|
atom_j = np.where(array.atom_name == atom2)[0][0]
|
|
@@ -1225,9 +1232,7 @@ def set_component(pdbx_file, array, data_block=None):
|
|
|
1225
1232
|
Category = block.subcomponent_class()
|
|
1226
1233
|
|
|
1227
1234
|
if get_residue_count(array) > 1:
|
|
1228
|
-
raise BadStructureError(
|
|
1229
|
-
"The input atom array must comprise only one residue"
|
|
1230
|
-
)
|
|
1235
|
+
raise BadStructureError("The input atom array must comprise only one residue")
|
|
1231
1236
|
res_name = array.res_name[0]
|
|
1232
1237
|
|
|
1233
1238
|
annot_categories = array.get_annotation_categories()
|
|
@@ -1250,31 +1255,28 @@ def set_component(pdbx_file, array, data_block=None):
|
|
|
1250
1255
|
atom_cat["pdbx_model_Cartn_z_ideal"] = atom_cat["model_Cartn_z"]
|
|
1251
1256
|
atom_cat["pdbx_component_atom_id"] = atom_cat["atom_id"]
|
|
1252
1257
|
atom_cat["pdbx_component_comp_id"] = atom_cat["comp_id"]
|
|
1253
|
-
atom_cat["pdbx_ordinal"] = np.arange(
|
|
1254
|
-
1, array.array_length() + 1
|
|
1255
|
-
).astype(str)
|
|
1258
|
+
atom_cat["pdbx_ordinal"] = np.arange(1, array.array_length() + 1).astype(str)
|
|
1256
1259
|
block["chem_comp_atom"] = atom_cat
|
|
1257
1260
|
|
|
1258
1261
|
if array.bonds is not None and array.bonds.get_bond_count() > 0:
|
|
1259
1262
|
bond_array = array.bonds.as_array()
|
|
1260
1263
|
order_flags = []
|
|
1261
1264
|
aromatic_flags = []
|
|
1262
|
-
for bond_type in bond_array[:,2]:
|
|
1265
|
+
for bond_type in bond_array[:, 2]:
|
|
1263
1266
|
order_flag, aromatic_flag = COMP_BOND_TYPE_TO_ORDER[bond_type]
|
|
1264
1267
|
order_flags.append(order_flag)
|
|
1265
1268
|
aromatic_flags.append(aromatic_flag)
|
|
1266
1269
|
|
|
1267
1270
|
bond_cat = Category()
|
|
1268
1271
|
bond_cat["comp_id"] = np.full(len(bond_array), res_name)
|
|
1269
|
-
bond_cat["atom_id_1"] = array.atom_name[bond_array[:,0]]
|
|
1270
|
-
bond_cat["atom_id_2"] = array.atom_name[bond_array[:,1]]
|
|
1272
|
+
bond_cat["atom_id_1"] = array.atom_name[bond_array[:, 0]]
|
|
1273
|
+
bond_cat["atom_id_2"] = array.atom_name[bond_array[:, 1]]
|
|
1271
1274
|
bond_cat["value_order"] = np.array(order_flags)
|
|
1272
1275
|
bond_cat["pdbx_aromatic_flag"] = np.array(aromatic_flags)
|
|
1273
|
-
bond_cat["pdbx_ordinal"] = np.arange(
|
|
1274
|
-
1, len(bond_array) + 1
|
|
1275
|
-
).astype(str)
|
|
1276
|
+
bond_cat["pdbx_ordinal"] = np.arange(1, len(bond_array) + 1).astype(str)
|
|
1276
1277
|
block["chem_comp_bond"] = bond_cat
|
|
1277
1278
|
|
|
1279
|
+
|
|
1278
1280
|
def list_assemblies(pdbx_file, data_block=None):
|
|
1279
1281
|
"""
|
|
1280
1282
|
List the biological assemblies that are available for the structure
|
|
@@ -1325,14 +1327,21 @@ def list_assemblies(pdbx_file, data_block=None):
|
|
|
1325
1327
|
id: details
|
|
1326
1328
|
for id, details in zip(
|
|
1327
1329
|
assembly_category["id"].as_array(str),
|
|
1328
|
-
assembly_category["details"].as_array(str)
|
|
1330
|
+
assembly_category["details"].as_array(str),
|
|
1329
1331
|
)
|
|
1330
1332
|
}
|
|
1331
1333
|
|
|
1332
1334
|
|
|
1333
|
-
def get_assembly(
|
|
1334
|
-
|
|
1335
|
-
|
|
1335
|
+
def get_assembly(
|
|
1336
|
+
pdbx_file,
|
|
1337
|
+
assembly_id=None,
|
|
1338
|
+
model=None,
|
|
1339
|
+
data_block=None,
|
|
1340
|
+
altloc="first",
|
|
1341
|
+
extra_fields=None,
|
|
1342
|
+
use_author_fields=True,
|
|
1343
|
+
include_bonds=False,
|
|
1344
|
+
):
|
|
1336
1345
|
"""
|
|
1337
1346
|
Build the given biological assembly.
|
|
1338
1347
|
|
|
@@ -1389,7 +1398,7 @@ def get_assembly(pdbx_file, assembly_id=None, model=None, data_block=None,
|
|
|
1389
1398
|
for example both, ``label_seq_id`` and ``auth_seq_id`` describe
|
|
1390
1399
|
the ID of the residue.
|
|
1391
1400
|
While, the ``label_xxx`` fields can be used as official pointers
|
|
1392
|
-
to other categories in the
|
|
1401
|
+
to other categories in the file, the ``auth_xxx``
|
|
1393
1402
|
fields are set by the author(s) of the structure and are
|
|
1394
1403
|
consistent with the corresponding values in PDB files.
|
|
1395
1404
|
If `use_author_fields` is true, the annotation arrays will be
|
|
@@ -1422,9 +1431,7 @@ def get_assembly(pdbx_file, assembly_id=None, model=None, data_block=None,
|
|
|
1422
1431
|
try:
|
|
1423
1432
|
assembly_gen_category = block["pdbx_struct_assembly_gen"]
|
|
1424
1433
|
except KeyError:
|
|
1425
|
-
raise InvalidFileError(
|
|
1426
|
-
"File has no 'pdbx_struct_assembly_gen' category"
|
|
1427
|
-
)
|
|
1434
|
+
raise InvalidFileError("File has no 'pdbx_struct_assembly_gen' category")
|
|
1428
1435
|
|
|
1429
1436
|
try:
|
|
1430
1437
|
struct_oper_category = block["pdbx_struct_oper_list"]
|
|
@@ -1457,7 +1464,7 @@ def get_assembly(pdbx_file, assembly_id=None, model=None, data_block=None,
|
|
|
1457
1464
|
altloc,
|
|
1458
1465
|
extra_fields_and_asym,
|
|
1459
1466
|
use_author_fields,
|
|
1460
|
-
include_bonds
|
|
1467
|
+
include_bonds,
|
|
1461
1468
|
)
|
|
1462
1469
|
|
|
1463
1470
|
### Get transformations and apply them to the affected asym IDs
|
|
@@ -1473,9 +1480,7 @@ def get_assembly(pdbx_file, assembly_id=None, model=None, data_block=None,
|
|
|
1473
1480
|
operations = _parse_operation_expression(op_expr)
|
|
1474
1481
|
asym_ids = asym_id_expr.split(",")
|
|
1475
1482
|
# Filter affected asym IDs
|
|
1476
|
-
sub_structure = structure[
|
|
1477
|
-
..., np.isin(structure.label_asym_id, asym_ids)
|
|
1478
|
-
]
|
|
1483
|
+
sub_structure = structure[..., np.isin(structure.label_asym_id, asym_ids)]
|
|
1479
1484
|
sub_assembly = _apply_transformations(
|
|
1480
1485
|
sub_structure, transformations, operations
|
|
1481
1486
|
)
|
|
@@ -1534,10 +1539,9 @@ def _get_transformations(struct_oper):
|
|
|
1534
1539
|
for i in (1, 2, 3)
|
|
1535
1540
|
]
|
|
1536
1541
|
)
|
|
1537
|
-
translation_vector = np.array(
|
|
1538
|
-
struct_oper[f"vector[{i}]"].as_array(float)[index]
|
|
1539
|
-
|
|
1540
|
-
])
|
|
1542
|
+
translation_vector = np.array(
|
|
1543
|
+
[struct_oper[f"vector[{i}]"].as_array(float)[index] for i in (1, 2, 3)]
|
|
1544
|
+
)
|
|
1541
1545
|
transformation_dict[id] = (rotation_matrix, translation_vector)
|
|
1542
1546
|
return transformation_dict
|
|
1543
1547
|
|
|
@@ -1592,6 +1596,4 @@ def _convert_string_to_sequence(string, stype):
|
|
|
1592
1596
|
elif stype in _other_type_list:
|
|
1593
1597
|
return None
|
|
1594
1598
|
else:
|
|
1595
|
-
raise InvalidFileError(
|
|
1596
|
-
"mmCIF _entity_poly.type unsupported" " type: " + stype
|
|
1597
|
-
)
|
|
1599
|
+
raise InvalidFileError("mmCIF _entity_poly.type unsupported" " type: " + stype)
|