biotite 0.41.2__cp312-cp312-macosx_11_0_arm64.whl → 1.0.0__cp312-cp312-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/__init__.py +2 -3
- biotite/application/__init__.py +1 -1
- biotite/application/application.py +20 -10
- biotite/application/autodock/__init__.py +1 -1
- biotite/application/autodock/app.py +74 -79
- biotite/application/blast/__init__.py +1 -1
- biotite/application/blast/alignment.py +19 -10
- biotite/application/blast/webapp.py +92 -85
- biotite/application/clustalo/__init__.py +1 -1
- biotite/application/clustalo/app.py +46 -61
- biotite/application/dssp/__init__.py +1 -1
- biotite/application/dssp/app.py +8 -11
- biotite/application/localapp.py +62 -60
- biotite/application/mafft/__init__.py +1 -1
- biotite/application/mafft/app.py +16 -22
- biotite/application/msaapp.py +78 -89
- biotite/application/muscle/__init__.py +1 -1
- biotite/application/muscle/app3.py +50 -64
- biotite/application/muscle/app5.py +23 -31
- biotite/application/sra/__init__.py +1 -1
- biotite/application/sra/app.py +64 -68
- biotite/application/tantan/__init__.py +1 -1
- biotite/application/tantan/app.py +22 -45
- biotite/application/util.py +7 -9
- biotite/application/viennarna/rnaalifold.py +34 -28
- biotite/application/viennarna/rnafold.py +24 -39
- biotite/application/viennarna/rnaplot.py +36 -21
- biotite/application/viennarna/util.py +17 -12
- biotite/application/webapp.py +13 -14
- biotite/copyable.py +13 -13
- biotite/database/__init__.py +1 -1
- biotite/database/entrez/__init__.py +1 -1
- biotite/database/entrez/check.py +2 -3
- biotite/database/entrez/dbnames.py +7 -5
- biotite/database/entrez/download.py +55 -49
- biotite/database/entrez/key.py +1 -1
- biotite/database/entrez/query.py +62 -23
- biotite/database/error.py +2 -1
- biotite/database/pubchem/__init__.py +1 -1
- biotite/database/pubchem/download.py +43 -45
- biotite/database/pubchem/error.py +2 -2
- biotite/database/pubchem/query.py +34 -31
- biotite/database/pubchem/throttle.py +3 -4
- biotite/database/rcsb/__init__.py +1 -1
- biotite/database/rcsb/download.py +44 -52
- biotite/database/rcsb/query.py +85 -80
- biotite/database/uniprot/check.py +6 -3
- biotite/database/uniprot/download.py +6 -11
- biotite/database/uniprot/query.py +115 -31
- biotite/file.py +12 -31
- biotite/sequence/__init__.py +3 -3
- biotite/sequence/align/__init__.py +2 -2
- biotite/sequence/align/alignment.py +99 -90
- biotite/sequence/align/banded.cpython-312-darwin.so +0 -0
- biotite/sequence/align/buckets.py +12 -10
- biotite/sequence/align/cigar.py +43 -52
- biotite/sequence/align/kmeralphabet.cpython-312-darwin.so +0 -0
- biotite/sequence/align/kmeralphabet.pyx +55 -51
- biotite/sequence/align/kmersimilarity.cpython-312-darwin.so +0 -0
- biotite/sequence/align/kmertable.cpython-312-darwin.so +0 -0
- biotite/sequence/align/kmertable.pyx +3 -2
- biotite/sequence/align/localgapped.cpython-312-darwin.so +0 -0
- biotite/sequence/align/localungapped.cpython-312-darwin.so +0 -0
- biotite/sequence/align/matrix.py +81 -82
- biotite/sequence/align/multiple.cpython-312-darwin.so +0 -0
- biotite/sequence/align/multiple.pyx +1 -1
- biotite/sequence/align/pairwise.cpython-312-darwin.so +0 -0
- biotite/sequence/align/permutation.cpython-312-darwin.so +0 -0
- biotite/sequence/align/permutation.pyx +12 -4
- biotite/sequence/align/selector.cpython-312-darwin.so +0 -0
- biotite/sequence/align/selector.pyx +52 -54
- biotite/sequence/align/statistics.py +32 -33
- biotite/sequence/align/tracetable.cpython-312-darwin.so +0 -0
- biotite/sequence/alphabet.py +51 -65
- biotite/sequence/annotation.py +78 -77
- biotite/sequence/codec.cpython-312-darwin.so +0 -0
- biotite/sequence/codon.py +90 -79
- biotite/sequence/graphics/__init__.py +1 -1
- biotite/sequence/graphics/alignment.py +184 -103
- biotite/sequence/graphics/colorschemes.py +10 -12
- biotite/sequence/graphics/dendrogram.py +79 -34
- biotite/sequence/graphics/features.py +133 -99
- biotite/sequence/graphics/logo.py +22 -28
- biotite/sequence/graphics/plasmid.py +229 -178
- biotite/sequence/io/fasta/__init__.py +1 -1
- biotite/sequence/io/fasta/convert.py +44 -33
- biotite/sequence/io/fasta/file.py +42 -55
- biotite/sequence/io/fastq/__init__.py +1 -1
- biotite/sequence/io/fastq/convert.py +11 -14
- biotite/sequence/io/fastq/file.py +68 -112
- biotite/sequence/io/genbank/__init__.py +2 -2
- biotite/sequence/io/genbank/annotation.py +12 -20
- biotite/sequence/io/genbank/file.py +74 -76
- biotite/sequence/io/genbank/metadata.py +74 -62
- biotite/sequence/io/genbank/sequence.py +13 -14
- biotite/sequence/io/general.py +39 -30
- biotite/sequence/io/gff/__init__.py +2 -2
- biotite/sequence/io/gff/convert.py +10 -15
- biotite/sequence/io/gff/file.py +81 -65
- biotite/sequence/phylo/__init__.py +1 -1
- biotite/sequence/phylo/nj.cpython-312-darwin.so +0 -0
- biotite/sequence/phylo/tree.cpython-312-darwin.so +0 -0
- biotite/sequence/phylo/upgma.cpython-312-darwin.so +0 -0
- biotite/sequence/profile.py +57 -28
- biotite/sequence/search.py +17 -15
- biotite/sequence/seqtypes.py +200 -164
- biotite/sequence/sequence.py +15 -17
- biotite/structure/__init__.py +3 -3
- biotite/structure/atoms.py +221 -235
- biotite/structure/basepairs.py +260 -271
- biotite/structure/bonds.cpython-312-darwin.so +0 -0
- biotite/structure/bonds.pyx +29 -32
- biotite/structure/box.py +67 -71
- biotite/structure/celllist.cpython-312-darwin.so +0 -0
- biotite/structure/chains.py +55 -39
- biotite/structure/charges.cpython-312-darwin.so +0 -0
- biotite/structure/compare.py +32 -32
- biotite/structure/density.py +13 -18
- biotite/structure/dotbracket.py +20 -22
- biotite/structure/error.py +10 -2
- biotite/structure/filter.py +82 -77
- biotite/structure/geometry.py +130 -119
- biotite/structure/graphics/atoms.py +60 -43
- biotite/structure/graphics/rna.py +81 -68
- biotite/structure/hbond.py +112 -93
- biotite/structure/info/__init__.py +0 -2
- biotite/structure/info/atoms.py +10 -11
- biotite/structure/info/bonds.py +41 -43
- biotite/structure/info/ccd.py +4 -5
- biotite/structure/info/groups.py +1 -3
- biotite/structure/info/masses.py +5 -10
- biotite/structure/info/misc.py +1 -1
- biotite/structure/info/radii.py +20 -20
- biotite/structure/info/standardize.py +15 -26
- biotite/structure/integrity.py +18 -71
- biotite/structure/io/__init__.py +3 -4
- biotite/structure/io/dcd/__init__.py +1 -1
- biotite/structure/io/dcd/file.py +22 -20
- biotite/structure/io/general.py +47 -61
- biotite/structure/io/gro/__init__.py +1 -1
- biotite/structure/io/gro/file.py +73 -72
- biotite/structure/io/mol/__init__.py +1 -1
- biotite/structure/io/mol/convert.py +8 -11
- biotite/structure/io/mol/ctab.py +37 -36
- biotite/structure/io/mol/header.py +14 -10
- biotite/structure/io/mol/mol.py +9 -53
- biotite/structure/io/mol/sdf.py +47 -50
- biotite/structure/io/netcdf/__init__.py +1 -1
- biotite/structure/io/netcdf/file.py +24 -23
- biotite/structure/io/pdb/__init__.py +1 -1
- biotite/structure/io/pdb/convert.py +32 -20
- biotite/structure/io/pdb/file.py +151 -172
- biotite/structure/io/pdb/hybrid36.cpython-312-darwin.so +0 -0
- biotite/structure/io/pdbqt/__init__.py +1 -1
- biotite/structure/io/pdbqt/convert.py +17 -11
- biotite/structure/io/pdbqt/file.py +128 -80
- biotite/structure/io/pdbx/__init__.py +1 -2
- biotite/structure/io/pdbx/bcif.py +36 -44
- biotite/structure/io/pdbx/cif.py +64 -62
- biotite/structure/io/pdbx/component.py +10 -16
- biotite/structure/io/pdbx/convert.py +235 -246
- biotite/structure/io/pdbx/encoding.cpython-312-darwin.so +0 -0
- biotite/structure/io/trajfile.py +76 -93
- biotite/structure/io/trr/__init__.py +1 -1
- biotite/structure/io/trr/file.py +12 -15
- biotite/structure/io/xtc/__init__.py +1 -1
- biotite/structure/io/xtc/file.py +11 -14
- biotite/structure/mechanics.py +9 -11
- biotite/structure/molecules.py +3 -4
- biotite/structure/pseudoknots.py +53 -67
- biotite/structure/rdf.py +23 -21
- biotite/structure/repair.py +137 -86
- biotite/structure/residues.py +26 -16
- biotite/structure/sasa.cpython-312-darwin.so +0 -0
- biotite/structure/{resutil.py → segments.py} +24 -23
- biotite/structure/sequence.py +10 -11
- biotite/structure/sse.py +100 -119
- biotite/structure/superimpose.py +39 -77
- biotite/structure/transform.py +97 -71
- biotite/structure/util.py +11 -13
- biotite/version.py +2 -2
- biotite/visualize.py +69 -55
- {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/METADATA +5 -5
- biotite-1.0.0.dist-info/RECORD +322 -0
- biotite/structure/io/ctab.py +0 -72
- biotite/structure/io/mmtf/__init__.py +0 -21
- biotite/structure/io/mmtf/assembly.py +0 -214
- biotite/structure/io/mmtf/convertarray.cpython-312-darwin.so +0 -0
- biotite/structure/io/mmtf/convertarray.pyx +0 -341
- biotite/structure/io/mmtf/convertfile.cpython-312-darwin.so +0 -0
- biotite/structure/io/mmtf/convertfile.pyx +0 -501
- biotite/structure/io/mmtf/decode.cpython-312-darwin.so +0 -0
- biotite/structure/io/mmtf/decode.pyx +0 -152
- biotite/structure/io/mmtf/encode.cpython-312-darwin.so +0 -0
- biotite/structure/io/mmtf/encode.pyx +0 -183
- biotite/structure/io/mmtf/file.py +0 -233
- biotite/structure/io/npz/__init__.py +0 -20
- biotite/structure/io/npz/file.py +0 -152
- biotite/structure/io/pdbx/legacy.py +0 -267
- biotite/structure/io/tng/__init__.py +0 -13
- biotite/structure/io/tng/file.py +0 -46
- biotite/temp.py +0 -86
- biotite-0.41.2.dist-info/RECORD +0 -340
- {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/WHEEL +0 -0
- {biotite-0.41.2.dist-info → biotite-1.0.0.dist-info}/licenses/LICENSE.rst +0 -0
|
@@ -18,30 +18,41 @@ __all__ = [
|
|
|
18
18
|
import itertools
|
|
19
19
|
import warnings
|
|
20
20
|
import numpy as np
|
|
21
|
-
from
|
|
22
|
-
from
|
|
23
|
-
from
|
|
24
|
-
from
|
|
25
|
-
from
|
|
26
|
-
from
|
|
27
|
-
from
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
from .
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
21
|
+
from biotite.file import InvalidFileError
|
|
22
|
+
from biotite.sequence.seqtypes import NucleotideSequence, ProteinSequence
|
|
23
|
+
from biotite.structure.atoms import AtomArray, AtomArrayStack, repeat
|
|
24
|
+
from biotite.structure.bonds import BondList, BondType, connect_via_residue_names
|
|
25
|
+
from biotite.structure.box import unitcell_from_vectors, vectors_from_unitcell
|
|
26
|
+
from biotite.structure.error import BadStructureError
|
|
27
|
+
from biotite.structure.filter import (
|
|
28
|
+
filter_first_altloc,
|
|
29
|
+
filter_highest_occupancy_altloc,
|
|
30
|
+
)
|
|
31
|
+
from biotite.structure.io.pdbx.bcif import (
|
|
32
|
+
BinaryCIFBlock,
|
|
33
|
+
BinaryCIFColumn,
|
|
34
|
+
BinaryCIFFile,
|
|
35
|
+
)
|
|
36
|
+
from biotite.structure.io.pdbx.cif import CIFBlock, CIFFile
|
|
37
|
+
from biotite.structure.io.pdbx.component import MaskValue
|
|
38
|
+
from biotite.structure.io.pdbx.encoding import StringArrayEncoding
|
|
39
|
+
from biotite.structure.residues import get_residue_count, get_residue_starts_for
|
|
40
|
+
from biotite.structure.util import matrix_rotate
|
|
36
41
|
|
|
37
42
|
# Cond types in `struct_conn` category that refer to covalent bonds
|
|
38
43
|
PDBX_COVALENT_TYPES = [
|
|
39
|
-
"covale",
|
|
40
|
-
"
|
|
44
|
+
"covale",
|
|
45
|
+
"covale_base",
|
|
46
|
+
"covale_phosphate",
|
|
47
|
+
"covale_sugar",
|
|
48
|
+
"disulf",
|
|
49
|
+
"modres",
|
|
50
|
+
"modres_link",
|
|
51
|
+
"metalc",
|
|
41
52
|
]
|
|
42
53
|
# Map 'struct_conn' bond orders to 'BondType'...
|
|
43
54
|
PDBX_BOND_ORDER_TO_TYPE = {
|
|
44
|
-
"":
|
|
55
|
+
"": BondType.ANY,
|
|
45
56
|
"sing": BondType.SINGLE,
|
|
46
57
|
"doub": BondType.DOUBLE,
|
|
47
58
|
"trip": BondType.TRIPLE,
|
|
@@ -61,13 +72,13 @@ PDBX_BOND_TYPE_TO_ORDER = {
|
|
|
61
72
|
}
|
|
62
73
|
# Map 'chem_comp_bond' bond orders and aromaticity to 'BondType'...
|
|
63
74
|
COMP_BOND_ORDER_TO_TYPE = {
|
|
64
|
-
("SING", "N")
|
|
65
|
-
("DOUB", "N")
|
|
66
|
-
("TRIP", "N")
|
|
67
|
-
("QUAD", "N")
|
|
68
|
-
("SING", "Y")
|
|
69
|
-
("DOUB", "Y")
|
|
70
|
-
("TRIP", "Y")
|
|
75
|
+
("SING", "N"): BondType.SINGLE,
|
|
76
|
+
("DOUB", "N"): BondType.DOUBLE,
|
|
77
|
+
("TRIP", "N"): BondType.TRIPLE,
|
|
78
|
+
("QUAD", "N"): BondType.QUADRUPLE,
|
|
79
|
+
("SING", "Y"): BondType.AROMATIC_SINGLE,
|
|
80
|
+
("DOUB", "Y"): BondType.AROMATIC_DOUBLE,
|
|
81
|
+
("TRIP", "Y"): BondType.AROMATIC_TRIPLE,
|
|
71
82
|
}
|
|
72
83
|
# ...and vice versa
|
|
73
84
|
COMP_BOND_TYPE_TO_ORDER = {
|
|
@@ -98,16 +109,15 @@ def _filter(category, index):
|
|
|
98
109
|
Column = Category.subcomponent_class()
|
|
99
110
|
Data = Column.subcomponent_class()
|
|
100
111
|
|
|
101
|
-
return Category(
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
Data(column.mask.array[index])
|
|
106
|
-
if column.mask is not None else None
|
|
112
|
+
return Category(
|
|
113
|
+
{
|
|
114
|
+
key: Column(
|
|
115
|
+
Data(column.data.array[index]),
|
|
116
|
+
(Data(column.mask.array[index]) if column.mask is not None else None),
|
|
107
117
|
)
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
118
|
+
for key, column in category.items()
|
|
119
|
+
}
|
|
120
|
+
)
|
|
111
121
|
|
|
112
122
|
|
|
113
123
|
def get_sequence(pdbx_file, data_block=None):
|
|
@@ -134,26 +144,47 @@ def get_sequence(pdbx_file, data_block=None):
|
|
|
134
144
|
|
|
135
145
|
Returns
|
|
136
146
|
-------
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
(equivalent to
|
|
147
|
+
sequence_dict : Dictionary of Sequences
|
|
148
|
+
Dictionary keys are derived from ``entity_poly.pdbx_strand_id``
|
|
149
|
+
(often equivalent to chain_id and atom_site.auth_asym_id
|
|
150
|
+
in most cases). Dictionary values are sequences.
|
|
151
|
+
|
|
152
|
+
Notes
|
|
153
|
+
-----
|
|
154
|
+
The ``entity_poly.pdbx_seq_one_letter_code_can`` field contains the initial
|
|
155
|
+
complete sequence. If the structure represents a truncated or spliced
|
|
156
|
+
version of this initial sequence, it will include only a subset of the
|
|
157
|
+
initial sequence. Use biotite.structure.get_residues to retrieve only
|
|
158
|
+
the residues that are represented in the structure.
|
|
140
159
|
"""
|
|
160
|
+
|
|
141
161
|
block = _get_block(pdbx_file, data_block)
|
|
162
|
+
poly_category = block["entity_poly"]
|
|
142
163
|
|
|
143
|
-
poly_category= block["entity_poly"]
|
|
144
164
|
seq_string = poly_category["pdbx_seq_one_letter_code_can"].as_array(str)
|
|
145
165
|
seq_type = poly_category["type"].as_array(str)
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
166
|
+
|
|
167
|
+
sequences = [
|
|
168
|
+
_convert_string_to_sequence(string, stype)
|
|
169
|
+
for string, stype in zip(seq_string, seq_type)
|
|
170
|
+
]
|
|
171
|
+
|
|
172
|
+
strand_ids = poly_category["pdbx_strand_id"].as_array(str)
|
|
173
|
+
strand_ids = [strand_id.split(",") for strand_id in strand_ids]
|
|
174
|
+
|
|
175
|
+
sequence_dict = {
|
|
176
|
+
strand_id: sequence
|
|
177
|
+
for sequence, strand_ids in zip(sequences, strand_ids)
|
|
178
|
+
for strand_id in strand_ids
|
|
179
|
+
if sequence is not None
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
return sequence_dict
|
|
152
183
|
|
|
153
184
|
|
|
154
185
|
def get_model_count(pdbx_file, data_block=None):
|
|
155
186
|
"""
|
|
156
|
-
Get the number of models contained in a
|
|
187
|
+
Get the number of models contained in a file.
|
|
157
188
|
|
|
158
189
|
Parameters
|
|
159
190
|
----------
|
|
@@ -172,17 +203,23 @@ def get_model_count(pdbx_file, data_block=None):
|
|
|
172
203
|
The number of models.
|
|
173
204
|
"""
|
|
174
205
|
block = _get_block(pdbx_file, data_block)
|
|
175
|
-
return len(
|
|
176
|
-
block["atom_site"]["pdbx_PDB_model_num"].as_array(np.int32)
|
|
177
|
-
)
|
|
206
|
+
return len(
|
|
207
|
+
_get_model_starts(block["atom_site"]["pdbx_PDB_model_num"].as_array(np.int32))
|
|
208
|
+
)
|
|
178
209
|
|
|
179
210
|
|
|
180
|
-
def get_structure(
|
|
181
|
-
|
|
182
|
-
|
|
211
|
+
def get_structure(
|
|
212
|
+
pdbx_file,
|
|
213
|
+
model=None,
|
|
214
|
+
data_block=None,
|
|
215
|
+
altloc="first",
|
|
216
|
+
extra_fields=None,
|
|
217
|
+
use_author_fields=True,
|
|
218
|
+
include_bonds=False,
|
|
219
|
+
):
|
|
183
220
|
"""
|
|
184
221
|
Create an :class:`AtomArray` or :class:`AtomArrayStack` from the
|
|
185
|
-
``atom_site`` category in a
|
|
222
|
+
``atom_site`` category in a file.
|
|
186
223
|
|
|
187
224
|
Parameters
|
|
188
225
|
----------
|
|
@@ -228,7 +265,7 @@ def get_structure(pdbx_file, model=None, data_block=None, altloc="first",
|
|
|
228
265
|
for example both, ``label_seq_id`` and ``auth_seq_id`` describe
|
|
229
266
|
the ID of the residue.
|
|
230
267
|
While, the ``label_xxx`` fields can be used as official pointers
|
|
231
|
-
to other categories in the
|
|
268
|
+
to other categories in the file, the ``auth_xxx``
|
|
232
269
|
fields are set by the author(s) of the structure and are
|
|
233
270
|
consistent with the corresponding values in PDB files.
|
|
234
271
|
If `use_author_fields` is true, the annotation arrays will be
|
|
@@ -290,12 +327,21 @@ def get_structure(pdbx_file, model=None, data_block=None, altloc="first",
|
|
|
290
327
|
"instead"
|
|
291
328
|
)
|
|
292
329
|
|
|
293
|
-
atoms.coord[:, :, 0] =
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
330
|
+
atoms.coord[:, :, 0] = (
|
|
331
|
+
atom_site["Cartn_x"]
|
|
332
|
+
.as_array(np.float32)
|
|
333
|
+
.reshape((model_count, model_length))
|
|
334
|
+
)
|
|
335
|
+
atoms.coord[:, :, 1] = (
|
|
336
|
+
atom_site["Cartn_y"]
|
|
337
|
+
.as_array(np.float32)
|
|
338
|
+
.reshape((model_count, model_length))
|
|
339
|
+
)
|
|
340
|
+
atoms.coord[:, :, 2] = (
|
|
341
|
+
atom_site["Cartn_z"]
|
|
342
|
+
.as_array(np.float32)
|
|
343
|
+
.reshape((model_count, model_length))
|
|
344
|
+
)
|
|
299
345
|
|
|
300
346
|
box = _get_box(block)
|
|
301
347
|
if box is not None:
|
|
@@ -325,31 +371,25 @@ def get_structure(pdbx_file, model=None, data_block=None, altloc="first",
|
|
|
325
371
|
atoms.box = _get_box(block)
|
|
326
372
|
|
|
327
373
|
# The below part is the same for both, AtomArray and AtomArrayStack
|
|
328
|
-
_fill_annotations(
|
|
329
|
-
atoms, model_atom_site, extra_fields, use_author_fields
|
|
330
|
-
)
|
|
374
|
+
_fill_annotations(atoms, model_atom_site, extra_fields, use_author_fields)
|
|
331
375
|
if include_bonds:
|
|
332
376
|
if "chem_comp_bond" in block:
|
|
333
377
|
try:
|
|
334
|
-
custom_bond_dict = _parse_intra_residue_bonds(
|
|
335
|
-
block["chem_comp_bond"]
|
|
336
|
-
)
|
|
378
|
+
custom_bond_dict = _parse_intra_residue_bonds(block["chem_comp_bond"])
|
|
337
379
|
except KeyError:
|
|
338
380
|
warnings.warn(
|
|
339
381
|
"The 'chem_comp_bond' category has missing columns, "
|
|
340
382
|
"falling back to using Chemical Component Dictionary",
|
|
341
|
-
UserWarning
|
|
383
|
+
UserWarning,
|
|
342
384
|
)
|
|
343
385
|
custom_bond_dict = None
|
|
344
|
-
bonds = connect_via_residue_names(
|
|
345
|
-
atoms, custom_bond_dict=custom_bond_dict
|
|
346
|
-
)
|
|
386
|
+
bonds = connect_via_residue_names(atoms, custom_bond_dict=custom_bond_dict)
|
|
347
387
|
else:
|
|
348
388
|
bonds = connect_via_residue_names(atoms)
|
|
349
389
|
if "struct_conn" in block:
|
|
350
|
-
bonds = bonds.merge(
|
|
351
|
-
model_atom_site, block["struct_conn"]
|
|
352
|
-
)
|
|
390
|
+
bonds = bonds.merge(
|
|
391
|
+
_parse_inter_residue_bonds(model_atom_site, block["struct_conn"])
|
|
392
|
+
)
|
|
353
393
|
atoms.bonds = bonds
|
|
354
394
|
atoms = _filter_altloc(atoms, model_atom_site, altloc)
|
|
355
395
|
|
|
@@ -357,10 +397,6 @@ def get_structure(pdbx_file, model=None, data_block=None, altloc="first",
|
|
|
357
397
|
|
|
358
398
|
|
|
359
399
|
def _get_block(pdbx_component, block_name):
|
|
360
|
-
if isinstance(pdbx_component, PDBxFile):
|
|
361
|
-
# The deprecated 'PDBxFile' is a thin wrapper around 'CIFFile'
|
|
362
|
-
pdbx_component = pdbx_component.cif_file
|
|
363
|
-
|
|
364
400
|
if not isinstance(pdbx_component, (CIFBlock, BinaryCIFBlock)):
|
|
365
401
|
# Determine block
|
|
366
402
|
if block_name is None:
|
|
@@ -372,24 +408,24 @@ def _get_block(pdbx_component, block_name):
|
|
|
372
408
|
|
|
373
409
|
|
|
374
410
|
def _get_or_fallback(category, key, fallback_key):
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
411
|
+
"""
|
|
412
|
+
Return column related to key in category if it exists,
|
|
413
|
+
otherwise try to get the column related to fallback key.
|
|
414
|
+
"""
|
|
415
|
+
if key not in category:
|
|
416
|
+
warnings.warn(
|
|
417
|
+
f"Attribute '{key}' not found within 'atom_site' category. "
|
|
418
|
+
f"The fallback attribute '{fallback_key}' will be used instead",
|
|
419
|
+
UserWarning,
|
|
420
|
+
)
|
|
421
|
+
try:
|
|
422
|
+
return category[fallback_key]
|
|
423
|
+
except KeyError as key_exc:
|
|
424
|
+
raise InvalidFileError(
|
|
425
|
+
f"Fallback attribute '{fallback_key}' not found within "
|
|
426
|
+
"'atom_site' category"
|
|
427
|
+
) from key_exc
|
|
428
|
+
return category[key]
|
|
393
429
|
|
|
394
430
|
|
|
395
431
|
def _fill_annotations(array, atom_site, extra_fields, use_author_fields):
|
|
@@ -408,78 +444,52 @@ def _fill_annotations(array, atom_site, extra_fields, use_author_fields):
|
|
|
408
444
|
instead of ``label_``.
|
|
409
445
|
"""
|
|
410
446
|
|
|
411
|
-
prefix, alt_prefix = (
|
|
412
|
-
("auth", "label") if use_author_fields else ("label", "auth")
|
|
413
|
-
)
|
|
447
|
+
prefix, alt_prefix = ("auth", "label") if use_author_fields else ("label", "auth")
|
|
414
448
|
|
|
415
449
|
array.set_annotation(
|
|
416
450
|
"chain_id",
|
|
417
451
|
_get_or_fallback(
|
|
418
452
|
atom_site, f"{prefix}_asym_id", f"{alt_prefix}_asym_id"
|
|
419
|
-
).as_array("U4")
|
|
453
|
+
).as_array("U4"),
|
|
420
454
|
)
|
|
421
455
|
array.set_annotation(
|
|
422
456
|
"res_id",
|
|
423
457
|
_get_or_fallback(
|
|
424
458
|
atom_site, f"{prefix}_seq_id", f"{alt_prefix}_seq_id"
|
|
425
|
-
).as_array(int, -1)
|
|
426
|
-
)
|
|
427
|
-
array.set_annotation(
|
|
428
|
-
"ins_code",
|
|
429
|
-
atom_site["pdbx_PDB_ins_code"].as_array("U1", "")
|
|
459
|
+
).as_array(int, -1),
|
|
430
460
|
)
|
|
461
|
+
array.set_annotation("ins_code", atom_site["pdbx_PDB_ins_code"].as_array("U1", ""))
|
|
431
462
|
array.set_annotation(
|
|
432
463
|
"res_name",
|
|
433
464
|
_get_or_fallback(
|
|
434
465
|
atom_site, f"{prefix}_comp_id", f"{alt_prefix}_comp_id"
|
|
435
|
-
).as_array("U5")
|
|
436
|
-
)
|
|
437
|
-
array.set_annotation(
|
|
438
|
-
"hetero",
|
|
439
|
-
atom_site["group_PDB"].as_array(str) == "HETATM"
|
|
466
|
+
).as_array("U5"),
|
|
440
467
|
)
|
|
468
|
+
array.set_annotation("hetero", atom_site["group_PDB"].as_array(str) == "HETATM")
|
|
441
469
|
array.set_annotation(
|
|
442
470
|
"atom_name",
|
|
443
471
|
_get_or_fallback(
|
|
444
472
|
atom_site, f"{prefix}_atom_id", f"{alt_prefix}_atom_id"
|
|
445
|
-
).as_array("U6")
|
|
446
|
-
)
|
|
447
|
-
array.set_annotation(
|
|
448
|
-
"element",
|
|
449
|
-
atom_site["type_symbol"].as_array("U2")
|
|
473
|
+
).as_array("U6"),
|
|
450
474
|
)
|
|
475
|
+
array.set_annotation("element", atom_site["type_symbol"].as_array("U2"))
|
|
451
476
|
|
|
452
477
|
if "atom_id" in extra_fields:
|
|
453
|
-
array.set_annotation(
|
|
454
|
-
"atom_id",
|
|
455
|
-
atom_site["id"].as_array(int)
|
|
456
|
-
)
|
|
478
|
+
array.set_annotation("atom_id", atom_site["id"].as_array(int))
|
|
457
479
|
extra_fields.remove("atom_id")
|
|
458
480
|
if "b_factor" in extra_fields:
|
|
459
|
-
array.set_annotation(
|
|
460
|
-
"b_factor",
|
|
461
|
-
atom_site["B_iso_or_equiv"].as_array(float)
|
|
462
|
-
)
|
|
481
|
+
array.set_annotation("b_factor", atom_site["B_iso_or_equiv"].as_array(float))
|
|
463
482
|
extra_fields.remove("b_factor")
|
|
464
483
|
if "occupancy" in extra_fields:
|
|
465
|
-
array.set_annotation(
|
|
466
|
-
"occupancy",
|
|
467
|
-
atom_site["occupancy"].as_array(float)
|
|
468
|
-
)
|
|
484
|
+
array.set_annotation("occupancy", atom_site["occupancy"].as_array(float))
|
|
469
485
|
extra_fields.remove("occupancy")
|
|
470
486
|
if "charge" in extra_fields:
|
|
471
|
-
array.set_annotation(
|
|
472
|
-
"charge",
|
|
473
|
-
atom_site["pdbx_formal_charge"].as_array(int, 0)
|
|
474
|
-
)
|
|
487
|
+
array.set_annotation("charge", atom_site["pdbx_formal_charge"].as_array(int, 0))
|
|
475
488
|
extra_fields.remove("charge")
|
|
476
489
|
|
|
477
490
|
# Handle all remaining custom fields
|
|
478
491
|
for field in extra_fields:
|
|
479
|
-
array.set_annotation(
|
|
480
|
-
field,
|
|
481
|
-
atom_site[field].as_array(str)
|
|
482
|
-
)
|
|
492
|
+
array.set_annotation(field, atom_site[field].as_array(str))
|
|
483
493
|
|
|
484
494
|
|
|
485
495
|
def _parse_intra_residue_bonds(chem_comp_bond):
|
|
@@ -493,7 +503,7 @@ def _parse_intra_residue_bonds(chem_comp_bond):
|
|
|
493
503
|
chem_comp_bond["atom_id_1"].as_array(str),
|
|
494
504
|
chem_comp_bond["atom_id_2"].as_array(str),
|
|
495
505
|
chem_comp_bond["value_order"].as_array(str),
|
|
496
|
-
chem_comp_bond["pdbx_aromatic_flag"].as_array(str)
|
|
506
|
+
chem_comp_bond["pdbx_aromatic_flag"].as_array(str),
|
|
497
507
|
):
|
|
498
508
|
if res_name not in custom_bond_dict:
|
|
499
509
|
custom_bond_dict[res_name] = {}
|
|
@@ -514,33 +524,32 @@ def _parse_inter_residue_bonds(atom_site, struct_conn):
|
|
|
514
524
|
IDENTITY = "1_555"
|
|
515
525
|
# Columns in 'atom_site' that should be matched by 'struct_conn'
|
|
516
526
|
COLUMNS = [
|
|
517
|
-
"label_asym_id",
|
|
518
|
-
"
|
|
519
|
-
"
|
|
527
|
+
"label_asym_id",
|
|
528
|
+
"label_comp_id",
|
|
529
|
+
"label_seq_id",
|
|
530
|
+
"label_atom_id",
|
|
531
|
+
"label_alt_id",
|
|
532
|
+
"auth_asym_id",
|
|
533
|
+
"auth_comp_id",
|
|
534
|
+
"auth_seq_id",
|
|
535
|
+
"pdbx_PDB_ins_code",
|
|
520
536
|
]
|
|
521
537
|
|
|
522
538
|
covale_mask = np.isin(
|
|
523
539
|
struct_conn["conn_type_id"].as_array(str), PDBX_COVALENT_TYPES
|
|
524
540
|
)
|
|
525
541
|
if "ptnr1_symmetry" in struct_conn:
|
|
526
|
-
covale_mask &= (
|
|
527
|
-
struct_conn["ptnr1_symmetry"].as_array(str, IDENTITY) == IDENTITY
|
|
528
|
-
)
|
|
542
|
+
covale_mask &= struct_conn["ptnr1_symmetry"].as_array(str, IDENTITY) == IDENTITY
|
|
529
543
|
if "ptnr2_symmetry" in struct_conn:
|
|
530
|
-
covale_mask &= (
|
|
531
|
-
struct_conn["ptnr2_symmetry"].as_array(str, IDENTITY) == IDENTITY
|
|
532
|
-
)
|
|
544
|
+
covale_mask &= struct_conn["ptnr2_symmetry"].as_array(str, IDENTITY) == IDENTITY
|
|
533
545
|
|
|
534
546
|
atom_indices = [None] * 2
|
|
535
547
|
for i in range(2):
|
|
536
548
|
reference_arrays = []
|
|
537
549
|
query_arrays = []
|
|
538
550
|
for col_name in COLUMNS:
|
|
539
|
-
struct_conn_col_name = _get_struct_conn_col_name(col_name, i+1)
|
|
540
|
-
if
|
|
541
|
-
col_name not in atom_site
|
|
542
|
-
or struct_conn_col_name not in struct_conn
|
|
543
|
-
):
|
|
551
|
+
struct_conn_col_name = _get_struct_conn_col_name(col_name, i + 1)
|
|
552
|
+
if col_name not in atom_site or struct_conn_col_name not in struct_conn:
|
|
544
553
|
continue
|
|
545
554
|
# Ensure both arrays have the same dtype to allow comparison
|
|
546
555
|
reference = atom_site[col_name].as_array()
|
|
@@ -577,7 +586,7 @@ def _parse_inter_residue_bonds(atom_site, struct_conn):
|
|
|
577
586
|
|
|
578
587
|
return BondList(
|
|
579
588
|
atom_site.row_count,
|
|
580
|
-
np.stack([atoms_indices_1, atoms_indices_2, bond_types], axis=-1)
|
|
589
|
+
np.stack([atoms_indices_1, atoms_indices_2, bond_types], axis=-1),
|
|
581
590
|
)
|
|
582
591
|
|
|
583
592
|
|
|
@@ -587,10 +596,13 @@ def _find_matches(query_arrays, reference_arrays):
|
|
|
587
596
|
`reference_arrays` where all query values the reference counterpart.
|
|
588
597
|
If no match is found for a query, the corresponding index is -1.
|
|
589
598
|
"""
|
|
590
|
-
match_masks_for_all_columns = np.stack(
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
599
|
+
match_masks_for_all_columns = np.stack(
|
|
600
|
+
[
|
|
601
|
+
query[:, np.newaxis] == reference[np.newaxis, :]
|
|
602
|
+
for query, reference in zip(query_arrays, reference_arrays)
|
|
603
|
+
],
|
|
604
|
+
axis=-1,
|
|
605
|
+
)
|
|
594
606
|
match_masks = np.all(match_masks_for_all_columns, axis=-1)
|
|
595
607
|
query_matches, reference_matches = np.where(match_masks)
|
|
596
608
|
|
|
@@ -664,14 +676,8 @@ def _filter_model(atom_site, model_starts, model):
|
|
|
664
676
|
Reduce the ``atom_site`` category to the values for the given
|
|
665
677
|
model.
|
|
666
678
|
"""
|
|
667
|
-
Category = type(atom_site)
|
|
668
|
-
Column = Category.subcomponent_class()
|
|
669
|
-
Data = Column.subcomponent_class()
|
|
670
|
-
|
|
671
679
|
# Append exclusive stop
|
|
672
|
-
model_starts = np.append(
|
|
673
|
-
model_starts, [atom_site.row_count]
|
|
674
|
-
)
|
|
680
|
+
model_starts = np.append(model_starts, [atom_site.row_count])
|
|
675
681
|
# Indexing starts at 0, but model number starts at 1
|
|
676
682
|
model_index = model - 1
|
|
677
683
|
index = slice(model_starts[model_index], model_starts[model_index + 1])
|
|
@@ -757,9 +763,7 @@ def set_structure(pdbx_file, array, data_block=None, include_bonds=False):
|
|
|
757
763
|
# Fill PDBx columns from information
|
|
758
764
|
# in structures' attribute arrays as good as possible
|
|
759
765
|
atom_site = Category()
|
|
760
|
-
atom_site["group_PDB"] = np.where(
|
|
761
|
-
array.hetero, "HETATM", "ATOM"
|
|
762
|
-
)
|
|
766
|
+
atom_site["group_PDB"] = np.where(array.hetero, "HETATM", "ATOM")
|
|
763
767
|
atom_site["type_symbol"] = np.copy(array.element)
|
|
764
768
|
atom_site["label_atom_id"] = np.copy(array.atom_name)
|
|
765
769
|
atom_site["label_alt_id"] = Column(
|
|
@@ -773,7 +777,7 @@ def set_structure(pdbx_file, array, data_block=None, include_bonds=False):
|
|
|
773
777
|
atom_site["label_seq_id"] = np.copy(array.res_id)
|
|
774
778
|
atom_site["pdbx_PDB_ins_code"] = Column(
|
|
775
779
|
np.copy(array.ins_code),
|
|
776
|
-
np.where(array.ins_code == "", MaskValue.INAPPLICABLE, MaskValue.PRESENT)
|
|
780
|
+
np.where(array.ins_code == "", MaskValue.INAPPLICABLE, MaskValue.PRESENT),
|
|
777
781
|
)
|
|
778
782
|
atom_site["auth_seq_id"] = atom_site["label_seq_id"]
|
|
779
783
|
atom_site["auth_comp_id"] = atom_site["label_comp_id"]
|
|
@@ -790,11 +794,11 @@ def set_structure(pdbx_file, array, data_block=None, include_bonds=False):
|
|
|
790
794
|
if "charge" in annot_categories:
|
|
791
795
|
atom_site["pdbx_formal_charge"] = Column(
|
|
792
796
|
np.array([f"{c:+d}" if c != 0 else "?" for c in array.charge]),
|
|
793
|
-
np.where(array.charge == 0, MaskValue.MISSING, MaskValue.PRESENT)
|
|
797
|
+
np.where(array.charge == 0, MaskValue.MISSING, MaskValue.PRESENT),
|
|
794
798
|
)
|
|
795
799
|
|
|
796
800
|
if array.bonds is not None:
|
|
797
|
-
struct_conn =
|
|
801
|
+
struct_conn = _set_inter_residue_bonds(array, atom_site)
|
|
798
802
|
if struct_conn is not None:
|
|
799
803
|
block["struct_conn"] = struct_conn
|
|
800
804
|
if include_bonds:
|
|
@@ -804,24 +808,20 @@ def set_structure(pdbx_file, array, data_block=None, include_bonds=False):
|
|
|
804
808
|
|
|
805
809
|
# In case of a single model handle each coordinate
|
|
806
810
|
# simply like a flattened array
|
|
807
|
-
if
|
|
808
|
-
|
|
811
|
+
if isinstance(array, AtomArray) or (
|
|
812
|
+
isinstance(array, AtomArrayStack) and array.stack_depth() == 1
|
|
809
813
|
):
|
|
810
814
|
# 'ravel' flattens coord without copy
|
|
811
815
|
# in case of stack with stack_depth = 1
|
|
812
816
|
atom_site["Cartn_x"] = np.copy(np.ravel(array.coord[..., 0]))
|
|
813
817
|
atom_site["Cartn_y"] = np.copy(np.ravel(array.coord[..., 1]))
|
|
814
818
|
atom_site["Cartn_z"] = np.copy(np.ravel(array.coord[..., 2]))
|
|
815
|
-
atom_site["pdbx_PDB_model_num"] = np.ones(
|
|
816
|
-
array.array_length(), dtype=np.int32
|
|
817
|
-
)
|
|
819
|
+
atom_site["pdbx_PDB_model_num"] = np.ones(array.array_length(), dtype=np.int32)
|
|
818
820
|
# In case of multiple models repeat annotations
|
|
819
821
|
# and use model specific coordinates
|
|
820
822
|
else:
|
|
821
823
|
atom_site = _repeat(atom_site, array.stack_depth())
|
|
822
|
-
coord = np.reshape(
|
|
823
|
-
array.coord, (array.stack_depth() * array.array_length(), 3)
|
|
824
|
-
)
|
|
824
|
+
coord = np.reshape(array.coord, (array.stack_depth() * array.array_length(), 3))
|
|
825
825
|
atom_site["Cartn_x"] = np.copy(coord[:, 0])
|
|
826
826
|
atom_site["Cartn_y"] = np.copy(coord[:, 1])
|
|
827
827
|
atom_site["Cartn_z"] = np.copy(coord[:, 2])
|
|
@@ -829,11 +829,9 @@ def set_structure(pdbx_file, array, data_block=None, include_bonds=False):
|
|
|
829
829
|
np.arange(1, array.stack_depth() + 1, dtype=np.int32),
|
|
830
830
|
repeats=array.array_length(),
|
|
831
831
|
)
|
|
832
|
-
if
|
|
832
|
+
if "atom_id" not in annot_categories:
|
|
833
833
|
# Count from 1
|
|
834
|
-
atom_site["id"] = np.arange(
|
|
835
|
-
1, len(atom_site["group_PDB"]) + 1
|
|
836
|
-
)
|
|
834
|
+
atom_site["id"] = np.arange(1, len(atom_site["group_PDB"]) + 1)
|
|
837
835
|
block["atom_site"] = atom_site
|
|
838
836
|
|
|
839
837
|
# Write box into file
|
|
@@ -870,10 +868,6 @@ def _check_non_empty(array):
|
|
|
870
868
|
|
|
871
869
|
|
|
872
870
|
def _get_or_create_block(pdbx_component, block_name):
|
|
873
|
-
if isinstance(pdbx_component, PDBxFile):
|
|
874
|
-
# The deprecated 'PDBxFile' is a thin wrapper around 'CIFFile'
|
|
875
|
-
pdbx_component = pdbx_component.cif_file
|
|
876
|
-
|
|
877
871
|
Block = pdbx_component.subcomponent_class()
|
|
878
872
|
|
|
879
873
|
if isinstance(pdbx_component, (CIFFile, BinaryCIFFile)):
|
|
@@ -901,7 +895,7 @@ def _determine_entity_id(chain_id):
|
|
|
901
895
|
for i in range(len(chain_id)):
|
|
902
896
|
try:
|
|
903
897
|
entity_id[i] = id_translation[chain_id[i]]
|
|
904
|
-
except:
|
|
898
|
+
except KeyError:
|
|
905
899
|
# chain_id is not in dictionary -> new entry
|
|
906
900
|
id_translation[chain_id[i]] = id
|
|
907
901
|
entity_id[i] = id_translation[chain_id[i]]
|
|
@@ -926,8 +920,11 @@ def _repeat(category, repetitions):
|
|
|
926
920
|
data = Data(np.tile(column.data.array, repetitions), data_encoding)
|
|
927
921
|
else:
|
|
928
922
|
data = Data(np.tile(column.data.array, repetitions))
|
|
929
|
-
mask =
|
|
930
|
-
|
|
923
|
+
mask = (
|
|
924
|
+
Data(np.tile(column.mask.array, repetitions))
|
|
925
|
+
if column.mask is not None
|
|
926
|
+
else None
|
|
927
|
+
)
|
|
931
928
|
category_dict[key] = Column(data, mask)
|
|
932
929
|
return Category(category_dict)
|
|
933
930
|
|
|
@@ -974,22 +971,18 @@ def _set_intra_residue_bonds(array, atom_site):
|
|
|
974
971
|
chem_comp_bond["atom_id_1"] = array.atom_name[bond_array[:, 0]]
|
|
975
972
|
chem_comp_bond["atom_id_2"] = array.atom_name[bond_array[:, 1]]
|
|
976
973
|
chem_comp_bond["value_order"] = Column(
|
|
977
|
-
value_order,
|
|
978
|
-
np.where(any_mask, MaskValue.MISSING, MaskValue.PRESENT)
|
|
974
|
+
value_order, np.where(any_mask, MaskValue.MISSING, MaskValue.PRESENT)
|
|
979
975
|
)
|
|
980
976
|
chem_comp_bond["pdbx_aromatic_flag"] = Column(
|
|
981
|
-
aromatic_flag,
|
|
982
|
-
np.where(any_mask, MaskValue.MISSING, MaskValue.PRESENT)
|
|
977
|
+
aromatic_flag, np.where(any_mask, MaskValue.MISSING, MaskValue.PRESENT)
|
|
983
978
|
)
|
|
984
979
|
# BondList does not contain stereo information
|
|
985
980
|
# -> all values are missing
|
|
986
981
|
chem_comp_bond["pdbx_stereo_config"] = Column(
|
|
987
982
|
np.zeros(len(bond_array), dtype="U1"),
|
|
988
|
-
np.full(len(bond_array), MaskValue.MISSING)
|
|
989
|
-
)
|
|
990
|
-
chem_comp_bond["pdbx_ordinal"] = np.arange(
|
|
991
|
-
1, len(bond_array) + 1, dtype=np.int32
|
|
983
|
+
np.full(len(bond_array), MaskValue.MISSING),
|
|
992
984
|
)
|
|
985
|
+
chem_comp_bond["pdbx_ordinal"] = np.arange(1, len(bond_array) + 1, dtype=np.int32)
|
|
993
986
|
return chem_comp_bond
|
|
994
987
|
|
|
995
988
|
|
|
@@ -1001,8 +994,11 @@ def _set_inter_residue_bonds(array, atom_site):
|
|
|
1001
994
|
``atom_site`` category.
|
|
1002
995
|
"""
|
|
1003
996
|
COLUMNS = [
|
|
1004
|
-
"label_asym_id",
|
|
1005
|
-
"
|
|
997
|
+
"label_asym_id",
|
|
998
|
+
"label_comp_id",
|
|
999
|
+
"label_seq_id",
|
|
1000
|
+
"label_atom_id",
|
|
1001
|
+
"pdbx_PDB_ins_code",
|
|
1006
1002
|
]
|
|
1007
1003
|
|
|
1008
1004
|
Category = type(atom_site)
|
|
@@ -1015,13 +1011,12 @@ def _set_inter_residue_bonds(array, atom_site):
|
|
|
1015
1011
|
struct_conn["id"] = np.arange(1, len(bond_array) + 1)
|
|
1016
1012
|
struct_conn["conn_type_id"] = np.full(len(bond_array), "covale")
|
|
1017
1013
|
struct_conn["pdbx_value_order"] = Column(
|
|
1018
|
-
np.array(
|
|
1019
|
-
[PDBX_BOND_TYPE_TO_ORDER[btype] for btype in bond_array[:, 2]]
|
|
1020
|
-
),
|
|
1014
|
+
np.array([PDBX_BOND_TYPE_TO_ORDER[btype] for btype in bond_array[:, 2]]),
|
|
1021
1015
|
np.where(
|
|
1022
1016
|
bond_array[:, 2] == BondType.ANY,
|
|
1023
|
-
MaskValue.MISSING,
|
|
1024
|
-
|
|
1017
|
+
MaskValue.MISSING,
|
|
1018
|
+
MaskValue.PRESENT,
|
|
1019
|
+
),
|
|
1025
1020
|
)
|
|
1026
1021
|
# Write the identifying annotation...
|
|
1027
1022
|
for col_name in COLUMNS:
|
|
@@ -1029,8 +1024,9 @@ def _set_inter_residue_bonds(array, atom_site):
|
|
|
1029
1024
|
# ...for each bond partner
|
|
1030
1025
|
for i in range(2):
|
|
1031
1026
|
atom_indices = bond_array[:, i]
|
|
1032
|
-
struct_conn[_get_struct_conn_col_name(col_name, i+1)]
|
|
1033
|
-
|
|
1027
|
+
struct_conn[_get_struct_conn_col_name(col_name, i + 1)] = annot[
|
|
1028
|
+
atom_indices
|
|
1029
|
+
]
|
|
1034
1030
|
return struct_conn
|
|
1035
1031
|
|
|
1036
1032
|
|
|
@@ -1042,9 +1038,9 @@ def _filter_bonds(array, connection):
|
|
|
1042
1038
|
bond_array = array.bonds.as_array()
|
|
1043
1039
|
# To save computation time call 'get_residue_starts_for()' only once
|
|
1044
1040
|
# with indices of the first and second atom of each bond
|
|
1045
|
-
residue_starts_1, residue_starts_2 =
|
|
1046
|
-
array, bond_array[:, :2].flatten()
|
|
1047
|
-
)
|
|
1041
|
+
residue_starts_1, residue_starts_2 = (
|
|
1042
|
+
get_residue_starts_for(array, bond_array[:, :2].flatten()).reshape(-1, 2).T
|
|
1043
|
+
)
|
|
1048
1044
|
if connection == "intra":
|
|
1049
1045
|
return bond_array[residue_starts_1 == residue_starts_2]
|
|
1050
1046
|
elif connection == "inter":
|
|
@@ -1053,12 +1049,11 @@ def _filter_bonds(array, connection):
|
|
|
1053
1049
|
raise ValueError("Invalid 'connection' option")
|
|
1054
1050
|
|
|
1055
1051
|
|
|
1056
|
-
def get_component(pdbx_file, data_block=None, use_ideal_coord=True,
|
|
1057
|
-
res_name=None):
|
|
1052
|
+
def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=None):
|
|
1058
1053
|
"""
|
|
1059
1054
|
Create an :class:`AtomArray` for a chemical component from the
|
|
1060
1055
|
``chem_comp_atom`` and, if available, the ``chem_comp_bond``
|
|
1061
|
-
category in a
|
|
1056
|
+
category in a file.
|
|
1062
1057
|
|
|
1063
1058
|
Parameters
|
|
1064
1059
|
----------
|
|
@@ -1154,16 +1149,16 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True,
|
|
|
1154
1149
|
coord_fields, alt_coord_fields = alt_coord_fields, coord_fields
|
|
1155
1150
|
try:
|
|
1156
1151
|
for i, field in enumerate(coord_fields):
|
|
1157
|
-
array.coord[:,i] = atom_category[field].as_array(np.float32)
|
|
1152
|
+
array.coord[:, i] = atom_category[field].as_array(np.float32)
|
|
1158
1153
|
except KeyError as err:
|
|
1159
1154
|
key = err.args[0]
|
|
1160
1155
|
warnings.warn(
|
|
1161
1156
|
f"Attribute '{key}' not found within 'chem_comp_atom' category. "
|
|
1162
1157
|
f"The fallback coordinates will be used instead",
|
|
1163
|
-
UserWarning
|
|
1158
|
+
UserWarning,
|
|
1164
1159
|
)
|
|
1165
1160
|
for i, field in enumerate(alt_coord_fields):
|
|
1166
|
-
array.coord[:,i] = atom_category[field].as_array(np.float32)
|
|
1161
|
+
array.coord[:, i] = atom_category[field].as_array(np.float32)
|
|
1167
1162
|
|
|
1168
1163
|
try:
|
|
1169
1164
|
bond_category = block["chem_comp_bond"]
|
|
@@ -1173,9 +1168,8 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True,
|
|
|
1173
1168
|
)
|
|
1174
1169
|
except KeyError:
|
|
1175
1170
|
warnings.warn(
|
|
1176
|
-
|
|
1177
|
-
|
|
1178
|
-
UserWarning
|
|
1171
|
+
"Category 'chem_comp_bond' not found. " "No bonds will be parsed",
|
|
1172
|
+
UserWarning,
|
|
1179
1173
|
)
|
|
1180
1174
|
else:
|
|
1181
1175
|
bonds = BondList(array.array_length())
|
|
@@ -1183,7 +1177,7 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True,
|
|
|
1183
1177
|
bond_category["atom_id_1"].as_array(str),
|
|
1184
1178
|
bond_category["atom_id_2"].as_array(str),
|
|
1185
1179
|
bond_category["value_order"].as_array(str),
|
|
1186
|
-
bond_category["pdbx_aromatic_flag"].as_array(str)
|
|
1180
|
+
bond_category["pdbx_aromatic_flag"].as_array(str),
|
|
1187
1181
|
):
|
|
1188
1182
|
atom_i = np.where(array.atom_name == atom1)[0][0]
|
|
1189
1183
|
atom_j = np.where(array.atom_name == atom2)[0][0]
|
|
@@ -1225,9 +1219,7 @@ def set_component(pdbx_file, array, data_block=None):
|
|
|
1225
1219
|
Category = block.subcomponent_class()
|
|
1226
1220
|
|
|
1227
1221
|
if get_residue_count(array) > 1:
|
|
1228
|
-
raise BadStructureError(
|
|
1229
|
-
"The input atom array must comprise only one residue"
|
|
1230
|
-
)
|
|
1222
|
+
raise BadStructureError("The input atom array must comprise only one residue")
|
|
1231
1223
|
res_name = array.res_name[0]
|
|
1232
1224
|
|
|
1233
1225
|
annot_categories = array.get_annotation_categories()
|
|
@@ -1250,31 +1242,28 @@ def set_component(pdbx_file, array, data_block=None):
|
|
|
1250
1242
|
atom_cat["pdbx_model_Cartn_z_ideal"] = atom_cat["model_Cartn_z"]
|
|
1251
1243
|
atom_cat["pdbx_component_atom_id"] = atom_cat["atom_id"]
|
|
1252
1244
|
atom_cat["pdbx_component_comp_id"] = atom_cat["comp_id"]
|
|
1253
|
-
atom_cat["pdbx_ordinal"] = np.arange(
|
|
1254
|
-
1, array.array_length() + 1
|
|
1255
|
-
).astype(str)
|
|
1245
|
+
atom_cat["pdbx_ordinal"] = np.arange(1, array.array_length() + 1).astype(str)
|
|
1256
1246
|
block["chem_comp_atom"] = atom_cat
|
|
1257
1247
|
|
|
1258
1248
|
if array.bonds is not None and array.bonds.get_bond_count() > 0:
|
|
1259
1249
|
bond_array = array.bonds.as_array()
|
|
1260
1250
|
order_flags = []
|
|
1261
1251
|
aromatic_flags = []
|
|
1262
|
-
for bond_type in bond_array[:,2]:
|
|
1252
|
+
for bond_type in bond_array[:, 2]:
|
|
1263
1253
|
order_flag, aromatic_flag = COMP_BOND_TYPE_TO_ORDER[bond_type]
|
|
1264
1254
|
order_flags.append(order_flag)
|
|
1265
1255
|
aromatic_flags.append(aromatic_flag)
|
|
1266
1256
|
|
|
1267
1257
|
bond_cat = Category()
|
|
1268
1258
|
bond_cat["comp_id"] = np.full(len(bond_array), res_name)
|
|
1269
|
-
bond_cat["atom_id_1"] = array.atom_name[bond_array[:,0]]
|
|
1270
|
-
bond_cat["atom_id_2"] = array.atom_name[bond_array[:,1]]
|
|
1259
|
+
bond_cat["atom_id_1"] = array.atom_name[bond_array[:, 0]]
|
|
1260
|
+
bond_cat["atom_id_2"] = array.atom_name[bond_array[:, 1]]
|
|
1271
1261
|
bond_cat["value_order"] = np.array(order_flags)
|
|
1272
1262
|
bond_cat["pdbx_aromatic_flag"] = np.array(aromatic_flags)
|
|
1273
|
-
bond_cat["pdbx_ordinal"] = np.arange(
|
|
1274
|
-
1, len(bond_array) + 1
|
|
1275
|
-
).astype(str)
|
|
1263
|
+
bond_cat["pdbx_ordinal"] = np.arange(1, len(bond_array) + 1).astype(str)
|
|
1276
1264
|
block["chem_comp_bond"] = bond_cat
|
|
1277
1265
|
|
|
1266
|
+
|
|
1278
1267
|
def list_assemblies(pdbx_file, data_block=None):
|
|
1279
1268
|
"""
|
|
1280
1269
|
List the biological assemblies that are available for the structure
|
|
@@ -1325,14 +1314,21 @@ def list_assemblies(pdbx_file, data_block=None):
|
|
|
1325
1314
|
id: details
|
|
1326
1315
|
for id, details in zip(
|
|
1327
1316
|
assembly_category["id"].as_array(str),
|
|
1328
|
-
assembly_category["details"].as_array(str)
|
|
1317
|
+
assembly_category["details"].as_array(str),
|
|
1329
1318
|
)
|
|
1330
1319
|
}
|
|
1331
1320
|
|
|
1332
1321
|
|
|
1333
|
-
def get_assembly(
|
|
1334
|
-
|
|
1335
|
-
|
|
1322
|
+
def get_assembly(
|
|
1323
|
+
pdbx_file,
|
|
1324
|
+
assembly_id=None,
|
|
1325
|
+
model=None,
|
|
1326
|
+
data_block=None,
|
|
1327
|
+
altloc="first",
|
|
1328
|
+
extra_fields=None,
|
|
1329
|
+
use_author_fields=True,
|
|
1330
|
+
include_bonds=False,
|
|
1331
|
+
):
|
|
1336
1332
|
"""
|
|
1337
1333
|
Build the given biological assembly.
|
|
1338
1334
|
|
|
@@ -1389,7 +1385,7 @@ def get_assembly(pdbx_file, assembly_id=None, model=None, data_block=None,
|
|
|
1389
1385
|
for example both, ``label_seq_id`` and ``auth_seq_id`` describe
|
|
1390
1386
|
the ID of the residue.
|
|
1391
1387
|
While, the ``label_xxx`` fields can be used as official pointers
|
|
1392
|
-
to other categories in the
|
|
1388
|
+
to other categories in the file, the ``auth_xxx``
|
|
1393
1389
|
fields are set by the author(s) of the structure and are
|
|
1394
1390
|
consistent with the corresponding values in PDB files.
|
|
1395
1391
|
If `use_author_fields` is true, the annotation arrays will be
|
|
@@ -1422,9 +1418,7 @@ def get_assembly(pdbx_file, assembly_id=None, model=None, data_block=None,
|
|
|
1422
1418
|
try:
|
|
1423
1419
|
assembly_gen_category = block["pdbx_struct_assembly_gen"]
|
|
1424
1420
|
except KeyError:
|
|
1425
|
-
raise InvalidFileError(
|
|
1426
|
-
"File has no 'pdbx_struct_assembly_gen' category"
|
|
1427
|
-
)
|
|
1421
|
+
raise InvalidFileError("File has no 'pdbx_struct_assembly_gen' category")
|
|
1428
1422
|
|
|
1429
1423
|
try:
|
|
1430
1424
|
struct_oper_category = block["pdbx_struct_oper_list"]
|
|
@@ -1457,7 +1451,7 @@ def get_assembly(pdbx_file, assembly_id=None, model=None, data_block=None,
|
|
|
1457
1451
|
altloc,
|
|
1458
1452
|
extra_fields_and_asym,
|
|
1459
1453
|
use_author_fields,
|
|
1460
|
-
include_bonds
|
|
1454
|
+
include_bonds,
|
|
1461
1455
|
)
|
|
1462
1456
|
|
|
1463
1457
|
### Get transformations and apply them to the affected asym IDs
|
|
@@ -1473,9 +1467,7 @@ def get_assembly(pdbx_file, assembly_id=None, model=None, data_block=None,
|
|
|
1473
1467
|
operations = _parse_operation_expression(op_expr)
|
|
1474
1468
|
asym_ids = asym_id_expr.split(",")
|
|
1475
1469
|
# Filter affected asym IDs
|
|
1476
|
-
sub_structure = structure[
|
|
1477
|
-
..., np.isin(structure.label_asym_id, asym_ids)
|
|
1478
|
-
]
|
|
1470
|
+
sub_structure = structure[..., np.isin(structure.label_asym_id, asym_ids)]
|
|
1479
1471
|
sub_assembly = _apply_transformations(
|
|
1480
1472
|
sub_structure, transformations, operations
|
|
1481
1473
|
)
|
|
@@ -1534,10 +1526,9 @@ def _get_transformations(struct_oper):
|
|
|
1534
1526
|
for i in (1, 2, 3)
|
|
1535
1527
|
]
|
|
1536
1528
|
)
|
|
1537
|
-
translation_vector = np.array(
|
|
1538
|
-
struct_oper[f"vector[{i}]"].as_array(float)[index]
|
|
1539
|
-
|
|
1540
|
-
])
|
|
1529
|
+
translation_vector = np.array(
|
|
1530
|
+
[struct_oper[f"vector[{i}]"].as_array(float)[index] for i in (1, 2, 3)]
|
|
1531
|
+
)
|
|
1541
1532
|
transformation_dict[id] = (rotation_matrix, translation_vector)
|
|
1542
1533
|
return transformation_dict
|
|
1543
1534
|
|
|
@@ -1592,6 +1583,4 @@ def _convert_string_to_sequence(string, stype):
|
|
|
1592
1583
|
elif stype in _other_type_list:
|
|
1593
1584
|
return None
|
|
1594
1585
|
else:
|
|
1595
|
-
raise InvalidFileError(
|
|
1596
|
-
"mmCIF _entity_poly.type unsupported" " type: " + stype
|
|
1597
|
-
)
|
|
1586
|
+
raise InvalidFileError("mmCIF _entity_poly.type unsupported" " type: " + stype)
|