biotite 1.2.0__cp311-cp311-win_amd64.whl → 1.4.0__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biotite might be problematic. Click here for more details.
- biotite/application/viennarna/rnaplot.py +7 -7
- biotite/interface/openmm/__init__.py +4 -0
- biotite/interface/pymol/__init__.py +3 -0
- biotite/interface/pymol/object.py +3 -1
- biotite/interface/rdkit/__init__.py +4 -0
- biotite/interface/rdkit/mol.py +5 -5
- biotite/interface/version.py +23 -0
- biotite/sequence/align/banded.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/banded.pyx +1 -1
- biotite/sequence/align/kmeralphabet.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/kmersimilarity.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/kmertable.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/localgapped.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/localungapped.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/multiple.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/multiple.pyx +1 -2
- biotite/sequence/align/pairwise.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/pairwise.pyx +2 -4
- biotite/sequence/align/permutation.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/selector.cp311-win_amd64.pyd +0 -0
- biotite/sequence/align/tracetable.cp311-win_amd64.pyd +0 -0
- biotite/sequence/codec.cp311-win_amd64.pyd +0 -0
- biotite/sequence/phylo/nj.cp311-win_amd64.pyd +0 -0
- biotite/sequence/phylo/tree.cp311-win_amd64.pyd +0 -0
- biotite/sequence/phylo/upgma.cp311-win_amd64.pyd +0 -0
- biotite/structure/basepairs.py +13 -14
- biotite/structure/bonds.cp311-win_amd64.pyd +0 -0
- biotite/structure/bonds.pyx +67 -6
- biotite/structure/box.py +141 -3
- biotite/structure/celllist.cp311-win_amd64.pyd +0 -0
- biotite/structure/celllist.pyx +0 -1
- biotite/structure/chains.py +15 -21
- biotite/structure/charges.cp311-win_amd64.pyd +0 -0
- biotite/structure/compare.py +2 -0
- biotite/structure/dotbracket.py +4 -4
- biotite/structure/graphics/rna.py +19 -16
- biotite/structure/hbond.py +1 -2
- biotite/structure/info/components.bcif +0 -0
- biotite/structure/io/pdb/convert.py +84 -2
- biotite/structure/io/pdb/file.py +94 -7
- biotite/structure/io/pdb/hybrid36.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/pdbx/bcif.py +6 -3
- biotite/structure/io/pdbx/cif.py +5 -2
- biotite/structure/io/pdbx/compress.py +71 -34
- biotite/structure/io/pdbx/convert.py +226 -58
- biotite/structure/io/pdbx/encoding.cp311-win_amd64.pyd +0 -0
- biotite/structure/io/pdbx/encoding.pyx +39 -23
- biotite/structure/pseudoknots.py +6 -6
- biotite/structure/residues.py +10 -27
- biotite/structure/rings.py +118 -2
- biotite/structure/sasa.cp311-win_amd64.pyd +0 -0
- biotite/structure/sasa.pyx +28 -29
- biotite/structure/segments.py +55 -0
- biotite/structure/spacegroups.json +1567 -0
- biotite/structure/spacegroups.license +26 -0
- biotite/structure/superimpose.py +1 -191
- biotite/structure/transform.py +220 -1
- biotite/version.py +2 -2
- {biotite-1.2.0.dist-info → biotite-1.4.0.dist-info}/METADATA +4 -34
- {biotite-1.2.0.dist-info → biotite-1.4.0.dist-info}/RECORD +62 -60
- {biotite-1.2.0.dist-info → biotite-1.4.0.dist-info}/WHEEL +1 -1
- {biotite-1.2.0.dist-info → biotite-1.4.0.dist-info}/licenses/LICENSE.rst +0 -0
|
@@ -3,6 +3,7 @@ __name__ = "biotite.structure.io.pdbx"
|
|
|
3
3
|
__author__ = "Patrick Kunzmann"
|
|
4
4
|
|
|
5
5
|
import itertools
|
|
6
|
+
import warnings
|
|
6
7
|
import msgpack
|
|
7
8
|
import numpy as np
|
|
8
9
|
import biotite.structure.io.pdbx.bcif as bcif
|
|
@@ -17,7 +18,7 @@ from biotite.structure.io.pdbx.encoding import (
|
|
|
17
18
|
)
|
|
18
19
|
|
|
19
20
|
|
|
20
|
-
def compress(data, float_tolerance=1e-6):
|
|
21
|
+
def compress(data, float_tolerance=None, rtol=1e-6, atol=1e-4):
|
|
21
22
|
"""
|
|
22
23
|
Try to reduce the size of a *BinaryCIF* file (or block, category, etc.) by testing
|
|
23
24
|
different data encodings for each data array and selecting the one, which results in
|
|
@@ -29,6 +30,12 @@ def compress(data, float_tolerance=1e-6):
|
|
|
29
30
|
The data to compress.
|
|
30
31
|
float_tolerance : float, optional
|
|
31
32
|
The relative error that is accepted when compressing floating point numbers.
|
|
33
|
+
DEPRECATED: Use `rtol` instead.
|
|
34
|
+
rtol, atol : float, optional
|
|
35
|
+
The compression factor of floating point numbers is chosen such that
|
|
36
|
+
either the relative (`rtol`) or absolute (`atol`) tolerance is fulfilled
|
|
37
|
+
for each value, i.e. the difference between the compressed and uncompressed
|
|
38
|
+
value is smaller than the tolerance.
|
|
32
39
|
|
|
33
40
|
Returns
|
|
34
41
|
-------
|
|
@@ -49,64 +56,79 @@ def compress(data, float_tolerance=1e-6):
|
|
|
49
56
|
>>> pdbx_file.write(uncompressed_file)
|
|
50
57
|
>>> _ = uncompressed_file.seek(0)
|
|
51
58
|
>>> print(f"{len(uncompressed_file.read()) // 1000} KB")
|
|
52
|
-
|
|
59
|
+
937 KB
|
|
53
60
|
>>> # Write compressed file
|
|
54
61
|
>>> pdbx_file = compress(pdbx_file)
|
|
55
62
|
>>> compressed_file = BytesIO()
|
|
56
63
|
>>> pdbx_file.write(compressed_file)
|
|
57
64
|
>>> _ = compressed_file.seek(0)
|
|
58
65
|
>>> print(f"{len(compressed_file.read()) // 1000} KB")
|
|
59
|
-
|
|
66
|
+
114 KB
|
|
60
67
|
"""
|
|
68
|
+
if float_tolerance is not None:
|
|
69
|
+
warnings.warn(
|
|
70
|
+
"The 'float_tolerance' parameter is deprecated, use 'rtol' instead",
|
|
71
|
+
DeprecationWarning,
|
|
72
|
+
)
|
|
73
|
+
|
|
61
74
|
match type(data):
|
|
62
75
|
case bcif.BinaryCIFFile:
|
|
63
|
-
return _compress_file(data,
|
|
76
|
+
return _compress_file(data, rtol, atol)
|
|
64
77
|
case bcif.BinaryCIFBlock:
|
|
65
|
-
return _compress_block(data,
|
|
78
|
+
return _compress_block(data, rtol, atol)
|
|
66
79
|
case bcif.BinaryCIFCategory:
|
|
67
|
-
return _compress_category(data,
|
|
80
|
+
return _compress_category(data, rtol, atol)
|
|
68
81
|
case bcif.BinaryCIFColumn:
|
|
69
|
-
return _compress_column(data,
|
|
82
|
+
return _compress_column(data, rtol, atol)
|
|
70
83
|
case bcif.BinaryCIFData:
|
|
71
|
-
return _compress_data(data,
|
|
84
|
+
return _compress_data(data, rtol, atol)
|
|
72
85
|
case _:
|
|
73
86
|
raise TypeError(f"Unsupported type {type(data).__name__}")
|
|
74
87
|
|
|
75
88
|
|
|
76
|
-
def _compress_file(bcif_file,
|
|
89
|
+
def _compress_file(bcif_file, rtol, atol):
|
|
77
90
|
compressed_file = bcif.BinaryCIFFile()
|
|
78
91
|
for block_name, bcif_block in bcif_file.items():
|
|
79
|
-
|
|
92
|
+
try:
|
|
93
|
+
compressed_block = _compress_block(bcif_block, rtol, atol)
|
|
94
|
+
except Exception:
|
|
95
|
+
raise ValueError(f"Failed to compress block '{block_name}'")
|
|
80
96
|
compressed_file[block_name] = compressed_block
|
|
81
97
|
return compressed_file
|
|
82
98
|
|
|
83
99
|
|
|
84
|
-
def _compress_block(bcif_block,
|
|
100
|
+
def _compress_block(bcif_block, rtol, atol):
|
|
85
101
|
compressed_block = bcif.BinaryCIFBlock()
|
|
86
102
|
for category_name, bcif_category in bcif_block.items():
|
|
87
|
-
|
|
103
|
+
try:
|
|
104
|
+
compressed_category = _compress_category(bcif_category, rtol, atol)
|
|
105
|
+
except Exception:
|
|
106
|
+
raise ValueError(f"Failed to compress category '{category_name}'")
|
|
88
107
|
compressed_block[category_name] = compressed_category
|
|
89
108
|
return compressed_block
|
|
90
109
|
|
|
91
110
|
|
|
92
|
-
def _compress_category(bcif_category,
|
|
111
|
+
def _compress_category(bcif_category, rtol, atol):
|
|
93
112
|
compressed_category = bcif.BinaryCIFCategory()
|
|
94
113
|
for column_name, bcif_column in bcif_category.items():
|
|
95
|
-
|
|
114
|
+
try:
|
|
115
|
+
compressed_column = _compress_column(bcif_column, rtol, atol)
|
|
116
|
+
except Exception:
|
|
117
|
+
raise ValueError(f"Failed to compress column '{column_name}'")
|
|
96
118
|
compressed_category[column_name] = compressed_column
|
|
97
119
|
return compressed_category
|
|
98
120
|
|
|
99
121
|
|
|
100
|
-
def _compress_column(bcif_column,
|
|
101
|
-
data = _compress_data(bcif_column.data,
|
|
122
|
+
def _compress_column(bcif_column, rtol, atol):
|
|
123
|
+
data = _compress_data(bcif_column.data, rtol, atol)
|
|
102
124
|
if bcif_column.mask is not None:
|
|
103
|
-
mask = _compress_data(bcif_column.mask,
|
|
125
|
+
mask = _compress_data(bcif_column.mask, rtol, atol)
|
|
104
126
|
else:
|
|
105
127
|
mask = None
|
|
106
128
|
return bcif.BinaryCIFColumn(data, mask)
|
|
107
129
|
|
|
108
130
|
|
|
109
|
-
def _compress_data(bcif_data,
|
|
131
|
+
def _compress_data(bcif_data, rtol, atol):
|
|
110
132
|
array = bcif_data.array
|
|
111
133
|
if len(array) == 1:
|
|
112
134
|
# No need to compress a single value -> Use default uncompressed encoding
|
|
@@ -123,16 +145,28 @@ def _compress_data(bcif_data, float_tolerance):
|
|
|
123
145
|
return bcif.BinaryCIFData(array, [encoding])
|
|
124
146
|
|
|
125
147
|
elif np.issubdtype(array.dtype, np.floating):
|
|
148
|
+
if not np.isfinite(array).all():
|
|
149
|
+
# NaN/inf values cannot be represented by integers
|
|
150
|
+
# -> do not use integer encoding
|
|
151
|
+
return bcif.BinaryCIFData(array, [ByteArrayEncoding()])
|
|
126
152
|
to_integer_encoding = FixedPointEncoding(
|
|
127
|
-
10 ** _get_decimal_places(array,
|
|
153
|
+
10 ** _get_decimal_places(array, rtol, atol)
|
|
128
154
|
)
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
# The float array is smaller -> encode it directly as bytes
|
|
155
|
+
try:
|
|
156
|
+
integer_array = to_integer_encoding.encode(array)
|
|
157
|
+
except ValueError:
|
|
158
|
+
# With the given tolerances integer underflow/overflow would occur
|
|
159
|
+
# -> do not use integer encoding
|
|
135
160
|
return bcif.BinaryCIFData(array, [ByteArrayEncoding()])
|
|
161
|
+
else:
|
|
162
|
+
best_encoding, size_compressed = _find_best_integer_compression(
|
|
163
|
+
integer_array
|
|
164
|
+
)
|
|
165
|
+
if size_compressed < _data_size_in_file(bcif.BinaryCIFData(array)):
|
|
166
|
+
return bcif.BinaryCIFData(array, [to_integer_encoding] + best_encoding)
|
|
167
|
+
else:
|
|
168
|
+
# The float array is smaller -> encode it directly as bytes
|
|
169
|
+
return bcif.BinaryCIFData(array, [ByteArrayEncoding()])
|
|
136
170
|
|
|
137
171
|
elif np.issubdtype(array.dtype, np.integer):
|
|
138
172
|
array = _to_smallest_integer_type(array)
|
|
@@ -273,7 +307,7 @@ def _data_size_in_file(data):
|
|
|
273
307
|
return len(bytes_in_file)
|
|
274
308
|
|
|
275
309
|
|
|
276
|
-
def _get_decimal_places(array,
|
|
310
|
+
def _get_decimal_places(array, rtol, atol):
|
|
277
311
|
"""
|
|
278
312
|
Get the number of decimal places in a floating point array.
|
|
279
313
|
|
|
@@ -281,21 +315,24 @@ def _get_decimal_places(array, tol):
|
|
|
281
315
|
----------
|
|
282
316
|
array : numpy.ndarray
|
|
283
317
|
The array to analyze.
|
|
284
|
-
|
|
285
|
-
The relative tolerance allowed when the values are cut off after
|
|
286
|
-
number of decimal places.
|
|
318
|
+
rtol, atol : float, optional
|
|
319
|
+
The relative and absolute tolerance allowed when the values are cut off after
|
|
320
|
+
the returned number of decimal places.
|
|
287
321
|
|
|
288
322
|
Returns
|
|
289
323
|
-------
|
|
290
324
|
decimals : int
|
|
291
325
|
The number of decimal places.
|
|
292
326
|
"""
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
327
|
+
if rtol <= 0 and atol <= 0:
|
|
328
|
+
raise ValueError("At least one of 'rtol' and 'atol' must be greater than 0")
|
|
329
|
+
# 0 would give NaN when rounding on decimals
|
|
330
|
+
array = array[array != 0]
|
|
331
|
+
for decimals in itertools.count(start=min(0, -_order_magnitude(array))):
|
|
297
332
|
error = np.abs(np.round(array, decimals) - array)
|
|
298
|
-
if
|
|
333
|
+
if decimals == 100:
|
|
334
|
+
raise
|
|
335
|
+
if np.all((error < rtol * np.abs(array)) | (error < atol)):
|
|
299
336
|
return decimals
|
|
300
337
|
|
|
301
338
|
|
|
@@ -13,17 +13,30 @@ __all__ = [
|
|
|
13
13
|
"set_component",
|
|
14
14
|
"list_assemblies",
|
|
15
15
|
"get_assembly",
|
|
16
|
+
"get_unit_cell",
|
|
16
17
|
"get_sse",
|
|
17
18
|
]
|
|
18
19
|
|
|
19
20
|
import itertools
|
|
20
21
|
import warnings
|
|
22
|
+
from collections import defaultdict
|
|
21
23
|
import numpy as np
|
|
22
24
|
from biotite.file import InvalidFileError
|
|
23
25
|
from biotite.sequence.seqtypes import NucleotideSequence, ProteinSequence
|
|
24
|
-
from biotite.structure.atoms import
|
|
26
|
+
from biotite.structure.atoms import (
|
|
27
|
+
AtomArray,
|
|
28
|
+
AtomArrayStack,
|
|
29
|
+
concatenate,
|
|
30
|
+
repeat,
|
|
31
|
+
)
|
|
25
32
|
from biotite.structure.bonds import BondList, BondType, connect_via_residue_names
|
|
26
|
-
from biotite.structure.box import
|
|
33
|
+
from biotite.structure.box import (
|
|
34
|
+
coord_to_fraction,
|
|
35
|
+
fraction_to_coord,
|
|
36
|
+
space_group_transforms,
|
|
37
|
+
unitcell_from_vectors,
|
|
38
|
+
vectors_from_unitcell,
|
|
39
|
+
)
|
|
27
40
|
from biotite.structure.error import BadStructureError
|
|
28
41
|
from biotite.structure.filter import _canonical_aa_list as canonical_aa_list
|
|
29
42
|
from biotite.structure.filter import (
|
|
@@ -33,6 +46,7 @@ from biotite.structure.filter import (
|
|
|
33
46
|
filter_first_altloc,
|
|
34
47
|
filter_highest_occupancy_altloc,
|
|
35
48
|
)
|
|
49
|
+
from biotite.structure.geometry import centroid
|
|
36
50
|
from biotite.structure.io.pdbx.bcif import (
|
|
37
51
|
BinaryCIFBlock,
|
|
38
52
|
BinaryCIFColumn,
|
|
@@ -46,7 +60,7 @@ from biotite.structure.residues import (
|
|
|
46
60
|
get_residue_positions,
|
|
47
61
|
get_residue_starts_for,
|
|
48
62
|
)
|
|
49
|
-
from biotite.structure.
|
|
63
|
+
from biotite.structure.transform import AffineTransformation
|
|
50
64
|
|
|
51
65
|
# Bond types in `struct_conn` category that refer to covalent bonds
|
|
52
66
|
PDBX_BOND_TYPE_ID_TO_TYPE = {
|
|
@@ -125,8 +139,7 @@ _other_type_list = [
|
|
|
125
139
|
|
|
126
140
|
def _filter(category, index):
|
|
127
141
|
"""
|
|
128
|
-
Reduce the
|
|
129
|
-
model.
|
|
142
|
+
Reduce the given category to the values selected by the given index.
|
|
130
143
|
"""
|
|
131
144
|
Category = type(category)
|
|
132
145
|
Column = Category.subcomponent_class()
|
|
@@ -391,7 +404,16 @@ def get_structure(
|
|
|
391
404
|
|
|
392
405
|
# The below part is the same for both, AtomArray and AtomArrayStack
|
|
393
406
|
_fill_annotations(atoms, model_atom_site, extra_fields, use_author_fields)
|
|
407
|
+
|
|
408
|
+
atoms, altloc_filtered_atom_site = _filter_altloc(atoms, model_atom_site, altloc)
|
|
409
|
+
|
|
394
410
|
if include_bonds:
|
|
411
|
+
if altloc == "all":
|
|
412
|
+
raise ValueError(
|
|
413
|
+
"Bond computation is not supported with `altloc='all', consider using "
|
|
414
|
+
"'connect_via_residue_names()' afterwards"
|
|
415
|
+
)
|
|
416
|
+
|
|
395
417
|
if "chem_comp_bond" in block:
|
|
396
418
|
try:
|
|
397
419
|
custom_bond_dict = _parse_intra_residue_bonds(block["chem_comp_bond"])
|
|
@@ -407,10 +429,13 @@ def get_structure(
|
|
|
407
429
|
bonds = connect_via_residue_names(atoms)
|
|
408
430
|
if "struct_conn" in block:
|
|
409
431
|
bonds = bonds.merge(
|
|
410
|
-
_parse_inter_residue_bonds(
|
|
432
|
+
_parse_inter_residue_bonds(
|
|
433
|
+
altloc_filtered_atom_site,
|
|
434
|
+
block["struct_conn"],
|
|
435
|
+
atom_count=atoms.array_length(),
|
|
436
|
+
)
|
|
411
437
|
)
|
|
412
438
|
atoms.bonds = bonds
|
|
413
|
-
atoms = _filter_altloc(atoms, model_atom_site, altloc)
|
|
414
439
|
|
|
415
440
|
return atoms
|
|
416
441
|
|
|
@@ -570,11 +595,12 @@ def _parse_intra_residue_bonds(chem_comp_bond):
|
|
|
570
595
|
return custom_bond_dict
|
|
571
596
|
|
|
572
597
|
|
|
573
|
-
def _parse_inter_residue_bonds(atom_site, struct_conn):
|
|
598
|
+
def _parse_inter_residue_bonds(atom_site, struct_conn, atom_count=None):
|
|
574
599
|
"""
|
|
575
600
|
Create inter-residue bonds by parsing the ``struct_conn`` category.
|
|
576
601
|
The atom indices of each bond are found by matching the bond labels
|
|
577
602
|
to the ``atom_site`` category.
|
|
603
|
+
If atom_count is None, it will be inferred from the ``atom_site`` category.
|
|
578
604
|
"""
|
|
579
605
|
# Identity symmetry operation
|
|
580
606
|
IDENTITY = "1_555"
|
|
@@ -643,7 +669,7 @@ def _parse_inter_residue_bonds(atom_site, struct_conn):
|
|
|
643
669
|
bond_types = [PDBX_BOND_TYPE_ID_TO_TYPE[type_id] for type_id in bond_type_id]
|
|
644
670
|
|
|
645
671
|
return BondList(
|
|
646
|
-
atom_site.row_count,
|
|
672
|
+
atom_count if atom_count is not None else atom_site.row_count,
|
|
647
673
|
np.stack([atoms_indices_1, atoms_indices_2, bond_types], axis=-1),
|
|
648
674
|
)
|
|
649
675
|
|
|
@@ -739,25 +765,31 @@ def _get_struct_conn_col_name(col_name, partner):
|
|
|
739
765
|
|
|
740
766
|
|
|
741
767
|
def _filter_altloc(array, atom_site, altloc):
|
|
768
|
+
"""
|
|
769
|
+
Filter the given :class:`AtomArray` and ``atom_site`` category to the rows
|
|
770
|
+
specified by the given *altloc* identifier.
|
|
771
|
+
"""
|
|
742
772
|
altloc_ids = atom_site.get("label_alt_id")
|
|
743
773
|
occupancy = atom_site.get("occupancy")
|
|
744
774
|
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
return array
|
|
775
|
+
if altloc == "all":
|
|
776
|
+
array.set_annotation("altloc_id", altloc_ids.as_array(str))
|
|
777
|
+
return array, atom_site
|
|
778
|
+
elif altloc_ids is None or (
|
|
779
|
+
altloc_ids.mask is not None
|
|
780
|
+
and (altloc_ids.mask.array != MaskValue.PRESENT).all()
|
|
781
|
+
):
|
|
782
|
+
# No altlocs in atom_site category
|
|
783
|
+
return array, atom_site
|
|
748
784
|
elif altloc == "occupancy" and occupancy is not None:
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
),
|
|
754
|
-
]
|
|
785
|
+
mask = filter_highest_occupancy_altloc(
|
|
786
|
+
array, altloc_ids.as_array(str), occupancy.as_array(float)
|
|
787
|
+
)
|
|
788
|
+
return array[..., mask], _filter(atom_site, mask)
|
|
755
789
|
# 'first' is also fallback if file has no occupancy information
|
|
756
790
|
elif altloc == "first":
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
array.set_annotation("altloc_id", altloc_ids.as_array(str))
|
|
760
|
-
return array
|
|
791
|
+
mask = filter_first_altloc(array, altloc_ids.as_array(str))
|
|
792
|
+
return array[..., mask], _filter(atom_site, mask)
|
|
761
793
|
else:
|
|
762
794
|
raise ValueError(f"'{altloc}' is not a valid 'altloc' option")
|
|
763
795
|
|
|
@@ -844,11 +876,7 @@ def set_structure(
|
|
|
844
876
|
this parameter is ignored.
|
|
845
877
|
If the file is empty, a new data block will be created.
|
|
846
878
|
include_bonds : bool, optional
|
|
847
|
-
|
|
848
|
-
intra-residue bonds will be written into the ``chem_comp_bond``
|
|
849
|
-
category.
|
|
850
|
-
Inter-residue bonds will be written into the ``struct_conn``
|
|
851
|
-
independent of this parameter.
|
|
879
|
+
DEPRECATED: Has no effect anymore.
|
|
852
880
|
extra_fields : list of str, optional
|
|
853
881
|
List of additional fields from the ``atom_site`` category
|
|
854
882
|
that should be written into the file.
|
|
@@ -869,6 +897,13 @@ def set_structure(
|
|
|
869
897
|
>>> set_structure(file, atom_array)
|
|
870
898
|
>>> file.write(os.path.join(path_to_directory, "structure.cif"))
|
|
871
899
|
"""
|
|
900
|
+
if include_bonds:
|
|
901
|
+
warnings.warn(
|
|
902
|
+
"`include_bonds` parameter is deprecated, "
|
|
903
|
+
"intra-residue are always written, if available",
|
|
904
|
+
DeprecationWarning,
|
|
905
|
+
)
|
|
906
|
+
|
|
872
907
|
_check_non_empty(array)
|
|
873
908
|
|
|
874
909
|
block = _get_or_create_block(pdbx_file, data_block)
|
|
@@ -946,10 +981,9 @@ def set_structure(
|
|
|
946
981
|
struct_conn = _set_inter_residue_bonds(array, atom_site)
|
|
947
982
|
if struct_conn is not None:
|
|
948
983
|
block["struct_conn"] = struct_conn
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
block["chem_comp_bond"] = chem_comp_bond
|
|
984
|
+
chem_comp_bond = _set_intra_residue_bonds(array, atom_site)
|
|
985
|
+
if chem_comp_bond is not None:
|
|
986
|
+
block["chem_comp_bond"] = chem_comp_bond
|
|
953
987
|
|
|
954
988
|
# In case of a single model handle each coordinate
|
|
955
989
|
# simply like a flattened array
|
|
@@ -1623,11 +1657,11 @@ def get_assembly(
|
|
|
1623
1657
|
If set to true, a :class:`BondList` will be created for the
|
|
1624
1658
|
resulting :class:`AtomArray` containing the bond information
|
|
1625
1659
|
from the file.
|
|
1626
|
-
|
|
1627
|
-
|
|
1628
|
-
|
|
1629
|
-
|
|
1630
|
-
|
|
1660
|
+
Inter-residue bonds will be read from the ``struct_conn``
|
|
1661
|
+
category.
|
|
1662
|
+
Intra-residue bonds will be read from the ``chem_comp_bond``, if
|
|
1663
|
+
available, otherwise they will be derived from the Chemical
|
|
1664
|
+
Component Dictionary.
|
|
1631
1665
|
|
|
1632
1666
|
Returns
|
|
1633
1667
|
-------
|
|
@@ -1686,7 +1720,7 @@ def get_assembly(
|
|
|
1686
1720
|
)
|
|
1687
1721
|
|
|
1688
1722
|
### Get transformations and apply them to the affected asym IDs
|
|
1689
|
-
|
|
1723
|
+
chain_ops = defaultdict(list)
|
|
1690
1724
|
for id, op_expr, asym_id_expr in zip(
|
|
1691
1725
|
assembly_gen_category["assembly_id"].as_array(str),
|
|
1692
1726
|
assembly_gen_category["oper_expression"].as_array(str),
|
|
@@ -1695,19 +1729,22 @@ def get_assembly(
|
|
|
1695
1729
|
# Find the operation expressions for given assembly ID
|
|
1696
1730
|
# We already asserted that the ID is actually present
|
|
1697
1731
|
if id == assembly_id:
|
|
1698
|
-
|
|
1699
|
-
|
|
1700
|
-
|
|
1701
|
-
|
|
1702
|
-
|
|
1703
|
-
|
|
1704
|
-
|
|
1705
|
-
|
|
1706
|
-
|
|
1707
|
-
|
|
1708
|
-
|
|
1709
|
-
|
|
1710
|
-
|
|
1732
|
+
for chain_id in asym_id_expr.split(","):
|
|
1733
|
+
chain_ops[chain_id].extend(_parse_operation_expression(op_expr))
|
|
1734
|
+
|
|
1735
|
+
sub_assemblies = []
|
|
1736
|
+
for asym_id, op_list in chain_ops.items():
|
|
1737
|
+
sub_struct = structure[..., structure.label_asym_id == asym_id]
|
|
1738
|
+
sub_assembly = _apply_transformations(sub_struct, transformations, op_list)
|
|
1739
|
+
# Merge the chain's sub_assembly into the rest of the assembly
|
|
1740
|
+
sub_assemblies.append(sub_assembly)
|
|
1741
|
+
assembly = concatenate(sub_assemblies)
|
|
1742
|
+
|
|
1743
|
+
# Sort AtomArray or AtomArrayStack by 'sym_id'
|
|
1744
|
+
max_sym_id = assembly.sym_id.max()
|
|
1745
|
+
assembly = concatenate(
|
|
1746
|
+
[assembly[..., assembly.sym_id == sym_id] for sym_id in range(max_sym_id + 1)]
|
|
1747
|
+
)
|
|
1711
1748
|
|
|
1712
1749
|
# Remove 'label_asym_id', if it was not included in the original
|
|
1713
1750
|
# user-supplied 'extra_fields'
|
|
@@ -1730,11 +1767,7 @@ def _apply_transformations(structure, transformation_dict, operations):
|
|
|
1730
1767
|
# Execute for each transformation step
|
|
1731
1768
|
# in the operation expression
|
|
1732
1769
|
for op_step in operation:
|
|
1733
|
-
|
|
1734
|
-
# Rotate
|
|
1735
|
-
coord = matrix_rotate(coord, rotation_matrix)
|
|
1736
|
-
# Translate
|
|
1737
|
-
coord += translation_vector
|
|
1770
|
+
coord = transformation_dict[op_step].apply(coord)
|
|
1738
1771
|
assembly_coord[i] = coord
|
|
1739
1772
|
|
|
1740
1773
|
assembly = repeat(structure, assembly_coord)
|
|
@@ -1746,8 +1779,7 @@ def _apply_transformations(structure, transformation_dict, operations):
|
|
|
1746
1779
|
|
|
1747
1780
|
def _get_transformations(struct_oper):
|
|
1748
1781
|
"""
|
|
1749
|
-
Get transformation
|
|
1750
|
-
translation for each operation ID in ``pdbx_struct_oper_list``.
|
|
1782
|
+
Get affine transformation for each operation ID in ``pdbx_struct_oper_list``.
|
|
1751
1783
|
"""
|
|
1752
1784
|
transformation_dict = {}
|
|
1753
1785
|
for index, id in enumerate(struct_oper["id"].as_array(str)):
|
|
@@ -1763,7 +1795,9 @@ def _get_transformations(struct_oper):
|
|
|
1763
1795
|
translation_vector = np.array(
|
|
1764
1796
|
[struct_oper[f"vector[{i}]"].as_array(float)[index] for i in (1, 2, 3)]
|
|
1765
1797
|
)
|
|
1766
|
-
transformation_dict[id] = (
|
|
1798
|
+
transformation_dict[id] = AffineTransformation(
|
|
1799
|
+
np.zeros(3), rotation_matrix, translation_vector
|
|
1800
|
+
)
|
|
1767
1801
|
return transformation_dict
|
|
1768
1802
|
|
|
1769
1803
|
|
|
@@ -1820,6 +1854,140 @@ def _convert_string_to_sequence(string, stype):
|
|
|
1820
1854
|
raise InvalidFileError("mmCIF _entity_poly.type unsupported type: " + stype)
|
|
1821
1855
|
|
|
1822
1856
|
|
|
1857
|
+
def get_unit_cell(
|
|
1858
|
+
pdbx_file,
|
|
1859
|
+
center=True,
|
|
1860
|
+
model=None,
|
|
1861
|
+
data_block=None,
|
|
1862
|
+
altloc="first",
|
|
1863
|
+
extra_fields=None,
|
|
1864
|
+
use_author_fields=True,
|
|
1865
|
+
include_bonds=False,
|
|
1866
|
+
):
|
|
1867
|
+
"""
|
|
1868
|
+
Build a structure model containing all symmetric copies of the structure within a
|
|
1869
|
+
single unit cell.
|
|
1870
|
+
|
|
1871
|
+
This function receives the data from the ``symmetry`` and ``atom_site`` categories
|
|
1872
|
+
in the file.
|
|
1873
|
+
Consequently, these categories must be present in the file.
|
|
1874
|
+
|
|
1875
|
+
Parameters
|
|
1876
|
+
----------
|
|
1877
|
+
pdbx_file : CIFFile or CIFBlock or BinaryCIFFile or BinaryCIFBlock
|
|
1878
|
+
The file object.
|
|
1879
|
+
center : bool, optional
|
|
1880
|
+
If set to true, each symmetric copy will be moved inside the unit cell
|
|
1881
|
+
dimensions, if its centroid is outside.
|
|
1882
|
+
By default, the copies are created using the raw space group
|
|
1883
|
+
transformations, which may put them one unit cell length further away.
|
|
1884
|
+
model : int, optional
|
|
1885
|
+
If this parameter is given, the function will return an
|
|
1886
|
+
:class:`AtomArray` from the atoms corresponding to the given
|
|
1887
|
+
model number (starting at 1).
|
|
1888
|
+
Negative values are used to index models starting from the last
|
|
1889
|
+
model instead of the first model.
|
|
1890
|
+
If this parameter is omitted, an :class:`AtomArrayStack`
|
|
1891
|
+
containing all models will be returned, even if the structure
|
|
1892
|
+
contains only one model.
|
|
1893
|
+
data_block : str, optional
|
|
1894
|
+
The name of the data block.
|
|
1895
|
+
Default is the first (and most times only) data block of the
|
|
1896
|
+
file.
|
|
1897
|
+
If the data block object is passed directly to `pdbx_file`,
|
|
1898
|
+
this parameter is ignored.
|
|
1899
|
+
altloc : {'first', 'occupancy', 'all'}
|
|
1900
|
+
This parameter defines how *altloc* IDs are handled:
|
|
1901
|
+
- ``'first'`` - Use atoms that have the first *altloc* ID
|
|
1902
|
+
appearing in a residue.
|
|
1903
|
+
- ``'occupancy'`` - Use atoms that have the *altloc* ID
|
|
1904
|
+
with the highest occupancy for a residue.
|
|
1905
|
+
- ``'all'`` - Use all atoms.
|
|
1906
|
+
Note that this leads to duplicate atoms.
|
|
1907
|
+
When this option is chosen, the ``altloc_id`` annotation
|
|
1908
|
+
array is added to the returned structure.
|
|
1909
|
+
extra_fields : list of str, optional
|
|
1910
|
+
The strings in the list are entry names, that are
|
|
1911
|
+
additionally added as annotation arrays.
|
|
1912
|
+
The annotation category name will be the same as the PDBx
|
|
1913
|
+
subcategory name.
|
|
1914
|
+
The array type is always `str`.
|
|
1915
|
+
An exception are the special field identifiers:
|
|
1916
|
+
``'atom_id'``, ``'b_factor'``, ``'occupancy'`` and ``'charge'``.
|
|
1917
|
+
These will convert the fitting subcategory into an
|
|
1918
|
+
annotation array with reasonable type.
|
|
1919
|
+
use_author_fields : bool, optional
|
|
1920
|
+
Some fields can be read from two alternative sources,
|
|
1921
|
+
for example both, ``label_seq_id`` and ``auth_seq_id`` describe
|
|
1922
|
+
the ID of the residue.
|
|
1923
|
+
While the ``label_xxx`` fields can be used as official pointers
|
|
1924
|
+
to other categories in the file, the ``auth_xxx``
|
|
1925
|
+
fields are set by the author(s) of the structure and are
|
|
1926
|
+
consistent with the corresponding values in PDB files.
|
|
1927
|
+
If `use_author_fields` is true, the annotation arrays will be
|
|
1928
|
+
read from the ``auth_xxx`` fields (if applicable),
|
|
1929
|
+
otherwise from the ``label_xxx`` fields.
|
|
1930
|
+
include_bonds : bool, optional
|
|
1931
|
+
If set to true, a :class:`BondList` will be created for the
|
|
1932
|
+
resulting :class:`AtomArray` containing the bond information
|
|
1933
|
+
from the file.
|
|
1934
|
+
Inter-residue bonds will be read from the ``struct_conn``
|
|
1935
|
+
category.
|
|
1936
|
+
Intra-residue bonds will be read from the ``chem_comp_bond``, if
|
|
1937
|
+
available, otherwise they will be derived from the Chemical
|
|
1938
|
+
Component Dictionary.
|
|
1939
|
+
|
|
1940
|
+
Returns
|
|
1941
|
+
-------
|
|
1942
|
+
unit_cell : AtomArray or AtomArrayStack
|
|
1943
|
+
The structure representing the unit cell.
|
|
1944
|
+
The return type depends on the `model` parameter.
|
|
1945
|
+
Contains the `sym_id` annotation, which enumerates the copies of the asymmetric
|
|
1946
|
+
unit in the unit cell.
|
|
1947
|
+
|
|
1948
|
+
Examples
|
|
1949
|
+
--------
|
|
1950
|
+
|
|
1951
|
+
>>> import os.path
|
|
1952
|
+
>>> file = CIFFile.read(os.path.join(path_to_structures, "1f2n.cif"))
|
|
1953
|
+
>>> unit_cell = get_unit_cell(file, model=1)
|
|
1954
|
+
"""
|
|
1955
|
+
block = _get_block(pdbx_file, data_block)
|
|
1956
|
+
|
|
1957
|
+
try:
|
|
1958
|
+
space_group = block["symmetry"]["space_group_name_H-M"].as_item()
|
|
1959
|
+
except KeyError:
|
|
1960
|
+
raise InvalidFileError("File has no 'symmetry.space_group_name_H-M' field")
|
|
1961
|
+
transforms = space_group_transforms(space_group)
|
|
1962
|
+
|
|
1963
|
+
asym = get_structure(
|
|
1964
|
+
pdbx_file,
|
|
1965
|
+
model,
|
|
1966
|
+
data_block,
|
|
1967
|
+
altloc,
|
|
1968
|
+
extra_fields,
|
|
1969
|
+
use_author_fields,
|
|
1970
|
+
include_bonds,
|
|
1971
|
+
)
|
|
1972
|
+
|
|
1973
|
+
fractional_asym_coord = coord_to_fraction(asym.coord, asym.box)
|
|
1974
|
+
unit_cell_copies = []
|
|
1975
|
+
for transform in transforms:
|
|
1976
|
+
fractional_coord = transform.apply(fractional_asym_coord)
|
|
1977
|
+
if center:
|
|
1978
|
+
# If the centroid is outside the box, move the copy inside the box
|
|
1979
|
+
orig_centroid = centroid(fractional_coord)
|
|
1980
|
+
new_centroid = orig_centroid % 1
|
|
1981
|
+
fractional_coord += (new_centroid - orig_centroid)[..., np.newaxis, :]
|
|
1982
|
+
unit_cell_copies.append(fraction_to_coord(fractional_coord, asym.box))
|
|
1983
|
+
|
|
1984
|
+
unit_cell = repeat(asym, np.stack(unit_cell_copies, axis=0))
|
|
1985
|
+
unit_cell.set_annotation(
|
|
1986
|
+
"sym_id", np.repeat(np.arange(len(transforms)), asym.array_length())
|
|
1987
|
+
)
|
|
1988
|
+
return unit_cell
|
|
1989
|
+
|
|
1990
|
+
|
|
1823
1991
|
def get_sse(pdbx_file, data_block=None, match_model=None):
|
|
1824
1992
|
"""
|
|
1825
1993
|
Get the secondary structure from a PDBx file.
|
|
Binary file
|