biotite 1.2.0__cp311-cp311-win_amd64.whl → 1.3.0__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biotite might be problematic. Click here for more details.

Files changed (56) hide show
  1. biotite/application/viennarna/rnaplot.py +7 -7
  2. biotite/interface/openmm/__init__.py +4 -0
  3. biotite/interface/pymol/__init__.py +3 -0
  4. biotite/interface/rdkit/__init__.py +4 -0
  5. biotite/interface/version.py +23 -0
  6. biotite/sequence/align/banded.cp311-win_amd64.pyd +0 -0
  7. biotite/sequence/align/banded.pyx +1 -1
  8. biotite/sequence/align/kmeralphabet.cp311-win_amd64.pyd +0 -0
  9. biotite/sequence/align/kmersimilarity.cp311-win_amd64.pyd +0 -0
  10. biotite/sequence/align/kmertable.cp311-win_amd64.pyd +0 -0
  11. biotite/sequence/align/localgapped.cp311-win_amd64.pyd +0 -0
  12. biotite/sequence/align/localungapped.cp311-win_amd64.pyd +0 -0
  13. biotite/sequence/align/multiple.cp311-win_amd64.pyd +0 -0
  14. biotite/sequence/align/multiple.pyx +1 -2
  15. biotite/sequence/align/pairwise.cp311-win_amd64.pyd +0 -0
  16. biotite/sequence/align/pairwise.pyx +2 -4
  17. biotite/sequence/align/permutation.cp311-win_amd64.pyd +0 -0
  18. biotite/sequence/align/selector.cp311-win_amd64.pyd +0 -0
  19. biotite/sequence/align/tracetable.cp311-win_amd64.pyd +0 -0
  20. biotite/sequence/codec.cp311-win_amd64.pyd +0 -0
  21. biotite/sequence/phylo/nj.cp311-win_amd64.pyd +0 -0
  22. biotite/sequence/phylo/tree.cp311-win_amd64.pyd +0 -0
  23. biotite/sequence/phylo/upgma.cp311-win_amd64.pyd +0 -0
  24. biotite/structure/basepairs.py +13 -14
  25. biotite/structure/bonds.cp311-win_amd64.pyd +0 -0
  26. biotite/structure/box.py +140 -2
  27. biotite/structure/celllist.cp311-win_amd64.pyd +0 -0
  28. biotite/structure/celllist.pyx +0 -1
  29. biotite/structure/chains.py +15 -21
  30. biotite/structure/charges.cp311-win_amd64.pyd +0 -0
  31. biotite/structure/dotbracket.py +4 -4
  32. biotite/structure/graphics/rna.py +19 -16
  33. biotite/structure/hbond.py +1 -2
  34. biotite/structure/info/components.bcif +0 -0
  35. biotite/structure/io/pdb/convert.py +84 -2
  36. biotite/structure/io/pdb/file.py +79 -2
  37. biotite/structure/io/pdb/hybrid36.cp311-win_amd64.pyd +0 -0
  38. biotite/structure/io/pdbx/compress.py +69 -32
  39. biotite/structure/io/pdbx/convert.py +207 -44
  40. biotite/structure/io/pdbx/encoding.cp311-win_amd64.pyd +0 -0
  41. biotite/structure/io/pdbx/encoding.pyx +39 -23
  42. biotite/structure/pseudoknots.py +6 -6
  43. biotite/structure/residues.py +10 -27
  44. biotite/structure/rings.py +1 -1
  45. biotite/structure/sasa.cp311-win_amd64.pyd +0 -0
  46. biotite/structure/sasa.pyx +28 -29
  47. biotite/structure/segments.py +55 -0
  48. biotite/structure/spacegroups.json +1567 -0
  49. biotite/structure/spacegroups.license +26 -0
  50. biotite/structure/superimpose.py +1 -191
  51. biotite/structure/transform.py +220 -1
  52. biotite/version.py +2 -2
  53. {biotite-1.2.0.dist-info → biotite-1.3.0.dist-info}/METADATA +4 -34
  54. {biotite-1.2.0.dist-info → biotite-1.3.0.dist-info}/RECORD +56 -54
  55. {biotite-1.2.0.dist-info → biotite-1.3.0.dist-info}/WHEEL +1 -1
  56. {biotite-1.2.0.dist-info → biotite-1.3.0.dist-info}/licenses/LICENSE.rst +0 -0
@@ -15,9 +15,11 @@ __all__ = [
15
15
  "set_structure",
16
16
  "list_assemblies",
17
17
  "get_assembly",
18
- "get_symmetry_mates",
18
+ "get_unit_cell",
19
19
  ]
20
20
 
21
+ import warnings
22
+
21
23
 
22
24
  def get_model_count(pdb_file):
23
25
  """
@@ -232,6 +234,80 @@ def get_assembly(
232
234
  )
233
235
 
234
236
 
237
+ def get_unit_cell(
238
+ pdb_file, model=None, altloc="first", extra_fields=[], include_bonds=False
239
+ ):
240
+ """
241
+ Build a structure model containing all symmetric copies
242
+ of the structure within a single unit cell, given by the space
243
+ group.
244
+
245
+ This function receives the data from ``REMARK 290`` records in
246
+ the file.
247
+ Consequently, this remark must be present in the file, which is
248
+ usually only true for crystal structures.
249
+
250
+ Parameters
251
+ ----------
252
+ pdb_file : PDBFile
253
+ The file object.
254
+ model : int, optional
255
+ If this parameter is given, the function will return an
256
+ :class:`AtomArray` from the atoms corresponding to the given
257
+ model number (starting at 1).
258
+ Negative values are used to index models starting from the
259
+ last model instead of the first model.
260
+ If this parameter is omitted, an :class:`AtomArrayStack`
261
+ containing all models will be returned, even if the
262
+ structure contains only one model.
263
+ altloc : {'first', 'occupancy', 'all'}
264
+ This parameter defines how *altloc* IDs are handled:
265
+ - ``'first'`` - Use atoms that have the first
266
+ *altloc* ID appearing in a residue.
267
+ - ``'occupancy'`` - Use atoms that have the *altloc* ID
268
+ with the highest occupancy for a residue.
269
+ - ``'all'`` - Use all atoms.
270
+ Note that this leads to duplicate atoms.
271
+ When this option is chosen, the ``altloc_id``
272
+ annotation array is added to the returned structure.
273
+ extra_fields : list of str, optional
274
+ The strings in the list are optional annotation categories
275
+ that should be stored in the output array or stack.
276
+ These are valid values:
277
+ ``'atom_id'``, ``'b_factor'``, ``'occupancy'`` and
278
+ ``'charge'``.
279
+ include_bonds : bool, optional
280
+ If set to true, a :class:`BondList` will be created for the
281
+ resulting :class:`AtomArray` containing the bond information
282
+ from the file.
283
+ Bonds, whose order could not be determined from the
284
+ *Chemical Component Dictionary*
285
+ (e.g. especially inter-residue bonds),
286
+ have :attr:`BondType.ANY`, since the PDB format itself does
287
+ not support bond orders.
288
+
289
+ Returns
290
+ -------
291
+ symmetry_mates : AtomArray or AtomArrayStack
292
+ All atoms within a single unit cell.
293
+ The return type depends on the `model` parameter.
294
+
295
+ Notes
296
+ -----
297
+ To expand the structure beyond a single unit cell, use
298
+ :func:`repeat_box()` with the return value as its
299
+ input.
300
+
301
+ Examples
302
+ --------
303
+
304
+ >>> import os.path
305
+ >>> file = PDBFile.read(os.path.join(path_to_structures, "1aki.pdb"))
306
+ >>> atoms_in_unit_cell = get_unit_cell(file, model=1)
307
+ """
308
+ return pdb_file.get_unit_cell(model, altloc, extra_fields, include_bonds)
309
+
310
+
235
311
  def get_symmetry_mates(
236
312
  pdb_file, model=None, altloc="first", extra_fields=[], include_bonds=False
237
313
  ):
@@ -245,6 +321,8 @@ def get_symmetry_mates(
245
321
  Consequently, this remark must be present in the file, which is
246
322
  usually only true for crystal structures.
247
323
 
324
+ DEPRECATED: Use :func:`get_unit_cell()` instead.
325
+
248
326
  Parameters
249
327
  ----------
250
328
  pdb_file : PDBFile
@@ -303,4 +381,8 @@ def get_symmetry_mates(
303
381
  >>> file = PDBFile.read(os.path.join(path_to_structures, "1aki.pdb"))
304
382
  >>> atoms_in_unit_cell = get_symmetry_mates(file, model=1)
305
383
  """
306
- return pdb_file.get_symmetry_mates(model, altloc, extra_fields, include_bonds)
384
+ warnings.warn(
385
+ "'get_symmetry_mates()' is deprecated, use 'get_unit_cell()' instead",
386
+ DeprecationWarning,
387
+ )
388
+ return pdb_file.get_unit_cell(model, altloc, extra_fields, include_bonds)
@@ -954,7 +954,7 @@ class PDBFile(TextFile):
954
954
 
955
955
  return assembly
956
956
 
957
- def get_symmetry_mates(
957
+ def get_unit_cell(
958
958
  self, model=None, altloc="first", extra_fields=[], include_bonds=False
959
959
  ):
960
960
  """
@@ -1021,7 +1021,7 @@ class PDBFile(TextFile):
1021
1021
 
1022
1022
  >>> import os.path
1023
1023
  >>> file = PDBFile.read(os.path.join(path_to_structures, "1aki.pdb"))
1024
- >>> atoms_in_unit_cell = file.get_symmetry_mates(model=1)
1024
+ >>> atoms_in_unit_cell = file.get_unit_cell(model=1)
1025
1025
  """
1026
1026
  # Get base structure
1027
1027
  structure = self.get_structure(
@@ -1041,6 +1041,83 @@ class PDBFile(TextFile):
1041
1041
  rotations, translations = _parse_transformations(transform_lines)
1042
1042
  return _apply_transformations(structure, rotations, translations)
1043
1043
 
1044
+ def get_symmetry_mates(
1045
+ self, model=None, altloc="first", extra_fields=[], include_bonds=False
1046
+ ):
1047
+ """
1048
+ Build a structure model containing all symmetric copies
1049
+ of the structure within a single unit cell, given by the space
1050
+ group.
1051
+
1052
+ This function receives the data from ``REMARK 290`` records in
1053
+ the file.
1054
+ Consequently, this remark must be present in the file, which is
1055
+ usually only true for crystal structures.
1056
+
1057
+ DEPRECATED: Use :meth:`get_unit_cell()` instead.
1058
+
1059
+ Parameters
1060
+ ----------
1061
+ model : int, optional
1062
+ If this parameter is given, the function will return an
1063
+ :class:`AtomArray` from the atoms corresponding to the given
1064
+ model number (starting at 1).
1065
+ Negative values are used to index models starting from the
1066
+ last model instead of the first model.
1067
+ If this parameter is omitted, an :class:`AtomArrayStack`
1068
+ containing all models will be returned, even if the
1069
+ structure contains only one model.
1070
+ altloc : {'first', 'occupancy', 'all'}
1071
+ This parameter defines how *altloc* IDs are handled:
1072
+ - ``'first'`` - Use atoms that have the first
1073
+ *altloc* ID appearing in a residue.
1074
+ - ``'occupancy'`` - Use atoms that have the *altloc* ID
1075
+ with the highest occupancy for a residue.
1076
+ - ``'all'`` - Use all atoms.
1077
+ Note that this leads to duplicate atoms.
1078
+ When this option is chosen, the ``altloc_id``
1079
+ annotation array is added to the returned structure.
1080
+ extra_fields : list of str, optional
1081
+ The strings in the list are optional annotation categories
1082
+ that should be stored in the output array or stack.
1083
+ These are valid values:
1084
+ ``'atom_id'``, ``'b_factor'``, ``'occupancy'`` and
1085
+ ``'charge'``.
1086
+ include_bonds : bool, optional
1087
+ If set to true, a :class:`BondList` will be created for the
1088
+ resulting :class:`AtomArray` containing the bond information
1089
+ from the file.
1090
+ Bonds, whose order could not be determined from the
1091
+ *Chemical Component Dictionary*
1092
+ (e.g. especially inter-residue bonds),
1093
+ have :attr:`BondType.ANY`, since the PDB format itself does
1094
+ not support bond orders.
1095
+
1096
+ Returns
1097
+ -------
1098
+ symmetry_mates : AtomArray or AtomArrayStack
1099
+ All atoms within a single unit cell.
1100
+ The return type depends on the `model` parameter.
1101
+
1102
+ Notes
1103
+ -----
1104
+ To expand the structure beyond a single unit cell, use
1105
+ :func:`repeat_box()` with the return value as its
1106
+ input.
1107
+
1108
+ Examples
1109
+ --------
1110
+
1111
+ >>> import os.path
1112
+ >>> file = PDBFile.read(os.path.join(path_to_structures, "1aki.pdb"))
1113
+ >>> atoms_in_unit_cell = file.get_symmetry_mates(model=1)
1114
+ """
1115
+ warnings.warn(
1116
+ "'get_symmetry_mates()' is deprecated, use 'get_unit_cell()' instead",
1117
+ DeprecationWarning,
1118
+ )
1119
+ return self.get_unit_cell(model, altloc, extra_fields, include_bonds)
1120
+
1044
1121
  def _index_models_and_atoms(self):
1045
1122
  # Line indices where a new model starts
1046
1123
  self._model_start_i = np.array(
@@ -3,6 +3,7 @@ __name__ = "biotite.structure.io.pdbx"
3
3
  __author__ = "Patrick Kunzmann"
4
4
 
5
5
  import itertools
6
+ import warnings
6
7
  import msgpack
7
8
  import numpy as np
8
9
  import biotite.structure.io.pdbx.bcif as bcif
@@ -17,7 +18,7 @@ from biotite.structure.io.pdbx.encoding import (
17
18
  )
18
19
 
19
20
 
20
- def compress(data, float_tolerance=1e-6):
21
+ def compress(data, float_tolerance=None, rtol=1e-6, atol=1e-4):
21
22
  """
22
23
  Try to reduce the size of a *BinaryCIF* file (or block, category, etc.) by testing
23
24
  different data encodings for each data array and selecting the one, which results in
@@ -29,6 +30,12 @@ def compress(data, float_tolerance=1e-6):
29
30
  The data to compress.
30
31
  float_tolerance : float, optional
31
32
  The relative error that is accepted when compressing floating point numbers.
33
+ DEPRECATED: Use `rtol` instead.
34
+ rtol, atol : float, optional
35
+ The compression factor of floating point numbers is chosen such that
36
+ either the relative (`rtol`) or absolute (`atol`) tolerance is fulfilled
37
+ for each value, i.e. the difference between the compressed and uncompressed
38
+ value is smaller than the tolerance.
32
39
 
33
40
  Returns
34
41
  -------
@@ -58,55 +65,70 @@ def compress(data, float_tolerance=1e-6):
58
65
  >>> print(f"{len(compressed_file.read()) // 1000} KB")
59
66
  111 KB
60
67
  """
68
+ if float_tolerance is not None:
69
+ warnings.warn(
70
+ "The 'float_tolerance' parameter is deprecated, use 'rtol' instead",
71
+ DeprecationWarning,
72
+ )
73
+
61
74
  match type(data):
62
75
  case bcif.BinaryCIFFile:
63
- return _compress_file(data, float_tolerance)
76
+ return _compress_file(data, rtol, atol)
64
77
  case bcif.BinaryCIFBlock:
65
- return _compress_block(data, float_tolerance)
78
+ return _compress_block(data, rtol, atol)
66
79
  case bcif.BinaryCIFCategory:
67
- return _compress_category(data, float_tolerance)
80
+ return _compress_category(data, rtol, atol)
68
81
  case bcif.BinaryCIFColumn:
69
- return _compress_column(data, float_tolerance)
82
+ return _compress_column(data, rtol, atol)
70
83
  case bcif.BinaryCIFData:
71
- return _compress_data(data, float_tolerance)
84
+ return _compress_data(data, rtol, atol)
72
85
  case _:
73
86
  raise TypeError(f"Unsupported type {type(data).__name__}")
74
87
 
75
88
 
76
- def _compress_file(bcif_file, float_tolerance):
89
+ def _compress_file(bcif_file, rtol, atol):
77
90
  compressed_file = bcif.BinaryCIFFile()
78
91
  for block_name, bcif_block in bcif_file.items():
79
- compressed_block = _compress_block(bcif_block, float_tolerance)
92
+ try:
93
+ compressed_block = _compress_block(bcif_block, rtol, atol)
94
+ except Exception:
95
+ raise ValueError(f"Failed to compress block '{block_name}'")
80
96
  compressed_file[block_name] = compressed_block
81
97
  return compressed_file
82
98
 
83
99
 
84
- def _compress_block(bcif_block, float_tolerance):
100
+ def _compress_block(bcif_block, rtol, atol):
85
101
  compressed_block = bcif.BinaryCIFBlock()
86
102
  for category_name, bcif_category in bcif_block.items():
87
- compressed_category = _compress_category(bcif_category, float_tolerance)
103
+ try:
104
+ compressed_category = _compress_category(bcif_category, rtol, atol)
105
+ except Exception:
106
+ raise ValueError(f"Failed to compress category '{category_name}'")
88
107
  compressed_block[category_name] = compressed_category
89
108
  return compressed_block
90
109
 
91
110
 
92
- def _compress_category(bcif_category, float_tolerance):
111
+ def _compress_category(bcif_category, rtol, atol):
93
112
  compressed_category = bcif.BinaryCIFCategory()
94
113
  for column_name, bcif_column in bcif_category.items():
95
- compressed_column = _compress_column(bcif_column, float_tolerance)
114
+ try:
115
+ compressed_column = _compress_column(bcif_column, rtol, atol)
116
+ except Exception:
117
+ raise ValueError(f"Failed to compress column '{column_name}'")
96
118
  compressed_category[column_name] = compressed_column
97
119
  return compressed_category
98
120
 
99
121
 
100
- def _compress_column(bcif_column, float_tolerance):
101
- data = _compress_data(bcif_column.data, float_tolerance)
122
+ def _compress_column(bcif_column, rtol, atol):
123
+ data = _compress_data(bcif_column.data, rtol, atol)
102
124
  if bcif_column.mask is not None:
103
- mask = _compress_data(bcif_column.mask, float_tolerance)
125
+ mask = _compress_data(bcif_column.mask, rtol, atol)
104
126
  else:
105
127
  mask = None
106
128
  return bcif.BinaryCIFColumn(data, mask)
107
129
 
108
130
 
109
- def _compress_data(bcif_data, float_tolerance):
131
+ def _compress_data(bcif_data, rtol, atol):
110
132
  array = bcif_data.array
111
133
  if len(array) == 1:
112
134
  # No need to compress a single value -> Use default uncompressed encoding
@@ -123,16 +145,28 @@ def _compress_data(bcif_data, float_tolerance):
123
145
  return bcif.BinaryCIFData(array, [encoding])
124
146
 
125
147
  elif np.issubdtype(array.dtype, np.floating):
148
+ if not np.isfinite(array).all():
149
+ # NaN/inf values cannot be represented by integers
150
+ # -> do not use integer encoding
151
+ return bcif.BinaryCIFData(array, [ByteArrayEncoding()])
126
152
  to_integer_encoding = FixedPointEncoding(
127
- 10 ** _get_decimal_places(array, float_tolerance)
153
+ 10 ** _get_decimal_places(array, rtol, atol)
128
154
  )
129
- integer_array = to_integer_encoding.encode(array)
130
- best_encoding, size_compressed = _find_best_integer_compression(integer_array)
131
- if size_compressed < _data_size_in_file(bcif.BinaryCIFData(array)):
132
- return bcif.BinaryCIFData(array, [to_integer_encoding] + best_encoding)
133
- else:
134
- # The float array is smaller -> encode it directly as bytes
155
+ try:
156
+ integer_array = to_integer_encoding.encode(array)
157
+ except ValueError:
158
+ # With the given tolerances integer underflow/overflow would occur
159
+ # -> do not use integer encoding
135
160
  return bcif.BinaryCIFData(array, [ByteArrayEncoding()])
161
+ else:
162
+ best_encoding, size_compressed = _find_best_integer_compression(
163
+ integer_array
164
+ )
165
+ if size_compressed < _data_size_in_file(bcif.BinaryCIFData(array)):
166
+ return bcif.BinaryCIFData(array, [to_integer_encoding] + best_encoding)
167
+ else:
168
+ # The float array is smaller -> encode it directly as bytes
169
+ return bcif.BinaryCIFData(array, [ByteArrayEncoding()])
136
170
 
137
171
  elif np.issubdtype(array.dtype, np.integer):
138
172
  array = _to_smallest_integer_type(array)
@@ -273,7 +307,7 @@ def _data_size_in_file(data):
273
307
  return len(bytes_in_file)
274
308
 
275
309
 
276
- def _get_decimal_places(array, tol):
310
+ def _get_decimal_places(array, rtol, atol):
277
311
  """
278
312
  Get the number of decimal places in a floating point array.
279
313
 
@@ -281,21 +315,24 @@ def _get_decimal_places(array, tol):
281
315
  ----------
282
316
  array : numpy.ndarray
283
317
  The array to analyze.
284
- tol : float, optional
285
- The relative tolerance allowed when the values are cut off after the returned
286
- number of decimal places.
318
+ rtol, atol : float, optional
319
+ The relative and absolute tolerance allowed when the values are cut off after
320
+ the returned number of decimal places.
287
321
 
288
322
  Returns
289
323
  -------
290
324
  decimals : int
291
325
  The number of decimal places.
292
326
  """
293
- # Decimals of NaN or infinite values do not make sense
294
- # and 0 would give NaN when rounding on decimals
295
- array = array[np.isfinite(array) & (array != 0)]
296
- for decimals in itertools.count(start=-_order_magnitude(array)):
327
+ if rtol <= 0 and atol <= 0:
328
+ raise ValueError("At least one of 'rtol' and 'atol' must be greater than 0")
329
+ # 0 would give NaN when rounding on decimals
330
+ array = array[array != 0]
331
+ for decimals in itertools.count(start=min(0, -_order_magnitude(array))):
297
332
  error = np.abs(np.round(array, decimals) - array)
298
- if np.all(error < tol * np.abs(array)):
333
+ if decimals == 100:
334
+ raise
335
+ if np.all((error < rtol * np.abs(array)) | (error < atol)):
299
336
  return decimals
300
337
 
301
338